CBMC
unicode.h
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Unicode conversion utilities (UTF-8, UTF-16, UTF-32)
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #ifndef CPROVER_UTIL_UNICODE_H
10 #define CPROVER_UTIL_UNICODE_H
11 
12 #include <algorithm>
13 #include <string>
14 #include <vector>
15 
16 // we follow the ideas suggested at
17 // http://www.utf8everywhere.org/
18 
19 std::string narrow(const wchar_t *s);
20 std::wstring widen(const char *s);
21 std::string narrow(const std::wstring &s);
22 std::wstring widen(const std::string &s);
23 
24 // This removes the need to have a #ifdef whenever using std::fstream.
// When _WIN32 is defined, widen_if_needed(s) expands to widen(s);
// otherwise it expands to (s), i.e. the argument is passed through unchanged.
25 #ifdef _WIN32
26 # define widen_if_needed(s) widen(s)
27 #else
28 # define widen_if_needed(s) (s)
29 #endif
30 
31 std::string
32 utf32_native_endian_to_utf8(const std::basic_string<unsigned int> &s);
33 
// Convert UTF8-encoded string to UTF-32 with architecture-native endianness.
36 std::u32string utf8_to_utf32(const std::string &utf8_str);
37 
// Convert UTF8-encoded string to UTF-16 with architecture-native endianness.
38 std::wstring utf8_to_utf16_native_endian(const std::string &in);
39 std::string utf16_native_endian_to_java(const char16_t ch);
40 std::string utf16_native_endian_to_java(const std::wstring &in);
// Escapes non-printable characters, whitespace except for spaces,
// double quotes and backslashes.
41 std::string utf16_native_endian_to_java_string(const std::wstring &in);
42 
43 std::vector<std::string> narrow_argv(int argc, const wchar_t **argv_wide);
44 
48 std::string utf16_native_endian_to_utf8(char16_t utf16_char);
49 
52 std::string utf16_native_endian_to_utf8(const std::u16string &utf16_str);
53 
58 char16_t codepoint_hex_to_utf16_native_endian(const std::string &hex);
59 
63 std::string codepoint_hex_to_utf8(const std::string &hex);
64 
/// Collect the C-string pointers of a range of std::string objects into a
/// vector that ends with a single nullptr entry.
///
/// The stored pointers are the results of std::string::c_str() on the
/// elements of the range; nothing is copied. They are therefore only valid
/// while the source strings are alive and unmodified. The range must yield
/// actual std::string objects: an element that merely converts to
/// std::string would bind a temporary inside the lambda and leave a dangling
/// pointer behind.
///
/// \param b: iterator to the first string; must be multi-pass, because the
///   range is walked once to size the result and once to fill it
/// \param e: iterator one past the last string
/// \return vector with std::distance(b, e) + 1 entries, the last of which is
///   nullptr
template <typename It>
std::vector<const char *> to_c_str_array(It b, It e)
{
  // Assumes that walking the range will be faster than repeated allocation
  std::vector<const char *> ret(std::distance(b, e) + 1, nullptr);
  // Fill every slot except the last one, which stays nullptr as terminator.
  std::transform(
    b, e, std::begin(ret), [](const std::string &s) { return s.c_str(); });
  return ret;
}
74 
75 #endif // CPROVER_UTIL_UNICODE_H
static abstract_object_pointert transform(const exprt &expr, const std::vector< abstract_object_pointert > &operands, const abstract_environmentt &environment, const namespacet &ns)
std::vector< std::string > narrow_argv(int argc, const wchar_t **argv_wide)
Definition: unicode.cpp:149
std::string narrow(const wchar_t *s)
Definition: unicode.cpp:33
std::string utf32_native_endian_to_utf8(const std::basic_string< unsigned int > &s)
Definition: unicode.cpp:137
std::u32string utf8_to_utf32(const std::string &utf8_str)
Convert UTF8-encoded string to UTF-32 with architecture-native endianness.
Definition: unicode.cpp:206
std::wstring widen(const char *s)
Definition: unicode.cpp:49
char16_t codepoint_hex_to_utf16_native_endian(const std::string &hex)
Definition: unicode.cpp:379
std::string utf16_native_endian_to_utf8(char16_t utf16_char)
Definition: unicode.cpp:360
std::string codepoint_hex_to_utf8(const std::string &hex)
Definition: unicode.cpp:385
std::string utf16_native_endian_to_java_string(const std::wstring &in)
Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes.
Definition: unicode.cpp:351
std::vector< const char * > to_c_str_array(It b, It e)
Definition: unicode.h:66
std::string utf16_native_endian_to_java(const char16_t ch)
Definition: unicode.cpp:336
std::wstring utf8_to_utf16_native_endian(const std::string &in)
Convert UTF8-encoded string to UTF-16 with architecture-native endianness.
Definition: unicode.cpp:192