CBMC
unicode.cpp File Reference
#include "unicode.h"
#include "invariant.h"
#include <codecvt>
#include <cstdint>
#include <iomanip>
#include <locale>
#include <sstream>
+ Include dependency graph for unicode.cpp:

Go to the source code of this file.

Functions

static void utf8_append_code (unsigned int c, std::string &result)
 Appends a unicode character to a utf8-encoded string. More...
 
std::string narrow (const wchar_t *s)
 
std::wstring widen (const char *s)
 
std::string narrow (const std::wstring &s)
 
std::wstring widen (const std::string &s)
 
std::string utf32_native_endian_to_utf8 (const std::basic_string< unsigned int > &s)
 
std::vector< std::string > narrow_argv (int argc, const wchar_t **argv_wide)
 
static void utf16_append_code (unsigned int code, std::wstring &result)
 
std::wstring utf8_to_utf16_native_endian (const std::string &in)
 Convert UTF8-encoded string to UTF-16 with architecture-native endianness. More...
 
std::u32string utf8_to_utf32 (const std::string &utf8_str)
 Convert UTF8-encoded string to UTF-32 with architecture-native endianness. More...
 
static void utf16_native_endian_to_java_string (const wchar_t ch, std::ostringstream &result, const std::locale &loc)
 Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes. More...
 
static void utf16_native_endian_to_java (const wchar_t ch, std::ostringstream &result, const std::locale &loc)
 Escapes non-printable characters, whitespace except for spaces, double- and single-quotes and backslashes. More...
 
std::string utf16_native_endian_to_java (const char16_t ch)
 
std::string utf16_native_endian_to_java_string (const std::wstring &in)
 Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes. More...
 
std::string utf16_native_endian_to_utf8 (const char16_t utf16_char)
 
std::string utf16_native_endian_to_utf8 (const std::u16string &utf16_str)
 
char16_t codepoint_hex_to_utf16_native_endian (const std::string &hex)
 
std::string codepoint_hex_to_utf8 (const std::string &hex)
 

Function Documentation

◆ codepoint_hex_to_utf16_native_endian()

char16_t codepoint_hex_to_utf16_native_endian ( const std::string &  hex)
Parameters
hex: representation of a BMP codepoint as a four-digit hexadecimal string (e.g. "0041" for \u0041)
Returns
the codepoint encoded as a single UTF-16 character in architecture-native endianness

Definition at line 379 of file unicode.cpp.

◆ codepoint_hex_to_utf8()

std::string codepoint_hex_to_utf8 ( const std::string &  hex)
Parameters
hex: representation of a BMP codepoint as a four-digit hexadecimal string (e.g. "0041" for \u0041)
Returns
UTF-8 encoding of the codepoint

Definition at line 385 of file unicode.cpp.

◆ narrow() [1/2]

std::string narrow ( const std::wstring &  s)

Definition at line 64 of file unicode.cpp.

◆ narrow() [2/2]

std::string narrow ( const wchar_t *  s)

Definition at line 33 of file unicode.cpp.

◆ narrow_argv()

std::vector<std::string> narrow_argv ( int  argc,
const wchar_t **  argv_wide 
)

Definition at line 149 of file unicode.cpp.

◆ utf16_append_code()

static void utf16_append_code ( unsigned int  code,
std::wstring &  result 
)
static

Definition at line 163 of file unicode.cpp.

◆ utf16_native_endian_to_java() [1/2]

std::string utf16_native_endian_to_java ( const char16_t  ch)
Parameters
ch: UTF-16 character in architecture-native endianness encoding
Returns
String in US-ASCII format, with \uxxxx escapes for other characters

Definition at line 336 of file unicode.cpp.

◆ utf16_native_endian_to_java() [2/2]

static void utf16_native_endian_to_java ( const wchar_t  ch,
std::ostringstream &  result,
const std::locale &  loc 
)
static

Escapes non-printable characters, whitespace except for spaces, double- and single-quotes and backslashes.

This should yield a valid Java identifier.

Parameters
ch: UTF-16 character in architecture-native endianness encoding
result: stream to receive string in US-ASCII format, with \uxxxx escapes for other characters
loc: locale to check for printable characters

Definition at line 317 of file unicode.cpp.

◆ utf16_native_endian_to_java_string() [1/2]

std::string utf16_native_endian_to_java_string ( const std::wstring &  in)

Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes.

This should yield a valid Java string literal. Note that this specifically does not escape single quotes, as these are not required to be escaped for Java string literals.

Parameters
in: String in UTF-16 (native endianness) format
Returns
Valid Java string literal in US-ASCII format, with \uxxxx escapes for other characters

Definition at line 351 of file unicode.cpp.

◆ utf16_native_endian_to_java_string() [2/2]

static void utf16_native_endian_to_java_string ( const wchar_t  ch,
std::ostringstream &  result,
const std::locale &  loc 
)
static

Escapes non-printable characters, whitespace except for spaces, double quotes and backslashes.

This should yield a valid Java string literal. Note that this specifically does not escape single quotes, as these are not required to be escaped for Java string literals.

Parameters
ch: UTF-16 character in architecture-native endianness encoding
result: stream to receive string in US-ASCII format, with \uxxxx escapes for other characters
loc: locale to check for printable characters

Definition at line 273 of file unicode.cpp.

◆ utf16_native_endian_to_utf8() [1/2]

std::string utf16_native_endian_to_utf8 ( char16_t  utf16_char)
Parameters
utf16_char: UTF-16 character in architecture-native endianness encoding
Returns
UTF-8 encoding of the same codepoint

Definition at line 360 of file unicode.cpp.

◆ utf16_native_endian_to_utf8() [2/2]

std::string utf16_native_endian_to_utf8 ( const std::u16string &  utf16_str)
Parameters
utf16_str: UTF-16 string in architecture-native endianness encoding
Returns
UTF-8 encoding of the string

Definition at line 365 of file unicode.cpp.

◆ utf32_native_endian_to_utf8()

std::string utf32_native_endian_to_utf8 ( const std::basic_string< unsigned int > &  s)
Parameters
s: UTF-32 encoded wide string
Returns
utf8-encoded string with the same unicode characters as the input.

Definition at line 137 of file unicode.cpp.

◆ utf8_append_code()

static void utf8_append_code ( unsigned int  c,
std::string &  result 
)
static

Appends a unicode character to a utf8-encoded string.

Parameters
c: unicode character to append
result: utf8-encoded string to append to

Definition at line 110 of file unicode.cpp.

◆ utf8_to_utf16_native_endian()

std::wstring utf8_to_utf16_native_endian ( const std::string &  in)

Convert UTF8-encoded string to UTF-16 with architecture-native endianness.

Parameters
in: String in UTF-8 format
Returns
String in UTF-16 format, encoded with the native endianness of the architecture.

Definition at line 192 of file unicode.cpp.

◆ utf8_to_utf32()

std::u32string utf8_to_utf32 ( const std::string &  utf8_str)

Convert UTF8-encoded string to UTF-32 with architecture-native endianness.

Parameters
utf8_str: String in UTF-8 format
Returns
String in UTF-32 format.

Definition at line 206 of file unicode.cpp.

◆ widen() [1/2]

std::wstring widen ( const char *  s)

Definition at line 49 of file unicode.cpp.

◆ widen() [2/2]

std::wstring widen ( const std::string &  s)

Definition at line 87 of file unicode.cpp.