ddc
|
Functions | |
int | u8_seqlen (const utf8str &s, size_t i) |
size_t | u8_toucs (ucs4str &dst, const utf8str &src) |
ucs4str | u8_toucs (const utf8str &src) |
size_t | u8_toutf8 (utf8str &dst, const ucs4str &src) |
utf8str | u8_toutf8 (const ucs4str &src) |
size_t | u8_wc_len (ucs4 ch) |
size_t | u8_ws_len (const ucs4str &src) |
size_t | u8_wc_toutf8 (utf8str &dst, ucs4 ch) |
utf8str | u8_wc_toutf8 (ucs4 ch) |
size_t | u8_offset (const utf8str &s, int charnum) |
size_t | u8_charnum (const utf8str &s, int offset) |
ucs4 | u8_nextchar (const utf8str &s, size_t *i) |
ucs4 | u8_nextcharn (const utf8str &s, size_t slen, size_t *i) |
size_t | u8_strlen (const utf8str &s) |
void | u8_inc (const utf8str &s, size_t *i) |
void | u8_dec (const utf8str &s, size_t *i) |
int | octal_digit (char c) |
int | hex_digit (char c) |
Variables | |
static const uint32_t | offsetsFromUTF8 [6] |
static const char | trailingBytesForUTF8 [256] |
int u8_seqlen | ( | const utf8str & | s, |
size_t | i | ||
) |
returns length of next utf-8 sequence
References trailingBytesForUTF8.
convert UTF-8 byte string src to UCS-4 wide character string dst, without error checking. Data is appended to dst.
dst | = destination UCS-4 string |
src | = source UTF-8 byte string |
References offsetsFromUTF8, and trailingBytesForUTF8.
Referenced by hex_digit(), and u8_toucs().
convert UTF-8 byte string src to a new UCS-4 string
References u8_toucs().
convert UCS-4 wide character string to UTF-8 byte string.
dst | = destination UTF-8 string |
src | = source UCS-4 string |
Referenced by u8_toutf8().
convenience wrapper: UCS-4 string -> UTF-8 string
References u8_toutf8().
size_t u8_wc_len | ( | ucs4 | ch | ) |
(moo) get number of bytes required for representing a wide character ch in UTF-8. Returns 0 on error.
Referenced by u8_wc_toutf8(), and u8_ws_len().
size_t u8_ws_len | ( | const ucs4str & | src | ) |
(moo) get number of bytes required for representing the wide character string src in UTF-8
References u8_wc_len().
append single UCS-4 character to a UTF-8 string
dst | UTF-8 destination buffer |
ch | UCS-4 character to convert |
Referenced by hex_digit(), u8_wc_toutf8(), unescapeCString(), unescapeJsonString(), and unescapeUtf8String().
convenience wrapper: UCS-4 char -> UTF-8 string
References u8_wc_len(), and u8_wc_toutf8().
size_t u8_offset | ( | const utf8str & | s, |
int | charnum | ||
) |
(logical) character number to (physical) byte offset
References isutf.
size_t u8_charnum | ( | const utf8str & | s, |
int | offset | ||
) |
(physical) byte offset to (logical) character number
References isutf.
read and return next logical character, updating an index variable
References isutf, and offsetsFromUTF8.
Referenced by hex_digit(), and u8_strlen().
(moo): return next character, updating an index variable which may not exceed length slen
References isutf, and offsetsFromUTF8.
size_t u8_strlen | ( | const utf8str & | s | ) |
count the number of characters in a UTF-8 string
References u8_nextchar().
int octal_digit | ( | char | c | ) |
int hex_digit | ( | char | c | ) |
References isutf, octal_digit(), offsetsFromUTF8, u8_nextchar(), u8_toucs(), u8_wc_toutf8(), and u_int32_t.
|
static |
Referenced by hex_digit(), u8_nextchar(), u8_nextcharn(), and u8_toucs().
|
static |
Referenced by u8_seqlen(), and u8_toucs().