ddc
utf8xx.h
Go to the documentation of this file.
1 //-*- Mode: C++ -*-
2 // DDC originally by Alexey Sokirko
3 // Changes and modifications 2011-2015 by Bryan Jurish
4 //
5 // This file is part of DDC.
6 //
7 // DDC is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Lesser General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // DDC is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU Lesser General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
19 //
20 #ifndef UTF8XX_H
21 #define UTF8XX_H
22 
23 /*--moo--*/
41 #include <stdint.h>
42 #include <string.h>
43 #include <assert.h>
44 
// Fallback: when no macro named u_int32_t exists, define it as uint32_t from <stdint.h>.
// NOTE(review): #ifndef only sees macros, so a platform *typedef* named u_int32_t
// does not suppress this define -- confirm that is intended.
45 #ifndef u_int32_t
46 # define u_int32_t uint32_t
47 #endif
48 
// ucs4: alias for u_int32_t; element type of ucs4str and the per-character type of this API.
// NOTE(review): the guard tests for a *macro* named ucs4; an existing typedef of that
// name is not detected by #ifndef.
49 #ifndef ucs4
50 typedef u_int32_t ucs4;
51 #endif
52 
// uchar: shorthand for unsigned char.
// NOTE(review): the guard tests for a *macro* named uchar, not for an existing typedef.
53 #ifndef uchar
54 typedef unsigned char uchar;
55 #endif
56 
// Size constants (4 and 5).
// NOTE(review): presumably the maximum number of UTF-8 bytes per character, and that
// value plus one for a terminating NUL -- confirm against utf8xx.cpp.
57 #define UTF8XX_MAXBYTES 4
58 #define UTF8XX_MAXBYTES1 5
59 /*--/moo--*/
60 
61 #include <stdarg.h>
62 
63 #include <string>
64 #include <vector>
65 
// True when the top two bits of c are not binary 10, i.e. c is NOT a UTF-8
// continuation byte (10xxxxxx); such a byte starts a sequence or is plain ASCII.
67 #define isutf(c) (((c)&0xC0)!=0x80)
68 
// String of ucs4 values stored as a std::vector, one element per value.
70 typedef std::vector<ucs4> ucs4str;
71 
// Alias for std::string; by its name, intended to carry UTF-8 encoded bytes.
73 typedef std::string utf8str;
74 
// NOTE(review): presumably the byte length of the UTF-8 sequence that starts at
// byte index i of s -- confirm against utf8xx.cpp.
76 int u8_seqlen(const utf8str &s, size_t i);
77 
// Out-parameter form: reads src and writes into dst.
// NOTE(review): presumably decodes UTF-8 src into UCS-4 dst; the meaning of the
// returned size_t is not visible from this header -- confirm.
85 size_t u8_toucs(ucs4str &dst, const utf8str &src);
86 
// Convenience overload returning the ucs4str by value.
// NOTE(review): presumably the decoded form of src -- confirm.
88 ucs4str u8_toucs(const utf8str &src);
89 
// Out-parameter form: reads src and writes into dst.
// NOTE(review): presumably encodes UCS-4 src as UTF-8 into dst; the meaning of the
// returned size_t is not visible from this header -- confirm.
95 size_t u8_toutf8(utf8str &dst, const ucs4str &src);
96 
// Convenience overload returning the utf8str by value.
// NOTE(review): presumably the UTF-8 encoding of src -- confirm.
98 utf8str u8_toutf8(const ucs4str &src);
99 
// NOTE(review): presumably the number of UTF-8 bytes needed to encode the single
// character ch -- confirm.
103 size_t u8_wc_len(ucs4 ch);
104 
// NOTE(review): presumably the number of UTF-8 bytes needed to encode every
// character of src -- confirm.
106 size_t u8_ws_len(const ucs4str &src);
107 
// Takes one character ch and a mutable destination string dst.
// NOTE(review): presumably writes the UTF-8 encoding of ch into dst; whether dst is
// appended to or overwritten, and what the size_t means, is not visible here -- confirm.
113 size_t u8_wc_toutf8(utf8str &dst, ucs4 ch);
114 
117 
118 
// NOTE(review): presumably maps a character index to a byte offset within s -- confirm.
// charnum is a signed int while the result is size_t; handling of negative values is
// not visible from this header.
120 size_t u8_offset(const utf8str &s, int charnum);
121 
// NOTE(review): presumably maps a byte offset within s to a character index -- confirm.
// offset is a signed int while the result is size_t; handling of negative values is
// not visible from this header.
123 size_t u8_charnum(const utf8str &s, int offset);
124 //int u8_charnum(char *s, int offset);
125 
// i is an in/out byte-index pointer (non-const).
// NOTE(review): presumably returns the character at *i and advances *i past it -- confirm.
127 ucs4 u8_nextchar(const utf8str &s, size_t *i);
128 
// Variant of u8_nextchar that also takes an explicit length slen.
// NOTE(review): presumably slen bounds how far into s the decoder may read -- confirm.
130 ucs4 u8_nextcharn(const utf8str &s, size_t slen, size_t *i);
131 
// NOTE(review): presumably the number of characters (not bytes) in s -- confirm.
133 size_t u8_strlen(const utf8str &s);
134 
// i is an in/out byte-index pointer (non-const).
// NOTE(review): presumably moves *i forward to the start of the next character in s -- confirm.
136 void u8_inc(const utf8str &s, size_t *i);
137 
// i is an in/out byte-index pointer (non-const).
// NOTE(review): presumably moves *i back to the start of the previous character in s -- confirm.
139 void u8_dec(const utf8str &s, size_t *i);
140 
141 #ifdef UTF8XX_C_API
142 //-- NOT YET CONVERTED TO C++ CONVENTIONS
143 
// Only declared when UTF8XX_C_API is defined.
// NOTE(review): presumably parses one escape sequence at src and stores its value in
// *dest; the meaning of the int result is not visible here -- confirm.
147 int u8_read_escape_sequence(char *src, u_int32_t *dest);
148 
// Only declared when UTF8XX_C_API is defined.
// NOTE(review): presumably writes an escaped text form of ch into buf (capacity sz) -- confirm.
151 int u8_escape_wchar(char *buf, int sz, u_int32_t ch);
152 
// Only declared when UTF8XX_C_API is defined.
// NOTE(review): presumably copies src into buf (capacity sz) with escape sequences
// replaced by the characters they denote -- confirm.
154 int u8_unescape(char *buf, int sz, char *src);
155 
// Only declared when UTF8XX_C_API is defined.
// NOTE(review): presumably copies src into buf (capacity sz) in escaped form, with
// escape_quotes selecting whether quote characters are escaped too -- confirm.
159 int u8_escape(char *buf, int sz, char *src, int escape_quotes);
160 
// Only declared when UTF8XX_C_API is defined.
// NOTE(review): presumably predicates reporting whether c is an octal / hexadecimal
// digit -- confirm.
162 int octal_digit(char c);
163 int hex_digit(char c);
164 
// Only declared when UTF8XX_C_API is defined.
// NOTE(review): presumably locates the first occurrence of character ch in s and
// reports its character index through *charn -- confirm.
167 char *u8_strchr(char *s, u_int32_t ch, int *charn);
168 
// Only declared when UTF8XX_C_API is defined.
// NOTE(review): presumably like u8_strchr, but the search is limited by sz instead of
// a terminating NUL -- confirm.
171 char *u8_memchr(char *s, u_int32_t ch, size_t sz, int *charn);
172 
// Only declared when UTF8XX_C_API is defined.
// NOTE(review): presumably reports whether the given locale name denotes a UTF-8
// locale -- confirm.
173 int u8_is_locale_utf8(char *locale);
174 
// Only declared when UTF8XX_C_API is defined.
// printf-style pair: a va_list form and a variadic form (va_list comes from <stdarg.h>).
// NOTE(review): the output destination and the meaning of the result are not visible here -- confirm.
178 int u8_vprintf(char *fmt, va_list ap);
179 int u8_printf(char *fmt, ...);
180 
181 #endif /* UTF8XX_C_API */
182 
183 #endif /* UTF8XX_H */
184 
185 /*--- emacs style variables ---
186  * Local Variables:
187  * mode: C++
188  * c-file-style: "ellemtel"
189  * c-basic-offset: 4
190  * tab-width: 8
191  * indent-tabs-mode: nil
192  * End:
193  */
size_t u8_wc_len(ucs4 ch)
Definition: CommonLib/utf8xx.cpp:156
int hex_digit(char c)
Definition: CommonLib/utf8xx.cpp:293
size_t u8_ws_len(const ucs4str &src)
Definition: CommonLib/utf8xx.cpp:166
int u8_seqlen(const utf8str &s, size_t i)
Definition: CommonLib/utf8xx.cpp:68
size_t u8_charnum(const utf8str &s, int offset)
Definition: CommonLib/utf8xx.cpp:223
uint32_t ucs4
Definition: utf8xx.h:50
ucs4 u8_nextcharn(const utf8str &s, size_t slen, size_t *i)
Definition: CommonLib/utf8xx.cpp:252
void u8_inc(const utf8str &s, size_t *i)
Definition: CommonLib/utf8xx.cpp:276
size_t u8_offset(const utf8str &s, int charnum)
Definition: CommonLib/utf8xx.cpp:211
ucs4 u8_nextchar(const utf8str &s, size_t *i)
Definition: CommonLib/utf8xx.cpp:236
int octal_digit(char c)
Definition: CommonLib/utf8xx.cpp:288
std::string utf8str
Definition: utf8xx.h:73
#define u_int32_t
Definition: utf8xx.h:46
std::vector< ucs4 > ucs4str
Definition: utf8xx.h:70
void u8_dec(const utf8str &s, size_t *i)
Definition: CommonLib/utf8xx.cpp:282
size_t u8_strlen(const utf8str &s)
Definition: CommonLib/utf8xx.cpp:268
unsigned char uchar
Definition: utf8xx.h:54
size_t u8_toutf8(utf8str &dst, const ucs4str &src)
Definition: CommonLib/utf8xx.cpp:120
size_t u8_wc_toutf8(utf8str &dst, ucs4 ch)
Definition: CommonLib/utf8xx.cpp:175
size_t u8_toucs(ucs4str &dst, const utf8str &src)
Definition: CommonLib/utf8xx.cpp:83