ddc
ddcIconv.h
Go to the documentation of this file.
1 /* -*- Mode: C++ -*- */
2 // DDC originally by Alexey Sokirko
3 // Changes and modifications 2011-2015 by Bryan Jurish
4 //
5 // This file is part of DDC.
6 //
7 // DDC is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Lesser General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // DDC is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU Lesser General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
19 //
20 
21 /*--------------------------------------------------------------------------
22  * File: ddcIconv.h
23  * Author: Bryan Jurish <jurish@uni-potsdam.de>
24  * Description: iconv interface
25  *--------------------------------------------------------------------------*/
26 
32 #ifndef _DDC_ICONV_H
33 #define _DDC_ICONV_H
34 
35 #include <stdlib.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <string>
39 
40 #include <iconv.h>
41 
42 using namespace std;
43 
/** \brief default buffer length (in bytes) for iconv conversions */
#define DDC_ICONV_BUFLEN 256

/** \brief when defined, converter state is guarded by a pthread mutex
 *  (defined unconditionally here, so the mutex is always compiled in)
 */
#define DDC_ICONV_PTHREAD 1

#ifdef DDC_ICONV_PTHREAD
# include <pthread.h>
/** \brief expands to its argument only if pthread support is compiled in */
# define PTOP(x) x
#else
# define PTOP(x)
#endif

/** \brief Interface to iconv.h character-conversion routines.
 *
 *  Wraps an \c iconv_t descriptor together with fixed-size staging buffers.
 *  A converter whose source or destination encoding is empty or "-" is
 *  "empty": it owns no descriptor and the std::string overloads of convert()
 *  copy their input through unchanged.
 *
 *  open(), close() and convert() serialize on an internal mutex when
 *  DDC_ICONV_PTHREAD is defined; reset() and the state predicates do not lock.
 */
class ddcIconv {
public:
  //--------------------------------------------------------------
  // Encodings

  std::string enc_src;   ///< source encoding
  std::string enc_dst;   ///< destination encoding

public:
  //--------------------------------------------------------------
  // Low-level data

  iconv_t ic;            ///< underlying iconv object, or (iconv_t)-1 if none is open

  char ibuf[DDC_ICONV_BUFLEN];  ///< staging buffer for one chunk of input
  char obuf[DDC_ICONV_BUFLEN];  ///< staging buffer for one chunk of output

  PTOP(pthread_mutex_t mut;)    ///< guards enc_src, enc_dst, ic and both buffers

private:
#ifdef DDC_ICONV_PTHREAD
  /** Scoped lock: acquires the mutex on construction and releases it when
   *  the scope is left, including by an early return or an exception.
   */
  struct MutexGuard {
    explicit MutexGuard(pthread_mutex_t &mutex) : m(mutex) { pthread_mutex_lock(&m); }
    ~MutexGuard() { pthread_mutex_unlock(&m); }
  private:
    pthread_mutex_t &m;
    MutexGuard(const MutexGuard&);            //-- not copyable
    MutexGuard& operator=(const MutexGuard&); //-- not assignable
  };
#endif

public:
  //--------------------------------------------------------------
  // Constructors etc.

  /** Construct a converter from \a srcEncoding to \a dstEncoding.
   *  If the descriptor cannot be opened (or either encoding is empty or "-"),
   *  the object is left in the closed, empty state; check valid() or good().
   */
  ddcIconv(const std::string &srcEncoding="", const std::string &dstEncoding="")
    : enc_src(""),
      enc_dst(""),
      ic((iconv_t)-1)
  {
    PTOP(pthread_mutex_init(&mut,NULL);)
    if (!open(srcEncoding,dstEncoding))
      close();
  }

  /** Copy constructor: opens an independent descriptor for the same encoding
   *  pair instead of sharing \a other's, so each object closes only its own
   *  descriptor and owns its own mutex. \a other is read without locking.
   */
  ddcIconv(const ddcIconv &other)
    : enc_src(""),
      enc_dst(""),
      ic((iconv_t)-1)
  {
    PTOP(pthread_mutex_init(&mut,NULL);)
    if (!open(other.enc_src,other.enc_dst))
      close();
  }

  /** Copy assignment: re-opens this converter for \a other's encoding pair.
   *  \a other is read without locking.
   */
  ddcIconv& operator=(const ddcIconv &other)
  {
    if (this != &other) {
      if (!open(other.enc_src,other.enc_dst))
        close();
    }
    return *this;
  }

  /** Destructor: closes the descriptor (if any) and destroys the mutex. */
  ~ddcIconv(void)
  {
    close();
    PTOP(pthread_mutex_destroy(&mut);)
  }

  //--------------------------------------------------------------
  // Low-level access

  /** \returns true iff either encoding is unset (empty string or "-") */
  inline bool empty(void) const
  {
    return (enc_src.empty() || enc_src=="-" || enc_dst.empty() || enc_dst=="-");
  }

  /** \returns true iff an iconv descriptor is currently open */
  inline bool valid(void) const
  {
    return ic != (iconv_t)-1;
  }

  /** \returns true iff the converter is usable: either a pass-through
   *  (empty) converter or one with an open descriptor
   */
  inline bool good(void) const
  {
    return empty() || valid();
  }

  /** (Re-)open the converter for the given encoding pair, closing any
   *  descriptor that was open before.
   *  \returns true iff a descriptor was opened; note that this is false for
   *   an empty encoding pair even though good() then holds
   */
  inline bool open(const std::string &srcEncoding, const std::string dstEncoding)
  {
    //-- snapshot first: srcEncoding may refer to our own enc_src, which close() clears
    const std::string src(srcEncoding);
    close();
    PTOP(MutexGuard guard(mut);)
    enc_src = src;
    enc_dst = dstEncoding;
    if (!empty())
      ic = iconv_open(enc_dst.c_str(), enc_src.c_str());
    return valid();
  }

  /** Close the descriptor (if any) and forget both encodings.
   *  \returns false iff iconv_close() failed, in which case the state is
   *   left untouched
   */
  inline bool close(void) {
    PTOP(MutexGuard guard(mut);)
    if (!empty() && valid() && iconv_close(ic)==-1) return false;
    enc_src.clear();
    enc_dst.clear();
    ic = (iconv_t)-1;
    return true;
  }

  /** Return the descriptor to its initial shift state.
   *  Does not lock: convert() calls this while already holding the mutex.
   */
  inline void reset(void) {
    if (!empty() && valid())
      iconv(ic, NULL,NULL, NULL,NULL);
  }

  //--------------------------------------------------------------
  // Conversion

  /** Convert \a ilen bytes starting at \a idata and store the result in \a out.
   *
   *  Input is processed in chunks of at most DDC_ICONV_BUFLEN bytes. A byte
   *  that begins an invalid sequence (or an incomplete sequence at the very
   *  end of the input) is copied to \a out unconverted and conversion resumes
   *  with the following byte.
   *
   *  \param idata input bytes; may contain NUL bytes and must not point into \a out
   *  \param ilen  number of bytes at \a idata
   *  \param out   receives the converted data; previous content is discarded
   *  \returns false if no descriptor is open or iconv() reports an
   *   unexpected error; \a out then holds whatever was converted so far
   */
  inline bool convert(const char *idata, const size_t ilen, std::string &out)
  {
    //-- emptiness is decided by the length, not by the first byte:
    //   multi-byte encodings such as UCS-2BE legitimately start with NUL
    if (!idata || ilen==0) {
      out.clear();
      return true;
    }

    PTOP(MutexGuard guard(mut);)
    out.clear();
    if (!valid())
      return false;  //-- never hand an invalid descriptor to iconv()
    out.reserve(ilen);
    reset();

    size_t itodo = ilen;
    while (itodo > 0) {
      const size_t ileft0 = itodo > DDC_ICONV_BUFLEN ? DDC_ICONV_BUFLEN : itodo;
      size_t ileft = ileft0;
      size_t oleft = DDC_ICONV_BUFLEN;
      char *iptr = ibuf;
      char *optr = obuf;

      //-- copy next chunk of input data to buffer
      memcpy(ibuf, idata+(ilen-itodo), ileft0);

      //-- underlying iconv call; grab errno before anything else can touch it
      const size_t nx = iconv(ic, &iptr, &ileft, &optr, &oleft);
      const bool failed = (nx==(size_t)-1);
      const int  err    = failed ? errno : 0;

      //-- invalid converter object (EBADF) or any other unexpected error
      if (failed && err!=E2BIG && err!=EINVAL && err!=EILSEQ)
        return false;

      //-- keep whatever was converted before iconv() stopped
      const size_t nproduced = DDC_ICONV_BUFLEN-oleft;
      out.append(obuf, nproduced);

      if (failed) {
        //-- E2BIG : output buffer full: just continue with the unconsumed input
        //-- EINVAL: incomplete sequence at end of chunk: if more input follows,
        //           the tail is re-read together with the next chunk; at the very
        //           end of the input it is handled like an invalid sequence
        //-- EILSEQ: invalid sequence: pass the offending byte through
        const bool moreInput = (itodo != ileft0);
        if (err==EILSEQ || (err==EINVAL && !moreInput)) {
          if (ileft) {
            out.push_back(*iptr);
            ileft--;
          }
          reset();
        }
        //-- neither input consumed nor output produced: bail out rather than spin
        if (ileft==ileft0 && nproduced==0)
          return false;
      }
      itodo -= (ileft0-ileft);
    }

    return true;
  }

  /** Convert \a in to \a out; an empty converter copies \a in unchanged.
   *  \a in and \a out may be the same object.
   *  \returns false if conversion failed
   */
  inline bool convert(const std::string &in, std::string &out)
  {
    if (empty()) {
      out = in;
      return true;
    }
    if (&in == &out) {
      //-- in-place request: convert into a scratch string so the input is not
      //   cleared and overwritten while it is still being read
      std::string converted;
      const bool ok = convert(in.data(), in.size(), converted);
      out.swap(converted);
      return ok;
    }
    return convert(in.data(), in.size(), out);
  }

  /** Convert \a in and return the result by value.
   *  Failure is not reported: the returned string then holds whatever was
   *  converted before the error.
   */
  inline std::string convert(const std::string &in)
  {
    std::string out("");
    convert(in,out);
    return out;
  }

}; //-- /class ddcIconv
242 
243 #undef DDC_ICONV_BUFLEN
244 #undef PTOP
245 
246 #endif //_DDC_ICONV_H
247 
248 /*--- emacs style variables ---
249  * Local Variables:
250  * mode: C++
251  * c-file-style: "ellemtel"
252  * c-basic-offset: 4
253  * tab-width: 8
254  * indent-tabs-mode: nil
255  * End:
256  */
Interface to iconv.h character-conversion routines.
Definition: ddcIconv.h:58
bool empty(void) const
Definition: ddcIconv.h:107
~ddcIconv(void)
Definition: ddcIconv.h:95
bool convert(const char *idata, const size_t ilen, string &out)
Definition: ddcIconv.h:162
bool good(void) const
Definition: ddcIconv.h:119
bool valid(void) const
Definition: ddcIconv.h:113
#define DDC_ICONV_BUFLEN
default buffer length (in bytes) for iconv conversions
Definition: ddcIconv.h:45
string enc_src
source encoding
Definition: ddcIconv.h:63
void reset(void)
Definition: ddcIconv.h:152
bool convert(const string &in, string &out)
Definition: ddcIconv.h:223
#define PTOP(x)
Definition: ddcIconv.h:52
bool open(const std::string &srcEncoding, const std::string dstEncoding)
Definition: ddcIconv.h:128
string enc_dst
destination encoding
Definition: ddcIconv.h:64
bool close(void)
Definition: ddcIconv.h:141
iconv_t ic
underlying iconv object
Definition: ddcIconv.h:71
ddcIconv(const std::string &srcEncoding="", const std::string &dstEncoding="")
Definition: ddcIconv.h:84
std::string convert(const std::string &in)
Definition: ddcIconv.h:233