Main Page | Directories | File List

mootRecode.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2005 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011    
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016    
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with this library; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootRecode.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + moocow's PoS tagger : librecode interface
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _MOOT_RECODE_H
00030 #define _MOOT_RECODE_H
00031 
00032 #include <mootConfig.h>
00033 
00034 #ifdef MOOT_RECODE_ENABLED
00035 #include <stdbool.h>
00036 #include <recodext.h>
00037 #endif // MOOT_RECODE_ENABLED
00038 
00039 #include <assert.h>
00040 #include <string>
00041 
00042 #include <mootIO.h>
00043 
00044 namespace moot {
00045 
00046 using namespace std;
00047 
00049 class mootRecoder {
00050 public:
00051   /*--------------------------------------------------------------
00052    * DATA
00053    */
00054 #ifdef MOOT_RECODE_ENABLED
00055   RECODE_OUTER   rc_outer;   
00056   RECODE_REQUEST rc_request; 
00057 #else
00058   void *         rc_outer;   
00059   void *         rc_request; 
00060 #endif // MOOT_RECODE_ENABLED
00061   std::string    rc_reqstr;  
00062 
00063 
00064 
00065 public:
00066   /*--------------------------------------------------------------
00067    * METHODS
00068    */
00069   /*----------------------------------------------------*/
00073   mootRecoder(const std::string &requestString="", bool diacritics_only=false)
00074     : rc_outer(NULL), rc_request(NULL)
00075   {
00076     if (!requestString.empty()) scan_request(requestString,diacritics_only);
00077   };
00078 
00080   mootRecoder(const std::string &src, const std::string &dst, bool diacritics_only=false)
00081     : rc_outer(NULL), rc_request(NULL)
00082   {
00083     scan_request(src, dst, diacritics_only);
00084   };
00085 
00087   ~mootRecoder(void)
00088   {
00089 #ifdef MOOT_RECODE_ENABLED
00090     if (rc_request) recode_delete_request(rc_request); 
00091     if (rc_outer) recode_delete_outer(rc_outer);
00092 #endif // MOOT_RECODE_ENABLED
00093   };
00094 
00096   void ensure_outer(void) {
00097 #ifdef MOOT_RECODE_ENABLED
00098     if (!rc_outer) rc_outer = recode_new_outer(true); // (AUTO_ABORT)
00099 #endif // MOOT_RECODE_ENABLED
00100   };
00102 
00103   /*----------------------------------------------------*/
00107   inline void scan_request(const std::string &reqstr, bool diacritics_only=false) {
00108     ensure_outer();
00109     rc_reqstr = reqstr;
00110 #ifdef MOOT_RECODE_ENABLED
00111     if (!rc_request) rc_request = recode_new_request(rc_outer);
00112     assert(rc_request != NULL);
00113     if (!reqstr.empty()) {
00114       rc_request->diacritics_only = diacritics_only;  // this MUST happen before scan_request() !
00115       if (!recode_scan_request(rc_request, reqstr.c_str())) {
00116         fprintf(stderr, "mootRecode::scan_request(): failed to scan request `%s'\n",
00117                 reqstr.c_str());
00118         if (rc_request) {
00119           recode_delete_request(rc_request);
00120           rc_request = NULL;
00121         }
00122       }
00123       return;
00124     }
00125     else if (rc_request) {
00126       recode_delete_request(rc_request);
00127       rc_request = NULL;
00128     }
00129 #else
00130     fprintf(stderr, "mootRecode::scan_request(): librecode disabled! (request=\"%s\"\n",
00131             reqstr.c_str());
00132 #endif // MOOT_RECODE_ENABLED
00133   };
00134 
00136   inline void scan_request(const std::string &src, const std::string &dst, bool diacritics_only=false)
00137   {
00138     if (src.empty() && dst.empty()) scan_request(src,diacritics_only);
00139     else {
00140       std::string reqstr = src;
00141       reqstr.append("..");
00142       reqstr.append(dst);
00143       scan_request(reqstr,diacritics_only);
00144     }
00145   };
00147 
00148   /*----------------------------------------------------*/
00152   inline bool string2file(const char *in, FILE *out)
00153   {
00154 #ifdef MOOT_RECODE_ENABLED
00155     if (rc_request)
00156       return recode_string_to_file(rc_request,in,out);
00157 #endif
00158     fputs(in,out);
00159     return !ferror(out);
00160   };
00161 
00163   inline bool buffer2file(const char *buf, size_t buflen, FILE *out)
00164   {
00165 #ifdef MOOT_RECODE_ENABLED
00166     if (rc_request)
00167       return recode_buffer_to_file(rc_request,buf,buflen,out);
00168 #endif
00169     fwrite(buf,buflen,1,out);
00170     return !ferror(out);
00171   };
00172 
00174   inline bool string2file(const std::string &in, FILE *out)
00175   {
00176 #ifdef MOOT_RECODE_ENABLED
00177     if (rc_request)
00178       return recode_buffer_to_file(rc_request,in.data(),in.size(),out);
00179 #endif
00180     fwrite(in.data(), in.size(), 1, out);
00181     return !ferror(out);
00182   };
00184 
00185   /*----------------------------------------------------*/
00189   inline bool buffer2buffer(const char *in, size_t in_size, char **out, size_t *out_used, size_t *out_alloc)
00190   {
00191 #ifdef MOOT_RECODE_ENABLED
00192     if (rc_request)
00193       return recode_buffer_to_buffer(rc_request, in, in_size, out, out_used, out_alloc);
00194 #endif
00195     if (!*out) *out = (char *)malloc(in_size);
00196     else if (*out_alloc < in_size) {
00197       free(*out);
00198       *out = (char *)malloc(in_size);
00199     }
00200     assert(*out != NULL);
00201     *out_alloc = in_size;
00202     memcpy(*out, in, in_size);
00203     *out_used = in_size;
00204     return true;
00205   };
00206 
00208   inline bool string2buffer(const char *s, char **out, size_t *out_used, size_t *out_alloc)
00209   {
00210     return buffer2buffer(s,strlen(s), out,out_used,out_alloc);
00211   };
00212 
00214   inline bool string2buffer(const std::string &in, char **out, size_t *out_used, size_t *out_alloc)
00215   {
00216     return buffer2buffer(in.data(),in.size(), out,out_used,out_alloc);
00217   };
00219 
00220   /*----------------------------------------------------*/
00224   inline bool buffer2string(const char *in, size_t in_size, std::string &out)
00225   {
00226     char *tmp = NULL;
00227     size_t out_used = 0, out_alloc = 0;
00228     bool rv = buffer2buffer(in,in_size, &tmp, &out_used, &out_alloc);
00229     if (tmp) {
00230       out.append(tmp, out_used);
00231       free(tmp);
00232     }
00233     return rv;
00234   }
00235 
00237   inline bool string2string(const char *s, std::string &out)
00238   {
00239     return buffer2string(s,strlen(s), out);
00240   };
00241 
00243   inline bool string2string(const std::string &in, std::string &out)
00244   {
00245     return buffer2string(in.data(),in.size(), out);
00246   };
00247   /* ... etc. */
00249 
00250   /*----------------------------------------------------*/
00254   inline bool string2mstream(const char *in, mootio::mostream *out)
00255   {
00256     std::string s;
00257     bool rv = string2string(in,s);
00258     return out && out->puts(s) && rv;
00259   };
00260 
00262   inline bool buffer2mstream(const char *buf, size_t buflen, mootio::mostream *out)
00263   {
00264     std::string s;
00265     bool rv = buffer2string(buf,buflen,s);
00266     return out && out->puts(s) && rv;
00267   };
00268 
00270   inline bool string2mstream(const std::string &in, mootio::mostream *out)
00271   {
00272     std::string s;
00273     bool rv = string2string(in,s);
00274     return out && out->puts(s) && rv;
00275   };
00277 
00278 }; //-- /class mootRecoder
00279 
00280 
00281 
00285 class mootXMLRecoder {
00286 public:
00287   /*--------------------------------------------------------------
00288    * DATA
00289    */
00290   bool        standalone;         
00291 
00292   mootRecoder rc1;                
00293   mootRecoder rc2;                
00294 
00295   char       *buf1;               
00296   size_t      buf1_used;          
00297   size_t      buf1_alloc;         
00298 
00299   char       *buf2;               
00300   size_t      buf2_used;          
00301   size_t      buf2_alloc;         
00302 
00303 public:
00304   /*--------------------------------------------------------------
00305    * METHODS
00306    */
00307 
00308   /*----------------------------------------------------*/
00312   mootXMLRecoder(const std::string &src="", const std::string &dst="")
00313     : buf1(NULL), buf1_used(0), buf1_alloc(0),
00314       buf2(NULL), buf2_used(0), buf2_alloc(0)
00315   {
00316     //-- share outer context
00317     rc1.ensure_outer();
00318     rc2.rc_outer = rc1.rc_outer;
00319 
00320     scan_request(src,dst);
00321   };
00322 
00324   ~mootXMLRecoder(void)
00325   {
00326     rc2.rc_outer = NULL; 
00327     if (buf1) free(buf1);
00328     if (buf2) free(buf2);
00329   };
00331 
00332   /*----------------------------------------------------*/
00336   inline void scan_request(const std::string &reqstr)
00337   {
00338     size_t dst_begin    = reqstr.rfind("..");
00339     string src(reqstr, 0, dst_begin);
00340     string dst(reqstr, dst_begin);
00341     standalone = (dst == "XML-standalone" || dst == "h0");
00342     scan_request(src,dst);
00343   };
00344 
00346   inline void scan_request(const std::string &src, const std::string &dst) {
00347     standalone = (dst.empty() || dst == "XML-standalone" || dst == "h0");
00348     if (standalone) {
00349       if (src.empty()) {
00350         rc1.scan_request("",false);
00351         return;
00352       }
00353       rc1.scan_request(src,dst,false);
00354     } else {
00355       rc1.scan_request(src,"HTML_4.0",false);
00356       rc2.scan_request("HTML_4.0",dst,true);
00357     }
00358   };
00360 
00361   /*----------------------------------------------------*/
00365   inline bool buffer2file(const char *in, size_t in_size, FILE *out)
00366   {
00367     if (standalone) return rc1.buffer2file(in,in_size, out);
00368     //-- temp bools are ugly, but librecode seems to be returning weird...
00369     bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00370     bool rv2 = rc2.buffer2file(buf1,buf1_used, out);
00371     return rv1 && rv2;
00372   };
00373 
00375   inline bool string2file(const char *in, FILE *out)
00376   {
00377     return buffer2file(in,strlen(in),out);
00378   };
00379 
00381   inline bool string2file(const std::string &in, FILE *out)
00382   {
00383     return buffer2file(in.data(),in.size(), out);
00384   };
00385 
00386   /*----------------------------------------------------*/
00390   inline bool buffer2buffer(const char *in, size_t in_size, char **out, size_t *out_used, size_t *out_alloc)
00391   {
00392     if (standalone) return rc1.buffer2buffer(in,in_size, out,out_used,out_alloc);
00393     //-- temp bools are ugly, but librecode is returning weird...
00394     bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00395     bool rv2 = rc2.buffer2buffer(buf1,buf1_used, out,out_used,out_alloc);
00396     return rv1 && rv2;
00397   };
00398 
00400   inline bool string2buffer(const char *s, char **out, size_t *out_used, size_t *out_alloc)
00401   {
00402     return buffer2buffer(s,strlen(s), out,out_used,out_alloc);
00403   };
00404 
00406   inline bool string2buffer(const std::string &in, char **out, size_t *out_used, size_t *out_alloc)
00407   {
00408     return buffer2buffer(in.data(),in.size(), out,out_used,out_alloc);
00409   };
00411 
00412   /*----------------------------------------------------*/
00416   inline bool buffer2string(const char *in, size_t in_size, std::string &out)
00417   {
00418     if (standalone) {
00419       //-- re-use internal buffer
00420       bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00421       if (buf1) out.append(buf1,buf1_used);
00422       return rv1;
00423     }
00424     //-- temp bools are ugly, but librecode seems to be returning weird...
00425     bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00426     bool rv2 = rc2.buffer2string(buf1,buf1_used, out);
00427     return rv1 && rv2;
00428   };
00429 
00431   inline bool string2string(const char *s, std::string &out)
00432   {
00433     return buffer2string(s,strlen(s), out);
00434   };
00435 
00437   inline bool string2string(const std::string &in, std::string &out)
00438   {
00439     return buffer2string(in.data(),in.size(), out);
00440   };
00441   /* ... etc. */
00443 
00444   /*----------------------------------------------------*/
00448   inline bool buffer2mstream(const char *in, size_t in_size, mootio::mostream *out)
00449   {
00450     if (standalone) {
00451       //-- re-use internal buffer1
00452       bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00453       if (buf1) return 
00454                   out && (out->write(buf1,buf1_used) == buf1_used) && rv1;
00455     }
00456     //-- temp bools are ugly, but librecode seems to be returning weird...
00457     bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00458     bool rv2 = rc2.buffer2buffer(buf1,buf1_used, &buf2,&buf2_used,&buf2_alloc);
00459     return
00460       out && (out->write(buf2,buf2_used) == buf2_used) && rv1 && rv2;
00461   };
00462 
00464   inline bool string2mstream(const char *s, mootio::mostream *out)
00465   {
00466     return buffer2mstream(s,strlen(s), out);
00467   };
00468 
00470   inline bool string2mstream(const std::string &in, mootio::mostream *out)
00471   {
00472     return buffer2mstream(in.data(),in.size(), out);
00473   };
00474   /* ... etc. */
00476 
00477 }; //-- /class mootXMLRecoder
00478 
00479 
00480 
00481 }; //-- /namespace moot
00482 
00483 #endif //_MOOT_RECODE_H
00484 

Generated on Sat Sep 17 01:20:33 2005 for libmoot by  doxygen 1.4.4