Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootRecode.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2004 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootRecode.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + moocow's PoS tagger : librecode interface
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _MOOT_RECODE_H
00030 #define _MOOT_RECODE_H
00031 
00032 #include <mootConfig.h>
00033 
00034 #ifdef MOOT_RECODE_ENABLED
00035 #include <stdbool.h>
00036 #include <recodext.h>
00037 #endif // MOOT_RECODE_ENABLED
00038 
00039 #include <assert.h>
00040 #include <string>
00041 
00042 #include <mootIO.h>
00043 
00044 namespace moot {
00045 
00046 using namespace std;
00047 
00049 class mootRecoder {
00050 public:
00051   /*--------------------------------------------------------------
00052    * DATA
00053    */
00054 #ifdef MOOT_RECODE_ENABLED
00055   RECODE_OUTER   rc_outer;   
00056   RECODE_REQUEST rc_request; 
00057 #else
00058   void *         rc_outer;   
00059   void *         rc_request; 
00060 #endif // MOOT_RECODE_ENABLED
00061   std::string    rc_reqstr;  
00062 
00063 
00064 
00065 public:
00066   /*--------------------------------------------------------------
00067    * METHODS
00068    */
00069   /*----------------------------------------------------*/
00073   mootRecoder(const std::string &requestString="", bool diacritics_only=false)
00074     : rc_outer(NULL), rc_request(NULL)
00075   {
00076     if (!requestString.empty()) scan_request(requestString,diacritics_only);
00077   };
00078 
00080   mootRecoder(const std::string &src, const std::string &dst, bool diacritics_only=false)
00081     : rc_outer(NULL), rc_request(NULL)
00082   {
00083     scan_request(src, dst, diacritics_only);
00084   };
00085 
00087   ~mootRecoder(void)
00088   {
00089 #ifdef MOOT_RECODE_ENABLED
00090     if (rc_request) recode_delete_request(rc_request); 
00091     if (rc_outer) recode_delete_outer(rc_outer);
00092 #endif // MOOT_RECODE_ENABLED
00093   };
00094 
00096   void ensure_outer(void) {
00097 #ifdef MOOT_RECODE_ENABLED
00098     if (!rc_outer) rc_outer = recode_new_outer(true); // (AUTO_ABORT)
00099 #endif // MOOT_RECODE_ENABLED
00100   };
00102 
00103   /*----------------------------------------------------*/
00107   inline void scan_request(const std::string &reqstr, bool diacritics_only=false) {
00108     ensure_outer();
00109     rc_reqstr = reqstr;
00110 #ifdef MOOT_RECODE_ENABLED
00111     if (!rc_request) rc_request = recode_new_request(rc_outer);
00112     assert(rc_request != NULL);
00113     if (!reqstr.empty()) {
00114       rc_request->diacritics_only = diacritics_only;  // this MUST happen before scan_request() !
00115       if (!recode_scan_request(rc_request, reqstr.c_str())) {
00116         fprintf(stderr, "mootRecode::scan_request(): failed to scan request `%s'\n",
00117                 reqstr.c_str());
00118         if (rc_request) {
00119           recode_delete_request(rc_request);
00120           rc_request = NULL;
00121         }
00122       }
00123       return;
00124     }
00125     else if (rc_request) {
00126       recode_delete_request(rc_request);
00127       rc_request = NULL;
00128     }
00129 #else
00130     fprintf(stderr, "mootRecode::scan_request(): librecode disabled! (request=\"%s\"\n",
00131             reqstr.c_str());
00132 #endif // MOOT_RECODE_ENABLED
00133   };
00134 
00136   inline void scan_request(const std::string &src, const std::string &dst, bool diacritics_only=false)
00137   {
00138     if (src.empty() && dst.empty()) scan_request(src,diacritics_only);
00139     else {
00140       std::string reqstr = src;
00141       reqstr.append("..");
00142       reqstr.append(dst);
00143       scan_request(reqstr,diacritics_only);
00144     }
00145   };
00147 
00148   /*----------------------------------------------------*/
00152   inline bool string2file(const char *in, FILE *out)
00153   {
00154 #ifdef MOOT_RECODE_ENABLED
00155     if (rc_request)
00156       return recode_string_to_file(rc_request,in,out);
00157 #endif
00158     fputs(in,out);
00159     return !ferror(out);
00160   };
00161 
00163   inline bool buffer2file(const char *buf, size_t buflen, FILE *out)
00164   {
00165 #ifdef MOOT_RECODE_ENABLED
00166     if (rc_request)
00167       return recode_buffer_to_file(rc_request,buf,buflen,out);
00168 #endif
00169     fwrite(buf,buflen,1,out);
00170     return !ferror(out);
00171   };
00172 
00174   inline bool string2file(const std::string &in, FILE *out)
00175   {
00176 #ifdef MOOT_RECODE_ENABLED
00177     if (rc_request)
00178       return recode_buffer_to_file(rc_request,in.data(),in.size(),out);
00179 #endif
00180     fwrite(in.data(), in.size(), 1, out);
00181     return !ferror(out);
00182   };
00184 
00185   /*----------------------------------------------------*/
00189   inline bool buffer2buffer(const char *in, size_t in_size, char **out, size_t *out_used, size_t *out_alloc)
00190   {
00191 #ifdef MOOT_RECODE_ENABLED
00192     if (rc_request)
00193       return recode_buffer_to_buffer(rc_request, in, in_size, out, out_used, out_alloc);
00194 #endif
00195     if (!*out) *out = (char *)malloc(in_size);
00196     else if (*out_alloc < in_size) {
00197       free(*out);
00198       *out = (char *)malloc(in_size);
00199     }
00200     assert(*out != NULL);
00201     *out_alloc = in_size;
00202     memcpy(*out, in, in_size);
00203     *out_used = in_size;
00204     return true;
00205   };
00206 
00208   inline bool string2buffer(const char *s, char **out, size_t *out_used, size_t *out_alloc)
00209   {
00210     return buffer2buffer(s,strlen(s), out,out_used,out_alloc);
00211   };
00212 
00214   inline bool string2buffer(const std::string &in, char **out, size_t *out_used, size_t *out_alloc)
00215   {
00216     return buffer2buffer(in.data(),in.size(), out,out_used,out_alloc);
00217   };
00219 
00220   /*----------------------------------------------------*/
00224   inline bool buffer2string(const char *in, size_t in_size, std::string &out)
00225   {
00226     char *tmp = NULL;
00227     size_t out_used = 0, out_alloc = 0;
00228     bool rv = buffer2buffer(in,in_size, &tmp, &out_used, &out_alloc);
00229     if (tmp) {
00230       out.append(tmp, out_used);
00231       free(tmp);
00232     }
00233     return rv;
00234   }
00235 
00237   inline bool string2string(const char *s, std::string &out)
00238   {
00239     return buffer2string(s,strlen(s), out);
00240   };
00241 
00243   inline bool string2string(const std::string &in, std::string &out)
00244   {
00245     return buffer2string(in.data(),in.size(), out);
00246   };
00247   /* ... etc. */
00249 
00250   /*----------------------------------------------------*/
00254   inline bool string2mstream(const char *in, mootio::mostream *out)
00255   {
00256     std::string s;
00257     bool rv = string2string(in,s);
00258     return out && out->puts(s) && rv;
00259   };
00260 
00262   inline bool buffer2mstream(const char *buf, size_t buflen, mootio::mostream *out)
00263   {
00264     std::string s;
00265     bool rv = buffer2string(buf,buflen,s);
00266     return out && out->puts(s) && rv;
00267   };
00268 
00270   inline bool string2mstream(const std::string &in, mootio::mostream *out)
00271   {
00272     std::string s;
00273     bool rv = string2string(in,s);
00274     return out && out->puts(s) && rv;
00275   };
00277 
00278 }; //-- /class mootRecoder
00279 
00280 
00281 
00285 class mootXMLRecoder {
00286 public:
00287   /*--------------------------------------------------------------
00288    * DATA
00289    */
00290   bool        standalone;         
00291 
00292   mootRecoder rc1;                
00293   mootRecoder rc2;                
00294 
00295   char       *buf1;               
00296   size_t      buf1_used;          
00297   size_t      buf1_alloc;         
00298 
00299   char       *buf2;               
00300   size_t      buf2_used;          
00301   size_t      buf2_alloc;         
00302 
00303 public:
00304   /*--------------------------------------------------------------
00305    * METHODS
00306    */
00307 
00308   /*----------------------------------------------------*/
00312   mootXMLRecoder(const std::string &src="", const std::string &dst="")
00313     : buf1(NULL), buf1_used(0), buf1_alloc(0),
00314       buf2(NULL), buf2_used(0), buf2_alloc(0)
00315   {
00316     //-- share outer context
00317     rc1.ensure_outer();
00318     rc2.rc_outer = rc1.rc_outer;
00319 
00320     scan_request(src,dst);
00321   };
00322 
00324   ~mootXMLRecoder(void)
00325   {
00326     rc2.rc_outer = NULL; 
00327     if (buf1) free(buf1);
00328     if (buf2) free(buf2);
00329   };
00331 
00332   /*----------------------------------------------------*/
00336   inline void scan_request(const std::string &reqstr)
00337   {
00338     size_t dst_begin    = reqstr.rfind("..");
00339     string src(reqstr, 0, dst_begin);
00340     string dst(reqstr, dst_begin);
00341     standalone = (dst == "XML-standalone" || dst == "h0");
00342     scan_request(src,dst);
00343   };
00344 
00346   inline void scan_request(const std::string &src, const std::string &dst) {
00347     standalone = (dst.empty() || dst == "XML-standalone" || dst == "h0");
00348     if (standalone) {
00349       if (src.empty()) {
00350         rc1.scan_request("",false);
00351         return;
00352       }
00353       rc1.scan_request(src,dst,false);
00354     } else {
00355       rc1.scan_request(src,"HTML_4.0",false);
00356       rc2.scan_request("HTML_4.0",dst,true);
00357     }
00358   };
00360 
00361   /*----------------------------------------------------*/
00365   inline bool buffer2file(const char *in, size_t in_size, FILE *out)
00366   {
00367     if (standalone) return rc1.buffer2file(in,in_size, out);
00368     //-- temp bools are ugly, but librecode seems to be returning weird...
00369     bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00370     bool rv2 = rc2.buffer2file(buf1,buf1_used, out);
00371     return rv1 && rv2;
00372   };
00373 
00375   inline bool string2file(const char *in, FILE *out)
00376   {
00377     return buffer2file(in,strlen(in),out);
00378   };
00379 
00381   inline bool string2file(const std::string &in, FILE *out)
00382   {
00383     return buffer2file(in.data(),in.size(), out);
00384   };
00385 
00386   /*----------------------------------------------------*/
00390   inline bool buffer2buffer(const char *in, size_t in_size, char **out, size_t *out_used, size_t *out_alloc)
00391   {
00392     if (standalone) return rc1.buffer2buffer(in,in_size, out,out_used,out_alloc);
00393     //-- temp bools are ugly, but librecode is returning weird...
00394     bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00395     bool rv2 = rc2.buffer2buffer(buf1,buf1_used, out,out_used,out_alloc);
00396     return rv1 && rv2;
00397   };
00398 
00400   inline bool string2buffer(const char *s, char **out, size_t *out_used, size_t *out_alloc)
00401   {
00402     return buffer2buffer(s,strlen(s), out,out_used,out_alloc);
00403   };
00404 
00406   inline bool string2buffer(const std::string &in, char **out, size_t *out_used, size_t *out_alloc)
00407   {
00408     return buffer2buffer(in.data(),in.size(), out,out_used,out_alloc);
00409   };
00411 
00412   /*----------------------------------------------------*/
00416   inline bool buffer2string(const char *in, size_t in_size, std::string &out)
00417   {
00418     if (standalone) {
00419       //-- re-use internal buffer
00420       bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00421       if (buf1) out.append(buf1,buf1_used);
00422       return rv1;
00423     }
00424     //-- temp bools are ugly, but librecode seems to be returning weird...
00425     bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00426     bool rv2 = rc2.buffer2string(buf1,buf1_used, out);
00427     return rv1 && rv2;
00428   };
00429 
00431   inline bool string2string(const char *s, std::string &out)
00432   {
00433     return buffer2string(s,strlen(s), out);
00434   };
00435 
00437   inline bool string2string(const std::string &in, std::string &out)
00438   {
00439     return buffer2string(in.data(),in.size(), out);
00440   };
00441   /* ... etc. */
00443 
00444   /*----------------------------------------------------*/
00448   inline bool buffer2mstream(const char *in, size_t in_size, mootio::mostream *out)
00449   {
00450     if (standalone) {
00451       //-- re-use internal buffer1
00452       bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00453       if (buf1) return 
00454                   out && (out->write(buf1,buf1_used) == buf1_used) && rv1;
00455     }
00456     //-- temp bools are ugly, but librecode seems to be returning weird...
00457     bool rv1 = rc1.buffer2buffer(in,in_size, &buf1,&buf1_used,&buf1_alloc);
00458     bool rv2 = rc2.buffer2buffer(buf1,buf1_used, &buf2,&buf2_used,&buf2_alloc);
00459     return
00460       out && (out->write(buf2,buf2_used) == buf2_used) && rv1 && rv2;
00461   };
00462 
00464   inline bool string2mstream(const char *s, mootio::mostream *out)
00465   {
00466     return buffer2mstream(s,strlen(s), out);
00467   };
00468 
00470   inline bool string2mstream(const std::string &in, mootio::mostream *out)
00471   {
00472     return buffer2mstream(in.data(),in.size(), out);
00473   };
00474   /* ... etc. */
00476 
00477 }; //-- /class mootXMLRecoder
00478 
00479 
00480 
00481 }; //-- /namespace moot
00482 
00483 #endif //_MOOT_RECODE_H
00484 

Generated on Wed Jul 28 15:48:03 2004 for libmoot by doxygen 1.2.15