Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootBinIO.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2005 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011    
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016    
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with this library; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootBinIO.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + moot PoS tagger : abstract templates for binary librarians
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _MOOT_BINIO_H
00030 #define _MOOT_BINIO_H
00031 
00032 #include <stdlib.h>
00033 #include <string.h>
00034 
00035 #include <vector>
00036 #include <string>
00037 #include <map>
00038 #include <set>
00039 
00040 #include <mootTypes.h>
00041 #include <mootEnum.h>
00042 #include <mootIO.h>
00043 #include <mootHMM.h>
00044 #include <mootAssocVector.h>
00045 #include <mootSuffixTrie.h>
00047 namespace mootBinIO {
00048   using namespace std;
00049   using namespace moot;
00050   using namespace mootio;
00051 
00052   /*------------------------------------------------------------
00053    * Generic items
00054    */
00055   /// Binary I/O for values of type T that are copied to and from the stream as raw bytes.
00056   template<class T> class Item {
00057   public:
00058     /// Read sizeof(T) bytes from \a is into \a x; true iff exactly that many were read.
00059     inline bool load(mootio::mistream *is, T &x) const
00060     {
00061       return is->read((char *)&x, sizeof(T)) == sizeof(T);
00062     };
00063 
00064     /// Write the sizeof(T) bytes of \a x to \a os; returns the result of write() as bool.
00065     inline bool save(mootio::mostream *os, const T &x) const
00066     {
00067       return os->write((char *)&x, sizeof(T));
00068     };
00069 
00070     /**
00071      * Load a C array stored as a size_t item count followed by the raw items.
00072      * \param x  malloc()ed buffer or NULL; freed and re-allocated if the stored count exceeds \a n
00073      * \param n  in: number of items \a x can hold; out: stored count on success, 0 if malloc() failed
00074      * \return   false on a short read, a failed allocation, or a count whose byte size overflows size_t
00075      */
00076     inline bool load_n(mootio::mistream *is, T *&x, size_t &n) const {
00077       //-- get saved size
00078       Item<size_t> size_item;
00079       size_t saved_size;
00080       if (!size_item.load(is, saved_size)) return false;
00081 
00082       //-- reject counts whose byte size would wrap around size_t (corrupt input)
00083       if (saved_size > ((size_t)-1) / sizeof(T)) return false;
00084 
00085       //-- re-allocate if necessary
00086       if (saved_size > n) {
00087         if (x) free(x);
00088         x = (T *)malloc(saved_size*sizeof(T));
00089         if (!x) {
00090           n = 0;
00091           return false;
00092         }
00093       }
00094 
00095       //-- read in items
00096       ByteCount wanted = sizeof(T)*saved_size;
00097       if (is->read((char *)x, wanted) != wanted) return false;
00098       n=saved_size;
00099       return true;
00100     };
00101 
00102     /// Save \a n items from \a x as a size_t count followed by the raw item bytes;
00103     /// this is the layout load_n() reads back.
00104     inline bool save_n(mootio::mostream *os, const T *x, size_t n) const {
00105       //-- save item count
00106       Item<size_t> size_item;
00107       if (!size_item.save(os, n)) return false;
00108 
00109       //-- save items
00110       return os->write((char *)x, n*sizeof(T));
00111     };
00112   };
00113 
00114   /*------------------------------------------------------------
00115    * C-strings
00116    */
00117   /// Binary I/O for NUL-terminated C strings.  A string is stored as a
00118   /// size-prefixed char array whose length includes the terminating NUL.
00121   template<> class Item<char *> {
00122   public:
00123     Item<char> charItem;   ///< does the actual array I/O
00124 
00125   public:
00126     /// Load into \a x via Item<char>::load_n() with a declared capacity of 0.
00127     inline bool load(mootio::mistream *is, char *&x) const
00128     {
00129       size_t capacity = 0;
00130       return charItem.load_n(is, x, capacity);
00131     };
00132 
00133     /// Save \a x including its terminating NUL; a NULL \a x is saved as "".
00134     inline bool save(mootio::mostream *os, const char *x) const
00135     {
00136       if (!x) return charItem.save_n(os, "", 1);
00137       return charItem.save_n(os, x, strlen(x)+1);
00138     };
00139   };
00142 
00143   /*------------------------------------------------------------
00144    * C++ strings
00145    */
00146   /// Binary I/O for std::string, stored as a size-prefixed char array
00147   /// holding exactly size() characters.
00148   template<> class Item<string> {
00149   public:
00150     Item<char> charItem;
00151   public:
00152     /// Load a string into \a x, replacing its previous contents on success.
00153     inline bool load(mootio::mistream *is, string &x) const
00154     {
00155       char *buf=NULL;
00156       size_t len=0;
00157       bool rc = charItem.load_n(is,buf,len);
00158       //-- a stored empty string must still overwrite x (callers re-use temporaries)
00159       if (rc && len) x.assign(buf,len);
00160       else if (rc)   x.clear();
00161       if (buf) free(buf);
00162       return rc;
00163     };
00164 
00165     /// Save the size() characters of \a x, preceded by their count.
00166     inline bool save(mootio::mostream *os, const string &x) const
00167     { return charItem.save_n(os,x.data(),x.size()); };
00168   };
00169 
00170   /*------------------------------------------------------------
00171    * STL: vectors
00172    */
00173   /// Binary I/O for std::vector: a size_t element count followed by the elements.
00174   template<class ValT> class Item<vector<ValT> > {
00175   public:
00176     Item<ValT> val_item;   ///< loads/saves one element
00177   public:
00178     /// Replace the contents of \a x with the elements read from \a is.
00179     /// On failure \a x keeps whatever was appended so far.
00180     inline bool load(mootio::mistream *is, vector<ValT> &x) const
00181     {
00182       //-- element count comes first
00183       size_t count;
00184       if (!Item<size_t>().load(is, count)) return false;
00185 
00186       //-- start from an empty vector with room for all elements
00187       x.clear();
00188       x.reserve(count);
00189 
00190       //-- append a default element, then load into it in place
00191       for (size_t i = 0; i < count; ++i) {
00192         x.push_back(ValT());
00193         if (!val_item.load(is, x.back())) return false;
00194       }
00195       return true;
00196     };
00197 
00198     /// Write the element count of \a x and then each element in order.
00199     inline bool save(mootio::mostream *os, const vector<ValT> &x) const
00200     {
00201       if (!Item<size_t>().save(os, x.size())) return false;
00202       typename vector<ValT>::const_iterator it = x.begin();
00203       while (it != x.end()) {
00204         if (!val_item.save(os, *it)) return false;
00205         ++it;
00206       }
00207       return true;
00208     };
00209   };
00210 
00211 
00212   /*------------------------------------------------------------
00213    * STL: set<>
00214    */
00215   /// Binary I/O for std::set: a size_t element count followed by the elements.
00216   template<class ValT> class Item<set<ValT> > {
00217   public:
00218     Item<ValT> val_item;   ///< loads/saves one element
00219   public:
00220     /// Empty \a x, then insert each element read from \a is.
00221     inline bool load(mootio::mistream *is, set<ValT> &x) const
00222     {
00223       //-- element count comes first
00224       size_t remaining;
00225       if (!Item<size_t>().load(is, remaining)) return false;
00226 
00227       x.clear();
00228 
00229       //-- one scratch element is re-used for every read
00230       ValT elt;
00231       while (remaining > 0) {
00232         if (!val_item.load(is, elt)) return false;
00233         x.insert(elt);
00234         --remaining;
00235       }
00236       return true;
00237     };
00238 
00239     /// Write the element count of \a x and then each element in iteration order.
00240     inline bool save(mootio::mostream *os, const set<ValT> &x) const
00241     {
00242       if (!Item<size_t>().save(os, x.size())) return false;
00243 
00244       typename set<ValT>::const_iterator it = x.begin();
00245       for ( ; it != x.end(); ++it) {
00246         if (!val_item.save(os, *it)) return false;
00247       }
00248       return true;
00249     };
00250   };
00253 
00254   /*------------------------------------------------------------
00255    * STL: hash_set<>
00256    */
00257   /// Binary I/O for hash_set: a size_t element count followed by the elements.
00258   template<class ValT> class Item<hash_set<ValT> > {
00259   public:
00260     Item<ValT> val_item;   ///< loads/saves one element
00261   public:
00262     /// Empty \a x, pass the stored count to resize(), then insert each element read.
00263     inline bool load(mootio::mistream *is, hash_set<ValT> &x) const
00264     {
00265       //-- element count comes first
00266       size_t count;
00267       if (!Item<size_t>().load(is, count)) return false;
00268 
00269       //-- clear, then hand the expected element count to resize()
00270       x.clear();
00271       x.resize(count);
00272 
00273       //-- one scratch element is re-used for every read
00274       ValT elt;
00275       for (size_t i = 0; i < count; ++i) {
00276         if (!val_item.load(is, elt)) return false;
00277         x.insert(elt);
00278       }
00279       return true;
00280     };
00281 
00282     /// Write the element count of \a x and then each element in iteration order.
00283     inline bool save(mootio::mostream *os, const hash_set<ValT> &x) const
00284     {
00285       if (!Item<size_t>().save(os, x.size())) return false;
00286 
00287       typename hash_set<ValT>::const_iterator it = x.begin();
00288       while (it != x.end()) {
00289         if (!val_item.save(os, *it)) return false;
00290         ++it;
00291       }
00292       return true;
00293     };
00294   };
00295 
00296 
00297   /*------------------------------------------------------------
00298    * STL: map<>
00299    */
00300   /// Binary I/O for std::map: a size_t entry count followed by key/value pairs.
00301   template<class KeyT, class ValT> class Item<map<KeyT,ValT> > {
00302   public:
00303     Item<KeyT> key_item;   ///< loads/saves one key
00304     Item<ValT> val_item;   ///< loads/saves one mapped value
00305   public:
00306     /// Empty \a x, then read each key and its value and store them with operator[].
00307     inline bool load(mootio::mistream *is, map<KeyT,ValT> &x) const
00308     {
00309       //-- entry count comes first
00310       size_t remaining;
00311       if (!Item<size_t>().load(is, remaining)) return false;
00312 
00313       x.clear();
00314 
00315       //-- scratch key/value re-used for every entry; the key is read before the value
00316       KeyT key;
00317       ValT val;
00318       while (remaining > 0) {
00319         if (!key_item.load(is, key)) return false;
00320         if (!val_item.load(is, val)) return false;
00321         x[key] = val;
00322         --remaining;
00323       }
00324       return true;
00325     };
00326 
00327     /// Write the entry count of \a x, then each key followed by its value.
00328     inline bool save(mootio::mostream *os, const map<KeyT,ValT> &x) const
00329     {
00330       if (!Item<size_t>().save(os, x.size())) return false;
00331 
00332       typename map<KeyT,ValT>::const_iterator it;
00333       for (it = x.begin(); it != x.end(); ++it) {
00334         if (!key_item.save(os, it->first)) return false;
00335         if (!val_item.save(os, it->second)) return false;
00336       }
00337       return true;
00338     };
00339   };
00341 
00342 
00343   /*------------------------------------------------------------
00344    * STL: hash_map<>
00345    */
00346   /// Binary I/O for hash_map: a size_t entry count followed by key/value pairs.
00347   template<class KeyT, class ValT, class HashFuncT, class EqualFuncT>
00348   class Item<hash_map<KeyT,ValT,HashFuncT,EqualFuncT> > {
00349   public:
00350     Item<KeyT> key_item;   ///< loads/saves one key
00351     Item<ValT> val_item;   ///< loads/saves one mapped value
00352   public:
00353     /// Empty \a x, pass the stored count to resize(), then store each entry read.
00354     inline bool load(mootio::mistream *is, hash_map<KeyT,ValT,HashFuncT,EqualFuncT> &x) const
00355     {
00356       //-- entry count comes first
00357       size_t count;
00358       if (!Item<size_t>().load(is, count)) return false;
00359 
00360       x.clear();
00361       x.resize(count);
00362 
00363       //-- scratch key/value re-used for every entry; the key is read before the value
00364       KeyT key;
00365       ValT val;
00366       for (size_t i = 0; i < count; ++i) {
00367         if (!key_item.load(is, key)) return false;
00368         if (!val_item.load(is, val)) return false;
00369         x[key] = val;
00370       }
00371       return true;
00372     };
00373 
00374     /// Write the entry count of \a x, then each key followed by its value.
00375     inline bool save(mootio::mostream *os, const hash_map<KeyT,ValT,HashFuncT,EqualFuncT> &x) const
00376     {
00377       typedef typename hash_map<KeyT,ValT,HashFuncT,EqualFuncT>::const_iterator entry_iterator;
00378 
00379       if (!Item<size_t>().save(os, x.size())) return false;
00380 
00381       for (entry_iterator it = x.begin(); it != x.end(); ++it) {
00382         if (!key_item.save(os, it->first)) return false;
00383         if (!val_item.save(os, it->second)) return false;
00384       }
00385       return true;
00386     };
00387   };
00392 
00393   /*------------------------------------------------------------
00394    * STL: pair
00395    */
00396   /// Binary I/O for std::pair: \c first is stored before \c second.
00397   template<class T1, class T2>
00398   class Item<std::pair<T1,T2> > {
00399   public:
00400     Item<T1>   item1;
00401     Item<T2>   item2;
00402   public:
00403     /// Load \c x.first then \c x.second; true only if both loads succeed.
00404     inline bool load(mootio::mistream *is, std::pair<T1,T2> &x) const
00405     {
00406       return (item1.load(is,x.first) && item2.load(is,x.second));
00407     };
00408 
00409     /// Save \c x.first then \c x.second; true only if both saves succeed.
00410     inline bool save(mootio::mostream *os, const std::pair<T1,T2> &x) const
00411     { return (item1.save(os,x.first) && item2.save(os,x.second)); };
00412   };
00413 
00414   /*------------------------------------------------------------
00415    * moot types: Trigram
00416    */
00417 #if defined(MOOT_USE_TRIGRAMS) && defined(MOOT_HASH_TRIGRAMS)
00418   /// Binary I/O for mootHMM::Trigram: tag1, tag2 and tag3 are stored in that order.
00419   template <>
00420   class Item<mootHMM::Trigram> {
00421   public:
00422     Item<mootHMM::TagID> tagid_item;
00423     /// Load the three tag IDs, stopping at the first failed read.
00424     inline bool load(mootio::mistream *is, mootHMM::Trigram &x) const
00425     {
00426       if (!tagid_item.load(is, x.tag1)) return false;
00427       if (!tagid_item.load(is, x.tag2)) return false;
00428       return tagid_item.load(is, x.tag3);
00429     };
00430     /// Save the three tag IDs, stopping at the first failed write.
00431     inline bool save(mootio::mostream *os, const mootHMM::Trigram &x) const
00432     {
00433       const bool first_two_ok = tagid_item.save(os, x.tag1) && tagid_item.save(os, x.tag2);
00434       return first_two_ok && tagid_item.save(os, x.tag3);
00435     };
00436   };
00437 #endif // MOOT_USE_TRIGRAMS && MOOT_HASH_TRIGRAMS
00438 
00439   /*------------------------------------------------------------
00440    * moot types: mootEnum
00441    */
00442   /// Binary I/O for mootEnum: only ids2names is stored; names2ids is rebuilt on load.
00443   template<class NameT, class HashFunc, class NameEqlFunc>
00444   class Item<mootEnum<NameT,HashFunc,NameEqlFunc> > {
00445   public:
00446     Item<typename mootEnum<NameT,HashFunc,NameEqlFunc>::Id2NameMap> i2n_item;
00447   public:
00448     /// Load ids2names, then map every name to its position in ids2names.
00449     inline bool load(mootio::mistream *is, mootEnum<NameT,HashFunc,NameEqlFunc> &x) const
00450     {
00451       if (!i2n_item.load(is, x.ids2names)) return false;
00452 
00453       //-- rebuild the inverse map from scratch so no names from an earlier state survive
00454       x.names2ids.clear();
00455       x.names2ids.resize(x.ids2names.size());
00456       unsigned u = 0;
00457       typename mootEnum<NameT,HashFunc,NameEqlFunc>::Id2NameMap::const_iterator ni;
00458       for (ni = x.ids2names.begin(); ni != x.ids2names.end(); ++ni, ++u) {
00459         x.names2ids[*ni] = u;
00460       }
00461       return true;
00462     };
00463 
00464     /// Save ids2names only.
00465     inline bool save(mootio::mostream *os, const mootEnum<NameT,HashFunc,NameEqlFunc> &x) const
00466     { return i2n_item.save(os, x.ids2names); };
00467   };
00468 
00469   /*------------------------------------------------------------
00470    * moot types: AssocVector
00471    */
00472   /// Binary I/O for AssocVector, delegated to the Item for its assoc_vector_type.
00473   template<typename KeyT, typename ValT>
00474   class Item<AssocVector<KeyT,ValT> > {
00475   public:
00476     typedef typename AssocVector<KeyT,ValT>::assoc_vector_type assoc_vector_type;
00477     Item<assoc_vector_type> vec_item;   ///< does the actual I/O
00478     //-- both methods hand x over as a reference to assoc_vector_type
00479     inline bool load(mootio::mistream *is, AssocVector<KeyT,ValT> &x) const
00480     { assoc_vector_type &vec = x;  return vec_item.load(is, vec); };
00481     inline bool save(mootio::mostream *os, const AssocVector<KeyT,ValT> &x) const
00482     { const assoc_vector_type &vec = x;  return vec_item.save(os, vec); };
00483   };
00484 
00485   /*------------------------------------------------------------
00486    * moot types: TrieVectorNode
00487    */
00488   /// Binary I/O for TrieVectorNode; fields are stored as mother, mindtr, label, ndtrs, data.
00489   template <typename DataT, typename CharT, typename UCharT>
00490   class Item<TrieVectorNode<DataT,CharT,UCharT> > {
00491   public:
00492     Item<CharT> char_item;
00493     Item<UCharT> uchar_item;
00494     Item<DataT> data_item;
00495     Item<size_t> size_item;
00496   public:
00497     /// Load the fields in stored order, stopping at the first failed read.
00498     inline bool load(mootio::mistream *is, TrieVectorNode<DataT,CharT,UCharT> &x) const
00499     {
00500       if (!size_item.load(is,x.mother) || !size_item.load(is,x.mindtr)) return false;
00501       if (!char_item.load(is,x.label) || !uchar_item.load(is,x.ndtrs)) return false;
00502       return data_item.load(is,x.data);
00503     };
00504 
00505     /// Save the fields in stored order, stopping at the first failed write.
00506     inline bool save(mootio::mostream *os, const TrieVectorNode<DataT,CharT,UCharT> &x) const
00507     {
00508       if (!size_item.save(os,x.mother) || !size_item.save(os,x.mindtr)) return false;
00509       if (!char_item.save(os,x.label) || !uchar_item.save(os,x.ndtrs)) return false;
00510       return data_item.save(os,x.data);
00511     };
00512   };
00513   
00514 
00515   /*------------------------------------------------------------
00516    * moot types: SuffixTrie
00517    */
00518   /// Binary I/O for SuffixTrie: maxcount, then theta, then the trie's node vector.
00519   template<>
00520   class Item<SuffixTrie> {
00521   public:
00522     Item<SuffixTrie::vector_type> vec_item;
00523     Item<CountT>                  maxcount_item;
00524     Item<ProbT>                   theta_item;
00525     /// Clear \a x, then load maxcount, theta and the node vector in that order.
00526     inline bool load(mootio::mistream *is, SuffixTrie &x) const
00527     {
00528       x.clear();
00529       if (!maxcount_item.load(is, x.maxcount)) return false;
00530       if (!theta_item.load(is, x.theta)) return false;
00531       return vec_item.load(is, x);
00532     };
00533     /// Save maxcount, theta and the node vector in that order.
00534     inline bool save(mootio::mostream *os, const SuffixTrie &x) const
00535     {
00536       const bool params_ok = maxcount_item.save(os, x.maxcount) && theta_item.save(os, x.theta);
00537       return params_ok && vec_item.save(os, x);
00538     };
00539   };
00540 
00541   /*------------------------------------------------------------
00542    * public typedefs: Generic header information
00543    */
00544   /// Generic header record: a magic number, version/revision numbers, and a flag word.
00545   class HeaderInfo {
00546   public:
00547     /// Type of the version and revision fields
00548     typedef unsigned int VersionT;
00549 
00550     /// Type of the magic number
00551     typedef unsigned int MagicT;
00552 
00553     /// Type of the flag word
00554     typedef unsigned long int FlagsT;
00555   public:
00556     MagicT    magic;      ///< magic number (given directly or hashed from an ID string)
00557     VersionT  version;    ///< set from constructor argument \a ver
00558     VersionT  revision;   ///< set from constructor argument \a rev
00559     VersionT  minver;     ///< set from constructor argument \a mver
00560     VersionT  minrev;     ///< set from constructor argument \a mrev
00561     FlagsT    flags;      ///< set from constructor argument \a f
00562   public:
00563     /// Construct from an explicit magic number; every argument defaults to 0.
00564     HeaderInfo(MagicT mag=0,
00565                VersionT ver=0, VersionT rev=0,
00566                VersionT mver=0, VersionT mrev=0,
00567                FlagsT f=0)
00568       : magic(mag), version(ver), revision(rev),
00569         minver(mver), minrev(mrev), flags(f)
00570     {};
00571 
00572     /// Construct with a magic number computed from the characters of \a IDstring:
00573     /// starting at 0, each character c updates it to 31*magic + c in MagicT arithmetic.
00574     HeaderInfo(const string &IDstring,
00575                VersionT ver=0, VersionT rev=0,
00576                VersionT mver=0, VersionT mrev=0,
00577                FlagsT f=0)
00578       : magic(0), version(ver), revision(rev),
00579         minver(mver), minrev(mrev), flags(f)
00580     {
00581       //-- unsigned arithmetic: 31*magic equals (magic<<5)-magic
00582       for (size_t i = 0; i < IDstring.size(); ++i) {
00583         magic = magic*31 + (MagicT)IDstring[i];
00584       }
00585     };
00586   };
00593 
00594 }; //-- mootBinIO
00595 
00596 
00597 #endif /* _MOOT_BINIO_H */

Generated on Mon Sep 11 16:10:33 2006 for libmoot by doxygen 1.2.18