mootBinIO.h
Go to the documentation of this file.
1 /* -*- Mode: C++ -*- */
2 
3 /*
4  libmoot : moocow's part-of-speech tagging library
5  Copyright (C) 2003-2014 by Bryan Jurish <moocow@cpan.org>
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Lesser General Public
9  License as published by the Free Software Foundation; either
10  version 3 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Lesser General Public License for more details.
16 
17  You should have received a copy of the GNU Lesser General Public
18  License along with this library; if not, write to the Free Software
19  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 
22 /*--------------------------------------------------------------------------
23  * File: mootBinIO.h
24  * Author: Bryan Jurish <moocow@cpan.org>
25  * Description:
26  * + moot PoS tagger : abstract templates for binary librarians
27  *--------------------------------------------------------------------------*/
28 
33 #ifndef _MOOT_BINIO_H
34 #define _MOOT_BINIO_H
35 
36 //#include <stdlib.h> //-- via mootHMM -> mootTokenIO -> mootTokenLexer -> mootGenericLexer -> mootBufferIO
37 
38 #include <mootHMM.h>
39 
41 namespace mootBinIO {
42  using namespace std;
43  using namespace moot;
44  using namespace mootio;
45 
46  /*------------------------------------------------------------
47  * Generic functions
48  */
50  template<typename T>
51  inline bool loadItem(mootio::mistream *is, T &x)
52  {
53  return is->read(reinterpret_cast<char *>(&x), sizeof(T)) == sizeof(T);
54  };
55 
57  template<typename T>
58  inline bool saveItem(mootio::mostream *os, const T &x)
59  {
60  return os->write(reinterpret_cast<const char *>(&x), sizeof(T));
61  };
62 
63  /*------------------------------------------------------------
64  * Generic items
65  */
67  template<class T> class Item {
68  public:
70  inline bool load(mootio::mistream *is, T &x) const
71  {
72  return loadItem<T>(is,x);
73  };
74 
76  inline bool save(mootio::mostream *os, const T &x) const
77  {
78  return saveItem<T>(os,x);
79  };
80 
87  inline bool load_n(mootio::mistream *is, T *&x, size_t &n) const {
88  //-- get saved size
89  Size saved_size;
90  if (!loadItem<Size>(is,saved_size)) return false;
91 
92  //-- re-allocate if necessary
93  if (saved_size > n) {
94  if (x) free(x);
95  x = reinterpret_cast<T*>(malloc(saved_size*sizeof(T)));
96  if (!x) {
97  n = 0;
98  return false;
99  }
100  }
101 
102  //-- read in items
103  ByteCount wanted = sizeof(T)*saved_size;
104  if (is->read(reinterpret_cast<char *>(x), wanted) != wanted) return false;
105  n=saved_size;
106  return true;
107  };
108 
114  inline bool save_n(mootio::mostream *os, const T *x, size_t n) const {
115  //-- get saved size
116  Size tmp = n;
117  if (!saveItem<Size>(os,tmp)) return false;
118 
119  //-- save items
120  return os->write(reinterpret_cast<const char *>(x), n*sizeof(T));
121  };
122  };
123 
124 #ifdef MOOT_32BIT_FORCE
125  /*------------------------------------------------------------
126  * size_t : special case bashes to 32-bits
127  */
132  template<> class Item<size_t> {
133  public:
134 
135  public:
136  inline bool load(mootio::mistream *is, size_t &x) const
137  {
138  if (sizeof(size_t)==sizeof(Size)) {
139  //-- 32-bit native sizes: just load raw data
140  return loadItem<size_t>(is,x);
141  }
142  Size tmp;
143  bool rc = loadItem<Size>(is,tmp);
144  x = tmp;
145  return rc;
146  };
147 
148  inline bool save(mootio::mostream *os, const size_t &x) const
149  {
150  if (sizeof(size_t)==sizeof(Size)) {
151  //-- 32-bit native sizes: just load raw data
152  return saveItem<size_t>(os,x);
153  }
154  Size tmp = x;
155  return saveItem<Size>(os, tmp);
156  };
157  };
158 #endif /* MOOT_32BIT_FORCE */
159 
160 
161  /*------------------------------------------------------------
162  * C-strings
163  */
168  template<> class Item<char *> {
169  public:
171 
172  public:
173  inline bool load(mootio::mistream *is, char *&x) const
174  {
175  size_t len=0;
176  return charItem.load_n(is,x,len);
177  };
178 
179  inline bool save(mootio::mostream *os, const char *x) const
180  {
181  if (x) {
182  size_t len = strlen(x)+1;
183  return charItem.save_n(os,x,len);
184  } else {
185  return charItem.save_n(os,"",1);
186  }
187  };
188  };
189 
190  /*------------------------------------------------------------
191  * C++ strings
192  */
197  template<> class Item<string> {
198  public:
200  public:
201  inline bool load(mootio::mistream *is, string &x) const
202  {
203  char *buf=NULL;
204  size_t len=0;
205  bool rc = charItem.load_n(is,buf,len);
206  if (rc && len) x.assign(buf,len);
207  if (buf) free(buf);
208  return rc;
209  };
210 
211  inline bool save(mootio::mostream *os, const string &x) const
212  {
213  return charItem.save_n(os,x.data(),x.size());
214  };
215  };
216 
217  /*------------------------------------------------------------
218  * STL: vectors
219  */
221  template<class ValT> class Item<vector<ValT> > {
222  public:
223  Item<ValT> val_item;
224  public:
225  inline bool load(mootio::mistream *is, vector<ValT> &x) const
226  {
227  //-- get saved size
228  Item<size_t> size_item;
229  size_t len;
230  if (!size_item.load(is, len)) return false;
231 
232  //-- resize
233  x.clear();
234  x.reserve(len);
235 
236  //-- read in items
237  for ( ; len > 0; len--) {
238  x.push_back(ValT());
239  if (!val_item.load(is,x.back())) return false;
240  }
241  return len==0;
242  };
243 
244  inline bool save(mootio::mostream *os, const vector<ValT> &x) const
245  {
246  //-- save size
247  Item<size_t> size_item;
248  if (!size_item.save(os, x.size())) return false;
249 
250  //-- save items
251  for (typename vector<ValT>::const_iterator xi = x.begin(); xi != x.end(); xi++) {
252  if (!val_item.save(os,*xi)) return false;
253  }
254  return true;
255  };
256  };
257 
258 
259  /*------------------------------------------------------------
260  * STL: set<>
261  */
263  template<class ValT> class Item<set<ValT> > {
264  public:
265  Item<ValT> val_item;
266  public:
267  inline bool load(mootio::mistream *is, set<ValT> &x) const
268  {
269  //-- load size
270  Item<size_t> size_item;
271  size_t len;
272  if (!size_item.load(is, len)) return false;
273 
274  //-- clear
275  x.clear();
276 
277  //-- read items
278  ValT tmp;
279  for ( ; len > 0; len--) {
280  if (!val_item.load(is,tmp))
281  return false;
282  x.insert(tmp);
283  }
284  return len==0;
285  };
286 
287  inline bool save(mootio::mostream *os, const set<ValT> &x) const
288  {
289  //-- save size
290  Item<size_t> size_item;
291  if (!size_item.save(os, x.size())) return false;
292 
293  //-- save items
294  for (typename set<ValT>::const_iterator xi = x.begin(); xi != x.end(); xi++) {
295  if (!val_item.save(os,*xi)) return false;
296  }
297  return true;
298  };
299  };
300 
301  /*------------------------------------------------------------
302  * STL: hash_set<>
303  */
305  template<class ValT> class Item<hash_set<ValT> > {
306  public:
307  Item<ValT> val_item;
308  public:
309  inline bool load(mootio::mistream *is, hash_set<ValT> &x) const
310  {
311  //-- load size
312  Item<size_t> size_item;
313  size_t len;
314  if (!size_item.load(is, len)) return false;
315 
316  //-- clear & resize
317  x.clear();
318  x.resize(len);
319 
320  //-- read items
321  ValT tmp;
322  for ( ; len > 0; len--) {
323  if (!val_item.load(is,tmp)) return false;
324  x.insert(tmp);
325  }
326  return len==0;
327  };
328 
329  inline bool save(mootio::mostream *os, const hash_set<ValT> &x) const
330  {
331  //-- save size
332  Item<size_t> size_item;
333  if (!size_item.save(os, x.size())) return false;
334 
335  //-- save items
336  for (typename hash_set<ValT>::const_iterator xi = x.begin(); xi != x.end(); xi++) {
337  if (!val_item.save(os,*xi)) return false;
338  }
339  return true;
340  };
341  };
342 
343 
344  /*------------------------------------------------------------
345  * STL: map<>
346  */
348  template<class KeyT, class ValT> class Item<map<KeyT,ValT> > {
349  public:
350  Item<KeyT> key_item;
351  Item<ValT> val_item;
352  public:
353  inline bool load(mootio::mistream *is, map<KeyT,ValT> &x) const
354  {
355  //-- load size
356  Item<size_t> size_item;
357  size_t len;
358  if (!size_item.load(is, len)) return false;
359 
360  //-- clear
361  x.clear();
362 
363  //-- read items
364  KeyT key_tmp;
365  ValT val_tmp;
366  for ( ; len > 0; len--) {
367  if (!key_item.load(is,key_tmp) || !val_item.load(is,val_tmp))
368  return false;
369  x[key_tmp] = val_tmp;
370  }
371  return len==0;
372  };
373 
374  inline bool save(mootio::mostream *os, const map<KeyT,ValT> &x) const
375  {
376  //-- save size
377  Item<size_t> size_item;
378  if (!size_item.save(os, x.size())) return false;
379 
380  //-- save items
381  for (typename map<KeyT,ValT>::const_iterator xi = x.begin(); xi != x.end(); xi++) {
382  if (!key_item.save(os,xi->first) || !val_item.save(os,xi->second))
383  return false;
384  }
385  return true;
386  };
387  };
388 
389 
390  /*------------------------------------------------------------
391  * STL: hash_map<>
392  */
394  template<class KeyT, class ValT, class HashFuncT, class EqualFuncT>
395  class Item<hash_map<KeyT,ValT,HashFuncT,EqualFuncT> > {
396  public:
397  Item<KeyT> key_item;
398  Item<ValT> val_item;
399  public:
400  inline bool load(mootio::mistream *is, hash_map<KeyT,ValT,HashFuncT,EqualFuncT> &x) const
401  {
402  //-- load size
403  Item<size_t> size_item;
404  size_t len;
405  if (!size_item.load(is, len)) return false;
406 
407  //-- clear & resize
408  x.clear();
409  x.resize(len);
410 
411  //-- read items
412  KeyT key_tmp;
413  ValT val_tmp;
414  for ( ; len > 0; len--) {
415  if (!key_item.load(is,key_tmp) || !val_item.load(is,val_tmp))
416  return false;
417  x[key_tmp] = val_tmp;
418  }
419  return len==0;
420  };
421 
422  inline bool save(mootio::mostream *os, const hash_map<KeyT,ValT,HashFuncT,EqualFuncT> &x) const
423  {
424  //-- save size
425  Item<size_t> size_item;
426  if (!size_item.save(os, x.size())) return false;
427 
428  //-- save items
429  for (typename hash_map<KeyT,ValT,HashFuncT,EqualFuncT>::const_iterator xi = x.begin();
430  xi != x.end();
431  xi++)
432  {
433  if (!key_item.save(os,xi->first) || !val_item.save(os,xi->second))
434  return false;
435  }
436  return true;
437  };
438  };
439 
440  /*------------------------------------------------------------
441  * STL: pair
442  */
444  template<class T1, class T2>
445  class Item<std::pair<T1,T2> > {
446  public:
447  Item<T1> item1;
448  Item<T2> item2;
449  public:
450  inline bool load(mootio::mistream *is, std::pair<T1,T2> &x) const
451  {
452  return (item1.load(is,x.first) && !item2.load(is,x.second));
453  };
454 
455  inline bool save(mootio::mostream *os, const std::pair<T1,T2> &x) const
456  {
457  return (item1.save(os,x.first) && item2.save(os,x.second));
458  };
459  };
460 
461  /*------------------------------------------------------------
462  * moot types: Trigram
463  */
// Binary I/O specialization for mootHMM::Trigram: the three tag IDs
// tag1, tag2, tag3 are stored in that order.
464 template <>
465 class Item<mootHMM::Trigram> {
466 public:
// NOTE(review): the declaration of the member `tagid_item` used below is not
// visible in this listing (listing line 467 is absent); confirm its type
// against the original header.
468 public:
// Load tag1, tag2 and tag3 from `is` into `x`; evaluation stops at the first
// failing member and false is returned.
469 inline bool load(mootio::mistream *is, mootHMM::Trigram &x) const
470 {
471 return (tagid_item.load(is, x.tag1)
472 && tagid_item.load(is, x.tag2)
473 && tagid_item.load(is, x.tag3));
474 };
// Save tag1, tag2 and tag3 of `x` to `os`; evaluation stops at the first
// failing member and false is returned.
475 inline bool save(mootio::mostream *os, const mootHMM::Trigram &x) const
476 {
477 return (tagid_item.save(os, x.tag1)
478 && tagid_item.save(os, x.tag2)
479 && tagid_item.save(os, x.tag3));
480 };
481 };
482 
483  /*------------------------------------------------------------
484  * moot types: mootEnum
485  */
// Binary I/O specialization for mootEnum: only the ids2names table is stored;
// names2ids is rebuilt from it after loading.
487 template<class NameT, class HashFunc, class NameEqlFunc>
488 class Item<mootEnum<NameT,HashFunc,NameEqlFunc> > {
489 public:
// NOTE(review): the declaration of the member `i2n_item` used below is not
// visible in this listing (listing line 490 is absent).
491 public:
// NOTE(review): the signature of load() is not visible in this listing
// (listing line 492 is absent); the body below reads from `is` into `x`.
493 {
// Load ids2names, then assign each name its position in ids2names
// (counting from 0) as its ID in names2ids.
494 if (i2n_item.load(is, x.ids2names)) {
// names2ids.resize() is called with the number of loaded names; names2ids is
// not cleared here before being refilled.
495 x.names2ids.resize(x.ids2names.size());
496 unsigned u;
// NOTE(review): the declaration of the iterator `ni` is not visible in this
// listing (listing line 497 is absent).
498 for (ni = x.ids2names.begin(), u = 0; ni != x.ids2names.end(); ni++, u++)
499 {
500 x.names2ids[*ni] = u;
501 }
502 return true;
503 }
504 return false;
505 };
506 
// Save the ids2names table of `x` to `os`; names2ids is not written.
507 inline bool save(mootio::mostream *os, const mootEnum<NameT,HashFunc,NameEqlFunc> &x) const
508 {
509 return i2n_item.save(os, x.ids2names);
510 };
511 };
512 
513  /*------------------------------------------------------------
514  * moot types: AssocVector
515  */
516  template<typename KeyT, typename ValT>
517  class Item<AssocVector<KeyT,ValT> >
518  {
519  public:
521  Item<assoc_vector_type> vec_item;
522  public:
523  inline bool load(mootio::mistream *is, AssocVector<KeyT,ValT> &x) const
524  { return vec_item.load(is,x); };
525  inline bool save(mootio::mostream *os, const AssocVector<KeyT,ValT> &x) const
526  { return vec_item.save(os,x); };
527  };
528 
529  /*------------------------------------------------------------
530  * moot types: TrieVectorNode
531  */
532  template <typename DataT, typename CharT, typename UCharT>
533  class Item<TrieVectorNode<DataT,CharT,UCharT> > {
534  public:
535  Item<CharT> char_item;
536  Item<UCharT> uchar_item;
537  Item<DataT> data_item;
538  Item<size_t> size_item;
539  public:
540  inline bool load(mootio::mistream *is, TrieVectorNode<DataT,CharT,UCharT> &x) const
541  {
542  return (size_item.load(is,x.mother)
543  && size_item.load(is,x.mindtr)
544  && char_item.load(is,x.label)
545  && uchar_item.load(is,x.ndtrs)
546  && data_item.load(is,x.data));
547  };
548  inline bool save(mootio::mostream *os, const TrieVectorNode<DataT,CharT,UCharT> &x) const
549  {
550  return (size_item.save(os,x.mother)
551  && size_item.save(os,x.mindtr)
552  && char_item.save(os,x.label)
553  && uchar_item.save(os,x.ndtrs)
554  && data_item.save(os,x.data));
555  };
556  };
557 
558 
559  /*------------------------------------------------------------
560  * moot types: SuffixTrie
561  */
// Binary I/O specialization for SuffixTrie: maxcount and theta are stored
// first, followed by the trie itself via `vec_item`.
563 template<>
564 class Item<SuffixTrie> {
565 public:
// NOTE(review): the declaration of the member `vec_item` used below is not
// visible in this listing (listing line 566 is absent).
567 Item<CountT> maxcount_item;
568 Item<ProbT> theta_item;
569 public:
// Clear `x`, then load maxcount, theta and the trie contents in that order;
// evaluation stops at the first failing step and false is returned.
570 inline bool load(mootio::mistream *is, SuffixTrie &x) const
571 {
572 x.clear();
573 return (maxcount_item.load(is, x.maxcount)
574 && theta_item.load(is, x.theta)
575 && vec_item.load(is,x));
576 };
// Save maxcount, theta and the trie contents of `x` in that order;
// evaluation stops at the first failing step and false is returned.
577 inline bool save(mootio::mostream *os, const SuffixTrie &x) const
578 {
579 return (maxcount_item.save(os, x.maxcount)
580 && theta_item.save(os, x.theta)
581 && vec_item.save(os, x));
582 };
583 };
584 
585  /*------------------------------------------------------------
586  * moot types: Taster
587  */
589  template<>
590  class Item<mootTaster::Rule> {
591  public:
592  Item<string> string_item;
593  Item<UInt> uint_item;
594  public:
595  inline bool load(mootio::mistream *is, mootTaster::Rule &x) const
596  {
597  x.clear();
598  if (!(string_item.load(is,x.lab)
599  && string_item.load(is,x.re_s)
600  && uint_item.load(is,x.id)))
601  return false;
602  x.compile();
603  return true;
604  };
605  inline bool save(mootio::mostream *os, const mootTaster::Rule &x) const
606  {
607  return (string_item.save(os,x.lab)
608  && string_item.save(os,x.re_s)
609  && uint_item.save(os,x.id));
610  };
611  };
612 
// Binary I/O specialization for mootTaster: the rule list is stored first,
// followed by the fallback label (nolabel) and fallback id (noid).
614 template<>
615 class Item<mootTaster> {
616 public:
// NOTE(review): the declaration of the member `rules_item` used below is not
// visible in this listing (listing line 617 is absent).
618 Item<string> string_item;
619 Item<UInt> uint_item;
620 public:
// Clear `x`, then load rules, nolabel and noid in that order; evaluation
// stops at the first failing step and false is returned.
621 inline bool load(mootio::mistream *is, mootTaster &x) const
622 {
623 x.clear();
624 return (rules_item.load(is,x.rules)
625 && string_item.load(is,x.nolabel)
626 && uint_item.load(is,x.noid));
627 };
// Save rules, nolabel and noid of `x` in that order; evaluation stops at the
// first failing step and false is returned.
628 inline bool save(mootio::mostream *os, const mootTaster &x) const
629 {
630 return (rules_item.save(os,x.rules)
631 && string_item.save(os,x.nolabel)
632 && uint_item.save(os,x.noid));
633 };
634 };
635 
636 }; //-- mootBinIO
637 
638 
639 #endif /* _MOOT_BINIO_H */
UCharT ndtrs
number of daughters
Definition: mootTrieVector.h:66
bool load(mootio::mistream *is, string &x) const
Definition: mootBinIO.h:201
Definition: mootAssocVector.h:39
DataT data
data associated with this node
Definition: mootTrieVector.h:67
1st-order Hidden Markov Model Tagger/Disambiguator class.
Definition: mootHMM.h:120
High-level heuristic token classifier.
Definition: mootFlavor.h:62
CharT label
label of arc to this node
Definition: mootTrieVector.h:65
Hidden Markov Model tagger/disambiguator.
Top-level class for suffix tries.
Definition: mootSuffixTrie.h:46
virtual bool write(const char *buf, size_t n)
Definition: mootIO.h:218
Name2IdMap names2ids
maps names to IDs
Definition: mootEnum.h:78
Binary item I/O template class, used for binary HMM model files.
Definition: mootBinIO.h:67
bool load(mootio::mistream *is, char *&x) const
Definition: mootBinIO.h:173
Namespace for structured binary stream I/O.
Definition: mootBinHeader.h:37
ProbT theta
standard deviation of unigram MLEs
Definition: mootSuffixTrie.h:71
TagID tag1
previous-previous tag_{i-2} or 0
Definition: mootHMM.h:243
Binary I/O template instantiation for signed integers.
Definition: mootBinIO.h:132
bool load(mootio::mistream *is, T &x) const
Definition: mootBinIO.h:70
bool load_n(mootio::mistream *is, T *&x, size_t &n) const
Definition: mootBinIO.h:87
Abstract base class for output stream wrappers.
Definition: mootIO.h:194
bool save(mootio::mostream *os, const T &x) const
Definition: mootBinIO.h:76
LISP-style assoc list using vector<>: map-like class with small memory footprint. Useful for small as...
Definition: mootAssocVector.h:130
Definition: mootEnum.h:67
Definition: mootTrieVector.h:58
void clear(void)
Definition: mootTrieVector.h:205
Tag-trigram key type for HMM probability lookup table (only used if hash_ngrams is true) ...
Definition: mootHMM.h:241
CountT maxcount
raw frequency upper bound
Definition: mootSuffixTrie.h:70
TagID tag3
current tag: tag_i
Definition: mootHMM.h:245
AssocVector< KeyT, ValT >::assoc_vector_type assoc_vector_type
Definition: mootBinIO.h:522
bool save_n(mootio::mostream *os, const T *x, size_t n) const
Definition: mootBinIO.h:114
mootFlavorID noid
id to return if no rule matches (default: empty)
Definition: mootFlavor.h:145
bool saveItem(mootio::mostream *os, const T &x)
Definition: mootBinIO.h:58
Item< char > charItem
Definition: mootBinIO.h:170
bool save(mootio::mostream *os, const char *x) const
Definition: mootBinIO.h:179
Rules rules
matching heuristics in order of decreasing priority
Definition: mootFlavor.h:143
type for a single regex-based token classification heuristic
Definition: mootFlavor.h:74
Item< char > charItem
Definition: mootBinIO.h:199
Namespace for I/O stream wrappers.
Definition: mootBufferIO.h:45
mootFlavorID id
numeric id (zero by default)
Definition: mootFlavor.h:79
std::vector< assoc_node_type > assoc_vector_type
Definition: mootAssocVector.h:138
virtual ByteCount read(char *buf, size_t n)
Definition: mootIO.h:156
string re_s
POSIX.2 regex to match ("extended" regex string; see regex(7) manpage)
Definition: mootFlavor.h:80
Binary I/O template instantiation for STL strings.
Definition: mootBinIO.h:197
bool save(mootio::mostream *os, const size_t &x) const
Definition: mootBinIO.h:148
bool loadItem(mootio::mistream *is, T &x)
Definition: mootBinIO.h:51
bool save(mootio::mostream *os, const string &x) const
Definition: mootBinIO.h:213
mootFlavorStr lab
symbolic label
Definition: mootFlavor.h:78
int ByteCount
typedef for byte counts (should be signed, for compatibility)
Definition: mootIO.h:52
BinUInt Size
Definition: mootTypes.h:89
size_t mindtr
index of first arc from this node
Definition: mootTrieVector.h:47
TagID tag2
previous tag: tag_{i-1} or 0
Definition: mootHMM.h:244
size_t mother
index of this node's mother
Definition: mootTrieVector.h:46
mootFlavorStr nolabel
label to return if no rule matches (default: empty)
Definition: mootFlavor.h:144
Id2NameMap ids2names
maps IDs to names
Definition: mootEnum.h:79
Abstract base class for input stream wrappers.
Definition: mootIO.h:129
bool load(mootio::mistream *is, size_t &x) const
Definition: mootBinIO.h:136