mootClassfreqs.h
Go to the documentation of this file.
1 /* -*- Mode: C++ -*- */
2 
3 /*
4  libmoot : moocow's part-of-speech tagging library
5  Copyright (C) 2003-2017 by Bryan Jurish <moocow@cpan.org>
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Lesser General Public
9  License as published by the Free Software Foundation; either
10  version 3 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Lesser General Public License for more details.
16 
17  You should have received a copy of the GNU Lesser General Public
18  License along with this library; if not, write to the Free Software
19  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 
22 /*============================================================================
23  * File: mootClassfreqs.h
24  * Author: Bryan Jurish <moocow@cpan.org>
25  * Description:
26  * Class for storage & retrieval of lexical-class frequencies (nested map<>)
27  *============================================================================*/
28 
34 #ifndef _moot_CLASSFREQS_H
35 #define _moot_CLASSFREQS_H
36 
37 #include <mootLexfreqs.h>
38 
39 moot_BEGIN_NAMESPACE
40 
45 public:
46  //------ public typedefs
47 
52 
57 
60 
62  struct LexClassHash {
63  public:
64  inline size_t operator()(const LexClass &x) const {
65  size_t hv = 0;
66  moot_hash<mootTagString> hasher;
67  for (LexClass::const_iterator xi = x.begin(); xi != x.end(); xi++) {
68  hv = 5*hv + hasher(*xi);
69  }
70  return hv;
71  };
72  };
74  struct LexClassEqual {
75  public:
76  inline size_t operator()(const LexClass &x, const LexClass &y) const {
77  return x==y;
78  };
79  };
80 
84  typedef hash_map<LexClass,
85  ClassfreqEntry,
89 
91  typedef hash_map<mootTagString,CountT> TagfreqTable;
92 
93 public:
94  //------ public data
96  TagfreqTable tagtable;
99 public:
100  //------ public methods
102  mootClassfreqs(size_t initial_bucket_count=0) : totalcount(0)
103  {
104  if (initial_bucket_count != 0) {
105  lctable.resize(initial_bucket_count);
106  }
107  };
108 
111  clear();
112  }
113 
114  //------ public methods: manipulation
115 
117  void clear(void);
118 
120  inline void add_count(const LexClass &lclass,
121  const mootTagString &tag,
122  const CountT count)
123  {
124  //-- adjust token-table
125  ClassfreqTable::iterator lci = lctable.find(lclass);
126  if (lci == lctable.end()) {
127  //-- new class
128  lci = lctable.insert(ClassfreqTable::value_type(lclass,ClassfreqEntry(count))).first;
129  lci->second.freqs[tag] = count;
130  } else {
131  //-- known class
132  lci->second.count += count;
133 
134  ClassfreqSubtable::iterator lsi = lci->second.freqs.find(tag);
135  if (lsi == lci->second.freqs.end()) {
136  //-- unknown (tok,tag) pair
137  lci->second.freqs[tag] = count;
138  } else {
139  //-- known (tok,tag) pair: just add
140  lsi->second += count;
141  }
142  }
143 
144  //-- adjust total tag-count
145  TagfreqTable::iterator lctagi = tagtable.find(tag);
146  if (lctagi != tagtable.end()) {
147  lctagi->second += count;
148  } else {
149  tagtable[tag] = count;
150  }
151 
152  //-- adjust total token-count
153  totalcount += count;
154  };
155 
156  //------ public methods: lookup
157  const CountT taglookup(const mootTagString &tag) const
158  {
159  TagfreqTable::const_iterator tagi = tagtable.find(tag);
160  return tagi == tagtable.end() ? 0 : tagi->second;
161  };
162 
166  size_t n_pairs(void);
167 
173  size_t n_impossible(void);
174 
175  //------ public methods: i/o
176 
178  bool load(const char *filename);
179 
181  bool load(FILE *file, const char *filename = NULL);
182 
184  bool save(const char *filename);
185 
187  bool save(FILE *file, const char *filename = NULL);
188 };
189 
190 
191 moot_END_NAMESPACE
192 
193 #endif /* _moot_CLASSFREQS_H */
size_t operator()(const LexClass &x) const
Definition: mootClassfreqs.h:64
~mootClassfreqs()
Definition: mootClassfreqs.h:110
ClassfreqTable lctable
Definition: mootClassfreqs.h:95
map< mootTagString, LexfreqCount > LexfreqSubtable
Definition: mootLexfreqs.h:54
CountT totalcount
Definition: mootClassfreqs.h:97
size_t operator()(const LexClass &x, const LexClass &y) const
Definition: mootClassfreqs.h:76
Definition: mootClassfreqs.h:62
HMM training data: lexical frequencies: raw.
hash_map< LexClass, ClassfreqEntry, LexClassHash, LexClassEqual > ClassfreqTable
Definition: mootClassfreqs.h:88
mootClassfreqs(size_t initial_bucket_count=0)
Definition: mootClassfreqs.h:102
Class for storage and retrieval of raw lexical-class frequencies.
Definition: mootClassfreqs.h:44
set< mootTagString > mootTagSet
Definition: mootToken.h:65
Definition: mootLexfreqs.h:59
void add_count(const LexClass &lclass, const mootTagString &tag, const CountT count)
Definition: mootClassfreqs.h:120
Definition: mootClassfreqs.h:74
hash_map< mootTagString, CountT > TagfreqTable
Definition: mootClassfreqs.h:91
mootTagSet LexClass
Definition: mootClassfreqs.h:59
string mootTagString
Definition: mootToken.h:59
ProbT CountT
Definition: mootTypes.h:67
mootLexfreqs::LexfreqEntry ClassfreqEntry
Definition: mootClassfreqs.h:56
mootLexfreqs::LexfreqSubtable ClassfreqSubtable
Definition: mootClassfreqs.h:51
const CountT taglookup(const mootTagString &tag) const
Definition: mootClassfreqs.h:157
TagfreqTable tagtable
Definition: mootClassfreqs.h:96