34 #ifndef _moot_NGRAMS_H 35 #define _moot_NGRAMS_H 80 class Ngram :
public deque<mootTagString> {
120 if (size() >= 3) pop_front();
127 for (const_iterator i = begin(); i != end(); i++) {
167 {
return ngtable.size(); };
170 size_t n_bigrams(
void)
const;
173 size_t n_trigrams(
void)
const;
182 ngtable[tag].count += count;
192 ngtable[tag1].freqs[tag2].count += count;
201 ngtable[tag1].freqs[tag2].freqs[tag3] += count;
210 switch (ngram.size()) {
214 add_count(ngram[0],count);
217 add_count(ngram[0],ngram[1],count);
220 add_count(ngram[0],ngram[1],ngram[2],count);
224 add_count(ngram[ngram.size()-3],ngram[ngram.size()-2],ngram[ngram.size()-1],count);
239 size_t ngsize = ngram.size();
240 if (ngsize < 1)
return;
242 NgramTable::iterator ngi1 = ngtable.find(ngram.
tag1());
243 if (ngi1 == ngtable.end()) {
244 ngi1 = ngtable.insert(pair<mootTagString,UnigramEntry>(ngram.
tag1(),
UnigramEntry())).first;
246 ngi1->second.count += count;
249 if (ngsize < 2)
return;
250 BigramTable::iterator ngi2 = ngi1->second.freqs.find(ngram.
tag2());
251 if (ngi2 == ngi1->second.freqs.end()) {
255 ngi2->second.count += count;
257 if (ngsize < 3)
return;
258 TrigramTable::iterator ngi3 = ngi2->second.freqs.find(ngram.
tag3());
259 if (ngi3 == ngi2->second.freqs.end()) {
260 ngi2->second.freqs[ngram.
tag3()] = count;
262 ngi3->second += count;
271 NgramTable::const_iterator ugi = ngtable.find(tag);
272 return ugi == ngtable.end() ? 0 : ugi->second.count;
278 NgramTable::const_iterator ugi = ngtable.find(tag1);
279 if (ugi == ngtable.end())
return 0;
280 BigramTable::const_iterator bgi = ugi->second.freqs.find(tag2);
281 return bgi == ugi->second.freqs.end() ? 0 : bgi->second.count;
287 NgramTable::const_iterator ugi = ngtable.find(tag1);
288 if (ugi == ngtable.end())
return 0;
289 BigramTable::const_iterator bgi = ugi->second.freqs.find(tag2);
290 if (bgi == ugi->second.freqs.end())
return 0;
291 TrigramTable::const_iterator tgi = bgi->second.freqs.find(tag3);
292 return tgi == bgi->second.freqs.end() ? 0 : tgi->second;
298 bool load(
const char *filename);
301 bool load(FILE *file,
const char *filename = NULL);
304 bool save(
const char *filename,
bool compact=
false);
307 bool save(FILE *file,
const char *filename = NULL,
bool compact=
false);
map< mootTagString, BigramEntry > BigramTable
Definition: mootNgrams.h:65
string as_string(void) const
Definition: mootNgrams.h:125
map< mootTagString, NgramCount > TrigramTable
Definition: mootNgrams.h:53
NgramCount ugtotal
Definition: mootNgrams.h:143
Ngram(const mootTagString &tag1)
Definition: mootNgrams.h:85
void add_count(const mootTagString &tag, const NgramCount count)
Definition: mootNgrams.h:180
BigramTable freqs
Definition: mootNgrams.h:71
CountT count
Definition: mootNgrams.h:58
classes and utilities for tokens and associated analyses
mootNgrams(void)
Definition: mootNgrams.h:148
CountT count
Definition: mootNgrams.h:70
NgramTable ngtable
Definition: mootNgrams.h:142
CountT NgramCount
Definition: mootNgrams.h:50
void clear(void)
Definition: mootNgrams.h:158
const mootTagString & tag2(void) const
Definition: mootNgrams.h:111
void push(const mootTagString &tag_new=mootTagString(""))
Definition: mootNgrams.h:119
BigramEntry
Definition: mootNgrams.h:56
UnigramEntry
Definition: mootNgrams.h:68
mootNgrams
Class for storage & retrieval of raw N-Gram frequencies.
Definition: mootNgrams.h:44
Ngram(void)
Definition: mootNgrams.h:83
size_t n_unigrams(void) const
Definition: mootNgrams.h:166
TrigramTable freqs
Definition: mootNgrams.h:59
const NgramCount lookup(const mootTagString &tag1, const mootTagString &tag2) const
Definition: mootNgrams.h:276
void add_count(const Ngram &ngram, const NgramCount count)
Definition: mootNgrams.h:208
BigramEntry(const CountT bg_count=0)
Definition: mootNgrams.h:61
map< mootTagString, UnigramEntry > NgramTable
Definition: mootNgrams.h:77
UnigramEntry(const CountT ug_count=0)
Definition: mootNgrams.h:73
string mootTagString
Definition: mootToken.h:59
ProbT CountT
Definition: mootTypes.h:67
~mootNgrams()
Definition: mootNgrams.h:151
Ngram
Definition: mootNgrams.h:80
~Ngram(void)
Definition: mootNgrams.h:101
const NgramCount lookup(const mootTagString &tag1, const mootTagString &tag2, const mootTagString &tag3) const
Definition: mootNgrams.h:285
Ngram(const mootTagString &tag1, const mootTagString &tag2)
Definition: mootNgrams.h:89
Ngram(const mootTagString &tag1, const mootTagString &tag2, const mootTagString &tag3)
Definition: mootNgrams.h:94
void add_count(const mootTagString &tag1, const mootTagString &tag2, const NgramCount count)
Definition: mootNgrams.h:190
const mootTagString & tag1(void) const
Definition: mootNgrams.h:109
const NgramCount lookup(const mootTagString &tag) const
Definition: mootNgrams.h:269
const mootTagString & tag3(void) const
Definition: mootNgrams.h:113
void add_count(const mootTagString &tag1, const mootTagString &tag2, const mootTagString &tag3, const NgramCount count)
Definition: mootNgrams.h:199
void add_counts(const Ngram &ngram, const NgramCount count)
Definition: mootNgrams.h:237