34 #ifndef _moot_LEXFREQS_H 35 #define _moot_LEXFREQS_H 50 typedef CountT LexfreqCount;
/* Associates a tag string (mootTagString) with a LexfreqCount; this is the type of the per-entry `freqs` member declared further down in this listing. */
55 typedef map<mootTagString,LexfreqCount> LexfreqSubtable;
63 LexfreqSubtable freqs;
69 const LexfreqSubtable &tok_tagfreqs)
70 : count(tok_count), freqs(tok_tagfreqs)
/* Per-token table: the lookups in this listing call find(w) on it and read the entry's total `count` and its per-tag `freqs` subtable. */
89 LexfreqTokTable lftable;
/* Per-tag table: the lookup in this listing calls find(tag) on it and returns the mapped LexfreqCount directly. */
90 LexfreqTagTable tagtable;
/* Set to 0 in the constructor's initializer list. NOTE(review): presumably the total number of tokens counted -- confirm against the implementation. */
91 LexfreqCount n_tokens;
/* Set to 1.0 in the constructor's initializer list. NOTE(review): presumably the raw count at or below which a token is treated as "unknown" -- confirm. The spelling "threshhold" is part of the identifier and must be kept. */
92 LexfreqCount unknown_threshhold;
99 : n_tokens(0), unknown_threshhold(1.0), taster(&
builtinTaster)
101 if (initial_bucket_count != 0)
102 lftable.resize(initial_bucket_count);
126 LexfreqTokTable::const_iterator wi = lftable.find(w);
127 return wi == lftable.end() ? 0 : wi->second.count;
133 LexfreqTagTable::const_iterator ti = tagtable.find(tag);
134 return ti == tagtable.end() ? 0 : ti->second;
140 LexfreqTokTable::const_iterator wi = lftable.find(w);
141 if (wi == lftable.end())
return 0;
142 LexfreqSubtable::const_iterator wti = wi->second.freqs.find(tag);
143 return wti == wi->second.freqs.end() ? 0 : wti->second;
/**
 * Declaration only; the body is not part of this listing.
 * \param compute_unknown Defaults to true. NOTE(review): presumably controls
 *        whether an entry for "unknown" tokens is computed along with the
 *        other special entries -- confirm against the implementation.
 */
158 void compute_specials(
bool compute_unknown=
true);
/**
 * Declaration only; the body is not part of this listing.
 * \param remove_unknown Defaults to true. NOTE(review): presumably controls
 *        whether the "unknown" entry is removed together with the other
 *        special entries -- confirm against the implementation.
 */
167 void remove_specials(
bool remove_unknown=
true);
/**
 * Declaration only; the body is not part of this listing.
 * \param zf_special A CountT value, defaulting to 1.0. NOTE(review): the name
 *        suggests a zero-frequency count used when discounting special
 *        entries -- confirm against the implementation.
 */
173 void discount_specials(
CountT zf_special=1.0);
/**
 * Declaration only; takes no arguments and returns a size_t.
 * NOTE(review): presumably the number of (token, tag) pairs stored in the
 * table -- confirm against the implementation.
 */
178 size_t n_pairs(
void);
/**
 * Overload taking only a file name. Declaration only; the body is not part
 * of this listing.
 * \return bool. NOTE(review): presumably true on success -- confirm.
 */
183 bool load(
const char *filename);
/**
 * Overload taking a C stdio stream. Declaration only.
 * \param file     The FILE* passed by the caller.
 * \param filename Optional, defaults to NULL. NOTE(review): presumably used
 *                 only for diagnostics -- confirm.
 * \return bool. NOTE(review): presumably true on success -- confirm.
 */
186 bool load(FILE *file,
const char *filename = NULL);
/**
 * Overload taking only a file name. Declaration only; the body is not part
 * of this listing.
 * \return bool. NOTE(review): presumably true on success -- confirm.
 */
189 bool save(
const char *filename);
/**
 * Overload taking a C stdio stream. Declaration only.
 * \param file     The FILE* passed by the caller.
 * \param filename Optional, defaults to NULL. NOTE(review): presumably used
 *                 only for diagnostics -- confirm.
 * \return bool. NOTE(review): presumably true on success -- confirm.
 */
192 bool save(FILE *file,
const char *filename = NULL);
High-level heuristic token classifier.
Definition: mootFlavor.h:62
classes and utilities for tokens and associated analyses
classes and utilities for regex-based token "flavor" heuristics
Definition: mootLexfreqs.h:59
Class for storage and retrieval of raw lexical frequencies.
Definition: mootLexfreqs.h:44
hash_map< mootTagString, LexfreqCount > LexfreqTagTable
Definition: mootLexfreqs.h:84
const mootTaster builtinTaster
string mootTagString
Definition: mootToken.h:59
ProbT CountT
Definition: mootTypes.h:67
hash_map< mootTokString, LexfreqEntry > LexfreqTokTable
Definition: mootLexfreqs.h:81
string mootTokString
Definition: mootToken.h:62