00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef _moot_LEXFREQS_H
00030 #define _moot_LEXFREQS_H
00031
00032 #include <mootTypes.h>
00033 #include <mootToken.h>
00034
00035 moot_BEGIN_NAMESPACE
00036
00040 class mootLexfreqs {
00041 public:
00042
00043
00045 typedef CountT LexfreqCount;
00046
00050 typedef map<mootTagString,LexfreqCount> LexfreqSubtable;
00051
00055 class LexfreqEntry {
00056 public:
00057 LexfreqCount count;
00058 LexfreqSubtable freqs;
00059 public:
00060 LexfreqEntry(const LexfreqCount tok_count=0)
00061 : count(tok_count)
00062 {};
00063 LexfreqEntry(const LexfreqCount tok_count,
00064 const LexfreqSubtable &tok_tagfreqs)
00065 : count(tok_count), freqs(tok_tagfreqs)
00066 {};
00068 void clear(void) {
00069 count = 0;
00070 freqs.clear();
00071 };
00072 };
00073
00077 typedef hash_map<mootTokString,LexfreqEntry> LexfreqTokTable;
00078
00080 typedef hash_map<mootTagString,LexfreqCount> LexfreqTagTable;
00081
00082 public:
00083
00084 LexfreqTokTable lftable;
00085 LexfreqTagTable tagtable;
00086 LexfreqCount n_tokens;
00088 public:
00089
00091 mootLexfreqs(size_t initial_bucket_count=0) : n_tokens(0)
00092 {
00093 if (initial_bucket_count != 0) {
00094 lftable.resize(initial_bucket_count);
00095 }
00096 };
00097
00099 ~mootLexfreqs() {
00100 clear();
00101 }
00102
00103
00104
00106 void clear(void);
00107
00109 inline void add_count(const mootTokString &text,
00110 const mootTagString &tag,
00111 const LexfreqCount count)
00112 {
00113
00114 LexfreqTokTable::iterator lfi = lftable.find(text);
00115 if (lfi == lftable.end()) {
00116
00117 lfi = lftable.insert(LexfreqTokTable::value_type(text,LexfreqEntry(count))).first;
00118 lfi->second.freqs[tag] = count;
00119 } else {
00120
00121 lfi->second.count += count;
00122
00123 LexfreqSubtable::iterator lsi = lfi->second.freqs.find(tag);
00124 if (lsi == lfi->second.freqs.end()) {
00125
00126 lfi->second.freqs[tag] = count;
00127 } else {
00128
00129 lsi->second += count;
00130 }
00131 }
00132
00133 if (!isTokFlavorName(text)) {
00134
00135 LexfreqTagTable::iterator lftagi = tagtable.find(tag);
00136 if (lftagi != tagtable.end()) {
00137 lftagi->second += count;
00138 } else {
00139 tagtable[tag] = count;
00140 }
00141
00142
00143 n_tokens += count;
00144 }
00145 };
00146
00147
00148 const LexfreqCount taglookup(const mootTagString &tag) const
00149 {
00150 LexfreqTagTable::const_iterator tagi = tagtable.find(tag);
00151 return tagi == tagtable.end() ? 0 : tagi->second;
00152 };
00153
00161 void compute_specials(void);
00162
00166 size_t n_pairs(void);
00167
00168
00169
00171 bool load(const char *filename);
00172
00174 bool load(FILE *file, const char *filename = NULL);
00175
00177 bool save(const char *filename);
00178
00180 bool save(FILE *file, const char *filename = NULL);
00181 };
00182
00183
00184 moot_END_NAMESPACE
00185
00186 #endif