00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef _moot_LEXFREQS_H
00030 #define _moot_LEXFREQS_H
00031
00032 #include <mootTypes.h>
00033 moot_BEGIN_NAMESPACE
00034
00038 class mootLexfreqs {
00039 public:
00040
00041
00043 typedef CountT LexfreqCount;
00044
00048 typedef map<mootTagString,LexfreqCount> LexfreqSubtable;
00049
00053 class LexfreqEntry {
00054 public:
00055 LexfreqCount count;
00056 LexfreqSubtable freqs;
00057 public:
00058 LexfreqEntry(const LexfreqCount tok_count=0)
00059 : count(tok_count)
00060 {};
00061 LexfreqEntry(const LexfreqCount tok_count,
00062 const LexfreqSubtable &tok_tagfreqs)
00063 : count(tok_count), freqs(tok_tagfreqs)
00064 {};
00066 void clear(void) {
00067 count = 0;
00068 freqs.clear();
00069 };
00070 };
00071
00075 typedef hash_map<mootTokString,LexfreqEntry> LexfreqTokTable;
00076
00078 typedef hash_map<mootTagString,LexfreqCount> LexfreqTagTable;
00079
00080 public:
00081
00082 LexfreqTokTable lftable;
00083 LexfreqTagTable tagtable;
00084 LexfreqCount n_tokens;
00086 public:
00087
00089 mootLexfreqs(size_t initial_bucket_count=0) : n_tokens(0)
00090 {
00091 if (initial_bucket_count != 0) {
00092 lftable.resize(initial_bucket_count);
00093 }
00094 };
00095
00097 ~mootLexfreqs() {
00098 clear();
00099 }
00100
00101
00102
00104 void clear(void);
00105
00107 inline void add_count(const mootTokString &text,
00108 const mootTagString &tag,
00109 const LexfreqCount count)
00110 {
00111
00112 LexfreqTokTable::iterator lfi = lftable.find(text);
00113 if (lfi == lftable.end()) {
00114
00115 lfi = lftable.insert(LexfreqTokTable::value_type(text,LexfreqEntry(count))).first;
00116 lfi->second.freqs[tag] = count;
00117 } else {
00118
00119 lfi->second.count += count;
00120
00121 LexfreqSubtable::iterator lsi = lfi->second.freqs.find(tag);
00122 if (lsi == lfi->second.freqs.end()) {
00123
00124 lfi->second.freqs[tag] = count;
00125 } else {
00126
00127 lsi->second += count;
00128 }
00129 }
00130
00131
00132 LexfreqTagTable::iterator lftagi = tagtable.find(tag);
00133 if (lftagi != tagtable.end()) {
00134 lftagi->second += count;
00135 } else {
00136 tagtable[tag] = count;
00137 }
00138
00139
00140 n_tokens += count;
00141 };
00142
00143
00144 const LexfreqCount taglookup(const mootTagString &tag) const
00145 {
00146 LexfreqTagTable::const_iterator tagi = tagtable.find(tag);
00147 return tagi == tagtable.end() ? 0 : tagi->second;
00148 };
00149
00157 void compute_specials(void);
00158
00162 size_t n_pairs(void);
00163
00164
00165
00167 bool load(const char *filename);
00168
00170 bool load(FILE *file, const char *filename = NULL);
00171
00173 bool save(const char *filename);
00174
00176 bool save(FILE *file, const char *filename = NULL);
00177 };
00178
00179
00180 moot_END_NAMESPACE
00181
00182 #endif