Class for storage and retrieval of raw lexical frequencies.
Classes | |
class | LexfreqEntry |
Public Types | |
typedef CountT | LexfreqCount |
typedef map< mootTagString, LexfreqCount > | LexfreqSubtable |
typedef hash_map< mootTokString, LexfreqEntry > | LexfreqTokTable |
typedef hash_map< mootTagString, LexfreqCount > | LexfreqTagTable |
Public Member Functions | |
mootLexfreqs (size_t initial_bucket_count=0) | |
~mootLexfreqs () | |
void | clear (void) |
void | add_count (const mootTokString &text, const mootTagString &tag, const LexfreqCount count) |
void | remove_word (const mootTokString &text) |
LexfreqCount | f_word (const mootTokString &w) const |
LexfreqCount | f_tag (const mootTagString &tag) const |
LexfreqCount | f_word_tag (const mootTokString &w, const mootTagString &tag) const |
void | compute_specials (bool compute_unknown=true) |
void | remove_specials (bool remove_unknown=true) |
void | discount_specials (CountT zf_special=1.0) |
size_t | n_pairs (void) |
bool | load (const char *filename) |
bool | load (FILE *file, const char *filename=__null) |
bool | save (const char *filename) |
bool | save (FILE *file, const char *filename=__null) |
Public Attributes | |
LexfreqTokTable | lftable |
LexfreqTagTable | tagtable |
LexfreqCount | n_tokens |
LexfreqCount | unknown_threshhold |
const mootTaster * | taster |
Type for a single lexeme+tag co-occurrence count
typedef map<mootTagString,LexfreqCount> moot::mootLexfreqs::LexfreqSubtable |
Type for frequency lookup subtables.
typedef hash_map<mootTokString,LexfreqEntry> moot::mootLexfreqs::LexfreqTokTable |
Type for the lexical frequency lookup table.
typedef hash_map<mootTagString,LexfreqCount> moot::mootLexfreqs::LexfreqTagTable |
Lookup table: tag->Count(tag)
|
inline |
Default constructor
|
inline |
Default destructor
References add_count(), moot::mootLexfreqs::LexfreqEntry::clear(), moot::mootLexfreqs::LexfreqEntry::count, and remove_word().
void moot::mootLexfreqs::clear | ( | void | ) |
Clear internal table(s)
Referenced by moot::mootHMMTrainer::clear().
void moot::mootLexfreqs::add_count | ( | const mootTokString & | text, |
const mootTagString & | tag, | ||
const LexfreqCount | count | ||
) |
Add 'count' to the current count for (token,tag)
Referenced by ~mootLexfreqs().
void moot::mootLexfreqs::remove_word | ( | const mootTokString & | text | ) |
Remove entry for a word
Referenced by ~mootLexfreqs().
|
inline |
get total frequency of a text type ("token")
|
inline |
get frequency of a tag
|
inline |
get total frequency of a (word,tag) pair
References compute_specials(), discount_specials(), load(), n_pairs(), remove_specials(), and save().
void moot::mootLexfreqs::compute_specials | ( | bool | compute_unknown = true | ) |
Compute counts for 'special' pseudo-lexemes and add them to the object. These include all flavors defined by taster (if specified and non-null), as well as the special unknown-word token. You should have set taster before calling this method.
compute_unknown | whether to also compute an entry for the special unknown-word token |
Referenced by f_word_tag().
void moot::mootLexfreqs::remove_specials | ( | bool | remove_unknown = true | ) |
Remove entries for 'special' pseudo-lexemes from the object. You should have set taster before calling this method.
taster | mootTaster for determining which lexemes to remove (read from the taster member, not passed as an argument) |
remove_unknown | whether to also remove the entry for the special unknown-word token |
Referenced by f_word_tag().
void moot::mootLexfreqs::discount_specials | ( | CountT | zf_special = 1.0 | ) |
Discount pseudo-frequencies for 'special' pseudo-lexemes.
zf_special | total frequency mass to allot for 'special' pseudo-lexemes. |
Referenced by f_word_tag().
size_t moot::mootLexfreqs::n_pairs | ( | void | ) |
Return the number of distinct (token,tag) pairs we've counted.
Referenced by f_word_tag().
bool moot::mootLexfreqs::load | ( | const char * | filename | ) |
Load data from a TnT-style parameter file
Referenced by f_word_tag().
bool moot::mootLexfreqs::load | ( | FILE * | file, |
const char * | filename = __null |
||
) |
Load data from a TnT-style parameter file (stream version)
bool moot::mootLexfreqs::save | ( | const char * | filename | ) |
Save data to a TnT-style parameter file
Referenced by f_word_tag().
bool moot::mootLexfreqs::save | ( | FILE * | file, |
const char * | filename = __null |
||
) |
Save data to a TnT-style parameter file (stream version)
LexfreqTokTable moot::mootLexfreqs::lftable |
lexeme->(tag->count) lookup table
LexfreqTagTable moot::mootLexfreqs::tagtable |
tag->count lookup table
LexfreqCount moot::mootLexfreqs::n_tokens |
total number of tokens counted
LexfreqCount moot::mootLexfreqs::unknown_threshhold |
maximum frequency for the special unknown-word lexeme (default=1)
const mootTaster* moot::mootLexfreqs::taster |
regex-based token flavor heuristics (default=builtin; NULL for none)