High-level class to gather training data for a mootHMM or mootCHMM.
Public Types | |
Training types | |
typedef mootNgrams::Ngram | Ngram |
typedef mootNgrams::NgramCount | CountT |
typedef set< mootTagString > | TagSet |
Public Member Functions | |
Constructor / destructor | |
mootHMMTrainer (void) | |
~mootHMMTrainer (void) | |
Reset / Clear | |
void | clear (void) |
Top-level training methods | |
bool | train_from_reader (TokenReader *reader) |
bool | train_from_stream (FILE *in=stdin, const string &srcname="(unknown)") |
bool | train_from_file (const string &filename) |
bool | train_finish (void) |
Mid-level training methods | |
void | train_init (void) |
void | train_bos (void) |
void | train_token (const mootToken &curtok) |
void | train_eos (void) |
Warnings / Errors | |
void | carp (const char *fmt,...) |
Public Attributes | |
Training data | |
mootNgrams | ngrams |
mootLexfreqs | lexfreqs |
mootClassfreqs | lcfreqs |
mootTaster | taster |
Flags | |
bool | want_ngrams |
bool | want_lexfreqs |
bool | want_classfreqs |
bool | want_flavors |
Pragmatic constants | |
mootTagString | eos_tag |
Protected Attributes | |
Runtime training state | |
Ngram | ng |
bool | last_was_eos |
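typedef mootNgrams::Ngram moot::mootHMMTrainer::Ngram |
Type for an N-gram
typedef mootNgrams::NgramCount moot::mootHMMTrainer::CountT |
Type for counts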
typedef set<mootTagString> moot::mootHMMTrainer::TagSet |
Type for current tag-sets
moot::mootHMMTrainer::mootHMMTrainer | ( | void | ) |
inline |
Default constructor
moot::mootHMMTrainer::~mootHMMTrainer | ( | void | ) |
inline |
Default destructor
void moot::mootHMMTrainer::clear | ( | void | ) |
inline |
Reset / clear the object.
References moot::mootLexfreqs::clear(), moot::mootClassfreqs::clear(), moot::mootNgrams::clear(), and moot::mootTaster::set_default_rules().
bool moot::mootHMMTrainer::train_from_reader | ( | TokenReader * | reader | ) |
Gather training data using the TokenIO layer
bool moot::mootHMMTrainer::train_from_stream | ( | FILE * | in = stdin , |
const string & | srcname = "(unknown)" |
||
) |
Gather training data from a native text-format C-stream
bool moot::mootHMMTrainer::train_from_file | ( | const string & | filename | ) |
Gather training data from a file using mootTaggerLexer
bool moot::mootHMMTrainer::train_finish | ( | void | ) |
Finish training and compute "special" pseudo-frequencies (e.g. flavors, etc.)
void moot::mootHMMTrainer::train_init | ( | void | ) |
Initialize training data
void moot::mootHMMTrainer::train_bos | ( | void | ) |
Initialize data for training a new sentence
void moot::mootHMMTrainer::train_token | ( | const mootToken & | curtok | ) |
Gather training information for a single token, using mootToken interface
void moot::mootHMMTrainer::train_eos | ( | void | ) |
Gather training information for a sentence boundary.
void moot::mootHMMTrainer::carp | ( | const char * | fmt, |
... | |||
) |
Error reporting
mootNgrams moot::mootHMMTrainer::ngrams |
Raw n-gram frequency data
mootLexfreqs moot::mootHMMTrainer::lexfreqs |
Raw lexical frequency data
mootClassfreqs moot::mootHMMTrainer::lcfreqs |
Raw lexical-class frequency data
mootTaster moot::mootHMMTrainer::taster |
Heuristic token classifier (default: built-in rules)
bool moot::mootHMMTrainer::want_ngrams |
Whether to gather n-gram frequency data
bool moot::mootHMMTrainer::want_lexfreqs |
Whether to gather lexical frequency data
bool moot::mootHMMTrainer::want_classfreqs |
Whether to gather lexical-class frequency data
bool moot::mootHMMTrainer::want_flavors |
Whether to gather lexical-flavor information
mootTagString moot::mootHMMTrainer::eos_tag |
String indicating end-of-sentence: this is usually __$
Ngram moot::mootHMMTrainer::ng |
protected |
Current n-gram window
bool moot::mootHMMTrainer::last_was_eos |
protected |
Stupid hack