mootDynHMM subclass for dynamic lexical probabilities
Public Member Functions | |
Constructors etc. | |
mootDynLexHMM (void) | |
virtual | ~mootDynLexHMM (void) |
virtual void | set_options (const mootDynHMMOptions &opts) |
Compilation & initialization | |
virtual bool | load_model (const string &modelname, const mootTagString &start_tag_str="__$", const char *myname="mootDynLexHMM::load_model()", bool do_estimate_nglambdas=true, bool do_estimate_wlambdas=true, bool do_estimate_clambdas=true, bool do_build_suffix_trie=true, bool do_compute_logprobs=true) |
virtual bool | compile (mootLexfreqs &lexfreqs, mootNgrams &ngrams, mootClassfreqs &classfreqs, const mootTagString &start_tag_str="__$") |
Tagging: Hooks | |
virtual void | tag_hook_pre (mootSentence &sent) |
virtual void | tag_hook_post (mootSentence &sent) |
User-level Niceties | |
virtual void | tw_put_info (moot::TokenWriter *tw) |
Tagging: Hooks: Low-level Utilities | |
void | dynlex_clear (void) |
virtual ProbT | dynlex_analysis_freq (const mootToken &tok, const mootToken::Analysis &a) |
virtual void | dynlex_populate_lexprobs (void) |
Public Member Functions inherited from moot::mootDynHMM | |
mootDynHMM (void) | |
virtual | ~mootDynHMM (void) |
void | lex_clear (void) |
void | lex_resize (TokID tokid_max) |
TokID | lex_get_tokid (const mootTokString &tok_text) |
void | tagset_resize (TagID tagid_max) |
TagID | get_tagid (const mootTagString &tagstr) |
virtual void | tag_io (TokenReader *reader, TokenWriter *writer) |
virtual void | tag_sentence (mootSentence &sentence) |
Public Member Functions inherited from moot::mootHMM | |
mootHMM (void) | |
virtual | ~mootHMM (void) |
void | clear (bool wipe_everything=true, bool unlogify=false) |
bool | save (const char *filename, int compression_level=(-1)) |
bool | save (mootio::mostream *obs, const char *filename=__null) |
bool | _bindump (mootio::mostream *obs, const mootBinIO::HeaderInfo &hdr, const char *filename=__null) |
bool | load (const char *filename=__null) |
bool | load (mootio::mistream *ibs, const char *filename=__null) |
bool | _binload (mootio::mistream *ibs, const mootBinIO::HeaderInfo &hdr, const char *filename=__null) |
void | unknown_token_name (const mootTokString &name) |
void | unknown_tag_name (const mootTokString &name) |
void | unknown_class_name (const mootTagSet &tagset) |
virtual bool | compile (const mootLexfreqs &lexfreqs, const mootNgrams &ngrams, const mootClassfreqs &classfreqs, const mootTagString &start_tag_str="__$", const mootTaster &mtaster=builtinTaster) |
void | assign_ids_fl (void) |
void | assign_ids_lf (const mootLexfreqs &lexfreqs) |
void | assign_ids_ng (const mootNgrams &ngrams) |
void | assign_ids_cf (const mootClassfreqs &classfreqs) |
void | compile_unknown_lexclass (const mootClassfreqs &classfreqs) |
bool | estimate_lambdas (const mootNgrams &ngrams) |
bool | estimate_wlambdas (const mootLexfreqs &lf) |
bool | estimate_clambdas (const mootClassfreqs &cf) |
bool | build_suffix_trie (const mootLexfreqs &lf, const mootNgrams &ng, bool verbose=false) |
bool | compute_logprobs (void) |
void | set_ngram_prob (ProbT p, TagID t1=0, TagID t2=0, TagID t3=0) |
void | tag_sentence (mootSentence &sentence) |
virtual void | tag_stream (TokenReader *reader, TokenWriter *writer) |
void | viterbi_clear (void) |
void | viterbi_step (const mootToken &token) |
void | viterbi_step (TokID tokid, const LexClass &lexclass, const mootTokString &toktext="") |
void | viterbi_step (TokID tokid, ClassID classid, const LexClass &lclass, const mootTokString &toktext="") |
void | viterbi_step (TokID tokid, const mootTokString &toktext="") |
void | viterbi_step (const mootTokString &token_text) |
void | viterbi_step (const mootTokString &token_text, const set< mootTagString > &tags) |
void | viterbi_step (TokID tokid, TagID tagid, ViterbiColumn *col=__null) |
void | viterbi_step (const mootTokString &toktext, const mootTagString &tag) |
void | viterbi_finish (const TagID final_tagid) |
void | viterbi_finish (void) |
void | viterbi_flush (TokenWriter *writer, mootSentence &toks, ViterbiNode *nod) |
void | tag_mark_best (mootSentence &sentence) |
void | tag_mark_best (ViterbiPathNode *pnod, mootSentence &sentence) |
void | tag_dump_trace (mootSentence &sentence, bool dumpPredict=false) |
ViterbiPathNode * | viterbi_best_path (void) |
ViterbiPathNode * | viterbi_best_path (TagID tagid) |
ViterbiPathNode * | viterbi_best_path (const mootTagString &tagstr) |
ViterbiNode * | viterbi_best_node (void) |
ViterbiNode * | viterbi_best_node (TagID tagid) |
ViterbiNode * | viterbi_flushable_node (void) |
ViterbiPathNode * | viterbi_node_path (ViterbiNode *node) |
bool | viterbi_column_ok (const ViterbiColumn *col) const |
ViterbiColumn * | viterbi_populate_row (TagID curtagid, ProbT wordpr=0.0, ViterbiColumn *col=__null, ProbT probmin=1.0) |
void | viterbi_clear_bestpath (void) |
void | _viterbi_step_fallback (TokID tokid, ViterbiColumn *col) |
ViterbiNode * | viterbi_get_node (void) |
ViterbiRow * | viterbi_get_row (void) |
ViterbiColumn * | viterbi_get_column (void) |
ViterbiPathNode * | viterbi_get_pathnode (void) |
TokID | token2id (const mootTokString &token) const |
void | token2lexclass (const mootToken &token, LexClass &tok_class) const |
LexClass * | tagset2lexclass (const mootTagSet &tagset, LexClass *lclass=__null, bool add_tagids=false) |
ClassID | class2id (const LexClass &lclass, bool autopopulate=true, bool autocreate=true) |
const ProbT | wordp (const TokID tokid, const TagID tagid) const |
const ProbT | wordp (const mootTokString &tokstr, const mootTagString &tagstr) const |
const ProbT | classp (const ClassID classid, const TagID tagid) const |
const ProbT | classp (const LexClass &lclass, const mootTagString &tagstr) const |
const ProbT | tagp (const TagID tagid) const |
const ProbT | tagp (const mootTagString &tag) const |
const ProbT | tagp (const TagID prevtagid, const TagID tagid) const |
const ProbT | tagp (const mootTagString &prevtag, const mootTagString &tag) const |
const ProbT | tagp (const Trigram &trigram, ProbT ProbZero=-1E+38) const |
const ProbT | tagp (const TagID prevtagid2, const TagID prevtagid1, const TagID tagid) const |
const ProbT | tagp (const mootTagString &prevtag2, const mootTagString &prevtag1, const mootTagString &tag) const |
void | carp (const char *fmt,...) |
void | txtdump (FILE *file, bool dump_constants=true, bool dump_lexprobs=true, bool dump_classprobs=true, bool dump_suftrie=true, bool dump_ngprobs=true) |
void | viterbi_txtdump (TokenWriter *w, int ncols=0) |
void | viterbi_txtdump_col (TokenWriter *w, ViterbiColumn *col, int colnum=0) |
Public Attributes | |
bool | invert_lexp |
TagStr | newtag_str |
TagID | newtag_id |
ProbT | newtag_f |
TagTokProbMap | Ftw |
TokProbMap | Fw |
TokProbMap | Ft |
ProbT | Ftw_eps |
size_t | tagids_size_orig |
Public Attributes inherited from moot::mootHMM | |
int | verbose |
size_t | ndots |
bool | save_ambiguities |
bool | save_flavors |
bool | save_mark_unknown |
bool | hash_ngrams |
bool | relax |
bool | use_lex_classes |
bool | use_flavors |
TagID | start_tagid |
ProbT | unknown_lex_threshhold |
ProbT | unknown_class_threshhold |
LexClass | uclass |
ProbT | nglambda1 |
ProbT | nglambda2 |
ProbT | nglambda3 |
ProbT | wlambda0 |
ProbT | wlambda1 |
ProbT | clambda0 |
ProbT | clambda1 |
ProbT | beamwd |
TokIDTable | tokids |
TagIDTable | tagids |
ClassIDTable | classids |
mootTaster | taster |
size_t | n_tags |
size_t | n_toks |
size_t | n_classes |
LexProbTable | lexprobs |
LexClassProbTable | lcprobs |
NgramProbHash | ngprobsh |
NgramProbArray | ngprobsa |
SuffixTrie | suftrie |
ViterbiColumn * | vtable |
size_t | nsents |
size_t | ntokens |
size_t | nnewtokens |
size_t | nunclassed |
size_t | nnewclasses |
size_t | nunknown |
size_t | nfallbacks |
Additional Inherited Members | |
Protected Attributes inherited from moot::mootHMM | |
ViterbiNode * | trash_nodes |
ViterbiRow * | trash_rows |
ViterbiColumn * | trash_columns |
ViterbiPathNode * | trash_pathnodes |
TagID | vtagid |
ProbT | vbestpr |
ProbT | vtagpr |
ProbT | vwordpr |
ViterbiNode * | vbestpn |
ViterbiPathNode * | vbestpath |
useful alias
useful alias
typedef std::map<TokStr,ProbT> moot::mootDynLexHMM::TokProbMap |
lexical string (sub-)map: w -> p(w|t)
typedef std::map<TagStr,ProbT> moot::mootDynLexHMM::TagProbMap |
lexical string (sub-)map: t -> p(t)
typedef std::map<TagStr,TokProbMap> moot::mootDynLexHMM::TagTokProbMap |
lexical string map: t -> (w -> p(w|t))
typedef std::map<TagStr,TagProbMap> moot::mootDynLexHMM::TokTagProbMap |
lexical string map: w -> (t -> p(w|t))
|
inline |
|
inlinevirtual |
|
inlinevirtual |
Set generic user-level options.
Reimplemented from moot::mootDynHMM.
Reimplemented in moot::mootDynLexHMM_Boltzmann.
References moot::mootDynHMMOptions::Ftw_eps, moot::mootDynHMMOptions::invert_lexp, and moot::mootDynHMMOptions::newtag_str.
Referenced by moot::mootDynLexHMM_Boltzmann::set_options().
|
virtual |
load a binary or text mode
Reimplemented from moot::mootHMM.
|
virtual |
compile a text model; ensures newtag_id is defined
|
virtual |
Prepare dynamic lexicon for tagging sent
. Default implementation does nothing.
Reimplemented from moot::mootDynHMM.
|
virtual |
Cleanup dynamic lexicon after tagging sent
. Default implementation does nothing.
Reimplemented from moot::mootDynHMM.
|
virtual |
Write some debugging header information to an output stream
Reimplemented from moot::mootDynHMM.
Reimplemented in moot::mootDynLexHMM_Boltzmann.
Referenced by moot::mootDynLexHMM_Boltzmann::tw_put_info().
void moot::mootDynLexHMM::dynlex_clear | ( | void | ) |
Clear dynamic lexica
|
inlinevirtual |
Estimate pseudo-frequency for the tag associated with analysis a
of token tok
. Called by tag_hook_pre() for each vanilla (token,analysis) in the input sentence; returned value is used to (re-)populate Ftw, Fw, and Ft data fields for each input sentence.
Has no effect with default tag_hook_pre() if returned pseudo-frequency is <=0. Default implementation just returns a.prob + Ftw_eps
Reimplemented in moot::mootDynLexHMM_Boltzmann.
References moot::mootDynHMMOptions::Ftw_eps, and moot::mootToken::Analysis::prob.
|
virtual |
Converts pseudo-frequency fields Ftw, Ft, and Fw to mootHMM::lexprobs. Sets lexprobs[w][t] = log( wlambda1 * Ftw(w,t)/Z(w,t) )
bool moot::mootDynLexHMM::invert_lexp |
If true (the default), dynamic lexical probabilities will be estimated (incorrectly) as p(w|t) := f(w,t)/f(w) == p(t|w)
, rather than p(w|t) := f(w,t)/f(w)
.
Despite incorrectness, true is the default value here, which at least makes some sense for dynamic lexical maps which are functions of the input token's text type (mootToken::text()).
TagStr moot::mootDynLexHMM::newtag_str |
tag string to copy for "missing" tags (default="@NEW")
TagID moot::mootDynLexHMM::newtag_id |
ID for "missing" tags
ProbT moot::mootDynLexHMM::newtag_f |
Raw frequency for 'new' tag, if not already in model. Default=0.5
TagTokProbMap moot::mootDynLexHMM::Ftw |
pseudo-frequency lexicon: t -> (w -> f(w,t))
TokProbMap moot::mootDynLexHMM::Fw |
pseudo-frequency (sub-)lexicon: w -> f(w)
TokProbMap moot::mootDynLexHMM::Ft |
pseudo-frequency (sub-)lexicon: t -> f(t)
ProbT moot::mootDynLexHMM::Ftw_eps |
Raw pseudo-frequency smoothing constant (non-log) for f(w,t)
size_t moot::mootDynLexHMM::tagids_size_orig |
original size of tagids