mootDynHMM subclass for dynamic lexical probabilities

Inheritance diagram for moot::mootDynLexHMM:

Collaboration diagram for moot::mootDynLexHMM:

[legend]

Public Types
typedef mootTokString	TokStr

typedef mootTagString	TagStr

typedef std::map< TokStr, ProbT >	TokProbMap

typedef std::map< TagStr, ProbT >	TagProbMap

typedef std::map< TagStr, TokProbMap >	TagTokProbMap

typedef std::map< TagStr, TagProbMap >	TokTagProbMap

Public Types inherited from moot::mootHMM
typedef mootEnumID	TagID

typedef mootEnumID	TokID

typedef mootEnumID	FlavorID

typedef mootEnumID	ClassID

typedef moot::VerbosityLevel	VerbosityLevel

typedef set< TagID >	LexClass

typedef mootEnum< mootTagString, __gnu_cxx::hash< mootTagString >, equal_to< mootTagString > >	TagIDTable

typedef mootEnum< mootTokString, __gnu_cxx::hash< mootTokString >, equal_to< mootTokString > >	TokIDTable

typedef mootEnum< LexClass, LexClassHash, LexClassEqual >	ClassIDTable

typedef AssocVector< TagID, ProbT >	LexProbSubTable

typedef LexProbSubTable	LexClassProbSubTable

typedef vector< LexProbSubTable >	LexProbTable

typedef LexProbTable	LexClassProbTable

typedef ProbT *	UnigramProbTable

typedef hash_map< Trigram, ProbT, Trigram::HashFcn, Trigram::EqualFcn >	TrigramProbHash

typedef ProbT *	TrigramProbArray
	Type for uni-, bi- and trigram probability lookup table. More...

typedef Trigram	NgramProbKey
	Generic n-gram key: trigrams. More...

typedef TrigramProbHash	NgramProbHash
	Generic n-gram probabilities: trigrams, hashed. More...

typedef TrigramProbArray	NgramProbArray
	Generic n-gram probabilities: trigrams, dense. More...

Public Member Functions
Constructors etc.
	mootDynLexHMM (void)

virtual	~mootDynLexHMM (void)

virtual void	set_options (const mootDynHMMOptions &opts)

Compilation & initialization
virtual bool	load_model (const string &modelname, const mootTagString &start_tag_str="__$", const char *myname="mootDynLexHMM::load_model()", bool do_estimate_nglambdas=true, bool do_estimate_wlambdas=true, bool do_estimate_clambdas=true, bool do_build_suffix_trie=true, bool do_compute_logprobs=true)

virtual bool	compile (mootLexfreqs &lexfreqs, mootNgrams &ngrams, mootClassfreqs &classfreqs, const mootTagString &start_tag_str="__$")

Tagging: Hooks
virtual void	tag_hook_pre (mootSentence &sent)

virtual void	tag_hook_post (mootSentence &sent)

User-level Niceties
virtual void	tw_put_info (moot::TokenWriter *tw)

Tagging: Hooks: Low-level Utilities
void	dynlex_clear (void)

virtual ProbT	dynlex_analysis_freq (const mootToken &tok, const mootToken::Analysis &a)

virtual void	dynlex_populate_lexprobs (void)

Public Member Functions inherited from moot::mootDynHMM
	mootDynHMM (void)

virtual	~mootDynHMM (void)

void	lex_clear (void)

void	lex_resize (TokID tokid_max)

TokID	lex_get_tokid (const mootTokString &tok_text)

void	tagset_resize (TagID tagid_max)

TagID	get_tagid (const mootTagString &tagstr)

virtual void	tag_io (TokenReader reader, TokenWriter writer)

virtual void	tag_sentence (mootSentence &sentence)

Public Member Functions inherited from moot::mootHMM
	mootHMM (void)

virtual	~mootHMM (void)

void	clear (bool wipe_everything=true, bool unlogify=false)

bool	save (const char *filename, int compression_level=(-1))

bool	save (mootio::mostream obs, const char filename=__null)

bool	_bindump (mootio::mostream obs, const mootBinIO::HeaderInfo &hdr, const char filename=__null)

bool	load (const char *filename=__null)

bool	load (mootio::mistream ibs, const char filename=__null)

bool	_binload (mootio::mistream ibs, const mootBinIO::HeaderInfo &hdr, const char filename=__null)

void	unknown_token_name (const mootTokString &name)

void	unknown_tag_name (const mootTokString &name)

void	unknown_class_name (const mootTagSet &tagset)

virtual bool	compile (const mootLexfreqs &lexfreqs, const mootNgrams &ngrams, const mootClassfreqs &classfreqs, const mootTagString &start_tag_str="__$", const mootTaster &mtaster=builtinTaster)

void	assign_ids_fl (void)

void	assign_ids_lf (const mootLexfreqs &lexfreqs)

void	assign_ids_ng (const mootNgrams &ngrams)

void	assign_ids_cf (const mootClassfreqs &classfreqs)

void	compile_unknown_lexclass (const mootClassfreqs &classfreqs)

bool	estimate_lambdas (const mootNgrams &ngrams)

bool	estimate_wlambdas (const mootLexfreqs &lf)

bool	estimate_clambdas (const mootClassfreqs &cf)

bool	build_suffix_trie (const mootLexfreqs &lf, const mootNgrams &ng, bool verbose=false)

bool	compute_logprobs (void)

void	set_ngram_prob (ProbT p, TagID t1=0, TagID t2=0, TagID t3=0)

void	tag_sentence (mootSentence &sentence)

virtual void	tag_stream (TokenReader reader, TokenWriter writer)

void	viterbi_clear (void)

void	viterbi_step (const mootToken &token)

void	viterbi_step (TokID tokid, const LexClass &lexclass, const mootTokString &toktext="")

void	viterbi_step (TokID tokid, ClassID classid, const LexClass &lclass, const mootTokString &toktext="")

void	viterbi_step (TokID tokid, const mootTokString &toktext="")

void	viterbi_step (const mootTokString &token_text)

void	viterbi_step (const mootTokString &token_text, const set< mootTagString > &tags)

void	viterbi_step (TokID tokid, TagID tagid, ViterbiColumn *col=__null)

void	viterbi_step (const mootTokString &toktext, const mootTagString &tag)

void	viterbi_finish (const TagID final_tagid)

void	viterbi_finish (void)

void	viterbi_flush (TokenWriter writer, mootSentence &toks, ViterbiNode nod)

void	tag_mark_best (mootSentence &sentence)

void	tag_mark_best (ViterbiPathNode *pnod, mootSentence &sentence)

void	tag_dump_trace (mootSentence &sentence, bool dumpPredict=false)

ViterbiPathNode *	viterbi_best_path (void)

ViterbiPathNode *	viterbi_best_path (TagID tagid)

ViterbiPathNode *	viterbi_best_path (const mootTagString &tagstr)

ViterbiNode *	viterbi_best_node (void)

ViterbiNode *	viterbi_best_node (TagID tagid)

ViterbiNode *	viterbi_flushable_node (void)

ViterbiPathNode *	viterbi_node_path (ViterbiNode *node)

bool	viterbi_column_ok (const ViterbiColumn *col) const

ViterbiColumn *	viterbi_populate_row (TagID curtagid, ProbT wordpr=0.0, ViterbiColumn *col=__null, ProbT probmin=1.0)

void	viterbi_clear_bestpath (void)

void	_viterbi_step_fallback (TokID tokid, ViterbiColumn *col)

ViterbiNode *	viterbi_get_node (void)

ViterbiRow *	viterbi_get_row (void)

ViterbiColumn *	viterbi_get_column (void)

ViterbiPathNode *	viterbi_get_pathnode (void)

TokID	token2id (const mootTokString &token) const

void	token2lexclass (const mootToken &token, LexClass &tok_class) const

LexClass *	tagset2lexclass (const mootTagSet &tagset, LexClass *lclass=__null, bool add_tagids=false)

ClassID	class2id (const LexClass &lclass, bool autopopulate=true, bool autocreate=true)

const ProbT	wordp (const TokID tokid, const TagID tagid) const

const ProbT	wordp (const mootTokString &tokstr, const mootTagString &tagstr) const

const ProbT	classp (const ClassID classid, const TagID tagid) const

const ProbT	classp (const LexClass &lclass, const mootTagString &tagstr) const

const ProbT	tagp (const TagID tagid) const

const ProbT	tagp (const mootTagString &tag) const

const ProbT	tagp (const TagID prevtagid, const TagID tagid) const

const ProbT	tagp (const mootTagString &prevtag, const mootTagString &tag) const

const ProbT	tagp (const Trigram &trigram, ProbT ProbZero=-1E+38) const

const ProbT	tagp (const TagID prevtagid2, const TagID prevtagid1, const TagID tagid) const

const ProbT	tagp (const mootTagString &prevtag2, const mootTagString &prevtag1, const mootTagString &tag) const

void	carp (const char *fmt,...)

void	txtdump (FILE *file, bool dump_constants=true, bool dump_lexprobs=true, bool dump_classprobs=true, bool dump_suftrie=true, bool dump_ngprobs=true)

void	viterbi_txtdump (TokenWriter *w, int ncols=0)

void	viterbi_txtdump_col (TokenWriter w, ViterbiColumn col, int colnum=0)

Public Attributes
bool	invert_lexp

TagStr	newtag_str

TagID	newtag_id

ProbT	newtag_f

TagTokProbMap	Ftw

TokProbMap	Fw

TokProbMap	Ft

ProbT	Ftw_eps

size_t	tagids_size_orig

Public Attributes inherited from moot::mootHMM
int	verbose

size_t	ndots

bool	save_ambiguities

bool	save_flavors

bool	save_mark_unknown

bool	hash_ngrams

bool	relax

bool	use_lex_classes

bool	use_flavors

TagID	start_tagid

ProbT	unknown_lex_threshhold

ProbT	unknown_class_threshhold

LexClass	uclass

ProbT	nglambda1

ProbT	nglambda2

ProbT	nglambda3

ProbT	wlambda0

ProbT	wlambda1

ProbT	clambda0

ProbT	clambda1

ProbT	beamwd

TokIDTable	tokids

TagIDTable	tagids

ClassIDTable	classids

mootTaster	taster

size_t	n_tags

size_t	n_toks

size_t	n_classes

LexProbTable	lexprobs

LexClassProbTable	lcprobs

NgramProbHash	ngprobsh

NgramProbArray	ngprobsa

SuffixTrie	suftrie

ViterbiColumn *	vtable

size_t	nsents

size_t	ntokens

size_t	nnewtokens

size_t	nunclassed

size_t	nnewclasses

size_t	nunknown

size_t	nfallbacks

Additional Inherited Members
Protected Attributes inherited from moot::mootHMM
ViterbiNode *	trash_nodes

ViterbiRow *	trash_rows

ViterbiColumn *	trash_columns

ViterbiPathNode *	trash_pathnodes

TagID	vtagid

ProbT	vbestpr

ProbT	vtagpr

ProbT	vwordpr

ViterbiNode *	vbestpn

ViterbiPathNode *	vbestpath

Member Typedef Documentation

◆ TokStr

typedef mootTokString moot::mootDynLexHMM::TokStr

useful alias

◆ TagStr

typedef mootTagString moot::mootDynLexHMM::TagStr

useful alias

◆ TokProbMap

typedef std::map<TokStr,ProbT> moot::mootDynLexHMM::TokProbMap

lexical string (sub-)map: w -> p(w|t)

◆ TagProbMap

typedef std::map<TagStr,ProbT> moot::mootDynLexHMM::TagProbMap

lexical string (sub-)map: t -> p(t)

◆ TagTokProbMap

typedef std::map<TagStr,TokProbMap> moot::mootDynLexHMM::TagTokProbMap

lexical string map: t -> (w -> p(w|t))

◆ TokTagProbMap

typedef std::map<TagStr,TagProbMap> moot::mootDynLexHMM::TokTagProbMap

lexical string map: w -> (t -> p(w|t))

Constructor & Destructor Documentation

◆ mootDynLexHMM()

moot::mootDynLexHMM::mootDynLexHMM ( void )

inline

◆ ~mootDynLexHMM()

virtual moot::mootDynLexHMM::~mootDynLexHMM ( void )

inlinevirtual

Member Function Documentation

◆ set_options()

virtual void moot::mootDynLexHMM::set_options ( const mootDynHMMOptions & opts )

inlinevirtual

Set generic user-level options.

Reimplemented from moot::mootDynHMM.

Reimplemented in moot::mootDynLexHMM_Boltzmann.

References moot::mootDynHMMOptions::Ftw_eps, moot::mootDynHMMOptions::invert_lexp, and moot::mootDynHMMOptions::newtag_str.

Referenced by moot::mootDynLexHMM_Boltzmann::set_options().

◆ load_model()

virtual bool moot::mootDynLexHMM::load_model	(	const string &	modelname,
		const mootTagString &	start_tag_str = `"__$"`,
		const char *	myname = `"mootDynLexHMM::load_model()"`,
		bool	do_estimate_nglambdas = `true`,
		bool	do_estimate_wlambdas = `true`,
		bool	do_estimate_clambdas = `true`,
		bool	do_build_suffix_trie = `true`,
		bool	do_compute_logprobs = `true`
	)

virtual

load a binary or text mode

Reimplemented from moot::mootHMM.

◆ compile()

virtual bool moot::mootDynLexHMM::compile	(	mootLexfreqs &	lexfreqs,
		mootNgrams &	ngrams,
		mootClassfreqs &	classfreqs,
		const mootTagString &	start_tag_str = `"__$"`
	)

virtual

compile a text model; ensures newtag_id is defined

◆ tag_hook_pre()

virtual void moot::mootDynLexHMM::tag_hook_pre ( mootSentence & sent )

virtual

Prepare dynamic lexicon for tagging sent. Default implementation does nothing.

Reimplemented from moot::mootDynHMM.

◆ tag_hook_post()

virtual void moot::mootDynLexHMM::tag_hook_post ( mootSentence & sent )

virtual

Cleanup dynamic lexicon after tagging sent. Default implementation does nothing.

Reimplemented from moot::mootDynHMM.

◆ tw_put_info()

virtual void moot::mootDynLexHMM::tw_put_info ( moot::TokenWriter * tw )

virtual

Write some debugging header information to an output stream

Reimplemented from moot::mootDynHMM.

Reimplemented in moot::mootDynLexHMM_Boltzmann.

Referenced by moot::mootDynLexHMM_Boltzmann::tw_put_info().

◆ dynlex_clear()

void moot::mootDynLexHMM::dynlex_clear ( void )

Clear dynamic lexica

◆ dynlex_analysis_freq()

virtual ProbT moot::mootDynLexHMM::dynlex_analysis_freq	(	const mootToken &	tok,
		const mootToken::Analysis &	a
	)

inlinevirtual

Estimate pseudo-frequency for the tag associated with analysis a of token tok. Called by tag_hook_pre() for each vanilla (token,analysis) in the input sentence; returned value is used to (re-)populate Ftw, Fw, and Ft data fields for each input sentence.

Has no effect with default tag_hook_pre() if returned pseudo-frequency is <=0. Default implementation just returns a.prob + Ftw_eps

Returns: pseudo-frequency f ~= f(tok.text(),a.tag).

Reimplemented in moot::mootDynLexHMM_Boltzmann.

References moot::mootDynHMMOptions::Ftw_eps, and moot::mootToken::Analysis::prob.

◆ dynlex_populate_lexprobs()

virtual void moot::mootDynLexHMM::dynlex_populate_lexprobs ( void )

virtual

Converts pseudo-frequency fields Ftw, Ft, and Fw to mootHMM::lexprobs. Sets lexprobs[w][t] = log( wlambda1 * Ftw(w,t)/Z(w,t) )

Member Data Documentation

◆ invert_lexp

bool moot::mootDynLexHMM::invert_lexp

If true (the default), dynamic lexical probabilities will be estimated (incorrectly) as p(w|t) := f(w,t)/f(w) == p(t|w), rather than p(w|t) := f(w,t)/f(w).

Despite incorrectness, true is the default value here, which at least makes some sense for dynamic lexical maps which are functions of the input token's text type (mootToken::text()).

◆ newtag_str

TagStr moot::mootDynLexHMM::newtag_str

tag string to copy for "missing" tags (default="@NEW")

◆ newtag_id

TagID moot::mootDynLexHMM::newtag_id

ID for "missing" tags

◆ newtag_f

ProbT moot::mootDynLexHMM::newtag_f

Raw frequency for 'new' tag, if not already in model. Default=0.5

◆ Ftw

TagTokProbMap moot::mootDynLexHMM::Ftw

pseudo-frequency lexicon: t -> (w -> f(w,t))

◆ Fw

TokProbMap moot::mootDynLexHMM::Fw

pseudo-frequency (sub-)lexicon: w -> f(w)

◆ Ft

TokProbMap moot::mootDynLexHMM::Ft

pseudo-frequency (sub-)lexicon: t -> f(t)

◆ Ftw_eps

ProbT moot::mootDynLexHMM::Ftw_eps

Raw pseudo-frequency smoothing constant (non-log) for f(w,t)

◆ tagids_size_orig

size_t moot::mootDynLexHMM::tagids_size_orig

original size of tagids

The documentation for this class was generated from the following file:

mootDynHMM.h

Public Types

Public Member Functions

Public Attributes

Additional Inherited Members

Member Typedef Documentation

◆ TokStr

◆ TagStr

◆ TokProbMap

◆ TagProbMap

◆ TagTokProbMap

◆ TokTagProbMap

Constructor & Destructor Documentation

◆ mootDynLexHMM()

◆ ~mootDynLexHMM()

Member Function Documentation

◆ set_options()

◆ load_model()

◆ compile()

◆ tag_hook_pre()

◆ tag_hook_post()

◆ tw_put_info()

◆ dynlex_clear()

◆ dynlex_analysis_freq()

◆ dynlex_populate_lexprobs()

Member Data Documentation

◆ invert_lexp

◆ newtag_str

◆ newtag_id

◆ newtag_f

◆ Ftw

◆ Fw

◆ Ft

◆ Ftw_eps

◆ tagids_size_orig