Top-level class for suffix tries.
Public Types | |
typedef mootEnumID | TagID |
typedef mootEnum< mootTagString > | TagIDTable |
typedef TrieVector< SuffixTrieDataT > | TrieType |
Public Types inherited from moot::TrieVector< SuffixTrieDataT > | |
typedef SuffixTrieDataT | data_type |
typedef char | char_type |
typedef unsigned char | uchar_type |
typedef TrieVector< data_type, char_type, uchar_type > | trie_type |
typedef TrieVectorNode< data_type, char_type, uchar_type > | node_type |
typedef std::vector< node_type > | vector_type |
typedef vector_type::iterator | iterator |
typedef vector_type::const_iterator | const_iterator |
typedef std::string | string_type |
typedef string_type::iterator | string_iterator |
typedef string_type::const_iterator | const_string_iterator |
typedef string_type::reverse_iterator | reverse_string_iterator |
typedef string_type::const_reverse_iterator | const_reverse_string_iterator |
typedef std::map< string_type, NodeId > | map_type |
typedef map_type::iterator | map_iterator |
typedef map_type::const_iterator | const_map_iterator |
Public Types inherited from moot::TrieVectorBase | |
typedef size_t | NodeId |
Public Member Functions | |
Constructors etc. | |
SuffixTrie (size_t max_length=SuffixTrieDefaultMaxLen, bool use_case=true, size_t max_count=10) | |
~SuffixTrie (void) | |
Compilation | |
bool | build (const mootLexfreqs &lf, const mootNgrams &ng, const TagIDTable &tagids, TagID eos_tagid, bool verbose=false) |
bool | _build_insert (const mootLexfreqs &lf) |
bool | _build_assoc (const mootLexfreqs &lf, const TagIDTable &tagids) |
bool | _build_compute_theta (const mootLexfreqs &lf, const mootNgrams &ng, const TagIDTable &tagids, TagID eos_tagid) |
bool | _build_compute_mles (const mootLexfreqs &lf, const mootNgrams &ng, const TagIDTable &tagids, TagID eos_tagid) |
bool | _build_invert_mles (const mootNgrams &ng, const TagIDTable &tagids, TagID eos_tagid) |
Lookup | |
iterator | find_ancestor_nonempty (iterator dtr, size_t *matchlen=__null) |
const_iterator | const_find_ancestor_nonempty (const_iterator dtr, size_t *matchlen=__null) const |
iterator | rfind_longest_nonempty (const mootTokString &tokstr, size_t *matchlen=__null) |
const_iterator | rfind_longest_nonempty (const mootTokString &tokstr, size_t *matchlen=__null) const |
const SuffixTrieDataT & | sufprobs (const mootTokString &tokstr, size_t *matchlen=__null) const |
Debug / Dump | |
void | txtdump (FILE *out, const TagIDTable &tagids) const |
Public Member Functions inherited from moot::TrieVector< SuffixTrieDataT > | |
TrieVector (size_t max_len=NoMaxLen, bool use_case=false) | |
~TrieVector (void) | |
void | clear (void) |
const size_t & | maxlen (void) const |
size_t & | maxlen (void) |
bool | compiled (void) const |
void | ensure_compiled (void) |
const SuffixTrieDataT & | default_data (void) const |
SuffixTrieDataT & | default_data (void) |
string_type | trie_canonicalize (string_type &s) const |
void | trie_key (const string_type &s, const size_t max_len, string_type &dst) const |
string_type | trie_key (const string_type &s, const size_t max_len) const |
string_type | trie_key (const string_type &s) const |
void | trie_rkey (const string_type &s, const size_t max_len, string_type &dst) const |
string_type | trie_rkey (const string_type &s, size_t max_len) const |
string_type | trie_rkey (const string_type &s) const |
void | trie_insert (const string_type &s, size_t max_len) |
void | trie_insert (const string_type &s) |
void | trie_rinsert (const string_type &s, size_t max_len) |
void | trie_rinsert (const string_type &s) |
iterator | find_dtr (const node_type &from, char label) |
const_iterator | find_dtr (const node_type &from, char label) const |
NodeId | find_dtr_id (NodeId fromid, char label) const |
iterator | first_dtr (const node_type &from) |
const_iterator | first_dtr (const node_type &from) const |
iterator | find_mother (const node_type &to) |
const_iterator | find_mother (const node_type &to) const |
NodeId | find_mother_id (NodeId toid) const |
string_type | node_rstring (const node_type &node) const |
string_type | node_rstring (NodeId nodeid) const |
string_type | node_string (const node_type &node) const |
string_type | node_string (NodeId nodeid) const |
size_t | node_depth (const node_type &node) const |
size_t | node_depth (NodeId nodeid) const |
void | compile (void) |
iterator | find_longest (const string_type &s, size_t *matchlen=__null) |
const_iterator | find_longest (const string_type &s, size_t *matchlen=__null) const |
iterator | rfind_longest (const string_type &s, size_t *matchlen=__null) |
const_iterator | rfind_longest (const string_type &s, size_t *matchlen=__null) const |
Public Member Functions inherited from moot::TrieVectorBase | |
TrieVectorBase (size_t maxlen=NoMaxLen, bool use_case=false) | |
Public Attributes | |
CountT | maxcount |
raw frequency upper bound More... | |
ProbT | theta |
standard deviation of unigram MLEs More... | |
Public Attributes inherited from moot::TrieVector< SuffixTrieDataT > | |
map_type | trie_pending |
pending insertions More... | |
data_type | trie_default_data |
default data More... | |
Public Attributes inherited from moot::TrieVectorBase | |
size_t | trie_maxlen |
maximum trie depth More... | |
bool | trie_use_case |
whether to use case More... | |
Static Public Attributes | |
static const size_t | SuffixTrieDefaultMaxLen = 0 |
Static Public Attributes inherited from moot::TrieVectorBase | |
static const NodeId | NoNode = static_cast<size_t>(-1) |
static const size_t | NoMaxLen = static_cast<size_t>(-1) |
typedef mootEnumID moot::SuffixTrie::TagID |
Typedef for a tag-id
Typedef for tag-id lookup table
Underlying trie template type
|
inline |
Default constructor
|
inline |
Destructor
References _build_assoc(), _build_compute_mles(), _build_compute_theta(), _build_insert(), _build_invert_mles(), and build().
bool moot::SuffixTrie::build | ( | const mootLexfreqs & | lf, |
const mootNgrams & | ng, | ||
const TagIDTable & | tagids, | ||
TagID | eos_tagid, | ||
bool | verbose = false |
||
) |
Construct a suffix trie from a mootLexfreqs object and a mootEnum object for tagids
Referenced by moot::mootHMM::build_suffix_trie(), and ~SuffixTrie().
bool moot::SuffixTrie::_build_insert | ( | const mootLexfreqs & | lf | ) |
Low-level compilation utilitiy: enqueue pending suffix arcs
Referenced by ~SuffixTrie().
bool moot::SuffixTrie::_build_assoc | ( | const mootLexfreqs & | lf, |
const TagIDTable & | tagids | ||
) |
Low-level compilation utilitiy: assign count data to all trie nodes
Referenced by ~SuffixTrie().
bool moot::SuffixTrie::_build_compute_theta | ( | const mootLexfreqs & | lf, |
const mootNgrams & | ng, | ||
const TagIDTable & | tagids, | ||
TagID | eos_tagid | ||
) |
Low-level compilation utilitiy: compute smoothing constants
Referenced by ~SuffixTrie().
bool moot::SuffixTrie::_build_compute_mles | ( | const mootLexfreqs & | lf, |
const mootNgrams & | ng, | ||
const TagIDTable & | tagids, | ||
TagID | eos_tagid | ||
) |
Low-level compilation utilitiy: compute MLE probabilities P(tag|suffix)
Referenced by ~SuffixTrie().
bool moot::SuffixTrie::_build_invert_mles | ( | const mootNgrams & | ng, |
const TagIDTable & | tagids, | ||
TagID | eos_tagid | ||
) |
Low-level compilation utilitiy: Bayesian inversion: compute P(suffix|t) = P(t|suffix)*P(suffix)/P(t)
Referenced by ~SuffixTrie().
|
inline |
Get first ancestor (or self) with non-empty data, or end() on failure (read/write)
References moot::TrieVector< SuffixTrieDataT >::find_mother().
Referenced by rfind_longest_nonempty().
|
inline |
Get first real ancestor (or self) with non-empty data, or end() on failure (read-only)
References moot::TrieVector< SuffixTrieDataT >::find_mother().
Referenced by rfind_longest_nonempty().
|
inline |
Get reverse-longest match iterator with actual data, read/write
References find_ancestor_nonempty(), and moot::TrieVector< SuffixTrieDataT >::rfind_longest().
Referenced by sufprobs().
|
inline |
Get reverse-longest match iterator with actual data, read-only
References const_find_ancestor_nonempty(), and moot::TrieVector< SuffixTrieDataT >::rfind_longest().
|
inline |
Get (log-) probability table tagid=>log(P(tokstr|tagid)) based on longest matched suffix
References moot::TrieVector< SuffixTrieDataT >::default_data(), rfind_longest_nonempty(), and txtdump().
void moot::SuffixTrie::txtdump | ( | FILE * | out, |
const TagIDTable & | tagids | ||
) | const |
Referenced by sufprobs().
|
static |
CountT moot::SuffixTrie::maxcount |
Referenced by mootBinIO::Item< SuffixTrie >::load(), and mootBinIO::Item< SuffixTrie >::save().
ProbT moot::SuffixTrie::theta |
Referenced by mootBinIO::Item< SuffixTrie >::load(), and mootBinIO::Item< SuffixTrie >::save().