00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef _MOOT_HMM_H
00030 #define _MOOT_HMM_H
00031
00032 #ifdef __GNUC__
00033 # include <float.h>
00034 #endif // __GNUC__
00035
00036 #include <string.h>
00037 #include <ctype.h>
00038
00039 #include <mootTypes.h>
00040 #include <mootIO.h>
00041 #include <mootZIO.h>
00042 #include <mootToken.h>
00043 #include <mootTokenIO.h>
00044 #include <mootLexfreqs.h>
00045 #include <mootClassfreqs.h>
00046 #include <mootNgrams.h>
00047 #include <mootEnum.h>
00048
00055 #define mootProbEpsilon 1.19209290E-06F
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00074 #define MOOT_PROB_NEG -3E+38
00075 #define MOOT_PROB_ZERO -1E+38
00076 #define MOOT_PROB_ONE 0.0
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00095 #define MOOT_LEX_UNKNOWN_TOKENS
00096
00097
00123 #define MOOT_LEX_UNKNOWN_CLASSES
00124
00125
00131
00132 #undef MOOT_VITERBI_DEBUG
00133
00134
00135 moot_BEGIN_NAMESPACE
00136
00137
00138
00139
00140
00147 class mootHMM {
00148 public:
00149
00152
00154 typedef enum {
00155 vlSilent,
00156 vlErrors,
00157 vlWarnings,
00158 vlProgress,
00159 vlEverything
00160 } VerbosityLevel;
00161
00162
00164 typedef mootEnumID TagID;
00165
00167 typedef mootEnumID TokID;
00168
00173 typedef mootEnumID ClassID;
00175
00176
00177
00178
00180
00181
00186 typedef set<TagID> LexClass;
00187
00189 struct LexClassHash {
00190 public:
00191 inline size_t operator()(const LexClass &x) const {
00192 size_t hv = 0;
00193 for (LexClass::const_iterator xi = x.begin(); xi != x.end(); xi++) {
00194 hv = 5*hv + *xi;
00195 }
00196 return hv;
00197 };
00198 };
00200 struct LexClassEqual {
00201 public:
00202 inline size_t operator()(const LexClass &x, const LexClass &y) const {
00203 return x==y;
00204 };
00205 };
00207
00208
00211
00213 typedef mootEnum<mootTagString,
00214 hash<mootTagString>,
00215 equal_to<mootTagString> >
00216 TagIDTable;
00217
00219 typedef mootEnum<mootTokString,
00220 hash<mootTokString>,
00221 equal_to<mootTokString> >
00222 TokIDTable;
00223
00225 typedef mootEnum<LexClass,
00226 LexClassHash,
00227 LexClassEqual>
00228 ClassIDTable;
00229
00231 typedef map<TagID,ProbT> LexProbSubTable;
00232
00237 typedef LexProbSubTable LexClassProbSubTable;
00238
00242 typedef vector<LexProbSubTable> LexProbTable;
00243
00258 typedef LexProbTable LexClassProbTable;
00259
00271 typedef ProbT *BigramProbTable;
00272
00273 #if defined(MOOT_USE_TRIGRAMS)
00274 # if defined(MOOT_HASH_TRIGRAMS)
00275
00276 class Trigram {
00277 public:
00278
00280 struct HashFcn {
00281 public:
00282 inline size_t operator()(const Trigram &x) const
00283 {
00284 return
00285 (0xdeece66d * ((0xdeece66d * x.tag1) + x.tag2)) + x.tag3;
00286 };
00287 };
00288
00290 struct EqualFcn {
00291 public:
00292 inline size_t operator()(const Trigram &x, const Trigram &y) const
00293 {
00294 return
00295 x.tag1==y.tag1 && x.tag2==y.tag2 && x.tag3==y.tag3;
00296
00297 };
00298 };
00299
00300 public:
00301 TagID tag1;
00302 TagID tag2;
00303 TagID tag3;
00304
00305 public:
00307 Trigram(TagID t1=0, TagID t2=0, TagID t3=0)
00308 : tag1(t1), tag2(t2), tag3(t3)
00309 {};
00310
00312 ~Trigram(void) {};
00313 };
00314
00317 typedef
00318 hash_map<Trigram,ProbT,
00319 Trigram::HashFcn,
00320 Trigram::EqualFcn>
00321 TrigramProbTable;
00322
00323 # else
00324
00325
00343 typedef ProbT* TrigramProbTable;
00344 # endif // MOOT_HASH_TRIGRAMS
00345
00346 #endif // MOOT_USE_TRIGRAMS
00347
00348
00349
00352
00360 class ViterbiNode {
00361 public:
00362 TagID tagid;
00363 #ifdef MOOT_USE_TRIGRAMS
00364 TagID ptagid;
00365 #endif
00366 ProbT lprob;
00367
00368 class ViterbiNode *pth_prev;
00369 class ViterbiNode *nod_next;
00370 };
00371
00372 #ifdef MOOT_USE_TRIGRAMS
00373
00378 class ViterbiRow {
00379 public:
00380 TagID tagid;
00381 class ViterbiNode *nodes;
00382 class ViterbiRow *row_next;
00383 };
00384 #else
00385 typedef ViterbiNode ViterbiRow;
00386 #endif
00387
00388
00395 class ViterbiColumn {
00396 public:
00397 ViterbiRow *rows;
00398 ViterbiColumn *col_prev;
00399 };
00400
00417 struct ViterbiPathNode {
00418 public:
00419 ViterbiNode *node;
00420 ViterbiPathNode *path_next;
00421 };
00423
00424
00425 public:
00426
00433 int verbose;
00434
00439 size_t ndots;
00441
00442
00445
00453 bool use_lex_classes;
00454
00461 TagID start_tagid;
00462
00471 ProbT unknown_lex_threshhold;
00472
00481 ProbT unknown_class_threshhold;
00482
00488 LexClass uclass;
00490
00491
00494 ProbT nglambda1;
00495 ProbT nglambda2;
00496 #ifdef MOOT_USE_TRIGRAMS
00497 ProbT nglambda3;
00498 #endif
00499 ProbT wlambda0;
00500 ProbT wlambda1;
00502 ProbT clambda0;
00503 ProbT clambda1;
00510 ProbT beamwd;
00512
00513
00516 TokIDTable tokids;
00517 TagIDTable tagids;
00518 ClassIDTable classids;
00520
00521 TokID flavids[NTokFlavors];
00523
00524
00527 size_t n_tags;
00528 size_t n_toks;
00529 size_t n_classes;
00531 LexProbTable lexprobs;
00532 LexClassProbTable lcprobs;
00533 #ifdef MOOT_USE_TRIGRAMS
00534 TrigramProbTable ngprobs3;
00535 #else
00536 BigramProbTable ngprobs2;
00537 #endif
00538
00539
00540
00543 ViterbiColumn *vtable;
00545
00546
00549 size_t nsents;
00550 size_t ntokens;
00551 size_t nnewtokens;
00552 size_t nunclassed;
00553 size_t nnewclasses;
00554 size_t nunknown;
00555 size_t nfallbacks;
00557
00558 protected:
00559
00562 ViterbiNode *trash_nodes;
00563 #ifdef MOOT_USE_TRIGRAMS
00564 ViterbiRow *trash_rows;
00565 #endif
00566 ViterbiColumn *trash_columns;
00567 ViterbiPathNode *trash_pathnodes;
00569
00570
00573 TagID vtagid;
00574 ProbT vbestpr;
00575 ProbT vtagpr;
00576 ProbT vwordpr;
00577 ViterbiNode *vbestpn;
00579 ViterbiPathNode *vbestpath;
00581 ProbT bbestpr;
00582 ProbT bpprmin;
00584
00585 public:
00586
00590 mootHMM(void);
00591
00593 ~mootHMM(void) { clear(true,false); };
00595
00596
00604 void clear(bool wipe_everything=true, bool unlogify=false);
00606
00607
00611 bool save(const char *filename, int compression_level=MOOT_DEFAULT_COMPRESSION);
00612
00614 bool save(mootio::mostream *obs, const char *filename=NULL);
00615
00617 bool _bindump(mootio::mostream *obs, const char *filename=NULL);
00618
00620 bool load(const char *filename=NULL);
00621
00623 bool load(mootio::mistream *ibs, const char *filename=NULL);
00624
00626 bool _binload(mootio::mistream *ibs, const char *filename=NULL);
00628
00629
00633 inline void unknown_token_name(const mootTokString &name)
00634 {
00635 tokids.unknown_name(name);
00636 };
00637
00639 inline void unknown_tag_name(const mootTokString &name)
00640 {
00641 tagids.unknown_name(name);
00642 };
00643
00644
00645
00646
00647
00648 inline void unknown_class_name(const mootTagSet &tagset)
00649 {
00650 tagset2lexclass(tagset,&uclass,false);
00651 };
00653
00654
00655
00670 bool load_model(const string &modelname,
00671 const mootTagString &start_tag_str="__$",
00672 const char *myname="mootHMM::load_model()",
00673 bool do_estimate_nglambdas=true,
00674 bool do_estimate_wlambdas=true,
00675 bool do_estimate_clambdas=true,
00676 bool do_compute_logprobs=true);
00677
00683 bool compile(const mootLexfreqs &lexfreqs,
00684 const mootNgrams &ngrams,
00685 const mootClassfreqs &classfreqs,
00686 const mootTagString &start_tag_str="__$");
00687
00689 void assign_ids_lf(const mootLexfreqs &lexfreqs);
00690
00692 void assign_ids_ng(const mootNgrams &ngrams);
00693
00695 void assign_ids_cf(const mootClassfreqs &classfreqs);
00696
00698 void compile_unknown_lexclass(const mootClassfreqs &classfreqs);
00699
00701 bool estimate_lambdas(const mootNgrams &ngrams);
00702
00704 bool estimate_wlambdas(const mootLexfreqs &lf);
00705
00707 bool estimate_clambdas(const mootClassfreqs &cf);
00708
00710 bool compute_logprobs(void);
00712
00713
00714
00717
00719 void tag_io(TokenReader *reader, TokenWriter *writer)
00720 {
00721 int rtok;
00722 mootSentence *sent;
00723 while (reader && (rtok = reader->get_sentence()) != TokTypeEOF) {
00724 sent = reader->sentence();
00725 if (!sent) continue;
00726 tag_sentence(*sent);
00727 #ifdef MOOT_VITERBI_DEBUG
00728 viterbi_txtdump(stderr);
00729 #endif
00730
00731 if (writer) writer->put_sentence(*sent);
00732 }
00733 };
00734
00736
00737
00743 inline void tag_sentence(mootSentence &sentence) {
00744 viterbi_clear();
00745 for (mootSentence::const_iterator si = sentence.begin();
00746 si != sentence.end();
00747 si++)
00748 {
00749 viterbi_step(*si);
00750 if (ndots && (ntokens % ndots)==0) fputc('.', stderr);
00751 }
00752 viterbi_finish();
00753 tag_mark_best(sentence);
00754 nsents++;
00755 };
00757
00758
00759
00760
00763
00764
00765
00767 void viterbi_clear(void);
00768
00769
00770
00775 inline void viterbi_step(const mootToken &token) {
00776 if (token.toktype() != TokTypeVanilla) return;
00777 ntokens++;
00778 LexClass tok_class;
00779 for (mootToken::Analyses::const_iterator ani = token.analyses().begin();
00780 ani != token.analyses().end();
00781 ani++)
00782 {
00783 tok_class.insert(tagids.name2id(ani->tag));
00784 }
00785 viterbi_step(token2id(token.text()), tok_class);
00786 };
00787
00788
00789
00795 inline void viterbi_step(TokID tokid, const LexClass &lexclass)
00796 {
00797 if (use_lex_classes) {
00798 if (lexclass.empty()) {
00799 nunclassed++;
00800 viterbi_step(tokid, 0, uclass);
00801 } else {
00802
00803 ClassID classid = class2id(lexclass,0,1);
00804 viterbi_step(tokid,classid,lexclass);
00805 }
00806 } else {
00807
00808 if (lexclass.empty()) {
00809 nunclassed++;
00810 viterbi_step(tokid);
00811 } else {
00812 viterbi_step(tokid,0,lexclass);
00813 }
00814 }
00815 };
00816
00817
00818
00823 void viterbi_step(TokID tokid, ClassID classid, const LexClass &lclass);
00824
00825
00826
00833 void viterbi_step(TokID tokid);
00834
00835
00836
00843 inline void viterbi_step(const mootTokString &token_text) {
00844 return viterbi_step(token2id(token_text));
00845 };
00846
00847
00848
00854 inline void viterbi_step(const mootTokString &token_text, const set<mootTagString> &tags)
00855 {
00856 LexClass lclass;
00857 tagset2lexclass(tags,&lclass);
00858 viterbi_step(token2id(token_text), lclass);
00859 };
00860
00861
00862
00866 void viterbi_step(TokID tokid, TagID tagid, ViterbiColumn *col=NULL);
00867
00868
00869
00875 inline void viterbi_step(const mootTokString &token, const mootTagString &tag)
00876 {
00877 return viterbi_step(token2id(token), tagids.name2id(tag));
00878 };
00879
00880
00881
00882
00886 inline void viterbi_finish(const TagID final_tagid)
00887 {
00888 viterbi_step(0, final_tagid);
00889 };
00890
00894 inline void viterbi_finish(void)
00895 {
00896 viterbi_step(0, start_tagid);
00897 };
00898
00909 void tag_mark_best(mootSentence &sentence);
00911
00912
00913
00914
00917
00919 inline ViterbiPathNode *viterbi_best_path(void)
00920 {
00921 return viterbi_node_path(viterbi_best_node());
00922 };
00923
00925 inline ViterbiPathNode *viterbi_best_path(TagID tagid)
00926 {
00927 return viterbi_node_path(viterbi_best_node(tagid));
00928 };
00929
00931 inline ViterbiPathNode *viterbi_best_path(const mootTagString &tagstr)
00932 {
00933 return viterbi_best_path(tagids.name2id(tagstr));
00934 };
00935
00942 inline ViterbiNode *viterbi_best_node(void)
00943 {
00944 ViterbiNode *pnod;
00945 vbestpr = MOOT_PROB_NEG;
00946 vbestpn = NULL;
00947
00948 #ifdef MOOT_USE_TRIGRAMS
00949 ViterbiRow *prow;
00950 for (prow = vtable->rows; prow != NULL; prow = prow->row_next) {
00951 for (pnod = prow->nodes; pnod != NULL; pnod = pnod->nod_next) {
00952 if (pnod->lprob > vbestpr) {
00953 vbestpr = pnod->lprob;
00954 vbestpn = pnod;
00955 }
00956 }
00957 }
00958 #else // !MOOT_USE_TRIGRAMS
00959 for (pnod = vtable->rows; pnod != NULL; pnod = pnod->nod_next) {
00960 if (pnod->lprob > vbestpr) {
00961 vbestpr = pnod->lprob;
00962 vbestpn = pnod;
00963 }
00964 }
00965 #endif // MOOT_USE_TRIGRAMS
00966 return vbestpn;
00967 };
00968
00975 inline ViterbiNode *viterbi_best_node(TagID tagid)
00976 {
00977 ViterbiNode *pnod;
00978 vbestpr = MOOT_PROB_NEG;
00979 #ifdef MOOT_USE_TRIGRAMS
00980 ViterbiRow *prow;
00981 vbestpn = NULL;
00982 for (prow = vtable->rows; prow != NULL; prow = prow->row_next) {
00983 if (prow->tagid == tagid) {
00984 for (pnod = prow->nodes; pnod != NULL; pnod = pnod->nod_next) {
00985 if (pnod->lprob > vbestpr) {
00986 vbestpr = pnod->lprob;
00987 vbestpn = pnod;
00988 }
00989 }
00990 return vbestpn;
00991 }
00992 }
00993 #else // !MOOT_USE_TRIGRAMS
00994 for (pnod = vtable->rows; pnod != NULL; pnod = pnod->nod_next) {
00995 if (pnod->tagid == tagid) return pnod;
00996 }
00997 #endif // MOOT_USE_TRIGRAMS
00998 return NULL;
00999 };
01000
01001
01002
01010 inline ViterbiPathNode *viterbi_node_path(ViterbiNode *node)
01011 {
01012 viterbi_clear_bestpath();
01013 ViterbiPathNode *pnod;
01014 for ( ; node != NULL; node = node->pth_prev) {
01015 pnod = viterbi_get_pathnode();
01016 pnod->node = node;
01017 pnod->path_next = vbestpath;
01018 vbestpath = pnod;
01019 }
01020 return vbestpath;
01021 };
01023
01024
01025
01026
01028
01029
01031 inline bool viterbi_column_ok(const ViterbiColumn *col) const {
01032 return (col
01033 && col->rows
01034 #ifdef MOOT_USE_TRIGRAMS
01035 && col->rows->nodes
01036 #endif
01037 );
01038 };
01039
01049 inline ViterbiColumn *viterbi_populate_row(TagID curtagid,
01050 ProbT wordpr=MOOT_PROB_ONE,
01051 ViterbiColumn *col=NULL)
01052 {
01053 #ifdef MOOT_USE_TRIGRAMS
01054 ViterbiRow *prow, *row = viterbi_get_row();
01055 ViterbiNode *pnod, *nod = NULL;
01056
01057 if (!col) {
01058 col = viterbi_get_column();
01059 col->rows = NULL;
01060 }
01061 col->col_prev = vtable;
01062 row->nodes = NULL;
01063
01064 for (prow = vtable->rows; prow != NULL; prow = prow->row_next) {
01065 vbestpr = MOOT_PROB_NEG;
01066 vbestpn = NULL;
01067
01068 for (pnod = prow->nodes; pnod != NULL; pnod = pnod->nod_next) {
01069
01070 if (beamwd && pnod->lprob < bpprmin) continue;
01071
01072
01073 vtagpr = pnod->lprob + tagp(pnod->ptagid, prow->tagid, curtagid);
01074 if (vtagpr > vbestpr) {
01075 vbestpr = vtagpr;
01076 vbestpn = pnod;
01077 }
01078 }
01079
01080
01081 if (vbestpn != NULL) {
01082 nod = viterbi_get_node();
01083 nod->tagid = curtagid;
01084 nod->ptagid = prow->tagid;
01085 nod->lprob = vbestpr + wordpr;
01086
01087 nod->pth_prev = vbestpn;
01088 nod->nod_next = row->nodes;
01089
01090 row->nodes = nod;
01091
01092
01093 if (nod->lprob > bbestpr) bbestpr = nod->lprob;
01094 }
01095 }
01096
01097
01098 row->tagid = curtagid;
01099 row->row_next = col->rows;
01100 col->rows = row;
01101
01102 #else
01103
01104 ViterbiNode *pnod, *nod = NULL;
01105
01106 if (!col) {
01107 col = viterbi_get_column();
01108 col->rows = NULL;
01109 }
01110 col->col_prev = vtable;
01111
01112 vbestpr = MOOT_PROB_NEG;
01113 vbestpn = NULL;
01114
01115 for (pnod = vtable->rows; pnod != NULL; pnod = pnod->nod_next) {
01116
01117 if (beamwd && pnod->lprob < bpprmin) continue;
01118
01119
01120 vtagpr = pnod->lprob + tagp(pnod->tagid, curtagid);
01121 if (vtagpr > vbestpr) {
01122 vbestpr = vtagpr;
01123 vbestpn = pnod;
01124 }
01125 }
01126
01127
01128 nod = viterbi_get_node();
01129 nod->tagid = curtagid;
01130 nod->lprob = vbestpr + wordpr;
01131 nod->pth_prev = vbestpn;
01132 nod->nod_next = col->rows;
01133
01134
01135 nod->nod_next = col->rows;
01136 col->rows = nod;
01137
01138
01139 if (nod->lprob > bbestpr) bbestpr = nod->lprob;
01140
01141 #endif // MOOT_USE_TRIGRAMS
01142
01143 return col;
01144 };
01145
01146
01147
01148
01150 inline void viterbi_clear_bestpath(void)
01151 {
01152
01153 ViterbiPathNode *pnod, *pnod_next;
01154 for (pnod = vbestpath; pnod != NULL; pnod = pnod_next) {
01155 pnod_next = pnod->path_next;
01156 pnod->path_next = trash_pathnodes;
01157 trash_pathnodes = pnod;
01158 }
01159 vbestpath = NULL;
01160 };
01161
01162
01163
01169 void _viterbi_step_fallback(TokID tokid, ViterbiColumn *col);
01171
01172
01173
01178 inline ViterbiNode *viterbi_get_node(void) {
01179 ViterbiNode *nod;
01180 if (trash_nodes != NULL) {
01181 nod = trash_nodes;
01182 trash_nodes = nod->nod_next;
01183 } else {
01184 nod = new ViterbiNode();
01185 }
01186 return nod;
01187 };
01188
01189
01190
01192 inline ViterbiRow *viterbi_get_row(void) {
01193 #ifdef MOOT_USE_TRIGRAMS
01194 ViterbiRow *row;
01195 if (trash_rows != NULL) {
01196 row = trash_rows;
01197 trash_rows = row->row_next;
01198 } else {
01199 row = new ViterbiRow();
01200 }
01201 return row;
01202 #else
01203 return viterbi_get_node();
01204 #endif //MOOT_USE_TRIGRAMS
01205 };
01206
01207
01208
01210 inline ViterbiColumn *viterbi_get_column(void) {
01211 ViterbiColumn *col;
01212 if (trash_columns != NULL) {
01213 col = trash_columns;
01214 trash_columns = col->col_prev;
01215 } else {
01216 col = new ViterbiColumn();
01217 }
01218 return col;
01219 };
01220
01221
01222
01224 inline ViterbiPathNode *viterbi_get_pathnode(void) {
01225 ViterbiPathNode *pnod;
01226 if (trash_pathnodes != NULL) {
01227 pnod = trash_pathnodes;
01228 trash_pathnodes = pnod->path_next;
01229 } else {
01230 pnod = new ViterbiPathNode();
01231 }
01232 return pnod;
01233 };
01235
01236
01237
01238
01239
01243 inline TokID token2id(const mootTokString &token) const
01244 {
01245 mootTokenFlavor flav = tokenFlavor(token);
01246 return flavids[flav]==0 ? tokids.name2id(token) : flavids[flav];
01247 };
01248
01259 inline LexClass *tagset2lexclass(const mootTagSet &tagset,
01260 LexClass *lclass=NULL,
01261 bool add_tagids=false)
01262 {
01263 if (!lclass) lclass = new LexClass();
01264
01265 for (mootTagSet::const_iterator tsi = tagset.begin();
01266 tsi != tagset.end();
01267 tsi++)
01268 {
01269
01270 TagID tagid = tagids.name2id(*tsi);
01271 if (add_tagids && tagid==0) tagid = tagids.insert(*tsi);
01272
01273
01274 lclass->insert(tagid);
01275 }
01276 return lclass;
01277 };
01278
01279
01285 inline ClassID class2id(const LexClass &lclass,
01286 bool autopopulate=true,
01287 bool autocreate=true)
01288 {
01289 ClassID cid = classids.name2id(lclass);
01290 if (cid == 0) {
01291 nnewclasses++;
01292 if (!autopopulate && !autocreate) return cid;
01293
01294
01295 cid = classids.insert(lclass);
01296 if (cid >= lcprobs.size()) {
01297 n_classes = cid+1;
01298
01299
01300
01301 lcprobs.resize(n_classes);
01302 }
01303 if (autopopulate) {
01304 LexClassProbSubTable &lcps = lcprobs[cid];
01305 if (!lclass.empty()) {
01306
01307 ProbT lcprob = log(1.0/((ProbT)lclass.size()));
01308
01309 for (LexClass::const_iterator lci = lclass.begin(); lci != lclass.end(); lci++) {
01310 lcps[*lci] = lcprob;
01311 }
01312 } else {
01313
01314 const LexProbSubTable &lps = lexprobs[0];
01315 ProbT lpprob = log(1.0/((ProbT)lps.size()));
01316
01317 for (LexProbSubTable::const_iterator lpsi = lps.begin(); lpsi != lps.end(); lpsi++) {
01318 lcps[lpsi->first] = lpprob;
01319 }
01320 }
01321 }
01322 }
01323 return cid;
01324 };
01326
01327
01328
01331
01332
01333
01334
01339 inline const ProbT wordp(const TokID tokid, const TagID tagid) const
01340 {
01341 if (tokid >= lexprobs.size()) return MOOT_PROB_ZERO;
01342 const LexProbSubTable &lps = lexprobs[tokid];
01343 LexProbSubTable::const_iterator lpsi = lps.find(tagid);
01344 return lpsi != lps.end() ? lpsi->second : MOOT_PROB_ZERO;
01345 };
01346
01353 inline const ProbT wordp(const mootTokString token, const mootTagString tag) const
01354 {
01355 return wordp(token2id(token), tagids.name2id(tag));
01356 };
01357
01358
01359
01360
01364 inline const ProbT classp(const ClassID classid, const TagID tagid) const
01365 {
01366 if (classid >= lcprobs.size()) return MOOT_PROB_ZERO;
01367 const LexClassProbSubTable &lps = lcprobs[classid];
01368 LexClassProbSubTable::const_iterator lpsi = lps.find(tagid);
01369 return lpsi != lps.end() ? lpsi->second : MOOT_PROB_ZERO;
01370 };
01371
01378 inline const ProbT classp(const LexClass &lclass, const mootTagString tag) const
01379 {
01380 return classp(classids.name2id(lclass), tagids.name2id(tag));
01381 };
01382
01383
01384
01385
01389 inline const ProbT tagp(const TagID tagid) const
01390 {
01391 return
01392 #ifdef MOOT_USE_TRIGRAMS
01393 tagp(0,0,tagid);
01394 #else
01395 ngprobs2 && tagid < n_tags
01396 ? ngprobs2[tagid]
01397 : MOOT_PROB_ZERO;
01398 #endif // MOOT_USE_TRIGRAMS
01399 };
01400
01406 inline const ProbT tagp(const mootTagString &tag) const
01407 {
01408 return tagp(tagids.name2id(tag));
01409 };
01410
01411
01412
01413
01418 inline const ProbT tagp(const TagID prevtagid, const TagID tagid) const
01419 {
01420 return
01421 #ifdef MOOT_USE_TRIGRAMS
01422 tagp(0,prevtagid,tagid);
01423 #else
01424 ngprobs2 && prevtagid < n_tags && tagid < n_tags
01425 ? ngprobs2[(n_tags*prevtagid)+tagid]
01426 : MOOT_PROB_ZERO;
01427 #endif
01428 };
01429
01435 inline const ProbT tagp(const mootTagString &prevtag, const mootTagString &tag) const
01436 {
01437 return tagp(tagids.name2id(prevtag), tagids.name2id(tag));
01438 };
01439
01440
01441
01442
01443 #ifdef MOOT_USE_TRIGRAMS
01444
01450 #ifdef MOOT_HASH_TRIGRAMS
01451 inline const ProbT tagp(const Trigram &trigram, ProbT ProbZero=MOOT_PROB_ZERO) const
01452 {
01453 TrigramProbTable::const_iterator tgti = ngprobs3.find(trigram);
01454 return tgti != ngprobs3.end() ? tgti->second : ProbZero;
01455 };
01456 #endif //MOOT_HASH_TRIGRAMS
01457
01464 inline const ProbT tagp(const TagID prevtagid2, const TagID prevtagid1, const TagID tagid) const
01465 {
01466 return
01467 #ifdef MOOT_HASH_TRIGRAMS
01468 tagp(Trigram(prevtagid2,prevtagid1,tagid))
01469 #else
01470 ngprobs3 && prevtagid2 < n_tags && prevtagid1 < n_tags && tagid < n_tags
01471 ? ngprobs3[(n_tags*((n_tags*prevtagid2)+prevtagid1))+tagid]
01472 : MOOT_PROB_ZERO;
01473 #endif
01474 ;
01475 };
01476
01484 inline const ProbT tagp(const mootTagString &prevtag2,
01485 const mootTagString &prevtag1,
01486 const mootTagString &tag)
01487 const
01488 {
01489 return tagp(tagids.name2id(prevtag2), tagids.name2id(prevtag1), tagids.name2id(tag));
01490 };
01491 #endif // MOOT_USE_TRIGRAMS
01492
01493
01494
01495
01496
01497
01501 void carp(char *fmt, ...);
01503
01504
01505
01506
01509
01511 void txtdump(FILE *file);
01512
01514 void viterbi_txtdump(FILE *file);
01516 };
01517
01518 moot_END_NAMESPACE
01519
01520 #endif