mootMIParser.h
Go to the documentation of this file.
1 /* -*- Mode: C++ -*- */
2 /*
3  libmoot : moocow's part-of-speech tagging library
4  Copyright (C) 2009 by Bryan Jurish <moocow@cpan.org>
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Lesser General Public
8  License as published by the Free Software Foundation; either
9  version 3 of the License, or (at your option) any later version.
10 
11  This library is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Lesser General Public License for more details.
15 
16  You should have received a copy of the GNU Lesser General Public
17  License along with this library; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20 
21 /*--------------------------------------------------------------------------
22  * File: mootMIParser.h
23  * Author: Bryan Jurish <moocow@cpan.org>
24  * Description:
25  * + moot PoS tagger: MI "parser"
26  *--------------------------------------------------------------------------*/
27 
33 #ifndef _MOOT_MIPARSER_H
34 #define _MOOT_MIPARSER_H
35 
36 #include <mootDynHMM.h>
37 #include <assert.h>
38 
39 moot_BEGIN_NAMESPACE
40 
41 
42 /*======================================================================
43  *Generic Utilities
44  */
45 
46 
47 /*======================================================================
48  * mootMIParser : MI "parser" class
49  */
50 
56 class mootMIParser: public mootDynHMM {
57 public:
58  /*---------------------------------------------------------------------*/
59  /* Types */
61  typedef size_t SentIndex;
62  typedef std::pair< ProbT, SentIndex > ProbIndexPair;
63  typedef std::vector< ProbIndexPair > ProbIndexPairVector;
64  typedef std::vector< mootToken* > TokenPtrVector;
65 
66 public:
67  /*---------------------------------------------------------------------*/
68  /* Data */
70  TagID newtag_id;
72  bool text_tags;
76 public:
77  /*---------------------------------------------------------------------*/
82  : newtag_str("@NEW"),
83  newtag_id(0),
84  newtag_f(0.5),
85  text_tags(false),
86  output_tag("$MIParser"),
87  output_prefix("[$MIParser] ")
88  {};
89 
91  virtual ~mootMIParser(void)
92  {};
93 
95  virtual void set_options(const mootDynHMMOptions &opts)
96  {
97  newtag_str = opts.newtag_str;
98  text_tags = opts.text_tags;
99  };
101 
102  //------------------------------------------------------------
103  // Utilities
105 
106 
107  //---------------------------------------------------------------------
109 
110 
111  virtual bool load_model(const string &modelname,
112  const mootTagString &start_tag_str="__$",
113  const char *myname="mootDynLexHMM::load_model()",
114  bool do_estimate_nglambdas=true,
115  bool do_estimate_wlambdas=true,
116  bool do_estimate_clambdas=true,
117  bool do_build_suffix_trie=true,
118  bool do_compute_logprobs=true);
119 
121  virtual bool compile(mootLexfreqs &lexfreqs,
122  mootNgrams &ngrams,
123  mootClassfreqs &classfreqs,
124  const mootTagString &start_tag_str="__$");
126 
127  //------------------------------------------------------------
130 
131 
132  /*---------------------------------------------------------------------*/
134 
135 
137  virtual void tw_put_info(moot::TokenWriter *tw)
138  {
139  //mootDynHMM::tw_put_info(tw);
140  tw->printf_raw(" +DynHMM class : %s\n", "mootMIParser");
141  tw->printf_raw(" New Tag : %s\n", newtag_str.c_str());
142  tw->printf_raw(" N-gram source : %s\n", text_tags ? "text" : "tags");
143  tw->printf_raw(" Output Tag : %s\n", output_tag.c_str());
144  tw->printf_raw(" Output Prefix : \"%s\"\n", output_prefix.c_str());
145  };
147 
148  //------------------------------------------------------------
149  // Tagging: Top-level
152 
157  virtual void tag_sentence(mootSentence &sentence);
159 };
160 
161 
162 moot_END_NAMESPACE
163 
164 #endif /* _MOOT_MIPARSER_H */
mootTagString output_prefix
Definition: mootMIParser.h:74
size_t SentIndex
Definition: mootMIParser.h:61
std::pair< ProbT, SentIndex > ProbIndexPair
Definition: mootMIParser.h:62
mootTagString newtag_str
Definition: mootMIParser.h:69
bool text_tags
Definition: mootDynHMM.h:68
TagID newtag_id
Definition: mootMIParser.h:70
mootMIParser(void)
Definition: mootMIParser.h:81
mootTagString output_tag
Definition: mootMIParser.h:73
std::vector< mootToken *> TokenPtrVector
Definition: mootMIParser.h:64
abstract HMM subclass for use with dynamic lexical probabilities.
Definition: mootDynHMM.h:97
virtual void printf_raw(const char *fmt,...)
Class for storage and retrieval of raw lexical-class frequencies.
Definition: mootClassfreqs.h:44
Class for storage & retrieval of raw N-Gram frequencies.
Definition: mootNgrams.h:44
Generic user-level options structure for built-in mootDynHMM subclasses.
Definition: mootDynHMM.h:56
Class for storage and retrieval of raw lexical frequencies.
Definition: mootLexfreqs.h:44
HMM subclass for MI parsing.
Definition: mootMIParser.h:56
virtual void tw_put_info(moot::TokenWriter *tw)
Definition: mootMIParser.h:137
virtual void set_options(const mootDynHMMOptions &opts)
Definition: mootMIParser.h:95
float ProbT
Definition: mootTypes.h:63
ProbT newtag_f
Definition: mootMIParser.h:71
virtual ~mootMIParser(void)
Definition: mootMIParser.h:91
list< mootToken > mootSentence
Definition: mootToken.h:630
Abstract class for token output.
Definition: mootTokenIO.h:700
string mootTagString
Definition: mootToken.h:59
std::vector< ProbIndexPair > ProbIndexPairVector
Definition: mootMIParser.h:63
bool text_tags
Definition: mootMIParser.h:72
Hidden Markov Model tagger/disambiguator for dynamic lexical probabilities.
std::string newtag_str
Definition: mootDynHMM.h:60
mootTagString TagStr
Definition: mootMIParser.h:60