mootFlavor.h
Go to the documentation of this file.
1 /* -*- Mode: C++ -*- */
2 
3 /*
4  libmoot : moocow's part-of-speech tagging library
5  Copyright (C) 2012-2014 by Bryan Jurish <moocow@cpan.org>
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Lesser General Public
9  License as published by the Free Software Foundation; either
10  version 3 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Lesser General Public License for more details.
16 
17  You should have received a copy of the GNU Lesser General Public
18  License along with this library; if not, write to the Free Software
19  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 
22 /*--------------------------------------------------------------------------
23  * File: mootFlavor.h
24  * Author: Bryan Jurish <moocow@cpan.org>
25  * Description:
26  * + moocow's PoS tagger : token information : "flavors"
27  *--------------------------------------------------------------------------*/
28 
34 #ifndef _moot_FLAVOR_H
35 #define _moot_FLAVOR_H
36 
37 #include <regex.h>
38 #include <list>
39 #include <vector>
40 #include <set>
41 #include <mootTypes.h>
42 #include <mootIO.h>
43 #include <mootCIO.h>
44 
45 namespace moot {
46  using namespace std;
47 
48 //==============================================================================
49 // types
50 typedef string mootFlavorStr;
51 typedef UInt mootFlavorID;
52 
53 //==============================================================================
54 // mootTaster
55 
class mootTaster; //-- forward decl: needed so builtinTaster can be declared before the class body

/**
 * Taster object declared here and defined elsewhere.
 * mootTaster::is_builtin() compares a taster against this instance.
 */
extern const mootTaster builtinTaster;
60 
68 class mootTaster {
69 public:
70  //--------------------------------------------------------------------
71  // Embedded Types & Constants
72 
73  //------------------------------------------------------------
74  // mootTaster::Rule
80  class Rule {
81  public:
82  //--------------------------------------------------
83  // mootTaster::Rule: data members
84  mootFlavorStr lab;
85  mootFlavorID id;
86  string re_s;
87  regex_t *re_t;
88 
89  public:
90  //--------------------------------------------------
91  // mootTaster::Rule: constructors etc.
92 
94  Rule(const mootFlavorStr &label="", const std::string &regex="")
95  : lab(label), id(0), re_s(regex), re_t(NULL)
96  { compile(); };
97 
99  Rule(const Rule &r)
100  : lab(r.lab), id(r.id), re_s(r.re_s), re_t(NULL)
101  { compile(); };
104  ~Rule(void)
105  { clear(); };
106 
108  void clear();
109 
111  void compile();
114  inline bool operator==(const Rule &r2) const
115  { return lab==r2.lab && /*id==r2.id &&*/ re_s==r2.re_s; };
118  inline Rule& operator=(const Rule &r2)
119  { lab=r2.lab; id=r2.id; re_s=r2.re_s; compile(); return *this; };
120 
121  public:
122  //--------------------------------------------------
123  // mootTaster::Rule: methods
126  inline bool match(const char *s) const
127  {
128  if (!re_t) return false;
129  return regexec(re_t, s, 0, NULL, 0)==0;
130  };
131 
133  inline bool match(const std::string &s) const
134  { return match(s.c_str()); };
135  };
136  //--/mootTaster::Rule
137 
138 public:
139  //--------------------------------------------------------------------
140  // Data Members
141  typedef vector<Rule> Rules;
142 
143  Rules rules;
144  mootFlavorStr nolabel;
145  mootFlavorID noid;
146  set<mootFlavorStr> labels;
147 
148 public:
149  //--------------------------------------------------------------------
150  // Constructors etc.
151 
153  mootTaster(const mootFlavorStr &default_label="", mootFlavorID default_id=0)
154  : nolabel(default_label), noid(default_id)
155  { set_default_rules(); };
156 
159  {};
160 
162  void clear();
163 
164 public:
165  //--------------------------------------------------------------------
166  // methods: info
167 
169  inline size_t size() const
170  { return rules.size(); };
171 
173  inline bool empty() const
174  { return rules.empty(); };
175 
177  inline bool operator==(const mootTaster &t2) const
178  { return rules==t2.rules && nolabel==t2.nolabel && noid==t2.noid; };
179 
181  inline bool is_builtin(void) const
182  { return operator==(builtinTaster); };
183 
185  inline mootTaster& operator=(const mootTaster &t2)
186  { rules=t2.rules; nolabel=t2.nolabel; noid=t2.noid; labels=t2.labels; return *this; };
187 
188 public:
189  //--------------------------------------------------------------------
190  // methods: rule-set
191 
193  inline void append_rule(const Rule &r)
194  {
195  rules.push_back(r);
196  labels.insert(r.lab);
197  };
198 
200  inline void append_rule(const mootFlavorStr &label, const std::string &regex)
201  { append_rule(Rule(label,regex)); };
202 
204  void set_default_label(const mootFlavorStr &label, bool update_rules=true);
205 
206 public:
207  //--------------------------------------------------------------------
208  // methods: info
209 
211  inline bool has_label(const mootFlavorStr &l) const
212  { return labels.find(l) != labels.end(); };
213 
214 public:
215  //--------------------------------------------------------------------
216  // methods: matching
217 
218  //--------------------------------------
220  Rules::const_iterator find(const char *s) const;
221 
223  inline Rules::const_iterator find(const std::string &s) const
224  { return find(s.c_str()); };
225 
226  //--------------------------------------
228  inline const mootFlavorStr& flavor(const char *s) const
229  {
230  Rules::const_iterator ri = find(s);
231  return (ri == rules.end()) ? nolabel : ri->lab;
232  };
233 
235  inline const mootFlavorStr& flavor(const string &s) const
236  { return flavor(s.c_str()); };
237 
238  //--------------------------------------
240  inline mootFlavorID flavor_id(const char *s) const
241  {
242  Rules::const_iterator ri = find(s);
243  return (ri == rules.end()) ? noid : ri->id;
244  };
245 
247  inline mootFlavorID flavor_id(const string &s) const
248  { return flavor_id(s.c_str()); };
249 
250 public:
251  //--------------------------------------------------------------------
252  // methods: I/O: load
253 
254 
266  bool load(mootio::mistream *mis, const std::string &prefix="");
267 
269  bool load(const char *filename, const std::string &prefix="");
270 
272  bool load(const std::string &filename, const std::string &prefix="")
273  { return load(filename.c_str(), prefix); };
274 
276  void set_default_rules(void);
277 
278  //--------------------------------------------------------------------
279  // methods: I/O: save
280 
284  bool save(mootio::mostream *mos, const std::string &prefix="") const;
285 
287  bool save(const char *filename, const std::string &prefix="") const;
288 
290  bool save(const std::string &filename, const std::string &prefix="") const
291  { return save(filename.c_str(), prefix); };
292 
294  bool save(FILE *f, const std::string &prefix="") const
295  { mootio::mcstream mcs(f); return save(&mcs,prefix); };
296 
297 }; //-- /mootTaster
298 
299 
300 }; /* namespace moot */
301 
302 #endif /* _moot_FLAVOR_H */
Definition: mootAssocVector.h:39
~mootTaster()
Definition: mootFlavor.h:158
size_t size() const
Definition: mootFlavor.h:169
UInt mootFlavorID
Definition: mootFlavor.h:45
High-level heuristic token classifier.
Definition: mootFlavor.h:62
mootio abstraction layer for C FILE*s
void append_rule(const Rule &r)
Definition: mootFlavor.h:193
void append_rule(const mootFlavorStr &label, const std::string &regex)
Definition: mootFlavor.h:200
mootTaster(const mootFlavorStr &default_label="", mootFlavorID default_id=0)
Definition: mootFlavor.h:153
string mootFlavorStr
Definition: mootFlavor.h:44
Rules::const_iterator find(const std::string &s) const
Definition: mootFlavor.h:223
Abstract base class for output stream wrappers.
Definition: mootIO.h:194
bool save(const std::string &filename, const std::string &prefix="") const
Definition: mootFlavor.h:290
mootFlavorID flavor_id(const string &s) const
Definition: mootFlavor.h:247
vector< Rule > Rules
Definition: mootFlavor.h:141
const mootFlavorStr & flavor(const string &s) const
Definition: mootFlavor.h:235
mootFlavorID flavor_id(const char *s) const
Definition: mootFlavor.h:240
bool save(FILE *f, const std::string &prefix="") const
Definition: mootFlavor.h:294
mootFlavorID noid
id to return if no rule matches (default: 0)
Definition: mootFlavor.h:145
bool has_label(const mootFlavorStr &l) const
Definition: mootFlavor.h:211
BinUInt UInt
Definition: mootTypes.h:86
const mootTaster builtinTaster
Rules rules
matching heuristics in order of decreasing priority
Definition: mootFlavor.h:143
const mootFlavorStr & flavor(const char *s) const
Definition: mootFlavor.h:228
type for a single regex-based token classification heuristic
Definition: mootFlavor.h:74
bool empty() const
Definition: mootFlavor.h:173
mootFlavorID id
numeric id (zero by default)
Definition: mootFlavor.h:79
bool operator==(const mootTaster &t2) const
Definition: mootFlavor.h:177
generic I/O abstraction layer
bool is_builtin(void) const
Definition: mootFlavor.h:181
string re_s
POSIX.2 regex to match ("extended" regex string; see regex(7) manpage)
Definition: mootFlavor.h:80
Common typedefs and constants.
Wrapper class for C FILE* streams.
Definition: mootCIO.h:54
mootFlavorStr lab
symbolic label
Definition: mootFlavor.h:78
bool match(const std::string &s) const
Definition: mootFlavor.h:133
set< mootFlavorStr > labels
set of all flavor labels
Definition: mootFlavor.h:146
bool load(const std::string &filename, const std::string &prefix="")
Definition: mootFlavor.h:272
mootFlavorStr nolabel
label to return if no rule matches (default: empty)
Definition: mootFlavor.h:144
Abstract base class for input stream wrappers.
Definition: mootIO.h:129
mootTaster & operator=(const mootTaster &t2)
Definition: mootFlavor.h:185