mootLexfreqs.h
Go to the documentation of this file.
1 /* -*- Mode: C++ -*- */
2 
3 /*
4  libmoot : moocow's part-of-speech tagging library
5  Copyright (C) 2003-2014 by Bryan Jurish <moocow@cpan.org>
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Lesser General Public
9  License as published by the Free Software Foundation; either
10  version 3 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Lesser General Public License for more details.
16 
17  You should have received a copy of the GNU Lesser General Public
18  License along with this library; if not, write to the Free Software
19  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 
22 /*============================================================================
23  * File: mootLexfreqs.h
24  * Author: Bryan Jurish <moocow@cpan.org>
25  * Description:
26  * Class for storage & retrieval of lexical frequencies (nested map<> version)
27  *============================================================================*/
28 
34 #ifndef _moot_LEXFREQS_H
35 #define _moot_LEXFREQS_H
36 
37 #include <mootToken.h>
38 #include <mootFlavor.h>
39 
40 moot_BEGIN_NAMESPACE
41 
45 class mootLexfreqs {
46 public:
47  //------ public typedefs
48 
50  typedef CountT LexfreqCount;
51 
55  typedef map<mootTagString,LexfreqCount> LexfreqSubtable;
56 
60  class LexfreqEntry {
61  public:
62  LexfreqCount count;
63  LexfreqSubtable freqs;
64  public:
65  LexfreqEntry(const LexfreqCount tok_count=0)
66  : count(tok_count)
67  {};
68  LexfreqEntry(const LexfreqCount tok_count,
69  const LexfreqSubtable &tok_tagfreqs)
70  : count(tok_count), freqs(tok_tagfreqs)
71  {};
73  void clear(void) {
74  count = 0;
75  freqs.clear();
76  };
77  };
78 
82  typedef hash_map<mootTokString,LexfreqEntry> LexfreqTokTable;
83 
85  typedef hash_map<mootTagString,LexfreqCount> LexfreqTagTable;
86 
87 public:
88  //------ public data
89  LexfreqTokTable lftable;
90  LexfreqTagTable tagtable;
91  LexfreqCount n_tokens;
92  LexfreqCount unknown_threshhold;
93  const mootTaster *taster;
95 public:
96  //------ public methods
98  mootLexfreqs(size_t initial_bucket_count=0)
99  : n_tokens(0), unknown_threshhold(1.0), taster(&builtinTaster)
100  {
101  if (initial_bucket_count != 0)
102  lftable.resize(initial_bucket_count);
103  };
104 
106  ~mootLexfreqs() {
107  clear();
108  }
109 
110  //------ public methods: manipulation
111 
113  void clear(void);
114 
116  void add_count(const mootTokString &text, const mootTagString &tag, const LexfreqCount count);
117 
119  void remove_word(const mootTokString &text);
120 
121  //------ public methods: lookup
122 
124  inline LexfreqCount f_word(const mootTokString &w) const
125  {
126  LexfreqTokTable::const_iterator wi = lftable.find(w);
127  return wi == lftable.end() ? 0 : wi->second.count;
128  };
129 
131  inline LexfreqCount f_tag(const mootTagString &tag) const
132  {
133  LexfreqTagTable::const_iterator ti = tagtable.find(tag);
134  return ti == tagtable.end() ? 0 : ti->second;
135  };
136 
138  inline LexfreqCount f_word_tag(const mootTokString &w, const mootTagString &tag) const
139  {
140  LexfreqTokTable::const_iterator wi = lftable.find(w);
141  if (wi == lftable.end()) return 0;
142  LexfreqSubtable::const_iterator wti = wi->second.freqs.find(tag);
143  return wti == wi->second.freqs.end() ? 0 : wti->second;
144  };
145 
158  void compute_specials(bool compute_unknown=true);
159 
167  void remove_specials(bool remove_unknown=true);
168 
173  void discount_specials(CountT zf_special=1.0);
174 
178  size_t n_pairs(void);
179 
180  //------ public methods: i/o
181 
183  bool load(const char *filename);
184 
186  bool load(FILE *file, const char *filename = NULL);
187 
189  bool save(const char *filename);
190 
192  bool save(FILE *file, const char *filename = NULL);
193 };
194 
195 
196 moot_END_NAMESPACE
197 
198 #endif /* _moot_LEXFREQS_H */
High-level heuristic token classifier.
Definition: mootFlavor.h:62
classes and utilities for tokens and associated analyses
classes and utilities for regex-based token "flavor" heuristics
Definition: mootLexfreqs.h:59
Class for storage and retrieval of raw lexical frequencies.
Definition: mootLexfreqs.h:44
hash_map< mootTagString, LexfreqCount > LexfreqTagTable
Definition: mootLexfreqs.h:84
const mootTaster builtinTaster
string mootTagString
Definition: mootToken.h:59
ProbT CountT
Definition: mootTypes.h:67
hash_map< mootTokString, LexfreqEntry > LexfreqTokTable
Definition: mootLexfreqs.h:81
string mootTokString
Definition: mootToken.h:62