Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootLexfreqs.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2004 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
00020 */
00021 
00022 /*============================================================================
00023  * File: mootLexfreqs.h
00024  * Author:  Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *    Class for storage & retrieval of lexical frequencies (nested map<> version)
00027  *============================================================================*/
00028 
00029 #ifndef _moot_LEXFREQS_H
00030 #define _moot_LEXFREQS_H
00031 
00032 #include <mootTypes.h>
00033 moot_BEGIN_NAMESPACE
00034 
00038 class mootLexfreqs {
00039 public:
00040   //------ public typedefs
00041 
00043   typedef CountT LexfreqCount;
00044 
00048   typedef map<mootTagString,LexfreqCount> LexfreqSubtable;
00049 
00053   class LexfreqEntry {
00054   public:
00055     LexfreqCount     count;  
00056     LexfreqSubtable  freqs;  
00057   public:
00058     LexfreqEntry(const LexfreqCount tok_count=0)
00059       : count(tok_count)
00060     {};
00061     LexfreqEntry(const LexfreqCount tok_count,
00062                  const LexfreqSubtable &tok_tagfreqs)
00063       : count(tok_count), freqs(tok_tagfreqs)
00064     {};
00066     void clear(void) {
00067       count = 0;
00068       freqs.clear();
00069     };
00070   };
00071 
00075   typedef hash_map<mootTokString,LexfreqEntry> LexfreqTokTable;
00076 
00078   typedef hash_map<mootTagString,LexfreqCount> LexfreqTagTable;
00079 
00080 public:
00081   //------ public data
00082   LexfreqTokTable    lftable;    
00083   LexfreqTagTable    tagtable;   
00084   LexfreqCount       n_tokens;   
00086 public:
00087   //------ public methods
00089   mootLexfreqs(size_t initial_bucket_count=0) : n_tokens(0)
00090   {
00091     if (initial_bucket_count != 0) {
00092       lftable.resize(initial_bucket_count);
00093     }
00094   };
00095 
00097   ~mootLexfreqs() {
00098     clear();
00099   }
00100 
00101   //------ public methods: manipulation
00102 
00104   void clear(void);
00105 
00107   inline void add_count(const mootTokString &text,
00108                         const mootTagString &tag,
00109                         const LexfreqCount count)
00110   {
00111     //-- adjust token-table
00112     LexfreqTokTable::iterator lfi = lftable.find(text);
00113     if (lfi == lftable.end()) {
00114       //-- new token
00115       lfi = lftable.insert(LexfreqTokTable::value_type(text,LexfreqEntry(count))).first;
00116       lfi->second.freqs[tag] = count;
00117     } else {
00118       //-- known token
00119       lfi->second.count += count;
00120 
00121       LexfreqSubtable::iterator lsi = lfi->second.freqs.find(tag);
00122       if (lsi == lfi->second.freqs.end()) {
00123         //-- unknown (tok,tag) pair
00124         lfi->second.freqs[tag] = count;
00125       } else {
00126         //-- known (tok,tag) pair: just add
00127         lsi->second += count;
00128       }
00129     }
00130 
00131     //-- adjust total tag-count
00132     LexfreqTagTable::iterator lftagi = tagtable.find(tag);
00133     if (lftagi != tagtable.end()) {
00134       lftagi->second += count;
00135     } else {
00136       tagtable[tag] = count;
00137     }
00138 
00139     //-- adjust total token-count
00140     n_tokens += count;
00141   };
00142 
00143   //------ public methods: lookup
00144   const LexfreqCount taglookup(const mootTagString &tag) const
00145   {
00146     LexfreqTagTable::const_iterator tagi = tagtable.find(tag);
00147     return tagi == tagtable.end() ? 0 : tagi->second;
00148   };
00149 
00157   void compute_specials(void);
00158 
00162   size_t n_pairs(void);
00163 
00164   //------ public methods: i/o
00165 
00167   bool load(const char *filename);
00168 
00170   bool load(FILE *file, const char *filename = NULL);
00171 
00173   bool save(const char *filename);
00174 
00176   bool save(FILE *file, const char *filename = NULL);
00177 };
00178 
00179 
00180 moot_END_NAMESPACE
00181 
00182 #endif /* _moot_LEXFREQS_H */

Generated on Wed Jul 28 15:48:03 2004 for libmoot by doxygen 1.2.15