Main Page | Directories | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

mootLexfreqs.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2004 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011    
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016    
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with this library; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 */
00021 
00022 /*============================================================================
00023  * File: mootLexfreqs.h
00024  * Author:  Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *    Class for storage & retrieval of lexical frequencies (nested map<> version)
00027  *============================================================================*/
00028 
00029 #ifndef _moot_LEXFREQS_H
00030 #define _moot_LEXFREQS_H
00031 
00032 #include <mootTypes.h>
00033 #include <mootToken.h>
00034 
00035 moot_BEGIN_NAMESPACE
00036 
00040 class mootLexfreqs {
00041 public:
00042   //------ public typedefs
00043 
00045   typedef CountT LexfreqCount;
00046 
00050   typedef map<mootTagString,LexfreqCount> LexfreqSubtable;
00051 
00055   class LexfreqEntry {
00056   public:
00057     LexfreqCount     count;  
00058     LexfreqSubtable  freqs;  
00059   public:
00060     LexfreqEntry(const LexfreqCount tok_count=0)
00061       : count(tok_count)
00062     {};
00063     LexfreqEntry(const LexfreqCount tok_count,
00064                  const LexfreqSubtable &tok_tagfreqs)
00065       : count(tok_count), freqs(tok_tagfreqs)
00066     {};
00068     void clear(void) {
00069       count = 0;
00070       freqs.clear();
00071     };
00072   };
00073 
00077   typedef hash_map<mootTokString,LexfreqEntry> LexfreqTokTable;
00078 
00080   typedef hash_map<mootTagString,LexfreqCount> LexfreqTagTable;
00081 
00082 public:
00083   //------ public data
00084   LexfreqTokTable    lftable;    
00085   LexfreqTagTable    tagtable;   
00086   LexfreqCount       n_tokens;   
00088 public:
00089   //------ public methods
00091   mootLexfreqs(size_t initial_bucket_count=0) : n_tokens(0)
00092   {
00093     if (initial_bucket_count != 0) {
00094       lftable.resize(initial_bucket_count);
00095     }
00096   };
00097 
00099   ~mootLexfreqs() {
00100     clear();
00101   }
00102 
00103   //------ public methods: manipulation
00104 
00106   void clear(void);
00107 
00109   inline void add_count(const mootTokString &text,
00110                         const mootTagString &tag,
00111                         const LexfreqCount count)
00112   {
00113     //-- adjust token-table
00114     LexfreqTokTable::iterator lfi = lftable.find(text);
00115     if (lfi == lftable.end()) {
00116       //-- new token
00117       lfi = lftable.insert(LexfreqTokTable::value_type(text,LexfreqEntry(count))).first;
00118       lfi->second.freqs[tag] = count;
00119     } else {
00120       //-- known token
00121       lfi->second.count += count;
00122 
00123       LexfreqSubtable::iterator lsi = lfi->second.freqs.find(tag);
00124       if (lsi == lfi->second.freqs.end()) {
00125         //-- unknown (tok,tag) pair
00126         lfi->second.freqs[tag] = count;
00127       } else {
00128         //-- known (tok,tag) pair: just add
00129         lsi->second += count;
00130       }
00131     }
00132 
00133     if (!isTokFlavorName(text)) {
00134       //-- adjust total tag-count
00135       LexfreqTagTable::iterator lftagi = tagtable.find(tag);
00136       if (lftagi != tagtable.end()) {
00137         lftagi->second += count;
00138       } else {
00139         tagtable[tag] = count;
00140       }
00141 
00142       //-- adjust total token-count
00143       n_tokens += count;
00144     }
00145   };
00146 
00147   //------ public methods: lookup
00148   const LexfreqCount taglookup(const mootTagString &tag) const
00149   {
00150     LexfreqTagTable::const_iterator tagi = tagtable.find(tag);
00151     return tagi == tagtable.end() ? 0 : tagi->second;
00152   };
00153 
00161   void compute_specials(void);
00162 
00166   size_t n_pairs(void);
00167 
00168   //------ public methods: i/o
00169 
00171   bool load(const char *filename);
00172 
00174   bool load(FILE *file, const char *filename = NULL);
00175 
00177   bool save(const char *filename);
00178 
00180   bool save(FILE *file, const char *filename = NULL);
00181 };
00182 
00183 
00184 moot_END_NAMESPACE
00185 
00186 #endif /* _moot_LEXFREQS_H */

Generated on Mon Jun 27 13:05:25 2005 for libmoot by  doxygen 1.3.8-20040913