Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootClassfreqs.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2004 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
00020 */
00021 
00022 /*============================================================================
00023  * File: mootClassfreqs.h
00024  * Author:  Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *    Class for storage & retrieval of lexical-class frequencies (nested map<>)
00027  *============================================================================*/
00028 
00029 #ifndef _moot_CLASSFREQS_H
00030 #define _moot_CLASSFREQS_H
00031 
00032 #include <mootTypes.h>
00033 #include <mootLexfreqs.h>
00034 moot_BEGIN_NAMESPACE
00035 
00039 class mootClassfreqs {
00040 public:
00041   //------ public typedefs
00042 
00046   typedef mootLexfreqs::LexfreqSubtable ClassfreqSubtable;
00047 
00051   typedef mootLexfreqs::LexfreqEntry ClassfreqEntry;
00052 
00054   typedef mootTagSet LexClass;
00055 
00057   struct LexClassHash {
00058   public:
00059     inline size_t operator()(const LexClass &x) const {
00060       size_t hv = 0;
00061       hash<mootTagString> hasher;
00062       for (LexClass::const_iterator xi = x.begin(); xi != x.end(); xi++) {
00063         hv = 5*hv + hasher(*xi);
00064       }
00065       return hv;
00066     };
00067   };
00069   struct LexClassEqual {
00070   public:
00071     inline size_t operator()(const LexClass &x, const LexClass &y) const {
00072       return x==y;
00073     };
00074   };
00075 
00079   typedef hash_map<LexClass,
00080                    ClassfreqEntry,
00081                    LexClassHash,
00082                    LexClassEqual>
00083           ClassfreqTable;
00084 
00086   typedef hash_map<mootTagString,CountT> TagfreqTable;
00087 
00088 public:
00089   //------ public data
00090   ClassfreqTable    lctable;    
00091   TagfreqTable      tagtable;   
00092   CountT            totalcount; 
00094 public:
00095   //------ public methods
00097   mootClassfreqs(size_t initial_bucket_count=0) : totalcount(0)
00098   {
00099     if (initial_bucket_count != 0) {
00100       lctable.resize(initial_bucket_count);
00101     }
00102   };
00103 
00105   ~mootClassfreqs() {
00106     clear();
00107   }
00108 
00109   //------ public methods: manipulation
00110 
00112   void clear(void);
00113 
00115   inline void add_count(const LexClass &lclass,
00116                         const mootTagString &tag,
00117                         const CountT count)
00118   {
00119     //-- adjust token-table
00120     ClassfreqTable::iterator lci = lctable.find(lclass);
00121     if (lci == lctable.end()) {
00122       //-- new class
00123       lci = lctable.insert(ClassfreqTable::value_type(lclass,ClassfreqEntry(count))).first;
00124       lci->second.freqs[tag] = count;
00125     } else {
00126       //-- known class
00127       lci->second.count += count;
00128 
00129       ClassfreqSubtable::iterator lsi = lci->second.freqs.find(tag);
00130       if (lsi == lci->second.freqs.end()) {
00131         //-- unknown (tok,tag) pair
00132         lci->second.freqs[tag] = count;
00133       } else {
00134         //-- known (tok,tag) pair: just add
00135         lsi->second += count;
00136       }
00137     }
00138 
00139     //-- adjust total tag-count
00140     TagfreqTable::iterator lctagi = tagtable.find(tag);
00141     if (lctagi != tagtable.end()) {
00142       lctagi->second += count;
00143     } else {
00144       tagtable[tag] = count;
00145     }
00146 
00147     //-- adjust total token-count
00148     totalcount += count;
00149   };
00150 
00151   //------ public methods: lookup
00152   const CountT taglookup(const mootTagString &tag) const
00153   {
00154     TagfreqTable::const_iterator tagi = tagtable.find(tag);
00155     return tagi == tagtable.end() ? 0 : tagi->second;
00156   };
00157 
00161   size_t n_pairs(void);
00162 
00168   size_t n_impossible(void);
00169 
00170   //------ public methods: i/o
00171 
00173   bool load(const char *filename);
00174 
00176   bool load(FILE *file, const char *filename = NULL);
00177 
00179   bool save(const char *filename);
00180 
00182   bool save(FILE *file, const char *filename = NULL);
00183 };
00184 
00185 
00186 moot_END_NAMESPACE
00187 
00188 #endif /* _moot_CLASSFREQS_H */

Generated on Wed Jul 28 15:48:02 2004 for libmoot by doxygen 1.2.15