Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootClassfreqs.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2005 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011    
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016    
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with this library; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 */
00021 
00022 /*============================================================================
00023  * File: mootClassfreqs.h
00024  * Author:  Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *    Class for storage & retrieval of lexical-class frequencies (nested map<>)
00027  *============================================================================*/
00028 
00029 #ifndef _moot_CLASSFREQS_H
00030 #define _moot_CLASSFREQS_H
00031 
00032 #include <mootTypes.h>
00033 #include <mootLexfreqs.h>
00034 moot_BEGIN_NAMESPACE
00035 
00039 class mootClassfreqs {
00040 public:
00041   //------ public typedefs
00042 
00046   typedef mootLexfreqs::LexfreqSubtable ClassfreqSubtable;
00047 
00051   typedef mootLexfreqs::LexfreqEntry ClassfreqEntry;
00052 
00054   typedef mootTagSet LexClass;
00055 
00057   struct LexClassHash {
00058   public:
00059     inline size_t operator()(const LexClass &x) const {
00060       size_t hv = 0;
00061       hash<mootTagString> hasher;
00062       for (LexClass::const_iterator xi = x.begin(); xi != x.end(); xi++) {
00063         hv = 5*hv + hasher(*xi);
00064       }
00065       return hv;
00066     };
00067   };
00069   struct LexClassEqual {
00070   public:
00071     inline size_t operator()(const LexClass &x, const LexClass &y) const {
00072       return x==y;
00073     };
00074   };
00075 
00079   typedef hash_map<LexClass,
00080                    ClassfreqEntry,
00081                    LexClassHash,
00082                    LexClassEqual>
00083           ClassfreqTable;
00084 
00086   typedef hash_map<mootTagString,CountT> TagfreqTable;
00087 
00088 public:
00089   //------ public data
00090   ClassfreqTable    lctable;    
00091   TagfreqTable      tagtable;   
00092   CountT            totalcount; 
00094 public:
00095   //------ public methods
00097   mootClassfreqs(size_t initial_bucket_count=0) : totalcount(0)
00098   {
00099     if (initial_bucket_count != 0) {
00100       lctable.resize(initial_bucket_count);
00101     }
00102   };
00103 
00105   ~mootClassfreqs() {
00106     clear();
00107   }
00108 
00109   //------ public methods: manipulation
00110 
00112   void clear(void);
00113 
00115   inline void add_count(const LexClass &lclass,
00116                         const mootTagString &tag,
00117                         const CountT count)
00118   {
00119     //-- adjust token-table
00120     ClassfreqTable::iterator lci = lctable.find(lclass);
00121     if (lci == lctable.end()) {
00122       //-- new class
00123       lci = lctable.insert(ClassfreqTable::value_type(lclass,ClassfreqEntry(count))).first;
00124       lci->second.freqs[tag] = count;
00125     } else {
00126       //-- known class
00127       lci->second.count += count;
00128 
00129       ClassfreqSubtable::iterator lsi = lci->second.freqs.find(tag);
00130       if (lsi == lci->second.freqs.end()) {
00131         //-- unknown (tok,tag) pair
00132         lci->second.freqs[tag] = count;
00133       } else {
00134         //-- known (tok,tag) pair: just add
00135         lsi->second += count;
00136       }
00137     }
00138 
00139     //-- adjust total tag-count
00140     TagfreqTable::iterator lctagi = tagtable.find(tag);
00141     if (lctagi != tagtable.end()) {
00142       lctagi->second += count;
00143     } else {
00144       tagtable[tag] = count;
00145     }
00146 
00147     //-- adjust total token-count
00148     totalcount += count;
00149   };
00150 
00151   //------ public methods: lookup
00152   const CountT taglookup(const mootTagString &tag) const
00153   {
00154     TagfreqTable::const_iterator tagi = tagtable.find(tag);
00155     return tagi == tagtable.end() ? 0 : tagi->second;
00156   };
00157 
00161   size_t n_pairs(void);
00162 
00168   size_t n_impossible(void);
00169 
00170   //------ public methods: i/o
00171 
00173   bool load(const char *filename);
00174 
00176   bool load(FILE *file, const char *filename = NULL);
00177 
00179   bool save(const char *filename);
00180 
00182   bool save(FILE *file, const char *filename = NULL);
00183 };
00184 
00185 
00186 moot_END_NAMESPACE
00187 
00188 #endif /* _moot_CLASSFREQS_H */

Generated on Mon Sep 11 16:10:33 2006 for libmoot by doxygen 1.2.18