Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootHMMTrainer.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2004 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootHMMTrainer.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + Trainer for moocow's PoS-tagger: shared headers
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _moot_HMM_TRAINER_H_
00030 #define _moot_HMM_TRAINER_H_
00031 
00032 #include <stdio.h>
00033 
00034 #include <set>
00035 #include <deque>
00036 
00037 #include "mootTypes.h"
00038 #include "mootToken.h"
00039 #include "mootTokenIO.h"
00040 #include "mootNgrams.h"
00041 #include "mootLexfreqs.h"
00042 #include "mootClassfreqs.h"
00043 
00044 moot_BEGIN_NAMESPACE // project macro defined elsewhere; presumably opens the library namespace (closed by moot_END_NAMESPACE below)
00045 // NOTE(review): the using-directive below sits in a header, so every file including it gets unqualified std names (set, string, ...) visible in this scope.
00046 using namespace std;
00047 
00048 /*--------------------------------------------------------------------------
00049  * mootHMMTrainer : HMM trainer class
00050  *--------------------------------------------------------------------------*/
00051 
00053 class mootHMMTrainer { // HMM trainer class for the moot part-of-speech tagger.
00054   // Bundles three data objects (ngrams, lexfreqs, lcfreqs) with the train_*() entry points that are declared below.
00055 public:
00056   /*-------------------------------------------------------------*/
00058   // ---- Type aliases ----
00059   // Shorthand for the n-gram type declared inside mootNgrams.
00060   typedef mootNgrams::Ngram      Ngram;
00061   // Shorthand for the count type declared inside mootNgrams.
00063   typedef mootNgrams::NgramCount CountT;
00064   // Set of tag strings (not referenced again inside this class declaration).
00066   typedef set<mootTagString> TagSet;
00067 
00068 public:
00069   /*-------------------------------------------------------------*/
00071   // ---- Data members emptied by clear() ----
00072   // N-gram data (mootNgrams); presumably tag n-gram counts -- confirm in mootNgrams.h.
00074   mootNgrams   ngrams;
00075   // Lexical data (mootLexfreqs); presumably word/tag frequencies -- confirm in mootLexfreqs.h.
00077   mootLexfreqs lexfreqs;
00078   // Class data (mootClassfreqs); presumably lexical-class/tag frequencies -- confirm in mootClassfreqs.h.
00080   mootClassfreqs lcfreqs;
00082 
00083   /*-------------------------------------------------------------*/
00085   // Flags, all initialised to true by the constructor. Their consumers are not visible in this header.
00086   bool want_ngrams;     // presumably: collect data into `ngrams` -- TODO confirm
00087   bool want_lexfreqs;   // presumably: collect data into `lexfreqs` -- TODO confirm
00088   bool want_classfreqs; // presumably: collect data into `lcfreqs` -- TODO confirm
00090 
00091   /*-------------------------------------------------------------*/
00093   // Tag string initialised to "__$" by the constructor;
00094   // judging by its name it marks end-of-sentence -- confirm in the implementation.
00095   mootTagString eos_tag;
00097 
00098 protected:
00099   /*------------------------------------------------------------*/
00101   // ---- Internal state (not touched by any inline code in this header apart from the constructor) ----
00102   // N-gram object; presumably the running tag window during training -- TODO confirm.
00103   Ngram ng;
00104   // Initialised to false by the constructor; presumably records whether the previous event was an end-of-sentence.
00106   bool last_was_eos;
00108 
00109 public:
00110   /*------------------------------------------------------------*/
00112   // ---- Construction / destruction ----
00113   // Default constructor: sets all three want_* flags to true, eos_tag to "__$" and last_was_eos to false.
00114   mootHMMTrainer(void)
00115     : want_ngrams(true),
00116       want_lexfreqs(true),
00117       want_classfreqs(true),
00118       eos_tag("__$"),
00119       last_was_eos(false)
00120   {};
00121   // Destructor: empty body; members are released by their own destructors.
00123   ~mootHMMTrainer(void) {};
00125 
00126 
00127   /*------------------------------------------------------------*/
00129   // ---- Reset ----
00130   // Calls clear() on lexfreqs, ngrams and lcfreqs. The want_* flags, eos_tag, ng and last_was_eos are left untouched.
00131   inline void clear(void)
00132   {
00133     lexfreqs.clear();
00134     ngrams.clear();
00135     lcfreqs.clear();
00136   };
00138 
00139   /*------------------------------------------------------------*/
00141   // ---- High-level training entry points ----
00142   // Train from an existing TokenReader. Declared only (no body in this header); its bool result is what train_from_stream() returns.
00143   bool train_from_reader(TokenReader *reader);
00144   // Train from a C stream: obtains a reader from TokenIO::new_reader(), names it srcname, attaches `in`, runs train_from_reader(), deletes the reader and returns that call's result.
00146   bool train_from_stream(FILE *in=stdin, const string &srcname="(unknown)")
00147   {
00148     TokenReader *tr = TokenIO::new_reader(tiofNative|tiofWellDone); // NOTE(review): dereferenced below without a NULL check -- confirm new_reader() cannot return NULL for these flags
00149     tr->reader_name(srcname);
00150     tr->from_file(in);
00151     //TokenReaderCookedFile reader(true,in,srcname);
00152     //reader.lexer.ignore_first_analysis = true;
00153     bool rc = train_from_reader(tr); // NOTE(review): if this call throws, tr is never deleted
00154     delete tr; // the reader was allocated in this function, so it is released here; no fclose(in) is issued by this function itself
00155     return rc;
00156   };
00157   // Train from the named file. Declared only; presumably opens `filename` and delegates to the variants above -- confirm in the implementation.
00159   bool train_from_file(const string &filename);
00161 
00162   /*------------------------------------------------------------*/
00164   // ---- Low-level training steps (declared only; no bodies in this header) ----
00165   // Presumably prepares internal state before training starts -- TODO confirm.
00166   void train_init(void);
00167   // Presumably handles a beginning-of-sentence event -- TODO confirm.
00169   void train_bos(void);
00170   // Presumably updates the collected data for a single token `curtok` -- TODO confirm.
00172   void train_token(const mootToken &curtok);
00173   // Presumably handles an end-of-sentence event -- TODO confirm.
00175   void train_eos(void);
00177 
00178   /*------------------------------------------------------------*/
00180   // ---- Diagnostics ----
00181   // Variadic message routine taking a format string; presumably printf-style warning output -- TODO confirm.
00183   void carp(char *fmt, ...); // NOTE(review): fmt is a non-const char*, so callers passing string literals rely on the deprecated literal-to-char* conversion
00184 
00186 };
00187 
00188 moot_END_NAMESPACE
00189 
00190 #endif /* _moot_HMM_TRAINER_H_ */

Generated on Wed Jul 28 15:48:03 2004 for libmoot by doxygen 1.2.15