Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootHMMTrainer.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2005 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011    
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016    
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with this library; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootHMMTrainer.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + Trainer for moocow's PoS-tagger: shared headers
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _moot_HMM_TRAINER_H_
00030 #define _moot_HMM_TRAINER_H_
00031 
00032 #include <stdio.h>
00033 
00034 #include <set>
00035 #include <deque>
00036 
00037 #include "mootTypes.h"
00038 #include "mootToken.h"
00039 #include "mootTokenIO.h"
00040 #include "mootNgrams.h"
00041 #include "mootLexfreqs.h"
00042 #include "mootClassfreqs.h"
00043 
00044 moot_BEGIN_NAMESPACE
00045 
00046 using namespace std;
00047 
00048 /*--------------------------------------------------------------------------
00049  * mootHMMTrainer : HMM trainer class
00050  *--------------------------------------------------------------------------*/
00051 
00053 class mootHMMTrainer {
        /*
         * HMM trainer class for the moot part-of-speech tagger.
         *
         * Bundles three frequency tables (ngrams, lexfreqs, lcfreqs) together
         * with the training entry points train_from_reader(),
         * train_from_stream() and train_from_file().  Only the constructor,
         * destructor, clear() and train_from_stream() are defined in this
         * header; everything else is only declared here.
         */
00054 
00055 public:
00056   /*-------------------------------------------------------------*/
        /* Public type aliases */
00058 
00059 
        /** Alias for mootNgrams::Ngram; the type of the protected member `ng`. */
00060   typedef mootNgrams::Ngram      Ngram;
00061 
        /** Alias for mootNgrams::NgramCount. */
00063   typedef mootNgrams::NgramCount CountT;
00064 
        /** Ordered set of tag strings (std::set via the `using namespace std` above). */
00066   typedef set<mootTagString> TagSet;
00067 
00068 public:
00069   /*-------------------------------------------------------------*/
        /* Public data: frequency tables (all three are emptied by clear()) */
00071 
00072 
        /** N-gram table; presumably filled by the train_*() methods -- confirm in the implementation. */
00074   mootNgrams   ngrams;
00075 
        /** Lexical frequency table; presumably filled by the train_*() methods -- confirm in the implementation. */
00077   mootLexfreqs lexfreqs;
00078 
        /** Class frequency table; presumably filled by the train_*() methods -- confirm in the implementation. */
00080   mootClassfreqs lcfreqs;
00082 
00083   /*-------------------------------------------------------------*/
        /*
         * Public data: flags, all initialized to true by the constructor.
         * NOTE(review): by their names these select which of the three tables
         * get populated during training; no code in this header reads them,
         * so confirm against the implementation.
         */
00085 
00086   bool want_ngrams;     
00087   bool want_lexfreqs;   
00088   bool want_classfreqs; 
00090 
00091   /*-------------------------------------------------------------*/
        /* Public data: tag configuration */
00093 
00094 
        /**
         * Tag string initialized to "__$" by the constructor.
         * NOTE(review): by its name, the end-of-sentence tag; its use is not
         * visible in this header.
         */
00095   mootTagString eos_tag;
00097 
00098 protected:
00099   /*------------------------------------------------------------*/
        /* Protected data: training state */
00101 
00102 
        /** Ngram-typed state; not named in the constructor's initializer list, so it gets its default initialization. */
00103   Ngram ng;
00104 
        /**
         * Initialized to false by the constructor.
         * NOTE(review): presumably records whether the previous item seen was
         * an end-of-sentence -- confirm in the implementation.
         */
00106   bool last_was_eos;
00108 
00109 public:
00110   /*------------------------------------------------------------*/
        /* Constructor / destructor */
00112 
00113 
        /**
         * Default constructor: sets all three want_* flags to true, sets
         * eos_tag to "__$" and last_was_eos to false.  The tables and `ng`
         * are not listed here and are default-initialized.
         */
00114   mootHMMTrainer(void)
00115     : want_ngrams(true),
00116       want_lexfreqs(true),
00117       want_classfreqs(true),
00118       eos_tag("__$"),
00119       last_was_eos(false)
00120   {};
00121 
        /** Destructor: empty body.  Note that it is not declared virtual. */
00123   ~mootHMMTrainer(void) {};
00125 
00126 
00127   /*------------------------------------------------------------*/
        /* Reset */
00129 
00130 
        /**
         * Calls clear() on lexfreqs, ngrams and lcfreqs.
         * The want_* flags, eos_tag, ng and last_was_eos are left untouched.
         */
00131   inline void clear(void)
00132   {
00133     lexfreqs.clear();
00134     ngrams.clear();
00135     lcfreqs.clear();
00136   };
00138 
00139   /*------------------------------------------------------------*/
        /* High-level training entry points */
00141 
00142 
        /**
         * Declared only; defined elsewhere.  train_from_stream() passes this
         * function's bool result straight through to its own caller.
         * NOTE(review): presumably true means success -- confirm.
         */
00143   bool train_from_reader(TokenReader *reader);
00144 
        /**
         * Trains from an already-open C stream.
         *
         * Creates a reader with TokenIO::new_reader(tiofNative|tiofWellDone),
         * gives it the name `srcname`, attaches it to `in`, hands it to
         * train_from_reader(), deletes the reader and returns that call's
         * result.
         *
         * \param in       stream to read from; defaults to stdin.  This
         *                 function itself never calls fclose() on it.
         * \param srcname  name passed to the reader's reader_name(); defaults
         *                 to "(unknown)".
         * \return         whatever train_from_reader() returned.
         *
         * NOTE(review): `tr` is dereferenced without a null check and is only
         * deleted on the normal return path -- verify that new_reader() cannot
         * return NULL for these flags and that train_from_reader() cannot
         * throw.
         */
00146   bool train_from_stream(FILE *in=stdin, const string &srcname="(unknown)")
00147   {
00148     TokenReader *tr = TokenIO::new_reader(tiofNative|tiofWellDone);
00149     tr->reader_name(srcname);
00150     tr->from_file(in);
00151     //TokenReaderCookedFile reader(true,in,srcname);
00152     //reader.lexer.ignore_first_analysis = true;
00153     bool rc = train_from_reader(tr);
00154     delete tr;
00155     return rc;
00156   };
00157 
        /**
         * Declared only; defined elsewhere.
         * NOTE(review): presumably opens `filename` and trains from its
         * contents -- confirm in the implementation.
         */
00159   bool train_from_file(const string &filename);
00161 
00162   /*------------------------------------------------------------*/
        /*
         * Low-level training steps.  All four are declared only; the notes
         * below are inferred from the names and need confirming against the
         * implementation.
         */
00164 
00165 
        /** NOTE(review): presumably prepares the trainer state before a run. */
00166   void train_init(void);
00167 
        /** NOTE(review): presumably handles a beginning-of-sentence event. */
00169   void train_bos(void);
00170 
        /** NOTE(review): presumably processes a single token, `curtok`. */
00172   void train_token(const mootToken &curtok);
00173 
        /** NOTE(review): presumably handles an end-of-sentence event. */
00175   void train_eos(void);
00177 
00178   /*------------------------------------------------------------*/
        /* Diagnostics */
00180 
00181 
        /**
         * Declared only; takes a format string followed by variadic arguments.
         * NOTE(review): presumably a printf-style warning/error reporter --
         * confirm.  `fmt` is a non-const char*, so passing a string literal
         * relies on a conversion that C++11 removed; switching to
         * `const char *` would have to be done together with the out-of-line
         * definition.
         */
00183   void carp(char *fmt, ...);
00184 
00186 };
00187 
00188 moot_END_NAMESPACE
00189 
00190 #endif /* _moot_HMM_TRAINER_H_ */

Generated on Mon Sep 11 16:10:33 2006 for libmoot by doxygen 1.2.18