wasteTrainWriter.h
Go to the documentation of this file.
1 /* -*- Mode: C++; coding: utf-8; c-basic-offset: 2; -*- */
2 /*
3  libmoot : moot part-of-speech tagging library
4  Copyright (C) 2013 by Bryan Jurish <moocow@cpan.org> and Kay-Michael Würzner
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Lesser General Public
8  License as published by the Free Software Foundation; either
9  version 3 of the License, or (at your option) any later version.
10 
11  This library is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Lesser General Public License for more details.
15 
16  You should have received a copy of the GNU Lesser General Public
17  License along with this library; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20 
32 #ifndef _WASTE_TRAIN_WRITER_H
33 #define _WASTE_TRAIN_WRITER_H
34 
35 #include <wasteLexer.h>
36 #include <wasteScanner.h>
37 
38 moot_BEGIN_NAMESPACE
39 
40 //============================================================================
41 // wasteTrainer
/*
 * wasteTrainWriter: TokenWriter subclass. Per this listing's cross-reference
 * notes it is a wrapper for writing WASTE tokenizer 'well-done' training data
 * from pre-tokenized material (that description is truncated in the listing).
 * It aggregates a raw-text scanner, a lexer stage, and a pointer to a further
 * TokenWriter; all data members are public.
 */
50 class wasteTrainWriter : public TokenWriter {
51 public:
52  //----------------------------------------------------------------------
    // Static utilities
54
55
    // Static helper taking a string by non-const reference.
    // NOTE(review): presumably un-escapes `s` in place; the escape scheme is
    // not visible in this header -- confirm against the implementation.
56  static void rtt_unescape(std::string &s);
58
59 public:
60  //----------------------------------------------------------------------
    // Data members
62
    // Raw text scanner returning mootToken; wraps wasteScanner.
63  wasteTokenScanner wt_scanner;
    // Mid-level scanner stage wrapping moot::wasteLexer in the TokenReader API.
64  wasteLexerReader wt_lexer;
    // Pointer to another TokenWriter (abstract token-output class).
    // NOTE(review): ownership/lifetime is not visible here; presumably set via
    // to_writer() -- confirm.
65  TokenWriter *wt_writer;
    // Sentence-typed buffer (mootSentence is list<mootToken>).
    // NOTE(review): presumably holds pending segment tokens -- confirm.
67  mootSentence wt_segbuf;
    // Pointer to a single mootToken.
    // NOTE(review): presumably tracks a position within wt_segbuf -- confirm.
68  mootToken *wt_pseg;
    // String buffer. NOTE(review): presumably accumulated raw text -- confirm.
70  std::string wt_txtbuf;
    // Boolean state flag. NOTE(review): name suggests "currently at
    // end-of-sentence" -- confirm.
71  bool wt_at_eos;
73
74 public:
75  //----------------------------------------------------------------------
    // Construction / destruction
77
78
    // Constructor. `fmt` defaults to tiofUnknown (unknown format);
    // `myname` defaults to "wasteTrainer".
79  wasteTrainWriter(int fmt=tiofUnknown, const std::string &myname="wasteTrainer");
80
    // Virtual destructor (declared here, defined elsewhere).
82  virtual ~wasteTrainWriter
83 ();
85
86  //------------------------------------------------------------
    // Output selection
88
89
    // Takes a pointer to a mootio::mostream (abstract base class for output
    // stream wrappers). NOTE(review): presumably selects the output stream --
    // confirm against the implementation.
90  virtual void to_mstream(mootio::mostream *mostreamp);
91
    // Virtual close operation; defined elsewhere.
93  virtual void close(void);
95
96  //------------------------------------------------------------
    // Token output
98
    // Writes a single token; declared here, defined elsewhere.
99  virtual void put_token(const mootToken &token);
    // Writes a whole sentence: forwards the tokens to put_tokens(), then emits
    // one extra token constructed with type TokTypeEOS via put_token().
101  virtual void put_sentence(const mootSentence &sentence) {
102  this->put_tokens(sentence);
103  this->put_token( mootToken(TokTypeEOS) );
104  };
105
    // Accepts a raw character buffer `buf` together with its length `len`.
106  virtual void put_raw_buffer(const char *buf, size_t len);
108
109  //------------------------------------------------------------
    // Local (non-virtual) methods
111
112
    // Takes a TokenWriter pointer.
    // NOTE(review): presumably stores it in wt_writer -- confirm.
114  void to_writer(TokenWriter *writer);
115
    // Flushes buffered data; `force` defaults to false.
    // NOTE(review): which buffer(s) are flushed and what `force` overrides are
    // not visible in this header -- confirm.
117  void flush_buffer(bool force=false);
119 };
120 
121 
122 moot_END_NAMESPACE
123 
124 #endif /* _WASTE_TRAIN_WRITER_H */
unknown format
Definition: mootTokenIO.h:50
Mid-level scanner stage, wraps moot::wasteLexer in moot::TokenReader API.
Definition: wasteLexer.h:395
Abstract base class for output stream wrappers.
Definition: mootIO.h:194
High-level token information object.
Definition: mootToken.h:96
list< mootToken > mootSentence
Definition: mootToken.h:630
Abstract class for token output.
Definition: mootTokenIO.h:700
Declarations of the mid level lexer for the waste tokenizer.
Definition: mootToken.h:78
Raw text scanner class returning mootToken; wraps wasteScanner.
Definition: wasteScanner.h:148
Declarations of the low level scanner for the waste tokenizer.
TokenWriter wrapper class for writing WASTE tokenizer 'well-done' training data from pre-tokenized in...
Definition: wasteTrainWriter.h:49