wasteDecoder.h
Go to the documentation of this file.
1 /* -*- Mode: C++; coding: utf-8; c-basic-offset: 2; -*- */
2 /*
3  libmoot : moot part-of-speech tagging library
4  Copyright (C) 2013 by Bryan Jurish <moocow@cpan.org> and Kay-Michael Würzner
5 
6  This library is free software; you can redistribute it and/or
7  modify it under the terms of the GNU Lesser General Public
8  License as published by the Free Software Foundation; either
9  version 3 of the License, or (at your option) any later version.
10 
11  This library is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Lesser General Public License for more details.
15 
16  You should have received a copy of the GNU Lesser General Public
17  License along with this library; if not, write to the Free Software
18  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20 
32 #ifndef _WASTE_DECODER_H
33 #define _WASTE_DECODER_H
34 
35 #include <mootTokenIO.h>
36 #include <wasteTypes.h>
37 
38 moot_BEGIN_NAMESPACE
39 
40 /*============================================================================
41  * wasteDecoder
42  */
44 class wasteDecoder : public TokenWriter
45 {
46 public:
47  //------------------------------------------------------------
49 
50 
51  inline static bool tag_attr_s(const mootTagString &tagstr)
52  { return waste_tag_attr_get(tagstr,wtap_s,false); }
53 
55  inline static bool tag_attr_S(const mootTagString &tagstr)
56  { return waste_tag_attr_get(tagstr,wtap_S,false); }
57 
59  inline static bool tag_attr_w(const mootTagString &tagstr)
60  { return waste_tag_attr_get(tagstr,wtap_w,true); }
61 
62 public:
63  //------------------------------------------------------------
65 
66  bool wd_sb;
67  bool wd_eos;
68  mootSentence wd_buf;
69  mootToken *wd_tok;
70  TokenWriter *wd_sink;
72 
73 public:
74  //------------------------------------------------------------
76 
77 
78  wasteDecoder(int fmt=tiofWellDone, const std::string &name="wasteDecoder")
79  : TokenWriter(fmt,name),
80  wd_sb(false),
81  wd_eos(false),
82  wd_tok(NULL),
83  wd_sink(NULL)
84  {};
85 
87  ~wasteDecoder();
89 
90  //------------------------------------------------------------
92 
93 
94  virtual void to_mstream(mootio::mostream *mostreamp);
95 
99  virtual void close(void);
101 
102  //------------------------------------------------------------
104 
105  virtual void put_token(const mootToken &token) {
106  _put_token(token);
107  };
108  virtual void put_tokens(const mootSentence &tokens) {
109  _put_tokens(tokens);
110  };
111  virtual void put_sentence(const mootSentence &sentence) {
112  _put_sentence(sentence);
113  };
114  virtual void put_raw_buffer(const char *buf, size_t len) {
115  _put_raw_buffer(buf,len);
116  };
118 
119  //------------------------------------------------------------
121 
122 
123  void to_writer(TokenWriter *sink);
124 
126  void flush_buffer(bool force=false);
127 
129  inline mootToken &buffer_peek(void)
130  { return wd_buf.front(); };
131 
133  inline bool buffer_can_shift(void)
134  { return !wd_buf.empty() && wd_tok != &(wd_buf.front()); };
135 
137  inline void buffer_shift(void)
138  { if (!wd_buf.empty()) wd_buf.pop_front(); };
139 
140  void _put_token(const mootToken &token);
141  void _put_tokens(const mootSentence &tokens);
142  void _put_sentence(const mootSentence &sentence);
143  void _put_raw_buffer(const char *buf, size_t len); //-- not supported
145 
146 };
147 
148 moot_END_NAMESPACE
149 
150 #endif /* _WASTE_DECODER_H */
151 
static const int tiofWellDone
Definition: mootTokenIO.h:79
Definition: wasteTypes.h:112
bool waste_tag_attr_get(const std::string &tagstr, size_t rpos, bool mydefault=false)
Definition: wasteTypes.h:121
Abstract base class for output stream wrappers.
Definition: mootIO.h:194
Common definitions for WASTE HMM-based tokenizer.
waste decoder component converts hidden tag attributes 's','S','w' to sentence- and token-boundaries ...
Definition: wasteDecoder.h:43
High-level token information object.
Definition: mootToken.h:96
list< mootToken > mootSentence
Definition: mootToken.h:630
Abstract class for token output.
Definition: mootTokenIO.h:700
Abstract and native classes for I/O of moot::mootToken objects.
string mootTagString
Definition: mootToken.h:59
Definition: wasteTypes.h:114
Definition: wasteTypes.h:113