35 #ifndef _WASTE_LEXER_H 36 #define _WASTE_LEXER_H 106 this->wlt_type = lextype;
107 this->wlt_blanked = blanked;
129 typedef std::vector<std::vector<std::vector<std::vector<std::vector<std::vector<std::string> > > > > > wasteTagset;
132 typedef std::list<wasteLexerToken> wasteLexerBuffer;
193 static const int n_hidden = 7;
203 wasteTagset wl_tagset;
205 wasteLexerBuffer wl_lexbuf;
237 inline len length_attr(
size_t length)
const 263 void buffer_token(
const mootToken& stok);
277 inline void lexbuf_pop_front(
void)
280 if (wl_current_tok==front) wl_current_tok=NULL;
281 if (wl_head_tok==front) wl_head_tok=NULL;
282 wl_lexbuf.pop_front();
320 const std::string &name =
"wasteLexerReader");
341 virtual void close(
void);
344 inline void dehyph_mode(
bool on)
346 lexer.wl_dehyph_mode = on;
/** Line-number accessor (no-argument overload).
 *  Forwards to \c scanner->line_number() and returns its result when
 *  \c scanner is non-null; returns 0 when \c scanner is null.
 */
376 virtual size_t line_number(
void) {
return scanner ? scanner->
line_number() : 0; };
/** Line-number accessor taking a value (one-argument overload).
 *  Passes \a n through to \c scanner->line_number(n) and returns whatever
 *  that call returns; when \c scanner is null nothing is forwarded and 0
 *  is returned.
 *  NOTE(review): presumably this sets the scanner's line number to \a n --
 *  confirm against the scanner class.
 *  \param n value handed unchanged to the underlying scanner
 */
379 virtual size_t line_number(
size_t n) {
return scanner ? scanner->
line_number(n) : 0; };
/** Column-number accessor (no-argument overload).
 *  Forwards to \c scanner->column_number() and returns its result when
 *  \c scanner is non-null; returns 0 when \c scanner is null.
 */
382 virtual size_t column_number(
void) {
return scanner ? scanner->
column_number() : 0; };
/** Column-number accessor taking a value (one-argument overload).
 *  Passes \a n through to \c scanner->column_number(n) and returns whatever
 *  that call returns; when \c scanner is null nothing is forwarded and 0
 *  is returned.
 *  NOTE(review): presumably this sets the scanner's column number to \a n --
 *  confirm against the scanner class.
 *  \param n value handed unchanged to the underlying scanner
 */
385 virtual size_t column_number(
size_t n) {
return scanner ? scanner->
column_number(n) : 0; };
Definition: mootAssocVector.h:39
wasteLexerType wlt_type
Definition: wasteLexer.h:177
wasteLexer_state
Definition: wasteLexer.h:149
wasteLexerToken(wasteLexerType type=wLexerTypeOther, bool blanked=true, bool bos=true, bool eos=false, bool bow=true)
Definition: wasteLexer.h:191
bool S
Definition: wasteLexer.h:180
wasteLexerTypeE wasteLexerType
Definition: wasteTypes.h:101
virtual mootio::ByteOffset byte_number(void)
Definition: mootTokenIO.h:500
void set_wlt_data(wasteLexerType lextype, bool blanked, bool s, bool S, bool w)
Definition: wasteLexer.h:204
Definition: wasteLexer.h:155
Definition: wasteLexer.h:151
virtual size_t column_number(void)
Definition: mootTokenIO.h:494
Abstract class for token input.
Definition: mootTokenIO.h:208
Mid-level scanner stage: performs (optional) hyphenation normalization and text classification.
Definition: wasteLexer.h:221
bool s
Definition: wasteLexer.h:179
Mid-level scanner stage, wraps moot::wasteLexer in moot::TokenReader API.
Definition: wasteLexer.h:395
Common definitions for WASTE HMM-based tokenizer.
static const int ls_head_hyph_nl
Definition: wasteLexer.h:162
High-level token information object.
Definition: mootToken.h:96
literal token text included
Definition: mootTokenIO.h:57
Definition: wasteLexer.h:167
Definition: wasteLexer.h:158
simple hash_set<>-based lexicon class
Definition: wasteLexicon.h:43
virtual size_t line_number(void)
Definition: mootTokenIO.h:488
bool w
Definition: wasteLexer.h:181
Definition: wasteLexer.h:156
moot::OffsetT ByteOffset
typedef for (byte) offsets (may be unsigned)
Definition: mootIO.h:55
list< mootToken > mootSentence
Definition: mootToken.h:630
mootTokenTypeE
Definition: mootToken.h:71
mootToken wlt_token
Definition: wasteLexer.h:173
Abstract and native classes for I/O of moot::mootToken objects.
static const int ls_init
Definition: wasteLexer.h:160
bool wlt_blanked
Definition: wasteLexer.h:178
Definition: wasteLexer.h:153
Definition: wasteLexer.h:157
len
Definition: wasteLexer.h:282
simple hash_set<>-based lexicon class for moot::wasteLexer
Abstract base class for input stream wrappers.
Definition: mootIO.h:129
static const int ls_head_hyph
Definition: wasteLexer.h:161
Definition: wasteTypes.h:97
Definition: wasteLexer.h:152
Definition: wasteLexer.h:154