waste decoder component: converts hidden tag attributes 's', 'S', 'w' to sentence and token boundaries
Public Member Functions | |
Constructors etc. | |
underlying data sink | |
wasteDecoder (int fmt=tiofWellDone, const std::string &name="wasteDecoder") | |
~wasteDecoder () | |
TokenWriter API: Output Selection | |
virtual void | to_mstream (mootio::mostream *mostreamp) |
virtual void | close (void) |
TokenWriter API: Token Stream Access | |
virtual void | put_token (const mootToken &token) |
virtual void | put_tokens (const mootSentence &tokens) |
virtual void | put_sentence (const mootSentence &sentence) |
virtual void | put_raw_buffer (const char *buf, size_t len) |
wasteDecoder: local methods | |
void | to_writer (TokenWriter *sink) |
void | flush_buffer (bool force=false) |
mootToken & | buffer_peek (void) |
bool | buffer_can_shift (void) |
void | buffer_shift (void) |
void | _put_token (const mootToken &token) |
void | _put_tokens (const mootSentence &tokens) |
void | _put_sentence (const mootSentence &sentence) |
void | _put_raw_buffer (const char *buf, size_t len) |
Public Member Functions inherited from moot::TokenWriter | |
TokenWriter (int fmt=tiofWellDone, const std::string &name="TokenWriter") | |
virtual | ~TokenWriter (void) |
virtual void | to_mstream (mootio::mostream &mos) |
virtual void | to_filename (const char *filename) |
virtual void | to_file (FILE *file) |
virtual void | to_fd (int fd) |
virtual void | to_cxxstream (std::ostream &os) |
virtual bool | opened (void) |
virtual bool | flush (void) |
bool | autoflush (mootio::mostream *os) |
virtual void | put_comment_block_begin (void) |
virtual void | put_comment_block_end (void) |
virtual void | put_comment_buffer (const char *buf, size_t len) |
virtual void | put_comment (const char *s) |
virtual void | put_comment_buffer (const std::string &s) |
virtual void | printf_comment (const char *fmt,...) |
virtual void | put_raw (const char *s) |
virtual void | put_raw (const std::string &s) |
virtual void | printf_raw (const char *fmt,...) |
virtual void | writer_name (const std::string &myname) |
virtual void | carp (const char *fmt,...) |
Static Public Member Functions | |
static methods | |
static bool | tag_attr_s (const mootTagString &tagstr) |
static bool | tag_attr_S (const mootTagString &tagstr) |
static bool | tag_attr_w (const mootTagString &tagstr) |
Static Public Member Functions inherited from moot::TokenIO | |
static int | parse_format_string (const std::string &fmtString) |
static int | guess_filename_format (const char *filename) |
static bool | is_empty_format (int fmt) |
static int | sanitize_format (int fmt, int fmt_implied=tiofNone, int fmt_default=tiofNone) |
static int | parse_format_request (const char *request, const char *filename=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone) |
static std::string | format_canonical_string (int fmt) |
static class TokenReader * | new_reader (int fmt) |
static class TokenWriter * | new_writer (int fmt) |
static class TokenReader * | file_reader (const char *filename, const char *fmt_request=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone) |
static class TokenWriter * | file_writer (const char *filename, const char *fmt_request=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone) |
static size_t | pipe_tokens (class TokenReader *reader, class TokenWriter *writer) |
static size_t | pipe_sentences (class TokenReader *reader, class TokenWriter *writer) |
Public Attributes | |
public data | |
bool | wd_sb |
bool | wd_eos |
mootSentence | wd_buf |
mootToken * | wd_tok |
TokenWriter * | wd_sink |
Public Attributes inherited from moot::TokenWriter | |
int | tw_format |
std::string | tw_name |
mootio::mostream * | tw_ostream |
bool | tw_ostream_created |
bool | tw_is_comment_block |
void * | tw_data |
|
inline |
Default constructor
References close(), to_mstream(), and ~wasteDecoder().
moot::wasteDecoder::~wasteDecoder | ( | ) |
destructor calls close()
Referenced by wasteDecoder().
|
inlinestatic |
get boolean tag attribute 's' (beginning-of-sentence)
References moot::waste_tag_attr_get(), and moot::wtap_s.
|
inlinestatic |
get boolean tag attribute 'S' (end-of-sentence)
References moot::waste_tag_attr_get(), and moot::wtap_S.
|
inlinestatic |
get boolean tag attribute 'w' (beginning-of-word)
References moot::waste_tag_attr_get(), and moot::wtap_w.
|
virtual |
Select output to a mootio::mostream pointer; just wraps sink->to_mstream()
Reimplemented from moot::TokenWriter.
Referenced by wasteDecoder().
|
virtual |
Finish output to currently selected sink & perform any required cleanup operations. wasteDecoder override force-flushes buffer and unsets sink.
Reimplemented from moot::TokenWriter.
Referenced by wasteDecoder().
|
inlinevirtual |
Write a single token to the currently selected output sink. Descendants must override this method.
Reimplemented from moot::TokenWriter.
References _put_token().
|
inlinevirtual |
Write a single (partial) sentence to the currently selected output sink. Descendants may override this method. Default implementation just calls put_token() for every element of sentence.
Reimplemented from moot::TokenWriter.
References _put_tokens().
|
inlinevirtual |
Write a single sentence to the currently selected output sink. Descendants may override this method. Default implementation just calls put_sentence().
Reimplemented from moot::TokenWriter.
References _put_sentence().
|
inlinevirtual |
Write some data to the currently selected output sink. Descendants may override this method.
Reimplemented from moot::TokenWriter.
References _put_raw_buffer(), flush_buffer(), and to_writer().
void moot::wasteDecoder::to_writer | ( | TokenWriter * | sink | ) |
Select output to subordinate TokenWriter
Referenced by put_raw_buffer().
void moot::wasteDecoder::flush_buffer | ( | bool | force = false | ) |
flush buffer to current output sink if defined
Referenced by put_raw_buffer().
|
inline |
peek at top buffer element
|
inline |
returns true iff it is safe to shift the buffer
|
inline |
shift the first element off the buffer if possible
References _put_raw_buffer(), _put_sentence(), _put_token(), and _put_tokens().
void moot::wasteDecoder::_put_token | ( | const mootToken & | token | ) |
Referenced by buffer_shift(), and put_token().
void moot::wasteDecoder::_put_tokens | ( | const mootSentence & | tokens | ) |
Referenced by buffer_shift(), and put_tokens().
void moot::wasteDecoder::_put_sentence | ( | const mootSentence & | sentence | ) |
Referenced by buffer_shift(), and put_sentence().
void moot::wasteDecoder::_put_raw_buffer | ( | const char * | buf, |
size_t | len | ||
) |
Referenced by buffer_shift(), and put_raw_buffer().
bool moot::wasteDecoder::wd_sb |
whether an SB has been seen
bool moot::wasteDecoder::wd_eos |
whether an EOS attribute has been seen
mootSentence moot::wasteDecoder::wd_buf |
intermediate token buffer
mootToken* moot::wasteDecoder::wd_tok |
current token under construction (NULL for none), pointer into wd_buf
TokenWriter* moot::wasteDecoder::wd_sink |
underlying data sink