34 #ifndef _moot_TOKEN_IO_H 35 #define _moot_TOKEN_IO_H 132 const char *filename=NULL,
206 static const size_t TR_DEFAULT_BUFSIZE = 256;
219 bool tr_istream_created;
260 const std::string &name =
"TokenReader")
264 tr_istream_created(false),
279 inline void tr_clear(
void)
281 if (tr_token) tr_token->
clear();
282 if (tr_sentence) tr_sentence->clear();
301 tr_istream = mistreamp;
305 tr_istream_created =
false;
314 this->from_mstream(&mis);
323 virtual void from_filename(
const char *filename)
326 tr_istream_created =
true;
327 if (!tr_istream || !tr_istream->
valid()) {
328 this->carp(
"open failed for \"%s\": %s", filename, strerror(errno));
339 virtual void from_file(FILE *file)
342 tr_istream_created =
true;
351 virtual void from_fd(
int fd)
354 throw domain_error(
"from_fd(): not implemented");
363 virtual void from_buffer(
const void *buf,
size_t len)
366 tr_istream_created =
true;
375 virtual void from_string(
const char *s)
377 from_buffer(s,strlen(s));
386 virtual void from_cxxstream(std::istream &is)
389 tr_istream_created =
true;
400 virtual void close(
void) {
401 if (tr_istream_created) {
403 if (tr_istream)
delete tr_istream;
405 tr_istream_created =
false;
412 virtual bool opened(
void)
414 return tr_istream!=NULL && tr_istream->
valid();
431 inline mootToken *token(
void) {
return tr_token; };
440 inline mootSentence *sentence(
void) {
return tr_sentence; };
448 throw domain_error(
"TokenReader: get_token() not implemented");
466 virtual void reader_name(
const std::string &myname) { tr_name = myname; };
469 virtual size_t line_number(
void) {
return 0; };
472 virtual size_t line_number(
size_t n) {
return n; };
475 virtual size_t column_number(
void) {
return 0; };
478 virtual size_t column_number(
size_t n) {
return n; };
487 virtual void carp(
const char *fmt, ...);
519 const std::string &name =
"TokenReaderNative")
527 tr_sentence = &trn_sentence;
567 virtual size_t line_number(
void) {
return lexer.
theLine; };
570 virtual size_t line_number(
size_t n) {
return lexer.
theLine = n; };
573 virtual size_t column_number(
void) {
return lexer.
theColumn; };
576 virtual size_t column_number(
size_t n) {
return lexer.
theColumn = n; };
596 inline bool input_is_tagged(
void)
606 inline bool input_is_tagged(
bool is_tagged)
624 inline bool input_has_locations(
void)
633 inline bool input_has_locations(
bool has_locs)
649 inline bool input_has_cost(
void)
658 inline bool input_has_cost(
bool has_cost)
691 bool tw_ostream_created;
694 bool tw_is_comment_block;
715 const std::string &name=
"TokenWriter")
719 tw_ostream_created(false),
720 tw_is_comment_block(false),
745 tw_ostream = mostreamp;
746 if ( !(tw_format&
tiofNull) && (!tw_ostream || !tw_ostream->
valid())) {
747 this->carp(
"Warning: selecting output to invalid stream");
749 tw_ostream_created =
false;
758 this->to_mstream(&mos);
766 virtual void to_filename(
const char *filename)
769 tw_ostream_created =
true;
770 if (!tw_ostream || !tw_ostream->
valid()) {
771 this->carp(
"open failed for \"%s\": %s", filename, strerror(errno));
782 virtual void to_file(FILE *file)
785 tw_ostream_created =
true;
794 virtual void to_fd(
int fd)
797 throw domain_error(
"to_fd(): not implemented.");
806 virtual void to_cxxstream(std::ostream &os)
809 tw_ostream_created =
true;
820 virtual void close(
void) {
821 if (tw_is_comment_block) this->put_comment_block_end();
822 if (tw_ostream && tw_ostream_created) {
826 tw_ostream_created =
false;
833 virtual bool opened(
void)
835 return tw_ostream!=NULL && tw_ostream->
valid();
839 virtual bool flush(
void)
841 return this->opened() && tw_ostream->
flush();
859 virtual void put_token(
const mootToken &token) {
860 throw domain_error(
"TokenWriter: put_token() not implemented");
870 for (mootSentence::const_iterator si=tokens.begin(); si!=tokens.end(); si++)
871 this->put_token(*si);
881 this->put_tokens(sentence);
893 virtual void put_comment_block_begin(
void) {
894 tw_is_comment_block =
true;
902 virtual void put_comment_block_end(
void) {
903 tw_is_comment_block =
false;
910 virtual void put_comment_buffer(
const char *buf,
size_t len) {
911 this->put_comment_block_begin();
912 this->put_raw_buffer(buf,len);
913 this->put_comment_block_end();
920 virtual void put_comment(
const char *s) {
921 this->put_comment_buffer(s,strlen(s));
928 virtual void put_comment_buffer(
const std::string &s) {
929 this->put_comment_buffer(s.data(),s.size());
936 virtual void printf_comment(
const char *fmt, ...);
946 virtual void put_raw_buffer(
const char *buf,
size_t len)
952 virtual void put_raw(
const char *s) {
953 this->put_raw_buffer(s,strlen(s));
959 virtual void put_raw(
const std::string &s) {
960 this->put_raw_buffer(s.data(),s.size());
967 virtual void printf_raw(
const char *fmt, ...);
975 virtual void writer_name(
const std::string &myname) { tw_name = myname; };
978 virtual void carp(
const char *fmt, ...);
1004 const std::string name=
"TokenWriterNative")
1036 virtual void put_token(
const mootToken &token) {
1037 _put_token(token,tw_ostream);
1040 _put_tokens(tokens,tw_ostream);
1043 _put_sentence(sentence,tw_ostream);
1046 virtual void put_raw_buffer(
const char *buf,
size_t len) {
1047 _put_raw_buffer(buf,len,tw_ostream);
1074 inline std::string token2string(
const mootToken &token)
1076 mostream *tw_ostream_old = tw_ostream;
1078 tw_ostream = &twn_tmpbuf;
1079 _put_token(token,tw_ostream);
1080 std::string t2s(twn_tmpbuf.
data(), twn_tmpbuf.
size());
1081 tw_ostream = tw_ostream_old;
1088 inline std::string sentence2string(
const mootSentence &sentence)
1091 _put_sentence(sentence,&twn_tmpbuf);
1092 return std::string(twn_tmpbuf.
data(), twn_tmpbuf.
size());
1133 tr_sentence = &tb_sentence;
1146 throw domain_error(
"from_mstream(): not implemented for class moot::TokenBuffer");
1152 throw domain_error(
"to_mstream(): not implemented for class moot::TokenBuffer");
1156 virtual void close()
1160 virtual bool opened()
1170 virtual void clear_buffer();
1198 virtual void put_token(
const mootToken &token);
1220 virtual void put_raw_buffer(
const char *buf,
size_t len);
size_t size(void) const
Definition: mootBufferIO.h:177
bool parse_analysis_cost
Definition: mootTokenLexer.h:128
Conserve raw XML.
Definition: mootTokenIO.h:55
Definition: mootAssocVector.h:39
unknown format
Definition: mootTokenIO.h:50
Class for native "cooked" text-format token input.
Definition: mootTokenIO.h:516
flex++ lexer for moot PoS tagger native text input (guts for moot::TokenReaderNative) ...
Definition: mootTokenLexer.h:71
null i/o, useful for testing
Definition: mootTokenIO.h:51
autoflush output stream after write (native i/o only)?
Definition: mootTokenIO.h:65
const char * data(void) const
Definition: mootBufferIO.h:174
static const int tiofMedium
Definition: mootTokenIO.h:76
size_t theColumn
Definition: mootGenericLexer.h:103
static class TokenWriter * file_writer(const char *filename, const char *fmt_request=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone)
TokenIOFormatE
Definition: mootTokenIO.h:48
static const int tiofRare
Definition: mootTokenIO.h:70
Class for in-memory token buffers using mootSentence.
Definition: mootTokenIO.h:1139
static const int tiofWellDone
Definition: mootTokenIO.h:79
static const int tiofMediumRare
Definition: mootTokenIO.h:73
flex++ lexer for moot::TokenReaderNative guts: autogenerated headers
virtual void from_mstream(mootio::mistream &mis)
Definition: mootTokenIO.h:325
virtual void to_file(FILE *out=stdout)
static size_t pipe_sentences(class TokenReader *reader, class TokenWriter *writer)
virtual bool close(void)
Definition: mootIO.h:120
Abstract class for token input.
Definition: mootTokenIO.h:208
input is tagged ("medium" or "well done")
Definition: mootTokenIO.h:59
static std::string format_canonical_string(int fmt)
Pretty-print (XML only)
Definition: mootTokenIO.h:56
static bool is_empty_format(int fmt)
Wrapper class for named file input using C FILE*s.
Definition: mootCIO.h:286
XML format.
Definition: mootTokenIO.h:54
Abstract base class for output stream wrappers.
Definition: mootIO.h:194
Definition: mootCxxIO.h:141
High-level token information object.
Definition: mootToken.h:96
bool first_analysis_is_best
Definition: mootTokenLexer.h:128
literal token text included
Definition: mootTokenIO.h:57
no format
Definition: mootTokenIO.h:49
static int parse_format_string(const std::string &fmtString)
bool analysis_cost_details
Definition: mootTokenLexer.h:128
virtual bool flush(void)
Definition: mootIO.h:215
mootio abstraction layer for C++ streams
bool parse_location
Definition: mootTokenLexer.h:128
native text format
Definition: mootTokenIO.h:53
Class for native "cooked" text-format token output.
Definition: mootTokenIO.h:1019
TokenIOFormatE TokenIOFormat
Definition: mootTokenIO.h:67
moot::OffsetT ByteOffset
typedef for (byte) offsets (may be unsigned)
Definition: mootIO.h:55
virtual void close(void)
Definition: mootTokenIO.h:415
static class TokenWriter * new_writer(int fmt)
static int guess_filename_format(const char *filename)
list< mootToken > mootSentence
Definition: mootToken.h:630
some user-defined format
Definition: mootTokenIO.h:52
Wrapper class for named file output using C FILE*s.
Definition: mootCIO.h:327
mootTokenTypeE
Definition: mootToken.h:71
void clear(void)
Definition: mootBufferIO.h:333
Abstract class for token output.
Definition: mootTokenIO.h:700
static class TokenReader * file_reader(const char *filename, const char *fmt_request=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone)
save full Viterbi trellis trace?
Definition: mootTokenIO.h:63
static size_t pipe_tokens(class TokenReader *reader, class TokenWriter *writer)
bool ignore_first_analysis
Definition: mootTokenLexer.h:128
Streambuf-like class for I/O on C char* buffers.
Definition: mootBufferIO.h:242
static int parse_format_request(const char *request, const char *filename=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone)
virtual bool valid(void)
Definition: mootIO.h:99
locations appear as first non-tag analysis
Definition: mootTokenIO.h:61
moot::mootToken mtoken_default
Definition: mootTokenLexer.h:128
Wrapper class for C FILE* streams.
Definition: mootCIO.h:54
parse/output analysis 'prob' field
Definition: mootTokenIO.h:62
void clear(void)
Definition: mootToken.h:396
size_t theLine
Definition: mootGenericLexer.h:102
Definition: mootCxxIO.h:90
input is pre-analyzed (>= "medium rare")
Definition: mootTokenIO.h:58
mootio::ByteOffset theByte
Definition: mootGenericLexer.h:104
Streambuf-like class for input from C char* buffers.
Definition: mootBufferIO.h:60
include Viterbi trellis predictions in trace?
Definition: mootTokenIO.h:64
Abstract base class for input stream wrappers.
Definition: mootIO.h:129
Abstract class for token I/O.
Definition: mootTokenIO.h:85
static int sanitize_format(int fmt, int fmt_implied=tiofNone, int fmt_default=tiofNone)
pruned output
Definition: mootTokenIO.h:60
static class TokenReader * new_reader(int fmt)