List of all members
moot::TokenWriterExpat Class Reference

Experimental XML writer class for use with expat-parsed XML or vanilla input.

Inheritance diagram for moot::TokenWriterExpat:
Inheritance graph
[legend]
Collaboration diagram for moot::TokenWriterExpat:
Collaboration graph
[legend]

Public Member Functions

Constructors and Such
 TokenWriterExpat (int fmt=tiofXML, bool got_raw_xml=false, const std::string &encoding="", const std::string &name="TokenWriterExpat")
 
void setEncoding (const std::string &encoding="")
 
virtual ~TokenWriterExpat (void)
 
Output Selection
virtual void to_mstream (mootio::mostream *os)
 
virtual void close (void)
 
Overrides
virtual void put_token (const mootToken &token)
 
virtual void put_tokens (const mootSentence &tokens)
 
virtual void put_sentence (const mootSentence &sentence)
 
virtual void put_comment_block_begin (void)
 
virtual void put_comment_block_end (void)
 
virtual void put_raw_buffer (const char *buf, size_t len)
 
Output Utilities
void _put_token_raw (const mootToken &token, mootio::mostream *os)
 
void _put_token_gen (const mootToken &token, mootio::mostream *os)
 
void _put_token (const mootToken &token, mootio::mostream *os)
 
void _put_tokens (const mootSentence &tokens, mootio::mostream *os)
 
void _put_sentence (const mootSentence &sentence, mootio::mostream *os)
 
void _put_comment_block_begin (mootio::mostream *os)
 
void _put_comment_block_end (mootio::mostream *os)
 
void _put_raw_buffer (const char *buf, size_t len, mootio::mostream *os)
 
- Public Member Functions inherited from moot::TokenWriter
 TokenWriter (int fmt=tiofWellDone, const std::string &name="TokenWriter")
 
virtual ~TokenWriter (void)
 
virtual void to_mstream (mootio::mostream &mos)
 
virtual void to_filename (const char *filename)
 
virtual void to_file (FILE *file)
 
virtual void to_fd (int fd)
 
virtual void to_cxxstream (std::ostream &os)
 
virtual bool opened (void)
 
virtual bool flush (void)
 
bool autoflush (mootio::mostream *os)
 
virtual void put_comment_buffer (const char *buf, size_t len)
 
virtual void put_comment (const char *s)
 
virtual void put_comment_buffer (const std::string &s)
 
virtual void printf_comment (const char *fmt,...)
 
virtual void put_raw (const char *s)
 
virtual void put_raw (const std::string &s)
 
virtual void printf_raw (const char *fmt,...)
 
virtual void writer_name (const std::string &myname)
 
virtual void carp (const char *fmt,...)
 

Public Attributes

Output Parameters
bool use_raw_xml
 
std::string root_elt
 Default name of root element. Default="doc". More...
 
std::string eos_elt
 Default name of 'eos' element (sentence boundary). Default="eos". More...
 
std::string token_elt
 Default name of 'token' element. Default="token". More...
 
std::string text_elt
 Default name of 'text' element. Default="text". More...
 
std::string analysis_elt
 Default name of 'analysis' element. Default="analysis". More...
 
std::string postag_attr
 Default name of 'pos' attribute (of 'analysis' elt) Default="pos". More...
 
std::string besttag_elt
 Name of 'best tag' element. Default="moot.tag". More...
 
std::string location_elt
 Name of 'location' element. Default="moot.loc". More...
 
std::string offset_attr
 Name of 'location' element 'offset' attribute. Default="offset". More...
 
std::string length_attr
 Name of 'location' element 'length' attribute. Default="length". More...
 
Internal Data
std::string twx_encoding
 
mootXMLRecoder twx_recoder
 
int lastc
 
- Public Attributes inherited from moot::TokenWriter
int tw_format
 
std::string tw_name
 
mootio::mostream * tw_ostream
 
bool tw_ostream_created
 
bool tw_is_comment_block
 
void * tw_data
 

Additional Inherited Members

- Static Public Member Functions inherited from moot::TokenIO
static int parse_format_string (const std::string &fmtString)
 
static int guess_filename_format (const char *filename)
 
static bool is_empty_format (int fmt)
 
static int sanitize_format (int fmt, int fmt_implied=tiofNone, int fmt_default=tiofNone)
 
static int parse_format_request (const char *request, const char *filename=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone)
 
static std::string format_canonical_string (int fmt)
 
static class TokenReader * new_reader (int fmt)
 
static class TokenWriter * new_writer (int fmt)
 
static class TokenReader * file_reader (const char *filename, const char *fmt_request=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone)
 
static class TokenWriter * file_writer (const char *filename, const char *fmt_request=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone)
 
static size_t pipe_tokens (class TokenReader *reader, class TokenWriter *writer)
 
static size_t pipe_sentences (class TokenReader *reader, class TokenWriter *writer)
 

Constructor & Destructor Documentation

◆ TokenWriterExpat()

moot::TokenWriterExpat::TokenWriterExpat ( int  fmt = tiofXML,
bool  got_raw_xml = false,
const std::string &  encoding = "",
const std::string &  name = "TokenWriterExpat" 
)

Default constructor:

Parameters
fmt: output format, a bitmask of TokenIOFormat flags
got_raw_xml: whether this object is part of a lossless XML I/O chain; useful but cryptic – see use_raw_xml for details.
encoding: destination encoding; still somewhat buggy.

◆ ~TokenWriterExpat()

virtual moot::TokenWriterExpat::~TokenWriterExpat ( void  )
inline virtual

Default destructor

Member Function Documentation

◆ setEncoding()

void moot::TokenWriterExpat::setEncoding ( const std::string &  encoding = "")
inline

Set default output encoding. Still somewhat buggy.

References moot::mootXMLRecoder::scan_request().

◆ to_mstream()

virtual void moot::TokenWriterExpat::to_mstream ( mootio::mostream * os)
virtual

Select output to a mootio::mostream

Reimplemented from moot::TokenWriter.

◆ close()

virtual void moot::TokenWriterExpat::close ( void  )
virtual

Close currently selected output sink

Reimplemented from moot::TokenWriter.

◆ put_token()

virtual void moot::TokenWriterExpat::put_token ( const mootToken & token)
inline virtual

Write a single token to the current output sink.

Reimplemented from moot::TokenWriter.

◆ put_tokens()

virtual void moot::TokenWriterExpat::put_tokens ( const mootSentence & tokens)
inline virtual

Write a partial sentence to the current output sink.

Reimplemented from moot::TokenWriter.

◆ put_sentence()

virtual void moot::TokenWriterExpat::put_sentence ( const mootSentence & sentence)
inline virtual

Write a whole sentence to the current output sink.

Reimplemented from moot::TokenWriter.

◆ put_comment_block_begin()

virtual void moot::TokenWriterExpat::put_comment_block_begin ( void  )
inline virtual

Begin a comment block.

Reimplemented from moot::TokenWriter.

◆ put_comment_block_end()

virtual void moot::TokenWriterExpat::put_comment_block_end ( void  )
inline virtual

End a comment block, if one is active.

Reimplemented from moot::TokenWriter.

◆ put_raw_buffer()

virtual void moot::TokenWriterExpat::put_raw_buffer ( const char *  buf,
size_t  len 
)
inline virtual

Write some raw data to the current sink. No recoding is performed.

Reimplemented from moot::TokenWriter.

◆ _put_token_raw()

void moot::TokenWriterExpat::_put_token_raw ( const mootToken & token,
mootio::mostream * os 
)

Write a single token to a mootio::mostream, raw mode

◆ _put_token_gen()

void moot::TokenWriterExpat::_put_token_gen ( const mootToken & token,
mootio::mostream * os 
)

Write a single token to a mootio::mostream, gen mode

◆ _put_token()

void moot::TokenWriterExpat::_put_token ( const mootToken & token,
mootio::mostream * os 
)
inline

Write a single token to a mootio::mostream

◆ _put_tokens()

void moot::TokenWriterExpat::_put_tokens ( const mootSentence & tokens,
mootio::mostream * os 
)
inline

Write a partial sentence to a mootio::mostream, obeying the use_raw_xml flag.

References moot::tiofNone, and mootio::mstream::valid().

◆ _put_sentence()

void moot::TokenWriterExpat::_put_sentence ( const mootSentence & sentence,
mootio::mostream * os 
)
inline

Write a single sentence to a mootio::mostream, obeying the use_raw_xml flag.

References moot::tiofNone, moot::TokTypeEOS, and mootio::mstream::valid().

◆ _put_comment_block_begin()

void moot::TokenWriterExpat::_put_comment_block_begin ( mootio::mostream * os)

Begin a comment block.

◆ _put_comment_block_end()

void moot::TokenWriterExpat::_put_comment_block_end ( mootio::mostream * os)

End a comment block, if one is active.

◆ _put_raw_buffer()

void moot::TokenWriterExpat::_put_raw_buffer ( const char *  buf,
size_t  len,
mootio::mostream * os 
)

Write some raw data to the current sink. No recoding is performed.

Member Data Documentation

◆ use_raw_xml

bool moot::TokenWriterExpat::use_raw_xml

Whether the data to write contains raw XML as generated by TokenReaderExpat with save_raw_xml=true. Useful for lossless XML I/O.

Warning
If this flag is set, arguments to put_* methods must provide tokens of type TokTypeXMLRaw for all document content except the following:
  • sentence boundary markers (TokTypeEOS)
  • 'vanilla' tokens (TokTypeVanilla)

In 'use_raw_xml' mode, only the 'besttag' element will be written for 'vanilla' tokens – in other words, TokTypeXMLRaw tokens are expected for token start- and end-elements, as well as for text, analyses, etc., although no checking is performed for the presence of such elements.

If the flag is false (the default), a document will be generated with a default structure (see root_elt, eos_elt, token_elt, etc.), which should be compatible with the default behavior of TokenReaderExpat.

◆ root_elt

std::string moot::TokenWriterExpat::root_elt

◆ eos_elt

std::string moot::TokenWriterExpat::eos_elt

◆ token_elt

std::string moot::TokenWriterExpat::token_elt

◆ text_elt

std::string moot::TokenWriterExpat::text_elt

◆ analysis_elt

std::string moot::TokenWriterExpat::analysis_elt

◆ postag_attr

std::string moot::TokenWriterExpat::postag_attr

◆ besttag_elt

std::string moot::TokenWriterExpat::besttag_elt

◆ location_elt

std::string moot::TokenWriterExpat::location_elt

◆ offset_attr

std::string moot::TokenWriterExpat::offset_attr

◆ length_attr

std::string moot::TokenWriterExpat::length_attr

◆ twx_encoding

std::string moot::TokenWriterExpat::twx_encoding

Name of destination encoding. See setEncoding()

◆ twx_recoder

mootXMLRecoder moot::TokenWriterExpat::twx_recoder

Recoder object: handles text (re-)coding

◆ lastc

int moot::TokenWriterExpat::lastc

Last character written


The documentation for this class was generated from the following file: