Experimental XML writer class for use with expat-parsed XML or vanilla input.
|
|
| TokenWriterExpat (int fmt=tiofXML, bool got_raw_xml=false, const std::string &encoding="", const std::string &name="TokenWriterExpat") |
|
void | setEncoding (const std::string &encoding="") |
|
virtual | ~TokenWriterExpat (void) |
|
|
virtual void | to_mstream (mootio::mostream *os) |
|
virtual void | close (void) |
|
|
virtual void | put_token (const mootToken &token) |
|
virtual void | put_tokens (const mootSentence &tokens) |
|
virtual void | put_sentence (const mootSentence &sentence) |
|
virtual void | put_comment_block_begin (void) |
|
virtual void | put_comment_block_end (void) |
|
virtual void | put_raw_buffer (const char *buf, size_t len) |
|
|
void | _put_token_raw (const mootToken &token, mootio::mostream *os) |
|
void | _put_token_gen (const mootToken &token, mootio::mostream *os) |
|
void | _put_token (const mootToken &token, mootio::mostream *os) |
|
void | _put_tokens (const mootSentence &tokens, mootio::mostream *os) |
|
void | _put_sentence (const mootSentence &sentence, mootio::mostream *os) |
|
void | _put_comment_block_begin (mootio::mostream *os) |
|
void | _put_comment_block_end (mootio::mostream *os) |
|
void | _put_raw_buffer (const char *buf, size_t len, mootio::mostream *os) |
|
| TokenWriter (int fmt=tiofWellDone, const std::string &name="TokenWriter") |
|
virtual | ~TokenWriter (void) |
|
virtual void | to_mstream (mootio::mostream &mos) |
|
virtual void | to_filename (const char *filename) |
|
virtual void | to_file (FILE *file) |
|
virtual void | to_fd (int fd) |
|
virtual void | to_cxxstream (std::ostream &os) |
|
virtual bool | opened (void) |
|
virtual bool | flush (void) |
|
bool | autoflush (mootio::mostream *os) |
|
virtual void | put_comment_buffer (const char *buf, size_t len) |
|
virtual void | put_comment (const char *s) |
|
virtual void | put_comment_buffer (const std::string &s) |
|
virtual void | printf_comment (const char *fmt,...) |
|
virtual void | put_raw (const char *s) |
|
virtual void | put_raw (const std::string &s) |
|
virtual void | printf_raw (const char *fmt,...) |
|
virtual void | writer_name (const std::string &myname) |
|
virtual void | carp (const char *fmt,...) |
|
|
static int | parse_format_string (const std::string &fmtString) |
|
static int | guess_filename_format (const char *filename) |
|
static bool | is_empty_format (int fmt) |
|
static int | sanitize_format (int fmt, int fmt_implied=tiofNone, int fmt_default=tiofNone) |
|
static int | parse_format_request (const char *request, const char *filename=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone) |
|
static std::string | format_canonical_string (int fmt) |
|
static class TokenReader * | new_reader (int fmt) |
|
static class TokenWriter * | new_writer (int fmt) |
|
static class TokenReader * | file_reader (const char *filename, const char *fmt_request=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone) |
|
static class TokenWriter * | file_writer (const char *filename, const char *fmt_request=__null, int fmt_implied=tiofNone, int fmt_default=tiofNone) |
|
static size_t | pipe_tokens (class TokenReader *reader, class TokenWriter *writer) |
|
static size_t | pipe_sentences (class TokenReader *reader, class TokenWriter *writer) |
|
◆ TokenWriterExpat()
moot::TokenWriterExpat::TokenWriterExpat |
( |
int |
fmt = tiofXML , |
|
|
bool |
got_raw_xml = false , |
|
|
const std::string & |
encoding = "" , |
|
|
const std::string & |
name = "TokenWriterExpat" |
|
) |
| |
Default constructor:
- Parameters
-
fmt | output format, a bitmask of TokenIOFormat flags |
got_raw_xml | Whether this object is part of a lossless XML I/O chain. Useful but cryptic – see use_raw_xml for details. |
encoding | Destination encoding. Support is still somewhat buggy. |
◆ ~TokenWriterExpat()
virtual moot::TokenWriterExpat::~TokenWriterExpat |
( |
void |
| ) |
|
|
inline, virtual |
◆ setEncoding()
void moot::TokenWriterExpat::setEncoding |
( |
const std::string & |
encoding = "" | ) |
|
|
inline |
◆ to_mstream()
◆ close()
virtual void moot::TokenWriterExpat::close |
( |
void |
| ) |
|
|
virtual |
◆ put_token()
virtual void moot::TokenWriterExpat::put_token |
( |
const mootToken & |
token | ) |
|
|
inline, virtual |
Write a single token to the current output sink.
Reimplemented from moot::TokenWriter.
◆ put_tokens()
virtual void moot::TokenWriterExpat::put_tokens |
( |
const mootSentence & |
tokens | ) |
|
|
inline, virtual |
Write a partial sentence to the current output sink.
Reimplemented from moot::TokenWriter.
◆ put_sentence()
virtual void moot::TokenWriterExpat::put_sentence |
( |
const mootSentence & |
sentence | ) |
|
|
inline, virtual |
Write a whole sentence to the current output sink.
Reimplemented from moot::TokenWriter.
◆ put_comment_block_begin()
virtual void moot::TokenWriterExpat::put_comment_block_begin |
( |
void |
| ) |
|
|
inline, virtual |
◆ put_comment_block_end()
virtual void moot::TokenWriterExpat::put_comment_block_end |
( |
void |
| ) |
|
|
inline, virtual |
◆ put_raw_buffer()
virtual void moot::TokenWriterExpat::put_raw_buffer |
( |
const char * |
buf, |
|
|
size_t |
len |
|
) |
| |
|
inline, virtual |
Write some raw data to the current sink. No recoding is performed.
Reimplemented from moot::TokenWriter.
◆ _put_token_raw()
◆ _put_token_gen()
◆ _put_token()
◆ _put_tokens()
◆ _put_sentence()
◆ _put_comment_block_begin()
◆ _put_comment_block_end()
End a comment block, if one is active.
◆ _put_raw_buffer()
void moot::TokenWriterExpat::_put_raw_buffer |
( |
const char * |
buf, |
|
|
size_t |
len, |
|
|
mootio::mostream * |
os |
|
) |
| |
Write some raw data to the current sink. No recoding is performed.
◆ use_raw_xml
bool moot::TokenWriterExpat::use_raw_xml |
Whether the data to write contains raw XML as generated by TokenReaderExpat with save_raw_xml=true. Useful for lossless XML I/O.
- Warning
- If this flag is set, arguments to put_* methods must provide tokens of type TokTypeXMLRaw for all document content except the following:
- sentence boundary markers (TokTypeEOS)
- 'vanilla' tokens (TokTypeVanilla)
In 'use_raw_xml' mode, only the 'besttag' element will be written for 'vanilla' tokens. In other words, TokTypeXMLRaw tokens are expected for token start and end elements, as well as for text, analyses, etc., although no checking is performed for the presence of such elements.
If the flag is false (the default), a document will be generated with a default structure (see root_elt, eos_elt, token_elt, etc.), which should be compatible with the default behavior of TokenReaderExpat.
◆ root_elt
std::string moot::TokenWriterExpat::root_elt |
◆ eos_elt
std::string moot::TokenWriterExpat::eos_elt |
◆ token_elt
std::string moot::TokenWriterExpat::token_elt |
◆ text_elt
std::string moot::TokenWriterExpat::text_elt |
◆ analysis_elt
std::string moot::TokenWriterExpat::analysis_elt |
◆ postag_attr
std::string moot::TokenWriterExpat::postag_attr |
◆ besttag_elt
std::string moot::TokenWriterExpat::besttag_elt |
◆ location_elt
std::string moot::TokenWriterExpat::location_elt |
◆ offset_attr
std::string moot::TokenWriterExpat::offset_attr |
◆ length_attr
std::string moot::TokenWriterExpat::length_attr |
◆ twx_encoding
std::string moot::TokenWriterExpat::twx_encoding |
◆ twx_recoder
Recoder object: handles text (re-)coding
◆ lastc
int moot::TokenWriterExpat::lastc |
The documentation for this class was generated from the following file: