34 #ifndef _MOOT_TOKEN_EXPAT_IO_H 35 #define _MOOT_TOKEN_EXPAT_IO_H 39 #ifdef MOOT_EXPAT_ENABLED 75 TRX_Default = 0x00000000,
76 TRX_IsOuter = 0x00000001,
77 TRX_IsRoot = 0x00000002,
78 TRX_IsBodyE = 0x00000004,
79 TRX_IsBodyD = 0x00000008,
80 TRX_IsTokenE = 0x00000010,
81 TRX_IsTokenD = 0x00000020,
82 TRX_IsTokTextE = 0x00000040,
83 TRX_IsTokTextD = 0x00000080,
84 TRX_IsAnalysisE = 0x00000100,
85 TRX_IsAnalysisD = 0x00000200,
86 TRX_IsBestTagE = 0x00000400,
87 TRX_IsBestTagD = 0x00000800,
/// Bitwise OR of the TRX_IsBodyD, TRX_IsTokenD, TRX_IsTokTextD, TRX_IsAnalysisD
/// and TRX_IsBestTagD flags. Used as the default `inheritanceMask` argument of
/// next_node_info(), where it is AND-ed with the front entry of the node stack.
92 const static int defaultNodeInheritanceMask
93 = TRX_IsBodyD|TRX_IsTokenD|TRX_IsTokTextD|TRX_IsAnalysisD|TRX_IsBestTagD;
111 std::string body_elt;
113 std::string token_elt;
114 std::string text_elt;
115 std::string analysis_elt;
116 std::string postag_attr;
117 std::string besttag_elt;
118 std::string location_elt;
119 std::string offset_attr;
120 std::string length_attr;
153 size_t buflen =MOOT_DEFAULT_EXPAT_BUFLEN,
155 const std::string &encoding =
"",
156 const std::string &name =
"TokenReaderExpat")
164 analysis_elt(
"analysis"),
166 besttag_elt(
"moot.tag"),
167 location_elt(
"moot.loc"),
168 offset_attr(
"offset"),
169 length_attr(
"length"),
173 tr_sentence = &trx_sentbuf;
189 virtual void reset(
void);
199 virtual void reader_name(
const std::string &myname)
201 TokenReader::reader_name(myname);
206 virtual void close(
void);
209 TokenReader::from_mstream(mistreamp);
210 mootExpatParser::from_mstream(tr_istream,
false);
214 TokenReader::from_mstream(mis);
215 mootExpatParser::from_mstream(tr_istream,
false);
/// Select a named file as input source.
/// Forwards to TokenReader::from_filename(filename) first, then passes
/// tr_istream on to mootExpatParser::from_mstream().
/// NOTE(review): the meaning of the trailing `false` argument is not visible
/// here -- confirm against mootExpatParser::from_mstream().
218 virtual void from_filename(
const char *filename) {
219 TokenReader::from_filename(filename);
220 mootExpatParser::from_mstream(tr_istream,
false);
/// Select a C stdio stream (FILE*) as input source.
/// Forwards to TokenReader::from_file(infile) first, then passes
/// tr_istream on to mootExpatParser::from_mstream().
/// NOTE(review): the meaning of the trailing `false` argument is not visible
/// here -- confirm against mootExpatParser::from_mstream().
222 virtual void from_file(FILE *infile) {
223 TokenReader::from_file(infile);
224 mootExpatParser::from_mstream(tr_istream,
false);
/// Select a file descriptor as input source.
/// Forwards to TokenReader::from_fd(fd) first, then passes
/// tr_istream on to mootExpatParser::from_mstream().
/// NOTE(review): the meaning of the trailing `false` argument is not visible
/// here -- confirm against mootExpatParser::from_mstream().
226 virtual void from_fd(
int fd) {
227 TokenReader::from_fd(fd);
228 mootExpatParser::from_mstream(tr_istream,
false);
/// Select an in-memory buffer of `len` bytes starting at `buf` as input source.
/// Forwards to TokenReader::from_buffer(buf,len) first, then passes
/// tr_istream on to mootExpatParser::from_mstream().
/// NOTE(review): the meaning of the trailing `false` argument is not visible
/// here -- confirm against mootExpatParser::from_mstream().
230 virtual void from_buffer(
const void *buf,
size_t len) {
231 TokenReader::from_buffer(buf,len);
232 mootExpatParser::from_mstream(tr_istream,
false);
/// Select a C++ std::istream as input source.
/// Forwards to TokenReader::from_cxxstream(is) first, then passes
/// tr_istream on to mootExpatParser::from_mstream().
/// NOTE(review): the meaning of the trailing `false` argument is not visible
/// here -- confirm against mootExpatParser::from_mstream().
234 virtual void from_cxxstream(std::istream &is) {
235 TokenReader::from_cxxstream(is);
236 mootExpatParser::from_mstream(tr_istream,
false);
273 bool ensure_cb_fullsents(
void);
/// Derive node-info flags from the current node stack.
/// When the stack is non-empty, the result is the front stack entry AND-ed
/// with `inheritanceMask` (default: defaultNodeInheritanceMask).
/// NOTE(review): the value returned for an empty stack is not visible here;
/// presumably it is `emptyStackValue` (default: TRX_IsOuter) -- confirm.
276 inline int next_node_info(
int emptyStackValue=TRX_IsOuter,
277 int inheritanceMask=defaultNodeInheritanceMask)
279 return (stack.empty()
281 : (stack.front() & inheritanceMask));
/// Get the info flags stored at the front of the node stack.
/// \param emptyStackValue value returned when the stack is empty (default: TRX_IsOuter)
/// \returns stack.front() unmodified, or `emptyStackValue` for an empty stack
285 inline int top_node_info(
int emptyStackValue=TRX_IsOuter)
287 return stack.empty() ? emptyStackValue : stack.front();
291 #ifdef MOOT_DEBUG_EXPAT 310 if (!info) info = top_node_info();
312 save_context_data(ctb, toktype, info);
331 void save_context_data(
const char *text,
size_t len,
341 virtual void XmlDeclHandler(
const XML_Char *version,
342 const XML_Char *encoding,
344 virtual void StartElementHandler(
const char *el,
const char **attr);
345 virtual void EndElementHandler(
const char *el);
346 virtual void CharacterDataHandler(
const XML_Char *s,
int len);
347 virtual void CommentHandler(
const XML_Char *s);
348 virtual void DefaultHandler(
const XML_Char *s,
int len);
/// Get the current input line number.
/// \returns XML_GetCurrentLineNumber(parser) converted to size_t,
///          or 0 when `parser` evaluates to false (no parser available)
355 virtual size_t line_number(
void) {
356 return parser ?
static_cast<size_t>(XML_GetCurrentLineNumber(parser)) : 0;
/// "Setter" overload for the line number: the argument `n` is ignored and the
/// current value of line_number() is returned unchanged.
360 virtual size_t line_number(
size_t n) {
return line_number(); };
/// Get the current input column number.
/// Fix: this previously queried XML_GetCurrentLineNumber(), so callers got the
/// line number back instead of the column; it now asks expat for the column.
/// \returns XML_GetCurrentColumnNumber(parser) converted to size_t,
///          or 0 when `parser` evaluates to false (no parser available)
363 virtual size_t column_number(
void) {
364 return parser ?
static_cast<size_t>(XML_GetCurrentColumnNumber(parser)) : 0;
/// "Setter" overload for the column number: the argument `n` is ignored and
/// the current value of column_number() is returned unchanged.
368 virtual size_t column_number(
size_t n) {
return column_number(); };
379 virtual void carp(
char *fmt, ...);
436 std::string root_elt;
438 std::string token_elt;
439 std::string text_elt;
440 std::string analysis_elt;
441 std::string postag_attr;
442 std::string besttag_elt;
443 std::string location_elt;
444 std::string offset_attr;
445 std::string length_attr;
452 std::string twx_encoding;
476 ,
bool got_raw_xml =
false 477 ,
const std::string &encoding =
"" 478 ,
const std::string &name =
"TokenWriterExpat" 485 inline void setEncoding(
const std::string &encoding=
"")
487 twx_encoding = encoding;
512 virtual void close(
void);
/// Write a single token: forwards `token` to _put_token() together with the
/// writer's own output stream, tw_ostream.
521 virtual void put_token(
const mootToken &token) {
522 _put_token(token,tw_ostream);
527 _put_tokens(tokens,tw_ostream);
/// Write a whole sentence: forwards `sentence` to _put_sentence() together
/// with the writer's own output stream, tw_ostream.
531 virtual void put_sentence(
const mootSentence &sentence) {
532 _put_sentence(sentence,tw_ostream);
/// Begin a comment block: forwards to _put_comment_block_begin() with the
/// writer's own output stream, tw_ostream.
536 virtual void put_comment_block_begin(
void) {
537 _put_comment_block_begin(tw_ostream);
/// End a comment block: forwards to _put_comment_block_end() with the
/// writer's own output stream, tw_ostream.
541 virtual void put_comment_block_end(
void) {
542 _put_comment_block_end(tw_ostream);
/// Write `len` bytes of raw data starting at `buf`: forwards both arguments to
/// _put_raw_buffer() together with the writer's own output stream, tw_ostream.
546 virtual void put_raw_buffer(
const char *buf,
size_t len) {
547 _put_raw_buffer(buf,len,tw_ostream);
565 if (use_raw_xml) _put_token_raw(token,os);
566 else _put_token_gen(token,os);
574 for (mootSentence::const_iterator si=tokens.begin(); si!=tokens.end(); si++) _put_token_raw(*si, os);
576 for (mootSentence::const_iterator si=tokens.begin(); si!=tokens.end(); si++) _put_token_gen(*si, os);
583 if (!os || (tw_format&tiofNone) || !os->
valid())
return;
585 for (mootSentence::const_iterator si=sentence.begin(); si!=sentence.end(); si++) _put_token_raw(*si, os);
587 for (mootSentence::const_iterator si=sentence.begin(); si!=sentence.end(); si++) _put_token_gen(*si, os);
605 #endif // MOOT_EXPAT_ENABLED 607 #endif // _MOOT_TOKEN_EXPAT_IO_H Experimental XML writer class for use with expat-parsed XML or vanilla input.
Definition: mootTokenExpatIO.h:385
Conserve raw XML.
Definition: mootTokenIO.h:55
interface to librecode (optional)
size_t cb_offset
current read offset position in buffer
Definition: mootBufferIO.h:64
safely includes autoheader preprocessor macros
Definition: mootToken.h:76
slist< int > NodeInfoStack
Definition: mootTokenExpatIO.h:94
Abstract class for token input.
Definition: mootTokenIO.h:208
XML format.
Definition: mootTokenIO.h:54
Abstract base class for output stream wrappers.
Definition: mootIO.h:194
High-level token information object.
Definition: mootToken.h:96
void scan_request(const std::string &reqstr)
Definition: mootRecode.h:371
Utility class for expat input contexts.
Definition: mootExpatParser.h:69
no format
Definition: mootTokenIO.h:49
Experimental XML reader class using expat.
Definition: mootTokenExpatIO.h:62
C++ Wrapper for expat XML parsers.
Definition: mootExpatParser.h:56
size_t cb_used
used length of buffer (in bytes)
Definition: mootBufferIO.h:65
Special 2-phase recoder object for XML text.
Definition: mootRecode.h:314
moot::OffsetT ByteOffset
typedef for (byte) offsets (may be unsigned)
Definition: mootIO.h:55
list< mootToken > mootSentence
Definition: mootToken.h:630
mootTokenTypeE
Definition: mootToken.h:71
Abstract class for token output.
Definition: mootTokenIO.h:700
Abstract and native classes for I/O of moot::mootToken objects.
Definition: mootToken.h:78
virtual bool valid(void)
Definition: mootIO.h:99
C++ wrapper class for generic expat XML parsers (optional)
const char * cb_rdata
underlying character data buffer
Definition: mootBufferIO.h:63
Streambuf-like class for input from C char* buffers.
Definition: mootBufferIO.h:60
Abstract base class for input stream wrappers.
Definition: mootIO.h:129
xmlNodeFlags
Definition: mootTokenExpatIO.h:71