00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #ifndef MOOT_TOKEN_XML_DOC_IO_H
00028
00029 #include <mootXmlDoc.h>
00030 #ifdef MOOT_LIBXML_ENABLED
00031
00032 #include <mootTypes.h>
00033 #include <mootToken.h>
00034 #include <mootTokenIO.h>
00035
00036 moot_BEGIN_NAMESPACE
00037
00038 using namespace std;
00039
/// TokenXmlDoc: an XML document object that is simultaneously a token reader
/// and a token writer (it derives publicly from mootXmlDoc, TokenReader and
/// TokenWriter). Reading is configured through the xpq* XPath query members;
/// get_token(), get_sentence(), put_token(), put_sentence() and the other
/// non-inline methods are only declared here and are defined elsewhere.
00047 class TokenXmlDoc : public mootXmlDoc, public TokenReader, public TokenWriter
00048 {
00049 public:
00050
00051
00052
00054
00055
00056 public:
00057
00058
00059
/// Name of the input source. Set by sourceName() and forwarded to
/// loadFile() / loadBuffer() by fromFile() / fromString().
00061 std::string srcname;
00062
00063
00064
00065
00066
00067
/// Default parser option bitmask: XML_PARSE_RECOVER combined with
/// XML_PARSE_NOCDATA. The constructor copies this value into xml_options.
00071 const static int defaultParserOptions
00072 = (0
00073 | XML_PARSE_RECOVER
00074
00075
00076
00077
00078
00079 | XML_PARSE_NOCDATA
00080 );
00082
00083
00084
/// XPath query member for sentences (judging by its name).
/// NOTE(review): its initializer expression is truncated in this listing.
00093 mootXPathQuery xpqSentence;
00094
/// XPath query member for tokens; the constructor initializes it with the
/// relative expression ./w
00101 mootXPathQuery xpqToken;
00102
/// XPath query member for token text; the constructor initializes it with
/// the relative expression ./text/text()
00111 mootXPathQuery xpqText;
00112
/// XPath query member for the best tag; the constructor initializes it with
/// the relative expression ./moot.tag/text()
00120 mootXPathQuery xpqBesttag;
00121
/// XPath query member for analyses (judging by its name).
/// NOTE(review): its initializer expression is truncated in this listing.
00131 mootXPathQuery xpqAnalysis;
00132
/// XPath query member for part-of-speech tags; the constructor initializes
/// it with the relative attribute expression ./@pos
00140 mootXPathQuery xpqPostag;
00142
00143
/// Token buffer owned by this object; the constructor points tr_token at it.
00146 mootToken curtok;
/// Sentence buffer owned by this object; the constructor points tr_sentence at it.
00147 mootSentence cursent;
/// Analysis buffer. It is not referenced by any inline code in this header;
/// presumably it is used by the out-of-line reader methods (TODO confirm).
00148 mootToken::Analysis curanal;
00149
00150
00152
00153
00154
00155
00156
00157
/// Root element name used for output; the constructor initializes it to doc.
/// NOTE(review): the exact use is in the out-of-line writer code, not visible here.
00165 std::string outputRootName;
00167
00168
/// XML node associated with output; starts out as NULL.
/// NOTE(review): presumably the node under which written tokens are placed;
/// confirm against put_token() / put_sentence().
00171 xmlNodePtr outputNode;
00172
00173
00174 public:
00175
00176
00177
00178
00179
00182
/// Constructor.
/// @param fmt I/O format flags (default: tiofWellDone). The tiofXML bit is
///            always forced on before the flags are stored.
///
/// Effects visible in the body:
///  - tw_format and tr_format are both set to the adjusted flags;
///  - tr_token / tr_sentence are pointed at the member buffers curtok / cursent;
///  - xml_options is set to defaultParserOptions;
///  - if tiofPretty is requested, XML_PARSE_NOBLANKS is added to xml_options
///    and xml_format is set to true.
///
/// NOTE(review): the initializers for xpqSentence and xpqAnalysis below are cut
/// off in the middle of their string literals in this listing (most likely a
/// comment-stripping step mistook part of the XPath expression for a comment).
/// Restore both lines from the original header before compiling.
/// NOTE(review): the meaning of the second (true) argument passed to the
/// mootXPathQuery constructors is not visible here.
00184 TokenXmlDoc(int fmt=tiofWellDone)
00185 :
00186
00187 srcname(""),
00188
00189 xpqSentence("
00190 xpqToken("./w",true),
00191 xpqText("./text/text()",true),
00192 xpqBesttag("./moot.tag/text()",true),
00193 xpqAnalysis(".
00194 xpqPostag("./@pos",true),
00195
00196
00197
00198
00199
00200 outputRootName("doc"),
00201 outputNode(NULL)
00202 {
00203
// Force the XML format bit on. The guard is redundant, since OR-ing the bit
// in unconditionally gives the same result.
00204 if (! (fmt&tiofXML) ) fmt |= tiofXML;
// Reader and writer share the same format flags.
00205 tw_format = tr_format = fmt;
00206
00207
// Expose the member buffers through the reader pointers.
00208 tr_token = &curtok;
00209 tr_sentence = &cursent;
00210
00211
// Parser options: start from the class default, then adjust for pretty mode.
00212 xml_options = defaultParserOptions;
00213 if (fmt & tiofPretty) {
00214 xml_options |= XML_PARSE_NOBLANKS;
00215 xml_format = true;
00216 }
00217 };
00218
/// Destructor. Resets tr_token and tr_sentence to NULL.
/// NOTE(review): these pointers refer to the members curtok / cursent, so
/// clearing them presumably keeps a base-class destructor from treating them
/// as separately owned objects; confirm against TokenReader.
00220 virtual ~TokenXmlDoc(void)
00221 {
00222
00223 tr_token = NULL;
00224 tr_sentence = NULL;
00225 };
00226
00227
00228
00229
/// Reset this object by delegating to mootXmlDoc::reset(). No other state is
/// touched by the visible body.
00231 virtual void reset(void)
00232 {
00233 mootXmlDoc::reset();
00234
00235 };
00237
00238
00239
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
/// Set the source name that later fromFile() / fromString() calls pass on.
/// @param myname new value for srcname
00256 virtual void sourceName(const std::string &myname) { srcname = myname; };
00257
/// Select a FILE stream as input: calls TokenReader::fromFile() and then
/// loads the document via loadFile(), passing NULL for the two middle
/// arguments and srcname as the last one.
/// @param file input stream
00262 virtual void fromFile(FILE *file)
00263 {
00264 TokenReader::fromFile(file);
00265 loadFile(file,NULL,NULL,srcname);
00266 };
00267
/// Select a C string as input: calls TokenReader::fromString() and then
/// loads the document via loadBuffer() with the length obtained from strlen().
/// @param s NUL-terminated buffer holding the document text
/// NOTE(review): s is passed to strlen() unchecked, so it must not be NULL.
00272 virtual void fromString(const char *s)
00273 {
00274 TokenReader::fromString(s);
00275 loadBuffer(s, strlen(s), NULL, NULL, srcname);
00276 };
00277
/// Read the next token; returns a mootTokenType. Defined elsewhere.
00281 virtual mootTokenType get_token(void);
00282
/// Read the next sentence; returns a mootTokenType. Defined elsewhere.
00288 virtual mootTokenType get_sentence(void);
00289
/// Diagnostic hook taking a format string plus variadic arguments.
/// Defined elsewhere.
00291 virtual void carp(const char *fmt, ...);
00293
00294
00295
00298
/// Select a string as output sink by delegating to TokenWriter::toString().
/// @param s string passed through to the base implementation
00306 virtual void toString(std::string &s)
00307 {
00308 TokenWriter::toString(s);
00309 };
00310
/// Select a FILE stream as output sink: sets the tiofFile bit and clears the
/// tiofString bit in tw_format, then stores the stream in tw_sink.
/// Nothing is written by this call itself.
/// @param file output stream
00318 virtual void toFile(FILE *file)
00319 {
00320 tw_format |= tiofFile;
00321 tw_format &= ~tiofString;
00322 tw_sink = file;
00323 };
00324
/// Write a single token. Defined elsewhere.
00339 virtual void put_token(const mootToken &token);
00340
/// Write a whole sentence. Defined elsewhere.
00345 virtual void put_sentence(const mootSentence &sentence);
00346
/// Token writer variant for local tokens (see is_local_token()).
/// Defined elsewhere; NOTE(review): exact behavior not visible here.
00348 void put_token_local(const mootToken &token);
00349
/// Token writer variant for non-local tokens. Defined elsewhere;
/// NOTE(review): exact behavior not visible here.
00351 void put_token_nonlocal(const mootToken &token);
00353
00354
00355
/// Hook returning bool; by its name it runs after a document has been loaded
/// (TODO confirm against mootXmlDoc). Defined elsewhere.
00368 virtual bool _post_load_hook(void);
00369
/// Evaluate an XPath query, optionally relative to a context node.
/// @param query   query object to evaluate
/// @param xml_ctx context node (default: NULL)
/// @return bool; NOTE(review): presumably true on success, confirm in the definition.
00371 bool evalQuery(mootXPathQuery &query, xmlNodePtr xml_ctx=NULL);
00372
/// Test whether a token is local to this document.
/// @return true iff the token type is TokTypeXML and the XML node stored in
///         token.user_data belongs to this object (its doc equals xml_doc).
/// The short-circuit && means user_data is only dereferenced for TokTypeXML tokens.
/// NOTE(review): user_data itself is not checked for NULL before the dereference.
00379 inline bool is_local_token(const mootToken &token) const
00380 {
00381 return (token.toktype() == TokTypeXML
00382 &&
00383 ((const xmlNodePtr)(token.user_data))->doc == xml_doc);
00384 };
00385
/// Test whether every token of a sentence is local to this document.
/// @return false as soon as one token fails is_local_token(); true otherwise,
///         which includes the empty sentence.
00392 inline bool is_local_sentence(const mootSentence &sentence) const
00393 {
00394 for (mootSentence::const_iterator si = sentence.begin();
00395 si != sentence.end();
00396 si++)
00397 {
00398 if (!is_local_token(*si)) return false;
00399 }
00400 return true;
00401 };
00403
00404 };
00405
00406 moot_END_NAMESPACE
00407
00408 #endif
00409
00410 #endif