00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #ifndef _moot_TOKEN_IO_H
00030 #define _moot_TOKEN_IO_H
00031
00032 #include <mootToken.h>
00033 #include <mootTokenLexer.h>
00034
00035 #include <mootIO.h>
00036 #include <mootCIO.h>
00037 #include <mootCxxIO.h>
00038 #include <mootBufferIO.h>
00039
00040 #include <stdio.h>
00041 #include <stdarg.h>
00042 #include <stdexcept>
00043
00044
00045 namespace moot {
00046
00047
00048
00049
/**
 * \brief Bit flags describing a token I/O format.
 *
 * Every enumerator other than \c tiofNone occupies exactly one distinct bit,
 * so values can be combined with bitwise OR and stored in a plain \c int
 * (which is how the rest of this header passes formats around).
 */
enum TokenIOFormatE {
  tiofNone     = 0,        /**< no bits set */
  tiofUnknown  = 1 << 0,   /**< 0x001 */
  tiofNull     = 1 << 1,   /**< 0x002 */
  tiofUser     = 1 << 2,   /**< 0x004 */
  tiofNative   = 1 << 3,   /**< 0x008 */
  tiofXML      = 1 << 4,   /**< 0x010 */
  tiofConserve = 1 << 5,   /**< 0x020 */
  tiofPretty   = 1 << 6,   /**< 0x040 */
  tiofText     = 1 << 7,   /**< 0x080 */
  tiofAnalyzed = 1 << 8,   /**< 0x100 */
  tiofTagged   = 1 << 9,   /**< 0x200 */
  tiofPruned   = 1 << 10   /**< 0x400 */
};

/** Convenience alias for \c TokenIOFormatE. */
typedef TokenIOFormatE TokenIOFormat;
00066
00068 static const int tiofRare = tiofText;
00069
00071 static const int tiofMediumRare = tiofText|tiofAnalyzed;
00072
00074 static const int tiofMedium = tiofText|tiofTagged;
00075
00077 static const int tiofWellDone = tiofText|tiofAnalyzed|tiofTagged;
00078
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00098 class TokenIO {
00099 public:
00100
00102
00103
00111 static int parse_format_string(const std::string &fmtString);
00112
00120 static int guess_filename_format(const char *filename);
00121
00126 static bool is_empty_format(int fmt);
00127
00131 static int sanitize_format(int fmt,
00132 int fmt_implied=tiofNone,
00133 int fmt_default=tiofNone);
00134
00143 static int parse_format_request(const char *request,
00144 const char *filename=NULL,
00145 int fmt_implied=tiofNone,
00146 int fmt_default=tiofNone);
00147
00149 static std::string format_canonical_string(int fmt);
00151
00152
00154
00155
00161 static class TokenReader *new_reader(int fmt);
00162
00169 static class TokenWriter *new_writer(int fmt);
00171 };
00172
00173
00174
00175
00176
00178 class TokenReader : public TokenIO {
00179 public:
00181 static const size_t TR_DEFAULT_BUFSIZE = 256;
00182
00183 public:
00185 int tr_format;
00186
00188 std::string tr_name;
00189
00191 mootio::mistream *tr_istream;
00192
00194 bool tr_istream_created;
00195
00204 mootToken *tr_token;
00205
00214 mootSentence *tr_sentence;
00215
00216 public:
00217
00218
00219
00228 TokenReader(int fmt =tiofUnknown,
00229 const std::string &name ="TokenReader")
00230 : tr_format(fmt),
00231 tr_name(name),
00232 tr_istream(NULL),
00233 tr_istream_created(false),
00234 tr_token(NULL),
00235 tr_sentence(NULL)
00236 {};
00237
00239 virtual ~TokenReader(void)
00240 {
00241 TokenReader::close();
00242 };
00243
00247 inline void tr_clear(void)
00248 {
00249 if (tr_token) tr_token->clear();
00250 if (tr_sentence) tr_sentence->clear();
00251 };
00253
00254
00255
00256
00257
00260
00266 virtual void from_mstream(mootio::mistream *mistreamp) {
00267 close();
00268 tr_istream = mistreamp;
00269 byte_number(1);
00270 line_number(1);
00271 column_number(0);
00272 tr_istream_created = false;
00273 };
00274
00279 virtual void from_mstream(mootio::mistream &mis) {
00280 from_mstream(&mis);
00281 };
00282
00289 virtual void from_filename(const char *filename)
00290 {
00291 from_mstream(new mootio::mifstream(filename,"rb"));
00292 tr_istream_created = true;
00293 if (!tr_istream->valid()) {
00294 carp("open failed for \"%s\": %s", filename, strerror(errno));
00295 close();
00296 }
00297 };
00298
00305 virtual void from_file(FILE *file)
00306 {
00307 from_mstream(new mootio::micstream(file));
00308 tr_istream_created = true;
00309 };
00310
00317 virtual void from_fd(int fd)
00318 {
00319 close();
00320 throw domain_error("from_fd(): not implemented");
00321 };
00322
00329 virtual void from_buffer(const void *buf, size_t len)
00330 {
00331 from_mstream(new mootio::micbuffer(buf,len));
00332 tr_istream_created = true;
00333 };
00334
00341 virtual void from_string(const char *s) {
00342 from_buffer(s,strlen(s));
00343 };
00344
00351 virtual void from_cxxstream(std::istream &is)
00352 {
00353 from_mstream(new mootio::micxxstream(is));
00354 tr_istream_created = true;
00355 };
00356
00365 virtual void close(void) {
00366 if (tr_istream_created) {
00367 tr_istream->close();
00368 if (tr_istream) delete tr_istream;
00369 }
00370 tr_istream_created = false;
00371 tr_istream = NULL;
00372 };
00374
00375
00376
00377
00380
00388 inline mootToken *token(void) { return tr_token; };
00389
00397 inline mootSentence *sentence(void) { return tr_sentence; };
00398
00404 virtual mootTokenType get_token(void) {
00405 throw domain_error("TokenReader: get_token() not implemented");
00406 };
00407
00413 virtual mootTokenType get_sentence(void);
00415
00416
00417
00418
00423 virtual void reader_name(const std::string &myname) { tr_name = myname; };
00424
00426 virtual size_t line_number(void) { return 0; };
00427
00429 virtual size_t line_number(size_t n) { return n; };
00430
00432 virtual size_t column_number(void) { return 0; };
00433
00435 virtual size_t column_number(size_t n) { return n; };
00436
00438 virtual size_t byte_number(void) { return 0; };
00439
00441 virtual size_t byte_number(size_t n) { return n; };
00442
00444 virtual void carp(const char *fmt, ...);
00446 };
00447
00448
00449
00450
00451
00455 class TokenReaderNative : public TokenReader {
00456 public:
00457
00458
00459
00461 mootTokenLexer lexer;
00462
00464 mootSentence trn_sentence;
00465
00466 public:
00467
00468
00469
00476 TokenReaderNative(int fmt =tiofWellDone,
00477 const std::string &name ="TokenReaderNative")
00478 : TokenReader(fmt,name)
00479 {
00480 tr_format |= tiofNative;
00481 input_is_tagged(tr_format&tiofTagged);
00482
00483 tr_sentence = &trn_sentence;
00484 tr_token = &lexer.mtoken_default;
00485
00486 lexer.to_file(stderr);
00487 };
00488
00490 virtual ~TokenReaderNative(void)
00491 {
00492 close();
00493 };
00495
00496
00497
00498
00500
00501
00502 virtual void from_mstream(mootio::mistream *mis);
00504
00505
00506
00507
00508
00511 virtual mootTokenType get_token(void);
00512 virtual mootTokenType get_sentence(void);
00514
00515
00516
00517
00518
00521
00523 virtual size_t line_number(void) { return lexer.theLine; };
00524
00526 virtual size_t line_number(size_t n) { return lexer.theLine = n; };
00527
00529 virtual size_t column_number(void) { return lexer.theColumn; };
00530
00532 virtual size_t column_number(size_t n) { return lexer.theColumn = n; };
00534
00535
00536
00537
00538
00546 inline bool input_is_tagged(void)
00547 {
00548 return lexer.first_analysis_is_best;
00549 };
00550
00556 inline bool input_is_tagged(bool is_tagged)
00557 {
00558 if (is_tagged) {
00559 tr_format |= tiofTagged;
00560 lexer.first_analysis_is_best = true;
00561 lexer.ignore_first_analysis = true;
00562 } else {
00563 tr_format &= ~tiofTagged;
00564 lexer.first_analysis_is_best = false;
00565 lexer.ignore_first_analysis = false;
00566 }
00567 return is_tagged;
00568 };
00570 };
00571
00572
00573
00574
00575
00576
00577
00578
00579
00581 class TokenWriter : public TokenIO {
00582 public:
00584 int tw_format;
00585
00587 std::string tw_name;
00588
00590 mootio::mostream *tw_ostream;
00591
00593 bool tw_ostream_created;
00594
00596 bool tw_is_comment_block;
00597
00598 public:
00599
00600
00601
00609 TokenWriter(int fmt=tiofWellDone,
00610 const std::string &name="TokenWriter")
00611 : tw_format(fmt),
00612 tw_name(name),
00613 tw_ostream(NULL),
00614 tw_ostream_created(false)
00615 {};
00616
00618 virtual ~TokenWriter(void)
00619 {
00620
00621 };
00623
00624
00625
00626
00629
00635 virtual void to_mstream(mootio::mostream *mostreamp) {
00636 close();
00637 tw_ostream = mostreamp;
00638 if (!(tw_format&tiofNull) && (!tw_ostream || !tw_ostream->valid())) {
00639 carp("Warning: selecting output to invalid stream");
00640 }
00641 tw_ostream_created = false;
00642 };
00643
00648 virtual void to_mstream(mootio::mostream &mos) {
00649 to_mstream(&mos);
00650 };
00651
00657 virtual void to_filename(const char *filename)
00658 {
00659 to_mstream(new mootio::mofstream(filename,"wb"));
00660 tw_ostream_created = true;
00661 if (!tw_ostream->valid()) {
00662 carp("open failed for \"%s\": %s", filename, strerror(errno));
00663 close();
00664 }
00665 };
00666
00673 virtual void to_file(FILE *file)
00674 {
00675 to_mstream(new mootio::mocstream(file));
00676 tw_ostream_created = true;
00677 };
00678
00685 virtual void to_fd(int fd)
00686 {
00687 close();
00688 throw domain_error("to_fd(): not implemented.");
00689 };
00690
00697 virtual void to_cxxstream(std::ostream &os)
00698 {
00699 to_mstream(new mootio::mocxxstream(os));
00700 tw_ostream_created = true;
00701 };
00702
00711 virtual void close(void) {
00712 if (tw_is_comment_block) put_comment_block_end();
00713 if (tw_ostream && tw_ostream_created) {
00714 tw_ostream->close();
00715 delete tw_ostream;
00716 }
00717 tw_ostream_created = false;
00718 tw_ostream = NULL;
00719 };
00721
00722
00723
00730 virtual void put_token(const mootToken &token) {
00731 throw domain_error("TokenWriter: put_token() not implemented");
00732 };
00733
00739 virtual void put_sentence(const mootSentence &sentence)
00740 {
00741 for (mootSentence::const_iterator si = sentence.begin(); si != sentence.end(); si++)
00742 put_token(*si);
00743 };
00745
00746
00754 virtual void put_comment_block_begin(void) {
00755 tw_is_comment_block = true;
00756 };
00757
00763 virtual void put_comment_block_end(void) {
00764 tw_is_comment_block = false;
00765 };
00766
00771 virtual void put_comment_buffer(const char *buf, size_t len) {
00772 put_comment_block_begin();
00773 put_raw_buffer(buf,len);
00774 put_comment_block_end();
00775 };
00776
00781 virtual void put_comment(const char *s) {
00782 put_comment_buffer(s,strlen(s));
00783 };
00784
00789 virtual void put_comment_buffer(const std::string &s) {
00790 put_comment_buffer(s.data(),s.size());
00791 };
00792
00797 virtual void printf_comment(const char *fmt, ...);
00799
00800
00807 virtual void put_raw_buffer(const char *buf, size_t len)
00808 {};
00813 virtual void put_raw(const char *s) {
00814 put_raw_buffer(s,strlen(s));
00815 };
00820 virtual void put_raw(const std::string &s) {
00821 put_raw_buffer(s.data(),s.size());
00822 };
00823
00828 virtual void printf_raw(const char *fmt, ...);
00830
00831
00836 virtual void writer_name(const std::string &myname) { tw_name = myname; };
00837
00839 virtual void carp(const char *fmt, ...);
00841 };
00842
00843
00844
00845
00849 class TokenWriterNative : public TokenWriter {
00850 public:
00851
00852
00853
00855 mootio::mocbuffer twn_tmpbuf;
00856
00857 public:
00858
00859
00860
00864 TokenWriterNative(int fmt=tiofWellDone,
00865 const std::string name="TokenWriterNative")
00866 : TokenWriter(fmt,name)
00867 {
00868 if (! tw_format&tiofNative ) tw_format |= tiofNative;
00869 };
00870
00872 virtual ~TokenWriterNative(void)
00873 {
00874
00875 };
00877
00878
00879
00880
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893
00894
00897 virtual void put_token(const mootToken &token) {
00898 _put_token(token,tw_ostream);
00899 };
00900 virtual void put_sentence(const mootSentence &sentence) {
00901 _put_sentence(sentence,tw_ostream);
00902 };
00903
00904 virtual void put_raw_buffer(const char *buf, size_t len) {
00905 _put_raw_buffer(buf,len,tw_ostream);
00906 };
00908
00909
00910
00911
00915 void _put_token(const mootToken &token, mootio::mostream *os);
00916
00918 void _put_sentence(const mootSentence &sentence, mootio::mostream *os);
00919
00921 void _put_comment(const char *buf, size_t len, mootio::mostream *os);
00922
00924 void _put_raw_buffer(const char *buf, size_t len, mootio::mostream *os);
00925
00929 inline std::string token2string(const mootToken &token)
00930 {
00931 mostream *tw_ostream_old = tw_ostream;
00932 twn_tmpbuf.clear();
00933 tw_ostream = &twn_tmpbuf;
00934 _put_token(token,tw_ostream);
00935 std::string t2s(twn_tmpbuf.data(), twn_tmpbuf.size());
00936 tw_ostream = tw_ostream_old;
00937 return t2s;
00938 };
00939
00943 inline std::string sentence2string(const mootSentence &sentence)
00944 {
00945 twn_tmpbuf.clear();
00946 _put_sentence(sentence,&twn_tmpbuf);
00947 return std::string(twn_tmpbuf.data(), twn_tmpbuf.size());
00948 };
00950 };
00951
00952 };
00953
00954 #endif