mootExpatParser.h
Go to the documentation of this file.
1 /* -*- Mode: C++ -*- */
2 
3 /*
4  libmoot : moocow's part-of-speech tagging library
5  Copyright (C) 2003-2009 by Bryan Jurish <moocow@cpan.org>
6 
7  This library is free software; you can redistribute it and/or
8  modify it under the terms of the GNU Lesser General Public
9  License as published by the Free Software Foundation; either
10  version 3 of the License, or (at your option) any later version.
11 
12  This library is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  Lesser General Public License for more details.
16 
17  You should have received a copy of the GNU Lesser General Public
18  License along with this library; if not, write to the Free Software
19  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 
22 /*--------------------------------------------------------------------------
23  * File: mootExpatParser.h (formerly mootXmlParser.h)
24  * Author: Bryan Jurish <moocow@cpan.org>
25  * Description:
26  * + moocow's PoS tagger : expat wrapper
27  *--------------------------------------------------------------------------*/
28 
34 #ifndef _MOOT_EXPAT_PARSER_H
35 #define _MOOT_EXPAT_PARSER_H
36 
37 #include <mootConfig.h>
38 
39 #ifdef MOOT_EXPAT_ENABLED
40 
41 #include <expat.h>
42 
43 #include <stdexcept>
44 
45 #include <mootCIO.h>
46 #include <mootCxxIO.h>
47 #include <mootBufferIO.h>
48 
49 
50 
52 #define MOOT_DEFAULT_EXPAT_BUFLEN 8192
53 
54 namespace moot {
55 
56 /*--------------------------------------------------------------
57  * TYPES
58  */
59 
61  class mootExpatParser {
62  public:
63  /*----------------------------------------------------
64  * mootExpatParser: ContextBuffer
65  */
66 
74  class ContextBuffer : public mootio::micbuffer {
75  public:
77  ContextBuffer(const char *buffer, size_t buflen)
78  : mootio::micbuffer(buffer, buflen)
79  {};
80 
82  ContextBuffer(XML_Parser parser)
83  : mootio::micbuffer(NULL,0)
84  {
85  assert(parser != NULL);
86  int cb_offset_i, cb_used_i;
87  cb_rdata = XML_GetInputContext(parser, &cb_offset_i, &cb_used_i);
88  cb_rdata += cb_offset_i;
89  cb_offset = 0;
90  cb_used = XML_GetCurrentByteCount(parser);
91  };
92  ~ContextBuffer(void) {};
93 
94  }; //-- /class mootExpatParser::ContextBuffer
95 
96  public:
97  /*----------------------------------------------------
98  * mootExpatParser: Data
99  */
102  //-- i/o streams
104  bool xp_istream_created;
105 
109  //-- expat stuff
110  char *xml_buf;
111  size_t xml_buflen;
112  std::string xml_encoding;
113  //int xml_done; ///< whether we're done parsing yet
114  XML_Parser parser;
116 
117  public:
120  /*----------------------------------------------------
121  * mootExpatParser: Constructor
122  */
128  mootExpatParser(size_t bufsize=MOOT_DEFAULT_EXPAT_BUFLEN,
129  const std::string &encoding="");
130 
132  virtual void setEncoding(const std::string &encoding="")
133  {
134  xml_encoding = encoding;
135  reset();
136  };
137 
142  virtual void reset(void);
143 
144  /*----------------------------------------------------
145  * mootExpatParser: Destructor
146  */
148  virtual ~mootExpatParser(void);
150 
151  /*----------------------------------------------------*/
154 
160  virtual void from_mstream(mootio::mistream *mistreamp, bool autoclose=false);
161 
167  virtual void from_mstream(mootio::mistream &mistream, bool autoclose=false) { from_mstream(&mistream,autoclose); };
168 
175  virtual void from_filename(const char *filename);
176 
183  virtual void from_file(FILE *file);
184 
191  virtual void from_fd(int fd)
192  {
193  close();
194  throw domain_error("from_fd(): not implemented");
195  };
196 
203  virtual void from_buffer(const void *buf, size_t len);
204 
211  virtual void from_string(const char *s) { from_buffer(s,strlen(s)); };
212 
219  virtual void from_cxxstream(std::istream &is);
220 
228  virtual void close(void);
230 
231  /*----------------------------------------------------*/
238  virtual bool parse_check(void);
239 
241  bool parse_filename(const std::string &filename);
242 
244  bool parse_file(FILE *infile=stdin, const std::string &in_name="");
245 
255  bool parse_buffer(const char *buf, size_t buflen);
256 
261  bool parse_all(mootio::mistream *in=NULL);
262 
275  bool parse_chunk(int &nbytes, int &is_final, mootio::mistream *in=NULL);
277 
278  /*----------------------------------------------------*/
281 
282  /*----------------------------------------------------
283  * mootExpatParser: Printing and Context
284  */
286  virtual void context_dump(FILE *tofile=NULL);
287 
289  virtual std::string context_string(void)
290  {
291  return parser ? ContextBuffer(parser).as_string() : std::string("");
292  };
293 
294  /*----------------------------------------------------
295  * mootExpatParser: Error Reporting
296  */
298  virtual void carp(const char *fmt, ...);
299 
301  virtual void xpcarp(const char *fmt, ...);
302 
304  inline long int line_number(void)
305  { return XML_GetCurrentLineNumber(parser); }
306 
308  inline int column_number(void)
309  { return XML_GetCurrentColumnNumber(parser); }
310 
312  inline long byte_offset(void)
313  { return XML_GetCurrentByteIndex(parser); }
314 
316  inline int byte_count(void)
317  { return XML_GetCurrentByteCount(parser); }
322  /*----------------------------------------------------
323  * mootExpatParser: Handlers
324  */
326  virtual void XmlDeclHandler(const XML_Char *version,
327  const XML_Char *encoding,
328  int standalone)
329  {
330  XML_DefaultCurrent(parser);
331  };
332 
334  virtual void StartElementHandler(const char *el, const char **attr)
335  {
336  XML_DefaultCurrent(parser);
337  };
338 
340  virtual void EndElementHandler(const char *el)
341  {
342  XML_DefaultCurrent(parser);
343  };
344 
346  virtual void CharacterDataHandler(const XML_Char *s, int len)
347  {
348  XML_DefaultCurrent(parser);
349  };
350 
352  virtual void ProcessingInstructionHandler(const XML_Char *s,
353  const XML_Char *target,
354  const XML_Char *data)
355  {
356  XML_DefaultCurrent(parser);
357  };
358 
360  virtual void CommentHandler(const XML_Char *s)
361  {
362  XML_DefaultCurrent(parser);
363  };
364 
365 
367  virtual void StartCdataSectionHandler(void)
368  {
369  XML_DefaultCurrent(parser);
370  };
373  virtual void EndCdataSectionHandler(void)
374  {
375  XML_DefaultCurrent(parser);
376  };
377 
379  virtual void DefaultHandler(const XML_Char *s, int len)
380  {};
382 
385  /*----------------------------------------------------
386  * mootExpatParser: Expat wrappers (static)
387  */
388  /* expat callback wrapper */
389  static void _xp_XmlDeclHandler(mootExpatParser *mp,
390  const XML_Char *version,
391  const XML_Char *encoding,
392  int standalone)
393  {
394  if (mp) mp->XmlDeclHandler(version,encoding,standalone);
395  };
396 
397  /* expat callback wrapper */
399  const char *el,
400  const char **attr)
401  {
402  if (mp) mp->StartElementHandler(el,attr);
403  };
404 
405  /* expat callback wrapper */
406  static void _xp_EndElementHandler(mootExpatParser *mp, const char *el)
407  {
408  if (mp) mp->EndElementHandler(el);
409  };
410 
411  /* expat callback wrapper */
413  const XML_Char *s,
414  int len)
415  {
416  if (mp) mp->CharacterDataHandler(s,len);
417  };
418 
419  /* expat callback wrapper */
420  static void _xp_CommentHandler(mootExpatParser *mp,
421  const XML_Char *s)
422  {
423  if (mp) mp->CommentHandler(s);
424  };
425 
426  /* expat callback wrapper */
427  static void _xp_DefaultHandler(mootExpatParser *mp,
428  const XML_Char *s,
429  int len)
430  {
431  if (mp) mp->DefaultHandler(s,len);
432  };
434  };
435 
436 }; // moot_END_NAMESPACE
437 
438 #endif // moot_EXPAT_ENABLED
439 
440 #endif // MOOT_EXPAT_PARSER_H
virtual void CommentHandler(const XML_Char *s)
Definition: mootExpatParser.h:371
Definition: mootAssocVector.h:39
int column_number(void)
Definition: mootExpatParser.h:319
std::string xml_encoding
Input encoding override (goofy)
Definition: mootExpatParser.h:115
bool parse_filename(const std::string &filename)
bool parse_all(mootio::mistream *in=__null)
virtual void from_string(const char *s)
Definition: mootExpatParser.h:214
const char * data(void) const
Definition: mootBufferIO.h:174
long int line_number(void)
Definition: mootExpatParser.h:315
bool xp_istream_created
Whether the input stream (xp_istream) was created locally.
Definition: mootExpatParser.h:107
mootio abstraction layer for C FILE*s
virtual void from_fd(int fd)
Definition: mootExpatParser.h:194
virtual void EndCdataSectionHandler(void)
Definition: mootExpatParser.h:384
size_t xml_buflen
Allocated size of xml_buf.
Definition: mootExpatParser.h:114
static void _xp_XmlDeclHandler(mootExpatParser *mp, const XML_Char *version, const XML_Char *encoding, int standalone)
Definition: mootExpatParser.h:400
size_t cb_offset
current read offset position in buffer
Definition: mootBufferIO.h:64
safely includes autoheader preprocessor macros
virtual void StartCdataSectionHandler(void)
Definition: mootExpatParser.h:378
bool parse_chunk(int &nbytes, int &is_final, mootio::mistream *in=__null)
virtual void from_cxxstream(std::istream &is)
virtual ~mootExpatParser(void)
static void _xp_CharacterDataHandler(mootExpatParser *mp, const XML_Char *s, int len)
Definition: mootExpatParser.h:423
virtual bool close(void)
Definition: mootIO.h:120
virtual void from_mstream(mootio::mistream *mistreamp, bool autoclose=false)
static void _xp_DefaultHandler(mootExpatParser *mp, const XML_Char *s, int len)
Definition: mootExpatParser.h:438
virtual void XmlDeclHandler(const XML_Char *version, const XML_Char *encoding, int standalone)
Definition: mootExpatParser.h:337
virtual void from_buffer(const void *buf, size_t len)
virtual void carp(const char *fmt,...)
~ContextBuffer(void)
Definition: mootExpatParser.h:95
static void _xp_StartElementHandler(mootExpatParser *mp, const char *el, const char **attr)
Definition: mootExpatParser.h:409
virtual void DefaultHandler(const XML_Char *s, int len)
Definition: mootExpatParser.h:390
int byte_count(void)
Definition: mootExpatParser.h:327
virtual void CharacterDataHandler(const XML_Char *s, int len)
Definition: mootExpatParser.h:357
virtual void setEncoding(const std::string &encoding="")
Definition: mootExpatParser.h:135
virtual void from_filename(const char *filename)
Utility class for expat input contexts.
Definition: mootExpatParser.h:69
C++ Wrapper for expat XML parsers.
Definition: mootExpatParser.h:56
mootio abstraction layer for C++ streams
bool parse_file(FILE *infile=stdin, const std::string &in_name="")
size_t cb_used
used length of buffer (in bytes)
Definition: mootBufferIO.h:65
virtual void from_file(FILE *file)
mootExpatParser(size_t bufsize=8192, const std::string &encoding="")
virtual void StartElementHandler(const char *el, const char **attr)
Definition: mootExpatParser.h:345
XML_Parser parser
The underlying expat parser object.
Definition: mootExpatParser.h:117
Namespace for I/O stream wrappers.
Definition: mootBufferIO.h:45
mootio::mistream * xp_istream
Current input stream.
Definition: mootExpatParser.h:106
virtual void EndElementHandler(const char *el)
Definition: mootExpatParser.h:351
mootio abstraction layer for C char* buffers
virtual void xpcarp(const char *fmt,...)
ContextBuffer(const char *buffer, size_t buflen)
Definition: mootExpatParser.h:72
static void _xp_CommentHandler(mootExpatParser *mp, const XML_Char *s)
Definition: mootExpatParser.h:431
long byte_offset(void)
Definition: mootExpatParser.h:323
virtual void reset(void)
ContextBuffer(XML_Parser parser)
Definition: mootExpatParser.h:77
virtual void ProcessingInstructionHandler(const XML_Char *s, const XML_Char *target, const XML_Char *data)
Definition: mootExpatParser.h:363
const char * cb_rdata
underlying character data buffer
Definition: mootBufferIO.h:63
Streambuf-like class for input from C char* buffers.
Definition: mootBufferIO.h:60
virtual void context_dump(FILE *tofile=__null)
bool parse_buffer(const char *buf, size_t buflen)
Abstract base class for input stream wrappers.
Definition: mootIO.h:129
char * xml_buf
Parse buffer for expat parser.
Definition: mootExpatParser.h:113
static void _xp_EndElementHandler(mootExpatParser *mp, const char *el)
Definition: mootExpatParser.h:417
virtual bool parse_check(void)
virtual std::string context_string(void)
Definition: mootExpatParser.h:300