Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootExpatParser.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2004 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootExpatParser.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + moocow's PoS tagger : expat wrapper
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _MOOT_EXPAT_PARSER_H
00030 #define _MOOT_EXPAT_PARSER_H
00031 
00032 #include <mootConfig.h>
00033 
00034 #ifdef MOOT_EXPAT_ENABLED
00035 
00036 #include <stdio.h>
00037 #include <errno.h>
00038 #include <string.h>
00039 #include <stdarg.h>
00040 #include <assert.h>
00041 
00042 #include <expat.h>
00043 
00044 #include <string>
00045 #include <stdexcept>
00046 
00047 #include <mootUtils.h>
00048 #include <mootIO.h>
00049 #include <mootCIO.h>
00050 #include <mootCxxIO.h>
00051 #include <mootBufferIO.h>
00052 
00053 
00054 
00056 #define MOOT_DEFAULT_EXPAT_BUFLEN 8192 /* default value of the 'bufsize' argument of moot::mootExpatParser's constructor */
00057 
00058 namespace moot {
00059 
00060 /*--------------------------------------------------------------
00061  * TYPES
00062  */
00063 
00065   class mootExpatParser {
00066   public:
00067     /*----------------------------------------------------
00068      * mootExpatParser: ContextBuffer
00069      */
00070 
00078     class ContextBuffer : public mootio::micbuffer {
          //-- A mootio::micbuffer set up either over a caller-supplied character
          //-- buffer or over the input bytes of the expat event currently being
          //-- handled by a given XML_Parser.
00079     public:
00081       ContextBuffer(const char *buffer, size_t buflen) //-- wrap 'buflen' chars at 'buffer': both are handed straight to micbuffer
00082         : mootio::micbuffer(buffer, buflen)
00083       {};
00084 
00086       ContextBuffer(XML_Parser parser) //-- wrap the bytes of parser's current event; 'parser' must not be NULL
00087         : mootio::micbuffer(NULL,0)
00088       {
00089         assert(parser != NULL);
              //-- expat reports offset and size through plain 'int' out-parameters.
              //-- Receive them in real ints instead of casting the addresses of
              //-- cb_offset / cb_used to (int*): if those members are wider than
              //-- int (NOTE(review): presumably size_t -- see mootBufferIO.h), expat
              //-- would write only part of each object.
              int ctx_offset = 0;
              int ctx_size   = 0; //-- required by the expat call; the value itself is not used
00090         cb_rdata  = XML_GetInputContext(parser, &ctx_offset, &ctx_size);
              if (cb_rdata == NULL) {
                //-- expat had no input context to offer: leave the buffer empty
                //-- rather than pairing a NULL data pointer with a nonzero length
                cb_offset = 0;
                cb_used   = 0;
                return;
              }
00091         cb_rdata += ctx_offset; //-- advance to the first byte of the current event
00092         cb_offset = 0;
00093         cb_used   = XML_GetCurrentByteCount(parser); //-- length of the current event, in bytes
00094       };
00095       ~ContextBuffer(void) {};
00096 
00097     }; //-- /class mootExpatParser::ContextBuffer
00098 
00099   public:
00100     /*----------------------------------------------------
00101      * mootExpatParser: Data
00102      */
00105     //-- i/o streams
00106     mootio::mistream *xp_istream;         ///< input stream (NOTE(review): presumably the one selected by the from_*() methods -- confirm)
00107     bool              xp_istream_created; ///< NOTE(review): presumably true iff xp_istream was allocated by this object -- confirm
00108 
00109 
00112     //-- expat stuff
00113     char              *xml_buf;      ///< NOTE(review): presumably the buffer input is read into before parsing -- confirm
00114     size_t             xml_buflen;   ///< NOTE(review): presumably the allocated size of xml_buf -- confirm
00115     std::string        xml_encoding; ///< encoding name; assigned by setEncoding()
00116     //int                xml_done;     ///< whether we're done parsing yet
00117     XML_Parser         parser;       ///< expat parser handle; this is what the handlers pass to XML_DefaultCurrent()
00118 
00119  
00120   public:
00123     /*----------------------------------------------------
00124      * mootExpatParser: Constructor
           *  + bufsize  : defaults to MOOT_DEFAULT_EXPAT_BUFLEN
           *  + encoding : defaults to the empty string
           *  + defined out of line; NOTE(review): presumably these initialize
           *    xml_buflen and xml_encoding -- confirm in the implementation
00125      */
00131     mootExpatParser(size_t bufsize=MOOT_DEFAULT_EXPAT_BUFLEN,
00132                     const std::string &encoding="");
00133 
00135     virtual void setEncoding(const std::string &encoding="") //-- remember the new encoding name, then reset()
00136     {
00137       this->xml_encoding.assign(encoding);
00138       this->reset(); //-- still a virtual call: subclasses' reset() overrides are honored
00139     };
00140 
00145     virtual void reset(void); //-- invoked by setEncoding() once the new encoding is stored; defined out of line
00146 
00147     /*----------------------------------------------------
00148      * mootExpatParser: Destructor
           *  + virtual, matching the class's other virtual methods, so deleting
           *    a derived object through a mootExpatParser* is well-defined
00149      */
00151     virtual ~mootExpatParser(void);
00153 
00154     /*----------------------------------------------------*/
          //-- Input selection. Only the reference overload of from_mstream() is
          //-- defined here; the rest are defined out of line.
00157 
00163     virtual void from_mstream(mootio::mistream *mistreamp); //-- NOTE(review): presumably makes *mistreamp the input source -- confirm
00164 
00170     virtual void from_mstream(mootio::mistream &mistream) { from_mstream(&mistream); }; //-- reference overload: forwards to the pointer overload
00171 
00178     virtual void from_filename(const char *filename); //-- NOTE(review): presumably opens the named file as input -- confirm
00179 
00186     virtual void from_file(FILE *file); //-- NOTE(review): presumably reads from an already-open FILE* -- confirm
00187 
00194     virtual void from_fd(int fd) //-- unsupported input source: always throws std::domain_error
00195     {
00196       close(); //-- the member close() declared in this class, not ::close(int); 'fd' itself is never used
00197       throw std::domain_error("from_fd(): not implemented"); //-- qualified, so this compiles without a using-directive leaking in from another header
00198     };
00199     
00206     virtual void from_buffer(const void *buf, size_t len); //-- NOTE(review): presumably reads input from the 'len' bytes at 'buf' -- confirm
00207 
00214     virtual void from_string(const char *s) { from_buffer(s,strlen(s)); }; //-- from_buffer() over the NUL-terminated string 's' (terminator excluded); strlen() is applied, so 's' must not be NULL
00215 
00222     virtual void from_cxxstream(std::istream &is); //-- NOTE(review): presumably reads input from a C++ stream -- confirm
00223 
00231     virtual void close(void); //-- called first thing by from_fd(); NOTE(review): presumably releases the current input -- confirm
00233 
00234     /*----------------------------------------------------*/
          //-- Parsing entry points (all defined out of line).
          //-- NOTE(review): each bool result presumably reports success -- confirm
00241     virtual bool parse_check(void);
00242 
00244     bool parse_filename(const std::string &filename);
00245 
00247     bool parse_file(FILE *infile=stdin, const std::string &in_name=""); //-- defaults: stdin, empty name
00248 
00260     bool parse_buffer(const char *buf, size_t buflen);
00261 
00266     bool parse_all(mootio::mistream *in=NULL); //-- NOTE(review): meaning of the NULL default is not visible here (presumably "use the current stream")
00267 
00280     bool parse_chunk(int &nbytes, int &is_final, mootio::mistream *in=NULL); //-- nbytes / is_final are non-const references, so the callee can update them
00282 
00283     /*----------------------------------------------------*/
00286 
00287     /*----------------------------------------------------
00288      * mootExpatParser: Printing and Context
00289      */
00291     virtual void context_dump(FILE *tofile=NULL); //-- defined out of line; NOTE(review): presumably writes the current event context to 'tofile' -- handling of the NULL default is not visible here
00292 
00294     virtual std::string context_string(void) //-- ContextBuffer(parser).as_string(), or "" when there is no parser
00295     {
00296       return (parser == NULL) ? std::string("") : ContextBuffer(parser).as_string();
00297     };
00298 
00299     /*----------------------------------------------------
00300      * mootExpatParser: Error Reporting
           *  + both methods take a format string followed by variadic arguments
           *    (NOTE(review): presumably printf-style -- confirm)
           *  + NOTE(review): 'fmt' is a non-const char*, so passing a string
           *    literal relies on the deprecated literal-to-char* conversion;
           *    overriders must match whatever signature is declared here
00301      */
00303     virtual void carp(char *fmt, ...);
00304 
00306     virtual void xpcarp(char *fmt, ...);
00308 
00311     /*----------------------------------------------------
00312      * mootExpatParser: Handlers
           *  + virtual hooks, one per event type; subclasses override the ones
           *    they care about
           *  + every default here except DefaultHandler() just calls
           *    XML_DefaultCurrent(parser), which per expat's documentation hands
           *    the current markup to the default handler
           *  + NOTE(review): the static _xp_*() wrappers in this class cover only
           *    XmlDecl, StartElement, EndElement, CharacterData, Comment and
           *    Default; there is none here for the processing-instruction or
           *    CDATA-section hooks
00313      */
00315     virtual void XmlDeclHandler(const XML_Char  *version,
00316                                 const XML_Char  *encoding,
00317                                 int             standalone)
00318     {
00319       XML_DefaultCurrent(parser);
00320     };
00321 
00323     virtual void StartElementHandler(const char *el, const char **attr)
00324     {
00325       XML_DefaultCurrent(parser);
00326     };
00327 
00329     virtual void EndElementHandler(const char *el)
00330     {
00331       XML_DefaultCurrent(parser);
00332     };
00333 
00335     virtual void CharacterDataHandler(const XML_Char *s, int len)
00336     {
00337       XML_DefaultCurrent(parser);
00338     };
00339 
00341     virtual void ProcessingInstructionHandler(const XML_Char *s, //-- NOTE(review): expat's own PI callback passes only (target, data); role of 's' is not visible here
00342                                               const XML_Char *target,
00343                                               const XML_Char *data)
00344     {
00345       XML_DefaultCurrent(parser);
00346     };
00347 
00349     virtual void CommentHandler(const XML_Char *s)
00350     {
00351       XML_DefaultCurrent(parser);
00352     };
00353 
00354 
00356     virtual void StartCdataSectionHandler(void)
00357     {
00358       XML_DefaultCurrent(parser);
00359     };
00360 
00362     virtual void EndCdataSectionHandler(void)
00363     {
00364       XML_DefaultCurrent(parser);
00365     };
00366 
00368     virtual void DefaultHandler(const XML_Char *s, int len) //-- default: does nothing with the data
00369     {};
00371 
00374     /*----------------------------------------------------
00375      * mootExpatParser: Expat wrappers (static)
           *  + each wrapper forwards its arguments to the same-named virtual
           *    method of 'mp', and does nothing when 'mp' is NULL
00376      */
00377     /* XML declaration -> mp->XmlDeclHandler() */
00378     static void _xp_XmlDeclHandler(mootExpatParser    *mp,
00379                                    const XML_Char  *version,
00380                                    const XML_Char  *encoding,
00381                                    int             standalone)
00382     {
00383       if (mp != NULL) { mp->XmlDeclHandler(version, encoding, standalone); }
00384     };
00385 
00386     /* element start tag -> mp->StartElementHandler() */
00387     static void _xp_StartElementHandler(mootExpatParser *mp,
00388                                         const char *el,
00389                                         const char **attr)
00390     {
00391       if (mp != NULL) { mp->StartElementHandler(el, attr); }
00392     };
00393 
00394     /* element end tag -> mp->EndElementHandler() */
00395     static void _xp_EndElementHandler(mootExpatParser *mp, const char *el)
00396     {
00397       if (mp != NULL) { mp->EndElementHandler(el); }
00398     };
00399 
00400     /* character data -> mp->CharacterDataHandler() */
00401     static void _xp_CharacterDataHandler(mootExpatParser *mp,
00402                                          const XML_Char *s,
00403                                          int len)
00404     {
00405       if (mp != NULL) { mp->CharacterDataHandler(s, len); }
00406     };
00407 
00408     /* comment -> mp->CommentHandler() */
00409     static void _xp_CommentHandler(mootExpatParser *mp,
00410                                    const XML_Char *s)
00411     {
00412       if (mp != NULL) { mp->CommentHandler(s); }
00413     };
00414 
00415     /* anything passed to the default handler -> mp->DefaultHandler() */
00416     static void _xp_DefaultHandler(mootExpatParser *mp,
00417                                    const XML_Char *s,
00418                                    int len)
00419     {
00420       if (mp != NULL) { mp->DefaultHandler(s, len); }
00421     };
00423   };
00424 
00425 }; // moot_END_NAMESPACE
00426 
00427 #endif // MOOT_EXPAT_ENABLED
00428 
00429 #endif // _MOOT_EXPAT_PARSER_H

Generated on Wed Jul 28 15:48:02 2004 for libmoot by doxygen1.2.15