Main Page | Directories | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

mootExpatParser.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2004 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011    
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016    
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with this library; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootExpatParser.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + moocow's PoS tagger : expat wrapper
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _MOOT_EXPAT_PARSER_H
00030 #define _MOOT_EXPAT_PARSER_H
00031 
00032 #include <mootConfig.h>
00033 
00034 #ifdef MOOT_EXPAT_ENABLED
00035 
00036 #include <stdio.h>
00037 #include <errno.h>
00038 #include <string.h>
00039 #include <stdarg.h>
00040 #include <assert.h>
00041 
00042 #include <expat.h>
00043 
00044 #include <string>
00045 #include <stdexcept>
00046 
00047 #include <mootUtils.h>
00048 #include <mootIO.h>
00049 #include <mootCIO.h>
00050 #include <mootCxxIO.h>
00051 #include <mootBufferIO.h>
00052 
00053 
00054 
00056 #define MOOT_DEFAULT_EXPAT_BUFLEN 8192
00057 
00058 namespace moot {
00059 
00060 /*--------------------------------------------------------------
00061  * TYPES
00062  */
00063 
00065   class mootExpatParser {
00066   public:
00067     /*----------------------------------------------------
00068      * mootExpatParser: ContextBuffer
00069      */
00070 
00078     class ContextBuffer : public mootio::micbuffer { //-- micbuffer over raw bytes or a parser's current event
00079     public:
00081       ContextBuffer(const char *buffer, size_t buflen) //-- wrap a caller-supplied buffer of buflen bytes
00082         : mootio::micbuffer(buffer, buflen)
00083       {};
00084 
00086       ContextBuffer(XML_Parser parser) //-- wrap the bytes of parser's current event; parser must be non-NULL
00087         : mootio::micbuffer(NULL,0)
00088       {
00089         assert(parser != NULL);
00090         int ctx_offset = 0, ctx_size = 0; //-- expat writes plain ints here: never alias cb_offset/cb_used via (int*)
00091         cb_rdata  = XML_GetInputContext(parser, &ctx_offset, &ctx_size); //-- may be NULL (no context available)
00092         if (cb_rdata != NULL) cb_rdata += ctx_offset; //-- advance to the start of the current event
00093         cb_offset = 0; cb_used = (cb_rdata != NULL ? XML_GetCurrentByteCount(parser) : 0); //-- empty if no context
00094       };
00095       ~ContextBuffer(void) {};
00096 
00097     }; //-- /class mootExpatParser::ContextBuffer
00098 
00099   public:
00100     /*----------------------------------------------------
00101      * mootExpatParser: Data
00102      */
00105     //-- i/o streams
00106     mootio::mistream *xp_istream;         
00107     bool              xp_istream_created; 
00108 
00109 
00112     //-- expat stuff
00113     char              *xml_buf;      
00114     size_t             xml_buflen;   
00115     std::string        xml_encoding; 
00116     //int                xml_done;     ///< whether we're done parsing yet
00117     XML_Parser         parser;       
00118 
00119  
00120   public:
00123     /*----------------------------------------------------
00124      * mootExpatParser: Constructor
00125      */
00131     mootExpatParser(size_t bufsize=MOOT_DEFAULT_EXPAT_BUFLEN,
00132                     const std::string &encoding="");
00133 
00135     virtual void setEncoding(const std::string &encoding="") //-- store the encoding name, then call reset()
00136     {
00137       this->xml_encoding.assign(encoding); //-- remember the requested encoding
00138       this->reset();                       //-- virtual call: subclasses may override reset()
00139     }
00140 
00145     virtual void reset(void);
00146 
00147     /*----------------------------------------------------
00148      * mootExpatParser: Destructor
00149      */
00151     virtual ~mootExpatParser(void);
00153 
00154     /*----------------------------------------------------*/
00157 
00163     virtual void from_mstream(mootio::mistream *mistreamp);
00164 
00170     virtual void from_mstream(mootio::mistream &mistream) { this->from_mstream(&mistream); } //-- forwards to the pointer overload
00171 
00178     virtual void from_filename(const char *filename);
00179 
00186     virtual void from_file(FILE *file);
00187 
00194     virtual void from_fd(int fd) //-- not implemented: always throws std::domain_error
00195     {
00196       close(); //-- member mootExpatParser::close(), not ::close(fd)
00197       throw std::domain_error("from_fd(): not implemented"); //-- qualified: no using-directive is visible in this header
00198     };
00199     
00206     virtual void from_buffer(const void *buf, size_t len);
00207 
00214     virtual void from_string(const char *s) { this->from_buffer(s, ::strlen(s)); } //-- s must be NUL-terminated (length via strlen)
00215 
00222     virtual void from_cxxstream(std::istream &is);
00223 
00231     virtual void close(void);
00233 
00234     /*----------------------------------------------------*/
00241     virtual bool parse_check(void);
00242 
00244     bool parse_filename(const std::string &filename);
00245 
00247     bool parse_file(FILE *infile=stdin, const std::string &in_name="");
00248 
00260     bool parse_buffer(const char *buf, size_t buflen);
00261 
00266     bool parse_all(mootio::mistream *in=NULL);
00267 
00280     bool parse_chunk(int &nbytes, int &is_final, mootio::mistream *in=NULL);
00282 
00283     /*----------------------------------------------------*/
00286 
00287     /*----------------------------------------------------
00288      * mootExpatParser: Printing and Context
00289      */
00291     virtual void context_dump(FILE *tofile=NULL);
00292 
00294     virtual std::string context_string(void) //-- current parser context as a string; "" when parser is unset
00295     { if (!parser) { return std::string(""); } //-- no parser: nothing to report
00296       return ContextBuffer(parser).as_string(); //-- otherwise return the temporary ContextBuffer's contents
00297     }
00298 
00299     /*----------------------------------------------------
00300      * mootExpatParser: Error Reporting
00301      */
00303     virtual void carp(char *fmt, ...);
00304 
00306     virtual void xpcarp(char *fmt, ...);
00308 
00311     /*----------------------------------------------------
00312      * mootExpatParser: Handlers
00313      */
00315     virtual void XmlDeclHandler(const XML_Char  *version,
00316                                 const XML_Char  *encoding,
00317                                 int             standalone)
00318     {
00319       XML_DefaultCurrent(this->parser); //-- default: arguments unused, event goes to XML_DefaultCurrent()
00320     }
00321 
00323     virtual void StartElementHandler(const char *el, const char **attr)
00324     {
00325       XML_DefaultCurrent(this->parser); //-- default: el/attr unused, event goes to XML_DefaultCurrent()
00326     }
00327 
00329     virtual void EndElementHandler(const char *el)
00330     {
00331       XML_DefaultCurrent(this->parser); //-- default: el unused, event goes to XML_DefaultCurrent()
00332     }
00333 
00335     virtual void CharacterDataHandler(const XML_Char *s, int len)
00336     {
00337       XML_DefaultCurrent(this->parser); //-- default: s/len unused, event goes to XML_DefaultCurrent()
00338     }
00339 
00341     virtual void ProcessingInstructionHandler(const XML_Char *s,
00342                                               const XML_Char *target,
00343                                               const XML_Char *data)
00344     {
00345       XML_DefaultCurrent(this->parser); //-- default: arguments unused, event goes to XML_DefaultCurrent()
00346     }
00347 
00349     virtual void CommentHandler(const XML_Char *s)
00350     {
00351       XML_DefaultCurrent(this->parser); //-- default: s unused, event goes to XML_DefaultCurrent()
00352     }
00353 
00354 
00356     virtual void StartCdataSectionHandler(void)
00357     {
00358       XML_DefaultCurrent(this->parser); //-- default: event goes to XML_DefaultCurrent()
00359     }
00360 
00362     virtual void EndCdataSectionHandler(void)
00363     {
00364       XML_DefaultCurrent(this->parser); //-- default: event goes to XML_DefaultCurrent()
00365     }
00366 
00368     virtual void DefaultHandler(const XML_Char *s, int len)
00369     { /* no-op by default: s and len are ignored */ }
00371 
00374     /*----------------------------------------------------
00375      * mootExpatParser: Expat wrappers (static)
00376      */
00377     /* expat callback wrapper */
00378     static void _xp_XmlDeclHandler(mootExpatParser    *mp,
00379                                    const XML_Char  *version,
00380                                    const XML_Char  *encoding,
00381                                    int             standalone)
00382     {
00383       if (mp != NULL) { mp->XmlDeclHandler(version, encoding, standalone); } //-- NULL mp: silently ignored
00384     }
00385 
00386     /* expat callback wrapper */
00387     static void _xp_StartElementHandler(mootExpatParser *mp,
00388                                         const char *el,
00389                                         const char **attr)
00390     {
00391       if (mp != NULL) { mp->StartElementHandler(el, attr); } //-- NULL mp: silently ignored
00392     }
00393 
00394     /* expat callback wrapper */
00395     static void _xp_EndElementHandler(mootExpatParser *mp, const char *el)
00396     {
00397       if (mp != NULL) { mp->EndElementHandler(el); } //-- NULL mp: silently ignored
00398     }
00399 
00400     /* expat callback wrapper */
00401     static void _xp_CharacterDataHandler(mootExpatParser *mp,
00402                                          const XML_Char *s,
00403                                          int len)
00404     {
00405       if (mp != NULL) { mp->CharacterDataHandler(s, len); } //-- NULL mp: silently ignored
00406     }
00407 
00408     /* expat callback wrapper */
00409     static void _xp_CommentHandler(mootExpatParser *mp,
00410                                    const XML_Char *s)
00411     {
00412       if (mp != NULL) { mp->CommentHandler(s); } //-- NULL mp: silently ignored
00413     }
00414 
00415     /* expat callback wrapper */
00416     static void _xp_DefaultHandler(mootExpatParser *mp,
00417                                    const XML_Char *s,
00418                                    int len)
00419     {
00420       if (mp != NULL) { mp->DefaultHandler(s, len); } //-- NULL mp: silently ignored
00421     }
00423   };
00424 
00425 }; // moot_END_NAMESPACE
00426 
00427 #endif // MOOT_EXPAT_ENABLED
00428 
00429 #endif // _MOOT_EXPAT_PARSER_H

Generated on Mon Jun 27 13:05:25 2005 for libmoot by  doxygen 1.3.8-20040913