Main Page | Directories | File List

mootBufferIO.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2004-2005 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011    
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016    
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with this library; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootBufferIO.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + moot PoS tagger : low-level I/O routines: C buffer I/O
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _MOOT_BUFFERIO_H
00030 #define _MOOT_BUFFERIO_H
00031 
00032 #include <stdio.h>   // for EOF
00033 #include <string.h>  // for memcpy() and friends
00034 #include <assert.h>  // for sanity checks
00035 
00036 #include <mootUtils.h> // for trimming
00037 #include <mootIO.h>    // for everything else
00038 
00039 
00040 namespace mootio {
00041   //using namespace std;
00042 
00043   /*====================================================================
00044    * mootio: classes
00045    *====================================================================*/
00046 
00055   class micbuffer : virtual public mistream
00056   {
00057   public:
00058     const char  *cb_rdata;   
00059     size_t       cb_offset;  
00060     size_t       cb_used;    
00061 
00062   public:
00063     /*----------------------------------------------------------
00064      * micbuffer: constructors
00065      */
00067 
00068 
00074     micbuffer(const void *data, size_t len)
00075       : cb_rdata((const char*)data),
00076         cb_offset(0),
00077         cb_used(len)
00078     {};
00079 
00081     micbuffer(const micbuffer &cb)
00082       : cb_rdata(cb.cb_rdata),
00083         cb_offset(cb.cb_offset),
00084         cb_used(cb.cb_used)
00085     {};
00086 
00088     virtual ~micbuffer(void) {};
00089 
00091     inline void clear(void)
00092     {
00093       cb_offset = 0;
00094       cb_used = 0;
00095     };
00096 
00098     inline void release(void)
00099     {
00100       clear();
00101       cb_rdata = NULL;
00102     };
00103 
00105     inline void assign(const void *data, size_t len) {
00106       cb_rdata  = (const char *)data;
00107       cb_used   = len;
00108       cb_offset = 0;
00109     };
00111 
00112     /*----------------------------------------------------------
00113      * micbuffer: Integrity
00114      */
00116 
00117 
00118     virtual bool valid(void) { return true; };
00119 
00121     virtual bool eof(void) { return cb_offset >= cb_used; };
00123 
00124     /*----------------------------------------------------------
00125      * micbuffer: Input
00126      */
00128 
00129 
00132     virtual ByteCount read(char *buf, size_t n) {
00133       if (n==0 || cb_offset >= cb_used) return 0;
00134       else if (n < cb_used - cb_offset) {
00135         //-- normal case: copy local data to buf
00136         memcpy(buf, cb_rdata+cb_offset, n);
00137         cb_offset += n;
00138         return (ByteCount)n;
00139       }
00140       else {
00141         //-- partial read: copy some data to buf
00142         memcpy(buf, cb_rdata+cb_offset, cb_used-cb_offset);
00143         ByteCount nread = (ByteCount)(cb_used-cb_offset);
00144         cb_offset = cb_used;
00145         return nread;
00146       }
00147     };
00148 
00150     virtual int getbyte(void) {
00151       if (cb_offset >= cb_used) return EOF;
00152       return cb_rdata[cb_offset++];
00153     };
00155 
00156     /*----------------------------------------------------------
00157      * micbuffer: Utilties
00158      */
00160 
00161 
00163     inline const char* data(void) const { return cb_rdata; };
00164 
00166     inline size_t size(void) const { return cb_used; };
00167 
00169     inline size_t capacity(void) const { return cb_used; };
00170 
00172     inline size_t offset(void) const { return cb_offset; };
00173 
00181     inline std::string as_string(bool normalize_ws=false,
00182                                  bool trim_left=false,
00183                                  bool trim_right=false)
00184       const
00185     {
00186       std::string str;
00187       str.reserve(cb_used-cb_offset);
00188       to_string(str, normalize_ws, trim_left, trim_right);
00189       return str;
00190     };
00191 
00200     inline void to_string(std::string &str,
00201                           bool normalize_ws =false,
00202                           bool trim_left    =false,
00203                           bool trim_right   =false)
00204       const
00205     {
00206       if (!cb_rdata) return;
00207       if (!normalize_ws)
00208         str.append(cb_rdata+cb_offset, cb_used-cb_offset);
00209       else
00210         moot::moot_normalize_ws(cb_rdata+cb_offset, cb_used-cb_offset,
00211                                 str,
00212                                 trim_left, trim_right);
00213     };
00215   }; //-- /micbuffer
00216 
00217 
00218   /*====================================================================
00219    * mootio: input/output: mcbuffer
00220    *====================================================================*/
00221 
00231   class mcbuffer
00232     : virtual public micbuffer,
00233       virtual public mostream
00234   {
00235   public:
00237     static const size_t CB_DEFAULT_SIZE = 32;
00238 
00240     static const size_t CB_DEFAULT_GET = 32;
00241 
00242   public:
00243     char   *cb_wdata;   
00244     size_t  cb_alloc;   
00245     size_t  cb_get;     
00246     bool    cb_created; 
00247 
00248   public:
00249     /*----------------------------------------------------------
00250      * mcbuffer: constructors
00251      */
00253 
00254 
00255     mcbuffer(size_t size=CB_DEFAULT_SIZE)
00256       : micbuffer(NULL,0),
00257         cb_wdata(NULL),
00258         cb_alloc(0),
00259         cb_get(CB_DEFAULT_GET),
00260         cb_created(true)
00261     {
00262       if (size) reserve(size);
00263     };
00264 
00273     mcbuffer(void *data,
00274             size_t used,
00275             size_t alloc=0,
00276             size_t get=CB_DEFAULT_GET)
00277       : micbuffer((const char *)data,used),
00278         cb_wdata((char *)data),
00279         cb_alloc(alloc),
00280         cb_get(get),
00281         cb_created(false)
00282     {
00283       if (!alloc) cb_alloc = cb_used;
00284       cb_rdata = cb_wdata;
00285     };
00286 
00288     mcbuffer(const micbuffer &cb)
00289       : micbuffer(NULL,0),
00290         cb_wdata(NULL),
00291         cb_created(true)
00292     {
00293       reserve(cb.cb_used);
00294       memcpy(cb_wdata, cb.cb_rdata, cb.cb_used);
00295       cb_offset = cb.cb_offset;
00296       cb_used   = cb.cb_used;
00297       cb_rdata  = cb_wdata;
00298     };
00299 
00301     inline void assign(const void *data, size_t len) {
00302       reserve(len);
00303       memcpy(cb_wdata, data, len);
00304       cb_rdata   = cb_wdata;
00305       cb_offset  = 0;
00306       cb_used    = len;
00307       cb_created = true;
00308     };
00309 
00311     virtual ~mcbuffer(void) { release(); };
00312 
00314     inline void clear(void)
00315     {
00316       cb_offset = 0;
00317       cb_used = 0;
00318     };
00319 
00321     inline void release(void)
00322     {
00323       clear();
00324       if (cb_created && cb_wdata) free(cb_wdata);
00325       cb_wdata = NULL;
00326       cb_rdata = NULL;
00327       cb_alloc = 0;
00328     };
00330 
00331     /*----------------------------------------------------------
00332      * mcbuffer: Output
00333      */
00335 
00336 
00337     virtual bool flush(void) {
00338       if (!cb_offset || !cb_wdata) return true;
00339       memmove(cb_wdata, cb_wdata+cb_offset, cb_used-cb_offset);
00340       cb_used -= cb_offset;
00341       cb_offset = 0;
00342       return true;
00343     };
00344 
00346     virtual bool write(const char *buf, size_t n) {
00347       if (!reserve(n+cb_used, cb_get)) return false;
00348       memcpy(cb_wdata+cb_used, buf, n);
00349       cb_used += n;
00350       return true;
00351     };
00352 
00354     virtual bool putc(unsigned char c) {
00355       if (!reserve(1+cb_used, cb_get)) return false;
00356       cb_wdata[cb_used++] = c;
00357       return true;
00358     };
00359 
00361     virtual bool puts(const char *s) {
00362       return write(s,strlen(s));
00363     };
00365     virtual bool puts(const std::string &s) {
00366       return write(s.data(),s.size());
00367     };
00368 
00370     virtual bool vprintf(const char *fmt, va_list &ap)
00371     {
00372       size_t nchars = vsnprintf(cb_wdata+cb_used, cb_alloc-cb_used, fmt, ap);
00373       if (nchars >= cb_alloc-cb_used) {
00374         if (!reserve(1+nchars+cb_used, cb_get)) return false;
00375         vsnprintf(cb_wdata+cb_used, cb_alloc-cb_used, fmt, ap);
00376       }
00377       cb_used += nchars;
00378       return true;
00379     };
00381 
00382     /*----------------------------------------------------------
00383      * mcbuffer: Utilties
00384      */
00386 
00387 
00388     inline bool reserve(size_t size, size_t pad=0) {
00389       if (size > cb_alloc) {
00390         size_t newalloc = size+pad;
00391         if (cb_created) {
00392           //-- local buffer: we can just realloc()
00393           if (cb_wdata) cb_wdata = (char *)realloc(cb_wdata, newalloc);
00394           else cb_wdata = (char *)malloc(newalloc);
00395         } else {
00396           //-- user buffer: we need to slurp it
00397           char *newdata = (char *)malloc(newalloc);
00398           memcpy(newdata, cb_wdata, cb_used);
00399           cb_wdata = newdata;
00400           cb_created = true;
00401         }
00402         assert(cb_wdata != NULL);
00403         cb_rdata = cb_wdata;
00404         cb_alloc = newalloc;
00405       }
00406       return true;
00407     };
00409 
00410   }; //-- /mcbuffer
00411 
00412   /*====================================================================
00413    * mootio: c-buffers: aliases
00414    *====================================================================*/
00415   typedef micbuffer mibuffer;   
00416   typedef mcbuffer  mobuffer;   
00417   typedef mcbuffer  mocbuffer;  
00418   typedef mcbuffer  miocbuffer; 
00419   typedef mcbuffer  miobuffer;  
00420   typedef mcbuffer  mbuffer;    
00421 
00422 }; //-- /namespace mootio
00423 
00424 
00425 #endif //_MOOT_BUFFERIO_H

Generated on Sat Sep 17 01:20:33 2005 for libmoot by  doxygen 1.4.4