Main Page | Directories | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

mootUtils.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2004 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011    
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016    
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with this library; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootUtils.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + moocow's PoS tagger : useful utilities
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _moot_UTILS_H
00030 #define _moot_UTILS_H
00031 
00032 #include <stdio.h>
00033 #include <string.h>
00034 #include <string>
00035 #include <list>
00036 
00037 #include <mootIO.h>
00038 #include <mootCIO.h>
00039 
00040 namespace moot {
00041   using namespace std;
00042   using namespace mootio;
00043 
00044   /*----------------------------------------------------------------------*/
00047   
/*
 * moot_parse_doubles(): declaration only; the definition is not in this header.
 * Receives a mutable C string `str`, a caller-provided array `dbls` and a
 * count `ndbls`, and reports a bool status.
 * NOTE(review): presumably parses up to `ndbls` numbers from `str` into `dbls`
 * and returns true on success -- confirm against the implementation, including
 * whether `str` is modified (it is taken as non-const).
 */
00052   bool moot_parse_doubles(char *str, double *dbls, size_t ndbls);
00053   
00054   /*----------------------------------------------------------------------
00055    * normalize_ws_*()
00056    */
/*
 * moot_normalize_ws(buf, len, out, ...): declaration only; defined elsewhere.
 * Takes an input buffer with an explicit byte count, a std::string `out`
 * passed by reference, and two flags that both default to true.
 * NOTE(review): presumably collapses whitespace runs from `buf` into `out`,
 * trimming leading/trailing whitespace per the flags; whether `out` is
 * cleared or appended to is not visible here -- confirm.
 */
00068   void moot_normalize_ws(const char *buf,
00069                          size_t len,
00070                          std::string &out,
00071                          bool trim_left=true,
00072                          bool trim_right=true);
00073 
/*
 * moot_normalize_ws(in, out, ...): declaration only; defined elsewhere.
 * std::string-input counterpart of the buffer/length form: reads `in`,
 * writes through `out`; both trim flags default to true.
 * NOTE(review): presumably identical semantics to the (buf, len, out)
 * overload -- confirm, including behaviour when `in` and `out` alias.
 */
00084   void moot_normalize_ws(const std::string &in,
00085                          std::string &out,
00086                          bool trim_left=true,
00087                          bool trim_right=true);
00088 
00098   inline void moot_normalize_ws(const char *s,
00099                                 std::string &out,
00100                                 bool trim_left=true,
00101                                 bool trim_right=true)
00102   {
00103     moot_normalize_ws(s, strlen(s), out, trim_left, trim_right);
00104   };
00105 
00115   inline std::string moot_normalize_ws(const char *buf,
00116                                        size_t len,
00117                                        bool trim_left=true,
00118                                        bool trim_right=true)
00119   {
00120     std::string out;
00121     out.reserve(len);
00122     moot_normalize_ws(buf,len, out, trim_left,trim_right);
00123     return out;
00124   };
00125 
00134   inline std::string moot_normalize_ws(const char *s,
00135                                        bool trim_left=true,
00136                                        bool trim_right=true)
00137   {
00138     return moot_normalize_ws(s,strlen(s), trim_left,trim_right);
00139   };
00140 
00149   inline std::string moot_normalize_ws(const std::string &s,
00150                                        bool trim_left=true,
00151                                        bool trim_right=true)
00152   {
00153     return moot_normalize_ws(s.data(),s.size(), trim_left,trim_right);
00154   };
00155 
00156   /*----------------------------------------------------------------------
00157    * remove_newlines()
00158    */
/**
 * Overwrite every '\n' among the first \a len bytes of \a buf with a
 * single space, in place. Bytes at or beyond \a len are left untouched and
 * NUL bytes inside the range do not stop the scan.
 *
 * \param buf writable buffer holding at least \a len bytes
 * \param len number of bytes to examine
 */
inline void moot_remove_newlines(char *buf, size_t len)
{
  char *const stop = buf + len;
  for (char *cur = buf; cur != stop; ++cur) {
    if (*cur == '\n') *cur = ' ';
  }
}
00172 
00174   inline void moot_remove_newlines(char *s)
00175   {
00176     moot_remove_newlines(s, strlen(s));
00177   };
00178 
/**
 * std::string variant: replaces each '\n' character in \a s with a space,
 * in place. The length of \a s never changes.
 *
 * \param s string to modify
 */
inline void moot_remove_newlines(std::string &s)
{
  // Jump from one newline to the next instead of visiting every character.
  std::string::size_type nl = s.find('\n');
  while (nl != std::string::npos) {
    s[nl] = ' ';
    nl = s.find('\n', nl + 1);
  }
}
00186 
/*
 * moot_strtok(s, delim, out): declaration only; defined elsewhere.
 * Reads `s` and `delim` and writes its result through the list `out`.
 * NOTE(review): presumably splits `s` on any character in `delim` and adds
 * the tokens to `out`; whether `out` is cleared first and how empty tokens
 * are handled is not visible here -- confirm.
 */
00193   void moot_strtok(const std::string &s,
00194                    const std::string &delim,
00195                    std::list<std::string> &out);
00196 
00202   inline std::list<std::string> moot_strtok(const std::string &s,
00203                                             const std::string &delim)
00204   {
00205     std::list<std::string> slist;
00206     moot_strtok(s,delim,slist);
00207     return slist;
00208   };
00210 
00211   /*----------------------------------------------------------------------*/
00214   
/*
 * moot_file_exists(filename): declaration only; defined elsewhere.
 * NOTE(review): presumably returns true when `filename` names an existing
 * (readable?) file -- the exact test used is not visible here; confirm.
 */
00216   bool moot_file_exists(const char *filename);
00217   
/*
 * moot_unextend(filename): declaration only; defined elsewhere.
 * Returns a new std::string derived from `filename`.
 * NOTE(review): presumably `filename` with its final extension removed;
 * treatment of directory components is not visible here -- confirm.
 */
00219   std::string moot_unextend(const char *filename);
00220 
/**
 * Find the last '.' within the first \a pos bytes of \a filename.
 *
 * Implemented as a plain backwards scan instead of memrchr(): memrchr() is
 * a GNU extension that several C libraries do not provide, so relying on
 * it makes this header fail to compile on those platforms.
 *
 * \param filename buffer to search; only bytes [0, pos) are examined, and
 *                 the scan does not stop at NUL bytes inside that range
 * \param pos      number of leading bytes of \a filename to consider
 * \return pointer to the last '.' in that range, or NULL if there is none
 *         (also NULL when \a filename itself is NULL)
 *
 * The return type stays a non-const char* for compatibility with existing
 * callers; writing through it is only valid if the caller's buffer is
 * actually writable.
 */
inline char *moot_extension(const char *filename, size_t pos)
{
  if (filename == NULL) return NULL;
  for (const char *cur = filename + pos; cur != filename; ) {
    --cur;
    if (*cur == '.') return const_cast<char *>(cur);
  }
  return NULL;
}
00231 
00233   inline char *moot_extension(const char *filename)
00234   {
00235     return moot_extension(filename, strlen(filename));
00236   };
00237 
/*
 * hmm_parse_model_name(): declaration only; defined elsewhere.
 * Reads `modelname` and writes through four string out-parameters
 * (`binfile`, `lexfile`, `ngfile`, `lcfile`); reports a bool status.
 * NOTE(review): presumably derives the binary, lexical, n-gram and class
 * file names belonging to a model -- naming rules and the meaning of the
 * return value are not visible here; confirm against the implementation.
 */
00247   bool hmm_parse_model_name(const std::string &modelname,
00248                             std::string &binfile,
00249                             std::string &lexfile,
00250                             std::string &ngfile,
00251                             std::string &lcfile);
00252 
/*
 * hmm_parse_model_name_text(): declaration only; defined elsewhere.
 * Same shape as hmm_parse_model_name() minus the `binfile` out-parameter:
 * reads `modelname`, writes `lexfile`, `ngfile`, `lcfile`, returns bool.
 * NOTE(review): presumably the text-model-only variant -- confirm.
 */
00261   bool hmm_parse_model_name_text(const std::string &modelname,
00262                                  std::string &lexfile,
00263                                  std::string &ngfile,
00264                                  std::string &lcfile);
00265 
00266 
00267 
00272   class cmdutil_file_churner {
00273   public:
00274     // -- command-line data
00275     char  *progname;  
00276     char **inputs;    
00277     int    ninputs;   
00279     // -- operation flags
00280     bool use_list;    
00282     // -- file data
00283     mifstream           in;    
00284     mifstream           list;  
00286     // -- buffer data
00287     std::string         line; 
00289   public:
00291     cmdutil_file_churner(char *my_progname=NULL,
00292                          char **my_inputs=NULL,
00293                          int my_ninputs=0,
00294                          bool my_use_list=false)
00295       : progname(my_progname),
00296         inputs(my_inputs),
00297         ninputs(my_ninputs),
00298         use_list(my_use_list)
00299     {};
00300 
00302     ~cmdutil_file_churner() {};
00303 
00305     FILE *first_input_file();
00306 
00308     std::string &first_input_name();
00309 
00311     FILE *next_input_file();
00312 
00314     std::string &next_input_name();
00315 
00316   private:
00318     FILE *next_list_file();
00319   };
00321 
00322   /*----------------------------------------------------------------------*/
/*
 * moot_banner(): declaration only; defined elsewhere. Takes no arguments
 * and returns a std::string.
 * NOTE(review): presumably the library's name/version banner text -- confirm.
 */
00326   std::string moot_banner(void);
00327 
/*
 * moot_program_banner(): declaration only; defined elsewhere.
 * Builds a std::string from a program name, version and author;
 * `is_free` defaults to true.
 * NOTE(review): presumably a per-program banner, with `is_free` selecting
 * a free-software notice -- exact wording/format not visible here; confirm.
 */
00329   std::string moot_program_banner(const std::string &prog_name,
00330                                   const std::string &prog_version,
00331                                   const std::string &prog_author,
00332                                   bool is_free=true);
00334 
00335 }; /* namespace moot */
00336 
00337 #endif /* _moot_UTILS_H */

Generated on Mon Jun 27 13:05:25 2005 for libmoot by  doxygen 1.3.8-20040913