Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootUtils.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2006 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This library is free software; you can redistribute it and/or
00008    modify it under the terms of the GNU Lesser General Public
00009    License as published by the Free Software Foundation; either
00010    version 2.1 of the License, or (at your option) any later version.
00011    
00012    This library is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015    Lesser General Public License for more details.
00016    
00017    You should have received a copy of the GNU Lesser General Public
00018    License along with this library; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootUtils.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + moocow's PoS tagger : useful utilities
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _moot_UTILS_H
00030 #define _moot_UTILS_H
00031 
00032 #include <stdio.h>
00033 #include <string.h>
00034 #include <string>
00035 #include <list>
00036 
00037 #include <mootIO.h>
00038 #include <mootCIO.h>
00039 
00040 namespace moot {
/* NOTE(review): these using-directives live in a header, inside namespace
 * moot.  Every translation unit that includes this file therefore sees all
 * of std and mootio when looking names up inside moot.  Removing them would
 * be the usual recommendation, but code elsewhere may rely on them -- check
 * before changing. */
00041   using namespace std;
00042   using namespace mootio;
00043 
00044   /*----------------------------------------------------------------------*/
00047   
/**
 * Declaration only; the definition is not part of this header.
 * Judging by the name and signature this presumably parses up to \a ndbls
 * floating-point values out of \a str into the array \a dbls and reports
 * success through the return value -- TODO confirm against the implementation.
 * NOTE(review): \a str is a non-const char*, so the implementation is allowed
 * to modify the caller's buffer -- verify whether it does.
 */
00052   bool moot_parse_doubles(char *str, double *dbls, size_t ndbls);
00053   
00054   /*----------------------------------------------------------------------
00055    * normalize_ws_*()
00056    */
/**
 * Buffer-and-length form of moot_normalize_ws(); declaration only, the
 * definition lives outside this header.  All other overloads in this header
 * that take a C string or return a std::string end up calling this one.
 * Presumably it normalizes whitespace in the first \a len bytes of \a buf and
 * stores the result in \a out -- TODO confirm the exact normalization rules
 * and whether \a out is cleared or appended to.
 * \param buf        input bytes (read-only)
 * \param len        number of bytes of \a buf to consider
 * \param out        destination string, taken by non-const reference
 * \param trim_left  defaults to true; presumably strips leading whitespace
 * \param trim_right defaults to true; presumably strips trailing whitespace
 */
00068   void moot_normalize_ws(const char *buf,
00069                          size_t len,
00070                          std::string &out,
00071                          bool trim_left=true,
00072                          bool trim_right=true);
00073 
/**
 * std::string-input form of moot_normalize_ws(); declaration only, the
 * definition lives outside this header.
 * Presumably equivalent to the (buf, len, out, ...) overload applied to the
 * contents of \a in -- TODO confirm, including what happens when \a in and
 * \a out refer to the same object.
 * \param in         input string (read-only)
 * \param out        destination string, taken by non-const reference
 * \param trim_left  defaults to true
 * \param trim_right defaults to true
 */
00084   void moot_normalize_ws(const std::string &in,
00085                          std::string &out,
00086                          bool trim_left=true,
00087                          bool trim_right=true);
00088 
/**
 * Convenience overload for NUL-terminated C strings: measures \a s with
 * strlen() and forwards everything to the
 * (buf, len, out, trim_left, trim_right) overload.
 * \param s          NUL-terminated input; it is handed to strlen() without a
 *                   check, so it must not be a null pointer
 * \param out        destination string, passed through by reference
 * \param trim_left  forwarded unchanged (defaults to true)
 * \param trim_right forwarded unchanged (defaults to true)
 * NOTE(review): the ';' after the closing brace is a redundant empty
 * declaration; harmless, but pedantic builds warn about it.
 */
00098   inline void moot_normalize_ws(const char *s,
00099                                 std::string &out,
00100                                 bool trim_left=true,
00101                                 bool trim_right=true)
00102   {
00103     moot_normalize_ws(s, strlen(s), out, trim_left, trim_right);
00104   };
00105 
/**
 * Value-returning variant of the (buf, len, out, ...) overload.
 * Creates a local std::string, reserves \a len bytes of capacity up front,
 * passes it as the out argument of the out-parameter overload and returns
 * it by value.
 * \param buf        input bytes (read-only)
 * \param len        number of bytes of \a buf to consider; also used as the
 *                   capacity hint for the result
 * \param trim_left  forwarded unchanged (defaults to true)
 * \param trim_right forwarded unchanged (defaults to true)
 * \return the string produced by the out-parameter overload
 */
00115   inline std::string moot_normalize_ws(const char *buf,
00116                                        size_t len,
00117                                        bool trim_left=true,
00118                                        bool trim_right=true)
00119   {
00120     std::string out;
00121     out.reserve(len);
00122     moot_normalize_ws(buf,len, out, trim_left,trim_right);
00123     return out;
00124   };
00125 
/**
 * Value-returning overload for NUL-terminated C strings: measures \a s with
 * strlen() and returns whatever the value-returning
 * (buf, len, trim_left, trim_right) overload yields.
 * \param s          NUL-terminated input; handed to strlen() without a check,
 *                   so it must not be a null pointer
 * \param trim_left  forwarded unchanged (defaults to true)
 * \param trim_right forwarded unchanged (defaults to true)
 */
00134   inline std::string moot_normalize_ws(const char *s,
00135                                        bool trim_left=true,
00136                                        bool trim_right=true)
00137   {
00138     return moot_normalize_ws(s,strlen(s), trim_left,trim_right);
00139   };
00140 
/**
 * Value-returning overload for std::string input: forwards s.data() and
 * s.size() to the value-returning (buf, len, trim_left, trim_right)
 * overload.  Because the explicit size is passed, the whole string is
 * handed over, including anything after an embedded NUL byte.
 * \param s          input string (read-only)
 * \param trim_left  forwarded unchanged (defaults to true)
 * \param trim_right forwarded unchanged (defaults to true)
 */
00149   inline std::string moot_normalize_ws(const std::string &s,
00150                                        bool trim_left=true,
00151                                        bool trim_right=true)
00152   {
00153     return moot_normalize_ws(s.data(),s.size(), trim_left,trim_right);
00154   };
00155 
00156   /*----------------------------------------------------------------------
00157    * remove_newlines()
00158    */
/**
 * Overwrites every '\n' among the first \a len characters of \a buf with a
 * single space, in place.  Despite the name nothing is removed: the buffer
 * keeps its length.  Only '\n' is touched ('\r' is left alone), and the scan
 * is driven purely by \a len -- it does not stop at a NUL byte.
 * \param buf  writable buffer holding at least \a len characters
 * \param len  number of characters to scan; 0 makes the call a no-op
 * NOTE(review): the ';' after the closing brace is a redundant empty
 * declaration; harmless, but pedantic builds warn about it.
 */
00166   inline void moot_remove_newlines(char *buf, size_t len)
00167   {
00168     for (; len > 0; len--, buf++) {
00169       if (*buf == '\n') *buf = ' ';
00170     }
00171   };
00172 
/**
 * NUL-terminated variant: measures \a s with strlen() and delegates to
 * moot_remove_newlines(char*, size_t), which replaces each '\n' by a space
 * in place.
 * \param s  writable NUL-terminated string; handed to strlen() without a
 *           check, so it must not be a null pointer
 */
00174   inline void moot_remove_newlines(char *s)
00175   {
00176     moot_remove_newlines(s, strlen(s));
00177   };
00178 
/**
 * std::string variant: walks \a s from begin() to end() and overwrites every
 * '\n' with a single space, in place.  The string's length is unchanged.
 * \param s  string to modify
 */
00180   inline void moot_remove_newlines(std::string &s)
00181   {
00182     for (std::string::iterator si = s.begin(); si != s.end(); si++) {
00183       if (*si == '\n') *si = ' ';
00184     }
00185   };
00186 
/**
 * Out-parameter form of moot_strtok(); declaration only, the definition
 * lives outside this header.
 * Presumably splits \a s into tokens at characters found in \a delim and
 * stores them in \a out -- TODO confirm how empty tokens are handled and
 * whether \a out is cleared first.
 * \param s      string to tokenize (read-only)
 * \param delim  delimiter specification (read-only)
 * \param out    destination list, taken by non-const reference
 */
00193   void moot_strtok(const std::string &s,
00194                    const std::string &delim,
00195                    std::list<std::string> &out);
00196 
/**
 * Value-returning wrapper around the out-parameter moot_strtok(): creates an
 * empty local list, passes it as the out argument, and returns it by value.
 * \param s      string to tokenize (read-only)
 * \param delim  delimiter specification (read-only)
 * \return the list as left by the out-parameter overload
 */
00202   inline std::list<std::string> moot_strtok(const std::string &s,
00203                                             const std::string &delim)
00204   {
00205     std::list<std::string> slist;
00206     moot_strtok(s,delim,slist);
00207     return slist;
00208   };
00210 
00211   /*----------------------------------------------------------------------*/
00214   
/**
 * Declaration only; the definition is not part of this header.
 * Presumably reports whether \a filename names an existing file -- TODO
 * confirm how the check is done (and whether readability is required).
 */
00216   bool moot_file_exists(const char *filename);
00217   
/**
 * Declaration only; the definition is not part of this header.
 * Presumably returns \a filename with its extension stripped -- TODO confirm
 * what counts as the extension and whether directory parts are kept.
 * The result is a new std::string returned by value.
 */
00219   std::string moot_unextend(const char *filename);
00220 
/**
 * Declaration only; the definition is not part of this header.
 * Presumably locates a filename extension in \a filename, with \a pos acting
 * as a position to search from -- TODO confirm the meaning of \a pos and the
 * value returned when no extension is found.
 * NOTE(review): the input is const char* but the result is a non-const
 * char*; whether it points into \a filename or at separate storage is not
 * visible from this header.
 */
00227   char *moot_extension(const char *filename, size_t pos);
00228 
/**
 * Convenience overload: calls moot_extension(filename, pos) with
 * pos = strlen(filename), i.e. the index of the terminating NUL.
 * \param filename  NUL-terminated name; handed to strlen() without a check,
 *                  so it must not be a null pointer
 * \return whatever the two-argument overload returns
 */
00230   inline char *moot_extension(const char *filename)
00231   {
00232     return moot_extension(filename, strlen(filename));
00233   };
00234 
/**
 * Declaration only; the definition is not part of this header.
 * Takes a model name and four output strings by non-const reference.
 * Presumably derives the binary, lexical, n-gram and class file names that
 * belong to \a modelname and returns whether that succeeded -- TODO confirm
 * the naming rules and which outputs may be left empty.
 * \param modelname  model name to interpret (read-only)
 * \param binfile    output, presumably the binary model file name
 * \param lexfile    output, presumably the lexical frequency file name
 * \param ngfile     output, presumably the n-gram frequency file name
 * \param lcfile     output, presumably the lexical-class file name
 */
00244   bool hmm_parse_model_name(const std::string &modelname,
00245                             std::string &binfile,
00246                             std::string &lexfile,
00247                             std::string &ngfile,
00248                             std::string &lcfile);
00249 
/**
 * Declaration only; the definition is not part of this header.
 * Same shape as hmm_parse_model_name() but without the binfile output, so
 * presumably it handles text-format models only -- TODO confirm.
 * \param modelname  model name to interpret (read-only)
 * \param lexfile    output, presumably the lexical frequency file name
 * \param ngfile     output, presumably the n-gram frequency file name
 * \param lcfile     output, presumably the lexical-class file name
 */
00258   bool hmm_parse_model_name_text(const std::string &modelname,
00259                                  std::string &lexfile,
00260                                  std::string &ngfile,
00261                                  std::string &lcfile);
00262 
00263 
00264 
/**
 * Helper for command-line programs that work through a sequence of input
 * files.  The object is configured with a program name and an array of
 * input arguments, and hands out inputs one at a time through
 * first_input_file()/next_input_file() (as FILE*) or
 * first_input_name()/next_input_name() (as names).
 * Only the constructor and destructor are defined here; all iteration
 * methods are declared only, so their exact behaviour has to be checked
 * against the implementation file.
 * NOTE(review): the constructor stores the progname and inputs pointers as
 * given (no copy) and the destructor body is empty, so the caller keeps
 * ownership of both and must keep them alive while the object is used.
 */
00269   class cmdutil_file_churner {
00270   public:
00271     // -- command-line data
00272     char  *progname;  /**< set from my_progname; presumably the program name used in messages */
00273     char **inputs;    /**< set from my_inputs; presumably an argv-style array of input arguments */
00274     int    ninputs;   /**< set from my_ninputs; presumably the number of entries in inputs */
00276     // -- operation flags
00277     bool use_list;        /**< set from my_use_list (default false); presumably: inputs name list files -- TODO confirm */
00278     bool paranoid;        /**< set from my_paranoid (default true); exact effect not visible here -- TODO confirm */
00280     // -- file data
00281     mifstream           in;    /**< default-constructed; presumably the current input stream */
00282     mifstream           list;  /**< default-constructed; presumably the current list-file stream */
00284     // -- buffer data
00285     std::string         line; /**< default-constructed (empty) string buffer */
00287   private:
00288     bool is_first_input;      /**< starts out true; maintained by methods defined elsewhere */
00290   public:
        /**
         * Stores the five arguments in the corresponding members and sets
         * is_first_input to true.  in, list and line are default-constructed.
         * Every parameter has a default, so this also serves as the default
         * constructor.
         */
00292     cmdutil_file_churner(char *my_progname=NULL,
00293                          char **my_inputs=NULL,
00294                          int my_ninputs=0,
00295                          bool my_use_list=false,
00296                          bool my_paranoid=true)
00297       : progname(my_progname),
00298         inputs(my_inputs),
00299         ninputs(my_ninputs),
00300         use_list(my_use_list),
00301         paranoid(my_paranoid),
00302         is_first_input(true)
00303     {};
00304 
        /** Empty body: nothing is released explicitly; members clean up through their own destructors. */
00306     ~cmdutil_file_churner() {};
00307 
        /** Declared only.  Presumably opens and returns the first input as a FILE* -- TODO confirm the failure value. */
00311     FILE *first_input_file();
00312 
        /** Declared only.  Presumably returns the first input's name; returns a reference, so check what it refers to. */
00316     std::string &first_input_name();
00317 
        /** Declared only.  Presumably advances to and returns the next input as a FILE* -- TODO confirm end-of-input value. */
00319     FILE *next_input_file();
00320 
        /** Declared only.  Presumably advances to and returns the next input's name by reference -- TODO confirm. */
00322     std::string &next_input_name();
00323 
00324   private:
        /** Declared only; private helper, presumably used when use_list is set -- TODO confirm. */
00326     FILE *next_list_file();
00327   };
00329 
00330   /*----------------------------------------------------------------------*/
/**
 * Declaration only; the definition is not part of this header.
 * Takes no arguments and returns a std::string by value, presumably a
 * banner text identifying the library -- TODO confirm the contents.
 */
00334   std::string moot_banner(void);
00335 
/**
 * Declaration only; the definition is not part of this header.
 * Returns a std::string by value, presumably a banner text built from the
 * given program name, version and author -- TODO confirm the format.
 * \param prog_name     program name (read-only)
 * \param prog_version  program version (read-only)
 * \param prog_author   program author (read-only)
 * \param is_free       defaults to true; presumably selects a free-software
 *                      notice in the banner -- TODO confirm
 */
00337   std::string moot_program_banner(const std::string &prog_name,
00338                                   const std::string &prog_version,
00339                                   const std::string &prog_author,
00340                                   bool is_free=true);
00342 
00343 }; /* namespace moot */
00344 
00345 #endif /* _moot_UTILS_H */

Generated on Mon Sep 11 16:10:33 2006 for libmoot by doxygen 1.2.18