Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

mootUtils.h

Go to the documentation of this file.
00001 /* -*- Mode: C++ -*- */
00002 
00003 /*
00004    libmoot : moocow's part-of-speech tagging library
00005    Copyright (C) 2003-2004 by Bryan Jurish <moocow@ling.uni-potsdam.de>
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 2 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program; if not, write to the Free Software
00019    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
00020 */
00021 
00022 /*--------------------------------------------------------------------------
00023  * File: mootUtils.h
00024  * Author: Bryan Jurish <moocow@ling.uni-potsdam.de>
00025  * Description:
00026  *   + moocow's PoS tagger : useful utilities
00027  *--------------------------------------------------------------------------*/
00028 
00029 #ifndef _moot_UTILS_H
00030 #define _moot_UTILS_H
00031 
00032 #include <stdio.h>
00033 #include <string.h>
00034 #include <string>
00035 #include <list>
00036 
00037 #include <mootIO.h>
00038 #include <mootCIO.h>
00039 
00040 namespace moot {
00041   using namespace std;
00042   using namespace mootio;
00043 
00044   /*----------------------------------------------------------------------*/
00047   
/**
 * Declaration only; the definition lives outside this header.
 * Takes a mutable C string, an array of doubles and an element count, and
 * returns a bool.
 * NOTE(review): presumably parses up to \a ndbls numbers out of \a str into
 * \a dbls and returns true on success; \a str is non-const, so the parser may
 * modify it -- confirm against the implementation.
 */
00052   bool moot_parse_doubles(char *str, double *dbls, size_t ndbls);
00053   
00054   /*----------------------------------------------------------------------
00055    * moot_normalize_ws() overloads
00056    */
/**
 * Whitespace normalization over a (pointer, length) buffer, with the result
 * delivered through \a out. Declaration only; the definition is not in this
 * header. Both trim flags default to true.
 * NOTE(review): the name and flags suggest that runs of whitespace are
 * collapsed and that leading/trailing whitespace is dropped when the
 * corresponding flag is set -- confirm against the implementation, including
 * whether \a out is cleared first or appended to.
 */
00068   void moot_normalize_ws(const char *buf,       // start of the input buffer
00069                          size_t len,            // length passed with buf (inline overloads pass strlen()/size())
00070                          std::string &out,      // destination string
00071                          bool trim_left=true,   // NOTE(review): presumably "strip leading whitespace"
00072                          bool trim_right=true); // NOTE(review): presumably "strip trailing whitespace"
00073 
/**
 * std::string-input variant of moot_normalize_ws(): reads \a in and delivers
 * the result through \a out. Declaration only; the definition is not in this
 * header. Both trim flags default to true.
 * NOTE(review): whether \a in and \a out may refer to the same string cannot
 * be determined from this header -- confirm before aliasing them.
 */
00084   void moot_normalize_ws(const std::string &in,   // input text
00085                          std::string &out,        // destination string
00086                          bool trim_left=true,     // NOTE(review): presumably "strip leading whitespace"
00087                          bool trim_right=true);   // NOTE(review): presumably "strip trailing whitespace"
00088 
00098   inline void moot_normalize_ws(const char *s,
00099                                 std::string &out,
00100                                 bool trim_left=true,
00101                                 bool trim_right=true)
00102   {
00103     moot_normalize_ws(s, strlen(s), out, trim_left, trim_right);
00104   };
00105 
00115   inline std::string moot_normalize_ws(const char *buf,
00116                                        size_t len,
00117                                        bool trim_left=true,
00118                                        bool trim_right=true)
00119   {
00120     std::string out;
00121     out.reserve(len);
00122     moot_normalize_ws(buf,len, out, trim_left,trim_right);
00123     return out;
00124   };
00125 
00134   inline std::string moot_normalize_ws(const char *s,
00135                                        bool trim_left=true,
00136                                        bool trim_right=true)
00137   {
00138     return moot_normalize_ws(s,strlen(s), trim_left,trim_right);
00139   };
00140 
00149   inline std::string moot_normalize_ws(const std::string &s,
00150                                        bool trim_left=true,
00151                                        bool trim_right=true)
00152   {
00153     return moot_normalize_ws(s.data(),s.size(), trim_left,trim_right);
00154   };
00155 
00156   /*----------------------------------------------------------------------
00157    * moot_remove_newlines() overloads
00158    */
/**
 * Overwrite every '\n' among the first \a len bytes of \a buf with a space,
 * in place.
 *
 * Only the line-feed character is replaced; all other bytes (including '\r'
 * and NUL) are left untouched, and bytes at or beyond \a len are never read.
 *
 * \param buf  writable buffer of at least \a len bytes
 * \param len  number of bytes to examine
 */
inline void moot_remove_newlines(char *buf, size_t len)
{
  char *const stop = buf + len;
  while (buf != stop) {
    if (*buf == '\n') {
      *buf = ' ';
    }
    ++buf;
  }
}
00172 
/**
 * Overwrite every '\n' in the NUL-terminated string \a s with a space, in
 * place.
 *
 * The scan runs from \a s up to (not including) the terminating NUL, which
 * is the same range the strlen()-based form covers.
 *
 * \param s  writable NUL-terminated string; must not be NULL
 */
inline void moot_remove_newlines(char *s)
{
  for (char *cursor = s; *cursor != '\0'; ++cursor) {
    if (*cursor == '\n') {
      *cursor = ' ';
    }
  }
}
00178 
/**
 * Overwrite every '\n' in \a s with a space, in place.
 *
 * The string's length is unchanged and embedded NUL characters do not stop
 * the scan; only line-feed characters are touched.
 *
 * \param s  string to modify
 */
inline void moot_remove_newlines(std::string &s)
{
  std::string::size_type hit = s.find('\n');
  while (hit != std::string::npos) {
    s[hit] = ' ';
    hit = s.find('\n', hit + 1);
  }
}
00186 
/**
 * Tokenizer with a list out-parameter. Declaration only; the definition is
 * not in this header.
 * NOTE(review): presumably splits \a s at characters contained in \a delim
 * (strtok()-style) and stores the pieces in \a out -- confirm against the
 * implementation, including how empty tokens are treated and whether \a out
 * is cleared first.
 */
00193   void moot_strtok(const std::string &s,          // text to split
00194                    const std::string &delim,      // delimiter specification
00195                    std::list<std::string> &out);  // receives the tokens
00196 
00202   inline std::list<std::string> moot_strtok(const std::string &s,
00203                                             const std::string &delim)
00204   {
00205     std::list<std::string> slist;
00206     moot_strtok(s,delim,slist);
00207     return slist;
00208   };
00210 
00211   /*----------------------------------------------------------------------*/
00214   
/**
 * Declaration only; the definition is not in this header.
 * NOTE(review): presumably returns true when \a filename names an existing
 * file -- how existence is tested (stat, open for reading, ...) is not
 * visible here; confirm against the implementation.
 */
00216   bool moot_file_exists(const char *filename);
00217   
/**
 * Declaration only; the definition is not in this header. Takes a C-string
 * file name and returns a std::string by value.
 * NOTE(review): presumably returns \a filename with its final extension
 * removed -- confirm against the implementation (e.g. handling of names
 * without a '.' or with a '.' in a directory component).
 */
00219   std::string moot_unextend(const char *filename);
00220 
/**
 * Locate the last '.' within the first \a pos bytes of \a filename.
 *
 * The backward scan is written out by hand instead of calling memrchr():
 * memrchr() is a GNU extension that several C libraries do not provide, so
 * relying on it made this header non-portable.
 *
 * \param filename  buffer to search; a NULL pointer yields NULL
 * \param pos       number of leading bytes of \a filename to examine
 * \return pointer to the last '.' in [filename, filename+pos), or NULL when
 *         that range contains none. The scan is purely byte-based: it does
 *         not stop at NUL bytes or at path separators. The result points
 *         into \a filename itself (constness is cast away, as before).
 */
inline char *moot_extension(const char *filename, size_t pos)
{
  if (filename == NULL) {
    return NULL;
  }
  const char *cursor = filename + pos;
  while (cursor != filename) {
    --cursor;
    if (*cursor == '.') {
      return const_cast<char *>(cursor);
    }
  }
  return NULL;
}
00231 
/**
 * Locate the last '.' in the NUL-terminated string \a filename.
 *
 * Searching the whole string up to its terminator is exactly what strrchr()
 * does, so it is used directly here.
 *
 * \param filename  NUL-terminated name to search; must not be NULL
 * \return pointer to the last '.' in \a filename, or NULL when it contains
 *         none. The result points into \a filename itself (constness is cast
 *         away).
 */
inline char *moot_extension(const char *filename)
{
  return const_cast<char *>(strrchr(filename, '.'));
}
00237 
/**
 * Declaration only; the definition is not in this header. Takes a model name
 * and four string out-parameters, and returns a bool.
 * NOTE(review): presumably derives the binary, lexical, n-gram and
 * lexical-class file names from \a modelname and returns true on success --
 * the naming rules and the meaning of the return value are not visible here;
 * confirm against the implementation.
 */
00247   bool hmm_parse_model_name(const std::string &modelname,  // model specification supplied by the caller
00248                             std::string &binfile,          // out; NOTE(review): presumably the binary model file
00249                             std::string &lexfile,          // out; NOTE(review): presumably the lexical frequency file
00250                             std::string &ngfile,           // out; NOTE(review): presumably the n-gram frequency file
00251                             std::string &lcfile);          // out; NOTE(review): presumably the lexical-class file
00252 
/**
 * Declaration only; the definition is not in this header. Same parameter
 * list as hmm_parse_model_name() minus the \c binfile out-parameter.
 * NOTE(review): presumably the text-model-only variant of
 * hmm_parse_model_name() -- confirm against the implementation.
 */
00261   bool hmm_parse_model_name_text(const std::string &modelname,  // model specification supplied by the caller
00262                                  std::string &lexfile,          // out; NOTE(review): presumably the lexical frequency file
00263                                  std::string &ngfile,           // out; NOTE(review): presumably the n-gram frequency file
00264                                  std::string &lcfile);          // out; NOTE(review): presumably the lexical-class file
00265 
00266 
00267 
/**
 * Bundles a command-line input specification (program name, an array of
 * input arguments and its count, plus a use_list flag) with two mifstream
 * members and a line buffer, and declares accessors for stepping from the
 * first input to subsequent ones.
 *
 * Only the constructor and destructor are defined in this header; every
 * other member function is declared here and defined elsewhere. The
 * constructor copies the pointers it is given (no strings are duplicated)
 * and the destructor body is empty, so this class does not free progname or
 * inputs itself.
 */
00272   class cmdutil_file_churner {
00273   public:
00274     // -- command-line data
00275     char  *progname;   // program name pointer as passed to the constructor (default NULL)
00276     char **inputs;     // array of input arguments as passed to the constructor (default NULL)
00277     int    ninputs;    // count passed alongside inputs (default 0); NOTE(review): presumably the number of entries in inputs
00279     // -- operation flags
00280     bool use_list;     // NOTE(review): presumably "inputs name list files that in turn name the data files" -- confirm
00282     // -- file data
00283     mifstream           in;     // NOTE(review): presumably the stream for the current input file
00284     mifstream           list;   // NOTE(review): presumably the stream for the current list file (cf. use_list)
00286     // -- buffer data
00287     std::string         line;  // string buffer; NOTE(review): presumably the most recent line read from list
00289   public:
        // Stores the four arguments in the corresponding members; the stream
        // and string members are default-constructed. Nothing is opened here.
00291     cmdutil_file_churner(char *my_progname=NULL,
00292                          char **my_inputs=NULL,
00293                          int my_ninputs=0,
00294                          bool my_use_list=false)
00295       : progname(my_progname),
00296         inputs(my_inputs),
00297         ninputs(my_ninputs),
00298         use_list(my_use_list)
00299     {};
00300 
        // Empty body: only the members' own destructors run.
00302     ~cmdutil_file_churner() {};
00303 
        // Declared only. NOTE(review): presumably opens and returns the first input file -- confirm.
00305     FILE *first_input_file();
00306 
        // Declared only; returns a reference to a std::string. NOTE(review): presumably the first input's name -- confirm which object the reference is bound to.
00308     std::string &first_input_name();
00309 
        // Declared only. NOTE(review): presumably advances to and returns the next input file, with some end-of-input indication (NULL?) -- confirm.
00311     FILE *next_input_file();
00312 
        // Declared only; returns a reference to a std::string. NOTE(review): presumably the next input's name -- confirm.
00314     std::string &next_input_name();
00315 
00316   private:
        // Declared only. NOTE(review): presumably the list-file counterpart of next_input_file() used when use_list is set -- confirm.
00318     FILE *next_list_file();
00319   };
00321 
00322   /*----------------------------------------------------------------------*/
/**
 * Declaration only; the definition is not in this header. Takes no arguments
 * and returns a std::string by value.
 * NOTE(review): presumably the library's name/version/copyright banner text
 * -- confirm against the implementation.
 */
00326   std::string moot_banner(void);
00327 
/**
 * Declaration only; the definition is not in this header. Returns a
 * std::string by value built from the given program details.
 * NOTE(review): presumably formats a per-program banner from the name,
 * version and author, with \a is_free selecting whether free-software
 * wording is included -- confirm against the implementation.
 */
00329   std::string moot_program_banner(const std::string &prog_name,     // program name
00330                                   const std::string &prog_version,  // program version string
00331                                   const std::string &prog_author,   // program author
00332                                   bool is_free=true);               // defaults to true; NOTE(review): presumably toggles free-software wording
00334 
00335 }; /* namespace moot */
00336 
00337 #endif /* _moot_UTILS_H */

Generated on Wed Jul 28 15:48:03 2004 for libmoot by doxygen 1.2.15