00001
00002
00003
00004
00005 #ifndef utilit_h
00006 #define utilit_h
00007
00008
00009 #include "ddcConfig.h"
00010 #include <stdint.h>
00011
00012
00013
00014 #undef DDC_USE_UTF8
00015
00016
00017
00018 #include <stdio.h>
00019
00020
00021 #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
00022
00023 #ifdef WIN32
00024
00025
// Optional leak-tracking hooks. They are compiled only when DETECT_MEMORY_LEAK is
// defined, and only on WIN32 because this region sits inside the enclosing #ifdef WIN32.
// The two declarations are placement forms of operator new / operator delete that
// additionally receive a file name and a line number.
00026 #ifdef DETECT_MEMORY_LEAK
00027 extern void* operator new(size_t nSize, const char* lpszFileName, int nLine);
00028 extern void operator delete(void* p, const char* lpszFileName, int nLine);
// DEBUG_NEW expands to the placement form declared above. THIS_FILE is not defined
// anywhere in this header, so the including source file is expected to provide it
// -- TODO confirm.
00029 #define DEBUG_NEW new(THIS_FILE, __LINE__)
00030 #endif
00031
00032
00033 #include "windows.h"
00034 #include "winuser.h"
00035 #endif
00036
00037 #include <assert.h>
00038 #include <stdarg.h>
00039 #include <string.h>
00040 #include <ctype.h>
00041 #include <string>
00042 #include <algorithm>
00043 #include <stdexcept>
00044 #include <vector>
00045 #include "set"
00046 #include "stack"
00047 #include "map"
00048 #include "time.h"
00049
00050
// Silence a set of Microsoft compiler warnings for every file that includes this header.
// The guard only checks for "not GCC", so the pragmas are seen by any other compiler too.
00051 #ifndef __GNUC__
// C4018: signed/unsigned mismatch in a comparison
00052 #pragma warning (disable : 4018)
// C4244: conversion with possible loss of data
00053 #pragma warning (disable : 4244)
// C4103: alignment changed after including a header (unbalanced #pragma pack)
00054 #pragma warning (disable : 4103)
// C4530: C++ exception handler used but unwind semantics are not enabled
00055 #pragma warning (disable : 4530)
// C4251: class member needs a dll-interface to be used by clients of the class
00056 #pragma warning (disable : 4251)
// C4996: use of a function or variable marked as deprecated
00057 #pragma warning (disable : 4996)
00058 #endif
00059
00060
00061 #ifdef STLPORT
00062 using namespace stlport;
00063 #else
00064 using namespace std;
00065 #endif
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
// Windows-style integer aliases used throughout this header.
// BYTE is declared unconditionally; the remaining names depend on the platform.
00076 typedef unsigned char BYTE;
00077
// WIN32 branch: only QWORD is declared here. DWORD, WORD, UINT and _MAX_PATH are
// not declared in this branch -- presumably they come from the Windows headers
// included earlier; confirm.
00078 #ifdef WIN32
00079 #include <io.h>
00080 #include <fcntl.h>
00081 typedef __int64 QWORD;
00082
// Non-WIN32 branch: declare the Windows-style names locally.
// Note that QWORD is a signed type on both branches.
00083 #else
00084 #include <unistd.h>
00085 typedef unsigned int DWORD;
00086 typedef unsigned short WORD;
00087 typedef long long QWORD;
00088 typedef unsigned int UINT;
00089 const unsigned int _MAX_PATH = 512;
00090 #endif
00091
// Short names for frequently used containers. The unqualified vector/set/string
// resolve through the using-directive earlier in this header.
00092 typedef vector<string> StringVector;
00093 typedef set<string> StringSet;
00094 typedef vector<DWORD> DwordVector;
00095
00096 inline QWORD GetMaxQWORD ()
00097 {
00098 #ifdef WIN32
00099 return 0xffffffffffffffff;
00100 #else
00101 return 0xffffffffffffffffULL;
00102 #endif
00103 };
00104
00105 const BYTE cHyphenChar = (BYTE) '-';
00106 const BYTE cPeriodChar = (BYTE) '.';
00107 const BYTE cEofChar = (BYTE) 0x1a;
00108 const BYTE cPtrChar = (BYTE) 0x13;
00109 const BYTE cSpaceChar = (BYTE) 32;
00110 const BYTE cBackChar = (BYTE) 'Ђ';
00111 const BYTE cFullChar = (BYTE) '«';
00112 const BYTE cNumChar = (BYTE) 'N';
00113 const BYTE cAmpChar = (BYTE) '&';
00114 const BYTE cParagraph = (BYTE) 21;
00115 const BYTE cCompanyChar = (BYTE) 176;
00116 const BYTE cIonChar = (BYTE) 183;
00117 const BYTE cNumberChar = (BYTE) 0xB9;
00118 const BYTE cPiChar = (BYTE) 182;
00119 const BYTE cEllipseChar = (BYTE) 133;
00120
00121
// Language / token-class selector passed to the language-dependent helpers declared
// in this header. Enumerator values are fixed explicitly (0..6).
00122 typedef enum { morphUnknown = 0, morphRussian = 1, morphEnglish = 2, morphGerman = 3, morphGeneric = 4, morphURL=5, morphDigits=6} MorphLanguageEnum;
00123
00124 template <class T1, class T2, class T3>
00125 struct troika : public pair<T1, T2>
00126 {
00127 T3 third;
00128 };
00129
00130
// Type used for file sizes and offsets by FileSize/FSeek/FTell; an alias of QWORD.
00131 typedef QWORD file_off_t;
00132
// Error object carrying a textual cause and an integer error code.
// The class has no base class (it does not derive from std::exception).
// NOTE(review): presumably thrown as an exception by the library -- confirm against callers.
// Only declarations are visible here; the member functions are defined elsewhere.
00133 class CExpc
00134 {
00135 public:
// Text describing the error.
00136 string m_strCause;
// Numeric error code; its value when a constructor without ErrorCode is used
// is not visible from this header.
00137 int m_ErrorCode;
00138
// Construct from an explicit code and cause text.
00139 CExpc(int ErrorCode, const string& Cause);
// Construct from cause text only (not marked explicit, so string converts implicitly).
00140 CExpc(const string& Cause);
00141 CExpc(const CExpc& from);
// Variadic constructor taking a format string -- presumably printf-style; confirm.
00142 CExpc(const char* format, ... );
00143 CExpc& operator= (const CExpc& from);
00144 };
00145
00146
00147
// File query / positioning helpers (declarations only; definitions live elsewhere).
// Sizes and offsets use file_off_t, which this header aliases to QWORD.
// NOTE(review): FSeek/FTell look like wide-offset counterparts of fseek/ftell -- confirm.
00148 extern bool FileExists (const char *FName);
00149 extern file_off_t FileSize (const char *filename);
00150 extern bool FSeek(FILE* fp, file_off_t pos, int origin);
00151 extern file_off_t FTell(FILE* fp);
00152
00153
00154
// Error reporting and path/file-name helpers (declarations only).
// ErrorMessage is overloaded: with and without a title string.
// MakePath writes its result through the caller-supplied FullPath buffer; the
// required buffer size is not visible from this header.
// GetPathByFile takes its argument by value.
00155 extern void ErrorMessage (const string& Titul, const string& Message);
00156 extern void ErrorMessage (const string& Message);
00157 extern string MakeFName ( const string& InpitFileName, const string& Ext);
00158 extern bool MakePath (const char* RossPath, const char* FileName, char* FullPath);
00159 extern string GetPathByFile (string FileName);
00160 extern bool IsBinFile (const char* FileName);
00161 extern bool IsEmptyLine ( const char *t);
00162
00163
// File-type predicates taking a file name, plus a helper returning a file extension.
// NOTE(review): whether the checks look at the name only or at file content is not
// visible here -- confirm in the implementation.
00164 extern bool IsHtmlFile (const string& FileName);
00165 extern bool IsTableFile (const string& FileName);
00166 extern bool IsJsonFile (const string& FileName);
00167 extern string FileExtension (const string& FileName);
00168
// File manipulation helpers (declarations only). The bool results presumably
// signal success -- confirm in the implementation.
00169 extern void AddFile(const char* MainFile, const char* ToAdd);
00170 extern bool RmlMoveFile(const char *oldpath, const char *newpath);
00171 extern bool RmlCopyFile(const char *oldpath, const char *newpath);
00172 extern string CreateTempFileName();
00173 extern bool MakeDir(const string& txt);
00174 extern bool RemoveWithPrint (const string& FileName);
00175
00176
// Configuration ("registry") access, licensing/time checks and RML path helpers
// (declarations only). The registry functions take their string arguments by value.
// IsRmlRegistered reports details through its Error out-parameter;
// ReadTimeOutFromRegistry returns the value through TimeOut.
// NOTE(review): the backing store (OS registry vs. ini file) is decided in the
// implementation, not here -- confirm.
00177 extern string GetRegistryString (string RegistryPath);
00178 extern string GetRegistryStringFromLocalIniFile (string RegistryPath);
00179 extern bool CanGetRegistryString (string RegistryPath);
00180 extern void SetRegistryString (string RegistryPath, string Value);
00181 extern bool IsRmlRegistered(string& Error);
00182 extern bool CheckEvaluationTime();
00183 extern struct tm RmlGetCurrentTime ();
00184 extern bool ReadTimeOutFromRegistry(bool bReadFromLocalFile, int& TimeOut);
00185 extern string GetIniFilePath();
00186 extern string GetRmlVariable();
00187 extern string BuildRMLPath (const char* s);
00188
00189
// C-string and std::string helpers (declarations only).
// - Comparison helpers take an explicit length and a MorphLanguageEnum.
// - IntToStr is overloaded for a char buffer and for a string; each overload returns
//   the same type as its buffer parameter. The required char buffer size is not
//   visible from this header.
// - Format and rml_TRACE are variadic and take a format string (presumably
//   printf-style -- confirm).
// - The Trim* functions and the KOI8/Win converters modify the string passed by reference.
// - LoadFileToString takes the file name by value and returns the content through Result.
00190 extern char* rtrim (char* s);
00191 extern bool IsSuperEqualChar (BYTE ch1, BYTE ch2, MorphLanguageEnum langua);
00192 extern bool strscmp ( const char *s1, const char *s2, size_t l, MorphLanguageEnum langua);
00193 extern int CompareWithoutRegister ( const char *s1, const char *s2, size_t l, MorphLanguageEnum langua);
00194 extern BYTE force_rus_char (BYTE ch);
00195 extern bool force_to_rus (char* dest, const char* sour, size_t len);
00196 extern char* IntToStr (int Value, char* Buffer);
00197 extern string& IntToStr (int Value, string& oBuffer);
00198 extern string Format( const char* format, ... );
00199 extern string& TrimLeft (string& str);
00200 extern string& TrimRight (string& str);
00201 extern string& Trim (string& str);
00202 extern void rml_TRACE( const char* format, ... );
00203 extern bool LoadFileToString(string FileName, string& Result);
00204 extern void SqueezeSpacesAndDeleteEoln(string& s);
00205 extern void KOI8ToWin (string& s);
00206 extern void WinToKOI8 (string& s);
00207
// Checksum of a C string, returned as a DWORD. The exact CRC-32 variant is defined
// by the implementation and is not visible here.
00208 extern DWORD StringCrc32(const char* szString);
00209
00210
00211
// In-place string encode/decode helpers ("QP" presumably stands for
// quoted-printable -- confirm).
00212 extern void QPEncodeString(string& s);
00213 extern void QPDecodeString(string& s);
// strrev is declared only for non-WIN32 builds; on WIN32 it is expected to come
// from the platform's C runtime -- TODO confirm.
00214 #ifndef WIN32
00215 extern void strrev(char* s);
00216 #endif
00217
// Sentinel values: the largest BYTE value marks an unknown part of speech,
// 0xffff marks an unknown syntax element.
00218 const BYTE UnknownPartOfSpeech = 0xff;
00219 const int UnknownSyntaxElement = 0xffff;
// Homonym-removal mode selector.
00220 typedef enum {DontKillHomonyms = 0, CoverageKillHomonyms = 1} KillHomonymsEnum;
00221
// Thesaurus kinds and syntactic unit kinds. Enumerators are numbered from 0 upwards;
// the last enumerator of each (NoneThes / ENoneType) is the "none" marker.
00222 typedef enum { LocThes = 0, FinThes, CompThes, OmniThes, NoneThes } EThesType;
00223 typedef enum { EClause = 0, EWord, EGroup, ENoneType } EUnitType;
00224
00225
// Conversions between textual names and the enums above (declarations only).
// GetLanguageByString takes the name by value and returns the language through
// Result; the bool presumably reports whether the name was recognised -- confirm.
00226 extern int GetPredictionPartOfSpeech(const string& PartOfSpeech, MorphLanguageEnum langua);
00227 extern bool GetLanguageByString (string s, MorphLanguageEnum& Result);
00228 extern string GetStringByLanguage (MorphLanguageEnum Langua);
00229
00230
00231
00232
00233
00234
00235
00236 template <class T>
00237 void ClearVector(vector<T>& V)
00238 {
00239 V.clear();
00240 vector<T> dummy (V);
00241 V.swap (dummy);
00242 };
00243
00244
00245
00246
00247
00248
00249
00250
// Single-byte character classification helpers (declarations only).
// All of them take the character as a BYTE, i.e. an unsigned 8-bit code.
// isbracket returns an int and dual_bracket a size_t; the meaning of those values
// is defined by the implementation and is not visible here.
00251 extern int isbracket (BYTE x);
00252 extern size_t dual_bracket (BYTE x);
00253
00254
00255
00256
// Roman-numeral predicates; is_roman_number checks a buffer of explicit length.
00257 extern bool is_upper_roman_digit (BYTE ch);
00258 extern bool is_lower_roman_digit (BYTE ch);
00259 extern bool is_roman_number ( const char *s, size_t len);
00260 extern bool is_pseudo_graph(BYTE x);
00261 extern bool is_word_delim (BYTE x);
00262
00263
00264 extern bool is_spc_fill (BYTE x);
00265
00266
00267
00268
// Case predicates for English letters, taking the character as a BYTE.
00269 extern bool is_english_upper(BYTE x);
00270 extern bool is_english_lower(BYTE x);
00271
// Byte codes of accented Latin letters, named after their HTML entity names.
// The numeric values match the ISO-8859-1 / Windows-1252 code points of those entities.
// NOTE(review): Nu = 181 is the micro sign position in that table -- confirm intent.
00272 const BYTE Auml = 196;
00273 const BYTE auml = 228;
00274 const BYTE Uuml = 220;
00275 const BYTE uuml = 252;
00276 const BYTE Ouml = 214;
00277 const BYTE ouml = 246;
00278 const BYTE szlig = 223;
00279 const BYTE Nu = 181;
00280 const BYTE agrave = 224;
00281 const BYTE egrave = 232;
00282 const BYTE eacute = 233;
00284
00285 const BYTE LowerJO = (BYTE)'ё';
00286 const BYTE UpperJO = (BYTE)'Ё';
00287 const BYTE Apostrophe = 39;
00288
00289
00290
00291
// Per-language letter predicates over single-byte characters (declarations only).
00292 extern bool is_german_upper(BYTE x);
00293 extern bool is_german_lower(BYTE x);
00294 extern bool is_russian_upper(BYTE x);
00295 extern bool is_russian_lower(BYTE x);
00296
// Consonant / vowel predicates; the language is chosen by the second argument.
00297 extern bool is_lower_consonant(BYTE x, MorphLanguageEnum Langua);
00298 extern bool is_upper_consonant(BYTE x, MorphLanguageEnum Langua);
00299 extern bool is_lower_vowel(BYTE x, MorphLanguageEnum Langua);
00300 extern bool is_upper_vowel(BYTE x, MorphLanguageEnum Langua);
00301
00302 extern bool is_english_alpha(BYTE x);
00303 extern bool is_russian_alpha(BYTE x);
00304 extern bool is_german_alpha(BYTE x);
00305
// is_alpha is overloaded: one form without a language and one with an explicit language.
00306 extern bool is_alpha (BYTE x);
00307 extern bool is_alpha (BYTE x, MorphLanguageEnum langua);
00308 extern bool is_lower_alpha(BYTE x, MorphLanguageEnum langua);
00309 extern bool is_upper_alpha(BYTE x, MorphLanguageEnum langua);
00310 extern bool isnspace(BYTE x);
00311
00312
00313
// Case conversion (declarations only).
// Single-character converters: the e/r/g prefix presumably selects
// English/Russian/German -- confirm.
00314 extern BYTE etoupper (BYTE ch);
00315 extern BYTE etolower (BYTE ch);
00316 extern BYTE rtoupper (BYTE ch);
00317 extern BYTE rtolower (BYTE ch);
00318 extern BYTE gtoupper (BYTE ch);
00319 extern BYTE gtolower (BYTE ch);
00320 extern BYTE ReverseChar (BYTE ch, MorphLanguageEnum langua);
// Whole-word converters. Many are overloaded for char* and string&; each overload
// returns the same type it receives.
00321 extern char* RusMakeUpper (char *word);
00322 extern char* EngMakeUpper (char *word);
00323 extern string& EngMakeUpper (string& word);
00324 extern string& EngMakeLower (string& word);
00325 extern char* GerMakeUpper (char *word);
00326 extern string& GerMakeUpper (string& word);
00327 extern char* RusMakeLower (char *word);
00328 extern string& EngRusMakeLower (string& word);
00329 extern char* EngRusMakeLower (char* word);
// Language chosen at run time through the MorphLanguageEnum argument.
00330 extern char* RmlMakeUpper (char *word, MorphLanguageEnum langua);
00331 extern string& RmlMakeUpper (string& word, MorphLanguageEnum langua);
00332 extern string& RmlMakeLower (string& word, MorphLanguageEnum langua);
00333
00334
00335 extern string& EngRusMakeUpper (string& word);
00336
00337 extern char* EngRusMakeUpper (char* word);
00338
00339
00340
// Whole-word language checks, each overloaded for const char* and const string&.
// CheckLanguage takes the language as a MorphLanguageEnum argument.
// NOTE(review): the exact acceptance rule (e.g. handling of digits or hyphens) is
// defined in the implementation -- confirm there.
00341 extern bool IsRussian (const char *word);
00342 extern bool IsRussian (const string& word);
00343 extern bool IsEnglish (const char *word);
00344 extern bool IsEnglish (const string& word);
00345 extern bool IsGerman (const char *word);
00346 extern bool IsGerman (const string& word);
00347 extern bool CheckLanguage (const char *word, MorphLanguageEnum langua);
00348 extern bool CheckLanguage (const string& word, MorphLanguageEnum langua);
00349
00350
00351
// ConvertJO2Je rewrites its argument in place and is overloaded for string,
// NUL-terminated char* and char* with an explicit length. Judging by the name it
// maps the letter codes LowerJO/UpperJO to the corresponding "JE" letters -- confirm.
00352 extern void ConvertJO2Je(string& src);
00353 extern void ConvertJO2Je(char* src);
00354 extern void ConvertJO2Je(char* src, size_t Length);
// Returns a converted copy of txt; the input is not modified (const reference).
00355 extern string ConvertASCIIToHtmlSymbols(const string& txt);
00356
00357
00358 template <class T, class Pred, class Conv>
00359 T& RegisterConverter (T& word, size_t Len, Pred P, Conv C)
00360 {
00361 for( size_t i = 0 ; i < Len; i++ )
00362 if ( P( (BYTE)word[i] ) )
00363 word[i] = C ( (BYTE)word[i] );
00364
00365 return word;
00366 }
00367
00368
00369 template <class T>
00370 T& GerEngRusMakeUpperTemplate (T& word, MorphLanguageEnum Langua, size_t Len )
00371 {
00372 if (Len == 0) return word;
00373
00374 if (Langua == morphGerman)
00375 return RegisterConverter(word, Len, is_german_lower, gtoupper);
00376 else
00377 for( size_t i = 0 ; i < Len; i++ )
00378 if (is_russian_lower((BYTE)word[i]))
00379 word[i] = rtoupper ( (BYTE)word[i] );
00380 else
00381 if (is_english_lower((BYTE)word[i]))
00382 word[i] = etoupper ( (BYTE)word[i] );
00383
00384 return word;
00385 };
00386
00387
00388
00389
00390
00391
// _QM(X): a QWORD bit mask with only bit number X set.
// The argument is parenthesised so that expressions with operators of lower
// precedence than << (for example a|b or c ? x : y) are shifted as a whole.
#define _QM(X) (((QWORD)1)<<(X))
00393
// Letter-case ("register") pattern of a word. Judging by the enumerator names:
// LowLow = all lower case, UpLow = first letter upper case, UpUp = all upper case,
// AnyRegister = no restriction -- confirm against the code that uses it.
00394 enum RegisterEnum {AnyRegister=0, LowLow=1, UpLow=2, UpUp=3};
00395
00396
00397 #endif
00398
00399