ddc
utilit.h
Go to the documentation of this file.
1 //-*- Mode: C++ -*-
2 //
3 // This file is part of DDC.
4 //
5 // DDC is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU Lesser General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // DDC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
17 //
18 // ========== Dialing Syntax Analysis (www.aot.ru)
19 // ========== Copyright by Alexey Sokirko
20 // Changes and modifications 2011-2018 by Bryan Jurish
21 
22 #ifndef utilit_h
23 #define utilit_h
24 
25 //-- BEGIN moo (Wed, 11 May 2011 14:44:12 +0200): get size-limited types (e.g. uint32_t)
26 #include "ddcConfig.h"
27 #include <stdint.h>
28 
29 //-- DDC_USE_UTF8 : define this to allow high bits in morphology
30 // + doesn't work well for "real" utf8 b/c upper-casing works byte-wise (bummer)
31 #undef DDC_USE_UTF8
32 
33 //-- END moo
34 #include <stdio.h>
35 
36 
37 #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
38 
39 #ifdef WIN32
40 
41 // it is used to find memory leaks
42 #ifdef DETECT_MEMORY_LEAK
43 extern void* operator new(size_t nSize, const char* lpszFileName, int nLine);
44 extern void operator delete(void* p, const char* lpszFileName, int nLine);
45 #define DEBUG_NEW new(THIS_FILE, __LINE__)
46 #endif
47 
48 
49 #include "windows.h"
50 #include "winuser.h"
51 #endif
52 
53 #include <assert.h>
54 #include <stdarg.h>
55 #include <string.h>
56 #include <ctype.h>
57 #include <string>
58 #include <algorithm>
59 #include <stdexcept>
60 #include <vector>
61 #include <typeinfo> //-- moo: for typeid()
62 #include "set"
63 #include "stack"
64 #include "map"
65 #include "time.h"
66 
67 #ifdef HAVE_MALLOC_H
68 # include <malloc.h>
69 #endif
70 
71 #ifndef __GNUC__
72 #pragma warning (disable : 4018)
73 #pragma warning (disable : 4244)
74 #pragma warning (disable : 4103)
75 #pragma warning (disable : 4530)
76 #pragma warning (disable : 4251)
77 #pragma warning (disable : 4996)
78 #endif
79 
80 
81 #ifdef STLPORT
82 using namespace stlport;
83 #else
84 using namespace std;
85 #endif
86 
87 
88 
89 //#if !defined (STLPORT) || defined(__STL_USE_NAMESPACES)
90 //using namespace std;
91 //#endif
92 
93 
94 typedef unsigned char BYTE;
95 
96 #ifdef WIN32
97 #include <io.h>
98 #include <fcntl.h>
99 typedef __int64 QWORD;
100 
101 #else
102 #include <unistd.h>
103 // typedef unsigned int DWORD;
104 // typedef unsigned short WORD;
105 typedef uint32_t DWORD;
106 typedef uint16_t WORD;
107 typedef uint64_t QWORD;
108 typedef unsigned int UINT; //-- unused
109 const unsigned int _MAX_PATH = 512;
110 #endif
111 
//-- moo: branch prediction macros; see e.g. https://www.geeksforgeeks.org/branch-prediction-macros-in-gcc/
// + likely(x) / unlikely(x) both evaluate to the truth value of x (0 or 1);
//   with GCC-compatible compilers they additionally pass a branch hint.
// + the fallback is fully parenthesized and normalized with !! so that an
//   expression such as `likely(a || b) && c` groups the same way on every compiler.
#ifdef __GNUC__
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
#else
# define likely(x) (!!(x))
# define unlikely(x) (!!(x))
#endif
120 
121 //-- moo: more utilities
122 #include "../CommonLib/ddcLocale.h"
123 #include "../CommonLib/ddcVersion.h"
124 #include "../CommonLib/ddcString.h"
125 
126 //-- moo: replace UINT_MAX with DWORD_MAX
127 // + c++ chokes on fixed-width limit macros (UINT32_MAX) unless you #define __STDC_LIMIT_MACROS , which is unportable
128 #ifndef DWORD_MAX
129 //# define DWORD_MAX UINT32_MAX
130 # define DWORD_MAX (0xffffffff)
131 #endif
132 
133 #ifndef QWORD_MAX
134 #ifdef WIN32
135 #define QWORD_MAX 0xffffffffffffffff
136 #else
137 #define QWORD_MAX 0xffffffffffffffffULL
138 #endif
139 #endif
140 
141 //-- moo: ugly hack for gcc 4.7.2 (kaskade; debian wheezy)
142 #ifndef SIZE_MAX
143 # define SIZE_MAX ((size_t)-1)
144 #endif
145 
146 typedef vector<string> StringVector;
147 typedef set<string> StringSet;
148 typedef vector<DWORD> DwordVector;
149 
151 { return QWORD_MAX; }
152 
153 const BYTE cHyphenChar = (BYTE) '-';
154 const BYTE cParagraph = (BYTE) 21;
155 const BYTE cCompanyChar = (BYTE) 176;
156 const BYTE cIonChar = (BYTE) 183;
157 const BYTE cNumberChar = (BYTE) 0xB9;
158 const BYTE cPiChar = (BYTE) 182;
159 const BYTE cEllipseChar = (BYTE) 133;
160 
161 // morphology&syntax
162 typedef enum {
171 
/// A pair<T1,T2> (members `first` and `second`) extended by a third,
/// independently typed member `third`.
template <typename T1, typename T2, typename T3>
struct troika : pair<T1, T2>
{
    T3 third; ///< the additional third component
};
177 
178 
180 
183 class CExpc : public runtime_error
184 {
185 public:
188 
190  string m_strCause;
191 
192 public:
193  CExpc(int ErrorCode) throw();
194  CExpc(int ErrorCode, const string& Cause) throw();
195  CExpc(const string& Cause) throw();
196 
197  CExpc(int ErrorCode, const char* format, ... ) throw();
198  CExpc(const char* format, ... ) throw();
199 
201  CExpc(const CExpc& from) throw();
202 
204  virtual ~CExpc() throw() {};
205 
207  void Set(const char* format, ... ) throw();
208 
210  void vSet(const char *format, va_list& arglst) throw();
211 
213  CExpc& operator= (const CExpc& from) throw();
214 
216  virtual const char* what() const throw();
217 
219  virtual int code() const throw();
220 };
221 
223 class CFatalExpc : public CExpc
224 {
225 public:
226  CFatalExpc(int ErrorCode) throw() : CExpc(ErrorCode) {};
227  CFatalExpc(int ErrorCode, const string& Cause) throw() : CExpc(ErrorCode, Cause) {};
228  CFatalExpc(const string& Cause) throw() : CExpc(Cause) {};
229 
231  CFatalExpc(const CFatalExpc& from) throw() : CExpc(from) {};
232 
234  virtual ~CFatalExpc() throw() {};
235 };
236 
237 
238 
239 // working with files
240 extern bool FileExists (const char *FName);
/// string convenience overload: forwards to FileExists(const char*)
inline bool FileExists (const string& FName)
{
    const char* const rawName = FName.c_str();
    return FileExists(rawName);
}
243 
244 extern bool IsDirectory(const char* DirName);
/// string convenience overload: forwards to IsDirectory(const char*)
inline bool IsDirectory(const string& DirName)
{
    const char* const rawName = DirName.c_str();
    return IsDirectory(rawName);
}
247 
248 extern file_off_t FileSize (const char *filename);
249 inline file_off_t FileSize (const string& filename)
250 { return FileSize(filename.c_str()); };
251 
252 extern time_t FileMTime (const char *filename);
253 extern bool FSeek(FILE* fp, file_off_t pos, int origin);
254 extern file_off_t FTell(FILE* fp);
255 
256 
257 extern void ErrorMessage (const string& Label, const string& Message);
258 extern void ErrorMessage (const string& Message);
259 extern string MakeFName ( const string& InpitFileName, const string& Ext);
260 extern bool MakePath (const char* RossPath, const char* FileName, char* FullPath);
261 extern string GetPathByFile (string FileName);
262 extern bool IsBinFile (const char* FileName);
263 extern bool IsEmptyLine ( const char *t);
264 
265 //-- filename stuff
266 extern bool IsHtmlFile (const string& FileName);
267 extern bool IsTableFile (const string& FileName);
268 extern bool IsJsonFile (const string& FileName);
269 extern string FileExtension (const string& FileName);
270 
272 extern string FileDirectory (const string& FileName);
273 
275 extern string FileBasename (const string& FileName);
276 
278 extern string FileBasename (const string& FileName, const string& FileExtension);
279 
281 extern string CurrentDirectory(void);
282 
284 extern bool IsAbsolutePath (const string& PathName);
285 
287 extern string RelativePathName(const string &RefDir, const string &PathName);
288 
290 extern string RelativeFileName(const string &RefFile, const string &PathName);
291 
293 extern void AddFile(const char* MainFile, const char* ToAdd);
294 
296 extern void FileAppend(FILE *src, FILE *dst);
297 extern void FileAppend(const char* srcFile, const char* dstFile);
298 extern void FileAppend(const string& srcFile, const string& dstFile);
299 
301 extern void FileAppendPartial(FILE *src, FILE *dst, size_t nBytes);
302 
303 extern bool RmlMoveFile(const char *oldpath, const char *newpath);
304 extern bool RmlCopyFile(const char *oldpath, const char *newpath);
305 extern string CreateTempFileName();
306 extern bool MakeDir(const string& DirName);
307 extern bool MakeDirP(const string& DirName);
308 extern bool RemoveWithPrint (const string& FileName);
309 
310 
311 //======================================================================
312 // moo: get memory statistics using /proc/PID/statm (linux only)
313 
/// Memory statistics for a single process, as filled in by fetch().
/// All counters are zero after default construction or clear().
struct DDCProcessMemoryStatus
{
    size_t VmPeak;  ///< Peak virtual memory size.
    size_t VmSize;  ///< Virtual memory size.
    size_t VmLck;   ///< Locked memory size (see mlock(3)).
    size_t VmPin;   ///< Pinned memory size (since Linux 3.2). These are pages that can't be moved because something needs to directly access physical memory.
    size_t VmHWM;   ///< Peak resident set size ("high water mark").
    size_t VmRSS;   ///< Resident set size. Note that the value here is the sum of RssAnon, RssFile, and RssShmem.
    size_t VmData;  ///< Size of data segment.
    size_t VmStk;   ///< Size of stack segment.
    size_t VmExe;   ///< Size of text (code) segment.
    size_t VmLib;   ///< Shared library code size.
    size_t VmPTE;   ///< Page table entries size (since Linux 2.6.10).
    size_t VmPMD;   ///< Size of second-level page tables (since Linux 4.0).
    size_t VmSwap;  ///< Swapped-out virtual memory size by anonymous private pages; shmem swap usage is not included (since Linux 2.6.34).
    size_t RssFile; ///< Size of resident file mappings. (since Linux 4.5).
    size_t RssAnon; ///< Size of resident anonymous memory. (since Linux 4.5).

    /// Default constructor: every counter (including VmPeak) starts at zero.
    DDCProcessMemoryStatus(void)
        : VmPeak(0), VmSize(0), VmLck(0), VmPin(0), VmHWM(0), VmRSS(0), VmData(0), VmStk(0), VmExe(0), VmLib(0), VmPTE(0), VmPMD(0), VmSwap(0), RssFile(0), RssAnon(0)
    {}

    /// Construct and immediately populate from the process with id \a pid.
    DDCProcessMemoryStatus(pid_t pid)
    {
        // zero first so that any counter fetch() leaves untouched still has a defined value
        clear();
        fetch(pid);
    }

    /// Reset every counter to zero.
    inline void clear(void)
    {
        memset(this, 0, sizeof(DDCProcessMemoryStatus));
    }

    /// Populate the counters for the process with id \a pid (defined out of line).
    void fetch(pid_t pid);

    /// Populate the counters for the calling process.
    inline void fetch(void)
    {
        fetch(getpid());
    }
};
352 
353 
356 size_t GetNumberOfOpenFiles(pid_t pid=0);
357 
358 
359 //======================================================================
360 // working with registry
361 extern string GetRegistryString (string RegistryPath);
362 extern bool CanGetRegistryString (string RegistryPath);
363 extern bool IsRmlRegistered(string& Error);
364 extern struct tm RmlGetCurrentTime ();
365 extern string GetIniFilePath();
366 extern string GetRmlVariable();
367 
368 //======================================================================
369 // working with strings
370 extern char* rtrim (char* s);
371 extern bool IsSuperEqualChar (BYTE ch1, BYTE ch2, MorphLanguageEnum langua);
372 extern int CompareWithoutRegister ( const char *s1, const char *s2, size_t l, MorphLanguageEnum langua);
373 extern BYTE force_rus_char (BYTE ch);
374 extern bool force_to_rus (char* dest, const char* sour, size_t len);
375 extern char* IntToStr (int Value, char* Buffer);
376 extern string& IntToStr (int Value, string& oBuffer);
377 extern string& TrimLeft (string& str);
378 extern string& TrimRight (string& str);
379 extern string& Trim (string& str);
380 extern bool LoadFileToString(string FileName, string& Result, bool clobber=true);
381 extern bool SaveStringToFile(const string& Str, const string& FileName);
382 extern void KOI8ToWin (string& s);
383 extern void WinToKOI8 (string& s);
384 extern void WinToKOI8 (string& s);
385 extern bool StartsWith(const string& body, const string& prefix);
386 
387 //======================================================================
389 extern string timestampLocal(time_t timeval);
390 extern string timestampUTC(time_t timeval);
391 
393 vector<string> stringSplit(const char *s, const char *delims);
394 
398 void stringSplitE(const string& s, const string& delims, vector<string>& tokens);
399 
401 vector<string> stringSplitE(const string& s, const string& delims);
402 
404 extern uint32_t date2int( signed int y, unsigned int m=1, unsigned int d=1);
405 
407 extern void int2date(uint32_t i, signed int *y, unsigned int *m=NULL, unsigned int *d=NULL);
408 
410 extern string int2hex(int32_t i);
411 
413 extern int32_t hex2int(const string& s);
414 
416 extern void ddc_thread_init(const char *log_label=NULL);
417 
419 extern const char *ddc_log_label(void);
420 
422 extern const char *ddc_set_log_label(const char *log_label);
423 
425 
426 extern int GetPredictionPartOfSpeech(const string& PartOfSpeech, MorphLanguageEnum langua);
427 extern bool GetLanguageByString (string s, MorphLanguageEnum& Result);
428 extern string GetStringByLanguage (MorphLanguageEnum Langua);
429 
430 //======================================================================
431 // integer utilities
432 
/// Integer base-2 logarithm of a 32-bit value: the index of the highest set
/// bit of \a v, i.e. floor(log2(v)) for v > 0.  Returns 0 for v == 0.
inline uint32_t log2u32(uint32_t v)
{
#ifdef __GNUC__
    //-- gcc: count leading zeroes; (clz ^ 31) == (31 - clz) for clz in 0..31
    if (v == 0)
        return 0; //-- __builtin_clz() is undefined for 0
    return __builtin_clz(v) ^ 31;
#else
    //-- portable fallback: DeBruijn multiplication table lookup
    static const int DeBruijnLog2Table[32] =
    {
        0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
        8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
    };

    //-- smear the highest set bit downwards: v becomes (2^k - 1)
    uint32_t smeared = v;
    smeared |= smeared >> 1;
    smeared |= smeared >> 2;
    smeared |= smeared >> 4;
    smeared |= smeared >> 8;
    smeared |= smeared >> 16;

    const uint32_t slot = (uint32_t)(smeared * 0x07C4ACDDU) >> 27;
    return DeBruijnLog2Table[slot];
#endif
}
460 
/// Returns the last element of \a v divided by the number of elements
/// (integer division), but never less than 1.  An empty \a v yields 1.
template <class VecT>
inline size_t VectorStride(const VecT& v)
{
    if (v.empty())
        return 1;

    const size_t stride = (size_t)v[v.size()-1] / v.size();
    return (stride < 1) ? (size_t)1 : stride;
}
470 
471 //======================================================================
472 
//-- aggressive heap trimming
// + calls malloc_trim(0) when the build defines HAVE_MALLOC_TRIM; otherwise a no-op
inline void TrimHeap()
{
#if HAVE_MALLOC_TRIM
    (void)malloc_trim(0);
#endif
}
480 
481 
/* generic STL clear-and-swap: empties C and then exchanges it with a freshly
 * default-constructed container, so that C ends up holding the fresh
 * container's storage; should free up memory better than STL clear() alone */
template <class ContainerT>
void ClearContainer(ContainerT& C)
{
    ContainerT fresh;
    C.clear();
    C.swap(fresh);
}
490 
491 /* also clears capacity */
492 template <class T>
493 inline void ClearVector(vector<T>& V)
494 { ClearContainer(V); };
495 
496 /* also clears capacity */
497 inline void ClearString(string& S)
498 { ClearContainer(S); };
499 
500 
501 extern int isbracket (BYTE x);
502 extern size_t dual_bracket (BYTE x);
503 
504 
505 extern bool is_pseudo_graph(BYTE x);
506 extern bool is_spc_fill (BYTE x);
507 
508 
509 
510 
511 extern bool is_english_upper(BYTE x);
512 extern bool is_english_lower(BYTE x);
513 
514 // Windows-1252
515 const BYTE Auml = 196;
516 const BYTE auml = 228;
517 const BYTE Uuml = 220;
518 const BYTE uuml = 252;
519 const BYTE Ouml = 214;
520 const BYTE ouml = 246;
521 const BYTE szlig = 223;
522 const BYTE Nu = 181;
523 const BYTE agrave = 224;
524 const BYTE egrave = 232;
525 const BYTE eacute = 233;
526 
527 
528 const BYTE LowerJO_cp1251 = 184;
529 const BYTE UpperJO_cp1251 = 168;
530 const BYTE LowerE_cp1251 = 229;
531 const BYTE UpperE_cp1251 = 197;
532 const BYTE Apostrophe = 39;
533 
534 
535 extern bool is_german_upper(BYTE x);
536 extern bool is_german_lower(BYTE x);
537 extern bool is_russian_upper(BYTE x);
538 extern bool is_russian_lower(BYTE x);
539 
540 extern bool is_upper_consonant(BYTE x, MorphLanguageEnum Langua);
541 extern bool is_lower_vowel(BYTE x, MorphLanguageEnum Langua);
542 extern bool is_upper_vowel(BYTE x, MorphLanguageEnum Langua);
543 
544 extern bool is_english_alpha(BYTE x);
545 extern bool is_russian_alpha(BYTE x);
546 extern bool is_german_alpha(BYTE x);
547 
548 extern bool is_alpha (BYTE x);
549 extern bool is_alpha (BYTE x, MorphLanguageEnum langua);
550 extern bool is_lower_alpha(BYTE x, MorphLanguageEnum langua);
551 extern bool is_upper_alpha(BYTE x, MorphLanguageEnum langua);
552 extern bool isnspace(BYTE x);
553 
554 
555 // =============== Register ========================================
556 extern BYTE etoupper (BYTE ch);
557 extern BYTE etolower (BYTE ch);
558 extern BYTE rtoupper (BYTE ch);
559 extern BYTE rtolower (BYTE ch);
560 extern BYTE gtoupper (BYTE ch);
561 extern BYTE gtolower (BYTE ch);
562 extern BYTE ReverseChar (BYTE ch, MorphLanguageEnum langua);
563 extern string& EngMakeUpper (string& word);
564 extern string& EngMakeLower (string& word);
565 extern string& EngRusMakeLower (string& word);
566 extern char* RmlMakeUpper (char *word, MorphLanguageEnum langua);
567 extern string& RmlMakeUpper (string& word, MorphLanguageEnum langua);
568 extern string& RmlMakeLower (string& word, MorphLanguageEnum langua);
569 extern string& EngRusMakeUpper (string& word);
570 extern char* EngRusMakeUpper (char* word);
571 
572 
573 // check language
574 extern bool IsRussian (const char *word);
575 extern bool CheckLanguage (const char *word, MorphLanguageEnum langua);
576 
577 
578 
579 extern void ConvertJO2Je(string& src);
580 extern void ConvertJO2Je(char* src, size_t Length);
581 extern string ConvertASCIIToHtmlSymbols(const string& txt);
582 
583 
584 template <class T, class Pred, class Conv>
585 T& RegisterConverter (T& word, size_t Len, Pred P, Conv C)
586 {
587  for( size_t i = 0 ; i < Len; i++ )
588  if ( P( (BYTE)word[i] ) )
589  word[i] = C ( (BYTE)word[i] );
590 
591  return word;
592 }
593 
594 
595 template <class T>
596 T& GerEngRusMakeUpperTemplate (T& word, MorphLanguageEnum Langua, size_t Len )
597 {
598  if (Len == 0) return word;
599 
600  if (Langua == morphGerman)
601  return RegisterConverter(word, Len, is_german_lower, gtoupper);
602  else
603  for( size_t i = 0 ; i < Len; i++ )
604  if (is_russian_lower((BYTE)word[i]))
605  word[i] = rtoupper ( (BYTE)word[i] );
606  else
607  if (is_english_lower((BYTE)word[i]))
608  word[i] = etoupper ( (BYTE)word[i] );
609 
610  return word;
611 };
612 
613 
614 
// QWORD mask: _QM(X) is a QWORD with only bit number X set.
// + the argument is parenthesized so that low-precedence expressions
//   (e.g. `a | b`, `c ? x : y`) are shifted as a whole
#define _QM(X) (((QWORD)1)<<(X))
617 
619 
620 
621 #endif
622 
623 /*--- emacs style variables ---
624  * Local Variables:
625  * mode: C++
626  * c-file-style: "ellemtel"
627  * c-basic-offset: 4
628  * tab-width: 8
629  * indent-tabs-mode: nil
630  * End:
631  */
void ClearContainer(ContainerT &C)
Definition: utilit.h:484
bool is_lower_vowel(BYTE x, MorphLanguageEnum Langua)
Definition: utilit.cpp:1450
T3 third
Definition: utilit.h:175
vector< DWORD > DwordVector
Definition: utilit.h:148
char log_label[64]
Definition: ddc_daemon.cpp:52
const BYTE uuml
Definition: utilit.h:518
int isbracket(BYTE x)
Definition: utilit.cpp:1783
bool is_russian_lower(BYTE x)
Definition: utilit.cpp:1387
void int2date(uint32_t i, signed int *y, unsigned int *m=NULL, unsigned int *d=NULL)
moo: int-to-date decoding (v2.2.4: use signed years)
Definition: utilit.cpp:2126
char * IntToStr(int Value, char *Buffer)
Definition: utilit.cpp:651
T & GerEngRusMakeUpperTemplate(T &word, MorphLanguageEnum Langua, size_t Len)
Definition: utilit.h:596
size_t VmRSS
Definition: utilit.h:322
void ErrorMessage(const string &Label, const string &Message)
Definition: utilit.cpp:400
string RelativeFileName(const string &RefFile, const string &PathName)
If PathName is absolute, it is returned; otherwise returns (FileDirectory(RefFile) + "/" + PathName) ...
Definition: utilit.cpp:784
string FileBasename(const string &FileName)
Returns file part of FileName, like basename(3)
Definition: utilit.cpp:686
BYTE gtolower(BYTE ch)
Definition: utilit.cpp:1364
bool is_english_alpha(BYTE x)
Definition: utilit.cpp:1271
string & Trim(string &str)
Definition: utilit.cpp:1762
DDCProcessMemoryStatus(void)
Definition: utilit.h:334
bool is_english_upper(BYTE x)
Definition: utilit.cpp:1248
size_t GetNumberOfOpenFiles(pid_t pid=0)
Definition: utilit.cpp:1012
const BYTE cIonChar
Definition: utilit.h:156
const char * ddc_set_log_label(const char *log_label)
set thread-local log prefix; returns old label (if any)
Definition: utilit.cpp:2196
BYTE force_rus_char(BYTE ch)
Definition: utilit.cpp:619
void AddFile(const char *MainFile, const char *ToAdd)
AddFile(): old ugly file-append hack using system("cat...")
Definition: utilit.cpp:790
const BYTE LowerE_cp1251
Definition: utilit.h:530
BYTE ReverseChar(BYTE ch, MorphLanguageEnum langua)
Definition: utilit.cpp:1540
string CreateTempFileName()
Definition: utilit.cpp:481
const BYTE agrave
Definition: utilit.h:523
const BYTE LowerJO_cp1251
Definition: utilit.h:528
size_t VmExe
Definition: utilit.h:325
const BYTE Auml
Definition: utilit.h:515
const BYTE szlig
Definition: utilit.h:521
size_t dual_bracket(BYTE x)
Definition: utilit.cpp:1791
uint64_t QWORD
Definition: utilit.h:107
int GetPredictionPartOfSpeech(const string &PartOfSpeech, MorphLanguageEnum langua)
Definition: utilit.cpp:1126
QWORD file_off_t
Definition: utilit.h:179
BYTE rtoupper(BYTE ch)
Definition: utilit.cpp:1423
const BYTE UnknownPartOfSpeech
Definition: utilit.h:424
bool is_german_upper(BYTE x)
Definition: utilit.cpp:1320
string timestampUTC(time_t timeval)
Definition: utilit.cpp:2075
uint32_t log2u32(uint32_t v)
Definition: utilit.h:438
bool is_english_lower(BYTE x)
Definition: utilit.cpp:1260
size_t VmPTE
Definition: utilit.h:327
generic local exception class moo: derived from stdexcept runtime_error
Definition: utilit.h:183
Definition: utilit.h:618
CFatalExpc(const CFatalExpc &from)
copy constructor
Definition: utilit.h:231
void WinToKOI8(string &s)
Definition: utilit.cpp:2360
const BYTE Apostrophe
Definition: utilit.h:532
bool CheckLanguage(const char *word, MorphLanguageEnum langua)
Definition: utilit.cpp:1623
QWORD GetMaxQWORD()
Definition: utilit.h:150
string RelativePathName(const string &RefDir, const string &PathName)
If PathName is absolute, it is returned; otherwise returns (RefDir + "/" + PathName) ...
Definition: utilit.cpp:773
BYTE gtoupper(BYTE ch)
Definition: utilit.cpp:1351
void fetch(void)
Definition: utilit.h:349
string & TrimRight(string &str)
Definition: utilit.cpp:1754
bool is_upper_alpha(BYTE x, MorphLanguageEnum langua)
Definition: utilit.cpp:1518
const unsigned int _MAX_PATH
Definition: utilit.h:109
size_t VmStk
Definition: utilit.h:324
bool IsSuperEqualChar(BYTE ch1, BYTE ch2, MorphLanguageEnum langua)
Definition: utilit.cpp:549
bool is_german_alpha(BYTE x)
Definition: utilit.cpp:1346
void stringSplitE(const string &s, const string &delims, vector< string > &tokens)
Definition: utilit.cpp:2093
const BYTE cCompanyChar
Definition: utilit.h:155
string MakeFName(const string &InpitFileName, const string &Ext)
Definition: utilit.cpp:413
string GetStringByLanguage(MorphLanguageEnum Langua)
Definition: utilit.cpp:1180
bool GetLanguageByString(string s, MorphLanguageEnum &Result)
Definition: utilit.cpp:1152
string int2hex(int32_t i)
moo: int-to-hexidecimal string encoding; encoding preserves sort order (for subcorpus->server communi...
Definition: utilit.cpp:2135
RegisterEnum
Definition: utilit.h:618
size_t VectorStride(const VecT &v)
Definition: utilit.h:466
Definition: utilit.h:167
size_t RssAnon
Definition: utilit.h:331
bool RmlCopyFile(const char *oldpath, const char *newpath)
Definition: utilit.cpp:2476
bool IsJsonFile(const string &FileName)
Definition: utilit.cpp:748
time_t FileMTime(const char *filename)
Definition: utilit.cpp:352
char * RmlMakeUpper(char *word, MorphLanguageEnum langua)
Definition: utilit.cpp:1645
bool force_to_rus(char *dest, const char *sour, size_t len)
Definition: utilit.cpp:636
uint16_t WORD
Definition: utilit.h:106
bool FileExists(const char *FName)
Definition: utilit.cpp:335
void clear(void)
Definition: utilit.h:342
const BYTE ouml
Definition: utilit.h:520
string m_strCause
symbolic error message (should also be returned by what() method)
Definition: utilit.h:190
bool StartsWith(const string &body, const string &prefix)
Definition: utilit.cpp:1769
const BYTE Ouml
Definition: utilit.h:519
bool IsDirectory(const char *DirName)
Definition: utilit.cpp:364
void TrimHeap()
Definition: utilit.h:474
BYTE etolower(BYTE ch)
Definition: utilit.cpp:1296
int CompareWithoutRegister(const char *s1, const char *s2, size_t l, MorphLanguageEnum langua)
Definition: utilit.cpp:591
bool IsRussian(const char *word)
Definition: utilit.cpp:1618
const BYTE Nu
Definition: utilit.h:522
size_t VmLck
Definition: utilit.h:319
void ClearVector(vector< T > &V)
Definition: utilit.h:493
bool isnspace(BYTE x)
Definition: utilit.cpp:1533
size_t VmPin
Definition: utilit.h:320
const BYTE Uuml
Definition: utilit.h:517
string & EngRusMakeUpper(string &word)
Definition: utilit.cpp:1679
const BYTE cParagraph
Definition: utilit.h:154
const BYTE UpperE_cp1251
Definition: utilit.h:531
bool is_upper_consonant(BYTE x, MorphLanguageEnum Langua)
Definition: utilit.cpp:1476
size_t RssFile
Definition: utilit.h:330
bool IsHtmlFile(const string &FileName)
Definition: utilit.cpp:728
Definition: utilit.h:166
bool IsAbsolutePath(const string &PathName)
Returns true iff PathName is an absolute path (simple string heuristics)
Definition: utilit.cpp:758
string FileExtension(const string &FileName)
Definition: utilit.cpp:665
void ClearString(string &S)
Definition: utilit.h:497
string ConvertASCIIToHtmlSymbols(const string &txt)
Definition: utilit.cpp:2381
Definition: utilit.h:173
size_t VmPMD
Definition: utilit.h:328
bool IsBinFile(const char *FileName)
Definition: utilit.cpp:474
int m_ErrorCode
internal error code (see ../CommonLib/DDCInternalError.h)
Definition: utilit.h:187
Definition: utilit.h:618
size_t VmLib
Definition: utilit.h:326
BYTE rtolower(BYTE ch)
Definition: utilit.cpp:1434
string GetRegistryString(string RegistryPath)
Definition: utilit.cpp:1037
vector< string > StringVector
Definition: utilit.h:146
string & EngMakeUpper(string &word)
Definition: utilit.cpp:1578
bool is_lower_alpha(BYTE x, MorphLanguageEnum langua)
Definition: utilit.cpp:1503
CFatalExpc(int ErrorCode, const string &Cause)
Definition: utilit.h:227
bool is_upper_vowel(BYTE x, MorphLanguageEnum Langua)
Definition: utilit.cpp:1463
size_t VmSize
Definition: utilit.h:318
vector< string > stringSplit(const char *s, const char *delims)
moo: convenience wrapper for string-to-vector tokenization using StringTokenizer
Definition: utilit.cpp:2085
Definition: utilit.h:618
CFatalExpc(int ErrorCode)
Definition: utilit.h:226
void FileAppend(FILE *src, FILE *dst)
FileAppend(): newer better replacement for AddFile()
Definition: utilit.cpp:802
size_t VmData
Definition: utilit.h:323
DDCProcessMemoryStatus(pid_t pid)
Definition: utilit.h:339
string & RmlMakeLower(string &word, MorphLanguageEnum langua)
Definition: utilit.cpp:1665
Definition: utilit.h:169
char * rtrim(char *s)
Definition: utilit.cpp:532
set< string > StringSet
Definition: utilit.h:147
string timestampLocal(time_t timeval)
moo: timestamp stuff
Definition: utilit.cpp:2067
bool MakeDirP(const string &DirName)
Definition: utilit.cpp:2426
bool FSeek(FILE *fp, file_off_t pos, int origin)
Definition: utilit.cpp:376
string & TrimLeft(string &str)
Definition: utilit.cpp:1746
unsigned char BYTE
Definition: utilit.h:94
const BYTE UpperJO_cp1251
Definition: utilit.h:529
const BYTE cNumberChar
Definition: utilit.h:157
string CurrentDirectory(void)
Wrapper for getcwd(); returns current working directory of current process.
Definition: utilit.cpp:707
Definition: utilit.h:315
CFatalExpc(const string &Cause)
Definition: utilit.h:228
uint32_t date2int(signed int y, unsigned int m=1, unsigned int d=1)
moo: date-to-int encoding (v2.2.4: use signed years)
Definition: utilit.cpp:2118
local exception class, fatal (can be used to force immediate bail-out)
Definition: utilit.h:223
string & EngMakeLower(string &word)
Definition: utilit.cpp:1583
virtual ~CFatalExpc()
destructor
Definition: utilit.h:234
void KOI8ToWin(string &s)
Definition: utilit.cpp:2367
const BYTE cPiChar
Definition: utilit.h:158
bool is_russian_upper(BYTE x)
Definition: utilit.cpp:1382
string GetIniFilePath()
Definition: utilit.cpp:964
const BYTE eacute
Definition: utilit.h:525
string GetPathByFile(string FileName)
Definition: utilit.cpp:454
int32_t hex2int(const string &s)
moo: int-to-hexidecimal string deccoding; ; encoding preserves sort order
Definition: utilit.cpp:2150
bool MakeDir(const string &DirName)
Definition: utilit.cpp:2417
void FileAppendPartial(FILE *src, FILE *dst, size_t nBytes)
FileAppendPartial(): append a substring of src to dst.
Definition: utilit.cpp:841
bool CanGetRegistryString(string RegistryPath)
Definition: utilit.cpp:1044
bool is_russian_alpha(BYTE x)
Definition: utilit.cpp:1418
bool IsEmptyLine(const char *t)
Definition: utilit.cpp:517
MorphLanguageEnum
Definition: utilit.h:162
void ddc_thread_init(const char *log_label=NULL)
register a thread-local log prefix (unix only)
Definition: utilit.cpp:2179
bool MakePath(const char *RossPath, const char *FileName, char *FullPath)
Definition: utilit.cpp:430
BYTE etoupper(BYTE ch)
Definition: utilit.cpp:1276
string FileDirectory(const string &FileName)
Returns directory part of FileName, or "." if FileName is a bare filename, like dirname(3) ...
Definition: utilit.cpp:673
string & EngRusMakeLower(string &word)
Definition: utilit.cpp:1631
bool SaveStringToFile(const string &Str, const string &FileName)
Definition: utilit.cpp:2219
const char * ddc_log_label(void)
get thread-local log prefix, or empty string if none registered
Definition: utilit.cpp:2187
T & RegisterConverter(T &word, size_t Len, Pred P, Conv C)
Definition: utilit.h:585
bool IsRmlRegistered(string &Error)
Definition: utilit.cpp:1057
size_t VmPeak
Definition: utilit.h:317
uint32_t DWORD
Definition: utilit.h:105
bool is_alpha(BYTE x)
Definition: utilit.cpp:1482
file_off_t FileSize(const char *filename)
Definition: utilit.cpp:340
bool is_spc_fill(BYTE x)
Definition: utilit.cpp:1199
const BYTE egrave
Definition: utilit.h:524
bool RemoveWithPrint(const string &FileName)
Definition: utilit.cpp:2442
const BYTE cHyphenChar
Definition: utilit.h:153
const BYTE cEllipseChar
Definition: utilit.h:159
void ConvertJO2Je(string &src)
Definition: utilit.cpp:1731
unsigned int UINT
Definition: utilit.h:108
string GetRmlVariable()
Definition: utilit.cpp:881
size_t VmSwap
Definition: utilit.h:329
Definition: utilit.h:618
Definition: utilit.h:165
bool is_pseudo_graph(BYTE x)
Definition: utilit.cpp:2228
size_t VmHWM
Definition: utilit.h:321
Definition: utilit.h:168
bool LoadFileToString(string FileName, string &Result, bool clobber=true)
Definition: utilit.cpp:2206
const BYTE auml
Definition: utilit.h:516
file_off_t FTell(FILE *fp)
Definition: utilit.cpp:388
Definition: utilit.h:163
bool IsTableFile(const string &FileName)
Definition: utilit.cpp:738
bool is_german_lower(BYTE x)
Definition: utilit.cpp:1325
Definition: utilit.h:164
bool RmlMoveFile(const char *oldpath, const char *newpath)
Definition: utilit.cpp:2505