ddc
HTMLConv.h
Go to the documentation of this file.
1 //
2 // This file is part of DDC.
3 //
4 // DDC is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Lesser General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // DDC is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Lesser General Public License for more details.
13 //
14 // You should have received a copy of the GNU Lesser General Public License
15 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // ========== Dialing Graphematical Module (www.aot.ru)
18 // ========== Copyright by Alexey Sokirko and Andrey Kalinin (1996-2001), Bryan Jurish (2011)
19 
20 #ifndef HTMLConv_h
21 #define HTMLConv_h
22 
23 
24 #include "../CommonLib/utilit.h"
25 
// HTML: declares two text-extraction entry points, an offset lookup, and
// private bookkeeping for a list of offset ranges.
// NOTE(review): the member functions are only declared here; their definitions
// are in HtmlConv.cpp, so the exact conversion and offset semantics must be
// confirmed there.
26 class HTML
27 {
28 public:
// NOTE(review): listing line 29 is not shown in this view; the page's
// cross-reference index places the declaration of `bool m_bCollectOffsets`
// on that line.
30 
// Default constructor: sets m_bCollectOffsets to false.
31  HTML()
32  {
33  m_bCollectOffsets = false;
34  };
// Takes a file name (by value) and returns a string.
// Presumably the text extracted from that HTML file -- TODO confirm.
35  string GetTextFromHtmlFile(string FileName);
// Takes a character buffer and its length and returns a string.
// Presumably the text extracted from the HTML held in Buffer -- TODO confirm.
36  string GetTextFromHTMLBuffer(const char* Buffer, size_t BufferLen);
// Maps one unsigned long offset to another.
// NOTE(review): looks like a lookup through `offsets`; direction of the
// mapping is not visible here -- confirm.
37  unsigned long getOffset(unsigned long off);
38 
39 private:
// Returns a bool for a string and a tag name.
// Presumably tests whether str matches the given tag -- TODO confirm.
40  bool checkTag(const string& str, const char* tag);
// Takes a single unsigned long offset; presumably records it in `offsets`
// -- TODO confirm.
41  void addOffset(unsigned long);
42 
// A pair of unsigned long bounds (low, high).
43  struct offset_range
44  {
45  unsigned long low;
46  unsigned long high;
// Both bounds default to 0 when the arguments are omitted.
47  offset_range(unsigned long l = 0L, unsigned long h = 0L) : low(l), high(h){};
48  };
49 
// Sequence of offset_range entries owned by this object.
50  vector<offset_range> offsets;
51 };
52 
53 
54 #endif
55 
56 /*--- emacs style variables ---
57  * Local Variables:
58  * mode: C++
59  * c-file-style: "ellemtel"
60  * c-basic-offset: 4
61  * tab-width: 8
62  * indent-tabs-mode: nil
63  * End:
64  */
bool checkTag(const string &str, const char *tag)
Definition: HtmlConv.cpp:256
string GetTextFromHtmlFile(string FileName)
Definition: HtmlConv.cpp:277
unsigned long getOffset(unsigned long off)
Definition: HtmlConv.cpp:27
struct offset_range
Definition: HTMLConv.h:43
offset_range(unsigned long l=0L, unsigned long h=0L)
Definition: HTMLConv.h:47
string GetTextFromHTMLBuffer(const char *Buffer, size_t BufferLen)
Definition: HtmlConv.cpp:65
vector< offset_range > offsets
Definition: HTMLConv.h:50
bool m_bCollectOffsets
Definition: HTMLConv.h:29
void addOffset(unsigned long)
Definition: HtmlConv.cpp:49
class HTML
Definition: HTMLConv.h:26
unsigned long low
Definition: HTMLConv.h:45
HTML()
Definition: HTMLConv.h:31
unsigned long high
Definition: HTMLConv.h:46