00001 // ========== This file is under LGPL, the GNU Lesser General Public Licence 00002 // ========== Dialing Graphematical Module (www.aot.ru) 00003 // ========== Copyright by Alexey Sokirko and Andrey Kalinin(1996-2001) 00004 00005 #ifndef HTMLConv_h 00006 #define HTMLConv_h 00007 00008 00009 #include "../common/utilit.h" 00010 00011 class HTML 00012 { 00013 public: 00014 bool m_bCollectOffsets; 00015 00016 HTML() 00017 { 00018 m_bCollectOffsets = false; 00019 }; 00020 string GetTextFromHtmlFile(string FileName); 00021 string GetTextFromHTMLBuffer(const char* Buffer, size_t BufferLen); 00022 unsigned long getOffset(unsigned long off); 00023 00024 private: 00025 bool checkTag(const string& str, const char* tag); 00026 void addOffset(unsigned long); 00027 00028 struct offset_range 00029 { 00030 unsigned long low; 00031 unsigned long high; 00032 offset_range(unsigned long l = 0L, unsigned long h = 0L) : low(l), high(h){}; 00033 }; 00034 00035 vector<offset_range> offsets; 00036 }; 00037 00038 00039 #endif