00001 #ifndef __Bibliography_H_
00002 #define __Bibliography_H_
00003
00004 #include "../common/util_classes.h"
00005 #include "../common/MorphXmlToken.h"
00006 #include <list>
00007 #include "ConcCommon.h"
00008 #include "../PCRE/pcre_rml.h"
00009
00010
// Sentinel text-area number (-1).
// NOTE(review): presumably the value the int-returning text-area lookups in CConcXml
// (GetTextAreaByName, ProcessTextAreaNoInQueryStr) yield when no area matches - confirm in the .cpp.
00011 const int UnknownTextAreaNo = -1;
// Plain value type holding the bibliographic data of one corpus file.
// CConcXml consumes it (AddIndexItem), fills it (LoadXmlAndReadBibliography,
// ReadMorphXmlFileIntoGraTable) and returns it by value (GetFullBibliographyOfHit).
// All fields are public; the field meanings below are inferred from the names and from the
// matching CConcXml::m_*XPath members - TODO confirm against the implementation.
00015 struct CBibliography
00016 {
// Presumably the bibliography text as given in the source document (cf. CConcXml::m_OrigXPath).
00018 string m_OrigBibl;
// Presumably the bibliography of the scanned edition (cf. CConcXml::m_ScanXPath) - confirm.
00020 string m_ScanBibl;
// Date kept as a string; ConvertDateToInt() is the declared way to obtain an integer from it.
00022 string m_DateStr;
// Start-page information stored as a DWORD (cf. CConcXml::m_StartPageXPath); encoding not visible here.
00024 DWORD m_StartPageInfo;
// Values of the free (user-registered) bibliographic attributes.
// NOTE(review): presumably ordered like the attributes registered in CConcXml - confirm.
00026 vector<string> m_BiblAttribs;
00027
00028
// Default constructor (defined elsewhere).
00029 CBibliography();
// Resets the record; exact post-state is defined in the .cpp.
00031 void CleanBibliography();
// Restores the record from the string S; presumably the inverse of WriteToString() - confirm.
00033 void ReadFromString(const string& S);
// Serialises the record into a single string without modifying it (const).
00035 string WriteToString() const;
// Tries to turn the stored date into an integer written to Result.
// Returns a bool status; NOTE(review): presumably false when the date cannot be parsed - confirm.
00037 bool ConvertDateToInt(int& Result) const;
00038 };
00039
00040
00041
00042
// Option-field names.
// NOTE(review): presumably the keys looked up in the project options by
// CConcXml::RegisterFreeBiblAttributes / RegisterTextAreas - confirm against the callers.
// Being namespace-scope const objects in a header, each translation unit gets its own copy.
00046 const string FreeBiblAttribOptionFieldName = "Bibl";
00047 const string TextAreaOptionFieldName = "textarea";
// Manages the bibliographic side of a corpus: the per-file bibliography records, the
// per-file dates, user-registered "free" bibliographic attribute indices and text areas.
// Everything declared before the `public:` label near the end of the class (nested types
// and data members) is private, because class members are private by default.
// NOTE(review): the class has a user-declared destructor and holds a FILE* plus a map of raw
// CFreeBiblIndex pointers, but declares no copy operations - copying an instance looks unsafe
// if the destructor releases those resources; confirm in the .cpp.
00048 class CConcXml
00049 {
00050
// Base class of one free bibliographic attribute index. Stores a DWORD value per file;
// the virtual interface is re-declared (overridden) by CFreeBiblStringIndex below.
00051 class CFreeBiblIndex
00052 {
// Private helper: builds the index file name from Path (details in the .cpp).
00053 string GetIndexFileName (string Path) const;
00054
00055 public:
// Name of the attribute; NOTE(review): presumably the key used in m_FreeBiblIndices - confirm.
00057 string m_Name;
// XPath expression associated with this attribute.
00059 string m_Xpath;
// One DWORD per file, judging by the name and by the FileNo parameter of the getters - confirm.
00061 vector<DWORD> m_ValuesForEachFile;
00062
// Flag; by its name it controls whether the attribute is shown in the hit header - confirm.
00064 bool m_bShowInHeader;
00065
00066 public:
00067 CFreeBiblIndex();
// Virtual destructor: instances are held through CFreeBiblIndex* (see FreeBiblStringMap).
00068 virtual ~CFreeBiblIndex();
// Returns a textual description of this index.
00069 string GetDescriptionStr () const;
// Returns the value for file FileNo as an int (non-virtual).
00070 int GetIntegerValue(DWORD FileNo ) const;
00071
// Virtual interface; each function below is re-declared with the same signature in CFreeBiblStringIndex.
00072 virtual void clear();
00073 virtual string GetStringValue(DWORD FileNo ) const;
// Loads the index stored under Path; FileBreaksSize is presumably the expected number of files - confirm.
00074 virtual bool ReadFromDisk (string Path, DWORD FileBreaksSize);
// Builds this index as the union of two other indices; Path is presumably where the result lives - confirm.
00075 virtual void CreateUnion (string Path, const CFreeBiblIndex* pIndex1, const CFreeBiblIndex* pIndex2);
// Translates Value (interpreted under RegexOpts) into Filter; returns a bool status.
00076 virtual bool GetValueForDDCFilter (const RML_RE::Options &RegexOpts, string Value, CDDCFilterWithBounds& Filter) const;
00077 virtual bool RegisterBiblStringItemId(const string& Value);
00078 virtual bool DeleteBiblFiles (string Path) const;
00079 virtual bool ConvertAndSaveToDiskAfterIndexing (string Path);
00080 virtual string GetTypeStr() const;
00081
00082 };
00083
// String-valued variant of CFreeBiblIndex: adds a table of strings (m_Values) on top of the
// per-file DWORDs of the base class.
// NOTE(review): presumably the DWORDs are indices into m_Values - confirm.
00084 class CFreeBiblStringIndex : public CFreeBiblIndex
00085 {
// String paired with a numeric id. Ordering and equality look at m_BiblString only;
// m_BiblId does not take part in either comparison.
00086 struct CStringItem
00087 {
00088 string m_BiblString;
00089 DWORD m_BiblId;
// Orders items by their string.
00090 bool operator < (const CStringItem& X) const
00091 {
00092 return m_BiblString < X.m_BiblString;
00093 };
// Two items are equal when their strings are equal, whatever their ids.
00094 bool operator == (const CStringItem& X) const
00095 {
00096 return m_BiblString == X.m_BiblString;
00097 };
00098
00099 };
// Items collected in a list; by the name, used while the index is being built - confirm.
00101 list<CStringItem > m_BuildStringItems;
// String table of this index.
00103 vector<string> m_Values;
00104
// Merges the string sets _X1 and _X2; Transfer1/Transfer2 are output vectors
// (NOTE(review): presumably old-id to new-id mappings for each input - confirm).
00105 void CreateUnionOfBiblStrings (const vector<string>& _X1, const vector<string>& _X2, vector<DWORD>& Transfer1, vector<DWORD>& Transfer2);
00106 string GetStringFileName (string Path) const;
// Read / write a set of strings from / to FileName; return a bool status.
00107 bool ReadBiblStringItems (vector<string>& Set, string FileName) const;
00108 bool WriteBiblStringItems (const vector<string>& Set, string FileName) const;
00109
00110
00111 public:
00112
00113 CFreeBiblStringIndex();
00114 virtual ~CFreeBiblStringIndex();
// The functions below carry the same signatures as the virtual functions of CFreeBiblIndex
// and therefore override them (they are implicitly virtual).
00115 void clear();
00116 string GetTypeStr() const;
00117 string GetStringValue(DWORD FileNo ) const;
00118 bool ReadFromDisk (string Path, DWORD FileBreaksSize);
00119 void CreateUnion (string Path, const CFreeBiblIndex* pIndex1, const CFreeBiblIndex* pIndex2);
00120 bool GetValueForDDCFilter (const RML_RE::Options &RegexOpts, string Value, CDDCFilterWithBounds& Filter) const;
00121 bool RegisterBiblStringItemId(const string& Value);
00122 bool DeleteBiblFiles (string Path) const;
00123 bool ConvertAndSaveToDiskAfterIndexing(string Path);
00124 };
00125
// A named text area together with the XPath expression that selects it.
00126 class CTextArea
00127 {
00128 public:
00130 string m_TextAreaName;
00132 string m_Xpath;
00133 };
00134
00135
00136
// Free bibliographic indices keyed by a string (presumably the attribute name - confirm).
// The map stores raw base-class pointers; NOTE(review): ownership is not visible in this header,
// FreeBiblIndices() is the declared way to release them.
00137 typedef map<string, CFreeBiblIndex*> FreeBiblStringMap;
00138 FreeBiblStringMap m_FreeBiblIndices;
00139
00140
// File offsets; by the name, the end offset of each record inside the bibliography body file - confirm.
00141 vector<file_off_t> m_EndOffsetsInBiblFile;
// Integer dates; presumably one per file, as produced by CBibliography::ConvertDateToInt - confirm.
00142 vector<int> m_Dates;
00143
// C stdio handle of the bibliography body file.
00144 FILE* m_BiblBodyFile;
00145 string m_Path;
00146 size_t m_BiblBodyFileSize;
// XPath expressions; the names mirror the CBibliography fields m_OrigBibl, m_ScanBibl,
// m_DateStr and m_StartPageInfo.
00147 string m_OrigXPath;
00148 string m_ScanXPath;
00149 string m_DateXPath;
00150 string m_StartPageXPath;
// Registered text areas; NOTE(review): a "text area number" is presumably an index into this vector - confirm.
00151 vector<CTextArea> m_TextAreas;
00152
00153
// Private helpers.
00154 void DeleteFiles();
00155 string GetBiblIndexFileName() const;
00156 string GetBiblFileName() const;
00157 string GetBiblDateIndexFileName() const;
// Looks a text area up by name and returns an int
// (NOTE(review): presumably its number, or UnknownTextAreaNo when absent - confirm).
00158 int GetTextAreaByName(const string& Name) const;
00159
00160
00161
00162
00163
00164
00165
00166 public:
00167 CConcXml();
00168 ~CConcXml();
00169
// Releases the entries of m_FreeBiblIndices (see the ownership note on that member).
00171 void FreeBiblIndices();
00172
// Indexing-time API; each call returns a bool status unless declared void.
// NOTE(review): the intended order looks like Start -> AddIndexItem (repeated) ->
// FinalSaveBibliography, or ExitWithoutSave to abandon - confirm.
00174 bool Start(string ProjectFileName);
00176 bool AddIndexItem(const CBibliography& Bibliography);
00178 bool FinalSaveBibliography();
00180 void ExitWithoutSave();
// Register free attributes / text areas from the string `fields`; ErrorStr is an output
// parameter for an error message.
00182 bool RegisterFreeBiblAttributes(string fields, string& ErrorStr);
00184 bool RegisterTextAreas(string fields, string& ErrorStr);
// NOTE(review): "Bibi" in the name looks like a typo for "Bibl"; it is part of the public
// interface, so it is left unchanged here.
00186 string GetFreeBibiAttributesDescr() const;
00188 string GetTextAreasDescr() const;
00189 void SetPath(string ProjectFileName);
// Loads previously saved bibliography data from Path; FileBreaksSize is presumably the number
// of files expected - confirm.
00190 bool LoadBibl(string Path, size_t FileBreaksSize);
// Returns, by value, the bibliography record of file FileNo.
00192 CBibliography GetFullBibliographyOfHit(size_t FileNo) const;
// Consistency check against FileBreaksNumber; returns a bool status.
00194 bool CheckBibl(size_t FileBreaksNumber) const;
// Fills this object from two other CConcXml objects (both read-only); returns a bool status.
00195 bool UniteBibliography(const CConcXml& B1, const CConcXml& B2);
// Translates Value (interpreted under RegexOpts) into Filter; returns a bool status.
00197 bool GetValueFromBiblSet (const RML_RE::Options &RegexOpts,string Value, CDDCFilterWithBounds& Filter) const;
// Parses pFileBuffer into doc and fills Bibl; strError is an output parameter for an error message.
00199 bool LoadXmlAndReadBibliography(TiXmlDocument& doc, const char* pFileBuffer, CBibliography& Bibl, string& strError);
// Reads a morphological XML file into GraTable and fills Bibl; strError receives an error message.
00201 bool ReadMorphXmlFileIntoGraTable(string FileName, const char* pFileBuffer, vector<CXmlToken>& GraTable, string& strError, CBibliography& Bibl);
// Three renderings of the free attributes of file FileNo: delimiter-separated values,
// values with names, and JSON.
00203 string GetVisibleFreeHeaderBiblAttributes(size_t FileNo, string Delim) const;
00205 string GetFreeHeaderBiblAttributesWithNames(size_t FileNo, char Delim) const;
// assume_utf8 defaults to true; its exact effect on the output is defined in the .cpp.
00207 string GetFreeHeaderBiblAttributesJson(size_t FileNo, bool assume_utf8=true) const;
00209 void SetFreeBiblAttribsEmpty(CBibliography& Bibl);
// Collects into Result the elements of doc selected for the text areas; strError receives an error message.
00211 bool GetTextAreaElements(const TiXmlDocument& doc, vector<TiXmlElement*>& Result, string& strError) const;
00213 size_t GetTextAreasCount() const;
// Takes Query by non-const reference (so it may be modified) and returns an int
// (NOTE(review): presumably a text-area number or UnknownTextAreaNo - confirm).
00215 int ProcessTextAreaNoInQueryStr(string& Query) const;
// Prepare Hits for sorting by date, by an integer free attribute, or for no sorting.
// NOTE(review): presumably these fill a per-hit sort key - confirm in the .cpp.
00216 void InitLessByDate(vector<CHit>& Hits) const;
00217 void InitGreaterByDate(vector<CHit>& Hits) const;
00218 void InitLessByBiblIntegerField(string FreeBiblAttribName, vector<CHit>& Hits) const;
00219 void InitGreaterByBiblIntegerField(string FreeBiblAttribName, vector<CHit>& Hits) const;
00220 void InitNoSort(vector<CHit>& Hits) const;
// Reports whether FreeBiblAttribName is a registered free attribute.
00221 bool IsRegisteredBiblField(string FreeBiblAttribName) const;
00222
00223 };
00224
00225
00226 #endif