ddc
BiblIndex.h
Go to the documentation of this file.
1 //
2 // Created by sokirko on 12.10.19.
3 //
4 
5 #ifndef PROJECT_BIBLINDEX_H
6 #define PROJECT_BIBLINDEX_H
7 
8 #include "BiblExpander.h"
9 #include "ConcCommon.h"
10 #include "MorphXmlToken.h"
11 #include "Bibliography.h"
12 #include "FreeBiblIndex.h"
13 
15 
16 
/// Sentinel text-area number (-1), i.e. not a valid index into the text-area list.
/// NOTE(review): presumably what GetTextAreaByName()/WithinTextArea() yield when no
/// text area matches -- confirm against BiblIndex.cpp.
17 const int UnknownTextAreaNo = -1;
18 
19 
/// Index over per-file bibliographic data: registered "free" bibliographical
/// attributes (with aliases), bibliographic expanders, and named text areas.
///
/// NOTE(review): this listing does not show every member declaration. For example,
/// m_Dates (returned by GetDates()) is not declared in the lines visible here;
/// consult the real header before relying on the member list below.
20 class CBiblIndex {
21  //--------------------------------------------------------------
    /// Description of one text area.
22  class CTextArea {
23  public:
    /// the XPath to this information (from options file)
27  string m_Xpath;
28  };
29 private:
    /// registered text areas
35  vector<CTextArea> m_TextAreas;
36 
37  string ReadFromFile(file_off_t start, size_t nbytes) const;
38 protected:
39 
40 
41  //--------------------------------------------------------------
    /// maps a name to a CFreeBiblIndex pointer
42  typedef map<string, CFreeBiblIndex *> FreeBiblStringMap;
    /// maps a string to a string (NOTE(review): presumably alias -> attribute name; confirm)
43  typedef map<string, string> FreeBiblAliasMap;
    /// maps a name to a CBiblExpander pointer
44  typedef map<string, CBiblExpander *> BiblExpanderMap;
45 
46 protected:
    // NOTE(review): the two pointer maps below hold raw pointers; whether
    // FreeBiblIndices()/FreeBiblExpanders() also delete the pointees is not
    // visible from this header -- confirm in BiblIndex.cpp.
47  FreeBiblStringMap m_FreeBiblIndices;
48  FreeBiblAliasMap m_FreeBiblAlias;
49  BiblExpanderMap m_BiblExpanders;
50  map<string, size_t> m_FreeBiblNameToPosition; // map from name to CBibliography::m_BiblAttribs positions
51 
54  string m_Path;
55  string m_OrigXPath;
56  string m_ScanXPath;
57  string m_DateXPath;
59 public:
71 
72 
73 public:
74  CBiblIndex();
75 
    /// clears m_FreeBiblIndices
77  void FreeBiblIndices();
78 
    /// clears m_BiblExpanders
80  void FreeBiblExpanders();
81 
    /// initializes free bibliographical attribute descriptions
    /// (takes an ErrorStr out-parameter and returns bool)
83  bool RegisterFreeBiblAttributes(string fields, string &ErrorStr);
84 
    /// initializes free text areas descriptions
    /// (takes an ErrorStr out-parameter and returns bool)
86  bool RegisterTextAreas(string fields, string &ErrorStr);
87 
    /// return free bibliographical attribute description
89  string GetFreeBibiAttributesDescr() const;
90 
    /// return full text area description
92  string GetTextAreasDescr() const;
93 
94  void SetPath(string ProjectFileName);
95 
96  void LoadBibl(string Path, size_t FileBreaksSize, bool useMMap = false);
97 
    /// returns the bibliographical record
99  CBibliography GetFullBibliographyOfHit(size_t FileNo) const;
100 
101  string GetBiblIndexFileName() const;
102 
103  string GetBiblFileName() const;
104 
105  string GetBiblDateIndexFileName() const;
106 
107  int GetTextAreaByName(const string &Name) const;
108 
    /// look up a free bibliographical index by attribute name (respects aliases);
    /// the inline callers below treat a 0 result as "not found"
110  CFreeBiblIndex *GetFreeBiblIndex(const string &FreeBiblAttribNameOrAlias) const;
111 
    /// look up a bibliographic expander by name;
    /// HasBiblExpander() treats a 0 result as "not found"
113  CBiblExpander *GetBiblExpander(const string &ExpanderName) const;
114 
120  bool GetFilterBounds(CDDCFilterWithBounds &Filter, const string &LoValue, const string &HiValue) const;
121 
127  bool GetFilterValue(CDDCFilterWithBounds &Filter, const string &Value) const;
128 
132  bool GetFilterValues(CDDCFilterWithBounds &Filter, const string &Regex) const;
133 
136  bool GetFilterValues(CDDCFilterWithBounds &Filter, const set<string> &Values) const;
137 
    /// return all text area elements for this document ("doc")
139  void GetTextAreaElements(const TiXmlDocument &doc, vector<TiXmlElement *> &Result) const;
140 
    /// NOTE(review): presumably the number of registered text areas -- confirm in BiblIndex.cpp
142  size_t GetTextAreasCount() const;
143 
146  //int ProcessTextAreaNoInQueryStr(string& Query) const;
147 
150  int WithinTextArea(const vector<string> &Within) const;
151 
152  void InitNoSort(vector<CHit> &Hits) const;
153 
154  void InitSortByDate(vector<CHit> &Hits) const;
155 
156  void InitSortByBiblIntegerField(string FreeBiblAttribNameOrAlias, vector<CHit> &Hits) const;
157 
158  bool IsRegisteredBiblField(const string &FreeBiblAttribNameOrAlias) const;
159 
162  CFreeBiblIndexTypeId GetBiblFieldTypeId(const string &FreeBiblAttribName) const;
163 
    /// print to json
165  string FreeBiblMapToJson(bool useUtf) const;
166  string FreeBiblAliasMapToJson() const;
167  string BiblExpanderMapToJson() const;
168 
169 
    /// exposes m_Dates by const reference (flagged by the original author as an
    /// "ugly dangerous hack")
171  const ddcDateVector &GetDates() const { return m_Dates; };
172 
    /// true iff GetFreeBiblIndex(name) is non-null (respects aliases)
174  bool HasFreeBiblIndex(const string &name) const {return GetFreeBiblIndex(name) != 0;};
175 
    /// true iff GetBiblExpander(ExpanderName) is non-null
177  bool HasBiblExpander(const string &ExpanderName) const {return GetBiblExpander(ExpanderName) != 0;};
178 
    /// same lookup as GetFreeBiblIndex(name) (respects aliases), returned as a
    /// pointer to the const CFreeBiblIndexInterface
180  const CFreeBiblIndexInterface* GetFreeBiblIndexConst(const string &name) const {
181  return GetFreeBiblIndex(name);
182  };
    /// set regex options for all registered fields
184  void SetRegexOptions(const RML_RE::Options &opts);
185 
    /// return values of all visible free bibliographical attributes for the given
    /// FileNo, delimited by Delim
187  string GetVisibleFreeHeaderBiblAttributes(size_t FileNo, string Delim) const;
188 
    /// return names and values of all free bibliographical attributes for the given
    /// FileNo, delimited by Delim
190  string GetFreeHeaderBiblAttributesWithNames(size_t FileNo, char Delim) const;
191 
    /// return names and values of all free bibliographical attributes for the given
    /// FileNo as JSON
193  string
194  GetFreeHeaderBiblAttributesJson(size_t FileNo, bool assume_utf8 = true, bool include_invisible = false) const;
195 
    /// return names and values of all free bibliographical attributes for the given
    /// FileNo as tt-comments
197  string
198  GetFreeHeaderBiblAttributesTabsDump(size_t FileNo, bool assume_utf8 = true, bool include_invisible = false) const;
199 
    /// test whether all compiled Filters match FileNo
201  bool FileMatches(DWORD FileNo, const vector<CDDCFilterWithBounds> &Filters) const;
202 
    /// add a new bibliographic expander to m_BiblExpanders, or replace an existing
    /// one; returns the new expander
204  CBiblExpander *AddBiblExpander(const string &spec);
205 
206 };
207 
208 
209 #endif //PROJECT_BIBLINDEX_H
210 
211 /*--- emacs style variables ---
212  * Local Variables:
213  * mode: C++
214  * c-file-style: "ellemtel"
215  * c-basic-offset: 4
216  * tab-width: 8
217  * indent-tabs-mode: nil
218  * End:
219  */
bool IsRegisteredBiblField(const string &FreeBiblAttribNameOrAlias) const
Definition: BiblIndex.cpp:464
string ReadFromFile(file_off_t start, size_t nbytes) const
void InitSortByBiblIntegerField(string FreeBiblAttribNameOrAlias, vector< CHit > &Hits) const
Definition: BiblIndex.cpp:377
Definition: FreeBiblIndex.h:33
bool FileMatches(DWORD FileNo, const vector< CDDCFilterWithBounds > &Filters) const
test whether all compiled Filters match FileNo
Definition: BiblIndex.cpp:400
bool RegisterFreeBiblAttributes(string fields, string &ErrorStr)
initializes free bibliographical attribute descriptions
Definition: BiblIndex.cpp:93
ddcVecFile< DWORD > ddcDateVector
Definition: BiblIndex.h:14
Definition: pcre_rml.h:46
RML_RE::Options m_RegexOpts
common regex options
Definition: BiblIndex.h:34
Definition: Bibliography.h:13
bool RegisterTextAreas(string fields, string &ErrorStr)
initializes free text areas descriptions
Definition: BiblIndex.cpp:48
QWORD file_off_t
Definition: utilit.h:179
BiblExpanderMap m_BiblExpanders
Definition: BiblIndex.h:49
string m_Path
Definition: BiblIndex.h:54
string FreeBiblAliasMapToJson() const
Definition: BiblIndex.cpp:674
void SetPath(string ProjectFileName)
Definition: BiblIndex.cpp:42
string m_StartPageXPath
Definition: BiblIndex.h:58
A file for globally defined constants and classes.
void FreeBiblExpanders()
clears m_BiblExpanders
Definition: BiblIndex.cpp:34
string m_Xpath
the XPath to this information (from options file)
Definition: BiblIndex.h:27
Definition: ddcMMap.h:733
Abstract API for bibliographic metadata-expansion modules.
Definition: BiblExpander.h:31
string GetBiblIndexFileName() const
Definition: BiblIndex.cpp:307
FreeBiblAliasMap m_FreeBiblAlias
Definition: BiblIndex.h:48
ddcVecFile< file_off_t > m_EndOffsetsInBiblFile
Definition: BiblIndex.h:52
string GetBiblDateIndexFileName() const
Definition: BiblIndex.cpp:311
Definition: ConcCommon.h:248
vector< CTextArea > m_TextAreas
Definition: BiblIndex.h:35
string GetFreeHeaderBiblAttributesJson(size_t FileNo, bool assume_utf8=true, bool include_invisible=false) const
return names and values of all free bibliographical attributes for the given FileNo as JSON (without ...
Definition: BiblIndex.cpp:578
CFreeBiblIndexTypeId
Definition: FreeBiblIndex.h:15
int WithinTextArea(const vector< string > &Within) const
Definition: BiblIndex.cpp:618
bool HasBiblExpander(const string &ExpanderName) const
moo: not quite as ugly or dangerous a hack
Definition: BiblIndex.h:177
string m_TextAreaName
the name of the index (from options file)
Definition: BiblIndex.h:25
map< string, string > FreeBiblAliasMap
Definition: BiblIndex.h:43
string FreeBiblMapToJson(bool useUtf) const
print to json
Definition: BiblIndex.cpp:652
string m_OrigXPath
Definition: BiblIndex.h:55
string BiblExpanderMapToJson() const
Definition: BiblIndex.cpp:689
Definition: tinyxml.h:1097
string GetFreeBibiAttributesDescr() const
return free bibliographical attribute description
Definition: BiblIndex.cpp:267
bool GetFilterValues(CDDCFilterWithBounds &Filter, const string &Regex) const
Definition: BiblIndex.cpp:502
void InitSortByDate(vector< CHit > &Hits) const
Definition: BiblIndex.cpp:369
size_t GetTextAreasCount() const
return the number of text areas
Definition: BiblIndex.cpp:544
void InitNoSort(vector< CHit > &Hits) const
Definition: BiblIndex.cpp:361
map< string, size_t > m_FreeBiblNameToPosition
Definition: BiblIndex.h:50
Definition: BiblIndex.h:22
CFreeBiblIndexTypeId GetBiblFieldTypeId(const string &FreeBiblAttribName) const
Definition: BiblIndex.cpp:468
string GetFreeHeaderBiblAttributesWithNames(size_t FileNo, char Delim) const
return names and values of all free bibliographical attributes for the given FileNo delimited by "Del...
Definition: BiblIndex.cpp:563
const ddcDateVector & GetDates() const
moo: ugly dangerous hack
Definition: BiblIndex.h:171
map< string, CBiblExpander * > BiblExpanderMap
Definition: BiblIndex.h:44
string m_DefaultAttrName
name of default bibliographic field to query if no literal match is found This can be used in conjun...
Definition: BiblIndex.h:70
string GetBiblFileName() const
Definition: BiblIndex.cpp:315
Definition: morph_const.h:107
const CFreeBiblIndexInterface * GetFreeBiblIndexConst(const string &name) const
moo: not quite as ugly or dangerous a hack (respects aliases)
Definition: BiblIndex.h:180
ddcFileOrMMap m_BiblBodyFile
Definition: BiblIndex.h:32
void LoadBibl(string Path, size_t FileBreaksSize, bool useMMap=false)
Definition: BiblIndex.cpp:320
CBiblIndex()
Definition: BiblIndex.cpp:23
FreeBiblStringMap m_FreeBiblIndices
Definition: BiblIndex.h:47
bool m_bMemoryMap
Definition: BiblIndex.h:31
string m_DateXPath
Definition: BiblIndex.h:57
Definition: ConcCommon.h:318
CBiblExpander * AddBiblExpander(const string &spec)
add a new bibliographic expander to m_BiblExpanders, or replace an existing one; returns new expander...
Definition: BiblIndex.cpp:420
bool GetFilterBounds(CDDCFilterWithBounds &Filter, const string &LoValue, const string &HiValue) const
Definition: BiblIndex.cpp:474
ddcDateVector m_Dates
Definition: BiblIndex.h:53
void FreeBiblIndices()
clears m_FreeBiblIndices
Definition: BiblIndex.cpp:27
string GetVisibleFreeHeaderBiblAttributes(size_t FileNo, string Delim) const
return values of all visible free bibliographical attributes for the given FileNo delimited by "Delim...
Definition: BiblIndex.cpp:548
bool HasFreeBiblIndex(const string &name) const
not quite as ugly or dangerous a hack (respects aliases)
Definition: BiblIndex.h:174
bool GetFilterValue(CDDCFilterWithBounds &Filter, const string &Value) const
Definition: BiblIndex.cpp:482
const int UnknownTextAreaNo
Definition: BiblIndex.h:17
Definition: BiblIndex.h:20
void SetRegexOptions(const RML_RE::Options &opts)
set regex options for all registered fields
Definition: BiblIndex.cpp:393
string GetFreeHeaderBiblAttributesTabsDump(size_t FileNo, bool assume_utf8=true, bool include_invisible=false) const
return names and values of all free bibliographical attributes for the given FileNo as tt-comments (f...
Definition: BiblIndex.cpp:593
string GetTextAreasDescr() const
return full text area description
Definition: BiblIndex.cpp:291
uint32_t DWORD
Definition: utilit.h:105
CBiblExpander * GetBiblExpander(const string &ExpanderName) const
moo: not quite as ugly or dangerous a hack
Definition: BiblIndex.cpp:644
void GetTextAreaElements(const TiXmlDocument &doc, vector< TiXmlElement *> &Result) const
return all text area elements for this document ("doc")
Definition: BiblIndex.cpp:530
string m_ScanXPath
Definition: BiblIndex.h:56
CBibliography GetFullBibliographyOfHit(size_t FileNo) const
returns the bibliographical record
Definition: BiblIndex.cpp:345
CFreeBiblIndex * GetFreeBiblIndex(const string &FreeBiblAttribNameOrAlias) const
moo: not quite as ugly or dangerous a hack (respects aliases)
Definition: BiblIndex.cpp:629
map< string, CFreeBiblIndex * > FreeBiblStringMap
Definition: BiblIndex.h:42
int GetTextAreaByName(const string &Name) const
Definition: BiblIndex.cpp:611