25 #include "../ConcordLib/IndexSetForLoadingStage.h" 26 #include "../ConcordLib/IndexSetForQueryingStage.h" 27 #include "../PCRE/pcre_rml.h" 41 COutputToken(
string WordStr,
BYTE Highlight) : m_TokenStr(WordStr), m_Highlight(Highlight) {};
63 bool ConvertLoadIndexToWorkingIndex();
67 void CreateUnionTokenStorages(
const vector<CStringIndexSet *> &IndexSets,
const vector<size_t> &TokenCounts,
68 const vector<vector<DWORD> > &xlateItemId);
71 void OpenStorageFile();
75 string GetName()
const;
78 bool ConvertTempStorageToPersistent();
91 return m_StringBuffer.m_data + W.GetIndexItemOffset();
99 string GetStorageFileName()
const;
102 void CloseStorageFile();
105 void InitIndexSet(
string Name,
string ShortName,
bool bCreateItemStorage,
bool bCompress);
108 void ReadFromTheDisk();
111 bool DestroyIndexSet();
114 bool WriteToFile(
bool bAfterLoading);
117 void UnionIndexSets(
const vector<CStringIndexSet *> &IndexSets,
const vector<size_t> &TokenCounts);
120 void CreateSplitPartitions(vector<CStringIndexSet *> &Partitions,
const vector<CTokenNo> &EndTokenNo)
const;
123 void EnsureSuffixIndex();
126 bool GetTokensFromStorage(
const size_t start_offset,
const size_t end_offset, vector<COutputToken> &Tokens)
const;
149 DWORD GetTypeIndexIdLowerBound(
const string &ValueStr)
const;
152 DWORD GetTypeIndexId(
const string &ValueStr)
const;
155 bool GetContextBounds(
CDDCFilterWithBounds &Filter,
const string &LoValue,
const string &HiValue)
const;
158 void FindOccurrences(
const vector<DWORD> &IndexItems,
const size_t PeriodNo, vector<CTokenNo> &occurrences,
163 FindChunkOccurrences(
const vector<DWORD> &IndexItems, vector<CTokenNo> &occurrences, vector<DWORD> &ChunkLengths,
167 void QueryTokenList(
const string &WordForm, vector<DWORD> &MatchWords)
const;
170 void QueryTokenListWithRightTruncation(
const string &Prefix, vector<DWORD> &MatchWords)
const;
173 void QueryTokenListWithLeftTruncation(
const string &Suffix, vector<DWORD> &MatchWords)
const;
176 void QueryTokenListUsingRegExp(
RML_RE &RegExp, vector<DWORD> &MatchWords,
bool negated =
false,
177 bool ignore_diacritics =
false)
const;
180 void QueryTokenListUniversal(vector<DWORD> &MatchWords)
const;
183 void DumpStorage(FILE *output)
const;
bool operator==(const COutputToken &X) const
Definition: IndexSet.h:43
Definition: ddcMMap.h:226
const char * GetIndexItemStr(const T &W) const
this function returns a string (which was indexed by DDC) by an instance of CIndexItem or CItemIndexFo...
Definition: IndexSet.h:90
Definition: StringIndexator.h:121
string m_InterpStr
the interpretation of the token
Definition: IndexSet.h:35
Definition: ddcMMap.h:733
COutputToken()
Definition: IndexSet.h:39
map< string, CShortOccurCache > CShortOccurCacheMap
a type mapping an index string to its occurrences
Definition: ConcCommon.h:477
ddcFileOrMMap m_StorageFile
a file for index storage
Definition: IndexSet.h:59
COutputToken(string WordStr, BYTE Highlight)
Definition: IndexSet.h:41
Definition: morph_const.h:107
string m_Name
the main name of the index set, for example "Token", "MorphPattern", "Thes", "Chunk"...
Definition: IndexSet.h:83
unsigned char BYTE
Definition: utilit.h:94
CIndexSetForLoadingStage is the part of DDC that is used only during the loading stage.
Definition: IndexSetForLoadingStage.h:126
Definition: ConcCommon.h:318
Definition: pcre_rml.h:41
string m_TokenStr
the token itself
Definition: IndexSet.h:33
DWORD CTokenNo
integer type CTokenNo is used to refer to the index of a token in the corpus
Definition: ConcCommon.h:63
string m_ShortName
a short name of the index set, for example "m", "w", "t", "c"
Definition: IndexSet.h:86
uint32_t DWORD
Definition: utilit.h:105
Definition: IndexSetForQueryingStage.h:154
Definition: IndexSet.h:31
Definition: IndexSet.h:57
BYTE m_Highlight
how DDC should highlight this token in the output hit
Definition: IndexSet.h:37