21 #ifndef StringIndexator_h 22 #define StringIndexator_h 24 #include "../ConcordLib/IndexSet.h" 129 bool RegisterChunkIndex();
131 string GetSearchPeriodsFileName()
const;
133 bool DestroyIndices();
135 void ReadIndicesFromTheDisk();
137 void ClearStringIndices();
150 void IndexTokenFixLongColumns(
const size_t MaxLen,
const size_t nCols,
const char *InputLine,
char *Out);
185 bool RegisterStringIndices(
const string& IndicesStr);
188 bool RegisterIndexAliases(
const string& IndexAliasStr);
191 bool RegisterIndexAlias(
const string& AliasFrom,
const string& AliasTo);
194 void RegisterIndexAlias(
const string& AliasFrom,
CStringIndexSet* idx);
197 void SetPath(
string Path);
199 string GetIndicesString()
const;
201 string GetIndexAliasString()
const;
204 size_t GetSearchPeriodsCount()
const;
209 bool StartIndexing(
string Path);
211 void TerminateIndexing();
213 bool FinalSaveAllIndices(
bool bAfterLoading);
215 bool AddInputLoadIndexToMemoryLoadIndex();
217 bool AddMemoryLoadIndexToMainLoadIndex();
219 bool SaveMemoryLoadIndex();
const CTokenNo & GetSearchPeriod(size_t i) const
get the corpus search period at the given index
Definition: StringIndexator.h:206
void IndexOneToken(const size_t MaxIndexLineLength, size_t ncols, const char *InputLine)
Definition: toktrim.cc:63
Definition: StringIndexator.h:111
bool m_bMemoryMap
whether to directly mmap() index file data (default=false)
Definition: StringIndexator.h:164
Definition: StringIndexator.h:121
vector< CStringIndexSet * > ColumnMap
Definition: StringIndexator.h:112
string m_Path
where all indices are stored
Definition: StringIndexator.h:161
size_t m_MaxRegExpExpansionSize
the maximum number of index items that can be included in the expansion set of one regular expression...
Definition: StringIndexator.h:176
const char ddc_archive_stub[]
Definition: StringIndexator.h:109
vector< CStringIndexSet * > m_Indices
the registered indices, by positional index
Definition: StringIndexator.h:167
IndexAliasMap m_IndexAlias
declared index aliases (FROM -> TO); not really used at runtime
Definition: StringIndexator.h:170
vector< CTokenNo > m_SearchPeriods
search periods of the corpus
Definition: StringIndexator.h:126
map< string, CStringIndexSet * > IndexMap
typedef for index symbol table
Definition: StringIndexator.h:157
Definition: morph_const.h:107
DWORD CTokenNo
integer type CTokenNo is used to refer to the index of a token in the corpus
Definition: ConcCommon.h:63
IndexMap m_IndexMap
all registered indices, keyed by long-name, short-name, or label (LABEL -> INDEX) ...
Definition: StringIndexator.h:173
CTokenNo CorpusEndTokenNo
Definition: StringIndexator.h:114
vector< CStringIndexSet * > AbsentIndices
Definition: StringIndexator.h:113
Definition: IndexSet.h:57
map< string, string > IndexAliasMap
typedef for index alias maps
Definition: StringIndexator.h:154
CStringIndexSet * m_pChunkIndex
a quick reference to a chunk index, if CConcIndexator::m_bIndexChunks is on, otherwise null ...
Definition: StringIndexator.h:179