ddc
|
#include <StringIndexator.h>
Public Types | |
typedef map< string, string > | IndexAliasMap |
typedef for index alias maps More... | |
typedef map< string, CStringIndexSet * > | IndexMap |
typedef for index symbol table More... | |
Public Member Functions | |
CStringIndexator () | |
~CStringIndexator () | |
bool | RegisterStringIndices (const string &IndicesStr) |
read index declarations from a string and register them More... | |
bool | RegisterIndexAliases (const string &IndexAliasStr) |
read index alias declarations from a string and register them; returns true iff all registrations were successful More... | |
bool | RegisterIndexAlias (const string &AliasFrom, const string &AliasTo) |
register a single index alias (low-level); returns true iff AliasTo resolves to a known index according to m_IndexMap More... | |
void | RegisterIndexAlias (const string &AliasFrom, CStringIndexSet *idx) |
register a single index label or alias (lowest-level); if idx is NULL, any existing entry for AliasFrom will be deleted More... | |
void | SetPath (string Path) |
set the path to the indices More... | |
string | GetIndicesString () const |
return all registered index declarations, in opt-file syntax More... | |
string | GetIndexAliasString () const |
return all registered index aliases, in opt-file syntax More... | |
size_t | GetSearchPeriodsCount () const |
return the number of corpus periods More... | |
const CTokenNo & | GetSearchPeriod (size_t i) const |
get a corpus period by an index More... | |
bool | StartIndexing (string Path) |
call CreateTempFiles for all registered indices More... | |
void | TerminateIndexing () |
call DeleteTempFiles for all registered indices More... | |
bool | FinalSaveAllIndices (bool bAfterLoading) |
perform the final save of all indices to disk (converting temp files to persistent ones) More... | |
bool | AddInputLoadIndexToMemoryLoadIndex () |
unites input index with memory index and clears input load index More... | |
bool | AddMemoryLoadIndexToMainLoadIndex () |
unites memory index with main index and clears memory load index More... | |
bool | SaveMemoryLoadIndex () |
store memory load index on the disk More... | |
CStringIndexSet * | GetIndexByName (const string &Name) |
return a pointer to the index by CStringIndexSet::m_Name (linear search) More... | |
CStringIndexSet * | GetIndexByNameOrShortName (const string &Name) |
return a pointer to the index by CStringIndexSet::m_Name or CStringIndexSet::m_ShortName (linear search) More... | |
CStringIndexSet * | GetIndexByAlias (const string &Alias) const |
return a pointer to the index by long-name, short-name, or alias (most abstract, uses m_IndexMap) More... | |
CStringIndexSet * | GetTokenIndex () |
return the first index that normally contains tokens themselves More... | |
const CStringIndexSet * | GetTokenIndex () const |
return the first index that normally contains tokens themselves More... | |
Public Attributes | |
string | m_Path |
where all indices are stored More... | |
bool | m_bMemoryMap |
whether to directly mmap() index file data (default=false) More... | |
vector< CStringIndexSet * > | m_Indices |
the registered indices, by positional index More... | |
IndexAliasMap | m_IndexAlias |
declared index aliases (FROM -> TO); not really used at runtime More... | |
IndexMap | m_IndexMap |
all registered indices, keyed by long-name, short-name, or label (LABEL -> INDEX) More... | |
size_t | m_MaxRegExpExpansionSize |
the maximal number of index items which can be included in an expansion set of one regular expression More... | |
CStringIndexSet * | m_pChunkIndex |
a quick reference to a chunk index, if CConcIndexator::m_bIndexChunks is on, otherwise null More... | |
Protected Member Functions | |
bool | RegisterChunkIndex () |
register chunk index (chunks:NP, VP etc) More... | |
string | GetSearchPeriodsFileName () const |
return the file name for search periods More... | |
bool | DestroyIndices () |
call DestroyIndexSet for all registered indices More... | |
void | ReadIndicesFromTheDisk () |
call ReadFromTheDisk for all registered indices More... | |
void | ClearStringIndices () |
clear m_Indices More... | |
void | IndexOneToken (CTokenIndexator *document, const char *Line, bool tryFixErrors=true) |
index one token and its properties (delimited by CConcCommon.h::globalFieldDelimeter) More... | |
void | IndexTokenFixLongColumns (const size_t MaxLen, const size_t nCols, const char *InputLine, char *Out) |
moo: truncate long columns in InputLine, storing result in Out More... | |
Protected Attributes | |
vector< CTokenNo > | m_SearchPeriods |
search periods of the corpus More... | |
CStringIndexator contains the set of all token indices and the corpus periods. It also contains the main path to the project file.
typedef map<string,string> CStringIndexator::IndexAliasMap |
typedef for index alias maps
typedef map<string,CStringIndexSet*> CStringIndexator::IndexMap |
typedef for index symbol table
CStringIndexator::CStringIndexator | ( | ) |
References m_bMemoryMap, m_MaxRegExpExpansionSize, m_Path, and m_pChunkIndex.
CStringIndexator::~CStringIndexator | ( | ) |
bool CStringIndexator::RegisterChunkIndex | ( | ) |
protected |
register chunk index (chunks:NP, VP etc)
References ChunkIndexName, GetIndexByAlias(), CStringIndexSet::InitIndexSet(), m_Indices, and m_pChunkIndex.
Referenced by CConcordance::LoadOptionsFromString().
string CStringIndexator::GetSearchPeriodsFileName | ( | ) | const |
protected |
return the file name for search periods
References m_Path, and MakeFName().
Referenced by CConcIndexator::DestroyIndex(), FinalSaveAllIndices(), and ReadIndicesFromTheDisk().
bool CStringIndexator::DestroyIndices | ( | ) |
protected |
call DestroyIndexSet for all registered indices
References m_Indices.
Referenced by CConcIndexator::DestroyIndex().
void CStringIndexator::ReadIndicesFromTheDisk | ( | ) |
protected |
call ReadFromTheDisk for all registered indices
References GetSearchPeriodsFileName(), m_Indices, m_SearchPeriods, and ReadVector().
void CStringIndexator::ClearStringIndices | ( | ) |
protected |
clear m_Indices
References m_Indices.
Referenced by RegisterStringIndices(), and ~CStringIndexator().
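void CStringIndexator::IndexOneToken | ( | CTokenIndexator * | document, |
const char * | Line, | ||
bool | tryFixErrors = true | ||
) |
protected |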
index one token and its properties (delimited by CConcCommon.h::globalFieldDelimeter)
References CTokenIndexator::AbsentIndices, CTokenIndexator::ColumnMap, CTokenIndexator::CorpusEndTokenNo, ddc_archive_stub, DDC_STATIC_BUFLEN, ddcLogWarn, Format(), globalFieldDelimeter, IndexTokenFixLongColumns(), CIndexSetForLoadingStage::InsertToInputLoadIndex(), CIndexSetForLoadingStage::m_bUseItemStorage, and m_Indices.
Referenced by CConcIndexator::IndexMorphXml(), CConcIndexator::IndexOneTableTextArea(), and CConcIndexator::IndexTextOrHtmlFile().
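void CStringIndexator::IndexTokenFixLongColumns | ( | const size_t | MaxLen, |
const size_t | nCols, | ||
const char * | InputLine, | ||
char * | Out | ||
) |
protected |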
moo: truncate long columns in InputLine, storing result in Out
References ddcLogWarn, Format(), globalFieldDelimeter, and stringSplit().
Referenced by IndexOneToken().
bool CStringIndexator::RegisterStringIndices | ( | const string & | IndicesStr | ) |
read index declarations from a string and register them
References ClearStringIndices(), ErrorMessage(), GetIndexByAlias(), CStringIndexSet::InitIndexSet(), m_Indices, Name, RegisterIndexAlias(), Trim(), and StringTokenizer::val().
Referenced by CConcordance::LoadOptionsFromString().
bool CStringIndexator::RegisterIndexAliases | ( | const string & | IndexAliasStr | ) |
read index alias declarations from a string and register them; returns true iff all registrations were successful
References ddcLogWarn, Format(), StringTokenizer::next_token(), RegisterIndexAlias(), Trim(), and StringTokenizer::val().
Referenced by CConcordance::LoadOptionsFromString().
bool CStringIndexator::RegisterIndexAlias | ( | const string & | AliasFrom, |
const string & | AliasTo | ||
) |
register a single index alias (low-level); returns true iff AliasTo resolves to a known index according to m_IndexMap
References ddcLogWarn, Format(), GetIndexByAlias(), and m_IndexAlias.
Referenced by RegisterIndexAliases(), and RegisterStringIndices().
void CStringIndexator::RegisterIndexAlias | ( | const string & | AliasFrom, |
CStringIndexSet * | idx | ||
) |
register a single index label or alias (lowest-level); if idx is NULL, any existing entry for AliasFrom will be deleted
References ddcLogWarn, Format(), GetIndexByAlias(), m_IndexMap, and CStringIndexSet::m_Name.
void CStringIndexator::SetPath | ( | string | Path | ) |
set the path to the indices
References m_Path.
string CStringIndexator::GetIndicesString | ( | ) | const |
return all registered index declarations, in opt-file syntax
References ChunkIndexName, Format(), m_Indices, and Trim().
Referenced by CConcordance::LoadOptionsFromString(), and CConcordance::SaveOptionsToString().
string CStringIndexator::GetIndexAliasString | ( | ) | const |
return all registered index aliases, in opt-file syntax
References m_IndexAlias.
Referenced by CConcordance::LoadOptionsFromString(), and CConcordance::SaveOptionsToString().
size_t CStringIndexator::GetSearchPeriodsCount | ( | ) | const |
return the number of corpus periods
References m_SearchPeriods.
Referenced by CIndexSetForQueryingStage::BuildPeriodsDivisionAndCompress(), CStringIndexSet::ConvertLoadIndexToWorkingIndex(), CDDCLeafServer::handle__info(), CIndexSetForQueryingStage::LoadPeriodDivision(), CIndexSetForQueryingStage::ReadAllOccurrences(), CIndexSetForQueryingStage::WritePeriodsDivision(), and ConcIndexatorInvoker::WriteTimeStatistics().
const CTokenNo & CStringIndexator::GetSearchPeriod | ( | size_t | i | ) | const |
inline |
get a corpus period by an index
References Name.
Referenced by CIndexSetForQueryingStage::AddOccurs(), CIndexSetForQueryingStage::BuildPeriodsDivisionAndCompress(), CStringIndexSet::ConvertLoadIndexToWorkingIndex(), and CDDCLeafServer::handle__info().
bool CStringIndexator::StartIndexing | ( | string | Path | ) |
call CreateTempFiles for all registered indices
References m_Indices, and m_Path.
Referenced by CConcIndexator::StartIndexing().
void CStringIndexator::TerminateIndexing | ( | ) |
call DeleteTempFiles for all registered indices
References m_Indices.
Referenced by CConcIndexator::TerminateIndexing().
bool CStringIndexator::FinalSaveAllIndices | ( | bool | bAfterLoading | ) |
perform the final save of all indices to disk (converting temp files to persistent ones)
References GetSearchPeriodsFileName(), m_Indices, m_SearchPeriods, and WriteVector().
Referenced by CConcIndexator::CreateAsUnion(), ConcIndexatorInvoker::FinalizeIndex(), and CConcIndexator::SplitProject().
bool CStringIndexator::AddInputLoadIndexToMemoryLoadIndex | ( | ) |
unites input index with memory index and clears input load index
References m_Indices.
Referenced by ConcIndexatorInvoker::AddInputLoadIndexToMemoryLoadIndexWrapper(), ConcIndexatorInvoker::FinalizeIndex(), and ConcIndexatorInvoker::SaveLoadIndexToDisk().
bool CStringIndexator::AddMemoryLoadIndexToMainLoadIndex | ( | ) |
unites memory index with main index and clears memory load index
References m_Indices.
Referenced by ConcIndexatorInvoker::FinalizeIndex(), and ConcIndexatorInvoker::SaveLoadIndexToDisk().
bool CStringIndexator::SaveMemoryLoadIndex | ( | ) |
store memory load index on the disk
References m_Indices.
Referenced by ConcIndexatorInvoker::FinalizeIndex(), and ConcIndexatorInvoker::SaveLoadIndexToDisk().
CStringIndexSet * CStringIndexator::GetIndexByName | ( | const string & | Name | ) |
return a pointer to the index by CStringIndexSet::m_Name (linear search)
References m_Indices.
CStringIndexSet * CStringIndexator::GetIndexByNameOrShortName | ( | const string & | Name | ) |
return a pointer to the index by CStringIndexSet::m_Name or CStringIndexSet::m_ShortName (linear search)
CStringIndexSet * CStringIndexator::GetIndexByAlias | ( | const string & | Alias | ) | const |
return a pointer to the index by long-name, short-name, or alias (most abstract, uses m_IndexMap)
References m_IndexMap.
Referenced by CQueryTokenNode::BuildRegExp(), CQCountKeyExprToken::Compile(), CQFContextSort::Compile(), CreateMorphIndex(), CQueryTokenNode::EvaluateWithoutHits(), CQueryTokenNode::GetIndex(), CQToken::IndexName(), CConcordance::LoadOptionsFromString(), RegisterChunkIndex(), RegisterIndexAlias(), and RegisterStringIndices().
CStringIndexSet * CStringIndexator::GetTokenIndex | ( | ) |
return the first index that normally contains tokens themselves
References m_Indices.
Referenced by CQFContextSort::Compile().
const CStringIndexSet * CStringIndexator::GetTokenIndex | ( | ) | const |
return the first index that normally contains tokens themselves
References m_Indices.
vector<CTokenNo> CStringIndexator::m_SearchPeriods |
protected |
search periods of the corpus
Referenced by CConcIndexator::CalculateSearchPeriods(), CConcIndexator::CreateAsUnion(), FinalSaveAllIndices(), GetSearchPeriodsCount(), and ReadIndicesFromTheDisk().
string CStringIndexator::m_Path |
where all indices are stored
Referenced by CIndexSetForQueryingStage::AssertHasPath(), CConcIndexator::CreateAsUnion(), CConcIndexator::CreateMorphIndexWrapper(), CStringIndexator(), CConcIndexator::DestroyIndex(), ConcIndexatorInvoker::FinalizeIndex(), CIndexSetForQueryingStage::GetFileNameForInfos(), CIndexSetForQueryingStage::GetOccHdrFileName(), CIndexSetForQueryingStage::GetOccursFileName(), CIndexSetForQueryingStage::GetPeriodsDivisionFileName(), GetSearchPeriodsFileName(), CStringIndexSet::GetStorageFileName(), CIndexSetForQueryingStage::GetSuffixFileName(), CDDCLeafServer::handle__info(), CConcordance::InitDefaultOptions(), CConcordance::LoadOptionsFromString(), SetPath(), CConcIndexator::SplitProject(), CConcIndexator::StartIndexing(), StartIndexing(), CConcIndexator::TerminateIndexing(), and ConcIndexatorInvoker::WriteTimeStatistics().
bool CStringIndexator::m_bMemoryMap |
whether to directly mmap() index file data (default=false)
Referenced by CStringIndexator(), CDDCLeafServer::handle__info(), CConcordance::InitDefaultOptions(), CIndexSetForQueryingStage::LoadIndexSet(), CConcordance::LoadOptionsFromString(), CIndexSetForQueryingStage::LoadPeriodDivision(), CStringIndexSet::OpenStorageFile(), CStringIndexSet::ReadFromTheDisk(), and CConcordance::SaveOptionsToString().
vector<CStringIndexSet*> CStringIndexator::m_Indices |
the registered indices, by positional index
Referenced by AddInputLoadIndexToMemoryLoadIndex(), AddMemoryLoadIndexToMainLoadIndex(), ClearStringIndices(), CConcIndexator::CreateAsUnion(), CIndexDocument::CustomColumnMap(), CIndexDocument::DefaultColumnMap(), DestroyIndices(), FinalSaveAllIndices(), GetIndexByName(), GetIndexByNameOrShortName(), GetIndicesString(), GetTokenIndex(), CDDCLeafServer::handle__info(), IndexOneToken(), ReadIndicesFromTheDisk(), RegisterChunkIndex(), RegisterStringIndices(), CConcIndexator::RollbackIndexOneFile(), SaveMemoryLoadIndex(), CConcIndexator::SplitProject(), StartIndexing(), and TerminateIndexing().
IndexAliasMap CStringIndexator::m_IndexAlias |
declared index aliases (FROM -> TO); not really used at runtime
Referenced by GetIndexAliasString(), and RegisterIndexAlias().
IndexMap CStringIndexator::m_IndexMap |
all registered indices, keyed by long-name, short-name, or label (LABEL -> INDEX)
Referenced by GetIndexByAlias(), CDDCLeafServer::handle__info(), and RegisterIndexAlias().
size_t CStringIndexator::m_MaxRegExpExpansionSize |
the maximal number of index items which can be included in an expansion set of one regular expression
Referenced by CStringIndexator(), CConcordance::LoadOptionsFromString(), CStringIndexSet::QueryTokenList(), CStringIndexSet::QueryTokenListUsingRegExp(), CStringIndexSet::QueryTokenListWithLeftTruncation(), CStringIndexSet::QueryTokenListWithRightTruncation(), and CConcordance::SaveOptionsToString().
CStringIndexSet* CStringIndexator::m_pChunkIndex |
a quick reference to a chunk index, if CConcIndexator::m_bIndexChunks is on, otherwise null
Referenced by CQTokChunk::Create(), CQueryTokenNode::CreateChunkPattern(), CStringIndexator(), CQueryTokenNode::EvaluateWithoutHits(), CConcIndexator::IndexOneTableTextArea(), and RegisterChunkIndex().