ddc
|
#include <IndexSet.h>
Public Member Functions | |
template<class T > | |
const char * | GetIndexItemStr (const T &W) const |
this function returns the string (which was indexed by DDC) for an instance of CIndexItem or CItemIndexForLoading More... | |
CStringIndexSet (const CStringIndexator *pParent) | |
~CStringIndexSet () | |
string | GetStorageFileName () const |
return file name for storage More... | |
void | CloseStorageFile () |
close storage file More... | |
void | InitIndexSet (string Name, string ShortName, bool bCreateItemStorage, bool bCompress) |
initialize all class slots More... | |
void | ReadFromTheDisk () |
read index from the disk More... | |
bool | DestroyIndexSet () |
clear all vectors of the index and remove the index files More... | |
bool | WriteToFile (bool bAfterLoading) |
write index to file More... | |
void | UnionIndexSets (const vector< CStringIndexSet *> &IndexSets, const vector< size_t > &TokenCounts) |
build the union of multiple indices; throws CExpc on failure (new) More... | |
void | CreateSplitPartitions (vector< CStringIndexSet *> &Partitions, const vector< CTokenNo > &EndTokenNo) const |
partition this index into Partitions[] on EndTokenNo[] (exclusive) More... | |
void | EnsureSuffixIndex () |
ensure that m_rIndex is non-empty, populating it from m_Index if required (called during index compilation) More... | |
bool | GetTokensFromStorage (const size_t start_offset, const size_t end_offset, vector< COutputToken > &Tokens) const |
return sequence of tokens(strings) [start_offset, end_offset] More... | |
DWORD | GetTokenIndexId (CTokenNo TokenNo) const |
ddcVecFile< CIndexItem >::const_iterator | GetTypeIndexIdLowerBoundIter (const string &ValueStr) const |
ddcVecFile< CIndexItem >::const_iterator | GetTypeIndexIdUpperBoundIter (const string &ValueStr) const |
DWORD | GetTypeIndexIdLowerBound (const string &ValueStr) const |
DWORD | GetTypeIndexId (const string &ValueStr) const |
gets index-local item-id of string type ValueStr, or DWORD_MAX if not in index More... | |
bool | GetContextBounds (CDDCFilterWithBounds &Filter, const string &LoValue, const string &HiValue) const |
populate bounds for context-sort operator More... | |
void | FindOccurrences (const vector< DWORD > &IndexItems, const size_t PeriodNo, vector< CTokenNo > &occurrences, CShortOccurCacheMap *pCaches, vector< int > &CacheIds) const |
find all occurrences of index items in subcorpora PeriodNo, using cache pCaches More... | |
void | FindChunkOccurrences (const vector< DWORD > &IndexItems, vector< CTokenNo > &occurrences, vector< DWORD > &ChunkLengths, size_t PeriodNo, CShortOccurCacheMap *pCaches, vector< int > &CacheIds) const |
find all occurrences of index items in subcorpora PeriodNo, using cache pCaches (if occurrences are written by chunks) More... | |
void | QueryTokenList (const string &WordForm, vector< DWORD > &MatchWords) const |
search for a string "WordForm", and add it to "MatchWords", if it is found More... | |
void | QueryTokenListWithRightTruncation (const string &Prefix, vector< DWORD > &MatchWords) const |
search for all strings, which begin with "Prefix", and add their IDs to "MatchWords" More... | |
void | QueryTokenListWithLeftTruncation (const string &Suffix, vector< DWORD > &MatchWords) const |
search for all strings, which end with "Suffix", adding their IDs to "MatchWords" (requires m_rIndex, introduced in v2.1.21) More... | |
void | QueryTokenListUsingRegExp (RML_RE &RegExp, vector< DWORD > &MatchWords, bool negated=false, bool ignore_diacritics=false) const |
search for all index items which satisfy the regular expression "RegExp", and add them to "MatchWords" More... | |
void | QueryTokenListUniversal (vector< DWORD > &MatchWords) const |
populate "MatchWords" with all index item indices More... | |
void | DumpStorage (FILE *output) const |
print the string representation of the whole storage to stdout More... | |
Public Member Functions inherited from CIndexSetForLoadingStage | |
CIndexSetForLoadingStage () | |
virtual | ~CIndexSetForLoadingStage () |
bool | CreateTempFiles (string Path) |
creates temporary files for indexing More... | |
bool | DeleteTempFiles () |
deletes temporary files after indexing More... | |
size_t | GetMemoryLoadIndexItemsCount () const |
gets the number of items in memory load index More... | |
bool | SaveMemoryLoadIndex () |
saves memory index More... | |
bool | AddInputLoadIndexToMemoryLoadIndex () |
add the input load index to the memory load index and clear the input load index More... | |
void | SortInputAndMemoryIndices () |
sort the input and the memory load indices More... | |
bool | AddMemoryLoadIndexToMainLoadIndex () |
add the memory load index to the main load index and clear the memory load index More... | |
void | InsertToInputLoadIndex (const char *Str, size_t StrLen, const vector< CTokenNo > &occurrences) |
updates input or memory load index with one string More... | |
void | RollbackLoadIndex (CTokenNo startTrimTokenNo) |
rolls back buffered index data starting at startTrimTokenNo More... | |
void | PrintLoadIndexStats (FILE *f=stderr) const |
debug: print input/memory load index stats More... | |
Public Member Functions inherited from CIndexSetForQueryingStage | |
CIndexSetForQueryingStage (const CStringIndexator *pParent) | |
virtual | ~CIndexSetForQueryingStage () |
bool | DestroyIndexSet () |
destroy index set and remove index files More... | |
void | ReadAllOccurrences (size_t IndexItemNo, vector< CTokenNo > &Occurs) const |
reads all occurrences of IndexItemNo (this function can allocate much memory; it should be used carefully) More... | |
Public Attributes | |
string | m_Name |
the main name of the index set, for example "Token", "MorphPattern", "Thes", "Chunk"... More... | |
string | m_ShortName |
a short name of the index set, for example "m", "w", "t", "c" More... | |
Public Attributes inherited from CIndexSetForLoadingStage | |
bool | m_bUseItemStorage |
if true, then the program creates and uses a storage for this index More... | |
ddcVecFile< char > | m_StringBuffer |
a buffer for storing index strings (compile-time) More... | |
Public Attributes inherited from CIndexSetForQueryingStage | |
ddcVecFile< CIndexItem > | m_Index |
the main index(from strings to the ordered list of their occurrences) More... | |
CSuffixIndex | m_rIndex |
optional auxiliary index for suffix-queries; ItemIds lexicographically sorted by reverse string-value More... | |
PeriodsDivisionMapT | m_EndPeriodOffsets |
all corpus period divisions for the long occurrence lists More... | |
const CStringIndexator * | m_pParent |
a pointer to the collection of indices, which contains a reference to this index More... | |
bool | m_bCompressOccurrences |
if true, then the occurrences should be compressed (up to 30% for huge corpora) More... | |
Private Member Functions | |
bool | ConvertLoadIndexToWorkingIndex () |
convert a temporary index set to the working index More... | |
void | CreateUnionTokenStorages (const vector< CStringIndexSet *> &IndexSets, const vector< size_t > &TokenCounts, const vector< vector< DWORD > > &xlateItemId) |
make concatenation of multiple storages (new) More... | |
void | OpenStorageFile () |
open storage file More... | |
string | GetName () const |
return m_Name (an implementation of pure member CIndexSetForLoadingStage::GetName ) More... | |
bool | ConvertTempStorageToPersistent () |
converts temporary index storage to a persistent one (replacing a reference to m_StringBuffer by an index item number) More... | |
Private Attributes | |
ddcFileOrMMap | m_StorageFile |
a file for index storage More... | |
Additional Inherited Members | |
Member Functions inherited from CIndexSetForLoadingStage | |
size_t | AddItemStrToBuffer (const char *Str, size_t StrLen) |
add a string to m_StringBuffer More... | |
Member Functions inherited from CIndexSetForQueryingStage | |
void | AssertHasPath () const |
return true, if the project path is initialized More... | |
void | AddOccurs (size_t IndexItemNo, const bool bOneOccurrence, const size_t StartOccurNo, const size_t EndOccurNo, vector< CTokenNo > &Occurs, size_t PeriodNo, COccurrBuffer &OccursBuffer, CShortOccurCache *pCacheByIndexSet, int &CacheId) const |
a function for reading occurrences for one index item More... | |
string | GetOccursFileName () const |
return the file name for the file occurrences More... | |
string | GetOccHdrFileName () const |
return the name of file for m_Index More... | |
string | GetSuffixFileName () const |
return the name of file for m_rIndex (for suffix-queries) More... | |
string | GetPeriodsDivisionFileName () const |
return the name of file for occurrences period division More... | |
string | GetFileNameForInfos () const |
return the name of file for CIndexSetForLoadingStage::m_StringBuffer More... | |
file_off_t | GetOccurrsFileSize () const |
return the size of the file for occurrences More... | |
size_t | GetStartOccurNo (size_t IndexNo) const |
get the offset of the first occurrence of index item no IndexNo in the file of occurrences(m_OccursFp) More... | |
bool | BuildPeriodsDivisionAndCompress (const DWORD TokenId, vector< CTokenNo > &InputTokens) |
build a period division for one index item More... | |
bool | AddOneIndexItem (CItemIndexForLoading &M, FILE *res_fp, size_t &CurrPositionInResFile, const CTokenNo EndTokeNo) |
write one index item to result file More... | |
bool | WritePeriodsDivision () |
write index item's period division to disk More... | |
bool | LoadIndexSet (bool bLoadHeaderOfOccurrences=true) |
load index set from binaries More... | |
Attributes inherited from CIndexSetForLoadingStage | |
FILE * | m_TempStorageFile |
a temporary file for index storage More... | |
string | m_TempStorageFileName |
a temporary file, where the index storage is stored More... | |
string | m_MainOccurTempFileName |
a temporary file, where the main index is stored More... | |
Class CStringIndexSet is the topmost implementation of one index set. Its main functions deal with searching for strings in the index and retrieving their occurrences. In addition, this class inherits from both CIndexSetForLoadingStage and CIndexSetForQueryingStage, and therefore provides the connection between them during the load phase (for example, CStringIndexSet::ConvertLoadIndexToWorkingIndex).
CStringIndexSet::CStringIndexSet | ( | const CStringIndexator * | pParent | ) |
CStringIndexSet::~CStringIndexSet | ( | ) |
|
private |
convert a temporary index set to the working index
References CIndexSetForQueryingStage::AddOneIndexItem(), ddcVecFile< T >::clear(), ddcMapFile< KeyT, ValT >::clear(), ddcVecFile< T >::empty(), CItemIndexForLoading::FreeOccurs(), CIndexSetForQueryingStage::GetOccursFileName(), CStringIndexator::GetSearchPeriod(), CStringIndexator::GetSearchPeriodsCount(), CItemIndexForLoading::InitOccurs(), CIndexSetForQueryingStage::m_EndPeriodOffsets, CIndexSetForQueryingStage::m_Index, CIndexSetForLoadingStage::m_MainOccurTempFileName, CIndexSetForQueryingStage::m_pParent, CItemIndexForLoading::ReadFromTemporalFile(), and CIndexSetForQueryingStage::WritePeriodsDivision().
Referenced by WriteToFile().
|
private |
make concatenation of multiple storages (new)
References CloseStorageFile(), Format(), GetStorageFileName(), CIndexSetForLoadingStage::m_bUseItemStorage, OpenStorageFile(), and SaveOnePartOfUnionTokenStorage().
Referenced by UnionIndexSets().
|
private |
open storage file
References CloseStorageFile(), GetStorageFileName(), CStringIndexator::m_bMemoryMap, CIndexSetForQueryingStage::m_pParent, m_StorageFile, and ddcFileOrMMap::Open().
Referenced by CreateUnionTokenStorages(), ReadFromTheDisk(), and WriteToFile().
|
privatevirtual |
return m_Name (an implementation of pure member CIndexSetForLoadingStage::GetName )
Implements CIndexSetForLoadingStage.
References m_Name.
Referenced by FindChunkOccurrences(), FindOccurrences(), and GetStorageFileName().
|
private |
converts temporary index storage to a persistent one (replacing a reference to m_StringBuffer by an index item number)
References ddcVecFile< T >::empty(), ddcVecFile< T >::ensureVec(), GetStorageFileName(), CIndexSetForLoadingStage::m_bUseItemStorage, CIndexSetForQueryingStage::m_Index, CIndexSetForLoadingStage::m_TempStorageFile, CIndexSetForLoadingStage::m_TempStorageFileName, and ddcVecFile< T >::m_vec.
Referenced by WriteToFile().
|
inline |
this function returns the string (which was indexed by DDC) for an instance of CIndexItem or CItemIndexForLoading
References Name.
Referenced by CreateMorphIndex(), CreateSplitPartitions(), DumpStorage(), FindChunkOccurrences(), GetTokensFromStorage(), GetTypeIndexId(), CConcSession::InitSortByContext(), QueryTokenList(), and QueryTokenListUsingRegExp().
string CStringIndexSet::GetStorageFileName | ( | ) | const |
return file name for storage
References GetName(), CStringIndexator::m_Path, CIndexSetForQueryingStage::m_pParent, and MakeFName().
Referenced by ConvertTempStorageToPersistent(), CreatePartitionStorage(), CreateSplitPartitions(), CreateUnionTokenStorages(), DestroyIndexSet(), DumpStorage(), and OpenStorageFile().
void CStringIndexSet::CloseStorageFile | ( | ) |
close storage file
References ddcFileOrMMap::Close(), and m_StorageFile.
Referenced by CreateUnionTokenStorages(), DestroyIndexSet(), OpenStorageFile(), and ~CStringIndexSet().
void CStringIndexSet::InitIndexSet | ( | string | Name, |
string | ShortName, | ||
bool | bCreateItemStorage, | ||
bool | bCompress | ||
) |
initialize all class slots
References CIndexSetForQueryingStage::m_bCompressOccurrences, CIndexSetForLoadingStage::m_bUseItemStorage, m_Name, m_ShortName, and Name.
Referenced by CStringIndexator::RegisterChunkIndex(), and CStringIndexator::RegisterStringIndices().
void CStringIndexSet::ReadFromTheDisk | ( | ) |
read index from the disk
References CIndexSetForQueryingStage::GetFileNameForInfos(), CIndexSetForQueryingStage::LoadIndexSet(), CStringIndexator::m_bMemoryMap, CIndexSetForLoadingStage::m_bUseItemStorage, CIndexSetForQueryingStage::m_pParent, CIndexSetForLoadingStage::m_StringBuffer, ddcVecFile< T >::open(), and OpenStorageFile().
Referenced by CreateMorphIndex().
bool CStringIndexSet::DestroyIndexSet | ( | ) |
clear all vectors of the index and remove the index files
References ddcVecFile< T >::close(), CloseStorageFile(), CIndexSetForQueryingStage::DestroyIndexSet(), FileExists(), GetStorageFileName(), and CIndexSetForLoadingStage::m_StringBuffer.
Referenced by CreateMorphIndex().
bool CStringIndexSet::WriteToFile | ( | bool | bAfterLoading | ) |
write index to file
References ConvertLoadIndexToWorkingIndex(), ConvertTempStorageToPersistent(), CIndexSetForLoadingStage::DeleteTempFiles(), EnsureSuffixIndex(), ddcVecFile< T >::ensureVec(), CIndexSetForQueryingStage::GetFileNameForInfos(), CIndexSetForQueryingStage::GetOccHdrFileName(), CIndexSetForQueryingStage::GetSuffixFileName(), CIndexSetForLoadingStage::m_bUseItemStorage, CIndexSetForQueryingStage::m_Index, CIndexSetForQueryingStage::m_rIndex, CIndexSetForLoadingStage::m_StringBuffer, ddcVecFile< T >::m_vec, OpenStorageFile(), and WriteVector().
Referenced by CreateMorphIndex().
void CStringIndexSet::UnionIndexSets | ( | const vector< CStringIndexSet *> & | IndexSets, |
const vector< size_t > & | TokenCounts | ||
) |
build the union of multiple indices; throws CExpc on failure (new)
References CIndexSetForLoadingStage::AddItemStrToBuffer(), CIndexSetForQueryingStage::AddOneIndexItem(), ddcVecFile< T >::clear(), CreateUnionTokenStorages(), ddcLogDebug, ddcVecFile< T >::ensureVec(), Format(), CItemIndexForLoading::FreeOccurs(), CItemIndexForLoading::GetIndexItemOffset(), CItemIndexForLoading::GetOccurs(), CIndexSetForQueryingStage::GetOccursFileName(), CItemIndexForLoading::InitOccurs(), CIndexSetForLoadingStage::m_bUseItemStorage, CIndexSetForQueryingStage::m_Index, m_Name, m_ShortName, CIndexSetForLoadingStage::m_StringBuffer, ddcVecFile< T >::m_vec, CItemIndexForLoading::SetIndexItemOffset(), ddcVecFile< T >::size(), utrace, and CIndexSetForQueryingStage::WritePeriodsDivision().
Referenced by CConcIndexator::CreateAsUnion().
void CStringIndexSet::CreateSplitPartitions | ( | vector< CStringIndexSet *> & | Partitions, |
const vector< CTokenNo > & | EndTokenNo | ||
) | const |
partition this index into Partitions[] on EndTokenNo[] (exclusive)
References CIndexSetForQueryingStage::AddOneIndexItem(), ClearVector(), CreatePartitionStorage(), ddcLogTrace, Format(), GetIndexItemStr(), CIndexSetForQueryingStage::GetOccursFileName(), GetStorageFileName(), CIndexSetForLoadingStage::m_bUseItemStorage, CIndexSetForQueryingStage::m_Index, m_Name, CIndexSetForLoadingStage::m_StringBuffer, ddcVecFile< T >::m_vec, CIndexSetForQueryingStage::ReadAllOccurrences(), ddcVecFile< T >::size(), and CIndexSetForQueryingStage::WritePeriodsDivision().
Referenced by CConcIndexator::SplitProject().
void CStringIndexSet::EnsureSuffixIndex | ( | ) |
ensure that m_rIndex is non-empty, populating it from m_Index if required (called during index compilation)
References ddcVecFile< T >::ensureVec(), CIndexSetForQueryingStage::m_Index, CIndexSetForQueryingStage::m_rIndex, CIndexSetForLoadingStage::m_StringBuffer, ddcVecFile< T >::m_vec, and ddcVecFile< T >::size().
Referenced by WriteToFile().
bool CStringIndexSet::GetTokensFromStorage | ( | const size_t | start_offset, |
const size_t | end_offset, | ||
vector< COutputToken > & | Tokens | ||
) | const |
return sequence of tokens(strings) [start_offset, end_offset]
References GetIndexItemStr(), CIndexSetForLoadingStage::m_bUseItemStorage, CIndexSetForQueryingStage::m_Index, m_StorageFile, ddcFileOrMMap::ReadBuffer(), and ddcVecFile< T >::size().
Referenced by CConcordance::DumpFileIndexTabs(), and CConcSession::GetTokensFromStorageByBreak().
DWORD CStringIndexSet::GetTokenIndexId | ( | CTokenNo | TokenNo | ) | const |
gets index-local item-id of token number TokenNo
Returns the ItemId such that
CIndexItem TokenItem = m_Index[ItemId]
is the indexed attribute for token number TokenNo, or DWORD_MAX if an error occurs (e.g. no such token).
References CIndexSetForLoadingStage::m_bUseItemStorage, CIndexSetForQueryingStage::m_Index, m_StorageFile, ddcFileOrMMap::ReadBuffer(), and ddcVecFile< T >::size().
Referenced by CConcSession::InitSortByContext().
ddcVecFile< CIndexItem >::const_iterator CStringIndexSet::GetTypeIndexIdLowerBoundIter | ( | const string & | ValueStr | ) | const |
gets local m_Index iterator (lower-bound) for type ValueStr
References ddcVecFile< T >::begin(), ddcVecFile< T >::end(), ddcVecFile< T >::m_data, CIndexSetForQueryingStage::m_Index, and CIndexSetForLoadingStage::m_StringBuffer.
Referenced by GetContextBounds(), GetTypeIndexId(), GetTypeIndexIdLowerBound(), and QueryTokenList().
ddcVecFile< CIndexItem >::const_iterator CStringIndexSet::GetTypeIndexIdUpperBoundIter | ( | const string & | ValueStr | ) | const |
gets local m_Index iterator (upper bound) for string type ValueStr
References ddcVecFile< T >::begin(), ddcVecFile< T >::end(), ddcVecFile< T >::m_data, CIndexSetForQueryingStage::m_Index, and CIndexSetForLoadingStage::m_StringBuffer.
Referenced by GetContextBounds().
DWORD CStringIndexSet::GetTypeIndexIdLowerBound | ( | const string & | ValueStr | ) | const |
gets index-local item-id (lower bound) of string type ValueStr
References ddcVecFile< T >::begin(), GetTypeIndexIdLowerBoundIter(), and CIndexSetForQueryingStage::m_Index.
Referenced by CConcSession::SortKeyLB().
DWORD CStringIndexSet::GetTypeIndexId | ( | const string & | ValueStr | ) | const |
gets index-local item-id of string type ValueStr, or DWORD_MAX if not in index
References ddcVecFile< T >::begin(), ddcVecFile< T >::end(), GetIndexItemStr(), GetTypeIndexIdLowerBoundIter(), and CIndexSetForQueryingStage::m_Index.
bool CStringIndexSet::GetContextBounds | ( | CDDCFilterWithBounds & | Filter, |
const string & | LoValue, | ||
const string & | HiValue | ||
) | const |
populate bounds for context-sort operator
References CHitSortKey::assign(), ddcVecFile< T >::begin(), CDDCFilterWithBounds::clear(), ddcVecFile< T >::end(), GetTypeIndexIdLowerBoundIter(), GetTypeIndexIdUpperBoundIter(), CHitSortKey::i, CDDCFilterWithBounds::m_bSet, CIndexSetForQueryingStage::m_Index, CDDCFilterWithBounds::m_KeyHi, CDDCFilterWithBounds::m_KeyLo, and ddcVecFile< T >::size().
Referenced by CQFContextSort::Compile().
void CStringIndexSet::FindOccurrences | ( | const vector< DWORD > & | IndexItems, |
const size_t | PeriodNo, | ||
vector< CTokenNo > & | occurrences, | ||
CShortOccurCacheMap * | pCaches, | ||
vector< int > & | CacheIds | ||
) | const |
find all occurrences of index items in subcorpora PeriodNo, using cache pCaches
References CIndexSetForQueryingStage::AddOccurs(), CIndexItem::GetEndOccurOffset(), GetName(), CIndexSetForQueryingStage::GetStartOccurNo(), CIndexItem::HasOneOccurrence(), CIndexSetForQueryingStage::m_Index, and SortOccurrences().
Referenced by CQueryTokenNode::EvaluateWithoutHits().
void CStringIndexSet::FindChunkOccurrences | ( | const vector< DWORD > & | IndexItems, |
vector< CTokenNo > & | occurrences, | ||
vector< DWORD > & | ChunkLengths, | ||
size_t | PeriodNo, | ||
CShortOccurCacheMap * | pCaches, | ||
vector< int > & | CacheIds | ||
) | const |
find all occurrences of index items in subcorpora PeriodNo, using cache pCaches (if occurrences are written by chunks)
References CIndexSetForQueryingStage::AddOccurs(), GetIndexItemStr(), GetName(), CIndexSetForQueryingStage::GetStartOccurNo(), and CIndexSetForQueryingStage::m_Index.
Referenced by CQueryTokenNode::EvaluateWithoutHits().
void CStringIndexSet::QueryTokenList | ( | const string & | WordForm, |
vector< DWORD > & | MatchWords | ||
) | const |
search for a string "WordForm", and add it to "MatchWords", if it is found
References ddcVecFile< T >::begin(), ddcVecFile< T >::end(), GetIndexItemStr(), GetTypeIndexIdLowerBoundIter(), CIndexSetForQueryingStage::m_Index, CStringIndexator::m_MaxRegExpExpansionSize, and CIndexSetForQueryingStage::m_pParent.
Referenced by CQueryTokenNode::CreateSetPattern(), and CQueryTokenNode::CreateStringPattern().
void CStringIndexSet::QueryTokenListWithRightTruncation | ( | const string & | Prefix, |
vector< DWORD > & | MatchWords | ||
) | const |
search for all strings, which begin with "Prefix", and add their IDs to "MatchWords"
References ddcVecFile< T >::begin(), ddcVecFile< T >::end(), ddcVecFile< T >::m_data, CIndexSetForQueryingStage::m_Index, CStringIndexator::m_MaxRegExpExpansionSize, CIndexSetForQueryingStage::m_pParent, and CIndexSetForLoadingStage::m_StringBuffer.
Referenced by CQueryTokenNode::CreatePrefixSetPattern().
void CStringIndexSet::QueryTokenListWithLeftTruncation | ( | const string & | Suffix, |
vector< DWORD > & | MatchWords | ||
) | const |
search for all strings, which end with "Suffix", adding their IDs to "MatchWords" (requires m_rIndex, introduced in v2.1.21)
References ddcVecFile< T >::begin(), ddcVecFile< T >::end(), ddcVecFile< T >::m_data, CIndexSetForQueryingStage::m_Index, CStringIndexator::m_MaxRegExpExpansionSize, CIndexSetForQueryingStage::m_pParent, CIndexSetForQueryingStage::m_rIndex, and CIndexSetForLoadingStage::m_StringBuffer.
Referenced by CQueryTokenNode::CreateSuffixSetPattern().
void CStringIndexSet::QueryTokenListUsingRegExp | ( | RML_RE & | RegExp, |
vector< DWORD > & | MatchWords, | ||
bool | negated = false , |
||
bool | ignore_diacritics = false |
||
) | const |
search for all index items which satisfy the regular expression "RegExp", and add them to "MatchWords"
References ddcIconv::convert(), GetIndexItemStr(), CIndexSetForQueryingStage::m_Index, CStringIndexator::m_MaxRegExpExpansionSize, CIndexSetForQueryingStage::m_pParent, RML_RE::PartialMatch(), RML_RE::pattern(), and ddcVecFile< T >::size().
Referenced by CQueryTokenNode::BuildRegExp().
void CStringIndexSet::QueryTokenListUniversal | ( | vector< DWORD > & | MatchWords | ) | const |
populate "MatchWords" with all index item indices
References CIndexSetForQueryingStage::m_Index, and ddcVecFile< T >::size().
void CStringIndexSet::DumpStorage | ( | FILE * | output | ) | const |
print the string representation of the whole storage to stdout
References GetIndexItemStr(), GetStorageFileName(), CIndexSetForLoadingStage::m_bUseItemStorage, and CIndexSetForQueryingStage::m_Index.
|
private |
a file for index storage
Referenced by CloseStorageFile(), GetTokenIndexId(), GetTokensFromStorage(), and OpenStorageFile().
string CStringIndexSet::m_Name |
the main name of the index set, for example "Token", "MorphPattern", "Thes", "Chunk"...
Referenced by CQFContextSort::Compile(), CConcIndexator::CreateAsUnion(), CreateSplitPartitions(), CConcordance::DumpFileIndexTabs(), CQueryTokenNode::GetIndex(), CConcordance::GetIndicesToShowStr(), GetName(), CConcSession::GetTokensFromStorageByBreak(), CDDCLeafServer::handle__info(), CQToken::IndexName(), InitIndexSet(), CStringIndexator::RegisterIndexAlias(), CConcIndexator::SplitProject(), and UnionIndexSets().
string CStringIndexSet::m_ShortName |
a short name of the index set, for example "m", "w", "t", "c"
Referenced by CConcSession::BuildJsonContextString(), CConcIndexator::CreateAsUnion(), CConcordance::DumpFileIndexTabs(), CDDCLeafServer::handle__info(), InitIndexSet(), CConcSession::ShowBibliographyForTable(), CConcSession::ShowBibliographyForTextOrHtml(), CConcIndexator::SplitProject(), and UnionIndexSets().