ddc
|
#include <IndexSetForQueryingStage.h>
Public Member Functions | |
CIndexSetForQueryingStage (const CStringIndexator *pParent) | |
virtual | ~CIndexSetForQueryingStage () |
virtual string | GetName () const =0 |
return the name of the index (CStringIndexSet::m_Name) More... | |
bool | DestroyIndexSet () |
destroy index set and remove index files More... | |
void | ReadAllOccurrences (size_t IndexItemNo, vector< CTokenNo > &Occurs) const |
reads all occurrences of IndexItemNo (this function can allocate a lot of memory, so use it with care) More... | |
Public Attributes | |
ddcVecFile< CIndexItem > | m_Index |
the main index (from strings to the ordered list of their occurrences) More... | |
CSuffixIndex | m_rIndex |
optional auxiliary index for suffix-queries; ItemIds lexicographically sorted by reverse string-value More... | |
PeriodsDivisionMapT | m_EndPeriodOffsets |
all corpus period divisions for the long occurrence lists More... | |
const CStringIndexator * | m_pParent |
a pointer to the collection of indices, which contains a reference to this index More... | |
bool | m_bCompressOccurrences |
if true, then the occurrences should be compressed (up to 30% for huge corpora) More... | |
Protected Member Functions | |
void | AssertHasPath () const |
assert that the project path is initialized (the function is void and returns nothing) More... | |
void | AddOccurs (size_t IndexItemNo, const bool bOneOccurrence, const size_t StartOccurNo, const size_t EndOccurNo, vector< CTokenNo > &Occurs, size_t PeriodNo, COccurrBuffer &OccursBuffer, CShortOccurCache *pCacheByIndexSet, int &CacheId) const |
a function for reading occurrences for one index item More... | |
string | GetOccursFileName () const |
return the name of the occurrences file More... | |
string | GetOccHdrFileName () const |
return the name of file for m_Index More... | |
string | GetSuffixFileName () const |
return the name of file for m_rIndex (for suffix-queries) More... | |
string | GetPeriodsDivisionFileName () const |
return the name of file for occurrences period division More... | |
string | GetFileNameForInfos () const |
return the name of file for CIndexSetForLoadingStage::m_StringBuffer More... | |
file_off_t | GetOccurrsFileSize () const |
return the size of the file for occurrences More... | |
size_t | GetStartOccurNo (size_t IndexNo) const |
get the offset of the first occurrence of index item number IndexNo in the file of occurrences (m_OccursFp) More... | |
bool | BuildPeriodsDivisionAndCompress (const DWORD TokenId, vector< CTokenNo > &InputTokens) |
build a period division for one index item More... | |
bool | AddOneIndexItem (CItemIndexForLoading &M, FILE *res_fp, size_t &CurrPositionInResFile, const CTokenNo EndTokeNo) |
write one index item to result file More... | |
bool | WritePeriodsDivision () |
write index item's period division to disk More... | |
bool | LoadIndexSet (bool bLoadHeaderOfOccurrences=true) |
load index set from binaries More... | |
Private Member Functions | |
bool | LoadPeriodDivision () |
load all period divisions to m_EndPeriodOffsets More... | |
void | ReadOccurrences (CTokenNo *OutBuffer, file_off_t FilePosition, size_t Count) const |
Private Attributes | |
ddcFileOrMMap | m_OccursFp |
the main file of occurrences More... | |
This class is a part of CStringIndexSet class which is used only during querying. It contains the important serialization primitives.
CIndexSetForQueryingStage::CIndexSetForQueryingStage | ( | const CStringIndexator * | pParent | ) |
References m_pParent.
|
virtual |
|
private |
load all period divisions to m_EndPeriodOffsets
References CExpc::code(), GetPeriodsDivisionFileName(), CStringIndexator::GetSearchPeriodsCount(), CStringIndexator::m_bMemoryMap, m_EndPeriodOffsets, m_pParent, ddcMapFile< KeyT, ValT >::open(), and CExpc::what().
Referenced by LoadIndexSet().
|
private |
reads Count occurrences starting at FilePosition from m_OccursFp into OutBuffer
References m_OccursFp, and ddcFileOrMMap::ReadBuffer().
Referenced by AddOccurs().
|
protected |
assert that the project path is initialized (the function is void and returns nothing)
References errNonePath, ErrorMessage(), CStringIndexator::m_Path, and m_pParent.
Referenced by DestroyIndexSet(), GetFileNameForInfos(), GetOccHdrFileName(), GetOccurrsFileSize(), GetOccursFileName(), GetPeriodsDivisionFileName(), and GetSuffixFileName().
|
protected |
a function for reading occurrences for one index item
References CShortOccurCache::AddNewIndexItemNoToCache(), ddcMapFile< KeyT, ValT >::begin(), CShortOccurCache::CouldContainMore(), DearchiveOccurrences(), ddcMapFile< KeyT, ValT >::end(), ddcMapFile< KeyT, ValT >::find(), CShortOccurCache::GetOccurrencesFromCache(), CStringIndexator::GetSearchPeriod(), ddcMapFile< KeyT, ValT >::key(), m_bCompressOccurrences, m_EndPeriodOffsets, m_pParent, MaxCompressRatio, OCCDEBUG, OccurBufferSize, and ReadOccurrences().
Referenced by CStringIndexSet::FindChunkOccurrences(), CStringIndexSet::FindOccurrences(), and ReadAllOccurrences().
|
protected |
return the name of the occurrences file
References AssertHasPath(), GetName(), CStringIndexator::m_Path, m_pParent, and MakeFName().
Referenced by CStringIndexSet::ConvertLoadIndexToWorkingIndex(), CStringIndexSet::CreateSplitPartitions(), DestroyIndexSet(), GetOccurrsFileSize(), LoadIndexSet(), and CStringIndexSet::UnionIndexSets().
|
protected |
return the name of file for m_Index
References AssertHasPath(), GetName(), CStringIndexator::m_Path, m_pParent, and MakeFName().
Referenced by DestroyIndexSet(), LoadIndexSet(), and CStringIndexSet::WriteToFile().
|
protected |
return the name of file for m_rIndex (for suffix-queries)
References AssertHasPath(), GetName(), CStringIndexator::m_Path, m_pParent, and MakeFName().
Referenced by DestroyIndexSet(), LoadIndexSet(), and CStringIndexSet::WriteToFile().
|
protected |
return the name of file for occurrences period division
References AssertHasPath(), GetName(), CStringIndexator::m_Path, m_pParent, and MakeFName().
Referenced by DestroyIndexSet(), LoadPeriodDivision(), and WritePeriodsDivision().
|
protected |
return the name of file for CIndexSetForLoadingStage::m_StringBuffer
References AssertHasPath(), GetName(), CStringIndexator::m_Path, m_pParent, and MakeFName().
Referenced by DestroyIndexSet(), CStringIndexSet::ReadFromTheDisk(), and CStringIndexSet::WriteToFile().
|
protected |
return the size of the file for occurrences
References AssertHasPath(), FileSize(), and GetOccursFileName().
|
protected |
get the offset of the first occurrence of index item number IndexNo in the file of occurrences (m_OccursFp)
References m_Index.
Referenced by CStringIndexSet::FindChunkOccurrences(), CStringIndexSet::FindOccurrences(), and ReadAllOccurrences().
|
protected |
build a period division for one index item
References CompressPortion(), ddcMapFile< KeyT, ValT >::ensureMap(), GetName(), CStringIndexator::GetSearchPeriod(), CStringIndexator::GetSearchPeriodsCount(), m_bCompressOccurrences, m_EndPeriodOffsets, ddcMapFile< KeyT, ValT >::m_map, m_pParent, and OccurBufferSize.
Referenced by AddOneIndexItem().
|
protected |
write one index item to result file
References CIndexItem::AddItemIndexFlags(), BuildPeriodsDivisionAndCompress(), CItemIndexForLoading::CheckOccurrences(), ddcLogDie, Format(), CItemIndexForLoading::GetIndexItemOffset(), GetName(), CItemIndexForLoading::GetOccurs(), CItemIndexForLoading::GetOccursSize(), m_Index, ddcVecFile< T >::push_back(), CIndexItem::SetEndOccurOffset(), CIndexItem::SetIndexItemOffsetAndFlags(), ddcVecFile< T >::size(), TheOnlyOccurIsInEndOccurNo, and CItemIndexForLoading::WriteOccurrences().
Referenced by CStringIndexSet::ConvertLoadIndexToWorkingIndex(), CStringIndexSet::CreateSplitPartitions(), and CStringIndexSet::UnionIndexSets().
|
protected |
write index item's period division to disk
References ddcLogDebug, ddcMapFile< KeyT, ValT >::ensureMap(), Format(), GetPeriodsDivisionFileName(), CStringIndexator::GetSearchPeriodsCount(), m_EndPeriodOffsets, ddcMapFile< KeyT, ValT >::m_map, m_pParent, and WriteVectorInner().
Referenced by CStringIndexSet::ConvertLoadIndexToWorkingIndex(), CStringIndexSet::CreateSplitPartitions(), and CStringIndexSet::UnionIndexSets().
|
protected |
load index set from binaries
References ddcLogWarn, FileExists(), Format(), GetOccHdrFileName(), GetOccursFileName(), GetSuffixFileName(), LoadPeriodDivision(), CStringIndexator::m_bMemoryMap, m_Index, m_OccursFp, m_pParent, m_rIndex, ddcVecFile< T >::open(), and ddcFileOrMMap::Open().
Referenced by CStringIndexSet::ReadFromTheDisk().
|
pure virtual |
return the name of the index (CStringIndexSet::m_Name)
Implemented in CStringIndexSet.
Referenced by AddOneIndexItem(), BuildPeriodsDivisionAndCompress(), GetFileNameForInfos(), GetOccHdrFileName(), GetOccursFileName(), GetPeriodsDivisionFileName(), and GetSuffixFileName().
bool CIndexSetForQueryingStage::DestroyIndexSet | ( | ) |
destroy index set and remove index files
References AssertHasPath(), ddcVecFile< T >::clear(), ddcMapFile< KeyT, ValT >::clear(), FileExists(), GetFileNameForInfos(), GetOccHdrFileName(), GetOccursFileName(), GetPeriodsDivisionFileName(), GetSuffixFileName(), m_EndPeriodOffsets, m_Index, and m_rIndex.
Referenced by CStringIndexSet::DestroyIndexSet().
void CIndexSetForQueryingStage::ReadAllOccurrences | ( | size_t | IndexItemNo, |
vector< CTokenNo > & | Occurs | ||
) | const |
reads all occurrences of IndexItemNo (this function can allocate a lot of memory, so use it with care)
References AddOccurs(), CStringIndexator::GetSearchPeriodsCount(), GetStartOccurNo(), m_Index, and m_pParent.
Referenced by CreateMorphIndex(), and CStringIndexSet::CreateSplitPartitions().
|
private |
the main file of occurrences
Referenced by LoadIndexSet(), and ReadOccurrences().
ddcVecFile<CIndexItem> CIndexSetForQueryingStage::m_Index |
the main index (from strings to the ordered list of their occurrences)
Referenced by AddOneIndexItem(), CStringIndexSet::ConvertLoadIndexToWorkingIndex(), CStringIndexSet::ConvertTempStorageToPersistent(), CreateMorphIndex(), CStringIndexSet::CreateSplitPartitions(), DestroyIndexSet(), CStringIndexSet::DumpStorage(), CStringIndexSet::EnsureSuffixIndex(), CStringIndexSet::FindChunkOccurrences(), CStringIndexSet::FindOccurrences(), CStringIndexSet::GetContextBounds(), GetStartOccurNo(), CStringIndexSet::GetTokenIndexId(), CStringIndexSet::GetTokensFromStorage(), CStringIndexSet::GetTypeIndexId(), CStringIndexSet::GetTypeIndexIdLowerBound(), CStringIndexSet::GetTypeIndexIdLowerBoundIter(), CStringIndexSet::GetTypeIndexIdUpperBoundIter(), CDDCLeafServer::handle__info(), CConcSession::InitSortByContext(), LoadIndexSet(), CStringIndexSet::QueryTokenList(), CStringIndexSet::QueryTokenListUniversal(), CStringIndexSet::QueryTokenListUsingRegExp(), CStringIndexSet::QueryTokenListWithLeftTruncation(), CStringIndexSet::QueryTokenListWithRightTruncation(), ReadAllOccurrences(), CStringIndexSet::UnionIndexSets(), and CStringIndexSet::WriteToFile().
CSuffixIndex CIndexSetForQueryingStage::m_rIndex |
optional auxiliary index for suffix-queries; ItemIds lexicographically sorted by reverse string-value
Referenced by CQueryTokenNode::CreateSuffixSetPattern(), DestroyIndexSet(), CStringIndexSet::EnsureSuffixIndex(), LoadIndexSet(), CStringIndexSet::QueryTokenListWithLeftTruncation(), and CStringIndexSet::WriteToFile().
PeriodsDivisionMapT CIndexSetForQueryingStage::m_EndPeriodOffsets |
all corpus period divisions for the long occurrence lists
Referenced by AddOccurs(), BuildPeriodsDivisionAndCompress(), CStringIndexSet::ConvertLoadIndexToWorkingIndex(), DestroyIndexSet(), LoadPeriodDivision(), and WritePeriodsDivision().
const CStringIndexator* CIndexSetForQueryingStage::m_pParent |
a pointer to the collection of indices, which contains a reference to this index
Referenced by AddOccurs(), AssertHasPath(), BuildPeriodsDivisionAndCompress(), CIndexSetForQueryingStage(), CStringIndexSet::ConvertLoadIndexToWorkingIndex(), GetFileNameForInfos(), GetOccHdrFileName(), GetOccursFileName(), GetPeriodsDivisionFileName(), CStringIndexSet::GetStorageFileName(), GetSuffixFileName(), LoadIndexSet(), LoadPeriodDivision(), CStringIndexSet::OpenStorageFile(), CStringIndexSet::QueryTokenList(), CStringIndexSet::QueryTokenListUsingRegExp(), CStringIndexSet::QueryTokenListWithLeftTruncation(), CStringIndexSet::QueryTokenListWithRightTruncation(), ReadAllOccurrences(), CStringIndexSet::ReadFromTheDisk(), and WritePeriodsDivision().
bool CIndexSetForQueryingStage::m_bCompressOccurrences |
if true, then the occurrences should be compressed (up to 30% for huge corpora)
Referenced by AddOccurs(), BuildPeriodsDivisionAndCompress(), and CStringIndexSet::InitIndexSet().