CIndexSetForLoadingStage is a part of DDC which is used only on the loading stage. More...
#include <IndexSetForLoadingStage.h>
CIndexSetForLoadingStage is a part of DDC which is used only on the loading stage.
CIndexSetForLoadingStage contains temporary file names and all load indices for one index set. While indexing, three indices are used: the input load index, the memory load index, and the main load index.
CIndexSetForLoadingStage::CIndexSetForLoadingStage | ( | ) |
References m_BigramBorder, m_bUseItemStorage, m_MaxBigramWindowSize, m_TempBigramsFile, and m_TempStorageFile.
CIndexSetForLoadingStage::~CIndexSetForLoadingStage | ( | ) |
virtual string CIndexSetForLoadingStage::GetName | ( | ) | const [private, pure virtual] |
return the name of the index (CStringIndexSet::m_Name)
Referenced by AddInputLoadIndexToMemoryLoadIndex(), AddItemStrToBuffer(), AddMemoryLoadIndexToMainLoadIndex(), AddToMemoryLoadIndexAndClear(), CreateTempFiles(), and SaveMemoryLoadIndex().
bool CIndexSetForLoadingStage::FindIndexItemInVector | ( | const char * | Item, | |
vector< CItemIndexForLoading >::iterator & | it, | |||
vector< CItemIndexForLoading > & | V | |||
) | [private] |
find a string in vector "V", returning iterator "it", using m_LoadLess1
References LessIndexString1< IndexType >::are_equal(), and m_LoadLess1.
Referenced by FindIndexItem().
bool CIndexSetForLoadingStage::FindIndexItem | ( | const char * | Item, | |
vector< CItemIndexForLoading >::iterator & | it | |||
) | [private] |
finds an item in the swap index set; if it is not found there, looks for the item in the file index set
References FindIndexItemInVector(), GetHashNo(), m_InputLoadIndexHash, and m_MemoryLoadIndexHash.
Referenced by IndexOneBigram(), and InsertToInputLoadIndex().
bool CIndexSetForLoadingStage::AddToMemoryLoadIndexAndClear | ( | vector< CItemIndexForLoading > & | Body, | |
vector< CItemIndexForLoading > & | FileIndexSet | |||
) | [private] |
References GetName(), and m_LoadLess2.
Referenced by AddInputLoadIndexToMemoryLoadIndex().
int CIndexSetForLoadingStage::GetHashNo | ( | const char * | Str | ) | const [private] |
Referenced by FindIndexItem(), and InsertToInputLoadIndex().
size_t CIndexSetForLoadingStage::AddItemStrToBuffer | ( | const char * | Str, | |
size_t | StrLen | |||
) | [protected] |
add a string to m_StringBuffer
References GetName(), and m_StringBuffer.
Referenced by InsertToInputLoadIndex(), and CStringIndexSet::UnionIndexSet().
bool CIndexSetForLoadingStage::IndexOneBigram | ( | const char * | Word1, | |
BYTE | Word1Len, | |||
const char * | Word2, | |||
BYTE | Word2Len, | |||
const CTokenNo & | TokenOffset, | |||
char | SecondWordAddress | |||
) | [protected] |
indexes one left bigram
References BinaryWriteItem(), CriticalTokenLength, FindIndexItem(), CTempBigram::m_Distance, CTempBigram::m_LeftTokenId, CTempBigram::m_RightTokenCorpusAddress, CTempBigram::m_RightTokenId, and m_TempBigramsFile.
Referenced by InsertToInputLoadIndex(), and ProcessBigramBorder().
bool CIndexSetForLoadingStage::CreateTempFiles | ( | string | Path | ) |
creates temporary files for indexing
References GetName(), m_bUseItemStorage, m_CurrOccurTempFileName, m_LeftContext, m_MainOccurTempFileName, m_TempBigramsFile, m_TempBigramsFileName, m_TempStorageFile, m_TempStorageFileName, MakeFName(), and UseBigrams().
Referenced by CConcIndexator::CreateMorphIndex().
bool CIndexSetForLoadingStage::DeleteTempFiles | ( | ) |
deletes temporary files after indexing
References CloseTempBigramsFile(), m_bUseItemStorage, m_CurrOccurTempFileName, m_MainOccurTempFileName, m_MemoryLoadIndexHash, m_TempBigramsFileName, m_TempStorageFile, m_TempStorageFileName, and RemoveWithPrint().
Referenced by CStringIndexSet::WriteToFile(), and ~CIndexSetForLoadingStage().
size_t CIndexSetForLoadingStage::GetMemoryLoadIndexItemsCount | ( | ) | const |
gets the number of items in memory load index
References m_MemoryLoadIndexHash.
bool CIndexSetForLoadingStage::SaveMemoryLoadIndex | ( | ) |
saves memory index
References GetName(), m_CurrOccurTempFileName, m_MemoryLoadIndexHash, CExpc::m_strCause, and WriteLoadIndexToTempFileAndClear().
Referenced by CConcIndexator::CreateMorphIndex().
bool CIndexSetForLoadingStage::AddInputLoadIndexToMemoryLoadIndex | ( | ) |
add the input load index to the memory load index and clear the input load index
References AddToMemoryLoadIndexAndClear(), GetName(), m_InputLoadIndexHash, and m_MemoryLoadIndexHash.
Referenced by CConcIndexator::CreateMorphIndex().
void CIndexSetForLoadingStage::SortInputAndMemoryIndices | ( | ) |
sort the input and the memory load indices
References CItemIndexForLoading::GetOccurs(), m_InputLoadIndexHash, and m_MemoryLoadIndexHash.
Referenced by CConcIndexator::CreateMorphIndex().
bool CIndexSetForLoadingStage::AddMemoryLoadIndexToMainLoadIndex | ( | ) |
add the memory load index to the main load index and clear the memory load index
References CItemIndexForLoading::FreeOccurs(), CItemIndexForLoading::GetIndexItemOffset(), GetName(), CItemIndexForLoading::GetOccurs(), CItemIndexForLoading::InitOccurs(), m_CurrOccurTempFileName, m_LoadLess2, m_MainOccurTempFileName, CExpc::m_strCause, MakeFName(), CItemIndexForLoading::ReadFromTemporalFile(), RmlMoveFile(), and CItemIndexForLoading::WriteToTemporalFile().
Referenced by CConcIndexator::CreateMorphIndex().
void CIndexSetForLoadingStage::InsertToInputLoadIndex | ( | const char * | Str, | |
size_t | StrLen, | |||
const vector< CTokenNo > & | occurrences | |||
) |
updates input or memory load index with one string
References AddItemStrToBuffer(), FindIndexItem(), GetHashNo(), CItemIndexForLoading::GetOccurs(), IndexOneBigram(), CItemIndexForLoading::InitOccurs(), m_bUseItemStorage, m_InputLoadIndexHash, m_LeftContext, m_MaxBigramWindowSize, m_TempStorageFile, and CItemIndexForLoading::SetIndexItemOffset().
Referenced by CConcIndexator::CreateMorphIndex(), and CConcIndexator::IndexOneTableTextArea().
void CIndexSetForLoadingStage::CloseTempBigramsFile | ( | ) |
closes the temporary bigrams file
References m_TempBigramsFile.
Referenced by DeleteTempFiles(), and CStringIndexSet::WriteToFile().
bool CIndexSetForLoadingStage::UseBigrams | ( | ) | const |
true, if DDC should create bigrams for this index
References m_MaxBigramWindowSize.
Referenced by CreateTempFiles(), CStringIndexSet::DestroyIndexSet(), CQueryTokenNode::EvaluateWithoutHits(), CStringIndexSet::ReadFromTheDisk(), and CStringIndexSet::WriteToFile().
void CIndexSetForLoadingStage::ProcessBigramBorder | ( | CTokenNo | occurrence | ) |
add "Wi <eos>" bigrams for end of sentence
References IndexOneBigram(), and m_LeftContext.
a less operator for two buffer pointers
Referenced by AddMemoryLoadIndexToMainLoadIndex(), and AddToMemoryLoadIndexAndClear().
a less operator for a buffer pointer and a const char*
Referenced by FindIndexItemInVector().
string CIndexSetForLoadingStage::m_CurrOccurTempFileName [private] |
a temporary file, where the memory index set is stored
Referenced by AddMemoryLoadIndexToMainLoadIndex(), CreateTempFiles(), DeleteTempFiles(), and SaveMemoryLoadIndex().
vector<CItemIndexForLoading > CIndexSetForLoadingStage::m_MemoryLoadIndexHash[256] [private] |
memory index set (hashed by ASCII)
Referenced by AddInputLoadIndexToMemoryLoadIndex(), DeleteTempFiles(), FindIndexItem(), GetMemoryLoadIndexItemsCount(), SaveMemoryLoadIndex(), and SortInputAndMemoryIndices().
vector<CItemIndexForLoading > CIndexSetForLoadingStage::m_InputLoadIndexHash[256] [private] |
input memory index set (hashed by ASCII)
Referenced by AddInputLoadIndexToMemoryLoadIndex(), FindIndexItem(), InsertToInputLoadIndex(), and SortInputAndMemoryIndices().
FILE* CIndexSetForLoadingStage::m_TempBigramsFile [private] |
a temporary file for left bigrams
Referenced by CIndexSetForLoadingStage(), CloseTempBigramsFile(), CreateTempFiles(), and IndexOneBigram().
deque<string> CIndexSetForLoadingStage::m_LeftContext [private] |
Referenced by CreateTempFiles(), InsertToInputLoadIndex(), and ProcessBigramBorder().
string CIndexSetForLoadingStage::m_TempBigramsFileName [protected] |
a temporary file to write left bigrams
Referenced by CreateTempFiles(), DeleteTempFiles(), and CStringIndexSet::WriteToFile().
FILE* CIndexSetForLoadingStage::m_TempStorageFile [protected] |
a temporary file for index storage
Referenced by CIndexSetForLoadingStage(), CStringIndexSet::ConvertTempStorageToPersistent(), CreateTempFiles(), DeleteTempFiles(), and InsertToInputLoadIndex().
string CIndexSetForLoadingStage::m_TempStorageFileName [protected] |
a temporary file, where the index storage is stored
Referenced by CStringIndexSet::ConvertTempStorageToPersistent(), CreateTempFiles(), and DeleteTempFiles().
string CIndexSetForLoadingStage::m_MainOccurTempFileName [protected] |
a temporary file, where the main index is stored
Referenced by AddMemoryLoadIndexToMainLoadIndex(), CStringIndexSet::ConvertLoadIndexToWorkingIndex(), CreateTempFiles(), and DeleteTempFiles().
if true, then the program creates and uses a storage for this index
Referenced by CIndexSetForLoadingStage(), CStringIndexSet::ConvertTempStorageToPersistent(), CreateTempFiles(), CStringIndexSet::CreateUnionTokenStorage(), DeleteTempFiles(), CStringIndexSet::DumpBigramsOfOneDirection(), CStringIndexSet::DumpStorage(), CStringIndexSet::GetTokensFromStorage(), CStringIndexSet::InitIndexSet(), InsertToInputLoadIndex(), CStringIndexSet::ReadFromTheDisk(), CStringIndexSet::UnionIndexSet(), and CStringIndexSet::WriteToFile().
if m_MaxBigramWindowSize > 0, then DDC creates bigrams for the index
Referenced by CIndexSetForLoadingStage(), InsertToInputLoadIndex(), CConcIndexator::LoadOptionsFromString(), and UseBigrams().
the break collection that bigrams must not cross
Referenced by CIndexSetForLoadingStage(), and CConcIndexator::LoadOptionsFromString().
vector<char> CIndexSetForLoadingStage::m_StringBuffer |
a buffer for storing index strings
Referenced by AddItemStrToBuffer(), CStringIndexSet::DestroyIndexSet(), CStringIndexSet::GetIndexItemStr(), CStringIndexSet::QueryTokenList(), CStringIndexSet::QueryTokenListWithRightTruncation(), CStringIndexSet::ReadFromTheDisk(), CStringIndexSet::UnionIndexSet(), and CStringIndexSet::WriteToFile().