#include <ConcBigrams.h>
CIndexSetForBigrams::CIndexSetForBigrams | ( | const CStringIndexator * | pParent | ) |
the temp index (used only for bigrams)
References CIndexSetForQueryingStage::m_bCompressOccurrences.
CIndexSetForBigrams::~CIndexSetForBigrams | ( | ) | [virtual] |
void CIndexSetForBigrams::WriteBigramAndItsOccurs | ( | CSecondWord & | I, | |
const DWORD | BigramId, | |||
CItemIndexForLoading & | M, | |||
FILE * | res_fp, | |||
size_t & | CurrPositionInResFile, | |||
FILE * | second_word_fp | |||
) | [private] |
References BinaryWriteItem(), CIndexSetForQueryingStage::BuildPeriodsDivisionAndCompress(), CItemIndexForLoading::ClearOccurrences(), CItemIndexForLoading::GetOccurs(), CItemIndexForLoading::GetOccursSize(), CSecondWord::m_EndOccurOffset, CSecondWord::m_StartOccur, CSecondWord::m_TheOnlyOccur, and CItemIndexForLoading::WriteOccurrences().
Referenced by ConvertTempBigramsToPersistent().
bool CIndexSetForBigrams::CreateRightBigrams | ( | string | TempRightBigramsFileName | ) | [private] |
References BinaryReadItem(), BinaryWriteItem(), FileSize(), CIndexBigram::GetFirstWordFileName(), GetRightToLeftPerdiv(), CIndexBigram::GetSecondWordFileName(), CIndexSetForQueryingStage::m_EndPeriodOffsets, CIndexBigram::m_FirstWords, CTempBigram2::m_LeftBigramOffset, CTempBigram2::m_LeftTokenId, m_RightBigrams, CTempBigram2::m_SecondWord, CExpc::m_strCause, RemoveWithPrint(), and WriteVector().
Referenced by ConvertTempBigramsToPersistent().
string CIndexSetForBigrams::GetRightToLeftPerdiv | ( | ) | const [private] |
References CStringIndexator::m_Path, CIndexSetForQueryingStage::m_pParent, and MakeFName().
Referenced by CreateRightBigrams(), DestroyIndexSet(), and LoadIndexSet().
string CIndexSetForBigrams::GetName | ( | ) | const [virtual] |
return the name of the index (CStringIndexSet::m_Name)
Implements CIndexSetForQueryingStage.
bool CIndexSetForBigrams::DestroyIndexSet | ( | ) |
destroy index set and remove index files
Reimplemented from CIndexSetForQueryingStage.
References CIndexSetForQueryingStage::DestroyIndexSet(), GetRightToLeftPerdiv(), m_LeftBigrams, m_RightBigrams, CIndexBigram::RemoveFiles(), and RemoveWithPrint().
Referenced by CStringIndexSet::DestroyIndexSet().
bool CIndexSetForBigrams::LoadIndexSet | ( | ) |
References FileExists(), GetRightToLeftPerdiv(), CIndexBigram::LoadBigrams(), m_LeftBigrams, m_RightBigrams, m_RightToLeftPerdivIds, and ReadVector().
Referenced by CStringIndexSet::ReadFromTheDisk(), and CStringIndexSet::WriteToFile().
bool CIndexSetForBigrams::ConvertTempBigramsToPersistent | ( | string | TempBigramsFileName, | |
const vector< CIndexItem > & | MainTokenIndex | |||
) |
convert the temporary left bigrams file to persistent form (left and right bigrams)
References BinaryReadItem(), BinaryWriteItem(), ConvertBufferOffsetToTokenId(), CreateRightBigrams(), CreateTempFileName(), FileSize(), CIndexBigram::GetFirstWordFileName(), CIndexSetForQueryingStage::GetOccursFileName(), CIndexBigram::GetSecondWordFileName(), CTempBigram::InitUnknown(), CTempBigram::LessWithoutAddress(), CTempBigram::m_Distance, CIndexBigram::m_FirstWords, CTempBigram2::m_LeftBigramOffset, m_LeftBigrams, CTempBigram2::m_LeftTokenId, CTempBigram::m_LeftTokenId, CTempBigram::m_RightTokenCorpusAddress, CTempBigram::m_RightTokenId, CTempBigram2::m_SecondWord, CExpc::m_strCause, WriteBigramAndItsOccurs(), CIndexSetForQueryingStage::WritePeriodsDivision(), and WriteVector().
Referenced by CStringIndexSet::WriteToFile().
void CIndexSetForBigrams::FindOccurrencesForBigrams | ( | const BigramDirectionEnum | bigr_direc, | |
const vector< DWORD > & | IndexItems, | |||
const size_t | PeriodNo, | |||
vector< pair< CTokenNo, DWORD > > & | occurrences, | |||
CShortOccurCacheMap * | pCaches, | |||
vector< int > & | CacheIds | |||
) | const |
the same as FindOccurrences, but saves the index of the token in the second part of each item of occurrences
References CIndexSetForQueryingStage::AddOccurs(), bdRightBigram, CSecondWord::m_Distance, CSecondWord::m_EndOccurOffset, CSecondWord::m_EndOfSentenceFlag, m_LeftBigrams, m_RightBigrams, m_RightToLeftPerdivIds, CSecondWord::m_SecondWordId, CSecondWord::m_StartOccur, CSecondWord::m_TheOnlyOccur, and CIndexBigram::ReadSecondWords().
Referenced by CQueryTokenNode::EvaluateWithoutHits().
void CIndexSetForBigrams::ReadAllOccurrences | ( | size_t | IndexItemNo, | |
const CSecondWord & | I, | |||
vector< CTokenNo > & | Occurs | |||
) | const |
read all occurrences of IndexItemNo
References CIndexSetForQueryingStage::AddOccurs(), CStringIndexator::GetSearchPeriodsCount(), CSecondWord::m_EndOccurOffset, CIndexSetForQueryingStage::m_pParent, CSecondWord::m_StartOccur, and CSecondWord::m_TheOnlyOccur.
Referenced by CStringIndexSet::DumpBigramsOfOneDirection().
map<DWORD, DWORD> CIndexSetForBigrams::m_RightToLeftPerdivIds [private] |
Referenced by FindOccurrencesForBigrams(), and LoadIndexSet().