#include <HitBorder.h>
Class CHitBorders contains all break collections and all page breaks.
CHitBorders::CHitBorders()
References m_FileBreakCollectionNo.
string CHitBorders::GetPageBreaksFileName(string Path) const [private]
returns the file name for page breaks
References MakeFName().
Referenced by BordersEndIndexing(), LoadHitBorders(), RemoveHitBordersFileAndClear(), and UniteBorders().
string CHitBorders::GetShortNameByName(const string & BreakName) const [private]
returns the short name of a break collection by the long or the short name
References m_Breaks.
Referenced by ProcessHitTypeStrInQueryStr().
bool CHitBorders::StartIndexing(string Path) [protected]
opens for writing all CBreakCollection::m_FileForIndexing from m_Breaks
References ErrorMessage(), Format(), CHitBorders::CBreakCollection::GetBreakFileName(), m_Breaks, CHitBorders::CBreakCollection::m_FileForIndexing, and m_PageBreaks.
Referenced by CConcIndexator::StartIndexing().
bool CHitBorders::RemoveHitBordersFileAndClear(string Path) [protected]
deletes all break files
References FileExists(), GetPageBreaksFileName(), m_Breaks, and m_PageBreaks.
Referenced by CConcIndexator::DestroyIndex().
void CHitBorders::AddPageBreak(const CPageNumber & P) [protected]
adds one page break
References m_PageBreaks, CPageNumber::m_PageNumber, CPageNumber::m_StartTokenNo, and UnknownPageNumber.
Referenced by CConcIndexator::IndexMorphXml(), CConcIndexator::IndexOneTableTextArea(), and CConcIndexator::IndexTextOrHtmlFile().
bool CHitBorders::UniteBorders(const CHitBorders & H1, const CHitBorders & H2, const DWORD EndTokenNo1, const string & Path) [protected]
creates union of H1 and H2 for all m_Breaks and for m_PageBreaks
References GetBorderIndicesString(), CHitBorders::CBreakCollection::GetBreakFileName(), GetBreaksByName(), GetFileBreaks(), GetPageBreaksFileName(), CHitBorders::CBreakCollection::m_BreakOffsets, m_Breaks, m_DefaultBreakName, m_FileBreakCollectionNo, m_LastTextAreaBreaks, m_PageBreaks, CHitBorders::CBreakCollection::m_ShortName, m_ShortName2BreakCollection, PredefinedFileBreakName, RegisterBreak(), UnionBreaks(), and WriteVector().
Referenced by CConcIndexator::CreateAsUnion().
bool CHitBorders::RegisterBreak(string ShortName, string LongName) [protected]
References ErrorMessage(), m_Breaks, m_LastTextAreaBreaks, CHitBorders::CBreakCollection::m_LongName, CHitBorders::CBreakCollection::m_ShortName, and m_ShortName2BreakCollection.
Referenced by RegisterBorderIndices(), and UniteBorders().
string CHitBorders::GetBreakCollectionShortName(size_t i) const [protected]
References m_Breaks.
Referenced by CConcIndexator::SaveOptionsToString().
int CHitBorders::GetBreakCollectionIndexByName(string ShortName) const [protected]
References m_ShortName2BreakCollection.
Referenced by CConcIndexator::IndexOneTableTextArea(), CConcIndexator::IndexTextOrHtmlFile(), and CConcIndexator::LoadOptionsFromString().
CHitBorders::AddBreakByIndex (the declaration line is missing from this listing)
References m_Breaks, CHitBorders::CBreakCollection::m_FileForIndexing, m_LastTextAreaBreaks, and save_to_bytes().
Referenced by AddBreakByName(), EndTextAreaBorders(), CConcIndexator::IndexOneTableTextArea(), and CConcIndexator::IndexTextOrHtmlFile().
string CHitBorders::GetBorderIndicesString() const
returns the string representation of break collection descriptions
References Format(), m_Breaks, m_DefaultBreakName, CHitBorders::CBreakCollection::m_LongName, CHitBorders::CBreakCollection::m_ShortName, m_ShortName2BreakCollection, PredefinedFileBreakName, and Trim().
Referenced by CConcIndexator::LoadOptionsFromString(), CConcIndexator::SaveOptionsToString(), and UniteBorders().
vector< string > CHitBorders::GetBorderIndicesStringVector() const
the same as GetBorderIndicesString, but each break collection description is written into a separate vector element
References m_Breaks, CHitBorders::CBreakCollection::m_LongName, and m_ShortName2BreakCollection.
string CHitBorders::ProcessHitTypeStrInQueryStr(string & Query) const
returns the short name of the break collection that is specified in the input query (if nothing is specified, it returns m_DefaultBreakName)
References GetShortNameByName(), and m_DefaultBreakName.
Referenced by CQueryParser::ParseQuery().
const vector< CTokenNo > * CHitBorders::GetBreaksByName(const string & ShortName) const
returns a break collection by a short name
References m_Breaks, and m_ShortName2BreakCollection.
Referenced by CConcHolder::SetHitType(), and UniteBorders().
CTokenNo CHitBorders::GetCorpusEndTokenNo() const
returns the value of the last file break (which should be equal to the last value of any break collection)
References GetFileBreaks(), and m_FileBreakCollectionNo.
Referenced by CConcIndexator::CalculateSearchPeriods(), CConcIndexator::CreateAsUnion(), CConcIndexatorInvoker::FinalizeIndex(), and CConcHolder::InitLessByRank().
const vector< CTokenNo > & CHitBorders::GetFileBreaks() const
quick reference to file breaks
References m_Breaks, and m_FileBreakCollectionNo.
Referenced by CConcIndexator::CalculateSearchPeriods(), CConcHolder::GetContext(), CConcHolder::GetContextJson(), GetCorpusEndTokenNo(), GetFileStartTokenNo(), CConcHolder::InitFileReferences(), CConcIndexator::LoadProject(), and UniteBorders().
CTokenNo CHitBorders::GetFileStartTokenNo(size_t FileNo) const
returns the start position of corpus file FileNo
References GetFileBreaks(), and m_FileBreakCollectionNo.
Referenced by CConcIndexator::CalculateSearchPeriods(), CConcHolder::GetContext(), and CConcHolder::GetContextJson().
DWORD CHitBorders::GetPageNumber(size_t No) const
returns m_PageBreaks[No].m_PageNumber (see CPageNumber)
References m_PageBreaks, and UnknownPageNumber.
Referenced by CConcHolder::GenerateHitStrings().
bool CHitBorders::IsRegisteredBreak(const string & ShortName) const
returns true if a short name is found in m_Breaks
References m_ShortName2BreakCollection.
Referenced by CConcIndexator::IndexOneTableTextArea().
bool CHitBorders::RegisterBorderIndices(const char * IndicesStr)
creates empty elements of m_Breaks from their string descriptions
References ErrorMessage(), Format(), m_Breaks, m_DefaultBreakName, m_FileBreakCollectionNo, m_LastTextAreaBreaks, m_ShortName2BreakCollection, PredefinedFileBreakName, PredefinedTableLineTag, PredefinedTextAreaBreakName, RegisterBreak(), Trim(), and StringTokenizer::val().
Referenced by CConcIndexator::LoadOptionsFromString().
bool CHitBorders::LoadHitBorders(string Path)
loads break collections from the disk
References GetPageBreaksFileName(), m_Breaks, m_PageBreaks, and ReadVector().
Referenced by CConcIndexatorInvoker::FinalizeIndex(), and CConcIndexator::LoadProject().
void CHitBorders::ConvertHitsToPageBreaks(vector< CHit >::const_iterator hits_begin, vector< CHit >::const_iterator hits_end, const vector< CTokenNo > & Breaks, DwordVector & PageBreaks) const
converts hits to the page breaks which contain these breaks
References m_PageBreaks.
Referenced by CConcHolder::GenerateHitStrings().
bool CHitBorders::AddBreakByName(const string & ShortName, const CTokenNo & B)
adds one break to a collection identified by a short name (during indexing)
References AddBreakByIndex(), and m_ShortName2BreakCollection.
Referenced by CConcIndexatorInvoker::BuildIndex(), CConcIndexator::IndexMorphXml(), and CConcIndexator::IndexTextOrHtmlFile().
bool CHitBorders::BordersEndIndexing(string Path)
closes all CBreakCollection::m_FileForIndexing from m_Breaks (during indexing)
References GetPageBreaksFileName(), m_Breaks, m_PageBreaks, and WriteVector().
Referenced by CConcIndexatorInvoker::FinalizeIndex(), and CConcIndexator::TerminateIndexing().
void CHitBorders::StartTextAreaBorders()
must be called before indexing each text area in order to create at least one break in each text area
References m_Breaks, and m_LastTextAreaBreaks.
Referenced by CConcIndexator::IndexOneTableTextArea().
bool CHitBorders::EndTextAreaBorders(DWORD TextAreaEndTokenNo)
must be called after indexing each text area in order to create at least one break in each text area
References AddBreakByIndex(), m_FileBreakCollectionNo, and m_LastTextAreaBreaks.
Referenced by CConcIndexator::IndexOneTableTextArea().
vector<CBreakCollection> CHitBorders::m_Breaks [private]
all breaks
Referenced by AddBreakByIndex(), BordersEndIndexing(), GetBorderIndicesString(), GetBorderIndicesStringVector(), GetBreakCollectionShortName(), GetBreaksByName(), GetFileBreaks(), GetShortNameByName(), LoadHitBorders(), RegisterBorderIndices(), RegisterBreak(), RemoveHitBordersFileAndClear(), StartIndexing(), StartTextAreaBorders(), and UniteBorders().
map<string,int> CHitBorders::m_ShortName2BreakCollection [private]
the map from CBreakCollection.m_ShortName to the index in m_Breaks
Referenced by AddBreakByName(), GetBorderIndicesString(), GetBorderIndicesStringVector(), GetBreakCollectionIndexByName(), GetBreaksByName(), IsRegisteredBreak(), RegisterBorderIndices(), RegisterBreak(), and UniteBorders().
int CHitBorders::m_FileBreakCollectionNo [private]
a quick reference to file breaks (which are also stored in m_Breaks)
Referenced by CHitBorders(), EndTextAreaBorders(), GetCorpusEndTokenNo(), GetFileBreaks(), GetFileStartTokenNo(), RegisterBorderIndices(), and UniteBorders().
string CHitBorders::m_DefaultBreakName [private]
The name of the default break collection (written in the options file).
Referenced by GetBorderIndicesString(), ProcessHitTypeStrInQueryStr(), RegisterBorderIndices(), and UniteBorders().
vector<CPageNumber> CHitBorders::m_PageBreaks [private]
page number collection
Referenced by AddPageBreak(), BordersEndIndexing(), ConvertHitsToPageBreaks(), GetPageNumber(), LoadHitBorders(), RemoveHitBordersFileAndClear(), StartIndexing(), and UniteBorders().
vector<DWORD> CHitBorders::m_LastTextAreaBreaks [private]
Referenced by AddBreakByIndex(), EndTextAreaBorders(), RegisterBorderIndices(), RegisterBreak(), StartTextAreaBorders(), and UniteBorders().