24 #include "../CommonLib/ddcMMap.h" 54 : m_StartTokenNo(StartTokenNo), m_PageNumber(PageNumber) {};
87 string GetBreakFileName(
string Path)
const;
89 void ReadFromDisk(
string Path,
bool useMMap =
false);
91 bool ClearAll(
string Path);
93 void CloseFileForIndexing();
99 void SaveToFile(
const string &FileName);
122 string GetPageBreaksFileName(
string Path)
const;
125 string GetShortNameByName(
const string &BreakName)
const;
132 bool StartIndexing(
string Path);
135 bool RemoveHitBordersFileAndClear(
string Path);
141 void SavePageBreaks(
const string &ProjectPath);
143 int RegisterBreak(
string ShortName,
string LongName);
145 int EnsureRegisteredBreak(
string ShortName,
string LongName);
147 int GetBreakCollectionIndexByName(
string ShortName)
const;
149 void AddBreakByIndex(
DWORD BreakCollectionNo,
const CTokenNo &B);
// Read-only accessor for the break collections: returns a const reference
// to the m_Breaks member itself, so no copy of the vector is made.
// NOTE(review): the returned reference aliases this object's storage, so it
// presumably must not be kept across calls that modify m_Breaks -- confirm.
157 const vector<CBreakCollection> &
GetBreaks(
void)
const {
return m_Breaks; };
164 string GetBorderIndicesString()
const;
169 string WithinBreakName(
const vector<string> &Within)
const;
172 const ddcBreakVector *GetBreaksByName(
const string &ShortName)
const;
175 CTokenNo GetCorpusEndTokenNo()
const;
181 CTokenNo GetFileStartTokenNo(
size_t FileNo)
const;
184 DWORD GetPageNumber(
size_t No)
const;
187 bool IsRegisteredBreak(
const string &ShortName)
const;
190 void RegisterBorderIndices(
const char *IndicesStr);
193 bool LoadHitBorders(
string Path,
bool useMMap =
false);
196 void ConvertHitsToPageBreaks(vector<CHit>::const_iterator hits_begin, vector<CHit>::const_iterator hits_end,
203 void AddBreakByName(
const string &ShortName,
const CTokenNo &B);
206 void BordersEndIndexing(
string Path);
209 void StartTextAreaBorders();
212 void EndTextAreaBorders(
DWORD TextAreaEndTokenNo);
vector< DWORD > DwordVector
Definition: utilit.h:148
Definition: HitBorder.h:47
Definition: HitBorder.h:67
ddcBreakVector m_BreakOffsets
the breaks themselves
Definition: HitBorder.h:82
Definition: HitBorder.h:72
int m_FileBreakCollectionNo
a quick reference to file breaks (which are also stored in m_Breaks)
Definition: HitBorder.h:113
bool operator<(const CPageNumber &X) const
Definition: HitBorder.h:56
DWORD m_PageNumber
the page number itself (as it was mentioned in the source text)
Definition: HitBorder.h:51
map< string, int > m_ShortName2BreakCollection
the map from CBreakCollection.m_ShortName to the index in m_Breaks
Definition: HitBorder.h:107
string m_DefaultBreakName
The name of the default break collection (written in the options file)
Definition: HitBorder.h:115
map< string, int > m_LongName2BreakCollection
the map from CBreakCollection.m_LongName to the index in m_Breaks
Definition: HitBorder.h:110
Definition: morph_const.h:107
CTokenNo m_StartTokenNo
the starting position (in tokens) of the beginning of the page
Definition: HitBorder.h:49
ddcVecFile< CTokenNo > ddcBreakVector
Definition: HitBorder.h:43
vector< CBreakCollection > m_Breaks
all breaks
Definition: HitBorder.h:104
string m_ShortName
short name of this break collection
Definition: HitBorder.h:74
const vector< CBreakCollection > & GetBreaks(void) const
moo: get the vector of break collections (dangerous)
Definition: HitBorder.h:157
DWORD CTokenNo
integer type CTokenNo is used to refer to the index of a token in the corpus
Definition: ConcCommon.h:63
FILE * m_FileForIndexing
a file for temporarily storing breaks during indexing
Definition: HitBorder.h:79
string m_LongName
long name of this break collection
Definition: HitBorder.h:76
uint32_t DWORD
Definition: utilit.h:105
ddcVecFile< CPageNumber > m_PageBreaks
page number collection
Definition: HitBorder.h:118
CPageNumber(CTokenNo StartTokenNo=0, DWORD PageNumber=0)
Definition: HitBorder.h:53
vector< DWORD > m_LastTextAreaBreaks
Definition: HitBorder.h:128