19 #ifndef IndexSetForIndexingStage_h 20 #define IndexSetForIndexingStage_h 24 #include "../CommonLib/ddcMMap.h" 64 m_IndexItemOffset = Value;
70 assert (m_pCurrOccurs);
77 assert (m_pCurrOccurs);
83 return m_pCurrOccurs->size();
139 virtual string GetName()
const = 0;
142 vector<CItemIndexForLoading > m_MemoryLoadIndexHash[256] ;
145 vector<CItemIndexForLoading > m_InputLoadIndexHash[256] ;
148 bool FindIndexItemInVector (
const char* Item, vector<CItemIndexForLoading>::iterator& it, vector<CItemIndexForLoading>& V);
151 bool FindIndexItem (
const char* Item, vector<CItemIndexForLoading>::iterator& it);
153 bool AddToMemoryLoadIndexAndClear(vector<CItemIndexForLoading>& Body, vector<CItemIndexForLoading>& FileIndexSet);
154 int GetHashNo(
const char* Str)
const;
165 size_t AddItemStrToBuffer(
const char* Str,
size_t StrLen);
180 bool CreateTempFiles (
string Path);
182 bool DeleteTempFiles();
184 size_t GetMemoryLoadIndexItemsCount()
const;
186 bool SaveMemoryLoadIndex();
188 bool AddInputLoadIndexToMemoryLoadIndex();
190 void SortInputAndMemoryIndices();
192 bool AddMemoryLoadIndexToMainLoadIndex();
194 void InsertToInputLoadIndex(
const char* Str,
size_t StrLen,
const vector<CTokenNo>& occurrences);
197 void RollbackLoadIndex(
CTokenNo startTrimTokenNo);
200 void PrintLoadIndexStats(FILE *f=stderr)
const;
void WriteToTemporalFile(FILE *fp) const
writes the vector of occurrences to a temporary file
Definition: IndexSetForLoadingStage.cpp:136
string m_CurrOccurTempFileName
a temporary file, where the memory index set is stored
Definition: IndexSetForLoadingStage.h:136
bool ddcEnableAnonymousTokens
Definition: IndexSetForLoadingStage.cpp:36
size_t GetIndexItemOffset() const
gets the reference to the index item
Definition: IndexSetForLoadingStage.h:57
FILE * m_TempStorageFile
a temporary file for index storage
Definition: IndexSetForLoadingStage.h:158
bool CheckOccurrences(CTokenNo EndTokenNo) const
checks the order of occurrences
Definition: IndexSetForLoadingStage.cpp:78
bool ReadFromTemporalFile(FILE *fp)
reads the vector of occurrences from a temporary file
Definition: IndexSetForLoadingStage.cpp:115
vector< CTokenNo > * m_pCurrOccurs
current vector of occurrences
Definition: IndexSetForLoadingStage.h:49
vector< CTokenNo > * GetOccurs()
gets vector of occurrences
Definition: IndexSetForLoadingStage.h:68
bool InitOccurs()
initializes vector of occurrences
Definition: IndexSetForLoadingStage.cpp:44
string m_MainOccurTempFileName
a temporary file, where the main index is stored
Definition: IndexSetForLoadingStage.h:162
bool WriteOccurrences(FILE *fp) const
writes vector of occurrences to a file
Definition: IndexSetForLoadingStage.cpp:72
const vector< CTokenNo > * GetOccurs() const
gets vector of occurrences (const)
Definition: IndexSetForLoadingStage.h:75
bool m_bUseItemStorage
if true, then the program creates and uses a storage for this index
Definition: IndexSetForLoadingStage.h:170
size_t m_IndexItemOffset
a reference to CStringIndexSet::m_StringBuffer
Definition: IndexSetForLoadingStage.h:52
size_t GetOccursSize() const
Definition: IndexSetForLoadingStage.h:81
void FreeOccurs()
deletes vector of occurrences
Definition: IndexSetForLoadingStage.cpp:56
CIndexSetForLoadingStage is a part of DDC which is used only on the loading stage.
Definition: IndexSetForLoadingStage.h:126
CItemIndexForLoading holds all occurrences of one index item (token, morph. pattern...) during the indexing stage.
Definition: IndexSetForLoadingStage.h:46
void ClearOccurrences()
clears the vector of occurrences
Definition: IndexSetForLoadingStage.cpp:107
DWORD CTokenNo
integer type CTokenNo is used to refer to the index of a token in the corpus
Definition: ConcCommon.h:63
void SetIndexItemOffset(size_t Value)
sets the reference to the index item
Definition: IndexSetForLoadingStage.h:62
string m_TempStorageFileName
a temporary file, where the index storage is stored
Definition: IndexSetForLoadingStage.h:160
LessIndexString2< CItemIndexForLoading > m_LoadLess2
a less operator for two buffer pointers
Definition: IndexSetForLoadingStage.h:129
ddcVecFile< char > m_StringBuffer
a buffer for storing index strings (compile-time)
Definition: IndexSetForLoadingStage.h:173
LessIndexString1< CItemIndexForLoading > m_LoadLess1
a less operator for a buffer pointer and a const char*
Definition: IndexSetForLoadingStage.h:132