Go to the documentation of this file.00001 #ifndef IndexSetForIndexingStage_h
00002 #define IndexSetForIndexingStage_h
00003
00004
00005 #include "DDCLessOperators.h"
00006
00007
00009
00016 class CItemIndexForLoading
00017 {
00019 vector< CTokenNo >* m_pCurrOccurs;
00020
00022 size_t m_IndexItemOffset;
00023
00024 public:
00025
00027 size_t GetIndexItemOffset() const
00028 {
00029 return m_IndexItemOffset;
00030 };
00032 void SetIndexItemOffset(size_t Value)
00033 {
00034 m_IndexItemOffset = Value;
00035 };
00036
00038 vector<CTokenNo>* GetOccurs()
00039 {
00040 assert (m_pCurrOccurs);
00041 return m_pCurrOccurs;
00042 };
00043
00045 const vector<CTokenNo>* GetOccurs() const
00046 {
00047 assert (m_pCurrOccurs);
00048 return m_pCurrOccurs;
00049 };
00050
00051 size_t GetOccursSize() const
00052 {
00053 return m_pCurrOccurs->size();
00054 };
00056 bool InitOccurs();
00057
00059 void FreeOccurs();
00060
00062 bool WriteOccurrences(FILE* fp) const;
00063
00065 bool CheckOccurrences(CTokenNo EndTokenNo) const;
00066
00068 void ClearOccurrences();
00069
00071 bool ReadFromTemporalFile (FILE* fp);
00072
00074 void WriteToTemporalFile (FILE* fp) const;
00075 };
00076
00077
00078
00079
00080
00082
00096 class CIndexSetForLoadingStage
00097 {
00099 LessIndexString2< CItemIndexForLoading> m_LoadLess2;
00101 LessIndexString1< CItemIndexForLoading> m_LoadLess1;
00102
00103
00105 string m_CurrOccurTempFileName;
00106
00108 virtual string GetName() const = 0;
00109
00111 vector<CItemIndexForLoading > m_MemoryLoadIndexHash[256] ;
00112
00114 vector<CItemIndexForLoading > m_InputLoadIndexHash[256] ;
00116 FILE* m_TempBigramsFile;
00117
00118 deque<string> m_LeftContext;
00119
00121 bool FindIndexItemInVector (const char* Item, vector<CItemIndexForLoading>::iterator& it, vector<CItemIndexForLoading>& V);
00122
00123
00124 bool FindIndexItem (const char* Item, vector<CItemIndexForLoading>::iterator& it);
00125
00126 bool AddToMemoryLoadIndexAndClear(vector<CItemIndexForLoading>& Body, vector<CItemIndexForLoading>& FileIndexSet);
00127 int GetHashNo(const char* Str) const;
00128
00129 protected:
00131 string m_TempBigramsFileName;
00133 FILE* m_TempStorageFile;
00135 string m_TempStorageFileName;
00137 string m_MainOccurTempFileName;
00138
00140 size_t AddItemStrToBuffer(const char* Str, size_t StrLen);
00141
00143 bool IndexOneBigram(const char* Word1, BYTE Word1Len, const char* Word2, BYTE Word2Len,const CTokenNo& TokenOffset, char SecondWordAddress);
00144
00145
00146 public:
00148 bool m_bUseItemStorage;
00149
00151 int m_MaxBigramWindowSize;
00152
00154 int m_BigramBorder;
00155
00157 vector<char> m_StringBuffer;
00158
00159
00160 CIndexSetForLoadingStage ();
00161 ~CIndexSetForLoadingStage ();
00162
00164 bool CreateTempFiles (string Path);
00166 bool DeleteTempFiles();
00168 size_t GetMemoryLoadIndexItemsCount() const;
00170 bool SaveMemoryLoadIndex();
00172 bool AddInputLoadIndexToMemoryLoadIndex();
00174 void SortInputAndMemoryIndices();
00176 bool AddMemoryLoadIndexToMainLoadIndex();
00178 void InsertToInputLoadIndex(const char* Str, size_t StrLen, const vector<CTokenNo>& occurrences);
00180 void CloseTempBigramsFile();
00182 bool UseBigrams() const;
00184 void ProcessBigramBorder(CTokenNo occurrence);
00185 };
00186
00187
00188
00189 #endif