00001 #ifndef StringIndexator_h 00002 #define StringIndexator_h 00003 00004 #include "../ConcordLib/IndexSet.h" 00005 00088 //const char ddc_archive_stub[] = "ddc_archive_stub"; 00089 const char ddc_archive_stub[] = ""; 00090 00095 class CStringIndexator { 00096 00097 protected: 00098 00100 vector<CTokenNo> m_SearchPeriods; 00101 00103 bool RegisterChunkIndex(); 00105 string GetSearchPeriodsFileName() const; 00107 bool DestroyIndices(); 00109 bool ReadIndicesFromTheDisk(); 00111 void ClearStringIndices(); 00113 bool IndexOneToken (const char* Line, const CTokenNo& TokenNo); 00114 00115 public: 00117 string m_Path; 00119 vector<CStringIndexSet*> m_Indices; 00121 size_t m_MaxRegExpExpansionSize; 00123 CStringIndexSet* m_pChunkIndex; 00125 CStringIndexSet* m_pLeftBigramsIndex; 00127 CStringIndexSet* m_pRightBigramsIndex; 00128 00129 00130 CStringIndexator(); 00131 ~CStringIndexator(); 00132 00134 bool RegisterStringIndices(string IndicesStr); 00136 void SetPath(string Path); 00138 string GetIndicesString() const; 00140 CStringIndexSet* GetIndexByNameOrShortName(const string& Name); 00142 size_t GetSearchPeriodsCount() const; 00144 const CTokenNo& GetSearchPeriod(size_t i) const { return m_SearchPeriods[i]; }; 00145 00147 bool StartIndexing(string Path); 00149 bool TerminateIndexing(); 00151 bool FinalSaveAllIndices(bool bAfterLoading); 00153 bool AddInputLoadIndexToMemoryLoadIndex(); 00155 bool AddMemoryLoadIndexToMainLoadIndex(); 00157 bool SaveMemoryLoadIndex(); 00159 CStringIndexSet* GetIndexByName(const string& Name); 00161 CStringIndexSet* GetTokenIndex(); 00163 const CStringIndexSet* GetTokenIndex() const; 00165 void ProcessBigramBorders (const int BreakCollectionNo, CTokenNo occurrence); 00166 }; 00167 00168 00169 00170 00171 00172 #endif