#ifndef IndexSetForQueryingStage_h
00002 #define IndexSetForQueryingStage_h
00003
00004
00005 #include "../common/bserialize.h"
00007
00008
00009 const DWORD TheOnlyOccurIsInEndOccurNo = 1<<31;
00010 const DWORD EndOfSentenceForBigrams = 1<<30;
00011 const DWORD AllFlags = TheOnlyOccurIsInEndOccurNo | EndOfSentenceForBigrams;
00012
00018 class CIndexItem
00019 {
00020
00021 DWORD m_IndexItemOffsetAndFlags;
00022
00023
00024 DWORD m_EndOccurOffset;
00025
00026 public:
00028 DWORD GetIndexItemOffsetAndFlags() const
00029 {
00030 return m_IndexItemOffsetAndFlags;
00031 };
00033 void SetIndexItemOffsetAndFlags(DWORD Value)
00034 {
00035 m_IndexItemOffsetAndFlags = Value;
00036 };
00037
00038 static size_t GetMaximalNumberOfRunningTokens()
00039 {
00040 return ~0 & ~AllFlags;
00041 };
00042
00044 DWORD GetIndexItemOffset() const
00045 {
00046 return m_IndexItemOffsetAndFlags & ~AllFlags;
00047 };
00048
00050 void SetIndexItemOffset(DWORD Value)
00051 {
00052 m_IndexItemOffsetAndFlags &= AllFlags;
00053 m_IndexItemOffsetAndFlags |= Value & ~AllFlags;
00054 };
00056 DWORD GetItemIndexFlags() const
00057 {
00058 return m_IndexItemOffsetAndFlags & AllFlags;
00059 };
00061 void AddItemIndexFlags(DWORD Value)
00062 {
00063 m_IndexItemOffsetAndFlags |= Value & AllFlags;
00064 };
00066 DWORD GetEndOccurOffset() const
00067 {
00068 return m_EndOccurOffset;
00069 };
00071 void SetEndOccurOffset(DWORD EndOccurOffset)
00072 {
00073 m_EndOccurOffset = EndOccurOffset;
00074 };
00075 bool HasOneOccurrence () const
00076 {
00077 return (m_IndexItemOffsetAndFlags & TheOnlyOccurIsInEndOccurNo) != 0;
00078 }
00079 };
00080
00081 inline size_t get_size_in_bytes (const CIndexItem& t)
00082 {
00083 return 8;
00084 };
00085
00086 inline size_t save_to_bytes(const CIndexItem& i, BYTE* buf)
00087 {
00088 buf += save_to_bytes(i.GetIndexItemOffsetAndFlags(), buf);
00089 buf += save_to_bytes(i.GetEndOccurOffset(), buf);
00090 return get_size_in_bytes(i);
00091 }
00092
00093 inline size_t restore_from_bytes(CIndexItem& i, const BYTE* buf)
00094 {
00095 DWORD d;
00096
00097 buf += restore_from_bytes(d, buf);
00098 i.SetIndexItemOffsetAndFlags(d);
00099
00100 buf += restore_from_bytes(d, buf);
00101 i.SetEndOccurOffset(d);
00102
00103 return get_size_in_bytes(i);
00104 }
00105
00106
00107 class CStringIndexator;
00108 class CItemIndexForLoading;
00109
00113 class CIndexSetForQueryingStage
00114 {
00116 FILE* m_OccursFp;
00117
00118
00120 void CloseOccursFile();
00122 bool LoadPeriodDevision();
00123
00124
00125 protected:
00126
00128 void AssertHasPath() const;
00130 void AddOccurs (size_t IndexItemNo, const bool bOneOccurrence, const size_t StartOccurNo, const size_t EndOccurNo, vector<CTokenNo>& Occurs, size_t PeriodNo, COccurrBuffer& OccursBuffer, CShortOccurCache* pCacheByIndexSet, int& CacheId) const;
00132 string GetOccursFileName() const;
00134 string GetOccHdrFileName() const;
00135
00136
00138 string GetPeriodsDevisionFileName () const;
00140 string GetFileNameForInfos() const;
00142 file_off_t GetOccurrsFileSize() const;
00144 size_t GetStartOccurNo(size_t IndexNo) const;
00146 bool BuildPeriodsDivisionAndCompress (const DWORD TokenId, vector<CTokenNo>& InputTokens);
00148 bool AddOneIndexItem (CItemIndexForLoading& M, FILE* res_fp, size_t& CurrPositionInResFile, const CTokenNo EndTokeNo);
00150 bool WritePeriodsDivision ();
00151
00152
00153 public:
00155 vector<CIndexItem> m_Index;
00156
00158 PeriodsDivisionMap m_EndPeriodOffsets;
00159
00161 const CStringIndexator* m_pParent;
00162
00164 bool m_bCompressOccurrences;
00165
00166
00167 CIndexSetForQueryingStage(const CStringIndexator* pParent);
00168 ~CIndexSetForQueryingStage();
00170 virtual string GetName() const = 0;
00172 bool LoadIndexSet(bool bLoadHeaderOfOccurrences = true);
00174 bool DestroyIndexSet ();
00176 void ReadAllOccurrences (size_t IndexItemNo, vector<CTokenNo>& Occurs) const;
00177
00178 };
00179
00180
00181
00182 #endif