ddc
ConcSession.h
Go to the documentation of this file.
1 //*-*- Mode: C++ -*- */
2 //
3 // DDC originally by Alexey Sokirko
4 // Changes and modifications 2011-2018 by Bryan Jurish
5 //
6 // This file is part of DDC (formerly ConcHolder.h)
7 //
8 // DDC is free software: you can redistribute it and/or modify
9 // it under the terms of the GNU Lesser General Public License as published by
10 // the Free Software Foundation, either version 3 of the License, or
11 // (at your option) any later version.
12 //
13 // DDC is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public License
19 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
20 //
21 #ifndef DDC_CONC_SESSION_H
22 #define DDC_CONC_SESSION_H
23 
24 #include "ConcIndexatorInvoker.h"
25 #include "QueryResult.h"
26 #include "NavHint.h"
27 #include "../CommonLib/ddcRandom.h"
28 
29 //==============================================================================
30 // Typedefs & Forward Declarations
31 
// SaveTriggerType: function-pointer type of the "save trigger" callback used for
// ConcordPattern application; it receives a string and a caller-supplied DWORD
// (LParam) and returns a bool. It is the type of the SaveTrigger parameter of
// GetOccurrences() and SaveOccurrences() declared below.
33 typedef bool (*SaveTriggerType) (const string&,DWORD LParam);
34 
35 class CConcSessionContext; //-- forward decl
36 class CQueryCompiler; //-- forward decl
37 class CQuery; //-- forward decl
38 
39 //==============================================================================
40 // Classes & Structs
41 
42 //----------------------------------------------------------------------
55 {
56 public:
57  //------------------------------------------------------
59 
62 
65 
67  size_t m_WorkerId;
68 
71 
74 
77 
80 
84 
85 public:
86  //------------------------------------------------------
88 
90  unsigned int m_RandomSeed;
91 
94 
97 
99  string m_ErrorStr;
100 
107 
113 
117 
122 
125 
128 
131 
133 
134 
135  //------------------------------------------------------
137 
139  void AddFileReference(const long FileNo);
141  void ShowBibliographyForTextOrHtml(const CHit& Hit, DWORD PageNumber);
143  bool ShowBibliographyForTable(DWORD PageNumber, const CHit& Hit, const vector<COutputToken>& Tokens);
145  bool GenerateOneHitString(DWORD PageNumber, const CHit& Hit, const vector<COutputToken>& Tokens);
147  bool GenerateOneHitStringJson(DWORD PageNumber, const CHit& Hit, const vector<COutputToken>& Tokens);
149  bool GetContext(int StartBreakNo, int EndBreakNo, const DWORD CurrFileNo, const bool bConvertASCIIToHtmlSymbols, string& Result) const;
151  bool GetContextJson(int StartBreakNo, int EndBreakNo, const DWORD CurrFileNo, string& js) const;
152 
155  DDCErrorEnum GetAllHits(const string &Query, size_t Start, size_t Limit);
156 
162  DDCErrorEnum GetAllHits(CQuery *QueryRoot, size_t Start, size_t Limit);
163 
165  bool IsUniversalCountQuery(CQuery *QueryRoot) const;
166 
168  bool TryToGetFromCache(const string &Query, DWORD& EndHitNo);
169 
171  void SaveToCache(const string& Query, vector<size_t>::const_iterator start, vector<size_t>::const_iterator end);
172 
174  void SetHitType();
175 
177  bool GetFileSnippets(const int HitNo, vector<COutputToken>& Tokens) const;
179  bool SaveOccurrences(const vector<DWORD>& ChunkLengths, int ContextSize, const vector<CTokenNo>& Occurrences, const vector<CHit>& Hits, SaveTriggerType SaveTrigger,DWORD LParam);
181  bool GetTokensFromStorageByBreak(size_t IndexNo, size_t BreakNo, vector<COutputToken>& Tokens) const;
183  void InitFileReferences(vector<CHit>& Hits) const;
184 
187  void InitSortKeyForHits(const CQuery *pQuery, const CDDCFilterWithBounds& Filter, vector<size_t>& PeriodHitsIndex);
188  void InitSortByRank(const CQuery *pQuery) const;
189  void InitSortBySize(const CQuery *pQuery) const;
190  void InitSortByRandom(const CQuery *pQuery) const;
191  void InitSortByContext(const CQuery *pQuery, const CDDCFilterWithBounds& Filter) const;
192 
200  void SortKeyLB(CHitSortKey &key, const CDDCFilterWithBounds& Filter);
202 
203 public:
204  //------------------------------------------------------
206 
207 
214  CConcSession(CConcSessionContext *SessionContext=NULL);
215 
217  ~CConcSession();
219 
220  //------------------------------------------------------
222 
223 
225  CConcSession* WorkerClone(size_t WorkerId);
226 
228  void WorkerCloneFree();
229 
231  int LockSessionContext();
232 
234  int UnlockSessionContext();
235 
237  void ClearQueryCache();
238 
240  size_t CacheSize(void) const;
242 
243  //------------------------------------------------------
245 
246 
248  const ddcBreakVector& GetBreaks() const;
249 
252 
254  string GetResultFormatStr() const;
255 
257  void SetResultFormat(string ResultTypeStr);
258 
263  DDCErrorEnum GetOccurrences(const string &Query, int ContextSize, SaveTriggerType SaveTrigger,DWORD LParam);
264 
265 
267 
278  DDCErrorEnum SimpleQuery(const string &Query, DWORD& EndHitNo, DWORD& HitsCount);
279 
282  DDCErrorEnum GetHits(const string &QueryStr, DWORD& EndHitNo);
283 
286  DDCErrorEnum GetHits(CQuery *QueryRoot, DWORD& EndHitNo);
287 
291  DDCErrorEnum GetHits(CQuery *QueryRoot, DWORD& EndHitNo, const string &QueryStr);
292 
296  DDCErrorEnum GenerateHitStrings(const int StartHitNo, bool UseAdditionalHitDelimiter=true);
297 
301  DDCErrorEnum GenerateCountStrings(const int StartHitNo, bool UseAdditionalHitDelimiter=true);
302 
304  size_t GetOffsetHint(const size_t StartHitNo) const;
305 
307  string GetSortKeyHint(const size_t StartHitNo) const;
308 
311  string GetHitIds() const;
312 
315  string GetCountIds() const;
316 
319 
321  void SetTimeOut(int TimeOut);
322 
324  void ClearQuery();
325 
327  int GetTextArea() const;
328 
330  void ClearQueryResults();
331 
333  static DDCFormatTypeEnum GetResultFormatByString(const string& ResultTypeStr );
334 
336  static void DecorateQueryResults(const string& ResultTypeStr, string& QueryResultString);
337 
340  bool HasRankOrderOperator() const;
341 
344  bool HasMatchIdOperator() const;
345 
347  int GetBreakStarterLength() const;
348 
350  string BuildJsonContextString (const vector<COutputToken>& Tokens, bool doHighlight=true) const;
351 
353  string CanonicalQueryString(const string &Query);
354 
356  string JsonQueryString(const string &Query);
357 
359  void SetRandomSeed(unsigned int seed1=0) const;
360 
363  { return &(m_pConcordance->m_Txd); };
365 };
366 
367 
368 
369 #endif
370 
371 /*--- emacs style variables ---
372  * Local Variables:
373  * mode: C++
374  * c-file-style: "ellemtel"
375  * c-basic-offset: 4
376  * tab-width: 8
377  * indent-tabs-mode: nil
378  * End:
379  */
void AddFileReference(const long FileNo)
add a reference to FileNo according to m_ResultFormat
Definition: ConcSession.cpp:355
int UnlockSessionContext()
Definition: ConcSession.cpp:335
void ClearQuery()
clears the current parsed query (if any)
Definition: ConcSession.cpp:2376
bool GenerateOneHitStringJson(DWORD PageNumber, const CHit &Hit, const vector< COutputToken > &Tokens)
json: add hit string built by Hit to m_QueryResultStr
Definition: ConcSession.cpp:874
void InitFileReferences(vector< CHit > &Hits) const
initializes CHit::m_FileNo for each hit of Hits
Definition: ConcSession.cpp:1227
string GetSortKeyHint(const size_t StartHitNo) const
get sortkey-hint appropriate for next page (after GenerateHitStrings()); used by CDDCLeafServer ...
Definition: ConcSession.cpp:1934
DDCFormatTypeEnum
FormatTypeEnum defines the format of output hits:
Definition: ConcCommon.h:468
string CanonicalQueryString(const string &Query)
moo: return a canonical representation of the query string Query (implicitly parses) ...
Definition: ConcSession.cpp:2396
CShortOccurCacheMap m_ShortOccurCaches
a cache for short occurrence lists which is used during iterating through corpus periods and evaluati...
Definition: ConcSession.h:93
CQueryCompiler * m_pQueryCompiler
current query compiler, for compilation & evaluation of input queries.
Definition: ConcSession.h:70
DDCRandom * m_pRandom
pseudo-random number generator
Definition: ConcSession.h:73
bool GetContext(int StartBreakNo, int EndBreakNo, const DWORD CurrFileNo, const bool bConvertASCIIToHtmlSymbols, string &Result) const
add hit strings [StartBreakNo, EndBreakNo) without highlighting to m_QueryResultStr ...
Definition: ConcSession.cpp:631
void InitSortByRandom(const CQuery *pQuery) const
Definition: ConcSession.cpp:1437
void WorkerCloneFree()
Definition: ConcSession.cpp:324
bool m_bSessionMaster
are we acting as a session master? if false, m_pSessionContext will be freed on object destruction; d...
Definition: ConcSession.h:64
DDCErrorEnum SimpleQuery(const string &Query, DWORD &EndHitNo, DWORD &HitsCount)
SimpleQuery finds hits by the given query. EndHitNo is used as an input/output parameter.
Definition: ConcSession.cpp:2123
void SetResultFormat(string ResultTypeStr)
set the current format of hit
Definition: ConcSession.cpp:2304
HitSortOrderEnum
Definition: ConcCommon.h:172
void SetRandomSeed(unsigned int seed1=0) const
moo: set internal random seed to m_RandomSeed+seed1
Definition: ConcSession.cpp:2416
bool SaveOccurrences(const vector< DWORD > &ChunkLengths, int ContextSize, const vector< CTokenNo > &Occurrences, const vector< CHit > &Hits, SaveTriggerType SaveTrigger, DWORD LParam)
saves current found hits using SaveTrigger, this function is only called from GetOccurrences ...
Definition: ConcSession.cpp:2166
size_t m_WorkerId
local worker-thread ID (default=0)
Definition: ConcSession.h:67
string BuildJsonContextString(const vector< COutputToken > &Tokens, bool doHighlight=true) const
moo: build a json context string by parsing delimited token data
Definition: ConcSession.cpp:554
DDCErrorEnum GetAllHits(const string &Query, size_t Start, size_t Limit)
Definition: ConcSession.cpp:1650
bool ShowBibliographyForTable(DWORD PageNumber, const CHit &Hit, const vector< COutputToken > &Tokens)
add bibliographical information about Hit to m_QueryResultStr under TableFormat
Definition: ConcSession.cpp:731
const ddcBreakVector * m_pBreaks
a pointer to the current hits collection
Definition: ConcSession.h:79
string GetHitIds() const
Definition: ConcSession.cpp:1950
void ClearQueryResults()
clears CQueryResult fields, also m_ErrorStr and m_ResultOffset
Definition: ConcSession.cpp:2352
void InitSortBySize(const CQuery *pQuery) const
Definition: ConcSession.cpp:1426
CQueryResult: query results, possibly aggregated by "break".
Definition: QueryResult.h:43
CConcSession(CConcSessionContext *SessionContext=NULL)
Definition: ConcSession.cpp:252
string GetResultFormatStr() const
return string representation of m_ResultFormat
Definition: ConcSession.cpp:2309
CConcIndexatorInvoker is a class for invoking an index process.
Definition: ConcIndexatorInvoker.h:21
common session data, sharable by multiple worker threads extracted from ddc-2.0.x CConcHolder; now C...
Definition: ConcSessionContext.h:32
Definition: Concordance.h:126
void SortKeyLB(CHitSortKey &key, const CDDCFilterWithBounds &Filter)
Definition: ConcSession.cpp:1596
string m_RequestPath
full request path leading to this session (used by CDDCLeafServer)
Definition: ConcSession.h:124
bool(* SaveTriggerType)(const string &, DWORD LParam)
save trigger for ConcordPattern application
Definition: ConcSession.h:33
size_t m_ResultLimit
Definition: ConcSession.h:116
map< string, CShortOccurCache > CShortOccurCacheMap
a type for index string to its occurrences
Definition: ConcCommon.h:477
class for a single (thread-local) DDC query session; formerly CConcHolder. An instance of CConcSessio...
Definition: ConcSession.h:54
DDCFormatTypeEnum GetResultFormat() const
return the current format of hit
Definition: ConcSession.h:251
Definition: ConcCommon.h:263
void SetHitType()
sets hit type, initializing m_pBreaks
Definition: ConcSession.cpp:2332
HitSortOrderEnum HitSortOrder() const
get logical hit sort order; replaces HitsShouldBeSorted()
Definition: ConcSession.cpp:2360
TxDispatcher * GetTxDispatcher() const
moo: get term-expansion dispatcher for this object (wrapper for &m_pConcordance.m_Txd) ...
Definition: ConcSession.h:362
CConcSession * WorkerClone(size_t WorkerId)
Definition: ConcSession.cpp:290
time_t m_QueryEndTime
how much time a query can be processed, by default unlimited (-1)
Definition: ConcSession.h:76
size_t CacheSize(void) const
Definition: ConcSession.cpp:346
CConcSessionContext * m_pSessionContext
shared session data (cache, etc.)
Definition: ConcSession.h:61
void SetTimeOut(int TimeOut)
sets timeout for query processing
Definition: ConcSession.cpp:2365
DDCErrorEnum
Definition: DDCInternalError.h:24
Definition: QueryCompiler.h:50
DDCErrorEnum GenerateCountStrings(const int StartHitNo, bool UseAdditionalHitDelimiter=true)
Definition: ConcSession.cpp:1174
string GetCountIds() const
Definition: ConcSession.cpp:1970
bool GetTokensFromStorageByBreak(size_t IndexNo, size_t BreakNo, vector< COutputToken > &Tokens) const
initializes Tokens with words of hit BreakNo
Definition: ConcSession.cpp:985
DDCErrorEnum GetHits(const string &QueryStr, DWORD &EndHitNo)
Definition: ConcSession.cpp:1996
int GetTextArea() const
return the text area to be searched
Definition: ConcSession.cpp:2345
Definition: ConcCommon.h:438
int LockSessionContext()
Definition: ConcSession.cpp:330
Definition: ConcCommon.h:318
bool GenerateOneHitString(DWORD PageNumber, const CHit &Hit, const vector< COutputToken > &Tokens)
add hit string built by Hit to m_QueryResultStr
Definition: ConcSession.cpp:785
bool HasRankOrderOperator() const
Definition: ConcSession.cpp:2381
void InitSortKeyForHits(const CQuery *pQuery, const CDDCFilterWithBounds &Filter, vector< size_t > &PeriodHitsIndex)
Definition: ConcSession.cpp:1535
string m_ResultMinKey
Definition: ConcSession.h:121
bool TryToGetFromCache(const string &Query, DWORD &EndHitNo)
checks if Query is already in the cache, and if so, returns its hit results from the cache ...
Definition: ConcSession.cpp:1984
string m_ErrorStr
most recent error message (if applicable)
Definition: ConcSession.h:99
void ClearQueryCache()
Definition: ConcSession.cpp:340
bool HasMatchIdOperator() const
Definition: ConcSession.cpp:2391
void ShowBibliographyForTextOrHtml(const CHit &Hit, DWORD PageNumber)
add bibliographical information about Hit to m_QueryResultStr
Definition: ConcSession.cpp:371
DDCErrorEnum GenerateHitStrings(const int StartHitNo, bool UseAdditionalHitDelimiter=true)
Definition: ConcSession.cpp:1081
DDCErrorEnum GetOccurrences(const string &Query, int ContextSize, SaveTriggerType SaveTrigger, DWORD LParam)
Finds all occurrences of Query (only occurrences, not hits!), if Query is an atomic query (CQueryNode...
Definition: ConcSession.cpp:2247
size_t m_ResultOffset
Definition: ConcSession.h:112
static void DecorateQueryResults(const string &ResultTypeStr, string &QueryResultString)
adds header and footer to QueryResultStr according to format ResultTypeStr
Definition: ConcSession.cpp:1208
thread-safe pseudo-random number stream using drand48() & friends
Definition: ddcRandom.h:38
void InitSortByContext(const CQuery *pQuery, const CDDCFilterWithBounds &Filter) const
Definition: ConcSession.cpp:1448
uint32_t DWORD
Definition: utilit.h:105
size_t m_CurrentSearchPeriodNo
The index of the current subcorpus, which is now being processed.
Definition: ConcSession.h:127
string m_QueryResultStr
the result of the query (its format depends upon m_ResultFormat)
Definition: ConcSession.h:96
DDCFormatTypeEnum m_ResultFormat
the format of query result
Definition: ConcSession.h:82
bool GetContextJson(int StartBreakNo, int EndBreakNo, const DWORD CurrFileNo, string &js) const
append json hit strings [StartBreakNo, EndBreakNo) without highlighting to js
Definition: ConcSession.cpp:673
const ddcBreakVector & GetBreaks() const
GetBreaks returns the vector of current breaks (by m_pBreaks).
Definition: ConcSession.cpp:2326
static DDCFormatTypeEnum GetResultFormatByString(const string &ResultTypeStr)
converts a string to a FormatTypeEnum
Definition: ConcSession.cpp:2293
int GetBreakStarterLength() const
return the length of break prefix, where DDC should search (#within[sentence, 10]) ...
size_t GetOffsetHint(const size_t StartHitNo) const
get offset-hint appropriate for next page (after GenerateHitStrings()); used by CDDCLeafServer ...
Definition: ConcSession.cpp:1929
string m_AdditionalHitDelimiter
a delimiter which should be used between hits in m_QueryResultStr in the distributed model ...
Definition: ConcSession.h:130
TxDispatcher: name-based expansion dispatcher.
Definition: TermExpander.h:325
~CConcSession()
Definition: ConcSession.cpp:278
string JsonQueryString(const string &Query)
moo: return a JSON representation of the query string Query (implicitly parses)
Definition: ConcSession.cpp:2406
bool IsUniversalCountQuery(CQuery *QueryRoot) const
check if CountQuery is a count(*) query suitable for use with GetUniversalCounts() ...
Definition: ConcSession.cpp:1924
Truly abstract (index-independent) representation of a parsed query. Should eventually replace old ad...
Definition: Query.h:36
unsigned int m_RandomSeed
initial random-state components for m_pRandom
Definition: ConcSession.h:90
void SaveToCache(const string &Query, vector< size_t >::const_iterator start, vector< size_t >::const_iterator end)
stores Query to the cache
Definition: ConcSession.cpp:1989
TxDispatcher m_Txd
term expansion dispatcher; should define at least an entry for "default"
Definition: Concordance.h:299
void InitSortByRank(const CQuery *pQuery) const
Definition: ConcSession.cpp:1349
bool GetFileSnippets(const int HitNo, vector< COutputToken > &Tokens) const
creates snippets, concatenating contexts of found words
Definition: ConcSession.cpp:1001
CConcordance * m_pConcordance
m_pConcordance is the main (and the only) pointer to corpus indices and break collections. During querying, this pointer is used as a constant. Class CConcSession's original name "CConcHolder" was chosen because the class "holds" this pointer.
Definition: ConcSession.h:106