ddc
BranchServer.h
Go to the documentation of this file.
1 //-*- Mode: C++ -*-
2 //
3 // DDC originally by Alexey Sokirko
4 // Changes and modifications 2011-2020 by Bryan Jurish
5 //
6 // This file is part of DDC.
7 //
8 // DDC is free software: you can redistribute it and/or modify
9 // it under the terms of the GNU Lesser General Public License as published by
10 // the Free Software Foundation, either version 3 of the License, or
11 // (at your option) any later version.
12 //
13 // DDC is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public License
19 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
20 //
21 #ifndef DDC_BRANCH_SERVER_H
22 #define DDC_BRANCH_SERVER_H
23 
24 #include "../ConcordDmnLib/Server.h"
25 #include "../ConcordLib/QueryCompiler.h"
26 
27 //======================================================================
29 struct CHitToSort
30 {
32  size_t m_HitNo;
33  string m_SortKeyStr;
34 
35  CHitToSort(BYTE HostNo=0)
36  : m_HostNo(HostNo)
37  {};
38 
39  inline bool operator < (const CHitToSort& X) const {
40  return m_SortKeyStr < X.m_SortKeyStr;
41  };
42 
44  inline void steal(CHitToSort &X)
45  {
46  m_HostNo = X.m_HostNo;
47  m_HitNo = X.m_HitNo;
48  m_SortKeyStr.swap(X.m_SortKeyStr);
49  };
50 
52  inline string debugStr(void) const
53  {
54  return Format("{HostNo=%d,HitNo=%zd,SortKey=%s}", m_HostNo, m_HitNo, jsonStr(m_SortKeyStr).c_str());
55  };
56 };
57 
58 //==============================================================================
60 struct LessByHitNo {
61  bool operator() (const CHitToSort &a, const CHitToSort &b) const
62  { return a.m_HitNo < b.m_HitNo; };
63 };
64 
65 
66 //======================================================================
78  HitSortOrderEnum m_SortOrder; //< hit sort order; formerly bool m_bSort
/// NOTE(review): presumably a human-readable message accompanying the error codes tested by HasErrors() - confirm against the implementation
80  string m_ErrorStr;
/// stored hits; index i of this vector corresponds to logical hit number i + m_Offset
81  vector<CHitToSort> m_Hits;
82  vector<size_t> m_DtrEnds; //< EndHitNo components scanned from each immediate daughter subcorpus
/// sockets held by this result; the destructor calls CloseSockets()
83  vector<SOCKET> m_Sockets;
84  size_t m_Offset; //< logical offset of m_Hits[0] (sum over number of front-trimmed hits for all subcorpora)
85 
88  {
89  CloseSockets(); //-- fix mantis bug #718?
90  };
91 
93  void Clear();
94 
96  inline bool HasErrors() const
97  {
98  return (m_NetworkError != neSuccess ) || (m_InternalError != errNone);
99  };
100 
102  void CloseSockets();
103 
105  void CollectCounts();
106 
108  void PruneHits(size_t PruneLimit);
109 
116  size_t HostHitsLimit(const vector<size_t>& HostHitIds);
117 
119  inline size_t PhysicalOffset(size_t LogicalHitNo) const
120  { return LogicalHitNo - m_Offset; };
121 
123  inline size_t LogicalOffset(size_t PhysicalHitNo) const
124  { return PhysicalHitNo + m_Offset; };
125 
127  inline size_t LogicalMax(size_t LogicalHitNo) const
128  { return min(LogicalHitNo, LogicalOffset(m_Hits.size())); };
129 
131  inline size_t PhysicalMax(size_t LogicalHitNo) const
132  { return PhysicalOffset(LogicalMax(LogicalHitNo)); };
133 
135  inline size_t LogicalSize(size_t StartHitNo, size_t ResultLimit) const
136  { return LogicalMax(StartHitNo+ResultLimit) - LogicalMax(StartHitNo); };
137 
138 };
139 
140 //======================================================================
143 {
146 };
147 
148 //======================================================================
149 
156 {
157  typedef vector<DWORD> VecT;
158  typedef map<DWORD, set<string> > MapT;
159 
160  VecT m_vec; ///< Host-IDs of selected daughter(s); empty for all daughters
161  MapT m_map; ///< sub-paths relative to each selected daughter, keyed by Host-ID
162 
164  {};
165 
167  {};
168 
169  inline size_t size() const
170  { return m_vec.size(); };
171 
172  inline void resize(size_t sz)
173  { m_vec.resize(sz); };
174 
175  void Clear();
176 
178  void Insert(DWORD HostId, const string& SubPath);
179 
181  inline void Insert(DWORD HostId)
182  { Insert(HostId,"*"); };
183 
185  void InsertAll(DWORD nDtrs, const string &SubPath);
186 
188  string DtrPaths(DWORD HostId) const;
189 
191  string toString() const;
192 };
193 
194 
195 //======================================================================
198 public:
199  string sQuery; ///< query string
200  string sResultFormat; ///< result format (string)
201  size_t StartHitNo; ///< first hit to return
202  size_t ResultLimit; ///< max number of hits to return
203  int TimeOut; ///< query timeout (seconds)
206  size_t EndHitNo; ///< final hit number returned
207  size_t HitsCount; ///< total number of hits
209  //DwordVector PossibleHosts; ///< indices of selected immediate subcorpora
211  string HitDistrib; ///< NOTE(review): undocumented upstream; presumably a serialized per-subcorpus hit distribution - confirm
212  string DocDistrib; ///< NOTE(review): undocumented upstream; presumably a serialized per-subcorpus document distribution - confirm
213  string HitsKey; ///< key for response body data (json only)
214  string ResponseBody; ///< response body data
215  string *pErrorStr; ///< error string pointer (empty or NULL for success)
216  string ReqPath; ///< request path (>=v2.2.8)
220 
221 public:
223  CRunQueryData(const string & _Query="", const string& _ResultFormat="JSON", size_t _Start=0, size_t _Limit=10, int _TimeOut=10, string *_pErrorStr=NULL);
224 
226  virtual ~CRunQueryData();
227 
229  void Init(const string & _Query="", const string& _ResultFormat="JSON", size_t _Start=0, size_t _Limit=10, int _TimeOut=10, string *_pErrorStr=NULL, const string& _ReqPath="", const NavHint& _Hint=NavHint());
230 
232  void Clear();
233 
235  inline CQuery* Query() const
236  { return pQCompiler->m_pQuery; };
237 
239  inline CQueryOptions* QueryOptions() const
240  { return Query()->m_Options; };
241 
243  inline bool IsPruneQuery() const
244  { return QueryOptions()->IsPruneFilter(); };
245 
247  void toString(string &Response, size_t WorkerId=0, const DDCVersionT& vCompat=DDCVersion);
248 
249 };
250 
251 //======================================================================
253 typedef map<string,size_t> CHostIdMap;
254 
255 //======================================================================
258 {
259 public:
260 
261 public:
262  //--------------------------------------------------------------
263  // local data
264  map<string, CQueryStatistics> m_QueryToStatis; //-- unused?
265  vector<CHost> m_Hosts; ///< host wrappers addressed by the offsets stored in m_HostMap
266  CHostIdMap m_HostMap; ///< maps short daughter labels (without "server:" prefix) to m_Hosts[] offsets
268  string m_ErrorStr; //-- subcorpus error
269 
270  //-- low-level data
271  CFirstHitsQueryResult m_FHResult; //< first-hits result
272  CRunQueryData m_QueryData; //< run-query data
273  NavHintCache *m_pNavCache; //< navigation hint cache (shared over multiple worker threads)
274  size_t m_NavCacheStep; //-- minimum offset increment for update of navigation hint cache
275 
277 
278 public:
279  //--------------------------------------------------------------
280  // Constructors etc.
281 
285  CDDCBranchServer(bool master=true);
286 
288  virtual ~CDDCBranchServer();
289 
291  virtual void Shutdown();
292 
294  virtual void SetOptions(const CDDCServerOptions &Options);
295 
296 public:
297  //--------------------------------------------------------------
298  // DDCServer API: client threads
299 
303  virtual CDDCServer *WorkerClone(void) const
304  { return (CDDCServer*)new CDDCBranchServer(false); };
305 
307  virtual void WorkerCloneInit(const CDDCServer *Parent, size_t WorkerId);
308 
310  virtual void WorkerCloneFree(void);
311 
312 public:
313  //--------------------------------------------------------------
314  // CHost API: overrides
316  virtual bool Close(SOCKET& rConnectedSocket, bool linger=false, int timeout=5);
317 
319  virtual bool CloseDtr(SOCKET& rConnectedSocket, bool linger=false, int timeout=5);
320 
321 public:
322  //--------------------------------------------------------------
323  // DDCServer API: request handlers: required
324 
325  /* request handler: get_first_hits: required (leaf) */
326  virtual void handle__get_first_hits(string &Result, const char *Query, int TimeOut, size_t ResultLimit, const string& ReqPath, const NavHint& Hint);
327 
328  /* request handler: get_hit_strings: required (leaf) */
329  virtual void handle__get_hit_strings(string &Result, const char *ResultType, size_t StartHitNo, size_t ResultLimit);
330 
331  /* request handler: run_query: required (branch) */
332  virtual void handle__run_query(string &Result, const char *CorpusName, const char *Query, const char *ResultType, size_t StartHitNo, size_t ResultLimit, int TimeOut, const string& ReqPath, const NavHint& Hint);
333 
335  virtual void handle__expand_terms(string &Result, const string& pipeline, const string& terms, int timeout, const string &corpus);
336 
338  virtual void handle__status(string &Result, int timeout=10, bool verbose=false);
339 
341  virtual void handle__info(string &Result, int timeout=10, bool verbose=false);
342 
344  virtual void handle__reload(string &Result, int depth=0);
345 
347  virtual void handle__clear_cache(string &Result, int depth=0);
348 
350  virtual void handle__nodes(string &Result, int depth=-1);
351 
353  virtual void handle__close_socket(string &Result, SOCKET &rConnectedSocket);
354 
355 public:
356  //--------------------------------------------------------------
357  // local methods
358 
360  void InitHostMap();
361 
363  void GetSubcorpusJsonStrings(vector<string>& Results, const string &request, int timeout=10);
364 
366  string GetSubcorpusJsonArray(const string &request, int timeout=10);
367 
369  void RunDistributed();
370 
372  NetworkErrorsEnum RunGetFirstHits();
373 
375  void CheckFirstHitsResult(NetworkErrorsEnum networkError=neSuccess);
376 
378  NetworkErrorsEnum RunGetHitStrings();
379 
381  NetworkErrorsEnum RunDistributed(string Query,
382  const string& ResultType,
383  size_t StartHitNo,
384  size_t ResultLimit,
385  int& iInternalError,
386  size_t& EndHitNo,
387  size_t& HitsCount,
388  int TimeOut,
389  string& ResultString,
390  size_t& RelevantDocsCount,
391  string& HitDistrib,
392  string& DocDistrib,
393  string& HitsKey);
394 
396  void UpdateNavCache();
397 
401  NetworkErrorsEnum GetHitContexts(CFirstHitsQueryResult& FHResult,
402  const string& ResultType,
403  DDCFormatTypeEnum ResultTypeE,
404  CSelectedHosts& SelectedHosts,
405  int& InternalError,
406  int TimeOut,
407  string& ResultString
408  );
409 
411  NetworkErrorsEnum GetHitCounts(CFirstHitsQueryResult& FHResult, DDCFormatTypeEnum ResultTypeE, string& ResultString);
412 
414  void GetFirstHitsFromCorpora(const CSelectedHosts& SelectedHosts,
415  const string Query,
416  const size_t StartHitNo,
417  const size_t ResultLimit,
418  const int TimeOut,
419  CFirstHitsQueryResult& Result);
420 
422  NetworkErrorsEnum SendGetHitStringsRequest(BYTE SelectedHostNo,
423  const vector< vector<size_t> >& SortedHits,
424  const vector<size_t>& FirstOffsets,
425  const string& ResultType);
426 
428  NetworkErrorsEnum GetHitStringsFromOneCorpus(CHost& Host,
429  const string& ResultType,
430  size_t StartHitNo,
431  size_t ResultLimit,
432  int TimeOut,
433  SOCKET& CorpusSocket,
434  int& InternalError,
435  string& Result,
436  string& ResultHint);
437 
443  bool ReadSelectedHosts(string& CleanQuery);
444 
448  size_t FindHostID(const string& CorpusName) const;
449 
453  CHost* FindHost(const string& CorpusName);
454 
456  void AlignCorpusPath(const string& UserPath, string& Dtr, string& Rest);
457 };
458 
461 
462 #endif /* DDC_BRANCH_SERVER_H */
463 
464 /*--- emacs style variables ---
465  * Local Variables:
466  * mode: C++
467  * c-file-style: "ellemtel"
468  * c-basic-offset: 4
469  * tab-width: 8
470  * indent-tabs-mode: nil
471  * End:
472  */
CSelectedHosts SelectedHosts
maps immediate subcorpora to selected sub-subcorpora
Definition: BranchServer.h:210
DwordVector m_HitsEnds
Definition: BranchServer.h:144
vector< DWORD > DwordVector
Definition: utilit.h:148
string sResultFormat
result format (string)
Definition: BranchServer.h:200
CDDCBranchServer CDDCServerListenHost
Definition: BranchServer.h:460
string m_QueryResultString
Definition: BranchServer.h:267
Definition: BranchServer.h:197
~CSelectedHosts()
Definition: BranchServer.h:166
DDCFormatTypeEnum
FormatTypeEnum defines the format of output hits:
Definition: ConcCommon.h:468
navigation hint-key for branch server get_first_hits() "paging"
Definition: NavHint.h:57
HitSortOrderEnum m_SortOrder
Definition: BranchServer.h:78
string m_ErrorStr
Definition: BranchServer.h:268
void steal(CHitToSort &X)
Definition: BranchServer.h:44
void resize(size_t sz)
Definition: BranchServer.h:172
string Format(const char *format,...)
Definition: ddcString.cpp:393
size_t StartHitNo
first hit to return
Definition: BranchServer.h:201
size_t LogicalOffset(size_t PhysicalHitNo) const
Definition: BranchServer.h:123
Definition: BranchServer.h:155
vector< size_t > m_DtrEnds
Definition: BranchServer.h:82
#define SOCKET
Definition: string_socket.h:66
string HitsKey
key for response body data (json only)
Definition: BranchServer.h:213
string DocDistrib
?
Definition: BranchServer.h:212
NetworkErrorsEnum
Definition: string_socket.h:45
CQueryCompiler * pQCompiler
query compiler for parsing input queries (parsed query is pQCompiler->m_pQuery)
Definition: BranchServer.h:219
string ResponseBody
response body data
Definition: BranchServer.h:214
size_t m_HitNo
subcorpus-local hit-number for context queries OR hit count for count-queries
Definition: BranchServer.h:32
bool operator<(const CHitToSort &X) const
Definition: BranchServer.h:39
Definition: BranchServer.h:257
size_t m_AllRelevantDocsCount
Definition: BranchServer.h:74
NavHintKey navKey
navigation hint key
Definition: BranchServer.h:217
CQueryOptions * QueryOptions() const
Definition: BranchServer.h:239
size_t LogicalMax(size_t LogicalHitNo) const
Definition: BranchServer.h:127
CHitToSort(BYTE HostNo=0)
Definition: BranchServer.h:35
string * pErrorStr
error string pointer (empty or NULL for success)
Definition: BranchServer.h:215
vector< CHitToSort > m_Hits
Definition: BranchServer.h:81
MapT m_map
sub-paths relative to each selected daughter, keyed by Host-ID
Definition: BranchServer.h:161
CFirstHitsQueryResult m_FHResult
Definition: BranchServer.h:271
HitSortOrderEnum
Definition: ConcCommon.h:172
Definition: string_socket.h:46
vector< CHost > m_Hosts
Definition: BranchServer.h:265
map< DWORD, set< string > > MapT
Definition: BranchServer.h:158
size_t LogicalSize(size_t StartHitNo, size_t ResultLimit) const
Definition: BranchServer.h:135
NetworkErrorsEnum m_NetworkError
Definition: BranchServer.h:79
string jsonStr(ddcObject *o)
JSON stringification wrapper
Definition: ddcObject.h:208
const DDCVersionT DDCVersion(2, 2, 8)
CHostIdMap m_HostMap
maps short daughter labels (without "server:" prefix) to m_Hosts[] offsets
Definition: BranchServer.h:266
map< string, size_t > CHostIdMap
Definition: BranchServer.h:253
string m_SortKeyStr
hit sort-string
Definition: BranchServer.h:33
~CFirstHitsQueryResult()
Definition: BranchServer.h:87
vector< int > m_Sockets
Definition: BranchServer.h:83
CQuery * m_pQuery
root of the abstract parse tree
Definition: QueryCompiler.h:67
BYTE m_HostNo
subcorpus id
Definition: BranchServer.h:31
Definition: BranchServer.h:60
generic socket host wrapper class irritatingly also used for client connections
Definition: CHost.h:31
parsed get_first_hits response
Definition: BranchServer.h:72
NavHintCache * m_pNavCache
Definition: BranchServer.h:273
bool HasErrors() const
Definition: BranchServer.h:96
string m_RelevantDocsDistributionStr
Definition: BranchServer.h:76
root-level query options (filters and flags)
Definition: QueryOptions.h:39
void Insert(DWORD HostId)
Definition: BranchServer.h:181
CQuery * Query() const
Definition: BranchServer.h:235
DWORD m_FreqCount
Definition: BranchServer.h:145
Definition: ServerOptions.h:30
Definition: Server.h:46
vector< DWORD > VecT
Definition: BranchServer.h:157
size_t EndHitNo
final hit number returned
Definition: BranchServer.h:206
navigation hint with respect to local subcorpus tree, for get_first_hits "paging" ...
Definition: NavHint.h:115
size_t size() const
Definition: BranchServer.h:169
int iInternalError
internal error code
Definition: BranchServer.h:204
Definition: BranchServer.h:29
Definition: BranchServer.h:142
Definition: QueryCompiler.h:50
size_t ResultLimit
max number of hits to return
Definition: BranchServer.h:202
string m_ErrorStr
Definition: BranchServer.h:80
unsigned char BYTE
Definition: utilit.h:94
int m_InternalError
Definition: BranchServer.h:77
CRunQueryData m_QueryData
Definition: BranchServer.h:272
string HitDistrib
?
Definition: BranchServer.h:211
LRU cache for faster "paging" through daughter hits for a branch server.
Definition: NavHint.h:215
string ReqPath
request path (>=v2.2.8)
Definition: BranchServer.h:216
size_t m_Offset
Definition: BranchServer.h:84
VecT m_vec
Host-IDs of selected daughter(s); empty for all daughters.
Definition: BranchServer.h:160
bool IsPruneQuery() const
Definition: BranchServer.h:243
Definition: DDCInternalError.h:26
bool m_bHitStringsParallel
send get_hit_strings requests in parallel?
Definition: BranchServer.h:276
string debugStr(void) const
Definition: BranchServer.h:52
int iNetworkError
network error code
Definition: BranchServer.h:205
string m_HitsDistributionStr
Definition: BranchServer.h:75
virtual CDDCServer * WorkerClone(void) const
Definition: BranchServer.h:303
size_t RelevantDocsCount
?
Definition: BranchServer.h:208
map< string, CQueryStatistics > m_QueryToStatis
Definition: BranchServer.h:264
size_t m_AllHitsCount
Definition: BranchServer.h:73
size_t m_NavCacheStep
Definition: BranchServer.h:274
Definition: ddcVersion.h:12
uint32_t DWORD
Definition: utilit.h:105
CSelectedHosts()
Definition: BranchServer.h:163
int TimeOut
query timeout (seconds)
Definition: BranchServer.h:203
string sQuery
query string
Definition: BranchServer.h:199
size_t HitsCount
total number of hits
Definition: BranchServer.h:207
NavHint navHint
user- or cache-supplied navigation hint
Definition: BranchServer.h:218
size_t PhysicalMax(size_t LogicalHitNo) const
Definition: BranchServer.h:131
Truly abstract (index-independent) representation of a parsed query. Should eventually replace old ad...
Definition: Query.h:36
size_t PhysicalOffset(size_t LogicalHitNo) const
Definition: BranchServer.h:119