ddc
QCount.h
Go to the documentation of this file.
1 //-*- Mode: C++ -*-
2 // DDC originally by Alexey Sokirko
3 // Changes and modifications 2011-2020 by Bryan Jurish
4 //
5 // This file is part of DDC.
6 //
7 // DDC is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Lesser General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // DDC is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU Lesser General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public License
18 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
19 //
20 
21 #ifndef DDC_QCOUNT_H
22 #define DDC_QCOUNT_H
23 
24 #include "../ConcordLib/Query.h"
25 
26 //======================================================================
27 //typedef vector<string> CQCountByList;
28 
// Default sample size for count queries: (size_t)-1, i.e. the largest value a size_t can hold.
// The CQCount constructor in this header substitutes this value when it is called with sample_==0.
30 const size_t DDCDefaultCountSampleSize = (size_t)-1;
31 
32 // workaround for "error: overriding ‘virtual std::invalid_argument::~invalid_argument() throw ()’" under g++-5.4.0 / ubuntu 16.04.6
33 #ifndef _GLIBCXX_USE_NOEXCEPT
34 # define _GLIBCXX_USE_NOEXCEPT
35 #endif
36 
// Exception class derived from invalid_argument.
// Its only visible constructor forwards the message string to the invalid_argument base.
// The numbered listing omits line 44; the cross-reference index at the end of this page
// places "virtual ~CQCountKeyBadOccurrence()" there.
38 class CQCountKeyBadOccurrence : public invalid_argument
39 {
40 public:
   // what_arg: message text handed unchanged to invalid_argument.
41  CQCountKeyBadOccurrence(const string& what_arg)
42  : invalid_argument(what_arg)
43  {};
45 };
46 
47 //==============================================================================
48 // Hit sizes
49 
50 //==============================================================================
51 // CQCountKeyExpr and subclasses
52 
53 //----------------------------------------------------------------------
// Base class of the count-key expression classes declared in this header; derives from CQuery.
// Several methods are only declared here (Compile, Evaluate); their definitions are not part
// of this header.
55 class CQCountKeyExpr : public CQuery
56 {
57 public:
   // Constructs the expression, passing `label` (default: empty string) to the CQuery base.
59  CQCountKeyExpr(const string& label="")
60  : CQuery(label)
61  {};
62 
64  //virtual void Clear(bool deep=true);
65 
   // Virtual destructor with an empty body.
67  virtual ~CQCountKeyExpr()
68  {};
69 
   // Returns true in this base class; CQCountKeyExprToken and CQCountKeyExprRegex override it.
71  virtual bool CanCountByFile() const { return true; };
72 
   // Declared only; takes the CQueryCompiler and returns bool.
74  virtual bool Compile(CQueryCompiler *compiler);
75 
   // Declared only; returns a reference to a string for the given hit.
77  virtual string& Evaluate(const CHit &Hit);
78 
   // Returns m_Label (not declared in this header; presumably inherited from CQuery — confirm).
80  virtual string toString() { return m_Label; };
81 
   // Returns the literal class name "CQCountKeyExpr".
   // NOTE(review): not marked `virtual` here although subclasses mark theirs `virtual`;
   // whether dispatch is virtual depends on the base-class declaration, which is not visible here.
83  string jsonClass() { return "CQCountKeyExpr"; };
84 
   // Returns the fragment "label": followed by jsonStr(m_Label).
86  string jsonData() { return "\"label\":" + jsonStr(m_Label); };
87 };
88 
89 //----------------------------------------------------------------------
// Body of class CQCountKeyExprConstant. The listing omits the class header (the index places
// the class definition at QCount.h:91) and line 94, where the index lists
// "virtual ~CQCountKeyExprConstant()". The constructor shows the base is CQCountKeyExpr.
92 public:
   // Default label is "*"; the label is forwarded to CQCountKeyExpr.
93  CQCountKeyExprConstant(const string &label="*") : CQCountKeyExpr(label) {};
   // Returns m_Label for every hit; the Hit argument is not used.
95  virtual string& Evaluate(const CHit &Hit) { return m_Label; };
96  virtual string jsonClass() { return "CQCountKeyExprConstant"; };
   // Renders as @'<label>' where the label is passed through escapeChars with the
   // character set backslash and single quote.
97  virtual string toString() { return string("@'") + escapeChars(m_Label,"\\'") + "'"; };
98 };
99 
100 //----------------------------------------------------------------------
// Body of class CQCountKeyExprMeta. The listing omits the class header (index: QCount.h:102)
// and line 105, where the index lists "DWORD m_CurFile" (last file-number evaluated).
103 public:
106 
   // Last key generated (wording taken from the index entry for this member).
108  string m_CurKey;
109 
110 public:
   // Default label "*". m_CurFile starts at -1; the index lists DWORD as uint32_t, so this
   // converts to the largest DWORD value. m_CurKey starts empty.
111  CQCountKeyExprMeta(const string &label="*") : CQCountKeyExpr(label), m_CurFile(-1), m_CurKey("") {};
112  virtual ~CQCountKeyExprMeta() {};
113 };
114 
115 //----------------------------------------------------------------------
// Body of class CQCountKeyExprFileId. The listing omits the class header (index: QCount.h:117).
// The constructor shows the base is CQCountKeyExprMeta.
118 private:
   // 16-byte private character buffer; its use is in Evaluate(), which is defined elsewhere.
119  char buf[16];
120 public:
   // Default label is "fileid".
121  CQCountKeyExprFileId(const string &label="fileid") : CQCountKeyExprMeta(label) {};
122  virtual ~CQCountKeyExprFileId() {};
   // Declared only; definition is not in this header.
123  virtual string& Evaluate(const CHit &Hit);
124  virtual string jsonClass() { return "CQCountKeyExprFileId"; };
125 };
126 
127 //----------------------------------------------------------------------
// Body of class CQCountKeyExprIndexed. The listing omits the class header (index: QCount.h:129),
// line 131 ("class CConcordance * m_pConcordance" per the index) and line 134
// ("virtual ~CQCountKeyExprIndexed()" per the index). The constructor shows the base is
// CQCountKeyExprMeta.
130 public:
132 public:
   // Default label is "file". m_pConcordance is explicitly set to NULL so the pointer never
   // holds an indeterminate value before Compile() runs; this matches how the sibling classes
   // in this header initialize m_pDates, m_Regex and m_pIndex.
133  CQCountKeyExprIndexed(const string &label="file") : CQCountKeyExprMeta(label), m_pConcordance(NULL) {};
   // Declared only; definition is not in this header.
135  virtual bool Compile(CQueryCompiler *compiler);
136  virtual string jsonClass() { return "CQCountKeyExprIndexed"; };
137 };
138 
139 //----------------------------------------------------------------------
// Body of class CQCountKeyExprFileName. The listing omits the class header (index: QCount.h:141)
// and line 144, where the index lists "virtual ~CQCountKeyExprFileName()".
// The constructor shows the base is CQCountKeyExprIndexed.
142 public:
   // Default label is "filename".
143  CQCountKeyExprFileName(const string &label="filename") : CQCountKeyExprIndexed(label) {};
   // Declared only; definition is not in this header.
145  virtual string& Evaluate(const CHit &Hit);
146  virtual string jsonClass() { return "CQCountKeyExprFileName"; };
147 };
148 
149 //----------------------------------------------------------------------
// Body of class CQCountKeyExprDate. The listing omits the class header (index: QCount.h:151)
// and line 154, where the index lists "const ddcDateVector * m_pDates".
// The constructor shows the base is CQCountKeyExprIndexed.
152 {
153 public:
   // Date components and a 16-byte character buffer. How they are filled is decided in
   // Compile()/Evaluate(), which are defined elsewhere.
   // NOTE(review): the constructor below initializes only m_pDates; m_year, m_month, m_day
   // and buf have no initializer here — confirm they are always written before being read.
155  signed int m_year;
156  unsigned int m_month;
157  unsigned int m_day;
158  char buf[16];
159 
160 public:
   // Default label is "date"; m_pDates starts as NULL.
161  CQCountKeyExprDate(const string &label="date") : CQCountKeyExprIndexed(label), m_pDates(NULL) {};
162  virtual ~CQCountKeyExprDate() {};
163  virtual bool Compile(CQueryCompiler *compiler);
164  virtual string& Evaluate(const CHit &Hit);
165  virtual string jsonClass() { return "CQCountKeyExprDate"; };
166 };
167 
168 //----------------------------------------------------------------------
// Body of class CQCountKeyExprDateSlice. The listing omits the class header (index: QCount.h:170)
// and line 176, where the index lists "virtual ~CQCountKeyExprDateSlice()".
// The constructor shows the base is CQCountKeyExprDate.
171 {
172 public:
   // Slice value supplied to the constructor; its interpretation lives in Compile()/Evaluate(),
   // which are defined elsewhere.
173  unsigned int m_slice;
174 public:
   // Default label "date", default slice 1.
   // NOTE(review): `slice` is a signed int stored into an unsigned member, so a negative
   // argument would wrap to a large value — confirm callers never pass one.
175  CQCountKeyExprDateSlice(const string &label="date", int slice=1) : CQCountKeyExprDate(label), m_slice(slice) {};
177  virtual bool Compile(CQueryCompiler *compiler);
178  virtual string& Evaluate(const CHit &Hit);
   // Renders as "<label>/<slice>".
   // NOTE(review): m_slice is unsigned but is formatted with %d here and in jsonData().
179  virtual string toString() { return m_Label + Format("/%d",m_slice); };
180  virtual string jsonClass() { return "CQCountKeyExprDateSlice"; };
   // Appends ,"slice":<slice> to the JSON data produced by CQCountKeyExprDate::jsonData().
181  virtual string jsonData() { return CQCountKeyExprDate::jsonData() + Format(",\"slice\":%d",m_slice); };
182 };
183 
184 //----------------------------------------------------------------------
// Body of class CQCountKeyExprBibl. The listing omits the class header (index: QCount.h:186)
// and line 189, where the index lists "const CFreeBiblIndexInterface * m_Bibl".
// The constructor shows the base is CQCountKeyExprIndexed.
187 {
188 public:
190 public:
   // attrName (default: empty string) becomes the expression label. m_Bibl is explicitly set to
   // NULL so the pointer never holds an indeterminate value before Compile() runs; this matches
   // how the sibling classes in this header initialize m_pDates, m_Regex and m_pIndex.
191  CQCountKeyExprBibl(const string &attrName="") : CQCountKeyExprIndexed(attrName), m_Bibl(NULL) {};
192  virtual ~CQCountKeyExprBibl() {};
   // Compile and Evaluate are declared only; their definitions are not in this header.
193  virtual bool Compile(CQueryCompiler *compiler);
194  virtual string& Evaluate(const CHit &Hit);
195  virtual string jsonClass() { return "CQCountKeyExprBibl"; };
   // Renders as '<label>' where the label is passed through escapeChars with the
   // character set backslash and single quote.
196  virtual string toString() { return string("'") + escapeChars(m_Label,"\\'") + "'"; };
197 };
198 
199 //----------------------------------------------------------------------
// Body of class CQCountKeyExprRegex. The listing omits the class header (index: QCount.h:201)
// and the member lines 204 ("CQCountKeyExpr * m_Src"), 206 ("string m_Replacement") and
// 209 ("RML_RE * m_Regex"), all of which appear in the index.
// The constructor shows the base is CQCountKeyExprIndexed.
202 {
203 public:
   // Pattern and modifier strings as given to the constructor.
205  string m_Pattern;
207  string m_Modifiers;
   // Selects GlobalReplace() over Replace() in EvaluateRegex(); the constructor sets it to false.
208  bool isGlobal;
210 
211 public:
   // Stores the source expression, pattern, replacement and modifiers; the label is fixed to
   // "regex", isGlobal starts false and m_Regex starts NULL.
212  CQCountKeyExprRegex(CQCountKeyExpr *src, const string& pattern, const string &replacement, const string& modifiers=string(""))
213  : CQCountKeyExprIndexed("regex"), m_Src(src), m_Pattern(pattern), m_Replacement(replacement), m_Modifiers(modifiers), isGlobal(false), m_Regex(NULL) {};
214  virtual ~CQCountKeyExprRegex();
215  virtual void DisownChildren();
   // Delegates to the source expression.
   // NOTE(review): m_Src is dereferenced without a null check.
216  virtual bool CanCountByFile() const { return m_Src->CanCountByFile(); };
217  virtual bool Compile(CQueryCompiler *compiler);
218  virtual string& Evaluate(const CHit &Hit);
   // Copies srcKey into m_CurKey, rewrites m_CurKey in place with m_Replacement (GlobalReplace
   // when isGlobal is set, otherwise Replace), then records Hit.m_FileNo in m_CurFile.
   // NOTE(review): m_Regex is NULL after construction and is dereferenced here without a check;
   // presumably Compile() assigns it first — confirm.
219  inline void EvaluateRegex(const CHit &Hit, const string& srcKey)
220  {
221  m_CurKey = srcKey;
222  if (isGlobal)
223  m_Regex->GlobalReplace(m_Replacement, &m_CurKey);
224  else
225  m_Regex->Replace(m_Replacement, &m_CurKey);
226  m_CurFile = Hit.m_FileNo;
227  };
228 
229  //-- ddcObject API
230  virtual ddcObjectList Children() const;
231  virtual string toString();
232  virtual string jsonClass() { return "CQCountKeyExprRegex"; };
233  virtual string jsonData();
234 };
235 
236 //----------------------------------------------------------------------
// Body of class CQCountKeyExprToken. The listing omits the class header (index: QCount.h:238)
// and the member lines 245 ("BYTE m_MatchId", match-id of anchor token, 0 for any) and
// 251 ("CStringIndexSet * m_pIndex", underlying index-set), both listed in the index.
// The constructor shows the base is CQCountKeyExprIndexed.
239 {
240 public:
   // Name of the token index attribute (wording from the index entry).
242  string m_IndexName;
243 
246 
   // Offset of the reference token (wording from the index entry).
248  int m_Offset;
249 
252 
253 public:
   // Defaults: IndexName "Token", MatchId 0, Offset 0. The label is fixed to "token" and
   // m_pIndex starts NULL.
254  CQCountKeyExprToken(const string &IndexName="Token", BYTE MatchId=0, int Offset=0)
255  : CQCountKeyExprIndexed("token"), m_IndexName(IndexName), m_MatchId(MatchId), m_Offset(Offset), m_pIndex(NULL)
256  {};
257  virtual ~CQCountKeyExprToken() {};
   // Returns the stored match-id.
258  virtual BYTE GetMatchId() const { return m_MatchId; };
   // Stores matchId and returns the new value.
259  virtual BYTE SetMatchId(BYTE matchId) { return m_MatchId = matchId; };
   // Always false for token expressions.
260  virtual bool CanCountByFile() const { return false; };
   // The remaining methods except jsonClass() are declared only; definitions are not in this header.
261  virtual bool Compile(CQueryCompiler *compiler);
262  virtual string& Evaluate(const CHit &Hit);
263  virtual string toString();
264  virtual string jsonClass() { return "CQCountKeyExprToken"; };
265  virtual string jsonData();
266 };
267 
268 //----------------------------------------------------------------------
269 // count-key expr: list
// Body of class CQCountKeyExprList. The listing omits the class header (index: QCount.h:270),
// line 282 ("CQCountKeyExprList(const CQCountKeyExprList &src)" per the index) and
// line 295 ("void PushKey(CQCountKeyExpr *expr)" per the index).
271 {
272 public:
   // The key expressions held by this list, stored as raw pointers.
274  vector<CQCountKeyExpr*> m_Exprs;
275 
   // String member initialized to empty by the default constructor; its use is in code
   // defined elsewhere.
277  string m_Key;
278 
279 public:
   // Default constructor: empty expression vector, empty key.
280  CQCountKeyExprList() : m_Key("") {};
281 
   // Initializer list and body of the copy constructor whose signature is on the omitted line 282.
   // It copies the vector of pointers and the key, so both lists refer to the same
   // CQCountKeyExpr objects.
   // NOTE(review): ownership of the shared pointers depends on the destructor, Clear() and
   // DisownChildren(), which are defined elsewhere — confirm copies cannot double-delete.
283  : m_Exprs(src.m_Exprs), m_Key(src.m_Key)
284  {};
285 
286  virtual ~CQCountKeyExprList();
287 
289  virtual void Clear();
290 
   // True when the list holds no expressions.
292  inline bool empty() const { return m_Exprs.empty(); };
293 
   // Body of PushKey (signature on the omitted line 295): appends expr to m_Exprs.
296  { m_Exprs.push_back(expr); };
297 
299  virtual bool CanCountByFile() const;
300 
301  virtual BYTE GetMatchId() const;
302 
303  virtual bool Compile(CQueryCompiler *compiler);
304  virtual string& Evaluate(const CHit &Hit);
305 
306  //-- ddcObject API
307  virtual ddcObjectList Children() const;
308  virtual void DisownChildren();
309  virtual string toString();
310  virtual string jsonClass() { return "CQCountKeyExprList"; };
311  virtual string jsonData();
312 
313 };
314 
315 //======================================================================
// Count query (index entry for QCount.h:320); derives from CQuery with the fixed label "count".
// The listing omits several member lines that the index at the end of this page lists:
// 323 "CQuery * m_dtr", 329 "HitSortEnum m_sort", 338 "CQCountKeyExprList * m_keys",
// 341 "CCountMap m_Counts" and 344 "size_t m_CountSample".
320 class CQCount : public CQuery {
321 public:
324 
   // Sample size; set from the constructor argument sample_, with 0 replaced by
   // DDCDefaultCountSampleSize.
326  size_t m_sample;
327 
330 
   // Receives the constructor argument lb.
332  string m_lo;
333 
   // Receives the constructor argument ub.
335  string m_hi;
336 
339 
342 
345 
346 public:
   // Stores the sub-query, key list, sample size, sort order and the lb/ub strings;
   // m_CountSample starts at 0. A sample_ of 0 is replaced by DDCDefaultCountSampleSize
   // in the constructor body.
   // NOTE(review): going by the embedded line numbers, m_lo and m_hi are declared before
   // m_keys while this initializer list names m_keys first. Members are initialized in
   // declaration order, so this is harmless here but may trigger a reorder warning.
348  CQCount(CQuery *dtr_=NULL, CQCountKeyExprList* keys_=NULL, size_t sample_=DDCDefaultCountSampleSize, HitSortEnum sort_=NoSort, string lb="", string ub="")
349  : CQuery("count"),
350  m_dtr(dtr_),
351  m_sample(sample_),
352  m_sort(sort_),
353  m_keys(keys_),
354  m_lo(lb),
355  m_hi(ub),
356  m_CountSample(0)
357  {
358  if (sample_==0) m_sample = DDCDefaultCountSampleSize;
359  };
360 
362  virtual ~CQCount(void);
363 
364 public:
   // All methods below except jsonClass() are declared only; their definitions are not
   // in this header.
366  virtual BYTE GetMatchId() const;
367 
   // compileDtr and compileKeys both default to true.
369  bool CompileLocal(CQueryCompiler *compiler, bool compileDtr=true, bool compileKeys=true);
370 
372  virtual bool CheckSatisfiable(CQueryCompiler *compiler);
373 
375  virtual bool Compile(CQueryCompiler *compiler);
376 
378  virtual void Evaluate();
379 
381  virtual bool CanCountUniversal();
382 
384  virtual bool CanCountByFile() const;
385 
   // The three methods below take the result object by reference plus a size_t Limit.
390  virtual void CountUniversal(CQueryResult &Result, size_t Limit);
391 
397  virtual void CountLocal(CQueryResult &Result, size_t Limit);
398 
403  virtual void ConvertCountsToHits(CQueryResult &Result, size_t Limit);
404 
406  virtual string toString();
407  //virtual string optionsToString() { return ""; };
408  virtual string countOptionsToString();
409 
   // Returns the literal class name "CQCount".
410  virtual string jsonClass() { return "CQCount"; };
411  virtual string jsonData();
412  //virtual string optionsToJson() { return "null"; };
413 
415  virtual void ClearNode();
416 
   // deep defaults to true.
418  virtual void Clear(bool deep=true);
419 
420  //-- ddcObject API
421  virtual ddcObjectList Children() const;
422  virtual void DisownChildren();
423 };
424 
425 
426 //======================================================================
431 //======================================================================
// Query class with the fixed label "keys"; derives from CQuery and wraps a CQCount query.
// The listing omits several member lines that the index at the end of this page lists:
// 443 "CQCount * m_qCount", 452 "BYTE m_MatchId" and 458 "CQuery * m_proxy".
436 class CQKeys : public CQuery {
437 public:
   // Set of string tuples (index: "typedef for target value-tuples").
439  typedef set< vector<string> > StringTupleSet;
440 
441 public:
444 
   // Limit for the m_qCount subquery (wording from the index entry).
446  size_t m_CountLimit;
447 
   // Target index-tuple (wording from the index entry).
449  vector<string> m_IndexNames;
450 
453 
   // Pointers to CQTokSet sub-queries; the index marks this member as pseudo-private.
455  vector<CQTokSet*> m_dtrs;
456 
459 
460  // pseudo-private: all value tuples
461  //StringTupleSet m_tuples;
462 
463 public:
   // Stores the count query and limit; m_MatchId starts at 0 and m_proxy at NULL.
   // m_IndexNames is left default-constructed (empty).
465  CQKeys(CQCount *qCount=NULL, size_t CountLimit=DDCDefaultCountSampleSize)
466  : CQuery("keys"), m_qCount(qCount), m_CountLimit(CountLimit), m_MatchId(0), m_proxy(NULL)
467  {};
468 
   // Same as above, but additionally copies IndexNames into m_IndexNames.
470  CQKeys(CQCount *qCount, size_t CountLimit, const vector<string>& IndexNames)
471  : CQuery("keys"), m_qCount(qCount), m_CountLimit(CountLimit), m_IndexNames(IndexNames), m_MatchId(0), m_proxy(NULL)
472  {};
473 
475  virtual ~CQKeys(void);
476 
477 public:
   // Methods below without a body are declared only; their definitions are not in this header.
479  virtual bool Compile(CQueryCompiler *compiler);
480 
482  virtual void Evaluate();
483 
485  virtual BYTE GetMatchId() const;
486 
   // Stores matchId in m_MatchId and returns the new value.
488  virtual BYTE SetMatchId(BYTE matchId)
489  { return m_MatchId = matchId; };
490 
492  virtual string toString();
493 
   // Returns the literal class name "CQKeys".
494  virtual string jsonClass() { return "CQKeys"; };
495  virtual string jsonData();
496 
498  virtual void ClearNode();
499 
   // deep defaults to true.
501  virtual void Clear(bool deep=true);
502 
503  //-- ddcObject API
504  virtual ddcObjectList Children() const;
505  virtual void DisownChildren();
506 };
507 
508 
509 //======================================================================
510 // functions
511 
// Declared only (the index places the definition at QCount.cpp:1047).
// Takes the output string by reference, a format selector, a key string and a count.
513 void GenerateCountString(string &ResultStr, DDCFormatTypeEnum fmt, const string& key, size_t count);
514 
// Convenience overload for a CHit: forwards to the four-argument overload above, using
// hit.m_SortKey.s as the key and hit.m_Value.m_Count as the count.
516 inline void GenerateCountString(string &ResultStr, DDCFormatTypeEnum fmt, const CHit &hit)
517 { GenerateCountString(ResultStr, fmt, hit.m_SortKey.s, hit.m_Value.m_Count); };
518 
519 
520 #endif /* DDC_QCOUNT_H */
521 
522 /*--- emacs style variables ---
523  * Local Variables:
524  * mode: C++
525  * c-file-style: "ellemtel"
526  * c-basic-offset: 4
527  * tab-width: 8
528  * indent-tabs-mode: nil
529  * End:
530  */
Definition: QCount.h:91
CQCountKeyExprList()
Definition: QCount.h:280
CQCountKeyExpr(const string &label="")
Definition: QCount.h:59
CQCountKeyExprList(const CQCountKeyExprList &src)
Definition: QCount.h:282
signed int m_year
Definition: QCount.h:155
string m_Replacement
Definition: QCount.h:206
void EvaluateRegex(const CHit &Hit, const string &srcKey)
Definition: QCount.h:219
virtual bool CanCountByFile() const
Definition: QCount.h:260
Definition: QCount.h:170
virtual ~CQCountKeyExprFileId()
Definition: QCount.h:122
Definition: QCount.h:201
virtual string toString()
Definition: QCount.h:179
RML_RE * m_Regex
Definition: QCount.h:209
DDCFormatTypeEnum
FormatTypeEnum defines the format of output hits:
Definition: ConcCommon.h:468
unsigned int m_slice
Definition: QCount.h:173
Definition: QCount.h:102
virtual string & Evaluate(const CHit &Hit)
Definition: QCount.h:95
virtual string jsonClass()
Definition: QCount.h:96
string Format(const char *format,...)
Definition: ddcString.cpp:393
Definition: QCount.h:151
string s
secondary sort key, new for v2.0.19
Definition: ConcCommon.h:268
CQCountKeyExprFileName(const string &label="filename")
Definition: QCount.h:143
bool Replace(const StringPiece &rewrite, string *str) const
Definition: PCRE/pcre_rml.cpp:407
string m_CurKey
last key generated
Definition: QCount.h:108
virtual ~CQCountKeyExprMeta()
Definition: QCount.h:112
virtual ~CQCountKeyExprDate()
Definition: QCount.h:162
virtual ~CQCountKeyExprIndexed()
Definition: QCount.h:134
BYTE m_MatchId
match-id of anchor token (0 for any)
Definition: QCount.h:452
void GenerateCountString(string &ResultStr, DDCFormatTypeEnum fmt, const string &key, size_t count)
Definition: QCount.cpp:1047
count query
Definition: QCount.h:320
virtual string toString()
Definition: QCount.h:80
class CConcordance * m_pConcordance
Definition: QCount.h:131
CQCountKeyExprConstant(const string &label="*")
Definition: QCount.h:93
CQKeys(CQCount *qCount=NULL, size_t CountLimit=DDCDefaultCountSampleSize)
bison constructor
Definition: QCount.h:465
void PushKey(CQCountKeyExpr *expr)
Definition: QCount.h:295
string jsonData()
Definition: QCount.h:86
size_t m_CountSample
temporary for tracking count-query sample size
Definition: QCount.h:344
virtual string jsonClass()
Definition: QCount.h:165
virtual ~CQCountKeyExprFileName()
Definition: QCount.h:144
virtual string jsonData()
Definition: QCount.h:181
BYTE m_MatchId
match-id of anchor token (0 for any)
Definition: QCount.h:245
virtual string jsonClass()
Definition: QCount.h:410
string m_IndexName
name of token index attribute
Definition: QCount.h:242
Definition: ConcCommon.h:248
virtual string jsonClass()
Definition: QCount.h:264
DWORD m_CurFile
last file-number evaluated
Definition: QCount.h:105
virtual bool CanCountByFile() const
Definition: QCount.h:216
CQCount * m_qCount
underlying count() query supplying keys
Definition: QCount.h:443
string jsonStr(ddcObject *o)
json stringification wrapper
Definition: ddcObject.h:208
union CHit::@0 m_Value
CQCountKeyExprIndexed(const string &label="file")
Definition: QCount.h:133
CQueryResult: query results, possibly aggregated by "break".
Definition: QueryResult.h:43
virtual ~CQCountKeyExprConstant()
Definition: QCount.h:94
virtual ~CQCountKeyBadOccurrence()
Definition: QCount.h:44
string jsonClass()
Definition: QCount.h:83
Definition: Concordance.h:126
virtual string jsonClass()
Definition: QCount.h:494
CCountMap m_Counts
temporary for constructing count-query results
Definition: QCount.h:341
size_t m_sample
sample size (sub-query limit)
Definition: QCount.h:326
CQCountKeyExprToken(const string &IndexName="Token", BYTE MatchId=0, int Offset=0)
Definition: QCount.h:254
vector< CQCountKeyExpr * > m_Exprs
Definition: QCount.h:274
virtual ~CQCountKeyExprBibl()
Definition: QCount.h:192
virtual string jsonClass()
Definition: QCount.h:310
virtual string toString()
Definition: QCount.h:196
HitSortEnum m_sort
sort-order one of: NoSort, (Less|Greater)ByCount(Key|Value)
Definition: QCount.h:329
list< ddcObject * > ddcObjectList
Definition: ddcObject.h:28
virtual string jsonClass()
Definition: QCount.h:195
const CFreeBiblIndexInterface * m_Bibl
Definition: QCount.h:189
bool isGlobal
Definition: QCount.h:208
virtual string jsonClass()
Definition: QCount.h:124
CQCountKeyBadOccurrence(const string &what_arg)
Definition: QCount.h:41
virtual ~CQCountKeyExpr()
Definition: QCount.h:67
vector< string > m_IndexNames
target index-tuple
Definition: QCount.h:449
CQKeys(CQCount *qCount, size_t CountLimit, const vector< string > &IndexNames)
bison constructor
Definition: QCount.h:470
Definition: QCount.h:270
CStringIndexSet * m_pIndex
underlying index-set (post-compile())
Definition: QCount.h:251
Definition: QCount.h:238
bool empty() const
Definition: QCount.h:292
virtual BYTE GetMatchId() const
Get current match-id (should return a nontrivial match-id if any was used, else 0; default just retur...
Definition: QCount.h:258
string m_hi
maximum sort-key to return (empty for none)
Definition: QCount.h:335
string m_Modifiers
Definition: QCount.h:207
int GlobalReplace(const StringPiece &rewrite, string *str) const
Definition: PCRE/pcre_rml.cpp:424
Definition: QCount.h:55
DWORD m_FileNo
the index of the corpus file where this hit is found; it is equal to m_BreakNo if user searches within f...
Definition: ConcCommon.h:445
Definition: QueryCompiler.h:50
virtual string jsonClass()
Definition: QCount.h:146
unsigned char BYTE
Definition: utilit.h:94
string m_lo
minimum sort-key to return (empty for none)
Definition: QCount.h:332
set< vector< string > > StringTupleSet
typedef for target value-tuples
Definition: QCount.h:439
virtual BYTE SetMatchId(BYTE matchId)
Set match-id, returns new value. Default throws an exception.
Definition: QCount.h:259
string m_Key
Definition: QCount.h:277
map< string, size_t > CCountMap
type for count-query maps
Definition: QueryResult.h:30
Definition: ConcCommon.h:438
Definition: QCount.h:117
Definition: QCount.h:186
virtual string jsonClass()
Definition: QCount.h:136
Definition: pcre_rml.h:41
const size_t DDCDefaultCountSampleSize
Definition: QCount.h:30
CQCountKeyExprDateSlice(const string &label="date", int slice=1)
Definition: QCount.h:175
CQCountKeyExpr * m_Src
Definition: QCount.h:204
CQCount(CQuery *dtr_=NULL, CQCountKeyExprList *keys_=NULL, size_t sample_=DDCDefaultCountSampleSize, HitSortEnum sort_=NoSort, string lb="", string ub="")
bison constructor
Definition: QCount.h:348
const ddcDateVector * m_pDates
Definition: QCount.h:154
unsigned int m_month
Definition: QCount.h:156
CQCountKeyExprFileId(const string &label="fileid")
Definition: QCount.h:121
CHitSortKey m_SortKey
Definition: ConcCommon.h:457
size_t m_Count
count for this item (for count-queries)
Definition: ConcCommon.h:452
int m_Offset
offset of reference token
Definition: QCount.h:248
CQCountKeyExprRegex(CQCountKeyExpr *src, const string &pattern, const string &replacement, const string &modifiers=string(""))
Definition: QCount.h:212
unsigned int m_day
Definition: QCount.h:157
#define _GLIBCXX_USE_NOEXCEPT
Definition: QCount.h:34
virtual string jsonClass()
Definition: QCount.h:232
virtual string toString()
Definition: QCount.h:97
CQuery * m_proxy
pseudo-private: proxy query for join of independent attributes (WITH)
Definition: QCount.h:458
Definition: QCount.h:129
virtual bool CanCountByFile() const
Definition: QCount.h:71
CQCountKeyExprBibl(const string &attrName="")
Definition: QCount.h:191
vector< CQTokSet * > m_dtrs
pseudo-private: temporary sub-queries for each CQCountKeyExprToken key-expression in m_qCount->m_keys...
Definition: QCount.h:455
string m_Pattern
Definition: QCount.h:205
uint32_t DWORD
Definition: utilit.h:105
Definition: QCount.h:38
CQCountKeyExprMeta(const string &label="*")
Definition: QCount.h:111
virtual ~CQCountKeyExprDateSlice()
Definition: QCount.h:176
CQCountKeyExprDate(const string &label="date")
Definition: QCount.h:161
virtual BYTE SetMatchId(BYTE matchId)
Set match-id, returns new value.
Definition: QCount.h:488
utility struct for iterating over hit occurrences
Definition: QCount.h:436
HitSortEnum
Definition: ConcCommon.h:119
Definition: IndexSet.h:57
Definition: QCount.h:141
Truly abstract (index-independent) representation of a parsed query Should eventually replace old ad...
Definition: Query.h:36
CQCountKeyExprList * m_keys
list of count-bin metadata key expressions
Definition: QCount.h:338
virtual ~CQCountKeyExprToken()
Definition: QCount.h:257
virtual string jsonClass()
Definition: QCount.h:180
no sort operators, only filtering (used by #has_field[])
Definition: ConcCommon.h:121
size_t m_CountLimit
limit for m_qCount subquery
Definition: QCount.h:446
CQuery * m_dtr
underlying query conditions to be counted
Definition: QCount.h:323
string escapeChars(const string &s, const string &chars)
Definition: ddcString.cpp:353