91 m_bUseNodeIndices(bUseNodeIndices),
92 m_bUseMatchIds(bUseMatchIds),
101 virtual void Evaluate(
bool bSeparateHits=
false);
120 if (m_bUseNodeIndices)
131 const CQueryNode& FromNode2,
int start2,
int end2);
135 const CQueryNode& FromNode2,
int start2,
int end2,
136 const CQueryNode& FromNode3,
int start3,
int end3);
165 {
return (HitNo == 0) ? 0 : m_Hits[HitNo-1].m_HighlightOccurrenceEnd; };
169 {
return m_Hits[HitNo].m_HighlightOccurrenceEnd >= m_Occurrences.size() ? m_Occurrences.size() : m_Hits[HitNo].m_HighlightOccurrenceEnd; };
185 {
return m_Hits.empty() ? 1 : max((
size_t)1, (m_Hits.back().m_BreakNo-m_Hits.front().m_BreakNo)/m_Hits.size()); };
197 void hits_add(
const CQueryNode& NodeFrom, vector<CHit>::const_iterator First);
211 :
CQueryNode(false), m_pChild1(dtr1), m_pChild2(dtr2)
230 void hits_and_positions_assign(
const CQueryNode &NodeFrom);
233 void hits_and_positions_swap(
CQueryNode &NodeFrom);
237 void hits_and_positions_union(
bool bSeparateHits=
false);
241 void hits_and_positions_intersection(
bool bSeparateHits=
false);
245 void hits_and_positions_difference(
bool bSeparateHits=
false);
258 void Evaluate(
bool bSeparateHits=
false);
268 void Evaluate(
bool bSeparateHits=
false);
305 void SetMatchId(
BYTE MatchId);
314 void MakeUnique(
void);
317 bool CreateStringPattern(
const CConcHolder* pHolder,
const string& IndexName,
const string& Value);
320 bool CreateRegexPattern(
const CConcHolder* pHolder,
const string& IndexName,
const string& Regex,
bool negated=
false);
323 bool CreateSetPattern(
const CConcHolder* pHolder,
const string& IndexName,
const set<string>& Values);
326 bool CreatePrefixSetPattern(
const CConcHolder* pHolder,
const string& IndexName,
const set<string>& Prefixes);
329 bool CreatePrefixPattern(
const CConcHolder* pHolder,
const string& IndexName,
const string& Prefix);
332 bool CreateSuffixSetPattern(
const CConcHolder* pHolder,
const string& IndexName,
const set<string>& Suffixes);
335 bool CreateSuffixPattern(
const CConcHolder* pHolder,
const string& IndexName,
const string& Suffix);
338 bool CreateInfixSetPattern(
const CConcHolder* pHolder,
const string& IndexName,
const set<string>& Infixes);
341 bool CreateInfixPattern(
const CConcHolder* pHolder,
const string& IndexName,
const string& Infix);
344 bool CreateUniversalPattern(
const CConcHolder *pHolder,
const string& SourceStr=
"*");
347 bool CreateNullPattern(
const CConcHolder *pHolder,
const string& IndexName=
"Token");
350 bool CreateLemmaPattern(
const CConcHolder* pHolder,
const string& Value);
353 bool CreateThesPattern(
const CConcHolder* pHolder,
const string& IndexName,
const string& src);
356 bool CreateMorphAnnotationPattern(
const CConcHolder* pHolder,
const string& IndexName,
const vector<string> &Items);
359 bool CreateFileList(
const CConcHolder* pHolder,
const string &IndexName,
const string& FileName);
362 bool CreateChunkPattern(
const CConcHolder* pHolder,
const string& IndexName,
const string& ChunkTypeStr);
365 bool CreateAnchorPattern(
const CConcHolder *pHolder,
const string& BreakName,
int anchor);
368 bool BuildRegExp(
string RegExpStr, vector<DWORD>& IndexItems,
bool negated=
false);
371 void Evaluate(
bool bSeparateHits=
false);
392 bool AddDistance(
const string& s,
char op=
'<');
401 bool Create(
const CConcHolder* pHolder,
const vector<const CQueryNode*>& SequenceObj,
const vector<string>& DistanceStrings);
404 bool Create(
const CConcHolder* pHolder,
const vector<CQueryTokenNode*>& Items,
const vector<BYTE>& Distances,
const vector<BYTE>& DistanceOps);
407 void Evaluate(
bool bSeparateHits=
false);
441 void FindOccurrences2();
442 void FindOccurrences3();
443 void Evaluate(
bool bSeparateHits=
false);
465 void Evaluate(
bool bSeparateHits=
false);
567 {
return cur < cnt; };
574 inline operator bool()
const 600 if (!valid())
return;
608 while (valid() && tmin() < Tok) operator++();
623 #define DDC_OCCITER_LB_COEF 4 670 stride = node && cnt ? max((
CTokenNo)1, (node->m_Occurrences.back()-node->m_Occurrences.front())/cnt) : 1;
693 {
return node->m_Occurrences[cur]; };
705 if (!valid())
return;
712 if (valid() && tmin() < Tok) {
714 cur = lower_bound(node->m_Occurrences.begin()+cur, node->m_Occurrences.end(), Tok) - node->m_Occurrences.begin();
716 do { ++cur; }
while (valid() && tmin() < Tok);
CQOccurrenceIterator(CQueryNode *node_=NULL)
Definition: QueryNode.h:517
CQOccurrenceIterator (formerly CQNearEvalItem) is used to iterate over query occurrences; used by nea...
Definition: QueryNode.h:510
bool m_bUseNodeIndices
should DDC use m_OccurrenceNodeIndices (m_OccurrenceNodeIndices is necessary only for #less_by_rank)...
Definition: QueryNode.h:58
bool m_bChunk
true, if it is a chunk (NP, VP and so on)
Definition: QueryNode.h:283
vector< DWORD > m_ChunkOffsets
Definition: QueryNode.h:81
bool m_bUseMatchIds
should DDC use m_OccurrenceMatchIds to track match-ids?
Definition: QueryNode.h:64
void assign(CQueryNode *node_)
Definition: QueryNode.h:549
virtual ~CQueryNode()
Destructor.
Definition: QueryNode.cpp:41
DWORD m_HighlightOccurrenceEnd
the end offset of token occurrences to be highlighted in CQueryNode::m_Occurrences and later in CConc...
Definition: ConcCommon.h:443
CQuerySequenceNode is an implementation of sequence operator, for example, "Mother and father"...
Definition: QueryNode.h:383
CQueryOrOperation implements the hit union or "or" operator: for example, "Mother||father".
Definition: QueryNode.h:263
vector< BYTE > m_DistanceOps
Definition: QueryNode.h:386
uint32_t log2u32(uint32_t v)
Definition: utilit.h:438
CQOccurrenceIterator & operator=(CQueryNode *node_)
Definition: QueryNode.h:559
BYTE m_Distance
the distance between the first child and the second child
Definition: QueryNode.h:432
CTokenNo tmax() const
Definition: QueryNode.h:590
CTokenNo tmin() const
Definition: QueryNode.h:692
vector< CHit > m_Hits
all hits of this node in the current subcorpora
Definition: QueryNode.h:70
CQueryNode * m_pChild1
the first child
Definition: QueryNode.h:423
CTokenNo tmax_prev() const
Definition: QueryNode.h:594
#define DDC_OCCITER_LB_COEF
Definition: QueryNode.h:623
CQOccurrenceIterator(const CQOccurrenceIterator &b)
Definition: QueryNode.h:521
CQueryNode(bool bUseNodeIndices=false, bool bUseMatchIds=false, const CConcHolder *holder=NULL)
Constructor.
Definition: QueryNode.h:88
virtual void Evaluate(bool bSeparateHits=false)
Definition: QueryNode.cpp:70
DWORD GetFirstOccurrenceInHit(DWORD HitNo, BYTE MatchId) const
Definition: QueryNode.cpp:368
CQueryWithoutNode: (EXPR1 without EXPR2) behaves like (EXPR1 WITH !EXPR2)
Definition: QueryNode.h:472
CQTokenOccurrenceIterator(CQueryNode *node_=NULL)
Definition: QueryNode.h:636
virtual ~CQOccurrenceIterator()
Definition: QueryNode.h:524
void invalidate()
Definition: QueryNode.h:570
DWORD GetLastOccurrenceInHit(DWORD HitNo, BYTE MatchId) const
Definition: QueryNode.cpp:378
CQueryBinaryOperationNode(CQueryNode *dtr1, CQueryNode *dtr2)
Definition: QueryNode.h:210
DWORD chpos
index into node->m_ChunkLengths[] (match occurrence)
Definition: QueryNode.h:514
CQueryNode * m_pChild2
the second child
Definition: QueryNode.h:426
CQueryNearNode is a class which is used to search two or three child nodes, which are inside some tex...
Definition: QueryNode.h:419
vector< CQueryTokenNode * > m_Items
Definition: QueryNode.h:388
DWORD stride
average number of tokens between occurrences in node
Definition: QueryNode.h:633
void AddOccurrence(const CQueryNode &FromNode, int FromOcc)
inserts a single occurrence FromNode.m_Occurrences[FromOcc]
Definition: QueryNode.h:117
CQTokenOccurrenceIterator & operator=(CQueryTokenNode *node_)
Definition: QueryNode.h:674
CQueryNode is an abstract class for any node in a query parse tree. A parse tree is built by YACC dur...
Definition: QueryNode.h:42
CQOccurrenceIterator & operator=(const CQOccurrenceIterator &b)
Definition: QueryNode.h:539
int m_AnchorOffset
integer offset for break-anchor ($.) queries
Definition: QueryNode.h:292
const size_t MaxDistanceForNear
Definition: QueryNode.cpp:36
Definition: QueryNode.h:194
CQueryNode * m_pChild2
the second operation member
Definition: QueryNode.h:204
vector< BYTE > m_OccurrenceMatchIds
match-ids for each occurrence in m_Occurrences (only if m_bUseMatchIds is true)
Definition: QueryNode.h:67
CQueryTokenNode is a class which corresponds to one token or its singular property (for example lemm...
Definition: QueryNode.h:273
void operator++()
Definition: QueryNode.h:703
vector< DWORD > m_IndexItems
find index items in the index which is called m_IndexName
Definition: QueryNode.h:277
class for a single (thread-local) DDC query session; formerly CConcHolder. An instance of CConcSessio...
Definition: ConcSession.h:54
void operator++()
Definition: QueryNode.h:598
CQTokenOccurrenceIterator & operator=(const CQTokenOccurrenceIterator &b)
Definition: QueryNode.h:658
string m_IndexName
the name of index or break collection on which this node is built (see CQueryChunkNode and CQueryToke...
Definition: QueryNode.h:280
vector< BYTE > m_OccurrenceNodeIndices
query node indices for each occurrence (the origin for each occurrence)
Definition: QueryNode.h:61
CTokenNo tmin() const
Definition: QueryNode.h:586
void AddOccurrences2(const CQueryNode &FromNode1, int start1, int end1, const CQueryNode &FromNode2, int start2, int end2)
appends occurrences from both FromNode1 and FromNode2, maintaining sort-order and respecting both m_b...
Definition: QueryNode.cpp:108
bool valid() const
Definition: QueryNode.h:566
virtual void EvaluateWithoutHits()
initial-stage query evaluation. EvaluateWithoutHits is the first stage of the evaluation of one query...
Definition: QueryNode.cpp:66
CQueryWithorNode()
Definition: QueryNode.h:492
virtual ~CQueryWithNode()
Definition: QueryNode.h:459
virtual size_t GetNodeFrequencyByNodeIndex(size_t NodeIndex) const
get occurrence count by query-node index; used by rank-sort operator. default implementation in CQuer...
Definition: QueryNode.cpp:54
CQueryWithNode is a class which is used to search a node which simultaneously has two properties...
Definition: QueryNode.h:452
bool GetNegation() const
get m_bNegated
Definition: QueryNode.cpp:91
CQueryWithorNode: (EXPR1 withor EXPR2) behaves like (EXPR1 || EXPR2) under token-identity.
Definition: QueryNode.h:489
CQueryAndOperation implements the hit intersection or "and" operator: for example, "Mother&&father".
Definition: QueryNode.h:253
DWORD HitOccurrencesBegin(DWORD HitNo) const
get the index of the first occurrence in m_Occurrences[] for hit number HitNo
Definition: QueryNode.h:164
CQTokenOccurrenceIterator(const CQTokenOccurrenceIterator &b)
Definition: QueryNode.h:640
vector< CTokenNo > m_Occurrences
all occurrences of this node in the current subcorpus, which should be highlighted ...
Definition: QueryNode.h:55
BYTE m_MatchId
Definition: QueryNode.h:455
size_t chunksize() const
Definition: QueryNode.h:685
bool m_bAnchor
true iff this is an anchor query
Definition: QueryNode.h:286
virtual ~CQueryAndOperation()
Definition: QueryNode.h:257
const CConcHolder * m_pHolder
a reference to the parent holder (moo: should be unused until evaluation time, but isn't!) ...
Definition: QueryNode.h:73
DWORD GetMiddleOccurrenceInHit(DWORD HitNo, BYTE MatchId) const
Definition: QueryNode.cpp:388
void seek(const CTokenNo Tok)
Definition: QueryNode.h:710
vector< DWORD > m_ChunkLengths
Definition: QueryNode.h:77
BYTE m_NodeIndex
a unique index of the node in query tree (used by rank-sort operator)
Definition: QueryNode.h:296
virtual bool IsUniversalWildcard() const
return true iff this is a universal wildcard query (*); default returns false
Definition: QueryNode.cpp:208
CQTokenOccurrenceIterator (formerly CQNearEvalItem) is used to iterate over query occurrences;...
Definition: QueryNode.h:631
CQueryWithoutNode()
Definition: QueryNode.h:475
CTokenNo tocc(DWORD occ) const
Definition: QueryNode.h:582
CQueryBinaryOperationNode()
Definition: QueryNode.h:207
unsigned char BYTE
Definition: utilit.h:94
int cur
index into node->m_Occurrences[]
Definition: QueryNode.h:513
DWORD HitOccurrencesEnd(DWORD HitNo) const
get the index of the first occurrence in m_Occurrences[] after hit number HitNo
Definition: QueryNode.h:168
void ConvertOccurrencesToHitsForPatterns(bool bSeparateHits)
convert occurrences to hits for pattern query construction (like "mother likes father") ...
Definition: QueryNode.cpp:291
virtual ~CQueryOrOperation()
Definition: QueryNode.h:267
Definition: ConcCommon.h:438
bool operator==(const CQTokenOccurrenceIterator &b) const
Definition: QueryNode.h:722
int cnt
limit for node->m_Occurrences[]
Definition: QueryNode.h:512
void SetHolder(const CConcHolder *pHolder)
set m_pHolder-dependent properties m_bUseNodeIndices, m_bUseMatchIds
Definition: QueryNode.cpp:213
virtual ~CQueryWithorNode()
Definition: QueryNode.h:494
DWORD BreakStride() const
get average size of gap between breaks in m_Hits[]; returned value is always >= 1 ...
Definition: QueryNode.h:184
BYTE m_MatchId
user-specified match-id (0 specifies default behavior, which is to use highlight-id=255) ...
Definition: QueryNode.h:299
void ConvertOccurrencesToHits(bool bSeparateHits)
convert occurrences to hits using m_pHolder->GetBreaks()
Definition: QueryNode.cpp:227
bool m_bAtomic
true if this node is a description of one token or a sequence of adjacent tokens
Definition: QueryNode.h:46
CQueryNode * node
(sub)query node
Definition: QueryNode.h:511
virtual ~CQTokenOccurrenceIterator()
Definition: QueryNode.h:643
void ClearAndReserveOccurrences(int size)
Definition: QueryNode.cpp:187
void AddOccurrences(const CQueryNode &FromNode, int start, int end)
inserts occurrences [FromNode.m_Occurrences.begin()+start, FromNode.m_Occurrences.begin()+end) into m_Occurrences[] (and m_OccurrenceNodeIndices if appropriate)
Definition: QueryNode.cpp:97
CQueryNode * m_pChild3
the third child (may be NULL)
Definition: QueryNode.h:429
void pushHit(DWORD BreakNo)
Definition: QueryNode.h:221
DWORD CTokenNo
integer type CTokenNo is used to refer an index of a token in the corpus
Definition: ConcCommon.h:63
MorphLanguageEnum
Definition: utilit.h:162
void AddOccurrences3(const CQueryNode &FromNode1, int start1, int end1, const CQueryNode &FromNode2, int start2, int end2, const CQueryNode &FromNode3, int start3, int end3)
appends occurrences from FromNode1, FromNode2, and FromNode3 maintaining sort-order and respecting bo...
Definition: QueryNode.cpp:142
CQueryNode * m_pChild1
the first operation member
Definition: QueryNode.h:201
uint32_t DWORD
Definition: utilit.h:105
void assign(CQueryNode *node_)
Definition: QueryNode.h:667
bool m_bAny
true iff this is a "universal wildcard" query
Definition: QueryNode.h:289
void EnsureChunkOffsets()
ensure m_ChunkOffsets[] is populated
Definition: QueryNode.cpp:75
Definition: IndexSet.h:57
vector< BYTE > m_Distances
Definition: QueryNode.h:387
bool m_bNegated
is the node negated?
Definition: QueryNode.h:49
vector< int > m_CacheIds
???
Definition: QueryNode.h:84
void GetWordForms(const MorphLanguageEnum Langua, const string &src, set< string > &WordForms)
Definition: graveyard.cc:110
void seek(const CTokenNo Tok)
Definition: QueryNode.h:606
void SetNegation(bool Value)
set m_bNegated
Definition: QueryNode.cpp:87
size_t chunksize() const
Definition: QueryNode.h:578
void SwapOccurrences(CQueryNode &Node, bool swapChunkLengths=false)
swaps m_Occurrences[], m_OccurrenceNodeIndices[], and m_OccurrenceMatchIds[] between caller object an...
Definition: QueryNode.cpp:200
virtual ~CQueryWithoutNode()
Definition: QueryNode.h:477
string m_Source
the string from which this node was created
Definition: QueryNode.h:52
CTokenNo tmax() const
Definition: QueryNode.h:696