ddc
|
CQueryTokenNode is a class which corresponds to one token or its singular property (for example, lemma) More...
#include <QueryNode.h>
Public Member Functions | |
CQueryTokenNode (int NodeIndex, BYTE MatchId=0) | |
virtual | ~CQueryTokenNode () |
void | SetMatchId (BYTE MatchId) |
compilation utility: set match-id More... | |
virtual bool | IsUniversalWildcard () const |
return true iff this is a universal wildcard query (*); override returns m_bAny More... | |
CStringIndexSet * | GetIndex (const CConcHolder *pHolder, const string &IndexName) |
low-level: populates m_IndexName and returns appropriate index; throws exception if no appropriate index is found More... | |
void | MakeUnique (void) |
low-level: makes m_IndexItems unique More... | |
bool | CreateStringPattern (const CConcHolder *pHolder, const string &IndexName, const string &Value) |
create pattern for matching literal index values (`$INDEX=VALUE' queries) More... | |
bool | CreateRegexPattern (const CConcHolder *pHolder, const string &IndexName, const string &Regex, bool negated=false) |
create pattern for matching index values against a regular expression (`$INDEX=/REGEX/' queries) More... | |
bool | CreateSetPattern (const CConcHolder *pHolder, const string &IndexName, const set< string > &Values) |
create pattern for matching sets of literal index values (`$INDEX={VAL1,...}' queries) More... | |
bool | CreatePrefixSetPattern (const CConcHolder *pHolder, const string &IndexName, const set< string > &Prefixes) |
create pattern for matching set-valued right-truncated (prefix-set) queries (`$INDEX={PREFIX,...}*') More... | |
bool | CreatePrefixPattern (const CConcHolder *pHolder, const string &IndexName, const string &Prefix) |
create pattern for matching simple right-truncated (prefix) queries (`$INDEX=PREFIX*') More... | |
bool | CreateSuffixSetPattern (const CConcHolder *pHolder, const string &IndexName, const set< string > &Suffixes) |
create pattern for matching set-valued left-truncated (suffix-set) queries (`$INDEX=*{SUFFIX,...}'): uses regex More... | |
bool | CreateSuffixPattern (const CConcHolder *pHolder, const string &IndexName, const string &Suffix) |
create pattern for matching simple left-truncated (suffix) queries (`$INDEX=*SUFFIX'): uses regex More... | |
bool | CreateInfixSetPattern (const CConcHolder *pHolder, const string &IndexName, const set< string > &Infixes) |
create pattern for matching set-valued substring (infix-set) queries (`$INDEX=*{INFIX,...}*'): uses regex More... | |
bool | CreateInfixPattern (const CConcHolder *pHolder, const string &IndexName, const string &Infix) |
create pattern for matching simple substring (infix) queries (`$INDEX=*INFIX*'): uses regex More... | |
bool | CreateUniversalPattern (const CConcHolder *pHolder, const string &SourceStr="*") |
create universal-match set More... | |
bool | CreateNullPattern (const CConcHolder *pHolder, const string &IndexName="Token") |
create empty match-set More... | |
bool | CreateLemmaPattern (const CConcHolder *pHolder, const string &Value) |
create lemma-match pattern (old) More... | |
bool | CreateThesPattern (const CConcHolder *pHolder, const string &IndexName, const string &src) |
create pattern which is associated with "Thes" index (old) More... | |
bool | CreateMorphAnnotationPattern (const CConcHolder *pHolder, const string &IndexName, const vector< string > &Items) |
create pattern which is associated with "MorphPattern" or "Lemma" index (old) More... | |
bool | CreateFileList (const CConcHolder *pHolder, const string &IndexName, const string &FileName) |
create pattern which loads target values from the file FileName More... | |
bool | CreateChunkPattern (const CConcHolder *pHolder, const string &IndexName, const string &ChunkTypeStr) |
create pattern which is associated with "Chunk" index More... | |
bool | CreateAnchorPattern (const CConcHolder *pHolder, const string &BreakName, int anchor) |
create an anchor pattern More... | |
bool | BuildRegExp (string RegExpStr, vector< DWORD > &IndexItems, bool negated=false) |
generic regex query utility More... | |
void | EvaluateWithoutHits () |
initial-stage query evaluation. EvaluateWithoutHits is the first stage of the evaluation of one query. The main task is to build CQueryNode::m_Occurrences, which is written by chunks, sorted by the first item of a chunk. More... | |
void | Evaluate (bool bSeparateHits=false) |
size_t | GetNodeFrequencyByNodeIndex (size_t NodeIndex) const |
get occurrence count by query-node index; used by rank-sort operator. default implementation in CQueryNode just chokes. More... | |
Public Member Functions inherited from CQueryNode | |
CQueryNode (bool bUseNodeIndices=false, bool bUseMatchIds=false, const CConcHolder *holder=NULL) | |
Constructor. More... | |
virtual | ~CQueryNode () |
Destructor. More... | |
void | SetNegation (bool Value) |
set m_bNegated More... | |
bool | GetNegation () const |
get m_bNegated More... | |
void | AddOccurrence (const CQueryNode &FromNode, int FromOcc) |
inserts a single occurrence FromNode.m_Occurrences[FromOcc] More... | |
void | AddOccurrences (const CQueryNode &FromNode, int start, int end) |
inserts occurrences [FromNode.m_Occurrences.begin()+start, FromNode.m_Occurrences.begin()+end) into m_Occurrences[] (and m_OccurrenceNodeIndices if appropriate) More... | |
void | AddOccurrences2 (const CQueryNode &FromNode1, int start1, int end1, const CQueryNode &FromNode2, int start2, int end2) |
appends occurrences from both FromNode1 and FromNode2, maintaining sort-order and respecting both m_bUseNodeIndices and m_bUseMatchIds More... | |
void | AddOccurrences3 (const CQueryNode &FromNode1, int start1, int end1, const CQueryNode &FromNode2, int start2, int end2, const CQueryNode &FromNode3, int start3, int end3) |
appends occurrences from FromNode1, FromNode2, and FromNode3 maintaining sort-order and respecting both m_bUseNodeIndices and m_bUseMatchIds More... | |
void | ClearAndReserveOccurrences (int size) |
void | SwapOccurrences (CQueryNode &Node, bool swapChunkLengths=false) |
swaps m_Occurrences[], m_OccurrenceNodeIndices[], and m_OccurrenceMatchIds[] between caller object and Node More... | |
void | ConvertOccurrencesToHits (bool bSeparateHits) |
convert occurrences to hits using m_pHolder->GetBreaks() More... | |
void | ConvertOccurrencesToHitsForPatterns (bool bSeparateHits) |
convert occurrences to hits for pattern query construction (like "mother likes father") More... | |
void | EnsureChunkOffsets () |
ensure m_ChunkOffsets[] is populated More... | |
void | SetHolder (const CConcHolder *pHolder) |
set m_pHolder dependent properties m_bUseNodeIndices, m_bUseMatchIds More... | |
DWORD | HitOccurrencesBegin (DWORD HitNo) const |
get the index of the first occurrence in m_Occurrences[] for hit number HitNo More... | |
DWORD | HitOccurrencesEnd (DWORD HitNo) const |
get the index of the first occurrence in m_Occurrences[] after hit number HitNo More... | |
DWORD | GetFirstOccurrenceInHit (DWORD HitNo, BYTE MatchId) const |
DWORD | GetLastOccurrenceInHit (DWORD HitNo, BYTE MatchId) const |
DWORD | GetMiddleOccurrenceInHit (DWORD HitNo, BYTE MatchId) const |
DWORD | BreakStride () const |
get average size of gap between breaks in m_Hits[]; returned value is always >= 1 More... | |
Public Attributes | |
vector< DWORD > | m_IndexItems |
find index items in the index which is called m_IndexName More... | |
string | m_IndexName |
the name of index or break collection on which this node is built (see CQueryChunkNode and CQueryTokenNode nodes) More... | |
bool | m_bChunk |
true, if it is a chunk (NP, VP and so on) More... | |
bool | m_bAnchor |
true iff this is an anchor query More... | |
bool | m_bAny |
true iff this is a "universal wildcard" query More... | |
int | m_AnchorOffset |
integer offset for break-anchor ($.) queries More... | |
BYTE | m_NodeIndex |
a unique index of the node in query tree (used by rank-sort operator) More... | |
BYTE | m_MatchId |
user-specified match-id (0 specifies default behavior, which is to use highlight-id=255) More... | |
Public Attributes inherited from CQueryNode | |
bool | m_bAtomic |
true if this node is a description of one token or a sequence of adjacent tokens More... | |
bool | m_bNegated |
is the node negated? More... | |
string | m_Source |
the string from which this node was created More... | |
vector< CTokenNo > | m_Occurrences |
all occurrences of this node in the current subcorpus, which should be highlighted More... | |
bool | m_bUseNodeIndices |
should DDC use m_OccurrenceNodeIndices? (m_OccurrenceNodeIndices is necessary only for #less_by_rank) More... | |
vector< BYTE > | m_OccurrenceNodeIndices |
query node indices for each occurrence (the origin for each occurrence) More... | |
bool | m_bUseMatchIds |
should DDC use m_OccurrenceMatchIds to track match-ids? More... | |
vector< BYTE > | m_OccurrenceMatchIds |
match-ids for each occurrence in m_Occurrences (only if m_bUseMatchIds is true) More... | |
vector< CHit > | m_Hits |
all hits of this node in the current subcorpus More... | |
const CConcHolder * | m_pHolder |
a reference to the parent holder (moo: should be unused until evaluation time, but isn't!) More... | |
vector< DWORD > | m_ChunkLengths |
vector< DWORD > | m_ChunkOffsets |
vector< int > | m_CacheIds |
??? More... | |
CQueryTokenNode is a class which corresponds to one token or its singular property (for example, lemma)
CQueryTokenNode::CQueryTokenNode | ( | int | NodeIndex, |
BYTE | MatchId = 0 |
||
) |
References DefaultMatchId, m_AnchorOffset, m_bAnchor, m_bAny, CQueryNode::m_bAtomic, m_bChunk, CQueryNode::m_bUseMatchIds, m_MatchId, and m_NodeIndex.
|
virtual |
void CQueryTokenNode::SetMatchId | ( | BYTE | MatchId | ) |
compilation utility: set match-id
References DefaultMatchId, CQueryNode::m_bUseMatchIds, and m_MatchId.
|
virtual |
return true iff this is a universal wildcard query (*); override returns m_bAny
Reimplemented from CQueryNode.
References m_bAny.
Referenced by CQueryWithNode::EvaluateWithoutHits().
CStringIndexSet * CQueryTokenNode::GetIndex | ( | const CConcHolder * | pHolder, |
const string & | IndexName | ||
) |
low-level: populates m_IndexName and returns appropriate index; throws exception if no appropriate index is found
References errParseError, CStringIndexator::GetIndexByAlias(), m_IndexName, CStringIndexSet::m_Name, CConcSession::m_pConcordance, and CQueryNode::SetHolder().
Referenced by CreateInfixSetPattern(), CreateNullPattern(), CreatePrefixSetPattern(), CreateRegexPattern(), CreateSetPattern(), CreateStringPattern(), and CreateSuffixSetPattern().
void CQueryTokenNode::MakeUnique | ( | void | ) |
low-level: makes m_IndexItems unique
References m_IndexItems.
Referenced by CreatePrefixSetPattern(), CreateSetPattern(), and CreateSuffixSetPattern().
bool CQueryTokenNode::CreateStringPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const string & | Value | ||
) |
create pattern for matching literal index values (`$INDEX=VALUE' queries)
References GetIndex(), m_IndexItems, CQueryNode::m_Source, and CStringIndexSet::QueryTokenList().
Referenced by CQToken::Create().
bool CQueryTokenNode::CreateRegexPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const string & | Regex, | ||
bool | negated = false |
||
) |
create pattern for matching index values against a regular expression (`$INDEX=/REGEX/' queries)
References BuildRegExp(), errParseError, GetIndex(), m_IndexItems, and CQueryNode::m_Source.
Referenced by CQTokRegex::Create().
bool CQueryTokenNode::CreateSetPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const set< string > & | Values | ||
) |
create pattern for matching sets of literal index values (`$INDEX={VAL1,...}' queries)
References GetIndex(), m_IndexItems, CQueryNode::m_Source, MakeUnique(), and CStringIndexSet::QueryTokenList().
Referenced by CQTokSet::Create(), CQTokInfl::Create(), CQTokSetInfl::Create(), and CreateFileList().
bool CQueryTokenNode::CreatePrefixSetPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const set< string > & | Prefixes | ||
) |
create pattern for matching set-valued right-truncated (prefix-set) queries (`$INDEX={PREFIX,...}*')
References GetIndex(), m_IndexItems, CQueryNode::m_Source, MakeUnique(), and CStringIndexSet::QueryTokenListWithRightTruncation().
Referenced by CQTokPrefixSet::Create(), and CreatePrefixPattern().
bool CQueryTokenNode::CreatePrefixPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const string & | Prefix | ||
) |
create pattern for matching simple right-truncated (prefix) queries (`$INDEX=PREFIX*')
References CreatePrefixSetPattern().
Referenced by CQTokPrefix::Create().
bool CQueryTokenNode::CreateSuffixSetPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const set< string > & | Suffixes | ||
) |
create pattern for matching set-valued left-truncated (suffix-set) queries (`$INDEX=*{SUFFIX,...}'): uses regex
References BuildRegExp(), GetIndex(), m_IndexItems, CIndexSetForQueryingStage::m_rIndex, CQueryNode::m_Source, MakeUnique(), and CStringIndexSet::QueryTokenListWithLeftTruncation().
Referenced by CQTokSuffixSet::Create(), and CreateSuffixPattern().
bool CQueryTokenNode::CreateSuffixPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const string & | Suffix | ||
) |
create pattern for matching simple left-truncated (suffix) queries (`$INDEX=*SUFFIX'): uses regex
References CreateSuffixSetPattern().
Referenced by CQTokSuffix::Create().
bool CQueryTokenNode::CreateInfixSetPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const set< string > & | Infixes | ||
) |
create pattern for matching set-valued substring (infix-set) queries (`$INDEX=*{INFIX,...}*'): uses regex
References BuildRegExp(), GetIndex(), m_IndexItems, and CQueryNode::m_Source.
Referenced by CQTokInfixSet::Create(), and CreateInfixPattern().
bool CQueryTokenNode::CreateInfixPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const string & | Infix | ||
) |
create pattern for matching simple substring (infix) queries (`$INDEX=*INFIX*'): uses regex
References CreateInfixSetPattern().
Referenced by CQTokInfix::Create().
bool CQueryTokenNode::CreateUniversalPattern | ( | const CConcHolder * | pHolder, |
const string & | SourceStr = "*" |
||
) |
create universal-match set
References m_bAnchor, m_bAny, CQueryNode::m_bAtomic, m_bChunk, m_IndexName, CQueryNode::m_Source, and CQueryNode::SetHolder().
Referenced by CQTokAny::Create().
bool CQueryTokenNode::CreateNullPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName = "Token" |
||
) |
create empty match-set
References GetIndex(), and m_IndexItems.
Referenced by CQToken::Compile().
bool CQueryTokenNode::CreateLemmaPattern | ( | const CConcHolder * | pHolder, |
const string & | Value | ||
) |
create lemma-match pattern (old)
References BuildRegExp(), m_IndexItems, m_IndexName, CConcordance::m_Language, CConcSession::m_pConcordance, CQueryNode::m_Source, RmlMakeLower(), and CQueryNode::SetHolder().
bool CQueryTokenNode::CreateThesPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const string & | src | ||
) |
create pattern which is associated with "Thes" index (old)
References BuildRegExp(), m_IndexItems, m_IndexName, CConcordance::m_Language, CConcSession::m_pConcordance, CQueryNode::m_Source, RmlMakeUpper(), and CQueryNode::SetHolder().
Referenced by CQTokThes::Create().
bool CQueryTokenNode::CreateMorphAnnotationPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const vector< string > & | Items | ||
) |
create pattern which is associated with "MorphPattern" or "Lemma" index (old)
References BuildRegExp(), GetIndexItemSetByVectorString(), m_IndexItems, m_IndexName, CQueryNode::m_Source, and CQueryNode::SetHolder().
Referenced by CQTokMorph::Create().
bool CQueryTokenNode::CreateFileList | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const string & | FileName | ||
) |
create pattern which loads target values from the file FileName
References CreateSetPattern(), CQueryNode::m_Source, and Trim().
Referenced by CQTokFile::Create().
bool CQueryTokenNode::CreateChunkPattern | ( | const CConcHolder * | pHolder, |
const string & | IndexName, | ||
const string & | ChunkTypeStr | ||
) |
create pattern which is associated with "Chunk" index
References BuildRegExp(), ChunkIndexName, m_bAnchor, CQueryNode::m_bAtomic, m_bChunk, m_IndexItems, m_IndexName, CStringIndexator::m_pChunkIndex, CConcSession::m_pConcordance, CQueryNode::m_Source, and CQueryNode::SetHolder().
Referenced by CQTokChunk::Create().
bool CQueryTokenNode::CreateAnchorPattern | ( | const CConcHolder * | pHolder, |
const string & | BreakName, | ||
int | anchor | ||
) |
create an anchor pattern
References m_AnchorOffset, m_bAnchor, CQueryNode::m_bAtomic, m_bChunk, m_IndexName, and CQueryNode::SetHolder().
Referenced by CQTokAnchor::Create().
bool CQueryTokenNode::BuildRegExp | ( | string | RegExpStr, |
vector< DWORD > & | IndexItems, | ||
bool | negated = false |
||
) |
generic regex query utility
References ddcIconv::convert(), CStringIndexator::GetIndexByAlias(), CConcordance::GetRegexOptions(), m_IndexName, CConcSession::m_pConcordance, CQueryNode::m_pHolder, and CStringIndexSet::QueryTokenListUsingRegExp().
Referenced by CreateChunkPattern(), CreateInfixSetPattern(), CreateLemmaPattern(), CreateMorphAnnotationPattern(), CreateRegexPattern(), CreateSuffixSetPattern(), and CreateThesPattern().
|
virtual |
initial-stage query evaluation. EvaluateWithoutHits is the first stage of the evaluation of one query. The main task is to build CQueryNode::m_Occurrences, which is written by chunks, sorted by the first item of a chunk.
Reimplemented from CQueryNode.
References CQueryNode::ClearAndReserveOccurrences(), CStringIndexSet::FindChunkOccurrences(), CStringIndexSet::FindOccurrences(), CConcordance::GetAllOccurrences(), CStringIndexator::GetIndexByAlias(), CConcordance::GetOccurrencesByPosition(), GetOccurrencesSize, m_AnchorOffset, m_bAnchor, m_bAny, m_bChunk, CQueryNode::m_bUseMatchIds, CQueryNode::m_bUseNodeIndices, CQueryNode::m_CacheIds, CQueryNode::m_ChunkLengths, CConcSession::m_CurrentSearchPeriodNo, m_IndexItems, m_IndexName, m_MatchId, m_NodeIndex, CQueryNode::m_OccurrenceMatchIds, CQueryNode::m_OccurrenceNodeIndices, CQueryNode::m_Occurrences, CStringIndexator::m_pChunkIndex, CConcSession::m_pConcordance, CQueryNode::m_pHolder, and CConcSession::m_ShortOccurCaches.
Referenced by Evaluate(), CQueryWithNode::EvaluateWithoutHits(), CQueryWithoutNode::EvaluateWithoutHits(), and CQueryWithorNode::EvaluateWithoutHits().
|
virtual |
evaluate query node with respect to m_pHolder, populating m_Hits[] attributes m_BreakNo, m_HighlightOcurrencesEnd. If bSeparateHits is false (default), hits in m_Hits[] will be aggregated by m_BreakNo (i.e. at most one instance of any m_BreakNo value in m_Hits[])
Reimplemented from CQueryNode.
References CQueryNode::ConvertOccurrencesToHits(), CQueryNode::ConvertOccurrencesToHitsForPatterns(), EvaluateWithoutHits(), and m_bChunk.
|
virtual |
get occurrence count by query-node index; used by rank-sort operator. default implementation in CQueryNode just chokes.
Reimplemented from CQueryNode.
References m_NodeIndex, and CQueryNode::m_Occurrences.
vector<DWORD> CQueryTokenNode::m_IndexItems |
find index items in the index which is called m_IndexName
Referenced by CreateChunkPattern(), CreateInfixSetPattern(), CreateLemmaPattern(), CreateMorphAnnotationPattern(), CreateNullPattern(), CreatePrefixSetPattern(), CreateRegexPattern(), CreateSetPattern(), CreateStringPattern(), CreateSuffixSetPattern(), CreateThesPattern(), EvaluateWithoutHits(), and MakeUnique().
string CQueryTokenNode::m_IndexName |
the name of index or break collection on which this node is built (see CQueryChunkNode and CQueryTokenNode nodes)
Referenced by BuildRegExp(), CreateAnchorPattern(), CreateChunkPattern(), CreateLemmaPattern(), CreateMorphAnnotationPattern(), CreateThesPattern(), CreateUniversalPattern(), EvaluateWithoutHits(), and GetIndex().
bool CQueryTokenNode::m_bChunk |
true, if it is a chunk (NP, VP and so on)
Referenced by CQueryTokenNode(), CreateAnchorPattern(), CreateChunkPattern(), CreateUniversalPattern(), Evaluate(), and EvaluateWithoutHits().
bool CQueryTokenNode::m_bAnchor |
true iff this is an anchor query
Referenced by CQueryTokenNode(), CreateAnchorPattern(), CreateChunkPattern(), CreateUniversalPattern(), and EvaluateWithoutHits().
bool CQueryTokenNode::m_bAny |
true iff this is a "universal wildcard" query
Referenced by CQOccurrenceIterator::bind(), CQTokenOccurrenceIterator::bind(), CQueryTokenNode(), CreateUniversalPattern(), EvaluateWithoutHits(), and IsUniversalWildcard().
int CQueryTokenNode::m_AnchorOffset |
integer offset for break-anchor ($.) queries
Referenced by CQueryTokenNode(), CreateAnchorPattern(), and EvaluateWithoutHits().
BYTE CQueryTokenNode::m_NodeIndex |
a unique index of the node in query tree (used by rank-sort operator)
Referenced by CQOccurrenceIterator::bind(), CQTokenOccurrenceIterator::bind(), CQueryTokenNode(), EvaluateWithoutHits(), CQueryWithNode::EvaluateWithoutHits(), and GetNodeFrequencyByNodeIndex().
BYTE CQueryTokenNode::m_MatchId |
user-specified match-id (0 specifies default behavior, which is to use highlight-id=255)
Referenced by CQOccurrenceIterator::bind(), CQTokenOccurrenceIterator::bind(), CQueryTokenNode(), EvaluateWithoutHits(), and SetMatchId().