18 #ifndef __ConcCommon_H_ 19 #define __ConcCommon_H_ 27 #include "../CommonLib/utilit.h" 28 #include "../CommonLib/ddcLog.h" 33 #include "../GraphanLib/GraphmatFile.h" 34 #include "../LemmatizerLib/Lemmatizers.h" 36 #include "../AgramtabLib/EngGramTab.h" 37 #include "../AgramtabLib/RusGramTab.h" 38 #include "../AgramtabLib/GerGramTab.h" 39 #include "../CommonLib/DDC_common.h" 40 #include "../CommonLib/ddcObject.h" 41 #include "../tinyxml/tinyxml.h" 43 #include "../ConcordLib/CCurl.h" 73 #define DDC_SORTKEY_MAXLEN 256 98 m_VectorStartOffset = VectorStartOffset;
99 m_VectorLength = VectorLength;
202 "LessByFreeBiblField",
203 "GreaterByFreeBiblField",
206 "LessByMiddleContext",
207 "GreaterByMiddleContext",
209 "GreaterByLeftContext",
210 "LessByRightContext",
211 "GreaterByRightContext",
216 "GreaterByCountValue",
253 virtual string GetStringValue(
DWORD FileNo)
const = 0;
257 virtual DWORD GetIntegerLowerBound(
const string &Value)
const = 0;
284 {
return (i < x.
i) || (i==x.
i && s < x.
s); };
288 {
return (i > x.
i) || (i==x.
i && s > x.
s); };
292 { i=i_; s.clear(); };
295 inline void assign(
int i_,
const string &s_)
299 inline void operator= (
const std::pair<int,string>& p)
300 { i=p.first; s=p.second; };
303 inline void operator= (
int i_)
304 { i=i_; s.clear(); };
307 inline void operator= (
const string &s_)
358 m_SatisfiedValues.clear();
363 switch (m_FilterType) {
385 switch (m_FilterType) {
415 {
return m_FilterType ==
NoSort; };
459 CHit(
DWORD BreakNo=0) : m_BreakNo(BreakNo), m_SortKey(0) { m_Value.m_Count=0; };
bool CouldContainMore() const
Definition: ConcSession.cpp:244
vector< CTokenNo > COccurrBuffer
a type for holding occurrences during reading from the disk
Definition: ConcCommon.h:480
const string ChunkIndexName
a globally defined index name for chunks
Definition: ConcCommon.h:56
sort by the issue date (ascending)
Definition: ConcCommon.h:123
bool operator()(const CHit &h, const DWORD brkno) const
Definition: ConcCommon.h:501
DDCFormatTypeEnum
FormatTypeEnum defines the format of output hits:
Definition: ConcCommon.h:468
sort by central context (ascending)
Definition: ConcCommon.h:139
const CTokenNo * GetOccurrencesFromCache(const int CacheId, DWORD &Length) const
Definition: ConcSession.cpp:238
CHitSortKey(int i_=0)
default constructor
Definition: ConcCommon.h:271
void FreeConcordDicts()
deletes morphology dictionaries
Definition: InitDicts.cpp:151
CHitSortKey m_KeyHi
the upper bound of the filter (by default (INT_MAX,"")), valid only if m_bSet==false; formerly int m_...
Definition: ConcCommon.h:345
DWORD CFileNo
integer type CFileNo is used to refer to a single document (file) in the corpus
Definition: ConcCommon.h:66
vector< CTokenNo > m_OccurrencesBody
this vector contains all occurrences for this cache
Definition: ConcCommon.h:107
string s
secondary sort key, new for v2.0.19
Definition: ConcCommon.h:268
void assign(int i_, const string &s_)
assignment given integer + string
Definition: ConcCommon.h:295
Definition: ConcCommon.h:470
Definition: ConcCommon.h:472
const CLemmatizer * GetLemmatizerByLanguage(MorphLanguageEnum Langua)
return a morphology dictionary by a language identifier
Definition: InitDicts.cpp:75
const string PredefinedTableLineTag
a globally defined xml-tag, which is used to separate records if CConcIndexator::m_IndexType is Free_...
Definition: ConcCommon.h:54
sort by count()-key (ascending)
Definition: ConcCommon.h:153
bool IsNullSort(HitSortOrderEnum e)
Definition: ConcCommon.h:182
bool IsCountValueSort(HitSortOrderEnum e)
Definition: ConcCommon.h:191
bool IsPruneFilterType(HitSortEnum e)
Definition: ConcCommon.h:168
DWORD m_HighlightOccurrenceEnd
the end offset of token occurrences to be highlighted in CQueryNode::m_Occurrences and later in CConc...
Definition: ConcCommon.h:443
sort by count()-value (descending)
Definition: ConcCommon.h:159
class for global query filters aka "query operators"
Definition: QueryFilter.h:35
CHitSortKey m_KeyLo
the lower bound of the filter (by default (INT_MIN,"")), valid only if m_bSet==false; formerly int m_...
Definition: ConcCommon.h:342
sort by the size of the hit in tokens (descending)
Definition: ConcCommon.h:129
Definition: ConcCommon.h:473
bool IsCountSort(HitSortOrderEnum e)
Definition: ConcCommon.h:185
sort by right context (ascending)
Definition: ConcCommon.h:147
end-of-enum sentinel
Definition: ConcCommon.h:165
size_t m_DebugRankNo
the string which displays how the rank was calculated (for rank-sorted queries)
Definition: ConcCommon.h:449
const string MorphAnnotationsDelimRegExp
a regular expression, which passes everything within one morphological annotation ...
Definition: ConcCommon.h:485
const CFreeBiblIndexInterface * m_BiblIndex
pointer to the CConcXml::CFreeBiblIndex responsible for populating this filter (only after compile) ...
Definition: ConcCommon.h:351
HitSortOrderEnum
Definition: ConcCommon.h:172
sort by left context (descending)
Definition: ConcCommon.h:145
Definition: ConcCommon.h:177
int m_ContextMatchId
match-id of reference token for context-sort operators (default=0:any)
Definition: ConcCommon.h:336
Definition: agramtab_.h:39
CHitSortKey(int i_, const string &s_)
constructor given integer and string
Definition: ConcCommon.h:275
static const char * HitSortEnumStrings[HitSortsCount]
Definition: ConcCommon.h:222
Definition: ConcCommon.h:248
sort by #prune[]-key (descending)
Definition: ConcCommon.h:163
size_t m_VectorLength
Definition: ConcCommon.h:93
sort by count()-value (ascending)
Definition: ConcCommon.h:157
void Clear()
Definition: ConcSession.cpp:225
bool m_bNegated
true iff this is a negated filter
Definition: ConcCommon.h:333
HitSortEnum m_FilterType
the type of the filter
Definition: ConcCommon.h:321
CHit(DWORD BreakNo=0)
Definition: ConcCommon.h:459
Definition: ConcCommon.h:176
size_t AddNewIndexItemNoToCache(const CTokenNo *pStart, const CTokenNo *pEnd)
Definition: ConcSession.cpp:231
bool m_bSet
Definition: ConcCommon.h:330
Definition: ConcCommon.h:175
CHitCompareByBreak()
Definition: ConcCommon.h:499
Definition: ConcCommon.h:85
bool IsFileFilter() const
returns true iff this is a file-based filter (for optimized count(*) queries)
Definition: ConcCommon.h:383
map< string, CShortOccurCache > CShortOccurCacheMap
a type for index string to its occurrences
Definition: ConcCommon.h:477
const size_t MaxShortOccurCacheSize
MaxShortOccurCacheSize is the upper bound of CShortOccurCache::m_Data.size(). It is introduced to rest...
Definition: ConcCommon.h:69
Definition: ConcCommon.h:174
compare hits by break-number (for query evaluation, e.g. CQueryBinaryOperationNode::hits_and_position...
Definition: ConcCommon.h:497
const string PredefinedTextAreaBreakName
a globally defined break collection name for text areas
Definition: ConcCommon.h:60
bool IsPruneFilter(void) const
returns true iff this is a pruning filter
Definition: ConcCommon.h:418
sort by document rank (descending)
Definition: ConcCommon.h:137
CDataReference(size_t VectorStartOffset, size_t VectorLength)
Definition: ConcCommon.h:96
Definition: ConcCommon.h:263
void ddcInitLocale(void)
initialize the locale from current environment if not already initialized
Definition: ddcLocale.cpp:31
sort by document (ascending)
Definition: ConcCommon.h:135
const string PredefinedFileBreakName
a globally defined break collection name for corpus files
Definition: ConcCommon.h:58
sort by right context (descending)
Definition: ConcCommon.h:149
sort by a free bibliographical field (ascending)
Definition: ConcCommon.h:131
const string MorphAnnotationsDelim
a delimiter between morphological annotations
Definition: ConcCommon.h:483
vector< CDataReference > m_IndexItemNo2Occurrences
this map contains a relation from index item No to the address of its occurrences ...
Definition: ConcCommon.h:104
virtual ~CFreeBiblIndexInterface()
Definition: ConcCommon.h:250
sort by count()-key (descending)
Definition: ConcCommon.h:155
DWORD m_BreakNo
the index of the break, which this hit represents (in the break collection CConcHolder::GetBreaks) ...
Definition: ConcCommon.h:441
sort by #prune[]-key (ascending)
Definition: ConcCommon.h:161
CDDCFilterWithBounds()
Definition: ConcCommon.h:422
class CQFilter * m_Parent
pointer to parent CQFilter (if any)
Definition: ConcCommon.h:354
DWORD m_FileNo
the index of the corpus file where this hit is found; it is equal to m_BreakNo if user searches within f...
Definition: ConcCommon.h:445
string m_AttrName
Definition: ConcCommon.h:325
HitSortOrderEnum SortOrder() const
returns integer sort order as a HitSortOrderEnum (-1:descending, 0:none, 1:ascending, 2:count_keys, 3:count_values)
Definition: ConcCommon.h:362
sort by the size of the hit in tokens (ascending)
Definition: ConcCommon.h:127
Definition: ConcCommon.h:178
Definition: ConcCommon.h:438
void clear()
clear key
Definition: ConcCommon.h:279
const size_t MaxBiblStringLen
Definition: ConcCommon.h:75
bool IsCountKeySort(HitSortOrderEnum e)
Definition: ConcCommon.h:188
Definition: ConcCommon.h:318
bool IsTrivialFilter(void) const
returns true iff this is a trivial-sort filter (i.e. does not change original hit-sort order) ...
Definition: ConcCommon.h:414
const CAgramtab * GetGramtabByLanguage(MorphLanguageEnum Langua)
return a grammatical table by a language identifier
Definition: InitDicts.cpp:101
sort by match context (descending)
Definition: ConcCommon.h:141
bool InitConcordDicts()
initializes morphology dictionaries
Definition: InitDicts.cpp:125
CHitSortKey m_SortKey
Definition: ConcCommon.h:457
const char globalFieldDelimeter
a globally defined delimiter, which is used to delimit fields in one record (the first field is alway...
Definition: ConcCommon.h:52
size_t m_Count
count for this item (for count-queries)
Definition: ConcCommon.h:452
int i
primary integer sort key; formerly CHit::m_OrderId, CDDCFilterWithBounds.m_LevelStart|m_LevelEnd ...
Definition: ConcCommon.h:265
sort by a free bibliographical field (descending)
Definition: ConcCommon.h:133
DWORD CTokenNo
integer type CTokenNo is used to refer to an index of a token in the corpus
Definition: ConcCommon.h:63
MorphLanguageEnum
Definition: utilit.h:162
Definition: Lemmatizers.h:37
Definition: ConcCommon.h:471
static const char * HitSortEnumNames[HitSortsCount]
Definition: ConcCommon.h:195
void clear()
Definition: ConcCommon.h:356
size_t m_VectorStartOffset
Definition: ConcCommon.h:91
uint32_t DWORD
Definition: utilit.h:105
sort by left context (ascending)
Definition: ConcCommon.h:143
sort by the issue date (descending)
Definition: ConcCommon.h:125
int m_ContextOffset
offset from matched token for context-sort operators
Definition: ConcCommon.h:339
Definition: ConcCommon.h:179
HitSortEnum
Definition: ConcCommon.h:119
sort by random key
Definition: ConcCommon.h:151
Definition: ConcCommon.h:469
the structure holds a pointer to a vector of occurrences and its size
Definition: ConcCommon.h:88
void assign(int i_)
assignment given integer
Definition: ConcCommon.h:291
string GetIndexItemSetByVectorString(const vector< string > &TokenProperties, bool bRegexp)
return a string representation of a set of token properties (in the format which is used in the index...
Definition: Concordance.cpp:25
Definition: ConcCommon.h:173
no sort operators, only filtering (used by #has_field[])
Definition: ConcCommon.h:121
void ddcInitCurl(void)
global initialization function (multiple calls should be safe)
Definition: CCurl.cpp:83
void ddcInitGlobal(void)
global initialization
Definition: ConcCommon.h:46
set< int > m_SatisfiedValues
the possible (integer) values for this bibliographical field, valid only if m_bSet==true ...
Definition: ConcCommon.h:348