ddc
|
A file for globally defined constants and classes. More...
#include "../CommonLib/utilit.h"
#include "../CommonLib/ddcLog.h"
#include "list"
#include "limits.h"
#include "../GraphanLib/GraphmatFile.h"
#include "../LemmatizerLib/Lemmatizers.h"
#include "../AgramtabLib/EngGramTab.h"
#include "../AgramtabLib/RusGramTab.h"
#include "../AgramtabLib/GerGramTab.h"
#include "../CommonLib/DDC_common.h"
#include "../CommonLib/ddcObject.h"
#include "../tinyxml/tinyxml.h"
#include "../ConcordLib/CCurl.h"
Go to the source code of this file.
Classes | |
class | CShortOccurCache |
struct | CShortOccurCache::CDataReference |
the structure holds a pointer to a vector of occurrences and its size More... | |
class | CFreeBiblIndexInterface |
struct | CHitSortKey |
struct | CDDCFilterWithBounds |
struct | CHit |
struct | CHitCompareByBreak |
compare hits by break-number (for query evaluation, e.g. CQueryBinaryOperationNode::hits_and_positions_intersection()) More... | |
Macros | |
#define | DDC_SORTKEY_MAXLEN 256 |
Typedefs | |
typedef DWORD | CTokenNo |
integer type CTokenNo is used to refer to an index of a token in the corpus More... |
typedef DWORD | CFileNo |
integer type CFileNo is used to refer to a single document (file) in the corpus More... | |
typedef map< string, CShortOccurCache > | CShortOccurCacheMap |
a type mapping an index string to its occurrences More... |
typedef vector< CTokenNo > | COccurrBuffer |
a type for holding occurrences during reading from the disk More... | |
Enumerations | |
enum | HitSortEnum { NoSort = 0, LessByDate = 1, GreaterByDate = 2, LessBySize = 3, GreaterBySize = 4, LessByFreeBiblField = 5, GreaterByFreeBiblField = 6, LessByRank = 7, GreaterByRank = 8, LessByMiddleContext = 9, GreaterByMiddleContext = 10, LessByLeftContext = 11, GreaterByLeftContext = 12, LessByRightContext = 13, GreaterByRightContext = 14, RandomSort = 15, LessByCountKey = 16, GreaterByCountKey = 17, LessByCountValue = 18, GreaterByCountValue = 19, LessByPruneKey = 20, GreaterByPruneKey = 21, HitSortsCount = 22 } |
enum | HitSortOrderEnum { hsoDescending = -1, hsoNone = 0, hsoAscending = 1, hsoAscendingCountKeys = 2, hsoDescendingCountKeys = 3, hsoAscendingCountValues = 4, hsoDescendingCountValues = 5 } |
enum | DDCFormatTypeEnum { DDC_ResultHTML, DDC_ResultText, DDC_ResultTable, DDC_ResultDocIds, DDC_ResultJson } |
DDCFormatTypeEnum defines the format of output hits: More... |
Functions | |
void | ddcInitGlobal (void) |
global initialization More... |
bool | IsPruneFilterType (HitSortEnum e) |
bool | IsNullSort (HitSortOrderEnum e) |
bool | IsCountSort (HitSortOrderEnum e) |
bool | IsCountKeySort (HitSortOrderEnum e) |
bool | IsCountValueSort (HitSortOrderEnum e) |
bool | InitConcordDicts () |
initializes morphology dictionaries More... | |
void | FreeConcordDicts () |
deletes morphology dictionaries More... | |
const CLemmatizer * | GetLemmatizerByLanguage (MorphLanguageEnum Langua) |
return a morphology dictionary by a language identifier More... |
const CAgramtab * | GetGramtabByLanguage (MorphLanguageEnum Langua) |
return a grammatical table by a language identifier More... |
string | GetIndexItemSetByVectorString (const vector< string > &TokenProperties, bool bRegexp) |
return a string representation of a set of token properties (in the format which is used in the index) More... | |
Variables | |
const char | globalFieldDelimeter = '\t' |
a globally defined delimiter, which is used to delimit fields in one record (the first field is always a token) More... |
const string | PredefinedTableLineTag = "l" |
a globally defined xml-tag, which is used to separate records if CConcIndexator::m_IndexType is Free_Index More... | |
const string | ChunkIndexName = "chunk" |
a globally defined index name for chunks More... | |
const string | PredefinedFileBreakName = "file" |
a globally defined break collection name for corpus files More... | |
const string | PredefinedTextAreaBreakName = "textarea" |
a globally defined break collection name for text areas More... | |
const size_t | MaxShortOccurCacheSize = 1000000 |
MaxShortOccurCacheSize is the upper bound of CShortOccurCache::m_Data.size(). It is introduced to restrict memory usage. More... |
const size_t | MaxBiblStringLen = 20000 |
static const char * | HitSortEnumNames [HitSortsCount] |
static const char * | HitSortEnumStrings [HitSortsCount] |
const string | MorphAnnotationsDelim = "#" |
a delimiter between morphological annotations More... | |
const string | MorphAnnotationsDelimRegExp = "[^#]*" |
a regular expression, which passes everything within one morphological annotation More... | |
A file for globally defined constants and classes.
#define DDC_SORTKEY_MAXLEN 256 |
maximum length of string sort-keys passed from subcorpus servers to distributed server, including terminating NUL (see CConcHolder::GetHitIds() and CDDCServerListenHost::GetFirstHitsFromCorpora())
Referenced by CConcSession::GetCountIds(), CDDCBranchServer::GetFirstHitsFromCorpora(), and CConcSession::GetHitIds().
integer type CFileNo is used to refer to a single document (file) in the corpus
typedef map<string,CShortOccurCache> CShortOccurCacheMap |
a type mapping an index string to its occurrences
typedef vector<CTokenNo> COccurrBuffer |
a type for holding occurrences during reading from the disk
enum HitSortEnum |
HitSortEnum: This enum defines the types of all possible orders which can be applied to an output hit set.
enum HitSortOrderEnum |
enum DDCFormatTypeEnum |
DDCFormatTypeEnum defines the format of output hits:
Enumerator | |
---|---|
DDC_ResultHTML | |
DDC_ResultText | |
DDC_ResultTable | |
DDC_ResultDocIds | |
DDC_ResultJson |
|
inline |
global initialization
References ddcInitCurl(), and ddcInitLocale().
Referenced by main().
|
inline |
References GreaterByPruneKey, and LessByPruneKey.
Referenced by CQueryOptions::IsPruneFilter(), and CDDCFilterWithBounds::IsPruneFilter().
|
inline |
References hsoNone.
|
inline |
References hsoAscendingCountKeys, and hsoDescendingCountValues.
Referenced by CFirstHitsQueryResult::CollectCounts(), CConcSession::GenerateHitStrings(), CDDCBranchServer::GetFirstHitsFromCorpora(), CConcSession::GetHitIds(), CConcSession::GetSortKeyHint(), CDDCLeafServer::handle__get_first_hits(), CDDCBranchServer::handle__get_first_hits(), CDDCBranchServer::handle__get_hit_strings(), CDDCLeafServer::handle__run_query(), CDDCBranchServer::RunDistributed(), CDDCBranchServer::RunGetFirstHits(), and CDDCBranchServer::UpdateNavCache().
|
inline |
References hsoAscendingCountKeys, and hsoDescendingCountKeys.
|
inline |
References hsoAscendingCountValues, and hsoDescendingCountValues.
bool InitConcordDicts | ( | ) |
initializes morphology dictionaries
References bEnglishMorph, bGermanMorph, bRussianMorph, ddcLogInfo, InitMorphologySystem(), and CExpc::m_strCause.
Referenced by do_start().
void FreeConcordDicts | ( | ) |
deletes morphology dictionaries
References engGramTab, engLemmatizer, gerGramTab, gerLemmatizer, rusGramTab, and rusLemmatizer.
Referenced by UnloadData().
const CLemmatizer* GetLemmatizerByLanguage | ( | MorphLanguageEnum | Langua | ) |
return a morphology dictionary by a language identifier
References bEnglishMorph, bGermanMorph, bRussianMorph, engLemmatizer, gerLemmatizer, InitMorphologySystem(), morphEnglish, morphGerman, morphRussian, and rusLemmatizer.
Referenced by TxMorph::expand(), GetGramInfosFromWord(), GetParadigmCollection(), and GetWordForms().
const CAgramtab* GetGramtabByLanguage | ( | MorphLanguageEnum | Langua | ) |
return a grammatical table by a language identifier
References bEnglishMorph, bGermanMorph, bRussianMorph, engGramTab, gerGramTab, InitMorphologySystem(), morphEnglish, morphGerman, morphRussian, and rusGramTab.
Referenced by GetGramInfosFromWord(), GetGramInfoStr(), GetParadigmByGroups(), GetParadigmFromDictionary(), and GetStringByParadigm().
string GetIndexItemSetByVectorString | ( | const vector< string > & | TokenProperties, |
bool | bRegexp | ||
) |
return a string representation of a set of token properties (in the format which is used in the index)
References MorphAnnotationsDelimRegExp.
Referenced by CQueryTokenNode::CreateMorphAnnotationPattern(), CreateMorphIndex(), CConcIndexator::IndexMorphXml(), and CHitCompareByBreak::operator()().
const char globalFieldDelimeter = '\t' |
a globally defined delimiter, which is used to delimit fields in one record (the first field is always a token)
Referenced by CConcIndexator::IndexMorphXml(), CStringIndexator::IndexOneToken(), CConcIndexator::IndexTextOrHtmlFile(), CStringIndexator::IndexTokenFixLongColumns(), and TruncateLongColumns().
const string PredefinedTableLineTag = "l" |
a globally defined xml-tag, which is used to separate records if CConcIndexator::m_IndexType is Free_Index
Referenced by GetCWBFormattedStringRecursive(), and CHitBorders::RegisterBorderIndices().
const string ChunkIndexName = "chunk" |
a globally defined index name for chunks
Referenced by CQueryTokenNode::CreateChunkPattern(), CStringIndexator::GetIndicesString(), and CStringIndexator::RegisterChunkIndex().
const string PredefinedFileBreakName = "file" |
a globally defined break collection name for corpus files
Referenced by CQCount::CountLocal(), CConcIndexator::CreateAsUnion(), CConcSession::GenerateHitStrings(), CHitBorders::GetBorderIndicesString(), CConcSession::GetContext(), CConcSession::GetContextJson(), ConcIndexatorInvoker::IndexFile(), CTabFormatIndexator::ProcessBreakRecord(), CHitBorders::RegisterBorderIndices(), and CQueryOptions::toString().
const string PredefinedTextAreaBreakName = "textarea" |
a globally defined break collection name for text areas
Referenced by CConcIndexator::IndexTextOrHtmlFile(), CHitBorders::RegisterBorderIndices(), and CConcSession::SetHitType().
const size_t MaxShortOccurCacheSize = 1000000 |
MaxShortOccurCacheSize is the upper bound of CShortOccurCache::m_Data.size(). It is introduced to restrict memory usage.
Referenced by CShortOccurCache::CouldContainMore().
const size_t MaxBiblStringLen = 20000 |
|
static |
symbolic names for HitSortEnum, for debugging
Referenced by CConcSession::GetAllHits(), and CQCount::jsonData().
|
static |
symbolic names for HitSortEnum, for stringification
Referenced by CQCount::countOptionsToString(), CQFSort::jsonType(), CConcSession::SortKeyLB(), CQFSort::toString(), CQFBiblSort::toString(), CQFContextSort::toString(), and CQFPrune::toString().
const string MorphAnnotationsDelim = "#" |
a delimiter between morphological annotations
Referenced by GetGramInfosFromWord(), and CConcIndexator::IndexMorphXml().
const string MorphAnnotationsDelimRegExp = "[^#]*" |
a regular expression, which passes everything within one morphological annotation
Referenced by GetIndexItemSetByVectorString().