A file for globally defined constants and classes. More...
#include "../common/utilit.h"
#include "list"
#include "limits.h"
#include "../GraphanLib/GraphmatFile.h"
#include "../LemmatizerLib/Lemmatizers.h"
#include "../AgramtabLib/EngGramTab.h"
#include "../AgramtabLib/RusGramTab.h"
#include "../AgramtabLib/GerGramTab.h"
#include "../common/DDC_common.h"
#include "../tinyxml/tinyxml.h"
#include "../GraphanLib/GraphmatFile.h"
#include "../LemmatizerLib/Lemmatizers.h"
#include "../AgramtabLib/EngGramTab.h"
#include "../AgramtabLib/RusGramTab.h"
#include "../AgramtabLib/GerGramTab.h"
#include "../common/DDC_common.h"
#include "../tinyxml/tinyxml.h"
Go to the source code of this file.
A file for globally defined constants and classes.
typedef map<size_t, vector<DWORD> > PeriodsDivisionMap |
a type for mappping an index item no to its period division
typedef map<string,CShortOccurCache> CShortOccurCacheMap |
a type for index string to its occurrences
typedef vector<CTokenNo> COccurrBuffer |
a type for holding occurrences during reading from the disk
enum HitSortEnum |
HitSortEnum This enum defines the types of all possible orders which can be apllied to an output hit set.
enum BigramDirectionEnum |
bool InitConcordDicts | ( | ) |
initializes morphology dictionaries
References bEnglishMorph, bGermanMorph, bRussianMorph, InitMorphologySystem(), and CExpc::m_strCause.
Referenced by main().
void FreeConcordDicts | ( | ) |
deletes morphology dictionaries
Referenced by UnloadData().
const CLemmatizer* GetLemmatizerByLanguage | ( | MorphLanguageEnum | Langua | ) |
return a morphology dictionary by a language indentifier
References bEnglishMorph, bGermanMorph, bRussianMorph, InitMorphologySystem(), morphEnglish, morphGerman, and morphRussian.
Referenced by GetGramInfosFromWord(), GetParadigmCollection(), and GetWordForms().
const CAgramtab* GetGramtabByLanguage | ( | MorphLanguageEnum | Langua | ) |
return a grammatical table by a language indentifier
References bEnglishMorph, bGermanMorph, bRussianMorph, InitMorphologySystem(), morphEnglish, morphGerman, and morphRussian.
Referenced by GetGramInfosFromWord(), GetGramInfoStr(), GetParadigmByGroups(), GetParadigmFromDictionary(), and GetStringByParadigm().
void concord_daemon_log | ( | const string & | t | ) |
write a dump message to a log file
References Format(), GetRegistryString(), and RmlGetCurrentTime().
Referenced by CConcHolder::GetFileSnippets(), CDDCServerListenHost::GetFirstHitsFromCorpora(), CConcHolder::GetHits(), CDDCServerListenHost::GetHitStringsFromOneCorpora(), CConcHolder::GetOccurrences(), CConcHolder::GetTokensFromStorageByBreak(), LoadDDCServer(), CDDCCorpusListenHost::LoadHolder(), LoadLocalCorpora(), CConcIndexator::LoadMaskedFiles(), CConcIndexator::LoadSourceFilesAndOptions(), main(), CDDCServerListenHost::ProcessSocketString(), CDDCCorpusListenHost::ProcessSocketString(), ReloadCorpus(), CDDCServerListenHost::ReloadCorpusProcessSignal(), CDDCServerListenHost::ReplaceHolderIfNeeded(), CConcHolder::SaveOccurrences(), SaveTrigger(), CConcHolder::SimpleQuery(), and termination_handler().
string GetDDCErrorString | ( | DDCErrorEnum | ErrorCode | ) | [inline] |
return a string representation of a DDC error
References errParseError, errProcessMorphology, errReadOccurrenceFile, errReadSourceFile, errTimeoutElapsed, errUnknown, and errUnknownPath.
const char globalFieldDelimeter = '\t' |
a globally defined delimeter, which is used to delimit fields in one record (the first field is always a token)
Referenced by CConcIndexator::IndexMorphXml(), CStringIndexator::IndexOneToken(), and CConcIndexator::IndexTextOrHtmlFile().
const string PredefinedTableLineTag = "l" |
a globally defined xml-tag, which is used to separate records if CConcIndexator::m_IndexType is Free_Index
Referenced by GetCWBFormattedStringRecursive(), and CHitBorders::RegisterBorderIndices().
const string ChunkIndexName = "chunk" |
a globally defined index name for chunks
Referenced by CQueryTokenNode::CreateChunkPattern(), CStringIndexator::GetIndicesString(), and CStringIndexator::RegisterChunkIndex().
const string LeftBigramsIndexName = "left" |
a globally defined left bigrams index name
const string RightBigramsIndexName = "right" |
a globally defined right bigrams index name
const string PredefinedFileBreakName = "file" |
a globally defined break collection name for corpus files
Referenced by CConcIndexatorInvoker::BuildIndex(), CConcHolder::GenerateHitStrings(), CHitBorders::GetBorderIndicesString(), CConcHolder::GetContext(), CConcHolder::GetContextJson(), CHitBorders::RegisterBorderIndices(), and CHitBorders::UniteBorders().
const string PredefinedTextAreaBreakName = "textarea" |
a globally defined break collection name for text areas
Referenced by CConcIndexator::IndexTextOrHtmlFile(), CHitBorders::RegisterBorderIndices(), and CConcHolder::SetHitType().
const size_t MaxShortOccurCacheSize = 1000000 |
MaxShortOccurCacheSize is the upper bound of CShortOccurCache::m_Data.size() It is introduced to restrict memory usage.
Referenced by CShortOccurCache::CouldContainMore().
const string MorphAnnotationsDelim = "#" |
a delimiter between morphological annotations
Referenced by GetGramInfosFromWord(), and CConcIndexator::IndexMorphXml().
const string MorphAnnotationsDelimRegExp = "[^#]*" |
a regular expression, which passes everything within one morphological annotation
Referenced by CConcIndexator::GetIndexItemSetByVectorString().