#include "StdConc.h"
#include "../common/util_classes.h"
#include "../common/DwdsThesaurus.h"
#include "ConcCommon.h"
#include "ConcIndexator.h"
#include "sys/stat.h"
#include "time.h"
#include "limits.h"
#include "ConcordAlgorithm.h"
#include "../tinyxml/tinyxml.h"
#include "../RmlTar/RmlTar.h"
#include "../GraphanLib/HTMLConv.h"
void InitMorphologySystem(T *& Lemmatizer, Y *& Gramtab)
References ErrorMessage(), Format(), and GetStringByLanguage().
Referenced by GetGramtabByLanguage(), GetLemmatizerByLanguage(), and InitConcordDicts().
const CLemmatizer* GetLemmatizerByLanguage(MorphLanguageEnum Langua)
Returns a morphology dictionary by a language identifier.
References bEnglishMorph, bGermanMorph, bRussianMorph, InitMorphologySystem(), morphEnglish, morphGerman, and morphRussian.
Referenced by GetGramInfosFromWord(), GetParadigmCollection(), and GetWordForms().
const CAgramtab* GetGramtabByLanguage(MorphLanguageEnum Langua)
Returns a grammatical table by a language identifier.
References bEnglishMorph, bGermanMorph, bRussianMorph, InitMorphologySystem(), morphEnglish, morphGerman, and morphRussian.
Referenced by GetGramInfosFromWord(), GetGramInfoStr(), GetParadigmByGroups(), GetParadigmFromDictionary(), and GetStringByParadigm().
bool InitConcordDicts()
initializes morphology dictionaries
References bEnglishMorph, bGermanMorph, bRussianMorph, InitMorphologySystem(), and CExpc::m_strCause.
Referenced by main().
void FreeConcordDicts()
deletes morphology dictionaries
Referenced by UnloadData().
void concord_daemon_log(const string & t)
Writes a dump message to a log file.
References Format(), GetRegistryString(), and RmlGetCurrentTime().
Referenced by CConcHolder::GetFileSnippets(), CDDCServerListenHost::GetFirstHitsFromCorpora(), CConcHolder::GetHits(), CDDCServerListenHost::GetHitStringsFromOneCorpora(), CConcHolder::GetOccurrences(), CConcHolder::GetTokensFromStorageByBreak(), LoadDDCServer(), CDDCCorpusListenHost::LoadHolder(), LoadLocalCorpora(), CConcIndexator::LoadMaskedFiles(), CConcIndexator::LoadSourceFilesAndOptions(), main(), CDDCServerListenHost::ProcessSocketString(), CDDCCorpusListenHost::ProcessSocketString(), ReloadCorpus(), CDDCServerListenHost::ReloadCorpusProcessSignal(), CDDCServerListenHost::ReplaceHolderIfNeeded(), CConcHolder::SaveOccurrences(), SaveTrigger(), CConcHolder::SimpleQuery(), and termination_handler().
bool IsWord(const CGraphmatFile * piGraphmat, long GraLine) [inline]
References CUnitHolder::HasDescr(), OLLE, and ORLE.
Referenced by CConcIndexator::IsDWDSToken().
bool IsDigit(const CGraphmatFile * piGraphmat, long GraLine) [inline]
References CUnitHolder::HasDescr(), ODigits, and ONumChar.
Referenced by CConcIndexator::IsDWDSToken().
bool IsSentenceEnd(const CGraphmatFile * piGraphmat, long GraLine) [inline]
References CUnitHolder::HasDescr(), and OSentEnd.
Referenced by CConcIndexator::IndexTextOrHtmlFile(), and CConcIndexator::IsDWDSToken().
vector<string> GetGramInfosFromWord(const char * GraLine, MorphLanguageEnum Langua, bool bCapital)
References _QM, gEIG, CLemmatizer::GetAllAncodesQuick(), GetGramtabByLanguage(), GetLemmatizerByLanguage(), Grammems, GrammemsCount, gSUB, MorphAnnotationsDelim, morphGerman, and UnknownPartOfSpeech.
Referenced by CConcIndexator::CreateMorphIndex().
bool GetTextFromXMLRecursive(TiXmlNode * parent, string & Result)
References ErrorMessage(), TiXmlNode::FirstChild(), Format(), TiXmlNode::NextSibling(), Trim(), TiXmlNode::Type(), UnknownPageNumber, and TiXmlNode::Value().
Referenced by CConcIndexator::LoadXmlFile().
bool GetCWBFormattedStringRecursive(const TiXmlNode * parent, string & Result)
References TiXmlElement::Attribute(), ErrorMessage(), TiXmlNode::FirstChild(), Format(), TiXmlNode::NextSibling(), PredefinedTableLineTag, TiXmlNode::ToElement(), Trim(), TiXmlNode::Type(), and TiXmlNode::Value().
Referenced by CConcIndexator::IndexTable().
bool IsXmlFile(const string & FileName)
bool IsTarFile(const string & FileName)
bool ReadSourceFile(const string & SourceFileName, string & CorpusFileName, TAR * pTar, vector< char > & Buffer, bool & bError)
References HTML::GetTextFromHTMLBuffer(), IsHtmlFile(), ReadVector(), and tar_get_next_file().
Referenced by CConcIndexatorInvoker::BuildIndex().
void newhandler()
bool bEnglishMorph = false
Referenced by GetGramtabByLanguage(), GetLemmatizerByLanguage(), and InitConcordDicts().
bool bRussianMorph = false
Referenced by GetGramtabByLanguage(), GetLemmatizerByLanguage(), and InitConcordDicts().
bool bGermanMorph = false
Referenced by GetGramtabByLanguage(), GetLemmatizerByLanguage(), and InitConcordDicts().
CEngGramTab* engGramTab = 0
CRusGramTab* rusGramTab = 0
CGerGramTab* gerGramTab = 0