ddc
|
#include "StdConc.h"
#include "DwdsThesaurus.h"
#include "ConcIndexator.h"
#include "DocumentIterator.h"
Functions | |
bool | IsWord (const CGraphmatFile &piGraphmat, long GraLine) |
bool | IsDigit (const CGraphmatFile &piGraphmat, long GraLine) |
bool | IsSentenceEnd (const CGraphmatFile &piGraphmat, long GraLine) |
void | GetTextFromXMLRecursive (TiXmlNode *parent, string &Result) |
void | GetCWBFormattedStringRecursive (const TiXmlNode *parent, string &Result) |
bool | IsXmlFile (const string &FileName) |
bool | CreateMorphIndex (const CStringIndexator *indexator, string path, MorphLanguageEnum language, size_t maxTokenCountInOnePeriod) |
Variables | |
const DWORD | DefaultMaxTokenCountInOnePeriod = 5000000 |
global default value (5000000) for CConcIndexator::m_UserMaxTokenCountInOnePeriod More... | |
const DWORD | DefaultMaxInputLoadIndexSize = 400000 |
global default value (400000) for CConcIndexator::m_UserMaxInputLoadIndexSize (must be <= DefaultMaxTokenCountInOnePeriod) More... | |
bool IsWord | ( | const CGraphmatFile & | piGraphmat, |
long | GraLine | ||
) |
References CUnitHolder::HasDescr(), OLLE, and ORLE.
Referenced by CConcIndexator::IsDWDSToken().
bool IsDigit | ( | const CGraphmatFile & | piGraphmat, |
long | GraLine | ||
) |
References CUnitHolder::HasDescr(), ODigits, and ONumChar.
Referenced by CConcIndexator::IsDWDSToken().
bool IsSentenceEnd | ( | const CGraphmatFile & | piGraphmat, |
long | GraLine | ||
) |
References CUnitHolder::HasDescr(), and OSentEnd.
Referenced by CConcIndexator::IndexTextOrHtmlFile(), and CConcIndexator::IsDWDSToken().
void GetTextFromXMLRecursive | ( | TiXmlNode * | parent, |
string & | Result | ||
) |
References TiXmlNode::COMMENT, TiXmlNode::ELEMENT, TiXmlNode::FirstChild(), Format(), TiXmlNode::NextSibling(), TiXmlNode::TEXT, Trim(), TiXmlNode::Type(), UnknownPageNumber, and TiXmlNode::Value().
Referenced by CConcIndexator::LoadXmlFile().
void GetCWBFormattedStringRecursive | ( | const TiXmlNode * | parent, |
string & | Result | ||
) |
References TiXmlElement::Attribute(), TiXmlNode::COMMENT, TiXmlNode::ELEMENT, TiXmlNode::FirstChild(), Format(), TiXmlNode::NextSibling(), PredefinedTableLineTag, TiXmlNode::TEXT, TiXmlNode::ToElement(), Trim(), TiXmlNode::Type(), and TiXmlNode::Value().
Referenced by CConcIndexator::IndexFreeIndex().
bool IsXmlFile | ( | const string & | FileName | ) |
bool CreateMorphIndex | ( | const CStringIndexator * | indexator, |
string | path, | ||
MorphLanguageEnum | language, | ||
size_t | maxTokenCountInOnePeriod | ||
) |
References CIndexSetForLoadingStage::AddInputLoadIndexToMemoryLoadIndex(), CIndexSetForLoadingStage::AddMemoryLoadIndexToMainLoadIndex(), CheckLanguage(), CIndexSetForLoadingStage::CreateTempFiles(), CStringIndexSet::DestroyIndexSet(), ErrorMessage(), GetGramInfosFromWord(), CStringIndexator::GetIndexByAlias(), GetIndexItemSetByVectorString(), CStringIndexSet::GetIndexItemStr(), CIndexSetForLoadingStage::InsertToInputLoadIndex(), is_upper_alpha(), CIndexSetForQueryingStage::m_Index, CIndexSetForQueryingStage::ReadAllOccurrences(), CStringIndexSet::ReadFromTheDisk(), CIndexSetForLoadingStage::SaveMemoryLoadIndex(), ddcVecFile< T >::size(), CIndexSetForLoadingStage::SortInputAndMemoryIndices(), and CStringIndexSet::WriteToFile().
Referenced by CConcIndexator::CreateMorphIndexWrapper(), and CConcIndexator::IndexOneFile().
const DWORD DefaultMaxTokenCountInOnePeriod = 5000000 |
global default value (5000000) for CConcIndexator::m_UserMaxTokenCountInOnePeriod
Referenced by CConcIndexator::GetMaxTokenCountInOnePeriod(), and CConcordance::InitDefaultOptions().
const DWORD DefaultMaxInputLoadIndexSize = 400000 |
global default value (400000) for CConcIndexator::m_UserMaxInputLoadIndexSize (must be <= DefaultMaxTokenCountInOnePeriod)
Referenced by CConcIndexator::GetMaxInputLoadIndexSize(), and CConcordance::InitDefaultOptions().