#include "StdConc.h"
#include "QueryNode.h"
#include "../common/DwdsThesaurus.h"
#include "../common/util_classes.h"
#define GetOccurrencesSize() m_Occurrences.size()
Referenced by CQueryNode::ConvertOccurrencesToHits(), CQueryNode::ConvertOccurrencesToHitsForPatterns(), CQueryWithNode::EvaluateWithoutHits(), CQuerySequenceNode::EvaluateWithoutHits(), CQueryTokenNode::EvaluateWithoutHits(), CQueryBinaryOperationNode::hits_add(), CQueryBinaryOperationNode::hits_and_positions_difference(), CQueryBinaryOperationNode::hits_and_positions_intersection(), and CQueryBinaryOperationNode::hits_and_positions_union().
void GetWordForms(const MorphLanguageEnum Langua, const string & src, set< string > & WordForms)
Compute the set of types WordForms which may share a lemma with src in language Langua.
The underlying CLemmatizer should handle transcoding of user queries transparently, assuming you called CLemmatizer::initIconv() with the appropriate encoding arguments.
This is very ugly, and the entire morphology system needs an overhaul.
Formerly a hidden function in QueryNode.cpp.
References CLemmatizer::CreateParadigmCollection(), errProcessMorphology, Format(), CFormInfo::GetCount(), GetLemmatizerByLanguage(), CFormInfo::GetWordForm(), is_upper_alpha(), morphUnknown, CLemmatizer::recode_ext2int(), CLemmatizer::recode_int2ext(), ReverseChar(), and RmlMakeLower().
Referenced by CQueryTokenNode::CreateTokenPattern().
void AddFormsWithDifferentRegisters(const MorphLanguageEnum Langua, set< string > & WordForms)
References is_lower_alpha(), ReverseChar(), RmlMakeLower(), and RmlMakeUpper().
Referenced by CQueryTokenNode::CreateFileList(), and CQueryTokenNode::CreateTokenPattern().
Referenced by CConcIndexatorInvoker::BuildIndex().
const size_t MaxDistanceForNear = 32
Referenced by CQuerySequenceNode::AddDistance(), and CQueryNearNode::ReadDistanceFromString().