21 #ifndef __LEMMATIZERS_H_ 22 #define __LEMMATIZERS_H_ 24 #pragma warning (disable : 4786) 30 #include "../CommonLib/ddcIconv.h" 32 #pragma warning (disable : 4250) 52 virtual void FilterSrc(
string& src)
const = 0;
/// Const member declaration; the body is not part of this header listing.
/// @param InputWordStr   word to process; taken by non-const reference, so the
///                       callee is able to modify the caller's string.
/// @param cap            NOTE(review): presumably "the word is capitalized" - confirm against the definition.
/// @param predict        NOTE(review): presumably enables prediction for unknown words - confirm.
/// @param results        vector of CAutomAnnotationInner passed by non-const reference
///                       (presumably filled by the call - confirm).
/// @param bGetLemmaInfos NOTE(review): semantics not visible in this header - confirm.
/// @return bool; the meaning of true/false is not visible in this header.
59 bool LemmatizeWord(
string& InputWordStr,
const bool cap,
const bool predict, vector<CAutomAnnotationInner>& results,
bool bGetLemmaInfos)
const;
/// Const member declaration; the body is not part of this header listing.
/// @param InputWordStr word to check; taken by value, so the caller's string is not modified.
/// @param FindResults  vector of CAutomAnnotationInner passed by non-const reference
///                     (presumably receives the matches - confirm).
/// @param is_cap       NOTE(review): presumably "the word is capitalized" - confirm.
/// @return bool; the meaning of true/false is not visible in this header.
63 bool CheckAbbreviation(
string InputWordStr,vector<CAutomAnnotationInner>& FindResults,
bool is_cap)
const;
/// Const member declaration; the body is not part of this header listing.
/// Returns nothing; `results` is the only non-const reference parameter.
/// @param InputWordStr word to predict for; taken by value, so the caller's string is not modified.
/// @param results      vector of CAutomAnnotationInner passed by non-const reference
///                     (presumably receives the predictions - confirm).
/// @param is_cap       NOTE(review): presumably "the word is capitalized" - confirm.
65 void PredictByDataBase(
string InputWordStr, vector<CAutomAnnotationInner>& results,
bool is_cap)
const;
/// Const member declaration; the body is not part of this header listing.
/// NOTE(review): presumably tests whether `Prefix` is a known prefix (e.g. a
/// member of m_PrefixesSet) - confirm against the definition.
/// @param Prefix candidate prefix, read-only.
66 bool IsPrefix(
const string& Prefix)
const;
/// Const member declaration; the body is not part of this header listing.
/// NOTE(review): presumably checks that `WordForm` only uses characters of the
/// lemmatizer's alphabet - confirm against the definition.
/// @param WordForm word form to check, read-only.
91 bool CheckABC(
const string& WordForm)
const;
/// Non-const member declaration; the body is not part of this header listing.
/// NOTE(review): presumably (re)creates the m_ic_ext2int / m_ic_int2ext
/// converters used by recode_ext2int() / recode_int2ext() - confirm.
/// @param enc_internal name of the morph-internal encoding; defaults to "".
/// @param enc_external name of the external (user) encoding; defaults to "UTF8".
/// @return bool; the meaning of true/false is not visible in this header.
97 bool initIconv(
const string &enc_internal=
"",
const string &enc_external=
"UTF8");
/// Converts a string with the m_ic_ext2int converter.
/// If m_ic_ext2int is NULL, the input is returned unchanged; otherwise the
/// result of m_ic_ext2int->convert(s_ext) is returned.
/// @param s_ext input string; taken by value, so every call copies it.
/// @return the converted string, or a copy of s_ext when no converter is set.
98 inline std::string
recode_ext2int(
const std::string s_ext)
const {
// A NULL converter means "no recoding": pass the input through as-is.
return m_ic_ext2int==NULL ? s_ext : m_ic_ext2int->
convert(s_ext); }
/// Converts a string with the m_ic_int2ext converter.
/// If m_ic_int2ext is NULL, the input is returned unchanged; otherwise the
/// result of m_ic_int2ext->convert(s_int) is returned.
/// @param s_int input string; taken by value, so every call copies it.
/// @return the converted string, or a copy of s_int when no converter is set.
99 inline std::string
recode_int2ext(
const std::string s_int)
const {
// A NULL converter means "no recoding": pass the input through as-is.
return m_ic_int2ext==NULL ? s_int : m_ic_int2ext->
convert(s_int); }
/// Const member declaration; the body is not part of this header listing.
/// @param bNorm          NOTE(review): semantics not visible in this header - confirm.
/// @param WordStr        input word; taken by non-const reference, so the callee
///                       is able to modify the caller's string.
/// @param capital        NOTE(review): presumably "the word is capitalized" - confirm.
/// @param bUsePrediction NOTE(review): presumably enables prediction for unknown words - confirm.
/// @param Result         vector of CFormInfo passed by non-const reference
///                       (presumably receives the paradigms - confirm).
/// @return bool; the meaning of true/false is not visible in this header.
106 bool CreateParadigmCollection(
bool bNorm,
string& WordStr,
bool capital,
bool bUsePrediction, vector<CFormInfo>& Result)
const;
/// Const member declaration; the body is not part of this header listing.
/// @param InputWordStr   input word; taken by non-const reference, so the callee
///                       is able to modify the caller's string.
/// @param capital        NOTE(review): presumably "the word is capitalized" - confirm.
/// @param OutBuffer      raw char buffer supplied by the caller
///                       (presumably receives the output - confirm).
/// @param MaxBufferSize  NOTE(review): presumably the capacity of OutBuffer in bytes - confirm.
/// @param bUsePrediction NOTE(review): presumably enables prediction for unknown words - confirm.
/// @return bool; the meaning of true/false is not visible in this header.
108 bool GetAllAncodesAndLemmasQuick(
string& InputWordStr,
bool capital,
char* OutBuffer,
size_t MaxBufferSize,
bool bUsePrediction)
const;
151 #endif //__LEMMATIZERS_H_ void PredictByDataBase(string InputWordStr, vector< CAutomAnnotationInner > &results, bool is_cap) const
Definition: Lemmatizers.cpp:469
bool IsHyphenPrefix(const string &Prefix) const
Definition: Lemmatizers.cpp:71
Interface to iconv.h character-conversion routines.
Definition: ddcIconv.h:58
ddcIconv * m_ic_ext2int
iconv converter from user encoding to morph-internal encoding
Definition: Lemmatizers.h:80
virtual ~CLemmatizerEnglish()
Definition: Lemmatizers.h:130
bool m_bMaximalPrediction
Definition: Lemmatizers.h:71
CPredictBase m_Predict
Definition: Lemmatizers.h:48
void GetAllAncodesQuick(const BYTE *WordForm, bool capital, BYTE *OutBuffer, bool bUsePrediction) const
Definition: Lemmatizers.cpp:178
bool m_bAllowRussianJo
Definition: Lemmatizers.h:73
set< string > m_PrefixesSet
Definition: Lemmatizers.h:49
bool LoadDictionariesRegistry(string &strError)
Definition: Lemmatizers.cpp:286
bool m_bLoaded
Definition: Lemmatizers.h:70
bool IsPrefix(const string &Prefix) const
Definition: Lemmatizers.cpp:84
CStatistic m_Statistic
Definition: Lemmatizers.h:47
bool convert(const char *idata, const size_t ilen, string &out)
Definition: ddcIconv.h:162
MorphLanguageEnum GetLanguage() const
Definition: Lemmatizers.h:89
virtual ~CLemmatizerRussian()
Definition: Lemmatizers.h:120
void ReadOptions(string FileName)
Definition: Lemmatizers.cpp:259
string GetPath() const
Definition: Lemmatizers.cpp:45
bool CreateParadigmCollection(bool bNorm, string &WordStr, bool capital, bool bUsePrediction, vector< CFormInfo > &Result) const
Definition: Lemmatizers.cpp:360
std::string m_enc_ext
external encoding (default=""=none)
Definition: Lemmatizers.h:79
virtual void FilterSrc(string &src) const =0
Definition: Lemmatizers.h:116
CAutomAnnotationInner ConvertPredictTupleToAnnot(const CPredictTuple &input) const
Definition: Lemmatizers.cpp:445
bool IsHyphenPostfix(const string &Postfix) const
Definition: Lemmatizers.cpp:66
Definition: MorphDict.h:47
std::string m_enc_int
internal encoding (default=""=none)
Definition: Lemmatizers.h:78
set< string > m_HyphenPrefixes
Definition: Lemmatizers.h:45
virtual ~CLemmatizer()
Definition: Lemmatizers.cpp:38
ddcIconv * m_ic_int2ext
iconv converter from morph-internal encoding to user encoding
Definition: Lemmatizers.h:81
std::string recode_int2ext(const std::string s_int) const
Definition: Lemmatizers.h:99
CMorphAutomat * m_pFormAutomat
Definition: MorphDict.h:52
string GetRegistryString() const
Definition: Lemmatizers.h:54
void AssignWeightIfNeed(vector< CAutomAnnotationInner > &FindResults) const
Definition: Lemmatizers.cpp:164
std::string recode_ext2int(const std::string s_ext) const
Definition: Lemmatizers.h:98
bool initIconv(const string &enc_internal="", const string &enc_external="UTF8")
Definition: Lemmatizers.cpp:275
set< string > m_HyphenPostfixes
Definition: Lemmatizers.h:43
bool CheckAbbreviation(string InputWordStr, vector< CAutomAnnotationInner > &FindResults, bool is_cap) const
Definition: Lemmatizers.cpp:457
bool LemmatizeWord(string &InputWordStr, const bool cap, const bool predict, vector< CAutomAnnotationInner > &results, bool bGetLemmaInfos) const
Definition: Lemmatizers.cpp:92
string m_Registry
Definition: Lemmatizers.h:40
unsigned char BYTE
Definition: utilit.h:94
bool GetAllAncodesAndLemmasQuick(string &InputWordStr, bool capital, char *OutBuffer, size_t MaxBufferSize, bool bUsePrediction) const
Definition: Lemmatizers.cpp:210
bool CheckABC(const string &WordForm) const
Definition: Lemmatizers.cpp:61
Definition: Lemmatizers.h:138
Definition: Statistic.h:28
MorphLanguageEnum m_Language
Definition: MorphAutomat.h:118
Definition: MorphAutomat.h:91
MorphLanguageEnum
Definition: utilit.h:162
Definition: Lemmatizers.h:37
CLemmatizer(MorphLanguageEnum Language)
Definition: Lemmatizers.cpp:25
bool m_bUseStatistic
Definition: Lemmatizers.h:72
const CStatistic & GetStatistic() const
Definition: Lemmatizers.cpp:77
Definition: GraphmatFile.h:28
Definition: Lemmatizers.h:125
virtual ~CLemmatizerGerman()
Definition: Lemmatizers.h:143