ddc
MorphDict.h
Go to the documentation of this file.
1 //
2 // This file is part of DDC.
3 //
4 // DDC is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Lesser General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // DDC is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Lesser General Public License for more details.
13 //
14 // You should have received a copy of the GNU Lesser General Public License
15 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // ========== Dialing Lemmatizer (www.aot.ru)
18 // ========== Copyright by Alexey Sokirko, Bryan Jurish (2011)
19 
20 #ifndef AhoKorasickMorph_h
21 #define AhoKorasickMorph_h
22 
23 
24 #include "MorphAutomat.h"
25 #include "../MorphWizardLib/FormInfo.h"
26 
27 
28 
30 {
33 
34  bool operator < (const CLemmaInfoAndLemma& X) const
35  {
36  if (m_LemmaInfo.m_FlexiaModelNo != X.m_LemmaInfo.m_FlexiaModelNo)
37  return m_LemmaInfo.m_FlexiaModelNo < X.m_LemmaInfo.m_FlexiaModelNo;
38 
39  return m_LemmaStrNo < X.m_LemmaStrNo;
40  };
41 
42 };
43 
44 
45 
46 
47 class CMorphDict
48 {
49 
50 protected:
51 
53 
54  vector<int> m_ModelsIndex;
55 
56 
57 
59  {
61 
62  IsLessMorphInterp(const CShortStringHolder& SearchInfos) : m_SearchInfos(SearchInfos) {};
63 
64  bool operator () (const CLemmaInfoAndLemma& _X1, const char* _X2) const
65  {
66  const char* base1 = m_SearchInfos[_X1.m_LemmaStrNo].GetString();
67 
68  return strcmp(base1, _X2) < 0;
69  };
70  bool operator () (const char* _X1, const CLemmaInfoAndLemma& _X2) const
71  {
72  const char* base2 = m_SearchInfos[_X2.m_LemmaStrNo].GetString();
73 
74  return strcmp(_X1, base2) < 0;
75  };
76  bool operator () (const CLemmaInfoAndLemma& _X1, const CLemmaInfoAndLemma& _X2) const
77  {
78  const char* base1 = m_SearchInfos[_X1.m_LemmaStrNo].GetString();
79  const char* base2 = m_SearchInfos[_X2.m_LemmaStrNo].GetString();
80 
81  return strcmp(base1, base2) < 0;
82  };
83 
84 
85  };
87  void GetLemmaInfos (const string& Text, size_t TextPos, vector<CAutomAnnotationInner>& Infos) const;
88  void CreateModelsIndex();
89 
90 public:
91  vector<CFlexiaModel> m_FlexiaModels;
92  vector<CAccentModel> m_AccentModels;
94  vector<CLemmaInfoAndLemma> m_LemmaInfos;
96  vector<BYTE> m_NPSs;
97 
98  CMorphDict(MorphLanguageEnum Language);
99  virtual ~CMorphDict();
100  void InitAutomat(CMorphAutomat* pFormAutomat);
101 
102  // loading & saving
103  bool Load(string GrammarFileName);
104  bool Save(string GrammarFileName) const;
105 
106 
107  void PredictBySuffix (const string& Text, size_t& TextOffset, size_t MinimalPredictSuffixlen, vector<CAutomAnnotationInner>& Infos) const;
108  string GetAllMorphInterpsStr (const string& Text,const size_t TextPos, bool bFullInterp) const;
109 };
110 
111 
112 
113 #endif
114 
115 /*--- emacs style variables ---
116  * Local Variables:
117  * mode: C++
118  * c-file-style: "ellemtel"
119  * c-basic-offset: 4
120  * tab-width: 8
121  * indent-tabs-mode: nil
122  * End:
123  */
CMorphDict::IsLessMorphInterp
Definition: MorphDict.h:58
CShortStringHolder m_Bases
Definition: MorphDict.h:93
StringVector m_Prefixes
Definition: MorphDict.h:95
vector< CLemmaInfoAndLemma > m_LemmaInfos
Definition: MorphDict.h:94
vector< BYTE > m_NPSs
Definition: MorphDict.h:96
IsLessMorphInterp(const CShortStringHolder &SearchInfos)
Definition: MorphDict.h:62
vector< CFlexiaModel > m_FlexiaModels
Definition: MorphDict.h:91
const CShortStringHolder & m_SearchInfos
Definition: MorphDict.h:60
CLemmaInfo m_LemmaInfo
Definition: MorphDict.h:32
CMorphDict
Definition: MorphDict.h:47
WORD m_FlexiaModelNo
Definition: FormInfo.h:91
vector< int > m_ModelsIndex
Definition: MorphDict.h:54
CMorphAutomat * m_pFormAutomat
Definition: MorphDict.h:52
vector< string > StringVector
Definition: utilit.h:146
vector< CAccentModel > m_AccentModels
Definition: MorphDict.h:92
int m_LemmaStrNo
Definition: MorphDict.h:31
CLemmaInfoAndLemma
Definition: MorphDict.h:29
bool operator<(const CLemmaInfoAndLemma &X) const
Definition: MorphDict.h:34
IsLessMorphInterp m_SearchInfoLess
Definition: MorphDict.h:86
MorphLanguageEnum
Definition: utilit.h:162
Definition: util_classes.h:50
Definition: MorphAutomat.h:139
Definition: FormInfo.h:89