ddc
RusGramTab.h
Go to the documentation of this file.
1 //
2 // This file is part of DDC.
3 //
4 // DDC is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Lesser General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // DDC is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Lesser General Public License for more details.
13 //
14 // You should have received a copy of the GNU Lesser General Public License
15 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // ========== Dialing Lemmatizer (www.aot.ru)
18 // ========== Copyright by Alexey Sokirko, Bryan Jurish (2011)
19 
20 #ifndef __RUSGRAMTAB_H_
21 #define __RUSGRAMTAB_H_
22 
23 
24 #include "agramtab_.h"
25 #include "rus_consts.h"
26 
28 
// Numeric range of gram codes. MaxGrmCount is the number of values in
// [StartUp, EndUp); the cross-reference index of this listing shows it used
// as the dimension of CRusGramTab::Lines.
// NOTE(review): presumably a gram code is two bytes read as one 16-bit
// number -- confirm against s2i()/i2s() in RusGramTab.cpp.
29 const unsigned int StartUp = 0xC0E0; // lowest code: bytes 0xC0 0xE0 (the original note also listed a third byte, 0xA0)
30 const unsigned int EndUp = 0x10000; // one past 0xFFFF (bytes 0xFF 0xFF)
31 const unsigned int MaxGrmCount = EndUp -StartUp; // 0x10000 - 0xC0E0 = 16160; the original note read "16159 (16 KB)"
// Table of part-of-speech identifier strings; the trailing comments give the
// index of each entry (0..21, i.e. 22 entries).
// The strings are spelled as hex-escaped bytes.
// NOTE(review): the bytes look like Windows-1251 Cyrillic abbreviations --
// confirm the source encoding before editing any literal.
// NOTE(review): the declaration line that introduces this initializer is not
// present in this listing, so the array's name and dimensions cannot be
// checked from here.
33 // There must be no spaces between words (inside an identifier).
35 { "\xd1", // 0
36  "\xcf", // 1
37  "\xc3", // 2
38  "\xcc\xd1", // 3
39  "\xcc\xd1-\xcf", // 4
40  "\xcc\xd1-\xcf\xd0\xc5\xc4\xca", // 5
41  "\xd7\xc8\xd1\xcb", // 6
42  "\xd7\xc8\xd1\xcb-\xcf", // 7
43  "\xcd", // 8
44  "\xcf\xd0\xc5\xc4\xca", //9
45  "\xcf\xd0\xc5\xc4\xcb", // 10
46  "\xcf\xce\xd1\xcb", // 11
47  "\xd1\xce\xde\xc7", // 12
48  "\xcc\xc5\xc6\xc4", // 13
49  "\xc2\xc2\xce\xc4\xcd",// 14
50  "\xd4\xd0\xc0\xc7", // 15
51  "\xd7\xc0\xd1\xd2", // 16
52  "\xca\xd0_\xcf\xd0\xc8\xcb", // 17
53  "\xcf\xd0\xc8\xd7\xc0\xd1\xd2\xc8\xc5", //18
54  "\xc4\xc5\xc5\xcf\xd0\xc8\xd7\xc0\xd1\xd2\xc8\xc5", //19
55  "\xca\xd0_\xcf\xd0\xc8\xd7\xc0\xd1\xd2\xc8\xc5", // 20
56  "\xc8\xcd\xd4\xc8\xcd\xc8\xd2\xc8\xc2" //21
57 };
58 
// Table of grammeme identifier strings. Each entry is a NUL-terminated string
// stored in a 10-byte slot; the comments inside the initializer give the
// index range of each group.
// NOTE(review): GrammemsCount is 52, but only 51 string initializers are
// listed below (indices 0..50). The remaining element is therefore
// zero-initialized, i.e. an empty string at index 51. Confirm whether the
// extra slot is intentional.
// NOTE(review): the hex-escaped bytes look like Windows-1251 Cyrillic
// abbreviations -- confirm the source encoding before editing any literal.
59 const short GrammemsCount = 52;
60 const char Grammems[GrammemsCount][10] = {
61  // 0..1
62  "\xec\xed","\xe5\xe4",
63  // 2..8
64  "\xe8\xec","\xf0\xe4","\xe4\xf2","\xe2\xed","\xf2\xe2","\xef\xf0","\xe7\xe2",
65  // gender 9-12
66  "\xec\xf0","\xe6\xf0","\xf1\xf0","\xec\xf0-\xe6\xf0",
67  // 13..15
68  "\xed\xf1\xf2","\xe1\xf3\xe4","\xef\xf0\xf8",
69  // 16..18
70  "1\xeb","2\xeb","3\xeb",
71  // 19
72  "\xef\xe2\xeb",
73  // 20..21
74  "\xee\xe4","\xed\xee",
75  // 22
76  "\xf1\xf0\xe0\xe2\xed",
77  // 23..24
78  "\xf1\xe2","\xed\xf1",
79  // 25..26
80  "\xed\xef","\xef\xe5",
81  // 27..28
82  "\xe4\xf1\xf2","\xf1\xf2\xf0",
83  // 29-31
84  "0", "\xe0\xe1\xe1\xf0", "\xee\xf2\xf7",
85  // 32-33
86  "\xeb\xee\xea", "\xee\xf0\xe3",
87  // 34-35
88  "\xea\xe0\xf7", "\xe4\xf4\xf1\xf2",
89  // 36-37 (adverbs)
90  "\xe2\xee\xef\xf0", "\xf3\xea\xe0\xe7\xe0\xf2",
91  // 38..39
92  "\xe8\xec\xff","\xf4\xe0\xec",
93  // 40
94  "\xe1\xe5\xe7\xeb",
95  // 41,42
96  "\xe6\xe0\xf0\xe3", "\xee\xef\xf7",
97  // 43,44,45
98  "\xf0\xe0\xe7\xe3", "\xef\xf0\xe8\xf2\xff\xe6", "\xe0\xf0\xf5",
99  // 46: for the second genitive and the second prepositional
100  "2",
 // 47,48
101  "\xef\xee\xfd\xf2", "\xef\xf0\xee\xf4",
 // 49,50
102  "\xef\xf0\xe5\xe2", "\xef\xee\xeb\xee\xe6"
103 };
104 
105 
106 
107 
// Table of clause-type name strings: rClauseTypesCount (12) entries, each a
// NUL-terminated string stored in a 30-byte slot. All 12 slots have an
// explicit initializer.
// NOTE(review): presumably the array index is the clause "type" exchanged by
// CRusGramTab::GetClauseTypeByName / GetClauseNameByType -- confirm in
// RusGramTab.cpp.
// NOTE(review): the hex-escaped bytes look like Windows-1251 Cyrillic
// abbreviations -- confirm the source encoding before editing any literal.
108 const int rClauseTypesCount = 12;
109 const char rClauseTypes [rClauseTypesCount][30] =
110 {
111  "\xc3\xcb_\xcb\xc8\xd7\xcd", // 0
112  "\xc4\xcf\xd0", // 1
113  "\xca\xd0_\xcf\xd0\xd7", // 2
114  "\xca\xd0_\xcf\xd0\xc8\xcb", // 3
115  "\xcf\xd0\xc5\xc4\xca", // 4
116  "\xcf\xd0\xd7", // 5
117  "\xc8\xcd\xd4", // 6
118  "\xc2\xc2\xce\xc4", // 7
119  "\xd2\xc8\xd0\xc5", // 8
120  "\xcd\xd1\xce", // 9
121  "\xd1\xd0\xc0\xc2\xcd", // 10
122  "\xca\xce\xcf\xd3\xcb" // 11
123 };
124 
125 
126 
127 
// Grammar-table class for Russian, publicly derived from CAgramtab.
// Only declarations appear here; the cross-reference index of this listing
// places the member definitions in RusGramTab.cpp.
// NOTE(review): the index also lists a member
// `CAgramtabLine * Lines[MaxGrmCount]` at original line 130, which is not
// present in this listing.
128 class CRusGramTab : public CAgramtab{
129 public:
131  CRusGramTab();
132  ~CRusGramTab();
133 
 // Table accessors.
 // NOTE(review): presumably these expose the constant tables declared in this
 // header and the Lines array -- confirm in RusGramTab.cpp.
134  BYTE GetPartOfSpeechesCount () const;
135  const char* GetPartOfSpeechStr(BYTE i) const;
136  size_t GetGrammemsCount() const;
137  const char* GetGrammemStr(size_t i) const;
138  size_t GetMaxGrmCount() const;
 // The non-const overload returns a reference to the stored pointer, so a
 // caller can assign through it; the const overload returns the pointer by value.
139  CAgramtabLine*& GetLine(size_t LineNo);
140  const CAgramtabLine* GetLine(size_t LineNo) const;
 // NOTE(review): names suggest string-to-index / index-to-string conversion
 // of a gram code -- confirm.
141  size_t s2i(const char * s ) const;
142  string i2s(WORD i) const;
143 
 // Takes a table string and reports a part of speech and a grammeme set
 // through the two reference parameters; the bool result's meaning is defined
 // in RusGramTab.cpp.
144  bool ProcessPOSAndGrammems (const char* tab_str, BYTE& PartOfSpeech, QWORD& grammems) const;
145  const char* GetRegistryString() const;
146 
 // "Gleiche*" checks take two (or three) gram-code strings.
 // NOTE(review): presumably agreement tests between the coded forms -- the
 // exact rules are only visible in RusGramTab.cpp.
147  bool GleicheCase(const char* gram_code_noun, const char* gram_code_adj) const;
148  bool GleicheCaseNumber(const char* gram_code1, const char* gram_code2) const;
149  QWORD GleicheGenderNumberCase(const char* common_gram_code_noun, const char* gram_code_noun, const char* gram_code_adj) const;
150 
151  bool GleicheGenderNumber(const char* gram_code1, const char* gram_code2) const;
152  bool GleicheSubjectPredicate(const char* gram_code1, const char* gram_code2) const;
 // Clause-type name <-> numeric type conversions.
153  long GetClauseTypeByName(const char* TypeName) const;
154 
155  const char* GetClauseNameByType(long type) const;
 // NOTE(review): `const` on a by-value return type has no effect for callers;
 // it must still match the out-of-class definition, so do not drop it here alone.
156  const size_t GetClauseTypesCount() const;
157 
 // Predicates over part-of-speech sets, grammeme sets and lemma strings.
 // NOTE(review): `poses` / `Poses` is presumably a bit mask with one bit per
 // part-of-speech index -- confirm in RusGramTab.cpp.
158  bool IsStrongClauseRoot(const DWORD Poses) const;
159  bool is_month (const char* lemma) const;
160  bool is_small_number (const char* lemma) const;
161  bool IsMorphNoun (size_t Poses) const;
162  bool is_morph_adj (size_t poses) const;
163  bool is_morph_participle (size_t poses) const;
164  bool is_morph_pronoun (size_t poses) const;
165  bool is_morph_pronoun_adjective(size_t poses) const;
166  bool is_left_noun_modifier (size_t poses, QWORD grammems) const;
167  bool is_numeral (size_t poses) const;
168  bool is_verb_form (size_t poses) const;
169  bool is_infinitive(size_t poses) const;
170  bool is_morph_predk(size_t poses) const;
171  bool is_morph_adv(size_t poses) const;
172  bool is_morph_article(size_t poses) const;
173  bool is_morph_personal_pronoun (size_t poses, QWORD grammems) const;
174  bool IsSimpleParticle(const char* lemma, size_t poses) const;
175  bool IsSynNoun(size_t Poses, const char* Lemma) const;
176  bool IsStandardParamAbbr (const char* WordStrUpper) const;
177 
178 };
179 
// Free functions that each take two table lines and return bool. They are only
// declared here; the cross-reference index of this listing places both
// definitions in RusGramTab.cpp.
// NOTE(review): presumably pairwise agreement predicates over two
// CAgramtabLine entries -- confirm at the definitions.
180 extern bool GenderNumberCaseRussian (const CAgramtabLine* l1, const CAgramtabLine* l2);
181 extern bool FiniteFormCoordRussian (const CAgramtabLine* l1, const CAgramtabLine* l2);
182 
183 #endif //__RUSGRAMTAB_H_
184 
185 /*--- emacs style variables ---
186  * Local Variables:
187  * mode: C++
188  * c-file-style: "ellemtel"
189  * c-basic-offset: 4
190  * tab-width: 8
191  * indent-tabs-mode: nil
192  * End:
193  */
const char Grammems[GrammemsCount][10]
Definition: RusGramTab.h:60
bool GenderNumberCaseRussian(const CAgramtabLine *l1, const CAgramtabLine *l2)
Definition: RusGramTab.cpp:275
CAgramtabLine * Lines[MaxGrmCount]
Definition: RusGramTab.h:130
rPartOfSpeeches
Definition: morph_const.h:27
bool IsStandardParamAbbr(const char *WordStrUpper) const
Definition: RusGramTab.cpp:602
bool GleicheSubjectPredicate(const char *gram_code1, const char *gram_code2) const
Definition: RusGramTab.cpp:364
string i2s(WORD i) const
Definition: RusGramTab.cpp:86
bool FiniteFormCoordRussian(const CAgramtabLine *l1, const CAgramtabLine *l2)
Definition: RusGramTab.cpp:288
bool is_morph_participle(size_t poses) const
Definition: RusGramTab.cpp:468
uint64_t QWORD
Definition: utilit.h:107
const int rClauseTypesCount
Definition: RusGramTab.h:108
bool is_morph_personal_pronoun(size_t poses, QWORD grammems) const
Definition: RusGramTab.cpp:527
const char * GetGrammemStr(size_t i) const
Definition: RusGramTab.cpp:68
long GetClauseTypeByName(const char *TypeName) const
Definition: RusGramTab.cpp:380
const char * GetRegistryString() const
Definition: RusGramTab.cpp:43
bool is_small_number(const char *lemma) const
Definition: RusGramTab.cpp:444
bool is_morph_adj(size_t poses) const
Definition: RusGramTab.cpp:462
bool is_morph_pronoun(size_t poses) const
Definition: RusGramTab.cpp:474
QWORD GleicheGenderNumberCase(const char *common_gram_code_noun, const char *gram_code_noun, const char *gram_code_adj) const
Definition: RusGramTab.cpp:332
Definition: agramtab_.h:39
const unsigned int EndUp
Definition: RusGramTab.h:30
const char * GetClauseNameByType(long type) const
Definition: RusGramTab.cpp:394
CRusGramTab
Definition: RusGramTab.h:128
CAgramtabLine *& GetLine(size_t LineNo)
Definition: RusGramTab.cpp:72
uint16_t WORD
Definition: utilit.h:106
bool is_verb_form(size_t poses) const
Definition: RusGramTab.cpp:500
const char rClauseTypes[rClauseTypesCount][30]
Definition: RusGramTab.h:109
bool ProcessPOSAndGrammems(const char *tab_str, BYTE &PartOfSpeech, QWORD &grammems) const
Definition: RusGramTab.cpp:99
const size_t GetClauseTypesCount() const
Definition: RusGramTab.cpp:375
bool is_morph_article(size_t poses) const
Definition: RusGramTab.cpp:613
bool is_morph_predk(size_t poses) const
Definition: RusGramTab.cpp:515
Definition: agramtab_.h:28
bool is_morph_adv(size_t poses) const
Definition: RusGramTab.cpp:521
const unsigned int StartUp
Definition: RusGramTab.h:29
bool IsStrongClauseRoot(const DWORD Poses) const
Definition: RusGramTab.cpp:414
const char * GetPartOfSpeechStr(BYTE i) const
Definition: RusGramTab.cpp:60
size_t GetGrammemsCount() const
Definition: RusGramTab.cpp:64
BYTE GetPartOfSpeechesCount() const
Definition: RusGramTab.cpp:50
bool GleicheGenderNumber(const char *gram_code1, const char *gram_code2) const
Definition: RusGramTab.cpp:360
unsigned char BYTE
Definition: utilit.h:94
~CRusGramTab()
Definition: RusGramTab.cpp:36
const short GrammemsCount
Definition: RusGramTab.h:59
bool IsSimpleParticle(const char *lemma, size_t poses) const
Definition: RusGramTab.cpp:538
bool is_left_noun_modifier(size_t poses, QWORD grammems) const
Definition: RusGramTab.cpp:486
size_t s2i(const char *s) const
Definition: RusGramTab.cpp:81
bool GleicheCaseNumber(const char *gram_code1, const char *gram_code2) const
Definition: RusGramTab.cpp:356
bool GleicheCase(const char *gram_code_noun, const char *gram_code_adj) const
Definition: RusGramTab.cpp:369
bool is_month(const char *lemma) const
Definition: RusGramTab.cpp:433
bool IsSynNoun(size_t Poses, const char *Lemma) const
Definition: RusGramTab.cpp:581
CRusGramTab()
Definition: RusGramTab.cpp:27
size_t GetMaxGrmCount() const
Definition: RusGramTab.cpp:55
uint32_t DWORD
Definition: utilit.h:105
bool is_morph_pronoun_adjective(size_t poses) const
Definition: RusGramTab.cpp:480
bool is_infinitive(size_t poses) const
Definition: RusGramTab.cpp:510
const BYTE rPartOfSpeechCount
Definition: RusGramTab.h:32
bool is_numeral(size_t poses) const
Definition: RusGramTab.cpp:495
const unsigned int MaxGrmCount
Definition: RusGramTab.h:31
bool IsMorphNoun(size_t Poses) const
Definition: RusGramTab.cpp:454