ddc
GerGramTab.h
Go to the documentation of this file.
1 //
2 // This file is part of DDC.
3 //
4 // DDC is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Lesser General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // DDC is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Lesser General Public License for more details.
13 //
14 // You should have received a copy of the GNU Lesser General Public License
15 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // ========== Dialing Lemmatizer (www.aot.ru)
18 // ========== Copyright by Alexey Sokirko, Bryan Jurish (2011)
19 
20 #ifndef __GERGRAMTAB_H_
21 #define __GERGRAMTAB_H_
22 
23 
24 #include "agramtab_.h"
25 #include "ger_consts.h"
26 
27 
29 // Two-character gramcode range used by CGerGramTab (see s2i / i2s)
30 const unsigned int gStartUp = 0x4141; // "AA": lowest two-byte code, maps to index 0
31 const unsigned int gEndUp = 0x7A7B; // "zz" + 1: one past the highest two-byte code
32 const unsigned int gMaxGrmCount = gEndUp -gStartUp; // 0x7A7B - 0x4141 = 14650 index slots
33 
35 
37  "ART",
38  "ADJ",
39  "ADV",
40  "EIG",
41  "SUB",
42  "VER",
43  "PA1",
44  "PA2",
45  "PRO",
46  "PRP",
47  "KON",
48  "NEG",
49  "INJ",
50  "ZAL",
51  "ZUS",
52  "PROBEG",
53  "INF"
54 };
// Number of grammeme names in gGrammems; must match the initializer below.
55 const short gGrammemsCount = 62;
// Grammeme names. CGerGramTab::GetGrammemStr(i) returns row i of this table,
// so the position of a string is its grammeme index. Each row holds at most
// 9 characters plus the terminating NUL (the longest entries are "sich-akk"
// and "sich-dat"). The index ranges in the group comments below refer to
// positions in this table.
56 const char gGrammems[gGrammemsCount][10] = {
57 
58 // common / unknown, indices 0..3
59  "noa", // without article
60  "prd", // predicative
61  "pro",
62  "tmp",
63 
64 
65 // proper names (Eigennamen), indices 4..12
66  "nac","mou","cou","geo","wat","geb","std","lok","vor",
67 
68 // reflexive verbs, indices 13..14
69  "sich-akk","sich-dat",
70 
71 // verb classes, indices 15..18
72  "sft","non","mod","aux",
73 
74 // verb forms, indices 19..26
75  "kj1","kj2","pa1","pa2","eiz","imp","prt","prae",
76 
77 // adjective, indices 27..29
78  "gru","kom","sup",
79 
80 // conjunctions, indices 30..34
81  "pri","inf","vgl","neb","unt",
82 
83 
84 // pronouns, indices 35..41
85  "per","dem","inr","pos","ref","rin","alg",
86 
87 // adjective's articles, indices 42..44
88  "sol","ind","def",
89 
90 // persons, indices 45..47
91  "1", "2", "3",
92 
93 // genus (grammatical gender), indices 48..50
94  "fem","mas","neu",
95 
96 
97 // number, indices 51..52
98  "plu","sin",
99 
100 // cases, indices 53..56
101  "nom","gen","dat","akk",
102 
103 // abbreviation, index 57
104  "abbr",
105 
106 // inhabitant designation (Einwohnerbezeichnung), index 58
107  "ew",
108 
109 // transitivity, indices 59..61
110  "trans", "intra", "imper"
111 
112 
113 };
114 
115 
// Number of clause type names in gClauseTypes; returned by
// CGerGramTab::GetClauseTypesCount().
116 const int gClauseTypesCount = 3;
// Clause type names, each stored in a fixed 30-byte row.
// NOTE(review): presumably the table searched by GetClauseTypeByName and
// indexed by GetClauseNameByType -- confirm in GerGramTab.cpp.
117 const char gClauseTypes [gClauseTypesCount][30] =
118 {
119  "VERBSATZ", "PARTIZIPIALSATZ", "INFINITIVSATZ"
120 };
121 
122 
123 
124 
125 
// German grammar table: a CAgramtab specialization that exposes the German
// part-of-speech, grammeme and clause-type name tables defined in this header,
// plus the mapping between two-character gramcodes and line indices.
// Only the small accessors are defined inline; every member that is merely
// declared here has its body outside this header.
126 class CGerGramTab : public CAgramtab
127 {
128 public:
130  CGerGramTab();
131  ~CGerGramTab();
132 
 // Returns gPartOfSpeeches[i]. The index is not range-checked.
134  const char* GetPartOfSpeechStr(BYTE i) const {return gPartOfSpeeches[i];};
 // Returns gGrammemsCount, the number of rows in gGrammems.
135  size_t GetGrammemsCount() const{return gGrammemsCount;};
 // Returns row i of gGrammems. The index is not range-checked.
136  const char* GetGrammemStr(size_t i) const{return gGrammems[i];};
 // Returns gMaxGrmCount (gEndUp - gStartUp).
137  size_t GetMaxGrmCount() const{return gMaxGrmCount;};
 // Returns a reference to the pointer stored in Lines[LineNo], so the caller
 // can replace the slot. LineNo is not range-checked.
138  CAgramtabLine*& GetLine(size_t LineNo) {return Lines[LineNo];}
 // Read-only access to Lines[LineNo]. LineNo is not range-checked.
139  const CAgramtabLine* GetLine(size_t LineNo) const {return Lines[LineNo];}
 // Converts a gramcode to an index: first byte * 0x100 + second byte - gStartUp.
 // Both bytes are read as unsigned char. s[0] and s[1] are read unconditionally,
 // so s must point to at least two readable bytes; the result is not validated
 // against gMaxGrmCount.
140  size_t s2i(const char * s ) const { return (unsigned char) s[0]*0x100+(unsigned char) s[1] - gStartUp;};
141 
 // Inverse of s2i: adds gStartUp to i and returns the two-character string
 // made of the high byte followed by the low byte of the sum.
142  string i2s(WORD i) const
143  {
144  i += gStartUp;
145  char res[3];
146 
147  res[0] = (i >> 8);
148  res[1] = (0xFF & i);
149  res[2] = 0; // NUL terminator so res converts to a 2-character string
150  return res;
151  };
152  int GetGroupTypebyName(const char* TypeName) const;
153 
154 
155 
 // Returns the fixed registry key string for the German gramtab.
156  const char* GetRegistryString() const
157  {
158  return "Software\\Dialing\\Lemmatizer\\German\\Ggramtab";
159  };
160 
 // Clause type lookups and agreement checks; declared only.
 // NOTE(review): "Gleiche" is German for "same", so the Gleiche* members
 // presumably test agreement between two gramcodes -- confirm in the .cpp.
161  long GetClauseTypeByName(const char* TypeName) const;
162  const char* GetClauseNameByType(long type) const;
163  bool GleicheGenderNumber(const char* gram_code1, const char* gram_code2) const;
164  bool GleicheSubjectPredicate(const char* gram_code1, const char* gram_code2) const;
 // Returns gClauseTypesCount, the number of rows in gClauseTypes.
165  const size_t GetClauseTypesCount() const
166  {
167  return gClauseTypesCount;
168  };
 // Classification predicates; declared only.
 // NOTE(review): the poses / Poses arguments look like part-of-speech bit
 // sets and grammems like a grammeme bit set -- confirm against the
 // definitions before relying on that.
169  bool IsStrongClauseRoot(const DWORD Poses) const;
170  bool is_month (const char* lemma) const;
171  bool is_small_number (const char* lemma) const;
172  bool IsMorphNoun (size_t Poses) const;
173  bool is_morph_adj (size_t poses) const;
174  bool is_morph_participle (size_t poses) const;
175  bool is_morph_pronoun (size_t poses) const;
176  bool is_morph_pronoun_adjective(size_t poses) const;
177  bool is_left_noun_modifier (size_t poses, QWORD grammems) const;
178  bool is_numeral (size_t poses) const;
179  bool is_verb_form (size_t poses) const;
180  bool is_infinitive(size_t poses) const;
181  bool is_morph_predk(size_t poses) const;
182  bool is_morph_adv(size_t poses) const;
183  bool is_morph_personal_pronoun (size_t poses, QWORD grammems) const;
184  bool is_morph_article(size_t poses) const;
185 
186  bool IsSimpleParticle(const char* lemma, size_t poses) const;
187  bool IsSynNoun(size_t Poses, const char* Lemma) const;
188  bool IsStandardParamAbbr (const char* WordStrUpper) const;
189  bool GleicheCase(const char* gram_code_noun, const char* gram_code_adj) const;
190  bool GleicheCaseNumber(const char* gram_code1, const char* gram_code2) const;
191  QWORD GleicheGenderNumberCase(const char* common_gram_code_noun, const char* gram_code_noun, const char* gram_code_adj) const;
192 
193 };
194 
195 
196 
204 
205 
206 
207 
208 
209 
210 #endif //__GERGRAMTAB_H_
211 
212 /*--- emacs style variables ---
213  * Local Variables:
214  * mode: C++
215  * c-file-style: "ellemtel"
216  * c-basic-offset: 4
217  * tab-width: 8
218  * indent-tabs-mode: nil
219  * End:
220  */
CAgramtabLine * Lines[gMaxGrmCount]
Definition: GerGramTab.h:129
bool GleicheCaseNumber(const char *gram_code1, const char *gram_code2) const
Definition: GerGramTab.cpp:245
Definition: ger_consts.h:76
Definition: ger_consts.h:130
bool is_morph_predk(size_t poses) const
Definition: GerGramTab.cpp:200
CGerGramTab()
Definition: GerGramTab.cpp:27
CAgramtabLine *& GetLine(size_t LineNo)
Definition: GerGramTab.h:138
const CAgramtabLine * GetLine(size_t LineNo) const
Definition: GerGramTab.h:139
size_t GetGrammemsCount() const
Definition: GerGramTab.h:135
Definition: ger_consts.h:140
BYTE GetPartOfSpeechesCount() const
Definition: GerGramTab.h:133
const char * GetRegistryString() const
Definition: GerGramTab.h:156
const char gClauseTypes[gClauseTypesCount][30]
Definition: GerGramTab.h:117
Definition: ger_consts.h:78
size_t GetMaxGrmCount() const
Definition: GerGramTab.h:137
const size_t GetClauseTypesCount() const
Definition: GerGramTab.h:165
const unsigned int gStartUp
Definition: GerGramTab.h:30
Definition: ger_consts.h:125
Definition: ger_consts.h:143
const QWORD gAllPersons
Definition: GerGramTab.h:201
Definition: ger_consts.h:84
uint64_t QWORD
Definition: utilit.h:107
gPartOfSpeeches
Definition: ger_consts.h:28
QWORD GleicheGenderNumberCase(const char *common_gram_code_noun, const char *gram_code_noun, const char *gram_code_adj) const
Definition: GerGramTab.cpp:263
bool is_morph_article(size_t poses) const
Definition: GerGramTab.cpp:268
const short gGrammemsCount
Definition: GerGramTab.h:55
Definition: ger_consts.h:79
const QWORD gAllGenders
Definition: GerGramTab.h:200
const QWORD gAllCases
Definition: GerGramTab.h:198
size_t s2i(const char *s) const
Definition: GerGramTab.h:140
bool GleicheGenderNumber(const char *gram_code1, const char *gram_code2) const
Definition: GerGramTab.cpp:80
Definition: ger_consts.h:83
bool IsSynNoun(size_t Poses, const char *Lemma) const
Definition: GerGramTab.cpp:223
long GetClauseTypeByName(const char *TypeName) const
Definition: GerGramTab.cpp:46
Definition: GerGramTab.h:126
bool is_left_noun_modifier(size_t poses, QWORD grammems) const
Definition: GerGramTab.cpp:169
const char * GetPartOfSpeechStr(BYTE i) const
Definition: GerGramTab.h:134
Definition: agramtab_.h:39
bool IsStrongClauseRoot(const DWORD Poses) const
Definition: GerGramTab.cpp:124
Definition: ger_consts.h:90
Definition: ger_consts.h:128
Definition: ger_consts.h:142
uint16_t WORD
Definition: utilit.h:106
bool is_numeral(size_t poses) const
Definition: GerGramTab.cpp:181
Definition: ger_consts.h:88
bool is_month(const char *lemma) const
Definition: GerGramTab.cpp:136
bool is_morph_pronoun_adjective(size_t poses) const
Definition: GerGramTab.cpp:164
const int gClauseTypesCount
Definition: GerGramTab.h:116
Definition: agramtab_.h:28
Definition: ger_consts.h:129
#define _QM(X)
Definition: utilit.h:616
string i2s(WORD i) const
Definition: GerGramTab.h:142
Definition: ger_consts.h:136
const unsigned int gMaxGrmCount
Definition: GerGramTab.h:32
const QWORD gAllNumbers
Definition: GerGramTab.h:199
Definition: ger_consts.h:116
Definition: ger_consts.h:141
Definition: ger_consts.h:124
Definition: ger_consts.h:77
bool is_morph_personal_pronoun(size_t poses, QWORD grammems) const
Definition: GerGramTab.cpp:210
bool is_morph_participle(size_t poses) const
Definition: GerGramTab.cpp:152
Definition: ger_consts.h:118
bool is_verb_form(size_t poses) const
Definition: GerGramTab.cpp:186
Definition: ger_consts.h:89
bool is_morph_adj(size_t poses) const
Definition: GerGramTab.cpp:147
bool is_small_number(const char *lemma) const
Definition: GerGramTab.cpp:131
bool IsStandardParamAbbr(const char *WordStrUpper) const
Definition: GerGramTab.cpp:230
unsigned char BYTE
Definition: utilit.h:94
bool IsSimpleParticle(const char *lemma, size_t poses) const
Definition: GerGramTab.cpp:215
const char * GetGrammemStr(size_t i) const
Definition: GerGramTab.h:136
int GetGroupTypebyName(const char *TypeName) const
gGrammems
Definition: ger_consts.h:50
const QWORD gAllVerbClasses
Definition: GerGramTab.h:203
const char * GetClauseNameByType(long type) const
Definition: GerGramTab.cpp:60
bool GleicheCase(const char *gram_code_noun, const char *gram_code_adj) const
Definition: GerGramTab.cpp:241
bool is_morph_adv(size_t poses) const
Definition: GerGramTab.cpp:205
const QWORD gBestimtheit
Definition: GerGramTab.h:197
bool is_infinitive(size_t poses) const
Definition: GerGramTab.cpp:195
uint32_t DWORD
Definition: utilit.h:105
const BYTE gPartOfSpeechesCount
Definition: GerGramTab.h:34
const QWORD gAllVerbForms
Definition: GerGramTab.h:202
bool IsMorphNoun(size_t Poses) const
Definition: GerGramTab.cpp:141
bool GleicheSubjectPredicate(const char *gram_code1, const char *gram_code2) const
Definition: GerGramTab.cpp:116
const unsigned int gEndUp
Definition: GerGramTab.h:31
Definition: ger_consts.h:135
~CGerGramTab()
Definition: GerGramTab.cpp:37
bool is_morph_pronoun(size_t poses) const
Definition: GerGramTab.cpp:158
Definition: ger_consts.h:117
Definition: ger_consts.h:123