ddc
EngGramTab.h
Go to the documentation of this file.
1 //
2 // This file is part of DDC.
3 //
4 // DDC is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Lesser General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // DDC is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Lesser General Public License for more details.
13 //
14 // You should have received a copy of the GNU Lesser General Public License
15 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // ========== Dialing Lemmatizer (www.aot.ru)
18 // ========== Copyright by Alexey Sokirko, Bryan Jurish (2011)
19 
20 #ifndef __EngGRAMTAB_H_
21 #define __EngGRAMTAB_H_
22 
23 
24 #include "agramtab_.h" // main symbols
25 
26 
28 // CEngGramTab
29 const unsigned int eStartUp = 0x6161; // first two-byte code: "aa"
30 const unsigned int eEndUp = 0x7A7B; // one past the last two-byte code: "zz" + 1
31 const unsigned int eMaxGrmCount = eEndUp -eStartUp; // size of the code range; original note: 5911 (5 KB). NOTE(review): 0x7A7B - 0x6161 evaluates to 6426 - confirm which figure is intended
32 
34 // there must be no spaces between the words
36 {
37  "NOUN",
38  "ADJECTIVE", // full adjective
39  "VERB",
40  "VBE",
41  "MOD",
42  "NUMERAL",
43  "CONJ",
44  "INT",
45  "PREP",
46  "PART",
47  "ARTICLE",
48  "ADVERB",
49  "PN",
50  "ORDNUM",
51  "PRON",
52  "POSS",
53  "PN_ADJ"
54 };
55 const short eGrammemsCount = 34; // number of entries in eGrammems below
56 const char eGrammems[eGrammemsCount][8] = { // each name occupies an 8-byte slot, so at most 7 characters plus the terminating NUL
57  "sg", "pl", "m", "f", "anim", "perf", "nom", "obj", "narr","geo",
58  "prop" ,"pers", "poss", "pred", "uncount", "ref", "dem", "mass", "comp", "sup",
59  "1", "2", "3", "prsa", "inf", "pasa", "pp", "ing", "fut", "if", "plsq", "plsgs", "name","org"};
60 
61 
62 
63 
64 
65 class CEngGramTab : public CAgramtab // English grammar table built on CAgramtab
66 {
67 public:
69  CEngGramTab();
70  ~CEngGramTab();
71 
73  const char* GetPartOfSpeechStr(BYTE i) const {return ePartOfSpeeches[i];}; // name of part of speech i; i is not range-checked
74  size_t GetGrammemsCount() const {return eGrammemsCount;}; // number of grammeme names (eGrammemsCount)
75  const char* GetGrammemStr(size_t i) const {return eGrammems[i];}; // name of grammeme i; i is not range-checked
76  size_t GetMaxGrmCount() const {return eMaxGrmCount;}; // size of the two-byte code range (eMaxGrmCount)
77  CAgramtabLine*& GetLine(size_t LineNo) {return Lines[LineNo];} // reference to slot Lines[LineNo], so the caller can reassign it; LineNo is not range-checked
78  const CAgramtabLine* GetLine(size_t LineNo) const {return Lines[LineNo];}; // read-only access to Lines[LineNo]; LineNo is not range-checked
79  size_t s2i(const char * s ) const { return (unsigned char) s[0]*0x100+(unsigned char) s[1] - eStartUp;}; // index of a two-byte code: s[0]*0x100 + s[1] - eStartUp; reads s[0] and s[1] unconditionally
80 
81 
82  string i2s(WORD i) const // builds the two-character code for index i (reverse of the arithmetic in s2i)
83  {
84  i += eStartUp; // shift the index back into the code range
85  char res[3];
86 
87  res[0] = (i >> 8); // high byte of the code
88  res[1] = (0xFF & i); // low byte of the code
89  res[2] = 0; // NUL terminator
90  return res; // the local buffer is converted to a string for the return value
91  };
92 
93  const char* GetRegistryString() const // fixed registry path string for this table
94  {
95  return "Software\\Dialing\\Lemmatizer\\English\\Egramtab";
96  };
97 
98  long GetClauseTypeByName(const char* TypeName) const {assert(false); return 0;}; // asserts unconditionally; returns 0 only when assert is compiled out
99  const char* GetClauseNameByType(long type) const {assert(false); return 0;}; // asserts unconditionally; returns a null pointer only when assert is compiled out
100  bool GleicheGenderNumber(const char* gram_code1, const char* gram_code2) const; // declared only; no inline body in this header
101  bool GleicheSubjectPredicate(const char* gram_code1, const char* gram_code2) const; // declared only; no inline body in this header
102  const size_t GetClauseTypesCount() const // always returns 0
103  {
104  return 0;
105  };
106 
107  bool IsStrongClauseRoot(const DWORD Poses) const; // the remaining members are declarations only; no inline bodies in this header
108  bool is_month (const char* lemma) const;
109  bool is_small_number (const char* lemma) const;
110  bool IsMorphNoun (size_t Poses) const;
111  bool is_morph_adj (size_t poses) const;
112  bool is_morph_participle (size_t poses) const;
113  bool is_morph_pronoun (size_t poses) const;
114  bool is_morph_pronoun_adjective(size_t poses) const;
115  bool is_left_noun_modifier (size_t poses, QWORD grammems) const;
116  bool is_numeral (size_t poses) const;
117  bool is_verb_form (size_t poses) const;
118  bool is_infinitive(size_t poses) const;
119  bool is_morph_predk(size_t poses) const;
120  bool is_morph_adv(size_t poses) const;
121  bool is_morph_personal_pronoun (size_t poses, QWORD grammems) const;
122  bool is_morph_article(size_t poses) const;
123 
124  bool IsSimpleParticle(const char* lemma, size_t poses) const;
125  bool IsSynNoun(size_t Poses, const char* Lemma) const;
126  bool IsStandardParamAbbr (const char* WordStrUpper) const;
127  bool GleicheCase(const char* gram_code_noun, const char* gram_code_adj) const;
128  bool GleicheCaseNumber(const char* gram_code1, const char* gram_code2) const;
129  QWORD GleicheGenderNumberCase(const char* common_gram_code_noun, const char* gram_code_noun, const char* gram_code_adj) const;
130 
131 
132 
133 };
134 
135 
136 #endif
137 
138 /*--- emacs style variables ---
139  * Local Variables:
140  * mode: C++
141  * c-file-style: "ellemtel"
142  * c-basic-offset: 4
143  * tab-width: 8
144  * indent-tabs-mode: nil
145  * End:
146  */
const BYTE ePartOfSpeechesCount
Definition: EngGramTab.h:33
bool GleicheSubjectPredicate(const char *gram_code1, const char *gram_code2) const
Definition: EngGramTab.cpp:50
bool GleicheGenderNumber(const char *gram_code1, const char *gram_code2) const
Definition: EngGramTab.cpp:45
const unsigned int eEndUp
Definition: EngGramTab.h:30
bool is_morph_predk(size_t poses) const
Definition: EngGramTab.cpp:120
string i2s(WORD i) const
Definition: EngGramTab.h:82
CAgramtabLine * Lines[eMaxGrmCount]
Definition: EngGramTab.h:68
bool is_month(const char *lemma) const
Definition: EngGramTab.cpp:67
long GetClauseTypeByName(const char *TypeName) const
Definition: EngGramTab.h:98
uint64_t QWORD
Definition: utilit.h:107
const unsigned int eStartUp
Definition: EngGramTab.h:29
bool is_small_number(const char *lemma) const
Definition: EngGramTab.cpp:62
CAgramtabLine *& GetLine(size_t LineNo)
Definition: EngGramTab.h:77
bool is_left_noun_modifier(size_t poses, QWORD grammems) const
Definition: EngGramTab.cpp:97
bool is_infinitive(size_t poses) const
Definition: EngGramTab.cpp:115
bool is_morph_pronoun(size_t poses) const
Definition: EngGramTab.cpp:87
Definition: agramtab_.h:39
const CAgramtabLine * GetLine(size_t LineNo) const
Definition: EngGramTab.h:78
Definition: EngGramTab.h:65
uint16_t WORD
Definition: utilit.h:106
bool IsSynNoun(size_t Poses, const char *Lemma) const
Definition: EngGramTab.cpp:146
eGrammems
Definition: eng_consts.h:48
const unsigned int eMaxGrmCount
Definition: EngGramTab.h:31
QWORD GleicheGenderNumberCase(const char *common_gram_code_noun, const char *gram_code_noun, const char *gram_code_adj) const
Definition: EngGramTab.cpp:166
bool is_morph_pronoun_adjective(size_t poses) const
Definition: EngGramTab.cpp:92
Definition: agramtab_.h:28
BYTE GetPartOfSpeechesCount() const
Definition: EngGramTab.h:72
const size_t GetClauseTypesCount() const
Definition: EngGramTab.h:102
bool is_numeral(size_t poses) const
Definition: EngGramTab.cpp:103
bool is_morph_personal_pronoun(size_t poses, QWORD grammems) const
Definition: EngGramTab.cpp:130
bool GleicheCase(const char *gram_code_noun, const char *gram_code_adj) const
Definition: EngGramTab.cpp:158
size_t GetGrammemsCount() const
Definition: EngGramTab.h:74
const short eGrammemsCount
Definition: EngGramTab.h:55
const char * GetPartOfSpeechStr(BYTE i) const
Definition: EngGramTab.h:73
bool IsSimpleParticle(const char *lemma, size_t poses) const
Definition: EngGramTab.cpp:135
bool IsStandardParamAbbr(const char *WordStrUpper) const
Definition: EngGramTab.cpp:151
bool is_morph_adv(size_t poses) const
Definition: EngGramTab.cpp:125
unsigned char BYTE
Definition: utilit.h:94
~CEngGramTab()
Definition: EngGramTab.cpp:30
bool IsStrongClauseRoot(const DWORD Poses) const
Definition: EngGramTab.cpp:56
const char * GetGrammemStr(size_t i) const
Definition: EngGramTab.h:75
bool GleicheCaseNumber(const char *gram_code1, const char *gram_code2) const
Definition: EngGramTab.cpp:162
const char * GetRegistryString() const
Definition: EngGramTab.h:93
ePartOfSpeeches
Definition: eng_consts.h:26
bool is_morph_article(size_t poses) const
Definition: EngGramTab.cpp:171
uint32_t DWORD
Definition: utilit.h:105
CEngGramTab()
Definition: EngGramTab.cpp:23
bool is_morph_participle(size_t poses) const
Definition: EngGramTab.cpp:82
size_t s2i(const char *s) const
Definition: EngGramTab.h:79
bool IsMorphNoun(size_t Poses) const
Definition: EngGramTab.cpp:72
const char * GetClauseNameByType(long type) const
Definition: EngGramTab.h:99
bool is_morph_adj(size_t poses) const
Definition: EngGramTab.cpp:77
bool is_verb_form(size_t poses) const
Definition: EngGramTab.cpp:108
size_t GetMaxGrmCount() const
Definition: EngGramTab.h:76