00001
00002
00003
00004
00005 #ifndef __EngGRAMTAB_H_
00006 #define __EngGRAMTAB_H_
00007
00008
00009 #include "agramtab_.h"
00010
00011
00013
// Gram codes are two bytes packed as first_byte * 0x100 + second_byte
// (see CEngGramTab::s2i / i2s, which add or subtract eStartUp).

// Packed value of the lowest code: bytes 0x61, 0x61.
const unsigned int eStartUp     = 0x6161;

// Packed value used as the upper limit of the code range: bytes 0x7A, 0x7B.
const unsigned int eEndUp       = 0x7A7B;

// Size of the code range; also the length of CEngGramTab::Lines.
const unsigned int eMaxGrmCount = eEndUp - eStartUp;
00017
00018 const BYTE ePartOfSpeechesCount = 17;
00019
00020 const char ePartOfSpeeches[ePartOfSpeechesCount][20] =
00021 {
00022 "NOUN",
00023 "ADJECTIVE",
00024 "VERB",
00025 "VBE",
00026 "MOD",
00027 "NUMERAL",
00028 "CONJ",
00029 "INT",
00030 "PREP",
00031 "PART",
00032 "ARTICLE",
00033 "ADVERB",
00034 "PN",
00035 "ORDNUM",
00036 "PRON",
00037 "POSS",
00038 "PN_ADJ"
00039 };
// Number of rows in eGrammems.
const short eGrammemsCount = 34;

// Grammeme names. The row index is the value that
// CEngGramTab::GetGrammemStr takes; each row is an 8-byte buffer
// (the longest name, "uncount", fills it exactly with its terminator).
const char eGrammems[eGrammemsCount][8] =
{
    /*  0 */ "sg",      "pl",    "m",    "f",    "anim",
    /*  5 */ "perf",    "nom",   "obj",  "narr", "geo",
    /* 10 */ "prop",    "pers",  "poss", "pred", "uncount",
    /* 15 */ "ref",     "dem",   "mass", "comp", "sup",
    /* 20 */ "1",       "2",     "3",    "prsa", "inf",
    /* 25 */ "pasa",    "pp",    "ing",  "fut",  "if",
    /* 30 */ "plsq",    "plsgs", "name", "org"
};
00045
00046
00047
00048
00049
00050 class CEngGramTab : public CAgramtab
00051 {
00052 public:
00053 CAgramtabLine* Lines[eMaxGrmCount];
00054 CEngGramTab();
00055 ~CEngGramTab();
00056
00057 BYTE GetPartOfSpeechesCount () const {return ePartOfSpeechesCount;};
00058 const char* GetPartOfSpeechStr(BYTE i) const {return ePartOfSpeeches[i];};
00059 size_t GetGrammemsCount() const {return eGrammemsCount;};
00060 const char* GetGrammemStr(size_t i) const {return eGrammems[i];};
00061 size_t GetMaxGrmCount() const {return eMaxGrmCount;};
00062 CAgramtabLine*& GetLine(size_t LineNo) {return Lines[LineNo];}
00063 const CAgramtabLine* GetLine(size_t LineNo) const {return Lines[LineNo];};
00064 size_t s2i(const char * s ) const { return (unsigned char) s[0]*0x100+(unsigned char) s[1] - eStartUp;};
00065
00066
00067 string i2s(WORD i) const
00068 {
00069 i += eStartUp;
00070 char res[3];
00071
00072 res[0] = (i >> 8);
00073 res[1] = (0xFF & i);
00074 res[2] = 0;
00075 return res;
00076 };
00077
00078 const char* GetRegistryString() const
00079 {
00080 return "Software\\Dialing\\Lemmatizer\\English\\Egramtab";
00081 };
00082
00083 long GetClauseTypeByName(const char* TypeName) const {assert(false); return 0;};
00084 const char* GetClauseNameByType(long type) const {assert(false); return 0;};
00085 bool GleicheGenderNumber(const char* gram_code1, const char* gram_code2) const;
00086 bool GleicheSubjectPredicate(const char* gram_code1, const char* gram_code2) const;
00087 const size_t GetClauseTypesCount() const
00088 {
00089 return 0;
00090 };
00091
00092 bool IsStrongClauseRoot(const DWORD Poses) const;
00093 bool is_month (const char* lemma) const;
00094 bool is_small_number (const char* lemma) const;
00095 bool IsMorphNoun (size_t Poses) const;
00096 bool is_morph_adj (size_t poses) const;
00097 bool is_morph_participle (size_t poses) const;
00098 bool is_morph_pronoun (size_t poses) const;
00099 bool is_morph_pronoun_adjective(size_t poses) const;
00100 bool is_left_noun_modifier (size_t poses, QWORD grammems) const;
00101 bool is_numeral (size_t poses) const;
00102 bool is_verb_form (size_t poses) const;
00103 bool is_infinitive(size_t poses) const;
00104 bool is_morph_predk(size_t poses) const;
00105 bool is_morph_adv(size_t poses) const;
00106 bool is_morph_personal_pronoun (size_t poses, QWORD grammems) const;
00107 bool is_morph_article(size_t poses) const;
00108
00109 bool IsSimpleParticle(const char* lemma, size_t poses) const;
00110 bool IsSynNoun(size_t Poses, const char* Lemma) const;
00111 bool IsStandardParamAbbr (const char* WordStrUpper) const;
00112 bool GleicheCase(const char* gram_code_noun, const char* gram_code_adj) const;
00113 bool GleicheCaseNumber(const char* gram_code1, const char* gram_code2) const;
00114 QWORD GleicheGenderNumberCase(const char* common_gram_code_noun, const char* gram_code_noun, const char* gram_code_adj) const;
00115
00116
00117
00118 };
00119
00120
00121 #endif