ddc: EngGramTab.h Source File

Go to the documentation of this file.
00001 // ==========  This file is under  LGPL, the GNU Lesser General Public Licence
00002 // ==========  Dialing Lemmatizer (www.aot.ru)
00003 // ==========  Copyright by Alexey Sokirko
00004 
00005 #ifndef __EngGRAMTAB_H_
00006 #define __EngGRAMTAB_H_
00007 
00008 
00009 #include "agramtab_.h"       // main symbols
00010 
00011 
00013 // CEngGramTab
00014         const unsigned int eStartUp =  0x6161; //aa�
00015         const unsigned int eEndUp =  0x7A7B;  //zz + 1
00016         const unsigned int eMaxGrmCount  = eEndUp -eStartUp; // // 5911  (5 ��) 
00017 
00018         const BYTE ePartOfSpeechesCount = 17;
00019         // �� ������ ���� �������� ����� ������� 
00020         const char ePartOfSpeeches[ePartOfSpeechesCount][20] = 
00021         {
00022                 "NOUN",
00023                 "ADJECTIVE", // ������ ��������������
00024                 "VERB",
00025                 "VBE",
00026                 "MOD",
00027                 "NUMERAL",
00028                 "CONJ",
00029                 "INT",
00030                 "PREP",
00031                 "PART",
00032                 "ARTICLE",
00033                 "ADVERB",
00034                 "PN",
00035                 "ORDNUM",
00036                 "PRON",
00037                 "POSS",
00038                 "PN_ADJ"
00039         };
00040         const short eGrammemsCount = 34;
00041     const char eGrammems[eGrammemsCount][8] = {
00042       "sg", "pl", "m", "f", "anim", "perf", "nom", "obj", "narr","geo", 
00043       "prop" ,"pers", "poss", "pred", "uncount", "ref", "dem", "mass", "comp", "sup", 
00044       "1", "2", "3", "prsa", "inf", "pasa", "pp", "ing", "fut", "if", "plsq", "plsgs", "name","org"};
00045 
00046 
00047 
00048 
00049 
00050 class CEngGramTab : public CAgramtab
00051 {
00052 public:
00053         CAgramtabLine*  Lines[eMaxGrmCount];
00054         CEngGramTab();
00055         ~CEngGramTab();
00056         
00057         BYTE GetPartOfSpeechesCount () const {return ePartOfSpeechesCount;};
00058         const char*   GetPartOfSpeechStr(BYTE i) const {return ePartOfSpeeches[i];};
00059         size_t GetGrammemsCount()  const {return eGrammemsCount;};
00060         const char*   GetGrammemStr(size_t i) const {return eGrammems[i];};
00061         size_t GetMaxGrmCount() const {return eMaxGrmCount;};
00062         CAgramtabLine*& GetLine(size_t LineNo) {return Lines[LineNo];}
00063         const CAgramtabLine* GetLine(size_t LineNo) const {return Lines[LineNo];};
00064         size_t s2i(const char * s ) const { return  (unsigned char) s[0]*0x100+(unsigned char) s[1] - eStartUp;};
00065 
00066 
00067         string i2s(WORD i)  const
00068         { 
00069                 i += eStartUp;
00070                 char res[3];
00071 
00072                 res[0] = (i >> 8);
00073                 res[1] = (0xFF & i);
00074                 res[2] = 0;
00075                 return  res;
00076         };
00077 
00078         const char* GetRegistryString() const 
00079         {
00080                 return "Software\\Dialing\\Lemmatizer\\English\\Egramtab";
00081         };
00082 
00083         long GetClauseTypeByName(const char* TypeName) const {assert(false); return 0;};
00084         const char* GetClauseNameByType(long type) const {assert(false); return 0;};
00085         bool GleicheGenderNumber(const char* gram_code1, const char* gram_code2) const;
00086         bool GleicheSubjectPredicate(const char* gram_code1, const char* gram_code2) const;
00087         const size_t GetClauseTypesCount() const 
00088         {
00089                 return 0;
00090         };
00091         
00092         bool IsStrongClauseRoot(const DWORD Poses) const;
00093         bool is_month (const char* lemma) const;
00094         bool is_small_number (const char* lemma) const;
00095         bool IsMorphNoun (size_t Poses)  const;
00096         bool is_morph_adj (size_t poses) const;
00097         bool is_morph_participle (size_t poses) const;
00098         bool is_morph_pronoun (size_t poses) const;
00099         bool is_morph_pronoun_adjective(size_t poses) const;
00100         bool is_left_noun_modifier  (size_t poses, QWORD grammems) const;
00101         bool is_numeral (size_t poses) const;
00102         bool is_verb_form (size_t poses) const;
00103         bool is_infinitive(size_t poses) const;
00104         bool is_morph_predk(size_t poses) const;
00105         bool is_morph_adv(size_t poses) const;
00106         bool is_morph_personal_pronoun (size_t poses, QWORD grammems) const;
00107         bool is_morph_article(size_t poses) const;
00108 
00109         bool IsSimpleParticle(const char* lemma, size_t poses) const;
00110         bool IsSynNoun(size_t Poses, const char* Lemma) const;
00111         bool IsStandardParamAbbr (const char* WordStrUpper) const;
00112         bool GleicheCase(const char* gram_code_noun, const char* gram_code_adj) const;
00113         bool GleicheCaseNumber(const char* gram_code1, const char* gram_code2) const;
00114         QWORD GleicheGenderNumberCase(const char* common_gram_code_noun, const char* gram_code_noun, const char* gram_code_adj) const;
00115         
00116 
00117 
00118 };
00119 
00120 
00121 #endif