00001 #ifndef lemword_h 00002 #define lemword_h 00003 00004 #include "Homonym.h" 00005 #include "PlmLine.h" 00006 #include "gra_descr.h" 00007 00008 class CLemWord 00009 { 00010 // graphematical descriptors in one string (without some binary flags that could be restored by CLemWord::BuildGraphemDescr() ) 00011 00012 00013 QWORD m_GraDescrs; 00014 int ProcessGraphematicalDescriptors(const char* LineStr); 00015 public: 00016 00017 // ======= Graphematics ====================== 00018 string m_UnparsedGraphemDescriptorsStr; 00019 00020 // input word form 00021 string m_strWord; 00022 00023 // input word uppercase form 00024 string m_strUpperWord; 00025 00026 // is graphematical space 00027 bool m_bSpace; 00028 00029 00030 // graphematical descriptor RLE or LLE 00031 bool m_bWord; 00032 00033 // a single comma 00034 bool m_bComma; 00035 00036 // a single hyphen 00037 bool m_bDash; 00038 00039 // graphematical register 00040 RegisterEnum m_Register; 00041 00042 // offset in the graphematcil buffer (= in the input file for text files) 00043 int m_GraphematicalUnitOffset; 00044 00045 int m_TokenLengthInFile; 00046 00047 // true, if this word has a space before ot it is at the beginning of the sentence. 00048 bool m_bHasSpaceBefore; 00049 00050 // true if the word was deleted and should be ignored 00051 bool m_bDeleted; 00052 00053 // is morphologically predicted 00054 bool m_bPredicted; 00055 00056 00057 CLemWord(); 00058 00059 void DeleteOborotMarks(); 00060 bool AddNextHomonym(const char* strPlmLine); 00061 bool ProcessPlmLineForTheFirstHomonym(const char* strPlmLine, MorphLanguageEnum langua, int& OborotNo); 00062 bool HasDes(Descriptors g) const; 00063 void DelDes(Descriptors g); 00064 void AddDes(Descriptors g); 00065 00066 void Reset(); 00067 00068 00069 00070 void SetWordStr (string NewValue, MorphLanguageEnum langua); 00071 00072 bool FindLemma(string strLemma) const; 00073 int GetHomonymByPOS(BYTE POS) const; 00074 bool HasPos(BYTE POS) const; 00075 bool HasGrammem(BYTE Grammem) const; 00076 int GetHomonymByGrammem(BYTE grammem) const; 00077 int GetHomonymByPOSandGrammem(BYTE POS, BYTE grammem) const; 00078 int GetHomonymByPosesandGrammem(size_t Poses, BYTE grammem) const; 00079 bool IsWordUpper(const char* s) const {return m_strUpperWord == s; }; 00080 00081 void SetAllOtherHomsDel(int iHom); 00082 00083 00084 00085 00086 virtual size_t GetHomonymsCount() const = 0; 00087 virtual const CHomonym* GetHomonym(int i) const = 0; 00088 virtual CHomonym* GetHomonym(int i) = 0; 00089 virtual void EraseHomonym(int iHom) = 0;; 00090 virtual CHomonym* AddNewHomonym() = 0;; 00091 00092 void DeleteMarkedHomonymsBeforeClauses(); 00093 void SetHomonymsDel(bool Value); 00094 00095 bool IsFirstOfGraPair(EGraPairType type) const; 00096 bool IsFirstOfGraPair() const; 00097 bool IsSecondOfGraPair(EGraPairType type) const; 00098 bool IsSecondOfGraPair() const; 00099 00100 00101 int GetOborotNo() const; 00102 bool HasOborot1() const; 00103 bool HasOborot2() const; 00104 bool IsInOborot() const; 00105 bool CanBeSynNoun() const; 00106 void KillHomonymOfPartOfSpeech(int iPartOfSpeech); 00107 virtual void InitLevelSpecific(CHomonym* pHom) {}; 00108 string GetPlmStr (const CHomonym* pHomonym, bool bFirstHomonym) const; 00109 string GetDebugString(const CHomonym* pHomonym, bool bFirstHomonym) const; 00110 string BuildGraphemDescr () const; 00111 size_t GetPoses() const; 00112 QWORD GetGrammems() const; 00113 bool HasAnalyticalBe() const; 00114 }; 00115 00116 00117 #endif