Go to the documentation of this file.00001
00002
00003
00004
00005 #ifndef __GRAPHMATFILE_H_
00006 #define __GRAPHMATFILE_H_
00007
00008 #include "UnitHolder.h"
00009
// Forward declarations: within this header both types are only referred to
// through pointers-to-const (the m_pDicts member and the return type of
// GetOborDic()), so incomplete types are sufficient here.
class CDictionary;
class CGraphanDicts;
00012
00013 class CGraphmatFile : public CUnitHolder
00014 {
00015 string m_SourceFileName;
00016 string m_LastError;
00017
00018
00019
00020
00021 bool IsKey(size_t LB, size_t HB, size_t& GraLast) const;
00022 bool FindKeySequence (const char* title, size_t i,size_t HB, size_t& GraLast) const;
00023 bool DealBullet (size_t i, size_t HB);
00024 bool DealAsteriskBullet (size_t LB, size_t HB);
00025 int DealBulletsWithTwoBrackets (size_t StartPos, size_t EndPos);
00026 bool DealEnglishStyleFIO (size_t StartPos, size_t EndPos);
00027 bool DealAbbrev (size_t StartPos, size_t EndPos);
00028 size_t FindOborotto (size_t i, size_t HB, short& OborotNo, vector<WORD>& OborortIds) const;
00029 void DealOborotto(size_t HB);
00030 int DealReferences (size_t i,size_t HB);
00031 int DealStop (size_t i);
00032 void DealModifierKey (size_t LB, size_t HB);
00033 void DealSimpleKey (size_t LB, size_t HB);
00034 void DealKeySequence(size_t LB, size_t HB);
00035 void DealQuotedKeySequence(size_t LB, size_t HB);
00036 void DealGermanDividedCompounds(size_t LB, size_t HB);
00037 void DealExtensionsAndLocalFileNames(size_t LB, size_t HB);
00038 int HasIndention (size_t LB, size_t HB);
00039 int CountEndL (size_t LB, size_t HB);
00040 int CountSpaces (size_t LB, size_t HB);
00041 int DealFIO (size_t i,size_t HB);
00042 int DealShortFIO(size_t i, size_t HB);
00043 void DealNames (size_t LB, size_t HB);
00044 bool DealSentBreaker ();
00045 void InitNonContextDescriptors (CGraLine& L);
00046 bool GraphmatMain ();
00047 int InitContextDescriptors (size_t LB, size_t HB);
00048 void MacSynHierarchy ();
00049
00050
00051
00052
00053 public:
00054 const CGraphanDicts* m_pDicts;
00055 bool m_bConvertRussianJo2Je;
00056
00057 size_t m_MinParOfs;
00058 size_t m_MaxParOfs;
00059 size_t m_TabSize;
00060
00061 size_t m_MaxUpLen;
00062
00063 string m_GraOutputFile;
00064
00065 string m_XmlMacSynOutputFile;
00066
00067 bool m_bSentBreaker;
00068
00069 bool m_bMacSynHierarchy;
00070
00071
00072
00073 bool m_bSubdueWrongMacroSynUnitToMainRoot;
00074
00075 bool m_bSentenceAsParagraph;
00076
00077
00078 bool m_bForceToRus;
00079
00080
00081 bool m_bEmptyLineIsSentenceDelim;
00082
00083
00084 bool m_bUseParagraphTagToDivide;
00085
00086
00087 bool m_bUseIndention;
00088
00089 bool m_bFilterUnprintableSymbols;
00090
00091 bool m_bRecognizeShortFIOs;
00092
00093 size_t m_MaxSentenceLength;
00094
00095
00096 CGraphmatFile();
00097 ~CGraphmatFile();
00098
00099
00100 bool LoadDicts();
00101 bool LoadStringToGraphan(const string& szBuffer);
00102 bool LoadFileToGraphan (const string& CommandLine);
00103 void FreeDicts();
00104 const string& GetLastError() const;
00105
00106
00107
00108 void GetGraphematicalLine (char* line, size_t NumLine) const;
00109 MorphLanguageEnum GetTokenLanguage (int LineNo) const;
00110 bool StartsFixedOborot(size_t LineNo) const;
00111 const CDictionary* GetOborDic() const;
00112
00113 void WriteGraphMat (const char* FName) const;
00114
00115
00116
00117
00118
00119 };
00120
00121
00122 #endif //__GRAPHMATFILE_H_