Go to the documentation of this file.
00001
00002
00003
00004
00005 #ifndef __UNITSHOLDER_H_
00006 #define __UNITSHOLDER_H_
00007
00008 #include "graline.h"
00009
00010
00011 class CUnitHolder // Owns a vector of CGraLine units plus the character/byte buffers declared below. Apart from GetUnits() and HasDescr(), members are only declared in this header.
00012 {
00013 vector<CGraLine> m_Units; // the units themselves; exposed read-only through GetUnits()
00014 vector<char> m_TokenBuf; // NOTE(review): presumably the storage behind GetUnitBuf()/GetUnitBufferStart() - confirm in the implementation file
00015 vector<char> m_UnitBufUpper; // NOTE(review): presumably filled by BuildUnitBufferUpper() and read by GetUnitUpperBufferStart() - confirm
00016 vector<BYTE> m_InputBuffer; // NOTE(review): presumably what GetInputBuffer() returns and InitInputBuffer() fills - confirm
00017 // Two side tables keyed by size_t. Everything declared before "public:" is private (default access of a class).
00018 map<size_t, short> m_FoundOborots; // NOTE(review): presumably backs SetOborotNo()/GetOborotNo(), keyed by LineNo - confirm
00019 map<size_t, DWORD> m_FoundPageBreaks; // NOTE(review): presumably backs SetPageNumber()/GetPageNumber(), keyed by LineNo - confirm
00020 public:
00021 const vector<CGraLine>& GetUnits() const { return m_Units; }; // read-only reference to the whole unit vector (no copy is made)
00022 CGraLine& GetUnit(size_t UnitNo); // non-const reference to one unit, so callers can modify it
00023 const vector<char>& GetUnitBuf() const;
00024 const vector<BYTE>& GetInputBuffer() const;
00025 // m_Language is a public data member, read and written directly by users of the class.
00026 MorphLanguageEnum m_Language;
00027
00028 CUnitHolder();
00029 // Descriptor and state manipulation.
00030 void SetState (size_t LB, size_t HB, WORD state); // NOTE(review): presumably applies state to the units between LB and HB - bound inclusiveness is not visible here
00031 void SetDes(size_t x, Descriptors des);
00032 bool HasDescr (size_t i, int descr) const { return (m_Units[i].GetDescriptors() & _QM (descr)) > 0; } // true when unit i's descriptors ANDed with _QM(descr) are > 0; i is not range-checked (vector operator[])
00033
00034
00035 // Predicates over a range of units given by LB and HB. NOTE(review): whether HB is inclusive cannot be told from this header.
00036 bool AreGrouped (size_t LB, size_t HB) const;
00037 bool HasGrouped (size_t LB, size_t HB) const;
00038 bool HasAbbreviation (size_t LB, size_t HB) const;
00039
00040
00041 // Scanning helpers that take a start index and return a size_t. NOTE(review): names suggest the P*/Find* forms move forward towards HB and the B* forms move backward - confirm against the definitions.
00042 size_t PassSpace (size_t i, size_t HB) const;
00043 size_t PPunctOrSoft(size_t i, size_t HB) const;
00044 size_t PPunct(size_t i, size_t HB) const;
00045 size_t FindSpace (size_t i, size_t HB) const;
00046 size_t BSpace (size_t i, size_t LB = 0) const; // LB defaults to 0 when the caller omits it
00047 size_t PSoft (size_t i, size_t HB) const;
00048 size_t BSoft (size_t i) const;
00049
00050 // Predicates on a single unit (size_t index) or on a single character code (int ch).
00051 bool IsHyphen(size_t x) const;
00052 bool is_latin_alpha (int ch) const;
00053 bool is_lowercase (int ch) const;
00054 bool is_uppercase (int ch) const;
00055 bool StrSuperCompare (int UnitNo, const char* s) const; // note: takes the unit index as int, unlike the size_t used by the neighbouring methods
00056 bool IsOneAlpha(size_t x) const;
00057 bool IsOneChar(size_t x, int i) const;
00058 bool IsOneULet(size_t x) const;
00059 bool FirstUpper(size_t x) const;
00060 bool IsBulletWord (size_t x) const;
00061 bool IsOneFullStop (size_t i) const;
00062 bool EmptyLineBeforeGraph (size_t i, size_t HB) const;
00063 bool IsQuestionOrExclamationMarks (size_t i) const;
00064 bool IsSentenceEndMark (size_t i) const;
00065 bool IsOneCloseQuotationMark (size_t i) const;
00066 bool IsOneOpenQuotationMark (size_t i) const;
00067
00068 // Buffer set-up/tear-down and token access.
00069 void FreeTable();
00070 void BuildUnitBufferUpper ();
00071 void InitTokenBuffer();
00072 bool InitInputBuffer(const string& S); // NOTE(review): the bool presumably reports success - confirm what false means
00073 void ClearInputBuffer();
00074 void AddUnit(const CGraLine& NewLine);
00075 const char* GetUnitBufferStart() const;
00076 const char* GetUnitUpperBufferStart() const;
00077 const char* GetUppercaseToken(DWORD LineNo) const; // returns a raw pointer; NOTE(review): lifetime and termination of the pointed-to text are not visible here
00078 string GetToken(DWORD LineNo) const; // returns the token by value, unlike GetUppercaseToken() which returns a pointer
00079 size_t GetTokensCount() const;
00080 DWORD GetTokenInputOffset(DWORD LineNo) const;
00081 BYTE GetTokenLength(DWORD LineNo) const; // length is reported as BYTE; NOTE(review): if BYTE is 8-bit this caps the reported length at 255 - confirm
00082
00083
00084
00085 size_t GetUnitBufferSize() const;
00086 void DeleteDescr(size_t LineNo, Descriptors d);
00087 // Operations on a span of units from StartLineNo to EndLineNo. NOTE(review): inclusiveness of EndLineNo is not visible here.
00088 void MakeOneWord(size_t StartLineNo, size_t EndLineNo);
00089 void ClearPairDescriptors(size_t StartLineNo, size_t EndLineNo);
00090
00091
00092 // Per-line side data: a short "oborot" number and a DWORD page number.
00093 void SetOborotNo(size_t LineNo, short OborotNo);
00094 short GetOborotNo(size_t LineNo) const; // NOTE(review): the value returned for a LineNo that was never set is not visible here
00095
00096 void SetPageNumber(size_t LineNo, DWORD PageNumber);
00097 DWORD GetPageNumber(size_t LineNo) const; // NOTE(review): the value returned for a LineNo that was never set is not visible here
00098 };
00099
00100
00101
00102 #endif