ddc
|
#include <UnitHolder.h>
Public Member Functions | |
const vector< CGraLine > & | GetUnits () const |
CGraLine & | GetUnit (size_t UnitNo) |
const vector< char > & | GetUnitBuf () const |
const vector< BYTE > & | GetInputBuffer () const |
CUnitHolder () | |
void | SetState (size_t LB, size_t HB, WORD state) |
void | SetDes (size_t x, Descriptors des) |
bool | HasDescr (size_t i, int descr) const |
bool | AreGrouped (size_t LB, size_t HB) const |
bool | HasGrouped (size_t LB, size_t HB) const |
bool | HasAbbreviation (size_t LB, size_t HB) const |
size_t | PassSpace (size_t i, size_t HB) const |
size_t | PPunctOrSoft (size_t i, size_t HB) const |
size_t | PPunct (size_t i, size_t HB) const |
size_t | BSpace (size_t i, size_t LB=0) const |
size_t | PSoft (size_t i, size_t HB) const |
size_t | BSoft (size_t i) const |
bool | IsHyphen (size_t x) const |
bool | is_latin_alpha (int ch) const |
bool | is_lowercase (int ch) const |
bool | is_uppercase (int ch) const |
bool | IsOneAlpha (size_t x) const |
bool | IsOneChar (size_t x, int i) const |
bool | IsOneULet (size_t x) const |
bool | FirstUpper (size_t x) const |
bool | IsBulletWord (size_t x) const |
bool | IsOneFullStop (size_t i) const |
bool | EmptyLineBeforeGraph (size_t i, size_t HB) const |
bool | IsQuestionOrExclamationMarks (size_t i) const |
bool | IsSentenceEndMark (size_t i) const |
bool | IsOneCloseQuotationMark (size_t i) const |
bool | IsOneOpenQuotationMark (size_t i) const |
void | FreeTable () |
void | BuildUnitBufferUpper () |
void | InitTokenBuffer () |
bool | InitInputBuffer (const string &S) |
void | ClearInputBuffer () |
void | AddUnit (const CGraLine &NewLine) |
const char * | GetUnitBufferStart () const |
const char * | GetUnitUpperBufferStart () const |
const char * | GetUppercaseToken (DWORD LineNo) const |
string | GetToken (DWORD LineNo) const |
size_t | GetTokensCount () const |
DWORD | GetTokenInputOffset (DWORD LineNo) const |
BYTE | GetTokenLength (DWORD LineNo) const |
void | DeleteDescr (size_t LineNo, Descriptors d) |
void | SetOborotNo (size_t LineNo, short OborotNo) |
short | GetOborotNo (size_t LineNo) const |
void | SetPageNumber (size_t LineNo, DWORD PageNumber) |
DWORD | GetPageNumber (size_t LineNo) const |
Public Attributes | |
MorphLanguageEnum | m_Language |
Private Attributes | |
vector< CGraLine > | m_Units |
vector< char > | m_TokenBuf |
vector< char > | m_UnitBufUpper |
vector< BYTE > | m_InputBuffer |
map< size_t, short > | m_FoundOborots |
map< size_t, DWORD > | m_FoundPageBreaks |
CUnitHolder::CUnitHolder | ( | ) |
References m_Language, and morphRussian.
const vector< CGraLine > & CUnitHolder::GetUnits | ( | ) | const |
inline |
References GetInputBuffer(), GetUnit(), GetUnitBuf(), and m_Units.
Referenced by CalculateLMarg(), CanBeFileName(), CanBeRussianInitial(), CheckComma(), CGraphmatFile::CountEndL(), CGraphmatFile::CountSpaces(), CStrToCompare::CStrToCompare(), CGraphmatFile::DealAsteriskBullet(), CGraphmatFile::DealBullet(), CGraphmatFile::DealBulletsWithTwoBrackets(), CGraphmatFile::DealEnglishStyleFIO(), CGraphmatFile::DealExtensionsAndLocalFileNames(), CGraphmatFile::DealFIO(), CGraphmatFile::DealGermanDividedCompounds(), DealIndention(), CGraphmatFile::DealKeySequence(), CGraphmatFile::DealModifierKey(), CGraphmatFile::DealNames(), CGraphmatFile::DealOborotto(), CGraphmatFile::DealReferences(), CGraphmatFile::DealSentBreaker(), CGraphmatFile::DealShortFIO(), DealSimpleEnglishNames(), CGraphmatFile::FindKeySequence(), CGraphmatFile::FindOborotto(), FindSentEndAfterParagraph(), CGraphmatFile::GetGraphematicalLine(), GetToken(), GetTokenInputOffset(), GetTokenLength(), GetTokensCount(), GetUppercaseToken(), CGraphmatFile::GraphmatMain(), CConcIndexator::IndexTextOrHtmlFile(), CGraphmatFile::InitContextDescriptors(), InitEnglishNameSlot(), CGraphmatFile::IsKey(), IsLastInGroupOrFree(), CGraphmatFile::MacSynHierarchy(), MapCorrectMinSpace(), ParagraphTagBeforeGraph(), RubiconText(), SetSentMarkers(), and CGraphmatFile::WriteGraphMat().
CGraLine & CUnitHolder::GetUnit | ( | size_t | UnitNo | ) |
References m_Units.
Referenced by GetUnits(), CGraphmatFile::GraphmatMain(), CGraphmatFile::InitContextDescriptors(), and InitEnglishNameSlot().
const vector< char > & CUnitHolder::GetUnitBuf | ( | ) | const |
References m_TokenBuf.
Referenced by GetUnitBufferStart(), and GetUnits().
const vector< BYTE > & CUnitHolder::GetInputBuffer | ( | ) | const |
References m_InputBuffer.
Referenced by GetUnits(), CGraphmatFile::GraphmatMain(), and CGraLine::ReadWord().
void CUnitHolder::SetState | ( | size_t | LB, |
size_t | HB, | ||
WORD | state | ||
) |
References m_Units.
Referenced by CGraphmatFile::DealBullet(), CGraphmatFile::DealBulletsWithTwoBrackets(), CGraphmatFile::DealEnglishStyleFIO(), CGraphmatFile::DealExtensionsAndLocalFileNames(), CGraphmatFile::DealFIO(), CGraphmatFile::DealGermanDividedCompounds(), CGraphmatFile::DealKeySequence(), CGraphmatFile::DealModifierKey(), CGraphmatFile::DealOborotto(), CGraphmatFile::DealReferences(), CGraphmatFile::DealShortFIO(), DealSimpleEnglishNames(), and CGraphmatFile::DealSimpleKey().
void CUnitHolder::SetDes | ( | size_t | x, |
Descriptors | des | ||
) |
References m_Units.
Referenced by CGraphmatFile::DealAsteriskBullet(), CGraphmatFile::DealBullet(), CGraphmatFile::DealBulletsWithTwoBrackets(), CGraphmatFile::DealEnglishStyleFIO(), CGraphmatFile::DealExtensionsAndLocalFileNames(), CGraphmatFile::DealFIO(), CGraphmatFile::DealGermanDividedCompounds(), DealIndention(), CGraphmatFile::DealKeySequence(), CGraphmatFile::DealModifierKey(), CGraphmatFile::DealNames(), CGraphmatFile::DealOborotto(), CGraphmatFile::DealReferences(), CGraphmatFile::DealShortFIO(), DealSimpleEnglishNames(), CGraphmatFile::DealSimpleKey(), InitTokenBuffer(), CGraphmatFile::MacSynHierarchy(), and SetSentMarkers().
bool CUnitHolder::HasDescr | ( | size_t | i, |
int | descr | ||
) | const |
inline |
References _QM, AddUnit(), AreGrouped(), BSoft(), BSpace(), BuildUnitBufferUpper(), ClearInputBuffer(), DeleteDescr(), EmptyLineBeforeGraph(), FirstUpper(), FreeTable(), GetOborotNo(), GetPageNumber(), GetToken(), GetTokenInputOffset(), GetTokenLength(), GetTokensCount(), GetUnitBufferStart(), GetUnitUpperBufferStart(), GetUppercaseToken(), HasAbbreviation(), HasGrouped(), InitInputBuffer(), InitTokenBuffer(), is_latin_alpha(), is_lowercase(), is_uppercase(), IsBulletWord(), IsHyphen(), IsOneAlpha(), IsOneChar(), IsOneCloseQuotationMark(), IsOneFullStop(), IsOneOpenQuotationMark(), IsOneULet(), IsQuestionOrExclamationMarks(), IsSentenceEndMark(), PassSpace(), PPunct(), PPunctOrSoft(), PSoft(), SetOborotNo(), and SetPageNumber().
Referenced by CanBeFileName(), CanBeRussianInitial(), CheckComma(), CheckGermanSentenceBreak(), CGraphmatFile::DealAsteriskBullet(), CGraphmatFile::DealBullet(), CGraphmatFile::DealBulletsWithTwoBrackets(), CGraphmatFile::DealEnglishStyleFIO(), CGraphmatFile::DealFIO(), CGraphmatFile::DealGermanDividedCompounds(), CGraphmatFile::DealKeySequence(), CGraphmatFile::DealModifierKey(), CGraphmatFile::DealNames(), CGraphmatFile::DealSentBreaker(), CGraphmatFile::DealShortFIO(), DealSimpleEnglishNames(), CGraphmatFile::FindKeySequence(), FindSentEndAfterParagraph(), FirstUpper(), CGraphmatFile::GraphmatMain(), CGraphmatFile::HasIndention(), CConcIndexator::IndexTextOrHtmlFile(), CConSent::InitBasicInformation(), CGraphmatFile::InitContextDescriptors(), InitEnglishNameSlot(), IsBulletWord(), IsDigit(), CConcIndexator::IsDWDSToken(), IsHyphen(), IsLastInGroupOrFree(), IsOneAlpha(), IsSentenceEnd(), IsWord(), PPunct(), PPunctOrSoft(), RubiconText(), and SetSentMarkers().
bool CUnitHolder::AreGrouped | ( | size_t | LB, |
size_t | HB | ||
) | const |
bool CUnitHolder::HasGrouped | ( | size_t | LB, |
size_t | HB | ||
) | const |
References m_Units.
Referenced by CGraphmatFile::DealBulletsWithTwoBrackets(), CGraphmatFile::DealEnglishStyleFIO(), CGraphmatFile::DealExtensionsAndLocalFileNames(), CGraphmatFile::DealModifierKey(), DealSimpleEnglishNames(), CGraphmatFile::DealSimpleKey(), and HasDescr().
bool CUnitHolder::HasAbbreviation | ( | size_t | LB, |
size_t | HB | ||
) | const |
References m_Units.
Referenced by CGraphmatFile::DealExtensionsAndLocalFileNames(), and HasDescr().
size_t CUnitHolder::PassSpace | ( | size_t | i, |
size_t | HB | ||
) | const |
References m_Units.
Referenced by AdjustRubiconsInSpaceCompact(), CGraphmatFile::DealAsteriskBullet(), CGraphmatFile::DealBullet(), CGraphmatFile::DealBulletsWithTwoBrackets(), CGraphmatFile::DealReferences(), CGraphmatFile::DealSentBreaker(), HasDescr(), CGraphmatFile::InitContextDescriptors(), and MapCorrectMinSpace().
size_t CUnitHolder::PPunctOrSoft | ( | size_t | i, |
size_t | HB | ||
) | const |
References HasDescr(), m_Units, and OPun.
Referenced by CGraphmatFile::DealSentBreaker(), and HasDescr().
size_t CUnitHolder::PPunct | ( | size_t | i, |
size_t | HB | ||
) | const |
References HasDescr(), and OPun.
Referenced by CGraphmatFile::DealSentBreaker(), and HasDescr().
size_t CUnitHolder::BSpace | ( | size_t | i, |
size_t | LB = 0 | ||
) | const |
References m_Units.
Referenced by AdjustRubiconsInSpaceCompact(), CGraphmatFile::DealAsteriskBullet(), CGraphmatFile::DealBullet(), CGraphmatFile::DealBulletsWithTwoBrackets(), DealIndention(), EmptyLineBeforeGraph(), HasDescr(), and CConSent::InitBasicInformation().
size_t CUnitHolder::PSoft | ( | size_t | i, |
size_t | HB | ||
) | const |
References m_Units.
Referenced by CGraphmatFile::DealEnglishStyleFIO(), CGraphmatFile::DealFIO(), CGraphmatFile::DealGermanDividedCompounds(), CGraphmatFile::DealSentBreaker(), CGraphmatFile::DealShortFIO(), CGraphmatFile::FindKeySequence(), CGraphmatFile::FindOborotto(), HasDescr(), CGraphmatFile::MacSynHierarchy(), and RecognizeSimpleCS().
size_t CUnitHolder::BSoft | ( | size_t | i | ) | const |
References m_Units.
Referenced by CheckGermanSentenceBreak(), CGraphmatFile::DealFIO(), CGraphmatFile::DealKeySequence(), CGraphmatFile::DealSentBreaker(), CGraphmatFile::DealShortFIO(), CGraphmatFile::FindKeySequence(), FindSentEndAfterParagraph(), HasDescr(), and CConSent::InitBasicInformation().
bool CUnitHolder::IsHyphen | ( | size_t | x | ) | const |
References HasDescr(), m_Units, and OHyp.
Referenced by HasDescr().
bool CUnitHolder::is_latin_alpha | ( | int | ch | ) | const |
References is_english_alpha(), is_german_alpha(), m_Language, and morphGerman.
Referenced by HasDescr(), CGraphmatFile::InitNonContextDescriptors(), and IsOneAlpha().
bool CUnitHolder::is_lowercase | ( | int | ch | ) | const |
References is_english_lower(), is_german_lower(), is_russian_lower(), m_Language, and morphGerman.
Referenced by HasDescr(), and CGraphmatFile::InitNonContextDescriptors().
bool CUnitHolder::is_uppercase | ( | int | ch | ) | const |
References is_english_upper(), is_german_upper(), is_russian_upper(), m_Language, and morphGerman.
Referenced by HasDescr(), and CGraphmatFile::InitNonContextDescriptors().
bool CUnitHolder::IsOneAlpha | ( | size_t | x | ) | const |
References GetTokenLength(), HasDescr(), is_latin_alpha(), m_Units, OLLE, and ORLE.
Referenced by CGraphmatFile::DealSimpleKey(), HasDescr(), IsBulletWord(), and CGraphmatFile::IsKey().
bool CUnitHolder::IsOneChar | ( | size_t | x, |
int | i | ||
) | const |
References m_Units.
Referenced by CheckComma(), CGraphmatFile::DealGermanDividedCompounds(), and HasDescr().
bool CUnitHolder::IsOneULet | ( | size_t | x | ) | const |
bool CUnitHolder::FirstUpper | ( | size_t | x | ) | const |
References HasDescr(), OUp, and OUpLw.
Referenced by CGraphmatFile::DealFIO(), CGraphmatFile::DealShortFIO(), and HasDescr().
bool CUnitHolder::IsBulletWord | ( | size_t | x | ) | const |
References HasDescr(), IsOneAlpha(), and ODigits.
Referenced by CGraphmatFile::DealBullet(), CGraphmatFile::DealBulletsWithTwoBrackets(), and HasDescr().
bool CUnitHolder::IsOneFullStop | ( | size_t | i | ) | const |
References GetTokenLength(), and m_Units.
Referenced by CheckGermanSentenceBreak(), CGraphmatFile::DealBullet(), CGraphmatFile::DealEnglishStyleFIO(), CGraphmatFile::DealExtensionsAndLocalFileNames(), CGraphmatFile::DealFIO(), CGraphmatFile::DealReferences(), CGraphmatFile::DealShortFIO(), HasDescr(), and IsSentenceEndMark().
bool CUnitHolder::EmptyLineBeforeGraph | ( | size_t | i, |
size_t | HB | ||
) | const |
References BSpace(), GetToken(), GetTokenLength(), and m_Units.
Referenced by HasDescr(), and RubiconText().
bool CUnitHolder::IsQuestionOrExclamationMarks | ( | size_t | i | ) | const |
References GetToken(), GetTokenLength(), and m_Units.
Referenced by HasDescr(), and IsSentenceEndMark().
bool CUnitHolder::IsSentenceEndMark | ( | size_t | i | ) | const |
References cEllipseChar, GetTokenLength(), IsOneFullStop(), IsQuestionOrExclamationMarks(), and m_Units.
Referenced by CGraphmatFile::DealSentBreaker(), FindSentEndAfterParagraph(), and HasDescr().
bool CUnitHolder::IsOneCloseQuotationMark | ( | size_t | i | ) | const |
References GetToken(), GetTokenLength(), and m_Units.
Referenced by CGraphmatFile::DealSentBreaker(), and HasDescr().
bool CUnitHolder::IsOneOpenQuotationMark | ( | size_t | i | ) | const |
References GetToken(), GetTokenLength(), and m_Units.
Referenced by HasDescr(), and SetSentMarkers().
void CUnitHolder::FreeTable | ( | ) |
References ClearVector(), m_FoundOborots, m_FoundPageBreaks, m_TokenBuf, and m_Units.
Referenced by HasDescr(), CConcIndexator::IndexTextOrHtmlFile(), InitTokenBuffer(), and CGraphmatFile::~CGraphmatFile().
void CUnitHolder::BuildUnitBufferUpper | ( | ) |
References GerEngRusMakeUpperTemplate(), m_Language, m_UnitBufUpper, and m_Units.
Referenced by CGraphmatFile::GraphmatMain(), and HasDescr().
void CUnitHolder::InitTokenBuffer | ( | ) |
References FreeTable(), m_InputBuffer, m_TokenBuf, m_Units, OBeg, and SetDes().
Referenced by CGraphmatFile::GraphmatMain(), and HasDescr().
bool CUnitHolder::InitInputBuffer | ( | const string & | S | ) |
References m_InputBuffer.
Referenced by HasDescr(), and CGraphmatFile::LoadStringToGraphan().
void CUnitHolder::ClearInputBuffer | ( | ) |
References ClearVector(), and m_InputBuffer.
Referenced by CGraphmatFile::GraphmatMain(), and HasDescr().
void CUnitHolder::AddUnit | ( | const CGraLine & | NewLine | ) |
References m_Units.
Referenced by CGraphmatFile::GraphmatMain(), and HasDescr().
const char * CUnitHolder::GetUnitBufferStart | ( | ) | const |
References GetUnitBuf().
Referenced by GetUppercaseToken(), CGraphmatFile::GraphmatMain(), and HasDescr().
const char * CUnitHolder::GetUnitUpperBufferStart | ( | ) | const |
References m_UnitBufUpper.
Referenced by GetUppercaseToken(), and HasDescr().
const char * CUnitHolder::GetUppercaseToken | ( | DWORD | LineNo | ) | const |
References GetUnitBufferStart(), GetUnits(), and GetUnitUpperBufferStart().
Referenced by CheckGermanSentenceBreak(), CStrToCompare::CStrToCompare(), CGraphmatFile::DealExtensionsAndLocalFileNames(), CGraphmatFile::DealGermanDividedCompounds(), CGraphmatFile::DealOborotto(), CGraphmatFile::DealReferences(), HasDescr(), CConcIndexator::IndexTextOrHtmlFile(), and InitEnglishNameSlot().
string CUnitHolder::GetToken | ( | DWORD | LineNo | ) | const |
References CriticalTokenLength, GetTokenLength(), and GetUnits().
Referenced by EmptyLineBeforeGraph(), CGraphmatFile::GraphmatMain(), HasDescr(), CGraphmatFile::IsKey(), IsOneCloseQuotationMark(), IsOneOpenQuotationMark(), and IsQuestionOrExclamationMarks().
size_t CUnitHolder::GetTokensCount | ( | ) | const |
References GetUnits().
Referenced by HasDescr(), and CConcIndexator::IndexTextOrHtmlFile().
DWORD CUnitHolder::GetTokenInputOffset | ( | DWORD | LineNo | ) | const |
References GetUnits().
Referenced by CGraphmatFile::DealSentBreaker(), HasDescr(), and SetSentMarkers().
BYTE CUnitHolder::GetTokenLength | ( | DWORD | LineNo | ) | const |
References GetUnits().
Referenced by CGraphmatFile::DealEnglishStyleFIO(), CGraphmatFile::DealExtensionsAndLocalFileNames(), CGraphmatFile::DealGermanDividedCompounds(), EmptyLineBeforeGraph(), CGraphmatFile::FindKeySequence(), GetToken(), CGraphmatFile::GraphmatMain(), HasDescr(), IsOneAlpha(), IsOneCloseQuotationMark(), IsOneFullStop(), IsOneOpenQuotationMark(), IsQuestionOrExclamationMarks(), and IsSentenceEndMark().
void CUnitHolder::DeleteDescr | ( | size_t | LineNo, |
Descriptors | d | ||
) |
References m_Units.
Referenced by CGraphmatFile::DealKeySequence(), HasDescr(), and CGraphmatFile::InitContextDescriptors().
void CUnitHolder::SetOborotNo | ( | size_t | LineNo, |
short | OborotNo | ||
) |
References m_FoundOborots, and m_Units.
Referenced by CGraphmatFile::DealOborotto(), and HasDescr().
short CUnitHolder::GetOborotNo | ( | size_t | LineNo | ) | const |
References m_FoundOborots, and m_Units.
Referenced by CGraphmatFile::GetGraphematicalLine(), and HasDescr().
void CUnitHolder::SetPageNumber | ( | size_t | LineNo, |
DWORD | PageNumber | ||
) |
References m_FoundPageBreaks, m_Units, and UnknownPageNumber.
Referenced by CGraphmatFile::GraphmatMain(), and HasDescr().
DWORD CUnitHolder::GetPageNumber | ( | size_t | LineNo | ) | const |
References m_FoundPageBreaks, m_Units, and UnknownPageNumber.
Referenced by CGraphmatFile::GetGraphematicalLine(), HasDescr(), and CConcIndexator::IndexTextOrHtmlFile().
vector< CGraLine > CUnitHolder::m_Units |
private |
Referenced by AddUnit(), AreGrouped(), BSoft(), BSpace(), BuildUnitBufferUpper(), DeleteDescr(), EmptyLineBeforeGraph(), FreeTable(), GetOborotNo(), GetPageNumber(), GetUnit(), GetUnits(), HasAbbreviation(), HasGrouped(), InitTokenBuffer(), IsHyphen(), IsOneAlpha(), IsOneChar(), IsOneCloseQuotationMark(), IsOneFullStop(), IsOneOpenQuotationMark(), IsQuestionOrExclamationMarks(), IsSentenceEndMark(), PassSpace(), PPunctOrSoft(), PSoft(), SetDes(), SetOborotNo(), SetPageNumber(), and SetState().
vector< char > CUnitHolder::m_TokenBuf |
private |
Referenced by FreeTable(), GetUnitBuf(), and InitTokenBuffer().
vector< char > CUnitHolder::m_UnitBufUpper |
private |
Referenced by BuildUnitBufferUpper(), and GetUnitUpperBufferStart().
vector< BYTE > CUnitHolder::m_InputBuffer |
private |
Referenced by ClearInputBuffer(), GetInputBuffer(), InitInputBuffer(), and InitTokenBuffer().
map< size_t, short > CUnitHolder::m_FoundOborots |
private |
Referenced by FreeTable(), GetOborotNo(), and SetOborotNo().
map< size_t, DWORD > CUnitHolder::m_FoundPageBreaks |
private |
Referenced by FreeTable(), GetPageNumber(), and SetPageNumber().
MorphLanguageEnum CUnitHolder::m_Language |
Referenced by BuildUnitBufferUpper(), CStrToCompare::CStrToCompare(), CUnitHolder(), CGraphmatFile::DealSentBreaker(), CGraphmatFile::FindKeySequence(), CGraphmatFile::InitContextDescriptors(), CConcIndexator::InitGraphan(), CGraphmatFile::InitNonContextDescriptors(), is_latin_alpha(), is_lowercase(), is_uppercase(), CGraphmatFile::IsKey(), CGraLine::LengthUntilDelimiters(), and CGraphmatFile::LoadDicts().