ddc
|
#include <GraphmatFile.h>
Public Member Functions | |
CGraphmatFile () | |
~CGraphmatFile () | |
bool | LoadDicts () |
bool | LoadStringToGraphan (const string &szBuffer) |
const string & | GetLastError () const |
void | GetGraphematicalLine (char *line, size_t NumLine) const |
void | WriteGraphMat (const char *FName) const |
Public Member Functions inherited from CUnitHolder | |
const vector< CGraLine > & | GetUnits () const |
CGraLine & | GetUnit (size_t UnitNo) |
const vector< char > & | GetUnitBuf () const |
const vector< BYTE > & | GetInputBuffer () const |
CUnitHolder () | |
void | SetState (size_t LB, size_t HB, WORD state) |
void | SetDes (size_t x, Descriptors des) |
bool | HasDescr (size_t i, int descr) const |
bool | AreGrouped (size_t LB, size_t HB) const |
bool | HasGrouped (size_t LB, size_t HB) const |
bool | HasAbbreviation (size_t LB, size_t HB) const |
size_t | PassSpace (size_t i, size_t HB) const |
size_t | PPunctOrSoft (size_t i, size_t HB) const |
size_t | PPunct (size_t i, size_t HB) const |
size_t | BSpace (size_t i, size_t LB=0) const |
size_t | PSoft (size_t i, size_t HB) const |
size_t | BSoft (size_t i) const |
bool | IsHyphen (size_t x) const |
bool | is_latin_alpha (int ch) const |
bool | is_lowercase (int ch) const |
bool | is_uppercase (int ch) const |
bool | IsOneAlpha (size_t x) const |
bool | IsOneChar (size_t x, int i) const |
bool | IsOneULet (size_t x) const |
bool | FirstUpper (size_t x) const |
bool | IsBulletWord (size_t x) const |
bool | IsOneFullStop (size_t i) const |
bool | EmptyLineBeforeGraph (size_t i, size_t HB) const |
bool | IsQuestionOrExclamationMarks (size_t i) const |
bool | IsSentenceEndMark (size_t i) const |
bool | IsOneCloseQuotationMark (size_t i) const |
bool | IsOneOpenQuotationMark (size_t i) const |
void | FreeTable () |
void | BuildUnitBufferUpper () |
void | InitTokenBuffer () |
bool | InitInputBuffer (const string &S) |
void | ClearInputBuffer () |
void | AddUnit (const CGraLine &NewLine) |
const char * | GetUnitBufferStart () const |
const char * | GetUnitUpperBufferStart () const |
const char * | GetUppercaseToken (DWORD LineNo) const |
string | GetToken (DWORD LineNo) const |
size_t | GetTokensCount () const |
DWORD | GetTokenInputOffset (DWORD LineNo) const |
BYTE | GetTokenLength (DWORD LineNo) const |
void | DeleteDescr (size_t LineNo, Descriptors d) |
void | SetOborotNo (size_t LineNo, short OborotNo) |
short | GetOborotNo (size_t LineNo) const |
void | SetPageNumber (size_t LineNo, DWORD PageNumber) |
DWORD | GetPageNumber (size_t LineNo) const |
Public Attributes | |
const CGraphanDicts * | m_pDicts |
bool | m_bConvertRussianJo2Je |
size_t | m_MinParOfs |
size_t | m_MaxParOfs |
size_t | m_TabSize |
string | m_GraOutputFile |
bool | m_bSentBreaker |
bool | m_bForceToRus |
bool | m_bEmptyLineIsSentenceDelim |
bool | m_bUseParagraphTagToDivide |
bool | m_bUseIndention |
bool | m_bFilterUnprintableSymbols |
bool | m_bRecognizeShortFIOs |
size_t | m_MaxSentenceLength |
Public Attributes inherited from CUnitHolder | |
MorphLanguageEnum | m_Language |
Private Member Functions | |
bool | IsKey (size_t LB, size_t HB, size_t &GraLast) const |
bool | FindKeySequence (const char *title, size_t i, size_t HB, size_t &GraLast) const |
bool | DealBullet (size_t i, size_t HB) |
bool | DealAsteriskBullet (size_t LB, size_t HB) |
int | DealBulletsWithTwoBrackets (size_t StartPos, size_t EndPos) |
bool | DealEnglishStyleFIO (size_t StartPos, size_t EndPos) |
bool | DealAbbrev (size_t StartPos, size_t EndPos) |
size_t | FindOborotto (size_t i, size_t HB, short &OborotNo, vector< WORD > &OborortIds) const |
void | DealOborotto (size_t HB) |
int | DealReferences (size_t i, size_t HB) |
void | DealModifierKey (size_t LB, size_t HB) |
void | DealSimpleKey (size_t LB, size_t HB) |
void | DealKeySequence (size_t LB, size_t HB) |
void | DealGermanDividedCompounds (size_t LB, size_t HB) |
void | DealExtensionsAndLocalFileNames (size_t LB, size_t HB) |
int | HasIndention (size_t LB, size_t HB) |
int | CountEndL (size_t LB, size_t HB) |
int | CountSpaces (size_t LB, size_t HB) |
int | DealFIO (size_t i, size_t HB) |
int | DealShortFIO (size_t i, size_t HB) |
void | DealNames (size_t LB, size_t HB) |
bool | DealSentBreaker () |
void | InitNonContextDescriptors (CGraLine &L) |
bool | GraphmatMain () |
int | InitContextDescriptors (size_t LB, size_t HB) |
void | MacSynHierarchy () |
Private Attributes | |
string | m_LastError |
CGraphmatFile::CGraphmatFile ()
CGraphmatFile::~CGraphmatFile ()
bool CGraphmatFile::IsKey (size_t LB, size_t HB, size_t &GraLast) const [private]
References FindKeySequence(), CUnitHolder::GetToken(), CUnitHolder::GetUnits(), CUnitHolder::IsOneAlpha(), CGraphanDicts::m_Keys, CUnitHolder::m_Language, m_pDicts, and ReverseChar().
Referenced by DealModifierKey(), and DealSimpleKey().
bool CGraphmatFile::FindKeySequence (const char *title, size_t i, size_t HB, size_t &GraLast) const [private]
References CUnitHolder::BSoft(), CUnitHolder::GetTokenLength(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), IsSuperEqualChar(), CUnitHolder::m_Language, OPun, and CUnitHolder::PSoft().
Referenced by IsKey().
bool CGraphmatFile::DealBullet (size_t i, size_t HB) [private]
References CUnitHolder::BSpace(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CUnitHolder::IsBulletWord(), CUnitHolder::IsOneFullStop(), OBullet, OCls, OOpn, OPar, OPun, CUnitHolder::PassSpace(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
bool CGraphmatFile::DealAsteriskBullet (size_t LB, size_t HB) [private]
References CUnitHolder::BSpace(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CAsteriskHyp::LineNo, MaxBulletSectionSize, OBullet, OPar, OPun, CUnitHolder::PassSpace(), CUnitHolder::SetDes(), and CAsteriskHyp::UnitNo.
Referenced by InitContextDescriptors().
int CGraphmatFile::DealBulletsWithTwoBrackets (size_t StartPos, size_t EndPos) [private]
References CUnitHolder::BSpace(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CUnitHolder::HasGrouped(), CUnitHolder::IsBulletWord(), OBullet, OCls, OOpn, OPar, CUnitHolder::PassSpace(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
bool CGraphmatFile::DealEnglishStyleFIO (size_t StartPos, size_t EndPos) [private]
References CUnitHolder::GetTokenLength(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CUnitHolder::HasGrouped(), CUnitHolder::IsOneFullStop(), OFAM1, OFAM2, OLLE, CUnitHolder::PSoft(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
bool CGraphmatFile::DealAbbrev (size_t StartPos, size_t EndPos) [private]
References abbrev_lower_bound(), AbbrevIsEqualToString(), abUpperCase, CStrToCompare::CStrToCompare(), CStrToCompare::m_Str, CStrToCompare::m_StrLen, NumberPlace, OAbbr1, OAbbr2, ODigits, and stAbbreviation.
Referenced by InitContextDescriptors().
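size_t CGraphmatFile::FindOborotto (size_t i, size_t HB, short &OborotNo, vector< WORD > &OborortIds) const [private]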
References CUnitHolder::GetUnits(), CGraphanDicts::m_Oborottos, CGraphanDicts::m_OborottosFirstWordIndex, m_pDicts, CGraphemOborot::m_TokenIds, and CUnitHolder::PSoft().
Referenced by DealOborotto().
void CGraphmatFile::DealOborotto (size_t HB) [private]
References FindOborotto(), CUnitHolder::GetUnits(), CUnitHolder::GetUppercaseToken(), CGraphanDicts::m_OborotTokens, m_pDicts, OEXPR1, OEXPR2, CUnitHolder::SetDes(), CUnitHolder::SetOborotNo(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
int CGraphmatFile::DealReferences (size_t i, size_t HB) [private]
References CUnitHolder::GetUnits(), CUnitHolder::GetUppercaseToken(), CUnitHolder::IsOneFullStop(), ORef1, ORef2, CUnitHolder::PassSpace(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealModifierKey (size_t LB, size_t HB) [private]
References CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CUnitHolder::HasGrouped(), IsKey(), OHyp, OKey1, OKey2, CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealSimpleKey (size_t LB, size_t HB) [private]
References CUnitHolder::HasGrouped(), IsKey(), CUnitHolder::IsOneAlpha(), OKey1, OKey2, CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealKeySequence (size_t LB, size_t HB) [private]
References CUnitHolder::BSoft(), CUnitHolder::DeleteDescr(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), OKey1, OKey2, CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealGermanDividedCompounds (size_t LB, size_t HB) [private]
References CUnitHolder::GetTokenLength(), CUnitHolder::GetUnits(), CUnitHolder::GetUppercaseToken(), CUnitHolder::HasDescr(), CUnitHolder::IsOneChar(), OGerDivComp1, OGerDivComp2, OHyp, OLLE, CUnitHolder::PSoft(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealExtensionsAndLocalFileNames (size_t LB, size_t HB) [private]
References CanBeFileName(), CUnitHolder::GetTokenLength(), CUnitHolder::GetUnits(), CUnitHolder::GetUppercaseToken(), CUnitHolder::HasAbbreviation(), CUnitHolder::HasGrouped(), is_english_alpha(), CGraphanDicts::IsExtension(), CUnitHolder::IsOneFullStop(), m_pDicts, OFile1, OFile2, CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
int CGraphmatFile::HasIndention (size_t LB, size_t HB) [private]
References CUnitHolder::HasDescr(), and OPar.
Referenced by DealFIO(), and DealShortFIO().
int CGraphmatFile::CountEndL (size_t LB, size_t HB) [private]
References CUnitHolder::GetUnits().
Referenced by DealFIO(), and DealShortFIO().
int CGraphmatFile::CountSpaces (size_t LB, size_t HB) [private]
References CUnitHolder::GetUnits().
Referenced by DealFIO(), and DealShortFIO().
int CGraphmatFile::DealFIO (size_t i, size_t HB) [private]
References CUnitHolder::BSoft(), CanBeRussianInitial(), CountEndL(), CountSpaces(), CUnitHolder::FirstUpper(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), HasIndention(), CUnitHolder::IsOneFullStop(), OEXPR1, OEXPR2, OFAM1, OFAM2, CUnitHolder::PSoft(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
int CGraphmatFile::DealShortFIO (size_t i, size_t HB) [private]
References CUnitHolder::BSoft(), CanBeRussianInitial(), CountEndL(), CountSpaces(), CUnitHolder::FirstUpper(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), HasIndention(), CUnitHolder::IsOneFullStop(), OEXPR1, OEXPR2, OFAM1, OFAM2, CUnitHolder::PSoft(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealNames (size_t LB, size_t HB) [private]
References CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), ONam, OSentEnd, OUpLw, and CUnitHolder::SetDes().
Referenced by InitContextDescriptors().
bool CGraphmatFile::DealSentBreaker () [private]
References CUnitHolder::BSoft(), CheckComma(), CheckGermanSentenceBreak(), dual_bracket(), FindSentEndAfterParagraph(), CUnitHolder::GetTokenInputOffset(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), IsLastInGroupOrFree(), CUnitHolder::IsOneCloseQuotationMark(), CUnitHolder::IsSentenceEndMark(), CUnitHolder::m_Language, m_MaxSentenceLength, morphGerman, OBullet, OLLE, OOpn, OPar, ORLE, OSentEnd, OUp, OUpLw, CUnitHolder::PassSpace(), CUnitHolder::PPunct(), CUnitHolder::PPunctOrSoft(), CUnitHolder::PSoft(), and SetSentMarkers().
Referenced by GraphmatMain().
void CGraphmatFile::InitNonContextDescriptors (CGraLine &L) [private]
References cHyphenChar, force_to_rus(), CGraLine::GetDescriptors(), CGraLine::GetToken(), CGraLine::GetTokenLength(), CUnitHolder::is_latin_alpha(), CUnitHolder::is_lowercase(), is_russian_alpha(), CUnitHolder::is_uppercase(), isbracket(), CGraLine::IsElectronicAddress(), CGraLine::IsEOLN(), CGraLine::IsIdent(), CGraLine::IsNotPrint(), CGraLine::IsParagraphChar(), CGraLine::IsPunct(), CGraLine::IsSpace(), m_bForceToRus, CUnitHolder::m_Language, morphGerman, morphRussian, Nu, OCls, ODel, ODigits, OElectAddr, OEOLN, OHyp, OLLE, OLw, ONil, ONumChar, OOpn, OParagraph, OPlu, OPun, ORLE, OSpc, OUnk, OUp, OUpLw, CGraLine::SetDes(), and szlig.
Referenced by GraphmatMain().
bool CGraphmatFile::GraphmatMain () [private]
References CUnitHolder::AddUnit(), CUnitHolder::BuildUnitBufferUpper(), CUnitHolder::ClearInputBuffer(), ConvertJO2Je(), DealSentBreaker(), CUnitHolder::GetInputBuffer(), CUnitHolder::GetToken(), CGraLine::GetTokenLength(), CUnitHolder::GetTokenLength(), CUnitHolder::GetUnit(), CUnitHolder::GetUnitBufferStart(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), InitContextDescriptors(), InitNonContextDescriptors(), CUnitHolder::InitTokenBuffer(), CGraLine::IsPageBreak(), CGraLine::IsSingleSpaceToDelete(), m_bConvertRussianJo2Je, m_bSentBreaker, m_GraOutputFile, m_LastError, MacSynHierarchy(), ORLE, CGraLine::ReadWord(), CUnitHolder::SetPageNumber(), CGraLine::SetSingleSpaceAfter(), CGraLine::SetToken(), and WriteGraphMat().
Referenced by LoadStringToGraphan().
int CGraphmatFile::InitContextDescriptors (size_t LB, size_t HB) [private]
References BigTextLengthInFilledLines, CalculateLMarg(), DealAbbrev(), DealAsteriskBullet(), DealBullet(), DealBulletsWithTwoBrackets(), DealEnglishStyleFIO(), DealExtensionsAndLocalFileNames(), DealFIO(), DealGermanDividedCompounds(), DealIndention(), DealKeySequence(), DealModifierKey(), DealNames(), DealOborotto(), DealReferences(), DealShortFIO(), DealSimpleEnglishNames(), DealSimpleKey(), CUnitHolder::DeleteDescr(), CGraLine::GetInputOffset(), CUnitHolder::GetUnit(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), InitEnglishNameSlot(), m_bRecognizeShortFIOs, m_bUseIndention, CUnitHolder::m_Language, m_LastError, MapCorrectMinSpace(), morphGerman, OBullet, OPar, and CUnitHolder::PassSpace().
Referenced by GraphmatMain().
void CGraphmatFile::MacSynHierarchy () [private]
References CUnitHolder::GetUnits(), InitDoc(), CUnitHolder::PSoft(), RecognizeCS(), and CUnitHolder::SetDes().
Referenced by GraphmatMain().
bool CGraphmatFile::LoadDicts ()
References CGraphanDicts::BuildOborottos(), PtrHolder< T >::Get(), GetRegistryString(), CDictionary::Load(), CUnitHolder::m_Language, m_LastError, m_pDicts, CGraphanDicts::m_pOborDic, CExpc::m_strCause, morphGerman, morphUnknown, CGraphanDicts::ReadAbbrevations(), CGraphanDicts::ReadENames(), CGraphanDicts::ReadExtensions(), CGraphanDicts::ReadIdents(), CGraphanDicts::ReadKeyboard(), CGraphanDicts::ReadSpaces(), and PtrHolder< T >::Reset().
Referenced by CConcIndexator::InitGraphan().
bool CGraphmatFile::LoadStringToGraphan (const string &szBuffer)
References Format(), GraphmatMain(), CUnitHolder::InitInputBuffer(), m_GraOutputFile, m_LastError, and CExpc::m_strCause.
Referenced by CConcIndexator::LoadFileIntoGraphan(), and CConcIndexator::LoadXmlFile().
const string & CGraphmatFile::GetLastError () const
References m_LastError.
Referenced by CConcIndexator::InitGraphan(), CConcIndexator::LoadFileIntoGraphan(), and CConcIndexator::LoadXmlFile().
void CGraphmatFile::GetGraphematicalLine (char *line, size_t NumLine) const
References _QM, CGraLine::GetDescriptors(), GetDescriptorStr(), CGraLine::GetInputOffset(), CUnitHolder::GetOborotNo(), CUnitHolder::GetPageNumber(), CGraLine::GetToken(), CGraLine::GetTokenLength(), CUnitHolder::GetUnits(), IntToStr(), CGraLine::IsNotPrint(), CGraLine::IsPageBreak(), CGraLine::IsParagraphTag(), CGraLine::IsSoft(), CGraphanDicts::m_Oborottos, m_pDicts, SEOLN, SSpace, and STab.
Referenced by WriteGraphMat().
void CGraphmatFile::WriteGraphMat (const char *FName) const
References CriticalGraphemLineLength, GetGraphematicalLine(), and CUnitHolder::GetUnits().
Referenced by GraphmatMain().
string CGraphmatFile::m_LastError [private]
Referenced by GetLastError(), GraphmatMain(), InitContextDescriptors(), LoadDicts(), and LoadStringToGraphan().
const CGraphanDicts* CGraphmatFile::m_pDicts |
bool CGraphmatFile::m_bConvertRussianJo2Je |
Referenced by CGraphmatFile(), GraphmatMain(), and CConcIndexator::InitGraphan().
size_t CGraphmatFile::m_MinParOfs |
Referenced by CGraphmatFile(), and DealIndention().
size_t CGraphmatFile::m_MaxParOfs |
Referenced by CGraphmatFile(), and DealIndention().
size_t CGraphmatFile::m_TabSize |
Referenced by CGraphmatFile(), and CGraLine::ReadWord().
string CGraphmatFile::m_GraOutputFile |
Referenced by GraphmatMain(), and LoadStringToGraphan().
bool CGraphmatFile::m_bSentBreaker |
Referenced by CGraphmatFile(), and GraphmatMain().
bool CGraphmatFile::m_bForceToRus |
Referenced by CGraphmatFile(), and InitNonContextDescriptors().
bool CGraphmatFile::m_bEmptyLineIsSentenceDelim |
Referenced by CGraphmatFile(), CConcIndexator::InitGraphan(), and RubiconText().
bool CGraphmatFile::m_bUseParagraphTagToDivide |
Referenced by CGraphmatFile(), CConcIndexator::InitGraphan(), and CGraLine::ReadWord().
bool CGraphmatFile::m_bUseIndention |
Referenced by CGraphmatFile(), InitContextDescriptors(), and CConcIndexator::InitGraphan().
bool CGraphmatFile::m_bFilterUnprintableSymbols |
Referenced by CGraphmatFile(), CConcIndexator::InitGraphan(), and CGraLine::ReadWord().
bool CGraphmatFile::m_bRecognizeShortFIOs |
Referenced by CGraphmatFile(), and InitContextDescriptors().
size_t CGraphmatFile::m_MaxSentenceLength |
Referenced by CGraphmatFile(), and DealSentBreaker().