#include <GraphmatFile.h>
CGraphmatFile::CGraphmatFile()
References m_bConvertRussianJo2Je, m_bEmptyLineIsSentenceDelim, m_bFilterUnprintableSymbols, m_bForceToRus, m_bMacSynHierarchy, m_bRecognizeShortFIOs, m_bSentBreaker, m_bSentenceAsParagraph, m_bSubdueWrongMacroSynUnitToMainRoot, m_bUseIndention, m_bUseParagraphTagToDivide, m_MaxParOfs, m_MaxSentenceLength, m_MaxUpLen, m_MinParOfs, m_pDicts, m_TabSize, and morphUnknown.
CGraphmatFile::~CGraphmatFile()
bool CGraphmatFile::IsKey(size_t LB, size_t HB, size_t & GraLast) const [private]
References FindKeySequence(), CUnitHolder::GetToken(), CUnitHolder::GetUnits(), CUnitHolder::IsOneAlpha(), CGraphanDicts::m_Keys, CUnitHolder::m_Language, m_pDicts, and ReverseChar().
Referenced by DealModifierKey(), and DealSimpleKey().
bool CGraphmatFile::FindKeySequence(const char * title, size_t i, size_t HB, size_t & GraLast) const [private]
References CUnitHolder::BSoft(), CUnitHolder::GetTokenLength(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), IsSuperEqualChar(), CUnitHolder::m_Language, OPun, and CUnitHolder::PSoft().
Referenced by IsKey().
bool CGraphmatFile::DealBullet(size_t i, size_t HB) [private]
References CUnitHolder::BSpace(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CUnitHolder::IsBulletWord(), CUnitHolder::IsOneFullStop(), OBullet, OCls, OOpn, OPar, OPun, CUnitHolder::PassSpace(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
bool CGraphmatFile::DealAsteriskBullet(size_t LB, size_t HB) [private]
References CUnitHolder::BSpace(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CAsteriskHyp::LineNo, MaxBulletSectionSize, OBullet, OPar, OPun, CUnitHolder::PassSpace(), CUnitHolder::SetDes(), and CAsteriskHyp::UnitNo.
Referenced by InitContextDescriptors().
int CGraphmatFile::DealBulletsWithTwoBrackets(size_t StartPos, size_t EndPos) [private]
References CUnitHolder::BSpace(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CUnitHolder::HasGrouped(), CUnitHolder::IsBulletWord(), OBullet, OCls, OOpn, OPar, CUnitHolder::PassSpace(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
bool CGraphmatFile::DealEnglishStyleFIO(size_t StartPos, size_t EndPos) [private]
References CUnitHolder::GetTokenLength(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CUnitHolder::HasGrouped(), CUnitHolder::IsOneFullStop(), OFAM1, OFAM2, OLLE, CUnitHolder::PSoft(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
bool CGraphmatFile::DealAbbrev(size_t StartPos, size_t EndPos) [private]
References abbrev_lower_bound(), AbbrevIsEqualToString(), abUpperCase, CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CGraphanDicts::m_Abbrevs, m_pDicts, CStrToCompare::m_Str, CStrToCompare::m_StrLen, NumberPlace, OAbbr1, OAbbr2, ODigits, CUnitHolder::PSoft(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stAbbreviation.
Referenced by InitContextDescriptors().
size_t CGraphmatFile::FindOborotto(size_t i, size_t HB, short & OborotNo, vector< WORD > & OborortIds) const [private]
References CUnitHolder::GetUnits(), CGraphanDicts::m_Oborottos, CGraphanDicts::m_OborottosFirstWordIndex, m_pDicts, CGraphemOborot::m_TokenIds, and CUnitHolder::PSoft().
Referenced by DealOborotto().
void CGraphmatFile::DealOborotto(size_t HB) [private]
References FindOborotto(), CUnitHolder::GetUnits(), CUnitHolder::GetUppercaseToken(), CGraphanDicts::m_OborotTokens, m_pDicts, OEXPR1, OEXPR2, CUnitHolder::SetDes(), CUnitHolder::SetOborotNo(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
int CGraphmatFile::DealReferences(size_t i, size_t HB) [private]
References CUnitHolder::GetUnits(), CUnitHolder::GetUppercaseToken(), CUnitHolder::IsOneFullStop(), ORef1, ORef2, CUnitHolder::PassSpace(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
int CGraphmatFile::DealStop(size_t i) [private]
void CGraphmatFile::DealModifierKey(size_t LB, size_t HB) [private]
References CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), CUnitHolder::HasGrouped(), IsKey(), OHyp, OKey1, OKey2, CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealSimpleKey(size_t LB, size_t HB) [private]
References CUnitHolder::HasGrouped(), IsKey(), CUnitHolder::IsOneAlpha(), OKey1, OKey2, CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealKeySequence(size_t LB, size_t HB) [private]
References CUnitHolder::BSoft(), CUnitHolder::DeleteDescr(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), OKey1, OKey2, CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealQuotedKeySequence(size_t LB, size_t HB) [private]
References CUnitHolder::DeleteDescr(), CUnitHolder::HasDescr(), CUnitHolder::IsOneCloseQuotationMark(), CUnitHolder::IsOneOpenQuotationMark(), OKey1, OKey2, CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
void CGraphmatFile::DealGermanDividedCompounds(size_t LB, size_t HB) [private]
References CUnitHolder::GetTokenLength(), CUnitHolder::GetUnits(), CUnitHolder::GetUppercaseToken(), CUnitHolder::HasDescr(), CUnitHolder::IsOneChar(), OGerDivComp1, OGerDivComp2, OHyp, OLLE, CUnitHolder::PSoft(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealExtensionsAndLocalFileNames(size_t LB, size_t HB) [private]
References CanBeFileName(), CUnitHolder::GetTokenLength(), CUnitHolder::GetUnits(), CUnitHolder::GetUppercaseToken(), CUnitHolder::HasAbbreviation(), CUnitHolder::HasGrouped(), is_english_alpha(), CGraphanDicts::IsExtension(), CUnitHolder::IsOneFullStop(), m_pDicts, OFile1, OFile2, CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
int CGraphmatFile::HasIndention(size_t LB, size_t HB) [private]
References CUnitHolder::HasDescr(), and OPar.
Referenced by DealFIO(), and DealShortFIO().
int CGraphmatFile::CountEndL(size_t LB, size_t HB) [private]
References CUnitHolder::GetUnits().
Referenced by DealFIO(), and DealShortFIO().
int CGraphmatFile::CountSpaces(size_t LB, size_t HB) [private]
References CUnitHolder::GetUnits().
Referenced by DealFIO(), and DealShortFIO().
int CGraphmatFile::DealFIO(size_t i, size_t HB) [private]
References CUnitHolder::BSoft(), CanBeRussianInitial(), CountEndL(), CountSpaces(), CUnitHolder::FirstUpper(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), HasIndention(), CUnitHolder::IsOneFullStop(), OEXPR1, OEXPR2, OFAM1, OFAM2, CUnitHolder::PSoft(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
int CGraphmatFile::DealShortFIO(size_t i, size_t HB) [private]
References CUnitHolder::BSoft(), CanBeRussianInitial(), CountEndL(), CountSpaces(), CUnitHolder::FirstUpper(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), HasIndention(), CUnitHolder::IsOneFullStop(), OEXPR1, OEXPR2, OFAM1, OFAM2, CUnitHolder::PSoft(), CUnitHolder::SetDes(), CUnitHolder::SetState(), and stGrouped.
Referenced by InitContextDescriptors().
void CGraphmatFile::DealNames(size_t LB, size_t HB) [private]
References CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), ONam, OSentEnd, OUpLw, and CUnitHolder::SetDes().
Referenced by InitContextDescriptors().
bool CGraphmatFile::DealSentBreaker() [private]
References CUnitHolder::BSoft(), CheckComma(), CheckGermanSentenceBreak(), dual_bracket(), FindSentEndAfterParagraph(), CUnitHolder::GetTokenInputOffset(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), IsLastInGroupOrFree(), CUnitHolder::IsOneCloseQuotationMark(), CUnitHolder::IsSentenceEndMark(), m_bSentenceAsParagraph, CUnitHolder::m_Language, m_MaxSentenceLength, morphGerman, OBullet, OLLE, OOpn, OPar, ORLE, OSentEnd, OUp, OUpLw, CUnitHolder::PassSpace(), CUnitHolder::PPunct(), CUnitHolder::PPunctOrSoft(), CUnitHolder::PSoft(), and SetSentMarkers().
Referenced by GraphmatMain().
void CGraphmatFile::InitNonContextDescriptors(CGraLine & L) [private]
References cHyphenChar, force_to_rus(), CGraLine::GetDescriptors(), CGraLine::GetToken(), CGraLine::GetTokenLength(), CUnitHolder::is_latin_alpha(), CUnitHolder::is_lowercase(), is_russian_alpha(), CUnitHolder::is_uppercase(), isbracket(), CGraLine::IsElectronicAddress(), CGraLine::IsEOLN(), CGraLine::IsIdent(), CGraLine::IsNotPrint(), CGraLine::IsParagraphChar(), CGraLine::IsPunct(), CGraLine::IsSpace(), m_bForceToRus, CUnitHolder::m_Language, morphGerman, morphRussian, Nu, OCls, ODel, ODigits, OElectAddr, OEOLN, OHyp, OLLE, OLw, ONil, ONumChar, OOpn, OParagraph, OPlu, OPun, ORLE, OSpc, OUnk, OUp, OUpLw, CGraLine::SetDes(), and szlig.
Referenced by GraphmatMain().
bool CGraphmatFile::GraphmatMain() [private]
References CUnitHolder::AddUnit(), CUnitHolder::BuildUnitBufferUpper(), CUnitHolder::ClearInputBuffer(), ConvertJO2Je(), DealSentBreaker(), CUnitHolder::GetInputBuffer(), CUnitHolder::GetToken(), CUnitHolder::GetTokenLength(), CGraLine::GetTokenLength(), CUnitHolder::GetUnit(), CUnitHolder::GetUnitBufferStart(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), InitContextDescriptors(), InitNonContextDescriptors(), CUnitHolder::InitTokenBuffer(), CGraLine::IsPageBreak(), CGraLine::IsSingleSpaceToDelete(), m_bConvertRussianJo2Je, m_bSentBreaker, m_GraOutputFile, m_LastError, MacSynHierarchy(), ORLE, CGraLine::ReadWord(), CUnitHolder::SetPageNumber(), CGraLine::SetSingleSpaceAfter(), CGraLine::SetToken(), and WriteGraphMat().
Referenced by LoadFileToGraphan(), and LoadStringToGraphan().
int CGraphmatFile::InitContextDescriptors(size_t LB, size_t HB) [private]
References BigTextLengthInFilledLines, CalculateLMarg(), DealAbbrev(), DealAsteriskBullet(), DealBullet(), DealBulletsWithTwoBrackets(), DealEnglishStyleFIO(), DealExtensionsAndLocalFileNames(), DealFIO(), DealGermanDividedCompounds(), DealIndention(), DealKeySequence(), DealModifierKey(), DealNames(), DealOborotto(), DealReferences(), DealShortFIO(), DealSimpleEnglishNames(), DealSimpleKey(), CUnitHolder::DeleteDescr(), CGraLine::GetInputOffset(), CUnitHolder::GetUnit(), CUnitHolder::GetUnits(), CUnitHolder::HasDescr(), InitEnglishNameSlot(), m_bRecognizeShortFIOs, m_bUseIndention, CUnitHolder::m_Language, m_LastError, MapCorrectMinSpace(), morphGerman, OBullet, OPar, and CUnitHolder::PassSpace().
Referenced by GraphmatMain().
void CGraphmatFile::MacSynHierarchy() [private]
References FindExplanatory(), FindHeadings(), FindParents(), FindSimilarHeadings(), CUnitHolder::GetUnits(), InitDoc(), m_bMacSynHierarchy, m_XmlMacSynOutputFile, CUnitHolder::PSoft(), RecognizeCS(), SetDepthOfBullet(), CUnitHolder::SetDes(), SetMacroSyntDependcies(), and WriteXmlMacSyn().
Referenced by GraphmatMain().
bool CGraphmatFile::LoadDicts()
References CGraphanDicts::BuildOborottos(), GetRegistryString(), CDictionary::Load(), _share_pointer_t< T >::m_bOwnPointer, CUnitHolder::m_Language, m_LastError, m_pDicts, CGraphanDicts::m_pOborDic, _share_pointer_t< T >::m_Pointer, CExpc::m_strCause, morphGerman, morphUnknown, CGraphanDicts::ReadAbbrevations(), CGraphanDicts::ReadENames(), CGraphanDicts::ReadExtensions(), CGraphanDicts::ReadIdents(), CGraphanDicts::ReadKeyboard(), CGraphanDicts::ReadSpaces(), and _share_pointer_t< T >::SetPointer().
Referenced by CConcIndexatorInvoker::BuildIndex(), and CMorphologyHolder::LoadGraphanAndLemmatizer().
bool CGraphmatFile::LoadStringToGraphan(const string & szBuffer)
References Format(), GraphmatMain(), CUnitHolder::InitInputBuffer(), m_GraOutputFile, m_LastError, CExpc::m_strCause, and m_XmlMacSynOutputFile.
Referenced by CMorphologyHolder::GetMorphology(), CConcIndexator::LoadFileIntoGraphan(), and CConcIndexator::LoadXmlFile().
bool CGraphmatFile::LoadFileToGraphan(const string & CommandLine)
References Format(), HTML::GetTextFromHtmlFile(), GraphmatMain(), CUnitHolder::InitInputBuffer(), IsHtmlFile(), LoadFileToString(), m_LastError, m_SourceFileName, and CExpc::m_strCause.
Referenced by CMorphologyHolder::GetMorphology().
void CGraphmatFile::FreeDicts()
References m_pDicts.
Referenced by CMorphologyHolder::LoadGraphanAndLemmatizer().
const string & CGraphmatFile::GetLastError() const
References m_LastError.
Referenced by CConcIndexatorInvoker::BuildIndex(), CConcIndexator::LoadFileIntoGraphan(), and CConcIndexator::LoadXmlFile().
void CGraphmatFile::GetGraphematicalLine(char * line, size_t NumLine) const
References _QM, CGraLine::GetDescriptors(), GetDescriptorStr(), CGraLine::GetInputOffset(), CUnitHolder::GetOborotNo(), CUnitHolder::GetPageNumber(), CGraLine::GetToken(), CGraLine::GetTokenLength(), CUnitHolder::GetUnits(), IntToStr(), CGraLine::IsNotPrint(), CGraLine::IsPageBreak(), CGraLine::IsParagraphTag(), CGraLine::IsSoft(), CGraphanDicts::m_Oborottos, and m_pDicts.
Referenced by CPlmLineCollection::ProcessPlmLines(), and WriteGraphMat().
MorphLanguageEnum CGraphmatFile::GetTokenLanguage(int LineNo) const
References CUnitHolder::HasDescr(), CUnitHolder::m_Language, morphGerman, OLLE, and ORLE.
Referenced by CMorphologyHolder::GetMorphology(), CLemmatizer::ProcessHyphenWords(), and CPlmLineCollection::ProcessPlmLines().
bool CGraphmatFile::StartsFixedOborot(size_t LineNo) const
References CUnitHolder::GetOborotNo(), CGraphanDicts::m_Oborottos, and m_pDicts.
Referenced by CLemmatizer::ProcessHyphenWords(), and CPlmLineCollection::ProcessPlmLines().
const CDictionary * CGraphmatFile::GetOborDic() const
References CGraphanDicts::GetOborDic(), and m_pDicts.
Referenced by CSyntaxHolder::LoadSyntax().
void CGraphmatFile::WriteGraphMat(const char * FName) const
References CriticalGraphemLineLength, GetGraphematicalLine(), and CUnitHolder::GetUnits().
Referenced by GraphmatMain().
string CGraphmatFile::m_SourceFileName [private]
Referenced by LoadFileToGraphan().
string CGraphmatFile::m_LastError [private]
Referenced by GetLastError(), GraphmatMain(), InitContextDescriptors(), LoadDicts(), LoadFileToGraphan(), and LoadStringToGraphan().
CGraphmatFile::m_pDicts
Referenced by CGraphmatFile(), DealAbbrev(), DealExtensionsAndLocalFileNames(), DealOborotto(), FindOborotto(), FreeDicts(), GetGraphematicalLine(), GetOborDic(), InitEnglishNameSlot(), IsKey(), CGraLine::LengthUntilDelimiters(), LoadDicts(), CGraLine::ReadWord(), StartsFixedOborot(), and ~CGraphmatFile().
Referenced by CGraphmatFile(), GraphmatMain(), and CConcIndexator::InitGraphanProperties().
size_t CGraphmatFile::m_MinParOfs
Referenced by CGraphmatFile(), and DealIndention().
size_t CGraphmatFile::m_MaxParOfs
Referenced by CGraphmatFile(), and DealIndention().
size_t CGraphmatFile::m_TabSize
Referenced by CGraphmatFile(), and CGraLine::ReadWord().
size_t CGraphmatFile::m_MaxUpLen
Referenced by CGraphmatFile().
CGraphmatFile::m_GraOutputFile
Referenced by GraphmatMain(), and LoadStringToGraphan().
CGraphmatFile::m_XmlMacSynOutputFile
Referenced by LoadStringToGraphan(), and MacSynHierarchy().
Referenced by CGraphmatFile(), and GraphmatMain().
CGraphmatFile::m_bMacSynHierarchy
Referenced by CGraphmatFile(), and MacSynHierarchy().
Referenced by CGraphmatFile(), and SetMacroSyntDependcies().
Referenced by CGraphmatFile(), and DealSentBreaker().
CGraphmatFile::m_bForceToRus
Referenced by CGraphmatFile(), and InitNonContextDescriptors().
Referenced by CGraphmatFile(), CConcIndexator::InitGraphanProperties(), and RubiconText().
Referenced by CGraphmatFile(), CConcIndexator::InitGraphanProperties(), and CGraLine::ReadWord().
Referenced by CGraphmatFile(), InitContextDescriptors(), and CConcIndexator::InitGraphanProperties().
Referenced by CGraphmatFile(), CConcIndexator::InitGraphanProperties(), and CGraLine::ReadWord().
Referenced by CGraphmatFile(), and InitContextDescriptors().
Referenced by CGraphmatFile(), and DealSentBreaker().