ddc
|
#include <DocumentIterator.h>
Public Member Functions | |
bool | IsEmpty () const |
void | Initialize (const CConcIndexator *parent) |
void | ReplaceZeroCharWithSpace () |
void | NormalizeDocumentBuffer () |
void | DefaultColumnMap (const CConcIndexator *parent) |
void | CustomColumnMap (const CConcordance *parent, vector< CStringIndexSet *> &columnMap) |
Public Attributes | |
string | CorpusFileName |
vector< char > | DocumentBuffer |
vector< string > | DocumentLines |
size_t | DroppedLinesCount |
Public Attributes inherited from CTokenIndexator | |
vector< CStringIndexSet * > | ColumnMap |
vector< CStringIndexSet * > | AbsentIndices |
CTokenNo | CorpusEndTokenNo |
bool CIndexDocument::IsEmpty() const
References DocumentBuffer, and DocumentLines.
Referenced by ConcIndexatorInvoker::IndexFile().
void CIndexDocument::Initialize(const CConcIndexator *parent)
References DefaultColumnMap(), DocumentBuffer, DocumentLines, and DroppedLinesCount.
Referenced by CDocumentIterator::NextDocument().
void CIndexDocument::ReplaceZeroCharWithSpace()
void CIndexDocument::NormalizeDocumentBuffer()
References CorpusFileName, DocumentBuffer, HTML::GetTextFromHTMLBuffer(), IsHtmlFile(), and replaceZeroCharWithSpace().
Referenced by CDocumentIterator::NextDocument().
void CIndexDocument::DefaultColumnMap(const CConcIndexator *parent)
References CTokenIndexator::AbsentIndices, CTokenIndexator::ColumnMap, and CStringIndexator::m_Indices.
Referenced by Initialize().
void CIndexDocument::CustomColumnMap(const CConcordance *parent, vector< CStringIndexSet * > &columnMap)
References CTokenIndexator::AbsentIndices, CTokenIndexator::ColumnMap, and CStringIndexator::m_Indices.
Referenced by CTabFormatIndexator::IndexTabFormat().
string CIndexDocument::CorpusFileName |
Referenced by ConcIndexatorInvoker::IndexFile(), CConcIndexator::IndexFreeIndex(), CConcIndexator::IndexMorphXml(), CTabFormatIndexator::IndexTabFormat(), CConcIndexator::IndexTextOrHtmlFile(), CDocumentIterator::NextDocument(), NormalizeDocumentBuffer(), and CDocumentIterator::ReadTabFormatDocument().
vector<char> CIndexDocument::DocumentBuffer |
vector<string> CIndexDocument::DocumentLines |
Referenced by CTabFormatIndexator::IndexTabFormat(), Initialize(), IsEmpty(), and CDocumentIterator::ReadTabFormatDocument().
size_t CIndexDocument::DroppedLinesCount |
Referenced by CTabFormatIndexator::IndexTabFormat(), Initialize(), and CTabFormatIndexator::ProcessMetaField().