29 #ifndef ConcIndexator_h
30 #define ConcIndexator_h
107 bool CreateAsUnion(vector<CConcIndexator* > &X,
bool inheritOptions =
true);
110 bool CreateAsUnion(
const vector<string> &Xfiles,
bool inheritOptions =
true);
116 bool SplitProject(vector<CConcIndexator*> &Subs,
const string &SubOptions)
const;
const DWORD DefaultMaxTokenCountInOnePeriod
global default value (5000000) for CConcIndexator::m_UserMaxTokenCountInOnePeriod ...
Definition: ConcIndexator.cpp:29
Definition: HitBorder.h:47
void IndexTextOrHtmlFile(CIndexDocument *document)
Definition: ConcIndexator.cpp:473
void IndexFreeIndex(CIndexDocument *document)
Definition: ConcIndexator.cpp:408
Definition: DocumentIterator.h:15
void IndexOneFile(CIndexDocument *document)
index one file according to m_IndexType
Definition: ConcIndexator.cpp:631
Definition: Bibliography.h:13
CConcIndexator()
Definition: ConcIndexator.cpp:55
bool SaveCorpusFileList() const
saves corpus file list (*._con)
Definition: ConcIndexator.cpp:93
bool SaveMaskedFileIds() const
saves masked file-ids (*._masked_ids)
Definition: ConcIndexator.cpp:133
const DWORD DefaultMaxInputLoadIndexSize
global default value (400000) for CConcIndexator::m_UserMaxInputLoadIndexSize (must be <= DefaultMaxT...
Definition: ConcIndexator.cpp:31
void InitGraphan()
initializes graphematics using current options
Definition: ConcIndexator.cpp:76
void CalculateSearchPeriods(DWORD MaxTokenCountInOnePeriod)
finds all subcorpora
Definition: ConcIndexator.cpp:296
const CDwdsThesaurus * m_pDwdsThesaurus
a reference to DWDS thesaurus if applicable
Definition: ConcIndexator.h:50
CGraphmatFile Graphmat
graphmat (tokenization parser)
Definition: ConcIndexator.h:47
void TerminateIndexing()
terminates indexing (for exceptions)
Definition: ConcIndexator.cpp:618
void StartIndexing()
begins indexing
Definition: ConcIndexator.cpp:606
void IndexMorphXml(CIndexDocument *document)
Definition: IndexMorphXml.cpp:25
DWORD GetMaxTokenCountInOnePeriod() const
returns the size of one subcorpus
Definition: ConcIndexator.cpp:168
void LoadXmlFile(string FileName, const char *pFileBuffer, CBibliography &Bibl)
Definition: ConcIndexator.cpp:249
void NormalEndIndexing()
finishes indexing (normal way)
Definition: ConcIndexator.cpp:624
void RollbackIndexOneFile(CTokenNo startTrimTokenNo)
rolls back data buffered by an immediately preceding failed IndexOneFile()
Definition: ConcIndexator.cpp:651
DWORD GetMaxInputLoadIndexSize() const
returns the max size of input index in tokens; must be less than GetMaxTokenCountInOnePeriod() ...
Definition: ConcIndexator.cpp:175
void LoadFileIntoGraphan(string FileName, const char *pFileBuffer, CBibliography &Bibl)
Definition: ConcIndexator.cpp:446
bool CreateMorphIndexWrapper()
creates morphology index
Definition: ConcIndexator.cpp:647
Definition: DwdsThesaurus.h:25
bool SplitProject(vector< CConcIndexator *> &Subs) const
split project uniformly into sub-projects (new; sub-projects inherit parent options) ...
Definition: ConcordSplit.cpp:24
Definition: ConcIndexator.h:44
bool IsDWDSToken(long GraLine) const
graphematical definition of a token for DWDSIndex
Definition: ConcIndexator.cpp:66
DWORD CTokenNo
integer type CTokenNo is used to refer to an index of a token in the corpus
Definition: ConcCommon.h:63
~CConcIndexator()
Definition: ConcIndexator.cpp:62
uint32_t DWORD
Definition: utilit.h:105
Definition: GraphmatFile.h:28
void DestroyIndex()
destroys all index files
Definition: ConcIndexator.cpp:139
void IndexOneTableTextArea(const string &Text, const CPageNumber &StartPageFromHeader, size_t &page_breaks_count, CIndexDocument *document)
Definition: ConcIndexator.cpp:316
bool CreateAsUnion(vector< CConcIndexator * > &X, bool inheritOptions=true)
creates new concordance as union of one or more concordances (new)
Definition: ConcordUnion.cpp:24