38 #include "../CommonLib/ddcCorpusList.h" 75 bool SaveSourceFileList(
string FileName);
78 void DeleteSourceFile(
long ItemNo);
81 void AddSourceFile(
const char *FileName);
84 void DeleteAllSourceFiles();
89 size_t GetSourceFilesCount()
const;
92 string GetSourceFile(
size_t FileNo)
const;
96 void ReadSourceFileList(
string FileName);
99 int FoundNotExistedFile()
const;
102 bool IsModified()
const;
148 string GetBiblIndexFileName()
const;
150 string GetBiblFileName()
const;
158 const char *GetIndexTypeStr()
const;
161 bool ReadIndexTypeFromStr(
const string &s);
164 void DumpFileIndexTabs(
DWORD FileNo, FILE *f)
const;
211 void AssertHasPath()
const;
214 void LoadOptionsFromString(
string Options);
216 void InitDefaultOptions();
218 string SaveOptionsToString()
const;
327 void RegisterIndicesToShow(
const string &IndexListStr);
330 string GetIndicesToShowStr(
bool Positional =
false)
const;
345 return RML_RE::Options(pcrecpp::RE_Options(), m_PcreCharacterTables.data());
367 string GetHtmlReference(
size_t posFile)
const;
370 string GetShortFilename(
size_t posFile)
const;
373 string GetFileNameForCorpusFileNames()
const;
376 string GetFileNameForMaskedFiles()
const;
379 string GetFileNameForMaskedFileIds()
const;
394 void LoadSourceFilesAndOptions(
string FileName,
bool reallyReadSourceFiles =
true);
397 void LoadCorpusFiles();
400 string GetCommonFilePrefix()
const;
403 void LoadMaskedFiles();
406 void LoadOptionsFromFile(
const string &OptFile);
409 void LoadProject(
string FileName,
bool includeSourceFiles =
true);
412 time_t UpdateTimestamp(
const char *filename);
415 void DumpBibliography(FILE *f = stdout)
const;
418 string DumpFileBibliography(
DWORD FileNo)
const;
421 void DumpFileIndexJson(
DWORD FileNo, FILE *f = stdout)
const;
428 void DumpIndexToSingleTabFile(FILE *outfp)
const;
431 bool SaveOptions(
string FileName)
const;
434 bool GetAllOccurrences(vector<CTokenNo> &occurrences,
size_t searchPeriodNo)
const;
437 bool GetOccurrencesByPosition(
const string &BreakName,
int anchor, vector<CTokenNo> &occurrences,
438 size_t searchPeriodNo)
const;
bool m_bShowNumberOfRelevantDocuments
if true, then DDC always calculates the number of documents, where at least one hit is found ...
Definition: Concordance.h:230
size_t GetMaskedFilesCount() const
get count of masked files
Definition: Concordance.h:391
Definition: ddcCorpusList.h:31
DWORD m_UserMaxInputLoadIndexSize
The maximal number of occurrences in the input load index, by default 400000.
Definition: Concordance.h:207
bool m_bDwdsCorpusInterface
if m_bDwdsCorpusInterface is on, the program outputs results in DWDS format
Definition: Concordance.h:133
DWORD CFileNo
integer type CFileNo is used to refer to a single document (file) in the corpus
Definition: ConcCommon.h:66
map< string, string > m_OpDefaultIndexNames
maps token-query operators to default index names; keys are as returned by CQToken::OperatorKey() ...
Definition: Concordance.h:314
size_t GetCorpusFilesCount() const
get the number of indexed corpus files
Definition: Concordance.h:385
Definition: pcre_rml.h:46
CHighlightTags m_TextHighlighting
highlighting delimiters for CConcHolder::m_ResultFormat == DDC_ResultText
Definition: Concordance.h:250
size_t m_MaxQueryCacheSize
moo: maximum number of queries to be cached by an associated CConcHolder (default=512) ...
Definition: Concordance.h:296
bool m_bUseDwdsThesaurus
Enables indexing and querying using DWDS Thesaurus.
Definition: Concordance.h:209
Definition: HitBorder.h:67
CHighlightTags m_HtmlHighlighting
highlighting tags for CConcHolder::m_ResultFormat == DDC_ResultHTML
Definition: Concordance.h:247
bool m_bUseParagraphTagToDivide
Enables using "<p>" tag as a paragraph delimiter.
Definition: Concordance.h:199
CHighlightTags m_TableHighlighting
highlighting delimiters for CConcHolder::m_ResultFormat == DDC_ResultTable
Definition: Concordance.h:253
Definition: StringIndexator.h:121
Definition: Concordance.h:111
A file for globally defined constants and classes.
string m_InternetPathPrefix
Definition: Concordance.h:141
bool m_bQueryOnlyFiles
prohibits sentence break collection under DWDS_Index or MorphXML_Index
Definition: Concordance.h:232
A type for corpus without annotations, which are written for each word. For example, the input text can...
Definition: Concordance.h:175
string GetCorpusFile(CFileNo FileNo) const
get corpus file by index
Definition: Concordance.h:388
bool m_bIndexPunctuation
Enables indexing all punctuation marks.
Definition: Concordance.h:197
bool m_bLemmaQueryUsesMorphPattern
interpret "%foo" queries using MorphPattern? (default=true)
Definition: Concordance.h:284
bool m_bGutenbergInterface
if m_bGutenbergInterface is on, the program outputs results in a format of Gutenberg project ...
Definition: Concordance.h:135
This index type is free and therefore it should be defined in the options file (fields "Indices" and ...
Definition: Concordance.h:187
bool UseTabFormatForLoading() const
Definition: Concordance.h:439
bool m_bEmptyLineIsSentenceDelim
if m_bEmptyLineIsSentenceDelim is on, every empty line in the input file is considered to be the end ...
Definition: Concordance.h:201
bool OutputBibliographyOfHits() const
return true, if DDC should output bibliographical information for hits instead of corpus file names ...
Definition: Concordance.h:361
A type for xml-texts, if their words have predefined and written annotations. DDC always builds a tok...
Definition: Concordance.h:179
vector< string > m_SourceFiles
Source files
Definition: Concordance.h:68
CConcXml m_BiblIndex
a member which holds an index for bibliographical information
Definition: Concordance.h:244
bool m_bAllowCountByTokenAttributes
using any token attribute as a count-key will throw an exception unless this is true (default=true) ...
Definition: Concordance.h:281
bool m_bOutputBibliographyOfHits
Should we show bibliography of the hits instead of filename.
Definition: Concordance.h:139
map< string, pair< bool, string > > m_ServerInfo
maps symbolic keys to string constants to be included in corpus 'info' response as info...
Definition: Concordance.h:320
double m_TfIdfRank
the parameter for TfIdf ranking
Definition: Concordance.h:264
bool m_bDisableDefaultQueryLexicalExpansion
if true, then no default lexical expansion of query words occurs
Definition: Concordance.h:256
Definition: Concordance.h:126
Definition: Concordance.h:113
RML_RE::Options GetRegexOptions() const
return default pcre regex options
Definition: Concordance.h:341
string m_LocalPathPrefix
Definition: Concordance.h:142
CIndexDumpFormatE
Definition: Concordance.h:110
size_t size() const
returns the number of enumerated strings
Definition: ddcCorpusList.h:76
Definition: Concordance.h:112
ddcCorpusList m_CorpusFiles
Corpus files
Definition: Concordance.h:239
bool IndexPunctuation() const
wrapper for m_bIndexPunctuation
Definition: Concordance.h:364
DWORD m_UserMaxTokenCountInOnePeriod
The maximal number of occurrences in one subcorpus (defined by user)
Definition: Concordance.h:205
size_t m_MaxCachedHitsCount
moo: maximum number of hits in a CConcHolder cache entry – query results with more than MaxCachedHit...
Definition: Concordance.h:293
bool IsDwdsCorpusInterface() const
return true, if DDC outputs results in DWDS format
Definition: Concordance.h:349
DDCIndexTypeEnum m_IndexType
the type of index
Definition: Concordance.h:195
MorphLanguageEnum m_Language
the language of the corpus
Definition: Concordance.h:222
int m_LeftKwicContextSize
the size of the left context of the highlighted words in document search
Definition: Concordance.h:258
bool UseDwdsThesaurus() const
return true, if DWDS thesaurus is enabled (index "Thes")
Definition: Concordance.h:358
bool m_bUseIndention
if m_bUseIndention is on, the program tries to find paragraphs using indentations
Definition: Concordance.h:203
string m_InterpDelimiter
delimiter to use between token index fields in output
Definition: Concordance.h:270
vector< size_t > m_IndicesToShow
indices to show for Free_Index
Definition: Concordance.h:287
Definition: Concordance.h:114
CMaskedFileSet m_MaskedFiles
Definition: Concordance.h:241
bool m_bAllowUnsafeQueries
potentially unsafe queries will throw an exception unless this is true (default=false) ...
Definition: Concordance.h:278
double m_NearRank
the parameter for Near ranking
Definition: Concordance.h:266
int m_RightKwicContextSize
the size of the right context of the highlighted words in document search
Definition: Concordance.h:260
bool HasContextOperator() const
return true, if query context operator (#Cntxt) is switched off
Definition: Concordance.h:355
bool m_bResumeOnIndexErrors
if true, CConcIndexatorInvoker skips source documents with errors
Definition: Concordance.h:236
bool IsGutenbergInterface() const
return true, if DDC outputs results in Gutenberg project format
Definition: Concordance.h:352
enum CIndexDumpFormatE CIndexDumpFormat
bool m_bIndexChunks
Enables indexing and querying using chunks.
Definition: Concordance.h:226
bool m_bCaseSensitive
if true, then the default search is case sensitive
Definition: Concordance.h:228
string m_CommonFilePrefix
Definition: Concordance.h:143
int m_NumberOfKwicLinesInSnippets
the maximal number of kwic lines in file snippets
Definition: Concordance.h:262
bool m_Utf8
whether to assume indexed data is utf8 encoded (default=no)
Definition: Concordance.h:275
Definition: DwdsThesaurus.h:25
Definition: Concordance.h:65
MorphLanguageEnum
Definition: utilit.h:162
time_t m_Timestamp
moo: timestamp of project *._con file
Definition: Concordance.h:290
bool m_bIndexMorphPatterns
Enables the index of morph patterns.
Definition: Concordance.h:224
double m_PositionRank
the parameter for Position ranking
Definition: Concordance.h:268
uint32_t DWORD
Definition: utilit.h:105
Definition: IndexSet.h:31
set< CFileNo > CMaskedFileSet
Definition: Concordance.h:118
TxDispatcher: name-based expansion dispatcher.
Definition: TermExpander.h:325
DDCIndexTypeEnum
enum DDCIndexTypeEnum contains index types. Each index type determines DDC indices and break collecti...
Definition: Concordance.h:170
bool m_bArchiveIndex
sets that index should be archived under DWDS_Index or MorphXML_Index
Definition: Concordance.h:234
bool m_bNoContextOperator
should we switch off context operator (#Cntxt) due to copyright
Definition: Concordance.h:137
string m_TokenDelimiter
delimiter to use between tokens in output
Definition: Concordance.h:272
vector< BYTE > m_PcreCharacterTables
a table of character properties for regular expressions which depend on CConcIndexator::m_Language ...
Definition: Concordance.h:130
bool m_bModifiedListOfFiles
Definition: Concordance.h:69
TxDispatcher m_Txd
term expansion dispatcher; should define at least an entry for "default"
Definition: Concordance.h:299