A file for globally defined constants and classes. More...

#include "../common/utilit.h"
#include "list"
#include "limits.h"
#include "../GraphanLib/GraphmatFile.h"
#include "../LemmatizerLib/Lemmatizers.h"
#include "../AgramtabLib/EngGramTab.h"
#include "../AgramtabLib/RusGramTab.h"
#include "../AgramtabLib/GerGramTab.h"
#include "../common/DDC_common.h"
#include "../tinyxml/tinyxml.h"
#include "../GraphanLib/GraphmatFile.h"
#include "../LemmatizerLib/Lemmatizers.h"
#include "../AgramtabLib/EngGramTab.h"
#include "../AgramtabLib/RusGramTab.h"
#include "../AgramtabLib/GerGramTab.h"
#include "../common/DDC_common.h"
#include "../tinyxml/tinyxml.h"

Include dependency graph for ConcCommon.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class CShortOccurCache
struct CShortOccurCache::CDataReference
the structure holds a pointer to a vector of occurrences and its size More...
struct CDDCFilterWithBounds
struct CHit

Typedefs

typedef DWORD CTokenNo
integer type CTokenNo is used to refer to the index of a token in the corpus
typedef map< size_t, vector
< DWORD > > PeriodsDivisionMap
a type for mapping an index item number to its period division
typedef map< string,
CShortOccurCache > CShortOccurCacheMap
a type for mapping an index string to its occurrences
typedef vector< CTokenNo > COccurrBuffer
a type for holding occurrences during reading from the disk

Enumerations

enum HitSortEnum {
  NoSort = 0, LessByDate = 1, GreaterByDate = 2, LessBySize = 3,
  GreaterBySize = 4, LessByFreeBiblField = 5, GreaterByFreeBiblField = 6, LessByRank = 7,
  GreaterByRank = 8, LessByLeftContext = 9, LessByRightContext = 10, HitSortsCount = 11
}
enum BigramDirectionEnum { bdDontUseBigrams, bdLeftBigram, bdRightBigram }

Functions

bool InitConcordDicts ()
initializes morphology dictionaries
void FreeConcordDicts ()
deletes morphology dictionaries
const CLemmatizer * GetLemmatizerByLanguage (MorphLanguageEnum Langua)
return a morphology dictionary by a language identifier
const CAgramtab * GetGramtabByLanguage (MorphLanguageEnum Langua)
return a grammatical table by a language identifier
void concord_daemon_log (const string &t)
write a dump message to a log file
string GetDDCErrorString (DDCErrorEnum ErrorCode)
return a string representation of a DDC error

Variables

const char globalFieldDelimeter = '\t'
a globally defined delimiter, which is used to delimit fields in one record (the first field is always a token)
const string PredefinedTableLineTag = "l"
a globally defined xml-tag, which is used to separate records if CConcIndexator::m_IndexType is Free_Index
const string ChunkIndexName = "chunk"
a globally defined index name for chunks
const string LeftBigramsIndexName = "left"
a globally defined left bigrams index name
const string RightBigramsIndexName = "right"
a globally defined right bigrams index name
const string PredefinedFileBreakName = "file"
a globally defined break collection name for corpus files
const string PredefinedTextAreaBreakName = "textarea"
a globally defined break collection name for text areas
const size_t MaxShortOccurCacheSize = 1000000
MaxShortOccurCacheSize is the upper bound of CShortOccurCache::m_Data.size(). It is introduced to restrict memory usage.
const string MorphAnnotationsDelim = "#"
a delimiter between morphological annotations
const string MorphAnnotationsDelimRegExp = "[^#]*"
a regular expression, which passes everything within one morphological annotation

Detailed Description

A file for globally defined constants and classes.

Typedef Documentation

typedef DWORD CTokenNo

integer type CTokenNo is used to refer to the index of a token in the corpus

typedef map<size_t, vector<DWORD> > PeriodsDivisionMap

a type for mapping an index item number to its period division

typedef map<string,CShortOccurCache> CShortOccurCacheMap

a type for mapping an index string to its occurrences

typedef vector<CTokenNo> COccurrBuffer

a type for holding occurrences during reading from the disk

Enumeration Type Documentation

enum HitSortEnum

HitSortEnum: this enum defines the types of all possible orders which can be applied to an output hit set.

Enumerator:

NoSort	no sort operators, only filtering
LessByDate	sort by the issue date (increasing)
GreaterByDate	sort by the issue date (decreasing)
LessBySize	sort by the size of the hit in tokens (increasing)
GreaterBySize	sort by the size of the hit in tokens (decreasing)
LessByFreeBiblField	sort by a free bibliographical field (increasing)
GreaterByFreeBiblField	sort by a free bibliographical field (decreasing)
LessByRank	sort by document rank (increasing)
GreaterByRank	sort by document rank (decreasing)
LessByLeftContext	sort by the left context of the hit (increasing)
LessByRightContext	sort by the right context of the hit (increasing)
HitSortsCount