ddc
|
#include <BiblIndex.h>
Classes | |
class | CTextArea |
Public Member Functions | |
CBiblIndex () | |
void | FreeBiblIndices () |
clears m_FreeBiblIndices More... | |
void | FreeBiblExpanders () |
clears m_BiblExpanders More... | |
bool | RegisterFreeBiblAttributes (string fields, string &ErrorStr) |
initializes free bibliographical attribute descriptions More... | |
bool | RegisterTextAreas (string fields, string &ErrorStr) |
initializes free text areas descriptions More... | |
string | GetFreeBibiAttributesDescr () const |
return free bibliographical attribute description More... | |
string | GetTextAreasDescr () const |
return full text area description More... | |
void | SetPath (string ProjectFileName) |
void | LoadBibl (string Path, size_t FileBreaksSize, bool useMMap=false) |
CBibliography | GetFullBibliographyOfHit (size_t FileNo) const |
returns the bibliographical record More... | |
string | GetBiblIndexFileName () const |
string | GetBiblFileName () const |
string | GetBiblDateIndexFileName () const |
int | GetTextAreaByName (const string &Name) const |
CFreeBiblIndex * | GetFreeBiblIndex (const string &FreeBiblAttribNameOrAlias) const |
moo: not quite as ugly or dangerous a hack (respects aliases) More... | |
CBiblExpander * | GetBiblExpander (const string &ExpanderName) const |
moo: not quite as ugly or dangerous a hack More... | |
bool | GetFilterBounds (CDDCFilterWithBounds &Filter, const string &LoValue, const string &HiValue) const |
bool | GetFilterValue (CDDCFilterWithBounds &Filter, const string &Value) const |
bool | GetFilterValues (CDDCFilterWithBounds &Filter, const string &Regex) const |
bool | GetFilterValues (CDDCFilterWithBounds &Filter, const set< string > &Values) const |
void | GetTextAreaElements (const TiXmlDocument &doc, vector< TiXmlElement *> &Result) const |
return all text area elements for this document ("doc") More... | |
size_t | GetTextAreasCount () const |
return the number of registered text areas More... | |
int | WithinTextArea (const vector< string > &Within) const |
void | InitNoSort (vector< CHit > &Hits) const |
void | InitSortByDate (vector< CHit > &Hits) const |
void | InitSortByBiblIntegerField (string FreeBiblAttribNameOrAlias, vector< CHit > &Hits) const |
bool | IsRegisteredBiblField (const string &FreeBiblAttribNameOrAlias) const |
CFreeBiblIndexTypeId | GetBiblFieldTypeId (const string &FreeBiblAttribName) const |
string | FreeBiblMapToJson (bool useUtf) const |
print to json More... | |
string | FreeBiblAliasMapToJson () const |
string | BiblExpanderMapToJson () const |
const ddcDateVector & | GetDates () const |
moo: ugly dangerous hack More... | |
bool | HasFreeBiblIndex (const string &name) const |
not quite as ugly or dangerous a hack (respects aliases) More... | |
bool | HasBiblExpander (const string &ExpanderName) const |
moo: not quite as ugly or dangerous a hack More... | |
const CFreeBiblIndexInterface * | GetFreeBiblIndexConst (const string &name) const |
moo: not quite as ugly or dangerous a hack (respects aliases) More... | |
void | SetRegexOptions (const RML_RE::Options &opts) |
set regex options for all registered fields More... | |
string | GetVisibleFreeHeaderBiblAttributes (size_t FileNo, string Delim) const |
return values of all visible free bibliographical attributes for the given FileNo delimited by "Delim" More... | |
string | GetFreeHeaderBiblAttributesWithNames (size_t FileNo, char Delim) const |
return names and values of all free bibliographical attributes for the given FileNo delimited by "Delim" More... | |
string | GetFreeHeaderBiblAttributesJson (size_t FileNo, bool assume_utf8=true, bool include_invisible=false) const |
return names and values of all free bibliographical attributes for the given FileNo as JSON (without enclosing {}) More... | |
string | GetFreeHeaderBiblAttributesTabsDump (size_t FileNo, bool assume_utf8=true, bool include_invisible=false) const |
return names and values of all free bibliographical attributes for the given FileNo as tt-comments (for tt-mode dump) More... | |
bool | FileMatches (DWORD FileNo, const vector< CDDCFilterWithBounds > &Filters) const |
test whether all compiled Filters match FileNo More... | |
CBiblExpander * | AddBiblExpander (const string &spec) |
add a new bibliographic expander to m_BiblExpanders, or replace an existing one; returns new expander More... | |
Public Attributes | |
string | m_DefaultAttrName |
name of default bibliographic field to query if no literal match is found. This can be used in conjunction with a constant bibliographic metadata attribute (CConcXml::CFreeBiblStringConstant) to provide a default value for unknown bibliographic metadata attributes, e.g. to facilitate interoperability between multiple corpora. If set to the empty string (the default), query filters on an undefined bibliographic attribute will raise an error. More... | |
Protected Types | |
typedef map< string, CFreeBiblIndex * > | FreeBiblStringMap |
typedef map< string, string > | FreeBiblAliasMap |
typedef map< string, CBiblExpander * > | BiblExpanderMap |
Protected Attributes | |
FreeBiblStringMap | m_FreeBiblIndices |
FreeBiblAliasMap | m_FreeBiblAlias |
BiblExpanderMap | m_BiblExpanders |
map< string, size_t > | m_FreeBiblNameToPosition |
ddcVecFile< file_off_t > | m_EndOffsetsInBiblFile |
ddcDateVector | m_Dates |
string | m_Path |
string | m_OrigXPath |
string | m_ScanXPath |
string | m_DateXPath |
string | m_StartPageXPath |
Private Member Functions | |
string | ReadFromFile (file_off_t start, size_t nbytes) const |
Private Attributes | |
bool | m_bMemoryMap |
ddcFileOrMMap | m_BiblBodyFile |
RML_RE::Options | m_RegexOpts |
common regex options More... | |
vector< CTextArea > | m_TextAreas |
|
protected |
|
protected |
|
protected |
CBiblIndex::CBiblIndex | ( | ) |
References m_bMemoryMap.
|
private |
void CBiblIndex::FreeBiblIndices | ( | ) |
clears m_FreeBiblIndices
References m_FreeBiblIndices.
Referenced by RegisterFreeBiblAttributes(), and CConcXml::~CConcXml().
void CBiblIndex::FreeBiblExpanders | ( | ) |
clears m_BiblExpanders
References m_BiblExpanders.
Referenced by CConcXml::~CConcXml().
bool CBiblIndex::RegisterFreeBiblAttributes | ( | string | fields, |
string & | ErrorStr | ||
) |
initializes free bibliographical attribute descriptions
References CheckXPath(), ddcLogWarn, ErrorMessage(), Format(), FreeBiblAttribOptionFieldName, FreeBiblIndices(), IntegerTypeStr, CFreeBiblIndex::m_bShowInHeader, m_DateXPath, m_FreeBiblAlias, m_FreeBiblIndices, m_FreeBiblNameToPosition, CFreeBiblIndex::m_Name, m_OrigXPath, CFreeBiblIndex::m_pRegexOpts, m_RegexOpts, m_ScanXPath, m_StartPageXPath, CFreeBiblIndex::m_Xpath, Name, StringAliasTypeStr, StringConstantTypeStr, StringTypeStr, Trim(), unescapeCString(), and StringTokenizer::val().
Referenced by CConcordance::LoadOptionsFromString().
bool CBiblIndex::RegisterTextAreas | ( | string | fields, |
string & | ErrorStr | ||
) |
initializes free text areas descriptions
References CheckXPath(), CBiblIndex::CTextArea::m_TextAreaName, m_TextAreas, CBiblIndex::CTextArea::m_Xpath, morphEnglish, RmlMakeLower(), TextAreaOptionFieldName, Trim(), and StringTokenizer::val().
Referenced by CConcordance::LoadOptionsFromString().
string CBiblIndex::GetFreeBibiAttributesDescr | ( | ) | const |
return free bibliographical attribute description
References Format(), FreeBiblAttribOptionFieldName, m_DateXPath, m_FreeBiblAlias, m_FreeBiblIndices, m_OrigXPath, m_ScanXPath, and m_StartPageXPath.
Referenced by CConcordance::LoadOptionsFromString(), and CConcordance::SaveOptionsToString().
string CBiblIndex::GetTextAreasDescr | ( | ) | const |
return full text area description
References Format(), m_TextAreas, and TextAreaOptionFieldName.
Referenced by CConcordance::LoadOptionsFromString(), and CConcordance::SaveOptionsToString().
void CBiblIndex::SetPath | ( | string | ProjectFileName | ) |
References m_Path, and m_RegexOpts.
Referenced by CConcIndexator::CreateAsUnion(), LoadBibl(), and CConcXml::Start().
void CBiblIndex::LoadBibl | ( | string | Path, |
size_t | FileBreaksSize, | ||
bool | useMMap = false |
||
) |
References Format(), GetBiblDateIndexFileName(), GetBiblFileName(), GetBiblIndexFileName(), m_BiblBodyFile, m_bMemoryMap, m_Dates, m_EndOffsetsInBiblFile, m_FreeBiblIndices, m_Path, ddcVecFile< T >::open(), ddcFileOrMMap::Open(), SetPath(), and ddcVecFile< T >::size().
CBibliography CBiblIndex::GetFullBibliographyOfHit | ( | size_t | FileNo | ) | const |
returns the bibliographical record
References ArrayPtrHolder< T >::Get(), m_BiblBodyFile, m_EndOffsetsInBiblFile, ddcFileOrMMap::ReadBuffer(), CBibliography::ReadFromString(), and ddcVecFile< T >::size().
string CBiblIndex::GetBiblIndexFileName | ( | ) | const |
References m_Path, and MakeFName().
Referenced by CConcXml::DeleteFiles(), CConcXml::FinalSaveBibliography(), LoadBibl(), CConcXml::SplitBibliography(), and CConcXml::UnionBibliographies().
string CBiblIndex::GetBiblFileName | ( | ) | const |
References m_Path, and MakeFName().
Referenced by CConcXml::DeleteFiles(), LoadBibl(), CConcXml::SplitBibliography(), CConcXml::Start(), and CConcXml::UnionBibliographies().
string CBiblIndex::GetBiblDateIndexFileName | ( | ) | const |
References m_Path, and MakeFName().
Referenced by CConcXml::DeleteFiles(), CConcXml::FinalSaveBibliography(), LoadBibl(), CConcXml::SplitBibliography(), and CConcXml::UnionBibliographies().
int CBiblIndex::GetTextAreaByName | ( | const string & | Name | ) | const |
References m_TextAreas, and UnknownTextAreaNo.
Referenced by WithinTextArea().
CFreeBiblIndex * CBiblIndex::GetFreeBiblIndex | ( | const string & | FreeBiblAttribNameOrAlias | ) | const |
moo: not quite as ugly or dangerous a hack (respects aliases)
References errRuntime, m_FreeBiblAlias, m_FreeBiblIndices, and MAX_ALIAS_REDIR.
Referenced by CQFSort::GetBiblConstant(), GetBiblFieldTypeId(), GetFilterBounds(), GetFilterValue(), GetFilterValues(), GetFreeBiblIndexConst(), HasFreeBiblIndex(), InitSortByBiblIntegerField(), IsRegisteredBiblField(), and CConcXml::SplitBibliography().
CBiblExpander * CBiblIndex::GetBiblExpander | ( | const string & | ExpanderName | ) | const |
moo: not quite as ugly or dangerous a hack
References m_BiblExpanders.
Referenced by HasBiblExpander().
bool CBiblIndex::GetFilterBounds | ( | CDDCFilterWithBounds & | Filter, |
const string & | LoValue, | ||
const string & | HiValue | ||
) | const |
initialize a CDDCFilterWithBounds object for sorting by bibliographical information, given bound(s)
Filter | filter to initialize |
LoValue | lower bound or empty for none (inclusive) |
HiValue | upper bound or empty for none (exclusive)
|
References CDDCFilterWithBounds::clear(), CFreeBiblIndex::GetFilterBounds(), GetFreeBiblIndex(), and CDDCFilterWithBounds::m_AttrName.
Referenced by CQFBiblSort::Compile().
bool CBiblIndex::GetFilterValue | ( | CDDCFilterWithBounds & | Filter, |
const string & | Value | ||
) | const |
initialize a CDDCFilterWithBounds object for filtering by bibliographical information, for a literal target value.
Filter | filter to initialize |
Value | literal value used to set Filter.m_LevelStart
|
References CDDCFilterWithBounds::clear(), CFreeBiblIndex::GetFilterValue(), GetFilterValues(), GetFreeBiblIndex(), CDDCFilterWithBounds::m_AttrName, and m_BiblExpanders.
Referenced by CQFHasFieldValue::Compile().
bool CBiblIndex::GetFilterValues | ( | CDDCFilterWithBounds & | Filter, |
const string & | Regex | ||
) | const |
initialize a CDDCFilterWithBounds object for filtering by bibliographical information, for a target regex. Calls CConcXml::CFreeBiblIndex::GetFilterValues().
References CDDCFilterWithBounds::clear(), CFreeBiblIndex::GetFilterValues(), GetFreeBiblIndex(), and CDDCFilterWithBounds::m_AttrName.
Referenced by CQFHasFieldRegex::Compile(), CQFHasFieldSet::Compile(), GetFilterValue(), and GetFilterValues().
bool CBiblIndex::GetFilterValues | ( | CDDCFilterWithBounds & | Filter, |
const set< string > & | Values | ||
) | const |
initialize a CDDCFilterWithBounds object for filtering by bibliographical information, for a set of target values.
References CDDCFilterWithBounds::clear(), CFreeBiblIndex::GetFilterValues(), GetFilterValues(), GetFreeBiblIndex(), CDDCFilterWithBounds::m_AttrName, and m_BiblExpanders.
void CBiblIndex::GetTextAreaElements | ( | const TiXmlDocument & | doc, |
vector< TiXmlElement *> & | Result | ||
) | const |
return all text area elements for this document ("doc")
References Format(), m_TextAreas, TiXmlDocument::RootElement(), TiXmlNode::ToElement(), and TinyXPath::XNp_xpath_node().
Referenced by CConcIndexator::IndexFreeIndex(), CConcIndexator::LoadXmlFile(), and CConcXml::ReadMorphXmlFileIntoGraTable().
size_t CBiblIndex::GetTextAreasCount | ( | ) | const |
return the number of registered text areas
References m_TextAreas.
Referenced by CQueryNode::ConvertOccurrencesToHits(), CQueryNode::ConvertOccurrencesToHitsForPatterns(), CConcIndexator::IndexTextOrHtmlFile(), and CConcIndexator::LoadXmlFile().
int CBiblIndex::WithinTextArea | ( | const vector< string > & | Within | ) | const |
return text area index, which is specified in the input query (if nothing is specified, it returns UnknownTextAreaNo)
returns index of the last valid text area named in Within.
References GetTextAreaByName(), and UnknownTextAreaNo.
Referenced by CQueryOptions::Compile().
void CBiblIndex::InitNoSort | ( | vector< CHit > & | Hits | ) | const |
References CHit::m_SortKey.
Referenced by InitSortByBiblIntegerField().
void CBiblIndex::InitSortByDate | ( | vector< CHit > & | Hits | ) | const |
References ddcVecFile< T >::empty(), m_Dates, CHit::m_FileNo, and CHit::m_SortKey.
void CBiblIndex::InitSortByBiblIntegerField | ( | string | FreeBiblAttribNameOrAlias, |
vector< CHit > & | Hits | ||
) | const |
References CHitSortKey::assign(), GetFreeBiblIndex(), CFreeBiblIndex::GetIntegerValue(), CFreeBiblIndex::GetStringValue(), InitNoSort(), CHit::m_FileNo, and CHit::m_SortKey.
bool CBiblIndex::IsRegisteredBiblField | ( | const string & | FreeBiblAttribNameOrAlias | ) | const |
References GetFreeBiblIndex().
Referenced by CConcordance::LoadOptionsFromString().
CFreeBiblIndexTypeId CBiblIndex::GetBiblFieldTypeId | ( | const string & | FreeBiblAttribName | ) | const |
get the registered type of a registered bibliographic field as a CFreeBiblIndexTypeId
References cfbiUnknown, GetFreeBiblIndex(), and CFreeBiblIndex::GetTypeId().
string CBiblIndex::FreeBiblMapToJson | ( | bool | useUtf | ) | const |
print to json
References cfbiStringConstant, Format(), CFreeBiblIndex::GetTypeId(), CFreeBiblIndex::GetXpathStr(), jsonStr(), CFreeBiblIndex::m_bShowInHeader, m_FreeBiblIndices, CFreeBiblIndex::m_Name, CFreeBiblIndex::m_Xpath, and CFreeBiblIndex::size().
Referenced by CDDCLeafServer::handle__info().
string CBiblIndex::FreeBiblAliasMapToJson | ( | ) | const |
References jsonStr(), and m_FreeBiblAlias.
Referenced by CDDCLeafServer::handle__info().
string CBiblIndex::BiblExpanderMapToJson | ( | ) | const |
References jsonStr(), m_BiblExpanders, CBiblExpander::m_BxLabel, CBiblExpander::m_BxParam, and CBiblExpander::m_BxTarget.
Referenced by CDDCLeafServer::handle__info().
|
inline |
moo: ugly dangerous hack
References m_Dates.
|
inline |
not quite as ugly or dangerous a hack (respects aliases)
References GetFreeBiblIndex().
Referenced by CQFSort::ResolveAttributeName().
|
inline |
moo: not quite as ugly or dangerous a hack
References GetBiblExpander().
Referenced by CQFSort::ResolveAttributeName().
|
inline |
moo: not quite as ugly or dangerous a hack (respects aliases)
References AddBiblExpander(), FileMatches(), GetFreeBiblIndex(), GetFreeHeaderBiblAttributesJson(), GetFreeHeaderBiblAttributesTabsDump(), GetFreeHeaderBiblAttributesWithNames(), GetVisibleFreeHeaderBiblAttributes(), and SetRegexOptions().
void CBiblIndex::SetRegexOptions | ( | const RML_RE::Options & | opts | ) |
set regex options for all registered fields
References m_FreeBiblIndices, and m_RegexOpts.
Referenced by GetFreeBiblIndexConst(), and CConcordance::LoadOptionsFromString().
string CBiblIndex::GetVisibleFreeHeaderBiblAttributes | ( | size_t | FileNo, |
string | Delim | ||
) | const |
return values of all visible free bibliographical attributes for the given FileNo delimited by "Delim"
References CFreeBiblIndex::GetStringValue(), CFreeBiblIndex::m_bShowInHeader, and m_FreeBiblIndices.
Referenced by GetFreeBiblIndexConst().
string CBiblIndex::GetFreeHeaderBiblAttributesWithNames | ( | size_t | FileNo, |
char | Delim | ||
) | const |
return names and values of all free bibliographical attributes for the given FileNo delimited by "Delim"
References CFreeBiblIndex::GetStringValue(), CFreeBiblIndex::m_bShowInHeader, m_FreeBiblIndices, and CFreeBiblIndex::m_Name.
Referenced by GetFreeBiblIndexConst().
string CBiblIndex::GetFreeHeaderBiblAttributesJson | ( | size_t | FileNo, |
bool | assume_utf8 = true , |
||
bool | include_invisible = false |
||
) | const |
return names and values of all free bibliographical attributes for the given FileNo as JSON (without enclosing {})
References CFreeBiblIndex::GetStringValue(), jsonStr(), CFreeBiblIndex::m_bShowInHeader, m_FreeBiblIndices, and CFreeBiblIndex::m_Name.
Referenced by GetFreeBiblIndexConst().
string CBiblIndex::GetFreeHeaderBiblAttributesTabsDump | ( | size_t | FileNo, |
bool | assume_utf8 = true , |
||
bool | include_invisible = false |
||
) | const |
return names and values of all free bibliographical attributes for the given FileNo as tt-comments (for tt-mode dump)
References CFreeBiblIndex::GetStringValue(), jsonStr(), CFreeBiblIndex::m_bShowInHeader, m_FreeBiblIndices, and CFreeBiblIndex::m_Name.
Referenced by GetFreeBiblIndexConst().
bool CBiblIndex::FileMatches | ( | DWORD | FileNo, |
const vector< CDDCFilterWithBounds > & | Filters | ||
) | const |
test whether all compiled Filters match FileNo
References CFreeBiblIndex::FileMatches(), GreaterByDate, LessByDate, and m_Dates.
Referenced by CQCount::CountUniversal(), and GetFreeBiblIndexConst().
CBiblExpander * CBiblIndex::AddBiblExpander | ( | const string & | spec | ) |
add a new bibliographic expander to m_BiblExpanders, or replace an existing one; returns new expander
References EngMakeLower(), errLogic, errRuntime, m_BiblExpanders, CTermExpander::m_Class, and Trim().
Referenced by GetFreeBiblIndexConst(), and CConcordance::LoadOptionsFromString().
|
private |
whether to mmap resident data (default=false)
Referenced by CBiblIndex(), and LoadBibl().
|
private |
Referenced by GetFullBibliographyOfHit(), and LoadBibl().
|
private |
common regex options
Referenced by RegisterFreeBiblAttributes(), SetPath(), and SetRegexOptions().
|
private |
Referenced by GetTextAreaByName(), GetTextAreaElements(), GetTextAreasCount(), GetTextAreasDescr(), and RegisterTextAreas().
|
protected |
Referenced by CConcXml::AddIndexItem(), CConcXml::DeleteFiles(), CConcXml::FinalSaveBibliography(), FreeBiblIndices(), FreeBiblMapToJson(), GetFreeBibiAttributesDescr(), GetFreeBiblIndex(), GetFreeHeaderBiblAttributesJson(), GetFreeHeaderBiblAttributesTabsDump(), GetFreeHeaderBiblAttributesWithNames(), GetVisibleFreeHeaderBiblAttributes(), LoadBibl(), CConcXml::LoadXmlAndReadBibliography(), RegisterFreeBiblAttributes(), CConcXml::SetFreeBiblAttribsEmpty(), SetRegexOptions(), CConcXml::SplitBibliography(), CConcXml::Start(), and CConcXml::UnionBibliographies().
|
protected |
Referenced by FreeBiblAliasMapToJson(), GetFreeBibiAttributesDescr(), GetFreeBiblIndex(), and RegisterFreeBiblAttributes().
|
protected |
Referenced by AddBiblExpander(), BiblExpanderMapToJson(), FreeBiblExpanders(), GetBiblExpander(), GetFilterValue(), and GetFilterValues().
|
protected |
Referenced by RegisterFreeBiblAttributes(), and CConcXml::SetFreeBiblByName().
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
string CBiblIndex::m_DefaultAttrName |
name of default bibliographic field to query if no literal match is found. This can be used in conjunction with a constant bibliographic metadata attribute (CConcXml::CFreeBiblStringConstant) to provide a default value for unknown bibliographic metadata attributes, e.g. to facilitate interoperability between multiple corpora. If set to the empty string (the default), query filters on an undefined bibliographic attribute will raise an error.
Referenced by CConcordance::InitDefaultOptions(), CConcordance::LoadOptionsFromString(), CQFSort::ResolveAttributeName(), and CConcordance::SaveOptionsToString().