21 #ifndef DDC_STRING_ENUM_H 22 #define DDC_STRING_ENUM_H 30 template <
typename OffT_=DWORD>
58 ddcStringEnum(
const string& cdataFile,
const string& offsetsFile,
const string& fallbackFile=
"",
bool useMMap=
false)
60 {
open(cdataFile,offsetsFile,fallbackFile,useMMap); };
76 {
return m_offsets ? m_offsets.
size() : m_strings.size(); };
80 {
return m_offsets ? m_offsets.
empty() : m_strings.empty(); };
84 {
return m_offsets.
opened() || !m_strings.empty(); };
91 inline operator bool()
const 96 {
return m_offsets ? (m_cdata.
m_data+m_offsets.
m_data[idx]) : m_strings[idx].c_str(); };
105 m_strings.push_back(value);
114 for (
const OffT* oi=m_offsets.
begin(); oi != m_offsets.
end(); ++oi)
115 v.push_back( m_cdata.
m_data + *oi );
125 ddcLogWarn(
Format(
"WARNING: ddcStringEnum(\"%s\") demoting to std::vector<> implementation", m_cdata.
filename().c_str()));
153 : m_buf(se.m_cdata.m_data)
158 {
return (m_buf+off) < key; };
160 {
return key < (m_buf+off); };
169 if (lb != m_offsets.
end())
return (lb - m_offsets.
begin());
172 vector<string>::const_iterator lb = std::lower_bound(m_strings.begin(), m_strings.end(), key);
173 if (lb != m_strings.end())
return (lb - m_strings.begin());
184 if (ub != m_offsets.
end())
return (ub - m_offsets.
begin());
187 vector<string>::const_iterator ub = std::upper_bound(m_strings.begin(), m_strings.end(), key);
188 if (ub != m_strings.end())
return (ub - m_strings.begin());
194 inline IdT
find(
const string& key)
const 197 return (lbid != NO_ID &&
operator[](lbid) == key) ? lbid :
NO_ID;
205 ddcStringEnum&
open(
const string& cdataFile,
const string& offsetsFile,
const string& fallbackFile,
bool useMMap=
false)
210 m_cdata.
open(cdataFile, useMMap);
211 m_offsets.
open(offsetsFile, useMMap);
212 ddcLogDebug(
Format(
"ddcStringEnum[%s]::open(): read %zi item(s) from %s + %s", (useMMap ?
"mmap" :
"slurp"),
size(), cdataFile.c_str(), offsetsFile.c_str()));
214 else if (!fallbackFile.empty() &&
FileExists(fallbackFile.c_str())) {
215 ddcLogWarn(
"WARNING: ddcStringEnum::open(): loading fallback file '"+fallbackFile+
"' in slurp mode: please upgrade your index!");
217 ddcLogDebug(
Format(
"ddcStringEnum[compat]::open(): read %zi item(s) from %s",
size(), fallbackFile.c_str()));
219 throw invalid_argument(
"ddcStringEnum::open(\""+cdataFile+
"\",\""+offsetsFile+
"\",\""+fallbackFile+
"\"): no file(s) found!");
229 FILE * fp = fopen(filename.c_str(),
"rb");
231 throw invalid_argument(
Format(
"ddcStringEnum::openCompat(): open failed for '%s': %s'", filename.c_str(), strerror(errno)));
233 char buf[m_bufsize+1];
234 while (fgets(buf, m_bufsize, fp)) {
237 m_strings.push_back(q);
ddcVecFile< char > m_cdata
underlying character data
Definition: ddcStringEnum.h:42
bool opened() const
check whether the object is opened
Definition: ddcStringEnum.h:83
size_t m_bufsize
buffer size for fallback loading (default=20000: old value for MaxBiblStringLen from Bibliography...
Definition: ddcStringEnum.h:46
void openCompat(const string &filename)
open in compatibility-mode; lifted from CConcXml::CFreeBiblStringIndex::ReadBiblStringItems (vector<s...
Definition: ddcStringEnum.h:225
string Format(const char *format,...)
Definition: ddcString.cpp:393
string & Trim(string &str)
Definition: utilit.cpp:1762
void push_back(const string &value)
simulate vector::push_back(): used by indexing routines
Definition: ddcStringEnum.h:102
void to_vector(vector< string > &v) const
populate v with an equivalent vector-implementation (for compile-time compatibility) ...
Definition: ddcStringEnum.h:109
IdT upper_bound(const string &key) const
returns id of upper-bound for key, or NO_ID if not found
Definition: ddcStringEnum.h:179
bool empty() const
STL-esque wrapper.
Definition: ddcMMap.h:268
ddcStringEnum(const string &cdataFile, const string &offsetsFile, const string &fallbackFile="", bool useMMap=false)
construct and open
Definition: ddcStringEnum.h:58
ddcVecFile & open(const std::string &filename, bool useMMap=false)
map a named file filename
Definition: ddcMMap.h:312
void close()
unmap current file(s), if any
Definition: ddcStringEnum.h:244
bool FileExists(const char *FName)
Definition: utilit.cpp:335
size_t size() const
returns the number of enumerated strings
Definition: ddcStringEnum.h:75
IdKeyLess(const ddcStringEnum &se)
Definition: ddcStringEnum.h:152
ddcVecFile< OffT > m_offsets
offsets in m_cdata[] of NUL-terminated item strings; associated strings must be sorted in lexicograph...
Definition: ddcStringEnum.h:43
IdT lower_bound(const string &key) const
returns id of lower-bound for key, or NO_ID if not found
Definition: ddcStringEnum.h:164
void ensureVec()
force vector-implementation
Definition: ddcStringEnum.h:122
bool operator()(const OffT off, const string &key)
Definition: ddcStringEnum.h:157
void ClearVector(vector< T > &V)
Definition: utilit.h:493
#define ddcLogWarn(Msg)
Definition: ddcLog.h:106
size_t size() const
returns the number of objects of type T in this vector, or 0
Definition: ddcMMap.h:265
bool operator()(const string &key, const OffT off)
Definition: ddcStringEnum.h:159
bool opened() const
returns true iff m_data is non-NULL
Definition: ddcMMap.h:271
const char * m_buf
Definition: ddcStringEnum.h:147
comparison helper struct for string-to-id search
Definition: ddcStringEnum.h:146
~ddcStringEnum()
default destructor calls close()
Definition: ddcStringEnum.h:63
bool empty() const
STL-esque wrapper.
Definition: ddcStringEnum.h:79
T * m_data
data (pointer to m_mmap or m_vec data)
Definition: ddcMMap.h:236
void close()
unmap current file (if any)
Definition: ddcMMap.h:330
vector< string > m_strings
backwards-compatible vector<string> implementation, for read/write access
Definition: ddcStringEnum.h:44
OffT_ OffT
typedef for offset values
Definition: ddcStringEnum.h:36
Definition: ddcStringEnum.h:31
ddcStringEnum & open(const string &cdataFile, const string &offsetsFile, const string &fallbackFile, bool useMMap=false)
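open the enumeration: opens m_cdata from cdataFile and m_offsets from offsetsFile; otherwise, if fallbackFile is non-empty and exists, loads it in compatibility ("slurp") mode with a warning; throws invalid_argument if no file(s) are found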
Definition: ddcStringEnum.h:205
IdT find(const string &key) const
returns id of key, or NO_ID if not found; wraps lower_bound()
Definition: ddcStringEnum.h:194
DWORD IdT
typedef for integer IDs
Definition: ddcStringEnum.h:37
uint32_t DWORD
Definition: utilit.h:105
IdKeyLess(const char *buf)
Definition: ddcStringEnum.h:149
const string & filename() const
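returns filename of last open(); returned by const reference, so it cannot be NULL (presumably an empty string if nothing has been opened; confirm in ddcMMap.h)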
Definition: ddcMMap.h:259
static const IdT NO_ID
constant returned by string-to-id search methods indicating no match was found
Definition: ddcStringEnum.h:143
iterator begin()
Definition: ddcMMap.h:300
void clear()
clear() method wraps close()
Definition: ddcStringEnum.h:67
#define ddcLogDebug(Msg)
Definition: ddcLog.h:112
ddcStringEnum()
default constructor
Definition: ddcStringEnum.h:53
iterator end()
Definition: ddcMMap.h:302
const char * operator[](size_t idx) const
indexing operator
Definition: ddcStringEnum.h:95