34 #ifndef MOOT_TRIE_VECTOR_H 35 #define MOOT_TRIE_VECTOR_H 39 #undef MOOT_TRIE_VECTOR_DEBUG 41 #ifdef MOOT_TRIE_VECTOR_DEBUG 61 : mother(mother_index), mindtr(mindtr_index)
68 template <
typename DataT,
typename CharT =
char,
typename UCharT =
unsigned char>
81 size_t mindtr_index=0,
93 const DataT &node_data)
121 static const NodeId NoNode =
static_cast<size_t>(-1);
124 static const size_t NoMaxLen =
static_cast<size_t>(-1);
132 : trie_maxlen(maxlen),
133 trie_use_case(use_case)
153 template<
class DataT,
typename CharT =
char,
typename UCharT =
unsigned char>
156 public std::vector<TrieVectorNode<DataT,CharT,UCharT> >
175 typedef typename vector_type::iterator
iterator;
187 typedef std::map<string_type,NodeId>
map_type;
194 map_type trie_pending;
195 data_type trie_default_data;
208 trie_pending(map_type()),
209 trie_default_data(data_type())
216 inline void clear(
void)
218 vector_type::clear();
219 trie_pending.clear();
228 inline const size_t &maxlen(
void)
const 229 {
return trie_maxlen; };
232 inline size_t &maxlen(
void)
233 {
return trie_maxlen; };
236 inline bool compiled(
void)
const 237 {
return !trie_pending.empty(); };
240 inline void ensure_compiled(
void)
241 {
if (!compiled()) compile(); };
244 inline const DataT &default_data(
void)
const 245 {
return trie_default_data; };
248 inline DataT &default_data(
void)
249 {
return trie_default_data; };
257 inline string_type trie_canonicalize(string_type &s)
const 259 if (!trie_use_case) {
260 for (string_iterator si = s.begin(); si != s.end(); si++) {
268 inline void trie_key(
const string_type &s,
269 const size_t max_len,
273 dst.assign(s,0,max_len);
274 trie_canonicalize(dst);
278 inline string_type trie_key(
const string_type &s,
const size_t max_len)
282 trie_key(s,max_len,key);
287 inline string_type trie_key(
const string_type &s)
const 288 {
return trie_key(s,trie_maxlen); };
292 inline void trie_rkey(
const string_type &s,
293 const size_t max_len,
297 dst.assign(s.rbegin(), s.rbegin() + (max_len > s.size() ?
300 trie_canonicalize(dst);
304 inline string_type trie_rkey(
const string_type &s,
size_t max_len)
const 307 trie_rkey(s, max_len, key);
312 inline string_type trie_rkey(
const string_type &s)
const 313 {
return trie_rkey(s,trie_maxlen); };
320 inline void trie_insert(
const string_type &s,
size_t max_len)
321 { trie_pending[trie_key(s,max_len)] = 0; };
324 inline void trie_insert(
const string_type &s)
325 { trie_pending[trie_key(s,trie_maxlen)] = 0; };
328 inline void trie_rinsert(
const string_type &s,
size_t max_len)
329 { trie_pending[trie_rkey(s,max_len)] = 0; };
332 inline void trie_rinsert(
const string_type &s)
333 { trie_pending[trie_rkey(s,trie_maxlen)] = 0; };
349 inline iterator find_dtr(
const node_type &from, CharT label)
353 if (!trie_use_case) label = tolower(label);
354 for (dn=0, di=this->begin()+from.
mindtr; di != this->end() && dn < from.
ndtrs; di++, dn++) {
355 if (di->label == label)
return di;
361 inline const_iterator find_dtr(
const node_type &from, CharT label)
const 365 if (!trie_use_case) label = tolower(label);
366 for (dn=0, di=this->begin()+from.
mindtr; di != this->end() && dn < from.
ndtrs; di++, dn++) {
367 if (di->label == label)
return di;
373 inline NodeId find_dtr_id(
NodeId fromid, CharT label)
const 375 const_iterator di = find_dtr(*(this->begin()+fromid), label);
376 return (di==this->end() ? NoNode : (di-this->begin()));
389 inline iterator first_dtr(
const node_type &from)
390 {
return ( from.
ndtrs == 0 ? this->end() : (this->begin()+from.
mindtr) ); };
393 inline const_iterator first_dtr(
const node_type &from)
const 394 {
return ( from.
ndtrs == 0 ? this->end() : (this->begin()+from.
mindtr) ); };
405 inline iterator find_mother(
const node_type &to)
406 {
return (to.
mother == NoNode ? this->end() : (this->begin()+to.
mother)); };
409 inline const_iterator find_mother(
const node_type &to)
const 410 {
return (to.
mother == NoNode ? this->end() : (this->begin()+to.
mother)); };
414 {
return (this->begin()+toid)->mother; };
417 inline string_type node_rstring(
const node_type &node)
const 419 if (node.
mother == NoNode)
return string_type();
420 string_type s(1, node.
label);
422 for (mi=find_mother(node); mi != this->end() && mi->mother != NoNode; mi=find_mother(*mi)) {
423 s.push_back(mi->label);
429 inline string_type node_rstring(
NodeId nodeid)
const 430 {
return node_rstring(*(this->begin()+nodeid)); };
433 inline string_type node_string(
const node_type &node)
const 435 string_type s = node_rstring(node);
436 reverse(s.begin(),s.end());
441 inline string_type node_string(
NodeId nodeid)
const 442 {
return node_string(*(this->begin()+nodeid)); };
450 for (mi=find_mother(node); mi != this->end() && mi->mother != NoNode; mi=find_mother(*mi)) {
457 inline size_t node_depth(
NodeId nodeid)
const 458 {
return node_depth(*(this->begin()+nodeid)); };
466 inline void compile(
void)
468 vector_type::clear();
470 this->push_back(
node_type(NoNode,NoNode,0,0,trie_default_data));
479 for (pos=0, changed=
true; pos < trie_maxlen && changed; pos++) {
483 for (pi=trie_pending.begin(); pi != trie_pending.end(); pi++) {
484 const string_type &kstr = pi->first;
485 NodeId &knodid = pi->second;
486 if (kstr.size() <= pos)
continue;
489 dnodid = find_dtr_id(knodid,dlabel);
491 if (dnodid == NoNode) {
492 dnodid = vector_type::size();
501 node_type &mnode = this->operator[](knodid);
504 if (mnode.
mindtr == NoNode)
514 trie_pending.clear();
532 inline iterator find_longest(
const string_type &s,
533 size_t *matchlen=NULL)
535 const_string_iterator si;
536 iterator di, i = this->begin();
539 for (si = s.begin() , di = i , pos=0;
540 si != s.end() && pos < trie_maxlen;
541 si++ , i = di , pos++)
543 di = find_dtr(*di,*si);
544 if (di==this->end())
break;
546 if (matchlen) *matchlen = pos;
551 inline const_iterator find_longest(
const string_type &s,
552 size_t *matchlen=NULL)
555 const_string_iterator si;
556 const_iterator di, i = this->begin();
559 for (si = s.begin() , di = i , pos=0;
560 si != s.end() && pos < trie_maxlen;
561 si++ , i = di , pos++)
563 di = find_dtr(*di,*si);
564 if (di==this->end())
break;
566 if (matchlen) *matchlen = pos;
575 inline iterator rfind_longest(
const string_type &s,
576 size_t *matchlen=NULL)
578 const_reverse_string_iterator si;
579 iterator di, i = this->begin();
582 for (si = s.rbegin() , di = i , pos=0;
583 si != s.rend() && pos < trie_maxlen;
584 si++ , i = di , pos++)
586 di = find_dtr(*di,*si);
587 if (di==this->end())
break;
589 if (matchlen) *matchlen = pos;
594 inline const_iterator rfind_longest(
const string_type &s,
595 size_t *matchlen=NULL)
598 const_reverse_string_iterator si;
599 const_iterator di, i = this->begin();
602 for (si = s.rbegin() , di = i , pos=0;
603 si != s.rend() && pos < trie_maxlen;
604 si++ , i = di , pos++)
606 di = find_dtr(*di,*si);
607 if (di==this->end())
break;
609 if (matchlen) *matchlen = pos;
614 #ifdef MOOT_TRIE_VECTOR_DEBUG 621 void dump(FILE *out,
const CharT delim=0)
623 const_iterator i, mi;
624 for (i = this->begin(); i != this->end(); i++) {
625 string_type s = node_rstring(*i);
626 if (s.empty())
continue;
628 fwrite(s.data(),
sizeof(CharT), s.size(), out);
633 void bindump(FILE *out) {
634 for (const_iterator i=this->begin(); i != this->end(); i++) {
635 fwrite(&(*i),
sizeof(
node_type), 1, out);
640 void arcdump(FILE *out) {
641 for (const_iterator i=this->begin(); i != this->end(); i++) {
642 fprintf(out,
"node=%u\t mom=%u\t mindtr=%u\t label=%c\t ndtrs=%u\n",
643 i-this->begin(), i->mother, i->mindtr, i->label, i->ndtrs);
647 #endif //-- /MOOT_TRIE_VECTOR_DEBUG 652 #endif //-- MOOT_TRIE_VECTOR_H UCharT ndtrs
number of daughters
Definition: mootTrieVector.h:66
string_type::iterator string_iterator
Definition: mootTrieVector.h:171
Definition: mootAssocVector.h:39
CharT label
label of arc to this node
Definition: mootTrieVector.h:65
string_type::const_reverse_iterator const_reverse_string_iterator
Definition: mootTrieVector.h:174
Top-level trie class-template using an adjaceny table.
Definition: mootTrieVector.h:143
string_type::reverse_iterator reverse_string_iterator
Definition: mootTrieVector.h:173
safely includes autoheader preprocessor macros
Base class for TrieVector.
Definition: mootTrieVector.h:103
vector_type::iterator iterator
Definition: mootTrieVector.h:164
size_t NodeId
Definition: mootTrieVector.h:106
TrieVectorNodeBase(size_t mother_index=0, size_t mindtr_index=0)
Definition: mootTrieVector.h:49
LISP-style assoc list using vector<>: map-like class with small memory footprint. Useful for small as...
Definition: mootAssocVector.h:130
Definition: mootTrieVector.h:58
std::map< string_type, NodeId > map_type
Definition: mootTrieVector.h:176
vector_type::const_iterator const_iterator
Definition: mootTrieVector.h:165
std::string string_type
Definition: mootTrieVector.h:169
string_type::const_iterator const_string_iterator
Definition: mootTrieVector.h:172
Definition: mootTrieVector.h:44
UCharT uchar_type
Definition: mootTrieVector.h:62
unsigned char uchar_type
Definition: mootTrieVector.h:152
CharT char_type
Definition: mootTrieVector.h:61
char char_type
Definition: mootTrieVector.h:151
std::vector< node_type > vector_type
Definition: mootTrieVector.h:162
map_type::const_iterator const_map_iterator
Definition: mootTrieVector.h:178
DataT data_type
Definition: mootTrieVector.h:60
size_t mindtr
index of first arc from this node
Definition: mootTrieVector.h:47
map_type::iterator map_iterator
Definition: mootTrieVector.h:177
size_t mother
index of this node's mother
Definition: mootTrieVector.h:46
size_t node_depth(NodeId nodeid) const
Definition: mootTrieVector.h:446