ddc
TermExpander.h
Go to the documentation of this file.
1 //-*- Mode: C++ -*-
2 //
3 // DDC originally by Alexey Sokirko
4 // Changes and modifications 2011-2018 by Bryan Jurish
5 //
6 // This file is part of DDC.
7 //
8 // DDC is free software: you can redistribute it and/or modify
9 // it under the terms of the GNU Lesser General Public License as published by
10 // the Free Software Foundation, either version 3 of the License, or
11 // (at your option) any later version.
12 //
13 // DDC is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public License
19 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
20 //
21 
22 #ifndef DDC_TERM_EXPANDER_H
23 #define DDC_TERM_EXPANDER_H
24 
25 #include "../CommonLib/utilit.h"
26 #include "CCurl.h"
27 
28 //======================================================================
34 public:
// class name for this expander
36  string m_Class;
37 
// unique name for this expander
39  string m_Label;
40 
// string argument(s) for this expander (parameters, for stringification)
42  string m_Param;
43 
44 public:
45  //------------------------------------------------------------
47 
// Full constructor (abstract base classes only).
// Copies cls, label and param into m_Class, m_Label and m_Param respectively.
48  CTermExpander(const string &cls, const string& label, const string& param)
50  : m_Class(cls), m_Label(label), m_Param(param)
51  {};
52 
// Default constructor, to be overridden by subclasses.
// The class name is fixed to "None"; label defaults to "NoLabel" and param to
// the empty string.
54  CTermExpander(const string &label=string("NoLabel"), const string& param=string(""))
55  : m_Class("None"), m_Label(label), m_Param(param)
56  {};
57 
// Clone constructor; handles sub-object cloning according to the xlate map.
// Declaration only: the definition is not part of this header.
59  virtual CTermExpander* mapClone(map<CTermExpander*,CTermExpander*>& xlate) const;
60 
// Clone constructor: creates a new expansion object (override this in derived
// classes). This base implementation returns a `new` CTermExpander built from
// this object's m_Class, m_Label and m_Param; the xlate argument is not used here.
62  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const
63  { return new CTermExpander(m_Class, m_Label, m_Param); };
64 
// Default destructor (virtual, empty body).
66  virtual ~CTermExpander(void) {};
67 
// Set up the object based on label and/or parameter string; this default
// implementation does nothing.
69  virtual void compile(void) {};
71 
72 public:
73  //------------------------------------------------------------
75 
// String form of this expander. Per the generated documentation the default is
// m_Class + " " + m_Label + " " + m_Param (defined outside this header).
76  virtual string configString(void) const;
78 
// Expand a single term src to a set of terms dst (defined outside this header).
80  virtual void expand(const string& src, set<string>& dst);
81 
// Set-valued overload; this is the overload the subclasses in this header redeclare.
// NOTE(review): presumably expands every term of src into dst -- confirm in TermExpander.cpp.
83  virtual void expand(const set<string>& src, set<string>& dst);
85 };
86 
87 //======================================================================
// TxId: identity expander (null-op); params: none.
89 class TxId : public CTermExpander {
90 public:
// Constructor: passes the fixed class name "Id" plus label and param to the base class.
91  TxId(const string &label=string("Id"), const string& param=string(""))
92  : CTermExpander("Id",label,param)
93  {};
94  virtual ~TxId(void) {};
95 
// Clone constructor: returns a `new` TxId with this object's label and param
// (xlate is not used here).
96  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const
97  { return new TxId(m_Label,m_Param); };
98 
// Set-valued expansion; declaration only (defined outside this header).
99  virtual void expand(const set<string>& src, set<string>& dst);
100 };
101 
102 //======================================================================
// TxList: vector of (non-owning, see TxChain) expander pointers.
104 typedef vector<CTermExpander*> TxList;
105 
// TxChain: serial chain of (multiple) expanders.
106 class TxChain : public CTermExpander {
107 public:
// chain of sub-expanders; these must be explicitly freed
109  TxList m_Chain;
110 
111 public:
// default constructor: passes the fixed class name "Chain" to the base class
113  TxChain(const string &label=string("Chain"), const string& param=string(""))
114  : CTermExpander("Chain",label,param)
115  {};
116 
// clone constructor override; declaration only (defined outside this header)
118  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const;
119 
// Destructor: sub-expanders are NOT implicitly destroyed!
121  virtual ~TxChain(void) {};
122 
// append() overloads taking a TxList, another TxChain, or a single expander.
// NOTE(review): presumably each appends to m_Chain -- confirm in the definitions.
124  void append(TxList& txl);
125 
127  inline void append(TxChain& txc);
128 
130  void append(CTermExpander *tx);
131 
// NOTE(review): presumably a string form of the chain joined by joinstr
// (default "|") -- confirm in the definition.
133  string chainString(const string& joinstr=string("|"));
134 
// Set-valued expansion override; declaration only (defined outside this header).
136  virtual void expand(const set<string>& src, set<string>& dst);
137 };
138 
139 //======================================================================
// TxLang: abstract base class for language-dependent term expanders.
141 class TxLang : public CTermExpander {
142 public:
// NOTE: the generated documentation lists a member `MorphLanguageEnum m_Lang`
// (DDC-language to use for expansion) at original line 144; that line is not
// shown in this listing.
145 
146 public:
// Constructor; declaration only. Defaults: cls "Lang", label "Lang", param "Generic".
148  TxLang(const string& cls=string("Lang"), const string &label=string("Lang"), const string& param=string("Generic"));
149  virtual ~TxLang(void) {};
150 
// Clone constructor: returns a `new` copy-constructed TxLang (xlate is not used here).
151  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const
152  { return (CTermExpander*)new TxLang(*this); };
153 
154  //virtual void expand(const set<string>& src, set<string>& dst);
155 
// setLanguage() overloads taking an enum value or a language name string;
// declarations only.
// NOTE(review): presumably both update m_Lang -- confirm in TermExpander.cpp.
157  virtual void setLanguage(MorphLanguageEnum lang);
158 
162  virtual void setLanguage(const string &lang);
163 };
164 
165 //======================================================================
// TxMorph: default backwards-compatible morphological expansion (morphy)
167 class TxMorph : public TxLang {
168 public:
// param: language name, passed to GetLanguageByString()
// Passes the fixed class name "Morph" to TxLang.
170  TxMorph(const string &label=string("Morph"), const string& param=string(""))
171  : TxLang("Morph",label,param)
172  {};
173  virtual ~TxMorph(void) {};
174 
// Clone constructor: returns a `new` copy-constructed TxMorph (xlate is not used here).
175  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const
176  { return (CTermExpander*)new TxMorph(*this); };
177 
// Set-valued expansion override; declaration only (defined outside this header).
179  virtual void expand(const set<string>& src, set<string>& dst);
180 };
181 
182 //======================================================================
// TxToLower: convert input to all-lower-case.
184 class TxToLower : public TxLang {
185 public:
// param: language name, passed to GetLanguageByString()
// Passes the fixed class name "ToLower" to TxLang.
187  TxToLower(const string &label=string("ToLower"), const string& param=string(""))
188  : TxLang("ToLower",label,param)
189  {};
190  virtual ~TxToLower(void) {};
191 
// Clone constructor: returns a `new` copy-constructed TxToLower (xlate is not used here).
192  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const
193  { return (CTermExpander*)new TxToLower(*this); };
194 
// Set-valued expansion override; declaration only (defined outside this header).
196  virtual void expand(const set<string>& src, set<string>& dst);
197 };
198 
199 //======================================================================
// TxToUpper: convert input to all-upper-case.
201 class TxToUpper : public TxLang {
202 public:
// param: language name, passed to GetLanguageByString()
// Passes the fixed class name "ToUpper" to TxLang.
204  TxToUpper(const string &label=string("ToUpper"), const string& param=string(""))
205  : TxLang("ToUpper",label,param)
206  {};
207  virtual ~TxToUpper(void) {};
208 
// Clone constructor: returns a `new` copy-constructed TxToUpper (xlate is not used here).
209  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const
210  { return (CTermExpander*)new TxToUpper(*this); };
211 
// Set-valued expansion override; declaration only (defined outside this header).
213  virtual void expand(const set<string>& src, set<string>& dst);
214 };
215 
216 //======================================================================
// TxCase: letter-case (upper<->lower) expander.
218 class TxCase : public TxLang {
219 public:
// param: language name, passed to GetLanguageByString()
// Passes the fixed class name "Case" to TxLang.
221  TxCase(const string &label=string("Case"), const string& param=string(""))
222  : TxLang("Case",label,param)
223  {};
224  virtual ~TxCase(void) {};
225 
// Clone constructor: returns a `new` copy-constructed TxCase (xlate is not used here).
226  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const
227  { return (CTermExpander*)new TxCase(*this); };
228 
// Set-valued expansion override; declaration only (defined outside this header).
230  virtual void expand(const set<string>& src, set<string>& dst);
231 };
232 
233 
234 //======================================================================
// TxCurl: abstract term expander using libcurl.
236 class TxCurl : public CTermExpander {
237 public:
// NOTE: the generated documentation lists a member `CCurl m_CCurl` (CCurl object
// to use for expansion query) at original line 239; that line is not shown in
// this listing.
240 
241 public:
// default constructor; no implicit compilation
243  TxCurl(const string &cls=string("Curl"), const string &label=string("Curl"), const string& param=string(""))
244  : CTermExpander(cls,label,param)
245  {};
246 
// Destructor (virtual); the body shown here is empty.
248  virtual ~TxCurl(void) {};
249 
// clone constructor override; declaration only (defined outside this header)
251  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const;
252 
253  //--------------------------------------------------------------
254  //\name Subclass API
256 
// compile() override; declaration only (defined outside this header)
259  virtual void compile(void);
260 
// Wrapper for m_CCurl.perform_cached(); returns that call's result.
262  virtual bool perform(void)
263  { return m_CCurl.perform_cached(); };
265 
266  //--------------------------------------------------------------
267  // expansion API (nothing here)
268  //virtual void expand(const set<string>& src, set<string>& dst);
269 };
270 
271 
272 //======================================================================
// TxCab: HTTP-based term expander using the DTA::CAB HTTP protocol.
275 class TxCab : public TxCurl {
276 public:
// base URL; newline-separated query argument(s) will be appended for each expansion query
278  string m_UrlBase;
279 
// debug?
281  int m_Debug;
282 
// NOTE: the generated documentation lists a member `int m_MapMode` at original
// line 285; that line is not shown in this listing.
286 
287 public:
288  //------------------------------------------------------------
// Constructor: passes the fixed class name "Cab" to TxCurl and stores Debug and
// MapMode in m_Debug and m_MapMode.
289  TxCab(const string &label=string("Cab"), const string& param=string(""), int Debug=0, int MapMode=0)
290  : TxCurl("Cab",label,param), m_Debug(Debug), m_MapMode(MapMode)
291  {};
292  virtual ~TxCab(void) {};
293 
// clone constructor override; declaration only (defined outside this header)
294  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const;
295 
// compile() override; declaration only (defined outside this header)
302  virtual void compile(void);
303 
// Set-valued expansion override; declaration only (defined outside this header).
304  virtual void expand(const set<string>& src, set<string>& dst);
305 };
306 
307 //======================================================================
// TxCabMap: HTTP-based term expander using DTA::CAB HTTP protocol in "map-mode".
309 class TxCabMap : public TxCab {
310 public:
311  //------------------------------------------------------------
// Constructor: forwards to TxCab (MapMode defaults to 1 here rather than 0), then
// overwrites m_Class, which the TxCab constructor set to "Cab".
312  TxCabMap(const string &label=string("CabMap"), const string& param=string(""), int Debug=0, int MapMode=1)
313  : TxCab(label,param,Debug,MapMode)
314  {
315  m_Class = "CabMap";
316  };
317  virtual ~TxCabMap(void) {};
318 
// clone constructor override; declaration only (defined outside this header)
319  virtual CTermExpander* clone(map<CTermExpander*,CTermExpander*>& xlate) const;
320 };
321 
322 
323 //======================================================================
326 public:
// typedef for label->expander map
328  typedef map<string,CTermExpander*> TxMap;
329 
// maps expander labels to expanders (label keys are case-sensitive!)
331  TxMap m_tx;
332 
// list of defined expander names in definition order (for config-string generation)
334  list<string> m_txlist;
335 
336 public:
337  //--------------------------------------------------------------
338  //\name Constructors etc.
// Default constructor for TxDispatcher (name-based expansion dispatcher); empty body.
// NOTE: original line 341 is not shown in this listing.
340  TxDispatcher(void)
342  {};
343 
// Returns a pointer to a TxDispatcher; declaration only.
// NOTE(review): presumably a newly allocated copy -- confirm ownership in the definition.
345  TxDispatcher* clone() const;
346 
// Destructor; declaration only (defined outside this header).
348  ~TxDispatcher(void);
349 
// Declaration only.
// NOTE(review): presumably empties the dispatcher, deleting the stored expanders
// when doDelete is true -- confirm in the definition.
351  void clear(bool doDelete=true);
352 
// Four-argument form; declaration only. doInfl and doCase default to true.
357  void ensureDefaultExpanders(MorphLanguageEnum mlang, MorphLanguageEnum clang, bool doInfl=true, bool doCase=true);
358 
// Body of the one-argument wrapper for ensureDefaultExpanders(lang,lang). Per the
// generated documentation its signature (original line 360, not shown in this
// listing) is: void ensureDefaultExpanders(MorphLanguageEnum lang=morphGeneric)
361  { ensureDefaultExpanders(lang,lang); };
363 
364 public:
365  //--------------------------------------------------------------
366  //\name I/O API
// Declaration only.
// NOTE(review): presumably creates and registers an expander from a specification
// string (see parseTermExpanderSpec) -- confirm in the definition.
368  CTermExpander *addExpander(const string &spec);
374 
// Declaration only.
// NOTE(review): presumably calls compile() on the registered expanders -- confirm.
376  void compile(void);
377 
// Declaration only; prefix defaults to the empty string.
// NOTE(review): presumably a string form of the dispatcher configuration -- confirm.
380  string configString(const string &prefix="") const;
382 
383 public:
384  //--------------------------------------------------------------
385  //\name Runtime API
// insert() overloads, with and without an explicit label; declarations only.
// NOTE(review): presumably the one-argument form uses tx->m_Label as key -- confirm.
387  void insert(const string &label, CTermExpander *tx);
389 
391  void insert(CTermExpander *tx);
392 
// Declaration only; takes the label of an expander.
394  void remove(const string &label);
395 
// Declaration only; returns an expander pointer for label.
// NOTE(review): result for an unknown label is not visible here -- confirm.
397  CTermExpander *get(const string &label) const;
398 
// getChain() overloads returning a TxChain by value, from a vector of labels or
// from a specification string; declarations only.
400  TxChain getChain(const vector<string> &labels);
401 
403  TxChain getChain(const string& spec);
405 };
406 
407 
408 //======================================================================
409 // Utilities
410 
// Parse a term expander specification given as a space-separated (" \t\n\r") list.
// The parsed class, label and parameter strings are passed back through the
// non-const reference arguments txclass, txlabel and txparam.
412 void parseTermExpanderSpec(const string &spec, string &txclass, string &txlabel, string &txparam);
413 
// Create a new TermExpander from a parsed specification.
// Defaults: class_ "Id", label and param empty.
415 CTermExpander *newTermExpander(const string& class_="Id", const string& label=string(""), const string& param=string(""));
416 
417 #endif /* DDC_TERM_EXPANDER_H */
418 
419 /*--- emacs style variables ---
420  * Local Variables:
421  * mode: C++
422  * c-file-style: "ellemtel"
423  * c-basic-offset: 4
424  * tab-width: 8
425  * indent-tabs-mode: nil
426  * End:
427  */
virtual CTermExpander * clone(map< CTermExpander *, CTermExpander *> &xlate) const
Clone constructor: creates a new expansion object (override this in derived classes).
Definition: TermExpander.h:209
TxMorph: default backwards-compatible morphological expansion (morphy)
Definition: TermExpander.h:167
TxId: identity expander (null-op): params: none.
Definition: TermExpander.h:89
CTermExpander(const string &cls, const string &label, const string &param)
Full constructor (abstract base classes only)
Definition: TermExpander.h:49
CCurl: abstract term expander using libcurl.
Definition: CCurl.h:72
virtual ~CTermExpander(void)
Default destructor.
Definition: TermExpander.h:66
virtual CTermExpander * clone(map< CTermExpander *, CTermExpander *> &xlate) const
Clone constructor: creates a new expansion object (override this in derived classes).
Definition: TermExpander.h:175
virtual ~TxLang(void)
Definition: TermExpander.h:149
TxCase: letter-case (upper<->lower) expander.
Definition: TermExpander.h:218
TxChain(const string &label=string("Chain"), const string &param=string(""))
default constructor
Definition: TermExpander.h:113
virtual void compile(void)
set up object based on label and/or parameter string; default implementation does nothing ...
Definition: TermExpander.h:69
virtual ~TxChain(void)
Destructor: sub-expanders are NOT implicitly destroyed!
Definition: TermExpander.h:121
virtual string configString(void) const
String form of this expander. Default is m_Class + " " + m_Label + " " + m_Param. ...
Definition: TermExpander.cpp:42
TxCase(const string &label=string("Case"), const string &param=string(""))
param: language name, passed to GetLanguageByString()
Definition: TermExpander.h:221
virtual ~TxMorph(void)
Definition: TermExpander.h:173
TxToLower: convert input to all-lower-case.
Definition: TermExpander.h:184
virtual ~TxCase(void)
Definition: TermExpander.h:224
virtual ~TxCab(void)
Definition: TermExpander.h:292
TxToUpper(const string &label=string("ToUpper"), const string &param=string(""))
param: language name, passed to GetLanguageByString()
Definition: TermExpander.h:204
Definition: utilit.h:167
string m_Label
unique name for this expander
Definition: TermExpander.h:39
virtual ~TxToLower(void)
Definition: TermExpander.h:190
CTermExpander(const string &label=string("NoLabel"), const string &param=string(""))
Default constructor, to be overridden by subclasses.
Definition: TermExpander.h:54
void parseTermExpanderSpec(const string &spec, string &txclass, string &txlabel, string &txparam)
parse a term expander specification as a space-separated (" \t\n\r") list "LABEL [CLASS=LABEL [PARAM=...
Definition: TermExpander.cpp:565
vector< CTermExpander * > TxList
TxChain: serial chain of (multiple) expanders.
Definition: TermExpander.h:104
virtual CTermExpander * clone(map< CTermExpander *, CTermExpander *> &xlate) const
Clone constructor: creates a new expansion object (override this in derived classes).
Definition: TermExpander.h:62
Abstract API for term-expansion modules (e.g. thesauri, morphologies, equivalence maps...
Definition: TermExpander.h:33
virtual CTermExpander * clone(map< CTermExpander *, CTermExpander *> &xlate) const
Clone constructor: creates a new expansion object (override this in derived classes).
Definition: TermExpander.h:226
string m_Param
string argument(s) for this expander (parameters, for stringification)
Definition: TermExpander.h:42
virtual CTermExpander * clone(map< CTermExpander *, CTermExpander *> &xlate) const
Clone constructor: creates a new expansion object (override this in derived classes).
Definition: TermExpander.h:192
virtual ~TxToUpper(void)
Definition: TermExpander.h:207
TxCab: HTTP-based term expander using DTA::CAB HTTP protocol I/O format is DTA::CAB::Format::ExpandL...
Definition: TermExpander.h:275
virtual CTermExpander * clone(map< CTermExpander *, CTermExpander *> &xlate) const
Clone constructor: creates a new expansion object (override this in derived classes).
Definition: TermExpander.h:96
list< string > m_txlist
list of defined expander names in definition order (for config-string generation) ...
Definition: TermExpander.h:334
virtual bool perform(void)
Wrapper for m_CCurl.perform_cached()
Definition: TermExpander.h:262
TxLang: abstract base class for language-dependent term expanders.
Definition: TermExpander.h:141
int m_MapMode
Definition: TermExpander.h:285
TxToUpper: convert input to all-upper-case.
Definition: TermExpander.h:201
virtual CTermExpander * clone(map< CTermExpander *, CTermExpander *> &xlate) const
Clone constructor: creates a new expansion object (override this in derived classes).
Definition: TermExpander.h:151
map< string, CTermExpander * > TxMap
typedef for label->expander map
Definition: TermExpander.h:328
TxCab(const string &label=string("Cab"), const string &param=string(""), int Debug=0, int MapMode=0)
Definition: TermExpander.h:289
virtual ~TxCabMap(void)
Definition: TermExpander.h:317
virtual ~TxCurl(void)
Destructor frees m_Curl if non-NULL.
Definition: TermExpander.h:248
TxCurl: abstract term expander using libcurl.
Definition: TermExpander.h:236
TxMorph(const string &label=string("Morph"), const string &param=string(""))
param: language name, passed to GetLanguageByString()
Definition: TermExpander.h:170
TxCurl(const string &cls=string("Curl"), const string &label=string("Curl"), const string &param=string(""))
default constructor; no implicit compilation
Definition: TermExpander.h:243
TxList m_Chain
chain of sub-expanders; these must be explicitly freed
Definition: TermExpander.h:109
string m_UrlBase
base URL; newline-separated query argument(s) will be appended for each expansion query ...
Definition: TermExpander.h:278
TxCabMap: HTTP-based term expander using DTA::CAB HTTP protocol in "map-mode".
Definition: TermExpander.h:309
Definition: TermExpander.h:106
bool perform_cached(CCurlCache *cache=&ddcCurlCache)
Definition: CCurl.cpp:137
MorphLanguageEnum
Definition: utilit.h:162
void ensureDefaultExpanders(MorphLanguageEnum lang=morphGeneric)
wrapper for ensureDefaultExpanders(lang,lang)
Definition: TermExpander.h:360
string m_Class
class name for this expander
Definition: TermExpander.h:36
int m_Debug
debug?
Definition: TermExpander.h:281
CTermExpander * newTermExpander(const string &class_="Id", const string &label=string(""), const string &param=string(""))
create a new TermExpander from a parsed specification
Definition: TermExpander.cpp:587
virtual void expand(const string &src, set< string > &dst)
Expand a single term src to a set of terms dst.
Definition: TermExpander.cpp:47
virtual ~TxId(void)
Definition: TermExpander.h:94
virtual CTermExpander * mapClone(map< CTermExpander *, CTermExpander *> &xlate) const
Clone constructor; handles sub-object cloning according to the xlate map.
Definition: TermExpander.cpp:32
CCurl m_CCurl
CCurl object to use for expansion query.
Definition: TermExpander.h:239
TxId(const string &label=string("Id"), const string &param=string(""))
Definition: TermExpander.h:91
TxDispatcher: name-based expansion dispatcher.
Definition: TermExpander.h:325
TxMap m_tx
maps expander labels to expanders (label keys are case-sensitive!)
Definition: TermExpander.h:331
TxCabMap(const string &label=string("CabMap"), const string &param=string(""), int Debug=0, int MapMode=1)
Definition: TermExpander.h:312
MorphLanguageEnum m_Lang
DDC-language to use for expansion.
Definition: TermExpander.h:144
TxToLower(const string &label=string("ToLower"), const string &param=string(""))
param: language name, passed to GetLanguageByString()
Definition: TermExpander.h:187