ddc
MorphXmlToken.h
Go to the documentation of this file.
1 // DDC originally by Alexey Sokirko
2 // Changes and modifications 2011-2015 by Bryan Jurish
3 //
4 // This file is part of DDC.
5 //
6 // DDC is free software: you can redistribute it and/or modify
7 // it under the terms of the GNU Lesser General Public License as published by
8 // the Free Software Foundation, either version 3 of the License, or
9 // (at your option) any later version.
10 //
11 // DDC is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU Lesser General Public License for more details.
15 //
16 // You should have received a copy of the GNU Lesser General Public License
17 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
18 //
19 #ifndef __MorphXMLtoken_h
20 #define __MorphXMLtoken_h
21 
22 #include "../CommonLib/utilit.h"
23 
//! Type tag for punctuation tokens (presumably the value stored in
//! CXmlToken::m_Type for punctuation marks; confirm against callers).
const string XmlPunctType("pun");

//! Type tag for word tokens (presumably the value stored in
//! CXmlToken::m_Type for words; confirm against callers).
const string XmlWordType("w");
28 {
30  string m_Lemma;
32  string m_GrammemsStr;
35  void GetAsSetOfProperties(vector<string>& Result) const
36  {
37  Result.clear();
38 
39  StringTokenizer tok2(m_GrammemsStr.c_str(), " \t,|; ");
40  while (tok2())
41  Result.push_back(tok2.val());
42 
43  sort(Result.begin(), Result.end());
44 
45  };
46 };
47 
49 struct CXmlToken
50 {
52  string m_Type;
54  string m_WordStr;
56  vector<CXmlMorphAnnot> m_Annots;
60  {
61  m_bLastInSentence = false;
62  };
63  ;
64 };
65 #endif
66 
67 /*--- emacs style variables ---
68  * Local Variables:
69  * mode: C++
70  * c-file-style: "ellemtel"
71  * c-basic-offset: 4
72  * tab-width: 8
73  * indent-tabs-mode: nil
74  * End:
75  */
vector< CXmlMorphAnnot > m_Annots
all morphological annotations
Definition: MorphXmlToken.h:56
CXmlMorphAnnot holds one morphological interpretation, which is read from xml under CConcIndexator::m...
Definition: MorphXmlToken.h:27
void GetAsSetOfProperties(vector< string > &Result) const
Definition: MorphXmlToken.h:35
bool m_bLastInSentence
true if the token is last in the sentence
Definition: MorphXmlToken.h:58
string m_Lemma
lemma
Definition: MorphXmlToken.h:30
const string XmlWordType
Definition: MorphXmlToken.h:25
Definition: ddcString.h:168
CXmlToken holds a word and all its morphological interpretations (used under CConcIndexator::m_IndexT...
Definition: MorphXmlToken.h:49
const string XmlPunctType
Definition: MorphXmlToken.h:24
string m_WordStr
the string itself
Definition: MorphXmlToken.h:54
string m_Type
type of token (a word, a punctuation mark)
Definition: MorphXmlToken.h:52
string m_GrammemsStr
other morphological features
Definition: MorphXmlToken.h:32
CXmlToken()
Definition: MorphXmlToken.h:59