ddc
wizard.h
Go to the documentation of this file.
1 // DDC originally by Alexey Sokirko
2 // Changes and modifications 2011-2014 by Bryan Jurish
3 //
4 // This file is part of DDC.
5 //
6 // DDC is free software: you can redistribute it and/or modify
7 // it under the terms of the GNU Lesser General Public License as published by
8 // the Free Software Foundation, either version 3 of the License, or
9 // (at your option) any later version.
10 //
11 // DDC is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU Lesser General Public License for more details.
15 //
16 // You should have received a copy of the GNU Lesser General Public License
17 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
18 //
19 #ifndef _INCL_WIZARD_H
20 #define _INCL_WIZARD_H
21 
22 #pragma warning(disable:4786)
23 #pragma warning(disable:4503)
24 #include "../CommonLib/utilit.h"
25 #include "../AgramtabLib/agramtab_.h"
26 #include "FormInfo.h"
27 #include "OperationMeter.h"
28 
29 //----------------------------------------------------------------------------
30 //----------------------------------------------------------------------------
31 
32 //----------------------------------------------------------------------------
33 const WORD UnknownSessionNo = 0xffff-1; // 0xfffe; distinct from AnySessionNo (0xffff)
34 const WORD UnknownPrefixSetNo = 0xffff-1; // 0xfffe; distinct from AnyPrefixSetNo (0xffff)
35 const BYTE UnknownAccent = 0xff; // do not change - this value is already written into mrd
36 // "Any*" values below: presumably wildcards used by CParadigmInfo::AnyParadigmInfo()/IsAnyEqual() - TODO confirm in wizard.cpp
37 // Nick [17/Dec/2003]
38 const WORD AnyParadigmNo = 0xffff;
39 const WORD AnyAccentModelNo = 0xffff;
40 const WORD AnySessionNo = 0xffff;
41 const WORD AnyPrefixSetNo = 0xffff;
42 const BYTE AnyAccent = 0xff-1; // 0xfe; distinct from UnknownAccent (0xff)
43 extern const char* AnyCommonAncode; // declaration only; the definition is not in this header
44 
45 
46 //----------------------------------------------------------------------------
47 // CParadigmInfo is a special class, which is used only in Morphwizard
48 //----------------------------------------------------------------------------
49 struct CParadigmInfo : public CLemmaInfo
50 {
55 // NOTE(review): listing lines 51-54 are not shown in this rendering; the data members are presumably declared there - confirm against the original header
56  CParadigmInfo(); // declared here, defined out of line
57  CParadigmInfo(WORD ParadigmNo, WORD AccentModelNo, WORD SessionNo, BYTE AuxAccent, const char* CommonAncode, WORD PrefixSetNo);
58  virtual ~CParadigmInfo() {}; // virtual destructor with an empty body
59  bool operator == (const CParadigmInfo& X) const; // declared here, defined out of line
60 
61  static CParadigmInfo AnyParadigmInfo(); // Nick [17/Dec/2003]; presumably builds an instance from the Any* constants - TODO confirm in wizard.cpp
62  bool IsAnyEqual( const CParadigmInfo& X ) const; // Nick [17/Dec/2003]; presumably an equality test treating Any* values as wildcards - TODO confirm in wizard.cpp
63 };
64 
65 
66 typedef multimap<string, CParadigmInfo> LemmaMap;
67 typedef LemmaMap::iterator lemma_iterator_t;
68 typedef LemmaMap::const_iterator const_lemma_iterator_t;
69 
70 
71 
72 //----------------------------------------------------------------------------
74 {
76  string m_Suffix;
77 
78  // grammatical code of the lemma
80 
81  // common gramcode of the lemma
83 
84  string m_SourceLemma;
85  mutable size_t m_Frequence;
87 
88  bool operator < (const CPredictSuffix& X) const // lexicographic order on (m_FlexiaModelNo, m_SourceLemmaAncode, m_Suffix)
89  {
90  if (m_FlexiaModelNo != X.m_FlexiaModelNo)
91  return m_FlexiaModelNo < X.m_FlexiaModelNo;
92 // tie-break on the lemma ancode so that "neither a<b nor b<a" agrees with operator== below
93  if (m_SourceLemmaAncode != X.m_SourceLemmaAncode)
94  return m_SourceLemmaAncode < X.m_SourceLemmaAncode; // was an empty statement (missing return): entries differing only in ancode compared as equivalent
95 
96  return m_Suffix < X.m_Suffix;
97  };
98  bool operator == (const CPredictSuffix& X) const // true iff model number, suffix and lemma ancode all match
99  {
100  return (m_FlexiaModelNo == X.m_FlexiaModelNo)
101  && (m_Suffix == X.m_Suffix)
102  && (m_SourceLemmaAncode == X.m_SourceLemmaAncode);
103  };
104 };
105 
106 //----------------------------------------------------------------------------
108 {
109  string m_UserName;
112 
113  bool operator == (const CMorphSession& X) const;
114  bool ReadFromString(const string& s);
115  string ToString() const;
116  void SetEmpty();
117  bool IsEmpty() const;
118 };
119 
120 //----------------------------------------------------------------------------
121 class MorphWizardMeter;
122 
125 
126 typedef set<CPredictSuffix> predict_container_t;
127 
129 {
132  string m_SlfStr;
133  string m_AuthorStr;
136 
137 
138  void SetEmpty();
139  bool ReadFromFile(FILE* fp, int& line_no, bool& bError, string& Errors);
140  bool SaveToFile(FILE* fp) const;
141  bool SaveHeaderToFile(FILE* fp) const;
142 };
143 
144 //----------------------------------------------------------------------------
146 {
147 
149 
153 
154 
156 
157 
159  size_t m_SessionNo;
160  vector<CMorphSession> m_Sessions;
161 
162  friend class MorphWizardMeter;
163  MorphWizardMeter* m_pMeter; // Nick 30.11.2003
164 
165  void load_gramtab();
166  void ReadSessions (FILE* fp);
167 public:
168  struct AncodeLess
169  {
171  void init(const CAgramtab* pGramTab);
172  bool operator()(const string &s1, const string &s2)const ;
173  };
175 
176  // a vector of all Paradigms
177  vector<CFlexiaModel> m_FlexiaModels;
178 
179  // all accent models
180  vector<CAccentModel> m_AccentModels;
181 
182 
183 
184  vector<set<string> > m_PrefixSets;
185 
186  // the multimap from lemma to paradigms (the largest list)
188 
189 
190  // the keys from .mwz file (project file)
191  map<string, string> m_ProjectFileKeys;
192 
194  vector< predict_container_t::const_iterator> m_CurrentPredictedParadigms;
195 
196 
198 
200  vector<BYTE> m_PcreCharacterTables;
201 
202  bool m_bLoaded;
203 
207  string m_MrdPath;
209 
210  MorphoWizard();
211  ~MorphoWizard();
212 
213  //================= general: loading, saving, logging ======================
214  bool load_wizard(const char *path,const char *user_name, bool bCreatePrediction= true);
215  void load_mrd(bool guest, bool bCreatePrediction);
216  bool load_static(MorphLanguageEnum langua);
217  void load_string_vector(const string &name, StringVector &res);
218  string& get_value(const string &key);
219  void log(const string &messg);
220  void log(const string &lemm, const CFlexiaModel &p, bool is_added);
221  bool is_changed() const {return m_bWasChanged;} // returns m_bWasChanged; const so it can also be called through a const MorphoWizard
222  void save_mrd();
223  string get_lock_file_name() const;
224  string get_log_file_name() const;
225  string get_predict_src_file_path(int mode) const;
226  void MakeReadOnly();
227  void CreatePredictIndex();
228  void pack();
229  WORD GetCurrentSessionNo() const;
230  size_t del_dup_lemm();
231 
232  //=============== simple primitives for ancode and lemma iterator =========
233  string get_pos_string (const lemma_iterator_t it) const ;
234  string get_pos_string (const string &code) const;
235  string get_lemm_string (const_lemma_iterator_t it) const;
236  string get_lemm_string_with_accents (const_lemma_iterator_t it) const;
237  string get_base_string (const_lemma_iterator_t it) const;
238  string get_grammem_string (const string &code) const;
239  string get_grammem_string (lemma_iterator_t it) const;
240  QWORD get_all_lemma_grammems (const_lemma_iterator_t it) const;
241  string get_common_grammems_string(const_lemma_iterator_t it) const;
242  string get_prefix_set(const_lemma_iterator_t it) const;
243  string get_pos_string_and_grammems (const string &code) const;
244  const CMorphSession& get_session (int SessionNo) const;
245  bool IsGerman () const { return m_Language == morphGerman;};
246  const StringVector& get_poses() const {return m_PosesList;} // read-only view of m_PosesList; const-qualified like IsGerman()
247  const StringVector& get_grammems() const {return m_GrammemsList;} // read-only view of m_GrammemsList
248  const StringVector& get_type_grammems() const {return m_TypeGrammemsList;} // read-only view of m_TypeGrammemsList
249 
250  // =========== find procedures ================
251  void find_lemm_by_grammem(const string &pos_and_grammems, vector<lemma_iterator_t> &res);
252  void find_lemm(string lemm, bool bCheckLemmaPrefix, vector<lemma_iterator_t> &res);
253  void find_lemm_by_user(string username, vector<lemma_iterator_t> &res);
254  void find_wordforms(string lemm, vector<lemma_iterator_t> &res);
255  void find_ancodes(const string &ancode, vector<lemma_iterator_t> &res);
256  void find_lemm_by_prdno(WORD no, vector<lemma_iterator_t> &res);
257  void find_lemm_by_accent_model(int no, vector<lemma_iterator_t> &res);
258 
259  void find_lemm_by_prd_info( const CParadigmInfo& info, vector<lemma_iterator_t> &res);
260 
261  //============ Main functions for editing dictionary ====================
262  // Mrd -> Slf (Converting from dictionary to text representation)
263  string mrd_to_slf(const string &lemm, const CFlexiaModel&p, WORD AccentModelNo, BYTE AuxAccent, int line_size) const;
264  // Slf -> Mrd (Converting from text to dictionary representation)
265  void slf_to_mrd(const string &s, string &lemm, CFlexiaModel& FlexiaModel, CAccentModel& AccentModel, BYTE& AuxAccent, int& line_no_err) const;
266  void check_paradigm(long line_no);
267  void remove_lemm(lemma_iterator_t it);
268  void predict_lemm(const string &lemm, const int preffer_suf_len, int minimal_frequence, bool bOnlyMainPartOfSpeeches);
269  string get_slf_string (lemma_iterator_t it, string &dict, string& Prefixes, int line_size = 79);
270  void get_wordforms(const_lemma_iterator_t it, StringVector& forms) const;
271  string create_slf_from_predicted(int PredictParadigmNo, string &dict, int line_size = 79) const;
272  CParadigmInfo add_lemma(const string &slf, string common_grammems, const string& prefixes, int& line_no_err, WORD SessionNo = UnknownSessionNo);
273  void set_to_delete_false();
274  void delete_checked_lemms();
275  void clear_predicted_paradigms();
276  bool change_prd_info(CParadigmInfo& I, const string& Lemma, WORD NewParadigmNo, WORD newAccentModelNo, bool keepOldAccents );
277  string show_differences_in_two_paradigms(WORD FlexiaModelNo1, WORD FlexiaModelNo2) const;
278 
279  bool slf2ancode(const string slf_line, string& gramcode) const;
280  bool check_common_grammems(string common_grammems) const;
281  bool check_prefixes(string prefixes) const;
282  bool attach_form_prefixes_to_bases();
283  bool prepare_for_RML();
284 
285  bool HasMeter() const { return !!m_pMeter; }
286  MorphWizardMeter* GetMeter() { return m_pMeter; }
287  bool HasUnknownAccents( lemma_iterator_t it ) const;
288  bool IsPartialAccented( lemma_iterator_t it ) const;
289  BYTE GetLemmaAccent( const_lemma_iterator_t it ) const;
290  bool ReadNextParadigmFromFile(FILE* fp, CDumpParadigm& P, int& line_no, bool& bError, string& Errors) const;
291  bool StartSession(string user_name);
292  void EndSession();
293  string GetUserName() const;
294  void StartLastSessionOfUser(string user_name);
295  WORD RegisterSession(const CMorphSession& S);
296 private:
297  BYTE _GetReverseVowelNo( const string& form, WORD accentModelNo, WORD formInd ) const;
298  void SetAccent(WORD AccentModelNo, BYTE AuxAccent, int FormNo, string& form) const;
299  string get_prefix_set_str(WORD PrefixSetNo) const;
300  void ReadOnePrefixSet(string PrefixSet, set<string>& Result) const;
301  void ReadPrefixSets (FILE* fp);
302  WORD AddPrefixSet(string PrefixSetStr);
303 
304 
305 
306 };
307 
308 //----------------------------------------------------------------------------
310 {
311 public:
312  MorphWizardMeter( MorphoWizard& wizard ) : m_pWizard(&wizard) { wizard.m_pMeter=this; } // stores the wizard's address and registers this meter as wizard.m_pMeter, overwriting any pointer already stored there
313  virtual ~MorphWizardMeter() { m_pWizard->m_pMeter=NULL; } // clears the wizard's m_pMeter unconditionally; NOTE(review): dereferences m_pWizard, so the wizard must still be alive here - confirm callers guarantee this
314 private:
316 };
317 
318 
319 
320 
321 extern BYTE TransferReverseVowelNoToCharNo( const string& form, BYTE AccentCharNo, MorphLanguageEnum Language);
322 
323 #endif // _INCL_WIZARD_H
324 
325 /*--- emacs style variables ---
326  * Local Variables:
327  * mode: C++
328  * c-file-style: "ellemtel"
329  * c-basic-offset: 4
330  * tab-width: 8
331  * indent-tabs-mode: nil
332  * End:
333  */
MorphWizardMeter(MorphoWizard &wizard)
Definition: wizard.h:312
virtual ~CParadigmInfo()
Definition: wizard.h:58
StringVector m_PosesList
Definition: wizard.h:150
bool operator<(const CLemmaInfo &obj) const
Definition: FormInfo.h:110
set< CPredictSuffix > predict_container_t
Definition: wizard.h:126
bool IsAnyEqual(const CParadigmInfo &X) const
Definition: wizard.cpp:241
Definition: wizard.h:128
bool m_bLoaded
Definition: wizard.h:202
string m_LastSessionSave
Definition: wizard.h:111
const WORD AnyPrefixSetNo
Definition: wizard.h:41
map< string, string > m_ProjectFileKeys
Definition: wizard.h:191
string m_Suffix
Definition: wizard.h:76
LemmaMap m_LemmaToParadigm
Definition: wizard.h:187
uint64_t QWORD
Definition: utilit.h:107
MorphLanguageEnum m_Language
Definition: wizard.h:197
const WORD AnyAccentModelNo
Definition: wizard.h:39
multimap< string, CParadigmInfo > LemmaMap
Definition: wizard.h:66
bool is_changed()
Definition: wizard.h:221
StringVector m_Users
Definition: wizard.h:158
MorphoWizard * m_pWizard
Definition: wizard.h:315
size_t m_Frequence
Definition: wizard.h:85
LemmaMap::const_iterator const_lemma_iterator_t
Definition: wizard.h:68
bool operator==(const CParadigmInfo &X) const
Definition: wizard.cpp:222
Definition: agramtab_.h:39
const CAgramtab * m_pGramTab
Definition: wizard.h:204
string m_AuthorStr
Definition: wizard.h:133
vector< CFlexiaModel > m_FlexiaModels
Definition: wizard.h:177
vector< predict_container_t::const_iterator > m_CurrentPredictedParadigms
Definition: wizard.h:194
const int MinPredictSuffixLength
Definition: wizard.h:123
string m_PrefixSetStr
Definition: wizard.h:86
const StringVector & get_type_grammems()
Definition: wizard.h:248
uint16_t WORD
Definition: utilit.h:106
string m_PrefixesStr
Definition: wizard.h:131
StringVector m_GrammemsList
Definition: wizard.h:151
string m_SessionStart
Definition: wizard.h:110
bool m_bToDelete
Definition: wizard.h:54
int m_FirstSlfLineNo
Definition: wizard.h:135
Definition: wizard.h:107
const int MaxPredictSuffixLength
Definition: wizard.h:124
vector< BYTE > m_PcreCharacterTables
a table of character properties for regular expressions which depend on CConcIndexator::m_Language ...
Definition: wizard.h:200
Definition: utilit.h:166
Definition: FormInfo.h:48
Definition: FormInfo.h:69
string m_LanguageStr
Definition: wizard.h:206
bool m_bWasChanged
Definition: wizard.h:148
static CParadigmInfo AnyParadigmInfo()
Definition: wizard.cpp:234
vector< string > StringVector
Definition: utilit.h:146
const char * AnyCommonAncode
Definition: wizard.cpp:35
string m_MrdPath
Definition: wizard.h:207
MorphWizardMeter * m_pMeter
Definition: wizard.h:163
CMorphSession m_Session
Definition: wizard.h:134
const WORD AnySessionNo
Definition: wizard.h:40
const StringVector & get_grammems()
Definition: wizard.h:247
const WORD AnyParadigmNo
Definition: wizard.h:38
Definition: wizard.h:145
string m_SourceLemma
Definition: wizard.h:84
string m_CurrentNewLemma
Definition: wizard.h:193
unsigned char BYTE
Definition: utilit.h:94
BYTE m_AuxAccent
Definition: wizard.h:53
bool IsGerman() const
Definition: wizard.h:245
WORD m_PrefixSetNo
Definition: wizard.h:52
bool m_ReadOnly
Definition: wizard.h:205
vector< CAccentModel > m_AccentModels
Definition: wizard.h:180
string m_SourceCommonAncode
Definition: wizard.h:82
const BYTE AnyAccent
Definition: wizard.h:42
vector< set< string > > m_PrefixSets
Definition: wizard.h:184
size_t m_SessionNo
Definition: wizard.h:159
vector< CMorphSession > m_Sessions
Definition: wizard.h:160
WORD m_FlexiaModelNo
Definition: wizard.h:75
LemmaMap::iterator lemma_iterator_t
Definition: wizard.h:67
string m_SlfStr
Definition: wizard.h:132
MorphLanguageEnum
Definition: utilit.h:162
bool m_bFullTrace
Definition: wizard.h:208
const StringVector & get_poses()
Definition: wizard.h:246
string m_SourceLemmaAncode
Definition: wizard.h:79
Definition: wizard.h:168
Definition: FormInfo.h:89
WORD m_SessionNo
Definition: wizard.h:51
CParadigmInfo()
Definition: wizard.cpp:203
Definition: wizard.h:49
string m_UserName
Definition: wizard.h:109
const WORD UnknownSessionNo
Definition: wizard.h:33
Definition: wizard.h:309
string m_TypeGrammemsStr
Definition: wizard.h:130
bool HasMeter() const
Definition: wizard.h:285
void log(string s)
Definition: CreatePredictionBase.cpp:140
Definition: OperationMeter.h:134
const WORD UnknownPrefixSetNo
Definition: wizard.h:34
const CAgramtab * m_pGramTab
Definition: wizard.h:170
virtual ~MorphWizardMeter()
Definition: wizard.h:313
AncodeLess ancode_less
Definition: wizard.h:174
Definition: wizard.h:73
StringVector m_TypeGrammemsList
Definition: wizard.h:152
const BYTE UnknownAccent
Definition: wizard.h:35
MorphWizardMeter * GetMeter()
Definition: wizard.h:286
BYTE TransferReverseVowelNoToCharNo(const string &form, BYTE AccentCharNo, MorphLanguageEnum Language)
Definition: wizard.cpp:1463