00001 #ifndef _INCL_WIZARD_H
00002 #define _INCL_WIZARD_H
00003
00004 #pragma warning(disable:4786)
00005 #pragma warning(disable:4503)
00006 #include "../common/utilit.h"
00007 #include "../AgramtabLib/agramtab_.h"
00008 #include "FormInfo.h"
00009 #include "OperationMeter.h"
00010
00011
00012
00013
00014
00015 const WORD UnknownSessionNo = 0xffff-1;
00016 const WORD UnknownPrefixSetNo = 0xffff-1;
00017 const BYTE UnknownAccent = 0xff;
00018
00019
00020 const WORD AnyParadigmNo = 0xffff;
00021 const WORD AnyAccentModelNo = 0xffff;
00022 const WORD AnySessionNo = 0xffff;
00023 const WORD AnyPrefixSetNo = 0xffff;
00024 const BYTE AnyAccent = 0xff-1;
00025 extern const char* AnyCommonAncode;
00026
00027
00028
00029
00030
00031 struct CParadigmInfo : public CLemmaInfo
00032 {
00033 WORD m_SessionNo;
00034 WORD m_PrefixSetNo;
00035 BYTE m_AuxAccent;
00036 bool m_bToDelete;
00037
00038 CParadigmInfo();
00039 CParadigmInfo(WORD ParadigmNo, WORD AccentModelNo, WORD SessionNo, BYTE AuxAccent, const char* CommonAncode, WORD PrefixSetNo);
00040 bool operator == (const CParadigmInfo& X) const;
00041
00042 static CParadigmInfo AnyParadigmInfo();
00043 bool IsAnyEqual( const CParadigmInfo& X ) const;
00044 };
00045
00046
00047 typedef multimap<string, CParadigmInfo> LemmaMap;
00048 typedef LemmaMap::iterator lemma_iterator_t;
00049 typedef LemmaMap::const_iterator const_lemma_iterator_t;
00050
00051
00052
00053
00054 struct CPredictSuffix
00055 {
00056 WORD m_FlexiaModelNo;
00057 string m_Suffix;
00058
00059
00060 string m_SourceLemmaAncode;
00061
00062
00063 string m_SourceCommonAncode;
00064
00065 string m_SourceLemma;
00066 mutable size_t m_Frequence;
00067 string m_PrefixSetStr;
00068
00069 bool operator < (const CPredictSuffix& X) const
00070 {
00071 if (m_FlexiaModelNo != X.m_FlexiaModelNo)
00072 return m_FlexiaModelNo < X.m_FlexiaModelNo;
00073
00074 if (m_SourceLemmaAncode != X.m_SourceLemmaAncode)
00075 m_SourceLemmaAncode < X.m_SourceLemmaAncode;
00076
00077 return m_Suffix < X.m_Suffix;
00078 };
00079 bool operator == (const CPredictSuffix& X) const
00080 {
00081 return (m_FlexiaModelNo == X.m_FlexiaModelNo)
00082 && (m_Suffix == X.m_Suffix)
00083 && (m_SourceLemmaAncode == X.m_SourceLemmaAncode);
00084 };
00085 };
00086
00087
// Record of one editing session: a user name plus two strings for the session
// start and the last save (their format is not visible in this header).
struct CMorphSession
{
    string  m_UserName;
    string  m_SessionStart;
    string  m_LastSessionSave;

    // String (de)serialisation of the record.
    bool    ReadFromString(const string& s);
    string  ToString() const;

    // Empty-state handling.
    void    SetEmpty();
    bool    IsEmpty() const;

    bool    operator==(const CMorphSession& X) const;
};
00100
00101
// Defined after MorphoWizard; MorphoWizard only needs the name (friend
// declaration and a pointer member).
class MorphWizardMeter;

// Inclusive bounds on the suffix length used for prediction. MorphoWizard
// keeps one predict container per length in this range.
const int MinPredictSuffixLength = 2;
const int MaxPredictSuffixLength = 5;
00106
00107 typedef set<CPredictSuffix> predict_container_t;
00108
00109 struct CDumpParadigm
00110 {
00111 string m_TypeGrammemsStr;
00112 string m_PrefixesStr;
00113 string m_SlfStr;
00114 string m_AuthorStr;
00115 CMorphSession m_Session;
00116 int m_FirstSlfLineNo;
00117
00118
00119 void SetEmpty();
00120 bool ReadFromFile(FILE* fp, int& line_no, bool& bError, string& Errors);
00121 bool SaveToFile(FILE* fp) const;
00122 bool SaveHeaderToFile(FILE* fp) const;
00123 };
00124
00125
00126 class MorphoWizard
00127 {
00128
00129 bool m_bWasChanged;
00130
00131 StringVector m_PosesList;
00132 StringVector m_GrammemsList;
00133 StringVector m_TypeGrammemsList;
00134
00135
00136 predict_container_t m_PredictIndex[MaxPredictSuffixLength-MinPredictSuffixLength+1];
00137
00138
00139 StringVector m_Users;
00140 size_t m_SessionNo;
00141 vector<CMorphSession> m_Sessions;
00142
00143 friend class MorphWizardMeter;
00144 MorphWizardMeter* m_pMeter;
00145
00146 void load_gramtab();
00147 void ReadSessions (FILE* fp);
00148 public:
00149 struct AncodeLess
00150 {
00151 const CAgramtab* m_pGramTab;
00152 void init(const CAgramtab* pGramTab);
00153 bool operator()(const string &s1, const string &s2)const ;
00154 };
00155 AncodeLess ancode_less;
00156
00157
00158 vector<CFlexiaModel> m_FlexiaModels;
00159
00160
00161 vector<CAccentModel> m_AccentModels;
00162
00163
00164
00165 vector<set<string> > m_PrefixSets;
00166
00167
00168 LemmaMap m_LemmaToParadigm;
00169
00170
00171
00172 map<string, string> m_ProjectFileKeys;
00173
00174 string m_CurrentNewLemma;
00175 vector< predict_container_t::const_iterator> m_CurrentPredictedParadigms;
00176
00177
00178 MorphLanguageEnum m_Language;
00179
00181 vector<BYTE> m_PcreCharacterTables;
00182
00183 bool m_bLoaded;
00184
00185 const CAgramtab* m_pGramTab;
00186 bool m_ReadOnly;
00187 string m_LanguageStr;
00188 string m_MrdPath;
00189 bool m_bFullTrace;
00190
00191 MorphoWizard();
00192 ~MorphoWizard();
00193
00194
00195 bool load_wizard(const char *path,const char *user_name, bool bCreatePrediction= true);
00196 void load_mrd(bool guest, bool bCreatePrediction);
00197 bool load_static(MorphLanguageEnum langua);
00198 void load_string_vector(const string &name, StringVector &res);
00199 string& get_value(const string &key);
00200 void log(const string &messg);
00201 void log(const string &lemm, const CFlexiaModel &p, bool is_added);
00202 bool is_changed(){return m_bWasChanged;}
00203 void save_mrd();
00204 string get_lock_file_name() const;
00205 string get_log_file_name() const;
00206 string get_predict_src_file_path(int mode) const;
00207 void MakeReadOnly();
00208 void CreatePredictIndex();
00209 void pack();
00210 WORD GetCurrentSessionNo() const;
00211 size_t del_dup_lemm();
00212
00213
00214 string get_pos_string (const lemma_iterator_t it) const ;
00215 string get_pos_string (const string &code) const;
00216 string get_lemm_string (const_lemma_iterator_t it) const;
00217 string get_lemm_string_with_accents (const_lemma_iterator_t it) const;
00218 string get_base_string (const_lemma_iterator_t it) const;
00219 string get_grammem_string (const string &code) const;
00220 string get_grammem_string (lemma_iterator_t it) const;
00221 QWORD get_all_lemma_grammems (const_lemma_iterator_t it) const;
00222 string get_common_grammems_string(const_lemma_iterator_t it) const;
00223 string get_prefix_set(const_lemma_iterator_t it) const;
00224 string get_pos_string_and_grammems (const string &code) const;
00225 const CMorphSession& get_session (int SessionNo) const;
00226 bool IsGerman () const { return m_Language == morphGerman;};
00227 const StringVector& get_poses(){return m_PosesList;}
00228 const StringVector& get_grammems(){return m_GrammemsList;}
00229 const StringVector& get_type_grammems(){return m_TypeGrammemsList;}
00230
00231
00232 void find_lemm_by_grammem(const string &pos_and_grammems, vector<lemma_iterator_t> &res);
00233 void find_lemm(string lemm, bool bCheckLemmaPrefix, vector<lemma_iterator_t> &res);
00234 void find_lemm_by_user(string username, vector<lemma_iterator_t> &res);
00235 void find_wordforms(string lemm, vector<lemma_iterator_t> &res);
00236 void find_ancodes(const string &ancode, vector<lemma_iterator_t> &res);
00237 void find_lemm_by_prdno(WORD no, vector<lemma_iterator_t> &res);
00238 void find_lemm_by_accent_model(int no, vector<lemma_iterator_t> &res);
00239
00240 void find_lemm_by_prd_info( const CParadigmInfo& info, vector<lemma_iterator_t> &res);
00241
00242
00243
00244 string mrd_to_slf(const string &lemm, const CFlexiaModel&p, WORD AccentModelNo, BYTE AuxAccent, int line_size) const;
00245
00246 void slf_to_mrd(const string &s, string &lemm, CFlexiaModel& FlexiaModel, CAccentModel& AccentModel, BYTE& AuxAccent, int& line_no_err) const;
00247 void check_paradigm(long line_no);
00248 void remove_lemm(lemma_iterator_t it);
00249 void predict_lemm(const string &lemm, const int preffer_suf_len, int minimal_frequence, bool bOnlyMainPartOfSpeeches);
00250 string get_slf_string (lemma_iterator_t it, string &dict, string& Prefixes, int line_size = 79);
00251 void get_wordforms(const_lemma_iterator_t it, StringVector& forms) const;
00252 string create_slf_from_predicted(int PredictParadigmNo, string &dict, int line_size = 79) const;
00253 CParadigmInfo add_lemma(const string &slf, string common_grammems, const string& prefixes, int& line_no_err, WORD SessionNo = UnknownSessionNo);
00254 void set_to_delete_false();
00255 void delete_checked_lemms();
00256 void clear_predicted_paradigms();
00257 bool change_prd_info(CParadigmInfo& I, const string& Lemma, WORD NewParadigmNo, WORD newAccentModelNo, bool keepOldAccents );
00258 string show_differences_in_two_paradigms(WORD FlexiaModelNo1, WORD FlexiaModelNo2) const;
00259
00260 bool slf2ancode(const string slf_line, string& gramcode) const;
00261 bool check_common_grammems(string common_grammems) const;
00262 bool check_prefixes(string prefixes) const;
00263 bool attach_form_prefixes_to_bases();
00264 bool prepare_for_RML();
00265
00266 bool HasMeter() const { return !!m_pMeter; }
00267 MorphWizardMeter* GetMeter() { return m_pMeter; }
00268 bool HasUnknownAccents( lemma_iterator_t it ) const;
00269 bool IsPartialAccented( lemma_iterator_t it ) const;
00270 BYTE GetLemmaAccent( const_lemma_iterator_t it ) const;
00271 bool ReadNextParadigmFromFile(FILE* fp, CDumpParadigm& P, int& line_no, bool& bError, string& Errors) const;
00272 bool StartSession(string user_name);
00273 void EndSession();
00274 string GetUserName() const;
00275 void StartLastSessionOfUser(string user_name);
00276 WORD RegisterSession(const CMorphSession& S);
00277 private:
00278 BYTE _GetReverseVowelNo( const string& form, WORD accentModelNo, WORD formInd ) const;
00279 void SetAccent(WORD AccentModelNo, BYTE AuxAccent, int FormNo, string& form) const;
00280 string get_prefix_set_str(WORD PrefixSetNo) const;
00281 void ReadOnePrefixSet(string PrefixSet, set<string>& Result) const;
00282 void ReadPrefixSets (FILE* fp);
00283 WORD AddPrefixSet(string PrefixSetStr);
00284
00285
00286
00287 };
00288
00289
00290 class MorphWizardMeter: public CFileMeterRML
00291 {
00292 public:
00293 MorphWizardMeter( MorphoWizard& wizard ) : m_pWizard(&wizard) { wizard.m_pMeter=this; }
00294 virtual ~MorphWizardMeter() { m_pWizard->m_pMeter=NULL; }
00295 private:
00296 MorphoWizard* m_pWizard;
00297 };
00298
00299
00300
00301
00302 extern BYTE TransferReverseVowelNoToCharNo( const string& form, BYTE AccentCharNo, MorphLanguageEnum Language);
00303
00304 #endif // _INCL_WIZARD_H