00001 // ========== This file is under LGPL, the GNU Lesser General Public Licence 00002 // ========== Dialing Lemmatizer (www.aot.ru) 00003 // ========== Copyright by Alexey Sokirko 00004 00005 #ifndef MorphDictBuilder_h 00006 #define MorphDictBuilder_h 00007 00008 00009 #include "MorphDict.h" 00010 #include "MorphAutomBuilder.h" 00011 00012 #include "../MorphWizardLib/wizard.h" 00013 00014 00015 class CMorphDictBuilder : public CMorphDict 00016 { 00017 00018 00019 // m_ModelInfo[i][j] is a word which should be written into CTrieNodeBuild::m_Info 00020 // where i is the index of MorphoWizard::m_FlexiaModels 00021 // and j is a the index of CFlexiaModel::m_Flexia 00022 vector< vector <bool> > m_ModelInfo; 00023 vector< DwordVector > m_PrefixSets; 00024 00025 00026 void ClearRegister(); 00027 00028 00029 bool CheckFlexiaGramInfo(const MorphoWizard& Wizard) const; 00030 bool GeneratePrefixes(const MorphoWizard& Wizard); 00031 00032 // debug functions 00033 bool CheckRegister() const; 00034 00035 CMorphAutomatBuilder* GetFormBuilder() {return (CMorphAutomatBuilder*)m_pFormAutomat; }; 00036 00037 00038 00039 public: 00040 CMorphDictBuilder(MorphLanguageEnum Language); 00041 ~CMorphDictBuilder(); 00042 00043 bool CreateAutomat(const MorphoWizard& Wizard); 00044 bool GenerateLemmas(const MorphoWizard& Wizard); 00045 bool GenerateUnitedFlexModels(const MorphoWizard& Wizard); 00046 bool GenPredictIdx(const MorphoWizard& wizard, int PostfixLength, int MinFreq, string path); 00047 }; 00048 00049 00050 00051 00052 #endif