ddc
MorphAutomBuilder.h
Go to the documentation of this file.
1 //
2 // This file is part of DDC.
3 //
4 // DDC is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Lesser General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // DDC is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Lesser General Public License for more details.
13 //
14 // You should have received a copy of the GNU Lesser General Public License
15 // along with DDC. If not, see <http://www.gnu.org/licenses/>.
16 //
17 // ========== Dialing Lemmatizer (www.aot.ru),
18 // ========== Copyright by Alexey Sokirko (2004), Bryan Jurish (2011)
19 
20 
21 // The main function of this header is CMorphAutomatBuilder::AddStringDaciuk
22 // This function is an implementation of algorithm for constructing minimal,
23 // deterministic, acyclic FSM from unordered set of strings, which was described
24 // in "Jan Daciuk, Stoyan Mihov, Bruce Watson, and Richard Watson,
25 // Incremental Construction of Minimal Acyclic Finite State Automata,
26 // Computational Linguistics, 26(1), March 2000."
27 
28 #ifndef MorphAutomBuilder_h
29 #define MorphAutomBuilder_h
30 
31 #include "MorphAutomat.h"
32 
33 
34 
35 struct CTrieNodeBuild;
36 struct IsLessRegister: public less<CTrieNodeBuild*>
37 {
38  bool operator ()(const CTrieNodeBuild* pNodeNo1, const CTrieNodeBuild* pNodeNo2) const;
39 };
40 
41 typedef set<CTrieNodeBuild*, IsLessRegister> CTrieRegister;
42 
43 
44 
45 
47 {
48  bool m_bFinal;
51  CTrieRegister::iterator m_pRegister;
53  int m_NodeId;
56 
57 
58  void Initialize();
59  void AddChild(CTrieNodeBuild* Child, BYTE ChildNo);
60  void ModifyChild(CTrieNodeBuild* Child, BYTE ChildNo, bool bUpdateIncoming);
61  CTrieNodeBuild* GetNextNode(BYTE RelationChar) const;
62  void SetNodeIdNullRecursive ();
63  void UnregisterRecursive();
64 
65 
66  // debug function
67  bool CheckIncomingRelationsCountRecursive(map<const CTrieNodeBuild*, size_t>& Node2Incoming) const;
68  void GetIncomingRelationsCountRecursive(map<const CTrieNodeBuild*, size_t>& Node2Incoming) const;
69  bool CheckRegisterRecursive() const;
70  void SetFinal(bool bFinal);
71 };
72 
73 
74 
75 
77 {
78 
80 
81  // a hash by the letter of the first child and the number of children
83 
84  vector<CTrieNodeBuild*> m_Prefix;
85 
86  vector<CTrieNodeBuild*> m_DeletedNodes;
87 
88  void ClearBuildNodes();
89 
90  CTrieNodeBuild* CreateNode();
91  void DeleteNode(CTrieNodeBuild* pNode);
92  CTrieNodeBuild* CloneNode(const CTrieNodeBuild* pPrototype);
93 
94  void UpdateCommonPrefix(const string& WordForm);
95  CTrieNodeBuild* AddSuffix(CTrieNodeBuild* pParentNodeNo, const char* WordForm);
96  CTrieNodeBuild* ReplaceOrRegister(CTrieNodeBuild* pNode);
97  void DeleteFromRegister(CTrieNodeBuild* pNode);
98  int GetFirstConfluenceState() const;
99  void UnregisterNode(CTrieNodeBuild* pNode);
100 
101  CTrieRegister& GetRegister(const CTrieNodeBuild* pNode);
102 
103  // debug functions
104  bool CheckRegister() const;
105  bool IsValid() const;
106 
107 
108 public:
109  CMorphAutomatBuilder(MorphLanguageEnum Language, BYTE AnnotChar);
111 
112  void InitTrie();
113  bool AddStringDaciuk(const string& WordForm);
114  void ClearRegister();
115  void ConvertBuildRelationsToRelations();
116 };
117 
118 
119 
120 #endif
121 
122 /*--- emacs style variables ---
123  * Local Variables:
124  * mode: C++
125  * c-file-style: "ellemtel"
126  * c-basic-offset: 4
127  * tab-width: 8
128  * indent-tabs-mode: nil
129  * End:
130  */
Definition: MorphAutomBuilder.h:76
CTrieRegister::iterator m_pRegister
Definition: MorphAutomBuilder.h:51
const size_t MaxAlphabetSize
Definition: MorphAutomat.h:28
vector< CTrieNodeBuild * > m_Prefix
Definition: MorphAutomBuilder.h:84
set< CTrieNodeBuild *, IsLessRegister > CTrieRegister
Definition: MorphAutomBuilder.h:41
bool m_bFinal
Definition: MorphAutomBuilder.h:48
bool m_bRegistered
Definition: MorphAutomBuilder.h:52
int m_IncomingRelationsCount
Definition: MorphAutomBuilder.h:49
vector< CTrieNodeBuild * > m_DeletedNodes
Definition: MorphAutomBuilder.h:86
int m_NodeId
Definition: MorphAutomBuilder.h:53
BYTE m_FirstChildNo
Definition: MorphAutomBuilder.h:54
Definition: MorphAutomBuilder.h:36
unsigned char BYTE
Definition: utilit.h:94
bool operator()(const CTrieNodeBuild *pNodeNo1, const CTrieNodeBuild *pNodeNo2) const
Definition: MorphAutomBuilder.cpp:188
MorphLanguageEnum
Definition: utilit.h:162
Definition: MorphAutomat.h:139
BYTE m_SecondChildNo
Definition: MorphAutomBuilder.h:55
CTrieNodeBuild * m_pRoot
Definition: MorphAutomBuilder.h:79
Definition: MorphAutomBuilder.h:46