github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/automaton/minimizationOperations.go (about) 1 package automaton 2 3 import ( 4 "container/list" 5 // "fmt" 6 "github.com/balzaczyy/golucene/core/util" 7 "unicode" 8 ) 9 10 // util/automaton/MinimizationOperations.java 11 12 // Minimizes (and determinizes if not already deterministic) the 13 // given automaton 14 func minimize(a *Automaton) *Automaton { 15 return minimizeHopcroft(a) 16 } 17 18 // Minimizes the given automaton using Hopcroft's alforithm. 19 func minimizeHopcroft(a *Automaton) *Automaton { 20 if a.numStates() == 0 || !a.IsAccept(0) && a.numTransitions(0) == 0 { 21 // fastmatch for common case 22 return newEmptyAutomaton() 23 } 24 a = determinize(a) 25 if a.numTransitions(0) == 1 { 26 t := newTransition() 27 a.transition(0, 0, t) 28 if t.dest == 0 && t.min == MIN_CODE_POINT && 29 t.max == unicode.MaxRune { 30 // accepts all strings 31 return a 32 } 33 } 34 a = totalize(a) 35 36 // initialize data structure 37 sigma := a.startPoints() 38 sigmaLen, statesLen := len(sigma), a.numStates() 39 40 reverse := make([][][]int, statesLen) 41 for i, _ := range reverse { 42 reverse[i] = make([][]int, sigmaLen) 43 } 44 partition := make([]map[int]bool, statesLen) 45 splitblock := make([][]int, statesLen) 46 block := make([]int, statesLen) 47 active := make([][]*StateList, statesLen) 48 for i, _ := range active { 49 active[i] = make([]*StateList, sigmaLen) 50 } 51 active2 := make([][]*StateListNode, statesLen) 52 for i, _ := range active2 { 53 active2[i] = make([]*StateListNode, sigmaLen) 54 } 55 pending := list.New() 56 pending2 := util.NewOpenBitSet() // sigmaLen * statesLen bits 57 split := util.NewOpenBitSet() // statesLen bits 58 refine := util.NewOpenBitSet() // statesLen bits 59 refine2 := util.NewOpenBitSet() // statesLen bits 60 for q, _ := range splitblock { 61 partition[q] = make(map[int]bool) 62 for x, _ := range active[q] { 63 active[q][x] = new(StateList) 64 } 65 } 66 // find initial partition and reverse edges 67 for q := 0; q < statesLen; q++ { 68 j := or(a.IsAccept(q), 0, 1).(int) 69 partition[j][q] = true 70 block[q] = j 71 for x, v := range sigma { 72 n := a.step(q, v) 73 assert2(n >= 0 && n < len(reverse), "%v", n) 74 r := reverse[a.step(q, v)] 75 r[x] = append(r[x], q) 76 } 77 } 78 // initialize active sets 79 for j := 0; j <= 1; j++ { 80 for x := 0; x < sigmaLen; x++ { 81 for q, _ := range partition[j] { 82 if reverse[q][x] != nil { 83 active2[q][x] = active[j][x].add(q) 84 } 85 } 86 } 87 } 88 // initialize pending 89 for x := 0; x < sigmaLen; x++ { 90 j := or(active[0][x].size <= active[1][x].size, 0, 1).(int) 91 pending.PushBack(&IntPair{j, x}) 92 pending2.Set(int64(x*statesLen + j)) 93 } 94 // process pending until fixed point 95 k := 2 96 // fmt.Println("start min") 97 for pending.Len() > 0 { 98 // fmt.Println(" cycle pending") 99 ip := pending.Remove(pending.Front()).(*IntPair) 100 p, x := ip.n1, ip.n2 101 // fmt.Printf(" pop n1=%v n2=%v\n", ip.n1, ip.n2) 102 pending2.Clear(int64(x*statesLen + p)) 103 // find states that need to be split off their blocks 104 for m := active[p][x].first; m != nil; m = m.next { 105 if r := reverse[m.q][x]; r != nil { 106 for _, i := range r { 107 if !split.Get(int64(i)) { 108 split.Set(int64(i)) 109 j := block[i] 110 splitblock[j] = append(splitblock[j], i) 111 if !refine2.Get(int64(j)) { 112 refine2.Set(int64(j)) 113 refine.Set(int64(j)) 114 } 115 } 116 } 117 } 118 } 119 // refine blocks 120 for j := int(refine.NextSetBit(0)); j >= 0; j = int(refine.NextSetBit(int64(j) + 1)) { 121 sb := splitblock[j] 122 if len(sb) < len(partition[j]) { 123 b1, b2 := partition[j], partition[k] 124 for _, s := range sb { 125 delete(b1, s) 126 b2[s] = true 127 block[s] = k 128 for c, sn := range active2[s] { 129 if sn != nil && sn.sl == active[j][c] { 130 sn.remove() 131 active2[s][c] = active[k][c].add(s) 132 } 133 } 134 } 135 // update pending 136 for c, _ := range active[j] { 137 aj := active[j][c].size 138 ak := active[k][c].size 139 ofs := int64(c * statesLen) 140 if !pending2.Get(ofs+int64(j)) && 0 < aj && aj <= ak { 141 pending2.Set(ofs + int64(j)) 142 pending.PushBack(&IntPair{j, c}) 143 } else { 144 pending2.Set(ofs + int64(k)) 145 pending.PushBack(&IntPair{k, c}) 146 } 147 } 148 k++ 149 } 150 refine2.Clear(int64(j)) 151 for _, s := range sb { 152 split.Clear(int64(s)) 153 } 154 splitblock[j] = nil // clear sb 155 } 156 refine = util.NewOpenBitSet() // not quite efficient 157 } 158 159 ans := newEmptyAutomaton() 160 t := newTransition() 161 // fmt.Printf(" k=%v\n", k) 162 163 // make a new state for each equivalence class, set initial state 164 stateMap := make([]int, statesLen) 165 stateRep := make([]int, k) 166 167 ans.createState() 168 169 // fmt.Printf("min: k=%v\n", k) 170 for n := 0; n < k; n++ { 171 // fmt.Printf(" n=%v\n", n) 172 173 isInitial := false 174 for q, _ := range partition[n] { 175 if q == 0 { 176 isInitial = true 177 // fmt.Println(" isInitial!") 178 break 179 } 180 } 181 182 newState := 0 183 if !isInitial { 184 newState = ans.createState() 185 } 186 187 // fmt.Printf(" newState=%v\n", newState) 188 189 for q, _ := range partition[n] { 190 stateMap[q] = newState 191 // fmt.Printf(" q=%v isAccept?=%v\n", q, a.IsAccept(q)) 192 ans.setAccept(newState, a.IsAccept(q)) 193 stateRep[newState] = q // select representative 194 } 195 } 196 197 // build transitions and set acceptance 198 for n := 0; n < k; n++ { 199 numTransitions := a.initTransition(stateRep[n], t) 200 for i := 0; i < numTransitions; i++ { 201 a.nextTransition(t) 202 // fmt.Println(" add trans") 203 ans.addTransitionRange(n, stateMap[t.dest], t.min, t.max) 204 } 205 } 206 ans.finishState() 207 // fmt.Printf("%v states\n", ans.numStates()) 208 209 return removeDeadStates(ans) 210 } 211 212 func or(cond bool, v1, v2 interface{}) interface{} { 213 if cond { 214 return v1 215 } 216 return v2 217 } 218 219 type IntPair struct{ n1, n2 int } 220 221 type StateList struct { 222 size int 223 first, last *StateListNode 224 } 225 226 func (sl *StateList) add(q int) *StateListNode { 227 return newStateListNode(q, sl) 228 } 229 230 type StateListNode struct { 231 q int 232 next, prev *StateListNode 233 sl *StateList 234 } 235 236 func newStateListNode(q int, sl *StateList) *StateListNode { 237 ans := &StateListNode{q: q, sl: sl} 238 sl.size++ 239 if sl.size == 1 { 240 sl.first, sl.last = ans, ans 241 } else { 242 sl.last.next = ans 243 ans.prev = sl.last 244 sl.last = ans 245 } 246 return ans 247 } 248 249 func (node *StateListNode) remove() { 250 node.sl.size-- 251 if node.sl.first == node { 252 node.sl.first = node.next 253 } else { 254 node.prev.next = node.next 255 } 256 if node.sl.last == node { 257 node.sl.last = node.prev 258 } else { 259 node.next.prev = node.prev 260 } 261 }