github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/automaton/minimizationOperations.go (about)

     1  package automaton
     2  
     3  import (
     4  	"container/list"
     5  	// "fmt"
     6  	"github.com/balzaczyy/golucene/core/util"
     7  	"unicode"
     8  )
     9  
    10  // util/automaton/MinimizationOperations.java
    11  
    12  // Minimizes (and determinizes if not already deterministic) the
    13  // given automaton
    14  func minimize(a *Automaton) *Automaton {
    15  	return minimizeHopcroft(a)
    16  }
    17  
    18  // Minimizes the given automaton using Hopcroft's alforithm.
    19  func minimizeHopcroft(a *Automaton) *Automaton {
    20  	if a.numStates() == 0 || !a.IsAccept(0) && a.numTransitions(0) == 0 {
    21  		// fastmatch for common case
    22  		return newEmptyAutomaton()
    23  	}
    24  	a = determinize(a)
    25  	if a.numTransitions(0) == 1 {
    26  		t := newTransition()
    27  		a.transition(0, 0, t)
    28  		if t.dest == 0 && t.min == MIN_CODE_POINT &&
    29  			t.max == unicode.MaxRune {
    30  			// accepts all strings
    31  			return a
    32  		}
    33  	}
    34  	a = totalize(a)
    35  
    36  	// initialize data structure
    37  	sigma := a.startPoints()
    38  	sigmaLen, statesLen := len(sigma), a.numStates()
    39  
    40  	reverse := make([][][]int, statesLen)
    41  	for i, _ := range reverse {
    42  		reverse[i] = make([][]int, sigmaLen)
    43  	}
    44  	partition := make([]map[int]bool, statesLen)
    45  	splitblock := make([][]int, statesLen)
    46  	block := make([]int, statesLen)
    47  	active := make([][]*StateList, statesLen)
    48  	for i, _ := range active {
    49  		active[i] = make([]*StateList, sigmaLen)
    50  	}
    51  	active2 := make([][]*StateListNode, statesLen)
    52  	for i, _ := range active2 {
    53  		active2[i] = make([]*StateListNode, sigmaLen)
    54  	}
    55  	pending := list.New()
    56  	pending2 := util.NewOpenBitSet() // sigmaLen * statesLen bits
    57  	split := util.NewOpenBitSet()    // statesLen bits
    58  	refine := util.NewOpenBitSet()   // statesLen bits
    59  	refine2 := util.NewOpenBitSet()  // statesLen bits
    60  	for q, _ := range splitblock {
    61  		partition[q] = make(map[int]bool)
    62  		for x, _ := range active[q] {
    63  			active[q][x] = new(StateList)
    64  		}
    65  	}
    66  	// find initial partition and reverse edges
    67  	for q := 0; q < statesLen; q++ {
    68  		j := or(a.IsAccept(q), 0, 1).(int)
    69  		partition[j][q] = true
    70  		block[q] = j
    71  		for x, v := range sigma {
    72  			n := a.step(q, v)
    73  			assert2(n >= 0 && n < len(reverse), "%v", n)
    74  			r := reverse[a.step(q, v)]
    75  			r[x] = append(r[x], q)
    76  		}
    77  	}
    78  	// initialize active sets
    79  	for j := 0; j <= 1; j++ {
    80  		for x := 0; x < sigmaLen; x++ {
    81  			for q, _ := range partition[j] {
    82  				if reverse[q][x] != nil {
    83  					active2[q][x] = active[j][x].add(q)
    84  				}
    85  			}
    86  		}
    87  	}
    88  	// initialize pending
    89  	for x := 0; x < sigmaLen; x++ {
    90  		j := or(active[0][x].size <= active[1][x].size, 0, 1).(int)
    91  		pending.PushBack(&IntPair{j, x})
    92  		pending2.Set(int64(x*statesLen + j))
    93  	}
    94  	// process pending until fixed point
    95  	k := 2
    96  	// fmt.Println("start min")
    97  	for pending.Len() > 0 {
    98  		// fmt.Println("  cycle pending")
    99  		ip := pending.Remove(pending.Front()).(*IntPair)
   100  		p, x := ip.n1, ip.n2
   101  		// fmt.Printf("    pop n1=%v n2=%v\n", ip.n1, ip.n2)
   102  		pending2.Clear(int64(x*statesLen + p))
   103  		// find states that need to be split off their blocks
   104  		for m := active[p][x].first; m != nil; m = m.next {
   105  			if r := reverse[m.q][x]; r != nil {
   106  				for _, i := range r {
   107  					if !split.Get(int64(i)) {
   108  						split.Set(int64(i))
   109  						j := block[i]
   110  						splitblock[j] = append(splitblock[j], i)
   111  						if !refine2.Get(int64(j)) {
   112  							refine2.Set(int64(j))
   113  							refine.Set(int64(j))
   114  						}
   115  					}
   116  				}
   117  			}
   118  		}
   119  		// refine blocks
   120  		for j := int(refine.NextSetBit(0)); j >= 0; j = int(refine.NextSetBit(int64(j) + 1)) {
   121  			sb := splitblock[j]
   122  			if len(sb) < len(partition[j]) {
   123  				b1, b2 := partition[j], partition[k]
   124  				for _, s := range sb {
   125  					delete(b1, s)
   126  					b2[s] = true
   127  					block[s] = k
   128  					for c, sn := range active2[s] {
   129  						if sn != nil && sn.sl == active[j][c] {
   130  							sn.remove()
   131  							active2[s][c] = active[k][c].add(s)
   132  						}
   133  					}
   134  				}
   135  				// update pending
   136  				for c, _ := range active[j] {
   137  					aj := active[j][c].size
   138  					ak := active[k][c].size
   139  					ofs := int64(c * statesLen)
   140  					if !pending2.Get(ofs+int64(j)) && 0 < aj && aj <= ak {
   141  						pending2.Set(ofs + int64(j))
   142  						pending.PushBack(&IntPair{j, c})
   143  					} else {
   144  						pending2.Set(ofs + int64(k))
   145  						pending.PushBack(&IntPair{k, c})
   146  					}
   147  				}
   148  				k++
   149  			}
   150  			refine2.Clear(int64(j))
   151  			for _, s := range sb {
   152  				split.Clear(int64(s))
   153  			}
   154  			splitblock[j] = nil // clear sb
   155  		}
   156  		refine = util.NewOpenBitSet() // not quite efficient
   157  	}
   158  
   159  	ans := newEmptyAutomaton()
   160  	t := newTransition()
   161  	// fmt.Printf("  k=%v\n", k)
   162  
   163  	// make a new state for each equivalence class, set initial state
   164  	stateMap := make([]int, statesLen)
   165  	stateRep := make([]int, k)
   166  
   167  	ans.createState()
   168  
   169  	// fmt.Printf("min: k=%v\n", k)
   170  	for n := 0; n < k; n++ {
   171  		// fmt.Printf("    n=%v\n", n)
   172  
   173  		isInitial := false
   174  		for q, _ := range partition[n] {
   175  			if q == 0 {
   176  				isInitial = true
   177  				// fmt.Println("    isInitial!")
   178  				break
   179  			}
   180  		}
   181  
   182  		newState := 0
   183  		if !isInitial {
   184  			newState = ans.createState()
   185  		}
   186  
   187  		// fmt.Printf("  newState=%v\n", newState)
   188  
   189  		for q, _ := range partition[n] {
   190  			stateMap[q] = newState
   191  			// fmt.Printf("      q=%v isAccept?=%v\n", q, a.IsAccept(q))
   192  			ans.setAccept(newState, a.IsAccept(q))
   193  			stateRep[newState] = q // select representative
   194  		}
   195  	}
   196  
   197  	// build transitions and set acceptance
   198  	for n := 0; n < k; n++ {
   199  		numTransitions := a.initTransition(stateRep[n], t)
   200  		for i := 0; i < numTransitions; i++ {
   201  			a.nextTransition(t)
   202  			// fmt.Println("  add trans")
   203  			ans.addTransitionRange(n, stateMap[t.dest], t.min, t.max)
   204  		}
   205  	}
   206  	ans.finishState()
   207  	// fmt.Printf("%v states\n", ans.numStates())
   208  
   209  	return removeDeadStates(ans)
   210  }
   211  
   212  func or(cond bool, v1, v2 interface{}) interface{} {
   213  	if cond {
   214  		return v1
   215  	}
   216  	return v2
   217  }
   218  
   219  type IntPair struct{ n1, n2 int }
   220  
   221  type StateList struct {
   222  	size        int
   223  	first, last *StateListNode
   224  }
   225  
   226  func (sl *StateList) add(q int) *StateListNode {
   227  	return newStateListNode(q, sl)
   228  }
   229  
   230  type StateListNode struct {
   231  	q          int
   232  	next, prev *StateListNode
   233  	sl         *StateList
   234  }
   235  
   236  func newStateListNode(q int, sl *StateList) *StateListNode {
   237  	ans := &StateListNode{q: q, sl: sl}
   238  	sl.size++
   239  	if sl.size == 1 {
   240  		sl.first, sl.last = ans, ans
   241  	} else {
   242  		sl.last.next = ans
   243  		ans.prev = sl.last
   244  		sl.last = ans
   245  	}
   246  	return ans
   247  }
   248  
   249  func (node *StateListNode) remove() {
   250  	node.sl.size--
   251  	if node.sl.first == node {
   252  		node.sl.first = node.next
   253  	} else {
   254  		node.prev.next = node.next
   255  	}
   256  	if node.sl.last == node {
   257  		node.sl.last = node.prev
   258  	} else {
   259  		node.next.prev = node.prev
   260  	}
   261  }