github.com/balzaczyy/golucene@v0.0.0-20151210033525-d0be9ee89713/core/util/automaton/automaton.go (about)

     1  package automaton
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/balzaczyy/golucene/core/util"
     6  	"sort"
     7  	"unicode"
     8  )
     9  
    10  // util/automaton/Automaton.java
    11  
    12  /*
    13  Represents an automaton and all its states and transitions. States
    14  are integers and must be created using {@link #createState}.  Mark a
    15  state as an accept state using {@link #setAccept}.  Add transitions
    16  using {@link #addTransition}.  Each state must have all of its
    17  transitions added at once; if this is too restrictive then use
    18  {@link Automaton.Builder} instead.  State 0 is always the
    19  initial state.  Once a state is finished, either
    20  because you've starting adding transitions to another state or you
    21  call {@link #finishState}, then that states transitions are sorted
    22  (first by min, then max, then dest) and reduced (transitions with
    23  adjacent labels going to the same dest are combined).
    24  */
    25  type Automaton struct {
    26  	curState      int
    27  	states        []int // 2x
    28  	transitions   []int // 3x
    29  	isAccept      *util.OpenBitSet
    30  	deterministic bool
    31  }
    32  
    33  func newEmptyAutomaton() *Automaton {
    34  	return &Automaton{
    35  		deterministic: true,
    36  		curState:      -1,
    37  		isAccept:      util.NewOpenBitSet(),
    38  	}
    39  }
    40  
    41  func (a *Automaton) String() string {
    42  	return fmt.Sprintf("{curState=%v,states=%v,transitions=%v,isAccept=%v,%v}",
    43  		a.curState, a.states, a.transitions, a.isAccept, a.deterministic)
    44  }
    45  
    46  /* Create a new state. */
    47  func (a *Automaton) createState() int {
    48  	state := len(a.states) / 2
    49  	a.states = append(a.states, -1, 0)
    50  	return state
    51  }
    52  
    53  /* Set or clear this state as an accept state. */
    54  func (a *Automaton) setAccept(state int, accept bool) {
    55  	assert2(state < a.numStates(), "state=%v is out of bounds (numStates=%v)", state, a.numStates())
    56  	if accept {
    57  		a.isAccept.Set(int64(state))
    58  	} else {
    59  		a.isAccept.Clear(int64(state))
    60  	}
    61  }
    62  
    63  /*
    64  Sugar to get all transitions for all states. This is object-heavy;
    65  it's better to iterate state by state instead.
    66  */
    67  func (a *Automaton) sortedTransitions() [][]*Transition {
    68  	numStates := a.numStates()
    69  	transitions := make([][]*Transition, numStates)
    70  	for s := 0; s < numStates; s++ {
    71  		numTransitions := a.numTransitions(s)
    72  		transitions[s] = make([]*Transition, numTransitions)
    73  		for t := 0; t < numTransitions; t++ {
    74  			transition := newTransition()
    75  			a.transition(s, t, transition)
    76  			transitions[s][t] = transition
    77  		}
    78  	}
    79  	return transitions
    80  }
    81  
    82  /* Returns true if this state is an accept state. */
    83  func (a *Automaton) IsAccept(state int) bool {
    84  	return a.isAccept.Get(int64(state))
    85  }
    86  
    87  /* Add a new transition with min = max = label. */
    88  func (a *Automaton) addTransition(source, dest, label int) {
    89  	a.addTransitionRange(source, dest, label, label)
    90  }
    91  
    92  /* Add a new transition with the specified source, dest, min, max. */
    93  func (a *Automaton) addTransitionRange(source, dest, min, max int) {
    94  	assert(len(a.transitions)%3 == 0)
    95  	assert2(source < a.numStates(), "source=%v is out of bounds (maxState is %v)", source, a.numStates()-1)
    96  	assert2(dest < a.numStates(), "dest=%v is out of bounds (maxState is %v)", dest, a.numStates()-1)
    97  
    98  	if a.curState != source {
    99  		if a.curState != -1 {
   100  			a.finishCurrentState()
   101  		}
   102  
   103  		// move to next source:
   104  		a.curState = source
   105  		assert2(a.states[2*a.curState] == -1, "from state (%v) already had transitions added", source)
   106  		assert(a.states[2*a.curState+1] == 0)
   107  		a.states[2*a.curState] = len(a.transitions)
   108  	}
   109  
   110  	a.transitions = append(a.transitions, dest, min, max)
   111  
   112  	// increment transition count for this state
   113  	a.states[2*a.curState+1]++
   114  }
   115  
   116  /*
   117  Add a [virtual] epsilon transition between source and dest. Dest
   118  state must already have all transitions added because this method
   119  simply copies those same transitions over to source.
   120  */
   121  func (a *Automaton) addEpsilon(source, dest int) {
   122  	t := newTransition()
   123  	count := a.initTransition(dest, t)
   124  	for i := 0; i < count; i++ {
   125  		a.nextTransition(t)
   126  		a.addTransitionRange(source, t.dest, t.min, t.max)
   127  	}
   128  	if a.IsAccept(dest) {
   129  		a.setAccept(source, true)
   130  	}
   131  }
   132  
   133  /*
   134  Copies over all state/transition from other. The state numbers are
   135  sequentially assigned (appended).
   136  */
   137  func (a *Automaton) copy(other *Automaton) {
   138  	// bulk copy and then fixup the state pointers
   139  	stateOffset := a.numStates()
   140  	a.states = append(a.states, other.states...)
   141  	for i := 0; i < len(other.states); i += 2 {
   142  		if a.states[stateOffset*2+i] != -1 {
   143  			a.states[stateOffset*2+i] += len(a.transitions)
   144  		}
   145  	}
   146  	otherAcceptState := other.isAccept
   147  	for state := otherAcceptState.NextSetBit(0); state != -1; state = otherAcceptState.NextSetBit(state + 1) {
   148  		a.setAccept(stateOffset+int(state), true)
   149  	}
   150  
   151  	// bulk copy and then fixup dest for each transition
   152  	transOffset := len(a.transitions)
   153  	a.transitions = append(a.transitions, other.transitions...)
   154  	for i := 0; i < len(other.transitions); i += 3 {
   155  		a.transitions[transOffset+i] += stateOffset
   156  	}
   157  
   158  	if !other.deterministic {
   159  		a.deterministic = false
   160  	}
   161  }
   162  
   163  /* Freezes the last state, sorting and reducing the transitions. */
   164  func (a *Automaton) finishCurrentState() {
   165  	numTransitions := a.states[2*a.curState+1]
   166  	assert(numTransitions > 0)
   167  
   168  	offset := a.states[2*a.curState]
   169  	start := offset / 3
   170  	util.NewInPlaceMergeSorter(destMinMaxSorter(a.transitions)).Sort(start, start+numTransitions)
   171  
   172  	// reduce any "adjacent" transitions:
   173  	upto, min, max, dest := 0, -1, -1, -1
   174  
   175  	for i := 0; i < numTransitions; i++ {
   176  		tDest := a.transitions[offset+3*i]
   177  		tMin := a.transitions[offset+3*i+1]
   178  		tMax := a.transitions[offset+3*i+2]
   179  
   180  		if dest == tDest {
   181  			if tMin <= max+1 {
   182  				if tMax > max {
   183  					max = tMax
   184  				}
   185  			} else {
   186  				if dest != -1 {
   187  					a.transitions[offset+3*upto] = dest
   188  					a.transitions[offset+3*upto+1] = min
   189  					a.transitions[offset+3*upto+2] = max
   190  					upto++
   191  				}
   192  				min, max = tMin, tMax
   193  			}
   194  		} else {
   195  			if dest != -1 {
   196  				a.transitions[offset+3*upto] = dest
   197  				a.transitions[offset+3*upto+1] = min
   198  				a.transitions[offset+3*upto+2] = max
   199  				upto++
   200  			}
   201  			dest, min, max = tDest, tMin, tMax
   202  		}
   203  	}
   204  
   205  	if dest != -1 {
   206  		// last transition
   207  		a.transitions[offset+3*upto] = dest
   208  		a.transitions[offset+3*upto+1] = min
   209  		a.transitions[offset+3*upto+2] = max
   210  		upto++
   211  	}
   212  
   213  	a.transitions = a.transitions[:len(a.transitions)-(numTransitions-upto)*3]
   214  	a.states[2*a.curState+1] = upto
   215  
   216  	// sort transitions by min/max/dest:
   217  	util.NewInPlaceMergeSorter(minMaxDestSorter(a.transitions)).Sort(start, start+upto)
   218  
   219  	if a.deterministic && upto > 1 {
   220  		lastMax := a.transitions[offset+2]
   221  		for i := 1; i < upto; i++ {
   222  			min = a.transitions[offset+3*i+1]
   223  			if min <= lastMax {
   224  				a.deterministic = false
   225  				break
   226  			}
   227  			lastMax = a.transitions[offset+3*i+2]
   228  		}
   229  	}
   230  }
   231  
   232  /*
   233  Finishes the current state; call this once you are done adding
   234  transitions for a state. This is automatically called if you start
   235  adding transitions to a new source state, but for the last state you
   236  add, you need to call this method yourself.
   237  */
   238  func (a *Automaton) finishState() {
   239  	if a.curState != -1 {
   240  		a.finishCurrentState()
   241  		a.curState = -1
   242  	}
   243  }
   244  
   245  /* How many states this automaton has. */
   246  func (a *Automaton) numStates() int {
   247  	return len(a.states) / 2
   248  }
   249  
   250  /* How many transitions this state has. */
   251  func (a *Automaton) numTransitions(state int) int {
   252  	if count := a.states[2*state+1]; count != -1 {
   253  		return count
   254  	}
   255  	return 0
   256  }
   257  
   258  type destMinMaxSorter []int
   259  
   260  func (s destMinMaxSorter) Len() int {
   261  	panic("niy")
   262  }
   263  
   264  func (s destMinMaxSorter) Swap(i, j int) {
   265  	iStart, jStart := 3*i, 3*j
   266  	for n := 0; n < 3; n++ {
   267  		s[iStart+n], s[jStart+n] = s[jStart+n], s[iStart+n]
   268  	}
   269  }
   270  
   271  func (s destMinMaxSorter) Less(i, j int) bool {
   272  	iStart := 3 * i
   273  	jStart := 3 * j
   274  
   275  	// first dest:
   276  	iDest := s[iStart]
   277  	jDest := s[jStart]
   278  	if iDest < jDest {
   279  		return true
   280  	} else if iDest > jDest {
   281  		return false
   282  	}
   283  
   284  	// then min:
   285  	iMin := s[iStart+1]
   286  	jMin := s[jStart+1]
   287  	if iMin < jMin {
   288  		return true
   289  	} else if iMin > jMin {
   290  		return false
   291  	}
   292  
   293  	// then max:
   294  	iMax := s[iStart+2]
   295  	jMax := s[jStart+2]
   296  	return iMax < jMax
   297  }
   298  
   299  type minMaxDestSorter []int
   300  
   301  func (s minMaxDestSorter) Len() int {
   302  	panic("niy")
   303  }
   304  
   305  func (s minMaxDestSorter) Swap(i, j int) {
   306  	iStart, jStart := 3*i, 3*j
   307  	for n := 0; n < 3; n++ {
   308  		s[iStart+n], s[jStart+n] = s[jStart+n], s[iStart+n]
   309  	}
   310  }
   311  
   312  func (s minMaxDestSorter) Less(i, j int) bool {
   313  	iStart, jStart := 3*i, 3*j
   314  
   315  	iMin, jMin := s[iStart+1], s[jStart+1]
   316  	if iMin < jMin {
   317  		return true
   318  	} else if iMin > jMin {
   319  		return false
   320  	}
   321  
   322  	iMax, jMax := s[iStart+2], s[jStart+2]
   323  	if iMax < jMax {
   324  		return true
   325  	} else if iMax > jMax {
   326  		return false
   327  	}
   328  
   329  	iDest, jDest := s[iStart], s[jStart]
   330  	return iDest < jDest
   331  }
   332  
   333  /*
   334  Initialize the provided Transition to iterate through all transitions
   335  leaving the specified state. You must call nextTransition() to get
   336  each transition. Returns the number of transitions leaving this tate.
   337  */
   338  func (a *Automaton) initTransition(state int, t *Transition) int {
   339  	assert2(state < a.numStates(), "state=%v nextState=%v", state, a.numStates())
   340  	t.source = state
   341  	t.transitionUpto = a.states[2*state]
   342  	return a.numTransitions(state)
   343  }
   344  
   345  /* Iterate to the next transition after the provided one */
   346  func (a *Automaton) nextTransition(t *Transition) {
   347  	// make sure there is still a transition left
   348  	assert((t.transitionUpto + 3 - a.states[2*t.source]) <= 3*a.states[2*t.source+1])
   349  	t.dest = a.transitions[t.transitionUpto]
   350  	t.min = a.transitions[t.transitionUpto+1]
   351  	t.max = a.transitions[t.transitionUpto+2]
   352  	t.transitionUpto += 3
   353  }
   354  
   355  /*
   356  Fill the provided Transition with the index'th transition leaving the
   357  specified state.
   358  */
   359  func (a *Automaton) transition(state, index int, t *Transition) {
   360  	i := a.states[2*state] + 3*index
   361  	t.source = state
   362  	t.dest = a.transitions[i]
   363  	t.min = a.transitions[i+1]
   364  	t.max = a.transitions[i+2]
   365  }
   366  
   367  // L563
   368  /* Returns sorted array of all interval start points. */
   369  func (a *Automaton) startPoints() []int {
   370  	pointset := make(map[int]bool)
   371  	pointset[MIN_CODE_POINT] = true
   372  	// fmt.Println("getStartPoints")
   373  	for s := 0; s < len(a.states); s += 2 {
   374  		trans := a.states[s]
   375  		limit := trans + 3*a.states[s+1]
   376  		// fmt.Printf("  state=%v trans=%v limit=%v\n", s/2, trans, limit)
   377  		for trans < limit {
   378  			min, max := a.transitions[trans+1], a.transitions[trans+2]
   379  			// fmt.Printf("    min=%v\n", min)
   380  			pointset[min] = true
   381  			if max < unicode.MaxRune {
   382  				pointset[max+1] = true
   383  			}
   384  			trans += 3
   385  		}
   386  	}
   387  	var points []int
   388  	for m, _ := range pointset {
   389  		points = append(points, m)
   390  	}
   391  	sort.Ints(points)
   392  	return points
   393  }
   394  
   395  /* Performs lookup in transitions, assuming determinism. */
   396  func (a *Automaton) step(state, label int) int {
   397  	assert(state >= 0)
   398  	assert(label >= 0)
   399  	if 2*state >= len(a.states) {
   400  		return -1 // invalid state
   401  	}
   402  	trans := a.states[2*state]
   403  	limit := trans + 3*a.states[2*state+1]
   404  	// TODO binary search
   405  	for trans < limit {
   406  		dest, min, max := a.transitions[trans], a.transitions[trans+1], a.transitions[trans+2]
   407  		if min <= label && label <= max {
   408  			return dest
   409  		}
   410  		trans += 3
   411  	}
   412  	return -1
   413  }
   414  
   415  // Go's unicode package defines MaxRune but no MinRune; the smallest code point is 0.
   416  const MIN_CODE_POINT = 0
   417  
   418  type AutomatonBuilder struct {
   419  	transitions []int
   420  	a           *Automaton
   421  }
   422  
   423  func newAutomatonBuilder() *AutomatonBuilder {
   424  	return &AutomatonBuilder{
   425  		a: newEmptyAutomaton(),
   426  	}
   427  }
   428  
   429  func (b *AutomatonBuilder) addTransitionRange(source, dest, min, max int) {
   430  	b.transitions = append(b.transitions, source, dest, min, max)
   431  }
   432  
   433  type srcMinMaxDestSorter []int
   434  
   435  func (s srcMinMaxDestSorter) Len() int {
   436  	panic("niy")
   437  }
   438  
   439  func (s srcMinMaxDestSorter) Swap(i, j int) {
   440  	iStart, jStart := 4*i, 4*j
   441  	for n := 0; n < 4; n++ {
   442  		s[iStart+n], s[jStart+n] = s[jStart+n], s[iStart+n]
   443  	}
   444  }
   445  
   446  func (s srcMinMaxDestSorter) Less(i, j int) bool {
   447  	iStart, jStart := 4*i, 4*j
   448  
   449  	iSrc, jSrc := s[iStart], s[jStart]
   450  	if iSrc < jSrc {
   451  		return true
   452  	} else if iSrc > jSrc {
   453  		return false
   454  	}
   455  
   456  	iMin, jMin := s[iStart+2], s[jStart+2]
   457  	if iMin < jMin {
   458  		return true
   459  	} else if iMin > jMin {
   460  		return false
   461  	}
   462  
   463  	iMax, jMax := s[iStart+3], s[jStart+3]
   464  	if iMax < jMax {
   465  		return true
   466  	} else if iMax > jMax {
   467  		return false
   468  	}
   469  
   470  	iDest, jDest := s[iStart+1], s[jStart+1]
   471  	return iDest < jDest
   472  }
   473  
   474  /* Compiles all added states and transitions into a new Automaton and returns it. */
   475  func (b *AutomatonBuilder) finish() *Automaton {
   476  	// fmt.Printf("LA.Builder.finish: count=%v\n", len(b.transitions)/4)
   477  	// fmt.Println("finish pending")
   478  	util.NewInPlaceMergeSorter(srcMinMaxDestSorter(b.transitions)).Sort(0, len(b.transitions)/4)
   479  	for upto := 0; upto < len(b.transitions); upto += 4 {
   480  		b.a.addTransitionRange(
   481  			b.transitions[upto],
   482  			b.transitions[upto+1],
   483  			b.transitions[upto+2],
   484  			b.transitions[upto+3],
   485  		)
   486  	}
   487  
   488  	b.a.finishState()
   489  	return b.a
   490  }
   491  
   492  func (b *AutomatonBuilder) createState() int {
   493  	return b.a.createState()
   494  }
   495  
   496  func (b *AutomatonBuilder) setAccept(state int, accept bool) {
   497  	b.a.setAccept(state, accept)
   498  }
   499  
   500  func (b *AutomatonBuilder) isAccept(state int) bool {
   501  	return b.a.IsAccept(state)
   502  }
   503  
   504  func (b *AutomatonBuilder) copy(other *Automaton) {
   505  	offset := b.a.numStates()
   506  	otherNumStates := other.numStates()
   507  	for s := 0; s < otherNumStates; s++ {
   508  		newState := b.createState()
   509  		b.setAccept(newState, other.IsAccept(s))
   510  	}
   511  	t := newTransition()
   512  	for s := 0; s < otherNumStates; s++ {
   513  		count := other.initTransition(s, t)
   514  		for i := 0; i < count; i++ {
   515  			other.nextTransition(t)
   516  			b.addTransitionRange(offset+s, offset+t.dest, t.min, t.max)
   517  		}
   518  	}
   519  }