gopkg.in/alecthomas/gometalinter.v3@v3.0.0/_linters/src/github.com/mibk/dupl/suffixtree/suffixtree.go (about)

     1  package suffixtree
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"math"
     7  	"strings"
     8  )
     9  
// infinity is the sentinel end position given to transitions created by
// fork. ActEnd resolves it to the index of the last token currently stored
// in the tree.
const infinity = math.MaxInt32
    11  
// Pos denotes a position (an index) in the tree's data slice.
type Pos int32
    14  
// Token is a single element of the sequence indexed by the suffix tree.
// The tree compares tokens solely by the result of Val.
type Token interface {
	// Val returns the integer value used to compare this token with others.
	Val() int
}
    18  
    19  // STree is a struct representing a suffix tree.
    20  type STree struct {
    21  	data     []Token
    22  	root     *state
    23  	auxState *state // auxiliary state
    24  
    25  	// active point
    26  	s          *state
    27  	start, end Pos
    28  }
    29  
    30  // New creates new suffix tree.
    31  func New() *STree {
    32  	t := new(STree)
    33  	t.data = make([]Token, 0, 50)
    34  	t.root = newState(t)
    35  	t.auxState = newState(t)
    36  	t.root.linkState = t.auxState
    37  	t.s = t.root
    38  	return t
    39  }
    40  
    41  // Update refreshes the suffix tree to by new data.
    42  func (t *STree) Update(data ...Token) {
    43  	t.data = append(t.data, data...)
    44  	for _ = range data {
    45  		t.update()
    46  		t.s, t.start = t.canonize(t.s, t.start, t.end)
    47  		t.end++
    48  	}
    49  }
    50  
    51  // update transforms suffix tree T(n) to T(n+1).
    52  func (t *STree) update() {
    53  	oldr := t.root
    54  
    55  	// (s, (start, end)) is the canonical reference pair for the active point
    56  	s := t.s
    57  	start, end := t.start, t.end
    58  	var r *state
    59  	for {
    60  		var endPoint bool
    61  		r, endPoint = t.testAndSplit(s, start, end-1)
    62  		if endPoint {
    63  			break
    64  		}
    65  		r.fork(end)
    66  		if oldr != t.root {
    67  			oldr.linkState = r
    68  		}
    69  		oldr = r
    70  		s, start = t.canonize(s.linkState, start, end-1)
    71  	}
    72  	if oldr != t.root {
    73  		oldr.linkState = r
    74  	}
    75  
    76  	// update active point
    77  	t.s = s
    78  	t.start = start
    79  }
    80  
    81  // testAndSplit tests whether a state with canonical ref. pair
    82  // (s, (start, end)) is the end point, that is, a state that have
    83  // a c-transition. If not, then state (exs, (start, end)) is made
    84  // explicit (if not already so).
    85  func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) {
    86  	c := t.data[t.end]
    87  	if start <= end {
    88  		tr := s.findTran(t.data[start])
    89  		splitPoint := tr.start + end - start + 1
    90  		if t.data[splitPoint].Val() == c.Val() {
    91  			return s, true
    92  		}
    93  		// make the (s, (start, end)) state explicit
    94  		newSt := newState(s.tree)
    95  		newSt.addTran(splitPoint, tr.end, tr.state)
    96  		tr.end = splitPoint - 1
    97  		tr.state = newSt
    98  		return newSt, false
    99  	}
   100  	if s == t.auxState || s.findTran(c) != nil {
   101  		return s, true
   102  	}
   103  	return s, false
   104  }
   105  
   106  // canonize returns updated state and start position for ref. pair
   107  // (s, (start, end)) of state r so the new ref. pair is canonical,
   108  // that is, referenced from the closest explicit ancestor of r.
   109  func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) {
   110  	if s == t.auxState {
   111  		s, start = t.root, start+1
   112  	}
   113  	if start > end {
   114  		return s, start
   115  	}
   116  
   117  	var tr *tran
   118  	for {
   119  		if start <= end {
   120  			tr = s.findTran(t.data[start])
   121  			if tr == nil {
   122  				panic(fmt.Sprintf("there should be some transition for '%d' at %d",
   123  					t.data[start].Val(), start))
   124  			}
   125  		}
   126  		if tr.end-tr.start > end-start {
   127  			break
   128  		}
   129  		start += tr.end - tr.start + 1
   130  		s = tr.state
   131  	}
   132  	if s == nil {
   133  		panic("there should always be some suffix link resolution")
   134  	}
   135  	return s, start
   136  }
   137  
   138  func (t *STree) At(p Pos) Token {
   139  	if p < 0 || p >= Pos(len(t.data)) {
   140  		panic("position out of bounds")
   141  	}
   142  	return t.data[p]
   143  }
   144  
   145  func (t *STree) String() string {
   146  	buf := new(bytes.Buffer)
   147  	printState(buf, t.root, 0)
   148  	return buf.String()
   149  }
   150  
   151  func printState(buf *bytes.Buffer, s *state, ident int) {
   152  	for _, tr := range s.trans {
   153  		fmt.Fprint(buf, strings.Repeat("  ", ident))
   154  		fmt.Fprintf(buf, "* (%d, %d)\n", tr.start, tr.ActEnd())
   155  		printState(buf, tr.state, ident+1)
   156  	}
   157  }
   158  
// state is an explicit state of the suffix tree.
type state struct {
	tree      *STree  // tree this state was created for
	trans     []*tran // outgoing transitions, in the order they were added
	linkState *state  // link to another state; nil until it is assigned
}
   165  
   166  func newState(t *STree) *state {
   167  	return &state{
   168  		tree:      t,
   169  		trans:     make([]*tran, 0),
   170  		linkState: nil,
   171  	}
   172  }
   173  
   174  func (s *state) addTran(start, end Pos, r *state) {
   175  	s.trans = append(s.trans, newTran(start, end, r))
   176  }
   177  
   178  // fork creates a new branch from the state s.
   179  func (s *state) fork(i Pos) *state {
   180  	r := newState(s.tree)
   181  	s.addTran(i, infinity, r)
   182  	return r
   183  }
   184  
   185  // findTran finds c-transition.
   186  func (s *state) findTran(c Token) *tran {
   187  	for _, tran := range s.trans {
   188  		if s.tree.data[tran.start].Val() == c.Val() {
   189  			return tran
   190  		}
   191  	}
   192  	return nil
   193  }
   194  
   195  // tran represents a state's transition.
   196  type tran struct {
   197  	start, end Pos
   198  	state      *state
   199  }
   200  
   201  func newTran(start, end Pos, s *state) *tran {
   202  	return &tran{start, end, s}
   203  }
   204  
   205  func (t *tran) len() int {
   206  	return int(t.end - t.start + 1)
   207  }
   208  
   209  // ActEnd returns actual end position as consistent with
   210  // the actual length of the data in the STree.
   211  func (t *tran) ActEnd() Pos {
   212  	if t.end == infinity {
   213  		return Pos(len(t.state.tree.data)) - 1
   214  	}
   215  	return t.end
   216  }