package suffixtree

import (
	"bytes"
	"fmt"
	"math"
	"strings"
)

// infinity is the end position given to transitions created by fork.
// ActEnd translates it to the index of the last token currently stored.
const infinity = math.MaxInt32

// Pos denotes position in data slice.
type Pos int32

// Token is a single element of the sequence stored in the tree.
// Two tokens are treated as equal when their Val results are equal.
type Token interface {
	Val() int
}

// STree is a struct representing a suffix tree.
//
// The tree is built incrementally: every call to Update appends tokens
// to the stored sequence and extends the tree by one token at a time,
// using the update / test-and-split / canonize procedures of Ukkonen's
// on-line construction.
type STree struct {
	data     []Token
	root     *state
	auxState *state // auxiliary state

	// active point
	s          *state
	start, end Pos
}

// New creates new suffix tree.
func New() *STree {
	t := &STree{data: make([]Token, 0, 50)}
	t.root = newState(t)
	t.auxState = newState(t)
	// The root's suffix link leads to the auxiliary state; canonize and
	// testAndSplit treat that state specially.
	t.root.linkState = t.auxState
	t.s = t.root
	return t
}

// Update appends data to the stored sequence and extends the suffix
// tree so that it covers the new tokens.
func (t *STree) Update(data ...Token) {
	t.data = append(t.data, data...)
	// One extension step per appended token; t.end is the index of the
	// token being added in the current step.
	for range data {
		t.update()
		t.s, t.start = t.canonize(t.s, t.start, t.end)
		t.end++
	}
}

// update transforms suffix tree T(n) to T(n+1).
func (t *STree) update() {
	oldr := t.root

	// (s, (start, end)) is the canonical reference pair for the active point
	s := t.s
	start, end := t.start, t.end
	var r *state
	for {
		var endPoint bool
		r, endPoint = t.testAndSplit(s, start, end-1)
		if endPoint {
			break
		}
		// r has no transition for the new token yet: add a new leaf.
		r.fork(end)
		// Link the state handled in the previous iteration to this one.
		if oldr != t.root {
			oldr.linkState = r
		}
		oldr = r
		s, start = t.canonize(s.linkState, start, end-1)
	}
	if oldr != t.root {
		oldr.linkState = r
	}

	// update active point
	t.s = s
	t.start = start
}

// testAndSplit tests whether a state with canonical ref. pair
// (s, (start, end)) is the end point, that is, a state that has
// a c-transition, where c is the token at t.end. If not, then state
// (exs, (start, end)) is made explicit (if not already so).
//
// It panics if the reference pair is non-empty and s has no transition
// for the token at start.
func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) {
	c := t.data[t.end]
	if start <= end {
		tr := s.findTran(t.data[start])
		if tr == nil {
			// Same invariant (and message) as in canonize; the original
			// would fail here with a bare nil-pointer dereference.
			panic(fmt.Sprintf("there should be some transition for '%d' at %d",
				t.data[start].Val(), start))
		}
		splitPoint := tr.start + end - start + 1
		if t.data[splitPoint].Val() == c.Val() {
			return s, true
		}
		// make the (s, (start, end)) state explicit
		newSt := newState(s.tree)
		newSt.addTran(splitPoint, tr.end, tr.state)
		tr.end = splitPoint - 1
		tr.state = newSt
		return newSt, false
	}
	if s == t.auxState || s.findTran(c) != nil {
		return s, true
	}
	return s, false
}

// canonize returns updated state and start position for ref. pair
// (s, (start, end)) of state r so the new ref. pair is canonical,
// that is, referenced from the closest explicit ancestor of r.
//
// It panics if a transition required while walking down is missing,
// or if the walk ends in a nil state.
func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) {
	if s == t.auxState {
		s, start = t.root, start+1
	}
	if start > end {
		return s, start
	}

	var tr *tran
	for {
		if start <= end {
			tr = s.findTran(t.data[start])
			if tr == nil {
				panic(fmt.Sprintf("there should be some transition for '%d' at %d",
					t.data[start].Val(), start))
			}
		}
		// Stop once the remaining reference is shorter than the
		// transition; otherwise follow the whole transition.
		if tr.end-tr.start > end-start {
			break
		}
		start += tr.end - tr.start + 1
		s = tr.state
	}
	if s == nil {
		panic("there should always be some suffix link resolution")
	}
	return s, start
}

// At returns the token stored at position p.
// It panics if p is outside the stored data.
func (t *STree) At(p Pos) Token {
	if p < 0 || p >= Pos(len(t.data)) {
		panic("position out of bounds")
	}
	return t.data[p]
}

// String returns a textual dump of the tree: one line per transition
// with its start and actual end position, indented by depth.
func (t *STree) String() string {
	var buf bytes.Buffer
	printState(&buf, t.root, 0)
	return buf.String()
}

// printState writes the transitions of s to buf, in the order they were
// added, followed recursively by those of their target states.
func printState(buf *bytes.Buffer, s *state, ident int) {
	for _, tr := range s.trans {
		fmt.Fprintf(buf, "%s* (%d, %d)\n", strings.Repeat(" ", ident), tr.start, tr.ActEnd())
		printState(buf, tr.state, ident+1)
	}
}

// state is an explicit state of the suffix tree.
type state struct {
	tree      *STree
	trans     []*tran
	linkState *state
}

// newState returns a state of tree t with no transitions and no link.
func newState(t *STree) *state {
	return &state{tree: t}
}

// addTran appends a transition labelled by data[start..end] leading to r.
func (s *state) addTran(start, end Pos, r *state) {
	s.trans = append(s.trans, newTran(start, end, r))
}

// fork creates a new branch from the state s.
// The branch starts at position i, has an open (infinity) end and
// leads to a newly created state, which is returned.
func (s *state) fork(i Pos) *state {
	r := newState(s.tree)
	s.addTran(i, infinity, r)
	return r
}

// findTran finds c-transition, that is, the transition of s whose first
// token has the same Val as c. It returns nil if there is none.
func (s *state) findTran(c Token) *tran {
	for _, tr := range s.trans {
		if s.tree.data[tr.start].Val() == c.Val() {
			return tr
		}
	}
	return nil
}

// tran represents a state's transition.
// It is labelled by the tokens at positions start..end (inclusive)
// and leads to state.
type tran struct {
	start, end Pos
	state      *state
}

// newTran returns a transition over positions start..end leading to s.
func newTran(start, end Pos, s *state) *tran {
	return &tran{start: start, end: end, state: s}
}

// len returns the number of positions covered by the transition,
// computed from the stored end (which may be infinity).
func (t *tran) len() int {
	return int(t.end - t.start + 1)
}

// ActEnd returns actual end position as consistent with
// the actual length of the data in the STree.
func (t *tran) ActEnd() Pos {
	if t.end == infinity {
		return Pos(len(t.state.tree.data)) - 1
	}
	return t.end
}