github.com/serversong/goreporter@v0.0.0-20200325104552-3cfaf44fd178/linters/copycheck/suffixtree/suffixtree.go (about) 1 package suffixtree 2 3 import ( 4 "bytes" 5 "fmt" 6 "log" 7 "math" 8 "strings" 9 ) 10 11 const infinity = math.MaxInt32 12 13 // Pos denotes position in data slice. 14 type Pos int32 15 16 type Token interface { 17 Val() int 18 } 19 20 // STree is a struct representing a suffix tree. 21 type STree struct { 22 data []Token 23 root *state 24 auxState *state // auxiliary state 25 26 // active point 27 s *state 28 start, end Pos 29 } 30 31 // New creates new suffix tree. 32 func New() *STree { 33 t := new(STree) 34 t.data = make([]Token, 0, 50) 35 t.root = newState(t) 36 t.auxState = newState(t) 37 t.root.linkState = t.auxState 38 t.s = t.root 39 return t 40 } 41 42 // Update refreshes the suffix tree to by new data. 43 func (t *STree) Update(data ...Token) { 44 t.data = append(t.data, data...) 45 for range data { 46 t.update() 47 t.s, t.start = t.canonize(t.s, t.start, t.end) 48 t.end++ 49 } 50 } 51 52 // update transforms suffix tree T(n) to T(n+1). 53 func (t *STree) update() { 54 oldr := t.root 55 56 // (s, (start, end)) is the canonical reference pair for the active point 57 s := t.s 58 start, end := t.start, t.end 59 var r *state 60 for { 61 var endPoint bool 62 r, endPoint = t.testAndSplit(s, start, end-1) 63 if endPoint { 64 break 65 } 66 r.fork(end) 67 if oldr != t.root { 68 oldr.linkState = r 69 } 70 oldr = r 71 s, start = t.canonize(s.linkState, start, end-1) 72 } 73 if oldr != t.root { 74 oldr.linkState = r 75 } 76 77 // update active point 78 t.s = s 79 t.start = start 80 } 81 82 // testAndSplit tests whether a state with canonical ref. pair 83 // (s, (start, end)) is the end point, that is, a state that have 84 // a c-transition. If not, then state (exs, (start, end)) is made 85 // explicit (if not already so). 86 func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) { 87 c := t.data[t.end] 88 if start <= end { 89 tr := s.findTran(t.data[start]) 90 splitPoint := tr.start + end - start + 1 91 if t.data[splitPoint].Val() == c.Val() { 92 return s, true 93 } 94 // make the (s, (start, end)) state explicit 95 newSt := newState(s.tree) 96 newSt.addTran(splitPoint, tr.end, tr.state) 97 tr.end = splitPoint - 1 98 tr.state = newSt 99 return newSt, false 100 } 101 if s == t.auxState || s.findTran(c) != nil { 102 return s, true 103 } 104 return s, false 105 } 106 107 // canonize returns updated state and start position for ref. pair 108 // (s, (start, end)) of state r so the new ref. pair is canonical, 109 // that is, referenced from the closest explicit ancestor of r. 110 func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) { 111 if s == t.auxState { 112 s, start = t.root, start+1 113 } 114 if start > end { 115 return s, start 116 } 117 118 var tr *tran 119 for { 120 if start <= end { 121 tr = s.findTran(t.data[start]) 122 if tr == nil { 123 log.Fatal(fmt.Sprintf("there should be some transition for '%d' at %d", 124 t.data[start].Val(), start)) 125 } 126 } 127 if tr.end-tr.start > end-start { 128 break 129 } 130 start += tr.end - tr.start + 1 131 s = tr.state 132 } 133 if s == nil { 134 log.Fatal("there should always be some suffix link resolution") 135 } 136 return s, start 137 } 138 139 func (t *STree) At(p Pos) Token { 140 if p < 0 || p >= Pos(len(t.data)) { 141 log.Fatal("position out of bounds") 142 } 143 return t.data[p] 144 } 145 146 func (t *STree) String() string { 147 buf := new(bytes.Buffer) 148 printState(buf, t.root, 0) 149 return buf.String() 150 } 151 152 func printState(buf *bytes.Buffer, s *state, ident int) { 153 for _, tr := range s.trans { 154 fmt.Fprint(buf, strings.Repeat(" ", ident)) 155 fmt.Fprintf(buf, "* (%d, %d)\n", tr.start, tr.ActEnd()) 156 printState(buf, tr.state, ident+1) 157 } 158 } 159 160 // state is an explicit state of the suffix tree. 161 type state struct { 162 tree *STree 163 trans []*tran 164 linkState *state 165 } 166 167 func newState(t *STree) *state { 168 return &state{ 169 tree: t, 170 trans: make([]*tran, 0), 171 linkState: nil, 172 } 173 } 174 175 func (s *state) addTran(start, end Pos, r *state) { 176 s.trans = append(s.trans, newTran(start, end, r)) 177 } 178 179 // fork creates a new branch from the state s. 180 func (s *state) fork(i Pos) *state { 181 r := newState(s.tree) 182 s.addTran(i, infinity, r) 183 return r 184 } 185 186 // findTran finds c-transition. 187 func (s *state) findTran(c Token) *tran { 188 for _, tran := range s.trans { 189 if s.tree.data[tran.start].Val() == c.Val() { 190 return tran 191 } 192 } 193 return nil 194 } 195 196 // tran represents a state's transition. 197 type tran struct { 198 start, end Pos 199 state *state 200 } 201 202 func newTran(start, end Pos, s *state) *tran { 203 return &tran{start, end, s} 204 } 205 206 func (t *tran) len() int { 207 return int(t.end - t.start + 1) 208 } 209 210 // ActEnd returns actual end position as consistent with 211 // the actual length of the data in the STree. 212 func (t *tran) ActEnd() Pos { 213 if t.end == infinity { 214 return Pos(len(t.state.tree.data)) - 1 215 } 216 return t.end 217 }