github.com/ledgerwatch/erigon-lib@v1.0.0/patricia/patricia.go (about) 1 /* 2 Copyright 2021 Erigon contributors 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package patricia 18 19 import ( 20 "fmt" 21 "math/bits" 22 "strings" 23 24 "github.com/ledgerwatch/erigon-lib/sais" 25 "golang.org/x/exp/slices" 26 ) 27 28 // Implementation of paticia tree for efficient search of substrings from a dictionary in a given string 29 type node struct { 30 val interface{} // value associated with the key 31 n0 *node 32 n1 *node 33 p0 uint32 34 p1 uint32 35 } 36 37 func tostr(x uint32) string { 38 str := fmt.Sprintf("%b", x) 39 for len(str) < 32 { 40 str = "0" + str 41 } 42 return str[:x&0x1f] 43 } 44 45 // print assumes values are byte slices 46 func (n *node) print(sb *strings.Builder, indent string) { 47 sb.WriteString(indent) 48 fmt.Fprintf(sb, "%p ", n) 49 sb.WriteString(tostr(n.p0)) 50 sb.WriteString("\n") 51 if n.n0 != nil { 52 n.n0.print(sb, indent+" ") 53 } 54 sb.WriteString(indent) 55 fmt.Fprintf(sb, "%p ", n) 56 sb.WriteString(tostr(n.p1)) 57 sb.WriteString("\n") 58 if n.n1 != nil { 59 n.n1.print(sb, indent+" ") 60 } 61 if n.val != nil { 62 sb.WriteString(indent) 63 sb.WriteString("val:") 64 fmt.Fprintf(sb, " %x", n.val.([]byte)) 65 sb.WriteString("\n") 66 } 67 } 68 69 func (n *node) String() string { 70 var sb strings.Builder 71 n.print(&sb, "") 72 return sb.String() 73 } 74 75 // state represent a position anywhere inside patricia tree 76 // position can be identified by combination of node, and the partitioning 77 // of that node's p0 or p1 into head and tail. 78 // As with p0 and p1, head and tail are encoded as follows: 79 // lowest 5 bits encode the length in bits, and the remaining 27 bits 80 // encode the actual head or tail. 81 // For example, if the position is at the beginning of a node, 82 // head would be zero, and tail would be equal to either p0 or p1, 83 // depending on whether the position corresponds to going left (0) or right (1). 84 type state struct { 85 n *node 86 head uint32 87 tail uint32 88 } 89 90 func (s *state) String() string { 91 return fmt.Sprintf("%p head %s tail %s", s.n, tostr(s.head), tostr(s.tail)) 92 } 93 94 func (s *state) reset(n *node) { 95 s.n = n 96 s.head = 0 97 s.tail = 0 98 } 99 100 func makestate(n *node) *state { 101 return &state{n: n, head: 0, tail: 0} 102 } 103 104 // transition consumes next byte of the key, moves the state to corresponding 105 // node of the patricia tree and returns divergence prefix (0 if there is no divergence) 106 func (s *state) transition(b byte, readonly bool) uint32 { 107 bitsLeft := 8 // Bits in b to process 108 b32 := uint32(b) << 24 109 for bitsLeft > 0 { 110 if s.head == 0 { 111 // tail has not been determined yet, do it now 112 if b32&0x80000000 == 0 { 113 s.tail = s.n.p0 114 } else { 115 s.tail = s.n.p1 116 } 117 } 118 if s.tail == 0 { 119 // state positioned at the end of the current node 120 return b32 | uint32(bitsLeft) 121 } 122 tailLen := int(s.tail & 0x1f) 123 firstDiff := bits.LeadingZeros32(s.tail ^ b32) // First bit where b32 and tail are different 124 if firstDiff < bitsLeft { 125 // divergence (where the key being searched and the existing structure of patricia tree becomes incompatible) is within currently supplied byte of the search key, b 126 if firstDiff >= tailLen { 127 // divergence is within currently supplied byte of the search key, b, but outside of the current node 128 bitsLeft -= tailLen 129 b32 <<= tailLen 130 // Need to switch to the next node 131 if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) { 132 if s.n.n0 == nil { 133 panic("") 134 } 135 s.n = s.n.n0 136 } else { 137 if s.n.n1 == nil { 138 panic("") 139 } 140 s.n = s.n.n1 141 } 142 s.head = 0 143 s.tail = 0 144 } else { 145 // divergence is within currently supplied byte of the search key, b, and within the current node 146 bitsLeft -= firstDiff 147 b32 <<= firstDiff 148 // there is divergence, move head and tail 149 mask := ^(uint32(1)<<(32-firstDiff) - 1) 150 s.head |= (s.tail & mask) >> (s.head & 0x1f) 151 s.head += uint32(firstDiff) 152 s.tail = (s.tail&0xffffffe0)<<firstDiff | (s.tail & 0x1f) 153 s.tail -= uint32(firstDiff) 154 return b32 | uint32(bitsLeft) 155 } 156 } else if tailLen < bitsLeft { 157 // divergence is outside of currently supplied byte of the search key, b 158 bitsLeft -= tailLen 159 b32 <<= tailLen 160 // Switch to the next node 161 if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) { 162 if s.n.n0 == nil { 163 if readonly { 164 return b32 | uint32(bitsLeft) 165 } 166 s.n.n0 = &node{} 167 if b32&0x80000000 == 0 { 168 s.n.n0.p0 = b32 | uint32(bitsLeft) 169 } else { 170 s.n.n0.p1 = b32 | uint32(bitsLeft) 171 } 172 } 173 s.n = s.n.n0 174 } else { 175 if s.n.n1 == nil { 176 if readonly { 177 return b32 | uint32(bitsLeft) 178 } 179 s.n.n1 = &node{} 180 if b32&0x80000000 == 0 { 181 s.n.n1.p0 = b32 | uint32(bitsLeft) 182 } else { 183 s.n.n1.p1 = b32 | uint32(bitsLeft) 184 } 185 } 186 s.n = s.n.n1 187 } 188 s.head = 0 189 s.tail = 0 190 } else { 191 // key byte is consumed, but stay on the same node 192 mask := ^(uint32(1)<<(32-bitsLeft) - 1) 193 s.head |= (s.tail & mask) >> (s.head & 0x1f) 194 s.head += uint32(bitsLeft) 195 s.tail = (s.tail&0xffffffe0)<<bitsLeft | (s.tail & 0x1f) 196 s.tail -= uint32(bitsLeft) 197 bitsLeft = 0 198 if s.tail == 0 { 199 if s.head&0x80000000 == 0 { 200 if s.n.n0 != nil { 201 s.n = s.n.n0 202 s.head = 0 203 } 204 } else { 205 if s.n.n1 != nil { 206 s.n = s.n.n1 207 s.head = 0 208 } 209 } 210 } 211 } 212 } 213 return 0 214 } 215 216 func (s *state) diverge(divergence uint32) { 217 if s.tail == 0 { 218 // try to add to the existing head 219 //fmt.Printf("adding divergence to existing head\n") 220 dLen := int(divergence & 0x1f) 221 headLen := int(s.head & 0x1f) 222 d32 := divergence & 0xffffffe0 223 //fmt.Printf("headLen %d + dLen %d = %d\n", headLen, dLen, headLen+dLen) 224 if headLen+dLen > 27 { 225 mask := ^(uint32(1)<<(headLen+5) - 1) 226 //fmt.Printf("mask = %b\n", mask) 227 s.head |= (d32 & mask) >> headLen 228 s.head += uint32(27 - headLen) 229 //fmt.Printf("s.head %s\n", tostr(s.head)) 230 var dn node 231 if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) { 232 s.n.p0 = s.head 233 s.n.n0 = &dn 234 } else { 235 s.n.p1 = s.head 236 s.n.n1 = &dn 237 } 238 s.n = &dn 239 s.head = 0 240 s.tail = 0 241 d32 <<= 27 - headLen 242 dLen -= (27 - headLen) 243 headLen = 0 244 } 245 //fmt.Printf("headLen %d + dLen %d = %d\n", headLen, dLen, headLen+dLen) 246 mask := ^(uint32(1)<<(32-dLen) - 1) 247 //fmt.Printf("mask = %b\n", mask) 248 s.head |= (d32 & mask) >> headLen 249 s.head += uint32(dLen) 250 //fmt.Printf("s.head %s\n", tostr(s.head)) 251 if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) { 252 s.n.p0 = s.head 253 } else { 254 s.n.p1 = s.head 255 } 256 return 257 } 258 // create a new node 259 var dn node 260 if divergence&0x80000000 == 0 { 261 dn.p0 = divergence 262 dn.p1 = s.tail 263 if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) { 264 dn.n1 = s.n.n0 265 } else { 266 dn.n1 = s.n.n1 267 } 268 } else { 269 dn.p1 = divergence 270 dn.p0 = s.tail 271 if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) { 272 dn.n0 = s.n.n0 273 } else { 274 dn.n0 = s.n.n1 275 } 276 } 277 if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) { 278 s.n.n0 = &dn 279 s.n.p0 = s.head 280 } else { 281 s.n.n1 = &dn 282 s.n.p1 = s.head 283 } 284 s.n = &dn 285 s.head = divergence 286 s.tail = 0 287 } 288 289 func (n *node) insert(key []byte, value interface{}) { 290 s := makestate(n) 291 for _, b := range key { 292 divergence := s.transition(b, false /* readonly */) 293 if divergence != 0 { 294 s.diverge(divergence) 295 } 296 } 297 s.insert(value) 298 } 299 300 func (s *state) insert(value interface{}) { 301 if s.tail != 0 { 302 s.diverge(0) 303 } 304 if s.head != 0 { 305 var dn node 306 if s.head&0x80000000 == 0 { 307 s.n.n0 = &dn 308 } else { 309 s.n.n1 = &dn 310 } 311 s.n = &dn 312 s.head = 0 313 } 314 //fmt.Printf("set val to %p\n", s.n) 315 s.n.val = value 316 } 317 318 func (n *node) get(key []byte) (interface{}, bool) { 319 s := makestate(n) 320 for _, b := range key { 321 divergence := s.transition(b, true /* readonly */) 322 //fmt.Printf("get %x, b = %x, divergence = %s\nstate=%s\n", key, b, tostr(divergence), s) 323 if divergence != 0 { 324 return nil, false 325 } 326 } 327 if s.tail != 0 { 328 return nil, false 329 } 330 return s.n.val, s.n.val != nil 331 } 332 333 type PatriciaTree struct { 334 root node 335 } 336 337 func (pt *PatriciaTree) Insert(key []byte, value interface{}) { 338 //fmt.Printf("%p Insert [%x]\n", pt, key) 339 pt.root.insert(key, value) 340 } 341 342 func (pt *PatriciaTree) Get(key []byte) (interface{}, bool) { 343 return pt.root.get(key) 344 } 345 346 type Match struct { 347 Val interface{} 348 Start int 349 End int 350 } 351 352 type Matches []Match 353 354 func (m Matches) Len() int { 355 return len(m) 356 } 357 358 func (m Matches) Less(i, j int) bool { 359 return m[i].Start < m[j].Start 360 } 361 362 func (m *Matches) Swap(i, j int) { 363 (*m)[i], (*m)[j] = (*m)[j], (*m)[i] 364 } 365 366 type MatchFinder struct { 367 pt *PatriciaTree 368 s state 369 matches []Match 370 } 371 372 func NewMatchFinder(pt *PatriciaTree) *MatchFinder { 373 return &MatchFinder{pt: pt} 374 } 375 376 type MatchFinder2 struct { 377 top *node // Top of nodeStack 378 pt *PatriciaTree 379 nodeStack []*node 380 matchStack []Match 381 matches Matches 382 sa []int32 383 lcp []int32 384 inv []int32 385 headLen int 386 tailLen int 387 side int // 0, 1, or 2 (if side is not determined yet) 388 } 389 390 func NewMatchFinder2(pt *PatriciaTree) *MatchFinder2 { 391 return &MatchFinder2{pt: pt, top: &pt.root, nodeStack: []*node{&pt.root}, side: 2} 392 } 393 394 // unfold consumes next byte of the key, moves the state to corresponding 395 // node of the patricia tree and returns divergence prefix (0 if there is no divergence) 396 func (mf2 *MatchFinder2) unfold(b byte) uint32 { 397 //fmt.Printf("unfold %x, headLen = %d, tailLen = %d, nodeStackLen = %d\n", b, mf2.headLen, mf2.tailLen, len(mf2.nodeStack)) 398 //var sb strings.Builder 399 bitsLeft := 8 // Bits in b to process 400 b32 := uint32(b) << 24 401 for bitsLeft > 0 { 402 if mf2.side == 2 { 403 // tail has not been determined yet, do it now 404 if b32&0x80000000 == 0 { 405 mf2.side = 0 406 mf2.headLen = 0 407 mf2.tailLen = int(mf2.top.p0 & 0x1f) 408 } else { 409 mf2.side = 1 410 mf2.headLen = 0 411 mf2.tailLen = int(mf2.top.p1 & 0x1f) 412 } 413 if mf2.tailLen == 0 { 414 // state positioned at the end of the current node 415 mf2.side = 2 416 //fmt.Fprintf(&sb, "1 ") 417 //fmt.Printf("%s\n", sb.String()) 418 return b32 | uint32(bitsLeft) 419 } 420 } 421 if mf2.tailLen == 0 { 422 // Need to switch to the next node 423 if mf2.side == 0 { 424 if mf2.top.n0 == nil { 425 //fmt.Fprintf(&sb, "2 ") 426 //fmt.Printf("%s\n", sb.String()) 427 return b32 | uint32(bitsLeft) 428 } 429 mf2.nodeStack = append(mf2.nodeStack, mf2.top.n0) 430 mf2.top = mf2.top.n0 431 //fmt.Fprintf(&sb, "a1,0,bl=%d ", bitsLeft) 432 } else if mf2.side == 1 { 433 if mf2.top.n1 == nil { 434 //fmt.Fprintf(&sb, "3 ") 435 //fmt.Printf("%s\n", sb.String()) 436 return b32 | uint32(bitsLeft) 437 } 438 mf2.nodeStack = append(mf2.nodeStack, mf2.top.n1) 439 mf2.top = mf2.top.n1 440 //fmt.Fprintf(&sb, "a1,1,bl=%d ", bitsLeft) 441 } else { 442 panic("") 443 } 444 mf2.headLen = 0 445 mf2.side = 2 446 } 447 var tail uint32 448 if mf2.side == 0 { 449 tail = (mf2.top.p0 & 0xffffffe0) << mf2.headLen 450 } else if mf2.side == 1 { 451 tail = (mf2.top.p1 & 0xffffffe0) << mf2.headLen 452 } else { 453 return b32 | uint32(bitsLeft) 454 } 455 firstDiff := bits.LeadingZeros32(tail ^ b32) // First bit where b32 and tail are different 456 if firstDiff < bitsLeft { 457 // divergence (where the key being searched and the existing structure of patricia tree becomes incompatible) is within currently supplied byte of the search key, b 458 if firstDiff >= mf2.tailLen { 459 // divergence is within currently supplied byte of the search key, b, but outside of the current node 460 //fmt.Fprintf(&sb, "4,tl=%d ", mf2.tailLen) 461 bitsLeft -= mf2.tailLen 462 b32 <<= mf2.tailLen 463 mf2.headLen += mf2.tailLen 464 mf2.tailLen = 0 465 } else { 466 // divergence is within currently supplied byte of the search key, b, and within the current node 467 bitsLeft -= firstDiff 468 b32 <<= firstDiff 469 // there is divergence, move head and tail 470 mf2.tailLen -= firstDiff 471 mf2.headLen += firstDiff 472 //fmt.Fprintf(&sb, "5 ") 473 //fmt.Printf("%s\n", sb.String()) 474 return b32 | uint32(bitsLeft) 475 } 476 } else if mf2.tailLen < bitsLeft { 477 // divergence is outside of currently supplied byte of the search key, b 478 bitsLeft -= mf2.tailLen 479 b32 <<= mf2.tailLen 480 mf2.headLen += mf2.tailLen 481 mf2.tailLen = 0 482 //fmt.Fprintf(&sb, "6 ") 483 } else { 484 // key byte is consumed, but stay on the same node 485 //fmt.Fprintf(&sb, "7,bl=%d ", bitsLeft) 486 mf2.tailLen -= bitsLeft 487 mf2.headLen += bitsLeft 488 bitsLeft = 0 489 b32 = 0 490 } 491 if mf2.tailLen == 0 { 492 // Need to switch to the next node 493 if mf2.side == 0 { 494 if mf2.top.n0 == nil { 495 //fmt.Fprintf(&sb, "8 ") 496 //fmt.Printf("%s\n", sb.String()) 497 return b32 | uint32(bitsLeft) 498 } 499 mf2.nodeStack = append(mf2.nodeStack, mf2.top.n0) 500 mf2.top = mf2.top.n0 501 //fmt.Fprintf(&sb, "a2,0,bl=%d ", bitsLeft) 502 } else if mf2.side == 1 { 503 if mf2.top.n1 == nil { 504 //fmt.Fprintf(&sb, "9 ") 505 //fmt.Printf("%s\n", sb.String()) 506 return b32 | uint32(bitsLeft) 507 } 508 mf2.nodeStack = append(mf2.nodeStack, mf2.top.n1) 509 mf2.top = mf2.top.n1 510 //fmt.Fprintf(&sb, "a2,1,bl=%d ", bitsLeft) 511 } else { 512 panic("") 513 } 514 mf2.headLen = 0 515 mf2.side = 2 516 } 517 } 518 //fmt.Printf("%s\n", sb.String()) 519 return 0 520 } 521 522 // unfold moves the match finder back up the stack by specified number of bits 523 func (mf2 *MatchFinder2) fold(bits int) { 524 //fmt.Printf("fold %d, headLen = %d, tailLen = %d, nodeStackLen = %d\n", bits, mf2.headLen, mf2.tailLen, len(mf2.nodeStack)) 525 bitsLeft := bits 526 for bitsLeft > 0 { 527 //fmt.Printf("headLen = %d, bitsLeft = %d, head = %b, tail = %b, nodeStackLen = %d\n", headLen, bitsLeft, mf2.head, mf2.tail, len(mf2.nodeStack)) 528 if mf2.headLen == bitsLeft { 529 mf2.headLen = 0 530 mf2.tailLen = 0 531 mf2.side = 2 532 bitsLeft = 0 533 } else if mf2.headLen >= bitsLeft { 534 // folding only affects top node, take bits from end of the head and prepend it to the tail 535 mf2.headLen -= bitsLeft 536 mf2.tailLen += bitsLeft 537 bitsLeft = 0 538 } else { 539 // folding affects not only top node, remove top node 540 bitsLeft -= mf2.headLen 541 mf2.nodeStack = mf2.nodeStack[:len(mf2.nodeStack)-1] 542 prevTop := mf2.top 543 mf2.top = mf2.nodeStack[len(mf2.nodeStack)-1] 544 if mf2.top.n0 == prevTop { 545 mf2.side = 0 546 mf2.headLen = int(mf2.top.p0 & 0x1f) 547 //fmt.Printf("mf2.head = p0 %b\n", mf2.head) 548 } else if mf2.top.n1 == prevTop { 549 mf2.side = 1 550 mf2.headLen = int(mf2.top.p1 & 0x1f) 551 //fmt.Printf("mf2.head = p1 %b\n", mf2.head) 552 } else { 553 panic("") 554 } 555 mf2.tailLen = 0 556 } 557 } 558 } 559 560 func (mf2 *MatchFinder2) FindLongestMatches(data []byte) []Match { 561 //fmt.Printf("mf2=%p pt=%p data=[%x]\n", mf2, mf2.pt, data) 562 mf2.matches = mf2.matches[:0] 563 if len(data) < 2 { 564 return mf2.matches 565 } 566 mf2.nodeStack = append(mf2.nodeStack[:0], &mf2.pt.root) 567 mf2.matchStack = mf2.matchStack[:0] 568 mf2.top = &mf2.pt.root 569 mf2.side = 2 570 mf2.tailLen = 0 571 mf2.headLen = 0 572 n := len(data) 573 if cap(mf2.sa) < n { 574 mf2.sa = make([]int32, n) 575 } else { 576 mf2.sa = mf2.sa[:n] 577 } 578 if err := sais.Sais(data, mf2.sa); err != nil { 579 panic(err) 580 } 581 if cap(mf2.inv) < n { 582 mf2.inv = make([]int32, n) 583 } else { 584 mf2.inv = mf2.inv[:n] 585 } 586 for i := 0; i < n; i++ { 587 mf2.inv[mf2.sa[i]] = int32(i) 588 } 589 var k int 590 // Process all suffixes one by one starting from 591 // first suffix in txt[] 592 if cap(mf2.lcp) < n { 593 mf2.lcp = make([]int32, n) 594 } else { 595 mf2.lcp = mf2.lcp[:n] 596 } 597 for i := 0; i < n; i++ { 598 /* If the current suffix is at n-1, then we don’t 599 have next substring to consider. So lcp is not 600 defined for this substring, we put zero. */ 601 if mf2.inv[i] == int32(n-1) { 602 k = 0 603 continue 604 } 605 606 /* j contains index of the next substring to 607 be considered to compare with the present 608 substring, i.e., next string in suffix array */ 609 j := int(mf2.sa[mf2.inv[i]+1]) 610 611 // Directly start matching from k'th index as 612 // at-least k-1 characters will match 613 for i+k < n && j+k < n && data[i+k] == data[j+k] { 614 k++ 615 } 616 mf2.lcp[mf2.inv[i]] = int32(k) // lcp for the present suffix. 617 618 // Deleting the starting character from the string. 619 if k > 0 { 620 k-- 621 } 622 } 623 //fmt.Printf("sa=[%d]\n", mf2.sa) 624 //fmt.Printf("lcp=[%d]\n", mf2.lcp) 625 depth := 0 // Depth in bits 626 var lastMatch *Match 627 for i := 0; i < n; i++ { 628 // lcp[i] is the Longest Common Prefix of suffixes starting from sa[i] and sa[i+1] 629 //fmt.Printf("Suffix [%x], depth = %d\n", data[mf2.sa[i]:n], depth) 630 if i > 0 { 631 lcp := int(mf2.lcp[i-1]) 632 // lcp[i-1] is the Longest Common Prefix of suffixes starting from sa[i-1] and sa[i] 633 if depth > 8*lcp { 634 //fmt.Printf("before fold depth = %d, mf2.lcp[i-1] = %d\n", depth, mf2.lcp[i-1]) 635 mf2.fold(depth - 8*lcp) 636 depth = 8 * lcp 637 //b1, d1 := mf2.Current() 638 //fmt.Printf("current: [%x] %d, depth = %d\n", b1, d1, depth) 639 //fmt.Printf("after fold depth = %d\n", depth) 640 for lastMatch != nil && lastMatch.End-lastMatch.Start > lcp { 641 //fmt.Printf("Popped %d: [%d-%d] [%x]\n", len(mf2.matchStack)-1, lastMatch.Start, lastMatch.End, data[lastMatch.Start:lastMatch.End]) 642 mf2.matchStack = mf2.matchStack[:len(mf2.matchStack)-1] 643 if len(mf2.matchStack) == 0 { 644 lastMatch = nil 645 } else { 646 lastMatch = &mf2.matchStack[len(mf2.matchStack)-1] 647 } 648 } 649 } else { 650 r := depth % 8 651 if r > 0 { 652 mf2.fold(r) 653 depth -= r 654 //b1, d1 := mf2.Current() 655 //fmt.Printf("current: [%x] %d, depth = %d\n", b1, d1, depth) 656 } 657 } 658 } 659 sa := int(mf2.sa[i]) 660 start := sa + depth/8 661 for end := start + 1; end <= n; end++ { 662 //fmt.Printf("Looking at [%d-%d] [%x]\n", sa, end, data[sa:end]) 663 d := mf2.unfold(data[end-1]) 664 depth += 8 - int(d&0x1f) 665 //fmt.Printf("after unfold depth=%d\n", depth) 666 //b1, d1 := mf2.Current() 667 //fmt.Printf("current: [%x][%x] %d, depth = %d\n", b1, data[sa:end], d1, depth) 668 if d != 0 { 669 //fmt.Printf("divergence found: %b\n", d) 670 break 671 } 672 if mf2.tailLen != 0 || mf2.top.val == nil { 673 //fmt.Printf("tailLen = %d, val == nil %t, p=%p\n", mf2.tailLen, mf2.top.val == nil, mf2.top) 674 continue 675 } 676 if cap(mf2.matchStack) == len(mf2.matchStack) { 677 mf2.matchStack = append(mf2.matchStack, Match{}) 678 } else { 679 mf2.matchStack = mf2.matchStack[:len(mf2.matchStack)+1] 680 } 681 lastMatch = &mf2.matchStack[len(mf2.matchStack)-1] 682 // This possibly overwrites previous match for the same start position 683 //fmt.Printf("Push on the match stack [%d-%d] [%x]\n", sa, end, data[sa:end]) 684 lastMatch.Start = sa 685 lastMatch.End = end 686 lastMatch.Val = mf2.top.val 687 } 688 if lastMatch != nil { 689 mf2.matches = append(mf2.matches, Match{}) 690 m := &mf2.matches[len(mf2.matches)-1] 691 m.Start = sa 692 m.End = sa + lastMatch.End - lastMatch.Start 693 m.Val = lastMatch.Val 694 //fmt.Printf("Added new Match: [%d-%d] [%x]\n", m.Start, m.End, data[m.Start:m.End]) 695 } 696 } 697 //fmt.Printf("before sorting %d matches\n", len(mf2.matches)) 698 if len(mf2.matches) < 2 { 699 return mf2.matches 700 } 701 //sort.Sort(&mf2.matches) 702 slices.SortFunc(mf2.matches, func(i, j Match) bool { return i.Start < j.Start }) 703 704 lastEnd := mf2.matches[0].End 705 j := 1 706 for i, m := range mf2.matches { 707 if i > 0 { 708 if m.End > lastEnd { 709 if i != j { 710 mf2.matches[j] = m 711 } 712 lastEnd = m.End 713 j++ 714 } 715 } 716 } 717 return mf2.matches[:j] 718 } 719 720 func (mf2 *MatchFinder2) Current() ([]byte, int) { 721 var b []byte 722 var depth int 723 last := len(mf2.nodeStack) - 1 724 for i, n := range mf2.nodeStack { 725 var p uint32 726 if i < last { 727 next := mf2.nodeStack[i+1] 728 if n.n0 == next { 729 p = n.p0 730 } else if n.n1 == next { 731 p = n.p1 732 } else { 733 panic("") 734 } 735 } else { 736 if mf2.side == 0 { 737 p = n.p0 738 } else if mf2.side == 1 { 739 p = n.p1 740 } 741 p = (p & 0xffffffe0) | uint32(mf2.headLen) 742 } 743 fmt.Printf("i,p=%d, %b\n", i, p) 744 // Add bit by bit 745 for (p & 0x1f) > 0 { 746 if depth >= 8*len(b) { 747 b = append(b, 0) 748 } 749 if p&0x80000000 != 0 { 750 b[depth/8] |= uint8(1) << (7 - (depth % 8)) 751 } 752 depth++ 753 p = ((p & 0xffffffe0) << 1) | (p & 0x1f) - 1 754 } 755 } 756 return b, depth 757 } 758 759 func (mf *MatchFinder) FindLongestMatches(data []byte) []Match { 760 matchCount := 0 761 s := &mf.s 762 lastEnd := 0 763 for start := 0; start < len(data); start++ { 764 s.reset(&mf.pt.root) 765 emitted := false 766 for end := start + 1; end <= len(data); end++ { 767 if d := s.transition(data[end-1], true /* readonly */); d != 0 { 768 break 769 } 770 if s.tail != 0 || s.n.val == nil || end <= lastEnd { 771 continue 772 } 773 var m *Match 774 if emitted { 775 m = &mf.matches[matchCount-1] 776 } else { 777 if matchCount == len(mf.matches) { 778 mf.matches = append(mf.matches, Match{}) 779 m = &mf.matches[len(mf.matches)-1] 780 } else { 781 m = &mf.matches[matchCount] 782 } 783 matchCount++ 784 emitted = true 785 } 786 // This possibly overwrites previous match for the same start position 787 m.Start = start 788 m.End = end 789 m.Val = s.n.val 790 lastEnd = end 791 } 792 } 793 return mf.matches[:matchCount] 794 }