github.com/ledgerwatch/erigon-lib@v1.0.0/patricia/patricia.go (about)

     1  /*
     2     Copyright 2021 Erigon contributors
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package patricia
    18  
    19  import (
    20  	"fmt"
    21  	"math/bits"
    22  	"strings"
    23  
    24  	"github.com/ledgerwatch/erigon-lib/sais"
    25  	"golang.org/x/exp/slices"
    26  )
    27  
    28  // Implementation of a Patricia tree for efficient search of substrings from a dictionary in a given string.
        //
        // node is one node of that tree. It has two outgoing edges, chosen by the
        // next bit of the key: p0/n0 when the bit is 0 and p1/n1 when it is 1.
        // p0 and p1 are bit strings packed into a uint32: the lowest 5 bits hold
        // the length in bits and the remaining 27 (upper) bits hold the bits
        // themselves, left-aligned.
    29  type node struct {
    30  	val interface{} // value associated with the key
    31  	n0  *node // child reached after following the whole of p0 (nil if none)
    32  	n1  *node // child reached after following the whole of p1 (nil if none)
    33  	p0  uint32 // packed bit string of the edge taken when the next key bit is 0
    34  	p1  uint32 // packed bit string of the edge taken when the next key bit is 1
    35  }
    36  
    37  func tostr(x uint32) string {
    38  	str := fmt.Sprintf("%b", x)
    39  	for len(str) < 32 {
    40  		str = "0" + str
    41  	}
    42  	return str[:x&0x1f]
    43  }
    44  
    45  // print assumes values are byte slices
    46  func (n *node) print(sb *strings.Builder, indent string) {
    47  	sb.WriteString(indent)
    48  	fmt.Fprintf(sb, "%p ", n)
    49  	sb.WriteString(tostr(n.p0))
    50  	sb.WriteString("\n")
    51  	if n.n0 != nil {
    52  		n.n0.print(sb, indent+"    ")
    53  	}
    54  	sb.WriteString(indent)
    55  	fmt.Fprintf(sb, "%p ", n)
    56  	sb.WriteString(tostr(n.p1))
    57  	sb.WriteString("\n")
    58  	if n.n1 != nil {
    59  		n.n1.print(sb, indent+"    ")
    60  	}
    61  	if n.val != nil {
    62  		sb.WriteString(indent)
    63  		sb.WriteString("val:")
    64  		fmt.Fprintf(sb, " %x", n.val.([]byte))
    65  		sb.WriteString("\n")
    66  	}
    67  }
    68  
    69  func (n *node) String() string {
    70  	var sb strings.Builder
    71  	n.print(&sb, "")
    72  	return sb.String()
    73  }
    74  
    75  // state represents a position anywhere inside the patricia tree.
    76  // The position is identified by the combination of a node and the partitioning
    77  // of that node's p0 or p1 into head and tail.
    78  // As with p0 and p1, head and tail are encoded as follows:
    79  // lowest 5 bits encode the length in bits, and the remaining 27 bits
    80  // encode the actual head or tail.
    81  // For example, if the position is at the beginning of a node,
    82  // head would be zero, and tail would be equal to either p0 or p1,
    83  // depending on whether the position corresponds to going left (0) or right (1).
    84  type state struct {
    85  	n    *node // node whose outgoing edge (p0 or p1) the position lies on
    86  	head uint32 // packed bits of the edge that have already been consumed
    87  	tail uint32 // packed bits of the edge that are still ahead of the position
    88  }
    89  
    90  func (s *state) String() string {
    91  	return fmt.Sprintf("%p head %s tail %s", s.n, tostr(s.head), tostr(s.tail))
    92  }
    93  
    94  func (s *state) reset(n *node) {
    95  	s.n = n
    96  	s.head = 0
    97  	s.tail = 0
    98  }
    99  
   100  func makestate(n *node) *state {
   101  	return &state{n: n, head: 0, tail: 0}
   102  }
   103  
   104  // transition consumes the next byte of the key, moves the state to the corresponding
   105  // node of the patricia tree and returns the divergence prefix (0 if there is no divergence).
        //
        // The divergence is the part of b that could not be consumed: the remaining
        // bits are left-aligned in the upper bits of the result and their count is
        // stored in the lowest 5 bits (the same packing as p0/p1, head and tail).
        //
        // When readonly is false and a fully matched edge has no child node yet, the
        // child is created and the rest of b is stored as its p0 or p1. When readonly
        // is true, that situation returns the unconsumed bits as a divergence
        // instead. One code path (marked below) panics on a missing child
        // regardless of readonly.
   106  func (s *state) transition(b byte, readonly bool) uint32 {
   107  	bitsLeft := 8 // Bits in b to process
   108  	b32 := uint32(b) << 24 // b left-aligned, so the next unprocessed bit is always bit 31
   109  	for bitsLeft > 0 {
   110  		if s.head == 0 {
   111  			// tail has not been determined yet, do it now
   112  			if b32&0x80000000 == 0 {
   113  				s.tail = s.n.p0
   114  			} else {
   115  				s.tail = s.n.p1
   116  			}
   117  		}
   118  		if s.tail == 0 {
   119  			// state positioned at the end of the current node
   120  			return b32 | uint32(bitsLeft)
   121  		}
   122  		tailLen := int(s.tail & 0x1f)
   123  		firstDiff := bits.LeadingZeros32(s.tail ^ b32) // First bit where b32 and tail are different
   124  		if firstDiff < bitsLeft {
   125  			// divergence (where the key being searched and the existing structure of patricia tree becomes incompatible) is within currently supplied byte of the search key, b
   126  			if firstDiff >= tailLen {
   127  				// divergence is within currently supplied byte of the search key, b, but outside of the current node
   128  				bitsLeft -= tailLen
   129  				b32 <<= tailLen
   130  				// Need to switch to the next node
        				// The side (0 or 1) of the current edge is given by its first bit: taken
        				// from tail while head is still empty, from head otherwise.
        				// NOTE(review): unlike the branch further below, a missing child is not
        				// created here (nor reported as divergence); it panics even when
        				// readonly is true.
   131  				if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) {
   132  					if s.n.n0 == nil {
   133  						panic("")
   134  					}
   135  					s.n = s.n.n0
   136  				} else {
   137  					if s.n.n1 == nil {
   138  						panic("")
   139  					}
   140  					s.n = s.n.n1
   141  				}
   142  				s.head = 0
   143  				s.tail = 0
   144  			} else {
   145  				// divergence is within currently supplied byte of the search key, b, and within the current node
   146  				bitsLeft -= firstDiff
   147  				b32 <<= firstDiff
   148  				// there is divergence, move head and tail
        				// mask selects the top firstDiff bits of tail, i.e. the bits that matched;
        				// they are appended to head and removed from tail, and the lengths
        				// (lowest 5 bits) are adjusted accordingly.
   149  				mask := ^(uint32(1)<<(32-firstDiff) - 1)
   150  				s.head |= (s.tail & mask) >> (s.head & 0x1f)
   151  				s.head += uint32(firstDiff)
   152  				s.tail = (s.tail&0xffffffe0)<<firstDiff | (s.tail & 0x1f)
   153  				s.tail -= uint32(firstDiff)
   154  				return b32 | uint32(bitsLeft)
   155  			}
   156  		} else if tailLen < bitsLeft {
   157  			// divergence is outside of currently supplied byte of the search key, b
   158  			bitsLeft -= tailLen
   159  			b32 <<= tailLen
   160  			// Switch to the next node
   161  			if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) {
   162  				if s.n.n0 == nil {
   163  					if readonly {
   164  						return b32 | uint32(bitsLeft)
   165  					}
        					// Create the missing child; the rest of b becomes its edge on the
        					// side selected by the first remaining bit.
   166  					s.n.n0 = &node{}
   167  					if b32&0x80000000 == 0 {
   168  						s.n.n0.p0 = b32 | uint32(bitsLeft)
   169  					} else {
   170  						s.n.n0.p1 = b32 | uint32(bitsLeft)
   171  					}
   172  				}
   173  				s.n = s.n.n0
   174  			} else {
   175  				if s.n.n1 == nil {
   176  					if readonly {
   177  						return b32 | uint32(bitsLeft)
   178  					}
   179  					s.n.n1 = &node{}
   180  					if b32&0x80000000 == 0 {
   181  						s.n.n1.p0 = b32 | uint32(bitsLeft)
   182  					} else {
   183  						s.n.n1.p1 = b32 | uint32(bitsLeft)
   184  					}
   185  				}
   186  				s.n = s.n.n1
   187  			}
   188  			s.head = 0
   189  			s.tail = 0
   190  		} else {
   191  			// key byte is consumed, but stay on the same node
        			// Move the bitsLeft leading bits of tail over to the end of head.
   192  			mask := ^(uint32(1)<<(32-bitsLeft) - 1)
   193  			s.head |= (s.tail & mask) >> (s.head & 0x1f)
   194  			s.head += uint32(bitsLeft)
   195  			s.tail = (s.tail&0xffffffe0)<<bitsLeft | (s.tail & 0x1f)
   196  			s.tail -= uint32(bitsLeft)
   197  			bitsLeft = 0
        			// If the edge is now exhausted and a child exists, step onto the child so
        			// that s.n.val refers to the node the key ends at. Without a child the
        			// state stays at the end of the edge (head != 0, tail == 0).
   198  			if s.tail == 0 {
   199  				if s.head&0x80000000 == 0 {
   200  					if s.n.n0 != nil {
   201  						s.n = s.n.n0
   202  						s.head = 0
   203  					}
   204  				} else {
   205  					if s.n.n1 != nil {
   206  						s.n = s.n.n1
   207  						s.head = 0
   208  					}
   209  				}
   210  			}
   211  		}
   212  	}
   213  	return 0
   214  }
   215  
        // diverge extends the tree at the current position with the bit string packed
        // in divergence (lowest 5 bits: length, upper bits: the bits, left-aligned),
        // such as the value returned by transition.
        //
        // If the state is at the end of an edge (tail == 0), the divergence is
        // appended to that edge. When the combined length would exceed 27 bits, the
        // edge is first filled up to 27 bits and a new child node is chained to it to
        // hold the remainder.
        //
        // Otherwise the edge is split at the current position: a new node is inserted
        // whose one branch is the divergence and whose other branch is the old tail
        // (inheriting the old child). The state ends up on the new node with head set
        // to the divergence and an empty tail.
   216  func (s *state) diverge(divergence uint32) {
   217  	if s.tail == 0 {
   218  		// try to add to the existing head
   219  		//fmt.Printf("adding divergence to existing head\n")
   220  		dLen := int(divergence & 0x1f)
   221  		headLen := int(s.head & 0x1f)
   222  		d32 := divergence & 0xffffffe0
   223  		//fmt.Printf("headLen %d + dLen %d = %d\n", headLen, dLen, headLen+dLen)
   224  		if headLen+dLen > 27 {
        			// Only 27 bits fit into one packed value: take the leading 27-headLen
        			// bits of the divergence to fill up the current edge.
   225  			mask := ^(uint32(1)<<(headLen+5) - 1)
   226  			//fmt.Printf("mask = %b\n", mask)
   227  			s.head |= (d32 & mask) >> headLen
   228  			s.head += uint32(27 - headLen)
   229  			//fmt.Printf("s.head %s\n", tostr(s.head))
        			// Store the full edge on the proper side and chain a fresh node after it.
   230  			var dn node
   231  			if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) {
   232  				s.n.p0 = s.head
   233  				s.n.n0 = &dn
   234  			} else {
   235  				s.n.p1 = s.head
   236  				s.n.n1 = &dn
   237  			}
   238  			s.n = &dn
   239  			s.head = 0
   240  			s.tail = 0
        			// Drop the bits that were just stored; the rest goes onto the new node.
   241  			d32 <<= 27 - headLen
   242  			dLen -= (27 - headLen)
   243  			headLen = 0
   244  		}
   245  		//fmt.Printf("headLen %d + dLen %d = %d\n", headLen, dLen, headLen+dLen)
   246  		mask := ^(uint32(1)<<(32-dLen) - 1)
   247  		//fmt.Printf("mask = %b\n", mask)
   248  		s.head |= (d32 & mask) >> headLen
   249  		s.head += uint32(dLen)
   250  		//fmt.Printf("s.head %s\n", tostr(s.head))
   251  		if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) {
   252  			s.n.p0 = s.head
   253  		} else {
   254  			s.n.p1 = s.head
   255  		}
   256  		return
   257  	}
   258  	// create a new node
        	// The new node dn gets the divergence on the side given by its first bit and
        	// the old tail, together with the old child of this edge, on the other side.
   259  	var dn node
   260  	if divergence&0x80000000 == 0 {
   261  		dn.p0 = divergence
   262  		dn.p1 = s.tail
   263  		if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) {
   264  			dn.n1 = s.n.n0
   265  		} else {
   266  			dn.n1 = s.n.n1
   267  		}
   268  	} else {
   269  		dn.p1 = divergence
   270  		dn.p0 = s.tail
   271  		if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) {
   272  			dn.n0 = s.n.n0
   273  		} else {
   274  			dn.n0 = s.n.n1
   275  		}
   276  	}
        	// Shorten the original edge to the already consumed head and hang dn off it.
   277  	if (s.head == 0 && s.tail&0x80000000 == 0) || (s.head != 0 && s.head&0x80000000 == 0) {
   278  		s.n.n0 = &dn
   279  		s.n.p0 = s.head
   280  	} else {
   281  		s.n.n1 = &dn
   282  		s.n.p1 = s.head
   283  	}
   284  	s.n = &dn
   285  	s.head = divergence
   286  	s.tail = 0
   287  }
   288  
   289  func (n *node) insert(key []byte, value interface{}) {
   290  	s := makestate(n)
   291  	for _, b := range key {
   292  		divergence := s.transition(b, false /* readonly */)
   293  		if divergence != 0 {
   294  			s.diverge(divergence)
   295  		}
   296  	}
   297  	s.insert(value)
   298  }
   299  
   300  func (s *state) insert(value interface{}) {
   301  	if s.tail != 0 {
   302  		s.diverge(0)
   303  	}
   304  	if s.head != 0 {
   305  		var dn node
   306  		if s.head&0x80000000 == 0 {
   307  			s.n.n0 = &dn
   308  		} else {
   309  			s.n.n1 = &dn
   310  		}
   311  		s.n = &dn
   312  		s.head = 0
   313  	}
   314  	//fmt.Printf("set val to %p\n", s.n)
   315  	s.n.val = value
   316  }
   317  
   318  func (n *node) get(key []byte) (interface{}, bool) {
   319  	s := makestate(n)
   320  	for _, b := range key {
   321  		divergence := s.transition(b, true /* readonly */)
   322  		//fmt.Printf("get %x, b = %x, divergence = %s\nstate=%s\n", key, b, tostr(divergence), s)
   323  		if divergence != 0 {
   324  			return nil, false
   325  		}
   326  	}
   327  	if s.tail != 0 {
   328  		return nil, false
   329  	}
   330  	return s.n.val, s.n.val != nil
   331  }
   332  
   333  type PatriciaTree struct {
   334  	root node
   335  }
   336  
   337  func (pt *PatriciaTree) Insert(key []byte, value interface{}) {
   338  	//fmt.Printf("%p Insert [%x]\n", pt, key)
   339  	pt.root.insert(key, value)
   340  }
   341  
   342  func (pt *PatriciaTree) Get(key []byte) (interface{}, bool) {
   343  	return pt.root.get(key)
   344  }
   345  
   346  type Match struct {
   347  	Val   interface{}
   348  	Start int
   349  	End   int
   350  }
   351  
   352  type Matches []Match
   353  
   354  func (m Matches) Len() int {
   355  	return len(m)
   356  }
   357  
   358  func (m Matches) Less(i, j int) bool {
   359  	return m[i].Start < m[j].Start
   360  }
   361  
   362  func (m *Matches) Swap(i, j int) {
   363  	(*m)[i], (*m)[j] = (*m)[j], (*m)[i]
   364  }
   365  
   366  type MatchFinder struct {
   367  	pt      *PatriciaTree
   368  	s       state
   369  	matches []Match
   370  }
   371  
   372  func NewMatchFinder(pt *PatriciaTree) *MatchFinder {
   373  	return &MatchFinder{pt: pt}
   374  }
   375  
   376  type MatchFinder2 struct {
   377  	top        *node // Top of nodeStack
   378  	pt         *PatriciaTree
   379  	nodeStack  []*node
   380  	matchStack []Match
   381  	matches    Matches
   382  	sa         []int32
   383  	lcp        []int32
   384  	inv        []int32
   385  	headLen    int
   386  	tailLen    int
   387  	side       int // 0, 1, or 2 (if side is not determined yet)
   388  }
   389  
   390  func NewMatchFinder2(pt *PatriciaTree) *MatchFinder2 {
   391  	return &MatchFinder2{pt: pt, top: &pt.root, nodeStack: []*node{&pt.root}, side: 2}
   392  }
   393  
   394  // unfold consumes the next byte of the key, moves the match finder to the corresponding
   395  // node of the patricia tree and returns the divergence prefix (0 if there is no divergence).
        //
        // Every node that is entered is pushed onto nodeStack. The position inside
        // the current edge is tracked by side (which of p0/p1 is being followed, 2
        // when undetermined), headLen (bits already consumed) and tailLen (bits
        // remaining). The tree itself is only read.
        //
        // A non-zero result holds the unconsumed bits of b left-aligned in the upper
        // bits and their count in the lowest 5 bits.
   396  func (mf2 *MatchFinder2) unfold(b byte) uint32 {
   397  	//fmt.Printf("unfold %x, headLen = %d, tailLen = %d, nodeStackLen = %d\n", b, mf2.headLen, mf2.tailLen, len(mf2.nodeStack))
   398  	//var sb strings.Builder
   399  	bitsLeft := 8 // Bits in b to process
   400  	b32 := uint32(b) << 24
   401  	for bitsLeft > 0 {
   402  		if mf2.side == 2 {
   403  			// tail has not been determined yet, do it now
   404  			if b32&0x80000000 == 0 {
   405  				mf2.side = 0
   406  				mf2.headLen = 0
   407  				mf2.tailLen = int(mf2.top.p0 & 0x1f)
   408  			} else {
   409  				mf2.side = 1
   410  				mf2.headLen = 0
   411  				mf2.tailLen = int(mf2.top.p1 & 0x1f)
   412  			}
   413  			if mf2.tailLen == 0 {
   414  				// state positioned at the end of the current node
   415  				mf2.side = 2
   416  				//fmt.Fprintf(&sb, "1 ")
   417  				//fmt.Printf("%s\n", sb.String())
   418  				return b32 | uint32(bitsLeft)
   419  			}
   420  		}
   421  		if mf2.tailLen == 0 {
   422  			// Need to switch to the next node
   423  			if mf2.side == 0 {
   424  				if mf2.top.n0 == nil {
   425  					//fmt.Fprintf(&sb, "2 ")
   426  					//fmt.Printf("%s\n", sb.String())
   427  					return b32 | uint32(bitsLeft)
   428  				}
   429  				mf2.nodeStack = append(mf2.nodeStack, mf2.top.n0)
   430  				mf2.top = mf2.top.n0
   431  				//fmt.Fprintf(&sb, "a1,0,bl=%d ", bitsLeft)
   432  			} else if mf2.side == 1 {
   433  				if mf2.top.n1 == nil {
   434  					//fmt.Fprintf(&sb, "3 ")
   435  					//fmt.Printf("%s\n", sb.String())
   436  					return b32 | uint32(bitsLeft)
   437  				}
   438  				mf2.nodeStack = append(mf2.nodeStack, mf2.top.n1)
   439  				mf2.top = mf2.top.n1
   440  				//fmt.Fprintf(&sb, "a1,1,bl=%d ", bitsLeft)
   441  			} else {
   442  				panic("")
   443  			}
   444  			mf2.headLen = 0
   445  			mf2.side = 2
   446  		}
        		// Reconstruct the not yet consumed part of the current edge, left-aligned,
        		// by dropping the length bits and the headLen bits already consumed.
   447  		var tail uint32
   448  		if mf2.side == 0 {
   449  			tail = (mf2.top.p0 & 0xffffffe0) << mf2.headLen
   450  		} else if mf2.side == 1 {
   451  			tail = (mf2.top.p1 & 0xffffffe0) << mf2.headLen
   452  		} else {
        			// side is undetermined: report the rest of the byte as unconsumed
   453  			return b32 | uint32(bitsLeft)
   454  		}
   455  		firstDiff := bits.LeadingZeros32(tail ^ b32) // First bit where b32 and tail are different
   456  		if firstDiff < bitsLeft {
   457  			// divergence (where the key being searched and the existing structure of patricia tree becomes incompatible) is within currently supplied byte of the search key, b
   458  			if firstDiff >= mf2.tailLen {
   459  				// divergence is within currently supplied byte of the search key, b, but outside of the current node
   460  				//fmt.Fprintf(&sb, "4,tl=%d ", mf2.tailLen)
   461  				bitsLeft -= mf2.tailLen
   462  				b32 <<= mf2.tailLen
   463  				mf2.headLen += mf2.tailLen
   464  				mf2.tailLen = 0
   465  			} else {
   466  				// divergence is within currently supplied byte of the search key, b, and within the current node
   467  				bitsLeft -= firstDiff
   468  				b32 <<= firstDiff
   469  				// there is divergence, move head and tail
   470  				mf2.tailLen -= firstDiff
   471  				mf2.headLen += firstDiff
   472  				//fmt.Fprintf(&sb, "5 ")
   473  				//fmt.Printf("%s\n", sb.String())
   474  				return b32 | uint32(bitsLeft)
   475  			}
   476  		} else if mf2.tailLen < bitsLeft {
   477  			// divergence is outside of currently supplied byte of the search key, b
   478  			bitsLeft -= mf2.tailLen
   479  			b32 <<= mf2.tailLen
   480  			mf2.headLen += mf2.tailLen
   481  			mf2.tailLen = 0
   482  			//fmt.Fprintf(&sb, "6 ")
   483  		} else {
   484  			// key byte is consumed, but stay on the same node
   485  			//fmt.Fprintf(&sb, "7,bl=%d ", bitsLeft)
   486  			mf2.tailLen -= bitsLeft
   487  			mf2.headLen += bitsLeft
   488  			bitsLeft = 0
   489  			b32 = 0
   490  		}
        		// The edge has been followed to its end: descend into the child on the
        		// current side, or report the unconsumed bits if there is no child.
   491  		if mf2.tailLen == 0 {
   492  			// Need to switch to the next node
   493  			if mf2.side == 0 {
   494  				if mf2.top.n0 == nil {
   495  					//fmt.Fprintf(&sb, "8 ")
   496  					//fmt.Printf("%s\n", sb.String())
   497  					return b32 | uint32(bitsLeft)
   498  				}
   499  				mf2.nodeStack = append(mf2.nodeStack, mf2.top.n0)
   500  				mf2.top = mf2.top.n0
   501  				//fmt.Fprintf(&sb, "a2,0,bl=%d ", bitsLeft)
   502  			} else if mf2.side == 1 {
   503  				if mf2.top.n1 == nil {
   504  					//fmt.Fprintf(&sb, "9 ")
   505  					//fmt.Printf("%s\n", sb.String())
   506  					return b32 | uint32(bitsLeft)
   507  				}
   508  				mf2.nodeStack = append(mf2.nodeStack, mf2.top.n1)
   509  				mf2.top = mf2.top.n1
   510  				//fmt.Fprintf(&sb, "a2,1,bl=%d ", bitsLeft)
   511  			} else {
   512  				panic("")
   513  			}
   514  			mf2.headLen = 0
   515  			mf2.side = 2
   516  		}
   517  	}
   518  	//fmt.Printf("%s\n", sb.String())
   519  	return 0
   520  }
   521  
   522  // unfold moves the match finder back up the stack by specified number of bits
   523  func (mf2 *MatchFinder2) fold(bits int) {
   524  	//fmt.Printf("fold %d, headLen = %d, tailLen = %d, nodeStackLen = %d\n", bits, mf2.headLen, mf2.tailLen, len(mf2.nodeStack))
   525  	bitsLeft := bits
   526  	for bitsLeft > 0 {
   527  		//fmt.Printf("headLen = %d, bitsLeft = %d, head = %b, tail = %b, nodeStackLen = %d\n", headLen, bitsLeft, mf2.head, mf2.tail, len(mf2.nodeStack))
   528  		if mf2.headLen == bitsLeft {
   529  			mf2.headLen = 0
   530  			mf2.tailLen = 0
   531  			mf2.side = 2
   532  			bitsLeft = 0
   533  		} else if mf2.headLen >= bitsLeft {
   534  			// folding only affects top node, take bits from end of the head and prepend it to the tail
   535  			mf2.headLen -= bitsLeft
   536  			mf2.tailLen += bitsLeft
   537  			bitsLeft = 0
   538  		} else {
   539  			// folding affects not only top node, remove top node
   540  			bitsLeft -= mf2.headLen
   541  			mf2.nodeStack = mf2.nodeStack[:len(mf2.nodeStack)-1]
   542  			prevTop := mf2.top
   543  			mf2.top = mf2.nodeStack[len(mf2.nodeStack)-1]
   544  			if mf2.top.n0 == prevTop {
   545  				mf2.side = 0
   546  				mf2.headLen = int(mf2.top.p0 & 0x1f)
   547  				//fmt.Printf("mf2.head = p0 %b\n", mf2.head)
   548  			} else if mf2.top.n1 == prevTop {
   549  				mf2.side = 1
   550  				mf2.headLen = int(mf2.top.p1 & 0x1f)
   551  				//fmt.Printf("mf2.head = p1 %b\n", mf2.head)
   552  			} else {
   553  				panic("")
   554  			}
   555  			mf2.tailLen = 0
   556  		}
   557  	}
   558  }
   559  
        // FindLongestMatches searches data for keys of the dictionary and returns the
        // matches found, ordered by Start.
        //
        // It builds the suffix array of data (sais.Sais), its inverse and the LCP
        // array, then visits the suffixes in suffix-array order. Between two
        // neighbouring suffixes the position in the tree is folded back to their
        // common prefix and then unfolded along the rest of the new suffix, so the
        // shared part of the path is not walked again. For every suffix at most one
        // match is recorded, taken from the top of the match stack. After sorting by
        // Start, a match is kept only if it ends after all matches kept before it.
        //
        // Data shorter than 2 bytes yields no matches. The returned slice shares its
        // storage with the finder and is overwritten by the next call. The function
        // panics if sais.Sais reports an error.
   560  func (mf2 *MatchFinder2) FindLongestMatches(data []byte) []Match {
   561  	//fmt.Printf("mf2=%p pt=%p data=[%x]\n", mf2, mf2.pt, data)
   562  	mf2.matches = mf2.matches[:0]
   563  	if len(data) < 2 {
   564  		return mf2.matches
   565  	}
        	// Reset the traversal to the root of the tree.
   566  	mf2.nodeStack = append(mf2.nodeStack[:0], &mf2.pt.root)
   567  	mf2.matchStack = mf2.matchStack[:0]
   568  	mf2.top = &mf2.pt.root
   569  	mf2.side = 2
   570  	mf2.tailLen = 0
   571  	mf2.headLen = 0
   572  	n := len(data)
        	// Suffix array, reusing the buffer from previous calls when it is large enough.
   573  	if cap(mf2.sa) < n {
   574  		mf2.sa = make([]int32, n)
   575  	} else {
   576  		mf2.sa = mf2.sa[:n]
   577  	}
   578  	if err := sais.Sais(data, mf2.sa); err != nil {
   579  		panic(err)
   580  	}
        	// Inverse suffix array: inv[p] is the rank of the suffix starting at p.
   581  	if cap(mf2.inv) < n {
   582  		mf2.inv = make([]int32, n)
   583  	} else {
   584  		mf2.inv = mf2.inv[:n]
   585  	}
   586  	for i := 0; i < n; i++ {
   587  		mf2.inv[mf2.sa[i]] = int32(i)
   588  	}
   589  	var k int
   590  	// Process all suffixes one by one starting from
   591  	// first suffix in txt[]
   592  	if cap(mf2.lcp) < n {
   593  		mf2.lcp = make([]int32, n)
   594  	} else {
   595  		mf2.lcp = mf2.lcp[:n]
   596  	}
   597  	for i := 0; i < n; i++ {
   598  		/* If the current suffix is at n-1, then we don’t
   599  		   have next substring to consider. So lcp is not
   600  		   defined for this substring, we put zero. */
   601  		if mf2.inv[i] == int32(n-1) {
   602  			k = 0
   603  			continue
   604  		}
   605  
   606  		/* j contains index of the next substring to
   607  		   be considered  to compare with the present
   608  		   substring, i.e., next string in suffix array */
   609  		j := int(mf2.sa[mf2.inv[i]+1])
   610  
   611  		// Directly start matching from k'th index as
   612  		// at-least k-1 characters will match
   613  		for i+k < n && j+k < n && data[i+k] == data[j+k] {
   614  			k++
   615  		}
   616  		mf2.lcp[mf2.inv[i]] = int32(k) // lcp for the present suffix.
   617  
   618  		// Deleting the starting character from the string.
   619  		if k > 0 {
   620  			k--
   621  		}
   622  	}
   623  	//fmt.Printf("sa=[%d]\n", mf2.sa)
   624  	//fmt.Printf("lcp=[%d]\n", mf2.lcp)
   625  	depth := 0 // Depth in bits
   626  	var lastMatch *Match
   627  	for i := 0; i < n; i++ {
   628  		// lcp[i] is the Longest Common Prefix of suffixes starting from sa[i] and sa[i+1]
   629  		//fmt.Printf("Suffix [%x], depth = %d\n", data[mf2.sa[i]:n], depth)
   630  		if i > 0 {
   631  			lcp := int(mf2.lcp[i-1])
   632  			// lcp[i-1] is the Longest Common Prefix of suffixes starting from sa[i-1] and sa[i]
   633  			if depth > 8*lcp {
        				// The previous suffix was followed deeper than what it shares with this
        				// one: go back up to the common prefix and discard the matches that
        				// are longer than it.
   634  				//fmt.Printf("before fold depth = %d, mf2.lcp[i-1] = %d\n", depth, mf2.lcp[i-1])
   635  				mf2.fold(depth - 8*lcp)
   636  				depth = 8 * lcp
   637  				//b1, d1 := mf2.Current()
   638  				//fmt.Printf("current: [%x] %d, depth = %d\n", b1, d1, depth)
   639  				//fmt.Printf("after fold depth = %d\n", depth)
   640  				for lastMatch != nil && lastMatch.End-lastMatch.Start > lcp {
   641  					//fmt.Printf("Popped %d: [%d-%d] [%x]\n", len(mf2.matchStack)-1, lastMatch.Start, lastMatch.End, data[lastMatch.Start:lastMatch.End])
   642  					mf2.matchStack = mf2.matchStack[:len(mf2.matchStack)-1]
   643  					if len(mf2.matchStack) == 0 {
   644  						lastMatch = nil
   645  					} else {
   646  						lastMatch = &mf2.matchStack[len(mf2.matchStack)-1]
   647  					}
   648  				}
   649  			} else {
        				// Give back the bits of a partially consumed byte so that depth is a
        				// whole number of bytes again.
   650  				r := depth % 8
   651  				if r > 0 {
   652  					mf2.fold(r)
   653  					depth -= r
   654  					//b1, d1 := mf2.Current()
   655  					//fmt.Printf("current: [%x] %d, depth = %d\n", b1, d1, depth)
   656  				}
   657  			}
   658  		}
   659  		sa := int(mf2.sa[i])
        		// Continue after the bytes that are already reflected in the tree position.
   660  		start := sa + depth/8
   661  		for end := start + 1; end <= n; end++ {
   662  			//fmt.Printf("Looking at [%d-%d] [%x]\n", sa, end, data[sa:end])
   663  			d := mf2.unfold(data[end-1])
        			// The lowest 5 bits of d hold the number of bits that were not consumed.
   664  			depth += 8 - int(d&0x1f)
   665  			//fmt.Printf("after unfold depth=%d\n", depth)
   666  			//b1, d1 := mf2.Current()
   667  			//fmt.Printf("current: [%x][%x] %d, depth = %d\n", b1, data[sa:end], d1, depth)
   668  			if d != 0 {
   669  				//fmt.Printf("divergence found: %b\n", d)
   670  				break
   671  			}
        			// Only a position exactly on a node that carries a value is a match.
   672  			if mf2.tailLen != 0 || mf2.top.val == nil {
   673  				//fmt.Printf("tailLen = %d, val == nil %t, p=%p\n", mf2.tailLen, mf2.top.val == nil, mf2.top)
   674  				continue
   675  			}
        			// Push a slot onto matchStack, reusing spare capacity when available.
   676  			if cap(mf2.matchStack) == len(mf2.matchStack) {
   677  				mf2.matchStack = append(mf2.matchStack, Match{})
   678  			} else {
   679  				mf2.matchStack = mf2.matchStack[:len(mf2.matchStack)+1]
   680  			}
   681  			lastMatch = &mf2.matchStack[len(mf2.matchStack)-1]
   682  			// This possibly overwrites previous match for the same start position
   683  			//fmt.Printf("Push on the match stack [%d-%d] [%x]\n", sa, end, data[sa:end])
   684  			lastMatch.Start = sa
   685  			lastMatch.End = end
   686  			lastMatch.Val = mf2.top.val
   687  		}
        		// The match on top of the stack may have been found for an earlier suffix
        		// sharing the same prefix, so only its length is reused and the match is
        		// re-based onto the current suffix.
   688  		if lastMatch != nil {
   689  			mf2.matches = append(mf2.matches, Match{})
   690  			m := &mf2.matches[len(mf2.matches)-1]
   691  			m.Start = sa
   692  			m.End = sa + lastMatch.End - lastMatch.Start
   693  			m.Val = lastMatch.Val
   694  			//fmt.Printf("Added new Match: [%d-%d] [%x]\n", m.Start, m.End, data[m.Start:m.End])
   695  		}
   696  	}
   697  	//fmt.Printf("before sorting %d matches\n", len(mf2.matches))
   698  	if len(mf2.matches) < 2 {
   699  		return mf2.matches
   700  	}
   701  	//sort.Sort(&mf2.matches)
   702  	slices.SortFunc(mf2.matches, func(i, j Match) bool { return i.Start < j.Start })
   703  
        	// Compact in place: keep the first match and every later match that ends
        	// after all matches kept so far.
   704  	lastEnd := mf2.matches[0].End
   705  	j := 1
   706  	for i, m := range mf2.matches {
   707  		if i > 0 {
   708  			if m.End > lastEnd {
   709  				if i != j {
   710  					mf2.matches[j] = m
   711  				}
   712  				lastEnd = m.End
   713  				j++
   714  			}
   715  		}
   716  	}
   717  	return mf2.matches[:j]
   718  }
   719  
   720  func (mf2 *MatchFinder2) Current() ([]byte, int) {
   721  	var b []byte
   722  	var depth int
   723  	last := len(mf2.nodeStack) - 1
   724  	for i, n := range mf2.nodeStack {
   725  		var p uint32
   726  		if i < last {
   727  			next := mf2.nodeStack[i+1]
   728  			if n.n0 == next {
   729  				p = n.p0
   730  			} else if n.n1 == next {
   731  				p = n.p1
   732  			} else {
   733  				panic("")
   734  			}
   735  		} else {
   736  			if mf2.side == 0 {
   737  				p = n.p0
   738  			} else if mf2.side == 1 {
   739  				p = n.p1
   740  			}
   741  			p = (p & 0xffffffe0) | uint32(mf2.headLen)
   742  		}
   743  		fmt.Printf("i,p=%d, %b\n", i, p)
   744  		// Add bit by bit
   745  		for (p & 0x1f) > 0 {
   746  			if depth >= 8*len(b) {
   747  				b = append(b, 0)
   748  			}
   749  			if p&0x80000000 != 0 {
   750  				b[depth/8] |= uint8(1) << (7 - (depth % 8))
   751  			}
   752  			depth++
   753  			p = ((p & 0xffffffe0) << 1) | (p & 0x1f) - 1
   754  		}
   755  	}
   756  	return b, depth
   757  }
   758  
   759  func (mf *MatchFinder) FindLongestMatches(data []byte) []Match {
   760  	matchCount := 0
   761  	s := &mf.s
   762  	lastEnd := 0
   763  	for start := 0; start < len(data); start++ {
   764  		s.reset(&mf.pt.root)
   765  		emitted := false
   766  		for end := start + 1; end <= len(data); end++ {
   767  			if d := s.transition(data[end-1], true /* readonly */); d != 0 {
   768  				break
   769  			}
   770  			if s.tail != 0 || s.n.val == nil || end <= lastEnd {
   771  				continue
   772  			}
   773  			var m *Match
   774  			if emitted {
   775  				m = &mf.matches[matchCount-1]
   776  			} else {
   777  				if matchCount == len(mf.matches) {
   778  					mf.matches = append(mf.matches, Match{})
   779  					m = &mf.matches[len(mf.matches)-1]
   780  				} else {
   781  					m = &mf.matches[matchCount]
   782  				}
   783  				matchCount++
   784  				emitted = true
   785  			}
   786  			// This possibly overwrites previous match for the same start position
   787  			m.Start = start
   788  			m.End = end
   789  			m.Val = s.n.val
   790  			lastEnd = end
   791  		}
   792  	}
   793  	return mf.matches[:matchCount]
   794  }