github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/storage/dict/trie.go (about)

     1  package dict
     2  
     3  import (
     4  	"bytes"
     5  	"io"
     6  
     7  	"github.com/pyroscope-io/pyroscope/pkg/util/varint"
     8  )
     9  
// trieNode is one node of a prefix tree whose edges are labelled with byte
// strings.
//
// This implementation is a copy of another trie implementation in this repo,
// albeit slightly different.
// TODO: maybe dedup them
type trieNode struct {
	label    []byte      // bytes on the edge leading into this node
	children []*trieNode // sub-nodes; findNodeAt selects one by the first byte of its label
}
    17  
    18  func newTrieNode(label []byte) *trieNode {
    19  	return &trieNode{
    20  		label:    label,
    21  		children: make([]*trieNode, 0),
    22  	}
    23  }
    24  
    25  func (tn *trieNode) insert(t2 *trieNode) {
    26  	tn.children = append(tn.children, t2)
    27  }
    28  
    29  // TODO: too complicated, need to refactor / document this
    30  func (tn *trieNode) findNodeAt(key []byte, vw varint.Writer, w io.Writer) {
    31  	// log.Debug("findNodeAt")
    32  	key2 := make([]byte, len(key))
    33  	// TODO: remove
    34  	copy(key2, key)
    35  	key = key2
    36  
    37  OuterLoop:
    38  	for {
    39  		// log.Debug("findNodeAt, key", string(key))
    40  
    41  		if len(key) == 0 {
    42  			// fn(tn)
    43  			return
    44  		}
    45  
    46  		// 4 options:
    47  		// trie:
    48  		// foo -> bar
    49  		// 1) no leads (baz)
    50  		//    create a new child, call fn with it
    51  		// 2) lead, key matches (foo)
    52  		//    call fn with existing child
    53  		// 3) lead, key matches, shorter (fo / fop)
    54  		//    split existing child, set that as tn
    55  		// 4) lead, key matches, longer (fooo)
    56  		//    go to existing child, set that as tn
    57  
    58  		leadIndex := -1
    59  		for k, v := range tn.children {
    60  			if v.label[0] == key[0] {
    61  				leadIndex = k
    62  			}
    63  		}
    64  
    65  		if leadIndex == -1 { // 1
    66  			// log.Debug("case 1")
    67  			newTn := newTrieNode(key)
    68  			tn.insert(newTn)
    69  			i := len(tn.children) - 1
    70  			vw.Write(w, uint64(i))
    71  			vw.Write(w, uint64(len(key)))
    72  			// fn(newTn)
    73  			return
    74  		}
    75  
    76  		leadKey := tn.children[leadIndex].label
    77  		// log.Debug("lead key", string(leadKey))
    78  		lk := len(key)
    79  		llk := len(leadKey)
    80  		for i := 0; i < lk; i++ {
    81  			if i == llk { // 4 fooo / foo i = 3 llk = 3
    82  				// log.Debug("case 4")
    83  				tn = tn.children[leadIndex]
    84  				key = key[llk:]
    85  				vw.Write(w, uint64(leadIndex))
    86  				vw.Write(w, uint64(llk))
    87  				continue OuterLoop
    88  			}
    89  			if leadKey[i] != key[i] { // 3
    90  				a := leadKey[:i] // ab
    91  				b := leadKey[i:] // c
    92  				newTn := newTrieNode(a)
    93  				newTn.children = []*trieNode{tn.children[leadIndex]}
    94  				tn.children[leadIndex].label = b
    95  				tn.children[leadIndex] = newTn
    96  				tn = newTn
    97  				key = key[i:]
    98  
    99  				vw.Write(w, uint64(leadIndex))
   100  				vw.Write(w, uint64(i))
   101  				continue OuterLoop
   102  			}
   103  		}
   104  		// lk < llk
   105  		if !bytes.Equal(key, leadKey) { // 3
   106  			a := leadKey[:lk] // ab
   107  			b := leadKey[lk:] // c
   108  			newTn := newTrieNode(a)
   109  			newTn.children = []*trieNode{tn.children[leadIndex]}
   110  			tn.children[leadIndex].label = b
   111  			tn.children[leadIndex] = newTn
   112  			tn = newTn
   113  			key = key[lk:]
   114  
   115  			vw.Write(w, uint64(leadIndex))
   116  			vw.Write(w, uint64(lk))
   117  			continue OuterLoop
   118  		}
   119  
   120  		// 2
   121  		vw.Write(w, uint64(leadIndex))
   122  		vw.Write(w, uint64(len(leadKey)))
   123  		return
   124  	}
   125  }