github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/structs/transporttrie/serialize.go (about)

     1  package transporttrie
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"io"
     8  
     9  	"github.com/pyroscope-io/pyroscope/pkg/util/varint"
    10  )
    11  
    12  func (t *Trie) Serialize(w io.Writer) error {
    13  	nodes := []*trieNode{t.root}
    14  	for len(nodes) > 0 {
    15  		tn := nodes[0]
    16  		nodes = nodes[1:]
    17  
    18  		name := tn.name
    19  		_, err := varint.Write(w, uint64(len(name)))
    20  		if err != nil {
    21  			return err
    22  		}
    23  		_, err = w.Write(name)
    24  		if err != nil {
    25  			return err
    26  		}
    27  
    28  		val := tn.value
    29  		if t.Divider != 1 || t.Multiplier != 1 {
    30  			val = val * uint64(t.Multiplier) / uint64(t.Divider)
    31  		}
    32  		_, err = varint.Write(w, uint64(val))
    33  		if err != nil {
    34  			return err
    35  		}
    36  		_, err = varint.Write(w, uint64(len(tn.children)))
    37  		if err != nil {
    38  			return err
    39  		}
    40  
    41  		nodes = append(tn.children, nodes...)
    42  	}
    43  	return nil
    44  }
    45  
    46  type offset struct {
    47  	descCount int
    48  	suffixLen int
    49  }
    50  
    51  // IterateRaw iterates through the serialized trie and calls cb function for
    52  // every leaf. k references bytes from buf, therefore it must not be modified
    53  // or used outside of cb, a copy of k should be used instead.
    54  func IterateRaw(r io.Reader, buf []byte, cb func(k []byte, v int)) error {
    55  	br, ok := r.(*bufio.Reader)
    56  	if !ok {
    57  		br = bufio.NewReader(r)
    58  	}
    59  
    60  	b := bytes.NewBuffer(buf)
    61  	var offsets []offset
    62  	var copied int64
    63  	for {
    64  		nameLen, err := varint.Read(br)
    65  		switch {
    66  		case err == nil:
    67  		case errors.Is(err, io.EOF):
    68  			return nil
    69  		default:
    70  			return err
    71  		}
    72  		if nameLen != 0 {
    73  			copied, err = b.ReadFrom(io.LimitReader(br, int64(nameLen)))
    74  			if err != nil {
    75  				return err
    76  			}
    77  		}
    78  		value, err := varint.Read(br)
    79  		if err != nil {
    80  			return err
    81  		}
    82  		descCount, err := varint.Read(br)
    83  		if err != nil {
    84  			return err
    85  		}
    86  
    87  		// It may be a node or a leaf. Regardless, if it has
    88  		// a value, there was a corresponding signature.
    89  		if value > 0 {
    90  			cb(b.Bytes(), int(value))
    91  		}
    92  
    93  		if descCount != 0 {
    94  			// A node. Add node suffix and save offset.
    95  			offsets = append(offsets, offset{
    96  				descCount: int(descCount),
    97  				suffixLen: int(copied),
    98  			})
    99  			continue
   100  		}
   101  
   102  		// A leaf. Cut the current label.
   103  		b.Truncate(b.Len() - int(copied))
   104  		// Cut parent suffix, if it has no more
   105  		// descendants, and it is not the root.
   106  		i := len(offsets) - 1
   107  		if i < 0 {
   108  			continue
   109  		}
   110  		offsets[i].descCount--
   111  		for ; i > 0; i-- {
   112  			if offsets[i].descCount != 0 {
   113  				break
   114  			}
   115  			// No descending nodes left.
   116  			// Cut suffix and remove the offset.
   117  			b.Truncate(b.Len() - offsets[i].suffixLen)
   118  			offsets = offsets[:i]
   119  			// Decrease parent counter, if applicable.
   120  			if p := len(offsets) - 1; p > 0 {
   121  				offsets[p].descCount--
   122  			}
   123  		}
   124  	}
   125  }
   126  
   127  func Deserialize(r io.Reader) (*Trie, error) {
   128  	t := New()
   129  	br := bufio.NewReader(r) // TODO if it's already a bytereader skip
   130  
   131  	parents := []*trieNode{t.root}
   132  	for len(parents) > 0 {
   133  		parent := parents[0]
   134  		parents = parents[1:]
   135  
   136  		nameLen, err := varint.Read(br)
   137  		// if err == io.EOF {
   138  		// 	return t, nil
   139  		// }
   140  		nameBuf := make([]byte, nameLen) // TODO: there are better ways to do this?
   141  		_, err = io.ReadAtLeast(br, nameBuf, int(nameLen))
   142  		// log.Debug(n, len(parents))
   143  		// log.Debugf("%d", nameLen, string(nameBuf), n)
   144  		if err != nil {
   145  			return nil, err
   146  		}
   147  		tn := newTrieNode(nameBuf)
   148  		// TODO: insert into parent
   149  		parent.insert(tn)
   150  
   151  		tn.value, err = varint.Read(br)
   152  		if err != nil {
   153  			return nil, err
   154  		}
   155  
   156  		childrenLen, err := varint.Read(br)
   157  		if err != nil {
   158  			return nil, err
   159  		}
   160  
   161  		for i := uint64(0); i < childrenLen; i++ {
   162  			parents = append([]*trieNode{tn}, parents...)
   163  		}
   164  	}
   165  
   166  	t.root = t.root.children[0]
   167  
   168  	return t, nil
   169  }
   170  
   171  func (t *Trie) Bytes() []byte {
   172  	b := bytes.Buffer{}
   173  	t.Serialize(&b)
   174  	return b.Bytes()
   175  }
   176  
   177  func FromBytes(p []byte) *Trie {
   178  	t, _ := Deserialize(bytes.NewReader(p))
   179  	return t
   180  }