github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/storage/tree/serialize.go (about)

     1  package tree
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"encoding/base64"
     7  	"io"
     8  
     9  	"github.com/pyroscope-io/pyroscope/pkg/storage/dict"
    10  	"github.com/pyroscope-io/pyroscope/pkg/util/varint"
    11  )
    12  
    13  // serialization format version. it's not very useful right now, but it will be in the future
    14  const currentVersion = 1
    15  
    16  var lostDuringSerializationName = []byte("other")
    17  
    18  // warning: this function modifies the tree
    19  func (t *Tree) SerializeTruncate(d *dict.Dict, maxNodes int, w io.Writer) error {
    20  	t.Lock()
    21  	defer t.Unlock()
    22  	vw := varint.NewWriter()
    23  	var err error
    24  	if _, err = vw.Write(w, currentVersion); err != nil {
    25  		return err
    26  	}
    27  
    28  	var b bytes.Buffer // Temporary buffer for dictionary keys.
    29  	minVal := t.minValue(maxNodes)
    30  	nodes := make([]*treeNode, 1, 128)
    31  	nodes[0] = t.root
    32  	for len(nodes) > 0 {
    33  		tn := nodes[0]
    34  		nodes = nodes[1:]
    35  
    36  		b.Reset()
    37  		d.PutValue([]byte(tn.Name), &b)
    38  		if _, err = vw.Write(w, uint64(b.Len())); err != nil {
    39  			return err
    40  		}
    41  		if _, err = w.Write(b.Bytes()); err != nil {
    42  			return err
    43  		}
    44  		if _, err = vw.Write(w, tn.Self); err != nil {
    45  			return err
    46  		}
    47  
    48  		cNodes := tn.ChildrenNodes
    49  		tn.ChildrenNodes = tn.ChildrenNodes[:0]
    50  
    51  		other := uint64(0)
    52  		for _, cn := range cNodes {
    53  			isOtherNode := bytes.Equal(cn.Name, lostDuringSerializationName)
    54  			if cn.Total >= minVal || isOtherNode {
    55  				tn.ChildrenNodes = append(tn.ChildrenNodes, cn)
    56  			} else {
    57  				// Truncated children accounted as parent self.
    58  				other += cn.Total
    59  			}
    60  		}
    61  
    62  		if other > 0 {
    63  			otherNode := tn.insert(lostDuringSerializationName)
    64  			otherNode.Self += other
    65  			otherNode.Total += other
    66  		}
    67  
    68  		if len(tn.ChildrenNodes) > 0 {
    69  			nodes = append(tn.ChildrenNodes, nodes...)
    70  		} else {
    71  			tn.ChildrenNodes = nil // Just to make it eligible for GC.
    72  		}
    73  		if _, err = vw.Write(w, uint64(len(tn.ChildrenNodes))); err != nil {
    74  			return err
    75  		}
    76  	}
    77  	return nil
    78  }
    79  
    80  type parentNode struct {
    81  	node   *treeNode
    82  	parent *parentNode
    83  }
    84  
    85  func Deserialize(d *dict.Dict, r io.Reader) (*Tree, error) {
    86  	t := New()
    87  
    88  	type reader interface {
    89  		io.ByteReader
    90  		io.Reader
    91  	}
    92  	var br reader
    93  	switch x := r.(type) {
    94  	case *bytes.Buffer:
    95  		br = x
    96  	case *bytes.Reader:
    97  		br = x
    98  	case *bufio.Reader:
    99  		br = x
   100  	default:
   101  		br = bufio.NewReader(r)
   102  	}
   103  
   104  	// reads serialization format version, see comment at the top
   105  	_, err := varint.Read(br)
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  
   110  	parents := []*parentNode{{t.root, nil}}
   111  	j := 0
   112  
   113  	var nameBuf bytes.Buffer
   114  	for len(parents) > 0 {
   115  		j++
   116  		parent := parents[0]
   117  		parents = parents[1:]
   118  
   119  		labelLen, err := varint.Read(br)
   120  		labelLinkBuf := make([]byte, labelLen) // TODO: there are better ways to do this?
   121  		_, err = io.ReadAtLeast(br, labelLinkBuf, int(labelLen))
   122  		if err != nil {
   123  			return nil, err
   124  		}
   125  
   126  		nameBuf.Reset()
   127  		if !d.GetValue(labelLinkBuf, &nameBuf) {
   128  			// these strings has to be at least slightly different, hence base64 Addon
   129  			nameBuf.Reset()
   130  			nameBuf.WriteString("label not found " + base64.URLEncoding.EncodeToString(labelLinkBuf))
   131  		}
   132  		tn := parent.node.insert(nameBuf.Bytes())
   133  		tn.Self, err = varint.Read(br)
   134  		tn.Total = tn.Self
   135  		if err != nil {
   136  			return nil, err
   137  		}
   138  
   139  		pn := parent
   140  		for pn != nil {
   141  			pn.node.Total += tn.Self
   142  			pn = pn.parent
   143  		}
   144  
   145  		childrenLen, err := varint.Read(br)
   146  		if err != nil {
   147  			return nil, err
   148  		}
   149  
   150  		for i := uint64(0); i < childrenLen; i++ {
   151  			parents = append([]*parentNode{{tn, parent}}, parents...)
   152  		}
   153  	}
   154  
   155  	t.root = t.root.ChildrenNodes[0]
   156  
   157  	return t, nil
   158  }
   159  
   160  // used in the cloud
   161  func DeserializeNoDict(r io.Reader) (*Tree, error) {
   162  	t := New()
   163  	br := bufio.NewReader(r) // TODO if it's already a bytereader skip
   164  
   165  	parents := []*parentNode{{t.root, nil}}
   166  	j := 0
   167  
   168  	for len(parents) > 0 {
   169  		j++
   170  		parent := parents[0]
   171  		parents = parents[1:]
   172  
   173  		nameLen, err := varint.Read(br)
   174  		// if err == io.EOF {
   175  		// 	return t, nil
   176  		// }
   177  		nameBuf := make([]byte, nameLen) // TODO: there are better ways to do this?
   178  		_, err = io.ReadAtLeast(br, nameBuf, int(nameLen))
   179  		if err != nil {
   180  			return nil, err
   181  		}
   182  		tn := parent.node.insert(nameBuf)
   183  
   184  		tn.Self, err = varint.Read(br)
   185  		tn.Total = tn.Self
   186  		if err != nil {
   187  			return nil, err
   188  		}
   189  
   190  		pn := parent
   191  		for pn != nil {
   192  			pn.node.Total += tn.Self
   193  			pn = pn.parent
   194  		}
   195  
   196  		childrenLen, err := varint.Read(br)
   197  		if err != nil {
   198  			return nil, err
   199  		}
   200  
   201  		for i := uint64(0); i < childrenLen; i++ {
   202  			parents = append([]*parentNode{{tn, parent}}, parents...)
   203  		}
   204  	}
   205  
   206  	t.root = t.root.ChildrenNodes[0]
   207  
   208  	return t, nil
   209  }
   210  
   211  // used in the cloud
   212  // warning: this function modifies the tree
   213  func (t *Tree) SerializeTruncateNoDict(maxNodes int, w io.Writer) error {
   214  	t.Lock()
   215  	defer t.Unlock()
   216  	vw := varint.NewWriter()
   217  	var err error
   218  	minVal := t.minValue(maxNodes)
   219  	nodes := make([]*treeNode, 1, 1024)
   220  	nodes[0] = t.root
   221  	for len(nodes) > 0 {
   222  		tn := nodes[0]
   223  		nodes = nodes[1:]
   224  		if _, err = vw.Write(w, uint64(len(tn.Name))); err != nil {
   225  			return err
   226  		}
   227  		if _, err = w.Write(tn.Name); err != nil {
   228  			return err
   229  		}
   230  
   231  		if _, err = vw.Write(w, tn.Self); err != nil {
   232  			return err
   233  		}
   234  		cNodes := tn.ChildrenNodes
   235  		tn.ChildrenNodes = tn.ChildrenNodes[:0]
   236  
   237  		other := uint64(0)
   238  		for _, cn := range cNodes {
   239  			isOtherNode := bytes.Equal(cn.Name, lostDuringSerializationName)
   240  			if cn.Total >= minVal || isOtherNode {
   241  				tn.ChildrenNodes = append(tn.ChildrenNodes, cn)
   242  			} else {
   243  				// Truncated children accounted as parent self.
   244  				other += cn.Total
   245  			}
   246  		}
   247  
   248  		if other > 0 {
   249  			otherNode := tn.insert(lostDuringSerializationName)
   250  			otherNode.Self += other
   251  			otherNode.Total += other
   252  		}
   253  
   254  		if len(tn.ChildrenNodes) > 0 {
   255  			nodes = append(tn.ChildrenNodes, nodes...)
   256  		} else {
   257  			tn.ChildrenNodes = nil // Just to make it eligible for GC.
   258  		}
   259  		if _, err = vw.Write(w, uint64(len(tn.ChildrenNodes))); err != nil {
   260  			return err
   261  		}
   262  	}
   263  	return nil
   264  }