github.com/grafana/pyroscope@v1.18.0/pkg/model/tree.go (about)

     1  package model
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"sort"
     8  	"strconv"
     9  	"strings"
    10  
    11  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    12  
    13  	dvarint "github.com/dennwc/varint"
    14  	"github.com/xlab/treeprint"
    15  
    16  	"github.com/grafana/pyroscope/pkg/og/util/varint"
    17  	"github.com/grafana/pyroscope/pkg/slices"
    18  	"github.com/grafana/pyroscope/pkg/util/minheap"
    19  )
    20  
// Tree is a call tree of named nodes. Only the top-level nodes are stored;
// methods that need a single entry point wrap them in a temporary virtual
// root node.
type Tree struct {
	// root holds the top-level nodes. InsertStack and node.insert keep
	// sibling lists ordered by name.
	root []*node
}
    24  
    25  type node struct {
    26  	parent      *node
    27  	children    []*node
    28  	self, total int64
    29  	name        string
    30  }
    31  
    32  func (t *Tree) String() string {
    33  	type branch struct {
    34  		nodes []*node
    35  		treeprint.Tree
    36  	}
    37  	tree := treeprint.New()
    38  	for _, n := range t.root {
    39  		b := tree.AddBranch(fmt.Sprintf("%s: self %d total %d", n.name, n.self, n.total))
    40  		remaining := append([]*branch{}, &branch{nodes: n.children, Tree: b})
    41  		for len(remaining) > 0 {
    42  			current := remaining[0]
    43  			remaining = remaining[1:]
    44  			for _, n := range current.nodes {
    45  				if len(n.children) > 0 {
    46  					remaining = append(remaining, &branch{nodes: n.children, Tree: current.AddBranch(fmt.Sprintf("%s: self %d total %d", n.name, n.self, n.total))})
    47  				} else {
    48  					current.AddNode(fmt.Sprintf("%s: self %d total %d", n.name, n.self, n.total))
    49  				}
    50  			}
    51  		}
    52  	}
    53  	return tree.String()
    54  }
    55  
    56  func (t *Tree) Total() (v int64) {
    57  	for _, n := range t.root {
    58  		v += n.total
    59  	}
    60  	return v
    61  }
    62  
    63  func (t *Tree) InsertStack(v int64, stack ...string) {
    64  	if v <= 0 {
    65  		return
    66  	}
    67  	r := &node{children: t.root}
    68  	n := r
    69  	for s := range stack {
    70  		name := stack[s]
    71  		n.total += v
    72  		// Inlined node.insert
    73  		i, j := 0, len(n.children)
    74  		for i < j {
    75  			h := int(uint(i+j) >> 1)
    76  			if n.children[h].name < name {
    77  				i = h + 1
    78  			} else {
    79  				j = h
    80  			}
    81  		}
    82  		if i < len(n.children) && n.children[i].name == name {
    83  			n = n.children[i]
    84  		} else {
    85  			child := &node{parent: n, name: name}
    86  			n.children = append(n.children, child)
    87  			copy(n.children[i+1:], n.children[i:])
    88  			n.children[i] = child
    89  			n = child
    90  		}
    91  	}
    92  	// Leaf.
    93  	n.total += v
    94  	n.self += v
    95  	t.root = r.children
    96  }
    97  
    98  func (t *Tree) WriteCollapsed(dst io.Writer) {
    99  	t.IterateStacks(func(_ string, self int64, stack []string) {
   100  		slices.Reverse(stack)
   101  		_, _ = fmt.Fprintf(dst, "%s %d\n", strings.Join(stack, ";"), self)
   102  	})
   103  }
   104  
   105  func (t *Tree) IterateStacks(cb func(name string, self int64, stack []string)) {
   106  	nodes := make([]*node, len(t.root), 1024)
   107  	stack := make([]string, 0, 64)
   108  	copy(nodes, t.root)
   109  	for len(nodes) > 0 {
   110  		n := nodes[0]
   111  		self := n.self
   112  		label := n.name
   113  		if self > 0 {
   114  			current := n
   115  			stack = stack[:0]
   116  			for current != nil && current.parent != nil {
   117  				stack = append(stack, current.name)
   118  				current = current.parent
   119  			}
   120  			cb(label, self, stack)
   121  		}
   122  		nodes = nodes[1:]
   123  		nodes = append(nodes, n.children...)
   124  	}
   125  }
   126  
// defaultDFSSize is the default capacity of the slices used as depth-first
// search stacks. Ideally the value equals the number of all the siblings
// of the ancestors of a tree leaf.
//
// Chosen empirically. For very deep stacks (>128), it's likely that the
// slice will grow to 1-4K nodes, depending on the trace branching.
const defaultDFSSize = 128
   133  
   134  func (t *Tree) Merge(src *Tree) {
   135  	if t.Total() == 0 && src.Total() > 0 {
   136  		*t = *src
   137  		return
   138  	}
   139  	if src.Total() == 0 {
   140  		return
   141  	}
   142  
   143  	srcNodes := make([]*node, 0, defaultDFSSize)
   144  	srcRoot := &node{children: src.root}
   145  	srcNodes = append(srcNodes, srcRoot)
   146  
   147  	dstNodes := make([]*node, 0, defaultDFSSize)
   148  	dstRoot := &node{children: t.root}
   149  	dstNodes = append(dstNodes, dstRoot)
   150  
   151  	var st, dt *node
   152  	for len(srcNodes) > 0 {
   153  		st, srcNodes = srcNodes[len(srcNodes)-1], srcNodes[:len(srcNodes)-1]
   154  		dt, dstNodes = dstNodes[len(dstNodes)-1], dstNodes[:len(dstNodes)-1]
   155  
   156  		dt.self += st.self
   157  		dt.total += st.total
   158  
   159  		for _, srcChildNode := range st.children {
   160  			// Note that we don't copy the name, but reference it.
   161  			dstChildNode := dt.insert(srcChildNode.name)
   162  			srcNodes = append(srcNodes, srcChildNode)
   163  			dstNodes = append(dstNodes, dstChildNode)
   164  		}
   165  	}
   166  
   167  	t.root = dstRoot.children
   168  }
   169  
   170  func (t *Tree) FormatNodeNames(fn func(string) string) {
   171  	nodes := make([]*node, 0, defaultDFSSize)
   172  	nodes = append(nodes, &node{children: t.root})
   173  	var n *node
   174  	var fix bool
   175  	for len(nodes) > 0 {
   176  		n, nodes = nodes[len(nodes)-1], nodes[:len(nodes)-1]
   177  		m := n.name
   178  		n.name = fn(m)
   179  		if m != n.name {
   180  			fix = true
   181  		}
   182  		nodes = append(nodes, n.children...)
   183  	}
   184  	if !fix {
   185  		return
   186  	}
   187  	t.Fix()
   188  }
   189  
   190  // Fix re-establishes order of nodes and merges duplicates.
   191  func (t *Tree) Fix() {
   192  	if len(t.root) == 0 {
   193  		return
   194  	}
   195  	r := &node{children: t.root}
   196  	for _, n := range r.children {
   197  		n.parent = r
   198  	}
   199  	nodes := make([][]*node, 0, defaultDFSSize)
   200  	nodes = append(nodes, r.children)
   201  	var n []*node
   202  	for len(nodes) > 0 {
   203  		n, nodes = nodes[len(nodes)-1], nodes[:len(nodes)-1]
   204  		if len(n) == 0 {
   205  			continue
   206  		}
   207  		sort.Slice(n, func(i, j int) bool {
   208  			return n[i].name < n[j].name
   209  		})
   210  		p := n[0]
   211  		j := 1
   212  		for _, c := range n[1:] {
   213  			if p.name == c.name {
   214  				for _, x := range c.children {
   215  					x.parent = p
   216  				}
   217  				p.children = append(p.children, c.children...)
   218  				p.total += c.total
   219  				p.self += c.self
   220  				continue
   221  			}
   222  			p = c
   223  			n[j] = c
   224  			j++
   225  		}
   226  		n = n[:j]
   227  		for _, c := range n {
   228  			c.parent.children = n
   229  			nodes = append(nodes, c.children)
   230  		}
   231  	}
   232  	t.root = r.children
   233  }
   234  
   235  func (n *node) String() string {
   236  	return fmt.Sprintf("{%s: self %d total %d}", n.name, n.self, n.total)
   237  }
   238  
   239  func (n *node) insert(name string) *node {
   240  	i := sort.Search(len(n.children), func(i int) bool {
   241  		return n.children[i].name >= name
   242  	})
   243  	if i < len(n.children) && n.children[i].name == name {
   244  		return n.children[i]
   245  	}
   246  	// We don't clone the name: it is caller responsibility
   247  	// to maintain the memory ownership.
   248  	child := &node{parent: n, name: name}
   249  	n.children = append(n.children, child)
   250  	copy(n.children[i+1:], n.children[i:])
   251  	n.children[i] = child
   252  	return child
   253  }
   254  
   255  // minValue returns the minimum "total" value a node in a tree has to have to show up in
   256  // the resulting flamegraph
   257  func (t *Tree) minValue(maxNodes int64) int64 {
   258  	if maxNodes < 1 {
   259  		return 0
   260  	}
   261  	nodes := make([]*node, 0, max(int64(len(t.root)), defaultDFSSize))
   262  	treeSize := t.size(nodes)
   263  	if treeSize <= maxNodes {
   264  		return 0
   265  	}
   266  
   267  	h := make([]int64, 0, maxNodes)
   268  
   269  	nodes = append(nodes[:0], t.root...)
   270  	var n *node
   271  	for len(nodes) > 0 {
   272  		last := len(nodes) - 1
   273  		n, nodes = nodes[last], nodes[:last]
   274  		if len(h) >= int(maxNodes) {
   275  			if n.total > h[0] {
   276  				h = minheap.Pop(h)
   277  			} else {
   278  				continue
   279  			}
   280  		}
   281  		h = minheap.Push(h, n.total)
   282  		nodes = append(nodes, n.children...)
   283  	}
   284  
   285  	if len(h) < int(maxNodes) {
   286  		return 0
   287  	}
   288  
   289  	return h[0]
   290  }
   291  
   292  // size reports number of nodes the tree consists of.
   293  // Provided buffer used for DFS traversal.
   294  func (t *Tree) size(buf []*node) int64 {
   295  	nodes := append(buf, t.root...)
   296  	var s int64
   297  	var n *node
   298  	for len(nodes) > 0 {
   299  		last := len(nodes) - 1
   300  		n, nodes = nodes[last], nodes[:last]
   301  		nodes = append(nodes, n.children...)
   302  		s++
   303  	}
   304  	return s
   305  }
   306  
// truncatedNodeName is the name of the synthetic node MarshalTruncate
// inserts to accumulate the totals of the children it drops.
const truncatedNodeName = "other"

// truncatedNodeNameBytes is truncatedNodeName as a byte slice.
var truncatedNodeNameBytes = []byte(truncatedNodeName)
   310  
   311  // Bytes returns marshaled tree byte representation; the number of nodes
   312  // is limited to maxNodes. The function modifies the tree: truncated nodes
   313  // are removed from the tree in place.
   314  func (t *Tree) Bytes(maxNodes int64) []byte {
   315  	var buf bytes.Buffer
   316  	_ = t.MarshalTruncate(&buf, maxNodes)
   317  	return buf.Bytes()
   318  }
   319  
   320  // MarshalTruncate writes tree byte representation to the writer provider,
   321  // the number of nodes is limited to maxNodes. The function modifies
   322  // the tree: truncated nodes are removed from the tree.
   323  func (t *Tree) MarshalTruncate(w io.Writer, maxNodes int64) (err error) {
   324  	if len(t.root) == 0 {
   325  		return nil
   326  	}
   327  
   328  	vw := varint.NewWriter()
   329  	minVal := t.minValue(maxNodes)
   330  	nodes := make([]*node, 1, defaultDFSSize)
   331  	nodes[0] = &node{children: t.root} // Virtual root node.
   332  	var n *node
   333  
   334  	for len(nodes) > 0 {
   335  		last := len(nodes) - 1
   336  		n, nodes = nodes[last], nodes[:last]
   337  		if _, err = vw.Write(w, uint64(len(n.name))); err != nil {
   338  			return err
   339  		}
   340  		if _, err = w.Write(unsafeStringBytes(n.name)); err != nil {
   341  			return err
   342  		}
   343  		if _, err = vw.Write(w, uint64(n.self)); err != nil {
   344  			return err
   345  		}
   346  
   347  		var other int64
   348  		var j int
   349  		for _, cn := range n.children {
   350  			if cn.total >= minVal || cn.name == truncatedNodeName {
   351  				n.children[j] = cn
   352  				j++
   353  			} else {
   354  				other += cn.total
   355  			}
   356  		}
   357  
   358  		n.children = n.children[:j]
   359  		if other > 0 {
   360  			o := n.insert(truncatedNodeName)
   361  			o.total += other
   362  			o.self += other
   363  		}
   364  
   365  		if len(n.children) > 0 {
   366  			nodes = append(nodes, n.children...)
   367  		}
   368  		if _, err = vw.Write(w, uint64(len(n.children))); err != nil {
   369  			return err
   370  		}
   371  	}
   372  
   373  	return nil
   374  }
   375  
// errMalformedTreeBytes is returned by UnmarshalTree when its input cannot
// be decoded.
var errMalformedTreeBytes = fmt.Errorf("malformed tree bytes")

// estimateBytesPerNode is used by UnmarshalTree to derive the initial
// capacity of its traversal stack from the input length.
const estimateBytesPerNode = 16 // Chosen empirically.
   379  
   380  func MustUnmarshalTree(b []byte) *Tree {
   381  	if len(b) == 0 {
   382  		return new(Tree)
   383  	}
   384  	t, err := UnmarshalTree(b)
   385  	if err != nil {
   386  		panic(err)
   387  	}
   388  	return t
   389  }
   390  
   391  func UnmarshalTree(b []byte) (*Tree, error) {
   392  	t := new(Tree)
   393  	if len(b) < 2 {
   394  		return t, nil
   395  	}
   396  	size := estimateBytesPerNode
   397  	if e := len(b) / estimateBytesPerNode; e > estimateBytesPerNode {
   398  		size = e
   399  	}
   400  	parents := make([]*node, 1, size)
   401  	// Virtual root node.
   402  	root := new(node)
   403  	parents[0] = root
   404  	var parent *node
   405  	var offset int
   406  
   407  	for len(parents) > 0 {
   408  		parent, parents = parents[len(parents)-1], parents[:len(parents)-1]
   409  		nameLen, o := dvarint.Uvarint(b[offset:])
   410  		if o < 0 {
   411  			return nil, errMalformedTreeBytes
   412  		}
   413  		offset += o
   414  		// Note that we allocate a string, instead of referencing b's capacity.
   415  		name := string(b[offset : offset+int(nameLen)])
   416  		offset += int(nameLen)
   417  		value, o := dvarint.Uvarint(b[offset:])
   418  		if o < 0 {
   419  			return nil, errMalformedTreeBytes
   420  		}
   421  		offset += o
   422  		childrenLen, o := dvarint.Uvarint(b[offset:])
   423  		if o < 0 {
   424  			return nil, errMalformedTreeBytes
   425  		}
   426  		offset += o
   427  
   428  		n := parent.insert(name)
   429  		n.children = make([]*node, 0, childrenLen)
   430  		n.self = int64(value)
   431  
   432  		pn := n
   433  		for pn.parent != nil {
   434  			pn.total += n.self
   435  			pn = pn.parent
   436  		}
   437  
   438  		for i := uint64(0); i < childrenLen; i++ {
   439  			parents = append(parents, n)
   440  		}
   441  	}
   442  
   443  	// Remove the virtual root.
   444  	t.root = root.children[0].children
   445  
   446  	return t, nil
   447  }
   448  
   449  // TreeFromBackendProfile is a wrapper...
   450  func TreeFromBackendProfile(profile *profilev1.Profile, maxNodes int64) ([]byte, error) {
   451  	return TreeFromBackendProfileSampleType(profile, maxNodes, 0)
   452  }
   453  
   454  // TreeFromBackendProfileSampleType converts a pprof profile to a tree format with maxNodes limit
   455  func TreeFromBackendProfileSampleType(profile *profilev1.Profile, maxNodes int64, sampleType int) ([]byte, error) {
   456  	t := NewStacktraceTree(int(maxNodes * 2))
   457  	stack := make([]int32, 0, 64)
   458  	m := make(map[uint64]int32)
   459  
   460  	for i := range profile.Sample {
   461  		stack = stack[:0]
   462  		for j := range profile.Sample[i].LocationId {
   463  			locIdx := int(profile.Sample[i].LocationId[j]) - 1
   464  			if locIdx < 0 || len(profile.Location) <= locIdx {
   465  				return nil, fmt.Errorf("invalid location ID %d in sample %d", profile.Sample[i].LocationId[j], i)
   466  			}
   467  
   468  			loc := profile.Location[locIdx]
   469  			if len(loc.Line) > 0 {
   470  				for l := range loc.Line {
   471  					stack = append(stack, int32(profile.Function[loc.Line[l].FunctionId-1].Name))
   472  				}
   473  				continue
   474  			}
   475  			addr, ok := m[loc.Address]
   476  			if !ok {
   477  				addr = int32(len(profile.StringTable))
   478  				profile.StringTable = append(profile.StringTable, strconv.FormatInt(int64(loc.Address), 16))
   479  				m[loc.Address] = addr
   480  			}
   481  			stack = append(stack, addr)
   482  		}
   483  
   484  		if sampleType < 0 || sampleType >= len(profile.Sample[i].Value) {
   485  			return nil, fmt.Errorf("invalid sampleType index %d for sample %d (len=%d)", sampleType, i, len(profile.Sample[i].Value))
   486  		}
   487  
   488  		t.Insert(stack, profile.Sample[i].Value[sampleType])
   489  	}
   490  
   491  	b := bytes.NewBuffer(nil)
   492  	b.Grow(100 << 10)
   493  	t.Bytes(b, maxNodes, profile.StringTable)
   494  	return b.Bytes(), nil
   495  }