github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/resolver_tree.go (about)

     1  package symdb
     2  
     3  import (
     4  	"context"
     5  	"strconv"
     6  	"sync"
     7  
     8  	"golang.org/x/sync/errgroup"
     9  
    10  	"github.com/grafana/pyroscope/pkg/iter"
    11  	"github.com/grafana/pyroscope/pkg/model"
    12  	schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
    13  	"github.com/grafana/pyroscope/pkg/util"
    14  	"github.com/grafana/pyroscope/pkg/util/minheap"
    15  )
    16  
    17  func buildTree(
    18  	ctx context.Context,
    19  	symbols *Symbols,
    20  	appender *SampleAppender,
    21  	maxNodes int64,
    22  	selection *SelectedStackTraces,
    23  ) (*model.Tree, error) {
    24  	if !selection.HasValidCallSite() {
    25  		// TODO(bryan) Maybe return an error here? buildPprof returns a blank
    26  		// profile. So mimicking that behavior for now.
    27  		return &model.Tree{}, nil
    28  	}
    29  
    30  	// If the number of samples is large (> 128K) and the StacktraceResolver
    31  	// implements the range iterator, we will be building the tree based on
    32  	// the parent pointer tree of the partition (a copy of). The only exception
    33  	// is when the number of nodes is not limited, or is close to the number of
    34  	// nodes in the original tree: the optimization is still beneficial in terms
    35  	// of CPU, but is very expensive in terms of memory.
    36  	iterator, ok := symbols.Stacktraces.(StacktraceIDRangeIterator)
    37  	if ok && shouldCopyTree(appender, maxNodes) {
    38  		ranges := iterator.SplitStacktraceIDRanges(appender)
    39  		return buildTreeFromParentPointerTrees(ctx, ranges, symbols, maxNodes, selection)
    40  	}
    41  	// Otherwise, use the basic approach: resolve each stack trace
    42  	// and insert them into the new tree one by one. The method
    43  	// performs best on small sample sets.
    44  	samples := appender.Samples()
    45  	t := treeSymbolsFromPool()
    46  	defer t.reset()
    47  	t.init(symbols, samples, selection)
    48  	if err := symbols.Stacktraces.ResolveStacktraceLocations(ctx, t, samples.StacktraceIDs); err != nil {
    49  		return nil, err
    50  	}
    51  	return t.tree.Tree(maxNodes, t.symbols.Strings), nil
    52  }
    53  
    54  func shouldCopyTree(appender *SampleAppender, maxNodes int64) bool {
    55  	const copyThreshold = 128 << 10
    56  	expensiveTruncation := maxNodes <= 0 || maxNodes > int64(appender.Len())
    57  	return appender.Len() > copyThreshold && !expensiveTruncation
    58  }
    59  
// treeSymbols receives resolved stack traces through InsertStacktrace and
// inserts them, as function name references, into a model.StacktraceTree.
// Instances are taken from treeSymbolsPool and returned to it by reset.
type treeSymbols struct {
	// symbols provides the Locations, Functions, and Strings tables used
	// to turn location IDs into function names.
	symbols *Symbols
	// samples holds the values being inserted; Values[cur] is the weight
	// of the next stack trace passed to InsertStacktrace.
	samples *schemav1.Samples
	// tree is the tree under construction. It is allocated by init only
	// when nil, and reset clears it without discarding it.
	tree    *model.StacktraceTree
	// lines is a scratch buffer with the function name references of the
	// stack trace being inserted.
	lines   []int32
	// cur counts the stack traces seen so far and indexes samples.Values.
	cur     int

	// selection is the stack trace filter supplied to init; may be nil.
	selection        *SelectedStackTraces
	// funcNamesMatcher, when non-nil, decides whether a resolved stack
	// trace is inserted into the tree.
	funcNamesMatcher func(funcNames []int32) bool
}
    70  
// treeSymbolsPool recycles treeSymbols instances: treeSymbolsFromPool takes
// one out, and treeSymbols.reset puts it back.
var treeSymbolsPool = sync.Pool{
	New: func() any { return new(treeSymbols) },
}
    74  
    75  func treeSymbolsFromPool() *treeSymbols {
    76  	return treeSymbolsPool.Get().(*treeSymbols)
    77  }
    78  
    79  func (r *treeSymbols) reset() {
    80  	r.symbols = nil
    81  	r.samples = nil
    82  	r.tree.Reset()
    83  	r.lines = r.lines[:0]
    84  	r.cur = 0
    85  	treeSymbolsPool.Put(r)
    86  }
    87  
    88  func (r *treeSymbols) init(symbols *Symbols, samples schemav1.Samples, selection *SelectedStackTraces) {
    89  	r.symbols = symbols
    90  	r.samples = &samples
    91  	r.selection = selection
    92  
    93  	if r.tree == nil {
    94  		// Branching factor.
    95  		r.tree = model.NewStacktraceTree(samples.Len() * 2)
    96  	}
    97  	if r.selection != nil && len(r.selection.callSite) > 0 {
    98  		r.funcNamesMatcher = r.funcNamesMatchSelection
    99  	}
   100  }
   101  func (r *treeSymbols) InsertStacktrace(_ uint32, locations []int32) {
   102  	r.lines = r.lines[:0]
   103  	for i := 0; i < len(locations); i++ {
   104  		lines := r.symbols.Locations[locations[i]].Line
   105  		for j := 0; j < len(lines); j++ {
   106  			f := r.symbols.Functions[lines[j].FunctionId]
   107  			r.lines = append(r.lines, int32(f.Name))
   108  		}
   109  	}
   110  	if r.funcNamesMatcher == nil || r.funcNamesMatcher(r.lines) {
   111  		r.tree.Insert(r.lines, int64(r.samples.Values[r.cur]))
   112  	}
   113  	r.cur++
   114  }
   115  
   116  // funcNamesMatchSelection checks if the funcNames match the selection.
   117  // Note funcNames is a slice of function name references and is reversed. The first item is the last function in the stack trace.
   118  func (r *treeSymbols) funcNamesMatchSelection(funcNames []int32) bool {
   119  	if len(funcNames) < int(r.selection.depth) {
   120  		return false
   121  	}
   122  
   123  	for i := 0; i < int(r.selection.depth); i++ {
   124  		if r.symbols.Strings[funcNames[len(funcNames)-1-i]] != r.selection.callSite[i] {
   125  			return false
   126  		}
   127  	}
   128  	return true
   129  }
   130  
   131  func buildTreeFromParentPointerTrees(
   132  	ctx context.Context,
   133  	ranges iter.Iterator[*StacktraceIDRange],
   134  	symbols *Symbols,
   135  	maxNodes int64,
   136  	selection *SelectedStackTraces,
   137  ) (*model.Tree, error) {
   138  	m := model.NewTreeMerger()
   139  	g, _ := errgroup.WithContext(ctx)
   140  	for ranges.Next() {
   141  		sr := ranges.At()
   142  		g.Go(util.RecoverPanic(func() error {
   143  			m.MergeTree(buildTreeForStacktraceIDRange(sr, symbols, maxNodes, selection))
   144  			return nil
   145  		}))
   146  	}
   147  	if err := g.Wait(); err != nil {
   148  		return nil, err
   149  	}
   150  	return m.Tree(), nil
   151  }
   152  
// nodeResult is the classification of a parent pointer tree node with
// respect to the selected call site. While nodes are being classified, the
// result is stored in the Value field of the node.
type nodeResult int64

const (
	// nodeResultUnknown is the zero value: the node is not classified yet.
	// Only nodes in this state are overwritten by markNAncestors.
	nodeResultUnknown nodeResult = iota
	// nodeResultMatch marks the node at which the selected call site ends.
	nodeResultMatch
	// nodeResultDescendant marks a node located below a match node.
	nodeResultDescendant
	// nodeResultAncestor marks a node located above a match node.
	nodeResultAncestor
	// nodeResultNoMatch marks a node of a stack that does not match.
	nodeResultNoMatch
)
   162  
   163  func markNAncestors(idx int, nodes []Node, result nodeResult, depth int) {
   164  	count := 0
   165  	for idx != sentinel {
   166  		if depth > 0 && count >= depth {
   167  			break
   168  		}
   169  		if nodes[idx].Value != int64(nodeResultUnknown) {
   170  			break
   171  		}
   172  		nodes[idx].Value = int64(result)
   173  		idx = int(nodes[idx].Parent)
   174  		count++
   175  	}
   176  }
   177  
// selectedNodeMarker classifies the nodes of a parent pointer tree against
// the selected call site. Besides its inputs, it keeps the state of the
// leaf-to-root walk currently in progress (see markStack).
type selectedNodeMarker struct {
	symbols   *Symbols
	selection *SelectedStackTraces
	nodes     []Node

	leaf         int // node we started with
	current      int // current node index
	depth        int // number of parent links followed from leaf so far
	selectionIdx int // index of the call site entry that needs to be matched next
}
   188  
   189  // markAncestors marks the ancestors of the leaf node we started with with the given result
   190  // will only mark the ancestors that are not already marked
   191  func (m *selectedNodeMarker) markAncestors(result nodeResult) {
   192  	markNAncestors(m.leaf, m.nodes, result, -1)
   193  }
   194  
   195  // markMatch marks the match node and its ancestors and descendants
   196  func (m *selectedNodeMarker) markMatch() {
   197  	// get to the match node
   198  	matchNode := m.leaf
   199  	for i := 0; i < m.depth-int(m.selection.depth); i++ {
   200  		matchNode = int(m.nodes[matchNode].Parent)
   201  	}
   202  	// first mark the match node's ancestors
   203  	markNAncestors(matchNode, m.nodes, nodeResultAncestor, -1)
   204  	// mark the match node as a match
   205  	m.nodes[matchNode].Value = int64(nodeResultMatch)
   206  	// mark the match node's descendants
   207  	markNAncestors(matchNode, m.nodes, nodeResultDescendant, -1)
   208  }
   209  
   210  func (m *selectedNodeMarker) reset(idx int) {
   211  	m.leaf = idx
   212  	m.current = idx
   213  	m.depth = 0
   214  	m.selectionIdx = m.firstSelection()
   215  }
   216  
   217  func (m *selectedNodeMarker) firstSelection() int {
   218  	return int(m.selection.depth) - 1
   219  }
   220  
   221  // nodeMatch checks if the current node matches the selection and update m.selectionIdx to reflect the next selection to match
   222  // If it is -1 the full stack has been matched
   223  func (m *selectedNodeMarker) matchNode() {
   224  	for _, l := range m.symbols.Locations[m.nodes[m.current].Location].Line {
   225  		if m.selectionIdx < 0 {
   226  			m.selectionIdx = m.firstSelection()
   227  			return
   228  		}
   229  		if m.selection.callSite[m.selectionIdx] != m.selection.funcNames[l.FunctionId] {
   230  			m.selectionIdx = m.firstSelection()
   231  			return
   232  		}
   233  		m.selectionIdx--
   234  	}
   235  }
   236  
   237  // markStack marks the stack from the left node to the root node
   238  func (m *selectedNodeMarker) markStack(leaf int) {
   239  	m.reset(leaf)
   240  	for {
   241  		// if node result is known, we can mark nodes right away
   242  		currentResult := nodeResult(m.nodes[m.current].Value)
   243  		if currentResult != nodeResultUnknown {
   244  			switch currentResult {
   245  			case nodeResultDescendant, nodeResultMatch:
   246  				m.markAncestors(nodeResultDescendant)
   247  			case nodeResultAncestor, nodeResultNoMatch:
   248  				m.markAncestors(nodeResultNoMatch)
   249  			default:
   250  				panic("unhandled node result: " + strconv.Itoa(int(currentResult)))
   251  			}
   252  			return
   253  		}
   254  
   255  		// check if the functionNames on this node, match the selector
   256  		m.matchNode()
   257  
   258  		// if the next node is the root or we are on the root node already break
   259  		if next := m.nodes[m.current].Parent; next == sentinel || m.nodes[next].Parent == sentinel {
   260  			if m.selectionIdx == -1 {
   261  				// we found the match
   262  				m.markMatch()
   263  				return
   264  			}
   265  
   266  			// mark everything that is deepeer than the selection as no match
   267  			if m.depth > int(m.selection.depth) {
   268  				markNAncestors(m.leaf, m.nodes, nodeResultNoMatch, m.depth-int(m.selection.depth))
   269  			}
   270  			return
   271  		}
   272  
   273  		m.current = int(m.nodes[m.current].Parent)
   274  		m.depth++
   275  	}
   276  }
   277  
   278  // markSelectedNodes marks the nodes that are matched by the StacktraceSelector
   279  // When processing the nodes from the parent pointer tree, it will temporarily use the values field to keep track of the state of each node.
   280  // After the nodes are processed, the values field set to 0 and the truncation mark is used to mark the nodes that are not matched.
   281  func markSelectedNodes(
   282  	symbols *Symbols,
   283  	selection *SelectedStackTraces,
   284  	nodes []Node,
   285  ) []Node {
   286  	m := &selectedNodeMarker{
   287  		symbols:   symbols,
   288  		selection: selection,
   289  		nodes:     nodes,
   290  	}
   291  
   292  	// iterate over all nodes and check if they or their descendants match the selection
   293  	for idx := range m.nodes {
   294  		m.markStack(idx)
   295  	}
   296  
   297  	// iterate once again over all nodes and mark the nodes that are not matched as truncated
   298  	for idx := range m.nodes {
   299  		if nodes[idx].Value != int64(nodeResultDescendant) && nodes[idx].Value != int64(nodeResultMatch) {
   300  			// mark them as truncated
   301  			nodes[idx].Location |= truncationMark
   302  		}
   303  		// reset the value
   304  		nodes[idx].Value = 0
   305  	}
   306  
   307  	return m.nodes
   308  }
   309  
   310  func buildTreeForStacktraceIDRange(
   311  	stacktraces *StacktraceIDRange,
   312  	symbols *Symbols,
   313  	maxNodes int64,
   314  	selection *SelectedStackTraces,
   315  ) *model.Tree {
   316  	// Get the parent pointer tree for the range. The tree is
   317  	// not specific to the samples we've collected and includes
   318  	// all the stack traces.
   319  	nodes := stacktraces.Nodes()
   320  	// Filter stacktrace filter
   321  	if selection != nil && len(selection.callSite) > 0 {
   322  		nodes = markSelectedNodes(symbols, selection, nodes)
   323  	}
   324  
   325  	// SetNodeValues sets values to the nodes that match the
   326  	// samples we've collected; those are not always leaves:
   327  	// a node may have its own value (self) and children.
   328  	stacktraces.SetNodeValues(nodes)
   329  	// Propagate the values to the parent nodes. This is required
   330  	// to identify the nodes that should be removed from the tree.
   331  	// For each node, the value should be a sum of all the child
   332  	// nodes (total).
   333  	propagateNodeValues(nodes)
   334  	// Next step is truncation: we need to mark leaf nodes of the
   335  	// stack traces we want to keep, and ensure that their values
   336  	// reflect their own weight (total for truncated leaves, self
   337  	// for the true leaves).
   338  	// We preserve more nodes than requested to preserve more
   339  	// locations with inlined functions. The multiplier is
   340  	// chosen empirically; it should be roughly equal to the
   341  	// ratio of nodes in the location tree to the nodes in the
   342  	// function tree (after truncation).
   343  	markNodesForTruncation(nodes, maxNodes*4)
   344  	// We now build an intermediate tree from the marked stack
   345  	// traces. The reason is that the intermediate tree is
   346  	// substantially bigger than the final one. The intermediate
   347  	// tree is optimized for inserts and lookups, while the output
   348  	// tree is optimized for merge operations.
   349  	t := model.NewStacktraceTree(int(maxNodes))
   350  	insertStacktraces(t, nodes, symbols)
   351  	// Finally, we convert the stack trace tree into the function
   352  	// tree, dropping insignificant functions, and symbolizing the
   353  	// nodes (function names).
   354  	return t.Tree(maxNodes, symbols.Strings)
   355  }
   356  
   357  func propagateNodeValues(nodes []Node) {
   358  	for i := len(nodes) - 1; i >= 1; i-- {
   359  		if p := nodes[i].Parent; p > 0 {
   360  			nodes[p].Value += nodes[i].Value
   361  		}
   362  	}
   363  }
   364  
   365  func markNodesForTruncation(nodes []Node, maxNodes int64) {
   366  	m := minValue(nodes, maxNodes)
   367  	for i := 1; i < len(nodes); i++ {
   368  		p := nodes[i].Parent
   369  		v := nodes[i].Value
   370  		// Remove previous truncation mark, potential set by the stacktrace filter
   371  		nodes[i].Location &= ^truncationMark
   372  		if v < m {
   373  			nodes[i].Location |= truncationMark
   374  			// Preserve values of truncated locations. The weight
   375  			// of the truncated chain is accounted in the parent.
   376  			if p >= 0 && nodes[p].Location&truncationMark != 0 {
   377  				continue
   378  			}
   379  		}
   380  		// Subtract the value of the location from the parent:
   381  		// by doing so we ensure that the transient nodes have zero
   382  		// weight, and then will be ignored by the tree builder.
   383  		if p >= 0 {
   384  			nodes[p].Value -= v
   385  		}
   386  	}
   387  }
   388  
   389  func insertStacktraces(t *model.StacktraceTree, nodes []Node, symbols *Symbols) {
   390  	l := int32(len(nodes))
   391  	s := make([]int32, 0, 64)
   392  	for i := int32(1); i < l; i++ {
   393  		p := nodes[i].Parent
   394  		v := nodes[i].Value
   395  		if v > 0 && nodes[p].Location&truncationMark == 0 {
   396  			s = resolveStack(s, nodes, i, symbols)
   397  			t.Insert(s, v)
   398  		}
   399  	}
   400  }
   401  
   402  func resolveStack(dst []int32, nodes []Node, i int32, symbols *Symbols) []int32 {
   403  	dst = dst[:0]
   404  	for i > 0 {
   405  		j := nodes[i].Location
   406  		if j&truncationMark > 0 {
   407  			dst = append(dst, sentinel)
   408  		} else {
   409  			loc := symbols.Locations[j]
   410  			for l := 0; l < len(loc.Line); l++ {
   411  				dst = append(dst, int32(symbols.Functions[loc.Line[l].FunctionId].Name))
   412  			}
   413  		}
   414  		i = nodes[i].Parent
   415  	}
   416  	return dst
   417  }
   418  
   419  func minValue(nodes []Node, maxNodes int64) int64 {
   420  	if maxNodes < 1 || maxNodes >= int64(len(nodes)) {
   421  		return 0
   422  	}
   423  	h := make([]int64, 0, maxNodes)
   424  	for i := range nodes {
   425  		v := nodes[i].Value
   426  		if len(h) >= int(maxNodes) {
   427  			if v > h[0] {
   428  				h = minheap.Pop(h)
   429  			} else {
   430  				continue
   431  			}
   432  		}
   433  		h = minheap.Push(h, v)
   434  	}
   435  	if len(h) < int(maxNodes) {
   436  		return 0
   437  	}
   438  	return h[0]
   439  }