github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/symdb/resolver_tree.go (about) 1 package symdb 2 3 import ( 4 "context" 5 "strconv" 6 "sync" 7 8 "golang.org/x/sync/errgroup" 9 10 "github.com/grafana/pyroscope/pkg/iter" 11 "github.com/grafana/pyroscope/pkg/model" 12 schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" 13 "github.com/grafana/pyroscope/pkg/util" 14 "github.com/grafana/pyroscope/pkg/util/minheap" 15 ) 16 17 func buildTree( 18 ctx context.Context, 19 symbols *Symbols, 20 appender *SampleAppender, 21 maxNodes int64, 22 selection *SelectedStackTraces, 23 ) (*model.Tree, error) { 24 if !selection.HasValidCallSite() { 25 // TODO(bryan) Maybe return an error here? buildPprof returns a blank 26 // profile. So mimicking that behavior for now. 27 return &model.Tree{}, nil 28 } 29 30 // If the number of samples is large (> 128K) and the StacktraceResolver 31 // implements the range iterator, we will be building the tree based on 32 // the parent pointer tree of the partition (a copy of). The only exception 33 // is when the number of nodes is not limited, or is close to the number of 34 // nodes in the original tree: the optimization is still beneficial in terms 35 // of CPU, but is very expensive in terms of memory. 36 iterator, ok := symbols.Stacktraces.(StacktraceIDRangeIterator) 37 if ok && shouldCopyTree(appender, maxNodes) { 38 ranges := iterator.SplitStacktraceIDRanges(appender) 39 return buildTreeFromParentPointerTrees(ctx, ranges, symbols, maxNodes, selection) 40 } 41 // Otherwise, use the basic approach: resolve each stack trace 42 // and insert them into the new tree one by one. The method 43 // performs best on small sample sets. 44 samples := appender.Samples() 45 t := treeSymbolsFromPool() 46 defer t.reset() 47 t.init(symbols, samples, selection) 48 if err := symbols.Stacktraces.ResolveStacktraceLocations(ctx, t, samples.StacktraceIDs); err != nil { 49 return nil, err 50 } 51 return t.tree.Tree(maxNodes, t.symbols.Strings), nil 52 } 53 54 func shouldCopyTree(appender *SampleAppender, maxNodes int64) bool { 55 const copyThreshold = 128 << 10 56 expensiveTruncation := maxNodes <= 0 || maxNodes > int64(appender.Len()) 57 return appender.Len() > copyThreshold && !expensiveTruncation 58 } 59 60 type treeSymbols struct { 61 symbols *Symbols 62 samples *schemav1.Samples 63 tree *model.StacktraceTree 64 lines []int32 65 cur int 66 67 selection *SelectedStackTraces 68 funcNamesMatcher func(funcNames []int32) bool 69 } 70 71 var treeSymbolsPool = sync.Pool{ 72 New: func() any { return new(treeSymbols) }, 73 } 74 75 func treeSymbolsFromPool() *treeSymbols { 76 return treeSymbolsPool.Get().(*treeSymbols) 77 } 78 79 func (r *treeSymbols) reset() { 80 r.symbols = nil 81 r.samples = nil 82 r.tree.Reset() 83 r.lines = r.lines[:0] 84 r.cur = 0 85 treeSymbolsPool.Put(r) 86 } 87 88 func (r *treeSymbols) init(symbols *Symbols, samples schemav1.Samples, selection *SelectedStackTraces) { 89 r.symbols = symbols 90 r.samples = &samples 91 r.selection = selection 92 93 if r.tree == nil { 94 // Branching factor. 95 r.tree = model.NewStacktraceTree(samples.Len() * 2) 96 } 97 if r.selection != nil && len(r.selection.callSite) > 0 { 98 r.funcNamesMatcher = r.funcNamesMatchSelection 99 } 100 } 101 func (r *treeSymbols) InsertStacktrace(_ uint32, locations []int32) { 102 r.lines = r.lines[:0] 103 for i := 0; i < len(locations); i++ { 104 lines := r.symbols.Locations[locations[i]].Line 105 for j := 0; j < len(lines); j++ { 106 f := r.symbols.Functions[lines[j].FunctionId] 107 r.lines = append(r.lines, int32(f.Name)) 108 } 109 } 110 if r.funcNamesMatcher == nil || r.funcNamesMatcher(r.lines) { 111 r.tree.Insert(r.lines, int64(r.samples.Values[r.cur])) 112 } 113 r.cur++ 114 } 115 116 // funcNamesMatchSelection checks if the funcNames match the selection. 117 // Note funcNames is a slice of function name references and is reversed. The first item is the last function in the stack trace. 118 func (r *treeSymbols) funcNamesMatchSelection(funcNames []int32) bool { 119 if len(funcNames) < int(r.selection.depth) { 120 return false 121 } 122 123 for i := 0; i < int(r.selection.depth); i++ { 124 if r.symbols.Strings[funcNames[len(funcNames)-1-i]] != r.selection.callSite[i] { 125 return false 126 } 127 } 128 return true 129 } 130 131 func buildTreeFromParentPointerTrees( 132 ctx context.Context, 133 ranges iter.Iterator[*StacktraceIDRange], 134 symbols *Symbols, 135 maxNodes int64, 136 selection *SelectedStackTraces, 137 ) (*model.Tree, error) { 138 m := model.NewTreeMerger() 139 g, _ := errgroup.WithContext(ctx) 140 for ranges.Next() { 141 sr := ranges.At() 142 g.Go(util.RecoverPanic(func() error { 143 m.MergeTree(buildTreeForStacktraceIDRange(sr, symbols, maxNodes, selection)) 144 return nil 145 })) 146 } 147 if err := g.Wait(); err != nil { 148 return nil, err 149 } 150 return m.Tree(), nil 151 } 152 153 type nodeResult int64 154 155 const ( 156 nodeResultUnknown nodeResult = iota 157 nodeResultMatch 158 nodeResultDescendant 159 nodeResultAncestor 160 nodeResultNoMatch 161 ) 162 163 func markNAncestors(idx int, nodes []Node, result nodeResult, depth int) { 164 count := 0 165 for idx != sentinel { 166 if depth > 0 && count >= depth { 167 break 168 } 169 if nodes[idx].Value != int64(nodeResultUnknown) { 170 break 171 } 172 nodes[idx].Value = int64(result) 173 idx = int(nodes[idx].Parent) 174 count++ 175 } 176 } 177 178 type selectedNodeMarker struct { 179 symbols *Symbols 180 selection *SelectedStackTraces 181 nodes []Node 182 183 leaf int // node we started with 184 current int // current node index 185 depth int // current stack depth 186 selectionIdx int // references which callsite is need to be matched next 187 } 188 189 // markAncestors marks the ancestors of the leaf node we started with with the given result 190 // will only mark the ancestors that are not already marked 191 func (m *selectedNodeMarker) markAncestors(result nodeResult) { 192 markNAncestors(m.leaf, m.nodes, result, -1) 193 } 194 195 // markMatch marks the match node and its ancestors and descendants 196 func (m *selectedNodeMarker) markMatch() { 197 // get to the match node 198 matchNode := m.leaf 199 for i := 0; i < m.depth-int(m.selection.depth); i++ { 200 matchNode = int(m.nodes[matchNode].Parent) 201 } 202 // first mark the match node's ancestors 203 markNAncestors(matchNode, m.nodes, nodeResultAncestor, -1) 204 // mark the match node as a match 205 m.nodes[matchNode].Value = int64(nodeResultMatch) 206 // mark the match node's descendants 207 markNAncestors(matchNode, m.nodes, nodeResultDescendant, -1) 208 } 209 210 func (m *selectedNodeMarker) reset(idx int) { 211 m.leaf = idx 212 m.current = idx 213 m.depth = 0 214 m.selectionIdx = m.firstSelection() 215 } 216 217 func (m *selectedNodeMarker) firstSelection() int { 218 return int(m.selection.depth) - 1 219 } 220 221 // nodeMatch checks if the current node matches the selection and update m.selectionIdx to reflect the next selection to match 222 // If it is -1 the full stack has been matched 223 func (m *selectedNodeMarker) matchNode() { 224 for _, l := range m.symbols.Locations[m.nodes[m.current].Location].Line { 225 if m.selectionIdx < 0 { 226 m.selectionIdx = m.firstSelection() 227 return 228 } 229 if m.selection.callSite[m.selectionIdx] != m.selection.funcNames[l.FunctionId] { 230 m.selectionIdx = m.firstSelection() 231 return 232 } 233 m.selectionIdx-- 234 } 235 } 236 237 // markStack marks the stack from the left node to the root node 238 func (m *selectedNodeMarker) markStack(leaf int) { 239 m.reset(leaf) 240 for { 241 // if node result is known, we can mark nodes right away 242 currentResult := nodeResult(m.nodes[m.current].Value) 243 if currentResult != nodeResultUnknown { 244 switch currentResult { 245 case nodeResultDescendant, nodeResultMatch: 246 m.markAncestors(nodeResultDescendant) 247 case nodeResultAncestor, nodeResultNoMatch: 248 m.markAncestors(nodeResultNoMatch) 249 default: 250 panic("unhandled node result: " + strconv.Itoa(int(currentResult))) 251 } 252 return 253 } 254 255 // check if the functionNames on this node, match the selector 256 m.matchNode() 257 258 // if the next node is the root or we are on the root node already break 259 if next := m.nodes[m.current].Parent; next == sentinel || m.nodes[next].Parent == sentinel { 260 if m.selectionIdx == -1 { 261 // we found the match 262 m.markMatch() 263 return 264 } 265 266 // mark everything that is deepeer than the selection as no match 267 if m.depth > int(m.selection.depth) { 268 markNAncestors(m.leaf, m.nodes, nodeResultNoMatch, m.depth-int(m.selection.depth)) 269 } 270 return 271 } 272 273 m.current = int(m.nodes[m.current].Parent) 274 m.depth++ 275 } 276 } 277 278 // markSelectedNodes marks the nodes that are matched by the StacktraceSelector 279 // When processing the nodes from the parent pointer tree, it will temporarily use the values field to keep track of the state of each node. 280 // After the nodes are processed, the values field set to 0 and the truncation mark is used to mark the nodes that are not matched. 281 func markSelectedNodes( 282 symbols *Symbols, 283 selection *SelectedStackTraces, 284 nodes []Node, 285 ) []Node { 286 m := &selectedNodeMarker{ 287 symbols: symbols, 288 selection: selection, 289 nodes: nodes, 290 } 291 292 // iterate over all nodes and check if they or their descendants match the selection 293 for idx := range m.nodes { 294 m.markStack(idx) 295 } 296 297 // iterate once again over all nodes and mark the nodes that are not matched as truncated 298 for idx := range m.nodes { 299 if nodes[idx].Value != int64(nodeResultDescendant) && nodes[idx].Value != int64(nodeResultMatch) { 300 // mark them as truncated 301 nodes[idx].Location |= truncationMark 302 } 303 // reset the value 304 nodes[idx].Value = 0 305 } 306 307 return m.nodes 308 } 309 310 func buildTreeForStacktraceIDRange( 311 stacktraces *StacktraceIDRange, 312 symbols *Symbols, 313 maxNodes int64, 314 selection *SelectedStackTraces, 315 ) *model.Tree { 316 // Get the parent pointer tree for the range. The tree is 317 // not specific to the samples we've collected and includes 318 // all the stack traces. 319 nodes := stacktraces.Nodes() 320 // Filter stacktrace filter 321 if selection != nil && len(selection.callSite) > 0 { 322 nodes = markSelectedNodes(symbols, selection, nodes) 323 } 324 325 // SetNodeValues sets values to the nodes that match the 326 // samples we've collected; those are not always leaves: 327 // a node may have its own value (self) and children. 328 stacktraces.SetNodeValues(nodes) 329 // Propagate the values to the parent nodes. This is required 330 // to identify the nodes that should be removed from the tree. 331 // For each node, the value should be a sum of all the child 332 // nodes (total). 333 propagateNodeValues(nodes) 334 // Next step is truncation: we need to mark leaf nodes of the 335 // stack traces we want to keep, and ensure that their values 336 // reflect their own weight (total for truncated leaves, self 337 // for the true leaves). 338 // We preserve more nodes than requested to preserve more 339 // locations with inlined functions. The multiplier is 340 // chosen empirically; it should be roughly equal to the 341 // ratio of nodes in the location tree to the nodes in the 342 // function tree (after truncation). 343 markNodesForTruncation(nodes, maxNodes*4) 344 // We now build an intermediate tree from the marked stack 345 // traces. The reason is that the intermediate tree is 346 // substantially bigger than the final one. The intermediate 347 // tree is optimized for inserts and lookups, while the output 348 // tree is optimized for merge operations. 349 t := model.NewStacktraceTree(int(maxNodes)) 350 insertStacktraces(t, nodes, symbols) 351 // Finally, we convert the stack trace tree into the function 352 // tree, dropping insignificant functions, and symbolizing the 353 // nodes (function names). 354 return t.Tree(maxNodes, symbols.Strings) 355 } 356 357 func propagateNodeValues(nodes []Node) { 358 for i := len(nodes) - 1; i >= 1; i-- { 359 if p := nodes[i].Parent; p > 0 { 360 nodes[p].Value += nodes[i].Value 361 } 362 } 363 } 364 365 func markNodesForTruncation(nodes []Node, maxNodes int64) { 366 m := minValue(nodes, maxNodes) 367 for i := 1; i < len(nodes); i++ { 368 p := nodes[i].Parent 369 v := nodes[i].Value 370 // Remove previous truncation mark, potential set by the stacktrace filter 371 nodes[i].Location &= ^truncationMark 372 if v < m { 373 nodes[i].Location |= truncationMark 374 // Preserve values of truncated locations. The weight 375 // of the truncated chain is accounted in the parent. 376 if p >= 0 && nodes[p].Location&truncationMark != 0 { 377 continue 378 } 379 } 380 // Subtract the value of the location from the parent: 381 // by doing so we ensure that the transient nodes have zero 382 // weight, and then will be ignored by the tree builder. 383 if p >= 0 { 384 nodes[p].Value -= v 385 } 386 } 387 } 388 389 func insertStacktraces(t *model.StacktraceTree, nodes []Node, symbols *Symbols) { 390 l := int32(len(nodes)) 391 s := make([]int32, 0, 64) 392 for i := int32(1); i < l; i++ { 393 p := nodes[i].Parent 394 v := nodes[i].Value 395 if v > 0 && nodes[p].Location&truncationMark == 0 { 396 s = resolveStack(s, nodes, i, symbols) 397 t.Insert(s, v) 398 } 399 } 400 } 401 402 func resolveStack(dst []int32, nodes []Node, i int32, symbols *Symbols) []int32 { 403 dst = dst[:0] 404 for i > 0 { 405 j := nodes[i].Location 406 if j&truncationMark > 0 { 407 dst = append(dst, sentinel) 408 } else { 409 loc := symbols.Locations[j] 410 for l := 0; l < len(loc.Line); l++ { 411 dst = append(dst, int32(symbols.Functions[loc.Line[l].FunctionId].Name)) 412 } 413 } 414 i = nodes[i].Parent 415 } 416 return dst 417 } 418 419 func minValue(nodes []Node, maxNodes int64) int64 { 420 if maxNodes < 1 || maxNodes >= int64(len(nodes)) { 421 return 0 422 } 423 h := make([]int64, 0, maxNodes) 424 for i := range nodes { 425 v := nodes[i].Value 426 if len(h) >= int(maxNodes) { 427 if v > h[0] { 428 h = minheap.Pop(h) 429 } else { 430 continue 431 } 432 } 433 h = minheap.Push(h, v) 434 } 435 if len(h) < int(maxNodes) { 436 return 0 437 } 438 return h[0] 439 }