github.com/pyroscope-io/pyroscope@v0.37.3-0.20230725203016-5f6947968bd0/pkg/convert/pprof/parser.go (about) 1 package pprof 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "time" 8 9 "github.com/pyroscope-io/pyroscope/pkg/storage" 10 "github.com/pyroscope-io/pyroscope/pkg/storage/metadata" 11 "github.com/pyroscope-io/pyroscope/pkg/storage/segment" 12 "github.com/pyroscope-io/pyroscope/pkg/storage/tree" 13 ) 14 15 type ParserInterface interface { 16 ParsePprof(ctx context.Context, startTime, endTime time.Time, bs []byte, cumulativeOnly bool) error 17 } 18 19 type Parser struct { 20 putter storage.Putter 21 spyName string 22 labels map[string]string 23 skipExemplars bool 24 sampleTypes map[string]*tree.SampleTypeConfig 25 stackFrameFormatter StackFrameFormatter 26 27 cache tree.LabelsCache 28 sampleTypesFilter func(string) bool 29 } 30 31 type ParserConfig struct { 32 Putter storage.Putter 33 SpyName string 34 Labels map[string]string 35 SkipExemplars bool 36 SampleTypes map[string]*tree.SampleTypeConfig 37 StackFrameFormatter StackFrameFormatter 38 } 39 40 func NewParser(config ParserConfig) *Parser { 41 if config.StackFrameFormatter == nil { 42 config.StackFrameFormatter = &UnsafeFunctionNameFormatter{} 43 } 44 return &Parser{ 45 putter: config.Putter, 46 spyName: config.SpyName, 47 labels: config.Labels, 48 sampleTypes: config.SampleTypes, 49 skipExemplars: config.SkipExemplars, 50 stackFrameFormatter: config.StackFrameFormatter, 51 52 cache: make(tree.LabelsCache), 53 sampleTypesFilter: filterKnownSamples(config.SampleTypes), 54 } 55 } 56 57 func filterKnownSamples(sampleTypes map[string]*tree.SampleTypeConfig) func(string) bool { 58 return func(s string) bool { 59 _, ok := sampleTypes[s] 60 return ok 61 } 62 } 63 64 func (p *Parser) Reset() { p.cache = make(tree.LabelsCache) } 65 66 func (p *Parser) ParsePprof(ctx context.Context, startTime, endTime time.Time, bs []byte, cumulativeOnly bool) error { 67 b := bytes.NewReader(bs) 68 return DecodePool(b, func(profile *tree.Profile) error { 69 return p.Convert(ctx, startTime, endTime, profile, cumulativeOnly) 70 }) 71 } 72 73 func (p *Parser) Convert(ctx context.Context, startTime, endTime time.Time, profile *tree.Profile, cumulativeOnly bool) error { 74 return p.iterate(profile, cumulativeOnly, func(vt *tree.ValueType, l tree.Labels, t *tree.Tree) (keep bool, err error) { 75 if vt.Type >= int64(len(profile.StringTable)) { 76 return false, fmt.Errorf("sample value type is invalid: %d", vt.Type) 77 } 78 sampleType := profile.StringTable[vt.Type] 79 sampleTypeConfig, ok := p.sampleTypes[sampleType] 80 if !ok { 81 return false, fmt.Errorf("sample value type is unknown") 82 } 83 pi := storage.PutInput{ 84 StartTime: startTime, 85 EndTime: endTime, 86 SpyName: p.spyName, 87 Val: t, 88 } 89 // Cumulative profiles require two consecutive samples, 90 // therefore we have to cache this trie. 91 if sampleTypeConfig.Cumulative { 92 prev, found := p.load(vt.Type, l) 93 if !found { 94 // Keep the current entry in cache. 95 return true, nil 96 } 97 // Take diff with the previous tree. 98 // The result is written to prev, t is not changed. 99 pi.Val = prev.Diff(t) 100 } 101 pi.AggregationType = sampleTypeConfig.Aggregation 102 if sampleTypeConfig.Sampled { 103 pi.SampleRate = sampleRate(profile) 104 } 105 if sampleTypeConfig.DisplayName != "" { 106 sampleType = sampleTypeConfig.DisplayName 107 } 108 if sampleTypeConfig.Units != "" { 109 pi.Units = sampleTypeConfig.Units 110 } else { 111 // TODO(petethepig): this conversion is questionable 112 pi.Units = metadata.Units(profile.StringTable[vt.Unit]) 113 } 114 pi.Key = p.buildName(sampleType, profile.ResolveLabels(l)) 115 err = p.putter.Put(ctx, &pi) 116 return sampleTypeConfig.Cumulative, err 117 }) 118 } 119 120 func sampleRate(p *tree.Profile) uint32 { 121 if p.Period <= 0 || p.PeriodType == nil { 122 return 0 123 } 124 sampleUnit := time.Nanosecond 125 switch p.StringTable[p.PeriodType.Unit] { 126 case "microseconds": 127 sampleUnit = time.Microsecond 128 case "milliseconds": 129 sampleUnit = time.Millisecond 130 case "seconds": 131 sampleUnit = time.Second 132 } 133 return uint32(time.Second / (sampleUnit * time.Duration(p.Period))) 134 } 135 136 func (p *Parser) buildName(sampleTypeName string, labels map[string]string) *segment.Key { 137 for k, v := range p.labels { 138 labels[k] = v 139 } 140 labels["__name__"] += "." + sampleTypeName 141 return segment.NewKey(labels) 142 } 143 144 func (p *Parser) load(sampleType int64, labels tree.Labels) (*tree.Tree, bool) { 145 e, ok := p.cache.Get(sampleType, labels.Hash()) 146 if !ok { 147 return nil, false 148 } 149 return e.Tree, true 150 } 151 152 func (p *Parser) iterate(x *tree.Profile, cumulativeOnly bool, fn func(vt *tree.ValueType, l tree.Labels, t *tree.Tree) (keep bool, err error)) error { 153 c := make(tree.LabelsCache) 154 p.readTrees(x, c, tree.NewFinder(x), cumulativeOnly) 155 for sampleType, entries := range c { 156 if t, ok := x.ResolveSampleType(sampleType); ok { 157 for h, e := range entries { 158 keep, err := fn(t, e.Labels, e.Tree) 159 if err != nil { 160 return err 161 } 162 if !keep { 163 c.Remove(sampleType, h) 164 } 165 } 166 } 167 } 168 p.cache = c 169 return nil 170 } 171 172 // readTrees generates trees from the profile populating c. 173 func (p *Parser) readTrees(x *tree.Profile, c tree.LabelsCache, f tree.Finder, cumulativeOnly bool) { 174 // SampleType value indexes. 175 indexes := make([]int, 0, len(x.SampleType)) 176 // Corresponding type IDs used as the main cache keys. 177 types := make([]int64, 0, len(x.SampleType)) 178 for i, s := range x.SampleType { 179 st := x.StringTable[s.Type] 180 if p.sampleTypesFilter != nil && p.sampleTypesFilter(st) { 181 if !cumulativeOnly || (cumulativeOnly && p.sampleTypes[st].Cumulative) { 182 indexes = append(indexes, i) 183 types = append(types, s.Type) 184 } 185 } 186 } 187 if len(indexes) == 0 { 188 return 189 } 190 stack := make([][]byte, 0, 16) 191 for _, s := range x.Sample { 192 for i := len(s.LocationId) - 1; i >= 0; i-- { 193 // Resolve stack. 194 loc, ok := f.FindLocation(s.LocationId[i]) 195 if !ok { 196 continue 197 } 198 // Multiple line indicates this location has inlined functions, 199 // where the last entry represents the caller into which the 200 // preceding entries were inlined. 201 // 202 // E.g., if memcpy() is inlined into printf: 203 // line[0].function_name == "memcpy" 204 // line[1].function_name == "printf" 205 // 206 // Therefore iteration goes in reverse order. 207 for j := len(loc.Line) - 1; j >= 0; j-- { 208 fn, ok := f.FindFunction(loc.Line[j].FunctionId) 209 if !ok || x.StringTable[fn.Name] == "" { 210 continue 211 } 212 sf := p.stackFrameFormatter.format(x, fn, loc.Line[j]) 213 stack = append(stack, sf) 214 } 215 } 216 // Insert tree nodes. 217 for i, vi := range indexes { 218 v := uint64(s.Value[vi]) 219 if v == 0 { 220 continue 221 } 222 // If the sample has ProfileID label, it belongs to an exemplar. 223 if j := labelIndex(x, s.Label, segment.ProfileIDLabelName); j >= 0 { 224 // Regardless of whether we should skip exemplars or not, the value 225 // should be appended to the exemplar baseline profile (w/o ProfileID label). 226 c.GetOrCreateTree(types[i], tree.CutLabel(s.Label, j)).InsertStack(stack, v) 227 if p.skipExemplars { 228 continue 229 } 230 } 231 c.GetOrCreateTree(types[i], s.Label).InsertStack(stack, v) 232 } 233 stack = stack[:0] 234 } 235 } 236 237 func labelIndex(p *tree.Profile, labels tree.Labels, key string) int { 238 for i, label := range labels { 239 if n, ok := p.ResolveLabelName(label); ok && n == key { 240 return i 241 } 242 } 243 return -1 244 }