github.com/grafana/pyroscope@v1.18.0/pkg/og/convert/pprof/profile.go (about) 1 package pprof 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/json" 7 "fmt" 8 "mime/multipart" 9 "path/filepath" 10 "strings" 11 "time" 12 13 "connectrpc.com/connect" 14 "github.com/grafana/dskit/tenant" 15 "github.com/prometheus/prometheus/model/labels" 16 17 profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" 18 v1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" 19 distributormodel "github.com/grafana/pyroscope/pkg/distributor/model" 20 phlaremodel "github.com/grafana/pyroscope/pkg/model" 21 "github.com/grafana/pyroscope/pkg/og/ingestion" 22 "github.com/grafana/pyroscope/pkg/og/storage" 23 "github.com/grafana/pyroscope/pkg/og/storage/tree" 24 "github.com/grafana/pyroscope/pkg/og/util/form" 25 "github.com/grafana/pyroscope/pkg/pprof" 26 ) 27 28 type RawProfile struct { 29 RawData []byte // Represents raw request body as per ingestion API. 30 FormDataContentType string // Set optionally, if RawData is multipart form. 31 // Initializes lazily on handleRawData, if not present. 32 Profile []byte // Represents raw pprof data. 33 34 SampleTypeConfig map[string]*tree.SampleTypeConfig 35 } 36 37 func (p *RawProfile) ContentType() string { 38 if p.FormDataContentType == "" { 39 return "binary/octet-stream" 40 } 41 return p.FormDataContentType 42 } 43 44 const ( 45 formFieldProfile = "profile" 46 formFieldPreviousProfile = "prev_profile" 47 formFieldSampleTypeConfig = "sample_type_config" 48 ) 49 50 // ParseToPprof is not doing much now. It parses the profile with no processing/splitting, adds labels. 51 func (p *RawProfile) ParseToPprof(ctx context.Context, md ingestion.Metadata, limits ingestion.Limits) (res *distributormodel.PushRequest, err error) { 52 defer func() { 53 r := recover() 54 if r != nil { 55 err = fmt.Errorf("/ingest pprof.(*RawProfile).ParseToPprof panic %v", r) 56 } 57 }() 58 err = p.handleRawData() 59 if err != nil { 60 return nil, fmt.Errorf("failed to parse pprof /ingest multipart form %w", err) 61 } 62 res = &distributormodel.PushRequest{ 63 ReceivedCompressedProfileSize: len(p.Profile), 64 RawProfileType: distributormodel.RawProfileTypePPROF, 65 Series: nil, 66 } 67 if len(p.Profile) == 0 { 68 return res, nil 69 } 70 71 tenantID, err := tenant.TenantID(ctx) 72 if err != nil { 73 return nil, err 74 } 75 maxBytes := int64(limits.MaxProfileSizeBytes(tenantID)) 76 77 profile, err := pprof.RawFromBytesWithLimit(p.Profile, maxBytes) 78 if err != nil { 79 return nil, connect.NewError(connect.CodeInvalidArgument, err) 80 } 81 82 fixTime(profile, md) 83 FixFunctionNamesForScriptingLanguages(profile, md) 84 if p.isDotnetspy(md) { 85 FixFunctionIDForBrokenDotnet(profile.Profile) 86 fixSampleTypes(profile.Profile) 87 } 88 89 res.Series = []*distributormodel.ProfileSeries{{ 90 Labels: p.createLabels(profile, md), 91 Profile: profile, 92 RawProfile: p.Profile, 93 }} 94 return 95 } 96 97 func (p *RawProfile) isDotnetspy(md ingestion.Metadata) bool { 98 if md.SpyName == "dotnetspy" { 99 return true 100 } 101 stc := p.getSampleTypes() 102 return md.SpyName == "unknown" && stc != nil && stc["inuse-space"] != nil 103 } 104 105 func fixTime(profile *pprof.Profile, md ingestion.Metadata) { 106 // for old versions of pyspy, rbspy, pyroscope-rs 107 // https://github.com/grafana/pyroscope-rs/pull/134 108 // profile.TimeNanos can be in microseconds 109 x := time.Unix(0, profile.TimeNanos) 110 if x.IsZero() || x.Year() == 1970 { 111 profile.TimeNanos = md.StartTime.UnixNano() 112 } 113 } 114 115 func (p *RawProfile) Parse(_ context.Context, _ storage.Putter, _ storage.MetricsExporter, md ingestion.Metadata) error { 116 return fmt.Errorf("parsing pprof to tree/storage.Putter is no longer supported") 117 } 118 119 func (p *RawProfile) handleRawData() (err error) { 120 if p.FormDataContentType != "" { 121 // The profile was ingested as a multipart form. Load parts to 122 // Profile, PreviousProfile, and SampleTypeConfig. 123 if err := p.loadPprofFromForm(); err != nil { 124 return err 125 } 126 } else { 127 p.Profile = p.RawData 128 } 129 130 return nil 131 } 132 133 func (p *RawProfile) loadPprofFromForm() error { 134 boundary, err := form.ParseBoundary(p.FormDataContentType) 135 if err != nil { 136 return err 137 } 138 139 f, err := multipart.NewReader(bytes.NewReader(p.RawData), boundary).ReadForm(32 << 20) 140 if err != nil { 141 return err 142 } 143 defer func() { 144 _ = f.RemoveAll() 145 }() 146 147 p.Profile, err = form.ReadField(f, formFieldProfile) 148 if err != nil { 149 return err 150 } 151 PreviousProfile, err := form.ReadField(f, formFieldPreviousProfile) 152 if err != nil { 153 return err 154 } 155 if PreviousProfile != nil { 156 return fmt.Errorf("unsupported client version. " + 157 "Please update github.com/grafana/pyroscope-go to the latest version") 158 } 159 160 r, err := form.ReadField(f, formFieldSampleTypeConfig) 161 if err != nil || r == nil { 162 return err 163 } 164 var config map[string]*tree.SampleTypeConfig 165 if err = json.Unmarshal(r, &config); err != nil { 166 return err 167 } 168 p.SampleTypeConfig = config 169 return nil 170 } 171 172 func (p *RawProfile) metricName(profile *pprof.Profile) string { 173 stConfigs := p.getSampleTypes() 174 var st string 175 for _, ist := range profile.Profile.SampleType { 176 st = profile.StringTable[ist.Type] 177 if st == "wall" { 178 return st 179 } 180 } 181 for _, ist := range profile.Profile.SampleType { 182 st = profile.StringTable[ist.Type] 183 stConfig := stConfigs[st] 184 185 if stConfig != nil && stConfig.DisplayName != "" { 186 st = stConfig.DisplayName 187 } 188 if strings.Contains(st, "cpu") { 189 return "process_cpu" 190 } 191 if strings.Contains(st, "alloc_") || strings.Contains(st, "inuse_") || st == "space" || st == "objects" { 192 return "memory" 193 } 194 if strings.Contains(st, "mutex_") { 195 return "mutex" 196 } 197 if strings.Contains(st, "block_") { 198 return "block" 199 } 200 if strings.Contains(st, "goroutines") { 201 return "goroutines" 202 } 203 } 204 return st // should not happen 205 206 } 207 208 func (p *RawProfile) createLabels(profile *pprof.Profile, md ingestion.Metadata) []*v1.LabelPair { 209 hasServiceName := false 210 for k := range md.LabelSet.Labels() { 211 if k == phlaremodel.LabelNameServiceName { 212 hasServiceName = true 213 break 214 } 215 } 216 217 ls := make([]*v1.LabelPair, 0, len(md.LabelSet.Labels())+4) 218 ls = append(ls, &v1.LabelPair{ 219 Name: labels.MetricName, 220 Value: p.metricName(profile), 221 }, &v1.LabelPair{ 222 Name: phlaremodel.LabelNameDelta, 223 Value: "false", 224 }, &v1.LabelPair{ 225 Name: phlaremodel.LabelNamePyroscopeSpy, 226 Value: md.SpyName, 227 }) 228 229 // Only add service_name if it doesn't exist 230 if !hasServiceName { 231 ls = append(ls, &v1.LabelPair{ 232 Name: phlaremodel.LabelNameServiceName, 233 Value: md.LabelSet.ServiceName(), 234 }) 235 } 236 237 for k, v := range md.LabelSet.Labels() { 238 if !phlaremodel.IsLabelAllowedForIngestion(k) { 239 continue 240 } 241 ls = append(ls, &v1.LabelPair{ 242 Name: k, 243 Value: v, 244 }) 245 } 246 return ls 247 } 248 func (p *RawProfile) getSampleTypes() map[string]*tree.SampleTypeConfig { 249 sampleTypes := tree.DefaultSampleTypeMapping 250 if p.SampleTypeConfig != nil { 251 sampleTypes = p.SampleTypeConfig 252 } 253 return sampleTypes 254 } 255 256 func needFunctionNameRewrite(md ingestion.Metadata) bool { 257 return isScriptingSpy(md) 258 } 259 260 func SpyNameForFunctionNameRewrite() string { 261 return "scripting" 262 } 263 264 func isScriptingSpy(md ingestion.Metadata) bool { 265 return md.SpyName == "pyspy" || md.SpyName == "rbspy" || md.SpyName == "scripting" 266 } 267 268 // FixFunctionNamesForScriptingLanguages modifies the function names in the provided profile 269 // to include line numbers. This is a workaround for frontend limitations in rendering line numbers. 270 // The function is specifically designed for profiles generated by scripting languages. 271 // Note: This function modifies the provided profile in place. 272 func FixFunctionNamesForScriptingLanguages(p *pprof.Profile, md ingestion.Metadata) { 273 if !needFunctionNameRewrite(md) { 274 return 275 } 276 smap := map[string]int{} 277 addString := func(name string) int { 278 sid := smap[name] 279 if sid == 0 { 280 sid = len(p.StringTable) 281 p.StringTable = append(p.StringTable, name) 282 smap[name] = sid 283 } 284 return sid 285 } 286 funcId2Index := map[uint64]int64{} 287 newFunctions := map[string]*profilev1.Function{} 288 maxId := uint64(0) 289 for index, fn := range p.Function { 290 funcId2Index[fn.Id] = int64(index) 291 if fn.Id > maxId { 292 maxId = fn.Id 293 } 294 } 295 for _, location := range p.Location { 296 for _, line := range location.Line { 297 fn := p.Function[funcId2Index[line.FunctionId]] 298 filename := p.StringTable[fn.Filename] 299 // Skip rewriting for pyspy if the filename is an absolute path 300 if md.SpyName == "pyspy" && filepath.IsAbs(filename) { 301 continue 302 } 303 name := fmt.Sprintf("%s %s", filename, p.StringTable[fn.Name]) 304 newFunc, ok := newFunctions[name] 305 if !ok { 306 maxId++ 307 newFunc = &profilev1.Function{ 308 Id: maxId, 309 Name: int64(addString(name)), 310 Filename: fn.Filename, 311 SystemName: fn.SystemName, 312 StartLine: fn.StartLine, 313 } 314 newFunctions[name] = newFunc 315 p.Function = append(p.Function, newFunc) 316 } 317 line.FunctionId = newFunc.Id 318 } 319 } 320 } 321 322 func fixSampleTypes(profile *profilev1.Profile) { 323 for _, st := range profile.SampleType { 324 sts := profile.StringTable[st.Type] 325 if strings.Contains(sts, "-") { 326 sts = strings.ReplaceAll(sts, "-", "_") 327 profile.StringTable[st.Type] = sts 328 } 329 } 330 } 331 332 func FixFunctionIDForBrokenDotnet(profile *profilev1.Profile) { 333 for _, function := range profile.Function { 334 if function.Id != 0 { 335 return 336 } 337 } 338 if len(profile.Function) != len(profile.Location) { 339 return 340 } 341 for i := range profile.Location { 342 profile.Function[i].Id = profile.Location[i].Id 343 } 344 }