github.com/grafana/pyroscope@v1.18.0/pkg/og/convert/pprof/profile.go (about)

     1  package pprof
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/json"
     7  	"fmt"
     8  	"mime/multipart"
     9  	"path/filepath"
    10  	"strings"
    11  	"time"
    12  
    13  	"connectrpc.com/connect"
    14  	"github.com/grafana/dskit/tenant"
    15  	"github.com/prometheus/prometheus/model/labels"
    16  
    17  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    18  	v1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    19  	distributormodel "github.com/grafana/pyroscope/pkg/distributor/model"
    20  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    21  	"github.com/grafana/pyroscope/pkg/og/ingestion"
    22  	"github.com/grafana/pyroscope/pkg/og/storage"
    23  	"github.com/grafana/pyroscope/pkg/og/storage/tree"
    24  	"github.com/grafana/pyroscope/pkg/og/util/form"
    25  	"github.com/grafana/pyroscope/pkg/pprof"
    26  )
    27  
    28  type RawProfile struct {
    29  	RawData             []byte // Represents raw request body as per ingestion API.
    30  	FormDataContentType string // Set optionally, if RawData is multipart form.
    31  	// Initializes lazily on handleRawData, if not present.
    32  	Profile []byte // Represents raw pprof data.
    33  
    34  	SampleTypeConfig map[string]*tree.SampleTypeConfig
    35  }
    36  
    37  func (p *RawProfile) ContentType() string {
    38  	if p.FormDataContentType == "" {
    39  		return "binary/octet-stream"
    40  	}
    41  	return p.FormDataContentType
    42  }
    43  
    44  const (
    45  	formFieldProfile          = "profile"
    46  	formFieldPreviousProfile  = "prev_profile"
    47  	formFieldSampleTypeConfig = "sample_type_config"
    48  )
    49  
    50  // ParseToPprof is not doing much now. It parses the profile with no processing/splitting, adds labels.
    51  func (p *RawProfile) ParseToPprof(ctx context.Context, md ingestion.Metadata, limits ingestion.Limits) (res *distributormodel.PushRequest, err error) {
    52  	defer func() {
    53  		r := recover()
    54  		if r != nil {
    55  			err = fmt.Errorf("/ingest pprof.(*RawProfile).ParseToPprof panic %v", r)
    56  		}
    57  	}()
    58  	err = p.handleRawData()
    59  	if err != nil {
    60  		return nil, fmt.Errorf("failed to parse pprof /ingest multipart form %w", err)
    61  	}
    62  	res = &distributormodel.PushRequest{
    63  		ReceivedCompressedProfileSize: len(p.Profile),
    64  		RawProfileType:                distributormodel.RawProfileTypePPROF,
    65  		Series:                        nil,
    66  	}
    67  	if len(p.Profile) == 0 {
    68  		return res, nil
    69  	}
    70  
    71  	tenantID, err := tenant.TenantID(ctx)
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  	maxBytes := int64(limits.MaxProfileSizeBytes(tenantID))
    76  
    77  	profile, err := pprof.RawFromBytesWithLimit(p.Profile, maxBytes)
    78  	if err != nil {
    79  		return nil, connect.NewError(connect.CodeInvalidArgument, err)
    80  	}
    81  
    82  	fixTime(profile, md)
    83  	FixFunctionNamesForScriptingLanguages(profile, md)
    84  	if p.isDotnetspy(md) {
    85  		FixFunctionIDForBrokenDotnet(profile.Profile)
    86  		fixSampleTypes(profile.Profile)
    87  	}
    88  
    89  	res.Series = []*distributormodel.ProfileSeries{{
    90  		Labels:     p.createLabels(profile, md),
    91  		Profile:    profile,
    92  		RawProfile: p.Profile,
    93  	}}
    94  	return
    95  }
    96  
    97  func (p *RawProfile) isDotnetspy(md ingestion.Metadata) bool {
    98  	if md.SpyName == "dotnetspy" {
    99  		return true
   100  	}
   101  	stc := p.getSampleTypes()
   102  	return md.SpyName == "unknown" && stc != nil && stc["inuse-space"] != nil
   103  }
   104  
   105  func fixTime(profile *pprof.Profile, md ingestion.Metadata) {
   106  	// for old versions of pyspy, rbspy, pyroscope-rs
   107  	// https://github.com/grafana/pyroscope-rs/pull/134
   108  	// profile.TimeNanos can be in microseconds
   109  	x := time.Unix(0, profile.TimeNanos)
   110  	if x.IsZero() || x.Year() == 1970 {
   111  		profile.TimeNanos = md.StartTime.UnixNano()
   112  	}
   113  }
   114  
   115  func (p *RawProfile) Parse(_ context.Context, _ storage.Putter, _ storage.MetricsExporter, md ingestion.Metadata) error {
   116  	return fmt.Errorf("parsing pprof to tree/storage.Putter is no longer supported")
   117  }
   118  
   119  func (p *RawProfile) handleRawData() (err error) {
   120  	if p.FormDataContentType != "" {
   121  		// The profile was ingested as a multipart form. Load parts to
   122  		// Profile, PreviousProfile, and SampleTypeConfig.
   123  		if err := p.loadPprofFromForm(); err != nil {
   124  			return err
   125  		}
   126  	} else {
   127  		p.Profile = p.RawData
   128  	}
   129  
   130  	return nil
   131  }
   132  
   133  func (p *RawProfile) loadPprofFromForm() error {
   134  	boundary, err := form.ParseBoundary(p.FormDataContentType)
   135  	if err != nil {
   136  		return err
   137  	}
   138  
   139  	f, err := multipart.NewReader(bytes.NewReader(p.RawData), boundary).ReadForm(32 << 20)
   140  	if err != nil {
   141  		return err
   142  	}
   143  	defer func() {
   144  		_ = f.RemoveAll()
   145  	}()
   146  
   147  	p.Profile, err = form.ReadField(f, formFieldProfile)
   148  	if err != nil {
   149  		return err
   150  	}
   151  	PreviousProfile, err := form.ReadField(f, formFieldPreviousProfile)
   152  	if err != nil {
   153  		return err
   154  	}
   155  	if PreviousProfile != nil {
   156  		return fmt.Errorf("unsupported client version. " +
   157  			"Please update github.com/grafana/pyroscope-go to the latest version")
   158  	}
   159  
   160  	r, err := form.ReadField(f, formFieldSampleTypeConfig)
   161  	if err != nil || r == nil {
   162  		return err
   163  	}
   164  	var config map[string]*tree.SampleTypeConfig
   165  	if err = json.Unmarshal(r, &config); err != nil {
   166  		return err
   167  	}
   168  	p.SampleTypeConfig = config
   169  	return nil
   170  }
   171  
   172  func (p *RawProfile) metricName(profile *pprof.Profile) string {
   173  	stConfigs := p.getSampleTypes()
   174  	var st string
   175  	for _, ist := range profile.Profile.SampleType {
   176  		st = profile.StringTable[ist.Type]
   177  		if st == "wall" {
   178  			return st
   179  		}
   180  	}
   181  	for _, ist := range profile.Profile.SampleType {
   182  		st = profile.StringTable[ist.Type]
   183  		stConfig := stConfigs[st]
   184  
   185  		if stConfig != nil && stConfig.DisplayName != "" {
   186  			st = stConfig.DisplayName
   187  		}
   188  		if strings.Contains(st, "cpu") {
   189  			return "process_cpu"
   190  		}
   191  		if strings.Contains(st, "alloc_") || strings.Contains(st, "inuse_") || st == "space" || st == "objects" {
   192  			return "memory"
   193  		}
   194  		if strings.Contains(st, "mutex_") {
   195  			return "mutex"
   196  		}
   197  		if strings.Contains(st, "block_") {
   198  			return "block"
   199  		}
   200  		if strings.Contains(st, "goroutines") {
   201  			return "goroutines"
   202  		}
   203  	}
   204  	return st // should not happen
   205  
   206  }
   207  
   208  func (p *RawProfile) createLabels(profile *pprof.Profile, md ingestion.Metadata) []*v1.LabelPair {
   209  	hasServiceName := false
   210  	for k := range md.LabelSet.Labels() {
   211  		if k == phlaremodel.LabelNameServiceName {
   212  			hasServiceName = true
   213  			break
   214  		}
   215  	}
   216  
   217  	ls := make([]*v1.LabelPair, 0, len(md.LabelSet.Labels())+4)
   218  	ls = append(ls, &v1.LabelPair{
   219  		Name:  labels.MetricName,
   220  		Value: p.metricName(profile),
   221  	}, &v1.LabelPair{
   222  		Name:  phlaremodel.LabelNameDelta,
   223  		Value: "false",
   224  	}, &v1.LabelPair{
   225  		Name:  phlaremodel.LabelNamePyroscopeSpy,
   226  		Value: md.SpyName,
   227  	})
   228  
   229  	// Only add service_name if it doesn't exist
   230  	if !hasServiceName {
   231  		ls = append(ls, &v1.LabelPair{
   232  			Name:  phlaremodel.LabelNameServiceName,
   233  			Value: md.LabelSet.ServiceName(),
   234  		})
   235  	}
   236  
   237  	for k, v := range md.LabelSet.Labels() {
   238  		if !phlaremodel.IsLabelAllowedForIngestion(k) {
   239  			continue
   240  		}
   241  		ls = append(ls, &v1.LabelPair{
   242  			Name:  k,
   243  			Value: v,
   244  		})
   245  	}
   246  	return ls
   247  }
   248  func (p *RawProfile) getSampleTypes() map[string]*tree.SampleTypeConfig {
   249  	sampleTypes := tree.DefaultSampleTypeMapping
   250  	if p.SampleTypeConfig != nil {
   251  		sampleTypes = p.SampleTypeConfig
   252  	}
   253  	return sampleTypes
   254  }
   255  
   256  func needFunctionNameRewrite(md ingestion.Metadata) bool {
   257  	return isScriptingSpy(md)
   258  }
   259  
   260  func SpyNameForFunctionNameRewrite() string {
   261  	return "scripting"
   262  }
   263  
   264  func isScriptingSpy(md ingestion.Metadata) bool {
   265  	return md.SpyName == "pyspy" || md.SpyName == "rbspy" || md.SpyName == "scripting"
   266  }
   267  
   268  // FixFunctionNamesForScriptingLanguages modifies the function names in the provided profile
   269  // to include line numbers. This is a workaround for frontend limitations in rendering line numbers.
   270  // The function is specifically designed for profiles generated by scripting languages.
   271  // Note: This function modifies the provided profile in place.
   272  func FixFunctionNamesForScriptingLanguages(p *pprof.Profile, md ingestion.Metadata) {
   273  	if !needFunctionNameRewrite(md) {
   274  		return
   275  	}
   276  	smap := map[string]int{}
   277  	addString := func(name string) int {
   278  		sid := smap[name]
   279  		if sid == 0 {
   280  			sid = len(p.StringTable)
   281  			p.StringTable = append(p.StringTable, name)
   282  			smap[name] = sid
   283  		}
   284  		return sid
   285  	}
   286  	funcId2Index := map[uint64]int64{}
   287  	newFunctions := map[string]*profilev1.Function{}
   288  	maxId := uint64(0)
   289  	for index, fn := range p.Function {
   290  		funcId2Index[fn.Id] = int64(index)
   291  		if fn.Id > maxId {
   292  			maxId = fn.Id
   293  		}
   294  	}
   295  	for _, location := range p.Location {
   296  		for _, line := range location.Line {
   297  			fn := p.Function[funcId2Index[line.FunctionId]]
   298  			filename := p.StringTable[fn.Filename]
   299  			// Skip rewriting for pyspy if the filename is an absolute path
   300  			if md.SpyName == "pyspy" && filepath.IsAbs(filename) {
   301  				continue
   302  			}
   303  			name := fmt.Sprintf("%s %s", filename, p.StringTable[fn.Name])
   304  			newFunc, ok := newFunctions[name]
   305  			if !ok {
   306  				maxId++
   307  				newFunc = &profilev1.Function{
   308  					Id:         maxId,
   309  					Name:       int64(addString(name)),
   310  					Filename:   fn.Filename,
   311  					SystemName: fn.SystemName,
   312  					StartLine:  fn.StartLine,
   313  				}
   314  				newFunctions[name] = newFunc
   315  				p.Function = append(p.Function, newFunc)
   316  			}
   317  			line.FunctionId = newFunc.Id
   318  		}
   319  	}
   320  }
   321  
   322  func fixSampleTypes(profile *profilev1.Profile) {
   323  	for _, st := range profile.SampleType {
   324  		sts := profile.StringTable[st.Type]
   325  		if strings.Contains(sts, "-") {
   326  			sts = strings.ReplaceAll(sts, "-", "_")
   327  			profile.StringTable[st.Type] = sts
   328  		}
   329  	}
   330  }
   331  
   332  func FixFunctionIDForBrokenDotnet(profile *profilev1.Profile) {
   333  	for _, function := range profile.Function {
   334  		if function.Id != 0 {
   335  			return
   336  		}
   337  	}
   338  	if len(profile.Function) != len(profile.Location) {
   339  		return
   340  	}
   341  	for i := range profile.Location {
   342  		profile.Function[i].Id = profile.Location[i].Id
   343  	}
   344  }