github.com/grafana/pyroscope@v1.18.0/pkg/og/convert/speedscope/parser.go (about)

     1  package speedscope
     2  
     3  import (
     4  	"cmp"
     5  	"context"
     6  	"encoding/json"
     7  	"fmt"
     8  	"slices"
     9  
    10  	"github.com/grafana/pyroscope/pkg/og/ingestion"
    11  	"github.com/grafana/pyroscope/pkg/og/storage"
    12  	"github.com/grafana/pyroscope/pkg/og/storage/metadata"
    13  	"github.com/grafana/pyroscope/pkg/og/storage/tree"
    14  	"golang.org/x/exp/maps"
    15  )
    16  
    17  // RawProfile implements ingestion.RawProfile for Speedscope format
    18  type RawProfile struct {
    19  	RawData []byte
    20  }
    21  
    22  // Parse parses a profile
    23  func (p *RawProfile) Parse(ctx context.Context, putter storage.Putter, _ storage.MetricsExporter, md ingestion.Metadata) error {
    24  	profiles, err := parseAll(p.RawData, md)
    25  	if err != nil {
    26  		return err
    27  	}
    28  
    29  	for _, putInput := range profiles {
    30  		err = putter.Put(ctx, putInput)
    31  		if err != nil {
    32  			return err
    33  		}
    34  	}
    35  	return nil
    36  }
    37  
    38  func parseAll(rawData []byte, md ingestion.Metadata) ([]*storage.PutInput, error) {
    39  	file := speedscopeFile{}
    40  	err := json.Unmarshal(rawData, &file)
    41  	if err != nil {
    42  		return nil, err
    43  	}
    44  	if file.Schema != schema {
    45  		return nil, fmt.Errorf("Unknown schema: %s", file.Schema)
    46  	}
    47  
    48  	results := make([]*storage.PutInput, 0, len(file.Profiles))
    49  	// Not a pointer, we _want_ to copy on call
    50  	input := storage.PutInput{
    51  		StartTime:  md.StartTime,
    52  		EndTime:    md.EndTime,
    53  		SpyName:    md.SpyName,
    54  		SampleRate: md.SampleRate,
    55  		LabelSet:   md.LabelSet,
    56  	}
    57  
    58  	file.Profiles = mergeProfiles(file.Profiles)
    59  
    60  	for _, prof := range file.Profiles {
    61  		putInput, err := parseOne(&prof, input, file.Shared.Frames, len(file.Profiles) > 1)
    62  		if err != nil {
    63  			return nil, err
    64  		}
    65  		results = append(results, putInput)
    66  	}
    67  	return results, nil
    68  }
    69  
    70  // mergeProfiles combines profiles with the same mergeKey.
    71  // This prevents situations downstream where two different
    72  // profiles are deduped during congestion for having the
    73  // same label set and timestamp.
    74  func mergeProfiles(profiles []profile) []profile {
    75  	type mergeKey struct {
    76  		name       string
    77  		t          string
    78  		unit       unit
    79  		startValue float64
    80  	}
    81  
    82  	merged := make(map[mergeKey]profile)
    83  	for _, prof := range profiles {
    84  		k := mergeKey{
    85  			name:       prof.Name,
    86  			t:          prof.Type,
    87  			unit:       prof.Unit,
    88  			startValue: prof.StartValue,
    89  		}
    90  
    91  		if mergedProf, ok := merged[k]; ok {
    92  			mergedProf.Samples = append(mergedProf.Samples, prof.Samples...)
    93  			mergedProf.Events = append(mergedProf.Events, prof.Events...)
    94  			mergedProf.Weights = append(mergedProf.Weights, prof.Weights...)
    95  			merged[k] = mergedProf
    96  		} else {
    97  			merged[k] = prof
    98  		}
    99  	}
   100  
   101  	m := maps.Values(merged)
   102  	slices.SortFunc(m, func(a, b profile) int {
   103  		return cmp.Compare(a.StartValue, b.StartValue)
   104  	})
   105  	return m
   106  }
   107  
   108  func parseOne(prof *profile, putInput storage.PutInput, frames []frame, multi bool) (*storage.PutInput, error) {
   109  	// Fixup some metadata
   110  	putInput.Units = prof.Unit.chooseMetadataUnit()
   111  	putInput.AggregationType = metadata.SumAggregationType
   112  	if multi {
   113  		putInput.LabelSet = prof.Unit.chooseKey(putInput.LabelSet)
   114  	}
   115  
   116  	// This label is important to prevent all speedscope profiles
   117  	// from the same ingestion upload being deduped during compaction.
   118  	// Currently, all profiles are associated with the same timestamp
   119  	// from `putInput`. Since profiles are deduped over label set + timestamp,
   120  	// this label prevents unintended downstream deduping. See also mergeProfiles
   121  	// which addresses the case where the profile names (and other relevant fields)
   122  	// are the same for multiple profiles.
   123  	putInput.LabelSet.Add("profile_name", prof.Name)
   124  
   125  	// TODO(petethepig): We need a way to tell if it's a default or a value set by user
   126  	//   See https://github.com/pyroscope-io/pyroscope/issues/1598
   127  	if putInput.SampleRate == 100 {
   128  		putInput.SampleRate = uint32(prof.Unit.defaultSampleRate())
   129  	}
   130  
   131  	var err error
   132  	tr := tree.New()
   133  	switch prof.Type {
   134  	case profileEvented:
   135  		err = parseEvented(tr, prof, frames)
   136  	case profileSampled:
   137  		err = parseSampled(tr, prof, frames)
   138  	default:
   139  		return nil, fmt.Errorf("Profile type %s not supported", prof.Type)
   140  	}
   141  	if err != nil {
   142  		return nil, err
   143  	}
   144  
   145  	putInput.Val = tr
   146  	return &putInput, nil
   147  }
   148  
   149  func parseEvented(tr *tree.Tree, prof *profile, frames []frame) error {
   150  	last := prof.StartValue
   151  	indexStack := []int{}
   152  	nameStack := []string{}
   153  	precisionMultiplier := prof.Unit.precisionMultiplier()
   154  
   155  	for _, ev := range prof.Events {
   156  		if ev.At < last {
   157  			return fmt.Errorf("Events out of order, %f < %f", ev.At, last)
   158  		}
   159  		fid := int(ev.Frame)
   160  		if fid < 0 || fid >= len(frames) {
   161  			return fmt.Errorf("Invalid frame %d", fid)
   162  		}
   163  
   164  		if ev.Type == eventClose {
   165  			if len(indexStack) == 0 {
   166  				return fmt.Errorf("No stack to close at %f", ev.At)
   167  			}
   168  			lastIdx := len(indexStack) - 1
   169  			if indexStack[lastIdx] != fid {
   170  				return fmt.Errorf("Closing non-open frame %d", fid)
   171  			}
   172  
   173  			// Close this frame
   174  			tr.InsertStackString(nameStack, uint64(ev.At-last)*precisionMultiplier)
   175  			indexStack = indexStack[:lastIdx]
   176  			nameStack = nameStack[:lastIdx]
   177  		} else if ev.Type == eventOpen {
   178  			// Add any time up til now
   179  			if len(nameStack) > 0 {
   180  				tr.InsertStackString(nameStack, uint64(ev.At-last))
   181  			}
   182  
   183  			// Open the frame
   184  			indexStack = append(indexStack, fid)
   185  			nameStack = append(nameStack, frames[fid].Name)
   186  		} else {
   187  			return fmt.Errorf("Unknown event type %s", ev.Type)
   188  		}
   189  
   190  		last = ev.At
   191  	}
   192  
   193  	return nil
   194  }
   195  
   196  func parseSampled(tr *tree.Tree, prof *profile, frames []frame) error {
   197  	if len(prof.Samples) != len(prof.Weights) {
   198  		return fmt.Errorf("Unequal lengths of samples and weights: %d != %d", len(prof.Samples), len(prof.Weights))
   199  	}
   200  
   201  	precisionMultiplier := prof.Unit.precisionMultiplier()
   202  	stack := []string{}
   203  	for i, samp := range prof.Samples {
   204  		weight := prof.Weights[i]
   205  		if weight < 0 {
   206  			return fmt.Errorf("Negative weight %f", weight)
   207  		}
   208  
   209  		for _, frameID := range samp {
   210  			fid := int(frameID)
   211  			if fid < 0 || fid > len(frames) {
   212  				return fmt.Errorf("Invalid frame %d", fid)
   213  			}
   214  			stack = append(stack, frames[fid].Name)
   215  		}
   216  		tr.InsertStackString(stack, uint64(weight)*precisionMultiplier)
   217  
   218  		stack = stack[:0] // clear, but retain memory
   219  	}
   220  	return nil
   221  }
   222  
   223  // Bytes returns the raw bytes of the profile
   224  func (p *RawProfile) Bytes() ([]byte, error) {
   225  	return p.RawData, nil
   226  }
   227  
   228  // ContentType returns the HTTP ContentType of the profile
   229  func (*RawProfile) ContentType() string {
   230  	return "application/json"
   231  }