github.com/grafana/pyroscope@v1.18.0/pkg/model/pprofsplit/pprof_split_test.go (about)

     1  package pprofsplit
     2  
     3  import (
     4  	"fmt"
     5  	"math/rand"
     6  	"testing"
     7  
     8  	"github.com/prometheus/common/model"
     9  	"github.com/prometheus/prometheus/model/relabel"
    10  	"github.com/stretchr/testify/assert"
    11  	"github.com/stretchr/testify/require"
    12  
    13  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    14  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    15  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    16  	"github.com/grafana/pyroscope/pkg/validation"
    17  )
    18  
    19  type sampleSeries struct {
    20  	labels  []*typesv1.LabelPair
    21  	samples []*profilev1.Sample
    22  }
    23  
    24  type mockVisitor struct {
    25  	profile           phlaremodel.Labels
    26  	series            []sampleSeries
    27  	discardedBytes    int
    28  	discardedProfiles int
    29  	err               error
    30  }
    31  
    32  func (m *mockVisitor) VisitProfile(labels phlaremodel.Labels) {
    33  	m.profile = labels
    34  }
    35  
    36  func (m *mockVisitor) VisitSampleSeries(labels phlaremodel.Labels, samples []*profilev1.Sample) {
    37  	m.series = append(m.series, sampleSeries{
    38  		labels:  labels,
    39  		samples: samples,
    40  	})
    41  }
    42  
    43  func (m *mockVisitor) ValidateLabels(labels phlaremodel.Labels) (phlaremodel.Labels, error) {
    44  	return labels, m.err
    45  }
    46  
    47  func (m *mockVisitor) Discarded(profiles, bytes int) {
    48  	m.discardedBytes += bytes
    49  	m.discardedProfiles += profiles
    50  }
    51  
    52  func Test_VisitSampleSeries(t *testing.T) {
    53  	defaultRelabelConfigs := validation.MockDefaultOverrides().IngestionRelabelingRules("")
    54  
    55  	type testCase struct {
    56  		description string
    57  		rules       []*relabel.Config
    58  		labels      []*typesv1.LabelPair
    59  		profile     *profilev1.Profile
    60  
    61  		expected       []sampleSeries
    62  		expectNoSeries bool
    63  		expectLabels   phlaremodel.Labels
    64  
    65  		expectBytesDropped    int
    66  		expectProfilesDropped int
    67  	}
    68  
    69  	testCases := []testCase{
    70  		{
    71  			description: "no series labels, no sample labels",
    72  			profile: &profilev1.Profile{
    73  				Sample: []*profilev1.Sample{{
    74  					Value: []int64{1},
    75  				}},
    76  			},
    77  			expectNoSeries: true,
    78  			expectLabels:   nil,
    79  		},
    80  		{
    81  			description: "has series labels, no sample labels",
    82  			labels: []*typesv1.LabelPair{
    83  				{Name: "foo", Value: "bar"},
    84  			},
    85  			profile: &profilev1.Profile{
    86  				Sample: []*profilev1.Sample{{
    87  					Value: []int64{1},
    88  				}},
    89  			},
    90  			expectNoSeries: true,
    91  			expectLabels: []*typesv1.LabelPair{
    92  				{Name: "foo", Value: "bar"},
    93  			},
    94  		},
    95  		{
    96  			description: "no series labels, all samples have identical label set",
    97  			profile: &profilev1.Profile{
    98  				StringTable: []string{"", "foo", "bar"},
    99  				Sample: []*profilev1.Sample{{
   100  					Value: []int64{1},
   101  					Label: []*profilev1.Label{
   102  						{Key: 1, Str: 2},
   103  					},
   104  				}},
   105  			},
   106  			expected: []sampleSeries{
   107  				{
   108  					labels: []*typesv1.LabelPair{
   109  						{Name: "foo", Value: "bar"},
   110  					},
   111  					samples: []*profilev1.Sample{{
   112  						Value: []int64{1},
   113  						Label: []*profilev1.Label{},
   114  					}},
   115  				},
   116  			},
   117  		},
   118  		{
   119  			description: "has series labels, all samples have identical label set",
   120  			labels: []*typesv1.LabelPair{
   121  				{Name: "baz", Value: "qux"},
   122  			},
   123  			profile: &profilev1.Profile{
   124  				StringTable: []string{"", "foo", "bar"},
   125  				Sample: []*profilev1.Sample{{
   126  					Value: []int64{1},
   127  					Label: []*profilev1.Label{
   128  						{Key: 1, Str: 2},
   129  					},
   130  				}},
   131  			},
   132  			expected: []sampleSeries{
   133  				{
   134  					labels: []*typesv1.LabelPair{
   135  						{Name: "baz", Value: "qux"},
   136  						{Name: "foo", Value: "bar"},
   137  					},
   138  					samples: []*profilev1.Sample{{
   139  						Value: []int64{1},
   140  						Label: []*profilev1.Label{},
   141  					}},
   142  				},
   143  			},
   144  		},
   145  		{
   146  			description: "has series labels, and the only sample label name overlaps with series label, creating overlapping groups",
   147  			labels: []*typesv1.LabelPair{
   148  				{Name: "foo", Value: "bar"},
   149  			},
   150  			profile: &profilev1.Profile{
   151  				StringTable: []string{"", "foo", "bar"},
   152  				Sample: []*profilev1.Sample{
   153  					{
   154  						Value: []int64{1},
   155  						Label: []*profilev1.Label{
   156  							{Key: 1, Str: 2},
   157  						},
   158  					},
   159  					{
   160  						Value: []int64{2},
   161  					},
   162  				},
   163  			},
   164  			expected: []sampleSeries{
   165  				{
   166  					labels: []*typesv1.LabelPair{
   167  						{Name: "foo", Value: "bar"},
   168  					},
   169  					samples: []*profilev1.Sample{
   170  						{
   171  							Value: []int64{3},
   172  							Label: nil,
   173  						},
   174  					},
   175  				},
   176  			},
   177  		},
   178  		{
   179  			description: "has series labels, samples have distinct label sets",
   180  			labels: []*typesv1.LabelPair{
   181  				{Name: "baz", Value: "qux"},
   182  			},
   183  			profile: &profilev1.Profile{
   184  				StringTable: []string{"", "foo", "bar", "waldo", "fred"},
   185  				Sample: []*profilev1.Sample{
   186  					{
   187  						Value: []int64{1},
   188  						Label: []*profilev1.Label{
   189  							{Key: 1, Str: 2},
   190  						},
   191  					},
   192  					{
   193  						Value: []int64{2},
   194  						Label: []*profilev1.Label{
   195  							{Key: 3, Str: 4},
   196  						},
   197  					},
   198  				},
   199  			},
   200  			expected: []sampleSeries{
   201  				{
   202  					labels: []*typesv1.LabelPair{
   203  						{Name: "baz", Value: "qux"},
   204  						{Name: "foo", Value: "bar"},
   205  					},
   206  					samples: []*profilev1.Sample{{
   207  						Value: []int64{1},
   208  						Label: []*profilev1.Label{},
   209  					}},
   210  				},
   211  				{
   212  					labels: []*typesv1.LabelPair{
   213  						{Name: "baz", Value: "qux"},
   214  						{Name: "waldo", Value: "fred"},
   215  					},
   216  					samples: []*profilev1.Sample{{
   217  						Value: []int64{2},
   218  						Label: []*profilev1.Label{},
   219  					}},
   220  				},
   221  			},
   222  		},
   223  		{
   224  			description: "has series labels that should be renamed to no longer include godeltaprof",
   225  			rules:       defaultRelabelConfigs,
   226  			labels: []*typesv1.LabelPair{
   227  				{Name: "__name__", Value: "godeltaprof_memory"},
   228  			},
   229  			profile: &profilev1.Profile{
   230  				StringTable: []string{""},
   231  				Sample: []*profilev1.Sample{{
   232  					Value: []int64{2},
   233  					Label: []*profilev1.Label{},
   234  				}},
   235  			},
   236  			expected: []sampleSeries{
   237  				{
   238  					labels: []*typesv1.LabelPair{
   239  						{Name: "__delta__", Value: "false"},
   240  						{Name: "__name__", Value: "memory"},
   241  						{Name: "__name_replaced__", Value: "godeltaprof_memory"},
   242  					},
   243  					samples: []*profilev1.Sample{{
   244  						Value: []int64{2},
   245  						Label: []*profilev1.Label{},
   246  					}},
   247  				},
   248  			},
   249  		},
   250  		{
   251  			description: "has series labels and sample label, which relabel rules drop",
   252  			rules: []*relabel.Config{
   253  				{
   254  					Action:       relabel.Drop,
   255  					SourceLabels: []model.LabelName{"__name__", "span_name"},
   256  					Separator:    "/",
   257  					Regex:        relabel.MustNewRegexp("unwanted/randomness"),
   258  				},
   259  			},
   260  			labels: []*typesv1.LabelPair{
   261  				{Name: "__name__", Value: "unwanted"},
   262  			},
   263  			profile: &profilev1.Profile{
   264  				StringTable: []string{"", "span_name", "randomness"},
   265  				Sample: []*profilev1.Sample{
   266  					{
   267  						Value: []int64{2},
   268  						Label: []*profilev1.Label{
   269  							{Key: 1, Str: 2},
   270  						},
   271  					},
   272  					{
   273  						Value: []int64{1},
   274  					},
   275  				},
   276  			},
   277  			expectProfilesDropped: 0,
   278  			expectBytesDropped:    3,
   279  			expected: []sampleSeries{
   280  				{
   281  					labels: []*typesv1.LabelPair{
   282  						{Name: "__name__", Value: "unwanted"},
   283  					},
   284  					samples: []*profilev1.Sample{{
   285  						Value: []int64{1},
   286  					}},
   287  				},
   288  			},
   289  		},
   290  		{
   291  			description: "has series/sample labels, drops everything",
   292  			rules: []*relabel.Config{
   293  				{
   294  					Action: relabel.Drop,
   295  					Regex:  relabel.MustNewRegexp(".*"),
   296  				},
   297  			},
   298  			labels: []*typesv1.LabelPair{
   299  				{Name: "__name__", Value: "unwanted"},
   300  			},
   301  			profile: &profilev1.Profile{
   302  				StringTable: []string{"", "span_name", "randomness"},
   303  				Sample: []*profilev1.Sample{
   304  					{
   305  						Value: []int64{2},
   306  						Label: []*profilev1.Label{
   307  							{Key: 1, Str: 2},
   308  						},
   309  					},
   310  					{
   311  						Value: []int64{1},
   312  					},
   313  				},
   314  			},
   315  			expectProfilesDropped: 1,
   316  			expectBytesDropped:    6,
   317  			expected:              []sampleSeries{},
   318  		},
   319  		{
   320  			description: "has series labels / sample rules, drops samples label",
   321  			rules: []*relabel.Config{
   322  				{
   323  					Action:      relabel.Replace,
   324  					Regex:       relabel.MustNewRegexp(".*"),
   325  					Replacement: "",
   326  					TargetLabel: "span_name",
   327  				},
   328  			},
   329  			labels: []*typesv1.LabelPair{
   330  				{Name: "__name__", Value: "unwanted"},
   331  			},
   332  			profile: &profilev1.Profile{
   333  				StringTable: []string{"", "span_name", "randomness"},
   334  				Sample: []*profilev1.Sample{
   335  					{
   336  						Value: []int64{2},
   337  						Label: []*profilev1.Label{
   338  							{Key: 1, Str: 2},
   339  						},
   340  					},
   341  					{
   342  						Value: []int64{1},
   343  					},
   344  				},
   345  			},
   346  			expected: []sampleSeries{
   347  				{
   348  					labels: []*typesv1.LabelPair{
   349  						{Name: "__name__", Value: "unwanted"},
   350  					},
   351  					samples: []*profilev1.Sample{{
   352  						Value: []int64{3},
   353  					}},
   354  				},
   355  			},
   356  		},
   357  		{
   358  			description: "does not drop samples when a label is dropped",
   359  			rules: []*relabel.Config{
   360  				{
   361  					Action: relabel.LabelDrop,
   362  					Regex:  relabel.MustNewRegexp("^label_to_drop$"),
   363  				},
   364  			},
   365  			labels: []*typesv1.LabelPair{},
   366  			profile: &profilev1.Profile{
   367  				StringTable: []string{"", "label_to_drop", "value_1", "value_2"},
   368  				Sample: []*profilev1.Sample{
   369  					{
   370  						LocationId: []uint64{1, 2},
   371  						Value:      []int64{2},
   372  						Label:      []*profilev1.Label{{Key: 1, Str: 2}},
   373  					},
   374  					{
   375  						LocationId: []uint64{1, 3},
   376  						Value:      []int64{2},
   377  						Label:      []*profilev1.Label{{Key: 1, Str: 2}},
   378  					},
   379  					{
   380  						LocationId: []uint64{1, 3},
   381  						Value:      []int64{2},
   382  						Label:      []*profilev1.Label{{Key: 1, Str: 3}}, // will get merged with the previous one
   383  					},
   384  					{
   385  						LocationId: []uint64{1, 4},
   386  						Value:      []int64{2},
   387  						Label:      []*profilev1.Label{{Key: 1, Str: 3}},
   388  					},
   389  				},
   390  			},
   391  			expectProfilesDropped: 0,
   392  			expectBytesDropped:    0,
   393  			expected: []sampleSeries{
   394  				{
   395  					labels: []*typesv1.LabelPair{},
   396  					samples: []*profilev1.Sample{
   397  						{
   398  							LocationId: []uint64{1, 2},
   399  							Label:      []*profilev1.Label{},
   400  							Value:      []int64{2},
   401  						},
   402  						{
   403  							LocationId: []uint64{1, 3},
   404  							Label:      []*profilev1.Label{},
   405  							Value:      []int64{4},
   406  						},
   407  						{
   408  							LocationId: []uint64{1, 4},
   409  							Label:      []*profilev1.Label{},
   410  							Value:      []int64{2},
   411  						},
   412  					},
   413  				},
   414  			},
   415  		},
   416  		{
   417  			description: "ensure only samples of same stacktraces get grouped",
   418  			labels: []*typesv1.LabelPair{
   419  				{Name: "__name__", Value: "profile"},
   420  			},
   421  			profile: &profilev1.Profile{
   422  				StringTable: []string{"", "foo", "bar", "binary", "span_id", "aaaabbbbccccdddd", "__name__"},
   423  				Sample: []*profilev1.Sample{
   424  					{
   425  						LocationId: []uint64{1, 2},
   426  						Value:      []int64{2},
   427  						Label: []*profilev1.Label{
   428  							// This __name__ label is expected to be removed as it overlaps with the series label name
   429  							{Key: 6, Str: 1},
   430  						},
   431  					},
   432  					{
   433  						LocationId: []uint64{1, 2},
   434  						Value:      []int64{1},
   435  					},
   436  					{
   437  						LocationId: []uint64{1, 2},
   438  						Value:      []int64{4},
   439  						Label: []*profilev1.Label{
   440  							{Key: 4, Str: 5},
   441  						},
   442  					},
   443  					{
   444  						Value: []int64{8},
   445  					},
   446  					{
   447  						Value: []int64{16},
   448  						Label: []*profilev1.Label{
   449  							{Key: 1, Str: 2},
   450  						},
   451  					},
   452  				},
   453  			},
   454  			expected: []sampleSeries{
   455  				{
   456  					labels: []*typesv1.LabelPair{
   457  						{Name: "__name__", Value: "profile"},
   458  					},
   459  					samples: []*profilev1.Sample{
   460  						{
   461  							LocationId: []uint64{1, 2},
   462  							Value:      []int64{3},
   463  						},
   464  						{
   465  							LocationId: []uint64{1, 2},
   466  							Value:      []int64{4},
   467  							Label: []*profilev1.Label{
   468  								{Key: 4, Str: 5},
   469  							},
   470  						},
   471  						{
   472  							Value: []int64{8},
   473  						},
   474  					},
   475  				},
   476  				{
   477  					labels: []*typesv1.LabelPair{
   478  						{Name: "__name__", Value: "profile"},
   479  						{Name: "foo", Value: "bar"},
   480  					},
   481  					samples: []*profilev1.Sample{{
   482  						Value: []int64{16},
   483  						Label: []*profilev1.Label{},
   484  					}},
   485  				},
   486  			},
   487  		},
   488  		{
   489  			description: "merging groups after label drop should preserve all samples",
   490  			rules: []*relabel.Config{
   491  				{
   492  					Action: relabel.LabelDrop,
   493  					Regex:  relabel.MustNewRegexp("^drop_me$"),
   494  				},
   495  			},
   496  			labels: []*typesv1.LabelPair{},
   497  			profile: &profilev1.Profile{
   498  				StringTable: []string{"", "do_not_drop_me", "drop_me", "a"},
   499  				Sample: []*profilev1.Sample{
   500  					// Group 1: no labels
   501  					{LocationId: []uint64{1}, Value: []int64{100}, Label: []*profilev1.Label{}},
   502  					{LocationId: []uint64{2}, Value: []int64{101}, Label: []*profilev1.Label{}},
   503  					// Group 2: do_not_drop_me=a (should stay intact)
   504  					{LocationId: []uint64{3}, Value: []int64{200}, Label: []*profilev1.Label{{Key: 1, Str: 3}}},
   505  					{LocationId: []uint64{4}, Value: []int64{201}, Label: []*profilev1.Label{{Key: 1, Str: 3}}},
   506  					// Group 3: drop_me=a (should be merged with the first group)
   507  					{LocationId: []uint64{5}, Value: []int64{300}, Label: []*profilev1.Label{{Key: 2, Str: 3}}},
   508  					{LocationId: []uint64{6}, Value: []int64{301}, Label: []*profilev1.Label{{Key: 2, Str: 3}}},
   509  				},
   510  			},
   511  			// After merging, expect 2 groups containing all 6 samples
   512  			expected: []sampleSeries{
   513  				{
   514  					labels: []*typesv1.LabelPair{},
   515  					samples: []*profilev1.Sample{
   516  						{LocationId: []uint64{1}, Value: []int64{100}, Label: []*profilev1.Label{}},
   517  						{LocationId: []uint64{2}, Value: []int64{101}, Label: []*profilev1.Label{}},
   518  						{LocationId: []uint64{5}, Value: []int64{300}, Label: []*profilev1.Label{}},
   519  						{LocationId: []uint64{6}, Value: []int64{301}, Label: []*profilev1.Label{}},
   520  					},
   521  				},
   522  				{
   523  					labels: []*typesv1.LabelPair{
   524  						{Name: "do_not_drop_me", Value: "a"},
   525  					},
   526  					samples: []*profilev1.Sample{
   527  						{LocationId: []uint64{3}, Value: []int64{200}, Label: []*profilev1.Label{}},
   528  						{LocationId: []uint64{4}, Value: []int64{201}, Label: []*profilev1.Label{}},
   529  					},
   530  				},
   531  			},
   532  		},
   533  	}
   534  
   535  	for _, tc := range testCases {
   536  		tc := tc
   537  
   538  		t.Run(tc.description, func(t *testing.T) {
   539  			v := new(mockVisitor)
   540  			require.NoError(t, VisitSampleSeries(tc.profile, tc.labels, tc.rules, v))
   541  			assert.Equal(t, tc.expectBytesDropped, v.discardedBytes)
   542  			assert.Equal(t, tc.expectProfilesDropped, v.discardedProfiles)
   543  
   544  			if tc.expectNoSeries {
   545  				assert.Nil(t, v.series)
   546  				assert.Equal(t, tc.expectLabels, v.profile)
   547  				return
   548  			}
   549  
   550  			for i, actual := range v.series {
   551  				expected := tc.expected[i]
   552  				assert.Equal(t, expected.labels, actual.labels)
   553  				assert.Equal(t, expected.samples, actual.samples)
   554  			}
   555  		})
   556  	}
   557  }
   558  
   559  func Benchmark_VisitSampleSeries_HighCardinality(b *testing.B) {
   560  	defaultRelabelConfigs := validation.MockDefaultOverrides().IngestionRelabelingRules("")
   561  	defaultRelabelConfigs = append(defaultRelabelConfigs, &relabel.Config{
   562  		Action: relabel.LabelDrop,
   563  		Regex:  relabel.MustNewRegexp("^high_cardinality_label$"),
   564  	})
   565  
   566  	stringTable := []string{"", "foo", "bar", "binary", "span_id", "aaaabbbbccccdddd", "high_cardinality_label"}
   567  	highCardinalityOffset := int64(len(stringTable))
   568  	for i := 0; i < 10000; i++ {
   569  		stringTable = append(stringTable, fmt.Sprintf("value_%d", i))
   570  	}
   571  
   572  	profile := &profilev1.Profile{
   573  		StringTable: stringTable,
   574  		Location:    []*profilev1.Location{{Id: 1, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 1}}}},
   575  		Mapping:     []*profilev1.Mapping{{}, {Id: 1, Filename: 3}},
   576  		Function:    []*profilev1.Function{{Id: 1, Name: 1}},
   577  	}
   578  
   579  	for i := 0; i < 30000; i++ {
   580  		labelValue := highCardinalityOffset + int64(i/10)
   581  		if rand.Float64() < 0.3 {
   582  			labelValue = highCardinalityOffset - 2 // lower the cardinality to create large groups
   583  		}
   584  		labels := []*profilev1.Label{
   585  			{Key: highCardinalityOffset - 1, Str: labelValue},
   586  		}
   587  		profile.Sample = append(profile.Sample, &profilev1.Sample{
   588  			LocationId: []uint64{uint64(i + 1)},
   589  			Value:      []int64{2},
   590  			Label:      labels,
   591  		})
   592  	}
   593  
   594  	b.ResetTimer()
   595  	b.ReportAllocs()
   596  
   597  	for i := 0; i < b.N; i++ {
   598  		visitor := new(mockVisitor)
   599  		err := VisitSampleSeries(profile, []*typesv1.LabelPair{
   600  			{Name: "__name__", Value: "profile"},
   601  			{Name: "foo", Value: "bar"},
   602  		}, defaultRelabelConfigs, visitor)
   603  		require.NoError(b, err)
   604  	}
   605  }