github.com/grafana/pyroscope@v1.18.0/pkg/model/pprofsplit/pprof_split_by_test.go

github.com/grafana/pyroscope@v1.18.0/pkg/model/pprofsplit/pprof_split_by_test.go (about)

     1  package pprofsplit
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  	"testing"
     7  
     8  	"github.com/prometheus/common/model"
     9  	"github.com/prometheus/prometheus/model/relabel"
    10  	"github.com/stretchr/testify/assert"
    11  	"github.com/stretchr/testify/require"
    12  
    13  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    14  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    15  )
    16  
    17  type testSample struct { // testSample describes one sample in a table-driven case: a label string plus a single value.
    18  	labels string // "foo=bar,baz=qux" (comma-separated key=value pairs); the cases use "" for a sample without labels
    19  	value  int64  // for input samples this becomes the only element of the built profilev1.Sample.Value
    20  }
    21  
    22  type expectedSeries struct { // expectedSeries is one expected entry, matched by index against the series recorded by the visitor.
    23  	labels  string       // "foo=bar,baz=qux"; parsed with parseLabels and compared to the actual series labels
    24  	samples []testSample // expected samples of the series with their labels; the count must equal the actual sample count
    25  }
    26  
    27  func Test_VisitSampleSeriesBy(t *testing.T) {
    28  	// Test cases are mostly generated by AI.
    29  	// Some very specific cases were added manually.
    30  	testCases := []struct {
    31  		description  string
    32  		seriesLabels string            // Series-level labels. Label order matters.
    33  		samples      []testSample      // Input samples. Label order does not matter.
    34  		splitBy      []string          // Labels to split by.
    35  		rules        []*relabel.Config // Relabel rules to apply.
    36  		expected     []expectedSeries
    37  	}{
    38  		{
    39  			description:  "split profile by group by labels",
    40  			seriesLabels: "__name__=profile,foo=bar",
    41  			samples: []testSample{
    42  				{labels: "service_name=web,endpoint=/users", value: 100},
    43  				{labels: "service_name=api,endpoint=/users", value: 300},
    44  			},
    45  			splitBy: []string{"service_name"},
    46  			expected: []expectedSeries{
    47  				{
    48  					labels: "__name__=profile,endpoint=/users,foo=bar,service_name=web",
    49  					samples: []testSample{
    50  						{labels: "", value: 100},
    51  					},
    52  				},
    53  				{
    54  					labels: "__name__=profile,endpoint=/users,foo=bar,service_name=api",
    55  					samples: []testSample{
    56  						{labels: "", value: 300},
    57  					},
    58  				},
    59  			},
    60  		},
    61  		{
    62  			description:  "group by labels are not overridden",
    63  			seriesLabels: "__name__=profile,foo=bar,service_name=app",
    64  			samples: []testSample{
    65  				{labels: "service_name=web,endpoint=/users", value: 100},
    66  				{labels: "service_name=api,endpoint=/orders", value: 300},
    67  			},
    68  			splitBy: []string{"service_name"},
    69  			expected: []expectedSeries{
    70  				{
    71  					labels: "__name__=profile,foo=bar,service_name=app",
    72  					samples: []testSample{
    73  						{labels: "endpoint=/users", value: 100},
    74  						{labels: "endpoint=/orders", value: 300},
    75  					},
    76  				},
    77  			},
    78  		},
    79  		{
    80  			description:  "split by multiple labels",
    81  			seriesLabels: "__name__=profile,app=web",
    82  			samples: []testSample{
    83  				{labels: "service_name=auth,region=us-east,endpoint=/login", value: 150},
    84  				{labels: "service_name=auth,region=us-west,endpoint=/login", value: 200},
    85  				{labels: "service_name=api,region=us-east,endpoint=/users", value: 250},
    86  				{labels: "service_name=api,region=us-west,endpoint=/orders", value: 300},
    87  			},
    88  			splitBy: []string{"service_name", "region"},
    89  			expected: []expectedSeries{
    90  				{
    91  					labels: "__name__=profile,app=web,endpoint=/login,region=us-east,service_name=auth",
    92  					samples: []testSample{
    93  						{labels: "", value: 150},
    94  					},
    95  				},
    96  				{
    97  					labels: "__name__=profile,app=web,endpoint=/login,region=us-west,service_name=auth",
    98  					samples: []testSample{
    99  						{labels: "", value: 200},
   100  					},
   101  				},
   102  				{
   103  					labels: "__name__=profile,app=web,endpoint=/users,region=us-east,service_name=api",
   104  					samples: []testSample{
   105  						{labels: "", value: 250},
   106  					},
   107  				},
   108  				{
   109  					labels: "__name__=profile,app=web,endpoint=/orders,region=us-west,service_name=api",
   110  					samples: []testSample{
   111  						{labels: "", value: 300},
   112  					},
   113  				},
   114  			},
   115  		},
   116  		{
   117  			description:  "split by non-existent label",
   118  			seriesLabels: "__name__=profile,app=test",
   119  			samples: []testSample{
   120  				{labels: "service_name=web,endpoint=/users", value: 100},
   121  				{labels: "service_name=api,endpoint=/orders", value: 200},
   122  			},
   123  			splitBy: []string{"missing_label"},
   124  			expected: []expectedSeries{
   125  				{
   126  					labels: "__name__=profile,app=test",
   127  					samples: []testSample{
   128  						{labels: "endpoint=/users,service_name=web", value: 100},
   129  						{labels: "endpoint=/orders,service_name=api", value: 200},
   130  					},
   131  				},
   132  			},
   133  		},
   134  		{
   135  			description:  "samples with no labels",
   136  			seriesLabels: "__name__=profile,env=prod",
   137  			samples: []testSample{
   138  				{labels: "", value: 500},
   139  				{labels: "", value: 600},
   140  			},
   141  			splitBy:  []string{"service_name"},
   142  			expected: []expectedSeries{},
   143  		},
   144  		{
   145  			description:  "split by label with empty value",
   146  			seriesLabels: "__name__=profile,app=web",
   147  			samples: []testSample{
   148  				{labels: "service_name=,endpoint=/health", value: 75},
   149  				{labels: "service_name=api,endpoint=/health", value: 125},
   150  			},
   151  			splitBy: []string{"service_name"},
   152  			expected: []expectedSeries{
   153  				{
   154  					labels: "__name__=profile,app=web,endpoint=/health",
   155  					samples: []testSample{
   156  						{labels: "", value: 75},
   157  					},
   158  				},
   159  				{
   160  					labels: "__name__=profile,app=web,endpoint=/health,service_name=api",
   161  					samples: []testSample{
   162  						{labels: "", value: 125},
   163  					},
   164  				},
   165  			},
   166  		},
   167  		{
   168  			description:  "no split by labels",
   169  			seriesLabels: "__name__=profile,env=test",
   170  			samples: []testSample{
   171  				{labels: "service_name=web,endpoint=/users", value: 400},
   172  				{labels: "service_name=api,endpoint=/orders", value: 500},
   173  			},
   174  			splitBy: []string{},
   175  			expected: []expectedSeries{
   176  				{
   177  					labels: "__name__=profile,env=test",
   178  					samples: []testSample{
   179  						{labels: "endpoint=/users,service_name=web", value: 400},
   180  						{labels: "endpoint=/orders,service_name=api", value: 500},
   181  					},
   182  				},
   183  			},
   184  		},
   185  		{
   186  			description:  "multiple samples with same split-by label value",
   187  			seriesLabels: "__name__=profile,version=1.0",
   188  			samples: []testSample{
   189  				{labels: "service_name=web,method=GET", value: 100},
   190  				{labels: "service_name=web,method=POST", value: 150},
   191  				{labels: "service_name=api,method=GET", value: 200},
   192  			},
   193  			splitBy: []string{"service_name"},
   194  			expected: []expectedSeries{
   195  				{
   196  					labels: "__name__=profile,service_name=web,version=1.0",
   197  					samples: []testSample{
   198  						{labels: "method=GET", value: 100},
   199  						{labels: "method=POST", value: 150},
   200  					},
   201  				},
   202  				{
   203  					labels: "__name__=profile,method=GET,service_name=api,version=1.0",
   204  					samples: []testSample{
   205  						{labels: "", value: 200},
   206  					},
   207  				},
   208  			},
   209  		},
   210  		{
   211  			description:  "partial overlap between series and sample labels",
   212  			seriesLabels: "__name__=profile,service_name=main,region=us-east",
   213  			samples: []testSample{
   214  				{labels: "service_name=web,env=prod", value: 300},
   215  				{labels: "region=eu-west,env=staging", value: 400},
   216  			},
   217  			splitBy: []string{"env"},
   218  			expected: []expectedSeries{
   219  				{
   220  					labels: "__name__=profile,env=prod,region=us-east,service_name=main",
   221  					samples: []testSample{
   222  						{labels: "", value: 300},
   223  					},
   224  				},
   225  				{
   226  					labels: "__name__=profile,env=staging,region=us-east,service_name=main",
   227  					samples: []testSample{
   228  						{labels: "", value: 400},
   229  					},
   230  				},
   231  			},
   232  		},
   233  		{
   234  			description:  "complex scenario with overlapping labels",
   235  			seriesLabels: "__name__=profile,app=frontend,env=prod",
   236  			samples: []testSample{
   237  				{labels: "service_name=auth,env=staging,version=v1", value: 100},
   238  				{labels: "service_name=auth,region=us-west,version=v2", value: 200},
   239  				{labels: "service_name=api,env=dev,region=eu-central", value: 300},
   240  			},
   241  			splitBy: []string{"service_name", "version"},
   242  			expected: []expectedSeries{
   243  				{
   244  					labels: "__name__=profile,app=frontend,env=prod,service_name=auth,version=v1",
   245  					samples: []testSample{
   246  						{labels: "", value: 100},
   247  					},
   248  				},
   249  				{
   250  					labels: "__name__=profile,app=frontend,env=prod,region=us-west,service_name=auth,version=v2",
   251  					samples: []testSample{
   252  						{labels: "", value: 200},
   253  					},
   254  				},
   255  				{
   256  					labels: "__name__=profile,app=frontend,env=prod,region=eu-central,service_name=api",
   257  					samples: []testSample{
   258  						{labels: "", value: 300},
   259  					},
   260  				},
   261  			},
   262  		},
   263  		{
   264  			description:  "single sample with multiple labels",
   265  			seriesLabels: "__name__=profile,region=us-east",
   266  			samples: []testSample{
   267  				{labels: "service_name=web,method=GET,status=200", value: 42},
   268  			},
   269  			splitBy: []string{"service_name"},
   270  			expected: []expectedSeries{
   271  				{
   272  					labels: "__name__=profile,method=GET,region=us-east,service_name=web,status=200",
   273  					samples: []testSample{
   274  						{labels: "", value: 42},
   275  					},
   276  				},
   277  			},
   278  		},
   279  		{
   280  			description:  "mixed samples - some with labels, some without",
   281  			seriesLabels: "__name__=profile,app=myapp",
   282  			samples: []testSample{
   283  				{labels: "service_name=auth,endpoint=/login", value: 100},
   284  				{labels: "", value: 200},
   285  				{labels: "service_name=api", value: 300},
   286  			},
   287  			splitBy: []string{"service_name"},
   288  			expected: []expectedSeries{
   289  				{
   290  					labels: "__name__=profile,app=myapp",
   291  					samples: []testSample{
   292  						{labels: "", value: 200},
   293  					},
   294  				},
   295  				{
   296  					labels: "__name__=profile,app=myapp,endpoint=/login,service_name=auth",
   297  					samples: []testSample{
   298  						{labels: "", value: 100},
   299  					},
   300  				},
   301  				{
   302  					labels: "__name__=profile,app=myapp,service_name=api",
   303  					samples: []testSample{
   304  						{labels: "", value: 300},
   305  					},
   306  				},
   307  			},
   308  		},
   309  		{
   310  			description:  "split by multiple labels with partial matches",
   311  			seriesLabels: "__name__=profile,environment=prod",
   312  			samples: []testSample{
   313  				{labels: "service_name=web,region=us-east,tier=frontend", value: 100},
   314  				{labels: "service_name=web,tier=frontend", value: 150},
   315  				{labels: "service_name=api,region=us-west", value: 200},
   316  				{labels: "region=eu-central", value: 250},
   317  			},
   318  			splitBy: []string{"service_name", "region"},
   319  			expected: []expectedSeries{
   320  				{
   321  					labels: "__name__=profile,environment=prod,region=us-east,service_name=web,tier=frontend",
   322  					samples: []testSample{
   323  						{labels: "", value: 100},
   324  					},
   325  				},
   326  				{
   327  					labels: "__name__=profile,environment=prod,service_name=web,tier=frontend",
   328  					samples: []testSample{
   329  						{labels: "", value: 150},
   330  					},
   331  				},
   332  				{
   333  					labels: "__name__=profile,environment=prod,region=us-west,service_name=api",
   334  					samples: []testSample{
   335  						{labels: "", value: 200},
   336  					},
   337  				},
   338  				{
   339  					labels: "__name__=profile,environment=prod,region=eu-central",
   340  					samples: []testSample{
   341  						{labels: "", value: 250},
   342  					},
   343  				},
   344  			},
   345  		},
   346  		{
   347  			description:  "unicode and special characters in labels",
   348  			seriesLabels: "__name__=profile,app=测试应用",
   349  			samples: []testSample{
   350  				{labels: "service_name=微服务-api,endpoint=/用户/登录", value: 100},
   351  				{labels: "service_name=web-frontend,endpoint=/status", value: 200},
   352  			},
   353  			splitBy: []string{"service_name"},
   354  			expected: []expectedSeries{
   355  				{
   356  					labels: "__name__=profile,app=测试应用,endpoint=/用户/登录,service_name=微服务-api",
   357  					samples: []testSample{
   358  						{labels: "", value: 100},
   359  					},
   360  				},
   361  				{
   362  					labels: "__name__=profile,app=测试应用,endpoint=/status,service_name=web-frontend",
   363  					samples: []testSample{
   364  						{labels: "", value: 200},
   365  					},
   366  				},
   367  			},
   368  		},
   369  		{
   370  			description:  "many labels with different combinations",
   371  			seriesLabels: "__name__=profile,cluster=prod-cluster",
   372  			samples: []testSample{
   373  				{labels: "service=auth,method=POST,status=200,region=us-east,az=us-east-1a", value: 50},
   374  				{labels: "service=auth,method=POST,status=200,region=us-east,az=us-east-1b", value: 75},
   375  				{labels: "service=auth,method=GET,status=200,region=us-west,az=us-west-2a", value: 25},
   376  				{labels: "service=api,method=POST,status=500,region=eu-central,az=eu-central-1a", value: 100},
   377  			},
   378  			splitBy: []string{"service", "method", "status"},
   379  			expected: []expectedSeries{
   380  				{
   381  					labels: "__name__=profile,cluster=prod-cluster,method=POST,region=us-east,service=auth,status=200",
   382  					samples: []testSample{
   383  						{labels: "az=us-east-1a", value: 50},
   384  						{labels: "az=us-east-1b", value: 75},
   385  					},
   386  				},
   387  				{
   388  					labels: "__name__=profile,az=us-west-2a,cluster=prod-cluster,method=GET,region=us-west,service=auth,status=200",
   389  					samples: []testSample{
   390  						{labels: "", value: 25},
   391  					},
   392  				},
   393  				{
   394  					labels: "__name__=profile,az=eu-central-1a,cluster=prod-cluster,method=POST,region=eu-central,service=api,status=500",
   395  					samples: []testSample{
   396  						{labels: "", value: 100},
   397  					},
   398  				},
   399  			},
   400  		},
   401  		{
   402  			description:  "split by labels that exist in series labels",
   403  			seriesLabels: "__name__=profile,service_name=main-service,region=global",
   404  			samples: []testSample{
   405  				{labels: "service_name=auth,endpoint=/login", value: 100},
   406  				{labels: "region=us-east,endpoint=/health", value: 200},
   407  				{labels: "method=GET", value: 300},
   408  			},
   409  			splitBy: []string{"service_name", "region"},
   410  			expected: []expectedSeries{
   411  				{
   412  					labels: "__name__=profile,region=global,service_name=main-service",
   413  					samples: []testSample{
   414  						{labels: "endpoint=/login", value: 100},
   415  						{labels: "endpoint=/health", value: 200},
   416  						{labels: "method=GET", value: 300},
   417  					},
   418  				},
   419  			},
   420  		},
   421  		{
   422  			description:  "empty string values in split-by labels",
   423  			seriesLabels: "__name__=profile,app=test-app",
   424  			samples: []testSample{
   425  				{labels: "env=,version=v1.0,service=web", value: 100},
   426  				{labels: "env=prod,version=,service=api", value: 200},
   427  				{labels: "env=staging,version=v2.0,service=", value: 300},
   428  			},
   429  			splitBy: []string{"env", "version", "service"},
   430  			expected: []expectedSeries{
   431  				{
   432  					labels: "__name__=profile,app=test-app,service=web,version=v1.0",
   433  					samples: []testSample{
   434  						{labels: "", value: 100},
   435  					},
   436  				},
   437  				{
   438  					labels: "__name__=profile,app=test-app,env=prod,service=api",
   439  					samples: []testSample{
   440  						{labels: "", value: 200},
   441  					},
   442  				},
   443  				{
   444  					labels: "__name__=profile,app=test-app,env=staging,version=v2.0",
   445  					samples: []testSample{
   446  						{labels: "", value: 300},
   447  					},
   448  				},
   449  			},
   450  		},
   451  		{
   452  			description:  "duplicate split-by label values across different samples",
   453  			seriesLabels: "__name__=profile,datacenter=dc1",
   454  			samples: []testSample{
   455  				{labels: "service=web,tier=frontend,instance=web-1", value: 100},
   456  				{labels: "service=web,tier=frontend,instance=web-2", value: 150},
   457  				{labels: "service=web,tier=backend,instance=web-3", value: 200},
   458  				{labels: "service=api,tier=frontend,instance=api-1", value: 250},
   459  			},
   460  			splitBy: []string{"service", "tier"},
   461  			expected: []expectedSeries{
   462  				{
   463  					labels: "__name__=profile,datacenter=dc1,service=web,tier=frontend",
   464  					samples: []testSample{
   465  						{labels: "instance=web-1", value: 100},
   466  						{labels: "instance=web-2", value: 150},
   467  					},
   468  				},
   469  				{
   470  					labels: "__name__=profile,datacenter=dc1,instance=web-3,service=web,tier=backend",
   471  					samples: []testSample{
   472  						{labels: "", value: 200},
   473  					},
   474  				},
   475  				{
   476  					labels: "__name__=profile,datacenter=dc1,instance=api-1,service=api,tier=frontend",
   477  					samples: []testSample{
   478  						{labels: "", value: 250},
   479  					},
   480  				},
   481  			},
   482  		},
   483  		{
   484  			description:  "relabel rules drop entire profile",
   485  			seriesLabels: "__name__=profile,app=test",
   486  			samples: []testSample{
   487  				{labels: "service=auth,env=prod", value: 100},
   488  				{labels: "service=api,env=staging", value: 200},
   489  			},
   490  			splitBy: []string{"service"},
   491  			rules: []*relabel.Config{
   492  				{
   493  					Action:       relabel.Drop,
   494  					Regex:        relabel.MustNewRegexp("test"),
   495  					SourceLabels: []model.LabelName{"app"},
   496  				},
   497  			},
   498  			expected: []expectedSeries{},
   499  		},
   500  		{
   501  			description:  "relabel rules drop some sample groups",
   502  			seriesLabels: "__name__=profile,component=backend",
   503  			samples: []testSample{
   504  				{labels: "service=auth,env=prod", value: 100},
   505  				{labels: "service=api,env=staging", value: 200},
   506  				{labels: "service=web,env=prod", value: 300},
   507  			},
   508  			splitBy: []string{"service"},
   509  			rules: []*relabel.Config{
   510  				{
   511  					Action:       relabel.Drop,
   512  					Regex:        relabel.MustNewRegexp("staging"),
   513  					SourceLabels: []model.LabelName{"env"},
   514  				},
   515  			},
   516  			expected: []expectedSeries{
   517  				{
   518  					labels: "__name__=profile,component=backend,env=prod,service=auth",
   519  					samples: []testSample{
   520  						{labels: "", value: 100},
   521  					},
   522  				},
   523  				{
   524  					labels: "__name__=profile,component=backend,env=prod,service=web",
   525  					samples: []testSample{
   526  						{labels: "", value: 300},
   527  					},
   528  				},
   529  			},
   530  		},
   531  		{
   532  			description:  "samples with same stack trace get merged",
   533  			seriesLabels: "__name__=profile,app=merger",
   534  			samples: []testSample{
   535  				{labels: "service=auth,method=GET", value: 100},
   536  				{labels: "service=auth,method=GET", value: 50}, // Same labels but different location IDs won't merge
   537  				{labels: "service=api,method=POST", value: 200},
   538  			},
   539  			splitBy: []string{"service"},
   540  			expected: []expectedSeries{
   541  				{
   542  					labels: "__name__=profile,app=merger,method=GET,service=auth",
   543  					samples: []testSample{
   544  						{labels: "", value: 100},
   545  						{labels: "", value: 50}, // Separate samples since different location IDs
   546  					},
   547  				},
   548  				{
   549  					labels: "__name__=profile,app=merger,method=POST,service=api",
   550  					samples: []testSample{
   551  						{labels: "", value: 200},
   552  					},
   553  				},
   554  			},
   555  		},
   556  		{
   557  			description:  "empty profile after all groups dropped by relabel rules",
   558  			seriesLabels: "__name__=profile,app=filter",
   559  			samples: []testSample{
   560  				{labels: "drop=true,service=auth", value: 100},
   561  				{labels: "drop=true,service=api", value: 200},
   562  			},
   563  			splitBy: []string{"service"},
   564  			rules: []*relabel.Config{
   565  				{
   566  					Action:       relabel.Drop,
   567  					Regex:        relabel.MustNewRegexp("true"),
   568  					SourceLabels: []model.LabelName{"drop"},
   569  				},
   570  			},
   571  			expected: []expectedSeries{},
   572  		},
   573  		{
   574  			description:  "complex sample merging with multiple values",
   575  			seriesLabels: "__name__=profile,cluster=main",
   576  			samples: []testSample{
   577  				{labels: "service=web,endpoint=/api", value: 100},
   578  				{labels: "service=web,endpoint=/api", value: 150}, // Same labels but different LocationIds
   579  				{labels: "service=web,endpoint=/health", value: 50},
   580  				{labels: "service=web,endpoint=/api", value: 75}, // Same labels but different LocationIds
   581  			},
   582  			splitBy: []string{"service"},
   583  			expected: []expectedSeries{
   584  				{
   585  					labels: "__name__=profile,cluster=main,service=web",
   586  					samples: []testSample{
   587  						{labels: "endpoint=/api", value: 100},
   588  						{labels: "endpoint=/api", value: 150},
   589  						{labels: "endpoint=/api", value: 75},
   590  						{labels: "endpoint=/health", value: 50},
   591  					},
   592  				},
   593  			},
   594  		},
   595  		{
   596  			description:  "string table expansion with new label names and values",
   597  			seriesLabels: "__name__=profile,existing_label=existing_value",
   598  			samples: []testSample{
   599  				{labels: "completely_new_label=completely_new_value", value: 100},
   600  				{labels: "another_new_label=another_new_value", value: 200},
   601  			},
   602  			splitBy: []string{"completely_new_label"},
   603  			expected: []expectedSeries{
   604  				{
   605  					labels: "__name__=profile,completely_new_label=completely_new_value,existing_label=existing_value",
   606  					samples: []testSample{
   607  						{labels: "", value: 100},
   608  					},
   609  				},
   610  				{
   611  					labels: "__name__=profile,another_new_label=another_new_value,existing_label=existing_value",
   612  					samples: []testSample{
   613  						{labels: "", value: 200},
   614  					},
   615  				},
   616  			},
   617  		},
   618  		{
   619  			description:  "samples with identical location IDs for merging",
   620  			seriesLabels: "__name__=profile,service=test",
   621  			samples: []testSample{
   622  				{labels: "endpoint=/api,method=GET", value: 200},
   623  				{labels: "endpoint=/api,method=GET", value: 300}, // Same labels but different location IDs
   624  				{labels: "endpoint=/health,method=GET", value: 100},
   625  			},
   626  			splitBy: []string{"endpoint"},
   627  			expected: []expectedSeries{
   628  				{
   629  					labels: "__name__=profile,endpoint=/api,method=GET,service=test",
   630  					samples: []testSample{
   631  						{labels: "", value: 200},
   632  						{labels: "", value: 300}, // Separate samples due to different location IDs
   633  					},
   634  				},
   635  				{
   636  					labels: "__name__=profile,endpoint=/health,method=GET,service=test",
   637  					samples: []testSample{
   638  						{labels: "", value: 100},
   639  					},
   640  				},
   641  			},
   642  		},
   643  		{
   644  			description:  "fingerprint collision with different label sets",
   645  			seriesLabels: "__name__=profile,cluster=test",
   646  			samples: []testSample{
   647  				// These might create hash collisions but have different actual labels
   648  				{labels: "service=a,region=b", value: 100},
   649  				{labels: "service=c,region=d", value: 200},
   650  				{labels: "service=a,region=b", value: 50}, // Same labels but different location IDs
   651  			},
   652  			splitBy: []string{"service"},
   653  			expected: []expectedSeries{
   654  				{
   655  					labels: "__name__=profile,cluster=test,region=b,service=a",
   656  					samples: []testSample{
   657  						{labels: "", value: 100},
   658  						{labels: "", value: 50}, // Separate samples due to different location IDs
   659  					},
   660  				},
   661  				{
   662  					labels: "__name__=profile,cluster=test,region=d,service=c",
   663  					samples: []testSample{
   664  						{labels: "", value: 200},
   665  					},
   666  				},
   667  			},
   668  		},
   669  		{
   670  			description:  "no samples in profile",
   671  			seriesLabels: "__name__=profile,app=empty",
   672  			samples:      []testSample{},
   673  			splitBy:      []string{"service"},
   674  			expected:     []expectedSeries{},
   675  		},
   676  		{
   677  			description:  "profile with only samples having empty values",
   678  			seriesLabels: "__name__=profile,app=zero",
   679  			samples: []testSample{
   680  				{labels: "service=auth", value: 0},
   681  				{labels: "service=api", value: 0},
   682  			},
   683  			splitBy: []string{"service"},
   684  			expected: []expectedSeries{
   685  				{
   686  					labels: "__name__=profile,app=zero,service=auth",
   687  					samples: []testSample{
   688  						{labels: "", value: 0},
   689  					},
   690  				},
   691  				{
   692  					labels: "__name__=profile,app=zero,service=api",
   693  					samples: []testSample{
   694  						{labels: "", value: 0},
   695  					},
   696  				},
   697  			},
   698  		},
   699  		{
   700  			description:  "force string table expansion during sample processing",
   701  			seriesLabels: "__name__=profile,app=test",
   702  			samples: []testSample{
   703  				{labels: "service=auth", value: 100},
   704  				{labels: "service=api", value: 200},
   705  			},
   706  			splitBy: []string{"service"},
   707  			expected: []expectedSeries{
   708  				{
   709  					labels: "__name__=profile,app=test,service=auth",
   710  					samples: []testSample{
   711  						{labels: "", value: 100},
   712  					},
   713  				},
   714  				{
   715  					labels: "__name__=profile,app=test,service=api",
   716  					samples: []testSample{
   717  						{labels: "", value: 200},
   718  					},
   719  				},
   720  			},
   721  		},
   722  		{
   723  			description:  "relabel rules drop pprof labels",
   724  			seriesLabels: "__name__=profile,app=test",
   725  			samples: []testSample{
   726  				{labels: "service=auth,internal_debug=true,endpoint=/login", value: 100},
   727  				{labels: "service=api,internal_debug=false,endpoint=/users", value: 200},
   728  				{labels: "service=web,temp_flag=remove_me,endpoint=/health", value: 300},
   729  			},
   730  			splitBy: []string{"service"},
   731  			rules: []*relabel.Config{
   732  				{
   733  					Action: relabel.LabelDrop,
   734  					Regex:  relabel.MustNewRegexp("internal_.*|temp_.*"),
   735  				},
   736  			},
   737  			expected: []expectedSeries{
   738  				{
   739  					labels: "__name__=profile,app=test,endpoint=/login,service=auth",
   740  					samples: []testSample{
   741  						{labels: "", value: 100},
   742  					},
   743  				},
   744  				{
   745  					labels: "__name__=profile,app=test,endpoint=/users,service=api",
   746  					samples: []testSample{
   747  						{labels: "", value: 200},
   748  					},
   749  				},
   750  				{
   751  					labels: "__name__=profile,app=test,endpoint=/health,service=web",
   752  					samples: []testSample{
   753  						{labels: "", value: 300},
   754  					},
   755  				},
   756  			},
   757  		},
   758  		{
   759  			description:  "relabel rules keep only labels without whitespace",
   760  			seriesLabels: "__name__=profile,app=filtertest,service=auth",
   761  			samples: []testSample{
   762  				{labels: "bad label=value1,endpoint=/login,temp flag=debug", value: 100},
   763  				{labels: "another bad=value2,endpoint=/users,good_label=keep", value: 200},
   764  				{labels: "weird name=value3,endpoint=/health", value: 300},
   765  			},
   766  			splitBy: []string{"service"},
   767  			rules: []*relabel.Config{
   768  				{
   769  					Action: relabel.LabelKeep,
   770  					Regex:  relabel.MustNewRegexp("^[^\\s]+$"),
   771  				},
   772  			},
   773  			expected: []expectedSeries{
   774  				{
   775  					labels: "__name__=profile,app=filtertest,service=auth",
   776  					samples: []testSample{
   777  						{labels: "endpoint=/login", value: 100},
   778  						{labels: "endpoint=/users,good_label=keep", value: 200},
   779  						{labels: "endpoint=/health", value: 300},
   780  					},
   781  				},
   782  			},
   783  		},
   784  		{
   785  			description:  "relabel rules replace dots with underscores in label values",
   786  			seriesLabels: "__name__=profile,app=normalizer",
   787  			samples: []testSample{
   788  				{labels: "service=com.example.auth,endpoint=/api/v1.0,version=1.2.3", value: 100},
   789  				{labels: "service=org.service.api,endpoint=/health.check,version=2.0.1", value: 200},
   790  				{labels: "service=net.frontend.web,endpoint=/home.page,version=1.0.0", value: 300},
   791  			},
   792  			splitBy: []string{"service"},
   793  			rules: []*relabel.Config{
   794  				{
   795  					SourceLabels: []model.LabelName{"service"},
   796  					Regex:        relabel.MustNewRegexp("([^.]*)\\.(.*)\\.(.*)"),
   797  					Replacement:  "${1}_${2}_${3}",
   798  					TargetLabel:  "service",
   799  					Action:       relabel.Replace,
   800  				},
   801  				{
   802  					SourceLabels: []model.LabelName{"endpoint"},
   803  					Regex:        relabel.MustNewRegexp("([^.]*)\\.(.*)"),
   804  					Replacement:  "${1}_${2}",
   805  					TargetLabel:  "endpoint",
   806  					Action:       relabel.Replace,
   807  				},
   808  				{
   809  					SourceLabels: []model.LabelName{"version"},
   810  					Regex:        relabel.MustNewRegexp("([^.]*)\\.(.*)\\.(.*)"),
   811  					Replacement:  "${1}_${2}_${3}",
   812  					TargetLabel:  "version",
   813  					Action:       relabel.Replace,
   814  				},
   815  			},
   816  			expected: []expectedSeries{
   817  				{
   818  					labels: "__name__=profile,app=normalizer,endpoint=/api/v1_0,service=com_example_auth,version=1_2_3",
   819  					samples: []testSample{
   820  						{labels: "", value: 100},
   821  					},
   822  				},
   823  				{
   824  					labels: "__name__=profile,app=normalizer,endpoint=/health_check,service=org_service_api,version=2_0_1",
   825  					samples: []testSample{
   826  						{labels: "", value: 200},
   827  					},
   828  				},
   829  				{
   830  					labels: "__name__=profile,app=normalizer,endpoint=/home_page,service=net_frontend_web,version=1_0_0",
   831  					samples: []testSample{
   832  						{labels: "", value: 300},
   833  					},
   834  				},
   835  			},
   836  		},
   837  	}
   838  
   839  	for _, tc := range testCases {
   840  		t.Run(tc.description, func(t *testing.T) {
   841  			profile := &profilev1.Profile{
   842  				StringTable: []string{""},
   843  				Sample:      make([]*profilev1.Sample, len(tc.samples)),
   844  			}
   845  			lookup := stringLookup(profile)
   846  			reverseLookup := stringReverseLookup(profile)
   847  			for i, s := range tc.samples {
   848  				profile.Sample[i] = &profilev1.Sample{
   849  					LocationId: []uint64{uint64(i + 1)}, // unique location ID per sample
   850  					Value:      []int64{s.value},
   851  					Label:      parseSampleLabels(t, lookup, s.labels),
   852  				}
   853  			}
   854  
   855  			visitor := new(mockVisitor)
   856  			seriesLabels := parseLabels(t, tc.seriesLabels)
   857  			require.NoError(t, VisitSampleSeriesBy(profile, seriesLabels, tc.rules, visitor, tc.splitBy...))
   858  			require.Len(t, visitor.series, len(tc.expected))
   859  
   860  			for i, actual := range visitor.series {
   861  				expected := tc.expected[i]
   862  				expectedLabels := parseLabels(t, expected.labels)
   863  				assert.Equal(t, expectedLabels, actual.labels, fmt.Sprintf("want: %s,\ngot:  %s",
   864  					formatLabels(expectedLabels),
   865  					formatLabels(actual.labels)))
   866  
   867  				require.Len(t, actual.samples, len(expected.samples))
   868  				for j, actualSample := range actual.samples {
   869  					expectedSample := expected.samples[j]
   870  					assert.Equal(t, expectedSample.value, actualSample.Value[0])
   871  					expectedSampleLabels := parseSampleLabels(t, lookup, expectedSample.labels)
   872  					assert.Equal(t, expectedSampleLabels, actualSample.Label, fmt.Sprintf("want: %s, got %s",
   873  						formatSampleLabels(reverseLookup, expectedSampleLabels),
   874  						formatSampleLabels(reverseLookup, actualSample.Label)))
   875  				}
   876  			}
   877  		})
   878  	}
   879  }
   880  
   881  func stringLookup(p *profilev1.Profile) func(string) int64 {
   882  	stringIndex := map[string]int64{"": 0}
   883  	return func(s string) int64 {
   884  		if idx, ok := stringIndex[s]; ok {
   885  			return idx
   886  		}
   887  		i := int64(len(p.StringTable))
   888  		p.StringTable = append(p.StringTable, s)
   889  		stringIndex[s] = i
   890  		return i
   891  	}
   892  }
   893  
   894  func stringReverseLookup(p *profilev1.Profile) func(int64) string {
   895  	return func(i int64) string {
   896  		return p.StringTable[i]
   897  	}
   898  }
   899  
   900  func parseLabels(t *testing.T, s string) []*typesv1.LabelPair {
   901  	if s == "" {
   902  		// To simplify assertions we return an empty slice instead of nil.
   903  		return []*typesv1.LabelPair{}
   904  	}
   905  	var labels []*typesv1.LabelPair
   906  	for _, pair := range strings.Split(s, ",") {
   907  		parts := strings.SplitN(pair, "=", 2)
   908  		if len(parts) != 2 {
   909  			t.Fatal("invalid series labels:", s)
   910  		}
   911  		labels = append(labels, &typesv1.LabelPair{
   912  			Name:  parts[0],
   913  			Value: parts[1],
   914  		})
   915  	}
   916  	return labels
   917  }
   918  
   919  func formatLabels(labels []*typesv1.LabelPair) string {
   920  	var b strings.Builder
   921  	for i, label := range labels {
   922  		if i > 0 {
   923  			b.WriteByte(',')
   924  		}
   925  		b.WriteString(label.Name + "=" + label.Value)
   926  	}
   927  	return b.String()
   928  }
   929  
   930  func parseSampleLabels(t *testing.T, str func(string) int64, s string) []*profilev1.Label {
   931  	if s == "" {
   932  		// To simplify assertions we return an empty slice instead of nil.
   933  		return []*profilev1.Label{}
   934  	}
   935  	var labels []*profilev1.Label
   936  	for _, pair := range strings.Split(s, ",") {
   937  		parts := strings.SplitN(pair, "=", 2)
   938  		if len(parts) != 2 {
   939  			t.Fatal("invalid sample labels:", s)
   940  		}
   941  		labels = append(labels, &profilev1.Label{
   942  			Key: str(parts[0]),
   943  			Str: str(parts[1]),
   944  		})
   945  	}
   946  	return labels
   947  }
   948  
   949  func formatSampleLabels(lookup func(int64) string, labels []*profilev1.Label) string {
   950  	var b strings.Builder
   951  	for i, label := range labels {
   952  		if i > 0 {
   953  			b.WriteByte(',')
   954  		}
   955  		b.WriteString(lookup(label.Key) + "=" + lookup(label.Str))
   956  	}
   957  	return b.String()
   958  }