github.com/grafana/pyroscope@v1.18.0/pkg/distributor/distributor_test.go (about)

     1  package distributor
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"math"
     9  	"net/http"
    10  	"net/http/httptest"
    11  	"os"
    12  	"runtime/pprof"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"testing"
    17  	"time"
    18  
    19  	"connectrpc.com/connect"
    20  	"google.golang.org/grpc"
    21  	"google.golang.org/grpc/health/grpc_health_v1"
    22  
    23  	"github.com/go-kit/log"
    24  	"github.com/grafana/dskit/kv"
    25  	"github.com/grafana/dskit/ring"
    26  	"github.com/grafana/dskit/ring/client"
    27  	"github.com/grafana/dskit/services"
    28  	"github.com/prometheus/client_golang/prometheus"
    29  	"github.com/prometheus/client_golang/prometheus/testutil"
    30  	"github.com/prometheus/common/model"
    31  	"github.com/prometheus/prometheus/model/relabel"
    32  	"github.com/stretchr/testify/assert"
    33  	"github.com/stretchr/testify/require"
    34  
    35  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    36  	pushv1 "github.com/grafana/pyroscope/api/gen/proto/go/push/v1"
    37  	"github.com/grafana/pyroscope/api/gen/proto/go/push/v1/pushv1connect"
    38  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    39  	connectapi "github.com/grafana/pyroscope/pkg/api/connect"
    40  	"github.com/grafana/pyroscope/pkg/clientpool"
    41  	"github.com/grafana/pyroscope/pkg/distributor/annotation"
    42  	"github.com/grafana/pyroscope/pkg/distributor/ingestlimits"
    43  	distributormodel "github.com/grafana/pyroscope/pkg/distributor/model"
    44  	"github.com/grafana/pyroscope/pkg/distributor/sampling"
    45  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    46  	pprof2 "github.com/grafana/pyroscope/pkg/pprof"
    47  	pproftesthelper "github.com/grafana/pyroscope/pkg/pprof/testhelper"
    48  	"github.com/grafana/pyroscope/pkg/tenant"
    49  	"github.com/grafana/pyroscope/pkg/test/mocks/mockwritepath"
    50  	"github.com/grafana/pyroscope/pkg/testhelper"
    51  	"github.com/grafana/pyroscope/pkg/util"
    52  	"github.com/grafana/pyroscope/pkg/validation"
    53  )
    54  
// ringConfig is the shared distributor ring configuration used by all tests
// in this file. Ring state is kept in an in-memory KV store so no external
// infrastructure is required.
var ringConfig = util.CommonRingConfig{
	KVStore:      kv.Config{Store: "inmemory"},
	InstanceID:   "foo",
	InstancePort: 8080,
	InstanceAddr: "127.0.0.1",
	ListenPort:   8080,
}
    62  
var (
	// clientOptions and handlerOptions wire the tenant auth interceptor into
	// the connect client and handler used by these tests, so every push must
	// carry a tenant ID (injected via tenant.InjectTenantID).
	clientOptions  = append(connectapi.DefaultClientOptions(), connect.WithInterceptors(tenant.NewAuthInterceptor(true)))
	handlerOptions = append(connectapi.DefaultHandlerOptions(), connect.WithInterceptors(tenant.NewAuthInterceptor(true)))
)
    67  
    68  type poolFactory struct {
    69  	f func(addr string) (client.PoolClient, error)
    70  }
    71  
    72  func (pf *poolFactory) FromInstance(inst ring.InstanceDesc) (client.PoolClient, error) {
    73  	return pf.f(inst.Addr)
    74  }
    75  
    76  func newTestDistributor(t testing.TB, logger log.Logger, overrides *validation.Overrides) (*Distributor, *fakeIngester, error) {
    77  	ing := newFakeIngester(t, false)
    78  	d, err := New(Config{
    79  		DistributorRing: ringConfig,
    80  	}, testhelper.NewMockRing([]ring.InstanceDesc{
    81  		{Addr: "foo"},
    82  	}, 3), &poolFactory{func(addr string) (client.PoolClient, error) {
    83  		return ing, nil
    84  	}}, overrides, nil, logger, nil)
    85  	return d, ing, err
    86  }
    87  
    88  func NewTestDistributor(t testing.TB, logger log.Logger, overrides *validation.Overrides) (*Distributor, error) {
    89  	d, _, err := newTestDistributor(t, logger, overrides)
    90  	return d, err
    91  }
    92  
    93  func Test_ConnectPush(t *testing.T) {
    94  	mux := http.NewServeMux()
    95  	d, ing, err := newTestDistributor(t,
    96  		log.NewLogfmtLogger(os.Stdout),
    97  		newOverrides(t),
    98  	)
    99  
   100  	require.NoError(t, err)
   101  	mux.Handle(pushv1connect.NewPusherServiceHandler(d, handlerOptions...))
   102  	s := httptest.NewServer(mux)
   103  	defer s.Close()
   104  
   105  	client := pushv1connect.NewPusherServiceClient(http.DefaultClient, s.URL, clientOptions...)
   106  	resp, err := client.Push(tenant.InjectTenantID(context.Background(), "foo"), connect.NewRequest(&pushv1.PushRequest{
   107  		Series: []*pushv1.RawProfileSeries{
   108  			{
   109  				Labels: []*typesv1.LabelPair{
   110  					{Name: "cluster", Value: "us-central1"},
   111  					{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
   112  					{Name: "__name__", Value: "cpu"},
   113  				},
   114  				Samples: []*pushv1.RawSample{
   115  					{
   116  						RawProfile: collectTestProfileBytes(t),
   117  					},
   118  				},
   119  			},
   120  		},
   121  	}))
   122  	require.NoError(t, err)
   123  	require.NotNil(t, resp)
   124  	require.Equal(t, 3, len(ing.requests[0].Series))
   125  }
   126  
   127  func Test_Replication(t *testing.T) {
   128  	ingesters := map[string]*fakeIngester{
   129  		"1": newFakeIngester(t, false),
   130  		"2": newFakeIngester(t, false),
   131  		"3": newFakeIngester(t, true),
   132  	}
   133  	ctx := tenant.InjectTenantID(context.Background(), "foo")
   134  	req := connect.NewRequest(&pushv1.PushRequest{
   135  		Series: []*pushv1.RawProfileSeries{
   136  			{
   137  				Labels: []*typesv1.LabelPair{
   138  					{Name: "cluster", Value: "us-central1"},
   139  					{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
   140  					{Name: "__name__", Value: "cpu"},
   141  				},
   142  				Samples: []*pushv1.RawSample{
   143  					{
   144  						RawProfile: collectTestProfileBytes(t),
   145  					},
   146  				},
   147  			},
   148  		},
   149  	})
   150  	d, err := New(Config{DistributorRing: ringConfig}, testhelper.NewMockRing([]ring.InstanceDesc{
   151  		{Addr: "1"},
   152  		{Addr: "2"},
   153  		{Addr: "3"},
   154  	}, 3), &poolFactory{f: func(addr string) (client.PoolClient, error) {
   155  		return ingesters[addr], nil
   156  	}}, newOverrides(t), nil, log.NewLogfmtLogger(os.Stdout), nil)
   157  	require.NoError(t, err)
   158  	// only 1 ingester failing should be fine.
   159  	resp, err := d.Push(ctx, req)
   160  	require.NoError(t, err)
   161  	require.NotNil(t, resp)
   162  	// 2 ingesters failing with a replication of 3 should return an error.
   163  	ingesters["2"].fail = true
   164  	resp, err = d.Push(ctx, req)
   165  	require.Error(t, err)
   166  	require.Nil(t, resp)
   167  }
   168  
   169  func Test_Subservices(t *testing.T) {
   170  	ing := newFakeIngester(t, false)
   171  	d, err := New(Config{
   172  		PoolConfig:      clientpool.PoolConfig{ClientCleanupPeriod: 1 * time.Second},
   173  		DistributorRing: ringConfig,
   174  	}, testhelper.NewMockRing([]ring.InstanceDesc{
   175  		{Addr: "foo"},
   176  	}, 1), &poolFactory{f: func(addr string) (client.PoolClient, error) {
   177  		return ing, nil
   178  	}}, newOverrides(t), nil, log.NewLogfmtLogger(os.Stdout), nil)
   179  
   180  	require.NoError(t, err)
   181  	require.NoError(t, d.StartAsync(context.Background()))
   182  	require.Eventually(t, func() bool {
   183  		fmt.Println(d.State())
   184  		return d.State() == services.Running && d.pool.State() == services.Running
   185  	}, 5*time.Second, 100*time.Millisecond)
   186  	d.StopAsync()
   187  	require.Eventually(t, func() bool {
   188  		fmt.Println(d.State())
   189  		return d.State() == services.Terminated && d.pool.State() == services.Terminated
   190  	}, 5*time.Second, 100*time.Millisecond)
   191  }
   192  
   193  func collectTestProfileBytes(t *testing.T) []byte {
   194  	t.Helper()
   195  
   196  	buf := bytes.NewBuffer(nil)
   197  	require.NoError(t, pprof.WriteHeapProfile(buf))
   198  	return buf.Bytes()
   199  }
   200  
   201  func hugeProfileBytes(t *testing.T) []byte {
   202  	t.Helper()
   203  	b := pproftesthelper.NewProfileBuilderWithLabels(time.Now().UnixNano(), nil)
   204  	p := b.CPUProfile()
   205  	for i := 0; i < 10_000; i++ {
   206  		p.ForStacktraceString(fmt.Sprintf("my_%d", i), "other").AddSamples(1)
   207  	}
   208  	bs, err := p.MarshalVT()
   209  	require.NoError(t, err)
   210  	return bs
   211  }
   212  
// fakeIngester is an in-memory PoolClient stand-in that records every push
// request it receives and can be configured to fail all pushes.
type fakeIngester struct {
	t        testing.TB
	requests []*pushv1.PushRequest // all push payloads received so far
	fail     bool                  // when true, Push returns an error (after recording)
	testhelper.FakePoolClient

	mtx sync.Mutex // guards requests against concurrent pushes
}
   221  
   222  func (i *fakeIngester) List(ctx context.Context, in *grpc_health_v1.HealthListRequest, opts ...grpc.CallOption) (*grpc_health_v1.HealthListResponse, error) {
   223  	return nil, errors.New("not implemented")
   224  }
   225  
   226  func (i *fakeIngester) Push(_ context.Context, req *connect.Request[pushv1.PushRequest]) (*connect.Response[pushv1.PushResponse], error) {
   227  	i.mtx.Lock()
   228  	defer i.mtx.Unlock()
   229  	i.requests = append(i.requests, req.Msg)
   230  	if i.fail {
   231  		return nil, errors.New("foo")
   232  	}
   233  	res := connect.NewResponse(&pushv1.PushResponse{})
   234  	return res, nil
   235  }
   236  
   237  func newFakeIngester(t testing.TB, fail bool) *fakeIngester {
   238  	return &fakeIngester{t: t, fail: fail}
   239  }
   240  
// Test_Limits exercises tenant limit enforcement on the push path. Each case
// pushes a profile through a real connect handler and expects rejection with
// a specific connect code, while the discarded decompressed bytes are
// accounted to the matching validation reason for the tenant.
func Test_Limits(t *testing.T) {
	type testCase struct {
		description              string
		pushReq                  *pushv1.PushRequest
		overrides                *validation.Overrides
		expectedCode             connect.Code
		expectedValidationReason validation.Reason
	}

	testCases := []testCase{
		{
			// Rate and burst limits set low enough that a single heap
			// profile exceeds them.
			description: "rate_limit",
			pushReq: &pushv1.PushRequest{
				Series: []*pushv1.RawProfileSeries{
					{
						Labels: []*typesv1.LabelPair{
							{Name: "cluster", Value: "us-central1"},
							{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
							{Name: "__name__", Value: "cpu"},
						},
						Samples: []*pushv1.RawSample{
							{
								RawProfile: collectTestProfileBytes(t),
							},
						},
					},
				},
			},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.IngestionRateMB = 0.0150
				l.IngestionBurstSizeMB = 0.0015
				tenantLimits["user-1"] = l
			}),
			expectedCode:             connect.CodeResourceExhausted,
			expectedValidationReason: validation.RateLimited,
		},
		{
			// A profile that also violates the stacktrace-sample limit is
			// still accounted as rate limited when the rate limit trips.
			description: "rate_limit_invalid_profile",
			pushReq: &pushv1.PushRequest{
				Series: []*pushv1.RawProfileSeries{
					{
						Labels: []*typesv1.LabelPair{
							{Name: "__name__", Value: "cpu"},
							{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
						},
						Samples: []*pushv1.RawSample{{
							RawProfile: hugeProfileBytes(t),
						}},
					},
				},
			},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.IngestionBurstSizeMB = 0.0015
				l.MaxProfileStacktraceSamples = 100
				tenantLimits["user-1"] = l
			}),
			expectedCode:             connect.CodeResourceExhausted,
			expectedValidationReason: validation.RateLimited,
		},
		{
			// A label name longer than MaxLabelNameLength is rejected as an
			// invalid argument and accounted under LabelNameTooLong.
			description: "labels_limit",
			pushReq: &pushv1.PushRequest{
				Series: []*pushv1.RawProfileSeries{
					{
						Labels: []*typesv1.LabelPair{
							{Name: "clusterdddwqdqdqdqdqdqw", Value: "us-central1"},
							{Name: "__name__", Value: "cpu"},
							{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
						},
						Samples: []*pushv1.RawSample{
							{
								RawProfile: collectTestProfileBytes(t),
							},
						},
					},
				},
			},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.MaxLabelNameLength = 12
				tenantLimits["user-1"] = l
			}),
			expectedCode:             connect.CodeInvalidArgument,
			expectedValidationReason: validation.LabelNameTooLong,
		},
	}

	for _, tc := range testCases {
		tc := tc
		t.Run(tc.description, func(t *testing.T) {
			mux := http.NewServeMux()
			ing := newFakeIngester(t, false)
			d, err := New(Config{
				DistributorRing: ringConfig,
			}, testhelper.NewMockRing([]ring.InstanceDesc{
				{Addr: "foo"},
			}, 3), &poolFactory{f: func(addr string) (client.PoolClient, error) {
				return ing, nil
			}}, tc.overrides, nil, log.NewLogfmtLogger(os.Stdout), nil)

			require.NoError(t, err)

			// Expect the full decompressed payload to be accounted as
			// discarded under the case's validation reason.
			expectedMetricDelta := map[prometheus.Collector]float64{
				validation.DiscardedBytes.WithLabelValues(string(tc.expectedValidationReason), "user-1"): float64(uncompressedProfileSize(t, tc.pushReq)),
				// todo make sure pyroscope_distributor_received_decompressed_bytes_sum is not incremented
			}
			m1 := metricsDump(expectedMetricDelta)

			mux.Handle(pushv1connect.NewPusherServiceHandler(d, handlerOptions...))
			s := httptest.NewServer(mux)
			defer s.Close()

			client := pushv1connect.NewPusherServiceClient(http.DefaultClient, s.URL, clientOptions...)
			resp, err := client.Push(tenant.InjectTenantID(context.Background(), "user-1"), connect.NewRequest(tc.pushReq))
			require.Error(t, err)
			require.Equal(t, tc.expectedCode, connect.CodeOf(err))
			require.Nil(t, resp)
			// Compare metric snapshots taken before and after the push.
			expectMetricsChange(t, m1, metricsDump(expectedMetricDelta), expectedMetricDelta)
		})
	}
}
   364  
// Test_Sessions_Limit verifies per-series session limiting: the session ID
// label is dropped entirely when sessions are disabled (limit 0), remapped
// into the allowed range when over the limit (here 4 maps to 1 with a limit
// of 3), and left untouched when absent.
func Test_Sessions_Limit(t *testing.T) {
	type testCase struct {
		description    string
		seriesLabels   phlaremodel.Labels
		expectedLabels phlaremodel.Labels
		maxSessions    int
	}

	testCases := []testCase{
		{
			// Limit 0 disables sessions: the session ID label is removed.
			description: "session_disabled",
			seriesLabels: []*typesv1.LabelPair{
				{Name: "cluster", Value: "us-central1"},
				{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
				{Name: phlaremodel.LabelNameSessionID, Value: phlaremodel.SessionID(1).String()},
				{Name: "__name__", Value: "cpu"},
			},
			expectedLabels: []*typesv1.LabelPair{
				{Name: "cluster", Value: "us-central1"},
				{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
				{Name: "__name__", Value: "cpu"},
			},
			maxSessions: 0,
		},
		{
			// Session ID above the limit is remapped into range (4 -> 1).
			description: "session_limited",
			seriesLabels: []*typesv1.LabelPair{
				{Name: "cluster", Value: "us-central1"},
				{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
				{Name: phlaremodel.LabelNameSessionID, Value: phlaremodel.SessionID(4).String()},
				{Name: "__name__", Value: "cpu"},
			},
			expectedLabels: []*typesv1.LabelPair{
				{Name: "cluster", Value: "us-central1"},
				{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
				{Name: phlaremodel.LabelNameSessionID, Value: phlaremodel.SessionID(1).String()},
				{Name: "__name__", Value: "cpu"},
			},
			maxSessions: 3,
		},
		{
			// No session ID label: labels pass through unchanged.
			description: "session_not_specified",
			seriesLabels: []*typesv1.LabelPair{
				{Name: "cluster", Value: "us-central1"},
				{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
				{Name: "__name__", Value: "cpu"},
			},
			expectedLabels: []*typesv1.LabelPair{
				{Name: "cluster", Value: "us-central1"},
				{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
				{Name: "__name__", Value: "cpu"},
			},
			maxSessions: 3,
		},
	}

	for _, tc := range testCases {
		tc := tc
		t.Run(tc.description, func(t *testing.T) {
			ing := newFakeIngester(t, false)
			d, err := New(
				Config{DistributorRing: ringConfig},
				testhelper.NewMockRing([]ring.InstanceDesc{{Addr: "foo"}}, 3),
				&poolFactory{f: func(addr string) (client.PoolClient, error) { return ing, nil }},
				validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
					l := validation.MockDefaultLimits()
					l.MaxSessionsPerSeries = tc.maxSessions
					tenantLimits["user-1"] = l
				}), nil, log.NewLogfmtLogger(os.Stdout), nil)

			require.NoError(t, err)
			// Exercise the limiter directly rather than going through Push.
			limit := d.limits.MaxSessionsPerSeries("user-1")
			assert.Equal(t, tc.expectedLabels, d.limitMaxSessionsPerSeries(limit, tc.seriesLabels))
		})
	}
}
   441  
// Test_IngestLimits exercises per-tenant ingestion limit enforcement in
// PushBatch. When a tenant (or one of its usage groups) has reached its
// period limit, requests are rejected with ResourceExhausted — unless the
// sampling config admits the request, in which case it is annotated with
// the limit details instead of being dropped. Malformed requests (no series
// or no profile) fail earlier with InvalidArgument.
func Test_IngestLimits(t *testing.T) {
	type testCase struct {
		description        string
		pushReq            *distributormodel.PushRequest
		overrides          *validation.Overrides
		verifyExpectations func(t *testing.T, err error, req *distributormodel.PushRequest)
	}

	testCases := []testCase{
		{
			// A request with no series is invalid regardless of limits.
			description: "ingest_limit_reached_no_series",
			pushReq:     &distributormodel.PushRequest{},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.IngestionLimit = &ingestlimits.Config{
					PeriodType:     "hour",
					PeriodLimitMb:  128,
					LimitResetTime: 1737721086,
					LimitReached:   true,
					Sampling: ingestlimits.SamplingConfig{
						NumRequests: 0,
						Period:      time.Minute,
					},
				}
				tenantLimits["user-1"] = l
			}),
			verifyExpectations: func(t *testing.T, err error, req *distributormodel.PushRequest) {
				require.Error(t, err)
				require.Equal(t, connect.CodeInvalidArgument, connect.CodeOf(err))
			},
		},
		{
			// A series with no profile is also invalid.
			description: "ingest_limit_reached_no_profile",
			pushReq:     &distributormodel.PushRequest{Series: []*distributormodel.ProfileSeries{{}}},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.IngestionLimit = &ingestlimits.Config{
					PeriodType:     "hour",
					PeriodLimitMb:  128,
					LimitResetTime: 1737721086,
					LimitReached:   true,
					Sampling: ingestlimits.SamplingConfig{
						NumRequests: 0,
						Period:      time.Minute,
					},
				}
				tenantLimits["user-1"] = l
			}),
			verifyExpectations: func(t *testing.T, err error, req *distributormodel.PushRequest) {
				require.Error(t, err)
				require.Equal(t, connect.CodeInvalidArgument, connect.CodeOf(err))
			},
		},
		{
			// Limit reached with no sampling budget: reject outright.
			description: "ingest_limit_reached",
			pushReq: &distributormodel.PushRequest{Series: []*distributormodel.ProfileSeries{{
				Profile: pprof2.RawFromProto(testProfile(1)),
			}}},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.IngestionLimit = &ingestlimits.Config{
					PeriodType:     "hour",
					PeriodLimitMb:  128,
					LimitResetTime: 1737721086,
					LimitReached:   true,
					Sampling: ingestlimits.SamplingConfig{
						NumRequests: 0,
						Period:      time.Minute,
					},
				}
				tenantLimits["user-1"] = l
			}),
			verifyExpectations: func(t *testing.T, err error, req *distributormodel.PushRequest) {
				require.Error(t, err)
				require.Equal(t, connect.CodeResourceExhausted, connect.CodeOf(err))
			},
		},
		{
			// Limit reached but sampling admits the request: it passes and
			// carries an annotation describing the limit.
			description: "ingest_limit_reached_sampling",
			pushReq: &distributormodel.PushRequest{
				Series: []*distributormodel.ProfileSeries{
					{
						Labels: []*typesv1.LabelPair{
							{Name: "__name__", Value: "cpu"},
							{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
						},
						Profile: pprof2.RawFromProto(testProfile(1)),
					},
				},
			},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.IngestionLimit = &ingestlimits.Config{
					PeriodType:     "hour",
					PeriodLimitMb:  128,
					LimitResetTime: 1737721086,
					LimitReached:   true,
					Sampling: ingestlimits.SamplingConfig{
						NumRequests: 1,
						Period:      time.Minute,
					},
				}
				tenantLimits["user-1"] = l
			}),
			verifyExpectations: func(t *testing.T, err error, req *distributormodel.PushRequest) {
				require.NoError(t, err)
				require.Equal(t, 1, len(req.Series[0].Annotations))
				// annotations are json encoded and contain some of the limit config fields
				require.True(t, strings.Contains(req.Series[0].Annotations[0].Value, "\"periodLimitMb\":128"))
			},
		},
		{
			// Zero sampling budget behaves like no sampling: reject, and no
			// annotation is attached.
			description: "ingest_limit_reached_with_sampling_error",
			pushReq: &distributormodel.PushRequest{
				Series: []*distributormodel.ProfileSeries{
					{
						Labels: []*typesv1.LabelPair{
							{Name: "__name__", Value: "cpu"},
							{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
						},
						Profile: pprof2.RawFromProto(testProfile(1)),
					},
				},
			},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.IngestionLimit = &ingestlimits.Config{
					PeriodType:     "hour",
					PeriodLimitMb:  128,
					LimitResetTime: 1737721086,
					LimitReached:   true,
					Sampling: ingestlimits.SamplingConfig{
						NumRequests: 0,
						Period:      time.Minute,
					},
				}
				tenantLimits["user-1"] = l
			}),
			verifyExpectations: func(t *testing.T, err error, req *distributormodel.PushRequest) {
				require.Error(t, err)
				require.Equal(t, connect.CodeResourceExhausted, connect.CodeOf(err))
				require.Empty(t, req.Series[0].Annotations)
			},
		},
		{
			// Tenant limit not reached, but both usage groups matching the
			// series have reached theirs: the whole request is rejected.
			description: "ingest_limit_reached_with_multiple_usage_groups",
			pushReq: &distributormodel.PushRequest{
				Series: []*distributormodel.ProfileSeries{
					{
						Labels: []*typesv1.LabelPair{
							{Name: "__name__", Value: "cpu"},
							{Name: phlaremodel.LabelNameServiceName, Value: "svc1"},
						},
						RawProfile: collectTestProfileBytes(t),
						Profile: pprof2.RawFromProto(&profilev1.Profile{
							Sample: []*profilev1.Sample{{
								Value: []int64{1},
							}},
							StringTable: []string{""},
						}),
					},
					{
						Labels: []*typesv1.LabelPair{
							{Name: "__name__", Value: "cpu"},
							{Name: phlaremodel.LabelNameServiceName, Value: "svc2"},
						},
						Profile: pprof2.RawFromProto(testProfile(1)),
					},
				},
			},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.IngestionLimit = &ingestlimits.Config{
					PeriodType:     "hour",
					PeriodLimitMb:  128,
					LimitResetTime: 1737721086,
					LimitReached:   false,
					UsageGroups: map[string]ingestlimits.UsageGroup{
						"group-1": {
							PeriodLimitMb: 64,
							LimitReached:  true,
						},
						"group-2": {
							PeriodLimitMb: 32,
							LimitReached:  true,
						},
					},
				}
				usageGroupCfg, err := validation.NewUsageGroupConfig(map[string]string{
					"group-1": "{service_name=\"svc1\"}",
					"group-2": "{service_name=\"svc2\"}",
				})
				require.NoError(t, err)
				l.DistributorUsageGroups = usageGroupCfg
				tenantLimits["user-1"] = l
			}),
			verifyExpectations: func(t *testing.T, err error, req *distributormodel.PushRequest) {
				require.Error(t, err)
				require.Equal(t, connect.CodeResourceExhausted, connect.CodeOf(err))
				require.Empty(t, req.Series[0].Annotations)
				require.Empty(t, req.Series[1].Annotations)
			},
		},
		{
			// Tenant and usage-group limits both reached, but sampling
			// admits the request: two annotations are attached, one for the
			// tenant limit and one for the usage group.
			description: "ingest_limit_reached_with_sampling_and_usage_groups",
			pushReq: &distributormodel.PushRequest{
				Series: []*distributormodel.ProfileSeries{
					{
						Labels: []*typesv1.LabelPair{
							{Name: "__name__", Value: "cpu"},
							{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
						},
						Profile: pprof2.RawFromProto(testProfile(1)),
					},
				},
			},
			overrides: validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.IngestionLimit = &ingestlimits.Config{
					PeriodType:     "hour",
					PeriodLimitMb:  128,
					LimitResetTime: 1737721086,
					LimitReached:   true,
					Sampling: ingestlimits.SamplingConfig{
						NumRequests: 100,
						Period:      time.Minute,
					},
					UsageGroups: map[string]ingestlimits.UsageGroup{
						"group-1": {
							PeriodLimitMb: 64,
							LimitReached:  true,
						},
					},
				}
				usageGroupCfg, err := validation.NewUsageGroupConfig(map[string]string{
					"group-1": "{service_name=\"svc\"}",
				})
				require.NoError(t, err)
				l.DistributorUsageGroups = usageGroupCfg
				tenantLimits["user-1"] = l
			}),
			verifyExpectations: func(t *testing.T, err error, req *distributormodel.PushRequest) {
				require.NoError(t, err)
				require.Len(t, req.Series[0].Annotations, 2)
				assert.Contains(t, req.Series[0].Annotations[0].Value, "\"periodLimitMb\":128")
				assert.Contains(t, req.Series[0].Annotations[1].Value, "\"usageGroup\":\"group-1\"")
				assert.Contains(t, req.Series[0].Annotations[1].Value, "\"periodLimitMb\":64")
			},
		},
	}
	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			ing := newFakeIngester(t, false)
			d, err := New(Config{
				DistributorRing: ringConfig,
			}, testhelper.NewMockRing([]ring.InstanceDesc{
				{Addr: "foo"},
			}, 3), &poolFactory{f: func(addr string) (client.PoolClient, error) {
				return ing, nil
			}}, tc.overrides, nil, log.NewLogfmtLogger(os.Stdout), nil)
			require.NoError(t, err)

			// PushBatch applies the ingest-limit checks; each case inspects
			// the resulting error and any annotations added to the request.
			err = d.PushBatch(tenant.InjectTenantID(context.Background(), "user-1"), tc.pushReq)
			tc.verifyExpectations(t, err, tc.pushReq)
		})
	}
}
   709  
   710  func Test_SampleLabels_Ingester(t *testing.T) {
   711  	o := validation.MockDefaultOverrides()
   712  	defaultRelabelConfigs := o.IngestionRelabelingRules("")
   713  
   714  	type testCase struct {
   715  		description           string
   716  		pushReq               *distributormodel.ProfileSeries
   717  		expectedSeries        []*distributormodel.ProfileSeries
   718  		relabelRules          []*relabel.Config
   719  		expectBytesDropped    float64
   720  		expectProfilesDropped float64
   721  		expectError           error
   722  	}
   723  	const dummyTenantID = "tenant1"
   724  
   725  	testCases := []testCase{
   726  		{
   727  			description: "no series labels, no sample labels",
   728  			pushReq: &distributormodel.ProfileSeries{
   729  				TenantID: dummyTenantID,
   730  				Profile: pprof2.RawFromProto(&profilev1.Profile{
   731  					Sample: []*profilev1.Sample{{
   732  						Value: []int64{1},
   733  					}},
   734  				}),
   735  			},
   736  			expectError: connect.NewError(connect.CodeInvalidArgument, validation.NewErrorf(validation.MissingLabels, validation.MissingLabelsErrorMsg)),
   737  		},
   738  		{
   739  			description: "validation error propagation and accounting",
   740  			pushReq: &distributormodel.ProfileSeries{
   741  				TenantID: dummyTenantID,
   742  				Labels: []*typesv1.LabelPair{
   743  					{Name: "foo", Value: "bar"},
   744  				},
   745  				Profile: pprof2.RawFromProto(&profilev1.Profile{
   746  					Sample: []*profilev1.Sample{{
   747  						Value: []int64{1},
   748  					}},
   749  				}),
   750  			},
   751  			expectError: connect.NewError(connect.CodeInvalidArgument, fmt.Errorf(`invalid labels '{foo="bar"}' with error: invalid metric name`)),
   752  		},
   753  		{
   754  			description: "has series labels, no sample labels",
   755  			pushReq: &distributormodel.ProfileSeries{
   756  				TenantID: dummyTenantID,
   757  				Labels: []*typesv1.LabelPair{
   758  					{Name: "service_name", Value: "service"},
   759  					{Name: "__name__", Value: "cpu"},
   760  					{Name: "foo", Value: "bar"},
   761  				},
   762  				Profile: pprof2.RawFromProto(&profilev1.Profile{
   763  					Sample: []*profilev1.Sample{{
   764  						Value: []int64{1},
   765  					}},
   766  				}),
   767  			},
   768  			expectedSeries: []*distributormodel.ProfileSeries{
   769  				{
   770  					Labels: []*typesv1.LabelPair{
   771  						{Name: "__name__", Value: "cpu"},
   772  						{Name: "foo", Value: "bar"},
   773  						{Name: "service_name", Value: "service"},
   774  					},
   775  					Profile: pprof2.RawFromProto(&profilev1.Profile{
   776  						Sample: []*profilev1.Sample{{
   777  							Value: []int64{1},
   778  						}},
   779  					}),
   780  				},
   781  			},
   782  		},
   783  		{
   784  			description: "all samples have identical label set",
   785  			pushReq: &distributormodel.ProfileSeries{
   786  				TenantID: dummyTenantID,
   787  				Labels: []*typesv1.LabelPair{
   788  					{Name: "service_name", Value: "service"},
   789  					{Name: "__name__", Value: "cpu"},
   790  				},
   791  				Profile: pprof2.RawFromProto(&profilev1.Profile{
   792  					StringTable: []string{"", "foo", "bar"},
   793  					Sample: []*profilev1.Sample{{
   794  						Value: []int64{1},
   795  						Label: []*profilev1.Label{
   796  							{Key: 1, Str: 2},
   797  						},
   798  					}},
   799  				}),
   800  			},
   801  			expectedSeries: []*distributormodel.ProfileSeries{
   802  				{
   803  					Labels: []*typesv1.LabelPair{
   804  						{Name: "__name__", Value: "cpu"},
   805  						{Name: "foo", Value: "bar"},
   806  						{Name: "service_name", Value: "service"},
   807  					},
   808  					Profile: pprof2.RawFromProto(&profilev1.Profile{
   809  						StringTable: []string{"", "foo", "bar"},
   810  						Sample: []*profilev1.Sample{{
   811  							Value: []int64{1},
   812  							Label: []*profilev1.Label{},
   813  						}},
   814  					}),
   815  				},
   816  			},
   817  		},
   818  		{
   819  			description: "has series labels, all samples have identical label set",
   820  			pushReq: &distributormodel.ProfileSeries{
   821  				TenantID: dummyTenantID,
   822  				Labels: []*typesv1.LabelPair{
   823  					{Name: "service_name", Value: "service"},
   824  					{Name: "__name__", Value: "cpu"},
   825  					{Name: "baz", Value: "qux"},
   826  				},
   827  				Profile: pprof2.RawFromProto(&profilev1.Profile{
   828  					StringTable: []string{"", "foo", "bar"},
   829  					Sample: []*profilev1.Sample{{
   830  						Value: []int64{1},
   831  						Label: []*profilev1.Label{
   832  							{Key: 1, Str: 2},
   833  						},
   834  					}},
   835  				}),
   836  			},
   837  			expectedSeries: []*distributormodel.ProfileSeries{
   838  				{
   839  					Labels: []*typesv1.LabelPair{
   840  						{Name: "__name__", Value: "cpu"},
   841  						{Name: "baz", Value: "qux"},
   842  						{Name: "foo", Value: "bar"},
   843  						{Name: "service_name", Value: "service"},
   844  					},
   845  					Profile: pprof2.RawFromProto(&profilev1.Profile{
   846  						StringTable: []string{"", "foo", "bar"},
   847  						Sample: []*profilev1.Sample{{
   848  							Value: []int64{1},
   849  							Label: []*profilev1.Label{},
   850  						}},
   851  					}),
   852  				},
   853  			},
   854  		},
   855  		{
   856  			description: "has series labels, and the only sample label name overlaps with series label, creating overlapping groups",
   857  			pushReq: &distributormodel.ProfileSeries{
   858  				TenantID: dummyTenantID,
   859  				Labels: []*typesv1.LabelPair{
   860  					{Name: "service_name", Value: "service"},
   861  					{Name: "__name__", Value: "cpu"},
   862  					{Name: "foo", Value: "bar"},
   863  					{Name: "baz", Value: "qux"},
   864  				},
   865  				Profile: pprof2.RawFromProto(&profilev1.Profile{
   866  					StringTable: []string{"", "foo", "bar"},
   867  					Sample: []*profilev1.Sample{
   868  						{
   869  							Value: []int64{1},
   870  							Label: []*profilev1.Label{
   871  								{Key: 1, Str: 2},
   872  							},
   873  						},
   874  						{
   875  							Value: []int64{2},
   876  						},
   877  					},
   878  				}),
   879  			},
   880  			expectedSeries: []*distributormodel.ProfileSeries{
   881  				{
   882  					Labels: []*typesv1.LabelPair{
   883  						{Name: "__name__", Value: "cpu"},
   884  						{Name: "baz", Value: "qux"},
   885  						{Name: "foo", Value: "bar"},
   886  						{Name: "service_name", Value: "service"},
   887  					},
   888  
   889  					Profile: pprof2.RawFromProto(&profilev1.Profile{
   890  						StringTable: []string{"", "foo", "bar"},
   891  						Sample: []*profilev1.Sample{
   892  							{
   893  								Value: []int64{3},
   894  								Label: nil,
   895  							},
   896  						},
   897  					}),
   898  				},
   899  			},
   900  		},
   901  		{
   902  			description: "has series labels, samples have distinct label sets",
   903  			pushReq: &distributormodel.ProfileSeries{
   904  				TenantID: dummyTenantID,
   905  				Labels: []*typesv1.LabelPair{
   906  					{Name: "service_name", Value: "service"},
   907  					{Name: "__name__", Value: "cpu"},
   908  					{Name: "baz", Value: "qux"},
   909  				},
   910  
   911  				Profile: pprof2.RawFromProto(&profilev1.Profile{
   912  					StringTable: []string{"", "foo", "bar", "waldo", "fred"},
   913  					Sample: []*profilev1.Sample{
   914  						{
   915  							Value: []int64{1},
   916  							Label: []*profilev1.Label{
   917  								{Key: 1, Str: 2},
   918  							},
   919  						},
   920  						{
   921  							Value: []int64{2},
   922  							Label: []*profilev1.Label{
   923  								{Key: 3, Str: 4},
   924  							},
   925  						},
   926  					},
   927  				}),
   928  			},
   929  			expectedSeries: []*distributormodel.ProfileSeries{
   930  				{
   931  					TenantID: dummyTenantID,
   932  					Labels: []*typesv1.LabelPair{
   933  						{Name: "__name__", Value: "cpu"},
   934  						{Name: "baz", Value: "qux"},
   935  						{Name: "foo", Value: "bar"},
   936  						{Name: "service_name", Value: "service"},
   937  					},
   938  
   939  					Profile: pprof2.RawFromProto(&profilev1.Profile{
   940  						StringTable: []string{""},
   941  						Sample: []*profilev1.Sample{{
   942  							Value: []int64{1},
   943  							Label: []*profilev1.Label{},
   944  						}},
   945  					}),
   946  				},
   947  				{
   948  					TenantID: dummyTenantID,
   949  					Labels: []*typesv1.LabelPair{
   950  						{Name: "__name__", Value: "cpu"},
   951  						{Name: "baz", Value: "qux"},
   952  						{Name: "service_name", Value: "service"},
   953  						{Name: "waldo", Value: "fred"},
   954  					},
   955  
   956  					Profile: pprof2.RawFromProto(&profilev1.Profile{
   957  						StringTable: []string{""},
   958  						Sample: []*profilev1.Sample{{
   959  							Value: []int64{2},
   960  							Label: []*profilev1.Label{},
   961  						}},
   962  					}),
   963  				},
   964  			},
   965  		},
   966  		{
   967  			description:  "has series labels that should be renamed to no longer include godeltaprof",
   968  			relabelRules: defaultRelabelConfigs,
   969  			pushReq: &distributormodel.ProfileSeries{
   970  				TenantID: dummyTenantID,
   971  				Labels: []*typesv1.LabelPair{
   972  					{Name: "__name__", Value: "godeltaprof_memory"},
   973  					{Name: "service_name", Value: "service"},
   974  				},
   975  
   976  				Profile: pprof2.RawFromProto(&profilev1.Profile{
   977  					StringTable: []string{""},
   978  					Sample: []*profilev1.Sample{{
   979  						Value: []int64{2},
   980  						Label: []*profilev1.Label{},
   981  					}},
   982  				}),
   983  			},
   984  			expectedSeries: []*distributormodel.ProfileSeries{
   985  				{
   986  					Labels: []*typesv1.LabelPair{
   987  						{Name: "__delta__", Value: "false"},
   988  						{Name: "__name__", Value: "memory"},
   989  						{Name: "__name_replaced__", Value: "godeltaprof_memory"},
   990  						{Name: "service_name", Value: "service"},
   991  					},
   992  
   993  					Profile: pprof2.RawFromProto(&profilev1.Profile{
   994  						StringTable: []string{""},
   995  						Sample: []*profilev1.Sample{{
   996  							Value: []int64{2},
   997  							Label: []*profilev1.Label{},
   998  						}},
   999  					}),
  1000  				},
  1001  			},
  1002  		},
  1003  		{
  1004  			description: "has series labels and sample label, which relabel rules drop",
  1005  			relabelRules: []*relabel.Config{
  1006  				{Action: relabel.Drop, SourceLabels: []model.LabelName{"__name__", "span_name"}, Separator: "/", Regex: relabel.MustNewRegexp("unwanted/randomness")},
  1007  			},
  1008  			pushReq: &distributormodel.ProfileSeries{
  1009  				TenantID: dummyTenantID,
  1010  				Labels: []*typesv1.LabelPair{
  1011  					{Name: "__name__", Value: "unwanted"},
  1012  					{Name: "service_name", Value: "service"},
  1013  				},
  1014  
  1015  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1016  					StringTable: []string{"", "span_name", "randomness"},
  1017  					Sample: []*profilev1.Sample{
  1018  						{
  1019  							Value: []int64{2},
  1020  							Label: []*profilev1.Label{
  1021  								{Key: 1, Str: 2},
  1022  							},
  1023  						},
  1024  						{
  1025  							Value: []int64{1},
  1026  						},
  1027  					},
  1028  				}),
  1029  			},
  1030  			expectProfilesDropped: 0,
  1031  			expectBytesDropped:    3,
  1032  			expectedSeries: []*distributormodel.ProfileSeries{
  1033  				{
  1034  					TenantID: dummyTenantID,
  1035  					Labels: []*typesv1.LabelPair{
  1036  						{Name: "__name__", Value: "unwanted"},
  1037  						{Name: "service_name", Value: "service"},
  1038  					},
  1039  
  1040  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1041  						StringTable: []string{""},
  1042  						Sample: []*profilev1.Sample{{
  1043  							Value: []int64{1},
  1044  						}},
  1045  					}),
  1046  				},
  1047  			},
  1048  		},
  1049  		{
  1050  			description: "has series/sample labels, drops everything",
  1051  			relabelRules: []*relabel.Config{
  1052  				{Action: relabel.Drop, Regex: relabel.MustNewRegexp(".*")},
  1053  			},
  1054  			pushReq: &distributormodel.ProfileSeries{
  1055  				TenantID: dummyTenantID,
  1056  				Labels: []*typesv1.LabelPair{
  1057  					{Name: "__name__", Value: "unwanted"},
  1058  					{Name: "service_name", Value: "service"},
  1059  				},
  1060  
  1061  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1062  					StringTable: []string{"", "span_name", "randomness"},
  1063  					Sample: []*profilev1.Sample{
  1064  						{
  1065  							Value: []int64{2},
  1066  							Label: []*profilev1.Label{
  1067  								{Key: 1, Str: 2},
  1068  							},
  1069  						},
  1070  						{
  1071  							Value: []int64{1},
  1072  						},
  1073  					},
  1074  				}),
  1075  			},
  1076  			expectProfilesDropped: 1,
  1077  			expectBytesDropped:    6,
  1078  		},
  1079  		{
  1080  			description: "has series labels / sample rules, drops samples label",
  1081  			relabelRules: []*relabel.Config{
  1082  				{Action: relabel.Replace, Regex: relabel.MustNewRegexp(".*"), Replacement: "", TargetLabel: "span_name"},
  1083  			},
  1084  			pushReq: &distributormodel.ProfileSeries{
  1085  				TenantID: dummyTenantID,
  1086  				Labels: []*typesv1.LabelPair{
  1087  					{Name: "__name__", Value: "unwanted"},
  1088  					{Name: "service_name", Value: "service"},
  1089  				},
  1090  
  1091  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1092  					StringTable: []string{"", "span_name", "randomness"},
  1093  					Sample: []*profilev1.Sample{
  1094  						{
  1095  							Value: []int64{2},
  1096  							Label: []*profilev1.Label{
  1097  								{Key: 1, Str: 2},
  1098  							},
  1099  						},
  1100  						{
  1101  							Value: []int64{1},
  1102  						},
  1103  					},
  1104  				}),
  1105  			},
  1106  			expectedSeries: []*distributormodel.ProfileSeries{
  1107  				{
  1108  					Labels: []*typesv1.LabelPair{
  1109  						{Name: "__name__", Value: "unwanted"},
  1110  						{Name: "service_name", Value: "service"},
  1111  					},
  1112  
  1113  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1114  						StringTable: []string{""},
  1115  						Sample: []*profilev1.Sample{{
  1116  							Value: []int64{3},
  1117  						}},
  1118  					}),
  1119  				},
  1120  			},
  1121  		},
  1122  		{
  1123  			description: "ensure only samples of same stacktraces get grouped",
  1124  			pushReq: &distributormodel.ProfileSeries{
  1125  				TenantID: dummyTenantID,
  1126  				Labels: []*typesv1.LabelPair{
  1127  					{Name: "__name__", Value: "profile"},
  1128  					{Name: "service_name", Value: "service"},
  1129  				},
  1130  
  1131  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1132  					StringTable: []string{"", "foo", "bar", "binary", "span_id", "aaaabbbbccccdddd", "__name__"},
  1133  					Location: []*profilev1.Location{
  1134  						{Id: 1, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 1}}},
  1135  						{Id: 2, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 2}}},
  1136  					},
  1137  					Mapping: []*profilev1.Mapping{{}, {Id: 1, Filename: 3}},
  1138  					Function: []*profilev1.Function{
  1139  						{Id: 1, Name: 1},
  1140  						{Id: 2, Name: 2},
  1141  					},
  1142  					Sample: []*profilev1.Sample{
  1143  						{
  1144  							LocationId: []uint64{1, 2},
  1145  							Value:      []int64{2},
  1146  							Label: []*profilev1.Label{
  1147  								{Key: 6, Str: 1}, // This __name__ label is expected to be removed as it overlaps with the series label name
  1148  
  1149  							},
  1150  						},
  1151  						{
  1152  							LocationId: []uint64{1, 2},
  1153  							Value:      []int64{1},
  1154  						},
  1155  						{
  1156  							LocationId: []uint64{1, 2},
  1157  							Value:      []int64{4},
  1158  							Label: []*profilev1.Label{
  1159  								{Key: 4, Str: 5},
  1160  							},
  1161  						},
  1162  						{
  1163  							Value: []int64{8},
  1164  						},
  1165  						{
  1166  							Value: []int64{16},
  1167  							Label: []*profilev1.Label{
  1168  								{Key: 1, Str: 2},
  1169  							},
  1170  						},
  1171  					},
  1172  				}),
  1173  			},
  1174  			expectedSeries: []*distributormodel.ProfileSeries{
  1175  				{
  1176  					Labels: []*typesv1.LabelPair{
  1177  						{Name: "__name__", Value: "profile"},
  1178  						{Name: "service_name", Value: "service"},
  1179  					},
  1180  
  1181  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1182  						StringTable: []string{""},
  1183  						Sample: []*profilev1.Sample{
  1184  							{
  1185  								LocationId: []uint64{1, 2},
  1186  								Value:      []int64{3},
  1187  							},
  1188  							{
  1189  								LocationId: []uint64{1, 2},
  1190  								Value:      []int64{4},
  1191  								Label: []*profilev1.Label{
  1192  									{Key: 1, Str: 2},
  1193  								},
  1194  							},
  1195  							{
  1196  								Value: []int64{8},
  1197  							},
  1198  						},
  1199  					}),
  1200  				},
  1201  				{
  1202  					Labels: []*typesv1.LabelPair{
  1203  						{Name: "__name__", Value: "profile"},
  1204  						{Name: "foo", Value: "bar"},
  1205  						{Name: "service_name", Value: "service"},
  1206  					},
  1207  
  1208  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1209  						StringTable: []string{""},
  1210  						Sample: []*profilev1.Sample{{
  1211  							Value: []int64{16},
  1212  							Label: []*profilev1.Label{},
  1213  						}},
  1214  					}),
  1215  				},
  1216  			},
  1217  		},
  1218  	}
  1219  
  1220  	for _, tc := range testCases {
  1221  		tc := tc
  1222  
   1223  		// The usage group config is required to be set to fulfill the usage
   1224  		// group reporting. It is not validated by the tests, nor does it
   1225  		// influence test behavior in any way.
  1226  		ug := &validation.UsageGroupConfig{}
  1227  
  1228  		t.Run(tc.description, func(t *testing.T) {
  1229  			overrides := validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
  1230  				l := validation.MockDefaultLimits()
  1231  				l.IngestionRelabelingRules = tc.relabelRules
  1232  				l.DistributorUsageGroups = ug
  1233  				tenantLimits[dummyTenantID] = l
  1234  			})
  1235  			d, err := New(Config{
  1236  				DistributorRing: ringConfig,
  1237  			}, testhelper.NewMockRing([]ring.InstanceDesc{
  1238  				{Addr: "foo"},
  1239  			}, 3), &poolFactory{func(addr string) (client.PoolClient, error) {
  1240  				return newFakeIngester(t, false), nil
  1241  			}}, overrides, nil, log.NewLogfmtLogger(os.Stdout), nil)
  1242  			require.NoError(t, err)
  1243  			var series []*distributormodel.ProfileSeries
  1244  			series, err = d.visitSampleSeries(tc.pushReq, visitSampleSeriesForIngester)
  1245  			assert.Equal(t, tc.expectBytesDropped, float64(tc.pushReq.DiscardedBytesRelabeling))
  1246  			assert.Equal(t, tc.expectProfilesDropped, float64(tc.pushReq.DiscardedProfilesRelabeling))
  1247  
  1248  			if tc.expectError != nil {
  1249  				assert.Error(t, err)
  1250  				assert.Equal(t, tc.expectError.Error(), err.Error())
  1251  				return
  1252  			} else {
  1253  				assert.NoError(t, err)
  1254  			}
  1255  
  1256  			require.Len(t, series, len(tc.expectedSeries))
  1257  			for i, actualSeries := range series {
  1258  				expectedSeries := tc.expectedSeries[i]
  1259  				assert.Equal(t, expectedSeries.Labels, actualSeries.Labels)
  1260  				expectedProfile := expectedSeries.Profile
  1261  				assert.Equal(t, expectedProfile.Sample, actualSeries.Profile.Sample)
  1262  			}
  1263  		})
  1264  	}
  1265  }
  1266  
  1267  func Test_SampleLabels_SegmentWriter(t *testing.T) {
  1268  	o := validation.MockDefaultOverrides()
  1269  	defaultRelabelConfigs := o.IngestionRelabelingRules("")
  1270  
  1271  	type testCase struct {
  1272  		description           string
  1273  		pushReq               *distributormodel.ProfileSeries
  1274  		expectedSeries        []*distributormodel.ProfileSeries
  1275  		relabelRules          []*relabel.Config
  1276  		expectBytesDropped    float64
  1277  		expectProfilesDropped float64
  1278  		expectError           error
  1279  	}
  1280  	const dummyTenantID = "tenant1"
  1281  
  1282  	testCases := []testCase{
  1283  		{
  1284  			description: "no series labels, no sample labels",
  1285  			pushReq: &distributormodel.ProfileSeries{
  1286  				TenantID: dummyTenantID,
  1287  
  1288  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1289  					Sample: []*profilev1.Sample{{
  1290  						Value: []int64{1},
  1291  					}},
  1292  				}),
  1293  			},
  1294  			expectError: connect.NewError(connect.CodeInvalidArgument, validation.NewErrorf(validation.MissingLabels, validation.MissingLabelsErrorMsg)),
  1295  		},
  1296  		{
  1297  			description: "validation error propagation",
  1298  			pushReq: &distributormodel.ProfileSeries{
  1299  				TenantID: dummyTenantID,
  1300  				Labels: []*typesv1.LabelPair{
  1301  					{Name: "foo", Value: "bar"},
  1302  				},
  1303  
  1304  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1305  					Sample: []*profilev1.Sample{{
  1306  						Value: []int64{1},
  1307  					}},
  1308  				}),
  1309  			},
  1310  			expectError: connect.NewError(connect.CodeInvalidArgument, fmt.Errorf(`invalid labels '{foo="bar"}' with error: invalid metric name`)),
  1311  		},
  1312  		{
  1313  			description: "has series labels, no sample labels",
  1314  			pushReq: &distributormodel.ProfileSeries{
  1315  				TenantID: dummyTenantID,
  1316  				Labels: []*typesv1.LabelPair{
  1317  					{Name: "service_name", Value: "service"},
  1318  					{Name: "__name__", Value: "cpu"},
  1319  					{Name: "foo", Value: "bar"},
  1320  				},
  1321  
  1322  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1323  					Sample: []*profilev1.Sample{{
  1324  						Value: []int64{1},
  1325  					}},
  1326  				}),
  1327  			},
  1328  			expectedSeries: []*distributormodel.ProfileSeries{
  1329  				{
  1330  					TenantID: dummyTenantID,
  1331  					Labels: []*typesv1.LabelPair{
  1332  						{Name: "__name__", Value: "cpu"},
  1333  						{Name: "foo", Value: "bar"},
  1334  						{Name: "service_name", Value: "service"},
  1335  					},
  1336  
  1337  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1338  						Sample: []*profilev1.Sample{{
  1339  							Value: []int64{1},
  1340  						}},
  1341  					}),
  1342  				},
  1343  			},
  1344  		},
  1345  		{
  1346  			description: "all samples have identical label set",
  1347  			pushReq: &distributormodel.ProfileSeries{
  1348  				TenantID: dummyTenantID,
  1349  				Labels: []*typesv1.LabelPair{
  1350  					{Name: "service_name", Value: "service"},
  1351  					{Name: "__name__", Value: "cpu"},
  1352  				},
  1353  
  1354  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1355  					StringTable: []string{"", "foo", "bar"},
  1356  					Sample: []*profilev1.Sample{{
  1357  						Value: []int64{1},
  1358  						Label: []*profilev1.Label{
  1359  							{Key: 1, Str: 2},
  1360  						},
  1361  					}},
  1362  				}),
  1363  			},
  1364  			expectedSeries: []*distributormodel.ProfileSeries{
  1365  				{
  1366  					Labels: []*typesv1.LabelPair{
  1367  						{Name: "__name__", Value: "cpu"},
  1368  						{Name: "foo", Value: "bar"},
  1369  						{Name: "service_name", Value: "service"},
  1370  					},
  1371  
  1372  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1373  						StringTable: []string{"", "foo", "bar"},
  1374  						Sample: []*profilev1.Sample{{
  1375  							Value: []int64{1},
  1376  							Label: []*profilev1.Label{},
  1377  						}},
  1378  					}),
  1379  				},
  1380  			},
  1381  		},
  1382  		{
  1383  			description: "has series labels, all samples have identical label set",
  1384  			pushReq: &distributormodel.ProfileSeries{
  1385  				TenantID: dummyTenantID,
  1386  				Labels: []*typesv1.LabelPair{
  1387  					{Name: "service_name", Value: "service"},
  1388  					{Name: "__name__", Value: "cpu"},
  1389  					{Name: "baz", Value: "qux"},
  1390  				},
  1391  
  1392  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1393  					StringTable: []string{"", "foo", "bar"},
  1394  					Sample: []*profilev1.Sample{{
  1395  						Value: []int64{1},
  1396  						Label: []*profilev1.Label{
  1397  							{Key: 1, Str: 2},
  1398  						},
  1399  					}},
  1400  				}),
  1401  			},
  1402  			expectedSeries: []*distributormodel.ProfileSeries{
  1403  				{
  1404  					Labels: []*typesv1.LabelPair{
  1405  						{Name: "__name__", Value: "cpu"},
  1406  						{Name: "baz", Value: "qux"},
  1407  						{Name: "foo", Value: "bar"},
  1408  						{Name: "service_name", Value: "service"},
  1409  					},
  1410  
  1411  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1412  						StringTable: []string{"", "foo", "bar"},
  1413  						Sample: []*profilev1.Sample{{
  1414  							Value: []int64{1},
  1415  							Label: []*profilev1.Label{},
  1416  						}},
  1417  					}),
  1418  				},
  1419  			},
  1420  		},
  1421  		{
  1422  			description: "has series labels, and the only sample label name overlaps with series label, creating overlapping groups",
  1423  			pushReq: &distributormodel.ProfileSeries{
  1424  				TenantID: dummyTenantID,
  1425  				Labels: []*typesv1.LabelPair{
  1426  					{Name: "service_name", Value: "service"},
  1427  					{Name: "__name__", Value: "cpu"},
  1428  					{Name: "foo", Value: "bar"},
  1429  					{Name: "baz", Value: "qux"},
  1430  				},
  1431  
  1432  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1433  					StringTable: []string{"", "foo", "bar"},
  1434  					Sample: []*profilev1.Sample{
  1435  						{
  1436  							Value: []int64{1},
  1437  							Label: []*profilev1.Label{
  1438  								{Key: 1, Str: 2},
  1439  							},
  1440  						},
  1441  						{
  1442  							Value: []int64{2},
  1443  						},
  1444  					},
  1445  				}),
  1446  			},
  1447  			expectedSeries: []*distributormodel.ProfileSeries{
  1448  				{
  1449  					Labels: []*typesv1.LabelPair{
  1450  						{Name: "__name__", Value: "cpu"},
  1451  						{Name: "baz", Value: "qux"},
  1452  						{Name: "foo", Value: "bar"},
  1453  						{Name: "service_name", Value: "service"},
  1454  					},
  1455  
  1456  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1457  						StringTable: []string{"", "foo", "bar"},
  1458  						Sample: []*profilev1.Sample{
  1459  							{
  1460  								Value: []int64{3},
  1461  								Label: nil,
  1462  							},
  1463  						},
  1464  					}),
  1465  				},
  1466  			},
  1467  		},
  1468  		{
  1469  			description: "has series labels, samples have distinct label sets",
  1470  			pushReq: &distributormodel.ProfileSeries{
  1471  				TenantID: dummyTenantID,
  1472  				Labels: []*typesv1.LabelPair{
  1473  					{Name: "service_name", Value: "service"},
  1474  					{Name: "__name__", Value: "cpu"},
  1475  					{Name: "baz", Value: "qux"},
  1476  				},
  1477  
  1478  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1479  					StringTable: []string{"", "foo", "bar", "waldo", "fred"},
  1480  					Sample: []*profilev1.Sample{
  1481  						{
  1482  							Value: []int64{1},
  1483  							Label: []*profilev1.Label{
  1484  								{Key: 1, Str: 2},
  1485  							},
  1486  						},
  1487  						{
  1488  							Value: []int64{2},
  1489  							Label: []*profilev1.Label{
  1490  								{Key: 3, Str: 4},
  1491  							},
  1492  						},
  1493  					},
  1494  				}),
  1495  			},
  1496  			expectedSeries: []*distributormodel.ProfileSeries{
  1497  				{
  1498  					TenantID: dummyTenantID,
  1499  					Labels: []*typesv1.LabelPair{
  1500  						{Name: "__name__", Value: "cpu"},
  1501  						{Name: "baz", Value: "qux"},
  1502  						{Name: "service_name", Value: "service"},
  1503  					},
  1504  
  1505  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1506  						StringTable: []string{"", "foo", "bar", "waldo", "fred"},
  1507  						Sample: []*profilev1.Sample{
  1508  							{
  1509  								Value: []int64{1},
  1510  								Label: []*profilev1.Label{
  1511  									{Key: 1, Str: 2},
  1512  								},
  1513  							},
  1514  							{
  1515  								Value: []int64{2},
  1516  								Label: []*profilev1.Label{
  1517  									{Key: 3, Str: 4},
  1518  								},
  1519  							},
  1520  						},
  1521  					}),
  1522  				},
  1523  			},
  1524  		},
  1525  		{
  1526  			description:  "has series labels that should be renamed to no longer include godeltaprof",
  1527  			relabelRules: defaultRelabelConfigs,
  1528  			pushReq: &distributormodel.ProfileSeries{
  1529  				TenantID: dummyTenantID,
  1530  				Labels: []*typesv1.LabelPair{
  1531  					{Name: "__name__", Value: "godeltaprof_memory"},
  1532  					{Name: "service_name", Value: "service"},
  1533  				},
  1534  
  1535  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1536  					StringTable: []string{""},
  1537  					Sample: []*profilev1.Sample{{
  1538  						Value: []int64{2},
  1539  						Label: []*profilev1.Label{},
  1540  					}},
  1541  				}),
  1542  			},
  1543  			expectedSeries: []*distributormodel.ProfileSeries{
  1544  				{
  1545  					Labels: []*typesv1.LabelPair{
  1546  						{Name: "__delta__", Value: "false"},
  1547  						{Name: "__name__", Value: "memory"},
  1548  						{Name: "__name_replaced__", Value: "godeltaprof_memory"},
  1549  						{Name: "service_name", Value: "service"},
  1550  					},
  1551  
  1552  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1553  						StringTable: []string{""},
  1554  						Sample: []*profilev1.Sample{{
  1555  							Value: []int64{2},
  1556  							Label: []*profilev1.Label{},
  1557  						}},
  1558  					}),
  1559  				},
  1560  			},
  1561  		},
  1562  		{
  1563  			description: "has series labels and sample label, which relabel rules drop",
  1564  			relabelRules: []*relabel.Config{
  1565  				{Action: relabel.Drop, SourceLabels: []model.LabelName{"__name__", "span_name"}, Separator: "/", Regex: relabel.MustNewRegexp("unwanted/randomness")},
  1566  			},
  1567  			pushReq: &distributormodel.ProfileSeries{
  1568  				TenantID: dummyTenantID,
  1569  				Labels: []*typesv1.LabelPair{
  1570  					{Name: "__name__", Value: "unwanted"},
  1571  					{Name: "service_name", Value: "service"},
  1572  				},
  1573  
  1574  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1575  					StringTable: []string{"", "span_name", "randomness"},
  1576  					Sample: []*profilev1.Sample{
  1577  						{
  1578  							Value: []int64{2},
  1579  							Label: []*profilev1.Label{
  1580  								{Key: 1, Str: 2},
  1581  							},
  1582  						},
  1583  						{
  1584  							Value: []int64{1},
  1585  						},
  1586  					},
  1587  				}),
  1588  			},
  1589  			expectProfilesDropped: 0,
  1590  			expectBytesDropped:    3,
  1591  			expectedSeries: []*distributormodel.ProfileSeries{
  1592  				{
  1593  					Labels: []*typesv1.LabelPair{
  1594  						{Name: "__name__", Value: "unwanted"},
  1595  						{Name: "service_name", Value: "service"},
  1596  					},
  1597  
  1598  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1599  						StringTable: []string{""},
  1600  						Sample: []*profilev1.Sample{{
  1601  							Value: []int64{1},
  1602  						}},
  1603  					}),
  1604  				},
  1605  			},
  1606  		},
  1607  		{
  1608  			description: "has series/sample labels, drops everything",
  1609  			relabelRules: []*relabel.Config{
  1610  				{Action: relabel.Drop, Regex: relabel.MustNewRegexp(".*")},
  1611  			},
  1612  			pushReq: &distributormodel.ProfileSeries{
  1613  				TenantID: dummyTenantID,
  1614  				Labels: []*typesv1.LabelPair{
  1615  					{Name: "__name__", Value: "unwanted"},
  1616  					{Name: "service_name", Value: "service"},
  1617  				},
  1618  
  1619  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1620  					StringTable: []string{"", "span_name", "randomness"},
  1621  					Sample: []*profilev1.Sample{
  1622  						{
  1623  							Value: []int64{2},
  1624  							Label: []*profilev1.Label{
  1625  								{Key: 1, Str: 2},
  1626  							},
  1627  						},
  1628  						{
  1629  							Value: []int64{1},
  1630  						},
  1631  					},
  1632  				}),
  1633  			},
  1634  			expectProfilesDropped: 1,
  1635  			expectBytesDropped:    6,
  1636  		},
  1637  		{
  1638  			description: "has series labels / sample rules, drops samples label",
  1639  			relabelRules: []*relabel.Config{
  1640  				{Action: relabel.Replace, Regex: relabel.MustNewRegexp(".*"), Replacement: "", TargetLabel: "span_name"},
  1641  			},
  1642  			pushReq: &distributormodel.ProfileSeries{
  1643  				TenantID: dummyTenantID,
  1644  				Labels: []*typesv1.LabelPair{
  1645  					{Name: "__name__", Value: "unwanted"},
  1646  					{Name: "service_name", Value: "service"},
  1647  				},
  1648  
  1649  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1650  					StringTable: []string{"", "span_name", "randomness"},
  1651  					Sample: []*profilev1.Sample{
  1652  						{
  1653  							Value: []int64{2},
  1654  							Label: []*profilev1.Label{
  1655  								{Key: 1, Str: 2},
  1656  							},
  1657  						},
  1658  						{
  1659  							Value: []int64{1},
  1660  						},
  1661  					},
  1662  				}),
  1663  			},
  1664  			expectedSeries: []*distributormodel.ProfileSeries{
  1665  				{
  1666  					Labels: []*typesv1.LabelPair{
  1667  						{Name: "__name__", Value: "unwanted"},
  1668  						{Name: "service_name", Value: "service"},
  1669  					},
  1670  
  1671  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1672  						StringTable: []string{""},
  1673  						Sample: []*profilev1.Sample{{
  1674  							Value: []int64{3},
  1675  						}},
  1676  					}),
  1677  				},
  1678  			},
  1679  		},
  1680  		{
  1681  			description: "ensure only samples of same stacktraces get grouped",
  1682  			pushReq: &distributormodel.ProfileSeries{
  1683  				TenantID: dummyTenantID,
  1684  				Labels: []*typesv1.LabelPair{
  1685  					{Name: "__name__", Value: "profile"},
  1686  					{Name: "service_name", Value: "service"},
  1687  				},
  1688  
  1689  				Profile: pprof2.RawFromProto(&profilev1.Profile{
  1690  					StringTable: []string{"", "foo", "bar", "binary", "span_id", "aaaabbbbccccdddd", "__name__"},
  1691  					Location: []*profilev1.Location{
  1692  						{Id: 1, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 1}}},
  1693  						{Id: 2, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 2}}},
  1694  					},
  1695  					Mapping: []*profilev1.Mapping{{}, {Id: 1, Filename: 3}},
  1696  					Function: []*profilev1.Function{
  1697  						{Id: 1, Name: 1},
  1698  						{Id: 2, Name: 2},
  1699  					},
  1700  					Sample: []*profilev1.Sample{
  1701  						{
  1702  							LocationId: []uint64{1, 2},
  1703  							Value:      []int64{2},
  1704  							Label: []*profilev1.Label{
  1705  								{Key: 6, Str: 1},
  1706  							},
  1707  						},
  1708  						{
  1709  							LocationId: []uint64{1, 2},
  1710  							Value:      []int64{1},
  1711  						},
  1712  						{
  1713  							LocationId: []uint64{1, 2},
  1714  							Value:      []int64{4},
  1715  							Label: []*profilev1.Label{
  1716  								{Key: 4, Str: 5},
  1717  							},
  1718  						},
  1719  						{
  1720  							Value: []int64{8},
  1721  						},
  1722  						{
  1723  							Value: []int64{16},
  1724  							Label: []*profilev1.Label{
  1725  								{Key: 1, Str: 2},
  1726  							},
  1727  						},
  1728  					},
  1729  				}),
  1730  			},
  1731  			expectedSeries: []*distributormodel.ProfileSeries{
  1732  				{
  1733  					Labels: []*typesv1.LabelPair{
  1734  						{Name: "__name__", Value: "profile"},
  1735  						{Name: "service_name", Value: "service"},
  1736  					},
  1737  
  1738  					Profile: pprof2.RawFromProto(&profilev1.Profile{
  1739  						StringTable: []string{"", "span_id", "aaaabbbbccccdddd", "foo", "bar", "binary"},
  1740  						Location: []*profilev1.Location{
  1741  							{Id: 1, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 1}}},
  1742  							{Id: 2, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 2}}},
  1743  						},
  1744  						Mapping: []*profilev1.Mapping{{Id: 1, Filename: 5}},
  1745  						Function: []*profilev1.Function{
  1746  							{Id: 1, Name: 1},
  1747  							{Id: 2, Name: 2},
  1748  						},
  1749  						Sample: []*profilev1.Sample{
  1750  							{
  1751  								LocationId: []uint64{1, 2},
  1752  								Value:      []int64{3},
  1753  							},
  1754  							{
  1755  								LocationId: []uint64{1, 2},
  1756  								Value:      []int64{4},
  1757  								Label: []*profilev1.Label{
  1758  									{Key: 1, Str: 2},
  1759  								},
  1760  							},
  1761  							{
  1762  								Value: []int64{8},
  1763  							},
  1764  							{
  1765  								Value: []int64{16},
  1766  								Label: []*profilev1.Label{
  1767  									{Key: 3, Str: 4},
  1768  								},
  1769  							},
  1770  						},
  1771  					}),
  1772  				},
  1773  			},
  1774  		},
  1775  	}
  1776  
  1777  	for _, tc := range testCases {
  1778  		tc := tc
  1779  
  1780  		// These are both required to be set to fulfill the usage group
  1781  		// reporting. Neither are validated by the tests, nor do they influence
  1782  		// test behavior in any way.
  1783  		ug := &validation.UsageGroupConfig{}
  1784  
  1785  		t.Run(tc.description, func(t *testing.T) {
  1786  			overrides := validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
  1787  				l := validation.MockDefaultLimits()
  1788  				l.IngestionRelabelingRules = tc.relabelRules
  1789  				l.DistributorUsageGroups = ug
  1790  				tenantLimits[dummyTenantID] = l
  1791  			})
  1792  			d, err := New(Config{
  1793  				DistributorRing: ringConfig,
  1794  			}, testhelper.NewMockRing([]ring.InstanceDesc{
  1795  				{Addr: "foo"},
  1796  			}, 3), &poolFactory{func(addr string) (client.PoolClient, error) {
  1797  				return newFakeIngester(t, false), nil
  1798  			}}, overrides, nil, log.NewLogfmtLogger(os.Stdout), new(mockwritepath.MockSegmentWriterClient))
  1799  
  1800  			require.NoError(t, err)
  1801  			var series []*distributormodel.ProfileSeries
  1802  			series, err = d.visitSampleSeries(tc.pushReq, visitSampleSeriesForSegmentWriter)
  1803  			assert.Equal(t, tc.expectBytesDropped, float64(tc.pushReq.DiscardedBytesRelabeling))
  1804  			assert.Equal(t, tc.expectProfilesDropped, float64(tc.pushReq.DiscardedProfilesRelabeling))
  1805  
  1806  			if tc.expectError != nil {
  1807  				assert.Error(t, err)
  1808  				assert.Equal(t, tc.expectError.Error(), err.Error())
  1809  				return
  1810  			} else {
  1811  				assert.NoError(t, err)
  1812  			}
  1813  
  1814  			require.Len(t, series, len(tc.expectedSeries))
  1815  			for i, actualSeries := range series {
  1816  				expectedSeries := tc.expectedSeries[i]
  1817  				assert.Equal(t, expectedSeries.Labels, actualSeries.Labels)
  1818  				expectedProfile := expectedSeries.Profile
  1819  				assert.Equal(t, expectedProfile.Sample, actualSeries.Profile.Sample)
  1820  			}
  1821  		})
  1822  	}
  1823  }
  1824  
  1825  func TestBadPushRequest(t *testing.T) {
  1826  	mux := http.NewServeMux()
  1827  	ing := newFakeIngester(t, false)
  1828  	d, err := New(Config{
  1829  		DistributorRing: ringConfig,
  1830  	}, testhelper.NewMockRing([]ring.InstanceDesc{
  1831  		{Addr: "foo"},
  1832  	}, 3), &poolFactory{f: func(addr string) (client.PoolClient, error) {
  1833  		return ing, nil
  1834  	}}, newOverrides(t), nil, log.NewLogfmtLogger(os.Stdout), nil)
  1835  
  1836  	require.NoError(t, err)
  1837  	mux.Handle(pushv1connect.NewPusherServiceHandler(d, handlerOptions...))
  1838  	s := httptest.NewServer(mux)
  1839  	defer s.Close()
  1840  
  1841  	client := pushv1connect.NewPusherServiceClient(http.DefaultClient, s.URL, clientOptions...)
  1842  
  1843  	_, err = client.Push(tenant.InjectTenantID(context.Background(), "foo"), connect.NewRequest(&pushv1.PushRequest{
  1844  		Series: []*pushv1.RawProfileSeries{
  1845  			{
  1846  				Labels: []*typesv1.LabelPair{
  1847  					{Name: "cluster", Value: "us-central1"},
  1848  					{Name: "__name__", Value: "cpu"},
  1849  				},
  1850  			},
  1851  		},
  1852  	}))
  1853  	require.Error(t, err)
  1854  	require.Equal(t, connect.CodeInvalidArgument, connect.CodeOf(err))
  1855  }
  1856  
  1857  func newOverrides(t *testing.T) *validation.Overrides {
  1858  	t.Helper()
  1859  	return validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
  1860  		l := validation.MockDefaultLimits()
  1861  		l.IngestionRateMB = 0.0150
  1862  		l.IngestionBurstSizeMB = 0.0015
  1863  		l.MaxLabelNameLength = 12
  1864  		tenantLimits["user-1"] = l
  1865  	})
  1866  }
  1867  
// TestPush_ShuffleSharding verifies tenant shuffle sharding: each tenant's
// profiles must reach only as many ingesters as its IngestionTenantShardSize
// allows (0 disables sharding; values below the replication factor of 3 fall
// back to it; values above the ring size effectively use all ingesters).
func TestPush_ShuffleSharding(t *testing.T) {
	// initialize 10 fake ingesters
	var (
		ingesters = map[string]*fakeIngester{}
		ringDesc  = make([]ring.InstanceDesc, 10)
	)
	for pos := range ringDesc {
		ingesters[strconv.Itoa(pos)] = newFakeIngester(t, false)
		ringDesc[pos] = ring.InstanceDesc{
			Addr: strconv.Itoa(pos),
		}
	}

	overrides := validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
		// 3 shards by default
		defaults.IngestionTenantShardSize = 3

		// user with sharding disabled
		user6 := validation.MockDefaultLimits()
		user6.IngestionTenantShardSize = 0
		tenantLimits["user-6"] = user6

		// user with only 1 shard (less than replication factor)
		user7 := validation.MockDefaultLimits()
		user7.IngestionTenantShardSize = 1
		tenantLimits["user-7"] = user7

		// user with 9 shards
		user8 := validation.MockDefaultLimits()
		user8.IngestionTenantShardSize = 9
		tenantLimits["user-8"] = user8

		// user with 27 shards (more shards than ingesters)
		user9 := validation.MockDefaultLimits()
		user9.IngestionTenantShardSize = 27
		tenantLimits["user-9"] = user9
	})

	// get distributor ready
	d, err := New(Config{DistributorRing: ringConfig}, testhelper.NewMockRing(ringDesc, 3),
		&poolFactory{func(addr string) (client.PoolClient, error) {
			return ingesters[addr], nil
		}},
		overrides,
		nil,
		log.NewLogfmtLogger(os.Stdout),
		nil,
	)
	require.NoError(t, err)

	mux := http.NewServeMux()
	mux.Handle(pushv1connect.NewPusherServiceHandler(d, handlerOptions...))
	s := httptest.NewServer(mux)
	defer s.Close()

	client := pushv1connect.NewPusherServiceClient(http.DefaultClient, s.URL, clientOptions...)

	// Empty profiles are discarded before sending to ingesters, so build a
	// minimal profile with a single sample to make sure it gets forwarded.
	var buf bytes.Buffer
	_, err = pprof2.RawFromProto(&profilev1.Profile{
		SampleType: []*profilev1.ValueType{{}},
		Sample: []*profilev1.Sample{{
			LocationId: []uint64{1},
			Value:      []int64{1},
		}},
		StringTable: []string{""},
	}).WriteTo(&buf)
	require.NoError(t, err)
	profileBytes := buf.Bytes()

	for i := 0; i < 10; i++ {
		tenantID := fmt.Sprintf("user-%d", i)

		// push 50 series each
		for j := 0; j < 50; j++ {
			_, err = client.Push(tenant.InjectTenantID(context.Background(), tenantID), connect.NewRequest(&pushv1.PushRequest{
				Series: []*pushv1.RawProfileSeries{
					{
						Labels: []*typesv1.LabelPair{
							{Name: "pod", Value: fmt.Sprintf("my-stateful-stuff-%d", j)},
							{Name: "cluster", Value: "us-central1"},
							{Name: "tenant", Value: tenantID},
							{Name: phlaremodel.LabelNameServiceName, Value: "svc"},
							{Name: "__name__", Value: "cpu"},
						},
						Samples: []*pushv1.RawSample{
							{ID: "0000000-0000-0000-0000-000000000001", RawProfile: profileBytes},
						},
					},
				},
			}))
			require.NoError(t, err)
		}
	}

	// Per tenant, count how many series each ingester received, keyed off the
	// "tenant" label that every pushed series carries.
	ingestersByTenantID := make(map[string]map[string]int)

	// now let's check tenants per ingester
	for ingID, ing := range ingesters {
		ing.mtx.Lock()
		for _, req := range ing.requests {
			for _, s := range req.Series {
				for _, l := range s.Labels {
					if l.Name == "tenant" {
						m := ingestersByTenantID[l.Value]
						if m == nil {
							m = make(map[string]int)
							ingestersByTenantID[l.Value] = m
						}
						m[ingID]++
					}
				}
			}
		}
		ing.mtx.Unlock()
	}

	for tenantID, ingesters := range ingestersByTenantID {
		switch tenantID {
		case "user-6", "user-9": // users with disabled sharding and higher than ingester count should have all ingesters
			require.Equal(t, 10, len(ingesters))
		case "user-8": // user 8 has 9 configured
			require.Equal(t, 9, len(ingesters))
		default: // everyone else should fall back to 3, which is the replication factor
			require.Equal(t, 3, len(ingesters))

			var series int
			for _, count := range ingesters {
				series += count
			}
			// 50 pushes, replicated 3 times.
			require.Equal(t, 150, series)
		}
	}
}
  2002  
  2003  func TestPush_Aggregation(t *testing.T) {
  2004  	const maxSessions = 8
  2005  	ingesterClient := newFakeIngester(t, false)
  2006  	d, err := New(
  2007  		Config{DistributorRing: ringConfig, PushTimeout: time.Second * 10},
  2008  		testhelper.NewMockRing([]ring.InstanceDesc{{Addr: "foo"}}, 3),
  2009  		&poolFactory{f: func(addr string) (client.PoolClient, error) { return ingesterClient, nil }},
  2010  		validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
  2011  			l := validation.MockDefaultLimits()
  2012  			l.DistributorAggregationPeriod = model.Duration(time.Second)
  2013  			l.DistributorAggregationWindow = model.Duration(time.Second)
  2014  			l.MaxSessionsPerSeries = maxSessions
  2015  			l.IngestionLimit = &ingestlimits.Config{
  2016  				PeriodType:     "hour",
  2017  				PeriodLimitMb:  128,
  2018  				LimitResetTime: time.Now().Unix(),
  2019  				LimitReached:   true,
  2020  				Sampling: ingestlimits.SamplingConfig{
  2021  					NumRequests: 100,
  2022  					Period:      time.Minute,
  2023  				},
  2024  			}
  2025  			tenantLimits["user-1"] = l
  2026  		}),
  2027  		nil, log.NewLogfmtLogger(os.Stdout), nil,
  2028  	)
  2029  	require.NoError(t, err)
  2030  	ctx := tenant.InjectTenantID(context.Background(), "user-1")
  2031  
  2032  	const (
  2033  		clients  = 10
  2034  		requests = 10
  2035  	)
  2036  
  2037  	var wg sync.WaitGroup
  2038  	wg.Add(clients)
  2039  	for i := 0; i < clients; i++ {
  2040  		i := i
  2041  		go func() {
  2042  			defer wg.Done()
  2043  			for j := 0; j < requests; j++ {
  2044  				err := d.PushBatch(ctx, &distributormodel.PushRequest{
  2045  					Series: []*distributormodel.ProfileSeries{
  2046  						{
  2047  							Labels: []*typesv1.LabelPair{
  2048  								{Name: "cluster", Value: "us-central1"},
  2049  								{Name: "client", Value: strconv.Itoa(i)},
  2050  								{Name: "__name__", Value: "cpu"},
  2051  								{
  2052  									Name:  phlaremodel.LabelNameSessionID,
  2053  									Value: phlaremodel.SessionID(i*j + i).String(),
  2054  								},
  2055  							},
  2056  
  2057  							Profile: &pprof2.Profile{
  2058  								Profile: testProfile(0),
  2059  							},
  2060  						},
  2061  					},
  2062  				})
  2063  				require.NoError(t, err)
  2064  			}
  2065  		}()
  2066  	}
  2067  
  2068  	wg.Wait()
  2069  	d.asyncRequests.Wait()
  2070  
  2071  	var sum int64
  2072  	sessions := make(map[string]struct{})
  2073  	assert.GreaterOrEqual(t, len(ingesterClient.requests), 20)
  2074  	assert.Less(t, len(ingesterClient.requests), 100)
  2075  
  2076  	// Verify that throttled requests have annotations
  2077  	for i, req := range ingesterClient.requests {
  2078  		for _, series := range req.Series {
  2079  			require.Lenf(t, series.Annotations, 1, "failed request %d", i)
  2080  			assert.Equal(t, annotation.ProfileAnnotationKeyThrottled, series.Annotations[0].Key)
  2081  			assert.Contains(t, series.Annotations[0].Value, "\"periodLimitMb\":128")
  2082  		}
  2083  	}
  2084  
  2085  	for _, r := range ingesterClient.requests {
  2086  		for _, s := range r.Series {
  2087  			sessionID := phlaremodel.Labels(s.Labels).Get(phlaremodel.LabelNameSessionID)
  2088  			sessions[sessionID] = struct{}{}
  2089  			p, err := pprof2.RawFromBytes(s.Samples[0].RawProfile)
  2090  			require.NoError(t, err)
  2091  			for _, x := range p.Sample {
  2092  				sum += x.Value[0]
  2093  			}
  2094  		}
  2095  	}
  2096  
  2097  	// RF * samples_per_profile * clients * requests
  2098  	assert.Equal(t, int64(3*2*clients*requests), sum)
  2099  	assert.Equal(t, len(sessions), maxSessions)
  2100  }
  2101  
// testProfile returns a fixed pprof fixture used across distributor tests:
// two sample types (samples/count, cpu/nanoseconds), two samples whose first
// value is 1 each (tests sum these), three locations/functions in a single
// mapping, and pprof labels foo=bar on both samples plus function=slow on the
// second. TimeNanos is set from t so callers can produce otherwise-identical
// profiles with distinct timestamps.
func testProfile(t int64) *profilev1.Profile {
	return &profilev1.Profile{
		SampleType: []*profilev1.ValueType{
			{
				Type: 1,
				Unit: 2,
			},
			{
				Type: 3,
				Unit: 4,
			},
		},
		Sample: []*profilev1.Sample{
			{
				LocationId: []uint64{1, 2},
				Value:      []int64{1, 10000000},
				Label: []*profilev1.Label{
					{Key: 5, Str: 6},
				},
			},
			{
				LocationId: []uint64{1, 2, 3},
				Value:      []int64{1, 10000000},
				Label: []*profilev1.Label{
					{Key: 5, Str: 6},
					{Key: 7, Str: 8},
				},
			},
		},
		Mapping: []*profilev1.Mapping{
			{
				Id:           1,
				HasFunctions: true,
			},
		},
		Location: []*profilev1.Location{
			{
				Id:        1,
				MappingId: 1,
				Line:      []*profilev1.Line{{FunctionId: 1}},
			},
			{
				Id:        2,
				MappingId: 1,
				Line:      []*profilev1.Line{{FunctionId: 2}},
			},
			{
				Id:        3,
				MappingId: 1,
				Line:      []*profilev1.Line{{FunctionId: 3}},
			},
		},
		Function: []*profilev1.Function{
			{
				Id:         1,
				Name:       9,
				SystemName: 9,
				Filename:   10,
			},
			{
				Id:         2,
				Name:       11,
				SystemName: 11,
				Filename:   12,
			},
			{
				Id:         3,
				Name:       13,
				SystemName: 13,
				Filename:   14,
			},
		},
		// Indices above reference this table; keep order stable.
		StringTable: []string{
			"",
			"samples",
			"count",
			"cpu",
			"nanoseconds",
			// Labels
			"foo",
			"bar",
			"function",
			"slow",
			// Functions
			"func-foo",
			"func-foo-path",
			"func-bar",
			"func-bar-path",
			"func-baz",
			"func-baz-path",
		},
		TimeNanos:     t,
		DurationNanos: 10000000000,
		PeriodType: &profilev1.ValueType{
			Type: 3,
			Unit: 4,
		},
		Period: 10000000,
	}
}
  2202  
// TestInjectMappingVersions checks that injectMappingVersions writes a JSON
// version blob (repository, git ref, root path, and any pre-existing build
// id) into the BuildId string of each profile's first mapping, derived from
// the series' service_* labels. A series with no such labels gets an empty
// string.
func TestInjectMappingVersions(t *testing.T) {
	// Simulate a profile that already carries a build id ("foo"); it must be
	// preserved inside the injected JSON.
	alreadyVersionned := testProfile(3)
	alreadyVersionned.StringTable = append(alreadyVersionned.StringTable, `foo`)
	alreadyVersionned.Mapping[0].BuildId = int64(len(alreadyVersionned.StringTable) - 1)
	in := []*distributormodel.ProfileSeries{
		{
			Labels: []*typesv1.LabelPair{},

			Profile: &pprof2.Profile{
				Profile: testProfile(1),
			},
		},
		{
			Labels: []*typesv1.LabelPair{
				{Name: phlaremodel.LabelNameServiceRepository, Value: "grafana/pyroscope"},
			},

			Profile: &pprof2.Profile{
				Profile: testProfile(2),
			},
		},
		{
			Labels: []*typesv1.LabelPair{
				{Name: phlaremodel.LabelNameServiceRepository, Value: "grafana/pyroscope"},
				{Name: phlaremodel.LabelNameServiceGitRef, Value: "foobar"},
				{Name: phlaremodel.LabelNameServiceRootPath, Value: "some-path"},
			},

			Profile: &pprof2.Profile{
				Profile: testProfile(2),
			},
		},
		{
			Labels: []*typesv1.LabelPair{
				{Name: phlaremodel.LabelNameServiceRepository, Value: "grafana/pyroscope"},
				{Name: phlaremodel.LabelNameServiceGitRef, Value: "foobar"},
				{Name: phlaremodel.LabelNameServiceRootPath, Value: "some-path"},
			},

			Profile: &pprof2.Profile{
				Profile: alreadyVersionned,
			},
		},
	}
	for _, s := range in {
		err := injectMappingVersions(s)
		require.NoError(t, err)
	}

	// Each expectation reads the injected BuildId back out of the string table.
	require.Equal(t, "", in[0].Profile.StringTable[in[0].Profile.Mapping[0].BuildId])
	require.Equal(t, `{"repository":"grafana/pyroscope"}`, in[1].Profile.StringTable[in[1].Profile.Mapping[0].BuildId])
	require.Equal(t, `{"repository":"grafana/pyroscope","git_ref":"foobar","root_path":"some-path"}`, in[2].Profile.StringTable[in[2].Profile.Mapping[0].BuildId])
	require.Equal(t, `{"repository":"grafana/pyroscope","git_ref":"foobar","build_id":"foo","root_path":"some-path"}`, in[3].Profile.StringTable[in[3].Profile.Mapping[0].BuildId])
}
  2257  
  2258  func uncompressedProfileSize(t *testing.T, req *pushv1.PushRequest) int {
  2259  	var size int
  2260  	for _, s := range req.Series {
  2261  		for _, label := range s.Labels {
  2262  			size += len(label.Name) + len(label.Value)
  2263  		}
  2264  		for _, sample := range s.Samples {
  2265  			p, err := pprof2.RawFromBytes(sample.RawProfile)
  2266  			require.NoError(t, err)
  2267  			size += p.SizeVT()
  2268  		}
  2269  	}
  2270  	return size
  2271  }
  2272  
  2273  func metricsDump(metrics map[prometheus.Collector]float64) map[prometheus.Collector]float64 {
  2274  	res := make(map[prometheus.Collector]float64)
  2275  	for m := range metrics {
  2276  		res[m] = testutil.ToFloat64(m)
  2277  	}
  2278  	return res
  2279  }
  2280  
  2281  func expectMetricsChange(t *testing.T, m1, m2, expectedChange map[prometheus.Collector]float64) {
  2282  	for counter, expectedDelta := range expectedChange {
  2283  		delta := m2[counter] - m1[counter]
  2284  		assert.Equal(t, expectedDelta, delta, "metric %s", counter)
  2285  	}
  2286  }
  2287  
// TestPush_LabelRewrites verifies label normalization on the push path: the
// profile type is injected as __name__ and a default
// service_name="unknown_service" is added when the incoming series has no
// service_name label.
func TestPush_LabelRewrites(t *testing.T) {
	ing := newFakeIngester(t, false)
	d, err := New(Config{
		DistributorRing: ringConfig,
	}, testhelper.NewMockRing([]ring.InstanceDesc{
		{Addr: "mock"},
		{Addr: "mock"},
		{Addr: "mock"},
	}, 3), &poolFactory{f: func(addr string) (client.PoolClient, error) {
		return ing, nil
	}}, newOverrides(t), nil, log.NewLogfmtLogger(os.Stdout), nil)
	require.NoError(t, err)

	ctx := tenant.InjectTenantID(context.Background(), "user-1")

	for idx, tc := range []struct {
		name           string
		series         []*typesv1.LabelPair
		expectedSeries string
	}{
		{
			name:           "empty series",
			series:         []*typesv1.LabelPair{},
			expectedSeries: `{__name__="process_cpu", service_name="unknown_service"}`,
		},
		{
			name: "series with service_name labels",
			series: []*typesv1.LabelPair{
				{Name: "service_name", Value: "my-service"},
				{Name: "cloud_region", Value: "my-region"},
			},
			expectedSeries: `{__name__="process_cpu", cloud_region="my-region", service_name="my-service"}`,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			// Reset the fake ingester's captured requests between subtests.
			ing.mtx.Lock()
			ing.requests = ing.requests[:0]
			ing.mtx.Unlock()

			// Distinct timestamps (1000*idx) keep the profiles distinguishable.
			p := pproftesthelper.NewProfileBuilderWithLabels(1000*int64(idx), tc.series).CPUProfile()
			p.ForStacktraceString("world", "hello").AddSamples(1)

			data, err := p.MarshalVT()
			require.NoError(t, err)

			_, err = d.Push(ctx, connect.NewRequest(&pushv1.PushRequest{
				Series: []*pushv1.RawProfileSeries{
					{
						Labels: p.Labels,
						Samples: []*pushv1.RawSample{
							{RawProfile: data},
						},
					},
				},
			}))
			require.NoError(t, err)

			ing.mtx.Lock()
			require.Len(t, ing.requests, 1)
			// NOTE(review): one pushed profile fans out into multiple series —
			// presumably one per sample type of the CPU profile; confirm
			// against the push path before relying on the exact count.
			require.Greater(t, len(ing.requests[0].Series), 1)
			actualSeries := phlaremodel.LabelPairsString(ing.requests[0].Series[0].Labels)
			assert.Equal(t, tc.expectedSeries, actualSeries)
			ing.mtx.Unlock()
		})
	}
}
  2354  
// TestDistributor_shouldSample exercises the usage-group sampling decision:
// with no sampling config or no matching group the profile is accepted with a
// nil match; otherwise the minimum probability across matching groups decides,
// and a rule addressed to the literal (resolved) group name takes precedence
// over a dynamic "${...}" rule for the same group.
func TestDistributor_shouldSample(t *testing.T) {
	tests := []struct {
		name           string
		groups         []validation.UsageGroupMatchName
		samplingConfig *sampling.Config
		expected       bool
		expectedMatch  *sampling.Source
	}{
		{
			name:     "no sampling config - should accept",
			groups:   []validation.UsageGroupMatchName{},
			expected: true,
		},
		{
			name:   "no matching groups - should accept",
			groups: []validation.UsageGroupMatchName{{ConfiguredName: "group1", ResolvedName: "group1"}},
			samplingConfig: &sampling.Config{
				UsageGroups: map[string]sampling.UsageGroupSampling{
					"group2": {Probability: 0.5},
				},
			},
			expected: true,
		},
		{
			name:   "matching group with 1.0 probability - should accept",
			groups: []validation.UsageGroupMatchName{{ConfiguredName: "group1", ResolvedName: "group1"}},
			samplingConfig: &sampling.Config{
				UsageGroups: map[string]sampling.UsageGroupSampling{
					"group1": {Probability: 1.0},
				},
			},
			expected: true,
			expectedMatch: &sampling.Source{
				UsageGroup:  "group1",
				Probability: 1.0,
			},
		},
		{
			// The rule is keyed by the configured name, but the reported match
			// carries the resolved name.
			name:   "matching group with dynamic name - should accept",
			groups: []validation.UsageGroupMatchName{{ConfiguredName: "configured-name", ResolvedName: "resolved-name"}},
			samplingConfig: &sampling.Config{
				UsageGroups: map[string]sampling.UsageGroupSampling{
					"configured-name": {Probability: 1.0},
				},
			},
			expected: true,
			expectedMatch: &sampling.Source{
				UsageGroup:  "resolved-name",
				Probability: 1.0,
			},
		},
		{
			name:   "matching group with 0.0 probability - should reject",
			groups: []validation.UsageGroupMatchName{{ConfiguredName: "group1", ResolvedName: "group1"}},
			samplingConfig: &sampling.Config{
				UsageGroups: map[string]sampling.UsageGroupSampling{
					"group1": {Probability: 0.0},
				},
			},
			expected: false,
			expectedMatch: &sampling.Source{
				UsageGroup:  "group1",
				Probability: 0.0,
			},
		},
		{
			name: "multiple matching groups - should use minimum probability",
			groups: []validation.UsageGroupMatchName{
				{ConfiguredName: "group1", ResolvedName: "group1"},
				{ConfiguredName: "group2", ResolvedName: "group2"},
			},
			samplingConfig: &sampling.Config{
				UsageGroups: map[string]sampling.UsageGroupSampling{
					"group1": {Probability: 1.0},
					"group2": {Probability: 0.0},
				},
			},
			expected: false,
			expectedMatch: &sampling.Source{
				UsageGroup:  "group2",
				Probability: 0.0,
			},
		},
		{
			name: "multiple matching groups - should prioritize specific group",
			groups: []validation.UsageGroupMatchName{
				{ConfiguredName: "${labels.service_name}", ResolvedName: "test_service"},
				{ConfiguredName: "test_service", ResolvedName: "test_service"},
			},
			samplingConfig: &sampling.Config{
				UsageGroups: map[string]sampling.UsageGroupSampling{
					"${labels.service_name}": {Probability: 1.0},
					"test_service":           {Probability: 0.0},
				},
			},
			expected: false,
			expectedMatch: &sampling.Source{
				UsageGroup:  "test_service",
				Probability: 0.0,
			},
		},
		{
			// Same as above with the group order flipped: precedence must not
			// depend on input order.
			name: "multiple matching groups - should prioritize specific group (reversed order)",
			groups: []validation.UsageGroupMatchName{
				{ConfiguredName: "test_service", ResolvedName: "test_service"},
				{ConfiguredName: "${labels.service_name}", ResolvedName: "test_service"},
			},
			samplingConfig: &sampling.Config{
				UsageGroups: map[string]sampling.UsageGroupSampling{
					"${labels.service_name}": {Probability: 1.0},
					"test_service":           {Probability: 0.0},
				},
			},
			expected: false,
			expectedMatch: &sampling.Source{
				UsageGroup:  "test_service",
				Probability: 0.0,
			},
		},
		{
			name: "single usage group, multiple sampling rules - should prioritize specific rule",
			groups: []validation.UsageGroupMatchName{
				{ConfiguredName: "${labels.service_name}", ResolvedName: "test_service"},
			},
			samplingConfig: &sampling.Config{
				UsageGroups: map[string]sampling.UsageGroupSampling{
					"${labels.service_name}": {Probability: 1.0},
					"test_service":           {Probability: 0.0},
				},
			},
			expected: false,
			expectedMatch: &sampling.Source{
				UsageGroup:  "test_service",
				Probability: 0.0,
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			overrides := validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.DistributorSampling = tt.samplingConfig
				tenantLimits["test-tenant"] = l
			})
			// Only the limits are needed to drive shouldSample; the rest of
			// the Distributor stays zero-valued.
			d := &Distributor{
				limits: overrides,
			}

			sample, match := d.shouldSample("test-tenant", tt.groups)
			assert.Equal(t, tt.expected, sample)
			assert.Equal(t, tt.expectedMatch, match)
		})
	}
}
  2510  
// TestDistributor_shouldSample_Probability statistically validates sampling
// probabilities: over 10000 iterations the observed acceptance rate must stay
// within a 0.05 tolerance of the configured probability. The dynamic
// "${labels.service_name}" rule drops everything by default; the specific
// "test-service-1" rule overrides it with the probability under test.
func TestDistributor_shouldSample_Probability(t *testing.T) {
	tests := []struct {
		name        string
		probability float64
		usageGroups []validation.UsageGroupMatchName
	}{
		{
			name:        "30% sampling rate",
			probability: 0.3,
			usageGroups: []validation.UsageGroupMatchName{{ConfiguredName: "${labels.service_name}", ResolvedName: "test-service-1"}},
		},
		{
			name:        "70% sampling rate",
			probability: 0.7,
			usageGroups: []validation.UsageGroupMatchName{{ConfiguredName: "${labels.service_name}", ResolvedName: "test-service-1"}},
		},
		{
			name:        "10% sampling rate",
			probability: 0.1,
			usageGroups: []validation.UsageGroupMatchName{{ConfiguredName: "${labels.service_name}", ResolvedName: "test-service-1"}},
		},
		{
			// "test-service-2" has no specific rule, so only the 0.0 default
			// applies: nothing should be accepted.
			name:        "0% sampling rate for the baseline group",
			probability: 0.0,
			usageGroups: []validation.UsageGroupMatchName{{ConfiguredName: "${labels.service_name}", ResolvedName: "test-service-2"}},
		},
	}

	const iterations = 10000
	tenantID := "test-tenant"

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			samplingConfig := &sampling.Config{
				UsageGroups: map[string]sampling.UsageGroupSampling{
					"${labels.service_name}": {Probability: 0.0}, // we drop all profiles by default
					"test-service-1":         {Probability: tt.probability},
				},
			}

			overrides := validation.MockOverrides(func(defaults *validation.Limits, tenantLimits map[string]*validation.Limits) {
				l := validation.MockDefaultLimits()
				l.DistributorSampling = samplingConfig
				tenantLimits[tenantID] = l
			})
			d := &Distributor{
				limits: overrides,
			}

			// Count acceptances; the decision itself is randomized.
			accepted := 0
			for i := 0; i < iterations; i++ {
				if s, _ := d.shouldSample(tenantID, tt.usageGroups); s {
					accepted++
				}
			}

			actualRate := float64(accepted) / float64(iterations)
			expectedRate := tt.probability
			deviation := math.Abs(actualRate - expectedRate)

			// 5% absolute tolerance keeps the test stable at 10000 samples.
			tolerance := 0.05
			assert.True(t, deviation <= tolerance,
				"Sampling rate %.3f is outside tolerance %.3f of expected rate %.3f (deviation: %.3f)",
				actualRate, tolerance, expectedRate, deviation)

			t.Logf("Expected: %.3f, Actual: %.3f, Deviation: %.3f", expectedRate, actualRate, deviation)
		})
	}
}