github.com/grafana/pyroscope@v1.18.0/pkg/distributor/distributor.go (about)

     1  package distributor
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/json"
     7  	"expvar"
     8  	"flag"
     9  	"fmt"
    10  	"hash/fnv"
    11  	"math/rand"
    12  	"net/http"
    13  	"sort"
    14  	"sync"
    15  	"time"
    16  
    17  	"connectrpc.com/connect"
    18  	"go.uber.org/atomic"
    19  
    20  	"github.com/dustin/go-humanize"
    21  	"github.com/go-kit/log"
    22  	"github.com/go-kit/log/level"
    23  	"github.com/google/uuid"
    24  	"github.com/grafana/dskit/kv"
    25  	"github.com/grafana/dskit/limiter"
    26  	"github.com/grafana/dskit/multierror"
    27  	"github.com/grafana/dskit/ring"
    28  	ring_client "github.com/grafana/dskit/ring/client"
    29  	"github.com/grafana/dskit/services"
    30  	"github.com/opentracing/opentracing-go"
    31  	"github.com/opentracing/opentracing-go/ext"
    32  	"github.com/pkg/errors"
    33  	"github.com/prometheus/client_golang/prometheus"
    34  	"github.com/prometheus/client_golang/prometheus/promauto"
    35  	"github.com/prometheus/common/model"
    36  	"golang.org/x/sync/errgroup"
    37  
    38  	profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    39  	pushv1 "github.com/grafana/pyroscope/api/gen/proto/go/push/v1"
    40  	segmentwriterv1 "github.com/grafana/pyroscope/api/gen/proto/go/segmentwriter/v1"
    41  	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
    42  	connectapi "github.com/grafana/pyroscope/pkg/api/connect"
    43  	"github.com/grafana/pyroscope/pkg/clientpool"
    44  	"github.com/grafana/pyroscope/pkg/distributor/aggregator"
    45  	"github.com/grafana/pyroscope/pkg/distributor/ingestlimits"
    46  	distributormodel "github.com/grafana/pyroscope/pkg/distributor/model"
    47  	"github.com/grafana/pyroscope/pkg/distributor/sampling"
    48  	"github.com/grafana/pyroscope/pkg/distributor/writepath"
    49  	phlaremodel "github.com/grafana/pyroscope/pkg/model"
    50  	"github.com/grafana/pyroscope/pkg/model/pprofsplit"
    51  	"github.com/grafana/pyroscope/pkg/model/relabel"
    52  	"github.com/grafana/pyroscope/pkg/model/sampletype"
    53  	"github.com/grafana/pyroscope/pkg/pprof"
    54  	"github.com/grafana/pyroscope/pkg/tenant"
    55  	"github.com/grafana/pyroscope/pkg/usagestats"
    56  	"github.com/grafana/pyroscope/pkg/util"
    57  	"github.com/grafana/pyroscope/pkg/util/spanlogger"
    58  	"github.com/grafana/pyroscope/pkg/validation"
    59  )
    60  
// PushClient is the subset of the push service API the distributor uses to
// forward profiles downstream.
type PushClient interface {
	Push(context.Context, *connect.Request[pushv1.PushRequest]) (*connect.Response[pushv1.PushResponse], error)
}
    64  
const (
	// distributorRingKey is the key under which we store the distributors ring in the KVStore.
	distributorRingKey = "distributor"

	// ringAutoForgetUnhealthyPeriods is how many consecutive timeout periods an unhealthy instance
	// in the ring will be automatically removed after.
	ringAutoForgetUnhealthyPeriods = 10

	// ProfileName is the reserved label name that carries the profile type name.
	ProfileName = "__name__"
)
    75  
// Config for a Distributor.
type Config struct {
	// PushTimeout bounds each asynchronous push to the write path.
	PushTimeout time.Duration
	// PoolConfig configures the ingester client pool.
	PoolConfig clientpool.PoolConfig `yaml:"pool_config,omitempty"`

	// DistributorRing configures the distributors ring, used to count healthy
	// distributor instances for the global ingestion rate limiter.
	DistributorRing util.CommonRingConfig `yaml:"ring"`
}
    84  
// RegisterFlags registers distributor-related flags on fs: the client pool
// flags (prefixed "distributor"), the push timeout, and the distributors
// ring flags (prefixed "distributor.ring.").
func (cfg *Config) RegisterFlags(fs *flag.FlagSet, logger log.Logger) {
	cfg.PoolConfig.RegisterFlagsWithPrefix("distributor", fs)
	fs.DurationVar(&cfg.PushTimeout, "distributor.push.timeout", 5*time.Second, "Timeout when pushing data to ingester.")
	cfg.DistributorRing.RegisterFlags("distributor.ring.", "collectors/", "distributors", fs, logger)
}
    91  
// Distributor coordinates replication and distribution of profiles across
// the ingesters (or segment writers, depending on the write path routing).
type Distributor struct {
	services.Service
	logger log.Logger

	cfg           Config
	limits        Limits
	ingestersRing ring.ReadRing
	pool          *ring_client.Pool

	// The global rate limiter requires a distributors ring to count
	// the number of healthy instances
	distributorsLifecycler *ring.BasicLifecycler
	distributorsRing       *ring.Ring
	healthyInstancesCount  *atomic.Uint32
	ingestionRateLimiter   *limiter.RateLimiter
	aggregator             *aggregator.MultiTenantAggregator[*pprof.ProfileMerge]
	// asyncRequests tracks in-flight background sends (e.g. aggregated
	// profiles) so stopping() can wait for them to drain.
	asyncRequests          sync.WaitGroup
	ingestionLimitsSampler *ingestlimits.Sampler
	usageGroupEvaluator    *validation.UsageGroupEvaluator

	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher

	// Metrics and stats.
	metrics                 *metrics
	rfStats                 *expvar.Int
	bytesReceivedStats      *usagestats.Statistics
	bytesReceivedTotalStats *usagestats.Counter
	profileReceivedStats    *usagestats.MultiCounter
	profileSizeStats        *usagestats.MultiStatistics

	// router directs requests to the ingester, the segment writer, or both.
	router        *writepath.Router
	segmentWriter writepath.SegmentWriterClient
}
   127  
// Limits exposes the per-tenant settings the distributor needs:
// rate limits, size/label validation bounds, relabeling rules, usage
// group configuration, and write path routing overrides.
type Limits interface {
	IngestionRateBytes(tenantID string) float64
	IngestionBurstSizeBytes(tenantID string) int
	IngestionLimit(tenantID string) *ingestlimits.Config
	IngestionBodyLimitBytes(tenantID string) int64
	DistributorSampling(tenantID string) *sampling.Config
	IngestionTenantShardSize(tenantID string) int
	MaxLabelNameLength(tenantID string) int
	MaxLabelValueLength(tenantID string) int
	MaxLabelNamesPerSeries(tenantID string) int
	MaxProfileSizeBytes(tenantID string) int
	MaxProfileStacktraceSamples(tenantID string) int
	MaxProfileStacktraceSampleLabels(tenantID string) int
	MaxProfileStacktraceDepth(tenantID string) int
	MaxProfileSymbolValueLength(tenantID string) int
	MaxSessionsPerSeries(tenantID string) int
	EnforceLabelsOrder(tenantID string) bool
	IngestionRelabelingRules(tenantID string) []*relabel.Config
	SampleTypeRelabelingRules(tenantID string) []*relabel.Config
	DistributorUsageGroups(tenantID string) *validation.UsageGroupConfig
	WritePathOverrides(tenantID string) writepath.Config
	validation.ProfileValidationLimits
	aggregator.Limits
}
   152  
// New creates a Distributor: it builds the ingester client pool, the write
// path router, the distributors ring/lifecycler (used for global rate
// limiting), and wires all of them as subservices managed by the returned
// service.
func New(
	config Config,
	ingesterRing ring.ReadRing,
	ingesterClientFactory ring_client.PoolFactory,
	limits Limits,
	reg prometheus.Registerer,
	logger log.Logger,
	segmentWriter writepath.SegmentWriterClient,
	ingesterClientsOptions ...connect.ClientOption,
) (*Distributor, error) {
	// Caller-supplied options come after the defaults so they take precedence.
	ingesterClientsOptions = append(
		connectapi.DefaultClientOptions(),
		ingesterClientsOptions...,
	)

	clients := promauto.With(reg).NewGauge(prometheus.GaugeOpts{
		Namespace: "pyroscope",
		Name:      "distributor_ingester_clients",
		Help:      "The current number of ingester clients.",
	})
	d := &Distributor{
		cfg:                     config,
		logger:                  logger,
		ingestersRing:           ingesterRing,
		pool:                    clientpool.NewIngesterPool(config.PoolConfig, ingesterRing, ingesterClientFactory, clients, logger, ingesterClientsOptions...),
		segmentWriter:           segmentWriter,
		metrics:                 newMetrics(reg),
		healthyInstancesCount:   atomic.NewUint32(0),
		aggregator:              aggregator.NewMultiTenantAggregator[*pprof.ProfileMerge](limits, reg),
		limits:                  limits,
		rfStats:                 usagestats.NewInt("distributor_replication_factor"),
		bytesReceivedStats:      usagestats.NewStatistics("distributor_bytes_received"),
		bytesReceivedTotalStats: usagestats.NewCounter("distributor_bytes_received_total"),
		profileReceivedStats:    usagestats.NewMultiCounter("distributor_profiles_received", "lang"),
		profileSizeStats:        usagestats.NewMultiStatistics("distributor_profile_sizes", "lang"),
	}

	ingesterRoute := writepath.IngesterFunc(d.sendRequestsToIngester)
	segmentWriterRoute := writepath.IngesterFunc(d.sendRequestsToSegmentWriter)
	d.router = writepath.NewRouter(logger, reg, ingesterRoute, segmentWriterRoute)

	var err error
	subservices := []services.Service(nil)
	subservices = append(subservices, d.pool)

	distributorsRing, distributorsLifecycler, err := newRingAndLifecycler(config.DistributorRing, d.healthyInstancesCount, logger, reg)
	if err != nil {
		return nil, err
	}

	d.ingestionLimitsSampler = ingestlimits.NewSampler(distributorsRing)
	d.usageGroupEvaluator = validation.NewUsageGroupEvaluator(logger)

	subservices = append(subservices, distributorsLifecycler, distributorsRing, d.aggregator, d.ingestionLimitsSampler)

	// The rate limiter strategy counts healthy distributors to share the
	// global tenant limit; recheck period is 10s.
	d.ingestionRateLimiter = limiter.NewRateLimiter(newGlobalRateStrategy(newIngestionRateStrategy(limits), d), 10*time.Second)
	d.distributorsLifecycler = distributorsLifecycler
	d.distributorsRing = distributorsRing

	d.subservices, err = services.NewManager(subservices...)
	if err != nil {
		return nil, errors.Wrap(err, "services manager")
	}
	d.subservicesWatcher = services.NewFailureWatcher()
	d.subservicesWatcher.WatchManager(d.subservices)

	d.Service = services.NewBasicService(d.starting, d.running, d.stopping)
	d.rfStats.Set(int64(ingesterRing.ReplicationFactor()))
	d.metrics.replicationFactor.Set(float64(ingesterRing.ReplicationFactor()))
	return d, nil
}
   224  
// starting is the services.BasicService start hook: it starts all
// subservices and waits until they report healthy.
func (d *Distributor) starting(ctx context.Context) error {
	return services.StartManagerAndAwaitHealthy(ctx, d.subservices)
}
   228  
// running blocks until the context is cancelled (clean shutdown) or any
// subservice fails, in which case the failure is propagated.
func (d *Distributor) running(ctx context.Context) error {
	select {
	case <-ctx.Done():
		return nil
	case err := <-d.subservicesWatcher.Chan():
		return errors.Wrap(err, "distributor subservice failed")
	}
}
   237  
// stopping drains outstanding async pushes (e.g. aggregated profiles)
// before stopping the subservices.
func (d *Distributor) stopping(_ error) error {
	d.asyncRequests.Wait()
	return services.StopManagerAndAwaitStopped(context.Background(), d.subservices)
}
   242  
   243  func isKnownConnectError(err error) bool {
   244  	ce := new(connect.Error)
   245  	if !errors.As(err, &ce) {
   246  		return false
   247  	}
   248  	return ce.Code() != connect.CodeUnknown
   249  }
   250  
// isKnownValidationError reports whether err carries a classified
// validation discard reason (validation.ReasonOf recognizes it).
func isKnownValidationError(err error) bool {
	return validation.ReasonOf(err) != validation.Unknown
}
   254  
// Push implements the connect push endpoint: it decodes each raw pprof
// payload, enforces the per-profile and cumulative per-request size limits,
// and forwards the decoded series to PushBatch. Known validation errors are
// wrapped with CodeInvalidArgument (HTTP 400) by the deferred handler, which
// reads the named return value err.
func (d *Distributor) Push(ctx context.Context, grpcReq *connect.Request[pushv1.PushRequest]) (_ *connect.Response[pushv1.PushResponse], err error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "Distributor.Push")
	defer sp.Finish()

	tenantID, err := tenant.ExtractTenantIDFromContext(ctx)
	if err != nil {
		return nil, connect.NewError(connect.CodeUnauthenticated, err)
	}

	defer func() {
		if err == nil {
			return
		}

		// log error
		ext.LogError(sp, err)
		level.Debug(util.LoggerWithContext(ctx, d.logger)).Log("msg", "failed to validate profile", "err", err)

		// wrap the errors with InvalidArgument code for profile validation errors, so they return 400
		if !isKnownConnectError(err) && isKnownValidationError(err) {
			err = connect.NewError(connect.CodeInvalidArgument, err)
		}
	}()

	maxProfileSizeBytes := int64(d.limits.MaxProfileSizeBytes(tenantID))
	maxRequestSizeBytes := d.limits.IngestionBodyLimitBytes(tenantID)
	requestSizeUsed := int64(0)
	requestProfileCount := 0

	req := &distributormodel.PushRequest{
		Series:         make([]*distributormodel.ProfileSeries, 0, len(grpcReq.Msg.Series)),
		RawProfileType: distributormodel.RawProfileTypePPROF,
	}
	allErrors := multierror.New()
	for _, grpcSeries := range grpcReq.Msg.Series {
		for _, grpcSample := range grpcSeries.Samples {
			// Decompression is bounded so a hostile payload cannot exhaust memory.
			profile, err := pprof.RawFromBytesWithLimit(grpcSample.RawProfile, maxProfileSizeBytes)
			if err != nil {
				// check if decompression size has been exceeded
				dsErr := new(pprof.ErrDecompressedSizeExceedsLimit)
				if errors.As(err, &dsErr) {
					validation.DiscardedBytes.WithLabelValues(string(validation.ProfileSizeLimit), tenantID).Add(float64(maxProfileSizeBytes))
					validation.DiscardedProfiles.WithLabelValues(string(validation.ProfileSizeLimit), tenantID).Add(float64(1))
					err = validation.NewErrorf(validation.ProfileSizeLimit, "uncompressed profile payload size exceeds limit of %s", humanize.Bytes(uint64(maxProfileSizeBytes)))
				}
				// Keep decoding remaining samples; the error is reported at the end.
				allErrors.Add(err)
				continue
			}
			requestSizeUsed += int64(profile.RawSize())
			requestProfileCount += 1
			// The cumulative body limit is a hard stop: the whole request is rejected.
			if maxRequestSizeBytes > 0 && requestSizeUsed > maxRequestSizeBytes {
				validation.DiscardedBytes.WithLabelValues(string(validation.BodySizeLimit), tenantID).Add(float64(requestSizeUsed))
				validation.DiscardedProfiles.WithLabelValues(string(validation.BodySizeLimit), tenantID).Add(float64(requestProfileCount))
				return nil, validation.NewErrorf(validation.BodySizeLimit, "uncompressed batched profile payload size exceeds limit of %s", humanize.Bytes(uint64(maxRequestSizeBytes)))
			}
			series := &distributormodel.ProfileSeries{
				Labels:     grpcSeries.Labels,
				Profile:    profile,
				RawProfile: grpcSample.RawProfile,
				ID:         grpcSample.ID,
			}
			req.Series = append(req.Series, series)
		}
	}
	// If we have validation errors and no valid profiles, return the validation errors
	// instead of calling PushBatch which would return "no profiles received"
	if len(req.Series) == 0 && allErrors.Err() != nil {
		return nil, allErrors.Err()
	}
	if err := d.PushBatch(ctx, req); err != nil {
		allErrors.Add(err)
	}
	err = allErrors.Err()
	if err != nil {
		return nil, err
	}
	return connect.NewResponse(new(pushv1.PushResponse)), err
}
   333  
   334  func (d *Distributor) GetProfileLanguage(series *distributormodel.ProfileSeries) string {
   335  	if series.Language != "" {
   336  		return series.Language
   337  	}
   338  	lang := series.GetLanguage()
   339  	if lang == "" {
   340  		lang = pprof.GetLanguage(series.Profile)
   341  	}
   342  	series.Language = lang
   343  	return series.Language
   344  }
   345  
// PushBatch records receive stats for the request and pushes every series
// concurrently via pushSeries, one goroutine per series. Per-series errors
// are collected under a mutex and returned as a single multierror.
func (d *Distributor) PushBatch(ctx context.Context, req *distributormodel.PushRequest) error {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "Distributor.PushBatch")
	defer sp.Finish()

	tenantID, err := tenant.ExtractTenantIDFromContext(ctx)
	if err != nil {
		return connect.NewError(connect.CodeUnauthenticated, err)
	}
	sp.SetTag("tenant_id", tenantID)

	if len(req.Series) == 0 {
		return noNewProfilesReceivedError()
	}

	d.bytesReceivedTotalStats.Inc(int64(req.ReceivedCompressedProfileSize))
	d.bytesReceivedStats.Record(float64(req.ReceivedCompressedProfileSize))
	if req.RawProfileType != distributormodel.RawProfileTypePPROF {
		// if a single profile contains multiple profile types/names (e.g. jfr) then there is no such thing as
		// compressed size per profile type as all profile types are compressed once together. So we can not count
		// compressed bytes per profile type. Instead we count compressed bytes per profile.
		profName := req.RawProfileType // use "jfr" as profile name
		d.metrics.receivedCompressedBytes.WithLabelValues(string(profName), tenantID).Observe(float64(req.ReceivedCompressedProfileSize))
	}

	res := multierror.New()
	errorsMutex := new(sync.Mutex)
	wg := new(sync.WaitGroup)
	for index, s := range req.Series {
		wg.Add(1)
		// NOTE(review): the goroutine closes over index and s without
		// shadowing; this relies on per-iteration loop variables (Go 1.22+)
		// — confirm the module's go directive is >= 1.22.
		go func() {
			defer wg.Done()
			// RecoverPanic converts a panic in pushSeries into an error so one
			// bad series cannot take down the whole batch.
			itErr := util.RecoverPanic(func() error {
				return d.pushSeries(ctx, s, req.RawProfileType, tenantID)
			})()

			if itErr != nil {
				itErr = fmt.Errorf("push series with index %d and id %s failed: %w", index, s.ID, itErr)
			}
			errorsMutex.Lock()
			res.Add(itErr)
			errorsMutex.Unlock()
		}()
	}
	wg.Wait()
	return res.Err()
}
   392  
   393  type lazyUsageGroups func() []validation.UsageGroupMatchName
   394  
   395  func (l lazyUsageGroups) String() string {
   396  	groups := l()
   397  	result := make([]string, len(groups))
   398  	for pos := range groups {
   399  		result[pos] = groups[pos].String()
   400  	}
   401  	return fmt.Sprintf("%v", result)
   402  }
   403  
// pushLog accumulates key/value pairs describing a single push and emits
// exactly one log line when the push completes (see log()).
type pushLog struct {
	fields []any                       // flattened key/value pairs; fields[0] is "msg", fields[1] is reserved for the message
	lvl    func(log.Logger) log.Logger // log level; defaulted from the error in log() when unset
	msg    string                      // log message; defaulted from the error in log() when unset
}
   409  
   410  func newPushLog(capacity int) *pushLog {
   411  	fields := make([]any, 2, (capacity+1)*2)
   412  	fields[0] = "msg"
   413  	return &pushLog{
   414  		fields: fields,
   415  	}
   416  }
   417  
// addFields appends key/value pairs to be emitted with the final log line.
func (p *pushLog) addFields(fields ...any) {
	p.fields = append(p.fields, fields...)
}
   421  
   422  func (p *pushLog) log(logger log.Logger, err error) {
   423  	// determine log level
   424  	if p.lvl == nil {
   425  		if err != nil {
   426  			p.lvl = level.Warn
   427  		} else {
   428  			p.lvl = level.Debug
   429  		}
   430  	}
   431  
   432  	if err != nil {
   433  		p.addFields("err", err)
   434  	}
   435  
   436  	// update message
   437  	if p.msg == "" {
   438  		if err != nil {
   439  			p.msg = "profile rejected"
   440  		} else {
   441  			p.msg = "profile accepted"
   442  		}
   443  	}
   444  	p.fields[1] = p.msg
   445  	p.lvl(logger).Log(p.fields...)
   446  }
   447  
// pushSeries runs the full per-series ingestion pipeline: label fix-up,
// global/usage-group ingest limits, rate limiting, sampling, validation,
// normalization, optional aggregation, and finally routing to the write
// path. The named return err is read by the deferred finalLog call, which
// emits exactly one accept/reject log line per series.
func (d *Distributor) pushSeries(ctx context.Context, req *distributormodel.ProfileSeries, origin distributormodel.RawProfileType, tenantID string) (err error) {
	if req.Profile == nil {
		return noNewProfilesReceivedError()
	}
	now := model.Now()

	logger := spanlogger.FromContext(ctx, log.With(d.logger, "tenant", tenantID))
	finalLog := newPushLog(10)
	defer func() {
		finalLog.log(logger, err)
	}()

	// Ensure every series carries a service_name label, falling back to a
	// default value when the client did not set one.
	req.TenantID = tenantID
	serviceName := phlaremodel.Labels(req.Labels).Get(phlaremodel.LabelNameServiceName)
	if serviceName == "" {
		req.Labels = append(req.Labels, &typesv1.LabelPair{Name: phlaremodel.LabelNameServiceName, Value: phlaremodel.AttrServiceNameFallback})
	} else {
		finalLog.addFields("service_name", serviceName)
	}
	sort.Sort(phlaremodel.Labels(req.Labels))

	if req.ID != "" {
		finalLog.addFields("profile_id", req.ID)
	}

	req.TotalProfiles = 1
	req.TotalBytesUncompressed = calculateRequestSize(req)
	d.metrics.observeProfileSize(tenantID, StageReceived, req.TotalBytesUncompressed)

	// Tenant-wide ingest limit: discarded data is accounted before returning.
	if err := d.checkIngestLimit(req); err != nil {
		finalLog.msg = "rejecting profile due to global ingest limit"
		finalLog.lvl = level.Debug
		validation.DiscardedProfiles.WithLabelValues(string(validation.IngestLimitReached), tenantID).Add(float64(req.TotalProfiles))
		validation.DiscardedBytes.WithLabelValues(string(validation.IngestLimitReached), tenantID).Add(float64(req.TotalBytesUncompressed))
		return err
	}

	if err := d.rateLimit(tenantID, req); err != nil {
		return err
	}

	usageGroups := d.limits.DistributorUsageGroups(tenantID)

	profName := phlaremodel.Labels(req.Labels).Get(ProfileName)
	finalLog.addFields("profile_type", profName)

	// lazyUsageGroups defers the (potentially costly) name formatting until
	// the log line is actually rendered.
	groups := d.usageGroupEvaluator.GetMatch(tenantID, usageGroups, req.Labels)
	finalLog.addFields("matched_usage_groups", lazyUsageGroups(groups.Names))
	if err := d.checkUsageGroupsIngestLimit(req, groups.Names()); err != nil {
		finalLog.msg = "rejecting profile due to usage group ingest limit"
		finalLog.lvl = level.Debug
		validation.DiscardedProfiles.WithLabelValues(string(validation.IngestLimitReached), tenantID).Add(float64(req.TotalProfiles))
		validation.DiscardedBytes.WithLabelValues(string(validation.IngestLimitReached), tenantID).Add(float64(req.TotalBytesUncompressed))
		groups.CountDiscardedBytes(string(validation.IngestLimitReached), req.TotalBytesUncompressed)
		return err
	}

	// Probabilistic sampling: a skipped profile is not an error for the client.
	willSample, samplingSource := d.shouldSample(tenantID, groups.Names())
	if !willSample {
		finalLog.addFields(
			"usage_group", samplingSource.UsageGroup,
			"probability", samplingSource.Probability,
		)
		finalLog.msg = "skipping profile due to sampling"
		validation.DiscardedProfiles.WithLabelValues(string(validation.SkippedBySamplingRules), tenantID).Add(float64(req.TotalProfiles))
		validation.DiscardedBytes.WithLabelValues(string(validation.SkippedBySamplingRules), tenantID).Add(float64(req.TotalBytesUncompressed))
		groups.CountDiscardedBytes(string(validation.SkippedBySamplingRules), req.TotalBytesUncompressed)
		return nil
	}
	if samplingSource != nil {
		if err := req.MarkSampledRequest(samplingSource); err != nil {
			return err
		}
	}

	profLanguage := d.GetProfileLanguage(req)
	if profLanguage != "" {
		finalLog.addFields("detected_language", profLanguage)
	}

	usagestats.NewCounter(fmt.Sprintf("distributor_profile_type_%s_received", profName)).Inc(1)
	d.profileReceivedStats.Inc(1, profLanguage)
	if origin == distributormodel.RawProfileTypePPROF {
		d.metrics.receivedCompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(len(req.RawProfile)))
	}
	p := req.Profile
	decompressedSize := p.SizeVT()
	profTime := model.TimeFromUnixNano(p.TimeNanos).Time()
	finalLog.addFields(
		"profile_time", profTime,
		"ingestion_delay", now.Time().Sub(profTime),
		"decompressed_size", decompressedSize,
		"sample_count", len(p.Sample),
	)
	d.metrics.observeProfileSize(tenantID, StageSampled, int64(decompressedSize))                              // TODO: use req.TotalBytesUncompressed to include labels size
	d.metrics.receivedDecompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(decompressedSize)) // deprecated TODO remove
	d.metrics.receivedSamples.WithLabelValues(profName, tenantID).Observe(float64(len(p.Sample)))
	d.profileSizeStats.Record(float64(decompressedSize), profLanguage)
	groups.CountReceivedBytes(profName, int64(decompressedSize))

	validated, err := validation.ValidateProfile(d.limits, tenantID, p, decompressedSize, req.Labels, now)
	if err != nil {
		reason := string(validation.ReasonOf(err))
		finalLog.addFields("reason", reason)
		validation.DiscardedProfiles.WithLabelValues(reason, tenantID).Add(float64(req.TotalProfiles))
		validation.DiscardedBytes.WithLabelValues(reason, tenantID).Add(float64(req.TotalBytesUncompressed))
		groups.CountDiscardedBytes(reason, req.TotalBytesUncompressed)
		return connect.NewError(connect.CodeInvalidArgument, err)
	}

	symbolsSize, samplesSize := profileSizeBytes(p.Profile)
	d.metrics.receivedSamplesBytes.WithLabelValues(profName, tenantID).Observe(float64(samplesSize))
	d.metrics.receivedSymbolsBytes.WithLabelValues(profName, tenantID).Observe(float64(symbolsSize))

	// Normalisation is quite an expensive operation,
	// therefore it should be done after the rate limit check.
	if req.Language == "go" {
		sp, _ := opentracing.StartSpanFromContext(ctx, "pprof.FixGoProfile")
		req.Profile.Profile = pprof.FixGoProfile(req.Profile.Profile)
		sp.Finish()
	}
	{
		sp, _ := opentracing.StartSpanFromContext(ctx, "sampletype.Relabel")
		sampleTypeRules := d.limits.SampleTypeRelabelingRules(req.TenantID)
		sampletype.Relabel(validated, sampleTypeRules, req.Labels)
		sp.Finish()
	}
	{
		sp, _ := opentracing.StartSpanFromContext(ctx, "Profile.Normalize")
		req.Profile.Normalize()
		sp.Finish()
		d.metrics.observeProfileSize(tenantID, StageNormalized, calculateRequestSize(req))
	}

	if len(req.Profile.Sample) == 0 {
		// TODO(kolesnikovae):
		//   Normalization may cause all profiles and series to be empty.
		//   We should report it as an error and account for discarded data.
		//   The check should be done after ValidateProfile and normalization.
		return nil
	}

	if err := injectMappingVersions(req); err != nil {
		_ = level.Warn(logger).Log("msg", "failed to inject mapping versions", "err", err)
	}

	// Reduce cardinality of the session_id label.
	maxSessionsPerSeries := d.limits.MaxSessionsPerSeries(req.TenantID)
	req.Labels = d.limitMaxSessionsPerSeries(maxSessionsPerSeries, req.Labels)

	// Aggregation consumes the profile asynchronously when it applies.
	aggregated, err := d.aggregate(ctx, req)
	if err != nil {
		return err
	}
	if aggregated {
		return nil
	}

	// Write path router directs the request to the ingester or segment
	// writer, or both, depending on the configuration.
	// The router uses sendRequestsToSegmentWriter and sendRequestsToIngester
	// functions to send the request to the appropriate service; these are
	// called independently, and may be called concurrently: the request is
	// cloned in this case – the callee may modify the request safely.
	config := d.limits.WritePathOverrides(req.TenantID)
	return d.router.Send(ctx, req, config)
}
   615  
// noNewProfilesReceivedError is returned when a push request contains no
// usable profiles; clients receive CodeInvalidArgument (HTTP 400).
func noNewProfilesReceivedError() *connect.Error {
	return connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("no profiles received"))
}
   619  
   620  // If aggregation is configured for the tenant, we try to determine
   621  // whether the profile is eligible for aggregation based on the series
   622  // profile rate, and handle it asynchronously, if this is the case.
   623  //
   624  // NOTE(kolesnikovae): aggregated profiles are handled on best-effort
   625  // basis (at-most-once delivery semantics): any error occurred will
   626  // not be returned to the client, and it must not retry sending.
   627  //
   628  // Aggregation is only meant to be used for cases, when clients do not
   629  // form individual series (e.g., server-less workload), and typically
   630  // are ephemeral in its nature, and therefore retrying is not possible
   631  // or desirable, as it prolongs life-time duration of the clients.
   632  func (d *Distributor) aggregate(ctx context.Context, req *distributormodel.ProfileSeries) (bool, error) {
   633  	a, ok := d.aggregator.AggregatorForTenant(req.TenantID)
   634  	if !ok {
   635  		// Aggregation is not configured for the tenant.
   636  		return false, nil
   637  	}
   638  
   639  	series := req
   640  
   641  	// First, we drop __session_id__ label to increase probability
   642  	// of aggregation, which is handled done per series.
   643  	profile := series.Profile.Profile
   644  	labels := phlaremodel.Labels(series.Labels)
   645  	if _, hasSessionID := labels.GetLabel(phlaremodel.LabelNameSessionID); hasSessionID {
   646  		labels = labels.Clone().Delete(phlaremodel.LabelNameSessionID)
   647  	}
   648  	r, ok, err := a.Aggregate(labels.Hash(), profile.TimeNanos, mergeProfile(profile))
   649  	if err != nil {
   650  		return false, connect.NewError(connect.CodeInvalidArgument, err)
   651  	}
   652  	if !ok {
   653  		// Aggregation is not needed.
   654  		return false, nil
   655  	}
   656  	handler := r.Handler()
   657  	if handler == nil {
   658  		// Aggregation is handled in another goroutine.
   659  		return true, nil
   660  	}
   661  
   662  	// Aggregation is needed, and we own the result handler.
   663  	// Note that the labels include the source series labels with
   664  	// session ID: this is required to ensure fair load distribution.
   665  	d.asyncRequests.Add(1)
   666  	labels = phlaremodel.Labels(req.Labels).Clone()
   667  	annotations := req.Annotations
   668  	go func() {
   669  		defer d.asyncRequests.Done()
   670  		sendErr := util.RecoverPanic(func() error {
   671  			localCtx, cancel := context.WithTimeout(context.Background(), d.cfg.PushTimeout)
   672  			defer cancel()
   673  			localCtx = tenant.InjectTenantID(localCtx, req.TenantID)
   674  			if sp := opentracing.SpanFromContext(ctx); sp != nil {
   675  				localCtx = opentracing.ContextWithSpan(localCtx, sp)
   676  			}
   677  			// Obtain the aggregated profile.
   678  			p, handleErr := handler()
   679  			if handleErr != nil {
   680  				return handleErr
   681  			}
   682  			aggregated := &distributormodel.ProfileSeries{
   683  				TenantID:    req.TenantID,
   684  				Labels:      labels,
   685  				Profile:     pprof.RawFromProto(p.Profile()),
   686  				Annotations: annotations,
   687  			}
   688  			config := d.limits.WritePathOverrides(req.TenantID)
   689  			return d.router.Send(localCtx, aggregated, config)
   690  		})()
   691  		if sendErr != nil {
   692  			_ = level.Error(d.logger).Log("msg", "failed to handle aggregation", "tenant", req.TenantID, "err", err)
   693  		}
   694  	}()
   695  
   696  	return true, nil
   697  }
   698  
// visitSampleSeriesForIngester creates a profile per unique label set in pprof labels.
// It is a thin adapter so the ingester path can be passed to visitSampleSeries
// alongside other split strategies.
func visitSampleSeriesForIngester(profile *profilev1.Profile, labels []*typesv1.LabelPair, rules []*relabel.Config, visitor *sampleSeriesVisitor) error {
	return pprofsplit.VisitSampleSeries(profile, labels, rules, visitor)
}
   703  
   704  func (d *Distributor) sendRequestsToIngester(ctx context.Context, req *distributormodel.ProfileSeries) (resp *connect.Response[pushv1.PushResponse], err error) {
   705  	sampleSeries, err := d.visitSampleSeries(req, visitSampleSeriesForIngester)
   706  	if err != nil {
   707  		return nil, err
   708  	}
   709  	if len(sampleSeries) == 0 {
   710  		return connect.NewResponse(&pushv1.PushResponse{}), nil
   711  	}
   712  
   713  	enforceLabelOrder := d.limits.EnforceLabelsOrder(req.TenantID)
   714  	keys := make([]uint32, len(sampleSeries))
   715  	for i, s := range sampleSeries {
   716  		if enforceLabelOrder {
   717  			s.Labels = phlaremodel.Labels(s.Labels).InsertSorted(phlaremodel.LabelNameOrder, phlaremodel.LabelOrderEnforced)
   718  		}
   719  		keys[i] = TokenFor(req.TenantID, phlaremodel.LabelPairsString(s.Labels))
   720  	}
   721  
   722  	profiles := make([]*profileTracker, 0, len(sampleSeries))
   723  	for _, series := range sampleSeries {
   724  		p := series.Profile
   725  		// zip the data back into the buffer
   726  		bw := bytes.NewBuffer(series.RawProfile[:0])
   727  		if _, err = p.WriteTo(bw); err != nil {
   728  			return nil, err
   729  		}
   730  		series.ID = uuid.NewString()
   731  		series.RawProfile = bw.Bytes()
   732  		profiles = append(profiles, &profileTracker{profile: series})
   733  	}
   734  
   735  	const maxExpectedReplicationSet = 5 // typical replication factor 3 plus one for inactive plus one for luck
   736  	var descs [maxExpectedReplicationSet]ring.InstanceDesc
   737  
   738  	samplesByIngester := map[string][]*profileTracker{}
   739  	ingesterDescs := map[string]ring.InstanceDesc{}
   740  	for i, key := range keys {
   741  		// Get a subring if tenant has shuffle shard size configured.
   742  		subRing := d.ingestersRing.ShuffleShard(req.TenantID, d.limits.IngestionTenantShardSize(req.TenantID))
   743  
   744  		replicationSet, err := subRing.Get(key, ring.Write, descs[:0], nil, nil)
   745  		if err != nil {
   746  			return nil, err
   747  		}
   748  		profiles[i].minSuccess = len(replicationSet.Instances) - replicationSet.MaxErrors
   749  		profiles[i].maxFailures = replicationSet.MaxErrors
   750  		for _, ingester := range replicationSet.Instances {
   751  			samplesByIngester[ingester.Addr] = append(samplesByIngester[ingester.Addr], profiles[i])
   752  			ingesterDescs[ingester.Addr] = ingester
   753  		}
   754  	}
   755  	tracker := pushTracker{
   756  		done: make(chan struct{}, 1), // buffer avoids blocking if caller terminates - sendProfiles() only sends once on each
   757  		err:  make(chan error, 1),
   758  	}
   759  	tracker.samplesPending.Store(int32(len(profiles)))
   760  	for ingester, samples := range samplesByIngester {
   761  		go func(ingester ring.InstanceDesc, samples []*profileTracker) {
   762  			// Use a background context to make sure all ingesters get samples even if we return early
   763  			localCtx, cancel := context.WithTimeout(context.Background(), d.cfg.PushTimeout)
   764  			defer cancel()
   765  			localCtx = tenant.InjectTenantID(localCtx, req.TenantID)
   766  			if sp := opentracing.SpanFromContext(ctx); sp != nil {
   767  				localCtx = opentracing.ContextWithSpan(localCtx, sp)
   768  			}
   769  			d.sendProfiles(localCtx, ingester, samples, &tracker)
   770  		}(ingesterDescs[ingester], samples)
   771  	}
   772  	select {
   773  	case err = <-tracker.err:
   774  		return nil, err
   775  	case <-tracker.done:
   776  		return connect.NewResponse(&pushv1.PushResponse{}), nil
   777  	case <-ctx.Done():
   778  		return nil, ctx.Err()
   779  	}
   780  }
   781  
// visitSampleSeriesForSegmentWriter creates a profile per service.
// Labels that are shared by all pprof samples are used as series labels.
// Unique sample labels (not present in series labels) are preserved:
// pprof split takes place in segment-writers.
// Thin adapter matching the visitFunc signature; the grouping key is the
// service name label.
func visitSampleSeriesForSegmentWriter(profile *profilev1.Profile, labels []*typesv1.LabelPair, rules []*relabel.Config, visitor *sampleSeriesVisitor) error {
	return pprofsplit.VisitSampleSeriesBy(profile, labels, rules, visitor, phlaremodel.LabelNameServiceName)
}
   789  
   790  func (d *Distributor) sendRequestsToSegmentWriter(ctx context.Context, req *distributormodel.ProfileSeries) (*connect.Response[pushv1.PushResponse], error) {
   791  	// NOTE(kolesnikovae): if we return early, e.g., due to a validation error,
   792  	//   or if there are no series, the write path router has already seen the
   793  	//   request, and could have already accounted for the size, latency, etc.
   794  	serviceSeries, err := d.visitSampleSeries(req, visitSampleSeriesForSegmentWriter)
   795  	if err != nil {
   796  		return nil, err
   797  	}
   798  	if len(serviceSeries) == 0 {
   799  		return connect.NewResponse(&pushv1.PushResponse{}), nil
   800  	}
   801  
   802  	// TODO(kolesnikovae): Add profiles per request histogram.
   803  	// In most cases, we only have a single profile. We should avoid
   804  	// batching multiple profiles into a single request: overhead of handling
   805  	// multiple profiles in a single request is substantial: we need to
   806  	// allocate memory for all profiles at once, and wait for multiple requests
   807  	// routed to different shards to complete is generally a bad idea because
   808  	// it's hard to reason about latencies, retries, and error handling.
   809  	config := d.limits.WritePathOverrides(req.TenantID)
   810  	requests := make([]*segmentwriterv1.PushRequest, 0, len(serviceSeries)*2)
   811  	for _, s := range serviceSeries {
   812  		buf, err := pprof.Marshal(s.Profile.Profile, config.Compression == writepath.CompressionGzip)
   813  		if err != nil {
   814  			panic(fmt.Sprintf("failed to marshal profile: %v", err))
   815  		}
   816  		// Ideally, the ID should identify the whole request, and be
   817  		// deterministic (e.g, based on the request hash). In practice,
   818  		// the API allows batches, which makes it difficult to handle.
   819  		profileID := uuid.New()
   820  		requests = append(requests, &segmentwriterv1.PushRequest{
   821  			TenantId:    req.TenantID,
   822  			Labels:      s.Labels,
   823  			Profile:     buf,
   824  			ProfileId:   profileID[:],
   825  			Annotations: s.Annotations,
   826  		})
   827  	}
   828  
   829  	if len(requests) == 1 {
   830  		if _, err := d.segmentWriter.Push(ctx, requests[0]); err != nil {
   831  			return nil, err
   832  		}
   833  		return connect.NewResponse(&pushv1.PushResponse{}), nil
   834  	}
   835  
   836  	// Fallback. We should minimize probability of this branch.
   837  	g, ctx := errgroup.WithContext(ctx)
   838  	for _, r := range requests {
   839  		r := r
   840  		g.Go(func() error {
   841  			_, pushErr := d.segmentWriter.Push(ctx, r)
   842  			return pushErr
   843  		})
   844  	}
   845  	if err := g.Wait(); err != nil {
   846  		return nil, err
   847  	}
   848  
   849  	return connect.NewResponse(&pushv1.PushResponse{}), nil
   850  }
   851  
   852  // profileSizeBytes returns the size of symbols and samples in bytes.
   853  func profileSizeBytes(p *profilev1.Profile) (symbols, samples int64) {
   854  	fullSize := p.SizeVT()
   855  	// remove samples
   856  	samplesSlice := p.Sample
   857  	p.Sample = nil
   858  
   859  	symbols = int64(p.SizeVT())
   860  	samples = int64(fullSize) - symbols
   861  
   862  	// count labels in samples
   863  	samplesLabels := 0
   864  	for _, s := range samplesSlice {
   865  		for _, l := range s.Label {
   866  			samplesLabels += len(p.StringTable[l.Key]) + len(p.StringTable[l.Str]) + len(p.StringTable[l.NumUnit])
   867  		}
   868  	}
   869  	symbols -= int64(samplesLabels)
   870  	samples += int64(samplesLabels)
   871  
   872  	// restore samples
   873  	p.Sample = samplesSlice
   874  	return
   875  }
   876  
   877  func mergeProfile(profile *profilev1.Profile) aggregator.AggregateFn[*pprof.ProfileMerge] {
   878  	return func(m *pprof.ProfileMerge) (*pprof.ProfileMerge, error) {
   879  		if m == nil {
   880  			m = new(pprof.ProfileMerge)
   881  		}
   882  		if err := m.Merge(profile, true); err != nil {
   883  			return nil, connect.NewError(connect.CodeInvalidArgument, err)
   884  		}
   885  		return m, nil
   886  	}
   887  }
   888  
   889  func (d *Distributor) sendProfiles(ctx context.Context, ingester ring.InstanceDesc, profileTrackers []*profileTracker, pushTracker *pushTracker) {
   890  	err := d.sendProfilesErr(ctx, ingester, profileTrackers)
   891  	// If we succeed, decrement each sample's pending count by one.  If we reach
   892  	// the required number of successful puts on this sample, then decrement the
   893  	// number of pending samples by one.  If we successfully push all samples to
   894  	// min success ingesters, wake up the waiting rpc so it can return early.
   895  	// Similarly, track the number of errors, and if it exceeds maxFailures
   896  	// shortcut the waiting rpc.
   897  	//
   898  	// The use of atomic increments here guarantees only a single sendSamples
   899  	// goroutine will write to either channel.
   900  	for i := range profileTrackers {
   901  		if err != nil {
   902  			if profileTrackers[i].failed.Inc() <= int32(profileTrackers[i].maxFailures) {
   903  				continue
   904  			}
   905  			if pushTracker.samplesFailed.Inc() == 1 {
   906  				pushTracker.err <- err
   907  			}
   908  		} else {
   909  			if profileTrackers[i].succeeded.Inc() != int32(profileTrackers[i].minSuccess) {
   910  				continue
   911  			}
   912  			if pushTracker.samplesPending.Dec() == 0 {
   913  				pushTracker.done <- struct{}{}
   914  			}
   915  		}
   916  	}
   917  }
   918  
   919  func (d *Distributor) sendProfilesErr(ctx context.Context, ingester ring.InstanceDesc, profileTrackers []*profileTracker) error {
   920  	c, err := d.pool.GetClientFor(ingester.Addr)
   921  	if err != nil {
   922  		return err
   923  	}
   924  
   925  	req := connect.NewRequest(&pushv1.PushRequest{
   926  		Series: make([]*pushv1.RawProfileSeries, 0, len(profileTrackers)),
   927  	})
   928  
   929  	for _, p := range profileTrackers {
   930  		series := &pushv1.RawProfileSeries{
   931  			Labels: p.profile.Labels,
   932  			Samples: []*pushv1.RawSample{{
   933  				RawProfile: p.profile.RawProfile,
   934  				ID:         p.profile.ID,
   935  			}},
   936  			Annotations: p.profile.Annotations,
   937  		}
   938  
   939  		req.Msg.Series = append(req.Msg.Series, series)
   940  	}
   941  
   942  	_, err = c.(PushClient).Push(ctx, req)
   943  	return err
   944  }
   945  
   946  func (d *Distributor) ServeHTTP(w http.ResponseWriter, req *http.Request) {
   947  	if d.distributorsRing != nil {
   948  		d.distributorsRing.ServeHTTP(w, req)
   949  	} else {
   950  		ringNotEnabledPage := `
   951  			<!DOCTYPE html>
   952  			<html>
   953  				<head>
   954  					<meta charset="UTF-8">
   955  					<title>Distributor Status</title>
   956  				</head>
   957  				<body>
   958  					<h1>Distributor Status</h1>
   959  					<p>Distributor is not running with global limits enabled</p>
   960  				</body>
   961  			</html>`
   962  		util.WriteHTMLResponse(w, ringNotEnabledPage)
   963  	}
   964  }
   965  
   966  // HealthyInstancesCount implements the ReadLifecycler interface
   967  //
   968  // We use a ring lifecycler delegate to count the number of members of the
   969  // ring. The count is then used to enforce rate limiting correctly for each
   970  // distributor. $EFFECTIVE_RATE_LIMIT = $GLOBAL_RATE_LIMIT / $NUM_INSTANCES
   971  func (d *Distributor) HealthyInstancesCount() int {
   972  	return int(d.healthyInstancesCount.Load())
   973  }
   974  
   975  func (d *Distributor) limitMaxSessionsPerSeries(maxSessionsPerSeries int, labels phlaremodel.Labels) phlaremodel.Labels {
   976  	if maxSessionsPerSeries == 0 {
   977  		return labels.Delete(phlaremodel.LabelNameSessionID)
   978  	}
   979  	sessionIDLabel, ok := labels.GetLabel(phlaremodel.LabelNameSessionID)
   980  	if !ok {
   981  		return labels
   982  	}
   983  	sessionID, err := phlaremodel.ParseSessionID(sessionIDLabel.Value)
   984  	if err != nil {
   985  		_ = level.Debug(d.logger).Log("msg", "invalid session_id", "err", err)
   986  		return labels.Delete(phlaremodel.LabelNameSessionID)
   987  	}
   988  	sessionIDLabel.Value = phlaremodel.SessionID(int(sessionID) % maxSessionsPerSeries).String()
   989  	return labels
   990  }
   991  
   992  func (d *Distributor) rateLimit(tenantID string, req *distributormodel.ProfileSeries) error {
   993  	if !d.ingestionRateLimiter.AllowN(time.Now(), tenantID, int(req.TotalBytesUncompressed)) {
   994  		validation.DiscardedProfiles.WithLabelValues(string(validation.RateLimited), tenantID).Add(float64(req.TotalProfiles))
   995  		validation.DiscardedBytes.WithLabelValues(string(validation.RateLimited), tenantID).Add(float64(req.TotalBytesUncompressed))
   996  		return connect.NewError(connect.CodeResourceExhausted,
   997  			fmt.Errorf("push rate limit (%s) exceeded while adding %s", humanize.IBytes(uint64(d.limits.IngestionRateBytes(tenantID))), humanize.IBytes(uint64(req.TotalBytesUncompressed))),
   998  		)
   999  	}
  1000  	return nil
  1001  }
  1002  
  1003  func calculateRequestSize(req *distributormodel.ProfileSeries) int64 {
  1004  	// include the labels in the size calculation
  1005  	bs := int64(0)
  1006  	for _, lbs := range req.Labels {
  1007  		bs += int64(len(lbs.Name))
  1008  		bs += int64(len(lbs.Value))
  1009  	}
  1010  
  1011  	bs += int64(req.Profile.SizeVT())
  1012  	return bs
  1013  }
  1014  
  1015  func (d *Distributor) checkIngestLimit(req *distributormodel.ProfileSeries) error {
  1016  	l := d.limits.IngestionLimit(req.TenantID)
  1017  	if l == nil {
  1018  		return nil
  1019  	}
  1020  
  1021  	if l.LimitReached {
  1022  		// we want to allow a very small portion of the traffic after reaching the limit
  1023  		if d.ingestionLimitsSampler.AllowRequest(req.TenantID, l.Sampling) {
  1024  			if err := req.MarkThrottledTenant(l); err != nil {
  1025  				return err
  1026  			}
  1027  			return nil
  1028  		}
  1029  		limitResetTime := time.Unix(l.LimitResetTime, 0).UTC().Format(time.RFC3339)
  1030  		return connect.NewError(connect.CodeResourceExhausted,
  1031  			fmt.Errorf("limit of %s/%s reached, next reset at %s", humanize.IBytes(uint64(l.PeriodLimitMb*1024*1024)), l.PeriodType, limitResetTime))
  1032  	}
  1033  
  1034  	return nil
  1035  }
  1036  
  1037  func (d *Distributor) checkUsageGroupsIngestLimit(req *distributormodel.ProfileSeries, groupsInRequest []validation.UsageGroupMatchName) error {
  1038  	l := d.limits.IngestionLimit(req.TenantID)
  1039  	if l == nil || len(l.UsageGroups) == 0 {
  1040  		return nil
  1041  	}
  1042  
  1043  	for _, group := range groupsInRequest {
  1044  		limit, ok := l.UsageGroups[group.ResolvedName]
  1045  		if !ok {
  1046  			limit, ok = l.UsageGroups[group.ConfiguredName]
  1047  		}
  1048  		if !ok || !limit.LimitReached {
  1049  			continue
  1050  		}
  1051  		if d.ingestionLimitsSampler.AllowRequest(req.TenantID, l.Sampling) {
  1052  			if err := req.MarkThrottledUsageGroup(l, group.ResolvedName); err != nil {
  1053  				return err
  1054  			}
  1055  			return nil
  1056  		}
  1057  		limitResetTime := time.Unix(l.LimitResetTime, 0).UTC().Format(time.RFC3339)
  1058  		return connect.NewError(connect.CodeResourceExhausted,
  1059  			fmt.Errorf("limit of %s/%s reached for usage group %s, next reset at %s", humanize.IBytes(uint64(limit.PeriodLimitMb*1024*1024)), l.PeriodType, group, limitResetTime))
  1060  	}
  1061  
  1062  	return nil
  1063  }
  1064  
  1065  // shouldSample returns true if the profile should be injected and optionally the usage group that was responsible for the decision.
  1066  func (d *Distributor) shouldSample(tenantID string, groupsInRequest []validation.UsageGroupMatchName) (bool, *sampling.Source) {
  1067  	l := d.limits.DistributorSampling(tenantID)
  1068  	if l == nil {
  1069  		return true, nil
  1070  	}
  1071  
  1072  	samplingProbability := 1.0
  1073  	var match *validation.UsageGroupMatchName
  1074  	for _, group := range groupsInRequest {
  1075  		probabilityCfg, found := l.UsageGroups[group.ResolvedName]
  1076  		if !found {
  1077  			probabilityCfg, found = l.UsageGroups[group.ConfiguredName]
  1078  		}
  1079  		if !found {
  1080  			continue
  1081  		}
  1082  		// a less specific group loses to a more specific one
  1083  		if match != nil && match.IsMoreSpecificThan(&group) {
  1084  			continue
  1085  		}
  1086  		// lower probability wins; when tied, the more specific group wins
  1087  		if probabilityCfg.Probability <= samplingProbability {
  1088  			samplingProbability = probabilityCfg.Probability
  1089  			match = &group
  1090  		}
  1091  	}
  1092  
  1093  	if match == nil {
  1094  		return true, nil
  1095  	}
  1096  
  1097  	source := &sampling.Source{
  1098  		UsageGroup:  match.ResolvedName,
  1099  		Probability: samplingProbability,
  1100  	}
  1101  
  1102  	return rand.Float64() <= samplingProbability, source
  1103  }
  1104  
// profileTracker tracks the replication progress of a single profile series
// across its ingester replication set.
type profileTracker struct {
	profile     *distributormodel.ProfileSeries
	minSuccess  int          // pushes that must succeed for the profile to count as written
	maxFailures int          // pushes that may fail before the request is failed
	succeeded   atomic.Int32 // successful pushes observed so far
	failed      atomic.Int32 // failed pushes observed so far
}
  1112  
// pushTracker aggregates the outcome of all per-ingester pushes of a request.
// done and err are created with a buffer of one so the single send performed
// by sendProfiles never blocks, even if the caller has already returned.
type pushTracker struct {
	samplesPending atomic.Int32 // profiles not yet replicated to min success
	samplesFailed  atomic.Int32 // profiles that exceeded their failure budget
	done           chan struct{}
	err            chan error
}
  1119  
  1120  // TokenFor generates a token used for finding ingesters from ring
  1121  func TokenFor(tenantID, labels string) uint32 {
  1122  	h := fnv.New32()
  1123  	_, _ = h.Write([]byte(tenantID))
  1124  	_, _ = h.Write([]byte(labels))
  1125  	return h.Sum32()
  1126  }
  1127  
  1128  // newRingAndLifecycler creates a new distributor ring and lifecycler with all required lifecycler delegates
  1129  func newRingAndLifecycler(cfg util.CommonRingConfig, instanceCount *atomic.Uint32, logger log.Logger, reg prometheus.Registerer) (*ring.Ring, *ring.BasicLifecycler, error) {
  1130  	reg = prometheus.WrapRegistererWithPrefix("pyroscope_", reg)
  1131  	kvStore, err := kv.NewClient(cfg.KVStore, ring.GetCodec(), kv.RegistererWithKVName(reg, "distributor-lifecycler"), logger)
  1132  	if err != nil {
  1133  		return nil, nil, errors.Wrap(err, "failed to initialize distributors' KV store")
  1134  	}
  1135  
  1136  	lifecyclerCfg, err := toBasicLifecyclerConfig(cfg, logger)
  1137  	if err != nil {
  1138  		return nil, nil, errors.Wrap(err, "failed to build distributors' lifecycler config")
  1139  	}
  1140  
  1141  	var delegate ring.BasicLifecyclerDelegate
  1142  	delegate = ring.NewInstanceRegisterDelegate(ring.ACTIVE, lifecyclerCfg.NumTokens)
  1143  	delegate = newHealthyInstanceDelegate(instanceCount, cfg.HeartbeatTimeout, delegate)
  1144  	delegate = ring.NewLeaveOnStoppingDelegate(delegate, logger)
  1145  	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*cfg.HeartbeatTimeout, delegate, logger)
  1146  
  1147  	distributorsLifecycler, err := ring.NewBasicLifecycler(lifecyclerCfg, "distributor", distributorRingKey, kvStore, delegate, logger, reg)
  1148  	if err != nil {
  1149  		return nil, nil, errors.Wrap(err, "failed to initialize distributors' lifecycler")
  1150  	}
  1151  
  1152  	distributorsRing, err := ring.New(cfg.ToRingConfig(), "distributor", distributorRingKey, logger, reg)
  1153  	if err != nil {
  1154  		return nil, nil, errors.Wrap(err, "failed to initialize distributors' ring client")
  1155  	}
  1156  
  1157  	return distributorsRing, distributorsLifecycler, nil
  1158  }
  1159  
  1160  // injectMappingVersions extract from the labels the mapping version and inject it into the profile's main mapping. (mapping[0])
  1161  func injectMappingVersions(s *distributormodel.ProfileSeries) error {
  1162  	version, ok := phlaremodel.ServiceVersionFromLabels(s.Labels)
  1163  	if !ok {
  1164  		return nil
  1165  	}
  1166  	for _, m := range s.Profile.Mapping {
  1167  		version.BuildID = s.Profile.StringTable[m.BuildId]
  1168  		versionString, err := json.Marshal(version)
  1169  		if err != nil {
  1170  			return err
  1171  		}
  1172  		s.Profile.StringTable = append(s.Profile.StringTable, string(versionString))
  1173  		m.BuildId = int64(len(s.Profile.StringTable) - 1)
  1174  	}
  1175  	return nil
  1176  }
  1177  
// visitFunc applies a pprof split strategy (per-series for ingesters,
// per-service for segment-writers) to a profile using the given visitor.
type visitFunc func(*profilev1.Profile, []*typesv1.LabelPair, []*relabel.Config, *sampleSeriesVisitor) error
  1179  
  1180  func (d *Distributor) visitSampleSeries(s *distributormodel.ProfileSeries, visit visitFunc) ([]*distributormodel.ProfileSeries, error) {
  1181  	relabelingRules := d.limits.IngestionRelabelingRules(s.TenantID)
  1182  	usageConfig := d.limits.DistributorUsageGroups(s.TenantID)
  1183  	var result []*distributormodel.ProfileSeries
  1184  	usageGroups := d.usageGroupEvaluator.GetMatch(s.TenantID, usageConfig, s.Labels)
  1185  	visitor := &sampleSeriesVisitor{
  1186  		tenantID: s.TenantID,
  1187  		limits:   d.limits,
  1188  		profile:  s.Profile,
  1189  		logger:   d.logger,
  1190  	}
  1191  	if err := visit(s.Profile.Profile, s.Labels, relabelingRules, visitor); err != nil {
  1192  		validation.DiscardedProfiles.WithLabelValues(string(validation.ReasonOf(err)), s.TenantID).Add(float64(s.TotalProfiles))
  1193  		validation.DiscardedBytes.WithLabelValues(string(validation.ReasonOf(err)), s.TenantID).Add(float64(s.TotalBytesUncompressed))
  1194  		usageGroups.CountDiscardedBytes(string(validation.ReasonOf(err)), s.TotalBytesUncompressed)
  1195  		return nil, connect.NewError(connect.CodeInvalidArgument, err)
  1196  	}
  1197  	for _, ss := range visitor.series {
  1198  		ss.Annotations = s.Annotations
  1199  		ss.Language = s.Language
  1200  		result = append(result, ss)
  1201  	}
  1202  	s.DiscardedProfilesRelabeling += int64(visitor.discardedProfiles)
  1203  	s.DiscardedBytesRelabeling += int64(visitor.discardedBytes)
  1204  	if visitor.discardedBytes > 0 {
  1205  		usageGroups.CountDiscardedBytes(string(validation.DroppedByRelabelRules), int64(visitor.discardedBytes))
  1206  	}
  1207  
  1208  	if s.DiscardedBytesRelabeling > 0 {
  1209  		validation.DiscardedBytes.WithLabelValues(string(validation.DroppedByRelabelRules), s.TenantID).Add(float64(s.DiscardedBytesRelabeling))
  1210  	}
  1211  	if s.DiscardedProfilesRelabeling > 0 {
  1212  		validation.DiscardedProfiles.WithLabelValues(string(validation.DroppedByRelabelRules), s.TenantID).Add(float64(s.DiscardedProfilesRelabeling))
  1213  	}
  1214  	// todo should we do normalization after relabeling?
  1215  	return result, nil
  1216  }
  1217  
// sampleSeriesVisitor collects the per-series profiles produced by a pprof
// split pass and tallies data discarded by relabeling.
type sampleSeriesVisitor struct {
	tenantID string
	limits   Limits
	profile  *pprof.Profile        // source profile being split
	exp      *pprof.SampleExporter // lazily created on the first sample-series visit
	series   []*distributormodel.ProfileSeries
	logger   log.Logger

	discardedBytes    int // bytes dropped by relabeling
	discardedProfiles int // profiles dropped by relabeling
}
  1229  
// ValidateLabels validates a candidate series' labels against the tenant's
// limits, delegating to the shared validation package.
func (v *sampleSeriesVisitor) ValidateLabels(labels phlaremodel.Labels) (phlaremodel.Labels, error) {
	return validation.ValidateLabels(v.limits, v.tenantID, labels, v.logger)
}
  1233  
  1234  func (v *sampleSeriesVisitor) VisitProfile(labels phlaremodel.Labels) {
  1235  	v.series = append(v.series, &distributormodel.ProfileSeries{
  1236  		Profile: v.profile,
  1237  		Labels:  labels,
  1238  	})
  1239  }
  1240  
  1241  func (v *sampleSeriesVisitor) VisitSampleSeries(labels phlaremodel.Labels, samples []*profilev1.Sample) {
  1242  	if v.exp == nil {
  1243  		v.exp = pprof.NewSampleExporter(v.profile.Profile)
  1244  	}
  1245  	v.series = append(v.series, &distributormodel.ProfileSeries{
  1246  		Profile: exportSamples(v.exp, samples),
  1247  		Labels:  labels,
  1248  	})
  1249  }
  1250  
// Discarded accumulates the count of profiles and bytes dropped during the
// split pass (e.g. by relabeling rules).
func (v *sampleSeriesVisitor) Discarded(profiles, bytes int) {
	v.discardedProfiles += profiles
	v.discardedBytes += bytes
}
  1255  
  1256  func exportSamples(e *pprof.SampleExporter, samples []*profilev1.Sample) *pprof.Profile {
  1257  	samplesCopy := make([]*profilev1.Sample, len(samples))
  1258  	copy(samplesCopy, samples)
  1259  	clear(samples)
  1260  	n := pprof.NewProfile()
  1261  	e.ExportSamples(n.Profile, samplesCopy)
  1262  	return n
  1263  }