github.com/grafana/pyroscope@v1.18.0/pkg/distributor/distributor.go (about) 1 package distributor 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/json" 7 "expvar" 8 "flag" 9 "fmt" 10 "hash/fnv" 11 "math/rand" 12 "net/http" 13 "sort" 14 "sync" 15 "time" 16 17 "connectrpc.com/connect" 18 "go.uber.org/atomic" 19 20 "github.com/dustin/go-humanize" 21 "github.com/go-kit/log" 22 "github.com/go-kit/log/level" 23 "github.com/google/uuid" 24 "github.com/grafana/dskit/kv" 25 "github.com/grafana/dskit/limiter" 26 "github.com/grafana/dskit/multierror" 27 "github.com/grafana/dskit/ring" 28 ring_client "github.com/grafana/dskit/ring/client" 29 "github.com/grafana/dskit/services" 30 "github.com/opentracing/opentracing-go" 31 "github.com/opentracing/opentracing-go/ext" 32 "github.com/pkg/errors" 33 "github.com/prometheus/client_golang/prometheus" 34 "github.com/prometheus/client_golang/prometheus/promauto" 35 "github.com/prometheus/common/model" 36 "golang.org/x/sync/errgroup" 37 38 profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" 39 pushv1 "github.com/grafana/pyroscope/api/gen/proto/go/push/v1" 40 segmentwriterv1 "github.com/grafana/pyroscope/api/gen/proto/go/segmentwriter/v1" 41 typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" 42 connectapi "github.com/grafana/pyroscope/pkg/api/connect" 43 "github.com/grafana/pyroscope/pkg/clientpool" 44 "github.com/grafana/pyroscope/pkg/distributor/aggregator" 45 "github.com/grafana/pyroscope/pkg/distributor/ingestlimits" 46 distributormodel "github.com/grafana/pyroscope/pkg/distributor/model" 47 "github.com/grafana/pyroscope/pkg/distributor/sampling" 48 "github.com/grafana/pyroscope/pkg/distributor/writepath" 49 phlaremodel "github.com/grafana/pyroscope/pkg/model" 50 "github.com/grafana/pyroscope/pkg/model/pprofsplit" 51 "github.com/grafana/pyroscope/pkg/model/relabel" 52 "github.com/grafana/pyroscope/pkg/model/sampletype" 53 "github.com/grafana/pyroscope/pkg/pprof" 54 "github.com/grafana/pyroscope/pkg/tenant" 
	"github.com/grafana/pyroscope/pkg/usagestats"
	"github.com/grafana/pyroscope/pkg/util"
	"github.com/grafana/pyroscope/pkg/util/spanlogger"
	"github.com/grafana/pyroscope/pkg/validation"
)

// PushClient is the interface used to push profiles downstream; it mirrors
// the connect-generated push service client.
type PushClient interface {
	Push(context.Context, *connect.Request[pushv1.PushRequest]) (*connect.Response[pushv1.PushResponse], error)
}

const (
	// distributorRingKey is the key under which we store the distributors ring in the KVStore.
	distributorRingKey = "distributor"

	// ringAutoForgetUnhealthyPeriods is how many consecutive timeout periods an unhealthy instance
	// in the ring will be automatically removed after.
	ringAutoForgetUnhealthyPeriods = 10

	// ProfileName is the label name that carries the profile type name
	// (the conventional Prometheus metric-name label).
	ProfileName = "__name__"
)

// Config for a Distributor.
type Config struct {
	PushTimeout time.Duration
	PoolConfig  clientpool.PoolConfig `yaml:"pool_config,omitempty"`

	// Distributors ring
	DistributorRing util.CommonRingConfig `yaml:"ring"`
}

// RegisterFlags registers distributor-related flags.
func (cfg *Config) RegisterFlags(fs *flag.FlagSet, logger log.Logger) {
	cfg.PoolConfig.RegisterFlagsWithPrefix("distributor", fs)
	fs.DurationVar(&cfg.PushTimeout, "distributor.push.timeout", 5*time.Second, "Timeout when pushing data to ingester.")
	cfg.DistributorRing.RegisterFlags("distributor.ring.", "collectors/", "distributors", fs, logger)
}

// Distributor coordinates replication and distribution of profile series
// across ingesters and/or segment writers.
type Distributor struct {
	services.Service
	logger log.Logger

	cfg           Config
	limits        Limits
	ingestersRing ring.ReadRing
	pool          *ring_client.Pool

	// The global rate limiter requires a distributors ring to count
	// the number of healthy instances
	distributorsLifecycler *ring.BasicLifecycler
	distributorsRing       *ring.Ring
	healthyInstancesCount  *atomic.Uint32
	ingestionRateLimiter   *limiter.RateLimiter
	aggregator             *aggregator.MultiTenantAggregator[*pprof.ProfileMerge]
	asyncRequests          sync.WaitGroup
	ingestionLimitsSampler *ingestlimits.Sampler
	usageGroupEvaluator    *validation.UsageGroupEvaluator

	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher

	// Metrics and stats.
	metrics                 *metrics
	rfStats                 *expvar.Int
	bytesReceivedStats      *usagestats.Statistics
	bytesReceivedTotalStats *usagestats.Counter
	profileReceivedStats    *usagestats.MultiCounter
	profileSizeStats        *usagestats.MultiStatistics

	// router dispatches writes to the ingester route, the segment-writer
	// route, or both, depending on per-tenant write-path overrides.
	router        *writepath.Router
	segmentWriter writepath.SegmentWriterClient
}

// Limits is the per-tenant limit surface the distributor consults for
// rate limiting, validation, relabeling, sampling, and write-path routing.
type Limits interface {
	IngestionRateBytes(tenantID string) float64
	IngestionBurstSizeBytes(tenantID string) int
	IngestionLimit(tenantID string) *ingestlimits.Config
	IngestionBodyLimitBytes(tenantID string) int64
	DistributorSampling(tenantID string) *sampling.Config
	IngestionTenantShardSize(tenantID string) int
	MaxLabelNameLength(tenantID string) int
	MaxLabelValueLength(tenantID string) int
	MaxLabelNamesPerSeries(tenantID string) int
	MaxProfileSizeBytes(tenantID string) int
	MaxProfileStacktraceSamples(tenantID string) int
	MaxProfileStacktraceSampleLabels(tenantID string) int
	MaxProfileStacktraceDepth(tenantID string) int
	MaxProfileSymbolValueLength(tenantID string) int
	MaxSessionsPerSeries(tenantID string) int
	EnforceLabelsOrder(tenantID string) bool
	IngestionRelabelingRules(tenantID string) []*relabel.Config
	SampleTypeRelabelingRules(tenantID string) []*relabel.Config
	DistributorUsageGroups(tenantID string) *validation.UsageGroupConfig
	WritePathOverrides(tenantID string) writepath.Config
	validation.ProfileValidationLimits
	aggregator.Limits
}

// New constructs a Distributor with its ingester client pool, distributors
// ring lifecycler, rate limiter, aggregator and write-path router, and wires
// them as subservices managed by the returned service.
func New(
	config Config,
	ingesterRing ring.ReadRing,
	ingesterClientFactory ring_client.PoolFactory,
	limits Limits,
	reg prometheus.Registerer,
	logger log.Logger,
	segmentWriter writepath.SegmentWriterClient,
	ingesterClientsOptions ...connect.ClientOption,
) (*Distributor, error) {
	// Defaults come first so caller-supplied options can override them.
	ingesterClientsOptions = append(
		connectapi.DefaultClientOptions(),
		ingesterClientsOptions...,
	)

	clients := promauto.With(reg).NewGauge(prometheus.GaugeOpts{
		Namespace: "pyroscope",
		Name:      "distributor_ingester_clients",
		Help:      "The current number of ingester clients.",
	})
	d := &Distributor{
		cfg:                     config,
		logger:                  logger,
		ingestersRing:           ingesterRing,
		pool:                    clientpool.NewIngesterPool(config.PoolConfig, ingesterRing, ingesterClientFactory, clients, logger, ingesterClientsOptions...),
		segmentWriter:           segmentWriter,
		metrics:                 newMetrics(reg),
		healthyInstancesCount:   atomic.NewUint32(0),
		aggregator:              aggregator.NewMultiTenantAggregator[*pprof.ProfileMerge](limits, reg),
		limits:                  limits,
		rfStats:                 usagestats.NewInt("distributor_replication_factor"),
		bytesReceivedStats:      usagestats.NewStatistics("distributor_bytes_received"),
		bytesReceivedTotalStats: usagestats.NewCounter("distributor_bytes_received_total"),
		profileReceivedStats:    usagestats.NewMultiCounter("distributor_profiles_received", "lang"),
		profileSizeStats:        usagestats.NewMultiStatistics("distributor_profile_sizes", "lang"),
	}

	ingesterRoute := writepath.IngesterFunc(d.sendRequestsToIngester)
	segmentWriterRoute := writepath.IngesterFunc(d.sendRequestsToSegmentWriter)
	d.router = writepath.NewRouter(logger, reg, ingesterRoute, segmentWriterRoute)

	var err error
	subservices := []services.Service(nil)
	subservices = append(subservices, d.pool)

	distributorsRing, distributorsLifecycler, err := newRingAndLifecycler(config.DistributorRing, d.healthyInstancesCount, logger, reg)
	if err != nil {
		return nil, err
	}

	d.ingestionLimitsSampler = ingestlimits.NewSampler(distributorsRing)
	d.usageGroupEvaluator = validation.NewUsageGroupEvaluator(logger)

	subservices = append(subservices, distributorsLifecycler, distributorsRing, d.aggregator, d.ingestionLimitsSampler)

	d.ingestionRateLimiter = limiter.NewRateLimiter(newGlobalRateStrategy(newIngestionRateStrategy(limits), d), 10*time.Second)
	d.distributorsLifecycler = distributorsLifecycler
	d.distributorsRing = distributorsRing

	d.subservices, err = services.NewManager(subservices...)
	if err != nil {
		return nil, errors.Wrap(err, "services manager")
	}
	d.subservicesWatcher = services.NewFailureWatcher()
	d.subservicesWatcher.WatchManager(d.subservices)

	d.Service = services.NewBasicService(d.starting, d.running, d.stopping)
	d.rfStats.Set(int64(ingesterRing.ReplicationFactor()))
	d.metrics.replicationFactor.Set(float64(ingesterRing.ReplicationFactor()))
	return d, nil
}

// starting brings up all subservices and waits until they are healthy.
func (d *Distributor) starting(ctx context.Context) error {
	return services.StartManagerAndAwaitHealthy(ctx, d.subservices)
}

// running blocks until the context is canceled or a subservice fails.
func (d *Distributor) running(ctx context.Context) error {
	select {
	case <-ctx.Done():
		return nil
	case err := <-d.subservicesWatcher.Chan():
		return errors.Wrap(err, "distributor subservice failed")
	}
}

// stopping waits for in-flight async (aggregated) requests to drain before
// stopping the subservices.
func (d *Distributor) stopping(_ error) error {
	d.asyncRequests.Wait()
	return services.StopManagerAndAwaitStopped(context.Background(), d.subservices)
}

// isKnownConnectError reports whether err is a connect error that already
// carries a meaningful (non-Unknown) status code.
func isKnownConnectError(err error) bool {
	ce := new(connect.Error)
	if !errors.As(err, &ce) {
		return false
	}
	return ce.Code() != connect.CodeUnknown
}

// isKnownValidationError reports whether err carries a known validation
// discard reason.
func isKnownValidationError(err error) bool {
	return validation.ReasonOf(err) != validation.Unknown
}

// Push decodes the raw pprof payloads of the request, enforces per-profile
// and per-request (batched) size limits, and forwards the decoded series to
// PushBatch. Validation errors are mapped to connect.CodeInvalidArgument so
// clients receive a 400 rather than a 5xx.
func (d *Distributor) Push(ctx context.Context, grpcReq *connect.Request[pushv1.PushRequest]) (_ *connect.Response[pushv1.PushResponse], err error) {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "Distributor.Push")
	defer sp.Finish()

	tenantID, err := tenant.ExtractTenantIDFromContext(ctx)
	if err != nil {
		return nil, connect.NewError(connect.CodeUnauthenticated, err)
	}

	defer func() {
		if err == nil {
			return
		}

		// log error
		ext.LogError(sp, err)
		level.Debug(util.LoggerWithContext(ctx, d.logger)).Log("msg", "failed to validate profile", "err", err)

		// wrap the errors with InvalidArgument code for profile validation errors, so they return 400
		if !isKnownConnectError(err) && isKnownValidationError(err) {
			err = connect.NewError(connect.CodeInvalidArgument, err)
		}
	}()

	maxProfileSizeBytes := int64(d.limits.MaxProfileSizeBytes(tenantID))
	maxRequestSizeBytes := d.limits.IngestionBodyLimitBytes(tenantID)
	requestSizeUsed := int64(0)
	requestProfileCount := 0

	req := &distributormodel.PushRequest{
		Series:         make([]*distributormodel.ProfileSeries, 0, len(grpcReq.Msg.Series)),
		RawProfileType: distributormodel.RawProfileTypePPROF,
	}
	allErrors := multierror.New()
	for _, grpcSeries := range grpcReq.Msg.Series {
		for _, grpcSample := range grpcSeries.Samples {
			profile, err := pprof.RawFromBytesWithLimit(grpcSample.RawProfile, maxProfileSizeBytes)
			if err != nil {
				// check if decompression size has been exceeded
				dsErr := new(pprof.ErrDecompressedSizeExceedsLimit)
				if errors.As(err, &dsErr) {
					validation.DiscardedBytes.WithLabelValues(string(validation.ProfileSizeLimit), tenantID).Add(float64(maxProfileSizeBytes))
					validation.DiscardedProfiles.WithLabelValues(string(validation.ProfileSizeLimit), tenantID).Add(float64(1))
					err = validation.NewErrorf(validation.ProfileSizeLimit, "uncompressed profile payload size exceeds limit of %s", humanize.Bytes(uint64(maxProfileSizeBytes)))
				}
				// Record the error but keep processing the remaining samples.
				allErrors.Add(err)
				continue
			}
			requestSizeUsed += int64(profile.RawSize())
			requestProfileCount += 1
			if maxRequestSizeBytes > 0 && requestSizeUsed > maxRequestSizeBytes {
				validation.DiscardedBytes.WithLabelValues(string(validation.BodySizeLimit), tenantID).Add(float64(requestSizeUsed))
				validation.DiscardedProfiles.WithLabelValues(string(validation.BodySizeLimit), tenantID).Add(float64(requestProfileCount))
				return nil, validation.NewErrorf(validation.BodySizeLimit, "uncompressed batched profile payload size exceeds limit of %s", humanize.Bytes(uint64(maxRequestSizeBytes)))
			}
			series := &distributormodel.ProfileSeries{
				Labels:     grpcSeries.Labels,
				Profile:    profile,
				RawProfile: grpcSample.RawProfile,
				ID:         grpcSample.ID,
			}
			req.Series = append(req.Series, series)
		}
	}
	// If we have validation errors and no valid profiles, return the validation errors
	// instead of calling PushBatch which would return "no profiles received"
	if len(req.Series) == 0 && allErrors.Err() != nil {
		return nil, allErrors.Err()
	}
	if err := d.PushBatch(ctx, req); err != nil {
		allErrors.Add(err)
	}
	err = allErrors.Err()
	if err != nil {
		return nil, err
	}
	return connect.NewResponse(new(pushv1.PushResponse)), err
}

// GetProfileLanguage returns the language of the series, detecting it from
// the series metadata or the profile contents on first use, and caching the
// result on the series.
func (d *Distributor) GetProfileLanguage(series *distributormodel.ProfileSeries) string {
	if series.Language != "" {
		return series.Language
	}
	lang := series.GetLanguage()
	if lang == "" {
		lang = pprof.GetLanguage(series.Profile)
	}
	series.Language = lang
	return series.Language
}

// PushBatch records request-level usage stats and pushes every series of the
// request concurrently via pushSeries, collecting all per-series errors into
// a single multierror.
func (d *Distributor) PushBatch(ctx context.Context, req *distributormodel.PushRequest) error {
	sp, ctx := opentracing.StartSpanFromContext(ctx, "Distributor.PushBatch")
	defer sp.Finish()

	tenantID, err := tenant.ExtractTenantIDFromContext(ctx)
	if err != nil {
		return connect.NewError(connect.CodeUnauthenticated, err)
	}
	sp.SetTag("tenant_id", tenantID)

	if len(req.Series) == 0 {
		return noNewProfilesReceivedError()
	}

	d.bytesReceivedTotalStats.Inc(int64(req.ReceivedCompressedProfileSize))
	d.bytesReceivedStats.Record(float64(req.ReceivedCompressedProfileSize))
	if req.RawProfileType != distributormodel.RawProfileTypePPROF {
		// if a single profile contains multiple profile types/names (e.g. jfr) then there is no such thing as
		// compressed size per profile type as all profile types are compressed once together. So we can not count
		// compressed bytes per profile type. Instead we count compressed bytes per profile.
		profName := req.RawProfileType // use "jfr" as profile name
		d.metrics.receivedCompressedBytes.WithLabelValues(string(profName), tenantID).Observe(float64(req.ReceivedCompressedProfileSize))
	}

	res := multierror.New()
	errorsMutex := new(sync.Mutex)
	wg := new(sync.WaitGroup)
	for index, s := range req.Series {
		wg.Add(1)
		go func() {
			defer wg.Done()
			itErr := util.RecoverPanic(func() error {
				return d.pushSeries(ctx, s, req.RawProfileType, tenantID)
			})()

			if itErr != nil {
				itErr = fmt.Errorf("push series with index %d and id %s failed: %w", index, s.ID, itErr)
			}
			errorsMutex.Lock()
			res.Add(itErr)
			errorsMutex.Unlock()
		}()
	}
	wg.Wait()
	return res.Err()
}

// lazyUsageGroups defers rendering of matched usage group names until the
// log line is actually emitted.
type lazyUsageGroups func() []validation.UsageGroupMatchName

// String renders the usage group names; called lazily by the logger.
func (l lazyUsageGroups) String() string {
	groups := l()
	result := make([]string, len(groups))
	for pos := range groups {
		result[pos] = groups[pos].String()
	}
	return fmt.Sprintf("%v", result)
}

type pushLog struct { 405 fields []any 406 lvl func(log.Logger) log.Logger 407 msg string 408 } 409 410 func newPushLog(capacity int) *pushLog { 411 fields := make([]any, 2, (capacity+1)*2) 412 fields[0] = "msg" 413 return &pushLog{ 414 fields: fields, 415 } 416 } 417 418 func (p *pushLog) addFields(fields ...any) { 419 p.fields = append(p.fields, fields...) 420 } 421 422 func (p *pushLog) log(logger log.Logger, err error) { 423 // determine log level 424 if p.lvl == nil { 425 if err != nil { 426 p.lvl = level.Warn 427 } else { 428 p.lvl = level.Debug 429 } 430 } 431 432 if err != nil { 433 p.addFields("err", err) 434 } 435 436 // update message 437 if p.msg == "" { 438 if err != nil { 439 p.msg = "profile rejected" 440 } else { 441 p.msg = "profile accepted" 442 } 443 } 444 p.fields[1] = p.msg 445 p.lvl(logger).Log(p.fields...) 446 } 447 448 func (d *Distributor) pushSeries(ctx context.Context, req *distributormodel.ProfileSeries, origin distributormodel.RawProfileType, tenantID string) (err error) { 449 if req.Profile == nil { 450 return noNewProfilesReceivedError() 451 } 452 now := model.Now() 453 454 logger := spanlogger.FromContext(ctx, log.With(d.logger, "tenant", tenantID)) 455 finalLog := newPushLog(10) 456 defer func() { 457 finalLog.log(logger, err) 458 }() 459 460 req.TenantID = tenantID 461 serviceName := phlaremodel.Labels(req.Labels).Get(phlaremodel.LabelNameServiceName) 462 if serviceName == "" { 463 req.Labels = append(req.Labels, &typesv1.LabelPair{Name: phlaremodel.LabelNameServiceName, Value: phlaremodel.AttrServiceNameFallback}) 464 } else { 465 finalLog.addFields("service_name", serviceName) 466 } 467 sort.Sort(phlaremodel.Labels(req.Labels)) 468 469 if req.ID != "" { 470 finalLog.addFields("profile_id", req.ID) 471 } 472 473 req.TotalProfiles = 1 474 req.TotalBytesUncompressed = calculateRequestSize(req) 475 d.metrics.observeProfileSize(tenantID, StageReceived, req.TotalBytesUncompressed) 476 477 if err := d.checkIngestLimit(req); err != nil { 
478 finalLog.msg = "rejecting profile due to global ingest limit" 479 finalLog.lvl = level.Debug 480 validation.DiscardedProfiles.WithLabelValues(string(validation.IngestLimitReached), tenantID).Add(float64(req.TotalProfiles)) 481 validation.DiscardedBytes.WithLabelValues(string(validation.IngestLimitReached), tenantID).Add(float64(req.TotalBytesUncompressed)) 482 return err 483 } 484 485 if err := d.rateLimit(tenantID, req); err != nil { 486 return err 487 } 488 489 usageGroups := d.limits.DistributorUsageGroups(tenantID) 490 491 profName := phlaremodel.Labels(req.Labels).Get(ProfileName) 492 finalLog.addFields("profile_type", profName) 493 494 groups := d.usageGroupEvaluator.GetMatch(tenantID, usageGroups, req.Labels) 495 finalLog.addFields("matched_usage_groups", lazyUsageGroups(groups.Names)) 496 if err := d.checkUsageGroupsIngestLimit(req, groups.Names()); err != nil { 497 finalLog.msg = "rejecting profile due to usage group ingest limit" 498 finalLog.lvl = level.Debug 499 validation.DiscardedProfiles.WithLabelValues(string(validation.IngestLimitReached), tenantID).Add(float64(req.TotalProfiles)) 500 validation.DiscardedBytes.WithLabelValues(string(validation.IngestLimitReached), tenantID).Add(float64(req.TotalBytesUncompressed)) 501 groups.CountDiscardedBytes(string(validation.IngestLimitReached), req.TotalBytesUncompressed) 502 return err 503 } 504 505 willSample, samplingSource := d.shouldSample(tenantID, groups.Names()) 506 if !willSample { 507 finalLog.addFields( 508 "usage_group", samplingSource.UsageGroup, 509 "probability", samplingSource.Probability, 510 ) 511 finalLog.msg = "skipping profile due to sampling" 512 validation.DiscardedProfiles.WithLabelValues(string(validation.SkippedBySamplingRules), tenantID).Add(float64(req.TotalProfiles)) 513 validation.DiscardedBytes.WithLabelValues(string(validation.SkippedBySamplingRules), tenantID).Add(float64(req.TotalBytesUncompressed)) 514 groups.CountDiscardedBytes(string(validation.SkippedBySamplingRules), 
req.TotalBytesUncompressed) 515 return nil 516 } 517 if samplingSource != nil { 518 if err := req.MarkSampledRequest(samplingSource); err != nil { 519 return err 520 } 521 } 522 523 profLanguage := d.GetProfileLanguage(req) 524 if profLanguage != "" { 525 finalLog.addFields("detected_language", profLanguage) 526 } 527 528 usagestats.NewCounter(fmt.Sprintf("distributor_profile_type_%s_received", profName)).Inc(1) 529 d.profileReceivedStats.Inc(1, profLanguage) 530 if origin == distributormodel.RawProfileTypePPROF { 531 d.metrics.receivedCompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(len(req.RawProfile))) 532 } 533 p := req.Profile 534 decompressedSize := p.SizeVT() 535 profTime := model.TimeFromUnixNano(p.TimeNanos).Time() 536 finalLog.addFields( 537 "profile_time", profTime, 538 "ingestion_delay", now.Time().Sub(profTime), 539 "decompressed_size", decompressedSize, 540 "sample_count", len(p.Sample), 541 ) 542 d.metrics.observeProfileSize(tenantID, StageSampled, int64(decompressedSize)) //todo use req.TotalBytesUncompressed to include labels siz 543 d.metrics.receivedDecompressedBytes.WithLabelValues(profName, tenantID).Observe(float64(decompressedSize)) // deprecated TODO remove 544 d.metrics.receivedSamples.WithLabelValues(profName, tenantID).Observe(float64(len(p.Sample))) 545 d.profileSizeStats.Record(float64(decompressedSize), profLanguage) 546 groups.CountReceivedBytes(profName, int64(decompressedSize)) 547 548 validated, err := validation.ValidateProfile(d.limits, tenantID, p, decompressedSize, req.Labels, now) 549 if err != nil { 550 reason := string(validation.ReasonOf(err)) 551 finalLog.addFields("reason", reason) 552 validation.DiscardedProfiles.WithLabelValues(reason, tenantID).Add(float64(req.TotalProfiles)) 553 validation.DiscardedBytes.WithLabelValues(reason, tenantID).Add(float64(req.TotalBytesUncompressed)) 554 groups.CountDiscardedBytes(reason, req.TotalBytesUncompressed) 555 return connect.NewError(connect.CodeInvalidArgument, 
err) 556 } 557 558 symbolsSize, samplesSize := profileSizeBytes(p.Profile) 559 d.metrics.receivedSamplesBytes.WithLabelValues(profName, tenantID).Observe(float64(samplesSize)) 560 d.metrics.receivedSymbolsBytes.WithLabelValues(profName, tenantID).Observe(float64(symbolsSize)) 561 562 // Normalisation is quite an expensive operation, 563 // therefore it should be done after the rate limit check. 564 if req.Language == "go" { 565 sp, _ := opentracing.StartSpanFromContext(ctx, "pprof.FixGoProfile") 566 req.Profile.Profile = pprof.FixGoProfile(req.Profile.Profile) 567 sp.Finish() 568 } 569 { 570 sp, _ := opentracing.StartSpanFromContext(ctx, "sampletype.Relabel") 571 sampleTypeRules := d.limits.SampleTypeRelabelingRules(req.TenantID) 572 sampletype.Relabel(validated, sampleTypeRules, req.Labels) 573 sp.Finish() 574 } 575 { 576 sp, _ := opentracing.StartSpanFromContext(ctx, "Profile.Normalize") 577 req.Profile.Normalize() 578 sp.Finish() 579 d.metrics.observeProfileSize(tenantID, StageNormalized, calculateRequestSize(req)) 580 } 581 582 if len(req.Profile.Sample) == 0 { 583 // TODO(kolesnikovae): 584 // Normalization may cause all profiles and series to be empty. 585 // We should report it as an error and account for discarded data. 586 // The check should be done after ValidateProfile and normalization. 587 return nil 588 } 589 590 if err := injectMappingVersions(req); err != nil { 591 _ = level.Warn(logger).Log("msg", "failed to inject mapping versions", "err", err) 592 } 593 594 // Reduce cardinality of the session_id label. 595 maxSessionsPerSeries := d.limits.MaxSessionsPerSeries(req.TenantID) 596 req.Labels = d.limitMaxSessionsPerSeries(maxSessionsPerSeries, req.Labels) 597 598 aggregated, err := d.aggregate(ctx, req) 599 if err != nil { 600 return err 601 } 602 if aggregated { 603 return nil 604 } 605 606 // Write path router directs the request to the ingester or segment 607 // writer, or both, depending on the configuration. 
608 // The router uses sendRequestsToSegmentWriter and sendRequestsToIngester 609 // functions to send the request to the appropriate service; these are 610 // called independently, and may be called concurrently: the request is 611 // cloned in this case – the callee may modify the request safely. 612 config := d.limits.WritePathOverrides(req.TenantID) 613 return d.router.Send(ctx, req, config) 614 } 615 616 func noNewProfilesReceivedError() *connect.Error { 617 return connect.NewError(connect.CodeInvalidArgument, fmt.Errorf("no profiles received")) 618 } 619 620 // If aggregation is configured for the tenant, we try to determine 621 // whether the profile is eligible for aggregation based on the series 622 // profile rate, and handle it asynchronously, if this is the case. 623 // 624 // NOTE(kolesnikovae): aggregated profiles are handled on best-effort 625 // basis (at-most-once delivery semantics): any error occurred will 626 // not be returned to the client, and it must not retry sending. 627 // 628 // Aggregation is only meant to be used for cases, when clients do not 629 // form individual series (e.g., server-less workload), and typically 630 // are ephemeral in its nature, and therefore retrying is not possible 631 // or desirable, as it prolongs life-time duration of the clients. 632 func (d *Distributor) aggregate(ctx context.Context, req *distributormodel.ProfileSeries) (bool, error) { 633 a, ok := d.aggregator.AggregatorForTenant(req.TenantID) 634 if !ok { 635 // Aggregation is not configured for the tenant. 636 return false, nil 637 } 638 639 series := req 640 641 // First, we drop __session_id__ label to increase probability 642 // of aggregation, which is handled done per series. 
643 profile := series.Profile.Profile 644 labels := phlaremodel.Labels(series.Labels) 645 if _, hasSessionID := labels.GetLabel(phlaremodel.LabelNameSessionID); hasSessionID { 646 labels = labels.Clone().Delete(phlaremodel.LabelNameSessionID) 647 } 648 r, ok, err := a.Aggregate(labels.Hash(), profile.TimeNanos, mergeProfile(profile)) 649 if err != nil { 650 return false, connect.NewError(connect.CodeInvalidArgument, err) 651 } 652 if !ok { 653 // Aggregation is not needed. 654 return false, nil 655 } 656 handler := r.Handler() 657 if handler == nil { 658 // Aggregation is handled in another goroutine. 659 return true, nil 660 } 661 662 // Aggregation is needed, and we own the result handler. 663 // Note that the labels include the source series labels with 664 // session ID: this is required to ensure fair load distribution. 665 d.asyncRequests.Add(1) 666 labels = phlaremodel.Labels(req.Labels).Clone() 667 annotations := req.Annotations 668 go func() { 669 defer d.asyncRequests.Done() 670 sendErr := util.RecoverPanic(func() error { 671 localCtx, cancel := context.WithTimeout(context.Background(), d.cfg.PushTimeout) 672 defer cancel() 673 localCtx = tenant.InjectTenantID(localCtx, req.TenantID) 674 if sp := opentracing.SpanFromContext(ctx); sp != nil { 675 localCtx = opentracing.ContextWithSpan(localCtx, sp) 676 } 677 // Obtain the aggregated profile. 
678 p, handleErr := handler() 679 if handleErr != nil { 680 return handleErr 681 } 682 aggregated := &distributormodel.ProfileSeries{ 683 TenantID: req.TenantID, 684 Labels: labels, 685 Profile: pprof.RawFromProto(p.Profile()), 686 Annotations: annotations, 687 } 688 config := d.limits.WritePathOverrides(req.TenantID) 689 return d.router.Send(localCtx, aggregated, config) 690 })() 691 if sendErr != nil { 692 _ = level.Error(d.logger).Log("msg", "failed to handle aggregation", "tenant", req.TenantID, "err", err) 693 } 694 }() 695 696 return true, nil 697 } 698 699 // visitSampleSeriesForIngester creates a profile per unique label set in pprof labels. 700 func visitSampleSeriesForIngester(profile *profilev1.Profile, labels []*typesv1.LabelPair, rules []*relabel.Config, visitor *sampleSeriesVisitor) error { 701 return pprofsplit.VisitSampleSeries(profile, labels, rules, visitor) 702 } 703 704 func (d *Distributor) sendRequestsToIngester(ctx context.Context, req *distributormodel.ProfileSeries) (resp *connect.Response[pushv1.PushResponse], err error) { 705 sampleSeries, err := d.visitSampleSeries(req, visitSampleSeriesForIngester) 706 if err != nil { 707 return nil, err 708 } 709 if len(sampleSeries) == 0 { 710 return connect.NewResponse(&pushv1.PushResponse{}), nil 711 } 712 713 enforceLabelOrder := d.limits.EnforceLabelsOrder(req.TenantID) 714 keys := make([]uint32, len(sampleSeries)) 715 for i, s := range sampleSeries { 716 if enforceLabelOrder { 717 s.Labels = phlaremodel.Labels(s.Labels).InsertSorted(phlaremodel.LabelNameOrder, phlaremodel.LabelOrderEnforced) 718 } 719 keys[i] = TokenFor(req.TenantID, phlaremodel.LabelPairsString(s.Labels)) 720 } 721 722 profiles := make([]*profileTracker, 0, len(sampleSeries)) 723 for _, series := range sampleSeries { 724 p := series.Profile 725 // zip the data back into the buffer 726 bw := bytes.NewBuffer(series.RawProfile[:0]) 727 if _, err = p.WriteTo(bw); err != nil { 728 return nil, err 729 } 730 series.ID = 
uuid.NewString()
		series.RawProfile = bw.Bytes()
		profiles = append(profiles, &profileTracker{profile: series})
	}

	const maxExpectedReplicationSet = 5 // typical replication factor 3 plus one for inactive plus one for luck
	var descs [maxExpectedReplicationSet]ring.InstanceDesc

	// Group the profile trackers by the ingester instances that must receive
	// them, so we can issue one push per ingester below.
	samplesByIngester := map[string][]*profileTracker{}
	ingesterDescs := map[string]ring.InstanceDesc{}
	for i, key := range keys {
		// Get a subring if tenant has shuffle shard size configured.
		subRing := d.ingestersRing.ShuffleShard(req.TenantID, d.limits.IngestionTenantShardSize(req.TenantID))

		replicationSet, err := subRing.Get(key, ring.Write, descs[:0], nil, nil)
		if err != nil {
			return nil, err
		}
		// Quorum bookkeeping: a profile succeeds once minSuccess ingesters
		// acked it, and fails once more than maxFailures ingesters errored.
		profiles[i].minSuccess = len(replicationSet.Instances) - replicationSet.MaxErrors
		profiles[i].maxFailures = replicationSet.MaxErrors
		for _, ingester := range replicationSet.Instances {
			samplesByIngester[ingester.Addr] = append(samplesByIngester[ingester.Addr], profiles[i])
			ingesterDescs[ingester.Addr] = ingester
		}
	}
	tracker := pushTracker{
		done: make(chan struct{}, 1), // buffer avoids blocking if caller terminates - sendProfiles() only sends once on each
		err:  make(chan error, 1),
	}
	tracker.samplesPending.Store(int32(len(profiles)))
	// Fan out one goroutine per target ingester.
	for ingester, samples := range samplesByIngester {
		go func(ingester ring.InstanceDesc, samples []*profileTracker) {
			// Use a background context to make sure all ingesters get samples even if we return early
			localCtx, cancel := context.WithTimeout(context.Background(), d.cfg.PushTimeout)
			defer cancel()
			localCtx = tenant.InjectTenantID(localCtx, req.TenantID)
			if sp := opentracing.SpanFromContext(ctx); sp != nil {
				localCtx = opentracing.ContextWithSpan(localCtx, sp)
			}
			d.sendProfiles(localCtx, ingester, samples, &tracker)
		}(ingesterDescs[ingester], samples)
	}
	// Return as soon as the overall outcome is decided: first error past the
	// failure budget, all profiles reaching quorum, or caller cancellation.
	select {
	case err = <-tracker.err:
		return nil, err
	case <-tracker.done:
		return connect.NewResponse(&pushv1.PushResponse{}), nil
	case <-ctx.Done():
		return nil, ctx.Err()
	}
}

// visitSampleSeriesForSegmentWriter creates a profile per service.
// Labels that are shared by all pprof samples are used as series labels.
// Unique sample labels (not present in series labels) are preserved:
// pprof split takes place in segment-writers.
func visitSampleSeriesForSegmentWriter(profile *profilev1.Profile, labels []*typesv1.LabelPair, rules []*relabel.Config, visitor *sampleSeriesVisitor) error {
	return pprofsplit.VisitSampleSeriesBy(profile, labels, rules, visitor, phlaremodel.LabelNameServiceName)
}

// sendRequestsToSegmentWriter splits the request into per-service series and
// pushes each of them to the segment-writer. A single-profile request (the
// common case) is pushed synchronously; batches fall back to a parallel
// errgroup fan-out.
func (d *Distributor) sendRequestsToSegmentWriter(ctx context.Context, req *distributormodel.ProfileSeries) (*connect.Response[pushv1.PushResponse], error) {
	// NOTE(kolesnikovae): if we return early, e.g., due to a validation error,
	// or if there are no series, the write path router has already seen the
	// request, and could have already accounted for the size, latency, etc.
	serviceSeries, err := d.visitSampleSeries(req, visitSampleSeriesForSegmentWriter)
	if err != nil {
		return nil, err
	}
	if len(serviceSeries) == 0 {
		return connect.NewResponse(&pushv1.PushResponse{}), nil
	}

	// TODO(kolesnikovae): Add profiles per request histogram.
	// In most cases, we only have a single profile. We should avoid
	// batching multiple profiles into a single request: overhead of handling
	// multiple profiles in a single request is substantial: we need to
	// allocate memory for all profiles at once, and wait for multiple requests
	// routed to different shards to complete is generally a bad idea because
	// it's hard to reason about latencies, retries, and error handling.
	config := d.limits.WritePathOverrides(req.TenantID)
	requests := make([]*segmentwriterv1.PushRequest, 0, len(serviceSeries)*2)
	for _, s := range serviceSeries {
		buf, err := pprof.Marshal(s.Profile.Profile, config.Compression == writepath.CompressionGzip)
		if err != nil {
			// NOTE(review): marshal failure of an already-parsed profile is
			// treated as an invariant violation (programmer error), hence the
			// panic rather than an error return — confirm this is intended.
			panic(fmt.Sprintf("failed to marshal profile: %v", err))
		}
		// Ideally, the ID should identify the whole request, and be
		// deterministic (e.g, based on the request hash). In practice,
		// the API allows batches, which makes it difficult to handle.
		profileID := uuid.New()
		requests = append(requests, &segmentwriterv1.PushRequest{
			TenantId:    req.TenantID,
			Labels:      s.Labels,
			Profile:     buf,
			ProfileId:   profileID[:],
			Annotations: s.Annotations,
		})
	}

	// Fast path: exactly one request — push it on the caller's context.
	if len(requests) == 1 {
		if _, err := d.segmentWriter.Push(ctx, requests[0]); err != nil {
			return nil, err
		}
		return connect.NewResponse(&pushv1.PushResponse{}), nil
	}

	// Fallback. We should minimize probability of this branch.
	g, ctx := errgroup.WithContext(ctx)
	for _, r := range requests {
		r := r
		g.Go(func() error {
			_, pushErr := d.segmentWriter.Push(ctx, r)
			return pushErr
		})
	}
	if err := g.Wait(); err != nil {
		return nil, err
	}

	return connect.NewResponse(&pushv1.PushResponse{}), nil
}

// profileSizeBytes returns the size of symbols and samples in bytes.
func profileSizeBytes(p *profilev1.Profile) (symbols, samples int64) {
	// Total serialized size with samples included.
	fullSize := p.SizeVT()
	// remove samples
	samplesSlice := p.Sample
	p.Sample = nil

	// Whatever remains after dropping samples is attributed to symbols;
	// the difference is attributed to samples.
	symbols = int64(p.SizeVT())
	samples = int64(fullSize) - symbols

	// count labels in samples
	// Sample labels reference the string table, so their string bytes were
	// counted under symbols above; re-attribute them to the samples side.
	samplesLabels := 0
	for _, s := range samplesSlice {
		for _, l := range s.Label {
			samplesLabels += len(p.StringTable[l.Key]) + len(p.StringTable[l.Str]) + len(p.StringTable[l.NumUnit])
		}
	}
	symbols -= int64(samplesLabels)
	samples += int64(samplesLabels)

	// restore samples
	p.Sample = samplesSlice
	return
}

// mergeProfile returns an aggregator function that merges the given profile
// into the accumulated ProfileMerge, allocating the merge state on first use.
// Merge failures are surfaced as InvalidArgument connect errors.
func mergeProfile(profile *profilev1.Profile) aggregator.AggregateFn[*pprof.ProfileMerge] {
	return func(m *pprof.ProfileMerge) (*pprof.ProfileMerge, error) {
		if m == nil {
			m = new(pprof.ProfileMerge)
		}
		if err := m.Merge(profile, true); err != nil {
			return nil, connect.NewError(connect.CodeInvalidArgument, err)
		}
		return m, nil
	}
}

// sendProfiles pushes the given trackers to one ingester and records the
// per-profile success/failure outcome on the shared pushTracker.
func (d *Distributor) sendProfiles(ctx context.Context, ingester ring.InstanceDesc, profileTrackers []*profileTracker, pushTracker *pushTracker) {
	err := d.sendProfilesErr(ctx, ingester, profileTrackers)
	// If we succeed, decrement each sample's pending count by one. If we reach
	// the required number of successful puts on this sample, then decrement the
	// number of pending samples by one. If we successfully push all samples to
	// min success ingesters, wake up the waiting rpc so it can return early.
	// Similarly, track the number of errors, and if it exceeds maxFailures
	// shortcut the waiting rpc.
	//
	// The use of atomic increments here guarantees only a single sendSamples
	// goroutine will write to either channel.
	for i := range profileTrackers {
		if err != nil {
			if profileTrackers[i].failed.Inc() <= int32(profileTrackers[i].maxFailures) {
				continue
			}
			if pushTracker.samplesFailed.Inc() == 1 {
				pushTracker.err <- err
			}
		} else {
			if profileTrackers[i].succeeded.Inc() != int32(profileTrackers[i].minSuccess) {
				continue
			}
			if pushTracker.samplesPending.Dec() == 0 {
				pushTracker.done <- struct{}{}
			}
		}
	}
}

// sendProfilesErr builds a single PushRequest with one series per tracker and
// sends it to the given ingester through the pooled client. Returns the push
// error, if any.
func (d *Distributor) sendProfilesErr(ctx context.Context, ingester ring.InstanceDesc, profileTrackers []*profileTracker) error {
	c, err := d.pool.GetClientFor(ingester.Addr)
	if err != nil {
		return err
	}

	req := connect.NewRequest(&pushv1.PushRequest{
		Series: make([]*pushv1.RawProfileSeries, 0, len(profileTrackers)),
	})

	for _, p := range profileTrackers {
		series := &pushv1.RawProfileSeries{
			Labels: p.profile.Labels,
			Samples: []*pushv1.RawSample{{
				RawProfile: p.profile.RawProfile,
				ID:         p.profile.ID,
			}},
			Annotations: p.profile.Annotations,
		}

		req.Msg.Series = append(req.Msg.Series, series)
	}

	_, err = c.(PushClient).Push(ctx, req)
	return err
}

// ServeHTTP serves the distributor ring status page when the distributor ring
// is enabled, and a static informational page otherwise.
func (d *Distributor) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	if d.distributorsRing != nil {
		d.distributorsRing.ServeHTTP(w, req)
	} else {
		ringNotEnabledPage := `
<!DOCTYPE html>
<html>
	<head>
		<meta charset="UTF-8">
		<title>Distributor Status</title>
	</head>
	<body>
		<h1>Distributor Status</h1>
		<p>Distributor is not running with global limits enabled</p>
	</body>
</html>`
		util.WriteHTMLResponse(w, ringNotEnabledPage)
	}
}

// HealthyInstancesCount implements the ReadLifecycler interface
//
// We use a ring lifecycler delegate to count the number of members of the
// ring. The count is then used to enforce rate limiting correctly for each
// distributor.
$EFFECTIVE_RATE_LIMIT = $GLOBAL_RATE_LIMIT / $NUM_INSTANCES 971 func (d *Distributor) HealthyInstancesCount() int { 972 return int(d.healthyInstancesCount.Load()) 973 } 974 975 func (d *Distributor) limitMaxSessionsPerSeries(maxSessionsPerSeries int, labels phlaremodel.Labels) phlaremodel.Labels { 976 if maxSessionsPerSeries == 0 { 977 return labels.Delete(phlaremodel.LabelNameSessionID) 978 } 979 sessionIDLabel, ok := labels.GetLabel(phlaremodel.LabelNameSessionID) 980 if !ok { 981 return labels 982 } 983 sessionID, err := phlaremodel.ParseSessionID(sessionIDLabel.Value) 984 if err != nil { 985 _ = level.Debug(d.logger).Log("msg", "invalid session_id", "err", err) 986 return labels.Delete(phlaremodel.LabelNameSessionID) 987 } 988 sessionIDLabel.Value = phlaremodel.SessionID(int(sessionID) % maxSessionsPerSeries).String() 989 return labels 990 } 991 992 func (d *Distributor) rateLimit(tenantID string, req *distributormodel.ProfileSeries) error { 993 if !d.ingestionRateLimiter.AllowN(time.Now(), tenantID, int(req.TotalBytesUncompressed)) { 994 validation.DiscardedProfiles.WithLabelValues(string(validation.RateLimited), tenantID).Add(float64(req.TotalProfiles)) 995 validation.DiscardedBytes.WithLabelValues(string(validation.RateLimited), tenantID).Add(float64(req.TotalBytesUncompressed)) 996 return connect.NewError(connect.CodeResourceExhausted, 997 fmt.Errorf("push rate limit (%s) exceeded while adding %s", humanize.IBytes(uint64(d.limits.IngestionRateBytes(tenantID))), humanize.IBytes(uint64(req.TotalBytesUncompressed))), 998 ) 999 } 1000 return nil 1001 } 1002 1003 func calculateRequestSize(req *distributormodel.ProfileSeries) int64 { 1004 // include the labels in the size calculation 1005 bs := int64(0) 1006 for _, lbs := range req.Labels { 1007 bs += int64(len(lbs.Name)) 1008 bs += int64(len(lbs.Value)) 1009 } 1010 1011 bs += int64(req.Profile.SizeVT()) 1012 return bs 1013 } 1014 1015 func (d *Distributor) checkIngestLimit(req *distributormodel.ProfileSeries) 
error { 1016 l := d.limits.IngestionLimit(req.TenantID) 1017 if l == nil { 1018 return nil 1019 } 1020 1021 if l.LimitReached { 1022 // we want to allow a very small portion of the traffic after reaching the limit 1023 if d.ingestionLimitsSampler.AllowRequest(req.TenantID, l.Sampling) { 1024 if err := req.MarkThrottledTenant(l); err != nil { 1025 return err 1026 } 1027 return nil 1028 } 1029 limitResetTime := time.Unix(l.LimitResetTime, 0).UTC().Format(time.RFC3339) 1030 return connect.NewError(connect.CodeResourceExhausted, 1031 fmt.Errorf("limit of %s/%s reached, next reset at %s", humanize.IBytes(uint64(l.PeriodLimitMb*1024*1024)), l.PeriodType, limitResetTime)) 1032 } 1033 1034 return nil 1035 } 1036 1037 func (d *Distributor) checkUsageGroupsIngestLimit(req *distributormodel.ProfileSeries, groupsInRequest []validation.UsageGroupMatchName) error { 1038 l := d.limits.IngestionLimit(req.TenantID) 1039 if l == nil || len(l.UsageGroups) == 0 { 1040 return nil 1041 } 1042 1043 for _, group := range groupsInRequest { 1044 limit, ok := l.UsageGroups[group.ResolvedName] 1045 if !ok { 1046 limit, ok = l.UsageGroups[group.ConfiguredName] 1047 } 1048 if !ok || !limit.LimitReached { 1049 continue 1050 } 1051 if d.ingestionLimitsSampler.AllowRequest(req.TenantID, l.Sampling) { 1052 if err := req.MarkThrottledUsageGroup(l, group.ResolvedName); err != nil { 1053 return err 1054 } 1055 return nil 1056 } 1057 limitResetTime := time.Unix(l.LimitResetTime, 0).UTC().Format(time.RFC3339) 1058 return connect.NewError(connect.CodeResourceExhausted, 1059 fmt.Errorf("limit of %s/%s reached for usage group %s, next reset at %s", humanize.IBytes(uint64(limit.PeriodLimitMb*1024*1024)), l.PeriodType, group, limitResetTime)) 1060 } 1061 1062 return nil 1063 } 1064 1065 // shouldSample returns true if the profile should be injected and optionally the usage group that was responsible for the decision. 
func (d *Distributor) shouldSample(tenantID string, groupsInRequest []validation.UsageGroupMatchName) (bool, *sampling.Source) {
	l := d.limits.DistributorSampling(tenantID)
	if l == nil {
		// No sampling configuration for this tenant: always keep.
		return true, nil
	}

	// Find the winning usage-group sampling config: lowest probability wins;
	// specificity breaks ties (see comments in the loop).
	samplingProbability := 1.0
	var match *validation.UsageGroupMatchName
	for _, group := range groupsInRequest {
		probabilityCfg, found := l.UsageGroups[group.ResolvedName]
		if !found {
			probabilityCfg, found = l.UsageGroups[group.ConfiguredName]
		}
		if !found {
			continue
		}
		// a less specific group loses to a more specific one
		if match != nil && match.IsMoreSpecificThan(&group) {
			continue
		}
		// lower probability wins; when tied, the more specific group wins
		if probabilityCfg.Probability <= samplingProbability {
			samplingProbability = probabilityCfg.Probability
			// Taking the address of the loop variable relies on Go 1.22+
			// per-iteration loop variable semantics.
			match = &group
		}
	}

	if match == nil {
		// No configured group matched: always keep.
		return true, nil
	}

	source := &sampling.Source{
		UsageGroup:  match.ResolvedName,
		Probability: samplingProbability,
	}

	// Keep the profile with the winning probability.
	return rand.Float64() <= samplingProbability, source
}

// profileTracker tracks the replication outcome of a single profile series:
// it succeeds after minSuccess ingester acks and fails once failures exceed
// maxFailures. The counters are updated concurrently by sendProfiles.
type profileTracker struct {
	profile     *distributormodel.ProfileSeries
	minSuccess  int
	maxFailures int
	succeeded   atomic.Int32
	failed      atomic.Int32
}

// pushTracker aggregates the outcome of a whole push request; done/err are
// buffered (capacity 1) and receive at most one message each.
type pushTracker struct {
	samplesPending atomic.Int32
	samplesFailed  atomic.Int32
	done           chan struct{}
	err            chan error
}

// TokenFor generates a token used for finding ingesters from ring
func TokenFor(tenantID, labels string) uint32 {
	h := fnv.New32()
	_, _ = h.Write([]byte(tenantID))
	_, _ = h.Write([]byte(labels))
	return h.Sum32()
}

// newRingAndLifecycler creates a new distributor ring and lifecycler with all required lifecycler delegates
func newRingAndLifecycler(cfg util.CommonRingConfig, instanceCount *atomic.Uint32, logger log.Logger, reg prometheus.Registerer) (*ring.Ring, *ring.BasicLifecycler, error) {
	reg = prometheus.WrapRegistererWithPrefix("pyroscope_", reg)
	kvStore, err := kv.NewClient(cfg.KVStore, ring.GetCodec(), kv.RegistererWithKVName(reg, "distributor-lifecycler"), logger)
	if err != nil {
		return nil, nil, errors.Wrap(err, "failed to initialize distributors' KV store")
	}

	lifecyclerCfg, err := toBasicLifecyclerConfig(cfg, logger)
	if err != nil {
		return nil, nil, errors.Wrap(err, "failed to build distributors' lifecycler config")
	}

	// Build the delegate chain inside-out; each wrapper decorates the one
	// assigned before it, so the order of these assignments matters.
	var delegate ring.BasicLifecyclerDelegate
	delegate = ring.NewInstanceRegisterDelegate(ring.ACTIVE, lifecyclerCfg.NumTokens)
	delegate = newHealthyInstanceDelegate(instanceCount, cfg.HeartbeatTimeout, delegate)
	delegate = ring.NewLeaveOnStoppingDelegate(delegate, logger)
	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*cfg.HeartbeatTimeout, delegate, logger)

	distributorsLifecycler, err := ring.NewBasicLifecycler(lifecyclerCfg, "distributor", distributorRingKey, kvStore, delegate, logger, reg)
	if err != nil {
		return nil, nil, errors.Wrap(err, "failed to initialize distributors' lifecycler")
	}

	distributorsRing, err := ring.New(cfg.ToRingConfig(), "distributor", distributorRingKey, logger, reg)
	if err != nil {
		return nil, nil, errors.Wrap(err, "failed to initialize distributors' ring client")
	}

	return distributorsRing, distributorsLifecycler, nil
}

// injectMappingVersions extract from the labels the mapping version and inject it into the profile's main mapping.
(mapping[0])
func injectMappingVersions(s *distributormodel.ProfileSeries) error {
	version, ok := phlaremodel.ServiceVersionFromLabels(s.Labels)
	if !ok {
		// No service version labels present: nothing to inject.
		return nil
	}
	for _, m := range s.Profile.Mapping {
		// Preserve the original build ID inside the serialized version blob,
		// then point the mapping's BuildId at the newly appended string.
		version.BuildID = s.Profile.StringTable[m.BuildId]
		versionString, err := json.Marshal(version)
		if err != nil {
			return err
		}
		s.Profile.StringTable = append(s.Profile.StringTable, string(versionString))
		m.BuildId = int64(len(s.Profile.StringTable) - 1)
	}
	return nil
}

// visitFunc is the pluggable series-splitting strategy used by
// visitSampleSeries (ingester vs. segment-writer write paths).
type visitFunc func(*profilev1.Profile, []*typesv1.LabelPair, []*relabel.Config, *sampleSeriesVisitor) error

// visitSampleSeries applies relabeling rules and splits the request into
// per-series profiles via the given visit strategy, accounting discarded
// profiles/bytes in the validation metrics and usage groups.
func (d *Distributor) visitSampleSeries(s *distributormodel.ProfileSeries, visit visitFunc) ([]*distributormodel.ProfileSeries, error) {
	relabelingRules := d.limits.IngestionRelabelingRules(s.TenantID)
	usageConfig := d.limits.DistributorUsageGroups(s.TenantID)
	var result []*distributormodel.ProfileSeries
	usageGroups := d.usageGroupEvaluator.GetMatch(s.TenantID, usageConfig, s.Labels)
	visitor := &sampleSeriesVisitor{
		tenantID: s.TenantID,
		limits:   d.limits,
		profile:  s.Profile,
		logger:   d.logger,
	}
	if err := visit(s.Profile.Profile, s.Labels, relabelingRules, visitor); err != nil {
		// Validation failure: account the whole request as discarded and
		// reject it as InvalidArgument.
		validation.DiscardedProfiles.WithLabelValues(string(validation.ReasonOf(err)), s.TenantID).Add(float64(s.TotalProfiles))
		validation.DiscardedBytes.WithLabelValues(string(validation.ReasonOf(err)), s.TenantID).Add(float64(s.TotalBytesUncompressed))
		usageGroups.CountDiscardedBytes(string(validation.ReasonOf(err)), s.TotalBytesUncompressed)
		return nil, connect.NewError(connect.CodeInvalidArgument, err)
	}
	// Propagate request-level metadata to each split series.
	for _, ss := range visitor.series {
		ss.Annotations = s.Annotations
		ss.Language = s.Language
		result = append(result, ss)
	}
	s.DiscardedProfilesRelabeling += int64(visitor.discardedProfiles)
	s.DiscardedBytesRelabeling += int64(visitor.discardedBytes)
	if visitor.discardedBytes > 0 {
		usageGroups.CountDiscardedBytes(string(validation.DroppedByRelabelRules), int64(visitor.discardedBytes))
	}

	// NOTE: metrics below use the accumulated per-request totals, while the
	// usage-group counter above uses only this visitor's increment.
	if s.DiscardedBytesRelabeling > 0 {
		validation.DiscardedBytes.WithLabelValues(string(validation.DroppedByRelabelRules), s.TenantID).Add(float64(s.DiscardedBytesRelabeling))
	}
	if s.DiscardedProfilesRelabeling > 0 {
		validation.DiscardedProfiles.WithLabelValues(string(validation.DroppedByRelabelRules), s.TenantID).Add(float64(s.DiscardedProfilesRelabeling))
	}
	// todo should we do normalization after relabeling?
	return result, nil
}

// sampleSeriesVisitor collects the per-series profiles produced by a
// pprofsplit visit, along with discard statistics.
type sampleSeriesVisitor struct {
	tenantID string
	limits   Limits
	profile  *pprof.Profile
	exp      *pprof.SampleExporter // lazily initialized on first VisitSampleSeries
	series   []*distributormodel.ProfileSeries
	logger   log.Logger

	// discardedBytes and discardedProfiles accumulate what relabeling dropped.
	discardedBytes    int
	discardedProfiles int
}

// ValidateLabels validates the series labels against the tenant limits.
func (v *sampleSeriesVisitor) ValidateLabels(labels phlaremodel.Labels) (phlaremodel.Labels, error) {
	return validation.ValidateLabels(v.limits, v.tenantID, labels, v.logger)
}

// VisitProfile records the whole profile as a single series with the given labels.
func (v *sampleSeriesVisitor) VisitProfile(labels phlaremodel.Labels) {
	v.series = append(v.series, &distributormodel.ProfileSeries{
		Profile: v.profile,
		Labels:  labels,
	})
}

// VisitSampleSeries exports the given subset of samples into a new profile
// and records it as a separate series with the given labels.
func (v *sampleSeriesVisitor) VisitSampleSeries(labels phlaremodel.Labels, samples []*profilev1.Sample) {
	if v.exp == nil {
		v.exp = pprof.NewSampleExporter(v.profile.Profile)
	}
	v.series = append(v.series, &distributormodel.ProfileSeries{
		Profile: exportSamples(v.exp, samples),
		Labels:  labels,
	})
}

// Discarded accumulates the profiles/bytes dropped by relabeling.
func (v *sampleSeriesVisitor) Discarded(profiles, bytes int) {
	v.discardedProfiles += profiles
	v.discardedBytes += bytes
}

// exportSamples copies the samples into a fresh profile via the exporter.
// The input slice is deliberately cleared (zeroed) after copying — presumably
// to release the sample references held by the source profile; callers must
// not reuse it.
func exportSamples(e *pprof.SampleExporter, samples []*profilev1.Sample) *pprof.Profile {
	samplesCopy := make([]*profilev1.Sample, len(samples))
	copy(samplesCopy, samples)
	clear(samples)
	n := pprof.NewProfile()
	e.ExportSamples(n.Profile, samplesCopy)
	return n
}