github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/distributor/distributor.go

package distributor

import (
	"context"
	"flag"
	"fmt"
	"net/http"
	"sort"
	"strings"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/limiter"
	"github.com/grafana/dskit/ring"
	ring_client "github.com/grafana/dskit/ring/client"
	"github.com/grafana/dskit/services"
	"github.com/opentracing/opentracing-go"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/pkg/labels"
	"github.com/prometheus/prometheus/pkg/relabel"
	"github.com/prometheus/prometheus/scrape"
	"github.com/weaveworks/common/httpgrpc"
	"github.com/weaveworks/common/instrument"
	"github.com/weaveworks/common/user"
	"go.uber.org/atomic"

	"github.com/cortexproject/cortex/pkg/cortexpb"
	ingester_client "github.com/cortexproject/cortex/pkg/ingester/client"
	"github.com/cortexproject/cortex/pkg/prom1/storage/metric"
	"github.com/cortexproject/cortex/pkg/tenant"
	"github.com/cortexproject/cortex/pkg/util"
	"github.com/cortexproject/cortex/pkg/util/extract"
	util_log "github.com/cortexproject/cortex/pkg/util/log"
	util_math "github.com/cortexproject/cortex/pkg/util/math"
	"github.com/cortexproject/cortex/pkg/util/validation"
)

var (
	emptyPreallocSeries = cortexpb.PreallocTimeseries{}

	supportedShardingStrategies = []string{util.ShardingStrategyDefault, util.ShardingStrategyShuffle}

	// Validation errors.
	errInvalidShardingStrategy = errors.New("invalid sharding strategy")
	errInvalidTenantShardSize  = errors.New("invalid tenant shard size, the value must be greater than 0")

	// Distributor instance limits errors.
	errTooManyInflightPushRequests    = errors.New("too many inflight push requests in distributor")
	errMaxSamplesPushRateLimitReached = errors.New("distributor's samples push rate limit reached")
)

const (
	typeSamples  = "samples"
	typeMetadata = "metadata"

	instanceIngestionRateTickInterval = time.Second
)

// Distributor is a storage.SampleAppender and a client.Querier which
// forwards appends and queries to individual ingesters.
type Distributor struct {
	services.Service

	cfg           Config
	log           log.Logger
	ingestersRing ring.ReadRing
	ingesterPool  *ring_client.Pool
	limits        *validation.Overrides

	// The global rate limiter requires a distributors ring to count
	// the number of healthy instances.
	distributorsLifeCycler *ring.Lifecycler
	distributorsRing       *ring.Ring

	// For handling HA replicas.
	HATracker *haTracker

	// Per-user rate limiter.
	ingestionRateLimiter *limiter.RateLimiter

	// Manager for subservices (HA Tracker, distributor ring and client pool).
	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher

	activeUsers *util.ActiveUsersCleanupService

	ingestionRate        *util_math.EwmaRate
	inflightPushRequests atomic.Int64

	// Metrics
	queryDuration                    *instrument.HistogramCollector
	receivedSamples                  *prometheus.CounterVec
	receivedExemplars                *prometheus.CounterVec
	receivedMetadata                 *prometheus.CounterVec
	incomingSamples                  *prometheus.CounterVec
	incomingExemplars                *prometheus.CounterVec
	incomingMetadata                 *prometheus.CounterVec
	nonHASamples                     *prometheus.CounterVec
	dedupedSamples                   *prometheus.CounterVec
	labelsHistogram                  prometheus.Histogram
	ingesterAppends                  *prometheus.CounterVec
	ingesterAppendFailures           *prometheus.CounterVec
	ingesterQueries                  *prometheus.CounterVec
	ingesterQueryFailures            *prometheus.CounterVec
	replicationFactor                prometheus.Gauge
	latestSeenSampleTimestampPerUser *prometheus.GaugeVec
}

// Config contains the configuration required to
// create a Distributor.
type Config struct {
	PoolConfig PoolConfig `yaml:"pool"`

	HATrackerConfig HATrackerConfig `yaml:"ha_tracker"`

	MaxRecvMsgSize  int           `yaml:"max_recv_msg_size"`
	RemoteTimeout   time.Duration `yaml:"remote_timeout"`
	ExtraQueryDelay time.Duration `yaml:"extra_queue_delay"`

	ShardingStrategy string `yaml:"sharding_strategy"`
	ShardByAllLabels bool   `yaml:"shard_by_all_labels"`
	ExtendWrites     bool   `yaml:"extend_writes"`

	// Distributors ring.
	DistributorRing RingConfig `yaml:"ring"`

	// For testing, and for extending the ingester by adding calls to the client.
	IngesterClientFactory ring_client.PoolFactory `yaml:"-"`

	// When true, the distributor does not validate label names. Cortex doesn't directly
	// use this (and should never use it), but the feature is used by other projects
	// built on top of it.
	SkipLabelNameValidation bool `yaml:"-"`

	// This config is dynamically injected because it is defined in the querier config.
	ShuffleShardingLookbackPeriod time.Duration `yaml:"-"`

	// Limits for this distributor instance.
	InstanceLimits InstanceLimits `yaml:"instance_limits"`
}

type InstanceLimits struct {
	MaxIngestionRate        float64 `yaml:"max_ingestion_rate"`
	MaxInflightPushRequests int     `yaml:"max_inflight_push_requests"`
}
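
// Illustrative sketch (not part of the upstream file): how a caller could wire
// Config into a flag.FlagSet and parse command-line overrides. The flag names
// are the ones registered by RegisterFlags below; the FlagSet name is made up.
//
//	fs := flag.NewFlagSet("distributor", flag.ContinueOnError)
//	var cfg Config
//	cfg.RegisterFlags(fs)
//	if err := fs.Parse([]string{"-distributor.shard-by-all-labels=true"}); err != nil {
//		// handle the parse error
//	}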

// RegisterFlags adds the flags required to configure this to the given FlagSet.
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	cfg.PoolConfig.RegisterFlags(f)
	cfg.HATrackerConfig.RegisterFlags(f)
	cfg.DistributorRing.RegisterFlags(f)

	f.IntVar(&cfg.MaxRecvMsgSize, "distributor.max-recv-msg-size", 100<<20, "remote_write API max receive message size (bytes).")
	f.DurationVar(&cfg.RemoteTimeout, "distributor.remote-timeout", 2*time.Second, "Timeout for downstream ingesters.")
	f.DurationVar(&cfg.ExtraQueryDelay, "distributor.extra-query-delay", 0, "Time to wait before sending more than the minimum successful query requests.")
	f.BoolVar(&cfg.ShardByAllLabels, "distributor.shard-by-all-labels", false, "Distribute samples based on all labels, as opposed to solely by user and metric name.")
	f.StringVar(&cfg.ShardingStrategy, "distributor.sharding-strategy", util.ShardingStrategyDefault, fmt.Sprintf("The sharding strategy to use. Supported values are: %s.", strings.Join(supportedShardingStrategies, ", ")))
	f.BoolVar(&cfg.ExtendWrites, "distributor.extend-writes", true, "Try writing to an additional ingester in the presence of an ingester not in the ACTIVE state. It is useful to disable this along with -ingester.unregister-on-shutdown=false in order to not spread samples to extra ingesters during rolling restarts with consistent naming.")

	f.Float64Var(&cfg.InstanceLimits.MaxIngestionRate, "distributor.instance-limits.max-ingestion-rate", 0, "Max ingestion rate (samples/sec) that this distributor will accept. This limit is per-distributor, not per-tenant. Additional push requests will be rejected. Current ingestion rate is computed as exponentially weighted moving average, updated every second. 0 = unlimited.")
	f.IntVar(&cfg.InstanceLimits.MaxInflightPushRequests, "distributor.instance-limits.max-inflight-push-requests", 0, "Max inflight push requests that this distributor can handle. This limit is per-distributor, not per-tenant. Additional requests will be rejected. 0 = unlimited.")
}

// Validate validates the config and returns an error on failure.
func (cfg *Config) Validate(limits validation.Limits) error {
	if !util.StringsContain(supportedShardingStrategies, cfg.ShardingStrategy) {
		return errInvalidShardingStrategy
	}

	if cfg.ShardingStrategy == util.ShardingStrategyShuffle && limits.IngestionTenantShardSize <= 0 {
		return errInvalidTenantShardSize
	}

	return cfg.HATrackerConfig.Validate()
}

const (
	instanceLimitsMetric     = "cortex_distributor_instance_limits"
	instanceLimitsMetricHelp = "Instance limits used by this distributor." // Must be same for all registrations.
	limitLabel               = "limit"
)
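
// Illustrative sketch (not part of the upstream file): Validate rejects the
// shuffle-sharding strategy when the tenant shard size limit is not positive.
//
//	var cfg Config
//	cfg.ShardingStrategy = util.ShardingStrategyShuffle
//	err := cfg.Validate(validation.Limits{IngestionTenantShardSize: 0})
//	// err == errInvalidTenantShardSize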

// New constructs a new Distributor.
func New(cfg Config, clientConfig ingester_client.Config, limits *validation.Overrides, ingestersRing ring.ReadRing, canJoinDistributorsRing bool, reg prometheus.Registerer, log log.Logger) (*Distributor, error) {
	if cfg.IngesterClientFactory == nil {
		cfg.IngesterClientFactory = func(addr string) (ring_client.PoolClient, error) {
			return ingester_client.MakeIngesterClient(addr, clientConfig)
		}
	}

	cfg.PoolConfig.RemoteTimeout = cfg.RemoteTimeout

	haTracker, err := newHATracker(cfg.HATrackerConfig, limits, reg, log)
	if err != nil {
		return nil, err
	}

	subservices := []services.Service(nil)
	subservices = append(subservices, haTracker)

	// Create the configured ingestion rate limit strategy (local or global). If the
	// distributor is an internal dependency and can't join the distributors ring, we
	// skip rate limiting.
	var ingestionRateStrategy limiter.RateLimiterStrategy
	var distributorsLifeCycler *ring.Lifecycler
	var distributorsRing *ring.Ring

	if !canJoinDistributorsRing {
		ingestionRateStrategy = newInfiniteIngestionRateStrategy()
	} else if limits.IngestionRateStrategy() == validation.GlobalIngestionRateStrategy {
		distributorsLifeCycler, err = ring.NewLifecycler(cfg.DistributorRing.ToLifecyclerConfig(), nil, "distributor", ring.DistributorRingKey, true, log, prometheus.WrapRegistererWithPrefix("cortex_", reg))
		if err != nil {
			return nil, err
		}

		distributorsRing, err = ring.New(cfg.DistributorRing.ToRingConfig(), "distributor", ring.DistributorRingKey, log, prometheus.WrapRegistererWithPrefix("cortex_", reg))
		if err != nil {
			return nil, errors.Wrap(err, "failed to initialize distributors' ring client")
		}
		subservices = append(subservices, distributorsLifeCycler, distributorsRing)

		ingestionRateStrategy = newGlobalIngestionRateStrategy(limits, distributorsLifeCycler)
	} else {
		ingestionRateStrategy = newLocalIngestionRateStrategy(limits)
	}
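
	// Illustrative note (not part of the upstream file): with the global strategy,
	// each distributor enforces roughly tenantLimit / healthyDistributors. For
	// example, a tenant limit of 100000 samples/sec spread across 4 healthy
	// distributors lets each distributor admit about 25000 samples/sec.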

	d := &Distributor{
		cfg:                    cfg,
		log:                    log,
		ingestersRing:          ingestersRing,
		ingesterPool:           NewPool(cfg.PoolConfig, ingestersRing, cfg.IngesterClientFactory, log),
		distributorsLifeCycler: distributorsLifeCycler,
		distributorsRing:       distributorsRing,
		limits:                 limits,
		ingestionRateLimiter:   limiter.NewRateLimiter(ingestionRateStrategy, 10*time.Second),
		HATracker:              haTracker,
		ingestionRate:          util_math.NewEWMARate(0.2, instanceIngestionRateTickInterval),

		queryDuration: instrument.NewHistogramCollector(promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
			Namespace: "cortex",
			Name:      "distributor_query_duration_seconds",
			Help:      "Time spent executing expression and exemplar queries.",
			Buckets:   []float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10, 20, 30},
		}, []string{"method", "status_code"})),
		receivedSamples: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_received_samples_total",
			Help:      "The total number of received samples, excluding rejected and deduped samples.",
		}, []string{"user"}),
		receivedExemplars: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_received_exemplars_total",
			Help:      "The total number of received exemplars, excluding rejected and deduped exemplars.",
		}, []string{"user"}),
		receivedMetadata: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_received_metadata_total",
			Help:      "The total number of received metadata, excluding rejected.",
		}, []string{"user"}),
		incomingSamples: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_samples_in_total",
			Help:      "The total number of samples that have come in to the distributor, including rejected or deduped samples.",
		}, []string{"user"}),
		incomingExemplars: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_exemplars_in_total",
			Help:      "The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars.",
		}, []string{"user"}),
		incomingMetadata: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_metadata_in_total",
			Help:      "The total number of metadata that have come in to the distributor, including rejected.",
		}, []string{"user"}),
		nonHASamples: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_non_ha_samples_received_total",
			Help:      "The total number of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.",
		}, []string{"user"}),
		dedupedSamples: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_deduped_samples_total",
			Help:      "The total number of deduplicated samples.",
		}, []string{"user", "cluster"}),
		labelsHistogram: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
			Namespace: "cortex",
			Name:      "labels_per_sample",
			Help:      "Number of labels per sample.",
			Buckets:   []float64{5, 10, 15, 20, 25},
		}),
		ingesterAppends: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_ingester_appends_total",
			Help:      "The total number of batch appends sent to ingesters.",
		}, []string{"ingester", "type"}),
		ingesterAppendFailures: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_ingester_append_failures_total",
			Help:      "The total number of failed batch appends sent to ingesters.",
		}, []string{"ingester", "type", "status"}),
		ingesterQueries: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_ingester_queries_total",
			Help:      "The total number of queries sent to ingesters.",
		}, []string{"ingester"}),
		ingesterQueryFailures: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Namespace: "cortex",
			Name:      "distributor_ingester_query_failures_total",
			Help:      "The total number of failed queries sent to ingesters.",
		}, []string{"ingester"}),
		replicationFactor: promauto.With(reg).NewGauge(prometheus.GaugeOpts{
			Namespace: "cortex",
			Name:      "distributor_replication_factor",
			Help:      "The configured replication factor.",
		}),
		latestSeenSampleTimestampPerUser: promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{
			Name: "cortex_distributor_latest_seen_sample_timestamp_seconds",
			Help: "Unix timestamp of latest received sample per user.",
		}, []string{"user"}),
	}

	promauto.With(reg).NewGauge(prometheus.GaugeOpts{
		Name:        instanceLimitsMetric,
		Help:        instanceLimitsMetricHelp,
		ConstLabels: map[string]string{limitLabel: "max_inflight_push_requests"},
	}).Set(float64(cfg.InstanceLimits.MaxInflightPushRequests))
	promauto.With(reg).NewGauge(prometheus.GaugeOpts{
		Name:        instanceLimitsMetric,
		Help:        instanceLimitsMetricHelp,
		ConstLabels: map[string]string{limitLabel: "max_ingestion_rate"},
	}).Set(cfg.InstanceLimits.MaxIngestionRate)

	promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{
		Name: "cortex_distributor_inflight_push_requests",
		Help: "Current number of inflight push requests in distributor.",
	}, func() float64 {
		return float64(d.inflightPushRequests.Load())
	})
	promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{
		Name: "cortex_distributor_ingestion_rate_samples_per_second",
		Help: "Current ingestion rate in samples/sec that distributor is using to limit access.",
	}, func() float64 {
		return d.ingestionRate.Rate()
	})
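
	// Illustrative note (not part of the upstream file): the two instance-limit
	// gauges above share one metric name and differ only in the "limit" const
	// label, so with default (zero) limits the exposition would look like:
	//
	//	cortex_distributor_instance_limits{limit="max_inflight_push_requests"} 0
	//	cortex_distributor_instance_limits{limit="max_ingestion_rate"} 0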

	d.replicationFactor.Set(float64(ingestersRing.ReplicationFactor()))
	d.activeUsers = util.NewActiveUsersCleanupWithDefaultValues(d.cleanupInactiveUser)

	subservices = append(subservices, d.ingesterPool, d.activeUsers)
	d.subservices, err = services.NewManager(subservices...)
	if err != nil {
		return nil, err
	}
	d.subservicesWatcher = services.NewFailureWatcher()
	d.subservicesWatcher.WatchManager(d.subservices)

	d.Service = services.NewBasicService(d.starting, d.running, d.stopping)
	return d, nil
}

func (d *Distributor) starting(ctx context.Context) error {
	if d.cfg.InstanceLimits != (InstanceLimits{}) {
		util_log.WarnExperimentalUse("distributor instance limits")
	}

	// Only report success if all sub-services start properly.
	return services.StartManagerAndAwaitHealthy(ctx, d.subservices)
}

func (d *Distributor) running(ctx context.Context) error {
	ingestionRateTicker := time.NewTicker(instanceIngestionRateTickInterval)
	defer ingestionRateTicker.Stop()

	for {
		select {
		case <-ctx.Done():
			return nil

		case <-ingestionRateTicker.C:
			d.ingestionRate.Tick()

		case err := <-d.subservicesWatcher.Chan():
			return errors.Wrap(err, "distributor subservice failed")
		}
	}
}

func (d *Distributor) cleanupInactiveUser(userID string) {
	d.ingestersRing.CleanupShuffleShardCache(userID)

	d.HATracker.cleanupHATrackerMetricsForUser(userID)

	d.receivedSamples.DeleteLabelValues(userID)
	d.receivedExemplars.DeleteLabelValues(userID)
	d.receivedMetadata.DeleteLabelValues(userID)
	d.incomingSamples.DeleteLabelValues(userID)
	d.incomingExemplars.DeleteLabelValues(userID)
	d.incomingMetadata.DeleteLabelValues(userID)
	d.nonHASamples.DeleteLabelValues(userID)
	d.latestSeenSampleTimestampPerUser.DeleteLabelValues(userID)

	if err := util.DeleteMatchingLabels(d.dedupedSamples, map[string]string{"user": userID}); err != nil {
		level.Warn(d.log).Log("msg", "failed to remove cortex_distributor_deduped_samples_total metric for user", "user", userID, "err", err)
	}

	validation.DeletePerUserValidationMetrics(userID, d.log)
}

// Called after distributor is asked to stop via StopAsync.
func (d *Distributor) stopping(_ error) error {
	return services.StopManagerAndAwaitStopped(context.Background(), d.subservices)
}

func (d *Distributor) tokenForLabels(userID string, labels []cortexpb.LabelAdapter) (uint32, error) {
	if d.cfg.ShardByAllLabels {
		return shardByAllLabels(userID, labels), nil
	}

	unsafeMetricName, err := extract.UnsafeMetricNameFromLabelAdapters(labels)
	if err != nil {
		return 0, err
	}
	return shardByMetricName(userID, unsafeMetricName), nil
}

func (d *Distributor) tokenForMetadata(userID string, metricName string) uint32 {
	if d.cfg.ShardByAllLabels {
		return shardByMetricName(userID, metricName)
	}

	return shardByUser(userID)
}

// shardByMetricName returns the token for the given metric. The provided metricName
// is guaranteed to not be retained.
func shardByMetricName(userID string, metricName string) uint32 {
	h := shardByUser(userID)
	h = ingester_client.HashAdd32(h, metricName)
	return h
}

func shardByUser(userID string) uint32 {
	h := ingester_client.HashNew32()
	h = ingester_client.HashAdd32(h, userID)
	return h
}
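
// Illustrative sketch (not part of the upstream file): every token is seeded
// with the user ID, so the same metric name from two tenants hashes to
// different tokens and therefore, in general, to different ingesters.
//
//	t1 := shardByMetricName("tenant-a", "http_requests_total")
//	t2 := shardByMetricName("tenant-b", "http_requests_total")
//	// t1 != t2 in all but the rare case of a hash collision.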

// shardByAllLabels generates different tokens for different orderings of the
// same labels; callers must sort labels first (see sortLabelsIfNeeded).
func shardByAllLabels(userID string, labels []cortexpb.LabelAdapter) uint32 {
	h := shardByUser(userID)
	for _, label := range labels {
		h = ingester_client.HashAdd32(h, label.Name)
		h = ingester_client.HashAdd32(h, label.Value)
	}
	return h
}
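
// Illustrative sketch (not part of the upstream file): the order sensitivity
// the comment above warns about.
//
//	a := []cortexpb.LabelAdapter{{Name: "a", Value: "1"}, {Name: "b", Value: "2"}}
//	b := []cortexpb.LabelAdapter{{Name: "b", Value: "2"}, {Name: "a", Value: "1"}}
//	// shardByAllLabels("user", a) != shardByAllLabels("user", b), even though
//	// both slices describe the same series. This is why Push sorts labels
//	// before computing tokens.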

// removeLabel removes the label labelName from a slice of LabelAdapters if it exists.
func removeLabel(labelName string, labels *[]cortexpb.LabelAdapter) {
	for i := 0; i < len(*labels); i++ {
		pair := (*labels)[i]
		if pair.Name == labelName {
			*labels = append((*labels)[:i], (*labels)[i+1:]...)
			return
		}
	}
}

// Returns a boolean that indicates whether or not we want to remove the replica label going forward,
// and an error that indicates whether we want to accept samples based on the cluster/replica found in ts.
// nil for the error means accept the sample.
func (d *Distributor) checkSample(ctx context.Context, userID, cluster, replica string) (removeReplicaLabel bool, _ error) {
	// If the sample doesn't have either HA label, accept it.
	// At the moment we want to accept these samples by default.
	if cluster == "" || replica == "" {
		return false, nil
	}

	// If the replica label is too long, don't use it. We accept the sample here, but it will fail validation later anyway.
	if len(replica) > d.limits.MaxLabelValueLength(userID) {
		return false, nil
	}

	// At this point we know we have both HA labels, so we should look up
	// the cluster/instance here to see if we want to accept this sample.
	err := d.HATracker.checkReplica(ctx, userID, cluster, replica, time.Now())
	// checkReplica should only have returned an error if there was a real error talking to Consul, or if the replica labels don't match.
	if err != nil { // Don't accept the sample.
		return false, err
	}
	return true, nil
}

// validateSeries validates a single series from a write request, removing any labels
// configured to be dropped for the user ID.
// Returns the validated series with its labels/samples, and any error.
// The returned error may retain the series labels.
func (d *Distributor) validateSeries(ts cortexpb.PreallocTimeseries, userID string, skipLabelNameValidation bool) (cortexpb.PreallocTimeseries, validation.ValidationError) {
	d.labelsHistogram.Observe(float64(len(ts.Labels)))
	if err := validation.ValidateLabels(d.limits, userID, ts.Labels, skipLabelNameValidation); err != nil {
		return emptyPreallocSeries, err
	}

	var samples []cortexpb.Sample
	if len(ts.Samples) > 0 {
		// Only alloc when data present.
		samples = make([]cortexpb.Sample, 0, len(ts.Samples))
		for _, s := range ts.Samples {
			if err := validation.ValidateSample(d.limits, userID, ts.Labels, s); err != nil {
				return emptyPreallocSeries, err
			}
			samples = append(samples, s)
		}
	}

	var exemplars []cortexpb.Exemplar
	if len(ts.Exemplars) > 0 {
		// Only alloc when data present.
		exemplars = make([]cortexpb.Exemplar, 0, len(ts.Exemplars))
		for _, e := range ts.Exemplars {
			if err := validation.ValidateExemplar(userID, ts.Labels, e); err != nil {
				// An exemplar validation error prevents ingesting samples
				// in the same series object. However, because the current Prometheus
				// remote write implementation only populates one or the other,
				// there never will be any.
				return emptyPreallocSeries, err
			}
			exemplars = append(exemplars, e)
		}
	}

	return cortexpb.PreallocTimeseries{
			TimeSeries: &cortexpb.TimeSeries{
				Labels:    ts.Labels,
				Samples:   samples,
				Exemplars: exemplars,
			},
		},
		nil
}
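
// Illustrative sketch (not part of the upstream file): how HA deduplication
// behaves from a client's point of view, assuming the tenant's HA cluster and
// replica labels are "cluster" and "__replica__". Two Prometheus replicas in
// cluster "prod" push identical samples; checkSample accepts the elected
// replica (and Push strips "__replica__" before storing), while the other
// replica's requests are rejected with 202 Accepted and counted as deduped.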

// Push implements client.IngesterServer.
func (d *Distributor) Push(ctx context.Context, req *cortexpb.WriteRequest) (*cortexpb.WriteResponse, error) {
	userID, err := tenant.TenantID(ctx)
	if err != nil {
		return nil, err
	}

	// We will report *this* request in the error too.
	inflight := d.inflightPushRequests.Inc()
	defer d.inflightPushRequests.Dec()

	if d.cfg.InstanceLimits.MaxInflightPushRequests > 0 && inflight > int64(d.cfg.InstanceLimits.MaxInflightPushRequests) {
		return nil, errTooManyInflightPushRequests
	}

	if d.cfg.InstanceLimits.MaxIngestionRate > 0 {
		if rate := d.ingestionRate.Rate(); rate >= d.cfg.InstanceLimits.MaxIngestionRate {
			return nil, errMaxSamplesPushRateLimitReached
		}
	}

	now := time.Now()
	d.activeUsers.UpdateUserTimestamp(userID, now)

	source := util.GetSourceIPsFromOutgoingCtx(ctx)

	var firstPartialErr error
	removeReplica := false

	numSamples := 0
	numExemplars := 0
	for _, ts := range req.Timeseries {
		numSamples += len(ts.Samples)
		numExemplars += len(ts.Exemplars)
	}
	// Count the total samples in, prior to validation or deduplication, for comparison with other metrics.
	d.incomingSamples.WithLabelValues(userID).Add(float64(numSamples))
	d.incomingExemplars.WithLabelValues(userID).Add(float64(numExemplars))
	// Count the total number of metadata in.
	d.incomingMetadata.WithLabelValues(userID).Add(float64(len(req.Metadata)))

	// A WriteRequest can only contain series or metadata but not both. This might change in the future.
	// For each timeseries or metadata, we compute a hash to distribute across ingesters;
	// we check each sample/metadata and discard it if outside limits.
	validatedTimeseries := make([]cortexpb.PreallocTimeseries, 0, len(req.Timeseries))
	validatedMetadata := make([]*cortexpb.MetricMetadata, 0, len(req.Metadata))
	metadataKeys := make([]uint32, 0, len(req.Metadata))
	seriesKeys := make([]uint32, 0, len(req.Timeseries))
	validatedSamples := 0
	validatedExemplars := 0

	if d.limits.AcceptHASamples(userID) && len(req.Timeseries) > 0 {
		cluster, replica := findHALabels(d.limits.HAReplicaLabel(userID), d.limits.HAClusterLabel(userID), req.Timeseries[0].Labels)
		removeReplica, err = d.checkSample(ctx, userID, cluster, replica)
		if err != nil {
			// Ensure the request slice is reused if the series get deduped.
			cortexpb.ReuseSlice(req.Timeseries)

			if errors.Is(err, replicasNotMatchError{}) {
				// These samples have been deduped.
				d.dedupedSamples.WithLabelValues(userID, cluster).Add(float64(numSamples))
				return nil, httpgrpc.Errorf(http.StatusAccepted, err.Error())
			}

			if errors.Is(err, tooManyClustersError{}) {
				validation.DiscardedSamples.WithLabelValues(validation.TooManyHAClusters, userID).Add(float64(numSamples))
				return nil, httpgrpc.Errorf(http.StatusBadRequest, err.Error())
			}

			return nil, err
		}
		// If there wasn't an error but removeReplica is false, that means we didn't find both HA labels.
		if !removeReplica {
			d.nonHASamples.WithLabelValues(userID).Add(float64(numSamples))
		}
	}

	latestSampleTimestampMs := int64(0)
	defer func() {
		// Update this metric even in case of errors.
		if latestSampleTimestampMs > 0 {
			d.latestSeenSampleTimestampPerUser.WithLabelValues(userID).Set(float64(latestSampleTimestampMs) / 1000)
		}
	}()
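
	// Illustrative note (not part of the upstream file): the per-tenant metric
	// relabel configs applied in the loop below use standard Prometheus relabel
	// rules. For example, a tenant rule with action "drop" matching __name__
	// against "go_gc_.*" leaves matching series with an empty label set, so
	// they are skipped by the len(ts.Labels) == 0 check further down.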

	// For each timeseries, compute a hash to distribute across ingesters;
	// check each sample and discard it if outside limits.
	for _, ts := range req.Timeseries {
		// Use the timestamp of the latest sample in the series. If samples for the series are not ordered, the metric for the user may be wrong.
		if len(ts.Samples) > 0 {
			latestSampleTimestampMs = util_math.Max64(latestSampleTimestampMs, ts.Samples[len(ts.Samples)-1].TimestampMs)
		}

		if mrc := d.limits.MetricRelabelConfigs(userID); len(mrc) > 0 {
			l := relabel.Process(cortexpb.FromLabelAdaptersToLabels(ts.Labels), mrc...)
			ts.Labels = cortexpb.FromLabelsToLabelAdapters(l)
		}

		// If we found both the cluster and replica labels, we only want to include the cluster label when
		// storing series in Cortex. If we kept the replica label we would end up with another series for the same
		// series we're trying to dedupe when HA tracking moves over to a different replica.
		if removeReplica {
			removeLabel(d.limits.HAReplicaLabel(userID), &ts.Labels)
		}

		for _, labelName := range d.limits.DropLabels(userID) {
			removeLabel(labelName, &ts.Labels)
		}

		if len(ts.Labels) == 0 {
			continue
		}

		// We rely on sorted labels in different places:
		// 1) When computing the token for labels, and sharding by all labels. Here a different order of labels
		// returns different tokens, which is bad.
		// 2) In validation code, when checking for duplicate label names. As duplicate label names are rejected
		// later in the validation phase, we ignore them here.
		sortLabelsIfNeeded(ts.Labels)

		// Generate the sharding token based on the series labels without the HA replica
		// label and dropped labels (if any).
		key, err := d.tokenForLabels(userID, ts.Labels)
		if err != nil {
			return nil, err
		}

		skipLabelNameValidation := d.cfg.SkipLabelNameValidation || req.GetSkipLabelNameValidation()
		validatedSeries, validationErr := d.validateSeries(ts, userID, skipLabelNameValidation)

		// Errors in validation are considered non-fatal, as one series in a request may contain
		// invalid data but all the remaining series could be perfectly valid.
		if validationErr != nil && firstPartialErr == nil {
			// The series labels may be retained by validationErr but that's not a problem for this
			// use case because we format it calling Error() and then we discard it.
			firstPartialErr = httpgrpc.Errorf(http.StatusBadRequest, validationErr.Error())
		}

		// validateSeries would have returned an emptyPreallocSeries if there were no valid samples.
		if validatedSeries == emptyPreallocSeries {
			continue
		}

		seriesKeys = append(seriesKeys, key)
		validatedTimeseries = append(validatedTimeseries, validatedSeries)
		validatedSamples += len(ts.Samples)
		validatedExemplars += len(ts.Exemplars)
	}

	for _, m := range req.Metadata {
		err := validation.ValidateMetadata(d.limits, userID, m)

		if err != nil {
			if firstPartialErr == nil {
				firstPartialErr = err
			}

			continue
		}

		metadataKeys = append(metadataKeys, d.tokenForMetadata(userID, m.MetricFamilyName))
		validatedMetadata = append(validatedMetadata, m)
	}

	d.receivedSamples.WithLabelValues(userID).Add(float64(validatedSamples))
	d.receivedExemplars.WithLabelValues(userID).Add(float64(validatedExemplars))
	d.receivedMetadata.WithLabelValues(userID).Add(float64(len(validatedMetadata)))

	if len(seriesKeys) == 0 && len(metadataKeys) == 0 {
		// Ensure the request slice is reused if there's no series or metadata passing the validation.
		cortexpb.ReuseSlice(req.Timeseries)

		return &cortexpb.WriteResponse{}, firstPartialErr
	}

	totalN := validatedSamples + validatedExemplars + len(validatedMetadata)
	if !d.ingestionRateLimiter.AllowN(now, userID, totalN) {
		// Ensure the request slice is reused if the request is rate limited.
		cortexpb.ReuseSlice(req.Timeseries)

		validation.DiscardedSamples.WithLabelValues(validation.RateLimited, userID).Add(float64(validatedSamples))
		validation.DiscardedExemplars.WithLabelValues(validation.RateLimited, userID).Add(float64(validatedExemplars))
		validation.DiscardedMetadata.WithLabelValues(validation.RateLimited, userID).Add(float64(len(validatedMetadata)))
		// Return a 429 here to tell the client it is going too fast.
		// The client may discard the data or slow down and re-send.
		// Prometheus v2.26 added a remote-write option 'retry_on_http_429'.
		return nil, httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (%v) exceeded while adding %d samples and %d metadata", d.ingestionRateLimiter.Limit(now, userID), validatedSamples, len(validatedMetadata))
	}

	// totalN includes samples, exemplars and metadata. The ingester follows this pattern when computing its ingestion rate.
	d.ingestionRate.Add(int64(totalN))

	subRing := d.ingestersRing

	// Obtain a subring if required.
	if d.cfg.ShardingStrategy == util.ShardingStrategyShuffle {
		subRing = d.ingestersRing.ShuffleShard(userID, d.limits.IngestionTenantShardSize(userID))
	}
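
	// Illustrative note (not part of the upstream file): with shuffle sharding
	// and a tenant shard size of 3 on a 50-ingester ring, the subring contains
	// only the tenant's 3 designated ingesters; each series is still replicated
	// according to the replication factor, but only within that subring rather
	// than across all 50 ingesters.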

	keys := append(seriesKeys, metadataKeys...)
	initialMetadataIndex := len(seriesKeys)

	op := ring.WriteNoExtend
	if d.cfg.ExtendWrites {
		op = ring.Write
	}

	err = ring.DoBatch(ctx, op, subRing, keys, func(ingester ring.InstanceDesc, indexes []int) error {
		timeseries := make([]cortexpb.PreallocTimeseries, 0, len(indexes))
		var metadata []*cortexpb.MetricMetadata

		for _, i := range indexes {
			if i >= initialMetadataIndex {
				metadata = append(metadata, validatedMetadata[i-initialMetadataIndex])
			} else {
				timeseries = append(timeseries, validatedTimeseries[i])
			}
		}

		// Use a background context to make sure all ingesters get samples even if we return early.
		localCtx, cancel := context.WithTimeout(context.Background(), d.cfg.RemoteTimeout)
		defer cancel()
		localCtx = user.InjectOrgID(localCtx, userID)
		if sp := opentracing.SpanFromContext(ctx); sp != nil {
			localCtx = opentracing.ContextWithSpan(localCtx, sp)
		}

		// Get clientIP(s) from the context and add them to localCtx.
		localCtx = util.AddSourceIPsToOutgoingContext(localCtx, source)

		return d.send(localCtx, ingester, timeseries, metadata, req.Source)
	}, func() { cortexpb.ReuseSlice(req.Timeseries) })
	if err != nil {
		return nil, err
	}
	return &cortexpb.WriteResponse{}, firstPartialErr
}

func sortLabelsIfNeeded(labels []cortexpb.LabelAdapter) {
	// No need to run sort.Slice if labels are already sorted, which is true most of the time.
	// We can avoid extra memory allocations (mostly interface-related) this way.
	sorted := true
	last := ""
	for _, l := range labels {
		if strings.Compare(last, l.Name) > 0 {
			sorted = false
			break
		}
		last = l.Name
	}

	if sorted {
		return
	}

	sort.Slice(labels, func(i, j int) bool {
		return strings.Compare(labels[i].Name, labels[j].Name) < 0
	})
}

func (d *Distributor) send(ctx context.Context, ingester ring.InstanceDesc, timeseries []cortexpb.PreallocTimeseries, metadata []*cortexpb.MetricMetadata, source cortexpb.WriteRequest_SourceEnum) error {
	h, err := d.ingesterPool.GetClientFor(ingester.Addr)
	if err != nil {
		return err
	}
	c := h.(ingester_client.IngesterClient)

	req := cortexpb.WriteRequest{
		Timeseries: timeseries,
		Metadata:   metadata,
		Source:     source,
	}
	_, err = c.Push(ctx, &req)

	if len(metadata) > 0 {
		d.ingesterAppends.WithLabelValues(ingester.Addr, typeMetadata).Inc()
		if err != nil {
			d.ingesterAppendFailures.WithLabelValues(ingester.Addr, typeMetadata, getErrorStatus(err)).Inc()
		}
	}
	if len(timeseries) > 0 {
		d.ingesterAppends.WithLabelValues(ingester.Addr, typeSamples).Inc()
		if err != nil {
			d.ingesterAppendFailures.WithLabelValues(ingester.Addr, typeSamples, getErrorStatus(err)).Inc()
		}
	}

	return err
}

func getErrorStatus(err error) string {
	status := "5xx"
	httpResp, ok := httpgrpc.HTTPResponseFromError(err)
	if ok && httpResp.Code/100 == 4 {
		status = "4xx"
	}

	return status
}
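
// Illustrative sketch (not part of the upstream file): the status buckets
// produced by getErrorStatus above.
//
//	getErrorStatus(httpgrpc.Errorf(http.StatusBadRequest, "bad labels")) // "4xx"
//	getErrorStatus(errors.New("connection refused"))                     // "5xx" (non-httpgrpc errors)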

// ForReplicationSet runs f, in parallel, for all ingesters in the input replication set.
func (d *Distributor) ForReplicationSet(ctx context.Context, replicationSet ring.ReplicationSet, f func(context.Context, ingester_client.IngesterClient) (interface{}, error)) ([]interface{}, error) {
	return replicationSet.Do(ctx, d.cfg.ExtraQueryDelay, func(ctx context.Context, ing *ring.InstanceDesc) (interface{}, error) {
		client, err := d.ingesterPool.GetClientFor(ing.Addr)
		if err != nil {
			return nil, err
		}

		return f(ctx, client.(ingester_client.IngesterClient))
	})
}

// LabelValuesForLabelName returns all of the label values that are associated with a given label name.
func (d *Distributor) LabelValuesForLabelName(ctx context.Context, from, to model.Time, labelName model.LabelName, matchers ...*labels.Matcher) ([]string, error) {
	replicationSet, err := d.GetIngestersForMetadata(ctx)
	if err != nil {
		return nil, err
	}

	req, err := ingester_client.ToLabelValuesRequest(labelName, from, to, matchers)
	if err != nil {
		return nil, err
	}

	resps, err := d.ForReplicationSet(ctx, replicationSet, func(ctx context.Context, client ingester_client.IngesterClient) (interface{}, error) {
		return client.LabelValues(ctx, req)
	})
	if err != nil {
		return nil, err
	}

	valueSet := map[string]struct{}{}
	for _, resp := range resps {
		for _, v := range resp.(*ingester_client.LabelValuesResponse).LabelValues {
			valueSet[v] = struct{}{}
		}
	}

	values := make([]string, 0, len(valueSet))
	for v := range valueSet {
		values = append(values, v)
	}

	// We need the values returned to be sorted.
	sort.Strings(values)

	return values, nil
}

// LabelNames returns all of the label names.
func (d *Distributor) LabelNames(ctx context.Context, from, to model.Time) ([]string, error) {
	replicationSet, err := d.GetIngestersForMetadata(ctx)
	if err != nil {
		return nil, err
	}

	req := &ingester_client.LabelNamesRequest{
		StartTimestampMs: int64(from),
		EndTimestampMs:   int64(to),
	}
	resps, err := d.ForReplicationSet(ctx, replicationSet, func(ctx context.Context, client ingester_client.IngesterClient) (interface{}, error) {
		return client.LabelNames(ctx, req)
	})
	if err != nil {
		return nil, err
	}

	valueSet := map[string]struct{}{}
	for _, resp := range resps {
		for _, v := range resp.(*ingester_client.LabelNamesResponse).LabelNames {
			valueSet[v] = struct{}{}
		}
	}

	values := make([]string, 0, len(valueSet))
	for v := range valueSet {
		values = append(values, v)
	}

	sort.Strings(values)

	return values, nil
}
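
// Illustrative sketch (not part of the upstream file): callers get values
// merged across the replication set, deduplicated and sorted. The label name
// and results here are made up.
//
//	vals, err := d.LabelValuesForLabelName(ctx, from, to, "job")
//	// vals might be ["api", "worker"], even if several ingesters each
//	// reported overlapping subsets of those values.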

// MetricsForLabelMatchers gets the metrics that match the given matchers.
func (d *Distributor) MetricsForLabelMatchers(ctx context.Context, from, through model.Time, matchers ...*labels.Matcher) ([]metric.Metric, error) {
	replicationSet, err := d.GetIngestersForMetadata(ctx)
	if err != nil {
		return nil, err
	}

	req, err := ingester_client.ToMetricsForLabelMatchersRequest(from, through, matchers)
	if err != nil {
		return nil, err
	}

	resps, err := d.ForReplicationSet(ctx, replicationSet, func(ctx context.Context, client ingester_client.IngesterClient) (interface{}, error) {
		return client.MetricsForLabelMatchers(ctx, req)
	})
	if err != nil {
		return nil, err
	}

	metrics := map[model.Fingerprint]model.Metric{}
	for _, resp := range resps {
		ms := ingester_client.FromMetricsForLabelMatchersResponse(resp.(*ingester_client.MetricsForLabelMatchersResponse))
		for _, m := range ms {
			metrics[m.Fingerprint()] = m
		}
	}

	result := make([]metric.Metric, 0, len(metrics))
	for _, m := range metrics {
		result = append(result, metric.Metric{
			Metric: m,
		})
	}
	return result, nil
}

// MetricsMetadata returns all metric metadata of a user.
func (d *Distributor) MetricsMetadata(ctx context.Context) ([]scrape.MetricMetadata, error) {
	replicationSet, err := d.GetIngestersForMetadata(ctx)
	if err != nil {
		return nil, err
	}

	req := &ingester_client.MetricsMetadataRequest{}
	// TODO(gotjosh): We only need to look in all the ingesters if shardByAllLabels is enabled.
	resps, err := d.ForReplicationSet(ctx, replicationSet, func(ctx context.Context, client ingester_client.IngesterClient) (interface{}, error) {
		return client.MetricsMetadata(ctx, req)
	})
	if err != nil {
		return nil, err
	}

	result := []scrape.MetricMetadata{}
	dedupTracker := map[cortexpb.MetricMetadata]struct{}{}
	for _, resp := range resps {
		r := resp.(*ingester_client.MetricsMetadataResponse)
		for _, m := range r.Metadata {
			// Given we look across all ingesters, dedupe the metadata.
			_, ok := dedupTracker[*m]
			if ok {
				continue
			}
			dedupTracker[*m] = struct{}{}

			result = append(result, scrape.MetricMetadata{
				Metric: m.MetricFamilyName,
				Help:   m.Help,
				Unit:   m.Unit,
				Type:   cortexpb.MetricMetadataMetricTypeToMetricType(m.GetType()),
			})
		}
	}

	return result, nil
}

// UserStats returns statistics about the current user.
func (d *Distributor) UserStats(ctx context.Context) (*UserStats, error) {
	replicationSet, err := d.GetIngestersForMetadata(ctx)
	if err != nil {
		return nil, err
	}

	// Make sure we get a successful response from all of them.
	replicationSet.MaxErrors = 0

	req := &ingester_client.UserStatsRequest{}
	resps, err := d.ForReplicationSet(ctx, replicationSet, func(ctx context.Context, client ingester_client.IngesterClient) (interface{}, error) {
		return client.UserStats(ctx, req)
	})
	if err != nil {
		return nil, err
	}

	totalStats := &UserStats{}
	for _, resp := range resps {
		r := resp.(*ingester_client.UserStatsResponse)
		totalStats.IngestionRate += r.IngestionRate
		totalStats.APIIngestionRate += r.ApiIngestionRate
		totalStats.RuleIngestionRate += r.RuleIngestionRate
		totalStats.NumSeries += r.NumSeries
	}

	totalStats.IngestionRate /= float64(d.ingestersRing.ReplicationFactor())
	totalStats.NumSeries /= uint64(d.ingestersRing.ReplicationFactor())

	return totalStats, nil
}

// UserIDStats models ingestion statistics for one user, including the user ID.
type UserIDStats struct {
	UserID string `json:"userID"`
	UserStats
}
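
// Illustrative note (not part of the upstream file): UserStats divides by the
// replication factor because every series is reported by each of its replicas.
// With a replication factor of 3, three ingesters each reporting 10000 series
// for a tenant yields NumSeries = 30000 / 3 = 10000, the approximate true count.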

// AllUserStats returns statistics about all users.
// Note it does not divide by the ReplicationFactor like UserStats().
func (d *Distributor) AllUserStats(ctx context.Context) ([]UserIDStats, error) {
	// Add up by user, across all responses from ingesters.
	perUserTotals := make(map[string]UserStats)

	req := &ingester_client.UserStatsRequest{}
	ctx = user.InjectOrgID(ctx, "1") // fake: ingester insists on having an org ID
	// Not using d.ForReplicationSet(), so we can fail after the first error.
	replicationSet, err := d.ingestersRing.GetAllHealthy(ring.Read)
	if err != nil {
		return nil, err
	}
	for _, ingester := range replicationSet.Instances {
		client, err := d.ingesterPool.GetClientFor(ingester.Addr)
		if err != nil {
			return nil, err
		}
		resp, err := client.(ingester_client.IngesterClient).AllUserStats(ctx, req)
		if err != nil {
			return nil, err
		}
		for _, u := range resp.Stats {
			s := perUserTotals[u.UserId]
			s.IngestionRate += u.Data.IngestionRate
			s.APIIngestionRate += u.Data.ApiIngestionRate
			s.RuleIngestionRate += u.Data.RuleIngestionRate
			s.NumSeries += u.Data.NumSeries
			perUserTotals[u.UserId] = s
		}
	}

	// Turn the aggregated map into a slice for the response.
	response := make([]UserIDStats, 0, len(perUserTotals))
	for id, stats := range perUserTotals {
		response = append(response, UserIDStats{
			UserID: id,
			UserStats: UserStats{
				IngestionRate:     stats.IngestionRate,
				APIIngestionRate:  stats.APIIngestionRate,
				RuleIngestionRate: stats.RuleIngestionRate,
				NumSeries:         stats.NumSeries,
			},
		})
	}

	return response, nil
}

func (d *Distributor) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	if d.distributorsRing != nil {
		d.distributorsRing.ServeHTTP(w, req)
	} else {
		var ringNotEnabledPage = `
			<!DOCTYPE html>
			<html>
				<head>
					<meta charset="UTF-8">
					<title>Cortex Distributor Status</title>
				</head>
				<body>
					<h1>Cortex Distributor Status</h1>
					<p>Distributor is not running with global limits enabled</p>
				</body>
			</html>`
		util.WriteHTMLResponse(w, ringNotEnabledPage)
	}
}