github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/ingester/ingester.go

package ingester

import (
	"context"
	"flag"
	"fmt"
	"net/http"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/gogo/status"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/pkg/labels"
	tsdb_record "github.com/prometheus/prometheus/tsdb/record"
	"github.com/weaveworks/common/httpgrpc"
	"go.uber.org/atomic"
	"golang.org/x/time/rate"
	"google.golang.org/grpc/codes"

	cortex_chunk "github.com/cortexproject/cortex/pkg/chunk"
	"github.com/cortexproject/cortex/pkg/cortexpb"
	"github.com/cortexproject/cortex/pkg/ingester/client"
	"github.com/cortexproject/cortex/pkg/storage/tsdb"
	"github.com/cortexproject/cortex/pkg/tenant"
	"github.com/cortexproject/cortex/pkg/util"
	logutil "github.com/cortexproject/cortex/pkg/util/log"
	util_math "github.com/cortexproject/cortex/pkg/util/math"
	"github.com/cortexproject/cortex/pkg/util/spanlogger"
	"github.com/cortexproject/cortex/pkg/util/validation"
)

const (
	// Number of timeseries to return in each batch of a QueryStream.
	queryStreamBatchSize = 128

	// Discarded metadata metric labels.
	perUserMetadataLimit   = "per_user_metadata_limit"
	perMetricMetadataLimit = "per_metric_metadata_limit"

	// Period at which to attempt purging metadata from memory.
	metadataPurgePeriod = 5 * time.Minute
)

var (
	// Initialised only if the WAL is enabled; WAL records are then fetched from this pool.
	recordPool sync.Pool

	errIngesterStopping = errors.New("ingester stopping")
)
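// Usage sketch for the pool above (mirrors what Push does below when the WAL
// is enabled): get a pooled record, reuse its slices, and return it once the
// record has been logged.
//
//	record := recordPool.Get().(*WALRecord)
//	record.UserID = userID
//	// ... fill record.Samples and log the record via the WAL ...
//	recordPool.Put(record)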
// Config for an Ingester.
type Config struct {
	WALConfig        WALConfig             `yaml:"walconfig" doc:"description=Configures the Write-Ahead Log (WAL) for the Cortex chunks storage. This config is ignored when running the Cortex blocks storage."`
	LifecyclerConfig ring.LifecyclerConfig `yaml:"lifecycler"`

	// Config for transferring chunks. Zero or negative = no retries.
	MaxTransferRetries int `yaml:"max_transfer_retries"`

	// Config for chunk flushing.
	FlushCheckPeriod  time.Duration `yaml:"flush_period"`
	RetainPeriod      time.Duration `yaml:"retain_period"`
	MaxChunkIdle      time.Duration `yaml:"max_chunk_idle_time"`
	MaxStaleChunkIdle time.Duration `yaml:"max_stale_chunk_idle_time"`
	FlushOpTimeout    time.Duration `yaml:"flush_op_timeout"`
	MaxChunkAge       time.Duration `yaml:"max_chunk_age"`
	ChunkAgeJitter    time.Duration `yaml:"chunk_age_jitter"`
	ConcurrentFlushes int           `yaml:"concurrent_flushes"`
	SpreadFlushes     bool          `yaml:"spread_flushes"`

	// Config for metadata purging.
	MetadataRetainPeriod time.Duration `yaml:"metadata_retain_period"`

	RateUpdatePeriod time.Duration `yaml:"rate_update_period"`

	ActiveSeriesMetricsEnabled      bool          `yaml:"active_series_metrics_enabled"`
	ActiveSeriesMetricsUpdatePeriod time.Duration `yaml:"active_series_metrics_update_period"`
	ActiveSeriesMetricsIdleTimeout  time.Duration `yaml:"active_series_metrics_idle_timeout"`

	// Use blocks storage.
	BlocksStorageEnabled        bool                     `yaml:"-"`
	BlocksStorageConfig         tsdb.BlocksStorageConfig `yaml:"-"`
	StreamChunksWhenUsingBlocks bool                     `yaml:"-"`
	// Runtime override for the type of streaming query to use (chunks or samples).
	StreamTypeFn func() QueryStreamType `yaml:"-"`

	// Injected at runtime and read from the distributor config, required
	// to accurately apply global limits.
	DistributorShardingStrategy string `yaml:"-"`
	DistributorShardByAllLabels bool   `yaml:"-"`

	DefaultLimits    InstanceLimits         `yaml:"instance_limits"`
	InstanceLimitsFn func() *InstanceLimits `yaml:"-"`

	IgnoreSeriesLimitForMetricNames string `yaml:"ignore_series_limit_for_metric_names"`

	// For testing: allows overriding the factory used to create clients to other ingesters.
	ingesterClientFactory func(addr string, cfg client.Config) (client.HealthAndIngesterClient, error)
}

// RegisterFlags adds the flags required to configure this to the given FlagSet.
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	cfg.LifecyclerConfig.RegisterFlags(f)
	cfg.WALConfig.RegisterFlags(f)

	f.IntVar(&cfg.MaxTransferRetries, "ingester.max-transfer-retries", 10, "Number of times to try to transfer chunks before falling back to flushing. Zero or a negative value disables hand-over. This feature is supported only by the chunks storage.")

	f.DurationVar(&cfg.FlushCheckPeriod, "ingester.flush-period", 1*time.Minute, "Period with which to attempt to flush chunks.")
	f.DurationVar(&cfg.RetainPeriod, "ingester.retain-period", 5*time.Minute, "Period chunks will remain in memory after flushing.")
	f.DurationVar(&cfg.MaxChunkIdle, "ingester.max-chunk-idle", 5*time.Minute, "Maximum chunk idle time before flushing.")
	f.DurationVar(&cfg.MaxStaleChunkIdle, "ingester.max-stale-chunk-idle", 2*time.Minute, "Maximum chunk idle time for chunks terminating in stale markers before flushing. 0 disables it, and a stale series is not flushed until the max-chunk-idle timeout is reached.")
	f.DurationVar(&cfg.FlushOpTimeout, "ingester.flush-op-timeout", 1*time.Minute, "Timeout for individual flush operations.")
	f.DurationVar(&cfg.MaxChunkAge, "ingester.max-chunk-age", 12*time.Hour, "Maximum chunk age before flushing.")
	f.DurationVar(&cfg.ChunkAgeJitter, "ingester.chunk-age-jitter", 0, "Range of time to subtract from -ingester.max-chunk-age to spread out flushes.")
	f.IntVar(&cfg.ConcurrentFlushes, "ingester.concurrent-flushes", 50, "Number of concurrent goroutines flushing to the chunk store.")
	f.BoolVar(&cfg.SpreadFlushes, "ingester.spread-flushes", true, "If true, spread series flushes across the whole period of -ingester.max-chunk-age.")

	f.DurationVar(&cfg.MetadataRetainPeriod, "ingester.metadata-retain-period", 10*time.Minute, "Period for which metadata that has not been seen again is kept in memory before being deleted.")

	f.DurationVar(&cfg.RateUpdatePeriod, "ingester.rate-update-period", 15*time.Second, "Period with which to update the per-user ingestion rates.")
	f.BoolVar(&cfg.ActiveSeriesMetricsEnabled, "ingester.active-series-metrics-enabled", true, "Enable tracking of active series and export them as metrics.")
	f.DurationVar(&cfg.ActiveSeriesMetricsUpdatePeriod, "ingester.active-series-metrics-update-period", 1*time.Minute, "How often to update active series metrics.")
	f.DurationVar(&cfg.ActiveSeriesMetricsIdleTimeout, "ingester.active-series-metrics-idle-timeout", 10*time.Minute, "After what time a series is considered to be inactive.")
	f.BoolVar(&cfg.StreamChunksWhenUsingBlocks, "ingester.stream-chunks-when-using-blocks", false, "Stream chunks when using blocks. This is an experimental feature and not yet tested. Once ready, it will be made the default and this config option removed.")

	f.Float64Var(&cfg.DefaultLimits.MaxIngestionRate, "ingester.instance-limits.max-ingestion-rate", 0, "Max ingestion rate (samples/sec) that this ingester will accept. This limit is per-ingester, not per-tenant. Additional push requests will be rejected. The current ingestion rate is computed as an exponentially weighted moving average, updated every second. This limit only works when using the blocks engine. 0 = unlimited.")
	f.Int64Var(&cfg.DefaultLimits.MaxInMemoryTenants, "ingester.instance-limits.max-tenants", 0, "Max users that this ingester can hold. Requests from additional users will be rejected. This limit only works when using the blocks engine. 0 = unlimited.")
	f.Int64Var(&cfg.DefaultLimits.MaxInMemorySeries, "ingester.instance-limits.max-series", 0, "Max series that this ingester can hold (across all tenants). Requests to create additional series will be rejected. This limit only works when using the blocks engine. 0 = unlimited.")
	f.Int64Var(&cfg.DefaultLimits.MaxInflightPushRequests, "ingester.instance-limits.max-inflight-push-requests", 0, "Max inflight push requests that this ingester can handle (across all tenants). Additional requests will be rejected. 0 = unlimited.")

	f.StringVar(&cfg.IgnoreSeriesLimitForMetricNames, "ingester.ignore-series-limit-for-metric-names", "", "Comma-separated list of metric names for which the -ingester.max-series-per-metric and -ingester.max-global-series-per-metric limits will be ignored. Does not affect the max-series-per-user or max-global-series-per-user limits.")
}
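// Illustrative wiring sketch (not part of the original file): how a binary
// would register and parse these flags. The flag names below are the real
// ones registered above; the FlagSet name is arbitrary.
//
//	var cfg Config
//	fs := flag.NewFlagSet("ingester", flag.ExitOnError)
//	cfg.RegisterFlags(fs)
//	// e.g. override two defaults:
//	_ = fs.Parse([]string{"-ingester.max-chunk-age=6h", "-ingester.concurrent-flushes=16"})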
func (cfg *Config) getIgnoreSeriesLimitForMetricNamesMap() map[string]struct{} {
	if cfg.IgnoreSeriesLimitForMetricNames == "" {
		return nil
	}

	result := map[string]struct{}{}

	for _, s := range strings.Split(cfg.IgnoreSeriesLimitForMetricNames, ",") {
		tr := strings.TrimSpace(s)
		if tr != "" {
			result[tr] = struct{}{}
		}
	}

	if len(result) == 0 {
		return nil
	}

	return result
}

// Ingester deals with "in flight" chunks. Based on Prometheus 1.x
// MemorySeriesStorage.
type Ingester struct {
	*services.BasicService

	cfg          Config
	clientConfig client.Config

	metrics *ingesterMetrics
	logger  log.Logger

	chunkStore         ChunkStore
	lifecycler         *ring.Lifecycler
	limits             *validation.Overrides
	limiter            *Limiter
	subservicesWatcher *services.FailureWatcher

	userStatesMtx sync.RWMutex // protects userStates and stopped
	userStates    *userStates
	stopped       bool // protected by userStatesMtx

	// For storing metadata ingested.
	usersMetadataMtx sync.RWMutex
	usersMetadata    map[string]*userMetricsMetadata

	// One queue per flush thread. Fingerprint is used to
	// pick a queue.
	flushQueues     []*util.PriorityQueue
	flushQueuesDone sync.WaitGroup

	// Spread out calls to the chunk store over the flush period.
	flushRateLimiter *rate.Limiter

	// This should never be nil.
	wal WAL
	// To be passed to the WAL.
	registerer prometheus.Registerer

	// Hooks for injecting behaviour from tests.
	preFlushUserSeries func()
	preFlushChunks     func()

	// Prometheus block storage.
	TSDBState TSDBState

	// Rate of pushed samples. Only used by the V2 (blocks) ingester to limit the global sample push rate.
	ingestionRate        *util_math.EwmaRate
	inflightPushRequests atomic.Int64
}

// ChunkStore is the interface we need to store chunks.
type ChunkStore interface {
	Put(ctx context.Context, chunks []cortex_chunk.Chunk) error
}
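// memoryChunkStore is a minimal sketch of a ChunkStore implementation
// (illustrative only, e.g. for tests; it is not part of the original file):
// it satisfies the interface above by accumulating flushed chunks in memory.
type memoryChunkStore struct {
	mtx    sync.Mutex
	chunks []cortex_chunk.Chunk
}

// Put appends the flushed chunks to the in-memory slice.
func (m *memoryChunkStore) Put(_ context.Context, chunks []cortex_chunk.Chunk) error {
	m.mtx.Lock()
	defer m.mtx.Unlock()
	m.chunks = append(m.chunks, chunks...)
	return nil
}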
// New constructs a new Ingester.
func New(cfg Config, clientConfig client.Config, limits *validation.Overrides, chunkStore ChunkStore, registerer prometheus.Registerer, logger log.Logger) (*Ingester, error) {
	defaultInstanceLimits = &cfg.DefaultLimits

	if cfg.ingesterClientFactory == nil {
		cfg.ingesterClientFactory = client.MakeIngesterClient
	}

	if cfg.BlocksStorageEnabled {
		return NewV2(cfg, clientConfig, limits, registerer, logger)
	}

	if cfg.WALConfig.WALEnabled {
		// If the WAL is enabled, we don't transfer out the data to any ingester.
		// Either the next ingester which takes its place should recover from the WAL,
		// or the data has to be flushed during scale-down.
		cfg.MaxTransferRetries = 0

		// Transfers are disabled with the WAL, hence no need to wait for transfers.
		cfg.LifecyclerConfig.JoinAfter = 0

		recordPool = sync.Pool{
			New: func() interface{} {
				return &WALRecord{}
			},
		}
	}

	if cfg.WALConfig.WALEnabled || cfg.WALConfig.Recover {
		if err := os.MkdirAll(cfg.WALConfig.Dir, os.ModePerm); err != nil {
			return nil, err
		}
	}

	i := &Ingester{
		cfg:          cfg,
		clientConfig: clientConfig,

		limits:           limits,
		chunkStore:       chunkStore,
		flushQueues:      make([]*util.PriorityQueue, cfg.ConcurrentFlushes),
		flushRateLimiter: rate.NewLimiter(rate.Inf, 1),
		usersMetadata:    map[string]*userMetricsMetadata{},
		registerer:       registerer,
		logger:           logger,
	}
	i.metrics = newIngesterMetrics(registerer, true, cfg.ActiveSeriesMetricsEnabled, i.getInstanceLimits, nil, &i.inflightPushRequests)

	var err error
	// During WAL recovery, new user states are created, which requires the limiter.
	// Hence initialise the limiter before creating the WAL.
	// The '!cfg.WALConfig.WALEnabled || cfg.WALConfig.FlushOnShutdown' argument says:
	// don't flush on shutdown when the WAL is enabled, unless flushing on shutdown
	// has been explicitly requested.
	i.lifecycler, err = ring.NewLifecycler(cfg.LifecyclerConfig, i, "ingester", ring.IngesterRingKey, !cfg.WALConfig.WALEnabled || cfg.WALConfig.FlushOnShutdown, logger, prometheus.WrapRegistererWithPrefix("cortex_", registerer))
	if err != nil {
		return nil, err
	}

	i.limiter = NewLimiter(
		limits,
		i.lifecycler,
		cfg.DistributorShardingStrategy,
		cfg.DistributorShardByAllLabels,
		cfg.LifecyclerConfig.RingConfig.ReplicationFactor,
		cfg.LifecyclerConfig.RingConfig.ZoneAwarenessEnabled)

	i.subservicesWatcher = services.NewFailureWatcher()
	i.subservicesWatcher.WatchService(i.lifecycler)

	i.BasicService = services.NewBasicService(i.starting, i.loop, i.stopping)
	return i, nil
}
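// Construction sketch (assumption: cfg, clientCfg and overrides are built
// elsewhere; the chunk store here is the illustrative memoryChunkStore above).
// New returns an Ingester embedding a *services.BasicService, so callers drive
// it with dskit's service helpers rather than calling starting/loop/stopping
// directly.
//
//	ing, err := New(cfg, clientCfg, overrides, &memoryChunkStore{}, prometheus.NewRegistry(), log.NewNopLogger())
//	if err != nil {
//		return err
//	}
//	if err := services.StartAndAwaitRunning(context.Background(), ing); err != nil {
//		return err
//	}
//	defer services.StopAndAwaitTerminated(context.Background(), ing) //nolint:errcheck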
func (i *Ingester) starting(ctx context.Context) error {
	if i.cfg.WALConfig.Recover {
		level.Info(i.logger).Log("msg", "recovering from WAL")
		start := time.Now()
		if err := recoverFromWAL(i); err != nil {
			level.Error(i.logger).Log("msg", "failed to recover from WAL", "time", time.Since(start).String())
			return errors.Wrap(err, "failed to recover from WAL")
		}
		elapsed := time.Since(start)
		level.Info(i.logger).Log("msg", "recovery from WAL completed", "time", elapsed.String())
		i.metrics.walReplayDuration.Set(elapsed.Seconds())
	}

	// If WAL recovery happened, then the userStates will already be set.
	if i.userStates == nil {
		i.userStates = newUserStates(i.limiter, i.cfg, i.metrics, i.logger)
	}

	var err error
	i.wal, err = newWAL(i.cfg.WALConfig, i.userStates.cp, i.registerer, i.logger)
	if err != nil {
		return errors.Wrap(err, "starting WAL")
	}

	// Now that user states have been created, we can start the lifecycler.
	// Important: we want to keep the lifecycler running until we ask it to stop,
	// so we need to give it an independent context.
	if err := i.lifecycler.StartAsync(context.Background()); err != nil {
		return errors.Wrap(err, "failed to start lifecycler")
	}
	if err := i.lifecycler.AwaitRunning(ctx); err != nil {
		return errors.Wrap(err, "failed to start lifecycler")
	}

	i.startFlushLoops()

	return nil
}

func (i *Ingester) startFlushLoops() {
	i.flushQueuesDone.Add(i.cfg.ConcurrentFlushes)
	for j := 0; j < i.cfg.ConcurrentFlushes; j++ {
		i.flushQueues[j] = util.NewPriorityQueue(i.metrics.flushQueueLength)
		go i.flushLoop(j)
	}
}
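// flushQueueIndexFor is an illustrative sketch (not part of the original file)
// of how flush code elsewhere in this package is assumed to pick one of the
// queues created above: the series fingerprint is taken modulo
// ConcurrentFlushes, so a given series always lands on the same flush
// goroutine and queue.
func flushQueueIndexFor(fp model.Fingerprint, concurrentFlushes int) int {
	return int(uint64(fp) % uint64(concurrentFlushes))
}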
// NewForFlusher constructs a new Ingester to be used by the flusher target.
// Compared to the 'New' method:
// * Always replays the WAL.
// * Does not start the lifecycler.
func NewForFlusher(cfg Config, chunkStore ChunkStore, limits *validation.Overrides, registerer prometheus.Registerer, logger log.Logger) (*Ingester, error) {
	if cfg.BlocksStorageEnabled {
		return NewV2ForFlusher(cfg, limits, registerer, logger)
	}

	i := &Ingester{
		cfg:              cfg,
		chunkStore:       chunkStore,
		flushQueues:      make([]*util.PriorityQueue, cfg.ConcurrentFlushes),
		flushRateLimiter: rate.NewLimiter(rate.Inf, 1),
		wal:              &noopWAL{},
		limits:           limits,
		logger:           logger,
	}
	i.metrics = newIngesterMetrics(registerer, true, false, i.getInstanceLimits, nil, &i.inflightPushRequests)

	i.BasicService = services.NewBasicService(i.startingForFlusher, i.loopForFlusher, i.stopping)
	return i, nil
}

func (i *Ingester) startingForFlusher(ctx context.Context) error {
	level.Info(i.logger).Log("msg", "recovering from WAL")

	// We always recover from the WAL.
	start := time.Now()
	if err := recoverFromWAL(i); err != nil {
		level.Error(i.logger).Log("msg", "failed to recover from WAL", "time", time.Since(start).String())
		return err
	}
	elapsed := time.Since(start)

	level.Info(i.logger).Log("msg", "recovery from WAL completed", "time", elapsed.String())
	i.metrics.walReplayDuration.Set(elapsed.Seconds())

	i.startFlushLoops()
	return nil
}

func (i *Ingester) loopForFlusher(ctx context.Context) error {
	for {
		select {
		case <-ctx.Done():
			return nil

		case err := <-i.subservicesWatcher.Chan():
			return errors.Wrap(err, "ingester subservice failed")
		}
	}
}

func (i *Ingester) loop(ctx context.Context) error {
	flushTicker := time.NewTicker(i.cfg.FlushCheckPeriod)
	defer flushTicker.Stop()

	rateUpdateTicker := time.NewTicker(i.cfg.RateUpdatePeriod)
	defer rateUpdateTicker.Stop()

	metadataPurgeTicker := time.NewTicker(metadataPurgePeriod)
	defer metadataPurgeTicker.Stop()

	var activeSeriesTickerChan <-chan time.Time
	if i.cfg.ActiveSeriesMetricsEnabled {
		t := time.NewTicker(i.cfg.ActiveSeriesMetricsUpdatePeriod)
		activeSeriesTickerChan = t.C
		defer t.Stop()
	}

	for {
		select {
		case <-metadataPurgeTicker.C:
			i.purgeUserMetricsMetadata()

		case <-flushTicker.C:
			i.sweepUsers(false)

		case <-rateUpdateTicker.C:
			i.userStates.updateRates()

		case <-activeSeriesTickerChan:
			i.userStates.purgeAndUpdateActiveSeries(time.Now().Add(-i.cfg.ActiveSeriesMetricsIdleTimeout))

		case <-ctx.Done():
			return nil

		case err := <-i.subservicesWatcher.Chan():
			return errors.Wrap(err, "ingester subservice failed")
		}
	}
}

// stopping is run when the ingester is asked to stop.
func (i *Ingester) stopping(_ error) error {
	i.wal.Stop()

	// This will prevent us from accepting any more samples.
	i.stopIncomingRequests()

	// Lifecycler can be nil if the ingester is for a flusher.
	if i.lifecycler != nil {
		// Next initiate our graceful exit from the ring.
		return services.StopAndAwaitTerminated(context.Background(), i.lifecycler)
	}

	return nil
}
// ShutdownHandler triggers the following set of operations in order:
// * Change the state of the ring to stop accepting writes.
// * Flush all the chunks.
func (i *Ingester) ShutdownHandler(w http.ResponseWriter, r *http.Request) {
	originalFlush := i.lifecycler.FlushOnShutdown()
	// We want to flush the chunks if the transfer fails, irrespective of the original flag.
	i.lifecycler.SetFlushOnShutdown(true)

	// In the case of an HTTP shutdown, we want to unregister no matter what.
	originalUnregister := i.lifecycler.ShouldUnregisterOnShutdown()
	i.lifecycler.SetUnregisterOnShutdown(true)

	_ = services.StopAndAwaitTerminated(context.Background(), i)
	// Set state back to original.
	i.lifecycler.SetFlushOnShutdown(originalFlush)
	i.lifecycler.SetUnregisterOnShutdown(originalUnregister)

	w.WriteHeader(http.StatusNoContent)
}
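// Illustrative wiring (not part of this file; the route is an assumption):
// the handler above is meant to be mounted on an HTTP admin mux, e.g.:
//
//	mux := http.NewServeMux()
//	mux.HandleFunc("/ingester/shutdown", ing.ShutdownHandler)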
// stopIncomingRequests is called during the shutdown process.
func (i *Ingester) stopIncomingRequests() {
	i.userStatesMtx.Lock()
	defer i.userStatesMtx.Unlock()
	i.stopped = true
}

// checkRunningOrStopping checks that the ingester has finished starting, i.e. it is in the Running or Stopping state.
// Why Stopping? Because the ingester still runs while it is transferring data out in the Stopping state.
// The ingester handles this state on its own (via the `stopped` flag).
func (i *Ingester) checkRunningOrStopping() error {
	s := i.State()
	if s == services.Running || s == services.Stopping {
		return nil
	}
	return status.Error(codes.Unavailable, s.String())
}

// With the blocks storage, the ingester is only available when it is in the Running state. The ingester is not
// available when stopping, to prevent any reads or writes to the TSDB after the ingester has closed it.
func (i *Ingester) checkRunning() error {
	s := i.State()
	if s == services.Running {
		return nil
	}
	return status.Error(codes.Unavailable, s.String())
}

// Push implements client.IngesterServer.
func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*cortexpb.WriteResponse, error) {
	if err := i.checkRunning(); err != nil {
		return nil, err
	}

	// We will report *this* request in the error too.
	inflight := i.inflightPushRequests.Inc()
	defer i.inflightPushRequests.Dec()

	gl := i.getInstanceLimits()
	if gl != nil && gl.MaxInflightPushRequests > 0 {
		if inflight > gl.MaxInflightPushRequests {
			return nil, errTooManyInflightPushRequests
		}
	}

	if i.cfg.BlocksStorageEnabled {
		return i.v2Push(ctx, req)
	}

	// NOTE: because we use `unsafe` in deserialisation, we must not
	// retain anything from `req` past the call to ReuseSlice.
	defer cortexpb.ReuseSlice(req.Timeseries)

	userID, err := tenant.TenantID(ctx)
	if err != nil {
		return nil, err
	}

	// Since metadata is a best-effort approach and we don't halt on errors,
	// process it before the samples. Otherwise, we risk returning an error before ingestion.
	i.pushMetadata(ctx, userID, req.GetMetadata())

	var firstPartialErr *validationError
	var record *WALRecord
	if i.cfg.WALConfig.WALEnabled {
		record = recordPool.Get().(*WALRecord)
		record.UserID = userID
		// Assuming there is not much churn in most cases, there is no use
		// keeping the record.Labels slice hanging around.
		record.Series = nil
		if cap(record.Samples) < len(req.Timeseries) {
			record.Samples = make([]tsdb_record.RefSample, 0, len(req.Timeseries))
		} else {
			record.Samples = record.Samples[:0]
		}
	}

	for _, ts := range req.Timeseries {
		seriesSamplesIngested := 0
		for _, s := range ts.Samples {
			// append() copies the memory in `ts.Labels`, except on the error path.
			err := i.append(ctx, userID, ts.Labels, model.Time(s.TimestampMs), model.SampleValue(s.Value), req.Source, record)
			if err == nil {
				seriesSamplesIngested++
				continue
			}

			i.metrics.ingestedSamplesFail.Inc()
			if ve, ok := err.(*validationError); ok {
				if firstPartialErr == nil {
					firstPartialErr = ve
				}
				continue
			}

			// Non-validation error: abandon this request.
			return nil, grpcForwardableError(userID, http.StatusInternalServerError, err)
		}

		if i.cfg.ActiveSeriesMetricsEnabled && seriesSamplesIngested > 0 {
			// updateActiveSeries will copy labels if necessary.
			i.updateActiveSeries(userID, time.Now(), ts.Labels)
		}
	}

	if record != nil {
		// Log the record only if there was no error in ingestion.
		if err := i.wal.Log(record); err != nil {
			return nil, err
		}
		recordPool.Put(record)
	}

	if firstPartialErr != nil {
		// grpcForwardableError turns the error into a string so it no longer references `req`.
		return &cortexpb.WriteResponse{}, grpcForwardableError(userID, firstPartialErr.code, firstPartialErr)
	}

	return &cortexpb.WriteResponse{}, nil
}
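// Caller-side sketch (assumption: cortexpb.ToWriteRequest is the helper the
// distributor uses to build the request consumed by Push):
//
//	req := cortexpb.ToWriteRequest(
//		[]labels.Labels{labels.FromStrings("__name__", "up", "job", "node")},
//		[]cortexpb.Sample{{Value: 1, TimestampMs: time.Now().UnixMilli()}},
//		nil,          // no metadata
//		cortexpb.API, // source
//	)
//	_, err := ing.Push(ctx, req)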
// NOTE: the memory for `labels` is unsafe; anything retained beyond the
// life of this function must be copied.
func (i *Ingester) append(ctx context.Context, userID string, labels labelPairs, timestamp model.Time, value model.SampleValue, source cortexpb.WriteRequest_SourceEnum, record *WALRecord) error {
	labels.removeBlanks()

	var (
		state *userState
		fp    model.Fingerprint
	)
	i.userStatesMtx.RLock()
	defer func() {
		i.userStatesMtx.RUnlock()
		if state != nil {
			state.fpLocker.Unlock(fp)
		}
	}()
	if i.stopped {
		return errIngesterStopping
	}

	// getOrCreateSeries copies the memory for `labels`, except on the error path.
	state, fp, series, err := i.userStates.getOrCreateSeries(ctx, userID, labels, record)
	if err != nil {
		if ve, ok := err.(*validationError); ok {
			state.discardedSamples.WithLabelValues(ve.errorType).Inc()
		}

		// Reset the state so that the defer will not try to unlock the fpLocker
		// in case of error, because that lock has already been released on error.
		state = nil
		return err
	}

	prevNumChunks := len(series.chunkDescs)
	if i.cfg.SpreadFlushes && prevNumChunks > 0 {
		// Map from the fingerprint hash to a point in the cycle of period MaxChunkAge.
		startOfCycle := timestamp.Add(-(timestamp.Sub(model.Time(0)) % i.cfg.MaxChunkAge))
		slot := startOfCycle.Add(time.Duration(uint64(fp) % uint64(i.cfg.MaxChunkAge)))
		// If adding this sample means the head chunk will span that point in time, close it so it will get flushed.
		if series.head().FirstTime < slot && timestamp >= slot {
			series.closeHead(reasonSpreadFlush)
		}
	}

	if err := series.add(model.SamplePair{
		Value:     value,
		Timestamp: timestamp,
	}); err != nil {
		if ve, ok := err.(*validationError); ok {
			state.discardedSamples.WithLabelValues(ve.errorType).Inc()
			if ve.noReport {
				return nil
			}
		}
		return err
	}

	if record != nil {
		record.Samples = append(record.Samples, tsdb_record.RefSample{
			Ref: uint64(fp),
			T:   int64(timestamp),
			V:   float64(value),
		})
	}

	i.metrics.memoryChunks.Add(float64(len(series.chunkDescs) - prevNumChunks))
	i.metrics.ingestedSamples.Inc()
	switch source {
	case cortexpb.RULE:
		state.ingestedRuleSamples.Inc()
	case cortexpb.API:
		fallthrough
	default:
		state.ingestedAPISamples.Inc()
	}

	return err
}
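// spreadFlushSlot is an illustrative restatement (not part of the original
// file) of the spread-flushes arithmetic in append above: every fingerprint
// maps to a fixed offset within each MaxChunkAge cycle, and the head chunk is
// closed when a sample first crosses that per-series slot. With the default
// MaxChunkAge of 12h, flushes therefore spread evenly over each 12h window.
func spreadFlushSlot(fp model.Fingerprint, timestamp model.Time, maxChunkAge time.Duration) model.Time {
	startOfCycle := timestamp.Add(-(timestamp.Sub(model.Time(0)) % maxChunkAge))
	return startOfCycle.Add(time.Duration(uint64(fp) % uint64(maxChunkAge)))
}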
// pushMetadata returns the number of ingested metadata entries.
func (i *Ingester) pushMetadata(ctx context.Context, userID string, metadata []*cortexpb.MetricMetadata) int {
	ingestedMetadata := 0
	failedMetadata := 0

	var firstMetadataErr error
	for _, metadata := range metadata {
		err := i.appendMetadata(userID, metadata)
		if err == nil {
			ingestedMetadata++
			continue
		}

		failedMetadata++
		if firstMetadataErr == nil {
			firstMetadataErr = err
		}
	}

	i.metrics.ingestedMetadata.Add(float64(ingestedMetadata))
	i.metrics.ingestedMetadataFail.Add(float64(failedMetadata))

	// If we have any error with regard to metadata, we just log it and no-op.
	// We consider metadata a best-effort approach; errors here should not stop processing.
	if firstMetadataErr != nil {
		logger := logutil.WithContext(ctx, i.logger)
		level.Warn(logger).Log("msg", "failed to ingest some metadata", "err", firstMetadataErr)
	}

	return ingestedMetadata
}

func (i *Ingester) appendMetadata(userID string, m *cortexpb.MetricMetadata) error {
	i.userStatesMtx.RLock()
	if i.stopped {
		i.userStatesMtx.RUnlock()
		return errIngesterStopping
	}
	i.userStatesMtx.RUnlock()

	userMetadata := i.getOrCreateUserMetadata(userID)

	return userMetadata.add(m.GetMetricFamilyName(), m)
}

func (i *Ingester) getOrCreateUserMetadata(userID string) *userMetricsMetadata {
	userMetadata := i.getUserMetadata(userID)
	if userMetadata != nil {
		return userMetadata
	}

	i.usersMetadataMtx.Lock()
	defer i.usersMetadataMtx.Unlock()

	// Ensure it was not created between switching locks.
	userMetadata, ok := i.usersMetadata[userID]
	if !ok {
		userMetadata = newMetadataMap(i.limiter, i.metrics, userID)
		i.usersMetadata[userID] = userMetadata
	}
	return userMetadata
}

func (i *Ingester) getUserMetadata(userID string) *userMetricsMetadata {
	i.usersMetadataMtx.RLock()
	defer i.usersMetadataMtx.RUnlock()
	return i.usersMetadata[userID]
}

func (i *Ingester) deleteUserMetadata(userID string) {
	i.usersMetadataMtx.Lock()
	um := i.usersMetadata[userID]
	delete(i.usersMetadata, userID)
	i.usersMetadataMtx.Unlock()

	if um != nil {
		// We need to call purge to update i.metrics.memMetadata correctly (it counts the number of metrics with metadata in memory).
		// Passing the zero time means purge everything.
		um.purge(time.Time{})
	}
}

func (i *Ingester) getUsersWithMetadata() []string {
	i.usersMetadataMtx.RLock()
	defer i.usersMetadataMtx.RUnlock()

	userIDs := make([]string, 0, len(i.usersMetadata))
	for userID := range i.usersMetadata {
		userIDs = append(userIDs, userID)
	}

	return userIDs
}

func (i *Ingester) purgeUserMetricsMetadata() {
	deadline := time.Now().Add(-i.cfg.MetadataRetainPeriod)

	for _, userID := range i.getUsersWithMetadata() {
		metadata := i.getUserMetadata(userID)
		if metadata == nil {
			continue
		}

		// Remove all metadata that we no longer need to retain.
		metadata.purge(deadline)
	}
}

// Query implements service.IngesterServer.
func (i *Ingester) Query(ctx context.Context, req *client.QueryRequest) (*client.QueryResponse, error) {
	if i.cfg.BlocksStorageEnabled {
		return i.v2Query(ctx, req)
	}

	if err := i.checkRunningOrStopping(); err != nil {
		return nil, err
	}

	userID, err := tenant.TenantID(ctx)
	if err != nil {
		return nil, err
	}

	from, through, matchers, err := client.FromQueryRequest(req)
	if err != nil {
		return nil, err
	}

	i.metrics.queries.Inc()

	i.userStatesMtx.RLock()
	state, ok, err := i.userStates.getViaContext(ctx)
	i.userStatesMtx.RUnlock()
	if err != nil {
		return nil, err
	} else if !ok {
		return &client.QueryResponse{}, nil
	}

	result := &client.QueryResponse{}
	numSeries, numSamples := 0, 0
	maxSamplesPerQuery := i.limits.MaxSamplesPerQuery(userID)
	err = state.forSeriesMatching(ctx, matchers, func(ctx context.Context, _ model.Fingerprint, series *memorySeries) error {
		values, err := series.samplesForRange(from, through)
		if err != nil {
			return err
		}
		if len(values) == 0 {
			return nil
		}
		numSeries++

		numSamples += len(values)
		if numSamples > maxSamplesPerQuery {
			return httpgrpc.Errorf(http.StatusRequestEntityTooLarge, "exceeded maximum number of samples in a query (%d)", maxSamplesPerQuery)
		}

		ts := cortexpb.TimeSeries{
			Labels:  cortexpb.FromLabelsToLabelAdapters(series.metric),
			Samples: make([]cortexpb.Sample, 0, len(values)),
		}
		for _, s := range values {
			ts.Samples = append(ts.Samples, cortexpb.Sample{
				Value:       float64(s.Value),
				TimestampMs: int64(s.Timestamp),
			})
		}
		result.Timeseries = append(result.Timeseries, ts)
		return nil
	}, nil, 0)
	i.metrics.queriedSeries.Observe(float64(numSeries))
	i.metrics.queriedSamples.Observe(float64(numSamples))
	return result, err
}
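// Caller-side sketch (assumption: client.ToQueryRequest is the inverse of the
// client.FromQueryRequest call used above):
//
//	matcher, _ := labels.NewMatcher(labels.MatchEqual, "__name__", "up")
//	req, _ := client.ToQueryRequest(from, through, []*labels.Matcher{matcher})
//	resp, err := ing.Query(ctx, req)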
// QueryStream implements service.IngesterServer.
func (i *Ingester) QueryStream(req *client.QueryRequest, stream client.Ingester_QueryStreamServer) error {
	if i.cfg.BlocksStorageEnabled {
		return i.v2QueryStream(req, stream)
	}

	if err := i.checkRunningOrStopping(); err != nil {
		return err
	}

	spanLog, ctx := spanlogger.New(stream.Context(), "QueryStream")
	defer spanLog.Finish()

	from, through, matchers, err := client.FromQueryRequest(req)
	if err != nil {
		return err
	}

	i.metrics.queries.Inc()

	i.userStatesMtx.RLock()
	state, ok, err := i.userStates.getViaContext(ctx)
	i.userStatesMtx.RUnlock()
	if err != nil {
		return err
	} else if !ok {
		return nil
	}

	numSeries, numChunks := 0, 0
	reuseWireChunks := [queryStreamBatchSize][]client.Chunk{}
	batch := make([]client.TimeSeriesChunk, 0, queryStreamBatchSize)
	// We'd really like to have series in label order, not FP order, so we
	// can iteratively merge them with entries coming from the chunk store. But
	// that would involve locking all the series & sorting, so until we have
	// a better solution in the ingesters I'd rather take the hit in the queriers.
	err = state.forSeriesMatching(stream.Context(), matchers, func(ctx context.Context, _ model.Fingerprint, series *memorySeries) error {
		chunks := make([]*desc, 0, len(series.chunkDescs))
		for _, chunk := range series.chunkDescs {
			if !(chunk.FirstTime.After(through) || chunk.LastTime.Before(from)) {
				chunks = append(chunks, chunk.slice(from, through))
			}
		}

		if len(chunks) == 0 {
			return nil
		}

		numSeries++
		reusePos := len(batch)
		wireChunks, err := toWireChunks(chunks, reuseWireChunks[reusePos])
		if err != nil {
			return err
		}
		reuseWireChunks[reusePos] = wireChunks

		numChunks += len(wireChunks)
		batch = append(batch, client.TimeSeriesChunk{
			Labels: cortexpb.FromLabelsToLabelAdapters(series.metric),
			Chunks: wireChunks,
		})

		return nil
	}, func(ctx context.Context) error {
		if len(batch) == 0 {
			return nil
		}
		err = client.SendQueryStream(stream, &client.QueryStreamResponse{
			Chunkseries: batch,
		})
		batch = batch[:0]
		return err
	}, queryStreamBatchSize)
	if err != nil {
		return err
	}

	i.metrics.queriedSeries.Observe(float64(numSeries))
	i.metrics.queriedChunks.Observe(float64(numChunks))
	level.Debug(spanLog).Log("streams", numSeries)
	level.Debug(spanLog).Log("chunks", numChunks)
	return err
}

// QueryExemplars implements service.IngesterServer.
func (i *Ingester) QueryExemplars(ctx context.Context, req *client.ExemplarQueryRequest) (*client.ExemplarQueryResponse, error) {
	if !i.cfg.BlocksStorageEnabled {
		return nil, errors.New("not supported")
	}

	return i.v2QueryExemplars(ctx, req)
}
// LabelValues returns all label values that are associated with a given label name.
func (i *Ingester) LabelValues(ctx context.Context, req *client.LabelValuesRequest) (*client.LabelValuesResponse, error) {
	if i.cfg.BlocksStorageEnabled {
		return i.v2LabelValues(ctx, req)
	}

	if err := i.checkRunningOrStopping(); err != nil {
		return nil, err
	}

	i.userStatesMtx.RLock()
	defer i.userStatesMtx.RUnlock()
	state, ok, err := i.userStates.getViaContext(ctx)
	if err != nil {
		return nil, err
	} else if !ok {
		return &client.LabelValuesResponse{}, nil
	}

	resp := &client.LabelValuesResponse{}
	resp.LabelValues = append(resp.LabelValues, state.index.LabelValues(req.LabelName)...)

	return resp, nil
}

// LabelNames returns all the label names.
func (i *Ingester) LabelNames(ctx context.Context, req *client.LabelNamesRequest) (*client.LabelNamesResponse, error) {
	if i.cfg.BlocksStorageEnabled {
		return i.v2LabelNames(ctx, req)
	}

	if err := i.checkRunningOrStopping(); err != nil {
		return nil, err
	}

	i.userStatesMtx.RLock()
	defer i.userStatesMtx.RUnlock()
	state, ok, err := i.userStates.getViaContext(ctx)
	if err != nil {
		return nil, err
	} else if !ok {
		return &client.LabelNamesResponse{}, nil
	}

	resp := &client.LabelNamesResponse{}
	resp.LabelNames = append(resp.LabelNames, state.index.LabelNames()...)

	return resp, nil
}

// MetricsForLabelMatchers returns all the metrics which match a set of matchers.
func (i *Ingester) MetricsForLabelMatchers(ctx context.Context, req *client.MetricsForLabelMatchersRequest) (*client.MetricsForLabelMatchersResponse, error) {
	if i.cfg.BlocksStorageEnabled {
		return i.v2MetricsForLabelMatchers(ctx, req)
	}

	if err := i.checkRunningOrStopping(); err != nil {
		return nil, err
	}

	i.userStatesMtx.RLock()
	defer i.userStatesMtx.RUnlock()
	state, ok, err := i.userStates.getViaContext(ctx)
	if err != nil {
		return nil, err
	} else if !ok {
		return &client.MetricsForLabelMatchersResponse{}, nil
	}

	// TODO: right now we ignore start and end.
	_, _, matchersSet, err := client.FromMetricsForLabelMatchersRequest(req)
	if err != nil {
		return nil, err
	}

	lss := map[model.Fingerprint]labels.Labels{}
	for _, matchers := range matchersSet {
		if err := state.forSeriesMatching(ctx, matchers, func(ctx context.Context, fp model.Fingerprint, series *memorySeries) error {
			if _, ok := lss[fp]; !ok {
				lss[fp] = series.metric
			}
			return nil
		}, nil, 0); err != nil {
			return nil, err
		}
	}

	result := &client.MetricsForLabelMatchersResponse{
		Metric: make([]*cortexpb.Metric, 0, len(lss)),
	}
	for _, ls := range lss {
		result.Metric = append(result.Metric, &cortexpb.Metric{Labels: cortexpb.FromLabelsToLabelAdapters(ls)})
	}

	return result, nil
}
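// Caller-side sketch (assumption: client.ToMetricsForLabelMatchersRequest is
// the counterpart of the FromMetricsForLabelMatchersRequest call above):
//
//	matcher, _ := labels.NewMatcher(labels.MatchEqual, "job", "node")
//	req, _ := client.ToMetricsForLabelMatchersRequest(from, through, []*labels.Matcher{matcher})
//	resp, err := ing.MetricsForLabelMatchers(ctx, req)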
// MetricsMetadata returns all the metric metadata of a user.
func (i *Ingester) MetricsMetadata(ctx context.Context, req *client.MetricsMetadataRequest) (*client.MetricsMetadataResponse, error) {
	i.userStatesMtx.RLock()
	if err := i.checkRunningOrStopping(); err != nil {
		i.userStatesMtx.RUnlock()
		return nil, err
	}
	i.userStatesMtx.RUnlock()

	userID, err := tenant.TenantID(ctx)
	if err != nil {
		return nil, err
	}

	userMetadata := i.getUserMetadata(userID)

	if userMetadata == nil {
		return &client.MetricsMetadataResponse{}, nil
	}

	return &client.MetricsMetadataResponse{Metadata: userMetadata.toClientMetadata()}, nil
}

// UserStats returns ingestion statistics for the current user.
func (i *Ingester) UserStats(ctx context.Context, req *client.UserStatsRequest) (*client.UserStatsResponse, error) {
	if i.cfg.BlocksStorageEnabled {
		return i.v2UserStats(ctx, req)
	}

	if err := i.checkRunningOrStopping(); err != nil {
		return nil, err
	}

	i.userStatesMtx.RLock()
	defer i.userStatesMtx.RUnlock()
	state, ok, err := i.userStates.getViaContext(ctx)
	if err != nil {
		return nil, err
	} else if !ok {
		return &client.UserStatsResponse{}, nil
	}

	apiRate := state.ingestedAPISamples.Rate()
	ruleRate := state.ingestedRuleSamples.Rate()
	return &client.UserStatsResponse{
		IngestionRate:     apiRate + ruleRate,
		ApiIngestionRate:  apiRate,
		RuleIngestionRate: ruleRate,
		NumSeries:         uint64(state.fpToSeries.length()),
	}, nil
}

// AllUserStats returns ingestion statistics for all users known to this ingester.
func (i *Ingester) AllUserStats(ctx context.Context, req *client.UserStatsRequest) (*client.UsersStatsResponse, error) {
	if i.cfg.BlocksStorageEnabled {
		return i.v2AllUserStats(ctx, req)
	}

	if err := i.checkRunningOrStopping(); err != nil {
		return nil, err
	}

	i.userStatesMtx.RLock()
	defer i.userStatesMtx.RUnlock()
	users := i.userStates.cp()

	response := &client.UsersStatsResponse{
		Stats: make([]*client.UserIDStatsResponse, 0, len(users)),
	}
	for userID, state := range users {
		apiRate := state.ingestedAPISamples.Rate()
		ruleRate := state.ingestedRuleSamples.Rate()
		response.Stats = append(response.Stats, &client.UserIDStatsResponse{
			UserId: userID,
			Data: &client.UserStatsResponse{
				IngestionRate:     apiRate + ruleRate,
				ApiIngestionRate:  apiRate,
				RuleIngestionRate: ruleRate,
				NumSeries:         uint64(state.fpToSeries.length()),
			},
		})
	}
	return response, nil
}

// CheckReady is the readiness handler used to indicate to k8s when the ingesters
// are ready for the addition or removal of another ingester.
func (i *Ingester) CheckReady(ctx context.Context) error {
	if err := i.checkRunningOrStopping(); err != nil {
		return fmt.Errorf("ingester not ready: %v", err)
	}
	return i.lifecycler.CheckReady(ctx)
}

// labels will be copied if needed.
func (i *Ingester) updateActiveSeries(userID string, now time.Time, labels []cortexpb.LabelAdapter) {
	i.userStatesMtx.RLock()
	defer i.userStatesMtx.RUnlock()

	i.userStates.updateActiveSeriesForUser(userID, now, cortexpb.FromLabelAdaptersToLabels(labels))
}
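// Illustrative readiness wiring (not part of this file; the route is an
// assumption): CheckReady above is what a Kubernetes readiness probe would
// ultimately call.
//
//	http.HandleFunc("/ready", func(w http.ResponseWriter, r *http.Request) {
//		if err := ing.CheckReady(r.Context()); err != nil {
//			http.Error(w, err.Error(), http.StatusServiceUnavailable)
//			return
//		}
//		w.WriteHeader(http.StatusOK)
//	})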