// github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/indexshipper/compactor/compactor.go

package compactor

import (
	"context"
	"flag"
	"fmt"
	"net/http"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/grafana/loki/pkg/validation"

	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/kv"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/common/model"

	"github.com/grafana/loki/pkg/storage/chunk/client"
	"github.com/grafana/loki/pkg/storage/chunk/client/local"
	chunk_util "github.com/grafana/loki/pkg/storage/chunk/client/util"
	"github.com/grafana/loki/pkg/storage/config"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/deletion"
	"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/retention"
	shipper_storage "github.com/grafana/loki/pkg/storage/stores/indexshipper/storage"
	"github.com/grafana/loki/pkg/usagestats"
	"github.com/grafana/loki/pkg/util"
	util_log "github.com/grafana/loki/pkg/util/log"
)

// Here is how the generic compactor works:
// 1. Find the index type from the table name using schemaPeriodForTable.
// 2. Find the registered IndexCompactor for the index type.
// 3. Build an instance of TableCompactor using IndexCompactor.NewTableCompactor, with all the required information to do a compaction.
// 4. Run the compaction using TableCompactor.Compact, which sets the new/updated CompactedIndex for each IndexSet.
// 5. If retention is enabled, run retention on the CompactedIndex using its retention.IndexProcessor implementation.
// 6. Convert the CompactedIndex to a file using IndexCompactor.ToIndexFile for uploading.
// 7. If we uploaded successfully, delete the old index files.
//
// (A hedged sketch restating steps 1 and 2 follows the const block below.)

const (
	// ringAutoForgetUnhealthyPeriods is how many consecutive timeout periods an unhealthy instance
	// in the ring is tolerated before being automatically removed.
	ringAutoForgetUnhealthyPeriods = 10

	// ringKey is the key under which we store the compactor ring in the KVStore.
	ringKey = "compactor"

	// ringNameForServer is the name of the ring used by the compactor server.
	ringNameForServer = "compactor"

	// ringKeyOfLeader is a somewhat arbitrary ID to pull from the ring to see who will be elected the leader.
	ringKeyOfLeader = 0

	// ringReplicationFactor should be 1 because we only want to pull back one node from the ring.
	ringReplicationFactor = 1

	// ringNumTokens sets our single token in the ring;
	// we only need to insert 1 token to be used for leader election purposes.
	ringNumTokens = 1
)
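// The function below is NOT part of the original file: it is a hedged,
// minimal sketch restating steps 1 and 2 of the pipeline described above,
// using only helpers defined later in this file (schemaPeriodForTable and
// the indexCompactors registry). It is illustrative only.
func resolveIndexCompactorExample(c *Compactor, tableName string) (IndexCompactor, error) {
	// Step 1: resolve the schema period (and therefore the index type) that owns this table.
	schemaCfg, ok := schemaPeriodForTable(c.schemaConfig, tableName)
	if !ok {
		return nil, fmt.Errorf("no schema period found for table %s", tableName)
	}
	// Step 2: look up the IndexCompactor registered for that index type.
	indexCompactor, ok := c.indexCompactors[schemaCfg.IndexType]
	if !ok {
		return nil, fmt.Errorf("no index compactor registered for index type %s", schemaCfg.IndexType)
	}
	return indexCompactor, nil
}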
var (
	retentionEnabledStats = usagestats.NewString("compactor_retention_enabled")
	defaultRetentionStats = usagestats.NewString("compactor_default_retention")
)

type Config struct {
	WorkingDirectory          string          `yaml:"working_directory"`
	SharedStoreType           string          `yaml:"shared_store"`
	SharedStoreKeyPrefix      string          `yaml:"shared_store_key_prefix"`
	CompactionInterval        time.Duration   `yaml:"compaction_interval"`
	ApplyRetentionInterval    time.Duration   `yaml:"apply_retention_interval"`
	RetentionEnabled          bool            `yaml:"retention_enabled"`
	RetentionDeleteDelay      time.Duration   `yaml:"retention_delete_delay"`
	RetentionDeleteWorkCount  int             `yaml:"retention_delete_worker_count"`
	RetentionTableTimeout     time.Duration   `yaml:"retention_table_timeout"`
	DeleteBatchSize           int             `yaml:"delete_batch_size"`
	DeleteRequestCancelPeriod time.Duration   `yaml:"delete_request_cancel_period"`
	MaxCompactionParallelism  int             `yaml:"max_compaction_parallelism"`
	CompactorRing             util.RingConfig `yaml:"compactor_ring,omitempty"`
	RunOnce                   bool            `yaml:"-"`
}

// RegisterFlags registers flags.
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	f.StringVar(&cfg.WorkingDirectory, "boltdb.shipper.compactor.working-directory", "", "Directory where files can be downloaded for compaction.")
	f.StringVar(&cfg.SharedStoreType, "boltdb.shipper.compactor.shared-store", "", "Shared store used for storing boltdb files. Supported types: gcs, s3, azure, swift, filesystem")
	f.StringVar(&cfg.SharedStoreKeyPrefix, "boltdb.shipper.compactor.shared-store.key-prefix", "index/", "Prefix to add to object keys in the shared store. The path separator (if any) should always be '/'. The prefix should never start with a separator but should always end with one.")
	f.DurationVar(&cfg.CompactionInterval, "boltdb.shipper.compactor.compaction-interval", 10*time.Minute, "Interval at which to re-run the compaction operation.")
	f.DurationVar(&cfg.ApplyRetentionInterval, "boltdb.shipper.compactor.apply-retention-interval", 0, "Interval at which to apply/enforce retention. 0 means run at the same interval as compaction. If non-zero, it should always be a multiple of the compaction interval.")
	f.DurationVar(&cfg.RetentionDeleteDelay, "boltdb.shipper.compactor.retention-delete-delay", 2*time.Hour, "Delay after which chunks will be fully deleted during retention.")
	f.BoolVar(&cfg.RetentionEnabled, "boltdb.shipper.compactor.retention-enabled", false, "(Experimental) Activate custom (per-stream, per-tenant) retention.")
	f.IntVar(&cfg.RetentionDeleteWorkCount, "boltdb.shipper.compactor.retention-delete-worker-count", 150, "The total number of workers to use to delete chunks.")
	f.IntVar(&cfg.DeleteBatchSize, "boltdb.shipper.compactor.delete-batch-size", 70, "The max number of delete requests to run per compaction cycle.")
	f.DurationVar(&cfg.DeleteRequestCancelPeriod, "boltdb.shipper.compactor.delete-request-cancel-period", 24*time.Hour, "Allow cancellation of delete requests until this duration after they are created. Data is deleted only once delete requests are older than this duration. Ideally this should be set to at least 24h.")
	f.DurationVar(&cfg.RetentionTableTimeout, "boltdb.shipper.compactor.retention-table-timeout", 0, "The maximum amount of time to spend running retention and deletion on any given table in the index.")
	f.IntVar(&cfg.MaxCompactionParallelism, "boltdb.shipper.compactor.max-compaction-parallelism", 1, "Maximum number of tables to compact in parallel. When increasing this value, make sure the compactor has enough disk space allocated to store and compact that many tables.")
	f.BoolVar(&cfg.RunOnce, "boltdb.shipper.compactor.run-once", false, "Run the compactor one time to clean up and compact index files only (no retention applied).")
	cfg.CompactorRing.RegisterFlagsWithPrefix("boltdb.shipper.compactor.", "collectors/", f)
}

// Validate verifies the config does not contain inappropriate values.
func (cfg *Config) Validate() error {
	if cfg.MaxCompactionParallelism < 1 {
		return errors.New("max compaction parallelism must be >= 1")
	}
	if cfg.RetentionEnabled && cfg.ApplyRetentionInterval != 0 && cfg.ApplyRetentionInterval%cfg.CompactionInterval != 0 {
		return errors.New("interval for applying retention should either be 0 or a multiple of the compaction interval")
	}

	return shipper_storage.ValidateSharedStoreKeyPrefix(cfg.SharedStoreKeyPrefix)
}
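// The function below is NOT part of the original file: it is a hedged sketch
// of how the Config above is typically wired up in tools and tests: register
// the flags on a FlagSet, optionally override some of them, then Validate.
// The override values here are made up.
func exampleConfigFromFlags() (*Config, error) {
	var cfg Config
	fs := flag.NewFlagSet("compactor-example", flag.ContinueOnError)
	cfg.RegisterFlags(fs)
	// Hypothetical overrides; any flag registered in RegisterFlags can be set this way.
	if err := fs.Parse([]string{
		"-boltdb.shipper.compactor.working-directory=/tmp/loki-compactor",
		"-boltdb.shipper.compactor.shared-store=filesystem",
		"-boltdb.shipper.compactor.max-compaction-parallelism=2",
	}); err != nil {
		return nil, err
	}
	if err := cfg.Validate(); err != nil {
		return nil, err
	}
	return &cfg, nil
}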
type Compactor struct {
	services.Service

	cfg                   Config
	indexStorageClient    shipper_storage.Client
	tableMarker           retention.TableMarker
	sweeper               *retention.Sweeper
	deleteRequestsStore   deletion.DeleteRequestsStore
	DeleteRequestsHandler *deletion.DeleteRequestHandler
	deleteRequestsManager *deletion.DeleteRequestsManager
	expirationChecker     retention.ExpirationChecker
	metrics               *metrics
	running               bool
	wg                    sync.WaitGroup
	indexCompactors       map[string]IndexCompactor
	schemaConfig          config.SchemaConfig

	// Ring used for running a single compactor
	ringLifecycler *ring.BasicLifecycler
	ring           *ring.Ring
	ringPollPeriod time.Duration

	// Subservices manager.
	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher
}

func NewCompactor(cfg Config, objectClient client.ObjectClient, schemaConfig config.SchemaConfig, limits *validation.Overrides, r prometheus.Registerer) (*Compactor, error) {
	retentionEnabledStats.Set("false")
	if cfg.RetentionEnabled {
		retentionEnabledStats.Set("true")
	}
	if limits != nil {
		defaultRetentionStats.Set(limits.DefaultLimits().RetentionPeriod.String())
	}
	if cfg.SharedStoreType == "" {
		return nil, errors.New("compactor shared_store_type must be specified")
	}

	compactor := &Compactor{
		cfg:             cfg,
		ringPollPeriod:  5 * time.Second,
		indexCompactors: map[string]IndexCompactor{},
		schemaConfig:    schemaConfig,
	}

	ringStore, err := kv.NewClient(
		cfg.CompactorRing.KVStore,
		ring.GetCodec(),
		kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("loki_", r), "compactor"),
		util_log.Logger,
	)
	if err != nil {
		return nil, errors.Wrap(err, "create KV store client")
	}
	lifecyclerCfg, err := cfg.CompactorRing.ToLifecyclerConfig(ringNumTokens, util_log.Logger)
	if err != nil {
		return nil, errors.Wrap(err, "invalid ring lifecycler config")
	}

	// Define lifecycler delegates in reverse order (the last delegate to be called is
	// defined first, because they are chained via "next delegate").
	delegate := ring.BasicLifecyclerDelegate(compactor)
	delegate = ring.NewLeaveOnStoppingDelegate(delegate, util_log.Logger)
	delegate = ring.NewTokensPersistencyDelegate(cfg.CompactorRing.TokensFilePath, ring.JOINING, delegate, util_log.Logger)
	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*cfg.CompactorRing.HeartbeatTimeout, delegate, util_log.Logger)

	compactor.ringLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, ringNameForServer, ringKey, ringStore, delegate, util_log.Logger, r)
	if err != nil {
		return nil, errors.Wrap(err, "create ring lifecycler")
	}

	ringCfg := cfg.CompactorRing.ToRingConfig(ringReplicationFactor)
	compactor.ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, ringNameForServer, ringKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("cortex_", r), util_log.Logger)
	if err != nil {
		return nil, errors.Wrap(err, "create ring client")
	}

	compactor.subservices, err = services.NewManager(compactor.ringLifecycler, compactor.ring)
	if err != nil {
		return nil, err
	}
	compactor.subservicesWatcher = services.NewFailureWatcher()
	compactor.subservicesWatcher.WatchManager(compactor.subservices)

	if err := compactor.init(objectClient, schemaConfig, limits, r); err != nil {
		return nil, err
	}

	compactor.Service = services.NewBasicService(compactor.starting, compactor.loop, compactor.stopping)
	return compactor, nil
}

func (c *Compactor) init(objectClient client.ObjectClient, schemaConfig config.SchemaConfig, limits *validation.Overrides, r prometheus.Registerer) error {
	err := chunk_util.EnsureDirectory(c.cfg.WorkingDirectory)
	if err != nil {
		return err
	}
	c.indexStorageClient = shipper_storage.NewIndexStorageClient(objectClient, c.cfg.SharedStoreKeyPrefix)
	c.metrics = newMetrics(r)

	if c.cfg.RetentionEnabled {
		var encoder client.KeyEncoder
		if _, ok := objectClient.(*local.FSObjectClient); ok {
			encoder = client.FSEncoder
		}

		chunkClient := client.NewClient(objectClient, encoder, schemaConfig)

		retentionWorkDir := filepath.Join(c.cfg.WorkingDirectory, "retention")
		c.sweeper, err = retention.NewSweeper(retentionWorkDir, chunkClient, c.cfg.RetentionDeleteWorkCount, c.cfg.RetentionDeleteDelay, r)
		if err != nil {
			return err
		}

		if err := c.initDeletes(r, limits); err != nil {
			return err
		}

		c.tableMarker, err = retention.NewMarker(retentionWorkDir, c.expirationChecker, c.cfg.RetentionTableTimeout, chunkClient, r)
		if err != nil {
			return err
		}
	}

	return nil
}

func (c *Compactor) initDeletes(r prometheus.Registerer, limits *validation.Overrides) error {
	deletionWorkDir := filepath.Join(c.cfg.WorkingDirectory, "deletion")

	store, err := deletion.NewDeleteStore(deletionWorkDir, c.indexStorageClient)
	if err != nil {
		return err
	}
	c.deleteRequestsStore = store

	c.DeleteRequestsHandler = deletion.NewDeleteRequestHandler(
		c.deleteRequestsStore,
		c.cfg.DeleteRequestCancelPeriod,
		r,
	)

	c.deleteRequestsManager = deletion.NewDeleteRequestsManager(
		c.deleteRequestsStore,
		c.cfg.DeleteRequestCancelPeriod,
		c.cfg.DeleteBatchSize,
		limits,
		r,
	)

	c.expirationChecker = newExpirationChecker(retention.NewExpirationChecker(limits), c.deleteRequestsManager)
	return nil
}

func (c *Compactor) starting(ctx context.Context) (err error) {
	// If this function returns an error we want to unregister the instance from the
	// ring. We do it by ensuring dependencies are gracefully stopped if they
	// were already started.
	defer func() {
		if err == nil || c.subservices == nil {
			return
		}

		if stopErr := services.StopManagerAndAwaitStopped(context.Background(), c.subservices); stopErr != nil {
			level.Error(util_log.Logger).Log("msg", "failed to gracefully stop compactor dependencies", "err", stopErr)
		}
	}()

	if err := services.StartManagerAndAwaitHealthy(ctx, c.subservices); err != nil {
		return errors.Wrap(err, "unable to start compactor subservices")
	}

	// The BasicLifecycler does not automatically move the state to ACTIVE, so that any
	// additional work can be done before becoming ACTIVE. The compactor currently has no
	// additional work, so it can become ACTIVE right away.

	// Wait until the ring client has detected this instance in the JOINING state, to
	// make sure that by the time we run the initial sync we already know the tokens
	// assigned to this instance.
	level.Info(util_log.Logger).Log("msg", "waiting until compactor is JOINING in the ring")
	if err := ring.WaitInstanceState(ctx, c.ring, c.ringLifecycler.GetInstanceID(), ring.JOINING); err != nil {
		return err
	}
	level.Info(util_log.Logger).Log("msg", "compactor is JOINING in the ring")

	// Change ring state to ACTIVE
	if err = c.ringLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil {
		return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE)
	}

	// Wait until the ring client has detected this instance in the ACTIVE state, to
	// make sure that when we run the loop it won't be detected as a ring
	// topology change.
	level.Info(util_log.Logger).Log("msg", "waiting until compactor is ACTIVE in the ring")
	if err := ring.WaitInstanceState(ctx, c.ring, c.ringLifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
		return err
	}
	level.Info(util_log.Logger).Log("msg", "compactor is ACTIVE in the ring")

	return nil
}
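// The function below is NOT part of the original file: it is a hedged sketch
// of how a caller typically drives the Compactor as a dskit service once
// NewCompactor has returned: start it and wait for it to become healthy,
// then stop it gracefully when the surrounding context is cancelled.
func runCompactorServiceExample(ctx context.Context, c *Compactor) error {
	if err := services.StartAndAwaitRunning(ctx, c); err != nil {
		return errors.Wrap(err, "start compactor service")
	}
	// Block until the caller cancels, then stop the service gracefully.
	<-ctx.Done()
	return services.StopAndAwaitTerminated(context.Background(), c)
}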
func (c *Compactor) loop(ctx context.Context) error {
	if c.cfg.RunOnce {
		level.Info(util_log.Logger).Log("msg", "running single compaction")
		err := c.RunCompaction(ctx, false)
		if err != nil {
			level.Error(util_log.Logger).Log("msg", "compaction encountered an error", "err", err)
		}
		level.Info(util_log.Logger).Log("msg", "single compaction finished")
		level.Info(util_log.Logger).Log("msg", "interrupt or terminate the process to finish")

		// Wait for Loki to shut down.
		<-ctx.Done()
		level.Info(util_log.Logger).Log("msg", "compactor exiting")
		return nil
	}

	if c.cfg.RetentionEnabled {
		if c.deleteRequestsStore != nil {
			defer c.deleteRequestsStore.Stop()
		}
		if c.deleteRequestsManager != nil {
			defer c.deleteRequestsManager.Stop()
		}
	}

	syncTicker := time.NewTicker(c.ringPollPeriod)
	defer syncTicker.Stop()

	var runningCtx context.Context
	var runningCancel context.CancelFunc

	for {
		select {
		case <-ctx.Done():
			if runningCancel != nil {
				runningCancel()
			}
			c.wg.Wait()
			level.Info(util_log.Logger).Log("msg", "compactor exiting")
			return nil
		case <-syncTicker.C:
			bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet()
			rs, err := c.ring.Get(ringKeyOfLeader, ring.Write, bufDescs, bufHosts, bufZones)
			if err != nil {
				level.Error(util_log.Logger).Log("msg", "error asking the ring who should run the compactor, will check again", "err", err)
				continue
			}

			addrs := rs.GetAddresses()
			if len(addrs) != 1 {
				level.Error(util_log.Logger).Log("msg", "did not get exactly one address when asking the ring who should run the compactor, will check again")
				continue
			}
			if c.ringLifecycler.GetInstanceAddr() == addrs[0] {
				// If not running, start.
				if !c.running {
					level.Info(util_log.Logger).Log("msg", "this instance has been chosen to run the compactor, starting compactor")
					runningCtx, runningCancel = context.WithCancel(ctx)
					go c.runCompactions(runningCtx)
					c.running = true
					c.metrics.compactorRunning.Set(1)
				}
			} else {
				// If running, shut down.
				if c.running {
					level.Info(util_log.Logger).Log("msg", "this instance should no longer run the compactor, stopping compactor")
					runningCancel()
					c.wg.Wait()
					c.running = false
					c.metrics.compactorRunning.Set(0)
					level.Info(util_log.Logger).Log("msg", "compactor stopped")
				}
			}
		}
	}
}
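// The function below is NOT part of the original file: it is a hedged sketch
// of the leader check performed in loop() above. With ringReplicationFactor
// set to 1, asking the ring for ringKeyOfLeader returns exactly one healthy
// instance; whichever compactor sees its own address in the reply is the one
// that should run compactions.
func isCompactorLeaderExample(r ring.ReadRing, instanceAddr string) (bool, error) {
	bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet()
	rs, err := r.Get(ringKeyOfLeader, ring.Write, bufDescs, bufHosts, bufZones)
	if err != nil {
		return false, err
	}
	addrs := rs.GetAddresses()
	if len(addrs) != 1 {
		return false, fmt.Errorf("expected exactly one address from the ring, got %d", len(addrs))
	}
	return addrs[0] == instanceAddr, nil
}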
func (c *Compactor) runCompactions(ctx context.Context) {
	// To avoid races, wait one compaction interval before actually starting the compactor.
	// This allows the ring to settle when there are many ring changes, and gives
	// existing compactors time to shut down before this one starts, so that
	// multiple compactors don't run at the same time.
	func() {
		t := time.NewTimer(c.cfg.CompactionInterval)
		defer t.Stop()
		level.Info(util_log.Logger).Log("msg", fmt.Sprintf("waiting %v for ring to stay stable and previous compactions to finish before starting compactor", c.cfg.CompactionInterval))
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			level.Info(util_log.Logger).Log("msg", "compactor startup delay completed")
		}
	}()

	lastRetentionRunAt := time.Unix(0, 0)
	runCompaction := func() {
		applyRetention := false
		if c.cfg.RetentionEnabled && time.Since(lastRetentionRunAt) >= c.cfg.ApplyRetentionInterval {
			level.Info(util_log.Logger).Log("msg", "applying retention with compaction")
			applyRetention = true
		}

		err := c.RunCompaction(ctx, applyRetention)
		if err != nil {
			level.Error(util_log.Logger).Log("msg", "failed to run compaction", "err", err)
		}

		if applyRetention {
			lastRetentionRunAt = time.Now()
		}
	}

	c.wg.Add(1)
	go func() {
		defer c.wg.Done()
		runCompaction()

		ticker := time.NewTicker(c.cfg.CompactionInterval)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				runCompaction()
			case <-ctx.Done():
				return
			}
		}
	}()
	if c.cfg.RetentionEnabled {
		c.wg.Add(1)
		go func() {
			// Start the chunk sweeper.
			defer func() {
				c.sweeper.Stop()
				c.wg.Done()
			}()
			c.sweeper.Start()
			<-ctx.Done()
		}()
	}
	level.Info(util_log.Logger).Log("msg", "compactor started")
}

func (c *Compactor) stopping(_ error) error {
	return services.StopManagerAndAwaitStopped(context.Background(), c.subservices)
}

func (c *Compactor) CompactTable(ctx context.Context, tableName string, applyRetention bool) error {
	schemaCfg, ok := schemaPeriodForTable(c.schemaConfig, tableName)
	if !ok {
		level.Error(util_log.Logger).Log("msg", "skipping compaction since we can't find schema for table", "table", tableName)
		return nil
	}

	indexCompactor, ok := c.indexCompactors[schemaCfg.IndexType]
	if !ok {
		return fmt.Errorf("index processor not found for index type %s", schemaCfg.IndexType)
	}

	table, err := newTable(ctx, filepath.Join(c.cfg.WorkingDirectory, tableName), c.indexStorageClient, indexCompactor,
		schemaCfg, c.tableMarker, c.expirationChecker)
	if err != nil {
		level.Error(util_log.Logger).Log("msg", "failed to initialize table for compaction", "table", tableName, "err", err)
		return err
	}

	interval := retention.ExtractIntervalFromTableName(tableName)
	intervalMayHaveExpiredChunks := false
	if applyRetention {
		intervalMayHaveExpiredChunks = c.expirationChecker.IntervalMayHaveExpiredChunks(interval, "")
	}

	err = table.compact(intervalMayHaveExpiredChunks)
	if err != nil {
		level.Error(util_log.Logger).Log("msg", "failed to compact files", "table", tableName, "err", err)
		return err
	}
	return nil
}

func (c *Compactor) RegisterIndexCompactor(indexType string, indexCompactor IndexCompactor) {
	c.indexCompactors[indexType] = indexCompactor
}
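// The function below is NOT part of the original file: it is a hedged sketch
// of how an index compactor implementation is registered before the service
// starts. "boltdb-shipper" is the index type string Loki uses for shipper
// schemas; myIndexCompactor stands in for a concrete IndexCompactor.
func registerIndexCompactorExample(c *Compactor, myIndexCompactor IndexCompactor) {
	// The key must match the IndexType that schemaPeriodForTable resolves
	// for the tables this compactor should handle.
	c.RegisterIndexCompactor("boltdb-shipper", myIndexCompactor)
}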
func (c *Compactor) RunCompaction(ctx context.Context, applyRetention bool) error {
	status := statusSuccess
	start := time.Now()

	if applyRetention {
		c.expirationChecker.MarkPhaseStarted()
	}

	defer func() {
		c.metrics.compactTablesOperationTotal.WithLabelValues(status).Inc()
		runtime := time.Since(start)
		if status == statusSuccess {
			c.metrics.compactTablesOperationDurationSeconds.Set(runtime.Seconds())
			c.metrics.compactTablesOperationLastSuccess.SetToCurrentTime()
			if applyRetention {
				c.metrics.applyRetentionLastSuccess.SetToCurrentTime()
			}
		}

		if applyRetention {
			if status == statusSuccess {
				c.expirationChecker.MarkPhaseFinished()
			} else {
				c.expirationChecker.MarkPhaseFailed()
			}
		}
		if runtime > c.cfg.CompactionInterval {
			level.Warn(util_log.Logger).Log("msg", fmt.Sprintf("last compaction took %s which is longer than the compaction interval of %s, this can lead to duplicate compactors running if not running a standalone compactor instance.", runtime, c.cfg.CompactionInterval))
		}
	}()

	// Refresh the index list cache since the previous compaction would have changed the index files in the object store.
	c.indexStorageClient.RefreshIndexListCache(ctx)

	tables, err := c.indexStorageClient.ListTables(ctx)
	if err != nil {
		status = statusFailure
		return err
	}

	compactTablesChan := make(chan string)
	errChan := make(chan error)

	for i := 0; i < c.cfg.MaxCompactionParallelism; i++ {
		go func() {
			var err error
			defer func() {
				errChan <- err
			}()

			for {
				select {
				case tableName, ok := <-compactTablesChan:
					if !ok {
						return
					}

					level.Info(util_log.Logger).Log("msg", "compacting table", "table-name", tableName)
					err = c.CompactTable(ctx, tableName, applyRetention)
					if err != nil {
						return
					}
					level.Info(util_log.Logger).Log("msg", "finished compacting table", "table-name", tableName)
				case <-ctx.Done():
					return
				}
			}
		}()
	}

	go func() {
		for _, tableName := range tables {
			if tableName == deletion.DeleteRequestsTableName {
				// We do not want to compact or apply retention on the delete requests table.
				continue
			}

			select {
			case compactTablesChan <- tableName:
			case <-ctx.Done():
				return
			}
		}

		close(compactTablesChan)
	}()

	var firstErr error
	// Read all the errors.
	for i := 0; i < c.cfg.MaxCompactionParallelism; i++ {
		err := <-errChan
		if err != nil && firstErr == nil {
			status = statusFailure
			firstErr = err
		}
	}

	return firstErr
}
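// The function below is NOT part of the original file: it is a hedged,
// self-contained sketch of the fan-out pattern RunCompaction uses above.
// A channel of table names feeds `parallelism` workers; each worker sends
// exactly one (possibly nil) error on errChan when it stops, and the first
// non-nil error is returned once every worker has reported back.
func compactTablesExample(ctx context.Context, tables []string, parallelism int, compact func(context.Context, string) error) error {
	tablesChan := make(chan string)
	errChan := make(chan error)

	for i := 0; i < parallelism; i++ {
		go func() {
			var err error
			defer func() { errChan <- err }()
			for {
				select {
				case tableName, ok := <-tablesChan:
					if !ok {
						return
					}
					if err = compact(ctx, tableName); err != nil {
						return
					}
				case <-ctx.Done():
					return
				}
			}
		}()
	}

	go func() {
		for _, tableName := range tables {
			select {
			case tablesChan <- tableName:
			case <-ctx.Done():
				return
			}
		}
		close(tablesChan)
	}()

	var firstErr error
	for i := 0; i < parallelism; i++ {
		if err := <-errChan; err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}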
type expirationChecker struct {
	retentionExpiryChecker retention.ExpirationChecker
	deletionExpiryChecker  retention.ExpirationChecker
}

func newExpirationChecker(retentionExpiryChecker, deletionExpiryChecker retention.ExpirationChecker) retention.ExpirationChecker {
	return &expirationChecker{retentionExpiryChecker, deletionExpiryChecker}
}

func (e *expirationChecker) Expired(ref retention.ChunkEntry, now model.Time) (bool, []retention.IntervalFilter) {
	if expired, nonDeletedIntervals := e.retentionExpiryChecker.Expired(ref, now); expired {
		return expired, nonDeletedIntervals
	}

	return e.deletionExpiryChecker.Expired(ref, now)
}

func (e *expirationChecker) MarkPhaseStarted() {
	e.retentionExpiryChecker.MarkPhaseStarted()
	e.deletionExpiryChecker.MarkPhaseStarted()
}

func (e *expirationChecker) MarkPhaseFailed() {
	e.retentionExpiryChecker.MarkPhaseFailed()
	e.deletionExpiryChecker.MarkPhaseFailed()
}

func (e *expirationChecker) MarkPhaseFinished() {
	e.retentionExpiryChecker.MarkPhaseFinished()
	e.deletionExpiryChecker.MarkPhaseFinished()
}

func (e *expirationChecker) MarkPhaseTimedOut() {
	e.retentionExpiryChecker.MarkPhaseTimedOut()
	e.deletionExpiryChecker.MarkPhaseTimedOut()
}

func (e *expirationChecker) IntervalMayHaveExpiredChunks(interval model.Interval, userID string) bool {
	return e.retentionExpiryChecker.IntervalMayHaveExpiredChunks(interval, userID) || e.deletionExpiryChecker.IntervalMayHaveExpiredChunks(interval, userID)
}

func (e *expirationChecker) DropFromIndex(ref retention.ChunkEntry, tableEndTime model.Time, now model.Time) bool {
	return e.retentionExpiryChecker.DropFromIndex(ref, tableEndTime, now) || e.deletionExpiryChecker.DropFromIndex(ref, tableEndTime, now)
}

func (c *Compactor) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.Desc, instanceExists bool, instanceID string, instanceDesc ring.InstanceDesc) (ring.InstanceState, ring.Tokens) {
	// When we initialize the compactor instance in the ring we want to start from
	// a clean situation, so whatever the previous state was, we set it to JOINING,
	// while keeping any existing tokens (or the ones loaded from file).
	var tokens []uint32
	if instanceExists {
		tokens = instanceDesc.GetTokens()
	}

	takenTokens := ringDesc.GetTokens()
	newTokens := ring.GenerateTokens(ringNumTokens-len(tokens), takenTokens)

	// Token sorting will be enforced by the parent caller.
	tokens = append(tokens, newTokens...)

	return ring.JOINING, tokens
}

func (c *Compactor) OnRingInstanceTokens(_ *ring.BasicLifecycler, _ ring.Tokens) {}
func (c *Compactor) OnRingInstanceStopping(_ *ring.BasicLifecycler)              {}
func (c *Compactor) OnRingInstanceHeartbeat(_ *ring.BasicLifecycler, _ *ring.Desc, _ *ring.InstanceDesc) {
}

func (c *Compactor) ServeHTTP(w http.ResponseWriter, req *http.Request) {
	c.ring.ServeHTTP(w, req)
}

func schemaPeriodForTable(cfg config.SchemaConfig, tableName string) (config.PeriodConfig, bool) {
	// The first pass removes configs that do not have the table's prefix.
	candidates := []config.PeriodConfig{}
	for _, schema := range cfg.Configs {
		if strings.HasPrefix(tableName, schema.IndexTables.Prefix) {
			candidates = append(candidates, schema)
		}
	}
	// WARNING: we assume the period is always daily. This is only true for boltdb-shipper.
	var (
		matched config.PeriodConfig
		found   bool
	)
	for _, schema := range candidates {
		periodIndex, err := strconv.ParseInt(strings.TrimPrefix(tableName, schema.IndexTables.Prefix), 10, 64)
		if err != nil {
			continue
		}
		periodSec := int64(schema.IndexTables.Period / time.Second)
		tableTs := model.TimeFromUnix(periodIndex * periodSec)
		if tableTs.After(schema.From.Time) || tableTs == schema.From.Time {
			matched = schema
			found = true
		}
	}

	return matched, found
}
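// The function below is NOT part of the original file: it is a hedged worked
// example of schemaPeriodForTable. For a schema whose index tables use the
// prefix "index_" with a 24h period, table "index_19000" trims to
// periodIndex 19000; periodSec is 86400, so the table timestamp is
// 19000*86400 = 1641600000s after the epoch (2022-01-08T00:00:00Z), and the
// latest matching schema whose From is at or before that time is returned.
func schemaPeriodForTableExample(cfg config.SchemaConfig) {
	if schema, ok := schemaPeriodForTable(cfg, "index_19000"); ok {
		fmt.Println("index type for table index_19000:", schema.IndexType)
	}
}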