// Source: github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/compactor/compactor.go

package compactor

import (
	"context"
	"flag"
	"fmt"
	"hash/fnv"
	"io/ioutil"
	"math/rand"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/backoff"
	"github.com/grafana/dskit/flagext"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/prometheus/tsdb"
	"github.com/thanos-io/thanos/pkg/block"
	"github.com/thanos-io/thanos/pkg/block/metadata"
	"github.com/thanos-io/thanos/pkg/compact"
	"github.com/thanos-io/thanos/pkg/compact/downsample"
	"github.com/thanos-io/thanos/pkg/objstore"

	"github.com/cortexproject/cortex/pkg/storage/bucket"
	cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb"
	"github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex"
	"github.com/cortexproject/cortex/pkg/util"
	util_log "github.com/cortexproject/cortex/pkg/util/log"
)

// Name and help text of the "blocks marked for deletion" counter. In this file
// they are used by newCompactor, which registers the counter with the constant
// label reason="compaction".
const (
	blocksMarkedForDeletionName = "cortex_compactor_blocks_marked_for_deletion_total"
	blocksMarkedForDeletionHelp = "Total number of blocks marked for deletion in compactor."
)

var (
	// errInvalidBlockRanges is a format string (not an error value) used by
	// Config.Validate; it takes the offending range and the previous range.
	errInvalidBlockRanges = "compactor block range periods should be divisible by the previous one, but %s is not divisible by %s"

	// RingOp is the ring operation the compactor uses both to look up the
	// owner of a tenant (ownUser) and to wait for ring stability at startup.
	// It is built from the ACTIVE instance state only.
	RingOp = ring.NewOp([]ring.InstanceState{ring.ACTIVE}, nil)

	// DefaultBlocksGrouperFactory is the BlocksGrouperFactory used by
	// NewCompactor when Config.BlocksGrouperFactory is nil. It returns the
	// Thanos default grouper. The ctx and cfg arguments are not used.
	// The counter created inline is not registered with reg.
	DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.Bucket, logger log.Logger, reg prometheus.Registerer, blocksMarkedForDeletion prometheus.Counter, garbageCollectedBlocks prometheus.Counter) compact.Grouper {
		return compact.NewDefaultGrouper(
			logger,
			bkt,
			false, // Do not accept malformed indexes
			true,  // Enable vertical compaction
			reg,
			blocksMarkedForDeletion,
			garbageCollectedBlocks,
			prometheus.NewCounter(prometheus.CounterOpts{}),
			metadata.NoneFunc)
	}

	// DefaultBlocksCompactorFactory is the BlocksCompactorFactory used by
	// NewCompactor when Config.BlocksCompactorFactory is nil. It builds a TSDB
	// leveled compactor and a TSDB-based planner, both configured with
	// cfg.BlockRanges converted to milliseconds.
	DefaultBlocksCompactorFactory = func(ctx context.Context, cfg Config, logger log.Logger, reg prometheus.Registerer) (compact.Compactor, compact.Planner, error) {
		compactor, err := tsdb.NewLeveledCompactor(ctx, reg, logger, cfg.BlockRanges.ToMilliseconds(), downsample.NewPool(), nil)
		if err != nil {
			return nil, nil, err
		}

		planner := compact.NewTSDBBasedPlanner(logger, cfg.BlockRanges.ToMilliseconds())
		return compactor, planner, nil
	}
)

// BlocksGrouperFactory builds and returns the grouper to use to compact a tenant's blocks.
// In this file it is invoked once per compactUser call, with the tenant-scoped
// bucket, the tenant-scoped logger and a registry created for that call.
type BlocksGrouperFactory func(
	ctx context.Context,
	cfg Config,
	bkt objstore.Bucket,
	logger log.Logger,
	reg prometheus.Registerer,
	blocksMarkedForDeletion prometheus.Counter,
	garbageCollectedBlocks prometheus.Counter,
) compact.Grouper

// BlocksCompactorFactory builds and returns the compactor and planner to use to compact a tenant's blocks.
// In this file it is invoked once, while the Compactor service is starting, and
// the returned compactor and planner are reused for every tenant.
type BlocksCompactorFactory func(
	ctx context.Context,
	cfg Config,
	logger log.Logger,
	reg prometheus.Registerer,
) (compact.Compactor, compact.Planner, error)

// Config holds the Compactor config.
type Config struct {
	// List of compaction time ranges. Validate requires each range to be
	// divisible by the previous one.
	BlockRanges cortex_tsdb.DurationList `yaml:"block_ranges"`
	// Number of goroutines used when syncing block index and chunks files
	// from the long term storage.
	BlockSyncConcurrency int `yaml:"block_sync_concurrency"`
	// Number of goroutines used when syncing block meta files from the long
	// term storage.
	MetaSyncConcurrency int `yaml:"meta_sync_concurrency"`
	// Minimum age of fresh (non-compacted) blocks before they are processed.
	ConsistencyDelay time.Duration `yaml:"consistency_delay"`
	// Local directory in which blocks are cached and compactions are processed.
	DataDir string `yaml:"data_dir"`
	// Frequency at which the compaction runs.
	CompactionInterval time.Duration `yaml:"compaction_interval"`
	// How many times to retry a failed compaction within a single run. It is
	// also used as the retry limit when discovering users from the bucket.
	CompactionRetries int `yaml:"compaction_retries"`
	// Max number of concurrent compactions running.
	CompactionConcurrency int `yaml:"compaction_concurrency"`
	// How frequently blocks cleanup and maintenance run.
	CleanupInterval time.Duration `yaml:"cleanup_interval"`
	// Max number of tenants for which cleanup runs concurrently.
	CleanupConcurrency int `yaml:"cleanup_concurrency"`
	// Time before a block marked for deletion is deleted from the bucket.
	DeletionDelay time.Duration `yaml:"deletion_delay"`
	// For tenants marked for deletion, time between deleting the last block
	// and the final cleanup of the tenant.
	TenantCleanupDelay time.Duration `yaml:"tenant_cleanup_delay"`

	// Whether the migration of block deletion marks to the global markers location is enabled.
	BlockDeletionMarksMigrationEnabled bool `yaml:"block_deletion_marks_migration_enabled"`

	// Tenants that can / cannot be compacted by this compactor. Both lists
	// are handed to util.NewAllowedTenants in newCompactor.
	EnabledTenants  flagext.StringSliceCSV `yaml:"enabled_tenants"`
	DisabledTenants flagext.StringSliceCSV `yaml:"disabled_tenants"`

	// Compactors sharding.
	ShardingEnabled bool       `yaml:"sharding_enabled"`
	ShardingRing    RingConfig `yaml:"sharding_ring"`

	// No need to add options to customize the retry backoff,
	// given the defaults should be fine, but allow to override
	// it in tests.
	retryMinBackoff time.Duration `yaml:"-"`
	retryMaxBackoff time.Duration `yaml:"-"`

	// Allow downstream projects to customise the blocks compactor.
	// When nil, NewCompactor falls back to the Default*Factory values.
	BlocksGrouperFactory   BlocksGrouperFactory   `yaml:"-"`
	BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"`
}

// RegisterFlags registers the Compactor flags.
128 func (cfg *Config) RegisterFlags(f *flag.FlagSet) { 129 cfg.ShardingRing.RegisterFlags(f) 130 131 cfg.BlockRanges = cortex_tsdb.DurationList{2 * time.Hour, 12 * time.Hour, 24 * time.Hour} 132 cfg.retryMinBackoff = 10 * time.Second 133 cfg.retryMaxBackoff = time.Minute 134 135 f.Var(&cfg.BlockRanges, "compactor.block-ranges", "List of compaction time ranges.") 136 f.DurationVar(&cfg.ConsistencyDelay, "compactor.consistency-delay", 0, fmt.Sprintf("Minimum age of fresh (non-compacted) blocks before they are being processed. Malformed blocks older than the maximum of consistency-delay and %s will be removed.", compact.PartialUploadThresholdAge)) 137 f.IntVar(&cfg.BlockSyncConcurrency, "compactor.block-sync-concurrency", 20, "Number of Go routines to use when syncing block index and chunks files from the long term storage.") 138 f.IntVar(&cfg.MetaSyncConcurrency, "compactor.meta-sync-concurrency", 20, "Number of Go routines to use when syncing block meta files from the long term storage.") 139 f.StringVar(&cfg.DataDir, "compactor.data-dir", "./data", "Data directory in which to cache blocks and process compactions") 140 f.DurationVar(&cfg.CompactionInterval, "compactor.compaction-interval", time.Hour, "The frequency at which the compaction runs") 141 f.IntVar(&cfg.CompactionRetries, "compactor.compaction-retries", 3, "How many times to retry a failed compaction within a single compaction run.") 142 f.IntVar(&cfg.CompactionConcurrency, "compactor.compaction-concurrency", 1, "Max number of concurrent compactions running.") 143 f.DurationVar(&cfg.CleanupInterval, "compactor.cleanup-interval", 15*time.Minute, "How frequently compactor should run blocks cleanup and maintenance, as well as update the bucket index.") 144 f.IntVar(&cfg.CleanupConcurrency, "compactor.cleanup-concurrency", 20, "Max number of tenants for which blocks cleanup and maintenance should run concurrently.") 145 f.BoolVar(&cfg.ShardingEnabled, "compactor.sharding-enabled", false, "Shard tenants across 
multiple compactor instances. Sharding is required if you run multiple compactor instances, in order to coordinate compactions and avoid race conditions leading to the same tenant blocks simultaneously compacted by different instances.") 146 f.DurationVar(&cfg.DeletionDelay, "compactor.deletion-delay", 12*time.Hour, "Time before a block marked for deletion is deleted from bucket. "+ 147 "If not 0, blocks will be marked for deletion and compactor component will permanently delete blocks marked for deletion from the bucket. "+ 148 "If 0, blocks will be deleted straight away. Note that deleting blocks immediately can cause query failures.") 149 f.DurationVar(&cfg.TenantCleanupDelay, "compactor.tenant-cleanup-delay", 6*time.Hour, "For tenants marked for deletion, this is time between deleting of last block, and doing final cleanup (marker files, debug files) of the tenant.") 150 f.BoolVar(&cfg.BlockDeletionMarksMigrationEnabled, "compactor.block-deletion-marks-migration-enabled", true, "When enabled, at compactor startup the bucket will be scanned and all found deletion marks inside the block location will be copied to the markers global location too. This option can (and should) be safely disabled as soon as the compactor has successfully run at least once.") 151 152 f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.") 153 f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. If specified, and compactor would normally pick given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.") 154 } 155 156 func (cfg *Config) Validate() error { 157 // Each block range period should be divisible by the previous one. 
158 for i := 1; i < len(cfg.BlockRanges); i++ { 159 if cfg.BlockRanges[i]%cfg.BlockRanges[i-1] != 0 { 160 return errors.Errorf(errInvalidBlockRanges, cfg.BlockRanges[i].String(), cfg.BlockRanges[i-1].String()) 161 } 162 } 163 164 return nil 165 } 166 167 // ConfigProvider defines the per-tenant config provider for the Compactor. 168 type ConfigProvider interface { 169 bucket.TenantConfigProvider 170 CompactorBlocksRetentionPeriod(user string) time.Duration 171 } 172 173 // Compactor is a multi-tenant TSDB blocks compactor based on Thanos. 174 type Compactor struct { 175 services.Service 176 177 compactorCfg Config 178 storageCfg cortex_tsdb.BlocksStorageConfig 179 cfgProvider ConfigProvider 180 logger log.Logger 181 parentLogger log.Logger 182 registerer prometheus.Registerer 183 allowedTenants *util.AllowedTenants 184 185 // Functions that creates bucket client, grouper, planner and compactor using the context. 186 // Useful for injecting mock objects from tests. 187 bucketClientFactory func(ctx context.Context) (objstore.Bucket, error) 188 blocksGrouperFactory BlocksGrouperFactory 189 blocksCompactorFactory BlocksCompactorFactory 190 191 // Users scanner, used to discover users from the bucket. 192 usersScanner *cortex_tsdb.UsersScanner 193 194 // Blocks cleaner is responsible to hard delete blocks marked for deletion. 195 blocksCleaner *BlocksCleaner 196 197 // Underlying compactor and planner used to compact TSDB blocks. 198 blocksCompactor compact.Compactor 199 blocksPlanner compact.Planner 200 201 // Client used to run operations on the bucket storing blocks. 202 bucketClient objstore.Bucket 203 204 // Ring used for sharding compactions. 205 ringLifecycler *ring.Lifecycler 206 ring *ring.Ring 207 ringSubservices *services.Manager 208 ringSubservicesWatcher *services.FailureWatcher 209 210 // Metrics. 
211 compactionRunsStarted prometheus.Counter 212 compactionRunsCompleted prometheus.Counter 213 compactionRunsFailed prometheus.Counter 214 compactionRunsLastSuccess prometheus.Gauge 215 compactionRunDiscoveredTenants prometheus.Gauge 216 compactionRunSkippedTenants prometheus.Gauge 217 compactionRunSucceededTenants prometheus.Gauge 218 compactionRunFailedTenants prometheus.Gauge 219 compactionRunInterval prometheus.Gauge 220 blocksMarkedForDeletion prometheus.Counter 221 garbageCollectedBlocks prometheus.Counter 222 223 // TSDB syncer metrics 224 syncerMetrics *syncerMetrics 225 } 226 227 // NewCompactor makes a new Compactor. 228 func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, cfgProvider ConfigProvider, logger log.Logger, registerer prometheus.Registerer) (*Compactor, error) { 229 bucketClientFactory := func(ctx context.Context) (objstore.Bucket, error) { 230 return bucket.NewClient(ctx, storageCfg.Bucket, "compactor", logger, registerer) 231 } 232 233 blocksGrouperFactory := compactorCfg.BlocksGrouperFactory 234 if blocksGrouperFactory == nil { 235 blocksGrouperFactory = DefaultBlocksGrouperFactory 236 } 237 238 blocksCompactorFactory := compactorCfg.BlocksCompactorFactory 239 if blocksCompactorFactory == nil { 240 blocksCompactorFactory = DefaultBlocksCompactorFactory 241 } 242 243 cortexCompactor, err := newCompactor(compactorCfg, storageCfg, cfgProvider, logger, registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory) 244 if err != nil { 245 return nil, errors.Wrap(err, "failed to create Cortex blocks compactor") 246 } 247 248 return cortexCompactor, nil 249 } 250 251 func newCompactor( 252 compactorCfg Config, 253 storageCfg cortex_tsdb.BlocksStorageConfig, 254 cfgProvider ConfigProvider, 255 logger log.Logger, 256 registerer prometheus.Registerer, 257 bucketClientFactory func(ctx context.Context) (objstore.Bucket, error), 258 blocksGrouperFactory BlocksGrouperFactory, 259 blocksCompactorFactory 
BlocksCompactorFactory, 260 ) (*Compactor, error) { 261 c := &Compactor{ 262 compactorCfg: compactorCfg, 263 storageCfg: storageCfg, 264 cfgProvider: cfgProvider, 265 parentLogger: logger, 266 logger: log.With(logger, "component", "compactor"), 267 registerer: registerer, 268 syncerMetrics: newSyncerMetrics(registerer), 269 bucketClientFactory: bucketClientFactory, 270 blocksGrouperFactory: blocksGrouperFactory, 271 blocksCompactorFactory: blocksCompactorFactory, 272 allowedTenants: util.NewAllowedTenants(compactorCfg.EnabledTenants, compactorCfg.DisabledTenants), 273 274 compactionRunsStarted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ 275 Name: "cortex_compactor_runs_started_total", 276 Help: "Total number of compaction runs started.", 277 }), 278 compactionRunsCompleted: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ 279 Name: "cortex_compactor_runs_completed_total", 280 Help: "Total number of compaction runs successfully completed.", 281 }), 282 compactionRunsFailed: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ 283 Name: "cortex_compactor_runs_failed_total", 284 Help: "Total number of compaction runs failed.", 285 }), 286 compactionRunsLastSuccess: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ 287 Name: "cortex_compactor_last_successful_run_timestamp_seconds", 288 Help: "Unix timestamp of the last successful compaction run.", 289 }), 290 compactionRunDiscoveredTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ 291 Name: "cortex_compactor_tenants_discovered", 292 Help: "Number of tenants discovered during the current compaction run. Reset to 0 when compactor is idle.", 293 }), 294 compactionRunSkippedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ 295 Name: "cortex_compactor_tenants_skipped", 296 Help: "Number of tenants skipped during the current compaction run. 
Reset to 0 when compactor is idle.", 297 }), 298 compactionRunSucceededTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ 299 Name: "cortex_compactor_tenants_processing_succeeded", 300 Help: "Number of tenants successfully processed during the current compaction run. Reset to 0 when compactor is idle.", 301 }), 302 compactionRunFailedTenants: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ 303 Name: "cortex_compactor_tenants_processing_failed", 304 Help: "Number of tenants failed processing during the current compaction run. Reset to 0 when compactor is idle.", 305 }), 306 compactionRunInterval: promauto.With(registerer).NewGauge(prometheus.GaugeOpts{ 307 Name: "cortex_compactor_compaction_interval_seconds", 308 Help: "The configured interval on which compaction is run in seconds. Useful when compared to the last successful run metric to accurately detect multiple failed compaction runs.", 309 }), 310 blocksMarkedForDeletion: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ 311 Name: blocksMarkedForDeletionName, 312 Help: blocksMarkedForDeletionHelp, 313 ConstLabels: prometheus.Labels{"reason": "compaction"}, 314 }), 315 garbageCollectedBlocks: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ 316 Name: "cortex_compactor_garbage_collected_blocks_total", 317 Help: "Total number of blocks marked for deletion by compactor.", 318 }), 319 } 320 321 if len(compactorCfg.EnabledTenants) > 0 { 322 level.Info(c.logger).Log("msg", "compactor using enabled users", "enabled", strings.Join(compactorCfg.EnabledTenants, ", ")) 323 } 324 if len(compactorCfg.DisabledTenants) > 0 { 325 level.Info(c.logger).Log("msg", "compactor using disabled users", "disabled", strings.Join(compactorCfg.DisabledTenants, ", ")) 326 } 327 328 c.Service = services.NewBasicService(c.starting, c.running, c.stopping) 329 330 // The last successful compaction run metric is exposed as seconds since epoch, so we need to use seconds for this metric. 
331 c.compactionRunInterval.Set(c.compactorCfg.CompactionInterval.Seconds()) 332 333 return c, nil 334 } 335 336 // Start the compactor. 337 func (c *Compactor) starting(ctx context.Context) error { 338 var err error 339 340 // Create bucket client. 341 c.bucketClient, err = c.bucketClientFactory(ctx) 342 if err != nil { 343 return errors.Wrap(err, "failed to create bucket client") 344 } 345 346 // Create blocks compactor dependencies. 347 c.blocksCompactor, c.blocksPlanner, err = c.blocksCompactorFactory(ctx, c.compactorCfg, c.logger, c.registerer) 348 if err != nil { 349 return errors.Wrap(err, "failed to initialize compactor dependencies") 350 } 351 352 // Wrap the bucket client to write block deletion marks in the global location too. 353 c.bucketClient = bucketindex.BucketWithGlobalMarkers(c.bucketClient) 354 355 // Create the users scanner. 356 c.usersScanner = cortex_tsdb.NewUsersScanner(c.bucketClient, c.ownUser, c.parentLogger) 357 358 // Create the blocks cleaner (service). 359 c.blocksCleaner = NewBlocksCleaner(BlocksCleanerConfig{ 360 DeletionDelay: c.compactorCfg.DeletionDelay, 361 CleanupInterval: util.DurationWithJitter(c.compactorCfg.CleanupInterval, 0.1), 362 CleanupConcurrency: c.compactorCfg.CleanupConcurrency, 363 BlockDeletionMarksMigrationEnabled: c.compactorCfg.BlockDeletionMarksMigrationEnabled, 364 TenantCleanupDelay: c.compactorCfg.TenantCleanupDelay, 365 }, c.bucketClient, c.usersScanner, c.cfgProvider, c.parentLogger, c.registerer) 366 367 // Initialize the compactors ring if sharding is enabled. 
368 if c.compactorCfg.ShardingEnabled { 369 lifecyclerCfg := c.compactorCfg.ShardingRing.ToLifecyclerConfig() 370 c.ringLifecycler, err = ring.NewLifecycler(lifecyclerCfg, ring.NewNoopFlushTransferer(), "compactor", ring.CompactorRingKey, false, c.logger, prometheus.WrapRegistererWithPrefix("cortex_", c.registerer)) 371 if err != nil { 372 return errors.Wrap(err, "unable to initialize compactor ring lifecycler") 373 } 374 375 c.ring, err = ring.New(lifecyclerCfg.RingConfig, "compactor", ring.CompactorRingKey, c.logger, prometheus.WrapRegistererWithPrefix("cortex_", c.registerer)) 376 if err != nil { 377 return errors.Wrap(err, "unable to initialize compactor ring") 378 } 379 380 c.ringSubservices, err = services.NewManager(c.ringLifecycler, c.ring) 381 if err == nil { 382 c.ringSubservicesWatcher = services.NewFailureWatcher() 383 c.ringSubservicesWatcher.WatchManager(c.ringSubservices) 384 385 err = services.StartManagerAndAwaitHealthy(ctx, c.ringSubservices) 386 } 387 388 if err != nil { 389 return errors.Wrap(err, "unable to start compactor ring dependencies") 390 } 391 392 // If sharding is enabled we should wait until this instance is 393 // ACTIVE within the ring. This MUST be done before starting the 394 // any other component depending on the users scanner, because the 395 // users scanner depends on the ring (to check whether an user belongs 396 // to this shard or not). 
397 level.Info(c.logger).Log("msg", "waiting until compactor is ACTIVE in the ring") 398 399 ctxWithTimeout, cancel := context.WithTimeout(ctx, c.compactorCfg.ShardingRing.WaitActiveInstanceTimeout) 400 defer cancel() 401 if err := ring.WaitInstanceState(ctxWithTimeout, c.ring, c.ringLifecycler.ID, ring.ACTIVE); err != nil { 402 level.Error(c.logger).Log("msg", "compactor failed to become ACTIVE in the ring", "err", err) 403 return err 404 } 405 level.Info(c.logger).Log("msg", "compactor is ACTIVE in the ring") 406 407 // In the event of a cluster cold start or scale up of 2+ compactor instances at the same 408 // time, we may end up in a situation where each new compactor instance starts at a slightly 409 // different time and thus each one starts with a different state of the ring. It's better 410 // to just wait the ring stability for a short time. 411 if c.compactorCfg.ShardingRing.WaitStabilityMinDuration > 0 { 412 minWaiting := c.compactorCfg.ShardingRing.WaitStabilityMinDuration 413 maxWaiting := c.compactorCfg.ShardingRing.WaitStabilityMaxDuration 414 415 level.Info(c.logger).Log("msg", "waiting until compactor ring topology is stable", "min_waiting", minWaiting.String(), "max_waiting", maxWaiting.String()) 416 if err := ring.WaitRingStability(ctx, c.ring, RingOp, minWaiting, maxWaiting); err != nil { 417 level.Warn(c.logger).Log("msg", "compactor ring topology is not stable after the max waiting time, proceeding anyway") 418 } else { 419 level.Info(c.logger).Log("msg", "compactor ring topology is stable") 420 } 421 } 422 } 423 424 // Ensure an initial cleanup occurred before starting the compactor. 
425 if err := services.StartAndAwaitRunning(ctx, c.blocksCleaner); err != nil { 426 c.ringSubservices.StopAsync() 427 return errors.Wrap(err, "failed to start the blocks cleaner") 428 } 429 430 return nil 431 } 432 433 func (c *Compactor) stopping(_ error) error { 434 ctx := context.Background() 435 436 services.StopAndAwaitTerminated(ctx, c.blocksCleaner) //nolint:errcheck 437 if c.ringSubservices != nil { 438 return services.StopManagerAndAwaitStopped(ctx, c.ringSubservices) 439 } 440 return nil 441 } 442 443 func (c *Compactor) running(ctx context.Context) error { 444 // Run an initial compaction before starting the interval. 445 c.compactUsers(ctx) 446 447 ticker := time.NewTicker(util.DurationWithJitter(c.compactorCfg.CompactionInterval, 0.05)) 448 defer ticker.Stop() 449 450 for { 451 select { 452 case <-ticker.C: 453 c.compactUsers(ctx) 454 case <-ctx.Done(): 455 return nil 456 case err := <-c.ringSubservicesWatcher.Chan(): 457 return errors.Wrap(err, "compactor subservice failed") 458 } 459 } 460 } 461 462 func (c *Compactor) compactUsers(ctx context.Context) { 463 succeeded := false 464 compactionErrorCount := 0 465 466 c.compactionRunsStarted.Inc() 467 468 defer func() { 469 if succeeded && compactionErrorCount == 0 { 470 c.compactionRunsCompleted.Inc() 471 c.compactionRunsLastSuccess.SetToCurrentTime() 472 } else { 473 c.compactionRunsFailed.Inc() 474 } 475 476 // Reset progress metrics once done. 
477 c.compactionRunDiscoveredTenants.Set(0) 478 c.compactionRunSkippedTenants.Set(0) 479 c.compactionRunSucceededTenants.Set(0) 480 c.compactionRunFailedTenants.Set(0) 481 }() 482 483 level.Info(c.logger).Log("msg", "discovering users from bucket") 484 users, err := c.discoverUsersWithRetries(ctx) 485 if err != nil { 486 level.Error(c.logger).Log("msg", "failed to discover users from bucket", "err", err) 487 return 488 } 489 490 level.Info(c.logger).Log("msg", "discovered users from bucket", "users", len(users)) 491 c.compactionRunDiscoveredTenants.Set(float64(len(users))) 492 493 // When starting multiple compactor replicas nearly at the same time, running in a cluster with 494 // a large number of tenants, we may end up in a situation where the 1st user is compacted by 495 // multiple replicas at the same time. Shuffling users helps reduce the likelihood this will happen. 496 rand.Shuffle(len(users), func(i, j int) { 497 users[i], users[j] = users[j], users[i] 498 }) 499 500 // Keep track of users owned by this shard, so that we can delete the local files for all other users. 501 ownedUsers := map[string]struct{}{} 502 for _, userID := range users { 503 // Ensure the context has not been canceled (ie. compactor shutdown has been triggered). 504 if ctx.Err() != nil { 505 level.Info(c.logger).Log("msg", "interrupting compaction of user blocks", "err", err) 506 return 507 } 508 509 // Ensure the user ID belongs to our shard. 
510 if owned, err := c.ownUser(userID); err != nil { 511 c.compactionRunSkippedTenants.Inc() 512 level.Warn(c.logger).Log("msg", "unable to check if user is owned by this shard", "user", userID, "err", err) 513 continue 514 } else if !owned { 515 c.compactionRunSkippedTenants.Inc() 516 level.Debug(c.logger).Log("msg", "skipping user because it is not owned by this shard", "user", userID) 517 continue 518 } 519 520 ownedUsers[userID] = struct{}{} 521 522 if markedForDeletion, err := cortex_tsdb.TenantDeletionMarkExists(ctx, c.bucketClient, userID); err != nil { 523 c.compactionRunSkippedTenants.Inc() 524 level.Warn(c.logger).Log("msg", "unable to check if user is marked for deletion", "user", userID, "err", err) 525 continue 526 } else if markedForDeletion { 527 c.compactionRunSkippedTenants.Inc() 528 level.Debug(c.logger).Log("msg", "skipping user because it is marked for deletion", "user", userID) 529 continue 530 } 531 532 level.Info(c.logger).Log("msg", "starting compaction of user blocks", "user", userID) 533 534 if err = c.compactUserWithRetries(ctx, userID); err != nil { 535 c.compactionRunFailedTenants.Inc() 536 compactionErrorCount++ 537 level.Error(c.logger).Log("msg", "failed to compact user blocks", "user", userID, "err", err) 538 continue 539 } 540 541 c.compactionRunSucceededTenants.Inc() 542 level.Info(c.logger).Log("msg", "successfully compacted user blocks", "user", userID) 543 } 544 545 // Delete local files for unowned tenants, if there are any. This cleans up 546 // leftover local files for tenants that belong to different compactors now, 547 // or have been deleted completely. 
548 for userID := range c.listTenantsWithMetaSyncDirectories() { 549 if _, owned := ownedUsers[userID]; owned { 550 continue 551 } 552 553 dir := c.metaSyncDirForUser(userID) 554 s, err := os.Stat(dir) 555 if err != nil { 556 if !os.IsNotExist(err) { 557 level.Warn(c.logger).Log("msg", "failed to stat local directory with user data", "dir", dir, "err", err) 558 } 559 continue 560 } 561 562 if s.IsDir() { 563 err := os.RemoveAll(dir) 564 if err == nil { 565 level.Info(c.logger).Log("msg", "deleted directory for user not owned by this shard", "dir", dir) 566 } else { 567 level.Warn(c.logger).Log("msg", "failed to delete directory for user not owned by this shard", "dir", dir, "err", err) 568 } 569 } 570 } 571 572 succeeded = true 573 } 574 575 func (c *Compactor) compactUserWithRetries(ctx context.Context, userID string) error { 576 var lastErr error 577 578 retries := backoff.New(ctx, backoff.Config{ 579 MinBackoff: c.compactorCfg.retryMinBackoff, 580 MaxBackoff: c.compactorCfg.retryMaxBackoff, 581 MaxRetries: c.compactorCfg.CompactionRetries, 582 }) 583 584 for retries.Ongoing() { 585 lastErr = c.compactUser(ctx, userID) 586 if lastErr == nil { 587 return nil 588 } 589 590 retries.Wait() 591 } 592 593 return lastErr 594 } 595 596 func (c *Compactor) compactUser(ctx context.Context, userID string) error { 597 bucket := bucket.NewUserBucketClient(userID, c.bucketClient, c.cfgProvider) 598 reg := prometheus.NewRegistry() 599 defer c.syncerMetrics.gatherThanosSyncerMetrics(reg) 600 601 ulogger := util_log.WithUserID(userID, c.logger) 602 603 // Filters out duplicate blocks that can be formed from two or more overlapping 604 // blocks that fully submatches the source blocks of the older blocks. 605 deduplicateBlocksFilter := block.NewDeduplicateFilter() 606 607 // While fetching blocks, we filter out blocks that were marked for deletion by using IgnoreDeletionMarkFilter. 
608 // No delay is used -- all blocks with deletion marker are ignored, and not considered for compaction. 609 ignoreDeletionMarkFilter := block.NewIgnoreDeletionMarkFilter( 610 ulogger, 611 bucket, 612 0, 613 c.compactorCfg.MetaSyncConcurrency) 614 615 fetcher, err := block.NewMetaFetcher( 616 ulogger, 617 c.compactorCfg.MetaSyncConcurrency, 618 bucket, 619 c.metaSyncDirForUser(userID), 620 reg, 621 // List of filters to apply (order matters). 622 []block.MetadataFilter{ 623 // Remove the ingester ID because we don't shard blocks anymore, while still 624 // honoring the shard ID if sharding was done in the past. 625 NewLabelRemoverFilter([]string{cortex_tsdb.IngesterIDExternalLabel}), 626 block.NewConsistencyDelayMetaFilter(ulogger, c.compactorCfg.ConsistencyDelay, reg), 627 ignoreDeletionMarkFilter, 628 deduplicateBlocksFilter, 629 }, 630 nil, 631 ) 632 if err != nil { 633 return err 634 } 635 636 syncer, err := compact.NewMetaSyncer( 637 ulogger, 638 reg, 639 bucket, 640 fetcher, 641 deduplicateBlocksFilter, 642 ignoreDeletionMarkFilter, 643 c.blocksMarkedForDeletion, 644 c.garbageCollectedBlocks, 645 c.compactorCfg.BlockSyncConcurrency, 646 ) 647 if err != nil { 648 return errors.Wrap(err, "failed to create syncer") 649 } 650 651 compactor, err := compact.NewBucketCompactor( 652 ulogger, 653 syncer, 654 c.blocksGrouperFactory(ctx, c.compactorCfg, bucket, ulogger, reg, c.blocksMarkedForDeletion, c.garbageCollectedBlocks), 655 c.blocksPlanner, 656 c.blocksCompactor, 657 path.Join(c.compactorCfg.DataDir, "compact"), 658 bucket, 659 c.compactorCfg.CompactionConcurrency, 660 false, 661 ) 662 if err != nil { 663 return errors.Wrap(err, "failed to create bucket compactor") 664 } 665 666 if err := compactor.Compact(ctx); err != nil { 667 return errors.Wrap(err, "compaction") 668 } 669 670 return nil 671 } 672 673 func (c *Compactor) discoverUsersWithRetries(ctx context.Context) ([]string, error) { 674 var lastErr error 675 676 retries := backoff.New(ctx, 
backoff.Config{ 677 MinBackoff: c.compactorCfg.retryMinBackoff, 678 MaxBackoff: c.compactorCfg.retryMaxBackoff, 679 MaxRetries: c.compactorCfg.CompactionRetries, 680 }) 681 682 for retries.Ongoing() { 683 var users []string 684 685 users, lastErr = c.discoverUsers(ctx) 686 if lastErr == nil { 687 return users, nil 688 } 689 690 retries.Wait() 691 } 692 693 return nil, lastErr 694 } 695 696 func (c *Compactor) discoverUsers(ctx context.Context) ([]string, error) { 697 var users []string 698 699 err := c.bucketClient.Iter(ctx, "", func(entry string) error { 700 users = append(users, strings.TrimSuffix(entry, "/")) 701 return nil 702 }) 703 704 return users, err 705 } 706 707 func (c *Compactor) ownUser(userID string) (bool, error) { 708 if !c.allowedTenants.IsAllowed(userID) { 709 return false, nil 710 } 711 712 // Always owned if sharding is disabled. 713 if !c.compactorCfg.ShardingEnabled { 714 return true, nil 715 } 716 717 // Hash the user ID. 718 hasher := fnv.New32a() 719 _, _ = hasher.Write([]byte(userID)) 720 userHash := hasher.Sum32() 721 722 // Check whether this compactor instance owns the user. 723 rs, err := c.ring.Get(userHash, RingOp, nil, nil, nil) 724 if err != nil { 725 return false, err 726 } 727 728 if len(rs.Instances) != 1 { 729 return false, fmt.Errorf("unexpected number of compactors in the shard (expected 1, got %d)", len(rs.Instances)) 730 } 731 732 return rs.Instances[0].Addr == c.ringLifecycler.Addr, nil 733 } 734 735 const compactorMetaPrefix = "compactor-meta-" 736 737 // metaSyncDirForUser returns directory to store cached meta files. 738 // The fetcher stores cached metas in the "meta-syncer/" sub directory, 739 // but we prefix it with "compactor-meta-" in order to guarantee no clashing with 740 // the directory used by the Thanos Syncer, whatever is the user ID. 
741 func (c *Compactor) metaSyncDirForUser(userID string) string { 742 return filepath.Join(c.compactorCfg.DataDir, compactorMetaPrefix+userID) 743 } 744 745 // This function returns tenants with meta sync directories found on local disk. On error, it returns nil map. 746 func (c *Compactor) listTenantsWithMetaSyncDirectories() map[string]struct{} { 747 result := map[string]struct{}{} 748 749 files, err := ioutil.ReadDir(c.compactorCfg.DataDir) 750 if err != nil { 751 return nil 752 } 753 754 for _, f := range files { 755 if !f.IsDir() { 756 continue 757 } 758 759 if !strings.HasPrefix(f.Name(), compactorMetaPrefix) { 760 continue 761 } 762 763 result[f.Name()[len(compactorMetaPrefix):]] = struct{}{} 764 } 765 766 return result 767 }