github.com/thanos-io/thanos@v0.32.5/pkg/block/fetcher.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package block

import (
	"context"
	"encoding/json"
	"io"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/golang/groupcache/singleflight"
	"github.com/oklog/ulid"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/model/relabel"
	"github.com/thanos-io/objstore"
	"golang.org/x/sync/errgroup"
	"gopkg.in/yaml.v2"

	"github.com/thanos-io/thanos/pkg/block/metadata"
	"github.com/thanos-io/thanos/pkg/errutil"
	"github.com/thanos-io/thanos/pkg/extprom"
	"github.com/thanos-io/thanos/pkg/model"
	"github.com/thanos-io/thanos/pkg/runutil"
)

const FetcherConcurrency = 32

// FetcherMetrics holds metrics tracked by the metadata fetcher. This struct and its fields are exported
// to allow depending projects (e.g. Cortex) to implement their own custom metadata fetcher while tracking
// compatible metrics.
type FetcherMetrics struct {
	Syncs        prometheus.Counter
	SyncFailures prometheus.Counter
	SyncDuration prometheus.Histogram

	Synced   *extprom.TxGaugeVec
	Modified *extprom.TxGaugeVec
}

// Submit applies new values for metrics tracked by transaction GaugeVec.
func (s *FetcherMetrics) Submit() {
	s.Synced.Submit()
	s.Modified.Submit()
}

// ResetTx starts a new transaction for metrics tracked by transaction GaugeVec.
func (s *FetcherMetrics) ResetTx() {
	s.Synced.ResetTx()
	s.Modified.ResetTx()
}

const (
	fetcherSubSys = "blocks_meta"

	CorruptedMeta = "corrupted-meta-json"
	NoMeta        = "no-meta-json"
	LoadedMeta    = "loaded"
	FailedMeta    = "failed"

	// Synced label values.
	labelExcludedMeta = "label-excluded"
	timeExcludedMeta  = "time-excluded"
	tooFreshMeta      = "too-fresh"
	duplicateMeta     = "duplicate"
	// Blocks that are marked for deletion can be loaded as well. This is done to make sure that we load blocks that are meant to be deleted,
	// but don't have a replacement block yet.
	MarkedForDeletionMeta = "marked-for-deletion"

	// MarkedForNoCompactionMeta is the label for blocks which are loaded but also marked for no compaction. This label is also counted in the `loaded` label metric.
	MarkedForNoCompactionMeta = "marked-for-no-compact"

	// MarkedForNoDownsampleMeta is the label for blocks which are loaded but also marked for no downsample. This label is also counted in the `loaded` label metric.
	MarkedForNoDownsampleMeta = "marked-for-no-downsample"

	// Modified label values.
	replicaRemovedMeta = "replica-label-removed"
)

func NewFetcherMetrics(reg prometheus.Registerer, syncedExtraLabels, modifiedExtraLabels [][]string) *FetcherMetrics {
	var m FetcherMetrics

	m.Syncs = promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Subsystem: fetcherSubSys,
		Name:      "syncs_total",
		Help:      "Total blocks metadata synchronization attempts",
	})
	m.SyncFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Subsystem: fetcherSubSys,
		Name:      "sync_failures_total",
		Help:      "Total blocks metadata synchronization failures",
	})
	m.SyncDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
		Subsystem: fetcherSubSys,
		Name:      "sync_duration_seconds",
		Help:      "Duration of the blocks metadata synchronization in seconds",
		Buckets:   []float64{0.01, 1, 10, 100, 300, 600, 1000},
	})
	m.Synced = extprom.NewTxGaugeVec(
		reg,
		prometheus.GaugeOpts{
			Subsystem: fetcherSubSys,
			Name:      "synced",
			Help:      "Number of block metadata synced",
		},
		[]string{"state"},
		append([][]string{
			{CorruptedMeta},
			{NoMeta},
			{LoadedMeta},
			{tooFreshMeta},
			{FailedMeta},
			{labelExcludedMeta},
			{timeExcludedMeta},
			{duplicateMeta},
			{MarkedForDeletionMeta},
			{MarkedForNoCompactionMeta},
		}, syncedExtraLabels...)...,
	)
	m.Modified = extprom.NewTxGaugeVec(
		reg,
		prometheus.GaugeOpts{
			Subsystem: fetcherSubSys,
			Name:      "modified",
			Help:      "Number of blocks whose metadata changed",
		},
		[]string{"modified"},
		append([][]string{
			{replicaRemovedMeta},
		}, modifiedExtraLabels...)...,
	)
	return &m
}

type MetadataFetcher interface {
	Fetch(ctx context.Context) (metas map[ulid.ULID]*metadata.Meta, partial map[ulid.ULID]error, err error)
	UpdateOnChange(func([]metadata.Meta, error))
}

// GaugeVec abstracts over a Prometheus GaugeVec or an extprom.TxGaugeVec.
type GaugeVec interface {
	WithLabelValues(lvs ...string) prometheus.Gauge
}

// Filter allows filtering or modifying metas from the provided map, or returns an error.
type MetadataFilter interface {
	Filter(ctx context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error
}

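// The example below is an illustrative sketch and is NOT part of the original file.
// It shows a minimal custom MetadataFilter: blocks carrying a hypothetical external
// label env="staging" are dropped and counted under a caller-chosen synced state.
// For the state to appear in the synced metric, the same value would be passed to
// NewFetcherMetrics via syncedExtraLabels.
type exampleEnvExcludeFilter struct {
	state string // Synced state label value reported for excluded blocks (hypothetical).
}

func (f *exampleEnvExcludeFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	for id, m := range metas {
		// Thanos.Labels holds the block's external labels; drop staging blocks.
		if m.Thanos.Labels["env"] == "staging" {
			synced.WithLabelValues(f.state).Inc()
			delete(metas, id)
		}
	}
	return nil
}
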
// BaseFetcher is a struct that synchronizes filtered metadata of all blocks in the object storage with the local state.
// Go-routine safe.
type BaseFetcher struct {
	logger      log.Logger
	concurrency int
	bkt         objstore.InstrumentedBucketReader

	// Optional local directory to cache meta.json files.
	cacheDir string
	syncs    prometheus.Counter
	g        singleflight.Group

	mtx    sync.Mutex
	cached map[ulid.ULID]*metadata.Meta
}

// NewBaseFetcher constructs BaseFetcher.
func NewBaseFetcher(logger log.Logger, concurrency int, bkt objstore.InstrumentedBucketReader, dir string, reg prometheus.Registerer) (*BaseFetcher, error) {
	if logger == nil {
		logger = log.NewNopLogger()
	}

	cacheDir := ""
	if dir != "" {
		cacheDir = filepath.Join(dir, "meta-syncer")
		if err := os.MkdirAll(cacheDir, os.ModePerm); err != nil {
			return nil, err
		}
	}

	return &BaseFetcher{
		logger:      log.With(logger, "component", "block.BaseFetcher"),
		concurrency: concurrency,
		bkt:         bkt,
		cacheDir:    cacheDir,
		cached:      map[ulid.ULID]*metadata.Meta{},
		syncs: promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Subsystem: fetcherSubSys,
			Name:      "base_syncs_total",
			Help:      "Total blocks metadata synchronization attempts by base Fetcher",
		}),
	}, nil
}

// NewRawMetaFetcher returns a basic meta fetcher without proper handling for eventually consistent backends or partial uploads.
// NOTE: Not suitable to use in production.
func NewRawMetaFetcher(logger log.Logger, bkt objstore.InstrumentedBucketReader) (*MetaFetcher, error) {
	return NewMetaFetcher(logger, 1, bkt, "", nil, nil)
}

// NewMetaFetcher returns a meta fetcher.
func NewMetaFetcher(logger log.Logger, concurrency int, bkt objstore.InstrumentedBucketReader, dir string, reg prometheus.Registerer, filters []MetadataFilter) (*MetaFetcher, error) {
	b, err := NewBaseFetcher(logger, concurrency, bkt, dir, reg)
	if err != nil {
		return nil, err
	}
	return b.NewMetaFetcher(reg, filters), nil
}

// NewMetaFetcher transforms the BaseFetcher into an actually usable *MetaFetcher.
func (f *BaseFetcher) NewMetaFetcher(reg prometheus.Registerer, filters []MetadataFilter, logTags ...interface{}) *MetaFetcher {
	return &MetaFetcher{metrics: NewFetcherMetrics(reg, nil, nil), wrapped: f, filters: filters, logger: log.With(f.logger, logTags...)}
}

var (
	ErrorSyncMetaNotFound  = errors.New("meta.json not found")
	ErrorSyncMetaCorrupted = errors.New("meta.json corrupted")
)

// loadMeta returns the block metadata from object storage, or an error.
// It returns the `ErrorSyncMetaNotFound` and `ErrorSyncMetaCorrupted` sentinel errors when the meta.json is missing or corrupted, respectively.
func (f *BaseFetcher) loadMeta(ctx context.Context, id ulid.ULID) (*metadata.Meta, error) {
	var (
		metaFile       = path.Join(id.String(), MetaFilename)
		cachedBlockDir = filepath.Join(f.cacheDir, id.String())
	)

	if m, seen := f.cached[id]; seen {
		return m, nil
	}

	// Best effort load from local dir.
	if f.cacheDir != "" {
		m, err := metadata.ReadFromDir(cachedBlockDir)
		if err == nil {
			return m, nil
		}

		if !errors.Is(err, os.ErrNotExist) {
			level.Warn(f.logger).Log("msg", "best effort read of the local meta.json failed; removing cached block dir", "dir", cachedBlockDir, "err", err)
			if err := os.RemoveAll(cachedBlockDir); err != nil {
				level.Warn(f.logger).Log("msg", "best effort remove of cached dir failed; ignoring", "dir", cachedBlockDir, "err", err)
			}
		}
	}

	r, err := f.bkt.ReaderWithExpectedErrs(f.bkt.IsObjNotFoundErr).Get(ctx, metaFile)
	if f.bkt.IsObjNotFoundErr(err) {
		// Meta.json was deleted between bkt.Exists and here.
		return nil, errors.Wrapf(ErrorSyncMetaNotFound, "%v", err)
	}
	if err != nil {
		return nil, errors.Wrapf(err, "get meta file: %v", metaFile)
	}

	defer runutil.CloseWithLogOnErr(f.logger, r, "close bkt meta get")

	metaContent, err := io.ReadAll(r)
	if err != nil {
		return nil, errors.Wrapf(err, "read meta file: %v", metaFile)
	}

	m := &metadata.Meta{}
	if err := json.Unmarshal(metaContent, m); err != nil {
		return nil, errors.Wrapf(ErrorSyncMetaCorrupted, "meta.json %v unmarshal: %v", metaFile, err)
	}

	if m.Version != metadata.TSDBVersion1 {
		return nil, errors.Errorf("unexpected meta file: %s version: %d", metaFile, m.Version)
	}

	// Best effort cache in local dir.
	if f.cacheDir != "" {
		if err := os.MkdirAll(cachedBlockDir, os.ModePerm); err != nil {
			level.Warn(f.logger).Log("msg", "best effort mkdir of the meta.json block dir failed; ignoring", "dir", cachedBlockDir, "err", err)
		}

		if err := m.WriteToDir(f.logger, cachedBlockDir); err != nil {
			level.Warn(f.logger).Log("msg", "best effort save of the meta.json to local dir failed; ignoring", "dir", cachedBlockDir, "err", err)
		}
	}
	return m, nil
}

type response struct {
	metas   map[ulid.ULID]*metadata.Meta
	partial map[ulid.ULID]error
	// If metaErrs is non-empty it means an incomplete view: some metas failed to be loaded.
	metaErrs errutil.MultiError

	noMetas        float64
	corruptedMetas float64
}

func (f *BaseFetcher) fetchMetadata(ctx context.Context) (interface{}, error) {
	f.syncs.Inc()

	var (
		resp = response{
			metas:   make(map[ulid.ULID]*metadata.Meta),
			partial: make(map[ulid.ULID]error),
		}
		eg  errgroup.Group
		ch  = make(chan ulid.ULID, f.concurrency)
		mtx sync.Mutex
	)
	level.Debug(f.logger).Log("msg", "fetching meta data", "concurrency", f.concurrency)
	for i := 0; i < f.concurrency; i++ {
		eg.Go(func() error {
			for id := range ch {
				meta, err := f.loadMeta(ctx, id)
				if err == nil {
					mtx.Lock()
					resp.metas[id] = meta
					mtx.Unlock()
					continue
				}

				switch errors.Cause(err) {
				default:
					mtx.Lock()
					resp.metaErrs.Add(err)
					mtx.Unlock()
					continue
				case ErrorSyncMetaNotFound:
					mtx.Lock()
					resp.noMetas++
					mtx.Unlock()
				case ErrorSyncMetaCorrupted:
					mtx.Lock()
					resp.corruptedMetas++
					mtx.Unlock()
				}

				mtx.Lock()
				resp.partial[id] = err
				mtx.Unlock()
			}
			return nil
		})
	}

	partialBlocks := make(map[ulid.ULID]bool)
	// Workers scheduled, distribute blocks.
	eg.Go(func() error {
		defer close(ch)
		return f.bkt.Iter(ctx, "", func(name string) error {
			parts := strings.Split(name, "/")
			dir, file := parts[0], parts[len(parts)-1]
			id, ok := IsBlockDir(dir)
			if !ok {
				return nil
			}
			if _, ok := partialBlocks[id]; !ok {
				partialBlocks[id] = true
			}
			if !IsBlockMetaFile(file) {
				return nil
			}
			partialBlocks[id] = false

			select {
			case <-ctx.Done():
				return ctx.Err()
			case ch <- id:
			}

			return nil
		}, objstore.WithRecursiveIter)
	})

	if err := eg.Wait(); err != nil {
		return nil, errors.Wrap(err, "BaseFetcher: iter bucket")
	}

	mtx.Lock()
	for blockULID, isPartial := range partialBlocks {
		if isPartial {
			resp.partial[blockULID] = errors.Errorf("block %s has no meta file", blockULID)
			resp.noMetas++
		}
	}
	mtx.Unlock()

	if len(resp.metaErrs) > 0 {
		return resp, nil
	}

	// Update the cache only when we have a complete view of the blocks.
	cached := make(map[ulid.ULID]*metadata.Meta, len(resp.metas))
	for id, m := range resp.metas {
		cached[id] = m
	}

	f.mtx.Lock()
	f.cached = cached
	f.mtx.Unlock()

	// Best effort cleanup of disk-cached metas.
	if f.cacheDir != "" {
		fis, err := os.ReadDir(f.cacheDir)
		names := make([]string, 0, len(fis))
		for _, fi := range fis {
			names = append(names, fi.Name())
		}
		if err != nil {
			level.Warn(f.logger).Log("msg", "best effort remove of not needed cached dirs failed; ignoring", "err", err)
		} else {
			for _, n := range names {
				id, ok := IsBlockDir(n)
				if !ok {
					continue
				}

				if _, ok := resp.metas[id]; ok {
					continue
				}

				cachedBlockDir := filepath.Join(f.cacheDir, id.String())

				// No such block loaded, remove the local dir.
				if err := os.RemoveAll(cachedBlockDir); err != nil {
					level.Warn(f.logger).Log("msg", "best effort remove of not needed cached dir failed; ignoring", "dir", cachedBlockDir, "err", err)
				}
			}
		}
	}
	return resp, nil
}

func (f *BaseFetcher) fetch(ctx context.Context, metrics *FetcherMetrics, filters []MetadataFilter) (_ map[ulid.ULID]*metadata.Meta, _ map[ulid.ULID]error, err error) {
	start := time.Now()
	defer func() {
		metrics.SyncDuration.Observe(time.Since(start).Seconds())
		if err != nil {
			metrics.SyncFailures.Inc()
		}
	}()
	metrics.Syncs.Inc()
	metrics.ResetTx()

	// Run this in a thread-safe run group.
	// TODO(bwplotka): Consider custom singleflight with ttl.
	v, err := f.g.Do("", func() (i interface{}, err error) {
		// NOTE: Only the first goroutine's context will go through.
		return f.fetchMetadata(ctx)
	})
	if err != nil {
		return nil, nil, err
	}
	resp := v.(response)

	// Copy, as the same response might be reused by different goroutines.
	metas := make(map[ulid.ULID]*metadata.Meta, len(resp.metas))
	for id, m := range resp.metas {
		metas[id] = m
	}

	metrics.Synced.WithLabelValues(FailedMeta).Set(float64(len(resp.metaErrs)))
	metrics.Synced.WithLabelValues(NoMeta).Set(resp.noMetas)
	metrics.Synced.WithLabelValues(CorruptedMeta).Set(resp.corruptedMetas)

	for _, filter := range filters {
		// NOTE: A filter can update the synced metric according to the reason for the exclusion.
		if err := filter.Filter(ctx, metas, metrics.Synced, metrics.Modified); err != nil {
			return nil, nil, errors.Wrap(err, "filter metas")
		}
	}

	metrics.Synced.WithLabelValues(LoadedMeta).Set(float64(len(metas)))
	metrics.Submit()

	if len(resp.metaErrs) > 0 {
		return metas, resp.partial, errors.Wrap(resp.metaErrs.Err(), "incomplete view")
	}

	level.Info(f.logger).Log("msg", "successfully synchronized block metadata", "duration", time.Since(start).String(), "duration_ms", time.Since(start).Milliseconds(), "cached", f.countCached(), "returned", len(metas), "partial", len(resp.partial))
	return metas, resp.partial, nil
}

func (f *BaseFetcher) countCached() int {
	f.mtx.Lock()
	defer f.mtx.Unlock()

	return len(f.cached)
}

type MetaFetcher struct {
	wrapped *BaseFetcher
	metrics *FetcherMetrics

	filters []MetadataFilter

	listener func([]metadata.Meta, error)

	logger log.Logger
}

// Fetch returns all block metas as well as partial blocks (blocks with a missing or corrupted meta file) from the bucket.
// It's the caller's responsibility not to change the returned metadata files; the maps themselves can be modified.
//
// A returned error indicates a failure in fetching metadata. The returned metas can still be assumed correct, just with some blocks missing.
func (f *MetaFetcher) Fetch(ctx context.Context) (metas map[ulid.ULID]*metadata.Meta, partial map[ulid.ULID]error, err error) {
	metas, partial, err = f.wrapped.fetch(ctx, f.metrics, f.filters)
	if f.listener != nil {
		blocks := make([]metadata.Meta, 0, len(metas))
		for _, meta := range metas {
			blocks = append(blocks, *meta)
		}
		f.listener(blocks, err)
	}
	return metas, partial, err
}

// UpdateOnChange registers a listener that will be updated on every change.
func (f *MetaFetcher) UpdateOnChange(listener func([]metadata.Meta, error)) {
	f.listener = listener
}

var _ MetadataFilter = &TimePartitionMetaFilter{}

// TimePartitionMetaFilter is a BaseFetcher filter that filters out blocks that are outside of the specified time range.
// Not go-routine safe.
type TimePartitionMetaFilter struct {
	minTime, maxTime model.TimeOrDurationValue
}

// NewTimePartitionMetaFilter creates TimePartitionMetaFilter.
func NewTimePartitionMetaFilter(MinTime, MaxTime model.TimeOrDurationValue) *TimePartitionMetaFilter {
	return &TimePartitionMetaFilter{minTime: MinTime, maxTime: MaxTime}
}

// Filter filters out blocks that are outside of the specified time range.
func (f *TimePartitionMetaFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	for id, m := range metas {
		if m.MaxTime >= f.minTime.PrometheusTimestamp() && m.MinTime <= f.maxTime.PrometheusTimestamp() {
			continue
		}
		synced.WithLabelValues(timeExcludedMeta).Inc()
		delete(metas, id)
	}
	return nil
}

var _ MetadataFilter = &LabelShardedMetaFilter{}

// LabelShardedMetaFilter is a BaseFetcher filter that shards blocks by relabelling their external labels.
// Not go-routine safe.
type LabelShardedMetaFilter struct {
	relabelConfig []*relabel.Config
}

// NewLabelShardedMetaFilter creates LabelShardedMetaFilter.
func NewLabelShardedMetaFilter(relabelConfig []*relabel.Config) *LabelShardedMetaFilter {
	return &LabelShardedMetaFilter{relabelConfig: relabelConfig}
}

// BlockIDLabel is a special label that holds the ULID of the meta.json being referenced.
const BlockIDLabel = "__block_id"

// Filter filters out blocks that have no labels left after relabelling their external (Thanos) labels.
func (f *LabelShardedMetaFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	var lbls labels.Labels
	for id, m := range metas {
		lbls = lbls[:0]
		lbls = append(lbls, labels.Label{Name: BlockIDLabel, Value: id.String()})
		for k, v := range m.Thanos.Labels {
			lbls = append(lbls, labels.Label{Name: k, Value: v})
		}

		if processedLabels, _ := relabel.Process(lbls, f.relabelConfig...); len(processedLabels) == 0 {
			synced.WithLabelValues(labelExcludedMeta).Inc()
			delete(metas, id)
		}
	}
	return nil
}

var _ MetadataFilter = &DefaultDeduplicateFilter{}

type DeduplicateFilter interface {
	DuplicateIDs() []ulid.ULID
}

// DefaultDeduplicateFilter is a BaseFetcher filter that filters out older blocks that have exactly the same data.
// Not go-routine safe.
type DefaultDeduplicateFilter struct {
	duplicateIDs []ulid.ULID
	concurrency  int
	mu           sync.Mutex
}

// NewDeduplicateFilter creates DefaultDeduplicateFilter.
func NewDeduplicateFilter(concurrency int) *DefaultDeduplicateFilter {
	return &DefaultDeduplicateFilter{concurrency: concurrency}
}

// Filter filters out duplicate blocks, i.e. blocks whose compaction sources are fully contained
// in the sources of another (typically newer, compacted) block in the same compaction group.
func (f *DefaultDeduplicateFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	f.duplicateIDs = f.duplicateIDs[:0]

	var wg sync.WaitGroup
	var groupChan = make(chan []*metadata.Meta)

	// Start up workers to deduplicate workgroups when they're ready.
	for i := 0; i < f.concurrency; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for group := range groupChan {
				f.filterGroup(group, metas, synced)
			}
		}()
	}

	// We need only look within a compaction group for duplicates, so splitting by group key gives us parallelizable streams.
	metasByCompactionGroup := make(map[string][]*metadata.Meta)
	for _, meta := range metas {
		groupKey := meta.Thanos.GroupKey()
		metasByCompactionGroup[groupKey] = append(metasByCompactionGroup[groupKey], meta)
	}
	for _, group := range metasByCompactionGroup {
		groupChan <- group
	}
	close(groupChan)
	wg.Wait()

	return nil
}

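// filterGroup deduplicates a single compaction group: it sorts the group's metas by the
// number of compaction sources (descending; ties broken by ULID) and builds a covering set.
// A meta whose sources are fully contained in the sources of an already-kept meta is recorded
// as a duplicate and removed from metas. For example, given blocks with sources {A} and {B}
// and a compacted block with sources {A, B}, the compacted block is kept and the two source
// blocks are filtered out as duplicates.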
func (f *DefaultDeduplicateFilter) filterGroup(metaSlice []*metadata.Meta, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec) {
	sort.Slice(metaSlice, func(i, j int) bool {
		ilen := len(metaSlice[i].Compaction.Sources)
		jlen := len(metaSlice[j].Compaction.Sources)

		if ilen == jlen {
			return metaSlice[i].ULID.Compare(metaSlice[j].ULID) < 0
		}

		return ilen-jlen > 0
	})

	var coveringSet []*metadata.Meta
	var duplicates []ulid.ULID
childLoop:
	for _, child := range metaSlice {
		childSources := child.Compaction.Sources
		for _, parent := range coveringSet {
			parentSources := parent.Compaction.Sources

			// The child's sources are all present in the parent's sources; filter it out.
			if contains(parentSources, childSources) {
				duplicates = append(duplicates, child.ULID)
				continue childLoop
			}
		}

		// The child's sources are not covered by any member of coveringSet; add it to coveringSet.
		coveringSet = append(coveringSet, child)
	}

	f.mu.Lock()
	for _, duplicate := range duplicates {
		if metas[duplicate] != nil {
			f.duplicateIDs = append(f.duplicateIDs, duplicate)
		}
		synced.WithLabelValues(duplicateMeta).Inc()
		delete(metas, duplicate)
	}
	f.mu.Unlock()
}

// DuplicateIDs returns the slice of block IDs that were filtered out by DefaultDeduplicateFilter.
func (f *DefaultDeduplicateFilter) DuplicateIDs() []ulid.ULID {
	return f.duplicateIDs
}

func contains(s1, s2 []ulid.ULID) bool {
	for _, a := range s2 {
		found := false
		for _, e := range s1 {
			if a.Compare(e) == 0 {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}

var _ MetadataFilter = &ReplicaLabelRemover{}

// ReplicaLabelRemover is a BaseFetcher filter that modifies the external labels of existing blocks:
// it removes the given replica labels from the metadata of blocks that have them.
type ReplicaLabelRemover struct {
	logger log.Logger

	replicaLabels []string
}

// NewReplicaLabelRemover creates a ReplicaLabelRemover.
func NewReplicaLabelRemover(logger log.Logger, replicaLabels []string) *ReplicaLabelRemover {
	return &ReplicaLabelRemover{logger: logger, replicaLabels: replicaLabels}
}

// Filter modifies the external labels of existing blocks: it removes the given replica labels from the metadata of blocks that have them.
func (r *ReplicaLabelRemover) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	if len(r.replicaLabels) == 0 {
		return nil
	}

	countReplicaLabelRemoved := make(map[string]int, len(metas))
	for u, meta := range metas {
		l := make(map[string]string)
		for n, v := range meta.Thanos.Labels {
			l[n] = v
		}

		for _, replicaLabel := range r.replicaLabels {
			if _, exists := l[replicaLabel]; exists {
				delete(l, replicaLabel)
				countReplicaLabelRemoved[replicaLabel] += 1
				modified.WithLabelValues(replicaRemovedMeta).Inc()
			}
		}
		if len(l) == 0 {
			level.Warn(r.logger).Log("msg", "block has no labels left, creating one", r.replicaLabels[0], "deduped")
			l[r.replicaLabels[0]] = "deduped"
		}

		nm := *meta
		nm.Thanos.Labels = l
		metas[u] = &nm
	}
	for replicaLabelRemoved, count := range countReplicaLabelRemoved {
		level.Debug(r.logger).Log("msg", "removed replica label", "label", replicaLabelRemoved, "count", count)
	}
	return nil
}

// ConsistencyDelayMetaFilter is a BaseFetcher filter that filters out blocks that are younger than the specified consistency delay.
// Not go-routine safe.
type ConsistencyDelayMetaFilter struct {
	logger           log.Logger
	consistencyDelay time.Duration
}

// NewConsistencyDelayMetaFilter creates ConsistencyDelayMetaFilter.
func NewConsistencyDelayMetaFilter(logger log.Logger, consistencyDelay time.Duration, reg prometheus.Registerer) *ConsistencyDelayMetaFilter {
	if logger == nil {
		logger = log.NewNopLogger()
	}
	_ = promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{
		Name: "consistency_delay_seconds",
		Help: "Configured consistency delay in seconds.",
	}, func() float64 {
		return consistencyDelay.Seconds()
	})

	return &ConsistencyDelayMetaFilter{
		logger:           logger,
		consistencyDelay: consistencyDelay,
	}
}

// Filter filters out blocks that are younger than the specified consistency delay.
func (f *ConsistencyDelayMetaFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	for id, meta := range metas {
		// TODO(khyatisoneji): Remove the checks about Thanos Source
		//  by implementing delete delay to fetch metas.
		// TODO(bwplotka): Check consistency delay based on file upload / modification time instead of ULID.
		if ulid.Now()-id.Time() < uint64(f.consistencyDelay/time.Millisecond) &&
			meta.Thanos.Source != metadata.BucketRepairSource &&
			meta.Thanos.Source != metadata.CompactorSource &&
			meta.Thanos.Source != metadata.CompactorRepairSource {

			level.Debug(f.logger).Log("msg", "block is too fresh for now", "block", id)
			synced.WithLabelValues(tooFreshMeta).Inc()
			delete(metas, id)
		}
	}

	return nil
}

// IgnoreDeletionMarkFilter is a filter that filters out the blocks that are marked for deletion after a given delay.
// The delay duration is to make sure that the replacement block can be fetched before we filter out the old block.
// Delay is not considered when computing the DeletionMarkBlocks map.
// Not go-routine safe.
type IgnoreDeletionMarkFilter struct {
	logger      log.Logger
	delay       time.Duration
	concurrency int
	bkt         objstore.InstrumentedBucketReader

	mtx             sync.Mutex
	deletionMarkMap map[ulid.ULID]*metadata.DeletionMark
}

// NewIgnoreDeletionMarkFilter creates IgnoreDeletionMarkFilter.
func NewIgnoreDeletionMarkFilter(logger log.Logger, bkt objstore.InstrumentedBucketReader, delay time.Duration, concurrency int) *IgnoreDeletionMarkFilter {
	return &IgnoreDeletionMarkFilter{
		logger:      logger,
		bkt:         bkt,
		delay:       delay,
		concurrency: concurrency,
	}
}

// DeletionMarkBlocks returns the block IDs that were marked for deletion.
func (f *IgnoreDeletionMarkFilter) DeletionMarkBlocks() map[ulid.ULID]*metadata.DeletionMark {
	f.mtx.Lock()
	defer f.mtx.Unlock()

	deletionMarkMap := make(map[ulid.ULID]*metadata.DeletionMark, len(f.deletionMarkMap))
	for id, meta := range f.deletionMarkMap {
		deletionMarkMap[id] = meta
	}

	return deletionMarkMap
}

// Filter filters out blocks that are marked for deletion after a given delay.
// It also records every block marked for deletion, regardless of the delay, so it can be returned by DeletionMarkBlocks.
func (f *IgnoreDeletionMarkFilter) Filter(ctx context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	deletionMarkMap := make(map[ulid.ULID]*metadata.DeletionMark)

	// Make a copy of block IDs to check, in order to avoid concurrency issues
	// between the scheduler and workers.
	blockIDs := make([]ulid.ULID, 0, len(metas))
	for id := range metas {
		blockIDs = append(blockIDs, id)
	}

	var (
		eg  errgroup.Group
		ch  = make(chan ulid.ULID, f.concurrency)
		mtx sync.Mutex
	)

	for i := 0; i < f.concurrency; i++ {
		eg.Go(func() error {
			var lastErr error
			for id := range ch {
				m := &metadata.DeletionMark{}
				if err := metadata.ReadMarker(ctx, f.logger, f.bkt, id.String(), m); err != nil {
					if errors.Cause(err) == metadata.ErrorMarkerNotFound {
						continue
					}
					if errors.Cause(err) == metadata.ErrorUnmarshalMarker {
						level.Warn(f.logger).Log("msg", "found partial deletion-mark.json; if we will see it happening often for the same block, consider manually deleting deletion-mark.json from the object storage", "block", id, "err", err)
						continue
					}
					// Remember the last error and continue to drain the channel.
					lastErr = err
					continue
				}

				// Keep track of the blocks marked for deletion and filter them out if they
				// were marked for deletion longer ago than the configured delay.
				mtx.Lock()
				deletionMarkMap[id] = m
				if time.Since(time.Unix(m.DeletionTime, 0)).Seconds() > f.delay.Seconds() {
					synced.WithLabelValues(MarkedForDeletionMeta).Inc()
					delete(metas, id)
				}
				mtx.Unlock()
			}

			return lastErr
		})
	}

	// Workers scheduled, distribute blocks.
	eg.Go(func() error {
		defer close(ch)

		for _, id := range blockIDs {
			select {
			case ch <- id:
				// Nothing to do.
			case <-ctx.Done():
				return ctx.Err()
			}
		}

		return nil
	})

	if err := eg.Wait(); err != nil {
		return errors.Wrap(err, "filter blocks marked for deletion")
	}

	f.mtx.Lock()
	f.deletionMarkMap = deletionMarkMap
	f.mtx.Unlock()

	return nil
}

var (
	SelectorSupportedRelabelActions = map[relabel.Action]struct{}{relabel.Keep: {}, relabel.Drop: {}, relabel.HashMod: {}}
)

// ParseRelabelConfig parses relabel configuration.
// If supportedActions is not specified, all relabel actions are valid.
func ParseRelabelConfig(contentYaml []byte, supportedActions map[relabel.Action]struct{}) ([]*relabel.Config, error) {
	var relabelConfig []*relabel.Config
	if err := yaml.Unmarshal(contentYaml, &relabelConfig); err != nil {
		return nil, errors.Wrap(err, "parsing relabel configuration")
	}

	if supportedActions != nil {
		for _, cfg := range relabelConfig {
			if _, ok := supportedActions[cfg.Action]; !ok {
				return nil, errors.Errorf("unsupported relabel action: %v", cfg.Action)
			}
		}
	}

	return relabelConfig, nil
}
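// The example below is an illustrative sketch and is NOT part of the original file.
// It shows one plausible way to wire a MetaFetcher with the filters defined above.
// The bucket reader is assumed to be constructed elsewhere; the "tenant" label and
// its value are hypothetical.
func exampleNewFilteredFetcher(bkt objstore.InstrumentedBucketReader) (*MetaFetcher, error) {
	logger := log.NewNopLogger()
	reg := prometheus.NewRegistry()

	// Shard by external labels: keep only blocks whose tenant label matches team-a.
	relabelCfg, err := ParseRelabelConfig([]byte(`
- action: keep
  source_labels: ["tenant"]
  regex: "team-a"
`), SelectorSupportedRelabelActions)
	if err != nil {
		return nil, err
	}

	filters := []MetadataFilter{
		NewLabelShardedMetaFilter(relabelCfg),
		NewConsistencyDelayMetaFilter(logger, 30*time.Minute, reg),
		NewIgnoreDeletionMarkFilter(logger, bkt, 48*time.Hour, FetcherConcurrency),
		NewDeduplicateFilter(FetcherConcurrency),
	}

	// An empty dir disables the local meta.json cache.
	return NewMetaFetcher(logger, FetcherConcurrency, bkt, "", reg, filters)
}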