github.com/thanos-io/thanos@v0.32.5/pkg/block/fetcher.go

// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package block

import (
	"context"
	"encoding/json"
	"io"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/golang/groupcache/singleflight"
	"github.com/oklog/ulid"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/model/relabel"
	"github.com/thanos-io/objstore"
	"golang.org/x/sync/errgroup"
	"gopkg.in/yaml.v2"

	"github.com/thanos-io/thanos/pkg/block/metadata"
	"github.com/thanos-io/thanos/pkg/errutil"
	"github.com/thanos-io/thanos/pkg/extprom"
	"github.com/thanos-io/thanos/pkg/model"
	"github.com/thanos-io/thanos/pkg/runutil"
)

const FetcherConcurrency = 32
// FetcherMetrics holds metrics tracked by the metadata fetcher. This struct and its fields are exported
// to allow depending projects (e.g. Cortex) to implement their own custom metadata fetcher while tracking
// compatible metrics.
type FetcherMetrics struct {
	Syncs        prometheus.Counter
	SyncFailures prometheus.Counter
	SyncDuration prometheus.Histogram

	Synced   *extprom.TxGaugeVec
	Modified *extprom.TxGaugeVec
}

// Submit applies the new values for metrics tracked by the transactional gauge vectors.
func (s *FetcherMetrics) Submit() {
	s.Synced.Submit()
	s.Modified.Submit()
}

// ResetTx starts a new transaction for metrics tracked by the transactional gauge vectors.
func (s *FetcherMetrics) ResetTx() {
	s.Synced.ResetTx()
	s.Modified.ResetTx()
}

const (
	fetcherSubSys = "blocks_meta"

	CorruptedMeta = "corrupted-meta-json"
	NoMeta        = "no-meta-json"
	LoadedMeta    = "loaded"
	FailedMeta    = "failed"

	// Synced label values.
	labelExcludedMeta = "label-excluded"
	timeExcludedMeta  = "time-excluded"
	tooFreshMeta      = "too-fresh"
	duplicateMeta     = "duplicate"
	// Blocks that are marked for deletion can be loaded as well. This is done to make sure that we load blocks that are meant to be deleted,
	// but don't have a replacement block yet.
	MarkedForDeletionMeta = "marked-for-deletion"

	// MarkedForNoCompactionMeta is the label for blocks which are loaded but also marked for no compaction. This label is also counted in the `loaded` label metric.
	MarkedForNoCompactionMeta = "marked-for-no-compact"

	// MarkedForNoDownsampleMeta is the label for blocks which are loaded but also marked for no downsampling. This label is also counted in the `loaded` label metric.
	MarkedForNoDownsampleMeta = "marked-for-no-downsample"

	// Modified label values.
	replicaRemovedMeta = "replica-label-removed"
)

func NewFetcherMetrics(reg prometheus.Registerer, syncedExtraLabels, modifiedExtraLabels [][]string) *FetcherMetrics {
	var m FetcherMetrics

	m.Syncs = promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Subsystem: fetcherSubSys,
		Name:      "syncs_total",
		Help:      "Total blocks metadata synchronization attempts",
	})
	m.SyncFailures = promauto.With(reg).NewCounter(prometheus.CounterOpts{
		Subsystem: fetcherSubSys,
		Name:      "sync_failures_total",
		Help:      "Total blocks metadata synchronization failures",
	})
	m.SyncDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
		Subsystem: fetcherSubSys,
		Name:      "sync_duration_seconds",
		Help:      "Duration of the blocks metadata synchronization in seconds",
		Buckets:   []float64{0.01, 1, 10, 100, 300, 600, 1000},
	})
	m.Synced = extprom.NewTxGaugeVec(
		reg,
		prometheus.GaugeOpts{
			Subsystem: fetcherSubSys,
			Name:      "synced",
			Help:      "Number of block metadata synced",
		},
		[]string{"state"},
		append([][]string{
			{CorruptedMeta},
			{NoMeta},
			{LoadedMeta},
			{tooFreshMeta},
			{FailedMeta},
			{labelExcludedMeta},
			{timeExcludedMeta},
			{duplicateMeta},
			{MarkedForDeletionMeta},
			{MarkedForNoCompactionMeta},
		}, syncedExtraLabels...)...,
	)
	m.Modified = extprom.NewTxGaugeVec(
		reg,
		prometheus.GaugeOpts{
			Subsystem: fetcherSubSys,
			Name:      "modified",
			Help:      "Number of blocks whose metadata changed",
		},
		[]string{"modified"},
		append([][]string{
			{replicaRemovedMeta},
		}, modifiedExtraLabels...)...,
	)
	return &m
}

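// Example (illustrative sketch, not part of the upstream file): tracking a custom
// "synced" state from a depending project. The "my-custom-filter" label value is
// hypothetical.
//
//	reg := prometheus.NewRegistry()
//	m := NewFetcherMetrics(reg, [][]string{{"my-custom-filter"}}, nil)
//	m.ResetTx()
//	m.Synced.WithLabelValues("my-custom-filter").Inc()
//	m.Submit()
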
type MetadataFetcher interface {
	Fetch(ctx context.Context) (metas map[ulid.ULID]*metadata.Meta, partial map[ulid.ULID]error, err error)
	UpdateOnChange(func([]metadata.Meta, error))
}

// GaugeVec abstracts a Prometheus GaugeVec or an extprom.TxGaugeVec.
type GaugeVec interface {
	WithLabelValues(lvs ...string) prometheus.Gauge
}

// MetadataFilter allows filtering or modifying metas in the provided map, or returns an error.
type MetadataFilter interface {
	Filter(ctx context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error
}

// BaseFetcher is a struct that synchronizes the filtered metadata of all blocks in the object storage with the local state.
// Goroutine-safe.
type BaseFetcher struct {
	logger      log.Logger
	concurrency int
	bkt         objstore.InstrumentedBucketReader

	// Optional local directory to cache meta.json files.
	cacheDir string
	syncs    prometheus.Counter
	g        singleflight.Group

	mtx    sync.Mutex
	cached map[ulid.ULID]*metadata.Meta
}

// NewBaseFetcher constructs BaseFetcher.
func NewBaseFetcher(logger log.Logger, concurrency int, bkt objstore.InstrumentedBucketReader, dir string, reg prometheus.Registerer) (*BaseFetcher, error) {
	if logger == nil {
		logger = log.NewNopLogger()
	}

	cacheDir := ""
	if dir != "" {
		cacheDir = filepath.Join(dir, "meta-syncer")
		if err := os.MkdirAll(cacheDir, os.ModePerm); err != nil {
			return nil, err
		}
	}

	return &BaseFetcher{
		logger:      log.With(logger, "component", "block.BaseFetcher"),
		concurrency: concurrency,
		bkt:         bkt,
		cacheDir:    cacheDir,
		cached:      map[ulid.ULID]*metadata.Meta{},
		syncs: promauto.With(reg).NewCounter(prometheus.CounterOpts{
			Subsystem: fetcherSubSys,
			Name:      "base_syncs_total",
			Help:      "Total blocks metadata synchronization attempts by base Fetcher",
		}),
	}, nil
}

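// Note (added for clarity): when dir is non-empty, meta.json files are cached on
// disk under <dir>/meta-syncer/<block ULID>/meta.json; see loadMeta below.
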
// NewRawMetaFetcher returns a basic meta fetcher without proper handling for eventually consistent backends or partial uploads.
// NOTE: Not suitable for use in production.
func NewRawMetaFetcher(logger log.Logger, bkt objstore.InstrumentedBucketReader) (*MetaFetcher, error) {
	return NewMetaFetcher(logger, 1, bkt, "", nil, nil)
}

// NewMetaFetcher returns a meta fetcher.
func NewMetaFetcher(logger log.Logger, concurrency int, bkt objstore.InstrumentedBucketReader, dir string, reg prometheus.Registerer, filters []MetadataFilter) (*MetaFetcher, error) {
	b, err := NewBaseFetcher(logger, concurrency, bkt, dir, reg)
	if err != nil {
		return nil, err
	}
	return b.NewMetaFetcher(reg, filters), nil
}

// NewMetaFetcher transforms BaseFetcher into an actually usable *MetaFetcher.
func (f *BaseFetcher) NewMetaFetcher(reg prometheus.Registerer, filters []MetadataFilter, logTags ...interface{}) *MetaFetcher {
	return &MetaFetcher{metrics: NewFetcherMetrics(reg, nil, nil), wrapped: f, filters: filters, logger: log.With(f.logger, logTags...)}
}

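// Example (sketch, assuming bkt is an objstore.InstrumentedBucketReader and
// logger/reg/ctx exist; the cache directory path is illustrative):
//
//	fetcher, err := NewMetaFetcher(logger, FetcherConcurrency, bkt, "/var/thanos/cache", reg, []MetadataFilter{
//		NewIgnoreDeletionMarkFilter(logger, bkt, 48*time.Hour, FetcherConcurrency),
//		NewDeduplicateFilter(FetcherConcurrency),
//	})
//	if err != nil {
//		// handle construction error
//	}
//	metas, partial, err := fetcher.Fetch(ctx)
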
var (
	ErrorSyncMetaNotFound  = errors.New("meta.json not found")
	ErrorSyncMetaCorrupted = errors.New("meta.json corrupted")
)

// loadMeta returns the block metadata from object storage, or an error.
// It returns the `ErrorSyncMetaNotFound` and `ErrorSyncMetaCorrupted` sentinel errors in the corresponding cases.
func (f *BaseFetcher) loadMeta(ctx context.Context, id ulid.ULID) (*metadata.Meta, error) {
	var (
		metaFile       = path.Join(id.String(), MetaFilename)
		cachedBlockDir = filepath.Join(f.cacheDir, id.String())
	)

	if m, seen := f.cached[id]; seen {
		return m, nil
	}

	// Best effort load from the local dir.
	if f.cacheDir != "" {
		m, err := metadata.ReadFromDir(cachedBlockDir)
		if err == nil {
			return m, nil
		}

		if !errors.Is(err, os.ErrNotExist) {
			level.Warn(f.logger).Log("msg", "best effort read of the local meta.json failed; removing cached block dir", "dir", cachedBlockDir, "err", err)
			if err := os.RemoveAll(cachedBlockDir); err != nil {
				level.Warn(f.logger).Log("msg", "best effort remove of cached dir failed; ignoring", "dir", cachedBlockDir, "err", err)
			}
		}
	}

	r, err := f.bkt.ReaderWithExpectedErrs(f.bkt.IsObjNotFoundErr).Get(ctx, metaFile)
	if f.bkt.IsObjNotFoundErr(err) {
		// Meta.json was deleted between bkt.Exists and here.
		return nil, errors.Wrapf(ErrorSyncMetaNotFound, "%v", err)
	}
	if err != nil {
		return nil, errors.Wrapf(err, "get meta file: %v", metaFile)
	}

	defer runutil.CloseWithLogOnErr(f.logger, r, "close bkt meta get")

	metaContent, err := io.ReadAll(r)
	if err != nil {
		return nil, errors.Wrapf(err, "read meta file: %v", metaFile)
	}

	m := &metadata.Meta{}
	if err := json.Unmarshal(metaContent, m); err != nil {
		return nil, errors.Wrapf(ErrorSyncMetaCorrupted, "meta.json %v unmarshal: %v", metaFile, err)
	}

	if m.Version != metadata.TSDBVersion1 {
		return nil, errors.Errorf("unexpected meta file: %s version: %d", metaFile, m.Version)
	}

	// Best effort cache in the local dir.
	if f.cacheDir != "" {
		if err := os.MkdirAll(cachedBlockDir, os.ModePerm); err != nil {
			level.Warn(f.logger).Log("msg", "best effort mkdir of the meta.json block dir failed; ignoring", "dir", cachedBlockDir, "err", err)
		}

		if err := m.WriteToDir(f.logger, cachedBlockDir); err != nil {
			level.Warn(f.logger).Log("msg", "best effort save of the meta.json to local dir failed; ignoring", "dir", cachedBlockDir, "err", err)
		}
	}
	return m, nil
}

type response struct {
	metas   map[ulid.ULID]*metadata.Meta
	partial map[ulid.ULID]error
	// If metaErrs is non-empty, the view is incomplete: some metas failed to load.
	metaErrs errutil.MultiError

	noMetas        float64
	corruptedMetas float64
}

func (f *BaseFetcher) fetchMetadata(ctx context.Context) (interface{}, error) {
	f.syncs.Inc()

	var (
		resp = response{
			metas:   make(map[ulid.ULID]*metadata.Meta),
			partial: make(map[ulid.ULID]error),
		}
		eg  errgroup.Group
		ch  = make(chan ulid.ULID, f.concurrency)
		mtx sync.Mutex
	)
	level.Debug(f.logger).Log("msg", "fetching meta data", "concurrency", f.concurrency)
	for i := 0; i < f.concurrency; i++ {
		eg.Go(func() error {
			for id := range ch {
				meta, err := f.loadMeta(ctx, id)
				if err == nil {
					mtx.Lock()
					resp.metas[id] = meta
					mtx.Unlock()
					continue
				}

				switch errors.Cause(err) {
				default:
					mtx.Lock()
					resp.metaErrs.Add(err)
					mtx.Unlock()
					continue
				case ErrorSyncMetaNotFound:
					mtx.Lock()
					resp.noMetas++
					mtx.Unlock()
				case ErrorSyncMetaCorrupted:
					mtx.Lock()
					resp.corruptedMetas++
					mtx.Unlock()
				}

				mtx.Lock()
				resp.partial[id] = err
				mtx.Unlock()
			}
			return nil
		})
	}

	partialBlocks := make(map[ulid.ULID]bool)
	// Workers scheduled, distribute blocks.
	eg.Go(func() error {
		defer close(ch)
		return f.bkt.Iter(ctx, "", func(name string) error {
			parts := strings.Split(name, "/")
			dir, file := parts[0], parts[len(parts)-1]
			id, ok := IsBlockDir(dir)
			if !ok {
				return nil
			}
			if _, ok := partialBlocks[id]; !ok {
				partialBlocks[id] = true
			}
			if !IsBlockMetaFile(file) {
				return nil
			}
			partialBlocks[id] = false

			select {
			case <-ctx.Done():
				return ctx.Err()
			case ch <- id:
			}

			return nil
		}, objstore.WithRecursiveIter)
	})

	if err := eg.Wait(); err != nil {
		return nil, errors.Wrap(err, "BaseFetcher: iter bucket")
	}

	mtx.Lock()
	for blockULID, isPartial := range partialBlocks {
		if isPartial {
			resp.partial[blockULID] = errors.Errorf("block %s has no meta file", blockULID)
			resp.noMetas++
		}
	}
	mtx.Unlock()

	if len(resp.metaErrs) > 0 {
		return resp, nil
	}

	// Only update the cache when we have a complete view of blocks.
	cached := make(map[ulid.ULID]*metadata.Meta, len(resp.metas))
	for id, m := range resp.metas {
		cached[id] = m
	}

	f.mtx.Lock()
	f.cached = cached
	f.mtx.Unlock()

	// Best effort cleanup of disk-cached metas.
	if f.cacheDir != "" {
		fis, err := os.ReadDir(f.cacheDir)
		if err != nil {
			level.Warn(f.logger).Log("msg", "best effort remove of not needed cached dirs failed; ignoring", "err", err)
		} else {
			for _, fi := range fis {
				id, ok := IsBlockDir(fi.Name())
				if !ok {
					continue
				}

				if _, ok := resp.metas[id]; ok {
					continue
				}

				cachedBlockDir := filepath.Join(f.cacheDir, id.String())

				// No such block loaded, remove the local dir.
				if err := os.RemoveAll(cachedBlockDir); err != nil {
					level.Warn(f.logger).Log("msg", "best effort remove of not needed cached dir failed; ignoring", "dir", cachedBlockDir, "err", err)
				}
			}
		}
	}
	return resp, nil
}

func (f *BaseFetcher) fetch(ctx context.Context, metrics *FetcherMetrics, filters []MetadataFilter) (_ map[ulid.ULID]*metadata.Meta, _ map[ulid.ULID]error, err error) {
	start := time.Now()
	defer func() {
		metrics.SyncDuration.Observe(time.Since(start).Seconds())
		if err != nil {
			metrics.SyncFailures.Inc()
		}
	}()
	metrics.Syncs.Inc()
	metrics.ResetTx()

	// Run this in a thread-safe run group.
	// TODO(bwplotka): Consider custom singleflight with ttl.
	v, err := f.g.Do("", func() (i interface{}, err error) {
		// NOTE: The first goroutine's context will be used here.
		return f.fetchMetadata(ctx)
	})
	if err != nil {
		return nil, nil, err
	}
	resp := v.(response)

	// Copy the metas, as the same response might be reused by different goroutines.
	metas := make(map[ulid.ULID]*metadata.Meta, len(resp.metas))
	for id, m := range resp.metas {
		metas[id] = m
	}

	metrics.Synced.WithLabelValues(FailedMeta).Set(float64(len(resp.metaErrs)))
	metrics.Synced.WithLabelValues(NoMeta).Set(resp.noMetas)
	metrics.Synced.WithLabelValues(CorruptedMeta).Set(resp.corruptedMetas)

	for _, filter := range filters {
		// NOTE: A filter can update the synced metric according to the reason for the exclusion.
		if err := filter.Filter(ctx, metas, metrics.Synced, metrics.Modified); err != nil {
			return nil, nil, errors.Wrap(err, "filter metas")
		}
	}

	metrics.Synced.WithLabelValues(LoadedMeta).Set(float64(len(metas)))
	metrics.Submit()

	if len(resp.metaErrs) > 0 {
		return metas, resp.partial, errors.Wrap(resp.metaErrs.Err(), "incomplete view")
	}

	level.Info(f.logger).Log("msg", "successfully synchronized block metadata", "duration", time.Since(start).String(), "duration_ms", time.Since(start).Milliseconds(), "cached", f.countCached(), "returned", len(metas), "partial", len(resp.partial))
	return metas, resp.partial, nil
}

func (f *BaseFetcher) countCached() int {
	f.mtx.Lock()
	defer f.mtx.Unlock()

	return len(f.cached)
}

type MetaFetcher struct {
	wrapped *BaseFetcher
	metrics *FetcherMetrics

	filters []MetadataFilter

	listener func([]metadata.Meta, error)

	logger log.Logger
}

// Fetch returns all block metas as well as partial blocks (blocks without or with a corrupted meta file) from the bucket.
// It is the caller's responsibility not to mutate the returned metadata; the maps themselves can be modified.
//
// A returned error indicates a failure in fetching metadata. The returned metas can still be assumed correct, just with some blocks missing.
func (f *MetaFetcher) Fetch(ctx context.Context) (metas map[ulid.ULID]*metadata.Meta, partial map[ulid.ULID]error, err error) {
	metas, partial, err = f.wrapped.fetch(ctx, f.metrics, f.filters)
	if f.listener != nil {
		blocks := make([]metadata.Meta, 0, len(metas))
		for _, meta := range metas {
			blocks = append(blocks, *meta)
		}
		f.listener(blocks, err)
	}
	return metas, partial, err
}

// UpdateOnChange adds a listener that will be updated on every change.
func (f *MetaFetcher) UpdateOnChange(listener func([]metadata.Meta, error)) {
	f.listener = listener
}

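// Example (sketch): a listener that keeps an in-memory snapshot of the most
// recently synced blocks; updateSnapshot is a hypothetical callback.
//
//	fetcher.UpdateOnChange(func(blocks []metadata.Meta, err error) {
//		if err != nil {
//			return
//		}
//		updateSnapshot(blocks)
//	})
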
var _ MetadataFilter = &TimePartitionMetaFilter{}

// TimePartitionMetaFilter is a BaseFetcher filter that filters out blocks that are outside of the specified time range.
// Not goroutine-safe.
type TimePartitionMetaFilter struct {
	minTime, maxTime model.TimeOrDurationValue
}

// NewTimePartitionMetaFilter creates a TimePartitionMetaFilter.
func NewTimePartitionMetaFilter(MinTime, MaxTime model.TimeOrDurationValue) *TimePartitionMetaFilter {
	return &TimePartitionMetaFilter{minTime: MinTime, maxTime: MaxTime}
}

// Filter filters out blocks that are outside of the specified time range.
func (f *TimePartitionMetaFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	for id, m := range metas {
		if m.MaxTime >= f.minTime.PrometheusTimestamp() && m.MinTime <= f.maxTime.PrometheusTimestamp() {
			continue
		}
		synced.WithLabelValues(timeExcludedMeta).Inc()
		delete(metas, id)
	}
	return nil
}

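// Example (sketch): keep only blocks overlapping the last 14 days. The Set
// values follow the TimeOrDurationValue syntax (an RFC3339 time or a duration
// relative to now), assuming "-14d" is accepted by this Thanos version.
//
//	var minT, maxT model.TimeOrDurationValue
//	_ = minT.Set("-14d")
//	_ = maxT.Set("9999-12-31T23:59:59Z")
//	filter := NewTimePartitionMetaFilter(minT, maxT)
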
var _ MetadataFilter = &LabelShardedMetaFilter{}

// LabelShardedMetaFilter is a filter that shards blocks by applying a relabel configuration to their external labels.
// Not goroutine-safe.
type LabelShardedMetaFilter struct {
	relabelConfig []*relabel.Config
}

// NewLabelShardedMetaFilter creates a LabelShardedMetaFilter.
func NewLabelShardedMetaFilter(relabelConfig []*relabel.Config) *LabelShardedMetaFilter {
	return &LabelShardedMetaFilter{relabelConfig: relabelConfig}
}

// BlockIDLabel is a special label that holds the ULID of the meta.json being referenced.
const BlockIDLabel = "__block_id"

// Filter filters out blocks whose external (Thanos) labels are empty after relabelling.
func (f *LabelShardedMetaFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	var lbls labels.Labels
	for id, m := range metas {
		lbls = lbls[:0]
		lbls = append(lbls, labels.Label{Name: BlockIDLabel, Value: id.String()})
		for k, v := range m.Thanos.Labels {
			lbls = append(lbls, labels.Label{Name: k, Value: v})
		}

		if processedLabels, _ := relabel.Process(lbls, f.relabelConfig...); len(processedLabels) == 0 {
			synced.WithLabelValues(labelExcludedMeta).Inc()
			delete(metas, id)
		}
	}
	return nil
}

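// Example (sketch): shard blocks across two instances by hashing the block ID.
// The YAML is parsed with ParseRelabelConfig, defined later in this file.
//
//	cfg, err := ParseRelabelConfig([]byte(`
//	- action: hashmod
//	  source_labels: ["__block_id"]
//	  target_label: shard
//	  modulus: 2
//	- action: keep
//	  source_labels: ["shard"]
//	  regex: "0"
//	`), SelectorSupportedRelabelActions)
//	if err != nil {
//		// handle parse error
//	}
//	filter := NewLabelShardedMetaFilter(cfg)
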
var _ MetadataFilter = &DefaultDeduplicateFilter{}

type DeduplicateFilter interface {
	DuplicateIDs() []ulid.ULID
}

// DefaultDeduplicateFilter is a BaseFetcher filter that filters out older blocks that have exactly the same data.
// Not goroutine-safe.
type DefaultDeduplicateFilter struct {
	duplicateIDs []ulid.ULID
	concurrency  int
	mu           sync.Mutex
}

// NewDeduplicateFilter creates a DefaultDeduplicateFilter.
func NewDeduplicateFilter(concurrency int) *DefaultDeduplicateFilter {
	return &DefaultDeduplicateFilter{concurrency: concurrency}
}

// Filter filters out duplicate blocks: blocks whose compaction sources are fully contained
// in the sources of another, larger block formed from two or more overlapping blocks.
func (f *DefaultDeduplicateFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	f.duplicateIDs = f.duplicateIDs[:0]

	var wg sync.WaitGroup
	var groupChan = make(chan []*metadata.Meta)

	// Start up workers to deduplicate workgroups when they're ready.
	for i := 0; i < f.concurrency; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for group := range groupChan {
				f.filterGroup(group, metas, synced)
			}
		}()
	}

	// We only need to look within a compaction group for duplicates, so splitting by group key gives us parallelizable streams.
	metasByCompactionGroup := make(map[string][]*metadata.Meta)
	for _, meta := range metas {
		groupKey := meta.Thanos.GroupKey()
		metasByCompactionGroup[groupKey] = append(metasByCompactionGroup[groupKey], meta)
	}
	for _, group := range metasByCompactionGroup {
		groupChan <- group
	}
	close(groupChan)
	wg.Wait()

	return nil
}

func (f *DefaultDeduplicateFilter) filterGroup(metaSlice []*metadata.Meta, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec) {
	// Sort by number of compaction sources, descending; ties are broken by ULID.
	sort.Slice(metaSlice, func(i, j int) bool {
		ilen := len(metaSlice[i].Compaction.Sources)
		jlen := len(metaSlice[j].Compaction.Sources)

		if ilen == jlen {
			return metaSlice[i].ULID.Compare(metaSlice[j].ULID) < 0
		}

		return ilen-jlen > 0
	})

	var coveringSet []*metadata.Meta
	var duplicates []ulid.ULID
childLoop:
	for _, child := range metaSlice {
		childSources := child.Compaction.Sources
		for _, parent := range coveringSet {
			parentSources := parent.Compaction.Sources

			// The child's sources are all present in the parent's sources; filter it out.
			if contains(parentSources, childSources) {
				duplicates = append(duplicates, child.ULID)
				continue childLoop
			}
		}

		// The child's sources are not covered by any member of coveringSet; add it to coveringSet.
		coveringSet = append(coveringSet, child)
	}

	f.mu.Lock()
	for _, duplicate := range duplicates {
		if metas[duplicate] != nil {
			f.duplicateIDs = append(f.duplicateIDs, duplicate)
		}
		synced.WithLabelValues(duplicateMeta).Inc()
		delete(metas, duplicate)
	}
	f.mu.Unlock()
}

// DuplicateIDs returns a slice of block IDs that were filtered out by DefaultDeduplicateFilter.
func (f *DefaultDeduplicateFilter) DuplicateIDs() []ulid.ULID {
	return f.duplicateIDs
}

// contains reports whether every ULID in s2 is also present in s1.
func contains(s1, s2 []ulid.ULID) bool {
	for _, a := range s2 {
		found := false
		for _, e := range s1 {
			if a.Compare(e) == 0 {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}
	return true
}

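// Example (sketch): given hypothetical source ULIDs a, b and c, a compacted
// block covering {a, b, c} makes a block with sources {a, b} a duplicate:
//
//	contains([]ulid.ULID{a, b, c}, []ulid.ULID{a, b}) // true: {a, b} is covered
//	contains([]ulid.ULID{a, b}, []ulid.ULID{a, c})    // false: c is not covered
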
var _ MetadataFilter = &ReplicaLabelRemover{}

// ReplicaLabelRemover is a BaseFetcher filter that modifies the external labels of existing blocks:
// it removes the given replica labels from the metadata of blocks that have them.
type ReplicaLabelRemover struct {
	logger log.Logger

	replicaLabels []string
}

// NewReplicaLabelRemover creates a ReplicaLabelRemover.
func NewReplicaLabelRemover(logger log.Logger, replicaLabels []string) *ReplicaLabelRemover {
	return &ReplicaLabelRemover{logger: logger, replicaLabels: replicaLabels}
}

// Filter modifies the external labels of existing blocks: it removes the given replica labels from the metadata of blocks that have them.
func (r *ReplicaLabelRemover) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	if len(r.replicaLabels) == 0 {
		return nil
	}

	countReplicaLabelRemoved := make(map[string]int, len(metas))
	for u, meta := range metas {
		l := make(map[string]string)
		for n, v := range meta.Thanos.Labels {
			l[n] = v
		}

		for _, replicaLabel := range r.replicaLabels {
			if _, exists := l[replicaLabel]; exists {
				delete(l, replicaLabel)
				countReplicaLabelRemoved[replicaLabel]++
				modified.WithLabelValues(replicaRemovedMeta).Inc()
			}
		}
		if len(l) == 0 {
			level.Warn(r.logger).Log("msg", "block has no labels left, creating one", r.replicaLabels[0], "deduped")
			l[r.replicaLabels[0]] = "deduped"
		}

		nm := *meta
		nm.Thanos.Labels = l
		metas[u] = &nm
	}
	for replicaLabelRemoved, count := range countReplicaLabelRemoved {
		level.Debug(r.logger).Log("msg", "removed replica label", "label", replicaLabelRemoved, "count", count)
	}
	return nil
}

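// Example (sketch): with replica labels ["replica"], a block carrying external
// labels {cluster="eu1", replica="0"} is rewritten to {cluster="eu1"}, and the
// "replica-label-removed" modified metric is incremented.
//
//	remover := NewReplicaLabelRemover(logger, []string{"replica"})
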
// ConsistencyDelayMetaFilter is a BaseFetcher filter that filters out blocks that were created within the configured consistency delay, i.e. too recently.
// Not goroutine-safe.
type ConsistencyDelayMetaFilter struct {
	logger           log.Logger
	consistencyDelay time.Duration
}

// NewConsistencyDelayMetaFilter creates a ConsistencyDelayMetaFilter.
func NewConsistencyDelayMetaFilter(logger log.Logger, consistencyDelay time.Duration, reg prometheus.Registerer) *ConsistencyDelayMetaFilter {
	if logger == nil {
		logger = log.NewNopLogger()
	}
	_ = promauto.With(reg).NewGaugeFunc(prometheus.GaugeOpts{
		Name: "consistency_delay_seconds",
		Help: "Configured consistency delay in seconds.",
	}, func() float64 {
		return consistencyDelay.Seconds()
	})

	return &ConsistencyDelayMetaFilter{
		logger:           logger,
		consistencyDelay: consistencyDelay,
	}
}

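// Example (sketch): drop blocks whose ULID is younger than 30 minutes, except
// those produced by the compactor or repair tooling (see Filter below).
//
//	filter := NewConsistencyDelayMetaFilter(logger, 30*time.Minute, reg)
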
// Filter filters out blocks that were created within the configured consistency delay, unless they were produced by the compactor or repair tooling.
func (f *ConsistencyDelayMetaFilter) Filter(_ context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	for id, meta := range metas {
		// TODO(khyatisoneji): Remove the checks about Thanos Source
		//  by implementing delete delay to fetch metas.
		// TODO(bwplotka): Check consistency delay based on file upload / modification time instead of ULID.
		if ulid.Now()-id.Time() < uint64(f.consistencyDelay/time.Millisecond) &&
			meta.Thanos.Source != metadata.BucketRepairSource &&
			meta.Thanos.Source != metadata.CompactorSource &&
			meta.Thanos.Source != metadata.CompactorRepairSource {

			level.Debug(f.logger).Log("msg", "block is too fresh for now", "block", id)
			synced.WithLabelValues(tooFreshMeta).Inc()
			delete(metas, id)
		}
	}

	return nil
}

// IgnoreDeletionMarkFilter is a filter that filters out blocks that have been marked for deletion for longer than a given delay.
// The delay duration makes sure that the replacement block can be fetched before we filter out the old block.
// The delay is not considered when computing the DeletionMarkBlocks map.
// Not goroutine-safe.
type IgnoreDeletionMarkFilter struct {
	logger      log.Logger
	delay       time.Duration
	concurrency int
	bkt         objstore.InstrumentedBucketReader

	mtx             sync.Mutex
	deletionMarkMap map[ulid.ULID]*metadata.DeletionMark
}

// NewIgnoreDeletionMarkFilter creates an IgnoreDeletionMarkFilter.
func NewIgnoreDeletionMarkFilter(logger log.Logger, bkt objstore.InstrumentedBucketReader, delay time.Duration, concurrency int) *IgnoreDeletionMarkFilter {
	return &IgnoreDeletionMarkFilter{
		logger:      logger,
		bkt:         bkt,
		delay:       delay,
		concurrency: concurrency,
	}
}

// DeletionMarkBlocks returns the block IDs that were marked for deletion.
func (f *IgnoreDeletionMarkFilter) DeletionMarkBlocks() map[ulid.ULID]*metadata.DeletionMark {
	f.mtx.Lock()
	defer f.mtx.Unlock()

	deletionMarkMap := make(map[ulid.ULID]*metadata.DeletionMark, len(f.deletionMarkMap))
	for id, meta := range f.deletionMarkMap {
		deletionMarkMap[id] = meta
	}

	return deletionMarkMap
}

// Filter filters out blocks whose deletion mark is older than the configured delay;
// all discovered deletion marks are kept and exposed via DeletionMarkBlocks.
func (f *IgnoreDeletionMarkFilter) Filter(ctx context.Context, metas map[ulid.ULID]*metadata.Meta, synced GaugeVec, modified GaugeVec) error {
	deletionMarkMap := make(map[ulid.ULID]*metadata.DeletionMark)

	// Make a copy of block IDs to check, in order to avoid concurrency issues
	// between the scheduler and workers.
	blockIDs := make([]ulid.ULID, 0, len(metas))
	for id := range metas {
		blockIDs = append(blockIDs, id)
	}

	var (
		eg  errgroup.Group
		ch  = make(chan ulid.ULID, f.concurrency)
		mtx sync.Mutex
	)

	for i := 0; i < f.concurrency; i++ {
		eg.Go(func() error {
			var lastErr error
			for id := range ch {
				m := &metadata.DeletionMark{}
				if err := metadata.ReadMarker(ctx, f.logger, f.bkt, id.String(), m); err != nil {
					if errors.Cause(err) == metadata.ErrorMarkerNotFound {
						continue
					}
					if errors.Cause(err) == metadata.ErrorUnmarshalMarker {
						level.Warn(f.logger).Log("msg", "found partial deletion-mark.json; if we will see it happening often for the same block, consider manually deleting deletion-mark.json from the object storage", "block", id, "err", err)
						continue
					}
					// Remember the last error and continue to drain the channel.
					lastErr = err
					continue
				}

				// Keep track of the blocks marked for deletion and filter them out if their
				// deletion time is greater than the configured delay.
				mtx.Lock()
				deletionMarkMap[id] = m
				if time.Since(time.Unix(m.DeletionTime, 0)).Seconds() > f.delay.Seconds() {
					synced.WithLabelValues(MarkedForDeletionMeta).Inc()
					delete(metas, id)
				}
				mtx.Unlock()
			}

			return lastErr
		})
	}

	// Workers scheduled, distribute blocks.
	eg.Go(func() error {
		defer close(ch)

		for _, id := range blockIDs {
			select {
			case ch <- id:
				// Nothing to do.
			case <-ctx.Done():
				return ctx.Err()
			}
		}

		return nil
	})

	if err := eg.Wait(); err != nil {
		return errors.Wrap(err, "filter blocks marked for deletion")
	}

	f.mtx.Lock()
	f.deletionMarkMap = deletionMarkMap
	f.mtx.Unlock()

	return nil
}

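// Example (sketch): after a MetaFetcher configured with this filter has run,
// the discovered deletion marks can be inspected; the 48h delay is illustrative.
//
//	filter := NewIgnoreDeletionMarkFilter(logger, bkt, 48*time.Hour, FetcherConcurrency)
//	// ... fetcher.Fetch(ctx) runs the filter ...
//	marks := filter.DeletionMarkBlocks()
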
var (
	SelectorSupportedRelabelActions = map[relabel.Action]struct{}{relabel.Keep: {}, relabel.Drop: {}, relabel.HashMod: {}}
)

// ParseRelabelConfig parses a relabel configuration.
// If supportedActions is nil, all relabel actions are valid.
func ParseRelabelConfig(contentYaml []byte, supportedActions map[relabel.Action]struct{}) ([]*relabel.Config, error) {
	var relabelConfig []*relabel.Config
	if err := yaml.Unmarshal(contentYaml, &relabelConfig); err != nil {
		return nil, errors.Wrap(err, "parsing relabel configuration")
	}

	if supportedActions != nil {
		for _, cfg := range relabelConfig {
			if _, ok := supportedActions[cfg.Action]; !ok {
				return nil, errors.Errorf("unsupported relabel action: %v", cfg.Action)
			}
		}
	}

	return relabelConfig, nil
}