github.com/grafana/pyroscope@v1.18.0/pkg/storegateway/bucket_index_metadata_fetcher.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  // Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/pkg/storegateway/bucket_index_metadata_fetcher.go
     3  // Provenance-includes-license: Apache-2.0
     4  // Provenance-includes-copyright: The Cortex Authors.
     5  
     6  package storegateway
     7  
     8  import (
     9  	"context"
    10  	"time"
    11  
    12  	"github.com/go-kit/log"
    13  	"github.com/go-kit/log/level"
    14  	"github.com/oklog/ulid/v2"
    15  	"github.com/pkg/errors"
    16  	"github.com/prometheus/client_golang/prometheus"
    17  
    18  	"github.com/grafana/pyroscope/pkg/objstore"
    19  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    20  	"github.com/grafana/pyroscope/pkg/phlaredb/bucketindex"
    21  )
    22  
// Values for the label of the fetcher's synced gauge (f.metrics.Synced). Each
// one marks a reason why Fetch did not use the bucket index and fell back to
// fetching block metas directly from the bucket.
const (
	// corruptedBucketIndex is set when reading the index fails with
	// bucketindex.ErrIndexCorrupted.
	corruptedBucketIndex     = "corrupted-bucket-index"
	// noBucketIndex is set when reading the index fails with
	// bucketindex.ErrIndexNotFound.
	noBucketIndex            = "no-bucket-index"
	// bucketIndexOlderThanHour is set when the index was read successfully but
	// its UpdatedAt timestamp is more than one hour in the past.
	bucketIndexOlderThanHour = "bucket-index-older-than-hour"
)
    28  
// BucketIndexMetadataFetcher is a block.MetadataFetcher implementation that
// builds block metas from a tenant's bucket index. When the index is missing,
// corrupted, or older than one hour, it falls back to a fetcher that reads
// metas directly from the bucket.
type BucketIndexMetadataFetcher struct {
	// userID is the tenant whose bucket index is read.
	userID      string
	// bkt is the bucket the index is read from; the fallback fetcher wraps it
	// in a tenant bucket client.
	bkt         objstore.Bucket
	// cfgProvider is passed through to the bucket index reader and to the
	// tenant bucket client.
	cfgProvider objstore.TenantConfigProvider
	logger      log.Logger
	// filters are applied, in order, to the metas built from the index; they
	// are also handed to the fallback fetcher.
	filters     []block.MetadataFilter
	// metrics is shared between this fetcher and the fallback fetcher.
	metrics     *block.FetcherMetrics
	// fallback is created lazily by fallbackFetch on first use and reused
	// afterwards. It is nil until then.
	fallback    block.MetadataFetcher
}
    39  
    40  func NewBucketIndexMetadataFetcher(
    41  	userID string,
    42  	bkt objstore.Bucket,
    43  	cfgProvider objstore.TenantConfigProvider,
    44  	logger log.Logger,
    45  	reg prometheus.Registerer,
    46  	filters []block.MetadataFilter,
    47  ) *BucketIndexMetadataFetcher {
    48  	return &BucketIndexMetadataFetcher{
    49  		userID:      userID,
    50  		bkt:         bkt,
    51  		cfgProvider: cfgProvider,
    52  		logger:      logger,
    53  		filters:     filters,
    54  		metrics:     block.NewFetcherMetrics(reg, [][]string{{corruptedBucketIndex}, {noBucketIndex}, {minTimeExcludedMeta}}),
    55  	}
    56  }
    57  
    58  func (f *BucketIndexMetadataFetcher) fallbackFetch(ctx context.Context) (metas map[ulid.ULID]*block.Meta, partial map[ulid.ULID]error, err error) {
    59  	if f.fallback == nil {
    60  		userBucket := objstore.NewTenantBucketClient(f.userID, f.bkt, f.cfgProvider)
    61  		// Empty cache dir path disables on-disk cache.
    62  		// Provided that an in-memory cache is maintained,
    63  		// and store gateway does not have a persistent
    64  		// file system, on-disk cache is not helpful.
    65  		fetcher, err := block.NewMetaFetcherWithMetrics(f.logger, 16, userBucket, "", f.metrics, f.filters)
    66  		if err != nil {
    67  			return nil, nil, err
    68  		}
    69  		f.fallback = fetcher
    70  	}
    71  
    72  	return f.fallback.Fetch(ctx)
    73  }
    74  
    75  // Fetch implements block.MetadataFetcher. Not goroutine-safe.
    76  func (f *BucketIndexMetadataFetcher) Fetch(ctx context.Context) (metas map[ulid.ULID]*block.Meta, partial map[ulid.ULID]error, err error) {
    77  	f.metrics.ResetTx()
    78  
    79  	start := time.Now()
    80  
    81  	// Fetch the bucket index.
    82  	idx, err := bucketindex.ReadIndex(ctx, f.bkt, f.userID, f.cfgProvider, f.logger)
    83  	if errors.Is(err, bucketindex.ErrIndexNotFound) {
    84  		// This is a legit case happening when the first blocks of a tenant have recently been uploaded by ingesters
    85  		// and their bucket index has not been created yet.
    86  		defer func() {
    87  			f.metrics.Synced.WithLabelValues(noBucketIndex).Set(1)
    88  			f.metrics.Submit()
    89  		}()
    90  
    91  		level.Warn(f.logger).Log("msg", "no bucket index found, falling back to fetching directly from bucket", "user", f.userID)
    92  		return f.fallbackFetch(ctx)
    93  	}
    94  	if errors.Is(err, bucketindex.ErrIndexCorrupted) {
    95  		// In case a single tenant bucket index is corrupted, we don't want the store-gateway to fail at startup
    96  		// because unable to fetch blocks metadata. We'll act as if the tenant has no bucket index, but the query
    97  		// will fail anyway in the querier (the querier fails in the querier if bucket index is corrupted).
    98  		level.Error(f.logger).Log("msg", "corrupted bucket index found, falling back to fetching directly from bucket", "user", f.userID, "err", err)
    99  		defer func() {
   100  			f.metrics.Synced.WithLabelValues(corruptedBucketIndex).Set(1)
   101  			f.metrics.Submit()
   102  		}()
   103  
   104  		return f.fallbackFetch(ctx)
   105  	}
   106  	if err != nil {
   107  		f.metrics.Synced.WithLabelValues(block.FailedMeta).Set(1)
   108  		f.metrics.Submit()
   109  
   110  		return nil, nil, errors.Wrapf(err, "read bucket index")
   111  	}
   112  
   113  	// check if index is older than 1 hour, fallback to metafetcher
   114  	if time.Unix(idx.UpdatedAt, 0).Before(start.Add(-1 * time.Hour)) {
   115  		defer func() {
   116  			f.metrics.Synced.WithLabelValues(bucketIndexOlderThanHour).Set(1)
   117  			f.metrics.Submit()
   118  		}()
   119  
   120  		level.Warn(f.logger).Log("msg", "bucket index is older than 1 hour, falling back to fetching directly from bucket", "user", f.userID)
   121  		return f.fallbackFetch(ctx)
   122  	}
   123  
   124  	defer func() {
   125  		f.metrics.SyncDuration.Observe(time.Since(start).Seconds())
   126  		if err != nil {
   127  			f.metrics.SyncFailures.Inc()
   128  		}
   129  	}()
   130  	f.metrics.Syncs.Inc()
   131  
   132  	// Build block metas out of the index.
   133  	metas = make(map[ulid.ULID]*block.Meta, len(idx.Blocks))
   134  	for _, b := range idx.Blocks {
   135  		metas[b.ID] = b.Meta()
   136  	}
   137  
   138  	for _, filter := range f.filters {
   139  		var err error
   140  
   141  		// NOTE: filter can update synced metric accordingly to the reason of the exclude.
   142  		if customFilter, ok := filter.(MetadataFilterWithBucketIndex); ok {
   143  			err = customFilter.FilterWithBucketIndex(ctx, metas, idx, f.metrics.Synced)
   144  		} else {
   145  			err = filter.Filter(ctx, metas, f.metrics.Synced)
   146  		}
   147  
   148  		if err != nil {
   149  			return nil, nil, errors.Wrap(err, "filter metas")
   150  		}
   151  	}
   152  
   153  	f.metrics.Synced.WithLabelValues(block.LoadedMeta).Set(float64(len(metas)))
   154  	f.metrics.Submit()
   155  
   156  	return metas, nil, nil
   157  }