github.com/grafana/pyroscope@v1.18.0/pkg/storegateway/block_filter.go (about)

     1  package storegateway
     2  
     3  import (
     4  	"context"
     5  	"time"
     6  
     7  	"github.com/go-kit/log"
     8  	"github.com/go-kit/log/level"
     9  	"github.com/grafana/dskit/ring"
    10  	"github.com/oklog/ulid/v2"
    11  	"github.com/pkg/errors"
    12  	"github.com/prometheus/prometheus/model/timestamp"
    13  
    14  	"github.com/grafana/pyroscope/pkg/objstore"
    15  	"github.com/grafana/pyroscope/pkg/phlaredb/block"
    16  	"github.com/grafana/pyroscope/pkg/phlaredb/bucketindex"
    17  )
    18  
const (
	// shardExcludedMeta is the label value recorded in the synced gauge for
	// blocks excluded because they are not owned by this store-gateway shard.
	shardExcludedMeta = "shard-excluded"
)
    22  
    23  var errStoreGatewayUnhealthy = errors.New("store-gateway is unhealthy in the ring")
    24  
// ShardingStrategy decides which tenants and which of their blocks should be
// handled by this store-gateway instance.
type ShardingStrategy interface {
	// FilterUsers whose blocks should be loaded by the store-gateway. Returns the list of user IDs
	// that should be synced by the store-gateway.
	FilterUsers(ctx context.Context, userIDs []string) ([]string, error)

	// FilterBlocks filters metas in-place keeping only blocks that should be loaded by the store-gateway.
	// The provided loaded map contains blocks which have been previously returned by this function and
	// are now loaded or loading in the store-gateway.
	FilterBlocks(ctx context.Context, userID string, metas map[ulid.ULID]*block.Meta, loaded map[ulid.ULID]struct{}, synced block.GaugeVec) error
}
    35  
// shardingMetadataFilterAdapter adapts a ShardingStrategy to the
// block.MetadataFilter interface, scoped to a single tenant.
type shardingMetadataFilterAdapter struct {
	userID   string
	strategy ShardingStrategy

	// Keep track of the last blocks returned by the Filter() function.
	lastBlocks map[ulid.ULID]struct{}
}
    43  
// ShuffleShardingStrategy is a shuffle sharding strategy, based on the hash ring formed by store-gateways,
// where each tenant's blocks are sharded across a subset of store-gateway instances.
type ShuffleShardingStrategy struct {
	r            *ring.Ring
	instanceID   string
	instanceAddr string
	limits       ShardingLimits
	logger       log.Logger
}
    53  
    54  // NewShuffleShardingStrategy makes a new ShuffleShardingStrategy.
    55  func NewShuffleShardingStrategy(r *ring.Ring, instanceID, instanceAddr string, limits ShardingLimits, logger log.Logger) *ShuffleShardingStrategy {
    56  	return &ShuffleShardingStrategy{
    57  		r:            r,
    58  		instanceID:   instanceID,
    59  		instanceAddr: instanceAddr,
    60  		limits:       limits,
    61  		logger:       logger,
    62  	}
    63  }
    64  
    65  // FilterUsers implements ShardingStrategy.
    66  func (s *ShuffleShardingStrategy) FilterUsers(_ context.Context, userIDs []string) ([]string, error) {
    67  	// As a protection, ensure the store-gateway instance is healthy in the ring. It could also be missing
    68  	// in the ring if it was failing to heartbeat the ring and it got remove from another healthy store-gateway
    69  	// instance, because of the auto-forget feature.
    70  	if set, err := s.r.GetAllHealthy(BlocksOwnerSync); err != nil {
    71  		return nil, err
    72  	} else if !set.Includes(s.instanceAddr) {
    73  		return nil, errStoreGatewayUnhealthy
    74  	}
    75  
    76  	var filteredIDs []string
    77  
    78  	for _, userID := range userIDs {
    79  		subRing := GetShuffleShardingSubring(s.r, userID, s.limits)
    80  
    81  		// Include the user only if it belongs to this store-gateway shard.
    82  		if subRing.HasInstance(s.instanceID) {
    83  			filteredIDs = append(filteredIDs, userID)
    84  		}
    85  	}
    86  
    87  	return filteredIDs, nil
    88  }
    89  
// FilterBlocks implements ShardingStrategy.
// It mutates metas in place: blocks not owned by this instance are removed and
// counted in the synced gauge under the shard-excluded reason. Blocks listed in
// loaded are kept more conservatively (see the unload-protection logic below).
func (s *ShuffleShardingStrategy) FilterBlocks(_ context.Context, userID string, metas map[ulid.ULID]*block.Meta, loaded map[ulid.ULID]struct{}, synced block.GaugeVec) error {
	// As a protection, ensure the store-gateway instance is healthy in the ring. If it's unhealthy because it's failing
	// to heartbeat or get updates from the ring, or even removed from the ring because of the auto-forget feature, then
	// keep the previously loaded blocks.
	if set, err := s.r.GetAllHealthy(BlocksOwnerSync); err != nil || !set.Includes(s.instanceAddr) {
		for blockID := range metas {
			if _, ok := loaded[blockID]; ok {
				level.Warn(s.logger).Log("msg", "store-gateway is unhealthy in the ring but block is kept because was previously loaded", "block", blockID.String(), "err", err)
			} else {
				level.Warn(s.logger).Log("msg", "store-gateway is unhealthy in the ring and block has been excluded because was not previously loaded", "block", blockID.String(), "err", err)

				// Skip the block.
				synced.WithLabelValues(shardExcludedMeta).Inc()
				delete(metas, blockID)
			}
		}

		return nil
	}

	r := GetShuffleShardingSubring(s.r, userID, s.limits)
	// Reuse the same lookup buffers across every ring Get() call in the loop.
	bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet()

	for blockID := range metas {
		key := block.HashBlockID(blockID)

		// Check if the block is owned by the store-gateway
		set, err := r.Get(key, BlocksOwnerSync, bufDescs, bufHosts, bufZones)
		// If an error occurs while checking the ring, we keep the previously loaded blocks.
		if err != nil {
			if _, ok := loaded[blockID]; ok {
				level.Warn(s.logger).Log("msg", "failed to check block owner but block is kept because was previously loaded", "block", blockID.String(), "err", err)
			} else {
				level.Warn(s.logger).Log("msg", "failed to check block owner and block has been excluded because was not previously loaded", "block", blockID.String(), "err", err)

				// Skip the block.
				synced.WithLabelValues(shardExcludedMeta).Inc()
				delete(metas, blockID)
			}

			continue
		}

		// Keep the block if it is owned by the store-gateway.
		if set.Includes(s.instanceAddr) {
			continue
		}

		// The block is not owned by the store-gateway. However, if it's currently loaded
		// we can safely unload it only once at least 1 authoritative owner is available
		// for queries.
		if _, ok := loaded[blockID]; ok {
			// The ring Get() returns an error if there's no available instance.
			if _, err := r.Get(key, BlocksOwnerRead, bufDescs, bufHosts, bufZones); err != nil {
				// Keep the block.
				continue
			}
		}

		// The block is not owned by the store-gateway and there's at least 1 available
		// authoritative owner available for queries, so we can filter it out (and unload
		// it if it was loaded).
		synced.WithLabelValues(shardExcludedMeta).Inc()
		delete(metas, blockID)
	}

	return nil
}
   159  
   160  // GetShuffleShardingSubring returns the subring to be used for a given user. This function
   161  // should be used both by store-gateway and querier in order to guarantee the same logic is used.
   162  func GetShuffleShardingSubring(ring *ring.Ring, userID string, limits ShardingLimits) ring.ReadRing {
   163  	shardSize := limits.StoreGatewayTenantShardSize(userID)
   164  
   165  	// A shard size of 0 means shuffle sharding is disabled for this specific user,
   166  	// so we just return the full ring so that blocks will be sharded across all store-gateways.
   167  	if shardSize <= 0 {
   168  		return ring
   169  	}
   170  
   171  	return ring.ShuffleShard(userID, shardSize)
   172  }
   173  
   174  func NewShardingMetadataFilterAdapter(userID string, strategy ShardingStrategy) block.MetadataFilter {
   175  	return &shardingMetadataFilterAdapter{
   176  		userID:     userID,
   177  		strategy:   strategy,
   178  		lastBlocks: map[ulid.ULID]struct{}{},
   179  	}
   180  }
   181  
   182  // Filter implements block.MetadataFilter.
   183  // This function is NOT safe for use by multiple goroutines concurrently.
   184  func (a *shardingMetadataFilterAdapter) Filter(ctx context.Context, metas map[ulid.ULID]*block.Meta, synced block.GaugeVec) error {
   185  	if err := a.strategy.FilterBlocks(ctx, a.userID, metas, a.lastBlocks, synced); err != nil {
   186  		return err
   187  	}
   188  
   189  	// Keep track of the last filtered blocks.
   190  	a.lastBlocks = make(map[ulid.ULID]struct{}, len(metas))
   191  	for blockID := range metas {
   192  		a.lastBlocks[blockID] = struct{}{}
   193  	}
   194  
   195  	return nil
   196  }
   197  
   198  const minTimeExcludedMeta = "min-time-excluded"
   199  
// minTimeMetaFilter filters out blocks that contain the most recent data (based on block MinTime).
type minTimeMetaFilter struct {
	// limit is the lookback window: blocks with MinTime at or after now-limit
	// are filtered out. A non-positive limit disables the filter entirely.
	limit time.Duration
}
   204  
   205  func newMinTimeMetaFilter(limit time.Duration) *minTimeMetaFilter {
   206  	return &minTimeMetaFilter{limit: limit}
   207  }
   208  
   209  func (f *minTimeMetaFilter) Filter(_ context.Context, metas map[ulid.ULID]*block.Meta, synced block.GaugeVec) error {
   210  	if f.limit <= 0 {
   211  		return nil
   212  	}
   213  
   214  	limitTime := timestamp.FromTime(time.Now().Add(-f.limit))
   215  
   216  	for id, m := range metas {
   217  		if int64(m.MinTime) < limitTime {
   218  			continue
   219  		}
   220  
   221  		synced.WithLabelValues(minTimeExcludedMeta).Inc()
   222  		delete(metas, id)
   223  	}
   224  	return nil
   225  }
   226  
// MetadataFilterWithBucketIndex is a block.MetadataFilter variant that can
// also run against a caller-provided bucket index.
type MetadataFilterWithBucketIndex interface {
	// FilterWithBucketIndex is like Thanos MetadataFilter.Filter() but it provides in input the bucket index too.
	FilterWithBucketIndex(ctx context.Context, metas map[ulid.ULID]*block.Meta, idx *bucketindex.Index, synced block.GaugeVec) error
}
   231  
// IgnoreDeletionMarkFilter is like the Thanos IgnoreDeletionMarkFilter, but it also implements
// the MetadataFilterWithBucketIndex interface.
type IgnoreDeletionMarkFilter struct {
	upstream *block.IgnoreDeletionMarkFilter

	delay           time.Duration
	// deletionMarkMap caches the marks built from the bucket index; it stays
	// nil until FilterWithBucketIndex is called.
	deletionMarkMap map[ulid.ULID]*block.DeletionMark
}
   240  
   241  // NewIgnoreDeletionMarkFilter creates IgnoreDeletionMarkFilter.
   242  func NewIgnoreDeletionMarkFilter(logger log.Logger, bkt objstore.BucketReader, delay time.Duration, concurrency int) *IgnoreDeletionMarkFilter {
   243  	return &IgnoreDeletionMarkFilter{
   244  		upstream: block.NewIgnoreDeletionMarkFilter(logger, bkt, delay, concurrency),
   245  		delay:    delay,
   246  	}
   247  }
   248  
   249  // DeletionMarkBlocks returns blocks that were marked for deletion.
   250  func (f *IgnoreDeletionMarkFilter) DeletionMarkBlocks() map[ulid.ULID]*block.DeletionMark {
   251  	// If the cached deletion marks exist it means the filter function was called with the bucket
   252  	// index, so it's safe to return it.
   253  	if f.deletionMarkMap != nil {
   254  		return f.deletionMarkMap
   255  	}
   256  
   257  	return f.upstream.DeletionMarkBlocks()
   258  }
   259  
// Filter implements block.MetadataFilter by delegating to the wrapped
// upstream filter.
func (f *IgnoreDeletionMarkFilter) Filter(ctx context.Context, metas map[ulid.ULID]*block.Meta, synced block.GaugeVec) error {
	return f.upstream.Filter(ctx, metas, synced)
}
   264  
   265  // FilterWithBucketIndex implements MetadataFilterWithBucketIndex.
   266  func (f *IgnoreDeletionMarkFilter) FilterWithBucketIndex(_ context.Context, metas map[ulid.ULID]*block.Meta, idx *bucketindex.Index, synced block.GaugeVec) error {
   267  	// Build a map of block deletion marks
   268  	marks := make(map[ulid.ULID]*block.DeletionMark, len(idx.BlockDeletionMarks))
   269  	for _, mark := range idx.BlockDeletionMarks {
   270  		marks[mark.ID] = mark.BlockDeletionMark()
   271  	}
   272  
   273  	// Keep it cached.
   274  	f.deletionMarkMap = marks
   275  
   276  	for _, mark := range marks {
   277  		if _, ok := metas[mark.ID]; !ok {
   278  			continue
   279  		}
   280  
   281  		if time.Since(time.Unix(mark.DeletionTime, 0)).Seconds() > f.delay.Seconds() {
   282  			synced.WithLabelValues(block.MarkedForDeletionMeta).Inc()
   283  			delete(metas, mark.ID)
   284  		}
   285  	}
   286  
   287  	return nil
   288  }