github.com/grafana/pyroscope@v1.18.0/pkg/storegateway/block_filter.go (about) 1 package storegateway 2 3 import ( 4 "context" 5 "time" 6 7 "github.com/go-kit/log" 8 "github.com/go-kit/log/level" 9 "github.com/grafana/dskit/ring" 10 "github.com/oklog/ulid/v2" 11 "github.com/pkg/errors" 12 "github.com/prometheus/prometheus/model/timestamp" 13 14 "github.com/grafana/pyroscope/pkg/objstore" 15 "github.com/grafana/pyroscope/pkg/phlaredb/block" 16 "github.com/grafana/pyroscope/pkg/phlaredb/bucketindex" 17 ) 18 19 const ( 20 shardExcludedMeta = "shard-excluded" 21 ) 22 23 var errStoreGatewayUnhealthy = errors.New("store-gateway is unhealthy in the ring") 24 25 type ShardingStrategy interface { 26 // FilterUsers whose blocks should be loaded by the store-gateway. Returns the list of user IDs 27 // that should be synced by the store-gateway. 28 FilterUsers(ctx context.Context, userIDs []string) ([]string, error) 29 30 // FilterBlocks filters metas in-place keeping only blocks that should be loaded by the store-gateway. 31 // The provided loaded map contains blocks which have been previously returned by this function and 32 // are now loaded or loading in the store-gateway. 33 FilterBlocks(ctx context.Context, userID string, metas map[ulid.ULID]*block.Meta, loaded map[ulid.ULID]struct{}, synced block.GaugeVec) error 34 } 35 36 type shardingMetadataFilterAdapter struct { 37 userID string 38 strategy ShardingStrategy 39 40 // Keep track of the last blocks returned by the Filter() function. 41 lastBlocks map[ulid.ULID]struct{} 42 } 43 44 // SardingStrategy is a shuffle sharding strategy, based on the hash ring formed by store-gateways, 45 // where each tenant blocks are sharded across a subset of store-gateway instances. 46 type ShuffleShardingStrategy struct { 47 r *ring.Ring 48 instanceID string 49 instanceAddr string 50 limits ShardingLimits 51 logger log.Logger 52 } 53 54 // NewShuffleShardingStrategy makes a new ShuffleShardingStrategy. 55 func NewShuffleShardingStrategy(r *ring.Ring, instanceID, instanceAddr string, limits ShardingLimits, logger log.Logger) *ShuffleShardingStrategy { 56 return &ShuffleShardingStrategy{ 57 r: r, 58 instanceID: instanceID, 59 instanceAddr: instanceAddr, 60 limits: limits, 61 logger: logger, 62 } 63 } 64 65 // FilterUsers implements ShardingStrategy. 66 func (s *ShuffleShardingStrategy) FilterUsers(_ context.Context, userIDs []string) ([]string, error) { 67 // As a protection, ensure the store-gateway instance is healthy in the ring. It could also be missing 68 // in the ring if it was failing to heartbeat the ring and it got remove from another healthy store-gateway 69 // instance, because of the auto-forget feature. 70 if set, err := s.r.GetAllHealthy(BlocksOwnerSync); err != nil { 71 return nil, err 72 } else if !set.Includes(s.instanceAddr) { 73 return nil, errStoreGatewayUnhealthy 74 } 75 76 var filteredIDs []string 77 78 for _, userID := range userIDs { 79 subRing := GetShuffleShardingSubring(s.r, userID, s.limits) 80 81 // Include the user only if it belongs to this store-gateway shard. 82 if subRing.HasInstance(s.instanceID) { 83 filteredIDs = append(filteredIDs, userID) 84 } 85 } 86 87 return filteredIDs, nil 88 } 89 90 // FilterBlocks implements ShardingStrategy. 91 func (s *ShuffleShardingStrategy) FilterBlocks(_ context.Context, userID string, metas map[ulid.ULID]*block.Meta, loaded map[ulid.ULID]struct{}, synced block.GaugeVec) error { 92 // As a protection, ensure the store-gateway instance is healthy in the ring. If it's unhealthy because it's failing 93 // to heartbeat or get updates from the ring, or even removed from the ring because of the auto-forget feature, then 94 // keep the previously loaded blocks. 95 if set, err := s.r.GetAllHealthy(BlocksOwnerSync); err != nil || !set.Includes(s.instanceAddr) { 96 for blockID := range metas { 97 if _, ok := loaded[blockID]; ok { 98 level.Warn(s.logger).Log("msg", "store-gateway is unhealthy in the ring but block is kept because was previously loaded", "block", blockID.String(), "err", err) 99 } else { 100 level.Warn(s.logger).Log("msg", "store-gateway is unhealthy in the ring and block has been excluded because was not previously loaded", "block", blockID.String(), "err", err) 101 102 // Skip the block. 103 synced.WithLabelValues(shardExcludedMeta).Inc() 104 delete(metas, blockID) 105 } 106 } 107 108 return nil 109 } 110 111 r := GetShuffleShardingSubring(s.r, userID, s.limits) 112 bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet() 113 114 for blockID := range metas { 115 key := block.HashBlockID(blockID) 116 117 // Check if the block is owned by the store-gateway 118 set, err := r.Get(key, BlocksOwnerSync, bufDescs, bufHosts, bufZones) 119 // If an error occurs while checking the ring, we keep the previously loaded blocks. 120 if err != nil { 121 if _, ok := loaded[blockID]; ok { 122 level.Warn(s.logger).Log("msg", "failed to check block owner but block is kept because was previously loaded", "block", blockID.String(), "err", err) 123 } else { 124 level.Warn(s.logger).Log("msg", "failed to check block owner and block has been excluded because was not previously loaded", "block", blockID.String(), "err", err) 125 126 // Skip the block. 127 synced.WithLabelValues(shardExcludedMeta).Inc() 128 delete(metas, blockID) 129 } 130 131 continue 132 } 133 134 // Keep the block if it is owned by the store-gateway. 135 if set.Includes(s.instanceAddr) { 136 continue 137 } 138 139 // The block is not owned by the store-gateway. However, if it's currently loaded 140 // we can safely unload it only once at least 1 authoritative owner is available 141 // for queries. 142 if _, ok := loaded[blockID]; ok { 143 // The ring Get() returns an error if there's no available instance. 144 if _, err := r.Get(key, BlocksOwnerRead, bufDescs, bufHosts, bufZones); err != nil { 145 // Keep the block. 146 continue 147 } 148 } 149 150 // The block is not owned by the store-gateway and there's at least 1 available 151 // authoritative owner available for queries, so we can filter it out (and unload 152 // it if it was loaded). 153 synced.WithLabelValues(shardExcludedMeta).Inc() 154 delete(metas, blockID) 155 } 156 157 return nil 158 } 159 160 // GetShuffleShardingSubring returns the subring to be used for a given user. This function 161 // should be used both by store-gateway and querier in order to guarantee the same logic is used. 162 func GetShuffleShardingSubring(ring *ring.Ring, userID string, limits ShardingLimits) ring.ReadRing { 163 shardSize := limits.StoreGatewayTenantShardSize(userID) 164 165 // A shard size of 0 means shuffle sharding is disabled for this specific user, 166 // so we just return the full ring so that blocks will be sharded across all store-gateways. 167 if shardSize <= 0 { 168 return ring 169 } 170 171 return ring.ShuffleShard(userID, shardSize) 172 } 173 174 func NewShardingMetadataFilterAdapter(userID string, strategy ShardingStrategy) block.MetadataFilter { 175 return &shardingMetadataFilterAdapter{ 176 userID: userID, 177 strategy: strategy, 178 lastBlocks: map[ulid.ULID]struct{}{}, 179 } 180 } 181 182 // Filter implements block.MetadataFilter. 183 // This function is NOT safe for use by multiple goroutines concurrently. 184 func (a *shardingMetadataFilterAdapter) Filter(ctx context.Context, metas map[ulid.ULID]*block.Meta, synced block.GaugeVec) error { 185 if err := a.strategy.FilterBlocks(ctx, a.userID, metas, a.lastBlocks, synced); err != nil { 186 return err 187 } 188 189 // Keep track of the last filtered blocks. 190 a.lastBlocks = make(map[ulid.ULID]struct{}, len(metas)) 191 for blockID := range metas { 192 a.lastBlocks[blockID] = struct{}{} 193 } 194 195 return nil 196 } 197 198 const minTimeExcludedMeta = "min-time-excluded" 199 200 // minTimeMetaFilter filters out blocks that contain the most recent data (based on block MinTime). 201 type minTimeMetaFilter struct { 202 limit time.Duration 203 } 204 205 func newMinTimeMetaFilter(limit time.Duration) *minTimeMetaFilter { 206 return &minTimeMetaFilter{limit: limit} 207 } 208 209 func (f *minTimeMetaFilter) Filter(_ context.Context, metas map[ulid.ULID]*block.Meta, synced block.GaugeVec) error { 210 if f.limit <= 0 { 211 return nil 212 } 213 214 limitTime := timestamp.FromTime(time.Now().Add(-f.limit)) 215 216 for id, m := range metas { 217 if int64(m.MinTime) < limitTime { 218 continue 219 } 220 221 synced.WithLabelValues(minTimeExcludedMeta).Inc() 222 delete(metas, id) 223 } 224 return nil 225 } 226 227 type MetadataFilterWithBucketIndex interface { 228 // FilterWithBucketIndex is like Thanos MetadataFilter.Filter() but it provides in input the bucket index too. 229 FilterWithBucketIndex(ctx context.Context, metas map[ulid.ULID]*block.Meta, idx *bucketindex.Index, synced block.GaugeVec) error 230 } 231 232 // IgnoreDeletionMarkFilter is like the Thanos IgnoreDeletionMarkFilter, but it also implements 233 // the MetadataFilterWithBucketIndex interface. 234 type IgnoreDeletionMarkFilter struct { 235 upstream *block.IgnoreDeletionMarkFilter 236 237 delay time.Duration 238 deletionMarkMap map[ulid.ULID]*block.DeletionMark 239 } 240 241 // NewIgnoreDeletionMarkFilter creates IgnoreDeletionMarkFilter. 242 func NewIgnoreDeletionMarkFilter(logger log.Logger, bkt objstore.BucketReader, delay time.Duration, concurrency int) *IgnoreDeletionMarkFilter { 243 return &IgnoreDeletionMarkFilter{ 244 upstream: block.NewIgnoreDeletionMarkFilter(logger, bkt, delay, concurrency), 245 delay: delay, 246 } 247 } 248 249 // DeletionMarkBlocks returns blocks that were marked for deletion. 250 func (f *IgnoreDeletionMarkFilter) DeletionMarkBlocks() map[ulid.ULID]*block.DeletionMark { 251 // If the cached deletion marks exist it means the filter function was called with the bucket 252 // index, so it's safe to return it. 253 if f.deletionMarkMap != nil { 254 return f.deletionMarkMap 255 } 256 257 return f.upstream.DeletionMarkBlocks() 258 } 259 260 // Filter implements block.MetadataFilter. 261 func (f *IgnoreDeletionMarkFilter) Filter(ctx context.Context, metas map[ulid.ULID]*block.Meta, synced block.GaugeVec) error { 262 return f.upstream.Filter(ctx, metas, synced) 263 } 264 265 // FilterWithBucketIndex implements MetadataFilterWithBucketIndex. 266 func (f *IgnoreDeletionMarkFilter) FilterWithBucketIndex(_ context.Context, metas map[ulid.ULID]*block.Meta, idx *bucketindex.Index, synced block.GaugeVec) error { 267 // Build a map of block deletion marks 268 marks := make(map[ulid.ULID]*block.DeletionMark, len(idx.BlockDeletionMarks)) 269 for _, mark := range idx.BlockDeletionMarks { 270 marks[mark.ID] = mark.BlockDeletionMark() 271 } 272 273 // Keep it cached. 274 f.deletionMarkMap = marks 275 276 for _, mark := range marks { 277 if _, ok := metas[mark.ID]; !ok { 278 continue 279 } 280 281 if time.Since(time.Unix(mark.DeletionTime, 0)).Seconds() > f.delay.Seconds() { 282 synced.WithLabelValues(block.MarkedForDeletionMeta).Inc() 283 delete(metas, mark.ID) 284 } 285 } 286 287 return nil 288 }