// Package storegateway implements the store-gateway component: each instance
// registers in a hash ring, owns a shard of the bucket blocks, and keeps its
// local bucket stores in sync with object storage.
package storegateway

import (
	"context"
	"flag"
	"time"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/kv"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"

	phlareobj "github.com/grafana/pyroscope/pkg/objstore"
	"github.com/grafana/pyroscope/pkg/util"
	"github.com/grafana/pyroscope/pkg/validation"
)

const (
	// Values for the "reason" label of the bucketSync counter.
	syncReasonInitial    = "initial"
	syncReasonPeriodic   = "periodic"
	syncReasonRingChange = "ring-change"

	// ringAutoForgetUnhealthyPeriods is how many consecutive timeout periods an unhealthy instance
	// in the ring will be automatically removed.
	ringAutoForgetUnhealthyPeriods = 10
)

// Validation errors.
var errInvalidTenantShardSize = errors.New("invalid tenant shard size, the value must be greater or equal to 0")

// Limits is the per-tenant configuration required by the store-gateway:
// the sharding limits plus per-tenant object storage configuration.
type Limits interface {
	ShardingLimits
	phlareobj.TenantConfigProvider
}

// ShardingLimits is the interface that should be implemented by the limits provider,
// limiting the scope of the limits to the ones required by sharding strategies.
type ShardingLimits interface {
	StoreGatewayTenantShardSize(tenantID string) int
}

// StoreGateway is the service that owns the ring membership of one
// store-gateway instance and the bucket stores for the blocks it is
// responsible for.
type StoreGateway struct {
	services.Service
	logger log.Logger

	gatewayCfg Config
	stores     *BucketStores

	// Ring used for sharding blocks.
	ringLifecycler *ring.BasicLifecycler
	ring           *ring.Ring

	// Subservices manager (ring, lifecycler)
	subservices        *services.Manager
	subservicesWatcher *services.FailureWatcher

	// Counter of bucket sync operations, labelled by the reason that triggered them.
	bucketSync *prometheus.CounterVec
}

// Config holds the store-gateway configuration: its hash ring and the
// bucket store settings.
type Config struct {
	ShardingRing      RingConfig        `yaml:"sharding_ring" doc:"description=The hash ring configuration."`
	BucketStoreConfig BucketStoreConfig `yaml:"bucket_store,omitempty"`
}

// RegisterFlags registers the Config flags.
func (cfg *Config) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
	cfg.ShardingRing.RegisterFlags(f, logger)
	cfg.BucketStoreConfig.RegisterFlags(f, logger)
}

// Validate checks the bucket store configuration and the default per-tenant
// shard size limit, returning errInvalidTenantShardSize when the latter is
// negative.
func (c *Config) Validate(limits validation.Limits) error {
	if err := c.BucketStoreConfig.Validate(util.Logger); err != nil {
		return errors.Wrap(err, "bucket store config")
	}
	if limits.StoreGatewayTenantShardSize < 0 {
		return errInvalidTenantShardSize
	}

	return nil
}

// NewStoreGateway creates the ring KV store client and builds a StoreGateway
// on top of it.
func NewStoreGateway(gatewayCfg Config, storageBucket phlareobj.Bucket, limits Limits, logger log.Logger, reg prometheus.Registerer) (*StoreGateway, error) {
	ringStore, err := kv.NewClient(
		gatewayCfg.ShardingRing.Ring.KVStore,
		ring.GetCodec(),
		kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("pyroscope_", reg), "store-gateway"),
		logger,
	)
	if err != nil {
		return nil, errors.Wrap(err, "create KV store client")
	}

	return newStoreGateway(gatewayCfg, storageBucket, ringStore, limits, logger, reg)
}

// newStoreGateway wires up the ring lifecycler, ring client, sharding strategy
// and bucket stores, and installs the starting/running/stopping service loop.
// It is split from NewStoreGateway so tests can inject a kv.Client.
func newStoreGateway(gatewayCfg Config, storageBucket phlareobj.Bucket, ringStore kv.Client, limits Limits, logger log.Logger, reg prometheus.Registerer) (*StoreGateway, error) {
	var err error

	g := &StoreGateway{
		gatewayCfg: gatewayCfg,
		logger:     logger,
		bucketSync: promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
			Name: "pyroscope_storegateway_bucket_sync_total",
			Help: "Total number of times the bucket sync operation triggered.",
		}, []string{"reason"}),
	}

	// Init metrics: touch each label value so all series are exported (at zero)
	// before the first sync happens.
	g.bucketSync.WithLabelValues(syncReasonInitial)
	g.bucketSync.WithLabelValues(syncReasonPeriodic)
	g.bucketSync.WithLabelValues(syncReasonRingChange)

	// Init sharding strategy.
	var shardingStrategy ShardingStrategy

	lifecyclerCfg, err := gatewayCfg.ShardingRing.ToLifecyclerConfig(logger)
	if err != nil {
		return nil, errors.Wrap(err, "invalid ring lifecycler config")
	}

	// Define lifecycler delegates in reverse order (last to be called defined first because they're
	// chained via "next delegate").
	delegate := ring.BasicLifecyclerDelegate(ring.NewInstanceRegisterDelegate(ring.JOINING, RingNumTokens))
	delegate = ring.NewLeaveOnStoppingDelegate(delegate, logger)
	delegate = ring.NewTokensPersistencyDelegate(gatewayCfg.ShardingRing.TokensFilePath, ring.JOINING, delegate, logger)
	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*gatewayCfg.ShardingRing.Ring.HeartbeatTimeout, delegate, logger)

	g.ringLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, RingNameForServer, RingKey, ringStore, delegate, logger, prometheus.WrapRegistererWithPrefix("pyroscope_", reg))
	if err != nil {
		return nil, errors.Wrap(err, "create ring lifecycler")
	}

	ringCfg := gatewayCfg.ShardingRing.ToRingConfig()
	g.ring, err = ring.NewWithStoreClientAndStrategy(ringCfg, RingNameForServer, RingKey, ringStore, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("pyroscope_", reg), logger)
	if err != nil {
		return nil, errors.Wrap(err, "create ring client")
	}

	shardingStrategy = NewShuffleShardingStrategy(g.ring, lifecyclerCfg.ID, lifecyclerCfg.Addr, limits, logger)

	g.stores, err = NewBucketStores(gatewayCfg.BucketStoreConfig, shardingStrategy, storageBucket, limits, logger, prometheus.WrapRegistererWith(prometheus.Labels{"component": "store-gateway"}, reg))
	if err != nil {
		return nil, errors.Wrap(err, "create bucket stores")
	}

	g.Service = services.NewBasicService(g.starting, g.running, g.stopping)

	return g, nil
}

// starting registers the instance in the ring, waits for JOINING, optionally
// waits for ring topology stability, runs the initial blocks sync, then
// switches the instance to ACTIVE and waits until the ring client observes it.
func (g *StoreGateway) starting(ctx context.Context) (err error) {
	// In case this function will return error we want to unregister the instance
	// from the ring. We do it ensuring dependencies are gracefully stopped if they
	// were already started.
	defer func() {
		if err == nil || g.subservices == nil {
			return
		}

		if stopErr := services.StopManagerAndAwaitStopped(context.Background(), g.subservices); stopErr != nil {
			level.Error(g.logger).Log("msg", "failed to gracefully stop store-gateway dependencies", "err", stopErr)
		}
	}()

	// First of all we register the instance in the ring and wait
	// until the lifecycler successfully started.
	if g.subservices, err = services.NewManager(g.ringLifecycler, g.ring); err != nil {
		return errors.Wrap(err, "unable to start store-gateway dependencies")
	}

	g.subservicesWatcher = services.NewFailureWatcher()
	g.subservicesWatcher.WatchManager(g.subservices)

	if err = services.StartManagerAndAwaitHealthy(ctx, g.subservices); err != nil {
		return errors.Wrap(err, "unable to start store-gateway dependencies")
	}

	// Wait until the ring client detected this instance in the JOINING state to
	// make sure that when we'll run the initial sync we already know the tokens
	// assigned to this instance.
	level.Info(g.logger).Log("msg", "waiting until store-gateway is JOINING in the ring")
	if err := ring.WaitInstanceState(ctx, g.ring, g.ringLifecycler.GetInstanceID(), ring.JOINING); err != nil {
		return err
	}
	level.Info(g.logger).Log("msg", "store-gateway is JOINING in the ring")

	// In the event of a cluster cold start or scale up of 2+ store-gateway instances at the same
	// time, we may end up in a situation where each new store-gateway instance starts at a slightly
	// different time and thus each one starts with a different state of the ring. It's better
	// to just wait a short time for ring stability.
	if g.gatewayCfg.ShardingRing.WaitStabilityMinDuration > 0 {
		minWaiting := g.gatewayCfg.ShardingRing.WaitStabilityMinDuration
		maxWaiting := g.gatewayCfg.ShardingRing.WaitStabilityMaxDuration

		level.Info(g.logger).Log("msg", "waiting until store-gateway ring topology is stable", "min_waiting", minWaiting.String(), "max_waiting", maxWaiting.String())
		if err := ring.WaitRingTokensStability(ctx, g.ring, BlocksOwnerSync, minWaiting, maxWaiting); err != nil {
			// Instability past maxWaiting is tolerated: we log and proceed rather than fail startup.
			level.Warn(g.logger).Log("msg", "store-gateway ring topology is not stable after the max waiting time, proceeding anyway")
		} else {
			level.Info(g.logger).Log("msg", "store-gateway ring topology is stable")
		}
	}

	// At this point, if sharding is enabled, the instance is registered with some tokens
	// and we can run the initial synchronization.
	g.bucketSync.WithLabelValues(syncReasonInitial).Inc()
	if err = g.stores.InitialSync(ctx); err != nil {
		return errors.Wrap(err, "initial blocks synchronization")
	}

	// Now that the initial sync is done, we should have loaded all blocks
	// assigned to our shard, so we can switch to ACTIVE and start serving
	// requests.
	if err = g.ringLifecycler.ChangeState(ctx, ring.ACTIVE); err != nil {
		return errors.Wrapf(err, "switch instance to %s in the ring", ring.ACTIVE)
	}

	// Wait until the ring client detected this instance in the ACTIVE state to
	// make sure that when we'll run the loop it won't be detected as a ring
	// topology change.
	level.Info(g.logger).Log("msg", "waiting until store-gateway is ACTIVE in the ring")
	if err := ring.WaitInstanceState(ctx, g.ring, g.ringLifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
		return err
	}
	level.Info(g.logger).Log("msg", "store-gateway is ACTIVE in the ring")

	return nil
}

// running is the main loop: it re-syncs stores periodically, re-syncs on ring
// topology changes, and exits on context cancellation or subservice failure.
func (g *StoreGateway) running(ctx context.Context) error {
	// Apply a jitter to the sync frequency in order to increase the probability
	// of hitting the shared cache (if any).
	syncTicker := time.NewTicker(util.DurationWithJitter(g.gatewayCfg.BucketStoreConfig.SyncInterval, 0.2))
	defer syncTicker.Stop()

	ringLastState, _ := g.ring.GetAllHealthy(BlocksOwnerSync) // nolint:errcheck
	ringTicker := time.NewTicker(util.DurationWithJitter(g.gatewayCfg.ShardingRing.RingCheckPeriod, 0.2))
	defer ringTicker.Stop()

	for {
		select {
		case <-syncTicker.C:
			g.syncStores(ctx, syncReasonPeriodic)
		case <-ringTicker.C:
			// We ignore the error because in case of error it will return an empty
			// replication set which we use to compare with the previous state.
			currRingState, _ := g.ring.GetAllHealthy(BlocksOwnerSync) // nolint:errcheck

			if ring.HasReplicationSetChanged(ringLastState, currRingState) {
				ringLastState = currRingState
				g.syncStores(ctx, syncReasonRingChange)
			}
		case <-ctx.Done():
			return nil
		case err := <-g.subservicesWatcher.Chan():
			return errors.Wrap(err, "store gateway subservice failed")
		}
	}
}

// stopping shuts down the subservices (ring client and lifecycler). A failed
// stop is logged but not returned, so shutdown always completes.
func (g *StoreGateway) stopping(_ error) error {
	if g.subservices != nil {
		if err := services.StopManagerAndAwaitStopped(context.Background(), g.subservices); err != nil {
			level.Warn(g.logger).Log("msg", "failed to stop store-gateway subservices", "err", err)
		}
	}

	return nil
}

// syncStores triggers a blocks synchronization for all users, incrementing the
// bucket-sync counter for the given reason. Sync failures are logged, not
// returned, so the running loop keeps going.
func (g *StoreGateway) syncStores(ctx context.Context, reason string) {
	level.Info(g.logger).Log("msg", "synchronizing TSDB blocks for all users", "reason", reason)
	g.bucketSync.WithLabelValues(reason).Inc()

	if err := g.stores.SyncBlocks(ctx); err != nil {
		level.Warn(g.logger).Log("msg", "failed to synchronize TSDB blocks", "reason", reason, "err", err)
	} else {
		level.Info(g.logger).Log("msg", "successfully synchronized TSDB blocks for all users", "reason", reason)
	}
}