github.com/grafana/pyroscope@v1.18.0/pkg/validation/exporter/ring.go (about) 1 // SPDX-License-Identifier: AGPL-3.0-only 2 3 package exporter 4 5 import ( 6 "context" 7 "flag" 8 "fmt" 9 "net" 10 "strconv" 11 "time" 12 13 "github.com/go-kit/log" 14 "github.com/go-kit/log/level" 15 "github.com/grafana/dskit/kv" 16 "github.com/grafana/dskit/ring" 17 "github.com/grafana/dskit/services" 18 "github.com/pkg/errors" 19 "github.com/prometheus/client_golang/prometheus" 20 21 "github.com/grafana/pyroscope/pkg/util" 22 ) 23 24 const ( 25 // ringKey is the key under which we store the overrides-exporter's ring in the KVStore. 26 ringKey = "overrides-exporter" 27 28 // ringNumTokens is how many tokens each overrides-exporter should have in the 29 // ring. Overrides-exporter uses tokens to establish a ring leader, therefore 30 // only one token is needed. 31 ringNumTokens = 1 32 33 // ringAutoForgetUnhealthyPeriods is how many consecutive timeout periods an 34 // unhealthy instance in the ring will be automatically removed after. 35 ringAutoForgetUnhealthyPeriods = 4 36 37 // leaderToken is the special token that makes the owner the ring leader. 38 leaderToken = 0 39 ) 40 41 // ringOp is used as an instance state filter when obtaining instances from the 42 // ring. Instances in the LEAVING state are included to help minimise the number 43 // of leader changes during rollout and scaling operations. These instances will 44 // be forgotten after ringAutoForgetUnhealthyPeriods (see 45 // `KeepInstanceInTheRingOnShutdown`). 46 var ringOp = ring.NewOp([]ring.InstanceState{ring.ACTIVE, ring.LEAVING}, nil) 47 48 // RingConfig holds the configuration for the overrides-exporter ring. 49 type RingConfig struct { 50 Ring util.CommonRingConfig `yaml:",inline"` 51 52 // Ring stability (used to decrease token reshuffling on scale-up). 53 WaitStabilityMinDuration time.Duration `yaml:"wait_stability_min_duration" category:"advanced"` 54 WaitStabilityMaxDuration time.Duration `yaml:"wait_stability_max_duration" category:"advanced"` 55 } 56 57 // RegisterFlags configures this RingConfig to the given flag set and sets defaults. 58 func (c *RingConfig) RegisterFlags(f *flag.FlagSet, logger log.Logger) { 59 const flagNamePrefix = "overrides-exporter.ring." 60 const kvStorePrefix = "collectors/" 61 const componentPlural = "overrides-exporters" 62 c.Ring.RegisterFlags(flagNamePrefix, kvStorePrefix, componentPlural, f, logger) 63 // Ring stability flags. 64 f.DurationVar(&c.WaitStabilityMinDuration, flagNamePrefix+"wait-stability-min-duration", 0, "Minimum time to wait for ring stability at startup, if set to positive value. Set to 0 to disable.") 65 f.DurationVar(&c.WaitStabilityMaxDuration, flagNamePrefix+"wait-stability-max-duration", 5*time.Minute, "Maximum time to wait for ring stability at startup. If the overrides-exporter ring keeps changing after this period of time, it will start anyway.") 66 } 67 68 // toBasicLifecyclerConfig transforms a RingConfig into configuration that can be used to create a BasicLifecycler. 69 func (c *RingConfig) toBasicLifecyclerConfig(logger log.Logger) (ring.BasicLifecyclerConfig, error) { 70 instanceAddr, err := ring.GetInstanceAddr(c.Ring.InstanceAddr, c.Ring.InstanceInterfaceNames, logger, c.Ring.EnableIPv6) 71 if err != nil { 72 return ring.BasicLifecyclerConfig{}, err 73 } 74 75 instancePort := ring.GetInstancePort(c.Ring.InstancePort, c.Ring.ListenPort) 76 77 return ring.BasicLifecyclerConfig{ 78 ID: c.Ring.InstanceID, 79 Addr: net.JoinHostPort(instanceAddr, strconv.Itoa(instancePort)), 80 HeartbeatPeriod: c.Ring.HeartbeatPeriod, 81 HeartbeatTimeout: c.Ring.HeartbeatTimeout, 82 TokensObservePeriod: 0, 83 NumTokens: ringNumTokens, 84 KeepInstanceInTheRingOnShutdown: true, 85 }, nil 86 } 87 88 func (cfg *RingConfig) ToRingConfig() ring.Config { 89 rc := ring.Config{} 90 rc.KVStore = cfg.Ring.KVStore 91 rc.HeartbeatTimeout = cfg.Ring.HeartbeatTimeout 92 rc.ReplicationFactor = 1 93 rc.SubringCacheDisabled = true 94 95 return rc 96 } 97 98 // Validate the Config. 99 func (c *RingConfig) Validate() error { 100 if c.WaitStabilityMinDuration > 0 { 101 if c.WaitStabilityMinDuration > c.WaitStabilityMaxDuration { 102 return errors.New("-overrides-exporter.ring.wait-stability-max-duration must be greater or equal " + 103 "to -overrides-exporter.ring.wait-stability-min-duration") 104 } 105 } 106 return nil 107 } 108 109 // overridesExporterRing is a ring client that overrides-exporters can use to 110 // establish a leader replica that is the unique exporter of per-tenant limit metrics. 111 type overridesExporterRing struct { 112 services.Service 113 114 config RingConfig 115 116 client *ring.Ring 117 lifecycler *ring.BasicLifecycler 118 119 subserviceManager *services.Manager 120 subserviceWatcher *services.FailureWatcher 121 logger log.Logger 122 } 123 124 // newRing creates a new overridesExporterRing from the given configuration. 125 func newRing(config RingConfig, logger log.Logger, reg prometheus.Registerer) (*overridesExporterRing, error) { 126 reg = prometheus.WrapRegistererWithPrefix("pyroscope_", reg) 127 kvStore, err := kv.NewClient( 128 config.Ring.KVStore, 129 ring.GetCodec(), 130 kv.RegistererWithKVName(reg, "overrides-exporter-lifecycler"), 131 logger, 132 ) 133 if err != nil { 134 return nil, errors.Wrap(err, "failed to initialize overrides-exporter's KV store") 135 } 136 137 delegate := ring.BasicLifecyclerDelegate(ring.NewInstanceRegisterDelegate(ring.ACTIVE, ringNumTokens)) 138 delegate = ring.NewLeaveOnStoppingDelegate(delegate, logger) 139 delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*config.Ring.HeartbeatTimeout, delegate, logger) 140 141 lifecyclerConfig, err := config.toBasicLifecyclerConfig(logger) 142 if err != nil { 143 return nil, err 144 } 145 146 const ringName = "overrides-exporter" 147 lifecycler, err := ring.NewBasicLifecycler(lifecyclerConfig, ringName, ringKey, kvStore, delegate, logger, reg) 148 if err != nil { 149 return nil, errors.Wrap(err, "failed to initialize overrides-exporter's lifecycler") 150 } 151 152 ringClient, err := ring.New(config.ToRingConfig(), ringName, ringKey, logger, reg) 153 if err != nil { 154 return nil, errors.Wrap(err, "failed to create a overrides-exporter ring client") 155 } 156 157 manager, err := services.NewManager(lifecycler, ringClient) 158 if err != nil { 159 return nil, errors.Wrap(err, "failed to create service manager") 160 } 161 162 r := &overridesExporterRing{ 163 config: config, 164 client: ringClient, 165 lifecycler: lifecycler, 166 subserviceManager: manager, 167 subserviceWatcher: services.NewFailureWatcher(), 168 logger: logger, 169 } 170 r.Service = services.NewBasicService(r.starting, r.running, r.stopping) 171 return r, nil 172 } 173 174 // isLeader checks whether this instance is the leader replica that exports metrics for all tenants. 175 func (r *overridesExporterRing) isLeader() (bool, error) { 176 // Get the leader from the ring and check whether it's this replica. 177 rl, err := ringLeader(r.client) 178 if err != nil { 179 return false, err 180 } 181 182 return rl.Addr == r.lifecycler.GetInstanceAddr(), nil 183 } 184 185 // ringLeader returns the ring member that owns the special token. 186 func ringLeader(r ring.ReadRing) (*ring.InstanceDesc, error) { 187 rs, err := r.Get(leaderToken, ringOp, nil, nil, nil) 188 if err != nil { 189 return nil, errors.Wrapf(err, "failed to get a healthy instance for token %d", leaderToken) 190 } 191 if len(rs.Instances) != 1 { 192 return nil, fmt.Errorf("got %d instances for token %d (but expected 1)", len(rs.Instances), leaderToken) 193 } 194 195 return &rs.Instances[0], nil 196 } 197 198 func (r *overridesExporterRing) starting(ctx context.Context) error { 199 r.subserviceWatcher.WatchManager(r.subserviceManager) 200 if err := services.StartManagerAndAwaitHealthy(ctx, r.subserviceManager); err != nil { 201 return errors.Wrap(err, "unable to start overrides-exporter ring subservice manager") 202 } 203 204 level.Info(r.logger).Log("msg", "waiting until overrides-exporter is ACTIVE in the ring") 205 if err := ring.WaitInstanceState(ctx, r.client, r.lifecycler.GetInstanceID(), ring.ACTIVE); err != nil { 206 return errors.Wrap(err, "overrides-exporter failed to become ACTIVE in the ring") 207 } 208 level.Info(r.logger).Log("msg", "overrides-exporter is ACTIVE in the ring") 209 210 // In the event of a cluster cold start or scale up of 2+ overrides-exporter 211 // instances at the same time, the leader token may hop from one instance to 212 // another, creating high series churn for the limit metrics. Waiting for a 213 // stable ring helps to counteract that. 214 if r.config.WaitStabilityMinDuration > 0 { 215 minWaiting := r.config.WaitStabilityMinDuration 216 maxWaiting := r.config.WaitStabilityMaxDuration 217 218 level.Info(r.logger).Log("msg", "waiting until overrides-exporter ring topology is stable", "min_waiting", minWaiting.String(), "max_waiting", maxWaiting.String()) 219 if err := ring.WaitRingTokensStability(ctx, r.client, ringOp, minWaiting, maxWaiting); err != nil { 220 level.Warn(r.logger).Log("msg", "overrides-exporter ring topology is not stable after the max waiting time, proceeding anyway") 221 } else { 222 level.Info(r.logger).Log("msg", "overrides-exporter ring topology is stable") 223 } 224 } 225 return nil 226 } 227 228 func (r *overridesExporterRing) running(ctx context.Context) error { 229 select { 230 case <-ctx.Done(): 231 return nil 232 case err := <-r.subserviceWatcher.Chan(): 233 return errors.Wrap(err, "a subservice of overrides-exporter ring has failed") 234 } 235 } 236 237 func (r *overridesExporterRing) stopping(_ error) error { 238 return errors.Wrap( 239 services.StopManagerAndAwaitStopped(context.Background(), r.subserviceManager), 240 "failed to stop overrides-exporter's ring subservice manager", 241 ) 242 }