github.com/grafana/pyroscope@v1.18.0/pkg/validation/exporter/ring.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  
     3  package exporter
     4  
     5  import (
     6  	"context"
     7  	"flag"
     8  	"fmt"
     9  	"net"
    10  	"strconv"
    11  	"time"
    12  
    13  	"github.com/go-kit/log"
    14  	"github.com/go-kit/log/level"
    15  	"github.com/grafana/dskit/kv"
    16  	"github.com/grafana/dskit/ring"
    17  	"github.com/grafana/dskit/services"
    18  	"github.com/pkg/errors"
    19  	"github.com/prometheus/client_golang/prometheus"
    20  
    21  	"github.com/grafana/pyroscope/pkg/util"
    22  )
    23  
// Compile-time settings of the overrides-exporter ring.
const (
	// ringKey is the key under which the overrides-exporter's ring is stored
	// in the KVStore. newRing passes it to both the lifecycler and the ring
	// client.
	ringKey = "overrides-exporter"

	// ringNumTokens is how many tokens each overrides-exporter should have in the
	// ring. Overrides-exporter uses tokens to establish a ring leader, therefore
	// only one token is needed.
	ringNumTokens = 1

	// ringAutoForgetUnhealthyPeriods is the number of consecutive heartbeat
	// timeout periods after which an unhealthy instance is automatically
	// removed from the ring. newRing multiplies it by the configured
	// HeartbeatTimeout to obtain the auto-forget period.
	ringAutoForgetUnhealthyPeriods = 4

	// leaderToken is the special token that makes the owner the ring leader.
	// ringLeader looks up the instance that owns this token.
	leaderToken = 0
)
    40  
// ringOp is used as an instance state filter when obtaining instances from the
// ring: it is passed to the ring lookup in ringLeader and to the ring
// stability wait in starting. Instances in the LEAVING state are included in
// addition to ACTIVE ones to help minimise the number of leader changes during
// rollout and scaling operations. These instances will be forgotten after
// ringAutoForgetUnhealthyPeriods heartbeat timeouts (see
// `KeepInstanceInTheRingOnShutdown` in toBasicLifecyclerConfig).
var ringOp = ring.NewOp([]ring.InstanceState{ring.ACTIVE, ring.LEAVING}, nil)
    47  
// RingConfig holds the configuration for the overrides-exporter ring.
type RingConfig struct {
	// Ring holds the common ring settings (KV store, heartbeat, instance
	// identity and address); its fields are inlined into this struct's YAML.
	Ring util.CommonRingConfig `yaml:",inline"`

	// Ring stability (used to decrease token reshuffling on scale-up).
	// The wait at startup only happens when WaitStabilityMinDuration is
	// positive; RegisterFlags defaults it to 0 (disabled) and defaults
	// WaitStabilityMaxDuration to 5 minutes. Validate requires max >= min
	// whenever min is positive.
	WaitStabilityMinDuration time.Duration `yaml:"wait_stability_min_duration" category:"advanced"`
	WaitStabilityMaxDuration time.Duration `yaml:"wait_stability_max_duration" category:"advanced"`
}
    56  
    57  // RegisterFlags configures this RingConfig to the given flag set and sets defaults.
    58  func (c *RingConfig) RegisterFlags(f *flag.FlagSet, logger log.Logger) {
    59  	const flagNamePrefix = "overrides-exporter.ring."
    60  	const kvStorePrefix = "collectors/"
    61  	const componentPlural = "overrides-exporters"
    62  	c.Ring.RegisterFlags(flagNamePrefix, kvStorePrefix, componentPlural, f, logger)
    63  	// Ring stability flags.
    64  	f.DurationVar(&c.WaitStabilityMinDuration, flagNamePrefix+"wait-stability-min-duration", 0, "Minimum time to wait for ring stability at startup, if set to positive value. Set to 0 to disable.")
    65  	f.DurationVar(&c.WaitStabilityMaxDuration, flagNamePrefix+"wait-stability-max-duration", 5*time.Minute, "Maximum time to wait for ring stability at startup. If the overrides-exporter ring keeps changing after this period of time, it will start anyway.")
    66  }
    67  
    68  // toBasicLifecyclerConfig transforms a RingConfig into configuration that can be used to create a BasicLifecycler.
    69  func (c *RingConfig) toBasicLifecyclerConfig(logger log.Logger) (ring.BasicLifecyclerConfig, error) {
    70  	instanceAddr, err := ring.GetInstanceAddr(c.Ring.InstanceAddr, c.Ring.InstanceInterfaceNames, logger, c.Ring.EnableIPv6)
    71  	if err != nil {
    72  		return ring.BasicLifecyclerConfig{}, err
    73  	}
    74  
    75  	instancePort := ring.GetInstancePort(c.Ring.InstancePort, c.Ring.ListenPort)
    76  
    77  	return ring.BasicLifecyclerConfig{
    78  		ID:                              c.Ring.InstanceID,
    79  		Addr:                            net.JoinHostPort(instanceAddr, strconv.Itoa(instancePort)),
    80  		HeartbeatPeriod:                 c.Ring.HeartbeatPeriod,
    81  		HeartbeatTimeout:                c.Ring.HeartbeatTimeout,
    82  		TokensObservePeriod:             0,
    83  		NumTokens:                       ringNumTokens,
    84  		KeepInstanceInTheRingOnShutdown: true,
    85  	}, nil
    86  }
    87  
    88  func (cfg *RingConfig) ToRingConfig() ring.Config {
    89  	rc := ring.Config{}
    90  	rc.KVStore = cfg.Ring.KVStore
    91  	rc.HeartbeatTimeout = cfg.Ring.HeartbeatTimeout
    92  	rc.ReplicationFactor = 1
    93  	rc.SubringCacheDisabled = true
    94  
    95  	return rc
    96  }
    97  
    98  // Validate the Config.
    99  func (c *RingConfig) Validate() error {
   100  	if c.WaitStabilityMinDuration > 0 {
   101  		if c.WaitStabilityMinDuration > c.WaitStabilityMaxDuration {
   102  			return errors.New("-overrides-exporter.ring.wait-stability-max-duration must be greater or equal " +
   103  				"to -overrides-exporter.ring.wait-stability-min-duration")
   104  		}
   105  	}
   106  	return nil
   107  }
   108  
// overridesExporterRing is a ring client that overrides-exporters can use to
// establish a leader replica that is the unique exporter of per-tenant limit metrics.
type overridesExporterRing struct {
	// Service is the basic service built in newRing from the starting,
	// running and stopping methods of this type.
	services.Service

	// config is the ring configuration this instance was created with.
	config RingConfig

	// client is the ring client used to read the ring (leader lookup and
	// startup waits); lifecycler is the lifecycler created for this instance
	// in newRing, which also supplies this instance's own ID and address.
	client     *ring.Ring
	lifecycler *ring.BasicLifecycler

	// subserviceManager manages the lifecycler and the ring client as
	// subservices; subserviceWatcher watches that manager so that running
	// can return when a subservice fails.
	subserviceManager *services.Manager
	subserviceWatcher *services.FailureWatcher
	logger            log.Logger
}
   123  
   124  // newRing creates a new overridesExporterRing from the given configuration.
   125  func newRing(config RingConfig, logger log.Logger, reg prometheus.Registerer) (*overridesExporterRing, error) {
   126  	reg = prometheus.WrapRegistererWithPrefix("pyroscope_", reg)
   127  	kvStore, err := kv.NewClient(
   128  		config.Ring.KVStore,
   129  		ring.GetCodec(),
   130  		kv.RegistererWithKVName(reg, "overrides-exporter-lifecycler"),
   131  		logger,
   132  	)
   133  	if err != nil {
   134  		return nil, errors.Wrap(err, "failed to initialize overrides-exporter's KV store")
   135  	}
   136  
   137  	delegate := ring.BasicLifecyclerDelegate(ring.NewInstanceRegisterDelegate(ring.ACTIVE, ringNumTokens))
   138  	delegate = ring.NewLeaveOnStoppingDelegate(delegate, logger)
   139  	delegate = ring.NewAutoForgetDelegate(ringAutoForgetUnhealthyPeriods*config.Ring.HeartbeatTimeout, delegate, logger)
   140  
   141  	lifecyclerConfig, err := config.toBasicLifecyclerConfig(logger)
   142  	if err != nil {
   143  		return nil, err
   144  	}
   145  
   146  	const ringName = "overrides-exporter"
   147  	lifecycler, err := ring.NewBasicLifecycler(lifecyclerConfig, ringName, ringKey, kvStore, delegate, logger, reg)
   148  	if err != nil {
   149  		return nil, errors.Wrap(err, "failed to initialize overrides-exporter's lifecycler")
   150  	}
   151  
   152  	ringClient, err := ring.New(config.ToRingConfig(), ringName, ringKey, logger, reg)
   153  	if err != nil {
   154  		return nil, errors.Wrap(err, "failed to create a overrides-exporter ring client")
   155  	}
   156  
   157  	manager, err := services.NewManager(lifecycler, ringClient)
   158  	if err != nil {
   159  		return nil, errors.Wrap(err, "failed to create service manager")
   160  	}
   161  
   162  	r := &overridesExporterRing{
   163  		config:            config,
   164  		client:            ringClient,
   165  		lifecycler:        lifecycler,
   166  		subserviceManager: manager,
   167  		subserviceWatcher: services.NewFailureWatcher(),
   168  		logger:            logger,
   169  	}
   170  	r.Service = services.NewBasicService(r.starting, r.running, r.stopping)
   171  	return r, nil
   172  }
   173  
   174  // isLeader checks whether this instance is the leader replica that exports metrics for all tenants.
   175  func (r *overridesExporterRing) isLeader() (bool, error) {
   176  	// Get the leader from the ring and check whether it's this replica.
   177  	rl, err := ringLeader(r.client)
   178  	if err != nil {
   179  		return false, err
   180  	}
   181  
   182  	return rl.Addr == r.lifecycler.GetInstanceAddr(), nil
   183  }
   184  
   185  // ringLeader returns the ring member that owns the special token.
   186  func ringLeader(r ring.ReadRing) (*ring.InstanceDesc, error) {
   187  	rs, err := r.Get(leaderToken, ringOp, nil, nil, nil)
   188  	if err != nil {
   189  		return nil, errors.Wrapf(err, "failed to get a healthy instance for token %d", leaderToken)
   190  	}
   191  	if len(rs.Instances) != 1 {
   192  		return nil, fmt.Errorf("got %d instances for token %d (but expected 1)", len(rs.Instances), leaderToken)
   193  	}
   194  
   195  	return &rs.Instances[0], nil
   196  }
   197  
   198  func (r *overridesExporterRing) starting(ctx context.Context) error {
   199  	r.subserviceWatcher.WatchManager(r.subserviceManager)
   200  	if err := services.StartManagerAndAwaitHealthy(ctx, r.subserviceManager); err != nil {
   201  		return errors.Wrap(err, "unable to start overrides-exporter ring subservice manager")
   202  	}
   203  
   204  	level.Info(r.logger).Log("msg", "waiting until overrides-exporter is ACTIVE in the ring")
   205  	if err := ring.WaitInstanceState(ctx, r.client, r.lifecycler.GetInstanceID(), ring.ACTIVE); err != nil {
   206  		return errors.Wrap(err, "overrides-exporter failed to become ACTIVE in the ring")
   207  	}
   208  	level.Info(r.logger).Log("msg", "overrides-exporter is ACTIVE in the ring")
   209  
   210  	// In the event of a cluster cold start or scale up of 2+ overrides-exporter
   211  	// instances at the same time, the leader token may hop from one instance to
   212  	// another, creating high series churn for the limit metrics. Waiting for a
   213  	// stable ring helps to counteract that.
   214  	if r.config.WaitStabilityMinDuration > 0 {
   215  		minWaiting := r.config.WaitStabilityMinDuration
   216  		maxWaiting := r.config.WaitStabilityMaxDuration
   217  
   218  		level.Info(r.logger).Log("msg", "waiting until overrides-exporter ring topology is stable", "min_waiting", minWaiting.String(), "max_waiting", maxWaiting.String())
   219  		if err := ring.WaitRingTokensStability(ctx, r.client, ringOp, minWaiting, maxWaiting); err != nil {
   220  			level.Warn(r.logger).Log("msg", "overrides-exporter ring topology is not stable after the max waiting time, proceeding anyway")
   221  		} else {
   222  			level.Info(r.logger).Log("msg", "overrides-exporter ring topology is stable")
   223  		}
   224  	}
   225  	return nil
   226  }
   227  
   228  func (r *overridesExporterRing) running(ctx context.Context) error {
   229  	select {
   230  	case <-ctx.Done():
   231  		return nil
   232  	case err := <-r.subserviceWatcher.Chan():
   233  		return errors.Wrap(err, "a subservice of overrides-exporter ring has failed")
   234  	}
   235  }
   236  
   237  func (r *overridesExporterRing) stopping(_ error) error {
   238  	return errors.Wrap(
   239  		services.StopManagerAndAwaitStopped(context.Background(), r.subserviceManager),
   240  		"failed to stop overrides-exporter's ring subservice manager",
   241  	)
   242  }