github.com/grafana/pyroscope@v1.18.0/pkg/validation/exporter/ring_test.go (about)

     1  // SPDX-License-Identifier: AGPL-3.0-only
     2  
     3  package exporter
     4  
     5  import (
     6  	"context"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/go-kit/log"
    11  	"github.com/grafana/dskit/kv/consul"
    12  	"github.com/grafana/dskit/ring"
    13  	"github.com/grafana/dskit/services"
    14  	"github.com/grafana/dskit/test"
    15  	"github.com/stretchr/testify/assert"
    16  	"github.com/stretchr/testify/require"
    17  )
    18  
    19  func TestOverridesExporter_emptyRing(t *testing.T) {
    20  	ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
    21  	t.Cleanup(func() { assert.NoError(t, closer.Close()) })
    22  
    23  	// Create an empty ring.
    24  	ctx := context.Background()
    25  	require.NoError(t, ringStore.CAS(ctx, ringKey, func(in interface{}) (out interface{}, retry bool, err error) {
    26  		return ring.NewDesc(), true, nil
    27  	}))
    28  
    29  	cfg := RingConfig{}
    30  	cfg.Ring.KVStore.Mock = ringStore
    31  
    32  	cfg.Ring.InstanceID = "instance-1"
    33  	cfg.Ring.InstanceAddr = "127.0.0.1"
    34  	i1, err := newRing(cfg, log.NewNopLogger(), nil)
    35  	require.NoError(t, err)
    36  	require.NoError(t, services.StartAndAwaitRunning(ctx, i1.client))
    37  	t.Cleanup(func() { require.NoError(t, services.StopAndAwaitTerminated(ctx, i1.client)) })
    38  
    39  	_, err = i1.isLeader()
    40  	require.ErrorIs(t, err, ring.ErrEmptyRing)
    41  }
    42  
    43  // TestOverridesExporterRing_scaleDownAndUp tests that a maximum of one leader
    44  // replica exists at any point in time while the number of replicas is scaled.
    45  func TestOverridesExporterRing_scaleDown(t *testing.T) {
    46  	ringStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
    47  	t.Cleanup(func() { assert.NoError(t, closer.Close()) })
    48  
    49  	cfg1 := RingConfig{}
    50  	cfg1.Ring.KVStore.Mock = ringStore
    51  	cfg1.Ring.HeartbeatPeriod = 1 * time.Second
    52  	cfg1.Ring.HeartbeatTimeout = 15 * time.Second
    53  
    54  	cfg1.Ring.InstanceID = "instance-1"
    55  	cfg1.Ring.InstanceAddr = "127.0.0.1"
    56  	i1, err := newRing(cfg1, log.NewNopLogger(), nil)
    57  	require.NoError(t, err)
    58  	l1 := i1.lifecycler
    59  
    60  	cfg2 := cfg1
    61  	cfg2.Ring.InstanceID = "instance-2"
    62  	cfg2.Ring.InstanceAddr = "127.0.0.2"
    63  	i2, err := newRing(cfg2, log.NewNopLogger(), nil)
    64  	require.NoError(t, err)
    65  	l2 := i2.lifecycler
    66  
    67  	// Register instances in the ring (manually, to be able to assign tokens).
    68  	ctx := context.Background()
    69  	require.NoError(t, ringStore.CAS(ctx, ringKey, func(in interface{}) (out interface{}, retry bool, err error) {
    70  		desc := ring.NewDesc()
    71  		desc.AddIngester(l1.GetInstanceID(), l1.GetInstanceAddr(), "", []uint32{leaderToken + 1}, ring.ACTIVE, time.Now(), false, time.Now())
    72  		desc.AddIngester(l2.GetInstanceID(), l2.GetInstanceAddr(), "", []uint32{leaderToken + 2}, ring.ACTIVE, time.Now(), false, time.Now())
    73  		return desc, true, nil
    74  	}))
    75  
    76  	require.NoError(t, services.StartAndAwaitRunning(ctx, i1))
    77  	require.NoError(t, services.StartAndAwaitRunning(ctx, i2))
    78  	t.Cleanup(func() { require.NoError(t, services.StopAndAwaitTerminated(ctx, i2)) })
    79  
    80  	// Wait until the clients have received the ring update.
    81  	test.Poll(t, time.Second, []int{2, 2}, func() interface{} {
    82  		rs1, _ := i1.client.GetAllHealthy(ringOp)
    83  		rs2, _ := i2.client.GetAllHealthy(ringOp)
    84  		return []int{len(rs1.Instances), len(rs2.Instances)}
    85  	})
    86  
    87  	// instance-1 should be the leader
    88  	i1IsLeader, err := i1.isLeader()
    89  	require.NoError(t, err)
    90  	i2IsLeader, err := i2.isLeader()
    91  	require.NoError(t, err)
    92  
    93  	require.True(t, i1IsLeader)
    94  	require.False(t, i2IsLeader)
    95  
    96  	// --- Scale down ---
    97  
    98  	// Stop instance-1.
    99  	require.NoError(t, services.StopAndAwaitTerminated(ctx, i1))
   100  
   101  	// Wait for the leader to have advertised its leaving state to the ring
   102  	test.Poll(t, 5*time.Second, ring.LEAVING, func() interface{} {
   103  		rs, _ := i2.client.GetAllHealthy(ringOp)
   104  		for _, instance := range rs.Instances {
   105  			if instance.Addr == l1.GetInstanceAddr() {
   106  				return instance.GetState()
   107  			}
   108  		}
   109  		return nil
   110  	})
   111  
   112  	i2IsLeader, err = i2.isLeader()
   113  	require.NoError(t, err)
   114  	// Since the previous leader is still in the ring but in state ring.LEAVING,
   115  	// no other instance should be the leader now.
   116  	require.False(t, i2IsLeader)
   117  
   118  	// After a certain period of time (ringAutoForgetUnhealthyPeriods *
   119  	// cfg.HeartbeatTimeout) the instance's heartbeat will expire. If the instance
   120  	// becomes healthy again during this period (e.g. during rollout), it will rejoin
   121  	// the ring and resume its function as the leader. Otherwise, it will be
   122  	// auto-forgotten from the ring and a different replica will become the leader.
   123  
   124  	// Expire the heartbeat so the previous leader can be auto-forgotten.
   125  	require.NoError(t, ringStore.CAS(ctx, ringKey, func(in interface{}) (out interface{}, retry bool, err error) {
   126  		desc := in.(*ring.Desc)
   127  		instance := desc.Ingesters[l1.GetInstanceID()]
   128  		instance.Timestamp = time.Now().Add(-ringAutoForgetUnhealthyPeriods * cfg1.Ring.HeartbeatTimeout).Unix()
   129  		desc.Ingesters[l1.GetInstanceID()] = instance
   130  		return desc, true, nil
   131  	}))
   132  
   133  	// Once the previous leader has been removed from the ring, instance-2 should
   134  	// become the new leader.
   135  	test.Poll(t, 5*time.Second, true, func() interface{} {
   136  		isLeader, _ := i2.isLeader()
   137  		return isLeader
   138  	})
   139  }