github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/ingester/lifecycle_test.go

package ingester

import (
	"context"
	"fmt"
	"io"
	"math"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"github.com/go-kit/log"
	"github.com/grafana/dskit/flagext"
	"github.com/grafana/dskit/kv"
	"github.com/grafana/dskit/kv/consul"
	"github.com/grafana/dskit/ring"
	"github.com/grafana/dskit/services"
	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/pkg/labels"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/weaveworks/common/user"
	"google.golang.org/grpc"
	"google.golang.org/grpc/health/grpc_health_v1"

	"github.com/cortexproject/cortex/pkg/chunk"
	"github.com/cortexproject/cortex/pkg/cortexpb"
	"github.com/cortexproject/cortex/pkg/ingester/client"
	"github.com/cortexproject/cortex/pkg/util/test"
	"github.com/cortexproject/cortex/pkg/util/validation"
)

const userID = "1"

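// defaultIngesterTestConfig returns an ingester Config for tests: the ring is backed
// by an in-memory Consul mock, flushing is effectively disabled (very long flush and
// idle periods), and the lifecycler registers as "localhost" with a single token.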
func defaultIngesterTestConfig(t testing.TB) Config {
	t.Helper()

	consul, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil)
	t.Cleanup(func() { assert.NoError(t, closer.Close()) })

	cfg := Config{}
	flagext.DefaultValues(&cfg)
	flagext.DefaultValues(&cfg.BlocksStorageConfig)
	cfg.FlushCheckPeriod = 99999 * time.Hour
	cfg.MaxChunkIdle = 99999 * time.Hour
	cfg.ConcurrentFlushes = 1
	cfg.LifecyclerConfig.RingConfig.KVStore.Mock = consul
	cfg.LifecyclerConfig.NumTokens = 1
	cfg.LifecyclerConfig.ListenPort = 0
	cfg.LifecyclerConfig.Addr = "localhost"
	cfg.LifecyclerConfig.ID = "localhost"
	cfg.LifecyclerConfig.FinalSleep = 0
	cfg.MaxTransferRetries = 0
	cfg.ActiveSeriesMetricsEnabled = true
	return cfg
}

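// defaultClientTestConfig returns an ingester client.Config populated with default values.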
func defaultClientTestConfig() client.Config {
	clientConfig := client.Config{}
	flagext.DefaultValues(&clientConfig)
	return clientConfig
}

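// defaultLimitsTestConfig returns validation.Limits populated with default values.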
func defaultLimitsTestConfig() validation.Limits {
	limits := validation.Limits{}
	flagext.DefaultValues(&limits)
	return limits
}

// TestIngesterRestart tests that a restarting ingester doesn't keep adding more tokens.
func TestIngesterRestart(t *testing.T) {
	config := defaultIngesterTestConfig(t)
	clientConfig := defaultClientTestConfig()
	limits := defaultLimitsTestConfig()
	config.LifecyclerConfig.UnregisterOnShutdown = false

	{
		_, ingester := newTestStore(t, config, clientConfig, limits, nil)
		time.Sleep(100 * time.Millisecond)
		// Doesn't actually unregister due to UnregisterOnShutdown: false.
		require.NoError(t, services.StopAndAwaitTerminated(context.Background(), ingester))
	}

	test.Poll(t, 100*time.Millisecond, 1, func() interface{} {
		return numTokens(config.LifecyclerConfig.RingConfig.KVStore.Mock, "localhost", ring.IngesterRingKey)
	})

	{
		_, ingester := newTestStore(t, config, clientConfig, limits, nil)
		time.Sleep(100 * time.Millisecond)
		// Doesn't actually unregister due to UnregisterOnShutdown: false.
		require.NoError(t, services.StopAndAwaitTerminated(context.Background(), ingester))
	}

	time.Sleep(200 * time.Millisecond)

	test.Poll(t, 100*time.Millisecond, 1, func() interface{} {
		return numTokens(config.LifecyclerConfig.RingConfig.KVStore.Mock, "localhost", ring.IngesterRingKey)
	})
}

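// TestIngester_ShutdownHandler checks that hitting the shutdown handler removes the
// ingester from the ring regardless of the UnregisterOnShutdown setting.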
func TestIngester_ShutdownHandler(t *testing.T) {
	for _, unregister := range []bool{false, true} {
		t.Run(fmt.Sprintf("unregister=%t", unregister), func(t *testing.T) {
			config := defaultIngesterTestConfig(t)
			clientConfig := defaultClientTestConfig()
			limits := defaultLimitsTestConfig()
			config.LifecyclerConfig.UnregisterOnShutdown = unregister
			_, ingester := newTestStore(t, config, clientConfig, limits, nil)

			// Make sure the ingester has been added to the ring.
			test.Poll(t, 100*time.Millisecond, 1, func() interface{} {
				return numTokens(config.LifecyclerConfig.RingConfig.KVStore.Mock, "localhost", ring.IngesterRingKey)
			})

			recorder := httptest.NewRecorder()
			ingester.ShutdownHandler(recorder, nil)
			require.Equal(t, http.StatusNoContent, recorder.Result().StatusCode)

			// Make sure the ingester has been removed from the ring even when UnregisterOnShutdown is false.
			test.Poll(t, 100*time.Millisecond, 0, func() interface{} {
				return numTokens(config.LifecyclerConfig.RingConfig.KVStore.Mock, "localhost", ring.IngesterRingKey)
			})
		})
	}
}

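// TestIngesterChunksTransfer checks that a leaving ingester transfers its in-memory
// chunks to a PENDING ingester, which can then serve queries and accept new samples
// for the transferred series.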
func TestIngesterChunksTransfer(t *testing.T) {
	limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil)
	require.NoError(t, err)

	// Start the first ingester, and get it into ACTIVE state.
	cfg1 := defaultIngesterTestConfig(t)
	cfg1.LifecyclerConfig.ID = "ingester1"
	cfg1.LifecyclerConfig.Addr = "ingester1"
	cfg1.LifecyclerConfig.JoinAfter = 0 * time.Second
	cfg1.MaxTransferRetries = 10
	ing1, err := New(cfg1, defaultClientTestConfig(), limits, nil, nil, log.NewNopLogger())
	require.NoError(t, err)
	require.NoError(t, services.StartAndAwaitRunning(context.Background(), ing1))

	test.Poll(t, 100*time.Millisecond, ring.ACTIVE, func() interface{} {
		return ing1.lifecycler.GetState()
	})

	// Now write a sample to this ingester
	req, expectedResponse, _, _ := mockWriteRequest(t, labels.Labels{{Name: labels.MetricName, Value: "foo"}}, 456, 123000)
	ctx := user.InjectOrgID(context.Background(), userID)
	_, err = ing1.Push(ctx, req)
	require.NoError(t, err)

	// Start a second ingester, but let it go into PENDING
	cfg2 := defaultIngesterTestConfig(t)
	cfg2.LifecyclerConfig.RingConfig.KVStore.Mock = cfg1.LifecyclerConfig.RingConfig.KVStore.Mock
	cfg2.LifecyclerConfig.ID = "ingester2"
	cfg2.LifecyclerConfig.Addr = "ingester2"
	cfg2.LifecyclerConfig.JoinAfter = 100 * time.Second
	ing2, err := New(cfg2, defaultClientTestConfig(), limits, nil, nil, log.NewNopLogger())
	require.NoError(t, err)
	require.NoError(t, services.StartAndAwaitRunning(context.Background(), ing2))

	// Wire ing1's client factory so that, when it shuts down, it transfers its chunks to ing2.
	ing1.cfg.ingesterClientFactory = func(addr string, _ client.Config) (client.HealthAndIngesterClient, error) {
		return ingesterClientAdapater{
			ingester: ing2,
		}, nil
	}

	// Now stop the first ingester, and wait for the second ingester to become ACTIVE.
	require.NoError(t, services.StopAndAwaitTerminated(context.Background(), ing1))

	test.Poll(t, 10*time.Second, ring.ACTIVE, func() interface{} {
		return ing2.lifecycler.GetState()
	})

	// And check the second ingester has the sample
	matcher, err := labels.NewMatcher(labels.MatchEqual, model.MetricNameLabel, "foo")
	require.NoError(t, err)

	request, err := client.ToQueryRequest(model.TimeFromUnix(0), model.TimeFromUnix(200), []*labels.Matcher{matcher})
	require.NoError(t, err)

	response, err := ing2.Query(ctx, request)
	require.NoError(t, err)
	assert.Equal(t, expectedResponse, response)

	// Check we can send the same sample again to the new ingester and get the same result
	req, _, _, _ = mockWriteRequest(t, labels.Labels{{Name: labels.MetricName, Value: "foo"}}, 456, 123000)
	_, err = ing2.Push(ctx, req)
	require.NoError(t, err)
	response, err = ing2.Query(ctx, request)
	require.NoError(t, err)
	assert.Equal(t, expectedResponse, response)
}

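// TestIngesterBadTransfer checks that a transfer carrying no series is rejected and
// leaves the receiving ingester in the PENDING state.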
func TestIngesterBadTransfer(t *testing.T) {
	limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil)
	require.NoError(t, err)

	// Start ingester in PENDING.
	cfg := defaultIngesterTestConfig(t)
	cfg.LifecyclerConfig.ID = "ingester1"
	cfg.LifecyclerConfig.Addr = "ingester1"
	cfg.LifecyclerConfig.JoinAfter = 100 * time.Second
	ing, err := New(cfg, defaultClientTestConfig(), limits, nil, nil, log.NewNopLogger())
	require.NoError(t, err)
	require.NoError(t, services.StartAndAwaitRunning(context.Background(), ing))

	test.Poll(t, 100*time.Millisecond, ring.PENDING, func() interface{} {
		return ing.lifecycler.GetState()
	})

	// Now transfer 0 series to this ingester, ensure it errors.
	client := ingesterClientAdapater{ingester: ing}
	stream, err := client.TransferChunks(context.Background())
	require.NoError(t, err)
	_, err = stream.CloseAndRecv()
	require.Error(t, err)

	// Check the ingester is still waiting.
	require.Equal(t, ring.PENDING, ing.lifecycler.GetState())
}

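// ingesterTransferChunkStreamMock implements both the client and server sides of the
// TransferChunks stream in-process, using channels in place of a real gRPC connection.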
type ingesterTransferChunkStreamMock struct {
	ctx  context.Context
	reqs chan *client.TimeSeriesChunk
	resp chan *client.TransferChunksResponse
	err  chan error

	grpc.ServerStream
	grpc.ClientStream
}

func (s *ingesterTransferChunkStreamMock) Send(tsc *client.TimeSeriesChunk) error {
	s.reqs <- tsc
	return nil
}

func (s *ingesterTransferChunkStreamMock) CloseAndRecv() (*client.TransferChunksResponse, error) {
	close(s.reqs)
	select {
	case resp := <-s.resp:
		return resp, nil
	case err := <-s.err:
		return nil, err
	}
}

func (s *ingesterTransferChunkStreamMock) SendAndClose(resp *client.TransferChunksResponse) error {
	s.resp <- resp
	return nil
}

func (s *ingesterTransferChunkStreamMock) ErrorAndClose(err error) {
	s.err <- err
}

func (s *ingesterTransferChunkStreamMock) Recv() (*client.TimeSeriesChunk, error) {
	req, ok := <-s.reqs
	if !ok {
		return nil, io.EOF
	}
	return req, nil
}

func (s *ingesterTransferChunkStreamMock) Context() context.Context {
	return s.ctx
}

func (*ingesterTransferChunkStreamMock) SendMsg(m interface{}) error {
	return nil
}

func (*ingesterTransferChunkStreamMock) RecvMsg(m interface{}) error {
	return nil
}

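// ingesterClientAdapater exposes an in-process IngesterServer through the
// HealthAndIngesterClient interface, so one test ingester can transfer chunks
// directly to another without a network connection.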
type ingesterClientAdapater struct {
	client.IngesterClient
	grpc_health_v1.HealthClient
	ingester client.IngesterServer
}

func (i ingesterClientAdapater) TransferChunks(ctx context.Context, _ ...grpc.CallOption) (client.Ingester_TransferChunksClient, error) {
	stream := &ingesterTransferChunkStreamMock{
		ctx:  ctx,
		reqs: make(chan *client.TimeSeriesChunk),
		resp: make(chan *client.TransferChunksResponse),
		err:  make(chan error),
	}
	go func() {
		err := i.ingester.TransferChunks(stream)
		if err != nil {
			stream.ErrorAndClose(err)
		}
	}()
	return stream, nil
}

func (i ingesterClientAdapater) Close() error {
	return nil
}

func (i ingesterClientAdapater) Check(ctx context.Context, in *grpc_health_v1.HealthCheckRequest, opts ...grpc.CallOption) (*grpc_health_v1.HealthCheckResponse, error) {
	return nil, nil
}

// TestIngesterFlush checks that the ingester flushes chunks to the store before
// removing itself from the ring.
func TestIngesterFlush(t *testing.T) {
	// Start the ingester, and get it into ACTIVE state.
	store, ing := newDefaultTestStore(t)

	test.Poll(t, 100*time.Millisecond, ring.ACTIVE, func() interface{} {
		return ing.lifecycler.GetState()
	})

	// Now write a sample to this ingester
	var (
		lbls       = []labels.Labels{{{Name: labels.MetricName, Value: "foo"}}}
		sampleData = []cortexpb.Sample{
			{
				TimestampMs: 123000,
				Value:       456,
			},
		}
	)
	ctx := user.InjectOrgID(context.Background(), userID)
	_, err := ing.Push(ctx, cortexpb.ToWriteRequest(lbls, sampleData, nil, cortexpb.API))
	require.NoError(t, err)

	// We add a 100ms sleep into the flush loop, such that we can reliably detect
	// if the ingester is removing its token from Consul before flushing chunks.
	ing.preFlushUserSeries = func() {
		time.Sleep(100 * time.Millisecond)
	}

	// Now stop the ingester.  Don't call shutdown, as it waits for all goroutines
	// to exit.  We just want to check that by the time the token is removed from
	// the ring, the data is in the chunk store.
	require.NoError(t, services.StopAndAwaitTerminated(context.Background(), ing.lifecycler))
	test.Poll(t, 200*time.Millisecond, 0, func() interface{} {
		r, err := ing.lifecycler.KVStore.Get(context.Background(), ring.IngesterRingKey)
		if err != nil {
			return -1
		}
		return len(r.(*ring.Desc).Ingesters)
	})

	// And check the store has the chunk
	res, err := chunk.ChunksToMatrix(context.Background(), store.chunks[userID], model.Time(0), model.Time(math.MaxInt64))
	require.NoError(t, err)
	assert.Equal(t, model.Matrix{
		&model.SampleStream{
			Metric: model.Metric{
				model.MetricNameLabel: "foo",
			},
			Values: []model.SamplePair{
				{Timestamp: model.TimeFromUnix(123), Value: model.SampleValue(456)},
			},
		},
	}, res)
}

// numTokens returns the number of tokens owned by the instance with the given
// name in the ring stored under ringKey.
func numTokens(c kv.Client, name, ringKey string) int {
	ringDesc, err := c.Get(context.Background(), ringKey)

	// The ringDesc may be nil if the lifecycler hasn't stored the ring
	// to the KVStore yet.
	if ringDesc == nil || err != nil {
		return 0
	}
	rd := ringDesc.(*ring.Desc)
	return len(rd.Ingesters[name].Tokens)
}