github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_rangefeed_test.go

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver_test
    12  
    13  import (
    14  	"context"
    15  	"reflect"
    16  	"testing"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/keys"
    20  	"github.com/cockroachdb/cockroach/pkg/kv"
    21  	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
    22  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    23  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    24  	"github.com/cockroachdb/cockroach/pkg/sql"
    25  	"github.com/cockroachdb/cockroach/pkg/testutils"
    26  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    27  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    28  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    29  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    30  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    31  	"github.com/cockroachdb/errors"
    32  	"github.com/stretchr/testify/require"
    33  	"go.etcd.io/etcd/raft"
    34  	"go.etcd.io/etcd/raft/raftpb"
    35  	"google.golang.org/grpc/metadata"
    36  )
    37  
    38  // testStream is a mock implementation of roachpb.Internal_RangeFeedServer.
    39  type testStream struct {
    40  	ctx    context.Context
    41  	cancel func()
    42  	mu     struct {
    43  		syncutil.Mutex
    44  		events []*roachpb.RangeFeedEvent
    45  	}
    46  }
    47  
    48  func newTestStream() *testStream {
    49  	ctx, cancel := context.WithCancel(context.Background())
    50  	return &testStream{ctx: ctx, cancel: cancel}
    51  }
    52  
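         // The gRPC stream methods below are never exercised by these tests, so they
         // simply panic; only Send, Events, Context, and Cancel are used.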
    53  func (s *testStream) SendMsg(m interface{}) error  { panic("unimplemented") }
    54  func (s *testStream) RecvMsg(m interface{}) error  { panic("unimplemented") }
    55  func (s *testStream) SetHeader(metadata.MD) error  { panic("unimplemented") }
    56  func (s *testStream) SendHeader(metadata.MD) error { panic("unimplemented") }
    57  func (s *testStream) SetTrailer(metadata.MD)       { panic("unimplemented") }
    58  
    59  func (s *testStream) Context() context.Context {
    60  	return s.ctx
    61  }
    62  
    63  func (s *testStream) Cancel() {
    64  	s.cancel()
    65  }
    66  
    67  func (s *testStream) Send(e *roachpb.RangeFeedEvent) error {
    68  	s.mu.Lock()
    69  	defer s.mu.Unlock()
    70  	s.mu.events = append(s.mu.events, e)
    71  	return nil
    72  }
    73  
    74  func (s *testStream) Events() []*roachpb.RangeFeedEvent {
    75  	s.mu.Lock()
    76  	defer s.mu.Unlock()
    77  	return s.mu.events
    78  }
    79  
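         // TestReplicaRangefeed establishes rangefeeds (with WithDiff enabled) on all
         // three replicas of a range and verifies that they observe the expected
         // catch-up and live events for transactional and non-transactional writes,
         // that canceling a stream's context terminates it, and that rangefeeds
         // starting below the replica GC threshold are rejected.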
    80  func TestReplicaRangefeed(t *testing.T) {
    81  	defer leaktest.AfterTest(t)()
    82  
    83  	ctx := context.Background()
    84  	sc := kvserver.TestStoreConfig(nil)
    85  	sc.Clock = nil // manual clock
    86  	kvserver.RangefeedEnabled.Override(&sc.Settings.SV, true)
    87  	mtc := &multiTestContext{
    88  		storeConfig: &sc,
    89  		// This test was written before the multiTestContext started creating many
     90  		// system ranges at startup, and hasn't been updated to take that into
    91  		// account.
    92  		startWithSingleRange: true,
    93  	}
    94  	defer mtc.Stop()
    95  	mtc.Start(t, 3)
    96  	mtc.replicateRange(1, 1, 2)
    97  	db := mtc.dbs[0].NonTransactionalSender()
    98  
    99  	// Split the range so that the RHS uses epoch-based leases.
   100  	startKey := []byte("a")
   101  	splitArgs := adminSplitArgs(startKey)
   102  	if _, pErr := kv.SendWrapped(ctx, db, splitArgs); pErr != nil {
   103  		t.Fatalf("split saw unexpected error: %v", pErr)
   104  	}
   105  	rangeID := mtc.Store(0).LookupReplica(startKey).RangeID
   106  
   107  	// Insert a key before starting the rangefeeds.
   108  	initTime := mtc.clock().Now()
   109  	mtc.manualClock.Increment(1)
   110  	ts1 := mtc.clock().Now()
   111  	incArgs := incrementArgs(roachpb.Key("b"), 9)
   112  	_, pErr := kv.SendWrappedWith(ctx, db, roachpb.Header{Timestamp: ts1}, incArgs)
   113  	if pErr != nil {
   114  		t.Fatal(pErr)
   115  	}
   116  	mtc.waitForValues(roachpb.Key("b"), []int64{9, 9, 9})
   117  
   118  	replNum := 3
   119  	streams := make([]*testStream, replNum)
   120  	streamErrC := make(chan *roachpb.Error, replNum)
   121  	rangefeedSpan := roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("z")}
   122  	for i := 0; i < replNum; i++ {
   123  		stream := newTestStream()
   124  		streams[i] = stream
   125  		go func(i int) {
   126  			req := roachpb.RangeFeedRequest{
   127  				Header: roachpb.Header{
   128  					Timestamp: initTime,
   129  					RangeID:   rangeID,
   130  				},
   131  				Span:     rangefeedSpan,
   132  				WithDiff: true,
   133  			}
   134  
   135  			pErr := mtc.Store(i).RangeFeed(&req, stream)
   136  			streamErrC <- pErr
   137  		}(i)
   138  	}
   139  
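         	// checkForExpEvents waits until every stream has emitted at least
         	// len(expEvents) events and then asserts that the events match expEvents
         	// exactly, failing fast if any stream has already returned an error.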
   140  	checkForExpEvents := func(expEvents []*roachpb.RangeFeedEvent) {
   141  		t.Helper()
   142  		for i, stream := range streams {
   143  			var events []*roachpb.RangeFeedEvent
   144  			testutils.SucceedsSoon(t, func() error {
   145  				if len(streamErrC) > 0 {
   146  					// Break if the error channel is already populated.
   147  					return nil
   148  				}
   149  
   150  				events = stream.Events()
   151  				if len(events) < len(expEvents) {
   152  					return errors.Errorf("too few events: %v", events)
   153  				}
   154  				return nil
   155  			})
   156  
   157  			if len(streamErrC) > 0 {
   158  				t.Fatalf("unexpected error from stream: %v", <-streamErrC)
   159  			}
   160  			if !reflect.DeepEqual(events, expEvents) {
   161  				t.Fatalf("incorrect events on stream %d, found %v, want %v", i, events, expEvents)
   162  			}
   163  		}
   164  	}
   165  
   166  	// Wait for all streams to observe the catch-up related events.
   167  	expVal1 := roachpb.Value{Timestamp: ts1}
   168  	expVal1.SetInt(9)
   169  	expVal1.InitChecksum(roachpb.Key("b"))
   170  	expEvents := []*roachpb.RangeFeedEvent{
   171  		{Val: &roachpb.RangeFeedValue{
   172  			Key: roachpb.Key("b"), Value: expVal1,
   173  		}},
   174  		{Checkpoint: &roachpb.RangeFeedCheckpoint{
   175  			Span:       rangefeedSpan,
   176  			ResolvedTS: hlc.Timestamp{},
   177  		}},
   178  	}
   179  	checkForExpEvents(expEvents)
   180  
   181  	// Insert a key non-transactionally.
   182  	mtc.manualClock.Increment(1)
   183  	ts2 := mtc.clock().Now()
   184  	pArgs := putArgs(roachpb.Key("c"), []byte("val2"))
   185  	_, pErr = kv.SendWrappedWith(ctx, db, roachpb.Header{Timestamp: ts2}, pArgs)
   186  	if pErr != nil {
   187  		t.Fatal(pErr)
   188  	}
   189  
   190  	// Insert a second key transactionally.
   191  	mtc.manualClock.Increment(1)
   192  	ts3 := mtc.clock().Now()
   193  	if err := mtc.dbs[1].Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
   194  		txn.SetFixedTimestamp(ctx, ts3)
   195  		return txn.Put(ctx, roachpb.Key("m"), []byte("val3"))
   196  	}); err != nil {
   197  		t.Fatal(err)
   198  	}
   199  	// Read to force intent resolution.
   200  	if _, err := mtc.dbs[1].Get(ctx, roachpb.Key("m")); err != nil {
   201  		t.Fatal(err)
   202  	}
   203  
   204  	// Update the originally incremented key non-transactionally.
   205  	mtc.manualClock.Increment(1)
   206  	ts4 := mtc.clock().Now()
   207  	_, pErr = kv.SendWrappedWith(ctx, db, roachpb.Header{Timestamp: ts4}, incArgs)
   208  	if pErr != nil {
   209  		t.Fatal(pErr)
   210  	}
   211  
   212  	// Update the originally incremented key transactionally.
   213  	mtc.manualClock.Increment(1)
   214  	ts5 := mtc.clock().Now()
   215  	if err := mtc.dbs[1].Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
   216  		txn.SetFixedTimestamp(ctx, ts5)
   217  		_, err := txn.Inc(ctx, incArgs.Key, 7)
   218  		return err
   219  	}); err != nil {
   220  		t.Fatal(err)
   221  	}
   222  	// Read to force intent resolution.
   223  	if _, err := mtc.dbs[1].Get(ctx, roachpb.Key("b")); err != nil {
   224  		t.Fatal(err)
   225  	}
   226  
   227  	// Wait for all streams to observe the expected events.
   228  	expVal2 := roachpb.MakeValueFromBytesAndTimestamp([]byte("val2"), ts2)
   229  	expVal3 := roachpb.MakeValueFromBytesAndTimestamp([]byte("val3"), ts3)
    230  	expVal3.InitChecksum([]byte("m")) // kv.Txn sets value checksum
   231  	expVal4 := roachpb.Value{Timestamp: ts4}
   232  	expVal4.SetInt(18)
   233  	expVal4.InitChecksum(roachpb.Key("b"))
   234  	expVal5 := roachpb.Value{Timestamp: ts5}
   235  	expVal5.SetInt(25)
   236  	expVal5.InitChecksum(roachpb.Key("b"))
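         	// Because the rangefeeds were started with WithDiff, updates to existing
         	// keys also carry a PrevValue; previous values are expected without
         	// timestamps.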
   237  	expVal1NoTS, expVal4NoTS := expVal1, expVal4
   238  	expVal1NoTS.Timestamp, expVal4NoTS.Timestamp = hlc.Timestamp{}, hlc.Timestamp{}
   239  	expEvents = append(expEvents, []*roachpb.RangeFeedEvent{
   240  		{Val: &roachpb.RangeFeedValue{
   241  			Key: roachpb.Key("c"), Value: expVal2,
   242  		}},
   243  		{Val: &roachpb.RangeFeedValue{
   244  			Key: roachpb.Key("m"), Value: expVal3,
   245  		}},
   246  		{Val: &roachpb.RangeFeedValue{
   247  			Key: roachpb.Key("b"), Value: expVal4, PrevValue: expVal1NoTS,
   248  		}},
   249  		{Val: &roachpb.RangeFeedValue{
   250  			Key: roachpb.Key("b"), Value: expVal5, PrevValue: expVal4NoTS,
   251  		}},
   252  	}...)
   253  	checkForExpEvents(expEvents)
   254  
   255  	// Cancel each of the rangefeed streams.
   256  	for _, stream := range streams {
   257  		stream.Cancel()
   258  
   259  		pErr := <-streamErrC
   260  		if !testutils.IsPError(pErr, "context canceled") {
   261  			t.Fatalf("got error for RangeFeed: %v", pErr)
   262  		}
   263  	}
   264  
    265  	// Bump the GC threshold and assert that a RangeFeed request with a start
    266  	// timestamp below the threshold returns an error.
   267  	gcReq := &roachpb.GCRequest{
   268  		Threshold: initTime.Add(0, 1),
   269  	}
   270  	gcReq.Key = roachpb.Key(startKey)
   271  	gcReq.EndKey = mtc.Store(0).LookupReplica(startKey).Desc().EndKey.AsRawKey()
   272  	var ba roachpb.BatchRequest
   273  	ba.RangeID = rangeID
   274  	ba.Add(gcReq)
   275  	if _, pErr := mtc.Store(0).Send(ctx, ba); pErr != nil {
   276  		t.Fatal(pErr)
   277  	}
   278  
   279  	req := roachpb.RangeFeedRequest{
   280  		Header: roachpb.Header{
   281  			Timestamp: initTime,
   282  			RangeID:   rangeID,
   283  		},
   284  		Span: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("z")},
   285  	}
   286  
   287  	testutils.SucceedsSoon(t, func() error {
   288  		for i := 0; i < replNum; i++ {
   289  			repl := mtc.Store(i).LookupReplica(startKey)
   290  			if repl == nil {
   291  				return errors.Errorf("replica not found on node #%d", i+1)
   292  			}
   293  			if cur := repl.GetGCThreshold(); cur.Less(gcReq.Threshold) {
   294  				return errors.Errorf("%s has GCThreshold %s < %s; hasn't applied the bump yet", repl, cur, gcReq.Threshold)
   295  			}
   296  			stream := newTestStream()
   297  			timer := time.AfterFunc(10*time.Second, stream.Cancel)
   298  			defer timer.Stop()
   299  			defer stream.Cancel()
   300  
   301  			if pErr := mtc.Store(i).RangeFeed(&req, stream); !testutils.IsPError(
   302  				pErr, `must be after replica GC threshold`,
   303  			) {
   304  				return pErr.GoError()
   305  			}
   306  		}
   307  		return nil
   308  	})
   309  }
   310  
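         // TestReplicaRangefeedExpiringLeaseError verifies that a RangeFeed request
         // against a range that uses an expiration-based lease (range 1 here) is
         // rejected with a descriptive error.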
   311  func TestReplicaRangefeedExpiringLeaseError(t *testing.T) {
   312  	defer leaktest.AfterTest(t)()
   313  
   314  	sc := kvserver.TestStoreConfig(nil)
   315  	sc.Clock = nil // manual clock
   316  	kvserver.RangefeedEnabled.Override(&sc.Settings.SV, true)
   317  	mtc := &multiTestContext{
   318  		storeConfig: &sc,
   319  		// This test was written before the multiTestContext started creating many
    320  		// system ranges at startup, and hasn't been updated to take that into
   321  		// account.
   322  		startWithSingleRange: true,
   323  	}
   324  	defer mtc.Stop()
   325  	mtc.Start(t, 1)
   326  
   327  	// Establish a rangefeed on the replica we plan to remove.
   328  	stream := newTestStream()
   329  	req := roachpb.RangeFeedRequest{
   330  		Header: roachpb.Header{
   331  			RangeID: 1,
   332  		},
   333  		Span: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("z")},
   334  	}
   335  
    336  	// Cancel the stream's context so that RangeFeed returns immediately
    337  	// even if it doesn't return the expected error.
   338  	stream.Cancel()
   339  
   340  	pErr := mtc.Store(0).RangeFeed(&req, stream)
   341  	const exp = "expiration-based leases are incompatible with rangefeeds"
   342  	if !testutils.IsPError(pErr, exp) {
   343  		t.Errorf("expected error %q, found %v", exp, pErr)
   344  	}
   345  }
   346  
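         // TestReplicaRangefeedRetryErrors verifies that rangefeeds terminate with the
         // appropriate RangeFeedRetryError reason when their replica is removed, the
         // range is split or merged, the replica must catch up via a Raft snapshot, or
         // logical op logs are disabled.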
   347  func TestReplicaRangefeedRetryErrors(t *testing.T) {
   348  	defer leaktest.AfterTest(t)()
   349  	ctx := context.Background()
   350  
   351  	startKey := []byte("a")
   352  	setup := func(subT *testing.T) (*multiTestContext, roachpb.RangeID) {
   353  		subT.Helper()
   354  		sc := kvserver.TestStoreConfig(nil)
   355  		sc.Clock = nil // manual clock
   356  		kvserver.RangefeedEnabled.Override(&sc.Settings.SV, true)
   357  		mtc := &multiTestContext{
   358  			storeConfig: &sc,
   359  			// This test was written before the multiTestContext started creating many
    360  			// system ranges at startup, and hasn't been updated to take that into
   361  			// account.
   362  			startWithSingleRange: true,
   363  		}
   364  		mtc.Start(subT, 3)
   365  		mtc.replicateRange(1, 1, 2)
   366  
   367  		// Split the range so that the RHS uses epoch-based leases.
   368  		splitArgs := adminSplitArgs(startKey)
   369  		if _, pErr := kv.SendWrapped(ctx, mtc.distSenders[0], splitArgs); pErr != nil {
   370  			subT.Fatalf("split saw unexpected error: %v", pErr)
   371  		}
   372  		rangeID := mtc.Store(0).LookupReplica(startKey).RangeID
   373  
   374  		// Write to the RHS of the split and wait for all replicas to process it.
   375  		// This ensures that all replicas have seen the split before we move on.
   376  		incArgs := incrementArgs(roachpb.Key("a"), 9)
   377  		if _, pErr := kv.SendWrapped(ctx, mtc.distSenders[0], incArgs); pErr != nil {
    378  			subT.Fatal(pErr)
   379  		}
   380  		mtc.waitForValues(roachpb.Key("a"), []int64{9, 9, 9})
   381  
   382  		return mtc, rangeID
   383  	}
   384  
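         	// waitForInitialCheckpointAcrossSpan waits for the stream's first event,
         	// which is expected to be a checkpoint covering the entire span with an
         	// empty resolved timestamp.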
   385  	waitForInitialCheckpointAcrossSpan := func(
   386  		subT *testing.T, stream *testStream, streamErrC <-chan *roachpb.Error, span roachpb.Span,
   387  	) {
   388  		subT.Helper()
   389  		expEvents := []*roachpb.RangeFeedEvent{
   390  			{Checkpoint: &roachpb.RangeFeedCheckpoint{
   391  				Span:       span,
   392  				ResolvedTS: hlc.Timestamp{},
   393  			}},
   394  		}
   395  		var events []*roachpb.RangeFeedEvent
    396  		testutils.SucceedsSoon(subT, func() error {
   397  			if len(streamErrC) > 0 {
   398  				// Break if the error channel is already populated.
   399  				return nil
   400  			}
   401  
   402  			events = stream.Events()
   403  			if len(events) < len(expEvents) {
   404  				return errors.Errorf("too few events: %v", events)
   405  			}
   406  			return nil
   407  		})
   408  		if len(streamErrC) > 0 {
   409  			subT.Fatalf("unexpected error from stream: %v", <-streamErrC)
   410  		}
   411  		if !reflect.DeepEqual(events, expEvents) {
   412  			subT.Fatalf("incorrect events on stream, found %v, want %v", events, expEvents)
   413  		}
   414  	}
   415  
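         	// assertRangefeedRetryErr asserts that pErr is a RangeFeedRetryError with
         	// the given reason.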
   416  	assertRangefeedRetryErr := func(
   417  		subT *testing.T, pErr *roachpb.Error, expReason roachpb.RangeFeedRetryError_Reason,
   418  	) {
   419  		subT.Helper()
   420  		expErr := roachpb.NewRangeFeedRetryError(expReason)
   421  		if pErr == nil {
   422  			subT.Fatalf("got nil error for RangeFeed: expecting %v", expErr)
   423  		}
   424  		rfErr, ok := pErr.GetDetail().(*roachpb.RangeFeedRetryError)
   425  		if !ok {
   426  			subT.Fatalf("got incorrect error for RangeFeed: %v; expecting %v", pErr, expErr)
   427  		}
   428  		if rfErr.Reason != expReason {
   429  			subT.Fatalf("got incorrect RangeFeedRetryError reason for RangeFeed: %v; expecting %v",
   430  				rfErr.Reason, expReason)
   431  		}
   432  	}
   433  
   434  	t.Run(roachpb.RangeFeedRetryError_REASON_REPLICA_REMOVED.String(), func(t *testing.T) {
   435  		const removeStore = 2
   436  		mtc, rangeID := setup(t)
   437  		defer mtc.Stop()
   438  
   439  		// Establish a rangefeed on the replica we plan to remove.
   440  		stream := newTestStream()
   441  		streamErrC := make(chan *roachpb.Error, 1)
   442  		rangefeedSpan := roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("z")}
   443  		go func() {
   444  			req := roachpb.RangeFeedRequest{
   445  				Header: roachpb.Header{
   446  					RangeID: rangeID,
   447  				},
   448  				Span: rangefeedSpan,
   449  			}
   450  
   451  			pErr := mtc.Store(removeStore).RangeFeed(&req, stream)
   452  			streamErrC <- pErr
   453  		}()
   454  
   455  		// Wait for the first checkpoint event.
   456  		waitForInitialCheckpointAcrossSpan(t, stream, streamErrC, rangefeedSpan)
   457  
   458  		// Remove the replica from the range.
   459  		mtc.unreplicateRange(rangeID, removeStore)
   460  
   461  		// Check the error.
   462  		pErr := <-streamErrC
   463  		assertRangefeedRetryErr(t, pErr, roachpb.RangeFeedRetryError_REASON_REPLICA_REMOVED)
   464  	})
   465  	t.Run(roachpb.RangeFeedRetryError_REASON_RANGE_SPLIT.String(), func(t *testing.T) {
   466  		mtc, rangeID := setup(t)
   467  		defer mtc.Stop()
   468  
   469  		// Establish a rangefeed on the replica we plan to split.
   470  		stream := newTestStream()
   471  		streamErrC := make(chan *roachpb.Error, 1)
   472  		rangefeedSpan := roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("z")}
   473  		go func() {
   474  			req := roachpb.RangeFeedRequest{
   475  				Header: roachpb.Header{
   476  					RangeID: rangeID,
   477  				},
   478  				Span: rangefeedSpan,
   479  			}
   480  
   481  			pErr := mtc.Store(0).RangeFeed(&req, stream)
   482  			streamErrC <- pErr
   483  		}()
   484  
   485  		// Wait for the first checkpoint event.
   486  		waitForInitialCheckpointAcrossSpan(t, stream, streamErrC, rangefeedSpan)
   487  
   488  		// Split the range.
   489  		args := adminSplitArgs([]byte("m"))
   490  		if _, pErr := kv.SendWrapped(ctx, mtc.distSenders[0], args); pErr != nil {
   491  			t.Fatalf("split saw unexpected error: %v", pErr)
   492  		}
   493  
   494  		// Check the error.
   495  		pErr := <-streamErrC
   496  		assertRangefeedRetryErr(t, pErr, roachpb.RangeFeedRetryError_REASON_RANGE_SPLIT)
   497  	})
   498  	t.Run(roachpb.RangeFeedRetryError_REASON_RANGE_MERGED.String(), func(t *testing.T) {
   499  		mtc, rangeID := setup(t)
   500  		defer mtc.Stop()
   501  
   502  		// Split the range.
   503  		splitKey := []byte("m")
   504  		splitArgs := adminSplitArgs(splitKey)
   505  		if _, pErr := kv.SendWrapped(ctx, mtc.distSenders[0], splitArgs); pErr != nil {
   506  			t.Fatalf("split saw unexpected error: %v", pErr)
   507  		}
   508  		rightRangeID := mtc.Store(0).LookupReplica(splitKey).RangeID
   509  
   510  		// Write to the RHS of the split and wait for all replicas to process it.
   511  		// This ensures that all replicas have seen the split before we move on.
   512  		incArgs := incrementArgs(roachpb.Key("n"), 9)
   513  		if _, pErr := kv.SendWrapped(ctx, mtc.distSenders[0], incArgs); pErr != nil {
   514  			t.Fatal(pErr)
   515  		}
   516  		mtc.waitForValues(roachpb.Key("n"), []int64{9, 9, 9})
   517  
   518  		// Establish a rangefeed on the left replica.
   519  		streamLeft := newTestStream()
   520  		streamLeftErrC := make(chan *roachpb.Error, 1)
   521  		rangefeedLeftSpan := roachpb.Span{Key: roachpb.Key("a"), EndKey: splitKey}
   522  		go func() {
   523  			req := roachpb.RangeFeedRequest{
   524  				Header: roachpb.Header{
   525  					RangeID: rangeID,
   526  				},
   527  				Span: rangefeedLeftSpan,
   528  			}
   529  
   530  			pErr := mtc.Store(0).RangeFeed(&req, streamLeft)
   531  			streamLeftErrC <- pErr
   532  		}()
   533  
   534  		// Establish a rangefeed on the right replica.
   535  		streamRight := newTestStream()
   536  		streamRightErrC := make(chan *roachpb.Error, 1)
   537  		rangefeedRightSpan := roachpb.Span{Key: splitKey, EndKey: roachpb.Key("z")}
   538  		go func() {
   539  			req := roachpb.RangeFeedRequest{
   540  				Header: roachpb.Header{
   541  					RangeID: rightRangeID,
   542  				},
   543  				Span: rangefeedRightSpan,
   544  			}
   545  
   546  			pErr := mtc.Store(0).RangeFeed(&req, streamRight)
   547  			streamRightErrC <- pErr
   548  		}()
   549  
   550  		// Wait for the first checkpoint event on each stream.
   551  		waitForInitialCheckpointAcrossSpan(t, streamLeft, streamLeftErrC, rangefeedLeftSpan)
   552  		waitForInitialCheckpointAcrossSpan(t, streamRight, streamRightErrC, rangefeedRightSpan)
   553  
    554  		// Merge the ranges back together.
   555  		mergeArgs := adminMergeArgs(startKey)
   556  		if _, pErr := kv.SendWrapped(ctx, mtc.distSenders[0], mergeArgs); pErr != nil {
   557  			t.Fatalf("merge saw unexpected error: %v", pErr)
   558  		}
   559  
   560  		// Check the errors.
   561  		pErrLeft, pErrRight := <-streamLeftErrC, <-streamRightErrC
   562  		assertRangefeedRetryErr(t, pErrLeft, roachpb.RangeFeedRetryError_REASON_RANGE_MERGED)
   563  		assertRangefeedRetryErr(t, pErrRight, roachpb.RangeFeedRetryError_REASON_RANGE_MERGED)
   564  	})
   565  	t.Run(roachpb.RangeFeedRetryError_REASON_RAFT_SNAPSHOT.String(), func(t *testing.T) {
   566  		mtc, rangeID := setup(t)
   567  		defer mtc.Stop()
   568  		partitionStore := mtc.Store(2)
   569  
   570  		mtc.stores[0].SetReplicaGCQueueActive(false)
   571  		mtc.stores[1].SetReplicaGCQueueActive(false)
   572  		mtc.stores[2].SetReplicaGCQueueActive(false)
   573  
   574  		// Establish a rangefeed on the replica we plan to partition.
   575  		stream := newTestStream()
   576  		streamErrC := make(chan *roachpb.Error, 1)
   577  		rangefeedSpan := roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("z")}
   578  		go func() {
   579  			req := roachpb.RangeFeedRequest{
   580  				Header: roachpb.Header{
   581  					RangeID: rangeID,
   582  				},
   583  				Span: rangefeedSpan,
   584  			}
   585  
   586  			timer := time.AfterFunc(10*time.Second, stream.Cancel)
   587  			defer timer.Stop()
   588  
   589  			pErr := partitionStore.RangeFeed(&req, stream)
   590  			streamErrC <- pErr
   591  		}()
   592  
   593  		// Wait for the first checkpoint event.
   594  		waitForInitialCheckpointAcrossSpan(t, stream, streamErrC, rangefeedSpan)
   595  
    596  		// Make sure the replica on partitionStore is not the Raft leader. If it
    597  		// were the leader, this test would fall over when it cuts the replica
    598  		// off from Raft traffic.
   599  		testutils.SucceedsSoon(t, func() error {
   600  			repl, err := partitionStore.GetReplica(rangeID)
   601  			if err != nil {
   602  				return err
   603  			}
   604  			raftStatus := repl.RaftStatus()
   605  			if raftStatus != nil && raftStatus.RaftState == raft.StateFollower {
   606  				return nil
   607  			}
   608  			err = repl.AdminTransferLease(ctx, roachpb.StoreID(1))
   609  			return errors.Errorf("not raft follower: %+v, transferred lease: %v", raftStatus, err)
   610  		})
   611  
   612  		// Partition the replica from the rest of its range.
   613  		mtc.transport.Listen(partitionStore.Ident.StoreID, &unreliableRaftHandler{
   614  			rangeID:            rangeID,
   615  			RaftMessageHandler: partitionStore,
   616  		})
   617  
   618  		// Perform a write on the range.
   619  		pArgs := putArgs(roachpb.Key("c"), []byte("val2"))
   620  		if _, pErr := kv.SendWrapped(ctx, mtc.distSenders[0], pArgs); pErr != nil {
   621  			t.Fatal(pErr)
   622  		}
   623  
   624  		// Get that command's log index.
   625  		repl, err := mtc.Store(0).GetReplica(rangeID)
   626  		if err != nil {
   627  			t.Fatal(err)
   628  		}
   629  		index, err := repl.GetLastIndex()
   630  		if err != nil {
   631  			t.Fatal(err)
   632  		}
   633  
   634  		// Truncate the log at index+1 (log entries < N are removed, so this
   635  		// includes the put). This necessitates a snapshot when the partitioned
   636  		// replica rejoins the rest of the range.
   637  		truncArgs := truncateLogArgs(index+1, rangeID)
   638  		truncArgs.Key = startKey
   639  		if _, err := kv.SendWrapped(ctx, mtc.distSenders[0], truncArgs); err != nil {
   640  			t.Fatal(err)
   641  		}
   642  
   643  		// Remove the partition. Snapshot should follow.
   644  		mtc.transport.Listen(partitionStore.Ident.StoreID, &unreliableRaftHandler{
   645  			rangeID:            rangeID,
   646  			RaftMessageHandler: partitionStore,
   647  			unreliableRaftHandlerFuncs: unreliableRaftHandlerFuncs{
   648  				dropReq: func(req *kvserver.RaftMessageRequest) bool {
   649  					// Make sure that even going forward no MsgApp for what we just truncated can
   650  					// make it through. The Raft transport is asynchronous so this is necessary
   651  					// to make the test pass reliably.
   652  					// NB: the Index on the message is the log index that _precedes_ any of the
   653  					// entries in the MsgApp, so filter where msg.Index < index, not <= index.
   654  					return req.Message.Type == raftpb.MsgApp && req.Message.Index < index
   655  				},
   656  				dropHB:   func(*kvserver.RaftHeartbeat) bool { return false },
   657  				dropResp: func(*kvserver.RaftMessageResponse) bool { return false },
   658  			},
   659  		})
   660  
   661  		// Check the error.
   662  		pErr := <-streamErrC
   663  		assertRangefeedRetryErr(t, pErr, roachpb.RangeFeedRetryError_REASON_RAFT_SNAPSHOT)
   664  	})
   665  	t.Run(roachpb.RangeFeedRetryError_REASON_LOGICAL_OPS_MISSING.String(), func(t *testing.T) {
   666  		mtc, _ := setup(t)
   667  		defer mtc.Stop()
   668  
   669  		// Split the range so that the RHS is not a system range and thus will
    670  		// respect the kv.rangefeed.enabled cluster setting.
   671  		startKey := keys.UserTableDataMin
   672  		splitArgs := adminSplitArgs(startKey)
   673  		if _, pErr := kv.SendWrapped(ctx, mtc.distSenders[0], splitArgs); pErr != nil {
   674  			t.Fatalf("split saw unexpected error: %v", pErr)
   675  		}
   676  		rightRangeID := mtc.Store(0).LookupReplica(roachpb.RKey(startKey)).RangeID
   677  
   678  		// Establish a rangefeed.
   679  		stream := newTestStream()
   680  		streamErrC := make(chan *roachpb.Error, 1)
   681  
   682  		endKey := keys.TableDataMax
   683  		rangefeedSpan := roachpb.Span{Key: startKey, EndKey: endKey}
   684  		go func() {
   685  			req := roachpb.RangeFeedRequest{
   686  				Header: roachpb.Header{
   687  					RangeID: rightRangeID,
   688  				},
   689  				Span: rangefeedSpan,
   690  			}
   691  
   692  			pErr := mtc.Store(0).RangeFeed(&req, stream)
   693  			streamErrC <- pErr
   694  		}()
   695  
   696  		// Wait for the first checkpoint event.
   697  		waitForInitialCheckpointAcrossSpan(t, stream, streamErrC, rangefeedSpan)
   698  
   699  		// Disable rangefeeds, which stops logical op logs from being provided
   700  		// with Raft commands.
   701  		kvserver.RangefeedEnabled.Override(&mtc.storeConfig.Settings.SV, false)
   702  
   703  		// Perform a write on the range.
   704  		writeKey := encoding.EncodeStringAscending(keys.SystemSQLCodec.TablePrefix(55), "c")
   705  		pArgs := putArgs(writeKey, []byte("val2"))
   706  		if _, pErr := kv.SendWrapped(ctx, mtc.distSenders[0], pArgs); pErr != nil {
   707  			t.Fatal(pErr)
   708  		}
   709  
   710  		// Check the error.
   711  		pErr := <-streamErrC
   712  		assertRangefeedRetryErr(t, pErr, roachpb.RangeFeedRetryError_REASON_LOGICAL_OPS_MISSING)
   713  	})
   714  }
   715  
   716  // TestReplicaRangefeedPushesTransactions tests that rangefeed detects intents
   717  // that are holding up its resolved timestamp and periodically pushes them to
   718  // ensure that its resolved timestamp continues to advance.
   719  func TestReplicaRangefeedPushesTransactions(t *testing.T) {
   720  	defer leaktest.AfterTest(t)()
   721  
   722  	ctx := context.Background()
   723  	tc, db, _, repls := setupTestClusterForClosedTimestampTesting(ctx, t, testingTargetDuration)
   724  	defer tc.Stopper().Stop(ctx)
   725  
   726  	sqlDB := sqlutils.MakeSQLRunner(db)
   727  	sqlDB.Exec(t, `SET CLUSTER SETTING kv.rangefeed.enabled = true`)
   728  	// While we're here, drop the target duration. This was set to
    729  	// testingTargetDuration above, but this is higher than it needs to be now
   730  	// that cluster and schema setup is complete.
   731  	sqlDB.Exec(t, `SET CLUSTER SETTING kv.closed_timestamp.target_duration = '10ms'`)
   732  
   733  	// Make sure all the nodes have gotten the rangefeed enabled setting from
   734  	// gossip, so that they will immediately be able to accept RangeFeeds. The
   735  	// target_duration one is just to speed up the test, we don't care if it has
   736  	// propagated everywhere yet.
   737  	testutils.SucceedsSoon(t, func() error {
   738  		for i := 0; i < tc.NumServers(); i++ {
   739  			var enabled bool
   740  			if err := tc.ServerConn(i).QueryRow(
   741  				`SHOW CLUSTER SETTING kv.rangefeed.enabled`,
   742  			).Scan(&enabled); err != nil {
   743  				return err
   744  			}
   745  			if !enabled {
   746  				return errors.Errorf(`waiting for rangefeed to be enabled on node %d`, i)
   747  			}
   748  		}
   749  		return nil
   750  	})
   751  
   752  	ts1 := tc.Server(0).Clock().Now()
   753  	rangeFeedCtx, rangeFeedCancel := context.WithCancel(ctx)
   754  	defer rangeFeedCancel()
   755  	rangeFeedChs := make([]chan *roachpb.RangeFeedEvent, len(repls))
   756  	rangeFeedErrC := make(chan error, len(repls))
   757  	for i := range repls {
   758  		desc := repls[i].Desc()
   759  		ds := tc.Server(i).DistSenderI().(*kvcoord.DistSender)
   760  		rangeFeedCh := make(chan *roachpb.RangeFeedEvent)
   761  		rangeFeedChs[i] = rangeFeedCh
   762  		go func() {
   763  			span := roachpb.Span{
   764  				Key: desc.StartKey.AsRawKey(), EndKey: desc.EndKey.AsRawKey(),
   765  			}
   766  			rangeFeedErrC <- ds.RangeFeed(rangeFeedCtx, span, ts1, false /* withDiff */, rangeFeedCh)
   767  		}()
   768  	}
   769  
   770  	// Wait for a RangeFeed checkpoint on each RangeFeed after the RangeFeed
   771  	// initial scan time (which is the timestamp passed in the request) to make
   772  	// sure everything is set up. We intentionally don't care about the spans in
   773  	// the checkpoints, just verifying that something has made it past the
   774  	// initial scan and is running.
   775  	waitForCheckpoint := func(ts hlc.Timestamp) {
   776  		t.Helper()
   777  		for _, rangeFeedCh := range rangeFeedChs {
   778  			checkpointed := false
   779  			for !checkpointed {
   780  				select {
   781  				case event := <-rangeFeedCh:
   782  					if c := event.Checkpoint; c != nil && ts.Less(c.ResolvedTS) {
   783  						checkpointed = true
   784  					}
   785  				case err := <-rangeFeedErrC:
   786  					t.Fatal(err)
   787  				}
   788  			}
   789  		}
   790  	}
   791  	waitForCheckpoint(ts1)
   792  
   793  	// Start a transaction and write an intent on the range. This intent would
    794  	// prevent the rangefeed's resolved timestamp from advancing. To get
   795  	// around this, the rangefeed periodically pushes all intents on its range
   796  	// to higher timestamps.
   797  	tx1, err := db.BeginTx(ctx, nil)
   798  	require.NoError(t, err)
   799  	_, err = tx1.ExecContext(ctx, "INSERT INTO cttest.kv VALUES (1, 'test')")
   800  	require.NoError(t, err)
   801  
   802  	// Read the current transaction timestamp. This prevents the txn from committing
   803  	// if it ever gets pushed.
   804  	var ts2Str string
   805  	require.NoError(t, tx1.QueryRowContext(ctx, "SELECT cluster_logical_timestamp()").Scan(&ts2Str))
   806  	ts2, err := sql.ParseHLC(ts2Str)
   807  	require.NoError(t, err)
   808  
   809  	// Wait for the RangeFeed checkpoint on each RangeFeed to exceed this timestamp.
   810  	// For this to be possible, it must push the transaction's timestamp forward.
   811  	waitForCheckpoint(ts2)
   812  
   813  	// The txn should not be able to commit since its commit timestamp was pushed
   814  	// and it has observed its timestamp.
   815  	require.Regexp(t, "TransactionRetryError: retry txn", tx1.Commit())
   816  
   817  	// Make sure the RangeFeed hasn't errored yet.
   818  	select {
   819  	case err := <-rangeFeedErrC:
   820  		t.Fatal(err)
   821  	default:
   822  	}
   823  	// Now cancel it and wait for it to shut down.
   824  	rangeFeedCancel()
   825  }
   826  
   827  // TestReplicaRangefeedNudgeSlowClosedTimestamp tests that rangefeed detects
   828  // that its closed timestamp updates have stalled and requests new information
   829  // from its Range's leaseholder. This is a regression test for #35142.
   830  func TestReplicaRangefeedNudgeSlowClosedTimestamp(t *testing.T) {
   831  	defer leaktest.AfterTest(t)()
   832  
   833  	ctx := context.Background()
   834  	tc, db, desc, repls := setupTestClusterForClosedTimestampTesting(ctx, t, testingTargetDuration)
   835  	defer tc.Stopper().Stop(ctx)
   836  
   837  	sqlDB := sqlutils.MakeSQLRunner(db)
   838  	sqlDB.Exec(t, `SET CLUSTER SETTING kv.rangefeed.enabled = true`)
   839  	// While we're here, drop the target duration. This was set to
    840  	// testingTargetDuration above, but this is higher than it needs to be now
   841  	// that cluster and schema setup is complete.
   842  	sqlDB.Exec(t, `SET CLUSTER SETTING kv.closed_timestamp.target_duration = '10ms'`)
   843  
   844  	// Make sure all the nodes have gotten the rangefeed enabled setting from
   845  	// gossip, so that they will immediately be able to accept RangeFeeds. The
   846  	// target_duration one is just to speed up the test, we don't care if it has
   847  	// propagated everywhere yet.
   848  	testutils.SucceedsSoon(t, func() error {
   849  		for i := 0; i < tc.NumServers(); i++ {
   850  			var enabled bool
   851  			if err := tc.ServerConn(i).QueryRow(
   852  				`SHOW CLUSTER SETTING kv.rangefeed.enabled`,
   853  			).Scan(&enabled); err != nil {
   854  				return err
   855  			}
   856  			if !enabled {
   857  				return errors.Errorf(`waiting for rangefeed to be enabled on node %d`, i)
   858  			}
   859  		}
   860  		return nil
   861  	})
   862  
   863  	ts1 := tc.Server(0).Clock().Now()
   864  	rangeFeedCtx, rangeFeedCancel := context.WithCancel(ctx)
   865  	defer rangeFeedCancel()
   866  	rangeFeedChs := make([]chan *roachpb.RangeFeedEvent, len(repls))
   867  	rangeFeedErrC := make(chan error, len(repls))
   868  	for i := range repls {
   869  		ds := tc.Server(i).DistSenderI().(*kvcoord.DistSender)
   870  		rangeFeedCh := make(chan *roachpb.RangeFeedEvent)
   871  		rangeFeedChs[i] = rangeFeedCh
   872  		go func() {
   873  			span := roachpb.Span{
   874  				Key: desc.StartKey.AsRawKey(), EndKey: desc.EndKey.AsRawKey(),
   875  			}
   876  			rangeFeedErrC <- ds.RangeFeed(rangeFeedCtx, span, ts1, false /* withDiff */, rangeFeedCh)
   877  		}()
   878  	}
   879  
   880  	// Wait for a RangeFeed checkpoint on each RangeFeed after the RangeFeed
   881  	// initial scan time (which is the timestamp passed in the request) to make
   882  	// sure everything is set up. We intentionally don't care about the spans in
   883  	// the checkpoints, just verifying that something has made it past the
   884  	// initial scan and is running.
   885  	waitForCheckpoint := func(ts hlc.Timestamp) {
   886  		t.Helper()
   887  		for _, rangeFeedCh := range rangeFeedChs {
   888  			checkpointed := false
   889  			for !checkpointed {
   890  				select {
   891  				case event := <-rangeFeedCh:
   892  					if c := event.Checkpoint; c != nil && ts.Less(c.ResolvedTS) {
   893  						checkpointed = true
   894  					}
   895  				case err := <-rangeFeedErrC:
   896  					t.Fatal(err)
   897  				}
   898  			}
   899  		}
   900  	}
   901  	waitForCheckpoint(ts1)
   902  
   903  	// Clear the closed timestamp storage on each server. This simulates the case
   904  	// where a closed timestamp message is lost or a node restarts. To recover,
   905  	// the servers will need to request an update from the leaseholder.
   906  	for i := 0; i < tc.NumServers(); i++ {
   907  		stores := tc.Server(i).GetStores().(*kvserver.Stores)
   908  		err := stores.VisitStores(func(s *kvserver.Store) error {
   909  			s.ClearClosedTimestampStorage()
   910  			return nil
   911  		})
   912  		require.NoError(t, err)
   913  	}
   914  
   915  	// Wait for another RangeFeed checkpoint after the store was cleared. Without
   916  	// RangeFeed nudging closed timestamps, this doesn't happen on its own. Again,
   917  	// we intentionally don't care about the spans in the checkpoints, just
   918  	// verifying that something has made it past the cleared time.
   919  	ts2 := tc.Server(0).Clock().Now()
   920  	waitForCheckpoint(ts2)
   921  
   922  	// Make sure the RangeFeed hasn't errored yet.
   923  	select {
   924  	case err := <-rangeFeedErrC:
   925  		t.Fatal(err)
   926  	default:
   927  	}
   928  	// Now cancel it and wait for it to shut down.
   929  	rangeFeedCancel()
   930  }