github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replicate_queue_test.go

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver_test
    12  
    13  import (
    14  	"context"
    15  	gosql "database/sql"
    16  	"encoding/json"
    17  	"fmt"
    18  	"math"
    19  	"strings"
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/cockroachdb/cockroach/pkg/base"
    24  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    25  	"github.com/cockroachdb/cockroach/pkg/keys"
    26  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    27  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
    28  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    29  	"github.com/cockroachdb/cockroach/pkg/server"
    30  	"github.com/cockroachdb/cockroach/pkg/testutils"
    31  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    32  	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
    33  	"github.com/cockroachdb/cockroach/pkg/util"
    34  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    35  	"github.com/cockroachdb/cockroach/pkg/util/log"
    36  	"github.com/cockroachdb/errors"
    37  	"github.com/gogo/protobuf/proto"
    38  	"github.com/stretchr/testify/require"
    39  	"go.etcd.io/etcd/raft/tracker"
    40  )
    41  
    42  func TestReplicateQueueRebalance(t *testing.T) {
    43  	defer leaktest.AfterTest(t)()
    44  
    45  	if util.RaceEnabled {
    46  		// This test was seen taking north of 20m under race.
    47  		t.Skip("too heavyweight for race")
    48  	}
    49  
    50  	testutils.RunTrueAndFalse(t, "atomic", func(t *testing.T, atomic bool) {
    51  		testReplicateQueueRebalanceInner(t, atomic)
    52  	})
    53  }
    54  
    55  func testReplicateQueueRebalanceInner(t *testing.T, atomic bool) {
    56  	if testing.Short() {
    57  		t.Skip("short flag")
    58  	}
    59  
    60  	const numNodes = 5
    61  
    62  	tc := testcluster.StartTestCluster(t, numNodes,
    63  		base.TestClusterArgs{
    64  			ReplicationMode: base.ReplicationAuto,
    65  			ServerArgs: base.TestServerArgs{
    66  				ScanMinIdleTime: time.Millisecond,
    67  				ScanMaxIdleTime: time.Millisecond,
    68  			},
    69  		},
    70  	)
    71  	defer tc.Stopper().Stop(context.Background())
    72  
    73  	for _, server := range tc.Servers {
    74  		st := server.ClusterSettings()
    75  		st.Manual.Store(true)
    76  		kvserver.LoadBasedRebalancingMode.Override(&st.SV, int64(kvserver.LBRebalancingOff))
    77  		// NB: setting the cluster setting properly is usually preferred, but overriding
    78  		// it directly avoids having to set it and then wait for all nodes to pick it up.
    79  		kvserver.UseAtomicReplicationChanges.Override(&st.SV, atomic)
    80  	}
    81  
    82  	const newRanges = 10
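        	// trackedRanges collects the ranges created by the splits below (all but
        	// the first), so that the range log can later be inspected to check that
        	// they never held more than three replicas (expected to hold only when
        	// atomic replication changes are enabled).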
    83  	trackedRanges := map[roachpb.RangeID]struct{}{}
    84  	for i := 0; i < newRanges; i++ {
    85  		tableID := keys.MinUserDescID + i
    86  		splitKey := keys.SystemSQLCodec.TablePrefix(uint32(tableID))
    87  		// Retry the splits on descriptor errors, which are likely while the
    88  		// replicate queue is already hard at work.
    89  		testutils.SucceedsSoon(t, func() error {
    90  			desc := tc.LookupRangeOrFatal(t, splitKey)
    91  			if i > 0 && len(desc.Replicas().Voters()) > 3 {
    92  				// Some system ranges have five replicas while user ranges only get
    93  				// three, so early in the startup process we'll see down-replications
    94  				// that we want to ignore. Delay the splits so that we don't create
    95  				// more over-replicated ranges.
    96  				// We don't do this for i=0 since that range stays at five replicas.
    97  				return errors.Errorf("still downreplicating: %s", &desc)
    98  			}
    99  			_, rightDesc, err := tc.SplitRange(splitKey)
   100  			if err != nil {
   101  				return err
   102  			}
   103  			t.Logf("split off %s", &rightDesc)
   104  			if i > 0 {
   105  				trackedRanges[rightDesc.RangeID] = struct{}{}
   106  			}
   107  			return nil
   108  		})
   109  	}
   110  
   111  	countReplicas := func() []int {
   112  		counts := make([]int, len(tc.Servers))
   113  		for _, s := range tc.Servers {
   114  			err := s.Stores().VisitStores(func(s *kvserver.Store) error {
   115  				counts[s.StoreID()-1] += s.ReplicaCount()
   116  				return nil
   117  			})
   118  			if err != nil {
   119  				t.Fatal(err)
   120  			}
   121  		}
   122  		return counts
   123  	}
   124  
   125  	initialRanges, err := server.ExpectedInitialRangeCount(tc.Servers[0].DB(), zonepb.DefaultZoneConfigRef(), zonepb.DefaultSystemZoneConfigRef())
   126  	if err != nil {
   127  		t.Fatal(err)
   128  	}
   129  	numRanges := newRanges + initialRanges
   130  	numReplicas := numRanges * 3
   131  	const minThreshold = 0.9
   132  	minReplicas := int(math.Floor(minThreshold * (float64(numReplicas) / numNodes)))
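        	// As a rough, illustrative example (the exact count depends on
        	// ExpectedInitialRangeCount): with ~30 initial ranges this comes to 40
        	// ranges, 120 replicas, and a per-store minimum of floor(0.9 * 120 / 5) = 21.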
   133  
   134  	testutils.SucceedsSoon(t, func() error {
   135  		counts := countReplicas()
   136  		for _, c := range counts {
   137  			if c < minReplicas {
   138  				err := errors.Errorf(
   139  					"not balanced (want at least %d replicas on all stores): %d", minReplicas, counts)
   140  				log.Infof(context.Background(), "%v", err)
   141  				return err
   142  			}
   143  		}
   144  		return nil
   145  	})
   146  
   147  	// Query the range log to see if anything unexpected happened. Concretely,
   148  	// we'll make sure that our tracked ranges never had >3 replicas.
   149  	infos, err := queryRangeLog(tc.Conns[0], `SELECT info FROM system.rangelog ORDER BY timestamp DESC`)
   150  	require.NoError(t, err)
   151  	for _, info := range infos {
   152  		if _, ok := trackedRanges[info.UpdatedDesc.RangeID]; !ok || len(info.UpdatedDesc.Replicas().Voters()) <= 3 {
   153  			continue
   154  		}
   155  		// If we have atomic changes enabled, we expect to never see four replicas
   156  		// on our tracked ranges. If we don't have atomic changes, we can't avoid
   157  		// it.
   158  		if atomic {
   159  			t.Error(info)
   160  		} else {
   161  			t.Log(info)
   162  		}
   163  	}
   164  }
   165  
   166  // Test that up-replication only proceeds if there are enough candidates to
   167  // up-replicate to. Specifically, we won't up-replicate to an even number of
   168  // replicas unless there is an additional candidate that will allow a
   169  // subsequent up-replication to an odd number.
   170  func TestReplicateQueueUpReplicate(t *testing.T) {
   171  	defer leaktest.AfterTest(t)()
   172  	const replicaCount = 3
   173  
   174  	tc := testcluster.StartTestCluster(t, 1,
   175  		base.TestClusterArgs{ReplicationMode: base.ReplicationAuto},
   176  	)
   177  	defer tc.Stopper().Stop(context.Background())
   178  
   179  	testKey := keys.MetaMin
   180  	desc, err := tc.LookupRange(testKey)
   181  	if err != nil {
   182  		t.Fatal(err)
   183  	}
   184  
   185  	if len(desc.InternalReplicas) != 1 {
   186  		t.Fatalf("replica count, want 1, current %d", len(desc.InternalReplicas))
   187  	}
   188  
   189  	tc.AddServer(t, base.TestServerArgs{})
   190  
   191  	if err := tc.Servers[0].Stores().VisitStores(func(s *kvserver.Store) error {
   192  		return s.ForceReplicationScanAndProcess()
   193  	}); err != nil {
   194  		t.Fatal(err)
   195  	}
   196  	// After the initial splits have been performed, all of the resulting ranges
   197  	// should be present in replicate queue purgatory (with only two stores, adding
   198  	// a replica would leave an even replica count with no third candidate available).
   199  	expected, err := tc.Servers[0].ExpectedInitialRangeCount()
   200  	if err != nil {
   201  		t.Fatal(err)
   202  	}
   203  
   204  	var store *kvserver.Store
   205  	_ = tc.Servers[0].Stores().VisitStores(func(s *kvserver.Store) error {
   206  		store = s
   207  		return nil
   208  	})
   209  
   210  	if n := store.ReplicateQueuePurgatoryLength(); expected != n {
   211  		t.Fatalf("expected %d replicas in purgatory, but found %d", expected, n)
   212  	}
   213  
   214  	tc.AddServer(t, base.TestServerArgs{})
   215  
   216  	// Now wait until the range has been up-replicated to the desired
   217  	// number of replicas.
   218  	testutils.SucceedsSoon(t, func() error {
   219  		desc, err := tc.LookupRange(testKey)
   220  		if err != nil {
   221  			t.Fatal(err)
   222  		}
   223  		if len(desc.InternalReplicas) != replicaCount {
   224  			return errors.Errorf("replica count, want %d, current %d", replicaCount, len(desc.InternalReplicas))
   225  		}
   226  		return nil
   227  	})
   228  
   229  	infos, err := filterRangeLog(
   230  		tc.Conns[0], kvserverpb.RangeLogEventType_add, kvserverpb.ReasonRangeUnderReplicated,
   231  	)
   232  	if err != nil {
   233  		t.Fatal(err)
   234  	}
   235  	if len(infos) < 1 {
   236  		t.Fatalf("found no upreplication due to underreplication in the range logs")
   237  	}
   238  }
   239  
   240  // TestReplicateQueueDownReplicate verifies that the replication queue will
   241  // notice over-replicated ranges and remove replicas from them.
   242  func TestReplicateQueueDownReplicate(t *testing.T) {
   243  	defer leaktest.AfterTest(t)()
   244  	ctx := context.Background()
   245  	const replicaCount = 3
   246  
   247  	// The goal of this test is to ensure that down-replication occurs correctly
   248  	// via the replicate queue; to that end, the test cluster needs to be kept in
   249  	// auto replication mode.
   250  	tc := testcluster.StartTestCluster(t, replicaCount+2,
   251  		base.TestClusterArgs{
   252  			ReplicationMode: base.ReplicationAuto,
   253  			ServerArgs: base.TestServerArgs{
   254  				ScanMinIdleTime: 10 * time.Millisecond,
   255  				ScanMaxIdleTime: 10 * time.Millisecond,
   256  			},
   257  		},
   258  	)
   259  	defer tc.Stopper().Stop(ctx)
   260  
   261  	// Disable the replication queues so that the range we're about to create
   262  	// doesn't get down-replicated too soon.
   263  	tc.ToggleReplicateQueues(false)
   264  
   265  	testKey := tc.ScratchRange(t)
   266  	desc := tc.LookupRangeOrFatal(t, testKey)
   267  	// At the end of StartTestCluster(), all ranges have 5 replicas since they're
   268  	// all "system ranges". When the ScratchRange() splits its range, it also
   269  	// starts up with 5 replicas. Since it's not a system range, its default zone
   270  	// config asks for 3x replication, and the replication queue will
   271  	// down-replicate it.
   272  	require.Len(t, desc.Replicas().All(), 5)
   273  	// Re-enable the replication queue.
   274  	tc.ToggleReplicateQueues(true)
   275  
   276  	// Now wait until the range has been down-replicated back to the
   277  	// desired number of replicas.
   278  	testutils.SucceedsSoon(t, func() error {
   279  		desc, err := tc.LookupRange(testKey)
   280  		if err != nil {
   281  			t.Fatal(err)
   282  		}
   283  		if len(desc.InternalReplicas) != replicaCount {
   284  			return errors.Errorf("replica count, want %d, current %d", replicaCount, len(desc.InternalReplicas))
   285  		}
   286  		return nil
   287  	})
   288  
   289  	infos, err := filterRangeLog(
   290  		tc.Conns[0], kvserverpb.RangeLogEventType_remove, kvserverpb.ReasonRangeOverReplicated,
   291  	)
   292  	if err != nil {
   293  		t.Fatal(err)
   294  	}
   295  	if len(infos) < 1 {
   296  		t.Fatalf("found no downreplication due to over-replication in the range logs")
   297  	}
   298  }
   299  
   300  // queryRangeLog queries the range log. The query must be of the form:
   301  // `SELECT info FROM system.rangelog ...`.
   302  func queryRangeLog(
   303  	conn *gosql.DB, query string, args ...interface{},
   304  ) ([]kvserverpb.RangeLogEvent_Info, error) {
   305  	rows, err := conn.Query(query, args...)
   306  	if err != nil {
   307  		return nil, err
   308  	}
   309  
   310  	var sl []kvserverpb.RangeLogEvent_Info
   311  	defer rows.Close()
   312  	var numEntries int
   313  	for rows.Next() {
   314  		numEntries++
   315  		var infoStr string
   316  		if err := rows.Scan(&infoStr); err != nil {
   317  			return nil, err
   318  		}
   319  		var info kvserverpb.RangeLogEvent_Info
   320  		if err := json.Unmarshal([]byte(infoStr), &info); err != nil {
   321  			return nil, errors.Errorf("error unmarshaling info string %q: %s", infoStr, err)
   322  		}
   323  		sl = append(sl, info)
   324  	}
   325  	if err := rows.Err(); err != nil {
   326  		return nil, err
   327  	}
   328  	return sl, nil
   329  }
   330  
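        // filterRangeLog is a thin wrapper around queryRangeLog that returns only the
        // entries of the given event type whose info payload mentions the given reason.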
   331  func filterRangeLog(
   332  	conn *gosql.DB, eventType kvserverpb.RangeLogEventType, reason kvserverpb.RangeLogEventReason,
   333  ) ([]kvserverpb.RangeLogEvent_Info, error) {
   334  	return queryRangeLog(conn, `SELECT info FROM system.rangelog WHERE "eventType" = $1 AND info LIKE concat('%', $2, '%');`, eventType.String(), reason)
   335  }
   336  
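        // toggleReplicationQueues activates or deactivates the replicate queue on
        // every store in the cluster.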
   337  func toggleReplicationQueues(tc *testcluster.TestCluster, active bool) {
   338  	for _, s := range tc.Servers {
   339  		_ = s.Stores().VisitStores(func(store *kvserver.Store) error {
   340  			store.SetReplicateQueueActive(active)
   341  			return nil
   342  		})
   343  	}
   344  }
   345  
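        // toggleSplitQueues activates or deactivates the split queue on every store
        // in the cluster.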
   346  func toggleSplitQueues(tc *testcluster.TestCluster, active bool) {
   347  	for _, s := range tc.Servers {
   348  		_ = s.Stores().VisitStores(func(store *kvserver.Store) error {
   349  			store.SetSplitQueueActive(active)
   350  			return nil
   351  		})
   352  	}
   353  }
   354  
   355  // Test that ranges larger than range_max_bytes that can't be split can still be
   356  // processed by the replication queue (in particular, up-replicated).
   357  func TestLargeUnsplittableRangeReplicate(t *testing.T) {
   358  	defer leaktest.AfterTest(t)()
   359  
   360  	if testing.Short() || testutils.NightlyStress() || util.RaceEnabled {
   361  		t.Skip("https://github.com/cockroachdb/cockroach/issues/38565")
   362  	}
   363  	ctx := context.Background()
   364  
   365  	// Create a cluster with really small ranges.
   366  	const rangeMaxSize = base.MinRangeMaxBytes
   367  	zcfg := zonepb.DefaultZoneConfig()
   368  	zcfg.RangeMinBytes = proto.Int64(rangeMaxSize / 2)
   369  	zcfg.RangeMaxBytes = proto.Int64(rangeMaxSize)
   370  	tc := testcluster.StartTestCluster(t, 5,
   371  		base.TestClusterArgs{
   372  			ReplicationMode: base.ReplicationAuto,
   373  			ServerArgs: base.TestServerArgs{
   374  				ScanMinIdleTime: time.Millisecond,
   375  				ScanMaxIdleTime: time.Millisecond,
   376  				Knobs: base.TestingKnobs{
   377  					Server: &server.TestingKnobs{
   378  						DefaultZoneConfigOverride: &zcfg,
   379  					},
   380  				},
   381  			},
   382  		},
   383  	)
   384  	defer tc.Stopper().Stop(ctx)
   385  
   386  	// We're going to create a table with a big row and a small row. We'll split
   387  	// the table in between the rows, to produce a large range and a small one.
   388  	// Then we'll increase the replication factor to 5 and check that both ranges
   389  	// behave the same - i.e. they both get up-replicated. For the purposes of
   390  	// this test we're only worried about the large one up-replicating, but we
   391  	// test the small one as a control so that we don't fool ourselves.
   392  
   393  	// Disable the queues so they don't mess with our manual relocation. We'll
   394  	// re-enable them later.
   395  	toggleReplicationQueues(tc, false /* active */)
   396  	toggleSplitQueues(tc, false /* active */)
   397  
   398  	db := tc.Conns[0]
   399  	_, err := db.Exec("create table t (i int primary key, s string)")
   400  	require.NoError(t, err)
   401  
   402  	_, err = db.Exec(`ALTER TABLE t EXPERIMENTAL_RELOCATE VALUES (ARRAY[1,2,3], 1)`)
   403  	require.NoError(t, err)
   404  	_, err = db.Exec(`ALTER TABLE t SPLIT AT VALUES (2)`)
   405  	require.NoError(t, err)
   406  
   407  	toggleReplicationQueues(tc, true /* active */)
   408  	toggleSplitQueues(tc, true /* active */)
   409  
   410  	// We're going to create a row that's larger than range_max_bytes, but not
   411  	// large enough that write back-pressuring kicks in and refuses it.
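        	// The string built below is 1.5*rangeMaxSize bytes long, i.e. one and a
        	// half times the configured maximum range size (96 KiB if
        	// base.MinRangeMaxBytes is 64 KiB).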
   412  	var sb strings.Builder
   413  	for i := 0; i < 1.5*rangeMaxSize; i++ {
   414  		sb.WriteRune('a')
   415  	}
   416  	_, err = db.Exec("insert into t(i,s) values (1, $1)", sb.String())
   417  	require.NoError(t, err)
   418  	_, err = db.Exec("insert into t(i,s) values (2, 'b')")
   419  	require.NoError(t, err)
   420  
   421  	// Now ask everybody to up-replicate.
   422  	_, err = db.Exec("alter table t configure zone using num_replicas = 5")
   423  	require.NoError(t, err)
   424  
   425  	forceProcess := func() {
   426  		// Speed up the queue processing.
   427  		for _, s := range tc.Servers {
   428  			err := s.Stores().VisitStores(func(store *kvserver.Store) error {
   429  				return store.ForceReplicationScanAndProcess()
   430  			})
   431  			require.NoError(t, err)
   432  		}
   433  	}
   434  
   435  	// Wait until the smaller range (the 2nd) has up-replicated.
   436  	testutils.SucceedsSoon(t, func() error {
   437  		forceProcess()
   438  		r := db.QueryRow(
   439  			"select replicas from [show ranges from table t] where start_key='/2'")
   440  		var repl string
   441  		if err := r.Scan(&repl); err != nil {
   442  			return err
   443  		}
   444  		if repl != "{1,2,3,4,5}" {
   445  			return fmt.Errorf("not up-replicated yet. replicas: %s", repl)
   446  		}
   447  		return nil
   448  	})
   449  
   450  	// Now check that the large range also gets up-replicated.
   451  	testutils.SucceedsSoon(t, func() error {
   452  		forceProcess()
   453  		r := db.QueryRow(
   454  			"select replicas from [show ranges from table t] where start_key is null")
   455  		var repl string
   456  		if err := r.Scan(&repl); err != nil {
   457  			return err
   458  		}
   459  		if repl != "{1,2,3,4,5}" {
   460  			return fmt.Errorf("not up-replicated yet")
   461  		}
   462  		return nil
   463  	})
   464  }
   465  
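        // delayingRaftMessageHandler wraps a store's RaftMessageHandler and handles
        // every incoming Raft request for the given range asynchronously after a
        // raftDelay pause, simulating a replica that lags behind its leaseholder.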
   466  type delayingRaftMessageHandler struct {
   467  	kvserver.RaftMessageHandler
   468  	leaseHolderNodeID uint64
   469  	rangeID           roachpb.RangeID
   470  }
   471  
   472  const (
   473  	queryInterval = 10 * time.Millisecond
   474  	raftDelay     = 175 * time.Millisecond
   475  )
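        // The workload in TestTransferLeaseToLaggingNode issues a query every
        // queryInterval, while delayingRaftMessageHandler holds each incoming Raft
        // message for the target range back by raftDelay, so the remote replica's
        // log consistently lags behind the leaseholder's.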
   476  
   477  func (h delayingRaftMessageHandler) HandleRaftRequest(
   478  	ctx context.Context,
   479  	req *kvserver.RaftMessageRequest,
   480  	respStream kvserver.RaftMessageResponseStream,
   481  ) *roachpb.Error {
   482  	if h.rangeID != req.RangeID {
   483  		return h.RaftMessageHandler.HandleRaftRequest(ctx, req, respStream)
   484  	}
   485  	go func() {
   486  		time.Sleep(raftDelay)
   487  		err := h.RaftMessageHandler.HandleRaftRequest(ctx, req, respStream)
   488  		if err != nil {
   489  			log.Infof(ctx, "HandleRaftRequest returned err %s", err)
   490  		}
   491  	}()
   492  
   493  	return nil
   494  }
   495  
   496  func TestTransferLeaseToLaggingNode(t *testing.T) {
   497  	defer leaktest.AfterTest(t)()
   498  
   499  	ctx := context.Background()
   500  	clusterArgs := base.TestClusterArgs{
   501  		ServerArgsPerNode: map[int]base.TestServerArgs{
   502  			0: {
   503  				ScanMaxIdleTime: time.Millisecond,
   504  				StoreSpecs: []base.StoreSpec{{
   505  					InMemory: true, Attributes: roachpb.Attributes{Attrs: []string{"n1"}},
   506  				}},
   507  			},
   508  			1: {
   509  				ScanMaxIdleTime: time.Millisecond,
   510  				StoreSpecs: []base.StoreSpec{{
   511  					InMemory: true, Attributes: roachpb.Attributes{Attrs: []string{"n2"}},
   512  				}},
   513  			},
   514  			2: {
   515  				ScanMaxIdleTime: time.Millisecond,
   516  				StoreSpecs: []base.StoreSpec{{
   517  					InMemory: true, Attributes: roachpb.Attributes{Attrs: []string{"n3"}},
   518  				}},
   519  			},
   520  		},
   521  	}
   522  
   523  	tc := testcluster.StartTestCluster(t,
   524  		len(clusterArgs.ServerArgsPerNode), clusterArgs)
   525  	defer tc.Stopper().Stop(ctx)
   526  
   527  	if err := tc.WaitForFullReplication(); err != nil {
   528  		t.Fatal(err)
   529  	}
   530  
   531  	// Find the range containing system.comments and its leaseholder.
   532  	var rangeID roachpb.RangeID
   533  	var leaseHolderNodeID uint64
   534  	s := sqlutils.MakeSQLRunner(tc.Conns[0])
   535  	s.Exec(t, "insert into system.comments values(0,0,0,'abc')")
   536  	s.QueryRow(t,
   537  		"select range_id, lease_holder from "+
   538  			"[show ranges from table system.comments] limit 1",
   539  	).Scan(&rangeID, &leaseHolderNodeID)
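        	// Pick a "remote" node that is not the current leaseholder; Raft traffic
        	// to its store for this range will be delayed below.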
   540  	remoteNodeID := uint64(1)
   541  	if leaseHolderNodeID == 1 {
   542  		remoteNodeID = 2
   543  	}
   544  	log.Infof(ctx, "RangeID %d, RemoteNodeID %d, LeaseHolderNodeID %d",
   545  		rangeID, remoteNodeID, leaseHolderNodeID)
   546  	leaseHolderSrv := tc.Servers[leaseHolderNodeID-1]
   547  	leaseHolderStoreID := leaseHolderSrv.GetFirstStoreID()
   548  	leaseHolderStore, err := leaseHolderSrv.Stores().GetStore(leaseHolderStoreID)
   549  	if err != nil {
   550  		t.Fatal(err)
   551  	}
   552  
   553  	// Start delaying Raft messages to the remote node
   554  	remoteSrv := tc.Servers[remoteNodeID-1]
   555  	remoteStoreID := remoteSrv.GetFirstStoreID()
   556  	remoteStore, err := remoteSrv.Stores().GetStore(remoteStoreID)
   557  	if err != nil {
   558  		t.Fatal(err)
   559  	}
   560  	remoteStore.Transport().Listen(
   561  		remoteStoreID,
   562  		delayingRaftMessageHandler{remoteStore, leaseHolderNodeID, rangeID},
   563  	)
   564  
   565  	workerReady := make(chan bool)
   566  	// Create persistent range load.
   567  	tc.Stopper().RunWorker(ctx, func(ctx context.Context) {
   568  		s = sqlutils.MakeSQLRunner(tc.Conns[remoteNodeID-1])
   569  		workerReady <- true
   570  		for {
   571  			s.Exec(t, "update system.comments set comment='abc' "+
   572  				"where type=0 and object_id=0 and sub_id=0")
   573  
   574  			select {
   575  			case <-ctx.Done():
   576  				return
   577  			case <-tc.Stopper().ShouldQuiesce():
   578  				return
   579  			case <-time.After(queryInterval):
   580  			}
   581  		}
   582  	})
   583  	<-workerReady
   584  	// Wait until we see the remote replica making progress.
   585  	leaseHolderRepl, err := leaseHolderStore.GetReplica(rangeID)
   586  	if err != nil {
   587  		t.Fatal(err)
   588  	}
   589  
   590  	var remoteRepl *kvserver.Replica
   591  	testutils.SucceedsSoon(t, func() error {
   592  		remoteRepl, err = remoteStore.GetReplica(rangeID)
   593  		return err
   594  	})
   595  	testutils.SucceedsSoon(t, func() error {
   596  		status := leaseHolderRepl.RaftStatus()
   597  		progress := status.Progress[uint64(remoteRepl.ReplicaID())]
   598  		if progress.Match > 0 {
   599  			return nil
   600  		}
   601  		return errors.Errorf(
   602  			"remote is not making progress: %+v", progress.Match,
   603  		)
   604  	})
   605  
   606  	// Wait until we see the remote replica lagging behind
   607  	for {
   608  		// Ensure that the replica on the remote node is lagging.
   609  		status := leaseHolderRepl.RaftStatus()
   610  		progress := status.Progress[uint64(remoteRepl.ReplicaID())]
   611  		if progress.State == tracker.StateReplicate &&
   612  			(status.Commit-progress.Match) > 0 {
   613  			break
   614  		}
   615  		time.Sleep(13 * time.Millisecond)
   616  	}
   617  
   618  	// Set a lease preference in the zone config so that the lease has to be
   619  	// moved to the remote node.
   620  	desc, zone := leaseHolderRepl.DescAndZone()
   621  	newZone := *zone
   622  	newZone.LeasePreferences = []zonepb.LeasePreference{
   623  		{
   624  			Constraints: []zonepb.Constraint{
   625  				{
   626  					Type:  zonepb.Constraint_REQUIRED,
   627  					Value: fmt.Sprintf("n%d", remoteNodeID),
   628  				},
   629  			},
   630  		},
   631  	}
   632  
   633  	// By now the lease holder may have changed.
   634  	testutils.SucceedsSoon(t, func() error {
   635  		leaseBefore, _ := leaseHolderRepl.GetLease()
   636  		log.Infof(ctx, "Lease before transfer %+v\n", leaseBefore)
   637  
   638  		if uint64(leaseBefore.Replica.NodeID) == remoteNodeID {
   639  			log.Infof(
   640  				ctx,
   641  				"Lease successfully transferred to desired node %d\n",
   642  				remoteNodeID,
   643  			)
   644  			return nil
   645  		}
   646  		currentSrv := tc.Servers[leaseBefore.Replica.NodeID-1]
   647  		leaseStore, err := currentSrv.Stores().GetStore(currentSrv.GetFirstStoreID())
   648  		if err != nil {
   649  			return err
   650  		}
   651  		leaseRepl, err := leaseStore.GetReplica(rangeID)
   652  		if err != nil {
   653  			return err
   654  		}
   655  		transferred, err := leaseStore.FindTargetAndTransferLease(
   656  			ctx, leaseRepl, desc, &newZone)
   657  		if err != nil {
   658  			return err
   659  		}
   660  		if !transferred {
   661  			return errors.Errorf("unable to transfer")
   662  		}
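        		// The transfer was initiated successfully; return an error anyway so that
        		// the surrounding SucceedsSoon loop re-checks which node holds the lease.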
   663  		return errors.Errorf("Repeat check for correct leaseholder")
   664  	})
   665  }