github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/client_relocate_range_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver_test
    12  
    13  import (
    14  	"context"
    15  	"sort"
    16  	"testing"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/base"
    19  	"github.com/cockroachdb/cockroach/pkg/keys"
    20  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/testutils"
    23  	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
    24  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    25  	"github.com/cockroachdb/errors"
    26  	"github.com/stretchr/testify/require"
    27  )
    28  
    29  func relocateAndCheck(
    30  	t *testing.T,
    31  	tc *testcluster.TestCluster,
    32  	startKey roachpb.RKey,
    33  	targets []roachpb.ReplicationTarget,
    34  ) (retries int) {
    35  	testutils.SucceedsSoon(t, func() error {
    36  		err := tc.Servers[0].DB().
    37  			AdminRelocateRange(context.Background(), startKey.AsRawKey(), targets)
    38  		if err != nil {
    39  			retries++
    40  		}
    41  		return err
    42  	})
    43  	desc, err := tc.Servers[0].LookupRange(startKey.AsRawKey())
    44  	require.NoError(t, err)
    45  	requireDescMembers(t, desc, targets)
    46  	requireLeaseAt(t, tc, desc, targets[0])
    47  	return retries
    48  }
    49  
    50  func requireDescMembers(
    51  	t *testing.T, desc roachpb.RangeDescriptor, targets []roachpb.ReplicationTarget,
    52  ) {
    53  	t.Helper()
    54  	targets = append([]roachpb.ReplicationTarget(nil), targets...)
    55  	sort.Slice(targets, func(i, j int) bool { return targets[i].StoreID < targets[j].StoreID })
    56  
    57  	have := make([]roachpb.ReplicationTarget, 0, len(targets))
    58  	for _, rDesc := range desc.Replicas().All() {
    59  		have = append(have, roachpb.ReplicationTarget{
    60  			NodeID:  rDesc.NodeID,
    61  			StoreID: rDesc.StoreID,
    62  		})
    63  	}
    64  	sort.Slice(have, func(i, j int) bool { return have[i].StoreID < have[j].StoreID })
    65  	require.Equal(t, targets, have)
    66  }
    67  
    68  func requireLeaseAt(
    69  	t *testing.T,
    70  	tc *testcluster.TestCluster,
    71  	desc roachpb.RangeDescriptor,
    72  	target roachpb.ReplicationTarget,
    73  ) {
    74  	t.Helper()
    75  	// NB: under stressrace the lease will sometimes be inactive by the time
    76  	// it's returned here, so don't use FindRangeLeaseHolder which fails when
    77  	// that happens.
    78  	testutils.SucceedsSoon(t, func() error {
    79  		lease, _, err := tc.FindRangeLease(desc, &target)
    80  		if err != nil {
    81  			return err
    82  		}
    83  		if target != (roachpb.ReplicationTarget{
    84  			NodeID:  lease.Replica.NodeID,
    85  			StoreID: lease.Replica.StoreID,
    86  		}) {
    87  			return errors.Errorf("lease %v is not held by %+v", lease, target)
    88  		}
    89  		return nil
    90  	})
    91  }
    92  
    93  func TestAdminRelocateRange(t *testing.T) {
    94  	defer leaktest.AfterTest(t)()
    95  
    96  	ctx := context.Background()
    97  
    98  	type intercept struct {
    99  		ops         []roachpb.ReplicationChange
   100  		leaseTarget *roachpb.ReplicationTarget
   101  		err         error
   102  	}
   103  	var intercepted []intercept
   104  
   105  	requireNumAtomic := func(expAtomic int, expSingle int, f func() (retries int)) {
   106  		t.Helper()
   107  		intercepted = nil
   108  		retries := f()
   109  		var actAtomic, actSingle int
   110  		for _, ic := range intercepted {
   111  			if ic.err != nil {
   112  				continue
   113  			}
   114  			if len(ic.ops) == 2 && ic.ops[0].ChangeType == roachpb.ADD_REPLICA && ic.ops[1].ChangeType == roachpb.REMOVE_REPLICA {
   115  				actAtomic++
   116  			} else {
   117  				actSingle++
   118  			}
   119  		}
   120  		actAtomic -= retries
   121  		require.Equal(t, expAtomic, actAtomic, "wrong number of atomic changes: %+v", intercepted)
   122  		require.Equal(t, expSingle, actSingle, "wrong number of single changes: %+v", intercepted)
   123  	}
   124  
   125  	knobs := base.TestingKnobs{
   126  		Store: &kvserver.StoreTestingKnobs{
   127  			BeforeRelocateOne: func(ops []roachpb.ReplicationChange, leaseTarget *roachpb.ReplicationTarget, err error) {
   128  				intercepted = append(intercepted, intercept{
   129  					ops:         ops,
   130  					leaseTarget: leaseTarget,
   131  					err:         err,
   132  				})
   133  			},
   134  		},
   135  	}
   136  	args := base.TestClusterArgs{
   137  		ServerArgs:      base.TestServerArgs{Knobs: knobs},
   138  		ReplicationMode: base.ReplicationManual,
   139  	}
   140  	tc := testcluster.StartTestCluster(t, 6, args)
   141  	defer tc.Stopper().Stop(ctx)
   142  
   143  	// s1 (LH) ---> s2 (LH) s1 s3
   144  	// Pure upreplication.
   145  	k := keys.MustAddr(tc.ScratchRange(t))
   146  	{
   147  		targets := tc.Targets(1, 0, 2)
   148  		// Expect two single additions, and that's it.
   149  		requireNumAtomic(0, 2, func() int {
   150  			return relocateAndCheck(t, tc, k, targets)
   151  		})
   152  	}
   153  
   154  	// s1 (LH) s2 s3 ---> s4 (LH) s5 s6.
   155  	// This is trickier because the leaseholder gets removed, and so do all
   156  	// other replicas (i.e. a simple lease transfer at the beginning won't solve
   157  	// the problem).
   158  	{
   159  		targets := tc.Targets(3, 4, 5)
   160  		// Should carry out three swaps. Note that the leaseholder gets removed
   161  		// in the process (i.e. internally the lease must've been moved around
   162  		// to achieve that).
   163  		requireNumAtomic(3, 0, func() int {
   164  			return relocateAndCheck(t, tc, k, targets)
   165  		})
   166  	}
   167  
   168  	// s4 (LH) s5 s6 ---> s5 (LH)
   169  	// Pure downreplication.
   170  	{
   171  		requireNumAtomic(0, 2, func() int {
   172  			return relocateAndCheck(t, tc, k, tc.Targets(4))
   173  		})
   174  	}
   175  
   176  	// s5 (LH) ---> s3 (LH)
   177  	// Lateral movement while at replication factor one. In this case atomic
   178  	// replication changes cannot be used; we add-then-remove instead.
   179  	{
   180  		requireNumAtomic(0, 2, func() int {
   181  			return relocateAndCheck(t, tc, k, tc.Targets(2))
   182  		})
   183  	}
   184  
   185  	// s3 (LH) ---> s2 (LH) s4 s1 --> s4 (LH) s2 s6 s1 --> s3 (LH) s5
   186  	// A grab bag.
   187  	{
   188  		// s3 -(add)-> s3 s2 -(swap)-> s4 s2 -(add)-> s4 s2 s1 (=s2 s4 s1)
   189  		requireNumAtomic(1, 2, func() int {
   190  			return relocateAndCheck(t, tc, k, tc.Targets(1, 3, 0))
   191  		})
   192  		// s2 s4 s1 -(add)-> s2 s4 s1 s6 (=s4 s2 s6 s1)
   193  		requireNumAtomic(0, 1, func() int {
   194  			return relocateAndCheck(t, tc, k, tc.Targets(3, 1, 5, 0))
   195  		})
   196  		// s4 s2 s6 s1 -(swap)-> s3 s2 s6 s1 -(swap)-> s3 s5 s6 s1 -(del)-> s3 s5 s6 -(del)-> s3 s5
   197  		requireNumAtomic(2, 2, func() int {
   198  			return relocateAndCheck(t, tc, k, tc.Targets(2, 4))
   199  		})
   200  	}
   201  }