github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/roachpb/metadata_replicas_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package roachpb
    12  
    13  import (
    14  	"context"
    15  	"math/rand"
    16  	"testing"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    20  	"github.com/cockroachdb/cockroach/pkg/util/log"
    21  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    22  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    23  	"github.com/stretchr/testify/assert"
    24  	"github.com/stretchr/testify/require"
    25  	"go.etcd.io/etcd/raft"
    26  	"go.etcd.io/etcd/raft/confchange"
    27  	"go.etcd.io/etcd/raft/quorum"
    28  	"go.etcd.io/etcd/raft/tracker"
    29  )
    30  
    31  func rd(typ *ReplicaType, id uint64) ReplicaDescriptor {
    32  	return ReplicaDescriptor{
    33  		Type:      typ,
    34  		NodeID:    NodeID(100 * id),
    35  		StoreID:   StoreID(10 * id),
    36  		ReplicaID: ReplicaID(id),
    37  	}
    38  }
    39  
    40  var vn = (*ReplicaType)(nil) // should be treated like VoterFull
    41  var v = ReplicaTypeVoterFull()
    42  var vi = ReplicaTypeVoterIncoming()
    43  var vo = ReplicaTypeVoterOutgoing()
    44  var vd = ReplicaTypeVoterDemoting()
    45  var l = ReplicaTypeLearner()
    46  
    47  func TestVotersLearnersAll(t *testing.T) {
    48  
    49  	tests := [][]ReplicaDescriptor{
    50  		{},
    51  		{rd(v, 1)},
    52  		{rd(vn, 1)},
    53  		{rd(l, 1)},
    54  		{rd(v, 1), rd(l, 2), rd(v, 3)},
    55  		{rd(vn, 1), rd(l, 2), rd(v, 3)},
    56  		{rd(l, 1), rd(v, 2), rd(l, 3)},
    57  		{rd(l, 1), rd(vn, 2), rd(l, 3)},
    58  		{rd(vi, 1)},
    59  		{rd(vo, 1)},
    60  		{rd(l, 1), rd(vo, 2), rd(vi, 3), rd(vi, 4)},
    61  	}
    62  	for _, test := range tests {
    63  		t.Run("", func(t *testing.T) {
    64  			r := MakeReplicaDescriptors(test)
    65  			seen := map[ReplicaDescriptor]struct{}{}
    66  			for _, voter := range r.Voters() {
    67  				typ := voter.GetType()
    68  				switch typ {
    69  				case VOTER_FULL, VOTER_INCOMING:
    70  					seen[voter] = struct{}{}
    71  				default:
    72  					assert.FailNow(t, "unexpectedly got a %s as Voter()", typ)
    73  				}
    74  			}
    75  			for _, learner := range r.Learners() {
    76  				seen[learner] = struct{}{}
    77  				assert.Equal(t, LEARNER, learner.GetType())
    78  			}
    79  
    80  			all := r.All()
    81  			// Make sure that VOTER_OUTGOING is the only type that is skipped both
    82  			// by Learners() and Voters()
    83  			for _, rd := range all {
    84  				typ := rd.GetType()
    85  				if _, seen := seen[rd]; !seen {
    86  					assert.Equal(t, VOTER_OUTGOING, typ)
    87  				} else {
    88  					assert.NotEqual(t, VOTER_OUTGOING, typ)
    89  				}
    90  			}
    91  			assert.Equal(t, len(test), len(all))
    92  		})
    93  	}
    94  }
    95  
    96  func TestReplicaDescriptorsRemove(t *testing.T) {
    97  	tests := []struct {
    98  		replicas []ReplicaDescriptor
    99  		remove   ReplicationTarget
   100  		expected bool
   101  	}{
   102  		{
   103  			remove:   ReplicationTarget{NodeID: 1, StoreID: 1},
   104  			expected: false,
   105  		},
   106  		{
   107  			replicas: []ReplicaDescriptor{{NodeID: 1, StoreID: 1}},
   108  			remove:   ReplicationTarget{NodeID: 2, StoreID: 2},
   109  			expected: false,
   110  		},
   111  		{
   112  			replicas: []ReplicaDescriptor{{NodeID: 1, StoreID: 1}},
   113  			remove:   ReplicationTarget{NodeID: 1, StoreID: 1},
   114  			expected: true,
   115  		},
   116  		{
   117  			// Make sure we sort after the swap in removal.
   118  			replicas: []ReplicaDescriptor{
   119  				{NodeID: 1, StoreID: 1},
   120  				{NodeID: 2, StoreID: 2},
   121  				{NodeID: 3, StoreID: 3},
   122  				{NodeID: 4, StoreID: 4, Type: ReplicaTypeLearner()},
   123  			},
   124  			remove:   ReplicationTarget{NodeID: 2, StoreID: 2},
   125  			expected: true,
   126  		},
   127  	}
   128  	for i, test := range tests {
   129  		r := MakeReplicaDescriptors(test.replicas)
   130  		lenBefore := len(r.All())
   131  		removedDesc, ok := r.RemoveReplica(test.remove.NodeID, test.remove.StoreID)
   132  		assert.Equal(t, test.expected, ok, "testcase %d", i)
   133  		if ok {
   134  			assert.Equal(t, test.remove.NodeID, removedDesc.NodeID, "testcase %d", i)
   135  			assert.Equal(t, test.remove.StoreID, removedDesc.StoreID, "testcase %d", i)
   136  			assert.Equal(t, lenBefore-1, len(r.All()), "testcase %d", i)
   137  		} else {
   138  			assert.Equal(t, lenBefore, len(r.All()), "testcase %d", i)
   139  		}
   140  		for _, voter := range r.Voters() {
   141  			assert.Equal(t, VOTER_FULL, voter.GetType(), "testcase %d", i)
   142  		}
   143  		for _, learner := range r.Learners() {
   144  			assert.Equal(t, LEARNER, learner.GetType(), "testcase %d", i)
   145  		}
   146  	}
   147  }
   148  
   149  func TestReplicaDescriptorsConfState(t *testing.T) {
   150  	tests := []struct {
   151  		in  []ReplicaDescriptor
   152  		out string
   153  	}{
   154  		{
   155  			[]ReplicaDescriptor{rd(v, 1)},
   156  			"Voters:[1] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false",
   157  		},
   158  		// Make sure nil is treated like VoterFull.
   159  		{
   160  			[]ReplicaDescriptor{rd(vn, 1)},
   161  			"Voters:[1] VotersOutgoing:[] Learners:[] LearnersNext:[] AutoLeave:false",
   162  		},
   163  		{
   164  			[]ReplicaDescriptor{rd(l, 1), rd(vn, 2)},
   165  			"Voters:[2] VotersOutgoing:[] Learners:[1] LearnersNext:[] AutoLeave:false",
   166  		},
   167  		// First joint case. We're adding n3 (via atomic replication changes), so the outgoing
   168  		// config we have to get rid of consists only of n2 (even though n2 remains a voter).
   169  		// Note that we could simplify this config so that it's not joint, but raft expects
   170  		// the config exactly as described by the descriptor so we don't try.
   171  		{
   172  			[]ReplicaDescriptor{rd(l, 1), rd(v, 2), rd(vi, 3)},
   173  			"Voters:[2 3] VotersOutgoing:[2] Learners:[1] LearnersNext:[] AutoLeave:false",
   174  		},
   175  		// More complex joint change: a replica swap, switching out n4 for n3 from the initial
   176  		// set of voters n2, n4 (plus learner n1 before and after).
   177  		{
   178  			[]ReplicaDescriptor{rd(l, 1), rd(v, 2), rd(vi, 3), rd(vo, 4)},
   179  			"Voters:[2 3] VotersOutgoing:[2 4] Learners:[1] LearnersNext:[] AutoLeave:false",
   180  		},
   181  		// Upreplicating from n1,n2 to n1,n2,n3,n4.
   182  		{
   183  			[]ReplicaDescriptor{rd(v, 1), rd(v, 2), rd(vi, 3), rd(vi, 4)},
   184  			"Voters:[1 2 3 4] VotersOutgoing:[1 2] Learners:[] LearnersNext:[] AutoLeave:false",
   185  		},
   186  		// Downreplicating from n1,n2,n3,n4 to n1,n2.
   187  		{
   188  			[]ReplicaDescriptor{rd(v, 1), rd(v, 2), rd(vo, 3), rd(vo, 4)},
   189  			"Voters:[1 2] VotersOutgoing:[1 2 3 4] Learners:[] LearnersNext:[] AutoLeave:false",
   190  		},
   191  		// Completely switching to a new set of replicas: n1,n2 to n4,n5. Throw a learner in for fun.
   192  		{
   193  			[]ReplicaDescriptor{rd(vo, 1), rd(vo, 2), rd(vi, 3), rd(vi, 4), rd(l, 5)},
   194  			"Voters:[3 4] VotersOutgoing:[1 2] Learners:[5] LearnersNext:[] AutoLeave:false",
   195  		},
   196  		// Throw in a voter demotion. The demoting voter should be treated as Outgoing and LearnersNext.
   197  		{
   198  			[]ReplicaDescriptor{rd(vo, 1), rd(vd, 2), rd(vi, 3), rd(vi, 4), rd(l, 5)},
   199  			"Voters:[3 4] VotersOutgoing:[1 2] Learners:[5] LearnersNext:[2] AutoLeave:false",
   200  		},
   201  	}
   202  
   203  	for _, test := range tests {
   204  		t.Run("", func(t *testing.T) {
   205  			r := MakeReplicaDescriptors(test.in)
   206  			cs := r.ConfState()
   207  			require.Equal(t, test.out, raft.DescribeConfState(cs))
   208  		})
   209  	}
   210  }
   211  
   212  func TestReplicaDescriptorsCanMakeProgress(t *testing.T) {
   213  	defer leaktest.AfterTest(t)()
   214  
   215  	type descWithLiveness struct {
   216  		live bool
   217  		ReplicaDescriptor
   218  	}
   219  
   220  	for _, test := range []struct {
   221  		rds []descWithLiveness
   222  		exp bool
   223  	}{
   224  		// One out of one voter dead.
   225  		{[]descWithLiveness{{false, rd(v, 1)}}, false},
   226  		// Three out of three voters dead.
   227  		{[]descWithLiveness{
   228  			{false, rd(v, 1)},
   229  			{false, rd(v, 2)},
   230  			{false, rd(v, 3)},
   231  		}, false},
   232  		// Two out of three voters dead.
   233  		{[]descWithLiveness{
   234  			{false, rd(v, 1)},
   235  			{true, rd(v, 2)},
   236  			{false, rd(v, 3)},
   237  		}, false},
   238  		// Two out of three voters alive.
   239  		{[]descWithLiveness{
   240  			{true, rd(v, 1)},
   241  			{false, rd(v, 2)},
   242  			{true, rd(v, 3)},
   243  		}, true},
   244  		// Two out of three voters alive, but one is an incoming voter. The outgoing
   245  		// group doesn't have quorum.
   246  		{[]descWithLiveness{
   247  			{true, rd(v, 1)},
   248  			{false, rd(v, 2)},
   249  			{true, rd(vi, 3)},
   250  		}, false},
   251  		// Two out of three voters alive, but one is an outgoing voter. The incoming
   252  		// group doesn't have quorum.
   253  		{[]descWithLiveness{
   254  			{true, rd(v, 1)},
   255  			{false, rd(v, 2)},
   256  			{true, rd(vd, 3)},
   257  		}, false},
   258  		// Two out of three voters dead, and they're all incoming voters. (This
   259  		// can't happen in practice because it means there were zero voters prior
   260  		// to the conf change, but still this result is correct, similar to others
   261  		// below).
   262  		{[]descWithLiveness{
   263  			{false, rd(vi, 1)},
   264  			{false, rd(vi, 2)},
   265  			{true, rd(vi, 3)},
   266  		}, false},
   267  		// Two out of three voters dead, and two are outgoing, one incoming.
   268  		{[]descWithLiveness{
   269  			{false, rd(vi, 1)},
   270  			{false, rd(vo, 2)},
   271  			{true, rd(vo, 3)},
   272  		}, false},
   273  		// 1 and 3 are alive, but that's not a quorum for (1 3)&&(2 3) which is
   274  		// the config here.
   275  		{[]descWithLiveness{
   276  			{true, rd(vi, 1)},
   277  			{false, rd(vo, 2)},
   278  			{true, rd(v, 3)},
   279  		}, false},
   280  		// Same as above, but all three alive.
   281  		{[]descWithLiveness{
   282  			{true, rd(vi, 1)},
   283  			{true, rd(vo, 2)},
   284  			{true, rd(v, 3)},
   285  		}, true},
   286  		// Same, but there are a few learners that should not matter.
   287  		{[]descWithLiveness{
   288  			{true, rd(vi, 1)},
   289  			{true, rd(vo, 2)},
   290  			{true, rd(v, 3)},
   291  			{false, rd(l, 4)},
   292  			{false, rd(l, 5)},
   293  			{false, rd(l, 6)},
   294  			{false, rd(l, 7)},
   295  		}, true},
   296  		// Non-joint case that should be live unless the learner is somehow taken
   297  		// into account.
   298  		{[]descWithLiveness{
   299  			{true, rd(v, 1)},
   300  			{true, rd(v, 2)},
   301  			{false, rd(v, 4)},
   302  			{false, rd(l, 4)},
   303  		}, true},
   304  	} {
   305  		t.Run("", func(t *testing.T) {
   306  			rds := make([]ReplicaDescriptor, 0, len(test.rds))
   307  			for _, rDesc := range test.rds {
   308  				rds = append(rds, rDesc.ReplicaDescriptor)
   309  			}
   310  
   311  			act := MakeReplicaDescriptors(rds).CanMakeProgress(func(rd ReplicaDescriptor) bool {
   312  				for _, rdi := range test.rds {
   313  					if rdi.ReplicaID == rd.ReplicaID {
   314  						return rdi.live
   315  					}
   316  				}
   317  				return false
   318  			})
   319  			require.Equal(t, test.exp, act, "input: %+v", test)
   320  		})
   321  	}
   322  }
   323  
   324  // Test that ReplicaDescriptors.CanMakeProgress() agrees with the equivalent
   325  // etcd/raft's code. We generate random configs and then see whether out
   326  // determination for unavailability matches etcd/raft.
   327  func TestReplicaDescriptorsCanMakeProgressRandom(t *testing.T) {
   328  	defer leaktest.AfterTest(t)()
   329  	ctx := context.Background()
   330  	randutil.SeedForTests()
   331  
   332  	var progress, noProgress, skipped int
   333  
   334  	start := timeutil.Now()
   335  	for timeutil.Now().Sub(start) < 100*time.Millisecond {
   336  		// Generate a random range configuration with between 1 and 7 replicas.
   337  		size := 1 + rand.Intn(6)
   338  		rds := make([]ReplicaDescriptor, size)
   339  		liveness := make([]bool, size)
   340  		// Generate a bunch of bits, each one representing the liveness of a different replica.
   341  		livenessBits := rand.Int31()
   342  		for i := range rds {
   343  			rds[i].ReplicaID = ReplicaID(i + 1)
   344  			typ := ReplicaType(rand.Intn(len(ReplicaType_name)))
   345  			rds[i].Type = &typ
   346  			liveness[i] = (livenessBits >> i & 1) == 0
   347  		}
   348  
   349  		rng := MakeReplicaDescriptors(rds)
   350  
   351  		crdbCanMakeProgress := rng.CanMakeProgress(func(rd ReplicaDescriptor) bool {
   352  			return liveness[rd.ReplicaID-1]
   353  		})
   354  
   355  		raftCanMakeProgress, skip := func() (res bool, skip bool) {
   356  			cfg, _, err := confchange.Restore(
   357  				confchange.Changer{Tracker: tracker.MakeProgressTracker(1)},
   358  				rng.ConfState(),
   359  			)
   360  			if err != nil {
   361  				if err.Error() != "removed all voters" {
   362  					t.Fatal(err)
   363  				}
   364  				return false, true
   365  			}
   366  			votes := make(map[uint64]bool, len(rng.wrapped))
   367  			for _, rDesc := range rng.wrapped {
   368  				if liveness[rDesc.ReplicaID-1] {
   369  					votes[uint64(rDesc.ReplicaID)] = true
   370  				}
   371  			}
   372  			return cfg.Voters.VoteResult(votes) == quorum.VoteWon, false
   373  		}()
   374  
   375  		if skip {
   376  			// Going to an empty config, which is non-sensical. Skipping input.
   377  			skipped++
   378  			continue
   379  		}
   380  		require.Equalf(t, raftCanMakeProgress, crdbCanMakeProgress,
   381  			"input: %s liveness: %v", rng, liveness)
   382  		if crdbCanMakeProgress {
   383  			progress++
   384  		} else {
   385  			noProgress++
   386  		}
   387  	}
   388  	log.Infof(ctx, "progress: %d cases. no progress: %d cases. skipped: %d cases.",
   389  		progress, noProgress, skipped)
   390  }