github.com/kaisenlinux/docker@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/state/raft/membership/cluster_test.go (about)

     1  package membership_test
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"os"
     9  	"testing"
    10  	"time"
    11  
    12  	"google.golang.org/grpc/grpclog"
    13  
    14  	"github.com/coreos/etcd/raft/raftpb"
    15  	"github.com/docker/swarmkit/api"
    16  	cautils "github.com/docker/swarmkit/ca/testutils"
    17  	"github.com/docker/swarmkit/manager/state/raft"
    18  	"github.com/docker/swarmkit/manager/state/raft/membership"
    19  	raftutils "github.com/docker/swarmkit/manager/state/raft/testutils"
    20  	"github.com/docker/swarmkit/testutils"
    21  	"github.com/sirupsen/logrus"
    22  	"github.com/stretchr/testify/assert"
    23  )
    24  
// tc is the test CA shared by the tests in this file. It is assigned in
// TestMain before any test runs and stopped there once the run has finished;
// TestCanRemoveMember passes it to the raft test helpers.
var tc *cautils.TestCA
    26  
    27  func init() {
    28  	grpclog.SetLoggerV2(grpclog.NewLoggerV2(ioutil.Discard, ioutil.Discard, ioutil.Discard))
    29  	logrus.SetOutput(ioutil.Discard)
    30  }
    31  
    32  func TestMain(m *testing.M) {
    33  	tc = cautils.NewTestCA(nil)
    34  	res := m.Run()
    35  	tc.Stop()
    36  	os.Exit(res)
    37  }
    38  
    39  func newTestMember(id uint64) *membership.Member {
    40  	return &membership.Member{
    41  		RaftMember: &api.RaftMember{RaftID: id},
    42  	}
    43  }
    44  
    45  func newTestCluster(members []*membership.Member, removed []*membership.Member) *membership.Cluster {
    46  	c := membership.NewCluster()
    47  	for _, m := range members {
    48  		c.AddMember(m)
    49  	}
    50  	for _, m := range removed {
    51  		c.AddMember(m)
    52  		c.RemoveMember(m.RaftID)
    53  	}
    54  	return c
    55  }
    56  
    57  func TestClusterMember(t *testing.T) {
    58  	members := []*membership.Member{
    59  		newTestMember(1),
    60  		newTestMember(2),
    61  	}
    62  	tests := []struct {
    63  		id    uint64
    64  		match bool
    65  	}{
    66  		{1, true},
    67  		{2, true},
    68  		{3, false},
    69  	}
    70  	for i, tt := range tests {
    71  		c := newTestCluster(members, nil)
    72  		m := c.GetMember(tt.id)
    73  		if g := m != nil; g != tt.match {
    74  			t.Errorf("#%d: find member = %v, want %v", i, g, tt.match)
    75  		}
    76  		if m != nil && m.RaftID != tt.id {
    77  			t.Errorf("#%d: id = %x, want %x", i, m.RaftID, tt.id)
    78  		}
    79  	}
    80  }
    81  
    82  func TestMembers(t *testing.T) {
    83  	cls := membership.NewCluster()
    84  	defer cls.Clear()
    85  	cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 1}})
    86  	cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 5}})
    87  	cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 20}})
    88  	cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 50}})
    89  	cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 10}})
    90  
    91  	assert.Len(t, cls.Members(), 5)
    92  }
    93  
    94  func TestGetMember(t *testing.T) {
    95  	members := []*membership.Member{
    96  		newTestMember(1),
    97  	}
    98  	removed := []*membership.Member{
    99  		newTestMember(2),
   100  	}
   101  	cls := newTestCluster(members, removed)
   102  
   103  	m := cls.GetMember(1)
   104  	assert.NotNil(t, m)
   105  	assert.Equal(t, m.RaftID, uint64(1))
   106  
   107  	m = cls.GetMember(2)
   108  	assert.Nil(t, m)
   109  
   110  	m = cls.GetMember(3)
   111  	assert.Nil(t, m)
   112  }
   113  
   114  func TestClusterAddMember(t *testing.T) {
   115  	members := []*membership.Member{
   116  		newTestMember(1),
   117  	}
   118  	removed := []*membership.Member{
   119  		newTestMember(2),
   120  	}
   121  	cls := newTestCluster(members, removed)
   122  
   123  	// Cannot add a node present in the removed set
   124  	err := cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 2}})
   125  	assert.Error(t, err)
   126  	assert.Equal(t, err, membership.ErrIDRemoved)
   127  	assert.Nil(t, cls.GetMember(2))
   128  
   129  	err = cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 3}})
   130  	assert.NoError(t, err)
   131  	assert.NotNil(t, cls.GetMember(3))
   132  }
   133  
   134  func TestClusterRemoveMember(t *testing.T) {
   135  	members := []*membership.Member{
   136  		newTestMember(1),
   137  	}
   138  	removed := []*membership.Member{
   139  		newTestMember(2),
   140  	}
   141  	cls := newTestCluster(members, removed)
   142  
   143  	// Can remove a node whose ID is not yet in the member list
   144  	err := cls.RemoveMember(3)
   145  	assert.NoError(t, err)
   146  	assert.Nil(t, cls.GetMember(3))
   147  
   148  	err = cls.RemoveMember(1)
   149  	assert.NoError(t, err)
   150  	assert.Nil(t, cls.GetMember(1))
   151  }
   152  
   153  func TestIsIDRemoved(t *testing.T) {
   154  	members := []*membership.Member{
   155  		newTestMember(1),
   156  	}
   157  	removed := []*membership.Member{
   158  		newTestMember(2),
   159  	}
   160  	cls := newTestCluster(members, removed)
   161  
   162  	assert.False(t, cls.IsIDRemoved(1))
   163  	assert.True(t, cls.IsIDRemoved(2))
   164  }
   165  
   166  func TestClear(t *testing.T) {
   167  	members := []*membership.Member{
   168  		newTestMember(1),
   169  		newTestMember(2),
   170  		newTestMember(3),
   171  	}
   172  	removed := []*membership.Member{
   173  		newTestMember(4),
   174  		newTestMember(5),
   175  		newTestMember(6),
   176  	}
   177  	cls := newTestCluster(members, removed)
   178  
   179  	cls.Clear()
   180  	assert.Equal(t, len(cls.Members()), 0)
   181  	assert.Equal(t, len(cls.Removed()), 0)
   182  }
   183  
   184  func TestValidateConfigurationChange(t *testing.T) {
   185  	members := []*membership.Member{
   186  		newTestMember(1),
   187  		newTestMember(2),
   188  		newTestMember(3),
   189  	}
   190  	removed := []*membership.Member{
   191  		newTestMember(4),
   192  		newTestMember(5),
   193  		newTestMember(6),
   194  	}
   195  	cls := newTestCluster(members, removed)
   196  
   197  	m := &api.RaftMember{RaftID: 1}
   198  	existingMember, err := m.Marshal()
   199  	assert.NoError(t, err)
   200  	assert.NotNil(t, existingMember)
   201  
   202  	m = &api.RaftMember{RaftID: 7}
   203  	newMember, err := m.Marshal()
   204  	assert.NoError(t, err)
   205  	assert.NotNil(t, newMember)
   206  
   207  	m = &api.RaftMember{RaftID: 4}
   208  	removedMember, err := m.Marshal()
   209  	assert.NoError(t, err)
   210  	assert.NotNil(t, removedMember)
   211  
   212  	n := &api.Node{}
   213  	node, err := n.Marshal()
   214  	assert.NoError(t, err)
   215  	assert.NotNil(t, node)
   216  
   217  	// Add node but ID exists
   218  	cc := raftpb.ConfChange{ID: 1, Type: raftpb.ConfChangeAddNode, NodeID: 1, Context: existingMember}
   219  	err = cls.ValidateConfigurationChange(cc)
   220  	assert.Error(t, err)
   221  	assert.Equal(t, err, membership.ErrIDExists)
   222  
   223  	// Any configuration change but ID in remove set
   224  	cc = raftpb.ConfChange{ID: 4, Type: raftpb.ConfChangeAddNode, NodeID: 4, Context: removedMember}
   225  	err = cls.ValidateConfigurationChange(cc)
   226  	assert.Error(t, err)
   227  	assert.Equal(t, err, membership.ErrIDRemoved)
   228  
   229  	// Remove Node but ID not found in memberlist
   230  	cc = raftpb.ConfChange{ID: 7, Type: raftpb.ConfChangeRemoveNode, NodeID: 7, Context: newMember}
   231  	err = cls.ValidateConfigurationChange(cc)
   232  	assert.Error(t, err)
   233  	assert.Equal(t, err, membership.ErrIDNotFound)
   234  
   235  	// Update Node but ID not found in memberlist
   236  	cc = raftpb.ConfChange{ID: 7, Type: raftpb.ConfChangeUpdateNode, NodeID: 7, Context: newMember}
   237  	err = cls.ValidateConfigurationChange(cc)
   238  	assert.Error(t, err)
   239  	assert.Equal(t, err, membership.ErrIDNotFound)
   240  
   241  	// Any configuration change but can't unmarshal config
   242  	cc = raftpb.ConfChange{ID: 7, Type: raftpb.ConfChangeAddNode, NodeID: 7, Context: []byte("abcdef")}
   243  	err = cls.ValidateConfigurationChange(cc)
   244  	assert.Error(t, err)
   245  	assert.Equal(t, err, membership.ErrCannotUnmarshalConfig)
   246  
   247  	// Invalid configuration change
   248  	cc = raftpb.ConfChange{ID: 1, Type: 10, NodeID: 1, Context: newMember}
   249  	err = cls.ValidateConfigurationChange(cc)
   250  	assert.Error(t, err)
   251  	assert.Equal(t, err, membership.ErrConfigChangeInvalid)
   252  }
   253  
   254  func TestCanRemoveMember(t *testing.T) {
   255  	nodes, clockSource := raftutils.NewRaftCluster(t, tc)
   256  	defer raftutils.TeardownCluster(nodes)
   257  
   258  	// Stop node 2 and node 3 (2 nodes out of 3)
   259  	nodes[2].Server.Stop()
   260  	nodes[2].ShutdownRaft()
   261  	nodes[3].Server.Stop()
   262  	nodes[3].ShutdownRaft()
   263  
   264  	// Node 2 and Node 3 should be listed as Unreachable
   265  	assert.NoError(t, testutils.PollFunc(clockSource, func() error {
   266  		members := nodes[1].GetMemberlist()
   267  		if len(members) != 3 {
   268  			return fmt.Errorf("expected 3 nodes, got %d", len(members))
   269  		}
   270  		if members[nodes[2].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
   271  			return errors.New("expected node 2 to be unreachable")
   272  		}
   273  		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
   274  			return errors.New("expected node 3 to be unreachable")
   275  		}
   276  		return nil
   277  	}))
   278  
   279  	// Removing nodes at this point fails because we lost quorum
   280  	for i := 1; i <= 3; i++ {
   281  		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   282  		defer cancel()
   283  		err := nodes[1].RemoveMember(ctx, uint64(i))
   284  		assert.Error(t, err)
   285  		members := nodes[1].GetMemberlist()
   286  		assert.Equal(t, len(members), 3)
   287  	}
   288  
   289  	// Restart node 2 and node 3
   290  	nodes[2] = raftutils.RestartNode(t, clockSource, nodes[2], false)
   291  	nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false)
   292  	raftutils.WaitForCluster(t, clockSource, nodes)
   293  
   294  	var leader uint64
   295  	leaderIndex := func() uint64 {
   296  		for i, n := range nodes {
   297  			if n.Config.ID == n.Leader() {
   298  				return i
   299  			}
   300  		}
   301  		return 0
   302  	}
   303  
   304  	// Node 2 and Node 3 should be listed as Reachable
   305  	assert.NoError(t, testutils.PollFunc(clockSource, func() error {
   306  		leader = leaderIndex()
   307  		if leader == 0 {
   308  			return errors.New("no leader")
   309  		}
   310  		members := nodes[leader].GetMemberlist()
   311  		if len(members) != 3 {
   312  			return fmt.Errorf("expected 3 nodes, got %d", len(members))
   313  		}
   314  		if members[nodes[2].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE {
   315  			return errors.New("expected node 2 to be reachable")
   316  		}
   317  		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE {
   318  			return errors.New("expected node 3 to be reachable")
   319  		}
   320  		return nil
   321  	}))
   322  
   323  	// Stop Node 3 (1 node out of 3)
   324  	nodes[3].Server.Stop()
   325  	nodes[3].ShutdownRaft()
   326  
   327  	// Node 3 should be listed as Unreachable
   328  	assert.NoError(t, testutils.PollFunc(clockSource, func() error {
   329  		leader = leaderIndex()
   330  		if leader == 0 {
   331  			return errors.New("no leader")
   332  		}
   333  		members := nodes[leader].GetMemberlist()
   334  		if len(members) != 3 {
   335  			return fmt.Errorf("expected 3 nodes, got %d", len(members))
   336  		}
   337  		if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE {
   338  			return errors.New("expected node 3 to be unreachable")
   339  		}
   340  		return nil
   341  	}))
   342  
   343  	// Removing node 2 should fail (this would break the quorum)
   344  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   345  	err := nodes[leader].RemoveMember(ctx, nodes[2].Config.ID)
   346  	cancel()
   347  	assert.EqualError(t, err, raft.ErrCannotRemoveMember.Error())
   348  	members := nodes[leader].GetMemberlist()
   349  	assert.Equal(t, len(members), 3)
   350  
   351  	// Removing node 3 works fine because it is already unreachable
   352  	ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second)
   353  	err = nodes[leader].RemoveMember(ctx, nodes[3].Config.ID)
   354  	cancel()
   355  	assert.NoError(t, err)
   356  	members = nodes[leader].GetMemberlist()
   357  	assert.Nil(t, members[nodes[3].Config.ID])
   358  	assert.Equal(t, len(members), 2)
   359  
   360  	// Add back node 3
   361  	raftutils.ShutdownNode(nodes[3])
   362  	nodes[3] = raftutils.NewJoinNode(t, clockSource, nodes[leader].Address, tc)
   363  	raftutils.WaitForCluster(t, clockSource, nodes)
   364  
   365  	// Node 2 and Node 3 should be listed as Reachable
   366  	assert.NoError(t, testutils.PollFunc(clockSource, func() error {
   367  		leader = leaderIndex()
   368  		if leader == 0 {
   369  			return errors.New("no leader")
   370  		}
   371  		members := nodes[leader].GetMemberlist()
   372  		if len(members) != 3 {
   373  			return fmt.Errorf("expected 3 nodes, got %d", len(members))
   374  		}
   375  		if members[nodes[2].Config.ID].Status.Reachability != api.RaftMemberStatus_REACHABLE {
   376  			return errors.New("expected node 2 to be reachable")
   377  		}
   378  		if members[nodes[3].Config.ID].Status.Reachability != api.RaftMemberStatus_REACHABLE {
   379  			return errors.New("expected node 3 to be reachable")
   380  		}
   381  		return nil
   382  	}))
   383  
   384  	// Removing node 3 should succeed
   385  	ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second)
   386  	err = nodes[leader].RemoveMember(ctx, nodes[3].Config.ID)
   387  	cancel()
   388  	assert.NoError(t, err)
   389  	members = nodes[leader].GetMemberlist()
   390  	assert.Nil(t, members[nodes[3].Config.ID])
   391  	assert.Equal(t, len(members), 2)
   392  
   393  	// Removing node 2 should succeed
   394  	ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second)
   395  	err = nodes[leader].RemoveMember(ctx, nodes[2].Config.ID)
   396  	cancel()
   397  	assert.NoError(t, err)
   398  	members = nodes[leader].GetMemberlist()
   399  	assert.Nil(t, members[nodes[2].Config.ID])
   400  	assert.Equal(t, len(members), 1)
   401  }