github.com/kaisenlinux/docker@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/state/raft/membership/cluster_test.go (about) 1 package membership_test 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io/ioutil" 8 "os" 9 "testing" 10 "time" 11 12 "google.golang.org/grpc/grpclog" 13 14 "github.com/coreos/etcd/raft/raftpb" 15 "github.com/docker/swarmkit/api" 16 cautils "github.com/docker/swarmkit/ca/testutils" 17 "github.com/docker/swarmkit/manager/state/raft" 18 "github.com/docker/swarmkit/manager/state/raft/membership" 19 raftutils "github.com/docker/swarmkit/manager/state/raft/testutils" 20 "github.com/docker/swarmkit/testutils" 21 "github.com/sirupsen/logrus" 22 "github.com/stretchr/testify/assert" 23 ) 24 25 var tc *cautils.TestCA 26 27 func init() { 28 grpclog.SetLoggerV2(grpclog.NewLoggerV2(ioutil.Discard, ioutil.Discard, ioutil.Discard)) 29 logrus.SetOutput(ioutil.Discard) 30 } 31 32 func TestMain(m *testing.M) { 33 tc = cautils.NewTestCA(nil) 34 res := m.Run() 35 tc.Stop() 36 os.Exit(res) 37 } 38 39 func newTestMember(id uint64) *membership.Member { 40 return &membership.Member{ 41 RaftMember: &api.RaftMember{RaftID: id}, 42 } 43 } 44 45 func newTestCluster(members []*membership.Member, removed []*membership.Member) *membership.Cluster { 46 c := membership.NewCluster() 47 for _, m := range members { 48 c.AddMember(m) 49 } 50 for _, m := range removed { 51 c.AddMember(m) 52 c.RemoveMember(m.RaftID) 53 } 54 return c 55 } 56 57 func TestClusterMember(t *testing.T) { 58 members := []*membership.Member{ 59 newTestMember(1), 60 newTestMember(2), 61 } 62 tests := []struct { 63 id uint64 64 match bool 65 }{ 66 {1, true}, 67 {2, true}, 68 {3, false}, 69 } 70 for i, tt := range tests { 71 c := newTestCluster(members, nil) 72 m := c.GetMember(tt.id) 73 if g := m != nil; g != tt.match { 74 t.Errorf("#%d: find member = %v, want %v", i, g, tt.match) 75 } 76 if m != nil && m.RaftID != tt.id { 77 t.Errorf("#%d: id = %x, want %x", i, m.RaftID, tt.id) 78 } 79 } 80 } 81 82 func TestMembers(t *testing.T) { 83 cls := membership.NewCluster() 84 defer cls.Clear() 85 cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 1}}) 86 cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 5}}) 87 cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 20}}) 88 cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 50}}) 89 cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 10}}) 90 91 assert.Len(t, cls.Members(), 5) 92 } 93 94 func TestGetMember(t *testing.T) { 95 members := []*membership.Member{ 96 newTestMember(1), 97 } 98 removed := []*membership.Member{ 99 newTestMember(2), 100 } 101 cls := newTestCluster(members, removed) 102 103 m := cls.GetMember(1) 104 assert.NotNil(t, m) 105 assert.Equal(t, m.RaftID, uint64(1)) 106 107 m = cls.GetMember(2) 108 assert.Nil(t, m) 109 110 m = cls.GetMember(3) 111 assert.Nil(t, m) 112 } 113 114 func TestClusterAddMember(t *testing.T) { 115 members := []*membership.Member{ 116 newTestMember(1), 117 } 118 removed := []*membership.Member{ 119 newTestMember(2), 120 } 121 cls := newTestCluster(members, removed) 122 123 // Cannot add a node present in the removed set 124 err := cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 2}}) 125 assert.Error(t, err) 126 assert.Equal(t, err, membership.ErrIDRemoved) 127 assert.Nil(t, cls.GetMember(2)) 128 129 err = cls.AddMember(&membership.Member{RaftMember: &api.RaftMember{RaftID: 3}}) 130 assert.NoError(t, err) 131 assert.NotNil(t, cls.GetMember(3)) 132 } 133 134 func TestClusterRemoveMember(t *testing.T) { 135 members := []*membership.Member{ 136 newTestMember(1), 137 } 138 removed := []*membership.Member{ 139 newTestMember(2), 140 } 141 cls := newTestCluster(members, removed) 142 143 // Can remove a node whose ID is not yet in the member list 144 err := cls.RemoveMember(3) 145 assert.NoError(t, err) 146 assert.Nil(t, cls.GetMember(3)) 147 148 err = cls.RemoveMember(1) 149 assert.NoError(t, err) 150 assert.Nil(t, cls.GetMember(1)) 151 } 152 153 func TestIsIDRemoved(t *testing.T) { 154 members := []*membership.Member{ 155 newTestMember(1), 156 } 157 removed := []*membership.Member{ 158 newTestMember(2), 159 } 160 cls := newTestCluster(members, removed) 161 162 assert.False(t, cls.IsIDRemoved(1)) 163 assert.True(t, cls.IsIDRemoved(2)) 164 } 165 166 func TestClear(t *testing.T) { 167 members := []*membership.Member{ 168 newTestMember(1), 169 newTestMember(2), 170 newTestMember(3), 171 } 172 removed := []*membership.Member{ 173 newTestMember(4), 174 newTestMember(5), 175 newTestMember(6), 176 } 177 cls := newTestCluster(members, removed) 178 179 cls.Clear() 180 assert.Equal(t, len(cls.Members()), 0) 181 assert.Equal(t, len(cls.Removed()), 0) 182 } 183 184 func TestValidateConfigurationChange(t *testing.T) { 185 members := []*membership.Member{ 186 newTestMember(1), 187 newTestMember(2), 188 newTestMember(3), 189 } 190 removed := []*membership.Member{ 191 newTestMember(4), 192 newTestMember(5), 193 newTestMember(6), 194 } 195 cls := newTestCluster(members, removed) 196 197 m := &api.RaftMember{RaftID: 1} 198 existingMember, err := m.Marshal() 199 assert.NoError(t, err) 200 assert.NotNil(t, existingMember) 201 202 m = &api.RaftMember{RaftID: 7} 203 newMember, err := m.Marshal() 204 assert.NoError(t, err) 205 assert.NotNil(t, newMember) 206 207 m = &api.RaftMember{RaftID: 4} 208 removedMember, err := m.Marshal() 209 assert.NoError(t, err) 210 assert.NotNil(t, removedMember) 211 212 n := &api.Node{} 213 node, err := n.Marshal() 214 assert.NoError(t, err) 215 assert.NotNil(t, node) 216 217 // Add node but ID exists 218 cc := raftpb.ConfChange{ID: 1, Type: raftpb.ConfChangeAddNode, NodeID: 1, Context: existingMember} 219 err = cls.ValidateConfigurationChange(cc) 220 assert.Error(t, err) 221 assert.Equal(t, err, membership.ErrIDExists) 222 223 // Any configuration change but ID in remove set 224 cc = raftpb.ConfChange{ID: 4, Type: raftpb.ConfChangeAddNode, NodeID: 4, Context: removedMember} 225 err = cls.ValidateConfigurationChange(cc) 226 assert.Error(t, err) 227 assert.Equal(t, err, membership.ErrIDRemoved) 228 229 // Remove Node but ID not found in memberlist 230 cc = raftpb.ConfChange{ID: 7, Type: raftpb.ConfChangeRemoveNode, NodeID: 7, Context: newMember} 231 err = cls.ValidateConfigurationChange(cc) 232 assert.Error(t, err) 233 assert.Equal(t, err, membership.ErrIDNotFound) 234 235 // Update Node but ID not found in memberlist 236 cc = raftpb.ConfChange{ID: 7, Type: raftpb.ConfChangeUpdateNode, NodeID: 7, Context: newMember} 237 err = cls.ValidateConfigurationChange(cc) 238 assert.Error(t, err) 239 assert.Equal(t, err, membership.ErrIDNotFound) 240 241 // Any configuration change but can't unmarshal config 242 cc = raftpb.ConfChange{ID: 7, Type: raftpb.ConfChangeAddNode, NodeID: 7, Context: []byte("abcdef")} 243 err = cls.ValidateConfigurationChange(cc) 244 assert.Error(t, err) 245 assert.Equal(t, err, membership.ErrCannotUnmarshalConfig) 246 247 // Invalid configuration change 248 cc = raftpb.ConfChange{ID: 1, Type: 10, NodeID: 1, Context: newMember} 249 err = cls.ValidateConfigurationChange(cc) 250 assert.Error(t, err) 251 assert.Equal(t, err, membership.ErrConfigChangeInvalid) 252 } 253 254 func TestCanRemoveMember(t *testing.T) { 255 nodes, clockSource := raftutils.NewRaftCluster(t, tc) 256 defer raftutils.TeardownCluster(nodes) 257 258 // Stop node 2 and node 3 (2 nodes out of 3) 259 nodes[2].Server.Stop() 260 nodes[2].ShutdownRaft() 261 nodes[3].Server.Stop() 262 nodes[3].ShutdownRaft() 263 264 // Node 2 and Node 3 should be listed as Unreachable 265 assert.NoError(t, testutils.PollFunc(clockSource, func() error { 266 members := nodes[1].GetMemberlist() 267 if len(members) != 3 { 268 return fmt.Errorf("expected 3 nodes, got %d", len(members)) 269 } 270 if members[nodes[2].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE { 271 return errors.New("expected node 2 to be unreachable") 272 } 273 if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE { 274 return errors.New("expected node 3 to be unreachable") 275 } 276 return nil 277 })) 278 279 // Removing nodes at this point fails because we lost quorum 280 for i := 1; i <= 3; i++ { 281 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 282 defer cancel() 283 err := nodes[1].RemoveMember(ctx, uint64(i)) 284 assert.Error(t, err) 285 members := nodes[1].GetMemberlist() 286 assert.Equal(t, len(members), 3) 287 } 288 289 // Restart node 2 and node 3 290 nodes[2] = raftutils.RestartNode(t, clockSource, nodes[2], false) 291 nodes[3] = raftutils.RestartNode(t, clockSource, nodes[3], false) 292 raftutils.WaitForCluster(t, clockSource, nodes) 293 294 var leader uint64 295 leaderIndex := func() uint64 { 296 for i, n := range nodes { 297 if n.Config.ID == n.Leader() { 298 return i 299 } 300 } 301 return 0 302 } 303 304 // Node 2 and Node 3 should be listed as Reachable 305 assert.NoError(t, testutils.PollFunc(clockSource, func() error { 306 leader = leaderIndex() 307 if leader == 0 { 308 return errors.New("no leader") 309 } 310 members := nodes[leader].GetMemberlist() 311 if len(members) != 3 { 312 return fmt.Errorf("expected 3 nodes, got %d", len(members)) 313 } 314 if members[nodes[2].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE { 315 return errors.New("expected node 2 to be reachable") 316 } 317 if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_UNREACHABLE { 318 return errors.New("expected node 3 to be reachable") 319 } 320 return nil 321 })) 322 323 // Stop Node 3 (1 node out of 3) 324 nodes[3].Server.Stop() 325 nodes[3].ShutdownRaft() 326 327 // Node 3 should be listed as Unreachable 328 assert.NoError(t, testutils.PollFunc(clockSource, func() error { 329 leader = leaderIndex() 330 if leader == 0 { 331 return errors.New("no leader") 332 } 333 members := nodes[leader].GetMemberlist() 334 if len(members) != 3 { 335 return fmt.Errorf("expected 3 nodes, got %d", len(members)) 336 } 337 if members[nodes[3].Config.ID].Status.Reachability == api.RaftMemberStatus_REACHABLE { 338 return errors.New("expected node 3 to be unreachable") 339 } 340 return nil 341 })) 342 343 // Removing node 2 should fail (this would break the quorum) 344 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 345 err := nodes[leader].RemoveMember(ctx, nodes[2].Config.ID) 346 cancel() 347 assert.EqualError(t, err, raft.ErrCannotRemoveMember.Error()) 348 members := nodes[leader].GetMemberlist() 349 assert.Equal(t, len(members), 3) 350 351 // Removing node 3 works fine because it is already unreachable 352 ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second) 353 err = nodes[leader].RemoveMember(ctx, nodes[3].Config.ID) 354 cancel() 355 assert.NoError(t, err) 356 members = nodes[leader].GetMemberlist() 357 assert.Nil(t, members[nodes[3].Config.ID]) 358 assert.Equal(t, len(members), 2) 359 360 // Add back node 3 361 raftutils.ShutdownNode(nodes[3]) 362 nodes[3] = raftutils.NewJoinNode(t, clockSource, nodes[leader].Address, tc) 363 raftutils.WaitForCluster(t, clockSource, nodes) 364 365 // Node 2 and Node 3 should be listed as Reachable 366 assert.NoError(t, testutils.PollFunc(clockSource, func() error { 367 leader = leaderIndex() 368 if leader == 0 { 369 return errors.New("no leader") 370 } 371 members := nodes[leader].GetMemberlist() 372 if len(members) != 3 { 373 return fmt.Errorf("expected 3 nodes, got %d", len(members)) 374 } 375 if members[nodes[2].Config.ID].Status.Reachability != api.RaftMemberStatus_REACHABLE { 376 return errors.New("expected node 2 to be reachable") 377 } 378 if members[nodes[3].Config.ID].Status.Reachability != api.RaftMemberStatus_REACHABLE { 379 return errors.New("expected node 3 to be reachable") 380 } 381 return nil 382 })) 383 384 // Removing node 3 should succeed 385 ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second) 386 err = nodes[leader].RemoveMember(ctx, nodes[3].Config.ID) 387 cancel() 388 assert.NoError(t, err) 389 members = nodes[leader].GetMemberlist() 390 assert.Nil(t, members[nodes[3].Config.ID]) 391 assert.Equal(t, len(members), 2) 392 393 // Removing node 2 should succeed 394 ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second) 395 err = nodes[leader].RemoveMember(ctx, nodes[2].Config.ID) 396 cancel() 397 assert.NoError(t, err) 398 members = nodes[leader].GetMemberlist() 399 assert.Nil(t, members[nodes[2].Config.ID]) 400 assert.Equal(t, len(members), 1) 401 }