github.com/moby/docker@v26.1.3+incompatible/integration-cli/docker_api_swarm_test.go (about) 1 //go:build !windows 2 3 package main 4 5 import ( 6 "context" 7 "fmt" 8 "net" 9 "net/http" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strings" 14 "sync" 15 "testing" 16 "time" 17 18 "github.com/cloudflare/cfssl/csr" 19 "github.com/cloudflare/cfssl/helpers" 20 "github.com/cloudflare/cfssl/initca" 21 "github.com/docker/docker/api/types" 22 "github.com/docker/docker/api/types/container" 23 "github.com/docker/docker/api/types/swarm" 24 "github.com/docker/docker/errdefs" 25 "github.com/docker/docker/integration-cli/checker" 26 "github.com/docker/docker/integration-cli/daemon" 27 "github.com/docker/docker/testutil" 28 testdaemon "github.com/docker/docker/testutil/daemon" 29 "github.com/docker/docker/testutil/request" 30 "github.com/moby/swarmkit/v2/ca" 31 "gotest.tools/v3/assert" 32 is "gotest.tools/v3/assert/cmp" 33 "gotest.tools/v3/poll" 34 ) 35 36 var defaultReconciliationTimeout = 30 * time.Second 37 38 func (s *DockerSwarmSuite) TestAPISwarmInit(c *testing.T) { 39 ctx := testutil.GetContext(c) 40 // todo: should find a better way to verify that components are running than /info 41 d1 := s.AddDaemon(ctx, c, true, true) 42 info := d1.SwarmInfo(ctx, c) 43 assert.Equal(c, info.ControlAvailable, true) 44 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 45 assert.Equal(c, info.Cluster.RootRotationInProgress, false) 46 47 d2 := s.AddDaemon(ctx, c, true, false) 48 info = d2.SwarmInfo(ctx, c) 49 assert.Equal(c, info.ControlAvailable, false) 50 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 51 52 // Leaving cluster 53 assert.NilError(c, d2.SwarmLeave(ctx, c, false)) 54 55 info = d2.SwarmInfo(ctx, c) 56 assert.Equal(c, info.ControlAvailable, false) 57 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive) 58 59 d2.SwarmJoin(ctx, c, swarm.JoinRequest{ 60 ListenAddr: d1.SwarmListenAddr(), 61 JoinToken: d1.JoinTokens(c).Worker, 62 
RemoteAddrs: []string{d1.SwarmListenAddr()}, 63 }) 64 65 info = d2.SwarmInfo(ctx, c) 66 assert.Equal(c, info.ControlAvailable, false) 67 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 68 69 // Current state restoring after restarts 70 d1.Stop(c) 71 d2.Stop(c) 72 73 d1.StartNode(c) 74 d2.StartNode(c) 75 76 info = d1.SwarmInfo(ctx, c) 77 assert.Equal(c, info.ControlAvailable, true) 78 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 79 80 info = d2.SwarmInfo(ctx, c) 81 assert.Equal(c, info.ControlAvailable, false) 82 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 83 } 84 85 func (s *DockerSwarmSuite) TestAPISwarmJoinToken(c *testing.T) { 86 ctx := testutil.GetContext(c) 87 d1 := s.AddDaemon(ctx, c, false, false) 88 d1.SwarmInit(ctx, c, swarm.InitRequest{}) 89 90 // todo: error message differs depending if some components of token are valid 91 92 d2 := s.AddDaemon(ctx, c, false, false) 93 c2 := d2.NewClientT(c) 94 err := c2.SwarmJoin(testutil.GetContext(c), swarm.JoinRequest{ 95 ListenAddr: d2.SwarmListenAddr(), 96 RemoteAddrs: []string{d1.SwarmListenAddr()}, 97 }) 98 assert.ErrorContains(c, err, "join token is necessary") 99 info := d2.SwarmInfo(ctx, c) 100 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive) 101 102 err = c2.SwarmJoin(testutil.GetContext(c), swarm.JoinRequest{ 103 ListenAddr: d2.SwarmListenAddr(), 104 JoinToken: "foobaz", 105 RemoteAddrs: []string{d1.SwarmListenAddr()}, 106 }) 107 assert.ErrorContains(c, err, "invalid join token") 108 info = d2.SwarmInfo(ctx, c) 109 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive) 110 111 workerToken := d1.JoinTokens(c).Worker 112 113 d2.SwarmJoin(ctx, c, swarm.JoinRequest{ 114 ListenAddr: d2.SwarmListenAddr(), 115 JoinToken: workerToken, 116 RemoteAddrs: []string{d1.SwarmListenAddr()}, 117 }) 118 info = d2.SwarmInfo(ctx, c) 119 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 120 assert.NilError(c, d2.SwarmLeave(ctx, 
c, false)) 121 info = d2.SwarmInfo(ctx, c) 122 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive) 123 124 // change tokens 125 d1.RotateTokens(c) 126 127 err = c2.SwarmJoin(testutil.GetContext(c), swarm.JoinRequest{ 128 ListenAddr: d2.SwarmListenAddr(), 129 JoinToken: workerToken, 130 RemoteAddrs: []string{d1.SwarmListenAddr()}, 131 }) 132 assert.ErrorContains(c, err, "join token is necessary") 133 info = d2.SwarmInfo(ctx, c) 134 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive) 135 136 workerToken = d1.JoinTokens(c).Worker 137 138 d2.SwarmJoin(ctx, c, swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.SwarmListenAddr()}}) 139 info = d2.SwarmInfo(ctx, c) 140 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 141 assert.NilError(c, d2.SwarmLeave(ctx, c, false)) 142 info = d2.SwarmInfo(ctx, c) 143 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive) 144 145 // change spec, don't change tokens 146 d1.UpdateSwarm(c, func(s *swarm.Spec) {}) 147 148 err = c2.SwarmJoin(testutil.GetContext(c), swarm.JoinRequest{ 149 ListenAddr: d2.SwarmListenAddr(), 150 RemoteAddrs: []string{d1.SwarmListenAddr()}, 151 }) 152 assert.ErrorContains(c, err, "join token is necessary") 153 info = d2.SwarmInfo(ctx, c) 154 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive) 155 156 d2.SwarmJoin(ctx, c, swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.SwarmListenAddr()}}) 157 info = d2.SwarmInfo(ctx, c) 158 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 159 assert.NilError(c, d2.SwarmLeave(ctx, c, false)) 160 info = d2.SwarmInfo(ctx, c) 161 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive) 162 } 163 164 func (s *DockerSwarmSuite) TestUpdateSwarmAddExternalCA(c *testing.T) { 165 ctx := testutil.GetContext(c) 166 d1 := s.AddDaemon(ctx, c, false, false) 167 d1.SwarmInit(ctx, c, swarm.InitRequest{}) 168 d1.UpdateSwarm(c, func(s *swarm.Spec) { 169 
s.CAConfig.ExternalCAs = []*swarm.ExternalCA{ 170 { 171 Protocol: swarm.ExternalCAProtocolCFSSL, 172 URL: "https://thishasnoca.org", 173 }, 174 { 175 Protocol: swarm.ExternalCAProtocolCFSSL, 176 URL: "https://thishasacacert.org", 177 CACert: "cacert", 178 }, 179 } 180 }) 181 info := d1.SwarmInfo(ctx, c) 182 assert.Equal(c, len(info.Cluster.Spec.CAConfig.ExternalCAs), 2) 183 assert.Equal(c, info.Cluster.Spec.CAConfig.ExternalCAs[0].CACert, "") 184 assert.Equal(c, info.Cluster.Spec.CAConfig.ExternalCAs[1].CACert, "cacert") 185 } 186 187 func (s *DockerSwarmSuite) TestAPISwarmCAHash(c *testing.T) { 188 ctx := testutil.GetContext(c) 189 d1 := s.AddDaemon(ctx, c, true, true) 190 d2 := s.AddDaemon(ctx, c, false, false) 191 splitToken := strings.Split(d1.JoinTokens(c).Worker, "-") 192 splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e" 193 replacementToken := strings.Join(splitToken, "-") 194 c2 := d2.NewClientT(c) 195 err := c2.SwarmJoin(testutil.GetContext(c), swarm.JoinRequest{ 196 ListenAddr: d2.SwarmListenAddr(), 197 JoinToken: replacementToken, 198 RemoteAddrs: []string{d1.SwarmListenAddr()}, 199 }) 200 assert.ErrorContains(c, err, "remote CA does not match fingerprint") 201 } 202 203 func (s *DockerSwarmSuite) TestAPISwarmPromoteDemote(c *testing.T) { 204 ctx := testutil.GetContext(c) 205 d1 := s.AddDaemon(ctx, c, false, false) 206 d1.SwarmInit(ctx, c, swarm.InitRequest{}) 207 d2 := s.AddDaemon(ctx, c, true, false) 208 209 info := d2.SwarmInfo(ctx, c) 210 assert.Equal(c, info.ControlAvailable, false) 211 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 212 213 d1.UpdateNode(ctx, c, d2.NodeID(), func(n *swarm.Node) { 214 n.Spec.Role = swarm.NodeRoleManager 215 }) 216 217 poll.WaitOn(c, pollCheck(c, d2.CheckControlAvailable(ctx), checker.True()), poll.WithTimeout(defaultReconciliationTimeout)) 218 219 d1.UpdateNode(ctx, c, d2.NodeID(), func(n *swarm.Node) { 220 n.Spec.Role = swarm.NodeRoleWorker 221 }) 222 223 poll.WaitOn(c, 
pollCheck(c, d2.CheckControlAvailable(ctx), checker.False()), poll.WithTimeout(defaultReconciliationTimeout)) 224 225 // Wait for the role to change to worker in the cert. This is partially 226 // done because it's something worth testing in its own right, and 227 // partially because changing the role from manager to worker and then 228 // back to manager quickly might cause the node to pause for awhile 229 // while waiting for the role to change to worker, and the test can 230 // time out during this interval. 231 poll.WaitOn(c, pollCheck(c, func(c *testing.T) (interface{}, string) { 232 certBytes, err := os.ReadFile(filepath.Join(d2.Folder, "root", "swarm", "certificates", "swarm-node.crt")) 233 if err != nil { 234 return "", fmt.Sprintf("error: %v", err) 235 } 236 certs, err := helpers.ParseCertificatesPEM(certBytes) 237 if err == nil && len(certs) > 0 && len(certs[0].Subject.OrganizationalUnit) > 0 { 238 return certs[0].Subject.OrganizationalUnit[0], "" 239 } 240 return "", "could not get organizational unit from certificate" 241 }, checker.Equals("swarm-worker")), poll.WithTimeout(defaultReconciliationTimeout)) 242 243 // Demoting last node should fail 244 node := d1.GetNode(ctx, c, d1.NodeID()) 245 node.Spec.Role = swarm.NodeRoleWorker 246 url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index) 247 res, body, err := request.Post(testutil.GetContext(c), url, request.Host(d1.Sock()), request.JSONBody(node.Spec)) 248 assert.NilError(c, err) 249 b, err := request.ReadBody(body) 250 assert.NilError(c, err) 251 assert.Equal(c, res.StatusCode, http.StatusBadRequest, "output: %q", string(b)) 252 253 // The warning specific to demoting the last manager is best-effort and 254 // won't appear until the Role field of the demoted manager has been 255 // updated. 
256 // Yes, I know this looks silly, but checker.Matches is broken, since 257 // it anchors the regexp contrary to the documentation, and this makes 258 // it impossible to match something that includes a line break. 259 if !strings.Contains(string(b), "last manager of the swarm") { 260 assert.Assert(c, strings.Contains(string(b), "this would result in a loss of quorum")) 261 } 262 info = d1.SwarmInfo(ctx, c) 263 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 264 assert.Equal(c, info.ControlAvailable, true) 265 266 // Promote already demoted node 267 d1.UpdateNode(ctx, c, d2.NodeID(), func(n *swarm.Node) { 268 n.Spec.Role = swarm.NodeRoleManager 269 }) 270 271 poll.WaitOn(c, pollCheck(c, d2.CheckControlAvailable(ctx), checker.True()), poll.WithTimeout(defaultReconciliationTimeout)) 272 } 273 274 func (s *DockerSwarmSuite) TestAPISwarmLeaderProxy(c *testing.T) { 275 ctx := testutil.GetContext(c) 276 // add three managers, one of these is leader 277 d1 := s.AddDaemon(ctx, c, true, true) 278 d2 := s.AddDaemon(ctx, c, true, true) 279 d3 := s.AddDaemon(ctx, c, true, true) 280 281 // start a service by hitting each of the 3 managers 282 d1.CreateService(ctx, c, simpleTestService, func(s *swarm.Service) { 283 s.Spec.Name = "test1" 284 }) 285 d2.CreateService(ctx, c, simpleTestService, func(s *swarm.Service) { 286 s.Spec.Name = "test2" 287 }) 288 d3.CreateService(ctx, c, simpleTestService, func(s *swarm.Service) { 289 s.Spec.Name = "test3" 290 }) 291 292 // 3 services should be started now, because the requests were proxied to leader 293 // query each node and make sure it returns 3 services 294 for _, d := range []*daemon.Daemon{d1, d2, d3} { 295 services := d.ListServices(ctx, c) 296 assert.Equal(c, len(services), 3) 297 } 298 } 299 300 func (s *DockerSwarmSuite) TestAPISwarmLeaderElection(c *testing.T) { 301 ctx := testutil.GetContext(c) 302 if runtime.GOARCH == "s390x" { 303 c.Skip("Disabled on s390x") 304 } 305 if runtime.GOARCH == "ppc64le" { 306 
c.Skip("Disabled on ppc64le") 307 } 308 309 // Create 3 nodes 310 d1 := s.AddDaemon(ctx, c, true, true) 311 d2 := s.AddDaemon(ctx, c, true, true) 312 d3 := s.AddDaemon(ctx, c, true, true) 313 314 // assert that the first node we made is the leader, and the other two are followers 315 assert.Equal(c, d1.GetNode(ctx, c, d1.NodeID()).ManagerStatus.Leader, true) 316 assert.Equal(c, d1.GetNode(ctx, c, d2.NodeID()).ManagerStatus.Leader, false) 317 assert.Equal(c, d1.GetNode(ctx, c, d3.NodeID()).ManagerStatus.Leader, false) 318 319 d1.Stop(c) 320 321 var ( 322 leader *daemon.Daemon // keep track of leader 323 followers []*daemon.Daemon // keep track of followers 324 ) 325 var lastErr error 326 checkLeader := func(nodes ...*daemon.Daemon) checkF { 327 return func(c *testing.T) (interface{}, string) { 328 // clear these out before each run 329 leader = nil 330 followers = nil 331 for _, d := range nodes { 332 n := d.GetNode(ctx, c, d.NodeID(), func(err error) bool { 333 if strings.Contains(err.Error(), context.DeadlineExceeded.Error()) || strings.Contains(err.Error(), "swarm does not have a leader") { 334 lastErr = err 335 return true 336 } 337 return false 338 }) 339 if n == nil { 340 return false, fmt.Sprintf("failed to get node: %v", lastErr) 341 } 342 if n.ManagerStatus.Leader { 343 leader = d 344 } else { 345 followers = append(followers, d) 346 } 347 } 348 349 if leader == nil { 350 return false, "no leader elected" 351 } 352 353 return true, fmt.Sprintf("elected %v", leader.ID()) 354 } 355 } 356 357 // wait for an election to occur 358 c.Logf("Waiting for election to occur...") 359 poll.WaitOn(c, pollCheck(c, checkLeader(d2, d3), checker.True()), poll.WithTimeout(defaultReconciliationTimeout)) 360 361 // assert that we have a new leader 362 assert.Assert(c, leader != nil) 363 364 // Keep track of the current leader, since we want that to be chosen. 
365 stableleader := leader 366 367 // add the d1, the initial leader, back 368 d1.StartNode(c) 369 370 // wait for possible election 371 c.Logf("Waiting for possible election...") 372 poll.WaitOn(c, pollCheck(c, checkLeader(d1, d2, d3), checker.True()), poll.WithTimeout(defaultReconciliationTimeout)) 373 // pick out the leader and the followers again 374 375 // verify that we still only have 1 leader and 2 followers 376 assert.Assert(c, leader != nil) 377 assert.Equal(c, len(followers), 2) 378 // and that after we added d1 back, the leader hasn't changed 379 assert.Equal(c, leader.NodeID(), stableleader.NodeID()) 380 } 381 382 func (s *DockerSwarmSuite) TestAPISwarmRaftQuorum(c *testing.T) { 383 ctx := testutil.GetContext(c) 384 if runtime.GOARCH == "s390x" { 385 c.Skip("Disabled on s390x") 386 } 387 if runtime.GOARCH == "ppc64le" { 388 c.Skip("Disabled on ppc64le") 389 } 390 391 d1 := s.AddDaemon(ctx, c, true, true) 392 d2 := s.AddDaemon(ctx, c, true, true) 393 d3 := s.AddDaemon(ctx, c, true, true) 394 395 d1.CreateService(ctx, c, simpleTestService) 396 397 d2.Stop(c) 398 399 // make sure there is a leader 400 poll.WaitOn(c, pollCheck(c, d1.CheckLeader(ctx), checker.IsNil()), poll.WithTimeout(defaultReconciliationTimeout)) 401 402 d1.CreateService(ctx, c, simpleTestService, func(s *swarm.Service) { 403 s.Spec.Name = "top1" 404 }) 405 406 d3.Stop(c) 407 408 var service swarm.Service 409 simpleTestService(&service) 410 service.Spec.Name = "top2" 411 cli := d1.NewClientT(c) 412 defer cli.Close() 413 414 // d1 will eventually step down from leader because there is no longer an active quorum, wait for that to happen 415 poll.WaitOn(c, pollCheck(c, func(c *testing.T) (interface{}, string) { 416 _, err := cli.ServiceCreate(testutil.GetContext(c), service.Spec, types.ServiceCreateOptions{}) 417 return err.Error(), "" 418 }, checker.Contains("Make sure more than half of the managers are online.")), poll.WithTimeout(defaultReconciliationTimeout*2)) 419 420 d2.StartNode(c) 
421 422 // make sure there is a leader 423 poll.WaitOn(c, pollCheck(c, d1.CheckLeader(ctx), checker.IsNil()), poll.WithTimeout(defaultReconciliationTimeout)) 424 425 d1.CreateService(ctx, c, simpleTestService, func(s *swarm.Service) { 426 s.Spec.Name = "top3" 427 }) 428 } 429 430 func (s *DockerSwarmSuite) TestAPISwarmLeaveRemovesContainer(c *testing.T) { 431 ctx := testutil.GetContext(c) 432 d := s.AddDaemon(ctx, c, true, true) 433 434 instances := 2 435 d.CreateService(ctx, c, simpleTestService, setInstances(instances)) 436 437 id, err := d.Cmd("run", "-d", "busybox", "top") 438 assert.NilError(c, err, id) 439 id = strings.TrimSpace(id) 440 441 poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount(ctx), checker.Equals(instances+1)), poll.WithTimeout(defaultReconciliationTimeout)) 442 443 assert.ErrorContains(c, d.SwarmLeave(ctx, c, false), "") 444 assert.NilError(c, d.SwarmLeave(ctx, c, true)) 445 446 poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount(ctx), checker.Equals(1)), poll.WithTimeout(defaultReconciliationTimeout)) 447 448 id2, err := d.Cmd("ps", "-q") 449 assert.NilError(c, err, id2) 450 assert.Assert(c, strings.HasPrefix(id, strings.TrimSpace(id2))) 451 } 452 453 // #23629 454 func (s *DockerSwarmSuite) TestAPISwarmLeaveOnPendingJoin(c *testing.T) { 455 testRequires(c, Network) 456 457 ctx := testutil.GetContext(c) 458 s.AddDaemon(ctx, c, true, true) 459 d2 := s.AddDaemon(ctx, c, false, false) 460 461 id, err := d2.Cmd("run", "-d", "busybox", "top") 462 assert.NilError(c, err, id) 463 id = strings.TrimSpace(id) 464 465 c2 := d2.NewClientT(c) 466 err = c2.SwarmJoin(testutil.GetContext(c), swarm.JoinRequest{ 467 ListenAddr: d2.SwarmListenAddr(), 468 RemoteAddrs: []string{"123.123.123.123:1234"}, 469 }) 470 assert.ErrorContains(c, err, "Timeout was reached") 471 472 info := d2.SwarmInfo(ctx, c) 473 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStatePending) 474 475 assert.NilError(c, d2.SwarmLeave(ctx, c, true)) 476 477 poll.WaitOn(c, 
pollCheck(c, d2.CheckActiveContainerCount(ctx), checker.Equals(1)), poll.WithTimeout(defaultReconciliationTimeout)) 478 479 id2, err := d2.Cmd("ps", "-q") 480 assert.NilError(c, err, id2) 481 assert.Assert(c, strings.HasPrefix(id, strings.TrimSpace(id2))) 482 } 483 484 // #23705 485 func (s *DockerSwarmSuite) TestAPISwarmRestoreOnPendingJoin(c *testing.T) { 486 testRequires(c, Network) 487 488 ctx := testutil.GetContext(c) 489 d := s.AddDaemon(ctx, c, false, false) 490 client := d.NewClientT(c) 491 err := client.SwarmJoin(testutil.GetContext(c), swarm.JoinRequest{ 492 ListenAddr: d.SwarmListenAddr(), 493 RemoteAddrs: []string{"123.123.123.123:1234"}, 494 }) 495 assert.ErrorContains(c, err, "Timeout was reached") 496 497 poll.WaitOn(c, pollCheck(c, d.CheckLocalNodeState(ctx), checker.Equals(swarm.LocalNodeStatePending)), poll.WithTimeout(defaultReconciliationTimeout)) 498 499 d.RestartNode(c) 500 501 info := d.SwarmInfo(ctx, c) 502 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateInactive) 503 } 504 505 func (s *DockerSwarmSuite) TestAPISwarmManagerRestore(c *testing.T) { 506 ctx := testutil.GetContext(c) 507 d1 := s.AddDaemon(ctx, c, true, true) 508 509 instances := 2 510 id := d1.CreateService(ctx, c, simpleTestService, setInstances(instances)) 511 512 d1.GetService(ctx, c, id) 513 d1.RestartNode(c) 514 d1.GetService(ctx, c, id) 515 516 d2 := s.AddDaemon(ctx, c, true, true) 517 d2.GetService(ctx, c, id) 518 d2.RestartNode(c) 519 d2.GetService(ctx, c, id) 520 521 d3 := s.AddDaemon(ctx, c, true, true) 522 d3.GetService(ctx, c, id) 523 d3.RestartNode(c) 524 d3.GetService(ctx, c, id) 525 526 err := d3.Kill() 527 assert.NilError(c, err) 528 time.Sleep(1 * time.Second) // time to handle signal 529 d3.StartNode(c) 530 d3.GetService(ctx, c, id) 531 } 532 533 func (s *DockerSwarmSuite) TestAPISwarmScaleNoRollingUpdate(c *testing.T) { 534 ctx := testutil.GetContext(c) 535 d := s.AddDaemon(ctx, c, true, true) 536 537 instances := 2 538 id := d.CreateService(ctx, c, 
simpleTestService, setInstances(instances)) 539 540 poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount(ctx), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout)) 541 containers := d.ActiveContainers(ctx, c) 542 instances = 4 543 d.UpdateService(ctx, c, d.GetService(ctx, c, id), setInstances(instances)) 544 poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount(ctx), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout)) 545 containers2 := d.ActiveContainers(ctx, c) 546 547 loop0: 548 for _, c1 := range containers { 549 for _, c2 := range containers2 { 550 if c1 == c2 { 551 continue loop0 552 } 553 } 554 c.Errorf("container %v not found in new set %#v", c1, containers2) 555 } 556 } 557 558 func (s *DockerSwarmSuite) TestAPISwarmInvalidAddress(c *testing.T) { 559 ctx := testutil.GetContext(c) 560 d := s.AddDaemon(ctx, c, false, false) 561 req := swarm.InitRequest{ 562 ListenAddr: "", 563 } 564 res, _, err := request.Post(testutil.GetContext(c), "/swarm/init", request.Host(d.Sock()), request.JSONBody(req)) 565 assert.NilError(c, err) 566 assert.Equal(c, res.StatusCode, http.StatusBadRequest) 567 568 req2 := swarm.JoinRequest{ 569 ListenAddr: "0.0.0.0:2377", 570 RemoteAddrs: []string{""}, 571 } 572 res, _, err = request.Post(testutil.GetContext(c), "/swarm/join", request.Host(d.Sock()), request.JSONBody(req2)) 573 assert.NilError(c, err) 574 assert.Equal(c, res.StatusCode, http.StatusBadRequest) 575 } 576 577 func (s *DockerSwarmSuite) TestAPISwarmForceNewCluster(c *testing.T) { 578 ctx := testutil.GetContext(c) 579 d1 := s.AddDaemon(ctx, c, true, true) 580 d2 := s.AddDaemon(ctx, c, true, true) 581 582 instances := 2 583 id := d1.CreateService(ctx, c, simpleTestService, setInstances(instances)) 584 poll.WaitOn(c, pollCheck(c, reducedCheck(sumAsIntegers, d1.CheckActiveContainerCount(ctx), d2.CheckActiveContainerCount(ctx)), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout)) 585 586 // drain d2, 
all containers should move to d1 587 d1.UpdateNode(ctx, c, d2.NodeID(), func(n *swarm.Node) { 588 n.Spec.Availability = swarm.NodeAvailabilityDrain 589 }) 590 poll.WaitOn(c, pollCheck(c, d1.CheckActiveContainerCount(ctx), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout)) 591 poll.WaitOn(c, pollCheck(c, d2.CheckActiveContainerCount(ctx), checker.Equals(0)), poll.WithTimeout(defaultReconciliationTimeout)) 592 593 d2.Stop(c) 594 595 d1.SwarmInit(ctx, c, swarm.InitRequest{ 596 ForceNewCluster: true, 597 Spec: swarm.Spec{}, 598 }) 599 600 poll.WaitOn(c, pollCheck(c, d1.CheckActiveContainerCount(ctx), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout)) 601 602 d3 := s.AddDaemon(ctx, c, true, true) 603 info := d3.SwarmInfo(ctx, c) 604 assert.Equal(c, info.ControlAvailable, true) 605 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 606 607 instances = 4 608 d3.UpdateService(ctx, c, d3.GetService(ctx, c, id), setInstances(instances)) 609 610 poll.WaitOn(c, pollCheck(c, reducedCheck(sumAsIntegers, d1.CheckActiveContainerCount(ctx), d3.CheckActiveContainerCount(ctx)), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout)) 611 } 612 613 func simpleTestService(s *swarm.Service) { 614 ureplicas := uint64(1) 615 restartDelay := 100 * time.Millisecond 616 617 s.Spec = swarm.ServiceSpec{ 618 TaskTemplate: swarm.TaskSpec{ 619 ContainerSpec: &swarm.ContainerSpec{ 620 Image: "busybox:latest", 621 Command: []string{"/bin/top"}, 622 }, 623 RestartPolicy: &swarm.RestartPolicy{ 624 Delay: &restartDelay, 625 }, 626 }, 627 Mode: swarm.ServiceMode{ 628 Replicated: &swarm.ReplicatedService{ 629 Replicas: &ureplicas, 630 }, 631 }, 632 } 633 s.Spec.Name = "top" 634 } 635 636 func serviceForUpdate(s *swarm.Service) { 637 ureplicas := uint64(1) 638 restartDelay := 100 * time.Millisecond 639 640 s.Spec = swarm.ServiceSpec{ 641 TaskTemplate: swarm.TaskSpec{ 642 ContainerSpec: &swarm.ContainerSpec{ 643 Image: 
"busybox:latest", 644 Command: []string{"/bin/top"}, 645 }, 646 RestartPolicy: &swarm.RestartPolicy{ 647 Delay: &restartDelay, 648 }, 649 }, 650 Mode: swarm.ServiceMode{ 651 Replicated: &swarm.ReplicatedService{ 652 Replicas: &ureplicas, 653 }, 654 }, 655 UpdateConfig: &swarm.UpdateConfig{ 656 Parallelism: 2, 657 Delay: 4 * time.Second, 658 FailureAction: swarm.UpdateFailureActionContinue, 659 }, 660 RollbackConfig: &swarm.UpdateConfig{ 661 Parallelism: 3, 662 Delay: 4 * time.Second, 663 FailureAction: swarm.UpdateFailureActionContinue, 664 }, 665 } 666 s.Spec.Name = "updatetest" 667 } 668 669 func setInstances(replicas int) testdaemon.ServiceConstructor { 670 ureplicas := uint64(replicas) 671 return func(s *swarm.Service) { 672 s.Spec.Mode = swarm.ServiceMode{ 673 Replicated: &swarm.ReplicatedService{ 674 Replicas: &ureplicas, 675 }, 676 } 677 } 678 } 679 680 func setUpdateOrder(order string) testdaemon.ServiceConstructor { 681 return func(s *swarm.Service) { 682 if s.Spec.UpdateConfig == nil { 683 s.Spec.UpdateConfig = &swarm.UpdateConfig{} 684 } 685 s.Spec.UpdateConfig.Order = order 686 } 687 } 688 689 func setRollbackOrder(order string) testdaemon.ServiceConstructor { 690 return func(s *swarm.Service) { 691 if s.Spec.RollbackConfig == nil { 692 s.Spec.RollbackConfig = &swarm.UpdateConfig{} 693 } 694 s.Spec.RollbackConfig.Order = order 695 } 696 } 697 698 func setImage(image string) testdaemon.ServiceConstructor { 699 return func(s *swarm.Service) { 700 if s.Spec.TaskTemplate.ContainerSpec == nil { 701 s.Spec.TaskTemplate.ContainerSpec = &swarm.ContainerSpec{} 702 } 703 s.Spec.TaskTemplate.ContainerSpec.Image = image 704 } 705 } 706 707 func setFailureAction(failureAction string) testdaemon.ServiceConstructor { 708 return func(s *swarm.Service) { 709 s.Spec.UpdateConfig.FailureAction = failureAction 710 } 711 } 712 713 func setMaxFailureRatio(maxFailureRatio float32) testdaemon.ServiceConstructor { 714 return func(s *swarm.Service) { 715 
s.Spec.UpdateConfig.MaxFailureRatio = maxFailureRatio 716 } 717 } 718 719 func setParallelism(parallelism uint64) testdaemon.ServiceConstructor { 720 return func(s *swarm.Service) { 721 s.Spec.UpdateConfig.Parallelism = parallelism 722 } 723 } 724 725 func setConstraints(constraints []string) testdaemon.ServiceConstructor { 726 return func(s *swarm.Service) { 727 if s.Spec.TaskTemplate.Placement == nil { 728 s.Spec.TaskTemplate.Placement = &swarm.Placement{} 729 } 730 s.Spec.TaskTemplate.Placement.Constraints = constraints 731 } 732 } 733 734 func setPlacementPrefs(prefs []swarm.PlacementPreference) testdaemon.ServiceConstructor { 735 return func(s *swarm.Service) { 736 if s.Spec.TaskTemplate.Placement == nil { 737 s.Spec.TaskTemplate.Placement = &swarm.Placement{} 738 } 739 s.Spec.TaskTemplate.Placement.Preferences = prefs 740 } 741 } 742 743 func setGlobalMode(s *swarm.Service) { 744 s.Spec.Mode = swarm.ServiceMode{ 745 Global: &swarm.GlobalService{}, 746 } 747 } 748 749 func checkClusterHealth(c *testing.T, cl []*daemon.Daemon, managerCount, workerCount int) { 750 var totalMCount, totalWCount int 751 752 ctx := testutil.GetContext(c) 753 for _, d := range cl { 754 var info swarm.Info 755 756 // check info in a poll.WaitOn(), because if the cluster doesn't have a leader, `info` will return an error 757 checkInfo := func(c *testing.T) (interface{}, string) { 758 client := d.NewClientT(c) 759 daemonInfo, err := client.Info(ctx) 760 info = daemonInfo.Swarm 761 return err, "cluster not ready in time" 762 } 763 poll.WaitOn(c, pollCheck(c, checkInfo, checker.IsNil()), poll.WithTimeout(defaultReconciliationTimeout)) 764 if !info.ControlAvailable { 765 totalWCount++ 766 continue 767 } 768 769 var leaderFound bool 770 totalMCount++ 771 var mCount, wCount int 772 773 for _, n := range d.ListNodes(ctx, c) { 774 waitReady := func(c *testing.T) (interface{}, string) { 775 if n.Status.State == swarm.NodeStateReady { 776 return true, "" 777 } 778 nn := d.GetNode(ctx, c, n.ID) 
779 n = *nn 780 return n.Status.State == swarm.NodeStateReady, fmt.Sprintf("state of node %s, reported by %s", n.ID, d.NodeID()) 781 } 782 poll.WaitOn(c, pollCheck(c, waitReady, checker.True()), poll.WithTimeout(defaultReconciliationTimeout)) 783 784 waitActive := func(c *testing.T) (interface{}, string) { 785 if n.Spec.Availability == swarm.NodeAvailabilityActive { 786 return true, "" 787 } 788 nn := d.GetNode(ctx, c, n.ID) 789 n = *nn 790 return n.Spec.Availability == swarm.NodeAvailabilityActive, fmt.Sprintf("availability of node %s, reported by %s", n.ID, d.NodeID()) 791 } 792 poll.WaitOn(c, pollCheck(c, waitActive, checker.True()), poll.WithTimeout(defaultReconciliationTimeout)) 793 794 if n.Spec.Role == swarm.NodeRoleManager { 795 assert.Assert(c, n.ManagerStatus != nil, "manager status of node %s (manager), reported by %s", n.ID, d.NodeID()) 796 if n.ManagerStatus.Leader { 797 leaderFound = true 798 } 799 mCount++ 800 } else { 801 assert.Assert(c, n.ManagerStatus == nil, "manager status of node %s (worker), reported by %s", n.ID, d.NodeID()) 802 wCount++ 803 } 804 } 805 assert.Equal(c, leaderFound, true, "lack of leader reported by node %s", info.NodeID) 806 assert.Equal(c, mCount, managerCount, "managers count reported by node %s", info.NodeID) 807 assert.Equal(c, wCount, workerCount, "workers count reported by node %s", info.NodeID) 808 } 809 assert.Equal(c, totalMCount, managerCount) 810 assert.Equal(c, totalWCount, workerCount) 811 } 812 813 func (s *DockerSwarmSuite) TestAPISwarmRestartCluster(c *testing.T) { 814 ctx := testutil.GetContext(c) 815 mCount, wCount := 5, 1 816 817 var nodes []*daemon.Daemon 818 for i := 0; i < mCount; i++ { 819 manager := s.AddDaemon(ctx, c, true, true) 820 info := manager.SwarmInfo(ctx, c) 821 assert.Equal(c, info.ControlAvailable, true) 822 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 823 nodes = append(nodes, manager) 824 } 825 826 for i := 0; i < wCount; i++ { 827 worker := s.AddDaemon(ctx, c, true, 
false) 828 info := worker.SwarmInfo(ctx, c) 829 assert.Equal(c, info.ControlAvailable, false) 830 assert.Equal(c, info.LocalNodeState, swarm.LocalNodeStateActive) 831 nodes = append(nodes, worker) 832 } 833 834 // stop whole cluster 835 { 836 var wg sync.WaitGroup 837 wg.Add(len(nodes)) 838 errs := make(chan error, len(nodes)) 839 840 for _, d := range nodes { 841 go func(daemon *daemon.Daemon) { 842 defer wg.Done() 843 if err := daemon.StopWithError(); err != nil { 844 errs <- err 845 } 846 }(d) 847 } 848 wg.Wait() 849 close(errs) 850 for err := range errs { 851 assert.NilError(c, err) 852 } 853 } 854 855 // start whole cluster 856 { 857 var wg sync.WaitGroup 858 wg.Add(len(nodes)) 859 errs := make(chan error, len(nodes)) 860 861 for _, d := range nodes { 862 go func(daemon *daemon.Daemon) { 863 defer wg.Done() 864 if err := daemon.StartWithError("--iptables=false"); err != nil { 865 errs <- err 866 } 867 }(d) 868 } 869 wg.Wait() 870 close(errs) 871 for err := range errs { 872 assert.NilError(c, err) 873 } 874 } 875 876 checkClusterHealth(c, nodes, mCount, wCount) 877 } 878 879 func (s *DockerSwarmSuite) TestAPISwarmServicesUpdateWithName(c *testing.T) { 880 ctx := testutil.GetContext(c) 881 d := s.AddDaemon(ctx, c, true, true) 882 883 instances := 2 884 id := d.CreateService(ctx, c, simpleTestService, setInstances(instances)) 885 poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount(ctx), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout)) 886 887 service := d.GetService(ctx, c, id) 888 instances = 5 889 890 setInstances(instances)(service) 891 cli := d.NewClientT(c) 892 defer cli.Close() 893 _, err := cli.ServiceUpdate(ctx, service.Spec.Name, service.Version, service.Spec, types.ServiceUpdateOptions{}) 894 assert.NilError(c, err) 895 poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount(ctx), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout)) 896 } 897 898 // Unlocking an unlocked swarm results in an error 899 
func (s *DockerSwarmSuite) TestAPISwarmUnlockNotLocked(c *testing.T) {
	ctx := testutil.GetContext(c)
	d := s.AddDaemon(ctx, c, true, true)
	// The key's value is irrelevant: unlock must fail because the swarm is
	// not locked at all, not because the key is wrong.
	err := d.SwarmUnlock(c, swarm.UnlockRequest{UnlockKey: "wrong-key"})
	assert.ErrorContains(c, err, "swarm is not locked")
}

// #29885
// TestAPISwarmErrorHandling checks that swarm init surfaces a clean
// "address already in use" error when the default swarm port is already bound.
func (s *DockerSwarmSuite) TestAPISwarmErrorHandling(c *testing.T) {
	ctx := testutil.GetContext(c)
	// Occupy the default swarm port so the subsequent init cannot bind it.
	ln, err := net.Listen("tcp", fmt.Sprintf(":%d", defaultSwarmPort))
	assert.NilError(c, err)
	defer ln.Close()
	d := s.AddDaemon(ctx, c, false, false)
	client := d.NewClientT(c)
	// NOTE(review): ctx from above is unused here; testutil.GetContext(c) is
	// called again — equivalent, but inconsistent with the rest of the file.
	_, err = client.SwarmInit(testutil.GetContext(c), swarm.InitRequest{
		ListenAddr: d.SwarmListenAddr(),
	})
	assert.ErrorContains(c, err, "address already in use")
}

// Test case for 30178
func (s *DockerSwarmSuite) TestAPISwarmHealthcheckNone(c *testing.T) {
	// Issue #36386 can be an independent one, which is worth further investigation.
	c.Skip("Root cause of Issue #36386 is needed")
	ctx := testutil.GetContext(c)
	d := s.AddDaemon(ctx, c, true, true)

	out, err := d.Cmd("network", "create", "-d", "overlay", "lb")
	assert.NilError(c, err, out)

	instances := 1
	d.CreateService(ctx, c, simpleTestService, setInstances(instances), func(s *swarm.Service) {
		if s.Spec.TaskTemplate.ContainerSpec == nil {
			s.Spec.TaskTemplate.ContainerSpec = &swarm.ContainerSpec{}
		}
		// Explicitly empty healthcheck config — the shape exercised by #30178.
		s.Spec.TaskTemplate.ContainerSpec.Healthcheck = &container.HealthConfig{}
		s.Spec.TaskTemplate.Networks = []swarm.NetworkAttachmentConfig{
			{Target: "lb"},
		}
	})

	poll.WaitOn(c, pollCheck(c, d.CheckActiveContainerCount(ctx), checker.Equals(instances)), poll.WithTimeout(defaultReconciliationTimeout))

	containers := d.ActiveContainers(testutil.GetContext(c), c)

	// Ping "top" from inside the task over the "lb" overlay network.
	// NOTE(review): "top" is presumably the service's DNS name — confirm
	// against simpleTestService's naming.
	out, err = d.Cmd("exec", containers[0], "ping", "-c1", "-W3", "top")
	assert.NilError(c, err, out)
}

func (s *DockerSwarmSuite)
TestSwarmRepeatedRootRotation(c *testing.T) {
	ctx := testutil.GetContext(c)
	m := s.AddDaemon(ctx, c, true, true)
	w := s.AddDaemon(ctx, c, true, false)

	info := m.SwarmInfo(ctx, c)

	currentTrustRoot := info.Cluster.TLSInfo.TrustRoot

	// rotate multiple times
	for i := 0; i < 4; i++ {
		var err error
		var cert, key []byte
		// Alternate: odd iterations supply an explicit new root CA cert/key
		// via cfssl; even iterations leave cert/key nil so swarm generates
		// its own root for the rotation.
		if i%2 != 0 {
			cert, _, key, err = initca.New(&csr.CertificateRequest{
				CN:         "newRoot",
				KeyRequest: csr.NewKeyRequest(),
				CA:         &csr.CAConfig{Expiry: ca.RootCAExpiration},
			})
			assert.NilError(c, err)
		}
		expectedCert := string(cert)
		// Bumping ForceRotate triggers a root rotation even when the CA
		// cert/key are empty.
		m.UpdateSwarm(c, func(s *swarm.Spec) {
			s.CAConfig.SigningCACert = expectedCert
			s.CAConfig.SigningCAKey = string(key)
			s.CAConfig.ForceRotate++
		})

		// poll to make sure update succeeds (18 * 250ms = 4.5s budget)
		var clusterTLSInfo swarm.TLSInfo
		for j := 0; j < 18; j++ {
			info := m.SwarmInfo(ctx, c)

			// the desired CA cert and key is always redacted
			assert.Equal(c, info.Cluster.Spec.CAConfig.SigningCAKey, "")
			assert.Equal(c, info.Cluster.Spec.CAConfig.SigningCACert, "")

			clusterTLSInfo = info.Cluster.TLSInfo

			// if root rotation is done and the trust root has changed, we don't have to poll anymore
			if !info.Cluster.RootRotationInProgress && clusterTLSInfo.TrustRoot != currentTrustRoot {
				break
			}

			// root rotation not done
			time.Sleep(250 * time.Millisecond)
		}
		// Only when we supplied the cert ourselves can we predict the new root.
		if cert != nil {
			assert.Equal(c, clusterTLSInfo.TrustRoot, expectedCert)
		}
		// could take another second or two for the nodes to trust the new roots after they've all gotten
		// new TLS certificates
		for j := 0; j < 18; j++ {
			mInfo := m.GetNode(ctx, c, m.NodeID()).Description.TLSInfo
			wInfo := m.GetNode(ctx, c, w.NodeID()).Description.TLSInfo

			if mInfo.TrustRoot == clusterTLSInfo.TrustRoot && wInfo.TrustRoot == clusterTLSInfo.TrustRoot {
				break
			}

			// nodes don't trust root certs yet
			time.Sleep(250 * time.Millisecond)
		}

		// Both manager and worker must end up trusting the cluster root.
		assert.DeepEqual(c, m.GetNode(ctx, c, m.NodeID()).Description.TLSInfo, clusterTLSInfo)
		assert.DeepEqual(c, m.GetNode(ctx, c, w.NodeID()).Description.TLSInfo, clusterTLSInfo)
		// Next iteration compares against the root we just rotated to.
		currentTrustRoot = clusterTLSInfo.TrustRoot
	}
}

// TestAPINetworkInspectWithScope verifies that an overlay network created on
// a swarm manager inspects with scope "swarm", and that asking for the same
// name with Scope "local" yields a not-found error.
func (s *DockerSwarmSuite) TestAPINetworkInspectWithScope(c *testing.T) {
	ctx := testutil.GetContext(c)
	d := s.AddDaemon(ctx, c, true, true)

	name := "test-scoped-network"
	apiclient := d.NewClientT(c)

	resp, err := apiclient.NetworkCreate(ctx, name, types.NetworkCreate{Driver: "overlay"})
	assert.NilError(c, err)

	network, err := apiclient.NetworkInspect(ctx, name, types.NetworkInspectOptions{})
	assert.NilError(c, err)
	assert.Check(c, is.Equal("swarm", network.Scope))
	assert.Check(c, is.Equal(resp.ID, network.ID))

	// The swarm-scoped network must not be visible under the "local" scope.
	_, err = apiclient.NetworkInspect(ctx, name, types.NetworkInspectOptions{Scope: "local"})
	assert.Check(c, is.ErrorType(err, errdefs.IsNotFound))
}