github.com/portworx/docker@v1.12.1/integration-cli/docker_api_swarm_test.go (about) 1 // +build !windows 2 3 package main 4 5 import ( 6 "fmt" 7 "net/http" 8 "os" 9 "path/filepath" 10 "strconv" 11 "strings" 12 "sync" 13 "syscall" 14 "time" 15 16 "github.com/docker/docker/pkg/integration/checker" 17 "github.com/docker/engine-api/types/swarm" 18 "github.com/go-check/check" 19 ) 20 21 var defaultReconciliationTimeout = 30 * time.Second 22 23 func (s *DockerSwarmSuite) TestApiSwarmInit(c *check.C) { 24 testRequires(c, Network) 25 // todo: should find a better way to verify that components are running than /info 26 d1 := s.AddDaemon(c, true, true) 27 info, err := d1.info() 28 c.Assert(err, checker.IsNil) 29 c.Assert(info.ControlAvailable, checker.True) 30 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 31 32 d2 := s.AddDaemon(c, true, false) 33 info, err = d2.info() 34 c.Assert(err, checker.IsNil) 35 c.Assert(info.ControlAvailable, checker.False) 36 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 37 38 // Leaving cluster 39 c.Assert(d2.Leave(false), checker.IsNil) 40 41 info, err = d2.info() 42 c.Assert(err, checker.IsNil) 43 c.Assert(info.ControlAvailable, checker.False) 44 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 45 46 c.Assert(d2.Join(swarm.JoinRequest{JoinToken: d1.joinTokens(c).Worker, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil) 47 48 info, err = d2.info() 49 c.Assert(err, checker.IsNil) 50 c.Assert(info.ControlAvailable, checker.False) 51 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 52 53 // Current state restoring after restarts 54 err = d1.Stop() 55 c.Assert(err, checker.IsNil) 56 err = d2.Stop() 57 c.Assert(err, checker.IsNil) 58 59 err = d1.Start() 60 c.Assert(err, checker.IsNil) 61 err = d2.Start() 62 c.Assert(err, checker.IsNil) 63 64 info, err = d1.info() 65 c.Assert(err, checker.IsNil) 66 c.Assert(info.ControlAvailable, checker.True) 67 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 68 69 info, err = d2.info() 70 c.Assert(err, checker.IsNil) 71 c.Assert(info.ControlAvailable, checker.False) 72 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 73 } 74 75 func (s *DockerSwarmSuite) TestApiSwarmJoinToken(c *check.C) { 76 testRequires(c, Network) 77 d1 := s.AddDaemon(c, false, false) 78 c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil) 79 80 d2 := s.AddDaemon(c, false, false) 81 err := d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}}) 82 c.Assert(err, checker.NotNil) 83 c.Assert(err.Error(), checker.Contains, "join token is necessary") 84 info, err := d2.info() 85 c.Assert(err, checker.IsNil) 86 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 87 88 err = d2.Join(swarm.JoinRequest{JoinToken: "foobaz", RemoteAddrs: []string{d1.listenAddr}}) 89 c.Assert(err, checker.NotNil) 90 c.Assert(err.Error(), checker.Contains, "join token is necessary") 91 info, err = d2.info() 92 c.Assert(err, checker.IsNil) 93 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 94 95 workerToken := d1.joinTokens(c).Worker 96 97 c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil) 98 info, err = d2.info() 99 c.Assert(err, checker.IsNil) 100 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 101 c.Assert(d2.Leave(false), checker.IsNil) 102 info, err = d2.info() 103 c.Assert(err, checker.IsNil) 104 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 105 106 // change tokens 107 d1.rotateTokens(c) 108 109 err = d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}) 110 c.Assert(err, checker.NotNil) 111 c.Assert(err.Error(), checker.Contains, "join token is necessary") 112 info, err = d2.info() 113 c.Assert(err, checker.IsNil) 114 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 115 116 workerToken = d1.joinTokens(c).Worker 117 118 c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil) 119 info, err = d2.info() 120 c.Assert(err, checker.IsNil) 121 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 122 c.Assert(d2.Leave(false), checker.IsNil) 123 info, err = d2.info() 124 c.Assert(err, checker.IsNil) 125 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 126 127 // change spec, don't change tokens 128 d1.updateSwarm(c, func(s *swarm.Spec) {}) 129 130 err = d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}}) 131 c.Assert(err, checker.NotNil) 132 c.Assert(err.Error(), checker.Contains, "join token is necessary") 133 info, err = d2.info() 134 c.Assert(err, checker.IsNil) 135 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 136 137 c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil) 138 info, err = d2.info() 139 c.Assert(err, checker.IsNil) 140 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 141 c.Assert(d2.Leave(false), checker.IsNil) 142 info, err = d2.info() 143 c.Assert(err, checker.IsNil) 144 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 145 } 146 147 func (s *DockerSwarmSuite) TestApiSwarmCAHash(c *check.C) { 148 testRequires(c, Network) 149 d1 := s.AddDaemon(c, true, true) 150 d2 := s.AddDaemon(c, false, false) 151 splitToken := strings.Split(d1.joinTokens(c).Worker, "-") 152 splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e" 153 replacementToken := strings.Join(splitToken, "-") 154 err := d2.Join(swarm.JoinRequest{JoinToken: replacementToken, RemoteAddrs: []string{d1.listenAddr}}) 155 c.Assert(err, checker.NotNil) 156 c.Assert(err.Error(), checker.Contains, "remote CA does not match fingerprint") 157 } 158 159 func (s *DockerSwarmSuite) TestApiSwarmPromoteDemote(c *check.C) { 160 testRequires(c, Network) 161 d1 := s.AddDaemon(c, false, false) 162 c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil) 163 d2 := s.AddDaemon(c, true, false) 164 165 info, err := d2.info() 166 c.Assert(err, checker.IsNil) 167 c.Assert(info.ControlAvailable, checker.False) 168 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 169 170 d1.updateNode(c, d2.NodeID, func(n *swarm.Node) { 171 n.Spec.Role = swarm.NodeRoleManager 172 }) 173 174 waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True) 175 176 d1.updateNode(c, d2.NodeID, func(n *swarm.Node) { 177 n.Spec.Role = swarm.NodeRoleWorker 178 }) 179 180 waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.False) 181 182 // Demoting last node should fail 183 node := d1.getNode(c, d1.NodeID) 184 node.Spec.Role = swarm.NodeRoleWorker 185 url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index) 186 status, out, err := d1.SockRequest("POST", url, node.Spec) 187 c.Assert(err, checker.IsNil) 188 c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("output: %q", string(out))) 189 c.Assert(string(out), checker.Contains, "last manager of the swarm") 190 info, err = d1.info() 191 c.Assert(err, checker.IsNil) 192 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 193 c.Assert(info.ControlAvailable, checker.True) 194 195 // Promote already demoted node 196 d1.updateNode(c, d2.NodeID, func(n *swarm.Node) { 197 n.Spec.Role = swarm.NodeRoleManager 198 }) 199 200 waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True) 201 } 202 203 func (s *DockerSwarmSuite) TestApiSwarmServicesEmptyList(c *check.C) { 204 testRequires(c, Network) 205 d := s.AddDaemon(c, true, true) 206 207 services := d.listServices(c) 208 c.Assert(services, checker.NotNil) 209 c.Assert(len(services), checker.Equals, 0, check.Commentf("services: %#v", services)) 210 } 211 212 func (s *DockerSwarmSuite) TestApiSwarmServicesCreate(c *check.C) { 213 testRequires(c, Network) 214 d := s.AddDaemon(c, true, true) 215 216 instances := 2 217 id := d.createService(c, simpleTestService, setInstances(instances)) 218 waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances) 219 220 service := d.getService(c, id) 221 instances = 5 222 d.updateService(c, service, setInstances(instances)) 223 waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances) 224 225 d.removeService(c, service.ID) 226 waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 0) 227 } 228 229 func (s *DockerSwarmSuite) TestApiSwarmServicesMultipleAgents(c *check.C) { 230 testRequires(c, Network) 231 d1 := s.AddDaemon(c, true, true) 232 d2 := s.AddDaemon(c, true, false) 233 d3 := s.AddDaemon(c, true, false) 234 235 time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks 236 237 instances := 9 238 id := d1.createService(c, simpleTestService, setInstances(instances)) 239 240 waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0) 241 waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0) 242 waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.GreaterThan, 0) 243 244 waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances) 245 246 // reconciliation on d2 node down 247 c.Assert(d2.Stop(), checker.IsNil) 248 249 waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances) 250 251 // test downscaling 252 instances = 5 253 d1.updateService(c, d1.getService(c, id), setInstances(instances)) 254 waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances) 255 256 } 257 258 func (s *DockerSwarmSuite) TestApiSwarmServicesCreateGlobal(c *check.C) { 259 testRequires(c, Network) 260 d1 := s.AddDaemon(c, true, true) 261 d2 := s.AddDaemon(c, true, false) 262 d3 := s.AddDaemon(c, true, false) 263 264 d1.createService(c, simpleTestService, setGlobalMode) 265 266 waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, 1) 267 waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1) 268 waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.Equals, 1) 269 270 d4 := s.AddDaemon(c, true, false) 271 d5 := s.AddDaemon(c, true, false) 272 273 waitAndAssert(c, defaultReconciliationTimeout, d4.checkActiveContainerCount, checker.Equals, 1) 274 waitAndAssert(c, defaultReconciliationTimeout, d5.checkActiveContainerCount, checker.Equals, 1) 275 } 276 277 func (s *DockerSwarmSuite) TestApiSwarmServicesUpdate(c *check.C) { 278 const nodeCount = 3 279 var daemons [nodeCount]*SwarmDaemon 280 for i := 0; i < nodeCount; i++ { 281 daemons[i] = s.AddDaemon(c, true, i == 0) 282 } 283 // wait for nodes ready 284 waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount) 285 286 // service image at start 287 image1 := "busybox:latest" 288 // target image in update 289 image2 := "busybox:test" 290 291 // create a different tag 292 for _, d := range daemons { 293 out, err := d.Cmd("tag", image1, image2) 294 c.Assert(err, checker.IsNil, check.Commentf(out)) 295 } 296 297 // create service 298 instances := 5 299 parallelism := 2 300 id := daemons[0].createService(c, serviceForUpdate, setInstances(instances)) 301 302 // wait for tasks ready 303 waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals, 304 map[string]int{image1: instances}) 305 306 // issue service update 307 service := daemons[0].getService(c, id) 308 daemons[0].updateService(c, service, setImage(image2)) 309 310 // first batch 311 waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals, 312 map[string]int{image1: instances - parallelism, image2: parallelism}) 313 314 // 2nd batch 315 waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals, 316 map[string]int{image1: instances - 2*parallelism, image2: 2 * parallelism}) 317 318 // 3nd batch 319 waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals, 320 map[string]int{image2: instances}) 321 } 322 323 func (s *DockerSwarmSuite) TestApiSwarmServicesStateReporting(c *check.C) { 324 testRequires(c, Network) 325 testRequires(c, SameHostDaemon) 326 testRequires(c, DaemonIsLinux) 327 328 d1 := s.AddDaemon(c, true, true) 329 d2 := s.AddDaemon(c, true, true) 330 d3 := s.AddDaemon(c, true, false) 331 332 time.Sleep(1 * time.Second) // make sure all daemons are ready to accept 333 334 instances := 9 335 d1.createService(c, simpleTestService, setInstances(instances)) 336 337 waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances) 338 339 getContainers := func() map[string]*SwarmDaemon { 340 m := make(map[string]*SwarmDaemon) 341 for _, d := range []*SwarmDaemon{d1, d2, d3} { 342 for _, id := range d.activeContainers() { 343 m[id] = d 344 } 345 } 346 return m 347 } 348 349 containers := getContainers() 350 c.Assert(containers, checker.HasLen, instances) 351 var toRemove string 352 for i := range containers { 353 toRemove = i 354 } 355 356 _, err := containers[toRemove].Cmd("stop", toRemove) 357 c.Assert(err, checker.IsNil) 358 359 waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances) 360 361 containers2 := getContainers() 362 c.Assert(containers2, checker.HasLen, instances) 363 for i := range containers { 364 if i == toRemove { 365 c.Assert(containers2[i], checker.IsNil) 366 } else { 367 c.Assert(containers2[i], checker.NotNil) 368 } 369 } 370 371 containers = containers2 372 for i := range containers { 373 toRemove = i 374 } 375 376 // try with killing process outside of docker 377 pidStr, err := containers[toRemove].Cmd("inspect", "-f", "{{.State.Pid}}", toRemove) 378 c.Assert(err, checker.IsNil) 379 pid, err := strconv.Atoi(strings.TrimSpace(pidStr)) 380 c.Assert(err, checker.IsNil) 381 c.Assert(syscall.Kill(pid, syscall.SIGKILL), checker.IsNil) 382 383 time.Sleep(time.Second) // give some time to handle the signal 384 385 waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances) 386 387 containers2 = getContainers() 388 c.Assert(containers2, checker.HasLen, instances) 389 for i := range containers { 390 if i == toRemove { 391 c.Assert(containers2[i], checker.IsNil) 392 } else { 393 c.Assert(containers2[i], checker.NotNil) 394 } 395 } 396 } 397 398 func (s *DockerSwarmSuite) TestApiSwarmLeaderElection(c *check.C) { 399 // Create 3 nodes 400 d1 := s.AddDaemon(c, true, true) 401 d2 := s.AddDaemon(c, true, true) 402 d3 := s.AddDaemon(c, true, true) 403 404 // assert that the first node we made is the leader, and the other two are followers 405 c.Assert(d1.getNode(c, d1.NodeID).ManagerStatus.Leader, checker.True) 406 c.Assert(d1.getNode(c, d2.NodeID).ManagerStatus.Leader, checker.False) 407 c.Assert(d1.getNode(c, d3.NodeID).ManagerStatus.Leader, checker.False) 408 409 leader := d1 410 411 // stop the leader 412 leader.Stop() 413 414 // wait for an election to occur 415 var newleader *SwarmDaemon 416 417 for _, d := range []*SwarmDaemon{d2, d3} { 418 if d.getNode(c, d.NodeID).ManagerStatus.Leader { 419 newleader = d 420 break 421 } 422 } 423 424 // assert that we have a new leader 425 c.Assert(newleader, checker.NotNil) 426 427 // add the old leader back 428 leader.Start() 429 430 // clear leader and reinit the followers list 431 followers := make([]*SwarmDaemon, 0, 3) 432 433 // pick out the leader and the followers again 434 for _, d := range []*SwarmDaemon{d1, d2, d3} { 435 if d1.getNode(c, d.NodeID).ManagerStatus.Leader { 436 leader = d 437 } else { 438 followers = append(followers, d) 439 } 440 } 441 442 // verify that we still only have 1 leader and 2 followers 443 c.Assert(leader, checker.NotNil) 444 c.Assert(followers, checker.HasLen, 2) 445 // and that after we added d1 back, the leader hasn't changed 446 c.Assert(leader.NodeID, checker.Equals, newleader.NodeID) 447 } 448 449 func (s *DockerSwarmSuite) TestApiSwarmRaftQuorum(c *check.C) { 450 testRequires(c, Network) 451 d1 := s.AddDaemon(c, true, true) 452 d2 := s.AddDaemon(c, true, true) 453 d3 := s.AddDaemon(c, true, true) 454 455 d1.createService(c, simpleTestService) 456 457 c.Assert(d2.Stop(), checker.IsNil) 458 459 d1.createService(c, simpleTestService, func(s *swarm.Service) { 460 s.Spec.Name = "top1" 461 }) 462 463 c.Assert(d3.Stop(), checker.IsNil) 464 465 var service swarm.Service 466 simpleTestService(&service) 467 service.Spec.Name = "top2" 468 status, out, err := d1.SockRequest("POST", "/services/create", service.Spec) 469 c.Assert(err, checker.IsNil) 470 c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("deadline exceeded", string(out))) 471 472 c.Assert(d2.Start(), checker.IsNil) 473 474 d1.createService(c, simpleTestService, func(s *swarm.Service) { 475 s.Spec.Name = "top3" 476 }) 477 } 478 479 func (s *DockerSwarmSuite) TestApiSwarmListNodes(c *check.C) { 480 testRequires(c, Network) 481 d1 := s.AddDaemon(c, true, true) 482 d2 := s.AddDaemon(c, true, false) 483 d3 := s.AddDaemon(c, true, false) 484 485 nodes := d1.listNodes(c) 486 c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes)) 487 488 loop0: 489 for _, n := range nodes { 490 for _, d := range []*SwarmDaemon{d1, d2, d3} { 491 if n.ID == d.NodeID { 492 continue loop0 493 } 494 } 495 c.Errorf("unknown nodeID %v", n.ID) 496 } 497 } 498 499 func (s *DockerSwarmSuite) TestApiSwarmNodeUpdate(c *check.C) { 500 testRequires(c, Network) 501 d := s.AddDaemon(c, true, true) 502 503 nodes := d.listNodes(c) 504 505 d.updateNode(c, nodes[0].ID, func(n *swarm.Node) { 506 n.Spec.Availability = swarm.NodeAvailabilityPause 507 }) 508 509 n := d.getNode(c, nodes[0].ID) 510 c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityPause) 511 } 512 513 func (s *DockerSwarmSuite) TestApiSwarmNodeRemove(c *check.C) { 514 testRequires(c, Network) 515 d1 := s.AddDaemon(c, true, true) 516 d2 := s.AddDaemon(c, true, false) 517 _ = s.AddDaemon(c, true, false) 518 519 nodes := d1.listNodes(c) 520 c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes)) 521 522 // Getting the info so we can take the NodeID 523 d2Info, err := d2.info() 524 c.Assert(err, checker.IsNil) 525 526 // forceful removal of d2 should work 527 d1.removeNode(c, d2Info.NodeID, true) 528 529 nodes = d1.listNodes(c) 530 c.Assert(len(nodes), checker.Equals, 2, check.Commentf("nodes: %#v", nodes)) 531 532 // Restart the node that was removed 533 err = d2.Restart() 534 c.Assert(err, checker.IsNil) 535 536 // Give some time for the node to rejoin 537 time.Sleep(1 * time.Second) 538 539 // Make sure the node didn't rejoin 540 nodes = d1.listNodes(c) 541 c.Assert(len(nodes), checker.Equals, 2, check.Commentf("nodes: %#v", nodes)) 542 } 543 544 func (s *DockerSwarmSuite) TestApiSwarmNodeDrainPause(c *check.C) { 545 testRequires(c, Network) 546 d1 := s.AddDaemon(c, true, true) 547 d2 := s.AddDaemon(c, true, false) 548 549 time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks 550 551 // start a service, expect balanced distribution 552 instances := 8 553 id := d1.createService(c, simpleTestService, setInstances(instances)) 554 555 waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0) 556 waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0) 557 waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances) 558 559 // drain d2, all containers should move to d1 560 d1.updateNode(c, d2.NodeID, func(n *swarm.Node) { 561 n.Spec.Availability = swarm.NodeAvailabilityDrain 562 }) 563 waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances) 564 waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0) 565 566 // set d2 back to active 567 d1.updateNode(c, d2.NodeID, func(n *swarm.Node) { 568 n.Spec.Availability = swarm.NodeAvailabilityActive 569 }) 570 571 instances = 1 572 d1.updateService(c, d1.getService(c, id), setInstances(instances)) 573 574 waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances) 575 576 instances = 8 577 d1.updateService(c, d1.getService(c, id), setInstances(instances)) 578 579 // drained node first so we don't get any old containers 580 waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0) 581 waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0) 582 waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances) 583 584 d2ContainerCount := len(d2.activeContainers()) 585 586 // set d2 to paused, scale service up, only d1 gets new tasks 587 d1.updateNode(c, d2.NodeID, func(n *swarm.Node) { 588 n.Spec.Availability = swarm.NodeAvailabilityPause 589 }) 590 591 instances = 14 592 d1.updateService(c, d1.getService(c, id), setInstances(instances)) 593 594 waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances-d2ContainerCount) 595 waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, d2ContainerCount) 596 597 } 598 599 func (s *DockerSwarmSuite) TestApiSwarmLeaveRemovesContainer(c *check.C) { 600 testRequires(c, Network) 601 d := s.AddDaemon(c, true, true) 602 603 instances := 2 604 d.createService(c, simpleTestService, setInstances(instances)) 605 606 id, err := d.Cmd("run", "-d", "busybox", "top") 607 c.Assert(err, checker.IsNil) 608 id = strings.TrimSpace(id) 609 610 waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances+1) 611 612 c.Assert(d.Leave(false), checker.NotNil) 613 c.Assert(d.Leave(true), checker.IsNil) 614 615 waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 1) 616 617 id2, err := d.Cmd("ps", "-q") 618 c.Assert(err, checker.IsNil) 619 c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2)) 620 } 621 622 // #23629 623 func (s *DockerSwarmSuite) TestApiSwarmLeaveOnPendingJoin(c *check.C) { 624 s.AddDaemon(c, true, true) 625 d2 := s.AddDaemon(c, false, false) 626 627 id, err := d2.Cmd("run", "-d", "busybox", "top") 628 c.Assert(err, checker.IsNil) 629 id = strings.TrimSpace(id) 630 631 go d2.Join(swarm.JoinRequest{ 632 RemoteAddrs: []string{"nosuchhost:1234"}, 633 }) 634 635 waitAndAssert(c, defaultReconciliationTimeout, d2.checkLocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 636 637 waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1) 638 639 id2, err := d2.Cmd("ps", "-q") 640 c.Assert(err, checker.IsNil) 641 c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2)) 642 } 643 644 // #23705 645 func (s *DockerSwarmSuite) TestApiSwarmRestoreOnPendingJoin(c *check.C) { 646 d := s.AddDaemon(c, false, false) 647 go d.Join(swarm.JoinRequest{ 648 RemoteAddrs: []string{"nosuchhost:1234"}, 649 }) 650 651 waitAndAssert(c, defaultReconciliationTimeout, d.checkLocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 652 653 c.Assert(d.Stop(), checker.IsNil) 654 c.Assert(d.Start(), checker.IsNil) 655 656 info, err := d.info() 657 c.Assert(err, checker.IsNil) 658 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive) 659 } 660 661 func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) { 662 testRequires(c, Network) 663 d1 := s.AddDaemon(c, true, true) 664 665 instances := 2 666 id := d1.createService(c, simpleTestService, setInstances(instances)) 667 668 d1.getService(c, id) 669 d1.Stop() 670 d1.Start() 671 d1.getService(c, id) 672 673 d2 := s.AddDaemon(c, true, true) 674 d2.getService(c, id) 675 d2.Stop() 676 d2.Start() 677 d2.getService(c, id) 678 679 d3 := s.AddDaemon(c, true, true) 680 d3.getService(c, id) 681 d3.Stop() 682 d3.Start() 683 d3.getService(c, id) 684 685 d3.Kill() 686 time.Sleep(1 * time.Second) // time to handle signal 687 d3.Start() 688 d3.getService(c, id) 689 } 690 691 func (s *DockerSwarmSuite) TestApiSwarmScaleNoRollingUpdate(c *check.C) { 692 testRequires(c, Network) 693 d := s.AddDaemon(c, true, true) 694 695 instances := 2 696 id := d.createService(c, simpleTestService, setInstances(instances)) 697 698 waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances) 699 containers := d.activeContainers() 700 instances = 4 701 d.updateService(c, d.getService(c, id), setInstances(instances)) 702 waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances) 703 containers2 := d.activeContainers() 704 705 loop0: 706 for _, c1 := range containers { 707 for _, c2 := range containers2 { 708 if c1 == c2 { 709 continue loop0 710 } 711 } 712 c.Errorf("container %v not found in new set %#v", c1, containers2) 713 } 714 } 715 716 func (s *DockerSwarmSuite) TestApiSwarmInvalidAddress(c *check.C) { 717 d := s.AddDaemon(c, false, false) 718 req := swarm.InitRequest{ 719 ListenAddr: "", 720 } 721 status, _, err := d.SockRequest("POST", "/swarm/init", req) 722 c.Assert(err, checker.IsNil) 723 c.Assert(status, checker.Equals, http.StatusInternalServerError) 724 725 req2 := swarm.JoinRequest{ 726 ListenAddr: "0.0.0.0:2377", 727 RemoteAddrs: []string{""}, 728 } 729 status, _, err = d.SockRequest("POST", "/swarm/join", req2) 730 c.Assert(err, checker.IsNil) 731 c.Assert(status, checker.Equals, http.StatusInternalServerError) 732 } 733 734 func (s *DockerSwarmSuite) TestApiSwarmForceNewCluster(c *check.C) { 735 d1 := s.AddDaemon(c, true, true) 736 d2 := s.AddDaemon(c, true, true) 737 738 instances := 2 739 id := d1.createService(c, simpleTestService, setInstances(instances)) 740 waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances) 741 742 // drain d2, all containers should move to d1 743 d1.updateNode(c, d2.NodeID, func(n *swarm.Node) { 744 n.Spec.Availability = swarm.NodeAvailabilityDrain 745 }) 746 waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances) 747 waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0) 748 749 c.Assert(d2.Stop(), checker.IsNil) 750 751 c.Assert(d1.Init(swarm.InitRequest{ 752 ForceNewCluster: true, 753 Spec: swarm.Spec{}, 754 }), checker.IsNil) 755 756 waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances) 757 758 d3 := s.AddDaemon(c, true, true) 759 info, err := d3.info() 760 c.Assert(err, checker.IsNil) 761 c.Assert(info.ControlAvailable, checker.True) 762 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 763 764 instances = 4 765 d3.updateService(c, d3.getService(c, id), setInstances(instances)) 766 767 waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances) 768 } 769 770 func simpleTestService(s *swarm.Service) { 771 var ureplicas uint64 772 ureplicas = 1 773 s.Spec = swarm.ServiceSpec{ 774 TaskTemplate: swarm.TaskSpec{ 775 ContainerSpec: swarm.ContainerSpec{ 776 Image: "busybox:latest", 777 Command: []string{"/bin/top"}, 778 }, 779 }, 780 Mode: swarm.ServiceMode{ 781 Replicated: &swarm.ReplicatedService{ 782 Replicas: &ureplicas, 783 }, 784 }, 785 } 786 s.Spec.Name = "top" 787 } 788 789 func serviceForUpdate(s *swarm.Service) { 790 var ureplicas uint64 791 ureplicas = 1 792 s.Spec = swarm.ServiceSpec{ 793 TaskTemplate: swarm.TaskSpec{ 794 ContainerSpec: swarm.ContainerSpec{ 795 Image: "busybox:latest", 796 Command: []string{"/bin/top"}, 797 }, 798 }, 799 Mode: swarm.ServiceMode{ 800 Replicated: &swarm.ReplicatedService{ 801 Replicas: &ureplicas, 802 }, 803 }, 804 UpdateConfig: &swarm.UpdateConfig{ 805 Parallelism: 2, 806 Delay: 8 * time.Second, 807 FailureAction: swarm.UpdateFailureActionContinue, 808 }, 809 } 810 s.Spec.Name = "updatetest" 811 } 812 813 func setInstances(replicas int) serviceConstructor { 814 ureplicas := uint64(replicas) 815 return func(s *swarm.Service) { 816 s.Spec.Mode = swarm.ServiceMode{ 817 Replicated: &swarm.ReplicatedService{ 818 Replicas: &ureplicas, 819 }, 820 } 821 } 822 } 823 824 func setImage(image string) serviceConstructor { 825 return func(s *swarm.Service) { 826 s.Spec.TaskTemplate.ContainerSpec.Image = image 827 } 828 } 829 830 func setGlobalMode(s *swarm.Service) { 831 s.Spec.Mode = swarm.ServiceMode{ 832 Global: &swarm.GlobalService{}, 833 } 834 } 835 836 func checkClusterHealth(c *check.C, cl []*SwarmDaemon, managerCount, workerCount int) { 837 var totalMCount, totalWCount int 838 for _, d := range cl { 839 info, err := d.info() 840 c.Assert(err, check.IsNil) 841 if !info.ControlAvailable { 842 totalWCount++ 843 continue 844 } 845 var leaderFound bool 846 totalMCount++ 847 var mCount, wCount int 848 for _, n := range d.listNodes(c) { 849 c.Assert(n.Status.State, checker.Equals, swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.Info.NodeID)) 850 c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.Info.NodeID)) 851 if n.Spec.Role == swarm.NodeRoleManager { 852 c.Assert(n.ManagerStatus, checker.NotNil, check.Commentf("manager status of node %s (manager), reported by %s", n.ID, d.Info.NodeID)) 853 if n.ManagerStatus.Leader { 854 leaderFound = true 855 } 856 mCount++ 857 } else { 858 c.Assert(n.ManagerStatus, checker.IsNil, check.Commentf("manager status of node %s (worker), reported by %s", n.ID, d.Info.NodeID)) 859 wCount++ 860 } 861 } 862 c.Assert(leaderFound, checker.True, check.Commentf("lack of leader reported by node %s", info.NodeID)) 863 c.Assert(mCount, checker.Equals, managerCount, check.Commentf("managers count reported by node %s", info.NodeID)) 864 c.Assert(wCount, checker.Equals, workerCount, check.Commentf("workers count reported by node %s", info.NodeID)) 865 } 866 c.Assert(totalMCount, checker.Equals, managerCount) 867 c.Assert(totalWCount, checker.Equals, workerCount) 868 } 869 870 func (s *DockerSwarmSuite) TestApiSwarmRestartCluster(c *check.C) { 871 mCount, wCount := 5, 1 872 873 var nodes []*SwarmDaemon 874 for i := 0; i < mCount; i++ { 875 manager := s.AddDaemon(c, true, true) 876 info, err := manager.info() 877 c.Assert(err, checker.IsNil) 878 c.Assert(info.ControlAvailable, checker.True) 879 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 880 nodes = append(nodes, manager) 881 } 882 883 for i := 0; i < wCount; i++ { 884 worker := s.AddDaemon(c, true, false) 885 info, err := worker.info() 886 c.Assert(err, checker.IsNil) 887 c.Assert(info.ControlAvailable, checker.False) 888 c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive) 889 nodes = append(nodes, worker) 890 } 891 892 // stop whole cluster 893 { 894 var wg sync.WaitGroup 895 wg.Add(len(nodes)) 896 errs := make(chan error, len(nodes)) 897 898 for _, d := range nodes { 899 go func(daemon *SwarmDaemon) { 900 defer wg.Done() 901 if err := daemon.Stop(); err != nil { 902 errs <- err 903 } 904 if root := os.Getenv("DOCKER_REMAP_ROOT"); root != "" { 905 daemon.root = filepath.Dir(daemon.root) 906 } 907 }(d) 908 } 909 wg.Wait() 910 close(errs) 911 for err := range errs { 912 c.Assert(err, check.IsNil) 913 } 914 } 915 916 // start whole cluster 917 { 918 var wg sync.WaitGroup 919 wg.Add(len(nodes)) 920 errs := make(chan error, len(nodes)) 921 922 for _, d := range nodes { 923 go func(daemon *SwarmDaemon) { 924 defer wg.Done() 925 if err := daemon.Start("--iptables=false"); err != nil { 926 errs <- err 927 } 928 }(d) 929 } 930 wg.Wait() 931 close(errs) 932 for err := range errs { 933 c.Assert(err, check.IsNil) 934 } 935 } 936 937 checkClusterHealth(c, nodes, mCount, wCount) 938 }