github.com/dpiddy/docker@v1.12.2-rc1/integration-cli/docker_api_swarm_test.go (about)

     1  // +build !windows
     2  
     3  package main
     4  
     5  import (
     6  	"fmt"
     7  	"net/http"
     8  	"os"
     9  	"path/filepath"
    10  	"strconv"
    11  	"strings"
    12  	"sync"
    13  	"syscall"
    14  	"time"
    15  
    16  	"github.com/docker/docker/pkg/integration/checker"
    17  	"github.com/docker/engine-api/types/swarm"
    18  	"github.com/go-check/check"
    19  )
    20  
// defaultReconciliationTimeout is the timeout the tests in this file hand to
// waitAndAssert while waiting for the swarm to reach an expected state.
var defaultReconciliationTimeout = 30 * time.Second
    22  
    23  func (s *DockerSwarmSuite) TestApiSwarmInit(c *check.C) {
    24  	testRequires(c, Network)
    25  	// todo: should find a better way to verify that components are running than /info
    26  	d1 := s.AddDaemon(c, true, true)
    27  	info, err := d1.info()
    28  	c.Assert(err, checker.IsNil)
    29  	c.Assert(info.ControlAvailable, checker.True)
    30  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
    31  
    32  	d2 := s.AddDaemon(c, true, false)
    33  	info, err = d2.info()
    34  	c.Assert(err, checker.IsNil)
    35  	c.Assert(info.ControlAvailable, checker.False)
    36  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
    37  
    38  	// Leaving cluster
    39  	c.Assert(d2.Leave(false), checker.IsNil)
    40  
    41  	info, err = d2.info()
    42  	c.Assert(err, checker.IsNil)
    43  	c.Assert(info.ControlAvailable, checker.False)
    44  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
    45  
    46  	c.Assert(d2.Join(swarm.JoinRequest{JoinToken: d1.joinTokens(c).Worker, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
    47  
    48  	info, err = d2.info()
    49  	c.Assert(err, checker.IsNil)
    50  	c.Assert(info.ControlAvailable, checker.False)
    51  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
    52  
    53  	// Current state restoring after restarts
    54  	err = d1.Stop()
    55  	c.Assert(err, checker.IsNil)
    56  	err = d2.Stop()
    57  	c.Assert(err, checker.IsNil)
    58  
    59  	err = d1.Start()
    60  	c.Assert(err, checker.IsNil)
    61  	err = d2.Start()
    62  	c.Assert(err, checker.IsNil)
    63  
    64  	info, err = d1.info()
    65  	c.Assert(err, checker.IsNil)
    66  	c.Assert(info.ControlAvailable, checker.True)
    67  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
    68  
    69  	info, err = d2.info()
    70  	c.Assert(err, checker.IsNil)
    71  	c.Assert(info.ControlAvailable, checker.False)
    72  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
    73  }
    74  
    75  func (s *DockerSwarmSuite) TestApiSwarmJoinToken(c *check.C) {
    76  	testRequires(c, Network)
    77  	d1 := s.AddDaemon(c, false, false)
    78  	c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil)
    79  
    80  	d2 := s.AddDaemon(c, false, false)
    81  	err := d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
    82  	c.Assert(err, checker.NotNil)
    83  	c.Assert(err.Error(), checker.Contains, "join token is necessary")
    84  	info, err := d2.info()
    85  	c.Assert(err, checker.IsNil)
    86  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
    87  
    88  	err = d2.Join(swarm.JoinRequest{JoinToken: "foobaz", RemoteAddrs: []string{d1.listenAddr}})
    89  	c.Assert(err, checker.NotNil)
    90  	c.Assert(err.Error(), checker.Contains, "join token is necessary")
    91  	info, err = d2.info()
    92  	c.Assert(err, checker.IsNil)
    93  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
    94  
    95  	workerToken := d1.joinTokens(c).Worker
    96  
    97  	c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
    98  	info, err = d2.info()
    99  	c.Assert(err, checker.IsNil)
   100  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
   101  	c.Assert(d2.Leave(false), checker.IsNil)
   102  	info, err = d2.info()
   103  	c.Assert(err, checker.IsNil)
   104  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
   105  
   106  	// change tokens
   107  	d1.rotateTokens(c)
   108  
   109  	err = d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}})
   110  	c.Assert(err, checker.NotNil)
   111  	c.Assert(err.Error(), checker.Contains, "join token is necessary")
   112  	info, err = d2.info()
   113  	c.Assert(err, checker.IsNil)
   114  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
   115  
   116  	workerToken = d1.joinTokens(c).Worker
   117  
   118  	c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
   119  	info, err = d2.info()
   120  	c.Assert(err, checker.IsNil)
   121  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
   122  	c.Assert(d2.Leave(false), checker.IsNil)
   123  	info, err = d2.info()
   124  	c.Assert(err, checker.IsNil)
   125  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
   126  
   127  	// change spec, don't change tokens
   128  	d1.updateSwarm(c, func(s *swarm.Spec) {})
   129  
   130  	err = d2.Join(swarm.JoinRequest{RemoteAddrs: []string{d1.listenAddr}})
   131  	c.Assert(err, checker.NotNil)
   132  	c.Assert(err.Error(), checker.Contains, "join token is necessary")
   133  	info, err = d2.info()
   134  	c.Assert(err, checker.IsNil)
   135  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
   136  
   137  	c.Assert(d2.Join(swarm.JoinRequest{JoinToken: workerToken, RemoteAddrs: []string{d1.listenAddr}}), checker.IsNil)
   138  	info, err = d2.info()
   139  	c.Assert(err, checker.IsNil)
   140  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
   141  	c.Assert(d2.Leave(false), checker.IsNil)
   142  	info, err = d2.info()
   143  	c.Assert(err, checker.IsNil)
   144  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
   145  }
   146  
   147  func (s *DockerSwarmSuite) TestApiSwarmCAHash(c *check.C) {
   148  	testRequires(c, Network)
   149  	d1 := s.AddDaemon(c, true, true)
   150  	d2 := s.AddDaemon(c, false, false)
   151  	splitToken := strings.Split(d1.joinTokens(c).Worker, "-")
   152  	splitToken[2] = "1kxftv4ofnc6mt30lmgipg6ngf9luhwqopfk1tz6bdmnkubg0e"
   153  	replacementToken := strings.Join(splitToken, "-")
   154  	err := d2.Join(swarm.JoinRequest{JoinToken: replacementToken, RemoteAddrs: []string{d1.listenAddr}})
   155  	c.Assert(err, checker.NotNil)
   156  	c.Assert(err.Error(), checker.Contains, "remote CA does not match fingerprint")
   157  }
   158  
   159  func (s *DockerSwarmSuite) TestApiSwarmPromoteDemote(c *check.C) {
   160  	testRequires(c, Network)
   161  	d1 := s.AddDaemon(c, false, false)
   162  	c.Assert(d1.Init(swarm.InitRequest{}), checker.IsNil)
   163  	d2 := s.AddDaemon(c, true, false)
   164  
   165  	info, err := d2.info()
   166  	c.Assert(err, checker.IsNil)
   167  	c.Assert(info.ControlAvailable, checker.False)
   168  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
   169  
   170  	d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
   171  		n.Spec.Role = swarm.NodeRoleManager
   172  	})
   173  
   174  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
   175  
   176  	d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
   177  		n.Spec.Role = swarm.NodeRoleWorker
   178  	})
   179  
   180  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.False)
   181  
   182  	// Demoting last node should fail
   183  	node := d1.getNode(c, d1.NodeID)
   184  	node.Spec.Role = swarm.NodeRoleWorker
   185  	url := fmt.Sprintf("/nodes/%s/update?version=%d", node.ID, node.Version.Index)
   186  	status, out, err := d1.SockRequest("POST", url, node.Spec)
   187  	c.Assert(err, checker.IsNil)
   188  	c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("output: %q", string(out)))
   189  	c.Assert(string(out), checker.Contains, "last manager of the swarm")
   190  	info, err = d1.info()
   191  	c.Assert(err, checker.IsNil)
   192  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
   193  	c.Assert(info.ControlAvailable, checker.True)
   194  
   195  	// Promote already demoted node
   196  	d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
   197  		n.Spec.Role = swarm.NodeRoleManager
   198  	})
   199  
   200  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkControlAvailable, checker.True)
   201  }
   202  
   203  func (s *DockerSwarmSuite) TestApiSwarmServicesEmptyList(c *check.C) {
   204  	testRequires(c, Network)
   205  	d := s.AddDaemon(c, true, true)
   206  
   207  	services := d.listServices(c)
   208  	c.Assert(services, checker.NotNil)
   209  	c.Assert(len(services), checker.Equals, 0, check.Commentf("services: %#v", services))
   210  }
   211  
   212  func (s *DockerSwarmSuite) TestApiSwarmServicesCreate(c *check.C) {
   213  	testRequires(c, Network)
   214  	d := s.AddDaemon(c, true, true)
   215  
   216  	instances := 2
   217  	id := d.createService(c, simpleTestService, setInstances(instances))
   218  	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
   219  
   220  	service := d.getService(c, id)
   221  	instances = 5
   222  	d.updateService(c, service, setInstances(instances))
   223  	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
   224  
   225  	d.removeService(c, service.ID)
   226  	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 0)
   227  }
   228  
   229  func (s *DockerSwarmSuite) TestApiSwarmServicesMultipleAgents(c *check.C) {
   230  	testRequires(c, Network)
   231  	d1 := s.AddDaemon(c, true, true)
   232  	d2 := s.AddDaemon(c, true, false)
   233  	d3 := s.AddDaemon(c, true, false)
   234  
   235  	time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
   236  
   237  	instances := 9
   238  	id := d1.createService(c, simpleTestService, setInstances(instances))
   239  
   240  	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
   241  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
   242  	waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.GreaterThan, 0)
   243  
   244  	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
   245  
   246  	// reconciliation on d2 node down
   247  	c.Assert(d2.Stop(), checker.IsNil)
   248  
   249  	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
   250  
   251  	// test downscaling
   252  	instances = 5
   253  	d1.updateService(c, d1.getService(c, id), setInstances(instances))
   254  	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
   255  
   256  }
   257  
   258  func (s *DockerSwarmSuite) TestApiSwarmServicesCreateGlobal(c *check.C) {
   259  	testRequires(c, Network)
   260  	d1 := s.AddDaemon(c, true, true)
   261  	d2 := s.AddDaemon(c, true, false)
   262  	d3 := s.AddDaemon(c, true, false)
   263  
   264  	d1.createService(c, simpleTestService, setGlobalMode)
   265  
   266  	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, 1)
   267  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
   268  	waitAndAssert(c, defaultReconciliationTimeout, d3.checkActiveContainerCount, checker.Equals, 1)
   269  
   270  	d4 := s.AddDaemon(c, true, false)
   271  	d5 := s.AddDaemon(c, true, false)
   272  
   273  	waitAndAssert(c, defaultReconciliationTimeout, d4.checkActiveContainerCount, checker.Equals, 1)
   274  	waitAndAssert(c, defaultReconciliationTimeout, d5.checkActiveContainerCount, checker.Equals, 1)
   275  }
   276  
   277  func (s *DockerSwarmSuite) TestApiSwarmServicesUpdate(c *check.C) {
   278  	const nodeCount = 3
   279  	var daemons [nodeCount]*SwarmDaemon
   280  	for i := 0; i < nodeCount; i++ {
   281  		daemons[i] = s.AddDaemon(c, true, i == 0)
   282  	}
   283  	// wait for nodes ready
   284  	waitAndAssert(c, 5*time.Second, daemons[0].checkNodeReadyCount, checker.Equals, nodeCount)
   285  
   286  	// service image at start
   287  	image1 := "busybox:latest"
   288  	// target image in update
   289  	image2 := "busybox:test"
   290  
   291  	// create a different tag
   292  	for _, d := range daemons {
   293  		out, err := d.Cmd("tag", image1, image2)
   294  		c.Assert(err, checker.IsNil, check.Commentf(out))
   295  	}
   296  
   297  	// create service
   298  	instances := 5
   299  	parallelism := 2
   300  	id := daemons[0].createService(c, serviceForUpdate, setInstances(instances))
   301  
   302  	// wait for tasks ready
   303  	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
   304  		map[string]int{image1: instances})
   305  
   306  	// issue service update
   307  	service := daemons[0].getService(c, id)
   308  	daemons[0].updateService(c, service, setImage(image2))
   309  
   310  	// first batch
   311  	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
   312  		map[string]int{image1: instances - parallelism, image2: parallelism})
   313  
   314  	// 2nd batch
   315  	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
   316  		map[string]int{image1: instances - 2*parallelism, image2: 2 * parallelism})
   317  
   318  	// 3nd batch
   319  	waitAndAssert(c, defaultReconciliationTimeout, daemons[0].checkRunningTaskImages, checker.DeepEquals,
   320  		map[string]int{image2: instances})
   321  }
   322  
   323  func (s *DockerSwarmSuite) TestApiSwarmServicesStateReporting(c *check.C) {
   324  	testRequires(c, Network)
   325  	testRequires(c, SameHostDaemon)
   326  	testRequires(c, DaemonIsLinux)
   327  
   328  	d1 := s.AddDaemon(c, true, true)
   329  	d2 := s.AddDaemon(c, true, true)
   330  	d3 := s.AddDaemon(c, true, false)
   331  
   332  	time.Sleep(1 * time.Second) // make sure all daemons are ready to accept
   333  
   334  	instances := 9
   335  	d1.createService(c, simpleTestService, setInstances(instances))
   336  
   337  	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
   338  
   339  	getContainers := func() map[string]*SwarmDaemon {
   340  		m := make(map[string]*SwarmDaemon)
   341  		for _, d := range []*SwarmDaemon{d1, d2, d3} {
   342  			for _, id := range d.activeContainers() {
   343  				m[id] = d
   344  			}
   345  		}
   346  		return m
   347  	}
   348  
   349  	containers := getContainers()
   350  	c.Assert(containers, checker.HasLen, instances)
   351  	var toRemove string
   352  	for i := range containers {
   353  		toRemove = i
   354  	}
   355  
   356  	_, err := containers[toRemove].Cmd("stop", toRemove)
   357  	c.Assert(err, checker.IsNil)
   358  
   359  	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
   360  
   361  	containers2 := getContainers()
   362  	c.Assert(containers2, checker.HasLen, instances)
   363  	for i := range containers {
   364  		if i == toRemove {
   365  			c.Assert(containers2[i], checker.IsNil)
   366  		} else {
   367  			c.Assert(containers2[i], checker.NotNil)
   368  		}
   369  	}
   370  
   371  	containers = containers2
   372  	for i := range containers {
   373  		toRemove = i
   374  	}
   375  
   376  	// try with killing process outside of docker
   377  	pidStr, err := containers[toRemove].Cmd("inspect", "-f", "{{.State.Pid}}", toRemove)
   378  	c.Assert(err, checker.IsNil)
   379  	pid, err := strconv.Atoi(strings.TrimSpace(pidStr))
   380  	c.Assert(err, checker.IsNil)
   381  	c.Assert(syscall.Kill(pid, syscall.SIGKILL), checker.IsNil)
   382  
   383  	time.Sleep(time.Second) // give some time to handle the signal
   384  
   385  	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
   386  
   387  	containers2 = getContainers()
   388  	c.Assert(containers2, checker.HasLen, instances)
   389  	for i := range containers {
   390  		if i == toRemove {
   391  			c.Assert(containers2[i], checker.IsNil)
   392  		} else {
   393  			c.Assert(containers2[i], checker.NotNil)
   394  		}
   395  	}
   396  }
   397  
   398  func (s *DockerSwarmSuite) TestApiSwarmLeaderElection(c *check.C) {
   399  	// Create 3 nodes
   400  	d1 := s.AddDaemon(c, true, true)
   401  	d2 := s.AddDaemon(c, true, true)
   402  	d3 := s.AddDaemon(c, true, true)
   403  
   404  	// assert that the first node we made is the leader, and the other two are followers
   405  	c.Assert(d1.getNode(c, d1.NodeID).ManagerStatus.Leader, checker.True)
   406  	c.Assert(d1.getNode(c, d2.NodeID).ManagerStatus.Leader, checker.False)
   407  	c.Assert(d1.getNode(c, d3.NodeID).ManagerStatus.Leader, checker.False)
   408  
   409  	leader := d1
   410  
   411  	// stop the leader
   412  	leader.Stop()
   413  
   414  	// wait for an election to occur
   415  	var newleader *SwarmDaemon
   416  
   417  	for _, d := range []*SwarmDaemon{d2, d3} {
   418  		if d.getNode(c, d.NodeID).ManagerStatus.Leader {
   419  			newleader = d
   420  			break
   421  		}
   422  	}
   423  
   424  	// assert that we have a new leader
   425  	c.Assert(newleader, checker.NotNil)
   426  
   427  	// add the old leader back
   428  	leader.Start()
   429  
   430  	// clear leader and reinit the followers list
   431  	followers := make([]*SwarmDaemon, 0, 3)
   432  
   433  	// pick out the leader and the followers again
   434  	for _, d := range []*SwarmDaemon{d1, d2, d3} {
   435  		if d1.getNode(c, d.NodeID).ManagerStatus.Leader {
   436  			leader = d
   437  		} else {
   438  			followers = append(followers, d)
   439  		}
   440  	}
   441  
   442  	// verify that we still only have 1 leader and 2 followers
   443  	c.Assert(leader, checker.NotNil)
   444  	c.Assert(followers, checker.HasLen, 2)
   445  	// and that after we added d1 back, the leader hasn't changed
   446  	c.Assert(leader.NodeID, checker.Equals, newleader.NodeID)
   447  }
   448  
   449  func (s *DockerSwarmSuite) TestApiSwarmRaftQuorum(c *check.C) {
   450  	testRequires(c, Network)
   451  	d1 := s.AddDaemon(c, true, true)
   452  	d2 := s.AddDaemon(c, true, true)
   453  	d3 := s.AddDaemon(c, true, true)
   454  
   455  	d1.createService(c, simpleTestService)
   456  
   457  	c.Assert(d2.Stop(), checker.IsNil)
   458  
   459  	d1.createService(c, simpleTestService, func(s *swarm.Service) {
   460  		s.Spec.Name = "top1"
   461  	})
   462  
   463  	c.Assert(d3.Stop(), checker.IsNil)
   464  
   465  	var service swarm.Service
   466  	simpleTestService(&service)
   467  	service.Spec.Name = "top2"
   468  	status, out, err := d1.SockRequest("POST", "/services/create", service.Spec)
   469  	c.Assert(err, checker.IsNil)
   470  	c.Assert(status, checker.Equals, http.StatusInternalServerError, check.Commentf("deadline exceeded", string(out)))
   471  
   472  	c.Assert(d2.Start(), checker.IsNil)
   473  
   474  	d1.createService(c, simpleTestService, func(s *swarm.Service) {
   475  		s.Spec.Name = "top3"
   476  	})
   477  }
   478  
   479  func (s *DockerSwarmSuite) TestApiSwarmListNodes(c *check.C) {
   480  	testRequires(c, Network)
   481  	d1 := s.AddDaemon(c, true, true)
   482  	d2 := s.AddDaemon(c, true, false)
   483  	d3 := s.AddDaemon(c, true, false)
   484  
   485  	nodes := d1.listNodes(c)
   486  	c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes))
   487  
   488  loop0:
   489  	for _, n := range nodes {
   490  		for _, d := range []*SwarmDaemon{d1, d2, d3} {
   491  			if n.ID == d.NodeID {
   492  				continue loop0
   493  			}
   494  		}
   495  		c.Errorf("unknown nodeID %v", n.ID)
   496  	}
   497  }
   498  
   499  func (s *DockerSwarmSuite) TestApiSwarmNodeUpdate(c *check.C) {
   500  	testRequires(c, Network)
   501  	d := s.AddDaemon(c, true, true)
   502  
   503  	nodes := d.listNodes(c)
   504  
   505  	d.updateNode(c, nodes[0].ID, func(n *swarm.Node) {
   506  		n.Spec.Availability = swarm.NodeAvailabilityPause
   507  	})
   508  
   509  	n := d.getNode(c, nodes[0].ID)
   510  	c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityPause)
   511  }
   512  
   513  func (s *DockerSwarmSuite) TestApiSwarmNodeRemove(c *check.C) {
   514  	testRequires(c, Network)
   515  	d1 := s.AddDaemon(c, true, true)
   516  	d2 := s.AddDaemon(c, true, false)
   517  	_ = s.AddDaemon(c, true, false)
   518  
   519  	nodes := d1.listNodes(c)
   520  	c.Assert(len(nodes), checker.Equals, 3, check.Commentf("nodes: %#v", nodes))
   521  
   522  	// Getting the info so we can take the NodeID
   523  	d2Info, err := d2.info()
   524  	c.Assert(err, checker.IsNil)
   525  
   526  	// forceful removal of d2 should work
   527  	d1.removeNode(c, d2Info.NodeID, true)
   528  
   529  	nodes = d1.listNodes(c)
   530  	c.Assert(len(nodes), checker.Equals, 2, check.Commentf("nodes: %#v", nodes))
   531  
   532  	// Restart the node that was removed
   533  	err = d2.Restart()
   534  	c.Assert(err, checker.IsNil)
   535  
   536  	// Give some time for the node to rejoin
   537  	time.Sleep(1 * time.Second)
   538  
   539  	// Make sure the node didn't rejoin
   540  	nodes = d1.listNodes(c)
   541  	c.Assert(len(nodes), checker.Equals, 2, check.Commentf("nodes: %#v", nodes))
   542  }
   543  
   544  func (s *DockerSwarmSuite) TestApiSwarmNodeDrainPause(c *check.C) {
   545  	testRequires(c, Network)
   546  	d1 := s.AddDaemon(c, true, true)
   547  	d2 := s.AddDaemon(c, true, false)
   548  
   549  	time.Sleep(1 * time.Second) // make sure all daemons are ready to accept tasks
   550  
   551  	// start a service, expect balanced distribution
   552  	instances := 8
   553  	id := d1.createService(c, simpleTestService, setInstances(instances))
   554  
   555  	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
   556  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
   557  	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
   558  
   559  	// drain d2, all containers should move to d1
   560  	d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
   561  		n.Spec.Availability = swarm.NodeAvailabilityDrain
   562  	})
   563  	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
   564  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0)
   565  
   566  	// set d2 back to active
   567  	d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
   568  		n.Spec.Availability = swarm.NodeAvailabilityActive
   569  	})
   570  
   571  	instances = 1
   572  	d1.updateService(c, d1.getService(c, id), setInstances(instances))
   573  
   574  	waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
   575  
   576  	instances = 8
   577  	d1.updateService(c, d1.getService(c, id), setInstances(instances))
   578  
   579  	// drained node first so we don't get any old containers
   580  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.GreaterThan, 0)
   581  	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.GreaterThan, 0)
   582  	waitAndAssert(c, defaultReconciliationTimeout*2, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
   583  
   584  	d2ContainerCount := len(d2.activeContainers())
   585  
   586  	// set d2 to paused, scale service up, only d1 gets new tasks
   587  	d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
   588  		n.Spec.Availability = swarm.NodeAvailabilityPause
   589  	})
   590  
   591  	instances = 14
   592  	d1.updateService(c, d1.getService(c, id), setInstances(instances))
   593  
   594  	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances-d2ContainerCount)
   595  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, d2ContainerCount)
   596  
   597  }
   598  
   599  func (s *DockerSwarmSuite) TestApiSwarmLeaveRemovesContainer(c *check.C) {
   600  	testRequires(c, Network)
   601  	d := s.AddDaemon(c, true, true)
   602  
   603  	instances := 2
   604  	d.createService(c, simpleTestService, setInstances(instances))
   605  
   606  	id, err := d.Cmd("run", "-d", "busybox", "top")
   607  	c.Assert(err, checker.IsNil)
   608  	id = strings.TrimSpace(id)
   609  
   610  	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances+1)
   611  
   612  	c.Assert(d.Leave(false), checker.NotNil)
   613  	c.Assert(d.Leave(true), checker.IsNil)
   614  
   615  	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, 1)
   616  
   617  	id2, err := d.Cmd("ps", "-q")
   618  	c.Assert(err, checker.IsNil)
   619  	c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
   620  }
   621  
   622  // #23629
   623  func (s *DockerSwarmSuite) TestApiSwarmLeaveOnPendingJoin(c *check.C) {
   624  	s.AddDaemon(c, true, true)
   625  	d2 := s.AddDaemon(c, false, false)
   626  
   627  	id, err := d2.Cmd("run", "-d", "busybox", "top")
   628  	c.Assert(err, checker.IsNil)
   629  	id = strings.TrimSpace(id)
   630  
   631  	go d2.Join(swarm.JoinRequest{
   632  		RemoteAddrs: []string{"nosuchhost:1234"},
   633  	})
   634  
   635  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkLocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
   636  
   637  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 1)
   638  
   639  	id2, err := d2.Cmd("ps", "-q")
   640  	c.Assert(err, checker.IsNil)
   641  	c.Assert(id, checker.HasPrefix, strings.TrimSpace(id2))
   642  }
   643  
   644  // #23705
   645  func (s *DockerSwarmSuite) TestApiSwarmRestoreOnPendingJoin(c *check.C) {
   646  	d := s.AddDaemon(c, false, false)
   647  	go d.Join(swarm.JoinRequest{
   648  		RemoteAddrs: []string{"nosuchhost:1234"},
   649  	})
   650  
   651  	waitAndAssert(c, defaultReconciliationTimeout, d.checkLocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
   652  
   653  	c.Assert(d.Stop(), checker.IsNil)
   654  	c.Assert(d.Start(), checker.IsNil)
   655  
   656  	info, err := d.info()
   657  	c.Assert(err, checker.IsNil)
   658  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateInactive)
   659  }
   660  
   661  func (s *DockerSwarmSuite) TestApiSwarmManagerRestore(c *check.C) {
   662  	testRequires(c, Network)
   663  	d1 := s.AddDaemon(c, true, true)
   664  
   665  	instances := 2
   666  	id := d1.createService(c, simpleTestService, setInstances(instances))
   667  
   668  	d1.getService(c, id)
   669  	d1.Stop()
   670  	d1.Start()
   671  	d1.getService(c, id)
   672  
   673  	d2 := s.AddDaemon(c, true, true)
   674  	d2.getService(c, id)
   675  	d2.Stop()
   676  	d2.Start()
   677  	d2.getService(c, id)
   678  
   679  	d3 := s.AddDaemon(c, true, true)
   680  	d3.getService(c, id)
   681  	d3.Stop()
   682  	d3.Start()
   683  	d3.getService(c, id)
   684  
   685  	d3.Kill()
   686  	time.Sleep(1 * time.Second) // time to handle signal
   687  	d3.Start()
   688  	d3.getService(c, id)
   689  }
   690  
   691  func (s *DockerSwarmSuite) TestApiSwarmScaleNoRollingUpdate(c *check.C) {
   692  	testRequires(c, Network)
   693  	d := s.AddDaemon(c, true, true)
   694  
   695  	instances := 2
   696  	id := d.createService(c, simpleTestService, setInstances(instances))
   697  
   698  	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
   699  	containers := d.activeContainers()
   700  	instances = 4
   701  	d.updateService(c, d.getService(c, id), setInstances(instances))
   702  	waitAndAssert(c, defaultReconciliationTimeout, d.checkActiveContainerCount, checker.Equals, instances)
   703  	containers2 := d.activeContainers()
   704  
   705  loop0:
   706  	for _, c1 := range containers {
   707  		for _, c2 := range containers2 {
   708  			if c1 == c2 {
   709  				continue loop0
   710  			}
   711  		}
   712  		c.Errorf("container %v not found in new set %#v", c1, containers2)
   713  	}
   714  }
   715  
   716  func (s *DockerSwarmSuite) TestApiSwarmInvalidAddress(c *check.C) {
   717  	d := s.AddDaemon(c, false, false)
   718  	req := swarm.InitRequest{
   719  		ListenAddr: "",
   720  	}
   721  	status, _, err := d.SockRequest("POST", "/swarm/init", req)
   722  	c.Assert(err, checker.IsNil)
   723  	c.Assert(status, checker.Equals, http.StatusInternalServerError)
   724  
   725  	req2 := swarm.JoinRequest{
   726  		ListenAddr:  "0.0.0.0:2377",
   727  		RemoteAddrs: []string{""},
   728  	}
   729  	status, _, err = d.SockRequest("POST", "/swarm/join", req2)
   730  	c.Assert(err, checker.IsNil)
   731  	c.Assert(status, checker.Equals, http.StatusInternalServerError)
   732  }
   733  
   734  func (s *DockerSwarmSuite) TestApiSwarmForceNewCluster(c *check.C) {
   735  	d1 := s.AddDaemon(c, true, true)
   736  	d2 := s.AddDaemon(c, true, true)
   737  
   738  	instances := 2
   739  	id := d1.createService(c, simpleTestService, setInstances(instances))
   740  	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d2.checkActiveContainerCount), checker.Equals, instances)
   741  
   742  	// drain d2, all containers should move to d1
   743  	d1.updateNode(c, d2.NodeID, func(n *swarm.Node) {
   744  		n.Spec.Availability = swarm.NodeAvailabilityDrain
   745  	})
   746  	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
   747  	waitAndAssert(c, defaultReconciliationTimeout, d2.checkActiveContainerCount, checker.Equals, 0)
   748  
   749  	c.Assert(d2.Stop(), checker.IsNil)
   750  
   751  	c.Assert(d1.Init(swarm.InitRequest{
   752  		ForceNewCluster: true,
   753  		Spec:            swarm.Spec{},
   754  	}), checker.IsNil)
   755  
   756  	waitAndAssert(c, defaultReconciliationTimeout, d1.checkActiveContainerCount, checker.Equals, instances)
   757  
   758  	d3 := s.AddDaemon(c, true, true)
   759  	info, err := d3.info()
   760  	c.Assert(err, checker.IsNil)
   761  	c.Assert(info.ControlAvailable, checker.True)
   762  	c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
   763  
   764  	instances = 4
   765  	d3.updateService(c, d3.getService(c, id), setInstances(instances))
   766  
   767  	waitAndAssert(c, defaultReconciliationTimeout, reducedCheck(sumAsIntegers, d1.checkActiveContainerCount, d3.checkActiveContainerCount), checker.Equals, instances)
   768  }
   769  
   770  func simpleTestService(s *swarm.Service) {
   771  	var ureplicas uint64
   772  	ureplicas = 1
   773  	s.Spec = swarm.ServiceSpec{
   774  		TaskTemplate: swarm.TaskSpec{
   775  			ContainerSpec: swarm.ContainerSpec{
   776  				Image:   "busybox:latest",
   777  				Command: []string{"/bin/top"},
   778  			},
   779  		},
   780  		Mode: swarm.ServiceMode{
   781  			Replicated: &swarm.ReplicatedService{
   782  				Replicas: &ureplicas,
   783  			},
   784  		},
   785  	}
   786  	s.Spec.Name = "top"
   787  }
   788  
   789  func serviceForUpdate(s *swarm.Service) {
   790  	var ureplicas uint64
   791  	ureplicas = 1
   792  	s.Spec = swarm.ServiceSpec{
   793  		TaskTemplate: swarm.TaskSpec{
   794  			ContainerSpec: swarm.ContainerSpec{
   795  				Image:   "busybox:latest",
   796  				Command: []string{"/bin/top"},
   797  			},
   798  		},
   799  		Mode: swarm.ServiceMode{
   800  			Replicated: &swarm.ReplicatedService{
   801  				Replicas: &ureplicas,
   802  			},
   803  		},
   804  		UpdateConfig: &swarm.UpdateConfig{
   805  			Parallelism:   2,
   806  			Delay:         8 * time.Second,
   807  			FailureAction: swarm.UpdateFailureActionContinue,
   808  		},
   809  	}
   810  	s.Spec.Name = "updatetest"
   811  }
   812  
   813  func setInstances(replicas int) serviceConstructor {
   814  	ureplicas := uint64(replicas)
   815  	return func(s *swarm.Service) {
   816  		s.Spec.Mode = swarm.ServiceMode{
   817  			Replicated: &swarm.ReplicatedService{
   818  				Replicas: &ureplicas,
   819  			},
   820  		}
   821  	}
   822  }
   823  
   824  func setImage(image string) serviceConstructor {
   825  	return func(s *swarm.Service) {
   826  		s.Spec.TaskTemplate.ContainerSpec.Image = image
   827  	}
   828  }
   829  
   830  func setGlobalMode(s *swarm.Service) {
   831  	s.Spec.Mode = swarm.ServiceMode{
   832  		Global: &swarm.GlobalService{},
   833  	}
   834  }
   835  
   836  func checkClusterHealth(c *check.C, cl []*SwarmDaemon, managerCount, workerCount int) {
   837  	var totalMCount, totalWCount int
   838  	for _, d := range cl {
   839  		info, err := d.info()
   840  		c.Assert(err, check.IsNil)
   841  		if !info.ControlAvailable {
   842  			totalWCount++
   843  			continue
   844  		}
   845  		var leaderFound bool
   846  		totalMCount++
   847  		var mCount, wCount int
   848  		for _, n := range d.listNodes(c) {
   849  			c.Assert(n.Status.State, checker.Equals, swarm.NodeStateReady, check.Commentf("state of node %s, reported by %s", n.ID, d.Info.NodeID))
   850  			c.Assert(n.Spec.Availability, checker.Equals, swarm.NodeAvailabilityActive, check.Commentf("availability of node %s, reported by %s", n.ID, d.Info.NodeID))
   851  			if n.Spec.Role == swarm.NodeRoleManager {
   852  				c.Assert(n.ManagerStatus, checker.NotNil, check.Commentf("manager status of node %s (manager), reported by %s", n.ID, d.Info.NodeID))
   853  				if n.ManagerStatus.Leader {
   854  					leaderFound = true
   855  				}
   856  				mCount++
   857  			} else {
   858  				c.Assert(n.ManagerStatus, checker.IsNil, check.Commentf("manager status of node %s (worker), reported by %s", n.ID, d.Info.NodeID))
   859  				wCount++
   860  			}
   861  		}
   862  		c.Assert(leaderFound, checker.True, check.Commentf("lack of leader reported by node %s", info.NodeID))
   863  		c.Assert(mCount, checker.Equals, managerCount, check.Commentf("managers count reported by node %s", info.NodeID))
   864  		c.Assert(wCount, checker.Equals, workerCount, check.Commentf("workers count reported by node %s", info.NodeID))
   865  	}
   866  	c.Assert(totalMCount, checker.Equals, managerCount)
   867  	c.Assert(totalWCount, checker.Equals, workerCount)
   868  }
   869  
   870  func (s *DockerSwarmSuite) TestApiSwarmRestartCluster(c *check.C) {
   871  	mCount, wCount := 5, 1
   872  
   873  	var nodes []*SwarmDaemon
   874  	for i := 0; i < mCount; i++ {
   875  		manager := s.AddDaemon(c, true, true)
   876  		info, err := manager.info()
   877  		c.Assert(err, checker.IsNil)
   878  		c.Assert(info.ControlAvailable, checker.True)
   879  		c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
   880  		nodes = append(nodes, manager)
   881  	}
   882  
   883  	for i := 0; i < wCount; i++ {
   884  		worker := s.AddDaemon(c, true, false)
   885  		info, err := worker.info()
   886  		c.Assert(err, checker.IsNil)
   887  		c.Assert(info.ControlAvailable, checker.False)
   888  		c.Assert(info.LocalNodeState, checker.Equals, swarm.LocalNodeStateActive)
   889  		nodes = append(nodes, worker)
   890  	}
   891  
   892  	// stop whole cluster
   893  	{
   894  		var wg sync.WaitGroup
   895  		wg.Add(len(nodes))
   896  		errs := make(chan error, len(nodes))
   897  
   898  		for _, d := range nodes {
   899  			go func(daemon *SwarmDaemon) {
   900  				defer wg.Done()
   901  				if err := daemon.Stop(); err != nil {
   902  					errs <- err
   903  				}
   904  				if root := os.Getenv("DOCKER_REMAP_ROOT"); root != "" {
   905  					daemon.root = filepath.Dir(daemon.root)
   906  				}
   907  			}(d)
   908  		}
   909  		wg.Wait()
   910  		close(errs)
   911  		for err := range errs {
   912  			c.Assert(err, check.IsNil)
   913  		}
   914  	}
   915  
   916  	// start whole cluster
   917  	{
   918  		var wg sync.WaitGroup
   919  		wg.Add(len(nodes))
   920  		errs := make(chan error, len(nodes))
   921  
   922  		for _, d := range nodes {
   923  			go func(daemon *SwarmDaemon) {
   924  				defer wg.Done()
   925  				if err := daemon.Start("--iptables=false"); err != nil {
   926  					errs <- err
   927  				}
   928  			}(d)
   929  		}
   930  		wg.Wait()
   931  		close(errs)
   932  		for err := range errs {
   933  			c.Assert(err, check.IsNil)
   934  		}
   935  	}
   936  
   937  	checkClusterHealth(c, nodes, mCount, wCount)
   938  }