github.com/kim0/docker@v0.6.2-0.20161130212042-4addda3f07e7/daemon/cluster/cluster.go

     1  package cluster
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"net"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"google.golang.org/grpc"
    15  
    16  	"github.com/Sirupsen/logrus"
    17  	"github.com/docker/docker/api/errors"
    18  	apitypes "github.com/docker/docker/api/types"
    19  	"github.com/docker/docker/api/types/filters"
    20  	"github.com/docker/docker/api/types/network"
    21  	types "github.com/docker/docker/api/types/swarm"
    22  	"github.com/docker/docker/daemon/cluster/convert"
    23  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    24  	"github.com/docker/docker/daemon/cluster/executor/container"
    25  	"github.com/docker/docker/opts"
    26  	"github.com/docker/docker/pkg/ioutils"
    27  	"github.com/docker/docker/pkg/signal"
    28  	"github.com/docker/docker/runconfig"
    29  	swarmapi "github.com/docker/swarmkit/api"
    30  	swarmnode "github.com/docker/swarmkit/node"
    31  	"golang.org/x/net/context"
    32  )
    33  
    34  const swarmDirName = "swarm"
    35  const controlSocket = "control.sock"
    36  const swarmConnectTimeout = 20 * time.Second
    37  const swarmRequestTimeout = 20 * time.Second
    38  const stateFile = "docker-state.json"
    39  const defaultAddr = "0.0.0.0:2377"
    40  
    41  const (
    42  	initialReconnectDelay = 100 * time.Millisecond
    43  	maxReconnectDelay     = 30 * time.Second
    44  )
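
        // reconnectOnFailure doubles the retry delay on every restart, starting
        // from initialReconnectDelay and capping it at maxReconnectDelay.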
    45  
    46  // ErrNoSwarm is returned on leaving a cluster that was never initialized
    47  var ErrNoSwarm = fmt.Errorf("This node is not part of a swarm")
    48  
    49  // ErrSwarmExists is returned on initialize or join request for a cluster that has already been activated
    50  var ErrSwarmExists = fmt.Errorf("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")
    51  
    52  // ErrPendingSwarmExists is returned on initialize or join request for a cluster that is already processing a similar request but has not succeeded yet.
    53  var ErrPendingSwarmExists = fmt.Errorf("This node is processing an existing join request that has not succeeded yet. Use \"docker swarm leave\" to cancel the current request.")
    54  
    55  // ErrSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached.
    56  var ErrSwarmJoinTimeoutReached = fmt.Errorf("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")
    57  
    58  // NetworkSubnetsProvider exposes functions for retrieving the subnets
    59  // of networks managed by Docker, so they can be filtered.
    60  type NetworkSubnetsProvider interface {
    61  	V4Subnets() []net.IPNet
    62  	V6Subnets() []net.IPNet
    63  }
    64  
    65  // Config provides values for Cluster.
    66  type Config struct {
    67  	Root                   string
    68  	Name                   string
    69  	Backend                executorpkg.Backend
    70  	NetworkSubnetsProvider NetworkSubnetsProvider
    71  
    72  	// DefaultAdvertiseAddr is the default host/IP or network interface to use
    73  	// if no AdvertiseAddr value is specified.
    74  	DefaultAdvertiseAddr string
    75  
    76  	// path to store runtime state, such as the swarm control socket
    77  	RuntimeRoot string
    78  }
    79  
    80  // Cluster provides capabilities to participate in a cluster as a worker or a
    81  // manager.
    82  type Cluster struct {
    83  	sync.RWMutex
    84  	*node
    85  	root            string
    86  	runtimeRoot     string
    87  	config          Config
    88  	configEvent     chan struct{} // todo: make this an array and goroutine-safe
    89  	actualLocalAddr string        // after resolution, not persisted
    90  	stop            bool
    91  	err             error
    92  	cancelDelay     func()
    93  	attachers       map[string]*attacher
    94  }
    95  
    96  // attacher manages the in-memory attachment state of a container
    97  // attached to a global-scope network managed by the swarm manager. It
    98  // helps in identifying the attachment ID via the taskID and the
    99  // corresponding attachment configuration obtained from the manager.
   100  type attacher struct {
   101  	taskID           string
   102  	config           *network.NetworkingConfig
   103  	attachWaitCh     chan *network.NetworkingConfig
   104  	attachCompleteCh chan struct{}
   105  	detachWaitCh     chan struct{}
   106  }
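
        // Informal sketch of the attach/detach handshake, derived from the methods
        // defined later in this file:
        //
        //	AttachNetwork      allocates a taskID, then closes attachCompleteCh
        //	UpdateAttachment   delivers the config on attachWaitCh and closes it
        //	AttachNetwork      receives the config and returns it to the caller
        //	DetachNetwork      closes detachWaitCh
        //	WaitForDetachment  unblocks and asks the agent to detach the task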
   107  
   108  type node struct {
   109  	*swarmnode.Node
   110  	done           chan struct{}
   111  	ready          bool
   112  	conn           *grpc.ClientConn
   113  	client         swarmapi.ControlClient
   114  	reconnectDelay time.Duration
   115  	config         nodeStartConfig
   116  }
   117  
   118  // nodeStartConfig holds configuration needed to start a new node. Exported
   119  // fields of this structure are saved to disk in json. Unexported fields
   120  // contain data that shouldn't be persisted between daemon reloads.
   121  type nodeStartConfig struct {
   122  	// LocalAddr is this machine's local IP or hostname, if specified.
   123  	LocalAddr string
   124  	// RemoteAddr is the address that was given to "swarm join". It is used
   125  	// to find LocalAddr if necessary.
   126  	RemoteAddr string
   127  	// ListenAddr is the address we bind to, including a port.
   128  	ListenAddr string
   129  	// AdvertiseAddr is the address other nodes should connect to,
   130  	// including a port.
   131  	AdvertiseAddr   string
   132  	joinAddr        string
   133  	forceNewCluster bool
   134  	joinToken       string
   135  }
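
        // For illustration only: the state persisted to docker-state.json carries
        // just the exported fields, e.g. (the addresses below are made up):
        //
        //	{"LocalAddr":"192.168.1.10","RemoteAddr":"192.168.1.2:2377",
        //	 "ListenAddr":"0.0.0.0:2377","AdvertiseAddr":"192.168.1.10:2377"}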
   136  
   137  // New creates a new Cluster instance using provided config.
   138  func New(config Config) (*Cluster, error) {
   139  	root := filepath.Join(config.Root, swarmDirName)
   140  	if err := os.MkdirAll(root, 0700); err != nil {
   141  		return nil, err
   142  	}
   143  	if config.RuntimeRoot == "" {
   144  		config.RuntimeRoot = root
   145  	}
   146  	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
   147  		return nil, err
   148  	}
   149  	c := &Cluster{
   150  		root:        root,
   151  		config:      config,
   152  		configEvent: make(chan struct{}, 10),
   153  		runtimeRoot: config.RuntimeRoot,
   154  		attachers:   make(map[string]*attacher),
   155  	}
   156  
   157  	nodeConfig, err := c.loadState()
   158  	if err != nil {
   159  		if os.IsNotExist(err) {
   160  			return c, nil
   161  		}
   162  		return nil, err
   163  	}
   164  
   165  	n, err := c.startNewNode(*nodeConfig)
   166  	if err != nil {
   167  		return nil, err
   168  	}
   169  
   170  	select {
   171  	case <-time.After(swarmConnectTimeout):
   172  		logrus.Errorf("swarm component could not be started before timeout was reached")
   173  	case <-n.Ready():
   174  	case <-n.done:
   175  		return nil, fmt.Errorf("swarm component could not be started: %v", c.err)
   176  	}
   177  	go c.reconnectOnFailure(n)
   178  	return c, nil
   179  }
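
        // A minimal usage sketch (not part of the original file; daemonBackend is
        // an assumed stand-in for the daemon's executor.Backend implementation):
        //
        //	c, err := cluster.New(cluster.Config{
        //		Root:    "/var/lib/docker",
        //		Backend: daemonBackend,
        //	})
        //	if err != nil {
        //		logrus.Fatalf("error starting cluster component: %v", err)
        //	}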
   180  
   181  func (c *Cluster) loadState() (*nodeStartConfig, error) {
   182  	dt, err := ioutil.ReadFile(filepath.Join(c.root, stateFile))
   183  	if err != nil {
   184  		return nil, err
   185  	}
   186  	// missing certificate means no actual state to restore from
   187  	if _, err := os.Stat(filepath.Join(c.root, "certificates/swarm-node.crt")); err != nil {
   188  		if os.IsNotExist(err) {
   189  			c.clearState()
   190  		}
   191  		return nil, err
   192  	}
   193  	var st nodeStartConfig
   194  	if err := json.Unmarshal(dt, &st); err != nil {
   195  		return nil, err
   196  	}
   197  	return &st, nil
   198  }
   199  
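        // saveState persists config as JSON in docker-state.json. AtomicWriteFile
        // writes to a temporary file and renames it into place, so a crash cannot
        // leave a half-written state file behind.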
   200  func (c *Cluster) saveState(config nodeStartConfig) error {
   201  	dt, err := json.Marshal(config)
   202  	if err != nil {
   203  		return err
   204  	}
   205  	return ioutils.AtomicWriteFile(filepath.Join(c.root, stateFile), dt, 0600)
   206  }
   207  
   208  func (c *Cluster) reconnectOnFailure(n *node) {
   209  	for {
   210  		<-n.done
   211  		c.Lock()
   212  		if c.stop || c.node != nil {
   213  			c.Unlock()
   214  			return
   215  		}
   216  		n.reconnectDelay *= 2
   217  		if n.reconnectDelay > maxReconnectDelay {
   218  			n.reconnectDelay = maxReconnectDelay
   219  		}
   220  		logrus.Warnf("Restarting swarm in %.2f seconds", n.reconnectDelay.Seconds())
   221  		delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
   222  		c.cancelDelay = cancel
   223  		c.Unlock()
   224  		<-delayCtx.Done()
   225  		if delayCtx.Err() != context.DeadlineExceeded {
   226  			return
   227  		}
   228  		c.Lock()
   229  		if c.node != nil {
   230  			c.Unlock()
   231  			return
   232  		}
   234  		config := n.config
   235  		config.RemoteAddr = c.getRemoteAddress()
   236  		config.joinAddr = config.RemoteAddr
   237  		nn, err := c.startNewNode(config)
   238  		if err != nil {
        			// startNewNode returns a nil node on error; keep the old node
        			// (its done channel is already closed) so the loop retries with
        			// a longer delay instead of dereferencing nil.
   239  			c.err = err
        			c.Unlock()
        			continue
   241  		}
        		n = nn
   242  		c.Unlock()
   243  	}
   244  }
   245  
   246  func (c *Cluster) startNewNode(conf nodeStartConfig) (*node, error) {
   247  	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
   248  		return nil, err
   249  	}
   250  
   251  	actualLocalAddr := conf.LocalAddr
   252  	if actualLocalAddr == "" {
   253  		// If localAddr was not specified, resolve it automatically
   254  		// based on the route to joinAddr. localAddr can only be left
   255  		// empty on "join".
   256  		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
   257  		if err != nil {
   258  			return nil, fmt.Errorf("could not parse listen address: %v", err)
   259  		}
   260  
   261  		listenAddrIP := net.ParseIP(listenHost)
   262  		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
   263  			actualLocalAddr = listenHost
   264  		} else {
   265  			if conf.RemoteAddr == "" {
    266  				// Should never happen, except with swarms created by
   267  				// old versions that didn't save remoteAddr.
   268  				conf.RemoteAddr = "8.8.8.8:53"
   269  			}
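        			// Dialing UDP sends no packets; it only asks the kernel which
        			// local source address would be used to reach RemoteAddr.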
   270  			conn, err := net.Dial("udp", conf.RemoteAddr)
   271  			if err != nil {
   272  				return nil, fmt.Errorf("could not find local IP address: %v", err)
   273  			}
   274  			localHostPort := conn.LocalAddr().String()
   275  			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
   276  			conn.Close()
   277  		}
   278  	}
   279  
   280  	c.node = nil
   281  	c.cancelDelay = nil
   282  	c.stop = false
   283  	n, err := swarmnode.New(&swarmnode.Config{
   284  		Hostname:           c.config.Name,
   285  		ForceNewCluster:    conf.forceNewCluster,
   286  		ListenControlAPI:   filepath.Join(c.runtimeRoot, controlSocket),
   287  		ListenRemoteAPI:    conf.ListenAddr,
   288  		AdvertiseRemoteAPI: conf.AdvertiseAddr,
   289  		JoinAddr:           conf.joinAddr,
   290  		StateDir:           c.root,
   291  		JoinToken:          conf.joinToken,
   292  		Executor:           container.NewExecutor(c.config.Backend),
   293  		HeartbeatTick:      1,
   294  		ElectionTick:       3,
   295  	})
   296  	if err != nil {
   297  		return nil, err
   298  	}
   299  	ctx := context.Background()
   300  	if err := n.Start(ctx); err != nil {
   301  		return nil, err
   302  	}
   303  	node := &node{
   304  		Node:           n,
   305  		done:           make(chan struct{}),
   306  		reconnectDelay: initialReconnectDelay,
   307  		config:         conf,
   308  	}
   309  	c.node = node
   310  	c.actualLocalAddr = actualLocalAddr // not saved
   311  	if err := c.saveState(conf); err != nil {
        		logrus.Warnf("failed to save swarm state: %v", err)
        	}
   312  
   313  	c.config.Backend.SetClusterProvider(c)
   314  	go func() {
   315  		err := n.Err(ctx)
   316  		if err != nil {
   317  			logrus.Errorf("cluster exited with error: %v", err)
   318  		}
   319  		c.Lock()
   320  		c.node = nil
   321  		c.err = err
   322  		c.Unlock()
   323  		close(node.done)
   324  	}()
   325  
   326  	go func() {
   327  		select {
   328  		case <-n.Ready():
   329  			c.Lock()
   330  			node.ready = true
   331  			c.err = nil
   332  			c.Unlock()
   333  		case <-ctx.Done():
   334  		}
   335  		c.configEvent <- struct{}{}
   336  	}()
   337  
   338  	go func() {
   339  		for conn := range n.ListenControlSocket(ctx) {
   340  			c.Lock()
   341  			if node.conn != conn {
   342  				if conn == nil {
   343  					node.client = nil
   344  				} else {
   345  					node.client = swarmapi.NewControlClient(conn)
   346  				}
   347  			}
   348  			node.conn = conn
   349  			c.Unlock()
   350  			c.configEvent <- struct{}{}
   351  		}
   352  	}()
   353  
   354  	return node, nil
   355  }
   356  
   357  // Init initializes new cluster from user provided request.
   358  func (c *Cluster) Init(req types.InitRequest) (string, error) {
   359  	c.Lock()
   360  	if node := c.node; node != nil {
   361  		if !req.ForceNewCluster {
   362  			c.Unlock()
   363  			return "", ErrSwarmExists
   364  		}
   365  		if err := c.stopNode(); err != nil {
   366  			c.Unlock()
   367  			return "", err
   368  		}
   369  	}
   370  
   371  	if err := validateAndSanitizeInitRequest(&req); err != nil {
   372  		c.Unlock()
   373  		return "", err
   374  	}
   375  
   376  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   377  	if err != nil {
   378  		c.Unlock()
   379  		return "", err
   380  	}
   381  
   382  	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   383  	if err != nil {
   384  		c.Unlock()
   385  		return "", err
   386  	}
   387  
   388  	localAddr := listenHost
   389  
   390  	// If the advertise address is not one of the system's
   391  	// addresses, we also require a listen address.
   392  	listenAddrIP := net.ParseIP(listenHost)
   393  	if listenAddrIP != nil && listenAddrIP.IsUnspecified() {
   394  		advertiseIP := net.ParseIP(advertiseHost)
   395  		if advertiseIP == nil {
   396  			// not an IP
   397  			c.Unlock()
   398  			return "", errMustSpecifyListenAddr
   399  		}
   400  
   401  		systemIPs := listSystemIPs()
   402  
   403  		found := false
   404  		for _, systemIP := range systemIPs {
   405  			if systemIP.Equal(advertiseIP) {
   406  				found = true
   407  				break
   408  			}
   409  		}
   410  		if !found {
   411  			c.Unlock()
   412  			return "", errMustSpecifyListenAddr
   413  		}
   414  		localAddr = advertiseIP.String()
   415  	}
   416  
   417  	// todo: check whether state already exists
   418  	n, err := c.startNewNode(nodeStartConfig{
   419  		forceNewCluster: req.ForceNewCluster,
   420  		LocalAddr:       localAddr,
   421  		ListenAddr:      net.JoinHostPort(listenHost, listenPort),
   422  		AdvertiseAddr:   net.JoinHostPort(advertiseHost, advertisePort),
   423  	})
   424  	if err != nil {
   425  		c.Unlock()
   426  		return "", err
   427  	}
   428  	c.Unlock()
   429  
   430  	select {
   431  	case <-n.Ready():
   432  		if err := initClusterSpec(n, req.Spec); err != nil {
   433  			return "", err
   434  		}
   435  		go c.reconnectOnFailure(n)
   436  		return n.NodeID(), nil
   437  	case <-n.done:
   438  		c.RLock()
   439  		defer c.RUnlock()
   440  		if !req.ForceNewCluster { // if the first attempt failed, don't keep its state
   441  			if err := c.clearState(); err != nil {
   442  				return "", err
   443  			}
   444  		}
   445  		return "", c.err
   446  	}
   447  }
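
        // Illustrative call (the addresses here are assumptions):
        //
        //	nodeID, err := c.Init(types.InitRequest{
        //		ListenAddr:    "0.0.0.0:2377",
        //		AdvertiseAddr: "192.168.1.10:2377",
        //	})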
   448  
   449  // Join makes current Cluster part of an existing swarm cluster.
   450  func (c *Cluster) Join(req types.JoinRequest) error {
   451  	c.Lock()
   452  	if node := c.node; node != nil {
   453  		c.Unlock()
   454  		return ErrSwarmExists
   455  	}
   456  	if err := validateAndSanitizeJoinRequest(&req); err != nil {
   457  		c.Unlock()
   458  		return err
   459  	}
   460  
   461  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   462  	if err != nil {
   463  		c.Unlock()
   464  		return err
   465  	}
   466  
   467  	var advertiseAddr string
   468  	if req.AdvertiseAddr != "" {
   469  		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   470  		// For joining, we don't need to provide an advertise address,
   471  		// since the remote side can detect it.
   472  		if err == nil {
   473  			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
   474  		}
   475  	}
   476  
   477  	// todo: check whether state already exists
   478  	n, err := c.startNewNode(nodeStartConfig{
   479  		RemoteAddr:    req.RemoteAddrs[0],
   480  		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
   481  		AdvertiseAddr: advertiseAddr,
   482  		joinAddr:      req.RemoteAddrs[0],
   483  		joinToken:     req.JoinToken,
   484  	})
   485  	if err != nil {
   486  		c.Unlock()
   487  		return err
   488  	}
   489  	c.Unlock()
   490  
   491  	select {
   492  	case <-time.After(swarmConnectTimeout):
   493  		// attempt to connect will continue in background, also reconnecting
   494  		go c.reconnectOnFailure(n)
   495  		return ErrSwarmJoinTimeoutReached
   496  	case <-n.Ready():
   497  		go c.reconnectOnFailure(n)
   498  		return nil
   499  	case <-n.done:
   500  		c.RLock()
   501  		defer c.RUnlock()
   502  		return c.err
   503  	}
   504  }
   505  
   506  // stopNode is a helper that stops the active c.node and waits until it has
   507  // shut down. Call with the cluster lock held.
   508  func (c *Cluster) stopNode() error {
   509  	if c.node == nil {
   510  		return nil
   511  	}
   512  	c.stop = true
   513  	if c.cancelDelay != nil {
   514  		c.cancelDelay()
   515  		c.cancelDelay = nil
   516  	}
   517  	node := c.node
   518  	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
   519  	defer cancel()
   520  	// TODO: can't hold lock on stop because it calls back to network
   521  	c.Unlock()
   522  	defer c.Lock()
   523  	if err := node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
   524  		return err
   525  	}
   526  	<-node.done
   527  	return nil
   528  }
   529  
   530  func removingManagerCausesLossOfQuorum(reachable, unreachable int) bool {
   531  	return reachable-2 <= unreachable
   532  }
   533  
   534  func isLastManager(reachable, unreachable int) bool {
   535  	return reachable == 1 && unreachable == 0
   536  }
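
        // For example: with 3 reachable and 0 unreachable managers, removal is safe
        // (3-2 <= 0 is false); with 2 reachable managers, removing one loses the
        // Raft quorum (2-2 <= 0 is true).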
   537  
   538  // Leave shuts down Cluster and removes current state.
   539  func (c *Cluster) Leave(force bool) error {
   540  	c.Lock()
   541  	node := c.node
   542  	if node == nil {
   543  		c.Unlock()
   544  		return ErrNoSwarm
   545  	}
   546  
   547  	if node.Manager() != nil && !force {
   548  		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
   549  		if c.isActiveManager() {
   550  			active, reachable, unreachable, err := c.managerStats()
   551  			if err == nil {
   552  				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
   553  					if isLastManager(reachable, unreachable) {
   554  						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
   555  						c.Unlock()
   556  						return fmt.Errorf(msg)
   557  					}
   558  					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
   559  				}
   560  			}
   561  		} else {
   562  			msg += "Doing so may lose the consensus of your cluster. "
   563  		}
   564  
   565  		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
   566  		c.Unlock()
   567  		return fmt.Errorf(msg)
   568  	}
   569  	if err := c.stopNode(); err != nil {
   570  		logrus.Errorf("failed to shut down cluster node: %v", err)
   571  		signal.DumpStacks("")
   572  		c.Unlock()
   573  		return err
   574  	}
   575  	c.Unlock()
   576  	if nodeID := node.NodeID(); nodeID != "" {
   577  		nodeContainers, err := c.listContainerForNode(nodeID)
   578  		if err != nil {
   579  			return err
   580  		}
   581  		for _, id := range nodeContainers {
   582  			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
   583  				logrus.Errorf("error removing %v: %v", id, err)
   584  			}
   585  		}
   586  	}
   587  	c.configEvent <- struct{}{}
   588  	// todo: cleanup optional?
   589  	if err := c.clearState(); err != nil {
   590  		return err
   591  	}
   592  	return nil
   593  }
   594  
   595  func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
   596  	var ids []string
   597  	filters := filters.NewArgs()
   598  	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
   599  	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
   600  		Filter: filters,
   601  	})
   602  	if err != nil {
   603  		return []string{}, err
   604  	}
   605  	for _, c := range containers {
   606  		ids = append(ids, c.ID)
   607  	}
   608  	return ids, nil
   609  }
   610  
   611  func (c *Cluster) clearState() error {
   612  	// todo: backup this data instead of removing?
   613  	if err := os.RemoveAll(c.root); err != nil {
   614  		return err
   615  	}
   616  	if err := os.MkdirAll(c.root, 0700); err != nil {
   617  		return err
   618  	}
   619  	c.config.Backend.SetClusterProvider(nil)
   620  	return nil
   621  }
   622  
   623  func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum loss
   624  	return context.WithTimeout(context.Background(), swarmRequestTimeout)
   625  }
   626  
   627  // Inspect retrieves the configuration properties of a managed swarm cluster.
   628  func (c *Cluster) Inspect() (types.Swarm, error) {
   629  	c.RLock()
   630  	defer c.RUnlock()
   631  
   632  	if !c.isActiveManager() {
   633  		return types.Swarm{}, c.errNoManager()
   634  	}
   635  
   636  	ctx, cancel := c.getRequestContext()
   637  	defer cancel()
   638  
   639  	swarm, err := getSwarm(ctx, c.client)
   640  	if err != nil {
   641  		return types.Swarm{}, err
   642  	}
   643  
   644  	return convert.SwarmFromGRPC(*swarm), nil
   645  }
   646  
   647  // Update updates configuration of a managed swarm cluster.
   648  func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
   649  	c.RLock()
   650  	defer c.RUnlock()
   651  
   652  	if !c.isActiveManager() {
   653  		return c.errNoManager()
   654  	}
   655  
   656  	ctx, cancel := c.getRequestContext()
   657  	defer cancel()
   658  
   659  	swarm, err := getSwarm(ctx, c.client)
   660  	if err != nil {
   661  		return err
   662  	}
   663  
   664  	// In Update, the client should provide the complete spec of the swarm,
   665  	// including Name and Labels. If a field is specified as 0 or nil, swarmkit
   666  	// falls back to its default value.
   667  	clusterSpec, err := convert.SwarmSpecToGRPC(spec)
   668  	if err != nil {
   669  		return err
   670  	}
   671  
   672  	_, err = c.client.UpdateCluster(
   673  		ctx,
   674  		&swarmapi.UpdateClusterRequest{
   675  			ClusterID: swarm.ID,
   676  			Spec:      &clusterSpec,
   677  			ClusterVersion: &swarmapi.Version{
   678  				Index: version,
   679  			},
   680  			Rotation: swarmapi.JoinTokenRotation{
   681  				RotateWorkerToken:  flags.RotateWorkerToken,
   682  				RotateManagerToken: flags.RotateManagerToken,
   683  			},
   684  		},
   685  	)
   686  	return err
   687  }
   688  
   689  // IsManager returns true if Cluster is participating as a manager.
   690  func (c *Cluster) IsManager() bool {
   691  	c.RLock()
   692  	defer c.RUnlock()
   693  	return c.isActiveManager()
   694  }
   695  
   696  // IsAgent returns true if Cluster is participating as a worker/agent.
   697  func (c *Cluster) IsAgent() bool {
   698  	c.RLock()
   699  	defer c.RUnlock()
   700  	return c.node != nil && c.ready
   701  }
   702  
   703  // GetLocalAddress returns the local address.
   704  func (c *Cluster) GetLocalAddress() string {
   705  	c.RLock()
   706  	defer c.RUnlock()
   707  	return c.actualLocalAddr
   708  }
   709  
   710  // GetListenAddress returns the listen address.
   711  func (c *Cluster) GetListenAddress() string {
   712  	c.RLock()
   713  	defer c.RUnlock()
   714  	if c.node != nil {
   715  		return c.node.config.ListenAddr
   716  	}
   717  	return ""
   718  }
   719  
   720  // GetAdvertiseAddress returns the remotely reachable address of this node.
   721  func (c *Cluster) GetAdvertiseAddress() string {
   722  	c.RLock()
   723  	defer c.RUnlock()
   724  	if c.node != nil && c.node.config.AdvertiseAddr != "" {
   725  		advertiseHost, _, _ := net.SplitHostPort(c.node.config.AdvertiseAddr)
   726  		return advertiseHost
   727  	}
   728  	return c.actualLocalAddr
   729  }
   730  
   731  // GetRemoteAddress returns a known advertise address of a remote manager if
   732  // available.
   733  // todo: change to array/connect with info
   734  func (c *Cluster) GetRemoteAddress() string {
   735  	c.RLock()
   736  	defer c.RUnlock()
   737  	return c.getRemoteAddress()
   738  }
   739  
   740  func (c *Cluster) getRemoteAddress() string {
   741  	if c.node == nil {
   742  		return ""
   743  	}
   744  	nodeID := c.node.NodeID()
   745  	for _, r := range c.node.Remotes() {
   746  		if r.NodeID != nodeID {
   747  			return r.Addr
   748  		}
   749  	}
   750  	return ""
   751  }
   752  
   753  // ListenClusterEvents returns a channel that receives messages on cluster
   754  // participation changes.
   755  // todo: make cancelable and accessible to multiple callers
   756  func (c *Cluster) ListenClusterEvents() <-chan struct{} {
   757  	return c.configEvent
   758  }
   759  
   760  // Info returns information about the current cluster state.
   761  func (c *Cluster) Info() types.Info {
   762  	info := types.Info{
   763  		NodeAddr: c.GetAdvertiseAddress(),
   764  	}
   765  
   766  	c.RLock()
   767  	defer c.RUnlock()
   768  
   769  	if c.node == nil {
   770  		info.LocalNodeState = types.LocalNodeStateInactive
   771  		if c.cancelDelay != nil {
   772  			info.LocalNodeState = types.LocalNodeStateError
   773  		}
   774  	} else {
   775  		info.LocalNodeState = types.LocalNodeStatePending
   776  		if c.ready {
   777  			info.LocalNodeState = types.LocalNodeStateActive
   778  		}
   779  	}
   780  	if c.err != nil {
   781  		info.Error = c.err.Error()
   782  	}
   783  
   784  	ctx, cancel := c.getRequestContext()
   785  	defer cancel()
   786  
   787  	if c.isActiveManager() {
   788  		info.ControlAvailable = true
   789  		swarm, err := c.Inspect()
   790  		if err != nil {
   791  			info.Error = err.Error()
   792  		}
   793  
   794  		// Strip JoinTokens
   795  		info.Cluster = swarm.ClusterInfo
   796  
   797  		if r, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err == nil {
   798  			info.Nodes = len(r.Nodes)
   799  			for _, n := range r.Nodes {
   800  				if n.ManagerStatus != nil {
   801  					info.Managers++
   802  				}
   803  			}
   804  		}
   805  	}
   806  
   807  	if c.node != nil {
   808  		for _, r := range c.node.Remotes() {
   809  			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
   810  		}
   811  		info.NodeID = c.node.NodeID()
   812  	}
   813  
   814  	return info
   815  }
   816  
   817  // isActiveManager should not be called without a read lock
   818  func (c *Cluster) isActiveManager() bool {
   819  	return c.node != nil && c.conn != nil
   820  }
   821  
   822  // errNoManager returns error describing why manager commands can't be used.
   823  // Call with read lock.
   824  func (c *Cluster) errNoManager() error {
   825  	if c.node == nil {
   826  		return fmt.Errorf("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
   827  	}
   828  	if c.node.Manager() != nil {
   829  		return fmt.Errorf("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
   830  	}
   831  	return fmt.Errorf("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
   832  }
   833  
   834  // GetServices returns all services of a managed swarm cluster.
   835  func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
   836  	c.RLock()
   837  	defer c.RUnlock()
   838  
   839  	if !c.isActiveManager() {
   840  		return nil, c.errNoManager()
   841  	}
   842  
   843  	filters, err := newListServicesFilters(options.Filter)
   844  	if err != nil {
   845  		return nil, err
   846  	}
   847  	ctx, cancel := c.getRequestContext()
   848  	defer cancel()
   849  
   850  	r, err := c.client.ListServices(
   851  		ctx,
   852  		&swarmapi.ListServicesRequest{Filters: filters})
   853  	if err != nil {
   854  		return nil, err
   855  	}
   856  
   857  	services := []types.Service{}
   858  
   859  	for _, service := range r.Services {
   860  		services = append(services, convert.ServiceFromGRPC(*service))
   861  	}
   862  
   863  	return services, nil
   864  }
   865  
   866  // CreateService creates a new service in a managed swarm cluster.
   867  func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (string, error) {
   868  	c.RLock()
   869  	defer c.RUnlock()
   870  
   871  	if !c.isActiveManager() {
   872  		return "", c.errNoManager()
   873  	}
   874  
   875  	ctx, cancel := c.getRequestContext()
   876  	defer cancel()
   877  
   878  	err := c.populateNetworkID(ctx, c.client, &s)
   879  	if err != nil {
   880  		return "", err
   881  	}
   882  
   883  	serviceSpec, err := convert.ServiceSpecToGRPC(s)
   884  	if err != nil {
   885  		return "", err
   886  	}
   887  
   888  	if encodedAuth != "" {
   889  		ctnr := serviceSpec.Task.GetContainer()
   890  		if ctnr == nil {
   891  			return "", fmt.Errorf("service does not use container tasks")
   892  		}
   893  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   894  	}
   895  
   896  	r, err := c.client.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
   897  	if err != nil {
   898  		return "", err
   899  	}
   900  
   901  	return r.Service.ID, nil
   902  }
   903  
   904  // GetService returns a service based on an ID or name.
   905  func (c *Cluster) GetService(input string) (types.Service, error) {
   906  	c.RLock()
   907  	defer c.RUnlock()
   908  
   909  	if !c.isActiveManager() {
   910  		return types.Service{}, c.errNoManager()
   911  	}
   912  
   913  	ctx, cancel := c.getRequestContext()
   914  	defer cancel()
   915  
   916  	service, err := getService(ctx, c.client, input)
   917  	if err != nil {
   918  		return types.Service{}, err
   919  	}
   920  	return convert.ServiceFromGRPC(*service), nil
   921  }
   922  
   923  // UpdateService updates existing service to match new properties.
   924  func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) error {
   925  	c.RLock()
   926  	defer c.RUnlock()
   927  
   928  	if !c.isActiveManager() {
   929  		return c.errNoManager()
   930  	}
   931  
   932  	ctx, cancel := c.getRequestContext()
   933  	defer cancel()
   934  
   935  	err := c.populateNetworkID(ctx, c.client, &spec)
   936  	if err != nil {
   937  		return err
   938  	}
   939  
   940  	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
   941  	if err != nil {
   942  		return err
   943  	}
   944  
   945  	currentService, err := getService(ctx, c.client, serviceIDOrName)
   946  	if err != nil {
   947  		return err
   948  	}
   949  
   950  	if encodedAuth != "" {
   951  		ctnr := serviceSpec.Task.GetContainer()
   952  		if ctnr == nil {
   953  			return fmt.Errorf("service does not use container tasks")
   954  		}
   955  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   956  	} else {
   957  		// If encodedAuth isn't being updated, we shouldn't lose the existing
   958  		// value: keep using the auth that is already present in the spec.
   959  		var ctnr *swarmapi.ContainerSpec
   960  		switch registryAuthFrom {
   961  		case apitypes.RegistryAuthFromSpec, "":
   962  			ctnr = currentService.Spec.Task.GetContainer()
   963  		case apitypes.RegistryAuthFromPreviousSpec:
   964  			if currentService.PreviousSpec == nil {
   965  				return fmt.Errorf("service does not have a previous spec")
   966  			}
   967  			ctnr = currentService.PreviousSpec.Task.GetContainer()
   968  		default:
   969  			return fmt.Errorf("unsupported registryAuthFrom value")
   970  		}
   971  		if ctnr == nil {
   972  			return fmt.Errorf("service does not use container tasks")
   973  		}
   974  		serviceSpec.Task.GetContainer().PullOptions = ctnr.PullOptions
   975  	}
   976  
   977  	_, err = c.client.UpdateService(
   978  		ctx,
   979  		&swarmapi.UpdateServiceRequest{
   980  			ServiceID: currentService.ID,
   981  			Spec:      &serviceSpec,
   982  			ServiceVersion: &swarmapi.Version{
   983  				Index: version,
   984  			},
   985  		},
   986  	)
   987  	return err
   988  }
   989  
   990  // RemoveService removes a service from a managed swarm cluster.
   991  func (c *Cluster) RemoveService(input string) error {
   992  	c.RLock()
   993  	defer c.RUnlock()
   994  
   995  	if !c.isActiveManager() {
   996  		return c.errNoManager()
   997  	}
   998  
   999  	ctx, cancel := c.getRequestContext()
  1000  	defer cancel()
  1001  
  1002  	service, err := getService(ctx, c.client, input)
  1003  	if err != nil {
  1004  		return err
  1005  	}
  1006  
  1007  	if _, err := c.client.RemoveService(ctx, &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil {
  1008  		return err
  1009  	}
  1010  	return nil
  1011  }
  1012  
  1013  // GetNodes returns a list of all nodes known to a cluster.
  1014  func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
  1015  	c.RLock()
  1016  	defer c.RUnlock()
  1017  
  1018  	if !c.isActiveManager() {
  1019  		return nil, c.errNoManager()
  1020  	}
  1021  
  1022  	filters, err := newListNodesFilters(options.Filter)
  1023  	if err != nil {
  1024  		return nil, err
  1025  	}
  1026  
  1027  	ctx, cancel := c.getRequestContext()
  1028  	defer cancel()
  1029  
  1030  	r, err := c.client.ListNodes(
  1031  		ctx,
  1032  		&swarmapi.ListNodesRequest{Filters: filters})
  1033  	if err != nil {
  1034  		return nil, err
  1035  	}
  1036  
  1037  	nodes := []types.Node{}
  1038  
  1039  	for _, node := range r.Nodes {
  1040  		nodes = append(nodes, convert.NodeFromGRPC(*node))
  1041  	}
  1042  	return nodes, nil
  1043  }
  1044  
  1045  // GetNode returns a node based on an ID or name.
  1046  func (c *Cluster) GetNode(input string) (types.Node, error) {
  1047  	c.RLock()
  1048  	defer c.RUnlock()
  1049  
  1050  	if !c.isActiveManager() {
  1051  		return types.Node{}, c.errNoManager()
  1052  	}
  1053  
  1054  	ctx, cancel := c.getRequestContext()
  1055  	defer cancel()
  1056  
  1057  	node, err := getNode(ctx, c.client, input)
  1058  	if err != nil {
  1059  		return types.Node{}, err
  1060  	}
  1061  	return convert.NodeFromGRPC(*node), nil
  1062  }
  1063  
  1064  // UpdateNode updates existing nodes properties.
  1065  func (c *Cluster) UpdateNode(nodeID string, version uint64, spec types.NodeSpec) error {
  1066  	c.RLock()
  1067  	defer c.RUnlock()
  1068  
  1069  	if !c.isActiveManager() {
  1070  		return c.errNoManager()
  1071  	}
  1072  
  1073  	nodeSpec, err := convert.NodeSpecToGRPC(spec)
  1074  	if err != nil {
  1075  		return err
  1076  	}
  1077  
  1078  	ctx, cancel := c.getRequestContext()
  1079  	defer cancel()
  1080  
  1081  	_, err = c.client.UpdateNode(
  1082  		ctx,
  1083  		&swarmapi.UpdateNodeRequest{
  1084  			NodeID: nodeID,
  1085  			Spec:   &nodeSpec,
  1086  			NodeVersion: &swarmapi.Version{
  1087  				Index: version,
  1088  			},
  1089  		},
  1090  	)
  1091  	return err
  1092  }
  1093  
  1094  // RemoveNode removes a node from a cluster
  1095  func (c *Cluster) RemoveNode(input string, force bool) error {
  1096  	c.RLock()
  1097  	defer c.RUnlock()
  1098  
  1099  	if !c.isActiveManager() {
  1100  		return c.errNoManager()
  1101  	}
  1102  
  1103  	ctx, cancel := c.getRequestContext()
  1104  	defer cancel()
  1105  
  1106  	node, err := getNode(ctx, c.client, input)
  1107  	if err != nil {
  1108  		return err
  1109  	}
  1110  
  1111  	if _, err := c.client.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID, Force: force}); err != nil {
  1112  		return err
  1113  	}
  1114  	return nil
  1115  }
  1116  
  1117  // GetTasks returns a list of tasks matching the filter options.
  1118  func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
  1119  	c.RLock()
  1120  	defer c.RUnlock()
  1121  
  1122  	if !c.isActiveManager() {
  1123  		return nil, c.errNoManager()
  1124  	}
  1125  
  1126  	byName := func(filter filters.Args) error {
  1127  		if filter.Include("service") {
  1128  			serviceFilters := filter.Get("service")
  1129  			for _, serviceFilter := range serviceFilters {
  1130  				service, err := c.GetService(serviceFilter)
  1131  				if err != nil {
  1132  					return err
  1133  				}
  1134  				filter.Del("service", serviceFilter)
  1135  				filter.Add("service", service.ID)
  1136  			}
  1137  		}
  1138  		if filter.Include("node") {
  1139  			nodeFilters := filter.Get("node")
  1140  			for _, nodeFilter := range nodeFilters {
  1141  				node, err := c.GetNode(nodeFilter)
  1142  				if err != nil {
  1143  					return err
  1144  				}
  1145  				filter.Del("node", nodeFilter)
  1146  				filter.Add("node", node.ID)
  1147  			}
  1148  		}
  1149  		return nil
  1150  	}
  1151  
  1152  	filters, err := newListTasksFilters(options.Filter, byName)
  1153  	if err != nil {
  1154  		return nil, err
  1155  	}
  1156  
  1157  	ctx, cancel := c.getRequestContext()
  1158  	defer cancel()
  1159  
  1160  	r, err := c.client.ListTasks(
  1161  		ctx,
  1162  		&swarmapi.ListTasksRequest{Filters: filters})
  1163  	if err != nil {
  1164  		return nil, err
  1165  	}
  1166  
  1167  	tasks := []types.Task{}
  1168  
  1169  	for _, task := range r.Tasks {
  1170  		if task.Spec.GetContainer() != nil {
  1171  			tasks = append(tasks, convert.TaskFromGRPC(*task))
  1172  		}
  1173  	}
  1174  	return tasks, nil
  1175  }
  1176  
  1177  // GetTask returns a task by an ID.
  1178  func (c *Cluster) GetTask(input string) (types.Task, error) {
  1179  	c.RLock()
  1180  	defer c.RUnlock()
  1181  
  1182  	if !c.isActiveManager() {
  1183  		return types.Task{}, c.errNoManager()
  1184  	}
  1185  
  1186  	ctx, cancel := c.getRequestContext()
  1187  	defer cancel()
  1188  
  1189  	task, err := getTask(ctx, c.client, input)
  1190  	if err != nil {
  1191  		return types.Task{}, err
  1192  	}
  1193  	return convert.TaskFromGRPC(*task), nil
  1194  }
  1195  
  1196  // GetNetwork returns a cluster network by an ID.
  1197  func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
  1198  	c.RLock()
  1199  	defer c.RUnlock()
  1200  
  1201  	if !c.isActiveManager() {
  1202  		return apitypes.NetworkResource{}, c.errNoManager()
  1203  	}
  1204  
  1205  	ctx, cancel := c.getRequestContext()
  1206  	defer cancel()
  1207  
  1208  	network, err := getNetwork(ctx, c.client, input)
  1209  	if err != nil {
  1210  		return apitypes.NetworkResource{}, err
  1211  	}
  1212  	return convert.BasicNetworkFromGRPC(*network), nil
  1213  }
  1214  
  1215  // GetNetworks returns all current cluster managed networks.
  1216  func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
  1217  	c.RLock()
  1218  	defer c.RUnlock()
  1219  
  1220  	if !c.isActiveManager() {
  1221  		return nil, c.errNoManager()
  1222  	}
  1223  
  1224  	ctx, cancel := c.getRequestContext()
  1225  	defer cancel()
  1226  
  1227  	r, err := c.client.ListNetworks(ctx, &swarmapi.ListNetworksRequest{})
  1228  	if err != nil {
  1229  		return nil, err
  1230  	}
  1231  
  1232  	var networks []apitypes.NetworkResource
  1233  
  1234  	for _, network := range r.Networks {
  1235  		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
  1236  	}
  1237  
  1238  	return networks, nil
  1239  }
  1240  
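        // attacherKey builds the map key used in Cluster.attachers; for a
        // hypothetical container "abc123" on network "ingress" it would yield
        // "abc123:ingress".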
  1241  func attacherKey(target, containerID string) string {
  1242  	return containerID + ":" + target
  1243  }
  1244  
  1245  // UpdateAttachment signals the attachment config to the attachment
  1246  // waiter who is trying to start or attach the container to the
  1247  // network.
  1248  func (c *Cluster) UpdateAttachment(target, containerID string, config *network.NetworkingConfig) error {
  1249  	c.RLock()
  1250  	attacher, ok := c.attachers[attacherKey(target, containerID)]
  1251  	c.RUnlock()
  1252  	if !ok || attacher == nil {
  1253  		return fmt.Errorf("could not find attacher for container %s to network %s", containerID, target)
  1254  	}
  1255  
  1256  	attacher.attachWaitCh <- config
  1257  	close(attacher.attachWaitCh)
  1258  	return nil
  1259  }
  1260  
  1261  // WaitForDetachment waits for the container to stop or detach from
  1262  // the network.
  1263  func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID, taskID, containerID string) error {
  1264  	c.RLock()
  1265  	attacher, ok := c.attachers[attacherKey(networkName, containerID)]
  1266  	if !ok {
  1267  		attacher, ok = c.attachers[attacherKey(networkID, containerID)]
  1268  	}
  1269  	if c.node == nil || c.node.Agent() == nil {
  1270  		c.RUnlock()
  1271  		return fmt.Errorf("invalid cluster node while waiting for detachment")
  1272  	}
  1273  
  1274  	agent := c.node.Agent()
  1275  	c.RUnlock()
  1276  
  1277  	if ok && attacher != nil &&
  1278  		attacher.detachWaitCh != nil &&
  1279  		attacher.attachCompleteCh != nil {
  1280  		// Attachment may be in progress still so wait for
  1281  		// attachment to complete.
  1282  		select {
  1283  		case <-attacher.attachCompleteCh:
  1284  		case <-ctx.Done():
  1285  			return ctx.Err()
  1286  		}
  1287  
  1288  		if attacher.taskID == taskID {
  1289  			select {
  1290  			case <-attacher.detachWaitCh:
  1291  			case <-ctx.Done():
  1292  				return ctx.Err()
  1293  			}
  1294  		}
  1295  	}
  1296  
  1297  	return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
  1298  }
  1299  
  1300  // AttachNetwork generates an attachment request towards the manager.
  1301  func (c *Cluster) AttachNetwork(target string, containerID string, addresses []string) (*network.NetworkingConfig, error) {
  1302  	aKey := attacherKey(target, containerID)
  1303  	c.Lock()
  1304  	if c.node == nil || c.node.Agent() == nil {
  1305  		c.Unlock()
  1306  		return nil, fmt.Errorf("invalid cluster node while attaching to network")
  1307  	}
  1308  	if attacher, ok := c.attachers[aKey]; ok {
  1309  		c.Unlock()
  1310  		return attacher.config, nil
  1311  	}
  1312  
  1313  	agent := c.node.Agent()
  1314  	attachWaitCh := make(chan *network.NetworkingConfig)
  1315  	detachWaitCh := make(chan struct{})
  1316  	attachCompleteCh := make(chan struct{})
  1317  	c.attachers[aKey] = &attacher{
  1318  		attachWaitCh:     attachWaitCh,
  1319  		attachCompleteCh: attachCompleteCh,
  1320  		detachWaitCh:     detachWaitCh,
  1321  	}
  1322  	c.Unlock()
  1323  
  1324  	ctx, cancel := c.getRequestContext()
  1325  	defer cancel()
  1326  
  1327  	taskID, err := agent.ResourceAllocator().AttachNetwork(ctx, containerID, target, addresses)
  1328  	if err != nil {
  1329  		c.Lock()
  1330  		delete(c.attachers, aKey)
  1331  		c.Unlock()
  1332  		return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
  1333  	}
  1334  
  1335  	c.Lock()
  1336  	c.attachers[aKey].taskID = taskID
  1337  	close(attachCompleteCh)
  1338  	c.Unlock()
  1339  
  1340  	logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
  1341  
  1342  	var config *network.NetworkingConfig
  1343  	select {
  1344  	case config = <-attachWaitCh:
  1345  	case <-ctx.Done():
  1346  		return nil, fmt.Errorf("attaching to network failed, make sure your network options are correct and check manager logs: %v", ctx.Err())
  1347  	}
  1348  
  1349  	c.Lock()
  1350  	c.attachers[aKey].config = config
  1351  	c.Unlock()
  1352  	return config, nil
  1353  }
  1354  
  1355  // DetachNetwork unblocks the waiters waiting on WaitForDetachment so
  1356  // that a request to detach can be generated towards the manager.
  1357  func (c *Cluster) DetachNetwork(target string, containerID string) error {
  1358  	aKey := attacherKey(target, containerID)
  1359  
  1360  	c.Lock()
  1361  	attacher, ok := c.attachers[aKey]
  1362  	delete(c.attachers, aKey)
  1363  	c.Unlock()
  1364  
  1365  	if !ok {
  1366  		return fmt.Errorf("could not find network attachment for container %s to network %s", containerID, target)
  1367  	}
  1368  
  1369  	close(attacher.detachWaitCh)
  1370  	return nil
  1371  }
  1372  
  1373  // CreateNetwork creates a new cluster managed network.
  1374  func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
  1375  	c.RLock()
  1376  	defer c.RUnlock()
  1377  
  1378  	if !c.isActiveManager() {
  1379  		return "", c.errNoManager()
  1380  	}
  1381  
  1382  	if runconfig.IsPreDefinedNetwork(s.Name) {
  1383  		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
  1384  		return "", errors.NewRequestForbiddenError(err)
  1385  	}
  1386  
  1387  	ctx, cancel := c.getRequestContext()
  1388  	defer cancel()
  1389  
  1390  	networkSpec := convert.BasicNetworkCreateToGRPC(s)
  1391  	r, err := c.client.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
  1392  	if err != nil {
  1393  		return "", err
  1394  	}
  1395  
  1396  	return r.Network.ID, nil
  1397  }
  1398  
  1399  // RemoveNetwork removes a cluster network.
  1400  func (c *Cluster) RemoveNetwork(input string) error {
  1401  	c.RLock()
  1402  	defer c.RUnlock()
  1403  
  1404  	if !c.isActiveManager() {
  1405  		return c.errNoManager()
  1406  	}
  1407  
  1408  	ctx, cancel := c.getRequestContext()
  1409  	defer cancel()
  1410  
  1411  	network, err := getNetwork(ctx, c.client, input)
  1412  	if err != nil {
  1413  		return err
  1414  	}
  1415  
  1416  	if _, err := c.client.RemoveNetwork(ctx, &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil {
  1417  		return err
  1418  	}
  1419  	return nil
  1420  }
  1421  
  1422  func (c *Cluster) populateNetworkID(ctx context.Context, client swarmapi.ControlClient, s *types.ServiceSpec) error {
  1423  	// Always prefer NetworkAttachmentConfigs from TaskTemplate,
  1424  	// but fall back to the service spec for backward compatibility.
  1425  	networks := s.TaskTemplate.Networks
  1426  	if len(networks) == 0 {
  1427  		networks = s.Networks
  1428  	}
  1429  
  1430  	for i, n := range networks {
  1431  		apiNetwork, err := getNetwork(ctx, client, n.Target)
  1432  		if err != nil {
  1433  			if ln, _ := c.config.Backend.FindNetwork(n.Target); ln != nil && !ln.Info().Dynamic() {
  1434  				err = fmt.Errorf("network %s is not eligible for docker services", ln.Name())
  1435  				return errors.NewRequestForbiddenError(err)
  1436  			}
  1437  			return err
  1438  		}
  1439  		networks[i].Target = apiNetwork.ID
  1440  	}
  1441  	return nil
  1442  }
  1443  
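        // getNetwork resolves input in this order: exact ID match, then exact name
        // match, then ID-prefix match; a lookup that returns more than one network
        // is rejected as ambiguous.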
  1444  func getNetwork(ctx context.Context, c swarmapi.ControlClient, input string) (*swarmapi.Network, error) {
  1445  	// GetNetwork to match via full ID.
  1446  	rg, err := c.GetNetwork(ctx, &swarmapi.GetNetworkRequest{NetworkID: input})
  1447  	if err != nil {
  1448  		// On any error (including NotFound), fall back to ListNetworks, matching on full name first, then ID prefix.
  1449  		rl, err := c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{Names: []string{input}}})
  1450  		if err != nil || len(rl.Networks) == 0 {
  1451  			rl, err = c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{IDPrefixes: []string{input}}})
  1452  		}
  1453  
  1454  		if err != nil {
  1455  			return nil, err
  1456  		}
  1457  
  1458  		if len(rl.Networks) == 0 {
  1459  			return nil, fmt.Errorf("network %s not found", input)
  1460  		}
  1461  
  1462  		if l := len(rl.Networks); l > 1 {
  1463  			return nil, fmt.Errorf("network %s is ambiguous (%d matches found)", input, l)
  1464  		}
  1465  
  1466  		return rl.Networks[0], nil
  1467  	}
  1468  	return rg.Network, nil
  1469  }
  1470  
  1471  // Cleanup stops active swarm node. This is run before daemon shutdown.
  1472  func (c *Cluster) Cleanup() {
  1473  	c.Lock()
  1474  	node := c.node
  1475  	if node == nil {
  1476  		c.Unlock()
  1477  		return
  1478  	}
  1479  	defer c.Unlock()
  1480  	if c.isActiveManager() {
  1481  		active, reachable, unreachable, err := c.managerStats()
  1482  		if err == nil {
  1483  			singlenode := active && isLastManager(reachable, unreachable)
  1484  			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
  1485  				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
  1486  			}
  1487  		}
  1488  	}
  1489  	c.stopNode()
  1490  }
  1491  
  1492  func (c *Cluster) managerStats() (current bool, reachable int, unreachable int, err error) {
  1493  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  1494  	defer cancel()
  1495  	nodes, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
  1496  	if err != nil {
  1497  		return false, 0, 0, err
  1498  	}
  1499  	for _, n := range nodes.Nodes {
  1500  		if n.ManagerStatus != nil {
  1501  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
  1502  				reachable++
  1503  				if n.ID == c.node.NodeID() {
  1504  					current = true
  1505  				}
  1506  			}
  1507  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
  1508  				unreachable++
  1509  			}
  1510  		}
  1511  	}
  1512  	return
  1513  }
  1514  
  1515  func validateAndSanitizeInitRequest(req *types.InitRequest) error {
  1516  	var err error
  1517  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1518  	if err != nil {
  1519  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1520  	}
  1521  
  1522  	return nil
  1523  }
  1524  
  1525  func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
  1526  	var err error
  1527  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1528  	if err != nil {
  1529  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1530  	}
  1531  	if len(req.RemoteAddrs) == 0 {
  1532  		return fmt.Errorf("at least 1 RemoteAddr is required to join")
  1533  	}
  1534  	for i := range req.RemoteAddrs {
  1535  		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
  1536  		if err != nil {
  1537  			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
  1538  		}
  1539  	}
  1540  	return nil
  1541  }
  1542  
  1543  func validateAddr(addr string) (string, error) {
  1544  	if addr == "" {
  1545  		return addr, fmt.Errorf("invalid empty address")
  1546  	}
  1547  	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
  1548  	if err != nil {
  1549  		return addr, nil
  1550  	}
  1551  	return strings.TrimPrefix(newaddr, "tcp://"), nil
  1552  }
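
        // For illustration: validateAddr("192.168.1.1") yields "192.168.1.1:2377"
        // (ParseTCPAddr fills in the default port), while an address that fails to
        // parse is passed through unchanged with a nil error.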
  1553  
  1554  func initClusterSpec(node *node, spec types.Spec) error {
  1555  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        	defer cancel()
  1556  	for conn := range node.ListenControlSocket(ctx) {
  1557  		if ctx.Err() != nil {
  1558  			return ctx.Err()
  1559  		}
  1560  		if conn != nil {
  1561  			client := swarmapi.NewControlClient(conn)
  1562  			var cluster *swarmapi.Cluster
  1563  			for i := 0; ; i++ {
  1564  				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
  1565  				if err != nil {
  1566  					return fmt.Errorf("error on listing clusters: %v", err)
  1567  				}
  1568  				if len(lcr.Clusters) == 0 {
  1569  					if i < 10 {
  1570  						time.Sleep(200 * time.Millisecond)
  1571  						continue
  1572  					}
  1573  					return fmt.Errorf("empty list of clusters was returned")
  1574  				}
  1575  				cluster = lcr.Clusters[0]
  1576  				break
  1577  			}
  1578  			// In init, we take the initial default values from swarmkit and
  1579  			// merge any non-nil or non-zero value from spec into the GRPC spec,
  1580  			// leaving the default values alone otherwise.
  1581  			// Note that this is different from Update(), where we expect the
  1582  			// user to specify the complete spec of the cluster (as they already
  1583  			// know the existing one and which fields to update).
  1584  			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
  1585  			if err != nil {
  1586  				return fmt.Errorf("error updating cluster settings: %v", err)
  1587  			}
  1588  			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
  1589  				ClusterID:      cluster.ID,
  1590  				ClusterVersion: &cluster.Meta.Version,
  1591  				Spec:           &clusterSpec,
  1592  			})
  1593  			if err != nil {
  1594  				return fmt.Errorf("error updating cluster settings: %v", err)
  1595  			}
  1596  			return nil
  1597  		}
  1598  	}
  1599  	return ctx.Err()
  1600  }