github.com/vieux/docker@v0.6.3-0.20161004191708-e097c2a938c7/daemon/cluster/cluster.go

     1  package cluster
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"net"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"google.golang.org/grpc"
    15  
    16  	"github.com/Sirupsen/logrus"
    17  	"github.com/docker/docker/api/errors"
    18  	apitypes "github.com/docker/docker/api/types"
    19  	"github.com/docker/docker/api/types/filters"
    20  	"github.com/docker/docker/api/types/network"
    21  	types "github.com/docker/docker/api/types/swarm"
    22  	"github.com/docker/docker/daemon/cluster/convert"
    23  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    24  	"github.com/docker/docker/daemon/cluster/executor/container"
    25  	"github.com/docker/docker/opts"
    26  	"github.com/docker/docker/pkg/ioutils"
    27  	"github.com/docker/docker/pkg/signal"
    28  	"github.com/docker/docker/runconfig"
    29  	swarmagent "github.com/docker/swarmkit/agent"
    30  	swarmapi "github.com/docker/swarmkit/api"
    31  	"golang.org/x/net/context"
    32  )
    33  
    34  const swarmDirName = "swarm"
    35  const controlSocket = "control.sock"
    36  const swarmConnectTimeout = 20 * time.Second
    37  const swarmRequestTimeout = 20 * time.Second
    38  const stateFile = "docker-state.json"
    39  const defaultAddr = "0.0.0.0:2377"
    40  
    41  const (
    42  	initialReconnectDelay = 100 * time.Millisecond
    43  	maxReconnectDelay     = 30 * time.Second
    44  )
    45  
    46  // ErrNoSwarm is returned on leaving a cluster that was never initialized
    47  var ErrNoSwarm = fmt.Errorf("This node is not part of a swarm")
    48  
    49  // ErrSwarmExists is returned on initialize or join request for a cluster that has already been activated
    50  var ErrSwarmExists = fmt.Errorf("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")
    51  
    52  // ErrPendingSwarmExists is returned on initialize or join request for a cluster that is already processing a similar request but has not succeeded yet.
    53  var ErrPendingSwarmExists = fmt.Errorf("This node is processing an existing join request that has not succeeded yet. Use \"docker swarm leave\" to cancel the current request.")
    54  
    55  // ErrSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached.
    56  var ErrSwarmJoinTimeoutReached = fmt.Errorf("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")
    57  
    58  type state struct {
    59  	// LocalAddr is this machine's local IP or hostname, if specified.
    60  	LocalAddr string
    61  	// RemoteAddr is the address that was given to "swarm join". It is used
    62  	// to find LocalAddr if necessary.
    63  	RemoteAddr string
    64  	// ListenAddr is the address we bind to, including a port.
    65  	ListenAddr string
    66  	// AdvertiseAddr is the address other nodes should connect to,
    67  	// including a port.
    68  	AdvertiseAddr string
    69  }
    70  
    71  // NetworkSubnetsProvider exposes functions for retrieving the subnets
    72  // of networks managed by Docker, so they can be filtered.
    73  type NetworkSubnetsProvider interface {
    74  	V4Subnets() []net.IPNet
    75  	V6Subnets() []net.IPNet
    76  }
    77  
    78  // Config provides values for Cluster.
    79  type Config struct {
    80  	Root                   string
    81  	Name                   string
    82  	Backend                executorpkg.Backend
    83  	NetworkSubnetsProvider NetworkSubnetsProvider
    84  
    85  	// DefaultAdvertiseAddr is the default host/IP or network interface to use
    86  	// if no AdvertiseAddr value is specified.
    87  	DefaultAdvertiseAddr string
    88  
    89  	// path to store runtime state, such as the swarm control socket
    90  	RuntimeRoot string
    91  }
    92  
    93  // Cluster provides capabilities to participate in a cluster as a worker or a
    94  // manager.
    95  type Cluster struct {
    96  	sync.RWMutex
    97  	*node
    98  	root            string
    99  	runtimeRoot     string
   100  	config          Config
   101  	configEvent     chan struct{} // todo: make this array and goroutine safe
   102  	localAddr       string
   103  	actualLocalAddr string // after resolution, not persisted
   104  	remoteAddr      string
   105  	listenAddr      string
   106  	advertiseAddr   string
   107  	stop            bool
   108  	err             error
   109  	cancelDelay     func()
   110  	attachers       map[string]*attacher
   111  }
   112  
    113  // attacher manages the in-memory state of a container's attachment to a
    114  // global-scope network managed by the swarm manager. It tracks the
    115  // attachment's taskID and the corresponding attachment configuration
    116  // obtained from the manager.
   117  type attacher struct {
   118  	taskID           string
   119  	config           *network.NetworkingConfig
   120  	attachWaitCh     chan *network.NetworkingConfig
   121  	attachCompleteCh chan struct{}
   122  	detachWaitCh     chan struct{}
   123  }
   124  
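         // node wraps a running swarmkit Node together with the daemon-side
         // connection state used to talk to it: the control-socket gRPC
         // connection and client, readiness, and the backoff delay applied
         // when restarting it after a failure.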
   125  type node struct {
   126  	*swarmagent.Node
   127  	done           chan struct{}
   128  	ready          bool
   129  	conn           *grpc.ClientConn
   130  	client         swarmapi.ControlClient
   131  	reconnectDelay time.Duration
   132  }
   133  
   134  // New creates a new Cluster instance using provided config.
   135  func New(config Config) (*Cluster, error) {
   136  	root := filepath.Join(config.Root, swarmDirName)
   137  	if err := os.MkdirAll(root, 0700); err != nil {
   138  		return nil, err
   139  	}
   140  	if config.RuntimeRoot == "" {
   141  		config.RuntimeRoot = root
   142  	}
   143  	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
   144  		return nil, err
   145  	}
   146  	c := &Cluster{
   147  		root:        root,
   148  		config:      config,
   149  		configEvent: make(chan struct{}, 10),
   150  		runtimeRoot: config.RuntimeRoot,
   151  		attachers:   make(map[string]*attacher),
   152  	}
   153  
   154  	st, err := c.loadState()
   155  	if err != nil {
   156  		if os.IsNotExist(err) {
   157  			return c, nil
   158  		}
   159  		return nil, err
   160  	}
   161  
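         	// Restore the node from the persisted addresses. The empty join
         	// address and join token mean this restarts an existing node
         	// rather than joining a new swarm.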
   162  	n, err := c.startNewNode(false, st.LocalAddr, st.RemoteAddr, st.ListenAddr, st.AdvertiseAddr, "", "")
   163  	if err != nil {
   164  		return nil, err
   165  	}
   166  
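         	// Wait for the restored node to come up. On timeout we only log an
         	// error and keep the daemon running; startup continues in the
         	// background and reconnectOnFailure below restarts the node if it fails.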
   167  	select {
   168  	case <-time.After(swarmConnectTimeout):
   169  		logrus.Errorf("swarm component could not be started before timeout was reached")
   170  	case <-n.Ready():
   171  	case <-n.done:
   172  		return nil, fmt.Errorf("swarm component could not be started: %v", c.err)
   173  	}
   174  	go c.reconnectOnFailure(n)
   175  	return c, nil
   176  }
   177  
   178  func (c *Cluster) loadState() (*state, error) {
   179  	dt, err := ioutil.ReadFile(filepath.Join(c.root, stateFile))
   180  	if err != nil {
   181  		return nil, err
   182  	}
   183  	// missing certificate means no actual state to restore from
   184  	if _, err := os.Stat(filepath.Join(c.root, "certificates/swarm-node.crt")); err != nil {
   185  		if os.IsNotExist(err) {
   186  			c.clearState()
   187  		}
   188  		return nil, err
   189  	}
   190  	var st state
   191  	if err := json.Unmarshal(dt, &st); err != nil {
   192  		return nil, err
   193  	}
   194  	return &st, nil
   195  }
   196  
   197  func (c *Cluster) saveState() error {
   198  	dt, err := json.Marshal(state{
   199  		LocalAddr:     c.localAddr,
   200  		RemoteAddr:    c.remoteAddr,
   201  		ListenAddr:    c.listenAddr,
   202  		AdvertiseAddr: c.advertiseAddr,
   203  	})
   204  	if err != nil {
   205  		return err
   206  	}
   207  	return ioutils.AtomicWriteFile(filepath.Join(c.root, stateFile), dt, 0600)
   208  }
   209  
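         // reconnectOnFailure supervises n and restarts the swarm node with
         // exponential backoff (capped at maxReconnectDelay) whenever it dies.
         // It returns when the cluster is being stopped or when a new node has
         // been started by another code path (e.g. Init or Join).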
   210  func (c *Cluster) reconnectOnFailure(n *node) {
   211  	for {
   212  		<-n.done
   213  		c.Lock()
   214  		if c.stop || c.node != nil {
   215  			c.Unlock()
   216  			return
   217  		}
   218  		n.reconnectDelay *= 2
   219  		if n.reconnectDelay > maxReconnectDelay {
   220  			n.reconnectDelay = maxReconnectDelay
   221  		}
   222  		logrus.Warnf("Restarting swarm in %.2f seconds", n.reconnectDelay.Seconds())
   223  		delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
   224  		c.cancelDelay = cancel
   225  		c.Unlock()
   226  		<-delayCtx.Done()
   227  		if delayCtx.Err() != context.DeadlineExceeded {
   228  			return
   229  		}
   230  		c.Lock()
   231  		if c.node != nil {
   232  			c.Unlock()
   233  			return
   234  		}
   235  		var err error
   236  		n, err = c.startNewNode(false, c.localAddr, c.getRemoteAddress(), c.listenAddr, c.advertiseAddr, c.getRemoteAddress(), "")
   237  		if err != nil {
   238  			c.err = err
   239  			close(n.done)
   240  		}
   241  		c.Unlock()
   242  	}
   243  }
   244  
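         // startNewNode creates and starts a swarmkit node with the given
         // addresses, persists the resulting addresses to the state file, and
         // spawns goroutines that track the node's errors, readiness, and
         // control-socket connection. It mutates Cluster fields directly, so
         // callers other than New hold the cluster lock.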
   245  func (c *Cluster) startNewNode(forceNewCluster bool, localAddr, remoteAddr, listenAddr, advertiseAddr, joinAddr, joinToken string) (*node, error) {
   246  	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
   247  		return nil, err
   248  	}
   249  
   250  	actualLocalAddr := localAddr
   251  	if actualLocalAddr == "" {
   252  		// If localAddr was not specified, resolve it automatically
   253  		// based on the route to joinAddr. localAddr can only be left
   254  		// empty on "join".
   255  		listenHost, _, err := net.SplitHostPort(listenAddr)
   256  		if err != nil {
   257  			return nil, fmt.Errorf("could not parse listen address: %v", err)
   258  		}
   259  
   260  		listenAddrIP := net.ParseIP(listenHost)
   261  		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
   262  			actualLocalAddr = listenHost
   263  		} else {
   264  			if remoteAddr == "" {
   265  				// Should never happen except using swarms created by
   266  				// old versions that didn't save remoteAddr.
   267  				remoteAddr = "8.8.8.8:53"
   268  			}
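         			// Dialing a UDP address sends no packets; it only asks the
         			// kernel which local address would be used to reach
         			// remoteAddr, and that address becomes the local address.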
   269  			conn, err := net.Dial("udp", remoteAddr)
   270  			if err != nil {
   271  				return nil, fmt.Errorf("could not find local IP address: %v", err)
   272  			}
   273  			localHostPort := conn.LocalAddr().String()
   274  			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
   275  			conn.Close()
   276  		}
   277  	}
   278  
   279  	c.node = nil
   280  	c.cancelDelay = nil
   281  	c.stop = false
   282  	n, err := swarmagent.NewNode(&swarmagent.NodeConfig{
   283  		Hostname:           c.config.Name,
   284  		ForceNewCluster:    forceNewCluster,
   285  		ListenControlAPI:   filepath.Join(c.runtimeRoot, controlSocket),
   286  		ListenRemoteAPI:    listenAddr,
   287  		AdvertiseRemoteAPI: advertiseAddr,
   288  		JoinAddr:           joinAddr,
   289  		StateDir:           c.root,
   290  		JoinToken:          joinToken,
   291  		Executor:           container.NewExecutor(c.config.Backend),
   292  		HeartbeatTick:      1,
   293  		ElectionTick:       3,
   294  	})
   295  	if err != nil {
   296  		return nil, err
   297  	}
   298  	ctx := context.Background()
   299  	if err := n.Start(ctx); err != nil {
   300  		return nil, err
   301  	}
   302  	node := &node{
   303  		Node:           n,
   304  		done:           make(chan struct{}),
   305  		reconnectDelay: initialReconnectDelay,
   306  	}
   307  	c.node = node
   308  	c.localAddr = localAddr
   309  	c.actualLocalAddr = actualLocalAddr // not saved
   310  	c.remoteAddr = remoteAddr
   311  	c.listenAddr = listenAddr
   312  	c.advertiseAddr = advertiseAddr
   313  	c.saveState()
   314  
   315  	c.config.Backend.SetClusterProvider(c)
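         	// Propagate node termination: record the error, clear c.node and
         	// close node.done so waiters (Init, Join, reconnectOnFailure) wake up.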
   316  	go func() {
   317  		err := n.Err(ctx)
   318  		if err != nil {
   319  			logrus.Errorf("cluster exited with error: %v", err)
   320  		}
   321  		c.Lock()
   322  		c.node = nil
   323  		c.err = err
   324  		c.Unlock()
   325  		close(node.done)
   326  	}()
   327  
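         	// Notify configEvent listeners once the node becomes ready (or the
         	// context is cancelled).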
   328  	go func() {
   329  		select {
   330  		case <-n.Ready():
   331  			c.Lock()
   332  			node.ready = true
   333  			c.err = nil
   334  			c.Unlock()
   335  		case <-ctx.Done():
   336  		}
   337  		c.configEvent <- struct{}{}
   338  	}()
   339  
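         	// Track the control socket: whenever the connection changes,
         	// (re)create the gRPC control client and emit a configEvent.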
   340  	go func() {
   341  		for conn := range n.ListenControlSocket(ctx) {
   342  			c.Lock()
   343  			if node.conn != conn {
   344  				if conn == nil {
   345  					node.client = nil
   346  				} else {
   347  					node.client = swarmapi.NewControlClient(conn)
   348  				}
   349  			}
   350  			node.conn = conn
   351  			c.Unlock()
   352  			c.configEvent <- struct{}{}
   353  		}
   354  	}()
   355  
   356  	return node, nil
   357  }
   358  
   359  // Init initializes new cluster from user provided request.
   360  func (c *Cluster) Init(req types.InitRequest) (string, error) {
   361  	c.Lock()
   362  	if node := c.node; node != nil {
   363  		if !req.ForceNewCluster {
   364  			c.Unlock()
   365  			return "", ErrSwarmExists
   366  		}
   367  		if err := c.stopNode(); err != nil {
   368  			c.Unlock()
   369  			return "", err
   370  		}
   371  	}
   372  
   373  	if err := validateAndSanitizeInitRequest(&req); err != nil {
   374  		c.Unlock()
   375  		return "", err
   376  	}
   377  
   378  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   379  	if err != nil {
   380  		c.Unlock()
   381  		return "", err
   382  	}
   383  
   384  	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   385  	if err != nil {
   386  		c.Unlock()
   387  		return "", err
   388  	}
   389  
   390  	localAddr := listenHost
   391  
    392  	// When the listen address is unspecified (e.g. 0.0.0.0), the advertise
    393  	// address must be one of the system's addresses, or an explicit listen address is required.
   394  	listenAddrIP := net.ParseIP(listenHost)
   395  	if listenAddrIP != nil && listenAddrIP.IsUnspecified() {
   396  		advertiseIP := net.ParseIP(advertiseHost)
   397  		if advertiseIP == nil {
   398  			// not an IP
   399  			c.Unlock()
   400  			return "", errMustSpecifyListenAddr
   401  		}
   402  
   403  		systemIPs := listSystemIPs()
   404  
   405  		found := false
   406  		for _, systemIP := range systemIPs {
   407  			if systemIP.Equal(advertiseIP) {
   408  				found = true
   409  				break
   410  			}
   411  		}
   412  		if !found {
   413  			c.Unlock()
   414  			return "", errMustSpecifyListenAddr
   415  		}
   416  		localAddr = advertiseIP.String()
   417  	}
   418  
   419  	// todo: check current state existing
   420  	n, err := c.startNewNode(req.ForceNewCluster, localAddr, "", net.JoinHostPort(listenHost, listenPort), net.JoinHostPort(advertiseHost, advertisePort), "", "")
   421  	if err != nil {
   422  		c.Unlock()
   423  		return "", err
   424  	}
   425  	c.Unlock()
   426  
   427  	select {
   428  	case <-n.Ready():
   429  		if err := initClusterSpec(n, req.Spec); err != nil {
   430  			return "", err
   431  		}
   432  		go c.reconnectOnFailure(n)
   433  		return n.NodeID(), nil
   434  	case <-n.done:
   435  		c.RLock()
   436  		defer c.RUnlock()
   437  		if !req.ForceNewCluster { // if failure on first attempt don't keep state
   438  			if err := c.clearState(); err != nil {
   439  				return "", err
   440  			}
   441  		}
   442  		return "", c.err
   443  	}
   444  }
   445  
   446  // Join makes current Cluster part of an existing swarm cluster.
   447  func (c *Cluster) Join(req types.JoinRequest) error {
   448  	c.Lock()
   449  	if node := c.node; node != nil {
   450  		c.Unlock()
   451  		return ErrSwarmExists
   452  	}
   453  	if err := validateAndSanitizeJoinRequest(&req); err != nil {
   454  		c.Unlock()
   455  		return err
   456  	}
   457  
   458  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   459  	if err != nil {
   460  		c.Unlock()
   461  		return err
   462  	}
   463  
   464  	var advertiseAddr string
   465  	if req.AdvertiseAddr != "" {
   466  		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   467  		// For joining, we don't need to provide an advertise address,
   468  		// since the remote side can detect it.
   469  		if err == nil {
   470  			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
   471  		}
   472  	}
   473  
   474  	// todo: check current state existing
   475  	n, err := c.startNewNode(false, "", req.RemoteAddrs[0], net.JoinHostPort(listenHost, listenPort), advertiseAddr, req.RemoteAddrs[0], req.JoinToken)
   476  	if err != nil {
   477  		c.Unlock()
   478  		return err
   479  	}
   480  	c.Unlock()
   481  
   482  	select {
   483  	case <-time.After(swarmConnectTimeout):
   484  		// attempt to connect will continue in background, also reconnecting
   485  		go c.reconnectOnFailure(n)
   486  		return ErrSwarmJoinTimeoutReached
   487  	case <-n.Ready():
   488  		go c.reconnectOnFailure(n)
   489  		return nil
   490  	case <-n.done:
   491  		c.RLock()
   492  		defer c.RUnlock()
   493  		return c.err
   494  	}
   495  }
   496  
   497  // stopNode is a helper that stops the active c.node and waits until it has
    498  // shut down. Call with the cluster lock held.
   499  func (c *Cluster) stopNode() error {
   500  	if c.node == nil {
   501  		return nil
   502  	}
   503  	c.stop = true
   504  	if c.cancelDelay != nil {
   505  		c.cancelDelay()
   506  		c.cancelDelay = nil
   507  	}
   508  	node := c.node
   509  	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
   510  	defer cancel()
   511  	// TODO: can't hold lock on stop because it calls back to network
   512  	c.Unlock()
   513  	defer c.Lock()
   514  	if err := node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
   515  		return err
   516  	}
   517  	<-node.done
   518  	return nil
   519  }
   520  
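         // removingManagerCausesLossOfQuorum reports whether removing one
         // reachable manager would leave the remaining reachable managers
         // (reachable-1) without a strict majority of the total membership
         // (reachable+unreachable), i.e. 2*(reachable-1) <= reachable+unreachable.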
   521  func removingManagerCausesLossOfQuorum(reachable, unreachable int) bool {
   522  	return reachable-2 <= unreachable
   523  }
   524  
   525  func isLastManager(reachable, unreachable int) bool {
   526  	return reachable == 1 && unreachable == 0
   527  }
   528  
   529  // Leave shuts down Cluster and removes current state.
   530  func (c *Cluster) Leave(force bool) error {
   531  	c.Lock()
   532  	node := c.node
   533  	if node == nil {
   534  		c.Unlock()
   535  		return ErrNoSwarm
   536  	}
   537  
   538  	if node.Manager() != nil && !force {
   539  		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
   540  		if c.isActiveManager() {
   541  			active, reachable, unreachable, err := c.managerStats()
   542  			if err == nil {
   543  				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
   544  					if isLastManager(reachable, unreachable) {
   545  						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
   546  						c.Unlock()
   547  						return fmt.Errorf(msg)
   548  					}
   549  					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
   550  				}
   551  			}
   552  		} else {
   553  			msg += "Doing so may lose the consensus of your cluster. "
   554  		}
   555  
   556  		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
   557  		c.Unlock()
   558  		return fmt.Errorf(msg)
   559  	}
   560  	if err := c.stopNode(); err != nil {
   561  		logrus.Errorf("failed to shut down cluster node: %v", err)
   562  		signal.DumpStacks("")
   563  		c.Unlock()
   564  		return err
   565  	}
   566  	c.Unlock()
   567  	if nodeID := node.NodeID(); nodeID != "" {
   568  		nodeContainers, err := c.listContainerForNode(nodeID)
   569  		if err != nil {
   570  			return err
   571  		}
   572  		for _, id := range nodeContainers {
   573  			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
   574  				logrus.Errorf("error removing %v: %v", id, err)
   575  			}
   576  		}
   577  	}
   578  	c.configEvent <- struct{}{}
   579  	// todo: cleanup optional?
   580  	if err := c.clearState(); err != nil {
   581  		return err
   582  	}
   583  	return nil
   584  }
   585  
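         // listContainerForNode returns the IDs of all containers on this
         // daemon that belong to tasks of the given swarm node, matched via
         // the com.docker.swarm.node.id label.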
   586  func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
   587  	var ids []string
   588  	filters := filters.NewArgs()
   589  	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
   590  	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
   591  		Filter: filters,
   592  	})
   593  	if err != nil {
   594  		return []string{}, err
   595  	}
   596  	for _, c := range containers {
   597  		ids = append(ids, c.ID)
   598  	}
   599  	return ids, nil
   600  }
   601  
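         // clearState removes all persisted swarm state under c.root and
         // resets the backend's cluster provider to nil.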
   602  func (c *Cluster) clearState() error {
   603  	// todo: backup this data instead of removing?
   604  	if err := os.RemoveAll(c.root); err != nil {
   605  		return err
   606  	}
   607  	if err := os.MkdirAll(c.root, 0700); err != nil {
   608  		return err
   609  	}
   610  	c.config.Backend.SetClusterProvider(nil)
   611  	return nil
   612  }
   613  
    614  func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum loss
   615  	return context.WithTimeout(context.Background(), swarmRequestTimeout)
   616  }
   617  
   618  // Inspect retrieves the configuration properties of a managed swarm cluster.
   619  func (c *Cluster) Inspect() (types.Swarm, error) {
   620  	c.RLock()
   621  	defer c.RUnlock()
   622  
   623  	if !c.isActiveManager() {
   624  		return types.Swarm{}, c.errNoManager()
   625  	}
   626  
   627  	ctx, cancel := c.getRequestContext()
   628  	defer cancel()
   629  
   630  	swarm, err := getSwarm(ctx, c.client)
   631  	if err != nil {
   632  		return types.Swarm{}, err
   633  	}
   638  
   639  	return convert.SwarmFromGRPC(*swarm), nil
   640  }
   641  
   642  // Update updates configuration of a managed swarm cluster.
   643  func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
   644  	c.RLock()
   645  	defer c.RUnlock()
   646  
   647  	if !c.isActiveManager() {
   648  		return c.errNoManager()
   649  	}
   650  
   651  	ctx, cancel := c.getRequestContext()
   652  	defer cancel()
   653  
   654  	swarm, err := getSwarm(ctx, c.client)
   655  	if err != nil {
   656  		return err
   657  	}
   658  
   659  	// In update, client should provide the complete spec of the swarm, including
   660  	// Name and Labels. If a field is specified with 0 or nil, then the default value
    661  	// will be used by swarmkit.
   662  	clusterSpec, err := convert.SwarmSpecToGRPC(spec)
   663  	if err != nil {
   664  		return err
   665  	}
   666  
   667  	_, err = c.client.UpdateCluster(
   668  		ctx,
   669  		&swarmapi.UpdateClusterRequest{
   670  			ClusterID: swarm.ID,
   671  			Spec:      &clusterSpec,
   672  			ClusterVersion: &swarmapi.Version{
   673  				Index: version,
   674  			},
   675  			Rotation: swarmapi.JoinTokenRotation{
   676  				RotateWorkerToken:  flags.RotateWorkerToken,
   677  				RotateManagerToken: flags.RotateManagerToken,
   678  			},
   679  		},
   680  	)
   681  	return err
   682  }
   683  
   684  // IsManager returns true if Cluster is participating as a manager.
   685  func (c *Cluster) IsManager() bool {
   686  	c.RLock()
   687  	defer c.RUnlock()
   688  	return c.isActiveManager()
   689  }
   690  
   691  // IsAgent returns true if Cluster is participating as a worker/agent.
   692  func (c *Cluster) IsAgent() bool {
   693  	c.RLock()
   694  	defer c.RUnlock()
   695  	return c.node != nil && c.ready
   696  }
   697  
   698  // GetLocalAddress returns the local address.
   699  func (c *Cluster) GetLocalAddress() string {
   700  	c.RLock()
   701  	defer c.RUnlock()
   702  	return c.actualLocalAddr
   703  }
   704  
   705  // GetListenAddress returns the listen address.
   706  func (c *Cluster) GetListenAddress() string {
   707  	c.RLock()
   708  	defer c.RUnlock()
   709  	return c.listenAddr
   710  }
   711  
   712  // GetAdvertiseAddress returns the remotely reachable address of this node.
   713  func (c *Cluster) GetAdvertiseAddress() string {
   714  	c.RLock()
   715  	defer c.RUnlock()
   716  	if c.advertiseAddr != "" {
   717  		advertiseHost, _, _ := net.SplitHostPort(c.advertiseAddr)
   718  		return advertiseHost
   719  	}
   720  	return c.actualLocalAddr
   721  }
   722  
   723  // GetRemoteAddress returns a known advertise address of a remote manager if
   724  // available.
   725  // todo: change to array/connect with info
   726  func (c *Cluster) GetRemoteAddress() string {
   727  	c.RLock()
   728  	defer c.RUnlock()
   729  	return c.getRemoteAddress()
   730  }
   731  
   732  func (c *Cluster) getRemoteAddress() string {
   733  	if c.node == nil {
   734  		return ""
   735  	}
   736  	nodeID := c.node.NodeID()
   737  	for _, r := range c.node.Remotes() {
   738  		if r.NodeID != nodeID {
   739  			return r.Addr
   740  		}
   741  	}
   742  	return ""
   743  }
   744  
   745  // ListenClusterEvents returns a channel that receives messages on cluster
   746  // participation changes.
   747  // todo: make cancelable and accessible to multiple callers
   748  func (c *Cluster) ListenClusterEvents() <-chan struct{} {
   749  	return c.configEvent
   750  }
   751  
   752  // Info returns information about the current cluster state.
   753  func (c *Cluster) Info() types.Info {
   754  	info := types.Info{
   755  		NodeAddr: c.GetAdvertiseAddress(),
   756  	}
   757  
   758  	c.RLock()
   759  	defer c.RUnlock()
   760  
   761  	if c.node == nil {
   762  		info.LocalNodeState = types.LocalNodeStateInactive
   763  		if c.cancelDelay != nil {
   764  			info.LocalNodeState = types.LocalNodeStateError
   765  		}
   766  	} else {
   767  		info.LocalNodeState = types.LocalNodeStatePending
    768  		if c.ready {
   769  			info.LocalNodeState = types.LocalNodeStateActive
   770  		}
   771  	}
   772  	if c.err != nil {
   773  		info.Error = c.err.Error()
   774  	}
   775  
   776  	ctx, cancel := c.getRequestContext()
   777  	defer cancel()
   778  
   779  	if c.isActiveManager() {
   780  		info.ControlAvailable = true
   781  		swarm, err := c.Inspect()
   782  		if err != nil {
   783  			info.Error = err.Error()
   784  		}
   785  
   786  		// Strip JoinTokens
   787  		info.Cluster = swarm.ClusterInfo
   788  
   789  		if r, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err == nil {
   790  			info.Nodes = len(r.Nodes)
   791  			for _, n := range r.Nodes {
   792  				if n.ManagerStatus != nil {
   793  					info.Managers = info.Managers + 1
   794  				}
   795  			}
   796  		}
   797  	}
   798  
   799  	if c.node != nil {
   800  		for _, r := range c.node.Remotes() {
   801  			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
   802  		}
   803  		info.NodeID = c.node.NodeID()
   804  	}
   805  
   806  	return info
   807  }
   808  
   809  // isActiveManager should not be called without a read lock
   810  func (c *Cluster) isActiveManager() bool {
   811  	return c.node != nil && c.conn != nil
   812  }
   813  
   814  // errNoManager returns error describing why manager commands can't be used.
   815  // Call with read lock.
   816  func (c *Cluster) errNoManager() error {
   817  	if c.node == nil {
   818  		return fmt.Errorf("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
   819  	}
   820  	if c.node.Manager() != nil {
   821  		return fmt.Errorf("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
   822  	}
   823  	return fmt.Errorf("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
   824  }
   825  
   826  // GetServices returns all services of a managed swarm cluster.
   827  func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
   828  	c.RLock()
   829  	defer c.RUnlock()
   830  
   831  	if !c.isActiveManager() {
   832  		return nil, c.errNoManager()
   833  	}
   834  
   835  	filters, err := newListServicesFilters(options.Filter)
   836  	if err != nil {
   837  		return nil, err
   838  	}
   839  	ctx, cancel := c.getRequestContext()
   840  	defer cancel()
   841  
   842  	r, err := c.client.ListServices(
   843  		ctx,
   844  		&swarmapi.ListServicesRequest{Filters: filters})
   845  	if err != nil {
   846  		return nil, err
   847  	}
   848  
   849  	services := []types.Service{}
   850  
   851  	for _, service := range r.Services {
   852  		services = append(services, convert.ServiceFromGRPC(*service))
   853  	}
   854  
   855  	return services, nil
   856  }
   857  
   858  // CreateService creates a new service in a managed swarm cluster.
   859  func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (string, error) {
   860  	c.RLock()
   861  	defer c.RUnlock()
   862  
   863  	if !c.isActiveManager() {
   864  		return "", c.errNoManager()
   865  	}
   866  
   867  	ctx, cancel := c.getRequestContext()
   868  	defer cancel()
   869  
   870  	err := c.populateNetworkID(ctx, c.client, &s)
   871  	if err != nil {
   872  		return "", err
   873  	}
   874  
   875  	serviceSpec, err := convert.ServiceSpecToGRPC(s)
   876  	if err != nil {
   877  		return "", err
   878  	}
   879  
   880  	if encodedAuth != "" {
   881  		ctnr := serviceSpec.Task.GetContainer()
   882  		if ctnr == nil {
   883  			return "", fmt.Errorf("service does not use container tasks")
   884  		}
   885  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   886  	}
   887  
   888  	r, err := c.client.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
   889  	if err != nil {
   890  		return "", err
   891  	}
   892  
   893  	return r.Service.ID, nil
   894  }
   895  
   896  // GetService returns a service based on an ID or name.
   897  func (c *Cluster) GetService(input string) (types.Service, error) {
   898  	c.RLock()
   899  	defer c.RUnlock()
   900  
   901  	if !c.isActiveManager() {
   902  		return types.Service{}, c.errNoManager()
   903  	}
   904  
   905  	ctx, cancel := c.getRequestContext()
   906  	defer cancel()
   907  
   908  	service, err := getService(ctx, c.client, input)
   909  	if err != nil {
   910  		return types.Service{}, err
   911  	}
   912  	return convert.ServiceFromGRPC(*service), nil
   913  }
   914  
    915  // UpdateService updates an existing service to match the new properties.
   916  func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string) error {
   917  	c.RLock()
   918  	defer c.RUnlock()
   919  
   920  	if !c.isActiveManager() {
   921  		return c.errNoManager()
   922  	}
   923  
   924  	ctx, cancel := c.getRequestContext()
   925  	defer cancel()
   926  
   927  	err := c.populateNetworkID(ctx, c.client, &spec)
   928  	if err != nil {
   929  		return err
   930  	}
   931  
   932  	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
   933  	if err != nil {
   934  		return err
   935  	}
   936  
   937  	currentService, err := getService(ctx, c.client, serviceIDOrName)
   938  	if err != nil {
   939  		return err
   940  	}
   941  
   942  	if encodedAuth != "" {
   943  		ctnr := serviceSpec.Task.GetContainer()
   944  		if ctnr == nil {
   945  			return fmt.Errorf("service does not use container tasks")
   946  		}
   947  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   948  	} else {
   949  		// this is needed because if the encodedAuth isn't being updated then we
   950  		// shouldn't lose it, and continue to use the one that was already present
   951  		ctnr := currentService.Spec.Task.GetContainer()
   952  		if ctnr == nil {
   953  			return fmt.Errorf("service does not use container tasks")
   954  		}
   955  		serviceSpec.Task.GetContainer().PullOptions = ctnr.PullOptions
   956  	}
   957  
   958  	_, err = c.client.UpdateService(
   959  		ctx,
   960  		&swarmapi.UpdateServiceRequest{
   961  			ServiceID: currentService.ID,
   962  			Spec:      &serviceSpec,
   963  			ServiceVersion: &swarmapi.Version{
   964  				Index: version,
   965  			},
   966  		},
   967  	)
   968  	return err
   969  }
   970  
   971  // RemoveService removes a service from a managed swarm cluster.
   972  func (c *Cluster) RemoveService(input string) error {
   973  	c.RLock()
   974  	defer c.RUnlock()
   975  
   976  	if !c.isActiveManager() {
   977  		return c.errNoManager()
   978  	}
   979  
   980  	ctx, cancel := c.getRequestContext()
   981  	defer cancel()
   982  
   983  	service, err := getService(ctx, c.client, input)
   984  	if err != nil {
   985  		return err
   986  	}
   987  
   988  	if _, err := c.client.RemoveService(ctx, &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil {
   989  		return err
   990  	}
   991  	return nil
   992  }
   993  
   994  // GetNodes returns a list of all nodes known to a cluster.
   995  func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
   996  	c.RLock()
   997  	defer c.RUnlock()
   998  
   999  	if !c.isActiveManager() {
  1000  		return nil, c.errNoManager()
  1001  	}
  1002  
  1003  	filters, err := newListNodesFilters(options.Filter)
  1004  	if err != nil {
  1005  		return nil, err
  1006  	}
  1007  
  1008  	ctx, cancel := c.getRequestContext()
  1009  	defer cancel()
  1010  
  1011  	r, err := c.client.ListNodes(
  1012  		ctx,
  1013  		&swarmapi.ListNodesRequest{Filters: filters})
  1014  	if err != nil {
  1015  		return nil, err
  1016  	}
  1017  
  1018  	nodes := []types.Node{}
  1019  
  1020  	for _, node := range r.Nodes {
  1021  		nodes = append(nodes, convert.NodeFromGRPC(*node))
  1022  	}
  1023  	return nodes, nil
  1024  }
  1025  
  1026  // GetNode returns a node based on an ID or name.
  1027  func (c *Cluster) GetNode(input string) (types.Node, error) {
  1028  	c.RLock()
  1029  	defer c.RUnlock()
  1030  
  1031  	if !c.isActiveManager() {
  1032  		return types.Node{}, c.errNoManager()
  1033  	}
  1034  
  1035  	ctx, cancel := c.getRequestContext()
  1036  	defer cancel()
  1037  
  1038  	node, err := getNode(ctx, c.client, input)
  1039  	if err != nil {
  1040  		return types.Node{}, err
  1041  	}
  1042  	return convert.NodeFromGRPC(*node), nil
  1043  }
  1044  
   1045  // UpdateNode updates an existing node's properties.
  1046  func (c *Cluster) UpdateNode(nodeID string, version uint64, spec types.NodeSpec) error {
  1047  	c.RLock()
  1048  	defer c.RUnlock()
  1049  
  1050  	if !c.isActiveManager() {
  1051  		return c.errNoManager()
  1052  	}
  1053  
  1054  	nodeSpec, err := convert.NodeSpecToGRPC(spec)
  1055  	if err != nil {
  1056  		return err
  1057  	}
  1058  
  1059  	ctx, cancel := c.getRequestContext()
  1060  	defer cancel()
  1061  
  1062  	_, err = c.client.UpdateNode(
  1063  		ctx,
  1064  		&swarmapi.UpdateNodeRequest{
  1065  			NodeID: nodeID,
  1066  			Spec:   &nodeSpec,
  1067  			NodeVersion: &swarmapi.Version{
  1068  				Index: version,
  1069  			},
  1070  		},
  1071  	)
  1072  	return err
  1073  }
  1074  
  1075  // RemoveNode removes a node from a cluster
  1076  func (c *Cluster) RemoveNode(input string, force bool) error {
  1077  	c.RLock()
  1078  	defer c.RUnlock()
  1079  
  1080  	if !c.isActiveManager() {
  1081  		return c.errNoManager()
  1082  	}
  1083  
  1084  	ctx, cancel := c.getRequestContext()
  1085  	defer cancel()
  1086  
  1087  	node, err := getNode(ctx, c.client, input)
  1088  	if err != nil {
  1089  		return err
  1090  	}
  1091  
  1092  	if _, err := c.client.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID, Force: force}); err != nil {
  1093  		return err
  1094  	}
  1095  	return nil
  1096  }
  1097  
  1098  // GetTasks returns a list of tasks matching the filter options.
  1099  func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
  1100  	c.RLock()
  1101  	defer c.RUnlock()
  1102  
  1103  	if !c.isActiveManager() {
  1104  		return nil, c.errNoManager()
  1105  	}
  1106  
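         	// byName resolves "service" and "node" filter values, which may be
         	// names or ID prefixes, into full IDs so the swarmkit request can
         	// filter on them.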
  1107  	byName := func(filter filters.Args) error {
  1108  		if filter.Include("service") {
  1109  			serviceFilters := filter.Get("service")
  1110  			for _, serviceFilter := range serviceFilters {
  1111  				service, err := c.GetService(serviceFilter)
  1112  				if err != nil {
  1113  					return err
  1114  				}
  1115  				filter.Del("service", serviceFilter)
  1116  				filter.Add("service", service.ID)
  1117  			}
  1118  		}
  1119  		if filter.Include("node") {
  1120  			nodeFilters := filter.Get("node")
  1121  			for _, nodeFilter := range nodeFilters {
  1122  				node, err := c.GetNode(nodeFilter)
  1123  				if err != nil {
  1124  					return err
  1125  				}
  1126  				filter.Del("node", nodeFilter)
  1127  				filter.Add("node", node.ID)
  1128  			}
  1129  		}
  1130  		return nil
  1131  	}
  1132  
  1133  	filters, err := newListTasksFilters(options.Filter, byName)
  1134  	if err != nil {
  1135  		return nil, err
  1136  	}
  1137  
  1138  	ctx, cancel := c.getRequestContext()
  1139  	defer cancel()
  1140  
  1141  	r, err := c.client.ListTasks(
  1142  		ctx,
  1143  		&swarmapi.ListTasksRequest{Filters: filters})
  1144  	if err != nil {
  1145  		return nil, err
  1146  	}
  1147  
  1148  	tasks := []types.Task{}
  1149  
  1150  	for _, task := range r.Tasks {
  1151  		if task.Spec.GetContainer() != nil {
  1152  			tasks = append(tasks, convert.TaskFromGRPC(*task))
  1153  		}
  1154  	}
  1155  	return tasks, nil
  1156  }
  1157  
  1158  // GetTask returns a task by an ID.
  1159  func (c *Cluster) GetTask(input string) (types.Task, error) {
  1160  	c.RLock()
  1161  	defer c.RUnlock()
  1162  
  1163  	if !c.isActiveManager() {
  1164  		return types.Task{}, c.errNoManager()
  1165  	}
  1166  
  1167  	ctx, cancel := c.getRequestContext()
  1168  	defer cancel()
  1169  
  1170  	task, err := getTask(ctx, c.client, input)
  1171  	if err != nil {
  1172  		return types.Task{}, err
  1173  	}
  1174  	return convert.TaskFromGRPC(*task), nil
  1175  }
  1176  
  1177  // GetNetwork returns a cluster network by an ID.
  1178  func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
  1179  	c.RLock()
  1180  	defer c.RUnlock()
  1181  
  1182  	if !c.isActiveManager() {
  1183  		return apitypes.NetworkResource{}, c.errNoManager()
  1184  	}
  1185  
  1186  	ctx, cancel := c.getRequestContext()
  1187  	defer cancel()
  1188  
  1189  	network, err := getNetwork(ctx, c.client, input)
  1190  	if err != nil {
  1191  		return apitypes.NetworkResource{}, err
  1192  	}
  1193  	return convert.BasicNetworkFromGRPC(*network), nil
  1194  }
  1195  
  1196  // GetNetworks returns all current cluster managed networks.
  1197  func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
  1198  	c.RLock()
  1199  	defer c.RUnlock()
  1200  
  1201  	if !c.isActiveManager() {
  1202  		return nil, c.errNoManager()
  1203  	}
  1204  
  1205  	ctx, cancel := c.getRequestContext()
  1206  	defer cancel()
  1207  
  1208  	r, err := c.client.ListNetworks(ctx, &swarmapi.ListNetworksRequest{})
  1209  	if err != nil {
  1210  		return nil, err
  1211  	}
  1212  
  1213  	var networks []apitypes.NetworkResource
  1214  
  1215  	for _, network := range r.Networks {
  1216  		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
  1217  	}
  1218  
  1219  	return networks, nil
  1220  }
  1221  
  1222  func attacherKey(target, containerID string) string {
  1223  	return containerID + ":" + target
  1224  }
  1225  
  1226  // UpdateAttachment signals the attachment config to the attachment
   1227  // waiter that is trying to start or attach the container to the
  1228  // network.
  1229  func (c *Cluster) UpdateAttachment(target, containerID string, config *network.NetworkingConfig) error {
  1230  	c.RLock()
  1231  	attacher, ok := c.attachers[attacherKey(target, containerID)]
  1232  	c.RUnlock()
  1233  	if !ok || attacher == nil {
  1234  		return fmt.Errorf("could not find attacher for container %s to network %s", containerID, target)
  1235  	}
  1236  
  1237  	attacher.attachWaitCh <- config
  1238  	close(attacher.attachWaitCh)
  1239  	return nil
  1240  }
  1241  
  1242  // WaitForDetachment waits for the container to stop or detach from
  1243  // the network.
  1244  func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID, taskID, containerID string) error {
  1245  	c.RLock()
  1246  	attacher, ok := c.attachers[attacherKey(networkName, containerID)]
  1247  	if !ok {
  1248  		attacher, ok = c.attachers[attacherKey(networkID, containerID)]
  1249  	}
  1250  	if c.node == nil || c.node.Agent() == nil {
  1251  		c.RUnlock()
  1252  		return fmt.Errorf("invalid cluster node while waiting for detachment")
  1253  	}
  1254  
  1255  	agent := c.node.Agent()
  1256  	c.RUnlock()
  1257  
  1258  	if ok && attacher != nil &&
  1259  		attacher.detachWaitCh != nil &&
  1260  		attacher.attachCompleteCh != nil {
  1261  		// Attachment may be in progress still so wait for
  1262  		// attachment to complete.
  1263  		select {
  1264  		case <-attacher.attachCompleteCh:
  1265  		case <-ctx.Done():
  1266  			return ctx.Err()
  1267  		}
  1268  
  1269  		if attacher.taskID == taskID {
  1270  			select {
  1271  			case <-attacher.detachWaitCh:
  1272  			case <-ctx.Done():
  1273  				return ctx.Err()
  1274  			}
  1275  		}
  1276  	}
  1277  
  1278  	return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
  1279  }
  1280  
  1281  // AttachNetwork generates an attachment request towards the manager.
  1282  func (c *Cluster) AttachNetwork(target string, containerID string, addresses []string) (*network.NetworkingConfig, error) {
  1283  	aKey := attacherKey(target, containerID)
  1284  	c.Lock()
  1285  	if c.node == nil || c.node.Agent() == nil {
  1286  		c.Unlock()
  1287  		return nil, fmt.Errorf("invalid cluster node while attaching to network")
  1288  	}
  1289  	if attacher, ok := c.attachers[aKey]; ok {
  1290  		c.Unlock()
  1291  		return attacher.config, nil
  1292  	}
  1293  
  1294  	agent := c.node.Agent()
  1295  	attachWaitCh := make(chan *network.NetworkingConfig)
  1296  	detachWaitCh := make(chan struct{})
  1297  	attachCompleteCh := make(chan struct{})
  1298  	c.attachers[aKey] = &attacher{
  1299  		attachWaitCh:     attachWaitCh,
  1300  		attachCompleteCh: attachCompleteCh,
  1301  		detachWaitCh:     detachWaitCh,
  1302  	}
  1303  	c.Unlock()
  1304  
  1305  	ctx, cancel := c.getRequestContext()
  1306  	defer cancel()
  1307  
  1308  	taskID, err := agent.ResourceAllocator().AttachNetwork(ctx, containerID, target, addresses)
  1309  	if err != nil {
  1310  		c.Lock()
  1311  		delete(c.attachers, aKey)
  1312  		c.Unlock()
  1313  		return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
  1314  	}
  1315  
  1316  	c.Lock()
  1317  	c.attachers[aKey].taskID = taskID
  1318  	close(attachCompleteCh)
  1319  	c.Unlock()
  1320  
  1321  	logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
  1322  
  1323  	var config *network.NetworkingConfig
  1324  	select {
  1325  	case config = <-attachWaitCh:
  1326  	case <-ctx.Done():
  1327  		return nil, fmt.Errorf("attaching to network failed, make sure your network options are correct and check manager logs: %v", ctx.Err())
  1328  	}
  1329  
  1330  	c.Lock()
  1331  	c.attachers[aKey].config = config
  1332  	c.Unlock()
  1333  	return config, nil
  1334  }
  1335  
  1336  // DetachNetwork unblocks the waiters waiting on WaitForDetachment so
  1337  // that a request to detach can be generated towards the manager.
  1338  func (c *Cluster) DetachNetwork(target string, containerID string) error {
  1339  	aKey := attacherKey(target, containerID)
  1340  
  1341  	c.Lock()
  1342  	attacher, ok := c.attachers[aKey]
  1343  	delete(c.attachers, aKey)
  1344  	c.Unlock()
  1345  
  1346  	if !ok {
  1347  		return fmt.Errorf("could not find network attachment for container %s to network %s", containerID, target)
  1348  	}
  1349  
  1350  	close(attacher.detachWaitCh)
  1351  	return nil
  1352  }
  1353  
  1354  // CreateNetwork creates a new cluster managed network.
  1355  func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
  1356  	c.RLock()
  1357  	defer c.RUnlock()
  1358  
  1359  	if !c.isActiveManager() {
  1360  		return "", c.errNoManager()
  1361  	}
  1362  
  1363  	if runconfig.IsPreDefinedNetwork(s.Name) {
  1364  		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
  1365  		return "", errors.NewRequestForbiddenError(err)
  1366  	}
  1367  
  1368  	ctx, cancel := c.getRequestContext()
  1369  	defer cancel()
  1370  
  1371  	networkSpec := convert.BasicNetworkCreateToGRPC(s)
  1372  	r, err := c.client.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
  1373  	if err != nil {
  1374  		return "", err
  1375  	}
  1376  
  1377  	return r.Network.ID, nil
  1378  }
  1379  
  1380  // RemoveNetwork removes a cluster network.
  1381  func (c *Cluster) RemoveNetwork(input string) error {
  1382  	c.RLock()
  1383  	defer c.RUnlock()
  1384  
  1385  	if !c.isActiveManager() {
  1386  		return c.errNoManager()
  1387  	}
  1388  
  1389  	ctx, cancel := c.getRequestContext()
  1390  	defer cancel()
  1391  
  1392  	network, err := getNetwork(ctx, c.client, input)
  1393  	if err != nil {
  1394  		return err
  1395  	}
  1396  
  1397  	if _, err := c.client.RemoveNetwork(ctx, &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil {
  1398  		return err
  1399  	}
  1400  	return nil
  1401  }
  1402  
  1403  func (c *Cluster) populateNetworkID(ctx context.Context, client swarmapi.ControlClient, s *types.ServiceSpec) error {
  1404  	// Always prefer NetworkAttachmentConfigs from TaskTemplate
  1405  	// but fallback to service spec for backward compatibility
  1406  	networks := s.TaskTemplate.Networks
  1407  	if len(networks) == 0 {
  1408  		networks = s.Networks
  1409  	}
  1410  
  1411  	for i, n := range networks {
  1412  		apiNetwork, err := getNetwork(ctx, client, n.Target)
  1413  		if err != nil {
  1414  			if ln, _ := c.config.Backend.FindNetwork(n.Target); ln != nil && !ln.Info().Dynamic() {
  1415  				err = fmt.Errorf("network %s is not eligible for docker services", ln.Name())
  1416  				return errors.NewRequestForbiddenError(err)
  1417  			}
  1418  			return err
  1419  		}
  1420  		networks[i].Target = apiNetwork.ID
  1421  	}
  1422  	return nil
  1423  }
  1424  
  1425  func getNetwork(ctx context.Context, c swarmapi.ControlClient, input string) (*swarmapi.Network, error) {
  1426  	// GetNetwork to match via full ID.
  1427  	rg, err := c.GetNetwork(ctx, &swarmapi.GetNetworkRequest{NetworkID: input})
  1428  	if err != nil {
  1429  		// If any error (including NotFound), ListNetworks to match via ID prefix and full name.
  1430  		rl, err := c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{Names: []string{input}}})
  1431  		if err != nil || len(rl.Networks) == 0 {
  1432  			rl, err = c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{IDPrefixes: []string{input}}})
  1433  		}
  1434  
  1435  		if err != nil {
  1436  			return nil, err
  1437  		}
  1438  
  1439  		if len(rl.Networks) == 0 {
  1440  			return nil, fmt.Errorf("network %s not found", input)
  1441  		}
  1442  
  1443  		if l := len(rl.Networks); l > 1 {
  1444  			return nil, fmt.Errorf("network %s is ambiguous (%d matches found)", input, l)
  1445  		}
  1446  
  1447  		return rl.Networks[0], nil
  1448  	}
  1449  	return rg.Network, nil
  1450  }
  1451  
   1452  // Cleanup stops the active swarm node. This is run before daemon shutdown.
  1453  func (c *Cluster) Cleanup() {
  1454  	c.Lock()
  1455  	node := c.node
  1456  	if node == nil {
  1457  		c.Unlock()
  1458  		return
  1459  	}
  1460  	defer c.Unlock()
  1461  	if c.isActiveManager() {
  1462  		active, reachable, unreachable, err := c.managerStats()
  1463  		if err == nil {
  1464  			singlenode := active && isLastManager(reachable, unreachable)
  1465  			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
  1466  				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
  1467  			}
  1468  		}
  1469  	}
  1470  	c.stopNode()
  1471  }
  1472  
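         // managerStats counts reachable and unreachable managers and reports
         // whether the current node is one of the reachable ones. It is called
         // with the cluster lock held.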
  1473  func (c *Cluster) managerStats() (current bool, reachable int, unreachable int, err error) {
  1474  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  1475  	defer cancel()
  1476  	nodes, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
  1477  	if err != nil {
  1478  		return false, 0, 0, err
  1479  	}
  1480  	for _, n := range nodes.Nodes {
  1481  		if n.ManagerStatus != nil {
  1482  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
  1483  				reachable++
  1484  				if n.ID == c.node.NodeID() {
  1485  					current = true
  1486  				}
  1487  			}
  1488  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
  1489  				unreachable++
  1490  			}
  1491  		}
  1492  	}
  1493  	return
  1494  }
  1495  
  1496  func validateAndSanitizeInitRequest(req *types.InitRequest) error {
  1497  	var err error
  1498  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1499  	if err != nil {
  1500  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1501  	}
  1502  
  1503  	return nil
  1504  }
  1505  
  1506  func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
  1507  	var err error
  1508  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1509  	if err != nil {
  1510  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1511  	}
  1512  	if len(req.RemoteAddrs) == 0 {
  1513  		return fmt.Errorf("at least 1 RemoteAddr is required to join")
  1514  	}
  1515  	for i := range req.RemoteAddrs {
  1516  		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
  1517  		if err != nil {
  1518  			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
  1519  		}
  1520  	}
  1521  	return nil
  1522  }
  1523  
  1524  func validateAddr(addr string) (string, error) {
  1525  	if addr == "" {
  1526  		return addr, fmt.Errorf("invalid empty address")
  1527  	}
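         	// Addresses that cannot be parsed as TCP addresses are returned
         	// unchanged; well-formed values are normalized and stripped of
         	// their tcp:// prefix.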
  1528  	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
  1529  	if err != nil {
  1530  		return addr, nil
  1531  	}
  1532  	return strings.TrimPrefix(newaddr, "tcp://"), nil
  1533  }
  1534  
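         // initClusterSpec waits (up to 5 seconds) for the control socket of a
         // freshly initialized node and then merges the user-provided spec into
         // the default cluster spec created by swarmkit.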
  1535  func initClusterSpec(node *node, spec types.Spec) error {
   1536  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
         	defer cancel()
  1537  	for conn := range node.ListenControlSocket(ctx) {
  1538  		if ctx.Err() != nil {
  1539  			return ctx.Err()
  1540  		}
  1541  		if conn != nil {
  1542  			client := swarmapi.NewControlClient(conn)
  1543  			var cluster *swarmapi.Cluster
  1544  			for i := 0; ; i++ {
  1545  				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
  1546  				if err != nil {
  1547  					return fmt.Errorf("error on listing clusters: %v", err)
  1548  				}
  1549  				if len(lcr.Clusters) == 0 {
  1550  					if i < 10 {
  1551  						time.Sleep(200 * time.Millisecond)
  1552  						continue
  1553  					}
  1554  					return fmt.Errorf("empty list of clusters was returned")
  1555  				}
  1556  				cluster = lcr.Clusters[0]
  1557  				break
  1558  			}
  1559  			// In init, we take the initial default values from swarmkit, and merge
   1560  			// any non-nil or non-zero value from spec into the GRPC spec, leaving
   1561  			// the remaining defaults alone.
   1562  			// Note that this is different from Update(), where we expect the user
   1563  			// to specify the complete spec of the cluster (as they already know
   1564  			// the existing one and which fields to update).
  1565  			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
  1566  			if err != nil {
  1567  				return fmt.Errorf("error updating cluster settings: %v", err)
  1568  			}
  1569  			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
  1570  				ClusterID:      cluster.ID,
  1571  				ClusterVersion: &cluster.Meta.Version,
  1572  				Spec:           &clusterSpec,
  1573  			})
  1574  			if err != nil {
  1575  				return fmt.Errorf("error updating cluster settings: %v", err)
  1576  			}
  1577  			return nil
  1578  		}
  1579  	}
  1580  	return ctx.Err()
  1581  }