github.com/dpiddy/docker@v1.12.2-rc1/daemon/cluster/cluster.go

     1  package cluster
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"net"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"google.golang.org/grpc"
    15  
    16  	"github.com/Sirupsen/logrus"
    17  	"github.com/docker/docker/daemon/cluster/convert"
    18  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    19  	"github.com/docker/docker/daemon/cluster/executor/container"
    20  	"github.com/docker/docker/errors"
    21  	"github.com/docker/docker/opts"
    22  	"github.com/docker/docker/pkg/ioutils"
    23  	"github.com/docker/docker/runconfig"
    24  	apitypes "github.com/docker/engine-api/types"
    25  	"github.com/docker/engine-api/types/filters"
    26  	types "github.com/docker/engine-api/types/swarm"
    27  	swarmagent "github.com/docker/swarmkit/agent"
    28  	swarmapi "github.com/docker/swarmkit/api"
    29  	"golang.org/x/net/context"
    30  )
    31  
    32  const swarmDirName = "swarm"
    33  const controlSocket = "control.sock"
    34  const swarmConnectTimeout = 20 * time.Second
    35  const swarmRequestTimeout = 20 * time.Second
    36  const stateFile = "docker-state.json"
    37  const defaultAddr = "0.0.0.0:2377"
    38  
    39  const (
    40  	initialReconnectDelay = 100 * time.Millisecond
    41  	maxReconnectDelay     = 30 * time.Second
    42  )
    43  
    44  // ErrNoSwarm is returned on leaving a cluster that was never initialized.
    45  var ErrNoSwarm = fmt.Errorf("This node is not part of a swarm")
    46  
    47  // ErrSwarmExists is returned on initialize or join request for a cluster that has already been activated.
    48  var ErrSwarmExists = fmt.Errorf("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")
    49  
    50  // ErrPendingSwarmExists is returned on initialize or join request for a cluster that is already processing a similar request but has not succeeded yet.
    51  var ErrPendingSwarmExists = fmt.Errorf("This node is processing an existing join request that has not succeeded yet. Use \"docker swarm leave\" to cancel the current request.")
    52  
    53  // ErrSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached.
    54  var ErrSwarmJoinTimeoutReached = fmt.Errorf("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")
    55  
    56  // defaultSpec contains some sane defaults if cluster options are missing on init.
    57  var defaultSpec = types.Spec{
    58  	Raft: types.RaftConfig{
    59  		SnapshotInterval:           10000,
    60  		KeepOldSnapshots:           0,
    61  		LogEntriesForSlowFollowers: 500,
    62  		HeartbeatTick:              1,
    63  		ElectionTick:               3,
    64  	},
    65  	CAConfig: types.CAConfig{
    66  		NodeCertExpiry: 90 * 24 * time.Hour,
    67  	},
    68  	Dispatcher: types.DispatcherConfig{
    69  		HeartbeatPeriod: uint64((5 * time.Second).Nanoseconds()),
    70  	},
    71  	Orchestration: types.OrchestrationConfig{
    72  		TaskHistoryRetentionLimit: 10,
    73  	},
    74  }
    75  
    76  type state struct {
    77  	// LocalAddr is this machine's local IP or hostname, if specified.
    78  	LocalAddr string
    79  	// RemoteAddr is the address that was given to "swarm join". It is used
    80  	// to find LocalAddr if necessary.
    81  	RemoteAddr string
    82  	// ListenAddr is the address we bind to, including a port.
    83  	ListenAddr string
    84  	// AdvertiseAddr is the address other nodes should connect to,
    85  	// including a port.
    86  	AdvertiseAddr string
    87  }
    88  
    89  // NetworkSubnetsProvider exposes functions for retrieving the subnets
    90  // of networks managed by Docker, so they can be filtered.
    91  type NetworkSubnetsProvider interface {
    92  	V4Subnets() []net.IPNet
    93  	V6Subnets() []net.IPNet
    94  }
    95  
    96  // Config provides values for Cluster.
    97  type Config struct {
    98  	Root                   string
    99  	Name                   string
   100  	Backend                executorpkg.Backend
   101  	NetworkSubnetsProvider NetworkSubnetsProvider
   102  
   103  	// DefaultAdvertiseAddr is the default host/IP or network interface to use
   104  	// if no AdvertiseAddr value is specified.
   105  	DefaultAdvertiseAddr string
   106  }
   107  
   108  // Cluster provides capabilities to participate in a cluster as a worker or a
   109  // manager.
   110  type Cluster struct {
   111  	sync.RWMutex
   112  	*node
   113  	root            string
   114  	config          Config
   115  	configEvent     chan struct{} // todo: make this an array and goroutine-safe
   116  	localAddr       string
   117  	actualLocalAddr string // after resolution, not persisted
   118  	remoteAddr      string
   119  	listenAddr      string
   120  	advertiseAddr   string
   121  	stop            bool
   122  	err             error
   123  	cancelDelay     func()
   124  }
   125  
   126  type node struct {
   127  	*swarmagent.Node
   128  	done           chan struct{}
   129  	ready          bool
   130  	conn           *grpc.ClientConn
   131  	client         swarmapi.ControlClient
   132  	reconnectDelay time.Duration
   133  }
   134  
   135  // New creates a new Cluster instance using the provided config.
   136  func New(config Config) (*Cluster, error) {
   137  	root := filepath.Join(config.Root, swarmDirName)
   138  	if err := os.MkdirAll(root, 0700); err != nil {
   139  		return nil, err
   140  	}
   141  	c := &Cluster{
   142  		root:        root,
   143  		config:      config,
   144  		configEvent: make(chan struct{}, 10),
   145  	}
   146  
   147  	st, err := c.loadState()
   148  	if err != nil {
   149  		if os.IsNotExist(err) {
   150  			return c, nil
   151  		}
   152  		return nil, err
   153  	}
   154  
   155  	n, err := c.startNewNode(false, st.LocalAddr, st.RemoteAddr, st.ListenAddr, st.AdvertiseAddr, "", "")
   156  	if err != nil {
   157  		return nil, err
   158  	}
   159  
   160  	select {
   161  	case <-time.After(swarmConnectTimeout):
   162  		logrus.Errorf("swarm component could not be started before timeout was reached")
   163  	case <-n.Ready():
   164  	case <-n.done:
   165  		return nil, fmt.Errorf("swarm component could not be started: %v", c.err)
   166  	}
   167  	go c.reconnectOnFailure(n)
   168  	return c, nil
   169  }
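
// Illustrative usage sketch; not part of the original file. New is normally
// called by the daemon at startup, and the Backend value below stands in for
// the *daemon.Daemon that implements executorpkg.Backend:
//
//	c, err := cluster.New(cluster.Config{
//		Root:    "/var/lib/docker", // swarm state is kept under Root/swarm
//		Name:    "node-hostname",
//		Backend: daemonBackend, // assumed executorpkg.Backend implementation
//	})
//	if err != nil {
//		logrus.Fatalf("swarm component could not be started: %v", err)
//	}
//	defer c.Cleanup()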
   170  
   171  func (c *Cluster) loadState() (*state, error) {
   172  	dt, err := ioutil.ReadFile(filepath.Join(c.root, stateFile))
   173  	if err != nil {
   174  		return nil, err
   175  	}
   176  	// missing certificate means no actual state to restore from
   177  	if _, err := os.Stat(filepath.Join(c.root, "certificates/swarm-node.crt")); err != nil {
   178  		if os.IsNotExist(err) {
   179  			c.clearState()
   180  		}
   181  		return nil, err
   182  	}
   183  	var st state
   184  	if err := json.Unmarshal(dt, &st); err != nil {
   185  		return nil, err
   186  	}
   187  	return &st, nil
   188  }
   189  
   190  func (c *Cluster) saveState() error {
   191  	dt, err := json.Marshal(state{
   192  		LocalAddr:     c.localAddr,
   193  		RemoteAddr:    c.remoteAddr,
   194  		ListenAddr:    c.listenAddr,
   195  		AdvertiseAddr: c.advertiseAddr,
   196  	})
   197  	if err != nil {
   198  		return err
   199  	}
   200  	return ioutils.AtomicWriteFile(filepath.Join(c.root, stateFile), dt, 0600)
   201  }
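
// For illustration, the docker-state.json written above is plain JSON keyed
// by the exported fields of the state struct; shown pretty-printed here
// (json.Marshal emits a single compact line), with made-up values:
//
//	{
//		"LocalAddr": "192.168.1.10",
//		"RemoteAddr": "192.168.1.2:2377",
//		"ListenAddr": "0.0.0.0:2377",
//		"AdvertiseAddr": "192.168.1.10:2377"
//	}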
   202  
   203  func (c *Cluster) reconnectOnFailure(n *node) {
   204  	for {
   205  		<-n.done
   206  		c.Lock()
   207  		if c.stop || c.node != nil {
   208  			c.Unlock()
   209  			return
   210  		}
   211  		n.reconnectDelay *= 2
   212  		if n.reconnectDelay > maxReconnectDelay {
   213  			n.reconnectDelay = maxReconnectDelay
   214  		}
   215  		logrus.Warnf("Restarting swarm in %.2f seconds", n.reconnectDelay.Seconds())
   216  		delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
   217  		c.cancelDelay = cancel
   218  		c.Unlock()
   219  		<-delayCtx.Done()
   220  		if delayCtx.Err() != context.DeadlineExceeded {
   221  			return
   222  		}
   223  		c.Lock()
   224  		if c.node != nil {
   225  			c.Unlock()
   226  			return
   227  		}
   228  		nn, err := c.startNewNode(false, c.localAddr, c.getRemoteAddress(), c.listenAddr, c.advertiseAddr, c.getRemoteAddress(), "")
   229  		if err != nil {
   230  			// startNewNode returns a nil node on error; keep the previous
   231  			// node, whose done channel is already closed, so the loop
   232  			// simply retries after the next backoff delay.
   233  			c.err = err
   234  		} else {
   235  			n = nn
   236  		}
   237  		c.Unlock()
   235  	}
   236  }
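
// The doubling above implements a capped exponential backoff: starting from
// initialReconnectDelay (100ms), the first restart waits 200ms, then 400ms,
// 800ms, and so on, never exceeding maxReconnectDelay (30s).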
   237  
   238  func (c *Cluster) startNewNode(forceNewCluster bool, localAddr, remoteAddr, listenAddr, advertiseAddr, joinAddr, joinToken string) (*node, error) {
   239  	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
   240  		return nil, err
   241  	}
   242  
   243  	actualLocalAddr := localAddr
   244  	if actualLocalAddr == "" {
   245  		// If localAddr was not specified, resolve it automatically
   246  		// based on the route to joinAddr. localAddr can only be left
   247  		// empty on "join".
   248  		listenHost, _, err := net.SplitHostPort(listenAddr)
   249  		if err != nil {
   250  			return nil, fmt.Errorf("could not parse listen address: %v", err)
   251  		}
   252  
   253  		listenAddrIP := net.ParseIP(listenHost)
   254  		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
   255  			actualLocalAddr = listenHost
   256  		} else {
   257  			if remoteAddr == "" {
   258  				// Should never happen except when using swarms created by
   259  				// old versions that didn't save remoteAddr.
   260  				remoteAddr = "8.8.8.8:53"
   261  			}
   262  			conn, err := net.Dial("udp", remoteAddr)
   263  			if err != nil {
   264  				return nil, fmt.Errorf("could not find local IP address: %v", err)
   265  			}
   266  			localHostPort := conn.LocalAddr().String()
   267  			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
   268  			conn.Close()
   269  		}
   270  	}
   271  
   272  	c.node = nil
   273  	c.cancelDelay = nil
   274  	c.stop = false
   275  	n, err := swarmagent.NewNode(&swarmagent.NodeConfig{
   276  		Hostname:           c.config.Name,
   277  		ForceNewCluster:    forceNewCluster,
   278  		ListenControlAPI:   filepath.Join(c.root, controlSocket),
   279  		ListenRemoteAPI:    listenAddr,
   280  		AdvertiseRemoteAPI: advertiseAddr,
   281  		JoinAddr:           joinAddr,
   282  		StateDir:           c.root,
   283  		JoinToken:          joinToken,
   284  		Executor:           container.NewExecutor(c.config.Backend),
   285  		HeartbeatTick:      1,
   286  		ElectionTick:       3,
   287  	})
   288  	if err != nil {
   289  		return nil, err
   290  	}
   291  	ctx := context.Background()
   292  	if err := n.Start(ctx); err != nil {
   293  		return nil, err
   294  	}
   295  	node := &node{
   296  		Node:           n,
   297  		done:           make(chan struct{}),
   298  		reconnectDelay: initialReconnectDelay,
   299  	}
   300  	c.node = node
   301  	c.localAddr = localAddr
   302  	c.actualLocalAddr = actualLocalAddr // not saved
   303  	c.remoteAddr = remoteAddr
   304  	c.listenAddr = listenAddr
   305  	c.advertiseAddr = advertiseAddr
   306  	if err := c.saveState(); err != nil {
   307  		logrus.Errorf("error saving cluster state: %v", err)
   308  	}
   307  
   308  	c.config.Backend.SetClusterProvider(c)
   309  	go func() {
   310  		err := n.Err(ctx)
   311  		if err != nil {
   312  			logrus.Errorf("cluster exited with error: %v", err)
   313  		}
   314  		c.Lock()
   315  		c.node = nil
   316  		c.err = err
   317  		c.Unlock()
   318  		close(node.done)
   319  	}()
   320  
   321  	go func() {
   322  		select {
   323  		case <-n.Ready():
   324  			c.Lock()
   325  			node.ready = true
   326  			c.err = nil
   327  			c.Unlock()
   328  		case <-ctx.Done():
   329  		}
   330  		c.configEvent <- struct{}{}
   331  	}()
   332  
   333  	go func() {
   334  		for conn := range n.ListenControlSocket(ctx) {
   335  			c.Lock()
   336  			if node.conn != conn {
   337  				if conn == nil {
   338  					node.client = nil
   339  				} else {
   340  					node.client = swarmapi.NewControlClient(conn)
   341  				}
   342  			}
   343  			node.conn = conn
   344  			c.Unlock()
   345  			c.configEvent <- struct{}{}
   346  		}
   347  	}()
   348  
   349  	return node, nil
   350  }
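
// The local-address discovery in startNewNode exploits the fact that dialing
// a UDP address sends no packets: the kernel only selects a route and a
// source address. A standalone sketch of the same trick, using the 8.8.8.8:53
// fallback target from above:
//
//	conn, err := net.Dial("udp", "8.8.8.8:53")
//	if err != nil {
//		return "", err
//	}
//	defer conn.Close()
//	host, _, _ := net.SplitHostPort(conn.LocalAddr().String())
//	// host now holds this machine's outbound IP, e.g. "192.168.1.10"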
   351  
   352  // Init initializes a new cluster from a user-provided request.
   353  func (c *Cluster) Init(req types.InitRequest) (string, error) {
   354  	c.Lock()
   355  	if node := c.node; node != nil {
   356  		if !req.ForceNewCluster {
   357  			c.Unlock()
   358  			return "", ErrSwarmExists
   359  		}
   360  		if err := c.stopNode(); err != nil {
   361  			c.Unlock()
   362  			return "", err
   363  		}
   364  	}
   365  
   366  	if err := validateAndSanitizeInitRequest(&req); err != nil {
   367  		c.Unlock()
   368  		return "", err
   369  	}
   370  
   371  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   372  	if err != nil {
   373  		c.Unlock()
   374  		return "", err
   375  	}
   376  
   377  	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   378  	if err != nil {
   379  		c.Unlock()
   380  		return "", err
   381  	}
   382  
   383  	localAddr := listenHost
   384  
   385  	// If the listen address is unspecified (0.0.0.0), the advertise
   386  	// address must be one of the system's addresses; otherwise an
   387  	// explicit listen address is required.
   387  	listenAddrIP := net.ParseIP(listenHost)
   388  	if listenAddrIP != nil && listenAddrIP.IsUnspecified() {
   389  		advertiseIP := net.ParseIP(advertiseHost)
   390  		if advertiseIP == nil {
   391  			// not an IP
   392  			c.Unlock()
   393  			return "", errMustSpecifyListenAddr
   394  		}
   395  
   396  		systemIPs := listSystemIPs()
   397  
   398  		found := false
   399  		for _, systemIP := range systemIPs {
   400  			if systemIP.Equal(advertiseIP) {
   401  				found = true
   402  				break
   403  			}
   404  		}
   405  		if !found {
   406  			c.Unlock()
   407  			return "", errMustSpecifyListenAddr
   408  		}
   409  		localAddr = advertiseIP.String()
   410  	}
   411  
   412  	// todo: check for existing state
   413  	n, err := c.startNewNode(req.ForceNewCluster, localAddr, "", net.JoinHostPort(listenHost, listenPort), net.JoinHostPort(advertiseHost, advertisePort), "", "")
   414  	if err != nil {
   415  		c.Unlock()
   416  		return "", err
   417  	}
   418  	c.Unlock()
   419  
   420  	select {
   421  	case <-n.Ready():
   422  		if err := initClusterSpec(n, req.Spec); err != nil {
   423  			return "", err
   424  		}
   425  		go c.reconnectOnFailure(n)
   426  		return n.NodeID(), nil
   427  	case <-n.done:
   428  		c.RLock()
   429  		defer c.RUnlock()
   430  		if !req.ForceNewCluster { // if the first attempt failed, don't keep state
   431  			if err := c.clearState(); err != nil {
   432  				return "", err
   433  			}
   434  		}
   435  		return "", c.err
   436  	}
   437  }
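
// Illustrative call, assuming the engine-api swarm types vendored in this
// tree; zero-valued Spec fields are filled in from defaultSpec by
// validateAndSanitizeInitRequest:
//
//	nodeID, err := c.Init(types.InitRequest{
//		ListenAddr:    "0.0.0.0:2377",
//		AdvertiseAddr: "192.168.1.10:2377",
//		Spec:          types.Spec{},
//	})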
   438  
   439  // Join makes the current Cluster part of an existing swarm cluster.
   440  func (c *Cluster) Join(req types.JoinRequest) error {
   441  	c.Lock()
   442  	if node := c.node; node != nil {
   443  		c.Unlock()
   444  		return ErrSwarmExists
   445  	}
   446  	if err := validateAndSanitizeJoinRequest(&req); err != nil {
   447  		c.Unlock()
   448  		return err
   449  	}
   450  
   451  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   452  	if err != nil {
   453  		c.Unlock()
   454  		return err
   455  	}
   456  
   457  	var advertiseAddr string
   458  	if req.AdvertiseAddr != "" {
   459  		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   460  		// For joining, we don't need to provide an advertise address,
   461  		// since the remote side can detect it.
   462  		if err == nil {
   463  			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
   464  		}
   465  	}
   466  
   467  	// todo: check for existing state
   468  	n, err := c.startNewNode(false, "", req.RemoteAddrs[0], net.JoinHostPort(listenHost, listenPort), advertiseAddr, req.RemoteAddrs[0], req.JoinToken)
   469  	if err != nil {
   470  		c.Unlock()
   471  		return err
   472  	}
   473  	c.Unlock()
   474  
   475  	select {
   476  	case <-time.After(swarmConnectTimeout):
   477  		// the connection attempt will continue in the background, reconnecting on failure
   478  		go c.reconnectOnFailure(n)
   479  		return ErrSwarmJoinTimeoutReached
   480  	case <-n.Ready():
   481  		go c.reconnectOnFailure(n)
   482  		return nil
   483  	case <-n.done:
   484  		c.RLock()
   485  		defer c.RUnlock()
   486  		return c.err
   487  	}
   488  }
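
// Illustrative call; the join token is a placeholder in the format printed
// by "docker swarm join-token":
//
//	err := c.Join(types.JoinRequest{
//		ListenAddr:  "0.0.0.0:2377",
//		RemoteAddrs: []string{"192.168.1.2:2377"},
//		JoinToken:   "SWMTKN-1-...",
//	})
//	if err == ErrSwarmJoinTimeoutReached {
//		// the join keeps running in the background; poll "docker info"
//	}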
   489  
   490  // stopNode is a helper that stops the active c.node and waits until it has
   491  // shut down. Must be called with the cluster lock held.
   492  func (c *Cluster) stopNode() error {
   493  	if c.node == nil {
   494  		return nil
   495  	}
   496  	c.stop = true
   497  	if c.cancelDelay != nil {
   498  		c.cancelDelay()
   499  		c.cancelDelay = nil
   500  	}
   501  	node := c.node
   502  	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
   503  	defer cancel()
   504  	// TODO: can't hold lock on stop because it calls back to network
   505  	c.Unlock()
   506  	defer c.Lock()
   507  	if err := node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
   508  		return err
   509  	}
   510  	<-node.done
   511  	return nil
   512  }
   513  
   514  // Leave shuts down the Cluster and removes current state.
   515  func (c *Cluster) Leave(force bool) error {
   516  	c.Lock()
   517  	node := c.node
   518  	if node == nil {
   519  		c.Unlock()
   520  		return ErrNoSwarm
   521  	}
   522  
   523  	if node.Manager() != nil && !force {
   524  		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
   525  		if c.isActiveManager() {
   526  			active, reachable, unreachable, err := c.managerStats()
   527  			if err == nil {
   528  				if active && reachable-2 <= unreachable {
   529  					if reachable == 1 && unreachable == 0 {
   530  						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
   531  						c.Unlock()
   532  						return fmt.Errorf(msg)
   533  					}
   534  					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
   535  				}
   536  			}
   537  		} else {
   538  			msg += "Doing so may lose the consensus of your cluster. "
   539  		}
   540  
   541  		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
   542  		c.Unlock()
   543  		return fmt.Errorf(msg)
   544  	}
   545  	if err := c.stopNode(); err != nil {
   546  		c.Unlock()
   547  		return err
   548  	}
   549  	c.Unlock()
   550  	if nodeID := node.NodeID(); nodeID != "" {
   551  		for _, id := range c.config.Backend.ListContainersForNode(nodeID) {
   552  			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
   553  				logrus.Errorf("error removing %v: %v", id, err)
   554  			}
   555  		}
   556  	}
   557  	c.configEvent <- struct{}{}
   558  	// todo: cleanup optional?
   559  	if err := c.clearState(); err != nil {
   560  		return err
   561  	}
   562  	return nil
   563  }
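
// The quorum check in Leave warns whenever removing this manager could cost
// the swarm its Raft majority. For example, with reachable=3 and
// unreachable=1, reachable-2 <= unreachable holds (1 <= 1): leaving would
// drop the swarm to 2 reachable managers out of 4, which is not a majority.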
   564  
   565  func (c *Cluster) clearState() error {
   566  	// todo: backup this data instead of removing?
   567  	if err := os.RemoveAll(c.root); err != nil {
   568  		return err
   569  	}
   570  	if err := os.MkdirAll(c.root, 0700); err != nil {
   571  		return err
   572  	}
   573  	c.config.Backend.SetClusterProvider(nil)
   574  	return nil
   575  }
   576  
   577  func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed once requests don't block on quorum loss
   578  	return context.WithTimeout(context.Background(), swarmRequestTimeout)
   579  }
   580  
   581  // Inspect retrieves the configuration properties of a managed swarm cluster.
   582  func (c *Cluster) Inspect() (types.Swarm, error) {
   583  	c.RLock()
   584  	defer c.RUnlock()
   585  
   586  	if !c.isActiveManager() {
   587  		return types.Swarm{}, c.errNoManager()
   588  	}
   589  
   590  	ctx, cancel := c.getRequestContext()
   591  	defer cancel()
   592  
   593  	swarm, err := getSwarm(ctx, c.client)
   594  	if err != nil {
   595  		return types.Swarm{}, err
   596  	}
   597  
   602  	return convert.SwarmFromGRPC(*swarm), nil
   603  }
   604  
   605  // Update updates configuration of a managed swarm cluster.
   606  func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
   607  	c.RLock()
   608  	defer c.RUnlock()
   609  
   610  	if !c.isActiveManager() {
   611  		return c.errNoManager()
   612  	}
   613  
   614  	ctx, cancel := c.getRequestContext()
   615  	defer cancel()
   616  
   617  	swarm, err := getSwarm(ctx, c.client)
   618  	if err != nil {
   619  		return err
   620  	}
   621  
   622  	swarmSpec, err := convert.SwarmSpecToGRPC(spec)
   623  	if err != nil {
   624  		return err
   625  	}
   626  
   627  	_, err = c.client.UpdateCluster(
   628  		ctx,
   629  		&swarmapi.UpdateClusterRequest{
   630  			ClusterID: swarm.ID,
   631  			Spec:      &swarmSpec,
   632  			ClusterVersion: &swarmapi.Version{
   633  				Index: version,
   634  			},
   635  			Rotation: swarmapi.JoinTokenRotation{
   636  				RotateWorkerToken:  flags.RotateWorkerToken,
   637  				RotateManagerToken: flags.RotateManagerToken,
   638  			},
   639  		},
   640  	)
   641  	return err
   642  }
   643  
   644  // IsManager returns true if Cluster is participating as a manager.
   645  func (c *Cluster) IsManager() bool {
   646  	c.RLock()
   647  	defer c.RUnlock()
   648  	return c.isActiveManager()
   649  }
   650  
   651  // IsAgent returns true if Cluster is participating as a worker/agent.
   652  func (c *Cluster) IsAgent() bool {
   653  	c.RLock()
   654  	defer c.RUnlock()
   655  	return c.node != nil && c.ready
   656  }
   657  
   658  // GetLocalAddress returns the local address.
   659  func (c *Cluster) GetLocalAddress() string {
   660  	c.RLock()
   661  	defer c.RUnlock()
   662  	return c.actualLocalAddr
   663  }
   664  
   665  // GetListenAddress returns the listen address.
   666  func (c *Cluster) GetListenAddress() string {
   667  	c.RLock()
   668  	defer c.RUnlock()
   669  	return c.listenAddr
   670  }
   671  
   672  // GetAdvertiseAddress returns the remotely reachable address of this node.
   673  func (c *Cluster) GetAdvertiseAddress() string {
   674  	c.RLock()
   675  	defer c.RUnlock()
   676  	if c.advertiseAddr != "" {
   677  		advertiseHost, _, _ := net.SplitHostPort(c.advertiseAddr)
   678  		return advertiseHost
   679  	}
   680  	return c.actualLocalAddr
   681  }
   682  
   683  // GetRemoteAddress returns a known advertise address of a remote manager if
   684  // available.
   685  // todo: change to array/connect with info
   686  func (c *Cluster) GetRemoteAddress() string {
   687  	c.RLock()
   688  	defer c.RUnlock()
   689  	return c.getRemoteAddress()
   690  }
   691  
   692  func (c *Cluster) getRemoteAddress() string {
   693  	if c.node == nil {
   694  		return ""
   695  	}
   696  	nodeID := c.node.NodeID()
   697  	for _, r := range c.node.Remotes() {
   698  		if r.NodeID != nodeID {
   699  			return r.Addr
   700  		}
   701  	}
   702  	return ""
   703  }
   704  
   705  // ListenClusterEvents returns a channel that receives messages on cluster
   706  // participation changes.
   707  // todo: make cancelable and accessible to multiple callers
   708  func (c *Cluster) ListenClusterEvents() <-chan struct{} {
   709  	return c.configEvent
   710  }
   711  
   712  // Info returns information about the current cluster state.
   713  func (c *Cluster) Info() types.Info {
   714  	info := types.Info{
   715  		NodeAddr: c.GetAdvertiseAddress(),
   716  	}
   717  
   718  	c.RLock()
   719  	defer c.RUnlock()
   720  
   721  	if c.node == nil {
   722  		info.LocalNodeState = types.LocalNodeStateInactive
   723  		if c.cancelDelay != nil {
   724  			info.LocalNodeState = types.LocalNodeStateError
   725  		}
   726  	} else {
   727  		info.LocalNodeState = types.LocalNodeStatePending
   728  		if c.ready {
   729  			info.LocalNodeState = types.LocalNodeStateActive
   730  		}
   731  	}
   732  	if c.err != nil {
   733  		info.Error = c.err.Error()
   734  	}
   735  
   736  	ctx, cancel := c.getRequestContext()
   737  	defer cancel()
   738  
   739  	if c.isActiveManager() {
   740  		info.ControlAvailable = true
   741  		swarm, err := c.Inspect()
   742  		if err != nil {
   743  			info.Error = err.Error()
   744  		}
   745  
   746  		// Strip JoinTokens
   747  		info.Cluster = swarm.ClusterInfo
   748  
   749  		if r, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err == nil {
   750  			info.Nodes = len(r.Nodes)
   751  			for _, n := range r.Nodes {
   752  				if n.ManagerStatus != nil {
   753  					info.Managers++
   754  				}
   755  			}
   756  		}
   757  	}
   758  
   759  	if c.node != nil {
   760  		for _, r := range c.node.Remotes() {
   761  			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
   762  		}
   763  		info.NodeID = c.node.NodeID()
   764  	}
   765  
   766  	return info
   767  }
   768  
   769  // isActiveManager must be called with a read lock held.
   770  func (c *Cluster) isActiveManager() bool {
   771  	return c.node != nil && c.conn != nil
   772  }
   773  
   774  // errNoManager returns error describing why manager commands can't be used.
   775  // Call with read lock.
   776  func (c *Cluster) errNoManager() error {
   777  	if c.node == nil {
   778  		return fmt.Errorf("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
   779  	}
   780  	if c.node.Manager() != nil {
   781  		return fmt.Errorf("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
   782  	}
   783  	return fmt.Errorf("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
   784  }
   785  
   786  // GetServices returns all services of a managed swarm cluster.
   787  func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
   788  	c.RLock()
   789  	defer c.RUnlock()
   790  
   791  	if !c.isActiveManager() {
   792  		return nil, c.errNoManager()
   793  	}
   794  
   795  	filters, err := newListServicesFilters(options.Filter)
   796  	if err != nil {
   797  		return nil, err
   798  	}
   799  	ctx, cancel := c.getRequestContext()
   800  	defer cancel()
   801  
   802  	r, err := c.client.ListServices(
   803  		ctx,
   804  		&swarmapi.ListServicesRequest{Filters: filters})
   805  	if err != nil {
   806  		return nil, err
   807  	}
   808  
   809  	services := []types.Service{}
   810  
   811  	for _, service := range r.Services {
   812  		services = append(services, convert.ServiceFromGRPC(*service))
   813  	}
   814  
   815  	return services, nil
   816  }
   817  
   818  // CreateService creates a new service in a managed swarm cluster.
   819  func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (string, error) {
   820  	c.RLock()
   821  	defer c.RUnlock()
   822  
   823  	if !c.isActiveManager() {
   824  		return "", c.errNoManager()
   825  	}
   826  
   827  	ctx, cancel := c.getRequestContext()
   828  	defer cancel()
   829  
   830  	err := c.populateNetworkID(ctx, c.client, &s)
   831  	if err != nil {
   832  		return "", err
   833  	}
   834  
   835  	serviceSpec, err := convert.ServiceSpecToGRPC(s)
   836  	if err != nil {
   837  		return "", err
   838  	}
   839  
   840  	if encodedAuth != "" {
   841  		ctnr := serviceSpec.Task.GetContainer()
   842  		if ctnr == nil {
   843  			return "", fmt.Errorf("service does not use container tasks")
   844  		}
   845  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   846  	}
   847  
   848  	r, err := c.client.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
   849  	if err != nil {
   850  		return "", err
   851  	}
   852  
   853  	return r.Service.ID, nil
   854  }
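
// Illustrative call, assuming the engine-api swarm types vendored in this
// tree; encodedAuth is a base64-encoded registry auth header, or "" for none:
//
//	id, err := c.CreateService(types.ServiceSpec{
//		Annotations: types.Annotations{Name: "web"},
//		TaskTemplate: types.TaskSpec{
//			ContainerSpec: types.ContainerSpec{Image: "nginx:alpine"},
//		},
//	}, "")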
   855  
   856  // GetService returns a service based on an ID or name.
   857  func (c *Cluster) GetService(input string) (types.Service, error) {
   858  	c.RLock()
   859  	defer c.RUnlock()
   860  
   861  	if !c.isActiveManager() {
   862  		return types.Service{}, c.errNoManager()
   863  	}
   864  
   865  	ctx, cancel := c.getRequestContext()
   866  	defer cancel()
   867  
   868  	service, err := getService(ctx, c.client, input)
   869  	if err != nil {
   870  		return types.Service{}, err
   871  	}
   872  	return convert.ServiceFromGRPC(*service), nil
   873  }
   874  
   875  // UpdateService updates an existing service to match new properties.
   876  func (c *Cluster) UpdateService(serviceID string, version uint64, spec types.ServiceSpec, encodedAuth string) error {
   877  	c.RLock()
   878  	defer c.RUnlock()
   879  
   880  	if !c.isActiveManager() {
   881  		return c.errNoManager()
   882  	}
   883  
   884  	ctx, cancel := c.getRequestContext()
   885  	defer cancel()
   886  
   887  	err := c.populateNetworkID(ctx, c.client, &spec)
   888  	if err != nil {
   889  		return err
   890  	}
   891  
   892  	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
   893  	if err != nil {
   894  		return err
   895  	}
   896  
   897  	if encodedAuth != "" {
   898  		ctnr := serviceSpec.Task.GetContainer()
   899  		if ctnr == nil {
   900  			return fmt.Errorf("service does not use container tasks")
   901  		}
   902  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   903  	} else {
   904  		// If encodedAuth isn't being updated, keep the PullOptions that
   905  		// are already present so the existing registry auth isn't lost.
   906  		currentService, err := getService(ctx, c.client, serviceID)
   907  		if err != nil {
   908  			return err
   909  		}
   910  		ctnr := currentService.Spec.Task.GetContainer()
   911  		if ctnr == nil {
   912  			return fmt.Errorf("service does not use container tasks")
   913  		}
   914  		serviceSpec.Task.GetContainer().PullOptions = ctnr.PullOptions
   915  	}
   916  
   917  	_, err = c.client.UpdateService(
   918  		ctx,
   919  		&swarmapi.UpdateServiceRequest{
   920  			ServiceID: serviceID,
   921  			Spec:      &serviceSpec,
   922  			ServiceVersion: &swarmapi.Version{
   923  				Index: version,
   924  			},
   925  		},
   926  	)
   927  	return err
   928  }
   929  
   930  // RemoveService removes a service from a managed swarm cluster.
   931  func (c *Cluster) RemoveService(input string) error {
   932  	c.RLock()
   933  	defer c.RUnlock()
   934  
   935  	if !c.isActiveManager() {
   936  		return c.errNoManager()
   937  	}
   938  
   939  	ctx, cancel := c.getRequestContext()
   940  	defer cancel()
   941  
   942  	service, err := getService(ctx, c.client, input)
   943  	if err != nil {
   944  		return err
   945  	}
   946  
   947  	if _, err := c.client.RemoveService(ctx, &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil {
   948  		return err
   949  	}
   950  	return nil
   951  }
   952  
   953  // GetNodes returns a list of all nodes known to a cluster.
   954  func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
   955  	c.RLock()
   956  	defer c.RUnlock()
   957  
   958  	if !c.isActiveManager() {
   959  		return nil, c.errNoManager()
   960  	}
   961  
   962  	filters, err := newListNodesFilters(options.Filter)
   963  	if err != nil {
   964  		return nil, err
   965  	}
   966  
   967  	ctx, cancel := c.getRequestContext()
   968  	defer cancel()
   969  
   970  	r, err := c.client.ListNodes(
   971  		ctx,
   972  		&swarmapi.ListNodesRequest{Filters: filters})
   973  	if err != nil {
   974  		return nil, err
   975  	}
   976  
   977  	nodes := []types.Node{}
   978  
   979  	for _, node := range r.Nodes {
   980  		nodes = append(nodes, convert.NodeFromGRPC(*node))
   981  	}
   982  	return nodes, nil
   983  }
   984  
   985  // GetNode returns a node based on an ID or name.
   986  func (c *Cluster) GetNode(input string) (types.Node, error) {
   987  	c.RLock()
   988  	defer c.RUnlock()
   989  
   990  	if !c.isActiveManager() {
   991  		return types.Node{}, c.errNoManager()
   992  	}
   993  
   994  	ctx, cancel := c.getRequestContext()
   995  	defer cancel()
   996  
   997  	node, err := getNode(ctx, c.client, input)
   998  	if err != nil {
   999  		return types.Node{}, err
  1000  	}
  1001  	return convert.NodeFromGRPC(*node), nil
  1002  }
  1003  
  1004  // UpdateNode updates an existing node's properties.
  1005  func (c *Cluster) UpdateNode(nodeID string, version uint64, spec types.NodeSpec) error {
  1006  	c.RLock()
  1007  	defer c.RUnlock()
  1008  
  1009  	if !c.isActiveManager() {
  1010  		return c.errNoManager()
  1011  	}
  1012  
  1013  	nodeSpec, err := convert.NodeSpecToGRPC(spec)
  1014  	if err != nil {
  1015  		return err
  1016  	}
  1017  
  1018  	ctx, cancel := c.getRequestContext()
  1019  	defer cancel()
  1020  
  1021  	_, err = c.client.UpdateNode(
  1022  		ctx,
  1023  		&swarmapi.UpdateNodeRequest{
  1024  			NodeID: nodeID,
  1025  			Spec:   &nodeSpec,
  1026  			NodeVersion: &swarmapi.Version{
  1027  				Index: version,
  1028  			},
  1029  		},
  1030  	)
  1031  	return err
  1032  }
  1033  
  1034  // RemoveNode removes a node from a cluster.
  1035  func (c *Cluster) RemoveNode(input string, force bool) error {
  1036  	c.RLock()
  1037  	defer c.RUnlock()
  1038  
  1039  	if !c.isActiveManager() {
  1040  		return c.errNoManager()
  1041  	}
  1042  
  1043  	ctx, cancel := c.getRequestContext()
  1044  	defer cancel()
  1045  
  1046  	node, err := getNode(ctx, c.client, input)
  1047  	if err != nil {
  1048  		return err
  1049  	}
  1050  
  1051  	if _, err := c.client.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID, Force: force}); err != nil {
  1052  		return err
  1053  	}
  1054  	return nil
  1055  }
  1056  
  1057  // GetTasks returns a list of tasks matching the filter options.
  1058  func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
  1059  	c.RLock()
  1060  	defer c.RUnlock()
  1061  
  1062  	if !c.isActiveManager() {
  1063  		return nil, c.errNoManager()
  1064  	}
  1065  
  1066  	byName := func(filter filters.Args) error {
  1067  		if filter.Include("service") {
  1068  			serviceFilters := filter.Get("service")
  1069  			for _, serviceFilter := range serviceFilters {
  1070  				service, err := c.GetService(serviceFilter)
  1071  				if err != nil {
  1072  					return err
  1073  				}
  1074  				filter.Del("service", serviceFilter)
  1075  				filter.Add("service", service.ID)
  1076  			}
  1077  		}
  1078  		if filter.Include("node") {
  1079  			nodeFilters := filter.Get("node")
  1080  			for _, nodeFilter := range nodeFilters {
  1081  				node, err := c.GetNode(nodeFilter)
  1082  				if err != nil {
  1083  					return err
  1084  				}
  1085  				filter.Del("node", nodeFilter)
  1086  				filter.Add("node", node.ID)
  1087  			}
  1088  		}
  1089  		return nil
  1090  	}
  1091  
  1092  	filters, err := newListTasksFilters(options.Filter, byName)
  1093  	if err != nil {
  1094  		return nil, err
  1095  	}
  1096  
  1097  	ctx, cancel := c.getRequestContext()
  1098  	defer cancel()
  1099  
  1100  	r, err := c.client.ListTasks(
  1101  		ctx,
  1102  		&swarmapi.ListTasksRequest{Filters: filters})
  1103  	if err != nil {
  1104  		return nil, err
  1105  	}
  1106  
  1107  	tasks := []types.Task{}
  1108  
  1109  	for _, task := range r.Tasks {
  1110  		tasks = append(tasks, convert.TaskFromGRPC(*task))
  1111  	}
  1112  	return tasks, nil
  1113  }
  1114  
  1115  // GetTask returns a task by an ID.
  1116  func (c *Cluster) GetTask(input string) (types.Task, error) {
  1117  	c.RLock()
  1118  	defer c.RUnlock()
  1119  
  1120  	if !c.isActiveManager() {
  1121  		return types.Task{}, c.errNoManager()
  1122  	}
  1123  
  1124  	ctx, cancel := c.getRequestContext()
  1125  	defer cancel()
  1126  
  1127  	task, err := getTask(ctx, c.client, input)
  1128  	if err != nil {
  1129  		return types.Task{}, err
  1130  	}
  1131  	return convert.TaskFromGRPC(*task), nil
  1132  }
  1133  
  1134  // GetNetwork returns a cluster network by ID or name.
  1135  func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
  1136  	c.RLock()
  1137  	defer c.RUnlock()
  1138  
  1139  	if !c.isActiveManager() {
  1140  		return apitypes.NetworkResource{}, c.errNoManager()
  1141  	}
  1142  
  1143  	ctx, cancel := c.getRequestContext()
  1144  	defer cancel()
  1145  
  1146  	network, err := getNetwork(ctx, c.client, input)
  1147  	if err != nil {
  1148  		return apitypes.NetworkResource{}, err
  1149  	}
  1150  	return convert.BasicNetworkFromGRPC(*network), nil
  1151  }
  1152  
  1153  // GetNetworks returns all current cluster managed networks.
  1154  func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
  1155  	c.RLock()
  1156  	defer c.RUnlock()
  1157  
  1158  	if !c.isActiveManager() {
  1159  		return nil, c.errNoManager()
  1160  	}
  1161  
  1162  	ctx, cancel := c.getRequestContext()
  1163  	defer cancel()
  1164  
  1165  	r, err := c.client.ListNetworks(ctx, &swarmapi.ListNetworksRequest{})
  1166  	if err != nil {
  1167  		return nil, err
  1168  	}
  1169  
  1170  	var networks []apitypes.NetworkResource
  1171  
  1172  	for _, network := range r.Networks {
  1173  		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
  1174  	}
  1175  
  1176  	return networks, nil
  1177  }
  1178  
  1179  // CreateNetwork creates a new cluster managed network.
  1180  func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
  1181  	c.RLock()
  1182  	defer c.RUnlock()
  1183  
  1184  	if !c.isActiveManager() {
  1185  		return "", c.errNoManager()
  1186  	}
  1187  
  1188  	if runconfig.IsPreDefinedNetwork(s.Name) {
  1189  		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
  1190  		return "", errors.NewRequestForbiddenError(err)
  1191  	}
  1192  
  1193  	ctx, cancel := c.getRequestContext()
  1194  	defer cancel()
  1195  
  1196  	networkSpec := convert.BasicNetworkCreateToGRPC(s)
  1197  	r, err := c.client.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
  1198  	if err != nil {
  1199  		return "", err
  1200  	}
  1201  
  1202  	return r.Network.ID, nil
  1203  }
  1204  
  1205  // RemoveNetwork removes a cluster network.
  1206  func (c *Cluster) RemoveNetwork(input string) error {
  1207  	c.RLock()
  1208  	defer c.RUnlock()
  1209  
  1210  	if !c.isActiveManager() {
  1211  		return c.errNoManager()
  1212  	}
  1213  
  1214  	ctx, cancel := c.getRequestContext()
  1215  	defer cancel()
  1216  
  1217  	network, err := getNetwork(ctx, c.client, input)
  1218  	if err != nil {
  1219  		return err
  1220  	}
  1221  
  1222  	if _, err := c.client.RemoveNetwork(ctx, &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil {
  1223  		return err
  1224  	}
  1225  	return nil
  1226  }
  1227  
  1228  func (c *Cluster) populateNetworkID(ctx context.Context, client swarmapi.ControlClient, s *types.ServiceSpec) error {
  1229  	for i, n := range s.Networks {
  1230  		apiNetwork, err := getNetwork(ctx, client, n.Target)
  1231  		if err != nil {
  1232  			if ln, _ := c.config.Backend.FindNetwork(n.Target); ln != nil && !ln.Info().Dynamic() {
  1233  				err = fmt.Errorf("network %s is not eligible for docker services", ln.Name())
  1234  				return errors.NewRequestForbiddenError(err)
  1235  			}
  1236  			return err
  1237  		}
  1238  		s.Networks[i].Target = apiNetwork.ID
  1239  	}
  1240  	return nil
  1241  }
  1242  
  1243  func getNetwork(ctx context.Context, c swarmapi.ControlClient, input string) (*swarmapi.Network, error) {
  1244  	// GetNetwork to match via full ID.
  1245  	rg, err := c.GetNetwork(ctx, &swarmapi.GetNetworkRequest{NetworkID: input})
  1246  	if err != nil {
  1247  		// If any error (including NotFound), ListNetworks to match via ID prefix and full name.
  1248  		rl, err := c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{Names: []string{input}}})
  1249  		if err != nil || len(rl.Networks) == 0 {
  1250  			rl, err = c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{IDPrefixes: []string{input}}})
  1251  		}
  1252  
  1253  		if err != nil {
  1254  			return nil, err
  1255  		}
  1256  
  1257  		if len(rl.Networks) == 0 {
  1258  			return nil, fmt.Errorf("network %s not found", input)
  1259  		}
  1260  
  1261  		if l := len(rl.Networks); l > 1 {
  1262  			return nil, fmt.Errorf("network %s is ambiguous (%d matches found)", input, l)
  1263  		}
  1264  
  1265  		return rl.Networks[0], nil
  1266  	}
  1267  	return rg.Network, nil
  1268  }
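
// Resolution order above: exact ID match first, then exact name, then ID
// prefix. A name therefore shadows an ID prefix, and a prefix matching more
// than one network is rejected as ambiguous.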
  1269  
  1270  // Cleanup stops the active swarm node. This is run before daemon shutdown.
  1271  func (c *Cluster) Cleanup() {
  1272  	c.Lock()
  1273  	node := c.node
  1274  	if node == nil {
  1275  		c.Unlock()
  1276  		return
  1277  	}
  1278  	defer c.Unlock()
  1279  	if c.isActiveManager() {
  1280  		active, reachable, unreachable, err := c.managerStats()
  1281  		if err == nil {
  1282  			singlenode := active && reachable == 1 && unreachable == 0
  1283  			if active && !singlenode && reachable-2 <= unreachable {
  1284  				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
  1285  			}
  1286  		}
  1287  	}
  1288  	c.stopNode()
  1289  }
  1290  
  1291  func (c *Cluster) managerStats() (current bool, reachable int, unreachable int, err error) {
  1292  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  1293  	defer cancel()
  1294  	nodes, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
  1295  	if err != nil {
  1296  		return false, 0, 0, err
  1297  	}
  1298  	for _, n := range nodes.Nodes {
  1299  		if n.ManagerStatus != nil {
  1300  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
  1301  				reachable++
  1302  				if n.ID == c.node.NodeID() {
  1303  					current = true
  1304  				}
  1305  			}
  1306  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
  1307  				unreachable++
  1308  			}
  1309  		}
  1310  	}
  1311  	return
  1312  }
  1313  
  1314  func validateAndSanitizeInitRequest(req *types.InitRequest) error {
  1315  	var err error
  1316  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1317  	if err != nil {
  1318  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1319  	}
  1320  
  1321  	spec := &req.Spec
  1322  	// provide sane defaults instead of erroring
  1323  	if spec.Name == "" {
  1324  		spec.Name = "default"
  1325  	}
  1326  	if spec.Raft.SnapshotInterval == 0 {
  1327  		spec.Raft.SnapshotInterval = defaultSpec.Raft.SnapshotInterval
  1328  	}
  1329  	if spec.Raft.LogEntriesForSlowFollowers == 0 {
  1330  		spec.Raft.LogEntriesForSlowFollowers = defaultSpec.Raft.LogEntriesForSlowFollowers
  1331  	}
  1332  	if spec.Raft.ElectionTick == 0 {
  1333  		spec.Raft.ElectionTick = defaultSpec.Raft.ElectionTick
  1334  	}
  1335  	if spec.Raft.HeartbeatTick == 0 {
  1336  		spec.Raft.HeartbeatTick = defaultSpec.Raft.HeartbeatTick
  1337  	}
  1338  	if spec.Dispatcher.HeartbeatPeriod == 0 {
  1339  		spec.Dispatcher.HeartbeatPeriod = defaultSpec.Dispatcher.HeartbeatPeriod
  1340  	}
  1341  	if spec.CAConfig.NodeCertExpiry == 0 {
  1342  		spec.CAConfig.NodeCertExpiry = defaultSpec.CAConfig.NodeCertExpiry
  1343  	}
  1344  	if spec.Orchestration.TaskHistoryRetentionLimit == 0 {
  1345  		spec.Orchestration.TaskHistoryRetentionLimit = defaultSpec.Orchestration.TaskHistoryRetentionLimit
  1346  	}
  1347  	return nil
  1348  }
  1349  
  1350  func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
  1351  	var err error
  1352  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1353  	if err != nil {
  1354  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1355  	}
  1356  	if len(req.RemoteAddrs) == 0 {
  1357  		return fmt.Errorf("at least 1 RemoteAddr is required to join")
  1358  	}
  1359  	for i := range req.RemoteAddrs {
  1360  		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
  1361  		if err != nil {
  1362  			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
  1363  		}
  1364  	}
  1365  	return nil
  1366  }
  1367  
  1368  func validateAddr(addr string) (string, error) {
  1369  	if addr == "" {
  1370  		return addr, fmt.Errorf("invalid empty address")
  1371  	}
  1372  	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
  1373  	if err != nil {
  1374  		return addr, nil
  1375  	}
  1376  	return strings.TrimPrefix(newaddr, "tcp://"), nil
  1377  }
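
// With defaultAddr = "0.0.0.0:2377", the normalization behaves roughly like
// this (illustrative; the exact behavior is opts.ParseTCPAddr's):
//
//	validateAddr("192.168.1.10") // "192.168.1.10:2377" (default port added)
//	validateAddr("0.0.0.0:4500") // "0.0.0.0:4500" (unchanged)
//
// Anything ParseTCPAddr cannot parse is passed through unchanged with a nil
// error, since a listen address may legitimately name a network interface
// rather than an IP.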
  1378  
  1379  func initClusterSpec(node *node, spec types.Spec) error {
  1380  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  1381  	defer cancel()
  1381  	for conn := range node.ListenControlSocket(ctx) {
  1382  		if ctx.Err() != nil {
  1383  			return ctx.Err()
  1384  		}
  1385  		if conn != nil {
  1386  			client := swarmapi.NewControlClient(conn)
  1387  			var cluster *swarmapi.Cluster
  1388  			for i := 0; ; i++ {
  1389  				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
  1390  				if err != nil {
  1391  					return fmt.Errorf("error on listing clusters: %v", err)
  1392  				}
  1393  				if len(lcr.Clusters) == 0 {
  1394  					if i < 10 {
  1395  						time.Sleep(200 * time.Millisecond)
  1396  						continue
  1397  					}
  1398  					return fmt.Errorf("empty list of clusters was returned")
  1399  				}
  1400  				cluster = lcr.Clusters[0]
  1401  				break
  1402  			}
  1403  			newspec, err := convert.SwarmSpecToGRPC(spec)
  1404  			if err != nil {
  1405  				return fmt.Errorf("error updating cluster settings: %v", err)
  1406  			}
  1407  			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
  1408  				ClusterID:      cluster.ID,
  1409  				ClusterVersion: &cluster.Meta.Version,
  1410  				Spec:           &newspec,
  1411  			})
  1412  			if err != nil {
  1413  				return fmt.Errorf("error updating cluster settings: %v", err)
  1414  			}
  1415  			return nil
  1416  		}
  1417  	}
  1418  	return ctx.Err()
  1419  }
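
// The polling loop above retries ListClusters up to 10 times, 200ms apart
// (about 2s total), well inside the 5-second context deadline: the cluster
// object is created asynchronously once the manager is up, so it may not be
// listed immediately after the control socket appears.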