github.com/portworx/docker@v1.12.1/daemon/cluster/cluster.go

     1  package cluster
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"net"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	"google.golang.org/grpc"
    15  
    16  	"github.com/Sirupsen/logrus"
    17  	"github.com/docker/docker/daemon/cluster/convert"
    18  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    19  	"github.com/docker/docker/daemon/cluster/executor/container"
    20  	"github.com/docker/docker/errors"
    21  	"github.com/docker/docker/opts"
    22  	"github.com/docker/docker/pkg/ioutils"
    23  	"github.com/docker/docker/runconfig"
    24  	apitypes "github.com/docker/engine-api/types"
    25  	"github.com/docker/engine-api/types/filters"
    26  	types "github.com/docker/engine-api/types/swarm"
    27  	swarmagent "github.com/docker/swarmkit/agent"
    28  	swarmapi "github.com/docker/swarmkit/api"
    29  	"golang.org/x/net/context"
    30  )
    31  
    32  const swarmDirName = "swarm"
    33  const controlSocket = "control.sock"
    34  const swarmConnectTimeout = 20 * time.Second
    35  const swarmRequestTimeout = 20 * time.Second
    36  const stateFile = "docker-state.json"
    37  const defaultAddr = "0.0.0.0:2377"
    38  
    39  const (
    40  	initialReconnectDelay = 100 * time.Millisecond
    41  	maxReconnectDelay     = 30 * time.Second
    42  )
    43  
    44  // ErrNoSwarm is returned on leaving a cluster that was never initialized.
    45  var ErrNoSwarm = fmt.Errorf("This node is not part of a swarm")
    46  
    47  // ErrSwarmExists is returned on initialize or join request for a cluster that has already been activated
    48  var ErrSwarmExists = fmt.Errorf("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")
    49  
    50  // ErrPendingSwarmExists is returned on initialize or join request for a cluster that is already processing a similar request but has not succeeded yet.
    51  var ErrPendingSwarmExists = fmt.Errorf("This node is processing an existing join request that has not succeeded yet. Use \"docker swarm leave\" to cancel the current request.")
    52  
    53  // ErrSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached.
    54  var ErrSwarmJoinTimeoutReached = fmt.Errorf("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")
    55  
    56  // defaultSpec contains some sane defaults if cluster options are missing on init
    57  var defaultSpec = types.Spec{
    58  	Raft: types.RaftConfig{
    59  		SnapshotInterval:           10000,
    60  		KeepOldSnapshots:           0,
    61  		LogEntriesForSlowFollowers: 500,
    62  		HeartbeatTick:              1,
    63  		ElectionTick:               3,
    64  	},
    65  	CAConfig: types.CAConfig{
    66  		NodeCertExpiry: 90 * 24 * time.Hour,
    67  	},
    68  	Dispatcher: types.DispatcherConfig{
    69  		HeartbeatPeriod: uint64((5 * time.Second).Nanoseconds()),
    70  	},
    71  	Orchestration: types.OrchestrationConfig{
    72  		TaskHistoryRetentionLimit: 10,
    73  	},
    74  }
    75  
    76  type state struct {
    77  	// LocalAddr is this machine's local IP or hostname, if specified.
    78  	LocalAddr string
    79  	// RemoteAddr is the address that was given to "swarm join". It is used
    80  	// to find LocalAddr if necessary.
    81  	RemoteAddr string
    82  	// ListenAddr is the address we bind to, including a port.
    83  	ListenAddr string
    84  	// AdvertiseAddr is the address other nodes should connect to,
    85  	// including a port.
    86  	AdvertiseAddr string
    87  }
    88  
    89  // NetworkSubnetsProvider exposes functions for retrieving the subnets
    90  // of networks managed by Docker, so they can be filtered.
    91  type NetworkSubnetsProvider interface {
    92  	V4Subnets() []net.IPNet
    93  	V6Subnets() []net.IPNet
    94  }
    95  
    96  // Config provides values for Cluster.
    97  type Config struct {
    98  	Root                   string
    99  	Name                   string
   100  	Backend                executorpkg.Backend
   101  	NetworkSubnetsProvider NetworkSubnetsProvider
   102  
   103  	// DefaultAdvertiseAddr is the default host/IP or network interface to use
   104  	// if no AdvertiseAddr value is specified.
   105  	DefaultAdvertiseAddr string
   106  }
   107  
   108  // Cluster provides capabilities to participate in a cluster as a worker or a
   109  // manager.
   110  type Cluster struct {
   111  	sync.RWMutex
   112  	*node
   113  	root            string
   114  	config          Config
   115  	configEvent     chan struct{} // todo: make this array and goroutine safe
   116  	localAddr       string
   117  	actualLocalAddr string // after resolution, not persisted
   118  	remoteAddr      string
   119  	listenAddr      string
   120  	advertiseAddr   string
   121  	stop            bool
   122  	err             error
   123  	cancelDelay     func()
   124  }
   125  
   126  type node struct {
   127  	*swarmagent.Node
   128  	done           chan struct{}
   129  	ready          bool
   130  	conn           *grpc.ClientConn
   131  	client         swarmapi.ControlClient
   132  	reconnectDelay time.Duration
   133  }
   134  
   135  // New creates a new Cluster instance using provided config.
   136  func New(config Config) (*Cluster, error) {
   137  	root := filepath.Join(config.Root, swarmDirName)
   138  	if err := os.MkdirAll(root, 0700); err != nil {
   139  		return nil, err
   140  	}
   141  	c := &Cluster{
   142  		root:        root,
   143  		config:      config,
   144  		configEvent: make(chan struct{}, 10),
   145  	}
   146  
   147  	st, err := c.loadState()
   148  	if err != nil {
   149  		if os.IsNotExist(err) {
   150  			return c, nil
   151  		}
   152  		return nil, err
   153  	}
   154  
   155  	n, err := c.startNewNode(false, st.LocalAddr, st.RemoteAddr, st.ListenAddr, st.AdvertiseAddr, "", "")
   156  	if err != nil {
   157  		return nil, err
   158  	}
   159  
   160  	select {
   161  	case <-time.After(swarmConnectTimeout):
   162  		logrus.Errorf("swarm component could not be started before timeout was reached")
   163  	case <-n.Ready():
   164  	case <-n.done:
   165  		return nil, fmt.Errorf("swarm component could not be started: %v", c.err)
   166  	}
   167  	go c.reconnectOnFailure(n)
   168  	return c, nil
   169  }
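
// exampleNewCluster is an illustrative sketch added in this edit, not part
// of the original file: it shows how a daemon might wire up the cluster
// component. The root path and node name are assumptions; any
// executorpkg.Backend implementation can be passed in.
func exampleNewCluster(backend executorpkg.Backend) (*Cluster, error) {
	c, err := New(Config{
		Root:    "/var/lib/docker", // assumed daemon root directory
		Name:    "node-1",          // assumed node hostname
		Backend: backend,
	})
	if err != nil {
		return nil, err
	}
	// On daemon shutdown, c.Cleanup() stops the active swarm node.
	return c, nil
}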
   170  
   171  func (c *Cluster) loadState() (*state, error) {
   172  	dt, err := ioutil.ReadFile(filepath.Join(c.root, stateFile))
   173  	if err != nil {
   174  		return nil, err
   175  	}
   176  	// missing certificate means no actual state to restore from
   177  	if _, err := os.Stat(filepath.Join(c.root, "certificates/swarm-node.crt")); err != nil {
   178  		if os.IsNotExist(err) {
   179  			c.clearState()
   180  		}
   181  		return nil, err
   182  	}
   183  	var st state
   184  	if err := json.Unmarshal(dt, &st); err != nil {
   185  		return nil, err
   186  	}
   187  	return &st, nil
   188  }
   189  
   190  func (c *Cluster) saveState() error {
   191  	dt, err := json.Marshal(state{
   192  		LocalAddr:     c.localAddr,
   193  		RemoteAddr:    c.remoteAddr,
   194  		ListenAddr:    c.listenAddr,
   195  		AdvertiseAddr: c.advertiseAddr,
   196  	})
   197  	if err != nil {
   198  		return err
   199  	}
   200  	return ioutils.AtomicWriteFile(filepath.Join(c.root, stateFile), dt, 0600)
   201  }
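
// For illustration (not part of the original file): the state persisted by
// saveState is a flat JSON object keyed by the field names of the state
// struct above. With assumed example addresses, docker-state.json looks like:
//
//	{
//		"LocalAddr": "192.168.1.10",
//		"RemoteAddr": "192.168.1.20:2377",
//		"ListenAddr": "0.0.0.0:2377",
//		"AdvertiseAddr": "192.168.1.10:2377"
//	}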
   202  
   203  func (c *Cluster) reconnectOnFailure(n *node) {
   204  	for {
   205  		<-n.done
   206  		c.Lock()
   207  		if c.stop || c.node != nil {
   208  			c.Unlock()
   209  			return
   210  		}
   211  		n.reconnectDelay *= 2
   212  		if n.reconnectDelay > maxReconnectDelay {
   213  			n.reconnectDelay = maxReconnectDelay
   214  		}
   215  		logrus.Warnf("Restarting swarm in %.2f seconds", n.reconnectDelay.Seconds())
   216  		delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
   217  		c.cancelDelay = cancel
   218  		c.Unlock()
   219  		<-delayCtx.Done()
   220  		if delayCtx.Err() != context.DeadlineExceeded {
   221  			return
   222  		}
   223  		c.Lock()
   224  		if c.node != nil {
   225  			c.Unlock()
   226  			return
   227  		}
   228  		var err error
   229  		n, err = c.startNewNode(false, c.localAddr, c.getRemoteAddress(), c.listenAddr, c.advertiseAddr, c.getRemoteAddress(), "")
   230  		if err != nil {
   231  			c.err = err
   232  			close(n.done)
   233  		}
   234  		c.Unlock()
   235  	}
   236  }
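
// exampleBackoffSchedule is an illustrative sketch (not part of the original
// file) of the reconnect policy above: starting from initialReconnectDelay,
// the delay doubles before each retry and is capped at maxReconnectDelay,
// i.e. 200ms, 400ms, 800ms, ..., 25.6s, then 30s for every later retry.
func exampleBackoffSchedule(retries int) []time.Duration {
	var schedule []time.Duration
	delay := initialReconnectDelay
	for i := 0; i < retries; i++ {
		delay *= 2
		if delay > maxReconnectDelay {
			delay = maxReconnectDelay
		}
		schedule = append(schedule, delay)
	}
	return schedule
}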
   237  
   238  func (c *Cluster) startNewNode(forceNewCluster bool, localAddr, remoteAddr, listenAddr, advertiseAddr, joinAddr, joinToken string) (*node, error) {
   239  	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
   240  		return nil, err
   241  	}
   242  
   243  	actualLocalAddr := localAddr
   244  	if actualLocalAddr == "" {
   245  		// If localAddr was not specified, resolve it automatically
   246  		// based on the route to joinAddr. localAddr can only be left
   247  		// empty on "join".
   248  		listenHost, _, err := net.SplitHostPort(listenAddr)
   249  		if err != nil {
   250  			return nil, fmt.Errorf("could not parse listen address: %v", err)
   251  		}
   252  
   253  		listenAddrIP := net.ParseIP(listenHost)
   254  		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
   255  			actualLocalAddr = listenHost
   256  		} else {
   257  			if remoteAddr == "" {
   258  				// Should never happen except using swarms created by
   259  				// old versions that didn't save remoteAddr.
   260  				remoteAddr = "8.8.8.8:53"
   261  			}
   262  			conn, err := net.Dial("udp", remoteAddr)
   263  			if err != nil {
   264  				return nil, fmt.Errorf("could not find local IP address: %v", err)
   265  			}
   266  			localHostPort := conn.LocalAddr().String()
   267  			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
   268  			conn.Close()
   269  		}
   270  	}
   271  
   272  	c.node = nil
   273  	c.cancelDelay = nil
   274  	c.stop = false
   275  	n, err := swarmagent.NewNode(&swarmagent.NodeConfig{
   276  		Hostname:           c.config.Name,
   277  		ForceNewCluster:    forceNewCluster,
   278  		ListenControlAPI:   filepath.Join(c.root, controlSocket),
   279  		ListenRemoteAPI:    listenAddr,
   280  		AdvertiseRemoteAPI: advertiseAddr,
   281  		JoinAddr:           joinAddr,
   282  		StateDir:           c.root,
   283  		JoinToken:          joinToken,
   284  		Executor:           container.NewExecutor(c.config.Backend),
   285  		HeartbeatTick:      1,
   286  		ElectionTick:       3,
   287  	})
   288  	if err != nil {
   289  		return nil, err
   290  	}
   291  	ctx := context.Background()
   292  	if err := n.Start(ctx); err != nil {
   293  		return nil, err
   294  	}
   295  	node := &node{
   296  		Node:           n,
   297  		done:           make(chan struct{}),
   298  		reconnectDelay: initialReconnectDelay,
   299  	}
   300  	c.node = node
   301  	c.localAddr = localAddr
   302  	c.actualLocalAddr = actualLocalAddr // not saved
   303  	c.remoteAddr = remoteAddr
   304  	c.listenAddr = listenAddr
   305  	c.advertiseAddr = advertiseAddr
   306  	c.saveState()
   307  
   308  	c.config.Backend.SetClusterProvider(c)
   309  	go func() {
   310  		err := n.Err(ctx)
   311  		if err != nil {
   312  			logrus.Errorf("cluster exited with error: %v", err)
   313  		}
   314  		c.Lock()
   315  		c.node = nil
   316  		c.err = err
   317  		c.Unlock()
   318  		close(node.done)
   319  	}()
   320  
   321  	go func() {
   322  		select {
   323  		case <-n.Ready():
   324  			c.Lock()
   325  			node.ready = true
   326  			c.err = nil
   327  			c.Unlock()
   328  		case <-ctx.Done():
   329  		}
   330  		c.configEvent <- struct{}{}
   331  	}()
   332  
   333  	go func() {
   334  		for conn := range n.ListenControlSocket(ctx) {
   335  			c.Lock()
   336  			if node.conn != conn {
   337  				if conn == nil {
   338  					node.client = nil
   339  				} else {
   340  					node.client = swarmapi.NewControlClient(conn)
   341  				}
   342  			}
   343  			node.conn = conn
   344  			c.Unlock()
   345  			c.configEvent <- struct{}{}
   346  		}
   347  	}()
   348  
   349  	return node, nil
   350  }
   351  
   352  // Init initializes a new cluster from a user-provided request.
   353  func (c *Cluster) Init(req types.InitRequest) (string, error) {
   354  	c.Lock()
   355  	if node := c.node; node != nil {
   356  		if !req.ForceNewCluster {
   357  			c.Unlock()
   358  			return "", ErrSwarmExists
   359  		}
   360  		if err := c.stopNode(); err != nil {
   361  			c.Unlock()
   362  			return "", err
   363  		}
   364  	}
   365  
   366  	if err := validateAndSanitizeInitRequest(&req); err != nil {
   367  		c.Unlock()
   368  		return "", err
   369  	}
   370  
   371  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   372  	if err != nil {
   373  		c.Unlock()
   374  		return "", err
   375  	}
   376  
   377  	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   378  	if err != nil {
   379  		c.Unlock()
   380  		return "", err
   381  	}
   382  
   383  	localAddr := listenHost
   384  
   385  	// If the advertise address is not one of the system's
   386  	// addresses, we also require a listen address.
   387  	listenAddrIP := net.ParseIP(listenHost)
   388  	if listenAddrIP != nil && listenAddrIP.IsUnspecified() {
   389  		advertiseIP := net.ParseIP(advertiseHost)
   390  		if advertiseIP == nil {
   391  			// not an IP
   392  			c.Unlock()
   393  			return "", errMustSpecifyListenAddr
   394  		}
   395  
   396  		systemIPs := listSystemIPs()
   397  
   398  		found := false
   399  		for _, systemIP := range systemIPs {
   400  			if systemIP.Equal(advertiseIP) {
   401  				found = true
   402  				break
   403  			}
   404  		}
   405  		if !found {
   406  			c.Unlock()
   407  			return "", errMustSpecifyListenAddr
   408  		}
   409  		localAddr = advertiseIP.String()
   410  	}
   411  
   412  	// todo: check current state existing
   413  	n, err := c.startNewNode(req.ForceNewCluster, localAddr, "", net.JoinHostPort(listenHost, listenPort), net.JoinHostPort(advertiseHost, advertisePort), "", "")
   414  	if err != nil {
   415  		c.Unlock()
   416  		return "", err
   417  	}
   418  	c.Unlock()
   419  
   420  	select {
   421  	case <-n.Ready():
   422  		if err := initClusterSpec(n, req.Spec); err != nil {
   423  			return "", err
   424  		}
   425  		go c.reconnectOnFailure(n)
   426  		return n.NodeID(), nil
   427  	case <-n.done:
   428  		c.RLock()
   429  		defer c.RUnlock()
   430  		if !req.ForceNewCluster { // if failure on first attempt don't keep state
   431  			if err := c.clearState(); err != nil {
   432  				return "", err
   433  			}
   434  		}
   435  		return "", c.err
   436  	}
   437  }
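
// exampleInitSwarm is an illustrative sketch (not part of the original
// file). The addresses are assumptions; leaving req.Spec zero-valued lets
// validateAndSanitizeInitRequest fill in the defaults from defaultSpec.
func exampleInitSwarm(c *Cluster) {
	nodeID, err := c.Init(types.InitRequest{
		ListenAddr:    "0.0.0.0:2377",      // bind on all interfaces
		AdvertiseAddr: "192.168.1.10:2377", // assumed routable address
	})
	if err != nil {
		logrus.Errorf("swarm init failed: %v", err)
		return
	}
	logrus.Infof("swarm initialized, node ID %s", nodeID)
}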
   438  
   439  // Join makes the current Cluster part of an existing swarm cluster.
   440  func (c *Cluster) Join(req types.JoinRequest) error {
   441  	c.Lock()
   442  	if node := c.node; node != nil {
   443  		c.Unlock()
   444  		return ErrSwarmExists
   445  	}
   446  	if err := validateAndSanitizeJoinRequest(&req); err != nil {
   447  		c.Unlock()
   448  		return err
   449  	}
   450  
   451  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   452  	if err != nil {
   453  		c.Unlock()
   454  		return err
   455  	}
   456  
   457  	var advertiseAddr string
   458  	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   459  	// For joining, we don't need to provide an advertise address,
   460  	// since the remote side can detect it.
   461  	if err == nil {
   462  		advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
   463  	}
   464  
   465  	// todo: check current state existing
   466  	n, err := c.startNewNode(false, "", req.RemoteAddrs[0], net.JoinHostPort(listenHost, listenPort), advertiseAddr, req.RemoteAddrs[0], req.JoinToken)
   467  	if err != nil {
   468  		c.Unlock()
   469  		return err
   470  	}
   471  	c.Unlock()
   472  
   473  	select {
   474  	case <-time.After(swarmConnectTimeout):
   475  		// attempt to connect will continue in background, also reconnecting
   476  		go c.reconnectOnFailure(n)
   477  		return ErrSwarmJoinTimeoutReached
   478  	case <-n.Ready():
   479  		go c.reconnectOnFailure(n)
   480  		return nil
   481  	case <-n.done:
   482  		c.RLock()
   483  		defer c.RUnlock()
   484  		return c.err
   485  	}
   486  }
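
// exampleJoinSwarm is an illustrative sketch (not part of the original
// file). The manager address and token are placeholders; note that
// ErrSwarmJoinTimeoutReached is not fatal, since the join attempt keeps
// running in the background.
func exampleJoinSwarm(c *Cluster) error {
	err := c.Join(types.JoinRequest{
		ListenAddr:  "0.0.0.0:2377",
		RemoteAddrs: []string{"192.168.1.10:2377"}, // assumed manager address
		JoinToken:   "SWMTKN-...",                  // placeholder join token
	})
	if err == ErrSwarmJoinTimeoutReached {
		logrus.Warnf("%v", err)
		return nil
	}
	return err
}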
   487  
   488  // stopNode is a helper that stops the active c.node and waits until it has
   489  // shut down. Call while keeping the cluster lock.
   490  func (c *Cluster) stopNode() error {
   491  	if c.node == nil {
   492  		return nil
   493  	}
   494  	c.stop = true
   495  	if c.cancelDelay != nil {
   496  		c.cancelDelay()
   497  		c.cancelDelay = nil
   498  	}
   499  	node := c.node
   500  	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
   501  	defer cancel()
   502  	// TODO: can't hold lock on stop because it calls back to network
   503  	c.Unlock()
   504  	defer c.Lock()
   505  	if err := node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
   506  		return err
   507  	}
   508  	<-node.done
   509  	return nil
   510  }
   511  
   512  // Leave shuts down Cluster and removes current state.
   513  func (c *Cluster) Leave(force bool) error {
   514  	c.Lock()
   515  	node := c.node
   516  	if node == nil {
   517  		c.Unlock()
   518  		return ErrNoSwarm
   519  	}
   520  
   521  	if node.Manager() != nil && !force {
   522  		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
   523  		if c.isActiveManager() {
   524  			active, reachable, unreachable, err := c.managerStats()
   525  			if err == nil {
   526  				if active && reachable-2 <= unreachable {
   527  					if reachable == 1 && unreachable == 0 {
   528  						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
   529  						c.Unlock()
   530  						return fmt.Errorf(msg)
   531  					}
   532  					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
   533  				}
   534  			}
   535  		} else {
   536  			msg += "Doing so may lose the consensus of your cluster. "
   537  		}
   538  
   539  		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
   540  		c.Unlock()
   541  		return fmt.Errorf(msg)
   542  	}
   543  	if err := c.stopNode(); err != nil {
   544  		c.Unlock()
   545  		return err
   546  	}
   547  	c.Unlock()
   548  	if nodeID := node.NodeID(); nodeID != "" {
   549  		for _, id := range c.config.Backend.ListContainersForNode(nodeID) {
   550  			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
   551  				logrus.Errorf("error removing %v: %v", id, err)
   552  			}
   553  		}
   554  	}
   555  	c.configEvent <- struct{}{}
   556  	// todo: cleanup optional?
   557  	if err := c.clearState(); err != nil {
   558  		return err
   559  	}
   560  	return nil
   561  }
   562  
   563  func (c *Cluster) clearState() error {
   564  	// todo: backup this data instead of removing?
   565  	if err := os.RemoveAll(c.root); err != nil {
   566  		return err
   567  	}
   568  	if err := os.MkdirAll(c.root, 0700); err != nil {
   569  		return err
   570  	}
   571  	c.config.Backend.SetClusterProvider(nil)
   572  	return nil
   573  }
   574  
   575  func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum loss
   576  	return context.WithTimeout(context.Background(), swarmRequestTimeout)
   577  }
   578  
   579  // Inspect retrieves the configuration properties of a managed swarm cluster.
   580  func (c *Cluster) Inspect() (types.Swarm, error) {
   581  	c.RLock()
   582  	defer c.RUnlock()
   583  
   584  	if !c.isActiveManager() {
   585  		return types.Swarm{}, c.errNoManager()
   586  	}
   587  
   588  	ctx, cancel := c.getRequestContext()
   589  	defer cancel()
   590  
   591  	swarm, err := getSwarm(ctx, c.client)
   592  	if err != nil {
   593  		return types.Swarm{}, err
   594  	}
   595  
   600  	return convert.SwarmFromGRPC(*swarm), nil
   601  }
   602  
   603  // Update updates configuration of a managed swarm cluster.
   604  func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
   605  	c.RLock()
   606  	defer c.RUnlock()
   607  
   608  	if !c.isActiveManager() {
   609  		return c.errNoManager()
   610  	}
   611  
   612  	ctx, cancel := c.getRequestContext()
   613  	defer cancel()
   614  
   615  	swarm, err := getSwarm(ctx, c.client)
   616  	if err != nil {
   617  		return err
   618  	}
   619  
   620  	swarmSpec, err := convert.SwarmSpecToGRPC(spec)
   621  	if err != nil {
   622  		return err
   623  	}
   624  
   625  	_, err = c.client.UpdateCluster(
   626  		ctx,
   627  		&swarmapi.UpdateClusterRequest{
   628  			ClusterID: swarm.ID,
   629  			Spec:      &swarmSpec,
   630  			ClusterVersion: &swarmapi.Version{
   631  				Index: version,
   632  			},
   633  			Rotation: swarmapi.JoinTokenRotation{
   634  				RotateWorkerToken:  flags.RotateWorkerToken,
   635  				RotateManagerToken: flags.RotateManagerToken,
   636  			},
   637  		},
   638  	)
   639  	return err
   640  }
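
// exampleRotateWorkerToken is an illustrative sketch (not part of the
// original file) of the read-modify-write cycle Update expects: read the
// current version with Inspect, then submit the update against that index.
// It assumes the engine-api swarm types promote Meta.Version through the
// embedded ClusterInfo.
func exampleRotateWorkerToken(c *Cluster) error {
	sw, err := c.Inspect()
	if err != nil {
		return err
	}
	return c.Update(sw.Version.Index, sw.Spec, types.UpdateFlags{
		RotateWorkerToken: true,
	})
}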
   641  
   642  // IsManager returns true if Cluster is participating as a manager.
   643  func (c *Cluster) IsManager() bool {
   644  	c.RLock()
   645  	defer c.RUnlock()
   646  	return c.isActiveManager()
   647  }
   648  
   649  // IsAgent returns true if Cluster is participating as a worker/agent.
   650  func (c *Cluster) IsAgent() bool {
   651  	c.RLock()
   652  	defer c.RUnlock()
   653  	return c.node != nil && c.ready
   654  }
   655  
   656  // GetLocalAddress returns the local address.
   657  func (c *Cluster) GetLocalAddress() string {
   658  	c.RLock()
   659  	defer c.RUnlock()
   660  	return c.actualLocalAddr
   661  }
   662  
   663  // GetAdvertiseAddress returns the remotely reachable address of this node.
   664  func (c *Cluster) GetAdvertiseAddress() string {
   665  	c.RLock()
   666  	defer c.RUnlock()
   667  	if c.advertiseAddr != "" {
   668  		advertiseHost, _, _ := net.SplitHostPort(c.advertiseAddr)
   669  		return advertiseHost
   670  	}
   671  	return c.actualLocalAddr
   672  }
   673  
   674  // GetRemoteAddress returns a known advertise address of a remote manager if
   675  // available.
   676  // todo: change to array/connect with info
   677  func (c *Cluster) GetRemoteAddress() string {
   678  	c.RLock()
   679  	defer c.RUnlock()
   680  	return c.getRemoteAddress()
   681  }
   682  
   683  func (c *Cluster) getRemoteAddress() string {
   684  	if c.node == nil {
   685  		return ""
   686  	}
   687  	nodeID := c.node.NodeID()
   688  	for _, r := range c.node.Remotes() {
   689  		if r.NodeID != nodeID {
   690  			return r.Addr
   691  		}
   692  	}
   693  	return ""
   694  }
   695  
   696  // ListenClusterEvents returns a channel that receives messages on cluster
   697  // participation changes.
   698  // todo: make cancelable and accessible to multiple callers
   699  func (c *Cluster) ListenClusterEvents() <-chan struct{} {
   700  	return c.configEvent
   701  }
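
// exampleWatchClusterEvents is an illustrative sketch (not part of the
// original file): a consumer drains the channel and refreshes whatever
// daemon state depends on cluster participation.
func exampleWatchClusterEvents(c *Cluster) {
	go func() {
		for range c.ListenClusterEvents() {
			logrus.Debugf("cluster participation changed: %+v", c.Info())
		}
	}()
}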
   702  
   703  // Info returns information about the current cluster state.
   704  func (c *Cluster) Info() types.Info {
   705  	info := types.Info{
   706  		NodeAddr: c.GetAdvertiseAddress(),
   707  	}
   708  
   709  	c.RLock()
   710  	defer c.RUnlock()
   711  
   712  	if c.node == nil {
   713  		info.LocalNodeState = types.LocalNodeStateInactive
   714  		if c.cancelDelay != nil {
   715  			info.LocalNodeState = types.LocalNodeStateError
   716  		}
   717  	} else {
   718  		info.LocalNodeState = types.LocalNodeStatePending
   719  		if c.ready {
   720  			info.LocalNodeState = types.LocalNodeStateActive
   721  		}
   722  	}
   723  	if c.err != nil {
   724  		info.Error = c.err.Error()
   725  	}
   726  
   727  	ctx, cancel := c.getRequestContext()
   728  	defer cancel()
   729  
   730  	if c.isActiveManager() {
   731  		info.ControlAvailable = true
   732  		swarm, err := c.Inspect()
   733  		if err != nil {
   734  			info.Error = err.Error()
   735  		}
   736  
   737  		// Strip JoinTokens
   738  		info.Cluster = swarm.ClusterInfo
   739  
   740  		if r, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err == nil {
   741  			info.Nodes = len(r.Nodes)
   742  			for _, n := range r.Nodes {
   743  				if n.ManagerStatus != nil {
   744  					info.Managers = info.Managers + 1
   745  				}
   746  			}
   747  		}
   748  	}
   749  
   750  	if c.node != nil {
   751  		for _, r := range c.node.Remotes() {
   752  			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
   753  		}
   754  		info.NodeID = c.node.NodeID()
   755  	}
   756  
   757  	return info
   758  }
   759  
   760  // isActiveManager should not be called without a read lock
   761  func (c *Cluster) isActiveManager() bool {
   762  	return c.node != nil && c.conn != nil
   763  }
   764  
   765  // errNoManager returns error describing why manager commands can't be used.
   766  // Call with read lock.
   767  func (c *Cluster) errNoManager() error {
   768  	if c.node == nil {
   769  		return fmt.Errorf("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
   770  	}
   771  	if c.node.Manager() != nil {
   772  		return fmt.Errorf("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
   773  	}
   774  	return fmt.Errorf("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
   775  }
   776  
   777  // GetServices returns all services of a managed swarm cluster.
   778  func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
   779  	c.RLock()
   780  	defer c.RUnlock()
   781  
   782  	if !c.isActiveManager() {
   783  		return nil, c.errNoManager()
   784  	}
   785  
   786  	filters, err := newListServicesFilters(options.Filter)
   787  	if err != nil {
   788  		return nil, err
   789  	}
   790  	ctx, cancel := c.getRequestContext()
   791  	defer cancel()
   792  
   793  	r, err := c.client.ListServices(
   794  		ctx,
   795  		&swarmapi.ListServicesRequest{Filters: filters})
   796  	if err != nil {
   797  		return nil, err
   798  	}
   799  
   800  	services := []types.Service{}
   801  
   802  	for _, service := range r.Services {
   803  		services = append(services, convert.ServiceFromGRPC(*service))
   804  	}
   805  
   806  	return services, nil
   807  }
   808  
   809  // CreateService creates a new service in a managed swarm cluster.
   810  func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (string, error) {
   811  	c.RLock()
   812  	defer c.RUnlock()
   813  
   814  	if !c.isActiveManager() {
   815  		return "", c.errNoManager()
   816  	}
   817  
   818  	ctx, cancel := c.getRequestContext()
   819  	defer cancel()
   820  
   821  	err := c.populateNetworkID(ctx, c.client, &s)
   822  	if err != nil {
   823  		return "", err
   824  	}
   825  
   826  	serviceSpec, err := convert.ServiceSpecToGRPC(s)
   827  	if err != nil {
   828  		return "", err
   829  	}
   830  
   831  	if encodedAuth != "" {
   832  		ctnr := serviceSpec.Task.GetContainer()
   833  		if ctnr == nil {
   834  			return "", fmt.Errorf("service does not use container tasks")
   835  		}
   836  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   837  	}
   838  
   839  	r, err := c.client.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
   840  	if err != nil {
   841  		return "", err
   842  	}
   843  
   844  	return r.Service.ID, nil
   845  }
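
// exampleCreateService is an illustrative sketch (not part of the original
// file). The service name and image are assumptions; encodedAuth is left
// empty because the example image needs no registry credentials.
func exampleCreateService(c *Cluster) (string, error) {
	return c.CreateService(types.ServiceSpec{
		Annotations: types.Annotations{Name: "web"}, // assumed service name
		TaskTemplate: types.TaskSpec{
			ContainerSpec: types.ContainerSpec{Image: "nginx:alpine"},
		},
	}, "")
}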
   846  
   847  // GetService returns a service based on an ID or name.
   848  func (c *Cluster) GetService(input string) (types.Service, error) {
   849  	c.RLock()
   850  	defer c.RUnlock()
   851  
   852  	if !c.isActiveManager() {
   853  		return types.Service{}, c.errNoManager()
   854  	}
   855  
   856  	ctx, cancel := c.getRequestContext()
   857  	defer cancel()
   858  
   859  	service, err := getService(ctx, c.client, input)
   860  	if err != nil {
   861  		return types.Service{}, err
   862  	}
   863  	return convert.ServiceFromGRPC(*service), nil
   864  }
   865  
   866  // UpdateService updates an existing service to match new properties.
   867  func (c *Cluster) UpdateService(serviceID string, version uint64, spec types.ServiceSpec, encodedAuth string) error {
   868  	c.RLock()
   869  	defer c.RUnlock()
   870  
   871  	if !c.isActiveManager() {
   872  		return c.errNoManager()
   873  	}
   874  
   875  	ctx, cancel := c.getRequestContext()
   876  	defer cancel()
   877  
   878  	err := c.populateNetworkID(ctx, c.client, &spec)
   879  	if err != nil {
   880  		return err
   881  	}
   882  
   883  	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
   884  	if err != nil {
   885  		return err
   886  	}
   887  
   888  	if encodedAuth != "" {
   889  		ctnr := serviceSpec.Task.GetContainer()
   890  		if ctnr == nil {
   891  			return fmt.Errorf("service does not use container tasks")
   892  		}
   893  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   894  	} else {
   895  		// If encodedAuth isn't being updated, keep the pull options from the
   896  		// current service so the registry auth that is already present isn't lost.
   897  		currentService, err := getService(ctx, c.client, serviceID)
   898  		if err != nil {
   899  			return err
   900  		}
   901  		ctnr := currentService.Spec.Task.GetContainer()
   902  		if ctnr == nil {
   903  			return fmt.Errorf("service does not use container tasks")
   904  		}
   905  		serviceSpec.Task.GetContainer().PullOptions = ctnr.PullOptions
   906  	}
   907  
   908  	_, err = c.client.UpdateService(
   909  		ctx,
   910  		&swarmapi.UpdateServiceRequest{
   911  			ServiceID: serviceID,
   912  			Spec:      &serviceSpec,
   913  			ServiceVersion: &swarmapi.Version{
   914  				Index: version,
   915  			},
   916  		},
   917  	)
   918  	return err
   919  }
   920  
   921  // RemoveService removes a service from a managed swarm cluster.
   922  func (c *Cluster) RemoveService(input string) error {
   923  	c.RLock()
   924  	defer c.RUnlock()
   925  
   926  	if !c.isActiveManager() {
   927  		return c.errNoManager()
   928  	}
   929  
   930  	ctx, cancel := c.getRequestContext()
   931  	defer cancel()
   932  
   933  	service, err := getService(ctx, c.client, input)
   934  	if err != nil {
   935  		return err
   936  	}
   937  
   938  	if _, err := c.client.RemoveService(ctx, &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil {
   939  		return err
   940  	}
   941  	return nil
   942  }
   943  
   944  // GetNodes returns a list of all nodes known to a cluster.
   945  func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
   946  	c.RLock()
   947  	defer c.RUnlock()
   948  
   949  	if !c.isActiveManager() {
   950  		return nil, c.errNoManager()
   951  	}
   952  
   953  	filters, err := newListNodesFilters(options.Filter)
   954  	if err != nil {
   955  		return nil, err
   956  	}
   957  
   958  	ctx, cancel := c.getRequestContext()
   959  	defer cancel()
   960  
   961  	r, err := c.client.ListNodes(
   962  		ctx,
   963  		&swarmapi.ListNodesRequest{Filters: filters})
   964  	if err != nil {
   965  		return nil, err
   966  	}
   967  
   968  	nodes := []types.Node{}
   969  
   970  	for _, node := range r.Nodes {
   971  		nodes = append(nodes, convert.NodeFromGRPC(*node))
   972  	}
   973  	return nodes, nil
   974  }
   975  
   976  // GetNode returns a node based on an ID or name.
   977  func (c *Cluster) GetNode(input string) (types.Node, error) {
   978  	c.RLock()
   979  	defer c.RUnlock()
   980  
   981  	if !c.isActiveManager() {
   982  		return types.Node{}, c.errNoManager()
   983  	}
   984  
   985  	ctx, cancel := c.getRequestContext()
   986  	defer cancel()
   987  
   988  	node, err := getNode(ctx, c.client, input)
   989  	if err != nil {
   990  		return types.Node{}, err
   991  	}
   992  	return convert.NodeFromGRPC(*node), nil
   993  }
   994  
   995  // UpdateNode updates an existing node's properties.
   996  func (c *Cluster) UpdateNode(nodeID string, version uint64, spec types.NodeSpec) error {
   997  	c.RLock()
   998  	defer c.RUnlock()
   999  
  1000  	if !c.isActiveManager() {
  1001  		return c.errNoManager()
  1002  	}
  1003  
  1004  	nodeSpec, err := convert.NodeSpecToGRPC(spec)
  1005  	if err != nil {
  1006  		return err
  1007  	}
  1008  
  1009  	ctx, cancel := c.getRequestContext()
  1010  	defer cancel()
  1011  
  1012  	_, err = c.client.UpdateNode(
  1013  		ctx,
  1014  		&swarmapi.UpdateNodeRequest{
  1015  			NodeID: nodeID,
  1016  			Spec:   &nodeSpec,
  1017  			NodeVersion: &swarmapi.Version{
  1018  				Index: version,
  1019  			},
  1020  		},
  1021  	)
  1022  	return err
  1023  }
  1024  
  1025  // RemoveNode removes a node from a cluster.
  1026  func (c *Cluster) RemoveNode(input string, force bool) error {
  1027  	c.RLock()
  1028  	defer c.RUnlock()
  1029  
  1030  	if !c.isActiveManager() {
  1031  		return c.errNoManager()
  1032  	}
  1033  
  1034  	ctx, cancel := c.getRequestContext()
  1035  	defer cancel()
  1036  
  1037  	node, err := getNode(ctx, c.client, input)
  1038  	if err != nil {
  1039  		return err
  1040  	}
  1041  
  1042  	if _, err := c.client.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID, Force: force}); err != nil {
  1043  		return err
  1044  	}
  1045  	return nil
  1046  }
  1047  
  1048  // GetTasks returns a list of tasks matching the filter options.
  1049  func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
  1050  	c.RLock()
  1051  	defer c.RUnlock()
  1052  
  1053  	if !c.isActiveManager() {
  1054  		return nil, c.errNoManager()
  1055  	}
  1056  
  1057  	byName := func(filter filters.Args) error {
  1058  		if filter.Include("service") {
  1059  			serviceFilters := filter.Get("service")
  1060  			for _, serviceFilter := range serviceFilters {
  1061  				service, err := c.GetService(serviceFilter)
  1062  				if err != nil {
  1063  					return err
  1064  				}
  1065  				filter.Del("service", serviceFilter)
  1066  				filter.Add("service", service.ID)
  1067  			}
  1068  		}
  1069  		if filter.Include("node") {
  1070  			nodeFilters := filter.Get("node")
  1071  			for _, nodeFilter := range nodeFilters {
  1072  				node, err := c.GetNode(nodeFilter)
  1073  				if err != nil {
  1074  					return err
  1075  				}
  1076  				filter.Del("node", nodeFilter)
  1077  				filter.Add("node", node.ID)
  1078  			}
  1079  		}
  1080  		return nil
  1081  	}
  1082  
  1083  	filters, err := newListTasksFilters(options.Filter, byName)
  1084  	if err != nil {
  1085  		return nil, err
  1086  	}
  1087  
  1088  	ctx, cancel := c.getRequestContext()
  1089  	defer cancel()
  1090  
  1091  	r, err := c.client.ListTasks(
  1092  		ctx,
  1093  		&swarmapi.ListTasksRequest{Filters: filters})
  1094  	if err != nil {
  1095  		return nil, err
  1096  	}
  1097  
  1098  	tasks := []types.Task{}
  1099  
  1100  	for _, task := range r.Tasks {
  1101  		tasks = append(tasks, convert.TaskFromGRPC(*task))
  1102  	}
  1103  	return tasks, nil
  1104  }
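
// exampleListServiceTasks is an illustrative sketch (not part of the
// original file): a "service" filter may carry a service name, because the
// byName hook above resolves names to IDs before ListTasks is called.
func exampleListServiceTasks(c *Cluster, serviceName string) ([]types.Task, error) {
	f := filters.NewArgs()
	f.Add("service", serviceName)
	return c.GetTasks(apitypes.TaskListOptions{Filter: f})
}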
  1105  
  1106  // GetTask returns a task by an ID.
  1107  func (c *Cluster) GetTask(input string) (types.Task, error) {
  1108  	c.RLock()
  1109  	defer c.RUnlock()
  1110  
  1111  	if !c.isActiveManager() {
  1112  		return types.Task{}, c.errNoManager()
  1113  	}
  1114  
  1115  	ctx, cancel := c.getRequestContext()
  1116  	defer cancel()
  1117  
  1118  	task, err := getTask(ctx, c.client, input)
  1119  	if err != nil {
  1120  		return types.Task{}, err
  1121  	}
  1122  	return convert.TaskFromGRPC(*task), nil
  1123  }
  1124  
  1125  // GetNetwork returns a cluster network by an ID.
  1126  func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
  1127  	c.RLock()
  1128  	defer c.RUnlock()
  1129  
  1130  	if !c.isActiveManager() {
  1131  		return apitypes.NetworkResource{}, c.errNoManager()
  1132  	}
  1133  
  1134  	ctx, cancel := c.getRequestContext()
  1135  	defer cancel()
  1136  
  1137  	network, err := getNetwork(ctx, c.client, input)
  1138  	if err != nil {
  1139  		return apitypes.NetworkResource{}, err
  1140  	}
  1141  	return convert.BasicNetworkFromGRPC(*network), nil
  1142  }
  1143  
  1144  // GetNetworks returns all current cluster managed networks.
  1145  func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
  1146  	c.RLock()
  1147  	defer c.RUnlock()
  1148  
  1149  	if !c.isActiveManager() {
  1150  		return nil, c.errNoManager()
  1151  	}
  1152  
  1153  	ctx, cancel := c.getRequestContext()
  1154  	defer cancel()
  1155  
  1156  	r, err := c.client.ListNetworks(ctx, &swarmapi.ListNetworksRequest{})
  1157  	if err != nil {
  1158  		return nil, err
  1159  	}
  1160  
  1161  	var networks []apitypes.NetworkResource
  1162  
  1163  	for _, network := range r.Networks {
  1164  		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
  1165  	}
  1166  
  1167  	return networks, nil
  1168  }
  1169  
  1170  // CreateNetwork creates a new cluster managed network.
  1171  func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
  1172  	c.RLock()
  1173  	defer c.RUnlock()
  1174  
  1175  	if !c.isActiveManager() {
  1176  		return "", c.errNoManager()
  1177  	}
  1178  
  1179  	if runconfig.IsPreDefinedNetwork(s.Name) {
  1180  		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
  1181  		return "", errors.NewRequestForbiddenError(err)
  1182  	}
  1183  
  1184  	ctx, cancel := c.getRequestContext()
  1185  	defer cancel()
  1186  
  1187  	networkSpec := convert.BasicNetworkCreateToGRPC(s)
  1188  	r, err := c.client.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
  1189  	if err != nil {
  1190  		return "", err
  1191  	}
  1192  
  1193  	return r.Network.ID, nil
  1194  }
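
// exampleCreateOverlayNetwork is an illustrative sketch (not part of the
// original file). The network name is an assumption; pre-defined names
// (bridge, host, none) are rejected by the check above.
func exampleCreateOverlayNetwork(c *Cluster) (string, error) {
	return c.CreateNetwork(apitypes.NetworkCreateRequest{
		Name:          "my-overlay", // assumed network name
		NetworkCreate: apitypes.NetworkCreate{Driver: "overlay"},
	})
}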
  1195  
  1196  // RemoveNetwork removes a cluster network.
  1197  func (c *Cluster) RemoveNetwork(input string) error {
  1198  	c.RLock()
  1199  	defer c.RUnlock()
  1200  
  1201  	if !c.isActiveManager() {
  1202  		return c.errNoManager()
  1203  	}
  1204  
  1205  	ctx, cancel := c.getRequestContext()
  1206  	defer cancel()
  1207  
  1208  	network, err := getNetwork(ctx, c.client, input)
  1209  	if err != nil {
  1210  		return err
  1211  	}
  1212  
  1213  	if _, err := c.client.RemoveNetwork(ctx, &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil {
  1214  		return err
  1215  	}
  1216  	return nil
  1217  }
  1218  
  1219  func (c *Cluster) populateNetworkID(ctx context.Context, client swarmapi.ControlClient, s *types.ServiceSpec) error {
  1220  	for i, n := range s.Networks {
  1221  		apiNetwork, err := getNetwork(ctx, client, n.Target)
  1222  		if err != nil {
  1223  			if ln, _ := c.config.Backend.FindNetwork(n.Target); ln != nil && !ln.Info().Dynamic() {
  1224  				err = fmt.Errorf("network %s is not eligible for docker services", ln.Name())
  1225  				return errors.NewRequestForbiddenError(err)
  1226  			}
  1227  			return err
  1228  		}
  1229  		s.Networks[i].Target = apiNetwork.ID
  1230  	}
  1231  	return nil
  1232  }
  1233  
  1234  func getNetwork(ctx context.Context, c swarmapi.ControlClient, input string) (*swarmapi.Network, error) {
  1235  	// GetNetwork to match via full ID.
  1236  	rg, err := c.GetNetwork(ctx, &swarmapi.GetNetworkRequest{NetworkID: input})
  1237  	if err != nil {
  1238  		// If any error (including NotFound), ListNetworks to match via ID prefix and full name.
  1239  		rl, err := c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{Names: []string{input}}})
  1240  		if err != nil || len(rl.Networks) == 0 {
  1241  			rl, err = c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{IDPrefixes: []string{input}}})
  1242  		}
  1243  
  1244  		if err != nil {
  1245  			return nil, err
  1246  		}
  1247  
  1248  		if len(rl.Networks) == 0 {
  1249  			return nil, fmt.Errorf("network %s not found", input)
  1250  		}
  1251  
  1252  		if l := len(rl.Networks); l > 1 {
  1253  			return nil, fmt.Errorf("network %s is ambiguous (%d matches found)", input, l)
  1254  		}
  1255  
  1256  		return rl.Networks[0], nil
  1257  	}
  1258  	return rg.Network, nil
  1259  }
  1260  
  1261  // Cleanup stops active swarm node. This is run before daemon shutdown.
  1262  func (c *Cluster) Cleanup() {
  1263  	c.Lock()
  1264  	node := c.node
  1265  	if node == nil {
  1266  		c.Unlock()
  1267  		return
  1268  	}
  1269  	defer c.Unlock()
  1270  	if c.isActiveManager() {
  1271  		active, reachable, unreachable, err := c.managerStats()
  1272  		if err == nil {
  1273  			singlenode := active && reachable == 1 && unreachable == 0
  1274  			if active && !singlenode && reachable-2 <= unreachable {
  1275  				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
  1276  			}
  1277  		}
  1278  	}
  1279  	c.stopNode()
  1280  }
  1281  
  1282  func (c *Cluster) managerStats() (current bool, reachable int, unreachable int, err error) {
  1283  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  1284  	defer cancel()
  1285  	nodes, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
  1286  	if err != nil {
  1287  		return false, 0, 0, err
  1288  	}
  1289  	for _, n := range nodes.Nodes {
  1290  		if n.ManagerStatus != nil {
  1291  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
  1292  				reachable++
  1293  				if n.ID == c.node.NodeID() {
  1294  					current = true
  1295  				}
  1296  			}
  1297  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
  1298  				unreachable++
  1299  			}
  1300  		}
  1301  	}
  1302  	return
  1303  }
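
// For illustration (not part of the original file): the quorum warnings in
// Leave and Cleanup test reachable-2 <= unreachable against these counts.
// With 3 reachable and 1 unreachable manager, 3-2 <= 1 holds, so leaving
// warns that 2 managers remain out of 4; one more failure would cost the
// survivors their Raft majority.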
  1304  
  1305  func validateAndSanitizeInitRequest(req *types.InitRequest) error {
  1306  	var err error
  1307  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1308  	if err != nil {
  1309  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1310  	}
  1311  
  1312  	spec := &req.Spec
  1313  	// provide sane defaults instead of erroring
  1314  	if spec.Name == "" {
  1315  		spec.Name = "default"
  1316  	}
  1317  	if spec.Raft.SnapshotInterval == 0 {
  1318  		spec.Raft.SnapshotInterval = defaultSpec.Raft.SnapshotInterval
  1319  	}
  1320  	if spec.Raft.LogEntriesForSlowFollowers == 0 {
  1321  		spec.Raft.LogEntriesForSlowFollowers = defaultSpec.Raft.LogEntriesForSlowFollowers
  1322  	}
  1323  	if spec.Raft.ElectionTick == 0 {
  1324  		spec.Raft.ElectionTick = defaultSpec.Raft.ElectionTick
  1325  	}
  1326  	if spec.Raft.HeartbeatTick == 0 {
  1327  		spec.Raft.HeartbeatTick = defaultSpec.Raft.HeartbeatTick
  1328  	}
  1329  	if spec.Dispatcher.HeartbeatPeriod == 0 {
  1330  		spec.Dispatcher.HeartbeatPeriod = defaultSpec.Dispatcher.HeartbeatPeriod
  1331  	}
  1332  	if spec.CAConfig.NodeCertExpiry == 0 {
  1333  		spec.CAConfig.NodeCertExpiry = defaultSpec.CAConfig.NodeCertExpiry
  1334  	}
  1335  	if spec.Orchestration.TaskHistoryRetentionLimit == 0 {
  1336  		spec.Orchestration.TaskHistoryRetentionLimit = defaultSpec.Orchestration.TaskHistoryRetentionLimit
  1337  	}
  1338  	return nil
  1339  }
  1340  
  1341  func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
  1342  	var err error
  1343  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1344  	if err != nil {
  1345  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1346  	}
  1347  	if len(req.RemoteAddrs) == 0 {
  1348  		return fmt.Errorf("at least 1 RemoteAddr is required to join")
  1349  	}
  1350  	for i := range req.RemoteAddrs {
  1351  		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
  1352  		if err != nil {
  1353  			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
  1354  		}
  1355  	}
  1356  	return nil
  1357  }
  1358  
  1359  func validateAddr(addr string) (string, error) {
  1360  	if addr == "" {
  1361  		return addr, fmt.Errorf("invalid empty address")
  1362  	}
  1363  	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
  1364  	if err != nil {
  1365  		return addr, nil // fall back to the address as given if it does not parse
  1366  	}
  1367  	return strings.TrimPrefix(newaddr, "tcp://"), nil
  1368  }
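
// For illustration (not part of the original file), validateAddr fills in
// missing parts from defaultAddr, assuming the vendored opts.ParseTCPAddr
// behaves as its name suggests:
//
//	validateAddr("192.168.1.10") // "192.168.1.10:2377", nil
//	validateAddr("0.0.0.0:4567") // "0.0.0.0:4567", nil
//	validateAddr("")             // "", error: invalid empty address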
  1369  
  1370  func initClusterSpec(node *node, spec types.Spec) error {
  1371  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        	defer cancel()
  1372  	for conn := range node.ListenControlSocket(ctx) {
  1373  		if ctx.Err() != nil {
  1374  			return ctx.Err()
  1375  		}
  1376  		if conn != nil {
  1377  			client := swarmapi.NewControlClient(conn)
  1378  			var cluster *swarmapi.Cluster
  1379  			for i := 0; ; i++ {
  1380  				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
  1381  				if err != nil {
  1382  					return fmt.Errorf("error on listing clusters: %v", err)
  1383  				}
  1384  				if len(lcr.Clusters) == 0 {
  1385  					if i < 10 {
  1386  						time.Sleep(200 * time.Millisecond)
  1387  						continue
  1388  					}
  1389  					return fmt.Errorf("empty list of clusters was returned")
  1390  				}
  1391  				cluster = lcr.Clusters[0]
  1392  				break
  1393  			}
  1394  			newspec, err := convert.SwarmSpecToGRPC(spec)
  1395  			if err != nil {
  1396  				return fmt.Errorf("error updating cluster settings: %v", err)
  1397  			}
  1398  			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
  1399  				ClusterID:      cluster.ID,
  1400  				ClusterVersion: &cluster.Meta.Version,
  1401  				Spec:           &newspec,
  1402  			})
  1403  			if err != nil {
  1404  				return fmt.Errorf("error updating cluster settings: %v", err)
  1405  			}
  1406  			return nil
  1407  		}
  1408  	}
  1409  	return ctx.Err()
  1410  }