github.com/flavio/docker@v0.1.3-0.20170117145210-f63d1a6eec47/daemon/cluster/cluster.go

     1  package cluster
     2  
     3  //
     4  // ## Swarmkit integration
     5  //
     6  // Cluster - a static, configurable object for accessing everything swarm-related.
     7  // It contains methods for connecting to and controlling the cluster, and it
     8  // always exists, even if swarm mode is not enabled.
     9  //
    10  // NodeRunner - manages starting the swarmkit node. It is present if and only
    11  // if swarm mode is enabled, and it implements a backoff restart loop to
    12  // recover from errors.
    13  //
    14  // NodeState - information about the current node status, including access to
    15  // gRPC clients if a manager is active.
    16  //
    17  // ### Locking
    18  //
    19  // `cluster.controlMutex` - held for the whole lifecycle of operations that
    20  // can reconfigure the cluster (init/join/leave, etc.). It ensures that one
    21  // reconfiguration action has fully completed before another can start.
    22  //
    23  // `cluster.mu` - held when the actual changes to the cluster configuration
    24  // happen. It differs from `controlMutex` because in some cases we need to
    25  // access the current cluster state even while a long-running reconfiguration
    26  // is in progress. For example, the network stack may ask for the current
    27  // cluster state in the middle of a shutdown. Whenever the current cluster
    28  // state is queried, take the read lock of `cluster.mu`. If you are writing an
    29  // API responder that returns synchronously, hold `cluster.mu.RLock()` for the
    30  // duration of the whole handler function. That ensures that the node will not
    31  // be shut down until the handler has finished.
    32  //
    33  // NodeRunner implements its own internal locks that should not be used outside
    34  // of the struct. Instead, call the `nodeRunner.State()` method to get the
    35  // current state of the cluster (you still need `cluster.mu.RLock()` to access
    36  // the `cluster.nr` reference itself). Most changes in NodeRunner happen
    37  // because of an external event (a network problem or an unexpected swarmkit
    38  // error), and Docker shouldn't take any locks that delay these changes.
    39  //
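// For illustration only (a sketch, not code used by this package): a
// synchronous, read-only API responder following the convention above might
// look roughly like the following. The handler name is hypothetical; the
// helpers it calls (currentNodeState, errNoManager, NodeID) are defined in
// this file.
//
//	func (c *Cluster) exampleHandler() (string, error) {
//		c.mu.RLock()
//		defer c.mu.RUnlock() // held for the whole handler, so the node
//		                     // cannot be shut down until we return
//
//		state := c.currentNodeState()
//		if !state.IsActiveManager() {
//			return "", c.errNoManager(state)
//		}
//		return state.NodeID(), nil
//	}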
    40  
    41  import (
    42  	"crypto/x509"
    43  	"encoding/base64"
    44  	"encoding/json"
    45  	"fmt"
    46  	"io"
    47  	"net"
    48  	"os"
    49  	"path/filepath"
    50  	"strings"
    51  	"sync"
    52  	"time"
    53  
    54  	"github.com/Sirupsen/logrus"
    55  	distreference "github.com/docker/distribution/reference"
    56  	apierrors "github.com/docker/docker/api/errors"
    57  	apitypes "github.com/docker/docker/api/types"
    58  	"github.com/docker/docker/api/types/backend"
    59  	"github.com/docker/docker/api/types/filters"
    60  	"github.com/docker/docker/api/types/network"
    61  	types "github.com/docker/docker/api/types/swarm"
    62  	"github.com/docker/docker/daemon/cluster/convert"
    63  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    64  	"github.com/docker/docker/daemon/logger"
    65  	"github.com/docker/docker/opts"
    66  	"github.com/docker/docker/pkg/ioutils"
    67  	"github.com/docker/docker/pkg/signal"
    68  	"github.com/docker/docker/pkg/stdcopy"
    69  	"github.com/docker/docker/reference"
    70  	"github.com/docker/docker/runconfig"
    71  	swarmapi "github.com/docker/swarmkit/api"
    72  	"github.com/docker/swarmkit/manager/encryption"
    73  	swarmnode "github.com/docker/swarmkit/node"
    74  	"github.com/docker/swarmkit/protobuf/ptypes"
    75  	"github.com/opencontainers/go-digest"
    76  	"github.com/pkg/errors"
    77  	"golang.org/x/net/context"
    78  )
    79  
    80  const swarmDirName = "swarm"
    81  const controlSocket = "control.sock"
    82  const swarmConnectTimeout = 20 * time.Second
    83  const swarmRequestTimeout = 20 * time.Second
    84  const stateFile = "docker-state.json"
    85  const defaultAddr = "0.0.0.0:2377"
    86  
    87  const (
    88  	initialReconnectDelay = 100 * time.Millisecond
    89  	maxReconnectDelay     = 30 * time.Second
    90  	contextPrefix         = "com.docker.swarm"
    91  )
    92  
    93  // errNoSwarm is returned on leaving a cluster that was never initialized
    94  var errNoSwarm = errors.New("This node is not part of a swarm")
    95  
    96  // errSwarmExists is returned on an initialize or join request for a cluster that has already been activated
    97  var errSwarmExists = errors.New("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")
    98  
    99  // errSwarmJoinTimeoutReached is returned when a cluster join could not complete before the timeout was reached.
   100  var errSwarmJoinTimeoutReached = errors.New("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")
   101  
   102  // errSwarmLocked is returned if the swarm is encrypted and needs a key to unlock it.
   103  var errSwarmLocked = errors.New("Swarm is encrypted and needs to be unlocked before it can be used. Please use \"docker swarm unlock\" to unlock it.")
   104  
   105  // errSwarmCertificatesExpired is returned if docker was not started during the certificates' validity period, so they had no chance to renew automatically.
   106  var errSwarmCertificatesExpired = errors.New("Swarm certificates have expired. To replace them, leave the swarm and join again.")
   107  
   108  // NetworkSubnetsProvider exposes functions for retrieving the subnets
   109  // of networks managed by Docker, so they can be filtered.
   110  type NetworkSubnetsProvider interface {
   111  	V4Subnets() []net.IPNet
   112  	V6Subnets() []net.IPNet
   113  }
   114  
   115  // Config provides values for Cluster.
   116  type Config struct {
   117  	Root                   string
   118  	Name                   string
   119  	Backend                executorpkg.Backend
   120  	NetworkSubnetsProvider NetworkSubnetsProvider
   121  
   122  	// DefaultAdvertiseAddr is the default host/IP or network interface to use
   123  	// if no AdvertiseAddr value is specified.
   124  	DefaultAdvertiseAddr string
   125  
   126  	// path to store runtime state, such as the swarm control socket
   127  	RuntimeRoot string
   128  }
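
// Illustrative only (a sketch, not code in this file): a caller such as the
// daemon would construct a Cluster from a Config roughly as follows. The
// daemonBackend value stands in for a hypothetical executorpkg.Backend
// implementation; the paths are examples.
//
//	c, err := cluster.New(cluster.Config{
//		Root:        "/var/lib/docker",
//		Name:        "default",
//		Backend:     daemonBackend,
//		RuntimeRoot: "/var/run/docker",
//	})
//	if err != nil {
//		logrus.Fatalf("error initializing cluster component: %v", err)
//	}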
   129  
   130  // Cluster provides capabilities to participate in a cluster as a worker or a
   131  // manager.
   132  type Cluster struct {
   133  	mu           sync.RWMutex
   134  	controlMutex sync.RWMutex // protect init/join/leave user operations
   135  	nr           *nodeRunner
   136  	root         string
   137  	runtimeRoot  string
   138  	config       Config
   139  	configEvent  chan struct{} // todo: make this array and goroutine safe
   140  	attachers    map[string]*attacher
   141  }
   142  
   143  // attacher manages the in-memory state of a container's attachment to a
   144  // global-scope network managed by the swarm manager. It helps identify the
   145  // attachment via its taskID and holds the corresponding attachment
   146  // configuration obtained from the manager.
   147  type attacher struct {
   148  	taskID           string
   149  	config           *network.NetworkingConfig
   150  	attachWaitCh     chan *network.NetworkingConfig
   151  	attachCompleteCh chan struct{}
   152  	detachWaitCh     chan struct{}
   153  }
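
// Illustrative only (a sketch, not code in this file): the typical attacher
// lifecycle, tying together the AttachNetwork, UpdateAttachment,
// WaitForDetachment and DetachNetwork methods defined later in this file. The
// network name and containerID are hypothetical.
//
//	// Container start path: blocks until the manager delivers the
//	// attachment config via UpdateAttachment.
//	cfg, err := c.AttachNetwork("mynet", containerID, nil)
//
//	// Container stop path: unblocks callers waiting in WaitForDetachment.
//	detachErr := c.DetachNetwork("mynet", containerID)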
   154  
   155  // New creates a new Cluster instance using provided config.
   156  func New(config Config) (*Cluster, error) {
   157  	root := filepath.Join(config.Root, swarmDirName)
   158  	if err := os.MkdirAll(root, 0700); err != nil {
   159  		return nil, err
   160  	}
   161  	if config.RuntimeRoot == "" {
   162  		config.RuntimeRoot = root
   163  	}
   164  	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
   165  		return nil, err
   166  	}
   167  	c := &Cluster{
   168  		root:        root,
   169  		config:      config,
   170  		configEvent: make(chan struct{}, 10),
   171  		runtimeRoot: config.RuntimeRoot,
   172  		attachers:   make(map[string]*attacher),
   173  	}
   174  
   175  	nodeConfig, err := loadPersistentState(root)
   176  	if err != nil {
   177  		if os.IsNotExist(err) {
   178  			return c, nil
   179  		}
   180  		return nil, err
   181  	}
   182  
   183  	nr, err := c.newNodeRunner(*nodeConfig)
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  	c.nr = nr
   188  
   189  	select {
   190  	case <-time.After(swarmConnectTimeout):
   191  		logrus.Error("swarm component could not be started before timeout was reached")
   192  	case err := <-nr.Ready():
   193  		if err != nil {
   194  			if errors.Cause(err) == errSwarmLocked {
   195  				return c, nil
   196  			}
   197  			if err, ok := errors.Cause(c.nr.err).(x509.CertificateInvalidError); ok && err.Reason == x509.Expired {
   198  				return c, nil
   199  			}
   200  			return nil, errors.Wrap(err, "swarm component could not be started")
   201  		}
   202  	}
   203  	return c, nil
   204  }
   205  
   206  func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
   207  	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
   208  		return nil, err
   209  	}
   210  
   211  	actualLocalAddr := conf.LocalAddr
   212  	if actualLocalAddr == "" {
   213  		// If localAddr was not specified, resolve it automatically
   214  		// based on the route to joinAddr. localAddr can only be left
   215  		// empty on "join".
   216  		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
   217  		if err != nil {
   218  			return nil, fmt.Errorf("could not parse listen address: %v", err)
   219  		}
   220  
   221  		listenAddrIP := net.ParseIP(listenHost)
   222  		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
   223  			actualLocalAddr = listenHost
   224  		} else {
   225  			if conf.RemoteAddr == "" {
   226  				// Should never happen except using swarms created by
   227  				// old versions that didn't save remoteAddr.
   228  				conf.RemoteAddr = "8.8.8.8:53"
   229  			}
   230  			conn, err := net.Dial("udp", conf.RemoteAddr)
   231  			if err != nil {
   232  				return nil, fmt.Errorf("could not find local IP address: %v", err)
   233  			}
   234  			localHostPort := conn.LocalAddr().String()
   235  			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
   236  			conn.Close()
   237  		}
   238  	}
   239  
   240  	nr := &nodeRunner{cluster: c}
   241  	nr.actualLocalAddr = actualLocalAddr
   242  
   243  	if err := nr.Start(conf); err != nil {
   244  		return nil, err
   245  	}
   246  
   247  	c.config.Backend.SetClusterProvider(c)
   248  
   249  	return nr, nil
   250  }
   251  
   252  // Init initializes new cluster from user provided request.
   253  func (c *Cluster) Init(req types.InitRequest) (string, error) {
   254  	c.controlMutex.Lock()
   255  	defer c.controlMutex.Unlock()
   256  	c.mu.Lock()
   257  	if c.nr != nil {
   258  		if req.ForceNewCluster {
   259  			if err := c.nr.Stop(); err != nil {
   260  				c.mu.Unlock()
   261  				return "", err
   262  			}
   263  		} else {
   264  			c.mu.Unlock()
   265  			return "", errSwarmExists
   266  		}
   267  	}
   268  	c.mu.Unlock()
   269  
   270  	if err := validateAndSanitizeInitRequest(&req); err != nil {
   271  		return "", apierrors.NewBadRequestError(err)
   272  	}
   273  
   274  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   275  	if err != nil {
   276  		return "", err
   277  	}
   278  
   279  	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   280  	if err != nil {
   281  		return "", err
   282  	}
   283  
   284  	localAddr := listenHost
   285  
   286  	// If the local address is undetermined, the advertise address
   287  	// will be used as local address, if it belongs to this system.
   288  	// If the advertise address is not local, then we try to find
   289  	// a system address to use as local address. If this fails,
   290  	// we give up and ask the user to pass the listen address.
   291  	if net.ParseIP(localAddr).IsUnspecified() {
   292  		advertiseIP := net.ParseIP(advertiseHost)
   293  
   294  		found := false
   295  		for _, systemIP := range listSystemIPs() {
   296  			if systemIP.Equal(advertiseIP) {
   297  				localAddr = advertiseIP.String()
   298  				found = true
   299  				break
   300  			}
   301  		}
   302  
   303  		if !found {
   304  			ip, err := c.resolveSystemAddr()
   305  			if err != nil {
   306  				logrus.Warnf("Could not find a local address: %v", err)
   307  				return "", errMustSpecifyListenAddr
   308  			}
   309  			localAddr = ip.String()
   310  		}
   311  	}
   312  
   313  	if !req.ForceNewCluster {
   314  		clearPersistentState(c.root)
   315  	}
   316  
   317  	nr, err := c.newNodeRunner(nodeStartConfig{
   318  		forceNewCluster: req.ForceNewCluster,
   319  		autolock:        req.AutoLockManagers,
   320  		LocalAddr:       localAddr,
   321  		ListenAddr:      net.JoinHostPort(listenHost, listenPort),
   322  		AdvertiseAddr:   net.JoinHostPort(advertiseHost, advertisePort),
   323  		availability:    req.Availability,
   324  	})
   325  	if err != nil {
   326  		return "", err
   327  	}
   328  	c.mu.Lock()
   329  	c.nr = nr
   330  	c.mu.Unlock()
   331  
   332  	if err := <-nr.Ready(); err != nil {
   333  		if !req.ForceNewCluster { // if failure on first attempt don't keep state
   334  			if err := clearPersistentState(c.root); err != nil {
   335  				return "", err
   336  			}
   337  		}
   338  		if err != nil {
   339  			c.mu.Lock()
   340  			c.nr = nil
   341  			c.mu.Unlock()
   342  		}
   343  		return "", err
   344  	}
   345  	state := nr.State()
   346  	if state.swarmNode == nil { // should never happen but protect from panic
   347  		return "", errors.New("invalid cluster state for spec initialization")
   348  	}
   349  	if err := initClusterSpec(state.swarmNode, req.Spec); err != nil {
   350  		return "", err
   351  	}
   352  	return state.NodeID(), nil
   353  }
   354  
   355  // Join makes current Cluster part of an existing swarm cluster.
   356  func (c *Cluster) Join(req types.JoinRequest) error {
   357  	c.controlMutex.Lock()
   358  	defer c.controlMutex.Unlock()
   359  	c.mu.Lock()
   360  	if c.nr != nil {
   361  		c.mu.Unlock()
   362  		return errSwarmExists
   363  	}
   364  	c.mu.Unlock()
   365  
   366  	if err := validateAndSanitizeJoinRequest(&req); err != nil {
   367  		return apierrors.NewBadRequestError(err)
   368  	}
   369  
   370  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   371  	if err != nil {
   372  		return err
   373  	}
   374  
   375  	var advertiseAddr string
   376  	if req.AdvertiseAddr != "" {
   377  		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   378  		// For joining, we don't need to provide an advertise address,
   379  		// since the remote side can detect it.
   380  		if err == nil {
   381  			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
   382  		}
   383  	}
   384  
   385  	clearPersistentState(c.root)
   386  
   387  	nr, err := c.newNodeRunner(nodeStartConfig{
   388  		RemoteAddr:    req.RemoteAddrs[0],
   389  		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
   390  		AdvertiseAddr: advertiseAddr,
   391  		joinAddr:      req.RemoteAddrs[0],
   392  		joinToken:     req.JoinToken,
   393  		availability:  req.Availability,
   394  	})
   395  	if err != nil {
   396  		return err
   397  	}
   398  
   399  	c.mu.Lock()
   400  	c.nr = nr
   401  	c.mu.Unlock()
   402  
   403  	select {
   404  	case <-time.After(swarmConnectTimeout):
   405  		return errSwarmJoinTimeoutReached
   406  	case err := <-nr.Ready():
   407  		if err != nil {
   408  			c.mu.Lock()
   409  			c.nr = nil
   410  			c.mu.Unlock()
   411  		}
   412  		return err
   413  	}
   414  }
   415  
   416  // GetUnlockKey returns the unlock key for the swarm.
   417  func (c *Cluster) GetUnlockKey() (string, error) {
   418  	c.mu.RLock()
   419  	defer c.mu.RUnlock()
   420  
   421  	state := c.currentNodeState()
   422  	if !state.IsActiveManager() {
   423  		return "", c.errNoManager(state)
   424  	}
   425  
   426  	ctx, cancel := c.getRequestContext()
   427  	defer cancel()
   428  
   429  	client := swarmapi.NewCAClient(state.grpcConn)
   430  
   431  	r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{})
   432  	if err != nil {
   433  		return "", err
   434  	}
   435  
   436  	if len(r.UnlockKey) == 0 {
   437  		// no key
   438  		return "", nil
   439  	}
   440  
   441  	return encryption.HumanReadableKey(r.UnlockKey), nil
   442  }
   443  
   444  // UnlockSwarm provides a key to decrypt data that is encrypted at rest.
   445  func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error {
   446  	c.controlMutex.Lock()
   447  	defer c.controlMutex.Unlock()
   448  
   449  	c.mu.RLock()
   450  	state := c.currentNodeState()
   451  
   452  	if !state.IsActiveManager() {
   453  		// when the manager is not active, return an error
   454  		// unless the swarm is locked
   455  		if err := c.errNoManager(state); err != errSwarmLocked {
   456  			c.mu.RUnlock()
   457  			return err
   458  		}
   459  	} else {
   460  		// when the manager is active, return a "not locked" error
   461  		c.mu.RUnlock()
   462  		return errors.New("swarm is not locked")
   463  	}
   464  
   465  	// execution reaches here only when the swarm is locked
   466  	nr := c.nr
   467  	c.mu.RUnlock()
   468  
   469  	key, err := encryption.ParseHumanReadableKey(req.UnlockKey)
   470  	if err != nil {
   471  		return err
   472  	}
   473  
   474  	config := nr.config
   475  	config.lockKey = key
   476  	if err := nr.Stop(); err != nil {
   477  		return err
   478  	}
   479  	nr, err = c.newNodeRunner(config)
   480  	if err != nil {
   481  		return err
   482  	}
   483  
   484  	c.mu.Lock()
   485  	c.nr = nr
   486  	c.mu.Unlock()
   487  
   488  	if err := <-nr.Ready(); err != nil {
   489  		if errors.Cause(err) == errSwarmLocked {
   490  			return errors.New("swarm could not be unlocked: invalid key provided")
   491  		}
   492  		return fmt.Errorf("swarm component could not be started: %v", err)
   493  	}
   494  	return nil
   495  }
   496  
   497  // Leave shuts down Cluster and removes current state.
   498  func (c *Cluster) Leave(force bool) error {
   499  	c.controlMutex.Lock()
   500  	defer c.controlMutex.Unlock()
   501  
   502  	c.mu.Lock()
   503  	nr := c.nr
   504  	if nr == nil {
   505  		c.mu.Unlock()
   506  		return errNoSwarm
   507  	}
   508  
   509  	state := c.currentNodeState()
   510  
   511  	if errors.Cause(state.err) == errSwarmLocked && !force {
   512  		// leaving a locked swarm without --force is not allowed
   513  		c.mu.Unlock()
   514  		return errors.New("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message.")
   515  	}
   516  
   517  	if state.IsManager() && !force {
   518  		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
   519  		if state.IsActiveManager() {
   520  			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
   521  			if err == nil {
   522  				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
   523  					if isLastManager(reachable, unreachable) {
   524  						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
   525  						c.mu.Unlock()
   526  						return errors.New(msg)
   527  					}
   528  					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
   529  				}
   530  			}
   531  		} else {
   532  			msg += "Doing so may lose the consensus of your cluster. "
   533  		}
   534  
   535  		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
   536  		c.mu.Unlock()
   537  		return errors.New(msg)
   538  	}
   539  	// release readers in here
   540  	if err := nr.Stop(); err != nil {
   541  		logrus.Errorf("failed to shut down cluster node: %v", err)
   542  		signal.DumpStacks("")
   543  		c.mu.Unlock()
   544  		return err
   545  	}
   546  	c.nr = nil
   547  	c.mu.Unlock()
   548  	if nodeID := state.NodeID(); nodeID != "" {
   549  		nodeContainers, err := c.listContainerForNode(nodeID)
   550  		if err != nil {
   551  			return err
   552  		}
   553  		for _, id := range nodeContainers {
   554  			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
   555  				logrus.Errorf("error removing %v: %v", id, err)
   556  			}
   557  		}
   558  	}
   559  
   560  	c.configEvent <- struct{}{}
   561  	// todo: cleanup optional?
   562  	if err := clearPersistentState(c.root); err != nil {
   563  		return err
   564  	}
   565  	c.config.Backend.SetClusterProvider(nil)
   566  	return nil
   567  }
   568  
   569  func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
   570  	var ids []string
   571  	filters := filters.NewArgs()
   572  	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
   573  	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
   574  		Filters: filters,
   575  	})
   576  	if err != nil {
   577  		return []string{}, err
   578  	}
   579  	for _, c := range containers {
   580  		ids = append(ids, c.ID)
   581  	}
   582  	return ids, nil
   583  }
   584  
   585  func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum loss
   586  	return context.WithTimeout(context.Background(), swarmRequestTimeout)
   587  }
   588  
   589  // Inspect retrieves the configuration properties of a managed swarm cluster.
   590  func (c *Cluster) Inspect() (types.Swarm, error) {
   591  	c.mu.RLock()
   592  	defer c.mu.RUnlock()
   593  
   594  	state := c.currentNodeState()
   595  	if !state.IsActiveManager() {
   596  		return types.Swarm{}, c.errNoManager(state)
   597  	}
   598  
   599  	ctx, cancel := c.getRequestContext()
   600  	defer cancel()
   601  
   602  	swarm, err := getSwarm(ctx, state.controlClient)
   603  	if err != nil {
   604  		return types.Swarm{}, err
   605  	}
   606  
   607  	return convert.SwarmFromGRPC(*swarm), nil
   608  }
   609  
   610  // Update updates configuration of a managed swarm cluster.
   611  func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
   612  	c.mu.RLock()
   613  	defer c.mu.RUnlock()
   614  
   615  	state := c.currentNodeState()
   616  	if !state.IsActiveManager() {
   617  		return c.errNoManager(state)
   618  	}
   619  
   620  	ctx, cancel := c.getRequestContext()
   621  	defer cancel()
   622  
   623  	swarm, err := getSwarm(ctx, state.controlClient)
   624  	if err != nil {
   625  		return err
   626  	}
   627  
   628  	// In update, the client should provide the complete spec of the swarm, including
   629  	// Name and Labels. If a field is specified as 0 or nil, then swarmkit's default
   630  	// value will be used.
   631  	clusterSpec, err := convert.SwarmSpecToGRPC(spec)
   632  	if err != nil {
   633  		return apierrors.NewBadRequestError(err)
   634  	}
   635  
   636  	_, err = state.controlClient.UpdateCluster(
   637  		ctx,
   638  		&swarmapi.UpdateClusterRequest{
   639  			ClusterID: swarm.ID,
   640  			Spec:      &clusterSpec,
   641  			ClusterVersion: &swarmapi.Version{
   642  				Index: version,
   643  			},
   644  			Rotation: swarmapi.KeyRotation{
   645  				WorkerJoinToken:  flags.RotateWorkerToken,
   646  				ManagerJoinToken: flags.RotateManagerToken,
   647  				ManagerUnlockKey: flags.RotateManagerUnlockKey,
   648  			},
   649  		},
   650  	)
   651  	return err
   652  }
   653  
   654  // IsManager returns true if Cluster is participating as a manager.
   655  func (c *Cluster) IsManager() bool {
   656  	c.mu.RLock()
   657  	defer c.mu.RUnlock()
   658  	return c.currentNodeState().IsActiveManager()
   659  }
   660  
   661  // IsAgent returns true if Cluster is participating as a worker/agent.
   662  func (c *Cluster) IsAgent() bool {
   663  	c.mu.RLock()
   664  	defer c.mu.RUnlock()
   665  	return c.currentNodeState().status == types.LocalNodeStateActive
   666  }
   667  
   668  // GetLocalAddress returns the local address.
   669  func (c *Cluster) GetLocalAddress() string {
   670  	c.mu.RLock()
   671  	defer c.mu.RUnlock()
   672  	return c.currentNodeState().actualLocalAddr
   673  }
   674  
   675  // GetListenAddress returns the listen address.
   676  func (c *Cluster) GetListenAddress() string {
   677  	c.mu.RLock()
   678  	defer c.mu.RUnlock()
   679  	if c.nr != nil {
   680  		return c.nr.config.ListenAddr
   681  	}
   682  	return ""
   683  }
   684  
   685  // GetAdvertiseAddress returns the remotely reachable address of this node.
   686  func (c *Cluster) GetAdvertiseAddress() string {
   687  	c.mu.RLock()
   688  	defer c.mu.RUnlock()
   689  	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
   690  		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
   691  		return advertiseHost
   692  	}
   693  	return c.currentNodeState().actualLocalAddr
   694  }
   695  
   696  // GetRemoteAddress returns a known advertise address of a remote manager if
   697  // available.
   698  // todo: change to array/connect with info
   699  func (c *Cluster) GetRemoteAddress() string {
   700  	c.mu.RLock()
   701  	defer c.mu.RUnlock()
   702  	return c.getRemoteAddress()
   703  }
   704  
   705  func (c *Cluster) getRemoteAddress() string {
   706  	state := c.currentNodeState()
   707  	if state.swarmNode == nil {
   708  		return ""
   709  	}
   710  	nodeID := state.swarmNode.NodeID()
   711  	for _, r := range state.swarmNode.Remotes() {
   712  		if r.NodeID != nodeID {
   713  			return r.Addr
   714  		}
   715  	}
   716  	return ""
   717  }
   718  
   719  // ListenClusterEvents returns a channel that receives messages on cluster
   720  // participation changes.
   721  // todo: make cancelable and accessible to multiple callers
   722  func (c *Cluster) ListenClusterEvents() <-chan struct{} {
   723  	return c.configEvent
   724  }
   725  
   726  // Info returns information about the current cluster state.
   727  func (c *Cluster) Info() types.Info {
   728  	info := types.Info{
   729  		NodeAddr: c.GetAdvertiseAddress(),
   730  	}
   731  	c.mu.RLock()
   732  	defer c.mu.RUnlock()
   733  
   734  	state := c.currentNodeState()
   735  	info.LocalNodeState = state.status
   736  	if state.err != nil {
   737  		info.Error = state.err.Error()
   738  	}
   739  
   740  	ctx, cancel := c.getRequestContext()
   741  	defer cancel()
   742  
   743  	if state.IsActiveManager() {
   744  		info.ControlAvailable = true
   745  		swarm, err := c.Inspect()
   746  		if err != nil {
   747  			info.Error = err.Error()
   748  		}
   749  
   750  		// Strip JoinTokens
   751  		info.Cluster = swarm.ClusterInfo
   752  
   753  		if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err == nil {
   754  			info.Nodes = len(r.Nodes)
   755  			for _, n := range r.Nodes {
   756  				if n.ManagerStatus != nil {
   757  					info.Managers = info.Managers + 1
   758  				}
   759  			}
   760  		}
   761  	}
   762  
   763  	if state.swarmNode != nil {
   764  		for _, r := range state.swarmNode.Remotes() {
   765  			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
   766  		}
   767  		info.NodeID = state.swarmNode.NodeID()
   768  	}
   769  
   770  	return info
   771  }
   772  
   773  // currentNodeState should not be called without a read lock
   774  func (c *Cluster) currentNodeState() nodeState {
   775  	return c.nr.State()
   776  }
   777  
   778  // errNoManager returns an error describing why manager commands can't be used.
   779  // Call it with the read lock held.
   780  func (c *Cluster) errNoManager(st nodeState) error {
   781  	if st.swarmNode == nil {
   782  		if errors.Cause(st.err) == errSwarmLocked {
   783  			return errSwarmLocked
   784  		}
   785  		if st.err == errSwarmCertificatesExpired {
   786  			return errSwarmCertificatesExpired
   787  		}
   788  		return errors.New("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
   789  	}
   790  	if st.swarmNode.Manager() != nil {
   791  		return errors.New("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
   792  	}
   793  	return errors.New("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
   794  }
   795  
   796  // GetServices returns all services of a managed swarm cluster.
   797  func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
   798  	c.mu.RLock()
   799  	defer c.mu.RUnlock()
   800  
   801  	state := c.currentNodeState()
   802  	if !state.IsActiveManager() {
   803  		return nil, c.errNoManager(state)
   804  	}
   805  
   806  	filters, err := newListServicesFilters(options.Filters)
   807  	if err != nil {
   808  		return nil, err
   809  	}
   810  	ctx, cancel := c.getRequestContext()
   811  	defer cancel()
   812  
   813  	r, err := state.controlClient.ListServices(
   814  		ctx,
   815  		&swarmapi.ListServicesRequest{Filters: filters})
   816  	if err != nil {
   817  		return nil, err
   818  	}
   819  
   820  	services := []types.Service{}
   821  
   822  	for _, service := range r.Services {
   823  		services = append(services, convert.ServiceFromGRPC(*service))
   824  	}
   825  
   826  	return services, nil
   827  }
   828  
   829  // imageWithDigestString takes an image such as name or name:tag
   830  // and returns the image pinned to a digest, such as name@sha256:34234...
   831  // Due to the differences between the docker/docker/reference and
   832  // docker/distribution/reference packages, we're parsing the image twice.
   833  // As the two packages converge, this function should be simplified.
   834  // TODO(nishanttotla): After the packages converge, the function must
   835  // convert distreference.Named -> distreference.Canonical, and the logic simplified.
   836  func (c *Cluster) imageWithDigestString(ctx context.Context, image string, authConfig *apitypes.AuthConfig) (string, error) {
   837  	if _, err := digest.Parse(image); err == nil {
   838  		return "", errors.New("image reference is an image ID")
   839  	}
   840  	ref, err := distreference.ParseNamed(image)
   841  	if err != nil {
   842  		return "", err
   843  	}
   844  	// only query registry if not a canonical reference (i.e. with digest)
   845  	if _, ok := ref.(distreference.Canonical); !ok {
   846  		// create a docker/docker/reference Named object because GetRepository needs it
   847  		dockerRef, err := reference.ParseNamed(image)
   848  		if err != nil {
   849  			return "", err
   850  		}
   851  		dockerRef = reference.WithDefaultTag(dockerRef)
   852  		namedTaggedRef, ok := dockerRef.(reference.NamedTagged)
   853  		if !ok {
   854  			return "", errors.New("unable to cast image to NamedTagged reference object")
   855  		}
   856  
   857  		repo, _, err := c.config.Backend.GetRepository(ctx, namedTaggedRef, authConfig)
   858  		if err != nil {
   859  			return "", err
   860  		}
   861  		dscrptr, err := repo.Tags(ctx).Get(ctx, namedTaggedRef.Tag())
   862  		if err != nil {
   863  			return "", err
   864  		}
   865  
   866  		namedDigestedRef, err := distreference.WithDigest(distreference.EnsureTagged(ref), dscrptr.Digest)
   867  		if err != nil {
   868  			return "", err
   869  		}
   870  		return namedDigestedRef.String(), nil
   871  	}
   872  	// reference already contains a digest, so just return it
   873  	return ref.String(), nil
   874  }
   875  
   876  // CreateService creates a new service in a managed swarm cluster.
   877  func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (*apitypes.ServiceCreateResponse, error) {
   878  	c.mu.RLock()
   879  	defer c.mu.RUnlock()
   880  
   881  	state := c.currentNodeState()
   882  	if !state.IsActiveManager() {
   883  		return nil, c.errNoManager(state)
   884  	}
   885  
   886  	ctx, cancel := c.getRequestContext()
   887  	defer cancel()
   888  
   889  	err := c.populateNetworkID(ctx, state.controlClient, &s)
   890  	if err != nil {
   891  		return nil, err
   892  	}
   893  
   894  	serviceSpec, err := convert.ServiceSpecToGRPC(s)
   895  	if err != nil {
   896  		return nil, apierrors.NewBadRequestError(err)
   897  	}
   898  
   899  	ctnr := serviceSpec.Task.GetContainer()
   900  	if ctnr == nil {
   901  		return nil, errors.New("service does not use container tasks")
   902  	}
   903  
   904  	if encodedAuth != "" {
   905  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   906  	}
   907  
   908  	// retrieve auth config from encoded auth
   909  	authConfig := &apitypes.AuthConfig{}
   910  	if encodedAuth != "" {
   911  		if err := json.NewDecoder(base64.NewDecoder(base64.URLEncoding, strings.NewReader(encodedAuth))).Decode(authConfig); err != nil {
   912  			logrus.Warnf("invalid authconfig: %v", err)
   913  		}
   914  	}
   915  
   916  	resp := &apitypes.ServiceCreateResponse{}
   917  
   918  	// pin image by digest
   919  	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
   920  		digestImage, err := c.imageWithDigestString(ctx, ctnr.Image, authConfig)
   921  		if err != nil {
   922  			logrus.Warnf("unable to pin image %s to digest: %s", ctnr.Image, err.Error())
   923  			resp.Warnings = append(resp.Warnings, fmt.Sprintf("unable to pin image %s to digest: %s", ctnr.Image, err.Error()))
   924  		} else if ctnr.Image != digestImage {
   925  			logrus.Debugf("pinning image %s by digest: %s", ctnr.Image, digestImage)
   926  			ctnr.Image = digestImage
   927  		} else {
   928  			logrus.Debugf("creating service using supplied digest reference %s", ctnr.Image)
   929  		}
   930  	}
   931  
   932  	r, err := state.controlClient.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
   933  	if err != nil {
   934  		return nil, err
   935  	}
   936  
   937  	resp.ID = r.Service.ID
   938  	return resp, nil
   939  }
   940  
   941  // GetService returns a service based on an ID or name.
   942  func (c *Cluster) GetService(input string) (types.Service, error) {
   943  	c.mu.RLock()
   944  	defer c.mu.RUnlock()
   945  
   946  	state := c.currentNodeState()
   947  	if !state.IsActiveManager() {
   948  		return types.Service{}, c.errNoManager(state)
   949  	}
   950  
   951  	ctx, cancel := c.getRequestContext()
   952  	defer cancel()
   953  
   954  	service, err := getService(ctx, state.controlClient, input)
   955  	if err != nil {
   956  		return types.Service{}, err
   957  	}
   958  	return convert.ServiceFromGRPC(*service), nil
   959  }
   960  
   961  // UpdateService updates an existing service to match new properties.
   962  func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) (*apitypes.ServiceUpdateResponse, error) {
   963  	c.mu.RLock()
   964  	defer c.mu.RUnlock()
   965  
   966  	state := c.currentNodeState()
   967  	if !state.IsActiveManager() {
   968  		return nil, c.errNoManager(state)
   969  	}
   970  
   971  	ctx, cancel := c.getRequestContext()
   972  	defer cancel()
   973  
   974  	err := c.populateNetworkID(ctx, state.controlClient, &spec)
   975  	if err != nil {
   976  		return nil, err
   977  	}
   978  
   979  	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
   980  	if err != nil {
   981  		return nil, apierrors.NewBadRequestError(err)
   982  	}
   983  
   984  	currentService, err := getService(ctx, state.controlClient, serviceIDOrName)
   985  	if err != nil {
   986  		return nil, err
   987  	}
   988  
   989  	newCtnr := serviceSpec.Task.GetContainer()
   990  	if newCtnr == nil {
   991  		return nil, errors.New("service does not use container tasks")
   992  	}
   993  
   994  	if encodedAuth != "" {
   995  		newCtnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   996  	} else {
   997  		// this is needed because if the encodedAuth isn't being updated then we
   998  		// shouldn't lose it, and should continue to use the one already present
   999  		var ctnr *swarmapi.ContainerSpec
  1000  		switch registryAuthFrom {
  1001  		case apitypes.RegistryAuthFromSpec, "":
  1002  			ctnr = currentService.Spec.Task.GetContainer()
  1003  		case apitypes.RegistryAuthFromPreviousSpec:
  1004  			if currentService.PreviousSpec == nil {
  1005  				return nil, errors.New("service does not have a previous spec")
  1006  			}
  1007  			ctnr = currentService.PreviousSpec.Task.GetContainer()
  1008  		default:
  1009  			return nil, errors.New("unsupported registryAuthFrom value")
  1010  		}
  1011  		if ctnr == nil {
  1012  			return nil, errors.New("service does not use container tasks")
  1013  		}
  1014  		newCtnr.PullOptions = ctnr.PullOptions
  1015  		// update encodedAuth so it can be used to pin image by digest
  1016  		if ctnr.PullOptions != nil {
  1017  			encodedAuth = ctnr.PullOptions.RegistryAuth
  1018  		}
  1019  	}
  1020  
  1021  	// retrieve auth config from encoded auth
  1022  	authConfig := &apitypes.AuthConfig{}
  1023  	if encodedAuth != "" {
  1024  		if err := json.NewDecoder(base64.NewDecoder(base64.URLEncoding, strings.NewReader(encodedAuth))).Decode(authConfig); err != nil {
  1025  			logrus.Warnf("invalid authconfig: %v", err)
  1026  		}
  1027  	}
  1028  
  1029  	resp := &apitypes.ServiceUpdateResponse{}
  1030  
  1031  	// pin image by digest
  1032  	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
  1033  		digestImage, err := c.imageWithDigestString(ctx, newCtnr.Image, authConfig)
  1034  		if err != nil {
  1035  			logrus.Warnf("unable to pin image %s to digest: %s", newCtnr.Image, err.Error())
  1036  			resp.Warnings = append(resp.Warnings, fmt.Sprintf("unable to pin image %s to digest: %s", newCtnr.Image, err.Error()))
  1037  		} else if newCtnr.Image != digestImage {
  1038  			logrus.Debugf("pinning image %s by digest: %s", newCtnr.Image, digestImage)
  1039  			newCtnr.Image = digestImage
  1040  		} else {
  1041  			logrus.Debugf("updating service using supplied digest reference %s", newCtnr.Image)
  1042  		}
  1043  	}
  1044  
  1045  	_, err = state.controlClient.UpdateService(
  1046  		ctx,
  1047  		&swarmapi.UpdateServiceRequest{
  1048  			ServiceID: currentService.ID,
  1049  			Spec:      &serviceSpec,
  1050  			ServiceVersion: &swarmapi.Version{
  1051  				Index: version,
  1052  			},
  1053  		},
  1054  	)
  1055  
  1056  	return resp, err
  1057  }
  1058  
  1059  // RemoveService removes a service from a managed swarm cluster.
  1060  func (c *Cluster) RemoveService(input string) error {
  1061  	c.mu.RLock()
  1062  	defer c.mu.RUnlock()
  1063  
  1064  	state := c.currentNodeState()
  1065  	if !state.IsActiveManager() {
  1066  		return c.errNoManager(state)
  1067  	}
  1068  
  1069  	ctx, cancel := c.getRequestContext()
  1070  	defer cancel()
  1071  
  1072  	service, err := getService(ctx, state.controlClient, input)
  1073  	if err != nil {
  1074  		return err
  1075  	}
  1076  
  1077  	_, err = state.controlClient.RemoveService(ctx, &swarmapi.RemoveServiceRequest{ServiceID: service.ID})
  1078  	return err
  1079  }
  1080  
  1081  // ServiceLogs collects service logs and writes them back to `config.OutStream`
  1082  func (c *Cluster) ServiceLogs(ctx context.Context, input string, config *backend.ContainerLogsConfig, started chan struct{}) error {
  1083  	c.mu.RLock()
  1084  	state := c.currentNodeState()
  1085  	if !state.IsActiveManager() {
  1086  		c.mu.RUnlock()
  1087  		return c.errNoManager(state)
  1088  	}
  1089  
  1090  	service, err := getService(ctx, state.controlClient, input)
  1091  	if err != nil {
  1092  		c.mu.RUnlock()
  1093  		return err
  1094  	}
  1095  
  1096  	stream, err := state.logsClient.SubscribeLogs(ctx, &swarmapi.SubscribeLogsRequest{
  1097  		Selector: &swarmapi.LogSelector{
  1098  			ServiceIDs: []string{service.ID},
  1099  		},
  1100  		Options: &swarmapi.LogSubscriptionOptions{
  1101  			Follow: config.Follow,
  1102  		},
  1103  	})
  1104  	if err != nil {
  1105  		c.mu.RUnlock()
  1106  		return err
  1107  	}
  1108  
  1109  	wf := ioutils.NewWriteFlusher(config.OutStream)
  1110  	defer wf.Close()
  1111  	close(started)
  1112  	wf.Flush()
  1113  
  1114  	outStream := stdcopy.NewStdWriter(wf, stdcopy.Stdout)
  1115  	errStream := stdcopy.NewStdWriter(wf, stdcopy.Stderr)
  1116  
  1117  	// Release the lock before starting the stream.
  1118  	c.mu.RUnlock()
  1119  	for {
  1120  		// Check the context before doing anything.
  1121  		select {
  1122  		case <-ctx.Done():
  1123  			return ctx.Err()
  1124  		default:
  1125  		}
  1126  
  1127  		subscribeMsg, err := stream.Recv()
  1128  		if err == io.EOF {
  1129  			return nil
  1130  		}
  1131  		if err != nil {
  1132  			return err
  1133  		}
  1134  
  1135  		for _, msg := range subscribeMsg.Messages {
  1136  			data := []byte{}
  1137  
  1138  			if config.Timestamps {
  1139  				ts, err := ptypes.Timestamp(msg.Timestamp)
  1140  				if err != nil {
  1141  					return err
  1142  				}
  1143  				data = append(data, []byte(ts.Format(logger.TimeFormat)+" ")...)
  1144  			}
  1145  
  1146  			data = append(data, []byte(fmt.Sprintf("%s.node.id=%s,%s.service.id=%s,%s.task.id=%s ",
  1147  				contextPrefix, msg.Context.NodeID,
  1148  				contextPrefix, msg.Context.ServiceID,
  1149  				contextPrefix, msg.Context.TaskID,
  1150  			))...)
  1151  
  1152  			data = append(data, msg.Data...)
  1153  
  1154  			switch msg.Stream {
  1155  			case swarmapi.LogStreamStdout:
  1156  				outStream.Write(data)
  1157  			case swarmapi.LogStreamStderr:
  1158  				errStream.Write(data)
  1159  			}
  1160  		}
  1161  	}
  1162  }
  1163  
  1164  // GetNodes returns a list of all nodes known to a cluster.
  1165  func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
  1166  	c.mu.RLock()
  1167  	defer c.mu.RUnlock()
  1168  
  1169  	state := c.currentNodeState()
  1170  	if !state.IsActiveManager() {
  1171  		return nil, c.errNoManager(state)
  1172  	}
  1173  
  1174  	filters, err := newListNodesFilters(options.Filters)
  1175  	if err != nil {
  1176  		return nil, err
  1177  	}
  1178  
  1179  	ctx, cancel := c.getRequestContext()
  1180  	defer cancel()
  1181  
  1182  	r, err := state.controlClient.ListNodes(
  1183  		ctx,
  1184  		&swarmapi.ListNodesRequest{Filters: filters})
  1185  	if err != nil {
  1186  		return nil, err
  1187  	}
  1188  
  1189  	nodes := []types.Node{}
  1190  
  1191  	for _, node := range r.Nodes {
  1192  		nodes = append(nodes, convert.NodeFromGRPC(*node))
  1193  	}
  1194  	return nodes, nil
  1195  }
  1196  
  1197  // GetNode returns a node based on an ID.
  1198  func (c *Cluster) GetNode(input string) (types.Node, error) {
  1199  	c.mu.RLock()
  1200  	defer c.mu.RUnlock()
  1201  
  1202  	state := c.currentNodeState()
  1203  	if !state.IsActiveManager() {
  1204  		return types.Node{}, c.errNoManager(state)
  1205  	}
  1206  
  1207  	ctx, cancel := c.getRequestContext()
  1208  	defer cancel()
  1209  
  1210  	node, err := getNode(ctx, state.controlClient, input)
  1211  	if err != nil {
  1212  		return types.Node{}, err
  1213  	}
  1214  	return convert.NodeFromGRPC(*node), nil
  1215  }
  1216  
  1217  // UpdateNode updates an existing node's properties.
  1218  func (c *Cluster) UpdateNode(input string, version uint64, spec types.NodeSpec) error {
  1219  	c.mu.RLock()
  1220  	defer c.mu.RUnlock()
  1221  
  1222  	state := c.currentNodeState()
  1223  	if !state.IsActiveManager() {
  1224  		return c.errNoManager(state)
  1225  	}
  1226  
  1227  	nodeSpec, err := convert.NodeSpecToGRPC(spec)
  1228  	if err != nil {
  1229  		return apierrors.NewBadRequestError(err)
  1230  	}
  1231  
  1232  	ctx, cancel := c.getRequestContext()
  1233  	defer cancel()
  1234  
  1235  	currentNode, err := getNode(ctx, state.controlClient, input)
  1236  	if err != nil {
  1237  		return err
  1238  	}
  1239  
  1240  	_, err = state.controlClient.UpdateNode(
  1241  		ctx,
  1242  		&swarmapi.UpdateNodeRequest{
  1243  			NodeID: currentNode.ID,
  1244  			Spec:   &nodeSpec,
  1245  			NodeVersion: &swarmapi.Version{
  1246  				Index: version,
  1247  			},
  1248  		},
  1249  	)
  1250  	return err
  1251  }
  1252  
  1253  // RemoveNode removes a node from a cluster
  1254  func (c *Cluster) RemoveNode(input string, force bool) error {
  1255  	c.mu.RLock()
  1256  	defer c.mu.RUnlock()
  1257  
  1258  	state := c.currentNodeState()
  1259  	if !state.IsActiveManager() {
  1260  		return c.errNoManager(state)
  1261  	}
  1262  
  1263  	ctx, cancel := c.getRequestContext()
  1264  	defer cancel()
  1265  
  1266  	node, err := getNode(ctx, state.controlClient, input)
  1267  	if err != nil {
  1268  		return err
  1269  	}
  1270  
  1271  	_, err = state.controlClient.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID, Force: force})
  1272  	return err
  1273  }
  1274  
  1275  // GetTasks returns a list of tasks matching the filter options.
  1276  func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
  1277  	c.mu.RLock()
  1278  	defer c.mu.RUnlock()
  1279  
  1280  	state := c.currentNodeState()
  1281  	if !state.IsActiveManager() {
  1282  		return nil, c.errNoManager(state)
  1283  	}
  1284  
  1285  	byName := func(filter filters.Args) error {
  1286  		if filter.Include("service") {
  1287  			serviceFilters := filter.Get("service")
  1288  			for _, serviceFilter := range serviceFilters {
  1289  				service, err := c.GetService(serviceFilter)
  1290  				if err != nil {
  1291  					return err
  1292  				}
  1293  				filter.Del("service", serviceFilter)
  1294  				filter.Add("service", service.ID)
  1295  			}
  1296  		}
  1297  		if filter.Include("node") {
  1298  			nodeFilters := filter.Get("node")
  1299  			for _, nodeFilter := range nodeFilters {
  1300  				node, err := c.GetNode(nodeFilter)
  1301  				if err != nil {
  1302  					return err
  1303  				}
  1304  				filter.Del("node", nodeFilter)
  1305  				filter.Add("node", node.ID)
  1306  			}
  1307  		}
  1308  		return nil
  1309  	}
  1310  
  1311  	filters, err := newListTasksFilters(options.Filters, byName)
  1312  	if err != nil {
  1313  		return nil, err
  1314  	}
  1315  
  1316  	ctx, cancel := c.getRequestContext()
  1317  	defer cancel()
  1318  
  1319  	r, err := state.controlClient.ListTasks(
  1320  		ctx,
  1321  		&swarmapi.ListTasksRequest{Filters: filters})
  1322  	if err != nil {
  1323  		return nil, err
  1324  	}
  1325  
  1326  	tasks := []types.Task{}
  1327  
  1328  	for _, task := range r.Tasks {
  1329  		if task.Spec.GetContainer() != nil {
  1330  			tasks = append(tasks, convert.TaskFromGRPC(*task))
  1331  		}
  1332  	}
  1333  	return tasks, nil
  1334  }
  1335  
  1336  // GetTask returns a task by an ID.
  1337  func (c *Cluster) GetTask(input string) (types.Task, error) {
  1338  	c.mu.RLock()
  1339  	defer c.mu.RUnlock()
  1340  
  1341  	state := c.currentNodeState()
  1342  	if !state.IsActiveManager() {
  1343  		return types.Task{}, c.errNoManager(state)
  1344  	}
  1345  
  1346  	ctx, cancel := c.getRequestContext()
  1347  	defer cancel()
  1348  
  1349  	task, err := getTask(ctx, state.controlClient, input)
  1350  	if err != nil {
  1351  		return types.Task{}, err
  1352  	}
  1353  	return convert.TaskFromGRPC(*task), nil
  1354  }
  1355  
  1356  // GetNetwork returns a cluster network by an ID.
  1357  func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
  1358  	c.mu.RLock()
  1359  	defer c.mu.RUnlock()
  1360  
  1361  	state := c.currentNodeState()
  1362  	if !state.IsActiveManager() {
  1363  		return apitypes.NetworkResource{}, c.errNoManager(state)
  1364  	}
  1365  
  1366  	ctx, cancel := c.getRequestContext()
  1367  	defer cancel()
  1368  
  1369  	network, err := getNetwork(ctx, state.controlClient, input)
  1370  	if err != nil {
  1371  		return apitypes.NetworkResource{}, err
  1372  	}
  1373  	return convert.BasicNetworkFromGRPC(*network), nil
  1374  }
  1375  
  1376  func (c *Cluster) getNetworks(filters *swarmapi.ListNetworksRequest_Filters) ([]apitypes.NetworkResource, error) {
  1377  	c.mu.RLock()
  1378  	defer c.mu.RUnlock()
  1379  
  1380  	state := c.currentNodeState()
  1381  	if !state.IsActiveManager() {
  1382  		return nil, c.errNoManager(state)
  1383  	}
  1384  
  1385  	ctx, cancel := c.getRequestContext()
  1386  	defer cancel()
  1387  
  1388  	r, err := state.controlClient.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: filters})
  1389  	if err != nil {
  1390  		return nil, err
  1391  	}
  1392  
  1393  	var networks []apitypes.NetworkResource
  1394  
  1395  	for _, network := range r.Networks {
  1396  		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
  1397  	}
  1398  
  1399  	return networks, nil
  1400  }
  1401  
  1402  // GetNetworks returns all current cluster managed networks.
  1403  func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
  1404  	return c.getNetworks(nil)
  1405  }
  1406  
  1407  // GetNetworksByName returns cluster managed networks by name.
  1408  // It is ok to have multiple networks here. #18864
  1409  func (c *Cluster) GetNetworksByName(name string) ([]apitypes.NetworkResource, error) {
  1410  	// Note that swarmapi.GetNetworkRequest.Name is not functional.
  1411  	// So we cannot just use that with c.GetNetwork.
  1412  	return c.getNetworks(&swarmapi.ListNetworksRequest_Filters{
  1413  		Names: []string{name},
  1414  	})
  1415  }
  1416  
  1417  func attacherKey(target, containerID string) string {
  1418  	return containerID + ":" + target
  1419  }
  1420  
  1421  // UpdateAttachment signals the attachment config to the attachment
  1422  // waiter that is trying to start or attach the container to the
  1423  // network.
  1424  func (c *Cluster) UpdateAttachment(target, containerID string, config *network.NetworkingConfig) error {
  1425  	c.mu.RLock()
  1426  	attacher, ok := c.attachers[attacherKey(target, containerID)]
  1427  	c.mu.RUnlock()
  1428  	if !ok || attacher == nil {
  1429  		return fmt.Errorf("could not find attacher for container %s to network %s", containerID, target)
  1430  	}
  1431  
  1432  	attacher.attachWaitCh <- config
  1433  	close(attacher.attachWaitCh)
  1434  	return nil
  1435  }
  1436  
  1437  // WaitForDetachment waits for the container to stop or detach from
  1438  // the network.
  1439  func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID, taskID, containerID string) error {
  1440  	c.mu.RLock()
  1441  	attacher, ok := c.attachers[attacherKey(networkName, containerID)]
  1442  	if !ok {
  1443  		attacher, ok = c.attachers[attacherKey(networkID, containerID)]
  1444  	}
  1445  	state := c.currentNodeState()
  1446  	if state.swarmNode == nil || state.swarmNode.Agent() == nil {
  1447  		c.mu.RUnlock()
  1448  		return errors.New("invalid cluster node while waiting for detachment")
  1449  	}
  1450  
  1451  	c.mu.RUnlock()
  1452  	agent := state.swarmNode.Agent()
  1453  	if ok && attacher != nil &&
  1454  		attacher.detachWaitCh != nil &&
  1455  		attacher.attachCompleteCh != nil {
  1456  		// Attachment may still be in progress, so wait for
  1457  		// the attachment to complete.
  1458  		select {
  1459  		case <-attacher.attachCompleteCh:
  1460  		case <-ctx.Done():
  1461  			return ctx.Err()
  1462  		}
  1463  
  1464  		if attacher.taskID == taskID {
  1465  			select {
  1466  			case <-attacher.detachWaitCh:
  1467  			case <-ctx.Done():
  1468  				return ctx.Err()
  1469  			}
  1470  		}
  1471  	}
  1472  
  1473  	return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
  1474  }
  1475  
  1476  // AttachNetwork generates an attachment request towards the manager.
  1477  func (c *Cluster) AttachNetwork(target string, containerID string, addresses []string) (*network.NetworkingConfig, error) {
  1478  	aKey := attacherKey(target, containerID)
  1479  	c.mu.Lock()
  1480  	state := c.currentNodeState()
  1481  	if state.swarmNode == nil || state.swarmNode.Agent() == nil {
  1482  		c.mu.Unlock()
  1483  		return nil, errors.New("invalid cluster node while attaching to network")
  1484  	}
  1485  	if attacher, ok := c.attachers[aKey]; ok {
  1486  		c.mu.Unlock()
  1487  		return attacher.config, nil
  1488  	}
  1489  
  1490  	agent := state.swarmNode.Agent()
  1491  	attachWaitCh := make(chan *network.NetworkingConfig)
  1492  	detachWaitCh := make(chan struct{})
  1493  	attachCompleteCh := make(chan struct{})
  1494  	c.attachers[aKey] = &attacher{
  1495  		attachWaitCh:     attachWaitCh,
  1496  		attachCompleteCh: attachCompleteCh,
  1497  		detachWaitCh:     detachWaitCh,
  1498  	}
  1499  	c.mu.Unlock()
  1500  
  1501  	ctx, cancel := c.getRequestContext()
  1502  	defer cancel()
  1503  
  1504  	taskID, err := agent.ResourceAllocator().AttachNetwork(ctx, containerID, target, addresses)
  1505  	if err != nil {
  1506  		c.mu.Lock()
  1507  		delete(c.attachers, aKey)
  1508  		c.mu.Unlock()
  1509  		return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
  1510  	}
  1511  
  1512  	c.mu.Lock()
  1513  	c.attachers[aKey].taskID = taskID
  1514  	close(attachCompleteCh)
  1515  	c.mu.Unlock()
  1516  
  1517  	logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
  1518  
  1519  	var config *network.NetworkingConfig
  1520  	select {
  1521  	case config = <-attachWaitCh:
  1522  	case <-ctx.Done():
  1523  		return nil, fmt.Errorf("attaching to network failed, make sure your network options are correct and check manager logs: %v", ctx.Err())
  1524  	}
  1525  
  1526  	c.mu.Lock()
  1527  	c.attachers[aKey].config = config
  1528  	c.mu.Unlock()
  1529  	return config, nil
  1530  }
  1531  
  1532  // DetachNetwork unblocks the waiters waiting on WaitForDetachment so
  1533  // that a request to detach can be generated towards the manager.
  1534  func (c *Cluster) DetachNetwork(target string, containerID string) error {
  1535  	aKey := attacherKey(target, containerID)
  1536  
  1537  	c.mu.Lock()
  1538  	attacher, ok := c.attachers[aKey]
  1539  	delete(c.attachers, aKey)
  1540  	c.mu.Unlock()
  1541  
  1542  	if !ok {
  1543  		return fmt.Errorf("could not find network attachment for container %s to network %s", containerID, target)
  1544  	}
  1545  
  1546  	close(attacher.detachWaitCh)
  1547  	return nil
  1548  }
  1549  
  1550  // CreateNetwork creates a new cluster managed network.
  1551  func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
  1552  	c.mu.RLock()
  1553  	defer c.mu.RUnlock()
  1554  
  1555  	state := c.currentNodeState()
  1556  	if !state.IsActiveManager() {
  1557  		return "", c.errNoManager(state)
  1558  	}
  1559  
  1560  	if runconfig.IsPreDefinedNetwork(s.Name) {
  1561  		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
  1562  		return "", apierrors.NewRequestForbiddenError(err)
  1563  	}
  1564  
  1565  	ctx, cancel := c.getRequestContext()
  1566  	defer cancel()
  1567  
  1568  	networkSpec := convert.BasicNetworkCreateToGRPC(s)
  1569  	r, err := state.controlClient.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
  1570  	if err != nil {
  1571  		return "", err
  1572  	}
  1573  
  1574  	return r.Network.ID, nil
  1575  }
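
        // As an illustrative sketch (the network name and field values below are
        // assumptions, not taken from this file), a caller could request an
        // attachable swarm-scoped overlay network like this:
        //
        //	id, err := c.CreateNetwork(apitypes.NetworkCreateRequest{
        //		Name: "app-overlay",
        //		NetworkCreate: apitypes.NetworkCreate{
        //			Driver:     "overlay",
        //			Attachable: true,
        //		},
        //	})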
  1576  
  1577  // RemoveNetwork removes a cluster network.
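        // The input is resolved via getNetwork (which accepts a network ID, an ID
        // prefix, or a name) before the removal request is sent to the manager.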
  1578  func (c *Cluster) RemoveNetwork(input string) error {
  1579  	c.mu.RLock()
  1580  	defer c.mu.RUnlock()
  1581  
  1582  	state := c.currentNodeState()
  1583  	if !state.IsActiveManager() {
  1584  		return c.errNoManager(state)
  1585  	}
  1586  
  1587  	ctx, cancel := c.getRequestContext()
  1588  	defer cancel()
  1589  
  1590  	network, err := getNetwork(ctx, state.controlClient, input)
  1591  	if err != nil {
  1592  		return err
  1593  	}
  1594  
  1595  	_, err = state.controlClient.RemoveNetwork(ctx, &swarmapi.RemoveNetworkRequest{NetworkID: network.ID})
  1596  	return err
  1597  }
  1598  
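        // populateNetworkID resolves each network referenced by the service spec to its
        // swarm network ID, rejecting networks that are not swarm-scoped.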
  1599  func (c *Cluster) populateNetworkID(ctx context.Context, client swarmapi.ControlClient, s *types.ServiceSpec) error {
  1600  	// Always prefer the NetworkAttachmentConfigs from the TaskTemplate,
  1601  	// but fall back to the service spec for backward compatibility.
  1602  	networks := s.TaskTemplate.Networks
  1603  	if len(networks) == 0 {
  1604  		networks = s.Networks
  1605  	}
  1606  
  1607  	for i, n := range networks {
  1608  		apiNetwork, err := getNetwork(ctx, client, n.Target)
  1609  		if err != nil {
  1610  			if ln, _ := c.config.Backend.FindNetwork(n.Target); ln != nil && !ln.Info().Dynamic() {
  1611  				err = fmt.Errorf("The network %s cannot be used with services. Only networks scoped to the swarm can be used, such as those created with the overlay driver.", ln.Name())
  1612  				return apierrors.NewRequestForbiddenError(err)
  1613  			}
  1614  			return err
  1615  		}
  1616  		networks[i].Target = apiNetwork.ID
  1617  	}
  1618  	return nil
  1619  }
  1620  
  1621  // Cleanup stops the active swarm node. It is run before daemon shutdown.
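        // If this node is an active manager, it first checks whether shutting it down
        // would cost the remaining cluster its raft quorum and logs an error if so.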
  1622  func (c *Cluster) Cleanup() {
  1623  	c.controlMutex.Lock()
  1624  	defer c.controlMutex.Unlock()
  1625  
  1626  	c.mu.Lock()
  1627  	node := c.nr
  1628  	if node == nil {
  1629  		c.mu.Unlock()
  1630  		return
  1631  	}
  1632  	defer c.mu.Unlock()
  1633  	state := c.currentNodeState()
  1634  	if state.IsActiveManager() {
  1635  		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
  1636  		if err == nil {
  1637  			singlenode := active && isLastManager(reachable, unreachable)
  1638  			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
  1639  				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
  1640  			}
  1641  		}
  1642  	}
  1643  	if err := node.Stop(); err != nil {
  1644  		logrus.Errorf("failed to shut down cluster node: %v", err)
  1645  		signal.DumpStacks("")
  1646  	}
  1647  	c.nr = nil
  1648  }
  1649  
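        // managerStats counts the reachable and unreachable managers in the cluster and
        // reports whether the node identified by currentNodeID is one of the reachable
        // managers.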
  1650  func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
  1651  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  1652  	defer cancel()
  1653  	nodes, err := client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
  1654  	if err != nil {
  1655  		return false, 0, 0, err
  1656  	}
  1657  	for _, n := range nodes.Nodes {
  1658  		if n.ManagerStatus != nil {
  1659  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
  1660  				reachable++
  1661  				if n.ID == currentNodeID {
  1662  					current = true
  1663  				}
  1664  			}
  1665  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
  1666  				unreachable++
  1667  			}
  1668  		}
  1669  	}
  1670  	return
  1671  }
  1672  
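        // validateAndSanitizeInitRequest normalizes the listen address and defaults the
        // swarm spec name to "default", rejecting any other name.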
  1673  func validateAndSanitizeInitRequest(req *types.InitRequest) error {
  1674  	var err error
  1675  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1676  	if err != nil {
  1677  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1678  	}
  1679  
  1680  	if req.Spec.Annotations.Name == "" {
  1681  		req.Spec.Annotations.Name = "default"
  1682  	} else if req.Spec.Annotations.Name != "default" {
  1683  		return errors.New(`swarm spec must be named "default"`)
  1684  	}
  1685  
  1686  	return nil
  1687  }
  1688  
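        // validateAndSanitizeJoinRequest normalizes the listen address and every remote
        // manager address, and requires at least one RemoteAddr to join through.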
  1689  func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
  1690  	var err error
  1691  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1692  	if err != nil {
  1693  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1694  	}
  1695  	if len(req.RemoteAddrs) == 0 {
  1696  		return errors.New("at least 1 RemoteAddr is required to join")
  1697  	}
  1698  	for i := range req.RemoteAddrs {
  1699  		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
  1700  		if err != nil {
  1701  			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
  1702  		}
  1703  	}
  1704  	return nil
  1705  }
  1706  
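        // validateAddr normalizes addr into host:port form via opts.ParseTCPAddr and
        // strips the tcp:// prefix. A parse error is swallowed and the original value
        // is returned unchanged, apparently so that inputs that are not plain TCP
        // addresses survive until they are validated further down the stack.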
  1707  func validateAddr(addr string) (string, error) {
  1708  	if addr == "" {
  1709  		return addr, errors.New("invalid empty address")
  1710  	}
  1711  	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
  1712  	if err != nil {
  1713  		return addr, nil
  1714  	}
  1715  	return strings.TrimPrefix(newaddr, "tcp://"), nil
  1716  }
  1717  
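        // initClusterSpec waits (bounded by a five-second timeout) for the node's
        // control socket, polls ListClusters until the newly created cluster object
        // shows up, merges the user-provided spec into swarmkit's defaults, and stores
        // the result with UpdateCluster.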
  1718  func initClusterSpec(node *swarmnode.Node, spec types.Spec) error {
  1719  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
        	defer cancel()
  1720  	for conn := range node.ListenControlSocket(ctx) {
  1721  		if ctx.Err() != nil {
  1722  			return ctx.Err()
  1723  		}
  1724  		if conn != nil {
  1725  			client := swarmapi.NewControlClient(conn)
  1726  			var cluster *swarmapi.Cluster
  1727  			for i := 0; ; i++ {
  1728  				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
  1729  				if err != nil {
  1730  					return fmt.Errorf("error on listing clusters: %v", err)
  1731  				}
  1732  				if len(lcr.Clusters) == 0 {
  1733  					if i < 10 {
  1734  						time.Sleep(200 * time.Millisecond)
  1735  						continue
  1736  					}
  1737  					return errors.New("empty list of clusters was returned")
  1738  				}
  1739  				cluster = lcr.Clusters[0]
  1740  				break
  1741  			}
  1742  			// During init we take the initial default values from swarmkit and merge
  1743  			// any non-nil, non-zero value from the requested spec into the gRPC spec,
  1744  			// leaving the defaults in place for unset fields.
  1745  			// Note that this differs from Update(): there we expect the user to supply
  1746  			// the complete cluster spec, since they already know the existing one and
  1747  			// which fields they want to change.
  1748  			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
  1749  			if err != nil {
  1750  				return fmt.Errorf("error updating cluster settings: %v", err)
  1751  			}
  1752  			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
  1753  				ClusterID:      cluster.ID,
  1754  				ClusterVersion: &cluster.Meta.Version,
  1755  				Spec:           &clusterSpec,
  1756  			})
  1757  			if err != nil {
  1758  				return fmt.Errorf("error updating cluster settings: %v", err)
  1759  			}
  1760  			return nil
  1761  		}
  1762  	}
  1763  	return ctx.Err()
  1764  }
  1765  
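        // detectLockedError maps swarmkit's ErrInvalidUnlockKey onto errSwarmLocked so
        // that callers report the swarm as locked instead of surfacing the raw error.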
  1766  func detectLockedError(err error) error {
  1767  	if err == swarmnode.ErrInvalidUnlockKey {
  1768  		return errors.WithStack(errSwarmLocked)
  1769  	}
  1770  	return err
  1771  }