github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/daemon/cluster/cluster.go

     1  package cluster
     2  
     3  //
     4  // ## Swarmkit integration
     5  //
      6  // Cluster - static configurable object for accessing everything swarm-related.
      7  // Contains methods for connecting to and controlling the cluster. Exists
      8  // always, even if swarm mode is not enabled.
     9  //
     10  // NodeRunner - manager for starting the swarmkit node. Present if and only
     11  // if swarm mode is enabled. Implements a backoff restart loop in case of
     12  // errors.
    13  //
     14  // NodeState - information about the current node status, including access to
     15  // gRPC clients if a manager is active.
    16  //
    17  // ### Locking
    18  //
     19  // `cluster.controlMutex` - taken for the whole lifecycle of the processes that
     20  // can reconfigure the cluster (init/join/leave, etc.). Ensures that one
     21  // reconfiguration action has fully completed before another can start.
    22  //
     23  // `cluster.mu` - taken when the actual changes to the cluster configuration
     24  // happen. Different from `controlMutex` because in some cases we need to
     25  // access the current cluster state even while a long-running reconfiguration
     26  // is going on. For example, the network stack may ask for the current cluster
     27  // state in the middle of a shutdown. Any time the current cluster state is
     28  // needed, take the read lock of `cluster.mu`. If you are writing an API
     29  // responder that returns synchronously, hold `cluster.mu.RLock()` for the
     30  // duration of the whole handler function. That ensures that the node will not
     31  // be shut down until the handler has finished.
    32  //
     33  // NodeRunner implements its own internal locks that should not be used outside
     34  // of the struct. Instead, call the `nodeRunner.State()` method to get the
     35  // current state of the cluster (you still need `cluster.mu.RLock()` to access
     36  // the `cluster.nr` reference itself). Most of the changes in NodeRunner happen
     37  // because of an external event (network problem, unexpected swarmkit error) and
     38  // Docker shouldn't take any locks that delay these changes from happening.
    39  //
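// As an illustrative sketch of the handler pattern described above (the
// exported methods below, such as Inspect, follow this shape; exampleHandler
// itself is hypothetical and not part of this file):
//
//	func (c *Cluster) exampleHandler() (types.Swarm, error) {
//		c.mu.RLock()
//		defer c.mu.RUnlock() // held for the whole handler so the node cannot shut down mid-request
//		state := c.currentNodeState()
//		if !state.IsActiveManager() {
//			return types.Swarm{}, c.errNoManager(state)
//		}
//		// ... talk to swarmkit via state.controlClient ...
//		return types.Swarm{}, nil
//	}
//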
    40  
    41  import (
    42  	"crypto/x509"
    43  	"encoding/base64"
    44  	"encoding/json"
    45  	"fmt"
    46  	"io"
    47  	"net"
    48  	"os"
    49  	"path/filepath"
    50  	"strings"
    51  	"sync"
    52  	"time"
    53  
    54  	"github.com/Sirupsen/logrus"
    55  	"github.com/docker/distribution/digest"
    56  	distreference "github.com/docker/distribution/reference"
    57  	apierrors "github.com/docker/docker/api/errors"
    58  	apitypes "github.com/docker/docker/api/types"
    59  	"github.com/docker/docker/api/types/backend"
    60  	"github.com/docker/docker/api/types/filters"
    61  	"github.com/docker/docker/api/types/network"
    62  	types "github.com/docker/docker/api/types/swarm"
    63  	"github.com/docker/docker/daemon/cluster/convert"
    64  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    65  	"github.com/docker/docker/daemon/logger"
    66  	"github.com/docker/docker/opts"
    67  	"github.com/docker/docker/pkg/ioutils"
    68  	"github.com/docker/docker/pkg/signal"
    69  	"github.com/docker/docker/pkg/stdcopy"
    70  	"github.com/docker/docker/reference"
    71  	"github.com/docker/docker/runconfig"
    72  	swarmapi "github.com/docker/swarmkit/api"
    73  	"github.com/docker/swarmkit/manager/encryption"
    74  	swarmnode "github.com/docker/swarmkit/node"
    75  	"github.com/docker/swarmkit/protobuf/ptypes"
    76  	"github.com/pkg/errors"
    77  	"golang.org/x/net/context"
    78  )
    79  
    80  const swarmDirName = "swarm"
    81  const controlSocket = "control.sock"
    82  const swarmConnectTimeout = 20 * time.Second
    83  const swarmRequestTimeout = 20 * time.Second
    84  const stateFile = "docker-state.json"
    85  const defaultAddr = "0.0.0.0:2377"
    86  
    87  const (
    88  	initialReconnectDelay = 100 * time.Millisecond
    89  	maxReconnectDelay     = 30 * time.Second
    90  	contextPrefix         = "com.docker.swarm"
    91  )
    92  
    93  // errNoSwarm is returned on leaving a cluster that was never initialized
    94  var errNoSwarm = fmt.Errorf("This node is not part of a swarm")
    95  
    96  // errSwarmExists is returned on initialize or join request for a cluster that has already been activated
    97  var errSwarmExists = fmt.Errorf("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")
    98  
    99  // errSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached.
   100  var errSwarmJoinTimeoutReached = fmt.Errorf("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")
   101  
   102  // errSwarmLocked is returned if the swarm is encrypted and needs a key to unlock it.
   103  var errSwarmLocked = fmt.Errorf("Swarm is encrypted and needs to be unlocked before it can be used. Please use \"docker swarm unlock\" to unlock it.")
   104  
    105  // errSwarmCertificatesExpired is returned if docker was not started for the whole validity period of the certificates, so they had no chance to renew automatically.
   106  var errSwarmCertificatesExpired = errors.New("Swarm certificates have expired. To replace them, leave the swarm and join again.")
   107  
   108  // NetworkSubnetsProvider exposes functions for retrieving the subnets
   109  // of networks managed by Docker, so they can be filtered.
   110  type NetworkSubnetsProvider interface {
   111  	V4Subnets() []net.IPNet
   112  	V6Subnets() []net.IPNet
   113  }
   114  
   115  // Config provides values for Cluster.
   116  type Config struct {
   117  	Root                   string
   118  	Name                   string
   119  	Backend                executorpkg.Backend
   120  	NetworkSubnetsProvider NetworkSubnetsProvider
   121  
   122  	// DefaultAdvertiseAddr is the default host/IP or network interface to use
   123  	// if no AdvertiseAddr value is specified.
   124  	DefaultAdvertiseAddr string
   125  
   126  	// path to store runtime state, such as the swarm control socket
   127  	RuntimeRoot string
   128  }
   129  
   130  // Cluster provides capabilities to participate in a cluster as a worker or a
   131  // manager.
   132  type Cluster struct {
   133  	mu           sync.RWMutex
   134  	controlMutex sync.RWMutex // protect init/join/leave user operations
   135  	nr           *nodeRunner
   136  	root         string
   137  	runtimeRoot  string
   138  	config       Config
   139  	configEvent  chan struct{} // todo: make this array and goroutine safe
   140  	attachers    map[string]*attacher
   141  }
   142  
    143  // attacher manages the in-memory attachment state of a container's
    144  // attachment to a global-scope network managed by the swarm manager. It
    145  // helps in identifying the attachment ID via the taskID and the
    146  // corresponding attachment configuration obtained from the manager.
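//
// The channels implement a small handshake between the methods further below:
// AttachNetwork registers the attacher and blocks on attachWaitCh;
// UpdateAttachment sends the attachment config received from the manager on
// attachWaitCh; WaitForDetachment waits on attachCompleteCh (and, for the same
// task, on detachWaitCh); DetachNetwork closes detachWaitCh to release that
// waiter.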
   147  type attacher struct {
   148  	taskID           string
   149  	config           *network.NetworkingConfig
   150  	attachWaitCh     chan *network.NetworkingConfig
   151  	attachCompleteCh chan struct{}
   152  	detachWaitCh     chan struct{}
   153  }
   154  
   155  // New creates a new Cluster instance using provided config.
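//
// A minimal usage sketch (hypothetical wiring; in practice the daemon supplies
// its own executorpkg.Backend implementation and paths):
//
//	c, err := New(Config{
//		Root:        "/var/lib/docker",
//		Name:        "default",
//		Backend:     daemonBackend, // hypothetical executorpkg.Backend implementation
//		RuntimeRoot: "/var/run/docker",
//	})
//	if err != nil {
//		return err
//	}
//	defer c.Cleanup()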
   156  func New(config Config) (*Cluster, error) {
   157  	root := filepath.Join(config.Root, swarmDirName)
   158  	if err := os.MkdirAll(root, 0700); err != nil {
   159  		return nil, err
   160  	}
   161  	if config.RuntimeRoot == "" {
   162  		config.RuntimeRoot = root
   163  	}
   164  	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
   165  		return nil, err
   166  	}
   167  	c := &Cluster{
   168  		root:        root,
   169  		config:      config,
   170  		configEvent: make(chan struct{}, 10),
   171  		runtimeRoot: config.RuntimeRoot,
   172  		attachers:   make(map[string]*attacher),
   173  	}
   174  
   175  	nodeConfig, err := loadPersistentState(root)
   176  	if err != nil {
   177  		if os.IsNotExist(err) {
   178  			return c, nil
   179  		}
   180  		return nil, err
   181  	}
   182  
   183  	nr, err := c.newNodeRunner(*nodeConfig)
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  	c.nr = nr
   188  
   189  	select {
   190  	case <-time.After(swarmConnectTimeout):
   191  		logrus.Error("swarm component could not be started before timeout was reached")
   192  	case err := <-nr.Ready():
   193  		if err != nil {
   194  			if errors.Cause(err) == errSwarmLocked {
   195  				return c, nil
   196  			}
   197  			if err, ok := errors.Cause(c.nr.err).(x509.CertificateInvalidError); ok && err.Reason == x509.Expired {
   198  				return c, nil
   199  			}
   200  			return nil, errors.Wrap(err, "swarm component could not be started")
   201  		}
   202  	}
   203  	return c, nil
   204  }
   205  
   206  func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
   207  	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
   208  		return nil, err
   209  	}
   210  
   211  	actualLocalAddr := conf.LocalAddr
   212  	if actualLocalAddr == "" {
   213  		// If localAddr was not specified, resolve it automatically
   214  		// based on the route to joinAddr. localAddr can only be left
   215  		// empty on "join".
   216  		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
   217  		if err != nil {
   218  			return nil, fmt.Errorf("could not parse listen address: %v", err)
   219  		}
   220  
   221  		listenAddrIP := net.ParseIP(listenHost)
   222  		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
   223  			actualLocalAddr = listenHost
   224  		} else {
   225  			if conf.RemoteAddr == "" {
    226  				// Should never happen except when using swarms created by
    227  				// old versions that didn't save remoteAddr.
   228  				conf.RemoteAddr = "8.8.8.8:53"
   229  			}
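			// Dialing UDP here does not actually send any packets; it only
			// makes the kernel pick the local source address it would use to
			// reach RemoteAddr, which we then take as this node's local address.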
   230  			conn, err := net.Dial("udp", conf.RemoteAddr)
   231  			if err != nil {
   232  				return nil, fmt.Errorf("could not find local IP address: %v", err)
   233  			}
   234  			localHostPort := conn.LocalAddr().String()
   235  			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
   236  			conn.Close()
   237  		}
   238  	}
   239  
   240  	nr := &nodeRunner{cluster: c}
   241  	nr.actualLocalAddr = actualLocalAddr
   242  
   243  	if err := nr.Start(conf); err != nil {
   244  		return nil, err
   245  	}
   246  
   247  	c.config.Backend.SetClusterProvider(c)
   248  
   249  	return nr, nil
   250  }
   251  
   252  // Init initializes new cluster from user provided request.
   253  func (c *Cluster) Init(req types.InitRequest) (string, error) {
   254  	c.controlMutex.Lock()
   255  	defer c.controlMutex.Unlock()
   256  	c.mu.Lock()
   257  	if c.nr != nil {
   258  		if req.ForceNewCluster {
   259  			if err := c.nr.Stop(); err != nil {
   260  				c.mu.Unlock()
   261  				return "", err
   262  			}
   263  		} else {
   264  			c.mu.Unlock()
   265  			return "", errSwarmExists
   266  		}
   267  	}
   268  	c.mu.Unlock()
   269  
   270  	if err := validateAndSanitizeInitRequest(&req); err != nil {
   271  		return "", err
   272  	}
   273  
   274  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   275  	if err != nil {
   276  		return "", err
   277  	}
   278  
   279  	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   280  	if err != nil {
   281  		return "", err
   282  	}
   283  
   284  	localAddr := listenHost
   285  
    286  	// If the local address is undetermined, the advertise address
    287  	// will be used as the local address, if it belongs to this system.
    288  	// If the advertise address is not local, then we try to find
    289  	// a system address to use as the local address. If this fails,
    290  	// we give up and ask the user to pass the listen address.
   291  	if net.ParseIP(localAddr).IsUnspecified() {
   292  		advertiseIP := net.ParseIP(advertiseHost)
   293  
   294  		found := false
   295  		for _, systemIP := range listSystemIPs() {
   296  			if systemIP.Equal(advertiseIP) {
   297  				localAddr = advertiseIP.String()
   298  				found = true
   299  				break
   300  			}
   301  		}
   302  
   303  		if !found {
   304  			ip, err := c.resolveSystemAddr()
   305  			if err != nil {
   306  				logrus.Warnf("Could not find a local address: %v", err)
   307  				return "", errMustSpecifyListenAddr
   308  			}
   309  			localAddr = ip.String()
   310  		}
   311  	}
   312  
   313  	if !req.ForceNewCluster {
   314  		clearPersistentState(c.root)
   315  	}
   316  
   317  	nr, err := c.newNodeRunner(nodeStartConfig{
   318  		forceNewCluster: req.ForceNewCluster,
   319  		autolock:        req.AutoLockManagers,
   320  		LocalAddr:       localAddr,
   321  		ListenAddr:      net.JoinHostPort(listenHost, listenPort),
   322  		AdvertiseAddr:   net.JoinHostPort(advertiseHost, advertisePort),
   323  	})
   324  	if err != nil {
   325  		return "", err
   326  	}
   327  	c.mu.Lock()
   328  	c.nr = nr
   329  	c.mu.Unlock()
   330  
   331  	if err := <-nr.Ready(); err != nil {
   332  		if !req.ForceNewCluster { // if failure on first attempt don't keep state
   333  			if err := clearPersistentState(c.root); err != nil {
   334  				return "", err
   335  			}
   336  		}
    337  		// The error is always non-nil at this point, so drop the node
    338  		// runner reference unconditionally.
    339  		c.mu.Lock()
    340  		c.nr = nil
    341  		c.mu.Unlock()
   342  		return "", err
   343  	}
   344  	state := nr.State()
   345  	if state.swarmNode == nil { // should never happen but protect from panic
   346  		return "", errors.New("invalid cluster state for spec initialization")
   347  	}
   348  	if err := initClusterSpec(state.swarmNode, req.Spec); err != nil {
   349  		return "", err
   350  	}
   351  	return state.NodeID(), nil
   352  }
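
// A hedged usage sketch for Init (the field names match what the method reads
// above; the values are purely illustrative):
//
//	nodeID, err := c.Init(types.InitRequest{
//		ListenAddr:       "0.0.0.0:2377",
//		AdvertiseAddr:    "192.168.99.100:2377",
//		ForceNewCluster:  false,
//		AutoLockManagers: false,
//	})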
   353  
   354  // Join makes current Cluster part of an existing swarm cluster.
   355  func (c *Cluster) Join(req types.JoinRequest) error {
   356  	c.controlMutex.Lock()
   357  	defer c.controlMutex.Unlock()
   358  	c.mu.Lock()
   359  	if c.nr != nil {
   360  		c.mu.Unlock()
   361  		return errSwarmExists
   362  	}
   363  	c.mu.Unlock()
   364  
   365  	if err := validateAndSanitizeJoinRequest(&req); err != nil {
   366  		return err
   367  	}
   368  
   369  	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
   370  	if err != nil {
   371  		return err
   372  	}
   373  
   374  	var advertiseAddr string
   375  	if req.AdvertiseAddr != "" {
   376  		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
   377  		// For joining, we don't need to provide an advertise address,
   378  		// since the remote side can detect it.
   379  		if err == nil {
   380  			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
   381  		}
   382  	}
   383  
   384  	clearPersistentState(c.root)
   385  
   386  	nr, err := c.newNodeRunner(nodeStartConfig{
   387  		RemoteAddr:    req.RemoteAddrs[0],
   388  		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
   389  		AdvertiseAddr: advertiseAddr,
   390  		joinAddr:      req.RemoteAddrs[0],
   391  		joinToken:     req.JoinToken,
   392  	})
   393  	if err != nil {
   394  		return err
   395  	}
   396  
   397  	c.mu.Lock()
   398  	c.nr = nr
   399  	c.mu.Unlock()
   400  
   401  	select {
   402  	case <-time.After(swarmConnectTimeout):
   403  		return errSwarmJoinTimeoutReached
   404  	case err := <-nr.Ready():
   405  		if err != nil {
   406  			c.mu.Lock()
   407  			c.nr = nil
   408  			c.mu.Unlock()
   409  		}
   410  		return err
   411  	}
   412  }
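
// A similar hedged sketch for Join (RemoteAddrs and JoinToken as read above;
// values are illustrative):
//
//	err := c.Join(types.JoinRequest{
//		ListenAddr:  "0.0.0.0:2377",
//		RemoteAddrs: []string{"192.168.99.101:2377"},
//		JoinToken:   "SWMTKN-1-...", // as printed by "docker swarm join-token"
//	})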
   413  
   414  // GetUnlockKey returns the unlock key for the swarm.
   415  func (c *Cluster) GetUnlockKey() (string, error) {
   416  	c.mu.RLock()
   417  	defer c.mu.RUnlock()
   418  
   419  	state := c.currentNodeState()
   420  	if !state.IsActiveManager() {
   421  		return "", c.errNoManager(state)
   422  	}
   423  
   424  	ctx, cancel := c.getRequestContext()
   425  	defer cancel()
   426  
   427  	client := swarmapi.NewCAClient(state.grpcConn)
   428  
   429  	r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{})
   430  	if err != nil {
   431  		return "", err
   432  	}
   433  
   434  	if len(r.UnlockKey) == 0 {
   435  		// no key
   436  		return "", nil
   437  	}
   438  
   439  	return encryption.HumanReadableKey(r.UnlockKey), nil
   440  }
   441  
   442  // UnlockSwarm provides a key to decrypt data that is encrypted at rest.
   443  func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error {
   444  	c.controlMutex.Lock()
   445  	defer c.controlMutex.Unlock()
   446  
   447  	c.mu.RLock()
   448  	state := c.currentNodeState()
   449  	nr := c.nr
   450  	c.mu.RUnlock()
   451  	if nr == nil || errors.Cause(state.err) != errSwarmLocked {
   452  		return errors.New("swarm is not locked")
   453  	}
   454  	key, err := encryption.ParseHumanReadableKey(req.UnlockKey)
   455  	if err != nil {
   456  		return err
   457  	}
   458  
   459  	config := nr.config
   460  	config.lockKey = key
   461  	if err := nr.Stop(); err != nil {
   462  		return err
   463  	}
   464  	nr, err = c.newNodeRunner(config)
   465  	if err != nil {
   466  		return err
   467  	}
   468  
   469  	c.mu.Lock()
   470  	c.nr = nr
   471  	c.mu.Unlock()
   472  
   473  	if err := <-nr.Ready(); err != nil {
   474  		if errors.Cause(err) == errSwarmLocked {
   475  			return errors.New("swarm could not be unlocked: invalid key provided")
   476  		}
   477  		return fmt.Errorf("swarm component could not be started: %v", err)
   478  	}
   479  	return nil
   480  }
   481  
   482  // Leave shuts down Cluster and removes current state.
   483  func (c *Cluster) Leave(force bool) error {
   484  	c.controlMutex.Lock()
   485  	defer c.controlMutex.Unlock()
   486  
   487  	c.mu.Lock()
   488  	nr := c.nr
   489  	if nr == nil {
   490  		c.mu.Unlock()
   491  		return errNoSwarm
   492  	}
   493  
   494  	state := c.currentNodeState()
   495  
   496  	if errors.Cause(state.err) == errSwarmLocked && !force {
   497  		// leave a locked swarm without --force is not allowed
   498  		c.mu.Unlock()
   499  		return errors.New("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message.")
   500  	}
   501  
   502  	if state.IsManager() && !force {
   503  		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
   504  		if state.IsActiveManager() {
   505  			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
   506  			if err == nil {
   507  				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
   508  					if isLastManager(reachable, unreachable) {
   509  						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
   510  						c.mu.Unlock()
   511  						return fmt.Errorf(msg)
    512  						return errors.New(msg)
   513  					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
   514  				}
   515  			}
   516  		} else {
   517  			msg += "Doing so may lose the consensus of your cluster. "
   518  		}
   519  
   520  		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
   521  		c.mu.Unlock()
    522  		return errors.New(msg)
   523  	}
   524  	// release readers in here
   525  	if err := nr.Stop(); err != nil {
   526  		logrus.Errorf("failed to shut down cluster node: %v", err)
   527  		signal.DumpStacks("")
   528  		c.mu.Unlock()
   529  		return err
   530  	}
   531  	c.nr = nil
   532  	c.mu.Unlock()
   533  	if nodeID := state.NodeID(); nodeID != "" {
   534  		nodeContainers, err := c.listContainerForNode(nodeID)
   535  		if err != nil {
   536  			return err
   537  		}
   538  		for _, id := range nodeContainers {
   539  			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
   540  				logrus.Errorf("error removing %v: %v", id, err)
   541  			}
   542  		}
   543  	}
   544  
   545  	c.configEvent <- struct{}{}
   546  	// todo: cleanup optional?
   547  	if err := clearPersistentState(c.root); err != nil {
   548  		return err
   549  	}
   550  	c.config.Backend.SetClusterProvider(nil)
   551  	return nil
   552  }
   553  
   554  func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
   555  	var ids []string
   556  	filters := filters.NewArgs()
   557  	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
   558  	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
   559  		Filters: filters,
   560  	})
   561  	if err != nil {
   562  		return []string{}, err
   563  	}
   564  	for _, c := range containers {
   565  		ids = append(ids, c.ID)
   566  	}
   567  	return ids, nil
   568  }
   569  
    570  func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum loss
   571  	return context.WithTimeout(context.Background(), swarmRequestTimeout)
   572  }
   573  
   574  // Inspect retrieves the configuration properties of a managed swarm cluster.
   575  func (c *Cluster) Inspect() (types.Swarm, error) {
   576  	c.mu.RLock()
   577  	defer c.mu.RUnlock()
   578  
   579  	state := c.currentNodeState()
   580  	if !state.IsActiveManager() {
   581  		return types.Swarm{}, c.errNoManager(state)
   582  	}
   583  
   584  	ctx, cancel := c.getRequestContext()
   585  	defer cancel()
   586  
   587  	swarm, err := getSwarm(ctx, state.controlClient)
   588  	if err != nil {
   589  		return types.Swarm{}, err
   590  	}
   591  
   592  	return convert.SwarmFromGRPC(*swarm), nil
   593  }
   594  
   595  // Update updates configuration of a managed swarm cluster.
   596  func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
   597  	c.mu.RLock()
   598  	defer c.mu.RUnlock()
   599  
   600  	state := c.currentNodeState()
   601  	if !state.IsActiveManager() {
   602  		return c.errNoManager(state)
   603  	}
   604  
   605  	ctx, cancel := c.getRequestContext()
   606  	defer cancel()
   607  
   608  	swarm, err := getSwarm(ctx, state.controlClient)
   609  	if err != nil {
   610  		return err
   611  	}
   612  
    613  	// In update, the client should provide the complete spec of the swarm, including
    614  	// Name and Labels. If a field is specified with 0 or nil, then the default value
    615  	// will be used by swarmkit.
   616  	clusterSpec, err := convert.SwarmSpecToGRPC(spec)
   617  	if err != nil {
   618  		return err
   619  	}
   620  
   621  	_, err = state.controlClient.UpdateCluster(
   622  		ctx,
   623  		&swarmapi.UpdateClusterRequest{
   624  			ClusterID: swarm.ID,
   625  			Spec:      &clusterSpec,
   626  			ClusterVersion: &swarmapi.Version{
   627  				Index: version,
   628  			},
   629  			Rotation: swarmapi.KeyRotation{
   630  				WorkerJoinToken:  flags.RotateWorkerToken,
   631  				ManagerJoinToken: flags.RotateManagerToken,
   632  				ManagerUnlockKey: flags.RotateManagerUnlockKey,
   633  			},
   634  		},
   635  	)
   636  	return err
   637  }
   638  
   639  // IsManager returns true if Cluster is participating as a manager.
   640  func (c *Cluster) IsManager() bool {
   641  	c.mu.RLock()
   642  	defer c.mu.RUnlock()
   643  	return c.currentNodeState().IsActiveManager()
   644  }
   645  
   646  // IsAgent returns true if Cluster is participating as a worker/agent.
   647  func (c *Cluster) IsAgent() bool {
   648  	c.mu.RLock()
   649  	defer c.mu.RUnlock()
   650  	return c.currentNodeState().status == types.LocalNodeStateActive
   651  }
   652  
   653  // GetLocalAddress returns the local address.
   654  func (c *Cluster) GetLocalAddress() string {
   655  	c.mu.RLock()
   656  	defer c.mu.RUnlock()
   657  	return c.currentNodeState().actualLocalAddr
   658  }
   659  
   660  // GetListenAddress returns the listen address.
   661  func (c *Cluster) GetListenAddress() string {
   662  	c.mu.RLock()
   663  	defer c.mu.RUnlock()
   664  	if c.nr != nil {
   665  		return c.nr.config.ListenAddr
   666  	}
   667  	return ""
   668  }
   669  
   670  // GetAdvertiseAddress returns the remotely reachable address of this node.
   671  func (c *Cluster) GetAdvertiseAddress() string {
   672  	c.mu.RLock()
   673  	defer c.mu.RUnlock()
   674  	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
   675  		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
   676  		return advertiseHost
   677  	}
   678  	return c.currentNodeState().actualLocalAddr
   679  }
   680  
   681  // GetRemoteAddress returns a known advertise address of a remote manager if
   682  // available.
   683  // todo: change to array/connect with info
   684  func (c *Cluster) GetRemoteAddress() string {
   685  	c.mu.RLock()
   686  	defer c.mu.RUnlock()
   687  	return c.getRemoteAddress()
   688  }
   689  
   690  func (c *Cluster) getRemoteAddress() string {
   691  	state := c.currentNodeState()
   692  	if state.swarmNode == nil {
   693  		return ""
   694  	}
   695  	nodeID := state.swarmNode.NodeID()
   696  	for _, r := range state.swarmNode.Remotes() {
   697  		if r.NodeID != nodeID {
   698  			return r.Addr
   699  		}
   700  	}
   701  	return ""
   702  }
   703  
   704  // ListenClusterEvents returns a channel that receives messages on cluster
   705  // participation changes.
   706  // todo: make cancelable and accessible to multiple callers
   707  func (c *Cluster) ListenClusterEvents() <-chan struct{} {
   708  	return c.configEvent
   709  }
   710  
   711  // Info returns information about the current cluster state.
   712  func (c *Cluster) Info() types.Info {
   713  	info := types.Info{
   714  		NodeAddr: c.GetAdvertiseAddress(),
   715  	}
   716  	c.mu.RLock()
   717  	defer c.mu.RUnlock()
   718  
   719  	state := c.currentNodeState()
   720  	info.LocalNodeState = state.status
   721  	if state.err != nil {
   722  		info.Error = state.err.Error()
   723  	}
   724  
   725  	ctx, cancel := c.getRequestContext()
   726  	defer cancel()
   727  
   728  	if state.IsActiveManager() {
   729  		info.ControlAvailable = true
   730  		swarm, err := c.Inspect()
   731  		if err != nil {
   732  			info.Error = err.Error()
   733  		}
   734  
   735  		// Strip JoinTokens
   736  		info.Cluster = swarm.ClusterInfo
   737  
   738  		if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err == nil {
   739  			info.Nodes = len(r.Nodes)
   740  			for _, n := range r.Nodes {
   741  				if n.ManagerStatus != nil {
   742  					info.Managers = info.Managers + 1
   743  				}
   744  			}
   745  		}
   746  	}
   747  
   748  	if state.swarmNode != nil {
   749  		for _, r := range state.swarmNode.Remotes() {
   750  			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
   751  		}
   752  		info.NodeID = state.swarmNode.NodeID()
   753  	}
   754  
   755  	return info
   756  }
   757  
   758  // currentNodeState should not be called without a read lock
   759  func (c *Cluster) currentNodeState() nodeState {
   760  	return c.nr.State()
   761  }
   762  
   763  // errNoManager returns error describing why manager commands can't be used.
   764  // Call with read lock.
   765  func (c *Cluster) errNoManager(st nodeState) error {
   766  	if st.swarmNode == nil {
   767  		if errors.Cause(st.err) == errSwarmLocked {
   768  			return errSwarmLocked
   769  		}
   770  		if st.err == errSwarmCertificatesExpired {
   771  			return errSwarmCertificatesExpired
   772  		}
   773  		return fmt.Errorf("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
   774  	}
   775  	if st.swarmNode.Manager() != nil {
   776  		return fmt.Errorf("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
   777  	}
   778  	return fmt.Errorf("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
   779  }
   780  
   781  // GetServices returns all services of a managed swarm cluster.
   782  func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
   783  	c.mu.RLock()
   784  	defer c.mu.RUnlock()
   785  
   786  	state := c.currentNodeState()
   787  	if !state.IsActiveManager() {
   788  		return nil, c.errNoManager(state)
   789  	}
   790  
   791  	filters, err := newListServicesFilters(options.Filters)
   792  	if err != nil {
   793  		return nil, err
   794  	}
   795  	ctx, cancel := c.getRequestContext()
   796  	defer cancel()
   797  
   798  	r, err := state.controlClient.ListServices(
   799  		ctx,
   800  		&swarmapi.ListServicesRequest{Filters: filters})
   801  	if err != nil {
   802  		return nil, err
   803  	}
   804  
   805  	services := []types.Service{}
   806  
   807  	for _, service := range r.Services {
   808  		services = append(services, convert.ServiceFromGRPC(*service))
   809  	}
   810  
   811  	return services, nil
   812  }
   813  
   814  // imageWithDigestString takes an image such as name or name:tag
   815  // and returns the image pinned to a digest, such as name@sha256:34234...
    816  // Due to the differences between the docker/docker/reference and the
    817  // docker/distribution/reference packages, we're parsing the image twice.
   818  // As the two packages converge, this function should be simplified.
   819  // TODO(nishanttotla): After the packages converge, the function must
   820  // convert distreference.Named -> distreference.Canonical, and the logic simplified.
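// For example (illustrative only; the exact string depends on how the two
// reference packages normalize the name), an input of "redis:3.0.7" would be
// returned as "redis:3.0.7@sha256:<digest reported by the registry>".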
   821  func (c *Cluster) imageWithDigestString(ctx context.Context, image string, authConfig *apitypes.AuthConfig) (string, error) {
   822  	if _, err := digest.ParseDigest(image); err == nil {
   823  		return "", errors.New("image reference is an image ID")
   824  	}
   825  	ref, err := distreference.ParseNamed(image)
   826  	if err != nil {
   827  		return "", err
   828  	}
   829  	// only query registry if not a canonical reference (i.e. with digest)
   830  	if _, ok := ref.(distreference.Canonical); !ok {
   831  		// create a docker/docker/reference Named object because GetRepository needs it
   832  		dockerRef, err := reference.ParseNamed(image)
   833  		if err != nil {
   834  			return "", err
   835  		}
   836  		dockerRef = reference.WithDefaultTag(dockerRef)
   837  		namedTaggedRef, ok := dockerRef.(reference.NamedTagged)
   838  		if !ok {
   839  			return "", fmt.Errorf("unable to cast image to NamedTagged reference object")
   840  		}
   841  
   842  		repo, _, err := c.config.Backend.GetRepository(ctx, namedTaggedRef, authConfig)
   843  		if err != nil {
   844  			return "", err
   845  		}
   846  		dscrptr, err := repo.Tags(ctx).Get(ctx, namedTaggedRef.Tag())
   847  		if err != nil {
   848  			return "", err
   849  		}
   850  
   851  		namedDigestedRef, err := distreference.WithDigest(distreference.EnsureTagged(ref), dscrptr.Digest)
   852  		if err != nil {
   853  			return "", err
   854  		}
   855  		return namedDigestedRef.String(), nil
   856  	}
   857  	// reference already contains a digest, so just return it
   858  	return ref.String(), nil
   859  }
   860  
   861  // CreateService creates a new service in a managed swarm cluster.
   862  func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (*apitypes.ServiceCreateResponse, error) {
   863  	c.mu.RLock()
   864  	defer c.mu.RUnlock()
   865  
   866  	state := c.currentNodeState()
   867  	if !state.IsActiveManager() {
   868  		return nil, c.errNoManager(state)
   869  	}
   870  
   871  	ctx, cancel := c.getRequestContext()
   872  	defer cancel()
   873  
   874  	err := c.populateNetworkID(ctx, state.controlClient, &s)
   875  	if err != nil {
   876  		return nil, err
   877  	}
   878  
   879  	serviceSpec, err := convert.ServiceSpecToGRPC(s)
   880  	if err != nil {
   881  		return nil, err
   882  	}
   883  
   884  	ctnr := serviceSpec.Task.GetContainer()
   885  	if ctnr == nil {
   886  		return nil, fmt.Errorf("service does not use container tasks")
   887  	}
   888  
   889  	if encodedAuth != "" {
   890  		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   891  	}
   892  
   893  	// retrieve auth config from encoded auth
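	// (encodedAuth is expected to be the base64url-encoded JSON of an
	// apitypes.AuthConfig, i.e. the same payload the API receives in the
	// X-Registry-Auth header.)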
   894  	authConfig := &apitypes.AuthConfig{}
   895  	if encodedAuth != "" {
   896  		if err := json.NewDecoder(base64.NewDecoder(base64.URLEncoding, strings.NewReader(encodedAuth))).Decode(authConfig); err != nil {
   897  			logrus.Warnf("invalid authconfig: %v", err)
   898  		}
   899  	}
   900  
   901  	resp := &apitypes.ServiceCreateResponse{}
   902  
   903  	// pin image by digest
   904  	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
   905  		digestImage, err := c.imageWithDigestString(ctx, ctnr.Image, authConfig)
   906  		if err != nil {
   907  			logrus.Warnf("unable to pin image %s to digest: %s", ctnr.Image, err.Error())
   908  			resp.Warnings = append(resp.Warnings, fmt.Sprintf("unable to pin image %s to digest: %s", ctnr.Image, err.Error()))
   909  		} else {
   910  			logrus.Debugf("pinning image %s by digest: %s", ctnr.Image, digestImage)
   911  			ctnr.Image = digestImage
   912  		}
   913  	}
   914  
   915  	r, err := state.controlClient.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
   916  	if err != nil {
   917  		return nil, err
   918  	}
   919  
   920  	resp.ID = r.Service.ID
   921  	return resp, nil
   922  }
   923  
   924  // GetService returns a service based on an ID or name.
   925  func (c *Cluster) GetService(input string) (types.Service, error) {
   926  	c.mu.RLock()
   927  	defer c.mu.RUnlock()
   928  
   929  	state := c.currentNodeState()
   930  	if !state.IsActiveManager() {
   931  		return types.Service{}, c.errNoManager(state)
   932  	}
   933  
   934  	ctx, cancel := c.getRequestContext()
   935  	defer cancel()
   936  
   937  	service, err := getService(ctx, state.controlClient, input)
   938  	if err != nil {
   939  		return types.Service{}, err
   940  	}
   941  	return convert.ServiceFromGRPC(*service), nil
   942  }
   943  
    944  // UpdateService updates an existing service to match new properties.
   945  func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) (*apitypes.ServiceUpdateResponse, error) {
   946  	c.mu.RLock()
   947  	defer c.mu.RUnlock()
   948  
   949  	state := c.currentNodeState()
   950  	if !state.IsActiveManager() {
   951  		return nil, c.errNoManager(state)
   952  	}
   953  
   954  	ctx, cancel := c.getRequestContext()
   955  	defer cancel()
   956  
   957  	err := c.populateNetworkID(ctx, state.controlClient, &spec)
   958  	if err != nil {
   959  		return nil, err
   960  	}
   961  
   962  	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
   963  	if err != nil {
   964  		return nil, err
   965  	}
   966  
   967  	currentService, err := getService(ctx, state.controlClient, serviceIDOrName)
   968  	if err != nil {
   969  		return nil, err
   970  	}
   971  
   972  	newCtnr := serviceSpec.Task.GetContainer()
   973  	if newCtnr == nil {
   974  		return nil, fmt.Errorf("service does not use container tasks")
   975  	}
   976  
   977  	if encodedAuth != "" {
   978  		newCtnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
   979  	} else {
    980  		// this is needed because if the encodedAuth isn't being updated then we
    981  		// shouldn't lose it, and should continue to use the one already present
   982  		var ctnr *swarmapi.ContainerSpec
   983  		switch registryAuthFrom {
   984  		case apitypes.RegistryAuthFromSpec, "":
   985  			ctnr = currentService.Spec.Task.GetContainer()
   986  		case apitypes.RegistryAuthFromPreviousSpec:
   987  			if currentService.PreviousSpec == nil {
   988  				return nil, fmt.Errorf("service does not have a previous spec")
   989  			}
   990  			ctnr = currentService.PreviousSpec.Task.GetContainer()
   991  		default:
    992  			return nil, fmt.Errorf("unsupported registryAuthFrom value")
   993  		}
   994  		if ctnr == nil {
   995  			return nil, fmt.Errorf("service does not use container tasks")
   996  		}
   997  		newCtnr.PullOptions = ctnr.PullOptions
   998  		// update encodedAuth so it can be used to pin image by digest
   999  		if ctnr.PullOptions != nil {
  1000  			encodedAuth = ctnr.PullOptions.RegistryAuth
  1001  		}
  1002  	}
  1003  
  1004  	// retrieve auth config from encoded auth
  1005  	authConfig := &apitypes.AuthConfig{}
  1006  	if encodedAuth != "" {
  1007  		if err := json.NewDecoder(base64.NewDecoder(base64.URLEncoding, strings.NewReader(encodedAuth))).Decode(authConfig); err != nil {
  1008  			logrus.Warnf("invalid authconfig: %v", err)
  1009  		}
  1010  	}
  1011  
  1012  	resp := &apitypes.ServiceUpdateResponse{}
  1013  
  1014  	// pin image by digest
  1015  	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
  1016  		digestImage, err := c.imageWithDigestString(ctx, newCtnr.Image, authConfig)
  1017  		if err != nil {
  1018  			logrus.Warnf("unable to pin image %s to digest: %s", newCtnr.Image, err.Error())
  1019  			resp.Warnings = append(resp.Warnings, fmt.Sprintf("unable to pin image %s to digest: %s", newCtnr.Image, err.Error()))
  1020  		} else if newCtnr.Image != digestImage {
  1021  			logrus.Debugf("pinning image %s by digest: %s", newCtnr.Image, digestImage)
  1022  			newCtnr.Image = digestImage
  1023  		}
  1024  	}
  1025  
  1026  	_, err = state.controlClient.UpdateService(
  1027  		ctx,
  1028  		&swarmapi.UpdateServiceRequest{
  1029  			ServiceID: currentService.ID,
  1030  			Spec:      &serviceSpec,
  1031  			ServiceVersion: &swarmapi.Version{
  1032  				Index: version,
  1033  			},
  1034  		},
  1035  	)
  1036  
  1037  	return resp, err
  1038  }
  1039  
  1040  // RemoveService removes a service from a managed swarm cluster.
  1041  func (c *Cluster) RemoveService(input string) error {
  1042  	c.mu.RLock()
  1043  	defer c.mu.RUnlock()
  1044  
  1045  	state := c.currentNodeState()
  1046  	if !state.IsActiveManager() {
  1047  		return c.errNoManager(state)
  1048  	}
  1049  
  1050  	ctx, cancel := c.getRequestContext()
  1051  	defer cancel()
  1052  
  1053  	service, err := getService(ctx, state.controlClient, input)
  1054  	if err != nil {
  1055  		return err
  1056  	}
  1057  
  1058  	if _, err := state.controlClient.RemoveService(ctx, &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil {
  1059  		return err
  1060  	}
  1061  	return nil
  1062  }
  1063  
  1064  // ServiceLogs collects service logs and writes them back to `config.OutStream`
  1065  func (c *Cluster) ServiceLogs(ctx context.Context, input string, config *backend.ContainerLogsConfig, started chan struct{}) error {
  1066  	c.mu.RLock()
  1067  	state := c.currentNodeState()
  1068  	if !state.IsActiveManager() {
  1069  		c.mu.RUnlock()
  1070  		return c.errNoManager(state)
  1071  	}
  1072  
  1073  	service, err := getService(ctx, state.controlClient, input)
  1074  	if err != nil {
  1075  		c.mu.RUnlock()
  1076  		return err
  1077  	}
  1078  
  1079  	stream, err := state.logsClient.SubscribeLogs(ctx, &swarmapi.SubscribeLogsRequest{
  1080  		Selector: &swarmapi.LogSelector{
  1081  			ServiceIDs: []string{service.ID},
  1082  		},
  1083  		Options: &swarmapi.LogSubscriptionOptions{
  1084  			Follow: config.Follow,
  1085  		},
  1086  	})
  1087  	if err != nil {
  1088  		c.mu.RUnlock()
  1089  		return err
  1090  	}
  1091  
  1092  	wf := ioutils.NewWriteFlusher(config.OutStream)
  1093  	defer wf.Close()
  1094  	close(started)
  1095  	wf.Flush()
  1096  
  1097  	outStream := stdcopy.NewStdWriter(wf, stdcopy.Stdout)
  1098  	errStream := stdcopy.NewStdWriter(wf, stdcopy.Stderr)
  1099  
  1100  	// Release the lock before starting the stream.
  1101  	c.mu.RUnlock()
  1102  	for {
  1103  		// Check the context before doing anything.
  1104  		select {
  1105  		case <-ctx.Done():
  1106  			return ctx.Err()
  1107  		default:
  1108  		}
  1109  
  1110  		subscribeMsg, err := stream.Recv()
  1111  		if err == io.EOF {
  1112  			return nil
  1113  		}
  1114  		if err != nil {
  1115  			return err
  1116  		}
  1117  
  1118  		for _, msg := range subscribeMsg.Messages {
  1119  			data := []byte{}
  1120  
  1121  			if config.Timestamps {
  1122  				ts, err := ptypes.Timestamp(msg.Timestamp)
  1123  				if err != nil {
  1124  					return err
  1125  				}
  1126  				data = append(data, []byte(ts.Format(logger.TimeFormat)+" ")...)
  1127  			}
  1128  
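			// Prefix each message with its context, i.e.
			// "com.docker.swarm.node.id=<node>,com.docker.swarm.service.id=<service>,com.docker.swarm.task.id=<task> ".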
  1129  			data = append(data, []byte(fmt.Sprintf("%s.node.id=%s,%s.service.id=%s,%s.task.id=%s ",
  1130  				contextPrefix, msg.Context.NodeID,
  1131  				contextPrefix, msg.Context.ServiceID,
  1132  				contextPrefix, msg.Context.TaskID,
  1133  			))...)
  1134  
  1135  			data = append(data, msg.Data...)
  1136  
  1137  			switch msg.Stream {
  1138  			case swarmapi.LogStreamStdout:
  1139  				outStream.Write(data)
  1140  			case swarmapi.LogStreamStderr:
  1141  				errStream.Write(data)
  1142  			}
  1143  		}
  1144  	}
  1145  }
  1146  
  1147  // GetNodes returns a list of all nodes known to a cluster.
  1148  func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
  1149  	c.mu.RLock()
  1150  	defer c.mu.RUnlock()
  1151  
  1152  	state := c.currentNodeState()
  1153  	if !state.IsActiveManager() {
  1154  		return nil, c.errNoManager(state)
  1155  	}
  1156  
  1157  	filters, err := newListNodesFilters(options.Filters)
  1158  	if err != nil {
  1159  		return nil, err
  1160  	}
  1161  
  1162  	ctx, cancel := c.getRequestContext()
  1163  	defer cancel()
  1164  
  1165  	r, err := state.controlClient.ListNodes(
  1166  		ctx,
  1167  		&swarmapi.ListNodesRequest{Filters: filters})
  1168  	if err != nil {
  1169  		return nil, err
  1170  	}
  1171  
  1172  	nodes := []types.Node{}
  1173  
  1174  	for _, node := range r.Nodes {
  1175  		nodes = append(nodes, convert.NodeFromGRPC(*node))
  1176  	}
  1177  	return nodes, nil
  1178  }
  1179  
  1180  // GetNode returns a node based on an ID or name.
  1181  func (c *Cluster) GetNode(input string) (types.Node, error) {
  1182  	c.mu.RLock()
  1183  	defer c.mu.RUnlock()
  1184  
  1185  	state := c.currentNodeState()
  1186  	if !state.IsActiveManager() {
  1187  		return types.Node{}, c.errNoManager(state)
  1188  	}
  1189  
  1190  	ctx, cancel := c.getRequestContext()
  1191  	defer cancel()
  1192  
  1193  	node, err := getNode(ctx, state.controlClient, input)
  1194  	if err != nil {
  1195  		return types.Node{}, err
  1196  	}
  1197  	return convert.NodeFromGRPC(*node), nil
  1198  }
  1199  
   1200  // UpdateNode updates an existing node's properties.
  1201  func (c *Cluster) UpdateNode(input string, version uint64, spec types.NodeSpec) error {
  1202  	c.mu.RLock()
  1203  	defer c.mu.RUnlock()
  1204  
  1205  	state := c.currentNodeState()
  1206  	if !state.IsActiveManager() {
  1207  		return c.errNoManager(state)
  1208  	}
  1209  
  1210  	nodeSpec, err := convert.NodeSpecToGRPC(spec)
  1211  	if err != nil {
  1212  		return err
  1213  	}
  1214  
  1215  	ctx, cancel := c.getRequestContext()
  1216  	defer cancel()
  1217  
  1218  	currentNode, err := getNode(ctx, state.controlClient, input)
  1219  	if err != nil {
  1220  		return err
  1221  	}
  1222  
  1223  	_, err = state.controlClient.UpdateNode(
  1224  		ctx,
  1225  		&swarmapi.UpdateNodeRequest{
  1226  			NodeID: currentNode.ID,
  1227  			Spec:   &nodeSpec,
  1228  			NodeVersion: &swarmapi.Version{
  1229  				Index: version,
  1230  			},
  1231  		},
  1232  	)
  1233  	return err
  1234  }
  1235  
  1236  // RemoveNode removes a node from a cluster
  1237  func (c *Cluster) RemoveNode(input string, force bool) error {
  1238  	c.mu.RLock()
  1239  	defer c.mu.RUnlock()
  1240  
  1241  	state := c.currentNodeState()
  1242  	if !state.IsActiveManager() {
  1243  		return c.errNoManager(state)
  1244  	}
  1245  
  1246  	ctx, cancel := c.getRequestContext()
  1247  	defer cancel()
  1248  
  1249  	node, err := getNode(ctx, state.controlClient, input)
  1250  	if err != nil {
  1251  		return err
  1252  	}
  1253  
  1254  	if _, err := state.controlClient.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID, Force: force}); err != nil {
  1255  		return err
  1256  	}
  1257  	return nil
  1258  }
  1259  
  1260  // GetTasks returns a list of tasks matching the filter options.
  1261  func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
  1262  	c.mu.RLock()
  1263  	defer c.mu.RUnlock()
  1264  
  1265  	state := c.currentNodeState()
  1266  	if !state.IsActiveManager() {
  1267  		return nil, c.errNoManager(state)
  1268  	}
  1269  
  1270  	byName := func(filter filters.Args) error {
  1271  		if filter.Include("service") {
  1272  			serviceFilters := filter.Get("service")
  1273  			for _, serviceFilter := range serviceFilters {
  1274  				service, err := c.GetService(serviceFilter)
  1275  				if err != nil {
  1276  					return err
  1277  				}
  1278  				filter.Del("service", serviceFilter)
  1279  				filter.Add("service", service.ID)
  1280  			}
  1281  		}
  1282  		if filter.Include("node") {
  1283  			nodeFilters := filter.Get("node")
  1284  			for _, nodeFilter := range nodeFilters {
  1285  				node, err := c.GetNode(nodeFilter)
  1286  				if err != nil {
  1287  					return err
  1288  				}
  1289  				filter.Del("node", nodeFilter)
  1290  				filter.Add("node", node.ID)
  1291  			}
  1292  		}
  1293  		return nil
  1294  	}
  1295  
  1296  	filters, err := newListTasksFilters(options.Filters, byName)
  1297  	if err != nil {
  1298  		return nil, err
  1299  	}
  1300  
  1301  	ctx, cancel := c.getRequestContext()
  1302  	defer cancel()
  1303  
  1304  	r, err := state.controlClient.ListTasks(
  1305  		ctx,
  1306  		&swarmapi.ListTasksRequest{Filters: filters})
  1307  	if err != nil {
  1308  		return nil, err
  1309  	}
  1310  
  1311  	tasks := []types.Task{}
  1312  
  1313  	for _, task := range r.Tasks {
  1314  		if task.Spec.GetContainer() != nil {
  1315  			tasks = append(tasks, convert.TaskFromGRPC(*task))
  1316  		}
  1317  	}
  1318  	return tasks, nil
  1319  }
  1320  
  1321  // GetTask returns a task by an ID.
  1322  func (c *Cluster) GetTask(input string) (types.Task, error) {
  1323  	c.mu.RLock()
  1324  	defer c.mu.RUnlock()
  1325  
  1326  	state := c.currentNodeState()
  1327  	if !state.IsActiveManager() {
  1328  		return types.Task{}, c.errNoManager(state)
  1329  	}
  1330  
  1331  	ctx, cancel := c.getRequestContext()
  1332  	defer cancel()
  1333  
  1334  	task, err := getTask(ctx, state.controlClient, input)
  1335  	if err != nil {
  1336  		return types.Task{}, err
  1337  	}
  1338  	return convert.TaskFromGRPC(*task), nil
  1339  }
  1340  
  1341  // GetNetwork returns a cluster network by an ID.
  1342  func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
  1343  	c.mu.RLock()
  1344  	defer c.mu.RUnlock()
  1345  
  1346  	state := c.currentNodeState()
  1347  	if !state.IsActiveManager() {
  1348  		return apitypes.NetworkResource{}, c.errNoManager(state)
  1349  	}
  1350  
  1351  	ctx, cancel := c.getRequestContext()
  1352  	defer cancel()
  1353  
  1354  	network, err := getNetwork(ctx, state.controlClient, input)
  1355  	if err != nil {
  1356  		return apitypes.NetworkResource{}, err
  1357  	}
  1358  	return convert.BasicNetworkFromGRPC(*network), nil
  1359  }
  1360  
  1361  // GetNetworks returns all current cluster managed networks.
  1362  func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
  1363  	c.mu.RLock()
  1364  	defer c.mu.RUnlock()
  1365  
  1366  	state := c.currentNodeState()
  1367  	if !state.IsActiveManager() {
  1368  		return nil, c.errNoManager(state)
  1369  	}
  1370  
  1371  	ctx, cancel := c.getRequestContext()
  1372  	defer cancel()
  1373  
  1374  	r, err := state.controlClient.ListNetworks(ctx, &swarmapi.ListNetworksRequest{})
  1375  	if err != nil {
  1376  		return nil, err
  1377  	}
  1378  
  1379  	var networks []apitypes.NetworkResource
  1380  
  1381  	for _, network := range r.Networks {
  1382  		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
  1383  	}
  1384  
  1385  	return networks, nil
  1386  }
  1387  
  1388  func attacherKey(target, containerID string) string {
  1389  	return containerID + ":" + target
  1390  }
  1391  
  1392  // UpdateAttachment signals the attachment config to the attachment
  1393  // waiter who is trying to start or attach the container to the
  1394  // network.
  1395  func (c *Cluster) UpdateAttachment(target, containerID string, config *network.NetworkingConfig) error {
  1396  	c.mu.RLock()
  1397  	attacher, ok := c.attachers[attacherKey(target, containerID)]
  1398  	c.mu.RUnlock()
  1399  	if !ok || attacher == nil {
  1400  		return fmt.Errorf("could not find attacher for container %s to network %s", containerID, target)
  1401  	}
  1402  
  1403  	attacher.attachWaitCh <- config
  1404  	close(attacher.attachWaitCh)
  1405  	return nil
  1406  }
  1407  
  1408  // WaitForDetachment waits for the container to stop or detach from
  1409  // the network.
  1410  func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID, taskID, containerID string) error {
  1411  	c.mu.RLock()
  1412  	attacher, ok := c.attachers[attacherKey(networkName, containerID)]
  1413  	if !ok {
  1414  		attacher, ok = c.attachers[attacherKey(networkID, containerID)]
  1415  	}
  1416  	state := c.currentNodeState()
  1417  	if state.swarmNode == nil || state.swarmNode.Agent() == nil {
  1418  		c.mu.RUnlock()
  1419  		return fmt.Errorf("invalid cluster node while waiting for detachment")
  1420  	}
  1421  
  1422  	c.mu.RUnlock()
  1423  	agent := state.swarmNode.Agent()
  1424  	if ok && attacher != nil &&
  1425  		attacher.detachWaitCh != nil &&
  1426  		attacher.attachCompleteCh != nil {
   1427  		// Attachment may still be in progress, so wait for
   1428  		// the attachment to complete.
  1429  		select {
  1430  		case <-attacher.attachCompleteCh:
  1431  		case <-ctx.Done():
  1432  			return ctx.Err()
  1433  		}
  1434  
  1435  		if attacher.taskID == taskID {
  1436  			select {
  1437  			case <-attacher.detachWaitCh:
  1438  			case <-ctx.Done():
  1439  				return ctx.Err()
  1440  			}
  1441  		}
  1442  	}
  1443  
  1444  	return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
  1445  }
  1446  
  1447  // AttachNetwork generates an attachment request towards the manager.
  1448  func (c *Cluster) AttachNetwork(target string, containerID string, addresses []string) (*network.NetworkingConfig, error) {
  1449  	aKey := attacherKey(target, containerID)
  1450  	c.mu.Lock()
  1451  	state := c.currentNodeState()
  1452  	if state.swarmNode == nil || state.swarmNode.Agent() == nil {
  1453  		c.mu.Unlock()
  1454  		return nil, fmt.Errorf("invalid cluster node while attaching to network")
  1455  	}
  1456  	if attacher, ok := c.attachers[aKey]; ok {
  1457  		c.mu.Unlock()
  1458  		return attacher.config, nil
  1459  	}
  1460  
  1461  	agent := state.swarmNode.Agent()
  1462  	attachWaitCh := make(chan *network.NetworkingConfig)
  1463  	detachWaitCh := make(chan struct{})
  1464  	attachCompleteCh := make(chan struct{})
  1465  	c.attachers[aKey] = &attacher{
  1466  		attachWaitCh:     attachWaitCh,
  1467  		attachCompleteCh: attachCompleteCh,
  1468  		detachWaitCh:     detachWaitCh,
  1469  	}
  1470  	c.mu.Unlock()
  1471  
  1472  	ctx, cancel := c.getRequestContext()
  1473  	defer cancel()
  1474  
  1475  	taskID, err := agent.ResourceAllocator().AttachNetwork(ctx, containerID, target, addresses)
  1476  	if err != nil {
  1477  		c.mu.Lock()
  1478  		delete(c.attachers, aKey)
  1479  		c.mu.Unlock()
  1480  		return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
  1481  	}
  1482  
  1483  	c.mu.Lock()
  1484  	c.attachers[aKey].taskID = taskID
  1485  	close(attachCompleteCh)
  1486  	c.mu.Unlock()
  1487  
  1488  	logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
  1489  
  1490  	var config *network.NetworkingConfig
  1491  	select {
  1492  	case config = <-attachWaitCh:
  1493  	case <-ctx.Done():
  1494  		return nil, fmt.Errorf("attaching to network failed, make sure your network options are correct and check manager logs: %v", ctx.Err())
  1495  	}
  1496  
  1497  	c.mu.Lock()
  1498  	c.attachers[aKey].config = config
  1499  	c.mu.Unlock()
  1500  	return config, nil
  1501  }
  1502  
  1503  // DetachNetwork unblocks the waiters waiting on WaitForDetachment so
  1504  // that a request to detach can be generated towards the manager.
  1505  func (c *Cluster) DetachNetwork(target string, containerID string) error {
  1506  	aKey := attacherKey(target, containerID)
  1507  
  1508  	c.mu.Lock()
  1509  	attacher, ok := c.attachers[aKey]
  1510  	delete(c.attachers, aKey)
  1511  	c.mu.Unlock()
  1512  
  1513  	if !ok {
  1514  		return fmt.Errorf("could not find network attachment for container %s to network %s", containerID, target)
  1515  	}
  1516  
  1517  	close(attacher.detachWaitCh)
  1518  	return nil
  1519  }
  1520  
  1521  // CreateNetwork creates a new cluster managed network.
  1522  func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
  1523  	c.mu.RLock()
  1524  	defer c.mu.RUnlock()
  1525  
  1526  	state := c.currentNodeState()
  1527  	if !state.IsActiveManager() {
  1528  		return "", c.errNoManager(state)
  1529  	}
  1530  
  1531  	if runconfig.IsPreDefinedNetwork(s.Name) {
  1532  		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
  1533  		return "", apierrors.NewRequestForbiddenError(err)
  1534  	}
  1535  
  1536  	ctx, cancel := c.getRequestContext()
  1537  	defer cancel()
  1538  
  1539  	networkSpec := convert.BasicNetworkCreateToGRPC(s)
  1540  	r, err := state.controlClient.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
  1541  	if err != nil {
  1542  		return "", err
  1543  	}
  1544  
  1545  	return r.Network.ID, nil
  1546  }
  1547  
  1548  // RemoveNetwork removes a cluster network.
  1549  func (c *Cluster) RemoveNetwork(input string) error {
  1550  	c.mu.RLock()
  1551  	defer c.mu.RUnlock()
  1552  
  1553  	state := c.currentNodeState()
  1554  	if !state.IsActiveManager() {
  1555  		return c.errNoManager(state)
  1556  	}
  1557  
  1558  	ctx, cancel := c.getRequestContext()
  1559  	defer cancel()
  1560  
  1561  	network, err := getNetwork(ctx, state.controlClient, input)
  1562  	if err != nil {
  1563  		return err
  1564  	}
  1565  
  1566  	if _, err := state.controlClient.RemoveNetwork(ctx, &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil {
  1567  		return err
  1568  	}
  1569  	return nil
  1570  }
  1571  
  1572  func (c *Cluster) populateNetworkID(ctx context.Context, client swarmapi.ControlClient, s *types.ServiceSpec) error {
   1573  	// Always prefer NetworkAttachmentConfigs from the TaskTemplate,
   1574  	// but fall back to the service spec for backward compatibility.
  1575  	networks := s.TaskTemplate.Networks
  1576  	if len(networks) == 0 {
  1577  		networks = s.Networks
  1578  	}
  1579  
  1580  	for i, n := range networks {
  1581  		apiNetwork, err := getNetwork(ctx, client, n.Target)
  1582  		if err != nil {
  1583  			if ln, _ := c.config.Backend.FindNetwork(n.Target); ln != nil && !ln.Info().Dynamic() {
  1584  				err = fmt.Errorf("The network %s cannot be used with services. Only networks scoped to the swarm can be used, such as those created with the overlay driver.", ln.Name())
  1585  				return apierrors.NewRequestForbiddenError(err)
  1586  			}
  1587  			return err
  1588  		}
  1589  		networks[i].Target = apiNetwork.ID
  1590  	}
  1591  	return nil
  1592  }
  1593  
   1594  // Cleanup stops the active swarm node. This is run before daemon shutdown.
  1595  func (c *Cluster) Cleanup() {
  1596  	c.controlMutex.Lock()
  1597  	defer c.controlMutex.Unlock()
  1598  
  1599  	c.mu.Lock()
  1600  	node := c.nr
  1601  	if node == nil {
  1602  		c.mu.Unlock()
  1603  		return
  1604  	}
  1605  	defer c.mu.Unlock()
  1606  	state := c.currentNodeState()
  1607  	if state.IsActiveManager() {
  1608  		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
  1609  		if err == nil {
  1610  			singlenode := active && isLastManager(reachable, unreachable)
  1611  			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
  1612  				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
  1613  			}
  1614  		}
  1615  	}
  1616  	if err := node.Stop(); err != nil {
  1617  		logrus.Errorf("failed to shut down cluster node: %v", err)
  1618  		signal.DumpStacks("")
  1619  	}
  1620  	c.nr = nil
  1621  }
  1622  
  1623  func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
  1624  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  1625  	defer cancel()
  1626  	nodes, err := client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
  1627  	if err != nil {
  1628  		return false, 0, 0, err
  1629  	}
  1630  	for _, n := range nodes.Nodes {
  1631  		if n.ManagerStatus != nil {
  1632  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
  1633  				reachable++
  1634  				if n.ID == currentNodeID {
  1635  					current = true
  1636  				}
  1637  			}
  1638  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
  1639  				unreachable++
  1640  			}
  1641  		}
  1642  	}
  1643  	return
  1644  }
  1645  
  1646  func validateAndSanitizeInitRequest(req *types.InitRequest) error {
  1647  	var err error
  1648  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1649  	if err != nil {
  1650  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1651  	}
  1652  
  1653  	if req.Spec.Annotations.Name == "" {
  1654  		req.Spec.Annotations.Name = "default"
  1655  	} else if req.Spec.Annotations.Name != "default" {
  1656  		return errors.New(`swarm spec must be named "default"`)
  1657  	}
  1658  
  1659  	return nil
  1660  }
  1661  
  1662  func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
  1663  	var err error
  1664  	req.ListenAddr, err = validateAddr(req.ListenAddr)
  1665  	if err != nil {
  1666  		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
  1667  	}
  1668  	if len(req.RemoteAddrs) == 0 {
  1669  		return fmt.Errorf("at least 1 RemoteAddr is required to join")
  1670  	}
  1671  	for i := range req.RemoteAddrs {
  1672  		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
  1673  		if err != nil {
  1674  			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
  1675  		}
  1676  	}
  1677  	return nil
  1678  }
  1679  
  1680  func validateAddr(addr string) (string, error) {
  1681  	if addr == "" {
  1682  		return addr, fmt.Errorf("invalid empty address")
  1683  	}
  1684  	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
  1685  	if err != nil {
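		// If the address cannot be parsed as a TCP address, keep the caller's
		// original value unchanged instead of failing here.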
  1686  		return addr, nil
  1687  	}
  1688  	return strings.TrimPrefix(newaddr, "tcp://"), nil
  1689  }
  1690  
  1691  func initClusterSpec(node *swarmnode.Node, spec types.Spec) error {
   1692  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
  1693  	for conn := range node.ListenControlSocket(ctx) {
  1694  		if ctx.Err() != nil {
  1695  			return ctx.Err()
  1696  		}
  1697  		if conn != nil {
  1698  			client := swarmapi.NewControlClient(conn)
  1699  			var cluster *swarmapi.Cluster
  1700  			for i := 0; ; i++ {
  1701  				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
  1702  				if err != nil {
  1703  					return fmt.Errorf("error on listing clusters: %v", err)
  1704  				}
  1705  				if len(lcr.Clusters) == 0 {
  1706  					if i < 10 {
  1707  						time.Sleep(200 * time.Millisecond)
  1708  						continue
  1709  					}
  1710  					return fmt.Errorf("empty list of clusters was returned")
  1711  				}
  1712  				cluster = lcr.Clusters[0]
  1713  				break
  1714  			}
   1715  			// In init, we take the initial default values from swarmkit and merge
   1716  			// any non-nil or non-zero value from spec into the GRPC spec. This
   1717  			// leaves the default values alone.
   1718  			// Note that this is different from Update(): in Update() we expect the
   1719  			// user to specify the complete spec of the cluster (as they already know
   1720  			// the existing one and know which fields to update).
  1721  			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
  1722  			if err != nil {
  1723  				return fmt.Errorf("error updating cluster settings: %v", err)
  1724  			}
  1725  			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
  1726  				ClusterID:      cluster.ID,
  1727  				ClusterVersion: &cluster.Meta.Version,
  1728  				Spec:           &clusterSpec,
  1729  			})
  1730  			if err != nil {
  1731  				return fmt.Errorf("error updating cluster settings: %v", err)
  1732  			}
  1733  			return nil
  1734  		}
  1735  	}
  1736  	return ctx.Err()
  1737  }
  1738  
  1739  func detectLockedError(err error) error {
  1740  	if err == swarmnode.ErrInvalidUnlockKey {
  1741  		return errors.WithStack(errSwarmLocked)
  1742  	}
  1743  	return err
  1744  }