github.com/rita33cool1/iot-system-gateway@v0.0.0-20200911033302-e65bde238cc5/docker-engine/daemon/cluster/swarm.go

package cluster // import "github.com/docker/docker/daemon/cluster"

import (
	"fmt"
	"net"
	"strings"
	"time"

	apitypes "github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/filters"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/convert"
	"github.com/docker/docker/errdefs"
	"github.com/docker/docker/opts"
	"github.com/docker/docker/pkg/signal"
	swarmapi "github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/encryption"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"golang.org/x/net/context"
)

// Init initializes a new cluster from the user-provided request.
func (c *Cluster) Init(req types.InitRequest) (string, error) {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	if c.nr != nil {
		if req.ForceNewCluster {

			// Take c.mu temporarily to wait for presently running
			// API handlers to finish before shutting down the node.
			c.mu.Lock()
			if !c.nr.nodeState.IsManager() {
				c.mu.Unlock()
				return "", errSwarmNotManager
			}
			c.mu.Unlock()

			if err := c.nr.Stop(); err != nil {
				return "", err
			}
		} else {
			return "", errSwarmExists
		}
	}

	if err := validateAndSanitizeInitRequest(&req); err != nil {
		return "", errdefs.InvalidParameter(err)
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return "", err
	}

	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
	if err != nil {
		return "", err
	}

	dataPathAddr, err := resolveDataPathAddr(req.DataPathAddr)
	if err != nil {
		return "", err
	}

	localAddr := listenHost

	// If the local address is undetermined, the advertise address
	// will be used as the local address, if it belongs to this system.
	// If the advertise address is not local, then we try to find
	// a system address to use as the local address. If this fails,
	// we give up and ask the user to pass the listen address.
	if net.ParseIP(localAddr).IsUnspecified() {
		advertiseIP := net.ParseIP(advertiseHost)

		found := false
		for _, systemIP := range listSystemIPs() {
			if systemIP.Equal(advertiseIP) {
				localAddr = advertiseIP.String()
				found = true
				break
			}
		}

		if !found {
			ip, err := c.resolveSystemAddr()
			if err != nil {
				logrus.Warnf("Could not find a local address: %v", err)
				return "", errMustSpecifyListenAddr
			}
			localAddr = ip.String()
		}
	}

	nr, err := c.newNodeRunner(nodeStartConfig{
		forceNewCluster: req.ForceNewCluster,
		autolock:        req.AutoLockManagers,
		LocalAddr:       localAddr,
		ListenAddr:      net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr:   net.JoinHostPort(advertiseHost, advertisePort),
		DataPathAddr:    dataPathAddr,
		availability:    req.Availability,
	})
	if err != nil {
		return "", err
	}
	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		c.mu.Lock()
		c.nr = nil
		c.mu.Unlock()
		if !req.ForceNewCluster { // if the first attempt fails, don't keep state
			if err := clearPersistentState(c.root); err != nil {
				return "", err
			}
		}
		return "", err
	}
	state := nr.State()
	if state.swarmNode == nil { // should never happen but protect from panic
		return "", errors.New("invalid cluster state for spec initialization")
	}
	if err := initClusterSpec(state.swarmNode, req.Spec); err != nil {
		return "", err
	}
	return state.NodeID(), nil
}
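
// Illustrative only: a typical Init request built by the API layer might look
// roughly like the sketch below (the field values are assumptions, not taken
// from this repository):
//
//	nodeID, err := c.Init(types.InitRequest{
//		ListenAddr:    "0.0.0.0:2377",
//		AdvertiseAddr: "192.168.1.10:2377",
//		Spec:          types.Spec{},
//	})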

// Join makes the current Cluster part of an existing swarm cluster.
func (c *Cluster) Join(req types.JoinRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	c.mu.Lock()
	if c.nr != nil {
		c.mu.Unlock()
		return errors.WithStack(errSwarmExists)
	}
	c.mu.Unlock()

	if err := validateAndSanitizeJoinRequest(&req); err != nil {
		return errdefs.InvalidParameter(err)
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return err
	}

	var advertiseAddr string
	if req.AdvertiseAddr != "" {
		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
		// For joining, we don't need to provide an advertise address,
		// since the remote side can detect it.
		if err == nil {
			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
		}
	}

	dataPathAddr, err := resolveDataPathAddr(req.DataPathAddr)
	if err != nil {
		return err
	}

	nr, err := c.newNodeRunner(nodeStartConfig{
		RemoteAddr:    req.RemoteAddrs[0],
		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr: advertiseAddr,
		DataPathAddr:  dataPathAddr,
		joinAddr:      req.RemoteAddrs[0],
		joinToken:     req.JoinToken,
		availability:  req.Availability,
	})
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	select {
	case <-time.After(swarmConnectTimeout):
		return errSwarmJoinTimeoutReached
	case err := <-nr.Ready():
		if err != nil {
			c.mu.Lock()
			c.nr = nil
			c.mu.Unlock()
			if err := clearPersistentState(c.root); err != nil {
				return err
			}
		}
		return err
	}
}
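
// Illustrative only: a join request typically carries the address of an
// existing manager plus the join token printed by `docker swarm init` or
// `docker swarm join-token`; the values below are assumptions:
//
//	err := c.Join(types.JoinRequest{
//		ListenAddr:  "0.0.0.0:2377",
//		RemoteAddrs: []string{"192.168.1.10:2377"},
//		JoinToken:   "SWMTKN-1-...",
//	})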

// Inspect retrieves the configuration properties of a managed swarm cluster.
func (c *Cluster) Inspect() (types.Swarm, error) {
	var swarm types.Swarm
	if err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		s, err := c.inspect(ctx, state)
		if err != nil {
			return err
		}
		swarm = s
		return nil
	}); err != nil {
		return types.Swarm{}, err
	}
	return swarm, nil
}

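// inspect fetches the swarm object through the manager's control client and
// converts it from its gRPC representation to the engine API type.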
func (c *Cluster) inspect(ctx context.Context, state nodeState) (types.Swarm, error) {
	s, err := getSwarm(ctx, state.controlClient)
	if err != nil {
		return types.Swarm{}, err
	}
	return convert.SwarmFromGRPC(*s), nil
}

// Update updates the configuration of a managed swarm cluster.
func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
	return c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		swarm, err := getSwarm(ctx, state.controlClient)
		if err != nil {
			return err
		}

		// Validate spec name.
		if spec.Annotations.Name == "" {
			spec.Annotations.Name = "default"
		} else if spec.Annotations.Name != "default" {
			return errdefs.InvalidParameter(errors.New(`swarm spec must be named "default"`))
		}

		// In Update, the client should provide the complete spec of the swarm,
		// including Name and Labels. If a field is specified as 0 or nil, the
		// default value will be passed to swarmkit.
		clusterSpec, err := convert.SwarmSpecToGRPC(spec)
		if err != nil {
			return errdefs.InvalidParameter(err)
		}

		_, err = state.controlClient.UpdateCluster(
			ctx,
			&swarmapi.UpdateClusterRequest{
				ClusterID: swarm.ID,
				Spec:      &clusterSpec,
				ClusterVersion: &swarmapi.Version{
					Index: version,
				},
				Rotation: swarmapi.KeyRotation{
					WorkerJoinToken:  flags.RotateWorkerToken,
					ManagerJoinToken: flags.RotateManagerToken,
					ManagerUnlockKey: flags.RotateManagerUnlockKey,
				},
			},
		)
		return err
	})
}
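
// Illustrative only: callers usually obtain the current version via Inspect
// and pass it back so swarmkit can detect concurrent modifications; a rough
// sketch, with error handling elided:
//
//	sw, _ := c.Inspect()
//	err := c.Update(sw.Version.Index, sw.Spec, types.UpdateFlags{})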

// GetUnlockKey returns the unlock key for the swarm.
func (c *Cluster) GetUnlockKey() (string, error) {
	var resp *swarmapi.GetUnlockKeyResponse
	if err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		client := swarmapi.NewCAClient(state.grpcConn)

		r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{})
		if err != nil {
			return err
		}
		resp = r
		return nil
	}); err != nil {
		return "", err
	}
	if len(resp.UnlockKey) == 0 {
		// no key
		return "", nil
	}
	return encryption.HumanReadableKey(resp.UnlockKey), nil
}
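
// Illustrative only: the human-readable form produced here is what
// `docker swarm unlock-key` prints and `docker swarm unlock` expects,
// typically a string of the form "SWMKEY-1-<base64 payload>" (format
// assumed from swarmkit's encryption package).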

// UnlockSwarm provides a key to decrypt data that is encrypted at rest.
func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.RLock()
	state := c.currentNodeState()

	if !state.IsActiveManager() {
		// when the manager is not active, return an error
		// unless the swarm is locked
		if err := c.errNoManager(state); err != errSwarmLocked {
			c.mu.RUnlock()
			return err
		}
	} else {
		// when the manager is active, the swarm is not locked,
		// so return a "not locked" error
		c.mu.RUnlock()
		return notLockedError{}
	}

	// execution reaches this point only when the swarm is locked
	nr := c.nr
	c.mu.RUnlock()

	key, err := encryption.ParseHumanReadableKey(req.UnlockKey)
	if err != nil {
		return errdefs.InvalidParameter(err)
	}

	config := nr.config
	config.lockKey = key
	if err := nr.Stop(); err != nil {
		return err
	}
	nr, err = c.newNodeRunner(config)
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		if errors.Cause(err) == errSwarmLocked {
			return invalidUnlockKey{}
		}
		return errors.Errorf("swarm component could not be started: %v", err)
	}
	return nil
}

// Leave shuts down the Cluster node and removes the current swarm state.
func (c *Cluster) Leave(force bool) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	nr := c.nr
	if nr == nil {
		c.mu.Unlock()
		return errors.WithStack(errNoSwarm)
	}

	state := c.currentNodeState()

	c.mu.Unlock()

	if errors.Cause(state.err) == errSwarmLocked && !force {
		// leaving a locked swarm without --force is not allowed
		return errors.WithStack(notAvailableError("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message."))
	}

	if state.IsManager() && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if state.IsActiveManager() {
			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						return errors.WithStack(notAvailableError(msg))
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		return errors.WithStack(notAvailableError(msg))
	}
	// release readers in here
	if err := nr.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		return err
	}

	c.mu.Lock()
	c.nr = nil
	c.mu.Unlock()

	if nodeID := state.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}

	// todo: cleanup optional?
	if err := clearPersistentState(c.root); err != nil {
		return err
	}
	c.config.Backend.DaemonLeavesCluster()
	return nil
}

// Info returns information about the current cluster state.
func (c *Cluster) Info() types.Info {
	info := types.Info{
		NodeAddr: c.GetAdvertiseAddress(),
	}
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	info.LocalNodeState = state.status
	if state.err != nil {
		info.Error = state.err.Error()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	if state.IsActiveManager() {
		info.ControlAvailable = true
		swarm, err := c.inspect(ctx, state)
		if err != nil {
			info.Error = err.Error()
		}

		info.Cluster = &swarm.ClusterInfo

		if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err != nil {
			info.Error = err.Error()
		} else {
			info.Nodes = len(r.Nodes)
			for _, n := range r.Nodes {
				if n.ManagerStatus != nil {
					info.Managers++
				}
			}
		}
	}

	if state.swarmNode != nil {
		for _, r := range state.swarmNode.Remotes() {
			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
		}
		info.NodeID = state.swarmNode.NodeID()
	}

	return info
}

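// validateAndSanitizeInitRequest normalizes the listen address of an init
// request and applies the mandatory "default" swarm spec name.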
func validateAndSanitizeInitRequest(req *types.InitRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}

	if req.Spec.Annotations.Name == "" {
		req.Spec.Annotations.Name = "default"
	} else if req.Spec.Annotations.Name != "default" {
		return errors.New(`swarm spec must be named "default"`)
	}

	return nil
}

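// validateAndSanitizeJoinRequest normalizes the listen address and the remote
// manager addresses of a join request, requiring at least one remote address.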
func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}
	if len(req.RemoteAddrs) == 0 {
		return errors.New("at least 1 RemoteAddr is required to join")
	}
	for i := range req.RemoteAddrs {
		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
		if err != nil {
			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
		}
	}
	return nil
}

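// validateAddr rejects empty addresses and normalizes the rest to "host:port"
// form via opts.ParseTCPAddr. For example, assuming the package-level default
// address uses swarm's usual port 2377, "192.168.1.10" would come back as
// "192.168.1.10:2377".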
func validateAddr(addr string) (string, error) {
	if addr == "" {
		return addr, errors.New("invalid empty address")
	}
	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
	if err != nil {
		// If the address cannot be parsed as a TCP address, fall back to
		// returning it unchanged.
		return addr, nil
	}
	return strings.TrimPrefix(newaddr, "tcp://"), nil
}

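// initClusterSpec waits for the node's control socket to become available and
// then applies the user-provided spec on top of the defaults that swarmkit
// created for the new cluster.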
func initClusterSpec(node *swarmnode.Node, spec types.Spec) error {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return errors.New("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			// In init, we take the initial default values from swarmkit and
			// merge any non-nil, non-zero values from the spec into the gRPC
			// spec, leaving the remaining defaults alone.
			// Note that this is different from Update(), where we expect the
			// user to specify the complete spec of the cluster (since they
			// already know the existing one and which fields to update).
			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
				ClusterID:      cluster.ID,
				ClusterVersion: &cluster.Meta.Version,
				Spec:           &clusterSpec,
			})
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			return nil
		}
	}
	return ctx.Err()
}

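// listContainerForNode returns the IDs of the local containers labelled with
// the given swarm node ID, i.e. the containers created for swarm tasks on
// that node.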
func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
	var ids []string
	filterArgs := filters.NewArgs()
	filterArgs.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
		Filters: filterArgs,
	})
	if err != nil {
		return []string{}, err
	}
	for _, ctr := range containers {
		ids = append(ids, ctr.ID)
	}
	return ids, nil
}