github.com/docker/docker@v299999999.0.0-20200612211812-aaf470eca7b5+incompatible/daemon/cluster/cluster.go (about)

     1  package cluster // import "github.com/docker/docker/daemon/cluster"
     2  
     3  //
     4  // ## Swarmkit integration
     5  //
     6  // Cluster - static configurable object for accessing everything swarm related.
     7  // Contains methods for connecting and controlling the cluster. Exists always,
     8  // even if swarm mode is not enabled.
     9  //
    10  // NodeRunner - Manager for starting the swarmkit node. Is present only and
    11  // always if swarm mode is enabled. Implements backoff restart loop in case of
    12  // errors.
    13  //
    14  // NodeState - Information about the current node status including access to
    15  // gRPC clients if a manager is active.
    16  //
    17  // ### Locking
    18  //
// `cluster.controlMutex` - taken for the whole lifecycle of the processes that
// can reconfigure the cluster (init/join/leave etc.). Ensures that one
// reconfiguration action has fully completed before another can start.
    22  //
    23  // `cluster.mu` - taken when the actual changes in cluster configurations
    24  // happen. Different from `controlMutex` because in some cases we need to
    25  // access current cluster state even if the long-running reconfiguration is
    26  // going on. For example network stack may ask for the current cluster state in
    27  // the middle of the shutdown. Any time current cluster state is asked you
    28  // should take the read lock of `cluster.mu`. If you are writing an API
    29  // responder that returns synchronously, hold `cluster.mu.RLock()` for the
    30  // duration of the whole handler function. That ensures that node will not be
    31  // shut down until the handler has finished.
    32  //
// NodeRunner implements its internal locks that should not be used outside of
// the struct. Instead, you should just call the `nodeRunner.State()` method to
// get the current state of the cluster (you still need `cluster.mu.RLock()` to
// access the `cluster.nr` reference itself). Most of the changes in NodeRunner
// happen because of an external event (network problem, unexpected swarmkit
// error) and Docker shouldn't take any locks that delay these changes from
// happening.
    39  //
    40  
    41  import (
    42  	"context"
    43  	"fmt"
    44  	"math"
    45  	"net"
    46  	"os"
    47  	"path/filepath"
    48  	"runtime"
    49  	"sync"
    50  	"time"
    51  
    52  	"github.com/docker/docker/api/types/network"
    53  	types "github.com/docker/docker/api/types/swarm"
    54  	"github.com/docker/docker/daemon/cluster/controllers/plugin"
    55  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    56  	"github.com/docker/docker/pkg/signal"
    57  	lncluster "github.com/docker/libnetwork/cluster"
    58  	swarmapi "github.com/docker/swarmkit/api"
    59  	swarmnode "github.com/docker/swarmkit/node"
    60  	"github.com/pkg/errors"
    61  	"github.com/sirupsen/logrus"
    62  	"google.golang.org/grpc"
    63  )
    64  
// Package-level constants for the swarm integration.
//
// swarmDirName is joined onto Config.Root to form the swarm state directory.
// swarmConnectTimeout bounds how long Start waits for the node to report
// readiness, and swarmRequestTimeout is the deadline getRequestContext puts
// on manager requests. defaultRecvSizeForListResponse is passed to
// grpc.MaxCallRecvMsgSize when listing nodes in managerStats.
//
// NOTE(review): the remaining constants are not referenced in this part of
// the file; their names suggest socket/state file names, the default listen
// address and reconnect backoff bounds — confirm against their users.
const (
	swarmDirName                   = "swarm"
	controlSocket                  = "control.sock"
	swarmConnectTimeout            = 20 * time.Second
	swarmRequestTimeout            = 20 * time.Second
	stateFile                      = "docker-state.json"
	defaultAddr                    = "0.0.0.0:2377"
	isWindows                      = runtime.GOOS == "windows"
	initialReconnectDelay          = 100 * time.Millisecond
	maxReconnectDelay              = 30 * time.Second
	contextPrefix                  = "com.docker.swarm"
	defaultRecvSizeForListResponse = math.MaxInt32 // the max recv limit grpc <1.4.0
)
    78  
// NetworkSubnetsProvider exposes functions for retrieving the subnets
// of networks managed by Docker, so they can be filtered.
//
// NOTE(review): Subnets returns two slices of net.IPNet; what distinguishes
// the first from the second is not visible in this file — confirm against
// the implementation before relying on their order.
type NetworkSubnetsProvider interface {
	Subnets() ([]net.IPNet, []net.IPNet)
}
    84  
// Config provides values for Cluster.
//
// Root is the base directory under which New creates the swarm state
// directory (Root joined with swarmDirName). Backend is consulted by
// newNodeRunner (IsSwarmCompatible, DaemonJoinsCluster). The other backends
// and NetworkSubnetsProvider are stored on the Cluster as part of its config
// and are not used directly in this file's visible code.
type Config struct {
	Root                   string
	Name                   string
	Backend                executorpkg.Backend
	ImageBackend           executorpkg.ImageBackend
	PluginBackend          plugin.Backend
	VolumeBackend          executorpkg.VolumeBackend
	NetworkSubnetsProvider NetworkSubnetsProvider

	// DefaultAdvertiseAddr is the default host/IP or network interface to use
	// if no AdvertiseAddr value is specified.
	DefaultAdvertiseAddr string

	// RuntimeRoot is the path to store runtime state, such as the swarm
	// control socket. When left empty, New sets it to the swarm state
	// directory under Root.
	RuntimeRoot string

	// WatchStream is a channel to pass watch API notifications to daemon
	WatchStream chan *swarmapi.WatchMessage

	// RaftHeartbeatTick is the number of ticks for heartbeat of quorum members.
	// When zero, New sets it to 1.
	RaftHeartbeatTick uint32

	// RaftElectionTick is the number of ticks to elapse before followers propose a new round of leader election
	// This value should be 10x that of RaftHeartbeatTick. When zero, New sets
	// it to 10 * RaftHeartbeatTick.
	RaftElectionTick uint32
}
   112  
// Cluster provides capabilities to participate in a cluster as a worker or a
// manager.
//
// mu guards access to the nr reference (take at least the read lock before
// reading it); controlMutex serializes the long-running reconfiguration
// operations. nr is nil while no swarm node is running (Cleanup resets it to
// nil after stopping the node). root is the swarm state directory and
// runtimeRoot the runtime state directory, both fixed by New. configEvent is
// created by New with a buffer of 10 and is handed out by
// ListenClusterEvents; watchStream is copied from Config.WatchStream.
type Cluster struct {
	mu           sync.RWMutex
	controlMutex sync.RWMutex // protect init/join/leave user operations
	nr           *nodeRunner
	root         string
	runtimeRoot  string
	config       Config
	configEvent  chan lncluster.ConfigEventType // todo: make this array and goroutine safe
	attachers    map[string]*attacher
	watchStream  chan *swarmapi.WatchMessage
}
   126  
// attacher manages the in-memory attachment state of a container
// attachment to a global scope network managed by swarm manager. It
// helps in identifying the attachment ID via the taskID and the
// corresponding attachment configuration obtained from the manager.
//
// NOTE(review): the code that drives these fields is outside this part of
// the file; judging by their names, the three channels signal attachment
// configuration arrival, attachment completion and detachment — confirm
// against the attach/detach code paths.
type attacher struct {
	taskID           string
	config           *network.NetworkingConfig
	inProgress       bool
	attachWaitCh     chan *network.NetworkingConfig
	attachCompleteCh chan struct{}
	detachWaitCh     chan struct{}
}
   139  
   140  // New creates a new Cluster instance using provided config.
   141  func New(config Config) (*Cluster, error) {
   142  	root := filepath.Join(config.Root, swarmDirName)
   143  	if err := os.MkdirAll(root, 0700); err != nil {
   144  		return nil, err
   145  	}
   146  	if config.RuntimeRoot == "" {
   147  		config.RuntimeRoot = root
   148  	}
   149  	if config.RaftHeartbeatTick == 0 {
   150  		config.RaftHeartbeatTick = 1
   151  	}
   152  	if config.RaftElectionTick == 0 {
   153  		// 10X heartbeat tick is the recommended ratio according to etcd docs.
   154  		config.RaftElectionTick = 10 * config.RaftHeartbeatTick
   155  	}
   156  
   157  	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
   158  		return nil, err
   159  	}
   160  	c := &Cluster{
   161  		root:        root,
   162  		config:      config,
   163  		configEvent: make(chan lncluster.ConfigEventType, 10),
   164  		runtimeRoot: config.RuntimeRoot,
   165  		attachers:   make(map[string]*attacher),
   166  		watchStream: config.WatchStream,
   167  	}
   168  	return c, nil
   169  }
   170  
   171  // Start the Cluster instance
   172  // TODO The split between New and Start can be join again when the SendClusterEvent
   173  // method is no longer required
   174  func (c *Cluster) Start() error {
   175  	root := filepath.Join(c.config.Root, swarmDirName)
   176  
   177  	nodeConfig, err := loadPersistentState(root)
   178  	if err != nil {
   179  		if os.IsNotExist(err) {
   180  			return nil
   181  		}
   182  		return err
   183  	}
   184  
   185  	nr, err := c.newNodeRunner(*nodeConfig)
   186  	if err != nil {
   187  		return err
   188  	}
   189  	c.nr = nr
   190  
   191  	timer := time.NewTimer(swarmConnectTimeout)
   192  	defer timer.Stop()
   193  
   194  	select {
   195  	case <-timer.C:
   196  		logrus.Error("swarm component could not be started before timeout was reached")
   197  	case err := <-nr.Ready():
   198  		if err != nil {
   199  			logrus.WithError(err).Error("swarm component could not be started")
   200  			return nil
   201  		}
   202  	}
   203  	return nil
   204  }
   205  
   206  func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
   207  	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
   208  		return nil, err
   209  	}
   210  
   211  	actualLocalAddr := conf.LocalAddr
   212  	if actualLocalAddr == "" {
   213  		// If localAddr was not specified, resolve it automatically
   214  		// based on the route to joinAddr. localAddr can only be left
   215  		// empty on "join".
   216  		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
   217  		if err != nil {
   218  			return nil, fmt.Errorf("could not parse listen address: %v", err)
   219  		}
   220  
   221  		listenAddrIP := net.ParseIP(listenHost)
   222  		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
   223  			actualLocalAddr = listenHost
   224  		} else {
   225  			if conf.RemoteAddr == "" {
   226  				// Should never happen except using swarms created by
   227  				// old versions that didn't save remoteAddr.
   228  				conf.RemoteAddr = "8.8.8.8:53"
   229  			}
   230  			conn, err := net.Dial("udp", conf.RemoteAddr)
   231  			if err != nil {
   232  				return nil, fmt.Errorf("could not find local IP address: %v", err)
   233  			}
   234  			localHostPort := conn.LocalAddr().String()
   235  			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
   236  			conn.Close()
   237  		}
   238  	}
   239  
   240  	nr := &nodeRunner{cluster: c}
   241  	nr.actualLocalAddr = actualLocalAddr
   242  
   243  	if err := nr.Start(conf); err != nil {
   244  		return nil, err
   245  	}
   246  
   247  	c.config.Backend.DaemonJoinsCluster(c)
   248  
   249  	return nr, nil
   250  }
   251  
   252  func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on qourum lost
   253  	return context.WithTimeout(context.Background(), swarmRequestTimeout)
   254  }
   255  
   256  // IsManager returns true if Cluster is participating as a manager.
   257  func (c *Cluster) IsManager() bool {
   258  	c.mu.RLock()
   259  	defer c.mu.RUnlock()
   260  	return c.currentNodeState().IsActiveManager()
   261  }
   262  
   263  // IsAgent returns true if Cluster is participating as a worker/agent.
   264  func (c *Cluster) IsAgent() bool {
   265  	c.mu.RLock()
   266  	defer c.mu.RUnlock()
   267  	return c.currentNodeState().status == types.LocalNodeStateActive
   268  }
   269  
   270  // GetLocalAddress returns the local address.
   271  func (c *Cluster) GetLocalAddress() string {
   272  	c.mu.RLock()
   273  	defer c.mu.RUnlock()
   274  	return c.currentNodeState().actualLocalAddr
   275  }
   276  
   277  // GetListenAddress returns the listen address.
   278  func (c *Cluster) GetListenAddress() string {
   279  	c.mu.RLock()
   280  	defer c.mu.RUnlock()
   281  	if c.nr != nil {
   282  		return c.nr.config.ListenAddr
   283  	}
   284  	return ""
   285  }
   286  
   287  // GetAdvertiseAddress returns the remotely reachable address of this node.
   288  func (c *Cluster) GetAdvertiseAddress() string {
   289  	c.mu.RLock()
   290  	defer c.mu.RUnlock()
   291  	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
   292  		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
   293  		return advertiseHost
   294  	}
   295  	return c.currentNodeState().actualLocalAddr
   296  }
   297  
   298  // GetDataPathAddress returns the address to be used for the data path traffic, if specified.
   299  func (c *Cluster) GetDataPathAddress() string {
   300  	c.mu.RLock()
   301  	defer c.mu.RUnlock()
   302  	if c.nr != nil {
   303  		return c.nr.config.DataPathAddr
   304  	}
   305  	return ""
   306  }
   307  
   308  // GetRemoteAddressList returns the advertise address for each of the remote managers if
   309  // available.
   310  func (c *Cluster) GetRemoteAddressList() []string {
   311  	c.mu.RLock()
   312  	defer c.mu.RUnlock()
   313  	return c.getRemoteAddressList()
   314  }
   315  
   316  // GetWatchStream returns the channel to pass changes from store watch API
   317  func (c *Cluster) GetWatchStream() chan *swarmapi.WatchMessage {
   318  	c.mu.RLock()
   319  	defer c.mu.RUnlock()
   320  	return c.watchStream
   321  }
   322  
   323  func (c *Cluster) getRemoteAddressList() []string {
   324  	state := c.currentNodeState()
   325  	if state.swarmNode == nil {
   326  		return []string{}
   327  	}
   328  
   329  	nodeID := state.swarmNode.NodeID()
   330  	remotes := state.swarmNode.Remotes()
   331  	addressList := make([]string, 0, len(remotes))
   332  	for _, r := range remotes {
   333  		if r.NodeID != nodeID {
   334  			addressList = append(addressList, r.Addr)
   335  		}
   336  	}
   337  	return addressList
   338  }
   339  
   340  // ListenClusterEvents returns a channel that receives messages on cluster
   341  // participation changes.
   342  // todo: make cancelable and accessible to multiple callers
   343  func (c *Cluster) ListenClusterEvents() <-chan lncluster.ConfigEventType {
   344  	return c.configEvent
   345  }
   346  
   347  // currentNodeState should not be called without a read lock
   348  func (c *Cluster) currentNodeState() nodeState {
   349  	return c.nr.State()
   350  }
   351  
   352  // errNoManager returns error describing why manager commands can't be used.
   353  // Call with read lock.
   354  func (c *Cluster) errNoManager(st nodeState) error {
   355  	if st.swarmNode == nil {
   356  		if errors.Is(st.err, errSwarmLocked) {
   357  			return errSwarmLocked
   358  		}
   359  		if st.err == errSwarmCertificatesExpired {
   360  			return errSwarmCertificatesExpired
   361  		}
   362  		return errors.WithStack(notAvailableError("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again."))
   363  	}
   364  	if st.swarmNode.Manager() != nil {
   365  		return errors.WithStack(notAvailableError("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster."))
   366  	}
   367  	return errors.WithStack(notAvailableError("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager."))
   368  }
   369  
   370  // Cleanup stops active swarm node. This is run before daemon shutdown.
   371  func (c *Cluster) Cleanup() {
   372  	c.controlMutex.Lock()
   373  	defer c.controlMutex.Unlock()
   374  
   375  	c.mu.Lock()
   376  	node := c.nr
   377  	if node == nil {
   378  		c.mu.Unlock()
   379  		return
   380  	}
   381  	state := c.currentNodeState()
   382  	c.mu.Unlock()
   383  
   384  	if state.IsActiveManager() {
   385  		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
   386  		if err == nil {
   387  			singlenode := active && isLastManager(reachable, unreachable)
   388  			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
   389  				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
   390  			}
   391  		}
   392  	}
   393  
   394  	if err := node.Stop(); err != nil {
   395  		logrus.Errorf("failed to shut down cluster node: %v", err)
   396  		signal.DumpStacks("")
   397  	}
   398  
   399  	c.mu.Lock()
   400  	c.nr = nil
   401  	c.mu.Unlock()
   402  }
   403  
   404  func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
   405  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
   406  	defer cancel()
   407  	nodes, err := client.ListNodes(
   408  		ctx, &swarmapi.ListNodesRequest{},
   409  		grpc.MaxCallRecvMsgSize(defaultRecvSizeForListResponse),
   410  	)
   411  	if err != nil {
   412  		return false, 0, 0, err
   413  	}
   414  	for _, n := range nodes.Nodes {
   415  		if n.ManagerStatus != nil {
   416  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
   417  				reachable++
   418  				if n.ID == currentNodeID {
   419  					current = true
   420  				}
   421  			}
   422  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
   423  				unreachable++
   424  			}
   425  		}
   426  	}
   427  	return
   428  }
   429  
   430  func detectLockedError(err error) error {
   431  	if err == swarmnode.ErrInvalidUnlockKey {
   432  		return errors.WithStack(errSwarmLocked)
   433  	}
   434  	return err
   435  }
   436  
   437  func (c *Cluster) lockedManagerAction(fn func(ctx context.Context, state nodeState) error) error {
   438  	c.mu.RLock()
   439  	defer c.mu.RUnlock()
   440  
   441  	state := c.currentNodeState()
   442  	if !state.IsActiveManager() {
   443  		return c.errNoManager(state)
   444  	}
   445  
   446  	ctx, cancel := c.getRequestContext()
   447  	defer cancel()
   448  
   449  	return fn(ctx, state)
   450  }
   451  
   452  // SendClusterEvent allows to send cluster events on the configEvent channel
   453  // TODO This method should not be exposed.
   454  // Currently it is used to notify the network controller that the keys are
   455  // available
   456  func (c *Cluster) SendClusterEvent(event lncluster.ConfigEventType) {
   457  	c.mu.RLock()
   458  	defer c.mu.RUnlock()
   459  	c.configEvent <- event
   460  }