github.com/zhouyu0/docker-note@v0.0.0-20190722021225-b8d3825084db/daemon/cluster/cluster.go

package cluster // import "github.com/docker/docker/daemon/cluster"

//
// ## Swarmkit integration
//
// Cluster - static, configurable object for accessing everything swarm-related.
// It contains methods for connecting to and controlling the cluster, and it
// always exists, even if swarm mode is not enabled.
//
// NodeRunner - manager for starting the swarmkit node. It is present if and
// only if swarm mode is enabled, and it implements a backoff restart loop in
// case of errors.
//
// NodeState - Information about the current node's status, including access to
// gRPC clients if a manager is active.
//
// ### Locking
//
// `cluster.controlMutex` - taken for the whole lifecycle of the processes that
// can reconfigure the cluster (init/join/leave, etc.). It ensures that one
// reconfiguration action has fully completed before another can start.
//
// `cluster.mu` - taken when the actual changes to the cluster configuration
// happen. It differs from `controlMutex` because in some cases we need to
// access the current cluster state even while a long-running reconfiguration
// is going on. For example, the network stack may ask for the current cluster
// state in the middle of a shutdown. Any time the current cluster state is
// read, take the read lock of `cluster.mu`. If you are writing an API
// responder that returns synchronously, hold `cluster.mu.RLock()` for the
// duration of the whole handler function (see the sketch at the end of this
// comment). That ensures the node will not be shut down until the handler has
// finished.
//
// NodeRunner implements its own internal locks that should not be used outside
// of the struct. Instead, you should just call the `nodeRunner.State()` method
// to get the current state of the cluster (you still need `cluster.mu.RLock()`
// to access the `cluster.nr` reference itself). Most of the changes in
// NodeRunner happen because of an external event (a network problem, an
// unexpected swarmkit error), and Docker shouldn't take any locks that delay
// these changes from happening.
//
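// As an illustration of the read-lock rule above, a synchronous manager-only
// API responder could look roughly like the following. This is a minimal
// sketch, not code from this file, and the handler name is hypothetical:
//
//	// getSomething is a hypothetical synchronous API responder. The read
//	// lock is held for the whole handler, so the node cannot be shut down
//	// underneath it.
//	func (c *Cluster) getSomething() (types.Info, error) {
//		c.mu.RLock()
//		defer c.mu.RUnlock()
//
//		state := c.currentNodeState()
//		if !state.IsActiveManager() {
//			return types.Info{}, c.errNoManager(state)
//		}
//		// ... use state.controlClient while the read lock is held ...
//		return types.Info{}, nil
//	}
//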

import (
	"context"
	"fmt"
	"math"
	"net"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/docker/docker/api/types/network"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/controllers/plugin"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/docker/pkg/signal"
	lncluster "github.com/docker/libnetwork/cluster"
	swarmapi "github.com/docker/swarmkit/api"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

const swarmDirName = "swarm"
const controlSocket = "control.sock"
const swarmConnectTimeout = 20 * time.Second
const swarmRequestTimeout = 20 * time.Second
const stateFile = "docker-state.json"
const defaultAddr = "0.0.0.0:2377"

const (
	initialReconnectDelay          = 100 * time.Millisecond
	maxReconnectDelay              = 30 * time.Second
	contextPrefix                  = "com.docker.swarm"
	defaultRecvSizeForListResponse = math.MaxInt32 // the default max receive limit in grpc <1.4.0
)

// NetworkSubnetsProvider exposes functions for retrieving the subnets
// of networks managed by Docker, so they can be filtered.
type NetworkSubnetsProvider interface {
	Subnets() ([]net.IPNet, []net.IPNet)
}

// Config provides values for Cluster.
type Config struct {
	Root                   string
	Name                   string
	Backend                executorpkg.Backend
	ImageBackend           executorpkg.ImageBackend
	PluginBackend          plugin.Backend
	VolumeBackend          executorpkg.VolumeBackend
	NetworkSubnetsProvider NetworkSubnetsProvider

	// DefaultAdvertiseAddr is the default host/IP or network interface to use
	// if no AdvertiseAddr value is specified.
	DefaultAdvertiseAddr string

	// RuntimeRoot is the path to store runtime state, such as the swarm control socket.
	RuntimeRoot string

	// WatchStream is a channel to pass watch API notifications to the daemon.
	WatchStream chan *swarmapi.WatchMessage

	// RaftHeartbeatTick is the number of ticks between heartbeats of quorum members.
	RaftHeartbeatTick uint32

	// RaftElectionTick is the number of ticks that must elapse before followers propose a new round of leader election.
	// This value should be 10x that of RaftHeartbeatTick.
	RaftElectionTick uint32
}

// Cluster provides capabilities to participate in a cluster as a worker or a
// manager.
type Cluster struct {
	mu           sync.RWMutex
	controlMutex sync.RWMutex // protect init/join/leave user operations
	nr           *nodeRunner
	root         string
	runtimeRoot  string
	config       Config
	configEvent  chan lncluster.ConfigEventType // todo: make this array and goroutine safe
	attachers    map[string]*attacher
	watchStream  chan *swarmapi.WatchMessage
}

// attacher manages the in-memory attachment state of a container
// attached to a global-scope network managed by the swarm manager. It
// helps in identifying the attachment ID via the taskID and holds the
// corresponding attachment configuration obtained from the manager.
type attacher struct {
	taskID           string
	config           *network.NetworkingConfig
	inProgress       bool
	attachWaitCh     chan *network.NetworkingConfig
	attachCompleteCh chan struct{}
	detachWaitCh     chan struct{}
}

// New creates a new Cluster instance using the provided config.
func New(config Config) (*Cluster, error) {
	root := filepath.Join(config.Root, swarmDirName)
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}
	if config.RuntimeRoot == "" {
		config.RuntimeRoot = root
	}
	if config.RaftHeartbeatTick == 0 {
		config.RaftHeartbeatTick = 1
	}
	if config.RaftElectionTick == 0 {
		// A 10x heartbeat tick is the recommended ratio according to the etcd docs.
		config.RaftElectionTick = 10 * config.RaftHeartbeatTick
	}

	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
		return nil, err
	}
	c := &Cluster{
		root:        root,
		config:      config,
		configEvent: make(chan lncluster.ConfigEventType, 10),
		runtimeRoot: config.RuntimeRoot,
		attachers:   make(map[string]*attacher),
		watchStream: config.WatchStream,
	}
	return c, nil
}

// Start starts the Cluster instance.
// TODO The split between New and Start can be joined again when the SendClusterEvent
// method is no longer required.
func (c *Cluster) Start() error {
	root := filepath.Join(c.config.Root, swarmDirName)

	nodeConfig, err := loadPersistentState(root)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	nr, err := c.newNodeRunner(*nodeConfig)
	if err != nil {
		return err
	}
	c.nr = nr

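	// Wait for the node runner to signal readiness, but do not fail daemon
	// startup if the swarm component cannot come up before the timeout; in
	// both cases the problem is only logged and Start still returns nil.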
	select {
	case <-time.After(swarmConnectTimeout):
		logrus.Error("swarm component could not be started before timeout was reached")
	case err := <-nr.Ready():
		if err != nil {
			logrus.WithError(err).Error("swarm component could not be started")
			return nil
		}
	}
	return nil
}

func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}

	actualLocalAddr := conf.LocalAddr
	if actualLocalAddr == "" {
		// If localAddr was not specified, resolve it automatically
		// based on the route to joinAddr. localAddr can only be left
		// empty on "join".
		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
		if err != nil {
			return nil, fmt.Errorf("could not parse listen address: %v", err)
		}

		listenAddrIP := net.ParseIP(listenHost)
		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
			actualLocalAddr = listenHost
		} else {
			if conf.RemoteAddr == "" {
				// Should never happen except when using swarms created by
				// old versions that didn't save remoteAddr.
				conf.RemoteAddr = "8.8.8.8:53"
			}
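			// Dialing a UDP address sends no packets; it only asks the kernel
			// which local interface address would be used to reach RemoteAddr,
			// and that address is then taken as this node's local address.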
			conn, err := net.Dial("udp", conf.RemoteAddr)
			if err != nil {
				return nil, fmt.Errorf("could not find local IP address: %v", err)
			}
			localHostPort := conn.LocalAddr().String()
			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
			conn.Close()
		}
	}

	nr := &nodeRunner{cluster: c}
	nr.actualLocalAddr = actualLocalAddr

	if err := nr.Start(conf); err != nil {
		return nil, err
	}

	c.config.Backend.DaemonJoinsCluster(c)

	return nr, nil
}

func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum lost
	return context.WithTimeout(context.Background(), swarmRequestTimeout)
}

// IsManager returns true if Cluster is participating as a manager.
func (c *Cluster) IsManager() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().IsActiveManager()
}

// IsAgent returns true if Cluster is participating as a worker/agent.
func (c *Cluster) IsAgent() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().status == types.LocalNodeStateActive
}

// GetLocalAddress returns the local address.
func (c *Cluster) GetLocalAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().actualLocalAddr
}

// GetListenAddress returns the listen address.
func (c *Cluster) GetListenAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.ListenAddr
	}
	return ""
}

// GetAdvertiseAddress returns the remotely reachable address of this node.
func (c *Cluster) GetAdvertiseAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
		return advertiseHost
	}
	return c.currentNodeState().actualLocalAddr
}

// GetDataPathAddress returns the address to be used for the data path traffic, if specified.
func (c *Cluster) GetDataPathAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.DataPathAddr
	}
	return ""
}

// GetRemoteAddressList returns the advertise address for each of the remote managers if
// available.
func (c *Cluster) GetRemoteAddressList() []string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.getRemoteAddressList()
}

// GetWatchStream returns the channel used to pass changes from the store watch API.
func (c *Cluster) GetWatchStream() chan *swarmapi.WatchMessage {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.watchStream
}

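// getRemoteAddressList returns the addresses of the known remote managers,
// excluding this node itself. Callers must hold at least the read lock on
// c.mu (the exported GetRemoteAddressList wrapper does this).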
func (c *Cluster) getRemoteAddressList() []string {
	state := c.currentNodeState()
	if state.swarmNode == nil {
		return []string{}
	}

	nodeID := state.swarmNode.NodeID()
	remotes := state.swarmNode.Remotes()
	addressList := make([]string, 0, len(remotes))
	for _, r := range remotes {
		if r.NodeID != nodeID {
			addressList = append(addressList, r.Addr)
		}
	}
	return addressList
}

// ListenClusterEvents returns a channel that receives messages on cluster
// participation changes.
// todo: make cancelable and accessible to multiple callers
func (c *Cluster) ListenClusterEvents() <-chan lncluster.ConfigEventType {
	return c.configEvent
}

// currentNodeState should not be called without a read lock
func (c *Cluster) currentNodeState() nodeState {
	return c.nr.State()
}

// errNoManager returns an error describing why manager commands can't be used.
// Call with the read lock held.
func (c *Cluster) errNoManager(st nodeState) error {
	if st.swarmNode == nil {
		if errors.Cause(st.err) == errSwarmLocked {
			return errSwarmLocked
		}
		if st.err == errSwarmCertificatesExpired {
			return errSwarmCertificatesExpired
		}
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again."))
	}
	if st.swarmNode.Manager() != nil {
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster."))
	}
	return errors.WithStack(notAvailableError("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager."))
}

// Cleanup stops the active swarm node. This is run before daemon shutdown.
func (c *Cluster) Cleanup() {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	node := c.nr
	if node == nil {
		c.mu.Unlock()
		return
	}
	state := c.currentNodeState()
	c.mu.Unlock()

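	// If this node is an active manager, log an error when stopping it would
	// cause the remaining cluster to lose raft quorum (unless this is the only
	// manager left).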
	if state.IsActiveManager() {
		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
		if err == nil {
			singlenode := active && isLastManager(reachable, unreachable)
			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
			}
		}
	}

	if err := node.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
	}

	c.mu.Lock()
	c.nr = nil
	c.mu.Unlock()
}

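// managerStats reports whether the node identified by currentNodeID is a
// reachable manager, along with the number of reachable and unreachable
// managers in the cluster.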
func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	nodes, err := client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
	if err != nil {
		return false, 0, 0, err
	}
	for _, n := range nodes.Nodes {
		if n.ManagerStatus != nil {
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
				reachable++
				if n.ID == currentNodeID {
					current = true
				}
			}
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
				unreachable++
			}
		}
	}
	return
}

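// detectLockedError translates swarmkit's ErrInvalidUnlockKey into the local
// errSwarmLocked sentinel so callers can tell that the swarm is locked; any
// other error is returned unchanged.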
func detectLockedError(err error) error {
	if err == swarmnode.ErrInvalidUnlockKey {
		return errors.WithStack(errSwarmLocked)
	}
	return err
}

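// lockedManagerAction runs fn while holding the cluster read lock and with a
// request-scoped context. It returns an errNoManager error if this node is
// not an active manager.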
func (c *Cluster) lockedManagerAction(fn func(ctx context.Context, state nodeState) error) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	return fn(ctx, state)
}

// SendClusterEvent sends cluster events on the configEvent channel.
// TODO This method should not be exposed.
// Currently it is used to notify the network controller that the keys are
// available.
func (c *Cluster) SendClusterEvent(event lncluster.ConfigEventType) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	c.configEvent <- event
}