github.com/rhatdan/docker@v0.7.7-0.20180119204836-47a0dcbcd20a/daemon/cluster/cluster.go

package cluster

//
// ## Swarmkit integration
//
// Cluster - static, configurable object for accessing everything swarm
// related. Contains methods for connecting to and controlling the cluster.
// Always exists, even if swarm mode is not enabled.
//
// NodeRunner - manager for starting the swarmkit node. Present if and only
// if swarm mode is enabled. Implements a backoff restart loop in case of
// errors.
//
// NodeState - information about the current node's status, including access
// to gRPC clients if a manager is active.
//
// ### Locking
//
// `cluster.controlMutex` - taken for the whole lifecycle of the processes
// that can reconfigure the cluster (init/join/leave, etc.). Ensures that one
// reconfiguration action has fully completed before another can start.
//
// `cluster.mu` - taken when the actual changes to the cluster configuration
// happen. Different from `controlMutex` because in some cases we need to
// access the current cluster state even while a long-running reconfiguration
// is in progress. For example, the network stack may ask for the current
// cluster state in the middle of a shutdown. Any time the current cluster
// state is read, the read lock of `cluster.mu` must be held. If you are
// writing an API responder that returns synchronously, hold
// `cluster.mu.RLock()` for the duration of the whole handler function. That
// ensures the node will not be shut down until the handler has finished.
//
// NodeRunner implements its own internal locks that should not be used
// outside of the struct. Instead, call the `nodeRunner.State()` method to
// get the current state of the cluster (you still need `cluster.mu.RLock()`
// to access the `cluster.nr` reference itself). Most changes in NodeRunner
// happen because of an external event (network problem, unexpected swarmkit
// error), and Docker shouldn't take any locks that would delay these changes
// from happening.
//
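// A minimal sketch of the documented pattern for a synchronous API responder
// (the method name and error text are hypothetical, for illustration only):
//
//	func (c *Cluster) ExampleStatus() (nodeState, error) {
//		c.mu.RLock()
//		defer c.mu.RUnlock() // held for the whole synchronous handler
//
//		if c.nr == nil { // c.mu guards the nr reference itself
//			return nodeState{}, errors.New("swarm mode is not enabled")
//		}
//		// nodeRunner.State() does its own internal locking.
//		return c.nr.State(), nil
//	}
//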

import (
	"fmt"
	"net"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/docker/docker/api/types/network"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/controllers/plugin"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/docker/pkg/signal"
	lncluster "github.com/docker/libnetwork/cluster"
	swarmapi "github.com/docker/swarmkit/api"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"golang.org/x/net/context"
)

const swarmDirName = "swarm"
const controlSocket = "control.sock"
const swarmConnectTimeout = 20 * time.Second
const swarmRequestTimeout = 20 * time.Second
const stateFile = "docker-state.json"
const defaultAddr = "0.0.0.0:2377"

const (
	initialReconnectDelay = 100 * time.Millisecond
	maxReconnectDelay     = 30 * time.Second
	contextPrefix         = "com.docker.swarm"
)

// NetworkSubnetsProvider exposes functions for retrieving the subnets
// of networks managed by Docker, so they can be filtered.
type NetworkSubnetsProvider interface {
	Subnets() ([]net.IPNet, []net.IPNet)
}

// Config provides values for Cluster.
type Config struct {
	Root                   string
	Name                   string
	Backend                executorpkg.Backend
	PluginBackend          plugin.Backend
	NetworkSubnetsProvider NetworkSubnetsProvider

	// DefaultAdvertiseAddr is the default host/IP or network interface to use
	// if no AdvertiseAddr value is specified.
	DefaultAdvertiseAddr string

	// RuntimeRoot is the path to store runtime state, such as the swarm
	// control socket.
	RuntimeRoot string

	// WatchStream is a channel to pass watch API notifications to the daemon.
	WatchStream chan *swarmapi.WatchMessage
}

// Cluster provides capabilities to participate in a cluster as a worker or a
// manager.
type Cluster struct {
	mu           sync.RWMutex
	controlMutex sync.RWMutex // protects init/join/leave user operations
	nr           *nodeRunner
	root         string
	runtimeRoot  string
	config       Config
	configEvent  chan lncluster.ConfigEventType // todo: make this array and goroutine safe
	attachers    map[string]*attacher
	watchStream  chan *swarmapi.WatchMessage
}

// attacher manages the in-memory attachment state of a container's
// attachment to a global-scope network managed by the swarm manager. It
// helps identify the attachment ID via the taskID and the corresponding
// attachment configuration obtained from the manager.
type attacher struct {
	taskID           string
	config           *network.NetworkingConfig
	inProgress       bool
	attachWaitCh     chan *network.NetworkingConfig
	attachCompleteCh chan struct{}
	detachWaitCh     chan struct{}
}

// New creates a new Cluster instance using the provided config.
func New(config Config) (*Cluster, error) {
	root := filepath.Join(config.Root, swarmDirName)
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}
	if config.RuntimeRoot == "" {
		config.RuntimeRoot = root
	}
	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
		return nil, err
	}
	c := &Cluster{
		root:        root,
		config:      config,
		configEvent: make(chan lncluster.ConfigEventType, 10),
		runtimeRoot: config.RuntimeRoot,
		attachers:   make(map[string]*attacher),
		watchStream: config.WatchStream,
	}
	return c, nil
}

// Start the Cluster instance.
// TODO: The split between New and Start can be joined again when the
// SendClusterEvent method is no longer required.
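//
// A minimal sketch of the intended call order (the Root path is illustrative
// and other Config fields are elided):
//
//	c, err := cluster.New(cluster.Config{Root: "/var/lib/docker"})
//	if err != nil {
//		return err
//	}
//	if err := c.Start(); err != nil {
//		return err
//	}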
func (c *Cluster) Start() error {
	root := filepath.Join(c.config.Root, swarmDirName)

	nodeConfig, err := loadPersistentState(root)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	nr, err := c.newNodeRunner(*nodeConfig)
	if err != nil {
		return err
	}
	c.nr = nr

	select {
	case <-time.After(swarmConnectTimeout):
		logrus.Error("swarm component could not be started before timeout was reached")
	case err := <-nr.Ready():
		if err != nil {
			logrus.WithError(err).Error("swarm component could not be started")
			return nil
		}
	}
	return nil
}

func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}

	actualLocalAddr := conf.LocalAddr
	if actualLocalAddr == "" {
		// If localAddr was not specified, resolve it automatically
		// based on the route to joinAddr. localAddr can only be left
		// empty on "join".
		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
		if err != nil {
			return nil, fmt.Errorf("could not parse listen address: %v", err)
		}

		listenAddrIP := net.ParseIP(listenHost)
		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
			actualLocalAddr = listenHost
		} else {
			if conf.RemoteAddr == "" {
				// Should never happen except using swarms created by
				// old versions that didn't save remoteAddr.
				conf.RemoteAddr = "8.8.8.8:53"
			}
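			// Note: dialing UDP sends no packets; it only has the kernel
			// choose the local source address it would use to reach
			// RemoteAddr, which is then adopted as the local address.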
			conn, err := net.Dial("udp", conf.RemoteAddr)
			if err != nil {
				return nil, fmt.Errorf("could not find local IP address: %v", err)
			}
			localHostPort := conn.LocalAddr().String()
			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
			conn.Close()
		}
	}

	nr := &nodeRunner{cluster: c}
	nr.actualLocalAddr = actualLocalAddr

	if err := nr.Start(conf); err != nil {
		return nil, err
	}

	c.config.Backend.DaemonJoinsCluster(c)

	return nr, nil
}

func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum lost
	return context.WithTimeout(context.Background(), swarmRequestTimeout)
}

// IsManager returns true if Cluster is participating as a manager.
func (c *Cluster) IsManager() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().IsActiveManager()
}

// IsAgent returns true if Cluster is participating as a worker/agent.
func (c *Cluster) IsAgent() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().status == types.LocalNodeStateActive
}

// GetLocalAddress returns the local address.
func (c *Cluster) GetLocalAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().actualLocalAddr
}

// GetListenAddress returns the listen address.
func (c *Cluster) GetListenAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.ListenAddr
	}
	return ""
}

// GetAdvertiseAddress returns the remotely reachable address of this node.
func (c *Cluster) GetAdvertiseAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
		return advertiseHost
	}
	return c.currentNodeState().actualLocalAddr
}

// GetDataPathAddress returns the address to be used for the data path traffic, if specified.
func (c *Cluster) GetDataPathAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.DataPathAddr
	}
	return ""
}

// GetRemoteAddressList returns the advertise address for each of the remote
// managers, if available.
func (c *Cluster) GetRemoteAddressList() []string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.getRemoteAddressList()
}

func (c *Cluster) getRemoteAddressList() []string {
	state := c.currentNodeState()
	if state.swarmNode == nil {
		return []string{}
	}

	nodeID := state.swarmNode.NodeID()
	remotes := state.swarmNode.Remotes()
	addressList := make([]string, 0, len(remotes))
	for _, r := range remotes {
		if r.NodeID != nodeID {
			addressList = append(addressList, r.Addr)
		}
	}
	return addressList
}

// ListenClusterEvents returns a channel that receives messages on cluster
// participation changes.
// todo: make cancelable and accessible to multiple callers
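//
// A sketch of a typical single consumer (the handler is hypothetical):
//
//	for ev := range c.ListenClusterEvents() {
//		handleClusterEvent(ev) // hypothetical handler
//	}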
func (c *Cluster) ListenClusterEvents() <-chan lncluster.ConfigEventType {
	return c.configEvent
}

// currentNodeState should not be called without a read lock
func (c *Cluster) currentNodeState() nodeState {
	return c.nr.State()
}

// errNoManager returns an error describing why manager commands can't be used.
// Call with read lock.
func (c *Cluster) errNoManager(st nodeState) error {
	if st.swarmNode == nil {
		if errors.Cause(st.err) == errSwarmLocked {
			return errSwarmLocked
		}
		if st.err == errSwarmCertificatesExpired {
			return errSwarmCertificatesExpired
		}
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again."))
	}
	if st.swarmNode.Manager() != nil {
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster."))
	}
	return errors.WithStack(notAvailableError("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager."))
}

// Cleanup stops the active swarm node. This is run before daemon shutdown.
func (c *Cluster) Cleanup() {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	node := c.nr
	if node == nil {
		c.mu.Unlock()
		return
	}
	state := c.currentNodeState()
	c.mu.Unlock()

	if state.IsActiveManager() {
		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
		if err == nil {
			singlenode := active && isLastManager(reachable, unreachable)
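			// Raft needs a majority of managers to be reachable. For
			// example (assuming 2 reachable and 1 unreachable manager):
			// leaving drops the reachable count to 1 of 3 total, below
			// the majority of 2, so quorum would be lost.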
			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
			}
		}
	}

	if err := node.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
	}

	c.mu.Lock()
	c.nr = nil
	c.mu.Unlock()
}

func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	nodes, err := client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
	if err != nil {
		return false, 0, 0, err
	}
	for _, n := range nodes.Nodes {
		if n.ManagerStatus != nil {
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
				reachable++
				if n.ID == currentNodeID {
					current = true
				}
			}
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
				unreachable++
			}
		}
	}
	return
}

func detectLockedError(err error) error {
	if err == swarmnode.ErrInvalidUnlockKey {
		return errors.WithStack(errSwarmLocked)
	}
	return err
}

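// lockedManagerAction runs fn under the cluster read lock after verifying
// that this node is an active manager. A sketch of a typical caller (the
// request shown is illustrative):
//
//	return c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
//		_, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{})
//		return err
//	})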
func (c *Cluster) lockedManagerAction(fn func(ctx context.Context, state nodeState) error) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	return fn(ctx, state)
}

// SendClusterEvent allows sending cluster events on the configEvent channel.
// TODO: This method should not be exposed.
// Currently it is used to notify the network controller that the keys are
// available.
func (c *Cluster) SendClusterEvent(event lncluster.ConfigEventType) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	c.configEvent <- event
}