github.com/fabiokung/docker@v0.11.2-0.20170222101415-4534dcd49497/daemon/cluster/cluster.go

     1  package cluster
     2  
     3  //
     4  // ## Swarmkit integration
     5  //
      6  // Cluster - static, configurable object for accessing everything swarm-related.
      7  // Contains methods for connecting to and controlling the cluster. Always
      8  // exists, even if swarm mode is not enabled.
      9  //
     10  // NodeRunner - manager for starting the swarmkit node. Present if and only if
     11  // swarm mode is enabled. Implements a backoff restart loop in case of
     12  // errors.
     13  //
     14  // NodeState - information about the current node status, including access to
     15  // gRPC clients if a manager is active.
    16  //
    17  // ### Locking
    18  //
     19  // `cluster.controlMutex` - taken for the whole lifecycle of the processes that
     20  // can reconfigure the cluster (init/join/leave, etc.). Ensures that one
     21  // reconfiguration action has fully completed before another can start.
    22  //
     23  // `cluster.mu` - taken when the actual changes to the cluster configuration
     24  // happen. Different from `controlMutex` because in some cases the current
     25  // cluster state needs to be accessed even while a long-running reconfiguration
     26  // is in progress. For example, the network stack may ask for the current
     27  // cluster state in the middle of a shutdown. Any time the current cluster
     28  // state is queried, take the read lock on `cluster.mu`. If you are writing an
     29  // API responder that returns synchronously, hold `cluster.mu.RLock()` for the
     30  // duration of the whole handler function. That ensures that the node will not
     31  // be shut down until the handler has finished.
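//
// For example, a synchronous read-only responder follows roughly this shape
// (illustrative sketch; exampleStatus is hypothetical and not a method of
// this package):
//
//	func (c *Cluster) exampleStatus() types.LocalNodeState {
//		c.mu.RLock()
//		defer c.mu.RUnlock()
//		return c.currentNodeState().status
//	}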
    32  //
     33  // NodeRunner has its own internal locks that should not be used outside of
     34  // the struct. Instead, call the `nodeRunner.State()` method to get the
     35  // current state of the cluster (you still need `cluster.mu.RLock()` to access
     36  // the `cluster.nr` reference itself). Most of the changes in NodeRunner happen
     37  // because of an external event (network problem, unexpected swarmkit error) and
     38  // Docker shouldn't take any locks that delay these changes from happening.
    39  //
    40  
    41  import (
    42  	"crypto/x509"
    43  	"fmt"
    44  	"net"
    45  	"os"
    46  	"path/filepath"
    47  	"sync"
    48  	"time"
    49  
    50  	"github.com/Sirupsen/logrus"
    51  	"github.com/docker/docker/api/types/network"
    52  	types "github.com/docker/docker/api/types/swarm"
    53  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    54  	"github.com/docker/docker/pkg/signal"
    55  	swarmapi "github.com/docker/swarmkit/api"
    56  	swarmnode "github.com/docker/swarmkit/node"
    57  	"github.com/pkg/errors"
    58  	"golang.org/x/net/context"
    59  )
    60  
    61  const swarmDirName = "swarm"
    62  const controlSocket = "control.sock"
    63  const swarmConnectTimeout = 20 * time.Second
    64  const swarmRequestTimeout = 20 * time.Second
    65  const stateFile = "docker-state.json"
    66  const defaultAddr = "0.0.0.0:2377"
    67  
    68  const (
    69  	initialReconnectDelay = 100 * time.Millisecond
    70  	maxReconnectDelay     = 30 * time.Second
    71  	contextPrefix         = "com.docker.swarm"
    72  )
    73  
    74  // errNoSwarm is returned on leaving a cluster that was never initialized
    75  var errNoSwarm = errors.New("This node is not part of a swarm")
    76  
     77  // errSwarmExists is returned on an initialize or join request for a cluster that has already been activated.
    78  var errSwarmExists = errors.New("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")
    79  
     80  // errSwarmJoinTimeoutReached is returned when a cluster join could not complete before the timeout was reached.
    81  var errSwarmJoinTimeoutReached = errors.New("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")
    82  
    83  // errSwarmLocked is returned if the swarm is encrypted and needs a key to unlock it.
    84  var errSwarmLocked = errors.New("Swarm is encrypted and needs to be unlocked before it can be used. Please use \"docker swarm unlock\" to unlock it.")
    85  
     86  // errSwarmCertificatesExpired is returned if Docker was not running for the whole certificate validity period, so the certificates had no chance to renew automatically.
    87  var errSwarmCertificatesExpired = errors.New("Swarm certificates have expired. To replace them, leave the swarm and join again.")
    88  
    89  // NetworkSubnetsProvider exposes functions for retrieving the subnets
    90  // of networks managed by Docker, so they can be filtered.
    91  type NetworkSubnetsProvider interface {
    92  	V4Subnets() []net.IPNet
    93  	V6Subnets() []net.IPNet
    94  }
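
// A minimal implementation of this interface might look like the following
// (illustrative sketch; staticSubnets does not exist in this package):
//
//	type staticSubnets struct{ v4, v6 []net.IPNet }
//
//	func (s staticSubnets) V4Subnets() []net.IPNet { return s.v4 }
//	func (s staticSubnets) V6Subnets() []net.IPNet { return s.v6 }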
    95  
    96  // Config provides values for Cluster.
    97  type Config struct {
    98  	Root                   string
    99  	Name                   string
   100  	Backend                executorpkg.Backend
   101  	NetworkSubnetsProvider NetworkSubnetsProvider
   102  
   103  	// DefaultAdvertiseAddr is the default host/IP or network interface to use
   104  	// if no AdvertiseAddr value is specified.
   105  	DefaultAdvertiseAddr string
   106  
   107  	// path to store runtime state, such as the swarm control socket
   108  	RuntimeRoot string
   109  }
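
// The daemon would typically construct a Cluster roughly as follows
// (illustrative sketch; the concrete values and the backend/provider
// variables are assumptions, not code from this repository):
//
//	c, err := cluster.New(cluster.Config{
//		Root:                   "/var/lib/docker",
//		Backend:                daemonBackend,   // implements executorpkg.Backend
//		NetworkSubnetsProvider: networkProvider, // implements NetworkSubnetsProvider
//	})
//	if err != nil {
//		logrus.Error(err)
//	}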
   110  
   111  // Cluster provides capabilities to participate in a cluster as a worker or a
   112  // manager.
   113  type Cluster struct {
   114  	mu           sync.RWMutex
   115  	controlMutex sync.RWMutex // protect init/join/leave user operations
   116  	nr           *nodeRunner
   117  	root         string
   118  	runtimeRoot  string
   119  	config       Config
   120  	configEvent  chan struct{} // todo: make this array and goroutine safe
   121  	attachers    map[string]*attacher
   122  }
   123  
    124  // attacher manages the in-memory attachment state of a container's
    125  // attachment to a global-scope network managed by the swarm manager. It
    126  // helps in identifying the attachment ID via the taskID, and holds the
    127  // corresponding attachment configuration obtained from the manager.
   128  type attacher struct {
   129  	taskID           string
   130  	config           *network.NetworkingConfig
   131  	attachWaitCh     chan *network.NetworkingConfig
   132  	attachCompleteCh chan struct{}
   133  	detachWaitCh     chan struct{}
   134  }
   135  
   136  // New creates a new Cluster instance using provided config.
   137  func New(config Config) (*Cluster, error) {
   138  	root := filepath.Join(config.Root, swarmDirName)
   139  	if err := os.MkdirAll(root, 0700); err != nil {
   140  		return nil, err
   141  	}
   142  	if config.RuntimeRoot == "" {
   143  		config.RuntimeRoot = root
   144  	}
   145  	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
   146  		return nil, err
   147  	}
   148  	c := &Cluster{
   149  		root:        root,
   150  		config:      config,
   151  		configEvent: make(chan struct{}, 10),
   152  		runtimeRoot: config.RuntimeRoot,
   153  		attachers:   make(map[string]*attacher),
   154  	}
   155  
   156  	nodeConfig, err := loadPersistentState(root)
   157  	if err != nil {
   158  		if os.IsNotExist(err) {
   159  			return c, nil
   160  		}
   161  		return nil, err
   162  	}
   163  
   164  	nr, err := c.newNodeRunner(*nodeConfig)
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  	c.nr = nr
   169  
   170  	select {
   171  	case <-time.After(swarmConnectTimeout):
   172  		logrus.Error("swarm component could not be started before timeout was reached")
   173  	case err := <-nr.Ready():
   174  		if err != nil {
   175  			if errors.Cause(err) == errSwarmLocked {
   176  				return c, nil
   177  			}
   178  			if err, ok := errors.Cause(c.nr.err).(x509.CertificateInvalidError); ok && err.Reason == x509.Expired {
   179  				return c, nil
   180  			}
   181  			return nil, errors.Wrap(err, "swarm component could not be started")
   182  		}
   183  	}
   184  	return c, nil
   185  }
   186  
   187  func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
   188  	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
   189  		return nil, err
   190  	}
   191  
   192  	actualLocalAddr := conf.LocalAddr
   193  	if actualLocalAddr == "" {
   194  		// If localAddr was not specified, resolve it automatically
   195  		// based on the route to joinAddr. localAddr can only be left
   196  		// empty on "join".
   197  		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
   198  		if err != nil {
   199  			return nil, fmt.Errorf("could not parse listen address: %v", err)
   200  		}
   201  
   202  		listenAddrIP := net.ParseIP(listenHost)
   203  		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
   204  			actualLocalAddr = listenHost
   205  		} else {
   206  			if conf.RemoteAddr == "" {
    207  				// Should never happen, except with swarms created by
    208  				// old versions that didn't save remoteAddr.
   209  				conf.RemoteAddr = "8.8.8.8:53"
   210  			}
   211  			conn, err := net.Dial("udp", conf.RemoteAddr)
   212  			if err != nil {
   213  				return nil, fmt.Errorf("could not find local IP address: %v", err)
   214  			}
   215  			localHostPort := conn.LocalAddr().String()
   216  			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
   217  			conn.Close()
   218  		}
   219  	}
   220  
   221  	nr := &nodeRunner{cluster: c}
   222  	nr.actualLocalAddr = actualLocalAddr
   223  
   224  	if err := nr.Start(conf); err != nil {
   225  		return nil, err
   226  	}
   227  
   228  	c.config.Backend.DaemonJoinsCluster(c)
   229  
   230  	return nr, nil
   231  }
   232  
    233  func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum loss
   234  	return context.WithTimeout(context.Background(), swarmRequestTimeout)
   235  }
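
// Manager-only methods in this package use the request context roughly as
// follows when talking to swarmkit over gRPC (illustrative sketch of the call
// pattern):
//
//	ctx, cancel := c.getRequestContext()
//	defer cancel()
//	r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{})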
   236  
   237  // IsManager returns true if Cluster is participating as a manager.
   238  func (c *Cluster) IsManager() bool {
   239  	c.mu.RLock()
   240  	defer c.mu.RUnlock()
   241  	return c.currentNodeState().IsActiveManager()
   242  }
   243  
   244  // IsAgent returns true if Cluster is participating as a worker/agent.
   245  func (c *Cluster) IsAgent() bool {
   246  	c.mu.RLock()
   247  	defer c.mu.RUnlock()
   248  	return c.currentNodeState().status == types.LocalNodeStateActive
   249  }
   250  
   251  // GetLocalAddress returns the local address.
   252  func (c *Cluster) GetLocalAddress() string {
   253  	c.mu.RLock()
   254  	defer c.mu.RUnlock()
   255  	return c.currentNodeState().actualLocalAddr
   256  }
   257  
   258  // GetListenAddress returns the listen address.
   259  func (c *Cluster) GetListenAddress() string {
   260  	c.mu.RLock()
   261  	defer c.mu.RUnlock()
   262  	if c.nr != nil {
   263  		return c.nr.config.ListenAddr
   264  	}
   265  	return ""
   266  }
   267  
   268  // GetAdvertiseAddress returns the remotely reachable address of this node.
   269  func (c *Cluster) GetAdvertiseAddress() string {
   270  	c.mu.RLock()
   271  	defer c.mu.RUnlock()
   272  	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
   273  		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
   274  		return advertiseHost
   275  	}
   276  	return c.currentNodeState().actualLocalAddr
   277  }
   278  
   279  // GetRemoteAddress returns a known advertise address of a remote manager if
   280  // available.
   281  // todo: change to array/connect with info
   282  func (c *Cluster) GetRemoteAddress() string {
   283  	c.mu.RLock()
   284  	defer c.mu.RUnlock()
   285  	return c.getRemoteAddress()
   286  }
   287  
   288  func (c *Cluster) getRemoteAddress() string {
   289  	state := c.currentNodeState()
   290  	if state.swarmNode == nil {
   291  		return ""
   292  	}
   293  	nodeID := state.swarmNode.NodeID()
   294  	for _, r := range state.swarmNode.Remotes() {
   295  		if r.NodeID != nodeID {
   296  			return r.Addr
   297  		}
   298  	}
   299  	return ""
   300  }
   301  
   302  // ListenClusterEvents returns a channel that receives messages on cluster
   303  // participation changes.
   304  // todo: make cancelable and accessible to multiple callers
   305  func (c *Cluster) ListenClusterEvents() <-chan struct{} {
   306  	return c.configEvent
   307  }
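
// A consumer would typically drain the channel in its own goroutine, for
// example (illustrative sketch, not code from this repository):
//
//	go func() {
//		for range c.ListenClusterEvents() {
//			// cluster participation changed; refresh any cached swarm info
//		}
//	}()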
   308  
   309  // currentNodeState should not be called without a read lock
   310  func (c *Cluster) currentNodeState() nodeState {
   311  	return c.nr.State()
   312  }
   313  
    314  // errNoManager returns an error describing why manager commands can't be used.
    315  // Call with the read lock held.
   316  func (c *Cluster) errNoManager(st nodeState) error {
   317  	if st.swarmNode == nil {
   318  		if errors.Cause(st.err) == errSwarmLocked {
   319  			return errSwarmLocked
   320  		}
   321  		if st.err == errSwarmCertificatesExpired {
   322  			return errSwarmCertificatesExpired
   323  		}
   324  		return errors.New("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
   325  	}
   326  	if st.swarmNode.Manager() != nil {
   327  		return errors.New("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
   328  	}
   329  	return errors.New("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
   330  }
   331  
    332  // Cleanup stops the active swarm node. This is run before daemon shutdown.
   333  func (c *Cluster) Cleanup() {
   334  	c.controlMutex.Lock()
   335  	defer c.controlMutex.Unlock()
   336  
   337  	c.mu.Lock()
   338  	node := c.nr
   339  	if node == nil {
   340  		c.mu.Unlock()
   341  		return
   342  	}
   343  	defer c.mu.Unlock()
   344  	state := c.currentNodeState()
   345  	if state.IsActiveManager() {
   346  		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
   347  		if err == nil {
   348  			singlenode := active && isLastManager(reachable, unreachable)
   349  			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
   350  				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
   351  			}
   352  		}
   353  	}
   354  	if err := node.Stop(); err != nil {
   355  		logrus.Errorf("failed to shut down cluster node: %v", err)
   356  		signal.DumpStacks("")
   357  	}
   358  	c.nr = nil
   359  }
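
// For reference, the quorum warning above follows the usual raft majority
// rule: with total = reachable + unreachable managers, the cluster can commit
// writes only while strictly more than total/2 managers are reachable. A
// hedged sketch of such a check (the real helpers live elsewhere in this
// package and may be written differently):
//
//	func wouldLoseQuorum(reachable, unreachable int) bool {
//		total := reachable + unreachable
//		// after this node leaves, reachable-1 managers must still be a strict majority
//		return (reachable-1)*2 <= total
//	}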
   360  
   361  func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
   362  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
   363  	defer cancel()
   364  	nodes, err := client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
   365  	if err != nil {
   366  		return false, 0, 0, err
   367  	}
   368  	for _, n := range nodes.Nodes {
   369  		if n.ManagerStatus != nil {
   370  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
   371  				reachable++
   372  				if n.ID == currentNodeID {
   373  					current = true
   374  				}
   375  			}
   376  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
   377  				unreachable++
   378  			}
   379  		}
   380  	}
   381  	return
   382  }
   383  
   384  func detectLockedError(err error) error {
   385  	if err == swarmnode.ErrInvalidUnlockKey {
   386  		return errors.WithStack(errSwarmLocked)
   387  	}
   388  	return err
   389  }
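
// Call sites elsewhere in this package presumably wrap errors coming back from
// swarmkit with it, roughly (illustrative sketch; someSwarmkitCall is a
// placeholder, not a real function):
//
//	if err := someSwarmkitCall(); err != nil {
//		return detectLockedError(err)
//	}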