package cluster

//
// ## Swarmkit integration
//
// Cluster - static configurable object for accessing everything swarm related.
// Contains methods for connecting to and controlling the cluster. Exists always,
// even if swarm mode is not enabled.
//
// NodeRunner - Manager for starting the swarmkit node. It is present if and
// only if swarm mode is enabled. Implements a backoff restart loop in case of
// errors.
//
// NodeState - Information about the current node status, including access to
// gRPC clients if a manager is active.
//
// ### Locking
//
// `cluster.controlMutex` - taken for the whole lifecycle of the processes that
// can reconfigure the cluster (init/join/leave etc). Ensures that one
// reconfiguration action has fully completed before another can start.
//
// `cluster.mu` - taken when the actual changes in cluster configuration
// happen. Different from `controlMutex` because in some cases we need to
// access the current cluster state even while a long-running reconfiguration
// is going on. For example, the network stack may ask for the current cluster
// state in the middle of a shutdown. Any time the current cluster state is
// needed, you should take the read lock of `cluster.mu`. If you are writing an
// API responder that returns synchronously, hold `cluster.mu.RLock()` for the
// duration of the whole handler function. That ensures that the node will not
// be shut down until the handler has finished.
//
// NodeRunner implements its own internal locks that should not be used outside
// of the struct. Instead, you should just call the `nodeRunner.State()` method
// to get the current state of the cluster (you still need `cluster.mu.RLock()`
// to access the `cluster.nr` reference itself). Most of the changes in
// NodeRunner happen because of an external event (network problem, unexpected
// swarmkit error), and Docker shouldn't take any locks that delay these
// changes from happening.
//
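// As an illustration of the read-lock convention above, a synchronous
// read-only API responder is expected to look roughly like the sketch below.
// The method name and the fields it fills in are illustrative only, not taken
// from this file:
//
//	func (c *Cluster) hypotheticalInfo() types.Info {
//		c.mu.RLock()         // hold the read lock for the whole handler,
//		defer c.mu.RUnlock() // so the node is not shut down mid-handler
//
//		var info types.Info
//		state := c.currentNodeState() // safe: c.mu.RLock is held
//		info.LocalNodeState = state.status
//		if state.err != nil {
//			info.Error = state.err.Error()
//		}
//		return info
//	}
//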
import (
	"fmt"
	"net"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/docker/docker/api/types/network"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/controllers/plugin"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/docker/pkg/signal"
	lncluster "github.com/docker/libnetwork/cluster"
	swarmapi "github.com/docker/swarmkit/api"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"golang.org/x/net/context"
)

const swarmDirName = "swarm"
const controlSocket = "control.sock"
const swarmConnectTimeout = 20 * time.Second
const swarmRequestTimeout = 20 * time.Second
const stateFile = "docker-state.json"
const defaultAddr = "0.0.0.0:2377"

const (
	initialReconnectDelay = 100 * time.Millisecond
	maxReconnectDelay     = 30 * time.Second
	contextPrefix         = "com.docker.swarm"
)

// NetworkSubnetsProvider exposes functions for retrieving the subnets
// of networks managed by Docker, so they can be filtered.
type NetworkSubnetsProvider interface {
	Subnets() ([]net.IPNet, []net.IPNet)
}

// Config provides values for Cluster.
type Config struct {
	Root                   string
	Name                   string
	Backend                executorpkg.Backend
	PluginBackend          plugin.Backend
	NetworkSubnetsProvider NetworkSubnetsProvider

	// DefaultAdvertiseAddr is the default host/IP or network interface to use
	// if no AdvertiseAddr value is specified.
	DefaultAdvertiseAddr string

	// RuntimeRoot is the path to store runtime state, such as the swarm
	// control socket.
	RuntimeRoot string

	// WatchStream is a channel to pass watch API notifications to the daemon.
	WatchStream chan *swarmapi.WatchMessage
}

// Cluster provides capabilities to participate in a cluster as a worker or a
// manager.
type Cluster struct {
	mu           sync.RWMutex
	controlMutex sync.RWMutex // protect init/join/leave user operations
	nr           *nodeRunner
	root         string
	runtimeRoot  string
	config       Config
	configEvent  chan lncluster.ConfigEventType // todo: make this array and goroutine safe
	attachers    map[string]*attacher
	watchStream  chan *swarmapi.WatchMessage
}

// attacher manages the in-memory attachment state of a container's
// attachment to a global-scope network managed by the swarm manager. It
// helps in identifying the attachment ID via the taskID and the
// corresponding attachment configuration obtained from the manager.
type attacher struct {
	taskID           string
	config           *network.NetworkingConfig
	inProgress       bool
	attachWaitCh     chan *network.NetworkingConfig
	attachCompleteCh chan struct{}
	detachWaitCh     chan struct{}
}

// New creates a new Cluster instance using the provided config.
func New(config Config) (*Cluster, error) {
	root := filepath.Join(config.Root, swarmDirName)
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}
	if config.RuntimeRoot == "" {
		config.RuntimeRoot = root
	}
	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
		return nil, err
	}
	c := &Cluster{
		root:        root,
		config:      config,
		configEvent: make(chan lncluster.ConfigEventType, 10),
		runtimeRoot: config.RuntimeRoot,
		attachers:   make(map[string]*attacher),
		watchStream: config.WatchStream,
	}
	return c, nil
}

// Start the Cluster instance.
// TODO: the split between New and Start can be joined again when the
// SendClusterEvent method is no longer required.
func (c *Cluster) Start() error {
	root := filepath.Join(c.config.Root, swarmDirName)

	nodeConfig, err := loadPersistentState(root)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	nr, err := c.newNodeRunner(*nodeConfig)
	if err != nil {
		return err
	}
	c.nr = nr

	select {
	case <-time.After(swarmConnectTimeout):
		logrus.Error("swarm component could not be started before timeout was reached")
	case err := <-nr.Ready():
		if err != nil {
			logrus.WithError(err).Error("swarm component could not be started")
			return nil
		}
	}
	return nil
}
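// A rough sketch of how a daemon might wire New and Start together. The
// surrounding variables and error handling here are hypothetical, not part of
// this package; as the TODO above notes, the split only exists for as long as
// SendClusterEvent is required, so callers get a chance to register event
// consumers in between:
//
//	c, err := cluster.New(cluster.Config{
//		Root:        "/var/lib/docker", // example path
//		Backend:     daemonBackend,     // anything implementing executorpkg.Backend
//		WatchStream: make(chan *swarmapi.WatchMessage, 32),
//	})
//	if err != nil {
//		return err
//	}
//	// ...register event consumers (e.g. via ListenClusterEvents) here...
//	if err := c.Start(); err != nil { // restores persisted swarm state, if any
//		return err
//	}
//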
func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}

	actualLocalAddr := conf.LocalAddr
	if actualLocalAddr == "" {
		// If localAddr was not specified, resolve it automatically
		// based on the route to joinAddr. localAddr can only be left
		// empty on "join".
		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
		if err != nil {
			return nil, fmt.Errorf("could not parse listen address: %v", err)
		}

		listenAddrIP := net.ParseIP(listenHost)
		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
			actualLocalAddr = listenHost
		} else {
			if conf.RemoteAddr == "" {
				// Should never happen except using swarms created by
				// old versions that didn't save remoteAddr.
				conf.RemoteAddr = "8.8.8.8:53"
			}
			conn, err := net.Dial("udp", conf.RemoteAddr)
			if err != nil {
				return nil, fmt.Errorf("could not find local IP address: %v", err)
			}
			localHostPort := conn.LocalAddr().String()
			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
			conn.Close()
		}
	}

	nr := &nodeRunner{cluster: c}
	nr.actualLocalAddr = actualLocalAddr

	if err := nr.Start(conf); err != nil {
		return nil, err
	}

	c.config.Backend.DaemonJoinsCluster(c)

	return nr, nil
}

func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum loss
	return context.WithTimeout(context.Background(), swarmRequestTimeout)
}

// IsManager returns true if Cluster is participating as a manager.
func (c *Cluster) IsManager() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().IsActiveManager()
}

// IsAgent returns true if Cluster is participating as a worker/agent.
func (c *Cluster) IsAgent() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().status == types.LocalNodeStateActive
}

// GetLocalAddress returns the local address.
func (c *Cluster) GetLocalAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().actualLocalAddr
}

// GetListenAddress returns the listen address.
func (c *Cluster) GetListenAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.ListenAddr
	}
	return ""
}

// GetAdvertiseAddress returns the remotely reachable address of this node.
func (c *Cluster) GetAdvertiseAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
		return advertiseHost
	}
	return c.currentNodeState().actualLocalAddr
}

// GetDataPathAddress returns the address to be used for data path traffic, if specified.
func (c *Cluster) GetDataPathAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.DataPathAddr
	}
	return ""
}

// GetRemoteAddressList returns the advertise address for each of the remote
// managers, if available.
func (c *Cluster) GetRemoteAddressList() []string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.getRemoteAddressList()
}

func (c *Cluster) getRemoteAddressList() []string {
	state := c.currentNodeState()
	if state.swarmNode == nil {
		return []string{}
	}

	nodeID := state.swarmNode.NodeID()
	remotes := state.swarmNode.Remotes()
	addressList := make([]string, 0, len(remotes))
	for _, r := range remotes {
		if r.NodeID != nodeID {
			addressList = append(addressList, r.Addr)
		}
	}
	return addressList
}

// ListenClusterEvents returns a channel that receives messages on cluster
// participation changes.
// todo: make cancelable and accessible to multiple callers
func (c *Cluster) ListenClusterEvents() <-chan lncluster.ConfigEventType {
	return c.configEvent
}

// currentNodeState should not be called without holding a read lock.
func (c *Cluster) currentNodeState() nodeState {
	return c.nr.State()
}

// errNoManager returns an error describing why manager commands can't be used.
// Call with read lock.
func (c *Cluster) errNoManager(st nodeState) error {
	if st.swarmNode == nil {
		if errors.Cause(st.err) == errSwarmLocked {
			return errSwarmLocked
		}
		if st.err == errSwarmCertificatesExpired {
			return errSwarmCertificatesExpired
		}
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again."))
	}
	if st.swarmNode.Manager() != nil {
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster."))
	}
	return errors.WithStack(notAvailableError("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager."))
}

// Cleanup stops the active swarm node. This is run before daemon shutdown.
func (c *Cluster) Cleanup() {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	node := c.nr
	if node == nil {
		c.mu.Unlock()
		return
	}
	state := c.currentNodeState()
	c.mu.Unlock()

	if state.IsActiveManager() {
		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
		if err == nil {
			singlenode := active && isLastManager(reachable, unreachable)
			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
			}
		}
	}

	if err := node.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
	}

	c.mu.Lock()
	c.nr = nil
	c.mu.Unlock()
}
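// For reference, the quorum warning in Cleanup relies on two helpers defined
// elsewhere in this package. The sketches below show what they are assumed to
// check (plain Raft majority arithmetic; not verified against the actual
// implementations):
//
//	// isLastManager (assumed): this node is the only manager left.
//	func isLastManager(reachable, unreachable int) bool {
//		return reachable == 1 && unreachable == 0
//	}
//
//	// removingManagerCausesLossOfQuorum (assumed): quorum needs a strict
//	// majority of all managers; with this node gone only reachable-1 remain,
//	// and 2*(reachable-1) <= reachable+unreachable simplifies to the check
//	// below.
//	func removingManagerCausesLossOfQuorum(reachable, unreachable int) bool {
//		return reachable-2 <= unreachable
//	}
//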
func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	nodes, err := client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
	if err != nil {
		return false, 0, 0, err
	}
	for _, n := range nodes.Nodes {
		if n.ManagerStatus != nil {
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
				reachable++
				if n.ID == currentNodeID {
					current = true
				}
			}
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
				unreachable++
			}
		}
	}
	return
}

func detectLockedError(err error) error {
	if err == swarmnode.ErrInvalidUnlockKey {
		return errors.WithStack(errSwarmLocked)
	}
	return err
}

func (c *Cluster) lockedManagerAction(fn func(ctx context.Context, state nodeState) error) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	return fn(ctx, state)
}

// SendClusterEvent allows sending cluster events on the configEvent channel.
// TODO: this method should not be exposed.
// Currently it is used to notify the network controller that the keys are
// available.
func (c *Cluster) SendClusterEvent(event lncluster.ConfigEventType) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	c.configEvent <- event
}
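// lockedManagerAction is the standard wrapper for manager-only operations: it
// takes the read lock, verifies that this node is an active manager (returning
// errNoManager otherwise), and hands the callback a request-scoped context plus
// the current node state. A hedged sketch of a caller follows; the method name
// and the particular control API call are illustrative, not taken from this
// file:
//
//	func (c *Cluster) hypotheticalRemoveNode(id string) error {
//		return c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
//			// state.controlClient is the manager's gRPC control client.
//			_, err := state.controlClient.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: id})
//			return err
//		})
//	}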