github.com/zhouyu0/docker-note@v0.0.0-20190722021225-b8d3825084db/daemon/cluster/cluster.go

package cluster // import "github.com/docker/docker/daemon/cluster"

//
// ## Swarmkit integration
//
// Cluster - static, configurable object for accessing everything swarm-related.
// Contains methods for connecting to and controlling the cluster. Always
// exists, even if swarm mode is not enabled.
//
// NodeRunner - manager for starting the swarmkit node. It is present if and
// only if swarm mode is enabled, and implements a backoff restart loop in case
// of errors.
//
// NodeState - information about the current node status, including access to
// gRPC clients if a manager is active.
//
// ### Locking
//
// `cluster.controlMutex` - taken for the whole lifecycle of the processes that
// can reconfigure the cluster (init/join/leave etc.). Ensures that one
// reconfiguration action has fully completed before another can start.
//
// `cluster.mu` - taken when the actual changes in cluster configuration
// happen. Different from `controlMutex` because in some cases we need to
// access the current cluster state even while a long-running reconfiguration
// is in progress. For example, the network stack may ask for the current
// cluster state in the middle of a shutdown. Any time the current cluster
// state is read, take the read lock on `cluster.mu`. If you are writing an API
// responder that returns synchronously, hold `cluster.mu.RLock()` for the
// duration of the whole handler function. That ensures that the node will not
// be shut down until the handler has finished (the `lockedManagerAction`
// helper below and the sketch at the end of this file follow this pattern).
//
// NodeRunner implements its own internal locks that should not be used outside
// of the struct. Instead, you should just call the `nodeRunner.State()` method
// to get the current state of the cluster (you still need `cluster.mu.RLock()`
// to access the `cluster.nr` reference itself). Most of the changes in
// NodeRunner happen because of an external event (network problem, unexpected
// swarmkit error) and Docker shouldn't take any locks that delay these changes
// from happening.
//

import (
	"context"
	"fmt"
	"math"
	"net"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/docker/docker/api/types/network"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/controllers/plugin"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/docker/pkg/signal"
	lncluster "github.com/docker/libnetwork/cluster"
	swarmapi "github.com/docker/swarmkit/api"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

const swarmDirName = "swarm"
const controlSocket = "control.sock"
const swarmConnectTimeout = 20 * time.Second
const swarmRequestTimeout = 20 * time.Second
const stateFile = "docker-state.json"
const defaultAddr = "0.0.0.0:2377"

const (
	initialReconnectDelay          = 100 * time.Millisecond
	maxReconnectDelay              = 30 * time.Second
	contextPrefix                  = "com.docker.swarm"
	defaultRecvSizeForListResponse = math.MaxInt32 // the max recv limit for grpc <1.4.0
)

// NetworkSubnetsProvider exposes functions for retrieving the subnets
// of networks managed by Docker, so they can be filtered.
type NetworkSubnetsProvider interface {
	Subnets() ([]net.IPNet, []net.IPNet)
}

// Config provides values for Cluster.
type Config struct {
	Root                   string
	Name                   string
	Backend                executorpkg.Backend
	ImageBackend           executorpkg.ImageBackend
	PluginBackend          plugin.Backend
	VolumeBackend          executorpkg.VolumeBackend
	NetworkSubnetsProvider NetworkSubnetsProvider

	// DefaultAdvertiseAddr is the default host/IP or network interface to use
	// if no AdvertiseAddr value is specified.
	DefaultAdvertiseAddr string

	// RuntimeRoot is the path used to store runtime state, such as the swarm
	// control socket.
	RuntimeRoot string

	// WatchStream is a channel used to pass watch API notifications to the daemon.
	WatchStream chan *swarmapi.WatchMessage

	// RaftHeartbeatTick is the number of ticks for the heartbeat of quorum members.
	RaftHeartbeatTick uint32

	// RaftElectionTick is the number of ticks to elapse before followers propose a new round of leader election.
	// This value should be 10x that of RaftHeartbeatTick.
	RaftElectionTick uint32
}

// Cluster provides capabilities to participate in a cluster as a worker or a
// manager.
type Cluster struct {
	mu           sync.RWMutex
	controlMutex sync.RWMutex // protects init/join/leave user operations
	nr           *nodeRunner
	root         string
	runtimeRoot  string
	config       Config
	configEvent  chan lncluster.ConfigEventType // todo: make this array and goroutine safe
	attachers    map[string]*attacher
	watchStream  chan *swarmapi.WatchMessage
}

// attacher manages the in-memory attachment state of a container attached to
// a global-scope network managed by the swarm manager. It helps identify the
// attachment ID via the taskID and holds the corresponding attachment
// configuration obtained from the manager.
type attacher struct {
	taskID           string
	config           *network.NetworkingConfig
	inProgress       bool
	attachWaitCh     chan *network.NetworkingConfig
	attachCompleteCh chan struct{}
	detachWaitCh     chan struct{}
}
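
// The snippet below is an illustrative sketch (not part of the original file)
// of how a caller such as the Docker daemon might wire up this component:
// build a Config, create the Cluster with New, and call Start once the rest
// of the daemon is ready. The backend values and paths are assumptions made
// for the example only.
//
//	c, err := cluster.New(cluster.Config{
//		Root:                   "/var/lib/docker", // hypothetical state root
//		Name:                   "default",
//		Backend:                daemonBackend, // something implementing executorpkg.Backend
//		NetworkSubnetsProvider: netController, // hypothetical provider
//		DefaultAdvertiseAddr:   "eth0",
//		RuntimeRoot:            "/var/run/docker", // hypothetical runtime root
//		WatchStream:            make(chan *swarmapi.WatchMessage, 32),
//	})
//	if err != nil {
//		return err
//	}
//	if err := c.Start(); err != nil {
//		return err
//	}
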
// New creates a new Cluster instance using the provided config.
func New(config Config) (*Cluster, error) {
	root := filepath.Join(config.Root, swarmDirName)
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}
	if config.RuntimeRoot == "" {
		config.RuntimeRoot = root
	}
	if config.RaftHeartbeatTick == 0 {
		config.RaftHeartbeatTick = 1
	}
	if config.RaftElectionTick == 0 {
		// 10x the heartbeat tick is the recommended ratio according to the etcd docs.
		config.RaftElectionTick = 10 * config.RaftHeartbeatTick
	}

	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
		return nil, err
	}
	c := &Cluster{
		root:        root,
		config:      config,
		configEvent: make(chan lncluster.ConfigEventType, 10),
		runtimeRoot: config.RuntimeRoot,
		attachers:   make(map[string]*attacher),
		watchStream: config.WatchStream,
	}
	return c, nil
}

// Start the Cluster instance
// TODO The split between New and Start can be joined again when the SendClusterEvent
// method is no longer required.
func (c *Cluster) Start() error {
	root := filepath.Join(c.config.Root, swarmDirName)

	nodeConfig, err := loadPersistentState(root)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	nr, err := c.newNodeRunner(*nodeConfig)
	if err != nil {
		return err
	}
	c.nr = nr

	select {
	case <-time.After(swarmConnectTimeout):
		logrus.Error("swarm component could not be started before timeout was reached")
	case err := <-nr.Ready():
		if err != nil {
			logrus.WithError(err).Error("swarm component could not be started")
			return nil
		}
	}
	return nil
}

func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}

	actualLocalAddr := conf.LocalAddr
	if actualLocalAddr == "" {
		// If localAddr was not specified, resolve it automatically
		// based on the route to joinAddr. localAddr can only be left
		// empty on "join".
		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
		if err != nil {
			return nil, fmt.Errorf("could not parse listen address: %v", err)
		}

		listenAddrIP := net.ParseIP(listenHost)
		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
			actualLocalAddr = listenHost
		} else {
			if conf.RemoteAddr == "" {
				// Should never happen except with swarms created by
				// old versions that didn't save remoteAddr.
				conf.RemoteAddr = "8.8.8.8:53"
			}
			conn, err := net.Dial("udp", conf.RemoteAddr)
			if err != nil {
				return nil, fmt.Errorf("could not find local IP address: %v", err)
			}
			localHostPort := conn.LocalAddr().String()
			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
			conn.Close()
		}
	}

	nr := &nodeRunner{cluster: c}
	nr.actualLocalAddr = actualLocalAddr

	if err := nr.Start(conf); err != nil {
		return nil, err
	}

	c.config.Backend.DaemonJoinsCluster(c)

	return nr, nil
}
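
// probeLocalAddr is a hypothetical helper, added here only to illustrate the
// technique newNodeRunner uses above: dialing a UDP address does not send any
// packets, but it makes the kernel pick the source IP it would use to reach
// remoteAddr, which is a reasonable guess for this node's local address. This
// sketch is not part of the original file.
func probeLocalAddr(remoteAddr string) (string, error) {
	conn, err := net.Dial("udp", remoteAddr)
	if err != nil {
		return "", err
	}
	defer conn.Close()
	// conn.LocalAddr() reports the source address chosen by the routing table.
	host, _, err := net.SplitHostPort(conn.LocalAddr().String())
	return host, err
}
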
func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on lost quorum
	return context.WithTimeout(context.Background(), swarmRequestTimeout)
}

// IsManager returns true if Cluster is participating as a manager.
func (c *Cluster) IsManager() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().IsActiveManager()
}

// IsAgent returns true if Cluster is participating as a worker/agent.
func (c *Cluster) IsAgent() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().status == types.LocalNodeStateActive
}

// GetLocalAddress returns the local address.
func (c *Cluster) GetLocalAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().actualLocalAddr
}

// GetListenAddress returns the listen address.
func (c *Cluster) GetListenAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.ListenAddr
	}
	return ""
}

// GetAdvertiseAddress returns the remotely reachable address of this node.
func (c *Cluster) GetAdvertiseAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
		return advertiseHost
	}
	return c.currentNodeState().actualLocalAddr
}

// GetDataPathAddress returns the address to be used for data path traffic, if specified.
func (c *Cluster) GetDataPathAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.DataPathAddr
	}
	return ""
}

// GetRemoteAddressList returns the advertise address for each of the remote
// managers, if available.
func (c *Cluster) GetRemoteAddressList() []string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.getRemoteAddressList()
}

// GetWatchStream returns the channel used to pass changes from the store watch API.
func (c *Cluster) GetWatchStream() chan *swarmapi.WatchMessage {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.watchStream
}

func (c *Cluster) getRemoteAddressList() []string {
	state := c.currentNodeState()
	if state.swarmNode == nil {
		return []string{}
	}

	nodeID := state.swarmNode.NodeID()
	remotes := state.swarmNode.Remotes()
	addressList := make([]string, 0, len(remotes))
	for _, r := range remotes {
		if r.NodeID != nodeID {
			addressList = append(addressList, r.Addr)
		}
	}
	return addressList
}

// ListenClusterEvents returns a channel that receives messages on cluster
// participation changes.
// todo: make cancelable and accessible to multiple callers
func (c *Cluster) ListenClusterEvents() <-chan lncluster.ConfigEventType {
	return c.configEvent
}

// currentNodeState should not be called without a read lock
func (c *Cluster) currentNodeState() nodeState {
	return c.nr.State()
}

// errNoManager returns an error describing why manager commands can't be used.
// Call with the read lock held.
func (c *Cluster) errNoManager(st nodeState) error {
	if st.swarmNode == nil {
		if errors.Cause(st.err) == errSwarmLocked {
			return errSwarmLocked
		}
		if st.err == errSwarmCertificatesExpired {
			return errSwarmCertificatesExpired
		}
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again."))
	}
	if st.swarmNode.Manager() != nil {
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster."))
	}
	return errors.WithStack(notAvailableError("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager."))
}
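
// Illustrative note (an assumption about intent, not part of the original
// file): Raft stays available only while a strict majority of manager nodes
// is reachable. The check in Cleanup below warns when this node's departure
// would leave fewer reachable managers than that majority, for example:
//
//	managers  := reachable + unreachable // total voting members, e.g. 2 + 1 = 3
//	quorum    := managers/2 + 1          // strict majority, e.g. 3/2 + 1 = 2
//	remaining := reachable - 1           // reachable managers once this one leaves, e.g. 1
//	// remaining < quorum, so the quorum-loss warning is logged
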
// Cleanup stops the active swarm node. This is run before daemon shutdown.
func (c *Cluster) Cleanup() {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	node := c.nr
	if node == nil {
		c.mu.Unlock()
		return
	}
	state := c.currentNodeState()
	c.mu.Unlock()

	if state.IsActiveManager() {
		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
		if err == nil {
			singlenode := active && isLastManager(reachable, unreachable)
			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
			}
		}
	}

	if err := node.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
	}

	c.mu.Lock()
	c.nr = nil
	c.mu.Unlock()
}

func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	nodes, err := client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
	if err != nil {
		return false, 0, 0, err
	}
	for _, n := range nodes.Nodes {
		if n.ManagerStatus != nil {
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
				reachable++
				if n.ID == currentNodeID {
					current = true
				}
			}
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
				unreachable++
			}
		}
	}
	return
}

func detectLockedError(err error) error {
	if err == swarmnode.ErrInvalidUnlockKey {
		return errors.WithStack(errSwarmLocked)
	}
	return err
}

func (c *Cluster) lockedManagerAction(fn func(ctx context.Context, state nodeState) error) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	return fn(ctx, state)
}

// SendClusterEvent allows sending cluster events on the configEvent channel.
// TODO This method should not be exposed.
// Currently it is used to notify the network controller that the keys are
// available.
func (c *Cluster) SendClusterEvent(event lncluster.ConfigEventType) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	c.configEvent <- event
}
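
// listManagerNodes is a hypothetical example method, added only to illustrate
// the locking pattern described at the top of this file: lockedManagerAction
// holds c.mu.RLock() for the whole call, verifies this node is an active
// manager, and hands the callback a request-scoped context together with the
// current node state (including live gRPC clients). This sketch is not part
// of the original file.
func (c *Cluster) listManagerNodes() ([]*swarmapi.Node, error) {
	var nodes []*swarmapi.Node
	err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		// state.controlClient is only safe to use while c.mu is read-locked,
		// which lockedManagerAction guarantees for the duration of this callback.
		resp, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{})
		if err != nil {
			return err
		}
		nodes = resp.Nodes
		return nil
	})
	return nodes, err
}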