github.com/demonoid81/moby@v0.0.0-20200517203328-62dd8e17c460/daemon/cluster/cluster.go

package cluster // import "github.com/demonoid81/moby/daemon/cluster"

//
// ## Swarmkit integration
//
// Cluster - static configurable object for accessing everything swarm related.
// Contains methods for connecting to and controlling the cluster. Always
// exists, even if swarm mode is not enabled.
//
// NodeRunner - manager for starting the swarmkit node. It is present if and
// only if swarm mode is enabled, and implements a backoff restart loop to
// recover from errors.
//
// NodeState - information about the current node status, including access to
// gRPC clients if a manager is active.
//
// ### Locking
//
// `cluster.controlMutex` - taken for the whole lifecycle of the processes that
// can reconfigure the cluster (init/join/leave, etc.). Ensures that one
// reconfiguration action has fully completed before another can start.
//
// `cluster.mu` - taken when the actual changes to the cluster configuration
// happen. Different from `controlMutex` because in some cases we need to
// access the current cluster state even while a long-running reconfiguration
// is going on. For example, the network stack may ask for the current cluster
// state in the middle of a shutdown. Any time the current cluster state is
// read, the read lock of `cluster.mu` must be held. If you are writing an API
// responder that returns synchronously, hold `cluster.mu.RLock()` for the
// duration of the whole handler function. That ensures that the node will not
// be shut down until the handler has finished.
//
// NodeRunner has its own internal locks that should not be used outside of
// the struct. Instead, call the `nodeRunner.State()` method to get the
// current state of the cluster (you still need `cluster.mu.RLock()` to access
// the `cluster.nr` reference itself). Most of the changes in NodeRunner happen
// because of an external event (network problem, unexpected swarmkit error),
// and Docker shouldn't take any locks that delay these changes from happening.
//
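// As an illustrative sketch of the convention above (this handler is
// hypothetical and not part of this file), a synchronous, read-only API
// responder would look roughly like:
//
//	func (c *Cluster) ExampleNodeID() (string, error) { // hypothetical
//		c.mu.RLock()
//		defer c.mu.RUnlock() // held for the whole handler so the node can't shut down mid-request
//		state := c.currentNodeState()
//		if !state.IsActiveManager() {
//			return "", c.errNoManager(state)
//		}
//		return state.NodeID(), nil
//	}
//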

import (
	"context"
	"fmt"
	"math"
	"net"
	"os"
	"path/filepath"
	"runtime"
	"sync"
	"time"

	"github.com/demonoid81/moby/api/types/network"
	types "github.com/demonoid81/moby/api/types/swarm"
	"github.com/demonoid81/moby/daemon/cluster/controllers/plugin"
	executorpkg "github.com/demonoid81/moby/daemon/cluster/executor"
	"github.com/demonoid81/moby/pkg/signal"
	lncluster "github.com/demonoid81/libnetwork/cluster"
	swarmapi "github.com/docker/swarmkit/api"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"google.golang.org/grpc"
)

const (
	swarmDirName                   = "swarm"
	controlSocket                  = "control.sock"
	swarmConnectTimeout            = 20 * time.Second
	swarmRequestTimeout            = 20 * time.Second
	stateFile                      = "docker-state.json"
	defaultAddr                    = "0.0.0.0:2377"
	isWindows                      = runtime.GOOS == "windows"
	initialReconnectDelay          = 100 * time.Millisecond
	maxReconnectDelay              = 30 * time.Second
	contextPrefix                  = "com.docker.swarm"
	defaultRecvSizeForListResponse = math.MaxInt32 // the max recv limit grpc <1.4.0
)

// NetworkSubnetsProvider exposes functions for retrieving the subnets
// of networks managed by Docker, so they can be filtered.
type NetworkSubnetsProvider interface {
	Subnets() ([]net.IPNet, []net.IPNet)
}

// Config provides values for Cluster.
type Config struct {
	Root                   string
	Name                   string
	Backend                executorpkg.Backend
	ImageBackend           executorpkg.ImageBackend
	PluginBackend          plugin.Backend
	VolumeBackend          executorpkg.VolumeBackend
	NetworkSubnetsProvider NetworkSubnetsProvider

	// DefaultAdvertiseAddr is the default host/IP or network interface to use
	// if no AdvertiseAddr value is specified.
	DefaultAdvertiseAddr string

	// path to store runtime state, such as the swarm control socket
	RuntimeRoot string

	// WatchStream is a channel to pass watch API notifications to daemon
	WatchStream chan *swarmapi.WatchMessage

	// RaftHeartbeatTick is the number of ticks for heartbeat of quorum members
	RaftHeartbeatTick uint32

	// RaftElectionTick is the number of ticks to elapse before followers propose a new round of leader election
	// This value should be 10x that of RaftHeartbeatTick
	RaftElectionTick uint32
}

// Cluster provides capabilities to participate in a cluster as a worker or a
// manager.
type Cluster struct {
	mu           sync.RWMutex
	controlMutex sync.RWMutex // protect init/join/leave user operations
	nr           *nodeRunner
	root         string
	runtimeRoot  string
	config       Config
	configEvent  chan lncluster.ConfigEventType // todo: make this array and goroutine safe
	attachers    map[string]*attacher
	watchStream  chan *swarmapi.WatchMessage
}

// attacher manages the in-memory attachment state of a container
// attachment to a global scope network managed by swarm manager. It
// helps in identifying the attachment ID via the taskID and the
// corresponding attachment configuration obtained from the manager.
type attacher struct {
	taskID           string
	config           *network.NetworkingConfig
	inProgress       bool
	attachWaitCh     chan *network.NetworkingConfig
	attachCompleteCh chan struct{}
	detachWaitCh     chan struct{}
}

// New creates a new Cluster instance using provided config.
func New(config Config) (*Cluster, error) {
	root := filepath.Join(config.Root, swarmDirName)
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}
	if config.RuntimeRoot == "" {
		config.RuntimeRoot = root
	}
	if config.RaftHeartbeatTick == 0 {
		config.RaftHeartbeatTick = 1
	}
	if config.RaftElectionTick == 0 {
		// 10X heartbeat tick is the recommended ratio according to etcd docs.
		config.RaftElectionTick = 10 * config.RaftHeartbeatTick
	}

	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
		return nil, err
	}
	c := &Cluster{
		root:        root,
		config:      config,
		configEvent: make(chan lncluster.ConfigEventType, 10),
		runtimeRoot: config.RuntimeRoot,
		attachers:   make(map[string]*attacher),
		watchStream: config.WatchStream,
	}
	return c, nil
}
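// As a rough usage sketch (the values below are made up, not taken from the
// daemon code), callers are expected to build a Config, call New, and call
// Start once they are ready to consume cluster events:
//
//	c, err := cluster.New(cluster.Config{
//		Root:        "/var/lib/docker",
//		Backend:     daemonBackend, // hypothetical executorpkg.Backend implementation
//		WatchStream: make(chan *swarmapi.WatchMessage, 32),
//	})
//	if err != nil {
//		return err
//	}
//	if err := c.Start(); err != nil {
//		return err
//	}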

// Start the Cluster instance
// TODO The split between New and Start can be joined again when the SendClusterEvent
// method is no longer required
func (c *Cluster) Start() error {
	root := filepath.Join(c.config.Root, swarmDirName)

	nodeConfig, err := loadPersistentState(root)
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	nr, err := c.newNodeRunner(*nodeConfig)
	if err != nil {
		return err
	}
	c.nr = nr

	timer := time.NewTimer(swarmConnectTimeout)
	defer timer.Stop()

	select {
	case <-timer.C:
		logrus.Error("swarm component could not be started before timeout was reached")
	case err := <-nr.Ready():
		if err != nil {
			logrus.WithError(err).Error("swarm component could not be started")
			return nil
		}
	}
	return nil
}

func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}

	actualLocalAddr := conf.LocalAddr
	if actualLocalAddr == "" {
		// If localAddr was not specified, resolve it automatically
		// based on the route to joinAddr. localAddr can only be left
		// empty on "join".
		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
		if err != nil {
			return nil, fmt.Errorf("could not parse listen address: %v", err)
		}

		listenAddrIP := net.ParseIP(listenHost)
		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
			actualLocalAddr = listenHost
		} else {
			if conf.RemoteAddr == "" {
				// Should never happen except when using swarms created by
				// old versions that didn't save remoteAddr.
				conf.RemoteAddr = "8.8.8.8:53"
			}
			conn, err := net.Dial("udp", conf.RemoteAddr)
			if err != nil {
				return nil, fmt.Errorf("could not find local IP address: %v", err)
			}
			localHostPort := conn.LocalAddr().String()
			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
			conn.Close()
		}
	}

	nr := &nodeRunner{cluster: c}
	nr.actualLocalAddr = actualLocalAddr

	if err := nr.Start(conf); err != nil {
		return nil, err
	}

	c.config.Backend.DaemonJoinsCluster(c)

	return nr, nil
}
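// The UDP "dial" above does not send any packets; it only asks the kernel to
// pick the route (and therefore the local source address) it would use to
// reach the remote. A minimal standalone sketch of the same trick (the remote
// address here is just an example):
//
//	conn, err := net.Dial("udp", "8.8.8.8:53") // no traffic is sent for UDP dials
//	if err != nil {
//		return "", err
//	}
//	defer conn.Close()
//	host, _, err := net.SplitHostPort(conn.LocalAddr().String())
//	if err != nil {
//		return "", err
//	}
//	return host, nil // local IP the kernel would use to reach the remote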

func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum lost
	return context.WithTimeout(context.Background(), swarmRequestTimeout)
}

// IsManager returns true if Cluster is participating as a manager.
func (c *Cluster) IsManager() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().IsActiveManager()
}

// IsAgent returns true if Cluster is participating as a worker/agent.
func (c *Cluster) IsAgent() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().status == types.LocalNodeStateActive
}

// GetLocalAddress returns the local address.
func (c *Cluster) GetLocalAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().actualLocalAddr
}

// GetListenAddress returns the listen address.
func (c *Cluster) GetListenAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.ListenAddr
	}
	return ""
}

// GetAdvertiseAddress returns the remotely reachable address of this node.
func (c *Cluster) GetAdvertiseAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
		return advertiseHost
	}
	return c.currentNodeState().actualLocalAddr
}

// GetDataPathAddress returns the address to be used for the data path traffic, if specified.
func (c *Cluster) GetDataPathAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.DataPathAddr
	}
	return ""
}

// GetRemoteAddressList returns the advertise address for each of the remote managers if
// available.
func (c *Cluster) GetRemoteAddressList() []string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.getRemoteAddressList()
}

// GetWatchStream returns the channel to pass changes from the store watch API
func (c *Cluster) GetWatchStream() chan *swarmapi.WatchMessage {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.watchStream
}

func (c *Cluster) getRemoteAddressList() []string {
	state := c.currentNodeState()
	if state.swarmNode == nil {
		return []string{}
	}

	nodeID := state.swarmNode.NodeID()
	remotes := state.swarmNode.Remotes()
	addressList := make([]string, 0, len(remotes))
	for _, r := range remotes {
		if r.NodeID != nodeID {
			addressList = append(addressList, r.Addr)
		}
	}
	return addressList
}

// ListenClusterEvents returns a channel that receives messages on cluster
// participation changes.
// todo: make cancelable and accessible to multiple callers
func (c *Cluster) ListenClusterEvents() <-chan lncluster.ConfigEventType {
	return c.configEvent
}

// currentNodeState should not be called without a read lock
func (c *Cluster) currentNodeState() nodeState {
	return c.nr.State()
}

// errNoManager returns an error describing why manager commands can't be used.
// Call with read lock.
func (c *Cluster) errNoManager(st nodeState) error {
	if st.swarmNode == nil {
		if errors.Is(st.err, errSwarmLocked) {
			return errSwarmLocked
		}
		if st.err == errSwarmCertificatesExpired {
			return errSwarmCertificatesExpired
		}
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again."))
	}
	if st.swarmNode.Manager() != nil {
		return errors.WithStack(notAvailableError("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster."))
	}
	return errors.WithStack(notAvailableError("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager."))
}

// Cleanup stops the active swarm node. This is run before daemon shutdown.
func (c *Cluster) Cleanup() {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	node := c.nr
	if node == nil {
		c.mu.Unlock()
		return
	}
	state := c.currentNodeState()
	c.mu.Unlock()

	if state.IsActiveManager() {
		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
		if err == nil {
			singlenode := active && isLastManager(reachable, unreachable)
			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
			}
		}
	}

	if err := node.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
	}

	c.mu.Lock()
	c.nr = nil
	c.mu.Unlock()
}
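// For context on the quorum warning in Cleanup above (the exact checks live in
// helpers elsewhere in this package): a raft cluster of n managers needs a
// majority of floor(n/2)+1 reachable members. A worked example with made-up
// numbers: reachable=2 and unreachable=1 means 3 managers and a quorum of 2;
// after this node leaves, only 1 of the remaining 2 managers is reachable,
// which is below their quorum of 2, so raft quorum is lost.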

func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	nodes, err := client.ListNodes(
		ctx, &swarmapi.ListNodesRequest{},
		grpc.MaxCallRecvMsgSize(defaultRecvSizeForListResponse),
	)
	if err != nil {
		return false, 0, 0, err
	}
	for _, n := range nodes.Nodes {
		if n.ManagerStatus != nil {
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
				reachable++
				if n.ID == currentNodeID {
					current = true
				}
			}
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
				unreachable++
			}
		}
	}
	return
}

func detectLockedError(err error) error {
	if err == swarmnode.ErrInvalidUnlockKey {
		return errors.WithStack(errSwarmLocked)
	}
	return err
}

func (c *Cluster) lockedManagerAction(fn func(ctx context.Context, state nodeState) error) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	return fn(ctx, state)
}

// SendClusterEvent allows sending cluster events on the configEvent channel.
// TODO This method should not be exposed.
// Currently it is used to notify the network controller that the keys are
// available.
func (c *Cluster) SendClusterEvent(event lncluster.ConfigEventType) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	c.configEvent <- event
}
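// As an illustrative sketch (this method is hypothetical; the real manager-only
// API methods live in sibling files of this package), operations that require
// an active manager are expected to be wrapped in lockedManagerAction, which
// holds the read lock, checks the node state, and supplies a request-scoped
// context:
//
//	func (c *Cluster) ExampleRemoveNode(id string) error { // hypothetical
//		return c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
//			_, err := state.controlClient.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: id})
//			return err
//		})
//	}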