github.com/zhouyu0/docker-note@v0.0.0-20190722021225-b8d3825084db/daemon/cluster/swarm.go

package cluster // import "github.com/docker/docker/daemon/cluster"

import (
	"context"
	"fmt"
	"net"
	"strings"
	"time"

	apitypes "github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/filters"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/convert"
	"github.com/docker/docker/errdefs"
	"github.com/docker/docker/opts"
	"github.com/docker/docker/pkg/signal"
	swarmapi "github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/encryption"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
)

// Init initializes a new cluster from a user-provided request.
func (c *Cluster) Init(req types.InitRequest) (string, error) {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	if c.nr != nil {
		if req.ForceNewCluster {

			// Take c.mu temporarily to wait for presently running
			// API handlers to finish before shutting down the node.
			c.mu.Lock()
			if !c.nr.nodeState.IsManager() {
				c.mu.Unlock()
				return "", errSwarmNotManager
			}
			c.mu.Unlock()

			if err := c.nr.Stop(); err != nil {
				return "", err
			}
		} else {
			return "", errSwarmExists
		}
	}

	if err := validateAndSanitizeInitRequest(&req); err != nil {
		return "", errdefs.InvalidParameter(err)
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return "", err
	}

	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
	if err != nil {
		return "", err
	}

	dataPathAddr, err := resolveDataPathAddr(req.DataPathAddr)
	if err != nil {
		return "", err
	}

	localAddr := listenHost

	// If the local address is undetermined, the advertise address is used as
	// the local address, provided it belongs to this system. If the advertise
	// address is not local, we try to find a system address to use instead.
	// If that also fails, we give up and ask the user to pass the listen
	// address explicitly.
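	// For example, with ListenAddr "0.0.0.0:2377" and AdvertiseAddr
	// "192.168.1.10:2377" (illustrative values), localAddr ends up as
	// "192.168.1.10" as long as that IP is assigned to one of this host's
	// interfaces.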
	if net.ParseIP(localAddr).IsUnspecified() {
		advertiseIP := net.ParseIP(advertiseHost)

		found := false
		for _, systemIP := range listSystemIPs() {
			if systemIP.Equal(advertiseIP) {
				localAddr = advertiseIP.String()
				found = true
				break
			}
		}

		if !found {
			ip, err := c.resolveSystemAddr()
			if err != nil {
				logrus.Warnf("Could not find a local address: %v", err)
				return "", errMustSpecifyListenAddr
			}
			localAddr = ip.String()
		}
	}

	// Validate the default address pool input.
	if err := validateDefaultAddrPool(req.DefaultAddrPool, req.SubnetSize); err != nil {
		return "", err
	}
	nr, err := c.newNodeRunner(nodeStartConfig{
		forceNewCluster:    req.ForceNewCluster,
		autolock:           req.AutoLockManagers,
		LocalAddr:          localAddr,
		ListenAddr:         net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr:      net.JoinHostPort(advertiseHost, advertisePort),
		DataPathAddr:       dataPathAddr,
		DefaultAddressPool: req.DefaultAddrPool,
		SubnetSize:         req.SubnetSize,
		availability:       req.Availability,
	})
	if err != nil {
		return "", err
	}
	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		c.mu.Lock()
		c.nr = nil
		c.mu.Unlock()
		if !req.ForceNewCluster { // if the first attempt fails, don't keep state
			if err := clearPersistentState(c.root); err != nil {
				return "", err
			}
		}
		return "", err
	}
	state := nr.State()
	if state.swarmNode == nil { // should never happen, but protect against a panic
		return "", errors.New("invalid cluster state for spec initialization")
	}
	if err := initClusterSpec(state.swarmNode, req.Spec); err != nil {
		return "", err
	}
	return state.NodeID(), nil
}

// Join makes the current Cluster part of an existing swarm cluster.
func (c *Cluster) Join(req types.JoinRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	c.mu.Lock()
	if c.nr != nil {
		c.mu.Unlock()
		return errors.WithStack(errSwarmExists)
	}
	c.mu.Unlock()

	if err := validateAndSanitizeJoinRequest(&req); err != nil {
		return errdefs.InvalidParameter(err)
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return err
	}

	var advertiseAddr string
	if req.AdvertiseAddr != "" {
		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
		// For joining, we don't need to provide an advertise address,
		// since the remote side can detect it.
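		// A resolution failure is therefore not fatal here: the error is
		// ignored and the join proceeds with an empty advertise address.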
		if err == nil {
			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
		}
	}

	dataPathAddr, err := resolveDataPathAddr(req.DataPathAddr)
	if err != nil {
		return err
	}

	nr, err := c.newNodeRunner(nodeStartConfig{
		RemoteAddr:    req.RemoteAddrs[0],
		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr: advertiseAddr,
		DataPathAddr:  dataPathAddr,
		joinAddr:      req.RemoteAddrs[0],
		joinToken:     req.JoinToken,
		availability:  req.Availability,
	})
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	select {
	case <-time.After(swarmConnectTimeout):
		return errSwarmJoinTimeoutReached
	case err := <-nr.Ready():
		if err != nil {
			c.mu.Lock()
			c.nr = nil
			c.mu.Unlock()
			if err := clearPersistentState(c.root); err != nil {
				return err
			}
		}
		return err
	}
}

// Inspect retrieves the configuration properties of a managed swarm cluster.
func (c *Cluster) Inspect() (types.Swarm, error) {
	var swarm types.Swarm
	if err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		s, err := c.inspect(ctx, state)
		if err != nil {
			return err
		}
		swarm = s
		return nil
	}); err != nil {
		return types.Swarm{}, err
	}
	return swarm, nil
}

// inspect fetches the current cluster object through the manager's control
// client and converts it to the API type.
func (c *Cluster) inspect(ctx context.Context, state nodeState) (types.Swarm, error) {
	s, err := getSwarm(ctx, state.controlClient)
	if err != nil {
		return types.Swarm{}, err
	}
	return convert.SwarmFromGRPC(*s), nil
}

// Update updates the configuration of a managed swarm cluster.
func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
	return c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		swarm, err := getSwarm(ctx, state.controlClient)
		if err != nil {
			return err
		}

		// Validate the spec name.
		if spec.Annotations.Name == "" {
			spec.Annotations.Name = "default"
		} else if spec.Annotations.Name != "default" {
			return errdefs.InvalidParameter(errors.New(`swarm spec must be named "default"`))
		}

		// In Update, the client should provide the complete spec of the swarm,
		// including Name and Labels. If a field is specified as 0 or nil, the
		// default value will be used by swarmkit.
		clusterSpec, err := convert.SwarmSpecToGRPC(spec)
		if err != nil {
			return errdefs.InvalidParameter(err)
		}

		_, err = state.controlClient.UpdateCluster(
			ctx,
			&swarmapi.UpdateClusterRequest{
				ClusterID: swarm.ID,
				Spec:      &clusterSpec,
				ClusterVersion: &swarmapi.Version{
					Index: version,
				},
				Rotation: swarmapi.KeyRotation{
					WorkerJoinToken:  flags.RotateWorkerToken,
					ManagerJoinToken: flags.RotateManagerToken,
					ManagerUnlockKey: flags.RotateManagerUnlockKey,
				},
			},
		)
		return err
	})
}

// GetUnlockKey returns the unlock key for the swarm.
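// The key is returned in its human-readable form; an empty string is
// returned when no key is set (autolock disabled).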
func (c *Cluster) GetUnlockKey() (string, error) {
	var resp *swarmapi.GetUnlockKeyResponse
	if err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		client := swarmapi.NewCAClient(state.grpcConn)

		r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{})
		if err != nil {
			return err
		}
		resp = r
		return nil
	}); err != nil {
		return "", err
	}
	if len(resp.UnlockKey) == 0 {
		// no key
		return "", nil
	}
	return encryption.HumanReadableKey(resp.UnlockKey), nil
}

// UnlockSwarm provides a key to decrypt data that is encrypted at rest.
func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.RLock()
	state := c.currentNodeState()

	if !state.IsActiveManager() {
		// When the manager is not active, return the error unless it is
		// errSwarmLocked; a locked swarm is the only state we can unlock.
		if err := c.errNoManager(state); err != errSwarmLocked {
			c.mu.RUnlock()
			return err
		}
	} else {
		// When the manager is active, the swarm is not locked.
		c.mu.RUnlock()
		return notLockedError{}
	}

	// We only reach this point when the swarm is locked.
	nr := c.nr
	c.mu.RUnlock()

	key, err := encryption.ParseHumanReadableKey(req.UnlockKey)
	if err != nil {
		return errdefs.InvalidParameter(err)
	}

	config := nr.config
	config.lockKey = key
	if err := nr.Stop(); err != nil {
		return err
	}
	nr, err = c.newNodeRunner(config)
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		if errors.Cause(err) == errSwarmLocked {
			return invalidUnlockKey{}
		}
		return errors.Errorf("swarm component could not be started: %v", err)
	}
	return nil
}

// Leave shuts down the Cluster and removes current state.
func (c *Cluster) Leave(force bool) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	nr := c.nr
	if nr == nil {
		c.mu.Unlock()
		return errors.WithStack(errNoSwarm)
	}

	state := c.currentNodeState()

	c.mu.Unlock()

	if errors.Cause(state.err) == errSwarmLocked && !force {
		// Leaving a locked swarm without --force is not allowed.
		return errors.WithStack(notAvailableError("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message."))
	}

	if state.IsManager() && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if state.IsActiveManager() {
			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						return errors.WithStack(notAvailableError(msg))
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		return errors.WithStack(notAvailableError(msg))
	}
	// release readers here
	if err := nr.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		return err
	}

	c.mu.Lock()
	c.nr = nil
	c.mu.Unlock()

	if nodeID := state.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}

	// todo: cleanup optional?
	if err := clearPersistentState(c.root); err != nil {
		return err
	}
	c.config.Backend.DaemonLeavesCluster()
	return nil
}

// Info returns information about the current cluster state.
func (c *Cluster) Info() types.Info {
	info := types.Info{
		NodeAddr: c.GetAdvertiseAddress(),
	}
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	info.LocalNodeState = state.status
	if state.err != nil {
		info.Error = state.err.Error()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	if state.IsActiveManager() {
		info.ControlAvailable = true
		swarm, err := c.inspect(ctx, state)
		if err != nil {
			info.Error = err.Error()
		}

		info.Cluster = &swarm.ClusterInfo

		if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err != nil {
			info.Error = err.Error()
		} else {
			info.Nodes = len(r.Nodes)
			for _, n := range r.Nodes {
				if n.ManagerStatus != nil {
					info.Managers = info.Managers + 1
				}
			}
		}
	}

	if state.swarmNode != nil {
		for _, r := range state.swarmNode.Remotes() {
			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
		}
		info.NodeID = state.swarmNode.NodeID()
	}

	return info
}

// validateAndSanitizeInitRequest validates the init request and fills in the
// default swarm spec name.
func validateAndSanitizeInitRequest(req *types.InitRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}

	if req.Spec.Annotations.Name == "" {
		req.Spec.Annotations.Name = "default"
	} else if req.Spec.Annotations.Name != "default" {
		return errors.New(`swarm spec must be named "default"`)
	}

	return nil
}

// validateAndSanitizeJoinRequest validates the join request and normalizes
// the listen and remote addresses.
func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}
	if len(req.RemoteAddrs) == 0 {
		return errors.New("at least 1 RemoteAddr is required to join")
	}
	for i := range req.RemoteAddrs {
		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
		if err != nil {
			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
		}
	}
	return nil
}

// validateAddr rejects empty addresses and normalizes the rest to host:port
// form; an address that cannot be parsed as TCP is returned unchanged.
func validateAddr(addr string) (string, error) {
	if addr == "" {
		return addr, errors.New("invalid empty address")
	}
	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
	if err != nil {
		return addr, nil
	}
	return strings.TrimPrefix(newaddr, "tcp://"), nil
}

// initClusterSpec waits for the manager's control socket, then merges the
// user-provided spec into the default cluster spec created by swarmkit.
func initClusterSpec(node *swarmnode.Node, spec types.Spec) error {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
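	// The five-second context bounds everything below: waiting for the
	// control socket, polling ListClusters, and the final UpdateCluster call.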
	defer cancel()
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return errors.New("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			// In init, we take the initial default values from swarmkit and
			// merge any non-nil, non-zero value from spec into the gRPC spec,
			// leaving the defaults alone.
			// Note that this is different from Update(), where we expect the
			// user to provide the complete spec of the cluster (since they
			// already know the existing one and which fields to update).
			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
				ClusterID:      cluster.ID,
				ClusterVersion: &cluster.Meta.Version,
				Spec:           &clusterSpec,
			})
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			return nil
		}
	}
	return ctx.Err()
}

// listContainerForNode returns the IDs of the containers labeled as belonging
// to the given swarm node.
func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
	var ids []string
	filters := filters.NewArgs()
	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
		Filters: filters,
	})
	if err != nil {
		return []string{}, err
	}
	for _, c := range containers {
		ids = append(ids, c.ID)
	}
	return ids, nil
}