github.com/sams1990/dockerrepo@v17.12.1-ce-rc2+incompatible/daemon/cluster/swarm.go

package cluster

import (
	"fmt"
	"net"
	"strings"
	"time"

	apitypes "github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/filters"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/convert"
	"github.com/docker/docker/opts"
	"github.com/docker/docker/pkg/signal"
	swarmapi "github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/encryption"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"golang.org/x/net/context"
)

// Init initializes a new cluster from a user-provided request.
func (c *Cluster) Init(req types.InitRequest) (string, error) {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	if c.nr != nil {
		if req.ForceNewCluster {

			// Take c.mu temporarily to wait for presently running
			// API handlers to finish before shutting down the node.
			c.mu.Lock()
			if !c.nr.nodeState.IsManager() {
				c.mu.Unlock()
				return "", errSwarmNotManager
			}
			c.mu.Unlock()

			if err := c.nr.Stop(); err != nil {
				return "", err
			}
		} else {
			return "", errSwarmExists
		}
	}

	if err := validateAndSanitizeInitRequest(&req); err != nil {
		return "", validationError{err}
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return "", err
	}

	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
	if err != nil {
		return "", err
	}

	dataPathAddr, err := resolveDataPathAddr(req.DataPathAddr)
	if err != nil {
		return "", err
	}

	localAddr := listenHost

	// If the local address is undetermined, the advertise address
	// will be used as the local address, if it belongs to this system.
	// If the advertise address is not local, then we try to find
	// a system address to use as the local address. If this fails,
	// we give up and ask the user to pass the listen address.
	if net.ParseIP(localAddr).IsUnspecified() {
		advertiseIP := net.ParseIP(advertiseHost)

		found := false
		for _, systemIP := range listSystemIPs() {
			if systemIP.Equal(advertiseIP) {
				localAddr = advertiseIP.String()
				found = true
				break
			}
		}

		if !found {
			ip, err := c.resolveSystemAddr()
			if err != nil {
				logrus.Warnf("Could not find a local address: %v", err)
				return "", errMustSpecifyListenAddr
			}
			localAddr = ip.String()
		}
	}

	nr, err := c.newNodeRunner(nodeStartConfig{
		forceNewCluster: req.ForceNewCluster,
		autolock:        req.AutoLockManagers,
		LocalAddr:       localAddr,
		ListenAddr:      net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr:   net.JoinHostPort(advertiseHost, advertisePort),
		DataPathAddr:    dataPathAddr,
		availability:    req.Availability,
	})
	if err != nil {
		return "", err
	}
	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		c.mu.Lock()
		c.nr = nil
		c.mu.Unlock()
		if !req.ForceNewCluster { // if failure on first attempt don't keep state
			if err := clearPersistentState(c.root); err != nil {
				return "", err
			}
		}
		return "", err
	}
	state := nr.State()
	if state.swarmNode == nil { // should never happen but protect from panic
		return "", errors.New("invalid cluster state for spec initialization")
	}
	if err := initClusterSpec(state.swarmNode, req.Spec); err != nil {
		return "", err
	}
	return state.NodeID(), nil
}

// Join makes the current Cluster part of an existing swarm cluster.
func (c *Cluster) Join(req types.JoinRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	c.mu.Lock()
	if c.nr != nil {
		c.mu.Unlock()
		return errors.WithStack(errSwarmExists)
	}
	c.mu.Unlock()

	if err := validateAndSanitizeJoinRequest(&req); err != nil {
		return validationError{err}
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return err
	}

	var advertiseAddr string
	if req.AdvertiseAddr != "" {
		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
		// For joining, we don't need to provide an advertise address,
		// since the remote side can detect it.
		if err == nil {
			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
		}
	}

	dataPathAddr, err := resolveDataPathAddr(req.DataPathAddr)
	if err != nil {
		return err
	}

	nr, err := c.newNodeRunner(nodeStartConfig{
		RemoteAddr:    req.RemoteAddrs[0],
		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr: advertiseAddr,
		DataPathAddr:  dataPathAddr,
		joinAddr:      req.RemoteAddrs[0],
		joinToken:     req.JoinToken,
		availability:  req.Availability,
	})
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	select {
	case <-time.After(swarmConnectTimeout):
		return errSwarmJoinTimeoutReached
	case err := <-nr.Ready():
		if err != nil {
			c.mu.Lock()
			c.nr = nil
			c.mu.Unlock()
			if err := clearPersistentState(c.root); err != nil {
				return err
			}
		}
		return err
	}
}

// Inspect retrieves the configuration properties of a managed swarm cluster.
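// The lookup runs through lockedManagerAction, so it returns an error when
// this node is not an active swarm manager.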
func (c *Cluster) Inspect() (types.Swarm, error) {
	var swarm types.Swarm
	if err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		s, err := c.inspect(ctx, state)
		if err != nil {
			return err
		}
		swarm = s
		return nil
	}); err != nil {
		return types.Swarm{}, err
	}
	return swarm, nil
}

func (c *Cluster) inspect(ctx context.Context, state nodeState) (types.Swarm, error) {
	s, err := getSwarm(ctx, state.controlClient)
	if err != nil {
		return types.Swarm{}, err
	}
	return convert.SwarmFromGRPC(*s), nil
}

// Update updates the configuration of a managed swarm cluster.
func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
	return c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		swarm, err := getSwarm(ctx, state.controlClient)
		if err != nil {
			return err
		}

		// Validate spec name.
		if spec.Annotations.Name == "" {
			spec.Annotations.Name = "default"
		} else if spec.Annotations.Name != "default" {
			return validationError{errors.New(`swarm spec must be named "default"`)}
		}

		// In Update, the client should provide the complete spec of the swarm,
		// including Name and Labels. If a field is specified as 0 or nil, the
		// swarmkit default value will be used.
		clusterSpec, err := convert.SwarmSpecToGRPC(spec)
		if err != nil {
			return convertError{err}
		}

		_, err = state.controlClient.UpdateCluster(
			ctx,
			&swarmapi.UpdateClusterRequest{
				ClusterID: swarm.ID,
				Spec:      &clusterSpec,
				ClusterVersion: &swarmapi.Version{
					Index: version,
				},
				Rotation: swarmapi.KeyRotation{
					WorkerJoinToken:  flags.RotateWorkerToken,
					ManagerJoinToken: flags.RotateManagerToken,
					ManagerUnlockKey: flags.RotateManagerUnlockKey,
				},
			},
		)
		return err
	})
}

// GetUnlockKey returns the unlock key for the swarm.
func (c *Cluster) GetUnlockKey() (string, error) {
	var resp *swarmapi.GetUnlockKeyResponse
	if err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		client := swarmapi.NewCAClient(state.grpcConn)

		r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{})
		if err != nil {
			return err
		}
		resp = r
		return nil
	}); err != nil {
		return "", err
	}
	if len(resp.UnlockKey) == 0 {
		// no key
		return "", nil
	}
	return encryption.HumanReadableKey(resp.UnlockKey), nil
}

// UnlockSwarm provides a key to decrypt data that is encrypted at rest.
func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.RLock()
	state := c.currentNodeState()

	if !state.IsActiveManager() {
		// When the manager is not active, return the error unless the
		// swarm is locked.
		if err := c.errNoManager(state); err != errSwarmLocked {
			c.mu.RUnlock()
			return err
		}
	} else {
		// When the manager is active, the swarm is not locked; return an error.
		c.mu.RUnlock()
		return notLockedError{}
	}

	// Code only reaches this point when the swarm is locked.
	nr := c.nr
	c.mu.RUnlock()

	key, err := encryption.ParseHumanReadableKey(req.UnlockKey)
	if err != nil {
		return validationError{err}
	}

	config := nr.config
	config.lockKey = key
	if err := nr.Stop(); err != nil {
		return err
	}
	nr, err = c.newNodeRunner(config)
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		if errors.Cause(err) == errSwarmLocked {
			return invalidUnlockKey{}
		}
		return errors.Errorf("swarm component could not be started: %v", err)
	}
	return nil
}

// Leave shuts down the Cluster and removes current state.
func (c *Cluster) Leave(force bool) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	nr := c.nr
	if nr == nil {
		c.mu.Unlock()
		return errors.WithStack(errNoSwarm)
	}

	state := c.currentNodeState()

	c.mu.Unlock()

	if errors.Cause(state.err) == errSwarmLocked && !force {
		// Leaving a locked swarm without --force is not allowed.
		return errors.WithStack(notAvailableError("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message."))
	}

	if state.IsManager() && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if state.IsActiveManager() {
			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						return errors.WithStack(notAvailableError(msg))
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		return errors.WithStack(notAvailableError(msg))
	}
	// Release readers here.
	if err := nr.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		return err
	}

	c.mu.Lock()
	c.nr = nil
	c.mu.Unlock()

	if nodeID := state.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}

	// todo: cleanup optional?
	if err := clearPersistentState(c.root); err != nil {
		return err
	}
	c.config.Backend.DaemonLeavesCluster()
	return nil
}

// Info returns information about the current cluster state.
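// When this node is an active manager, the result also includes cluster-wide
// details (cluster info, node and manager counts) gathered from the control API.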
func (c *Cluster) Info() types.Info {
	info := types.Info{
		NodeAddr: c.GetAdvertiseAddress(),
	}
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	info.LocalNodeState = state.status
	if state.err != nil {
		info.Error = state.err.Error()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	if state.IsActiveManager() {
		info.ControlAvailable = true
		swarm, err := c.inspect(ctx, state)
		if err != nil {
			info.Error = err.Error()
		}

		info.Cluster = &swarm.ClusterInfo

		if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err != nil {
			info.Error = err.Error()
		} else {
			info.Nodes = len(r.Nodes)
			for _, n := range r.Nodes {
				if n.ManagerStatus != nil {
					info.Managers = info.Managers + 1
				}
			}
		}
	}

	if state.swarmNode != nil {
		for _, r := range state.swarmNode.Remotes() {
			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
		}
		info.NodeID = state.swarmNode.NodeID()
	}

	return info
}

func validateAndSanitizeInitRequest(req *types.InitRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}

	if req.Spec.Annotations.Name == "" {
		req.Spec.Annotations.Name = "default"
	} else if req.Spec.Annotations.Name != "default" {
		return errors.New(`swarm spec must be named "default"`)
	}

	return nil
}

func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}
	if len(req.RemoteAddrs) == 0 {
		return errors.New("at least 1 RemoteAddr is required to join")
	}
	for i := range req.RemoteAddrs {
		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
		if err != nil {
			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
		}
	}
	return nil
}

func validateAddr(addr string) (string, error) {
	if addr == "" {
		return addr, errors.New("invalid empty address")
	}
	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
	if err != nil {
		// Note: a parse failure is not returned as an error; the original
		// address is passed through unchanged.
		return addr, nil
	}
	return strings.TrimPrefix(newaddr, "tcp://"), nil
}

func initClusterSpec(node *swarmnode.Node, spec types.Spec) error {
	ctx, _ := context.WithTimeout(context.Background(), 5*time.Second)
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return errors.New("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			// In init, we take the initial default values from swarmkit and merge
			// any non-nil or non-zero value from the spec into the GRPC spec. This
			// leaves the default values alone.
			// Note that this is different from Update(), where we expect the
			// user to specify the complete spec of the cluster (they already know
			// the existing one and know which fields to update).
			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
				ClusterID:      cluster.ID,
				ClusterVersion: &cluster.Meta.Version,
				Spec:           &clusterSpec,
			})
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			return nil
		}
	}
	return ctx.Err()
}

func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
	var ids []string
	filters := filters.NewArgs()
	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
		Filters: filters,
	})
	if err != nil {
		return []string{}, err
	}
	for _, c := range containers {
		ids = append(ids, c.ID)
	}
	return ids, nil
}