github.com/fabiokung/docker@v0.11.2-0.20170222101415-4534dcd49497/daemon/cluster/swarm.go

package cluster

import (
	"fmt"
	"net"
	"strings"
	"time"

	"github.com/Sirupsen/logrus"
	apierrors "github.com/docker/docker/api/errors"
	apitypes "github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/filters"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/convert"
	"github.com/docker/docker/opts"
	"github.com/docker/docker/pkg/signal"
	swarmapi "github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/encryption"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
)

// Init initializes a new cluster from the user-provided request.
func (c *Cluster) Init(req types.InitRequest) (string, error) {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	c.mu.Lock()
	if c.nr != nil {
		if req.ForceNewCluster {
			if err := c.nr.Stop(); err != nil {
				c.mu.Unlock()
				return "", err
			}
		} else {
			c.mu.Unlock()
			return "", errSwarmExists
		}
	}
	c.mu.Unlock()

	if err := validateAndSanitizeInitRequest(&req); err != nil {
		return "", apierrors.NewBadRequestError(err)
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return "", err
	}

	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
	if err != nil {
		return "", err
	}

	localAddr := listenHost

	// If the local address is undetermined, the advertise address is
	// used as the local address, provided it belongs to this system.
	// If the advertise address is not local, we try to find a system
	// address to use instead. If that also fails, we give up and ask
	// the user to pass an explicit listen address.
	if net.ParseIP(localAddr).IsUnspecified() {
		advertiseIP := net.ParseIP(advertiseHost)

		found := false
		for _, systemIP := range listSystemIPs() {
			if systemIP.Equal(advertiseIP) {
				localAddr = advertiseIP.String()
				found = true
				break
			}
		}

		if !found {
			ip, err := c.resolveSystemAddr()
			if err != nil {
				logrus.Warnf("Could not find a local address: %v", err)
				return "", errMustSpecifyListenAddr
			}
			localAddr = ip.String()
		}
	}

	if !req.ForceNewCluster {
		clearPersistentState(c.root)
	}

	nr, err := c.newNodeRunner(nodeStartConfig{
		forceNewCluster: req.ForceNewCluster,
		autolock:        req.AutoLockManagers,
		LocalAddr:       localAddr,
		ListenAddr:      net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr:   net.JoinHostPort(advertiseHost, advertisePort),
		availability:    req.Availability,
	})
	if err != nil {
		return "", err
	}
	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		// if the first attempt fails, don't keep state around
		if !req.ForceNewCluster {
			if err := clearPersistentState(c.root); err != nil {
				return "", err
			}
		}
		c.mu.Lock()
		c.nr = nil
		c.mu.Unlock()
		return "", err
	}
	state := nr.State()
	if state.swarmNode == nil { // should never happen, but protect from panic
		return "", errors.New("invalid cluster state for spec initialization")
	}
	if err := initClusterSpec(state.swarmNode, req.Spec); err != nil {
		return "", err
	}
	return state.NodeID(), nil
}
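
// The advertise-address check in Init above relies on listSystemIPs
// (defined elsewhere in this package) to enumerate the host's addresses.
// As a rough sketch of what such an enumeration involves, using only the
// standard library (systemIPs is a hypothetical name, not this package's
// implementation):
//
//	func systemIPs() []net.IP {
//		var ips []net.IP
//		addrs, err := net.InterfaceAddrs()
//		if err != nil {
//			return nil
//		}
//		for _, a := range addrs {
//			// interface addresses are typically *net.IPNet values
//			if ipnet, ok := a.(*net.IPNet); ok {
//				ips = append(ips, ipnet.IP)
//			}
//		}
//		return ips
//	}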

// Join makes the current Cluster part of an existing swarm cluster.
func (c *Cluster) Join(req types.JoinRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	c.mu.Lock()
	if c.nr != nil {
		c.mu.Unlock()
		return errSwarmExists
	}
	c.mu.Unlock()

	if err := validateAndSanitizeJoinRequest(&req); err != nil {
		return apierrors.NewBadRequestError(err)
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return err
	}

	var advertiseAddr string
	if req.AdvertiseAddr != "" {
		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
		// For joining, we don't need to provide an advertise address,
		// since the remote side can detect it, so resolution errors
		// are not fatal here.
		if err == nil {
			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
		}
	}

	clearPersistentState(c.root)

	nr, err := c.newNodeRunner(nodeStartConfig{
		RemoteAddr:    req.RemoteAddrs[0],
		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr: advertiseAddr,
		joinAddr:      req.RemoteAddrs[0],
		joinToken:     req.JoinToken,
		availability:  req.Availability,
	})
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	select {
	case <-time.After(swarmConnectTimeout):
		return errSwarmJoinTimeoutReached
	case err := <-nr.Ready():
		if err != nil {
			c.mu.Lock()
			c.nr = nil
			c.mu.Unlock()
		}
		return err
	}
}

// Inspect retrieves the configuration properties of a managed swarm cluster.
func (c *Cluster) Inspect() (types.Swarm, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return types.Swarm{}, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	swarm, err := getSwarm(ctx, state.controlClient)
	if err != nil {
		return types.Swarm{}, err
	}

	return convert.SwarmFromGRPC(*swarm), nil
}

// Update updates the configuration of a managed swarm cluster.
func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	swarm, err := getSwarm(ctx, state.controlClient)
	if err != nil {
		return err
	}

	// In Update, the client must provide the complete spec of the swarm,
	// including Name and Labels. If a field is set to 0 or nil, the
	// swarmkit default value is used.
	clusterSpec, err := convert.SwarmSpecToGRPC(spec)
	if err != nil {
		return apierrors.NewBadRequestError(err)
	}

	_, err = state.controlClient.UpdateCluster(
		ctx,
		&swarmapi.UpdateClusterRequest{
			ClusterID: swarm.ID,
			Spec:      &clusterSpec,
			ClusterVersion: &swarmapi.Version{
				Index: version,
			},
			Rotation: swarmapi.KeyRotation{
				WorkerJoinToken:  flags.RotateWorkerToken,
				ManagerJoinToken: flags.RotateManagerToken,
				ManagerUnlockKey: flags.RotateManagerUnlockKey,
			},
		},
	)
	return err
}
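
// Because Update applies the spec wholesale and guards against concurrent
// writers with the version index, callers are expected to follow a
// read-modify-write pattern. A minimal sketch, assuming a *Cluster value c
// (the field chosen for mutation is illustrative only):
//
//	sw, err := c.Inspect()
//	if err != nil {
//		return err
//	}
//	sw.Spec.Raft.SnapshotInterval = 5000
//	if err := c.Update(sw.Version.Index, sw.Spec, types.UpdateFlags{}); err != nil {
//		return err
//	}
//
// If another update lands between Inspect and Update, swarmkit rejects the
// request on the version mismatch instead of silently overwriting it.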

// GetUnlockKey returns the unlock key for the swarm.
func (c *Cluster) GetUnlockKey() (string, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return "", c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	client := swarmapi.NewCAClient(state.grpcConn)

	r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{})
	if err != nil {
		return "", err
	}

	if len(r.UnlockKey) == 0 {
		// no key set
		return "", nil
	}

	return encryption.HumanReadableKey(r.UnlockKey), nil
}

// UnlockSwarm provides a key to decrypt data that is encrypted at rest.
func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.RLock()
	state := c.currentNodeState()

	if !state.IsActiveManager() {
		// An inactive manager is only acceptable here if the swarm is
		// locked; any other error is returned as-is.
		if err := c.errNoManager(state); err != errSwarmLocked {
			c.mu.RUnlock()
			return err
		}
	} else {
		// The manager is active, so there is nothing to unlock.
		c.mu.RUnlock()
		return errors.New("swarm is not locked")
	}

	// We only reach this point when the swarm is locked.
	nr := c.nr
	c.mu.RUnlock()

	key, err := encryption.ParseHumanReadableKey(req.UnlockKey)
	if err != nil {
		return err
	}

	config := nr.config
	config.lockKey = key
	if err := nr.Stop(); err != nil {
		return err
	}
	nr, err = c.newNodeRunner(config)
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		if errors.Cause(err) == errSwarmLocked {
			return errors.New("swarm could not be unlocked: invalid key provided")
		}
		return fmt.Errorf("swarm component could not be started: %v", err)
	}
	return nil
}

// Leave shuts down the Cluster and removes the current state.
func (c *Cluster) Leave(force bool) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	nr := c.nr
	if nr == nil {
		c.mu.Unlock()
		return errNoSwarm
	}

	state := c.currentNodeState()

	if errors.Cause(state.err) == errSwarmLocked && !force {
		// leaving a locked swarm without --force is not allowed
		c.mu.Unlock()
		return errors.New("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message.")
	}

	if state.IsManager() && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if state.IsActiveManager() {
			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						c.mu.Unlock()
						return errors.New(msg)
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		c.mu.Unlock()
		return errors.New(msg)
	}
	// release readers in here
	if err := nr.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		c.mu.Unlock()
		return err
	}
	c.nr = nil
	c.mu.Unlock()
	if nodeID := state.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}

	c.configEvent <- struct{}{}
	// TODO: make this cleanup optional?
	if err := clearPersistentState(c.root); err != nil {
		return err
	}
	c.config.Backend.DaemonLeavesCluster()
	return nil
}
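
// The quorum warning in Leave above follows standard Raft arithmetic: a
// cluster of n managers stays available only while more than n/2 members
// remain reachable. One way to write the check that
// removingManagerCausesLossOfQuorum performs (the exact expression in this
// package may differ):
//
//	func wouldLoseQuorum(reachable, unreachable int) bool {
//		// after this node leaves, reachable-1 managers remain reachable
//		// out of reachable+unreachable-1 total; quorum is floor(n/2)+1
//		remaining := reachable + unreachable - 1
//		return reachable-1 < remaining/2+1
//	}
//
// For example, with three managers of which one is already unreachable
// (reachable=2, unreachable=1), leaving drops the cluster to one reachable
// manager out of two, below the required quorum of two.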

// Info returns information about the current cluster state.
func (c *Cluster) Info() types.Info {
	info := types.Info{
		NodeAddr: c.GetAdvertiseAddress(),
	}
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	info.LocalNodeState = state.status
	if state.err != nil {
		info.Error = state.err.Error()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	if state.IsActiveManager() {
		info.ControlAvailable = true
		swarm, err := c.Inspect()
		if err != nil {
			info.Error = err.Error()
		}

		// Strip JoinTokens by copying only the ClusterInfo part.
		info.Cluster = swarm.ClusterInfo

		if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err != nil {
			info.Error = err.Error()
		} else {
			info.Nodes = len(r.Nodes)
			for _, n := range r.Nodes {
				if n.ManagerStatus != nil {
					info.Managers++
				}
			}
		}
	}

	if state.swarmNode != nil {
		for _, r := range state.swarmNode.Remotes() {
			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
		}
		info.NodeID = state.swarmNode.NodeID()
	}

	return info
}

func validateAndSanitizeInitRequest(req *types.InitRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}

	if req.Spec.Annotations.Name == "" {
		req.Spec.Annotations.Name = "default"
	} else if req.Spec.Annotations.Name != "default" {
		return errors.New(`swarm spec must be named "default"`)
	}

	return nil
}

func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}
	if len(req.RemoteAddrs) == 0 {
		return errors.New("at least 1 RemoteAddr is required to join")
	}
	for i := range req.RemoteAddrs {
		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
		if err != nil {
			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
		}
	}
	return nil
}

func validateAddr(addr string) (string, error) {
	if addr == "" {
		return addr, errors.New("invalid empty address")
	}
	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
	if err != nil {
		// tolerate parse errors and fall back to the raw address
		return addr, nil
	}
	return strings.TrimPrefix(newaddr, "tcp://"), nil
}
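
// validateAddr leans on opts.ParseTCPAddr to normalize addresses: missing
// pieces are filled in from defaultAddr and a tcp:// scheme is prefixed,
// which validateAddr then trims back off. A sketch of the expected
// normalization (the exact defaults come from this package's defaultAddr):
//
//	addr, err := opts.ParseTCPAddr("192.168.1.1", "0.0.0.0:2377")
//	// on success, addr should be "tcp://192.168.1.1:2377"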

func initClusterSpec(node *swarmnode.Node, spec types.Spec) error {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return errors.New("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			// In init, we take the initial default values from swarmkit and
			// merge any non-nil, non-zero value from the spec into the GRPC
			// spec, leaving the defaults alone otherwise.
			// Note that this is different from Update(), where we expect the
			// user to specify the complete spec of the cluster (as they
			// already know the existing one and which fields to update).
			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
				ClusterID:      cluster.ID,
				ClusterVersion: &cluster.Meta.Version,
				Spec:           &clusterSpec,
			})
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			return nil
		}
	}
	return ctx.Err()
}

func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
	var ids []string
	nodeFilters := filters.NewArgs()
	nodeFilters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
		Filters: nodeFilters,
	})
	if err != nil {
		return []string{}, err
	}
	for _, c := range containers {
		ids = append(ids, c.ID)
	}
	return ids, nil
}
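
// listContainerForNode matches containers by the com.docker.swarm.node.id
// label that swarm-managed containers carry. The equivalent manual query
// from the CLI would be something like:
//
//	docker ps --filter label=com.docker.swarm.node.id=<node-id>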