// github.com/kim0/docker@v0.6.2-0.20161130212042-4addda3f07e7/daemon/cluster/cluster.go

package cluster

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"google.golang.org/grpc"

	"github.com/Sirupsen/logrus"
	"github.com/docker/docker/api/errors"
	apitypes "github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/filters"
	"github.com/docker/docker/api/types/network"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/convert"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/docker/daemon/cluster/executor/container"
	"github.com/docker/docker/opts"
	"github.com/docker/docker/pkg/ioutils"
	"github.com/docker/docker/pkg/signal"
	"github.com/docker/docker/runconfig"
	swarmapi "github.com/docker/swarmkit/api"
	swarmnode "github.com/docker/swarmkit/node"
	"golang.org/x/net/context"
)

const swarmDirName = "swarm"
const controlSocket = "control.sock"
const swarmConnectTimeout = 20 * time.Second
const swarmRequestTimeout = 20 * time.Second
const stateFile = "docker-state.json"
const defaultAddr = "0.0.0.0:2377"

const (
	initialReconnectDelay = 100 * time.Millisecond
	maxReconnectDelay     = 30 * time.Second
)

// ErrNoSwarm is returned on leaving a cluster that was never initialized
var ErrNoSwarm = fmt.Errorf("This node is not part of a swarm")

// ErrSwarmExists is returned on initialize or join request for a cluster that has already been activated
var ErrSwarmExists = fmt.Errorf("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")

// ErrPendingSwarmExists is returned on initialize or join request for a cluster that is already processing a similar request but has not succeeded yet.
var ErrPendingSwarmExists = fmt.Errorf("This node is processing an existing join request that has not succeeded yet. Use \"docker swarm leave\" to cancel the current request.")

// ErrSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached.
var ErrSwarmJoinTimeoutReached = fmt.Errorf("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")

// NetworkSubnetsProvider exposes functions for retrieving the subnets
// of networks managed by Docker, so they can be filtered.
type NetworkSubnetsProvider interface {
	V4Subnets() []net.IPNet
	V6Subnets() []net.IPNet
}

// Config provides values for Cluster.
type Config struct {
	Root                   string
	Name                   string
	Backend                executorpkg.Backend
	NetworkSubnetsProvider NetworkSubnetsProvider

	// DefaultAdvertiseAddr is the default host/IP or network interface to use
	// if no AdvertiseAddr value is specified.
	DefaultAdvertiseAddr string

	// path to store runtime state, such as the swarm control socket
	RuntimeRoot string
}

// Cluster provides capabilities to participate in a cluster as a worker or a
// manager.
type Cluster struct {
	sync.RWMutex
	*node
	root            string
	runtimeRoot     string
	config          Config
	configEvent     chan struct{} // todo: make this array and goroutine safe
	actualLocalAddr string        // after resolution, not persisted
	stop            bool
	err             error
	cancelDelay     func()
	attachers       map[string]*attacher
}

// attacher manages the in-memory attachment state of a container
// attachment to a global scope network managed by swarm manager. It
// helps in identifying the attachment ID via the taskID and the
// corresponding attachment configuration obtained from the manager.
type attacher struct {
	taskID           string
	config           *network.NetworkingConfig
	attachWaitCh     chan *network.NetworkingConfig
	attachCompleteCh chan struct{}
	detachWaitCh     chan struct{}
}

type node struct {
	*swarmnode.Node
	done           chan struct{}
	ready          bool
	conn           *grpc.ClientConn
	client         swarmapi.ControlClient
	reconnectDelay time.Duration
	config         nodeStartConfig
}

// nodeStartConfig holds configuration needed to start a new node. Exported
// fields of this structure are saved to disk in json. Unexported fields
// contain data that shouldn't be persisted between daemon reloads.
type nodeStartConfig struct {
	// LocalAddr is this machine's local IP or hostname, if specified.
	LocalAddr string
	// RemoteAddr is the address that was given to "swarm join". It is used
	// to find LocalAddr if necessary.
	RemoteAddr string
	// ListenAddr is the address we bind to, including a port.
	ListenAddr string
	// AdvertiseAddr is the address other nodes should connect to,
	// including a port.
	AdvertiseAddr string

	joinAddr        string
	forceNewCluster bool
	joinToken       string
}
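
// The sketch below is illustrative and not part of the original file: it
// shows what loadState/saveState (further down) persist in docker-state.json.
// json.Marshal only serializes exported fields, so the join-specific
// unexported fields (joinAddr, forceNewCluster, joinToken) never reach disk,
// exactly as the nodeStartConfig comment above requires. The sample
// addresses and token are made up for the example.
func nodeStartConfigSketch() {
	conf := nodeStartConfig{
		LocalAddr:     "192.168.1.10",
		RemoteAddr:    "192.168.1.20:2377",
		ListenAddr:    "0.0.0.0:2377",
		AdvertiseAddr: "192.168.1.10:2377",
		joinToken:     "SWMTKN-example-token", // dropped by json.Marshal
	}
	dt, _ := json.Marshal(conf)
	// Only LocalAddr, RemoteAddr, ListenAddr and AdvertiseAddr appear here.
	fmt.Println(string(dt))
}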

// New creates a new Cluster instance using provided config.
func New(config Config) (*Cluster, error) {
	root := filepath.Join(config.Root, swarmDirName)
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}
	if config.RuntimeRoot == "" {
		config.RuntimeRoot = root
	}
	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
		return nil, err
	}
	c := &Cluster{
		root:        root,
		config:      config,
		configEvent: make(chan struct{}, 10),
		runtimeRoot: config.RuntimeRoot,
		attachers:   make(map[string]*attacher),
	}

	nodeConfig, err := c.loadState()
	if err != nil {
		if os.IsNotExist(err) {
			return c, nil
		}
		return nil, err
	}

	n, err := c.startNewNode(*nodeConfig)
	if err != nil {
		return nil, err
	}

	select {
	case <-time.After(swarmConnectTimeout):
		logrus.Errorf("swarm component could not be started before timeout was reached")
	case <-n.Ready():
	case <-n.done:
		return nil, fmt.Errorf("swarm component could not be started: %v", c.err)
	}
	go c.reconnectOnFailure(n)
	return c, nil
}

func (c *Cluster) loadState() (*nodeStartConfig, error) {
	dt, err := ioutil.ReadFile(filepath.Join(c.root, stateFile))
	if err != nil {
		return nil, err
	}
	// missing certificate means no actual state to restore from
	if _, err := os.Stat(filepath.Join(c.root, "certificates/swarm-node.crt")); err != nil {
		if os.IsNotExist(err) {
			c.clearState()
		}
		return nil, err
	}
	var st nodeStartConfig
	if err := json.Unmarshal(dt, &st); err != nil {
		return nil, err
	}
	return &st, nil
}

func (c *Cluster) saveState(config nodeStartConfig) error {
	dt, err := json.Marshal(config)
	if err != nil {
		return err
	}
	return ioutils.AtomicWriteFile(filepath.Join(c.root, stateFile), dt, 0600)
}

func (c *Cluster) reconnectOnFailure(n *node) {
	for {
		<-n.done
		c.Lock()
		if c.stop || c.node != nil {
			c.Unlock()
			return
		}
		n.reconnectDelay *= 2
		if n.reconnectDelay > maxReconnectDelay {
			n.reconnectDelay = maxReconnectDelay
		}
		logrus.Warnf("Restarting swarm in %.2f seconds", n.reconnectDelay.Seconds())
		delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
		c.cancelDelay = cancel
		c.Unlock()
		<-delayCtx.Done()
		if delayCtx.Err() != context.DeadlineExceeded {
			return
		}
		c.Lock()
		if c.node != nil {
			c.Unlock()
			return
		}
		var err error
		config := n.config
		config.RemoteAddr = c.getRemoteAddress()
		config.joinAddr = config.RemoteAddr
		n, err = c.startNewNode(config)
		if err != nil {
			c.err = err
			close(n.done)
		}
		c.Unlock()
	}
}

func (c *Cluster) startNewNode(conf nodeStartConfig) (*node, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}

	actualLocalAddr := conf.LocalAddr
	if actualLocalAddr == "" {
		// If localAddr was not specified, resolve it automatically
		// based on the route to joinAddr. localAddr can only be left
		// empty on "join".
		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
		if err != nil {
			return nil, fmt.Errorf("could not parse listen address: %v", err)
		}

		listenAddrIP := net.ParseIP(listenHost)
		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
			actualLocalAddr = listenHost
		} else {
			if conf.RemoteAddr == "" {
				// Should never happen except using swarms created by
				// old versions that didn't save remoteAddr.
				conf.RemoteAddr = "8.8.8.8:53"
			}
			conn, err := net.Dial("udp", conf.RemoteAddr)
			if err != nil {
				return nil, fmt.Errorf("could not find local IP address: %v", err)
			}
			localHostPort := conn.LocalAddr().String()
			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
			conn.Close()
		}
	}

	c.node = nil
	c.cancelDelay = nil
	c.stop = false
	n, err := swarmnode.New(&swarmnode.Config{
		Hostname:           c.config.Name,
		ForceNewCluster:    conf.forceNewCluster,
		ListenControlAPI:   filepath.Join(c.runtimeRoot, controlSocket),
		ListenRemoteAPI:    conf.ListenAddr,
		AdvertiseRemoteAPI: conf.AdvertiseAddr,
		JoinAddr:           conf.joinAddr,
		StateDir:           c.root,
		JoinToken:          conf.joinToken,
		Executor:           container.NewExecutor(c.config.Backend),
		HeartbeatTick:      1,
		ElectionTick:       3,
	})
	if err != nil {
		return nil, err
	}
	ctx := context.Background()
	if err := n.Start(ctx); err != nil {
		return nil, err
	}
	node := &node{
		Node:           n,
		done:           make(chan struct{}),
		reconnectDelay: initialReconnectDelay,
		config:         conf,
	}
	c.node = node
	c.actualLocalAddr = actualLocalAddr // not saved
	c.saveState(conf)

	c.config.Backend.SetClusterProvider(c)
	go func() {
		err := n.Err(ctx)
		if err != nil {
			logrus.Errorf("cluster exited with error: %v", err)
		}
		c.Lock()
		c.node = nil
		c.err = err
		c.Unlock()
		close(node.done)
	}()

	go func() {
		select {
		case <-n.Ready():
			c.Lock()
			node.ready = true
			c.err = nil
			c.Unlock()
		case <-ctx.Done():
		}
		c.configEvent <- struct{}{}
	}()

	go func() {
		for conn := range n.ListenControlSocket(ctx) {
			c.Lock()
			if node.conn != conn {
				if conn == nil {
					node.client = nil
				} else {
					node.client = swarmapi.NewControlClient(conn)
				}
			}
			node.conn = conn
			c.Unlock()
			c.configEvent <- struct{}{}
		}
	}()

	return node, nil
}

// Init initializes new cluster from user provided request.
func (c *Cluster) Init(req types.InitRequest) (string, error) {
	c.Lock()
	if node := c.node; node != nil {
		if !req.ForceNewCluster {
			c.Unlock()
			return "", ErrSwarmExists
		}
		if err := c.stopNode(); err != nil {
			c.Unlock()
			return "", err
		}
	}

	if err := validateAndSanitizeInitRequest(&req); err != nil {
		c.Unlock()
		return "", err
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		c.Unlock()
		return "", err
	}

	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
	if err != nil {
		c.Unlock()
		return "", err
	}

	localAddr := listenHost

	// If the advertise address is not one of the system's
	// addresses, we also require a listen address.
	listenAddrIP := net.ParseIP(listenHost)
	if listenAddrIP != nil && listenAddrIP.IsUnspecified() {
		advertiseIP := net.ParseIP(advertiseHost)
		if advertiseIP == nil {
			// not an IP
			c.Unlock()
			return "", errMustSpecifyListenAddr
		}

		systemIPs := listSystemIPs()

		found := false
		for _, systemIP := range systemIPs {
			if systemIP.Equal(advertiseIP) {
				found = true
				break
			}
		}
		if !found {
			c.Unlock()
			return "", errMustSpecifyListenAddr
		}
		localAddr = advertiseIP.String()
	}

	// todo: check current state existing
	n, err := c.startNewNode(nodeStartConfig{
		forceNewCluster: req.ForceNewCluster,
		LocalAddr:       localAddr,
		ListenAddr:      net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr:   net.JoinHostPort(advertiseHost, advertisePort),
	})
	if err != nil {
		c.Unlock()
		return "", err
	}
	c.Unlock()

	select {
	case <-n.Ready():
		if err := initClusterSpec(n, req.Spec); err != nil {
			return "", err
		}
		go c.reconnectOnFailure(n)
		return n.NodeID(), nil
	case <-n.done:
		c.RLock()
		defer c.RUnlock()
		if !req.ForceNewCluster { // if failure on first attempt don't keep state
			if err := c.clearState(); err != nil {
				return "", err
			}
		}
		return "", c.err
	}
}

// Join makes current Cluster part of an existing swarm cluster.
func (c *Cluster) Join(req types.JoinRequest) error {
	c.Lock()
	if node := c.node; node != nil {
		c.Unlock()
		return ErrSwarmExists
	}
	if err := validateAndSanitizeJoinRequest(&req); err != nil {
		c.Unlock()
		return err
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		c.Unlock()
		return err
	}

	var advertiseAddr string
	if req.AdvertiseAddr != "" {
		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
		// For joining, we don't need to provide an advertise address,
		// since the remote side can detect it.
		if err == nil {
			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
		}
	}

	// todo: check current state existing
	n, err := c.startNewNode(nodeStartConfig{
		RemoteAddr:    req.RemoteAddrs[0],
		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr: advertiseAddr,
		joinAddr:      req.RemoteAddrs[0],
		joinToken:     req.JoinToken,
	})
	if err != nil {
		c.Unlock()
		return err
	}
	c.Unlock()

	select {
	case <-time.After(swarmConnectTimeout):
		// attempt to connect will continue in background, also reconnecting
		go c.reconnectOnFailure(n)
		return ErrSwarmJoinTimeoutReached
	case <-n.Ready():
		go c.reconnectOnFailure(n)
		return nil
	case <-n.done:
		c.RLock()
		defer c.RUnlock()
		return c.err
	}
}
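
// The sketch below is illustrative and not part of the original file: it
// shows how a caller such as the daemon's swarm API router might drive Init
// and Join above. The addresses are placeholders, and only request fields
// referenced elsewhere in this file are used.
func initAndJoinSketch(c *Cluster) {
	// Bootstrap a new swarm on this node and become its first manager.
	nodeID, err := c.Init(types.InitRequest{
		ListenAddr:    "0.0.0.0:2377",
		AdvertiseAddr: "192.168.1.10:2377",
	})
	if err != nil {
		logrus.Errorf("init failed: %v", err)
		return
	}
	logrus.Infof("initialized swarm, node ID %s", nodeID)

	// On another daemon, the equivalent join call would look like this:
	//
	//	err = otherCluster.Join(types.JoinRequest{
	//		ListenAddr:  "0.0.0.0:2377",
	//		RemoteAddrs: []string{"192.168.1.10:2377"},
	//		JoinToken:   "SWMTKN-example-token",
	//	})
}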

// stopNode is a helper that stops the active c.node and waits until it has
// shut down. Call while keeping the cluster lock.
func (c *Cluster) stopNode() error {
	if c.node == nil {
		return nil
	}
	c.stop = true
	if c.cancelDelay != nil {
		c.cancelDelay()
		c.cancelDelay = nil
	}
	node := c.node
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	// TODO: can't hold lock on stop because it calls back to network
	c.Unlock()
	defer c.Lock()
	if err := node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
		return err
	}
	<-node.done
	return nil
}

func removingManagerCausesLossOfQuorum(reachable, unreachable int) bool {
	return reachable-2 <= unreachable
}

func isLastManager(reachable, unreachable int) bool {
	return reachable == 1 && unreachable == 0
}
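
// The sketch below is illustrative and not part of the original file: it
// spells out the arithmetic behind the two quorum helpers above, which Leave
// and Cleanup use to warn before a manager is removed. A raft quorum needs a
// strict majority of managers, so removing one manager is only safe while
// the remaining reachable managers still outnumber the unreachable ones.
func quorumExamplesSketch() {
	// 3 reachable, 0 unreachable: 3-2 <= 0 is false, so removing one
	// manager still leaves 2 of 3 and quorum survives.
	_ = removingManagerCausesLossOfQuorum(3, 0) // false
	// 2 reachable, 1 unreachable: 2-2 <= 1 is true, so removing one
	// manager leaves 1 of 3 and quorum is lost.
	_ = removingManagerCausesLossOfQuorum(2, 1) // true
	// A single reachable manager with none unreachable is the last one;
	// removing it erases the swarm state entirely.
	_ = isLastManager(1, 0) // true
}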

// Leave shuts down Cluster and removes current state.
func (c *Cluster) Leave(force bool) error {
	c.Lock()
	node := c.node
	if node == nil {
		c.Unlock()
		return ErrNoSwarm
	}

	if node.Manager() != nil && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if c.isActiveManager() {
			active, reachable, unreachable, err := c.managerStats()
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						c.Unlock()
						return fmt.Errorf(msg)
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		c.Unlock()
		return fmt.Errorf(msg)
	}
	if err := c.stopNode(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		c.Unlock()
		return err
	}
	c.Unlock()
	if nodeID := node.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}
	c.configEvent <- struct{}{}
	// todo: cleanup optional?
	if err := c.clearState(); err != nil {
		return err
	}
	return nil
}

func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
	var ids []string
	filters := filters.NewArgs()
	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
		Filter: filters,
	})
	if err != nil {
		return []string{}, err
	}
	for _, c := range containers {
		ids = append(ids, c.ID)
	}
	return ids, nil
}

func (c *Cluster) clearState() error {
	// todo: backup this data instead of removing?
	if err := os.RemoveAll(c.root); err != nil {
		return err
	}
	if err := os.MkdirAll(c.root, 0700); err != nil {
		return err
	}
	c.config.Backend.SetClusterProvider(nil)
	return nil
}

func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum lost
	return context.WithTimeout(context.Background(), swarmRequestTimeout)
}

// Inspect retrieves the configuration properties of a managed swarm cluster.
func (c *Cluster) Inspect() (types.Swarm, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return types.Swarm{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	swarm, err := getSwarm(ctx, c.client)
	if err != nil {
		return types.Swarm{}, err
	}

	return convert.SwarmFromGRPC(*swarm), nil
}

// Update updates configuration of a managed swarm cluster.
func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	swarm, err := getSwarm(ctx, c.client)
	if err != nil {
		return err
	}

	// In update, the client should provide the complete spec of the swarm,
	// including Name and Labels. If a field is specified with 0 or nil, then
	// swarmkit falls back to its default value.
	clusterSpec, err := convert.SwarmSpecToGRPC(spec)
	if err != nil {
		return err
	}

	_, err = c.client.UpdateCluster(
		ctx,
		&swarmapi.UpdateClusterRequest{
			ClusterID: swarm.ID,
			Spec:      &clusterSpec,
			ClusterVersion: &swarmapi.Version{
				Index: version,
			},
			Rotation: swarmapi.JoinTokenRotation{
				RotateWorkerToken:  flags.RotateWorkerToken,
				RotateManagerToken: flags.RotateManagerToken,
			},
		},
	)
	return err
}

// IsManager returns true if Cluster is participating as a manager.
func (c *Cluster) IsManager() bool {
	c.RLock()
	defer c.RUnlock()
	return c.isActiveManager()
}

// IsAgent returns true if Cluster is participating as a worker/agent.
func (c *Cluster) IsAgent() bool {
	c.RLock()
	defer c.RUnlock()
	return c.node != nil && c.ready
}

// GetLocalAddress returns the local address.
func (c *Cluster) GetLocalAddress() string {
	c.RLock()
	defer c.RUnlock()
	return c.actualLocalAddr
}

// GetListenAddress returns the listen address.
func (c *Cluster) GetListenAddress() string {
	c.RLock()
	defer c.RUnlock()
	if c.node != nil {
		return c.node.config.ListenAddr
	}
	return ""
}

// GetAdvertiseAddress returns the remotely reachable address of this node.
func (c *Cluster) GetAdvertiseAddress() string {
	c.RLock()
	defer c.RUnlock()
	if c.node != nil && c.node.config.AdvertiseAddr != "" {
		advertiseHost, _, _ := net.SplitHostPort(c.node.config.AdvertiseAddr)
		return advertiseHost
	}
	return c.actualLocalAddr
}

// GetRemoteAddress returns a known advertise address of a remote manager if
// available.
// todo: change to array/connect with info
func (c *Cluster) GetRemoteAddress() string {
	c.RLock()
	defer c.RUnlock()
	return c.getRemoteAddress()
}

func (c *Cluster) getRemoteAddress() string {
	if c.node == nil {
		return ""
	}
	nodeID := c.node.NodeID()
	for _, r := range c.node.Remotes() {
		if r.NodeID != nodeID {
			return r.Addr
		}
	}
	return ""
}

// ListenClusterEvents returns a channel that receives messages on cluster
// participation changes.
// todo: make cancelable and accessible to multiple callers
func (c *Cluster) ListenClusterEvents() <-chan struct{} {
	return c.configEvent
}

// Info returns information about the current cluster state.
func (c *Cluster) Info() types.Info {
	info := types.Info{
		NodeAddr: c.GetAdvertiseAddress(),
	}

	c.RLock()
	defer c.RUnlock()

	if c.node == nil {
		info.LocalNodeState = types.LocalNodeStateInactive
		if c.cancelDelay != nil {
			info.LocalNodeState = types.LocalNodeStateError
		}
	} else {
		info.LocalNodeState = types.LocalNodeStatePending
		if c.ready == true {
			info.LocalNodeState = types.LocalNodeStateActive
		}
	}
	if c.err != nil {
		info.Error = c.err.Error()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	if c.isActiveManager() {
		info.ControlAvailable = true
		swarm, err := c.Inspect()
		if err != nil {
			info.Error = err.Error()
		}

		// Strip JoinTokens
		info.Cluster = swarm.ClusterInfo

		if r, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err == nil {
			info.Nodes = len(r.Nodes)
			for _, n := range r.Nodes {
				if n.ManagerStatus != nil {
					info.Managers = info.Managers + 1
				}
			}
		}
	}

	if c.node != nil {
		for _, r := range c.node.Remotes() {
			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
		}
		info.NodeID = c.node.NodeID()
	}

	return info
}

// isActiveManager should not be called without a read lock
func (c *Cluster) isActiveManager() bool {
	return c.node != nil && c.conn != nil
}

// errNoManager returns error describing why manager commands can't be used.
// Call with read lock.
func (c *Cluster) errNoManager() error {
	if c.node == nil {
		return fmt.Errorf("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
	}
	if c.node.Manager() != nil {
		return fmt.Errorf("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
	}
	return fmt.Errorf("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
}
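
// The sketch below is illustrative and not part of the original file: it
// shows the access pattern shared by the manager-only methods that follow
// (GetServices, CreateService, GetNodes, and so on). Each of them takes the
// read lock, bails out with errNoManager unless this node is an active
// manager, and then issues a control-API call against c.client under a
// bounded request context. The ListNodes call is only a stand-in for
// whichever RPC the real method performs.
func (c *Cluster) managerCallSketch() error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	_, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
	return err
}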

// GetServices returns all services of a managed swarm cluster.
func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return nil, c.errNoManager()
	}

	filters, err := newListServicesFilters(options.Filter)
	if err != nil {
		return nil, err
	}
	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := c.client.ListServices(
		ctx,
		&swarmapi.ListServicesRequest{Filters: filters})
	if err != nil {
		return nil, err
	}

	services := []types.Service{}

	for _, service := range r.Services {
		services = append(services, convert.ServiceFromGRPC(*service))
	}

	return services, nil
}

// CreateService creates a new service in a managed swarm cluster.
func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (string, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return "", c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	err := c.populateNetworkID(ctx, c.client, &s)
	if err != nil {
		return "", err
	}

	serviceSpec, err := convert.ServiceSpecToGRPC(s)
	if err != nil {
		return "", err
	}

	if encodedAuth != "" {
		ctnr := serviceSpec.Task.GetContainer()
		if ctnr == nil {
			return "", fmt.Errorf("service does not use container tasks")
		}
		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
	}

	r, err := c.client.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
	if err != nil {
		return "", err
	}

	return r.Service.ID, nil
}

// GetService returns a service based on an ID or name.
func (c *Cluster) GetService(input string) (types.Service, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return types.Service{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	service, err := getService(ctx, c.client, input)
	if err != nil {
		return types.Service{}, err
	}
	return convert.ServiceFromGRPC(*service), nil
}

// UpdateService updates existing service to match new properties.
func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	err := c.populateNetworkID(ctx, c.client, &spec)
	if err != nil {
		return err
	}

	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
	if err != nil {
		return err
	}

	currentService, err := getService(ctx, c.client, serviceIDOrName)
	if err != nil {
		return err
	}

	if encodedAuth != "" {
		ctnr := serviceSpec.Task.GetContainer()
		if ctnr == nil {
			return fmt.Errorf("service does not use container tasks")
		}
		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
	} else {
		// this is needed because if the encodedAuth isn't being updated then we
		// shouldn't lose it, and continue to use the one that was already present
		var ctnr *swarmapi.ContainerSpec
		switch registryAuthFrom {
		case apitypes.RegistryAuthFromSpec, "":
			ctnr = currentService.Spec.Task.GetContainer()
		case apitypes.RegistryAuthFromPreviousSpec:
			if currentService.PreviousSpec == nil {
				return fmt.Errorf("service does not have a previous spec")
			}
			ctnr = currentService.PreviousSpec.Task.GetContainer()
		default:
			return fmt.Errorf("unsupported registryAuthFrom value")
		}
		if ctnr == nil {
			return fmt.Errorf("service does not use container tasks")
		}
		serviceSpec.Task.GetContainer().PullOptions = ctnr.PullOptions
	}

	_, err = c.client.UpdateService(
		ctx,
		&swarmapi.UpdateServiceRequest{
			ServiceID: currentService.ID,
			Spec:      &serviceSpec,
			ServiceVersion: &swarmapi.Version{
				Index: version,
			},
		},
	)
	return err
}

// RemoveService removes a service from a managed swarm cluster.
func (c *Cluster) RemoveService(input string) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	service, err := getService(ctx, c.client, input)
	if err != nil {
		return err
	}

	if _, err := c.client.RemoveService(ctx, &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil {
		return err
	}
	return nil
}

// GetNodes returns a list of all nodes known to a cluster.
func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return nil, c.errNoManager()
	}

	filters, err := newListNodesFilters(options.Filter)
	if err != nil {
		return nil, err
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := c.client.ListNodes(
		ctx,
		&swarmapi.ListNodesRequest{Filters: filters})
	if err != nil {
		return nil, err
	}

	nodes := []types.Node{}

	for _, node := range r.Nodes {
		nodes = append(nodes, convert.NodeFromGRPC(*node))
	}
	return nodes, nil
}

// GetNode returns a node based on an ID or name.
func (c *Cluster) GetNode(input string) (types.Node, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return types.Node{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	node, err := getNode(ctx, c.client, input)
	if err != nil {
		return types.Node{}, err
	}
	return convert.NodeFromGRPC(*node), nil
}

// UpdateNode updates an existing node's properties.
func (c *Cluster) UpdateNode(nodeID string, version uint64, spec types.NodeSpec) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	nodeSpec, err := convert.NodeSpecToGRPC(spec)
	if err != nil {
		return err
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	_, err = c.client.UpdateNode(
		ctx,
		&swarmapi.UpdateNodeRequest{
			NodeID: nodeID,
			Spec:   &nodeSpec,
			NodeVersion: &swarmapi.Version{
				Index: version,
			},
		},
	)
	return err
}

// RemoveNode removes a node from a cluster
func (c *Cluster) RemoveNode(input string, force bool) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	node, err := getNode(ctx, c.client, input)
	if err != nil {
		return err
	}

	if _, err := c.client.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID, Force: force}); err != nil {
		return err
	}
	return nil
}

// GetTasks returns a list of tasks matching the filter options.
func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return nil, c.errNoManager()
	}

	byName := func(filter filters.Args) error {
		if filter.Include("service") {
			serviceFilters := filter.Get("service")
			for _, serviceFilter := range serviceFilters {
				service, err := c.GetService(serviceFilter)
				if err != nil {
					return err
				}
				filter.Del("service", serviceFilter)
				filter.Add("service", service.ID)
			}
		}
		if filter.Include("node") {
			nodeFilters := filter.Get("node")
			for _, nodeFilter := range nodeFilters {
				node, err := c.GetNode(nodeFilter)
				if err != nil {
					return err
				}
				filter.Del("node", nodeFilter)
				filter.Add("node", node.ID)
			}
		}
		return nil
	}

	filters, err := newListTasksFilters(options.Filter, byName)
	if err != nil {
		return nil, err
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := c.client.ListTasks(
		ctx,
		&swarmapi.ListTasksRequest{Filters: filters})
	if err != nil {
		return nil, err
	}

	tasks := []types.Task{}

	for _, task := range r.Tasks {
		if task.Spec.GetContainer() != nil {
			tasks = append(tasks, convert.TaskFromGRPC(*task))
		}
	}
	return tasks, nil
}

// GetTask returns a task by an ID.
func (c *Cluster) GetTask(input string) (types.Task, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return types.Task{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	task, err := getTask(ctx, c.client, input)
	if err != nil {
		return types.Task{}, err
	}
	return convert.TaskFromGRPC(*task), nil
}

// GetNetwork returns a cluster network by an ID.
func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return apitypes.NetworkResource{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	network, err := getNetwork(ctx, c.client, input)
	if err != nil {
		return apitypes.NetworkResource{}, err
	}
	return convert.BasicNetworkFromGRPC(*network), nil
}

// GetNetworks returns all current cluster managed networks.
func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return nil, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := c.client.ListNetworks(ctx, &swarmapi.ListNetworksRequest{})
	if err != nil {
		return nil, err
	}

	var networks []apitypes.NetworkResource

	for _, network := range r.Networks {
		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
	}

	return networks, nil
}

func attacherKey(target, containerID string) string {
	return containerID + ":" + target
}

// UpdateAttachment signals the attachment config to the attachment
// waiter who is trying to start or attach the container to the
// network.
func (c *Cluster) UpdateAttachment(target, containerID string, config *network.NetworkingConfig) error {
	c.RLock()
	attacher, ok := c.attachers[attacherKey(target, containerID)]
	c.RUnlock()
	if !ok || attacher == nil {
		return fmt.Errorf("could not find attacher for container %s to network %s", containerID, target)
	}

	attacher.attachWaitCh <- config
	close(attacher.attachWaitCh)
	return nil
}

// WaitForDetachment waits for the container to stop or detach from
// the network.
func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID, taskID, containerID string) error {
	c.RLock()
	attacher, ok := c.attachers[attacherKey(networkName, containerID)]
	if !ok {
		attacher, ok = c.attachers[attacherKey(networkID, containerID)]
	}
	if c.node == nil || c.node.Agent() == nil {
		c.RUnlock()
		return fmt.Errorf("invalid cluster node while waiting for detachment")
	}

	agent := c.node.Agent()
	c.RUnlock()

	if ok && attacher != nil &&
		attacher.detachWaitCh != nil &&
		attacher.attachCompleteCh != nil {
		// Attachment may be in progress still so wait for
		// attachment to complete.
		select {
		case <-attacher.attachCompleteCh:
		case <-ctx.Done():
			return ctx.Err()
		}

		if attacher.taskID == taskID {
			select {
			case <-attacher.detachWaitCh:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}

	return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
}

// AttachNetwork generates an attachment request towards the manager.
func (c *Cluster) AttachNetwork(target string, containerID string, addresses []string) (*network.NetworkingConfig, error) {
	aKey := attacherKey(target, containerID)
	c.Lock()
	if c.node == nil || c.node.Agent() == nil {
		c.Unlock()
		return nil, fmt.Errorf("invalid cluster node while attaching to network")
	}
	if attacher, ok := c.attachers[aKey]; ok {
		c.Unlock()
		return attacher.config, nil
	}

	agent := c.node.Agent()
	attachWaitCh := make(chan *network.NetworkingConfig)
	detachWaitCh := make(chan struct{})
	attachCompleteCh := make(chan struct{})
	c.attachers[aKey] = &attacher{
		attachWaitCh:     attachWaitCh,
		attachCompleteCh: attachCompleteCh,
		detachWaitCh:     detachWaitCh,
	}
	c.Unlock()

	ctx, cancel := c.getRequestContext()
	defer cancel()

	taskID, err := agent.ResourceAllocator().AttachNetwork(ctx, containerID, target, addresses)
	if err != nil {
		c.Lock()
		delete(c.attachers, aKey)
		c.Unlock()
		return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
	}

	c.Lock()
	c.attachers[aKey].taskID = taskID
	close(attachCompleteCh)
	c.Unlock()

	logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)

	var config *network.NetworkingConfig
	select {
	case config = <-attachWaitCh:
	case <-ctx.Done():
		return nil, fmt.Errorf("attaching to network failed, make sure your network options are correct and check manager logs: %v", ctx.Err())
	}

	c.Lock()
	c.attachers[aKey].config = config
	c.Unlock()
	return config, nil
}

// DetachNetwork unblocks the waiters waiting on WaitForDetachment so
// that a request to detach can be generated towards the manager.
func (c *Cluster) DetachNetwork(target string, containerID string) error {
	aKey := attacherKey(target, containerID)

	c.Lock()
	attacher, ok := c.attachers[aKey]
	delete(c.attachers, aKey)
	c.Unlock()

	if !ok {
		return fmt.Errorf("could not find network attachment for container %s to network %s", containerID, target)
	}

	close(attacher.detachWaitCh)
	return nil
}
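
// The sketch below is illustrative and not part of the original file: it
// walks through the channel handshake that the attachment methods above
// coordinate through a single attacher entry. The network name, container ID
// and task ID are placeholders, and in the real daemon the steps run on
// separate goroutines rather than sequentially like this.
func attachmentLifecycleSketch(ctx context.Context, c *Cluster) error {
	// AttachNetwork registers an attacher keyed by container and target,
	// requests an attachment task from the agent, then blocks on
	// attachWaitCh until UpdateAttachment delivers the manager's
	// networking config.
	config, err := c.AttachNetwork("my-overlay", "some-container", nil)
	if err != nil {
		return err
	}
	logrus.Debugf("got attachment config: %+v", config)

	// DetachNetwork drops the attacher and closes detachWaitCh, which is
	// the signal WaitForDetachment blocks on before asking the agent to
	// detach the attachment task.
	if err := c.DetachNetwork("my-overlay", "some-container"); err != nil {
		return err
	}
	return c.WaitForDetachment(ctx, "my-overlay", "", "some-task", "some-container")
}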

// CreateNetwork creates a new cluster managed network.
func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return "", c.errNoManager()
	}

	if runconfig.IsPreDefinedNetwork(s.Name) {
		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
		return "", errors.NewRequestForbiddenError(err)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	networkSpec := convert.BasicNetworkCreateToGRPC(s)
	r, err := c.client.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
	if err != nil {
		return "", err
	}

	return r.Network.ID, nil
}

// RemoveNetwork removes a cluster network.
func (c *Cluster) RemoveNetwork(input string) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	network, err := getNetwork(ctx, c.client, input)
	if err != nil {
		return err
	}

	if _, err := c.client.RemoveNetwork(ctx, &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil {
		return err
	}
	return nil
}

func (c *Cluster) populateNetworkID(ctx context.Context, client swarmapi.ControlClient, s *types.ServiceSpec) error {
	// Always prefer NetworkAttachmentConfigs from TaskTemplate
	// but fallback to service spec for backward compatibility
	networks := s.TaskTemplate.Networks
	if len(networks) == 0 {
		networks = s.Networks
	}

	for i, n := range networks {
		apiNetwork, err := getNetwork(ctx, client, n.Target)
		if err != nil {
			if ln, _ := c.config.Backend.FindNetwork(n.Target); ln != nil && !ln.Info().Dynamic() {
				err = fmt.Errorf("network %s is not eligible for docker services", ln.Name())
				return errors.NewRequestForbiddenError(err)
			}
			return err
		}
		networks[i].Target = apiNetwork.ID
	}
	return nil
}

func getNetwork(ctx context.Context, c swarmapi.ControlClient, input string) (*swarmapi.Network, error) {
	// GetNetwork to match via full ID.
	rg, err := c.GetNetwork(ctx, &swarmapi.GetNetworkRequest{NetworkID: input})
	if err != nil {
		// If any error (including NotFound), ListNetworks to match via ID prefix and full name.
		rl, err := c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{Names: []string{input}}})
		if err != nil || len(rl.Networks) == 0 {
			rl, err = c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{IDPrefixes: []string{input}}})
		}

		if err != nil {
			return nil, err
		}

		if len(rl.Networks) == 0 {
			return nil, fmt.Errorf("network %s not found", input)
		}

		if l := len(rl.Networks); l > 1 {
			return nil, fmt.Errorf("network %s is ambiguous (%d matches found)", input, l)
		}

		return rl.Networks[0], nil
	}
	return rg.Network, nil
}
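
// The sketch below is illustrative and not part of the original file: it
// shows the resolution order implemented by getNetwork above. A full ID
// match via GetNetwork wins outright; otherwise the name filter is tried,
// then the ID-prefix filter, and anything that still matches more than one
// network is rejected as ambiguous. "ingress" is only an example reference;
// any name, full ID or ID prefix goes through the same path.
func resolveNetworkSketch(ctx context.Context, client swarmapi.ControlClient) (*swarmapi.Network, error) {
	return getNetwork(ctx, client, "ingress")
}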

// Cleanup stops active swarm node. This is run before daemon shutdown.
func (c *Cluster) Cleanup() {
	c.Lock()
	node := c.node
	if node == nil {
		c.Unlock()
		return
	}
	defer c.Unlock()
	if c.isActiveManager() {
		active, reachable, unreachable, err := c.managerStats()
		if err == nil {
			singlenode := active && isLastManager(reachable, unreachable)
			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
			}
		}
	}
	c.stopNode()
}

func (c *Cluster) managerStats() (current bool, reachable int, unreachable int, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	nodes, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
	if err != nil {
		return false, 0, 0, err
	}
	for _, n := range nodes.Nodes {
		if n.ManagerStatus != nil {
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
				reachable++
				if n.ID == c.node.NodeID() {
					current = true
				}
			}
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
				unreachable++
			}
		}
	}
	return
}

func validateAndSanitizeInitRequest(req *types.InitRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}

	return nil
}

func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}
	if len(req.RemoteAddrs) == 0 {
		return fmt.Errorf("at least 1 RemoteAddr is required to join")
	}
	for i := range req.RemoteAddrs {
		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
		if err != nil {
			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
		}
	}
	return nil
}

func validateAddr(addr string) (string, error) {
	if addr == "" {
		return addr, fmt.Errorf("invalid empty address")
	}
	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
	if err != nil {
		return addr, nil
	}
	return strings.TrimPrefix(newaddr, "tcp://"), nil
}

func initClusterSpec(node *node, spec types.Spec) error {
	ctx, _ := context.WithTimeout(context.Background(), 5*time.Second)
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return fmt.Errorf("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			// In init, we take the initial default values from swarmkit, and merge
			// any non nil or 0 value from spec to GRPC spec. This will leave the
			// default value alone.
			// Note that this is different from Update(), as in Update() we expect
			// the user to specify the complete spec of the cluster (since they
			// already know the existing one and which fields to update).
			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
				ClusterID:      cluster.ID,
				ClusterVersion: &cluster.Meta.Version,
				Spec:           &clusterSpec,
			})
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			return nil
		}
	}
	return ctx.Err()
}