github.com/vieux/docker@v0.6.3-0.20161004191708-e097c2a938c7/daemon/cluster/cluster.go

package cluster

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"google.golang.org/grpc"

	"github.com/Sirupsen/logrus"
	"github.com/docker/docker/api/errors"
	apitypes "github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/filters"
	"github.com/docker/docker/api/types/network"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/convert"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/docker/daemon/cluster/executor/container"
	"github.com/docker/docker/opts"
	"github.com/docker/docker/pkg/ioutils"
	"github.com/docker/docker/pkg/signal"
	"github.com/docker/docker/runconfig"
	swarmagent "github.com/docker/swarmkit/agent"
	swarmapi "github.com/docker/swarmkit/api"
	"golang.org/x/net/context"
)

const swarmDirName = "swarm"
const controlSocket = "control.sock"
const swarmConnectTimeout = 20 * time.Second
const swarmRequestTimeout = 20 * time.Second
const stateFile = "docker-state.json"
const defaultAddr = "0.0.0.0:2377"

const (
	initialReconnectDelay = 100 * time.Millisecond
	maxReconnectDelay     = 30 * time.Second
)

// ErrNoSwarm is returned on leaving a cluster that was never initialized
var ErrNoSwarm = fmt.Errorf("This node is not part of a swarm")

// ErrSwarmExists is returned on initialize or join request for a cluster that has already been activated
var ErrSwarmExists = fmt.Errorf("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")

// ErrPendingSwarmExists is returned on initialize or join request for a cluster that is already processing a similar request but has not succeeded yet.
var ErrPendingSwarmExists = fmt.Errorf("This node is processing an existing join request that has not succeeded yet. Use \"docker swarm leave\" to cancel the current request.")

// ErrSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached.
var ErrSwarmJoinTimeoutReached = fmt.Errorf("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")

type state struct {
	// LocalAddr is this machine's local IP or hostname, if specified.
	LocalAddr string
	// RemoteAddr is the address that was given to "swarm join". It is used
	// to find LocalAddr if necessary.
	RemoteAddr string
	// ListenAddr is the address we bind to, including a port.
	ListenAddr string
	// AdvertiseAddr is the address other nodes should connect to,
	// including a port.
	AdvertiseAddr string
}

// NetworkSubnetsProvider exposes functions for retrieving the subnets
// of networks managed by Docker, so they can be filtered.
type NetworkSubnetsProvider interface {
	V4Subnets() []net.IPNet
	V6Subnets() []net.IPNet
}

// Config provides values for Cluster.
type Config struct {
	Root                   string
	Name                   string
	Backend                executorpkg.Backend
	NetworkSubnetsProvider NetworkSubnetsProvider

	// DefaultAdvertiseAddr is the default host/IP or network interface to use
	// if no AdvertiseAddr value is specified.
	DefaultAdvertiseAddr string

	// path to store runtime state, such as the swarm control socket
	RuntimeRoot string
}
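
// Example (minimal sketch): wiring the cluster component into a daemon via
// New. The paths and the backend value below are hypothetical placeholders
// for whatever the calling daemon already has at hand.
//
//	c, err := New(Config{
//	    Root:        "/var/lib/docker", // hypothetical daemon root
//	    Name:        "node-1",          // hypothetical node hostname
//	    Backend:     daemonBackend,     // some executorpkg.Backend implementation
//	    RuntimeRoot: "/var/run/docker", // hypothetical runtime state dir
//	})
//	if err != nil {
//	    logrus.Fatalf("error initializing cluster component: %v", err)
//	}
//	defer c.Cleanup()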

// Cluster provides capabilities to participate in a cluster as a worker or a
// manager.
type Cluster struct {
	sync.RWMutex
	*node
	root            string
	runtimeRoot     string
	config          Config
	configEvent     chan struct{} // todo: make this array and goroutine safe
	localAddr       string
	actualLocalAddr string // after resolution, not persisted
	remoteAddr      string
	listenAddr      string
	advertiseAddr   string
	stop            bool
	err             error
	cancelDelay     func()
	attachers       map[string]*attacher
}

// attacher manages the in-memory attachment state of a container
// attachment to a global scope network managed by swarm manager. It
// helps in identifying the attachment ID via the taskID and the
// corresponding attachment configuration obtained from the manager.
type attacher struct {
	taskID           string
	config           *network.NetworkingConfig
	attachWaitCh     chan *network.NetworkingConfig
	attachCompleteCh chan struct{}
	detachWaitCh     chan struct{}
}

type node struct {
	*swarmagent.Node
	done           chan struct{}
	ready          bool
	conn           *grpc.ClientConn
	client         swarmapi.ControlClient
	reconnectDelay time.Duration
}

// New creates a new Cluster instance using provided config.
func New(config Config) (*Cluster, error) {
	root := filepath.Join(config.Root, swarmDirName)
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}
	if config.RuntimeRoot == "" {
		config.RuntimeRoot = root
	}
	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
		return nil, err
	}
	c := &Cluster{
		root:        root,
		config:      config,
		configEvent: make(chan struct{}, 10),
		runtimeRoot: config.RuntimeRoot,
		attachers:   make(map[string]*attacher),
	}

	st, err := c.loadState()
	if err != nil {
		if os.IsNotExist(err) {
			return c, nil
		}
		return nil, err
	}

	n, err := c.startNewNode(false, st.LocalAddr, st.RemoteAddr, st.ListenAddr, st.AdvertiseAddr, "", "")
	if err != nil {
		return nil, err
	}

	select {
	case <-time.After(swarmConnectTimeout):
		logrus.Errorf("swarm component could not be started before timeout was reached")
	case <-n.Ready():
	case <-n.done:
		return nil, fmt.Errorf("swarm component could not be started: %v", c.err)
	}
	go c.reconnectOnFailure(n)
	return c, nil
}

func (c *Cluster) loadState() (*state, error) {
	dt, err := ioutil.ReadFile(filepath.Join(c.root, stateFile))
	if err != nil {
		return nil, err
	}
	// missing certificate means no actual state to restore from
	if _, err := os.Stat(filepath.Join(c.root, "certificates/swarm-node.crt")); err != nil {
		if os.IsNotExist(err) {
			c.clearState()
		}
		return nil, err
	}
	var st state
	if err := json.Unmarshal(dt, &st); err != nil {
		return nil, err
	}
	return &st, nil
}

func (c *Cluster) saveState() error {
	dt, err := json.Marshal(state{
		LocalAddr:     c.localAddr,
		RemoteAddr:    c.remoteAddr,
		ListenAddr:    c.listenAddr,
		AdvertiseAddr: c.advertiseAddr,
	})
	if err != nil {
		return err
	}
	return ioutils.AtomicWriteFile(filepath.Join(c.root, stateFile), dt, 0600)
}
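
// For reference, the docker-state.json file written by saveState above is a
// plain JSON encoding of the state struct; a minimal sketch of its contents
// (all addresses are hypothetical):
//
//	{
//	    "LocalAddr":     "192.168.0.10",
//	    "RemoteAddr":    "192.168.0.20:2377",
//	    "ListenAddr":    "0.0.0.0:2377",
//	    "AdvertiseAddr": "192.168.0.10:2377"
//	}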

func (c *Cluster) reconnectOnFailure(n *node) {
	for {
		<-n.done
		c.Lock()
		if c.stop || c.node != nil {
			c.Unlock()
			return
		}
		n.reconnectDelay *= 2
		if n.reconnectDelay > maxReconnectDelay {
			n.reconnectDelay = maxReconnectDelay
		}
		logrus.Warnf("Restarting swarm in %.2f seconds", n.reconnectDelay.Seconds())
		delayCtx, cancel := context.WithTimeout(context.Background(), n.reconnectDelay)
		c.cancelDelay = cancel
		c.Unlock()
		<-delayCtx.Done()
		if delayCtx.Err() != context.DeadlineExceeded {
			return
		}
		c.Lock()
		if c.node != nil {
			c.Unlock()
			return
		}
		var err error
		n, err = c.startNewNode(false, c.localAddr, c.getRemoteAddress(), c.listenAddr, c.advertiseAddr, c.getRemoteAddress(), "")
		if err != nil {
			c.err = err
			close(n.done)
		}
		c.Unlock()
	}
}
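
// To illustrate the backoff above: reconnectDelay starts at
// initialReconnectDelay (100ms) and is doubled before every wait, so
// successive restart attempts wait roughly 200ms, 400ms, 800ms, ... capped at
// maxReconnectDelay (30s). A node that restarts successfully comes back from
// startNewNode with the delay reset to initialReconnectDelay.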

func (c *Cluster) startNewNode(forceNewCluster bool, localAddr, remoteAddr, listenAddr, advertiseAddr, joinAddr, joinToken string) (*node, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}

	actualLocalAddr := localAddr
	if actualLocalAddr == "" {
		// If localAddr was not specified, resolve it automatically
		// based on the route to joinAddr. localAddr can only be left
		// empty on "join".
		listenHost, _, err := net.SplitHostPort(listenAddr)
		if err != nil {
			return nil, fmt.Errorf("could not parse listen address: %v", err)
		}

		listenAddrIP := net.ParseIP(listenHost)
		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
			actualLocalAddr = listenHost
		} else {
			if remoteAddr == "" {
				// Should never happen except using swarms created by
				// old versions that didn't save remoteAddr.
				remoteAddr = "8.8.8.8:53"
			}
			conn, err := net.Dial("udp", remoteAddr)
			if err != nil {
				return nil, fmt.Errorf("could not find local IP address: %v", err)
			}
			localHostPort := conn.LocalAddr().String()
			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
			conn.Close()
		}
	}

	c.node = nil
	c.cancelDelay = nil
	c.stop = false
	n, err := swarmagent.NewNode(&swarmagent.NodeConfig{
		Hostname:           c.config.Name,
		ForceNewCluster:    forceNewCluster,
		ListenControlAPI:   filepath.Join(c.runtimeRoot, controlSocket),
		ListenRemoteAPI:    listenAddr,
		AdvertiseRemoteAPI: advertiseAddr,
		JoinAddr:           joinAddr,
		StateDir:           c.root,
		JoinToken:          joinToken,
		Executor:           container.NewExecutor(c.config.Backend),
		HeartbeatTick:      1,
		ElectionTick:       3,
	})
	if err != nil {
		return nil, err
	}
	ctx := context.Background()
	if err := n.Start(ctx); err != nil {
		return nil, err
	}
	node := &node{
		Node:           n,
		done:           make(chan struct{}),
		reconnectDelay: initialReconnectDelay,
	}
	c.node = node
	c.localAddr = localAddr
	c.actualLocalAddr = actualLocalAddr // not saved
	c.remoteAddr = remoteAddr
	c.listenAddr = listenAddr
	c.advertiseAddr = advertiseAddr
	c.saveState()

	c.config.Backend.SetClusterProvider(c)
	go func() {
		err := n.Err(ctx)
		if err != nil {
			logrus.Errorf("cluster exited with error: %v", err)
		}
		c.Lock()
		c.node = nil
		c.err = err
		c.Unlock()
		close(node.done)
	}()

	go func() {
		select {
		case <-n.Ready():
			c.Lock()
			node.ready = true
			c.err = nil
			c.Unlock()
		case <-ctx.Done():
		}
		c.configEvent <- struct{}{}
	}()

	go func() {
		for conn := range n.ListenControlSocket(ctx) {
			c.Lock()
			if node.conn != conn {
				if conn == nil {
					node.client = nil
				} else {
					node.client = swarmapi.NewControlClient(conn)
				}
			}
			node.conn = conn
			c.Unlock()
			c.configEvent <- struct{}{}
		}
	}()

	return node, nil
}

// Init initializes new cluster from user provided request.
func (c *Cluster) Init(req types.InitRequest) (string, error) {
	c.Lock()
	if node := c.node; node != nil {
		if !req.ForceNewCluster {
			c.Unlock()
			return "", ErrSwarmExists
		}
		if err := c.stopNode(); err != nil {
			c.Unlock()
			return "", err
		}
	}

	if err := validateAndSanitizeInitRequest(&req); err != nil {
		c.Unlock()
		return "", err
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		c.Unlock()
		return "", err
	}

	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
	if err != nil {
		c.Unlock()
		return "", err
	}

	localAddr := listenHost

	// If the advertise address is not one of the system's
	// addresses, we also require a listen address.
	listenAddrIP := net.ParseIP(listenHost)
	if listenAddrIP != nil && listenAddrIP.IsUnspecified() {
		advertiseIP := net.ParseIP(advertiseHost)
		if advertiseIP == nil {
			// not an IP
			c.Unlock()
			return "", errMustSpecifyListenAddr
		}

		systemIPs := listSystemIPs()

		found := false
		for _, systemIP := range systemIPs {
			if systemIP.Equal(advertiseIP) {
				found = true
				break
			}
		}
		if !found {
			c.Unlock()
			return "", errMustSpecifyListenAddr
		}
		localAddr = advertiseIP.String()
	}

	// todo: check current state existing
	n, err := c.startNewNode(req.ForceNewCluster, localAddr, "", net.JoinHostPort(listenHost, listenPort), net.JoinHostPort(advertiseHost, advertisePort), "", "")
	if err != nil {
		c.Unlock()
		return "", err
	}
	c.Unlock()

	select {
	case <-n.Ready():
		if err := initClusterSpec(n, req.Spec); err != nil {
			return "", err
		}
		go c.reconnectOnFailure(n)
		return n.NodeID(), nil
	case <-n.done:
		c.RLock()
		defer c.RUnlock()
		if !req.ForceNewCluster { // if failure on first attempt don't keep state
			if err := c.clearState(); err != nil {
				return "", err
			}
		}
		return "", c.err
	}
}
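
// Example (minimal sketch): initializing a new single-node swarm through Init.
// The addresses are hypothetical placeholders; see types.InitRequest for the
// full set of options.
//
//	nodeID, err := c.Init(types.InitRequest{
//	    ListenAddr:    "0.0.0.0:2377",
//	    AdvertiseAddr: "192.168.0.10:2377",
//	})
//	if err == nil {
//	    logrus.Debugf("initialized swarm with node ID %s", nodeID)
//	}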

// Join makes current Cluster part of an existing swarm cluster.
func (c *Cluster) Join(req types.JoinRequest) error {
	c.Lock()
	if node := c.node; node != nil {
		c.Unlock()
		return ErrSwarmExists
	}
	if err := validateAndSanitizeJoinRequest(&req); err != nil {
		c.Unlock()
		return err
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		c.Unlock()
		return err
	}

	var advertiseAddr string
	if req.AdvertiseAddr != "" {
		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
		// For joining, we don't need to provide an advertise address,
		// since the remote side can detect it.
		if err == nil {
			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
		}
	}

	// todo: check current state existing
	n, err := c.startNewNode(false, "", req.RemoteAddrs[0], net.JoinHostPort(listenHost, listenPort), advertiseAddr, req.RemoteAddrs[0], req.JoinToken)
	if err != nil {
		c.Unlock()
		return err
	}
	c.Unlock()

	select {
	case <-time.After(swarmConnectTimeout):
		// attempt to connect will continue in background, also reconnecting
		go c.reconnectOnFailure(n)
		return ErrSwarmJoinTimeoutReached
	case <-n.Ready():
		go c.reconnectOnFailure(n)
		return nil
	case <-n.done:
		c.RLock()
		defer c.RUnlock()
		return c.err
	}
}

// stopNode is a helper that stops the active c.node and waits until it has
// shut down. Call while keeping the cluster lock.
func (c *Cluster) stopNode() error {
	if c.node == nil {
		return nil
	}
	c.stop = true
	if c.cancelDelay != nil {
		c.cancelDelay()
		c.cancelDelay = nil
	}
	node := c.node
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()
	// TODO: can't hold lock on stop because it calls back to network
	c.Unlock()
	defer c.Lock()
	if err := node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
		return err
	}
	<-node.done
	return nil
}

func removingManagerCausesLossOfQuorum(reachable, unreachable int) bool {
	return reachable-2 <= unreachable
}

func isLastManager(reachable, unreachable int) bool {
	return reachable == 1 && unreachable == 0
}
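
// Worked example for the two helpers above: with reachable=3 and unreachable=0,
// removingManagerCausesLossOfQuorum returns false (3-2 <= 0 does not hold), so
// one manager can leave safely; with reachable=2 and unreachable=0 it returns
// true, because a two-manager cluster loses its Raft majority as soon as one
// manager goes away. isLastManager is true only for reachable=1, unreachable=0.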

// Leave shuts down Cluster and removes current state.
func (c *Cluster) Leave(force bool) error {
	c.Lock()
	node := c.node
	if node == nil {
		c.Unlock()
		return ErrNoSwarm
	}

	if node.Manager() != nil && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if c.isActiveManager() {
			active, reachable, unreachable, err := c.managerStats()
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						c.Unlock()
						return fmt.Errorf(msg)
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		c.Unlock()
		return fmt.Errorf(msg)
	}
	if err := c.stopNode(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		c.Unlock()
		return err
	}
	c.Unlock()
	if nodeID := node.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}
	c.configEvent <- struct{}{}
	// todo: cleanup optional?
	if err := c.clearState(); err != nil {
		return err
	}
	return nil
}

func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
	var ids []string
	filters := filters.NewArgs()
	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
		Filter: filters,
	})
	if err != nil {
		return []string{}, err
	}
	for _, c := range containers {
		ids = append(ids, c.ID)
	}
	return ids, nil
}

func (c *Cluster) clearState() error {
	// todo: backup this data instead of removing?
	if err := os.RemoveAll(c.root); err != nil {
		return err
	}
	if err := os.MkdirAll(c.root, 0700); err != nil {
		return err
	}
	c.config.Backend.SetClusterProvider(nil)
	return nil
}

func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum lost
	return context.WithTimeout(context.Background(), swarmRequestTimeout)
}

// Inspect retrieves the configuration properties of a managed swarm cluster.
func (c *Cluster) Inspect() (types.Swarm, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return types.Swarm{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	swarm, err := getSwarm(ctx, c.client)
	if err != nil {
		return types.Swarm{}, err
	}

	return convert.SwarmFromGRPC(*swarm), nil
}

// Update updates configuration of a managed swarm cluster.
func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	swarm, err := getSwarm(ctx, c.client)
	if err != nil {
		return err
	}

	// In update, the client should provide the complete spec of the swarm, including
	// Name and Labels. If a field is specified as 0 or nil, then the default value
	// will be used by swarmkit.
	clusterSpec, err := convert.SwarmSpecToGRPC(spec)
	if err != nil {
		return err
	}

	_, err = c.client.UpdateCluster(
		ctx,
		&swarmapi.UpdateClusterRequest{
			ClusterID: swarm.ID,
			Spec:      &clusterSpec,
			ClusterVersion: &swarmapi.Version{
				Index: version,
			},
			Rotation: swarmapi.JoinTokenRotation{
				RotateWorkerToken:  flags.RotateWorkerToken,
				RotateManagerToken: flags.RotateManagerToken,
			},
		},
	)
	return err
}
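
// Example (minimal sketch): rotating the worker join token by reading the
// current swarm with Inspect and passing its version back to Update.
//
//	sw, err := c.Inspect()
//	if err == nil {
//	    err = c.Update(sw.Version.Index, sw.Spec, types.UpdateFlags{RotateWorkerToken: true})
//	}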

// IsManager returns true if Cluster is participating as a manager.
func (c *Cluster) IsManager() bool {
	c.RLock()
	defer c.RUnlock()
	return c.isActiveManager()
}

// IsAgent returns true if Cluster is participating as a worker/agent.
func (c *Cluster) IsAgent() bool {
	c.RLock()
	defer c.RUnlock()
	return c.node != nil && c.ready
}

// GetLocalAddress returns the local address.
func (c *Cluster) GetLocalAddress() string {
	c.RLock()
	defer c.RUnlock()
	return c.actualLocalAddr
}

// GetListenAddress returns the listen address.
func (c *Cluster) GetListenAddress() string {
	c.RLock()
	defer c.RUnlock()
	return c.listenAddr
}

// GetAdvertiseAddress returns the remotely reachable address of this node.
func (c *Cluster) GetAdvertiseAddress() string {
	c.RLock()
	defer c.RUnlock()
	if c.advertiseAddr != "" {
		advertiseHost, _, _ := net.SplitHostPort(c.advertiseAddr)
		return advertiseHost
	}
	return c.actualLocalAddr
}

// GetRemoteAddress returns a known advertise address of a remote manager if
// available.
// todo: change to array/connect with info
func (c *Cluster) GetRemoteAddress() string {
	c.RLock()
	defer c.RUnlock()
	return c.getRemoteAddress()
}

func (c *Cluster) getRemoteAddress() string {
	if c.node == nil {
		return ""
	}
	nodeID := c.node.NodeID()
	for _, r := range c.node.Remotes() {
		if r.NodeID != nodeID {
			return r.Addr
		}
	}
	return ""
}

// ListenClusterEvents returns a channel that receives messages on cluster
// participation changes.
// todo: make cancelable and accessible to multiple callers
func (c *Cluster) ListenClusterEvents() <-chan struct{} {
	return c.configEvent
}
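
// Example (minimal sketch): a caller reacting to cluster participation changes
// delivered on the channel returned by ListenClusterEvents.
//
//	go func() {
//	    for range c.ListenClusterEvents() {
//	        logrus.Debugf("cluster state changed: %+v", c.Info())
//	    }
//	}()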

// Info returns information about the current cluster state.
func (c *Cluster) Info() types.Info {
	info := types.Info{
		NodeAddr: c.GetAdvertiseAddress(),
	}

	c.RLock()
	defer c.RUnlock()

	if c.node == nil {
		info.LocalNodeState = types.LocalNodeStateInactive
		if c.cancelDelay != nil {
			info.LocalNodeState = types.LocalNodeStateError
		}
	} else {
		info.LocalNodeState = types.LocalNodeStatePending
		if c.ready == true {
			info.LocalNodeState = types.LocalNodeStateActive
		}
	}
	if c.err != nil {
		info.Error = c.err.Error()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	if c.isActiveManager() {
		info.ControlAvailable = true
		swarm, err := c.Inspect()
		if err != nil {
			info.Error = err.Error()
		}

		// Strip JoinTokens
		info.Cluster = swarm.ClusterInfo

		if r, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err == nil {
			info.Nodes = len(r.Nodes)
			for _, n := range r.Nodes {
				if n.ManagerStatus != nil {
					info.Managers = info.Managers + 1
				}
			}
		}
	}

	if c.node != nil {
		for _, r := range c.node.Remotes() {
			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
		}
		info.NodeID = c.node.NodeID()
	}

	return info
}

// isActiveManager should not be called without a read lock
func (c *Cluster) isActiveManager() bool {
	return c.node != nil && c.conn != nil
}

// errNoManager returns error describing why manager commands can't be used.
// Call with read lock.
func (c *Cluster) errNoManager() error {
	if c.node == nil {
		return fmt.Errorf("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
	}
	if c.node.Manager() != nil {
		return fmt.Errorf("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
	}
	return fmt.Errorf("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
}

// GetServices returns all services of a managed swarm cluster.
func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return nil, c.errNoManager()
	}

	filters, err := newListServicesFilters(options.Filter)
	if err != nil {
		return nil, err
	}
	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := c.client.ListServices(
		ctx,
		&swarmapi.ListServicesRequest{Filters: filters})
	if err != nil {
		return nil, err
	}

	services := []types.Service{}

	for _, service := range r.Services {
		services = append(services, convert.ServiceFromGRPC(*service))
	}

	return services, nil
}

// CreateService creates a new service in a managed swarm cluster.
func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (string, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return "", c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	err := c.populateNetworkID(ctx, c.client, &s)
	if err != nil {
		return "", err
	}

	serviceSpec, err := convert.ServiceSpecToGRPC(s)
	if err != nil {
		return "", err
	}

	if encodedAuth != "" {
		ctnr := serviceSpec.Task.GetContainer()
		if ctnr == nil {
			return "", fmt.Errorf("service does not use container tasks")
		}
		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
	}

	r, err := c.client.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
	if err != nil {
		return "", err
	}

	return r.Service.ID, nil
}
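
// Example (minimal sketch): creating a simple service. The service name and
// image are hypothetical placeholders and only a few ServiceSpec fields are
// shown; an empty encodedAuth means no registry credentials are attached.
//
//	id, err := c.CreateService(types.ServiceSpec{
//	    Annotations: types.Annotations{Name: "web"},
//	    TaskTemplate: types.TaskSpec{
//	        ContainerSpec: types.ContainerSpec{Image: "nginx:latest"},
//	    },
//	}, "")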

// GetService returns a service based on an ID or name.
func (c *Cluster) GetService(input string) (types.Service, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return types.Service{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	service, err := getService(ctx, c.client, input)
	if err != nil {
		return types.Service{}, err
	}
	return convert.ServiceFromGRPC(*service), nil
}

// UpdateService updates existing service to match new properties.
func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	err := c.populateNetworkID(ctx, c.client, &spec)
	if err != nil {
		return err
	}

	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
	if err != nil {
		return err
	}

	currentService, err := getService(ctx, c.client, serviceIDOrName)
	if err != nil {
		return err
	}

	if encodedAuth != "" {
		ctnr := serviceSpec.Task.GetContainer()
		if ctnr == nil {
			return fmt.Errorf("service does not use container tasks")
		}
		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
	} else {
		// If encodedAuth isn't being updated, we shouldn't lose the existing
		// value; keep using the PullOptions that were already present.
		ctnr := currentService.Spec.Task.GetContainer()
		if ctnr == nil {
			return fmt.Errorf("service does not use container tasks")
		}
		serviceSpec.Task.GetContainer().PullOptions = ctnr.PullOptions
	}

	_, err = c.client.UpdateService(
		ctx,
		&swarmapi.UpdateServiceRequest{
			ServiceID: currentService.ID,
			Spec:      &serviceSpec,
			ServiceVersion: &swarmapi.Version{
				Index: version,
			},
		},
	)
	return err
}

// RemoveService removes a service from a managed swarm cluster.
func (c *Cluster) RemoveService(input string) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	service, err := getService(ctx, c.client, input)
	if err != nil {
		return err
	}

	if _, err := c.client.RemoveService(ctx, &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil {
		return err
	}
	return nil
}

// GetNodes returns a list of all nodes known to a cluster.
func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return nil, c.errNoManager()
	}

	filters, err := newListNodesFilters(options.Filter)
	if err != nil {
		return nil, err
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := c.client.ListNodes(
		ctx,
		&swarmapi.ListNodesRequest{Filters: filters})
	if err != nil {
		return nil, err
	}

	nodes := []types.Node{}

	for _, node := range r.Nodes {
		nodes = append(nodes, convert.NodeFromGRPC(*node))
	}
	return nodes, nil
}

// GetNode returns a node based on an ID or name.
func (c *Cluster) GetNode(input string) (types.Node, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return types.Node{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	node, err := getNode(ctx, c.client, input)
	if err != nil {
		return types.Node{}, err
	}
	return convert.NodeFromGRPC(*node), nil
}

// UpdateNode updates an existing node's properties.
func (c *Cluster) UpdateNode(nodeID string, version uint64, spec types.NodeSpec) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	nodeSpec, err := convert.NodeSpecToGRPC(spec)
	if err != nil {
		return err
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	_, err = c.client.UpdateNode(
		ctx,
		&swarmapi.UpdateNodeRequest{
			NodeID: nodeID,
			Spec:   &nodeSpec,
			NodeVersion: &swarmapi.Version{
				Index: version,
			},
		},
	)
	return err
}

// RemoveNode removes a node from a cluster.
func (c *Cluster) RemoveNode(input string, force bool) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	node, err := getNode(ctx, c.client, input)
	if err != nil {
		return err
	}

	if _, err := c.client.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID, Force: force}); err != nil {
		return err
	}
	return nil
}

// GetTasks returns a list of tasks matching the filter options.
func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return nil, c.errNoManager()
	}

	byName := func(filter filters.Args) error {
		if filter.Include("service") {
			serviceFilters := filter.Get("service")
			for _, serviceFilter := range serviceFilters {
				service, err := c.GetService(serviceFilter)
				if err != nil {
					return err
				}
				filter.Del("service", serviceFilter)
				filter.Add("service", service.ID)
			}
		}
		if filter.Include("node") {
			nodeFilters := filter.Get("node")
			for _, nodeFilter := range nodeFilters {
				node, err := c.GetNode(nodeFilter)
				if err != nil {
					return err
				}
				filter.Del("node", nodeFilter)
				filter.Add("node", node.ID)
			}
		}
		return nil
	}

	filters, err := newListTasksFilters(options.Filter, byName)
	if err != nil {
		return nil, err
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := c.client.ListTasks(
		ctx,
		&swarmapi.ListTasksRequest{Filters: filters})
	if err != nil {
		return nil, err
	}

	tasks := []types.Task{}

	for _, task := range r.Tasks {
		if task.Spec.GetContainer() != nil {
			tasks = append(tasks, convert.TaskFromGRPC(*task))
		}
	}
	return tasks, nil
}

// GetTask returns a task by an ID.
func (c *Cluster) GetTask(input string) (types.Task, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return types.Task{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	task, err := getTask(ctx, c.client, input)
	if err != nil {
		return types.Task{}, err
	}
	return convert.TaskFromGRPC(*task), nil
}

// GetNetwork returns a cluster network by an ID.
func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return apitypes.NetworkResource{}, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	network, err := getNetwork(ctx, c.client, input)
	if err != nil {
		return apitypes.NetworkResource{}, err
	}
	return convert.BasicNetworkFromGRPC(*network), nil
}

// GetNetworks returns all current cluster managed networks.
func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return nil, c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := c.client.ListNetworks(ctx, &swarmapi.ListNetworksRequest{})
	if err != nil {
		return nil, err
	}

	var networks []apitypes.NetworkResource

	for _, network := range r.Networks {
		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
	}

	return networks, nil
}

func attacherKey(target, containerID string) string {
	return containerID + ":" + target
}

// UpdateAttachment signals the attachment config to the attachment
// waiter who is trying to start or attach the container to the
// network.
func (c *Cluster) UpdateAttachment(target, containerID string, config *network.NetworkingConfig) error {
	c.RLock()
	attacher, ok := c.attachers[attacherKey(target, containerID)]
	c.RUnlock()
	if !ok || attacher == nil {
		return fmt.Errorf("could not find attacher for container %s to network %s", containerID, target)
	}

	attacher.attachWaitCh <- config
	close(attacher.attachWaitCh)
	return nil
}

// WaitForDetachment waits for the container to stop or detach from
// the network.
func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID, taskID, containerID string) error {
	c.RLock()
	attacher, ok := c.attachers[attacherKey(networkName, containerID)]
	if !ok {
		attacher, ok = c.attachers[attacherKey(networkID, containerID)]
	}
	if c.node == nil || c.node.Agent() == nil {
		c.RUnlock()
		return fmt.Errorf("invalid cluster node while waiting for detachment")
	}

	agent := c.node.Agent()
	c.RUnlock()

	if ok && attacher != nil &&
		attacher.detachWaitCh != nil &&
		attacher.attachCompleteCh != nil {
		// Attachment may be in progress still so wait for
		// attachment to complete.
		select {
		case <-attacher.attachCompleteCh:
		case <-ctx.Done():
			return ctx.Err()
		}

		if attacher.taskID == taskID {
			select {
			case <-attacher.detachWaitCh:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}

	return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
}

// AttachNetwork generates an attachment request towards the manager.
func (c *Cluster) AttachNetwork(target string, containerID string, addresses []string) (*network.NetworkingConfig, error) {
	aKey := attacherKey(target, containerID)
	c.Lock()
	if c.node == nil || c.node.Agent() == nil {
		c.Unlock()
		return nil, fmt.Errorf("invalid cluster node while attaching to network")
	}
	if attacher, ok := c.attachers[aKey]; ok {
		c.Unlock()
		return attacher.config, nil
	}

	agent := c.node.Agent()
	attachWaitCh := make(chan *network.NetworkingConfig)
	detachWaitCh := make(chan struct{})
	attachCompleteCh := make(chan struct{})
	c.attachers[aKey] = &attacher{
		attachWaitCh:     attachWaitCh,
		attachCompleteCh: attachCompleteCh,
		detachWaitCh:     detachWaitCh,
	}
	c.Unlock()

	ctx, cancel := c.getRequestContext()
	defer cancel()

	taskID, err := agent.ResourceAllocator().AttachNetwork(ctx, containerID, target, addresses)
	if err != nil {
		c.Lock()
		delete(c.attachers, aKey)
		c.Unlock()
		return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
	}

	c.Lock()
	c.attachers[aKey].taskID = taskID
	close(attachCompleteCh)
	c.Unlock()

	logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)

	var config *network.NetworkingConfig
	select {
	case config = <-attachWaitCh:
	case <-ctx.Done():
		return nil, fmt.Errorf("attaching to network failed, make sure your network options are correct and check manager logs: %v", ctx.Err())
	}

	c.Lock()
	c.attachers[aKey].config = config
	c.Unlock()
	return config, nil
}

// DetachNetwork unblocks the waiters waiting on WaitForDetachment so
// that a request to detach can be generated towards the manager.
func (c *Cluster) DetachNetwork(target string, containerID string) error {
	aKey := attacherKey(target, containerID)

	c.Lock()
	attacher, ok := c.attachers[aKey]
	delete(c.attachers, aKey)
	c.Unlock()

	if !ok {
		return fmt.Errorf("could not find network attachment for container %s to network %s", containerID, target)
	}

	close(attacher.detachWaitCh)
	return nil
}
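
// Example (minimal sketch): the attach/detach round trip for a container that
// needs a swarm-scoped network. The network name and containerID below are
// hypothetical.
//
//	config, err := c.AttachNetwork("my-overlay", containerID, nil)
//	if err == nil {
//	    // ... connect the container using the returned NetworkingConfig ...
//	    _ = c.DetachNetwork("my-overlay", containerID)
//	}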

// CreateNetwork creates a new cluster managed network.
func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return "", c.errNoManager()
	}

	if runconfig.IsPreDefinedNetwork(s.Name) {
		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
		return "", errors.NewRequestForbiddenError(err)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	networkSpec := convert.BasicNetworkCreateToGRPC(s)
	r, err := c.client.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
	if err != nil {
		return "", err
	}

	return r.Network.ID, nil
}

// RemoveNetwork removes a cluster network.
func (c *Cluster) RemoveNetwork(input string) error {
	c.RLock()
	defer c.RUnlock()

	if !c.isActiveManager() {
		return c.errNoManager()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	network, err := getNetwork(ctx, c.client, input)
	if err != nil {
		return err
	}

	if _, err := c.client.RemoveNetwork(ctx, &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil {
		return err
	}
	return nil
}

func (c *Cluster) populateNetworkID(ctx context.Context, client swarmapi.ControlClient, s *types.ServiceSpec) error {
	// Always prefer NetworkAttachmentConfigs from TaskTemplate
	// but fall back to service spec for backward compatibility
	networks := s.TaskTemplate.Networks
	if len(networks) == 0 {
		networks = s.Networks
	}

	for i, n := range networks {
		apiNetwork, err := getNetwork(ctx, client, n.Target)
		if err != nil {
			if ln, _ := c.config.Backend.FindNetwork(n.Target); ln != nil && !ln.Info().Dynamic() {
				err = fmt.Errorf("network %s is not eligible for docker services", ln.Name())
				return errors.NewRequestForbiddenError(err)
			}
			return err
		}
		networks[i].Target = apiNetwork.ID
	}
	return nil
}

func getNetwork(ctx context.Context, c swarmapi.ControlClient, input string) (*swarmapi.Network, error) {
	// GetNetwork to match via full ID.
	rg, err := c.GetNetwork(ctx, &swarmapi.GetNetworkRequest{NetworkID: input})
	if err != nil {
		// If any error (including NotFound), ListNetworks to match via ID prefix and full name.
		rl, err := c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{Names: []string{input}}})
		if err != nil || len(rl.Networks) == 0 {
			rl, err = c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{IDPrefixes: []string{input}}})
		}

		if err != nil {
			return nil, err
		}

		if len(rl.Networks) == 0 {
			return nil, fmt.Errorf("network %s not found", input)
		}

		if l := len(rl.Networks); l > 1 {
			return nil, fmt.Errorf("network %s is ambiguous (%d matches found)", input, l)
		}

		return rl.Networks[0], nil
	}
	return rg.Network, nil
}

// Cleanup stops active swarm node. This is run before daemon shutdown.
func (c *Cluster) Cleanup() {
	c.Lock()
	node := c.node
	if node == nil {
		c.Unlock()
		return
	}
	defer c.Unlock()
	if c.isActiveManager() {
		active, reachable, unreachable, err := c.managerStats()
		if err == nil {
			singlenode := active && isLastManager(reachable, unreachable)
			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
			}
		}
	}
	c.stopNode()
}

func (c *Cluster) managerStats() (current bool, reachable int, unreachable int, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	nodes, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
	if err != nil {
		return false, 0, 0, err
	}
	for _, n := range nodes.Nodes {
		if n.ManagerStatus != nil {
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
				reachable++
				if n.ID == c.node.NodeID() {
					current = true
				}
			}
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
				unreachable++
			}
		}
	}
	return
}

func validateAndSanitizeInitRequest(req *types.InitRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}

	return nil
}

func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}
	if len(req.RemoteAddrs) == 0 {
		return fmt.Errorf("at least 1 RemoteAddr is required to join")
	}
	for i := range req.RemoteAddrs {
		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
		if err != nil {
			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
		}
	}
	return nil
}

func validateAddr(addr string) (string, error) {
	if addr == "" {
		return addr, fmt.Errorf("invalid empty address")
	}
	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
	if err != nil {
		return addr, nil
	}
	return strings.TrimPrefix(newaddr, "tcp://"), nil
}
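
// Worked example for validateAddr above: a bare IP such as "192.168.0.10" is
// normalized against defaultAddr to "192.168.0.10:2377", while an address that
// already carries a port only has its "tcp://" prefix stripped. Note that a
// parse failure falls back to returning the original address with a nil error.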

func initClusterSpec(node *node, spec types.Spec) error {
	ctx, _ := context.WithTimeout(context.Background(), 5*time.Second)
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return fmt.Errorf("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			// In init, we take the initial default values from swarmkit, and merge
			// any non-nil or non-zero value from spec into the GRPC spec. This
			// leaves the default values alone.
			// Note that this is different from Update(), where we expect the user
			// to specify the complete spec of the cluster (since they already know
			// the existing one and know which fields to update).
			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
				ClusterID:      cluster.ID,
				ClusterVersion: &cluster.Meta.Version,
				Spec:           &clusterSpec,
			})
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			return nil
		}
	}
	return ctx.Err()
}