package cluster

//
// ## Swarmkit integration
//
// Cluster - static configurable object for accessing everything swarm related.
// Contains methods for connecting and controlling the cluster. Exists always,
// even if swarm mode is not enabled.
//
// NodeRunner - Manager for starting the swarmkit node. Present if and only if
// swarm mode is enabled. Implements a backoff restart loop in case of errors.
//
// NodeState - Information about the current node status including access to
// gRPC clients if a manager is active.
//
// ### Locking
//
// `cluster.controlMutex` - taken for the whole lifecycle of the processes that
// can reconfigure the cluster (init/join/leave, etc.). Ensures that one
// reconfiguration action has fully completed before another can start.
//
// `cluster.mu` - taken when the actual changes in cluster configurations
// happen. Different from `controlMutex` because in some cases we need to
// access the current cluster state even while a long-running reconfiguration
// is going on. For example, the network stack may ask for the current cluster
// state in the middle of a shutdown. Whenever the current cluster state is
// needed, take the read lock of `cluster.mu`. If you are writing an API
// responder that returns synchronously, hold `cluster.mu.RLock()` for the
// duration of the whole handler function. That ensures the node will not be
// shut down until the handler has finished.
//
// NodeRunner has its own internal locks that should not be used outside of
// the struct. Instead, call the `nodeRunner.State()` method to get the
// current state of the cluster (you still need `cluster.mu.RLock()` to access
// the `cluster.nr` reference itself). Most of the changes in NodeRunner happen
// because of an external event (network problem, unexpected swarmkit error) and
// Docker shouldn't take any locks that delay these changes from happening.
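//
// As an illustration of the locking convention above, a synchronous API
// responder looks roughly like the following. This is a minimal sketch
// modeled on Inspect() further down in this file; the method name
// ExampleInspectLike is hypothetical and not part of the package:
//
//	func (c *Cluster) ExampleInspectLike() (types.Swarm, error) {
//		c.mu.RLock()
//		defer c.mu.RUnlock()
//
//		state := c.currentNodeState()
//		if !state.IsActiveManager() {
//			return types.Swarm{}, c.errNoManager(state)
//		}
//		// state (and state.controlClient) is safe to use while the
//		// read lock is held; the node cannot be shut down underneath us.
//		return types.Swarm{}, nil
//	}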
//

import (
	"crypto/x509"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"net"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/docker/distribution/digest"
	distreference "github.com/docker/distribution/reference"
	apierrors "github.com/docker/docker/api/errors"
	apitypes "github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/backend"
	"github.com/docker/docker/api/types/filters"
	"github.com/docker/docker/api/types/network"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/convert"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/docker/daemon/logger"
	"github.com/docker/docker/opts"
	"github.com/docker/docker/pkg/ioutils"
	"github.com/docker/docker/pkg/signal"
	"github.com/docker/docker/pkg/stdcopy"
	"github.com/docker/docker/reference"
	"github.com/docker/docker/runconfig"
	swarmapi "github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/encryption"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/docker/swarmkit/protobuf/ptypes"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
)

const swarmDirName = "swarm"
const controlSocket = "control.sock"
const swarmConnectTimeout = 20 * time.Second
const swarmRequestTimeout = 20 * time.Second
const stateFile = "docker-state.json"
const defaultAddr = "0.0.0.0:2377"

const (
	initialReconnectDelay = 100 * time.Millisecond
	maxReconnectDelay     = 30 * time.Second
	contextPrefix         = "com.docker.swarm"
)

// errNoSwarm is returned on leaving a cluster that was never initialized
var errNoSwarm = fmt.Errorf("This node is not part of a swarm")

// errSwarmExists is returned on initialize or join request for a cluster that has already been activated
var errSwarmExists = fmt.Errorf("This node is already part of a swarm. Use \"docker swarm leave\" to leave this swarm and join another one.")

// errSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached.
var errSwarmJoinTimeoutReached = fmt.Errorf("Timeout was reached before node was joined. The attempt to join the swarm will continue in the background. Use the \"docker info\" command to see the current swarm status of your node.")

// errSwarmLocked is returned if the swarm is encrypted and needs a key to unlock it.
var errSwarmLocked = fmt.Errorf("Swarm is encrypted and needs to be unlocked before it can be used. Please use \"docker swarm unlock\" to unlock it.")

// errSwarmCertificatesExpired is returned if the daemon was down for the certificates' whole
// validity period, so they had no chance to be renewed automatically.
var errSwarmCertificatesExpired = errors.New("Swarm certificates have expired. To replace them, leave the swarm and join again.")

// NetworkSubnetsProvider exposes functions for retrieving the subnets
// of networks managed by Docker, so they can be filtered.
type NetworkSubnetsProvider interface {
	V4Subnets() []net.IPNet
	V6Subnets() []net.IPNet
}

// Config provides values for Cluster.
type Config struct {
	Root                   string
	Name                   string
	Backend                executorpkg.Backend
	NetworkSubnetsProvider NetworkSubnetsProvider

	// DefaultAdvertiseAddr is the default host/IP or network interface to use
	// if no AdvertiseAddr value is specified.
	DefaultAdvertiseAddr string

	// path to store runtime state, such as the swarm control socket
	RuntimeRoot string
}

// Cluster provides capabilities to participate in a cluster as a worker or a
// manager.
type Cluster struct {
	mu           sync.RWMutex
	controlMutex sync.RWMutex // protect init/join/leave user operations
	nr           *nodeRunner
	root         string
	runtimeRoot  string
	config       Config
	configEvent  chan struct{} // todo: make this array and goroutine safe
	attachers    map[string]*attacher
}

// attacher manages the in-memory attachment state of a container
// attachment to a global scope network managed by swarm manager. It
// helps in identifying the attachment ID via the taskID and the
// corresponding attachment configuration obtained from the manager.
type attacher struct {
	taskID           string
	config           *network.NetworkingConfig
	attachWaitCh     chan *network.NetworkingConfig
	attachCompleteCh chan struct{}
	detachWaitCh     chan struct{}
}

// New creates a new Cluster instance using provided config.
func New(config Config) (*Cluster, error) {
	root := filepath.Join(config.Root, swarmDirName)
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}
	if config.RuntimeRoot == "" {
		config.RuntimeRoot = root
	}
	if err := os.MkdirAll(config.RuntimeRoot, 0700); err != nil {
		return nil, err
	}
	c := &Cluster{
		root:        root,
		config:      config,
		configEvent: make(chan struct{}, 10),
		runtimeRoot: config.RuntimeRoot,
		attachers:   make(map[string]*attacher),
	}

	nodeConfig, err := loadPersistentState(root)
	if err != nil {
		if os.IsNotExist(err) {
			return c, nil
		}
		return nil, err
	}

	nr, err := c.newNodeRunner(*nodeConfig)
	if err != nil {
		return nil, err
	}
	c.nr = nr

	select {
	case <-time.After(swarmConnectTimeout):
		logrus.Error("swarm component could not be started before timeout was reached")
	case err := <-nr.Ready():
		if err != nil {
			if errors.Cause(err) == errSwarmLocked {
				return c, nil
			}
			if err, ok := errors.Cause(c.nr.err).(x509.CertificateInvalidError); ok && err.Reason == x509.Expired {
				return c, nil
			}
			return nil, errors.Wrap(err, "swarm component could not be started")
		}
	}
	return c, nil
}

func (c *Cluster) newNodeRunner(conf nodeStartConfig) (*nodeRunner, error) {
	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
		return nil, err
	}

	actualLocalAddr := conf.LocalAddr
	if actualLocalAddr == "" {
		// If localAddr was not specified, resolve it automatically
		// based on the route to joinAddr. localAddr can only be left
		// empty on "join".
		listenHost, _, err := net.SplitHostPort(conf.ListenAddr)
		if err != nil {
			return nil, fmt.Errorf("could not parse listen address: %v", err)
		}

		listenAddrIP := net.ParseIP(listenHost)
		if listenAddrIP == nil || !listenAddrIP.IsUnspecified() {
			actualLocalAddr = listenHost
		} else {
			if conf.RemoteAddr == "" {
				// Should never happen except using swarms created by
				// old versions that didn't save remoteAddr.
				conf.RemoteAddr = "8.8.8.8:53"
			}
			conn, err := net.Dial("udp", conf.RemoteAddr)
			if err != nil {
				return nil, fmt.Errorf("could not find local IP address: %v", err)
			}
			localHostPort := conn.LocalAddr().String()
			actualLocalAddr, _, _ = net.SplitHostPort(localHostPort)
			conn.Close()
		}
	}

	nr := &nodeRunner{cluster: c}
	nr.actualLocalAddr = actualLocalAddr

	if err := nr.Start(conf); err != nil {
		return nil, err
	}

	c.config.Backend.SetClusterProvider(c)

	return nr, nil
}

// Init initializes new cluster from user provided request.
func (c *Cluster) Init(req types.InitRequest) (string, error) {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	c.mu.Lock()
	if c.nr != nil {
		if req.ForceNewCluster {
			if err := c.nr.Stop(); err != nil {
				c.mu.Unlock()
				return "", err
			}
		} else {
			c.mu.Unlock()
			return "", errSwarmExists
		}
	}
	c.mu.Unlock()

	if err := validateAndSanitizeInitRequest(&req); err != nil {
		return "", err
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return "", err
	}

	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
	if err != nil {
		return "", err
	}

	localAddr := listenHost

	// If the local address is undetermined, the advertise address
	// will be used as local address, if it belongs to this system.
	// If the advertise address is not local, then we try to find
	// a system address to use as local address. If this fails,
	// we give up and ask user to pass the listen address.
	if net.ParseIP(localAddr).IsUnspecified() {
		advertiseIP := net.ParseIP(advertiseHost)

		found := false
		for _, systemIP := range listSystemIPs() {
			if systemIP.Equal(advertiseIP) {
				localAddr = advertiseIP.String()
				found = true
				break
			}
		}

		if !found {
			ip, err := c.resolveSystemAddr()
			if err != nil {
				logrus.Warnf("Could not find a local address: %v", err)
				return "", errMustSpecifyListenAddr
			}
			localAddr = ip.String()
		}
	}

	if !req.ForceNewCluster {
		clearPersistentState(c.root)
	}

	nr, err := c.newNodeRunner(nodeStartConfig{
		forceNewCluster: req.ForceNewCluster,
		autolock:        req.AutoLockManagers,
		LocalAddr:       localAddr,
		ListenAddr:      net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr:   net.JoinHostPort(advertiseHost, advertisePort),
	})
	if err != nil {
		return "", err
	}
	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		if !req.ForceNewCluster { // if failure on first attempt don't keep state
			if err := clearPersistentState(c.root); err != nil {
				return "", err
			}
		}
		if err != nil {
			c.mu.Lock()
			c.nr = nil
			c.mu.Unlock()
		}
		return "", err
	}
	state := nr.State()
	if state.swarmNode == nil { // should never happen but protect from panic
		return "", errors.New("invalid cluster state for spec initialization")
	}
	if err := initClusterSpec(state.swarmNode, req.Spec); err != nil {
		return "", err
	}
	return state.NodeID(), nil
}

// Join makes current Cluster part of an existing swarm cluster.
func (c *Cluster) Join(req types.JoinRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	c.mu.Lock()
	if c.nr != nil {
		c.mu.Unlock()
		return errSwarmExists
	}
	c.mu.Unlock()

	if err := validateAndSanitizeJoinRequest(&req); err != nil {
		return err
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return err
	}

	var advertiseAddr string
	if req.AdvertiseAddr != "" {
		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
		// For joining, we don't need to provide an advertise address,
		// since the remote side can detect it.
		if err == nil {
			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
		}
	}

	clearPersistentState(c.root)

	nr, err := c.newNodeRunner(nodeStartConfig{
		RemoteAddr:    req.RemoteAddrs[0],
		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr: advertiseAddr,
		joinAddr:      req.RemoteAddrs[0],
		joinToken:     req.JoinToken,
	})
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	select {
	case <-time.After(swarmConnectTimeout):
		return errSwarmJoinTimeoutReached
	case err := <-nr.Ready():
		if err != nil {
			c.mu.Lock()
			c.nr = nil
			c.mu.Unlock()
		}
		return err
	}
}

// GetUnlockKey returns the unlock key for the swarm.
func (c *Cluster) GetUnlockKey() (string, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return "", c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	client := swarmapi.NewCAClient(state.grpcConn)

	r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{})
	if err != nil {
		return "", err
	}

	if len(r.UnlockKey) == 0 {
		// no key
		return "", nil
	}

	return encryption.HumanReadableKey(r.UnlockKey), nil
}

// UnlockSwarm provides a key to decrypt data that is encrypted at rest.
func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.RLock()
	state := c.currentNodeState()
	nr := c.nr
	c.mu.RUnlock()
	if nr == nil || errors.Cause(state.err) != errSwarmLocked {
		return errors.New("swarm is not locked")
	}
	key, err := encryption.ParseHumanReadableKey(req.UnlockKey)
	if err != nil {
		return err
	}

	config := nr.config
	config.lockKey = key
	if err := nr.Stop(); err != nil {
		return err
	}
	nr, err = c.newNodeRunner(config)
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		if errors.Cause(err) == errSwarmLocked {
			return errors.New("swarm could not be unlocked: invalid key provided")
		}
		return fmt.Errorf("swarm component could not be started: %v", err)
	}
	return nil
}

// Leave shuts down Cluster and removes current state.
func (c *Cluster) Leave(force bool) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	nr := c.nr
	if nr == nil {
		c.mu.Unlock()
		return errNoSwarm
	}

	state := c.currentNodeState()

	if errors.Cause(state.err) == errSwarmLocked && !force {
		// leaving a locked swarm without --force is not allowed
		c.mu.Unlock()
		return errors.New("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message.")
	}

	if state.IsManager() && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if state.IsActiveManager() {
			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						c.mu.Unlock()
						return fmt.Errorf(msg)
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		c.mu.Unlock()
		return fmt.Errorf(msg)
	}
	// release readers in here
	if err := nr.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		c.mu.Unlock()
		return err
	}
	c.nr = nil
	c.mu.Unlock()
	if nodeID := state.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}

	c.configEvent <- struct{}{}
	// todo: cleanup optional?
	if err := clearPersistentState(c.root); err != nil {
		return err
	}
	c.config.Backend.SetClusterProvider(nil)
	return nil
}

func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
	var ids []string
	filters := filters.NewArgs()
	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
		Filters: filters,
	})
	if err != nil {
		return []string{}, err
	}
	for _, c := range containers {
		ids = append(ids, c.ID)
	}
	return ids, nil
}

func (c *Cluster) getRequestContext() (context.Context, func()) { // TODO: not needed when requests don't block on quorum lost
	return context.WithTimeout(context.Background(), swarmRequestTimeout)
}

// Inspect retrieves the configuration properties of a managed swarm cluster.
func (c *Cluster) Inspect() (types.Swarm, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return types.Swarm{}, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	swarm, err := getSwarm(ctx, state.controlClient)
	if err != nil {
		return types.Swarm{}, err
	}

	return convert.SwarmFromGRPC(*swarm), nil
}

// Update updates configuration of a managed swarm cluster.
func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	swarm, err := getSwarm(ctx, state.controlClient)
	if err != nil {
		return err
	}

	// In update, the client should provide the complete spec of the swarm, including
	// Name and Labels. If a field is specified with 0 or nil, then the default value
	// will be passed to swarmkit.
	clusterSpec, err := convert.SwarmSpecToGRPC(spec)
	if err != nil {
		return err
	}

	_, err = state.controlClient.UpdateCluster(
		ctx,
		&swarmapi.UpdateClusterRequest{
			ClusterID: swarm.ID,
			Spec:      &clusterSpec,
			ClusterVersion: &swarmapi.Version{
				Index: version,
			},
			Rotation: swarmapi.KeyRotation{
				WorkerJoinToken:  flags.RotateWorkerToken,
				ManagerJoinToken: flags.RotateManagerToken,
				ManagerUnlockKey: flags.RotateManagerUnlockKey,
			},
		},
	)
	return err
}

// IsManager returns true if Cluster is participating as a manager.
func (c *Cluster) IsManager() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().IsActiveManager()
}

// IsAgent returns true if Cluster is participating as a worker/agent.
func (c *Cluster) IsAgent() bool {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().status == types.LocalNodeStateActive
}

// GetLocalAddress returns the local address.
func (c *Cluster) GetLocalAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.currentNodeState().actualLocalAddr
}

// GetListenAddress returns the listen address.
func (c *Cluster) GetListenAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil {
		return c.nr.config.ListenAddr
	}
	return ""
}

// GetAdvertiseAddress returns the remotely reachable address of this node.
func (c *Cluster) GetAdvertiseAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	if c.nr != nil && c.nr.config.AdvertiseAddr != "" {
		advertiseHost, _, _ := net.SplitHostPort(c.nr.config.AdvertiseAddr)
		return advertiseHost
	}
	return c.currentNodeState().actualLocalAddr
}

// GetRemoteAddress returns a known advertise address of a remote manager if
// available.
// todo: change to array/connect with info
func (c *Cluster) GetRemoteAddress() string {
	c.mu.RLock()
	defer c.mu.RUnlock()
	return c.getRemoteAddress()
}

func (c *Cluster) getRemoteAddress() string {
	state := c.currentNodeState()
	if state.swarmNode == nil {
		return ""
	}
	nodeID := state.swarmNode.NodeID()
	for _, r := range state.swarmNode.Remotes() {
		if r.NodeID != nodeID {
			return r.Addr
		}
	}
	return ""
}

// ListenClusterEvents returns a channel that receives messages on cluster
// participation changes.
// todo: make cancelable and accessible to multiple callers
func (c *Cluster) ListenClusterEvents() <-chan struct{} {
	return c.configEvent
}

// Info returns information about the current cluster state.
func (c *Cluster) Info() types.Info {
	info := types.Info{
		NodeAddr: c.GetAdvertiseAddress(),
	}
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	info.LocalNodeState = state.status
	if state.err != nil {
		info.Error = state.err.Error()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	if state.IsActiveManager() {
		info.ControlAvailable = true
		swarm, err := c.Inspect()
		if err != nil {
			info.Error = err.Error()
		}

		// Strip JoinTokens
		info.Cluster = swarm.ClusterInfo

		if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err == nil {
			info.Nodes = len(r.Nodes)
			for _, n := range r.Nodes {
				if n.ManagerStatus != nil {
					info.Managers = info.Managers + 1
				}
			}
		}
	}

	if state.swarmNode != nil {
		for _, r := range state.swarmNode.Remotes() {
			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
		}
		info.NodeID = state.swarmNode.NodeID()
	}

	return info
}

// currentNodeState should not be called without a read lock
func (c *Cluster) currentNodeState() nodeState {
	return c.nr.State()
}

// errNoManager returns error describing why manager commands can't be used.
// Call with read lock.
func (c *Cluster) errNoManager(st nodeState) error {
	if st.swarmNode == nil {
		if errors.Cause(st.err) == errSwarmLocked {
			return errSwarmLocked
		}
		if st.err == errSwarmCertificatesExpired {
			return errSwarmCertificatesExpired
		}
		return fmt.Errorf("This node is not a swarm manager. Use \"docker swarm init\" or \"docker swarm join\" to connect this node to swarm and try again.")
	}
	if st.swarmNode.Manager() != nil {
		return fmt.Errorf("This node is not a swarm manager. Manager is being prepared or has trouble connecting to the cluster.")
	}
	return fmt.Errorf("This node is not a swarm manager. Worker nodes can't be used to view or modify cluster state. Please run this command on a manager node or promote the current node to a manager.")
}

// GetServices returns all services of a managed swarm cluster.
func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return nil, c.errNoManager(state)
	}

	filters, err := newListServicesFilters(options.Filters)
	if err != nil {
		return nil, err
	}
	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := state.controlClient.ListServices(
		ctx,
		&swarmapi.ListServicesRequest{Filters: filters})
	if err != nil {
		return nil, err
	}

	services := []types.Service{}

	for _, service := range r.Services {
		services = append(services, convert.ServiceFromGRPC(*service))
	}

	return services, nil
}

// imageWithDigestString takes an image such as name or name:tag
// and returns the image pinned to a digest, such as name@sha256:34234...
// Due to the difference between the docker/docker/reference and the
// docker/distribution/reference packages, we're parsing the image twice.
// As the two packages converge, this function should be simplified.
// TODO(nishanttotla): After the packages converge, the function must
// convert distreference.Named -> distreference.Canonical, and the logic simplified.
func (c *Cluster) imageWithDigestString(ctx context.Context, image string, authConfig *apitypes.AuthConfig) (string, error) {
	if _, err := digest.ParseDigest(image); err == nil {
		return "", errors.New("image reference is an image ID")
	}
	ref, err := distreference.ParseNamed(image)
	if err != nil {
		return "", err
	}
	// only query registry if not a canonical reference (i.e. with digest)
	if _, ok := ref.(distreference.Canonical); !ok {
		// create a docker/docker/reference Named object because GetRepository needs it
		dockerRef, err := reference.ParseNamed(image)
		if err != nil {
			return "", err
		}
		dockerRef = reference.WithDefaultTag(dockerRef)
		namedTaggedRef, ok := dockerRef.(reference.NamedTagged)
		if !ok {
			return "", fmt.Errorf("unable to cast image to NamedTagged reference object")
		}

		repo, _, err := c.config.Backend.GetRepository(ctx, namedTaggedRef, authConfig)
		if err != nil {
			return "", err
		}
		dscrptr, err := repo.Tags(ctx).Get(ctx, namedTaggedRef.Tag())
		if err != nil {
			return "", err
		}

		namedDigestedRef, err := distreference.WithDigest(distreference.EnsureTagged(ref), dscrptr.Digest)
		if err != nil {
			return "", err
		}
		return namedDigestedRef.String(), nil
	}
	// reference already contains a digest, so just return it
	return ref.String(), nil
}

// CreateService creates a new service in a managed swarm cluster.
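// The encodedAuth argument, when non-empty, is expected to be a base64url
// (RFC 4648 URL encoding) string containing a JSON-marshaled
// apitypes.AuthConfig; that matches how it is decoded in the function body
// below. A hedged sketch of how a caller might build it (the helper name
// encodeAuth is illustrative only, not part of this package):
//
//	func encodeAuth(ac apitypes.AuthConfig) (string, error) {
//		buf, err := json.Marshal(ac)
//		if err != nil {
//			return "", err
//		}
//		return base64.URLEncoding.EncodeToString(buf), nil
//	}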
func (c *Cluster) CreateService(s types.ServiceSpec, encodedAuth string) (*apitypes.ServiceCreateResponse, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return nil, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	err := c.populateNetworkID(ctx, state.controlClient, &s)
	if err != nil {
		return nil, err
	}

	serviceSpec, err := convert.ServiceSpecToGRPC(s)
	if err != nil {
		return nil, err
	}

	ctnr := serviceSpec.Task.GetContainer()
	if ctnr == nil {
		return nil, fmt.Errorf("service does not use container tasks")
	}

	if encodedAuth != "" {
		ctnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
	}

	// retrieve auth config from encoded auth
	authConfig := &apitypes.AuthConfig{}
	if encodedAuth != "" {
		if err := json.NewDecoder(base64.NewDecoder(base64.URLEncoding, strings.NewReader(encodedAuth))).Decode(authConfig); err != nil {
			logrus.Warnf("invalid authconfig: %v", err)
		}
	}

	resp := &apitypes.ServiceCreateResponse{}

	// pin image by digest
	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		digestImage, err := c.imageWithDigestString(ctx, ctnr.Image, authConfig)
		if err != nil {
			logrus.Warnf("unable to pin image %s to digest: %s", ctnr.Image, err.Error())
			resp.Warnings = append(resp.Warnings, fmt.Sprintf("unable to pin image %s to digest: %s", ctnr.Image, err.Error()))
		} else {
			logrus.Debugf("pinning image %s by digest: %s", ctnr.Image, digestImage)
			ctnr.Image = digestImage
		}
	}

	r, err := state.controlClient.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
	if err != nil {
		return nil, err
	}

	resp.ID = r.Service.ID
	return resp, nil
}

// GetService returns a service based on an ID or name.
func (c *Cluster) GetService(input string) (types.Service, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return types.Service{}, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	service, err := getService(ctx, state.controlClient, input)
	if err != nil {
		return types.Service{}, err
	}
	return convert.ServiceFromGRPC(*service), nil
}

// UpdateService updates existing service to match new properties.
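// When encodedAuth is empty, the registryAuthFrom parameter selects where
// registry credentials are taken from, as implemented in the switch below:
// apitypes.RegistryAuthFromSpec (or an empty string) reuses the PullOptions of
// the service's current spec, while apitypes.RegistryAuthFromPreviousSpec uses
// the previous spec; any other value is rejected.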
func (c *Cluster) UpdateService(serviceIDOrName string, version uint64, spec types.ServiceSpec, encodedAuth string, registryAuthFrom string) (*apitypes.ServiceUpdateResponse, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return nil, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	err := c.populateNetworkID(ctx, state.controlClient, &spec)
	if err != nil {
		return nil, err
	}

	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
	if err != nil {
		return nil, err
	}

	currentService, err := getService(ctx, state.controlClient, serviceIDOrName)
	if err != nil {
		return nil, err
	}

	newCtnr := serviceSpec.Task.GetContainer()
	if newCtnr == nil {
		return nil, fmt.Errorf("service does not use container tasks")
	}

	if encodedAuth != "" {
		newCtnr.PullOptions = &swarmapi.ContainerSpec_PullOptions{RegistryAuth: encodedAuth}
	} else {
		// this is needed because if encodedAuth isn't being updated we
		// shouldn't lose it, and should continue to use the one that was already present
		var ctnr *swarmapi.ContainerSpec
		switch registryAuthFrom {
		case apitypes.RegistryAuthFromSpec, "":
			ctnr = currentService.Spec.Task.GetContainer()
		case apitypes.RegistryAuthFromPreviousSpec:
			if currentService.PreviousSpec == nil {
				return nil, fmt.Errorf("service does not have a previous spec")
			}
			ctnr = currentService.PreviousSpec.Task.GetContainer()
		default:
			return nil, fmt.Errorf("unsupported registryAuthFrom value")
		}
		if ctnr == nil {
			return nil, fmt.Errorf("service does not use container tasks")
		}
		newCtnr.PullOptions = ctnr.PullOptions
		// update encodedAuth so it can be used to pin image by digest
		if ctnr.PullOptions != nil {
			encodedAuth = ctnr.PullOptions.RegistryAuth
		}
	}

	// retrieve auth config from encoded auth
	authConfig := &apitypes.AuthConfig{}
	if encodedAuth != "" {
		if err := json.NewDecoder(base64.NewDecoder(base64.URLEncoding, strings.NewReader(encodedAuth))).Decode(authConfig); err != nil {
			logrus.Warnf("invalid authconfig: %v", err)
		}
	}

	resp := &apitypes.ServiceUpdateResponse{}

	// pin image by digest
	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		digestImage, err := c.imageWithDigestString(ctx, newCtnr.Image, authConfig)
		if err != nil {
			logrus.Warnf("unable to pin image %s to digest: %s", newCtnr.Image, err.Error())
			resp.Warnings = append(resp.Warnings, fmt.Sprintf("unable to pin image %s to digest: %s", newCtnr.Image, err.Error()))
		} else if newCtnr.Image != digestImage {
			logrus.Debugf("pinning image %s by digest: %s", newCtnr.Image, digestImage)
			newCtnr.Image = digestImage
		}
	}

	_, err = state.controlClient.UpdateService(
		ctx,
		&swarmapi.UpdateServiceRequest{
			ServiceID: currentService.ID,
			Spec:      &serviceSpec,
			ServiceVersion: &swarmapi.Version{
				Index: version,
			},
		},
	)

	return resp, err
}

// RemoveService removes a service from a managed swarm cluster.
func (c *Cluster) RemoveService(input string) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	service, err := getService(ctx, state.controlClient, input)
	if err != nil {
		return err
	}

	if _, err := state.controlClient.RemoveService(ctx, &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil {
		return err
	}
	return nil
}

// ServiceLogs collects service logs and writes them back to `config.OutStream`
func (c *Cluster) ServiceLogs(ctx context.Context, input string, config *backend.ContainerLogsConfig, started chan struct{}) error {
	c.mu.RLock()
	state := c.currentNodeState()
	if !state.IsActiveManager() {
		c.mu.RUnlock()
		return c.errNoManager(state)
	}

	service, err := getService(ctx, state.controlClient, input)
	if err != nil {
		c.mu.RUnlock()
		return err
	}

	stream, err := state.logsClient.SubscribeLogs(ctx, &swarmapi.SubscribeLogsRequest{
		Selector: &swarmapi.LogSelector{
			ServiceIDs: []string{service.ID},
		},
		Options: &swarmapi.LogSubscriptionOptions{
			Follow: config.Follow,
		},
	})
	if err != nil {
		c.mu.RUnlock()
		return err
	}

	wf := ioutils.NewWriteFlusher(config.OutStream)
	defer wf.Close()
	close(started)
	wf.Flush()

	outStream := stdcopy.NewStdWriter(wf, stdcopy.Stdout)
	errStream := stdcopy.NewStdWriter(wf, stdcopy.Stderr)

	// Release the lock before starting the stream.
	c.mu.RUnlock()
	for {
		// Check the context before doing anything.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		subscribeMsg, err := stream.Recv()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}

		for _, msg := range subscribeMsg.Messages {
			data := []byte{}

			if config.Timestamps {
				ts, err := ptypes.Timestamp(msg.Timestamp)
				if err != nil {
					return err
				}
				data = append(data, []byte(ts.Format(logger.TimeFormat)+" ")...)
			}

			data = append(data, []byte(fmt.Sprintf("%s.node.id=%s,%s.service.id=%s,%s.task.id=%s ",
				contextPrefix, msg.Context.NodeID,
				contextPrefix, msg.Context.ServiceID,
				contextPrefix, msg.Context.TaskID,
			))...)

			data = append(data, msg.Data...)

			switch msg.Stream {
			case swarmapi.LogStreamStdout:
				outStream.Write(data)
			case swarmapi.LogStreamStderr:
				errStream.Write(data)
			}
		}
	}
}

// GetNodes returns a list of all nodes known to a cluster.
func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return nil, c.errNoManager(state)
	}

	filters, err := newListNodesFilters(options.Filters)
	if err != nil {
		return nil, err
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := state.controlClient.ListNodes(
		ctx,
		&swarmapi.ListNodesRequest{Filters: filters})
	if err != nil {
		return nil, err
	}

	nodes := []types.Node{}

	for _, node := range r.Nodes {
		nodes = append(nodes, convert.NodeFromGRPC(*node))
	}
	return nodes, nil
}

// GetNode returns a node based on an ID or name.
func (c *Cluster) GetNode(input string) (types.Node, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return types.Node{}, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	node, err := getNode(ctx, state.controlClient, input)
	if err != nil {
		return types.Node{}, err
	}
	return convert.NodeFromGRPC(*node), nil
}

// UpdateNode updates an existing node's properties.
func (c *Cluster) UpdateNode(input string, version uint64, spec types.NodeSpec) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	nodeSpec, err := convert.NodeSpecToGRPC(spec)
	if err != nil {
		return err
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	currentNode, err := getNode(ctx, state.controlClient, input)
	if err != nil {
		return err
	}

	_, err = state.controlClient.UpdateNode(
		ctx,
		&swarmapi.UpdateNodeRequest{
			NodeID: currentNode.ID,
			Spec:   &nodeSpec,
			NodeVersion: &swarmapi.Version{
				Index: version,
			},
		},
	)
	return err
}

// RemoveNode removes a node from a cluster
func (c *Cluster) RemoveNode(input string, force bool) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	node, err := getNode(ctx, state.controlClient, input)
	if err != nil {
		return err
	}

	if _, err := state.controlClient.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID, Force: force}); err != nil {
		return err
	}
	return nil
}

// GetTasks returns a list of tasks matching the filter options.
func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return nil, c.errNoManager(state)
	}

	byName := func(filter filters.Args) error {
		if filter.Include("service") {
			serviceFilters := filter.Get("service")
			for _, serviceFilter := range serviceFilters {
				service, err := c.GetService(serviceFilter)
				if err != nil {
					return err
				}
				filter.Del("service", serviceFilter)
				filter.Add("service", service.ID)
			}
		}
		if filter.Include("node") {
			nodeFilters := filter.Get("node")
			for _, nodeFilter := range nodeFilters {
				node, err := c.GetNode(nodeFilter)
				if err != nil {
					return err
				}
				filter.Del("node", nodeFilter)
				filter.Add("node", node.ID)
			}
		}
		return nil
	}

	filters, err := newListTasksFilters(options.Filters, byName)
	if err != nil {
		return nil, err
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := state.controlClient.ListTasks(
		ctx,
		&swarmapi.ListTasksRequest{Filters: filters})
	if err != nil {
		return nil, err
	}

	tasks := []types.Task{}

	for _, task := range r.Tasks {
		if task.Spec.GetContainer() != nil {
			tasks = append(tasks, convert.TaskFromGRPC(*task))
		}
	}
	return tasks, nil
}

// GetTask returns a task by an ID.
func (c *Cluster) GetTask(input string) (types.Task, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return types.Task{}, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	task, err := getTask(ctx, state.controlClient, input)
	if err != nil {
		return types.Task{}, err
	}
	return convert.TaskFromGRPC(*task), nil
}

// GetNetwork returns a cluster network by an ID.
func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return apitypes.NetworkResource{}, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	network, err := getNetwork(ctx, state.controlClient, input)
	if err != nil {
		return apitypes.NetworkResource{}, err
	}
	return convert.BasicNetworkFromGRPC(*network), nil
}

// GetNetworks returns all current cluster managed networks.
func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return nil, c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	r, err := state.controlClient.ListNetworks(ctx, &swarmapi.ListNetworksRequest{})
	if err != nil {
		return nil, err
	}

	var networks []apitypes.NetworkResource

	for _, network := range r.Networks {
		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
	}

	return networks, nil
}

func attacherKey(target, containerID string) string {
	return containerID + ":" + target
}

// UpdateAttachment signals the attachment config to the attachment
// waiter who is trying to start or attach the container to the
// network.
func (c *Cluster) UpdateAttachment(target, containerID string, config *network.NetworkingConfig) error {
	c.mu.RLock()
	attacher, ok := c.attachers[attacherKey(target, containerID)]
	c.mu.RUnlock()
	if !ok || attacher == nil {
		return fmt.Errorf("could not find attacher for container %s to network %s", containerID, target)
	}

	attacher.attachWaitCh <- config
	close(attacher.attachWaitCh)
	return nil
}

// WaitForDetachment waits for the container to stop or detach from
// the network.
func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID, taskID, containerID string) error {
	c.mu.RLock()
	attacher, ok := c.attachers[attacherKey(networkName, containerID)]
	if !ok {
		attacher, ok = c.attachers[attacherKey(networkID, containerID)]
	}
	state := c.currentNodeState()
	if state.swarmNode == nil || state.swarmNode.Agent() == nil {
		c.mu.RUnlock()
		return fmt.Errorf("invalid cluster node while waiting for detachment")
	}

	c.mu.RUnlock()
	agent := state.swarmNode.Agent()
	if ok && attacher != nil &&
		attacher.detachWaitCh != nil &&
		attacher.attachCompleteCh != nil {
		// Attachment may be in progress still so wait for
		// attachment to complete.
		select {
		case <-attacher.attachCompleteCh:
		case <-ctx.Done():
			return ctx.Err()
		}

		if attacher.taskID == taskID {
			select {
			case <-attacher.detachWaitCh:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}

	return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
}

// AttachNetwork generates an attachment request towards the manager.
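//
// Roughly, the attacher channels coordinate as follows (a summary of the code
// in this file, not an API contract): AttachNetwork registers an attacher and
// blocks on attachWaitCh; the manager's response arrives via UpdateAttachment,
// which sends the config on attachWaitCh; attachCompleteCh is closed once the
// attachment task ID is known; and DetachNetwork closes detachWaitCh, which
// lets WaitForDetachment proceed with the actual detach request.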
func (c *Cluster) AttachNetwork(target string, containerID string, addresses []string) (*network.NetworkingConfig, error) {
	aKey := attacherKey(target, containerID)
	c.mu.Lock()
	state := c.currentNodeState()
	if state.swarmNode == nil || state.swarmNode.Agent() == nil {
		c.mu.Unlock()
		return nil, fmt.Errorf("invalid cluster node while attaching to network")
	}
	if attacher, ok := c.attachers[aKey]; ok {
		c.mu.Unlock()
		return attacher.config, nil
	}

	agent := state.swarmNode.Agent()
	attachWaitCh := make(chan *network.NetworkingConfig)
	detachWaitCh := make(chan struct{})
	attachCompleteCh := make(chan struct{})
	c.attachers[aKey] = &attacher{
		attachWaitCh:     attachWaitCh,
		attachCompleteCh: attachCompleteCh,
		detachWaitCh:     detachWaitCh,
	}
	c.mu.Unlock()

	ctx, cancel := c.getRequestContext()
	defer cancel()

	taskID, err := agent.ResourceAllocator().AttachNetwork(ctx, containerID, target, addresses)
	if err != nil {
		c.mu.Lock()
		delete(c.attachers, aKey)
		c.mu.Unlock()
		return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
	}

	c.mu.Lock()
	c.attachers[aKey].taskID = taskID
	close(attachCompleteCh)
	c.mu.Unlock()

	logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)

	var config *network.NetworkingConfig
	select {
	case config = <-attachWaitCh:
	case <-ctx.Done():
		return nil, fmt.Errorf("attaching to network failed, make sure your network options are correct and check manager logs: %v", ctx.Err())
	}

	c.mu.Lock()
	c.attachers[aKey].config = config
	c.mu.Unlock()
	return config, nil
}

// DetachNetwork unblocks the waiters waiting on WaitForDetachment so
// that a request to detach can be generated towards the manager.
func (c *Cluster) DetachNetwork(target string, containerID string) error {
	aKey := attacherKey(target, containerID)

	c.mu.Lock()
	attacher, ok := c.attachers[aKey]
	delete(c.attachers, aKey)
	c.mu.Unlock()

	if !ok {
		return fmt.Errorf("could not find network attachment for container %s to network %s", containerID, target)
	}

	close(attacher.detachWaitCh)
	return nil
}

// CreateNetwork creates a new cluster managed network.
func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return "", c.errNoManager(state)
	}

	if runconfig.IsPreDefinedNetwork(s.Name) {
		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
		return "", apierrors.NewRequestForbiddenError(err)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	networkSpec := convert.BasicNetworkCreateToGRPC(s)
	r, err := state.controlClient.CreateNetwork(ctx, &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
	if err != nil {
		return "", err
	}

	return r.Network.ID, nil
}

// RemoveNetwork removes a cluster network.
func (c *Cluster) RemoveNetwork(input string) error {
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	if !state.IsActiveManager() {
		return c.errNoManager(state)
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	network, err := getNetwork(ctx, state.controlClient, input)
	if err != nil {
		return err
	}

	if _, err := state.controlClient.RemoveNetwork(ctx, &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil {
		return err
	}
	return nil
}

func (c *Cluster) populateNetworkID(ctx context.Context, client swarmapi.ControlClient, s *types.ServiceSpec) error {
	// Always prefer NetworkAttachmentConfigs from TaskTemplate
	// but fall back to service spec for backward compatibility
	networks := s.TaskTemplate.Networks
	if len(networks) == 0 {
		networks = s.Networks
	}

	for i, n := range networks {
		apiNetwork, err := getNetwork(ctx, client, n.Target)
		if err != nil {
			if ln, _ := c.config.Backend.FindNetwork(n.Target); ln != nil && !ln.Info().Dynamic() {
				err = fmt.Errorf("The network %s cannot be used with services. Only networks scoped to the swarm can be used, such as those created with the overlay driver.", ln.Name())
				return apierrors.NewRequestForbiddenError(err)
			}
			return err
		}
		networks[i].Target = apiNetwork.ID
	}
	return nil
}

// Cleanup stops active swarm node. This is run before daemon shutdown.
func (c *Cluster) Cleanup() {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	node := c.nr
	if node == nil {
		c.mu.Unlock()
		return
	}
	defer c.mu.Unlock()
	state := c.currentNodeState()
	if state.IsActiveManager() {
		active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
		if err == nil {
			singlenode := active && isLastManager(reachable, unreachable)
			if active && !singlenode && removingManagerCausesLossOfQuorum(reachable, unreachable) {
				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
			}
		}
	}
	if err := node.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
	}
	c.nr = nil
}

func managerStats(client swarmapi.ControlClient, currentNodeID string) (current bool, reachable int, unreachable int, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	nodes, err := client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
	if err != nil {
		return false, 0, 0, err
	}
	for _, n := range nodes.Nodes {
		if n.ManagerStatus != nil {
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
				reachable++
				if n.ID == currentNodeID {
					current = true
				}
			}
			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
				unreachable++
			}
		}
	}
	return
}

func validateAndSanitizeInitRequest(req *types.InitRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}

	if req.Spec.Annotations.Name == "" {
		req.Spec.Annotations.Name = "default"
	} else if req.Spec.Annotations.Name != "default" {
		return errors.New(`swarm spec must be named "default"`)
	}

	return nil
}

func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}
	if len(req.RemoteAddrs) == 0 {
		return fmt.Errorf("at least 1 RemoteAddr is required to join")
	}
	for i := range req.RemoteAddrs {
		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
		if err != nil {
			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
		}
	}
	return nil
}

func validateAddr(addr string) (string, error) {
	if addr == "" {
		return addr, fmt.Errorf("invalid empty address")
	}
	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
	if err != nil {
		return addr, nil
	}
	return strings.TrimPrefix(newaddr, "tcp://"), nil
}

func initClusterSpec(node *swarmnode.Node, spec types.Spec) error {
	ctx, _ := context.WithTimeout(context.Background(), 5*time.Second)
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return fmt.Errorf("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			// In init, we take the initial default values from swarmkit, and merge
			// any non nil or 0 value from spec to GRPC spec. This will leave the
			// default value alone.
1718 // Note that this is different from Update(), as in Update() we expect 1719 // user to specify the complete spec of the cluster (as they already know 1720 // the existing one and knows which field to update) 1721 clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec) 1722 if err != nil { 1723 return fmt.Errorf("error updating cluster settings: %v", err) 1724 } 1725 _, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{ 1726 ClusterID: cluster.ID, 1727 ClusterVersion: &cluster.Meta.Version, 1728 Spec: &clusterSpec, 1729 }) 1730 if err != nil { 1731 return fmt.Errorf("error updating cluster settings: %v", err) 1732 } 1733 return nil 1734 } 1735 } 1736 return ctx.Err() 1737 } 1738 1739 func detectLockedError(err error) error { 1740 if err == swarmnode.ErrInvalidUnlockKey { 1741 return errors.WithStack(errSwarmLocked) 1742 } 1743 return err 1744 }