github.com/kobeld/docker@v1.12.0-rc1/daemon/cluster/cluster.go (about) 1 package cluster 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "io/ioutil" 7 "os" 8 "path/filepath" 9 "strings" 10 "sync" 11 "time" 12 13 "google.golang.org/grpc" 14 15 "github.com/Sirupsen/logrus" 16 "github.com/docker/docker/daemon/cluster/convert" 17 executorpkg "github.com/docker/docker/daemon/cluster/executor" 18 "github.com/docker/docker/daemon/cluster/executor/container" 19 "github.com/docker/docker/errors" 20 "github.com/docker/docker/pkg/ioutils" 21 "github.com/docker/docker/runconfig" 22 apitypes "github.com/docker/engine-api/types" 23 types "github.com/docker/engine-api/types/swarm" 24 swarmagent "github.com/docker/swarmkit/agent" 25 swarmapi "github.com/docker/swarmkit/api" 26 "golang.org/x/net/context" 27 ) 28 29 const swarmDirName = "swarm" 30 const controlSocket = "control.sock" 31 const swarmConnectTimeout = 10 * time.Second 32 const stateFile = "docker-state.json" 33 34 const ( 35 initialReconnectDelay = 100 * time.Millisecond 36 maxReconnectDelay = 10 * time.Second 37 ) 38 39 // ErrNoManager is returned then a manager-only function is called on non-manager 40 var ErrNoManager = fmt.Errorf("this node is not participating as a Swarm manager") 41 42 // ErrNoSwarm is returned on leaving a cluster that was never initialized 43 var ErrNoSwarm = fmt.Errorf("this node is not part of Swarm") 44 45 // ErrSwarmExists is returned on initialize or join request for a cluster that has already been activated 46 var ErrSwarmExists = fmt.Errorf("this node is already part of a Swarm") 47 48 // ErrSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached. 49 var ErrSwarmJoinTimeoutReached = fmt.Errorf("timeout reached before node was joined") 50 51 type state struct { 52 ListenAddr string 53 } 54 55 // Config provides values for Cluster. 56 type Config struct { 57 Root string 58 Name string 59 Backend executorpkg.Backend 60 } 61 62 // Cluster provides capabilities to pariticipate in a cluster as worker or a 63 // manager and a worker. 64 type Cluster struct { 65 sync.RWMutex 66 root string 67 config Config 68 configEvent chan struct{} // todo: make this array and goroutine safe 69 node *swarmagent.Node 70 conn *grpc.ClientConn 71 client swarmapi.ControlClient 72 ready bool 73 listenAddr string 74 err error 75 reconnectDelay time.Duration 76 stop bool 77 cancelDelay func() 78 } 79 80 // New creates a new Cluster instance using provided config. 81 func New(config Config) (*Cluster, error) { 82 root := filepath.Join(config.Root, swarmDirName) 83 if err := os.MkdirAll(root, 0700); err != nil { 84 return nil, err 85 } 86 c := &Cluster{ 87 root: root, 88 config: config, 89 configEvent: make(chan struct{}, 10), 90 reconnectDelay: initialReconnectDelay, 91 } 92 93 dt, err := ioutil.ReadFile(filepath.Join(root, stateFile)) 94 if err != nil { 95 if os.IsNotExist(err) { 96 return c, nil 97 } 98 return nil, err 99 } 100 101 var st state 102 if err := json.Unmarshal(dt, &st); err != nil { 103 return nil, err 104 } 105 106 n, ctx, err := c.startNewNode(false, st.ListenAddr, "", "", "", false) 107 if err != nil { 108 return nil, err 109 } 110 111 select { 112 case <-time.After(swarmConnectTimeout): 113 logrus.Errorf("swarm component could not be started before timeout was reached") 114 case <-n.Ready(context.Background()): 115 case <-ctx.Done(): 116 } 117 if ctx.Err() != nil { 118 return nil, fmt.Errorf("swarm component could not be started") 119 } 120 go c.reconnectOnFailure(ctx) 121 return c, nil 122 } 123 124 func (c *Cluster) saveState() error { 125 dt, err := json.Marshal(state{ListenAddr: c.listenAddr}) 126 if err != nil { 127 return err 128 } 129 return ioutils.AtomicWriteFile(filepath.Join(c.root, stateFile), dt, 0600) 130 } 131 132 func (c *Cluster) reconnectOnFailure(ctx context.Context) { 133 for { 134 <-ctx.Done() 135 c.Lock() 136 if c.stop || c.node != nil { 137 c.Unlock() 138 return 139 } 140 c.reconnectDelay *= 2 141 if c.reconnectDelay > maxReconnectDelay { 142 c.reconnectDelay = maxReconnectDelay 143 } 144 logrus.Warnf("Restarting swarm in %.2f seconds", c.reconnectDelay.Seconds()) 145 delayCtx, cancel := context.WithTimeout(context.Background(), c.reconnectDelay) 146 c.cancelDelay = cancel 147 c.Unlock() 148 <-delayCtx.Done() 149 if delayCtx.Err() != context.DeadlineExceeded { 150 return 151 } 152 c.Lock() 153 if c.node != nil { 154 c.Unlock() 155 return 156 } 157 var err error 158 _, ctx, err = c.startNewNode(false, c.listenAddr, c.getRemoteAddress(), "", "", false) 159 if err != nil { 160 c.err = err 161 ctx = delayCtx 162 } 163 c.Unlock() 164 } 165 } 166 167 func (c *Cluster) startNewNode(forceNewCluster bool, listenAddr, joinAddr, secret, cahash string, ismanager bool) (*swarmagent.Node, context.Context, error) { 168 if err := c.config.Backend.IsSwarmCompatible(); err != nil { 169 return nil, nil, err 170 } 171 c.node = nil 172 c.cancelDelay = nil 173 node, err := swarmagent.NewNode(&swarmagent.NodeConfig{ 174 Hostname: c.config.Name, 175 ForceNewCluster: forceNewCluster, 176 ListenControlAPI: filepath.Join(c.root, controlSocket), 177 ListenRemoteAPI: listenAddr, 178 JoinAddr: joinAddr, 179 StateDir: c.root, 180 CAHash: cahash, 181 Secret: secret, 182 Executor: container.NewExecutor(c.config.Backend), 183 HeartbeatTick: 1, 184 ElectionTick: 3, 185 IsManager: ismanager, 186 }) 187 if err != nil { 188 return nil, nil, err 189 } 190 ctx, cancel := context.WithCancel(context.Background()) 191 if err := node.Start(ctx); err != nil { 192 return nil, nil, err 193 } 194 195 c.node = node 196 c.listenAddr = listenAddr 197 c.saveState() 198 c.config.Backend.SetClusterProvider(c) 199 go func() { 200 err := node.Err(ctx) 201 if err != nil { 202 logrus.Errorf("cluster exited with error: %v", err) 203 } 204 c.Lock() 205 c.conn = nil 206 c.client = nil 207 c.node = nil 208 c.ready = false 209 c.err = err 210 c.Unlock() 211 cancel() 212 }() 213 214 go func() { 215 select { 216 case <-node.Ready(context.Background()): 217 c.Lock() 218 c.reconnectDelay = initialReconnectDelay 219 c.Unlock() 220 case <-ctx.Done(): 221 } 222 if ctx.Err() == nil { 223 c.Lock() 224 c.ready = true 225 c.err = nil 226 c.Unlock() 227 } 228 c.configEvent <- struct{}{} 229 }() 230 231 go func() { 232 for conn := range node.ListenControlSocket(ctx) { 233 c.Lock() 234 if c.conn != conn { 235 c.client = swarmapi.NewControlClient(conn) 236 } 237 if c.conn != nil { 238 c.client = nil 239 } 240 c.conn = conn 241 c.Unlock() 242 c.configEvent <- struct{}{} 243 } 244 }() 245 246 return node, ctx, nil 247 } 248 249 // Init initializes new cluster from user provided request. 250 func (c *Cluster) Init(req types.InitRequest) (string, error) { 251 c.Lock() 252 if c.node != nil { 253 c.Unlock() 254 if !req.ForceNewCluster { 255 return "", ErrSwarmExists 256 } 257 ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) 258 defer cancel() 259 if err := c.node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") { 260 return "", err 261 } 262 c.Lock() 263 c.node = nil 264 c.conn = nil 265 c.ready = false 266 } 267 // todo: check current state existing 268 n, ctx, err := c.startNewNode(req.ForceNewCluster, req.ListenAddr, "", "", "", false) 269 if err != nil { 270 c.Unlock() 271 return "", err 272 } 273 c.Unlock() 274 275 select { 276 case <-n.Ready(context.Background()): 277 if err := initAcceptancePolicy(n, req.Spec.AcceptancePolicy); err != nil { 278 return "", err 279 } 280 go c.reconnectOnFailure(ctx) 281 return n.NodeID(), nil 282 case <-ctx.Done(): 283 c.RLock() 284 defer c.RUnlock() 285 if c.err != nil { 286 if !req.ForceNewCluster { // if failure on first attempt don't keep state 287 if err := c.clearState(); err != nil { 288 return "", err 289 } 290 } 291 return "", c.err 292 } 293 return "", ctx.Err() 294 } 295 } 296 297 // Join makes current Cluster part of an existing swarm cluster. 298 func (c *Cluster) Join(req types.JoinRequest) error { 299 c.Lock() 300 if c.node != nil { 301 c.Unlock() 302 return ErrSwarmExists 303 } 304 // todo: check current state existing 305 if len(req.RemoteAddrs) == 0 { 306 return fmt.Errorf("at least 1 RemoteAddr is required to join") 307 } 308 n, ctx, err := c.startNewNode(false, req.ListenAddr, req.RemoteAddrs[0], req.Secret, req.CACertHash, req.Manager) 309 if err != nil { 310 c.Unlock() 311 return err 312 } 313 c.Unlock() 314 315 select { 316 case <-time.After(swarmConnectTimeout): 317 go c.reconnectOnFailure(ctx) 318 if nodeid := n.NodeID(); nodeid != "" { 319 return fmt.Errorf("Timeout reached before node was joined. Your cluster settings may be preventing this node from automatically joining. To accept this node into cluster run `docker node accept %v` in an existing cluster manager", nodeid) 320 } 321 return ErrSwarmJoinTimeoutReached 322 case <-n.Ready(context.Background()): 323 go c.reconnectOnFailure(ctx) 324 return nil 325 case <-ctx.Done(): 326 c.RLock() 327 defer c.RUnlock() 328 if c.err != nil { 329 return c.err 330 } 331 return ctx.Err() 332 } 333 } 334 335 func (c *Cluster) cancelReconnect() { 336 c.stop = true 337 if c.cancelDelay != nil { 338 c.cancelDelay() 339 c.cancelDelay = nil 340 } 341 } 342 343 // Leave shuts down Cluster and removes current state. 344 func (c *Cluster) Leave(force bool) error { 345 c.Lock() 346 node := c.node 347 if node == nil { 348 c.Unlock() 349 return ErrNoSwarm 350 } 351 352 if node.Manager() != nil && !force { 353 msg := "You are attempting to leave cluster on a node that is participating as a manager. " 354 if c.isActiveManager() { 355 active, reachable, unreachable, err := c.managerStats() 356 if err == nil { 357 if active && reachable-2 <= unreachable { 358 if reachable == 1 && unreachable == 0 { 359 msg += "Leaving last manager will remove all current state of the cluster. Use `--force` to ignore this message. " 360 c.Unlock() 361 return fmt.Errorf(msg) 362 } 363 msg += fmt.Sprintf("Leaving cluster will leave you with %v managers out of %v. This means Raft quorum will be lost and your cluster will become inaccessible. ", reachable-1, reachable+unreachable) 364 } 365 } 366 } else { 367 msg += "Doing so may lose the consenus of your cluster. " 368 } 369 370 msg += "Only way to restore a cluster that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to ignore this message." 371 c.Unlock() 372 return fmt.Errorf(msg) 373 } 374 c.cancelReconnect() 375 c.Unlock() 376 377 ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) 378 defer cancel() 379 if err := node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") { 380 return err 381 } 382 nodeID := node.NodeID() 383 for _, id := range c.config.Backend.ListContainersForNode(nodeID) { 384 if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil { 385 logrus.Errorf("error removing %v: %v", id, err) 386 } 387 } 388 c.Lock() 389 defer c.Unlock() 390 c.node = nil 391 c.conn = nil 392 c.ready = false 393 c.configEvent <- struct{}{} 394 // todo: cleanup optional? 395 if err := c.clearState(); err != nil { 396 return err 397 } 398 return nil 399 } 400 401 func (c *Cluster) clearState() error { 402 if err := os.RemoveAll(c.root); err != nil { 403 return err 404 } 405 if err := os.MkdirAll(c.root, 0700); err != nil { 406 return err 407 } 408 c.config.Backend.SetClusterProvider(nil) 409 return nil 410 } 411 412 func (c *Cluster) getRequestContext() context.Context { // TODO: not needed when requests don't block on qourum lost 413 ctx, _ := context.WithTimeout(context.Background(), 5*time.Second) 414 return ctx 415 } 416 417 // Inspect retrives the confuguration properties of managed swarm cluster. 418 func (c *Cluster) Inspect() (types.Swarm, error) { 419 c.RLock() 420 defer c.RUnlock() 421 422 if !c.isActiveManager() { 423 return types.Swarm{}, ErrNoManager 424 } 425 426 swarm, err := getSwarm(c.getRequestContext(), c.client) 427 if err != nil { 428 return types.Swarm{}, err 429 } 430 431 if err != nil { 432 return types.Swarm{}, err 433 } 434 435 return convert.SwarmFromGRPC(*swarm), nil 436 } 437 438 // Update updates configuration of a managed swarm cluster. 439 func (c *Cluster) Update(version uint64, spec types.Spec) error { 440 c.RLock() 441 defer c.RUnlock() 442 443 if !c.isActiveManager() { 444 return ErrNoManager 445 } 446 447 swarmSpec, err := convert.SwarmSpecToGRPC(spec) 448 if err != nil { 449 return err 450 } 451 452 swarm, err := getSwarm(c.getRequestContext(), c.client) 453 if err != nil { 454 return err 455 } 456 457 _, err = c.client.UpdateCluster( 458 c.getRequestContext(), 459 &swarmapi.UpdateClusterRequest{ 460 ClusterID: swarm.ID, 461 Spec: &swarmSpec, 462 ClusterVersion: &swarmapi.Version{ 463 Index: version, 464 }, 465 }, 466 ) 467 return err 468 } 469 470 // IsManager returns true is Cluster is participating as a manager. 471 func (c *Cluster) IsManager() bool { 472 c.RLock() 473 defer c.RUnlock() 474 return c.isActiveManager() 475 } 476 477 // IsAgent returns true is Cluster is participating as a worker/agent. 478 func (c *Cluster) IsAgent() bool { 479 c.RLock() 480 defer c.RUnlock() 481 return c.ready 482 } 483 484 // GetListenAddress returns the listening address for current maanger's 485 // consensus and dispatcher APIs. 486 func (c *Cluster) GetListenAddress() string { 487 c.RLock() 488 defer c.RUnlock() 489 if c.conn != nil { 490 return c.listenAddr 491 } 492 return "" 493 } 494 495 // GetRemoteAddress returns a known advertise address of a remote maanger if 496 // available. 497 // todo: change to array/connect with info 498 func (c *Cluster) GetRemoteAddress() string { 499 c.RLock() 500 defer c.RUnlock() 501 return c.getRemoteAddress() 502 } 503 504 func (c *Cluster) getRemoteAddress() string { 505 if c.node == nil { 506 return "" 507 } 508 nodeID := c.node.NodeID() 509 for _, r := range c.node.Remotes() { 510 if r.NodeID != nodeID { 511 return r.Addr 512 } 513 } 514 return "" 515 } 516 517 // ListenClusterEvents returns a channel that receives messages on cluster 518 // participation changes. 519 // todo: make cancelable and accessible to multiple callers 520 func (c *Cluster) ListenClusterEvents() <-chan struct{} { 521 return c.configEvent 522 } 523 524 // Info returns information about the current cluster state. 525 func (c *Cluster) Info() types.Info { 526 var info types.Info 527 c.RLock() 528 defer c.RUnlock() 529 530 if c.node == nil { 531 info.LocalNodeState = types.LocalNodeStateInactive 532 if c.cancelDelay != nil { 533 info.LocalNodeState = types.LocalNodeStateError 534 } 535 } else { 536 info.LocalNodeState = types.LocalNodeStatePending 537 if c.ready == true { 538 info.LocalNodeState = types.LocalNodeStateActive 539 } 540 } 541 if c.err != nil { 542 info.Error = c.err.Error() 543 } 544 545 if c.isActiveManager() { 546 info.ControlAvailable = true 547 if r, err := c.client.ListNodes(c.getRequestContext(), &swarmapi.ListNodesRequest{}); err == nil { 548 info.Nodes = len(r.Nodes) 549 for _, n := range r.Nodes { 550 if n.ManagerStatus != nil { 551 info.Managers = info.Managers + 1 552 } 553 } 554 } 555 556 if swarm, err := getSwarm(c.getRequestContext(), c.client); err == nil && swarm != nil { 557 info.CACertHash = swarm.RootCA.CACertHash 558 } 559 } 560 561 if c.node != nil { 562 for _, r := range c.node.Remotes() { 563 info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr}) 564 } 565 info.NodeID = c.node.NodeID() 566 } 567 568 return info 569 } 570 571 // isActiveManager should not be called without a read lock 572 func (c *Cluster) isActiveManager() bool { 573 return c.conn != nil 574 } 575 576 // GetServices returns all services of a managed swarm cluster. 577 func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) { 578 c.RLock() 579 defer c.RUnlock() 580 581 if !c.isActiveManager() { 582 return nil, ErrNoManager 583 } 584 585 filters, err := newListServicesFilters(options.Filter) 586 if err != nil { 587 return nil, err 588 } 589 r, err := c.client.ListServices( 590 c.getRequestContext(), 591 &swarmapi.ListServicesRequest{Filters: filters}) 592 if err != nil { 593 return nil, err 594 } 595 596 var services []types.Service 597 598 for _, service := range r.Services { 599 services = append(services, convert.ServiceFromGRPC(*service)) 600 } 601 602 return services, nil 603 } 604 605 // CreateService creates a new service in a managed swarm cluster. 606 func (c *Cluster) CreateService(s types.ServiceSpec) (string, error) { 607 c.RLock() 608 defer c.RUnlock() 609 610 if !c.isActiveManager() { 611 return "", ErrNoManager 612 } 613 614 ctx := c.getRequestContext() 615 616 err := populateNetworkID(ctx, c.client, &s) 617 if err != nil { 618 return "", err 619 } 620 621 serviceSpec, err := convert.ServiceSpecToGRPC(s) 622 if err != nil { 623 return "", err 624 } 625 r, err := c.client.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec}) 626 if err != nil { 627 return "", err 628 } 629 630 return r.Service.ID, nil 631 } 632 633 // GetService returns a service based on a ID or name. 634 func (c *Cluster) GetService(input string) (types.Service, error) { 635 c.RLock() 636 defer c.RUnlock() 637 638 if !c.isActiveManager() { 639 return types.Service{}, ErrNoManager 640 } 641 642 service, err := getService(c.getRequestContext(), c.client, input) 643 if err != nil { 644 return types.Service{}, err 645 } 646 return convert.ServiceFromGRPC(*service), nil 647 } 648 649 // UpdateService updates existing service to match new properties. 650 func (c *Cluster) UpdateService(serviceID string, version uint64, spec types.ServiceSpec) error { 651 c.RLock() 652 defer c.RUnlock() 653 654 if !c.isActiveManager() { 655 return ErrNoManager 656 } 657 658 serviceSpec, err := convert.ServiceSpecToGRPC(spec) 659 if err != nil { 660 return err 661 } 662 663 _, err = c.client.UpdateService( 664 c.getRequestContext(), 665 &swarmapi.UpdateServiceRequest{ 666 ServiceID: serviceID, 667 Spec: &serviceSpec, 668 ServiceVersion: &swarmapi.Version{ 669 Index: version, 670 }, 671 }, 672 ) 673 return err 674 } 675 676 // RemoveService removes a service from a managed swarm cluster. 677 func (c *Cluster) RemoveService(input string) error { 678 c.RLock() 679 defer c.RUnlock() 680 681 if !c.isActiveManager() { 682 return ErrNoManager 683 } 684 685 service, err := getService(c.getRequestContext(), c.client, input) 686 if err != nil { 687 return err 688 } 689 690 if _, err := c.client.RemoveService(c.getRequestContext(), &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil { 691 return err 692 } 693 return nil 694 } 695 696 // GetNodes returns a list of all nodes known to a cluster. 697 func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) { 698 c.RLock() 699 defer c.RUnlock() 700 701 if !c.isActiveManager() { 702 return nil, ErrNoManager 703 } 704 705 filters, err := newListNodesFilters(options.Filter) 706 if err != nil { 707 return nil, err 708 } 709 r, err := c.client.ListNodes( 710 c.getRequestContext(), 711 &swarmapi.ListNodesRequest{Filters: filters}) 712 if err != nil { 713 return nil, err 714 } 715 716 nodes := []types.Node{} 717 718 for _, node := range r.Nodes { 719 nodes = append(nodes, convert.NodeFromGRPC(*node)) 720 } 721 return nodes, nil 722 } 723 724 // GetNode returns a node based on a ID or name. 725 func (c *Cluster) GetNode(input string) (types.Node, error) { 726 c.RLock() 727 defer c.RUnlock() 728 729 if !c.isActiveManager() { 730 return types.Node{}, ErrNoManager 731 } 732 733 node, err := getNode(c.getRequestContext(), c.client, input) 734 if err != nil { 735 return types.Node{}, err 736 } 737 return convert.NodeFromGRPC(*node), nil 738 } 739 740 // UpdateNode updates existing nodes properties. 741 func (c *Cluster) UpdateNode(nodeID string, version uint64, spec types.NodeSpec) error { 742 c.RLock() 743 defer c.RUnlock() 744 745 if !c.isActiveManager() { 746 return ErrNoManager 747 } 748 749 nodeSpec, err := convert.NodeSpecToGRPC(spec) 750 if err != nil { 751 return err 752 } 753 754 _, err = c.client.UpdateNode( 755 c.getRequestContext(), 756 &swarmapi.UpdateNodeRequest{ 757 NodeID: nodeID, 758 Spec: &nodeSpec, 759 NodeVersion: &swarmapi.Version{ 760 Index: version, 761 }, 762 }, 763 ) 764 return err 765 } 766 767 // RemoveNode removes a node from a cluster 768 func (c *Cluster) RemoveNode(input string) error { 769 c.RLock() 770 defer c.RUnlock() 771 772 if !c.isActiveManager() { 773 return ErrNoManager 774 } 775 776 ctx := c.getRequestContext() 777 778 node, err := getNode(ctx, c.client, input) 779 if err != nil { 780 return err 781 } 782 783 if _, err := c.client.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID}); err != nil { 784 return err 785 } 786 return nil 787 } 788 789 // GetTasks returns a list of tasks matching the filter options. 790 func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) { 791 c.RLock() 792 defer c.RUnlock() 793 794 if !c.isActiveManager() { 795 return nil, ErrNoManager 796 } 797 798 filters, err := newListTasksFilters(options.Filter) 799 if err != nil { 800 return nil, err 801 } 802 r, err := c.client.ListTasks( 803 c.getRequestContext(), 804 &swarmapi.ListTasksRequest{Filters: filters}) 805 if err != nil { 806 return nil, err 807 } 808 809 tasks := []types.Task{} 810 811 for _, task := range r.Tasks { 812 tasks = append(tasks, convert.TaskFromGRPC(*task)) 813 } 814 return tasks, nil 815 } 816 817 // GetTask returns a task by an ID. 818 func (c *Cluster) GetTask(input string) (types.Task, error) { 819 c.RLock() 820 defer c.RUnlock() 821 822 if !c.isActiveManager() { 823 return types.Task{}, ErrNoManager 824 } 825 826 task, err := getTask(c.getRequestContext(), c.client, input) 827 if err != nil { 828 return types.Task{}, err 829 } 830 return convert.TaskFromGRPC(*task), nil 831 } 832 833 // GetNetwork returns a cluster network by ID. 834 func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) { 835 c.RLock() 836 defer c.RUnlock() 837 838 if !c.isActiveManager() { 839 return apitypes.NetworkResource{}, ErrNoManager 840 } 841 842 network, err := getNetwork(c.getRequestContext(), c.client, input) 843 if err != nil { 844 return apitypes.NetworkResource{}, err 845 } 846 return convert.BasicNetworkFromGRPC(*network), nil 847 } 848 849 // GetNetworks returns all current cluster managed networks. 850 func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) { 851 c.RLock() 852 defer c.RUnlock() 853 854 if !c.isActiveManager() { 855 return nil, ErrNoManager 856 } 857 858 r, err := c.client.ListNetworks(c.getRequestContext(), &swarmapi.ListNetworksRequest{}) 859 if err != nil { 860 return nil, err 861 } 862 863 var networks []apitypes.NetworkResource 864 865 for _, network := range r.Networks { 866 networks = append(networks, convert.BasicNetworkFromGRPC(*network)) 867 } 868 869 return networks, nil 870 } 871 872 // CreateNetwork creates a new cluster managed network. 873 func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) { 874 c.RLock() 875 defer c.RUnlock() 876 877 if !c.isActiveManager() { 878 return "", ErrNoManager 879 } 880 881 if runconfig.IsPreDefinedNetwork(s.Name) { 882 err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name) 883 return "", errors.NewRequestForbiddenError(err) 884 } 885 886 networkSpec := convert.BasicNetworkCreateToGRPC(s) 887 r, err := c.client.CreateNetwork(c.getRequestContext(), &swarmapi.CreateNetworkRequest{Spec: &networkSpec}) 888 if err != nil { 889 return "", err 890 } 891 892 return r.Network.ID, nil 893 } 894 895 // RemoveNetwork removes a cluster network. 896 func (c *Cluster) RemoveNetwork(input string) error { 897 c.RLock() 898 defer c.RUnlock() 899 900 if !c.isActiveManager() { 901 return ErrNoManager 902 } 903 904 network, err := getNetwork(c.getRequestContext(), c.client, input) 905 if err != nil { 906 return err 907 } 908 909 if _, err := c.client.RemoveNetwork(c.getRequestContext(), &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil { 910 return err 911 } 912 return nil 913 } 914 915 func populateNetworkID(ctx context.Context, c swarmapi.ControlClient, s *types.ServiceSpec) error { 916 for i, n := range s.Networks { 917 apiNetwork, err := getNetwork(ctx, c, n.Target) 918 if err != nil { 919 return err 920 } 921 s.Networks[i] = types.NetworkAttachmentConfig{Target: apiNetwork.ID} 922 } 923 return nil 924 } 925 926 func getNetwork(ctx context.Context, c swarmapi.ControlClient, input string) (*swarmapi.Network, error) { 927 // GetNetwork to match via full ID. 928 rg, err := c.GetNetwork(ctx, &swarmapi.GetNetworkRequest{NetworkID: input}) 929 if err != nil { 930 // If any error (including NotFound), ListNetworks to match via ID prefix and full name. 931 rl, err := c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{Names: []string{input}}}) 932 if err != nil || len(rl.Networks) == 0 { 933 rl, err = c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{IDPrefixes: []string{input}}}) 934 } 935 936 if err != nil { 937 return nil, err 938 } 939 940 if len(rl.Networks) == 0 { 941 return nil, fmt.Errorf("network %s not found", input) 942 } 943 944 if l := len(rl.Networks); l > 1 { 945 return nil, fmt.Errorf("network %s is ambigious (%d matches found)", input, l) 946 } 947 948 return rl.Networks[0], nil 949 } 950 return rg.Network, nil 951 } 952 953 // Cleanup stops active swarm node. This is run before daemon shutdown. 954 func (c *Cluster) Cleanup() { 955 c.Lock() 956 node := c.node 957 if node == nil { 958 c.Unlock() 959 return 960 } 961 962 if c.isActiveManager() { 963 active, reachable, unreachable, err := c.managerStats() 964 if err == nil { 965 singlenode := active && reachable == 1 && unreachable == 0 966 if active && !singlenode && reachable-2 <= unreachable { 967 logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable) 968 } 969 } 970 } 971 c.cancelReconnect() 972 c.Unlock() 973 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 974 defer cancel() 975 if err := node.Stop(ctx); err != nil { 976 logrus.Errorf("error cleaning up cluster: %v", err) 977 } 978 c.Lock() 979 c.node = nil 980 c.ready = false 981 c.conn = nil 982 c.Unlock() 983 } 984 985 func (c *Cluster) managerStats() (current bool, reachable int, unreachable int, err error) { 986 ctx, _ := context.WithTimeout(context.Background(), 3*time.Second) 987 nodes, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{}) 988 if err != nil { 989 return false, 0, 0, err 990 } 991 for _, n := range nodes.Nodes { 992 if n.ManagerStatus != nil { 993 if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE { 994 reachable++ 995 if n.ID == c.node.NodeID() { 996 current = true 997 } 998 } 999 if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE { 1000 unreachable++ 1001 } 1002 } 1003 } 1004 return 1005 } 1006 1007 func initAcceptancePolicy(node *swarmagent.Node, acceptancePolicy types.AcceptancePolicy) error { 1008 ctx, _ := context.WithTimeout(context.Background(), 5*time.Second) 1009 for conn := range node.ListenControlSocket(ctx) { 1010 if ctx.Err() != nil { 1011 return ctx.Err() 1012 } 1013 if conn != nil { 1014 client := swarmapi.NewControlClient(conn) 1015 var cluster *swarmapi.Cluster 1016 for i := 0; ; i++ { 1017 lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{}) 1018 if err != nil { 1019 return fmt.Errorf("error on listing clusters: %v", err) 1020 } 1021 if len(lcr.Clusters) == 0 { 1022 if i < 10 { 1023 time.Sleep(200 * time.Millisecond) 1024 continue 1025 } 1026 return fmt.Errorf("empty list of clusters was returned") 1027 } 1028 cluster = lcr.Clusters[0] 1029 break 1030 } 1031 spec := &cluster.Spec 1032 1033 if err := convert.SwarmSpecUpdateAcceptancePolicy(spec, acceptancePolicy); err != nil { 1034 return fmt.Errorf("error updating cluster settings: %v", err) 1035 } 1036 _, err := client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{ 1037 ClusterID: cluster.ID, 1038 ClusterVersion: &cluster.Meta.Version, 1039 Spec: spec, 1040 }) 1041 if err != nil { 1042 return fmt.Errorf("error updating cluster settings: %v", err) 1043 } 1044 return nil 1045 } 1046 } 1047 return ctx.Err() 1048 }