github.com/kobeld/docker@v1.12.0-rc1/daemon/cluster/cluster.go

     1  package cluster
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"io/ioutil"
     7  	"os"
     8  	"path/filepath"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	"google.golang.org/grpc"
    14  
    15  	"github.com/Sirupsen/logrus"
    16  	"github.com/docker/docker/daemon/cluster/convert"
    17  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    18  	"github.com/docker/docker/daemon/cluster/executor/container"
    19  	"github.com/docker/docker/errors"
    20  	"github.com/docker/docker/pkg/ioutils"
    21  	"github.com/docker/docker/runconfig"
    22  	apitypes "github.com/docker/engine-api/types"
    23  	types "github.com/docker/engine-api/types/swarm"
    24  	swarmagent "github.com/docker/swarmkit/agent"
    25  	swarmapi "github.com/docker/swarmkit/api"
    26  	"golang.org/x/net/context"
    27  )
    28  
    29  const swarmDirName = "swarm"
    30  const controlSocket = "control.sock"
    31  const swarmConnectTimeout = 10 * time.Second
    32  const stateFile = "docker-state.json"
    33  
    34  const (
    35  	initialReconnectDelay = 100 * time.Millisecond
    36  	maxReconnectDelay     = 10 * time.Second
    37  )
    38  
     39  // ErrNoManager is returned when a manager-only function is called on a non-manager node
    40  var ErrNoManager = fmt.Errorf("this node is not participating as a Swarm manager")
    41  
    42  // ErrNoSwarm is returned on leaving a cluster that was never initialized
    43  var ErrNoSwarm = fmt.Errorf("this node is not part of Swarm")
    44  
    45  // ErrSwarmExists is returned on initialize or join request for a cluster that has already been activated
    46  var ErrSwarmExists = fmt.Errorf("this node is already part of a Swarm")
    47  
    48  // ErrSwarmJoinTimeoutReached is returned when cluster join could not complete before timeout was reached.
    49  var ErrSwarmJoinTimeoutReached = fmt.Errorf("timeout reached before node was joined")
    50  
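         // state is persisted to stateFile so that a restarted daemon can
         // bring its swarm node back up with the same listen address.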
    51  type state struct {
    52  	ListenAddr string
    53  }
    54  
    55  // Config provides values for Cluster.
    56  type Config struct {
    57  	Root    string
    58  	Name    string
    59  	Backend executorpkg.Backend
    60  }
    61  
     62  // Cluster provides capabilities to participate in a cluster as a worker, or as
     63  // a manager and a worker.
    64  type Cluster struct {
    65  	sync.RWMutex
    66  	root           string
    67  	config         Config
    68  	configEvent    chan struct{} // todo: make this array and goroutine safe
    69  	node           *swarmagent.Node
    70  	conn           *grpc.ClientConn
    71  	client         swarmapi.ControlClient
    72  	ready          bool
    73  	listenAddr     string
    74  	err            error
    75  	reconnectDelay time.Duration
    76  	stop           bool
    77  	cancelDelay    func()
    78  }
    79  
    80  // New creates a new Cluster instance using provided config.
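         //
         // A minimal usage sketch (daemonBackend is a placeholder for any value
         // implementing executorpkg.Backend):
         //
         //	c, err := cluster.New(cluster.Config{
         //		Root:    "/var/lib/docker",
         //		Name:    "node1",
         //		Backend: daemonBackend,
         //	})
         //	if err != nil {
         //		// handle startup error
         //	}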
    81  func New(config Config) (*Cluster, error) {
    82  	root := filepath.Join(config.Root, swarmDirName)
    83  	if err := os.MkdirAll(root, 0700); err != nil {
    84  		return nil, err
    85  	}
    86  	c := &Cluster{
    87  		root:           root,
    88  		config:         config,
    89  		configEvent:    make(chan struct{}, 10),
    90  		reconnectDelay: initialReconnectDelay,
    91  	}
    92  
    93  	dt, err := ioutil.ReadFile(filepath.Join(root, stateFile))
    94  	if err != nil {
    95  		if os.IsNotExist(err) {
    96  			return c, nil
    97  		}
    98  		return nil, err
    99  	}
   100  
   101  	var st state
   102  	if err := json.Unmarshal(dt, &st); err != nil {
   103  		return nil, err
   104  	}
   105  
   106  	n, ctx, err := c.startNewNode(false, st.ListenAddr, "", "", "", false)
   107  	if err != nil {
   108  		return nil, err
   109  	}
   110  
   111  	select {
   112  	case <-time.After(swarmConnectTimeout):
   113  		logrus.Errorf("swarm component could not be started before timeout was reached")
   114  	case <-n.Ready(context.Background()):
   115  	case <-ctx.Done():
   116  	}
   117  	if ctx.Err() != nil {
   118  		return nil, fmt.Errorf("swarm component could not be started")
   119  	}
   120  	go c.reconnectOnFailure(ctx)
   121  	return c, nil
   122  }
   123  
   124  func (c *Cluster) saveState() error {
   125  	dt, err := json.Marshal(state{ListenAddr: c.listenAddr})
   126  	if err != nil {
   127  		return err
   128  	}
   129  	return ioutils.AtomicWriteFile(filepath.Join(c.root, stateFile), dt, 0600)
   130  }
   131  
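         // reconnectOnFailure waits for the node's context to end, then
         // restarts the node with exponential backoff (doubling from
         // initialReconnectDelay up to maxReconnectDelay). It gives up when the
         // cluster has been stopped, a node is already running, or the backoff
         // delay is canceled.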
   132  func (c *Cluster) reconnectOnFailure(ctx context.Context) {
   133  	for {
   134  		<-ctx.Done()
   135  		c.Lock()
   136  		if c.stop || c.node != nil {
   137  			c.Unlock()
   138  			return
   139  		}
   140  		c.reconnectDelay *= 2
   141  		if c.reconnectDelay > maxReconnectDelay {
   142  			c.reconnectDelay = maxReconnectDelay
   143  		}
   144  		logrus.Warnf("Restarting swarm in %.2f seconds", c.reconnectDelay.Seconds())
   145  		delayCtx, cancel := context.WithTimeout(context.Background(), c.reconnectDelay)
   146  		c.cancelDelay = cancel
   147  		c.Unlock()
   148  		<-delayCtx.Done()
   149  		if delayCtx.Err() != context.DeadlineExceeded {
   150  			return
   151  		}
   152  		c.Lock()
   153  		if c.node != nil {
   154  			c.Unlock()
   155  			return
   156  		}
   157  		var err error
   158  		_, ctx, err = c.startNewNode(false, c.listenAddr, c.getRemoteAddress(), "", "", false)
   159  		if err != nil {
   160  			c.err = err
   161  			ctx = delayCtx
   162  		}
   163  		c.Unlock()
   164  	}
   165  }
   166  
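         // startNewNode creates and starts a swarmkit node with the given
         // options, persists the listen address, and spawns three goroutines:
         // one watching for node failure, one flipping the ready flag once the
         // node is operational, and one maintaining the control-API client as
         // the control socket connection changes.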
   167  func (c *Cluster) startNewNode(forceNewCluster bool, listenAddr, joinAddr, secret, cahash string, ismanager bool) (*swarmagent.Node, context.Context, error) {
   168  	if err := c.config.Backend.IsSwarmCompatible(); err != nil {
   169  		return nil, nil, err
   170  	}
   171  	c.node = nil
   172  	c.cancelDelay = nil
   173  	node, err := swarmagent.NewNode(&swarmagent.NodeConfig{
   174  		Hostname:         c.config.Name,
   175  		ForceNewCluster:  forceNewCluster,
   176  		ListenControlAPI: filepath.Join(c.root, controlSocket),
   177  		ListenRemoteAPI:  listenAddr,
   178  		JoinAddr:         joinAddr,
   179  		StateDir:         c.root,
   180  		CAHash:           cahash,
   181  		Secret:           secret,
   182  		Executor:         container.NewExecutor(c.config.Backend),
   183  		HeartbeatTick:    1,
   184  		ElectionTick:     3,
   185  		IsManager:        ismanager,
   186  	})
   187  	if err != nil {
   188  		return nil, nil, err
   189  	}
   190  	ctx, cancel := context.WithCancel(context.Background())
   191  	if err := node.Start(ctx); err != nil {
   192  		return nil, nil, err
   193  	}
   194  
   195  	c.node = node
   196  	c.listenAddr = listenAddr
   197  	c.saveState()
   198  	c.config.Backend.SetClusterProvider(c)
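         	// Watch for node failure: log it, clear the cluster's connection
         	// state, and cancel ctx so the other goroutines and any waiters
         	// unblock.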
   199  	go func() {
   200  		err := node.Err(ctx)
   201  		if err != nil {
   202  			logrus.Errorf("cluster exited with error: %v", err)
   203  		}
   204  		c.Lock()
   205  		c.conn = nil
   206  		c.client = nil
   207  		c.node = nil
   208  		c.ready = false
   209  		c.err = err
   210  		c.Unlock()
   211  		cancel()
   212  	}()
   213  
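         	// Mark the cluster ready once the node is operational, resetting
         	// the reconnect backoff; notify config listeners either way.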
   214  	go func() {
   215  		select {
   216  		case <-node.Ready(context.Background()):
   217  			c.Lock()
   218  			c.reconnectDelay = initialReconnectDelay
   219  			c.Unlock()
   220  		case <-ctx.Done():
   221  		}
   222  		if ctx.Err() == nil {
   223  			c.Lock()
   224  			c.ready = true
   225  			c.err = nil
   226  			c.Unlock()
   227  		}
   228  		c.configEvent <- struct{}{}
   229  	}()
   230  
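         	// Rebuild the control-API client whenever the control socket
         	// connection changes.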
   231  	go func() {
   232  		for conn := range node.ListenControlSocket(ctx) {
   233  			c.Lock()
   234  			if c.conn != conn {
   235  				c.client = swarmapi.NewControlClient(conn)
   236  			}
   237  			if c.conn != nil {
   238  				c.client = nil
   239  			}
   240  			c.conn = conn
   241  			c.Unlock()
   242  			c.configEvent <- struct{}{}
   243  		}
   244  	}()
   245  
   246  	return node, ctx, nil
   247  }
   248  
    249  // Init initializes a new cluster from the user-provided request.
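         // If req.ForceNewCluster is set and a node is already running, that
         // node is stopped and restarted as a single-node cluster; this is the
         // recovery path for a cluster that has lost quorum.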
   250  func (c *Cluster) Init(req types.InitRequest) (string, error) {
   251  	c.Lock()
   252  	if c.node != nil {
   253  		c.Unlock()
   254  		if !req.ForceNewCluster {
   255  			return "", ErrSwarmExists
   256  		}
   257  		ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
   258  		defer cancel()
   259  		if err := c.node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
   260  			return "", err
   261  		}
   262  		c.Lock()
   263  		c.node = nil
   264  		c.conn = nil
   265  		c.ready = false
   266  	}
    267  	// todo: check whether current state exists
   268  	n, ctx, err := c.startNewNode(req.ForceNewCluster, req.ListenAddr, "", "", "", false)
   269  	if err != nil {
   270  		c.Unlock()
   271  		return "", err
   272  	}
   273  	c.Unlock()
   274  
   275  	select {
   276  	case <-n.Ready(context.Background()):
   277  		if err := initAcceptancePolicy(n, req.Spec.AcceptancePolicy); err != nil {
   278  			return "", err
   279  		}
   280  		go c.reconnectOnFailure(ctx)
   281  		return n.NodeID(), nil
   282  	case <-ctx.Done():
   283  		c.RLock()
   284  		defer c.RUnlock()
   285  		if c.err != nil {
   286  			if !req.ForceNewCluster { // if failure on first attempt don't keep state
   287  				if err := c.clearState(); err != nil {
   288  					return "", err
   289  				}
   290  			}
   291  			return "", c.err
   292  		}
   293  		return "", ctx.Err()
   294  	}
   295  }
   296  
   297  // Join makes current Cluster part of an existing swarm cluster.
   298  func (c *Cluster) Join(req types.JoinRequest) error {
   299  	c.Lock()
   300  	if c.node != nil {
   301  		c.Unlock()
   302  		return ErrSwarmExists
   303  	}
    304  	// todo: check whether current state exists
   305  	if len(req.RemoteAddrs) == 0 {
   306  		return fmt.Errorf("at least 1 RemoteAddr is required to join")
   307  	}
   308  	n, ctx, err := c.startNewNode(false, req.ListenAddr, req.RemoteAddrs[0], req.Secret, req.CACertHash, req.Manager)
   309  	if err != nil {
   310  		c.Unlock()
   311  		return err
   312  	}
   313  	c.Unlock()
   314  
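         	// Three outcomes: the join times out (possibly because the node is
         	// waiting for manual acceptance), the node becomes ready, or
         	// startup fails.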
   315  	select {
   316  	case <-time.After(swarmConnectTimeout):
   317  		go c.reconnectOnFailure(ctx)
   318  		if nodeid := n.NodeID(); nodeid != "" {
    319  			return fmt.Errorf("Timeout reached before node was joined. Your cluster settings may be preventing this node from automatically joining. To accept this node into the cluster, run `docker node accept %v` on an existing cluster manager", nodeid)
   320  		}
   321  		return ErrSwarmJoinTimeoutReached
   322  	case <-n.Ready(context.Background()):
   323  		go c.reconnectOnFailure(ctx)
   324  		return nil
   325  	case <-ctx.Done():
   326  		c.RLock()
   327  		defer c.RUnlock()
   328  		if c.err != nil {
   329  			return c.err
   330  		}
   331  		return ctx.Err()
   332  	}
   333  }
   334  
   335  func (c *Cluster) cancelReconnect() {
   336  	c.stop = true
   337  	if c.cancelDelay != nil {
   338  		c.cancelDelay()
   339  		c.cancelDelay = nil
   340  	}
   341  }
   342  
   343  // Leave shuts down Cluster and removes current state.
   344  func (c *Cluster) Leave(force bool) error {
   345  	c.Lock()
   346  	node := c.node
   347  	if node == nil {
   348  		c.Unlock()
   349  		return ErrNoSwarm
   350  	}
   351  
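         	// Refuse to leave (without --force) if doing so would cost the
         	// cluster raft quorum: with reachable-1 of reachable+unreachable
         	// managers left, a strict majority is lost exactly when
         	// 2*(reachable-1) <= reachable+unreachable, i.e. reachable-2 <= unreachable.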
   352  	if node.Manager() != nil && !force {
    353  		msg := "You are attempting to leave the cluster on a node that is participating as a manager. "
   354  		if c.isActiveManager() {
   355  			active, reachable, unreachable, err := c.managerStats()
   356  			if err == nil {
   357  				if active && reachable-2 <= unreachable {
   358  					if reachable == 1 && unreachable == 0 {
    359  					msg += "Leaving the last manager will remove all current state of the cluster. Use `--force` to ignore this message. "
    360  						c.Unlock()
    361  						return fmt.Errorf("%s", msg)
   362  					}
    363  				msg += fmt.Sprintf("Leaving the cluster will leave you with %v managers out of %v. This means Raft quorum will be lost and your cluster will become inaccessible. ", reachable-1, reachable+unreachable)
   364  				}
   365  			}
   366  		} else {
    367  			msg += "Doing so may cause your cluster to lose consensus. "
   368  		}
   369  
    370  		msg += "The only way to restore a cluster that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to ignore this message."
    371  		c.Unlock()
    372  		return fmt.Errorf("%s", msg)
   373  	}
   374  	c.cancelReconnect()
   375  	c.Unlock()
   376  
   377  	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
   378  	defer cancel()
   379  	if err := node.Stop(ctx); err != nil && !strings.Contains(err.Error(), "context canceled") {
   380  		return err
   381  	}
   382  	nodeID := node.NodeID()
   383  	for _, id := range c.config.Backend.ListContainersForNode(nodeID) {
   384  		if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
   385  			logrus.Errorf("error removing %v: %v", id, err)
   386  		}
   387  	}
   388  	c.Lock()
   389  	defer c.Unlock()
   390  	c.node = nil
   391  	c.conn = nil
   392  	c.ready = false
   393  	c.configEvent <- struct{}{}
   394  	// todo: cleanup optional?
   395  	if err := c.clearState(); err != nil {
   396  		return err
   397  	}
   398  	return nil
   399  }
   400  
   401  func (c *Cluster) clearState() error {
   402  	if err := os.RemoveAll(c.root); err != nil {
   403  		return err
   404  	}
   405  	if err := os.MkdirAll(c.root, 0700); err != nil {
   406  		return err
   407  	}
   408  	c.config.Backend.SetClusterProvider(nil)
   409  	return nil
   410  }
   411  
    412  func (c *Cluster) getRequestContext() context.Context { // TODO: not needed when requests don't block on quorum lost
   413  	ctx, _ := context.WithTimeout(context.Background(), 5*time.Second)
   414  	return ctx
   415  }
   416  
    417  // Inspect retrieves the configuration properties of the managed swarm cluster.
   418  func (c *Cluster) Inspect() (types.Swarm, error) {
   419  	c.RLock()
   420  	defer c.RUnlock()
   421  
   422  	if !c.isActiveManager() {
   423  		return types.Swarm{}, ErrNoManager
   424  	}
   425  
   426  	swarm, err := getSwarm(c.getRequestContext(), c.client)
   427  	if err != nil {
   428  		return types.Swarm{}, err
   429  	}
   430  
   435  	return convert.SwarmFromGRPC(*swarm), nil
   436  }
   437  
   438  // Update updates configuration of a managed swarm cluster.
   439  func (c *Cluster) Update(version uint64, spec types.Spec) error {
   440  	c.RLock()
   441  	defer c.RUnlock()
   442  
   443  	if !c.isActiveManager() {
   444  		return ErrNoManager
   445  	}
   446  
   447  	swarmSpec, err := convert.SwarmSpecToGRPC(spec)
   448  	if err != nil {
   449  		return err
   450  	}
   451  
   452  	swarm, err := getSwarm(c.getRequestContext(), c.client)
   453  	if err != nil {
   454  		return err
   455  	}
   456  
   457  	_, err = c.client.UpdateCluster(
   458  		c.getRequestContext(),
   459  		&swarmapi.UpdateClusterRequest{
   460  			ClusterID: swarm.ID,
   461  			Spec:      &swarmSpec,
   462  			ClusterVersion: &swarmapi.Version{
   463  				Index: version,
   464  			},
   465  		},
   466  	)
   467  	return err
   468  }
   469  
    470  // IsManager returns true if Cluster is participating as a manager.
   471  func (c *Cluster) IsManager() bool {
   472  	c.RLock()
   473  	defer c.RUnlock()
   474  	return c.isActiveManager()
   475  }
   476  
    477  // IsAgent returns true if Cluster is participating as a worker/agent.
   478  func (c *Cluster) IsAgent() bool {
   479  	c.RLock()
   480  	defer c.RUnlock()
   481  	return c.ready
   482  }
   483  
    484  // GetListenAddress returns the listening address for the current manager's
   485  // consensus and dispatcher APIs.
   486  func (c *Cluster) GetListenAddress() string {
   487  	c.RLock()
   488  	defer c.RUnlock()
   489  	if c.conn != nil {
   490  		return c.listenAddr
   491  	}
   492  	return ""
   493  }
   494  
    495  // GetRemoteAddress returns a known advertise address of a remote manager if
   496  // available.
   497  // todo: change to array/connect with info
   498  func (c *Cluster) GetRemoteAddress() string {
   499  	c.RLock()
   500  	defer c.RUnlock()
   501  	return c.getRemoteAddress()
   502  }
   503  
   504  func (c *Cluster) getRemoteAddress() string {
   505  	if c.node == nil {
   506  		return ""
   507  	}
   508  	nodeID := c.node.NodeID()
   509  	for _, r := range c.node.Remotes() {
   510  		if r.NodeID != nodeID {
   511  			return r.Addr
   512  		}
   513  	}
   514  	return ""
   515  }
   516  
   517  // ListenClusterEvents returns a channel that receives messages on cluster
   518  // participation changes.
   519  // todo: make cancelable and accessible to multiple callers
   520  func (c *Cluster) ListenClusterEvents() <-chan struct{} {
   521  	return c.configEvent
   522  }
   523  
   524  // Info returns information about the current cluster state.
   525  func (c *Cluster) Info() types.Info {
   526  	var info types.Info
   527  	c.RLock()
   528  	defer c.RUnlock()
   529  
   530  	if c.node == nil {
   531  		info.LocalNodeState = types.LocalNodeStateInactive
   532  		if c.cancelDelay != nil {
   533  			info.LocalNodeState = types.LocalNodeStateError
   534  		}
   535  	} else {
   536  		info.LocalNodeState = types.LocalNodeStatePending
    537  		if c.ready {
   538  			info.LocalNodeState = types.LocalNodeStateActive
   539  		}
   540  	}
   541  	if c.err != nil {
   542  		info.Error = c.err.Error()
   543  	}
   544  
   545  	if c.isActiveManager() {
   546  		info.ControlAvailable = true
   547  		if r, err := c.client.ListNodes(c.getRequestContext(), &swarmapi.ListNodesRequest{}); err == nil {
   548  			info.Nodes = len(r.Nodes)
   549  			for _, n := range r.Nodes {
   550  				if n.ManagerStatus != nil {
   551  					info.Managers = info.Managers + 1
   552  				}
   553  			}
   554  		}
   555  
   556  		if swarm, err := getSwarm(c.getRequestContext(), c.client); err == nil && swarm != nil {
   557  			info.CACertHash = swarm.RootCA.CACertHash
   558  		}
   559  	}
   560  
   561  	if c.node != nil {
   562  		for _, r := range c.node.Remotes() {
   563  			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
   564  		}
   565  		info.NodeID = c.node.NodeID()
   566  	}
   567  
   568  	return info
   569  }
   570  
   571  // isActiveManager should not be called without a read lock
   572  func (c *Cluster) isActiveManager() bool {
   573  	return c.conn != nil
   574  }
   575  
   576  // GetServices returns all services of a managed swarm cluster.
   577  func (c *Cluster) GetServices(options apitypes.ServiceListOptions) ([]types.Service, error) {
   578  	c.RLock()
   579  	defer c.RUnlock()
   580  
   581  	if !c.isActiveManager() {
   582  		return nil, ErrNoManager
   583  	}
   584  
   585  	filters, err := newListServicesFilters(options.Filter)
   586  	if err != nil {
   587  		return nil, err
   588  	}
   589  	r, err := c.client.ListServices(
   590  		c.getRequestContext(),
   591  		&swarmapi.ListServicesRequest{Filters: filters})
   592  	if err != nil {
   593  		return nil, err
   594  	}
   595  
   596  	var services []types.Service
   597  
   598  	for _, service := range r.Services {
   599  		services = append(services, convert.ServiceFromGRPC(*service))
   600  	}
   601  
   602  	return services, nil
   603  }
   604  
   605  // CreateService creates a new service in a managed swarm cluster.
   606  func (c *Cluster) CreateService(s types.ServiceSpec) (string, error) {
   607  	c.RLock()
   608  	defer c.RUnlock()
   609  
   610  	if !c.isActiveManager() {
   611  		return "", ErrNoManager
   612  	}
   613  
   614  	ctx := c.getRequestContext()
   615  
   616  	err := populateNetworkID(ctx, c.client, &s)
   617  	if err != nil {
   618  		return "", err
   619  	}
   620  
   621  	serviceSpec, err := convert.ServiceSpecToGRPC(s)
   622  	if err != nil {
   623  		return "", err
   624  	}
   625  	r, err := c.client.CreateService(ctx, &swarmapi.CreateServiceRequest{Spec: &serviceSpec})
   626  	if err != nil {
   627  		return "", err
   628  	}
   629  
   630  	return r.Service.ID, nil
   631  }
   632  
    633  // GetService returns a service based on an ID or name.
   634  func (c *Cluster) GetService(input string) (types.Service, error) {
   635  	c.RLock()
   636  	defer c.RUnlock()
   637  
   638  	if !c.isActiveManager() {
   639  		return types.Service{}, ErrNoManager
   640  	}
   641  
   642  	service, err := getService(c.getRequestContext(), c.client, input)
   643  	if err != nil {
   644  		return types.Service{}, err
   645  	}
   646  	return convert.ServiceFromGRPC(*service), nil
   647  }
   648  
    649  // UpdateService updates an existing service to match new properties.
   650  func (c *Cluster) UpdateService(serviceID string, version uint64, spec types.ServiceSpec) error {
   651  	c.RLock()
   652  	defer c.RUnlock()
   653  
   654  	if !c.isActiveManager() {
   655  		return ErrNoManager
   656  	}
   657  
   658  	serviceSpec, err := convert.ServiceSpecToGRPC(spec)
   659  	if err != nil {
   660  		return err
   661  	}
   662  
   663  	_, err = c.client.UpdateService(
   664  		c.getRequestContext(),
   665  		&swarmapi.UpdateServiceRequest{
   666  			ServiceID: serviceID,
   667  			Spec:      &serviceSpec,
   668  			ServiceVersion: &swarmapi.Version{
   669  				Index: version,
   670  			},
   671  		},
   672  	)
   673  	return err
   674  }
   675  
   676  // RemoveService removes a service from a managed swarm cluster.
   677  func (c *Cluster) RemoveService(input string) error {
   678  	c.RLock()
   679  	defer c.RUnlock()
   680  
   681  	if !c.isActiveManager() {
   682  		return ErrNoManager
   683  	}
   684  
   685  	service, err := getService(c.getRequestContext(), c.client, input)
   686  	if err != nil {
   687  		return err
   688  	}
   689  
   690  	if _, err := c.client.RemoveService(c.getRequestContext(), &swarmapi.RemoveServiceRequest{ServiceID: service.ID}); err != nil {
   691  		return err
   692  	}
   693  	return nil
   694  }
   695  
   696  // GetNodes returns a list of all nodes known to a cluster.
   697  func (c *Cluster) GetNodes(options apitypes.NodeListOptions) ([]types.Node, error) {
   698  	c.RLock()
   699  	defer c.RUnlock()
   700  
   701  	if !c.isActiveManager() {
   702  		return nil, ErrNoManager
   703  	}
   704  
   705  	filters, err := newListNodesFilters(options.Filter)
   706  	if err != nil {
   707  		return nil, err
   708  	}
   709  	r, err := c.client.ListNodes(
   710  		c.getRequestContext(),
   711  		&swarmapi.ListNodesRequest{Filters: filters})
   712  	if err != nil {
   713  		return nil, err
   714  	}
   715  
   716  	nodes := []types.Node{}
   717  
   718  	for _, node := range r.Nodes {
   719  		nodes = append(nodes, convert.NodeFromGRPC(*node))
   720  	}
   721  	return nodes, nil
   722  }
   723  
    724  // GetNode returns a node based on an ID or name.
   725  func (c *Cluster) GetNode(input string) (types.Node, error) {
   726  	c.RLock()
   727  	defer c.RUnlock()
   728  
   729  	if !c.isActiveManager() {
   730  		return types.Node{}, ErrNoManager
   731  	}
   732  
   733  	node, err := getNode(c.getRequestContext(), c.client, input)
   734  	if err != nil {
   735  		return types.Node{}, err
   736  	}
   737  	return convert.NodeFromGRPC(*node), nil
   738  }
   739  
    740  // UpdateNode updates an existing node's properties.
   741  func (c *Cluster) UpdateNode(nodeID string, version uint64, spec types.NodeSpec) error {
   742  	c.RLock()
   743  	defer c.RUnlock()
   744  
   745  	if !c.isActiveManager() {
   746  		return ErrNoManager
   747  	}
   748  
   749  	nodeSpec, err := convert.NodeSpecToGRPC(spec)
   750  	if err != nil {
   751  		return err
   752  	}
   753  
   754  	_, err = c.client.UpdateNode(
   755  		c.getRequestContext(),
   756  		&swarmapi.UpdateNodeRequest{
   757  			NodeID: nodeID,
   758  			Spec:   &nodeSpec,
   759  			NodeVersion: &swarmapi.Version{
   760  				Index: version,
   761  			},
   762  		},
   763  	)
   764  	return err
   765  }
   766  
   767  // RemoveNode removes a node from a cluster
   768  func (c *Cluster) RemoveNode(input string) error {
   769  	c.RLock()
   770  	defer c.RUnlock()
   771  
   772  	if !c.isActiveManager() {
   773  		return ErrNoManager
   774  	}
   775  
   776  	ctx := c.getRequestContext()
   777  
   778  	node, err := getNode(ctx, c.client, input)
   779  	if err != nil {
   780  		return err
   781  	}
   782  
   783  	if _, err := c.client.RemoveNode(ctx, &swarmapi.RemoveNodeRequest{NodeID: node.ID}); err != nil {
   784  		return err
   785  	}
   786  	return nil
   787  }
   788  
   789  // GetTasks returns a list of tasks matching the filter options.
   790  func (c *Cluster) GetTasks(options apitypes.TaskListOptions) ([]types.Task, error) {
   791  	c.RLock()
   792  	defer c.RUnlock()
   793  
   794  	if !c.isActiveManager() {
   795  		return nil, ErrNoManager
   796  	}
   797  
   798  	filters, err := newListTasksFilters(options.Filter)
   799  	if err != nil {
   800  		return nil, err
   801  	}
   802  	r, err := c.client.ListTasks(
   803  		c.getRequestContext(),
   804  		&swarmapi.ListTasksRequest{Filters: filters})
   805  	if err != nil {
   806  		return nil, err
   807  	}
   808  
   809  	tasks := []types.Task{}
   810  
   811  	for _, task := range r.Tasks {
   812  		tasks = append(tasks, convert.TaskFromGRPC(*task))
   813  	}
   814  	return tasks, nil
   815  }
   816  
   817  // GetTask returns a task by an ID.
   818  func (c *Cluster) GetTask(input string) (types.Task, error) {
   819  	c.RLock()
   820  	defer c.RUnlock()
   821  
   822  	if !c.isActiveManager() {
   823  		return types.Task{}, ErrNoManager
   824  	}
   825  
   826  	task, err := getTask(c.getRequestContext(), c.client, input)
   827  	if err != nil {
   828  		return types.Task{}, err
   829  	}
   830  	return convert.TaskFromGRPC(*task), nil
   831  }
   832  
   833  // GetNetwork returns a cluster network by ID.
   834  func (c *Cluster) GetNetwork(input string) (apitypes.NetworkResource, error) {
   835  	c.RLock()
   836  	defer c.RUnlock()
   837  
   838  	if !c.isActiveManager() {
   839  		return apitypes.NetworkResource{}, ErrNoManager
   840  	}
   841  
   842  	network, err := getNetwork(c.getRequestContext(), c.client, input)
   843  	if err != nil {
   844  		return apitypes.NetworkResource{}, err
   845  	}
   846  	return convert.BasicNetworkFromGRPC(*network), nil
   847  }
   848  
   849  // GetNetworks returns all current cluster managed networks.
   850  func (c *Cluster) GetNetworks() ([]apitypes.NetworkResource, error) {
   851  	c.RLock()
   852  	defer c.RUnlock()
   853  
   854  	if !c.isActiveManager() {
   855  		return nil, ErrNoManager
   856  	}
   857  
   858  	r, err := c.client.ListNetworks(c.getRequestContext(), &swarmapi.ListNetworksRequest{})
   859  	if err != nil {
   860  		return nil, err
   861  	}
   862  
   863  	var networks []apitypes.NetworkResource
   864  
   865  	for _, network := range r.Networks {
   866  		networks = append(networks, convert.BasicNetworkFromGRPC(*network))
   867  	}
   868  
   869  	return networks, nil
   870  }
   871  
   872  // CreateNetwork creates a new cluster managed network.
   873  func (c *Cluster) CreateNetwork(s apitypes.NetworkCreateRequest) (string, error) {
   874  	c.RLock()
   875  	defer c.RUnlock()
   876  
   877  	if !c.isActiveManager() {
   878  		return "", ErrNoManager
   879  	}
   880  
   881  	if runconfig.IsPreDefinedNetwork(s.Name) {
   882  		err := fmt.Errorf("%s is a pre-defined network and cannot be created", s.Name)
   883  		return "", errors.NewRequestForbiddenError(err)
   884  	}
   885  
   886  	networkSpec := convert.BasicNetworkCreateToGRPC(s)
   887  	r, err := c.client.CreateNetwork(c.getRequestContext(), &swarmapi.CreateNetworkRequest{Spec: &networkSpec})
   888  	if err != nil {
   889  		return "", err
   890  	}
   891  
   892  	return r.Network.ID, nil
   893  }
   894  
   895  // RemoveNetwork removes a cluster network.
   896  func (c *Cluster) RemoveNetwork(input string) error {
   897  	c.RLock()
   898  	defer c.RUnlock()
   899  
   900  	if !c.isActiveManager() {
   901  		return ErrNoManager
   902  	}
   903  
   904  	network, err := getNetwork(c.getRequestContext(), c.client, input)
   905  	if err != nil {
   906  		return err
   907  	}
   908  
   909  	if _, err := c.client.RemoveNetwork(c.getRequestContext(), &swarmapi.RemoveNetworkRequest{NetworkID: network.ID}); err != nil {
   910  		return err
   911  	}
   912  	return nil
   913  }
   914  
   915  func populateNetworkID(ctx context.Context, c swarmapi.ControlClient, s *types.ServiceSpec) error {
   916  	for i, n := range s.Networks {
   917  		apiNetwork, err := getNetwork(ctx, c, n.Target)
   918  		if err != nil {
   919  			return err
   920  		}
   921  		s.Networks[i] = types.NetworkAttachmentConfig{Target: apiNetwork.ID}
   922  	}
   923  	return nil
   924  }
   925  
   926  func getNetwork(ctx context.Context, c swarmapi.ControlClient, input string) (*swarmapi.Network, error) {
   927  	// GetNetwork to match via full ID.
   928  	rg, err := c.GetNetwork(ctx, &swarmapi.GetNetworkRequest{NetworkID: input})
   929  	if err != nil {
   930  		// If any error (including NotFound), ListNetworks to match via ID prefix and full name.
   931  		rl, err := c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{Names: []string{input}}})
   932  		if err != nil || len(rl.Networks) == 0 {
   933  			rl, err = c.ListNetworks(ctx, &swarmapi.ListNetworksRequest{Filters: &swarmapi.ListNetworksRequest_Filters{IDPrefixes: []string{input}}})
   934  		}
   935  
   936  		if err != nil {
   937  			return nil, err
   938  		}
   939  
   940  		if len(rl.Networks) == 0 {
   941  			return nil, fmt.Errorf("network %s not found", input)
   942  		}
   943  
   944  		if l := len(rl.Networks); l > 1 {
    945  			return nil, fmt.Errorf("network %s is ambiguous (%d matches found)", input, l)
   946  		}
   947  
   948  		return rl.Networks[0], nil
   949  	}
   950  	return rg.Network, nil
   951  }
   952  
    953  // Cleanup stops the active swarm node. This is run before daemon shutdown.
   954  func (c *Cluster) Cleanup() {
   955  	c.Lock()
   956  	node := c.node
   957  	if node == nil {
   958  		c.Unlock()
   959  		return
   960  	}
   961  
   962  	if c.isActiveManager() {
   963  		active, reachable, unreachable, err := c.managerStats()
   964  		if err == nil {
   965  			singlenode := active && reachable == 1 && unreachable == 0
   966  			if active && !singlenode && reachable-2 <= unreachable {
   967  				logrus.Errorf("Leaving cluster with %v managers left out of %v. Raft quorum will be lost.", reachable-1, reachable+unreachable)
   968  			}
   969  		}
   970  	}
   971  	c.cancelReconnect()
   972  	c.Unlock()
   973  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   974  	defer cancel()
   975  	if err := node.Stop(ctx); err != nil {
   976  		logrus.Errorf("error cleaning up cluster: %v", err)
   977  	}
   978  	c.Lock()
   979  	c.node = nil
   980  	c.ready = false
   981  	c.conn = nil
   982  	c.Unlock()
   983  }
   984  
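         // managerStats counts reachable and unreachable raft members and
         // reports whether this node is itself a reachable manager. It should
         // only be called on an active manager.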
   985  func (c *Cluster) managerStats() (current bool, reachable int, unreachable int, err error) {
   986  	ctx, _ := context.WithTimeout(context.Background(), 3*time.Second)
   987  	nodes, err := c.client.ListNodes(ctx, &swarmapi.ListNodesRequest{})
   988  	if err != nil {
   989  		return false, 0, 0, err
   990  	}
   991  	for _, n := range nodes.Nodes {
   992  		if n.ManagerStatus != nil {
   993  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_REACHABLE {
   994  				reachable++
   995  				if n.ID == c.node.NodeID() {
   996  					current = true
   997  				}
   998  			}
   999  			if n.ManagerStatus.Reachability == swarmapi.RaftMemberStatus_UNREACHABLE {
  1000  				unreachable++
  1001  			}
  1002  		}
  1003  	}
  1004  	return
  1005  }
  1006  
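         // initAcceptancePolicy waits for the control socket to come up, then
         // polls ListClusters (the cluster object may not exist immediately
         // after init) and writes the requested acceptance policy into the
         // cluster spec.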
  1007  func initAcceptancePolicy(node *swarmagent.Node, acceptancePolicy types.AcceptancePolicy) error {
  1008  	ctx, _ := context.WithTimeout(context.Background(), 5*time.Second)
  1009  	for conn := range node.ListenControlSocket(ctx) {
  1010  		if ctx.Err() != nil {
  1011  			return ctx.Err()
  1012  		}
  1013  		if conn != nil {
  1014  			client := swarmapi.NewControlClient(conn)
  1015  			var cluster *swarmapi.Cluster
  1016  			for i := 0; ; i++ {
  1017  				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
  1018  				if err != nil {
  1019  					return fmt.Errorf("error on listing clusters: %v", err)
  1020  				}
  1021  				if len(lcr.Clusters) == 0 {
  1022  					if i < 10 {
  1023  						time.Sleep(200 * time.Millisecond)
  1024  						continue
  1025  					}
  1026  					return fmt.Errorf("empty list of clusters was returned")
  1027  				}
  1028  				cluster = lcr.Clusters[0]
  1029  				break
  1030  			}
  1031  			spec := &cluster.Spec
  1032  
  1033  			if err := convert.SwarmSpecUpdateAcceptancePolicy(spec, acceptancePolicy); err != nil {
  1034  				return fmt.Errorf("error updating cluster settings: %v", err)
  1035  			}
  1036  			_, err := client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
  1037  				ClusterID:      cluster.ID,
  1038  				ClusterVersion: &cluster.Meta.Version,
  1039  				Spec:           spec,
  1040  			})
  1041  			if err != nil {
  1042  				return fmt.Errorf("error updating cluster settings: %v", err)
  1043  			}
  1044  			return nil
  1045  		}
  1046  	}
  1047  	return ctx.Err()
  1048  }