github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/node/node.go

     1  package node
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"crypto/tls"
     7  	"encoding/json"
     8  	"io/ioutil"
     9  	"math"
    10  	"net"
    11  	"os"
    12  	"path/filepath"
    13  	"reflect"
    14  	"sort"
    15  	"strings"
    16  	"sync"
    17  	"time"
    18  
    19  	"github.com/docker/swarmkit/ca/keyutils"
    20  	"github.com/docker/swarmkit/identity"
    21  
    22  	"github.com/docker/docker/pkg/plugingetter"
    23  	"github.com/docker/go-metrics"
    24  	"github.com/docker/libnetwork/drivers/overlay/overlayutils"
    25  	"github.com/docker/swarmkit/agent"
    26  	"github.com/docker/swarmkit/agent/exec"
    27  	"github.com/docker/swarmkit/api"
    28  	"github.com/docker/swarmkit/ca"
    29  	"github.com/docker/swarmkit/connectionbroker"
    30  	"github.com/docker/swarmkit/ioutils"
    31  	"github.com/docker/swarmkit/log"
    32  	"github.com/docker/swarmkit/manager"
    33  	"github.com/docker/swarmkit/manager/allocator/cnmallocator"
    34  	"github.com/docker/swarmkit/manager/encryption"
    35  	"github.com/docker/swarmkit/remotes"
    36  	"github.com/docker/swarmkit/xnet"
    37  	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
    38  	"github.com/pkg/errors"
    39  	"github.com/sirupsen/logrus"
    40  	bolt "go.etcd.io/bbolt"
    41  	"google.golang.org/grpc"
    42  	"google.golang.org/grpc/credentials"
    43  	"google.golang.org/grpc/status"
    44  )
    45  
    46  const (
    47  	stateFilename     = "state.json"
    48  	roleChangeTimeout = 16 * time.Second
    49  )
    50  
    51  var (
    52  	nodeInfo    metrics.LabeledGauge
    53  	nodeManager metrics.Gauge
    54  
    55  	errNodeStarted    = errors.New("node: already started")
    56  	errNodeNotStarted = errors.New("node: not started")
    57  	certDirectory     = "certificates"
    58  
    59  	// ErrInvalidUnlockKey is returned when we can't decrypt the TLS certificate
    60  	ErrInvalidUnlockKey = errors.New("node is locked, and needs a valid unlock key")
    61  
    62  	// ErrMandatoryFIPS is returned when the cluster we are joining mandates FIPS, but we are running in non-FIPS mode
    63  	ErrMandatoryFIPS = errors.New("node is not FIPS-enabled but cluster requires FIPS")
    64  )
    65  
    66  func init() {
    67  	ns := metrics.NewNamespace("swarm", "node", nil)
    68  	nodeInfo = ns.NewLabeledGauge("info", "Information related to the swarm", "",
    69  		"swarm_id",
    70  		"node_id",
    71  	)
    72  	nodeManager = ns.NewGauge("manager", "Whether this node is a manager or not", "")
    73  	metrics.Register(ns)
    74  }
    75  
    76  // Config provides values for a Node.
    77  type Config struct {
    78  	// Hostname is the hostname for the agent instance.
    79  	Hostname string
    80  
    81  	// JoinAddr specifies a node to use for the initial connection to
    82  	// another manager in the cluster. It should be a single, optional address;
    83  	// the actual remotes come from the stored state.
    84  	JoinAddr string
    85  
    86  	// StateDir specifies the directory the node uses to keep the state of the
    87  	// remote managers and certificates.
    88  	StateDir string
    89  
    90  	// JoinToken is the token to be used on the first certificate request.
    91  	JoinToken string
    92  
    93  	// ExternalCAs is a list of CAs to which a manager node
    94  	// will make certificate signing requests for node certificates.
    95  	ExternalCAs []*api.ExternalCA
    96  
    97  	// ForceNewCluster creates a new cluster from current raft state.
    98  	ForceNewCluster bool
    99  
   100  	// ListenControlAPI specifies the address the control API should listen on.
   101  	ListenControlAPI string
   102  
   103  	// ListenRemoteAPI specifies the address for the remote API that agents
   104  	// and raft members connect to.
   105  	ListenRemoteAPI string
   106  
   107  	// AdvertiseRemoteAPI specifies the address that should be advertised
   108  	// for connections to the remote API (including the raft service).
   109  	AdvertiseRemoteAPI string
   110  
   111  	// NetworkConfig stores network related config for the cluster
   112  	NetworkConfig *cnmallocator.NetworkConfig
   113  
   114  	// Executor specifies the executor to use for the agent.
   115  	Executor exec.Executor
   116  
   117  	// ElectionTick defines the number of ticks without a leader
   118  	// needed to trigger a new election
   119  	ElectionTick uint32
   120  
   121  	// HeartbeatTick defines the number of ticks between each
   122  	// heartbeat sent to other members for health-check purposes
   123  	HeartbeatTick uint32
   124  
   125  	// AutoLockManagers determines whether or not an unlock key will be generated
   126  	// when bootstrapping a new cluster for the first time
   127  	AutoLockManagers bool
   128  
   129  	// UnlockKey is the key to unlock a node - used for decrypting at rest.  This
   130  	// only applies to nodes that have already joined a cluster.
   131  	UnlockKey []byte
   132  
   133  	// Availability allows a user to control the current scheduling status of a node
   134  	Availability api.NodeSpec_Availability
   135  
   136  	// PluginGetter provides access to docker's plugin inventory.
   137  	PluginGetter plugingetter.PluginGetter
   138  
   139  	// FIPS is a boolean stating whether the node is FIPS enabled
   140  	FIPS bool
   141  }
   142  
   143  // Node implements the primary node functionality for a member of a swarm
   144  // cluster. Node handles workloads and may also run as a manager.
   145  type Node struct {
   146  	sync.RWMutex
   147  	config           *Config
   148  	remotes          *persistentRemotes
   149  	connBroker       *connectionbroker.Broker
   150  	role             string
   151  	roleCond         *sync.Cond
   152  	conn             *grpc.ClientConn
   153  	connCond         *sync.Cond
   154  	nodeID           string
   155  	started          chan struct{}
   156  	startOnce        sync.Once
   157  	stopped          chan struct{}
   158  	stopOnce         sync.Once
   159  	ready            chan struct{} // closed when the agent has completed registration and the manager (if enabled) is ready to receive control requests
   160  	closed           chan struct{}
   161  	err              error
   162  	agent            *agent.Agent
   163  	manager          *manager.Manager
   164  	notifyNodeChange chan *agent.NodeChanges // used by the agent to relay node updates from the dispatcher Session stream to (*Node).run
   165  	unlockKey        []byte
   166  	vxlanUDPPort     uint32
   167  }
   168  
   169  type lastSeenRole struct {
   170  	role api.NodeRole
   171  }
   172  
   173  // observe notes the latest value of this node's role, and returns true if it
   174  // is the first value seen, or is different from the most recently seen value.
   175  func (l *lastSeenRole) observe(newRole api.NodeRole) bool {
   176  	changed := l.role != newRole
   177  	l.role = newRole
   178  	return changed
   179  }
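
        // A minimal sketch of the edge-triggered behaviour (the initial role
        // below is chosen purely for illustration):
        //
        //	l := lastSeenRole{role: api.NodeRoleWorker}
        //	l.observe(api.NodeRoleWorker)  // false: same as the last seen value
        //	l.observe(api.NodeRoleManager) // true: the role changed
        //	l.observe(api.NodeRoleManager) // false: no further change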
   180  
   181  // RemoteAPIAddr returns the address on which the remote manager API listens.
   182  // It returns an error if the node is not a manager.
   183  func (n *Node) RemoteAPIAddr() (string, error) {
   184  	n.RLock()
   185  	defer n.RUnlock()
   186  	if n.manager == nil {
   187  		return "", errors.New("manager is not running")
   188  	}
   189  	addr := n.manager.Addr()
   190  	if addr == "" {
   191  		return "", errors.New("manager addr is not set")
   192  	}
   193  	return addr, nil
   194  }
   195  
   196  // New returns a new Node instance.
   197  func New(c *Config) (*Node, error) {
   198  	if err := os.MkdirAll(c.StateDir, 0700); err != nil {
   199  		return nil, err
   200  	}
   201  	stateFile := filepath.Join(c.StateDir, stateFilename)
   202  	dt, err := ioutil.ReadFile(stateFile)
   203  	var p []api.Peer
   204  	if err != nil && !os.IsNotExist(err) {
   205  		return nil, err
   206  	}
   207  	if err == nil {
   208  		if err := json.Unmarshal(dt, &p); err != nil {
   209  			return nil, err
   210  		}
   211  	}
   212  	n := &Node{
   213  		remotes:          newPersistentRemotes(stateFile, p...),
   214  		role:             ca.WorkerRole,
   215  		config:           c,
   216  		started:          make(chan struct{}),
   217  		stopped:          make(chan struct{}),
   218  		closed:           make(chan struct{}),
   219  		ready:            make(chan struct{}),
   220  		notifyNodeChange: make(chan *agent.NodeChanges, 1),
   221  		unlockKey:        c.UnlockKey,
   222  	}
   223  
   224  	if n.config.JoinAddr != "" || n.config.ForceNewCluster {
   225  		n.remotes = newPersistentRemotes(filepath.Join(n.config.StateDir, stateFilename))
   226  		if n.config.JoinAddr != "" {
   227  			n.remotes.Observe(api.Peer{Addr: n.config.JoinAddr}, remotes.DefaultObservationWeight)
   228  		}
   229  	}
   230  
   231  	n.connBroker = connectionbroker.New(n.remotes)
   232  
   233  	n.roleCond = sync.NewCond(n.RLocker())
   234  	n.connCond = sync.NewCond(n.RLocker())
   235  	return n, nil
   236  }
   237  
   238  // BindRemote starts a listener that exposes the remote API.
   239  func (n *Node) BindRemote(ctx context.Context, listenAddr string, advertiseAddr string) error {
   240  	n.RLock()
   241  	defer n.RUnlock()
   242  
   243  	if n.manager == nil {
   244  		return errors.New("manager is not running")
   245  	}
   246  
   247  	return n.manager.BindRemote(ctx, manager.RemoteAddrs{
   248  		ListenAddr:    listenAddr,
   249  		AdvertiseAddr: advertiseAddr,
   250  	})
   251  }
   252  
   253  // Start starts a node instance.
   254  func (n *Node) Start(ctx context.Context) error {
   255  	err := errNodeStarted
   256  
   257  	n.startOnce.Do(func() {
   258  		close(n.started)
   259  		go n.run(ctx)
   260  		err = nil // clear error above, only once.
   261  	})
   262  	return err
   263  }
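
        // A rough lifecycle sketch for callers of this package (the state
        // directory, addresses, and executor below are hypothetical
        // placeholders, not defaults; ctx, joinToken and executor are assumed
        // to be in scope):
        //
        //	n, err := New(&Config{
        //		Hostname:  "worker-1",
        //		StateDir:  "/var/lib/swarmd",
        //		JoinAddr:  "10.0.0.1:4242", // omit to bootstrap a new cluster
        //		JoinToken: joinToken,
        //		Executor:  executor, // an exec.Executor implementation
        //	})
        //	if err != nil {
        //		return err
        //	}
        //	if err := n.Start(ctx); err != nil {
        //		return err
        //	}
        //	<-n.Ready() // the agent (and manager, if any) is up
        //
        //	// ... use the node, then shut down:
        //	if err := n.Stop(ctx); err != nil {
        //		return err
        //	}
        //	return n.Err(ctx) // the error, if any, that terminated the run loop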
   264  
   265  func (n *Node) currentRole() api.NodeRole {
   266  	n.Lock()
   267  	currentRole := api.NodeRoleWorker
   268  	if n.role == ca.ManagerRole {
   269  		currentRole = api.NodeRoleManager
   270  	}
   271  	n.Unlock()
   272  	return currentRole
   273  }
   274  
   275  // configVXLANUDPPort sets the VXLAN UDP port in libnetwork
   276  func configVXLANUDPPort(ctx context.Context, vxlanUDPPort uint32) {
   277  	if err := overlayutils.ConfigVXLANUDPPort(vxlanUDPPort); err != nil {
   278  		log.G(ctx).WithError(err).Error("failed to configure VXLAN UDP port")
   279  		return
   280  	}
   281  	logrus.Infof("initialized VXLAN UDP port to %d", vxlanUDPPort)
   282  }
   283  
   284  func (n *Node) run(ctx context.Context) (err error) {
   285  	defer func() {
   286  		n.err = err
   287  		// close the n.closed channel to indicate that the Node has completely
   288  		// terminated
   289  		close(n.closed)
   290  	}()
   291  	ctx, cancel := context.WithCancel(ctx)
   292  	defer cancel()
   293  	ctx = log.WithModule(ctx, "node")
   294  
   295  	// set up a goroutine to monitor the stop channel, and cancel the run
   296  	// context when the node is stopped
   297  	go func(ctx context.Context) {
   298  		select {
   299  		case <-ctx.Done():
   300  		case <-n.stopped:
   301  			cancel()
   302  		}
   303  	}(ctx)
   304  
   305  	// First things first: get the SecurityConfig for this node. This includes
   306  	// the certificate information, and the root CA.  It also returns a cancel
   307  	// function. This is needed because the SecurityConfig is a live object,
   308  	// and provides a watch queue so that the caller can observe changes to the
   309  	// security config. This watch queue has to be closed, which is done by the
   310  	// secConfigCancel function.
   311  	//
   312  	// It's also noteworthy that loading the security config with the node's
   313  	// loadSecurityConfig method has the side effect of setting the node's ID
   314  	// and role fields, meaning it isn't until after that point that the node
   315  	// knows its ID.
   316  	paths := ca.NewConfigPaths(filepath.Join(n.config.StateDir, certDirectory))
   317  	securityConfig, secConfigCancel, err := n.loadSecurityConfig(ctx, paths)
   318  	if err != nil {
   319  		return err
   320  	}
   321  	defer secConfigCancel()
   322  
   323  	// Now that we have the security config, we can get a TLSRenewer, which is
   324  	// a live component handling certificate rotation.
   325  	renewer := ca.NewTLSRenewer(securityConfig, n.connBroker, paths.RootCA)
   326  
   327  	// Now that we have the security goop all loaded, we know the Node's ID and
   328  	// can add that to our logging context.
   329  	ctx = log.WithLogger(ctx, log.G(ctx).WithField("node.id", n.NodeID()))
   330  
   331  	// Next, set up the task database. The task database is used by the agent
   332  	// to keep a persistent local record of its tasks. Since every manager also
   333  	// has an agent, every node needs a task database, so we do this regardless
   334  	// of role.
   335  	taskDBPath := filepath.Join(n.config.StateDir, "worker", "tasks.db")
   336  	// Doing os.MkdirAll will create the necessary directory path for the task
   337  	// database if it doesn't already exist, and if it does already exist, no
   338  	// error will be returned, so we use this regardless of whether this node
   339  	// is new or not.
   340  	if err := os.MkdirAll(filepath.Dir(taskDBPath), 0777); err != nil {
   341  		return err
   342  	}
   343  
   344  	db, err := bolt.Open(taskDBPath, 0666, nil)
   345  	if err != nil {
   346  		return err
   347  	}
   348  	defer db.Close()
   349  
   350  	// agentDone is a channel that represents the agent having exited. We start
   351  	// the agent in a goroutine a few blocks down, and before that goroutine
   352  	// exits, it closes this channel to signal to the goroutine just below to
   353  	// terminate.
   354  	agentDone := make(chan struct{})
   355  
   356  	// This goroutine is the node changes loop. The n.notifyNodeChange
   357  	// channel is passed to the agent. When a new node object gets sent down
   358  	// to the agent, it gets passed back up to this node object, so that we can
   359  	// check if a role update or a root certificate rotation is required. This
   360  	// handles root rotation, but the renewer handles regular certificate
   361  	// rotation.
   362  	go func() {
   363  		// lastNodeDesiredRole is the last-seen value of Node.Spec.DesiredRole,
   364  		// used to make role changes "edge triggered" and avoid renewal loops.
   365  		lastNodeDesiredRole := lastSeenRole{role: n.currentRole()}
   366  
   367  		for {
   368  			select {
   369  			case <-agentDone:
   370  				return
   371  			case nodeChanges := <-n.notifyNodeChange:
   372  				if nodeChanges.Node != nil {
   373  					if nodeChanges.Node.VXLANUDPPort != 0 {
   374  						n.vxlanUDPPort = nodeChanges.Node.VXLANUDPPort
   375  						configVXLANUDPPort(ctx, n.vxlanUDPPort)
   376  					}
   377  					// This is a bit complex to be backward compatible with older CAs that
   378  					// don't support the Node.Role field. They only use what's presently
   379  					// called DesiredRole.
   380  					// 1) If DesiredRole changes, kick off a certificate renewal. The renewal
   381  					//    is delayed slightly to give Role time to change as well if this is
   382  					//    a newer CA. If the certificate we get back doesn't have the expected
   383  					//    role, we continue renewing with exponential backoff.
   384  					// 2) If the server is sending us IssuanceStateRotate, renew the cert as
   385  					//    requested by the CA.
   386  					desiredRoleChanged := lastNodeDesiredRole.observe(nodeChanges.Node.Spec.DesiredRole)
   387  					if desiredRoleChanged {
   388  						switch nodeChanges.Node.Spec.DesiredRole {
   389  						case api.NodeRoleManager:
   390  							renewer.SetExpectedRole(ca.ManagerRole)
   391  						case api.NodeRoleWorker:
   392  							renewer.SetExpectedRole(ca.WorkerRole)
   393  						}
   394  					}
   395  					if desiredRoleChanged || nodeChanges.Node.Certificate.Status.State == api.IssuanceStateRotate {
   396  						renewer.Renew()
   397  					}
   398  				}
   399  
   400  				if nodeChanges.RootCert != nil {
   401  					if bytes.Equal(nodeChanges.RootCert, securityConfig.RootCA().Certs) {
   402  						continue
   403  					}
   404  					newRootCA, err := ca.NewRootCA(nodeChanges.RootCert, nil, nil, ca.DefaultNodeCertExpiration, nil)
   405  					if err != nil {
   406  						log.G(ctx).WithError(err).Error("invalid new root certificate from the dispatcher")
   407  						continue
   408  					}
   409  					if err := securityConfig.UpdateRootCA(&newRootCA); err != nil {
   410  						log.G(ctx).WithError(err).Error("could not use new root CA from dispatcher")
   411  						continue
   412  					}
   413  					if err := ca.SaveRootCA(newRootCA, paths.RootCA); err != nil {
   414  						log.G(ctx).WithError(err).Error("could not save new root certificate from the dispatcher")
   415  						continue
   416  					}
   417  				}
   418  			}
   419  		}
   420  	}()
   421  
   422  	// Now we're going to launch the main component goroutines, the Agent, the
   423  	// Manager (maybe) and the certificate updates loop. We shouldn't exit
   424  	// the node object until all 3 of these components have terminated, so we
   425  	// create a waitgroup to block termination of the node until then
   426  	var wg sync.WaitGroup
   427  	wg.Add(3)
   428  
   429  	// These two blocks update some of the metrics settings.
   430  	nodeInfo.WithValues(
   431  		securityConfig.ClientTLSCreds.Organization(),
   432  		securityConfig.ClientTLSCreds.NodeID(),
   433  	).Set(1)
   434  
   435  	if n.currentRole() == api.NodeRoleManager {
   436  		nodeManager.Set(1)
   437  	} else {
   438  		nodeManager.Set(0)
   439  	}
   440  
   441  	// We created the renewer way up when we were creating the SecurityConfig
   442  	// at the beginning of run, but now we're ready to start receiving
   443  	// CertificateUpdates, and launch a goroutine to handle this. updates is a
   444  	// channel we iterate over; it carries the results of certificate renewals.
   445  	updates := renewer.Start(ctx)
   446  	go func() {
   447  		for certUpdate := range updates {
   448  			if certUpdate.Err != nil {
   449  				logrus.Warnf("error renewing TLS certificate: %v", certUpdate.Err)
   450  				continue
   451  			}
   452  			// Set the new role, and notify our waiting role changing logic
   453  			// that the role has changed.
   454  			n.Lock()
   455  			n.role = certUpdate.Role
   456  			n.roleCond.Broadcast()
   457  			n.Unlock()
   458  
   459  			// Export the new role for metrics
   460  			if n.currentRole() == api.NodeRoleManager {
   461  				nodeManager.Set(1)
   462  			} else {
   463  				nodeManager.Set(0)
   464  			}
   465  		}
   466  
   467  		wg.Done()
   468  	}()
   469  
   470  	// and, finally, start the two main components: the manager and the agent
   471  	role := n.role
   472  
   473  	// Channels to signal when these respective components are up and ready to
   474  	// go.
   475  	managerReady := make(chan struct{})
   476  	agentReady := make(chan struct{})
   477  	// these variables are defined in this scope so that they're closed over by
   478  	// the respective goroutines below.
   479  	var managerErr error
   480  	var agentErr error
   481  	go func() {
   482  		// superviseManager is a routine that watches our manager role
   483  		managerErr = n.superviseManager(ctx, securityConfig, paths.RootCA, managerReady, renewer) // store err and loop
   484  		wg.Done()
   485  		cancel()
   486  	}()
   487  	go func() {
   488  		agentErr = n.runAgent(ctx, db, securityConfig, agentReady)
   489  		wg.Done()
   490  		cancel()
   491  		close(agentDone)
   492  	}()
   493  
   494  	// This goroutine is what signals that the node has fully started by
   495  	// closing the n.ready channel. First, it waits for the agent to start.
   496  	// Then, if this node is a manager, it will wait on either the manager
   497  	// starting, or the node role changing. This ensures that if the node is
   498  	// demoted before the manager starts, it doesn't get stuck.
   499  	go func() {
   500  		<-agentReady
   501  		if role == ca.ManagerRole {
   502  			workerRole := make(chan struct{})
   503  			waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
   504  			go func() {
   505  				if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
   506  					close(workerRole)
   507  				}
   508  			}()
   509  			select {
   510  			case <-managerReady:
   511  			case <-workerRole:
   512  			}
   513  			waitRoleCancel()
   514  		}
   515  		close(n.ready)
   516  	}()
   517  
   518  	// And, finally, we park and wait for the node to close up. If we get any
   519  	// error other than context canceled, we return it.
   520  	wg.Wait()
   521  	if managerErr != nil && errors.Cause(managerErr) != context.Canceled {
   522  		return managerErr
   523  	}
   524  	if agentErr != nil && errors.Cause(agentErr) != context.Canceled {
   525  		return agentErr
   526  	}
   527  	// NOTE(dperny): we return err here, but the last time I can see err being
   528  	// set is when we open the boltdb way up in this method, so I don't know
   529  	// what returning err is supposed to do.
   530  	return err
   531  }
   532  
   533  // Stop stops node execution
   534  func (n *Node) Stop(ctx context.Context) error {
   535  	select {
   536  	case <-n.started:
   537  	default:
   538  		return errNodeNotStarted
   539  	}
   540  	// ask agent to clean up assignments
   541  	n.Lock()
   542  	if n.agent != nil {
   543  		if err := n.agent.Leave(ctx); err != nil {
   544  			log.G(ctx).WithError(err).Error("agent failed to clean up assignments")
   545  		}
   546  	}
   547  	n.Unlock()
   548  
   549  	n.stopOnce.Do(func() {
   550  		close(n.stopped)
   551  	})
   552  
   553  	select {
   554  	case <-n.closed:
   555  		return nil
   556  	case <-ctx.Done():
   557  		return ctx.Err()
   558  	}
   559  }
   560  
   561  // Err returns the error that caused the node to shut down, or nil. Err blocks
   562  // until the node has fully shut down.
   563  func (n *Node) Err(ctx context.Context) error {
   564  	select {
   565  	case <-n.closed:
   566  		return n.err
   567  	case <-ctx.Done():
   568  		return ctx.Err()
   569  	}
   570  }
   571  
   572  // runAgent starts the node's agent. When the agent has started, the provided
   573  // ready channel is closed. When the agent exits, this will return the error
   574  // that caused it.
   575  func (n *Node) runAgent(ctx context.Context, db *bolt.DB, securityConfig *ca.SecurityConfig, ready chan<- struct{}) error {
   576  	// First, get a channel for knowing when a remote peer has been selected.
   577  	// The value received from remotesCh is ignored; we just need to know
   578  	// when a peer has been selected
   579  	remotesCh := n.remotes.WaitSelect(ctx)
   580  	// then, we set up a new context to pass specifically to
   581  	// ListenControlSocket, and start that method to wait on a connection on
   582  	// the cluster control API.
   583  	waitCtx, waitCancel := context.WithCancel(ctx)
   584  	controlCh := n.ListenControlSocket(waitCtx)
   585  
   586  	// The goal here is to wait either until we have a remote peer selected,
   587  	// or until we have a connection to the control socket.
   588  	// These are both ways to connect the agent to a manager,
   589  	// and we need to wait until one or the other is
   590  	// available to start the agent.
   591  waitPeer:
   592  	for {
   593  		select {
   594  		case <-ctx.Done():
   595  			break waitPeer
   596  		case <-remotesCh:
   597  			break waitPeer
   598  		case conn := <-controlCh:
   599  			// conn will probably be nil the first time we receive on this channel,
   600  			// but only a non-nil conn represents an actual connection.
   601  			if conn != nil {
   602  				break waitPeer
   603  			}
   604  		}
   605  	}
   606  
   607  	// We can stop listening for new control socket connections once we're
   608  	// ready
   609  	waitCancel()
   610  
   611  	// NOTE(dperny): not sure why we need to recheck the context here. I guess
   612  	// it avoids a race if the context was canceled at the same time that a
   613  	// connection or peer was available. I think it's just an optimization.
   614  	select {
   615  	case <-ctx.Done():
   616  		return ctx.Err()
   617  	default:
   618  	}
   619  
   620  	// Now we can go ahead and configure, create, and start the agent.
   621  	secChangesCh, secChangesCancel := securityConfig.Watch()
   622  	defer secChangesCancel()
   623  
   624  	rootCA := securityConfig.RootCA()
   625  	issuer := securityConfig.IssuerInfo()
   626  
   627  	agentConfig := &agent.Config{
   628  		Hostname:         n.config.Hostname,
   629  		ConnBroker:       n.connBroker,
   630  		Executor:         n.config.Executor,
   631  		DB:               db,
   632  		NotifyNodeChange: n.notifyNodeChange,
   633  		NotifyTLSChange:  secChangesCh,
   634  		Credentials:      securityConfig.ClientTLSCreds,
   635  		NodeTLSInfo: &api.NodeTLSInfo{
   636  			TrustRoot:           rootCA.Certs,
   637  			CertIssuerPublicKey: issuer.PublicKey,
   638  			CertIssuerSubject:   issuer.Subject,
   639  		},
   640  		FIPS: n.config.FIPS,
   641  	}
   642  	// if a join address has been specified, then if the agent fails to connect
   643  	// due to a TLS error, fail fast - don't keep re-trying to join
   644  	if n.config.JoinAddr != "" {
   645  		agentConfig.SessionTracker = &firstSessionErrorTracker{}
   646  	}
   647  
   648  	a, err := agent.New(agentConfig)
   649  	if err != nil {
   650  		return err
   651  	}
   652  	if err := a.Start(ctx); err != nil {
   653  		return err
   654  	}
   655  
   656  	n.Lock()
   657  	n.agent = a
   658  	n.Unlock()
   659  
   660  	defer func() {
   661  		n.Lock()
   662  		n.agent = nil
   663  		n.Unlock()
   664  	}()
   665  
   666  	// when the agent indicates that it is ready, we close the ready channel.
   667  	go func() {
   668  		<-a.Ready()
   669  		close(ready)
   670  	}()
   671  
   672  	// todo: manually call stop on context cancellation?
   673  
   674  	return a.Err(context.Background())
   675  }
   676  
   677  // Ready returns a channel that is closed after the node's initialization
   678  // has completed for the first time.
   679  func (n *Node) Ready() <-chan struct{} {
   680  	return n.ready
   681  }
   682  
   683  func (n *Node) setControlSocket(conn *grpc.ClientConn) {
   684  	n.Lock()
   685  	if n.conn != nil {
   686  		n.conn.Close()
   687  	}
   688  	n.conn = conn
   689  	n.connBroker.SetLocalConn(conn)
   690  	n.connCond.Broadcast()
   691  	n.Unlock()
   692  }
   693  
   694  // ListenControlSocket listens for changes to the connection used to manage
   695  // the cluster control API.
   696  func (n *Node) ListenControlSocket(ctx context.Context) <-chan *grpc.ClientConn {
   697  	c := make(chan *grpc.ClientConn, 1)
   698  	n.RLock()
   699  	conn := n.conn
   700  	c <- conn
   701  	done := make(chan struct{})
   702  	go func() {
   703  		select {
   704  		case <-ctx.Done():
   705  			n.connCond.Broadcast()
   706  		case <-done:
   707  		}
   708  	}()
   709  	go func() {
   710  		defer close(c)
   711  		defer close(done)
   712  		defer n.RUnlock()
   713  		for {
   714  			select {
   715  			case <-ctx.Done():
   716  				return
   717  			default:
   718  			}
   719  			if conn == n.conn {
   720  				n.connCond.Wait()
   721  				continue
   722  			}
   723  			conn = n.conn
   724  			select {
   725  			case c <- conn:
   726  			case <-ctx.Done():
   727  				return
   728  			}
   729  		}
   730  	}()
   731  	return c
   732  }
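
        // A minimal consumption sketch (assuming ctx and n are in scope, and
        // using the generated api.NewControlClient constructor): each value
        // received replaces the previous connection, and a nil value means
        // there is currently no local manager connection.
        //
        //	for conn := range n.ListenControlSocket(ctx) {
        //		if conn == nil {
        //			continue
        //		}
        //		client := api.NewControlClient(conn)
        //		_ = client // issue control API requests against the local manager
        //	}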
   733  
   734  // NodeID returns the current node's ID. May be empty if not set.
   735  func (n *Node) NodeID() string {
   736  	n.RLock()
   737  	defer n.RUnlock()
   738  	return n.nodeID
   739  }
   740  
   741  // Manager returns the manager instance started by the node. May be nil.
   742  func (n *Node) Manager() *manager.Manager {
   743  	n.RLock()
   744  	defer n.RUnlock()
   745  	return n.manager
   746  }
   747  
   748  // Agent returns the agent instance started by the node. May be nil.
   749  func (n *Node) Agent() *agent.Agent {
   750  	n.RLock()
   751  	defer n.RUnlock()
   752  	return n.agent
   753  }
   754  
   755  // IsStateDirty returns true if any objects have been added to raft which make
   756  // the state "dirty". Currently, the existence of any object other than the
   757  // default cluster or the local node implies a dirty state.
   758  func (n *Node) IsStateDirty() (bool, error) {
   759  	n.RLock()
   760  	defer n.RUnlock()
   761  
   762  	if n.manager == nil {
   763  		return false, errors.New("node is not a manager")
   764  	}
   765  
   766  	return n.manager.IsStateDirty()
   767  }
   768  
   769  // Remotes returns a list of peers known to the node.
   770  func (n *Node) Remotes() []api.Peer {
   771  	weights := n.remotes.Weights()
   772  	remotes := make([]api.Peer, 0, len(weights))
   773  	for p := range weights {
   774  		remotes = append(remotes, p)
   775  	}
   776  	return remotes
   777  }
   778  
   779  // Given a security config, returns whether its cluster ID indicates that the
   780  // cluster mandates FIPS mode.  Such cluster IDs start with "FIPS." as a prefix.
   781  func isMandatoryFIPSClusterID(securityConfig *ca.SecurityConfig) bool {
   782  	return strings.HasPrefix(securityConfig.ClientTLSCreds.Organization(), "FIPS.")
   783  }
   784  
   785  // Given a join token, returns whether it indicates that the cluster mandates FIPS
   786  // mode.
   787  func isMandatoryFIPSClusterJoinToken(joinToken string) bool {
   788  	if parsed, err := ca.ParseJoinToken(joinToken); err == nil {
   789  		return parsed.FIPS
   790  	}
   791  	return false
   792  }
   793  
   794  func generateFIPSClusterID() string {
   795  	return "FIPS." + identity.NewID()
   796  }
   797  
   798  func (n *Node) loadSecurityConfig(ctx context.Context, paths *ca.SecurityConfigPaths) (*ca.SecurityConfig, func() error, error) {
   799  	var (
   800  		securityConfig *ca.SecurityConfig
   801  		cancel         func() error
   802  	)
   803  
   804  	krw := ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{FIPS: n.config.FIPS})
   805  	// if FIPS is required, we want to make sure our key is stored in PKCS8 format
   806  	if n.config.FIPS {
   807  		krw.SetKeyFormatter(keyutils.FIPS)
   808  	}
   809  	if err := krw.Migrate(); err != nil {
   810  		return nil, nil, err
   811  	}
   812  
   813  	// Check if we already have valid certificates on disk.
   814  	rootCA, err := ca.GetLocalRootCA(paths.RootCA)
   815  	if err != nil && err != ca.ErrNoLocalRootCA {
   816  		return nil, nil, err
   817  	}
   818  	if err == nil {
   819  		// if forcing a new cluster, we allow the certificates to be expired - a new set will be generated
   820  		securityConfig, cancel, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
   821  		if err != nil {
   822  			_, isInvalidKEK := errors.Cause(err).(ca.ErrInvalidKEK)
   823  			if isInvalidKEK {
   824  				return nil, nil, ErrInvalidUnlockKey
   825  			} else if !os.IsNotExist(err) {
   826  				return nil, nil, errors.Wrapf(err, "error while loading TLS certificate in %s", paths.Node.Cert)
   827  			}
   828  		}
   829  	}
   830  
   831  	if securityConfig == nil {
   832  		if n.config.JoinAddr == "" {
   833  			// if we're not joining a cluster, bootstrap a new one - and we have to set the unlock key
   834  			n.unlockKey = nil
   835  			if n.config.AutoLockManagers {
   836  				n.unlockKey = encryption.GenerateSecretKey()
   837  			}
   838  			krw = ca.NewKeyReadWriter(paths.Node, n.unlockKey, &manager.RaftDEKData{FIPS: n.config.FIPS})
   839  			rootCA, err = ca.CreateRootCA(ca.DefaultRootCN)
   840  			if err != nil {
   841  				return nil, nil, err
   842  			}
   843  			if err := ca.SaveRootCA(rootCA, paths.RootCA); err != nil {
   844  				return nil, nil, err
   845  			}
   846  			log.G(ctx).Debug("generated CA key and certificate")
   847  		} else if err == ca.ErrNoLocalRootCA { // from previous error loading the root CA from disk
   848  			// if we are attempting to join another cluster, which has a FIPS join token, and we are not FIPS, error
   849  			if n.config.JoinAddr != "" && isMandatoryFIPSClusterJoinToken(n.config.JoinToken) && !n.config.FIPS {
   850  				return nil, nil, ErrMandatoryFIPS
   851  			}
   852  			rootCA, err = ca.DownloadRootCA(ctx, paths.RootCA, n.config.JoinToken, n.connBroker)
   853  			if err != nil {
   854  				return nil, nil, err
   855  			}
   856  			log.G(ctx).Debug("downloaded CA certificate")
   857  		}
   858  
   859  		// Obtain new certs and setup TLS certificates renewal for this node:
   860  		// - If certificates weren't present on disk, we call CreateSecurityConfig, which blocks
   861  		//   until a valid certificate has been issued.
   862  		// - We wait for CreateSecurityConfig to finish since we need a certificate to operate.
   863  
   864  		// Attempt to load certificate from disk
   865  		securityConfig, cancel, err = ca.LoadSecurityConfig(ctx, rootCA, krw, n.config.ForceNewCluster)
   866  		if err == nil {
   867  			log.G(ctx).WithFields(logrus.Fields{
   868  				"node.id": securityConfig.ClientTLSCreds.NodeID(),
   869  			}).Debugf("loaded TLS certificate")
   870  		} else {
   871  			if _, ok := errors.Cause(err).(ca.ErrInvalidKEK); ok {
   872  				return nil, nil, ErrInvalidUnlockKey
   873  			}
   874  			log.G(ctx).WithError(err).Debugf("no node credentials found in: %s", krw.Target())
   875  
   876  			// if we are attempting to join another cluster, which has a FIPS join token, and we are not FIPS, error
   877  			if n.config.JoinAddr != "" && isMandatoryFIPSClusterJoinToken(n.config.JoinToken) && !n.config.FIPS {
   878  				return nil, nil, ErrMandatoryFIPS
   879  			}
   880  
   881  			requestConfig := ca.CertificateRequestConfig{
   882  				Token:        n.config.JoinToken,
   883  				Availability: n.config.Availability,
   884  				ConnBroker:   n.connBroker,
   885  			}
   886  			// If this is a new cluster, we want the cluster ID to carry a "FIPS." prefix
   887  			if n.config.FIPS {
   888  				requestConfig.Organization = generateFIPSClusterID()
   889  			}
   890  			securityConfig, cancel, err = rootCA.CreateSecurityConfig(ctx, krw, requestConfig)
   891  
   892  			if err != nil {
   893  				return nil, nil, err
   894  			}
   895  		}
   896  	}
   897  
   898  	if isMandatoryFIPSClusterID(securityConfig) && !n.config.FIPS {
   899  		return nil, nil, ErrMandatoryFIPS
   900  	}
   901  
   902  	n.Lock()
   903  	n.role = securityConfig.ClientTLSCreds.Role()
   904  	n.nodeID = securityConfig.ClientTLSCreds.NodeID()
   905  	n.roleCond.Broadcast()
   906  	n.Unlock()
   907  
   908  	return securityConfig, cancel, nil
   909  }
   910  
   911  func (n *Node) initManagerConnection(ctx context.Context, ready chan<- struct{}) error {
   912  	opts := []grpc.DialOption{
   913  		grpc.WithUnaryInterceptor(grpc_prometheus.UnaryClientInterceptor),
   914  		grpc.WithStreamInterceptor(grpc_prometheus.StreamClientInterceptor),
   915  		grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)),
   916  	}
   917  	insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
   918  	opts = append(opts, grpc.WithTransportCredentials(insecureCreds))
   919  	addr := n.config.ListenControlAPI
   920  	opts = append(opts, grpc.WithDialer(
   921  		func(addr string, timeout time.Duration) (net.Conn, error) {
   922  			return xnet.DialTimeoutLocal(addr, timeout)
   923  		}))
   924  	conn, err := grpc.Dial(addr, opts...)
   925  	if err != nil {
   926  		return err
   927  	}
   928  	client := api.NewHealthClient(conn)
   929  	for {
   930  		resp, err := client.Check(ctx, &api.HealthCheckRequest{Service: "ControlAPI"})
   931  		if err != nil {
   932  			return err
   933  		}
   934  		if resp.Status == api.HealthCheckResponse_SERVING {
   935  			break
   936  		}
   937  		time.Sleep(500 * time.Millisecond)
   938  	}
   939  	n.setControlSocket(conn)
   940  	if ready != nil {
   941  		close(ready)
   942  	}
   943  	return nil
   944  }
   945  
   946  // waitRole takes a context and a role. It then blocks until the context is
   947  // canceled or the node's role updates to the provided role. It returns nil
   948  // when the node has acquired the provided role, or ctx.Err() if the context
   949  // is canceled.
   950  func (n *Node) waitRole(ctx context.Context, role string) error {
   951  	n.roleCond.L.Lock()
   952  	if role == n.role {
   953  		n.roleCond.L.Unlock()
   954  		return nil
   955  	}
   956  	finishCh := make(chan struct{})
   957  	defer close(finishCh)
   958  	go func() {
   959  		select {
   960  		case <-finishCh:
   961  		case <-ctx.Done():
   962  			// call Broadcast to shut down this function
   963  			n.roleCond.Broadcast()
   964  		}
   965  	}()
   966  	defer n.roleCond.L.Unlock()
   967  	for role != n.role {
   968  		n.roleCond.Wait()
   969  		select {
   970  		case <-ctx.Done():
   971  			return ctx.Err()
   972  		default:
   973  		}
   974  	}
   975  
   976  	return nil
   977  }
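
        // The pattern used elsewhere in this file to turn waitRole into a signal
        // channel looks roughly like this; the channel is closed only if the role
        // was actually acquired, not if the context was canceled:
        //
        //	workerRole := make(chan struct{})
        //	waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
        //	go func() {
        //		if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
        //			close(workerRole)
        //		}
        //	}()
        //	defer waitRoleCancel()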
   978  
   979  // runManager runs the manager on this node. It returns a boolean indicating if
   980  // the stoppage was due to a role change, and an error indicating why the
   981  // manager stopped
   982  func (n *Node) runManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, workerRole <-chan struct{}) (bool, error) {
   983  	// First, set up this manager's advertise and listen addresses, if
   984  	// provided. They might not be provided if this node is joining the cluster
   985  	// instead of creating a new one.
   986  	var remoteAPI *manager.RemoteAddrs
   987  	if n.config.ListenRemoteAPI != "" {
   988  		remoteAPI = &manager.RemoteAddrs{
   989  			ListenAddr:    n.config.ListenRemoteAPI,
   990  			AdvertiseAddr: n.config.AdvertiseRemoteAPI,
   991  		}
   992  	}
   993  
   994  	joinAddr := n.config.JoinAddr
   995  	if joinAddr == "" {
   996  		remoteAddr, err := n.remotes.Select(n.NodeID())
   997  		if err == nil {
   998  			joinAddr = remoteAddr.Addr
   999  		}
  1000  	}
  1001  
  1002  	m, err := manager.New(&manager.Config{
  1003  		ForceNewCluster:  n.config.ForceNewCluster,
  1004  		RemoteAPI:        remoteAPI,
  1005  		ControlAPI:       n.config.ListenControlAPI,
  1006  		SecurityConfig:   securityConfig,
  1007  		ExternalCAs:      n.config.ExternalCAs,
  1008  		JoinRaft:         joinAddr,
  1009  		ForceJoin:        n.config.JoinAddr != "",
  1010  		StateDir:         n.config.StateDir,
  1011  		HeartbeatTick:    n.config.HeartbeatTick,
  1012  		ElectionTick:     n.config.ElectionTick,
  1013  		AutoLockManagers: n.config.AutoLockManagers,
  1014  		UnlockKey:        n.unlockKey,
  1015  		Availability:     n.config.Availability,
  1016  		PluginGetter:     n.config.PluginGetter,
  1017  		RootCAPaths:      rootPaths,
  1018  		FIPS:             n.config.FIPS,
  1019  		NetworkConfig:    n.config.NetworkConfig,
  1020  	})
  1021  	if err != nil {
  1022  		return false, err
  1023  	}
  1024  	// The done channel is used to signal that the manager has exited.
  1025  	done := make(chan struct{})
  1026  	// runErr is an error value set by the goroutine that runs the manager
  1027  	var runErr error
  1028  
  1029  	// The context used to start this might have a logger associated with it
  1030  	// that we'd like to reuse, but we don't want to use that context, so we
  1031  	// pass to the goroutine only the logger, and create a new context with
  1032  	// that logger.
  1033  	go func(logger *logrus.Entry) {
  1034  		if err := m.Run(log.WithLogger(context.Background(), logger)); err != nil {
  1035  			runErr = err
  1036  		}
  1037  		close(done)
  1038  	}(log.G(ctx))
  1039  
  1040  	// clearData is set in the select below. It records why the manager is
  1041  	// stopping, and indicates whether or not to delete raft data and
  1042  	// keys when stopping the manager.
  1043  	var clearData bool
  1044  	defer func() {
  1045  		n.Lock()
  1046  		n.manager = nil
  1047  		n.Unlock()
  1048  		m.Stop(ctx, clearData)
  1049  		<-done
  1050  		n.setControlSocket(nil)
  1051  	}()
  1052  
  1053  	n.Lock()
  1054  	n.manager = m
  1055  	n.Unlock()
  1056  
  1057  	connCtx, connCancel := context.WithCancel(ctx)
  1058  	defer connCancel()
  1059  
  1060  	// launch a goroutine that will manage our local connection to the manager
  1061  	// from the agent. Remember the managerReady channel created way back in
  1062  	// run? This is actually where we close it. Not when the manager starts,
  1063  	// but when a connection to the control socket has been established.
  1064  	go n.initManagerConnection(connCtx, ready)
  1065  
  1066  	// wait for manager stop or for role change
  1067  	// The manager can be stopped in one of 4 ways:
  1068  	// 1. The manager may have errored out and returned an error, closing the
  1069  	//    done channel in the process
  1070  	// 2. The node may have been demoted to a worker. In this case, we're gonna
  1071  	//    have to stop the manager ourselves, setting clearData to true so the
  1072  	//    local raft data, certs, keys, etc, are nuked.
  1073  	// 3. The manager may have been booted from raft. This could happen if it's
  1074  	//    removed from the raft quorum but the role update hasn't registered
  1075  	//    yet. The fact that there is more than 1 code path to cause the
  1076  	//    manager to exit is a possible source of bugs.
  1077  	// 4. The context may have been canceled from above, in which case we
  1078  	//    should stop the manager ourselves, but indicate that this is NOT a
  1079  	//    demotion.
  1080  	select {
  1081  	case <-done:
  1082  		return false, runErr
  1083  	case <-workerRole:
  1084  		log.G(ctx).Info("role changed to worker, stopping manager")
  1085  		clearData = true
  1086  	case <-m.RemovedFromRaft():
  1087  		log.G(ctx).Info("manager removed from raft cluster, stopping manager")
  1088  		clearData = true
  1089  	case <-ctx.Done():
  1090  		return false, ctx.Err()
  1091  	}
  1092  	return clearData, nil
  1093  }
  1094  
  1095  // superviseManager controls whether or not we are running a manager on this
  1096  // node
  1097  func (n *Node) superviseManager(ctx context.Context, securityConfig *ca.SecurityConfig, rootPaths ca.CertPaths, ready chan struct{}, renewer *ca.TLSRenewer) error {
  1098  	// superviseManager is a loop, because we can come in and out of being a
  1099  	// manager, and need to appropriately handle that without disrupting the
  1100  	// node functionality.
  1101  	for {
  1102  		// if we're not a manager, we're just gonna park here and wait until we
  1103  		// are. For normal agent nodes, we'll stay here forever, as intended.
  1104  		if err := n.waitRole(ctx, ca.ManagerRole); err != nil {
  1105  			return err
  1106  		}
  1107  
  1108  		// Once we know we are a manager, we get ourselves ready for when we
  1109  		// lose that role. We create a channel to signal that we've become a
  1110  		// worker, and close it when n.waitRole completes.
  1111  		workerRole := make(chan struct{})
  1112  		waitRoleCtx, waitRoleCancel := context.WithCancel(ctx)
  1113  		go func() {
  1114  			if n.waitRole(waitRoleCtx, ca.WorkerRole) == nil {
  1115  				close(workerRole)
  1116  			}
  1117  		}()
  1118  
  1119  		// the ready channel passed to superviseManager is in turn passed down
  1120  		// to the runManager function. It's used to signal to the caller that
  1121  		// the manager has started.
  1122  		wasRemoved, err := n.runManager(ctx, securityConfig, rootPaths, ready, workerRole)
  1123  		if err != nil {
  1124  			waitRoleCancel()
  1125  			return errors.Wrap(err, "manager stopped")
  1126  		}
  1127  
  1128  		// If the manager stopped running and our role is still
  1129  		// "manager", it's possible that the manager was demoted and
  1130  		// the agent hasn't realized this yet. We should wait for the
  1131  		// role to change instead of restarting the manager immediately.
  1132  		err = func() error {
  1133  			timer := time.NewTimer(roleChangeTimeout)
  1134  			defer timer.Stop()
  1135  			defer waitRoleCancel()
  1136  
  1137  			select {
  1138  			case <-timer.C:
  1139  			case <-workerRole:
  1140  				return nil
  1141  			case <-ctx.Done():
  1142  				return ctx.Err()
  1143  			}
  1144  
  1145  			if !wasRemoved {
  1146  				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
  1147  				return nil
  1148  			}
  1149  			// We need to be extra careful about restarting the
  1150  			// manager. It may cause the node to wrongly join under
  1151  			// a new Raft ID. Since we didn't see a role change
  1152  			// yet, force a certificate renewal. If the certificate
  1153  			// comes back with a worker role, we know we shouldn't
  1154  			// restart the manager. However, if we don't see
  1155  			// workerRole get closed, it means we didn't switch to
  1156  			// a worker certificate, either because we couldn't
  1157  			// contact a working CA, or because we've been
  1158  			// re-promoted. In this case, we must assume we were
  1159  			// re-promoted, and restart the manager.
  1160  			log.G(ctx).Warn("failed to get worker role after manager stop, forcing certificate renewal")
  1161  
  1162  			// We can safely reset this timer without stopping/draining the timer
  1163  			// first because the only way the code has reached this point is if the timer
  1164  			// has already expired - if the role changed or the context were canceled,
  1165  			// then we would have returned already.
  1166  			timer.Reset(roleChangeTimeout)
  1167  
  1168  			renewer.Renew()
  1169  
  1170  			// Now that the renewal request has been sent to the
  1171  			// renewal goroutine, wait for a change in role.
  1172  			select {
  1173  			case <-timer.C:
  1174  				log.G(ctx).Warn("failed to get worker role after manager stop, restarting manager")
  1175  			case <-workerRole:
  1176  			case <-ctx.Done():
  1177  				return ctx.Err()
  1178  			}
  1179  			return nil
  1180  		}()
  1181  		if err != nil {
  1182  			return err
  1183  		}
  1184  
  1185  		// set ready to nil after the first time we've gone through this, as we
  1186  		// don't need to signal after the first time that the manager is ready.
  1187  		ready = nil
  1188  	}
  1189  }
  1190  
  1191  // DowngradeKey reverts the node key to an older format so that it can
  1192  // run on an older version of swarmkit
  1193  func (n *Node) DowngradeKey() error {
  1194  	paths := ca.NewConfigPaths(filepath.Join(n.config.StateDir, certDirectory))
  1195  	krw := ca.NewKeyReadWriter(paths.Node, n.config.UnlockKey, nil)
  1196  
  1197  	return krw.DowngradeKey()
  1198  }
  1199  
  1200  type persistentRemotes struct {
  1201  	sync.RWMutex
  1202  	c *sync.Cond
  1203  	remotes.Remotes
  1204  	storePath      string
  1205  	lastSavedState []api.Peer
  1206  }
  1207  
  1208  func newPersistentRemotes(f string, peers ...api.Peer) *persistentRemotes {
  1209  	pr := &persistentRemotes{
  1210  		storePath: f,
  1211  		Remotes:   remotes.NewRemotes(peers...),
  1212  	}
  1213  	pr.c = sync.NewCond(pr.RLocker())
  1214  	return pr
  1215  }
  1216  
  1217  func (s *persistentRemotes) Observe(peer api.Peer, weight int) {
  1218  	s.Lock()
  1219  	defer s.Unlock()
  1220  	s.Remotes.Observe(peer, weight)
  1221  	s.c.Broadcast()
  1222  	if err := s.save(); err != nil {
  1223  		logrus.Errorf("error writing cluster state file: %v", err)
  1224  	}
  1225  }
  1226  
  1227  func (s *persistentRemotes) Remove(peers ...api.Peer) {
  1228  	s.Lock()
  1229  	defer s.Unlock()
  1230  	s.Remotes.Remove(peers...)
  1231  	if err := s.save(); err != nil {
  1232  		logrus.Errorf("error writing cluster state file: %v", err)
  1233  	}
  1234  }
  1235  
  1236  func (s *persistentRemotes) save() error {
  1237  	weights := s.Weights()
  1238  	remotes := make([]api.Peer, 0, len(weights))
  1239  	for r := range weights {
  1240  		remotes = append(remotes, r)
  1241  	}
  1242  	sort.Sort(sortablePeers(remotes))
  1243  	if reflect.DeepEqual(remotes, s.lastSavedState) {
  1244  		return nil
  1245  	}
  1246  	dt, err := json.Marshal(remotes)
  1247  	if err != nil {
  1248  		return err
  1249  	}
  1250  	s.lastSavedState = remotes
  1251  	return ioutils.AtomicWriteFile(s.storePath, dt, 0600)
  1252  }
  1253  
  1254  // WaitSelect waits until at least one remote becomes available and then selects one.
  1255  func (s *persistentRemotes) WaitSelect(ctx context.Context) <-chan api.Peer {
  1256  	c := make(chan api.Peer, 1)
  1257  	s.RLock()
  1258  	done := make(chan struct{})
  1259  	go func() {
  1260  		select {
  1261  		case <-ctx.Done():
  1262  			s.c.Broadcast()
  1263  		case <-done:
  1264  		}
  1265  	}()
  1266  	go func() {
  1267  		defer s.RUnlock()
  1268  		defer close(c)
  1269  		defer close(done)
  1270  		for {
  1271  			if ctx.Err() != nil {
  1272  				return
  1273  			}
  1274  			p, err := s.Select()
  1275  			if err == nil {
  1276  				c <- p
  1277  				return
  1278  			}
  1279  			s.c.Wait()
  1280  		}
  1281  	}()
  1282  	return c
  1283  }
  1284  
  1285  // sortablePeers is a sort wrapper for []api.Peer
  1286  type sortablePeers []api.Peer
  1287  
  1288  func (sp sortablePeers) Less(i, j int) bool { return sp[i].NodeID < sp[j].NodeID }
  1289  
  1290  func (sp sortablePeers) Len() int { return len(sp) }
  1291  
  1292  func (sp sortablePeers) Swap(i, j int) { sp[i], sp[j] = sp[j], sp[i] }
  1293  
  1294  // firstSessionErrorTracker is a utility that helps determine whether the agent should exit after
  1295  // a TLS failure on establishing the first session.  This should only happen if a join address
  1296  // is specified.  If establishing the first session succeeds, but later on some session fails
  1297  // because of a TLS error, we don't want to exit the agent because a previously successful
  1298  // session indicates that the TLS error may be a transient issue.
  1299  type firstSessionErrorTracker struct {
  1300  	mu               sync.Mutex
  1301  	pastFirstSession bool
  1302  	err              error
  1303  }
  1304  
  1305  func (fs *firstSessionErrorTracker) SessionEstablished() {
  1306  	fs.mu.Lock()
  1307  	fs.pastFirstSession = true
  1308  	fs.mu.Unlock()
  1309  }
  1310  
  1311  func (fs *firstSessionErrorTracker) SessionError(err error) {
  1312  	fs.mu.Lock()
  1313  	fs.err = err
  1314  	fs.mu.Unlock()
  1315  }
  1316  
  1317  // SessionClosed returns an error if we haven't yet established a session, and
  1318  // we get a grpc error as a result of an X509 failure.
  1319  func (fs *firstSessionErrorTracker) SessionClosed() error {
  1320  	fs.mu.Lock()
  1321  	defer fs.mu.Unlock()
  1322  
  1323  	// if we've successfully established at least 1 session, never return
  1324  	// errors
  1325  	if fs.pastFirstSession {
  1326  		return nil
  1327  	}
  1328  
  1329  	// get the GRPC status from the error, because we only care about GRPC
  1330  	// errors
  1331  	grpcStatus, ok := status.FromError(fs.err)
  1332  	// if this isn't a GRPC error, it's not an error we return from this method
  1333  	if !ok {
  1334  		return nil
  1335  	}
  1336  
  1337  	// NOTE(dperny, cyli): grpc does not expose the error type, which means we
  1338  	// have to do string matching to figure out if it's an x509 error.
  1339  	//
  1340  	// The error we're looking for has "connection error:", then says
  1341  	// "transport:" and finally has "x509:"
  1342  	// specifically, the connection error description reads:
  1343  	//
  1344  	//   transport: authentication handshake failed: x509: certificate signed by unknown authority
  1345  	//
  1346  	// This string matching has caused trouble in the past. Specifically, at
  1347  	// some point between grpc versions 1.3.0 and 1.7.5, the string we were
  1348  	// matching changed from "transport: x509" to "transport: authentication
  1349  	// handshake failed: x509", which was an issue because we were matching for
  1350  	// string "transport: x509:".
  1351  	//
  1352  	// In GRPC >= 1.10.x, transient errors like TLS errors became hidden by the
  1353  	// load balancing that GRPC does.  In GRPC 1.11.x, they were exposed again
  1354  	// (usually) in RPC calls, but the error string then became:
  1355  	// rpc error: code = Unavailable desc = all SubConns are in TransientFailure, latest connection error: connection error: desc = "transport: authentication handshake failed: x509: certificate signed by unknown authority"
  1356  	//
  1357  	// It also went from an Internal error to an Unavailable error.  So we're just going
  1358  	// to search for the string: "transport: authentication handshake failed: x509:" since
  1359  	// we want to fail for ALL x509 failures, not just unknown authority errors.
  1360  
  1361  	if !strings.Contains(grpcStatus.Message(), "connection error") ||
  1362  		!strings.Contains(grpcStatus.Message(), "transport: authentication handshake failed: x509:") {
  1363  		return nil
  1364  	}
  1365  	return fs.err
  1366  }