github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/manager.go

     1  package manager
     2  
     3  import (
     4  	"context"
     5  	"crypto/tls"
     6  	"fmt"
     7  	"math"
     8  	"net"
     9  	"os"
    10  	"path/filepath"
    11  	"runtime"
    12  	"sync"
    13  	"syscall"
    14  	"time"
    15  
    16  	"github.com/docker/docker/pkg/plugingetter"
    17  	"github.com/docker/go-events"
    18  	gmetrics "github.com/docker/go-metrics"
    19  	"github.com/docker/swarmkit/api"
    20  	"github.com/docker/swarmkit/ca"
    21  	"github.com/docker/swarmkit/connectionbroker"
    22  	"github.com/docker/swarmkit/identity"
    23  	"github.com/docker/swarmkit/log"
    24  	"github.com/docker/swarmkit/manager/allocator"
    25  	"github.com/docker/swarmkit/manager/allocator/cnmallocator"
    26  	"github.com/docker/swarmkit/manager/allocator/networkallocator"
    27  	"github.com/docker/swarmkit/manager/controlapi"
    28  	"github.com/docker/swarmkit/manager/dispatcher"
    29  	"github.com/docker/swarmkit/manager/drivers"
    30  	"github.com/docker/swarmkit/manager/health"
    31  	"github.com/docker/swarmkit/manager/keymanager"
    32  	"github.com/docker/swarmkit/manager/logbroker"
    33  	"github.com/docker/swarmkit/manager/metrics"
    34  	"github.com/docker/swarmkit/manager/orchestrator/constraintenforcer"
    35  	"github.com/docker/swarmkit/manager/orchestrator/global"
    36  	"github.com/docker/swarmkit/manager/orchestrator/jobs"
    37  	"github.com/docker/swarmkit/manager/orchestrator/replicated"
    38  	"github.com/docker/swarmkit/manager/orchestrator/taskreaper"
    39  	"github.com/docker/swarmkit/manager/resourceapi"
    40  	"github.com/docker/swarmkit/manager/scheduler"
    41  	"github.com/docker/swarmkit/manager/state/raft"
    42  	"github.com/docker/swarmkit/manager/state/raft/transport"
    43  	"github.com/docker/swarmkit/manager/state/store"
    44  	"github.com/docker/swarmkit/manager/watchapi"
    45  	"github.com/docker/swarmkit/remotes"
    46  	"github.com/docker/swarmkit/xnet"
    47  	gogotypes "github.com/gogo/protobuf/types"
    48  	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
    49  	"github.com/pkg/errors"
    50  	"github.com/sirupsen/logrus"
    51  	"google.golang.org/grpc"
    52  	"google.golang.org/grpc/credentials"
    53  )
    54  
    55  const (
    56  	// defaultTaskHistoryRetentionLimit is the number of tasks to keep.
    57  	defaultTaskHistoryRetentionLimit = 5
    58  )
    59  
    60  // RemoteAddrs provides a listening address and an optional advertise address
    61  // for serving the remote API.
    62  type RemoteAddrs struct {
    63  	// Address to bind
    64  	ListenAddr string
    65  
    66  	// Address to advertise to remote nodes (optional).
    67  	AdvertiseAddr string
    68  }
    69  
    70  // Config is used to tune the Manager.
    71  type Config struct {
    72  	SecurityConfig *ca.SecurityConfig
    73  
    74  	// RootCAPaths is the path to which new root certs should be saved
    75  	RootCAPaths ca.CertPaths
    76  
    77  	// ExternalCAs is a list of initial CAs to which a manager node
    78  	// will make certificate signing requests for node certificates.
    79  	ExternalCAs []*api.ExternalCA
    80  
    81  	// ControlAPI is an address for serving the control API.
    82  	ControlAPI string
    83  
    84  	// RemoteAPI is a listening address for serving the remote API, and
    85  	// an optional advertise address.
    86  	RemoteAPI *RemoteAddrs
    87  
    88  	// JoinRaft is an optional address of a node in an existing raft
    89  	// cluster to join.
    90  	JoinRaft string
    91  
    92  	// ForceJoin causes us to invoke raft's Join RPC even if already part
    93  	// of a cluster.
    94  	ForceJoin bool
    95  
    96  	// StateDir is the top-level state directory
    97  	StateDir string
    98  
    99  	// ForceNewCluster defines if we have to force a new cluster
   100  	// because we are recovering from a backup data directory.
   101  	ForceNewCluster bool
   102  
   103  	// ElectionTick defines the number of ticks that must elapse without
   104  	// a leader before a new election is triggered
   105  	ElectionTick uint32
   106  
   107  	// HeartbeatTick defines the number of ticks between each
   108  	// heartbeat sent to other members for health-check purposes
   109  	HeartbeatTick uint32
   110  
   111  	// AutoLockManagers determines whether or not managers require an unlock key
   112  	// when starting from a stopped state.  This configuration parameter is only
   113  	// applicable when bootstrapping a new cluster for the first time.
   114  	AutoLockManagers bool
   115  
   116  	// UnlockKey is the key to unlock a node - used for decrypting manager TLS keys
   117  	// as well as the raft data encryption key (DEK).  It is applicable when
   118  	// bootstrapping a cluster for the first time (it's a cluster-wide setting),
   119  	// and also when loading up any raft data on disk (as a KEK for the raft DEK).
   120  	UnlockKey []byte
   121  
   122  	// Availability allows a user to control the current scheduling status of a node
   123  	Availability api.NodeSpec_Availability
   124  
   125  	// PluginGetter provides access to docker's plugin inventory.
   126  	PluginGetter plugingetter.PluginGetter
   127  
   128  	// FIPS is a boolean stating whether the node is FIPS enabled - if this is the
   129  	// first node in the cluster, this setting is used to set the cluster-wide mandatory
   130  	// FIPS setting.
   131  	FIPS bool
   132  
   133  	// NetworkConfig stores network related config for the cluster
   134  	NetworkConfig *cnmallocator.NetworkConfig
   135  }
   136  
   137  // Manager is the cluster manager for Swarm.
   138  // This is the high-level object holding and initializing all the manager
   139  // subsystems.
   140  type Manager struct {
   141  	config Config
   142  
   143  	collector              *metrics.Collector
   144  	caserver               *ca.Server
   145  	dispatcher             *dispatcher.Dispatcher
   146  	logbroker              *logbroker.LogBroker
   147  	watchServer            *watchapi.Server
   148  	replicatedOrchestrator *replicated.Orchestrator
   149  	globalOrchestrator     *global.Orchestrator
   150  	jobsOrchestrator       *jobs.Orchestrator
   151  	taskReaper             *taskreaper.TaskReaper
   152  	constraintEnforcer     *constraintenforcer.ConstraintEnforcer
   153  	scheduler              *scheduler.Scheduler
   154  	allocator              *allocator.Allocator
   155  	keyManager             *keymanager.KeyManager
   156  	server                 *grpc.Server
   157  	localserver            *grpc.Server
   158  	raftNode               *raft.Node
   159  	dekRotator             *RaftDEKManager
   160  	roleManager            *roleManager
   161  
   162  	cancelFunc context.CancelFunc
   163  
   164  	// mu is a general mutex used to coordinate starting/stopping and
   165  	// leadership events.
   166  	mu sync.Mutex
   167  	// addrMu is a mutex that protects config.ControlAPI and config.RemoteAPI
   168  	addrMu sync.Mutex
   169  
   170  	started chan struct{}
   171  	stopped bool
   172  
   173  	remoteListener  chan net.Listener
   174  	controlListener chan net.Listener
   175  	errServe        chan error
   176  }
   177  
   178  var (
   179  	leaderMetric gmetrics.Gauge
   180  )
   181  
   182  func init() {
   183  	ns := gmetrics.NewNamespace("swarm", "manager", nil)
   184  	leaderMetric = ns.NewGauge("leader", "Indicates if this manager node is a leader", "")
   185  	gmetrics.Register(ns)
   186  }
   187  
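        // closeOnceListener wraps a net.Listener so that Close only takes effect the
        // first time it is called; subsequent calls are no-ops.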
   188  type closeOnceListener struct {
   189  	once sync.Once
   190  	net.Listener
   191  }
   192  
   193  func (l *closeOnceListener) Close() error {
   194  	var err error
   195  	l.once.Do(func() {
   196  		err = l.Listener.Close()
   197  	})
   198  	return err
   199  }
   200  
   201  // New creates a Manager which has not started to accept requests yet.
   202  func New(config *Config) (*Manager, error) {
   203  	err := os.MkdirAll(config.StateDir, 0700)
   204  	if err != nil {
   205  		return nil, errors.Wrap(err, "failed to create state directory")
   206  	}
   207  
   208  	raftStateDir := filepath.Join(config.StateDir, "raft")
   209  	err = os.MkdirAll(raftStateDir, 0700)
   210  	if err != nil {
   211  		return nil, errors.Wrap(err, "failed to create raft state directory")
   212  	}
   213  
   214  	raftCfg := raft.DefaultNodeConfig()
   215  
   216  	if config.ElectionTick > 0 {
   217  		raftCfg.ElectionTick = int(config.ElectionTick)
   218  	}
   219  	if config.HeartbeatTick > 0 {
   220  		raftCfg.HeartbeatTick = int(config.HeartbeatTick)
   221  	}
   222  
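        	// The DEK manager maintains the data encryption key (DEK) used to encrypt
        	// raft data on disk; the node's unlock key acts as the KEK protecting it.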
   223  	dekRotator, err := NewRaftDEKManager(config.SecurityConfig.KeyWriter(), config.FIPS)
   224  	if err != nil {
   225  		return nil, err
   226  	}
   227  
   228  	newNodeOpts := raft.NodeOptions{
   229  		ID:              config.SecurityConfig.ClientTLSCreds.NodeID(),
   230  		JoinAddr:        config.JoinRaft,
   231  		ForceJoin:       config.ForceJoin,
   232  		Config:          raftCfg,
   233  		StateDir:        raftStateDir,
   234  		ForceNewCluster: config.ForceNewCluster,
   235  		TLSCredentials:  config.SecurityConfig.ClientTLSCreds,
   236  		KeyRotator:      dekRotator,
   237  		FIPS:            config.FIPS,
   238  	}
   239  	raftNode := raft.NewNode(newNodeOpts)
   240  
   241  	// the interceptorWrappers are functions that wrap the prometheus grpc
   242  	// interceptor and add some code to log errors locally: one for stream
   243  	// and one for unary. this is needed because the grpc unary interceptor
   244  	// doesn't natively support chaining; you have to implement it in the caller.
   245  	// note that even though these are logging errors, we're still using
   246  	// debug level. returning errors from GRPC methods is common and expected,
   247  	// and logging an ERROR every time a user mistypes a service name would
   248  	// pollute the logs really fast.
   249  	//
   250  	// NOTE(dperny): Because of the fact that these functions are very simple
   251  	// in their operation and have no side effects other than the log output,
   252  	// they are not automatically tested. If you modify them later, make _sure_
   253  	// that they are correct. If you add substantial side effects, abstract
   254  	// these out and test them!
   255  	unaryInterceptorWrapper := func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (interface{}, error) {
   256  		// pass the call down into the grpc_prometheus interceptor
   257  		resp, err := grpc_prometheus.UnaryServerInterceptor(ctx, req, info, handler)
   258  		if err != nil {
   259  			log.G(ctx).WithField("rpc", info.FullMethod).WithError(err).Debug("error handling rpc")
   260  		}
   261  		return resp, err
   262  	}
   263  
   264  	streamInterceptorWrapper := func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
   265  		// we can't re-write a stream context, so don't bother creating a
   266  		// sub-context like in unary methods.
   267  		// pass the call down into the grpc_prometheus interceptor
   268  		err := grpc_prometheus.StreamServerInterceptor(srv, ss, info, handler)
   269  		if err != nil {
   270  			log.G(ss.Context()).WithField("rpc", info.FullMethod).WithError(err).Debug("error handling streaming rpc")
   271  		}
   272  		return err
   273  	}
   274  
   275  	opts := []grpc.ServerOption{
   276  		grpc.Creds(config.SecurityConfig.ServerTLSCreds),
   277  		grpc.StreamInterceptor(streamInterceptorWrapper),
   278  		grpc.UnaryInterceptor(unaryInterceptorWrapper),
   279  		grpc.MaxRecvMsgSize(transport.GRPCMaxMsgSize),
   280  	}
   281  
   282  	m := &Manager{
   283  		config:          *config,
   284  		caserver:        ca.NewServer(raftNode.MemoryStore(), config.SecurityConfig),
   285  		dispatcher:      dispatcher.New(),
   286  		logbroker:       logbroker.New(raftNode.MemoryStore()),
   287  		watchServer:     watchapi.NewServer(raftNode.MemoryStore()),
   288  		server:          grpc.NewServer(opts...),
   289  		localserver:     grpc.NewServer(opts...),
   290  		raftNode:        raftNode,
   291  		started:         make(chan struct{}),
   292  		dekRotator:      dekRotator,
   293  		remoteListener:  make(chan net.Listener, 1),
   294  		controlListener: make(chan net.Listener, 1),
   295  		errServe:        make(chan error, 2),
   296  	}
   297  
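        	// Clear the copied addresses and re-bind them through BindControl and
        	// BindRemote, which record each address only once its listener is bound.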
   298  	if config.ControlAPI != "" {
   299  		m.config.ControlAPI = ""
   300  		if err := m.BindControl(config.ControlAPI); err != nil {
   301  			return nil, err
   302  		}
   303  	}
   304  
   305  	if config.RemoteAPI != nil {
   306  		m.config.RemoteAPI = nil
   307  		// The context isn't used in this case (before (*Manager).Run).
   308  		if err := m.BindRemote(context.Background(), *config.RemoteAPI); err != nil {
   309  			if config.ControlAPI != "" {
   310  				l := <-m.controlListener
   311  				l.Close()
   312  			}
   313  			return nil, err
   314  		}
   315  	}
   316  
   317  	return m, nil
   318  }
   319  
   320  // BindControl binds a local socket for the control API.
   321  func (m *Manager) BindControl(addr string) error {
   322  	m.addrMu.Lock()
   323  	defer m.addrMu.Unlock()
   324  
   325  	if m.config.ControlAPI != "" {
   326  		return errors.New("manager already has a control API address")
   327  	}
   328  
   329  	// don't create a socket directory if we're on windows. we use a named pipe
   330  	if runtime.GOOS != "windows" {
   331  		err := os.MkdirAll(filepath.Dir(addr), 0700)
   332  		if err != nil {
   333  			return errors.Wrap(err, "failed to create socket directory")
   334  		}
   335  	}
   336  
   337  	l, err := xnet.ListenLocal(addr)
   338  
   339  	// A unix socket may fail to bind if the file already
   340  	// exists. Try replacing the file.
   341  	if runtime.GOOS != "windows" {
   342  		unwrappedErr := err
   343  		if op, ok := unwrappedErr.(*net.OpError); ok {
   344  			unwrappedErr = op.Err
   345  		}
   346  		if sys, ok := unwrappedErr.(*os.SyscallError); ok {
   347  			unwrappedErr = sys.Err
   348  		}
   349  		if unwrappedErr == syscall.EADDRINUSE {
   350  			os.Remove(addr)
   351  			l, err = xnet.ListenLocal(addr)
   352  		}
   353  	}
   354  	if err != nil {
   355  		return errors.Wrap(err, "failed to listen on control API address")
   356  	}
   357  
   358  	m.config.ControlAPI = addr
   359  	m.controlListener <- l
   360  	return nil
   361  }
   362  
   363  // BindRemote binds a port for the remote API.
   364  func (m *Manager) BindRemote(ctx context.Context, addrs RemoteAddrs) error {
   365  	m.addrMu.Lock()
   366  	defer m.addrMu.Unlock()
   367  
   368  	if m.config.RemoteAPI != nil {
   369  		return errors.New("manager already has remote API address")
   370  	}
   371  
   372  	// If an AdvertiseAddr was specified, we use that as our
   373  	// externally-reachable address.
   374  	advertiseAddr := addrs.AdvertiseAddr
   375  
   376  	var advertiseAddrPort string
   377  	if advertiseAddr == "" {
   378  		// Otherwise, we know we are joining an existing swarm. Use a
   379  		// wildcard address to trigger remote autodetection of our
   380  		// address.
   381  		var err error
   382  		_, advertiseAddrPort, err = net.SplitHostPort(addrs.ListenAddr)
   383  		if err != nil {
   384  			return fmt.Errorf("missing or invalid listen address %s", addrs.ListenAddr)
   385  		}
   386  
   387  		// Even with an IPv6 listening address, it's okay to use
   388  		// 0.0.0.0 here. Any "unspecified" (wildcard) IP will
   389  		// be substituted with the actual source address.
   390  		advertiseAddr = net.JoinHostPort("0.0.0.0", advertiseAddrPort)
   391  	}
   392  
   393  	l, err := net.Listen("tcp", addrs.ListenAddr)
   394  	if err != nil {
   395  		return errors.Wrap(err, "failed to listen on remote API address")
   396  	}
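        	// If an ephemeral port (0) was requested, advertise the address that was
        	// actually assigned to the listener.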
   397  	if advertiseAddrPort == "0" {
   398  		advertiseAddr = l.Addr().String()
   399  		addrs.ListenAddr = advertiseAddr
   400  	}
   401  
   402  	m.config.RemoteAPI = &addrs
   403  
   404  	m.raftNode.SetAddr(ctx, advertiseAddr)
   405  	m.remoteListener <- l
   406  
   407  	return nil
   408  }
   409  
   410  // RemovedFromRaft returns a channel that's closed if the manager is removed
   411  // from the raft cluster. This should be used to trigger a manager shutdown.
   412  func (m *Manager) RemovedFromRaft() <-chan struct{} {
   413  	return m.raftNode.RemovedFromRaft
   414  }
   415  
   416  // Addr returns the TCP address on which the remote API listens.
   417  func (m *Manager) Addr() string {
   418  	m.addrMu.Lock()
   419  	defer m.addrMu.Unlock()
   420  
   421  	if m.config.RemoteAPI == nil {
   422  		return ""
   423  	}
   424  	return m.config.RemoteAPI.ListenAddr
   425  }
   426  
   427  // Run starts all manager sub-systems and the gRPC server at the configured
   428  // address.
   429  // The call never returns unless an error occurs or `Stop()` is called.
   430  func (m *Manager) Run(parent context.Context) error {
   431  	ctx, ctxCancel := context.WithCancel(parent)
   432  	defer ctxCancel()
   433  
   434  	m.cancelFunc = ctxCancel
   435  
   436  	leadershipCh, cancel := m.raftNode.SubscribeLeadership()
   437  	defer cancel()
   438  
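        	// React to leadership changes by starting or stopping the leader-only
        	// subsystems (orchestrators, scheduler, allocator, etc.).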
   439  	go m.handleLeadershipEvents(ctx, leadershipCh)
   440  
   441  	authorize := func(ctx context.Context, roles []string) error {
   442  		var (
   443  			blacklistedCerts map[string]*api.BlacklistedCertificate
   444  			clusters         []*api.Cluster
   445  			err              error
   446  		)
   447  
   448  		m.raftNode.MemoryStore().View(func(readTx store.ReadTx) {
   449  			clusters, err = store.FindClusters(readTx, store.ByName(store.DefaultClusterName))
   450  
   451  		})
   452  
   453  		// Not having a cluster object yet means we can't check
   454  		// the blacklist.
   455  		if err == nil && len(clusters) == 1 {
   456  			blacklistedCerts = clusters[0].BlacklistedCertificates
   457  		}
   458  
   459  		// Authorize the remote roles, ensuring they can only be forwarded by managers
   460  		_, err = ca.AuthorizeForwardedRoleAndOrg(ctx, roles, []string{ca.ManagerRole}, m.config.SecurityConfig.ClientTLSCreds.Organization(), blacklistedCerts)
   461  		return err
   462  	}
   463  
   464  	baseControlAPI := controlapi.NewServer(m.raftNode.MemoryStore(), m.raftNode, m.config.SecurityConfig, m.config.PluginGetter, drivers.New(m.config.PluginGetter))
   465  	baseResourceAPI := resourceapi.New(m.raftNode.MemoryStore())
   466  	healthServer := health.NewHealthServer()
   467  	localHealthServer := health.NewHealthServer()
   468  
   469  	authenticatedControlAPI := api.NewAuthenticatedWrapperControlServer(baseControlAPI, authorize)
   470  	authenticatedWatchAPI := api.NewAuthenticatedWrapperWatchServer(m.watchServer, authorize)
   471  	authenticatedResourceAPI := api.NewAuthenticatedWrapperResourceAllocatorServer(baseResourceAPI, authorize)
   472  	authenticatedLogsServerAPI := api.NewAuthenticatedWrapperLogsServer(m.logbroker, authorize)
   473  	authenticatedLogBrokerAPI := api.NewAuthenticatedWrapperLogBrokerServer(m.logbroker, authorize)
   474  	authenticatedDispatcherAPI := api.NewAuthenticatedWrapperDispatcherServer(m.dispatcher, authorize)
   475  	authenticatedCAAPI := api.NewAuthenticatedWrapperCAServer(m.caserver, authorize)
   476  	authenticatedNodeCAAPI := api.NewAuthenticatedWrapperNodeCAServer(m.caserver, authorize)
   477  	authenticatedRaftAPI := api.NewAuthenticatedWrapperRaftServer(m.raftNode, authorize)
   478  	authenticatedHealthAPI := api.NewAuthenticatedWrapperHealthServer(healthServer, authorize)
   479  	authenticatedRaftMembershipAPI := api.NewAuthenticatedWrapperRaftMembershipServer(m.raftNode, authorize)
   480  
   481  	proxyDispatcherAPI := api.NewRaftProxyDispatcherServer(authenticatedDispatcherAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
   482  	proxyCAAPI := api.NewRaftProxyCAServer(authenticatedCAAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
   483  	proxyNodeCAAPI := api.NewRaftProxyNodeCAServer(authenticatedNodeCAAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
   484  	proxyRaftMembershipAPI := api.NewRaftProxyRaftMembershipServer(authenticatedRaftMembershipAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
   485  	proxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(authenticatedResourceAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
   486  	proxyLogBrokerAPI := api.NewRaftProxyLogBrokerServer(authenticatedLogBrokerAPI, m.raftNode, nil, ca.WithMetadataForwardTLSInfo)
   487  
   488  	// The following local proxies are only wired up to receive requests
   489  	// from a trusted local socket, and these requests don't use TLS,
   490  	// therefore the requests they handle locally should bypass
   491  	// authorization. When requests are proxied from these servers, they
   492  	// are sent as requests from this manager rather than forwarded
   493  	// requests (it has no TLS information to put in the metadata map).
   494  	forwardAsOwnRequest := func(ctx context.Context) (context.Context, error) { return ctx, nil }
   495  	handleRequestLocally := func(ctx context.Context) (context.Context, error) {
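        		// Fall back to a loopback placeholder when no remote API address has
        		// been bound yet.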
   496  		remoteAddr := "127.0.0.1:0"
   497  
   498  		m.addrMu.Lock()
   499  		if m.config.RemoteAPI != nil {
   500  			if m.config.RemoteAPI.AdvertiseAddr != "" {
   501  				remoteAddr = m.config.RemoteAPI.AdvertiseAddr
   502  			} else {
   503  				remoteAddr = m.config.RemoteAPI.ListenAddr
   504  			}
   505  		}
   506  		m.addrMu.Unlock()
   507  
   508  		creds := m.config.SecurityConfig.ClientTLSCreds
   509  
   510  		nodeInfo := ca.RemoteNodeInfo{
   511  			Roles:        []string{creds.Role()},
   512  			Organization: creds.Organization(),
   513  			NodeID:       creds.NodeID(),
   514  			RemoteAddr:   remoteAddr,
   515  		}
   516  
   517  		return context.WithValue(ctx, ca.LocalRequestKey, nodeInfo), nil
   518  	}
   519  	localProxyControlAPI := api.NewRaftProxyControlServer(baseControlAPI, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
   520  	localProxyLogsAPI := api.NewRaftProxyLogsServer(m.logbroker, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
   521  	localProxyDispatcherAPI := api.NewRaftProxyDispatcherServer(m.dispatcher, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
   522  	localProxyCAAPI := api.NewRaftProxyCAServer(m.caserver, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
   523  	localProxyNodeCAAPI := api.NewRaftProxyNodeCAServer(m.caserver, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
   524  	localProxyResourceAPI := api.NewRaftProxyResourceAllocatorServer(baseResourceAPI, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
   525  	localProxyLogBrokerAPI := api.NewRaftProxyLogBrokerServer(m.logbroker, m.raftNode, handleRequestLocally, forwardAsOwnRequest)
   526  
   527  	// Everything registered on m.server should be an authenticated
   528  	// wrapper, or a proxy wrapping an authenticated wrapper!
   529  	api.RegisterCAServer(m.server, proxyCAAPI)
   530  	api.RegisterNodeCAServer(m.server, proxyNodeCAAPI)
   531  	api.RegisterRaftServer(m.server, authenticatedRaftAPI)
   532  	api.RegisterHealthServer(m.server, authenticatedHealthAPI)
   533  	api.RegisterRaftMembershipServer(m.server, proxyRaftMembershipAPI)
   534  	api.RegisterControlServer(m.server, authenticatedControlAPI)
   535  	api.RegisterWatchServer(m.server, authenticatedWatchAPI)
   536  	api.RegisterLogsServer(m.server, authenticatedLogsServerAPI)
   537  	api.RegisterLogBrokerServer(m.server, proxyLogBrokerAPI)
   538  	api.RegisterResourceAllocatorServer(m.server, proxyResourceAPI)
   539  	api.RegisterDispatcherServer(m.server, proxyDispatcherAPI)
   540  	grpc_prometheus.Register(m.server)
   541  
   542  	api.RegisterControlServer(m.localserver, localProxyControlAPI)
   543  	api.RegisterWatchServer(m.localserver, m.watchServer)
   544  	api.RegisterLogsServer(m.localserver, localProxyLogsAPI)
   545  	api.RegisterHealthServer(m.localserver, localHealthServer)
   546  	api.RegisterDispatcherServer(m.localserver, localProxyDispatcherAPI)
   547  	api.RegisterCAServer(m.localserver, localProxyCAAPI)
   548  	api.RegisterNodeCAServer(m.localserver, localProxyNodeCAAPI)
   549  	api.RegisterResourceAllocatorServer(m.localserver, localProxyResourceAPI)
   550  	api.RegisterLogBrokerServer(m.localserver, localProxyLogBrokerAPI)
   551  	grpc_prometheus.Register(m.localserver)
   552  
   553  	healthServer.SetServingStatus("Raft", api.HealthCheckResponse_NOT_SERVING)
   554  	localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_NOT_SERVING)
   555  
   556  	if err := m.watchServer.Start(ctx); err != nil {
   557  		log.G(ctx).WithError(err).Error("watch server failed to start")
   558  	}
   559  
   560  	go m.serveListener(ctx, m.remoteListener)
   561  	go m.serveListener(ctx, m.controlListener)
   562  
   563  	defer func() {
   564  		m.server.Stop()
   565  		m.localserver.Stop()
   566  	}()
   567  
   568  	// Set the raft server as serving for the health server
   569  	healthServer.SetServingStatus("Raft", api.HealthCheckResponse_SERVING)
   570  
   571  	if err := m.raftNode.JoinAndStart(ctx); err != nil {
   572  		// Don't block future calls to Stop.
   573  		close(m.started)
   574  		return errors.Wrap(err, "can't initialize raft node")
   575  	}
   576  
   577  	localHealthServer.SetServingStatus("ControlAPI", api.HealthCheckResponse_SERVING)
   578  
   579  	// Start metrics collection.
   580  
   581  	m.collector = metrics.NewCollector(m.raftNode.MemoryStore())
   582  	go func(collector *metrics.Collector) {
   583  		if err := collector.Run(ctx); err != nil {
   584  			log.G(ctx).WithError(err).Error("collector failed with an error")
   585  		}
   586  	}(m.collector)
   587  
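        	// Signal that startup has progressed far enough for Stop to proceed; Stop
        	// blocks on m.started before shutting anything down.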
   588  	close(m.started)
   589  
   590  	go func() {
   591  		err := m.raftNode.Run(ctx)
   592  		if err != nil {
   593  			log.G(ctx).WithError(err).Error("raft node stopped")
   594  			m.Stop(ctx, false)
   595  		}
   596  	}()
   597  
   598  	if err := raft.WaitForLeader(ctx, m.raftNode); err != nil {
   599  		return err
   600  	}
   601  
   602  	c, err := raft.WaitForCluster(ctx, m.raftNode)
   603  	if err != nil {
   604  		return err
   605  	}
   606  	raftConfig := c.Spec.Raft
   607  
   608  	if err := m.watchForClusterChanges(ctx); err != nil {
   609  		return err
   610  	}
   611  
   612  	if int(raftConfig.ElectionTick) != m.raftNode.Config.ElectionTick {
   613  		log.G(ctx).Warningf("election tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.ElectionTick, raftConfig.ElectionTick)
   614  	}
   615  	if int(raftConfig.HeartbeatTick) != m.raftNode.Config.HeartbeatTick {
   616  		log.G(ctx).Warningf("heartbeat tick value (%ds) is different from the one defined in the cluster config (%vs), the cluster may be unstable", m.raftNode.Config.HeartbeatTick, raftConfig.HeartbeatTick)
   617  	}
   618  
   619  	// wait for an error in serving.
   620  	err = <-m.errServe
   621  	m.mu.Lock()
   622  	if m.stopped {
   623  		m.mu.Unlock()
   624  		return nil
   625  	}
   626  	m.mu.Unlock()
   627  	m.Stop(ctx, false)
   628  
   629  	return err
   630  }
   631  
   632  const stopTimeout = 8 * time.Second
   633  
   634  // Stop stops the manager. It immediately closes all open connections and
   635  // active RPCs as well as stopping the manager's subsystems. If clearData is
   636  // set, the raft logs, snapshots, and keys will be erased.
   637  func (m *Manager) Stop(ctx context.Context, clearData bool) {
   638  	log.G(ctx).Info("Stopping manager")
   639  	// It's not safe to start shutting down while the manager is still
   640  	// starting up.
   641  	<-m.started
   642  
   643  	// the mutex stops us from trying to stop while we're already stopping, or
   644  	// from returning before we've finished stopping.
   645  	m.mu.Lock()
   646  	defer m.mu.Unlock()
   647  	if m.stopped {
   648  		return
   649  	}
   650  	m.stopped = true
   651  
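        	// Gracefully stop both gRPC servers in the background; a timer further down
        	// forces a hard stop if they don't finish within stopTimeout.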
   652  	srvDone, localSrvDone := make(chan struct{}), make(chan struct{})
   653  	go func() {
   654  		m.server.GracefulStop()
   655  		close(srvDone)
   656  	}()
   657  	go func() {
   658  		m.localserver.GracefulStop()
   659  		close(localSrvDone)
   660  	}()
   661  
   662  	m.raftNode.Cancel()
   663  
   664  	if m.collector != nil {
   665  		m.collector.Stop()
   666  	}
   667  
   668  	// The following components are gRPC services that are
   669  	// registered when creating the manager and will need
   670  	// to be re-registered if they are recreated.
   671  	// For simplicity, they are not nilled out.
   672  	m.dispatcher.Stop()
   673  	m.logbroker.Stop()
   674  	m.watchServer.Stop()
   675  	m.caserver.Stop()
   676  
   677  	if m.allocator != nil {
   678  		m.allocator.Stop()
   679  	}
   680  	if m.replicatedOrchestrator != nil {
   681  		m.replicatedOrchestrator.Stop()
   682  	}
   683  	if m.globalOrchestrator != nil {
   684  		m.globalOrchestrator.Stop()
   685  	}
   686  	if m.jobsOrchestrator != nil {
   687  		m.jobsOrchestrator.Stop()
   688  	}
   689  	if m.taskReaper != nil {
   690  		m.taskReaper.Stop()
   691  	}
   692  	if m.constraintEnforcer != nil {
   693  		m.constraintEnforcer.Stop()
   694  	}
   695  	if m.scheduler != nil {
   696  		m.scheduler.Stop()
   697  	}
   698  	if m.roleManager != nil {
   699  		m.roleManager.Stop()
   700  	}
   701  	if m.keyManager != nil {
   702  		m.keyManager.Stop()
   703  	}
   704  
   705  	if clearData {
   706  		m.raftNode.ClearData()
   707  	}
   708  	m.cancelFunc()
   709  	<-m.raftNode.Done()
   710  
   711  	timer := time.AfterFunc(stopTimeout, func() {
   712  		m.server.Stop()
   713  		m.localserver.Stop()
   714  	})
   715  	defer timer.Stop()
   716  	// TODO: we're not waiting on ctx because it very well could be passed from Run,
   717  	// which is already cancelled here. We need to refactor that.
   718  	select {
   719  	case <-srvDone:
   720  		<-localSrvDone
   721  	case <-localSrvDone:
   722  		<-srvDone
   723  	}
   724  
   725  	log.G(ctx).Info("Manager shut down")
   726  	// mutex is released and Run can return now
   727  }
   728  
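        // updateKEK picks up the cluster's current unlock key (KEK) and has the DEK
        // rotator re-encrypt the node's keys with it; when the cluster transitions from
        // unlocked to locked, it also kicks off a best-effort TLS certificate renewal
        // over the local control socket.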
   729  func (m *Manager) updateKEK(ctx context.Context, cluster *api.Cluster) error {
   730  	securityConfig := m.config.SecurityConfig
   731  	nodeID := m.config.SecurityConfig.ClientTLSCreds.NodeID()
   732  	logger := log.G(ctx).WithFields(logrus.Fields{
   733  		"node.id":   nodeID,
   734  		"node.role": ca.ManagerRole,
   735  	})
   736  
   737  	kekData := ca.KEKData{Version: cluster.Meta.Version.Index}
   738  	for _, encryptionKey := range cluster.UnlockKeys {
   739  		if encryptionKey.Subsystem == ca.ManagerRole {
   740  			kekData.KEK = encryptionKey.Key
   741  			break
   742  		}
   743  	}
   744  	updated, unlockedToLocked, err := m.dekRotator.MaybeUpdateKEK(kekData)
   745  	if err != nil {
   746  		logger.WithError(err).Errorf("failed to re-encrypt TLS key with a new KEK")
   747  		return err
   748  	}
   749  	if updated {
   750  		logger.Debug("successfully rotated KEK")
   751  	}
   752  	if unlockedToLocked {
   753  		// a best effort attempt to update the TLS certificate - if it fails, it'll be updated the next time it renews;
   754  		// don't wait because it might take a bit
   755  		go func() {
   756  			insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})
   757  
   758  			conn, err := grpc.Dial(
   759  				m.config.ControlAPI,
   760  				grpc.WithUnaryInterceptor(grpc_prometheus.UnaryClientInterceptor),
   761  				grpc.WithStreamInterceptor(grpc_prometheus.StreamClientInterceptor),
   762  				grpc.WithTransportCredentials(insecureCreds),
   763  				grpc.WithDialer(
   764  					func(addr string, timeout time.Duration) (net.Conn, error) {
   765  						return xnet.DialTimeoutLocal(addr, timeout)
   766  					}),
   767  				grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)),
   768  			)
   769  			if err != nil {
   770  				logger.WithError(err).Error("failed to connect to local manager socket after locking the cluster")
   771  				return
   772  			}
   773  
   774  			defer conn.Close()
   775  
   776  			connBroker := connectionbroker.New(remotes.NewRemotes())
   777  			connBroker.SetLocalConn(conn)
   778  			if err := ca.RenewTLSConfigNow(ctx, securityConfig, connBroker, m.config.RootCAPaths); err != nil {
   779  				logger.WithError(err).Error("failed to download new TLS certificate after locking the cluster")
   780  			}
   781  		}()
   782  	}
   783  	return nil
   784  }
   785  
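        // watchForClusterChanges applies the current cluster's unlock key and then
        // watches for cluster updates, re-checking the KEK on every change until the
        // context is cancelled.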
   786  func (m *Manager) watchForClusterChanges(ctx context.Context) error {
   787  	clusterID := m.config.SecurityConfig.ClientTLSCreds.Organization()
   788  	var cluster *api.Cluster
   789  	clusterWatch, clusterWatchCancel, err := store.ViewAndWatch(m.raftNode.MemoryStore(),
   790  		func(tx store.ReadTx) error {
   791  			cluster = store.GetCluster(tx, clusterID)
   792  			if cluster == nil {
   793  				return fmt.Errorf("unable to get current cluster")
   794  			}
   795  			return nil
   796  		},
   797  		api.EventUpdateCluster{
   798  			Cluster: &api.Cluster{ID: clusterID},
   799  			Checks:  []api.ClusterCheckFunc{api.ClusterCheckID},
   800  		},
   801  	)
   802  	if err != nil {
   803  		return err
   804  	}
   805  	if err := m.updateKEK(ctx, cluster); err != nil {
   806  		return err
   807  	}
   808  
   809  	go func() {
   810  		for {
   811  			select {
   812  			case event := <-clusterWatch:
   813  				clusterEvent := event.(api.EventUpdateCluster)
   814  				m.updateKEK(ctx, clusterEvent.Cluster)
   815  			case <-ctx.Done():
   816  				clusterWatchCancel()
   817  				return
   818  			}
   819  		}
   820  	}()
   821  	return nil
   822  }
   823  
   824  // getLeaderNodeID is a small helper function returning a string with the
   825  // leader's node ID. it is only used for logging, and should not be relied on
   826  // to give a node ID for actual operational purposes (because it returns errors
   827  // as nicely decorated strings)
   828  func (m *Manager) getLeaderNodeID() string {
   829  	// get the current leader ID. this variable tracks the leader *only* for
   830  	// the purposes of logging leadership changes, and should not be relied on
   831  	// for other purposes
   832  	leader, leaderErr := m.raftNode.Leader()
   833  	switch leaderErr {
   834  	case raft.ErrNoRaftMember:
   835  		// this is an unlikely case, but we have to handle it. this means this
   836  		// node is not a member of the raft quorum. this won't look very pretty
   837  		// in logs ("leadership changed from aslkdjfa to ErrNoRaftMember") but
   838  		// it also won't be very common
   839  		return "not yet part of a raft cluster"
   840  	case raft.ErrNoClusterLeader:
   841  		return "no cluster leader"
   842  	default:
   843  		id, err := m.raftNode.GetNodeIDByRaftID(leader)
   844  		// the only possible error here is "ErrMemberUnknown"
   845  		if err != nil {
   846  			return "an unknown node"
   847  		}
   848  		return id
   849  	}
   850  }
   851  
   852  // handleLeadershipEvents handles the is leader event or is follower event.
   853  func (m *Manager) handleLeadershipEvents(ctx context.Context, leadershipCh chan events.Event) {
   854  	// get the current leader and save it for logging leadership changes in
   855  	// this loop
   856  	oldLeader := m.getLeaderNodeID()
   857  	for {
   858  		select {
   859  		case leadershipEvent := <-leadershipCh:
   860  			m.mu.Lock()
   861  			if m.stopped {
   862  				m.mu.Unlock()
   863  				return
   864  			}
   865  			newState := leadershipEvent.(raft.LeadershipState)
   866  
   867  			if newState == raft.IsLeader {
   868  				m.becomeLeader(ctx)
   869  				leaderMetric.Set(1)
   870  			} else if newState == raft.IsFollower {
   871  				m.becomeFollower()
   872  				leaderMetric.Set(0)
   873  			}
   874  			m.mu.Unlock()
   875  
   876  			newLeader := m.getLeaderNodeID()
   877  			// maybe we should use logrus fields for old and new leader, so
   878  			// that users are better able to ingest leadership changes into log
   879  			// aggregators?
   880  			log.G(ctx).Infof("leadership changed from %v to %v", oldLeader, newLeader)
   881  		case <-ctx.Done():
   882  			return
   883  		}
   884  	}
   885  }
   886  
   887  // serveListener serves a listener for local and non-local connections.
   888  func (m *Manager) serveListener(ctx context.Context, lCh <-chan net.Listener) {
   889  	var l net.Listener
   890  	select {
   891  	case l = <-lCh:
   892  	case <-ctx.Done():
   893  		return
   894  	}
   895  	ctx = log.WithLogger(ctx, log.G(ctx).WithFields(
   896  		logrus.Fields{
   897  			"proto": l.Addr().Network(),
   898  			"addr":  l.Addr().String(),
   899  		}))
   900  	if _, ok := l.(*net.TCPListener); !ok {
   901  		log.G(ctx).Info("Listening for local connections")
   902  		// we need to disallow double closes because UnixListener.Close
   903  		// can delete the unix-socket file of a newer listener. grpc does
   904  		// indeed call Close twice: in Serve and in Stop.
   905  		m.errServe <- m.localserver.Serve(&closeOnceListener{Listener: l})
   906  	} else {
   907  		log.G(ctx).Info("Listening for connections")
   908  		m.errServe <- m.server.Serve(l)
   909  	}
   910  }
   911  
   912  // becomeLeader starts the subsystems that are run on the leader.
   913  func (m *Manager) becomeLeader(ctx context.Context) {
   914  	s := m.raftNode.MemoryStore()
   915  
   916  	rootCA := m.config.SecurityConfig.RootCA()
   917  	nodeID := m.config.SecurityConfig.ClientTLSCreds.NodeID()
   918  
   919  	raftCfg := raft.DefaultRaftConfig()
   920  	raftCfg.ElectionTick = uint32(m.raftNode.Config.ElectionTick)
   921  	raftCfg.HeartbeatTick = uint32(m.raftNode.Config.HeartbeatTick)
   922  
   923  	clusterID := m.config.SecurityConfig.ClientTLSCreds.Organization()
   924  
   925  	initialCAConfig := ca.DefaultCAConfig()
   926  	initialCAConfig.ExternalCAs = m.config.ExternalCAs
   927  
   928  	var (
   929  		unlockKeys []*api.EncryptionKey
   930  		err        error
   931  	)
   932  	if m.config.AutoLockManagers {
   933  		unlockKeys = []*api.EncryptionKey{{
   934  			Subsystem: ca.ManagerRole,
   935  			Key:       m.config.UnlockKey,
   936  		}}
   937  	}
   938  	s.Update(func(tx store.Tx) error {
   939  		// Add a default cluster object to the
   940  		// store. Don't check the error because
   941  		// we expect this to fail unless this
   942  		// is a brand new cluster.
   943  		clusterObj := defaultClusterObject(
   944  			clusterID,
   945  			initialCAConfig,
   946  			raftCfg,
   947  			api.EncryptionConfig{AutoLockManagers: m.config.AutoLockManagers},
   948  			unlockKeys,
   949  			rootCA,
   950  			m.config.FIPS,
   951  			nil,
   952  			0,
   953  			0)
   954  
   955  		// If DefaultAddrPool is set, update the cluster object with its value.
   956  		// If VXLANUDPPort is not 0, update the cluster object with that value too.
   957  		if m.config.NetworkConfig != nil {
   958  			if m.config.NetworkConfig.DefaultAddrPool != nil {
   959  				clusterObj.DefaultAddressPool = m.config.NetworkConfig.DefaultAddrPool
   960  				clusterObj.SubnetSize = m.config.NetworkConfig.SubnetSize
   961  			}
   962  
   963  			if m.config.NetworkConfig.VXLANUDPPort != 0 {
   964  				clusterObj.VXLANUDPPort = m.config.NetworkConfig.VXLANUDPPort
   965  			}
   966  		}
   967  		err := store.CreateCluster(tx, clusterObj)
   968  
   969  		if err != nil && err != store.ErrExist {
   970  			log.G(ctx).WithError(err).Errorf("error creating cluster object")
   971  		}
   972  
   973  		// Add Node entry for ourself, if one
   974  		// doesn't exist already.
   975  		freshCluster := nil == store.CreateNode(tx, managerNode(nodeID, m.config.Availability, clusterObj.VXLANUDPPort))
   976  
   977  		if freshCluster {
   978  			// This is a fresh swarm cluster. Add to store now any initial
   979  			// cluster resource, like the default ingress network which
   980  			// provides the routing mesh for this cluster.
   981  			log.G(ctx).Info("Creating default ingress network")
   982  			if err := store.CreateNetwork(tx, newIngressNetwork()); err != nil {
   983  				log.G(ctx).WithError(err).Error("failed to create default ingress network")
   984  			}
   985  		}
   986  		// Create the static predefined networks (node-local networks such as
   987  		// bridge and host, which are known to be present on each cluster node)
   988  		// if the store does not contain them yet. This is needed in order to
   989  		// allow running services on the predefined docker networks like
   990  		// `bridge` and `host`.
   991  		for _, p := range allocator.PredefinedNetworks() {
   992  			if err := store.CreateNetwork(tx, newPredefinedNetwork(p.Name, p.Driver)); err != nil && err != store.ErrNameConflict {
   993  				log.G(ctx).WithError(err).Error("failed to create predefined network " + p.Name)
   994  			}
   995  		}
   996  		return nil
   997  	})
   998  
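        	// (Re)create the leader-only subsystems; they are started further down and
        	// torn down again in becomeFollower.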
   999  	m.replicatedOrchestrator = replicated.NewReplicatedOrchestrator(s)
  1000  	m.constraintEnforcer = constraintenforcer.New(s)
  1001  	m.globalOrchestrator = global.NewGlobalOrchestrator(s)
  1002  	m.jobsOrchestrator = jobs.NewOrchestrator(s)
  1003  	m.taskReaper = taskreaper.New(s)
  1004  	m.scheduler = scheduler.New(s)
  1005  	m.keyManager = keymanager.New(s, keymanager.DefaultConfig())
  1006  	m.roleManager = newRoleManager(s, m.raftNode)
  1007  
  1008  	// TODO(stevvooe): Allocate a context that can be used to
  1009  	// shut down underlying manager processes when leadership is
  1010  	// lost.
  1011  
  1012  	// If DefaultAddrPool is nil, read the cluster object from the store and
  1013  	// check whether DefaultAddrPool info is stored there.
  1014  	// If VXLANUDPPort is 0, read it from the cluster object as well.
  1015  	if m.config.NetworkConfig == nil || m.config.NetworkConfig.DefaultAddrPool == nil || m.config.NetworkConfig.VXLANUDPPort == 0 {
  1016  		var cluster *api.Cluster
  1017  		s.View(func(tx store.ReadTx) {
  1018  			cluster = store.GetCluster(tx, clusterID)
  1019  		})
  1020  		if cluster.DefaultAddressPool != nil {
  1021  			if m.config.NetworkConfig == nil {
  1022  				m.config.NetworkConfig = &cnmallocator.NetworkConfig{}
  1023  			}
  1024  			m.config.NetworkConfig.DefaultAddrPool = append(m.config.NetworkConfig.DefaultAddrPool, cluster.DefaultAddressPool...)
  1025  			m.config.NetworkConfig.SubnetSize = cluster.SubnetSize
  1026  		}
  1027  		if cluster.VXLANUDPPort != 0 {
  1028  			if m.config.NetworkConfig == nil {
  1029  				m.config.NetworkConfig = &cnmallocator.NetworkConfig{}
  1030  			}
  1031  			m.config.NetworkConfig.VXLANUDPPort = cluster.VXLANUDPPort
  1032  		}
  1033  	}
  1034  
  1035  	m.allocator, err = allocator.New(s, m.config.PluginGetter, m.config.NetworkConfig)
  1036  	if err != nil {
  1037  		log.G(ctx).WithError(err).Error("failed to create allocator")
  1038  		// TODO(stevvooe): It doesn't seem correct here to fail
  1039  		// creating the allocator but then use it anyway.
  1040  	}
  1041  
  1042  	if m.keyManager != nil {
  1043  		go func(keyManager *keymanager.KeyManager) {
  1044  			if err := keyManager.Run(ctx); err != nil {
  1045  				log.G(ctx).WithError(err).Error("keymanager failed with an error")
  1046  			}
  1047  		}(m.keyManager)
  1048  	}
  1049  
  1050  	go func(d *dispatcher.Dispatcher) {
  1051  		// Initialize the dispatcher.
  1052  		var cluster *api.Cluster
  1053  		s.View(func(tx store.ReadTx) {
  1054  			cluster = store.GetCluster(tx, clusterID)
  1055  		})
  1056  		var defaultConfig = dispatcher.DefaultConfig()
  1057  		heartbeatPeriod, err := gogotypes.DurationFromProto(cluster.Spec.Dispatcher.HeartbeatPeriod)
  1058  		if err == nil {
  1059  			defaultConfig.HeartbeatPeriod = heartbeatPeriod
  1060  		}
  1061  		d.Init(m.raftNode, defaultConfig, drivers.New(m.config.PluginGetter), m.config.SecurityConfig)
  1062  		if err := d.Run(ctx); err != nil {
  1063  			log.G(ctx).WithError(err).Error("Dispatcher exited with an error")
  1064  		}
  1065  	}(m.dispatcher)
  1066  
  1067  	if err := m.logbroker.Start(ctx); err != nil {
  1068  		log.G(ctx).WithError(err).Error("LogBroker failed to start")
  1069  	}
  1070  
  1071  	go func(server *ca.Server) {
  1072  		if err := server.Run(ctx); err != nil {
  1073  			log.G(ctx).WithError(err).Error("CA signer exited with an error")
  1074  		}
  1075  	}(m.caserver)
  1076  
  1077  	// Start all sub-components in separate goroutines.
  1078  	// TODO(aluzzardi): This should have some kind of error handling so that
  1079  	// any component that goes down would bring the entire manager down.
  1080  	if m.allocator != nil {
  1081  		go func(allocator *allocator.Allocator) {
  1082  			if err := allocator.Run(ctx); err != nil {
  1083  				log.G(ctx).WithError(err).Error("allocator exited with an error")
  1084  			}
  1085  		}(m.allocator)
  1086  	}
  1087  
  1088  	go func(scheduler *scheduler.Scheduler) {
  1089  		if err := scheduler.Run(ctx); err != nil {
  1090  			log.G(ctx).WithError(err).Error("scheduler exited with an error")
  1091  		}
  1092  	}(m.scheduler)
  1093  
  1094  	go func(constraintEnforcer *constraintenforcer.ConstraintEnforcer) {
  1095  		constraintEnforcer.Run()
  1096  	}(m.constraintEnforcer)
  1097  
  1098  	go func(taskReaper *taskreaper.TaskReaper) {
  1099  		taskReaper.Run(ctx)
  1100  	}(m.taskReaper)
  1101  
  1102  	go func(orchestrator *replicated.Orchestrator) {
  1103  		if err := orchestrator.Run(ctx); err != nil {
  1104  			log.G(ctx).WithError(err).Error("replicated orchestrator exited with an error")
  1105  		}
  1106  	}(m.replicatedOrchestrator)
  1107  
  1108  	go func(orchestrator *jobs.Orchestrator) {
  1109  		// jobs orchestrator does not return errors.
  1110  		orchestrator.Run(ctx)
  1111  	}(m.jobsOrchestrator)
  1112  
  1113  	go func(globalOrchestrator *global.Orchestrator) {
  1114  		if err := globalOrchestrator.Run(ctx); err != nil {
  1115  			log.G(ctx).WithError(err).Error("global orchestrator exited with an error")
  1116  		}
  1117  	}(m.globalOrchestrator)
  1118  
  1119  	go func(roleManager *roleManager) {
  1120  		roleManager.Run(ctx)
  1121  	}(m.roleManager)
  1122  }
  1123  
  1124  // becomeFollower shuts down the subsystems that are only run by the leader.
  1125  func (m *Manager) becomeFollower() {
  1126  	// The following components are gRPC services that are
  1127  	// registered when creating the manager and will need
  1128  	// to be re-registered if they are recreated.
  1129  	// For simplicity, they are not nilled out.
  1130  	m.dispatcher.Stop()
  1131  	m.logbroker.Stop()
  1132  	m.caserver.Stop()
  1133  
  1134  	if m.allocator != nil {
  1135  		m.allocator.Stop()
  1136  		m.allocator = nil
  1137  	}
  1138  
  1139  	m.constraintEnforcer.Stop()
  1140  	m.constraintEnforcer = nil
  1141  
  1142  	m.replicatedOrchestrator.Stop()
  1143  	m.replicatedOrchestrator = nil
  1144  
  1145  	m.globalOrchestrator.Stop()
  1146  	m.globalOrchestrator = nil
  1147  
  1148  	m.taskReaper.Stop()
  1149  	m.taskReaper = nil
  1150  
  1151  	m.scheduler.Stop()
  1152  	m.scheduler = nil
  1153  
  1154  	m.roleManager.Stop()
  1155  	m.roleManager = nil
  1156  
  1157  	if m.keyManager != nil {
  1158  		m.keyManager.Stop()
  1159  		m.keyManager = nil
  1160  	}
  1161  }
  1162  
  1163  // defaultClusterObject creates a default cluster.
  1164  func defaultClusterObject(
  1165  	clusterID string,
  1166  	initialCAConfig api.CAConfig,
  1167  	raftCfg api.RaftConfig,
  1168  	encryptionConfig api.EncryptionConfig,
  1169  	initialUnlockKeys []*api.EncryptionKey,
  1170  	rootCA *ca.RootCA,
  1171  	fips bool,
  1172  	defaultAddressPool []string,
  1173  	subnetSize uint32,
  1174  	vxlanUDPPort uint32) *api.Cluster {
  1175  	var caKey []byte
  1176  	if rcaSigner, err := rootCA.Signer(); err == nil {
  1177  		caKey = rcaSigner.Key
  1178  	}
  1179  
  1180  	return &api.Cluster{
  1181  		ID: clusterID,
  1182  		Spec: api.ClusterSpec{
  1183  			Annotations: api.Annotations{
  1184  				Name: store.DefaultClusterName,
  1185  			},
  1186  			Orchestration: api.OrchestrationConfig{
  1187  				TaskHistoryRetentionLimit: defaultTaskHistoryRetentionLimit,
  1188  			},
  1189  			Dispatcher: api.DispatcherConfig{
  1190  				HeartbeatPeriod: gogotypes.DurationProto(dispatcher.DefaultHeartBeatPeriod),
  1191  			},
  1192  			Raft:             raftCfg,
  1193  			CAConfig:         initialCAConfig,
  1194  			EncryptionConfig: encryptionConfig,
  1195  		},
  1196  		RootCA: api.RootCA{
  1197  			CAKey:      caKey,
  1198  			CACert:     rootCA.Certs,
  1199  			CACertHash: rootCA.Digest.String(),
  1200  			JoinTokens: api.JoinTokens{
  1201  				Worker:  ca.GenerateJoinToken(rootCA, fips),
  1202  				Manager: ca.GenerateJoinToken(rootCA, fips),
  1203  			},
  1204  		},
  1205  		UnlockKeys:         initialUnlockKeys,
  1206  		FIPS:               fips,
  1207  		DefaultAddressPool: defaultAddressPool,
  1208  		SubnetSize:         subnetSize,
  1209  		VXLANUDPPort:       vxlanUDPPort,
  1210  	}
  1211  }
  1212  
  1213  // managerNode creates a new node with NodeRoleManager role.
  1214  func managerNode(nodeID string, availability api.NodeSpec_Availability, vxlanPort uint32) *api.Node {
  1215  	return &api.Node{
  1216  		ID: nodeID,
  1217  		Certificate: api.Certificate{
  1218  			CN:   nodeID,
  1219  			Role: api.NodeRoleManager,
  1220  			Status: api.IssuanceStatus{
  1221  				State: api.IssuanceStateIssued,
  1222  			},
  1223  		},
  1224  		Spec: api.NodeSpec{
  1225  			DesiredRole:  api.NodeRoleManager,
  1226  			Membership:   api.NodeMembershipAccepted,
  1227  			Availability: availability,
  1228  		},
  1229  		VXLANUDPPort: vxlanPort,
  1230  	}
  1231  }
  1232  
  1233  // newIngressNetwork returns the network object for the default ingress
  1234  // network, the network which provides the routing mesh. The caller saves
  1235  // this object to the store once, at fresh cluster creation. This function
  1236  // is expected to be called inside a store update transaction.
  1237  func newIngressNetwork() *api.Network {
  1238  	return &api.Network{
  1239  		ID: identity.NewID(),
  1240  		Spec: api.NetworkSpec{
  1241  			Ingress: true,
  1242  			Annotations: api.Annotations{
  1243  				Name: "ingress",
  1244  			},
  1245  			DriverConfig: &api.Driver{},
  1246  			IPAM: &api.IPAMOptions{
  1247  				Driver:  &api.Driver{},
  1248  				Configs: []*api.IPAMConfig{},
  1249  			},
  1250  		},
  1251  	}
  1252  }
  1253  
  1254  // newPredefinedNetwork creates a network object representing one of the
  1255  // predefined networks known to be statically created on the cluster nodes.
  1256  // These objects are populated in the store at cluster creation solely in
  1257  // order to support running services on the nodes' predefined networks.
  1258  // External clients can filter these predefined networks by looking
  1259  // at the predefined label.
  1260  func newPredefinedNetwork(name, driver string) *api.Network {
  1261  	return &api.Network{
  1262  		ID: identity.NewID(),
  1263  		Spec: api.NetworkSpec{
  1264  			Annotations: api.Annotations{
  1265  				Name: name,
  1266  				Labels: map[string]string{
  1267  					networkallocator.PredefinedLabel: "true",
  1268  				},
  1269  			},
  1270  			DriverConfig: &api.Driver{Name: driver},
  1271  		},
  1272  	}
  1273  }