github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/server.go

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"crypto/tls"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"log"
     9  	"net"
    10  	"net/rpc"
    11  	"os"
    12  	"path/filepath"
    13  	"sort"
    14  	"strconv"
    15  	"sync"
    16  	"sync/atomic"
    17  	"time"
    18  
    19  	"github.com/hashicorp/consul/agent/consul/autopilot"
    20  	consulapi "github.com/hashicorp/consul/api"
    21  	"github.com/hashicorp/consul/lib"
    22  	multierror "github.com/hashicorp/go-multierror"
    23  	lru "github.com/hashicorp/golang-lru"
    24  	"github.com/hashicorp/nomad/command/agent/consul"
    25  	"github.com/hashicorp/nomad/helper/codec"
    26  	"github.com/hashicorp/nomad/helper/pool"
    27  	"github.com/hashicorp/nomad/helper/stats"
    28  	"github.com/hashicorp/nomad/helper/tlsutil"
    29  	"github.com/hashicorp/nomad/nomad/deploymentwatcher"
    30  	"github.com/hashicorp/nomad/nomad/drainer"
    31  	"github.com/hashicorp/nomad/nomad/state"
    32  	"github.com/hashicorp/nomad/nomad/structs"
    33  	"github.com/hashicorp/nomad/nomad/structs/config"
    34  	"github.com/hashicorp/nomad/scheduler"
    35  	"github.com/hashicorp/raft"
    36  	raftboltdb "github.com/hashicorp/raft-boltdb"
    37  	"github.com/hashicorp/serf/serf"
    38  )
    39  
    40  const (
    41  	// datacenterQueryLimit sets the max number of DCs that a Nomad
    42  	// Server will query to find bootstrap_expect servers.
    43  	datacenterQueryLimit = 25
    44  
    45  	// maxStaleLeadership is the maximum time we will permit this Nomad
    46  	// Server to go without seeing a valid Raft leader.
    47  	maxStaleLeadership = 15 * time.Second
    48  
    49  	// peersPollInterval is used as the polling interval between attempts
    50  	// to query Consul for Nomad Servers.
    51  	peersPollInterval = 45 * time.Second
    52  
    53  	// peersPollJitter is used to provide a slight amount of variance to
    54  	// the retry interval when querying Consul Servers
    55  	peersPollJitterFactor = 2
    56  
    57  	raftState         = "raft/"
    58  	serfSnapshot      = "serf/snapshot"
    59  	snapshotsRetained = 2
    60  
    61  	// serverRPCCache controls how long we keep an idle connection open to a server
    62  	serverRPCCache = 2 * time.Minute
    63  
    64  	// serverMaxStreams controls how many idle streams we keep open to a server
    65  	serverMaxStreams = 64
    66  
    67  	// raftLogCacheSize is the maximum number of logs to cache in-memory.
    68  	// This is used to reduce disk I/O for the recently committed entries.
    69  	raftLogCacheSize = 512
    70  
    71  	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
    72  	// to replicate to gracefully leave the cluster.
    73  	raftRemoveGracePeriod = 5 * time.Second
    74  
    75  	// defaultConsulDiscoveryInterval is how often to poll Consul for new
    76  	// servers if there is no leader.
    77  	defaultConsulDiscoveryInterval time.Duration = 3 * time.Second
    78  
    79  	// defaultConsulDiscoveryIntervalRetry is how often to poll Consul for
    80  	// new servers if there is no leader and the last Consul query failed.
    81  	defaultConsulDiscoveryIntervalRetry time.Duration = 9 * time.Second
    82  
    83  	// aclCacheSize is the number of ACL objects to keep cached. ACLs have a parsing and
    84  	// construction cost, so we keep the hot objects cached to reduce the ACL token resolution time.
    85  	aclCacheSize = 512
    86  )
    87  
     88  // Server is the Nomad server which manages the job queues,
    89  // schedulers, and notification bus for agents.
    90  type Server struct {
    91  	config *Config
    92  
    93  	logger *log.Logger
    94  
    95  	// Connection pool to other Nomad servers
    96  	connPool *pool.ConnPool
    97  
    98  	// The raft instance is used among Nomad nodes within the
    99  	// region to protect operations that require strong consistency
   100  	leaderCh      <-chan bool
   101  	raft          *raft.Raft
   102  	raftLayer     *RaftLayer
   103  	raftStore     *raftboltdb.BoltStore
   104  	raftInmem     *raft.InmemStore
   105  	raftTransport *raft.NetworkTransport
   106  
   107  	// autopilot is the Autopilot instance for this server.
   108  	autopilot *autopilot.Autopilot
   109  
   110  	// fsm is the state machine used with Raft
   111  	fsm *nomadFSM
   112  
   113  	// rpcListener is used to listen for incoming connections
   114  	rpcListener net.Listener
   115  	listenerCh  chan struct{}
   116  
   117  	// tlsWrap is used to wrap outbound connections using TLS. It should be
   118  	// accessed using the lock.
   119  	tlsWrap     tlsutil.RegionWrapper
   120  	tlsWrapLock sync.RWMutex
   121  
   122  	// rpcServer is the static RPC server that is used by the local agent.
   123  	rpcServer *rpc.Server
   124  
   125  	// clientRpcAdvertise is the advertised RPC address for Nomad clients to connect
   126  	// to this server
   127  	clientRpcAdvertise net.Addr
   128  
   129  	// serverRpcAdvertise is the advertised RPC address for Nomad servers to connect
   130  	// to this server
   131  	serverRpcAdvertise net.Addr
   132  
   133  	// rpcTLS is the TLS config for incoming TLS requests
   134  	rpcTLS    *tls.Config
   135  	rpcCancel context.CancelFunc
   136  
   137  	// staticEndpoints is the set of static endpoints that can be reused across
   138  	// all RPC connections
   139  	staticEndpoints endpoints
   140  
   141  	// streamingRpcs is the registry holding our streaming RPC handlers.
   142  	streamingRpcs *structs.StreamingRpcRegistry
   143  
   144  	// nodeConns is the set of multiplexed node connections we have keyed by
   145  	// NodeID
   146  	nodeConns     map[string][]*nodeConnState
   147  	nodeConnsLock sync.RWMutex
   148  
   149  	// peers is used to track the known Nomad servers. This is
   150  	// used for region forwarding and clustering.
   151  	peers      map[string][]*serverParts
   152  	localPeers map[raft.ServerAddress]*serverParts
   153  	peerLock   sync.RWMutex
   154  
   155  	// serf is the Serf cluster containing only Nomad
   156  	// servers. This is used for multi-region federation
   157  	// and automatic clustering within regions.
   158  	serf *serf.Serf
   159  
   160  	// reconcileCh is used to pass events from the serf handler
   161  	// into the leader manager. Mostly used to handle when servers
   162  	// join/leave from the region.
   163  	reconcileCh chan serf.Member
   164  
   165  	// eventCh is used to receive events from the serf cluster
   166  	eventCh chan serf.Event
   167  
   168  	// BlockedEvals is used to manage evaluations that are blocked on node
   169  	// capacity changes.
   170  	blockedEvals *BlockedEvals
   171  
   172  	// deploymentWatcher is used to watch deployments and their allocations and
   173  	// make the required calls to continue to transition the deployment.
   174  	deploymentWatcher *deploymentwatcher.Watcher
   175  
   176  	// nodeDrainer is used to drain allocations from nodes.
   177  	nodeDrainer *drainer.NodeDrainer
   178  
   179  	// evalBroker is used to manage the in-progress evaluations
   180  	// that are waiting to be brokered to a sub-scheduler
   181  	evalBroker *EvalBroker
   182  
   183  	// periodicDispatcher is used to track and create evaluations for periodic jobs.
   184  	periodicDispatcher *PeriodicDispatch
   185  
   186  	// planQueue is used to manage the submitted allocation
   187  	// plans that are waiting to be assessed by the leader
   188  	planQueue *PlanQueue
   189  
   190  	// heartbeatTimers track the expiration time of each heartbeat that has
   191  	// a TTL. On expiration, the node status is updated to be 'down'.
   192  	heartbeatTimers     map[string]*time.Timer
   193  	heartbeatTimersLock sync.Mutex
   194  
   195  	// consulCatalog is used for discovering other Nomad Servers via Consul
   196  	consulCatalog consul.CatalogAPI
   197  
   198  	// vault is the client for communicating with Vault.
   199  	vault VaultClient
   200  
    201  	// workers is the set of scheduling workers used to process evaluations
   202  	workers []*Worker
   203  
   204  	// aclCache is used to maintain the parsed ACL objects
   205  	aclCache *lru.TwoQueueCache
   206  
   207  	// leaderAcl is the management ACL token that is valid when resolved by the
   208  	// current leader.
   209  	leaderAcl     string
   210  	leaderAclLock sync.Mutex
   211  
   212  	// statsFetcher is used by autopilot to check the status of the other
    213  	// Nomad servers.
   214  	statsFetcher *StatsFetcher
   215  
   216  	// EnterpriseState is used to fill in state for Pro/Ent builds
   217  	EnterpriseState
   218  
   219  	left         bool
   220  	shutdown     bool
   221  	shutdownCh   chan struct{}
   222  	shutdownLock sync.Mutex
   223  }
   224  
    225  // endpoints holds the RPC endpoints
   226  type endpoints struct {
   227  	Status     *Status
   228  	Node       *Node
   229  	Job        *Job
   230  	Eval       *Eval
   231  	Plan       *Plan
   232  	Alloc      *Alloc
   233  	Deployment *Deployment
   234  	Region     *Region
   235  	Search     *Search
   236  	Periodic   *Periodic
   237  	System     *System
   238  	Operator   *Operator
   239  	ACL        *ACL
   240  	Enterprise *EnterpriseEndpoints
   241  
   242  	// Client endpoints
   243  	ClientStats       *ClientStats
   244  	FileSystem        *FileSystem
   245  	ClientAllocations *ClientAllocations
   246  }
   247  
   248  // NewServer is used to construct a new Nomad server from the
   249  // configuration, potentially returning an error
   250  func NewServer(config *Config, consulCatalog consul.CatalogAPI, logger *log.Logger) (*Server, error) {
   251  	// Check the protocol version
   252  	if err := config.CheckVersion(); err != nil {
   253  		return nil, err
   254  	}
   255  
   256  	// Create an eval broker
   257  	evalBroker, err := NewEvalBroker(
   258  		config.EvalNackTimeout,
   259  		config.EvalNackInitialReenqueueDelay,
   260  		config.EvalNackSubsequentReenqueueDelay,
   261  		config.EvalDeliveryLimit)
   262  	if err != nil {
   263  		return nil, err
   264  	}
   265  
   266  	// Create a new blocked eval tracker.
   267  	blockedEvals := NewBlockedEvals(evalBroker)
   268  
   269  	// Create a plan queue
   270  	planQueue, err := NewPlanQueue()
   271  	if err != nil {
   272  		return nil, err
   273  	}
   274  
   275  	// Configure TLS
   276  	tlsConf, err := tlsutil.NewTLSConfiguration(config.TLSConfig, true, true)
   277  	if err != nil {
   278  		return nil, err
   279  	}
   280  	incomingTLS, tlsWrap, err := getTLSConf(config.TLSConfig.EnableRPC, tlsConf)
   281  	if err != nil {
   282  		return nil, err
   283  	}
   284  
   285  	// Create the ACL object cache
   286  	aclCache, err := lru.New2Q(aclCacheSize)
   287  	if err != nil {
   288  		return nil, err
   289  	}
   290  
   291  	// Create the server
   292  	s := &Server{
   293  		config:        config,
   294  		consulCatalog: consulCatalog,
   295  		connPool:      pool.NewPool(config.LogOutput, serverRPCCache, serverMaxStreams, tlsWrap),
   296  		logger:        logger,
   297  		tlsWrap:       tlsWrap,
   298  		rpcServer:     rpc.NewServer(),
   299  		streamingRpcs: structs.NewStreamingRpcRegistry(),
   300  		nodeConns:     make(map[string][]*nodeConnState),
   301  		peers:         make(map[string][]*serverParts),
   302  		localPeers:    make(map[raft.ServerAddress]*serverParts),
   303  		reconcileCh:   make(chan serf.Member, 32),
   304  		eventCh:       make(chan serf.Event, 256),
   305  		evalBroker:    evalBroker,
   306  		blockedEvals:  blockedEvals,
   307  		planQueue:     planQueue,
   308  		rpcTLS:        incomingTLS,
   309  		aclCache:      aclCache,
   310  		shutdownCh:    make(chan struct{}),
   311  	}
   312  
   313  	// Create the periodic dispatcher for launching periodic jobs.
   314  	s.periodicDispatcher = NewPeriodicDispatch(s.logger, s)
   315  
   316  	// Initialize the stats fetcher that autopilot will use.
   317  	s.statsFetcher = NewStatsFetcher(logger, s.connPool, s.config.Region)
   318  
   319  	// Setup Vault
   320  	if err := s.setupVaultClient(); err != nil {
   321  		s.Shutdown()
   322  		s.logger.Printf("[ERR] nomad: failed to setup Vault client: %v", err)
   323  		return nil, fmt.Errorf("Failed to setup Vault client: %v", err)
   324  	}
   325  
   326  	// Initialize the RPC layer
   327  	if err := s.setupRPC(tlsWrap); err != nil {
   328  		s.Shutdown()
   329  		s.logger.Printf("[ERR] nomad: failed to start RPC layer: %s", err)
   330  		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
   331  	}
   332  
   333  	// Initialize the Raft server
   334  	if err := s.setupRaft(); err != nil {
   335  		s.Shutdown()
   336  		s.logger.Printf("[ERR] nomad: failed to start Raft: %s", err)
   337  		return nil, fmt.Errorf("Failed to start Raft: %v", err)
   338  	}
   339  
   340  	// Initialize the wan Serf
   341  	s.serf, err = s.setupSerf(config.SerfConfig, s.eventCh, serfSnapshot)
   342  	if err != nil {
   343  		s.Shutdown()
   344  		s.logger.Printf("[ERR] nomad: failed to start serf WAN: %s", err)
   345  		return nil, fmt.Errorf("Failed to start serf: %v", err)
   346  	}
   347  
   348  	// Initialize the scheduling workers
   349  	if err := s.setupWorkers(); err != nil {
   350  		s.Shutdown()
   351  		s.logger.Printf("[ERR] nomad: failed to start workers: %s", err)
   352  		return nil, fmt.Errorf("Failed to start workers: %v", err)
   353  	}
   354  
   355  	// Setup the Consul syncer
   356  	if err := s.setupConsulSyncer(); err != nil {
   357  		return nil, fmt.Errorf("failed to create server Consul syncer: %v", err)
   358  	}
   359  
   360  	// Setup the deployment watcher.
   361  	if err := s.setupDeploymentWatcher(); err != nil {
   362  		return nil, fmt.Errorf("failed to create deployment watcher: %v", err)
   363  	}
   364  
   365  	// Setup the node drainer.
   366  	s.setupNodeDrainer()
   367  
   368  	// Setup the enterprise state
   369  	if err := s.setupEnterprise(config); err != nil {
   370  		return nil, err
   371  	}
   372  
   373  	// Monitor leadership changes
   374  	go s.monitorLeadership()
   375  
   376  	// Start ingesting events for Serf
   377  	go s.serfEventHandler()
   378  
   379  	// start the RPC listener for the server
   380  	s.startRPCListener()
   381  
   382  	// Emit metrics for the eval broker
   383  	go evalBroker.EmitStats(time.Second, s.shutdownCh)
   384  
   385  	// Emit metrics for the plan queue
   386  	go planQueue.EmitStats(time.Second, s.shutdownCh)
   387  
   388  	// Emit metrics for the blocked eval tracker.
   389  	go blockedEvals.EmitStats(time.Second, s.shutdownCh)
   390  
   391  	// Emit metrics for the Vault client.
   392  	go s.vault.EmitStats(time.Second, s.shutdownCh)
   393  
   394  	// Emit metrics
   395  	go s.heartbeatStats()
   396  
   397  	// Start enterprise background workers
   398  	s.startEnterpriseBackground()
   399  
   400  	// Done
   401  	return s, nil
   402  }
   403  
    404  // startRPCListener starts the server's RPC listener
   405  func (s *Server) startRPCListener() {
   406  	ctx, cancel := context.WithCancel(context.Background())
   407  	s.rpcCancel = cancel
   408  	go s.listen(ctx)
   409  }
   410  
   411  // createRPCListener creates the server's RPC listener
   412  func (s *Server) createRPCListener() (*net.TCPListener, error) {
   413  	s.listenerCh = make(chan struct{})
   414  	listener, err := net.ListenTCP("tcp", s.config.RPCAddr)
   415  	if err != nil {
    416  		s.logger.Printf("[ERR] nomad: error when initializing RPC listener: %s", err)
   417  		return listener, err
   418  	}
   419  
   420  	s.rpcListener = listener
   421  	return listener, nil
   422  }
   423  
   424  // getTLSConf gets the server's TLS configuration based on the config supplied
   425  // by the operator
   426  func getTLSConf(enableRPC bool, tlsConf *tlsutil.Config) (*tls.Config, tlsutil.RegionWrapper, error) {
   427  	var tlsWrap tlsutil.RegionWrapper
   428  	var incomingTLS *tls.Config
   429  	if enableRPC {
   430  		tw, err := tlsConf.OutgoingTLSWrapper()
   431  		if err != nil {
   432  			return nil, nil, err
   433  		}
   434  		tlsWrap = tw
   435  
   436  		itls, err := tlsConf.IncomingTLSConfig()
   437  		if err != nil {
   438  			return nil, nil, err
   439  		}
   440  		incomingTLS = itls
   441  	}
   442  	return incomingTLS, tlsWrap, nil
   443  }
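
         // Illustrative only: a minimal sketch of how the values returned by
         // getTLSConf are consumed elsewhere in this file (NewServer, setupRPC,
         // reloadTLSConnections); names mirror the surrounding code rather than
         // introducing any new API.
         //
         //	tlsConf, err := tlsutil.NewTLSConfiguration(config.TLSConfig, true, true)
         //	if err != nil { /* handle error */ }
         //	incomingTLS, tlsWrap, err := getTLSConf(config.TLSConfig.EnableRPC, tlsConf)
         //	if err != nil { /* handle error */ }
         //	// incomingTLS becomes the rpcTLS config for inbound connections, while the
         //	// outbound wrapper is made region-aware before being handed to the Raft layer:
         //	wrapper := tlsutil.RegionSpecificWrapper(config.Region, tlsWrap)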
   444  
   445  // reloadTLSConnections updates a server's TLS configuration and reloads RPC
   446  // connections.
   447  func (s *Server) reloadTLSConnections(newTLSConfig *config.TLSConfig) error {
   448  	s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")
   449  
   450  	// Check if we can reload the RPC listener
   451  	if s.rpcListener == nil || s.rpcCancel == nil {
    452  		s.logger.Println("[WARN] nomad: Unable to reload configuration due to uninitialized rpc listener")
   453  		return fmt.Errorf("can't reload uninitialized RPC listener")
   454  	}
   455  
   456  	tlsConf, err := tlsutil.NewTLSConfiguration(newTLSConfig, true, true)
   457  	if err != nil {
   458  		s.logger.Printf("[ERR] nomad: unable to create TLS configuration %s", err)
   459  		return err
   460  	}
   461  
   462  	incomingTLS, tlsWrap, err := getTLSConf(newTLSConfig.EnableRPC, tlsConf)
   463  	if err != nil {
   464  		s.logger.Printf("[ERR] nomad: unable to reset TLS context %s", err)
   465  		return err
   466  	}
   467  
   468  	// Store the new tls wrapper.
   469  	s.tlsWrapLock.Lock()
   470  	s.tlsWrap = tlsWrap
   471  	s.tlsWrapLock.Unlock()
   472  
   473  	// Keeping configuration in sync is important for other places that require
   474  	// access to config information, such as rpc.go, where we decide on what kind
   475  	// of network connections to accept depending on the server configuration
   476  	s.config.TLSConfig = newTLSConfig
   477  
   478  	// Kill any old listeners
   479  	s.rpcCancel()
   480  
   481  	s.rpcTLS = incomingTLS
   482  	s.connPool.ReloadTLS(tlsWrap)
   483  
   484  	if err := s.rpcListener.Close(); err != nil {
   485  		s.logger.Printf("[ERR] nomad: Unable to close rpc listener %s", err)
   486  		return err
   487  	}
   488  
   489  	// Wait for the old listener to exit
   490  	<-s.listenerCh
   491  
    492  	// Create the new listener with the updated TLS config
   493  	listener, err := s.createRPCListener()
   494  	if err != nil {
   495  		listener.Close()
   496  		return err
   497  	}
   498  
   499  	// Start the new RPC listener
   500  	s.startRPCListener()
   501  
   502  	// Close and reload existing Raft connections
   503  	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
   504  	s.raftLayer.ReloadTLS(wrapper)
   505  	s.raftTransport.CloseStreams()
   506  
   507  	s.logger.Printf("[DEBUG] nomad: finished reloading server connections")
   508  	return nil
   509  }
   510  
   511  // Shutdown is used to shutdown the server
   512  func (s *Server) Shutdown() error {
   513  	s.logger.Printf("[INFO] nomad: shutting down server")
   514  	s.shutdownLock.Lock()
   515  	defer s.shutdownLock.Unlock()
   516  
   517  	if s.shutdown {
   518  		return nil
   519  	}
   520  
   521  	s.shutdown = true
   522  	close(s.shutdownCh)
   523  
   524  	if s.serf != nil {
   525  		s.serf.Shutdown()
   526  	}
   527  
   528  	if s.raft != nil {
   529  		s.raftTransport.Close()
   530  		s.raftLayer.Close()
   531  		future := s.raft.Shutdown()
   532  		if err := future.Error(); err != nil {
   533  			s.logger.Printf("[WARN] nomad: Error shutting down raft: %s", err)
   534  		}
   535  		if s.raftStore != nil {
   536  			s.raftStore.Close()
   537  		}
   538  	}
   539  
   540  	// Shutdown the RPC listener
   541  	if s.rpcListener != nil {
   542  		s.rpcListener.Close()
   543  	}
   544  
   545  	// Close the connection pool
   546  	s.connPool.Shutdown()
   547  
   548  	// Close the fsm
   549  	if s.fsm != nil {
   550  		s.fsm.Close()
   551  	}
   552  
   553  	// Stop Vault token renewal
   554  	if s.vault != nil {
   555  		s.vault.Stop()
   556  	}
   557  
   558  	return nil
   559  }
   560  
   561  // IsShutdown checks if the server is shutdown
   562  func (s *Server) IsShutdown() bool {
   563  	select {
   564  	case <-s.shutdownCh:
   565  		return true
   566  	default:
   567  		return false
   568  	}
   569  }
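
         // A hedged sketch of the pattern background goroutines in this package use
         // with shutdownCh (compare the Consul bootstrap goroutine in
         // setupBootstrapHandler below): select on the channel so the goroutine exits
         // promptly once Shutdown has closed it.
         //
         //	go func() {
         //		ticker := time.NewTicker(time.Second)
         //		defer ticker.Stop()
         //		for {
         //			select {
         //			case <-s.shutdownCh:
         //				return
         //			case <-ticker.C:
         //				// periodic work
         //			}
         //		}
         //	}()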
   570  
   571  // Leave is used to prepare for a graceful shutdown of the server
   572  func (s *Server) Leave() error {
   573  	s.logger.Printf("[INFO] nomad: server starting leave")
   574  	s.left = true
   575  
   576  	// Check the number of known peers
   577  	numPeers, err := s.numPeers()
   578  	if err != nil {
   579  		s.logger.Printf("[ERR] nomad: failed to check raft peers: %v", err)
   580  		return err
   581  	}
   582  
   583  	addr := s.raftTransport.LocalAddr()
   584  
   585  	// If we are the current leader, and we have any other peers (cluster has multiple
   586  	// servers), we should do a RemovePeer to safely reduce the quorum size. If we are
   587  	// not the leader, then we should issue our leave intention and wait to be removed
   588  	// for some sane period of time.
   589  	isLeader := s.IsLeader()
   590  	if isLeader && numPeers > 1 {
   591  		minRaftProtocol, err := s.autopilot.MinRaftProtocol()
   592  		if err != nil {
   593  			return err
   594  		}
   595  
   596  		if minRaftProtocol >= 2 && s.config.RaftConfig.ProtocolVersion >= 3 {
   597  			future := s.raft.RemoveServer(raft.ServerID(s.config.NodeID), 0, 0)
   598  			if err := future.Error(); err != nil {
   599  				s.logger.Printf("[ERR] nomad: failed to remove ourself as raft peer: %v", err)
   600  			}
   601  		} else {
   602  			future := s.raft.RemovePeer(addr)
   603  			if err := future.Error(); err != nil {
   604  				s.logger.Printf("[ERR] nomad: failed to remove ourself as raft peer: %v", err)
   605  			}
   606  		}
   607  	}
   608  
   609  	// Leave the gossip pool
   610  	if s.serf != nil {
   611  		if err := s.serf.Leave(); err != nil {
   612  			s.logger.Printf("[ERR] nomad: failed to leave Serf cluster: %v", err)
   613  		}
   614  	}
   615  
   616  	// If we were not leader, wait to be safely removed from the cluster.
   617  	// We must wait to allow the raft replication to take place, otherwise
   618  	// an immediate shutdown could cause a loss of quorum.
   619  	if !isLeader {
   620  		left := false
   621  		limit := time.Now().Add(raftRemoveGracePeriod)
   622  		for !left && time.Now().Before(limit) {
   623  			// Sleep a while before we check.
   624  			time.Sleep(50 * time.Millisecond)
   625  
   626  			// Get the latest configuration.
   627  			future := s.raft.GetConfiguration()
   628  			if err := future.Error(); err != nil {
   629  				s.logger.Printf("[ERR] nomad: failed to get raft configuration: %v", err)
   630  				break
   631  			}
   632  
   633  			// See if we are no longer included.
   634  			left = true
   635  			for _, server := range future.Configuration().Servers {
   636  				if server.Address == addr {
   637  					left = false
   638  					break
   639  				}
   640  			}
   641  		}
   642  
   643  		// TODO (alexdadgar) With the old Raft library we used to force the
   644  		// peers set to empty when a graceful leave occurred. This would
   645  		// keep voting spam down if the server was restarted, but it was
   646  		// dangerous because the peers was inconsistent with the logs and
    647  		// dangerous because the peer set was inconsistent with the logs and
   648  		// to become leader. This is now safe, but the log spam is noisy.
   649  		// The next new version of the library will have a "you are not a
   650  		// peer stop it" behavior that should address this. We will have
   651  		// to evaluate during the RC period if this interim situation is
   652  		// not too confusing for operators.
   653  
   654  		// TODO (alexdadgar) When we take a later new version of the Raft
   655  		// library it won't try to complete replication, so this peer
   656  		// may not realize that it has been removed. Need to revisit this
   657  		// and the warning here.
   658  		if !left {
   659  			s.logger.Printf("[WARN] nomad: failed to leave raft configuration gracefully, timeout")
   660  		}
   661  	}
   662  	return nil
   663  }
   664  
   665  // Reload handles a config reload specific to server-only configuration. Not
   666  // all config fields can handle a reload.
   667  func (s *Server) Reload(newConfig *Config) error {
   668  	if newConfig == nil {
   669  		return fmt.Errorf("Reload given a nil config")
   670  	}
   671  
   672  	var mErr multierror.Error
   673  
   674  	// Handle the Vault reload. Vault should never be nil but just guard.
   675  	if s.vault != nil {
   676  		if err := s.vault.SetConfig(newConfig.VaultConfig); err != nil {
   677  			multierror.Append(&mErr, err)
   678  		}
   679  	}
   680  
   681  	shouldReloadTLS, err := tlsutil.ShouldReloadRPCConnections(s.config.TLSConfig, newConfig.TLSConfig)
   682  	if err != nil {
   683  		s.logger.Printf("[ERR] nomad: error checking whether to reload TLS configuration: %s", err)
   684  	}
   685  
   686  	if shouldReloadTLS {
   687  		if err := s.reloadTLSConnections(newConfig.TLSConfig); err != nil {
   688  			s.logger.Printf("[ERR] nomad: error reloading server TLS configuration: %s", err)
   689  			multierror.Append(&mErr, err)
   690  		}
   691  	}
   692  
   693  	return mErr.ErrorOrNil()
   694  }
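
         // Illustrative only: a rough sketch of how an agent-level caller might drive
         // Reload after its configuration changes. The copy-and-mutate step and the
         // updatedTLSConfig value are assumptions for the example; only the
         // VaultConfig/TLSConfig handling shown in Reload above actually runs here.
         //
         //	newConf := *s.GetConfig()
         //	newConf.TLSConfig = updatedTLSConfig // *config.TLSConfig with new cert/key paths
         //	if err := s.Reload(&newConf); err != nil {
         //		// surface the accumulated multierror to the operator
         //	}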
   695  
   696  // setupBootstrapHandler() creates the closure necessary to support a Consul
   697  // fallback handler.
   698  func (s *Server) setupBootstrapHandler() error {
   699  	// peersTimeout is used to indicate to the Consul Syncer that the
   700  	// current Nomad Server has a stale peer set.  peersTimeout will time
   701  	// out if the Consul Syncer bootstrapFn has not observed a Raft
   702  	// leader in maxStaleLeadership.  If peersTimeout has been triggered,
   703  	// the Consul Syncer will begin querying Consul for other Nomad
   704  	// Servers.
   705  	//
   706  	// NOTE: time.Timer is used vs time.Time in order to handle clock
   707  	// drift because time.Timer is implemented as a monotonic clock.
   708  	var peersTimeout *time.Timer = time.NewTimer(0)
   709  
   710  	// consulQueryCount is the number of times the bootstrapFn has been
   711  	// called, regardless of success.
   712  	var consulQueryCount uint64
   713  
   714  	// leadershipTimedOut is a helper method that returns true if the
   715  	// peersTimeout timer has expired.
   716  	leadershipTimedOut := func() bool {
   717  		select {
   718  		case <-peersTimeout.C:
   719  			return true
   720  		default:
   721  			return false
   722  		}
   723  	}
   724  
   725  	// The bootstrapFn callback handler is used to periodically poll
   726  	// Consul to look up the Nomad Servers in Consul.  In the event the
   727  	// server has been brought up without a `retry-join` configuration
   728  	// and this Server is partitioned from the rest of the cluster,
   729  	// periodically poll Consul to reattach this Server to other servers
   730  	// in the same region and automatically reform a quorum (assuming the
   731  	// correct number of servers required for quorum are present).
   732  	bootstrapFn := func() error {
   733  		// If there is a raft leader, do nothing
   734  		if s.raft.Leader() != "" {
   735  			peersTimeout.Reset(maxStaleLeadership)
   736  			return nil
   737  		}
   738  
   739  		// (ab)use serf.go's behavior of setting BootstrapExpect to
    740  		// zero once this server has bootstrapped.
   741  		bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
   742  		if bootstrapExpect == 0 {
   743  			// This Nomad Server has been bootstrapped.  Rely on
   744  			// the peersTimeout firing as a guard to prevent
   745  			// aggressive querying of Consul.
   746  			if !leadershipTimedOut() {
   747  				return nil
   748  			}
   749  		} else {
   750  			if consulQueryCount > 0 && !leadershipTimedOut() {
   751  				return nil
   752  			}
   753  
   754  			// This Nomad Server has not been bootstrapped, reach
   755  			// out to Consul if our peer list is less than
   756  			// `bootstrap_expect`.
   757  			raftPeers, err := s.numPeers()
   758  			if err != nil {
   759  				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   760  				return nil
   761  			}
   762  
   763  			// The necessary number of Nomad Servers required for
   764  			// quorum has been reached, we do not need to poll
   765  			// Consul.  Let the normal timeout-based strategy
   766  			// take over.
   767  			if raftPeers >= int(bootstrapExpect) {
   768  				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   769  				return nil
   770  			}
   771  		}
   772  		consulQueryCount++
   773  
   774  		s.logger.Printf("[DEBUG] server.nomad: lost contact with Nomad quorum, falling back to Consul for server list")
   775  
   776  		dcs, err := s.consulCatalog.Datacenters()
   777  		if err != nil {
   778  			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   779  			return fmt.Errorf("server.nomad: unable to query Consul datacenters: %v", err)
   780  		}
   781  		if len(dcs) > 2 {
   782  			// Query the local DC first, then shuffle the
   783  			// remaining DCs.  If additional calls to bootstrapFn
   784  			// are necessary, this Nomad Server will eventually
    785  			// walk all datacenters until it finds enough hosts to
   786  			// form a quorum.
   787  			shuffleStrings(dcs[1:])
   788  			dcs = dcs[0:lib.MinInt(len(dcs), datacenterQueryLimit)]
   789  		}
   790  
   791  		nomadServerServiceName := s.config.ConsulConfig.ServerServiceName
   792  		var mErr multierror.Error
   793  		const defaultMaxNumNomadServers = 8
   794  		nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
   795  		localNode := s.serf.Memberlist().LocalNode()
   796  		for _, dc := range dcs {
   797  			consulOpts := &consulapi.QueryOptions{
   798  				AllowStale: true,
   799  				Datacenter: dc,
   800  				Near:       "_agent",
   801  				WaitTime:   consul.DefaultQueryWaitDuration,
   802  			}
   803  			consulServices, _, err := s.consulCatalog.Service(nomadServerServiceName, consul.ServiceTagSerf, consulOpts)
   804  			if err != nil {
   805  				err := fmt.Errorf("failed to query service %q in Consul datacenter %q: %v", nomadServerServiceName, dc, err)
   806  				s.logger.Printf("[WARN] server.nomad: %v", err)
   807  				mErr.Errors = append(mErr.Errors, err)
   808  				continue
   809  			}
   810  
   811  			for _, cs := range consulServices {
   812  				port := strconv.FormatInt(int64(cs.ServicePort), 10)
   813  				addr := cs.ServiceAddress
   814  				if addr == "" {
   815  					addr = cs.Address
   816  				}
   817  				if localNode.Addr.String() == addr && int(localNode.Port) == cs.ServicePort {
   818  					continue
   819  				}
   820  				serverAddr := net.JoinHostPort(addr, port)
   821  				nomadServerServices = append(nomadServerServices, serverAddr)
   822  			}
   823  		}
   824  
   825  		if len(nomadServerServices) == 0 {
   826  			if len(mErr.Errors) > 0 {
   827  				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   828  				return mErr.ErrorOrNil()
   829  			}
   830  
   831  			// Log the error and return nil so future handlers
   832  			// can attempt to register the `nomad` service.
   833  			pollInterval := peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor)
   834  			s.logger.Printf("[TRACE] server.nomad: no Nomad Servers advertising service %+q in Consul datacenters %+q, sleeping for %v", nomadServerServiceName, dcs, pollInterval)
   835  			peersTimeout.Reset(pollInterval)
   836  			return nil
   837  		}
   838  
   839  		numServersContacted, err := s.Join(nomadServerServices)
   840  		if err != nil {
   841  			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   842  			return fmt.Errorf("contacted %d Nomad Servers: %v", numServersContacted, err)
   843  		}
   844  
   845  		peersTimeout.Reset(maxStaleLeadership)
   846  		s.logger.Printf("[INFO] server.nomad: successfully contacted %d Nomad Servers", numServersContacted)
   847  
   848  		return nil
   849  	}
   850  
   851  	// Hacky replacement for old ConsulSyncer Periodic Handler.
   852  	go func() {
   853  		lastOk := true
   854  		sync := time.NewTimer(0)
   855  		for {
   856  			select {
   857  			case <-sync.C:
   858  				d := defaultConsulDiscoveryInterval
   859  				if err := bootstrapFn(); err != nil {
   860  					// Only log if it worked last time
   861  					if lastOk {
   862  						lastOk = false
   863  						s.logger.Printf("[ERR] consul: error looking up Nomad servers: %v", err)
   864  					}
   865  					d = defaultConsulDiscoveryIntervalRetry
   866  				}
   867  				sync.Reset(d)
   868  			case <-s.shutdownCh:
   869  				return
   870  			}
   871  		}
   872  	}()
   873  	return nil
   874  }
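
         // With the constants defined at the top of this file, the Consul fallback
         // polling above behaves roughly as follows (approximate, for orientation only):
         //
         //	bootstrapFn runs every 3s (defaultConsulDiscoveryInterval), or every 9s
         //	after a failure (defaultConsulDiscoveryIntervalRetry).
         //	peersTimeout resets to 15s (maxStaleLeadership) when a leader is seen or
         //	a join succeeds, and to 45s plus up to ~22.5s of jitter
         //	(peersPollInterval + RandomStagger(peersPollInterval/peersPollJitterFactor))
         //	on the other polling paths.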
   875  
    876  // setupConsulSyncer sets up the server-mode Consul auto-join handler, which
    877  // periodically queries Consul for other Nomad servers when auto-join is enabled.
   878  func (s *Server) setupConsulSyncer() error {
   879  	if s.config.ConsulConfig.ServerAutoJoin != nil && *s.config.ConsulConfig.ServerAutoJoin {
   880  		if err := s.setupBootstrapHandler(); err != nil {
   881  			return err
   882  		}
   883  	}
   884  
   885  	return nil
   886  }
   887  
   888  // setupDeploymentWatcher creates a deployment watcher that consumes the RPC
   889  // endpoints for state information and makes transitions via Raft through a
   890  // shim that provides the appropriate methods.
   891  func (s *Server) setupDeploymentWatcher() error {
   892  
   893  	// Create the raft shim type to restrict the set of raft methods that can be
   894  	// made
   895  	raftShim := &deploymentWatcherRaftShim{
   896  		apply: s.raftApply,
   897  	}
   898  
   899  	// Create the deployment watcher
   900  	s.deploymentWatcher = deploymentwatcher.NewDeploymentsWatcher(
   901  		s.logger, raftShim,
   902  		deploymentwatcher.LimitStateQueriesPerSecond,
   903  		deploymentwatcher.CrossDeploymentUpdateBatchDuration)
   904  
   905  	return nil
   906  }
   907  
   908  // setupNodeDrainer creates a node drainer which will be enabled when a server
   909  // becomes a leader.
   910  func (s *Server) setupNodeDrainer() {
   911  	// Create a shim around Raft requests
   912  	shim := drainerShim{s}
   913  	c := &drainer.NodeDrainerConfig{
   914  		Logger:                s.logger,
   915  		Raft:                  shim,
   916  		JobFactory:            drainer.GetDrainingJobWatcher,
   917  		NodeFactory:           drainer.GetNodeWatcherFactory(),
   918  		DrainDeadlineFactory:  drainer.GetDeadlineNotifier,
   919  		StateQueriesPerSecond: drainer.LimitStateQueriesPerSecond,
   920  		BatchUpdateInterval:   drainer.BatchUpdateInterval,
   921  	}
   922  	s.nodeDrainer = drainer.NewNodeDrainer(c)
   923  }
   924  
   925  // setupVaultClient is used to set up the Vault API client.
   926  func (s *Server) setupVaultClient() error {
   927  	v, err := NewVaultClient(s.config.VaultConfig, s.logger, s.purgeVaultAccessors)
   928  	if err != nil {
   929  		return err
   930  	}
   931  	s.vault = v
   932  	return nil
   933  }
   934  
   935  // setupRPC is used to setup the RPC listener
   936  func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error {
   937  	// Populate the static RPC server
   938  	s.setupRpcServer(s.rpcServer, nil)
   939  
   940  	listener, err := s.createRPCListener()
   941  	if err != nil {
   942  		listener.Close()
   943  		return err
   944  	}
   945  
   946  	if s.config.ClientRPCAdvertise != nil {
   947  		s.clientRpcAdvertise = s.config.ClientRPCAdvertise
   948  	} else {
   949  		s.clientRpcAdvertise = s.rpcListener.Addr()
   950  	}
   951  
   952  	// Verify that we have a usable advertise address
   953  	clientAddr, ok := s.clientRpcAdvertise.(*net.TCPAddr)
   954  	if !ok {
   955  		listener.Close()
   956  		return fmt.Errorf("Client RPC advertise address is not a TCP Address: %v", clientAddr)
   957  	}
   958  	if clientAddr.IP.IsUnspecified() {
   959  		listener.Close()
   960  		return fmt.Errorf("Client RPC advertise address is not advertisable: %v", clientAddr)
   961  	}
   962  
   963  	if s.config.ServerRPCAdvertise != nil {
   964  		s.serverRpcAdvertise = s.config.ServerRPCAdvertise
   965  	} else {
   966  		// Default to the Serf Advertise + RPC Port
   967  		serfIP := s.config.SerfConfig.MemberlistConfig.AdvertiseAddr
   968  		if serfIP == "" {
   969  			serfIP = s.config.SerfConfig.MemberlistConfig.BindAddr
   970  		}
   971  
   972  		addr := net.JoinHostPort(serfIP, fmt.Sprintf("%d", clientAddr.Port))
   973  		resolved, err := net.ResolveTCPAddr("tcp", addr)
   974  		if err != nil {
   975  			return fmt.Errorf("Failed to resolve Server RPC advertise address: %v", err)
   976  		}
   977  
   978  		s.serverRpcAdvertise = resolved
   979  	}
   980  
   981  	// Verify that we have a usable advertise address
   982  	serverAddr, ok := s.serverRpcAdvertise.(*net.TCPAddr)
   983  	if !ok {
   984  		return fmt.Errorf("Server RPC advertise address is not a TCP Address: %v", serverAddr)
   985  	}
   986  	if serverAddr.IP.IsUnspecified() {
   987  		listener.Close()
   988  		return fmt.Errorf("Server RPC advertise address is not advertisable: %v", serverAddr)
   989  	}
   990  
   991  	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
   992  	s.raftLayer = NewRaftLayer(s.serverRpcAdvertise, wrapper)
   993  	return nil
   994  }
   995  
   996  // setupRpcServer is used to populate an RPC server with endpoints
   997  func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) {
   998  	// Add the static endpoints to the RPC server.
   999  	if s.staticEndpoints.Status == nil {
  1000  		// Initialize the list just once
  1001  		s.staticEndpoints.ACL = &ACL{s}
  1002  		s.staticEndpoints.Alloc = &Alloc{s}
  1003  		s.staticEndpoints.Eval = &Eval{s}
  1004  		s.staticEndpoints.Job = &Job{s}
  1005  		s.staticEndpoints.Node = &Node{srv: s} // Add but don't register
  1006  		s.staticEndpoints.Deployment = &Deployment{srv: s}
  1007  		s.staticEndpoints.Operator = &Operator{s}
  1008  		s.staticEndpoints.Periodic = &Periodic{s}
  1009  		s.staticEndpoints.Plan = &Plan{s}
  1010  		s.staticEndpoints.Region = &Region{s}
  1011  		s.staticEndpoints.Status = &Status{s}
  1012  		s.staticEndpoints.System = &System{s}
  1013  		s.staticEndpoints.Search = &Search{s}
  1014  		s.staticEndpoints.Enterprise = NewEnterpriseEndpoints(s)
  1015  
  1016  		// Client endpoints
  1017  		s.staticEndpoints.ClientStats = &ClientStats{s}
  1018  		s.staticEndpoints.ClientAllocations = &ClientAllocations{s}
  1019  
  1020  		// Streaming endpoints
  1021  		s.staticEndpoints.FileSystem = &FileSystem{s}
  1022  		s.staticEndpoints.FileSystem.register()
  1023  	}
  1024  
  1025  	// Register the static handlers
  1026  	server.Register(s.staticEndpoints.ACL)
  1027  	server.Register(s.staticEndpoints.Alloc)
  1028  	server.Register(s.staticEndpoints.Eval)
  1029  	server.Register(s.staticEndpoints.Job)
  1030  	server.Register(s.staticEndpoints.Deployment)
  1031  	server.Register(s.staticEndpoints.Operator)
  1032  	server.Register(s.staticEndpoints.Periodic)
  1033  	server.Register(s.staticEndpoints.Plan)
  1034  	server.Register(s.staticEndpoints.Region)
  1035  	server.Register(s.staticEndpoints.Status)
  1036  	server.Register(s.staticEndpoints.System)
  1037  	server.Register(s.staticEndpoints.Search)
  1038  	s.staticEndpoints.Enterprise.Register(server)
  1039  	server.Register(s.staticEndpoints.ClientStats)
  1040  	server.Register(s.staticEndpoints.ClientAllocations)
  1041  	server.Register(s.staticEndpoints.FileSystem)
  1042  
  1043  	// Create new dynamic endpoints and add them to the RPC server.
  1044  	node := &Node{srv: s, ctx: ctx}
  1045  
  1046  	// Register the dynamic endpoints
  1047  	server.Register(node)
  1048  }
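
         // A hedged sketch (the real call site lives in rpc.go, not in this file) of
         // how a per-connection RPC server is assembled: a fresh *rpc.Server is built
         // for an incoming connection and a non-nil *RPCContext is passed so the
         // dynamic Node endpoint can track per-connection state.
         //
         //	srv := rpc.NewServer()
         //	s.setupRpcServer(srv, rpcCtx) // rpcCtx: per-connection *RPCContext built by rpc.go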
  1049  
  1050  // setupRaft is used to setup and initialize Raft
  1051  func (s *Server) setupRaft() error {
  1052  	// If we have an unclean exit then attempt to close the Raft store.
  1053  	defer func() {
  1054  		if s.raft == nil && s.raftStore != nil {
  1055  			if err := s.raftStore.Close(); err != nil {
  1056  				s.logger.Printf("[ERR] nomad: failed to close Raft store: %v", err)
  1057  			}
  1058  		}
  1059  	}()
  1060  
  1061  	// Create the FSM
  1062  	fsmConfig := &FSMConfig{
  1063  		EvalBroker: s.evalBroker,
  1064  		Periodic:   s.periodicDispatcher,
  1065  		Blocked:    s.blockedEvals,
  1066  		LogOutput:  s.config.LogOutput,
  1067  		Region:     s.Region(),
  1068  	}
  1069  	var err error
  1070  	s.fsm, err = NewFSM(fsmConfig)
  1071  	if err != nil {
  1072  		return err
  1073  	}
  1074  
  1075  	// Create a transport layer
  1076  	trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout,
  1077  		s.config.LogOutput)
  1078  	s.raftTransport = trans
  1079  
  1080  	// Make sure we set the LogOutput.
  1081  	s.config.RaftConfig.LogOutput = s.config.LogOutput
  1082  
  1083  	// Our version of Raft protocol requires the LocalID to match the network
  1084  	// address of the transport.
  1085  	s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr())
  1086  	if s.config.RaftConfig.ProtocolVersion >= 3 {
  1087  		s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID)
  1088  	}
  1089  
  1090  	// Build an all in-memory setup for dev mode, otherwise prepare a full
  1091  	// disk-based setup.
  1092  	var log raft.LogStore
  1093  	var stable raft.StableStore
  1094  	var snap raft.SnapshotStore
  1095  	if s.config.DevMode {
  1096  		store := raft.NewInmemStore()
  1097  		s.raftInmem = store
  1098  		stable = store
  1099  		log = store
  1100  		snap = raft.NewDiscardSnapshotStore()
  1101  
  1102  	} else {
  1103  		// Create the base raft path
  1104  		path := filepath.Join(s.config.DataDir, raftState)
  1105  		if err := ensurePath(path, true); err != nil {
  1106  			return err
  1107  		}
  1108  
  1109  		// Create the BoltDB backend
  1110  		store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
  1111  		if err != nil {
  1112  			return err
  1113  		}
  1114  		s.raftStore = store
  1115  		stable = store
  1116  
  1117  		// Wrap the store in a LogCache to improve performance
  1118  		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
  1119  		if err != nil {
  1120  			store.Close()
  1121  			return err
  1122  		}
  1123  		log = cacheStore
  1124  
  1125  		// Create the snapshot store
  1126  		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
  1127  		if err != nil {
  1128  			if s.raftStore != nil {
  1129  				s.raftStore.Close()
  1130  			}
  1131  			return err
  1132  		}
  1133  		snap = snapshots
  1134  
  1135  		// For an existing cluster being upgraded to the new version of
  1136  		// Raft, we almost never want to run recovery based on the old
  1137  		// peers.json file. We create a peers.info file with a helpful
  1138  		// note about where peers.json went, and use that as a sentinel
  1139  		// to avoid ingesting the old one that first time (if we have to
  1140  		// create the peers.info file because it's not there, we also
  1141  		// blow away any existing peers.json file).
  1142  		peersFile := filepath.Join(path, "peers.json")
  1143  		peersInfoFile := filepath.Join(path, "peers.info")
  1144  		if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) {
  1145  			if err := ioutil.WriteFile(peersInfoFile, []byte(peersInfoContent), 0755); err != nil {
  1146  				return fmt.Errorf("failed to write peers.info file: %v", err)
  1147  			}
  1148  
  1149  			// Blow away the peers.json file if present, since the
  1150  			// peers.info sentinel wasn't there.
  1151  			if _, err := os.Stat(peersFile); err == nil {
  1152  				if err := os.Remove(peersFile); err != nil {
  1153  					return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
  1154  				}
  1155  				s.logger.Printf("[INFO] nomad: deleted peers.json file (see peers.info for details)")
  1156  			}
  1157  		} else if _, err := os.Stat(peersFile); err == nil {
  1158  			s.logger.Printf("[INFO] nomad: found peers.json file, recovering Raft configuration...")
  1159  			configuration, err := raft.ReadPeersJSON(peersFile)
  1160  			if err != nil {
  1161  				return fmt.Errorf("recovery failed to parse peers.json: %v", err)
  1162  			}
  1163  			tmpFsm, err := NewFSM(fsmConfig)
  1164  			if err != nil {
  1165  				return fmt.Errorf("recovery failed to make temp FSM: %v", err)
  1166  			}
  1167  			if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
  1168  				log, stable, snap, trans, configuration); err != nil {
  1169  				return fmt.Errorf("recovery failed: %v", err)
  1170  			}
  1171  			if err := os.Remove(peersFile); err != nil {
  1172  				return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
  1173  			}
  1174  			s.logger.Printf("[INFO] nomad: deleted peers.json file after successful recovery")
  1175  		}
  1176  	}
  1177  
  1178  	// If we are in bootstrap or dev mode and the state is clean then we can
  1179  	// bootstrap now.
  1180  	if s.config.Bootstrap || s.config.DevMode {
  1181  		hasState, err := raft.HasExistingState(log, stable, snap)
  1182  		if err != nil {
  1183  			return err
  1184  		}
  1185  		if !hasState {
  1186  			configuration := raft.Configuration{
  1187  				Servers: []raft.Server{
  1188  					{
  1189  						ID:      s.config.RaftConfig.LocalID,
  1190  						Address: trans.LocalAddr(),
  1191  					},
  1192  				},
  1193  			}
  1194  			if err := raft.BootstrapCluster(s.config.RaftConfig,
  1195  				log, stable, snap, trans, configuration); err != nil {
  1196  				return err
  1197  			}
  1198  		}
  1199  	}
  1200  
  1201  	// Setup the leader channel
  1202  	leaderCh := make(chan bool, 1)
  1203  	s.config.RaftConfig.NotifyCh = leaderCh
  1204  	s.leaderCh = leaderCh
  1205  
  1206  	// Setup the Raft store
  1207  	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)
  1208  	if err != nil {
  1209  		return err
  1210  	}
  1211  	return nil
  1212  }
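
         // For orientation, the non-dev-mode on-disk layout created above is roughly:
         //
         //	<DataDir>/raft/raft.db      BoltDB log and stable store
         //	<DataDir>/raft/snapshots/   file snapshot store (snapshotsRetained copies kept)
         //	<DataDir>/raft/peers.info   sentinel; see peersInfoContent at the bottom of this file
         //	<DataDir>/raft/peers.json   only present when an operator stages a manual recovery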
  1213  
  1214  // setupSerf is used to setup and initialize a Serf
  1215  func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (*serf.Serf, error) {
  1216  	conf.Init()
  1217  	conf.NodeName = fmt.Sprintf("%s.%s", s.config.NodeName, s.config.Region)
  1218  	conf.Tags["role"] = "nomad"
  1219  	conf.Tags["region"] = s.config.Region
  1220  	conf.Tags["dc"] = s.config.Datacenter
  1221  	conf.Tags["vsn"] = fmt.Sprintf("%d", structs.ApiMajorVersion)
  1222  	conf.Tags["mvn"] = fmt.Sprintf("%d", structs.ApiMinorVersion)
  1223  	conf.Tags["build"] = s.config.Build
  1224  	conf.Tags["raft_vsn"] = fmt.Sprintf("%d", s.config.RaftConfig.ProtocolVersion)
  1225  	conf.Tags["id"] = s.config.NodeID
  1226  	conf.Tags["rpc_addr"] = s.clientRpcAdvertise.(*net.TCPAddr).IP.String()         // Address that clients will use to RPC to servers
   1227  	conf.Tags["port"] = fmt.Sprintf("%d", s.serverRpcAdvertise.(*net.TCPAddr).Port) // Port servers use to RPC to one another
  1228  	if s.config.Bootstrap || (s.config.DevMode && !s.config.DevDisableBootstrap) {
  1229  		conf.Tags["bootstrap"] = "1"
  1230  	}
  1231  	bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
  1232  	if bootstrapExpect != 0 {
  1233  		conf.Tags["expect"] = fmt.Sprintf("%d", bootstrapExpect)
  1234  	}
  1235  	if s.config.NonVoter {
  1236  		conf.Tags["nonvoter"] = "1"
  1237  	}
  1238  	if s.config.RedundancyZone != "" {
  1239  		conf.Tags[AutopilotRZTag] = s.config.RedundancyZone
  1240  	}
  1241  	if s.config.UpgradeVersion != "" {
  1242  		conf.Tags[AutopilotVersionTag] = s.config.UpgradeVersion
  1243  	}
  1244  	conf.MemberlistConfig.LogOutput = s.config.LogOutput
  1245  	conf.LogOutput = s.config.LogOutput
  1246  	conf.EventCh = ch
  1247  	if !s.config.DevMode {
  1248  		conf.SnapshotPath = filepath.Join(s.config.DataDir, path)
  1249  		if err := ensurePath(conf.SnapshotPath, false); err != nil {
  1250  			return nil, err
  1251  		}
  1252  	}
  1253  	conf.ProtocolVersion = protocolVersionMap[s.config.ProtocolVersion]
  1254  	conf.RejoinAfterLeave = true
  1255  	// LeavePropagateDelay is used to make sure broadcasted leave intents propagate
  1256  	// This value was tuned using https://www.serf.io/docs/internals/simulator.html to
  1257  	// allow for convergence in 99.9% of nodes in a 10 node cluster
  1258  	conf.LeavePropagateDelay = 1 * time.Second
  1259  	conf.Merge = &serfMergeDelegate{}
  1260  
  1261  	// Until Nomad supports this fully, we disable automatic resolution.
  1262  	// When enabled, the Serf gossip may just turn off if we are the minority
  1263  	// node which is rather unexpected.
  1264  	conf.EnableNameConflictResolution = false
  1265  	return serf.Create(conf)
  1266  }
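
         // Example of the tags a server ends up gossiping (values are illustrative):
         //
         //	role=nomad region=global dc=dc1 id=<node-id> build=0.8.4
         //	vsn=<ApiMajorVersion> mvn=<ApiMinorVersion> raft_vsn=2
         //	rpc_addr=10.0.0.1 port=4647 bootstrap=1 expect=3
         //
         // Peer discovery elsewhere in the package parses these tags into the
         // serverParts entries tracked in the peers map above.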
  1267  
  1268  // setupWorkers is used to start the scheduling workers
  1269  func (s *Server) setupWorkers() error {
  1270  	// Check if all the schedulers are disabled
  1271  	if len(s.config.EnabledSchedulers) == 0 || s.config.NumSchedulers == 0 {
  1272  		s.logger.Printf("[WARN] nomad: no enabled schedulers")
  1273  		return nil
  1274  	}
  1275  
  1276  	// Check if the core scheduler is not enabled
  1277  	foundCore := false
  1278  	for _, sched := range s.config.EnabledSchedulers {
  1279  		if sched == structs.JobTypeCore {
  1280  			foundCore = true
  1281  			continue
  1282  		}
  1283  
  1284  		if _, ok := scheduler.BuiltinSchedulers[sched]; !ok {
  1285  			return fmt.Errorf("invalid configuration: unknown scheduler %q in enabled schedulers", sched)
  1286  		}
  1287  	}
  1288  	if !foundCore {
  1289  		return fmt.Errorf("invalid configuration: %q scheduler not enabled", structs.JobTypeCore)
  1290  	}
  1291  
  1292  	// Start the workers
  1293  	for i := 0; i < s.config.NumSchedulers; i++ {
  1294  		if w, err := NewWorker(s); err != nil {
  1295  			return err
  1296  		} else {
  1297  			s.workers = append(s.workers, w)
  1298  		}
  1299  	}
  1300  	s.logger.Printf("[INFO] nomad: starting %d scheduling worker(s) for %v",
  1301  		s.config.NumSchedulers, s.config.EnabledSchedulers)
  1302  	return nil
  1303  }
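
         // Illustrative only: a server configuration that satisfies the checks above
         // (the core scheduler must always be enabled alongside any others):
         //
         //	NumSchedulers:     runtime.NumCPU(),
         //	EnabledSchedulers: []string{
         //		structs.JobTypeCore,
         //		structs.JobTypeService,
         //		structs.JobTypeBatch,
         //		structs.JobTypeSystem,
         //	},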
  1304  
  1305  // numPeers is used to check on the number of known peers, including the local
  1306  // node.
  1307  func (s *Server) numPeers() (int, error) {
  1308  	future := s.raft.GetConfiguration()
  1309  	if err := future.Error(); err != nil {
  1310  		return 0, err
  1311  	}
  1312  	configuration := future.Configuration()
  1313  	return len(configuration.Servers), nil
  1314  }
  1315  
  1316  // IsLeader checks if this server is the cluster leader
  1317  func (s *Server) IsLeader() bool {
  1318  	return s.raft.State() == raft.Leader
  1319  }
  1320  
  1321  // Join is used to have Nomad join the gossip ring
  1322  // The target address should be another node listening on the
  1323  // Serf address
  1324  func (s *Server) Join(addrs []string) (int, error) {
  1325  	return s.serf.Join(addrs, true)
  1326  }
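
         // Illustrative only: joining two peers by their Serf (gossip) addresses; 4648
         // is shown as the conventional Nomad Serf port, substitute real values.
         //
         //	if n, err := s.Join([]string{"10.0.0.10:4648", "10.0.0.11:4648"}); err != nil {
         //		s.logger.Printf("[WARN] nomad: joined %d peers before error: %v", n, err)
         //	}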
  1327  
  1328  // LocalMember is used to return the local node
   1329  func (s *Server) LocalMember() serf.Member {
   1330  	return s.serf.LocalMember()
  1331  }
  1332  
  1333  // Members is used to return the members of the serf cluster
  1334  func (s *Server) Members() []serf.Member {
  1335  	return s.serf.Members()
  1336  }
  1337  
  1338  // RemoveFailedNode is used to remove a failed node from the cluster
  1339  func (s *Server) RemoveFailedNode(node string) error {
  1340  	return s.serf.RemoveFailedNode(node)
  1341  }
  1342  
  1343  // KeyManager returns the Serf keyring manager
  1344  func (s *Server) KeyManager() *serf.KeyManager {
  1345  	return s.serf.KeyManager()
  1346  }
  1347  
  1348  // Encrypted determines if gossip is encrypted
  1349  func (s *Server) Encrypted() bool {
  1350  	return s.serf.EncryptionEnabled()
  1351  }
  1352  
  1353  // State returns the underlying state store. This should *not*
  1354  // be used to modify state directly.
  1355  func (s *Server) State() *state.StateStore {
  1356  	return s.fsm.State()
  1357  }
  1358  
  1359  // setLeaderAcl stores the given ACL token as the current leader's ACL token.
  1360  func (s *Server) setLeaderAcl(token string) {
  1361  	s.leaderAclLock.Lock()
  1362  	s.leaderAcl = token
  1363  	s.leaderAclLock.Unlock()
  1364  }
  1365  
  1366  // getLeaderAcl retrieves the leader's ACL token
  1367  func (s *Server) getLeaderAcl() string {
  1368  	s.leaderAclLock.Lock()
  1369  	defer s.leaderAclLock.Unlock()
  1370  	return s.leaderAcl
  1371  }
  1372  
  1373  // Regions returns the known regions in the cluster.
  1374  func (s *Server) Regions() []string {
  1375  	s.peerLock.RLock()
  1376  	defer s.peerLock.RUnlock()
  1377  
  1378  	regions := make([]string, 0, len(s.peers))
  1379  	for region := range s.peers {
  1380  		regions = append(regions, region)
  1381  	}
  1382  	sort.Strings(regions)
  1383  	return regions
  1384  }
  1385  
  1386  // RPC is used to make a local RPC call
  1387  func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
  1388  	codec := &codec.InmemCodec{
  1389  		Method: method,
  1390  		Args:   args,
  1391  		Reply:  reply,
  1392  	}
  1393  	if err := s.rpcServer.ServeRequest(codec); err != nil {
  1394  		return err
  1395  	}
  1396  	return codec.Err
  1397  }
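
         // A minimal sketch of a local, in-memory RPC using the codec above. It assumes
         // the Status.Ping handler (registered via the Status endpoint) accepts empty
         // argument and reply values:
         //
         //	var reply struct{}
         //	if err := s.RPC("Status.Ping", struct{}{}, &reply); err != nil {
         //		// handle RPC or handler error
         //	}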
  1398  
  1399  // StreamingRpcHandler is used to make a streaming RPC call.
  1400  func (s *Server) StreamingRpcHandler(method string) (structs.StreamingRpcHandler, error) {
  1401  	return s.streamingRpcs.GetHandler(method)
  1402  }
  1403  
  1404  // Stats is used to return statistics for debugging and insight
  1405  // for various sub-systems
  1406  func (s *Server) Stats() map[string]map[string]string {
  1407  	toString := func(v uint64) string {
  1408  		return strconv.FormatUint(v, 10)
  1409  	}
  1410  	stats := map[string]map[string]string{
  1411  		"nomad": {
  1412  			"server":        "true",
  1413  			"leader":        fmt.Sprintf("%v", s.IsLeader()),
  1414  			"leader_addr":   string(s.raft.Leader()),
  1415  			"bootstrap":     fmt.Sprintf("%v", s.config.Bootstrap),
  1416  			"known_regions": toString(uint64(len(s.peers))),
  1417  		},
  1418  		"raft":    s.raft.Stats(),
  1419  		"serf":    s.serf.Stats(),
  1420  		"runtime": stats.RuntimeStats(),
  1421  	}
  1422  
  1423  	return stats
  1424  }
  1425  
  1426  // Region returns the region of the server
  1427  func (s *Server) Region() string {
  1428  	return s.config.Region
  1429  }
  1430  
  1431  // Datacenter returns the data center of the server
  1432  func (s *Server) Datacenter() string {
  1433  	return s.config.Datacenter
  1434  }
  1435  
  1436  // GetConfig returns the config of the server for testing purposes only
  1437  func (s *Server) GetConfig() *Config {
  1438  	return s.config
  1439  }
  1440  
  1441  // ReplicationToken returns the token used for replication. We use a method to support
  1442  // dynamic reloading of this value later.
  1443  func (s *Server) ReplicationToken() string {
  1444  	return s.config.ReplicationToken
  1445  }
  1446  
  1447  // peersInfoContent is used to help operators understand what happened to the
  1448  // peers.json file. This is written to a file called peers.info in the same
  1449  // location.
  1450  const peersInfoContent = `
  1451  As of Nomad 0.5.5, the peers.json file is only used for recovery
  1452  after an outage. It should be formatted as a JSON array containing the address
  1453  and port (RPC) of each Nomad server in the cluster, like this:
  1454  
  1455  ["10.1.0.1:4647","10.1.0.2:4647","10.1.0.3:4647"]
  1456  
  1457  Under normal operation, the peers.json file will not be present.
  1458  
  1459  When Nomad starts for the first time, it will create this peers.info file and
  1460  delete any existing peers.json file so that recovery doesn't occur on the first
  1461  startup.
  1462  
  1463  Once this peers.info file is present, any peers.json file will be ingested at
  1464  startup, and will set the Raft peer configuration manually to recover from an
  1465  outage. It's crucial that all servers in the cluster are shut down before
  1466  creating the peers.json file, and that all servers receive the same
  1467  configuration. Once the peers.json file is successfully ingested and applied, it
  1468  will be deleted.
  1469  
  1470  Please see https://www.nomadproject.io/guides/outage.html for more information.
  1471  `