github.com/taylorchu/nomad@v0.5.3-rc1.0.20170407200202-db11e7dd7b55/nomad/server.go

     1  package nomad
     2  
     3  import (
     4  	"crypto/tls"
     5  	"errors"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"log"
     9  	"net"
    10  	"net/rpc"
    11  	"os"
    12  	"path/filepath"
    13  	"reflect"
    14  	"sort"
    15  	"strconv"
    16  	"sync"
    17  	"sync/atomic"
    18  	"time"
    19  
    20  	consulapi "github.com/hashicorp/consul/api"
    21  	"github.com/hashicorp/consul/lib"
    22  	"github.com/hashicorp/go-multierror"
    23  	"github.com/hashicorp/nomad/command/agent/consul"
    24  	"github.com/hashicorp/nomad/helper/tlsutil"
    25  	"github.com/hashicorp/nomad/nomad/state"
    26  	"github.com/hashicorp/nomad/nomad/structs"
    27  	"github.com/hashicorp/raft"
    28  	"github.com/hashicorp/raft-boltdb"
    29  	"github.com/hashicorp/serf/serf"
    30  )
    31  
    32  const (
    33  	// datacenterQueryLimit sets the max number of DCs that a Nomad
    34  	// Server will query to find bootstrap_expect servers.
    35  	datacenterQueryLimit = 25
    36  
    37  	// maxStaleLeadership is the maximum time we will permit this Nomad
    38  	// Server to go without seeing a valid Raft leader.
    39  	maxStaleLeadership = 15 * time.Second
    40  
    41  	// peersPollInterval is used as the polling interval between attempts
    42  	// to query Consul for Nomad Servers.
    43  	peersPollInterval = 45 * time.Second
    44  
    45  	// peersPollJitterFactor is used to provide a slight amount of variance
    46  	// to the retry interval when querying Consul Servers
    47  	peersPollJitterFactor = 2
    48  
    49  	raftState         = "raft/"
    50  	serfSnapshot      = "serf/snapshot"
    51  	snapshotsRetained = 2
    52  
    53  	// serverRPCCache controls how long we keep an idle connection open to a server
    54  	serverRPCCache = 2 * time.Minute
    55  
    56  	// serverMaxStreams controls how many idle streams we keep open to a server
    57  	serverMaxStreams = 64
    58  
    59  	// raftLogCacheSize is the maximum number of logs to cache in-memory.
    60  	// This is used to reduce disk I/O for the recently committed entries.
    61  	raftLogCacheSize = 512
    62  
    63  	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
    64  	// to replicate to gracefully leave the cluster.
    65  	raftRemoveGracePeriod = 5 * time.Second
    66  )
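
// For illustration (not part of the original file): with the values above,
// the Consul fallback polling later in this file sleeps for
//
//	peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor)
//
// which is 45s plus a random stagger in [0s, 22.5s), i.e. roughly 45-67.5
// seconds between successive Consul queries.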
    67  
    68  // Server is the Nomad server, which manages the job queues,
    69  // schedulers, and notification bus for agents.
    70  type Server struct {
    71  	config *Config
    72  	logger *log.Logger
    73  
    74  	// Connection pool to other Nomad servers
    75  	connPool *ConnPool
    76  
    77  	// Endpoints holds our RPC endpoints
    78  	endpoints endpoints
    79  
    80  	// The raft instance is used among Nomad nodes within the
    81  	// region to protect operations that require strong consistency
    82  	leaderCh      <-chan bool
    83  	raft          *raft.Raft
    84  	raftLayer     *RaftLayer
    85  	raftStore     *raftboltdb.BoltStore
    86  	raftInmem     *raft.InmemStore
    87  	raftTransport *raft.NetworkTransport
    88  
    89  	// fsm is the state machine used with Raft
    90  	fsm *nomadFSM
    91  
    92  	// rpcListener is used to listen for incoming connections
    93  	rpcListener  net.Listener
    94  	rpcServer    *rpc.Server
    95  	rpcAdvertise net.Addr
    96  
    97  	// rpcTLS is the TLS config for incoming TLS requests
    98  	rpcTLS *tls.Config
    99  
   100  	// peers is used to track the known Nomad servers. This is
   101  	// used for region forwarding and clustering.
   102  	peers      map[string][]*serverParts
   103  	localPeers map[raft.ServerAddress]*serverParts
   104  	peerLock   sync.RWMutex
   105  
   106  	// serf is the Serf cluster containing only Nomad
   107  	// servers. This is used for multi-region federation
   108  	// and automatic clustering within regions.
   109  	serf *serf.Serf
   110  
   111  	// reconcileCh is used to pass events from the serf handler
   112  	// into the leader manager. Mostly used to handle when servers
   113  	// join/leave from the region.
   114  	reconcileCh chan serf.Member
   115  
   116  	// eventCh is used to receive events from the serf cluster
   117  	eventCh chan serf.Event
   118  
   119  	// evalBroker is used to manage the in-progress evaluations
   120  	// that are waiting to be brokered to a sub-scheduler
   121  	evalBroker *EvalBroker
   122  
   123  	// blockedEvals is used to manage evaluations that are blocked on node
   124  	// capacity changes.
   125  	blockedEvals *BlockedEvals
   126  
   127  	// planQueue is used to manage the submitted allocation
   128  	// plans that are waiting to be assessed by the leader
   129  	planQueue *PlanQueue
   130  
   131  	// periodicDispatcher is used to track and create evaluations for periodic jobs.
   132  	periodicDispatcher *PeriodicDispatch
   133  
   134  	// heartbeatTimers track the expiration time of each heartbeat that has
   135  	// a TTL. On expiration, the node status is updated to be 'down'.
   136  	heartbeatTimers     map[string]*time.Timer
   137  	heartbeatTimersLock sync.Mutex
   138  
   139  	// consulSyncer advertises this Nomad Agent with Consul
   140  	consulSyncer *consul.Syncer
   141  
   142  	// vault is the client for communicating with Vault.
   143  	vault VaultClient
   144  
   145  	// workers is the set of scheduling workers used for processing evaluations
   146  	workers []*Worker
   147  
   148  	left         bool
   149  	shutdown     bool
   150  	shutdownCh   chan struct{}
   151  	shutdownLock sync.Mutex
   152  }
   153  
   154  // Holds the RPC endpoints
   155  type endpoints struct {
   156  	Status   *Status
   157  	Node     *Node
   158  	Job      *Job
   159  	Eval     *Eval
   160  	Plan     *Plan
   161  	Alloc    *Alloc
   162  	Region   *Region
   163  	Periodic *Periodic
   164  	System   *System
   165  	Operator *Operator
   166  }
   167  
   168  // NewServer is used to construct a new Nomad server from the
   169  // configuration, potentially returning an error
   170  func NewServer(config *Config, consulSyncer *consul.Syncer, logger *log.Logger) (*Server, error) {
   171  	// Check the protocol version
   172  	if err := config.CheckVersion(); err != nil {
   173  		return nil, err
   174  	}
   175  
   176  	// Create an eval broker
   177  	evalBroker, err := NewEvalBroker(config.EvalNackTimeout, config.EvalDeliveryLimit)
   178  	if err != nil {
   179  		return nil, err
   180  	}
   181  
   182  	// Create a new blocked eval tracker.
   183  	blockedEvals := NewBlockedEvals(evalBroker)
   184  
   185  	// Create a plan queue
   186  	planQueue, err := NewPlanQueue()
   187  	if err != nil {
   188  		return nil, err
   189  	}
   190  
   191  	// Configure TLS
   192  	var tlsWrap tlsutil.RegionWrapper
   193  	var incomingTLS *tls.Config
   194  	if config.TLSConfig.EnableRPC {
   195  		tlsConf := config.tlsConfig()
   196  		tw, err := tlsConf.OutgoingTLSWrapper()
   197  		if err != nil {
   198  			return nil, err
   199  		}
   200  		tlsWrap = tw
   201  
   202  		itls, err := tlsConf.IncomingTLSConfig()
   203  		if err != nil {
   204  			return nil, err
   205  		}
   206  		incomingTLS = itls
   207  	}
   208  
   209  	// Create the server
   210  	s := &Server{
   211  		config:       config,
   212  		consulSyncer: consulSyncer,
   213  		connPool:     NewPool(config.LogOutput, serverRPCCache, serverMaxStreams, tlsWrap),
   214  		logger:       logger,
   215  		rpcServer:    rpc.NewServer(),
   216  		peers:        make(map[string][]*serverParts),
   217  		localPeers:   make(map[raft.ServerAddress]*serverParts),
   218  		reconcileCh:  make(chan serf.Member, 32),
   219  		eventCh:      make(chan serf.Event, 256),
   220  		evalBroker:   evalBroker,
   221  		blockedEvals: blockedEvals,
   222  		planQueue:    planQueue,
   223  		rpcTLS:       incomingTLS,
   224  		shutdownCh:   make(chan struct{}),
   225  	}
   226  
   227  	// Create the periodic dispatcher for launching periodic jobs.
   228  	s.periodicDispatcher = NewPeriodicDispatch(s.logger, s)
   229  
   230  	// Setup Vault
   231  	if err := s.setupVaultClient(); err != nil {
   232  		s.Shutdown()
   233  		s.logger.Printf("[ERR] nomad: failed to setup Vault client: %v", err)
   234  		return nil, fmt.Errorf("Failed to setup Vault client: %v", err)
   235  	}
   236  
   237  	// Initialize the RPC layer
   238  	if err := s.setupRPC(tlsWrap); err != nil {
   239  		s.Shutdown()
   240  		s.logger.Printf("[ERR] nomad: failed to start RPC layer: %s", err)
   241  		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
   242  	}
   243  
   244  	// Initialize the Raft server
   245  	if err := s.setupRaft(); err != nil {
   246  		s.Shutdown()
   247  		s.logger.Printf("[ERR] nomad: failed to start Raft: %s", err)
   248  		return nil, fmt.Errorf("Failed to start Raft: %v", err)
   249  	}
   250  
   251  	// Initialize the wan Serf
   252  	s.serf, err = s.setupSerf(config.SerfConfig, s.eventCh, serfSnapshot)
   253  	if err != nil {
   254  		s.Shutdown()
   255  		s.logger.Printf("[ERR] nomad: failed to start serf WAN: %s", err)
   256  		return nil, fmt.Errorf("Failed to start serf: %v", err)
   257  	}
   258  
   259  	// Initialize the scheduling workers
   260  	if err := s.setupWorkers(); err != nil {
   261  		s.Shutdown()
   262  		s.logger.Printf("[ERR] nomad: failed to start workers: %s", err)
   263  		return nil, fmt.Errorf("Failed to start workers: %v", err)
   264  	}
   265  
   266  	// Setup the Consul syncer
   267  	if err := s.setupConsulSyncer(); err != nil {
   268  		return nil, fmt.Errorf("failed to create server Consul syncer: %v", err)
   269  	}
   270  
   271  	// Monitor leadership changes
   272  	go s.monitorLeadership()
   273  
   274  	// Start ingesting events for Serf
   275  	go s.serfEventHandler()
   276  
   277  	// Start the RPC listeners
   278  	go s.listen()
   279  
   280  	// Emit metrics for the eval broker
   281  	go evalBroker.EmitStats(time.Second, s.shutdownCh)
   282  
   283  	// Emit metrics for the plan queue
   284  	go planQueue.EmitStats(time.Second, s.shutdownCh)
   285  
   286  	// Emit metrics for the blocked eval tracker.
   287  	go blockedEvals.EmitStats(time.Second, s.shutdownCh)
   288  
   289  	// Emit metrics for the Vault client.
   290  	go s.vault.EmitStats(time.Second, s.shutdownCh)
   291  
   292  	// Emit metrics
   293  	go s.heartbeatStats()
   294  
   295  	// Done
   296  	return s, nil
   297  }
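
// A minimal construction sketch (illustrative only; it assumes this package's
// DefaultConfig helper and an already-running consul.Syncer named syncer):
//
//	config := DefaultConfig()
//	config.DevMode = true
//	logger := log.New(os.Stderr, "nomad: ", log.LstdFlags)
//	srv, err := NewServer(config, syncer, logger)
//	if err != nil {
//		logger.Fatalf("failed to start server: %v", err)
//	}
//	defer srv.Shutdown()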
   298  
   299  // Shutdown is used to shut down the server
   300  func (s *Server) Shutdown() error {
   301  	s.logger.Printf("[INFO] nomad: shutting down server")
   302  	s.shutdownLock.Lock()
   303  	defer s.shutdownLock.Unlock()
   304  
   305  	if s.shutdown {
   306  		return nil
   307  	}
   308  
   309  	s.shutdown = true
   310  	close(s.shutdownCh)
   311  
   312  	if s.serf != nil {
   313  		s.serf.Shutdown()
   314  	}
   315  
   316  	if s.raft != nil {
   317  		s.raftTransport.Close()
   318  		s.raftLayer.Close()
   319  		future := s.raft.Shutdown()
   320  		if err := future.Error(); err != nil {
   321  			s.logger.Printf("[WARN] nomad: Error shutting down raft: %s", err)
   322  		}
   323  		if s.raftStore != nil {
   324  			s.raftStore.Close()
   325  		}
   326  	}
   327  
   328  	// Shutdown the RPC listener
   329  	if s.rpcListener != nil {
   330  		s.rpcListener.Close()
   331  	}
   332  
   333  	// Close the connection pool
   334  	s.connPool.Shutdown()
   335  
   336  	// Close the fsm
   337  	if s.fsm != nil {
   338  		s.fsm.Close()
   339  	}
   340  
   341  	// Stop Vault token renewal
   342  	if s.vault != nil {
   343  		s.vault.Stop()
   344  	}
   345  
   346  	return nil
   347  }
   348  
   349  // IsShutdown checks if the server has been shut down
   350  func (s *Server) IsShutdown() bool {
   351  	select {
   352  	case <-s.shutdownCh:
   353  		return true
   354  	default:
   355  		return false
   356  	}
   357  }
   358  
   359  // Leave is used to prepare for a graceful shutdown of the server
   360  func (s *Server) Leave() error {
   361  	s.logger.Printf("[INFO] nomad: server starting leave")
   362  	s.left = true
   363  
   364  	// Check the number of known peers
   365  	numPeers, err := s.numPeers()
   366  	if err != nil {
   367  		s.logger.Printf("[ERR] nomad: failed to check raft peers: %v", err)
   368  		return err
   369  	}
   370  
   371  	// TODO (alexdadgar) - This will need to be updated once we support node
   372  	// IDs.
   373  	addr := s.raftTransport.LocalAddr()
   374  
   375  	// If we are the current leader and the cluster has any other peers, we
   376  	// should do a RemovePeer to safely reduce the quorum size. If we are
   377  	// not the leader, we should issue our leave intention and wait a
   378  	// reasonable amount of time to be removed.
   379  	isLeader := s.IsLeader()
   380  	if isLeader && numPeers > 1 {
   381  		future := s.raft.RemovePeer(addr)
   382  		if err := future.Error(); err != nil {
   383  			s.logger.Printf("[ERR] nomad: failed to remove ourselves as raft peer: %v", err)
   384  		}
   385  	}
   386  
   387  	// Leave the gossip pool
   388  	if s.serf != nil {
   389  		if err := s.serf.Leave(); err != nil {
   390  			s.logger.Printf("[ERR] nomad: failed to leave Serf cluster: %v", err)
   391  		}
   392  	}
   393  
   394  	// If we were not leader, wait to be safely removed from the cluster.
   395  	// We must wait to allow the raft replication to take place, otherwise
   396  	// an immediate shutdown could cause a loss of quorum.
   397  	if !isLeader {
   398  		left := false
   399  		limit := time.Now().Add(raftRemoveGracePeriod)
   400  		for !left && time.Now().Before(limit) {
   401  			// Sleep a while before we check.
   402  			time.Sleep(50 * time.Millisecond)
   403  
   404  			// Get the latest configuration.
   405  			future := s.raft.GetConfiguration()
   406  			if err := future.Error(); err != nil {
   407  				s.logger.Printf("[ERR] nomad: failed to get raft configuration: %v", err)
   408  				break
   409  			}
   410  
   411  			// See if we are no longer included.
   412  			left = true
   413  			for _, server := range future.Configuration().Servers {
   414  				if server.Address == addr {
   415  					left = false
   416  					break
   417  				}
   418  			}
   419  		}
   420  
   421  		// TODO (alexdadgar) With the old Raft library we used to force the
   422  		// peers set to empty when a graceful leave occurred. This would
   423  		// keep voting spam down if the server was restarted, but it was
   424  		// dangerous because the peer set was inconsistent with the logs and
   425  		// snapshots, so it wasn't really safe in all cases for the server
   426  		// to become leader. This is now safe, but the log spam is noisy.
   427  		// The next new version of the library will have a "you are not a
   428  		// peer stop it" behavior that should address this. We will have
   429  		// to evaluate during the RC period if this interim situation is
   430  		// not too confusing for operators.
   431  
   432  		// TODO (alexdadgar) When we take a later new version of the Raft
   433  		// library it won't try to complete replication, so this peer
   434  		// may not realize that it has been removed. Need to revisit this
   435  		// and the warning here.
   436  		if !left {
   437  			s.logger.Printf("[WARN] nomad: failed to leave raft configuration gracefully, timeout")
   438  		}
   439  	}
   440  	return nil
   441  }
   442  
   443  // Reload handles a config reload. Not all config fields can handle a reload.
   444  func (s *Server) Reload(config *Config) error {
   445  	if config == nil {
   446  		return fmt.Errorf("Reload given a nil config")
   447  	}
   448  
   449  	var mErr multierror.Error
   450  
   451  	// Handle the Vault reload. Vault should never be nil, but guard just in case.
   452  	if s.vault != nil {
   453  		if err := s.vault.SetConfig(config.VaultConfig); err != nil {
   454  			multierror.Append(&mErr, err)
   455  		}
   456  	}
   457  
   458  	return mErr.ErrorOrNil()
   459  }
   460  
   461  // setupBootstrapHandler creates the closure necessary to support a Consul
   462  // fallback handler.
   463  func (s *Server) setupBootstrapHandler() error {
   464  	// peersTimeout is used to indicate to the Consul Syncer that the
   465  	// current Nomad Server has a stale peer set.  peersTimeout will time
   466  	// out if the Consul Syncer bootstrapFn has not observed a Raft
   467  	// leader in maxStaleLeadership.  If peersTimeout has been triggered,
   468  	// the Consul Syncer will begin querying Consul for other Nomad
   469  	// Servers.
   470  	//
   471  	// NOTE: time.Timer is used vs time.Time in order to handle clock
   472  	// drift because time.Timer is implemented as a monotonic clock.
   473  	var peersTimeout *time.Timer = time.NewTimer(0)
   474  
   475  	// consulQueryCount is the number of times the bootstrapFn has been
   476  	// called, regardless of success.
   477  	var consulQueryCount uint64
   478  
   479  	// leadershipTimedOut is a helper function that returns true if the
   480  	// peersTimeout timer has expired.
   481  	leadershipTimedOut := func() bool {
   482  		select {
   483  		case <-peersTimeout.C:
   484  			return true
   485  		default:
   486  			return false
   487  		}
   488  	}
   489  
   490  	// The bootstrapFn callback handler is used to periodically poll
   491  	// Consul to look up the Nomad Servers in Consul.  In the event the
   492  	// server has been brought up without a `retry-join` configuration
   493  	// and this Server is partitioned from the rest of the cluster,
   494  	// periodically poll Consul to reattach this Server to other servers
   495  	// in the same region and automatically reform a quorum (assuming the
   496  	// correct number of servers required for quorum are present).
   497  	bootstrapFn := func() error {
   498  		// If there is a raft leader, do nothing
   499  		if s.raft.Leader() != "" {
   500  			peersTimeout.Reset(maxStaleLeadership)
   501  			return nil
   502  		}
   503  
   504  		// (ab)use serf.go's behavior of setting BootstrapExpect to
   505  		// zero once we have bootstrapped.
   506  		bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
   507  		if bootstrapExpect == 0 {
   508  			// This Nomad Server has been bootstrapped.  Rely on
   509  			// the peersTimeout firing as a guard to prevent
   510  			// aggressive querying of Consul.
   511  			if !leadershipTimedOut() {
   512  				return nil
   513  			}
   514  		} else {
   515  			if consulQueryCount > 0 && !leadershipTimedOut() {
   516  				return nil
   517  			}
   518  
   519  			// This Nomad Server has not been bootstrapped, reach
   520  			// out to Consul if our peer list is less than
   521  			// `bootstrap_expect`.
   522  			raftPeers, err := s.numPeers()
   523  			if err != nil {
   524  				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   525  				return nil
   526  			}
   527  
   528  			// The necessary number of Nomad Servers required for
   529  			// quorum has been reached, we do not need to poll
   530  			// Consul.  Let the normal timeout-based strategy
   531  			// take over.
   532  			if raftPeers >= int(bootstrapExpect) {
   533  				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   534  				return nil
   535  			}
   536  		}
   537  		consulQueryCount++
   538  
   539  		s.logger.Printf("[DEBUG] server.nomad: lost contact with Nomad quorum, falling back to Consul for server list")
   540  
   541  		consulCatalog := s.consulSyncer.ConsulClient().Catalog()
   542  		dcs, err := consulCatalog.Datacenters()
   543  		if err != nil {
   544  			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   545  			return fmt.Errorf("server.nomad: unable to query Consul datacenters: %v", err)
   546  		}
   547  		if len(dcs) > 2 {
   548  			// Query the local DC first, then shuffle the
   549  			// remaining DCs.  If additional calls to bootstrapFn
   550  			// are necessary, this Nomad Server will eventually
   551  			// walk all datacenters until it finds enough hosts to
   552  			// form a quorum.
   553  			shuffleStrings(dcs[1:])
   554  			dcs = dcs[0:lib.MinInt(len(dcs), datacenterQueryLimit)]
   555  		}
   556  
   557  		nomadServerServiceName := s.config.ConsulConfig.ServerServiceName
   558  		var mErr multierror.Error
   559  		const defaultMaxNumNomadServers = 8
   560  		nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
   561  		localNode := s.serf.Memberlist().LocalNode()
   562  		for _, dc := range dcs {
   563  			consulOpts := &consulapi.QueryOptions{
   564  				AllowStale: true,
   565  				Datacenter: dc,
   566  				Near:       "_agent",
   567  				WaitTime:   consul.DefaultQueryWaitDuration,
   568  			}
   569  			consulServices, _, err := consulCatalog.Service(nomadServerServiceName, consul.ServiceTagSerf, consulOpts)
   570  			if err != nil {
   571  				err := fmt.Errorf("failed to query service %q in Consul datacenter %q: %v", nomadServerServiceName, dc, err)
   572  				s.logger.Printf("[WARN] server.nomad: %v", err)
   573  				mErr.Errors = append(mErr.Errors, err)
   574  				continue
   575  			}
   576  
   577  			for _, cs := range consulServices {
   578  				port := strconv.FormatInt(int64(cs.ServicePort), 10)
   579  				addr := cs.ServiceAddress
   580  				if addr == "" {
   581  					addr = cs.Address
   582  				}
   583  				if localNode.Addr.String() == addr && int(localNode.Port) == cs.ServicePort {
   584  					continue
   585  				}
   586  				serverAddr := net.JoinHostPort(addr, port)
   587  				nomadServerServices = append(nomadServerServices, serverAddr)
   588  			}
   589  		}
   590  
   591  		if len(nomadServerServices) == 0 {
   592  			if len(mErr.Errors) > 0 {
   593  				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   594  				return mErr.ErrorOrNil()
   595  			}
   596  
   597  			// Log the error and return nil so future handlers
   598  			// can attempt to register the `nomad` service.
   599  			pollInterval := peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor)
   600  			s.logger.Printf("[TRACE] server.nomad: no Nomad Servers advertising service %+q in Consul datacenters %+q, sleeping for %v", nomadServerServiceName, dcs, pollInterval)
   601  			peersTimeout.Reset(pollInterval)
   602  			return nil
   603  		}
   604  
   605  		numServersContacted, err := s.Join(nomadServerServices)
   606  		if err != nil {
   607  			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   608  			return fmt.Errorf("contacted %d Nomad Servers: %v", numServersContacted, err)
   609  		}
   610  
   611  		peersTimeout.Reset(maxStaleLeadership)
   612  		s.logger.Printf("[INFO] server.nomad: successfully contacted %d Nomad Servers", numServersContacted)
   613  
   614  		return nil
   615  	}
   616  
   617  	s.consulSyncer.AddPeriodicHandler("Nomad Server Fallback Server Handler", bootstrapFn)
   618  	return nil
   619  }
   620  
   621  // setupConsulSyncer creates Server-mode consul.Syncer which periodically
   622  // executes callbacks on a fixed interval.
   623  func (s *Server) setupConsulSyncer() error {
   624  	if s.config.ConsulConfig.ServerAutoJoin != nil && *s.config.ConsulConfig.ServerAutoJoin {
   625  		if err := s.setupBootstrapHandler(); err != nil {
   626  			return err
   627  		}
   628  	}
   629  
   630  	return nil
   631  }
   632  
   633  // setupVaultClient is used to set up the Vault API client.
   634  func (s *Server) setupVaultClient() error {
   635  	v, err := NewVaultClient(s.config.VaultConfig, s.logger, s.purgeVaultAccessors)
   636  	if err != nil {
   637  		return err
   638  	}
   639  	s.vault = v
   640  	return nil
   641  }
   642  
   643  // setupRPC is used to set up the RPC listener
   644  func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error {
   645  	// Create endpoints
   646  	s.endpoints.Alloc = &Alloc{s}
   647  	s.endpoints.Eval = &Eval{s}
   648  	s.endpoints.Job = &Job{s}
   649  	s.endpoints.Node = &Node{srv: s}
   650  	s.endpoints.Operator = &Operator{s}
   651  	s.endpoints.Periodic = &Periodic{s}
   652  	s.endpoints.Plan = &Plan{s}
   653  	s.endpoints.Region = &Region{s}
   654  	s.endpoints.Status = &Status{s}
   655  	s.endpoints.System = &System{s}
   656  
   657  	// Register the handlers
   658  	s.rpcServer.Register(s.endpoints.Alloc)
   659  	s.rpcServer.Register(s.endpoints.Eval)
   660  	s.rpcServer.Register(s.endpoints.Job)
   661  	s.rpcServer.Register(s.endpoints.Node)
   662  	s.rpcServer.Register(s.endpoints.Operator)
   663  	s.rpcServer.Register(s.endpoints.Periodic)
   664  	s.rpcServer.Register(s.endpoints.Plan)
   665  	s.rpcServer.Register(s.endpoints.Region)
   666  	s.rpcServer.Register(s.endpoints.Status)
   667  	s.rpcServer.Register(s.endpoints.System)
   668  
   669  	list, err := net.ListenTCP("tcp", s.config.RPCAddr)
   670  	if err != nil {
   671  		return err
   672  	}
   673  	s.rpcListener = list
   674  
   675  	if s.config.RPCAdvertise != nil {
   676  		s.rpcAdvertise = s.config.RPCAdvertise
   677  	} else {
   678  		s.rpcAdvertise = s.rpcListener.Addr()
   679  	}
   680  
   681  	// Verify that we have a usable advertise address
   682  	addr, ok := s.rpcAdvertise.(*net.TCPAddr)
   683  	if !ok {
   684  		list.Close()
   685  		return fmt.Errorf("RPC advertise address is not a TCP Address: %v", addr)
   686  	}
   687  	if addr.IP.IsUnspecified() {
   688  		list.Close()
   689  		return fmt.Errorf("RPC advertise address is not advertisable: %v", addr)
   690  	}
   691  
   692  	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
   693  	s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrapper)
   694  	return nil
   695  }
   696  
   697  // setupRaft is used to set up and initialize Raft
   698  func (s *Server) setupRaft() error {
   699  	// If we have an unclean exit then attempt to close the Raft store.
   700  	defer func() {
   701  		if s.raft == nil && s.raftStore != nil {
   702  			if err := s.raftStore.Close(); err != nil {
   703  				s.logger.Printf("[ERR] nomad: failed to close Raft store: %v", err)
   704  			}
   705  		}
   706  	}()
   707  
   708  	// Create the FSM
   709  	var err error
   710  	s.fsm, err = NewFSM(s.evalBroker, s.periodicDispatcher, s.blockedEvals, s.config.LogOutput)
   711  	if err != nil {
   712  		return err
   713  	}
   714  
   715  	// Create a transport layer
   716  	trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout,
   717  		s.config.LogOutput)
   718  	s.raftTransport = trans
   719  
   720  	// Make sure we set the LogOutput.
   721  	s.config.RaftConfig.LogOutput = s.config.LogOutput
   722  
   723  	// Our version of the Raft protocol requires the LocalID to match the network
   724  	// address of the transport.
   725  	s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr())
   726  
   727  	// Build an all in-memory setup for dev mode, otherwise prepare a full
   728  	// disk-based setup.
   729  	var log raft.LogStore
   730  	var stable raft.StableStore
   731  	var snap raft.SnapshotStore
   732  	if s.config.DevMode {
   733  		store := raft.NewInmemStore()
   734  		s.raftInmem = store
   735  		stable = store
   736  		log = store
   737  		snap = raft.NewDiscardSnapshotStore()
   738  
   739  	} else {
   740  		// Create the base raft path
   741  		path := filepath.Join(s.config.DataDir, raftState)
   742  		if err := ensurePath(path, true); err != nil {
   743  			return err
   744  		}
   745  
   746  		// Create the BoltDB backend
   747  		store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
   748  		if err != nil {
   749  			return err
   750  		}
   751  		s.raftStore = store
   752  		stable = store
   753  
   754  		// Wrap the store in a LogCache to improve performance
   755  		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
   756  		if err != nil {
   757  			store.Close()
   758  			return err
   759  		}
   760  		log = cacheStore
   761  
   762  		// Create the snapshot store
   763  		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
   764  		if err != nil {
   765  			if s.raftStore != nil {
   766  				s.raftStore.Close()
   767  			}
   768  			return err
   769  		}
   770  		snap = snapshots
   771  
   772  		// For an existing cluster being upgraded to the new version of
   773  		// Raft, we almost never want to run recovery based on the old
   774  		// peers.json file. We create a peers.info file with a helpful
   775  		// note about where peers.json went, and use that as a sentinel
   776  		// to avoid ingesting the old one that first time (if we have to
   777  		// create the peers.info file because it's not there, we also
   778  		// blow away any existing peers.json file).
   779  		peersFile := filepath.Join(path, "peers.json")
   780  		peersInfoFile := filepath.Join(path, "peers.info")
   781  		if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) {
   782  			if err := ioutil.WriteFile(peersInfoFile, []byte(peersInfoContent), 0755); err != nil {
   783  				return fmt.Errorf("failed to write peers.info file: %v", err)
   784  			}
   785  
   786  			// Blow away the peers.json file if present, since the
   787  			// peers.info sentinel wasn't there.
   788  			if _, err := os.Stat(peersFile); err == nil {
   789  				if err := os.Remove(peersFile); err != nil {
   790  					return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
   791  				}
   792  				s.logger.Printf("[INFO] nomad: deleted peers.json file (see peers.info for details)")
   793  			}
   794  		} else if _, err := os.Stat(peersFile); err == nil {
   795  			s.logger.Printf("[INFO] nomad: found peers.json file, recovering Raft configuration...")
   796  			configuration, err := raft.ReadPeersJSON(peersFile)
   797  			if err != nil {
   798  				return fmt.Errorf("recovery failed to parse peers.json: %v", err)
   799  			}
   800  			tmpFsm, err := NewFSM(s.evalBroker, s.periodicDispatcher, s.blockedEvals, s.config.LogOutput)
   801  			if err != nil {
   802  				return fmt.Errorf("recovery failed to make temp FSM: %v", err)
   803  			}
   804  			if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
   805  				log, stable, snap, trans, configuration); err != nil {
   806  				return fmt.Errorf("recovery failed: %v", err)
   807  			}
   808  			if err := os.Remove(peersFile); err != nil {
   809  				return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
   810  			}
   811  			s.logger.Printf("[INFO] nomad: deleted peers.json file after successful recovery")
   812  		}
   813  	}
   814  
   815  	// If we are in bootstrap or dev mode and the state is clean then we can
   816  	// bootstrap now.
   817  	if s.config.Bootstrap || s.config.DevMode {
   818  		hasState, err := raft.HasExistingState(log, stable, snap)
   819  		if err != nil {
   820  			return err
   821  		}
   822  		if !hasState {
   823  			// TODO (alexdadgar) - This will need to be updated when
   824  			// we add support for node IDs.
   825  			configuration := raft.Configuration{
   826  				Servers: []raft.Server{
   827  					raft.Server{
   828  						ID:      raft.ServerID(trans.LocalAddr()),
   829  						Address: trans.LocalAddr(),
   830  					},
   831  				},
   832  			}
   833  			if err := raft.BootstrapCluster(s.config.RaftConfig,
   834  				log, stable, snap, trans, configuration); err != nil {
   835  				return err
   836  			}
   837  		}
   838  	}
   839  
   840  	// Setup the leader channel
   841  	leaderCh := make(chan bool, 1)
   842  	s.config.RaftConfig.NotifyCh = leaderCh
   843  	s.leaderCh = leaderCh
   844  
   845  	// Setup the Raft store
   846  	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)
   847  	if err != nil {
   848  		return err
   849  	}
   850  	return nil
   851  }
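
// For reference, the recovery peers.json consumed above is a plain JSON array
// of "address:port" strings, one entry per server, for example:
//
//	["10.1.0.1:4647","10.1.0.2:4647","10.1.0.3:4647"]
//
// The full operator note written alongside it as peers.info is the
// peersInfoContent constant at the bottom of this file.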
   852  
   853  // setupSerf is used to set up and initialize a Serf
   854  func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (*serf.Serf, error) {
   855  	conf.Init()
   856  	conf.NodeName = fmt.Sprintf("%s.%s", s.config.NodeName, s.config.Region)
   857  	conf.Tags["role"] = "nomad"
   858  	conf.Tags["region"] = s.config.Region
   859  	conf.Tags["dc"] = s.config.Datacenter
   860  	conf.Tags["vsn"] = fmt.Sprintf("%d", structs.ApiMajorVersion)
   861  	conf.Tags["mvn"] = fmt.Sprintf("%d", structs.ApiMinorVersion)
   862  	conf.Tags["build"] = s.config.Build
   863  	conf.Tags["port"] = fmt.Sprintf("%d", s.rpcAdvertise.(*net.TCPAddr).Port)
   864  	if s.config.Bootstrap || (s.config.DevMode && !s.config.DevDisableBootstrap) {
   865  		conf.Tags["bootstrap"] = "1"
   866  	}
   867  	bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
   868  	if bootstrapExpect != 0 {
   869  		conf.Tags["expect"] = fmt.Sprintf("%d", bootstrapExpect)
   870  	}
   871  	conf.MemberlistConfig.LogOutput = s.config.LogOutput
   872  	conf.LogOutput = s.config.LogOutput
   873  	conf.EventCh = ch
   874  	if !s.config.DevMode {
   875  		conf.SnapshotPath = filepath.Join(s.config.DataDir, path)
   876  		if err := ensurePath(conf.SnapshotPath, false); err != nil {
   877  			return nil, err
   878  		}
   879  	}
   880  	conf.ProtocolVersion = protocolVersionMap[s.config.ProtocolVersion]
   881  	conf.RejoinAfterLeave = true
   882  	conf.Merge = &serfMergeDelegate{}
   883  
   884  	// Until Nomad supports this fully, we disable automatic resolution.
   885  	// When enabled, the Serf gossip may just turn off if we are the minority
   886  	// node, which is rather unexpected.
   887  	conf.EnableNameConflictResolution = false
   888  	return serf.Create(conf)
   889  }
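
// The resulting Serf member is named "<NodeName>.<Region>" and carries the
// role, region, dc, vsn, mvn, build, and port tags (plus bootstrap/expect
// when applicable) set above; other Nomad servers rely on these tags to
// recognize peers in the gossip pool.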
   890  
   891  // setupWorkers is used to start the scheduling workers
   892  func (s *Server) setupWorkers() error {
   893  	// Check if all the schedulers are disabled
   894  	if len(s.config.EnabledSchedulers) == 0 || s.config.NumSchedulers == 0 {
   895  		s.logger.Printf("[WARN] nomad: no enabled schedulers")
   896  		return nil
   897  	}
   898  
   899  	// Start the workers
   900  	for i := 0; i < s.config.NumSchedulers; i++ {
   901  		if w, err := NewWorker(s); err != nil {
   902  			return err
   903  		} else {
   904  			s.workers = append(s.workers, w)
   905  		}
   906  	}
   907  	s.logger.Printf("[INFO] nomad: starting %d scheduling worker(s) for %v",
   908  		s.config.NumSchedulers, s.config.EnabledSchedulers)
   909  	return nil
   910  }
   911  
   912  // numPeers is used to check on the number of known peers, including the local
   913  // node.
   914  func (s *Server) numPeers() (int, error) {
   915  	future := s.raft.GetConfiguration()
   916  	if err := future.Error(); err != nil {
   917  		return 0, err
   918  	}
   919  	configuration := future.Configuration()
   920  	return len(configuration.Servers), nil
   921  }
   922  
   923  // IsLeader checks if this server is the cluster leader
   924  func (s *Server) IsLeader() bool {
   925  	return s.raft.State() == raft.Leader
   926  }
   927  
   928  // Join is used to have Nomad join the gossip ring.
   929  // The target address should be another node listening on the
   930  // Serf address
   931  func (s *Server) Join(addrs []string) (int, error) {
   932  	return s.serf.Join(addrs, true)
   933  }
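
// For example (illustrative; the address is an assumption and 4648 is Nomad's
// default Serf port):
//
//	if n, err := s.Join([]string{"10.1.0.1:4648"}); err == nil {
//		s.logger.Printf("[INFO] nomad: joined %d existing peers", n)
//	}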
   934  
   935  // LocalMember is used to return the local node
   936  func (s *Server) LocalMember() serf.Member {
   937  	return s.serf.LocalMember()
   938  }
   939  
   940  // Members is used to return the members of the serf cluster
   941  func (s *Server) Members() []serf.Member {
   942  	return s.serf.Members()
   943  }
   944  
   945  // RemoveFailedNode is used to remove a failed node from the cluster
   946  func (s *Server) RemoveFailedNode(node string) error {
   947  	return s.serf.RemoveFailedNode(node)
   948  }
   949  
   950  // KeyManager returns the Serf keyring manager
   951  func (s *Server) KeyManager() *serf.KeyManager {
   952  	return s.serf.KeyManager()
   953  }
   954  
   955  // Encrypted determines if gossip is encrypted
   956  func (s *Server) Encrypted() bool {
   957  	return s.serf.EncryptionEnabled()
   958  }
   959  
   960  // State returns the underlying state store. This should *not*
   961  // be used to modify state directly.
   962  func (s *Server) State() *state.StateStore {
   963  	return s.fsm.State()
   964  }
   965  
   966  // Regions returns the known regions in the cluster.
   967  func (s *Server) Regions() []string {
   968  	s.peerLock.RLock()
   969  	defer s.peerLock.RUnlock()
   970  
   971  	regions := make([]string, 0, len(s.peers))
   972  	for region := range s.peers {
   973  		regions = append(regions, region)
   974  	}
   975  	sort.Strings(regions)
   976  	return regions
   977  }
   978  
   979  // inmemCodec is used to do an RPC call without going over a network
   980  type inmemCodec struct {
   981  	method string
   982  	args   interface{}
   983  	reply  interface{}
   984  	err    error
   985  }
   986  
   987  func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error {
   988  	req.ServiceMethod = i.method
   989  	return nil
   990  }
   991  
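// ReadRequestBody copies the locally supplied request arguments (i.args) into
// the value the rpc.Server decodes into; the double reflect.Indirect accepts
// either a pointer or a pointer-to-pointer argument.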
   992  func (i *inmemCodec) ReadRequestBody(args interface{}) error {
   993  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args)))
   994  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args)))
   995  	dst.Set(sourceValue)
   996  	return nil
   997  }
   998  
   999  func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error {
  1000  	if resp.Error != "" {
  1001  		i.err = errors.New(resp.Error)
  1002  		return nil
  1003  	}
  1004  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply)))
  1005  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply)))
  1006  	dst.Set(sourceValue)
  1007  	return nil
  1008  }
  1009  
  1010  func (i *inmemCodec) Close() error {
  1011  	return nil
  1012  }
  1013  
  1014  // RPC is used to make a local RPC call
  1015  func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
  1016  	codec := &inmemCodec{
  1017  		method: method,
  1018  		args:   args,
  1019  		reply:  reply,
  1020  	}
  1021  	if err := s.rpcServer.ServeRequest(codec); err != nil {
  1022  		return err
  1023  	}
  1024  	return codec.err
  1025  }
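
// Example of a local, in-process call (illustrative sketch; it assumes the
// Status.Ping RPC exposed by the Status endpoint registered in setupRPC):
//
//	var out struct{}
//	if err := s.RPC("Status.Ping", struct{}{}, &out); err != nil {
//		s.logger.Printf("[ERR] nomad: self ping failed: %v", err)
//	}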
  1026  
  1027  // Stats is used to return statistics for debugging and insight
  1028  // for various sub-systems
  1029  func (s *Server) Stats() map[string]map[string]string {
  1030  	toString := func(v uint64) string {
  1031  		return strconv.FormatUint(v, 10)
  1032  	}
  1033  	stats := map[string]map[string]string{
  1034  		"nomad": map[string]string{
  1035  			"server":        "true",
  1036  			"leader":        fmt.Sprintf("%v", s.IsLeader()),
  1037  			"leader_addr":   string(s.raft.Leader()),
  1038  			"bootstrap":     fmt.Sprintf("%v", s.config.Bootstrap),
  1039  			"known_regions": toString(uint64(len(s.peers))),
  1040  		},
  1041  		"raft":    s.raft.Stats(),
  1042  		"serf":    s.serf.Stats(),
  1043  		"runtime": RuntimeStats(),
  1044  	}
  1045  
  1046  	return stats
  1047  }
  1048  
  1049  // Region returns the region of the server
  1050  func (s *Server) Region() string {
  1051  	return s.config.Region
  1052  }
  1053  
  1054  // Datacenter returns the data center of the server
  1055  func (s *Server) Datacenter() string {
  1056  	return s.config.Datacenter
  1057  }
  1058  
  1059  // GetConfig returns the config of the server for testing purposes only
  1060  func (s *Server) GetConfig() *Config {
  1061  	return s.config
  1062  }
  1063  
  1064  // peersInfoContent is used to help operators understand what happened to the
  1065  // peers.json file. This is written to a file called peers.info in the same
  1066  // location.
  1067  const peersInfoContent = `
  1068  As of Nomad 0.5.5, the peers.json file is only used for recovery
  1069  after an outage. It should be formatted as a JSON array containing the address
  1070  and port of each Nomad server in the cluster, like this:
  1071  
  1072  ["10.1.0.1:4647","10.1.0.2:4647","10.1.0.3:4647"]
  1073  
  1074  Under normal operation, the peers.json file will not be present.
  1075  
  1076  When Nomad starts for the first time, it will create this peers.info file and
  1077  delete any existing peers.json file so that recovery doesn't occur on the first
  1078  startup.
  1079  
  1080  Once this peers.info file is present, any peers.json file will be ingested at
  1081  startup, and will set the Raft peer configuration manually to recover from an
  1082  outage. It's crucial that all servers in the cluster are shut down before
  1083  creating the peers.json file, and that all servers receive the same
  1084  configuration. Once the peers.json file is successfully ingested and applied, it
  1085  will be deleted.
  1086  
  1087  Please see https://www.nomadproject.io/guides/outage.html for more information.
  1088  `