github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/server.go

     1  package nomad
     2  
     3  import (
     4  	"crypto/tls"
     5  	"errors"
     6  	"fmt"
     7  	"log"
     8  	"net"
     9  	"net/rpc"
    10  	"path/filepath"
    11  	"reflect"
    12  	"sort"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"sync/atomic"
    17  	"time"
    18  
    19  	consulapi "github.com/hashicorp/consul/api"
    20  	"github.com/hashicorp/consul/lib"
    21  	"github.com/hashicorp/go-multierror"
    22  	"github.com/hashicorp/nomad/command/agent/consul"
    23  	"github.com/hashicorp/nomad/helper/tlsutil"
    24  	"github.com/hashicorp/nomad/nomad/state"
    25  	"github.com/hashicorp/nomad/nomad/structs"
    26  	"github.com/hashicorp/raft"
    27  	"github.com/hashicorp/raft-boltdb"
    28  	"github.com/hashicorp/serf/serf"
    29  )
    30  
    31  const (
    32  	// datacenterQueryLimit sets the max number of DCs that a Nomad
    33  	// Server will query to find bootstrap_expect servers.
    34  	datacenterQueryLimit = 25
    35  
    36  	// maxStaleLeadership is the maximum time we will permit this Nomad
    37  	// Server to go without seeing a valid Raft leader.
    38  	maxStaleLeadership = 15 * time.Second
    39  
    40  	// peersPollInterval is used as the polling interval between attempts
    41  	// to query Consul for Nomad Servers.
    42  	peersPollInterval = 45 * time.Second
    43  
    44  	// peersPollJitterFactor is used to provide a slight amount of variance
    45  	// to the retry interval when querying Consul Servers.
    46  	peersPollJitterFactor = 2
    47  
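        	// raftState is the subdirectory (under the data dir) that holds the
        	// Raft log and snapshots, serfSnapshot is the path used for the Serf
        	// snapshot file, and snapshotsRetained is the number of Raft
        	// snapshots kept on disk.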
    48  	raftState         = "raft/"
    49  	serfSnapshot      = "serf/snapshot"
    50  	snapshotsRetained = 2
    51  
    52  	// serverRPCCache controls how long we keep an idle connection open to a server
    53  	serverRPCCache = 2 * time.Minute
    54  
    55  	// serverMaxStreams controls how many idle streams we keep open to a server
    56  	serverMaxStreams = 64
    57  
    58  	// raftLogCacheSize is the maximum number of logs to cache in-memory.
    59  	// This is used to reduce disk I/O for the recently committed entries.
    60  	raftLogCacheSize = 512
    61  
    62  	// raftRemoveGracePeriod is how long we wait for a RemovePeer to
    63  	// replicate so that we can gracefully leave the cluster.
    64  	raftRemoveGracePeriod = 5 * time.Second
    65  )
    66  
    67  // Server is the Nomad server, which manages the job queues,
    68  // schedulers, and notification bus for agents.
    69  type Server struct {
    70  	config *Config
    71  	logger *log.Logger
    72  
    73  	// Connection pool to other Nomad servers
    74  	connPool *ConnPool
    75  
    76  	// Endpoints holds our RPC endpoints
    77  	endpoints endpoints
    78  
    79  	// The raft instance is used among Nomad nodes within the
    80  	// region to protect operations that require strong consistency
    81  	leaderCh      <-chan bool
    82  	raft          *raft.Raft
    83  	raftLayer     *RaftLayer
    84  	raftPeers     raft.PeerStore
    85  	raftStore     *raftboltdb.BoltStore
    86  	raftInmem     *raft.InmemStore
    87  	raftTransport *raft.NetworkTransport
    88  
    89  	// fsm is the state machine used with Raft
    90  	fsm *nomadFSM
    91  
    92  	// rpcListener is used to listen for incoming connections
    93  	rpcListener  net.Listener
    94  	rpcServer    *rpc.Server
    95  	rpcAdvertise net.Addr
    96  
    97  	// rpcTLS is the TLS config for incoming TLS requests
    98  	rpcTLS *tls.Config
    99  
   100  	// peers is used to track the known Nomad servers. This is
   101  	// used for region forwarding and clustering.
   102  	peers      map[string][]*serverParts
   103  	localPeers map[string]*serverParts
   104  	peerLock   sync.RWMutex
   105  
   106  	// serf is the Serf cluster containing only Nomad
   107  	// servers. This is used for multi-region federation
   108  	// and automatic clustering within regions.
   109  	serf *serf.Serf
   110  
   111  	// reconcileCh is used to pass events from the serf handler
   112  	// into the leader manager. Mostly used to handle when servers
   113  	// join/leave from the region.
   114  	reconcileCh chan serf.Member
   115  
   116  	// eventCh is used to receive events from the serf cluster
   117  	eventCh chan serf.Event
   118  
   119  	// evalBroker is used to manage the in-progress evaluations
   120  	// that are waiting to be brokered to a sub-scheduler
   121  	evalBroker *EvalBroker
   122  
   123  	// blockedEvals is used to manage evaluations that are blocked on node
   124  	// capacity changes.
   125  	blockedEvals *BlockedEvals
   126  
   127  	// planQueue is used to manage the submitted allocation
   128  	// plans that are waiting to be assessed by the leader
   129  	planQueue *PlanQueue
   130  
   131  	// periodicDispatcher is used to track and create evaluations for periodic jobs.
   132  	periodicDispatcher *PeriodicDispatch
   133  
   134  	// heartbeatTimers track the expiration time of each heartbeat that has
   135  	// a TTL. On expiration, the node status is updated to be 'down'.
   136  	heartbeatTimers     map[string]*time.Timer
   137  	heartbeatTimersLock sync.Mutex
   138  
   139  	// consulSyncer advertises this Nomad Agent with Consul
   140  	consulSyncer *consul.Syncer
   141  
   142  	// vault is the client for communicating with Vault.
   143  	vault VaultClient
   144  
   145  	// workers are the scheduling workers used to process evaluations
   146  	workers []*Worker
   147  
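        	// left indicates that this server has initiated a graceful leave,
        	// while shutdown, shutdownCh and shutdownLock coordinate shutdown
        	// of the server.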
   148  	left         bool
   149  	shutdown     bool
   150  	shutdownCh   chan struct{}
   151  	shutdownLock sync.Mutex
   152  }
   153  
   154  // endpoints holds the RPC endpoints
   155  type endpoints struct {
   156  	Status   *Status
   157  	Node     *Node
   158  	Job      *Job
   159  	Eval     *Eval
   160  	Plan     *Plan
   161  	Alloc    *Alloc
   162  	Region   *Region
   163  	Periodic *Periodic
   164  	System   *System
   165  }
   166  
   167  // NewServer is used to construct a new Nomad server from the
   168  // configuration, potentially returning an error
   169  func NewServer(config *Config, consulSyncer *consul.Syncer, logger *log.Logger) (*Server, error) {
   170  	// Check the protocol version
   171  	if err := config.CheckVersion(); err != nil {
   172  		return nil, err
   173  	}
   174  
   175  	// Create an eval broker
   176  	evalBroker, err := NewEvalBroker(config.EvalNackTimeout, config.EvalDeliveryLimit)
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  
   181  	// Create a new blocked eval tracker.
   182  	blockedEvals := NewBlockedEvals(evalBroker)
   183  
   184  	// Create a plan queue
   185  	planQueue, err := NewPlanQueue()
   186  	if err != nil {
   187  		return nil, err
   188  	}
   189  
   190  	// Configure TLS
   191  	var tlsWrap tlsutil.RegionWrapper
   192  	var incomingTLS *tls.Config
   193  	if config.TLSConfig.EnableRPC {
   194  		tlsConf := config.tlsConfig()
   195  		tw, err := tlsConf.OutgoingTLSWrapper()
   196  		if err != nil {
   197  			return nil, err
   198  		}
   199  		tlsWrap = tw
   200  
   201  		itls, err := tlsConf.IncomingTLSConfig()
   202  		if err != nil {
   203  			return nil, err
   204  		}
   205  		incomingTLS = itls
   206  	}
   207  
   208  	// Create the server
   209  	s := &Server{
   210  		config:       config,
   211  		consulSyncer: consulSyncer,
   212  		connPool:     NewPool(config.LogOutput, serverRPCCache, serverMaxStreams, tlsWrap),
   213  		logger:       logger,
   214  		rpcServer:    rpc.NewServer(),
   215  		peers:        make(map[string][]*serverParts),
   216  		localPeers:   make(map[string]*serverParts),
   217  		reconcileCh:  make(chan serf.Member, 32),
   218  		eventCh:      make(chan serf.Event, 256),
   219  		evalBroker:   evalBroker,
   220  		blockedEvals: blockedEvals,
   221  		planQueue:    planQueue,
   222  		rpcTLS:       incomingTLS,
   223  		shutdownCh:   make(chan struct{}),
   224  	}
   225  
   226  	// Create the periodic dispatcher for launching periodic jobs.
   227  	s.periodicDispatcher = NewPeriodicDispatch(s.logger, s)
   228  
   229  	// Setup Vault
   230  	if err := s.setupVaultClient(); err != nil {
   231  		s.Shutdown()
   232  		s.logger.Printf("[ERR] nomad: failed to setup Vault client: %v", err)
   233  		return nil, fmt.Errorf("Failed to setup Vault client: %v", err)
   234  	}
   235  
   236  	// Initialize the RPC layer
   237  	if err := s.setupRPC(tlsWrap); err != nil {
   238  		s.Shutdown()
   239  		s.logger.Printf("[ERR] nomad: failed to start RPC layer: %s", err)
   240  		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
   241  	}
   242  
   243  	// Initialize the Raft server
   244  	if err := s.setupRaft(); err != nil {
   245  		s.Shutdown()
   246  		s.logger.Printf("[ERR] nomad: failed to start Raft: %s", err)
   247  		return nil, fmt.Errorf("Failed to start Raft: %v", err)
   248  	}
   249  
   250  	// Initialize the wan Serf
   251  	s.serf, err = s.setupSerf(config.SerfConfig, s.eventCh, serfSnapshot)
   252  	if err != nil {
   253  		s.Shutdown()
   254  		s.logger.Printf("[ERR] nomad: failed to start serf WAN: %s", err)
   255  		return nil, fmt.Errorf("Failed to start serf: %v", err)
   256  	}
   257  
   258  	// Initialize the scheduling workers
   259  	if err := s.setupWorkers(); err != nil {
   260  		s.Shutdown()
   261  		s.logger.Printf("[ERR] nomad: failed to start workers: %s", err)
   262  		return nil, fmt.Errorf("Failed to start workers: %v", err)
   263  	}
   264  
   265  	// Setup the Consul syncer
   266  	if err := s.setupConsulSyncer(); err != nil {
   267  		return nil, fmt.Errorf("failed to create server Consul syncer: %v", err)
   268  	}
   269  
   270  	// Monitor leadership changes
   271  	go s.monitorLeadership()
   272  
   273  	// Start ingesting events for Serf
   274  	go s.serfEventHandler()
   275  
   276  	// Start the RPC listeners
   277  	go s.listen()
   278  
   279  	// Emit metrics for the eval broker
   280  	go evalBroker.EmitStats(time.Second, s.shutdownCh)
   281  
   282  	// Emit metrics for the plan queue
   283  	go planQueue.EmitStats(time.Second, s.shutdownCh)
   284  
   285  	// Emit metrics for the blocked eval tracker.
   286  	go blockedEvals.EmitStats(time.Second, s.shutdownCh)
   287  
   288  	// Emit metrics
   289  	go s.heartbeatStats()
   290  
   291  	// Done
   292  	return s, nil
   293  }
   294  
   295  // Shutdown is used to shut down the server
   296  func (s *Server) Shutdown() error {
   297  	s.logger.Printf("[INFO] nomad: shutting down server")
   298  	s.shutdownLock.Lock()
   299  	defer s.shutdownLock.Unlock()
   300  
   301  	if s.shutdown {
   302  		return nil
   303  	}
   304  
   305  	s.shutdown = true
   306  	close(s.shutdownCh)
   307  
   308  	if s.serf != nil {
   309  		s.serf.Shutdown()
   310  	}
   311  
   312  	if s.raft != nil {
   313  		s.raftTransport.Close()
   314  		s.raftLayer.Close()
   315  		future := s.raft.Shutdown()
   316  		if err := future.Error(); err != nil {
   317  			s.logger.Printf("[WARN] nomad: Error shutting down raft: %s", err)
   318  		}
   319  		if s.raftStore != nil {
   320  			s.raftStore.Close()
   321  		}
   322  	}
   323  
   324  	// Shutdown the RPC listener
   325  	if s.rpcListener != nil {
   326  		s.rpcListener.Close()
   327  	}
   328  
   329  	// Close the connection pool
   330  	s.connPool.Shutdown()
   331  
   332  	// Close the fsm
   333  	if s.fsm != nil {
   334  		s.fsm.Close()
   335  	}
   336  
   337  	// Stop Vault token renewal
   338  	if s.vault != nil {
   339  		s.vault.Stop()
   340  	}
   341  
   342  	return nil
   343  }
   344  
   345  // IsShutdown checks if the server is shut down
   346  func (s *Server) IsShutdown() bool {
   347  	select {
   348  	case <-s.shutdownCh:
   349  		return true
   350  	default:
   351  		return false
   352  	}
   353  }
   354  
   355  // Leave is used to prepare for a graceful shutdown of the server
   356  func (s *Server) Leave() error {
   357  	s.logger.Printf("[INFO] nomad: server starting leave")
   358  	s.left = true
   359  
   360  	// Check the number of known peers
   361  	numPeers, err := s.numOtherPeers()
   362  	if err != nil {
   363  		s.logger.Printf("[ERR] nomad: failed to check raft peers: %v", err)
   364  		return err
   365  	}
   366  
   367  	// If we are the current leader, and we have any other peers (cluster has multiple
   368  	// servers), we should do a RemovePeer to safely reduce the quorum size. If we are
   369  	// not the leader, then we should issue our leave intention and wait to be removed
   370  	// for some sane period of time.
   371  	isLeader := s.IsLeader()
   372  	if isLeader && numPeers > 0 {
   373  		future := s.raft.RemovePeer(s.raftTransport.LocalAddr())
   374  		if err := future.Error(); err != nil && err != raft.ErrUnknownPeer {
   375  			s.logger.Printf("[ERR] nomad: failed to remove ourself as raft peer: %v", err)
   376  		}
   377  	}
   378  
   379  	// Leave the gossip pool
   380  	if s.serf != nil {
   381  		if err := s.serf.Leave(); err != nil {
   382  			s.logger.Printf("[ERR] nomad: failed to leave Serf cluster: %v", err)
   383  		}
   384  	}
   385  
   386  	// If we were not leader, wait to be safely removed from the cluster.
   387  	// We must wait to allow the raft replication to take place, otherwise
   388  	// an immediate shutdown could cause a loss of quorum.
   389  	if !isLeader {
   390  		limit := time.Now().Add(raftRemoveGracePeriod)
   391  		for numPeers > 0 && time.Now().Before(limit) {
   392  			// Update the number of peers
   393  			numPeers, err = s.numOtherPeers()
   394  			if err != nil {
   395  				s.logger.Printf("[ERR] nomad: failed to check raft peers: %v", err)
   396  				break
   397  			}
   398  
   399  			// Avoid the sleep if we are done
   400  			if numPeers == 0 {
   401  				break
   402  			}
   403  
   404  			// Sleep a while and check again
   405  			time.Sleep(50 * time.Millisecond)
   406  		}
   407  		if numPeers != 0 {
   408  			s.logger.Printf("[WARN] nomad: failed to leave raft peer set gracefully, timeout")
   409  		}
   410  	}
   411  	return nil
   412  }
   413  
   414  // setupBootstrapHandler creates the closure necessary to support a Consul
   415  // fallback handler.
   416  func (s *Server) setupBootstrapHandler() error {
   417  	// peersTimeout is used to indicate to the Consul Syncer that the
   418  	// current Nomad Server has a stale peer set.  peersTimeout will time
   419  	// out if the Consul Syncer bootstrapFn has not observed a Raft
   420  	// leader in maxStaleLeadership.  If peersTimeout has been triggered,
   421  	// the Consul Syncer will begin querying Consul for other Nomad
   422  	// Servers.
   423  	//
   424  	// NOTE: time.Timer is used vs time.Time in order to handle clock
   425  	// drift because time.Timer is implemented as a monotonic clock.
   426  	peersTimeout := time.NewTimer(0)
   427  
   428  	// consulQueryCount is the number of times the bootstrapFn has been
   429  	// called, regardless of success.
   430  	var consulQueryCount uint64
   431  
   432  	// leadershipTimedOut is a helper method that returns true if the
   433  	// peersTimeout timer has expired.
   434  	leadershipTimedOut := func() bool {
   435  		select {
   436  		case <-peersTimeout.C:
   437  			return true
   438  		default:
   439  			return false
   440  		}
   441  	}
   442  
   443  	// The bootstrapFn callback handler is used to periodically poll
   444  	// Consul to look up the Nomad Servers in Consul.  In the event the
   445  	// server has been brought up without a `retry-join` configuration
   446  	// and this Server is partitioned from the rest of the cluster,
   447  	// periodically poll Consul to reattach this Server to other servers
   448  	// in the same region and automatically reform a quorum (assuming the
   449  	// correct number of servers required for quorum are present).
   450  	bootstrapFn := func() error {
   451  		// If there is a raft leader, do nothing
   452  		if s.raft.Leader() != "" {
   453  			peersTimeout.Reset(maxStaleLeadership)
   454  			return nil
   455  		}
   456  
   457  		// (ab)use serf.go's behavior of setting BootstrapExpect to
   458  		// zero once we have bootstrapped.
   459  		bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
   460  		if bootstrapExpect == 0 {
   461  			// This Nomad Server has been bootstrapped.  Rely on
   462  			// the peersTimeout firing as a guard to prevent
   463  			// aggressive querying of Consul.
   464  			if !leadershipTimedOut() {
   465  				return nil
   466  			}
   467  		} else {
   468  			if consulQueryCount > 0 && !leadershipTimedOut() {
   469  				return nil
   470  			}
   471  
   472  			// This Nomad Server has not been bootstrapped, reach
   473  			// out to Consul if our peer list is less than
   474  			// `bootstrap_expect`.
   475  			raftPeers, err := s.raftPeers.Peers()
   476  			if err != nil {
   477  				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   478  				return nil
   479  			}
   480  
   481  			// The number of Nomad Servers required for quorum
   482  			// has been reached, so we do not need to poll
   483  			// Consul.  Let the normal timeout-based strategy
   484  			// take over.
   485  			if len(raftPeers) >= int(bootstrapExpect) {
   486  				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   487  				return nil
   488  			}
   489  		}
   490  		consulQueryCount++
   491  
   492  		s.logger.Printf("[DEBUG] server.consul: lost contact with Nomad quorum, falling back to Consul for server list")
   493  
   494  		consulCatalog := s.consulSyncer.ConsulClient().Catalog()
   495  		dcs, err := consulCatalog.Datacenters()
   496  		if err != nil {
   497  			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   498  			return fmt.Errorf("server.consul: unable to query Consul datacenters: %v", err)
   499  		}
   500  		if len(dcs) > 2 {
   501  			// Query the local DC first, then shuffle the
   502  			// remaining DCs.  If additional calls to bootstrapFn
   503  			// are necessary, this Nomad Server will eventually
   504  			// walk all datacenters until it finds enough hosts to
   505  			// form a quorum.
   506  			shuffleStrings(dcs[1:])
   507  			dcs = dcs[0:lib.MinInt(len(dcs), datacenterQueryLimit)]
   508  		}
   509  
   510  		nomadServerServiceName := s.config.ConsulConfig.ServerServiceName
   511  		var mErr multierror.Error
   512  		const defaultMaxNumNomadServers = 8
   513  		nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
   514  		localNode := s.serf.Memberlist().LocalNode()
   515  		for _, dc := range dcs {
   516  			consulOpts := &consulapi.QueryOptions{
   517  				AllowStale: true,
   518  				Datacenter: dc,
   519  				Near:       "_agent",
   520  				WaitTime:   consul.DefaultQueryWaitDuration,
   521  			}
   522  			consulServices, _, err := consulCatalog.Service(nomadServerServiceName, consul.ServiceTagSerf, consulOpts)
   523  			if err != nil {
   524  				err := fmt.Errorf("failed to query service %q in Consul datacenter %q: %v", nomadServerServiceName, dc, err)
   525  				s.logger.Printf("[WARN] server.consul: %v", err)
   526  				mErr.Errors = append(mErr.Errors, err)
   527  				continue
   528  			}
   529  
   530  			for _, cs := range consulServices {
   531  				port := strconv.FormatInt(int64(cs.ServicePort), 10)
   532  				addr := cs.ServiceAddress
   533  				if addr == "" {
   534  					addr = cs.Address
   535  				}
   536  				if localNode.Addr.String() == addr && int(localNode.Port) == cs.ServicePort {
   537  					continue
   538  				}
   539  				serverAddr := net.JoinHostPort(addr, port)
   540  				nomadServerServices = append(nomadServerServices, serverAddr)
   541  			}
   542  		}
   543  
   544  		if len(nomadServerServices) == 0 {
   545  			if len(mErr.Errors) > 0 {
   546  				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   547  				return mErr.ErrorOrNil()
   548  			}
   549  
   550  			// Log the error and return nil so future handlers
   551  			// can attempt to register the `nomad` service.
   552  			pollInterval := peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor)
   553  			s.logger.Printf("[TRACE] server.consul: no Nomad Servers advertising service %+q in Consul datacenters %+q, sleeping for %v", nomadServerServiceName, dcs, pollInterval)
   554  			peersTimeout.Reset(pollInterval)
   555  			return nil
   556  		}
   557  
   558  		numServersContacted, err := s.Join(nomadServerServices)
   559  		if err != nil {
   560  			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
   561  			return fmt.Errorf("contacted %d Nomad Servers: %v", numServersContacted, err)
   562  		}
   563  
   564  		peersTimeout.Reset(maxStaleLeadership)
   565  		s.logger.Printf("[INFO] server.consul: successfully contacted %d Nomad Servers", numServersContacted)
   566  
   567  		return nil
   568  	}
   569  
   570  	s.consulSyncer.AddPeriodicHandler("Nomad Server Fallback Server Handler", bootstrapFn)
   571  	return nil
   572  }
   573  
   574  // setupConsulSyncer creates Server-mode consul.Syncer which periodically
   575  // executes callbacks on a fixed interval.
   576  func (s *Server) setupConsulSyncer() error {
   577  	if s.config.ConsulConfig.ServerAutoJoin {
   578  		if err := s.setupBootstrapHandler(); err != nil {
   579  			return err
   580  		}
   581  	}
   582  
   583  	return nil
   584  }
   585  
   586  // setupVaultClient is used to set up the Vault API client.
   587  func (s *Server) setupVaultClient() error {
   588  	v, err := NewVaultClient(s.config.VaultConfig, s.logger, s.purgeVaultAccessors)
   589  	if err != nil {
   590  		return err
   591  	}
   592  	s.vault = v
   593  	return nil
   594  }
   595  
   596  // setupRPC is used to set up the RPC listener
   597  func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error {
   598  	// Create endpoints
   599  	s.endpoints.Status = &Status{s}
   600  	s.endpoints.Node = &Node{srv: s}
   601  	s.endpoints.Job = &Job{s}
   602  	s.endpoints.Eval = &Eval{s}
   603  	s.endpoints.Plan = &Plan{s}
   604  	s.endpoints.Alloc = &Alloc{s}
   605  	s.endpoints.Region = &Region{s}
   606  	s.endpoints.Periodic = &Periodic{s}
   607  	s.endpoints.System = &System{s}
   608  
   609  	// Register the handlers
   610  	s.rpcServer.Register(s.endpoints.Status)
   611  	s.rpcServer.Register(s.endpoints.Node)
   612  	s.rpcServer.Register(s.endpoints.Job)
   613  	s.rpcServer.Register(s.endpoints.Eval)
   614  	s.rpcServer.Register(s.endpoints.Plan)
   615  	s.rpcServer.Register(s.endpoints.Alloc)
   616  	s.rpcServer.Register(s.endpoints.Region)
   617  	s.rpcServer.Register(s.endpoints.Periodic)
   618  	s.rpcServer.Register(s.endpoints.System)
   619  
   620  	list, err := net.ListenTCP("tcp", s.config.RPCAddr)
   621  	if err != nil {
   622  		return err
   623  	}
   624  	s.rpcListener = list
   625  
   626  	if s.config.RPCAdvertise != nil {
   627  		s.rpcAdvertise = s.config.RPCAdvertise
   628  	} else {
   629  		s.rpcAdvertise = s.rpcListener.Addr()
   630  	}
   631  
   632  	// Verify that we have a usable advertise address
   633  	addr, ok := s.rpcAdvertise.(*net.TCPAddr)
   634  	if !ok {
   635  		list.Close()
   636  		return fmt.Errorf("RPC advertise address is not a TCP Address: %v", addr)
   637  	}
   638  	if addr.IP.IsUnspecified() {
   639  		list.Close()
   640  		return fmt.Errorf("RPC advertise address is not advertisable: %v", addr)
   641  	}
   642  
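        	// Create the Raft network layer on the advertised RPC address,
        	// wrapping its connections with region-specific TLS when enabled.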
   643  	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
   644  	s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrapper)
   645  	return nil
   646  }
   647  
   648  // setupRaft is used to set up and initialize Raft
   649  func (s *Server) setupRaft() error {
   650  	// If we are in bootstrap mode, enable a single node cluster
   651  	if s.config.Bootstrap || (s.config.DevMode && !s.config.DevDisableBootstrap) {
   652  		s.config.RaftConfig.EnableSingleNode = true
   653  	}
   654  
   655  	// Create the FSM
   656  	var err error
   657  	s.fsm, err = NewFSM(s.evalBroker, s.periodicDispatcher, s.blockedEvals, s.config.LogOutput)
   658  	if err != nil {
   659  		return err
   660  	}
   661  
   662  	// Create a transport layer
   663  	trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout,
   664  		s.config.LogOutput)
   665  	s.raftTransport = trans
   666  
   667  	// Create the backend raft store for logs and stable storage
   668  	var log raft.LogStore
   669  	var stable raft.StableStore
   670  	var snap raft.SnapshotStore
   671  	var peers raft.PeerStore
   672  	if s.config.DevMode {
   673  		store := raft.NewInmemStore()
   674  		s.raftInmem = store
   675  		stable = store
   676  		log = store
   677  		snap = raft.NewDiscardSnapshotStore()
   678  		peers = &raft.StaticPeers{}
   679  		s.raftPeers = peers
   680  
   681  	} else {
   682  		// Create the base raft path
   683  		path := filepath.Join(s.config.DataDir, raftState)
   684  		if err := ensurePath(path, true); err != nil {
   685  			return err
   686  		}
   687  
   688  		// Create the BoltDB backend
   689  		store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
   690  		if err != nil {
   691  			return err
   692  		}
   693  		s.raftStore = store
   694  		stable = store
   695  
   696  		// Wrap the store in a LogCache to improve performance
   697  		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
   698  		if err != nil {
   699  			store.Close()
   700  			return err
   701  		}
   702  		log = cacheStore
   703  
   704  		// Create the snapshot store
   705  		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
   706  		if err != nil {
   707  			if s.raftStore != nil {
   708  				s.raftStore.Close()
   709  			}
   710  			return err
   711  		}
   712  		snap = snapshots
   713  
   714  		// Setup the peer store
   715  		s.raftPeers = raft.NewJSONPeers(path, trans)
   716  		peers = s.raftPeers
   717  	}
   718  
   719  	// Ensure local host is always included if we are in bootstrap mode
   720  	if s.config.RaftConfig.EnableSingleNode {
   721  		p, err := peers.Peers()
   722  		if err != nil {
   723  			if s.raftStore != nil {
   724  				s.raftStore.Close()
   725  			}
   726  			return err
   727  		}
   728  		if !raft.PeerContained(p, trans.LocalAddr()) {
   729  			peers.SetPeers(raft.AddUniquePeer(p, trans.LocalAddr()))
   730  		}
   731  	}
   732  
   733  	// Make sure we set the LogOutput
   734  	s.config.RaftConfig.LogOutput = s.config.LogOutput
   735  
   736  	// Setup the leader channel
   737  	leaderCh := make(chan bool, 1)
   738  	s.config.RaftConfig.NotifyCh = leaderCh
   739  	s.leaderCh = leaderCh
   740  
   741  	// Setup the Raft store
   742  	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable,
   743  		snap, peers, trans)
   744  	if err != nil {
   745  		if s.raftStore != nil {
   746  			s.raftStore.Close()
   747  		}
   748  		trans.Close()
   749  		return err
   750  	}
   751  	return nil
   752  }
   753  
   754  // setupSerf is used to set up and initialize Serf
   755  func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (*serf.Serf, error) {
   756  	conf.Init()
   757  	conf.NodeName = fmt.Sprintf("%s.%s", s.config.NodeName, s.config.Region)
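        	// These tags advertise the server's role, region, datacenter, API
        	// versions, build, and RPC port to the other servers in the gossip
        	// pool.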
   758  	conf.Tags["role"] = "nomad"
   759  	conf.Tags["region"] = s.config.Region
   760  	conf.Tags["dc"] = s.config.Datacenter
   761  	conf.Tags["vsn"] = fmt.Sprintf("%d", structs.ApiMajorVersion)
   762  	conf.Tags["mvn"] = fmt.Sprintf("%d", structs.ApiMinorVersion)
   763  	conf.Tags["build"] = s.config.Build
   764  	conf.Tags["port"] = fmt.Sprintf("%d", s.rpcAdvertise.(*net.TCPAddr).Port)
   765  	if s.config.Bootstrap || (s.config.DevMode && !s.config.DevDisableBootstrap) {
   766  		conf.Tags["bootstrap"] = "1"
   767  	}
   768  	bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
   769  	if bootstrapExpect != 0 {
   770  		conf.Tags["expect"] = fmt.Sprintf("%d", bootstrapExpect)
   771  	}
   772  	conf.MemberlistConfig.LogOutput = s.config.LogOutput
   773  	conf.LogOutput = s.config.LogOutput
   774  	conf.EventCh = ch
   775  	if !s.config.DevMode {
   776  		conf.SnapshotPath = filepath.Join(s.config.DataDir, path)
   777  		if err := ensurePath(conf.SnapshotPath, false); err != nil {
   778  			return nil, err
   779  		}
   780  	}
   781  	conf.ProtocolVersion = protocolVersionMap[s.config.ProtocolVersion]
   782  	conf.RejoinAfterLeave = true
   783  	conf.Merge = &serfMergeDelegate{}
   784  
   785  	// Until Nomad supports this fully, we disable automatic resolution.
   786  	// When enabled, the Serf gossip may just turn off if we are the minority
   787  	// node, which is rather unexpected.
   788  	conf.EnableNameConflictResolution = false
   789  	return serf.Create(conf)
   790  }
   791  
   792  // setupWorkers is used to start the scheduling workers
   793  func (s *Server) setupWorkers() error {
   794  	// Check if all the schedulers are disabled
   795  	if len(s.config.EnabledSchedulers) == 0 || s.config.NumSchedulers == 0 {
   796  		s.logger.Printf("[WARN] nomad: no enabled schedulers")
   797  		return nil
   798  	}
   799  
   800  	// Start the workers
   801  	for i := 0; i < s.config.NumSchedulers; i++ {
   802  		if w, err := NewWorker(s); err != nil {
   803  			return err
   804  		} else {
   805  			s.workers = append(s.workers, w)
   806  		}
   807  	}
   808  	s.logger.Printf("[INFO] nomad: starting %d scheduling worker(s) for %v",
   809  		s.config.NumSchedulers, s.config.EnabledSchedulers)
   810  	return nil
   811  }
   812  
   813  // numOtherPeers returns the number of known Raft peers,
   814  // excluding the local node
   815  func (s *Server) numOtherPeers() (int, error) {
   816  	peers, err := s.raftPeers.Peers()
   817  	if err != nil {
   818  		return 0, err
   819  	}
   820  	otherPeers := raft.ExcludePeer(peers, s.raftTransport.LocalAddr())
   821  	return len(otherPeers), nil
   822  }
   823  
   824  // IsLeader checks if this server is the cluster leader
   825  func (s *Server) IsLeader() bool {
   826  	return s.raft.State() == raft.Leader
   827  }
   828  
   829  // Join is used to have Nomad join the gossip ring
   830  // The target address should be another node listening on the
   831  // Serf address
   832  func (s *Server) Join(addrs []string) (int, error) {
   833  	return s.serf.Join(addrs, true)
   834  }
   835  
   836  // LocalMember is used to return the local node
   837  func (s *Server) LocalMember() serf.Member {
   838  	return s.serf.LocalMember()
   839  }
   840  
   841  // Members is used to return the members of the serf cluster
   842  func (s *Server) Members() []serf.Member {
   843  	return s.serf.Members()
   844  }
   845  
   846  // RemoveFailedNode is used to remove a failed node from the cluster
   847  func (s *Server) RemoveFailedNode(node string) error {
   848  	return s.serf.RemoveFailedNode(node)
   849  }
   850  
   851  // KeyManager returns the Serf keyring manager
   852  func (s *Server) KeyManager() *serf.KeyManager {
   853  	return s.serf.KeyManager()
   854  }
   855  
   856  // Encrypted determines if gossip is encrypted
   857  func (s *Server) Encrypted() bool {
   858  	return s.serf.EncryptionEnabled()
   859  }
   860  
   861  // State returns the underlying state store. This should *not*
   862  // be used to modify state directly.
   863  func (s *Server) State() *state.StateStore {
   864  	return s.fsm.State()
   865  }
   866  
   867  // Regions returns the known regions in the cluster.
   868  func (s *Server) Regions() []string {
   869  	s.peerLock.RLock()
   870  	defer s.peerLock.RUnlock()
   871  
   872  	regions := make([]string, 0, len(s.peers))
   873  	for region := range s.peers {
   874  		regions = append(regions, region)
   875  	}
   876  	sort.Strings(regions)
   877  	return regions
   878  }
   879  
   880  // inmemCodec is used to do an RPC call without going over a network
   881  type inmemCodec struct {
   882  	method string
   883  	args   interface{}
   884  	reply  interface{}
   885  	err    error
   886  }
   887  
   888  func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error {
   889  	req.ServiceMethod = i.method
   890  	return nil
   891  }
   892  
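        // ReadRequestBody copies the in-memory request arguments into the RPC
        // handler's argument value via reflection, avoiding any serialization.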
   893  func (i *inmemCodec) ReadRequestBody(args interface{}) error {
   894  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args)))
   895  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args)))
   896  	dst.Set(sourceValue)
   897  	return nil
   898  }
   899  
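        // WriteResponse records any error returned by the handler and copies
        // the handler's reply back to the caller's reply value via reflection.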
   900  func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error {
   901  	if resp.Error != "" {
   902  		i.err = errors.New(resp.Error)
   903  		return nil
   904  	}
   905  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply)))
   906  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply)))
   907  	dst.Set(sourceValue)
   908  	return nil
   909  }
   910  
   911  func (i *inmemCodec) Close() error {
   912  	return nil
   913  }
   914  
   915  // RPC is used to make a local RPC call
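        // For example, s.RPC("Status.Ping", struct{}{}, &struct{}{}) invokes
        // the Status endpoint's Ping handler in-process.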
   916  func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
   917  	codec := &inmemCodec{
   918  		method: method,
   919  		args:   args,
   920  		reply:  reply,
   921  	}
   922  	if err := s.rpcServer.ServeRequest(codec); err != nil {
   923  		return err
   924  	}
   925  	return codec.err
   926  }
   927  
   928  // Stats is used to return statistics for debugging and insight
   929  // for various sub-systems
   930  func (s *Server) Stats() map[string]map[string]string {
   931  	toString := func(v uint64) string {
   932  		return strconv.FormatUint(v, 10)
   933  	}
   934  	stats := map[string]map[string]string{
   935  		"nomad": map[string]string{
   936  			"server":        "true",
   937  			"leader":        fmt.Sprintf("%v", s.IsLeader()),
   938  			"leader_addr":   s.raft.Leader(),
   939  			"bootstrap":     fmt.Sprintf("%v", s.config.Bootstrap),
   940  			"known_regions": toString(uint64(len(s.peers))),
   941  		},
   942  		"raft":    s.raft.Stats(),
   943  		"serf":    s.serf.Stats(),
   944  		"runtime": RuntimeStats(),
   945  	}
   946  	if peers, err := s.raftPeers.Peers(); err == nil {
   947  		stats["raft"]["raft_peers"] = strings.Join(peers, ",")
   948  	} else {
   949  		s.logger.Printf("[DEBUG] server: error getting raft peers: %v", err)
   950  	}
   951  	return stats
   952  }
   953  
   954  // Region returns the region of the server
   955  func (s *Server) Region() string {
   956  	return s.config.Region
   957  }
   958  
   959  // Datacenter returns the data center of the server
   960  func (s *Server) Datacenter() string {
   961  	return s.config.Datacenter
   962  }
   963  
   964  // GetConfig returns the config of the server for testing purposes only
   965  func (s *Server) GetConfig() *Config {
   966  	return s.config
   967  }