github.com/clly/consul@v1.4.5/agent/consul/server.go

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"crypto/tls"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"log"
    11  	"net"
    12  	"net/rpc"
    13  	"os"
    14  	"path/filepath"
    15  	"reflect"
    16  	"strconv"
    17  	"sync"
    18  	"sync/atomic"
    19  	"time"
    20  
    21  	ca "github.com/hashicorp/consul/agent/connect/ca"
    22  	"github.com/hashicorp/consul/agent/consul/autopilot"
    23  	"github.com/hashicorp/consul/agent/consul/fsm"
    24  	"github.com/hashicorp/consul/agent/consul/state"
    25  	"github.com/hashicorp/consul/agent/metadata"
    26  	"github.com/hashicorp/consul/agent/pool"
    27  	"github.com/hashicorp/consul/agent/router"
    28  	"github.com/hashicorp/consul/agent/structs"
    29  	"github.com/hashicorp/consul/agent/token"
    30  	"github.com/hashicorp/consul/lib"
    31  	"github.com/hashicorp/consul/sentinel"
    32  	"github.com/hashicorp/consul/tlsutil"
    33  	"github.com/hashicorp/consul/types"
    34  	"github.com/hashicorp/raft"
    35  	raftboltdb "github.com/hashicorp/raft-boltdb"
    36  	"github.com/hashicorp/serf/serf"
    37  )
    38  
    39  // These are the protocol versions that Consul can _understand_. These are
    40  // Consul-level protocol versions that are used to configure the Serf
    41  // protocol versions.
    42  const (
    43  	ProtocolVersionMin uint8 = 2
    44  
    45  	// Version 3 added support for network coordinates but we kept the
    46  	// default protocol version at 2 to ease the transition to this new
    47  	// feature. A Consul agent speaking version 2 of the protocol will
    48  	// attempt to send its coordinates to a server that understands version
    49  	// 3 or greater.
    50  	ProtocolVersion2Compatible = 2
    51  
    52  	ProtocolVersionMax = 3
    53  )
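
        // Example (illustrative sketch, not part of the original source): a bounds
        // check in the spirit of Config.CheckProtocolVersion, rejecting a requested
        // Consul protocol version outside the range supported by this build. The
        // helper name below is hypothetical.
        //
        //	func validateProtocolVersion(v uint8) error {
        //		if v < ProtocolVersionMin || v > ProtocolVersionMax {
        //			return fmt.Errorf("protocol version %d is not in [%d, %d]",
        //				v, ProtocolVersionMin, ProtocolVersionMax)
        //		}
        //		return nil
        //	}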
    54  
    55  const (
    56  	serfLANSnapshot   = "serf/local.snapshot"
    57  	serfWANSnapshot   = "serf/remote.snapshot"
    58  	raftState         = "raft/"
    59  	snapshotsRetained = 2
    60  
    61  	// serverRPCCache controls how long we keep an idle connection
    62  	// open to a server
    63  	serverRPCCache = 2 * time.Minute
    64  
    65  	// serverMaxStreams controls how many idle streams we keep
    66  	// open to a server
    67  	serverMaxStreams = 64
    68  
    69  	// raftLogCacheSize is the maximum number of logs to cache in-memory.
    70  	// This is used to reduce disk I/O for the recently committed entries.
    71  	raftLogCacheSize = 512
    72  
    73  	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
    74  	// to replicate so that we can gracefully leave the cluster.
    75  	raftRemoveGracePeriod = 5 * time.Second
    76  
    77  	// serfEventChSize is the size of the buffered channel to get Serf
    78  	// events. If this is exhausted we will block Serf and Memberlist.
    79  	serfEventChSize = 2048
    80  
    81  	// reconcileChSize is the size of the buffered channel for reconcile updates
    82  	// from Serf with the Catalog. If this is exhausted we will drop updates,
    83  	// and wait for a periodic reconcile.
    84  	reconcileChSize = 256
    85  )
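
        // Illustrative sketch (not part of the original source): how a bounded
        // channel such as reconcileCh can drop updates rather than block when its
        // buffer is exhausted, matching the comment on reconcileChSize above. This
        // mirrors the usual non-blocking send pattern for such channels.
        //
        //	select {
        //	case s.reconcileCh <- member:
        //	default:
        //		// Buffer full: drop the update and rely on the periodic reconcile.
        //	}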
    86  
    87  var (
    88  	ErrWANFederationDisabled = fmt.Errorf("WAN Federation is disabled")
    89  )
    90  
    91  // Server is the Consul server, which manages service discovery,
    92  // health checking, DC forwarding, Raft, and multiple Serf pools.
    93  type Server struct {
    94  	// sentinel is the Sentinel code engine (can be nil).
    95  	sentinel sentinel.Evaluator
    96  
    97  	// acls is used to resolve tokens to effective policies
    98  	acls *ACLResolver
    99  
   100  	// aclUpgradeCancel is used to cancel the ACL upgrade goroutine when we
   101  	// lose leadership
   102  	aclUpgradeCancel  context.CancelFunc
   103  	aclUpgradeLock    sync.RWMutex
   104  	aclUpgradeEnabled bool
   105  
   106  	// aclReplicationCancel is used to shut down the ACL replication goroutine
   107  	// when we lose leadership
   108  	aclReplicationCancel  context.CancelFunc
   109  	aclReplicationLock    sync.RWMutex
   110  	aclReplicationEnabled bool
   111  
   112  	// DEPRECATED (ACL-Legacy-Compat) - only needed while we support both
   113  	// useNewACLs is used to determine whether we can use new ACLs or not
   114  	useNewACLs int32
   115  
   116  	// autopilot is the Autopilot instance for this server.
   117  	autopilot *autopilot.Autopilot
   118  
   119  	// autopilotWaitGroup is used to block until Autopilot shuts down.
   120  	autopilotWaitGroup sync.WaitGroup
   121  
   122  	// caProvider is the current CA provider in use for Connect. This is
   123  	// only non-nil when we are the leader.
   124  	caProvider ca.Provider
   125  	// caProviderRoot is the CARoot that was stored along with the ca.Provider
   126  	// active. It's only updated in lock-step with the caProvider. This prevents
   127  	// races between state updates to active roots and the fetch of the provider
   128  	// instance.
   129  	caProviderRoot *structs.CARoot
   130  	caProviderLock sync.RWMutex
   131  
   132  	// caPruningCh is used to shut down the CA root pruning goroutine when we
   133  	// lose leadership.
   134  	caPruningCh      chan struct{}
   135  	caPruningLock    sync.RWMutex
   136  	caPruningEnabled bool
   137  
   138  	// Consul configuration
   139  	config *Config
   140  
   141  	// tokens holds ACL tokens initially from the configuration, but can
   142  	// be updated at runtime, so should always be used instead of going to
   143  	// the configuration directly.
   144  	tokens *token.Store
   145  
   146  	// Connection pool to other consul servers
   147  	connPool *pool.ConnPool
   148  
   149  	// eventChLAN is used to receive events from the
   150  	// serf cluster in the datacenter
   151  	eventChLAN chan serf.Event
   152  
   153  	// eventChWAN is used to receive events from the
   154  	// serf cluster that spans datacenters
   155  	eventChWAN chan serf.Event
   156  
   157  	// fsm is the state machine used with Raft to provide
   158  	// strong consistency.
   159  	fsm *fsm.FSM
   160  
   161  	// Logger uses the provided LogOutput
   162  	logger *log.Logger
   163  
   164  	// The raft instance is used among Consul nodes within the DC to protect
   165  	// operations that require strong consistency, rather than going to
   166  	// the state directly.
   167  	raft          *raft.Raft
   168  	raftLayer     *RaftLayer
   169  	raftStore     *raftboltdb.BoltStore
   170  	raftTransport *raft.NetworkTransport
   171  	raftInmem     *raft.InmemStore
   172  
   173  	// raftNotifyCh is set up by setupRaft() and ensures that we get reliable leader
   174  	// transition notifications from the Raft layer.
   175  	raftNotifyCh <-chan bool
   176  
   177  	// reconcileCh is used to pass events from the serf handler
   178  	// into the leader manager, so that the strong state can be
   179  	// updated
   180  	reconcileCh chan serf.Member
   181  
   182  	// readyForConsistentReads is used to track when the leader server is
   183  	// ready to serve consistent reads, after it has applied its initial
   184  	// barrier. This is updated atomically.
   185  	readyForConsistentReads int32
   186  
   187  	// leaveCh is used to signal that the server is leaving the cluster
   188  	// and trying to shed its RPC traffic onto other Consul servers. This
   189  	// is only ever closed.
   190  	leaveCh chan struct{}
   191  
   192  	// router is used to map out Consul servers in the WAN and in Consul
   193  	// Enterprise user-defined areas.
   194  	router *router.Router
   195  
   196  	// Listener is used to listen for incoming connections
   197  	Listener  net.Listener
   198  	rpcServer *rpc.Server
   199  
   200  	// rpcTLS is the TLS config for incoming TLS requests
   201  	rpcTLS *tls.Config
   202  
   203  	// serfLAN is the Serf cluster maintained inside the DC
   204  	// which contains all the DC nodes
   205  	serfLAN *serf.Serf
   206  
   207  	// segmentLAN maps segment names to their Serf cluster
   208  	segmentLAN map[string]*serf.Serf
   209  
   210  	// serfWAN is the Serf cluster maintained between DCs,
   211  	// which SHOULD only consist of Consul servers
   212  	serfWAN *serf.Serf
   213  
   214  	// serverLookup tracks the Consul servers in the local datacenter.
   215  	// It is used for leader forwarding and fast lookup by server ID and address.
   216  	serverLookup *ServerLookup
   217  
   218  	// floodLock controls access to floodCh.
   219  	floodLock sync.RWMutex
   220  	floodCh   []chan struct{}
   221  
   222  	// sessionTimers track the expiration time of each Session that has
   223  	// a TTL. On expiration, a SessionDestroy event will occur, and
   224  	// destroy the session via standard session destroy processing
   225  	sessionTimers *SessionTimers
   226  
   227  	// statsFetcher is used by autopilot to check the status of the other
   228  	// Consul servers.
   229  	statsFetcher *StatsFetcher
   230  
   231  	// reassertLeaderCh is used to signal the leader loop should re-run
   232  	// leadership actions after a snapshot restore.
   233  	reassertLeaderCh chan chan error
   234  
   235  	// tombstoneGC is used to track the pending GC invocations
   236  	// for the KV tombstones
   237  	tombstoneGC *state.TombstoneGC
   238  
   239  	// aclReplicationStatus (and its associated lock) provide information
   240  	// about the health of the ACL replication goroutine.
   241  	aclReplicationStatus     structs.ACLReplicationStatus
   242  	aclReplicationStatusLock sync.RWMutex
   243  
   244  	// shutdown and the associated members here are used in orchestrating
   245  	// a clean shutdown. The shutdownCh is never written to, only closed to
   246  	// indicate a shutdown has been initiated.
   247  	shutdown     bool
   248  	shutdownCh   chan struct{}
   249  	shutdownLock sync.Mutex
   250  
   251  	// embedded struct to hold all the enterprise specific data
   252  	EnterpriseServer
   253  }
   254  
   255  // NewServer is only used to help set up a server for testing. Normal code
   256  // exercises NewServerLogger.
   257  func NewServer(config *Config) (*Server, error) {
   258  	c, err := tlsutil.NewConfigurator(config.ToTLSUtilConfig(), nil)
   259  	if err != nil {
   260  		return nil, err
   261  	}
   262  	return NewServerLogger(config, nil, new(token.Store), c)
   263  }
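
        // Example (illustrative sketch, not part of the original source): how a test
        // might construct a server through NewServer. DefaultConfig comes from this
        // package; DevMode avoids the DataDir requirement and uses in-memory Raft
        // storage. Real test helpers also adjust ports and other settings.
        //
        //	cfg := DefaultConfig()
        //	cfg.DevMode = true
        //	srv, err := NewServer(cfg)
        //	if err != nil {
        //		t.Fatal(err)
        //	}
        //	defer srv.Shutdown()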
   264  
   265  // NewServerLogger is used to construct a new Consul server from the
   266  // configuration, potentially returning an error
   267  func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store, tlsConfigurator *tlsutil.Configurator) (*Server, error) {
   268  	// Check the protocol version.
   269  	if err := config.CheckProtocolVersion(); err != nil {
   270  		return nil, err
   271  	}
   272  
   273  	// Check for a data directory.
   274  	if config.DataDir == "" && !config.DevMode {
   275  		return nil, fmt.Errorf("Config must provide a DataDir")
   276  	}
   277  
   278  	// Sanity check the ACLs.
   279  	if err := config.CheckACL(); err != nil {
   280  		return nil, err
   281  	}
   282  
   283  	// Ensure we have a log output and create a logger.
   284  	if config.LogOutput == nil {
   285  		config.LogOutput = os.Stderr
   286  	}
   287  	if logger == nil {
   288  		logger = log.New(config.LogOutput, "", log.LstdFlags)
   289  	}
   290  
   291  	// Check if TLS is enabled
   292  	if config.CAFile != "" || config.CAPath != "" {
   293  		config.UseTLS = true
   294  	}
   295  
   296  	// Set the primary DC if it wasn't set.
   297  	if config.PrimaryDatacenter == "" {
   298  		if config.ACLDatacenter != "" {
   299  			config.PrimaryDatacenter = config.ACLDatacenter
   300  		} else {
   301  			config.PrimaryDatacenter = config.Datacenter
   302  		}
   303  	}
   304  
   305  	// Create the tombstone GC.
   306  	gc, err := state.NewTombstoneGC(config.TombstoneTTL, config.TombstoneTTLGranularity)
   307  	if err != nil {
   308  		return nil, err
   309  	}
   310  
   311  	// Create the shutdown channel - this is closed but never written to.
   312  	shutdownCh := make(chan struct{})
   313  
   314  	connPool := &pool.ConnPool{
   315  		SrcAddr:    config.RPCSrcAddr,
   316  		LogOutput:  config.LogOutput,
   317  		MaxTime:    serverRPCCache,
   318  		MaxStreams: serverMaxStreams,
   319  		TLSWrapper: tlsConfigurator.OutgoingRPCWrapper(),
   320  		ForceTLS:   config.VerifyOutgoing,
   321  	}
   322  
   323  	// Create server.
   324  	s := &Server{
   325  		config:           config,
   326  		tokens:           tokens,
   327  		connPool:         connPool,
   328  		eventChLAN:       make(chan serf.Event, serfEventChSize),
   329  		eventChWAN:       make(chan serf.Event, serfEventChSize),
   330  		logger:           logger,
   331  		leaveCh:          make(chan struct{}),
   332  		reconcileCh:      make(chan serf.Member, reconcileChSize),
   333  		router:           router.NewRouter(logger, config.Datacenter),
   334  		rpcServer:        rpc.NewServer(),
   335  		rpcTLS:           tlsConfigurator.IncomingRPCConfig(),
   336  		reassertLeaderCh: make(chan chan error),
   337  		segmentLAN:       make(map[string]*serf.Serf, len(config.Segments)),
   338  		sessionTimers:    NewSessionTimers(),
   339  		tombstoneGC:      gc,
   340  		serverLookup:     NewServerLookup(),
   341  		shutdownCh:       shutdownCh,
   342  	}
   343  
   344  	// Initialize enterprise specific server functionality
   345  	if err := s.initEnterprise(); err != nil {
   346  		s.Shutdown()
   347  		return nil, err
   348  	}
   349  
   350  	// Initialize the stats fetcher that autopilot will use.
   351  	s.statsFetcher = NewStatsFetcher(logger, s.connPool, s.config.Datacenter)
   352  
   353  	s.sentinel = sentinel.New(logger)
   354  	s.useNewACLs = 0
   355  	aclConfig := ACLResolverConfig{
   356  		Config:      config,
   357  		Delegate:    s,
   358  		CacheConfig: serverACLCacheConfig,
   359  		AutoDisable: false,
   360  		Logger:      logger,
   361  		Sentinel:    s.sentinel,
   362  	}
   363  	// Initialize the ACL resolver.
   364  	if s.acls, err = NewACLResolver(&aclConfig); err != nil {
   365  		s.Shutdown()
   366  		return nil, fmt.Errorf("Failed to create ACL resolver: %v", err)
   367  	}
   368  
   369  	// Initialize the RPC layer.
   370  	if err := s.setupRPC(tlsConfigurator.OutgoingRPCWrapper()); err != nil {
   371  		s.Shutdown()
   372  		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
   373  	}
   374  
   375  	// Initialize any extra RPC listeners for segments.
   376  	segmentListeners, err := s.setupSegmentRPC()
   377  	if err != nil {
   378  		s.Shutdown()
   379  		return nil, fmt.Errorf("Failed to start segment RPC layer: %v", err)
   380  	}
   381  
   382  	// Initialize the Raft server.
   383  	if err := s.setupRaft(); err != nil {
   384  		s.Shutdown()
   385  		return nil, fmt.Errorf("Failed to start Raft: %v", err)
   386  	}
   387  
   388  	// Serf and dynamic bind ports
   389  	//
   390  	// The LAN serf cluster announces the port of the WAN serf cluster
   391  	// which creates a race when the WAN cluster is supposed to bind to
   392  	// a dynamic port (port 0). The current memberlist implementation will
   393  	// update the bind port in the configuration after the memberlist is
   394  	// created, so we can pull it out from there reliably, even though it's
   395  	// a little gross to be reading the updated config.
   396  
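        	// Illustrative sketch (not part of the original source) of the pattern
        	// described above: when BindPort is 0, memberlist rewrites the config
        	// with the port it actually bound, so the real port can be read back
        	// from the config after the Serf cluster is created.
        	//
        	//	conf.MemberlistConfig.BindPort = 0 // ask the kernel for a free port
        	//	cluster, err := serf.Create(conf)
        	//	// ...
        	//	boundPort := conf.MemberlistConfig.BindPort // now the actual port
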
   397  	// Initialize the WAN Serf if enabled
   398  	serfBindPortWAN := -1
   399  	if config.SerfWANConfig != nil {
   400  		serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
   401  		s.serfWAN, err = s.setupSerf(config.SerfWANConfig, s.eventChWAN, serfWANSnapshot, true, serfBindPortWAN, "", s.Listener)
   402  		if err != nil {
   403  			s.Shutdown()
   404  			return nil, fmt.Errorf("Failed to start WAN Serf: %v", err)
   405  		}
   406  		// See the big comment above for why we are doing this.
   407  		if serfBindPortWAN == 0 {
   408  			serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
   409  			if serfBindPortWAN == 0 {
   410  				return nil, fmt.Errorf("Failed to get dynamic bind port for WAN Serf")
   411  			}
   412  			s.logger.Printf("[INFO] agent: Serf WAN TCP bound to port %d", serfBindPortWAN)
   413  		}
   414  	}
   415  
   416  	// Initialize the LAN segments before the default LAN Serf so we have
   417  	// updated port information to publish there.
   418  	if err := s.setupSegments(config, serfBindPortWAN, segmentListeners); err != nil {
   419  		s.Shutdown()
   420  		return nil, fmt.Errorf("Failed to setup network segments: %v", err)
   421  	}
   422  
   423  	// Initialize the LAN Serf for the default network segment.
   424  	s.serfLAN, err = s.setupSerf(config.SerfLANConfig, s.eventChLAN, serfLANSnapshot, false, serfBindPortWAN, "", s.Listener)
   425  	if err != nil {
   426  		s.Shutdown()
   427  		return nil, fmt.Errorf("Failed to start LAN Serf: %v", err)
   428  	}
   429  	go s.lanEventHandler()
   430  
   431  	// Start the flooders after the LAN event handler is wired up.
   432  	s.floodSegments(config)
   433  
   434  	// Add a "static route" to the WAN Serf and hook it up to Serf events.
   435  	if s.serfWAN != nil {
   436  		if err := s.router.AddArea(types.AreaWAN, s.serfWAN, s.connPool, s.config.VerifyOutgoing); err != nil {
   437  			s.Shutdown()
   438  			return nil, fmt.Errorf("Failed to add WAN serf route: %v", err)
   439  		}
   440  		go router.HandleSerfEvents(s.logger, s.router, types.AreaWAN, s.serfWAN.ShutdownCh(), s.eventChWAN)
   441  
   442  		// Fire up the LAN <-> WAN join flooder.
   443  		portFn := func(s *metadata.Server) (int, bool) {
   444  			if s.WanJoinPort > 0 {
   445  				return s.WanJoinPort, true
   446  			}
   447  			return 0, false
   448  		}
   449  		go s.Flood(nil, portFn, s.serfWAN)
   450  	}
   451  
   452  	// Start enterprise specific functionality
   453  	if err := s.startEnterprise(); err != nil {
   454  		s.Shutdown()
   455  		return nil, err
   456  	}
   457  
   458  	// Initialize Autopilot. This must happen before starting leadership monitoring
   459  	// as establishing leadership could attempt to use autopilot and cause a panic.
   460  	s.initAutopilot(config)
   461  
   462  	// Start monitoring leadership. This must happen after Serf is set up
   463  	// since it can fire events when leadership is obtained.
   464  	go s.monitorLeadership()
   465  
   466  	// Start listening for RPC requests.
   467  	go s.listen(s.Listener)
   468  
   469  	// Start listeners for any segments with separate RPC listeners.
   470  	for _, listener := range segmentListeners {
   471  		go s.listen(listener)
   472  	}
   473  
   474  	// Start the metrics handlers.
   475  	go s.sessionStats()
   476  
   477  	return s, nil
   478  }
   479  
   480  // setupRaft is used to set up and initialize Raft
   481  func (s *Server) setupRaft() error {
   482  	// If we have an unclean exit then attempt to close the Raft store.
   483  	defer func() {
   484  		if s.raft == nil && s.raftStore != nil {
   485  			if err := s.raftStore.Close(); err != nil {
   486  				s.logger.Printf("[ERR] consul: failed to close Raft store: %v", err)
   487  			}
   488  		}
   489  	}()
   490  
   491  	// Create the FSM.
   492  	var err error
   493  	s.fsm, err = fsm.New(s.tombstoneGC, s.config.LogOutput)
   494  	if err != nil {
   495  		return err
   496  	}
   497  
   498  	var serverAddressProvider raft.ServerAddressProvider
   499  	if s.config.RaftConfig.ProtocolVersion >= 3 { // ServerAddressProvider needs server IDs to work correctly, which is only supported in protocol version 3 or higher
   500  		serverAddressProvider = s.serverLookup
   501  	}
   502  
   503  	// Create a transport layer.
   504  	transConfig := &raft.NetworkTransportConfig{
   505  		Stream:                s.raftLayer,
   506  		MaxPool:               3,
   507  		Timeout:               10 * time.Second,
   508  		ServerAddressProvider: serverAddressProvider,
   509  		Logger:                s.logger,
   510  	}
   511  
   512  	trans := raft.NewNetworkTransportWithConfig(transConfig)
   513  	s.raftTransport = trans
   514  
   515  	// Make sure we set the LogOutput.
   516  	s.config.RaftConfig.LogOutput = s.config.LogOutput
   517  	s.config.RaftConfig.Logger = s.logger
   518  
   519  	// Versions of the Raft protocol below 3 require the LocalID to match the network
   520  	// address of the transport.
   521  	s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr())
   522  	if s.config.RaftConfig.ProtocolVersion >= 3 {
   523  		s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID)
   524  	}
   525  
   526  	// Build an all in-memory setup for dev mode, otherwise prepare a full
   527  	// disk-based setup.
   528  	var log raft.LogStore
   529  	var stable raft.StableStore
   530  	var snap raft.SnapshotStore
   531  	if s.config.DevMode {
   532  		store := raft.NewInmemStore()
   533  		s.raftInmem = store
   534  		stable = store
   535  		log = store
   536  		snap = raft.NewInmemSnapshotStore()
   537  	} else {
   538  		// Create the base raft path.
   539  		path := filepath.Join(s.config.DataDir, raftState)
   540  		if err := lib.EnsurePath(path, true); err != nil {
   541  			return err
   542  		}
   543  
   544  		// Create the backend raft store for logs and stable storage.
   545  		store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
   546  		if err != nil {
   547  			return err
   548  		}
   549  		s.raftStore = store
   550  		stable = store
   551  
   552  		// Wrap the store in a LogCache to improve performance.
   553  		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
   554  		if err != nil {
   555  			return err
   556  		}
   557  		log = cacheStore
   558  
   559  		// Create the snapshot store.
   560  		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
   561  		if err != nil {
   562  			return err
   563  		}
   564  		snap = snapshots
   565  
   566  		// For an existing cluster being upgraded to the new version of
   567  		// Raft, we almost never want to run recovery based on the old
   568  		// peers.json file. We create a peers.info file with a helpful
   569  		// note about where peers.json went, and use that as a sentinel
   570  		// to avoid ingesting the old one that first time (if we have to
   571  		// create the peers.info file because it's not there, we also
   572  		// blow away any existing peers.json file).
   573  		peersFile := filepath.Join(path, "peers.json")
   574  		peersInfoFile := filepath.Join(path, "peers.info")
   575  		if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) {
   576  			if err := ioutil.WriteFile(peersInfoFile, []byte(peersInfoContent), 0755); err != nil {
   577  				return fmt.Errorf("failed to write peers.info file: %v", err)
   578  			}
   579  
   580  			// Blow away the peers.json file if present, since the
   581  			// peers.info sentinel wasn't there.
   582  			if _, err := os.Stat(peersFile); err == nil {
   583  				if err := os.Remove(peersFile); err != nil {
   584  					return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
   585  				}
   586  				s.logger.Printf("[INFO] consul: deleted peers.json file (see peers.info for details)")
   587  			}
   588  		} else if _, err := os.Stat(peersFile); err == nil {
   589  			s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...")
   590  
   591  			var configuration raft.Configuration
   592  			if s.config.RaftConfig.ProtocolVersion < 3 {
   593  				configuration, err = raft.ReadPeersJSON(peersFile)
   594  			} else {
   595  				configuration, err = raft.ReadConfigJSON(peersFile)
   596  			}
   597  			if err != nil {
   598  				return fmt.Errorf("recovery failed to parse peers.json: %v", err)
   599  			}
   600  
   601  			tmpFsm, err := fsm.New(s.tombstoneGC, s.config.LogOutput)
   602  			if err != nil {
   603  				return fmt.Errorf("recovery failed to make temp FSM: %v", err)
   604  			}
   605  			if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
   606  				log, stable, snap, trans, configuration); err != nil {
   607  				return fmt.Errorf("recovery failed: %v", err)
   608  			}
   609  
   610  			if err := os.Remove(peersFile); err != nil {
   611  				return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
   612  			}
   613  			s.logger.Printf("[INFO] consul: deleted peers.json file after successful recovery")
   614  		}
   615  	}
   616  
   617  	// If we are in bootstrap or dev mode and the state is clean then we can
   618  	// bootstrap now.
   619  	if s.config.Bootstrap || s.config.DevMode {
   620  		hasState, err := raft.HasExistingState(log, stable, snap)
   621  		if err != nil {
   622  			return err
   623  		}
   624  		if !hasState {
   625  			configuration := raft.Configuration{
   626  				Servers: []raft.Server{
   627  					raft.Server{
   628  						ID:      s.config.RaftConfig.LocalID,
   629  						Address: trans.LocalAddr(),
   630  					},
   631  				},
   632  			}
   633  			if err := raft.BootstrapCluster(s.config.RaftConfig,
   634  				log, stable, snap, trans, configuration); err != nil {
   635  				return err
   636  			}
   637  		}
   638  	}
   639  
   640  	// Set up a channel for reliable leader notifications.
   641  	raftNotifyCh := make(chan bool, 1)
   642  	s.config.RaftConfig.NotifyCh = raftNotifyCh
   643  	s.raftNotifyCh = raftNotifyCh
   644  
   645  	// Setup the Raft store.
   646  	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)
   647  	if err != nil {
   648  		return err
   649  	}
   650  	return nil
   651  }
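
        // Illustrative sketch (not part of the original source): consuming the
        // buffered NotifyCh wired up in setupRaft to observe leadership transitions.
        // This is roughly the shape of the loop run by monitorLeadership elsewhere
        // in this package.
        //
        //	for {
        //		select {
        //		case isLeader := <-s.raftNotifyCh:
        //			if isLeader {
        //				// establish leadership (barrier write, start leader loops)
        //			} else {
        //				// revoke leadership and stop leader-only goroutines
        //			}
        //		case <-s.shutdownCh:
        //			return
        //		}
        //	}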
   652  
   653  // factory is a function that returns an RPC endpoint bound to the given
   654  // server.
   655  type factory func(s *Server) interface{}
   656  
   657  // endpoints is a list of registered RPC endpoint factories.
   658  var endpoints []factory
   659  
   660  // registerEndpoint registers a new RPC endpoint factory.
   661  func registerEndpoint(fn factory) {
   662  	endpoints = append(endpoints, fn)
   663  }
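
        // Illustrative sketch (not part of the original source): how an RPC endpoint
        // typically registers itself with registerEndpoint from an init function in
        // its own file. The Ping endpoint type below is hypothetical.
        //
        //	type Ping struct {
        //		srv *Server
        //	}
        //
        //	func init() {
        //		registerEndpoint(func(s *Server) interface{} { return &Ping{srv: s} })
        //	}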
   664  
   665  // setupRPC is used to set up the RPC listener
   666  func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error {
   667  	for _, fn := range endpoints {
   668  		s.rpcServer.Register(fn(s))
   669  	}
   670  
   671  	ln, err := net.ListenTCP("tcp", s.config.RPCAddr)
   672  	if err != nil {
   673  		return err
   674  	}
   675  	s.Listener = ln
   676  	if s.config.NotifyListen != nil {
   677  		s.config.NotifyListen()
   678  	}
   679  	// todo(fs): we should probably guard this
   680  	if s.config.RPCAdvertise == nil {
   681  		s.config.RPCAdvertise = ln.Addr().(*net.TCPAddr)
   682  	}
   683  
   684  	// Verify that we have a usable advertise address
   685  	if s.config.RPCAdvertise.IP.IsUnspecified() {
   686  		ln.Close()
   687  		return fmt.Errorf("RPC advertise address is not advertisable: %v", s.config.RPCAdvertise)
   688  	}
   689  
   690  	// Provide a DC specific wrapper. Raft replication is only
   691  	// ever done in the same datacenter, so we can provide it as a constant.
   692  	wrapper := tlsutil.SpecificDC(s.config.Datacenter, tlsWrap)
   693  
   694  	// Define a callback for determining whether to wrap a connection with TLS
   695  	tlsFunc := func(address raft.ServerAddress) bool {
   696  		if s.config.VerifyOutgoing {
   697  			return true
   698  		}
   699  
   700  		server := s.serverLookup.Server(address)
   701  
   702  		if server == nil {
   703  			return false
   704  		}
   705  
   706  		return server.UseTLS
   707  	}
   708  	s.raftLayer = NewRaftLayer(s.config.RPCSrcAddr, s.config.RPCAdvertise, wrapper, tlsFunc)
   709  	return nil
   710  }
   711  
   712  // Shutdown is used to shut down the server
   713  func (s *Server) Shutdown() error {
   714  	s.logger.Printf("[INFO] consul: shutting down server")
   715  	s.shutdownLock.Lock()
   716  	defer s.shutdownLock.Unlock()
   717  
   718  	if s.shutdown {
   719  		return nil
   720  	}
   721  
   722  	s.shutdown = true
   723  	close(s.shutdownCh)
   724  
   725  	if s.serfLAN != nil {
   726  		s.serfLAN.Shutdown()
   727  	}
   728  
   729  	if s.serfWAN != nil {
   730  		s.serfWAN.Shutdown()
   731  		if err := s.router.RemoveArea(types.AreaWAN); err != nil {
   732  			s.logger.Printf("[WARN] consul: error removing WAN area: %v", err)
   733  		}
   734  	}
   735  	s.router.Shutdown()
   736  
   737  	if s.raft != nil {
   738  		s.raftTransport.Close()
   739  		s.raftLayer.Close()
   740  		future := s.raft.Shutdown()
   741  		if err := future.Error(); err != nil {
   742  			s.logger.Printf("[WARN] consul: error shutting down raft: %s", err)
   743  		}
   744  		if s.raftStore != nil {
   745  			s.raftStore.Close()
   746  		}
   747  	}
   748  
   749  	if s.Listener != nil {
   750  		s.Listener.Close()
   751  	}
   752  
   753  	// Close the connection pool
   754  	s.connPool.Shutdown()
   755  
   756  	return nil
   757  }
   758  
   759  // Leave is used to prepare for a graceful shutdown of the server
   760  func (s *Server) Leave() error {
   761  	s.logger.Printf("[INFO] consul: server starting leave")
   762  
   763  	// Check the number of known peers
   764  	numPeers, err := s.numPeers()
   765  	if err != nil {
   766  		s.logger.Printf("[ERR] consul: failed to check raft peers: %v", err)
   767  		return err
   768  	}
   769  
   770  	addr := s.raftTransport.LocalAddr()
   771  
   772  	// If we are the current leader, and we have any other peers (cluster has multiple
   773  	// servers), we should do a RemoveServer/RemovePeer to safely reduce the quorum size.
   774  	// If we are not the leader, then we should issue our leave intention and wait to be
   775  	// removed for some sane period of time.
   776  	isLeader := s.IsLeader()
   777  	if isLeader && numPeers > 1 {
   778  		minRaftProtocol, err := s.autopilot.MinRaftProtocol()
   779  		if err != nil {
   780  			return err
   781  		}
   782  
   783  		if minRaftProtocol >= 2 && s.config.RaftConfig.ProtocolVersion >= 3 {
   784  			future := s.raft.RemoveServer(raft.ServerID(s.config.NodeID), 0, 0)
   785  			if err := future.Error(); err != nil {
   786  				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
   787  			}
   788  		} else {
   789  			future := s.raft.RemovePeer(addr)
   790  			if err := future.Error(); err != nil {
   791  				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
   792  			}
   793  		}
   794  	}
   795  
   796  	// Leave the WAN pool
   797  	if s.serfWAN != nil {
   798  		if err := s.serfWAN.Leave(); err != nil {
   799  			s.logger.Printf("[ERR] consul: failed to leave WAN Serf cluster: %v", err)
   800  		}
   801  	}
   802  
   803  	// Leave the LAN pool
   804  	if s.serfLAN != nil {
   805  		if err := s.serfLAN.Leave(); err != nil {
   806  			s.logger.Printf("[ERR] consul: failed to leave LAN Serf cluster: %v", err)
   807  		}
   808  	}
   809  
   810  	// Start refusing RPCs now that we've left the LAN pool. It's important
   811  	// to do this *after* we've left the LAN pool so that clients will know
   812  	// to shift onto another server if they perform a retry. We also wake up
   813  	// all queries in the RPC retry state.
   814  	s.logger.Printf("[INFO] consul: Waiting %s to drain RPC traffic", s.config.LeaveDrainTime)
   815  	close(s.leaveCh)
   816  	time.Sleep(s.config.LeaveDrainTime)
   817  
   818  	// If we were not leader, wait to be safely removed from the cluster. We
   819  	// must wait to allow the raft replication to take place, otherwise an
   820  	// immediate shutdown could cause a loss of quorum.
   821  	if !isLeader {
   822  		left := false
   823  		limit := time.Now().Add(raftRemoveGracePeriod)
   824  		for !left && time.Now().Before(limit) {
   825  			// Sleep a while before we check.
   826  			time.Sleep(50 * time.Millisecond)
   827  
   828  			// Get the latest configuration.
   829  			future := s.raft.GetConfiguration()
   830  			if err := future.Error(); err != nil {
   831  				s.logger.Printf("[ERR] consul: failed to get raft configuration: %v", err)
   832  				break
   833  			}
   834  
   835  			// See if we are no longer included.
   836  			left = true
   837  			for _, server := range future.Configuration().Servers {
   838  				if server.Address == addr {
   839  					left = false
   840  					break
   841  				}
   842  			}
   843  		}
   844  
   845  		// TODO (slackpad) With the old Raft library we used to force the
   846  		// peers set to empty when a graceful leave occurred. This would
   847  		// keep voting spam down if the server was restarted, but it was
   848  		// dangerous because the peers was inconsistent with the logs and
   849  		// snapshots, so it wasn't really safe in all cases for the server
   850  		// to become leader. This is now safe, but the log spam is noisy.
   851  		// The next new version of the library will have a "you are not a
   852  		// peer stop it" behavior that should address this. We will have
   853  		// to evaluate during the RC period if this interim situation is
   854  		// not too confusing for operators.
   855  
   856  		// TODO (slackpad) When we take a later new version of the Raft
   857  		// library it won't try to complete replication, so this peer
   858  		// may not realize that it has been removed. Need to revisit this
   859  		// and the warning here.
   860  		if !left {
   861  			s.logger.Printf("[WARN] consul: failed to leave raft configuration gracefully, timeout")
   862  		}
   863  	}
   864  
   865  	return nil
   866  }
   867  
   868  // numPeers is used to check on the number of known peers, including potentially
   869  // the local node. We count only voters, since others can't actually become
   870  // leader, so aren't considered peers.
   871  func (s *Server) numPeers() (int, error) {
   872  	future := s.raft.GetConfiguration()
   873  	if err := future.Error(); err != nil {
   874  		return 0, err
   875  	}
   876  
   877  	return autopilot.NumPeers(future.Configuration()), nil
   878  }
   879  
   880  // JoinLAN is used to have Consul join the intra-DC pool
   881  // The target address should be another node inside the DC
   882  // listening on the Serf LAN address
   883  func (s *Server) JoinLAN(addrs []string) (int, error) {
   884  	return s.serfLAN.Join(addrs, true)
   885  }
   886  
   887  // JoinWAN is used to have Consul join the cross-WAN Consul ring
   888  // The target address should be another node listening on the
   889  // Serf WAN address
   890  func (s *Server) JoinWAN(addrs []string) (int, error) {
   891  	if s.serfWAN == nil {
   892  		return 0, ErrWANFederationDisabled
   893  	}
   894  	return s.serfWAN.Join(addrs, true)
   895  }
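
        // Illustrative usage sketch (not part of the original source): callers can
        // distinguish a disabled WAN pool from a real join failure by comparing the
        // error against ErrWANFederationDisabled.
        //
        //	if _, err := s.JoinWAN(addrs); err != nil {
        //		if err == ErrWANFederationDisabled {
        //			// This server has no WAN Serf configured; skip WAN joins.
        //		}
        //	}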
   896  
   897  // LocalMember is used to return the local node
   898  func (s *Server) LocalMember() serf.Member {
   899  	return s.serfLAN.LocalMember()
   900  }
   901  
   902  // LANMembers is used to return the members of the LAN cluster
   903  func (s *Server) LANMembers() []serf.Member {
   904  	return s.serfLAN.Members()
   905  }
   906  
   907  // WANMembers is used to return the members of the WAN cluster
   908  func (s *Server) WANMembers() []serf.Member {
   909  	if s.serfWAN == nil {
   910  		return nil
   911  	}
   912  	return s.serfWAN.Members()
   913  }
   914  
   915  // RemoveFailedNode is used to remove a failed node from the cluster
   916  func (s *Server) RemoveFailedNode(node string) error {
   917  	if err := s.serfLAN.RemoveFailedNode(node); err != nil {
   918  		return err
   919  	}
   920  	if s.serfWAN != nil {
   921  		if err := s.serfWAN.RemoveFailedNode(node); err != nil {
   922  			return err
   923  		}
   924  	}
   925  	return nil
   926  }
   927  
   928  // IsLeader checks if this server is the cluster leader
   929  func (s *Server) IsLeader() bool {
   930  	return s.raft.State() == raft.Leader
   931  }
   932  
   933  // KeyManagerLAN returns the LAN Serf keyring manager
   934  func (s *Server) KeyManagerLAN() *serf.KeyManager {
   935  	return s.serfLAN.KeyManager()
   936  }
   937  
   938  // KeyManagerWAN returns the WAN Serf keyring manager
   939  func (s *Server) KeyManagerWAN() *serf.KeyManager {
   940  	return s.serfWAN.KeyManager()
   941  }
   942  
   943  // Encrypted determines if gossip is encrypted
   944  func (s *Server) Encrypted() bool {
   945  	LANEncrypted := s.serfLAN.EncryptionEnabled()
   946  	if s.serfWAN == nil {
   947  		return LANEncrypted
   948  	}
   949  	return LANEncrypted && s.serfWAN.EncryptionEnabled()
   950  }
   951  
   952  // LANSegments returns a map of LAN segments by name
   953  func (s *Server) LANSegments() map[string]*serf.Serf {
   954  	segments := make(map[string]*serf.Serf, len(s.segmentLAN)+1)
   955  	segments[""] = s.serfLAN
   956  	for name, segment := range s.segmentLAN {
   957  		segments[name] = segment
   958  	}
   959  
   960  	return segments
   961  }
   962  
   963  // inmemCodec is used to do an RPC call without going over a network
   964  type inmemCodec struct {
   965  	method string
   966  	args   interface{}
   967  	reply  interface{}
   968  	err    error
   969  }
   970  
   971  func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error {
   972  	req.ServiceMethod = i.method
   973  	return nil
   974  }
   975  
   976  func (i *inmemCodec) ReadRequestBody(args interface{}) error {
   977  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args)))
   978  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args)))
   979  	dst.Set(sourceValue)
   980  	return nil
   981  }
   982  
   983  func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error {
   984  	if resp.Error != "" {
   985  		i.err = errors.New(resp.Error)
   986  		return nil
   987  	}
   988  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply)))
   989  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply)))
   990  	dst.Set(sourceValue)
   991  	return nil
   992  }
   993  
   994  func (i *inmemCodec) Close() error {
   995  	return nil
   996  }
   997  
   998  // RPC is used to make a local RPC call
   999  func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
  1000  	codec := &inmemCodec{
  1001  		method: method,
  1002  		args:   args,
  1003  		reply:  reply,
  1004  	}
  1005  	if err := s.rpcServer.ServeRequest(codec); err != nil {
  1006  		return err
  1007  	}
  1008  	return codec.err
  1009  }
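
        // Illustrative usage sketch (not part of the original source): an in-process
        // call through RPC and the inmemCodec above. The endpoint name and the
        // request/response types shown are assumptions about the structs-based RPC
        // convention, not taken from this file.
        //
        //	var out structs.IndexedNodes
        //	err := s.RPC("Catalog.ListNodes", &structs.DCSpecificRequest{
        //		Datacenter: s.config.Datacenter,
        //	}, &out)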
  1010  
  1011  // SnapshotRPC dispatches the given snapshot request, reading from the streaming
  1012  // input and writing to the streaming output depending on the operation.
  1013  func (s *Server) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer,
  1014  	replyFn structs.SnapshotReplyFn) error {
  1015  
  1016  	// Perform the operation.
  1017  	var reply structs.SnapshotResponse
  1018  	snap, err := s.dispatchSnapshotRequest(args, in, &reply)
  1019  	if err != nil {
  1020  		return err
  1021  	}
  1022  	defer func() {
  1023  		if err := snap.Close(); err != nil {
  1024  			s.logger.Printf("[ERR] consul: Failed to close snapshot: %v", err)
  1025  		}
  1026  	}()
  1027  
  1028  	// Let the caller peek at the reply.
  1029  	if replyFn != nil {
  1030  		if err := replyFn(&reply); err != nil {
  1031  			return err
  1032  		}
  1033  	}
  1034  
  1035  	// Stream the snapshot.
  1036  	if out != nil {
  1037  		if _, err := io.Copy(out, snap); err != nil {
  1038  			return fmt.Errorf("failed to stream snapshot: %v", err)
  1039  		}
  1040  	}
  1041  	return nil
  1042  }
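
        // Illustrative usage sketch (not part of the original source): streaming a
        // snapshot to a local file through SnapshotRPC. The request fields and the
        // SnapshotSave op are assumptions about structs.SnapshotRequest.
        //
        //	f, err := os.Create("backup.snap")
        //	if err != nil {
        //		return err
        //	}
        //	defer f.Close()
        //	args := &structs.SnapshotRequest{
        //		Datacenter: s.config.Datacenter,
        //		Op:         structs.SnapshotSave,
        //	}
        //	if err := s.SnapshotRPC(args, nil, f, nil); err != nil {
        //		return err
        //	}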
  1043  
  1044  // RegisterEndpoint is used to substitute an endpoint for testing.
  1045  func (s *Server) RegisterEndpoint(name string, handler interface{}) error {
  1046  	s.logger.Printf("[WARN] consul: endpoint injected; this should only be used for testing")
  1047  	return s.rpcServer.RegisterName(name, handler)
  1048  }
  1049  
  1050  // Stats is used to return statistics for debugging and insight
  1051  // into various sub-systems
  1052  func (s *Server) Stats() map[string]map[string]string {
  1053  	toString := func(v uint64) string {
  1054  		return strconv.FormatUint(v, 10)
  1055  	}
  1056  	numKnownDCs := len(s.router.GetDatacenters())
  1057  	stats := map[string]map[string]string{
  1058  		"consul": map[string]string{
  1059  			"server":            "true",
  1060  			"leader":            fmt.Sprintf("%v", s.IsLeader()),
  1061  			"leader_addr":       string(s.raft.Leader()),
  1062  			"bootstrap":         fmt.Sprintf("%v", s.config.Bootstrap),
  1063  			"known_datacenters": toString(uint64(numKnownDCs)),
  1064  		},
  1065  		"raft":     s.raft.Stats(),
  1066  		"serf_lan": s.serfLAN.Stats(),
  1067  		"runtime":  runtimeStats(),
  1068  	}
  1069  
  1070  	if s.ACLsEnabled() {
  1071  		if s.UseLegacyACLs() {
  1072  			stats["consul"]["acl"] = "legacy"
  1073  		} else {
  1074  			stats["consul"]["acl"] = "enabled"
  1075  		}
  1076  	} else {
  1077  		stats["consul"]["acl"] = "disabled"
  1078  	}
  1079  
  1080  	if s.serfWAN != nil {
  1081  		stats["serf_wan"] = s.serfWAN.Stats()
  1082  	}
  1083  
  1084  	for outerKey, outerValue := range s.enterpriseStats() {
  1085  		if _, ok := stats[outerKey]; ok {
  1086  			for innerKey, innerValue := range outerValue {
  1087  				stats[outerKey][innerKey] = innerValue
  1088  			}
  1089  		} else {
  1090  			stats[outerKey] = outerValue
  1091  		}
  1092  	}
  1093  
  1094  	return stats
  1095  }
  1096  
  1097  // GetLANCoordinate returns the coordinate of the server in the LAN gossip pool.
  1098  func (s *Server) GetLANCoordinate() (lib.CoordinateSet, error) {
  1099  	lan, err := s.serfLAN.GetCoordinate()
  1100  	if err != nil {
  1101  		return nil, err
  1102  	}
  1103  
  1104  	cs := lib.CoordinateSet{"": lan}
  1105  	for name, segment := range s.segmentLAN {
  1106  		c, err := segment.GetCoordinate()
  1107  		if err != nil {
  1108  			return nil, err
  1109  		}
  1110  		cs[name] = c
  1111  	}
  1112  	return cs, nil
  1113  }
  1114  
  1115  // ReloadConfig is used to have the Server do an online reload of
  1116  // relevant configuration information
  1117  func (s *Server) ReloadConfig(config *Config) error {
  1118  	return nil
  1119  }
  1120  
  1121  // setConsistentReadReady atomically sets a readiness flag when leadership is obtained, to indicate that the server is past its initial barrier write
  1122  func (s *Server) setConsistentReadReady() {
  1123  	atomic.StoreInt32(&s.readyForConsistentReads, 1)
  1124  }
  1125  
  1126  // resetConsistentReadReady atomically resets the readiness flag when leadership is revoked
  1127  func (s *Server) resetConsistentReadReady() {
  1128  	atomic.StoreInt32(&s.readyForConsistentReads, 0)
  1129  }
  1130  
  1131  // isReadyForConsistentReads returns true if this server is ready to serve consistent reads
  1132  func (s *Server) isReadyForConsistentReads() bool {
  1133  	return atomic.LoadInt32(&s.readyForConsistentReads) == 1
  1134  }
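
        // Illustrative sketch (not part of the original source): how an RPC handler
        // might gate consistent reads on the flag above; the surrounding handler and
        // error message are hypothetical.
        //
        //	if !s.isReadyForConsistentReads() {
        //		return fmt.Errorf("not ready to serve consistent reads")
        //	}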
  1135  
  1136  // peersInfoContent is used to help operators understand what happened to the
  1137  // peers.json file. This is written to a file called peers.info in the same
  1138  // location.
  1139  const peersInfoContent = `
  1140  As of Consul 0.7.0, the peers.json file is only used for recovery
  1141  after an outage. The format of this file depends on what the server has
  1142  configured for its Raft protocol version. Please see the agent configuration
  1143  page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more
  1144  details about this parameter.
  1145  
  1146  For Raft protocol version 2 and earlier, this should be formatted as a JSON
  1147  array containing the address and port of each Consul server in the cluster, like
  1148  this:
  1149  
  1150  [
  1151    "10.1.0.1:8300",
  1152    "10.1.0.2:8300",
  1153    "10.1.0.3:8300"
  1154  ]
  1155  
  1156  For Raft protocol version 3 and later, this should be formatted as a JSON
  1157  array containing the node ID, address:port, and suffrage information of each
  1158  Consul server in the cluster, like this:
  1159  
  1160  [
  1161    {
  1162      "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
  1163      "address": "10.1.0.1:8300",
  1164      "non_voter": false
  1165    },
  1166    {
  1167      "id": "8b6dda82-3103-11e7-93ae-92361f002671",
  1168      "address": "10.1.0.2:8300",
  1169      "non_voter": false
  1170    },
  1171    {
  1172      "id": "97e17742-3103-11e7-93ae-92361f002671",
  1173      "address": "10.1.0.3:8300",
  1174      "non_voter": false
  1175    }
  1176  ]
  1177  
  1178  The "id" field is the node ID of the server. This can be found in the logs when
  1179  the server starts up, or in the "node-id" file inside the server's data
  1180  directory.
  1181  
  1182  The "address" field is the address and port of the server.
  1183  
  1184  The "non_voter" field controls whether the server is a non-voter, which is used
  1185  in some advanced Autopilot configurations, please see
  1186  https://www.consul.io/docs/guides/autopilot.html for more information. If
  1187  "non_voter" is omitted it will default to false, which is typical for most
  1188  clusters.
  1189  
  1190  Under normal operation, the peers.json file will not be present.
  1191  
  1192  When Consul starts for the first time, it will create this peers.info file and
  1193  delete any existing peers.json file so that recovery doesn't occur on the first
  1194  startup.
  1195  
  1196  Once this peers.info file is present, any peers.json file will be ingested at
  1197  startup, and will set the Raft peer configuration manually to recover from an
  1198  outage. It's crucial that all servers in the cluster are shut down before
  1199  creating the peers.json file, and that all servers receive the same
  1200  configuration. Once the peers.json file is successfully ingested and applied, it
  1201  will be deleted.
  1202  
  1203  Please see https://www.consul.io/docs/guides/outage.html for more information.
  1204  `