github.com/Aestek/consul@v1.2.4-0.20190309222502-b2c31e33971a/agent/consul/server.go

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"crypto/tls"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"log"
    11  	"net"
    12  	"net/rpc"
    13  	"os"
    14  	"path/filepath"
    15  	"reflect"
    16  	"strconv"
    17  	"sync"
    18  	"sync/atomic"
    19  	"time"
    20  
    21  	ca "github.com/hashicorp/consul/agent/connect/ca"
    22  	"github.com/hashicorp/consul/agent/consul/autopilot"
    23  	"github.com/hashicorp/consul/agent/consul/fsm"
    24  	"github.com/hashicorp/consul/agent/consul/state"
    25  	"github.com/hashicorp/consul/agent/metadata"
    26  	"github.com/hashicorp/consul/agent/pool"
    27  	"github.com/hashicorp/consul/agent/router"
    28  	"github.com/hashicorp/consul/agent/structs"
    29  	"github.com/hashicorp/consul/agent/token"
    30  	"github.com/hashicorp/consul/lib"
    31  	"github.com/hashicorp/consul/sentinel"
    32  	"github.com/hashicorp/consul/tlsutil"
    33  	"github.com/hashicorp/consul/types"
    34  	"github.com/hashicorp/raft"
    35  	raftboltdb "github.com/hashicorp/raft-boltdb"
    36  	"github.com/hashicorp/serf/serf"
    37  )
    38  
    39  // These are the protocol versions that Consul can _understand_. These are
    40  // Consul-level protocol versions that are used to configure the Serf
    41  // protocol versions.
    42  const (
    43  	ProtocolVersionMin uint8 = 2
    44  
    45  	// Version 3 added support for network coordinates but we kept the
    46  	// default protocol version at 2 to ease the transition to this new
    47  	// feature. A Consul agent speaking version 2 of the protocol will
    48  // attempt to send its coordinates to a server that understands version
    49  	// 3 or greater.
    50  	ProtocolVersion2Compatible = 2
    51  
    52  	ProtocolVersionMax = 3
    53  )
    54  
    55  const (
    56  	serfLANSnapshot   = "serf/local.snapshot"
    57  	serfWANSnapshot   = "serf/remote.snapshot"
    58  	raftState         = "raft/"
    59  	snapshotsRetained = 2
    60  
    61  	// serverRPCCache controls how long we keep an idle connection
    62  	// open to a server
    63  	serverRPCCache = 2 * time.Minute
    64  
    65  	// serverMaxStreams controls how many idle streams we keep
    66  	// open to a server
    67  	serverMaxStreams = 64
    68  
    69  	// raftLogCacheSize is the maximum number of logs to cache in-memory.
    70  	// This is used to reduce disk I/O for the recently committed entries.
    71  	raftLogCacheSize = 512
    72  
    73  	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
    74  	// to replicate to gracefully leave the cluster.
    75  	raftRemoveGracePeriod = 5 * time.Second
    76  
    77  	// serfEventChSize is the size of the buffered channel to get Serf
    78  	// events. If this is exhausted we will block Serf and Memberlist.
    79  	serfEventChSize = 2048
    80  
    81  // reconcileChSize is the size of the buffered channel for reconcile updates
    82  	// from Serf with the Catalog. If this is exhausted we will drop updates,
    83  	// and wait for a periodic reconcile.
    84  	reconcileChSize = 256
    85  )
    86  
    87  var (
    88  	ErrWANFederationDisabled = fmt.Errorf("WAN Federation is disabled")
    89  )
    90  
    91  // Server is the Consul server which manages the service discovery,
    92  // health checking, DC forwarding, Raft, and multiple Serf pools.
    93  type Server struct {
    94  	// sentinel is the Sentinel code engine (can be nil).
    95  	sentinel sentinel.Evaluator
    96  
    97  	// acls is used to resolve tokens to effective policies
    98  	acls *ACLResolver
    99  
   100  	// aclUpgradeCancel is used to cancel the ACL upgrade goroutine when we
   101  	// lose leadership
   102  	aclUpgradeCancel  context.CancelFunc
   103  	aclUpgradeLock    sync.RWMutex
   104  	aclUpgradeEnabled bool
   105  
   106  	// aclReplicationCancel is used to shut down the ACL replication goroutine
   107  	// when we lose leadership
   108  	aclReplicationCancel  context.CancelFunc
   109  	aclReplicationLock    sync.RWMutex
   110  	aclReplicationEnabled bool
   111  
   112  	// DEPRECATED (ACL-Legacy-Compat) - only needed while we support both legacy and new ACLs
   113  	// useNewACLs is used to determine whether we can use new ACLs or not
   114  	useNewACLs int32
   115  
   116  	// autopilot is the Autopilot instance for this server.
   117  	autopilot *autopilot.Autopilot
   118  
   119  	// autopilotWaitGroup is used to block until Autopilot shuts down.
   120  	autopilotWaitGroup sync.WaitGroup
   121  
   122  	// caProvider is the current CA provider in use for Connect. This is
   123  	// only non-nil when we are the leader.
   124  	caProvider ca.Provider
   125  	// caProviderRoot is the CARoot that was stored along with the active
   126  	// ca.Provider. It's only updated in lock-step with the caProvider. This prevents
   127  	// races between state updates to active roots and the fetch of the provider
   128  	// instance.
   129  	caProviderRoot *structs.CARoot
   130  	caProviderLock sync.RWMutex
   131  
   132  	// caPruningCh is used to shut down the CA root pruning goroutine when we
   133  	// lose leadership.
   134  	caPruningCh      chan struct{}
   135  	caPruningLock    sync.RWMutex
   136  	caPruningEnabled bool
   137  
   138  	// Consul configuration
   139  	config *Config
   140  
   141  	// tokens holds ACL tokens initially from the configuration, but can
   142  	// be updated at runtime, so should always be used instead of going to
   143  	// the configuration directly.
   144  	tokens *token.Store
   145  
   146  	// Connection pool to other consul servers
   147  	connPool *pool.ConnPool
   148  
   149  	// eventChLAN is used to receive events from the
   150  	// serf cluster in the datacenter
   151  	eventChLAN chan serf.Event
   152  
   153  	// eventChWAN is used to receive events from the
   154  	// serf cluster that spans datacenters
   155  	eventChWAN chan serf.Event
   156  
   157  	// fsm is the state machine used with Raft to provide
   158  	// strong consistency.
   159  	fsm *fsm.FSM
   160  
   161  	// Logger uses the provided LogOutput
   162  	logger *log.Logger
   163  
   164  	// The raft instance is used among Consul nodes within the DC to protect
   165  	// operations that require strong consistency, rather than reading or writing
   166  	// the state directly.
   167  	raft          *raft.Raft
   168  	raftLayer     *RaftLayer
   169  	raftStore     *raftboltdb.BoltStore
   170  	raftTransport *raft.NetworkTransport
   171  	raftInmem     *raft.InmemStore
   172  
   173  	// raftNotifyCh is set up by setupRaft() and ensures that we get reliable leader
   174  	// transition notifications from the Raft layer.
   175  	raftNotifyCh <-chan bool
   176  
   177  	// reconcileCh is used to pass events from the serf handler
   178  	// into the leader manager, so that the strong state can be
   179  	// updated
   180  	reconcileCh chan serf.Member
   181  
   182  	// readyForConsistentReads is used to track when the leader server is
   183  	// ready to serve consistent reads, after it has applied its initial
   184  	// barrier. This is updated atomically.
   185  	readyForConsistentReads int32
   186  
   187  	// leaveCh is used to signal that the server is leaving the cluster
   188  	// and trying to shed its RPC traffic onto other Consul servers. This
   189  	// is only ever closed.
   190  	leaveCh chan struct{}
   191  
   192  	// router is used to map out Consul servers in the WAN and in Consul
   193  	// Enterprise user-defined areas.
   194  	router *router.Router
   195  
   196  	// Listener is used to listen for incoming connections
   197  	Listener  net.Listener
   198  	rpcServer *rpc.Server
   199  
   200  	// rpcTLS is the TLS config for incoming TLS requests
   201  	rpcTLS *tls.Config
   202  
   203  	// serfLAN is the Serf cluster maintained inside the DC
   204  	// which contains all the DC nodes
   205  	serfLAN *serf.Serf
   206  
   207  	// segmentLAN maps segment names to their Serf cluster
   208  	segmentLAN map[string]*serf.Serf
   209  
   210  	// serfWAN is the Serf cluster maintained between DCs,
   211  	// which SHOULD only consist of Consul servers
   212  	serfWAN *serf.Serf
   213  
   214  	// serverLookup tracks the Consul servers in the local datacenter.
   215  	// Used to do leader forwarding and provide fast lookup by server ID and address.
   216  	serverLookup *ServerLookup
   217  
   218  	// floodLock controls access to floodCh.
   219  	floodLock sync.RWMutex
   220  	floodCh   []chan struct{}
   221  
   222  	// sessionTimers track the expiration time of each Session that has
   223  	// a TTL. On expiration, a SessionDestroy event will occur, and
   224  	// destroy the session via standard session destroy processing
   225  	sessionTimers *SessionTimers
   226  
   227  	// statsFetcher is used by autopilot to check the status of the other
   228  	// Consul servers.
   229  	statsFetcher *StatsFetcher
   230  
   231  	// reassertLeaderCh is used to signal the leader loop should re-run
   232  	// leadership actions after a snapshot restore.
   233  	reassertLeaderCh chan chan error
   234  
   235  	// tombstoneGC is used to track the pending GC invocations
   236  	// for the KV tombstones
   237  	tombstoneGC *state.TombstoneGC
   238  
   239  	// aclReplicationStatus (and its associated lock) provide information
   240  	// about the health of the ACL replication goroutine.
   241  	aclReplicationStatus     structs.ACLReplicationStatus
   242  	aclReplicationStatusLock sync.RWMutex
   243  
   244  	// shutdown and the associated members here are used in orchestrating
   245  	// a clean shutdown. The shutdownCh is never written to, only closed to
   246  	// indicate a shutdown has been initiated.
   247  	shutdown     bool
   248  	shutdownCh   chan struct{}
   249  	shutdownLock sync.Mutex
   250  
   251  	// embedded struct to hold all the enterprise specific data
   252  	EnterpriseServer
   253  }
   254  
   255  func NewServer(config *Config) (*Server, error) {
   256  	return NewServerLogger(config, nil, new(token.Store), tlsutil.NewConfigurator(config.ToTLSUtilConfig()))
   257  }
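
        // A minimal usage sketch of how an embedding agent might construct and
        // tear down a server via the constructor above. The DataDir value is
        // hypothetical, and DefaultConfig is assumed to fill in sane defaults:
        //
        //	cfg := DefaultConfig()
        //	cfg.DataDir = "/tmp/consul-server" // hypothetical data directory
        //	srv, err := NewServer(cfg)
        //	if err != nil {
        //		log.Fatalf("failed to start consul server: %v", err)
        //	}
        //	defer srv.Shutdown()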
   258  
   259  // NewServerLogger is used to construct a new Consul server from the
   260  // configuration, potentially returning an error
   261  func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store, tlsConfigurator *tlsutil.Configurator) (*Server, error) {
   262  	// Check the protocol version.
   263  	if err := config.CheckProtocolVersion(); err != nil {
   264  		return nil, err
   265  	}
   266  
   267  	// Check for a data directory.
   268  	if config.DataDir == "" && !config.DevMode {
   269  		return nil, fmt.Errorf("Config must provide a DataDir")
   270  	}
   271  
   272  	// Sanity check the ACLs.
   273  	if err := config.CheckACL(); err != nil {
   274  		return nil, err
   275  	}
   276  
   277  	// Ensure we have a log output and create a logger.
   278  	if config.LogOutput == nil {
   279  		config.LogOutput = os.Stderr
   280  	}
   281  	if logger == nil {
   282  		logger = log.New(config.LogOutput, "", log.LstdFlags)
   283  	}
   284  
   285  	// Check if TLS is enabled
   286  	if config.CAFile != "" || config.CAPath != "" {
   287  		config.UseTLS = true
   288  	}
   289  
   290  	// Set the primary DC if it wasn't set.
   291  	if config.PrimaryDatacenter == "" {
   292  		if config.ACLDatacenter != "" {
   293  			config.PrimaryDatacenter = config.ACLDatacenter
   294  		} else {
   295  			config.PrimaryDatacenter = config.Datacenter
   296  		}
   297  	}
   298  
   299  	// Create the TLS wrapper for outgoing connections.
   300  	tlsWrap, err := tlsConfigurator.OutgoingRPCWrapper()
   301  	if err != nil {
   302  		return nil, err
   303  	}
   304  
   305  	// Get the incoming TLS config.
   306  	incomingTLS, err := tlsConfigurator.IncomingRPCConfig()
   307  	if err != nil {
   308  		return nil, err
   309  	}
   310  
   311  	// Create the tombstone GC.
   312  	gc, err := state.NewTombstoneGC(config.TombstoneTTL, config.TombstoneTTLGranularity)
   313  	if err != nil {
   314  		return nil, err
   315  	}
   316  
   317  	// Create the shutdown channel - this is closed but never written to.
   318  	shutdownCh := make(chan struct{})
   319  
   320  	connPool := &pool.ConnPool{
   321  		SrcAddr:    config.RPCSrcAddr,
   322  		LogOutput:  config.LogOutput,
   323  		MaxTime:    serverRPCCache,
   324  		MaxStreams: serverMaxStreams,
   325  		TLSWrapper: tlsWrap,
   326  		ForceTLS:   config.VerifyOutgoing,
   327  	}
   328  
   329  	// Create server.
   330  	s := &Server{
   331  		config:           config,
   332  		tokens:           tokens,
   333  		connPool:         connPool,
   334  		eventChLAN:       make(chan serf.Event, serfEventChSize),
   335  		eventChWAN:       make(chan serf.Event, serfEventChSize),
   336  		logger:           logger,
   337  		leaveCh:          make(chan struct{}),
   338  		reconcileCh:      make(chan serf.Member, reconcileChSize),
   339  		router:           router.NewRouter(logger, config.Datacenter),
   340  		rpcServer:        rpc.NewServer(),
   341  		rpcTLS:           incomingTLS,
   342  		reassertLeaderCh: make(chan chan error),
   343  		segmentLAN:       make(map[string]*serf.Serf, len(config.Segments)),
   344  		sessionTimers:    NewSessionTimers(),
   345  		tombstoneGC:      gc,
   346  		serverLookup:     NewServerLookup(),
   347  		shutdownCh:       shutdownCh,
   348  	}
   349  
   350  	// Initialize enterprise specific server functionality
   351  	if err := s.initEnterprise(); err != nil {
   352  		s.Shutdown()
   353  		return nil, err
   354  	}
   355  
   356  	// Initialize the stats fetcher that autopilot will use.
   357  	s.statsFetcher = NewStatsFetcher(logger, s.connPool, s.config.Datacenter)
   358  
   359  	s.sentinel = sentinel.New(logger)
   360  	s.useNewACLs = 0
   361  	aclConfig := ACLResolverConfig{
   362  		Config:      config,
   363  		Delegate:    s,
   364  		CacheConfig: serverACLCacheConfig,
   365  		AutoDisable: false,
   366  		Logger:      logger,
   367  		Sentinel:    s.sentinel,
   368  	}
   369  	// Initialize the ACL resolver.
   370  	if s.acls, err = NewACLResolver(&aclConfig); err != nil {
   371  		s.Shutdown()
   372  		return nil, fmt.Errorf("Failed to create ACL resolver: %v", err)
   373  	}
   374  
   375  	// Initialize the RPC layer.
   376  	if err := s.setupRPC(tlsWrap); err != nil {
   377  		s.Shutdown()
   378  		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
   379  	}
   380  
   381  	// Initialize any extra RPC listeners for segments.
   382  	segmentListeners, err := s.setupSegmentRPC()
   383  	if err != nil {
   384  		s.Shutdown()
   385  		return nil, fmt.Errorf("Failed to start segment RPC layer: %v", err)
   386  	}
   387  
   388  	// Initialize the Raft server.
   389  	if err := s.setupRaft(); err != nil {
   390  		s.Shutdown()
   391  		return nil, fmt.Errorf("Failed to start Raft: %v", err)
   392  	}
   393  
   394  	// Serf and dynamic bind ports
   395  	//
   396  	// The LAN serf cluster announces the port of the WAN serf cluster
   397  	// which creates a race when the WAN cluster is supposed to bind to
   398  	// a dynamic port (port 0). The current memberlist implementation will
   399  	// update the bind port in the configuration after the memberlist is
   400  	// created, so we can pull it out from there reliably, even though it's
   401  	// a little gross to be reading the updated config.
   402  
   403  	// Initialize the WAN Serf if enabled
   404  	serfBindPortWAN := -1
   405  	if config.SerfWANConfig != nil {
   406  		serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
   407  		s.serfWAN, err = s.setupSerf(config.SerfWANConfig, s.eventChWAN, serfWANSnapshot, true, serfBindPortWAN, "", s.Listener)
   408  		if err != nil {
   409  			s.Shutdown()
   410  			return nil, fmt.Errorf("Failed to start WAN Serf: %v", err)
   411  		}
   412  		// See the big comment above for why we are doing this.
   413  		if serfBindPortWAN == 0 {
   414  			serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
   415  			if serfBindPortWAN == 0 {
   416  				return nil, fmt.Errorf("Failed to get dynamic bind port for WAN Serf")
   417  			}
   418  			s.logger.Printf("[INFO] agent: Serf WAN TCP bound to port %d", serfBindPortWAN)
   419  		}
   420  	}
   421  
   422  	// Initialize the LAN segments before the default LAN Serf so we have
   423  	// updated port information to publish there.
   424  	if err := s.setupSegments(config, serfBindPortWAN, segmentListeners); err != nil {
   425  		s.Shutdown()
   426  		return nil, fmt.Errorf("Failed to setup network segments: %v", err)
   427  	}
   428  
   429  	// Initialize the LAN Serf for the default network segment.
   430  	s.serfLAN, err = s.setupSerf(config.SerfLANConfig, s.eventChLAN, serfLANSnapshot, false, serfBindPortWAN, "", s.Listener)
   431  	if err != nil {
   432  		s.Shutdown()
   433  		return nil, fmt.Errorf("Failed to start LAN Serf: %v", err)
   434  	}
   435  	go s.lanEventHandler()
   436  
   437  	// Start the flooders after the LAN event handler is wired up.
   438  	s.floodSegments(config)
   439  
   440  	// Add a "static route" to the WAN Serf and hook it up to Serf events.
   441  	if s.serfWAN != nil {
   442  		if err := s.router.AddArea(types.AreaWAN, s.serfWAN, s.connPool, s.config.VerifyOutgoing); err != nil {
   443  			s.Shutdown()
   444  			return nil, fmt.Errorf("Failed to add WAN serf route: %v", err)
   445  		}
   446  		go router.HandleSerfEvents(s.logger, s.router, types.AreaWAN, s.serfWAN.ShutdownCh(), s.eventChWAN)
   447  
   448  		// Fire up the LAN <-> WAN join flooder.
   449  		portFn := func(s *metadata.Server) (int, bool) {
   450  			if s.WanJoinPort > 0 {
   451  				return s.WanJoinPort, true
   452  			}
   453  			return 0, false
   454  		}
   455  		go s.Flood(nil, portFn, s.serfWAN)
   456  	}
   457  
   458  	// Start enterprise specific functionality
   459  	if err := s.startEnterprise(); err != nil {
   460  		s.Shutdown()
   461  		return nil, err
   462  	}
   463  
   464  	// Initialize Autopilot. This must happen before starting leadership monitoring
   465  	// as establishing leadership could attempt to use autopilot and cause a panic.
   466  	s.initAutopilot(config)
   467  
   468  	// Start monitoring leadership. This must happen after Serf is set up
   469  	// since it can fire events when leadership is obtained.
   470  	go s.monitorLeadership()
   471  
   472  	// Start listening for RPC requests.
   473  	go s.listen(s.Listener)
   474  
   475  	// Start listeners for any segments with separate RPC listeners.
   476  	for _, listener := range segmentListeners {
   477  		go s.listen(listener)
   478  	}
   479  
   480  	// Start the metrics handlers.
   481  	go s.sessionStats()
   482  
   483  	return s, nil
   484  }
   485  
   486  // setupRaft is used to set up and initialize Raft
   487  func (s *Server) setupRaft() error {
   488  	// If we have an unclean exit then attempt to close the Raft store.
   489  	defer func() {
   490  		if s.raft == nil && s.raftStore != nil {
   491  			if err := s.raftStore.Close(); err != nil {
   492  				s.logger.Printf("[ERR] consul: failed to close Raft store: %v", err)
   493  			}
   494  		}
   495  	}()
   496  
   497  	// Create the FSM.
   498  	var err error
   499  	s.fsm, err = fsm.New(s.tombstoneGC, s.config.LogOutput)
   500  	if err != nil {
   501  		return err
   502  	}
   503  
   504  	var serverAddressProvider raft.ServerAddressProvider
   505  	if s.config.RaftConfig.ProtocolVersion >= 3 { // ServerAddressProvider needs server IDs, which are only supported in protocol version 3 or higher
   506  		serverAddressProvider = s.serverLookup
   507  	}
   508  
   509  	// Create a transport layer.
   510  	transConfig := &raft.NetworkTransportConfig{
   511  		Stream:                s.raftLayer,
   512  		MaxPool:               3,
   513  		Timeout:               10 * time.Second,
   514  		ServerAddressProvider: serverAddressProvider,
   515  		Logger:                s.logger,
   516  	}
   517  
   518  	trans := raft.NewNetworkTransportWithConfig(transConfig)
   519  	s.raftTransport = trans
   520  
   521  	// Make sure we set the LogOutput.
   522  	s.config.RaftConfig.LogOutput = s.config.LogOutput
   523  	s.config.RaftConfig.Logger = s.logger
   524  
   525  	// Versions of the Raft protocol below 3 require the LocalID to match the network
   526  	// address of the transport.
   527  	s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr())
   528  	if s.config.RaftConfig.ProtocolVersion >= 3 {
   529  		s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID)
   530  	}
   531  
   532  	// Build an all in-memory setup for dev mode, otherwise prepare a full
   533  	// disk-based setup.
   534  	var log raft.LogStore
   535  	var stable raft.StableStore
   536  	var snap raft.SnapshotStore
   537  	if s.config.DevMode {
   538  		store := raft.NewInmemStore()
   539  		s.raftInmem = store
   540  		stable = store
   541  		log = store
   542  		snap = raft.NewInmemSnapshotStore()
   543  	} else {
   544  		// Create the base raft path.
   545  		path := filepath.Join(s.config.DataDir, raftState)
   546  		if err := lib.EnsurePath(path, true); err != nil {
   547  			return err
   548  		}
   549  
   550  		// Create the backend raft store for logs and stable storage.
   551  		store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
   552  		if err != nil {
   553  			return err
   554  		}
   555  		s.raftStore = store
   556  		stable = store
   557  
   558  		// Wrap the store in a LogCache to improve performance.
   559  		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
   560  		if err != nil {
   561  			return err
   562  		}
   563  		log = cacheStore
   564  
   565  		// Create the snapshot store.
   566  		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
   567  		if err != nil {
   568  			return err
   569  		}
   570  		snap = snapshots
   571  
   572  		// For an existing cluster being upgraded to the new version of
   573  		// Raft, we almost never want to run recovery based on the old
   574  		// peers.json file. We create a peers.info file with a helpful
   575  		// note about where peers.json went, and use that as a sentinel
   576  		// to avoid ingesting the old one that first time (if we have to
   577  		// create the peers.info file because it's not there, we also
   578  		// blow away any existing peers.json file).
   579  		peersFile := filepath.Join(path, "peers.json")
   580  		peersInfoFile := filepath.Join(path, "peers.info")
   581  		if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) {
   582  			if err := ioutil.WriteFile(peersInfoFile, []byte(peersInfoContent), 0755); err != nil {
   583  				return fmt.Errorf("failed to write peers.info file: %v", err)
   584  			}
   585  
   586  			// Blow away the peers.json file if present, since the
   587  			// peers.info sentinel wasn't there.
   588  			if _, err := os.Stat(peersFile); err == nil {
   589  				if err := os.Remove(peersFile); err != nil {
   590  					return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
   591  				}
   592  				s.logger.Printf("[INFO] consul: deleted peers.json file (see peers.info for details)")
   593  			}
   594  		} else if _, err := os.Stat(peersFile); err == nil {
   595  			s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...")
   596  
   597  			var configuration raft.Configuration
   598  			if s.config.RaftConfig.ProtocolVersion < 3 {
   599  				configuration, err = raft.ReadPeersJSON(peersFile)
   600  			} else {
   601  				configuration, err = raft.ReadConfigJSON(peersFile)
   602  			}
   603  			if err != nil {
   604  				return fmt.Errorf("recovery failed to parse peers.json: %v", err)
   605  			}
   606  
   607  			tmpFsm, err := fsm.New(s.tombstoneGC, s.config.LogOutput)
   608  			if err != nil {
   609  				return fmt.Errorf("recovery failed to make temp FSM: %v", err)
   610  			}
   611  			if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
   612  				log, stable, snap, trans, configuration); err != nil {
   613  				return fmt.Errorf("recovery failed: %v", err)
   614  			}
   615  
   616  			if err := os.Remove(peersFile); err != nil {
   617  				return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
   618  			}
   619  			s.logger.Printf("[INFO] consul: deleted peers.json file after successful recovery")
   620  		}
   621  	}
   622  
   623  	// If we are in bootstrap or dev mode and the state is clean then we can
   624  	// bootstrap now.
   625  	if s.config.Bootstrap || s.config.DevMode {
   626  		hasState, err := raft.HasExistingState(log, stable, snap)
   627  		if err != nil {
   628  			return err
   629  		}
   630  		if !hasState {
   631  			configuration := raft.Configuration{
   632  				Servers: []raft.Server{
   633  					raft.Server{
   634  						ID:      s.config.RaftConfig.LocalID,
   635  						Address: trans.LocalAddr(),
   636  					},
   637  				},
   638  			}
   639  			if err := raft.BootstrapCluster(s.config.RaftConfig,
   640  				log, stable, snap, trans, configuration); err != nil {
   641  				return err
   642  			}
   643  		}
   644  	}
   645  
   646  	// Set up a channel for reliable leader notifications.
   647  	raftNotifyCh := make(chan bool, 1)
   648  	s.config.RaftConfig.NotifyCh = raftNotifyCh
   649  	s.raftNotifyCh = raftNotifyCh
   650  
   651  	// Setup the Raft store.
   652  	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)
   653  	if err != nil {
   654  		return err
   655  	}
   656  	return nil
   657  }
   658  
   659  // factory is a function that returns an RPC endpoint bound to the given
   660  // server.
   661  type factory func(s *Server) interface{}
   662  
   663  // endpoints is a list of registered RPC endpoint factories.
   664  var endpoints []factory
   665  
   666  // registerEndpoint registers a new RPC endpoint factory.
   667  func registerEndpoint(fn factory) {
   668  	endpoints = append(endpoints, fn)
   669  }
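
        // Endpoint implementations in this package register themselves at package
        // init time, roughly along the lines of this sketch (the Status endpoint
        // shown here is defined elsewhere in the package):
        //
        //	func init() {
        //		registerEndpoint(func(s *Server) interface{} { return &Status{s} })
        //	}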
   670  
   671  // setupRPC is used to set up the RPC listener
   672  func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error {
   673  	for _, fn := range endpoints {
   674  		s.rpcServer.Register(fn(s))
   675  	}
   676  
   677  	ln, err := net.ListenTCP("tcp", s.config.RPCAddr)
   678  	if err != nil {
   679  		return err
   680  	}
   681  	s.Listener = ln
   682  	if s.config.NotifyListen != nil {
   683  		s.config.NotifyListen()
   684  	}
   685  	// todo(fs): we should probably guard this
   686  	if s.config.RPCAdvertise == nil {
   687  		s.config.RPCAdvertise = ln.Addr().(*net.TCPAddr)
   688  	}
   689  
   690  	// Verify that we have a usable advertise address
   691  	if s.config.RPCAdvertise.IP.IsUnspecified() {
   692  		ln.Close()
   693  		return fmt.Errorf("RPC advertise address is not advertisable: %v", s.config.RPCAdvertise)
   694  	}
   695  
   696  	// Provide a DC-specific wrapper. Raft replication is only
   697  	// ever done in the same datacenter, so we can provide it as a constant.
   698  	wrapper := tlsutil.SpecificDC(s.config.Datacenter, tlsWrap)
   699  
   700  	// Define a callback for determining whether to wrap a connection with TLS
   701  	tlsFunc := func(address raft.ServerAddress) bool {
   702  		if s.config.VerifyOutgoing {
   703  			return true
   704  		}
   705  
   706  		server := s.serverLookup.Server(address)
   707  
   708  		if server == nil {
   709  			return false
   710  		}
   711  
   712  		return server.UseTLS
   713  	}
   714  	s.raftLayer = NewRaftLayer(s.config.RPCSrcAddr, s.config.RPCAdvertise, wrapper, tlsFunc)
   715  	return nil
   716  }
   717  
   718  // Shutdown is used to shut down the server
   719  func (s *Server) Shutdown() error {
   720  	s.logger.Printf("[INFO] consul: shutting down server")
   721  	s.shutdownLock.Lock()
   722  	defer s.shutdownLock.Unlock()
   723  
   724  	if s.shutdown {
   725  		return nil
   726  	}
   727  
   728  	s.shutdown = true
   729  	close(s.shutdownCh)
   730  
   731  	if s.serfLAN != nil {
   732  		s.serfLAN.Shutdown()
   733  	}
   734  
   735  	if s.serfWAN != nil {
   736  		s.serfWAN.Shutdown()
   737  		if err := s.router.RemoveArea(types.AreaWAN); err != nil {
   738  			s.logger.Printf("[WARN] consul: error removing WAN area: %v", err)
   739  		}
   740  	}
   741  	s.router.Shutdown()
   742  
   743  	if s.raft != nil {
   744  		s.raftTransport.Close()
   745  		s.raftLayer.Close()
   746  		future := s.raft.Shutdown()
   747  		if err := future.Error(); err != nil {
   748  			s.logger.Printf("[WARN] consul: error shutting down raft: %s", err)
   749  		}
   750  		if s.raftStore != nil {
   751  			s.raftStore.Close()
   752  		}
   753  	}
   754  
   755  	if s.Listener != nil {
   756  		s.Listener.Close()
   757  	}
   758  
   759  	// Close the connection pool
   760  	s.connPool.Shutdown()
   761  
   762  	return nil
   763  }
   764  
   765  // Leave is used to prepare for a graceful shutdown of the server
   766  func (s *Server) Leave() error {
   767  	s.logger.Printf("[INFO] consul: server starting leave")
   768  
   769  	// Check the number of known peers
   770  	numPeers, err := s.numPeers()
   771  	if err != nil {
   772  		s.logger.Printf("[ERR] consul: failed to check raft peers: %v", err)
   773  		return err
   774  	}
   775  
   776  	addr := s.raftTransport.LocalAddr()
   777  
   778  	// If we are the current leader, and we have any other peers (cluster has multiple
   779  	// servers), we should do a RemoveServer/RemovePeer to safely reduce the quorum size.
   780  	// If we are not the leader, then we should issue our leave intention and wait to be
   781  	// removed for some sane period of time.
   782  	isLeader := s.IsLeader()
   783  	if isLeader && numPeers > 1 {
   784  		minRaftProtocol, err := s.autopilot.MinRaftProtocol()
   785  		if err != nil {
   786  			return err
   787  		}
   788  
   789  		if minRaftProtocol >= 2 && s.config.RaftConfig.ProtocolVersion >= 3 {
   790  			future := s.raft.RemoveServer(raft.ServerID(s.config.NodeID), 0, 0)
   791  			if err := future.Error(); err != nil {
   792  				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
   793  			}
   794  		} else {
   795  			future := s.raft.RemovePeer(addr)
   796  			if err := future.Error(); err != nil {
   797  				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
   798  			}
   799  		}
   800  	}
   801  
   802  	// Leave the WAN pool
   803  	if s.serfWAN != nil {
   804  		if err := s.serfWAN.Leave(); err != nil {
   805  			s.logger.Printf("[ERR] consul: failed to leave WAN Serf cluster: %v", err)
   806  		}
   807  	}
   808  
   809  	// Leave the LAN pool
   810  	if s.serfLAN != nil {
   811  		if err := s.serfLAN.Leave(); err != nil {
   812  			s.logger.Printf("[ERR] consul: failed to leave LAN Serf cluster: %v", err)
   813  		}
   814  	}
   815  
   816  	// Start refusing RPCs now that we've left the LAN pool. It's important
   817  	// to do this *after* we've left the LAN pool so that clients will know
   818  	// to shift onto another server if they perform a retry. We also wake up
   819  	// all queries in the RPC retry state.
   820  	s.logger.Printf("[INFO] consul: Waiting %s to drain RPC traffic", s.config.LeaveDrainTime)
   821  	close(s.leaveCh)
   822  	time.Sleep(s.config.LeaveDrainTime)
   823  
   824  	// If we were not leader, wait to be safely removed from the cluster. We
   825  	// must wait to allow the raft replication to take place, otherwise an
   826  	// immediate shutdown could cause a loss of quorum.
   827  	if !isLeader {
   828  		left := false
   829  		limit := time.Now().Add(raftRemoveGracePeriod)
   830  		for !left && time.Now().Before(limit) {
   831  			// Sleep a while before we check.
   832  			time.Sleep(50 * time.Millisecond)
   833  
   834  			// Get the latest configuration.
   835  			future := s.raft.GetConfiguration()
   836  			if err := future.Error(); err != nil {
   837  				s.logger.Printf("[ERR] consul: failed to get raft configuration: %v", err)
   838  				break
   839  			}
   840  
   841  			// See if we are no longer included.
   842  			left = true
   843  			for _, server := range future.Configuration().Servers {
   844  				if server.Address == addr {
   845  					left = false
   846  					break
   847  				}
   848  			}
   849  		}
   850  
   851  		// TODO (slackpad) With the old Raft library we used to force the
   852  		// peers set to empty when a graceful leave occurred. This would
   853  		// keep voting spam down if the server was restarted, but it was
   854  	// dangerous because the peers set was inconsistent with the logs and
   855  		// snapshots, so it wasn't really safe in all cases for the server
   856  		// to become leader. This is now safe, but the log spam is noisy.
   857  		// The next new version of the library will have a "you are not a
   858  		// peer stop it" behavior that should address this. We will have
   859  		// to evaluate during the RC period if this interim situation is
   860  		// not too confusing for operators.
   861  
   862  		// TODO (slackpad) When we take a later new version of the Raft
   863  		// library it won't try to complete replication, so this peer
   864  		// may not realize that it has been removed. Need to revisit this
   865  		// and the warning here.
   866  		if !left {
   867  			s.logger.Printf("[WARN] consul: failed to leave raft configuration gracefully, timeout")
   868  		}
   869  	}
   870  
   871  	return nil
   872  }
   873  
   874  // numPeers is used to check on the number of known peers, including potentially
   875  // the local node. We count only voters, since others can't actually become
   876  // leader, so aren't considered peers.
   877  func (s *Server) numPeers() (int, error) {
   878  	future := s.raft.GetConfiguration()
   879  	if err := future.Error(); err != nil {
   880  		return 0, err
   881  	}
   882  
   883  	return autopilot.NumPeers(future.Configuration()), nil
   884  }
   885  
   886  // JoinLAN is used to have Consul join the intra-DC pool.
   887  // The target address should be another node inside the DC
   888  // listening on the Serf LAN address
   889  func (s *Server) JoinLAN(addrs []string) (int, error) {
   890  	return s.serfLAN.Join(addrs, true)
   891  }
   892  
   893  // JoinWAN is used to have Consul join the cross-WAN Consul ring
   894  // The target address should be another node listening on the
   895  // Serf WAN address
   896  func (s *Server) JoinWAN(addrs []string) (int, error) {
   897  	if s.serfWAN == nil {
   898  		return 0, ErrWANFederationDisabled
   899  	}
   900  	return s.serfWAN.Join(addrs, true)
   901  }
   902  
   903  // LocalMember is used to return the local node
   904  func (s *Server) LocalMember() serf.Member {
   905  	return s.serfLAN.LocalMember()
   906  }
   907  
   908  // LANMembers is used to return the members of the LAN cluster
   909  func (s *Server) LANMembers() []serf.Member {
   910  	return s.serfLAN.Members()
   911  }
   912  
   913  // WANMembers is used to return the members of the WAN cluster
   914  func (s *Server) WANMembers() []serf.Member {
   915  	if s.serfWAN == nil {
   916  		return nil
   917  	}
   918  	return s.serfWAN.Members()
   919  }
   920  
   921  // RemoveFailedNode is used to remove a failed node from the cluster
   922  func (s *Server) RemoveFailedNode(node string) error {
   923  	if err := s.serfLAN.RemoveFailedNode(node); err != nil {
   924  		return err
   925  	}
   926  	if s.serfWAN != nil {
   927  		if err := s.serfWAN.RemoveFailedNode(node); err != nil {
   928  			return err
   929  		}
   930  	}
   931  	return nil
   932  }
   933  
   934  // IsLeader checks if this server is the cluster leader
   935  func (s *Server) IsLeader() bool {
   936  	return s.raft.State() == raft.Leader
   937  }
   938  
   939  // KeyManagerLAN returns the LAN Serf keyring manager
   940  func (s *Server) KeyManagerLAN() *serf.KeyManager {
   941  	return s.serfLAN.KeyManager()
   942  }
   943  
   944  // KeyManagerWAN returns the WAN Serf keyring manager
   945  func (s *Server) KeyManagerWAN() *serf.KeyManager {
   946  	return s.serfWAN.KeyManager()
   947  }
   948  
   949  // Encrypted determines if gossip is encrypted
   950  func (s *Server) Encrypted() bool {
   951  	LANEncrypted := s.serfLAN.EncryptionEnabled()
   952  	if s.serfWAN == nil {
   953  		return LANEncrypted
   954  	}
   955  	return LANEncrypted && s.serfWAN.EncryptionEnabled()
   956  }
   957  
   958  // LANSegments returns a map of LAN segments by name
   959  func (s *Server) LANSegments() map[string]*serf.Serf {
   960  	segments := make(map[string]*serf.Serf, len(s.segmentLAN)+1)
   961  	segments[""] = s.serfLAN
   962  	for name, segment := range s.segmentLAN {
   963  		segments[name] = segment
   964  	}
   965  
   966  	return segments
   967  }
   968  
   969  // inmemCodec is used to do an RPC call without going over a network
   970  type inmemCodec struct {
   971  	method string
   972  	args   interface{}
   973  	reply  interface{}
   974  	err    error
   975  }
   976  
   977  func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error {
   978  	req.ServiceMethod = i.method
   979  	return nil
   980  }
   981  
   982  func (i *inmemCodec) ReadRequestBody(args interface{}) error {
   983  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args)))
   984  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args)))
   985  	dst.Set(sourceValue)
   986  	return nil
   987  }
   988  
   989  func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error {
   990  	if resp.Error != "" {
   991  		i.err = errors.New(resp.Error)
   992  		return nil
   993  	}
   994  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply)))
   995  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply)))
   996  	dst.Set(sourceValue)
   997  	return nil
   998  }
   999  
  1000  func (i *inmemCodec) Close() error {
  1001  	return nil
  1002  }
  1003  
  1004  // RPC is used to make a local RPC call
  1005  func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
  1006  	codec := &inmemCodec{
  1007  		method: method,
  1008  		args:   args,
  1009  		reply:  reply,
  1010  	}
  1011  	if err := s.rpcServer.ServeRequest(codec); err != nil {
  1012  		return err
  1013  	}
  1014  	return codec.err
  1015  }
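
        // An illustrative sketch of the in-process path above, assuming the
        // Status.Ping endpoint (registered elsewhere in this package) as the target:
        //
        //	var out struct{}
        //	if err := s.RPC("Status.Ping", struct{}{}, &out); err != nil {
        //		s.logger.Printf("[ERR] consul: local RPC failed: %v", err)
        //	}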
  1016  
  1017  // SnapshotRPC dispatches the given snapshot request, reading from the streaming
  1018  // input and writing to the streaming output depending on the operation.
  1019  func (s *Server) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer,
  1020  	replyFn structs.SnapshotReplyFn) error {
  1021  
  1022  	// Perform the operation.
  1023  	var reply structs.SnapshotResponse
  1024  	snap, err := s.dispatchSnapshotRequest(args, in, &reply)
  1025  	if err != nil {
  1026  		return err
  1027  	}
  1028  	defer func() {
  1029  		if err := snap.Close(); err != nil {
  1030  			s.logger.Printf("[ERR] consul: Failed to close snapshot: %v", err)
  1031  		}
  1032  	}()
  1033  
  1034  	// Let the caller peek at the reply.
  1035  	if replyFn != nil {
  1036  		if err := replyFn(&reply); err != nil {
  1037  			return nil
  1038  		}
  1039  	}
  1040  
  1041  	// Stream the snapshot.
  1042  	if out != nil {
  1043  		if _, err := io.Copy(out, snap); err != nil {
  1044  			return fmt.Errorf("failed to stream snapshot: %v", err)
  1045  		}
  1046  	}
  1047  	return nil
  1048  }
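
        // An illustrative sketch of streaming a snapshot to disk through the method
        // above; the output path is hypothetical and error handling is elided:
        //
        //	args := &structs.SnapshotRequest{
        //		Datacenter: s.config.Datacenter,
        //		Op:         structs.SnapshotSave,
        //	}
        //	f, _ := os.Create("/tmp/consul.snap") // hypothetical output path
        //	defer f.Close()
        //	if err := s.SnapshotRPC(args, nil, f, nil); err != nil {
        //		s.logger.Printf("[ERR] consul: snapshot failed: %v", err)
        //	}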
  1049  
  1050  // RegisterEndpoint is used to substitute an endpoint for testing.
  1051  func (s *Server) RegisterEndpoint(name string, handler interface{}) error {
  1052  	s.logger.Printf("[WARN] consul: endpoint injected; this should only be used for testing")
  1053  	return s.rpcServer.RegisterName(name, handler)
  1054  }
  1055  
  1056  // Stats is used to return statistics for debugging and insight
  1057  // for various sub-systems
  1058  func (s *Server) Stats() map[string]map[string]string {
  1059  	toString := func(v uint64) string {
  1060  		return strconv.FormatUint(v, 10)
  1061  	}
  1062  	numKnownDCs := len(s.router.GetDatacenters())
  1063  	stats := map[string]map[string]string{
  1064  		"consul": map[string]string{
  1065  			"server":            "true",
  1066  			"leader":            fmt.Sprintf("%v", s.IsLeader()),
  1067  			"leader_addr":       string(s.raft.Leader()),
  1068  			"bootstrap":         fmt.Sprintf("%v", s.config.Bootstrap),
  1069  			"known_datacenters": toString(uint64(numKnownDCs)),
  1070  		},
  1071  		"raft":     s.raft.Stats(),
  1072  		"serf_lan": s.serfLAN.Stats(),
  1073  		"runtime":  runtimeStats(),
  1074  	}
  1075  
  1076  	if s.ACLsEnabled() {
  1077  		if s.UseLegacyACLs() {
  1078  			stats["consul"]["acl"] = "legacy"
  1079  		} else {
  1080  			stats["consul"]["acl"] = "enabled"
  1081  		}
  1082  	} else {
  1083  		stats["consul"]["acl"] = "disabled"
  1084  	}
  1085  
  1086  	if s.serfWAN != nil {
  1087  		stats["serf_wan"] = s.serfWAN.Stats()
  1088  	}
  1089  
  1090  	for outerKey, outerValue := range s.enterpriseStats() {
  1091  		if _, ok := stats[outerKey]; ok {
  1092  			for innerKey, innerValue := range outerValue {
  1093  				stats[outerKey][innerKey] = innerValue
  1094  			}
  1095  		} else {
  1096  			stats[outerKey] = outerValue
  1097  		}
  1098  	}
  1099  
  1100  	return stats
  1101  }
  1102  
  1103  // GetLANCoordinate returns the coordinate of the server in the LAN gossip pool.
  1104  func (s *Server) GetLANCoordinate() (lib.CoordinateSet, error) {
  1105  	lan, err := s.serfLAN.GetCoordinate()
  1106  	if err != nil {
  1107  		return nil, err
  1108  	}
  1109  
  1110  	cs := lib.CoordinateSet{"": lan}
  1111  	for name, segment := range s.segmentLAN {
  1112  		c, err := segment.GetCoordinate()
  1113  		if err != nil {
  1114  			return nil, err
  1115  		}
  1116  		cs[name] = c
  1117  	}
  1118  	return cs, nil
  1119  }
  1120  
  1121  // ReloadConfig is used to have the Server do an online reload of
  1122  // relevant configuration information
  1123  func (s *Server) ReloadConfig(config *Config) error {
  1124  	return nil
  1125  }
  1126  
  1127  // setConsistentReadReady atomically sets the readiness state flag when leadership is obtained, to indicate that the server is past its initial barrier write.
  1128  func (s *Server) setConsistentReadReady() {
  1129  	atomic.StoreInt32(&s.readyForConsistentReads, 1)
  1130  }
  1131  
  1132  // resetConsistentReadReady atomically resets the readiness state flag when leadership is revoked.
  1133  func (s *Server) resetConsistentReadReady() {
  1134  	atomic.StoreInt32(&s.readyForConsistentReads, 0)
  1135  }
  1136  
  1137  // isReadyForConsistentReads returns true if this server is ready to serve consistent reads.
  1138  func (s *Server) isReadyForConsistentReads() bool {
  1139  	return atomic.LoadInt32(&s.readyForConsistentReads) == 1
  1140  }
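
        // RPC handlers that require a consistent view typically gate on the flag
        // above, roughly like this sketch (ErrNotReadyForConsistentReads is assumed
        // to be the sentinel error defined in the structs package):
        //
        //	if !s.isReadyForConsistentReads() {
        //		return structs.ErrNotReadyForConsistentReads
        //	}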
  1141  
  1142  // peersInfoContent is used to help operators understand what happened to the
  1143  // peers.json file. This is written to a file called peers.info in the same
  1144  // location.
  1145  const peersInfoContent = `
  1146  As of Consul 0.7.0, the peers.json file is only used for recovery
  1147  after an outage. The format of this file depends on what the server has
  1148  configured for its Raft protocol version. Please see the agent configuration
  1149  page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more
  1150  details about this parameter.
  1151  
  1152  For Raft protocol version 2 and earlier, this should be formatted as a JSON
  1153  array containing the address and port of each Consul server in the cluster, like
  1154  this:
  1155  
  1156  [
  1157    "10.1.0.1:8300",
  1158    "10.1.0.2:8300",
  1159    "10.1.0.3:8300"
  1160  ]
  1161  
  1162  For Raft protocol version 3 and later, this should be formatted as a JSON
  1163  array containing the node ID, address:port, and suffrage information of each
  1164  Consul server in the cluster, like this:
  1165  
  1166  [
  1167    {
  1168      "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
  1169      "address": "10.1.0.1:8300",
  1170      "non_voter": false
  1171    },
  1172    {
  1173      "id": "8b6dda82-3103-11e7-93ae-92361f002671",
  1174      "address": "10.1.0.2:8300",
  1175      "non_voter": false
  1176    },
  1177    {
  1178      "id": "97e17742-3103-11e7-93ae-92361f002671",
  1179      "address": "10.1.0.3:8300",
  1180      "non_voter": false
  1181    }
  1182  ]
  1183  
  1184  The "id" field is the node ID of the server. This can be found in the logs when
  1185  the server starts up, or in the "node-id" file inside the server's data
  1186  directory.
  1187  
  1188  The "address" field is the address and port of the server.
  1189  
  1190  The "non_voter" field controls whether the server is a non-voter, which is used
  1191  in some advanced Autopilot configurations, please see
  1192  https://www.consul.io/docs/guides/autopilot.html for more information. If
  1193  "non_voter" is omitted it will default to false, which is typical for most
  1194  clusters.
  1195  
  1196  Under normal operation, the peers.json file will not be present.
  1197  
  1198  When Consul starts for the first time, it will create this peers.info file and
  1199  delete any existing peers.json file so that recovery doesn't occur on the first
  1200  startup.
  1201  
  1202  Once this peers.info file is present, any peers.json file will be ingested at
  1203  startup, and will set the Raft peer configuration manually to recover from an
  1204  outage. It's crucial that all servers in the cluster are shut down before
  1205  creating the peers.json file, and that all servers receive the same
  1206  configuration. Once the peers.json file is successfully ingested and applied, it
  1207  will be deleted.
  1208  
  1209  Please see https://www.consul.io/docs/guides/outage.html for more information.
  1210  `