github.com/criteo-forks/consul@v1.4.5-criteonogrpc/agent/consul/server.go

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"crypto/tls"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"log"
    11  	"net"
    12  	"net/rpc"
    13  	"os"
    14  	"path/filepath"
    15  	"reflect"
    16  	"strconv"
    17  	"sync"
    18  	"sync/atomic"
    19  	"time"
    20  
    21  	ca "github.com/hashicorp/consul/agent/connect/ca"
    22  	"github.com/hashicorp/consul/agent/consul/autopilot"
    23  	"github.com/hashicorp/consul/agent/consul/fsm"
    24  	"github.com/hashicorp/consul/agent/consul/state"
    25  	"github.com/hashicorp/consul/agent/metadata"
    26  	"github.com/hashicorp/consul/agent/pool"
    27  	"github.com/hashicorp/consul/agent/router"
    28  	"github.com/hashicorp/consul/agent/structs"
    29  	"github.com/hashicorp/consul/agent/token"
    30  	"github.com/hashicorp/consul/lib"
    31  	"github.com/hashicorp/consul/sentinel"
    32  	"github.com/hashicorp/consul/tlsutil"
    33  	"github.com/hashicorp/consul/types"
    34  	"github.com/hashicorp/raft"
    35  	raftboltdb "github.com/hashicorp/raft-boltdb"
    36  	"github.com/hashicorp/serf/serf"
    37  )
    38  
    39  // These are the protocol versions that Consul can _understand_. These are
     40  // Consul-level protocol versions that are used to configure the Serf
    41  // protocol versions.
    42  const (
    43  	ProtocolVersionMin uint8 = 2
    44  
    45  	// Version 3 added support for network coordinates but we kept the
    46  	// default protocol version at 2 to ease the transition to this new
    47  	// feature. A Consul agent speaking version 2 of the protocol will
    48  	// attempt to send its coordinates to a server who understands version
    49  	// 3 or greater.
    50  	ProtocolVersion2Compatible = 2
    51  
    52  	ProtocolVersionMax = 3
    53  )
    54  
    55  const (
    56  	serfLANSnapshot   = "serf/local.snapshot"
    57  	serfWANSnapshot   = "serf/remote.snapshot"
    58  	raftState         = "raft/"
    59  	snapshotsRetained = 2
    60  
    61  	// serverRPCCache controls how long we keep an idle connection
    62  	// open to a server
    63  	serverRPCCache = 2 * time.Minute
    64  
    65  	// serverMaxStreams controls how many idle streams we keep
    66  	// open to a server
    67  	serverMaxStreams = 64
    68  
    69  	// raftLogCacheSize is the maximum number of logs to cache in-memory.
    70  	// This is used to reduce disk I/O for the recently committed entries.
    71  	raftLogCacheSize = 512
    72  
    73  	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
    74  	// to replicate to gracefully leave the cluster.
    75  	raftRemoveGracePeriod = 5 * time.Second
    76  
    77  	// serfEventChSize is the size of the buffered channel to get Serf
    78  	// events. If this is exhausted we will block Serf and Memberlist.
    79  	serfEventChSize = 2048
    80  
     81  	// reconcileChSize is the size of the buffered channel for reconcile updates
    82  	// from Serf with the Catalog. If this is exhausted we will drop updates,
    83  	// and wait for a periodic reconcile.
    84  	reconcileChSize = 256
    85  )
    86  
    87  var (
    88  	ErrWANFederationDisabled = fmt.Errorf("WAN Federation is disabled")
    89  )
    90  
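         // blockingQueryState holds the shared state for an in-flight blocking
         // query tracked in Server.blockingQueries: Index records the index
         // associated with the query, Cancel and Done are signaling channels,
         // Watchers counts waiters, and Apply/Err hold an optional apply callback
         // and error via atomic.Value (which cannot store nil, hence the guards
         // in newBlockingQueryState).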
    91  type blockingQueryState struct {
    92  	Index    uint64
    93  	Cancel   chan time.Time
    94  	Watchers int32
    95  	Done     chan struct{}
    96  	Apply    atomic.Value
    97  	Err      atomic.Value
    98  }
    99  
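         // newBlockingQueryState builds a blockingQueryState for the given index
         // with fresh Cancel and Done channels. The apply callback and error are
         // stored only when non-nil, since atomic.Value panics on a nil store.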
   100  func newBlockingQueryState(index uint64, apply func(uint64, interface{}) error, err error) *blockingQueryState {
   101  	queryState := &blockingQueryState{
   102  		Done:   make(chan struct{}),
   103  		Cancel: make(chan time.Time),
   104  		Index:  index,
   105  	}
   106  	if apply != nil {
   107  		queryState.Apply.Store(apply)
   108  	}
   109  	if err != nil {
   110  		queryState.Err.Store(err)
   111  	}
   112  	return queryState
   113  }
   114  
    115  // Server is the Consul server which manages service discovery,
   116  // health checking, DC forwarding, Raft, and multiple Serf pools.
   117  type Server struct {
   118  	// sentinel is the Sentinel code engine (can be nil).
   119  	sentinel sentinel.Evaluator
   120  
   121  	// acls is used to resolve tokens to effective policies
   122  	acls *ACLResolver
   123  
   124  	// aclUpgradeCancel is used to cancel the ACL upgrade goroutine when we
   125  	// lose leadership
   126  	aclUpgradeCancel  context.CancelFunc
   127  	aclUpgradeLock    sync.RWMutex
   128  	aclUpgradeEnabled bool
   129  
   130  	// aclReplicationCancel is used to shut down the ACL replication goroutine
   131  	// when we lose leadership
   132  	aclReplicationCancel  context.CancelFunc
   133  	aclReplicationLock    sync.RWMutex
   134  	aclReplicationEnabled bool
   135  
   136  	// DEPRECATED (ACL-Legacy-Compat) - only needed while we support both
   137  	// useNewACLs is used to determine whether we can use new ACLs or not
   138  	useNewACLs int32
   139  
   140  	// autopilot is the Autopilot instance for this server.
   141  	autopilot *autopilot.Autopilot
   142  
   143  	// autopilotWaitGroup is used to block until Autopilot shuts down.
   144  	autopilotWaitGroup sync.WaitGroup
   145  
   146  	// caProvider is the current CA provider in use for Connect. This is
   147  	// only non-nil when we are the leader.
   148  	caProvider ca.Provider
   149  	// caProviderRoot is the CARoot that was stored along with the ca.Provider
   150  	// active. It's only updated in lock-step with the caProvider. This prevents
   151  	// races between state updates to active roots and the fetch of the provider
   152  	// instance.
   153  	caProviderRoot *structs.CARoot
   154  	caProviderLock sync.RWMutex
   155  
   156  	// caPruningCh is used to shut down the CA root pruning goroutine when we
   157  	// lose leadership.
   158  	caPruningCh      chan struct{}
   159  	caPruningLock    sync.RWMutex
   160  	caPruningEnabled bool
   161  
   162  	// Consul configuration
   163  	config *Config
   164  
   165  	// tokens holds ACL tokens initially from the configuration, but can
   166  	// be updated at runtime, so should always be used instead of going to
   167  	// the configuration directly.
   168  	tokens *token.Store
   169  
   170  	// Connection pool to other consul servers
   171  	connPool *pool.ConnPool
   172  
   173  	// eventChLAN is used to receive events from the
   174  	// serf cluster in the datacenter
   175  	eventChLAN chan serf.Event
   176  
   177  	// eventChWAN is used to receive events from the
   178  	// serf cluster that spans datacenters
   179  	eventChWAN chan serf.Event
   180  
   181  	// fsm is the state machine used with Raft to provide
   182  	// strong consistency.
   183  	fsm *fsm.FSM
   184  
   185  	// Logger uses the provided LogOutput
   186  	logger *log.Logger
   187  
   188  	// The raft instance is used among Consul nodes within the DC to protect
    189  	// operations that require strong consistency, rather than modifying
    190  	// the state directly.
   191  	raft          *raft.Raft
   192  	raftLayer     *RaftLayer
   193  	raftStore     *raftboltdb.BoltStore
   194  	raftTransport *raft.NetworkTransport
   195  	raftInmem     *raft.InmemStore
   196  
   197  	// raftNotifyCh is set up by setupRaft() and ensures that we get reliable leader
   198  	// transition notifications from the Raft layer.
   199  	raftNotifyCh <-chan bool
   200  
   201  	// reconcileCh is used to pass events from the serf handler
   202  	// into the leader manager, so that the strong state can be
   203  	// updated
   204  	reconcileCh chan serf.Member
   205  
   206  	// readyForConsistentReads is used to track when the leader server is
   207  	// ready to serve consistent reads, after it has applied its initial
   208  	// barrier. This is updated atomically.
   209  	readyForConsistentReads int32
   210  
   211  	// leaveCh is used to signal that the server is leaving the cluster
   212  	// and trying to shed its RPC traffic onto other Consul servers. This
   213  	// is only ever closed.
   214  	leaveCh chan struct{}
   215  
   216  	// router is used to map out Consul servers in the WAN and in Consul
   217  	// Enterprise user-defined areas.
   218  	router *router.Router
   219  
   220  	// Listener is used to listen for incoming connections
   221  	Listener  net.Listener
   222  	rpcServer *rpc.Server
   223  
   224  	// rpcTLS is the TLS config for incoming TLS requests
   225  	rpcTLS *tls.Config
   226  
   227  	// serfLAN is the Serf cluster maintained inside the DC
   228  	// which contains all the DC nodes
   229  	serfLAN *serf.Serf
   230  
   231  	// segmentLAN maps segment names to their Serf cluster
   232  	segmentLAN map[string]*serf.Serf
   233  
    234  	// serfWAN is the Serf cluster maintained between DCs,
   235  	// which SHOULD only consist of Consul servers
   236  	serfWAN *serf.Serf
   237  
    238  	// serverLookup tracks Consul servers in the local datacenter. It is
    239  	// used for leader forwarding and fast lookups by server ID and address.
   240  	serverLookup *ServerLookup
   241  
   242  	// floodLock controls access to floodCh.
   243  	floodLock sync.RWMutex
   244  	floodCh   []chan struct{}
   245  
   246  	// sessionTimers track the expiration time of each Session that has
   247  	// a TTL. On expiration, a SessionDestroy event will occur, and
   248  	// destroy the session via standard session destroy processing
   249  	sessionTimers *SessionTimers
   250  
   251  	// statsFetcher is used by autopilot to check the status of the other
    252  	// Consul servers.
   253  	statsFetcher *StatsFetcher
   254  
   255  	// reassertLeaderCh is used to signal the leader loop should re-run
   256  	// leadership actions after a snapshot restore.
   257  	reassertLeaderCh chan chan error
   258  
   259  	// tombstoneGC is used to track the pending GC invocations
   260  	// for the KV tombstones
   261  	tombstoneGC *state.TombstoneGC
   262  
   263  	// aclReplicationStatus (and its associated lock) provide information
   264  	// about the health of the ACL replication goroutine.
   265  	aclReplicationStatus     structs.ACLReplicationStatus
   266  	aclReplicationStatusLock sync.RWMutex
   267  
   268  	// shutdown and the associated members here are used in orchestrating
   269  	// a clean shutdown. The shutdownCh is never written to, only closed to
   270  	// indicate a shutdown has been initiated.
   271  	shutdown     bool
   272  	shutdownCh   chan struct{}
   273  	shutdownLock sync.Mutex
   274  
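         	// blockingQueries tracks the shared blockingQueryState for in-flight
         	// blocking queries, keyed by a query-specific string and guarded by
         	// blockingQueriesLock.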
   275  	blockingQueriesLock sync.RWMutex
   276  	blockingQueries     map[string]*blockingQueryState
   277  
   278  	// embedded struct to hold all the enterprise specific data
   279  	EnterpriseServer
   280  }
   281  
    282  // NewServer is only used to help set up a server for testing. Normal code
   283  // exercises NewServerLogger.
   284  func NewServer(config *Config) (*Server, error) {
   285  	c, err := tlsutil.NewConfigurator(config.ToTLSUtilConfig(), nil)
   286  	if err != nil {
   287  		return nil, err
   288  	}
   289  	return NewServerLogger(config, nil, new(token.Store), c)
   290  }
   291  
   292  // NewServerLogger is used to construct a new Consul server from the
   293  // configuration, potentially returning an error
   294  func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store, tlsConfigurator *tlsutil.Configurator) (*Server, error) {
   295  	// Check the protocol version.
   296  	if err := config.CheckProtocolVersion(); err != nil {
   297  		return nil, err
   298  	}
   299  
   300  	// Check for a data directory.
   301  	if config.DataDir == "" && !config.DevMode {
   302  		return nil, fmt.Errorf("Config must provide a DataDir")
   303  	}
   304  
   305  	// Sanity check the ACLs.
   306  	if err := config.CheckACL(); err != nil {
   307  		return nil, err
   308  	}
   309  
   310  	// Ensure we have a log output and create a logger.
   311  	if config.LogOutput == nil {
   312  		config.LogOutput = os.Stderr
   313  	}
   314  	if logger == nil {
   315  		logger = log.New(config.LogOutput, "", log.LstdFlags)
   316  	}
   317  
   318  	// Check if TLS is enabled
   319  	if config.CAFile != "" || config.CAPath != "" {
   320  		config.UseTLS = true
   321  	}
   322  
   323  	// Set the primary DC if it wasn't set.
   324  	if config.PrimaryDatacenter == "" {
   325  		if config.ACLDatacenter != "" {
   326  			config.PrimaryDatacenter = config.ACLDatacenter
   327  		} else {
   328  			config.PrimaryDatacenter = config.Datacenter
   329  		}
   330  	}
   331  
   332  	// Create the tombstone GC.
   333  	gc, err := state.NewTombstoneGC(config.TombstoneTTL, config.TombstoneTTLGranularity)
   334  	if err != nil {
   335  		return nil, err
   336  	}
   337  
   338  	// Create the shutdown channel - this is closed but never written to.
   339  	shutdownCh := make(chan struct{})
   340  
   341  	connPool := &pool.ConnPool{
   342  		SrcAddr:    config.RPCSrcAddr,
   343  		LogOutput:  config.LogOutput,
   344  		MaxTime:    serverRPCCache,
   345  		MaxStreams: serverMaxStreams,
   346  		TLSWrapper: tlsConfigurator.OutgoingRPCWrapper(),
   347  		ForceTLS:   config.VerifyOutgoing,
   348  	}
   349  
   350  	// Create server.
   351  	s := &Server{
   352  		config:           config,
   353  		tokens:           tokens,
   354  		connPool:         connPool,
   355  		eventChLAN:       make(chan serf.Event, serfEventChSize),
   356  		eventChWAN:       make(chan serf.Event, serfEventChSize),
   357  		logger:           logger,
   358  		leaveCh:          make(chan struct{}),
   359  		reconcileCh:      make(chan serf.Member, reconcileChSize),
   360  		router:           router.NewRouter(logger, config.Datacenter),
   361  		rpcServer:        rpc.NewServer(),
   362  		rpcTLS:           tlsConfigurator.IncomingRPCConfig(),
   363  		reassertLeaderCh: make(chan chan error),
   364  		segmentLAN:       make(map[string]*serf.Serf, len(config.Segments)),
   365  		sessionTimers:    NewSessionTimers(),
   366  		tombstoneGC:      gc,
   367  		serverLookup:     NewServerLookup(),
   368  		blockingQueries:  make(map[string]*blockingQueryState),
   369  		shutdownCh:       shutdownCh,
   370  	}
   371  
   372  	// Initialize enterprise specific server functionality
   373  	if err := s.initEnterprise(); err != nil {
   374  		s.Shutdown()
   375  		return nil, err
   376  	}
   377  
   378  	// Initialize the stats fetcher that autopilot will use.
   379  	s.statsFetcher = NewStatsFetcher(logger, s.connPool, s.config.Datacenter)
   380  
   381  	s.sentinel = sentinel.New(logger)
   382  	s.useNewACLs = 0
   383  	aclConfig := ACLResolverConfig{
   384  		Config:      config,
   385  		Delegate:    s,
   386  		CacheConfig: serverACLCacheConfig,
   387  		AutoDisable: false,
   388  		Logger:      logger,
   389  		Sentinel:    s.sentinel,
   390  	}
   391  	// Initialize the ACL resolver.
   392  	if s.acls, err = NewACLResolver(&aclConfig); err != nil {
   393  		s.Shutdown()
   394  		return nil, fmt.Errorf("Failed to create ACL resolver: %v", err)
   395  	}
   396  
   397  	// Initialize the RPC layer.
   398  	if err := s.setupRPC(tlsConfigurator.OutgoingRPCWrapper()); err != nil {
   399  		s.Shutdown()
   400  		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
   401  	}
   402  
   403  	// Initialize any extra RPC listeners for segments.
   404  	segmentListeners, err := s.setupSegmentRPC()
   405  	if err != nil {
   406  		s.Shutdown()
   407  		return nil, fmt.Errorf("Failed to start segment RPC layer: %v", err)
   408  	}
   409  
   410  	// Initialize the Raft server.
   411  	if err := s.setupRaft(); err != nil {
   412  		s.Shutdown()
   413  		return nil, fmt.Errorf("Failed to start Raft: %v", err)
   414  	}
   415  
   416  	// Serf and dynamic bind ports
   417  	//
   418  	// The LAN serf cluster announces the port of the WAN serf cluster
   419  	// which creates a race when the WAN cluster is supposed to bind to
   420  	// a dynamic port (port 0). The current memberlist implementation will
   421  	// update the bind port in the configuration after the memberlist is
   422  	// created, so we can pull it out from there reliably, even though it's
   423  	// a little gross to be reading the updated config.
   424  
   425  	// Initialize the WAN Serf if enabled
   426  	serfBindPortWAN := -1
   427  	if config.SerfWANConfig != nil {
   428  		serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
   429  		s.serfWAN, err = s.setupSerf(config.SerfWANConfig, s.eventChWAN, serfWANSnapshot, true, serfBindPortWAN, "", s.Listener)
   430  		if err != nil {
   431  			s.Shutdown()
   432  			return nil, fmt.Errorf("Failed to start WAN Serf: %v", err)
   433  		}
    434  		// See the big comment above for why we are doing this.
   435  		if serfBindPortWAN == 0 {
   436  			serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
   437  			if serfBindPortWAN == 0 {
   438  				return nil, fmt.Errorf("Failed to get dynamic bind port for WAN Serf")
   439  			}
   440  			s.logger.Printf("[INFO] agent: Serf WAN TCP bound to port %d", serfBindPortWAN)
   441  		}
   442  	}
   443  
   444  	// Initialize the LAN segments before the default LAN Serf so we have
   445  	// updated port information to publish there.
   446  	if err := s.setupSegments(config, serfBindPortWAN, segmentListeners); err != nil {
   447  		s.Shutdown()
   448  		return nil, fmt.Errorf("Failed to setup network segments: %v", err)
   449  	}
   450  
   451  	// Initialize the LAN Serf for the default network segment.
   452  	s.serfLAN, err = s.setupSerf(config.SerfLANConfig, s.eventChLAN, serfLANSnapshot, false, serfBindPortWAN, "", s.Listener)
   453  	if err != nil {
   454  		s.Shutdown()
   455  		return nil, fmt.Errorf("Failed to start LAN Serf: %v", err)
   456  	}
   457  	go s.lanEventHandler()
   458  
   459  	// Start the flooders after the LAN event handler is wired up.
   460  	s.floodSegments(config)
   461  
   462  	// Add a "static route" to the WAN Serf and hook it up to Serf events.
   463  	if s.serfWAN != nil {
   464  		if err := s.router.AddArea(types.AreaWAN, s.serfWAN, s.connPool, s.config.VerifyOutgoing); err != nil {
   465  			s.Shutdown()
   466  			return nil, fmt.Errorf("Failed to add WAN serf route: %v", err)
   467  		}
   468  		go router.HandleSerfEvents(s.logger, s.router, types.AreaWAN, s.serfWAN.ShutdownCh(), s.eventChWAN)
   469  
   470  		// Fire up the LAN <-> WAN join flooder.
   471  		portFn := func(s *metadata.Server) (int, bool) {
   472  			if s.WanJoinPort > 0 {
   473  				return s.WanJoinPort, true
   474  			}
   475  			return 0, false
   476  		}
   477  		go s.Flood(nil, portFn, s.serfWAN)
   478  	}
   479  
   480  	// Start enterprise specific functionality
   481  	if err := s.startEnterprise(); err != nil {
   482  		s.Shutdown()
   483  		return nil, err
   484  	}
   485  
   486  	// Initialize Autopilot. This must happen before starting leadership monitoring
   487  	// as establishing leadership could attempt to use autopilot and cause a panic.
   488  	s.initAutopilot(config)
   489  
   490  	// Start monitoring leadership. This must happen after Serf is set up
   491  	// since it can fire events when leadership is obtained.
   492  	go s.monitorLeadership()
   493  
   494  	// Start listening for RPC requests.
   495  	go s.listen(s.Listener)
   496  
   497  	// Start listeners for any segments with separate RPC listeners.
   498  	for _, listener := range segmentListeners {
   499  		go s.listen(listener)
   500  	}
   501  
   502  	// Start the metrics handlers.
   503  	go s.sessionStats()
   504  
   505  	return s, nil
   506  }
   507  
    508  // setupRaft is used to set up and initialize Raft
   509  func (s *Server) setupRaft() error {
   510  	// If we have an unclean exit then attempt to close the Raft store.
   511  	defer func() {
   512  		if s.raft == nil && s.raftStore != nil {
   513  			if err := s.raftStore.Close(); err != nil {
   514  				s.logger.Printf("[ERR] consul: failed to close Raft store: %v", err)
   515  			}
   516  		}
   517  	}()
   518  
   519  	// Create the FSM.
   520  	var err error
   521  	s.fsm, err = fsm.New(s.tombstoneGC, s.config.WatchSoftLimit, s.config.LogOutput)
   522  	if err != nil {
   523  		return err
   524  	}
   525  
    526  	var serverAddressProvider raft.ServerAddressProvider
    527  	if s.config.RaftConfig.ProtocolVersion >= 3 { // ServerAddressProvider needs server IDs to work correctly, which requires protocol version 3 or higher
   528  		serverAddressProvider = s.serverLookup
   529  	}
   530  
   531  	// Create a transport layer.
   532  	transConfig := &raft.NetworkTransportConfig{
   533  		Stream:                s.raftLayer,
   534  		MaxPool:               3,
   535  		Timeout:               10 * time.Second,
   536  		ServerAddressProvider: serverAddressProvider,
   537  		Logger:                s.logger,
   538  	}
   539  
   540  	trans := raft.NewNetworkTransportWithConfig(transConfig)
   541  	s.raftTransport = trans
   542  
   543  	// Make sure we set the LogOutput.
   544  	s.config.RaftConfig.LogOutput = s.config.LogOutput
   545  	s.config.RaftConfig.Logger = s.logger
   546  
   547  	// Versions of the Raft protocol below 3 require the LocalID to match the network
   548  	// address of the transport.
   549  	s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr())
   550  	if s.config.RaftConfig.ProtocolVersion >= 3 {
   551  		s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID)
   552  	}
   553  
   554  	// Build an all in-memory setup for dev mode, otherwise prepare a full
   555  	// disk-based setup.
   556  	var log raft.LogStore
   557  	var stable raft.StableStore
   558  	var snap raft.SnapshotStore
   559  	if s.config.DevMode {
   560  		store := raft.NewInmemStore()
   561  		s.raftInmem = store
   562  		stable = store
   563  		log = store
   564  		snap = raft.NewInmemSnapshotStore()
   565  	} else {
   566  		// Create the base raft path.
   567  		path := filepath.Join(s.config.DataDir, raftState)
   568  		if err := lib.EnsurePath(path, true); err != nil {
   569  			return err
   570  		}
   571  
   572  		// Create the backend raft store for logs and stable storage.
   573  		store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
   574  		if err != nil {
   575  			return err
   576  		}
   577  		s.raftStore = store
   578  		stable = store
   579  
   580  		// Wrap the store in a LogCache to improve performance.
   581  		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
   582  		if err != nil {
   583  			return err
   584  		}
   585  		log = cacheStore
   586  
   587  		// Create the snapshot store.
   588  		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
   589  		if err != nil {
   590  			return err
   591  		}
   592  		snap = snapshots
   593  
   594  		// For an existing cluster being upgraded to the new version of
   595  		// Raft, we almost never want to run recovery based on the old
   596  		// peers.json file. We create a peers.info file with a helpful
   597  		// note about where peers.json went, and use that as a sentinel
   598  		// to avoid ingesting the old one that first time (if we have to
   599  		// create the peers.info file because it's not there, we also
   600  		// blow away any existing peers.json file).
   601  		peersFile := filepath.Join(path, "peers.json")
   602  		peersInfoFile := filepath.Join(path, "peers.info")
   603  		if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) {
   604  			if err := ioutil.WriteFile(peersInfoFile, []byte(peersInfoContent), 0755); err != nil {
   605  				return fmt.Errorf("failed to write peers.info file: %v", err)
   606  			}
   607  
   608  			// Blow away the peers.json file if present, since the
   609  			// peers.info sentinel wasn't there.
   610  			if _, err := os.Stat(peersFile); err == nil {
   611  				if err := os.Remove(peersFile); err != nil {
   612  					return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
   613  				}
   614  				s.logger.Printf("[INFO] consul: deleted peers.json file (see peers.info for details)")
   615  			}
   616  		} else if _, err := os.Stat(peersFile); err == nil {
   617  			s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...")
   618  
   619  			var configuration raft.Configuration
   620  			if s.config.RaftConfig.ProtocolVersion < 3 {
   621  				configuration, err = raft.ReadPeersJSON(peersFile)
   622  			} else {
   623  				configuration, err = raft.ReadConfigJSON(peersFile)
   624  			}
   625  			if err != nil {
   626  				return fmt.Errorf("recovery failed to parse peers.json: %v", err)
   627  			}
   628  
   629  			tmpFsm, err := fsm.New(s.tombstoneGC, s.config.WatchSoftLimit, s.config.LogOutput)
   630  			if err != nil {
   631  				return fmt.Errorf("recovery failed to make temp FSM: %v", err)
   632  			}
   633  			if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
   634  				log, stable, snap, trans, configuration); err != nil {
   635  				return fmt.Errorf("recovery failed: %v", err)
   636  			}
   637  
   638  			if err := os.Remove(peersFile); err != nil {
   639  				return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
   640  			}
   641  			s.logger.Printf("[INFO] consul: deleted peers.json file after successful recovery")
   642  		}
   643  	}
   644  
   645  	// If we are in bootstrap or dev mode and the state is clean then we can
   646  	// bootstrap now.
   647  	if s.config.Bootstrap || s.config.DevMode {
   648  		hasState, err := raft.HasExistingState(log, stable, snap)
   649  		if err != nil {
   650  			return err
   651  		}
   652  		if !hasState {
   653  			configuration := raft.Configuration{
   654  				Servers: []raft.Server{
   655  					raft.Server{
   656  						ID:      s.config.RaftConfig.LocalID,
   657  						Address: trans.LocalAddr(),
   658  					},
   659  				},
   660  			}
   661  			if err := raft.BootstrapCluster(s.config.RaftConfig,
   662  				log, stable, snap, trans, configuration); err != nil {
   663  				return err
   664  			}
   665  		}
   666  	}
   667  
   668  	// Set up a channel for reliable leader notifications.
   669  	raftNotifyCh := make(chan bool, 1)
   670  	s.config.RaftConfig.NotifyCh = raftNotifyCh
   671  	s.raftNotifyCh = raftNotifyCh
   672  
   673  	// Setup the Raft store.
   674  	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)
   675  	if err != nil {
   676  		return err
   677  	}
   678  	return nil
   679  }
   680  
    681  // factory is a function that returns an RPC endpoint bound to the given
   682  // server.
   683  type factory func(s *Server) interface{}
   684  
   685  // endpoints is a list of registered RPC endpoint factories.
   686  var endpoints []factory
   687  
   688  // registerEndpoint registers a new RPC endpoint factory.
   689  func registerEndpoint(fn factory) {
   690  	endpoints = append(endpoints, fn)
   691  }
   692  
    693  // setupRPC is used to set up the RPC listener
   694  func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error {
   695  	for _, fn := range endpoints {
   696  		s.rpcServer.Register(fn(s))
   697  	}
   698  
   699  	ln, err := net.ListenTCP("tcp", s.config.RPCAddr)
   700  	if err != nil {
   701  		return err
   702  	}
   703  	s.Listener = ln
   704  	if s.config.NotifyListen != nil {
   705  		s.config.NotifyListen()
   706  	}
   707  	// todo(fs): we should probably guard this
   708  	if s.config.RPCAdvertise == nil {
   709  		s.config.RPCAdvertise = ln.Addr().(*net.TCPAddr)
   710  	}
   711  
   712  	// Verify that we have a usable advertise address
   713  	if s.config.RPCAdvertise.IP.IsUnspecified() {
   714  		ln.Close()
   715  		return fmt.Errorf("RPC advertise address is not advertisable: %v", s.config.RPCAdvertise)
   716  	}
   717  
   718  	// Provide a DC specific wrapper. Raft replication is only
   719  	// ever done in the same datacenter, so we can provide it as a constant.
   720  	wrapper := tlsutil.SpecificDC(s.config.Datacenter, tlsWrap)
   721  
   722  	// Define a callback for determining whether to wrap a connection with TLS
   723  	tlsFunc := func(address raft.ServerAddress) bool {
   724  		if s.config.VerifyOutgoing {
   725  			return true
   726  		}
   727  
   728  		server := s.serverLookup.Server(address)
   729  
   730  		if server == nil {
   731  			return false
   732  		}
   733  
   734  		return server.UseTLS
   735  	}
   736  	s.raftLayer = NewRaftLayer(s.config.RPCSrcAddr, s.config.RPCAdvertise, wrapper, tlsFunc)
   737  	return nil
   738  }
   739  
    740  // Shutdown is used to shut down the server
   741  func (s *Server) Shutdown() error {
   742  	s.logger.Printf("[INFO] consul: shutting down server")
   743  	s.shutdownLock.Lock()
   744  	defer s.shutdownLock.Unlock()
   745  
   746  	if s.shutdown {
   747  		return nil
   748  	}
   749  
   750  	s.shutdown = true
   751  	close(s.shutdownCh)
   752  
   753  	if s.serfLAN != nil {
   754  		s.serfLAN.Shutdown()
   755  	}
   756  
   757  	if s.serfWAN != nil {
   758  		s.serfWAN.Shutdown()
   759  		if err := s.router.RemoveArea(types.AreaWAN); err != nil {
   760  			s.logger.Printf("[WARN] consul: error removing WAN area: %v", err)
   761  		}
   762  	}
   763  	s.router.Shutdown()
   764  
   765  	if s.raft != nil {
   766  		s.raftTransport.Close()
   767  		s.raftLayer.Close()
   768  		future := s.raft.Shutdown()
   769  		if err := future.Error(); err != nil {
   770  			s.logger.Printf("[WARN] consul: error shutting down raft: %s", err)
   771  		}
   772  		if s.raftStore != nil {
   773  			s.raftStore.Close()
   774  		}
   775  	}
   776  
   777  	if s.Listener != nil {
   778  		s.Listener.Close()
   779  	}
   780  
   781  	// Close the connection pool
   782  	s.connPool.Shutdown()
   783  
   784  	return nil
   785  }
   786  
   787  // Leave is used to prepare for a graceful shutdown of the server
   788  func (s *Server) Leave() error {
   789  	s.logger.Printf("[INFO] consul: server starting leave")
   790  
   791  	// Check the number of known peers
   792  	numPeers, err := s.numPeers()
   793  	if err != nil {
   794  		s.logger.Printf("[ERR] consul: failed to check raft peers: %v", err)
   795  		return err
   796  	}
   797  
   798  	addr := s.raftTransport.LocalAddr()
   799  
   800  	// If we are the current leader, and we have any other peers (cluster has multiple
   801  	// servers), we should do a RemoveServer/RemovePeer to safely reduce the quorum size.
   802  	// If we are not the leader, then we should issue our leave intention and wait to be
   803  	// removed for some sane period of time.
   804  	isLeader := s.IsLeader()
   805  	if isLeader && numPeers > 1 {
   806  		minRaftProtocol, err := s.autopilot.MinRaftProtocol()
   807  		if err != nil {
   808  			return err
   809  		}
   810  
   811  		if minRaftProtocol >= 2 && s.config.RaftConfig.ProtocolVersion >= 3 {
   812  			future := s.raft.RemoveServer(raft.ServerID(s.config.NodeID), 0, 0)
   813  			if err := future.Error(); err != nil {
   814  				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
   815  			}
   816  		} else {
   817  			future := s.raft.RemovePeer(addr)
   818  			if err := future.Error(); err != nil {
   819  				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
   820  			}
   821  		}
   822  	}
   823  
   824  	// Leave the WAN pool
   825  	if s.serfWAN != nil {
   826  		if err := s.serfWAN.Leave(); err != nil {
   827  			s.logger.Printf("[ERR] consul: failed to leave WAN Serf cluster: %v", err)
   828  		}
   829  	}
   830  
   831  	// Leave the LAN pool
   832  	if s.serfLAN != nil {
   833  		if err := s.serfLAN.Leave(); err != nil {
   834  			s.logger.Printf("[ERR] consul: failed to leave LAN Serf cluster: %v", err)
   835  		}
   836  	}
   837  
   838  	// Start refusing RPCs now that we've left the LAN pool. It's important
   839  	// to do this *after* we've left the LAN pool so that clients will know
   840  	// to shift onto another server if they perform a retry. We also wake up
   841  	// all queries in the RPC retry state.
   842  	s.logger.Printf("[INFO] consul: Waiting %s to drain RPC traffic", s.config.LeaveDrainTime)
   843  	close(s.leaveCh)
   844  	time.Sleep(s.config.LeaveDrainTime)
   845  
   846  	// If we were not leader, wait to be safely removed from the cluster. We
   847  	// must wait to allow the raft replication to take place, otherwise an
   848  	// immediate shutdown could cause a loss of quorum.
   849  	if !isLeader {
   850  		left := false
   851  		limit := time.Now().Add(raftRemoveGracePeriod)
   852  		for !left && time.Now().Before(limit) {
   853  			// Sleep a while before we check.
   854  			time.Sleep(50 * time.Millisecond)
   855  
   856  			// Get the latest configuration.
   857  			future := s.raft.GetConfiguration()
   858  			if err := future.Error(); err != nil {
   859  				s.logger.Printf("[ERR] consul: failed to get raft configuration: %v", err)
   860  				break
   861  			}
   862  
   863  			// See if we are no longer included.
   864  			left = true
   865  			for _, server := range future.Configuration().Servers {
   866  				if server.Address == addr {
   867  					left = false
   868  					break
   869  				}
   870  			}
   871  		}
   872  
   873  		// TODO (slackpad) With the old Raft library we used to force the
   874  		// peers set to empty when a graceful leave occurred. This would
   875  		// keep voting spam down if the server was restarted, but it was
   876  		// dangerous because the peers was inconsistent with the logs and
   877  		// snapshots, so it wasn't really safe in all cases for the server
   878  		// to become leader. This is now safe, but the log spam is noisy.
   879  		// The next new version of the library will have a "you are not a
   880  		// peer stop it" behavior that should address this. We will have
   881  		// to evaluate during the RC period if this interim situation is
   882  		// not too confusing for operators.
   883  
   884  		// TODO (slackpad) When we take a later new version of the Raft
   885  		// library it won't try to complete replication, so this peer
   886  		// may not realize that it has been removed. Need to revisit this
   887  		// and the warning here.
   888  		if !left {
   889  			s.logger.Printf("[WARN] consul: failed to leave raft configuration gracefully, timeout")
   890  		}
   891  	}
   892  
   893  	return nil
   894  }
   895  
   896  // numPeers is used to check on the number of known peers, including potentially
   897  // the local node. We count only voters, since others can't actually become
   898  // leader, so aren't considered peers.
   899  func (s *Server) numPeers() (int, error) {
   900  	future := s.raft.GetConfiguration()
   901  	if err := future.Error(); err != nil {
   902  		return 0, err
   903  	}
   904  
   905  	return autopilot.NumPeers(future.Configuration()), nil
   906  }
   907  
    908  // JoinLAN is used to have Consul join the inner-DC pool.
    909  // The target address should be another node inside the DC
    910  // listening on the Serf LAN address.
   911  func (s *Server) JoinLAN(addrs []string) (int, error) {
   912  	return s.serfLAN.Join(addrs, true)
   913  }
   914  
    915  // JoinWAN is used to have Consul join the cross-WAN Consul ring.
    916  // The target address should be another node listening on the
    917  // Serf WAN address.
   918  func (s *Server) JoinWAN(addrs []string) (int, error) {
   919  	if s.serfWAN == nil {
   920  		return 0, ErrWANFederationDisabled
   921  	}
   922  	return s.serfWAN.Join(addrs, true)
   923  }
   924  
   925  // LocalMember is used to return the local node
   926  func (s *Server) LocalMember() serf.Member {
   927  	return s.serfLAN.LocalMember()
   928  }
   929  
   930  // LANMembers is used to return the members of the LAN cluster
   931  func (s *Server) LANMembers() []serf.Member {
   932  	return s.serfLAN.Members()
   933  }
   934  
    935  // WANMembers is used to return the members of the WAN cluster
   936  func (s *Server) WANMembers() []serf.Member {
   937  	if s.serfWAN == nil {
   938  		return nil
   939  	}
   940  	return s.serfWAN.Members()
   941  }
   942  
   943  // RemoveFailedNode is used to remove a failed node from the cluster
   944  func (s *Server) RemoveFailedNode(node string) error {
   945  	if err := s.serfLAN.RemoveFailedNode(node); err != nil {
   946  		return err
   947  	}
   948  	if s.serfWAN != nil {
   949  		if err := s.serfWAN.RemoveFailedNode(node); err != nil {
   950  			return err
   951  		}
   952  	}
   953  	return nil
   954  }
   955  
   956  // IsLeader checks if this server is the cluster leader
   957  func (s *Server) IsLeader() bool {
   958  	return s.raft.State() == raft.Leader
   959  }
   960  
   961  // KeyManagerLAN returns the LAN Serf keyring manager
   962  func (s *Server) KeyManagerLAN() *serf.KeyManager {
   963  	return s.serfLAN.KeyManager()
   964  }
   965  
   966  // KeyManagerWAN returns the WAN Serf keyring manager
   967  func (s *Server) KeyManagerWAN() *serf.KeyManager {
   968  	return s.serfWAN.KeyManager()
   969  }
   970  
   971  // Encrypted determines if gossip is encrypted
   972  func (s *Server) Encrypted() bool {
   973  	LANEncrypted := s.serfLAN.EncryptionEnabled()
   974  	if s.serfWAN == nil {
   975  		return LANEncrypted
   976  	}
   977  	return LANEncrypted && s.serfWAN.EncryptionEnabled()
   978  }
   979  
   980  // LANSegments returns a map of LAN segments by name
   981  func (s *Server) LANSegments() map[string]*serf.Serf {
   982  	segments := make(map[string]*serf.Serf, len(s.segmentLAN)+1)
   983  	segments[""] = s.serfLAN
   984  	for name, segment := range s.segmentLAN {
   985  		segments[name] = segment
   986  	}
   987  
   988  	return segments
   989  }
   990  
   991  // inmemCodec is used to do an RPC call without going over a network
   992  type inmemCodec struct {
   993  	method string
   994  	args   interface{}
   995  	reply  interface{}
   996  	err    error
   997  }
   998  
   999  func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error {
  1000  	req.ServiceMethod = i.method
  1001  	return nil
  1002  }
  1003  
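         // ReadRequestBody copies the in-memory args value into the decode target
         // via reflection (the double reflect.Indirect tolerates pointer and
         // pointer-to-pointer arguments), so local calls skip serialization.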
  1004  func (i *inmemCodec) ReadRequestBody(args interface{}) error {
  1005  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args)))
  1006  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args)))
  1007  	dst.Set(sourceValue)
  1008  	return nil
  1009  }
  1010  
  1011  func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error {
  1012  	if resp.Error != "" {
  1013  		i.err = errors.New(resp.Error)
  1014  		return nil
  1015  	}
  1016  	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply)))
  1017  	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply)))
  1018  	dst.Set(sourceValue)
  1019  	return nil
  1020  }
  1021  
  1022  func (i *inmemCodec) Close() error {
  1023  	return nil
  1024  }
  1025  
  1026  // RPC is used to make a local RPC call
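         // Any registered "<Endpoint>.<Method>" name can be invoked in-process,
         // for example (illustrative sketch only):
         //
         //	var out structs.IndexedNodes
         //	err := s.RPC("Catalog.ListNodes", &structs.DCSpecificRequest{Datacenter: s.config.Datacenter}, &out)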
  1027  func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
  1028  	codec := &inmemCodec{
  1029  		method: method,
  1030  		args:   args,
  1031  		reply:  reply,
  1032  	}
  1033  	if err := s.rpcServer.ServeRequest(codec); err != nil {
  1034  		return err
  1035  	}
  1036  	return codec.err
  1037  }
  1038  
  1039  // SnapshotRPC dispatches the given snapshot request, reading from the streaming
  1040  // input and writing to the streaming output depending on the operation.
  1041  func (s *Server) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer,
  1042  	replyFn structs.SnapshotReplyFn) error {
  1043  
  1044  	// Perform the operation.
  1045  	var reply structs.SnapshotResponse
  1046  	snap, err := s.dispatchSnapshotRequest(args, in, &reply)
  1047  	if err != nil {
  1048  		return err
  1049  	}
  1050  	defer func() {
  1051  		if err := snap.Close(); err != nil {
  1052  			s.logger.Printf("[ERR] consul: Failed to close snapshot: %v", err)
  1053  		}
  1054  	}()
  1055  
  1056  	// Let the caller peek at the reply.
  1057  	if replyFn != nil {
  1058  		if err := replyFn(&reply); err != nil {
   1059  			return err
  1060  		}
  1061  	}
  1062  
  1063  	// Stream the snapshot.
  1064  	if out != nil {
  1065  		if _, err := io.Copy(out, snap); err != nil {
  1066  			return fmt.Errorf("failed to stream snapshot: %v", err)
  1067  		}
  1068  	}
  1069  	return nil
  1070  }
  1071  
  1072  // RegisterEndpoint is used to substitute an endpoint for testing.
  1073  func (s *Server) RegisterEndpoint(name string, handler interface{}) error {
  1074  	s.logger.Printf("[WARN] consul: endpoint injected; this should only be used for testing")
  1075  	return s.rpcServer.RegisterName(name, handler)
  1076  }
  1077  
  1078  // Stats is used to return statistics for debugging and insight
  1079  // for various sub-systems
  1080  func (s *Server) Stats() map[string]map[string]string {
  1081  	toString := func(v uint64) string {
  1082  		return strconv.FormatUint(v, 10)
  1083  	}
  1084  	numKnownDCs := len(s.router.GetDatacenters())
  1085  	stats := map[string]map[string]string{
  1086  		"consul": map[string]string{
  1087  			"server":            "true",
  1088  			"leader":            fmt.Sprintf("%v", s.IsLeader()),
  1089  			"leader_addr":       string(s.raft.Leader()),
  1090  			"bootstrap":         fmt.Sprintf("%v", s.config.Bootstrap),
  1091  			"known_datacenters": toString(uint64(numKnownDCs)),
  1092  		},
  1093  		"raft":     s.raft.Stats(),
  1094  		"serf_lan": s.serfLAN.Stats(),
  1095  		"runtime":  runtimeStats(),
  1096  	}
  1097  
  1098  	if s.ACLsEnabled() {
  1099  		if s.UseLegacyACLs() {
  1100  			stats["consul"]["acl"] = "legacy"
  1101  		} else {
  1102  			stats["consul"]["acl"] = "enabled"
  1103  		}
  1104  	} else {
  1105  		stats["consul"]["acl"] = "disabled"
  1106  	}
  1107  
  1108  	if s.serfWAN != nil {
  1109  		stats["serf_wan"] = s.serfWAN.Stats()
  1110  	}
  1111  
  1112  	for outerKey, outerValue := range s.enterpriseStats() {
  1113  		if _, ok := stats[outerKey]; ok {
  1114  			for innerKey, innerValue := range outerValue {
  1115  				stats[outerKey][innerKey] = innerValue
  1116  			}
  1117  		} else {
  1118  			stats[outerKey] = outerValue
  1119  		}
  1120  	}
  1121  
  1122  	return stats
  1123  }
  1124  
  1125  // GetLANCoordinate returns the coordinate of the server in the LAN gossip pool.
  1126  func (s *Server) GetLANCoordinate() (lib.CoordinateSet, error) {
  1127  	lan, err := s.serfLAN.GetCoordinate()
  1128  	if err != nil {
  1129  		return nil, err
  1130  	}
  1131  
  1132  	cs := lib.CoordinateSet{"": lan}
  1133  	for name, segment := range s.segmentLAN {
  1134  		c, err := segment.GetCoordinate()
  1135  		if err != nil {
  1136  			return nil, err
  1137  		}
  1138  		cs[name] = c
  1139  	}
  1140  	return cs, nil
  1141  }
  1142  
  1143  // ReloadConfig is used to have the Server do an online reload of
  1144  // relevant configuration information
  1145  func (s *Server) ReloadConfig(config *Config) error {
  1146  	return nil
  1147  }
  1148  
   1149  // setConsistentReadReady atomically sets a readiness flag when leadership is obtained, to indicate that the server is past its barrier write.
  1150  func (s *Server) setConsistentReadReady() {
  1151  	atomic.StoreInt32(&s.readyForConsistentReads, 1)
  1152  }
  1153  
   1154  // resetConsistentReadReady atomically resets the readiness flag when leadership is revoked.
  1155  func (s *Server) resetConsistentReadReady() {
  1156  	atomic.StoreInt32(&s.readyForConsistentReads, 0)
  1157  }
  1158  
  1159  // Returns true if this server is ready to serve consistent reads
  1160  func (s *Server) isReadyForConsistentReads() bool {
  1161  	return atomic.LoadInt32(&s.readyForConsistentReads) == 1
  1162  }
  1163  
  1164  // peersInfoContent is used to help operators understand what happened to the
  1165  // peers.json file. This is written to a file called peers.info in the same
  1166  // location.
  1167  const peersInfoContent = `
  1168  As of Consul 0.7.0, the peers.json file is only used for recovery
  1169  after an outage. The format of this file depends on what the server has
  1170  configured for its Raft protocol version. Please see the agent configuration
  1171  page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more
  1172  details about this parameter.
  1173  
  1174  For Raft protocol version 2 and earlier, this should be formatted as a JSON
  1175  array containing the address and port of each Consul server in the cluster, like
  1176  this:
  1177  
  1178  [
  1179    "10.1.0.1:8300",
  1180    "10.1.0.2:8300",
  1181    "10.1.0.3:8300"
  1182  ]
  1183  
  1184  For Raft protocol version 3 and later, this should be formatted as a JSON
  1185  array containing the node ID, address:port, and suffrage information of each
  1186  Consul server in the cluster, like this:
  1187  
  1188  [
  1189    {
  1190      "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
  1191      "address": "10.1.0.1:8300",
  1192      "non_voter": false
  1193    },
  1194    {
  1195      "id": "8b6dda82-3103-11e7-93ae-92361f002671",
  1196      "address": "10.1.0.2:8300",
  1197      "non_voter": false
  1198    },
  1199    {
  1200      "id": "97e17742-3103-11e7-93ae-92361f002671",
  1201      "address": "10.1.0.3:8300",
  1202      "non_voter": false
  1203    }
  1204  ]
  1205  
  1206  The "id" field is the node ID of the server. This can be found in the logs when
  1207  the server starts up, or in the "node-id" file inside the server's data
  1208  directory.
  1209  
  1210  The "address" field is the address and port of the server.
  1211  
  1212  The "non_voter" field controls whether the server is a non-voter, which is used
  1213  in some advanced Autopilot configurations, please see
  1214  https://www.consul.io/docs/guides/autopilot.html for more information. If
  1215  "non_voter" is omitted it will default to false, which is typical for most
  1216  clusters.
  1217  
  1218  Under normal operation, the peers.json file will not be present.
  1219  
  1220  When Consul starts for the first time, it will create this peers.info file and
  1221  delete any existing peers.json file so that recovery doesn't occur on the first
  1222  startup.
  1223  
  1224  Once this peers.info file is present, any peers.json file will be ingested at
  1225  startup, and will set the Raft peer configuration manually to recover from an
  1226  outage. It's crucial that all servers in the cluster are shut down before
  1227  creating the peers.json file, and that all servers receive the same
  1228  configuration. Once the peers.json file is successfully ingested and applied, it
  1229  will be deleted.
  1230  
  1231  Please see https://www.consul.io/docs/guides/outage.html for more information.
  1232  `