github.com/criteo-forks/consul@v1.4.5-criteonogrpc/agent/consul/server.go (about) 1 package consul 2 3 import ( 4 "context" 5 "crypto/tls" 6 "errors" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "log" 11 "net" 12 "net/rpc" 13 "os" 14 "path/filepath" 15 "reflect" 16 "strconv" 17 "sync" 18 "sync/atomic" 19 "time" 20 21 ca "github.com/hashicorp/consul/agent/connect/ca" 22 "github.com/hashicorp/consul/agent/consul/autopilot" 23 "github.com/hashicorp/consul/agent/consul/fsm" 24 "github.com/hashicorp/consul/agent/consul/state" 25 "github.com/hashicorp/consul/agent/metadata" 26 "github.com/hashicorp/consul/agent/pool" 27 "github.com/hashicorp/consul/agent/router" 28 "github.com/hashicorp/consul/agent/structs" 29 "github.com/hashicorp/consul/agent/token" 30 "github.com/hashicorp/consul/lib" 31 "github.com/hashicorp/consul/sentinel" 32 "github.com/hashicorp/consul/tlsutil" 33 "github.com/hashicorp/consul/types" 34 "github.com/hashicorp/raft" 35 raftboltdb "github.com/hashicorp/raft-boltdb" 36 "github.com/hashicorp/serf/serf" 37 ) 38 39 // These are the protocol versions that Consul can _understand_. These are 40 // Consul-level protocol versions, that are used to configure the Serf 41 // protocol versions. 42 const ( 43 ProtocolVersionMin uint8 = 2 44 45 // Version 3 added support for network coordinates but we kept the 46 // default protocol version at 2 to ease the transition to this new 47 // feature. A Consul agent speaking version 2 of the protocol will 48 // attempt to send its coordinates to a server who understands version 49 // 3 or greater. 50 ProtocolVersion2Compatible = 2 51 52 ProtocolVersionMax = 3 53 ) 54 55 const ( 56 serfLANSnapshot = "serf/local.snapshot" 57 serfWANSnapshot = "serf/remote.snapshot" 58 raftState = "raft/" 59 snapshotsRetained = 2 60 61 // serverRPCCache controls how long we keep an idle connection 62 // open to a server 63 serverRPCCache = 2 * time.Minute 64 65 // serverMaxStreams controls how many idle streams we keep 66 // open to a server 67 serverMaxStreams = 64 68 69 // raftLogCacheSize is the maximum number of logs to cache in-memory. 70 // This is used to reduce disk I/O for the recently committed entries. 71 raftLogCacheSize = 512 72 73 // raftRemoveGracePeriod is how long we wait to allow a RemovePeer 74 // to replicate to gracefully leave the cluster. 75 raftRemoveGracePeriod = 5 * time.Second 76 77 // serfEventChSize is the size of the buffered channel to get Serf 78 // events. If this is exhausted we will block Serf and Memberlist. 79 serfEventChSize = 2048 80 81 // reconcileChSize is the size of the buffered channel reconcile updates 82 // from Serf with the Catalog. If this is exhausted we will drop updates, 83 // and wait for a periodic reconcile. 84 reconcileChSize = 256 85 ) 86 87 var ( 88 ErrWANFederationDisabled = fmt.Errorf("WAN Federation is disabled") 89 ) 90 91 type blockingQueryState struct { 92 Index uint64 93 Cancel chan time.Time 94 Watchers int32 95 Done chan struct{} 96 Apply atomic.Value 97 Err atomic.Value 98 } 99 100 func newBlockingQueryState(index uint64, apply func(uint64, interface{}) error, err error) *blockingQueryState { 101 queryState := &blockingQueryState{ 102 Done: make(chan struct{}), 103 Cancel: make(chan time.Time), 104 Index: index, 105 } 106 if apply != nil { 107 queryState.Apply.Store(apply) 108 } 109 if err != nil { 110 queryState.Err.Store(err) 111 } 112 return queryState 113 } 114 115 // Server is Consul server which manages the service discovery, 116 // health checking, DC forwarding, Raft, and multiple Serf pools. 117 type Server struct { 118 // sentinel is the Sentinel code engine (can be nil). 119 sentinel sentinel.Evaluator 120 121 // acls is used to resolve tokens to effective policies 122 acls *ACLResolver 123 124 // aclUpgradeCancel is used to cancel the ACL upgrade goroutine when we 125 // lose leadership 126 aclUpgradeCancel context.CancelFunc 127 aclUpgradeLock sync.RWMutex 128 aclUpgradeEnabled bool 129 130 // aclReplicationCancel is used to shut down the ACL replication goroutine 131 // when we lose leadership 132 aclReplicationCancel context.CancelFunc 133 aclReplicationLock sync.RWMutex 134 aclReplicationEnabled bool 135 136 // DEPRECATED (ACL-Legacy-Compat) - only needed while we support both 137 // useNewACLs is used to determine whether we can use new ACLs or not 138 useNewACLs int32 139 140 // autopilot is the Autopilot instance for this server. 141 autopilot *autopilot.Autopilot 142 143 // autopilotWaitGroup is used to block until Autopilot shuts down. 144 autopilotWaitGroup sync.WaitGroup 145 146 // caProvider is the current CA provider in use for Connect. This is 147 // only non-nil when we are the leader. 148 caProvider ca.Provider 149 // caProviderRoot is the CARoot that was stored along with the ca.Provider 150 // active. It's only updated in lock-step with the caProvider. This prevents 151 // races between state updates to active roots and the fetch of the provider 152 // instance. 153 caProviderRoot *structs.CARoot 154 caProviderLock sync.RWMutex 155 156 // caPruningCh is used to shut down the CA root pruning goroutine when we 157 // lose leadership. 158 caPruningCh chan struct{} 159 caPruningLock sync.RWMutex 160 caPruningEnabled bool 161 162 // Consul configuration 163 config *Config 164 165 // tokens holds ACL tokens initially from the configuration, but can 166 // be updated at runtime, so should always be used instead of going to 167 // the configuration directly. 168 tokens *token.Store 169 170 // Connection pool to other consul servers 171 connPool *pool.ConnPool 172 173 // eventChLAN is used to receive events from the 174 // serf cluster in the datacenter 175 eventChLAN chan serf.Event 176 177 // eventChWAN is used to receive events from the 178 // serf cluster that spans datacenters 179 eventChWAN chan serf.Event 180 181 // fsm is the state machine used with Raft to provide 182 // strong consistency. 183 fsm *fsm.FSM 184 185 // Logger uses the provided LogOutput 186 logger *log.Logger 187 188 // The raft instance is used among Consul nodes within the DC to protect 189 // operations that require strong consistency. 190 // the state directly. 191 raft *raft.Raft 192 raftLayer *RaftLayer 193 raftStore *raftboltdb.BoltStore 194 raftTransport *raft.NetworkTransport 195 raftInmem *raft.InmemStore 196 197 // raftNotifyCh is set up by setupRaft() and ensures that we get reliable leader 198 // transition notifications from the Raft layer. 199 raftNotifyCh <-chan bool 200 201 // reconcileCh is used to pass events from the serf handler 202 // into the leader manager, so that the strong state can be 203 // updated 204 reconcileCh chan serf.Member 205 206 // readyForConsistentReads is used to track when the leader server is 207 // ready to serve consistent reads, after it has applied its initial 208 // barrier. This is updated atomically. 209 readyForConsistentReads int32 210 211 // leaveCh is used to signal that the server is leaving the cluster 212 // and trying to shed its RPC traffic onto other Consul servers. This 213 // is only ever closed. 214 leaveCh chan struct{} 215 216 // router is used to map out Consul servers in the WAN and in Consul 217 // Enterprise user-defined areas. 218 router *router.Router 219 220 // Listener is used to listen for incoming connections 221 Listener net.Listener 222 rpcServer *rpc.Server 223 224 // rpcTLS is the TLS config for incoming TLS requests 225 rpcTLS *tls.Config 226 227 // serfLAN is the Serf cluster maintained inside the DC 228 // which contains all the DC nodes 229 serfLAN *serf.Serf 230 231 // segmentLAN maps segment names to their Serf cluster 232 segmentLAN map[string]*serf.Serf 233 234 // serfWAN is the Serf cluster maintained between DC's 235 // which SHOULD only consist of Consul servers 236 serfWAN *serf.Serf 237 238 // serverLookup tracks server consuls in the local datacenter. 239 // Used to do leader forwarding and provide fast lookup by server id and address 240 serverLookup *ServerLookup 241 242 // floodLock controls access to floodCh. 243 floodLock sync.RWMutex 244 floodCh []chan struct{} 245 246 // sessionTimers track the expiration time of each Session that has 247 // a TTL. On expiration, a SessionDestroy event will occur, and 248 // destroy the session via standard session destroy processing 249 sessionTimers *SessionTimers 250 251 // statsFetcher is used by autopilot to check the status of the other 252 // Consul router. 253 statsFetcher *StatsFetcher 254 255 // reassertLeaderCh is used to signal the leader loop should re-run 256 // leadership actions after a snapshot restore. 257 reassertLeaderCh chan chan error 258 259 // tombstoneGC is used to track the pending GC invocations 260 // for the KV tombstones 261 tombstoneGC *state.TombstoneGC 262 263 // aclReplicationStatus (and its associated lock) provide information 264 // about the health of the ACL replication goroutine. 265 aclReplicationStatus structs.ACLReplicationStatus 266 aclReplicationStatusLock sync.RWMutex 267 268 // shutdown and the associated members here are used in orchestrating 269 // a clean shutdown. The shutdownCh is never written to, only closed to 270 // indicate a shutdown has been initiated. 271 shutdown bool 272 shutdownCh chan struct{} 273 shutdownLock sync.Mutex 274 275 blockingQueriesLock sync.RWMutex 276 blockingQueries map[string]*blockingQueryState 277 278 // embedded struct to hold all the enterprise specific data 279 EnterpriseServer 280 } 281 282 // NewServer is only used to help setting up a server for testing. Normal code 283 // exercises NewServerLogger. 284 func NewServer(config *Config) (*Server, error) { 285 c, err := tlsutil.NewConfigurator(config.ToTLSUtilConfig(), nil) 286 if err != nil { 287 return nil, err 288 } 289 return NewServerLogger(config, nil, new(token.Store), c) 290 } 291 292 // NewServerLogger is used to construct a new Consul server from the 293 // configuration, potentially returning an error 294 func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store, tlsConfigurator *tlsutil.Configurator) (*Server, error) { 295 // Check the protocol version. 296 if err := config.CheckProtocolVersion(); err != nil { 297 return nil, err 298 } 299 300 // Check for a data directory. 301 if config.DataDir == "" && !config.DevMode { 302 return nil, fmt.Errorf("Config must provide a DataDir") 303 } 304 305 // Sanity check the ACLs. 306 if err := config.CheckACL(); err != nil { 307 return nil, err 308 } 309 310 // Ensure we have a log output and create a logger. 311 if config.LogOutput == nil { 312 config.LogOutput = os.Stderr 313 } 314 if logger == nil { 315 logger = log.New(config.LogOutput, "", log.LstdFlags) 316 } 317 318 // Check if TLS is enabled 319 if config.CAFile != "" || config.CAPath != "" { 320 config.UseTLS = true 321 } 322 323 // Set the primary DC if it wasn't set. 324 if config.PrimaryDatacenter == "" { 325 if config.ACLDatacenter != "" { 326 config.PrimaryDatacenter = config.ACLDatacenter 327 } else { 328 config.PrimaryDatacenter = config.Datacenter 329 } 330 } 331 332 // Create the tombstone GC. 333 gc, err := state.NewTombstoneGC(config.TombstoneTTL, config.TombstoneTTLGranularity) 334 if err != nil { 335 return nil, err 336 } 337 338 // Create the shutdown channel - this is closed but never written to. 339 shutdownCh := make(chan struct{}) 340 341 connPool := &pool.ConnPool{ 342 SrcAddr: config.RPCSrcAddr, 343 LogOutput: config.LogOutput, 344 MaxTime: serverRPCCache, 345 MaxStreams: serverMaxStreams, 346 TLSWrapper: tlsConfigurator.OutgoingRPCWrapper(), 347 ForceTLS: config.VerifyOutgoing, 348 } 349 350 // Create server. 351 s := &Server{ 352 config: config, 353 tokens: tokens, 354 connPool: connPool, 355 eventChLAN: make(chan serf.Event, serfEventChSize), 356 eventChWAN: make(chan serf.Event, serfEventChSize), 357 logger: logger, 358 leaveCh: make(chan struct{}), 359 reconcileCh: make(chan serf.Member, reconcileChSize), 360 router: router.NewRouter(logger, config.Datacenter), 361 rpcServer: rpc.NewServer(), 362 rpcTLS: tlsConfigurator.IncomingRPCConfig(), 363 reassertLeaderCh: make(chan chan error), 364 segmentLAN: make(map[string]*serf.Serf, len(config.Segments)), 365 sessionTimers: NewSessionTimers(), 366 tombstoneGC: gc, 367 serverLookup: NewServerLookup(), 368 blockingQueries: make(map[string]*blockingQueryState), 369 shutdownCh: shutdownCh, 370 } 371 372 // Initialize enterprise specific server functionality 373 if err := s.initEnterprise(); err != nil { 374 s.Shutdown() 375 return nil, err 376 } 377 378 // Initialize the stats fetcher that autopilot will use. 379 s.statsFetcher = NewStatsFetcher(logger, s.connPool, s.config.Datacenter) 380 381 s.sentinel = sentinel.New(logger) 382 s.useNewACLs = 0 383 aclConfig := ACLResolverConfig{ 384 Config: config, 385 Delegate: s, 386 CacheConfig: serverACLCacheConfig, 387 AutoDisable: false, 388 Logger: logger, 389 Sentinel: s.sentinel, 390 } 391 // Initialize the ACL resolver. 392 if s.acls, err = NewACLResolver(&aclConfig); err != nil { 393 s.Shutdown() 394 return nil, fmt.Errorf("Failed to create ACL resolver: %v", err) 395 } 396 397 // Initialize the RPC layer. 398 if err := s.setupRPC(tlsConfigurator.OutgoingRPCWrapper()); err != nil { 399 s.Shutdown() 400 return nil, fmt.Errorf("Failed to start RPC layer: %v", err) 401 } 402 403 // Initialize any extra RPC listeners for segments. 404 segmentListeners, err := s.setupSegmentRPC() 405 if err != nil { 406 s.Shutdown() 407 return nil, fmt.Errorf("Failed to start segment RPC layer: %v", err) 408 } 409 410 // Initialize the Raft server. 411 if err := s.setupRaft(); err != nil { 412 s.Shutdown() 413 return nil, fmt.Errorf("Failed to start Raft: %v", err) 414 } 415 416 // Serf and dynamic bind ports 417 // 418 // The LAN serf cluster announces the port of the WAN serf cluster 419 // which creates a race when the WAN cluster is supposed to bind to 420 // a dynamic port (port 0). The current memberlist implementation will 421 // update the bind port in the configuration after the memberlist is 422 // created, so we can pull it out from there reliably, even though it's 423 // a little gross to be reading the updated config. 424 425 // Initialize the WAN Serf if enabled 426 serfBindPortWAN := -1 427 if config.SerfWANConfig != nil { 428 serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort 429 s.serfWAN, err = s.setupSerf(config.SerfWANConfig, s.eventChWAN, serfWANSnapshot, true, serfBindPortWAN, "", s.Listener) 430 if err != nil { 431 s.Shutdown() 432 return nil, fmt.Errorf("Failed to start WAN Serf: %v", err) 433 } 434 // See big comment above why we are doing this. 435 if serfBindPortWAN == 0 { 436 serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort 437 if serfBindPortWAN == 0 { 438 return nil, fmt.Errorf("Failed to get dynamic bind port for WAN Serf") 439 } 440 s.logger.Printf("[INFO] agent: Serf WAN TCP bound to port %d", serfBindPortWAN) 441 } 442 } 443 444 // Initialize the LAN segments before the default LAN Serf so we have 445 // updated port information to publish there. 446 if err := s.setupSegments(config, serfBindPortWAN, segmentListeners); err != nil { 447 s.Shutdown() 448 return nil, fmt.Errorf("Failed to setup network segments: %v", err) 449 } 450 451 // Initialize the LAN Serf for the default network segment. 452 s.serfLAN, err = s.setupSerf(config.SerfLANConfig, s.eventChLAN, serfLANSnapshot, false, serfBindPortWAN, "", s.Listener) 453 if err != nil { 454 s.Shutdown() 455 return nil, fmt.Errorf("Failed to start LAN Serf: %v", err) 456 } 457 go s.lanEventHandler() 458 459 // Start the flooders after the LAN event handler is wired up. 460 s.floodSegments(config) 461 462 // Add a "static route" to the WAN Serf and hook it up to Serf events. 463 if s.serfWAN != nil { 464 if err := s.router.AddArea(types.AreaWAN, s.serfWAN, s.connPool, s.config.VerifyOutgoing); err != nil { 465 s.Shutdown() 466 return nil, fmt.Errorf("Failed to add WAN serf route: %v", err) 467 } 468 go router.HandleSerfEvents(s.logger, s.router, types.AreaWAN, s.serfWAN.ShutdownCh(), s.eventChWAN) 469 470 // Fire up the LAN <-> WAN join flooder. 471 portFn := func(s *metadata.Server) (int, bool) { 472 if s.WanJoinPort > 0 { 473 return s.WanJoinPort, true 474 } 475 return 0, false 476 } 477 go s.Flood(nil, portFn, s.serfWAN) 478 } 479 480 // Start enterprise specific functionality 481 if err := s.startEnterprise(); err != nil { 482 s.Shutdown() 483 return nil, err 484 } 485 486 // Initialize Autopilot. This must happen before starting leadership monitoring 487 // as establishing leadership could attempt to use autopilot and cause a panic. 488 s.initAutopilot(config) 489 490 // Start monitoring leadership. This must happen after Serf is set up 491 // since it can fire events when leadership is obtained. 492 go s.monitorLeadership() 493 494 // Start listening for RPC requests. 495 go s.listen(s.Listener) 496 497 // Start listeners for any segments with separate RPC listeners. 498 for _, listener := range segmentListeners { 499 go s.listen(listener) 500 } 501 502 // Start the metrics handlers. 503 go s.sessionStats() 504 505 return s, nil 506 } 507 508 // setupRaft is used to setup and initialize Raft 509 func (s *Server) setupRaft() error { 510 // If we have an unclean exit then attempt to close the Raft store. 511 defer func() { 512 if s.raft == nil && s.raftStore != nil { 513 if err := s.raftStore.Close(); err != nil { 514 s.logger.Printf("[ERR] consul: failed to close Raft store: %v", err) 515 } 516 } 517 }() 518 519 // Create the FSM. 520 var err error 521 s.fsm, err = fsm.New(s.tombstoneGC, s.config.WatchSoftLimit, s.config.LogOutput) 522 if err != nil { 523 return err 524 } 525 526 var serverAddressProvider raft.ServerAddressProvider = nil 527 if s.config.RaftConfig.ProtocolVersion >= 3 { //ServerAddressProvider needs server ids to work correctly, which is only supported in protocol version 3 or higher 528 serverAddressProvider = s.serverLookup 529 } 530 531 // Create a transport layer. 532 transConfig := &raft.NetworkTransportConfig{ 533 Stream: s.raftLayer, 534 MaxPool: 3, 535 Timeout: 10 * time.Second, 536 ServerAddressProvider: serverAddressProvider, 537 Logger: s.logger, 538 } 539 540 trans := raft.NewNetworkTransportWithConfig(transConfig) 541 s.raftTransport = trans 542 543 // Make sure we set the LogOutput. 544 s.config.RaftConfig.LogOutput = s.config.LogOutput 545 s.config.RaftConfig.Logger = s.logger 546 547 // Versions of the Raft protocol below 3 require the LocalID to match the network 548 // address of the transport. 549 s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr()) 550 if s.config.RaftConfig.ProtocolVersion >= 3 { 551 s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID) 552 } 553 554 // Build an all in-memory setup for dev mode, otherwise prepare a full 555 // disk-based setup. 556 var log raft.LogStore 557 var stable raft.StableStore 558 var snap raft.SnapshotStore 559 if s.config.DevMode { 560 store := raft.NewInmemStore() 561 s.raftInmem = store 562 stable = store 563 log = store 564 snap = raft.NewInmemSnapshotStore() 565 } else { 566 // Create the base raft path. 567 path := filepath.Join(s.config.DataDir, raftState) 568 if err := lib.EnsurePath(path, true); err != nil { 569 return err 570 } 571 572 // Create the backend raft store for logs and stable storage. 573 store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db")) 574 if err != nil { 575 return err 576 } 577 s.raftStore = store 578 stable = store 579 580 // Wrap the store in a LogCache to improve performance. 581 cacheStore, err := raft.NewLogCache(raftLogCacheSize, store) 582 if err != nil { 583 return err 584 } 585 log = cacheStore 586 587 // Create the snapshot store. 588 snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput) 589 if err != nil { 590 return err 591 } 592 snap = snapshots 593 594 // For an existing cluster being upgraded to the new version of 595 // Raft, we almost never want to run recovery based on the old 596 // peers.json file. We create a peers.info file with a helpful 597 // note about where peers.json went, and use that as a sentinel 598 // to avoid ingesting the old one that first time (if we have to 599 // create the peers.info file because it's not there, we also 600 // blow away any existing peers.json file). 601 peersFile := filepath.Join(path, "peers.json") 602 peersInfoFile := filepath.Join(path, "peers.info") 603 if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) { 604 if err := ioutil.WriteFile(peersInfoFile, []byte(peersInfoContent), 0755); err != nil { 605 return fmt.Errorf("failed to write peers.info file: %v", err) 606 } 607 608 // Blow away the peers.json file if present, since the 609 // peers.info sentinel wasn't there. 610 if _, err := os.Stat(peersFile); err == nil { 611 if err := os.Remove(peersFile); err != nil { 612 return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err) 613 } 614 s.logger.Printf("[INFO] consul: deleted peers.json file (see peers.info for details)") 615 } 616 } else if _, err := os.Stat(peersFile); err == nil { 617 s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...") 618 619 var configuration raft.Configuration 620 if s.config.RaftConfig.ProtocolVersion < 3 { 621 configuration, err = raft.ReadPeersJSON(peersFile) 622 } else { 623 configuration, err = raft.ReadConfigJSON(peersFile) 624 } 625 if err != nil { 626 return fmt.Errorf("recovery failed to parse peers.json: %v", err) 627 } 628 629 tmpFsm, err := fsm.New(s.tombstoneGC, s.config.WatchSoftLimit, s.config.LogOutput) 630 if err != nil { 631 return fmt.Errorf("recovery failed to make temp FSM: %v", err) 632 } 633 if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm, 634 log, stable, snap, trans, configuration); err != nil { 635 return fmt.Errorf("recovery failed: %v", err) 636 } 637 638 if err := os.Remove(peersFile); err != nil { 639 return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err) 640 } 641 s.logger.Printf("[INFO] consul: deleted peers.json file after successful recovery") 642 } 643 } 644 645 // If we are in bootstrap or dev mode and the state is clean then we can 646 // bootstrap now. 647 if s.config.Bootstrap || s.config.DevMode { 648 hasState, err := raft.HasExistingState(log, stable, snap) 649 if err != nil { 650 return err 651 } 652 if !hasState { 653 configuration := raft.Configuration{ 654 Servers: []raft.Server{ 655 raft.Server{ 656 ID: s.config.RaftConfig.LocalID, 657 Address: trans.LocalAddr(), 658 }, 659 }, 660 } 661 if err := raft.BootstrapCluster(s.config.RaftConfig, 662 log, stable, snap, trans, configuration); err != nil { 663 return err 664 } 665 } 666 } 667 668 // Set up a channel for reliable leader notifications. 669 raftNotifyCh := make(chan bool, 1) 670 s.config.RaftConfig.NotifyCh = raftNotifyCh 671 s.raftNotifyCh = raftNotifyCh 672 673 // Setup the Raft store. 674 s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans) 675 if err != nil { 676 return err 677 } 678 return nil 679 } 680 681 // endpointFactory is a function that returns an RPC endpoint bound to the given 682 // server. 683 type factory func(s *Server) interface{} 684 685 // endpoints is a list of registered RPC endpoint factories. 686 var endpoints []factory 687 688 // registerEndpoint registers a new RPC endpoint factory. 689 func registerEndpoint(fn factory) { 690 endpoints = append(endpoints, fn) 691 } 692 693 // setupRPC is used to setup the RPC listener 694 func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error { 695 for _, fn := range endpoints { 696 s.rpcServer.Register(fn(s)) 697 } 698 699 ln, err := net.ListenTCP("tcp", s.config.RPCAddr) 700 if err != nil { 701 return err 702 } 703 s.Listener = ln 704 if s.config.NotifyListen != nil { 705 s.config.NotifyListen() 706 } 707 // todo(fs): we should probably guard this 708 if s.config.RPCAdvertise == nil { 709 s.config.RPCAdvertise = ln.Addr().(*net.TCPAddr) 710 } 711 712 // Verify that we have a usable advertise address 713 if s.config.RPCAdvertise.IP.IsUnspecified() { 714 ln.Close() 715 return fmt.Errorf("RPC advertise address is not advertisable: %v", s.config.RPCAdvertise) 716 } 717 718 // Provide a DC specific wrapper. Raft replication is only 719 // ever done in the same datacenter, so we can provide it as a constant. 720 wrapper := tlsutil.SpecificDC(s.config.Datacenter, tlsWrap) 721 722 // Define a callback for determining whether to wrap a connection with TLS 723 tlsFunc := func(address raft.ServerAddress) bool { 724 if s.config.VerifyOutgoing { 725 return true 726 } 727 728 server := s.serverLookup.Server(address) 729 730 if server == nil { 731 return false 732 } 733 734 return server.UseTLS 735 } 736 s.raftLayer = NewRaftLayer(s.config.RPCSrcAddr, s.config.RPCAdvertise, wrapper, tlsFunc) 737 return nil 738 } 739 740 // Shutdown is used to shutdown the server 741 func (s *Server) Shutdown() error { 742 s.logger.Printf("[INFO] consul: shutting down server") 743 s.shutdownLock.Lock() 744 defer s.shutdownLock.Unlock() 745 746 if s.shutdown { 747 return nil 748 } 749 750 s.shutdown = true 751 close(s.shutdownCh) 752 753 if s.serfLAN != nil { 754 s.serfLAN.Shutdown() 755 } 756 757 if s.serfWAN != nil { 758 s.serfWAN.Shutdown() 759 if err := s.router.RemoveArea(types.AreaWAN); err != nil { 760 s.logger.Printf("[WARN] consul: error removing WAN area: %v", err) 761 } 762 } 763 s.router.Shutdown() 764 765 if s.raft != nil { 766 s.raftTransport.Close() 767 s.raftLayer.Close() 768 future := s.raft.Shutdown() 769 if err := future.Error(); err != nil { 770 s.logger.Printf("[WARN] consul: error shutting down raft: %s", err) 771 } 772 if s.raftStore != nil { 773 s.raftStore.Close() 774 } 775 } 776 777 if s.Listener != nil { 778 s.Listener.Close() 779 } 780 781 // Close the connection pool 782 s.connPool.Shutdown() 783 784 return nil 785 } 786 787 // Leave is used to prepare for a graceful shutdown of the server 788 func (s *Server) Leave() error { 789 s.logger.Printf("[INFO] consul: server starting leave") 790 791 // Check the number of known peers 792 numPeers, err := s.numPeers() 793 if err != nil { 794 s.logger.Printf("[ERR] consul: failed to check raft peers: %v", err) 795 return err 796 } 797 798 addr := s.raftTransport.LocalAddr() 799 800 // If we are the current leader, and we have any other peers (cluster has multiple 801 // servers), we should do a RemoveServer/RemovePeer to safely reduce the quorum size. 802 // If we are not the leader, then we should issue our leave intention and wait to be 803 // removed for some sane period of time. 804 isLeader := s.IsLeader() 805 if isLeader && numPeers > 1 { 806 minRaftProtocol, err := s.autopilot.MinRaftProtocol() 807 if err != nil { 808 return err 809 } 810 811 if minRaftProtocol >= 2 && s.config.RaftConfig.ProtocolVersion >= 3 { 812 future := s.raft.RemoveServer(raft.ServerID(s.config.NodeID), 0, 0) 813 if err := future.Error(); err != nil { 814 s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err) 815 } 816 } else { 817 future := s.raft.RemovePeer(addr) 818 if err := future.Error(); err != nil { 819 s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err) 820 } 821 } 822 } 823 824 // Leave the WAN pool 825 if s.serfWAN != nil { 826 if err := s.serfWAN.Leave(); err != nil { 827 s.logger.Printf("[ERR] consul: failed to leave WAN Serf cluster: %v", err) 828 } 829 } 830 831 // Leave the LAN pool 832 if s.serfLAN != nil { 833 if err := s.serfLAN.Leave(); err != nil { 834 s.logger.Printf("[ERR] consul: failed to leave LAN Serf cluster: %v", err) 835 } 836 } 837 838 // Start refusing RPCs now that we've left the LAN pool. It's important 839 // to do this *after* we've left the LAN pool so that clients will know 840 // to shift onto another server if they perform a retry. We also wake up 841 // all queries in the RPC retry state. 842 s.logger.Printf("[INFO] consul: Waiting %s to drain RPC traffic", s.config.LeaveDrainTime) 843 close(s.leaveCh) 844 time.Sleep(s.config.LeaveDrainTime) 845 846 // If we were not leader, wait to be safely removed from the cluster. We 847 // must wait to allow the raft replication to take place, otherwise an 848 // immediate shutdown could cause a loss of quorum. 849 if !isLeader { 850 left := false 851 limit := time.Now().Add(raftRemoveGracePeriod) 852 for !left && time.Now().Before(limit) { 853 // Sleep a while before we check. 854 time.Sleep(50 * time.Millisecond) 855 856 // Get the latest configuration. 857 future := s.raft.GetConfiguration() 858 if err := future.Error(); err != nil { 859 s.logger.Printf("[ERR] consul: failed to get raft configuration: %v", err) 860 break 861 } 862 863 // See if we are no longer included. 864 left = true 865 for _, server := range future.Configuration().Servers { 866 if server.Address == addr { 867 left = false 868 break 869 } 870 } 871 } 872 873 // TODO (slackpad) With the old Raft library we used to force the 874 // peers set to empty when a graceful leave occurred. This would 875 // keep voting spam down if the server was restarted, but it was 876 // dangerous because the peers was inconsistent with the logs and 877 // snapshots, so it wasn't really safe in all cases for the server 878 // to become leader. This is now safe, but the log spam is noisy. 879 // The next new version of the library will have a "you are not a 880 // peer stop it" behavior that should address this. We will have 881 // to evaluate during the RC period if this interim situation is 882 // not too confusing for operators. 883 884 // TODO (slackpad) When we take a later new version of the Raft 885 // library it won't try to complete replication, so this peer 886 // may not realize that it has been removed. Need to revisit this 887 // and the warning here. 888 if !left { 889 s.logger.Printf("[WARN] consul: failed to leave raft configuration gracefully, timeout") 890 } 891 } 892 893 return nil 894 } 895 896 // numPeers is used to check on the number of known peers, including potentially 897 // the local node. We count only voters, since others can't actually become 898 // leader, so aren't considered peers. 899 func (s *Server) numPeers() (int, error) { 900 future := s.raft.GetConfiguration() 901 if err := future.Error(); err != nil { 902 return 0, err 903 } 904 905 return autopilot.NumPeers(future.Configuration()), nil 906 } 907 908 // JoinLAN is used to have Consul join the inner-DC pool 909 // The target address should be another node inside the DC 910 // listening on the Serf LAN address 911 func (s *Server) JoinLAN(addrs []string) (int, error) { 912 return s.serfLAN.Join(addrs, true) 913 } 914 915 // JoinWAN is used to have Consul join the cross-WAN Consul ring 916 // The target address should be another node listening on the 917 // Serf WAN address 918 func (s *Server) JoinWAN(addrs []string) (int, error) { 919 if s.serfWAN == nil { 920 return 0, ErrWANFederationDisabled 921 } 922 return s.serfWAN.Join(addrs, true) 923 } 924 925 // LocalMember is used to return the local node 926 func (s *Server) LocalMember() serf.Member { 927 return s.serfLAN.LocalMember() 928 } 929 930 // LANMembers is used to return the members of the LAN cluster 931 func (s *Server) LANMembers() []serf.Member { 932 return s.serfLAN.Members() 933 } 934 935 // WANMembers is used to return the members of the LAN cluster 936 func (s *Server) WANMembers() []serf.Member { 937 if s.serfWAN == nil { 938 return nil 939 } 940 return s.serfWAN.Members() 941 } 942 943 // RemoveFailedNode is used to remove a failed node from the cluster 944 func (s *Server) RemoveFailedNode(node string) error { 945 if err := s.serfLAN.RemoveFailedNode(node); err != nil { 946 return err 947 } 948 if s.serfWAN != nil { 949 if err := s.serfWAN.RemoveFailedNode(node); err != nil { 950 return err 951 } 952 } 953 return nil 954 } 955 956 // IsLeader checks if this server is the cluster leader 957 func (s *Server) IsLeader() bool { 958 return s.raft.State() == raft.Leader 959 } 960 961 // KeyManagerLAN returns the LAN Serf keyring manager 962 func (s *Server) KeyManagerLAN() *serf.KeyManager { 963 return s.serfLAN.KeyManager() 964 } 965 966 // KeyManagerWAN returns the WAN Serf keyring manager 967 func (s *Server) KeyManagerWAN() *serf.KeyManager { 968 return s.serfWAN.KeyManager() 969 } 970 971 // Encrypted determines if gossip is encrypted 972 func (s *Server) Encrypted() bool { 973 LANEncrypted := s.serfLAN.EncryptionEnabled() 974 if s.serfWAN == nil { 975 return LANEncrypted 976 } 977 return LANEncrypted && s.serfWAN.EncryptionEnabled() 978 } 979 980 // LANSegments returns a map of LAN segments by name 981 func (s *Server) LANSegments() map[string]*serf.Serf { 982 segments := make(map[string]*serf.Serf, len(s.segmentLAN)+1) 983 segments[""] = s.serfLAN 984 for name, segment := range s.segmentLAN { 985 segments[name] = segment 986 } 987 988 return segments 989 } 990 991 // inmemCodec is used to do an RPC call without going over a network 992 type inmemCodec struct { 993 method string 994 args interface{} 995 reply interface{} 996 err error 997 } 998 999 func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error { 1000 req.ServiceMethod = i.method 1001 return nil 1002 } 1003 1004 func (i *inmemCodec) ReadRequestBody(args interface{}) error { 1005 sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args))) 1006 dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args))) 1007 dst.Set(sourceValue) 1008 return nil 1009 } 1010 1011 func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error { 1012 if resp.Error != "" { 1013 i.err = errors.New(resp.Error) 1014 return nil 1015 } 1016 sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply))) 1017 dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply))) 1018 dst.Set(sourceValue) 1019 return nil 1020 } 1021 1022 func (i *inmemCodec) Close() error { 1023 return nil 1024 } 1025 1026 // RPC is used to make a local RPC call 1027 func (s *Server) RPC(method string, args interface{}, reply interface{}) error { 1028 codec := &inmemCodec{ 1029 method: method, 1030 args: args, 1031 reply: reply, 1032 } 1033 if err := s.rpcServer.ServeRequest(codec); err != nil { 1034 return err 1035 } 1036 return codec.err 1037 } 1038 1039 // SnapshotRPC dispatches the given snapshot request, reading from the streaming 1040 // input and writing to the streaming output depending on the operation. 1041 func (s *Server) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer, 1042 replyFn structs.SnapshotReplyFn) error { 1043 1044 // Perform the operation. 1045 var reply structs.SnapshotResponse 1046 snap, err := s.dispatchSnapshotRequest(args, in, &reply) 1047 if err != nil { 1048 return err 1049 } 1050 defer func() { 1051 if err := snap.Close(); err != nil { 1052 s.logger.Printf("[ERR] consul: Failed to close snapshot: %v", err) 1053 } 1054 }() 1055 1056 // Let the caller peek at the reply. 1057 if replyFn != nil { 1058 if err := replyFn(&reply); err != nil { 1059 return nil 1060 } 1061 } 1062 1063 // Stream the snapshot. 1064 if out != nil { 1065 if _, err := io.Copy(out, snap); err != nil { 1066 return fmt.Errorf("failed to stream snapshot: %v", err) 1067 } 1068 } 1069 return nil 1070 } 1071 1072 // RegisterEndpoint is used to substitute an endpoint for testing. 1073 func (s *Server) RegisterEndpoint(name string, handler interface{}) error { 1074 s.logger.Printf("[WARN] consul: endpoint injected; this should only be used for testing") 1075 return s.rpcServer.RegisterName(name, handler) 1076 } 1077 1078 // Stats is used to return statistics for debugging and insight 1079 // for various sub-systems 1080 func (s *Server) Stats() map[string]map[string]string { 1081 toString := func(v uint64) string { 1082 return strconv.FormatUint(v, 10) 1083 } 1084 numKnownDCs := len(s.router.GetDatacenters()) 1085 stats := map[string]map[string]string{ 1086 "consul": map[string]string{ 1087 "server": "true", 1088 "leader": fmt.Sprintf("%v", s.IsLeader()), 1089 "leader_addr": string(s.raft.Leader()), 1090 "bootstrap": fmt.Sprintf("%v", s.config.Bootstrap), 1091 "known_datacenters": toString(uint64(numKnownDCs)), 1092 }, 1093 "raft": s.raft.Stats(), 1094 "serf_lan": s.serfLAN.Stats(), 1095 "runtime": runtimeStats(), 1096 } 1097 1098 if s.ACLsEnabled() { 1099 if s.UseLegacyACLs() { 1100 stats["consul"]["acl"] = "legacy" 1101 } else { 1102 stats["consul"]["acl"] = "enabled" 1103 } 1104 } else { 1105 stats["consul"]["acl"] = "disabled" 1106 } 1107 1108 if s.serfWAN != nil { 1109 stats["serf_wan"] = s.serfWAN.Stats() 1110 } 1111 1112 for outerKey, outerValue := range s.enterpriseStats() { 1113 if _, ok := stats[outerKey]; ok { 1114 for innerKey, innerValue := range outerValue { 1115 stats[outerKey][innerKey] = innerValue 1116 } 1117 } else { 1118 stats[outerKey] = outerValue 1119 } 1120 } 1121 1122 return stats 1123 } 1124 1125 // GetLANCoordinate returns the coordinate of the server in the LAN gossip pool. 1126 func (s *Server) GetLANCoordinate() (lib.CoordinateSet, error) { 1127 lan, err := s.serfLAN.GetCoordinate() 1128 if err != nil { 1129 return nil, err 1130 } 1131 1132 cs := lib.CoordinateSet{"": lan} 1133 for name, segment := range s.segmentLAN { 1134 c, err := segment.GetCoordinate() 1135 if err != nil { 1136 return nil, err 1137 } 1138 cs[name] = c 1139 } 1140 return cs, nil 1141 } 1142 1143 // ReloadConfig is used to have the Server do an online reload of 1144 // relevant configuration information 1145 func (s *Server) ReloadConfig(config *Config) error { 1146 return nil 1147 } 1148 1149 // Atomically sets a readiness state flag when leadership is obtained, to indicate that server is past its barrier write 1150 func (s *Server) setConsistentReadReady() { 1151 atomic.StoreInt32(&s.readyForConsistentReads, 1) 1152 } 1153 1154 // Atomically reset readiness state flag on leadership revoke 1155 func (s *Server) resetConsistentReadReady() { 1156 atomic.StoreInt32(&s.readyForConsistentReads, 0) 1157 } 1158 1159 // Returns true if this server is ready to serve consistent reads 1160 func (s *Server) isReadyForConsistentReads() bool { 1161 return atomic.LoadInt32(&s.readyForConsistentReads) == 1 1162 } 1163 1164 // peersInfoContent is used to help operators understand what happened to the 1165 // peers.json file. This is written to a file called peers.info in the same 1166 // location. 1167 const peersInfoContent = ` 1168 As of Consul 0.7.0, the peers.json file is only used for recovery 1169 after an outage. The format of this file depends on what the server has 1170 configured for its Raft protocol version. Please see the agent configuration 1171 page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more 1172 details about this parameter. 1173 1174 For Raft protocol version 2 and earlier, this should be formatted as a JSON 1175 array containing the address and port of each Consul server in the cluster, like 1176 this: 1177 1178 [ 1179 "10.1.0.1:8300", 1180 "10.1.0.2:8300", 1181 "10.1.0.3:8300" 1182 ] 1183 1184 For Raft protocol version 3 and later, this should be formatted as a JSON 1185 array containing the node ID, address:port, and suffrage information of each 1186 Consul server in the cluster, like this: 1187 1188 [ 1189 { 1190 "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e", 1191 "address": "10.1.0.1:8300", 1192 "non_voter": false 1193 }, 1194 { 1195 "id": "8b6dda82-3103-11e7-93ae-92361f002671", 1196 "address": "10.1.0.2:8300", 1197 "non_voter": false 1198 }, 1199 { 1200 "id": "97e17742-3103-11e7-93ae-92361f002671", 1201 "address": "10.1.0.3:8300", 1202 "non_voter": false 1203 } 1204 ] 1205 1206 The "id" field is the node ID of the server. This can be found in the logs when 1207 the server starts up, or in the "node-id" file inside the server's data 1208 directory. 1209 1210 The "address" field is the address and port of the server. 1211 1212 The "non_voter" field controls whether the server is a non-voter, which is used 1213 in some advanced Autopilot configurations, please see 1214 https://www.consul.io/docs/guides/autopilot.html for more information. If 1215 "non_voter" is omitted it will default to false, which is typical for most 1216 clusters. 1217 1218 Under normal operation, the peers.json file will not be present. 1219 1220 When Consul starts for the first time, it will create this peers.info file and 1221 delete any existing peers.json file so that recovery doesn't occur on the first 1222 startup. 1223 1224 Once this peers.info file is present, any peers.json file will be ingested at 1225 startup, and will set the Raft peer configuration manually to recover from an 1226 outage. It's crucial that all servers in the cluster are shut down before 1227 creating the peers.json file, and that all servers receive the same 1228 configuration. Once the peers.json file is successfully ingested and applied, it 1229 will be deleted. 1230 1231 Please see https://www.consul.io/docs/guides/outage.html for more information. 1232 `