// github.imxd.top/hashicorp/consul@v1.4.5/agent/consul/server.go

package consul

import (
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net"
	"net/rpc"
	"os"
	"path/filepath"
	"reflect"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	ca "github.com/hashicorp/consul/agent/connect/ca"
	"github.com/hashicorp/consul/agent/consul/autopilot"
	"github.com/hashicorp/consul/agent/consul/fsm"
	"github.com/hashicorp/consul/agent/consul/state"
	"github.com/hashicorp/consul/agent/metadata"
	"github.com/hashicorp/consul/agent/pool"
	"github.com/hashicorp/consul/agent/router"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/hashicorp/consul/agent/token"
	"github.com/hashicorp/consul/lib"
	"github.com/hashicorp/consul/sentinel"
	"github.com/hashicorp/consul/tlsutil"
	"github.com/hashicorp/consul/types"
	"github.com/hashicorp/raft"
	raftboltdb "github.com/hashicorp/raft-boltdb"
	"github.com/hashicorp/serf/serf"
)

// These are the protocol versions that Consul can _understand_. These are
// Consul-level protocol versions, that are used to configure the Serf
// protocol versions.
const (
	ProtocolVersionMin uint8 = 2

	// Version 3 added support for network coordinates but we kept the
	// default protocol version at 2 to ease the transition to this new
	// feature. A Consul agent speaking version 2 of the protocol will
	// attempt to send its coordinates to a server who understands version
	// 3 or greater.
	ProtocolVersion2Compatible = 2

	ProtocolVersionMax = 3
)

const (
	serfLANSnapshot   = "serf/local.snapshot"
	serfWANSnapshot   = "serf/remote.snapshot"
	raftState         = "raft/"
	snapshotsRetained = 2

	// serverRPCCache controls how long we keep an idle connection
	// open to a server
	serverRPCCache = 2 * time.Minute

	// serverMaxStreams controls how many idle streams we keep
	// open to a server
	serverMaxStreams = 64

	// raftLogCacheSize is the maximum number of logs to cache in-memory.
	// This is used to reduce disk I/O for the recently committed entries.
	raftLogCacheSize = 512

	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
	// to replicate to gracefully leave the cluster.
	raftRemoveGracePeriod = 5 * time.Second

	// serfEventChSize is the size of the buffered channel to get Serf
	// events. If this is exhausted we will block Serf and Memberlist.
	serfEventChSize = 2048

	// reconcileChSize is the size of the buffered channel for reconcile
	// updates from Serf with the Catalog. If this is exhausted we will
	// drop updates, and wait for a periodic reconcile.
	reconcileChSize = 256
)

var (
	ErrWANFederationDisabled = fmt.Errorf("WAN Federation is disabled")
)

// Server is the Consul server which manages the service discovery,
// health checking, DC forwarding, Raft, and multiple Serf pools.
type Server struct {
	// sentinel is the Sentinel code engine (can be nil).
	sentinel sentinel.Evaluator

	// acls is used to resolve tokens to effective policies
	acls *ACLResolver

	// aclUpgradeCancel is used to cancel the ACL upgrade goroutine when we
	// lose leadership
	aclUpgradeCancel  context.CancelFunc
	aclUpgradeLock    sync.RWMutex
	aclUpgradeEnabled bool

	// aclReplicationCancel is used to shut down the ACL replication goroutine
	// when we lose leadership
	aclReplicationCancel  context.CancelFunc
	aclReplicationLock    sync.RWMutex
	aclReplicationEnabled bool

	// DEPRECATED (ACL-Legacy-Compat) - only needed while we support both
	// useNewACLs is used to determine whether we can use new ACLs or not
	useNewACLs int32

	// autopilot is the Autopilot instance for this server.
	autopilot *autopilot.Autopilot

	// autopilotWaitGroup is used to block until Autopilot shuts down.
	autopilotWaitGroup sync.WaitGroup

	// caProvider is the current CA provider in use for Connect. This is
	// only non-nil when we are the leader.
	caProvider ca.Provider
	// caProviderRoot is the CARoot that was stored along with the ca.Provider
	// active. It's only updated in lock-step with the caProvider. This prevents
	// races between state updates to active roots and the fetch of the provider
	// instance.
	caProviderRoot *structs.CARoot
	caProviderLock sync.RWMutex

	// caPruningCh is used to shut down the CA root pruning goroutine when we
	// lose leadership.
	caPruningCh      chan struct{}
	caPruningLock    sync.RWMutex
	caPruningEnabled bool

	// Consul configuration
	config *Config

	// tokens holds ACL tokens initially from the configuration, but can
	// be updated at runtime, so should always be used instead of going to
	// the configuration directly.
	tokens *token.Store

	// Connection pool to other consul servers
	connPool *pool.ConnPool

	// eventChLAN is used to receive events from the
	// serf cluster in the datacenter
	eventChLAN chan serf.Event

	// eventChWAN is used to receive events from the
	// serf cluster that spans datacenters
	eventChWAN chan serf.Event

	// fsm is the state machine used with Raft to provide
	// strong consistency.
	fsm *fsm.FSM

	// Logger uses the provided LogOutput
	logger *log.Logger

	// The raft instance is used among Consul nodes within the DC to protect
	// operations that require strong consistency.
	raft          *raft.Raft
	raftLayer     *RaftLayer
	raftStore     *raftboltdb.BoltStore
	raftTransport *raft.NetworkTransport
	raftInmem     *raft.InmemStore

	// raftNotifyCh is set up by setupRaft() and ensures that we get reliable leader
	// transition notifications from the Raft layer.
	raftNotifyCh <-chan bool

	// reconcileCh is used to pass events from the serf handler
	// into the leader manager, so that the strong state can be
	// updated
	reconcileCh chan serf.Member

	// readyForConsistentReads is used to track when the leader server is
	// ready to serve consistent reads, after it has applied its initial
	// barrier. This is updated atomically.
	readyForConsistentReads int32

	// leaveCh is used to signal that the server is leaving the cluster
	// and trying to shed its RPC traffic onto other Consul servers. This
	// is only ever closed.
	leaveCh chan struct{}

	// router is used to map out Consul servers in the WAN and in Consul
	// Enterprise user-defined areas.
	router *router.Router

	// Listener is used to listen for incoming connections
	Listener  net.Listener
	rpcServer *rpc.Server

	// rpcTLS is the TLS config for incoming TLS requests
	rpcTLS *tls.Config

	// serfLAN is the Serf cluster maintained inside the DC
	// which contains all the DC nodes
	serfLAN *serf.Serf

	// segmentLAN maps segment names to their Serf cluster
	segmentLAN map[string]*serf.Serf

	// serfWAN is the Serf cluster maintained between DC's
	// which SHOULD only consist of Consul servers
	serfWAN *serf.Serf

	// serverLookup tracks server consuls in the local datacenter.
	// Used to do leader forwarding and provide fast lookup by server id and address
	serverLookup *ServerLookup

	// floodLock controls access to floodCh.
	floodLock sync.RWMutex
	floodCh   []chan struct{}

	// sessionTimers track the expiration time of each Session that has
	// a TTL. On expiration, a SessionDestroy event will occur, and
	// destroy the session via standard session destroy processing
	sessionTimers *SessionTimers

	// statsFetcher is used by autopilot to check the status of the other
	// Consul servers.
	statsFetcher *StatsFetcher

	// reassertLeaderCh is used to signal the leader loop should re-run
	// leadership actions after a snapshot restore.
	reassertLeaderCh chan chan error

	// tombstoneGC is used to track the pending GC invocations
	// for the KV tombstones
	tombstoneGC *state.TombstoneGC

	// aclReplicationStatus (and its associated lock) provide information
	// about the health of the ACL replication goroutine.
	aclReplicationStatus     structs.ACLReplicationStatus
	aclReplicationStatusLock sync.RWMutex

	// shutdown and the associated members here are used in orchestrating
	// a clean shutdown. The shutdownCh is never written to, only closed to
	// indicate a shutdown has been initiated.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// embedded struct to hold all the enterprise specific data
	EnterpriseServer
}

// NewServer is only used to help setting up a server for testing. Normal code
// exercises NewServerLogger.
func NewServer(config *Config) (*Server, error) {
	c, err := tlsutil.NewConfigurator(config.ToTLSUtilConfig(), nil)
	if err != nil {
		return nil, err
	}
	return NewServerLogger(config, nil, new(token.Store), c)
}

// NewServerLogger is used to construct a new Consul server from the
// configuration, potentially returning an error
func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store, tlsConfigurator *tlsutil.Configurator) (*Server, error) {
	// Check the protocol version.
	if err := config.CheckProtocolVersion(); err != nil {
		return nil, err
	}

	// Check for a data directory.
	if config.DataDir == "" && !config.DevMode {
		return nil, fmt.Errorf("Config must provide a DataDir")
	}

	// Sanity check the ACLs.
	if err := config.CheckACL(); err != nil {
		return nil, err
	}

	// Ensure we have a log output and create a logger.
	if config.LogOutput == nil {
		config.LogOutput = os.Stderr
	}
	if logger == nil {
		logger = log.New(config.LogOutput, "", log.LstdFlags)
	}

	// Check if TLS is enabled
	if config.CAFile != "" || config.CAPath != "" {
		config.UseTLS = true
	}

	// Set the primary DC if it wasn't set.
	if config.PrimaryDatacenter == "" {
		if config.ACLDatacenter != "" {
			config.PrimaryDatacenter = config.ACLDatacenter
		} else {
			config.PrimaryDatacenter = config.Datacenter
		}
	}

	// Create the tombstone GC.
	gc, err := state.NewTombstoneGC(config.TombstoneTTL, config.TombstoneTTLGranularity)
	if err != nil {
		return nil, err
	}

	// Create the shutdown channel - this is closed but never written to.
	shutdownCh := make(chan struct{})

	connPool := &pool.ConnPool{
		SrcAddr:    config.RPCSrcAddr,
		LogOutput:  config.LogOutput,
		MaxTime:    serverRPCCache,
		MaxStreams: serverMaxStreams,
		TLSWrapper: tlsConfigurator.OutgoingRPCWrapper(),
		ForceTLS:   config.VerifyOutgoing,
	}

	// Create server.
	s := &Server{
		config:           config,
		tokens:           tokens,
		connPool:         connPool,
		eventChLAN:       make(chan serf.Event, serfEventChSize),
		eventChWAN:       make(chan serf.Event, serfEventChSize),
		logger:           logger,
		leaveCh:          make(chan struct{}),
		reconcileCh:      make(chan serf.Member, reconcileChSize),
		router:           router.NewRouter(logger, config.Datacenter),
		rpcServer:        rpc.NewServer(),
		rpcTLS:           tlsConfigurator.IncomingRPCConfig(),
		reassertLeaderCh: make(chan chan error),
		segmentLAN:       make(map[string]*serf.Serf, len(config.Segments)),
		sessionTimers:    NewSessionTimers(),
		tombstoneGC:      gc,
		serverLookup:     NewServerLookup(),
		shutdownCh:       shutdownCh,
	}

	// Initialize enterprise specific server functionality
	if err := s.initEnterprise(); err != nil {
		s.Shutdown()
		return nil, err
	}

	// Initialize the stats fetcher that autopilot will use.
	s.statsFetcher = NewStatsFetcher(logger, s.connPool, s.config.Datacenter)

	s.sentinel = sentinel.New(logger)
	s.useNewACLs = 0
	aclConfig := ACLResolverConfig{
		Config:      config,
		Delegate:    s,
		CacheConfig: serverACLCacheConfig,
		AutoDisable: false,
		Logger:      logger,
		Sentinel:    s.sentinel,
	}
	// Initialize the ACL resolver.
	if s.acls, err = NewACLResolver(&aclConfig); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to create ACL resolver: %v", err)
	}

	// Initialize the RPC layer.
	if err := s.setupRPC(tlsConfigurator.OutgoingRPCWrapper()); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
	}

	// Initialize any extra RPC listeners for segments.
	segmentListeners, err := s.setupSegmentRPC()
	if err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start segment RPC layer: %v", err)
	}

	// Initialize the Raft server.
	if err := s.setupRaft(); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start Raft: %v", err)
	}

	// Serf and dynamic bind ports
	//
	// The LAN serf cluster announces the port of the WAN serf cluster
	// which creates a race when the WAN cluster is supposed to bind to
	// a dynamic port (port 0).
	// The current memberlist implementation will
	// update the bind port in the configuration after the memberlist is
	// created, so we can pull it out from there reliably, even though it's
	// a little gross to be reading the updated config.

	// Initialize the WAN Serf if enabled
	serfBindPortWAN := -1
	if config.SerfWANConfig != nil {
		serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
		s.serfWAN, err = s.setupSerf(config.SerfWANConfig, s.eventChWAN, serfWANSnapshot, true, serfBindPortWAN, "", s.Listener)
		if err != nil {
			s.Shutdown()
			return nil, fmt.Errorf("Failed to start WAN Serf: %v", err)
		}
		// See big comment above why we are doing this.
		if serfBindPortWAN == 0 {
			serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
			if serfBindPortWAN == 0 {
				return nil, fmt.Errorf("Failed to get dynamic bind port for WAN Serf")
			}
			s.logger.Printf("[INFO] agent: Serf WAN TCP bound to port %d", serfBindPortWAN)
		}
	}

	// Initialize the LAN segments before the default LAN Serf so we have
	// updated port information to publish there.
	if err := s.setupSegments(config, serfBindPortWAN, segmentListeners); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to setup network segments: %v", err)
	}

	// Initialize the LAN Serf for the default network segment.
	s.serfLAN, err = s.setupSerf(config.SerfLANConfig, s.eventChLAN, serfLANSnapshot, false, serfBindPortWAN, "", s.Listener)
	if err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start LAN Serf: %v", err)
	}
	go s.lanEventHandler()

	// Start the flooders after the LAN event handler is wired up.
	s.floodSegments(config)

	// Add a "static route" to the WAN Serf and hook it up to Serf events.
	if s.serfWAN != nil {
		if err := s.router.AddArea(types.AreaWAN, s.serfWAN, s.connPool, s.config.VerifyOutgoing); err != nil {
			s.Shutdown()
			return nil, fmt.Errorf("Failed to add WAN serf route: %v", err)
		}
		go router.HandleSerfEvents(s.logger, s.router, types.AreaWAN, s.serfWAN.ShutdownCh(), s.eventChWAN)

		// Fire up the LAN <-> WAN join flooder.
		portFn := func(s *metadata.Server) (int, bool) {
			if s.WanJoinPort > 0 {
				return s.WanJoinPort, true
			}
			return 0, false
		}
		go s.Flood(nil, portFn, s.serfWAN)
	}

	// Start enterprise specific functionality
	if err := s.startEnterprise(); err != nil {
		s.Shutdown()
		return nil, err
	}

	// Initialize Autopilot. This must happen before starting leadership monitoring
	// as establishing leadership could attempt to use autopilot and cause a panic.
	s.initAutopilot(config)

	// Start monitoring leadership. This must happen after Serf is set up
	// since it can fire events when leadership is obtained.
	go s.monitorLeadership()

	// Start listening for RPC requests.
	go s.listen(s.Listener)

	// Start listeners for any segments with separate RPC listeners.
	for _, listener := range segmentListeners {
		go s.listen(listener)
	}

	// Start the metrics handlers.
	go s.sessionStats()

	return s, nil
}

// setupRaft is used to setup and initialize Raft
func (s *Server) setupRaft() error {
	// If we have an unclean exit then attempt to close the Raft store.
	defer func() {
		if s.raft == nil && s.raftStore != nil {
			if err := s.raftStore.Close(); err != nil {
				s.logger.Printf("[ERR] consul: failed to close Raft store: %v", err)
			}
		}
	}()

	// Create the FSM.
	var err error
	s.fsm, err = fsm.New(s.tombstoneGC, s.config.LogOutput)
	if err != nil {
		return err
	}

	// ServerAddressProvider needs server IDs to work correctly, which is
	// only supported in protocol version 3 or higher.
	var serverAddressProvider raft.ServerAddressProvider = nil
	if s.config.RaftConfig.ProtocolVersion >= 3 {
		serverAddressProvider = s.serverLookup
	}

	// Create a transport layer.
	transConfig := &raft.NetworkTransportConfig{
		Stream:                s.raftLayer,
		MaxPool:               3,
		Timeout:               10 * time.Second,
		ServerAddressProvider: serverAddressProvider,
		Logger:                s.logger,
	}

	trans := raft.NewNetworkTransportWithConfig(transConfig)
	s.raftTransport = trans

	// Make sure we set the LogOutput.
	s.config.RaftConfig.LogOutput = s.config.LogOutput
	s.config.RaftConfig.Logger = s.logger

	// Versions of the Raft protocol below 3 require the LocalID to match the network
	// address of the transport.
	s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr())
	if s.config.RaftConfig.ProtocolVersion >= 3 {
		s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID)
	}

	// Build an all in-memory setup for dev mode, otherwise prepare a full
	// disk-based setup.
	var log raft.LogStore
	var stable raft.StableStore
	var snap raft.SnapshotStore
	if s.config.DevMode {
		store := raft.NewInmemStore()
		s.raftInmem = store
		stable = store
		log = store
		snap = raft.NewInmemSnapshotStore()
	} else {
		// Create the base raft path.
		path := filepath.Join(s.config.DataDir, raftState)
		if err := lib.EnsurePath(path, true); err != nil {
			return err
		}

		// Create the backend raft store for logs and stable storage.
		store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
		if err != nil {
			return err
		}
		s.raftStore = store
		stable = store

		// Wrap the store in a LogCache to improve performance.
		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
		if err != nil {
			return err
		}
		log = cacheStore

		// Create the snapshot store.
		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
		if err != nil {
			return err
		}
		snap = snapshots

		// For an existing cluster being upgraded to the new version of
		// Raft, we almost never want to run recovery based on the old
		// peers.json file. We create a peers.info file with a helpful
		// note about where peers.json went, and use that as a sentinel
		// to avoid ingesting the old one that first time (if we have to
		// create the peers.info file because it's not there, we also
		// blow away any existing peers.json file).
		peersFile := filepath.Join(path, "peers.json")
		peersInfoFile := filepath.Join(path, "peers.info")
		if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) {
			if err := ioutil.WriteFile(peersInfoFile, []byte(peersInfoContent), 0755); err != nil {
				return fmt.Errorf("failed to write peers.info file: %v", err)
			}

			// Blow away the peers.json file if present, since the
			// peers.info sentinel wasn't there.
			if _, err := os.Stat(peersFile); err == nil {
				if err := os.Remove(peersFile); err != nil {
					return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
				}
				s.logger.Printf("[INFO] consul: deleted peers.json file (see peers.info for details)")
			}
		} else if _, err := os.Stat(peersFile); err == nil {
			s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...")

			var configuration raft.Configuration
			if s.config.RaftConfig.ProtocolVersion < 3 {
				configuration, err = raft.ReadPeersJSON(peersFile)
			} else {
				configuration, err = raft.ReadConfigJSON(peersFile)
			}
			if err != nil {
				return fmt.Errorf("recovery failed to parse peers.json: %v", err)
			}

			tmpFsm, err := fsm.New(s.tombstoneGC, s.config.LogOutput)
			if err != nil {
				return fmt.Errorf("recovery failed to make temp FSM: %v", err)
			}
			if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
				log, stable, snap, trans, configuration); err != nil {
				return fmt.Errorf("recovery failed: %v", err)
			}

			if err := os.Remove(peersFile); err != nil {
				return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
			}
			s.logger.Printf("[INFO] consul: deleted peers.json file after successful recovery")
		}
	}

	// If we are in bootstrap or dev mode and the state is clean then we can
	// bootstrap now.
	if s.config.Bootstrap || s.config.DevMode {
		hasState, err := raft.HasExistingState(log, stable, snap)
		if err != nil {
			return err
		}
		if !hasState {
			configuration := raft.Configuration{
				Servers: []raft.Server{
					raft.Server{
						ID:      s.config.RaftConfig.LocalID,
						Address: trans.LocalAddr(),
					},
				},
			}
			if err := raft.BootstrapCluster(s.config.RaftConfig,
				log, stable, snap, trans, configuration); err != nil {
				return err
			}
		}
	}

	// Set up a channel for reliable leader notifications.
	raftNotifyCh := make(chan bool, 1)
	s.config.RaftConfig.NotifyCh = raftNotifyCh
	s.raftNotifyCh = raftNotifyCh

	// Setup the Raft store.
	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)
	if err != nil {
		return err
	}
	return nil
}

// factory is a function that returns an RPC endpoint bound to the given
// server.
type factory func(s *Server) interface{}

// endpoints is a list of registered RPC endpoint factories.
var endpoints []factory

// registerEndpoint registers a new RPC endpoint factory.
func registerEndpoint(fn factory) {
	endpoints = append(endpoints, fn)
}

// setupRPC is used to setup the RPC listener
func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error {
	for _, fn := range endpoints {
		s.rpcServer.Register(fn(s))
	}

	ln, err := net.ListenTCP("tcp", s.config.RPCAddr)
	if err != nil {
		return err
	}
	s.Listener = ln
	if s.config.NotifyListen != nil {
		s.config.NotifyListen()
	}
	// todo(fs): we should probably guard this
	if s.config.RPCAdvertise == nil {
		s.config.RPCAdvertise = ln.Addr().(*net.TCPAddr)
	}

	// Verify that we have a usable advertise address
	if s.config.RPCAdvertise.IP.IsUnspecified() {
		ln.Close()
		return fmt.Errorf("RPC advertise address is not advertisable: %v", s.config.RPCAdvertise)
	}

	// Provide a DC specific wrapper. Raft replication is only
	// ever done in the same datacenter, so we can provide it as a constant.
	wrapper := tlsutil.SpecificDC(s.config.Datacenter, tlsWrap)

	// Define a callback for determining whether to wrap a connection with TLS
	tlsFunc := func(address raft.ServerAddress) bool {
		if s.config.VerifyOutgoing {
			return true
		}

		server := s.serverLookup.Server(address)

		if server == nil {
			return false
		}

		return server.UseTLS
	}
	s.raftLayer = NewRaftLayer(s.config.RPCSrcAddr, s.config.RPCAdvertise, wrapper, tlsFunc)
	return nil
}

// Shutdown is used to shutdown the server
func (s *Server) Shutdown() error {
	s.logger.Printf("[INFO] consul: shutting down server")
	s.shutdownLock.Lock()
	defer s.shutdownLock.Unlock()

	if s.shutdown {
		return nil
	}

	s.shutdown = true
	close(s.shutdownCh)

	if s.serfLAN != nil {
		s.serfLAN.Shutdown()
	}

	if s.serfWAN != nil {
		s.serfWAN.Shutdown()
		if err := s.router.RemoveArea(types.AreaWAN); err != nil {
			s.logger.Printf("[WARN] consul: error removing WAN area: %v", err)
		}
	}
	s.router.Shutdown()

	if s.raft != nil {
		s.raftTransport.Close()
		s.raftLayer.Close()
		future := s.raft.Shutdown()
		if err := future.Error(); err != nil {
			s.logger.Printf("[WARN] consul: error shutting down raft: %s", err)
		}
		if s.raftStore != nil {
			s.raftStore.Close()
		}
	}

	if s.Listener != nil {
		s.Listener.Close()
	}

	// Close the connection pool
	s.connPool.Shutdown()

	return nil
}

// Leave is used to prepare for a graceful shutdown of the server
func (s *Server) Leave() error {
	s.logger.Printf("[INFO] consul: server starting leave")

	// Check the number of known peers
	numPeers, err := s.numPeers()
	if err != nil {
		s.logger.Printf("[ERR] consul: failed to check raft peers: %v", err)
		return err
	}

	addr := s.raftTransport.LocalAddr()

	// If we are the current leader, and we have any other peers (cluster has multiple
	// servers), we should do a RemoveServer/RemovePeer to safely reduce the quorum size.
	// If we are not the leader, then we should issue our leave intention and wait to be
	// removed for some sane period of time.
	isLeader := s.IsLeader()
	if isLeader && numPeers > 1 {
		minRaftProtocol, err := s.autopilot.MinRaftProtocol()
		if err != nil {
			return err
		}

		if minRaftProtocol >= 2 && s.config.RaftConfig.ProtocolVersion >= 3 {
			future := s.raft.RemoveServer(raft.ServerID(s.config.NodeID), 0, 0)
			if err := future.Error(); err != nil {
				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
			}
		} else {
			future := s.raft.RemovePeer(addr)
			if err := future.Error(); err != nil {
				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
			}
		}
	}

	// Leave the WAN pool
	if s.serfWAN != nil {
		if err := s.serfWAN.Leave(); err != nil {
			s.logger.Printf("[ERR] consul: failed to leave WAN Serf cluster: %v", err)
		}
	}

	// Leave the LAN pool
	if s.serfLAN != nil {
		if err := s.serfLAN.Leave(); err != nil {
			s.logger.Printf("[ERR] consul: failed to leave LAN Serf cluster: %v", err)
		}
	}

	// Start refusing RPCs now that we've left the LAN pool. It's important
	// to do this *after* we've left the LAN pool so that clients will know
	// to shift onto another server if they perform a retry. We also wake up
	// all queries in the RPC retry state.
	s.logger.Printf("[INFO] consul: Waiting %s to drain RPC traffic", s.config.LeaveDrainTime)
	close(s.leaveCh)
	time.Sleep(s.config.LeaveDrainTime)

	// If we were not leader, wait to be safely removed from the cluster. We
	// must wait to allow the raft replication to take place, otherwise an
	// immediate shutdown could cause a loss of quorum.
	if !isLeader {
		left := false
		limit := time.Now().Add(raftRemoveGracePeriod)
		for !left && time.Now().Before(limit) {
			// Sleep a while before we check.
			time.Sleep(50 * time.Millisecond)

			// Get the latest configuration.
			future := s.raft.GetConfiguration()
			if err := future.Error(); err != nil {
				s.logger.Printf("[ERR] consul: failed to get raft configuration: %v", err)
				break
			}

			// See if we are no longer included.
			left = true
			for _, server := range future.Configuration().Servers {
				if server.Address == addr {
					left = false
					break
				}
			}
		}

		// TODO (slackpad) With the old Raft library we used to force the
		// peers set to empty when a graceful leave occurred. This would
		// keep voting spam down if the server was restarted, but it was
		// dangerous because the peers was inconsistent with the logs and
		// snapshots, so it wasn't really safe in all cases for the server
		// to become leader. This is now safe, but the log spam is noisy.
		// The next new version of the library will have a "you are not a
		// peer stop it" behavior that should address this. We will have
		// to evaluate during the RC period if this interim situation is
		// not too confusing for operators.

		// TODO (slackpad) When we take a later new version of the Raft
		// library it won't try to complete replication, so this peer
		// may not realize that it has been removed. Need to revisit this
		// and the warning here.
		if !left {
			s.logger.Printf("[WARN] consul: failed to leave raft configuration gracefully, timeout")
		}
	}

	return nil
}

// numPeers is used to check on the number of known peers, including potentially
// the local node.
// We count only voters, since others can't actually become
// leader, so aren't considered peers.
func (s *Server) numPeers() (int, error) {
	future := s.raft.GetConfiguration()
	if err := future.Error(); err != nil {
		return 0, err
	}

	return autopilot.NumPeers(future.Configuration()), nil
}

// JoinLAN is used to have Consul join the inner-DC pool
// The target address should be another node inside the DC
// listening on the Serf LAN address
func (s *Server) JoinLAN(addrs []string) (int, error) {
	return s.serfLAN.Join(addrs, true)
}

// JoinWAN is used to have Consul join the cross-WAN Consul ring
// The target address should be another node listening on the
// Serf WAN address
func (s *Server) JoinWAN(addrs []string) (int, error) {
	if s.serfWAN == nil {
		return 0, ErrWANFederationDisabled
	}
	return s.serfWAN.Join(addrs, true)
}

// LocalMember is used to return the local node
func (s *Server) LocalMember() serf.Member {
	return s.serfLAN.LocalMember()
}

// LANMembers is used to return the members of the LAN cluster
func (s *Server) LANMembers() []serf.Member {
	return s.serfLAN.Members()
}

// WANMembers is used to return the members of the WAN cluster
func (s *Server) WANMembers() []serf.Member {
	if s.serfWAN == nil {
		return nil
	}
	return s.serfWAN.Members()
}

// RemoveFailedNode is used to remove a failed node from the cluster
func (s *Server) RemoveFailedNode(node string) error {
	if err := s.serfLAN.RemoveFailedNode(node); err != nil {
		return err
	}
	if s.serfWAN != nil {
		if err := s.serfWAN.RemoveFailedNode(node); err != nil {
			return err
		}
	}
	return nil
}

// IsLeader checks if this server is the cluster leader
func (s *Server) IsLeader() bool {
	return s.raft.State() == raft.Leader
}

// KeyManagerLAN returns the LAN Serf keyring manager
func (s *Server) KeyManagerLAN() *serf.KeyManager {
	return s.serfLAN.KeyManager()
}

// KeyManagerWAN returns the WAN Serf keyring manager
func (s *Server) KeyManagerWAN() *serf.KeyManager {
	return s.serfWAN.KeyManager()
}

// Encrypted determines if gossip is encrypted
func (s *Server) Encrypted() bool {
	LANEncrypted := s.serfLAN.EncryptionEnabled()
	if s.serfWAN == nil {
		return LANEncrypted
	}
	return LANEncrypted && s.serfWAN.EncryptionEnabled()
}

// LANSegments returns a map of LAN segments by name
func (s *Server) LANSegments() map[string]*serf.Serf {
	segments := make(map[string]*serf.Serf, len(s.segmentLAN)+1)
	segments[""] = s.serfLAN
	for name, segment := range s.segmentLAN {
		segments[name] = segment
	}

	return segments
}

// inmemCodec is used to do an RPC call without going over a network
type inmemCodec struct {
	method string
	args   interface{}
	reply  interface{}
	err    error
}

// ReadRequestHeader sets the method on the request; no wire decoding is needed.
func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error {
	req.ServiceMethod = i.method
	return nil
}

// ReadRequestBody copies the in-memory args into the handler's argument value.
func (i *inmemCodec) ReadRequestBody(args interface{}) error {
	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args)))
	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args)))
	dst.Set(sourceValue)
	return nil
}

// WriteResponse records any RPC error and copies the handler's reply back to
// the caller's reply value.
func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error {
	if resp.Error != "" {
		i.err = errors.New(resp.Error)
		return nil
	}
	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply)))
	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply)))
	dst.Set(sourceValue)
	return nil
}

func (i *inmemCodec) Close() error {
	return nil
}

// RPC is used to make a local RPC call
func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
	codec := &inmemCodec{
		method: method,
		args:   args,
		reply:  reply,
	}
	if err := s.rpcServer.ServeRequest(codec); err != nil {
		return err
	}
	return codec.err
}

// SnapshotRPC dispatches the given snapshot request, reading from the streaming
// input and writing to the streaming output depending on the operation.
func (s *Server) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer,
	replyFn structs.SnapshotReplyFn) error {

	// Perform the operation.
	var reply structs.SnapshotResponse
	snap, err := s.dispatchSnapshotRequest(args, in, &reply)
	if err != nil {
		return err
	}
	defer func() {
		if err := snap.Close(); err != nil {
			s.logger.Printf("[ERR] consul: Failed to close snapshot: %v", err)
		}
	}()

	// Let the caller peek at the reply.
	if replyFn != nil {
		if err := replyFn(&reply); err != nil {
			return nil
		}
	}

	// Stream the snapshot.
	if out != nil {
		if _, err := io.Copy(out, snap); err != nil {
			return fmt.Errorf("failed to stream snapshot: %v", err)
		}
	}
	return nil
}

// RegisterEndpoint is used to substitute an endpoint for testing.
func (s *Server) RegisterEndpoint(name string, handler interface{}) error {
	s.logger.Printf("[WARN] consul: endpoint injected; this should only be used for testing")
	return s.rpcServer.RegisterName(name, handler)
}

// Stats is used to return statistics for debugging and insight
// for various sub-systems
func (s *Server) Stats() map[string]map[string]string {
	toString := func(v uint64) string {
		return strconv.FormatUint(v, 10)
	}
	numKnownDCs := len(s.router.GetDatacenters())
	stats := map[string]map[string]string{
		"consul": map[string]string{
			"server":            "true",
			"leader":            fmt.Sprintf("%v", s.IsLeader()),
			"leader_addr":       string(s.raft.Leader()),
			"bootstrap":         fmt.Sprintf("%v", s.config.Bootstrap),
			"known_datacenters": toString(uint64(numKnownDCs)),
		},
		"raft":     s.raft.Stats(),
		"serf_lan": s.serfLAN.Stats(),
		"runtime":  runtimeStats(),
	}

	if s.ACLsEnabled() {
		if s.UseLegacyACLs() {
			stats["consul"]["acl"] = "legacy"
		} else {
			stats["consul"]["acl"] = "enabled"
		}
	} else {
		stats["consul"]["acl"] = "disabled"
	}

	if s.serfWAN != nil {
		stats["serf_wan"] = s.serfWAN.Stats()
	}

	for outerKey, outerValue := range s.enterpriseStats() {
		if _, ok := stats[outerKey]; ok {
			for innerKey, innerValue := range outerValue {
				stats[outerKey][innerKey] = innerValue
			}
		} else {
			stats[outerKey] = outerValue
		}
	}

	return stats
}

// GetLANCoordinate returns the coordinate of the server in the LAN gossip pool.
func (s *Server) GetLANCoordinate() (lib.CoordinateSet, error) {
	lan, err := s.serfLAN.GetCoordinate()
	if err != nil {
		return nil, err
	}

	cs := lib.CoordinateSet{"": lan}
	for name, segment := range s.segmentLAN {
		c, err := segment.GetCoordinate()
		if err != nil {
			return nil, err
		}
		cs[name] = c
	}
	return cs, nil
}

// ReloadConfig is used to have the Server do an online reload of
// relevant configuration information
func (s *Server) ReloadConfig(config *Config) error {
	return nil
}

// setConsistentReadReady atomically sets a readiness state flag when leadership
// is obtained, to indicate that the server is past its barrier write.
func (s *Server) setConsistentReadReady() {
	atomic.StoreInt32(&s.readyForConsistentReads, 1)
}

// resetConsistentReadReady atomically resets the readiness state flag on
// leadership revoke.
func (s *Server) resetConsistentReadReady() {
	atomic.StoreInt32(&s.readyForConsistentReads, 0)
}

// isReadyForConsistentReads returns true if this server is ready to serve
// consistent reads.
func (s *Server) isReadyForConsistentReads() bool {
	return atomic.LoadInt32(&s.readyForConsistentReads) == 1
}

// peersInfoContent is used to help operators understand what happened to the
// peers.json file. This is written to a file called peers.info in the same
// location.
const peersInfoContent = `
As of Consul 0.7.0, the peers.json file is only used for recovery
after an outage. The format of this file depends on what the server has
configured for its Raft protocol version. Please see the agent configuration
page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more
details about this parameter.

For Raft protocol version 2 and earlier, this should be formatted as a JSON
array containing the address and port of each Consul server in the cluster, like
this:

[
  "10.1.0.1:8300",
  "10.1.0.2:8300",
  "10.1.0.3:8300"
]

For Raft protocol version 3 and later, this should be formatted as a JSON
array containing the node ID, address:port, and suffrage information of each
Consul server in the cluster, like this:

[
  {
    "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
    "address": "10.1.0.1:8300",
    "non_voter": false
  },
  {
    "id": "8b6dda82-3103-11e7-93ae-92361f002671",
    "address": "10.1.0.2:8300",
    "non_voter": false
  },
  {
    "id": "97e17742-3103-11e7-93ae-92361f002671",
    "address": "10.1.0.3:8300",
    "non_voter": false
  }
]

The "id" field is the node ID of the server. This can be found in the logs when
the server starts up, or in the "node-id" file inside the server's data
directory.

The "address" field is the address and port of the server.

The "non_voter" field controls whether the server is a non-voter, which is used
in some advanced Autopilot configurations, please see
https://www.consul.io/docs/guides/autopilot.html for more information. If
"non_voter" is omitted it will default to false, which is typical for most
clusters.

Under normal operation, the peers.json file will not be present.

When Consul starts for the first time, it will create this peers.info file and
delete any existing peers.json file so that recovery doesn't occur on the first
startup.

Once this peers.info file is present, any peers.json file will be ingested at
startup, and will set the Raft peer configuration manually to recover from an
outage. It's crucial that all servers in the cluster are shut down before
creating the peers.json file, and that all servers receive the same
configuration. Once the peers.json file is successfully ingested and applied, it
will be deleted.

Please see https://www.consul.io/docs/guides/outage.html for more information.
`