github.com/Aestek/consul@v1.2.4-0.20190309222502-b2c31e33971a/agent/consul/server.go

package consul

import (
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net"
	"net/rpc"
	"os"
	"path/filepath"
	"reflect"
	"strconv"
	"sync"
	"sync/atomic"
	"time"

	ca "github.com/hashicorp/consul/agent/connect/ca"
	"github.com/hashicorp/consul/agent/consul/autopilot"
	"github.com/hashicorp/consul/agent/consul/fsm"
	"github.com/hashicorp/consul/agent/consul/state"
	"github.com/hashicorp/consul/agent/metadata"
	"github.com/hashicorp/consul/agent/pool"
	"github.com/hashicorp/consul/agent/router"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/hashicorp/consul/agent/token"
	"github.com/hashicorp/consul/lib"
	"github.com/hashicorp/consul/sentinel"
	"github.com/hashicorp/consul/tlsutil"
	"github.com/hashicorp/consul/types"
	"github.com/hashicorp/raft"
	raftboltdb "github.com/hashicorp/raft-boltdb"
	"github.com/hashicorp/serf/serf"
)

// These are the protocol versions that Consul can _understand_. These are
// Consul-level protocol versions that are used to configure the Serf
// protocol versions.
const (
	ProtocolVersionMin uint8 = 2

	// Version 3 added support for network coordinates but we kept the
	// default protocol version at 2 to ease the transition to this new
	// feature. A Consul agent speaking version 2 of the protocol will
	// attempt to send its coordinates to a server that understands version
	// 3 or greater.
	ProtocolVersion2Compatible = 2

	ProtocolVersionMax = 3
)

const (
	serfLANSnapshot   = "serf/local.snapshot"
	serfWANSnapshot   = "serf/remote.snapshot"
	raftState         = "raft/"
	snapshotsRetained = 2

	// serverRPCCache controls how long we keep an idle connection
	// open to a server.
	serverRPCCache = 2 * time.Minute

	// serverMaxStreams controls how many idle streams we keep
	// open to a server.
	serverMaxStreams = 64

	// raftLogCacheSize is the maximum number of logs to cache in-memory.
	// This is used to reduce disk I/O for the recently committed entries.
	raftLogCacheSize = 512

	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
	// to replicate to gracefully leave the cluster.
	raftRemoveGracePeriod = 5 * time.Second

	// serfEventChSize is the size of the buffered channel to get Serf
	// events. If this is exhausted we will block Serf and Memberlist.
	serfEventChSize = 2048

	// reconcileChSize is the size of the buffered channel used to reconcile
	// updates from Serf with the Catalog. If this is exhausted we will drop
	// updates, and wait for a periodic reconcile.
	reconcileChSize = 256
)

var (
	ErrWANFederationDisabled = fmt.Errorf("WAN Federation is disabled")
)

// Server is a Consul server which manages the service discovery,
// health checking, DC forwarding, Raft, and multiple Serf pools.
type Server struct {
	// sentinel is the Sentinel code engine (can be nil).
	sentinel sentinel.Evaluator

	// acls is used to resolve tokens to effective policies.
	acls *ACLResolver

	// aclUpgradeCancel is used to cancel the ACL upgrade goroutine when we
	// lose leadership.
	aclUpgradeCancel  context.CancelFunc
	aclUpgradeLock    sync.RWMutex
	aclUpgradeEnabled bool

	// aclReplicationCancel is used to shut down the ACL replication goroutine
	// when we lose leadership.
	aclReplicationCancel  context.CancelFunc
	aclReplicationLock    sync.RWMutex
	aclReplicationEnabled bool

	// DEPRECATED (ACL-Legacy-Compat) - only needed while we support both.
	// useNewACLs is used to determine whether we can use new ACLs or not.
	useNewACLs int32

	// autopilot is the Autopilot instance for this server.
	autopilot *autopilot.Autopilot

	// autopilotWaitGroup is used to block until Autopilot shuts down.
	autopilotWaitGroup sync.WaitGroup

	// caProvider is the current CA provider in use for Connect. This is
	// only non-nil when we are the leader.
	caProvider ca.Provider
	// caProviderRoot is the CARoot that was stored along with the ca.Provider
	// active. It's only updated in lock-step with the caProvider. This prevents
	// races between state updates to active roots and the fetch of the provider
	// instance.
	caProviderRoot *structs.CARoot
	caProviderLock sync.RWMutex

	// caPruningCh is used to shut down the CA root pruning goroutine when we
	// lose leadership.
	caPruningCh      chan struct{}
	caPruningLock    sync.RWMutex
	caPruningEnabled bool

	// config is the Consul configuration.
	config *Config

	// tokens holds ACL tokens initially from the configuration, but can
	// be updated at runtime, so should always be used instead of going to
	// the configuration directly.
	tokens *token.Store

	// connPool is the connection pool to other consul servers.
	connPool *pool.ConnPool

	// eventChLAN is used to receive events from the
	// serf cluster in the datacenter.
	eventChLAN chan serf.Event

	// eventChWAN is used to receive events from the
	// serf cluster that spans datacenters.
	eventChWAN chan serf.Event

	// fsm is the state machine used with Raft to provide
	// strong consistency.
	fsm *fsm.FSM

	// logger uses the provided LogOutput.
	logger *log.Logger

	// The raft instance is used among Consul nodes within the DC to protect
	// operations that require strong consistency.
	raft          *raft.Raft
	raftLayer     *RaftLayer
	raftStore     *raftboltdb.BoltStore
	raftTransport *raft.NetworkTransport
	raftInmem     *raft.InmemStore

	// raftNotifyCh is set up by setupRaft() and ensures that we get reliable leader
	// transition notifications from the Raft layer.
	raftNotifyCh <-chan bool

	// reconcileCh is used to pass events from the serf handler
	// into the leader manager, so that the strong state can be
	// updated.
	reconcileCh chan serf.Member

	// readyForConsistentReads is used to track when the leader server is
	// ready to serve consistent reads, after it has applied its initial
	// barrier. This is updated atomically.
	readyForConsistentReads int32

	// leaveCh is used to signal that the server is leaving the cluster
	// and trying to shed its RPC traffic onto other Consul servers. This
	// is only ever closed.
	leaveCh chan struct{}

	// router is used to map out Consul servers in the WAN and in Consul
	// Enterprise user-defined areas.
	router *router.Router

	// Listener is used to listen for incoming connections.
	Listener  net.Listener
	rpcServer *rpc.Server

	// rpcTLS is the TLS config for incoming TLS requests.
	rpcTLS *tls.Config

	// serfLAN is the Serf cluster maintained inside the DC
	// which contains all the DC nodes.
	serfLAN *serf.Serf

	// segmentLAN maps segment names to their Serf cluster.
	segmentLAN map[string]*serf.Serf

	// serfWAN is the Serf cluster maintained between DCs
	// which SHOULD only consist of Consul servers.
	serfWAN *serf.Serf

	// serverLookup tracks the Consul servers in the local datacenter.
	// Used to do leader forwarding and provide fast lookup by server id and address.
	serverLookup *ServerLookup

	// floodLock controls access to floodCh.
	floodLock sync.RWMutex
	floodCh   []chan struct{}

	// sessionTimers track the expiration time of each Session that has
	// a TTL. On expiration, a SessionDestroy event will occur, and
	// destroy the session via standard session destroy processing.
	sessionTimers *SessionTimers

	// statsFetcher is used by autopilot to check the status of the other
	// Consul servers.
	statsFetcher *StatsFetcher

	// reassertLeaderCh is used to signal that the leader loop should re-run
	// leadership actions after a snapshot restore.
	reassertLeaderCh chan chan error

	// tombstoneGC is used to track the pending GC invocations
	// for the KV tombstones.
	tombstoneGC *state.TombstoneGC

	// aclReplicationStatus (and its associated lock) provide information
	// about the health of the ACL replication goroutine.
	aclReplicationStatus     structs.ACLReplicationStatus
	aclReplicationStatusLock sync.RWMutex

	// shutdown and the associated members here are used in orchestrating
	// a clean shutdown. The shutdownCh is never written to, only closed to
	// indicate a shutdown has been initiated.
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	// EnterpriseServer is an embedded struct that holds all the
	// enterprise-specific data.
	EnterpriseServer
}

// NewServer constructs a new Consul server from the given configuration,
// using default dependencies for the logger, token store, and TLS configurator.
func NewServer(config *Config) (*Server, error) {
	return NewServerLogger(config, nil, new(token.Store), tlsutil.NewConfigurator(config.ToTLSUtilConfig()))
}

// NewServerLogger is used to construct a new Consul server from the
// configuration, potentially returning an error.
func NewServerLogger(config *Config, logger *log.Logger, tokens *token.Store, tlsConfigurator *tlsutil.Configurator) (*Server, error) {
	// Check the protocol version.
	if err := config.CheckProtocolVersion(); err != nil {
		return nil, err
	}

	// Check for a data directory.
	if config.DataDir == "" && !config.DevMode {
		return nil, fmt.Errorf("Config must provide a DataDir")
	}

	// Sanity check the ACLs.
	if err := config.CheckACL(); err != nil {
		return nil, err
	}

	// Ensure we have a log output and create a logger.
	if config.LogOutput == nil {
		config.LogOutput = os.Stderr
	}
	if logger == nil {
		logger = log.New(config.LogOutput, "", log.LstdFlags)
	}

	// Check if TLS is enabled.
	if config.CAFile != "" || config.CAPath != "" {
		config.UseTLS = true
	}

	// Set the primary DC if it wasn't set.
	if config.PrimaryDatacenter == "" {
		if config.ACLDatacenter != "" {
			config.PrimaryDatacenter = config.ACLDatacenter
		} else {
			config.PrimaryDatacenter = config.Datacenter
		}
	}

	// Create the TLS wrapper for outgoing connections.
	tlsWrap, err := tlsConfigurator.OutgoingRPCWrapper()
	if err != nil {
		return nil, err
	}

	// Get the incoming TLS config.
	incomingTLS, err := tlsConfigurator.IncomingRPCConfig()
	if err != nil {
		return nil, err
	}

	// Create the tombstone GC.
	gc, err := state.NewTombstoneGC(config.TombstoneTTL, config.TombstoneTTLGranularity)
	if err != nil {
		return nil, err
	}

	// Create the shutdown channel - this is closed but never written to.
	shutdownCh := make(chan struct{})

	connPool := &pool.ConnPool{
		SrcAddr:    config.RPCSrcAddr,
		LogOutput:  config.LogOutput,
		MaxTime:    serverRPCCache,
		MaxStreams: serverMaxStreams,
		TLSWrapper: tlsWrap,
		ForceTLS:   config.VerifyOutgoing,
	}

	// Create server.
	s := &Server{
		config:           config,
		tokens:           tokens,
		connPool:         connPool,
		eventChLAN:       make(chan serf.Event, serfEventChSize),
		eventChWAN:       make(chan serf.Event, serfEventChSize),
		logger:           logger,
		leaveCh:          make(chan struct{}),
		reconcileCh:      make(chan serf.Member, reconcileChSize),
		router:           router.NewRouter(logger, config.Datacenter),
		rpcServer:        rpc.NewServer(),
		rpcTLS:           incomingTLS,
		reassertLeaderCh: make(chan chan error),
		segmentLAN:       make(map[string]*serf.Serf, len(config.Segments)),
		sessionTimers:    NewSessionTimers(),
		tombstoneGC:      gc,
		serverLookup:     NewServerLookup(),
		shutdownCh:       shutdownCh,
	}

	// Initialize enterprise specific server functionality.
	if err := s.initEnterprise(); err != nil {
		s.Shutdown()
		return nil, err
	}

	// Initialize the stats fetcher that autopilot will use.
	s.statsFetcher = NewStatsFetcher(logger, s.connPool, s.config.Datacenter)

	s.sentinel = sentinel.New(logger)
	s.useNewACLs = 0
	aclConfig := ACLResolverConfig{
		Config:      config,
		Delegate:    s,
		CacheConfig: serverACLCacheConfig,
		AutoDisable: false,
		Logger:      logger,
		Sentinel:    s.sentinel,
	}
	// Initialize the ACL resolver.
	if s.acls, err = NewACLResolver(&aclConfig); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to create ACL resolver: %v", err)
	}

	// Initialize the RPC layer.
	if err := s.setupRPC(tlsWrap); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
	}

	// Initialize any extra RPC listeners for segments.
	segmentListeners, err := s.setupSegmentRPC()
	if err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start segment RPC layer: %v", err)
	}

	// Initialize the Raft server.
	if err := s.setupRaft(); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start Raft: %v", err)
	}

	// Serf and dynamic bind ports
	//
	// The LAN serf cluster announces the port of the WAN serf cluster
	// which creates a race when the WAN cluster is supposed to bind to
	// a dynamic port (port 0). The current memberlist implementation will
	// update the bind port in the configuration after the memberlist is
	// created, so we can pull it out from there reliably, even though it's
	// a little gross to be reading the updated config.
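	//
	// For illustration only (this comment is not part of the original source):
	// the same "bind to port 0, then read back the assigned port" pattern with
	// a plain TCP listener looks like the sketch below. memberlist does the
	// equivalent internally and writes the chosen port back into
	// MemberlistConfig.BindPort, which is what the code below relies on.
	//
	//	ln, err := net.Listen("tcp", "127.0.0.1:0") // port 0 = pick any free port
	//	if err != nil { ... }
	//	port := ln.Addr().(*net.TCPAddr).Port // read back the kernel-assigned port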

	// Initialize the WAN Serf if enabled.
	serfBindPortWAN := -1
	if config.SerfWANConfig != nil {
		serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
		s.serfWAN, err = s.setupSerf(config.SerfWANConfig, s.eventChWAN, serfWANSnapshot, true, serfBindPortWAN, "", s.Listener)
		if err != nil {
			s.Shutdown()
			return nil, fmt.Errorf("Failed to start WAN Serf: %v", err)
		}
		// See the big comment above for why we are doing this.
		if serfBindPortWAN == 0 {
			serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
			if serfBindPortWAN == 0 {
				return nil, fmt.Errorf("Failed to get dynamic bind port for WAN Serf")
			}
			s.logger.Printf("[INFO] agent: Serf WAN TCP bound to port %d", serfBindPortWAN)
		}
	}

	// Initialize the LAN segments before the default LAN Serf so we have
	// updated port information to publish there.
	if err := s.setupSegments(config, serfBindPortWAN, segmentListeners); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to setup network segments: %v", err)
	}

	// Initialize the LAN Serf for the default network segment.
	s.serfLAN, err = s.setupSerf(config.SerfLANConfig, s.eventChLAN, serfLANSnapshot, false, serfBindPortWAN, "", s.Listener)
	if err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start LAN Serf: %v", err)
	}
	go s.lanEventHandler()

	// Start the flooders after the LAN event handler is wired up.
	s.floodSegments(config)

	// Add a "static route" to the WAN Serf and hook it up to Serf events.
	if s.serfWAN != nil {
		if err := s.router.AddArea(types.AreaWAN, s.serfWAN, s.connPool, s.config.VerifyOutgoing); err != nil {
			s.Shutdown()
			return nil, fmt.Errorf("Failed to add WAN serf route: %v", err)
		}
		go router.HandleSerfEvents(s.logger, s.router, types.AreaWAN, s.serfWAN.ShutdownCh(), s.eventChWAN)

		// Fire up the LAN <-> WAN join flooder.
		portFn := func(s *metadata.Server) (int, bool) {
			if s.WanJoinPort > 0 {
				return s.WanJoinPort, true
			}
			return 0, false
		}
		go s.Flood(nil, portFn, s.serfWAN)
	}

	// Start enterprise specific functionality.
	if err := s.startEnterprise(); err != nil {
		s.Shutdown()
		return nil, err
	}

	// Initialize Autopilot. This must happen before starting leadership monitoring
	// as establishing leadership could attempt to use autopilot and cause a panic.
	s.initAutopilot(config)

	// Start monitoring leadership. This must happen after Serf is set up
	// since it can fire events when leadership is obtained.
	go s.monitorLeadership()

	// Start listening for RPC requests.
	go s.listen(s.Listener)

	// Start listeners for any segments with separate RPC listeners.
	for _, listener := range segmentListeners {
		go s.listen(listener)
	}

	// Start the metrics handlers.
	go s.sessionStats()

	return s, nil
}

// setupRaft is used to set up and initialize Raft.
func (s *Server) setupRaft() error {
	// If we have an unclean exit then attempt to close the Raft store.
	defer func() {
		if s.raft == nil && s.raftStore != nil {
			if err := s.raftStore.Close(); err != nil {
				s.logger.Printf("[ERR] consul: failed to close Raft store: %v", err)
			}
		}
	}()

	// Create the FSM.
	var err error
	s.fsm, err = fsm.New(s.tombstoneGC, s.config.LogOutput)
	if err != nil {
		return err
	}

	// The ServerAddressProvider needs server IDs to work correctly, which is
	// only supported in Raft protocol version 3 or higher.
	var serverAddressProvider raft.ServerAddressProvider = nil
	if s.config.RaftConfig.ProtocolVersion >= 3 {
		serverAddressProvider = s.serverLookup
	}

	// Create a transport layer.
	transConfig := &raft.NetworkTransportConfig{
		Stream:                s.raftLayer,
		MaxPool:               3,
		Timeout:               10 * time.Second,
		ServerAddressProvider: serverAddressProvider,
		Logger:                s.logger,
	}

	trans := raft.NewNetworkTransportWithConfig(transConfig)
	s.raftTransport = trans

	// Make sure we set the LogOutput.
	s.config.RaftConfig.LogOutput = s.config.LogOutput
	s.config.RaftConfig.Logger = s.logger

	// Versions of the Raft protocol below 3 require the LocalID to match the network
	// address of the transport.
	s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr())
	if s.config.RaftConfig.ProtocolVersion >= 3 {
		s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID)
	}

	// Build an all in-memory setup for dev mode, otherwise prepare a full
	// disk-based setup.
	var log raft.LogStore
	var stable raft.StableStore
	var snap raft.SnapshotStore
	if s.config.DevMode {
		store := raft.NewInmemStore()
		s.raftInmem = store
		stable = store
		log = store
		snap = raft.NewInmemSnapshotStore()
	} else {
		// Create the base raft path.
		path := filepath.Join(s.config.DataDir, raftState)
		if err := lib.EnsurePath(path, true); err != nil {
			return err
		}

		// Create the backend raft store for logs and stable storage.
		store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
		if err != nil {
			return err
		}
		s.raftStore = store
		stable = store

		// Wrap the store in a LogCache to improve performance.
		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
		if err != nil {
			return err
		}
		log = cacheStore

		// Create the snapshot store.
		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
		if err != nil {
			return err
		}
		snap = snapshots

		// For an existing cluster being upgraded to the new version of
		// Raft, we almost never want to run recovery based on the old
		// peers.json file. We create a peers.info file with a helpful
		// note about where peers.json went, and use that as a sentinel
		// to avoid ingesting the old one that first time (if we have to
		// create the peers.info file because it's not there, we also
		// blow away any existing peers.json file).
		peersFile := filepath.Join(path, "peers.json")
		peersInfoFile := filepath.Join(path, "peers.info")
		if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) {
			if err := ioutil.WriteFile(peersInfoFile, []byte(peersInfoContent), 0755); err != nil {
				return fmt.Errorf("failed to write peers.info file: %v", err)
			}

			// Blow away the peers.json file if present, since the
			// peers.info sentinel wasn't there.
			if _, err := os.Stat(peersFile); err == nil {
				if err := os.Remove(peersFile); err != nil {
					return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
				}
				s.logger.Printf("[INFO] consul: deleted peers.json file (see peers.info for details)")
			}
		} else if _, err := os.Stat(peersFile); err == nil {
			s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...")

			var configuration raft.Configuration
			if s.config.RaftConfig.ProtocolVersion < 3 {
				configuration, err = raft.ReadPeersJSON(peersFile)
			} else {
				configuration, err = raft.ReadConfigJSON(peersFile)
			}
			if err != nil {
				return fmt.Errorf("recovery failed to parse peers.json: %v", err)
			}

			tmpFsm, err := fsm.New(s.tombstoneGC, s.config.LogOutput)
			if err != nil {
				return fmt.Errorf("recovery failed to make temp FSM: %v", err)
			}
			if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
				log, stable, snap, trans, configuration); err != nil {
				return fmt.Errorf("recovery failed: %v", err)
			}

			if err := os.Remove(peersFile); err != nil {
				return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
			}
			s.logger.Printf("[INFO] consul: deleted peers.json file after successful recovery")
		}
	}

	// If we are in bootstrap or dev mode and the state is clean then we can
	// bootstrap now.
	if s.config.Bootstrap || s.config.DevMode {
		hasState, err := raft.HasExistingState(log, stable, snap)
		if err != nil {
			return err
		}
		if !hasState {
			configuration := raft.Configuration{
				Servers: []raft.Server{
					raft.Server{
						ID:      s.config.RaftConfig.LocalID,
						Address: trans.LocalAddr(),
					},
				},
			}
			if err := raft.BootstrapCluster(s.config.RaftConfig,
				log, stable, snap, trans, configuration); err != nil {
				return err
			}
		}
	}

	// Set up a channel for reliable leader notifications.
	raftNotifyCh := make(chan bool, 1)
	s.config.RaftConfig.NotifyCh = raftNotifyCh
	s.raftNotifyCh = raftNotifyCh

	// Set up the Raft store.
	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable, snap, trans)
	if err != nil {
		return err
	}
	return nil
}

// factory is a function that returns an RPC endpoint bound to the given
// server.
type factory func(s *Server) interface{}

// endpoints is a list of registered RPC endpoint factories.
var endpoints []factory

// registerEndpoint registers a new RPC endpoint factory.
func registerEndpoint(fn factory) {
	endpoints = append(endpoints, fn)
}

// setupRPC is used to set up the RPC listener.
func (s *Server) setupRPC(tlsWrap tlsutil.DCWrapper) error {
	for _, fn := range endpoints {
		s.rpcServer.Register(fn(s))
	}

	ln, err := net.ListenTCP("tcp", s.config.RPCAddr)
	if err != nil {
		return err
	}
	s.Listener = ln
	if s.config.NotifyListen != nil {
		s.config.NotifyListen()
	}
	// todo(fs): we should probably guard this
	if s.config.RPCAdvertise == nil {
		s.config.RPCAdvertise = ln.Addr().(*net.TCPAddr)
	}

	// Verify that we have a usable advertise address.
	if s.config.RPCAdvertise.IP.IsUnspecified() {
		ln.Close()
		return fmt.Errorf("RPC advertise address is not advertisable: %v", s.config.RPCAdvertise)
	}

	// Provide a DC specific wrapper. Raft replication is only
	// ever done in the same datacenter, so we can provide it as a constant.
	wrapper := tlsutil.SpecificDC(s.config.Datacenter, tlsWrap)

	// Define a callback for determining whether to wrap a connection with TLS.
	tlsFunc := func(address raft.ServerAddress) bool {
		if s.config.VerifyOutgoing {
			return true
		}

		server := s.serverLookup.Server(address)

		if server == nil {
			return false
		}

		return server.UseTLS
	}
	s.raftLayer = NewRaftLayer(s.config.RPCSrcAddr, s.config.RPCAdvertise, wrapper, tlsFunc)
	return nil
}

// Shutdown is used to shut down the server.
func (s *Server) Shutdown() error {
	s.logger.Printf("[INFO] consul: shutting down server")
	s.shutdownLock.Lock()
	defer s.shutdownLock.Unlock()

	if s.shutdown {
		return nil
	}

	s.shutdown = true
	close(s.shutdownCh)

	if s.serfLAN != nil {
		s.serfLAN.Shutdown()
	}

	if s.serfWAN != nil {
		s.serfWAN.Shutdown()
		if err := s.router.RemoveArea(types.AreaWAN); err != nil {
			s.logger.Printf("[WARN] consul: error removing WAN area: %v", err)
		}
	}
	s.router.Shutdown()

	if s.raft != nil {
		s.raftTransport.Close()
		s.raftLayer.Close()
		future := s.raft.Shutdown()
		if err := future.Error(); err != nil {
			s.logger.Printf("[WARN] consul: error shutting down raft: %s", err)
		}
		if s.raftStore != nil {
			s.raftStore.Close()
		}
	}

	if s.Listener != nil {
		s.Listener.Close()
	}

	// Close the connection pool.
	s.connPool.Shutdown()

	return nil
}

// Leave is used to prepare for a graceful shutdown of the server.
func (s *Server) Leave() error {
	s.logger.Printf("[INFO] consul: server starting leave")

	// Check the number of known peers.
	numPeers, err := s.numPeers()
	if err != nil {
		s.logger.Printf("[ERR] consul: failed to check raft peers: %v", err)
		return err
	}

	addr := s.raftTransport.LocalAddr()

	// If we are the current leader, and we have any other peers (cluster has multiple
	// servers), we should do a RemoveServer/RemovePeer to safely reduce the quorum size.
	// If we are not the leader, then we should issue our leave intention and wait to be
	// removed for some sane period of time.
	isLeader := s.IsLeader()
	if isLeader && numPeers > 1 {
		minRaftProtocol, err := s.autopilot.MinRaftProtocol()
		if err != nil {
			return err
		}

		if minRaftProtocol >= 2 && s.config.RaftConfig.ProtocolVersion >= 3 {
			future := s.raft.RemoveServer(raft.ServerID(s.config.NodeID), 0, 0)
			if err := future.Error(); err != nil {
				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
			}
		} else {
			future := s.raft.RemovePeer(addr)
			if err := future.Error(); err != nil {
				s.logger.Printf("[ERR] consul: failed to remove ourself as raft peer: %v", err)
			}
		}
	}

	// Leave the WAN pool.
	if s.serfWAN != nil {
		if err := s.serfWAN.Leave(); err != nil {
			s.logger.Printf("[ERR] consul: failed to leave WAN Serf cluster: %v", err)
		}
	}

	// Leave the LAN pool.
	if s.serfLAN != nil {
		if err := s.serfLAN.Leave(); err != nil {
			s.logger.Printf("[ERR] consul: failed to leave LAN Serf cluster: %v", err)
		}
	}

	// Start refusing RPCs now that we've left the LAN pool. It's important
	// to do this *after* we've left the LAN pool so that clients will know
	// to shift onto another server if they perform a retry. We also wake up
	// all queries in the RPC retry state.
	s.logger.Printf("[INFO] consul: Waiting %s to drain RPC traffic", s.config.LeaveDrainTime)
	close(s.leaveCh)
	time.Sleep(s.config.LeaveDrainTime)

	// If we were not leader, wait to be safely removed from the cluster. We
	// must wait to allow the raft replication to take place, otherwise an
	// immediate shutdown could cause a loss of quorum.
	if !isLeader {
		left := false
		limit := time.Now().Add(raftRemoveGracePeriod)
		for !left && time.Now().Before(limit) {
			// Sleep a while before we check.
			time.Sleep(50 * time.Millisecond)

			// Get the latest configuration.
			future := s.raft.GetConfiguration()
			if err := future.Error(); err != nil {
				s.logger.Printf("[ERR] consul: failed to get raft configuration: %v", err)
				break
			}

			// See if we are no longer included.
			left = true
			for _, server := range future.Configuration().Servers {
				if server.Address == addr {
					left = false
					break
				}
			}
		}

		// TODO (slackpad) With the old Raft library we used to force the
		// peers set to empty when a graceful leave occurred. This would
		// keep voting spam down if the server was restarted, but it was
		// dangerous because the peers set was inconsistent with the logs and
		// snapshots, so it wasn't really safe in all cases for the server
		// to become leader. This is now safe, but the log spam is noisy.
		// The next new version of the library will have a "you are not a
		// peer stop it" behavior that should address this. We will have
		// to evaluate during the RC period if this interim situation is
		// not too confusing for operators.

		// TODO (slackpad) When we take a later new version of the Raft
		// library it won't try to complete replication, so this peer
		// may not realize that it has been removed. Need to revisit this
		// and the warning here.
		if !left {
			s.logger.Printf("[WARN] consul: failed to leave raft configuration gracefully, timeout")
		}
	}

	return nil
}

// numPeers is used to check on the number of known peers, including potentially
// the local node. We count only voters, since others can't actually become
// leader, so they aren't considered peers.
func (s *Server) numPeers() (int, error) {
	future := s.raft.GetConfiguration()
	if err := future.Error(); err != nil {
		return 0, err
	}

	return autopilot.NumPeers(future.Configuration()), nil
}

// JoinLAN is used to have Consul join the inner-DC pool.
// The target address should be another node inside the DC
// listening on the Serf LAN address.
func (s *Server) JoinLAN(addrs []string) (int, error) {
	return s.serfLAN.Join(addrs, true)
}

// JoinWAN is used to have Consul join the cross-WAN Consul ring.
// The target address should be another node listening on the
// Serf WAN address.
func (s *Server) JoinWAN(addrs []string) (int, error) {
	if s.serfWAN == nil {
		return 0, ErrWANFederationDisabled
	}
	return s.serfWAN.Join(addrs, true)
}

// LocalMember is used to return the local node.
func (s *Server) LocalMember() serf.Member {
	return s.serfLAN.LocalMember()
}

// LANMembers is used to return the members of the LAN cluster.
func (s *Server) LANMembers() []serf.Member {
	return s.serfLAN.Members()
}

// WANMembers is used to return the members of the WAN cluster.
func (s *Server) WANMembers() []serf.Member {
	if s.serfWAN == nil {
		return nil
	}
	return s.serfWAN.Members()
}

// RemoveFailedNode is used to remove a failed node from the cluster.
func (s *Server) RemoveFailedNode(node string) error {
	if err := s.serfLAN.RemoveFailedNode(node); err != nil {
		return err
	}
	if s.serfWAN != nil {
		if err := s.serfWAN.RemoveFailedNode(node); err != nil {
			return err
		}
	}
	return nil
}

// IsLeader checks if this server is the cluster leader.
func (s *Server) IsLeader() bool {
	return s.raft.State() == raft.Leader
}

// KeyManagerLAN returns the LAN Serf keyring manager.
func (s *Server) KeyManagerLAN() *serf.KeyManager {
	return s.serfLAN.KeyManager()
}

// KeyManagerWAN returns the WAN Serf keyring manager.
func (s *Server) KeyManagerWAN() *serf.KeyManager {
	return s.serfWAN.KeyManager()
}

// Encrypted determines if gossip is encrypted.
func (s *Server) Encrypted() bool {
	LANEncrypted := s.serfLAN.EncryptionEnabled()
	if s.serfWAN == nil {
		return LANEncrypted
	}
	return LANEncrypted && s.serfWAN.EncryptionEnabled()
}

// LANSegments returns a map of LAN segments by name.
func (s *Server) LANSegments() map[string]*serf.Serf {
	segments := make(map[string]*serf.Serf, len(s.segmentLAN)+1)
	segments[""] = s.serfLAN
	for name, segment := range s.segmentLAN {
		segments[name] = segment
	}

	return segments
}

// inmemCodec is used to do an RPC call without going over a network.
type inmemCodec struct {
	method string
	args   interface{}
	reply  interface{}
	err    error
}

func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error {
	req.ServiceMethod = i.method
	return nil
}

func (i *inmemCodec) ReadRequestBody(args interface{}) error {
	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args)))
	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args)))
	dst.Set(sourceValue)
	return nil
}

func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error {
	if resp.Error != "" {
		i.err = errors.New(resp.Error)
		return nil
	}
	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply)))
	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply)))
	dst.Set(sourceValue)
	return nil
}

func (i *inmemCodec) Close() error {
	return nil
}

// RPC is used to make a local RPC call.
func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
	codec := &inmemCodec{
		method: method,
		args:   args,
		reply:  reply,
	}
	if err := s.rpcServer.ServeRequest(codec); err != nil {
		return err
	}
	return codec.err
}

// SnapshotRPC dispatches the given snapshot request, reading from the streaming
// input and writing to the streaming output depending on the operation.
func (s *Server) SnapshotRPC(args *structs.SnapshotRequest, in io.Reader, out io.Writer,
	replyFn structs.SnapshotReplyFn) error {

	// Perform the operation.
	var reply structs.SnapshotResponse
	snap, err := s.dispatchSnapshotRequest(args, in, &reply)
	if err != nil {
		return err
	}
	defer func() {
		if err := snap.Close(); err != nil {
			s.logger.Printf("[ERR] consul: Failed to close snapshot: %v", err)
		}
	}()

	// Let the caller peek at the reply.
	if replyFn != nil {
		if err := replyFn(&reply); err != nil {
			return nil
		}
	}

	// Stream the snapshot.
	if out != nil {
		if _, err := io.Copy(out, snap); err != nil {
			return fmt.Errorf("failed to stream snapshot: %v", err)
		}
	}
	return nil
}

// RegisterEndpoint is used to substitute an endpoint for testing.
func (s *Server) RegisterEndpoint(name string, handler interface{}) error {
	s.logger.Printf("[WARN] consul: endpoint injected; this should only be used for testing")
	return s.rpcServer.RegisterName(name, handler)
}

// Stats is used to return statistics for debugging and insight
// into various sub-systems.
func (s *Server) Stats() map[string]map[string]string {
	toString := func(v uint64) string {
		return strconv.FormatUint(v, 10)
	}
	numKnownDCs := len(s.router.GetDatacenters())
	stats := map[string]map[string]string{
		"consul": map[string]string{
			"server":            "true",
			"leader":            fmt.Sprintf("%v", s.IsLeader()),
			"leader_addr":       string(s.raft.Leader()),
			"bootstrap":         fmt.Sprintf("%v", s.config.Bootstrap),
			"known_datacenters": toString(uint64(numKnownDCs)),
		},
		"raft":     s.raft.Stats(),
		"serf_lan": s.serfLAN.Stats(),
		"runtime":  runtimeStats(),
	}

	if s.ACLsEnabled() {
		if s.UseLegacyACLs() {
			stats["consul"]["acl"] = "legacy"
		} else {
			stats["consul"]["acl"] = "enabled"
		}
	} else {
		stats["consul"]["acl"] = "disabled"
	}

	if s.serfWAN != nil {
		stats["serf_wan"] = s.serfWAN.Stats()
	}

	for outerKey, outerValue := range s.enterpriseStats() {
		if _, ok := stats[outerKey]; ok {
			for innerKey, innerValue := range outerValue {
				stats[outerKey][innerKey] = innerValue
			}
		} else {
			stats[outerKey] = outerValue
		}
	}

	return stats
}

// GetLANCoordinate returns the coordinate of the server in the LAN gossip pool.
func (s *Server) GetLANCoordinate() (lib.CoordinateSet, error) {
	lan, err := s.serfLAN.GetCoordinate()
	if err != nil {
		return nil, err
	}

	cs := lib.CoordinateSet{"": lan}
	for name, segment := range s.segmentLAN {
		c, err := segment.GetCoordinate()
		if err != nil {
			return nil, err
		}
		cs[name] = c
	}
	return cs, nil
}

// ReloadConfig is used to have the Server do an online reload of
// relevant configuration information.
func (s *Server) ReloadConfig(config *Config) error {
	return nil
}

// setConsistentReadReady atomically sets the readiness state flag when
// leadership is obtained, to indicate that the server is past its barrier write.
func (s *Server) setConsistentReadReady() {
	atomic.StoreInt32(&s.readyForConsistentReads, 1)
}

// resetConsistentReadReady atomically resets the readiness state flag when
// leadership is revoked.
func (s *Server) resetConsistentReadReady() {
	atomic.StoreInt32(&s.readyForConsistentReads, 0)
}

// isReadyForConsistentReads returns true if this server is ready to serve
// consistent reads.
func (s *Server) isReadyForConsistentReads() bool {
	return atomic.LoadInt32(&s.readyForConsistentReads) == 1
}

// peersInfoContent is used to help operators understand what happened to the
// peers.json file. This is written to a file called peers.info in the same
// location.
const peersInfoContent = `
As of Consul 0.7.0, the peers.json file is only used for recovery
after an outage. The format of this file depends on what the server has
configured for its Raft protocol version. Please see the agent configuration
page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more
details about this parameter.

For Raft protocol version 2 and earlier, this should be formatted as a JSON
array containing the address and port of each Consul server in the cluster, like
this:

[
  "10.1.0.1:8300",
  "10.1.0.2:8300",
  "10.1.0.3:8300"
]

For Raft protocol version 3 and later, this should be formatted as a JSON
array containing the node ID, address:port, and suffrage information of each
Consul server in the cluster, like this:

[
  {
    "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
    "address": "10.1.0.1:8300",
    "non_voter": false
  },
  {
    "id": "8b6dda82-3103-11e7-93ae-92361f002671",
    "address": "10.1.0.2:8300",
    "non_voter": false
  },
  {
    "id": "97e17742-3103-11e7-93ae-92361f002671",
    "address": "10.1.0.3:8300",
    "non_voter": false
  }
]

The "id" field is the node ID of the server. This can be found in the logs when
the server starts up, or in the "node-id" file inside the server's data
directory.

The "address" field is the address and port of the server.

The "non_voter" field controls whether the server is a non-voter, which is used
in some advanced Autopilot configurations; please see
https://www.consul.io/docs/guides/autopilot.html for more information. If
"non_voter" is omitted it will default to false, which is typical for most
clusters.

Under normal operation, the peers.json file will not be present.

When Consul starts for the first time, it will create this peers.info file and
delete any existing peers.json file so that recovery doesn't occur on the first
startup.

Once this peers.info file is present, any peers.json file will be ingested at
startup, and will set the Raft peer configuration manually to recover from an
outage. It's crucial that all servers in the cluster are shut down before
creating the peers.json file, and that all servers receive the same
configuration. Once the peers.json file is successfully ingested and applied, it
will be deleted.

Please see https://www.consul.io/docs/guides/outage.html for more information.
`
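
// pingLocal is a minimal usage sketch added for illustration; it is not part
// of the original source. It shows how the in-memory codec above lets code in
// this package invoke a registered RPC endpoint locally, without a network
// round trip. It assumes the "Status.Ping" endpoint has been registered via
// registerEndpoint, as it is in stock Consul builds; any registered
// "Service.Method" name works the same way.
func pingLocal(s *Server) error {
	var reply struct{}
	// ServeRequest runs synchronously through inmemCodec, so the reply (and
	// any endpoint error surfaced via codec.err) is available as soon as
	// RPC returns.
	return s.RPC("Status.Ping", struct{}{}, &reply)
}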