github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/server.go

package nomad

import (
	"crypto/tls"
	"errors"
	"fmt"
	"log"
	"net"
	"net/rpc"
	"path/filepath"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	consulapi "github.com/hashicorp/consul/api"
	"github.com/hashicorp/consul/lib"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/nomad/command/agent/consul"
	"github.com/hashicorp/nomad/helper/tlsutil"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/raft-boltdb"
	"github.com/hashicorp/serf/serf"
)

const (
	// datacenterQueryLimit sets the max number of DCs that a Nomad
	// Server will query to find bootstrap_expect servers.
	datacenterQueryLimit = 25

	// maxStaleLeadership is the maximum time we will permit this Nomad
	// Server to go without seeing a valid Raft leader.
	maxStaleLeadership = 15 * time.Second

	// peersPollInterval is used as the polling interval between attempts
	// to query Consul for Nomad Servers.
	peersPollInterval = 45 * time.Second

	// peersPollJitterFactor is used to provide a slight amount of variance to
	// the retry interval when querying Consul Servers
	peersPollJitterFactor = 2

	raftState         = "raft/"
	serfSnapshot      = "serf/snapshot"
	snapshotsRetained = 2

	// serverRPCCache controls how long we keep an idle connection open to a server
	serverRPCCache = 2 * time.Minute

	// serverMaxStreams controls how many idle streams we keep open to a server
	serverMaxStreams = 64

	// raftLogCacheSize is the maximum number of logs to cache in-memory.
	// This is used to reduce disk I/O for the recently committed entries.
	raftLogCacheSize = 512

	// raftRemoveGracePeriod is how long we wait to allow a RemovePeer
	// to replicate to gracefully leave the cluster.
	raftRemoveGracePeriod = 5 * time.Second
)

// Server is the Nomad server which manages the job queues,
// schedulers, and notification bus for agents.
type Server struct {
	config *Config
	logger *log.Logger

	// Connection pool to other Nomad servers
	connPool *ConnPool

	// Endpoints holds our RPC endpoints
	endpoints endpoints

	// The raft instance is used among Nomad nodes within the
	// region to protect operations that require strong consistency
	leaderCh      <-chan bool
	raft          *raft.Raft
	raftLayer     *RaftLayer
	raftPeers     raft.PeerStore
	raftStore     *raftboltdb.BoltStore
	raftInmem     *raft.InmemStore
	raftTransport *raft.NetworkTransport

	// fsm is the state machine used with Raft
	fsm *nomadFSM

	// rpcListener is used to listen for incoming connections
	rpcListener  net.Listener
	rpcServer    *rpc.Server
	rpcAdvertise net.Addr

	// rpcTLS is the TLS config for incoming TLS requests
	rpcTLS *tls.Config

	// peers is used to track the known Nomad servers. This is
	// used for region forwarding and clustering.
	peers      map[string][]*serverParts
	localPeers map[string]*serverParts
	peerLock   sync.RWMutex

	// serf is the Serf cluster containing only Nomad
	// servers. This is used for multi-region federation
	// and automatic clustering within regions.
	serf *serf.Serf

	// reconcileCh is used to pass events from the serf handler
	// into the leader manager. Mostly used to handle when servers
	// join/leave from the region.
	reconcileCh chan serf.Member

	// eventCh is used to receive events from the serf cluster
	eventCh chan serf.Event

	// evalBroker is used to manage the in-progress evaluations
	// that are waiting to be brokered to a sub-scheduler
	evalBroker *EvalBroker

	// blockedEvals is used to manage evaluations that are blocked on node
	// capacity changes.
	blockedEvals *BlockedEvals

	// planQueue is used to manage the submitted allocation
	// plans that are waiting to be assessed by the leader
	planQueue *PlanQueue

	// periodicDispatcher is used to track and create evaluations for periodic jobs.
	periodicDispatcher *PeriodicDispatch

	// heartbeatTimers track the expiration time of each heartbeat that has
	// a TTL. On expiration, the node status is updated to be 'down'.
	heartbeatTimers     map[string]*time.Timer
	heartbeatTimersLock sync.Mutex

	// consulSyncer advertises this Nomad Agent with Consul
	consulSyncer *consul.Syncer

	// vault is the client for communicating with Vault.
	vault VaultClient

	// Workers used for processing
	workers []*Worker

	left         bool
	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex
}

// Holds the RPC endpoints
type endpoints struct {
	Status   *Status
	Node     *Node
	Job      *Job
	Eval     *Eval
	Plan     *Plan
	Alloc    *Alloc
	Region   *Region
	Periodic *Periodic
	System   *System
}

// NewServer is used to construct a new Nomad server from the
// configuration, potentially returning an error
func NewServer(config *Config, consulSyncer *consul.Syncer, logger *log.Logger) (*Server, error) {
	// Check the protocol version
	if err := config.CheckVersion(); err != nil {
		return nil, err
	}

	// Create an eval broker
	evalBroker, err := NewEvalBroker(config.EvalNackTimeout, config.EvalDeliveryLimit)
	if err != nil {
		return nil, err
	}

	// Create a new blocked eval tracker.
	blockedEvals := NewBlockedEvals(evalBroker)

	// Create a plan queue
	planQueue, err := NewPlanQueue()
	if err != nil {
		return nil, err
	}

	// Configure TLS
	var tlsWrap tlsutil.RegionWrapper
	var incomingTLS *tls.Config
	if config.TLSConfig.EnableRPC {
		tlsConf := config.tlsConfig()
		tw, err := tlsConf.OutgoingTLSWrapper()
		if err != nil {
			return nil, err
		}
		tlsWrap = tw

		itls, err := tlsConf.IncomingTLSConfig()
		if err != nil {
			return nil, err
		}
		incomingTLS = itls
	}

	// Create the server
	s := &Server{
		config:       config,
		consulSyncer: consulSyncer,
		connPool:     NewPool(config.LogOutput, serverRPCCache, serverMaxStreams, tlsWrap),
		logger:       logger,
		rpcServer:    rpc.NewServer(),
		peers:        make(map[string][]*serverParts),
		localPeers:   make(map[string]*serverParts),
		reconcileCh:  make(chan serf.Member, 32),
		eventCh:      make(chan serf.Event, 256),
		evalBroker:   evalBroker,
		blockedEvals: blockedEvals,
		planQueue:    planQueue,
		rpcTLS:       incomingTLS,
		shutdownCh:   make(chan struct{}),
	}

	// Create the periodic dispatcher for launching periodic jobs.
	s.periodicDispatcher = NewPeriodicDispatch(s.logger, s)

	// Setup Vault
	if err := s.setupVaultClient(); err != nil {
		s.Shutdown()
		s.logger.Printf("[ERR] nomad: failed to setup Vault client: %v", err)
		return nil, fmt.Errorf("Failed to setup Vault client: %v", err)
	}

	// Initialize the RPC layer
	if err := s.setupRPC(tlsWrap); err != nil {
		s.Shutdown()
		s.logger.Printf("[ERR] nomad: failed to start RPC layer: %s", err)
		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
	}

	// Initialize the Raft server
	if err := s.setupRaft(); err != nil {
		s.Shutdown()
		s.logger.Printf("[ERR] nomad: failed to start Raft: %s", err)
		return nil, fmt.Errorf("Failed to start Raft: %v", err)
	}

	// Initialize the wan Serf
	s.serf, err = s.setupSerf(config.SerfConfig, s.eventCh, serfSnapshot)
	if err != nil {
		s.Shutdown()
		s.logger.Printf("[ERR] nomad: failed to start serf WAN: %s", err)
		return nil, fmt.Errorf("Failed to start serf: %v", err)
	}

	// Initialize the scheduling workers
	if err := s.setupWorkers(); err != nil {
		s.Shutdown()
		s.logger.Printf("[ERR] nomad: failed to start workers: %s", err)
		return nil, fmt.Errorf("Failed to start workers: %v", err)
	}

	// Setup the Consul syncer
	if err := s.setupConsulSyncer(); err != nil {
		return nil, fmt.Errorf("failed to create server Consul syncer: %v", err)
	}

	// Monitor leadership changes
	go s.monitorLeadership()

	// Start ingesting events for Serf
	go s.serfEventHandler()

	// Start the RPC listeners
	go s.listen()

	// Emit metrics for the eval broker
	go evalBroker.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics for the plan queue
	go planQueue.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics for the blocked eval tracker.
	go blockedEvals.EmitStats(time.Second, s.shutdownCh)

	// Emit metrics
	go s.heartbeatStats()

	// Done
	return s, nil
}

// Shutdown is used to shutdown the server
func (s *Server) Shutdown() error {
	s.logger.Printf("[INFO] nomad: shutting down server")
	s.shutdownLock.Lock()
	defer s.shutdownLock.Unlock()

	if s.shutdown {
		return nil
	}

	s.shutdown = true
	close(s.shutdownCh)

	if s.serf != nil {
		s.serf.Shutdown()
	}

	if s.raft != nil {
		s.raftTransport.Close()
		s.raftLayer.Close()
		future := s.raft.Shutdown()
		if err := future.Error(); err != nil {
			s.logger.Printf("[WARN] nomad: Error shutting down raft: %s", err)
		}
		if s.raftStore != nil {
			s.raftStore.Close()
		}
	}

	// Shutdown the RPC listener
	if s.rpcListener != nil {
		s.rpcListener.Close()
	}

	// Close the connection pool
	s.connPool.Shutdown()

	// Close the fsm
	if s.fsm != nil {
		s.fsm.Close()
	}

	// Stop Vault token renewal
	if s.vault != nil {
		s.vault.Stop()
	}

	return nil
}

// IsShutdown checks if the server is shutdown
func (s *Server) IsShutdown() bool {
	select {
	case <-s.shutdownCh:
		return true
	default:
		return false
	}
}

// Leave is used to prepare for a graceful shutdown of the server
func (s *Server) Leave() error {
	s.logger.Printf("[INFO] nomad: server starting leave")
	s.left = true

	// Check the number of known peers
	numPeers, err := s.numOtherPeers()
	if err != nil {
		s.logger.Printf("[ERR] nomad: failed to check raft peers: %v", err)
		return err
	}

	// If we are the current leader, and we have any other peers (cluster has multiple
	// servers), we should do a RemovePeer to safely reduce the quorum size. If we are
	// not the leader, then we should issue our leave intention and wait to be removed
	// for some sane period of time.
	isLeader := s.IsLeader()
	if isLeader && numPeers > 0 {
		future := s.raft.RemovePeer(s.raftTransport.LocalAddr())
		if err := future.Error(); err != nil && err != raft.ErrUnknownPeer {
			s.logger.Printf("[ERR] nomad: failed to remove ourself as raft peer: %v", err)
		}
	}

	// Leave the gossip pool
	if s.serf != nil {
		if err := s.serf.Leave(); err != nil {
			s.logger.Printf("[ERR] nomad: failed to leave Serf cluster: %v", err)
		}
	}

	// If we were not leader, wait to be safely removed from the cluster.
	// We must wait to allow the raft replication to take place, otherwise
	// an immediate shutdown could cause a loss of quorum.
	if !isLeader {
		limit := time.Now().Add(raftRemoveGracePeriod)
		for numPeers > 0 && time.Now().Before(limit) {
			// Update the number of peers
			numPeers, err = s.numOtherPeers()
			if err != nil {
				s.logger.Printf("[ERR] nomad: failed to check raft peers: %v", err)
				break
			}

			// Avoid the sleep if we are done
			if numPeers == 0 {
				break
			}

			// Sleep a while and check again
			time.Sleep(50 * time.Millisecond)
		}
		if numPeers != 0 {
			s.logger.Printf("[WARN] nomad: failed to leave raft peer set gracefully, timeout")
		}
	}
	return nil
}
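// The sketch below is an editor's illustration and not part of the upstream
// file: it shows the shutdown pattern the code above relies on. Shutdown()
// closes s.shutdownCh exactly once, and every long-running goroutine started
// in NewServer (EmitStats, heartbeatStats, listen, ...) selects on that
// channel, so a single close broadcasts the stop signal to all of them. The
// helper name and signature are hypothetical.
func exampleShutdownAwareLoop(period time.Duration, shutdownCh <-chan struct{}, work func()) {
	ticker := time.NewTicker(period)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			// Do one round of periodic work (e.g. emitting stats).
			work()
		case <-shutdownCh:
			// Channel was closed by Shutdown(); exit promptly.
			return
		}
	}
}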
// setupBootstrapHandler() creates the closure necessary to support a Consul
// fallback handler.
func (s *Server) setupBootstrapHandler() error {
	// peersTimeout is used to indicate to the Consul Syncer that the
	// current Nomad Server has a stale peer set. peersTimeout will time
	// out if the Consul Syncer bootstrapFn has not observed a Raft
	// leader in maxStaleLeadership. If peersTimeout has been triggered,
	// the Consul Syncer will begin querying Consul for other Nomad
	// Servers.
	//
	// NOTE: time.Timer is used vs time.Time in order to handle clock
	// drift because time.Timer is implemented as a monotonic clock.
	var peersTimeout *time.Timer = time.NewTimer(0)

	// consulQueryCount is the number of times the bootstrapFn has been
	// called, regardless of success.
	var consulQueryCount uint64

	// leadershipTimedOut is a helper method that returns true if the
	// peersTimeout timer has expired.
	leadershipTimedOut := func() bool {
		select {
		case <-peersTimeout.C:
			return true
		default:
			return false
		}
	}

	// The bootstrapFn callback handler is used to periodically poll
	// Consul to look up the Nomad Servers in Consul. In the event the
	// server has been brought up without a `retry-join` configuration
	// and this Server is partitioned from the rest of the cluster,
	// periodically poll Consul to reattach this Server to other servers
	// in the same region and automatically reform a quorum (assuming the
	// correct number of servers required for quorum are present).
	bootstrapFn := func() error {
		// If there is a raft leader, do nothing
		if s.raft.Leader() != "" {
			peersTimeout.Reset(maxStaleLeadership)
			return nil
		}

		// (ab)use serf.go's behavior of setting BootstrapExpect to
		// zero if we have bootstrapped.
		bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
		if bootstrapExpect == 0 {
			// This Nomad Server has been bootstrapped. Rely on
			// the peersTimeout firing as a guard to prevent
			// aggressive querying of Consul.
			if !leadershipTimedOut() {
				return nil
			}
		} else {
			if consulQueryCount > 0 && !leadershipTimedOut() {
				return nil
			}

			// This Nomad Server has not been bootstrapped, reach
			// out to Consul if our peer list is less than
			// `bootstrap_expect`.
			raftPeers, err := s.raftPeers.Peers()
			if err != nil {
				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
				return nil
			}

			// The necessary number of Nomad Servers required for
			// quorum has been reached, we do not need to poll
			// Consul. Let the normal timeout-based strategy
			// take over.
			if len(raftPeers) >= int(bootstrapExpect) {
				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
				return nil
			}
		}
		consulQueryCount++

		s.logger.Printf("[DEBUG] server.consul: lost contact with Nomad quorum, falling back to Consul for server list")

		consulCatalog := s.consulSyncer.ConsulClient().Catalog()
		dcs, err := consulCatalog.Datacenters()
		if err != nil {
			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
			return fmt.Errorf("server.consul: unable to query Consul datacenters: %v", err)
		}
		if len(dcs) > 2 {
			// Query the local DC first, then shuffle the
			// remaining DCs. If additional calls to bootstrapFn
			// are necessary, this Nomad Server will eventually
			// walk all datacenters until it finds enough hosts to
			// form a quorum.
			shuffleStrings(dcs[1:])
			dcs = dcs[0:lib.MinInt(len(dcs), datacenterQueryLimit)]
		}

		nomadServerServiceName := s.config.ConsulConfig.ServerServiceName
		var mErr multierror.Error
		const defaultMaxNumNomadServers = 8
		nomadServerServices := make([]string, 0, defaultMaxNumNomadServers)
		localNode := s.serf.Memberlist().LocalNode()
		for _, dc := range dcs {
			consulOpts := &consulapi.QueryOptions{
				AllowStale: true,
				Datacenter: dc,
				Near:       "_agent",
				WaitTime:   consul.DefaultQueryWaitDuration,
			}
			consulServices, _, err := consulCatalog.Service(nomadServerServiceName, consul.ServiceTagSerf, consulOpts)
			if err != nil {
				err := fmt.Errorf("failed to query service %q in Consul datacenter %q: %v", nomadServerServiceName, dc, err)
				s.logger.Printf("[WARN] server.consul: %v", err)
				mErr.Errors = append(mErr.Errors, err)
				continue
			}

			for _, cs := range consulServices {
				port := strconv.FormatInt(int64(cs.ServicePort), 10)
				addr := cs.ServiceAddress
				if addr == "" {
					addr = cs.Address
				}
				if localNode.Addr.String() == addr && int(localNode.Port) == cs.ServicePort {
					continue
				}
				serverAddr := net.JoinHostPort(addr, port)
				nomadServerServices = append(nomadServerServices, serverAddr)
			}
		}

		if len(nomadServerServices) == 0 {
			if len(mErr.Errors) > 0 {
				peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
				return mErr.ErrorOrNil()
			}

			// Log the error and return nil so future handlers
			// can attempt to register the `nomad` service.
			pollInterval := peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor)
			s.logger.Printf("[TRACE] server.consul: no Nomad Servers advertising service %+q in Consul datacenters %+q, sleeping for %v", nomadServerServiceName, dcs, pollInterval)
			peersTimeout.Reset(pollInterval)
			return nil
		}

		numServersContacted, err := s.Join(nomadServerServices)
		if err != nil {
			peersTimeout.Reset(peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor))
			return fmt.Errorf("contacted %d Nomad Servers: %v", numServersContacted, err)
		}

		peersTimeout.Reset(maxStaleLeadership)
		s.logger.Printf("[INFO] server.consul: successfully contacted %d Nomad Servers", numServersContacted)

		return nil
	}

	s.consulSyncer.AddPeriodicHandler("Nomad Server Fallback Server Handler", bootstrapFn)
	return nil
}

// setupConsulSyncer creates Server-mode consul.Syncer which periodically
// executes callbacks on a fixed interval.
func (s *Server) setupConsulSyncer() error {
	if s.config.ConsulConfig.ServerAutoJoin {
		if err := s.setupBootstrapHandler(); err != nil {
			return err
		}
	}

	return nil
}
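// The helper below is an editor's sketch and not part of the upstream file:
// it factors out the jittered retry interval that bootstrapFn resets
// peersTimeout to on its failure paths above. lib.RandomStagger returns a
// random duration in [0, d), so with peersPollInterval = 45s and
// peersPollJitterFactor = 2 the next poll fires between 45s and 67.5s
// later, spreading Consul queries across servers. The function name is
// hypothetical.
func nextPeersPollInterval() time.Duration {
	return peersPollInterval + lib.RandomStagger(peersPollInterval/peersPollJitterFactor)
}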
// setupVaultClient is used to set up the Vault API client.
func (s *Server) setupVaultClient() error {
	v, err := NewVaultClient(s.config.VaultConfig, s.logger, s.purgeVaultAccessors)
	if err != nil {
		return err
	}
	s.vault = v
	return nil
}

// setupRPC is used to setup the RPC listener
func (s *Server) setupRPC(tlsWrap tlsutil.RegionWrapper) error {
	// Create endpoints
	s.endpoints.Status = &Status{s}
	s.endpoints.Node = &Node{srv: s}
	s.endpoints.Job = &Job{s}
	s.endpoints.Eval = &Eval{s}
	s.endpoints.Plan = &Plan{s}
	s.endpoints.Alloc = &Alloc{s}
	s.endpoints.Region = &Region{s}
	s.endpoints.Periodic = &Periodic{s}
	s.endpoints.System = &System{s}

	// Register the handlers
	s.rpcServer.Register(s.endpoints.Status)
	s.rpcServer.Register(s.endpoints.Node)
	s.rpcServer.Register(s.endpoints.Job)
	s.rpcServer.Register(s.endpoints.Eval)
	s.rpcServer.Register(s.endpoints.Plan)
	s.rpcServer.Register(s.endpoints.Alloc)
	s.rpcServer.Register(s.endpoints.Region)
	s.rpcServer.Register(s.endpoints.Periodic)
	s.rpcServer.Register(s.endpoints.System)

	list, err := net.ListenTCP("tcp", s.config.RPCAddr)
	if err != nil {
		return err
	}
	s.rpcListener = list

	if s.config.RPCAdvertise != nil {
		s.rpcAdvertise = s.config.RPCAdvertise
	} else {
		s.rpcAdvertise = s.rpcListener.Addr()
	}

	// Verify that we have a usable advertise address
	addr, ok := s.rpcAdvertise.(*net.TCPAddr)
	if !ok {
		list.Close()
		return fmt.Errorf("RPC advertise address is not a TCP Address: %v", s.rpcAdvertise)
	}
	if addr.IP.IsUnspecified() {
		list.Close()
		return fmt.Errorf("RPC advertise address is not advertisable: %v", addr)
	}

	wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
	s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrapper)
	return nil
}

// setupRaft is used to setup and initialize Raft
func (s *Server) setupRaft() error {
	// If we are in bootstrap mode, enable a single node cluster
	if s.config.Bootstrap || (s.config.DevMode && !s.config.DevDisableBootstrap) {
		s.config.RaftConfig.EnableSingleNode = true
	}

	// Create the FSM
	var err error
	s.fsm, err = NewFSM(s.evalBroker, s.periodicDispatcher, s.blockedEvals, s.config.LogOutput)
	if err != nil {
		return err
	}

	// Create a transport layer
	trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout,
		s.config.LogOutput)
	s.raftTransport = trans

	// Create the backend raft store for logs and stable storage
	var log raft.LogStore
	var stable raft.StableStore
	var snap raft.SnapshotStore
	var peers raft.PeerStore
	if s.config.DevMode {
		store := raft.NewInmemStore()
		s.raftInmem = store
		stable = store
		log = store
		snap = raft.NewDiscardSnapshotStore()
		peers = &raft.StaticPeers{}
		s.raftPeers = peers

	} else {
		// Create the base raft path
		path := filepath.Join(s.config.DataDir, raftState)
		if err := ensurePath(path, true); err != nil {
			return err
		}

		// Create the BoltDB backend
		store, err := raftboltdb.NewBoltStore(filepath.Join(path, "raft.db"))
		if err != nil {
			return err
		}
		s.raftStore = store
		stable = store

		// Wrap the store in a LogCache to improve performance
		cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
		if err != nil {
			store.Close()
			return err
		}
		log = cacheStore

		// Create the snapshot store
		snapshots, err := raft.NewFileSnapshotStore(path, snapshotsRetained, s.config.LogOutput)
		if err != nil {
			if s.raftStore != nil {
				s.raftStore.Close()
			}
			return err
		}
		snap = snapshots

		// Setup the peer store
		s.raftPeers = raft.NewJSONPeers(path, trans)
		peers = s.raftPeers
	}

	// Ensure local host is always included if we are in bootstrap mode
	if s.config.RaftConfig.EnableSingleNode {
		p, err := peers.Peers()
		if err != nil {
			if s.raftStore != nil {
				s.raftStore.Close()
			}
			return err
		}
		if !raft.PeerContained(p, trans.LocalAddr()) {
			peers.SetPeers(raft.AddUniquePeer(p, trans.LocalAddr()))
		}
	}

	// Make sure we set the LogOutput
	s.config.RaftConfig.LogOutput = s.config.LogOutput

	// Setup the leader channel
	leaderCh := make(chan bool, 1)
	s.config.RaftConfig.NotifyCh = leaderCh
	s.leaderCh = leaderCh

	// Setup the Raft store
	s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm, log, stable,
		snap, peers, trans)
	if err != nil {
		if s.raftStore != nil {
			s.raftStore.Close()
		}
		trans.Close()
		return err
	}
	return nil
}

// setupSerf is used to setup and initialize a Serf
func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string) (*serf.Serf, error) {
	conf.Init()
	conf.NodeName = fmt.Sprintf("%s.%s", s.config.NodeName, s.config.Region)
	conf.Tags["role"] = "nomad"
	conf.Tags["region"] = s.config.Region
	conf.Tags["dc"] = s.config.Datacenter
	conf.Tags["vsn"] = fmt.Sprintf("%d", structs.ApiMajorVersion)
	conf.Tags["mvn"] = fmt.Sprintf("%d", structs.ApiMinorVersion)
	conf.Tags["build"] = s.config.Build
	conf.Tags["port"] = fmt.Sprintf("%d", s.rpcAdvertise.(*net.TCPAddr).Port)
	if s.config.Bootstrap || (s.config.DevMode && !s.config.DevDisableBootstrap) {
		conf.Tags["bootstrap"] = "1"
	}
	bootstrapExpect := atomic.LoadInt32(&s.config.BootstrapExpect)
	if bootstrapExpect != 0 {
		conf.Tags["expect"] = fmt.Sprintf("%d", bootstrapExpect)
	}
	conf.MemberlistConfig.LogOutput = s.config.LogOutput
	conf.LogOutput = s.config.LogOutput
	conf.EventCh = ch
	if !s.config.DevMode {
		conf.SnapshotPath = filepath.Join(s.config.DataDir, path)
		if err := ensurePath(conf.SnapshotPath, false); err != nil {
			return nil, err
		}
	}
	conf.ProtocolVersion = protocolVersionMap[s.config.ProtocolVersion]
	conf.RejoinAfterLeave = true
	conf.Merge = &serfMergeDelegate{}

	// Until Nomad supports this fully, we disable automatic resolution.
	// When enabled, the Serf gossip may just turn off if we are the minority
	// node which is rather unexpected.
	conf.EnableNameConflictResolution = false
	return serf.Create(conf)
}

// setupWorkers is used to start the scheduling workers
func (s *Server) setupWorkers() error {
	// Check if all the schedulers are disabled
	if len(s.config.EnabledSchedulers) == 0 || s.config.NumSchedulers == 0 {
		s.logger.Printf("[WARN] nomad: no enabled schedulers")
		return nil
	}

	// Start the workers
	for i := 0; i < s.config.NumSchedulers; i++ {
		if w, err := NewWorker(s); err != nil {
			return err
		} else {
			s.workers = append(s.workers, w)
		}
	}
	s.logger.Printf("[INFO] nomad: starting %d scheduling worker(s) for %v",
		s.config.NumSchedulers, s.config.EnabledSchedulers)
	return nil
}

// numOtherPeers is used to check on the number of known peers
// excluding the local node
func (s *Server) numOtherPeers() (int, error) {
	peers, err := s.raftPeers.Peers()
	if err != nil {
		return 0, err
	}
	otherPeers := raft.ExcludePeer(peers, s.raftTransport.LocalAddr())
	return len(otherPeers), nil
}

// IsLeader checks if this server is the cluster leader
func (s *Server) IsLeader() bool {
	return s.raft.State() == raft.Leader
}

// Join is used to have Nomad join the gossip ring.
// The target address should be another node listening on the
// Serf address
func (s *Server) Join(addrs []string) (int, error) {
	return s.serf.Join(addrs, true)
}

// LocalMember is used to return the local node
func (s *Server) LocalMember() serf.Member {
	return s.serf.LocalMember()
}

// Members is used to return the members of the serf cluster
func (s *Server) Members() []serf.Member {
	return s.serf.Members()
}

// RemoveFailedNode is used to remove a failed node from the cluster
func (s *Server) RemoveFailedNode(node string) error {
	return s.serf.RemoveFailedNode(node)
}

// KeyManager returns the Serf keyring manager
func (s *Server) KeyManager() *serf.KeyManager {
	return s.serf.KeyManager()
}

// Encrypted determines if gossip is encrypted
func (s *Server) Encrypted() bool {
	return s.serf.EncryptionEnabled()
}

// State returns the underlying state store. This should *not*
// be used to modify state directly.
func (s *Server) State() *state.StateStore {
	return s.fsm.State()
}
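// The snippet below is an editor's sketch and not part of the upstream file:
// it mirrors how bootstrapFn builds the address list it hands to Join above.
// Each entry is a "host:port" string for another server's Serf endpoint,
// built with net.JoinHostPort so IPv6 addresses are bracketed correctly.
// The helper name and inputs are hypothetical.
func exampleJoinAddrs(hosts []string, serfPort int) []string {
	port := strconv.Itoa(serfPort)
	addrs := make([]string, 0, len(hosts))
	for _, h := range hosts {
		addrs = append(addrs, net.JoinHostPort(h, port))
	}
	// A caller would then pass addrs to (*Server).Join to enter the gossip ring.
	return addrs
}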
// Regions returns the known regions in the cluster.
func (s *Server) Regions() []string {
	s.peerLock.RLock()
	defer s.peerLock.RUnlock()

	regions := make([]string, 0, len(s.peers))
	for region := range s.peers {
		regions = append(regions, region)
	}
	sort.Strings(regions)
	return regions
}

// inmemCodec is used to do an RPC call without going over a network
type inmemCodec struct {
	method string
	args   interface{}
	reply  interface{}
	err    error
}

func (i *inmemCodec) ReadRequestHeader(req *rpc.Request) error {
	req.ServiceMethod = i.method
	return nil
}

func (i *inmemCodec) ReadRequestBody(args interface{}) error {
	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.args)))
	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(args)))
	dst.Set(sourceValue)
	return nil
}

func (i *inmemCodec) WriteResponse(resp *rpc.Response, reply interface{}) error {
	if resp.Error != "" {
		i.err = errors.New(resp.Error)
		return nil
	}
	sourceValue := reflect.Indirect(reflect.Indirect(reflect.ValueOf(reply)))
	dst := reflect.Indirect(reflect.Indirect(reflect.ValueOf(i.reply)))
	dst.Set(sourceValue)
	return nil
}

func (i *inmemCodec) Close() error {
	return nil
}

// RPC is used to make a local RPC call
func (s *Server) RPC(method string, args interface{}, reply interface{}) error {
	codec := &inmemCodec{
		method: method,
		args:   args,
		reply:  reply,
	}
	if err := s.rpcServer.ServeRequest(codec); err != nil {
		return err
	}
	return codec.err
}

// Stats is used to return statistics for debugging and insight
// for various sub-systems
func (s *Server) Stats() map[string]map[string]string {
	toString := func(v uint64) string {
		return strconv.FormatUint(v, 10)
	}
	stats := map[string]map[string]string{
		"nomad": map[string]string{
			"server":        "true",
			"leader":        fmt.Sprintf("%v", s.IsLeader()),
			"leader_addr":   s.raft.Leader(),
			"bootstrap":     fmt.Sprintf("%v", s.config.Bootstrap),
			"known_regions": toString(uint64(len(s.peers))),
		},
		"raft":    s.raft.Stats(),
		"serf":    s.serf.Stats(),
		"runtime": RuntimeStats(),
	}
	if peers, err := s.raftPeers.Peers(); err == nil {
		stats["raft"]["raft_peers"] = strings.Join(peers, ",")
	} else {
		s.logger.Printf("[DEBUG] server: error getting raft peers: %v", err)
	}
	return stats
}

// Region returns the region of the server
func (s *Server) Region() string {
	return s.config.Region
}

// Datacenter returns the data center of the server
func (s *Server) Datacenter() string {
	return s.config.Datacenter
}

// GetConfig returns the config of the server for testing purposes only
func (s *Server) GetConfig() *Config {
	return s.config
}
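// The function below is an editor's sketch and not part of the upstream
// file: it shows the in-process RPC path end to end. Server.RPC wraps the
// call in an inmemCodec and hands it to the registered net/rpc server, so
// the method string must name one of the endpoints registered in setupRPC
// (Status, Node, Job, ...) plus a method, and both args and reply should be
// pointers so the reflection-based copies in ReadRequestBody and
// WriteResponse work. "Status.Peers" and structs.GenericRequest are
// assumptions about this version's Status endpoint and may differ.
func exampleLocalPeersRPC(s *Server) ([]string, error) {
	var peers []string
	args := &structs.GenericRequest{}
	if err := s.RPC("Status.Peers", args, &peers); err != nil {
		return nil, err
	}
	return peers, nil
}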