gopkg.in/hashicorp/nomad.v0@v0.11.8/nomad/leader.go

package nomad

import (
	"bytes"
	"context"
	"fmt"
	"math/rand"
	"net"
	"strings"
	"sync"
	"time"

	"golang.org/x/time/rate"

	metrics "github.com/armon/go-metrics"
	log "github.com/hashicorp/go-hclog"
	memdb "github.com/hashicorp/go-memdb"
	version "github.com/hashicorp/go-version"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/raft"
	"github.com/hashicorp/serf/serf"
	"github.com/pkg/errors"
)

const (
	// failedEvalUnblockInterval is the interval at which failed evaluations are
	// unblocked to re-enter the scheduler. A failed evaluation occurs under
	// high contention when the scheduler's plan does not make progress.
	failedEvalUnblockInterval = 1 * time.Minute

	// replicationRateLimit is used to rate limit how often data is replicated
	// between the authoritative region and the local region
	replicationRateLimit rate.Limit = 10.0

	// barrierWriteTimeout is used to give Raft a chance to process a
	// possible loss of leadership event if we are unable to get a barrier
	// while leader.
	barrierWriteTimeout = 2 * time.Minute
)

var minAutopilotVersion = version.Must(version.NewVersion("0.8.0"))

var minSchedulerConfigVersion = version.Must(version.NewVersion("0.9.0"))

var minClusterIDVersion = version.Must(version.NewVersion("0.10.4"))

// monitorLeadership is used to monitor if we acquire or lose our role
// as the leader in the Raft cluster. There is some work the leader is
// expected to do, so we must react to changes.
func (s *Server) monitorLeadership() {
	var weAreLeaderCh chan struct{}
	var leaderLoop sync.WaitGroup

	leaderCh := s.raft.LeaderCh()

	leaderStep := func(isLeader bool) {
		if isLeader {
			if weAreLeaderCh != nil {
				s.logger.Error("attempted to start the leader loop while running")
				return
			}

			weAreLeaderCh = make(chan struct{})
			leaderLoop.Add(1)
			go func(ch chan struct{}) {
				defer leaderLoop.Done()
				s.leaderLoop(ch)
			}(weAreLeaderCh)
			s.logger.Info("cluster leadership acquired")
			return
		}

		if weAreLeaderCh == nil {
			s.logger.Error("attempted to stop the leader loop while not running")
			return
		}

		s.logger.Debug("shutting down leader loop")
		close(weAreLeaderCh)
		leaderLoop.Wait()
		weAreLeaderCh = nil
		s.logger.Info("cluster leadership lost")
	}

	wasLeader := false
	for {
		select {
		case isLeader := <-leaderCh:
			if wasLeader != isLeader {
				wasLeader = isLeader
				// normal case where we went through a transition
				leaderStep(isLeader)
			} else if wasLeader && isLeader {
				// Server lost but then gained leadership immediately.
				// During this time, this server may have received
				// Raft transitions that haven't been applied to the FSM
				// yet.
				// Ensure that the FSM has caught up and eval queues are refreshed
				s.logger.Warn("cluster leadership lost and gained leadership immediately. Could indicate network issues, memory paging, or high CPU load.")

				leaderStep(false)
				leaderStep(true)
			} else {
				// Server gained but lost leadership immediately
				// before it reacted; nothing to do, move on
				s.logger.Warn("cluster leadership gained and lost leadership immediately. Could indicate network issues, memory paging, or high CPU load.")
			}
		case <-s.shutdownCh:
			if weAreLeaderCh != nil {
				leaderStep(false)
			}
			return
		}
	}
}
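// The pattern above -- consuming raft's LeaderCh and starting or stopping a
// single long-lived leader goroutine guarded by a WaitGroup -- can be reduced
// to a small standalone sketch. This is illustrative only; the channel names
// and the run callback are stand-ins, not part of this package:
//
//	func watchLeadership(leaderCh <-chan bool, shutdownCh <-chan struct{}, run func(stop chan struct{})) {
//		var stop chan struct{}
//		var wg sync.WaitGroup
//		for {
//			select {
//			case isLeader := <-leaderCh:
//				if isLeader && stop == nil {
//					stop = make(chan struct{})
//					wg.Add(1)
//					go func(ch chan struct{}) { defer wg.Done(); run(ch) }(stop)
//				} else if !isLeader && stop != nil {
//					close(stop)
//					wg.Wait()
//					stop = nil
//				}
//			case <-shutdownCh:
//				if stop != nil {
//					close(stop)
//					wg.Wait()
//				}
//				return
//			}
//		}
//	}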
// leaderLoop runs as long as we are the leader to run various
// maintenance activities
func (s *Server) leaderLoop(stopCh chan struct{}) {
	var reconcileCh chan serf.Member
	establishedLeader := false

RECONCILE:
	// Setup a reconciliation timer
	reconcileCh = nil
	interval := time.After(s.config.ReconcileInterval)

	// Apply a raft barrier to ensure our FSM is caught up
	start := time.Now()
	barrier := s.raft.Barrier(barrierWriteTimeout)
	if err := barrier.Error(); err != nil {
		s.logger.Error("failed to wait for barrier", "error", err)
		goto WAIT
	}
	metrics.MeasureSince([]string{"nomad", "leader", "barrier"}, start)

	// Check if we need to handle initial leadership actions
	if !establishedLeader {
		if err := s.establishLeadership(stopCh); err != nil {
			s.logger.Error("failed to establish leadership", "error", err)

			// Immediately revoke leadership since we didn't successfully
			// establish leadership.
			if err := s.revokeLeadership(); err != nil {
				s.logger.Error("failed to revoke leadership", "error", err)
			}

			goto WAIT
		}

		establishedLeader = true
		defer func() {
			if err := s.revokeLeadership(); err != nil {
				s.logger.Error("failed to revoke leadership", "error", err)
			}
		}()
	}

	// Reconcile any missing data
	if err := s.reconcile(); err != nil {
		s.logger.Error("failed to reconcile", "error", err)
		goto WAIT
	}

	// Initial reconcile worked, now we can process the channel
	// updates
	reconcileCh = s.reconcileCh

	// Poll the stop channel to give it priority so we don't waste time
	// trying to perform the other operations if we have been asked to shut
	// down.
	select {
	case <-stopCh:
		return
	default:
	}

WAIT:
	// Wait until leadership is lost
	for {
		select {
		case <-stopCh:
			return
		case <-s.shutdownCh:
			return
		case <-interval:
			goto RECONCILE
		case member := <-reconcileCh:
			s.reconcileMember(member)
		}
	}
}

// establishLeadership is invoked once we become leader and are able
// to invoke an initial barrier. The barrier is used to ensure any
// previously inflight transactions have been committed and that our
// state is up-to-date.
func (s *Server) establishLeadership(stopCh chan struct{}) error {
	defer metrics.MeasureSince([]string{"nomad", "leader", "establish_leadership"}, time.Now())

	// Generate a leader ACL token. This will allow the leader to issue work
	// that requires a valid ACL token.
	s.setLeaderAcl(uuid.Generate())

	// Disable workers to free half the cores for use in the plan queue and
	// evaluation broker
	for _, w := range s.pausableWorkers() {
		w.SetPause(true)
	}

	// Initialize and start the autopilot routine
	s.getOrCreateAutopilotConfig()
	s.autopilot.Start()

	// Initialize scheduler configuration
	s.getOrCreateSchedulerConfig()

	// Initialize the ClusterID
	_, _ = s.ClusterID()
	// todo: use cluster ID for stuff, later!
	// Enable the plan queue, since we are now the leader
	s.planQueue.SetEnabled(true)

	// Start the plan evaluator
	go s.planApply()

	// Enable the eval broker, since we are now the leader
	s.evalBroker.SetEnabled(true)

	// Enable the blocked eval tracker, since we are now the leader
	s.blockedEvals.SetEnabled(true)
	s.blockedEvals.SetTimetable(s.fsm.TimeTable())

	// Enable the deployment watcher, since we are now the leader
	s.deploymentWatcher.SetEnabled(true, s.State())

	// Enable the NodeDrainer
	s.nodeDrainer.SetEnabled(true, s.State())

	// Enable the volume watcher, since we are now the leader
	s.volumeWatcher.SetEnabled(true, s.State())

	// Restore the eval broker state
	if err := s.restoreEvals(); err != nil {
		return err
	}

	// Activate the vault client
	s.vault.SetActive(true)

	// Enable the periodic dispatcher, since we are now the leader.
	s.periodicDispatcher.SetEnabled(true)

	// Activate RPC now that the local FSM has caught up with Raft (as evidenced
	// by the successful Barrier call) and all leader-related components (e.g.
	// broker queue) are enabled. Auxiliary processes (e.g. background,
	// bookkeeping, and cleanup tasks) can start afterwards.
	s.setConsistentReadReady()

	// Further cleanups and follow-up work that don't block RPC consistency

	// Restore the periodic dispatcher state
	if err := s.restorePeriodicDispatcher(); err != nil {
		return err
	}

	// Schedule periodic jobs
	go s.schedulePeriodic(stopCh)

	// Reap any failed evaluations
	go s.reapFailedEvaluations(stopCh)

	// Reap any duplicate blocked evaluations
	go s.reapDupBlockedEvaluations(stopCh)

	// Periodically unblock failed evaluations
	go s.periodicUnblockFailedEvals(stopCh)

	// Periodically publish job summary metrics
	go s.publishJobSummaryMetrics(stopCh)

	// Periodically publish job status metrics
	go s.publishJobStatusMetrics(stopCh)

	// Setup the heartbeat timers. This is done both when starting up and when
	// a leader failover happens. Since the timers are maintained by the leader
	// node, effectively this means all the timers are renewed at the time of
	// failover. The TTL contract is that the session will not be expired
	// before the TTL, so expiring it later is allowable.
	//
	// This MUST be done after the initial barrier to ensure the latest Nodes
	// are available to be initialized. Otherwise initialization may use stale
	// data.
	if err := s.initializeHeartbeatTimers(); err != nil {
		s.logger.Error("heartbeat timer setup failed", "error", err)
		return err
	}

	// Start replication of ACL policies and tokens if ACLs are enabled and we
	// are not the authoritative region.
	if s.config.ACLEnabled && s.config.Region != s.config.AuthoritativeRegion {
		go s.replicateACLPolicies(stopCh)
		go s.replicateACLTokens(stopCh)
	}

	// Setup any enterprise systems required.
	if err := s.establishEnterpriseLeadership(stopCh); err != nil {
		return err
	}

	// Cleanup orphaned Vault token accessors
	if err := s.revokeVaultAccessorsOnRestore(); err != nil {
		return err
	}

	// Cleanup orphaned Service Identity token accessors
	if err := s.revokeSITokenAccessorsOnRestore(); err != nil {
		return err
	}

	return nil
}

// restoreEvals is used to restore pending evaluations into the eval broker and
// blocked evaluations into the blocked eval tracker. The broker and blocked
// eval tracker are maintained only by the leader, so they must be restored
// anytime a leadership transition takes place.
func (s *Server) restoreEvals() error {
	// Get an iterator over every evaluation
	ws := memdb.NewWatchSet()
	iter, err := s.fsm.State().Evals(ws)
	if err != nil {
		return fmt.Errorf("failed to get evaluations: %v", err)
	}

	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		eval := raw.(*structs.Evaluation)

		if eval.ShouldEnqueue() {
			s.evalBroker.Enqueue(eval)
		} else if eval.ShouldBlock() {
			s.blockedEvals.Block(eval)
		}
	}
	return nil
}

// revokeVaultAccessorsOnRestore is used to revoke Vault token accessors
// belonging to allocations or nodes that are terminal.
func (s *Server) revokeVaultAccessorsOnRestore() error {
	// An accessor should be revoked if its allocation or node is terminal
	ws := memdb.NewWatchSet()
	state := s.fsm.State()
	iter, err := state.VaultAccessors(ws)
	if err != nil {
		return fmt.Errorf("failed to get vault accessors: %v", err)
	}

	var revoke []*structs.VaultAccessor
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}

		va := raw.(*structs.VaultAccessor)

		// Check the allocation
		alloc, err := state.AllocByID(ws, va.AllocID)
		if err != nil {
			return fmt.Errorf("failed to lookup allocation %q: %v", va.AllocID, err)
		}
		if alloc == nil || alloc.Terminated() {
			// No longer running and should be revoked
			revoke = append(revoke, va)
			continue
		}

		// Check the node
		node, err := state.NodeByID(ws, va.NodeID)
		if err != nil {
			return fmt.Errorf("failed to lookup node %q: %v", va.NodeID, err)
		}
		if node == nil || node.TerminalStatus() {
			// Node is terminal so any accessor from it should be revoked
			revoke = append(revoke, va)
			continue
		}
	}

	if len(revoke) != 0 {
		s.logger.Info("revoking vault accessors after becoming leader", "accessors", len(revoke))

		if err := s.vault.MarkForRevocation(revoke); err != nil {
			return fmt.Errorf("failed to revoke tokens: %v", err)
		}
	}

	return nil
}

// revokeSITokenAccessorsOnRestore is used to revoke Service Identity token
// accessors on behalf of allocs that are now gone / terminal.
func (s *Server) revokeSITokenAccessorsOnRestore() error {
	ws := memdb.NewWatchSet()
	fsmState := s.fsm.State()
	iter, err := fsmState.SITokenAccessors(ws)
	if err != nil {
		return errors.Wrap(err, "failed to get SI token accessors")
	}

	var toRevoke []*structs.SITokenAccessor
	for raw := iter.Next(); raw != nil; raw = iter.Next() {
		accessor := raw.(*structs.SITokenAccessor)

		// Check the allocation
		alloc, err := fsmState.AllocByID(ws, accessor.AllocID)
		if err != nil {
			return errors.Wrapf(err, "failed to lookup alloc %q", accessor.AllocID)
		}
		if alloc == nil || alloc.Terminated() {
			// no longer running and associated accessors should be revoked
			toRevoke = append(toRevoke, accessor)
			continue
		}

		// Check the node
		node, err := fsmState.NodeByID(ws, accessor.NodeID)
		if err != nil {
			return errors.Wrapf(err, "failed to lookup node %q", accessor.NodeID)
		}
		if node == nil || node.TerminalStatus() {
			// node is terminal and associated accessors should be revoked
			toRevoke = append(toRevoke, accessor)
			continue
		}
	}

	if len(toRevoke) > 0 {
		s.logger.Info("revoking consul accessors after becoming leader", "accessors", len(toRevoke))
		s.consulACLs.MarkForRevocation(toRevoke)
	}

	return nil
}

// restorePeriodicDispatcher is used to restore all periodic jobs into the
// periodic dispatcher. It also determines whether a periodic job should have
// launched during the leadership transition and, if so, force runs it. The
// periodic dispatcher is maintained only by the leader, so it must be restored
// anytime a leadership transition takes place.
func (s *Server) restorePeriodicDispatcher() error {
	logger := s.logger.Named("periodic")
	ws := memdb.NewWatchSet()
	iter, err := s.fsm.State().JobsByPeriodic(ws, true)
	if err != nil {
		return fmt.Errorf("failed to get periodic jobs: %v", err)
	}

	now := time.Now()
	for i := iter.Next(); i != nil; i = iter.Next() {
		job := i.(*structs.Job)

		// We skip adding parameterized jobs because they themselves aren't
		// tracked, only the dispatched children are.
		if job.IsParameterized() {
			continue
		}

		if err := s.periodicDispatcher.Add(job); err != nil {
			logger.Error("failed to add job to periodic dispatcher", "error", err)
			continue
		}

		// We do not need to force run the job since it isn't active.
		if !job.IsPeriodicActive() {
			continue
		}

		// If the periodic job has never been launched before, launch will hold
		// the time the periodic job was added. Otherwise it has the last launch
		// time of the periodic job.
		launch, err := s.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID)
		if err != nil {
			return fmt.Errorf("failed to get periodic launch time: %v", err)
		}
		if launch == nil {
			return fmt.Errorf("no recorded periodic launch time for job %q in namespace %q",
				job.ID, job.Namespace)
		}

		// nextLaunch is the next launch that should occur.
		nextLaunch, err := job.Periodic.Next(launch.Launch.In(job.Periodic.GetLocation()))
		if err != nil {
			logger.Error("failed to determine next periodic launch for job", "job", job.NamespacedID(), "error", err)
			continue
		}

		// We skip force launching the job if there should be no next launch
		// (the zero case) or if the next launch time is in the future. If it is
		// in the future, it will be handled by the periodic dispatcher.
		if nextLaunch.IsZero() || !nextLaunch.Before(now) {
			continue
		}

		if _, err := s.periodicDispatcher.ForceRun(job.Namespace, job.ID); err != nil {
			logger.Error("force run of periodic job failed", "job", job.NamespacedID(), "error", err)
			return fmt.Errorf("force run of periodic job %q failed: %v", job.NamespacedID(), err)
		}
		logger.Debug("periodic job force run during leadership establishment", "job", job.NamespacedID())
	}

	return nil
}

// schedulePeriodic is used to do periodic job dispatch while we are leader
func (s *Server) schedulePeriodic(stopCh chan struct{}) {
	evalGC := time.NewTicker(s.config.EvalGCInterval)
	defer evalGC.Stop()
	nodeGC := time.NewTicker(s.config.NodeGCInterval)
	defer nodeGC.Stop()
	jobGC := time.NewTicker(s.config.JobGCInterval)
	defer jobGC.Stop()
	deploymentGC := time.NewTicker(s.config.DeploymentGCInterval)
	defer deploymentGC.Stop()
	csiPluginGC := time.NewTicker(s.config.CSIPluginGCInterval)
	defer csiPluginGC.Stop()
	csiVolumeClaimGC := time.NewTicker(s.config.CSIVolumeClaimGCInterval)
	defer csiVolumeClaimGC.Stop()

	// getLatest grabs the latest index from the state store. It returns true if
	// the index was retrieved successfully.
	getLatest := func() (uint64, bool) {
		snapshotIndex, err := s.fsm.State().LatestIndex()
		if err != nil {
			s.logger.Error("failed to determine state store's index", "error", err)
			return 0, false
		}

		return snapshotIndex, true
	}

	for {

		select {
		case <-evalGC.C:
			if index, ok := getLatest(); ok {
				s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobEvalGC, index))
			}
		case <-nodeGC.C:
			if index, ok := getLatest(); ok {
				s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobNodeGC, index))
			}
		case <-jobGC.C:
			if index, ok := getLatest(); ok {
				s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobJobGC, index))
			}
		case <-deploymentGC.C:
			if index, ok := getLatest(); ok {
				s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobDeploymentGC, index))
			}
		case <-csiPluginGC.C:
			if index, ok := getLatest(); ok {
				s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobCSIPluginGC, index))
			}
		case <-csiVolumeClaimGC.C:
			if index, ok := getLatest(); ok {
				s.evalBroker.Enqueue(s.coreJobEval(structs.CoreJobCSIVolumeClaimGC, index))
			}

		case <-stopCh:
			return
		}
	}
}

// coreJobEval returns an evaluation for a core job
func (s *Server) coreJobEval(job string, modifyIndex uint64) *structs.Evaluation {
	return &structs.Evaluation{
		ID:          uuid.Generate(),
		Namespace:   "-",
		Priority:    structs.CoreJobPriority,
		Type:        structs.JobTypeCore,
		TriggeredBy: structs.EvalTriggerScheduled,
		JobID:       job,
		LeaderACL:   s.getLeaderAcl(),
		Status:      structs.EvalStatusPending,
		ModifyIndex: modifyIndex,
	}
}

// reapFailedEvaluations is used to reap evaluations that
// have reached their delivery limit and should be failed
func (s *Server) reapFailedEvaluations(stopCh chan struct{}) {
	for {
		select {
		case <-stopCh:
			return
		default:
			// Scan for a failed evaluation
			eval, token, err := s.evalBroker.Dequeue([]string{failedQueue}, time.Second)
			if err != nil {
				return
			}
			if eval == nil {
				continue
			}

			// Update the status to failed
			updateEval := eval.Copy()
			updateEval.Status = structs.EvalStatusFailed
			updateEval.StatusDescription = fmt.Sprintf("evaluation reached delivery limit (%d)", s.config.EvalDeliveryLimit)
			s.logger.Warn("eval reached delivery limit, marking as failed", "eval", updateEval.GoString())

			// Create a follow-up evaluation that will be used to retry the
			// scheduling for the job after the cluster is hopefully more stable
			// due to the fairly large backoff.
			followupEvalWait := s.config.EvalFailedFollowupBaselineDelay +
				time.Duration(rand.Int63n(int64(s.config.EvalFailedFollowupDelayRange)))

			followupEval := eval.CreateFailedFollowUpEval(followupEvalWait)
			updateEval.NextEval = followupEval.ID
			updateEval.UpdateModifyTime()

			// Update via Raft
			req := structs.EvalUpdateRequest{
				Evals: []*structs.Evaluation{updateEval, followupEval},
			}
			if _, _, err := s.raftApply(structs.EvalUpdateRequestType, &req); err != nil {
				s.logger.Error("failed to update failed eval and create a follow-up", "eval", updateEval.GoString(), "error", err)
				continue
			}

			// Ack completion
			s.evalBroker.Ack(eval.ID, token)
		}
	}
}

// reapDupBlockedEvaluations is used to reap duplicate blocked evaluations so
// that they can be cancelled.
func (s *Server) reapDupBlockedEvaluations(stopCh chan struct{}) {
	for {
		select {
		case <-stopCh:
			return
		default:
			// Scan for duplicate blocked evals.
			dups := s.blockedEvals.GetDuplicates(time.Second)
			if dups == nil {
				continue
			}

			cancel := make([]*structs.Evaluation, len(dups))
			for i, dup := range dups {
				// Update the status to cancelled
				newEval := dup.Copy()
				newEval.Status = structs.EvalStatusCancelled
				newEval.StatusDescription = fmt.Sprintf("existing blocked evaluation exists for job %q", newEval.JobID)
				newEval.UpdateModifyTime()
				cancel[i] = newEval
			}

			// Update via Raft
			req := structs.EvalUpdateRequest{
				Evals: cancel,
			}
			if _, _, err := s.raftApply(structs.EvalUpdateRequestType, &req); err != nil {
				s.logger.Error("failed to update duplicate evals", "evals", log.Fmt("%#v", cancel), "error", err)
				continue
			}
		}
	}
}
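// The delay computed in reapFailedEvaluations above is a fixed baseline plus
// uniform random jitter, so follow-up evaluations from many failed evals do
// not all re-enter the scheduler at the same instant. A minimal sketch of the
// same computation, using hypothetical example values rather than the server's
// configuration:
//
//	baseline := 1 * time.Minute   // stands in for EvalFailedFollowupBaselineDelay
//	delayRange := 5 * time.Minute // stands in for EvalFailedFollowupDelayRange
//	wait := baseline + time.Duration(rand.Int63n(int64(delayRange)))
//	// wait is uniformly distributed in [1m, 6m)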
// periodicUnblockFailedEvals periodically unblocks failed, blocked evaluations.
func (s *Server) periodicUnblockFailedEvals(stopCh chan struct{}) {
	ticker := time.NewTicker(failedEvalUnblockInterval)
	defer ticker.Stop()
	for {
		select {
		case <-stopCh:
			return
		case <-ticker.C:
			// Unblock the failed evaluations
			s.blockedEvals.UnblockFailed()
		}
	}
}

// publishJobSummaryMetrics publishes the job summaries as metrics
func (s *Server) publishJobSummaryMetrics(stopCh chan struct{}) {
	timer := time.NewTimer(0)
	defer timer.Stop()

	for {
		select {
		case <-stopCh:
			return
		case <-timer.C:
			timer.Reset(s.config.StatsCollectionInterval)
			state, err := s.State().Snapshot()
			if err != nil {
				s.logger.Error("failed to get state", "error", err)
				continue
			}
			ws := memdb.NewWatchSet()
			iter, err := state.JobSummaries(ws)
			if err != nil {
				s.logger.Error("failed to get job summaries", "error", err)
				continue
			}

			for {
				raw := iter.Next()
				if raw == nil {
					break
				}
				summary := raw.(*structs.JobSummary)
				if s.config.DisableDispatchedJobSummaryMetrics {
					job, err := state.JobByID(ws, summary.Namespace, summary.JobID)
					if err != nil {
						s.logger.Error("error getting job for summary", "error", err)
						continue
					}
					if job.Dispatched {
						continue
					}
				}
				s.iterateJobSummaryMetrics(summary)
			}
		}
	}
}
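// publishJobSummaryMetrics (and publishJobStatusMetrics below) use a timer
// that fires immediately and is re-armed with the collection interval on every
// pass. A standalone sketch of that polling shape; the interval value and the
// collect function are placeholders, not part of this package:
//
//	timer := time.NewTimer(0)
//	defer timer.Stop()
//	for {
//		select {
//		case <-stopCh:
//			return
//		case <-timer.C:
//			timer.Reset(30 * time.Second) // e.g. StatsCollectionInterval
//			collect()                     // one metrics collection pass
//		}
//	}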
func (s *Server) iterateJobSummaryMetrics(summary *structs.JobSummary) {
	for name, tgSummary := range summary.Summary {
		if !s.config.DisableTaggedMetrics {
			labels := []metrics.Label{
				{
					Name:  "job",
					Value: summary.JobID,
				},
				{
					Name:  "task_group",
					Value: name,
				},
				{
					Name:  "namespace",
					Value: summary.Namespace,
				},
			}

			if strings.Contains(summary.JobID, "/dispatch-") {
				jobInfo := strings.Split(summary.JobID, "/dispatch-")
				labels = append(labels, metrics.Label{
					Name:  "parent_id",
					Value: jobInfo[0],
				}, metrics.Label{
					Name:  "dispatch_id",
					Value: jobInfo[1],
				})
			}

			if strings.Contains(summary.JobID, "/periodic-") {
				jobInfo := strings.Split(summary.JobID, "/periodic-")
				labels = append(labels, metrics.Label{
					Name:  "parent_id",
					Value: jobInfo[0],
				}, metrics.Label{
					Name:  "periodic_id",
					Value: jobInfo[1],
				})
			}

			metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "queued"},
				float32(tgSummary.Queued), labels)
			metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "complete"},
				float32(tgSummary.Complete), labels)
			metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "failed"},
				float32(tgSummary.Failed), labels)
			metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "running"},
				float32(tgSummary.Running), labels)
			metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "starting"},
				float32(tgSummary.Starting), labels)
			metrics.SetGaugeWithLabels([]string{"nomad", "job_summary", "lost"},
				float32(tgSummary.Lost), labels)
		}
		if s.config.BackwardsCompatibleMetrics {
			metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "queued"}, float32(tgSummary.Queued))
			metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "complete"}, float32(tgSummary.Complete))
			metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "failed"}, float32(tgSummary.Failed))
			metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "running"}, float32(tgSummary.Running))
			metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "starting"}, float32(tgSummary.Starting))
			metrics.SetGauge([]string{"nomad", "job_summary", summary.JobID, name, "lost"}, float32(tgSummary.Lost))
		}
	}
}

// publishJobStatusMetrics publishes the job statuses as metrics
func (s *Server) publishJobStatusMetrics(stopCh chan struct{}) {
	timer := time.NewTimer(0)
	defer timer.Stop()

	for {
		select {
		case <-stopCh:
			return
		case <-timer.C:
			timer.Reset(s.config.StatsCollectionInterval)
			state, err := s.State().Snapshot()
			if err != nil {
				s.logger.Error("failed to get state", "error", err)
				continue
			}
			ws := memdb.NewWatchSet()
			iter, err := state.Jobs(ws)
			if err != nil {
				s.logger.Error("failed to get job statuses", "error", err)
				continue
			}

			s.iterateJobStatusMetrics(&iter)
		}
	}
}

func (s *Server) iterateJobStatusMetrics(jobs *memdb.ResultIterator) {
	var pending int64 // Sum of all jobs in 'pending' state
	var running int64 // Sum of all jobs in 'running' state
	var dead int64    // Sum of all jobs in 'dead' state

	for {
		raw := (*jobs).Next()
		if raw == nil {
			break
		}

		job := raw.(*structs.Job)

		switch job.Status {
		case structs.JobStatusPending:
			pending++
		case structs.JobStatusRunning:
			running++
		case structs.JobStatusDead:
			dead++
		}
	}

	metrics.SetGauge([]string{"nomad", "job_status", "pending"}, float32(pending))
	metrics.SetGauge([]string{"nomad", "job_status", "running"}, float32(running))
	metrics.SetGauge([]string{"nomad", "job_status", "dead"}, float32(dead))
}

// revokeLeadership is invoked once we step down as leader.
// This is used to clean up any state that may be specific to a leader.
func (s *Server) revokeLeadership() error {
	defer metrics.MeasureSince([]string{"nomad", "leader", "revoke_leadership"}, time.Now())

	s.resetConsistentReadReady()

	// Clear the leader token since we are no longer the leader.
	s.setLeaderAcl("")

	// Disable autopilot
	s.autopilot.Stop()

	// Disable the plan queue, since we are no longer leader
	s.planQueue.SetEnabled(false)

	// Disable the eval broker, since it is only useful as a leader
	s.evalBroker.SetEnabled(false)

	// Disable the blocked eval tracker, since it is only useful as a leader
	s.blockedEvals.SetEnabled(false)

	// Disable the periodic dispatcher, since it is only useful as a leader
	s.periodicDispatcher.SetEnabled(false)

	// Disable the Vault client as it is only useful as a leader.
	s.vault.SetActive(false)

	// Disable the deployment watcher as it is only useful as a leader.
	s.deploymentWatcher.SetEnabled(false, nil)

	// Disable the node drainer
	s.nodeDrainer.SetEnabled(false, nil)

	// Disable the volume watcher
	s.volumeWatcher.SetEnabled(false, nil)

	// Disable any enterprise systems required.
	if err := s.revokeEnterpriseLeadership(); err != nil {
		return err
	}

	// Clear the heartbeat timers on either shutdown or step down,
	// since we are no longer responsible for TTL expirations.
	if err := s.clearAllHeartbeatTimers(); err != nil {
		s.logger.Error("clearing heartbeat timers failed", "error", err)
		return err
	}

	// Unpause our workers if we paused them previously
	for _, w := range s.pausableWorkers() {
		w.SetPause(false)
	}

	return nil
}

// pausableWorkers returns a slice of the workers
// to pause on leader transitions.
//
// Upon leadership establishment, pause workers to free half
// the cores for use in the plan queue and evaluation broker
func (s *Server) pausableWorkers() []*Worker {
	n := len(s.workers)
	if n <= 1 {
		return []*Worker{}
	}

	// Disabling 3/4 of the workers frees CPU for raft and the
	// plan applier which uses 1/2 the cores.
	return s.workers[:3*n/4]
}
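// With integer division, s.workers[:3*n/4] pauses three quarters of the
// workers, rounded down, and the rest keep scheduling. A quick illustration
// with made-up worker counts (not taken from any real configuration):
//
//	n := 8
//	paused := 3 * n / 4 // 6 of 8 workers paused, 2 keep running
//	_ = paused
//
// With n = 2 this pauses a single worker, and with n <= 1 the function returns
// an empty slice so the only worker is never paused.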
// reconcile is used to reconcile the differences between Serf
// membership and what is reflected in our strongly consistent store.
func (s *Server) reconcile() error {
	defer metrics.MeasureSince([]string{"nomad", "leader", "reconcile"}, time.Now())
	members := s.serf.Members()
	for _, member := range members {
		if err := s.reconcileMember(member); err != nil {
			return err
		}
	}
	return nil
}

// reconcileMember is used to do an async reconcile of a single serf member
func (s *Server) reconcileMember(member serf.Member) error {
	// Check if this is a member we should handle
	valid, parts := isNomadServer(member)
	if !valid || parts.Region != s.config.Region {
		return nil
	}
	defer metrics.MeasureSince([]string{"nomad", "leader", "reconcileMember"}, time.Now())

	var err error
	switch member.Status {
	case serf.StatusAlive:
		err = s.addRaftPeer(member, parts)
	case serf.StatusLeft, StatusReap:
		err = s.removeRaftPeer(member, parts)
	}
	if err != nil {
		s.logger.Error("failed to reconcile member", "member", member, "error", err)
		return err
	}
	return nil
}

// addRaftPeer is used to add a new Raft peer when a Nomad server joins
func (s *Server) addRaftPeer(m serf.Member, parts *serverParts) error {
	// Check for possibility of multiple bootstrap nodes
	members := s.serf.Members()
	if parts.Bootstrap {
		for _, member := range members {
			valid, p := isNomadServer(member)
			if valid && member.Name != m.Name && p.Bootstrap {
				s.logger.Error("skipping adding Raft peer because an existing peer is in bootstrap mode and only one server should be in bootstrap mode",
					"existing_peer", member.Name, "joining_peer", m.Name)
				return nil
			}
		}
	}

	// Processing ourselves could result in trying to remove ourselves to
	// fix up our address, which would make us step down. This is only
	// safe to attempt if there are multiple servers available.
	addr := (&net.TCPAddr{IP: m.Addr, Port: parts.Port}).String()
	configFuture := s.raft.GetConfiguration()
	if err := configFuture.Error(); err != nil {
		s.logger.Error("failed to get raft configuration", "error", err)
		return err
	}

	if m.Name == s.config.NodeName {
		if l := len(configFuture.Configuration().Servers); l < 3 {
			s.logger.Debug("skipping self join check for peer since the cluster is too small", "peer", m.Name)
			return nil
		}
	}

	// See if it's already in the configuration. It's harmless to re-add it
	// but we want to avoid doing that if possible to prevent useless Raft
	// log entries. If the address is the same but the ID changed, remove the
	// old server before adding the new one.
	minRaftProtocol, err := s.autopilot.MinRaftProtocol()
	if err != nil {
		return err
	}
	for _, server := range configFuture.Configuration().Servers {
		// No-op if the raft version is too low
		if server.Address == raft.ServerAddress(addr) && (minRaftProtocol < 2 || parts.RaftVersion < 3) {
			return nil
		}

		// If the address or ID matches an existing server, see if we need to remove the old one first
		if server.Address == raft.ServerAddress(addr) || server.ID == raft.ServerID(parts.ID) {
			// Exit with no-op if this is being called on an existing server and both the ID and address match
			if server.Address == raft.ServerAddress(addr) && server.ID == raft.ServerID(parts.ID) {
				return nil
			}
			future := s.raft.RemoveServer(server.ID, 0, 0)
			if server.Address == raft.ServerAddress(addr) {
				if err := future.Error(); err != nil {
					return fmt.Errorf("error removing server with duplicate address %q: %s", server.Address, err)
				}
				s.logger.Info("removed server with duplicate address", "address", server.Address)
			} else {
				if err := future.Error(); err != nil {
					return fmt.Errorf("error removing server with duplicate ID %q: %s", server.ID, err)
				}
				s.logger.Info("removed server with duplicate ID", "id", server.ID)
			}
		}
	}

	// Attempt to add as a peer
	switch {
	case minRaftProtocol >= 3:
		addFuture := s.raft.AddNonvoter(raft.ServerID(parts.ID), raft.ServerAddress(addr), 0, 0)
		if err := addFuture.Error(); err != nil {
			s.logger.Error("failed to add raft peer", "error", err)
			return err
		}
	case minRaftProtocol == 2 && parts.RaftVersion >= 3:
		addFuture := s.raft.AddVoter(raft.ServerID(parts.ID), raft.ServerAddress(addr), 0, 0)
		if err := addFuture.Error(); err != nil {
			s.logger.Error("failed to add raft peer", "error", err)
			return err
		}
	default:
		addFuture := s.raft.AddPeer(raft.ServerAddress(addr))
		if err := addFuture.Error(); err != nil {
			s.logger.Error("failed to add raft peer", "error", err)
			return err
		}
	}

	return nil
}

// removeRaftPeer is used to remove a Raft peer when a Nomad server leaves
// or is reaped
func (s *Server) removeRaftPeer(m serf.Member, parts *serverParts) error {
	addr := (&net.TCPAddr{IP: m.Addr, Port: parts.Port}).String()

	// See if it's already in the configuration. It's harmless to re-remove it
	// but we want to avoid doing that if possible to prevent useless Raft
	// log entries.
	configFuture := s.raft.GetConfiguration()
	if err := configFuture.Error(); err != nil {
		s.logger.Error("failed to get raft configuration", "error", err)
		return err
	}

	minRaftProtocol, err := s.autopilot.MinRaftProtocol()
	if err != nil {
		return err
	}

	// Pick which remove API to use based on how the server was added.
	for _, server := range configFuture.Configuration().Servers {
		// If we understand the new add/remove APIs and the server was added by ID, use the new remove API
		if minRaftProtocol >= 2 && server.ID == raft.ServerID(parts.ID) {
			s.logger.Info("removing server by ID", "id", server.ID)
			future := s.raft.RemoveServer(raft.ServerID(parts.ID), 0, 0)
			if err := future.Error(); err != nil {
				s.logger.Error("failed to remove raft peer", "id", server.ID, "error", err)
				return err
			}
			break
		} else if server.Address == raft.ServerAddress(addr) {
			// If not, use the old remove API
			s.logger.Info("removing server by address", "address", server.Address)
			future := s.raft.RemovePeer(raft.ServerAddress(addr))
			if err := future.Error(); err != nil {
				s.logger.Error("failed to remove raft peer", "address", addr, "error", err)
				return err
			}
			break
		}
	}

	return nil
}
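// replicateACLPolicies and replicateACLTokens below share the same shape: a
// rate-limited polling loop against the authoritative region, with an
// ERR_WAIT backoff label for failures. The rate limiting itself is plain
// golang.org/x/time/rate; a minimal sketch, with the loop body elided:
//
//	limiter := rate.NewLimiter(replicationRateLimit, int(replicationRateLimit))
//	for {
//		limiter.Wait(context.Background()) // at most ~10 rounds per second
//		// ... fetch the remote list, diff it against local state, apply changes ...
//	}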
// replicateACLPolicies is used to replicate ACL policies from
// the authoritative region to this region.
func (s *Server) replicateACLPolicies(stopCh chan struct{}) {
	req := structs.ACLPolicyListRequest{
		QueryOptions: structs.QueryOptions{
			Region:     s.config.AuthoritativeRegion,
			AllowStale: true,
		},
	}
	limiter := rate.NewLimiter(replicationRateLimit, int(replicationRateLimit))
	s.logger.Debug("starting ACL policy replication from authoritative region", "authoritative_region", req.Region)

START:
	for {
		select {
		case <-stopCh:
			return
		default:
			// Rate limit how often we attempt replication
			limiter.Wait(context.Background())

			// Fetch the list of policies
			var resp structs.ACLPolicyListResponse
			req.AuthToken = s.ReplicationToken()
			err := s.forwardRegion(s.config.AuthoritativeRegion,
				"ACL.ListPolicies", &req, &resp)
			if err != nil {
				s.logger.Error("failed to fetch policies from authoritative region", "error", err)
				goto ERR_WAIT
			}

			// Perform a two-way diff
			delete, update := diffACLPolicies(s.State(), req.MinQueryIndex, resp.Policies)

			// Delete policies that should not exist
			if len(delete) > 0 {
				args := &structs.ACLPolicyDeleteRequest{
					Names: delete,
				}
				_, _, err := s.raftApply(structs.ACLPolicyDeleteRequestType, args)
				if err != nil {
					s.logger.Error("failed to delete policies", "error", err)
					goto ERR_WAIT
				}
			}

			// Fetch any outdated policies
			var fetched []*structs.ACLPolicy
			if len(update) > 0 {
				req := structs.ACLPolicySetRequest{
					Names: update,
					QueryOptions: structs.QueryOptions{
						Region:        s.config.AuthoritativeRegion,
						AuthToken:     s.ReplicationToken(),
						AllowStale:    true,
						MinQueryIndex: resp.Index - 1,
					},
				}
				var reply structs.ACLPolicySetResponse
				if err := s.forwardRegion(s.config.AuthoritativeRegion,
					"ACL.GetPolicies", &req, &reply); err != nil {
					s.logger.Error("failed to fetch policies from authoritative region", "error", err)
					goto ERR_WAIT
				}
				for _, policy := range reply.Policies {
					fetched = append(fetched, policy)
				}
			}

			// Update local policies
			if len(fetched) > 0 {
				args := &structs.ACLPolicyUpsertRequest{
					Policies: fetched,
				}
				_, _, err := s.raftApply(structs.ACLPolicyUpsertRequestType, args)
				if err != nil {
					s.logger.Error("failed to update policies", "error", err)
					goto ERR_WAIT
				}
			}

			// Update the minimum query index; the next list call blocks until
			// there is a change.
			req.MinQueryIndex = resp.Index
		}
	}

ERR_WAIT:
	select {
	case <-time.After(s.config.ReplicationBackoff):
		goto START
	case <-stopCh:
		return
	}
}

// diffACLPolicies is used to perform a two-way diff between the local
// policies and the remote policies to determine which policies need to
// be deleted or updated.
func diffACLPolicies(state *state.StateStore, minIndex uint64, remoteList []*structs.ACLPolicyListStub) (delete []string, update []string) {
	// Construct a set of the local and remote policies
	local := make(map[string][]byte)
	remote := make(map[string]struct{})

	// Add all the local policies
	iter, err := state.ACLPolicies(nil)
	if err != nil {
		panic("failed to iterate local policies")
	}
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		policy := raw.(*structs.ACLPolicy)
		local[policy.Name] = policy.Hash
	}

	// Iterate over the remote policies
	for _, rp := range remoteList {
		remote[rp.Name] = struct{}{}

		// Check if the policy is missing locally
		if localHash, ok := local[rp.Name]; !ok {
			update = append(update, rp.Name)

			// Check if the policy is newer remotely and there is a hash mismatch.
		} else if rp.ModifyIndex > minIndex && !bytes.Equal(localHash, rp.Hash) {
			update = append(update, rp.Name)
		}
	}

	// Check if policy should be deleted
	for lp := range local {
		if _, ok := remote[lp]; !ok {
			delete = append(delete, lp)
		}
	}
	return
}
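// diffACLPolicies above is a simple two-way set diff keyed on name and content
// hash: anything remote that is missing locally or whose hash differs is
// queued for update, and anything local that no longer exists remotely is
// queued for deletion. The same idea in a self-contained form, using plain
// maps instead of the state store and RPC stubs (the ModifyIndex guard is
// omitted for brevity):
//
//	func diffByHash(local, remote map[string][]byte) (del, update []string) {
//		for name, remoteHash := range remote {
//			if localHash, ok := local[name]; !ok || !bytes.Equal(localHash, remoteHash) {
//				update = append(update, name)
//			}
//		}
//		for name := range local {
//			if _, ok := remote[name]; !ok {
//				del = append(del, name)
//			}
//		}
//		return del, update
//	}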
// replicateACLTokens is used to replicate global ACL tokens from
// the authoritative region to this region.
func (s *Server) replicateACLTokens(stopCh chan struct{}) {
	req := structs.ACLTokenListRequest{
		GlobalOnly: true,
		QueryOptions: structs.QueryOptions{
			Region:     s.config.AuthoritativeRegion,
			AllowStale: true,
		},
	}
	limiter := rate.NewLimiter(replicationRateLimit, int(replicationRateLimit))
	s.logger.Debug("starting ACL token replication from authoritative region", "authoritative_region", req.Region)

START:
	for {
		select {
		case <-stopCh:
			return
		default:
			// Rate limit how often we attempt replication
			limiter.Wait(context.Background())

			// Fetch the list of tokens
			var resp structs.ACLTokenListResponse
			req.AuthToken = s.ReplicationToken()
			err := s.forwardRegion(s.config.AuthoritativeRegion,
				"ACL.ListTokens", &req, &resp)
			if err != nil {
				s.logger.Error("failed to fetch tokens from authoritative region", "error", err)
				goto ERR_WAIT
			}

			// Perform a two-way diff
			delete, update := diffACLTokens(s.State(), req.MinQueryIndex, resp.Tokens)

			// Delete tokens that should not exist
			if len(delete) > 0 {
				args := &structs.ACLTokenDeleteRequest{
					AccessorIDs: delete,
				}
				_, _, err := s.raftApply(structs.ACLTokenDeleteRequestType, args)
				if err != nil {
					s.logger.Error("failed to delete tokens", "error", err)
					goto ERR_WAIT
				}
			}

			// Fetch any outdated tokens.
			var fetched []*structs.ACLToken
			if len(update) > 0 {
				req := structs.ACLTokenSetRequest{
					AccessorIDS: update,
					QueryOptions: structs.QueryOptions{
						Region:        s.config.AuthoritativeRegion,
						AuthToken:     s.ReplicationToken(),
						AllowStale:    true,
						MinQueryIndex: resp.Index - 1,
					},
				}
				var reply structs.ACLTokenSetResponse
				if err := s.forwardRegion(s.config.AuthoritativeRegion,
					"ACL.GetTokens", &req, &reply); err != nil {
					s.logger.Error("failed to fetch tokens from authoritative region", "error", err)
					goto ERR_WAIT
				}
				for _, token := range reply.Tokens {
					fetched = append(fetched, token)
				}
			}

			// Update local tokens
			if len(fetched) > 0 {
				args := &structs.ACLTokenUpsertRequest{
					Tokens: fetched,
				}
				_, _, err := s.raftApply(structs.ACLTokenUpsertRequestType, args)
				if err != nil {
					s.logger.Error("failed to update tokens", "error", err)
					goto ERR_WAIT
				}
			}

			// Update the minimum query index; the next list call blocks until
			// there is a change.
			req.MinQueryIndex = resp.Index
		}
	}

ERR_WAIT:
	select {
	case <-time.After(s.config.ReplicationBackoff):
		goto START
	case <-stopCh:
		return
	}
}

// diffACLTokens is used to perform a two-way diff between the local
// tokens and the remote tokens to determine which tokens need to
// be deleted or updated.
func diffACLTokens(state *state.StateStore, minIndex uint64, remoteList []*structs.ACLTokenListStub) (delete []string, update []string) {
	// Construct a set of the local and remote tokens
	local := make(map[string][]byte)
	remote := make(map[string]struct{})

	// Add all the local global tokens
	iter, err := state.ACLTokensByGlobal(nil, true)
	if err != nil {
		panic("failed to iterate local tokens")
	}
	for {
		raw := iter.Next()
		if raw == nil {
			break
		}
		token := raw.(*structs.ACLToken)
		local[token.AccessorID] = token.Hash
	}

	// Iterate over the remote tokens
	for _, rp := range remoteList {
		remote[rp.AccessorID] = struct{}{}

		// Check if the token is missing locally
		if localHash, ok := local[rp.AccessorID]; !ok {
			update = append(update, rp.AccessorID)

			// Check if the token is newer remotely and there is a hash mismatch.
		} else if rp.ModifyIndex > minIndex && !bytes.Equal(localHash, rp.Hash) {
			update = append(update, rp.AccessorID)
		}
	}

	// Check if local token should be deleted
	for lp := range local {
		if _, ok := remote[lp]; !ok {
			delete = append(delete, lp)
		}
	}
	return
}

// getOrCreateAutopilotConfig is used to get the autopilot config, initializing it if necessary
func (s *Server) getOrCreateAutopilotConfig() *structs.AutopilotConfig {
	state := s.fsm.State()
	_, config, err := state.AutopilotConfig()
	if err != nil {
		s.logger.Named("autopilot").Error("failed to get autopilot config", "error", err)
		return nil
	}
	if config != nil {
		return config
	}

	if !ServersMeetMinimumVersion(s.Members(), minAutopilotVersion, false) {
		s.logger.Named("autopilot").Warn("can't initialize until all servers are above minimum version", "min_version", minAutopilotVersion)
		return nil
	}

	config = s.config.AutopilotConfig
	req := structs.AutopilotSetConfigRequest{Config: *config}
	if _, _, err = s.raftApply(structs.AutopilotRequestType, req); err != nil {
		s.logger.Named("autopilot").Error("failed to initialize config", "error", err)
		return nil
	}

	return config
}

// getOrCreateSchedulerConfig is used to get the scheduler config. We create a default
// config if it doesn't already exist for bootstrapping an empty cluster
func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration {
	state := s.fsm.State()
	_, config, err := state.SchedulerConfig()
	if err != nil {
		s.logger.Named("core").Error("failed to get scheduler config", "error", err)
		return nil
	}
	if config != nil {
		return config
	}
	if !ServersMeetMinimumVersion(s.Members(), minSchedulerConfigVersion, false) {
		s.logger.Named("core").Warn("can't initialize scheduler config until all servers are above minimum version", "min_version", minSchedulerConfigVersion)
		return nil
	}

	req := structs.SchedulerSetConfigRequest{Config: s.config.DefaultSchedulerConfig}
	if _, _, err = s.raftApply(structs.SchedulerConfigRequestType, req); err != nil {
		s.logger.Named("core").Error("failed to initialize config", "error", err)
		return nil
	}

	return config
}

func (s *Server) generateClusterID() (string, error) {
	if !ServersMeetMinimumVersion(s.Members(), minClusterIDVersion, false) {
		s.logger.Named("core").Warn("cannot initialize cluster ID until all servers are above minimum version", "min_version", minClusterIDVersion)
		return "", errors.Errorf("cluster ID cannot be created until all servers are above minimum version %s", minClusterIDVersion)
	}

	newMeta := structs.ClusterMetadata{ClusterID: uuid.Generate(), CreateTime: time.Now().UnixNano()}
	if _, _, err := s.raftApply(structs.ClusterMetadataRequestType, newMeta); err != nil {
		s.logger.Named("core").Error("failed to create cluster ID", "error", err)
		return "", errors.Wrap(err, "failed to create cluster ID")
	}

	s.logger.Named("core").Info("established cluster id", "cluster_id", newMeta.ClusterID, "create_time", newMeta.CreateTime)
	return newMeta.ClusterID, nil
}