github.com/emate/nomad@v0.8.2-wo-binpacking/nomad/leader_test.go

package nomad

import (
	"fmt"
	"testing"
	"time"

	"github.com/hashicorp/consul/testutil/retry"
	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/state"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/hashicorp/raft"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func TestLeader_LeftServer(t *testing.T) {
	s1 := TestServer(t, nil)
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	// Kill a non-leader server
	var peer *Server
	for _, s := range servers {
		if !s.IsLeader() {
			peer = s
			break
		}
	}
	if peer == nil {
		t.Fatalf("Should have a non-leader")
	}
	peer.Shutdown()
	name := fmt.Sprintf("%s.%s", peer.config.NodeName, peer.config.Region)

	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s == peer {
				continue
			}

			// Force remove the non-leader (transition to left state)
			if err := s.RemoveFailedNode(name); err != nil {
				return false, err
			}

			peers, _ := s.numPeers()
			return peers == 2, fmt.Errorf("%v", peers)
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %s", err)
	})
}

func TestLeader_LeftLeader(t *testing.T) {
	s1 := TestServer(t, nil)
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	// Kill the leader!
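	// Leave() broadcasts a graceful departure before Shutdown(), so the
	// remaining servers should converge on two raft peers on their own,
	// without the RemoveFailedNode call needed in TestLeader_LeftServer above.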
	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}
	leader.Leave()
	leader.Shutdown()

	for _, s := range servers {
		if s == leader {
			continue
		}
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 2, fmt.Errorf("%v", peers)
		}, func(err error) {
			t.Fatalf("should have 2 peers: %v", err)
		})
	}
}

func TestLeader_MultiBootstrap(t *testing.T) {
	s1 := TestServer(t, nil)
	defer s1.Shutdown()

	s2 := TestServer(t, nil)
	defer s2.Shutdown()
	servers := []*Server{s1, s2}
	TestJoin(t, s1, s2)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers := s.Members()
			return len(peers) == 2, nil
		}, func(err error) {
			t.Fatalf("should have 2 peers")
		})
	}

	// Ensure we don't have multiple raft peers
	for _, s := range servers {
		peers, _ := s.numPeers()
		if peers != 1 {
			t.Fatalf("should only have 1 raft peer!")
		}
	}
}

func TestLeader_PlanQueue_Reset(t *testing.T) {
	s1 := TestServer(t, nil)
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	if !leader.planQueue.Enabled() {
		t.Fatalf("should enable plan queue")
	}

	for _, s := range servers {
		if !s.IsLeader() && s.planQueue.Enabled() {
			t.Fatalf("plan queue should not be enabled")
		}
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return true, nil
			}
		}
		return false, nil
	}, func(err error) {
		t.Fatalf("should have leader")
	})

	// Check that the new leader has the plan queue enabled
	testutil.WaitForResult(func() (bool, error) {
		return leader.planQueue.Enabled(), nil
	}, func(err error) {
		t.Fatalf("should enable plan queue")
	})
}

func TestLeader_EvalBroker_Reset(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)
	testutil.WaitForLeader(t, s1.RPC)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

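	// The eval broker runs only on the leader, so state injected through the
	// current leader must be restored by whichever server wins the next
	// election.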
	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	// Inject a pending eval
	req := structs.EvalUpdateRequest{
		Evals: []*structs.Evaluation{mock.Eval()},
	}
	_, _, err := leader.raftApply(structs.EvalUpdateRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return true, nil
			}
		}
		return false, nil
	}, func(err error) {
		t.Fatalf("should have leader")
	})

	// Check that the new leader has a pending evaluation
	testutil.WaitForResult(func() (bool, error) {
		stats := leader.evalBroker.Stats()
		return stats.TotalReady == 1, nil
	}, func(err error) {
		t.Fatalf("should have pending evaluation")
	})
}

func TestLeader_PeriodicDispatcher_Restore_Adds(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	TestJoin(t, s1, s2, s3)
	testutil.WaitForLeader(t, s1.RPC)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	// Inject a periodic job, a parameterized periodic job and a non-periodic job
	periodic := mock.PeriodicJob()
	nonPeriodic := mock.Job()
	parameterizedPeriodic := mock.PeriodicJob()
	parameterizedPeriodic.ParameterizedJob = &structs.ParameterizedJobConfig{}
	for _, job := range []*structs.Job{nonPeriodic, periodic, parameterizedPeriodic} {
		req := structs.JobRegisterRequest{
			Job: job,
			WriteRequest: structs.WriteRequest{
				Namespace: job.Namespace,
			},
		}
		_, _, err := leader.raftApply(structs.JobRegisterRequestType, req)
		if err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return true, nil
			}
		}
		return false, nil
	}, func(err error) {
		t.Fatalf("should have leader")
	})

	tuplePeriodic := structs.NamespacedID{
		ID:        periodic.ID,
		Namespace: periodic.Namespace,
	}
	tupleNonPeriodic := structs.NamespacedID{
		ID:        nonPeriodic.ID,
		Namespace: nonPeriodic.Namespace,
	}
	tupleParameterized := structs.NamespacedID{
		ID:        parameterizedPeriodic.ID,
		Namespace: parameterizedPeriodic.Namespace,
	}

	// Check that the new leader is tracking the periodic job only
	testutil.WaitForResult(func() (bool, error) {
		if _, tracked := leader.periodicDispatcher.tracked[tuplePeriodic]; !tracked {
			return false, fmt.Errorf("periodic job not tracked")
		}
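		// Neither the plain job nor the parameterized job should be tracked;
		// parameterized jobs are dispatched on demand rather than on a timer.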
		if _, tracked := leader.periodicDispatcher.tracked[tupleNonPeriodic]; tracked {
			return false, fmt.Errorf("non periodic job tracked")
		}
		if _, tracked := leader.periodicDispatcher.tracked[tupleParameterized]; tracked {
			return false, fmt.Errorf("parameterized periodic job tracked")
		}
		return true, nil
	}, func(err error) {
		t.Fatalf("%v", err)
	})
}

func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Inject a periodic job that will be triggered soon.
	launch := time.Now().Add(1 * time.Second)
	job := testPeriodicJob(launch)
	req := structs.JobRegisterRequest{
		Job: job,
		WriteRequest: structs.WriteRequest{
			Namespace: job.Namespace,
		},
	}
	_, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Flush the periodic dispatcher, ensuring that no evals will be created.
	s1.periodicDispatcher.SetEnabled(false)

	// Get the current time to ensure the launch time is after this once we
	// restore.
	now := time.Now()

	// Sleep till after the job should have been launched.
	time.Sleep(3 * time.Second)

	// Restore the periodic dispatcher.
	s1.periodicDispatcher.SetEnabled(true)
	s1.restorePeriodicDispatcher()

	// Ensure the job is tracked.
	tuple := structs.NamespacedID{
		ID:        job.ID,
		Namespace: job.Namespace,
	}
	if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked {
		t.Fatalf("periodic job not restored")
	}

	// Check that an eval was made.
	ws := memdb.NewWatchSet()
	last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID)
	if err != nil || last == nil {
		t.Fatalf("failed to get periodic launch time: %v", err)
	}

	if last.Launch.Before(now) {
		t.Fatalf("restorePeriodicDispatcher did not force launch: last %v; want after %v", last.Launch, now)
	}
}

func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Inject a periodic job that triggered once in the past, should trigger now
	// and once in the future.
	now := time.Now()
	past := now.Add(-1 * time.Second)
	future := now.Add(10 * time.Second)
	job := testPeriodicJob(past, now, future)
	req := structs.JobRegisterRequest{
		Job: job,
		WriteRequest: structs.WriteRequest{
			Namespace: job.Namespace,
		},
	}
	_, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Create an eval for the past launch.
	s1.periodicDispatcher.createEval(job, past)

	// Flush the periodic dispatcher, ensuring that no evals will be created.
	s1.periodicDispatcher.SetEnabled(false)

	// Sleep till after the job should have been launched.
	time.Sleep(3 * time.Second)

	// Restore the periodic dispatcher.
	s1.periodicDispatcher.SetEnabled(true)
	s1.restorePeriodicDispatcher()

	// Ensure the job is tracked.
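	// (The dispatcher keys its tracked map by the job's namespaced ID.)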
	tuple := structs.NamespacedID{
		ID:        job.ID,
		Namespace: job.Namespace,
	}
	if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked {
		t.Fatalf("periodic job not restored")
	}

	// Check that an eval was made.
	ws := memdb.NewWatchSet()
	last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID)
	if err != nil || last == nil {
		t.Fatalf("failed to get periodic launch time: %v", err)
	}
	if last.Launch == past {
		t.Fatalf("restorePeriodicDispatcher did not force launch")
	}
}

func TestLeader_PeriodicDispatch(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EvalGCInterval = 5 * time.Millisecond
	})
	defer s1.Shutdown()

	// Wait for a periodic dispatch
	testutil.WaitForResult(func() (bool, error) {
		stats := s1.evalBroker.Stats()
		bySched, ok := stats.ByScheduler[structs.JobTypeCore]
		if !ok {
			return false, nil
		}
		return bySched.Ready > 0, nil
	}, func(err error) {
		t.Fatalf("should have a pending job")
	})
}

func TestLeader_ReapFailedEval(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EvalDeliveryLimit = 1
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Enqueue an evaluation
	eval := mock.Eval()
	s1.evalBroker.Enqueue(eval)

	// Dequeue and Nack
	out, token, err := s1.evalBroker.Dequeue(defaultSched, time.Second)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	s1.evalBroker.Nack(out.ID, token)

	// Wait for an updated and followup evaluation
	state := s1.fsm.State()
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		out, err := state.EvalByID(ws, eval.ID)
		if err != nil {
			return false, err
		}
		if out == nil {
			return false, fmt.Errorf("expect original evaluation to exist")
		}
		if out.Status != structs.EvalStatusFailed {
			return false, fmt.Errorf("got status %v; want %v", out.Status, structs.EvalStatusFailed)
		}

		// See if there is a followup
		evals, err := state.EvalsByJob(ws, eval.Namespace, eval.JobID)
		if err != nil {
			return false, err
		}

		if l := len(evals); l != 2 {
			return false, fmt.Errorf("got %d evals, want 2", l)
		}

		for _, e := range evals {
			if e.ID == eval.ID {
				continue
			}

			if e.Status != structs.EvalStatusPending {
				return false, fmt.Errorf("follow up eval has status %v; want %v",
					e.Status, structs.EvalStatusPending)
			}

			if e.Wait < s1.config.EvalFailedFollowupBaselineDelay ||
				e.Wait > s1.config.EvalFailedFollowupBaselineDelay+s1.config.EvalFailedFollowupDelayRange {
				return false, fmt.Errorf("bad wait: %v", e.Wait)
			}

			if e.TriggeredBy != structs.EvalTriggerFailedFollowUp {
				return false, fmt.Errorf("follow up eval TriggeredBy %v; want %v",
					e.TriggeredBy, structs.EvalTriggerFailedFollowUp)
			}
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestLeader_ReapDuplicateEval(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Create a duplicate blocked eval
	eval := mock.Eval()
	eval2 := mock.Eval()
	eval2.JobID = eval.JobID
	s1.blockedEvals.Block(eval)
	s1.blockedEvals.Block(eval2)

	// Wait for the evaluation to be marked as cancelled
	state := s1.fsm.State()
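	// blockedEvals flags the second eval for the same job as a duplicate, and
	// the leader's reap loop should write its cancellation through raft.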
	testutil.WaitForResult(func() (bool, error) {
		ws := memdb.NewWatchSet()
		out, err := state.EvalByID(ws, eval2.ID)
		if err != nil {
			return false, err
		}
		return out != nil && out.Status == structs.EvalStatusCancelled, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestLeader_RestoreVaultAccessors(t *testing.T) {
	s1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Insert a vault accessor that should be revoked
	state := s1.fsm.State()
	va := mock.VaultAccessor()
	if err := state.UpsertVaultAccessor(100, []*structs.VaultAccessor{va}); err != nil {
		t.Fatalf("bad: %v", err)
	}

	// Swap the Vault client
	tvc := &TestVaultClient{}
	s1.vault = tvc

	// Do a restore
	if err := s1.restoreRevokingAccessors(); err != nil {
		t.Fatalf("Failed to restore: %v", err)
	}

	if len(tvc.RevokedTokens) != 1 || tvc.RevokedTokens[0].Accessor != va.Accessor {
		t.Fatalf("Bad revoked accessors: %v", tvc.RevokedTokens)
	}
}

func TestLeader_ReplicateACLPolicies(t *testing.T) {
	t.Parallel()
	s1, root := TestACLServer(t, func(c *Config) {
		c.Region = "region1"
		c.AuthoritativeRegion = "region1"
		c.ACLEnabled = true
	})
	defer s1.Shutdown()
	s2, _ := TestACLServer(t, func(c *Config) {
		c.Region = "region2"
		c.AuthoritativeRegion = "region1"
		c.ACLEnabled = true
		c.ReplicationBackoff = 20 * time.Millisecond
		c.ReplicationToken = root.SecretID
	})
	defer s2.Shutdown()
	TestJoin(t, s1, s2)
	testutil.WaitForLeader(t, s1.RPC)
	testutil.WaitForLeader(t, s2.RPC)

	// Write a policy to the authoritative region
	p1 := mock.ACLPolicy()
	if err := s1.State().UpsertACLPolicies(100, []*structs.ACLPolicy{p1}); err != nil {
		t.Fatalf("bad: %v", err)
	}

	// Wait for the policy to replicate
	testutil.WaitForResult(func() (bool, error) {
		state := s2.State()
		out, err := state.ACLPolicyByName(nil, p1.Name)
		return out != nil, err
	}, func(err error) {
		t.Fatalf("should replicate policy")
	})
}

func TestLeader_DiffACLPolicies(t *testing.T) {
	t.Parallel()

	state := state.TestStateStore(t)

	// Populate the local state
	p1 := mock.ACLPolicy()
	p2 := mock.ACLPolicy()
	p3 := mock.ACLPolicy()
	assert.Nil(t, state.UpsertACLPolicies(100, []*structs.ACLPolicy{p1, p2, p3}))

	// Simulate a remote list
	p2Stub := p2.Stub()
	p2Stub.ModifyIndex = 50 // Ignored, same index
	p3Stub := p3.Stub()
	p3Stub.ModifyIndex = 100 // Updated, higher index
	p3Stub.Hash = []byte{0, 1, 2, 3}
	p4 := mock.ACLPolicy()
	remoteList := []*structs.ACLPolicyListStub{
		p2Stub,
		p3Stub,
		p4.Stub(),
	}
	delete, update := diffACLPolicies(state, 50, remoteList)

	// P1 does not exist on the remote side, should delete
	assert.Equal(t, []string{p1.Name}, delete)

	// P2 is un-modified - ignore. P3 modified, P4 new.
	assert.Equal(t, []string{p3.Name, p4.Name}, update)
}

func TestLeader_ReplicateACLTokens(t *testing.T) {
	t.Parallel()
	s1, root := TestACLServer(t, func(c *Config) {
		c.Region = "region1"
		c.AuthoritativeRegion = "region1"
		c.ACLEnabled = true
	})
	defer s1.Shutdown()
	s2, _ := TestACLServer(t, func(c *Config) {
		c.Region = "region2"
		c.AuthoritativeRegion = "region1"
		c.ACLEnabled = true
		c.ReplicationBackoff = 20 * time.Millisecond
		c.ReplicationToken = root.SecretID
	})
	defer s2.Shutdown()
	TestJoin(t, s1, s2)
	testutil.WaitForLeader(t, s1.RPC)
	testutil.WaitForLeader(t, s2.RPC)

	// Write a token to the authoritative region
	p1 := mock.ACLToken()
	p1.Global = true
	if err := s1.State().UpsertACLTokens(100, []*structs.ACLToken{p1}); err != nil {
		t.Fatalf("bad: %v", err)
	}

	// Wait for the token to replicate
	testutil.WaitForResult(func() (bool, error) {
		state := s2.State()
		out, err := state.ACLTokenByAccessorID(nil, p1.AccessorID)
		return out != nil, err
	}, func(err error) {
		t.Fatalf("should replicate token")
	})
}

func TestLeader_DiffACLTokens(t *testing.T) {
	t.Parallel()

	state := state.TestStateStore(t)

	// Populate the local state
	p0 := mock.ACLToken()
	p1 := mock.ACLToken()
	p1.Global = true
	p2 := mock.ACLToken()
	p2.Global = true
	p3 := mock.ACLToken()
	p3.Global = true
	assert.Nil(t, state.UpsertACLTokens(100, []*structs.ACLToken{p0, p1, p2, p3}))

	// Simulate a remote list
	p2Stub := p2.Stub()
	p2Stub.ModifyIndex = 50 // Ignored, same index
	p3Stub := p3.Stub()
	p3Stub.ModifyIndex = 100 // Updated, higher index
	p3Stub.Hash = []byte{0, 1, 2, 3}
	p4 := mock.ACLToken()
	p4.Global = true
	remoteList := []*structs.ACLTokenListStub{
		p2Stub,
		p3Stub,
		p4.Stub(),
	}
	delete, update := diffACLTokens(state, 50, remoteList)

	// P0 is local and should be ignored
	// P1 does not exist on the remote side, should delete
	assert.Equal(t, []string{p1.AccessorID}, delete)

	// P2 is un-modified - ignore. P3 modified, P4 new.
	assert.Equal(t, []string{p3.AccessorID, p4.AccessorID}, update)
}

func TestLeader_UpgradeRaftVersion(t *testing.T) {
	t.Parallel()
	s1 := TestServer(t, func(c *Config) {
		c.Datacenter = "dc1"
		c.RaftConfig.ProtocolVersion = 2
	})
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 1
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 2
	})
	defer s3.Shutdown()

	servers := []*Server{s1, s2, s3}

	// Try to join
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.numPeers()
			return peers == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	// Kill the v1 server
	if err := s2.Leave(); err != nil {
		t.Fatal(err)
	}

	for _, s := range []*Server{s1, s3} {
		minVer, err := s.autopilot.MinRaftProtocol()
		if err != nil {
			t.Fatal(err)
		}
		if got, want := minVer, 2; got != want {
			t.Fatalf("got min raft version %d want %d", got, want)
		}
	}

	// Replace the dead server with one running raft protocol v3
	s4 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.Datacenter = "dc1"
		c.RaftConfig.ProtocolVersion = 3
	})
	defer s4.Shutdown()
	TestJoin(t, s1, s4)
	servers[1] = s4

	// Make sure we're back to 3 total peers with the new one added via ID
	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			addrs := 0
			ids := 0
			future := s.raft.GetConfiguration()
			if err := future.Error(); err != nil {
				return false, err
			}
			for _, server := range future.Configuration().Servers {
				if string(server.ID) == string(server.Address) {
					addrs++
				} else {
					ids++
				}
			}
			if got, want := addrs, 2; got != want {
				return false, fmt.Errorf("got %d server addresses want %d", got, want)
			}
			if got, want := ids, 1; got != want {
				return false, fmt.Errorf("got %d server ids want %d", got, want)
			}

			return true, nil
		}, func(err error) {
			t.Fatal(err)
		})
	}
}

func TestLeader_Reelection(t *testing.T) {
	raftProtocols := []int{1, 2, 3}
	for _, p := range raftProtocols {
		t.Run(fmt.Sprintf("Leader Election - Protocol version %d", p), func(t *testing.T) {
			leaderElectionTest(t, raft.ProtocolVersion(p))
		})
	}
}

func leaderElectionTest(t *testing.T, raftProtocol raft.ProtocolVersion) {
	s1 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 3
		c.RaftConfig.ProtocolVersion = raftProtocol
	})
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 3
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = raftProtocol
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.BootstrapExpect = 3
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = raftProtocol
	})
	defer s3.Shutdown()

	servers := []*Server{s1, s2, s3}

	// Try to join
	TestJoin(t, s1, s2, s3)
	testutil.WaitForLeader(t, s1.RPC)

	testutil.WaitForResult(func() (bool, error) {
		future := s1.raft.GetConfiguration()
		if err := future.Error(); err != nil {
			return false, err
		}

		for _, server := range future.Configuration().Servers {
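			// With raft protocol v3, autopilot first adds new servers as
			// nonvoters; wait until every server has been promoted to voter
			// before forcing a re-election.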
			if server.Suffrage == raft.Nonvoter {
				return false, fmt.Errorf("non-voter %v", server)
			}
		}

		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})

	var leader, nonLeader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
		} else {
			nonLeader = s
		}
	}

	// Shutdown the leader
	leader.Shutdown()
	// Wait for a new leader to be elected
	testutil.WaitForLeader(t, nonLeader.RPC)
}

func TestLeader_RollRaftServer(t *testing.T) {
	t.Parallel()
	s1 := TestServer(t, func(c *Config) {
		c.RaftConfig.ProtocolVersion = 2
	})
	defer s1.Shutdown()

	s2 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 2
	})
	defer s2.Shutdown()

	s3 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 2
	})
	defer s3.Shutdown()

	servers := []*Server{s1, s2, s3}

	// Try to join
	TestJoin(t, s1, s2, s3)

	for _, s := range servers {
		retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
	}

	// Kill the first v2 server
	s1.Shutdown()

	for _, s := range []*Server{s1, s3} {
		retry.Run(t, func(r *retry.R) {
			minVer, err := s.autopilot.MinRaftProtocol()
			if err != nil {
				r.Fatal(err)
			}
			if got, want := minVer, 2; got != want {
				r.Fatalf("got min raft version %d want %d", got, want)
			}
		})
	}

	// Replace the dead server with one running raft protocol v3
	s4 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 3
	})
	defer s4.Shutdown()
	TestJoin(t, s4, s2)
	servers[0] = s4

	// Kill the second v2 server
	s2.Shutdown()

	for _, s := range []*Server{s3, s4} {
		retry.Run(t, func(r *retry.R) {
			minVer, err := s.autopilot.MinRaftProtocol()
			if err != nil {
				r.Fatal(err)
			}
			if got, want := minVer, 2; got != want {
				r.Fatalf("got min raft version %d want %d", got, want)
			}
		})
	}

	// Replace another dead server with one running raft protocol v3
	s5 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 3
	})
	defer s5.Shutdown()
	TestJoin(t, s5, s4)
	servers[1] = s5

	// Kill the last v2 server, now minRaftProtocol should be 3
	s3.Shutdown()

	for _, s := range []*Server{s4, s5} {
		retry.Run(t, func(r *retry.R) {
			minVer, err := s.autopilot.MinRaftProtocol()
			if err != nil {
				r.Fatal(err)
			}
			if got, want := minVer, 3; got != want {
				r.Fatalf("got min raft version %d want %d", got, want)
			}
		})
	}

	// Replace the last dead server with one running raft protocol v3
	s6 := TestServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
		c.RaftConfig.ProtocolVersion = 3
	})
	defer s6.Shutdown()
	TestJoin(t, s6, s4)
	servers[2] = s6

	// Make sure all the dead servers are removed and we're back to 3 total peers
	for _, s := range servers {
		retry.Run(t, func(r *retry.R) {
			addrs := 0
			ids := 0
			future := s.raft.GetConfiguration()
			if err := future.Error(); err != nil {
				r.Fatal(err)
			}
			for _, server := range future.Configuration().Servers {
				if string(server.ID) == string(server.Address) {
					addrs++
				} else {
					ids++
				}
			}
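			// Raft protocol v3 registers servers under a stable server ID
			// rather than their network address, so a fully rolled cluster
			// should contain only ID-keyed entries.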
			if got, want := addrs, 0; got != want {
				r.Fatalf("got %d server addresses want %d", got, want)
			}
			if got, want := ids, 3; got != want {
				r.Fatalf("got %d server ids want %d", got, want)
			}
		})
	}
}

func TestLeader_RevokeLeadership_MultipleTimes(t *testing.T) {
	s1 := TestServer(t, nil)
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	testutil.WaitForResult(func() (bool, error) {
		return s1.evalBroker.Enabled(), nil
	}, func(err error) {
		t.Fatalf("should have finished the establish-leadership loop")
	})

	require.Nil(t, s1.revokeLeadership())
	require.Nil(t, s1.revokeLeadership())
	require.Nil(t, s1.revokeLeadership())
}