github.com/ferranbt/nomad@v0.9.3-0.20190607002617-85c449b7667c/nomad/leader_test.go (about) 1 package nomad 2 3 import ( 4 "errors" 5 "fmt" 6 "strconv" 7 "testing" 8 "time" 9 10 "github.com/hashicorp/consul/testutil/retry" 11 memdb "github.com/hashicorp/go-memdb" 12 "github.com/hashicorp/nomad/nomad/mock" 13 "github.com/hashicorp/nomad/nomad/state" 14 "github.com/hashicorp/nomad/nomad/structs" 15 "github.com/hashicorp/nomad/testutil" 16 "github.com/hashicorp/raft" 17 "github.com/hashicorp/serf/serf" 18 "github.com/stretchr/testify/assert" 19 "github.com/stretchr/testify/require" 20 ) 21 22 func TestLeader_LeftServer(t *testing.T) { 23 s1 := TestServer(t, nil) 24 defer s1.Shutdown() 25 26 s2 := TestServer(t, func(c *Config) { 27 c.DevDisableBootstrap = true 28 }) 29 defer s2.Shutdown() 30 31 s3 := TestServer(t, func(c *Config) { 32 c.DevDisableBootstrap = true 33 }) 34 defer s3.Shutdown() 35 servers := []*Server{s1, s2, s3} 36 TestJoin(t, s1, s2, s3) 37 38 for _, s := range servers { 39 testutil.WaitForResult(func() (bool, error) { 40 peers, _ := s.numPeers() 41 return peers == 3, nil 42 }, func(err error) { 43 t.Fatalf("should have 3 peers") 44 }) 45 } 46 47 // Kill any server 48 var peer *Server 49 for _, s := range servers { 50 if !s.IsLeader() { 51 peer = s 52 break 53 } 54 } 55 if peer == nil { 56 t.Fatalf("Should have a non-leader") 57 } 58 peer.Shutdown() 59 name := fmt.Sprintf("%s.%s", peer.config.NodeName, peer.config.Region) 60 61 testutil.WaitForResult(func() (bool, error) { 62 for _, s := range servers { 63 if s == peer { 64 continue 65 } 66 67 // Force remove the non-leader (transition to left state) 68 if err := s.RemoveFailedNode(name); err != nil { 69 return false, err 70 } 71 72 peers, _ := s.numPeers() 73 return peers == 2, errors.New(fmt.Sprintf("%v", peers)) 74 } 75 76 return true, nil 77 }, func(err error) { 78 t.Fatalf("err: %s", err) 79 }) 80 } 81 82 func TestLeader_LeftLeader(t *testing.T) { 83 s1 := TestServer(t, nil) 84 defer s1.Shutdown() 85 86 s2 := TestServer(t, func(c *Config) { 87 c.DevDisableBootstrap = true 88 }) 89 defer s2.Shutdown() 90 91 s3 := TestServer(t, func(c *Config) { 92 c.DevDisableBootstrap = true 93 }) 94 defer s3.Shutdown() 95 servers := []*Server{s1, s2, s3} 96 TestJoin(t, s1, s2, s3) 97 98 for _, s := range servers { 99 testutil.WaitForResult(func() (bool, error) { 100 peers, _ := s.numPeers() 101 return peers == 3, nil 102 }, func(err error) { 103 t.Fatalf("should have 3 peers") 104 }) 105 } 106 107 // Kill the leader! 108 var leader *Server 109 for _, s := range servers { 110 if s.IsLeader() { 111 leader = s 112 break 113 } 114 } 115 if leader == nil { 116 t.Fatalf("Should have a leader") 117 } 118 leader.Leave() 119 leader.Shutdown() 120 121 for _, s := range servers { 122 if s == leader { 123 continue 124 } 125 testutil.WaitForResult(func() (bool, error) { 126 peers, _ := s.numPeers() 127 return peers == 2, errors.New(fmt.Sprintf("%v", peers)) 128 }, func(err error) { 129 t.Fatalf("should have 2 peers: %v", err) 130 }) 131 } 132 } 133 134 func TestLeader_MultiBootstrap(t *testing.T) { 135 s1 := TestServer(t, nil) 136 defer s1.Shutdown() 137 138 s2 := TestServer(t, nil) 139 defer s2.Shutdown() 140 servers := []*Server{s1, s2} 141 TestJoin(t, s1, s2) 142 143 for _, s := range servers { 144 testutil.WaitForResult(func() (bool, error) { 145 peers := s.Members() 146 return len(peers) == 2, nil 147 }, func(err error) { 148 t.Fatalf("should have 2 peers") 149 }) 150 } 151 152 // Ensure we don't have multiple raft peers 153 for _, s := range servers { 154 peers, _ := s.numPeers() 155 if peers != 1 { 156 t.Fatalf("should only have 1 raft peer!") 157 } 158 } 159 } 160 161 func TestLeader_PlanQueue_Reset(t *testing.T) { 162 s1 := TestServer(t, nil) 163 defer s1.Shutdown() 164 165 s2 := TestServer(t, func(c *Config) { 166 c.DevDisableBootstrap = true 167 }) 168 defer s2.Shutdown() 169 170 s3 := TestServer(t, func(c *Config) { 171 c.DevDisableBootstrap = true 172 }) 173 defer s3.Shutdown() 174 servers := []*Server{s1, s2, s3} 175 TestJoin(t, s1, s2, s3) 176 177 for _, s := range servers { 178 testutil.WaitForResult(func() (bool, error) { 179 peers, _ := s.numPeers() 180 return peers == 3, nil 181 }, func(err error) { 182 t.Fatalf("should have 3 peers") 183 }) 184 } 185 186 var leader *Server 187 for _, s := range servers { 188 if s.IsLeader() { 189 leader = s 190 break 191 } 192 } 193 if leader == nil { 194 t.Fatalf("Should have a leader") 195 } 196 197 if !leader.planQueue.Enabled() { 198 t.Fatalf("should enable plan queue") 199 } 200 201 for _, s := range servers { 202 if !s.IsLeader() && s.planQueue.Enabled() { 203 t.Fatalf("plan queue should not be enabled") 204 } 205 } 206 207 // Kill the leader 208 leader.Shutdown() 209 time.Sleep(100 * time.Millisecond) 210 211 // Wait for a new leader 212 leader = nil 213 testutil.WaitForResult(func() (bool, error) { 214 for _, s := range servers { 215 if s.IsLeader() { 216 leader = s 217 return true, nil 218 } 219 } 220 return false, nil 221 }, func(err error) { 222 t.Fatalf("should have leader") 223 }) 224 225 // Check that the new leader has a pending GC expiration 226 testutil.WaitForResult(func() (bool, error) { 227 return leader.planQueue.Enabled(), nil 228 }, func(err error) { 229 t.Fatalf("should enable plan queue") 230 }) 231 } 232 233 func TestLeader_EvalBroker_Reset(t *testing.T) { 234 s1 := TestServer(t, func(c *Config) { 235 c.NumSchedulers = 0 236 }) 237 defer s1.Shutdown() 238 239 s2 := TestServer(t, func(c *Config) { 240 c.NumSchedulers = 0 241 c.DevDisableBootstrap = true 242 }) 243 defer s2.Shutdown() 244 245 s3 := TestServer(t, func(c *Config) { 246 c.NumSchedulers = 0 247 c.DevDisableBootstrap = true 248 }) 249 defer s3.Shutdown() 250 servers := []*Server{s1, s2, s3} 251 TestJoin(t, s1, s2, s3) 252 testutil.WaitForLeader(t, s1.RPC) 253 254 for _, s := range servers { 255 testutil.WaitForResult(func() (bool, error) { 256 peers, _ := s.numPeers() 257 return peers == 3, nil 258 }, func(err error) { 259 t.Fatalf("should have 3 peers") 260 }) 261 } 262 263 var leader *Server 264 for _, s := range servers { 265 if s.IsLeader() { 266 leader = s 267 break 268 } 269 } 270 if leader == nil { 271 t.Fatalf("Should have a leader") 272 } 273 274 // Inject a pending eval 275 req := structs.EvalUpdateRequest{ 276 Evals: []*structs.Evaluation{mock.Eval()}, 277 } 278 _, _, err := leader.raftApply(structs.EvalUpdateRequestType, req) 279 if err != nil { 280 t.Fatalf("err: %v", err) 281 } 282 283 // Kill the leader 284 leader.Shutdown() 285 time.Sleep(100 * time.Millisecond) 286 287 // Wait for a new leader 288 leader = nil 289 testutil.WaitForResult(func() (bool, error) { 290 for _, s := range servers { 291 if s.IsLeader() { 292 leader = s 293 return true, nil 294 } 295 } 296 return false, nil 297 }, func(err error) { 298 t.Fatalf("should have leader") 299 }) 300 301 // Check that the new leader has a pending evaluation 302 testutil.WaitForResult(func() (bool, error) { 303 stats := leader.evalBroker.Stats() 304 return stats.TotalReady == 1, nil 305 }, func(err error) { 306 t.Fatalf("should have pending evaluation") 307 }) 308 } 309 310 func TestLeader_PeriodicDispatcher_Restore_Adds(t *testing.T) { 311 s1 := TestServer(t, func(c *Config) { 312 c.NumSchedulers = 0 313 }) 314 defer s1.Shutdown() 315 316 s2 := TestServer(t, func(c *Config) { 317 c.NumSchedulers = 0 318 c.DevDisableBootstrap = true 319 }) 320 defer s2.Shutdown() 321 322 s3 := TestServer(t, func(c *Config) { 323 c.NumSchedulers = 0 324 c.DevDisableBootstrap = true 325 }) 326 defer s3.Shutdown() 327 servers := []*Server{s1, s2, s3} 328 TestJoin(t, s1, s2, s3) 329 testutil.WaitForLeader(t, s1.RPC) 330 331 for _, s := range servers { 332 testutil.WaitForResult(func() (bool, error) { 333 peers, _ := s.numPeers() 334 return peers == 3, nil 335 }, func(err error) { 336 t.Fatalf("should have 3 peers") 337 }) 338 } 339 340 var leader *Server 341 for _, s := range servers { 342 if s.IsLeader() { 343 leader = s 344 break 345 } 346 } 347 if leader == nil { 348 t.Fatalf("Should have a leader") 349 } 350 351 // Inject a periodic job, a parameterized periodic job and a non-periodic job 352 periodic := mock.PeriodicJob() 353 nonPeriodic := mock.Job() 354 parameterizedPeriodic := mock.PeriodicJob() 355 parameterizedPeriodic.ParameterizedJob = &structs.ParameterizedJobConfig{} 356 for _, job := range []*structs.Job{nonPeriodic, periodic, parameterizedPeriodic} { 357 req := structs.JobRegisterRequest{ 358 Job: job, 359 WriteRequest: structs.WriteRequest{ 360 Namespace: job.Namespace, 361 }, 362 } 363 _, _, err := leader.raftApply(structs.JobRegisterRequestType, req) 364 if err != nil { 365 t.Fatalf("err: %v", err) 366 } 367 } 368 369 // Kill the leader 370 leader.Shutdown() 371 time.Sleep(100 * time.Millisecond) 372 373 // Wait for a new leader 374 leader = nil 375 testutil.WaitForResult(func() (bool, error) { 376 for _, s := range servers { 377 if s.IsLeader() { 378 leader = s 379 return true, nil 380 } 381 } 382 return false, nil 383 }, func(err error) { 384 t.Fatalf("should have leader") 385 }) 386 387 tuplePeriodic := structs.NamespacedID{ 388 ID: periodic.ID, 389 Namespace: periodic.Namespace, 390 } 391 tupleNonPeriodic := structs.NamespacedID{ 392 ID: nonPeriodic.ID, 393 Namespace: nonPeriodic.Namespace, 394 } 395 tupleParameterized := structs.NamespacedID{ 396 ID: parameterizedPeriodic.ID, 397 Namespace: parameterizedPeriodic.Namespace, 398 } 399 400 // Check that the new leader is tracking the periodic job only 401 testutil.WaitForResult(func() (bool, error) { 402 leader.periodicDispatcher.l.Lock() 403 defer leader.periodicDispatcher.l.Unlock() 404 if _, tracked := leader.periodicDispatcher.tracked[tuplePeriodic]; !tracked { 405 return false, fmt.Errorf("periodic job not tracked") 406 } 407 if _, tracked := leader.periodicDispatcher.tracked[tupleNonPeriodic]; tracked { 408 return false, fmt.Errorf("non periodic job tracked") 409 } 410 if _, tracked := leader.periodicDispatcher.tracked[tupleParameterized]; tracked { 411 return false, fmt.Errorf("parameterized periodic job tracked") 412 } 413 return true, nil 414 }, func(err error) { 415 t.Fatalf(err.Error()) 416 }) 417 } 418 419 func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { 420 s1 := TestServer(t, func(c *Config) { 421 c.NumSchedulers = 0 422 }) 423 defer s1.Shutdown() 424 testutil.WaitForLeader(t, s1.RPC) 425 426 // Inject a periodic job that will be triggered soon. 427 launch := time.Now().Add(1 * time.Second) 428 job := testPeriodicJob(launch) 429 req := structs.JobRegisterRequest{ 430 Job: job, 431 WriteRequest: structs.WriteRequest{ 432 Namespace: job.Namespace, 433 }, 434 } 435 _, _, err := s1.raftApply(structs.JobRegisterRequestType, req) 436 if err != nil { 437 t.Fatalf("err: %v", err) 438 } 439 440 // Flush the periodic dispatcher, ensuring that no evals will be created. 441 s1.periodicDispatcher.SetEnabled(false) 442 443 // Get the current time to ensure the launch time is after this once we 444 // restore. 445 now := time.Now() 446 447 // Sleep till after the job should have been launched. 448 time.Sleep(3 * time.Second) 449 450 // Restore the periodic dispatcher. 451 s1.periodicDispatcher.SetEnabled(true) 452 s1.restorePeriodicDispatcher() 453 454 // Ensure the job is tracked. 455 tuple := structs.NamespacedID{ 456 ID: job.ID, 457 Namespace: job.Namespace, 458 } 459 if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked { 460 t.Fatalf("periodic job not restored") 461 } 462 463 // Check that an eval was made. 464 ws := memdb.NewWatchSet() 465 last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID) 466 if err != nil || last == nil { 467 t.Fatalf("failed to get periodic launch time: %v", err) 468 } 469 470 if last.Launch.Before(now) { 471 t.Fatalf("restorePeriodicDispatcher did not force launch: last %v; want after %v", last.Launch, now) 472 } 473 } 474 475 func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) { 476 s1 := TestServer(t, func(c *Config) { 477 c.NumSchedulers = 0 478 }) 479 defer s1.Shutdown() 480 testutil.WaitForLeader(t, s1.RPC) 481 482 // Inject a periodic job that triggered once in the past, should trigger now 483 // and once in the future. 484 now := time.Now() 485 past := now.Add(-1 * time.Second) 486 future := now.Add(10 * time.Second) 487 job := testPeriodicJob(past, now, future) 488 req := structs.JobRegisterRequest{ 489 Job: job, 490 WriteRequest: structs.WriteRequest{ 491 Namespace: job.Namespace, 492 }, 493 } 494 _, _, err := s1.raftApply(structs.JobRegisterRequestType, req) 495 if err != nil { 496 t.Fatalf("err: %v", err) 497 } 498 499 // Create an eval for the past launch. 500 s1.periodicDispatcher.createEval(job, past) 501 502 // Flush the periodic dispatcher, ensuring that no evals will be created. 503 s1.periodicDispatcher.SetEnabled(false) 504 505 // Sleep till after the job should have been launched. 506 time.Sleep(3 * time.Second) 507 508 // Restore the periodic dispatcher. 509 s1.periodicDispatcher.SetEnabled(true) 510 s1.restorePeriodicDispatcher() 511 512 // Ensure the job is tracked. 513 tuple := structs.NamespacedID{ 514 ID: job.ID, 515 Namespace: job.Namespace, 516 } 517 if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked { 518 t.Fatalf("periodic job not restored") 519 } 520 521 // Check that an eval was made. 522 ws := memdb.NewWatchSet() 523 last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID) 524 if err != nil || last == nil { 525 t.Fatalf("failed to get periodic launch time: %v", err) 526 } 527 if last.Launch == past { 528 t.Fatalf("restorePeriodicDispatcher did not force launch") 529 } 530 } 531 532 func TestLeader_PeriodicDispatch(t *testing.T) { 533 s1 := TestServer(t, func(c *Config) { 534 c.NumSchedulers = 0 535 c.EvalGCInterval = 5 * time.Millisecond 536 }) 537 defer s1.Shutdown() 538 539 // Wait for a periodic dispatch 540 testutil.WaitForResult(func() (bool, error) { 541 stats := s1.evalBroker.Stats() 542 bySched, ok := stats.ByScheduler[structs.JobTypeCore] 543 if !ok { 544 return false, nil 545 } 546 return bySched.Ready > 0, nil 547 }, func(err error) { 548 t.Fatalf("should pending job") 549 }) 550 } 551 552 func TestLeader_ReapFailedEval(t *testing.T) { 553 s1 := TestServer(t, func(c *Config) { 554 c.NumSchedulers = 0 555 c.EvalDeliveryLimit = 1 556 }) 557 defer s1.Shutdown() 558 testutil.WaitForLeader(t, s1.RPC) 559 560 // Wait for a periodic dispatch 561 eval := mock.Eval() 562 s1.evalBroker.Enqueue(eval) 563 564 // Dequeue and Nack 565 out, token, err := s1.evalBroker.Dequeue(defaultSched, time.Second) 566 if err != nil { 567 t.Fatalf("err: %v", err) 568 } 569 s1.evalBroker.Nack(out.ID, token) 570 571 // Wait for an updated and followup evaluation 572 state := s1.fsm.State() 573 testutil.WaitForResult(func() (bool, error) { 574 ws := memdb.NewWatchSet() 575 out, err := state.EvalByID(ws, eval.ID) 576 if err != nil { 577 return false, err 578 } 579 if out == nil { 580 return false, fmt.Errorf("expect original evaluation to exist") 581 } 582 if out.Status != structs.EvalStatusFailed { 583 return false, fmt.Errorf("got status %v; want %v", out.Status, structs.EvalStatusFailed) 584 } 585 if out.NextEval == "" { 586 return false, fmt.Errorf("got empty NextEval") 587 } 588 // See if there is a followup 589 evals, err := state.EvalsByJob(ws, eval.Namespace, eval.JobID) 590 if err != nil { 591 return false, err 592 } 593 594 if l := len(evals); l != 2 { 595 return false, fmt.Errorf("got %d evals, want 2", l) 596 } 597 598 for _, e := range evals { 599 if e.ID == eval.ID { 600 continue 601 } 602 603 if e.Status != structs.EvalStatusPending { 604 return false, fmt.Errorf("follow up eval has status %v; want %v", 605 e.Status, structs.EvalStatusPending) 606 } 607 608 if e.ID != out.NextEval { 609 return false, fmt.Errorf("follow up eval id is %v; orig eval NextEval %v", 610 e.ID, out.NextEval) 611 } 612 613 if e.Wait < s1.config.EvalFailedFollowupBaselineDelay || 614 e.Wait > s1.config.EvalFailedFollowupBaselineDelay+s1.config.EvalFailedFollowupDelayRange { 615 return false, fmt.Errorf("bad wait: %v", e.Wait) 616 } 617 618 if e.TriggeredBy != structs.EvalTriggerFailedFollowUp { 619 return false, fmt.Errorf("follow up eval TriggeredBy %v; want %v", 620 e.TriggeredBy, structs.EvalTriggerFailedFollowUp) 621 } 622 } 623 624 return true, nil 625 }, func(err error) { 626 t.Fatalf("err: %v", err) 627 }) 628 } 629 630 func TestLeader_ReapDuplicateEval(t *testing.T) { 631 s1 := TestServer(t, func(c *Config) { 632 c.NumSchedulers = 0 633 }) 634 defer s1.Shutdown() 635 testutil.WaitForLeader(t, s1.RPC) 636 637 // Create a duplicate blocked eval 638 eval := mock.Eval() 639 eval.CreateIndex = 100 640 eval2 := mock.Eval() 641 eval2.JobID = eval.JobID 642 eval2.CreateIndex = 102 643 s1.blockedEvals.Block(eval) 644 s1.blockedEvals.Block(eval2) 645 646 // Wait for the evaluation to marked as cancelled 647 state := s1.fsm.State() 648 testutil.WaitForResult(func() (bool, error) { 649 ws := memdb.NewWatchSet() 650 out, err := state.EvalByID(ws, eval.ID) 651 if err != nil { 652 return false, err 653 } 654 return out != nil && out.Status == structs.EvalStatusCancelled, nil 655 }, func(err error) { 656 t.Fatalf("err: %v", err) 657 }) 658 } 659 660 func TestLeader_RestoreVaultAccessors(t *testing.T) { 661 s1 := TestServer(t, func(c *Config) { 662 c.NumSchedulers = 0 663 }) 664 defer s1.Shutdown() 665 testutil.WaitForLeader(t, s1.RPC) 666 667 // Insert a vault accessor that should be revoked 668 state := s1.fsm.State() 669 va := mock.VaultAccessor() 670 if err := state.UpsertVaultAccessor(100, []*structs.VaultAccessor{va}); err != nil { 671 t.Fatalf("bad: %v", err) 672 } 673 674 // Swap the Vault client 675 tvc := &TestVaultClient{} 676 s1.vault = tvc 677 678 // Do a restore 679 if err := s1.restoreRevokingAccessors(); err != nil { 680 t.Fatalf("Failed to restore: %v", err) 681 } 682 683 if len(tvc.RevokedTokens) != 1 && tvc.RevokedTokens[0].Accessor != va.Accessor { 684 t.Fatalf("Bad revoked accessors: %v", tvc.RevokedTokens) 685 } 686 } 687 688 func TestLeader_ReplicateACLPolicies(t *testing.T) { 689 t.Parallel() 690 s1, root := TestACLServer(t, func(c *Config) { 691 c.Region = "region1" 692 c.AuthoritativeRegion = "region1" 693 c.ACLEnabled = true 694 }) 695 defer s1.Shutdown() 696 s2, _ := TestACLServer(t, func(c *Config) { 697 c.Region = "region2" 698 c.AuthoritativeRegion = "region1" 699 c.ACLEnabled = true 700 c.ReplicationBackoff = 20 * time.Millisecond 701 c.ReplicationToken = root.SecretID 702 }) 703 defer s2.Shutdown() 704 TestJoin(t, s1, s2) 705 testutil.WaitForLeader(t, s1.RPC) 706 testutil.WaitForLeader(t, s2.RPC) 707 708 // Write a policy to the authoritative region 709 p1 := mock.ACLPolicy() 710 if err := s1.State().UpsertACLPolicies(100, []*structs.ACLPolicy{p1}); err != nil { 711 t.Fatalf("bad: %v", err) 712 } 713 714 // Wait for the policy to replicate 715 testutil.WaitForResult(func() (bool, error) { 716 state := s2.State() 717 out, err := state.ACLPolicyByName(nil, p1.Name) 718 return out != nil, err 719 }, func(err error) { 720 t.Fatalf("should replicate policy") 721 }) 722 } 723 724 func TestLeader_DiffACLPolicies(t *testing.T) { 725 t.Parallel() 726 727 state := state.TestStateStore(t) 728 729 // Populate the local state 730 p1 := mock.ACLPolicy() 731 p2 := mock.ACLPolicy() 732 p3 := mock.ACLPolicy() 733 assert.Nil(t, state.UpsertACLPolicies(100, []*structs.ACLPolicy{p1, p2, p3})) 734 735 // Simulate a remote list 736 p2Stub := p2.Stub() 737 p2Stub.ModifyIndex = 50 // Ignored, same index 738 p3Stub := p3.Stub() 739 p3Stub.ModifyIndex = 100 // Updated, higher index 740 p3Stub.Hash = []byte{0, 1, 2, 3} 741 p4 := mock.ACLPolicy() 742 remoteList := []*structs.ACLPolicyListStub{ 743 p2Stub, 744 p3Stub, 745 p4.Stub(), 746 } 747 delete, update := diffACLPolicies(state, 50, remoteList) 748 749 // P1 does not exist on the remote side, should delete 750 assert.Equal(t, []string{p1.Name}, delete) 751 752 // P2 is un-modified - ignore. P3 modified, P4 new. 753 assert.Equal(t, []string{p3.Name, p4.Name}, update) 754 } 755 756 func TestLeader_ReplicateACLTokens(t *testing.T) { 757 t.Parallel() 758 s1, root := TestACLServer(t, func(c *Config) { 759 c.Region = "region1" 760 c.AuthoritativeRegion = "region1" 761 c.ACLEnabled = true 762 }) 763 defer s1.Shutdown() 764 s2, _ := TestACLServer(t, func(c *Config) { 765 c.Region = "region2" 766 c.AuthoritativeRegion = "region1" 767 c.ACLEnabled = true 768 c.ReplicationBackoff = 20 * time.Millisecond 769 c.ReplicationToken = root.SecretID 770 }) 771 defer s2.Shutdown() 772 TestJoin(t, s1, s2) 773 testutil.WaitForLeader(t, s1.RPC) 774 testutil.WaitForLeader(t, s2.RPC) 775 776 // Write a token to the authoritative region 777 p1 := mock.ACLToken() 778 p1.Global = true 779 if err := s1.State().UpsertACLTokens(100, []*structs.ACLToken{p1}); err != nil { 780 t.Fatalf("bad: %v", err) 781 } 782 783 // Wait for the token to replicate 784 testutil.WaitForResult(func() (bool, error) { 785 state := s2.State() 786 out, err := state.ACLTokenByAccessorID(nil, p1.AccessorID) 787 return out != nil, err 788 }, func(err error) { 789 t.Fatalf("should replicate token") 790 }) 791 } 792 793 func TestLeader_DiffACLTokens(t *testing.T) { 794 t.Parallel() 795 796 state := state.TestStateStore(t) 797 798 // Populate the local state 799 p0 := mock.ACLToken() 800 p1 := mock.ACLToken() 801 p1.Global = true 802 p2 := mock.ACLToken() 803 p2.Global = true 804 p3 := mock.ACLToken() 805 p3.Global = true 806 assert.Nil(t, state.UpsertACLTokens(100, []*structs.ACLToken{p0, p1, p2, p3})) 807 808 // Simulate a remote list 809 p2Stub := p2.Stub() 810 p2Stub.ModifyIndex = 50 // Ignored, same index 811 p3Stub := p3.Stub() 812 p3Stub.ModifyIndex = 100 // Updated, higher index 813 p3Stub.Hash = []byte{0, 1, 2, 3} 814 p4 := mock.ACLToken() 815 p4.Global = true 816 remoteList := []*structs.ACLTokenListStub{ 817 p2Stub, 818 p3Stub, 819 p4.Stub(), 820 } 821 delete, update := diffACLTokens(state, 50, remoteList) 822 823 // P0 is local and should be ignored 824 // P1 does not exist on the remote side, should delete 825 assert.Equal(t, []string{p1.AccessorID}, delete) 826 827 // P2 is un-modified - ignore. P3 modified, P4 new. 828 assert.Equal(t, []string{p3.AccessorID, p4.AccessorID}, update) 829 } 830 831 func TestLeader_UpgradeRaftVersion(t *testing.T) { 832 t.Parallel() 833 s1 := TestServer(t, func(c *Config) { 834 c.Datacenter = "dc1" 835 c.RaftConfig.ProtocolVersion = 2 836 }) 837 defer s1.Shutdown() 838 839 s2 := TestServer(t, func(c *Config) { 840 c.DevDisableBootstrap = true 841 c.RaftConfig.ProtocolVersion = 1 842 }) 843 defer s2.Shutdown() 844 845 s3 := TestServer(t, func(c *Config) { 846 c.DevDisableBootstrap = true 847 c.RaftConfig.ProtocolVersion = 2 848 }) 849 defer s3.Shutdown() 850 851 servers := []*Server{s1, s2, s3} 852 853 // Try to join 854 TestJoin(t, s1, s2, s3) 855 856 for _, s := range servers { 857 testutil.WaitForResult(func() (bool, error) { 858 peers, _ := s.numPeers() 859 return peers == 3, nil 860 }, func(err error) { 861 t.Fatalf("should have 3 peers") 862 }) 863 } 864 865 // Kill the v1 server 866 if err := s2.Leave(); err != nil { 867 t.Fatal(err) 868 } 869 870 for _, s := range []*Server{s1, s3} { 871 minVer, err := s.autopilot.MinRaftProtocol() 872 if err != nil { 873 t.Fatal(err) 874 } 875 if got, want := minVer, 2; got != want { 876 t.Fatalf("got min raft version %d want %d", got, want) 877 } 878 } 879 880 // Replace the dead server with one running raft protocol v3 881 s4 := TestServer(t, func(c *Config) { 882 c.DevDisableBootstrap = true 883 c.Datacenter = "dc1" 884 c.RaftConfig.ProtocolVersion = 3 885 }) 886 defer s4.Shutdown() 887 TestJoin(t, s1, s4) 888 servers[1] = s4 889 890 // Make sure we're back to 3 total peers with the new one added via ID 891 for _, s := range servers { 892 testutil.WaitForResult(func() (bool, error) { 893 addrs := 0 894 ids := 0 895 future := s.raft.GetConfiguration() 896 if err := future.Error(); err != nil { 897 return false, err 898 } 899 for _, server := range future.Configuration().Servers { 900 if string(server.ID) == string(server.Address) { 901 addrs++ 902 } else { 903 ids++ 904 } 905 } 906 if got, want := addrs, 2; got != want { 907 return false, fmt.Errorf("got %d server addresses want %d", got, want) 908 } 909 if got, want := ids, 1; got != want { 910 return false, fmt.Errorf("got %d server ids want %d", got, want) 911 } 912 913 return true, nil 914 }, func(err error) { 915 t.Fatal(err) 916 }) 917 } 918 } 919 920 func TestLeader_Reelection(t *testing.T) { 921 raftProtocols := []int{1, 2, 3} 922 for _, p := range raftProtocols { 923 t.Run("Leader Election - Protocol version "+string(p), func(t *testing.T) { 924 leaderElectionTest(t, raft.ProtocolVersion(p)) 925 }) 926 } 927 928 } 929 930 func leaderElectionTest(t *testing.T, raftProtocol raft.ProtocolVersion) { 931 s1 := TestServer(t, func(c *Config) { 932 c.BootstrapExpect = 3 933 c.RaftConfig.ProtocolVersion = raftProtocol 934 }) 935 defer s1.Shutdown() 936 937 s2 := TestServer(t, func(c *Config) { 938 c.BootstrapExpect = 3 939 c.DevDisableBootstrap = true 940 c.RaftConfig.ProtocolVersion = raftProtocol 941 }) 942 defer s2.Shutdown() 943 944 s3 := TestServer(t, func(c *Config) { 945 c.BootstrapExpect = 3 946 c.DevDisableBootstrap = true 947 c.RaftConfig.ProtocolVersion = raftProtocol 948 }) 949 950 servers := []*Server{s1, s2, s3} 951 952 // Try to join 953 TestJoin(t, s1, s2, s3) 954 testutil.WaitForLeader(t, s1.RPC) 955 956 testutil.WaitForResult(func() (bool, error) { 957 future := s1.raft.GetConfiguration() 958 if err := future.Error(); err != nil { 959 return false, err 960 } 961 962 for _, server := range future.Configuration().Servers { 963 if server.Suffrage == raft.Nonvoter { 964 return false, fmt.Errorf("non-voter %v", server) 965 } 966 } 967 968 return true, nil 969 }, func(err error) { 970 t.Fatal(err) 971 }) 972 973 var leader, nonLeader *Server 974 for _, s := range servers { 975 if s.IsLeader() { 976 leader = s 977 } else { 978 nonLeader = s 979 } 980 } 981 982 // Shutdown the leader 983 leader.Shutdown() 984 // Wait for new leader to elect 985 testutil.WaitForLeader(t, nonLeader.RPC) 986 } 987 988 func TestLeader_RollRaftServer(t *testing.T) { 989 t.Parallel() 990 s1 := TestServer(t, func(c *Config) { 991 c.RaftConfig.ProtocolVersion = 2 992 }) 993 defer s1.Shutdown() 994 995 s2 := TestServer(t, func(c *Config) { 996 c.DevDisableBootstrap = true 997 c.RaftConfig.ProtocolVersion = 2 998 }) 999 defer s2.Shutdown() 1000 1001 s3 := TestServer(t, func(c *Config) { 1002 c.DevDisableBootstrap = true 1003 c.RaftConfig.ProtocolVersion = 2 1004 }) 1005 defer s3.Shutdown() 1006 1007 servers := []*Server{s1, s2, s3} 1008 1009 // Try to join 1010 TestJoin(t, s1, s2, s3) 1011 1012 for _, s := range servers { 1013 retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) }) 1014 } 1015 1016 // Kill the first v2 server 1017 s1.Shutdown() 1018 1019 for _, s := range []*Server{s1, s3} { 1020 retry.Run(t, func(r *retry.R) { 1021 minVer, err := s.autopilot.MinRaftProtocol() 1022 if err != nil { 1023 r.Fatal(err) 1024 } 1025 if got, want := minVer, 2; got != want { 1026 r.Fatalf("got min raft version %d want %d", got, want) 1027 } 1028 }) 1029 } 1030 1031 // Replace the dead server with one running raft protocol v3 1032 s4 := TestServer(t, func(c *Config) { 1033 c.DevDisableBootstrap = true 1034 c.RaftConfig.ProtocolVersion = 3 1035 }) 1036 defer s4.Shutdown() 1037 TestJoin(t, s4, s2) 1038 servers[0] = s4 1039 1040 // Kill the second v2 server 1041 s2.Shutdown() 1042 1043 for _, s := range []*Server{s3, s4} { 1044 retry.Run(t, func(r *retry.R) { 1045 minVer, err := s.autopilot.MinRaftProtocol() 1046 if err != nil { 1047 r.Fatal(err) 1048 } 1049 if got, want := minVer, 2; got != want { 1050 r.Fatalf("got min raft version %d want %d", got, want) 1051 } 1052 }) 1053 } 1054 // Replace another dead server with one running raft protocol v3 1055 s5 := TestServer(t, func(c *Config) { 1056 c.DevDisableBootstrap = true 1057 c.RaftConfig.ProtocolVersion = 3 1058 }) 1059 defer s5.Shutdown() 1060 TestJoin(t, s5, s4) 1061 servers[1] = s5 1062 1063 // Kill the last v2 server, now minRaftProtocol should be 3 1064 s3.Shutdown() 1065 1066 for _, s := range []*Server{s4, s5} { 1067 retry.Run(t, func(r *retry.R) { 1068 minVer, err := s.autopilot.MinRaftProtocol() 1069 if err != nil { 1070 r.Fatal(err) 1071 } 1072 if got, want := minVer, 3; got != want { 1073 r.Fatalf("got min raft version %d want %d", got, want) 1074 } 1075 }) 1076 } 1077 1078 // Replace the last dead server with one running raft protocol v3 1079 s6 := TestServer(t, func(c *Config) { 1080 c.DevDisableBootstrap = true 1081 c.RaftConfig.ProtocolVersion = 3 1082 }) 1083 defer s6.Shutdown() 1084 TestJoin(t, s6, s4) 1085 servers[2] = s6 1086 1087 // Make sure all the dead servers are removed and we're back to 3 total peers 1088 for _, s := range servers { 1089 retry.Run(t, func(r *retry.R) { 1090 addrs := 0 1091 ids := 0 1092 future := s.raft.GetConfiguration() 1093 if err := future.Error(); err != nil { 1094 r.Fatal(err) 1095 } 1096 for _, server := range future.Configuration().Servers { 1097 if string(server.ID) == string(server.Address) { 1098 addrs++ 1099 } else { 1100 ids++ 1101 } 1102 } 1103 if got, want := addrs, 0; got != want { 1104 r.Fatalf("got %d server addresses want %d", got, want) 1105 } 1106 if got, want := ids, 3; got != want { 1107 r.Fatalf("got %d server ids want %d", got, want) 1108 } 1109 }) 1110 } 1111 } 1112 1113 func TestLeader_RevokeLeadership_MultipleTimes(t *testing.T) { 1114 s1 := TestServer(t, nil) 1115 defer s1.Shutdown() 1116 testutil.WaitForLeader(t, s1.RPC) 1117 1118 testutil.WaitForResult(func() (bool, error) { 1119 return s1.evalBroker.Enabled(), nil 1120 }, func(err error) { 1121 t.Fatalf("should have finished establish leader loop") 1122 }) 1123 1124 require.Nil(t, s1.revokeLeadership()) 1125 require.Nil(t, s1.revokeLeadership()) 1126 require.Nil(t, s1.revokeLeadership()) 1127 } 1128 1129 // Test doing an inplace upgrade on a server from raft protocol 2 to 3 1130 // This verifies that removing the server and adding it back with a uuid works 1131 // even if the server's address stays the same. 1132 func TestServer_ReconcileMember(t *testing.T) { 1133 // Create a three node cluster 1134 t.Parallel() 1135 s1 := TestServer(t, func(c *Config) { 1136 c.DevDisableBootstrap = true 1137 c.RaftConfig.ProtocolVersion = 3 1138 }) 1139 defer s1.Shutdown() 1140 1141 s2 := TestServer(t, func(c *Config) { 1142 c.DevDisableBootstrap = true 1143 c.RaftConfig.ProtocolVersion = 3 1144 }) 1145 defer s2.Shutdown() 1146 1147 s3 := TestServer(t, func(c *Config) { 1148 c.DevDisableBootstrap = true 1149 c.RaftConfig.ProtocolVersion = 2 1150 }) 1151 defer s3.Shutdown() 1152 TestJoin(t, s1, s2, s3) 1153 testutil.WaitForLeader(t, s1.RPC) 1154 1155 // Create a memberlist object for s3, with raft protocol upgraded to 3 1156 upgradedS3Member := serf.Member{ 1157 Name: s3.config.NodeName, 1158 Addr: s3.config.RPCAddr.IP, 1159 Status: serf.StatusAlive, 1160 Tags: make(map[string]string), 1161 } 1162 upgradedS3Member.Tags["role"] = "nomad" 1163 upgradedS3Member.Tags["id"] = s3.config.NodeID 1164 upgradedS3Member.Tags["region"] = s3.config.Region 1165 upgradedS3Member.Tags["dc"] = s3.config.Datacenter 1166 upgradedS3Member.Tags["rpc_addr"] = "127.0.0.1" 1167 upgradedS3Member.Tags["port"] = strconv.Itoa(s3.config.RPCAddr.Port) 1168 upgradedS3Member.Tags["build"] = "0.8.0" 1169 upgradedS3Member.Tags["vsn"] = "2" 1170 upgradedS3Member.Tags["mvn"] = "1" 1171 upgradedS3Member.Tags["raft_vsn"] = "3" 1172 1173 // Find the leader so that we can call reconcile member on it 1174 var leader *Server 1175 for _, s := range []*Server{s1, s2, s3} { 1176 if s.IsLeader() { 1177 leader = s 1178 } 1179 } 1180 leader.reconcileMember(upgradedS3Member) 1181 // This should remove s3 from the config and potentially cause a leader election 1182 testutil.WaitForLeader(t, s1.RPC) 1183 1184 // Figure out the new leader and call reconcile again, this should add s3 with the new ID format 1185 for _, s := range []*Server{s1, s2, s3} { 1186 if s.IsLeader() { 1187 leader = s 1188 } 1189 } 1190 leader.reconcileMember(upgradedS3Member) 1191 testutil.WaitForLeader(t, s1.RPC) 1192 future := s2.raft.GetConfiguration() 1193 if err := future.Error(); err != nil { 1194 t.Fatal(err) 1195 } 1196 addrs := 0 1197 ids := 0 1198 for _, server := range future.Configuration().Servers { 1199 if string(server.ID) == string(server.Address) { 1200 addrs++ 1201 } else { 1202 ids++ 1203 } 1204 } 1205 // After this, all three servers should have IDs in raft 1206 if got, want := addrs, 0; got != want { 1207 t.Fatalf("got %d server addresses want %d", got, want) 1208 } 1209 if got, want := ids, 3; got != want { 1210 t.Fatalf("got %d server ids want %d", got, want) 1211 } 1212 }