github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/nomad/leader_test.go

package nomad

import (
    "fmt"
    "testing"
    "time"

    "github.com/hashicorp/consul/testutil/retry"
    memdb "github.com/hashicorp/go-memdb"
    "github.com/hashicorp/nomad/nomad/mock"
    "github.com/hashicorp/nomad/nomad/state"
    "github.com/hashicorp/nomad/nomad/structs"
    "github.com/hashicorp/nomad/testutil"
    "github.com/stretchr/testify/assert"
)

func TestLeader_LeftServer(t *testing.T) {
    s1 := TestServer(t, nil)
    defer s1.Shutdown()

    s2 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
    })
    defer s2.Shutdown()

    s3 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
    })
    defer s3.Shutdown()
    servers := []*Server{s1, s2, s3}
    TestJoin(t, s1, s2, s3)

    for _, s := range servers {
        testutil.WaitForResult(func() (bool, error) {
            peers, _ := s.numPeers()
            return peers == 3, nil
        }, func(err error) {
            t.Fatalf("should have 3 peers")
        })
    }

    // Kill any server
    var peer *Server
    for _, s := range servers {
        if !s.IsLeader() {
            peer = s
            break
        }
    }
    if peer == nil {
        t.Fatalf("Should have a non-leader")
    }
    peer.Shutdown()
    name := fmt.Sprintf("%s.%s", peer.config.NodeName, peer.config.Region)

    testutil.WaitForResult(func() (bool, error) {
        for _, s := range servers {
            if s == peer {
                continue
            }

            // Force remove the non-leader (transition to left state)
            if err := s.RemoveFailedNode(name); err != nil {
                return false, err
            }

            peers, _ := s.numPeers()
            return peers == 2, fmt.Errorf("expected 2 peers, got %d", peers)
        }

        return true, nil
    }, func(err error) {
        t.Fatalf("err: %s", err)
    })
}

func TestLeader_LeftLeader(t *testing.T) {
    s1 := TestServer(t, nil)
    defer s1.Shutdown()

    s2 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
    })
    defer s2.Shutdown()

    s3 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
    })
    defer s3.Shutdown()
    servers := []*Server{s1, s2, s3}
    TestJoin(t, s1, s2, s3)

    for _, s := range servers {
        testutil.WaitForResult(func() (bool, error) {
            peers, _ := s.numPeers()
            return peers == 3, nil
        }, func(err error) {
            t.Fatalf("should have 3 peers")
        })
    }

    // Kill the leader!
    var leader *Server
    for _, s := range servers {
        if s.IsLeader() {
            leader = s
            break
        }
    }
    if leader == nil {
        t.Fatalf("Should have a leader")
    }
    leader.Leave()
    leader.Shutdown()

    for _, s := range servers {
        if s == leader {
            continue
        }
        testutil.WaitForResult(func() (bool, error) {
            peers, _ := s.numPeers()
            return peers == 2, fmt.Errorf("expected 2 peers, got %d", peers)
        }, func(err error) {
            t.Fatalf("should have 2 peers: %v", err)
        })
    }
}

func TestLeader_MultiBootstrap(t *testing.T) {
    s1 := TestServer(t, nil)
    defer s1.Shutdown()

    s2 := TestServer(t, nil)
    defer s2.Shutdown()
    servers := []*Server{s1, s2}
    TestJoin(t, s1, s2)

    for _, s := range servers {
        testutil.WaitForResult(func() (bool, error) {
            peers := s.Members()
            return len(peers) == 2, nil
        }, func(err error) {
            t.Fatalf("should have 2 peers")
        })
    }

    // Ensure we don't have multiple raft peers
    for _, s := range servers {
        peers, _ := s.numPeers()
        if peers != 1 {
            t.Fatalf("should only have 1 raft peer!")
        }
    }
}

func TestLeader_PlanQueue_Reset(t *testing.T) {
    s1 := TestServer(t, nil)
    defer s1.Shutdown()

    s2 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
    })
    defer s2.Shutdown()

    s3 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
    })
    defer s3.Shutdown()
    servers := []*Server{s1, s2, s3}
    TestJoin(t, s1, s2, s3)

    for _, s := range servers {
        testutil.WaitForResult(func() (bool, error) {
            peers, _ := s.numPeers()
            return peers == 3, nil
        }, func(err error) {
            t.Fatalf("should have 3 peers")
        })
    }

    var leader *Server
    for _, s := range servers {
        if s.IsLeader() {
            leader = s
            break
        }
    }
    if leader == nil {
        t.Fatalf("Should have a leader")
    }

    if !leader.planQueue.Enabled() {
        t.Fatalf("should enable plan queue")
    }

    for _, s := range servers {
        if !s.IsLeader() && s.planQueue.Enabled() {
            t.Fatalf("plan queue should not be enabled")
        }
    }

    // Kill the leader
    leader.Shutdown()
    time.Sleep(100 * time.Millisecond)

    // Wait for a new leader
    leader = nil
    testutil.WaitForResult(func() (bool, error) {
        for _, s := range servers {
            if s.IsLeader() {
                leader = s
                return true, nil
            }
        }
        return false, nil
    }, func(err error) {
        t.Fatalf("should have leader")
    })

    // Check that the plan queue is enabled on the new leader
    testutil.WaitForResult(func() (bool, error) {
        return leader.planQueue.Enabled(), nil
    }, func(err error) {
        t.Fatalf("should enable plan queue")
    })
}

func TestLeader_EvalBroker_Reset(t *testing.T) {
    s1 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
    })
    defer s1.Shutdown()

    s2 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
        c.DevDisableBootstrap = true
    })
    defer s2.Shutdown()

    s3 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
        c.DevDisableBootstrap = true
    })
    defer s3.Shutdown()
    servers := []*Server{s1, s2, s3}
    TestJoin(t, s1, s2, s3)
    testutil.WaitForLeader(t, s1.RPC)

    for _, s := range servers {
        testutil.WaitForResult(func() (bool, error) {
            peers, _ := s.numPeers()
            return peers == 3, nil
        }, func(err error) {
            t.Fatalf("should have 3 peers")
        })
    }

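    // Find the current leader so a pending eval can be injected through it
    // before the failover is triggered.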
    var leader *Server
    for _, s := range servers {
        if s.IsLeader() {
            leader = s
            break
        }
    }
    if leader == nil {
        t.Fatalf("Should have a leader")
    }

    // Inject a pending eval
    req := structs.EvalUpdateRequest{
        Evals: []*structs.Evaluation{mock.Eval()},
    }
    _, _, err := leader.raftApply(structs.EvalUpdateRequestType, req)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Kill the leader
    leader.Shutdown()
    time.Sleep(100 * time.Millisecond)

    // Wait for a new leader
    leader = nil
    testutil.WaitForResult(func() (bool, error) {
        for _, s := range servers {
            if s.IsLeader() {
                leader = s
                return true, nil
            }
        }
        return false, nil
    }, func(err error) {
        t.Fatalf("should have leader")
    })

    // Check that the new leader has a pending evaluation
    testutil.WaitForResult(func() (bool, error) {
        stats := leader.evalBroker.Stats()
        return stats.TotalReady == 1, nil
    }, func(err error) {
        t.Fatalf("should have pending evaluation")
    })
}

func TestLeader_PeriodicDispatcher_Restore_Adds(t *testing.T) {
    s1 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
    })
    defer s1.Shutdown()

    s2 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
        c.DevDisableBootstrap = true
    })
    defer s2.Shutdown()

    s3 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
        c.DevDisableBootstrap = true
    })
    defer s3.Shutdown()
    servers := []*Server{s1, s2, s3}
    TestJoin(t, s1, s2, s3)
    testutil.WaitForLeader(t, s1.RPC)

    for _, s := range servers {
        testutil.WaitForResult(func() (bool, error) {
            peers, _ := s.numPeers()
            return peers == 3, nil
        }, func(err error) {
            t.Fatalf("should have 3 peers")
        })
    }

    var leader *Server
    for _, s := range servers {
        if s.IsLeader() {
            leader = s
            break
        }
    }
    if leader == nil {
        t.Fatalf("Should have a leader")
    }

    // Inject a periodic job, a parameterized periodic job and a non-periodic job
    periodic := mock.PeriodicJob()
    nonPeriodic := mock.Job()
    parameterizedPeriodic := mock.PeriodicJob()
    parameterizedPeriodic.ParameterizedJob = &structs.ParameterizedJobConfig{}
    for _, job := range []*structs.Job{nonPeriodic, periodic, parameterizedPeriodic} {
        req := structs.JobRegisterRequest{
            Job: job,
            WriteRequest: structs.WriteRequest{
                Namespace: job.Namespace,
            },
        }
        _, _, err := leader.raftApply(structs.JobRegisterRequestType, req)
        if err != nil {
            t.Fatalf("err: %v", err)
        }
    }

    // Kill the leader
    leader.Shutdown()
    time.Sleep(100 * time.Millisecond)

    // Wait for a new leader
    leader = nil
    testutil.WaitForResult(func() (bool, error) {
        for _, s := range servers {
            if s.IsLeader() {
                leader = s
                return true, nil
            }
        }
        return false, nil
    }, func(err error) {
        t.Fatalf("should have leader")
    })

    tuplePeriodic := structs.NamespacedID{
        ID:        periodic.ID,
        Namespace: periodic.Namespace,
    }
    tupleNonPeriodic := structs.NamespacedID{
        ID:        nonPeriodic.ID,
        Namespace: nonPeriodic.Namespace,
    }
    tupleParameterized := structs.NamespacedID{
        ID:        parameterizedPeriodic.ID,
        Namespace: parameterizedPeriodic.Namespace,
    }

    // Check that the new leader is tracking the periodic job only
    testutil.WaitForResult(func() (bool, error) {
        if _, tracked := leader.periodicDispatcher.tracked[tuplePeriodic]; !tracked {
            return false, fmt.Errorf("periodic job not tracked")
        }
        if _, tracked := leader.periodicDispatcher.tracked[tupleNonPeriodic]; tracked {
            return false, fmt.Errorf("non periodic job tracked")
        }
        if _, tracked := leader.periodicDispatcher.tracked[tupleParameterized]; tracked {
            return false, fmt.Errorf("parameterized periodic job tracked")
        }
        return true, nil
    }, func(err error) {
        t.Fatal(err)
    })
}

func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) {
    s1 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
    })
    defer s1.Shutdown()
    testutil.WaitForLeader(t, s1.RPC)

    // Inject a periodic job that will be triggered soon.
    launch := time.Now().Add(1 * time.Second)
    job := testPeriodicJob(launch)
    req := structs.JobRegisterRequest{
        Job: job,
        WriteRequest: structs.WriteRequest{
            Namespace: job.Namespace,
        },
    }
    _, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Flush the periodic dispatcher, ensuring that no evals will be created.
    s1.periodicDispatcher.SetEnabled(false)

    // Get the current time to ensure the launch time is after this once we
    // restore.
    now := time.Now()

    // Sleep till after the job should have been launched.
    time.Sleep(3 * time.Second)

    // Restore the periodic dispatcher.
    s1.periodicDispatcher.SetEnabled(true)
    s1.restorePeriodicDispatcher()

    // Ensure the job is tracked.
    tuple := structs.NamespacedID{
        ID:        job.ID,
        Namespace: job.Namespace,
    }
    if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked {
        t.Fatalf("periodic job not restored")
    }

    // Check that an eval was made.
    ws := memdb.NewWatchSet()
    last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID)
    if err != nil || last == nil {
        t.Fatalf("failed to get periodic launch time: %v", err)
    }

    if last.Launch.Before(now) {
        t.Fatalf("restorePeriodicDispatcher did not force launch: last %v; want after %v", last.Launch, now)
    }
}

func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) {
    s1 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
    })
    defer s1.Shutdown()
    testutil.WaitForLeader(t, s1.RPC)

    // Inject a periodic job that triggered once in the past, should trigger now,
    // and once in the future.
    now := time.Now()
    past := now.Add(-1 * time.Second)
    future := now.Add(10 * time.Second)
    job := testPeriodicJob(past, now, future)
    req := structs.JobRegisterRequest{
        Job: job,
        WriteRequest: structs.WriteRequest{
            Namespace: job.Namespace,
        },
    }
    _, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Create an eval for the past launch.
    s1.periodicDispatcher.createEval(job, past)

    // Flush the periodic dispatcher, ensuring that no evals will be created.
    s1.periodicDispatcher.SetEnabled(false)

    // Sleep till after the job should have been launched.
    time.Sleep(3 * time.Second)

    // Restore the periodic dispatcher.
    s1.periodicDispatcher.SetEnabled(true)
    s1.restorePeriodicDispatcher()

    // Ensure the job is tracked.
    tuple := structs.NamespacedID{
        ID:        job.ID,
        Namespace: job.Namespace,
    }
    if _, tracked := s1.periodicDispatcher.tracked[tuple]; !tracked {
        t.Fatalf("periodic job not restored")
    }

    // Check that an eval was made.
    ws := memdb.NewWatchSet()
    last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.Namespace, job.ID)
    if err != nil || last == nil {
        t.Fatalf("failed to get periodic launch time: %v", err)
    }
    if last.Launch == past {
        t.Fatalf("restorePeriodicDispatcher did not force launch")
    }
}

func TestLeader_PeriodicDispatch(t *testing.T) {
    s1 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
        c.EvalGCInterval = 5 * time.Millisecond
    })
    defer s1.Shutdown()

    // Wait for a periodic dispatch
    testutil.WaitForResult(func() (bool, error) {
        stats := s1.evalBroker.Stats()
        bySched, ok := stats.ByScheduler[structs.JobTypeCore]
        if !ok {
            return false, nil
        }
        return bySched.Ready > 0, nil
    }, func(err error) {
        t.Fatalf("should have a pending job")
    })
}

func TestLeader_ReapFailedEval(t *testing.T) {
    s1 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
        c.EvalDeliveryLimit = 1
    })
    defer s1.Shutdown()
    testutil.WaitForLeader(t, s1.RPC)

    // Create an eval and enqueue it
    eval := mock.Eval()
    s1.evalBroker.Enqueue(eval)

    // Dequeue and Nack
    out, token, err := s1.evalBroker.Dequeue(defaultSched, time.Second)
    if err != nil {
        t.Fatalf("err: %v", err)
    }
    s1.evalBroker.Nack(out.ID, token)

    // Wait for an updated and followup evaluation
    state := s1.fsm.State()
    testutil.WaitForResult(func() (bool, error) {
        ws := memdb.NewWatchSet()
        out, err := state.EvalByID(ws, eval.ID)
        if err != nil {
            return false, err
        }
        if out == nil {
            return false, fmt.Errorf("expect original evaluation to exist")
        }
        if out.Status != structs.EvalStatusFailed {
            return false, fmt.Errorf("got status %v; want %v", out.Status, structs.EvalStatusFailed)
        }

        // See if there is a followup
        evals, err := state.EvalsByJob(ws, eval.Namespace, eval.JobID)
        if err != nil {
            return false, err
        }

        if l := len(evals); l != 2 {
            return false, fmt.Errorf("got %d evals, want 2", l)
        }

        for _, e := range evals {
            if e.ID == eval.ID {
                continue
            }

            if e.Status != structs.EvalStatusPending {
                return false, fmt.Errorf("follow up eval has status %v; want %v",
                    e.Status, structs.EvalStatusPending)
            }

            if e.Wait < s1.config.EvalFailedFollowupBaselineDelay ||
                e.Wait > s1.config.EvalFailedFollowupBaselineDelay+s1.config.EvalFailedFollowupDelayRange {
                return false, fmt.Errorf("bad wait: %v", e.Wait)
            }

            if e.TriggeredBy != structs.EvalTriggerFailedFollowUp {
                return false, fmt.Errorf("follow up eval TriggeredBy %v; want %v",
                    e.TriggeredBy, structs.EvalTriggerFailedFollowUp)
            }
        }

        return true, nil
    }, func(err error) {
        t.Fatalf("err: %v", err)
    })
}

func TestLeader_ReapDuplicateEval(t *testing.T) {
    s1 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
    })
    defer s1.Shutdown()
    testutil.WaitForLeader(t, s1.RPC)

    // Create a duplicate blocked eval
    eval := mock.Eval()
    eval2 := mock.Eval()
    eval2.JobID = eval.JobID
    s1.blockedEvals.Block(eval)
    s1.blockedEvals.Block(eval2)

    // Wait for the evaluation to be marked as cancelled
    state := s1.fsm.State()
    testutil.WaitForResult(func() (bool, error) {
        ws := memdb.NewWatchSet()
        out, err := state.EvalByID(ws, eval2.ID)
        if err != nil {
            return false, err
        }
        return out != nil && out.Status == structs.EvalStatusCancelled, nil
    }, func(err error) {
        t.Fatalf("err: %v", err)
    })
}

func TestLeader_RestoreVaultAccessors(t *testing.T) {
    s1 := TestServer(t, func(c *Config) {
        c.NumSchedulers = 0
    })
    defer s1.Shutdown()
    testutil.WaitForLeader(t, s1.RPC)

    // Insert a vault accessor that should be revoked
    state := s1.fsm.State()
    va := mock.VaultAccessor()
    if err := state.UpsertVaultAccessor(100, []*structs.VaultAccessor{va}); err != nil {
        t.Fatalf("bad: %v", err)
    }

    // Swap the Vault client
    tvc := &TestVaultClient{}
    s1.vault = tvc

    // Do a restore
    if err := s1.restoreRevokingAccessors(); err != nil {
        t.Fatalf("Failed to restore: %v", err)
    }

    if len(tvc.RevokedTokens) != 1 || tvc.RevokedTokens[0].Accessor != va.Accessor {
        t.Fatalf("Bad revoked accessors: %v", tvc.RevokedTokens)
    }
}

func TestLeader_ReplicateACLPolicies(t *testing.T) {
    t.Parallel()
    s1, root := TestACLServer(t, func(c *Config) {
        c.Region = "region1"
        c.AuthoritativeRegion = "region1"
        c.ACLEnabled = true
    })
    defer s1.Shutdown()
    s2, _ := TestACLServer(t, func(c *Config) {
        c.Region = "region2"
        c.AuthoritativeRegion = "region1"
        c.ACLEnabled = true
        c.ReplicationBackoff = 20 * time.Millisecond
        c.ReplicationToken = root.SecretID
    })
    defer s2.Shutdown()
    TestJoin(t, s1, s2)
    testutil.WaitForLeader(t, s1.RPC)
    testutil.WaitForLeader(t, s2.RPC)

    // Write a policy to the authoritative region
    p1 := mock.ACLPolicy()
    if err := s1.State().UpsertACLPolicies(100, []*structs.ACLPolicy{p1}); err != nil {
        t.Fatalf("bad: %v", err)
    }

    // Wait for the policy to replicate
    testutil.WaitForResult(func() (bool, error) {
        state := s2.State()
        out, err := state.ACLPolicyByName(nil, p1.Name)
        return out != nil, err
    }, func(err error) {
        t.Fatalf("should replicate policy")
    })
}

func TestLeader_DiffACLPolicies(t *testing.T) {
    t.Parallel()

    state := state.TestStateStore(t)

    // Populate the local state
    p1 := mock.ACLPolicy()
    p2 := mock.ACLPolicy()
    p3 := mock.ACLPolicy()
    assert.Nil(t, state.UpsertACLPolicies(100, []*structs.ACLPolicy{p1, p2, p3}))

    // Simulate a remote list
    p2Stub := p2.Stub()
    p2Stub.ModifyIndex = 50 // Ignored, same index
    p3Stub := p3.Stub()
    p3Stub.ModifyIndex = 100 // Updated, higher index
    p3Stub.Hash = []byte{0, 1, 2, 3}
    p4 := mock.ACLPolicy()
    remoteList := []*structs.ACLPolicyListStub{
        p2Stub,
        p3Stub,
        p4.Stub(),
    }
    delete, update := diffACLPolicies(state, 50, remoteList)

    // P1 does not exist on the remote side, should delete
    assert.Equal(t, []string{p1.Name}, delete)

    // P2 is un-modified - ignore. P3 modified, P4 new.
    assert.Equal(t, []string{p3.Name, p4.Name}, update)
}

func TestLeader_ReplicateACLTokens(t *testing.T) {
    t.Parallel()
    s1, root := TestACLServer(t, func(c *Config) {
        c.Region = "region1"
        c.AuthoritativeRegion = "region1"
        c.ACLEnabled = true
    })
    defer s1.Shutdown()
    s2, _ := TestACLServer(t, func(c *Config) {
        c.Region = "region2"
        c.AuthoritativeRegion = "region1"
        c.ACLEnabled = true
        c.ReplicationBackoff = 20 * time.Millisecond
        c.ReplicationToken = root.SecretID
    })
    defer s2.Shutdown()
    TestJoin(t, s1, s2)
    testutil.WaitForLeader(t, s1.RPC)
    testutil.WaitForLeader(t, s2.RPC)

    // Write a token to the authoritative region
    p1 := mock.ACLToken()
    p1.Global = true
    if err := s1.State().UpsertACLTokens(100, []*structs.ACLToken{p1}); err != nil {
        t.Fatalf("bad: %v", err)
    }

    // Wait for the token to replicate
    testutil.WaitForResult(func() (bool, error) {
        state := s2.State()
        out, err := state.ACLTokenByAccessorID(nil, p1.AccessorID)
        return out != nil, err
    }, func(err error) {
        t.Fatalf("should replicate token")
    })
}

func TestLeader_DiffACLTokens(t *testing.T) {
    t.Parallel()

    state := state.TestStateStore(t)

    // Populate the local state
    p0 := mock.ACLToken()
    p1 := mock.ACLToken()
    p1.Global = true
    p2 := mock.ACLToken()
    p2.Global = true
    p3 := mock.ACLToken()
    p3.Global = true
    assert.Nil(t, state.UpsertACLTokens(100, []*structs.ACLToken{p0, p1, p2, p3}))

    // Simulate a remote list
    p2Stub := p2.Stub()
    p2Stub.ModifyIndex = 50 // Ignored, same index
    p3Stub := p3.Stub()
    p3Stub.ModifyIndex = 100 // Updated, higher index
    p3Stub.Hash = []byte{0, 1, 2, 3}
    p4 := mock.ACLToken()
    p4.Global = true
    remoteList := []*structs.ACLTokenListStub{
        p2Stub,
        p3Stub,
        p4.Stub(),
    }
    delete, update := diffACLTokens(state, 50, remoteList)

    // P0 is local and should be ignored
    // P1 does not exist on the remote side, should delete
    assert.Equal(t, []string{p1.AccessorID}, delete)

    // P2 is un-modified - ignore. P3 modified, P4 new.
    assert.Equal(t, []string{p3.AccessorID, p4.AccessorID}, update)
}

func TestLeader_UpgradeRaftVersion(t *testing.T) {
    t.Parallel()
    s1 := TestServer(t, func(c *Config) {
        c.Datacenter = "dc1"
        c.RaftConfig.ProtocolVersion = 2
    })
    defer s1.Shutdown()

    s2 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
        c.RaftConfig.ProtocolVersion = 1
    })
    defer s2.Shutdown()

    s3 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
        c.RaftConfig.ProtocolVersion = 2
    })
    defer s3.Shutdown()

    servers := []*Server{s1, s2, s3}

    // Try to join
    TestJoin(t, s1, s2, s3)

    for _, s := range servers {
        testutil.WaitForResult(func() (bool, error) {
            peers, _ := s.numPeers()
            return peers == 3, nil
        }, func(err error) {
            t.Fatalf("should have 3 peers")
        })
    }

    // Kill the v1 server
    if err := s2.Leave(); err != nil {
        t.Fatal(err)
    }

    for _, s := range []*Server{s1, s3} {
        minVer, err := s.autopilot.MinRaftProtocol()
        if err != nil {
            t.Fatal(err)
        }
        if got, want := minVer, 2; got != want {
            t.Fatalf("got min raft version %d want %d", got, want)
        }
    }

    // Replace the dead server with one running raft protocol v3
    s4 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
        c.Datacenter = "dc1"
        c.RaftConfig.ProtocolVersion = 3
    })
    defer s4.Shutdown()
    TestJoin(t, s1, s4)
    servers[1] = s4

    // Make sure we're back to 3 total peers with the new one added via ID
    for _, s := range servers {
        testutil.WaitForResult(func() (bool, error) {
            addrs := 0
            ids := 0
            future := s.raft.GetConfiguration()
            if err := future.Error(); err != nil {
                return false, err
            }
            for _, server := range future.Configuration().Servers {
                if string(server.ID) == string(server.Address) {
                    addrs++
                } else {
                    ids++
                }
            }
            if got, want := addrs, 2; got != want {
                return false, fmt.Errorf("got %d server addresses want %d", got, want)
            }
            if got, want := ids, 1; got != want {
                return false, fmt.Errorf("got %d server ids want %d", got, want)
            }

            return true, nil
        }, func(err error) {
            t.Fatal(err)
        })
    }
}

func TestLeader_RollRaftServer(t *testing.T) {
    t.Parallel()
    s1 := TestServer(t, func(c *Config) {
        c.RaftConfig.ProtocolVersion = 2
    })
    defer s1.Shutdown()

    s2 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
        c.RaftConfig.ProtocolVersion = 1
    })
    defer s2.Shutdown()

    s3 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
        c.RaftConfig.ProtocolVersion = 2
    })
    defer s3.Shutdown()

    servers := []*Server{s1, s2, s3}

    // Try to join
    TestJoin(t, s1, s2, s3)

    for _, s := range servers {
        retry.Run(t, func(r *retry.R) { r.Check(wantPeers(s, 3)) })
    }

    // Kill the v1 server
    s2.Shutdown()

    for _, s := range []*Server{s1, s3} {
        retry.Run(t, func(r *retry.R) {
            minVer, err := s.autopilot.MinRaftProtocol()
            if err != nil {
                r.Fatal(err)
            }
            if got, want := minVer, 2; got != want {
                r.Fatalf("got min raft version %d want %d", got, want)
            }
        })
    }

    // Replace the dead server with one running raft protocol v3
    s4 := TestServer(t, func(c *Config) {
        c.DevDisableBootstrap = true
        c.RaftConfig.ProtocolVersion = 3
    })
    defer s4.Shutdown()
    TestJoin(t, s4, s1)
    servers[1] = s4

    // Make sure the dead server is removed and we're back to 3 total peers
    for _, s := range servers {
        retry.Run(t, func(r *retry.R) {
            addrs := 0
            ids := 0
            future := s.raft.GetConfiguration()
            if err := future.Error(); err != nil {
                r.Fatal(err)
            }
            for _, server := range future.Configuration().Servers {
                if string(server.ID) == string(server.Address) {
                    addrs++
                } else {
                    ids++
                }
            }
            if got, want := addrs, 2; got != want {
                r.Fatalf("got %d server addresses want %d", got, want)
            }
            if got, want := ids, 1; got != want {
                r.Fatalf("got %d server ids want %d", got, want)
            }
        })
    }
}
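// Note: TestServer, TestACLServer, TestJoin, defaultSched, testPeriodicJob, and
// wantPeers are test helpers defined elsewhere in this package and are not shown
// in this file. As a rough, assumed sketch (not the actual implementation), the
// wantPeers check used with retry.Run above presumably compares a server's raft
// peer count against an expected value, along the lines of:
//
//     func wantPeersSketch(s *Server, want int) error {
//         peers, err := s.numPeers()
//         if err != nil {
//             return err
//         }
//         if peers != want {
//             return fmt.Errorf("got %d peers, want %d", peers, want)
//         }
//         return nil
//     }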