github.com/ncodes/nomad@v0.5.7-0.20170403112158-97adf4a74fb3/nomad/leader_test.go (about) 1 package nomad 2 3 import ( 4 "errors" 5 "fmt" 6 "testing" 7 "time" 8 9 memdb "github.com/hashicorp/go-memdb" 10 "github.com/ncodes/nomad/nomad/mock" 11 "github.com/ncodes/nomad/nomad/structs" 12 "github.com/ncodes/nomad/testutil" 13 ) 14 15 func TestLeader_LeftServer(t *testing.T) { 16 s1 := testServer(t, nil) 17 defer s1.Shutdown() 18 19 s2 := testServer(t, func(c *Config) { 20 c.DevDisableBootstrap = true 21 }) 22 defer s2.Shutdown() 23 24 s3 := testServer(t, func(c *Config) { 25 c.DevDisableBootstrap = true 26 }) 27 defer s3.Shutdown() 28 servers := []*Server{s1, s2, s3} 29 testJoin(t, s1, s2, s3) 30 31 for _, s := range servers { 32 testutil.WaitForResult(func() (bool, error) { 33 peers, _ := s.numPeers() 34 return peers == 3, nil 35 }, func(err error) { 36 t.Fatalf("should have 3 peers") 37 }) 38 } 39 40 // Kill any server 41 servers[0].Shutdown() 42 43 testutil.WaitForResult(func() (bool, error) { 44 // Force remove the non-leader (transition to left state) 45 name := fmt.Sprintf("%s.%s", 46 servers[0].config.NodeName, servers[0].config.Region) 47 if err := servers[1].RemoveFailedNode(name); err != nil { 48 t.Fatalf("err: %v", err) 49 } 50 51 for _, s := range servers[1:] { 52 peers, _ := s.numPeers() 53 return peers == 2, errors.New(fmt.Sprintf("%v", peers)) 54 } 55 56 return true, nil 57 }, func(err error) { 58 t.Fatalf("err: %s", err) 59 }) 60 } 61 62 func TestLeader_LeftLeader(t *testing.T) { 63 s1 := testServer(t, nil) 64 defer s1.Shutdown() 65 66 s2 := testServer(t, func(c *Config) { 67 c.DevDisableBootstrap = true 68 }) 69 defer s2.Shutdown() 70 71 s3 := testServer(t, func(c *Config) { 72 c.DevDisableBootstrap = true 73 }) 74 defer s3.Shutdown() 75 servers := []*Server{s1, s2, s3} 76 testJoin(t, s1, s2, s3) 77 78 for _, s := range servers { 79 testutil.WaitForResult(func() (bool, error) { 80 peers, _ := s.numPeers() 81 return peers == 3, nil 82 }, func(err error) { 83 t.Fatalf("should have 3 peers") 84 }) 85 } 86 87 // Kill the leader! 88 var leader *Server 89 for _, s := range servers { 90 if s.IsLeader() { 91 leader = s 92 break 93 } 94 } 95 if leader == nil { 96 t.Fatalf("Should have a leader") 97 } 98 leader.Leave() 99 leader.Shutdown() 100 101 for _, s := range servers { 102 if s == leader { 103 continue 104 } 105 testutil.WaitForResult(func() (bool, error) { 106 peers, _ := s.numPeers() 107 return peers == 2, errors.New(fmt.Sprintf("%v", peers)) 108 }, func(err error) { 109 t.Fatalf("should have 2 peers: %v", err) 110 }) 111 } 112 } 113 114 func TestLeader_MultiBootstrap(t *testing.T) { 115 s1 := testServer(t, nil) 116 defer s1.Shutdown() 117 118 s2 := testServer(t, nil) 119 defer s2.Shutdown() 120 servers := []*Server{s1, s2} 121 testJoin(t, s1, s2) 122 123 for _, s := range servers { 124 testutil.WaitForResult(func() (bool, error) { 125 peers := s.Members() 126 return len(peers) == 2, nil 127 }, func(err error) { 128 t.Fatalf("should have 2 peers") 129 }) 130 } 131 132 // Ensure we don't have multiple raft peers 133 for _, s := range servers { 134 peers, _ := s.numPeers() 135 if peers != 1 { 136 t.Fatalf("should only have 1 raft peer!") 137 } 138 } 139 } 140 141 func TestLeader_PlanQueue_Reset(t *testing.T) { 142 s1 := testServer(t, nil) 143 defer s1.Shutdown() 144 145 s2 := testServer(t, func(c *Config) { 146 c.DevDisableBootstrap = true 147 }) 148 defer s2.Shutdown() 149 150 s3 := testServer(t, func(c *Config) { 151 c.DevDisableBootstrap = true 152 }) 153 defer s3.Shutdown() 154 servers := []*Server{s1, s2, s3} 155 testJoin(t, s1, s2, s3) 156 157 for _, s := range servers { 158 testutil.WaitForResult(func() (bool, error) { 159 peers, _ := s.numPeers() 160 return peers == 3, nil 161 }, func(err error) { 162 t.Fatalf("should have 3 peers") 163 }) 164 } 165 166 var leader *Server 167 for _, s := range servers { 168 if s.IsLeader() { 169 leader = s 170 break 171 } 172 } 173 if leader == nil { 174 t.Fatalf("Should have a leader") 175 } 176 177 if !leader.planQueue.Enabled() { 178 t.Fatalf("should enable plan queue") 179 } 180 181 for _, s := range servers { 182 if !s.IsLeader() && s.planQueue.Enabled() { 183 t.Fatalf("plan queue should not be enabled") 184 } 185 } 186 187 // Kill the leader 188 leader.Shutdown() 189 time.Sleep(100 * time.Millisecond) 190 191 // Wait for a new leader 192 leader = nil 193 testutil.WaitForResult(func() (bool, error) { 194 for _, s := range servers { 195 if s.IsLeader() { 196 leader = s 197 return true, nil 198 } 199 } 200 return false, nil 201 }, func(err error) { 202 t.Fatalf("should have leader") 203 }) 204 205 // Check that the new leader has a pending GC expiration 206 testutil.WaitForResult(func() (bool, error) { 207 return leader.planQueue.Enabled(), nil 208 }, func(err error) { 209 t.Fatalf("should enable plan queue") 210 }) 211 } 212 213 func TestLeader_EvalBroker_Reset(t *testing.T) { 214 s1 := testServer(t, func(c *Config) { 215 c.NumSchedulers = 0 216 }) 217 defer s1.Shutdown() 218 219 s2 := testServer(t, func(c *Config) { 220 c.NumSchedulers = 0 221 c.DevDisableBootstrap = true 222 }) 223 defer s2.Shutdown() 224 225 s3 := testServer(t, func(c *Config) { 226 c.NumSchedulers = 0 227 c.DevDisableBootstrap = true 228 }) 229 defer s3.Shutdown() 230 servers := []*Server{s1, s2, s3} 231 testJoin(t, s1, s2, s3) 232 testutil.WaitForLeader(t, s1.RPC) 233 234 for _, s := range servers { 235 testutil.WaitForResult(func() (bool, error) { 236 peers, _ := s.numPeers() 237 return peers == 3, nil 238 }, func(err error) { 239 t.Fatalf("should have 3 peers") 240 }) 241 } 242 243 var leader *Server 244 for _, s := range servers { 245 if s.IsLeader() { 246 leader = s 247 break 248 } 249 } 250 if leader == nil { 251 t.Fatalf("Should have a leader") 252 } 253 254 // Inject a pending eval 255 req := structs.EvalUpdateRequest{ 256 Evals: []*structs.Evaluation{mock.Eval()}, 257 } 258 _, _, err := leader.raftApply(structs.EvalUpdateRequestType, req) 259 if err != nil { 260 t.Fatalf("err: %v", err) 261 } 262 263 // Kill the leader 264 leader.Shutdown() 265 time.Sleep(100 * time.Millisecond) 266 267 // Wait for a new leader 268 leader = nil 269 testutil.WaitForResult(func() (bool, error) { 270 for _, s := range servers { 271 if s.IsLeader() { 272 leader = s 273 return true, nil 274 } 275 } 276 return false, nil 277 }, func(err error) { 278 t.Fatalf("should have leader") 279 }) 280 281 // Check that the new leader has a pending evaluation 282 testutil.WaitForResult(func() (bool, error) { 283 stats := leader.evalBroker.Stats() 284 return stats.TotalReady == 1, nil 285 }, func(err error) { 286 t.Fatalf("should have pending evaluation") 287 }) 288 } 289 290 func TestLeader_PeriodicDispatcher_Restore_Adds(t *testing.T) { 291 s1 := testServer(t, func(c *Config) { 292 c.NumSchedulers = 0 293 }) 294 defer s1.Shutdown() 295 296 s2 := testServer(t, func(c *Config) { 297 c.NumSchedulers = 0 298 c.DevDisableBootstrap = true 299 }) 300 defer s2.Shutdown() 301 302 s3 := testServer(t, func(c *Config) { 303 c.NumSchedulers = 0 304 c.DevDisableBootstrap = true 305 }) 306 defer s3.Shutdown() 307 servers := []*Server{s1, s2, s3} 308 testJoin(t, s1, s2, s3) 309 testutil.WaitForLeader(t, s1.RPC) 310 311 for _, s := range servers { 312 testutil.WaitForResult(func() (bool, error) { 313 peers, _ := s.numPeers() 314 return peers == 3, nil 315 }, func(err error) { 316 t.Fatalf("should have 3 peers") 317 }) 318 } 319 320 var leader *Server 321 for _, s := range servers { 322 if s.IsLeader() { 323 leader = s 324 break 325 } 326 } 327 if leader == nil { 328 t.Fatalf("Should have a leader") 329 } 330 331 // Inject a periodic job and non-periodic job 332 periodic := mock.PeriodicJob() 333 nonPeriodic := mock.Job() 334 for _, job := range []*structs.Job{nonPeriodic, periodic} { 335 req := structs.JobRegisterRequest{ 336 Job: job, 337 } 338 _, _, err := leader.raftApply(structs.JobRegisterRequestType, req) 339 if err != nil { 340 t.Fatalf("err: %v", err) 341 } 342 } 343 344 // Kill the leader 345 leader.Shutdown() 346 time.Sleep(100 * time.Millisecond) 347 348 // Wait for a new leader 349 leader = nil 350 testutil.WaitForResult(func() (bool, error) { 351 for _, s := range servers { 352 if s.IsLeader() { 353 leader = s 354 return true, nil 355 } 356 } 357 return false, nil 358 }, func(err error) { 359 t.Fatalf("should have leader") 360 }) 361 362 // Check that the new leader is tracking the periodic job. 363 testutil.WaitForResult(func() (bool, error) { 364 _, tracked := leader.periodicDispatcher.tracked[periodic.ID] 365 return tracked, nil 366 }, func(err error) { 367 t.Fatalf("periodic job not tracked") 368 }) 369 } 370 371 func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) { 372 s1 := testServer(t, func(c *Config) { 373 c.NumSchedulers = 0 374 }) 375 defer s1.Shutdown() 376 testutil.WaitForLeader(t, s1.RPC) 377 378 // Inject a periodic job that will be triggered soon. 379 launch := time.Now().Add(1 * time.Second) 380 job := testPeriodicJob(launch) 381 req := structs.JobRegisterRequest{ 382 Job: job, 383 } 384 _, _, err := s1.raftApply(structs.JobRegisterRequestType, req) 385 if err != nil { 386 t.Fatalf("err: %v", err) 387 } 388 389 // Flush the periodic dispatcher, ensuring that no evals will be created. 390 s1.periodicDispatcher.SetEnabled(false) 391 392 // Get the current time to ensure the launch time is after this once we 393 // restore. 394 now := time.Now() 395 396 // Sleep till after the job should have been launched. 397 time.Sleep(3 * time.Second) 398 399 // Restore the periodic dispatcher. 400 s1.periodicDispatcher.SetEnabled(true) 401 s1.periodicDispatcher.Start() 402 s1.restorePeriodicDispatcher() 403 404 // Ensure the job is tracked. 405 if _, tracked := s1.periodicDispatcher.tracked[job.ID]; !tracked { 406 t.Fatalf("periodic job not restored") 407 } 408 409 // Check that an eval was made. 410 ws := memdb.NewWatchSet() 411 last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.ID) 412 if err != nil || last == nil { 413 t.Fatalf("failed to get periodic launch time: %v", err) 414 } 415 416 if last.Launch.Before(now) { 417 t.Fatalf("restorePeriodicDispatcher did not force launch: last %v; want after %v", last.Launch, now) 418 } 419 } 420 421 func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) { 422 s1 := testServer(t, func(c *Config) { 423 c.NumSchedulers = 0 424 }) 425 defer s1.Shutdown() 426 testutil.WaitForLeader(t, s1.RPC) 427 428 // Inject a periodic job that triggered once in the past, should trigger now 429 // and once in the future. 430 now := time.Now() 431 past := now.Add(-1 * time.Second) 432 future := now.Add(10 * time.Second) 433 job := testPeriodicJob(past, now, future) 434 req := structs.JobRegisterRequest{ 435 Job: job, 436 } 437 _, _, err := s1.raftApply(structs.JobRegisterRequestType, req) 438 if err != nil { 439 t.Fatalf("err: %v", err) 440 } 441 442 // Create an eval for the past launch. 443 s1.periodicDispatcher.createEval(job, past) 444 445 // Flush the periodic dispatcher, ensuring that no evals will be created. 446 s1.periodicDispatcher.SetEnabled(false) 447 448 // Sleep till after the job should have been launched. 449 time.Sleep(3 * time.Second) 450 451 // Restore the periodic dispatcher. 452 s1.periodicDispatcher.SetEnabled(true) 453 s1.periodicDispatcher.Start() 454 s1.restorePeriodicDispatcher() 455 456 // Ensure the job is tracked. 457 if _, tracked := s1.periodicDispatcher.tracked[job.ID]; !tracked { 458 t.Fatalf("periodic job not restored") 459 } 460 461 // Check that an eval was made. 462 ws := memdb.NewWatchSet() 463 last, err := s1.fsm.State().PeriodicLaunchByID(ws, job.ID) 464 if err != nil || last == nil { 465 t.Fatalf("failed to get periodic launch time: %v", err) 466 } 467 if last.Launch == past { 468 t.Fatalf("restorePeriodicDispatcher did not force launch") 469 } 470 } 471 472 func TestLeader_PeriodicDispatch(t *testing.T) { 473 s1 := testServer(t, func(c *Config) { 474 c.NumSchedulers = 0 475 c.EvalGCInterval = 5 * time.Millisecond 476 }) 477 defer s1.Shutdown() 478 479 // Wait for a periodic dispatch 480 testutil.WaitForResult(func() (bool, error) { 481 stats := s1.evalBroker.Stats() 482 bySched, ok := stats.ByScheduler[structs.JobTypeCore] 483 if !ok { 484 return false, nil 485 } 486 return bySched.Ready > 0, nil 487 }, func(err error) { 488 t.Fatalf("should pending job") 489 }) 490 } 491 492 func TestLeader_ReapFailedEval(t *testing.T) { 493 s1 := testServer(t, func(c *Config) { 494 c.NumSchedulers = 0 495 c.EvalDeliveryLimit = 1 496 }) 497 defer s1.Shutdown() 498 testutil.WaitForLeader(t, s1.RPC) 499 500 // Wait for a periodic dispatch 501 eval := mock.Eval() 502 s1.evalBroker.Enqueue(eval) 503 504 // Dequeue and Nack 505 out, token, err := s1.evalBroker.Dequeue(defaultSched, time.Second) 506 if err != nil { 507 t.Fatalf("err: %v", err) 508 } 509 s1.evalBroker.Nack(out.ID, token) 510 511 // Wait updated evaluation 512 state := s1.fsm.State() 513 testutil.WaitForResult(func() (bool, error) { 514 ws := memdb.NewWatchSet() 515 out, err := state.EvalByID(ws, eval.ID) 516 if err != nil { 517 return false, err 518 } 519 return out != nil && out.Status == structs.EvalStatusFailed, nil 520 }, func(err error) { 521 t.Fatalf("err: %v", err) 522 }) 523 } 524 525 func TestLeader_ReapDuplicateEval(t *testing.T) { 526 s1 := testServer(t, func(c *Config) { 527 c.NumSchedulers = 0 528 }) 529 defer s1.Shutdown() 530 testutil.WaitForLeader(t, s1.RPC) 531 532 // Create a duplicate blocked eval 533 eval := mock.Eval() 534 eval2 := mock.Eval() 535 eval2.JobID = eval.JobID 536 s1.blockedEvals.Block(eval) 537 s1.blockedEvals.Block(eval2) 538 539 // Wait for the evaluation to marked as cancelled 540 state := s1.fsm.State() 541 testutil.WaitForResult(func() (bool, error) { 542 ws := memdb.NewWatchSet() 543 out, err := state.EvalByID(ws, eval2.ID) 544 if err != nil { 545 return false, err 546 } 547 return out != nil && out.Status == structs.EvalStatusCancelled, nil 548 }, func(err error) { 549 t.Fatalf("err: %v", err) 550 }) 551 } 552 553 func TestLeader_RestoreVaultAccessors(t *testing.T) { 554 s1 := testServer(t, func(c *Config) { 555 c.NumSchedulers = 0 556 }) 557 defer s1.Shutdown() 558 testutil.WaitForLeader(t, s1.RPC) 559 560 // Insert a vault accessor that should be revoked 561 state := s1.fsm.State() 562 va := mock.VaultAccessor() 563 if err := state.UpsertVaultAccessor(100, []*structs.VaultAccessor{va}); err != nil { 564 t.Fatalf("bad: %v", err) 565 } 566 567 // Swap the Vault client 568 tvc := &TestVaultClient{} 569 s1.vault = tvc 570 571 // Do a restore 572 if err := s1.restoreRevokingAccessors(); err != nil { 573 t.Fatalf("Failed to restore: %v", err) 574 } 575 576 if len(tvc.RevokedTokens) != 1 && tvc.RevokedTokens[0].Accessor != va.Accessor { 577 t.Fatalf("Bad revoked accessors: %v", tvc.RevokedTokens) 578 } 579 }