github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/nomad/leader_test.go

package nomad

import (
	"fmt"
	"testing"
	"time"

	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
)

func TestLeader_LeftServer(t *testing.T) {
	s1 := testServer(t, nil)
	defer s1.Shutdown()

	s2 := testServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := testServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	testJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.raftPeers.Peers()
			return len(peers) == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	// Kill any server
	servers[0].Shutdown()

	testutil.WaitForResult(func() (bool, error) {
		// Force remove the non-leader (transition to left state)
		name := fmt.Sprintf("%s.%s",
			servers[0].config.NodeName, servers[0].config.Region)
		if err := servers[1].RemoveFailedNode(name); err != nil {
			t.Fatalf("err: %v", err)
		}

		// Every remaining server should converge on two raft peers.
		for _, s := range servers[1:] {
			peers, _ := s.raftPeers.Peers()
			if len(peers) != 2 {
				return false, fmt.Errorf("%v", peers)
			}
		}

		return true, nil
	}, func(err error) {
		t.Fatalf("err: %s", err)
	})
}

func TestLeader_LeftLeader(t *testing.T) {
	s1 := testServer(t, nil)
	defer s1.Shutdown()

	s2 := testServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := testServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	testJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.raftPeers.Peers()
			return len(peers) == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	// Kill the leader!
	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}
	leader.Leave()
	leader.Shutdown()

	for _, s := range servers {
		if s == leader {
			continue
		}
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.raftPeers.Peers()
			return len(peers) == 2, fmt.Errorf("%v", peers)
		}, func(err error) {
			t.Fatalf("should have 2 peers: %v", err)
		})
	}
}

func TestLeader_MultiBootstrap(t *testing.T) {
	s1 := testServer(t, nil)
	defer s1.Shutdown()

	s2 := testServer(t, nil)
	defer s2.Shutdown()
	servers := []*Server{s1, s2}
	testJoin(t, s1, s2)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers := s.Members()
			return len(peers) == 2, nil
		}, func(err error) {
			t.Fatalf("should have 2 peers")
		})
	}

	// Ensure we don't have multiple raft peers
	for _, s := range servers {
		peers, _ := s.raftPeers.Peers()
		if len(peers) != 1 {
			t.Fatalf("should only have 1 raft peer!")
		}
	}
}

func TestLeader_PlanQueue_Reset(t *testing.T) {
	s1 := testServer(t, nil)
	defer s1.Shutdown()

	s2 := testServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := testServer(t, func(c *Config) {
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	testJoin(t, s1, s2, s3)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.raftPeers.Peers()
			return len(peers) == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	if !leader.planQueue.Enabled() {
		t.Fatalf("should enable plan queue")
	}

	for _, s := range servers {
		if !s.IsLeader() && s.planQueue.Enabled() {
			t.Fatalf("plan queue should not be enabled")
		}
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return true, nil
			}
		}
		return false, nil
	}, func(err error) {
		t.Fatalf("should have leader")
	})

	// Check that the new leader has the plan queue enabled
	testutil.WaitForResult(func() (bool, error) {
		return leader.planQueue.Enabled(), nil
	}, func(err error) {
		t.Fatalf("should enable plan queue")
	})
}

func TestLeader_EvalBroker_Reset(t *testing.T) {
	s1 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()

	s2 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	testJoin(t, s1, s2, s3)
	testutil.WaitForLeader(t, s1.RPC)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.raftPeers.Peers()
			return len(peers) == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	// Inject a pending eval
	req := structs.EvalUpdateRequest{
		Evals: []*structs.Evaluation{mock.Eval()},
	}
	_, _, err := leader.raftApply(structs.EvalUpdateRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return true, nil
			}
		}
		return false, nil
	}, func(err error) {
		t.Fatalf("should have leader")
	})

	// Check that the new leader has a pending evaluation
	testutil.WaitForResult(func() (bool, error) {
		stats := leader.evalBroker.Stats()
		return stats.TotalReady == 1, nil
	}, func(err error) {
		t.Fatalf("should have pending evaluation")
	})
}

func TestLeader_PeriodicDispatcher_Restore_Adds(t *testing.T) {
	s1 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()

	s2 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s2.Shutdown()

	s3 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.DevDisableBootstrap = true
	})
	defer s3.Shutdown()
	servers := []*Server{s1, s2, s3}
	testJoin(t, s1, s2, s3)
	testutil.WaitForLeader(t, s1.RPC)

	for _, s := range servers {
		testutil.WaitForResult(func() (bool, error) {
			peers, _ := s.raftPeers.Peers()
			return len(peers) == 3, nil
		}, func(err error) {
			t.Fatalf("should have 3 peers")
		})
	}

	var leader *Server
	for _, s := range servers {
		if s.IsLeader() {
			leader = s
			break
		}
	}
	if leader == nil {
		t.Fatalf("Should have a leader")
	}

	// Inject a periodic job and non-periodic job
	periodic := mock.PeriodicJob()
	nonPeriodic := mock.Job()
	for _, job := range []*structs.Job{nonPeriodic, periodic} {
		req := structs.JobRegisterRequest{
			Job: job,
		}
		_, _, err := leader.raftApply(structs.JobRegisterRequestType, req)
		if err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Kill the leader
	leader.Shutdown()
	time.Sleep(100 * time.Millisecond)

	// Wait for a new leader
	leader = nil
	testutil.WaitForResult(func() (bool, error) {
		for _, s := range servers {
			if s.IsLeader() {
				leader = s
				return true, nil
			}
		}
		return false, nil
	}, func(err error) {
		t.Fatalf("should have leader")
	})

	// Check that the new leader is tracking the periodic job.
	testutil.WaitForResult(func() (bool, error) {
		_, tracked := leader.periodicDispatcher.tracked[periodic.ID]
		return tracked, nil
	}, func(err error) {
		t.Fatalf("periodic job not tracked")
	})
}

func TestLeader_PeriodicDispatcher_Restore_NoEvals(t *testing.T) {
	s1 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Inject a periodic job that will be triggered soon.
	launch := time.Now().Add(1 * time.Second)
	job := testPeriodicJob(launch)
	req := structs.JobRegisterRequest{
		Job: job,
	}
	_, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Flush the periodic dispatcher, ensuring that no evals will be created.
	s1.periodicDispatcher.SetEnabled(false)

	// Get the current time to ensure the launch time is after this once we
	// restore.
	now := time.Now()

	// Sleep till after the job should have been launched.
	time.Sleep(3 * time.Second)

	// Restore the periodic dispatcher.
	s1.periodicDispatcher.SetEnabled(true)
	s1.periodicDispatcher.Start()
	s1.restorePeriodicDispatcher()

	// Ensure the job is tracked.
	if _, tracked := s1.periodicDispatcher.tracked[job.ID]; !tracked {
		t.Fatalf("periodic job not restored")
	}

	// Check that an eval was made.
	last, err := s1.fsm.State().PeriodicLaunchByID(job.ID)
	if err != nil || last == nil {
		t.Fatalf("failed to get periodic launch time: %v", err)
	}

	if last.Launch.Before(now) {
		t.Fatalf("restorePeriodicDispatcher did not force launch: last %v; want after %v", last.Launch, now)
	}
}

func TestLeader_PeriodicDispatcher_Restore_Evals(t *testing.T) {
	s1 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Inject a periodic job that triggered once in the past, should trigger now
	// and once in the future.
	now := time.Now()
	past := now.Add(-1 * time.Second)
	future := now.Add(10 * time.Second)
	job := testPeriodicJob(past, now, future)
	req := structs.JobRegisterRequest{
		Job: job,
	}
	_, _, err := s1.raftApply(structs.JobRegisterRequestType, req)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Create an eval for the past launch.
	s1.periodicDispatcher.createEval(job, past)

	// Flush the periodic dispatcher, ensuring that no evals will be created.
	s1.periodicDispatcher.SetEnabled(false)

	// Sleep till after the job should have been launched.
	time.Sleep(3 * time.Second)

	// Restore the periodic dispatcher.
	s1.periodicDispatcher.SetEnabled(true)
	s1.periodicDispatcher.Start()
	s1.restorePeriodicDispatcher()

	// Ensure the job is tracked.
	if _, tracked := s1.periodicDispatcher.tracked[job.ID]; !tracked {
		t.Fatalf("periodic job not restored")
	}

	// Check that an eval was made.
	last, err := s1.fsm.State().PeriodicLaunchByID(job.ID)
	if err != nil || last == nil {
		t.Fatalf("failed to get periodic launch time: %v", err)
	}
	if last.Launch == past {
		t.Fatalf("restorePeriodicDispatcher did not force launch")
	}
}

func TestLeader_PeriodicDispatch(t *testing.T) {
	s1 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EvalGCInterval = 5 * time.Millisecond
	})
	defer s1.Shutdown()

	// Wait for a periodic dispatch
	testutil.WaitForResult(func() (bool, error) {
		stats := s1.evalBroker.Stats()
		bySched, ok := stats.ByScheduler[structs.JobTypeCore]
		if !ok {
			return false, nil
		}
		return bySched.Ready > 0, nil
	}, func(err error) {
		t.Fatalf("should have a pending job")
	})
}

func TestLeader_ReapFailedEval(t *testing.T) {
	s1 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EvalDeliveryLimit = 1
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Insert an eval
	eval := mock.Eval()
	s1.evalBroker.Enqueue(eval)

	// Dequeue and Nack
	out, token, err := s1.evalBroker.Dequeue(defaultSched, time.Second)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	s1.evalBroker.Nack(out.ID, token)

	// Wait for the updated evaluation
	state := s1.fsm.State()
	testutil.WaitForResult(func() (bool, error) {
		out, err := state.EvalByID(eval.ID)
		if err != nil {
			return false, err
		}
		return out != nil && out.Status == structs.EvalStatusFailed, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}

func TestLeader_ReapDuplicateEval(t *testing.T) {
	s1 := testServer(t, func(c *Config) {
		c.NumSchedulers = 0
	})
	defer s1.Shutdown()
	testutil.WaitForLeader(t, s1.RPC)

	// Create a duplicate blocked eval
	eval := mock.Eval()
	eval2 := mock.Eval()
	eval2.JobID = eval.JobID
	s1.blockedEvals.Block(eval)
	s1.blockedEvals.Block(eval2)

	// Wait for the evaluation to be marked as cancelled
	state := s1.fsm.State()
	testutil.WaitForResult(func() (bool, error) {
		out, err := state.EvalByID(eval2.ID)
		if err != nil {
			return false, err
		}
		return out != nil && out.Status == structs.EvalStatusCancelled, nil
	}, func(err error) {
		t.Fatalf("err: %v", err)
	})
}
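
// The multi-server tests above all repeat the same convergence check: wait
// until every server reports the expected number of raft peers. A minimal
// sketch of that pattern as a shared helper is shown below. waitForNPeers is
// a hypothetical name and is not part of the original file; it only assumes
// testutil.WaitForResult and Server.raftPeers.Peers, which the tests already
// use.
func waitForNPeers(t *testing.T, servers []*Server, n int) {
	for _, s := range servers {
		s := s // capture the loop variable for the closure
		testutil.WaitForResult(func() (bool, error) {
			peers, err := s.raftPeers.Peers()
			if err != nil {
				return false, err
			}
			return len(peers) == n, fmt.Errorf("expected %d peers, got %v", n, peers)
		}, func(err error) {
			t.Fatalf("server did not converge to %d peers: %v", n, err)
		})
	}
}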