github.com/taylorchu/nomad@v0.5.3-rc1.0.20170407200202-db11e7dd7b55/nomad/core_sched_test.go (about) 1 package nomad 2 3 import ( 4 "testing" 5 "time" 6 7 memdb "github.com/hashicorp/go-memdb" 8 "github.com/hashicorp/nomad/nomad/mock" 9 "github.com/hashicorp/nomad/nomad/structs" 10 "github.com/hashicorp/nomad/testutil" 11 ) 12 13 func TestCoreScheduler_EvalGC(t *testing.T) { 14 s1 := testServer(t, nil) 15 defer s1.Shutdown() 16 testutil.WaitForLeader(t, s1.RPC) 17 18 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 19 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 20 21 // Insert "dead" eval 22 state := s1.fsm.State() 23 eval := mock.Eval() 24 eval.Status = structs.EvalStatusFailed 25 state.UpsertJobSummary(999, mock.JobSummary(eval.JobID)) 26 err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) 27 if err != nil { 28 t.Fatalf("err: %v", err) 29 } 30 31 // Insert "dead" alloc 32 alloc := mock.Alloc() 33 alloc.EvalID = eval.ID 34 alloc.DesiredStatus = structs.AllocDesiredStatusStop 35 alloc.JobID = eval.JobID 36 37 // Insert "lost" alloc 38 alloc2 := mock.Alloc() 39 alloc2.EvalID = eval.ID 40 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 41 alloc2.ClientStatus = structs.AllocClientStatusLost 42 alloc2.JobID = eval.JobID 43 err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2}) 44 if err != nil { 45 t.Fatalf("err: %v", err) 46 } 47 48 // Update the time tables to make this work 49 tt := s1.fsm.TimeTable() 50 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold)) 51 52 // Create a core scheduler 53 snap, err := state.Snapshot() 54 if err != nil { 55 t.Fatalf("err: %v", err) 56 } 57 core := NewCoreScheduler(s1, snap) 58 59 // Attempt the GC 60 gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000) 61 err = core.Process(gc) 62 if err != nil { 63 t.Fatalf("err: %v", err) 64 } 65 66 // Should be gone 67 ws := memdb.NewWatchSet() 68 out, err := state.EvalByID(ws, eval.ID) 69 if err != nil { 70 t.Fatalf("err: %v", err) 71 } 72 if out != nil { 73 t.Fatalf("bad: %v", out) 74 } 75 76 outA, err := state.AllocByID(ws, alloc.ID) 77 if err != nil { 78 t.Fatalf("err: %v", err) 79 } 80 if outA != nil { 81 t.Fatalf("bad: %v", outA) 82 } 83 84 outA2, err := state.AllocByID(ws, alloc2.ID) 85 if err != nil { 86 t.Fatalf("err: %v", err) 87 } 88 if outA2 != nil { 89 t.Fatalf("bad: %v", outA2) 90 } 91 } 92 93 // An EvalGC should never reap a batch job that has not been stopped 94 func TestCoreScheduler_EvalGC_Batch(t *testing.T) { 95 s1 := testServer(t, nil) 96 defer s1.Shutdown() 97 testutil.WaitForLeader(t, s1.RPC) 98 99 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 100 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 101 102 // Insert a "dead" job 103 state := s1.fsm.State() 104 job := mock.Job() 105 job.Type = structs.JobTypeBatch 106 job.Status = structs.JobStatusDead 107 err := state.UpsertJob(1000, job) 108 if err != nil { 109 t.Fatalf("err: %v", err) 110 } 111 112 // Insert "complete" eval 113 eval := mock.Eval() 114 eval.Status = structs.EvalStatusComplete 115 eval.Type = structs.JobTypeBatch 116 eval.JobID = job.ID 117 err = state.UpsertEvals(1001, []*structs.Evaluation{eval}) 118 if err != nil { 119 t.Fatalf("err: %v", err) 120 } 121 122 // Insert "failed" alloc 123 alloc := mock.Alloc() 124 alloc.JobID = job.ID 125 alloc.EvalID = eval.ID 126 alloc.DesiredStatus = structs.AllocDesiredStatusStop 127 128 // Insert "lost" alloc 129 alloc2 := mock.Alloc() 130 alloc2.JobID = job.ID 131 alloc2.EvalID = eval.ID 132 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 133 alloc2.ClientStatus = structs.AllocClientStatusLost 134 135 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 136 if err != nil { 137 t.Fatalf("err: %v", err) 138 } 139 140 // Update the time tables to make this work 141 tt := s1.fsm.TimeTable() 142 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold)) 143 144 // Create a core scheduler 145 snap, err := state.Snapshot() 146 if err != nil { 147 t.Fatalf("err: %v", err) 148 } 149 core := NewCoreScheduler(s1, snap) 150 151 // Attempt the GC 152 gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000) 153 err = core.Process(gc) 154 if err != nil { 155 t.Fatalf("err: %v", err) 156 } 157 158 // Nothing should be gone 159 ws := memdb.NewWatchSet() 160 out, err := state.EvalByID(ws, eval.ID) 161 if err != nil { 162 t.Fatalf("err: %v", err) 163 } 164 if out == nil { 165 t.Fatalf("bad: %v", out) 166 } 167 168 outA, err := state.AllocByID(ws, alloc.ID) 169 if err != nil { 170 t.Fatalf("err: %v", err) 171 } 172 if outA == nil { 173 t.Fatalf("bad: %v", outA) 174 } 175 176 outA2, err := state.AllocByID(ws, alloc2.ID) 177 if err != nil { 178 t.Fatalf("err: %v", err) 179 } 180 if outA2 == nil { 181 t.Fatalf("bad: %v", outA2) 182 } 183 184 outB, err := state.JobByID(ws, job.ID) 185 if err != nil { 186 t.Fatalf("err: %v", err) 187 } 188 if outB == nil { 189 t.Fatalf("bad: %v", outB) 190 } 191 } 192 193 // An EvalGC should reap a batch job that has been stopped 194 func TestCoreScheduler_EvalGC_BatchStopped(t *testing.T) { 195 s1 := testServer(t, nil) 196 defer s1.Shutdown() 197 testutil.WaitForLeader(t, s1.RPC) 198 199 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 200 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 201 202 // Create a "dead" job 203 state := s1.fsm.State() 204 job := mock.Job() 205 job.Type = structs.JobTypeBatch 206 job.Status = structs.JobStatusDead 207 208 // Insert "complete" eval 209 eval := mock.Eval() 210 eval.Status = structs.EvalStatusComplete 211 eval.Type = structs.JobTypeBatch 212 eval.JobID = job.ID 213 err := state.UpsertEvals(1001, []*structs.Evaluation{eval}) 214 if err != nil { 215 t.Fatalf("err: %v", err) 216 } 217 218 // Insert "failed" alloc 219 alloc := mock.Alloc() 220 alloc.JobID = job.ID 221 alloc.EvalID = eval.ID 222 alloc.DesiredStatus = structs.AllocDesiredStatusStop 223 224 // Insert "lost" alloc 225 alloc2 := mock.Alloc() 226 alloc2.JobID = job.ID 227 alloc2.EvalID = eval.ID 228 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 229 alloc2.ClientStatus = structs.AllocClientStatusLost 230 231 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 232 if err != nil { 233 t.Fatalf("err: %v", err) 234 } 235 236 // Update the time tables to make this work 237 tt := s1.fsm.TimeTable() 238 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold)) 239 240 // Create a core scheduler 241 snap, err := state.Snapshot() 242 if err != nil { 243 t.Fatalf("err: %v", err) 244 } 245 core := NewCoreScheduler(s1, snap) 246 247 // Attempt the GC 248 gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000) 249 err = core.Process(gc) 250 if err != nil { 251 t.Fatalf("err: %v", err) 252 } 253 254 // Everything should be gone 255 ws := memdb.NewWatchSet() 256 out, err := state.EvalByID(ws, eval.ID) 257 if err != nil { 258 t.Fatalf("err: %v", err) 259 } 260 if out != nil { 261 t.Fatalf("bad: %v", out) 262 } 263 264 outA, err := state.AllocByID(ws, alloc.ID) 265 if err != nil { 266 t.Fatalf("err: %v", err) 267 } 268 if outA != nil { 269 t.Fatalf("bad: %v", outA) 270 } 271 272 outA2, err := state.AllocByID(ws, alloc2.ID) 273 if err != nil { 274 t.Fatalf("err: %v", err) 275 } 276 if outA2 != nil { 277 t.Fatalf("bad: %v", outA2) 278 } 279 } 280 281 func TestCoreScheduler_EvalGC_Partial(t *testing.T) { 282 s1 := testServer(t, nil) 283 defer s1.Shutdown() 284 testutil.WaitForLeader(t, s1.RPC) 285 286 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 287 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 288 289 // Insert "dead" eval 290 state := s1.fsm.State() 291 eval := mock.Eval() 292 eval.Status = structs.EvalStatusComplete 293 state.UpsertJobSummary(999, mock.JobSummary(eval.JobID)) 294 err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) 295 if err != nil { 296 t.Fatalf("err: %v", err) 297 } 298 299 // Insert "dead" alloc 300 alloc := mock.Alloc() 301 alloc.EvalID = eval.ID 302 alloc.DesiredStatus = structs.AllocDesiredStatusStop 303 state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID)) 304 305 // Insert "lost" alloc 306 alloc2 := mock.Alloc() 307 alloc2.JobID = alloc.JobID 308 alloc2.EvalID = eval.ID 309 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 310 alloc2.ClientStatus = structs.AllocClientStatusLost 311 312 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 313 if err != nil { 314 t.Fatalf("err: %v", err) 315 } 316 317 // Insert "running" alloc 318 alloc3 := mock.Alloc() 319 alloc3.EvalID = eval.ID 320 state.UpsertJobSummary(1003, mock.JobSummary(alloc3.JobID)) 321 err = state.UpsertAllocs(1004, []*structs.Allocation{alloc3}) 322 if err != nil { 323 t.Fatalf("err: %v", err) 324 } 325 326 // Update the time tables to make this work 327 tt := s1.fsm.TimeTable() 328 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold)) 329 330 // Create a core scheduler 331 snap, err := state.Snapshot() 332 if err != nil { 333 t.Fatalf("err: %v", err) 334 } 335 core := NewCoreScheduler(s1, snap) 336 337 // Attempt the GC 338 gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000) 339 err = core.Process(gc) 340 if err != nil { 341 t.Fatalf("err: %v", err) 342 } 343 344 // Should not be gone 345 ws := memdb.NewWatchSet() 346 out, err := state.EvalByID(ws, eval.ID) 347 if err != nil { 348 t.Fatalf("err: %v", err) 349 } 350 if out == nil { 351 t.Fatalf("bad: %v", out) 352 } 353 354 outA, err := state.AllocByID(ws, alloc3.ID) 355 if err != nil { 356 t.Fatalf("err: %v", err) 357 } 358 if outA == nil { 359 t.Fatalf("bad: %v", outA) 360 } 361 362 // Should be gone 363 outB, err := state.AllocByID(ws, alloc.ID) 364 if err != nil { 365 t.Fatalf("err: %v", err) 366 } 367 if outB != nil { 368 t.Fatalf("bad: %v", outB) 369 } 370 371 outC, err := state.AllocByID(ws, alloc2.ID) 372 if err != nil { 373 t.Fatalf("err: %v", err) 374 } 375 if outC != nil { 376 t.Fatalf("bad: %v", outC) 377 } 378 } 379 380 func TestCoreScheduler_EvalGC_Force(t *testing.T) { 381 s1 := testServer(t, nil) 382 defer s1.Shutdown() 383 testutil.WaitForLeader(t, s1.RPC) 384 385 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 386 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 387 388 // Insert "dead" eval 389 state := s1.fsm.State() 390 eval := mock.Eval() 391 eval.Status = structs.EvalStatusFailed 392 state.UpsertJobSummary(999, mock.JobSummary(eval.JobID)) 393 err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) 394 if err != nil { 395 t.Fatalf("err: %v", err) 396 } 397 398 // Insert "dead" alloc 399 alloc := mock.Alloc() 400 alloc.EvalID = eval.ID 401 alloc.DesiredStatus = structs.AllocDesiredStatusStop 402 state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID)) 403 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc}) 404 if err != nil { 405 t.Fatalf("err: %v", err) 406 } 407 408 // Create a core scheduler 409 snap, err := state.Snapshot() 410 if err != nil { 411 t.Fatalf("err: %v", err) 412 } 413 core := NewCoreScheduler(s1, snap) 414 415 // Attempt the GC 416 gc := s1.coreJobEval(structs.CoreJobForceGC, 1002) 417 err = core.Process(gc) 418 if err != nil { 419 t.Fatalf("err: %v", err) 420 } 421 422 // Should be gone 423 ws := memdb.NewWatchSet() 424 out, err := state.EvalByID(ws, eval.ID) 425 if err != nil { 426 t.Fatalf("err: %v", err) 427 } 428 if out != nil { 429 t.Fatalf("bad: %v", out) 430 } 431 432 outA, err := state.AllocByID(ws, alloc.ID) 433 if err != nil { 434 t.Fatalf("err: %v", err) 435 } 436 if outA != nil { 437 t.Fatalf("bad: %v", outA) 438 } 439 } 440 441 func TestCoreScheduler_NodeGC(t *testing.T) { 442 s1 := testServer(t, nil) 443 defer s1.Shutdown() 444 testutil.WaitForLeader(t, s1.RPC) 445 446 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 447 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 448 449 // Insert "dead" node 450 state := s1.fsm.State() 451 node := mock.Node() 452 node.Status = structs.NodeStatusDown 453 err := state.UpsertNode(1000, node) 454 if err != nil { 455 t.Fatalf("err: %v", err) 456 } 457 458 // Update the time tables to make this work 459 tt := s1.fsm.TimeTable() 460 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold)) 461 462 // Create a core scheduler 463 snap, err := state.Snapshot() 464 if err != nil { 465 t.Fatalf("err: %v", err) 466 } 467 core := NewCoreScheduler(s1, snap) 468 469 // Attempt the GC 470 gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000) 471 err = core.Process(gc) 472 if err != nil { 473 t.Fatalf("err: %v", err) 474 } 475 476 // Should be gone 477 ws := memdb.NewWatchSet() 478 out, err := state.NodeByID(ws, node.ID) 479 if err != nil { 480 t.Fatalf("err: %v", err) 481 } 482 if out != nil { 483 t.Fatalf("bad: %v", out) 484 } 485 } 486 487 func TestCoreScheduler_NodeGC_TerminalAllocs(t *testing.T) { 488 s1 := testServer(t, nil) 489 defer s1.Shutdown() 490 testutil.WaitForLeader(t, s1.RPC) 491 492 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 493 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 494 495 // Insert "dead" node 496 state := s1.fsm.State() 497 node := mock.Node() 498 node.Status = structs.NodeStatusDown 499 err := state.UpsertNode(1000, node) 500 if err != nil { 501 t.Fatalf("err: %v", err) 502 } 503 504 // Insert a terminal alloc on that node 505 alloc := mock.Alloc() 506 alloc.DesiredStatus = structs.AllocDesiredStatusStop 507 state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID)) 508 if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil { 509 t.Fatalf("err: %v", err) 510 } 511 512 // Update the time tables to make this work 513 tt := s1.fsm.TimeTable() 514 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold)) 515 516 // Create a core scheduler 517 snap, err := state.Snapshot() 518 if err != nil { 519 t.Fatalf("err: %v", err) 520 } 521 core := NewCoreScheduler(s1, snap) 522 523 // Attempt the GC 524 gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000) 525 err = core.Process(gc) 526 if err != nil { 527 t.Fatalf("err: %v", err) 528 } 529 530 // Should be gone 531 ws := memdb.NewWatchSet() 532 out, err := state.NodeByID(ws, node.ID) 533 if err != nil { 534 t.Fatalf("err: %v", err) 535 } 536 if out != nil { 537 t.Fatalf("bad: %v", out) 538 } 539 } 540 541 func TestCoreScheduler_NodeGC_RunningAllocs(t *testing.T) { 542 s1 := testServer(t, nil) 543 defer s1.Shutdown() 544 testutil.WaitForLeader(t, s1.RPC) 545 546 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 547 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 548 549 // Insert "dead" node 550 state := s1.fsm.State() 551 node := mock.Node() 552 node.Status = structs.NodeStatusDown 553 err := state.UpsertNode(1000, node) 554 if err != nil { 555 t.Fatalf("err: %v", err) 556 } 557 558 // Insert a running alloc on that node 559 alloc := mock.Alloc() 560 alloc.NodeID = node.ID 561 alloc.DesiredStatus = structs.AllocDesiredStatusRun 562 alloc.ClientStatus = structs.AllocClientStatusRunning 563 state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID)) 564 if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil { 565 t.Fatalf("err: %v", err) 566 } 567 568 // Update the time tables to make this work 569 tt := s1.fsm.TimeTable() 570 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold)) 571 572 // Create a core scheduler 573 snap, err := state.Snapshot() 574 if err != nil { 575 t.Fatalf("err: %v", err) 576 } 577 core := NewCoreScheduler(s1, snap) 578 579 // Attempt the GC 580 gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000) 581 err = core.Process(gc) 582 if err != nil { 583 t.Fatalf("err: %v", err) 584 } 585 586 // Should still be here 587 ws := memdb.NewWatchSet() 588 out, err := state.NodeByID(ws, node.ID) 589 if err != nil { 590 t.Fatalf("err: %v", err) 591 } 592 if out == nil { 593 t.Fatalf("bad: %v", out) 594 } 595 } 596 597 func TestCoreScheduler_NodeGC_Force(t *testing.T) { 598 s1 := testServer(t, nil) 599 defer s1.Shutdown() 600 testutil.WaitForLeader(t, s1.RPC) 601 602 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 603 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 604 605 // Insert "dead" node 606 state := s1.fsm.State() 607 node := mock.Node() 608 node.Status = structs.NodeStatusDown 609 err := state.UpsertNode(1000, node) 610 if err != nil { 611 t.Fatalf("err: %v", err) 612 } 613 614 // Create a core scheduler 615 snap, err := state.Snapshot() 616 if err != nil { 617 t.Fatalf("err: %v", err) 618 } 619 core := NewCoreScheduler(s1, snap) 620 621 // Attempt the GC 622 gc := s1.coreJobEval(structs.CoreJobForceGC, 1000) 623 err = core.Process(gc) 624 if err != nil { 625 t.Fatalf("err: %v", err) 626 } 627 628 // Should be gone 629 ws := memdb.NewWatchSet() 630 out, err := state.NodeByID(ws, node.ID) 631 if err != nil { 632 t.Fatalf("err: %v", err) 633 } 634 if out != nil { 635 t.Fatalf("bad: %v", out) 636 } 637 } 638 639 func TestCoreScheduler_JobGC_OutstandingEvals(t *testing.T) { 640 s1 := testServer(t, nil) 641 defer s1.Shutdown() 642 testutil.WaitForLeader(t, s1.RPC) 643 644 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 645 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 646 647 // Insert job. 648 state := s1.fsm.State() 649 job := mock.Job() 650 job.Type = structs.JobTypeBatch 651 job.Status = structs.JobStatusDead 652 err := state.UpsertJob(1000, job) 653 if err != nil { 654 t.Fatalf("err: %v", err) 655 } 656 657 // Insert two evals, one terminal and one not 658 eval := mock.Eval() 659 eval.JobID = job.ID 660 eval.Status = structs.EvalStatusComplete 661 662 eval2 := mock.Eval() 663 eval2.JobID = job.ID 664 eval2.Status = structs.EvalStatusPending 665 err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2}) 666 if err != nil { 667 t.Fatalf("err: %v", err) 668 } 669 670 // Update the time tables to make this work 671 tt := s1.fsm.TimeTable() 672 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold)) 673 674 // Create a core scheduler 675 snap, err := state.Snapshot() 676 if err != nil { 677 t.Fatalf("err: %v", err) 678 } 679 core := NewCoreScheduler(s1, snap) 680 681 // Attempt the GC 682 gc := s1.coreJobEval(structs.CoreJobJobGC, 2000) 683 err = core.Process(gc) 684 if err != nil { 685 t.Fatalf("err: %v", err) 686 } 687 688 // Should still exist 689 ws := memdb.NewWatchSet() 690 out, err := state.JobByID(ws, job.ID) 691 if err != nil { 692 t.Fatalf("err: %v", err) 693 } 694 if out == nil { 695 t.Fatalf("bad: %v", out) 696 } 697 698 outE, err := state.EvalByID(ws, eval.ID) 699 if err != nil { 700 t.Fatalf("err: %v", err) 701 } 702 if outE == nil { 703 t.Fatalf("bad: %v", outE) 704 } 705 706 outE2, err := state.EvalByID(ws, eval2.ID) 707 if err != nil { 708 t.Fatalf("err: %v", err) 709 } 710 if outE2 == nil { 711 t.Fatalf("bad: %v", outE2) 712 } 713 714 // Update the second eval to be terminal 715 eval2.Status = structs.EvalStatusComplete 716 err = state.UpsertEvals(1003, []*structs.Evaluation{eval2}) 717 if err != nil { 718 t.Fatalf("err: %v", err) 719 } 720 721 // Create a core scheduler 722 snap, err = state.Snapshot() 723 if err != nil { 724 t.Fatalf("err: %v", err) 725 } 726 core = NewCoreScheduler(s1, snap) 727 728 // Attempt the GC 729 gc = s1.coreJobEval(structs.CoreJobJobGC, 2000) 730 err = core.Process(gc) 731 if err != nil { 732 t.Fatalf("err: %v", err) 733 } 734 735 // Should not still exist 736 out, err = state.JobByID(ws, job.ID) 737 if err != nil { 738 t.Fatalf("err: %v", err) 739 } 740 if out != nil { 741 t.Fatalf("bad: %v", out) 742 } 743 744 outE, err = state.EvalByID(ws, eval.ID) 745 if err != nil { 746 t.Fatalf("err: %v", err) 747 } 748 if outE != nil { 749 t.Fatalf("bad: %v", outE) 750 } 751 752 outE2, err = state.EvalByID(ws, eval2.ID) 753 if err != nil { 754 t.Fatalf("err: %v", err) 755 } 756 if outE2 != nil { 757 t.Fatalf("bad: %v", outE2) 758 } 759 } 760 761 func TestCoreScheduler_JobGC_OutstandingAllocs(t *testing.T) { 762 s1 := testServer(t, nil) 763 defer s1.Shutdown() 764 testutil.WaitForLeader(t, s1.RPC) 765 766 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 767 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 768 769 // Insert job. 770 state := s1.fsm.State() 771 job := mock.Job() 772 job.Type = structs.JobTypeBatch 773 job.Status = structs.JobStatusDead 774 err := state.UpsertJob(1000, job) 775 if err != nil { 776 t.Fatalf("err: %v", err) 777 } 778 779 // Insert an eval 780 eval := mock.Eval() 781 eval.JobID = job.ID 782 eval.Status = structs.EvalStatusComplete 783 err = state.UpsertEvals(1001, []*structs.Evaluation{eval}) 784 if err != nil { 785 t.Fatalf("err: %v", err) 786 } 787 788 // Insert two allocs, one terminal and one not 789 alloc := mock.Alloc() 790 alloc.JobID = job.ID 791 alloc.EvalID = eval.ID 792 alloc.DesiredStatus = structs.AllocDesiredStatusRun 793 alloc.ClientStatus = structs.AllocClientStatusComplete 794 795 alloc2 := mock.Alloc() 796 alloc2.JobID = job.ID 797 alloc2.EvalID = eval.ID 798 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 799 alloc2.ClientStatus = structs.AllocClientStatusRunning 800 801 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 802 if err != nil { 803 t.Fatalf("err: %v", err) 804 } 805 806 // Update the time tables to make this work 807 tt := s1.fsm.TimeTable() 808 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold)) 809 810 // Create a core scheduler 811 snap, err := state.Snapshot() 812 if err != nil { 813 t.Fatalf("err: %v", err) 814 } 815 core := NewCoreScheduler(s1, snap) 816 817 // Attempt the GC 818 gc := s1.coreJobEval(structs.CoreJobJobGC, 2000) 819 err = core.Process(gc) 820 if err != nil { 821 t.Fatalf("err: %v", err) 822 } 823 824 // Should still exist 825 ws := memdb.NewWatchSet() 826 out, err := state.JobByID(ws, job.ID) 827 if err != nil { 828 t.Fatalf("err: %v", err) 829 } 830 if out == nil { 831 t.Fatalf("bad: %v", out) 832 } 833 834 outA, err := state.AllocByID(ws, alloc.ID) 835 if err != nil { 836 t.Fatalf("err: %v", err) 837 } 838 if outA == nil { 839 t.Fatalf("bad: %v", outA) 840 } 841 842 outA2, err := state.AllocByID(ws, alloc2.ID) 843 if err != nil { 844 t.Fatalf("err: %v", err) 845 } 846 if outA2 == nil { 847 t.Fatalf("bad: %v", outA2) 848 } 849 850 // Update the second alloc to be terminal 851 alloc2.ClientStatus = structs.AllocClientStatusComplete 852 err = state.UpsertAllocs(1003, []*structs.Allocation{alloc2}) 853 if err != nil { 854 t.Fatalf("err: %v", err) 855 } 856 857 // Create a core scheduler 858 snap, err = state.Snapshot() 859 if err != nil { 860 t.Fatalf("err: %v", err) 861 } 862 core = NewCoreScheduler(s1, snap) 863 864 // Attempt the GC 865 gc = s1.coreJobEval(structs.CoreJobJobGC, 2000) 866 err = core.Process(gc) 867 if err != nil { 868 t.Fatalf("err: %v", err) 869 } 870 871 // Should not still exist 872 out, err = state.JobByID(ws, job.ID) 873 if err != nil { 874 t.Fatalf("err: %v", err) 875 } 876 if out != nil { 877 t.Fatalf("bad: %v", out) 878 } 879 880 outA, err = state.AllocByID(ws, alloc.ID) 881 if err != nil { 882 t.Fatalf("err: %v", err) 883 } 884 if outA != nil { 885 t.Fatalf("bad: %v", outA) 886 } 887 888 outA2, err = state.AllocByID(ws, alloc2.ID) 889 if err != nil { 890 t.Fatalf("err: %v", err) 891 } 892 if outA2 != nil { 893 t.Fatalf("bad: %v", outA2) 894 } 895 } 896 897 // This test ensures that batch jobs are GC'd in one shot, meaning it all 898 // allocs/evals and job or nothing 899 func TestCoreScheduler_JobGC_OneShot(t *testing.T) { 900 s1 := testServer(t, nil) 901 defer s1.Shutdown() 902 testutil.WaitForLeader(t, s1.RPC) 903 904 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 905 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 906 907 // Insert job. 908 state := s1.fsm.State() 909 job := mock.Job() 910 job.Type = structs.JobTypeBatch 911 err := state.UpsertJob(1000, job) 912 if err != nil { 913 t.Fatalf("err: %v", err) 914 } 915 916 // Insert two complete evals 917 eval := mock.Eval() 918 eval.JobID = job.ID 919 eval.Status = structs.EvalStatusComplete 920 921 eval2 := mock.Eval() 922 eval2.JobID = job.ID 923 eval2.Status = structs.EvalStatusComplete 924 925 err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2}) 926 if err != nil { 927 t.Fatalf("err: %v", err) 928 } 929 930 // Insert one complete alloc and one running on distinct evals 931 alloc := mock.Alloc() 932 alloc.JobID = job.ID 933 alloc.EvalID = eval.ID 934 alloc.DesiredStatus = structs.AllocDesiredStatusStop 935 936 alloc2 := mock.Alloc() 937 alloc2.JobID = job.ID 938 alloc2.EvalID = eval2.ID 939 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 940 941 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 942 if err != nil { 943 t.Fatalf("err: %v", err) 944 } 945 946 // Force the jobs state to dead 947 job.Status = structs.JobStatusDead 948 949 // Update the time tables to make this work 950 tt := s1.fsm.TimeTable() 951 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold)) 952 953 // Create a core scheduler 954 snap, err := state.Snapshot() 955 if err != nil { 956 t.Fatalf("err: %v", err) 957 } 958 core := NewCoreScheduler(s1, snap) 959 960 // Attempt the GC 961 gc := s1.coreJobEval(structs.CoreJobJobGC, 2000) 962 err = core.Process(gc) 963 if err != nil { 964 t.Fatalf("err: %v", err) 965 } 966 967 // Should still exist 968 ws := memdb.NewWatchSet() 969 out, err := state.JobByID(ws, job.ID) 970 if err != nil { 971 t.Fatalf("err: %v", err) 972 } 973 if out == nil { 974 t.Fatalf("bad: %v", out) 975 } 976 977 outE, err := state.EvalByID(ws, eval.ID) 978 if err != nil { 979 t.Fatalf("err: %v", err) 980 } 981 if outE == nil { 982 t.Fatalf("bad: %v", outE) 983 } 984 985 outE2, err := state.EvalByID(ws, eval2.ID) 986 if err != nil { 987 t.Fatalf("err: %v", err) 988 } 989 if outE2 == nil { 990 t.Fatalf("bad: %v", outE2) 991 } 992 993 outA, err := state.AllocByID(ws, alloc.ID) 994 if err != nil { 995 t.Fatalf("err: %v", err) 996 } 997 if outA == nil { 998 t.Fatalf("bad: %v", outA) 999 } 1000 outA2, err := state.AllocByID(ws, alloc2.ID) 1001 if err != nil { 1002 t.Fatalf("err: %v", err) 1003 } 1004 if outA2 == nil { 1005 t.Fatalf("bad: %v", outA2) 1006 } 1007 } 1008 1009 func TestCoreScheduler_JobGC_Force(t *testing.T) { 1010 s1 := testServer(t, nil) 1011 defer s1.Shutdown() 1012 testutil.WaitForLeader(t, s1.RPC) 1013 1014 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 1015 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 1016 1017 // Insert job. 1018 state := s1.fsm.State() 1019 job := mock.Job() 1020 job.Type = structs.JobTypeBatch 1021 job.Status = structs.JobStatusDead 1022 err := state.UpsertJob(1000, job) 1023 if err != nil { 1024 t.Fatalf("err: %v", err) 1025 } 1026 1027 // Insert a terminal eval 1028 eval := mock.Eval() 1029 eval.JobID = job.ID 1030 eval.Status = structs.EvalStatusComplete 1031 err = state.UpsertEvals(1001, []*structs.Evaluation{eval}) 1032 if err != nil { 1033 t.Fatalf("err: %v", err) 1034 } 1035 1036 // Create a core scheduler 1037 snap, err := state.Snapshot() 1038 if err != nil { 1039 t.Fatalf("err: %v", err) 1040 } 1041 core := NewCoreScheduler(s1, snap) 1042 1043 // Attempt the GC 1044 gc := s1.coreJobEval(structs.CoreJobForceGC, 1002) 1045 err = core.Process(gc) 1046 if err != nil { 1047 t.Fatalf("err: %v", err) 1048 } 1049 1050 // Shouldn't still exist 1051 ws := memdb.NewWatchSet() 1052 out, err := state.JobByID(ws, job.ID) 1053 if err != nil { 1054 t.Fatalf("err: %v", err) 1055 } 1056 if out != nil { 1057 t.Fatalf("bad: %v", out) 1058 } 1059 1060 outE, err := state.EvalByID(ws, eval.ID) 1061 if err != nil { 1062 t.Fatalf("err: %v", err) 1063 } 1064 if outE != nil { 1065 t.Fatalf("bad: %v", outE) 1066 } 1067 } 1068 1069 // This test ensures parameterized and periodic jobs don't get GCd 1070 func TestCoreScheduler_JobGC_NonGCable(t *testing.T) { 1071 s1 := testServer(t, nil) 1072 defer s1.Shutdown() 1073 testutil.WaitForLeader(t, s1.RPC) 1074 1075 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 1076 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 1077 1078 // Insert a parameterized job. 1079 state := s1.fsm.State() 1080 job := mock.Job() 1081 job.Type = structs.JobTypeBatch 1082 job.Status = structs.JobStatusRunning 1083 job.ParameterizedJob = &structs.ParameterizedJobConfig{ 1084 Payload: structs.DispatchPayloadRequired, 1085 } 1086 err := state.UpsertJob(1000, job) 1087 if err != nil { 1088 t.Fatalf("err: %v", err) 1089 } 1090 1091 // Insert a periodic job. 1092 job2 := mock.PeriodicJob() 1093 if err := state.UpsertJob(1001, job2); err != nil { 1094 t.Fatalf("err: %v", err) 1095 } 1096 1097 // Create a core scheduler 1098 snap, err := state.Snapshot() 1099 if err != nil { 1100 t.Fatalf("err: %v", err) 1101 } 1102 core := NewCoreScheduler(s1, snap) 1103 1104 // Attempt the GC 1105 gc := s1.coreJobEval(structs.CoreJobForceGC, 1002) 1106 err = core.Process(gc) 1107 if err != nil { 1108 t.Fatalf("err: %v", err) 1109 } 1110 1111 // Should still exist 1112 ws := memdb.NewWatchSet() 1113 out, err := state.JobByID(ws, job.ID) 1114 if err != nil { 1115 t.Fatalf("err: %v", err) 1116 } 1117 if out == nil { 1118 t.Fatalf("bad: %v", out) 1119 } 1120 1121 outE, err := state.JobByID(ws, job2.ID) 1122 if err != nil { 1123 t.Fatalf("err: %v", err) 1124 } 1125 if outE == nil { 1126 t.Fatalf("bad: %v", outE) 1127 } 1128 } 1129 1130 func TestCoreScheduler_PartitionReap(t *testing.T) { 1131 s1 := testServer(t, nil) 1132 defer s1.Shutdown() 1133 testutil.WaitForLeader(t, s1.RPC) 1134 1135 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 1136 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 1137 1138 // Create a core scheduler 1139 snap, err := s1.fsm.State().Snapshot() 1140 if err != nil { 1141 t.Fatalf("err: %v", err) 1142 } 1143 core := NewCoreScheduler(s1, snap) 1144 1145 // Set the max ids per reap to something lower. 1146 maxIdsPerReap = 2 1147 1148 evals := []string{"a", "b", "c"} 1149 allocs := []string{"1", "2", "3"} 1150 requests := core.(*CoreScheduler).partitionReap(evals, allocs) 1151 if len(requests) != 3 { 1152 t.Fatalf("Expected 3 requests got: %v", requests) 1153 } 1154 1155 first := requests[0] 1156 if len(first.Allocs) != 2 && len(first.Evals) != 0 { 1157 t.Fatalf("Unexpected first request: %v", first) 1158 } 1159 1160 second := requests[1] 1161 if len(second.Allocs) != 1 && len(second.Evals) != 1 { 1162 t.Fatalf("Unexpected second request: %v", second) 1163 } 1164 1165 third := requests[2] 1166 if len(third.Allocs) != 0 && len(third.Evals) != 2 { 1167 t.Fatalf("Unexpected third request: %v", third) 1168 } 1169 }