github.com/diptanu/nomad@v0.5.7-0.20170516172507-d72e86cbe3d9/nomad/core_sched_test.go (about) 1 package nomad 2 3 import ( 4 "testing" 5 "time" 6 7 memdb "github.com/hashicorp/go-memdb" 8 "github.com/hashicorp/nomad/nomad/mock" 9 "github.com/hashicorp/nomad/nomad/structs" 10 "github.com/hashicorp/nomad/testutil" 11 ) 12 13 func TestCoreScheduler_EvalGC(t *testing.T) { 14 s1 := testServer(t, nil) 15 defer s1.Shutdown() 16 testutil.WaitForLeader(t, s1.RPC) 17 18 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 19 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 20 21 // Insert "dead" eval 22 state := s1.fsm.State() 23 eval := mock.Eval() 24 eval.Status = structs.EvalStatusFailed 25 state.UpsertJobSummary(999, mock.JobSummary(eval.JobID)) 26 err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) 27 if err != nil { 28 t.Fatalf("err: %v", err) 29 } 30 31 // Insert "dead" alloc 32 alloc := mock.Alloc() 33 alloc.EvalID = eval.ID 34 alloc.DesiredStatus = structs.AllocDesiredStatusStop 35 alloc.JobID = eval.JobID 36 37 // Insert "lost" alloc 38 alloc2 := mock.Alloc() 39 alloc2.EvalID = eval.ID 40 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 41 alloc2.ClientStatus = structs.AllocClientStatusLost 42 alloc2.JobID = eval.JobID 43 err = state.UpsertAllocs(1001, []*structs.Allocation{alloc, alloc2}) 44 if err != nil { 45 t.Fatalf("err: %v", err) 46 } 47 48 // Update the time tables to make this work 49 tt := s1.fsm.TimeTable() 50 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold)) 51 52 // Create a core scheduler 53 snap, err := state.Snapshot() 54 if err != nil { 55 t.Fatalf("err: %v", err) 56 } 57 core := NewCoreScheduler(s1, snap) 58 59 // Attempt the GC 60 gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000) 61 err = core.Process(gc) 62 if err != nil { 63 t.Fatalf("err: %v", err) 64 } 65 66 // Should be gone 67 ws := memdb.NewWatchSet() 68 out, err := state.EvalByID(ws, eval.ID) 69 if err != nil { 70 t.Fatalf("err: %v", err) 71 } 72 if out != nil { 73 t.Fatalf("bad: %v", out) 74 } 75 76 outA, err := state.AllocByID(ws, alloc.ID) 77 if err != nil { 78 t.Fatalf("err: %v", err) 79 } 80 if outA != nil { 81 t.Fatalf("bad: %v", outA) 82 } 83 84 outA2, err := state.AllocByID(ws, alloc2.ID) 85 if err != nil { 86 t.Fatalf("err: %v", err) 87 } 88 if outA2 != nil { 89 t.Fatalf("bad: %v", outA2) 90 } 91 } 92 93 // An EvalGC should never reap a batch job that has not been stopped 94 func TestCoreScheduler_EvalGC_Batch(t *testing.T) { 95 s1 := testServer(t, nil) 96 defer s1.Shutdown() 97 testutil.WaitForLeader(t, s1.RPC) 98 99 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 100 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 101 102 // Insert a "dead" job 103 state := s1.fsm.State() 104 job := mock.Job() 105 job.Type = structs.JobTypeBatch 106 job.Status = structs.JobStatusDead 107 err := state.UpsertJob(1000, job) 108 if err != nil { 109 t.Fatalf("err: %v", err) 110 } 111 112 // Insert "complete" eval 113 eval := mock.Eval() 114 eval.Status = structs.EvalStatusComplete 115 eval.Type = structs.JobTypeBatch 116 eval.JobID = job.ID 117 err = state.UpsertEvals(1001, []*structs.Evaluation{eval}) 118 if err != nil { 119 t.Fatalf("err: %v", err) 120 } 121 122 // Insert "failed" alloc 123 alloc := mock.Alloc() 124 alloc.JobID = job.ID 125 alloc.EvalID = eval.ID 126 alloc.DesiredStatus = structs.AllocDesiredStatusStop 127 128 // Insert "lost" alloc 129 alloc2 := mock.Alloc() 130 alloc2.JobID = job.ID 131 alloc2.EvalID = eval.ID 132 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 133 alloc2.ClientStatus = structs.AllocClientStatusLost 134 135 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 136 if err != nil { 137 t.Fatalf("err: %v", err) 138 } 139 140 // Update the time tables to make this work 141 tt := s1.fsm.TimeTable() 142 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold)) 143 144 // Create a core scheduler 145 snap, err := state.Snapshot() 146 if err != nil { 147 t.Fatalf("err: %v", err) 148 } 149 core := NewCoreScheduler(s1, snap) 150 151 // Attempt the GC 152 gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000) 153 err = core.Process(gc) 154 if err != nil { 155 t.Fatalf("err: %v", err) 156 } 157 158 // Nothing should be gone 159 ws := memdb.NewWatchSet() 160 out, err := state.EvalByID(ws, eval.ID) 161 if err != nil { 162 t.Fatalf("err: %v", err) 163 } 164 if out == nil { 165 t.Fatalf("bad: %v", out) 166 } 167 168 outA, err := state.AllocByID(ws, alloc.ID) 169 if err != nil { 170 t.Fatalf("err: %v", err) 171 } 172 if outA == nil { 173 t.Fatalf("bad: %v", outA) 174 } 175 176 outA2, err := state.AllocByID(ws, alloc2.ID) 177 if err != nil { 178 t.Fatalf("err: %v", err) 179 } 180 if outA2 == nil { 181 t.Fatalf("bad: %v", outA2) 182 } 183 184 outB, err := state.JobByID(ws, job.ID) 185 if err != nil { 186 t.Fatalf("err: %v", err) 187 } 188 if outB == nil { 189 t.Fatalf("bad: %v", outB) 190 } 191 } 192 193 // An EvalGC should reap a batch job that has been stopped 194 func TestCoreScheduler_EvalGC_BatchStopped(t *testing.T) { 195 s1 := testServer(t, nil) 196 defer s1.Shutdown() 197 testutil.WaitForLeader(t, s1.RPC) 198 199 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 200 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 201 202 // Create a "dead" job 203 state := s1.fsm.State() 204 job := mock.Job() 205 job.Type = structs.JobTypeBatch 206 job.Status = structs.JobStatusDead 207 208 // Insert "complete" eval 209 eval := mock.Eval() 210 eval.Status = structs.EvalStatusComplete 211 eval.Type = structs.JobTypeBatch 212 eval.JobID = job.ID 213 err := state.UpsertEvals(1001, []*structs.Evaluation{eval}) 214 if err != nil { 215 t.Fatalf("err: %v", err) 216 } 217 218 // Insert "failed" alloc 219 alloc := mock.Alloc() 220 alloc.JobID = job.ID 221 alloc.EvalID = eval.ID 222 alloc.DesiredStatus = structs.AllocDesiredStatusStop 223 224 // Insert "lost" alloc 225 alloc2 := mock.Alloc() 226 alloc2.JobID = job.ID 227 alloc2.EvalID = eval.ID 228 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 229 alloc2.ClientStatus = structs.AllocClientStatusLost 230 231 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 232 if err != nil { 233 t.Fatalf("err: %v", err) 234 } 235 236 // Update the time tables to make this work 237 tt := s1.fsm.TimeTable() 238 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold)) 239 240 // Create a core scheduler 241 snap, err := state.Snapshot() 242 if err != nil { 243 t.Fatalf("err: %v", err) 244 } 245 core := NewCoreScheduler(s1, snap) 246 247 // Attempt the GC 248 gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000) 249 err = core.Process(gc) 250 if err != nil { 251 t.Fatalf("err: %v", err) 252 } 253 254 // Everything should be gone 255 ws := memdb.NewWatchSet() 256 out, err := state.EvalByID(ws, eval.ID) 257 if err != nil { 258 t.Fatalf("err: %v", err) 259 } 260 if out != nil { 261 t.Fatalf("bad: %v", out) 262 } 263 264 outA, err := state.AllocByID(ws, alloc.ID) 265 if err != nil { 266 t.Fatalf("err: %v", err) 267 } 268 if outA != nil { 269 t.Fatalf("bad: %v", outA) 270 } 271 272 outA2, err := state.AllocByID(ws, alloc2.ID) 273 if err != nil { 274 t.Fatalf("err: %v", err) 275 } 276 if outA2 != nil { 277 t.Fatalf("bad: %v", outA2) 278 } 279 } 280 281 func TestCoreScheduler_EvalGC_Partial(t *testing.T) { 282 s1 := testServer(t, nil) 283 defer s1.Shutdown() 284 testutil.WaitForLeader(t, s1.RPC) 285 286 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 287 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 288 289 // Insert "dead" eval 290 state := s1.fsm.State() 291 eval := mock.Eval() 292 eval.Status = structs.EvalStatusComplete 293 state.UpsertJobSummary(999, mock.JobSummary(eval.JobID)) 294 err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) 295 if err != nil { 296 t.Fatalf("err: %v", err) 297 } 298 299 // Insert "dead" alloc 300 alloc := mock.Alloc() 301 alloc.EvalID = eval.ID 302 alloc.DesiredStatus = structs.AllocDesiredStatusStop 303 state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID)) 304 305 // Insert "lost" alloc 306 alloc2 := mock.Alloc() 307 alloc2.JobID = alloc.JobID 308 alloc2.EvalID = eval.ID 309 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 310 alloc2.ClientStatus = structs.AllocClientStatusLost 311 312 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 313 if err != nil { 314 t.Fatalf("err: %v", err) 315 } 316 317 // Insert "running" alloc 318 alloc3 := mock.Alloc() 319 alloc3.EvalID = eval.ID 320 state.UpsertJobSummary(1003, mock.JobSummary(alloc3.JobID)) 321 err = state.UpsertAllocs(1004, []*structs.Allocation{alloc3}) 322 if err != nil { 323 t.Fatalf("err: %v", err) 324 } 325 326 // Update the time tables to make this work 327 tt := s1.fsm.TimeTable() 328 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.EvalGCThreshold)) 329 330 // Create a core scheduler 331 snap, err := state.Snapshot() 332 if err != nil { 333 t.Fatalf("err: %v", err) 334 } 335 core := NewCoreScheduler(s1, snap) 336 337 // Attempt the GC 338 gc := s1.coreJobEval(structs.CoreJobEvalGC, 2000) 339 err = core.Process(gc) 340 if err != nil { 341 t.Fatalf("err: %v", err) 342 } 343 344 // Should not be gone 345 ws := memdb.NewWatchSet() 346 out, err := state.EvalByID(ws, eval.ID) 347 if err != nil { 348 t.Fatalf("err: %v", err) 349 } 350 if out == nil { 351 t.Fatalf("bad: %v", out) 352 } 353 354 outA, err := state.AllocByID(ws, alloc3.ID) 355 if err != nil { 356 t.Fatalf("err: %v", err) 357 } 358 if outA == nil { 359 t.Fatalf("bad: %v", outA) 360 } 361 362 // Should be gone 363 outB, err := state.AllocByID(ws, alloc.ID) 364 if err != nil { 365 t.Fatalf("err: %v", err) 366 } 367 if outB != nil { 368 t.Fatalf("bad: %v", outB) 369 } 370 371 outC, err := state.AllocByID(ws, alloc2.ID) 372 if err != nil { 373 t.Fatalf("err: %v", err) 374 } 375 if outC != nil { 376 t.Fatalf("bad: %v", outC) 377 } 378 } 379 380 func TestCoreScheduler_EvalGC_Force(t *testing.T) { 381 s1 := testServer(t, nil) 382 defer s1.Shutdown() 383 testutil.WaitForLeader(t, s1.RPC) 384 385 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 386 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 387 388 // Insert "dead" eval 389 state := s1.fsm.State() 390 eval := mock.Eval() 391 eval.Status = structs.EvalStatusFailed 392 state.UpsertJobSummary(999, mock.JobSummary(eval.JobID)) 393 err := state.UpsertEvals(1000, []*structs.Evaluation{eval}) 394 if err != nil { 395 t.Fatalf("err: %v", err) 396 } 397 398 // Insert "dead" alloc 399 alloc := mock.Alloc() 400 alloc.EvalID = eval.ID 401 alloc.DesiredStatus = structs.AllocDesiredStatusStop 402 state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID)) 403 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc}) 404 if err != nil { 405 t.Fatalf("err: %v", err) 406 } 407 408 // Create a core scheduler 409 snap, err := state.Snapshot() 410 if err != nil { 411 t.Fatalf("err: %v", err) 412 } 413 core := NewCoreScheduler(s1, snap) 414 415 // Attempt the GC 416 gc := s1.coreJobEval(structs.CoreJobForceGC, 1002) 417 err = core.Process(gc) 418 if err != nil { 419 t.Fatalf("err: %v", err) 420 } 421 422 // Should be gone 423 ws := memdb.NewWatchSet() 424 out, err := state.EvalByID(ws, eval.ID) 425 if err != nil { 426 t.Fatalf("err: %v", err) 427 } 428 if out != nil { 429 t.Fatalf("bad: %v", out) 430 } 431 432 outA, err := state.AllocByID(ws, alloc.ID) 433 if err != nil { 434 t.Fatalf("err: %v", err) 435 } 436 if outA != nil { 437 t.Fatalf("bad: %v", outA) 438 } 439 } 440 441 func TestCoreScheduler_NodeGC(t *testing.T) { 442 s1 := testServer(t, nil) 443 defer s1.Shutdown() 444 testutil.WaitForLeader(t, s1.RPC) 445 446 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 447 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 448 449 // Insert "dead" node 450 state := s1.fsm.State() 451 node := mock.Node() 452 node.Status = structs.NodeStatusDown 453 err := state.UpsertNode(1000, node) 454 if err != nil { 455 t.Fatalf("err: %v", err) 456 } 457 458 // Update the time tables to make this work 459 tt := s1.fsm.TimeTable() 460 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold)) 461 462 // Create a core scheduler 463 snap, err := state.Snapshot() 464 if err != nil { 465 t.Fatalf("err: %v", err) 466 } 467 core := NewCoreScheduler(s1, snap) 468 469 // Attempt the GC 470 gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000) 471 err = core.Process(gc) 472 if err != nil { 473 t.Fatalf("err: %v", err) 474 } 475 476 // Should be gone 477 ws := memdb.NewWatchSet() 478 out, err := state.NodeByID(ws, node.ID) 479 if err != nil { 480 t.Fatalf("err: %v", err) 481 } 482 if out != nil { 483 t.Fatalf("bad: %v", out) 484 } 485 } 486 487 func TestCoreScheduler_NodeGC_TerminalAllocs(t *testing.T) { 488 s1 := testServer(t, nil) 489 defer s1.Shutdown() 490 testutil.WaitForLeader(t, s1.RPC) 491 492 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 493 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 494 495 // Insert "dead" node 496 state := s1.fsm.State() 497 node := mock.Node() 498 node.Status = structs.NodeStatusDown 499 err := state.UpsertNode(1000, node) 500 if err != nil { 501 t.Fatalf("err: %v", err) 502 } 503 504 // Insert a terminal alloc on that node 505 alloc := mock.Alloc() 506 alloc.DesiredStatus = structs.AllocDesiredStatusStop 507 state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID)) 508 if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil { 509 t.Fatalf("err: %v", err) 510 } 511 512 // Update the time tables to make this work 513 tt := s1.fsm.TimeTable() 514 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold)) 515 516 // Create a core scheduler 517 snap, err := state.Snapshot() 518 if err != nil { 519 t.Fatalf("err: %v", err) 520 } 521 core := NewCoreScheduler(s1, snap) 522 523 // Attempt the GC 524 gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000) 525 err = core.Process(gc) 526 if err != nil { 527 t.Fatalf("err: %v", err) 528 } 529 530 // Should be gone 531 ws := memdb.NewWatchSet() 532 out, err := state.NodeByID(ws, node.ID) 533 if err != nil { 534 t.Fatalf("err: %v", err) 535 } 536 if out != nil { 537 t.Fatalf("bad: %v", out) 538 } 539 } 540 541 func TestCoreScheduler_NodeGC_RunningAllocs(t *testing.T) { 542 s1 := testServer(t, nil) 543 defer s1.Shutdown() 544 testutil.WaitForLeader(t, s1.RPC) 545 546 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 547 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 548 549 // Insert "dead" node 550 state := s1.fsm.State() 551 node := mock.Node() 552 node.Status = structs.NodeStatusDown 553 err := state.UpsertNode(1000, node) 554 if err != nil { 555 t.Fatalf("err: %v", err) 556 } 557 558 // Insert a running alloc on that node 559 alloc := mock.Alloc() 560 alloc.NodeID = node.ID 561 alloc.DesiredStatus = structs.AllocDesiredStatusRun 562 alloc.ClientStatus = structs.AllocClientStatusRunning 563 state.UpsertJobSummary(1001, mock.JobSummary(alloc.JobID)) 564 if err := state.UpsertAllocs(1002, []*structs.Allocation{alloc}); err != nil { 565 t.Fatalf("err: %v", err) 566 } 567 568 // Update the time tables to make this work 569 tt := s1.fsm.TimeTable() 570 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.NodeGCThreshold)) 571 572 // Create a core scheduler 573 snap, err := state.Snapshot() 574 if err != nil { 575 t.Fatalf("err: %v", err) 576 } 577 core := NewCoreScheduler(s1, snap) 578 579 // Attempt the GC 580 gc := s1.coreJobEval(structs.CoreJobNodeGC, 2000) 581 err = core.Process(gc) 582 if err != nil { 583 t.Fatalf("err: %v", err) 584 } 585 586 // Should still be here 587 ws := memdb.NewWatchSet() 588 out, err := state.NodeByID(ws, node.ID) 589 if err != nil { 590 t.Fatalf("err: %v", err) 591 } 592 if out == nil { 593 t.Fatalf("bad: %v", out) 594 } 595 } 596 597 func TestCoreScheduler_NodeGC_Force(t *testing.T) { 598 s1 := testServer(t, nil) 599 defer s1.Shutdown() 600 testutil.WaitForLeader(t, s1.RPC) 601 602 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 603 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 604 605 // Insert "dead" node 606 state := s1.fsm.State() 607 node := mock.Node() 608 node.Status = structs.NodeStatusDown 609 err := state.UpsertNode(1000, node) 610 if err != nil { 611 t.Fatalf("err: %v", err) 612 } 613 614 // Create a core scheduler 615 snap, err := state.Snapshot() 616 if err != nil { 617 t.Fatalf("err: %v", err) 618 } 619 core := NewCoreScheduler(s1, snap) 620 621 // Attempt the GC 622 gc := s1.coreJobEval(structs.CoreJobForceGC, 1000) 623 err = core.Process(gc) 624 if err != nil { 625 t.Fatalf("err: %v", err) 626 } 627 628 // Should be gone 629 ws := memdb.NewWatchSet() 630 out, err := state.NodeByID(ws, node.ID) 631 if err != nil { 632 t.Fatalf("err: %v", err) 633 } 634 if out != nil { 635 t.Fatalf("bad: %v", out) 636 } 637 } 638 639 func TestCoreScheduler_JobGC_OutstandingEvals(t *testing.T) { 640 s1 := testServer(t, nil) 641 defer s1.Shutdown() 642 testutil.WaitForLeader(t, s1.RPC) 643 644 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 645 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 646 647 // Insert job. 648 state := s1.fsm.State() 649 job := mock.Job() 650 job.Type = structs.JobTypeBatch 651 job.Status = structs.JobStatusDead 652 err := state.UpsertJob(1000, job) 653 if err != nil { 654 t.Fatalf("err: %v", err) 655 } 656 657 // Insert two evals, one terminal and one not 658 eval := mock.Eval() 659 eval.JobID = job.ID 660 eval.Status = structs.EvalStatusComplete 661 662 eval2 := mock.Eval() 663 eval2.JobID = job.ID 664 eval2.Status = structs.EvalStatusPending 665 err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2}) 666 if err != nil { 667 t.Fatalf("err: %v", err) 668 } 669 670 // Update the time tables to make this work 671 tt := s1.fsm.TimeTable() 672 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold)) 673 674 // Create a core scheduler 675 snap, err := state.Snapshot() 676 if err != nil { 677 t.Fatalf("err: %v", err) 678 } 679 core := NewCoreScheduler(s1, snap) 680 681 // Attempt the GC 682 gc := s1.coreJobEval(structs.CoreJobJobGC, 2000) 683 err = core.Process(gc) 684 if err != nil { 685 t.Fatalf("err: %v", err) 686 } 687 688 // Should still exist 689 ws := memdb.NewWatchSet() 690 out, err := state.JobByID(ws, job.ID) 691 if err != nil { 692 t.Fatalf("err: %v", err) 693 } 694 if out == nil { 695 t.Fatalf("bad: %v", out) 696 } 697 698 outE, err := state.EvalByID(ws, eval.ID) 699 if err != nil { 700 t.Fatalf("err: %v", err) 701 } 702 if outE == nil { 703 t.Fatalf("bad: %v", outE) 704 } 705 706 outE2, err := state.EvalByID(ws, eval2.ID) 707 if err != nil { 708 t.Fatalf("err: %v", err) 709 } 710 if outE2 == nil { 711 t.Fatalf("bad: %v", outE2) 712 } 713 714 // Update the second eval to be terminal 715 eval2.Status = structs.EvalStatusComplete 716 err = state.UpsertEvals(1003, []*structs.Evaluation{eval2}) 717 if err != nil { 718 t.Fatalf("err: %v", err) 719 } 720 721 // Create a core scheduler 722 snap, err = state.Snapshot() 723 if err != nil { 724 t.Fatalf("err: %v", err) 725 } 726 core = NewCoreScheduler(s1, snap) 727 728 // Attempt the GC 729 gc = s1.coreJobEval(structs.CoreJobJobGC, 2000) 730 err = core.Process(gc) 731 if err != nil { 732 t.Fatalf("err: %v", err) 733 } 734 735 // Should not still exist 736 out, err = state.JobByID(ws, job.ID) 737 if err != nil { 738 t.Fatalf("err: %v", err) 739 } 740 if out != nil { 741 t.Fatalf("bad: %v", out) 742 } 743 744 outE, err = state.EvalByID(ws, eval.ID) 745 if err != nil { 746 t.Fatalf("err: %v", err) 747 } 748 if outE != nil { 749 t.Fatalf("bad: %v", outE) 750 } 751 752 outE2, err = state.EvalByID(ws, eval2.ID) 753 if err != nil { 754 t.Fatalf("err: %v", err) 755 } 756 if outE2 != nil { 757 t.Fatalf("bad: %v", outE2) 758 } 759 } 760 761 func TestCoreScheduler_JobGC_OutstandingAllocs(t *testing.T) { 762 s1 := testServer(t, nil) 763 defer s1.Shutdown() 764 testutil.WaitForLeader(t, s1.RPC) 765 766 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 767 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 768 769 // Insert job. 770 state := s1.fsm.State() 771 job := mock.Job() 772 job.Type = structs.JobTypeBatch 773 job.Status = structs.JobStatusDead 774 err := state.UpsertJob(1000, job) 775 if err != nil { 776 t.Fatalf("err: %v", err) 777 } 778 779 // Insert an eval 780 eval := mock.Eval() 781 eval.JobID = job.ID 782 eval.Status = structs.EvalStatusComplete 783 err = state.UpsertEvals(1001, []*structs.Evaluation{eval}) 784 if err != nil { 785 t.Fatalf("err: %v", err) 786 } 787 788 // Insert two allocs, one terminal and one not 789 alloc := mock.Alloc() 790 alloc.JobID = job.ID 791 alloc.EvalID = eval.ID 792 alloc.DesiredStatus = structs.AllocDesiredStatusRun 793 alloc.ClientStatus = structs.AllocClientStatusComplete 794 795 alloc2 := mock.Alloc() 796 alloc2.JobID = job.ID 797 alloc2.EvalID = eval.ID 798 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 799 alloc2.ClientStatus = structs.AllocClientStatusRunning 800 801 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 802 if err != nil { 803 t.Fatalf("err: %v", err) 804 } 805 806 // Update the time tables to make this work 807 tt := s1.fsm.TimeTable() 808 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold)) 809 810 // Create a core scheduler 811 snap, err := state.Snapshot() 812 if err != nil { 813 t.Fatalf("err: %v", err) 814 } 815 core := NewCoreScheduler(s1, snap) 816 817 // Attempt the GC 818 gc := s1.coreJobEval(structs.CoreJobJobGC, 2000) 819 err = core.Process(gc) 820 if err != nil { 821 t.Fatalf("err: %v", err) 822 } 823 824 // Should still exist 825 ws := memdb.NewWatchSet() 826 out, err := state.JobByID(ws, job.ID) 827 if err != nil { 828 t.Fatalf("err: %v", err) 829 } 830 if out == nil { 831 t.Fatalf("bad: %v", out) 832 } 833 834 outA, err := state.AllocByID(ws, alloc.ID) 835 if err != nil { 836 t.Fatalf("err: %v", err) 837 } 838 if outA == nil { 839 t.Fatalf("bad: %v", outA) 840 } 841 842 outA2, err := state.AllocByID(ws, alloc2.ID) 843 if err != nil { 844 t.Fatalf("err: %v", err) 845 } 846 if outA2 == nil { 847 t.Fatalf("bad: %v", outA2) 848 } 849 850 // Update the second alloc to be terminal 851 alloc2.ClientStatus = structs.AllocClientStatusComplete 852 err = state.UpsertAllocs(1003, []*structs.Allocation{alloc2}) 853 if err != nil { 854 t.Fatalf("err: %v", err) 855 } 856 857 // Create a core scheduler 858 snap, err = state.Snapshot() 859 if err != nil { 860 t.Fatalf("err: %v", err) 861 } 862 core = NewCoreScheduler(s1, snap) 863 864 // Attempt the GC 865 gc = s1.coreJobEval(structs.CoreJobJobGC, 2000) 866 err = core.Process(gc) 867 if err != nil { 868 t.Fatalf("err: %v", err) 869 } 870 871 // Should not still exist 872 out, err = state.JobByID(ws, job.ID) 873 if err != nil { 874 t.Fatalf("err: %v", err) 875 } 876 if out != nil { 877 t.Fatalf("bad: %v", out) 878 } 879 880 outA, err = state.AllocByID(ws, alloc.ID) 881 if err != nil { 882 t.Fatalf("err: %v", err) 883 } 884 if outA != nil { 885 t.Fatalf("bad: %v", outA) 886 } 887 888 outA2, err = state.AllocByID(ws, alloc2.ID) 889 if err != nil { 890 t.Fatalf("err: %v", err) 891 } 892 if outA2 != nil { 893 t.Fatalf("bad: %v", outA2) 894 } 895 } 896 897 // This test ensures that batch jobs are GC'd in one shot, meaning it all 898 // allocs/evals and job or nothing 899 func TestCoreScheduler_JobGC_OneShot(t *testing.T) { 900 s1 := testServer(t, nil) 901 defer s1.Shutdown() 902 testutil.WaitForLeader(t, s1.RPC) 903 904 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 905 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 906 907 // Insert job. 908 state := s1.fsm.State() 909 job := mock.Job() 910 job.Type = structs.JobTypeBatch 911 err := state.UpsertJob(1000, job) 912 if err != nil { 913 t.Fatalf("err: %v", err) 914 } 915 916 // Insert two complete evals 917 eval := mock.Eval() 918 eval.JobID = job.ID 919 eval.Status = structs.EvalStatusComplete 920 921 eval2 := mock.Eval() 922 eval2.JobID = job.ID 923 eval2.Status = structs.EvalStatusComplete 924 925 err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2}) 926 if err != nil { 927 t.Fatalf("err: %v", err) 928 } 929 930 // Insert one complete alloc and one running on distinct evals 931 alloc := mock.Alloc() 932 alloc.JobID = job.ID 933 alloc.EvalID = eval.ID 934 alloc.DesiredStatus = structs.AllocDesiredStatusStop 935 936 alloc2 := mock.Alloc() 937 alloc2.JobID = job.ID 938 alloc2.EvalID = eval2.ID 939 alloc2.DesiredStatus = structs.AllocDesiredStatusRun 940 941 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc, alloc2}) 942 if err != nil { 943 t.Fatalf("err: %v", err) 944 } 945 946 // Force the jobs state to dead 947 job.Status = structs.JobStatusDead 948 949 // Update the time tables to make this work 950 tt := s1.fsm.TimeTable() 951 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold)) 952 953 // Create a core scheduler 954 snap, err := state.Snapshot() 955 if err != nil { 956 t.Fatalf("err: %v", err) 957 } 958 core := NewCoreScheduler(s1, snap) 959 960 // Attempt the GC 961 gc := s1.coreJobEval(structs.CoreJobJobGC, 2000) 962 err = core.Process(gc) 963 if err != nil { 964 t.Fatalf("err: %v", err) 965 } 966 967 // Should still exist 968 ws := memdb.NewWatchSet() 969 out, err := state.JobByID(ws, job.ID) 970 if err != nil { 971 t.Fatalf("err: %v", err) 972 } 973 if out == nil { 974 t.Fatalf("bad: %v", out) 975 } 976 977 outE, err := state.EvalByID(ws, eval.ID) 978 if err != nil { 979 t.Fatalf("err: %v", err) 980 } 981 if outE == nil { 982 t.Fatalf("bad: %v", outE) 983 } 984 985 outE2, err := state.EvalByID(ws, eval2.ID) 986 if err != nil { 987 t.Fatalf("err: %v", err) 988 } 989 if outE2 == nil { 990 t.Fatalf("bad: %v", outE2) 991 } 992 993 outA, err := state.AllocByID(ws, alloc.ID) 994 if err != nil { 995 t.Fatalf("err: %v", err) 996 } 997 if outA == nil { 998 t.Fatalf("bad: %v", outA) 999 } 1000 outA2, err := state.AllocByID(ws, alloc2.ID) 1001 if err != nil { 1002 t.Fatalf("err: %v", err) 1003 } 1004 if outA2 == nil { 1005 t.Fatalf("bad: %v", outA2) 1006 } 1007 } 1008 1009 // This test ensures that stopped jobs are GCd 1010 func TestCoreScheduler_JobGC_Stopped(t *testing.T) { 1011 s1 := testServer(t, nil) 1012 defer s1.Shutdown() 1013 testutil.WaitForLeader(t, s1.RPC) 1014 1015 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 1016 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 1017 1018 // Insert job. 1019 state := s1.fsm.State() 1020 job := mock.Job() 1021 //job.Status = structs.JobStatusDead 1022 job.Stop = true 1023 err := state.UpsertJob(1000, job) 1024 if err != nil { 1025 t.Fatalf("err: %v", err) 1026 } 1027 1028 // Insert two complete evals 1029 eval := mock.Eval() 1030 eval.JobID = job.ID 1031 eval.Status = structs.EvalStatusComplete 1032 1033 eval2 := mock.Eval() 1034 eval2.JobID = job.ID 1035 eval2.Status = structs.EvalStatusComplete 1036 1037 err = state.UpsertEvals(1001, []*structs.Evaluation{eval, eval2}) 1038 if err != nil { 1039 t.Fatalf("err: %v", err) 1040 } 1041 1042 // Insert one complete alloc 1043 alloc := mock.Alloc() 1044 alloc.JobID = job.ID 1045 alloc.EvalID = eval.ID 1046 alloc.DesiredStatus = structs.AllocDesiredStatusStop 1047 1048 err = state.UpsertAllocs(1002, []*structs.Allocation{alloc}) 1049 if err != nil { 1050 t.Fatalf("err: %v", err) 1051 } 1052 1053 // Update the time tables to make this work 1054 tt := s1.fsm.TimeTable() 1055 tt.Witness(2000, time.Now().UTC().Add(-1*s1.config.JobGCThreshold)) 1056 1057 // Create a core scheduler 1058 snap, err := state.Snapshot() 1059 if err != nil { 1060 t.Fatalf("err: %v", err) 1061 } 1062 core := NewCoreScheduler(s1, snap) 1063 1064 // Attempt the GC 1065 gc := s1.coreJobEval(structs.CoreJobJobGC, 2000) 1066 err = core.Process(gc) 1067 if err != nil { 1068 t.Fatalf("err: %v", err) 1069 } 1070 1071 // Shouldn't still exist 1072 ws := memdb.NewWatchSet() 1073 out, err := state.JobByID(ws, job.ID) 1074 if err != nil { 1075 t.Fatalf("err: %v", err) 1076 } 1077 if out != nil { 1078 t.Fatalf("bad: %v", out) 1079 } 1080 1081 outE, err := state.EvalByID(ws, eval.ID) 1082 if err != nil { 1083 t.Fatalf("err: %v", err) 1084 } 1085 if outE != nil { 1086 t.Fatalf("bad: %v", outE) 1087 } 1088 1089 outE2, err := state.EvalByID(ws, eval2.ID) 1090 if err != nil { 1091 t.Fatalf("err: %v", err) 1092 } 1093 if outE2 != nil { 1094 t.Fatalf("bad: %v", outE2) 1095 } 1096 1097 outA, err := state.AllocByID(ws, alloc.ID) 1098 if err != nil { 1099 t.Fatalf("err: %v", err) 1100 } 1101 if outA != nil { 1102 t.Fatalf("bad: %v", outA) 1103 } 1104 } 1105 1106 func TestCoreScheduler_JobGC_Force(t *testing.T) { 1107 s1 := testServer(t, nil) 1108 defer s1.Shutdown() 1109 testutil.WaitForLeader(t, s1.RPC) 1110 1111 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 1112 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 1113 1114 // Insert job. 1115 state := s1.fsm.State() 1116 job := mock.Job() 1117 job.Type = structs.JobTypeBatch 1118 job.Status = structs.JobStatusDead 1119 err := state.UpsertJob(1000, job) 1120 if err != nil { 1121 t.Fatalf("err: %v", err) 1122 } 1123 1124 // Insert a terminal eval 1125 eval := mock.Eval() 1126 eval.JobID = job.ID 1127 eval.Status = structs.EvalStatusComplete 1128 err = state.UpsertEvals(1001, []*structs.Evaluation{eval}) 1129 if err != nil { 1130 t.Fatalf("err: %v", err) 1131 } 1132 1133 // Create a core scheduler 1134 snap, err := state.Snapshot() 1135 if err != nil { 1136 t.Fatalf("err: %v", err) 1137 } 1138 core := NewCoreScheduler(s1, snap) 1139 1140 // Attempt the GC 1141 gc := s1.coreJobEval(structs.CoreJobForceGC, 1002) 1142 err = core.Process(gc) 1143 if err != nil { 1144 t.Fatalf("err: %v", err) 1145 } 1146 1147 // Shouldn't still exist 1148 ws := memdb.NewWatchSet() 1149 out, err := state.JobByID(ws, job.ID) 1150 if err != nil { 1151 t.Fatalf("err: %v", err) 1152 } 1153 if out != nil { 1154 t.Fatalf("bad: %v", out) 1155 } 1156 1157 outE, err := state.EvalByID(ws, eval.ID) 1158 if err != nil { 1159 t.Fatalf("err: %v", err) 1160 } 1161 if outE != nil { 1162 t.Fatalf("bad: %v", outE) 1163 } 1164 } 1165 1166 // This test ensures parameterized jobs only get gc'd when stopped 1167 func TestCoreScheduler_JobGC_Parameterized(t *testing.T) { 1168 s1 := testServer(t, nil) 1169 defer s1.Shutdown() 1170 testutil.WaitForLeader(t, s1.RPC) 1171 1172 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 1173 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 1174 1175 // Insert a parameterized job. 1176 state := s1.fsm.State() 1177 job := mock.Job() 1178 job.Type = structs.JobTypeBatch 1179 job.Status = structs.JobStatusRunning 1180 job.ParameterizedJob = &structs.ParameterizedJobConfig{ 1181 Payload: structs.DispatchPayloadRequired, 1182 } 1183 err := state.UpsertJob(1000, job) 1184 if err != nil { 1185 t.Fatalf("err: %v", err) 1186 } 1187 1188 // Create a core scheduler 1189 snap, err := state.Snapshot() 1190 if err != nil { 1191 t.Fatalf("err: %v", err) 1192 } 1193 core := NewCoreScheduler(s1, snap) 1194 1195 // Attempt the GC 1196 gc := s1.coreJobEval(structs.CoreJobForceGC, 1002) 1197 err = core.Process(gc) 1198 if err != nil { 1199 t.Fatalf("err: %v", err) 1200 } 1201 1202 // Should still exist 1203 ws := memdb.NewWatchSet() 1204 out, err := state.JobByID(ws, job.ID) 1205 if err != nil { 1206 t.Fatalf("err: %v", err) 1207 } 1208 if out == nil { 1209 t.Fatalf("bad: %v", out) 1210 } 1211 1212 // Mark the job as stopped and try again 1213 job2 := job.Copy() 1214 job2.Stop = true 1215 err = state.UpsertJob(2000, job2) 1216 if err != nil { 1217 t.Fatalf("err: %v", err) 1218 } 1219 1220 // Create a core scheduler 1221 snap, err = state.Snapshot() 1222 if err != nil { 1223 t.Fatalf("err: %v", err) 1224 } 1225 core = NewCoreScheduler(s1, snap) 1226 1227 // Attempt the GC 1228 gc = s1.coreJobEval(structs.CoreJobForceGC, 2002) 1229 err = core.Process(gc) 1230 if err != nil { 1231 t.Fatalf("err: %v", err) 1232 } 1233 1234 // Should not exist 1235 out, err = state.JobByID(ws, job.ID) 1236 if err != nil { 1237 t.Fatalf("err: %v", err) 1238 } 1239 if out != nil { 1240 t.Fatalf("bad: %+v", out) 1241 } 1242 } 1243 1244 // This test ensures periodic jobs don't get GCd til they are stopped 1245 func TestCoreScheduler_JobGC_Periodic(t *testing.T) { 1246 1247 s1 := testServer(t, nil) 1248 defer s1.Shutdown() 1249 testutil.WaitForLeader(t, s1.RPC) 1250 1251 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 1252 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 1253 1254 // Insert a parameterized job. 1255 state := s1.fsm.State() 1256 job := mock.PeriodicJob() 1257 err := state.UpsertJob(1000, job) 1258 if err != nil { 1259 t.Fatalf("err: %v", err) 1260 } 1261 1262 // Create a core scheduler 1263 snap, err := state.Snapshot() 1264 if err != nil { 1265 t.Fatalf("err: %v", err) 1266 } 1267 core := NewCoreScheduler(s1, snap) 1268 1269 // Attempt the GC 1270 gc := s1.coreJobEval(structs.CoreJobForceGC, 1002) 1271 err = core.Process(gc) 1272 if err != nil { 1273 t.Fatalf("err: %v", err) 1274 } 1275 1276 // Should still exist 1277 ws := memdb.NewWatchSet() 1278 out, err := state.JobByID(ws, job.ID) 1279 if err != nil { 1280 t.Fatalf("err: %v", err) 1281 } 1282 if out == nil { 1283 t.Fatalf("bad: %v", out) 1284 } 1285 1286 // Mark the job as stopped and try again 1287 job2 := job.Copy() 1288 job2.Stop = true 1289 err = state.UpsertJob(2000, job2) 1290 if err != nil { 1291 t.Fatalf("err: %v", err) 1292 } 1293 1294 // Create a core scheduler 1295 snap, err = state.Snapshot() 1296 if err != nil { 1297 t.Fatalf("err: %v", err) 1298 } 1299 core = NewCoreScheduler(s1, snap) 1300 1301 // Attempt the GC 1302 gc = s1.coreJobEval(structs.CoreJobForceGC, 2002) 1303 err = core.Process(gc) 1304 if err != nil { 1305 t.Fatalf("err: %v", err) 1306 } 1307 1308 // Should not exist 1309 out, err = state.JobByID(ws, job.ID) 1310 if err != nil { 1311 t.Fatalf("err: %v", err) 1312 } 1313 if out != nil { 1314 t.Fatalf("bad: %+v", out) 1315 } 1316 } 1317 1318 func TestCoreScheduler_PartitionReap(t *testing.T) { 1319 s1 := testServer(t, nil) 1320 defer s1.Shutdown() 1321 testutil.WaitForLeader(t, s1.RPC) 1322 1323 // COMPAT Remove in 0.6: Reset the FSM time table since we reconcile which sets index 0 1324 s1.fsm.timetable.table = make([]TimeTableEntry, 1, 10) 1325 1326 // Create a core scheduler 1327 snap, err := s1.fsm.State().Snapshot() 1328 if err != nil { 1329 t.Fatalf("err: %v", err) 1330 } 1331 core := NewCoreScheduler(s1, snap) 1332 1333 // Set the max ids per reap to something lower. 1334 maxIdsPerReap = 2 1335 1336 evals := []string{"a", "b", "c"} 1337 allocs := []string{"1", "2", "3"} 1338 requests := core.(*CoreScheduler).partitionReap(evals, allocs) 1339 if len(requests) != 3 { 1340 t.Fatalf("Expected 3 requests got: %v", requests) 1341 } 1342 1343 first := requests[0] 1344 if len(first.Allocs) != 2 && len(first.Evals) != 0 { 1345 t.Fatalf("Unexpected first request: %v", first) 1346 } 1347 1348 second := requests[1] 1349 if len(second.Allocs) != 1 && len(second.Evals) != 1 { 1350 t.Fatalf("Unexpected second request: %v", second) 1351 } 1352 1353 third := requests[2] 1354 if len(third.Allocs) != 0 && len(third.Evals) != 2 { 1355 t.Fatalf("Unexpected third request: %v", third) 1356 } 1357 }