// github.com/anuvu/nomad@v0.8.7-atom1/scheduler/generic_sched_test.go

package scheduler

import (
	"fmt"
	"reflect"
	"sort"
	"testing"
	"time"

	memdb "github.com/hashicorp/go-memdb"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

func TestServiceSched_JobRegister(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan doesn't have annotations.
	if plan.Annotations != nil {
		t.Fatalf("expected no annotations")
	}

	// Ensure the eval has no spawned blocked eval
	if len(h.CreateEvals) != 0 {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}
	if h.Evals[0].BlockedEval != "" {
		t.Fatalf("bad: %#v", h.Evals[0])
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	if len(out) != 10 {
		t.Fatalf("bad: %#v", out)
	}

	// Ensure different ports were used.
	used := make(map[int]struct{})
	for _, alloc := range out {
		for _, resource := range alloc.TaskResources {
			for _, port := range resource.Networks[0].DynamicPorts {
				if _, ok := used[port.Value]; ok {
					t.Fatalf("Port collision %v", port.Value)
				}
				used[port.Value] = struct{}{}
			}
		}
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
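
// The tests in this file lean on two package conventions. noErr is the
// error-fatal helper defined elsewhere in this package; a minimal sketch of
// the assumed behavior:
//
//	func noErr(t *testing.T, err error) {
//		t.Helper()
//		if err != nil {
//			t.Fatalf("err: %v", err)
//		}
//	}
//
// The expected counts of 10 likewise assume that mock.Job() returns a job
// whose single "web" task group defaults to Count = 10 in the upstream mock
// package.
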
func TestServiceSched_JobRegister_StickyAllocs(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job
	job := mock.Job()
	job.TaskGroups[0].EphemeralDisk.Sticky = true
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	if err := h.Process(NewServiceScheduler, eval); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure the plan allocated
	plan := h.Plans[0]
	planned := make(map[string]*structs.Allocation)
	for _, allocList := range plan.NodeAllocation {
		for _, alloc := range allocList {
			planned[alloc.ID] = alloc
		}
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", plan)
	}

	// Update the job to force a rolling upgrade
	updated := job.Copy()
	updated.TaskGroups[0].Tasks[0].Resources.CPU += 10
	noErr(t, h.State.UpsertJob(h.NextIndex(), updated))

	// Create a mock evaluation to handle the update
	eval = &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
	h1 := NewHarnessWithState(t, h.State)
	if err := h1.Process(NewServiceScheduler, eval); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan for the update
	if len(h1.Plans) != 1 {
		t.Fatalf("bad: %#v", h1.Plans)
	}
	plan = h1.Plans[0]
	var newPlanned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		newPlanned = append(newPlanned, allocList...)
	}
	if len(newPlanned) != 10 {
		t.Fatalf("bad plan: %#v", plan)
	}
	// Ensure that the new allocations were placed on the same node as the older
	// ones
	for _, newAlloc := range newPlanned {
		if newAlloc.PreviousAllocation == "" {
			t.Fatalf("new alloc %q doesn't have a previous allocation", newAlloc.ID)
		}

		old, ok := planned[newAlloc.PreviousAllocation]
		if !ok {
			t.Fatalf("new alloc %q previous allocation doesn't match any prior placed alloc (%q)", newAlloc.ID, newAlloc.PreviousAllocation)
		}
		if newAlloc.NodeID != old.NodeID {
			t.Fatalf("new alloc and old alloc nodes don't match; got %q; want %q", newAlloc.NodeID, old.NodeID)
		}
	}
}
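
// Sticky ephemeral disks make the scheduler link each replacement to the
// allocation it supersedes via PreviousAllocation and place it on the same
// node, which is exactly the pairing the loop above asserts.
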
func TestServiceSched_JobRegister_DiskConstraints(t *testing.T) {
	h := NewHarness(t)

	// Create a node
	node := mock.Node()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Create a job with count 2 and disk as 88GB so that only one allocation
	// can fit
	job := mock.Job()
	job.TaskGroups[0].Count = 2
	job.TaskGroups[0].EphemeralDisk.SizeMB = 88 * 1024
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan doesn't have annotations.
	if plan.Annotations != nil {
		t.Fatalf("expected no annotations")
	}

	// Ensure the eval has a blocked eval
	if len(h.CreateEvals) != 1 {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}

	if h.CreateEvals[0].TriggeredBy != structs.EvalTriggerQueuedAllocs {
		t.Fatalf("bad: %#v", h.CreateEvals[0])
	}

	// Ensure the plan allocated only one allocation
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 1 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure only one allocation was placed
	if len(out) != 1 {
		t.Fatalf("bad: %#v", out)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_DistinctHosts(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job that uses distinct hosts and has a count one higher than
	// what is possible.
	job := mock.Job()
	job.TaskGroups[0].Count = 11
	job.Constraints = append(job.Constraints, &structs.Constraint{Operand: structs.ConstraintDistinctHosts})
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the eval has spawned a blocked eval
	if len(h.CreateEvals) != 1 {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}

	// Ensure the plan failed to alloc
	outEval := h.Evals[0]
	if len(outEval.FailedTGAllocs) != 1 {
		t.Fatalf("bad: %+v", outEval)
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	if len(out) != 10 {
		t.Fatalf("bad: %#v", out)
	}

	// Ensure a different node was used for each allocation.
	used := make(map[string]struct{})
	for _, alloc := range out {
		if _, ok := used[alloc.NodeID]; ok {
			t.Fatalf("Node collision %v", alloc.NodeID)
		}
		used[alloc.NodeID] = struct{}{}
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestServiceSched_JobRegister_DistinctProperty(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		rack := "rack2"
		if i < 5 {
			rack = "rack1"
		}
		node.Meta["rack"] = rack
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job that uses distinct property and has a count higher than
	// what is possible.
	job := mock.Job()
	job.TaskGroups[0].Count = 8
	job.Constraints = append(job.Constraints,
		&structs.Constraint{
			Operand: structs.ConstraintDistinctProperty,
			LTarget: "${meta.rack}",
			RTarget: "2",
		})
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan doesn't have annotations.
	if plan.Annotations != nil {
		t.Fatalf("expected no annotations")
	}

	// Ensure the eval has spawned a blocked eval
	if len(h.CreateEvals) != 1 {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}

	// Ensure the plan failed to alloc
	outEval := h.Evals[0]
	if len(outEval.FailedTGAllocs) != 1 {
		t.Fatalf("bad: %+v", outEval)
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 4 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	if len(out) != 4 {
		t.Fatalf("bad: %#v", out)
	}

	// Ensure each node was used at most twice
	used := make(map[string]uint64)
	for _, alloc := range out {
		used[alloc.NodeID]++
		if count := used[alloc.NodeID]; count > 2 {
			t.Fatalf("Node %v used too much: %d", alloc.NodeID, count)
		}
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestServiceSched_JobRegister_DistinctProperty_TaskGroup(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 2; i++ {
		node := mock.Node()
		node.Meta["ssd"] = "true"
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job that uses distinct property only on one task group.
	job := mock.Job()
	job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
	job.TaskGroups[0].Count = 1
	job.TaskGroups[0].Constraints = append(job.TaskGroups[0].Constraints,
		&structs.Constraint{
			Operand: structs.ConstraintDistinctProperty,
			LTarget: "${meta.ssd}",
		})

	job.TaskGroups[1].Name = "tg2"
	job.TaskGroups[1].Count = 2
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan doesn't have annotations.
	if plan.Annotations != nil {
		t.Fatalf("expected no annotations")
	}

	// Ensure the eval hasn't spawned a blocked eval
	if len(h.CreateEvals) != 0 {
		t.Fatalf("bad: %#v", h.CreateEvals[0])
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 3 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	if len(out) != 3 {
		t.Fatalf("bad: %#v", out)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestServiceSched_JobRegister_DistinctProperty_TaskGroup_Incr(t *testing.T) {
	h := NewHarness(t)
	assert := assert.New(t)

	// Create a job that uses distinct property over the node-id
	job := mock.Job()
	job.TaskGroups[0].Count = 3
	job.TaskGroups[0].Constraints = append(job.TaskGroups[0].Constraints,
		&structs.Constraint{
			Operand: structs.ConstraintDistinctProperty,
			LTarget: "${node.unique.id}",
		})
	assert.Nil(h.State.UpsertJob(h.NextIndex(), job), "UpsertJob")

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 6; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		assert.Nil(h.State.UpsertNode(h.NextIndex(), node), "UpsertNode")
	}

	// Create some allocations
	var allocs []*structs.Allocation
	for i := 0; i < 3; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	assert.Nil(h.State.UpsertAllocs(h.NextIndex(), allocs), "UpsertAllocs")

	// Update the count
	job2 := job.Copy()
	job2.TaskGroups[0].Count = 6
	assert.Nil(h.State.UpsertJob(h.NextIndex(), job2), "UpsertJob")

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	assert.Nil(h.Process(NewServiceScheduler, eval), "Process")

	// Ensure a single plan
	assert.Len(h.Plans, 1, "Number of plans")
	plan := h.Plans[0]

	// Ensure the plan doesn't have annotations.
	assert.Nil(plan.Annotations, "Plan.Annotations")

	// Ensure the eval hasn't spawned a blocked eval
	assert.Len(h.CreateEvals, 0, "Created Evals")

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	assert.Len(planned, 6, "Planned Allocations")

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	assert.Nil(err, "AllocsByJob")

	// Ensure all allocations placed
	assert.Len(out, 6, "Placed Allocations")

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestServiceSched_JobRegister_Annotate(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:    structs.DefaultNamespace,
		ID:           uuid.Generate(),
		Priority:     job.Priority,
		TriggeredBy:  structs.EvalTriggerJobRegister,
		JobID:        job.ID,
		AnnotatePlan: true,
		Status:       structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	if len(out) != 10 {
		t.Fatalf("bad: %#v", out)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)

	// Ensure the plan had annotations.
	if plan.Annotations == nil {
		t.Fatalf("expected annotations")
	}

	desiredTGs := plan.Annotations.DesiredTGUpdates
	if l := len(desiredTGs); l != 1 {
		t.Fatalf("incorrect number of task groups; got %v; want %v", l, 1)
	}

	desiredChanges, ok := desiredTGs["web"]
	if !ok {
		t.Fatalf("expected task group web to have desired changes")
	}

	expected := &structs.DesiredUpdates{Place: 10}
	if !reflect.DeepEqual(desiredChanges, expected) {
		t.Fatalf("Unexpected desired updates; got %#v; want %#v", desiredChanges, expected)
	}
}

func TestServiceSched_JobRegister_CountZero(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job and set the task group count to zero.
	job := mock.Job()
	job.TaskGroups[0].Count = 0
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure there was no plan
	if len(h.Plans) != 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure no allocations placed
	if len(out) != 0 {
		t.Fatalf("bad: %#v", out)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
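
// Plan annotations (the DesiredTGUpdates checked above) are only computed
// when the triggering evaluation sets AnnotatePlan, which is why the other
// registration tests assert that plan.Annotations is nil.
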
func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
	h := NewHarness(t)

	// Create NO nodes
	// Create a job
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure no plan
	if len(h.Plans) != 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	// Ensure there is a follow up eval.
	if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}

	if len(h.Evals) != 1 {
		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
	}
	outEval := h.Evals[0]

	// Ensure the eval has its spawned blocked eval
	if outEval.BlockedEval != h.CreateEvals[0].ID {
		t.Fatalf("bad: %#v", outEval)
	}

	// Ensure the plan failed to alloc
	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
		t.Fatalf("bad: %#v", outEval)
	}

	metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name]
	if !ok {
		t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
	}

	// Check the coalesced failures
	if metrics.CoalescedFailures != 9 {
		t.Fatalf("bad: %#v", metrics)
	}

	// Check the available nodes
	if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 0 {
		t.Fatalf("bad: %#v", metrics)
	}

	// Check queued allocations
	queued := outEval.QueuedAllocations["web"]
	if queued != 10 {
		t.Fatalf("expected queued: %v, actual: %v", 10, queued)
	}
	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestServiceSched_JobRegister_CreateBlockedEval(t *testing.T) {
	h := NewHarness(t)

	// Create a full node
	node := mock.Node()
	node.Reserved = node.Resources
	node.ComputeClass()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Create an ineligible node
	node2 := mock.Node()
	node2.Attributes["kernel.name"] = "windows"
	node2.ComputeClass()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node2))

	// Create a job
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure no plan
	if len(h.Plans) != 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	// Ensure the plan has created a follow up eval.
	if len(h.CreateEvals) != 1 {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}

	created := h.CreateEvals[0]
	if created.Status != structs.EvalStatusBlocked {
		t.Fatalf("bad: %#v", created)
	}

	classes := created.ClassEligibility
	if len(classes) != 2 || !classes[node.ComputedClass] || classes[node2.ComputedClass] {
		t.Fatalf("bad: %#v", classes)
	}

	if created.EscapedComputedClass {
		t.Fatalf("bad: %#v", created)
	}

	// Ensure there is a follow up eval.
	if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}

	if len(h.Evals) != 1 {
		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
	}
	outEval := h.Evals[0]

	// Ensure the plan failed to alloc
	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
		t.Fatalf("bad: %#v", outEval)
	}

	metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name]
	if !ok {
		t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
	}

	// Check the coalesced failures
	if metrics.CoalescedFailures != 9 {
		t.Fatalf("bad: %#v", metrics)
	}

	// Check the available nodes
	if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 2 {
		t.Fatalf("bad: %#v", metrics)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
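
// A blocked eval's ClassEligibility records, per computed node class, whether
// the job could ever fit there, so the eval broker can hold the eval until
// capacity appears in an eligible class. EscapedComputedClass would instead
// flag a job whose constraints cannot be expressed per-class; this job's can,
// so the test expects it to be false.
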
func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
	h := NewHarness(t)

	// Create one node
	node := mock.Node()
	node.NodeClass = "class_0"
	noErr(t, node.ComputeClass())
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Create a job that constrains on a node class
	job := mock.Job()
	job.TaskGroups[0].Count = 2
	// Seed the group constraints from the job-level constraints so the class
	// constraint lands at index 1 for both groups.
	job.TaskGroups[0].Constraints = append(job.Constraints,
		&structs.Constraint{
			LTarget: "${node.class}",
			RTarget: "class_0",
			Operand: "=",
		},
	)
	tg2 := job.TaskGroups[0].Copy()
	tg2.Name = "web2"
	tg2.Constraints[1].RTarget = "class_1"
	job.TaskGroups = append(job.TaskGroups, tg2)
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 2 {
		t.Fatalf("bad: %#v", plan)
	}

	// Ensure two allocations placed
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)
	if len(out) != 2 {
		t.Fatalf("bad: %#v", out)
	}

	if len(h.Evals) != 1 {
		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
	}
	outEval := h.Evals[0]

	// Ensure the eval has its spawned blocked eval
	if outEval.BlockedEval != h.CreateEvals[0].ID {
		t.Fatalf("bad: %#v", outEval)
	}

	// Ensure the plan failed to alloc one tg
	if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
		t.Fatalf("bad: %#v", outEval)
	}

	metrics, ok := outEval.FailedTGAllocs[tg2.Name]
	if !ok {
		t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
	}

	// Check the coalesced failures
	if metrics.CoalescedFailures != tg2.Count-1 {
		t.Fatalf("bad: %#v", metrics)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
// This test just ensures the scheduler handles the eval type to avoid
// regressions.
func TestServiceSched_EvaluateMaxPlanEval(t *testing.T) {
	h := NewHarness(t)

	// Create a job and set the task group count to zero.
	job := mock.Job()
	job.TaskGroups[0].Count = 0
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock blocked evaluation
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Status:      structs.EvalStatusBlocked,
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerMaxPlans,
		JobID:       job.ID,
	}

	// Insert it into the state store
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure there was no plan
	if len(h.Plans) != 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_Plan_Partial_Progress(t *testing.T) {
	h := NewHarness(t)

	// Create a node
	node := mock.Node()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Create a job with a high resource ask so that all the allocations can't
	// be placed on a single node.
	job := mock.Job()
	job.TaskGroups[0].Count = 3
	job.TaskGroups[0].Tasks[0].Resources.CPU = 3600
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan doesn't have annotations.
	if plan.Annotations != nil {
		t.Fatalf("expected no annotations")
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 1 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure only one allocation was placed
	if len(out) != 1 {
		t.Fatalf("bad: %#v", out)
	}

	queued := h.Evals[0].QueuedAllocations["web"]
	if queued != 2 {
		t.Fatalf("expected: %v, actual: %v", 2, queued)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestServiceSched_EvaluateBlockedEval(t *testing.T) {
	h := NewHarness(t)

	// Create a job
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock blocked evaluation
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Status:      structs.EvalStatusBlocked,
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
	}

	// Insert it into the state store
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure there was no plan
	if len(h.Plans) != 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	// Ensure that the eval was reblocked
	if len(h.ReblockEvals) != 1 {
		t.Fatalf("bad: %#v", h.ReblockEvals)
	}
	if h.ReblockEvals[0].ID != eval.ID {
		t.Fatalf("expect same eval to be reblocked; got %q; want %q", h.ReblockEvals[0].ID, eval.ID)
	}

	// Ensure the eval status was not updated
	if len(h.Evals) != 0 {
		t.Fatalf("Existing eval should not have status set")
	}
}

func TestServiceSched_EvaluateBlockedEval_Finished(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock blocked evaluation
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Status:      structs.EvalStatusBlocked,
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
	}

	// Insert it into the state store
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan doesn't have annotations.
	if plan.Annotations != nil {
		t.Fatalf("expected no annotations")
	}

	// Ensure the eval has no spawned blocked eval
	if len(h.Evals) != 1 {
		t.Fatalf("bad: %#v", h.Evals)
	}
	if h.Evals[0].BlockedEval != "" {
		t.Fatalf("bad: %#v", h.Evals[0])
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	if len(out) != 10 {
		t.Fatalf("bad: %#v", out)
	}

	// Ensure the eval was not reblocked
	if len(h.ReblockEvals) != 0 {
		t.Fatalf("Existing eval should not have been reblocked as it placed all allocations")
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)

	// Ensure queued allocations is zero
	queued := h.Evals[0].QueuedAllocations["web"]
	if queued != 0 {
		t.Fatalf("expected queued: %v, actual: %v", 0, queued)
	}
}
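
// The harness captures evals the scheduler hands back for re-blocking in
// h.ReblockEvals; a rough sketch of the assumed hook:
//
//	func (h *Harness) ReblockEval(eval *structs.Evaluation) error {
//		h.ReblockEvals = append(h.ReblockEvals, eval)
//		return nil
//	}
//
// A blocked eval that still cannot place anything must be re-blocked without
// its status being rewritten, while one that places everything completes
// normally, as the two tests above assert.
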
func TestServiceSched_JobModify(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Generate a fake job with allocations
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Add a few terminal status allocations, these should be ignored
	var terminal []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		alloc.DesiredStatus = structs.AllocDesiredStatusStop
		terminal = append(terminal, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), terminal))

	// Update the job
	job2 := mock.Job()
	job2.ID = job.ID

	// Update the task, such that it cannot be done in-place
	job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
	noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

	// Create a mock evaluation to handle the update
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan evicted all allocs
	var update []*structs.Allocation
	for _, updateList := range plan.NodeUpdate {
		update = append(update, updateList...)
	}
	if len(update) != len(allocs) {
		t.Fatalf("bad: %#v", plan)
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	out, _ = structs.FilterTerminalAllocs(out)
	if len(out) != 10 {
		t.Fatalf("bad: %#v", out)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
// Have a single node and submit a job. Increment the count such that all fit
// on the node, but the node doesn't have enough resources to fit the new
// count plus one. This tests that we properly discount the resources of
// existing allocs.
func TestServiceSched_JobModify_IncrCount_NodeLimit(t *testing.T) {
	h := NewHarness(t)

	// Create one node
	node := mock.Node()
	node.Resources.CPU = 1000
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Generate a fake job with one allocation
	job := mock.Job()
	job.TaskGroups[0].Tasks[0].Resources.CPU = 256
	job2 := job.Copy()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	alloc := mock.Alloc()
	alloc.Job = job
	alloc.JobID = job.ID
	alloc.NodeID = node.ID
	alloc.Name = "my-job.web[0]"
	alloc.Resources.CPU = 256
	allocs = append(allocs, alloc)
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Update the job to count 3
	job2.TaskGroups[0].Count = 3
	noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

	// Create a mock evaluation to handle the update
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan didn't evict the alloc
	var update []*structs.Allocation
	for _, updateList := range plan.NodeUpdate {
		update = append(update, updateList...)
	}
	if len(update) != 0 {
		t.Fatalf("bad: %#v", plan)
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 3 {
		t.Fatalf("bad: %#v", plan)
	}

	// Ensure the plan had no failures
	if len(h.Evals) != 1 {
		t.Fatalf("incorrect number of updated eval: %#v", h.Evals)
	}
	outEval := h.Evals[0]
	if outEval == nil || len(outEval.FailedTGAllocs) != 0 {
		t.Fatalf("bad: %#v", outEval)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	out, _ = structs.FilterTerminalAllocs(out)
	if len(out) != 3 {
		t.Fatalf("bad: %#v", out)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestServiceSched_JobModify_CountZero(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Generate a fake job with allocations
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = structs.AllocName(alloc.JobID, alloc.TaskGroup, uint(i))
		allocs = append(allocs, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Add a few terminal status allocations, these should be ignored
	var terminal []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = structs.AllocName(alloc.JobID, alloc.TaskGroup, uint(i))
		alloc.DesiredStatus = structs.AllocDesiredStatusStop
		terminal = append(terminal, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), terminal))

	// Update the job to be count zero
	job2 := mock.Job()
	job2.ID = job.ID
	job2.TaskGroups[0].Count = 0
	noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

	// Create a mock evaluation to handle the update
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan evicted all allocs
	var update []*structs.Allocation
	for _, updateList := range plan.NodeUpdate {
		update = append(update, updateList...)
	}
	if len(update) != len(allocs) {
		t.Fatalf("bad: %#v", plan)
	}

	// Ensure the plan didn't allocate
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 0 {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure no non-terminal allocations remain
	out, _ = structs.FilterTerminalAllocs(out)
	if len(out) != 0 {
		t.Fatalf("bad: %#v", out)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
func TestServiceSched_JobModify_Rolling(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Generate a fake job with allocations
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Update the job
	job2 := mock.Job()
	job2.ID = job.ID
	desiredUpdates := 4
	job2.TaskGroups[0].Update = &structs.UpdateStrategy{
		MaxParallel:     desiredUpdates,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
	}

	// Update the task, such that it cannot be done in-place
	job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
	noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

	// Create a mock evaluation to handle the update
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan evicted only MaxParallel
	var update []*structs.Allocation
	for _, updateList := range plan.NodeUpdate {
		update = append(update, updateList...)
	}
	if len(update) != desiredUpdates {
		t.Fatalf("bad: got %d; want %d: %#v", len(update), desiredUpdates, plan)
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != desiredUpdates {
		t.Fatalf("bad: %#v", plan)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)

	// Check that the deployment id is attached to the eval
	if h.Evals[0].DeploymentID == "" {
		t.Fatalf("Eval not annotated with deployment id")
	}

	// Ensure a deployment was created
	if plan.Deployment == nil {
		t.Fatalf("bad: %#v", plan)
	}
	state, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name]
	if !ok {
		t.Fatalf("bad: %#v", plan)
	}
	if state.DesiredTotal != 10 || state.DesiredCanaries != 0 {
		t.Fatalf("bad: %#v", state)
	}
}
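
// A destructive update under an UpdateStrategy creates a deployment: the plan
// carries the Deployment object, its per-task-group DeploymentState holds the
// DesiredTotal/DesiredCanaries checked above, and the updated eval is
// annotated with the deployment ID so follow-up evals can be correlated with
// it.
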
// This tests that the old allocation is stopped before a replacement is
// placed. It is critical that the updated job attempts to place more
// allocations, as this lets us assert that destructive changes are done
// first.
func TestServiceSched_JobModify_Rolling_FullNode(t *testing.T) {
	h := NewHarness(t)

	// Create a node
	node := mock.Node()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	resourceAsk := node.Resources.Copy()
	resourceAsk.CPU -= node.Reserved.CPU
	resourceAsk.MemoryMB -= node.Reserved.MemoryMB
	resourceAsk.DiskMB -= node.Reserved.DiskMB
	resourceAsk.Networks = nil

	// Generate a fake job with one alloc that consumes the whole node
	job := mock.Job()
	job.TaskGroups[0].Count = 1
	job.TaskGroups[0].Tasks[0].Resources = resourceAsk
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	alloc := mock.Alloc()
	alloc.Resources = resourceAsk
	alloc.Job = job
	alloc.JobID = job.ID
	alloc.NodeID = node.ID
	alloc.Name = "my-job.web[0]"
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))

	// Update the job to place more versions of the task group, drop the
	// resource ask, and force destructive updates
	job2 := job.Copy()
	job2.TaskGroups[0].Count = 5
	job2.TaskGroups[0].Update = &structs.UpdateStrategy{
		MaxParallel:     5,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
	}
	job2.TaskGroups[0].Tasks[0].Resources = mock.Alloc().Resources

	// Update the task, such that it cannot be done in-place
	job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
	noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan evicted only the existing allocation
	var update []*structs.Allocation
	for _, updateList := range plan.NodeUpdate {
		update = append(update, updateList...)
	}
	if len(update) != 1 {
		t.Fatalf("bad: got %d; want %d: %#v", len(update), 1, plan)
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 1 {
		t.Fatalf("bad: %#v", plan)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)

	// Check that the deployment id is attached to the eval
	if h.Evals[0].DeploymentID == "" {
		t.Fatalf("Eval not annotated with deployment id")
	}

	// Ensure a deployment was created; the desired total covers the one
	// destructive update plus the four new placements.
	if plan.Deployment == nil {
		t.Fatalf("bad: %#v", plan)
	}
	state, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name]
	if !ok {
		t.Fatalf("bad: %#v", plan)
	}
	if state.DesiredTotal != 5 || state.DesiredCanaries != 0 {
		t.Fatalf("bad: %#v", state)
	}
}
func TestServiceSched_JobModify_Canaries(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Generate a fake job with allocations
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Update the job
	job2 := mock.Job()
	job2.ID = job.ID
	desiredUpdates := 2
	job2.TaskGroups[0].Update = &structs.UpdateStrategy{
		MaxParallel:     desiredUpdates,
		Canary:          desiredUpdates,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
	}

	// Update the task, such that it cannot be done in-place
	job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
	noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

	// Create a mock evaluation to handle the update
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan evicted nothing
	var update []*structs.Allocation
	for _, updateList := range plan.NodeUpdate {
		update = append(update, updateList...)
	}
	if len(update) != 0 {
		t.Fatalf("bad: got %d; want %d: %#v", len(update), 0, plan)
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != desiredUpdates {
		t.Fatalf("bad: %#v", plan)
	}
	for _, canary := range planned {
		if canary.DeploymentStatus == nil || !canary.DeploymentStatus.Canary {
			t.Fatalf("expected canary field to be set on canary alloc %q", canary.ID)
		}
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)

	// Check that the deployment id is attached to the eval
	if h.Evals[0].DeploymentID == "" {
		t.Fatalf("Eval not annotated with deployment id")
	}

	// Ensure a deployment was created
	if plan.Deployment == nil {
		t.Fatalf("bad: %#v", plan)
	}
	state, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name]
	if !ok {
		t.Fatalf("bad: %#v", plan)
	}
	if state.DesiredTotal != 10 || state.DesiredCanaries != desiredUpdates {
		t.Fatalf("bad: %#v", state)
	}

	// Assert the canaries were added to the placed list
	if len(state.PlacedCanaries) != desiredUpdates {
		t.Fatalf("bad: %#v", state)
	}
}
func TestServiceSched_JobModify_InPlace(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Generate a fake job with allocations and create an older deployment
	job := mock.Job()
	d := mock.Deployment()
	d.JobID = job.ID
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))
	noErr(t, h.State.UpsertDeployment(h.NextIndex(), d))

	// Create allocs that are part of the old deployment
	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		alloc.DeploymentID = d.ID
		alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)}
		allocs = append(allocs, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Update the job
	job2 := mock.Job()
	job2.ID = job.ID
	desiredUpdates := 4
	job2.TaskGroups[0].Update = &structs.UpdateStrategy{
		MaxParallel:     desiredUpdates,
		HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 10 * time.Minute,
	}
	noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

	// Create a mock evaluation to handle the update
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan did not evict any allocs
	var update []*structs.Allocation
	for _, updateList := range plan.NodeUpdate {
		update = append(update, updateList...)
	}
	if len(update) != 0 {
		t.Fatalf("bad: %#v", plan)
	}

	// Ensure the plan updated the existing allocs
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", plan)
	}
	for _, p := range planned {
		if p.Job != job2 {
			t.Fatalf("should update job")
		}
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	if len(out) != 10 {
		t.Fatalf("bad: %#v", out)
	}
	h.AssertEvalStatus(t, structs.EvalStatusComplete)

	// Verify the network did not change
	rp := structs.Port{Label: "admin", Value: 5000}
	for _, alloc := range out {
		for _, resources := range alloc.TaskResources {
			if resources.Networks[0].ReservedPorts[0] != rp {
				t.Fatalf("bad: %#v", alloc)
			}
		}
	}

	// Verify the deployment id was changed and health cleared
	for _, alloc := range out {
		if alloc.DeploymentID == d.ID {
			t.Fatalf("bad: deployment id not cleared")
		} else if alloc.DeploymentStatus != nil {
			t.Fatalf("bad: deployment status not cleared")
		}
	}
}
func TestServiceSched_JobModify_DistinctProperty(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		node.Meta["rack"] = fmt.Sprintf("rack%d", i)
		nodes = append(nodes, node)
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job that uses distinct property and has a count higher than
	// what is possible.
	job := mock.Job()
	job.TaskGroups[0].Count = 11
	job.Constraints = append(job.Constraints,
		&structs.Constraint{
			Operand: structs.ConstraintDistinctProperty,
			LTarget: "${meta.rack}",
		})
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	oldJob := job.Copy()
	oldJob.JobModifyIndex--
	oldJob.TaskGroups[0].Count = 4

	// Place 4 of 10
	var allocs []*structs.Allocation
	for i := 0; i < 4; i++ {
		alloc := mock.Alloc()
		alloc.Job = oldJob
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan doesn't have annotations.
	if plan.Annotations != nil {
		t.Fatalf("expected no annotations")
	}

	// Ensure the eval has spawned a blocked eval
	if len(h.CreateEvals) != 1 {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}

	// Ensure the plan failed to alloc
	outEval := h.Evals[0]
	if len(outEval.FailedTGAllocs) != 1 {
		t.Fatalf("bad: %+v", outEval)
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", planned)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	noErr(t, err)

	// Ensure all allocations placed
	if len(out) != 10 {
		t.Fatalf("bad: %#v", out)
	}

	// Ensure a different node was used for each allocation.
	used := make(map[string]struct{})
	for _, alloc := range out {
		if _, ok := used[alloc.NodeID]; ok {
			t.Fatalf("Node collision %v", alloc.NodeID)
		}
		used[alloc.NodeID] = struct{}{}
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
2036 if plan.Annotations != nil { 2037 t.Fatalf("expected no annotations") 2038 } 2039 2040 // Ensure the eval has spawned a blocked eval 2041 if len(h.CreateEvals) != 1 { 2042 t.Fatalf("bad: %#v", h.CreateEvals) 2043 } 2044 2045 // Ensure the plan failed to alloc 2046 outEval := h.Evals[0] 2047 if len(outEval.FailedTGAllocs) != 1 { 2048 t.Fatalf("bad: %+v", outEval) 2049 } 2050 2051 // Ensure the plan allocated 2052 var planned []*structs.Allocation 2053 for _, allocList := range plan.NodeAllocation { 2054 planned = append(planned, allocList...) 2055 } 2056 if len(planned) != 10 { 2057 t.Fatalf("bad: %#v", planned) 2058 } 2059 2060 // Lookup the allocations by JobID 2061 ws := memdb.NewWatchSet() 2062 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2063 noErr(t, err) 2064 2065 // Ensure all allocations placed 2066 if len(out) != 10 { 2067 t.Fatalf("bad: %#v", out) 2068 } 2069 2070 // Ensure a different node was used per alloc. 2071 used := make(map[string]struct{}) 2072 for _, alloc := range out { 2073 if _, ok := used[alloc.NodeID]; ok { 2074 t.Fatalf("Node collision %v", alloc.NodeID) 2075 } 2076 used[alloc.NodeID] = struct{}{} 2077 } 2078 2079 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2080 } 2081 2082 func TestServiceSched_JobDeregister_Purged(t *testing.T) { 2083 h := NewHarness(t) 2084 2085 // Generate a fake job with allocations 2086 job := mock.Job() 2087 2088 var allocs []*structs.Allocation 2089 for i := 0; i < 10; i++ { 2090 alloc := mock.Alloc() 2091 alloc.Job = job 2092 alloc.JobID = job.ID 2093 allocs = append(allocs, alloc) 2094 } 2095 for _, alloc := range allocs { 2096 h.State.UpsertJobSummary(h.NextIndex(), mock.JobSummary(alloc.JobID)) 2097 } 2098 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2099 2100 // Create a mock evaluation to deregister the job 2101 eval := &structs.Evaluation{ 2102 Namespace: structs.DefaultNamespace, 2103 ID: uuid.Generate(), 2104 Priority: 50, 2105 TriggeredBy: structs.EvalTriggerJobDeregister, 2106 JobID: job.ID, 2107 Status: structs.EvalStatusPending, 2108 } 2109 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2110 2111 // Process the evaluation 2112 err := h.Process(NewServiceScheduler, eval) 2113 if err != nil { 2114 t.Fatalf("err: %v", err) 2115 } 2116 2117 // Ensure a single plan 2118 if len(h.Plans) != 1 { 2119 t.Fatalf("bad: %#v", h.Plans) 2120 } 2121 plan := h.Plans[0] 2122 2123 // Ensure the plan evicted all nodes 2124 if len(plan.NodeUpdate["12345678-abcd-efab-cdef-123456789abc"]) != len(allocs) { 2125 t.Fatalf("bad: %#v", plan) 2126 } 2127 2128 // Lookup the allocations by JobID 2129 ws := memdb.NewWatchSet() 2130 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2131 noErr(t, err) 2132 2133 // Ensure that the job field on the allocation is still populated 2134 for _, alloc := range out { 2135 if alloc.Job == nil { 2136 t.Fatalf("bad: %#v", alloc) 2137 } 2138 } 2139 2140 // Ensure no remaining allocations 2141 out, _ = structs.FilterTerminalAllocs(out) 2142 if len(out) != 0 { 2143 t.Fatalf("bad: %#v", out) 2144 } 2145 2146 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2147 } 2148 2149 func TestServiceSched_JobDeregister_Stopped(t *testing.T) { 2150 h := NewHarness(t) 2151 require := require.New(t) 2152 2153 // Generate a fake job with allocations 2154 job := mock.Job() 2155 job.Stop = true 2156 require.NoError(h.State.UpsertJob(h.NextIndex(), job)) 2157 2158 var allocs []*structs.Allocation 2159 for i := 0; i < 10; i++ { 2160 alloc := mock.Alloc() 2161
alloc.Job = job 2162 alloc.JobID = job.ID 2163 allocs = append(allocs, alloc) 2164 } 2165 require.NoError(h.State.UpsertAllocs(h.NextIndex(), allocs)) 2166 2167 // Create a summary where the queued allocs are set as we want to assert 2168 // they get zeroed out. 2169 summary := mock.JobSummary(job.ID) 2170 web := summary.Summary["web"] 2171 web.Queued = 2 2172 require.NoError(h.State.UpsertJobSummary(h.NextIndex(), summary)) 2173 2174 // Create a mock evaluation to deregister the job 2175 eval := &structs.Evaluation{ 2176 Namespace: structs.DefaultNamespace, 2177 ID: uuid.Generate(), 2178 Priority: 50, 2179 TriggeredBy: structs.EvalTriggerJobDeregister, 2180 JobID: job.ID, 2181 Status: structs.EvalStatusPending, 2182 } 2183 require.NoError(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2184 2185 // Process the evaluation 2186 require.NoError(h.Process(NewServiceScheduler, eval)) 2187 2188 // Ensure a single plan 2189 require.Len(h.Plans, 1) 2190 plan := h.Plans[0] 2191 2192 // Ensure the plan evicted all nodes 2193 require.Len(plan.NodeUpdate["12345678-abcd-efab-cdef-123456789abc"], len(allocs)) 2194 2195 // Lookup the allocations by JobID 2196 ws := memdb.NewWatchSet() 2197 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2198 require.NoError(err) 2199 2200 // Ensure that the job field on the allocation is still populated 2201 for _, alloc := range out { 2202 require.NotNil(alloc.Job) 2203 } 2204 2205 // Ensure no remaining allocations 2206 out, _ = structs.FilterTerminalAllocs(out) 2207 require.Empty(out) 2208 2209 // Assert the job summary is cleared out 2210 sout, err := h.State.JobSummaryByID(ws, job.Namespace, job.ID) 2211 require.NoError(err) 2212 require.NotNil(sout) 2213 require.Contains(sout.Summary, "web") 2214 webOut := sout.Summary["web"] 2215 require.Zero(webOut.Queued) 2216 2217 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2218 } 2219 2220 func TestServiceSched_NodeDown(t *testing.T) { 2221 h := NewHarness(t) 2222 2223 // Register a node 2224 node := mock.Node() 2225 node.Status = structs.NodeStatusDown 2226 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2227 2228 // Generate a fake job with allocations and an update policy. 
2229 job := mock.Job() 2230 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2231 2232 var allocs []*structs.Allocation 2233 for i := 0; i < 10; i++ { 2234 alloc := mock.Alloc() 2235 alloc.Job = job 2236 alloc.JobID = job.ID 2237 alloc.NodeID = node.ID 2238 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2239 allocs = append(allocs, alloc) 2240 } 2241 2242 // Cover each terminal case and ensure it doesn't change to lost 2243 allocs[7].DesiredStatus = structs.AllocDesiredStatusRun 2244 allocs[7].ClientStatus = structs.AllocClientStatusLost 2245 allocs[8].DesiredStatus = structs.AllocDesiredStatusRun 2246 allocs[8].ClientStatus = structs.AllocClientStatusFailed 2247 allocs[9].DesiredStatus = structs.AllocDesiredStatusRun 2248 allocs[9].ClientStatus = structs.AllocClientStatusComplete 2249 2250 // Mark some allocs as running 2251 for i := 0; i < 4; i++ { 2252 out := allocs[i] 2253 out.ClientStatus = structs.AllocClientStatusRunning 2254 } 2255 2256 // Mark appropriate allocs for migration 2257 for i := 0; i < 7; i++ { 2258 out := allocs[i] 2259 out.DesiredTransition.Migrate = helper.BoolToPtr(true) 2260 } 2261 2262 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2263 2264 // Create a mock evaluation to deal with the node going down 2265 eval := &structs.Evaluation{ 2266 Namespace: structs.DefaultNamespace, 2267 ID: uuid.Generate(), 2268 Priority: 50, 2269 TriggeredBy: structs.EvalTriggerNodeUpdate, 2270 JobID: job.ID, 2271 NodeID: node.ID, 2272 Status: structs.EvalStatusPending, 2273 } 2274 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2275 2276 // Process the evaluation 2277 err := h.Process(NewServiceScheduler, eval) 2278 if err != nil { 2279 t.Fatalf("err: %v", err) 2280 } 2281 2282 // Ensure a single plan 2283 if len(h.Plans) != 1 { 2284 t.Fatalf("bad: %#v", h.Plans) 2285 } 2286 plan := h.Plans[0] 2287 2288 // Test the scheduler marked all non-terminal allocations as lost 2289 if len(plan.NodeUpdate[node.ID]) != 7 { 2290 t.Fatalf("bad: %#v", plan) 2291 } 2292 2293 for _, out := range plan.NodeUpdate[node.ID] { 2294 if out.ClientStatus != structs.AllocClientStatusLost && out.DesiredStatus != structs.AllocDesiredStatusStop { 2295 t.Fatalf("bad alloc: %#v", out) 2296 } 2297 } 2298 2299 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2300 } 2301 2302 func TestServiceSched_NodeUpdate(t *testing.T) { 2303 h := NewHarness(t) 2304 2305 // Register a node 2306 node := mock.Node() 2307 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2308 2309 // Generate a fake job with allocations and an update policy.
2310 job := mock.Job() 2311 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2312 2313 var allocs []*structs.Allocation 2314 for i := 0; i < 10; i++ { 2315 alloc := mock.Alloc() 2316 alloc.Job = job 2317 alloc.JobID = job.ID 2318 alloc.NodeID = node.ID 2319 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2320 allocs = append(allocs, alloc) 2321 } 2322 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2323 2324 // Mark some allocs as running 2325 ws := memdb.NewWatchSet() 2326 for i := 0; i < 4; i++ { 2327 out, _ := h.State.AllocByID(ws, allocs[i].ID) 2328 out.ClientStatus = structs.AllocClientStatusRunning 2329 noErr(t, h.State.UpdateAllocsFromClient(h.NextIndex(), []*structs.Allocation{out})) 2330 } 2331 2332 // Create a mock evaluation which won't trigger any new placements 2333 eval := &structs.Evaluation{ 2334 Namespace: structs.DefaultNamespace, 2335 ID: uuid.Generate(), 2336 Priority: 50, 2337 TriggeredBy: structs.EvalTriggerNodeUpdate, 2338 JobID: job.ID, 2339 NodeID: node.ID, 2340 Status: structs.EvalStatusPending, 2341 } 2342 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2343 2344 // Process the evaluation 2345 err := h.Process(NewServiceScheduler, eval) 2346 if err != nil { 2347 t.Fatalf("err: %v", err) 2348 } 2349 if val, ok := h.Evals[0].QueuedAllocations["web"]; !ok || val != 0 { 2350 t.Fatalf("bad queued allocations: %v", h.Evals[0].QueuedAllocations) 2351 } 2352 2353 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2354 } 2355 2356 func TestServiceSched_NodeDrain(t *testing.T) { 2357 h := NewHarness(t) 2358 2359 // Register a draining node 2360 node := mock.Node() 2361 node.Drain = true 2362 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2363 2364 // Create some nodes 2365 for i := 0; i < 10; i++ { 2366 node := mock.Node() 2367 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2368 } 2369 2370 // Generate a fake job with allocations and an update policy. 2371 job := mock.Job() 2372 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2373 2374 var allocs []*structs.Allocation 2375 for i := 0; i < 10; i++ { 2376 alloc := mock.Alloc() 2377 alloc.Job = job 2378 alloc.JobID = job.ID 2379 alloc.NodeID = node.ID 2380 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2381 alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 2382 allocs = append(allocs, alloc) 2383 } 2384 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2385 2386 // Create a mock evaluation to deal with drain 2387 eval := &structs.Evaluation{ 2388 Namespace: structs.DefaultNamespace, 2389 ID: uuid.Generate(), 2390 Priority: 50, 2391 TriggeredBy: structs.EvalTriggerNodeUpdate, 2392 JobID: job.ID, 2393 NodeID: node.ID, 2394 Status: structs.EvalStatusPending, 2395 } 2396 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2397 2398 // Process the evaluation 2399 err := h.Process(NewServiceScheduler, eval) 2400 if err != nil { 2401 t.Fatalf("err: %v", err) 2402 } 2403 2404 // Ensure a single plan 2405 if len(h.Plans) != 1 { 2406 t.Fatalf("bad: %#v", h.Plans) 2407 } 2408 plan := h.Plans[0] 2409 2410 // Ensure the plan evicted all allocs 2411 if len(plan.NodeUpdate[node.ID]) != len(allocs) { 2412 t.Fatalf("bad: %#v", plan) 2413 } 2414 2415 // Ensure the plan allocated 2416 var planned []*structs.Allocation 2417 for _, allocList := range plan.NodeAllocation { 2418 planned = append(planned, allocList...) 
2419 } 2420 if len(planned) != 10 { 2421 t.Fatalf("bad: %#v", plan) 2422 } 2423 2424 // Lookup the allocations by JobID 2425 ws := memdb.NewWatchSet() 2426 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2427 noErr(t, err) 2428 2429 // Ensure all allocations placed 2430 out, _ = structs.FilterTerminalAllocs(out) 2431 if len(out) != 10 { 2432 t.Fatalf("bad: %#v", out) 2433 } 2434 2435 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2436 } 2437 2438 func TestServiceSched_NodeDrain_Down(t *testing.T) { 2439 h := NewHarness(t) 2440 2441 // Register a draining node 2442 node := mock.Node() 2443 node.Drain = true 2444 node.Status = structs.NodeStatusDown 2445 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2446 2447 // Generate a fake job with allocations 2448 job := mock.Job() 2449 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2450 2451 var allocs []*structs.Allocation 2452 for i := 0; i < 10; i++ { 2453 alloc := mock.Alloc() 2454 alloc.Job = job 2455 alloc.JobID = job.ID 2456 alloc.NodeID = node.ID 2457 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2458 allocs = append(allocs, alloc) 2459 } 2460 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2461 2462 // Set the client status of the allocs to stop so they remain non-terminal 2463 var stop []*structs.Allocation 2464 for i := 0; i < 6; i++ { 2465 newAlloc := allocs[i].Copy() 2466 newAlloc.ClientStatus = structs.AllocDesiredStatusStop 2467 newAlloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 2468 stop = append(stop, newAlloc) 2469 } 2470 noErr(t, h.State.UpsertAllocs(h.NextIndex(), stop)) 2471 2472 // Mark some of the allocations as running 2473 var running []*structs.Allocation 2474 for i := 4; i < 6; i++ { 2475 newAlloc := stop[i].Copy() 2476 newAlloc.ClientStatus = structs.AllocClientStatusRunning 2477 running = append(running, newAlloc) 2478 } 2479 noErr(t, h.State.UpdateAllocsFromClient(h.NextIndex(), running)) 2480 2481 // Mark some of the allocations as complete 2482 var complete []*structs.Allocation 2483 for i := 6; i < 10; i++ { 2484 newAlloc := allocs[i].Copy() 2485 newAlloc.TaskStates = make(map[string]*structs.TaskState) 2486 newAlloc.TaskStates["web"] = &structs.TaskState{ 2487 State: structs.TaskStateDead, 2488 Events: []*structs.TaskEvent{ 2489 { 2490 Type: structs.TaskTerminated, 2491 ExitCode: 0, 2492 }, 2493 }, 2494 } 2495 newAlloc.ClientStatus = structs.AllocClientStatusComplete 2496 complete = append(complete, newAlloc) 2497 } 2498 noErr(t, h.State.UpdateAllocsFromClient(h.NextIndex(), complete)) 2499 2500 // Create a mock evaluation to deal with the node update 2501 eval := &structs.Evaluation{ 2502 Namespace: structs.DefaultNamespace, 2503 ID: uuid.Generate(), 2504 Priority: 50, 2505 TriggeredBy: structs.EvalTriggerNodeUpdate, 2506 JobID: job.ID, 2507 NodeID: node.ID, 2508 Status: structs.EvalStatusPending, 2509 } 2510 2511 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2512 2513 // Process the evaluation 2514 err := h.Process(NewServiceScheduler, eval) 2515 if err != nil { 2516 t.Fatalf("err: %v", err) 2517 } 2518 2519 // Ensure a single plan 2520 if len(h.Plans) != 1 { 2521 t.Fatalf("bad: %#v", h.Plans) 2522 } 2523 plan := h.Plans[0] 2524 2525 // Ensure the plan evicted non-terminal allocs 2526 if len(plan.NodeUpdate[node.ID]) != 6 { 2527 t.Fatalf("bad: %#v", plan) 2528 } 2529 2530 // Ensure that all the allocations which were in running or pending state 2531 // have been marked as lost 2532 var lostAllocs []string 2533 for _, alloc := range plan.NodeUpdate[node.ID] { 2534 lostAllocs =
append(lostAllocs, alloc.ID) 2535 } 2536 sort.Strings(lostAllocs) 2537 2538 var expectedLostAllocs []string 2539 for i := 0; i < 6; i++ { 2540 expectedLostAllocs = append(expectedLostAllocs, allocs[i].ID) 2541 } 2542 sort.Strings(expectedLostAllocs) 2543 2544 if !reflect.DeepEqual(expectedLostAllocs, lostAllocs) { 2545 t.Fatalf("expected: %v, actual: %v", expectedLostAllocs, lostAllocs) 2546 } 2547 2548 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2549 } 2550 2551 func TestServiceSched_NodeDrain_Queued_Allocations(t *testing.T) { 2552 h := NewHarness(t) 2553 2554 // Register a node; it is marked as draining once the allocs are placed 2555 node := mock.Node() 2556 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2557 2558 // Generate a fake job with allocations and an update policy. 2559 job := mock.Job() 2560 job.TaskGroups[0].Count = 2 2561 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2562 2563 var allocs []*structs.Allocation 2564 for i := 0; i < 2; i++ { 2565 alloc := mock.Alloc() 2566 alloc.Job = job 2567 alloc.JobID = job.ID 2568 alloc.NodeID = node.ID 2569 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2570 alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 2571 allocs = append(allocs, alloc) 2572 } 2573 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2574 2575 node.Drain = true 2576 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2577 2578 // Create a mock evaluation to deal with drain 2579 eval := &structs.Evaluation{ 2580 Namespace: structs.DefaultNamespace, 2581 ID: uuid.Generate(), 2582 Priority: 50, 2583 TriggeredBy: structs.EvalTriggerNodeUpdate, 2584 JobID: job.ID, 2585 NodeID: node.ID, 2586 Status: structs.EvalStatusPending, 2587 } 2588 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2589 2590 // Process the evaluation 2591 err := h.Process(NewServiceScheduler, eval) 2592 if err != nil { 2593 t.Fatalf("err: %v", err) 2594 } 2595 2596 queued := h.Evals[0].QueuedAllocations["web"] 2597 if queued != 2 { 2598 t.Fatalf("expected: %v, actual: %v", 2, queued) 2599 } 2600 } 2601 2602 func TestServiceSched_RetryLimit(t *testing.T) { 2603 h := NewHarness(t) 2604 h.Planner = &RejectPlan{h} 2605 2606 // Create some nodes 2607 for i := 0; i < 10; i++ { 2608 node := mock.Node() 2609 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2610 } 2611 2612 // Create a job 2613 job := mock.Job() 2614 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2615 2616 // Create a mock evaluation to register the job 2617 eval := &structs.Evaluation{ 2618 Namespace: structs.DefaultNamespace, 2619 ID: uuid.Generate(), 2620 Priority: job.Priority, 2621 TriggeredBy: structs.EvalTriggerJobRegister, 2622 JobID: job.ID, 2623 Status: structs.EvalStatusPending, 2624 } 2625 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2626 2627 // Process the evaluation 2628 err := h.Process(NewServiceScheduler, eval) 2629 if err != nil { 2630 t.Fatalf("err: %v", err) 2631 } 2632 2633 // Ensure multiple plans 2634 if len(h.Plans) == 0 { 2635 t.Fatalf("bad: %#v", h.Plans) 2636 } 2637 2638 // Lookup the allocations by JobID 2639 ws := memdb.NewWatchSet() 2640 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2641 noErr(t, err) 2642 2643 // Ensure no allocations placed 2644 if len(out) != 0 { 2645 t.Fatalf("bad: %#v", out) 2646 } 2647 2648 // Should hit the retry limit 2649 h.AssertEvalStatus(t, structs.EvalStatusFailed) 2650 } 2651 2652 func TestServiceSched_Reschedule_OnceNow(t *testing.T) { 2653 h := NewHarness(t) 2654 2655 // Create some nodes 2656 var nodes []*structs.Node 2657 for i
:= 0; i < 10; i++ { 2658 node := mock.Node() 2659 nodes = append(nodes, node) 2660 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2661 } 2662 2663 // Generate a fake job with allocations and an update policy. 2664 job := mock.Job() 2665 job.TaskGroups[0].Count = 2 2666 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 2667 Attempts: 1, 2668 Interval: 15 * time.Minute, 2669 Delay: 5 * time.Second, 2670 MaxDelay: 1 * time.Minute, 2671 DelayFunction: "constant", 2672 } 2673 tgName := job.TaskGroups[0].Name 2674 now := time.Now() 2675 2676 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2677 2678 var allocs []*structs.Allocation 2679 for i := 0; i < 2; i++ { 2680 alloc := mock.Alloc() 2681 alloc.Job = job 2682 alloc.JobID = job.ID 2683 alloc.NodeID = nodes[i].ID 2684 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2685 allocs = append(allocs, alloc) 2686 } 2687 // Mark one of the allocations as failed 2688 allocs[1].ClientStatus = structs.AllocClientStatusFailed 2689 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 2690 StartedAt: now.Add(-1 * time.Hour), 2691 FinishedAt: now.Add(-10 * time.Second)}} 2692 failedAllocID := allocs[1].ID 2693 successAllocID := allocs[0].ID 2694 2695 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2696 2697 // Create a mock evaluation 2698 eval := &structs.Evaluation{ 2699 Namespace: structs.DefaultNamespace, 2700 ID: uuid.Generate(), 2701 Priority: 50, 2702 TriggeredBy: structs.EvalTriggerNodeUpdate, 2703 JobID: job.ID, 2704 Status: structs.EvalStatusPending, 2705 } 2706 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2707 2708 // Process the evaluation 2709 err := h.Process(NewServiceScheduler, eval) 2710 if err != nil { 2711 t.Fatalf("err: %v", err) 2712 } 2713 2714 // Ensure multiple plans 2715 if len(h.Plans) == 0 { 2716 t.Fatalf("bad: %#v", h.Plans) 2717 } 2718 2719 // Lookup the allocations by JobID 2720 ws := memdb.NewWatchSet() 2721 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2722 noErr(t, err) 2723 2724 // Verify that one new allocation got created with its restart tracker info 2725 assert := assert.New(t) 2726 assert.Equal(3, len(out)) 2727 var newAlloc *structs.Allocation 2728 for _, alloc := range out { 2729 if alloc.ID != successAllocID && alloc.ID != failedAllocID { 2730 newAlloc = alloc 2731 } 2732 } 2733 assert.Equal(failedAllocID, newAlloc.PreviousAllocation) 2734 assert.Equal(1, len(newAlloc.RescheduleTracker.Events)) 2735 assert.Equal(failedAllocID, newAlloc.RescheduleTracker.Events[0].PrevAllocID) 2736 2737 // Mark this alloc as failed again, should not get rescheduled 2738 newAlloc.ClientStatus = structs.AllocClientStatusFailed 2739 2740 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{newAlloc})) 2741 2742 // Create another mock evaluation 2743 eval = &structs.Evaluation{ 2744 Namespace: structs.DefaultNamespace, 2745 ID: uuid.Generate(), 2746 Priority: 50, 2747 TriggeredBy: structs.EvalTriggerNodeUpdate, 2748 JobID: job.ID, 2749 Status: structs.EvalStatusPending, 2750 } 2751 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2752 2753 // Process the evaluation 2754 err = h.Process(NewServiceScheduler, eval) 2755 assert.Nil(err) 2756 // Verify no new allocs were created this time 2757 out, err = h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2758 noErr(t, err) 2759 assert.Equal(3, len(out)) 2760 2761 } 2762 2763 // Tests that an alloc that is reschedulable at a future time creates a follow-up eval 2764 func
TestServiceSched_Reschedule_Later(t *testing.T) { 2765 h := NewHarness(t) 2766 require := require.New(t) 2767 // Create some nodes 2768 var nodes []*structs.Node 2769 for i := 0; i < 10; i++ { 2770 node := mock.Node() 2771 nodes = append(nodes, node) 2772 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2773 } 2774 2775 // Generate a fake job with allocations and an update policy. 2776 job := mock.Job() 2777 job.TaskGroups[0].Count = 2 2778 delayDuration := 15 * time.Second 2779 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 2780 Attempts: 1, 2781 Interval: 15 * time.Minute, 2782 Delay: delayDuration, 2783 MaxDelay: 1 * time.Minute, 2784 DelayFunction: "constant", 2785 } 2786 tgName := job.TaskGroups[0].Name 2787 now := time.Now() 2788 2789 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2790 2791 var allocs []*structs.Allocation 2792 for i := 0; i < 2; i++ { 2793 alloc := mock.Alloc() 2794 alloc.Job = job 2795 alloc.JobID = job.ID 2796 alloc.NodeID = nodes[i].ID 2797 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2798 allocs = append(allocs, alloc) 2799 } 2800 // Mark one of the allocations as failed 2801 allocs[1].ClientStatus = structs.AllocClientStatusFailed 2802 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 2803 StartedAt: now.Add(-1 * time.Hour), 2804 FinishedAt: now}} 2805 failedAllocID := allocs[1].ID 2806 2807 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2808 2809 // Create a mock evaluation 2810 eval := &structs.Evaluation{ 2811 Namespace: structs.DefaultNamespace, 2812 ID: uuid.Generate(), 2813 Priority: 50, 2814 TriggeredBy: structs.EvalTriggerNodeUpdate, 2815 JobID: job.ID, 2816 Status: structs.EvalStatusPending, 2817 } 2818 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2819 2820 // Process the evaluation 2821 err := h.Process(NewServiceScheduler, eval) 2822 if err != nil { 2823 t.Fatalf("err: %v", err) 2824 } 2825 2826 // Ensure multiple plans 2827 if len(h.Plans) == 0 { 2828 t.Fatalf("bad: %#v", h.Plans) 2829 } 2830 2831 // Lookup the allocations by JobID 2832 ws := memdb.NewWatchSet() 2833 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2834 noErr(t, err) 2835 2836 // Verify no new allocs were created 2837 require.Equal(2, len(out)) 2838 2839 // Verify follow up eval was created for the failed alloc 2840 alloc, err := h.State.AllocByID(ws, failedAllocID) 2841 require.Nil(err) 2842 require.NotEmpty(alloc.FollowupEvalID) 2843 2844 // Ensure there is a follow up eval. 2845 if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusPending { 2846 t.Fatalf("bad: %#v", h.CreateEvals) 2847 } 2848 followupEval := h.CreateEvals[0] 2849 require.Equal(now.Add(delayDuration), followupEval.WaitUntil) 2850 } 2851 2852 func TestServiceSched_Reschedule_MultipleNow(t *testing.T) { 2853 h := NewHarness(t) 2854 2855 // Create some nodes 2856 var nodes []*structs.Node 2857 for i := 0; i < 10; i++ { 2858 node := mock.Node() 2859 nodes = append(nodes, node) 2860 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2861 } 2862 2863 maxRestartAttempts := 3 2864 // Generate a fake job with allocations and an update policy. 
2865 job := mock.Job() 2866 job.TaskGroups[0].Count = 2 2867 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 2868 Attempts: maxRestartAttempts, 2869 Interval: 30 * time.Minute, 2870 Delay: 5 * time.Second, 2871 DelayFunction: "constant", 2872 } 2873 tgName := job.TaskGroups[0].Name 2874 now := time.Now() 2875 2876 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2877 2878 var allocs []*structs.Allocation 2879 for i := 0; i < 2; i++ { 2880 alloc := mock.Alloc() 2881 alloc.ClientStatus = structs.AllocClientStatusRunning 2882 alloc.Job = job 2883 alloc.JobID = job.ID 2884 alloc.NodeID = nodes[i].ID 2885 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2886 allocs = append(allocs, alloc) 2887 } 2888 // Mark one of the allocations as failed 2889 allocs[1].ClientStatus = structs.AllocClientStatusFailed 2890 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 2891 StartedAt: now.Add(-1 * time.Hour), 2892 FinishedAt: now.Add(-10 * time.Second)}} 2893 2894 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2895 2896 // Create a mock evaluation 2897 eval := &structs.Evaluation{ 2898 Namespace: structs.DefaultNamespace, 2899 ID: uuid.Generate(), 2900 Priority: 50, 2901 TriggeredBy: structs.EvalTriggerNodeUpdate, 2902 JobID: job.ID, 2903 Status: structs.EvalStatusPending, 2904 } 2905 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2906 2907 expectedNumAllocs := 3 2908 expectedNumReschedTrackers := 1 2909 2910 failedAllocId := allocs[1].ID 2911 failedNodeID := allocs[1].NodeID 2912 2913 assert := assert.New(t) 2914 for i := 0; i < maxRestartAttempts; i++ { 2915 // Process the evaluation 2916 err := h.Process(NewServiceScheduler, eval) 2917 noErr(t, err) 2918 2919 // Ensure multiple plans 2920 if len(h.Plans) == 0 { 2921 t.Fatalf("bad: %#v", h.Plans) 2922 } 2923 2924 // Lookup the allocations by JobID 2925 ws := memdb.NewWatchSet() 2926 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2927 noErr(t, err) 2928 2929 // Verify that a new allocation got created with its restart tracker info 2930 assert.Equal(expectedNumAllocs, len(out)) 2931 2932 // Find the new alloc with ClientStatusPending 2933 var pendingAllocs []*structs.Allocation 2934 var prevFailedAlloc *structs.Allocation 2935 2936 for _, alloc := range out { 2937 if alloc.ClientStatus == structs.AllocClientStatusPending { 2938 pendingAllocs = append(pendingAllocs, alloc) 2939 } 2940 if alloc.ID == failedAllocId { 2941 prevFailedAlloc = alloc 2942 } 2943 } 2944 assert.Equal(1, len(pendingAllocs)) 2945 newAlloc := pendingAllocs[0] 2946 assert.Equal(expectedNumReschedTrackers, len(newAlloc.RescheduleTracker.Events)) 2947 2948 // Verify the previous NodeID in the most recent reschedule event 2949 reschedEvents := newAlloc.RescheduleTracker.Events 2950 assert.Equal(failedAllocId, reschedEvents[len(reschedEvents)-1].PrevAllocID) 2951 assert.Equal(failedNodeID, reschedEvents[len(reschedEvents)-1].PrevNodeID) 2952 2953 // Verify that the next alloc of the failed alloc is the newly rescheduled alloc 2954 assert.Equal(newAlloc.ID, prevFailedAlloc.NextAllocation) 2955 2956 // Mark this alloc as failed again 2957 newAlloc.ClientStatus = structs.AllocClientStatusFailed 2958 newAlloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 2959 StartedAt: now.Add(-12 * time.Second), 2960 FinishedAt: now.Add(-10 * time.Second)}} 2961 2962 failedAllocId = newAlloc.ID 2963 failedNodeID = newAlloc.NodeID 2964 2965 noErr(t, h.State.UpsertAllocs(h.NextIndex(), 
[]*structs.Allocation{newAlloc})) 2966 2967 // Create another mock evaluation 2968 eval = &structs.Evaluation{ 2969 Namespace: structs.DefaultNamespace, 2970 ID: uuid.Generate(), 2971 Priority: 50, 2972 TriggeredBy: structs.EvalTriggerNodeUpdate, 2973 JobID: job.ID, 2974 Status: structs.EvalStatusPending, 2975 } 2976 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2977 expectedNumAllocs += 1 2978 expectedNumReschedTrackers += 1 2979 } 2980 2981 // Process last eval again, should not reschedule 2982 err := h.Process(NewServiceScheduler, eval) 2983 assert.Nil(err) 2984 2985 // Verify no new allocs were created because restart attempts were exhausted 2986 ws := memdb.NewWatchSet() 2987 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2988 noErr(t, err) 2989 assert.Equal(5, len(out)) // 2 original, plus 3 reschedule attempts 2990 } 2991 2992 // Tests that old reschedule attempts are pruned 2993 func TestServiceSched_Reschedule_PruneEvents(t *testing.T) { 2994 h := NewHarness(t) 2995 2996 // Create some nodes 2997 var nodes []*structs.Node 2998 for i := 0; i < 10; i++ { 2999 node := mock.Node() 3000 nodes = append(nodes, node) 3001 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3002 } 3003 3004 // Generate a fake job with allocations and an update policy. 3005 job := mock.Job() 3006 job.TaskGroups[0].Count = 2 3007 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 3008 DelayFunction: "exponential", 3009 MaxDelay: 1 * time.Hour, 3010 Delay: 5 * time.Second, 3011 Unlimited: true, 3012 } 3013 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3014 3015 var allocs []*structs.Allocation 3016 for i := 0; i < 2; i++ { 3017 alloc := mock.Alloc() 3018 alloc.Job = job 3019 alloc.JobID = job.ID 3020 alloc.NodeID = nodes[i].ID 3021 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3022 allocs = append(allocs, alloc) 3023 } 3024 now := time.Now() 3025 // Mark allocations as failed with restart info 3026 allocs[1].TaskStates = map[string]*structs.TaskState{job.TaskGroups[0].Name: {State: "dead", 3027 StartedAt: now.Add(-1 * time.Hour), 3028 FinishedAt: now.Add(-15 * time.Minute)}} 3029 allocs[1].ClientStatus = structs.AllocClientStatusFailed 3030 3031 allocs[1].RescheduleTracker = &structs.RescheduleTracker{ 3032 Events: []*structs.RescheduleEvent{ 3033 {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), 3034 PrevAllocID: uuid.Generate(), 3035 PrevNodeID: uuid.Generate(), 3036 Delay: 5 * time.Second, 3037 }, 3038 {RescheduleTime: now.Add(-40 * time.Minute).UTC().UnixNano(), 3039 PrevAllocID: allocs[0].ID, 3040 PrevNodeID: uuid.Generate(), 3041 Delay: 10 * time.Second, 3042 }, 3043 {RescheduleTime: now.Add(-30 * time.Minute).UTC().UnixNano(), 3044 PrevAllocID: allocs[0].ID, 3045 PrevNodeID: uuid.Generate(), 3046 Delay: 20 * time.Second, 3047 }, 3048 {RescheduleTime: now.Add(-20 * time.Minute).UTC().UnixNano(), 3049 PrevAllocID: allocs[0].ID, 3050 PrevNodeID: uuid.Generate(), 3051 Delay: 40 * time.Second, 3052 }, 3053 {RescheduleTime: now.Add(-10 * time.Minute).UTC().UnixNano(), 3054 PrevAllocID: allocs[0].ID, 3055 PrevNodeID: uuid.Generate(), 3056 Delay: 80 * time.Second, 3057 }, 3058 {RescheduleTime: now.Add(-3 * time.Minute).UTC().UnixNano(), 3059 PrevAllocID: allocs[0].ID, 3060 PrevNodeID: uuid.Generate(), 3061 Delay: 160 * time.Second, 3062 }, 3063 }, 3064 } 3065 expectedFirstRescheduleEvent := allocs[1].RescheduleTracker.Events[1] 3066 expectedDelay := 320 * time.Second 3067 failedAllocID := allocs[1].ID 3068 successAllocID := allocs[0].ID 3069 
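// Sanity note on the expectations above (inferred from the policy and events
// constructed in this test): the "exponential" delay function starts at the
// 5s base delay, the most recent tracked event used 160s, so the next
// reschedule should double to 320s (still under the 1h MaxDelay), and only
// the five most recent events should carry over to the replacement alloc.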
3070 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3071 3072 // Create a mock evaluation 3073 eval := &structs.Evaluation{ 3074 Namespace: structs.DefaultNamespace, 3075 ID: uuid.Generate(), 3076 Priority: 50, 3077 TriggeredBy: structs.EvalTriggerNodeUpdate, 3078 JobID: job.ID, 3079 Status: structs.EvalStatusPending, 3080 } 3081 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3082 3083 // Process the evaluation 3084 err := h.Process(NewServiceScheduler, eval) 3085 if err != nil { 3086 t.Fatalf("err: %v", err) 3087 } 3088 3089 // Ensure multiple plans 3090 if len(h.Plans) == 0 { 3091 t.Fatalf("bad: %#v", h.Plans) 3092 } 3093 3094 // Lookup the allocations by JobID 3095 ws := memdb.NewWatchSet() 3096 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3097 noErr(t, err) 3098 3099 // Verify that one new allocation got created with its restart tracker info 3100 assert := assert.New(t) 3101 assert.Equal(3, len(out)) 3102 var newAlloc *structs.Allocation 3103 for _, alloc := range out { 3104 if alloc.ID != successAllocID && alloc.ID != failedAllocID { 3105 newAlloc = alloc 3106 } 3107 } 3108 3109 assert.Equal(failedAllocID, newAlloc.PreviousAllocation) 3110 // Verify that the new alloc kept the five most recent reschedule events plus the new one 3111 assert.Equal(6, len(newAlloc.RescheduleTracker.Events)) 3112 assert.Equal(expectedFirstRescheduleEvent, newAlloc.RescheduleTracker.Events[0]) 3113 3114 mostRecentRescheduleEvent := newAlloc.RescheduleTracker.Events[5] 3115 // Verify that the failed alloc ID is in the most recent reschedule event 3116 assert.Equal(failedAllocID, mostRecentRescheduleEvent.PrevAllocID) 3117 // Verify that the delay value was captured correctly 3118 assert.Equal(expectedDelay, mostRecentRescheduleEvent.Delay) 3119 3120 } 3121 3122 // Tests that deployments with failed allocs result in placements as long as the 3123 // deployment is running. 3124 func TestDeployment_FailedAllocs_Reschedule(t *testing.T) { 3125 for _, failedDeployment := range []bool{false, true} { 3126 t.Run(fmt.Sprintf("Failed Deployment: %v", failedDeployment), func(t *testing.T) { 3127 h := NewHarness(t) 3128 require := require.New(t) 3129 // Create some nodes 3130 var nodes []*structs.Node 3131 for i := 0; i < 10; i++ { 3132 node := mock.Node() 3133 nodes = append(nodes, node) 3134 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3135 } 3136 3137 // Generate a fake job with allocations and a reschedule policy.
3138 job := mock.Job() 3139 job.TaskGroups[0].Count = 2 3140 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 3141 Attempts: 1, 3142 Interval: 15 * time.Minute, 3143 } 3144 jobIndex := h.NextIndex() 3145 require.Nil(h.State.UpsertJob(jobIndex, job)) 3146 3147 deployment := mock.Deployment() 3148 deployment.JobID = job.ID 3149 deployment.JobCreateIndex = jobIndex 3150 deployment.JobVersion = job.Version 3151 if failedDeployment { 3152 deployment.Status = structs.DeploymentStatusFailed 3153 } 3154 3155 require.Nil(h.State.UpsertDeployment(h.NextIndex(), deployment)) 3156 3157 var allocs []*structs.Allocation 3158 for i := 0; i < 2; i++ { 3159 alloc := mock.Alloc() 3160 alloc.Job = job 3161 alloc.JobID = job.ID 3162 alloc.NodeID = nodes[i].ID 3163 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3164 alloc.DeploymentID = deployment.ID 3165 allocs = append(allocs, alloc) 3166 } 3167 // Mark one of the allocations as failed in the past 3168 allocs[1].ClientStatus = structs.AllocClientStatusFailed 3169 allocs[1].TaskStates = map[string]*structs.TaskState{"web": {State: "start", 3170 StartedAt: time.Now().Add(-12 * time.Hour), 3171 FinishedAt: time.Now().Add(-10 * time.Hour)}} 3172 allocs[1].DesiredTransition.Reschedule = helper.BoolToPtr(true) 3173 3174 require.Nil(h.State.UpsertAllocs(h.NextIndex(), allocs)) 3175 3176 // Create a mock evaluation 3177 eval := &structs.Evaluation{ 3178 Namespace: structs.DefaultNamespace, 3179 ID: uuid.Generate(), 3180 Priority: 50, 3181 TriggeredBy: structs.EvalTriggerNodeUpdate, 3182 JobID: job.ID, 3183 Status: structs.EvalStatusPending, 3184 } 3185 require.Nil(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3186 3187 // Process the evaluation 3188 require.Nil(h.Process(NewServiceScheduler, eval)) 3189 3190 if failedDeployment { 3191 // Verify no plan created 3192 require.Len(h.Plans, 0) 3193 } else { 3194 require.Len(h.Plans, 1) 3195 plan := h.Plans[0] 3196 3197 // Ensure the plan allocated 3198 var planned []*structs.Allocation 3199 for _, allocList := range plan.NodeAllocation { 3200 planned = append(planned, allocList...) 
3201 } 3202 if len(planned) != 1 { 3203 t.Fatalf("bad: %#v", plan) 3204 } 3205 } 3206 }) 3207 } 3208 } 3209 3210 func TestBatchSched_Run_CompleteAlloc(t *testing.T) { 3211 h := NewHarness(t) 3212 3213 // Create a node 3214 node := mock.Node() 3215 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3216 3217 // Create a job 3218 job := mock.Job() 3219 job.Type = structs.JobTypeBatch 3220 job.TaskGroups[0].Count = 1 3221 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3222 3223 // Create a complete alloc 3224 alloc := mock.Alloc() 3225 alloc.Job = job 3226 alloc.JobID = job.ID 3227 alloc.NodeID = node.ID 3228 alloc.Name = "my-job.web[0]" 3229 alloc.ClientStatus = structs.AllocClientStatusComplete 3230 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3231 3232 // Create a mock evaluation to register the job 3233 eval := &structs.Evaluation{ 3234 Namespace: structs.DefaultNamespace, 3235 ID: uuid.Generate(), 3236 Priority: job.Priority, 3237 TriggeredBy: structs.EvalTriggerJobRegister, 3238 JobID: job.ID, 3239 Status: structs.EvalStatusPending, 3240 } 3241 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3242 3243 // Process the evaluation 3244 err := h.Process(NewBatchScheduler, eval) 3245 if err != nil { 3246 t.Fatalf("err: %v", err) 3247 } 3248 3249 // Ensure no plan as it should be a no-op 3250 if len(h.Plans) != 0 { 3251 t.Fatalf("bad: %#v", h.Plans) 3252 } 3253 3254 // Lookup the allocations by JobID 3255 ws := memdb.NewWatchSet() 3256 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3257 noErr(t, err) 3258 3259 // Ensure no allocations placed 3260 if len(out) != 1 { 3261 t.Fatalf("bad: %#v", out) 3262 } 3263 3264 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3265 } 3266 3267 func TestBatchSched_Run_FailedAlloc(t *testing.T) { 3268 h := NewHarness(t) 3269 3270 // Create a node 3271 node := mock.Node() 3272 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3273 3274 // Create a job 3275 job := mock.Job() 3276 job.Type = structs.JobTypeBatch 3277 job.TaskGroups[0].Count = 1 3278 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3279 3280 tgName := job.TaskGroups[0].Name 3281 now := time.Now() 3282 3283 // Create a failed alloc 3284 alloc := mock.Alloc() 3285 alloc.Job = job 3286 alloc.JobID = job.ID 3287 alloc.NodeID = node.ID 3288 alloc.Name = "my-job.web[0]" 3289 alloc.ClientStatus = structs.AllocClientStatusFailed 3290 alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 3291 StartedAt: now.Add(-1 * time.Hour), 3292 FinishedAt: now.Add(-10 * time.Second)}} 3293 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3294 3295 // Create a mock evaluation to register the job 3296 eval := &structs.Evaluation{ 3297 Namespace: structs.DefaultNamespace, 3298 ID: uuid.Generate(), 3299 Priority: job.Priority, 3300 TriggeredBy: structs.EvalTriggerJobRegister, 3301 JobID: job.ID, 3302 Status: structs.EvalStatusPending, 3303 } 3304 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3305 3306 // Process the evaluation 3307 err := h.Process(NewBatchScheduler, eval) 3308 if err != nil { 3309 t.Fatalf("err: %v", err) 3310 } 3311 3312 // Ensure a plan 3313 if len(h.Plans) != 1 { 3314 t.Fatalf("bad: %#v", h.Plans) 3315 } 3316 3317 // Lookup the allocations by JobID 3318 ws := memdb.NewWatchSet() 3319 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3320 noErr(t, err) 3321 3322 // Ensure a replacement alloc was placed. 
3323 if len(out) != 2 { 3324 t.Fatalf("bad: %#v", out) 3325 } 3326 3327 // Ensure that the scheduler is recording the correct number of queued 3328 // allocations 3329 queued := h.Evals[0].QueuedAllocations["web"] 3330 if queued != 0 { 3331 t.Fatalf("expected: %v, actual: %v", 0, queued) 3332 } 3333 3334 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3335 } 3336 3337 func TestBatchSched_Run_LostAlloc(t *testing.T) { 3338 h := NewHarness(t) 3339 3340 // Create a node 3341 node := mock.Node() 3342 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3343 3344 // Create a job 3345 job := mock.Job() 3346 job.ID = "my-job" 3347 job.Type = structs.JobTypeBatch 3348 job.TaskGroups[0].Count = 3 3349 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3350 3351 // Desired = 3 3352 // Mark one as lost and then schedule 3353 // [(0, run, running), (1, run, running), (1, stop, lost)] 3354 3355 // Create two running allocations 3356 var allocs []*structs.Allocation 3357 for i := 0; i <= 1; i++ { 3358 alloc := mock.Alloc() 3359 alloc.Job = job 3360 alloc.JobID = job.ID 3361 alloc.NodeID = node.ID 3362 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3363 alloc.ClientStatus = structs.AllocClientStatusRunning 3364 allocs = append(allocs, alloc) 3365 } 3366 3367 // Create a stopped alloc to stand in for the lost one 3368 alloc := mock.Alloc() 3369 alloc.Job = job 3370 alloc.JobID = job.ID 3371 alloc.NodeID = node.ID 3372 alloc.Name = "my-job.web[1]" 3373 alloc.DesiredStatus = structs.AllocDesiredStatusStop 3374 alloc.ClientStatus = structs.AllocClientStatusComplete 3375 allocs = append(allocs, alloc) 3376 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3377 3378 // Create a mock evaluation to register the job 3379 eval := &structs.Evaluation{ 3380 Namespace: structs.DefaultNamespace, 3381 ID: uuid.Generate(), 3382 Priority: job.Priority, 3383 TriggeredBy: structs.EvalTriggerJobRegister, 3384 JobID: job.ID, 3385 Status: structs.EvalStatusPending, 3386 } 3387 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3388 3389 // Process the evaluation 3390 err := h.Process(NewBatchScheduler, eval) 3391 if err != nil { 3392 t.Fatalf("err: %v", err) 3393 } 3394 3395 // Ensure a plan 3396 if len(h.Plans) != 1 { 3397 t.Fatalf("bad: %#v", h.Plans) 3398 } 3399 3400 // Lookup the allocations by JobID 3401 ws := memdb.NewWatchSet() 3402 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3403 noErr(t, err) 3404 3405 // Ensure a replacement alloc was placed.
3406 if len(out) != 4 { 3407 t.Fatalf("bad: %#v", out) 3408 } 3409 3410 // Assert that we have the correct number of each alloc name 3411 expected := map[string]int{ 3412 "my-job.web[0]": 1, 3413 "my-job.web[1]": 2, 3414 "my-job.web[2]": 1, 3415 } 3416 actual := make(map[string]int, 3) 3417 for _, alloc := range out { 3418 actual[alloc.Name] += 1 3419 } 3420 require.Equal(t, actual, expected) 3421 3422 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3423 } 3424 3425 func TestBatchSched_Run_FailedAllocQueuedAllocations(t *testing.T) { 3426 h := NewHarness(t) 3427 3428 node := mock.Node() 3429 node.Drain = true 3430 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3431 3432 // Create a job 3433 job := mock.Job() 3434 job.Type = structs.JobTypeBatch 3435 job.TaskGroups[0].Count = 1 3436 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3437 3438 tgName := job.TaskGroups[0].Name 3439 now := time.Now() 3440 3441 // Create a failed alloc 3442 alloc := mock.Alloc() 3443 alloc.Job = job 3444 alloc.JobID = job.ID 3445 alloc.NodeID = node.ID 3446 alloc.Name = "my-job.web[0]" 3447 alloc.ClientStatus = structs.AllocClientStatusFailed 3448 alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 3449 StartedAt: now.Add(-1 * time.Hour), 3450 FinishedAt: now.Add(-10 * time.Second)}} 3451 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3452 3453 // Create a mock evaluation to register the job 3454 eval := &structs.Evaluation{ 3455 Namespace: structs.DefaultNamespace, 3456 ID: uuid.Generate(), 3457 Priority: job.Priority, 3458 TriggeredBy: structs.EvalTriggerJobRegister, 3459 JobID: job.ID, 3460 Status: structs.EvalStatusPending, 3461 } 3462 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3463 3464 // Process the evaluation 3465 err := h.Process(NewBatchScheduler, eval) 3466 if err != nil { 3467 t.Fatalf("err: %v", err) 3468 } 3469 3470 // Ensure that the scheduler is recording the correct number of queued 3471 // allocations 3472 queued := h.Evals[0].QueuedAllocations["web"] 3473 if queued != 1 { 3474 t.Fatalf("expected: %v, actual: %v", 1, queued) 3475 } 3476 } 3477 3478 func TestBatchSched_ReRun_SuccessfullyFinishedAlloc(t *testing.T) { 3479 h := NewHarness(t) 3480 3481 // Create two nodes, one that is drained and has a successfully finished 3482 // alloc and a fresh undrained one 3483 node := mock.Node() 3484 node.Drain = true 3485 node2 := mock.Node() 3486 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3487 noErr(t, h.State.UpsertNode(h.NextIndex(), node2)) 3488 3489 // Create a job 3490 job := mock.Job() 3491 job.Type = structs.JobTypeBatch 3492 job.TaskGroups[0].Count = 1 3493 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3494 3495 // Create a successful alloc 3496 alloc := mock.Alloc() 3497 alloc.Job = job 3498 alloc.JobID = job.ID 3499 alloc.NodeID = node.ID 3500 alloc.Name = "my-job.web[0]" 3501 alloc.ClientStatus = structs.AllocClientStatusComplete 3502 alloc.TaskStates = map[string]*structs.TaskState{ 3503 "web": { 3504 State: structs.TaskStateDead, 3505 Events: []*structs.TaskEvent{ 3506 { 3507 Type: structs.TaskTerminated, 3508 ExitCode: 0, 3509 }, 3510 }, 3511 }, 3512 } 3513 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3514 3515 // Create a mock evaluation to rerun the job 3516 eval := &structs.Evaluation{ 3517 Namespace: structs.DefaultNamespace, 3518 ID: uuid.Generate(), 3519 Priority: job.Priority, 3520 TriggeredBy: structs.EvalTriggerJobRegister, 3521 JobID: job.ID, 3522 Status: 
structs.EvalStatusPending, 3523 } 3524 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3525 3526 // Process the evaluation 3527 err := h.Process(NewBatchScheduler, eval) 3528 if err != nil { 3529 t.Fatalf("err: %v", err) 3530 } 3531 3532 // Ensure no plan 3533 if len(h.Plans) != 0 { 3534 t.Fatalf("bad: %#v", h.Plans) 3535 } 3536 3537 // Lookup the allocations by JobID 3538 ws := memdb.NewWatchSet() 3539 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3540 noErr(t, err) 3541 3542 // Ensure no replacement alloc was placed. 3543 if len(out) != 1 { 3544 t.Fatalf("bad: %#v", out) 3545 } 3546 3547 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3548 } 3549 3550 // This test checks that terminal allocations that receive an in-place update 3551 // are not added to the plan 3552 func TestBatchSched_JobModify_InPlace_Terminal(t *testing.T) { 3553 h := NewHarness(t) 3554 3555 // Create some nodes 3556 var nodes []*structs.Node 3557 for i := 0; i < 10; i++ { 3558 node := mock.Node() 3559 nodes = append(nodes, node) 3560 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3561 } 3562 3563 // Generate a fake job with allocations 3564 job := mock.Job() 3565 job.Type = structs.JobTypeBatch 3566 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3567 3568 var allocs []*structs.Allocation 3569 for i := 0; i < 10; i++ { 3570 alloc := mock.Alloc() 3571 alloc.Job = job 3572 alloc.JobID = job.ID 3573 alloc.NodeID = nodes[i].ID 3574 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3575 alloc.ClientStatus = structs.AllocClientStatusComplete 3576 allocs = append(allocs, alloc) 3577 } 3578 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3579 3580 // Create a mock evaluation to trigger the job 3581 eval := &structs.Evaluation{ 3582 Namespace: structs.DefaultNamespace, 3583 ID: uuid.Generate(), 3584 Priority: 50, 3585 TriggeredBy: structs.EvalTriggerJobRegister, 3586 JobID: job.ID, 3587 Status: structs.EvalStatusPending, 3588 } 3589 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3590 3591 // Process the evaluation 3592 err := h.Process(NewBatchScheduler, eval) 3593 if err != nil { 3594 t.Fatalf("err: %v", err) 3595 } 3596 3597 // Ensure no plan 3598 if len(h.Plans) != 0 { 3599 t.Fatalf("bad: %#v", h.Plans[0]) 3600 } 3601 } 3602 3603 // This test ensures that terminal jobs from older versions are ignored.
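// (Added context, inferred from the assertions below: every alloc from both
// the old and new job versions is already terminal, so the destructive update
// has nothing to stop and nothing to place, and no plan should be submitted.)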
3604 func TestBatchSched_JobModify_Destructive_Terminal(t *testing.T) { 3605 h := NewHarness(t) 3606 3607 // Create some nodes 3608 var nodes []*structs.Node 3609 for i := 0; i < 10; i++ { 3610 node := mock.Node() 3611 nodes = append(nodes, node) 3612 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3613 } 3614 3615 // Generate a fake job with allocations 3616 job := mock.Job() 3617 job.Type = structs.JobTypeBatch 3618 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3619 3620 var allocs []*structs.Allocation 3621 for i := 0; i < 10; i++ { 3622 alloc := mock.Alloc() 3623 alloc.Job = job 3624 alloc.JobID = job.ID 3625 alloc.NodeID = nodes[i].ID 3626 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3627 alloc.ClientStatus = structs.AllocClientStatusComplete 3628 allocs = append(allocs, alloc) 3629 } 3630 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3631 3632 // Update the job 3633 job2 := mock.Job() 3634 job2.ID = job.ID 3635 job2.Type = structs.JobTypeBatch 3636 job2.Version++ 3637 job2.TaskGroups[0].Tasks[0].Env = map[string]string{"foo": "bar"} 3638 noErr(t, h.State.UpsertJob(h.NextIndex(), job2)) 3639 3640 allocs = nil 3641 for i := 0; i < 10; i++ { 3642 alloc := mock.Alloc() 3643 alloc.Job = job2 3644 alloc.JobID = job2.ID 3645 alloc.NodeID = nodes[i].ID 3646 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3647 alloc.ClientStatus = structs.AllocClientStatusComplete 3648 alloc.TaskStates = map[string]*structs.TaskState{ 3649 "web": { 3650 State: structs.TaskStateDead, 3651 Events: []*structs.TaskEvent{ 3652 { 3653 Type: structs.TaskTerminated, 3654 ExitCode: 0, 3655 }, 3656 }, 3657 }, 3658 } 3659 allocs = append(allocs, alloc) 3660 } 3661 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3662 3663 // Create a mock evaluation to register the job 3664 eval := &structs.Evaluation{ 3665 Namespace: structs.DefaultNamespace, 3666 ID: uuid.Generate(), 3667 Priority: 50, 3668 TriggeredBy: structs.EvalTriggerJobRegister, 3669 JobID: job.ID, 3670 Status: structs.EvalStatusPending, 3671 } 3672 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3673 3674 // Process the evaluation 3675 err := h.Process(NewBatchScheduler, eval) 3676 if err != nil { 3677 t.Fatalf("err: %v", err) 3678 } 3679 3680 // Ensure no plan 3681 if len(h.Plans) != 0 { 3682 t.Fatalf("bad: %#v", h.Plans) 3683 } 3684 } 3685 3686 // This test asserts that an allocation from an old job that is running on a 3687 // drained node is cleaned up.
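// (Expected shape of the plan, per the checks below: one eviction from the
// drained node and one replacement placed on the remaining eligible node.)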
3688 func TestBatchSched_NodeDrain_Running_OldJob(t *testing.T) { 3689 h := NewHarness(t) 3690 3691 // Create two nodes, one that is drained and has a successfully finished 3692 // alloc and a fresh undrained one 3693 node := mock.Node() 3694 node.Drain = true 3695 node2 := mock.Node() 3696 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3697 noErr(t, h.State.UpsertNode(h.NextIndex(), node2)) 3698 3699 // Create a job 3700 job := mock.Job() 3701 job.Type = structs.JobTypeBatch 3702 job.TaskGroups[0].Count = 1 3703 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3704 3705 // Create a running alloc 3706 alloc := mock.Alloc() 3707 alloc.Job = job 3708 alloc.JobID = job.ID 3709 alloc.NodeID = node.ID 3710 alloc.Name = "my-job.web[0]" 3711 alloc.ClientStatus = structs.AllocClientStatusRunning 3712 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3713 3714 // Create an updated job 3715 job2 := job.Copy() 3716 job2.TaskGroups[0].Tasks[0].Env = map[string]string{"foo": "bar"} 3717 job2.Version++ 3718 noErr(t, h.State.UpsertJob(h.NextIndex(), job2)) 3719 3720 // Create a mock evaluation to register the job 3721 eval := &structs.Evaluation{ 3722 Namespace: structs.DefaultNamespace, 3723 ID: uuid.Generate(), 3724 Priority: job.Priority, 3725 TriggeredBy: structs.EvalTriggerJobRegister, 3726 JobID: job.ID, 3727 Status: structs.EvalStatusPending, 3728 } 3729 3730 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3731 3732 // Process the evaluation 3733 err := h.Process(NewBatchScheduler, eval) 3734 if err != nil { 3735 t.Fatalf("err: %v", err) 3736 } 3737 3738 // Ensure a plan 3739 if len(h.Plans) != 1 { 3740 t.Fatalf("bad: %#v", h.Plans) 3741 } 3742 3743 plan := h.Plans[0] 3744 3745 // Ensure the plan evicted 1 3746 if len(plan.NodeUpdate[node.ID]) != 1 { 3747 t.Fatalf("bad: %#v", plan) 3748 } 3749 3750 // Ensure the plan places 1 3751 if len(plan.NodeAllocation[node2.ID]) != 1 { 3752 t.Fatalf("bad: %#v", plan) 3753 } 3754 3755 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3756 } 3757 3758 // This test asserts that an allocation from a job that is complete on a 3759 // drained node is ignored.
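// (Since the batch alloc already completed successfully, draining its node
// should produce no migration and no plan at all.)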
3760 func TestBatchSched_NodeDrain_Complete(t *testing.T) { 3761 h := NewHarness(t) 3762 3763 // Create two nodes, one that is drained and has a successfully finished 3764 // alloc and a fresh undrained one 3765 node := mock.Node() 3766 node.Drain = true 3767 node2 := mock.Node() 3768 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3769 noErr(t, h.State.UpsertNode(h.NextIndex(), node2)) 3770 3771 // Create a job 3772 job := mock.Job() 3773 job.Type = structs.JobTypeBatch 3774 job.TaskGroups[0].Count = 1 3775 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3776 3777 // Create a complete alloc 3778 alloc := mock.Alloc() 3779 alloc.Job = job 3780 alloc.JobID = job.ID 3781 alloc.NodeID = node.ID 3782 alloc.Name = "my-job.web[0]" 3783 alloc.ClientStatus = structs.AllocClientStatusComplete 3784 alloc.TaskStates = make(map[string]*structs.TaskState) 3785 alloc.TaskStates["web"] = &structs.TaskState{ 3786 State: structs.TaskStateDead, 3787 Events: []*structs.TaskEvent{ 3788 { 3789 Type: structs.TaskTerminated, 3790 ExitCode: 0, 3791 }, 3792 }, 3793 } 3794 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3795 3796 // Create a mock evaluation to register the job 3797 eval := &structs.Evaluation{ 3798 Namespace: structs.DefaultNamespace, 3799 ID: uuid.Generate(), 3800 Priority: job.Priority, 3801 TriggeredBy: structs.EvalTriggerJobRegister, 3802 JobID: job.ID, 3803 Status: structs.EvalStatusPending, 3804 } 3805 3806 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3807 3808 // Process the evaluation 3809 err := h.Process(NewBatchScheduler, eval) 3810 if err != nil { 3811 t.Fatalf("err: %v", err) 3812 } 3813 3814 // Ensure no plan 3815 if len(h.Plans) != 0 { 3816 t.Fatalf("bad: %#v", h.Plans) 3817 } 3818 3819 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3820 } 3821 3822 // This is a slightly odd test but it ensures that we handle a scale down of a 3823 // task group's count and that it works even if all the allocs have the same 3824 // name. 
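// (With the count dropped to 1 and five running copies of "my-job.web[0]",
// the scheduler should keep one alloc and evict the other four.)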
3825 func TestBatchSched_ScaleDown_SameName(t *testing.T) { 3826 h := NewHarness(t) 3827 3828 // Create a node 3829 node := mock.Node() 3830 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3831 3832 // Create a job 3833 job := mock.Job() 3834 job.Type = structs.JobTypeBatch 3835 job.TaskGroups[0].Count = 1 3836 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3837 3838 // Create a few running allocs 3839 var allocs []*structs.Allocation 3840 for i := 0; i < 5; i++ { 3841 alloc := mock.Alloc() 3842 alloc.Job = job 3843 alloc.JobID = job.ID 3844 alloc.NodeID = node.ID 3845 alloc.Name = "my-job.web[0]" 3846 alloc.ClientStatus = structs.AllocClientStatusRunning 3847 allocs = append(allocs, alloc) 3848 } 3849 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3850 3851 // Create a mock evaluation to register the job 3852 eval := &structs.Evaluation{ 3853 Namespace: structs.DefaultNamespace, 3854 ID: uuid.Generate(), 3855 Priority: job.Priority, 3856 TriggeredBy: structs.EvalTriggerJobRegister, 3857 JobID: job.ID, 3858 Status: structs.EvalStatusPending, 3859 } 3860 3861 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3862 3863 // Process the evaluation 3864 err := h.Process(NewBatchScheduler, eval) 3865 if err != nil { 3866 t.Fatalf("err: %v", err) 3867 } 3868 3869 // Ensure a plan 3870 if len(h.Plans) != 1 { 3871 t.Fatalf("bad: %#v", h.Plans) 3872 } 3873 3874 plan := h.Plans[0] 3875 3876 // Ensure the plan evicted 4 of the 5 3877 if len(plan.NodeUpdate[node.ID]) != 4 { 3878 t.Fatalf("bad: %#v", plan) 3879 } 3880 3881 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3882 } 3883 3884 func TestGenericSched_ChainedAlloc(t *testing.T) { 3885 h := NewHarness(t) 3886 3887 // Create some nodes 3888 for i := 0; i < 10; i++ { 3889 node := mock.Node() 3890 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3891 } 3892 3893 // Create a job 3894 job := mock.Job() 3895 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3896 3897 // Create a mock evaluation to register the job 3898 eval := &structs.Evaluation{ 3899 Namespace: structs.DefaultNamespace, 3900 ID: uuid.Generate(), 3901 Priority: job.Priority, 3902 TriggeredBy: structs.EvalTriggerJobRegister, 3903 JobID: job.ID, 3904 Status: structs.EvalStatusPending, 3905 } 3906 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3907 // Process the evaluation 3908 if err := h.Process(NewServiceScheduler, eval); err != nil { 3909 t.Fatalf("err: %v", err) 3910 } 3911 3912 var allocIDs []string 3913 for _, allocList := range h.Plans[0].NodeAllocation { 3914 for _, alloc := range allocList { 3915 allocIDs = append(allocIDs, alloc.ID) 3916 } 3917 } 3918 sort.Strings(allocIDs) 3919 3920 // Create a new harness to invoke the scheduler again 3921 h1 := NewHarnessWithState(t, h.State) 3922 job1 := mock.Job() 3923 job1.ID = job.ID 3924 job1.TaskGroups[0].Tasks[0].Env["foo"] = "bar" 3925 job1.TaskGroups[0].Count = 12 3926 noErr(t, h1.State.UpsertJob(h1.NextIndex(), job1)) 3927 3928 // Create a mock evaluation to update the job 3929 eval1 := &structs.Evaluation{ 3930 Namespace: structs.DefaultNamespace, 3931 ID: uuid.Generate(), 3932 Priority: job1.Priority, 3933 TriggeredBy: structs.EvalTriggerJobRegister, 3934 JobID: job1.ID, 3935 Status: structs.EvalStatusPending, 3936 } 3937 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval1})) 3938 3939 // Process the evaluation 3940 if err := h1.Process(NewServiceScheduler, eval1); err != nil { 3941 t.Fatalf("err: %v", err) 3942 } 3943 3944 plan := h1.Plans[0] 3945
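// The updated job raises the count from 10 to 12, so the plan should chain
// ten replacement allocs to the originals and create two brand-new ones.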
3946 // Collect the IDs of the chained allocations and of the new allocations 3947 // that aren't chained to a previous allocation 3948 var prevAllocs []string 3949 var newAllocs []string 3950 for _, allocList := range plan.NodeAllocation { 3951 for _, alloc := range allocList { 3952 if alloc.PreviousAllocation == "" { 3953 newAllocs = append(newAllocs, alloc.ID) 3954 continue 3955 } 3956 prevAllocs = append(prevAllocs, alloc.PreviousAllocation) 3957 } 3958 } 3959 sort.Strings(prevAllocs) 3960 3961 // Ensure the chained replacements reference exactly the original 3962 // allocation IDs 3963 if !reflect.DeepEqual(prevAllocs, allocIDs) { 3964 t.Fatalf("expected: %v, actual: %v", allocIDs, prevAllocs) 3965 } 3966 3967 // Ensure the two new allocations don't have any chained allocations 3968 if len(newAllocs) != 2 { 3969 t.Fatalf("expected: %v, actual: %v", 2, len(newAllocs)) 3970 } 3971 } 3972 3973 func TestServiceSched_NodeDrain_Sticky(t *testing.T) { 3974 h := NewHarness(t) 3975 3976 // Register a draining node 3977 node := mock.Node() 3978 node.Drain = true 3979 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3980 3981 // Create an alloc on the draining node 3982 alloc := mock.Alloc() 3983 alloc.Name = "my-job.web[0]" 3984 alloc.NodeID = node.ID 3985 alloc.Job.TaskGroups[0].Count = 1 3986 alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true 3987 alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 3988 noErr(t, h.State.UpsertJob(h.NextIndex(), alloc.Job)) 3989 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3990 3991 // Create a mock evaluation to deal with drain 3992 eval := &structs.Evaluation{ 3993 Namespace: structs.DefaultNamespace, 3994 ID: uuid.Generate(), 3995 Priority: 50, 3996 TriggeredBy: structs.EvalTriggerNodeUpdate, 3997 JobID: alloc.Job.ID, 3998 NodeID: node.ID, 3999 Status: structs.EvalStatusPending, 4000 } 4001 4002 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4003 4004 // Process the evaluation 4005 err := h.Process(NewServiceScheduler, eval) 4006 if err != nil { 4007 t.Fatalf("err: %v", err) 4008 } 4009 4010 // Ensure a single plan 4011 if len(h.Plans) != 1 { 4012 t.Fatalf("bad: %#v", h.Plans) 4013 } 4014 plan := h.Plans[0] 4015 4016 // Ensure the plan evicted all allocs 4017 if len(plan.NodeUpdate[node.ID]) != 1 { 4018 t.Fatalf("bad: %#v", plan) 4019 } 4020 4021 // Ensure the plan didn't create any new allocations 4022 var planned []*structs.Allocation 4023 for _, allocList := range plan.NodeAllocation { 4024 planned = append(planned, allocList...) 4025 } 4026 if len(planned) != 0 { 4027 t.Fatalf("bad: %#v", plan) 4028 } 4029 4030 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4031 } 4032
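// The sticky test above hinges on placement preference: a replacement for an
// alloc whose task group uses a sticky ephemeral disk should land on the node
// that already holds the data. Since that node is draining, there is nowhere
// valid to place it, so the plan evicts without creating a new alloc. A
// minimal sketch of the preference under those assumptions; pickStickyNode is
// an illustrative name, not the scheduler's real node-selection logic.
func pickStickyNode(prev *structs.Allocation, candidates []*structs.Node) *structs.Node {
	for _, n := range candidates {
		// Only the node holding the previous alloc's ephemeral disk is
		// acceptable, and a draining node cannot receive placements.
		if n.ID == prev.NodeID && !n.Drain {
			return n
		}
	}
	return nil
}

4033 // This test ensures that when a job is stopped, the scheduler properly cancels 4034 // an outstanding deployment.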
4035 func TestServiceSched_CancelDeployment_Stopped(t *testing.T) { 4036 h := NewHarness(t) 4037 4038 // Generate a fake job 4039 job := mock.Job() 4040 job.JobModifyIndex = job.CreateIndex + 1 4041 job.ModifyIndex = job.CreateIndex + 1 4042 job.Stop = true 4043 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 4044 4045 // Create a deployment 4046 d := mock.Deployment() 4047 d.JobID = job.ID 4048 d.JobCreateIndex = job.CreateIndex 4049 d.JobModifyIndex = job.JobModifyIndex - 1 4050 noErr(t, h.State.UpsertDeployment(h.NextIndex(), d)) 4051 4052 // Create a mock evaluation to deregister the job 4053 eval := &structs.Evaluation{ 4054 Namespace: structs.DefaultNamespace, 4055 ID: uuid.Generate(), 4056 Priority: 50, 4057 TriggeredBy: structs.EvalTriggerJobDeregister, 4058 JobID: job.ID, 4059 Status: structs.EvalStatusPending, 4060 } 4061 4062 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4063 4064 // Process the evaluation 4065 err := h.Process(NewServiceScheduler, eval) 4066 if err != nil { 4067 t.Fatalf("err: %v", err) 4068 } 4069 4070 // Ensure a single plan 4071 if len(h.Plans) != 1 { 4072 t.Fatalf("bad: %#v", h.Plans) 4073 } 4074 plan := h.Plans[0] 4075 4076 // Ensure the plan cancelled the existing deployment 4077 ws := memdb.NewWatchSet() 4078 out, err := h.State.LatestDeploymentByJobID(ws, job.Namespace, job.ID) 4079 noErr(t, err) 4080 4081 if out == nil { 4082 t.Fatalf("No deployment for job") 4083 } 4084 if out.ID != d.ID { 4085 t.Fatalf("Latest deployment for job is different than original deployment") 4086 } 4087 if out.Status != structs.DeploymentStatusCancelled { 4088 t.Fatalf("Deployment status is %q, want %q", out.Status, structs.DeploymentStatusCancelled) 4089 } 4090 if out.StatusDescription != structs.DeploymentStatusDescriptionStoppedJob { 4091 t.Fatalf("Deployment status description is %q, want %q", 4092 out.StatusDescription, structs.DeploymentStatusDescriptionStoppedJob) 4093 } 4094 4095 // Ensure the plan didn't allocate anything 4096 var planned []*structs.Allocation 4097 for _, allocList := range plan.NodeAllocation { 4098 planned = append(planned, allocList...) 4099 } 4100 if len(planned) != 0 { 4101 t.Fatalf("bad: %#v", plan) 4102 } 4103 4104 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4105 } 4106
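// Both cancellation tests reduce to one decision: cancel the deployment when
// its job has been stopped, or when the deployment was created for an older
// version of the job. A minimal sketch of that decision using only the fields
// exercised by these tests; shouldCancelDeployment is an illustrative name,
// not the scheduler's actual helper.
func shouldCancelDeployment(job *structs.Job, d *structs.Deployment) (bool, string) {
	// A stopped job has no use for an in-flight deployment.
	if job.Stop {
		return true, structs.DeploymentStatusDescriptionStoppedJob
	}
	// A deployment pinned to an older modify index belongs to a superseded
	// version of the job.
	if d.JobCreateIndex == job.CreateIndex && d.JobModifyIndex < job.JobModifyIndex {
		return true, structs.DeploymentStatusDescriptionNewerJob
	}
	return false, ""
}

4107 // This test ensures that when a job is updated and has an old deployment, the scheduler properly cancels 4108 // the deployment.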
4109 func TestServiceSched_CancelDeployment_NewerJob(t *testing.T) { 4110 h := NewHarness(t) 4111 4112 // Generate a fake job 4113 job := mock.Job() 4114 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 4115 4116 // Create a deployment for an old version of the job 4117 d := mock.Deployment() 4118 d.JobID = job.ID 4119 noErr(t, h.State.UpsertDeployment(h.NextIndex(), d)) 4120 4121 // Upsert again to bump job version 4122 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 4123 4124 // Create a mock evaluation to kick the job 4125 eval := &structs.Evaluation{ 4126 Namespace: structs.DefaultNamespace, 4127 ID: uuid.Generate(), 4128 Priority: 50, 4129 TriggeredBy: structs.EvalTriggerJobRegister, 4130 JobID: job.ID, 4131 Status: structs.EvalStatusPending, 4132 } 4133 4134 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4135 4136 // Process the evaluation 4137 err := h.Process(NewServiceScheduler, eval) 4138 if err != nil { 4139 t.Fatalf("err: %v", err) 4140 } 4141 4142 // Ensure a single plan 4143 if len(h.Plans) != 1 { 4144 t.Fatalf("bad: %#v", h.Plans) 4145 } 4146 plan := h.Plans[0] 4147 4148 // Ensure the plan cancelled the existing deployment 4149 ws := memdb.NewWatchSet() 4150 out, err := h.State.LatestDeploymentByJobID(ws, job.Namespace, job.ID) 4151 noErr(t, err) 4152 4153 if out == nil { 4154 t.Fatalf("No deployment for job") 4155 } 4156 if out.ID != d.ID { 4157 t.Fatalf("Latest deployment for job is different than original deployment") 4158 } 4159 if out.Status != structs.DeploymentStatusCancelled { 4160 t.Fatalf("Deployment status is %q, want %q", out.Status, structs.DeploymentStatusCancelled) 4161 } 4162 if out.StatusDescription != structs.DeploymentStatusDescriptionNewerJob { 4163 t.Fatalf("Deployment status description is %q, want %q", 4164 out.StatusDescription, structs.DeploymentStatusDescriptionNewerJob) 4165 } 4166 // Ensure the plan didn't allocate anything 4167 var planned []*structs.Allocation 4168 for _, allocList := range plan.NodeAllocation { 4169 planned = append(planned, allocList...) 
4170 } 4171 if len(planned) != 0 { 4172 t.Fatalf("bad: %#v", plan) 4173 } 4174 4175 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4176 } 4177 4178 // Table-driven tests for the carry forward 4179 // of past reschedule events 4180 func Test_updateRescheduleTracker(t *testing.T) { 4181 4182 t1 := time.Now().UTC() 4183 alloc := mock.Alloc() 4184 prevAlloc := mock.Alloc() 4185 4186 type testCase struct { 4187 desc string 4188 prevAllocEvents []*structs.RescheduleEvent 4189 reschedPolicy *structs.ReschedulePolicy 4190 expectedRescheduleEvents []*structs.RescheduleEvent 4191 reschedTime time.Time 4192 } 4193 4194 testCases := []testCase{ 4195 { 4196 desc: "No past events", 4197 prevAllocEvents: nil, 4198 reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second}, 4199 reschedTime: t1, 4200 expectedRescheduleEvents: []*structs.RescheduleEvent{{RescheduleTime: t1.UnixNano(), PrevAllocID: prevAlloc.ID, PrevNodeID: prevAlloc.NodeID, Delay: 5 * time.Second}}, 4201 }, 4202 { 4203 desc: "one past event, linear delay", 4204 prevAllocEvents: []*structs.RescheduleEvent{ 4205 {RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4206 PrevAllocID: prevAlloc.ID, 4207 PrevNodeID: prevAlloc.NodeID, 4208 Delay: 5 * time.Second}}, 4209 reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second}, 4210 reschedTime: t1, 4211 expectedRescheduleEvents: []*structs.RescheduleEvent{ 4212 { 4213 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4214 PrevAllocID: prevAlloc.ID, 4215 PrevNodeID: prevAlloc.NodeID, 4216 Delay: 5 * time.Second, 4217 }, 4218 { 4219 RescheduleTime: t1.UnixNano(), 4220 PrevAllocID: prevAlloc.ID, 4221 PrevNodeID: prevAlloc.NodeID, 4222 Delay: 5 * time.Second, 4223 }, 4224 }, 4225 }, 4226 { 4227 desc: "one past event, fibonacci delay", 4228 prevAllocEvents: []*structs.RescheduleEvent{ 4229 {RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4230 PrevAllocID: prevAlloc.ID, 4231 PrevNodeID: prevAlloc.NodeID, 4232 Delay: 5 * time.Second}}, 4233 reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 60 * time.Second}, 4234 reschedTime: t1, 4235 expectedRescheduleEvents: []*structs.RescheduleEvent{ 4236 { 4237 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4238 PrevAllocID: prevAlloc.ID, 4239 PrevNodeID: prevAlloc.NodeID, 4240 Delay: 5 * time.Second, 4241 }, 4242 { 4243 RescheduleTime: t1.UnixNano(), 4244 PrevAllocID: prevAlloc.ID, 4245 PrevNodeID: prevAlloc.NodeID, 4246 Delay: 5 * time.Second, 4247 }, 4248 }, 4249 }, 4250 { 4251 desc: "eight past events, fibonacci delay, unlimited", 4252 prevAllocEvents: []*structs.RescheduleEvent{ 4253 { 4254 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4255 PrevAllocID: prevAlloc.ID, 4256 PrevNodeID: prevAlloc.NodeID, 4257 Delay: 5 * time.Second, 4258 }, 4259 { 4260 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4261 PrevAllocID: prevAlloc.ID, 4262 PrevNodeID: prevAlloc.NodeID, 4263 Delay: 5 * time.Second, 4264 }, 4265 { 4266 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4267 PrevAllocID: prevAlloc.ID, 4268 PrevNodeID: prevAlloc.NodeID, 4269 Delay: 10 * time.Second, 4270 }, 4271 { 4272 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4273 PrevAllocID: prevAlloc.ID, 4274 PrevNodeID: prevAlloc.NodeID, 4275 Delay: 15 * time.Second, 4276 }, 4277 { 4278 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4279 PrevAllocID: prevAlloc.ID, 4280 PrevNodeID:
prevAlloc.NodeID, 4281 Delay: 25 * time.Second, 4282 }, 4283 { 4284 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4285 PrevAllocID: prevAlloc.ID, 4286 PrevNodeID: prevAlloc.NodeID, 4287 Delay: 40 * time.Second, 4288 }, 4289 { 4290 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4291 PrevAllocID: prevAlloc.ID, 4292 PrevNodeID: prevAlloc.NodeID, 4293 Delay: 65 * time.Second, 4294 }, 4295 { 4296 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4297 PrevAllocID: prevAlloc.ID, 4298 PrevNodeID: prevAlloc.NodeID, 4299 Delay: 105 * time.Second, 4300 }, 4301 }, 4302 reschedPolicy: &structs.ReschedulePolicy{Unlimited: true, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 240 * time.Second}, 4303 reschedTime: t1, 4304 expectedRescheduleEvents: []*structs.RescheduleEvent{ 4305 { 4306 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4307 PrevAllocID: prevAlloc.ID, 4308 PrevNodeID: prevAlloc.NodeID, 4309 Delay: 15 * time.Second, 4310 }, 4311 { 4312 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4313 PrevAllocID: prevAlloc.ID, 4314 PrevNodeID: prevAlloc.NodeID, 4315 Delay: 25 * time.Second, 4316 }, 4317 { 4318 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4319 PrevAllocID: prevAlloc.ID, 4320 PrevNodeID: prevAlloc.NodeID, 4321 Delay: 40 * time.Second, 4322 }, 4323 { 4324 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4325 PrevAllocID: prevAlloc.ID, 4326 PrevNodeID: prevAlloc.NodeID, 4327 Delay: 65 * time.Second, 4328 }, 4329 { 4330 RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(), 4331 PrevAllocID: prevAlloc.ID, 4332 PrevNodeID: prevAlloc.NodeID, 4333 Delay: 105 * time.Second, 4334 }, 4335 { 4336 RescheduleTime: t1.UnixNano(), 4337 PrevAllocID: prevAlloc.ID, 4338 PrevNodeID: prevAlloc.NodeID, 4339 Delay: 170 * time.Second, 4340 }, 4341 }, 4342 }, 4343 { 4344 desc: "old attempts past interval, exponential delay, limited", 4345 prevAllocEvents: []*structs.RescheduleEvent{ 4346 { 4347 RescheduleTime: t1.Add(-2 * time.Hour).UnixNano(), 4348 PrevAllocID: prevAlloc.ID, 4349 PrevNodeID: prevAlloc.NodeID, 4350 Delay: 5 * time.Second, 4351 }, 4352 { 4353 RescheduleTime: t1.Add(-70 * time.Minute).UnixNano(), 4354 PrevAllocID: prevAlloc.ID, 4355 PrevNodeID: prevAlloc.NodeID, 4356 Delay: 10 * time.Second, 4357 }, 4358 { 4359 RescheduleTime: t1.Add(-30 * time.Minute).UnixNano(), 4360 PrevAllocID: prevAlloc.ID, 4361 PrevNodeID: prevAlloc.NodeID, 4362 Delay: 20 * time.Second, 4363 }, 4364 { 4365 RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(), 4366 PrevAllocID: prevAlloc.ID, 4367 PrevNodeID: prevAlloc.NodeID, 4368 Delay: 40 * time.Second, 4369 }, 4370 }, 4371 reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 1 * time.Hour, Attempts: 5, Delay: 5 * time.Second, DelayFunction: "exponential", MaxDelay: 240 * time.Second}, 4372 reschedTime: t1, 4373 expectedRescheduleEvents: []*structs.RescheduleEvent{ 4374 { 4375 RescheduleTime: t1.Add(-30 * time.Minute).UnixNano(), 4376 PrevAllocID: prevAlloc.ID, 4377 PrevNodeID: prevAlloc.NodeID, 4378 Delay: 20 * time.Second, 4379 }, 4380 { 4381 RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(), 4382 PrevAllocID: prevAlloc.ID, 4383 PrevNodeID: prevAlloc.NodeID, 4384 Delay: 40 * time.Second, 4385 }, 4386 { 4387 RescheduleTime: t1.UnixNano(), 4388 PrevAllocID: prevAlloc.ID, 4389 PrevNodeID: prevAlloc.NodeID, 4390 Delay: 80 * time.Second, 4391 }, 4392 }, 4393 }, 4394 } 4395 4396 for _, tc := range testCases { 4397 t.Run(tc.desc, func(t *testing.T) { 4398 require := require.New(t) 4399 prevAlloc.RescheduleTracker =
&structs.RescheduleTracker{Events: tc.prevAllocEvents} 4400 prevAlloc.Job.LookupTaskGroup(prevAlloc.TaskGroup).ReschedulePolicy = tc.reschedPolicy 4401 updateRescheduleTracker(alloc, prevAlloc, tc.reschedTime) 4402 require.Equal(tc.expectedRescheduleEvents, alloc.RescheduleTracker.Events) 4403 }) 4404 } 4405 4406 }
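// The fibonacci cases above expect the delay progression 5, 5, 10, 15, 25,
// 40, 65, 105, 170 seconds, and the exponential case 5, 10, 20, 40, 80
// seconds, both capped at the policy's MaxDelay. A minimal sketch of how the
// next delay follows from the recorded events, ignoring the interval-based
// pruning that updateRescheduleTracker also performs; nextRescheduleDelay is
// an illustrative name, not the scheduler's actual helper.
func nextRescheduleDelay(events []*structs.RescheduleEvent, policy *structs.ReschedulePolicy) time.Duration {
	next := policy.Delay
	switch policy.DelayFunction {
	case "exponential":
		// Double the most recent delay, starting from the base delay.
		if n := len(events); n > 0 {
			next = events[n-1].Delay * 2
		}
	case "fibonacci":
		// Sum the two most recent delays; with fewer than two recorded
		// events the base delay is used again (hence 5, 5, 10, ...).
		if n := len(events); n >= 2 {
			next = events[n-1].Delay + events[n-2].Delay
		}
	}
	// Both growing functions are capped at the policy's MaxDelay.
	if policy.MaxDelay > 0 && next > policy.MaxDelay {
		next = policy.MaxDelay
	}
	return next
}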