github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/scheduler/generic_sched_test.go

package scheduler

import (
    "fmt"
    "reflect"
    "sort"
    "testing"
    "time"

    memdb "github.com/hashicorp/go-memdb"
    "github.com/hashicorp/nomad/helper"
    "github.com/hashicorp/nomad/helper/uuid"
    "github.com/hashicorp/nomad/nomad/mock"
    "github.com/hashicorp/nomad/nomad/structs"
    "github.com/stretchr/testify/assert"
    "github.com/stretchr/testify/require"
)

func TestServiceSched_JobRegister(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    for i := 0; i < 10; i++ {
        node := mock.Node()
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Create a job
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }

    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan doesn't have annotations.
    if plan.Annotations != nil {
        t.Fatalf("expected no annotations")
    }

    // Ensure the eval has no spawned blocked eval
    if len(h.CreateEvals) != 0 {
        t.Fatalf("bad: %#v", h.CreateEvals)
    }
    if h.Evals[0].BlockedEval != "" {
        t.Fatalf("bad: %#v", h.Evals[0])
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 10 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure all allocations placed
    if len(out) != 10 {
        t.Fatalf("bad: %#v", out)
    }

    // Ensure different ports were used.
    used := make(map[int]struct{})
    for _, alloc := range out {
        for _, resource := range alloc.TaskResources {
            for _, port := range resource.Networks[0].DynamicPorts {
                if _, ok := used[port.Value]; ok {
                    t.Fatalf("Port collision %v", port.Value)
                }
                used[port.Value] = struct{}{}
            }
        }
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_StickyAllocs(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    for i := 0; i < 10; i++ {
        node := mock.Node()
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Create a job
    job := mock.Job()
    job.TaskGroups[0].EphemeralDisk.Sticky = true
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    if err := h.Process(NewServiceScheduler, eval); err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure the plan allocated
    plan := h.Plans[0]
    planned := make(map[string]*structs.Allocation)
    for _, allocList := range plan.NodeAllocation {
        for _, alloc := range allocList {
            planned[alloc.ID] = alloc
        }
    }
    if len(planned) != 10 {
        t.Fatalf("bad: %#v", plan)
    }

    // Update the job to force a rolling upgrade
    updated := job.Copy()
    updated.TaskGroups[0].Tasks[0].Resources.CPU += 10
    noErr(t, h.State.UpsertJob(h.NextIndex(), updated))

    // Create a mock evaluation to handle the update
    eval = &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerNodeUpdate,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
    h1 := NewHarnessWithState(t, h.State)
    if err := h1.Process(NewServiceScheduler, eval); err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h1.Plans) != 1 {
        t.Fatalf("bad: %#v", h1.Plans)
    }
    plan = h1.Plans[0]
    var newPlanned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        newPlanned = append(newPlanned, allocList...)
    }
    if len(newPlanned) != 10 {
        t.Fatalf("bad plan: %#v", plan)
    }
    // Ensure that the new allocations were placed on the same node as the older
    // ones
    for _, new := range newPlanned {
        if new.PreviousAllocation == "" {
            t.Fatalf("new alloc %q doesn't have a previous allocation", new.ID)
        }

        old, ok := planned[new.PreviousAllocation]
        if !ok {
            t.Fatalf("new alloc %q previous allocation doesn't match any prior placed alloc (%q)", new.ID, new.PreviousAllocation)
        }
        if new.NodeID != old.NodeID {
            t.Fatalf("new alloc and old alloc node doesn't match; got %q; want %q", new.NodeID, old.NodeID)
        }
    }
}

func TestServiceSched_JobRegister_DiskConstraints(t *testing.T) {
    h := NewHarness(t)

    // Create a node
    node := mock.Node()
    noErr(t, h.State.UpsertNode(h.NextIndex(), node))

    // Create a job with count 2 and an 88GB disk ask so that only one
    // allocation can fit
    job := mock.Job()
    job.TaskGroups[0].Count = 2
    job.TaskGroups[0].EphemeralDisk.SizeMB = 88 * 1024
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }

    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan doesn't have annotations.
    if plan.Annotations != nil {
        t.Fatalf("expected no annotations")
    }

    // Ensure the eval has a blocked eval
    if len(h.CreateEvals) != 1 {
        t.Fatalf("bad: %#v", h.CreateEvals)
    }

    // Ensure the plan allocated only one allocation
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 1 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure only one allocation was placed
    if len(out) != 1 {
        t.Fatalf("bad: %#v", out)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_DistinctHosts(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    for i := 0; i < 10; i++ {
        node := mock.Node()
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Create a job that uses distinct host and has count 1 higher than what is
    // possible.
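    // With distinct_hosts each node can run at most one alloc of the job, so
    // ten nodes can hold only ten of the eleven desired allocs; the leftover
    // placement should fail and surface through a blocked eval.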
    job := mock.Job()
    job.TaskGroups[0].Count = 11
    job.Constraints = append(job.Constraints, &structs.Constraint{Operand: structs.ConstraintDistinctHosts})
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }

    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the eval has spawned a blocked eval
    if len(h.CreateEvals) != 1 {
        t.Fatalf("bad: %#v", h.CreateEvals)
    }

    // Ensure the plan failed to alloc
    outEval := h.Evals[0]
    if len(outEval.FailedTGAllocs) != 1 {
        t.Fatalf("bad: %+v", outEval)
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 10 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure all allocations placed
    if len(out) != 10 {
        t.Fatalf("bad: %#v", out)
    }

    // Ensure a different node was used for each allocation.
    used := make(map[string]struct{})
    for _, alloc := range out {
        if _, ok := used[alloc.NodeID]; ok {
            t.Fatalf("Node collision %v", alloc.NodeID)
        }
        used[alloc.NodeID] = struct{}{}
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_DistinctProperty(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    for i := 0; i < 10; i++ {
        node := mock.Node()
        rack := "rack2"
        if i < 5 {
            rack = "rack1"
        }
        node.Meta["rack"] = rack
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Create a job that uses distinct property and has count higher than what is
    // possible.
    job := mock.Job()
    job.TaskGroups[0].Count = 8
    job.Constraints = append(job.Constraints,
        &structs.Constraint{
            Operand: structs.ConstraintDistinctProperty,
            LTarget: "${meta.rack}",
            RTarget: "2",
        })
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }

    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan doesn't have annotations.
    if plan.Annotations != nil {
        t.Fatalf("expected no annotations")
    }

    // Ensure the eval has spawned a blocked eval
    if len(h.CreateEvals) != 1 {
        t.Fatalf("bad: %#v", h.CreateEvals)
    }

    // Ensure the plan failed to alloc
    outEval := h.Evals[0]
    if len(outEval.FailedTGAllocs) != 1 {
        t.Fatalf("bad: %+v", outEval)
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 4 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure all allocations placed
    if len(out) != 4 {
        t.Fatalf("bad: %#v", out)
    }

    // Ensure no node was used more than twice
    used := make(map[string]uint64)
    for _, alloc := range out {
        used[alloc.NodeID]++
        if count := used[alloc.NodeID]; count > 2 {
            t.Fatalf("Node %v used too much: %d", alloc.NodeID, count)
        }
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_DistinctProperty_TaskGroup(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    for i := 0; i < 2; i++ {
        node := mock.Node()
        node.Meta["ssd"] = "true"
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Create a job that uses distinct property only on one task group.
    job := mock.Job()
    job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy())
    job.TaskGroups[0].Count = 1
    job.TaskGroups[0].Constraints = append(job.TaskGroups[0].Constraints,
        &structs.Constraint{
            Operand: structs.ConstraintDistinctProperty,
            LTarget: "${meta.ssd}",
        })

    job.TaskGroups[1].Name = "tg2"
    job.TaskGroups[1].Count = 2
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan doesn't have annotations.
    if plan.Annotations != nil {
        t.Fatalf("expected no annotations")
    }

    // Ensure the eval hasn't spawned a blocked eval
    if len(h.CreateEvals) != 0 {
        t.Fatalf("bad: %#v", h.CreateEvals[0])
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 3 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure all allocations placed
    if len(out) != 3 {
        t.Fatalf("bad: %#v", out)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_DistinctProperty_TaskGroup_Incr(t *testing.T) {
    h := NewHarness(t)
    assert := assert.New(t)

    // Create a job that uses distinct property over the node-id
    job := mock.Job()
    job.TaskGroups[0].Count = 3
    job.TaskGroups[0].Constraints = append(job.TaskGroups[0].Constraints,
        &structs.Constraint{
            Operand: structs.ConstraintDistinctProperty,
            LTarget: "${node.unique.id}",
        })
    assert.Nil(h.State.UpsertJob(h.NextIndex(), job), "UpsertJob")

    // Create some nodes
    var nodes []*structs.Node
    for i := 0; i < 6; i++ {
        node := mock.Node()
        nodes = append(nodes, node)
        assert.Nil(h.State.UpsertNode(h.NextIndex(), node), "UpsertNode")
    }

    // Create some allocations
    var allocs []*structs.Allocation
    for i := 0; i < 3; i++ {
        alloc := mock.Alloc()
        alloc.Job = job
        alloc.JobID = job.ID
        alloc.NodeID = nodes[i].ID
        alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
        allocs = append(allocs, alloc)
    }
    assert.Nil(h.State.UpsertAllocs(h.NextIndex(), allocs), "UpsertAllocs")

    // Update the count
    job2 := job.Copy()
    job2.TaskGroups[0].Count = 6
    assert.Nil(h.State.UpsertJob(h.NextIndex(), job2), "UpsertJob")

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    assert.Nil(h.Process(NewServiceScheduler, eval), "Process")

    // Ensure a single plan
    assert.Len(h.Plans, 1, "Number of plans")
    plan := h.Plans[0]

    // Ensure the plan doesn't have annotations.
    assert.Nil(plan.Annotations, "Plan.Annotations")

    // Ensure the eval hasn't spawned a blocked eval
    assert.Len(h.CreateEvals, 0, "Created Evals")

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    assert.Len(planned, 6, "Planned Allocations")

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    assert.Nil(err, "AllocsByJob")

    // Ensure all allocations placed
    assert.Len(out, 6, "Placed Allocations")

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

// Test job registration with spread configured
func TestServiceSched_Spread(t *testing.T) {
    assert := assert.New(t)

    start := uint32(100)
    step := uint32(10)

    for i := 0; i < 10; i++ {
        name := fmt.Sprintf("%d%% in dc1", start)
        t.Run(name, func(t *testing.T) {
            h := NewHarness(t)
            remaining := uint32(100 - start)
            // Create a job that uses spread over data center
            job := mock.Job()
            job.Datacenters = []string{"dc1", "dc2"}
            job.TaskGroups[0].Count = 10
            job.TaskGroups[0].Spreads = append(job.TaskGroups[0].Spreads,
                &structs.Spread{
                    Attribute: "${node.datacenter}",
                    Weight:    100,
                    SpreadTarget: []*structs.SpreadTarget{
                        {
                            Value:   "dc1",
                            Percent: start,
                        },
                        {
                            Value:   "dc2",
                            Percent: remaining,
                        },
                    },
                })
            assert.Nil(h.State.UpsertJob(h.NextIndex(), job), "UpsertJob")
            // Create some nodes, half in dc2
            var nodes []*structs.Node
            nodeMap := make(map[string]*structs.Node)
            for i := 0; i < 10; i++ {
                node := mock.Node()
                if i%2 == 0 {
                    node.Datacenter = "dc2"
                }
                nodes = append(nodes, node)
                assert.Nil(h.State.UpsertNode(h.NextIndex(), node), "UpsertNode")
                nodeMap[node.ID] = node
            }

            // Create a mock evaluation to register the job
            eval := &structs.Evaluation{
                Namespace:   structs.DefaultNamespace,
                ID:          uuid.Generate(),
                Priority:    job.Priority,
                TriggeredBy: structs.EvalTriggerJobRegister,
                JobID:       job.ID,
                Status:      structs.EvalStatusPending,
            }
            noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

            // Process the evaluation
            assert.Nil(h.Process(NewServiceScheduler, eval), "Process")

            // Ensure a single plan
            assert.Len(h.Plans, 1, "Number of plans")
            plan := h.Plans[0]

            // Ensure the plan doesn't have annotations.
            assert.Nil(plan.Annotations, "Plan.Annotations")

            // Ensure the eval hasn't spawned a blocked eval
            assert.Len(h.CreateEvals, 0, "Created Evals")

            // Ensure the plan allocated
            var planned []*structs.Allocation
            dcAllocsMap := make(map[string]int)
            for nodeId, allocList := range plan.NodeAllocation {
                planned = append(planned, allocList...)
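                // Tally how many placements landed in each datacenter so the
                // spread distribution can be asserted below.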
                dc := nodeMap[nodeId].Datacenter
                c := dcAllocsMap[dc]
                c += len(allocList)
                dcAllocsMap[dc] = c
            }
            assert.Len(planned, 10, "Planned Allocations")

            expectedCounts := make(map[string]int)
            expectedCounts["dc1"] = 10 - i
            if i > 0 {
                expectedCounts["dc2"] = i
            }
            require.Equal(t, expectedCounts, dcAllocsMap)

            h.AssertEvalStatus(t, structs.EvalStatusComplete)
        })
        start = start - step
    }
}

// Test job registration with even spread across dc
func TestServiceSched_EvenSpread(t *testing.T) {
    assert := assert.New(t)

    h := NewHarness(t)
    // Create a job that uses even spread over data center
    job := mock.Job()
    job.Datacenters = []string{"dc1", "dc2"}
    job.TaskGroups[0].Count = 10
    job.TaskGroups[0].Spreads = append(job.TaskGroups[0].Spreads,
        &structs.Spread{
            Attribute: "${node.datacenter}",
            Weight:    100,
        })
    assert.Nil(h.State.UpsertJob(h.NextIndex(), job), "UpsertJob")
    // Create some nodes, half in dc2
    var nodes []*structs.Node
    nodeMap := make(map[string]*structs.Node)
    for i := 0; i < 10; i++ {
        node := mock.Node()
        if i%2 == 0 {
            node.Datacenter = "dc2"
        }
        nodes = append(nodes, node)
        assert.Nil(h.State.UpsertNode(h.NextIndex(), node), "UpsertNode")
        nodeMap[node.ID] = node
    }

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    assert.Nil(h.Process(NewServiceScheduler, eval), "Process")

    // Ensure a single plan
    assert.Len(h.Plans, 1, "Number of plans")
    plan := h.Plans[0]

    // Ensure the plan doesn't have annotations.
    assert.Nil(plan.Annotations, "Plan.Annotations")

    // Ensure the eval hasn't spawned a blocked eval
    assert.Len(h.CreateEvals, 0, "Created Evals")

    // Ensure the plan allocated
    var planned []*structs.Allocation
    dcAllocsMap := make(map[string]int)
    for nodeId, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
        dc := nodeMap[nodeId].Datacenter
        c := dcAllocsMap[dc]
        c += len(allocList)
        dcAllocsMap[dc] = c
    }
    assert.Len(planned, 10, "Planned Allocations")

    // Expect an even split of allocs across datacenters
    expectedCounts := make(map[string]int)
    expectedCounts["dc1"] = 5
    expectedCounts["dc2"] = 5

    require.Equal(t, expectedCounts, dcAllocsMap)

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_Annotate(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    for i := 0; i < 10; i++ {
        node := mock.Node()
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Create a job
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:    structs.DefaultNamespace,
        ID:           uuid.Generate(),
        Priority:     job.Priority,
        TriggeredBy:  structs.EvalTriggerJobRegister,
        JobID:        job.ID,
        AnnotatePlan: true,
        Status:       structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 10 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure all allocations placed
    if len(out) != 10 {
        t.Fatalf("bad: %#v", out)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)

    // Ensure the plan had annotations.
    if plan.Annotations == nil {
        t.Fatalf("expected annotations")
    }

    desiredTGs := plan.Annotations.DesiredTGUpdates
    if l := len(desiredTGs); l != 1 {
        t.Fatalf("incorrect number of task groups; got %v; want %v", l, 1)
    }

    desiredChanges, ok := desiredTGs["web"]
    if !ok {
        t.Fatalf("expected task group web to have desired changes")
    }

    expected := &structs.DesiredUpdates{Place: 10}
    if !reflect.DeepEqual(desiredChanges, expected) {
        t.Fatalf("Unexpected desired updates; got %#v; want %#v", desiredChanges, expected)
    }
}

func TestServiceSched_JobRegister_CountZero(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    for i := 0; i < 10; i++ {
        node := mock.Node()
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Create a job and set the task group count to zero.
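    // A zero-count group is valid; the scheduler should produce neither a
    // plan nor any allocations for it.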
    job := mock.Job()
    job.TaskGroups[0].Count = 0
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure there was no plan
    if len(h.Plans) != 0 {
        t.Fatalf("bad: %#v", h.Plans)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure no allocations placed
    if len(out) != 0 {
        t.Fatalf("bad: %#v", out)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
    h := NewHarness(t)

    // Create NO nodes
    // Create a job
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }

    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure no plan
    if len(h.Plans) != 0 {
        t.Fatalf("bad: %#v", h.Plans)
    }

    // Ensure there is a follow up eval.
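    // With no nodes registered, every placement fails; the scheduler should
    // park the work behind a blocked eval that is re-evaluated once capacity
    // becomes available.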
    if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked {
        t.Fatalf("bad: %#v", h.CreateEvals)
    }

    if len(h.Evals) != 1 {
        t.Fatalf("incorrect number of updated evals: %#v", h.Evals)
    }
    outEval := h.Evals[0]

    // Ensure the eval has its spawned blocked eval
    if outEval.BlockedEval != h.CreateEvals[0].ID {
        t.Fatalf("bad: %#v", outEval)
    }

    // Ensure the plan failed to alloc
    if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
        t.Fatalf("bad: %#v", outEval)
    }

    metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name]
    if !ok {
        t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
    }

    // Check the coalesced failures
    if metrics.CoalescedFailures != 9 {
        t.Fatalf("bad: %#v", metrics)
    }

    // Check the available nodes
    if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 0 {
        t.Fatalf("bad: %#v", metrics)
    }

    // Check queued allocations
    queued := outEval.QueuedAllocations["web"]
    if queued != 10 {
        t.Fatalf("expected queued: %v, actual: %v", 10, queued)
    }
    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_CreateBlockedEval(t *testing.T) {
    h := NewHarness(t)

    // Create a full node
    node := mock.Node()
    node.Reserved = node.Resources
    node.ComputeClass()
    noErr(t, h.State.UpsertNode(h.NextIndex(), node))

    // Create an ineligible node
    node2 := mock.Node()
    node2.Attributes["kernel.name"] = "windows"
    node2.ComputeClass()
    noErr(t, h.State.UpsertNode(h.NextIndex(), node2))

    // Create a job
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }

    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure no plan
    if len(h.Plans) != 0 {
        t.Fatalf("bad: %#v", h.Plans)
    }

    // Ensure the plan has created a follow up eval.
    if len(h.CreateEvals) != 1 {
        t.Fatalf("bad: %#v", h.CreateEvals)
    }

    created := h.CreateEvals[0]
    if created.Status != structs.EvalStatusBlocked {
        t.Fatalf("bad: %#v", created)
    }

    classes := created.ClassEligibility
    if len(classes) != 2 || !classes[node.ComputedClass] || classes[node2.ComputedClass] {
        t.Fatalf("bad: %#v", classes)
    }

    if created.EscapedComputedClass {
        t.Fatalf("bad: %#v", created)
    }

    // Ensure there is a follow up eval.
    if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked {
        t.Fatalf("bad: %#v", h.CreateEvals)
    }

    if len(h.Evals) != 1 {
        t.Fatalf("incorrect number of updated evals: %#v", h.Evals)
    }
    outEval := h.Evals[0]

    // Ensure the plan failed to alloc
    if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
        t.Fatalf("bad: %#v", outEval)
    }

    metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name]
    if !ok {
        t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
    }

    // Check the coalesced failures
    if metrics.CoalescedFailures != 9 {
        t.Fatalf("bad: %#v", metrics)
    }

    // Check the available nodes
    if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 2 {
        t.Fatalf("bad: %#v", metrics)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) {
    h := NewHarness(t)

    // Create one node
    node := mock.Node()
    node.NodeClass = "class_0"
    noErr(t, node.ComputeClass())
    noErr(t, h.State.UpsertNode(h.NextIndex(), node))

    // Create a job that constrains on a node class
    job := mock.Job()
    job.TaskGroups[0].Count = 2
    job.TaskGroups[0].Constraints = append(job.Constraints,
        &structs.Constraint{
            LTarget: "${node.class}",
            RTarget: "class_0",
            Operand: "=",
        },
    )
    tg2 := job.TaskGroups[0].Copy()
    tg2.Name = "web2"
    tg2.Constraints[1].RTarget = "class_1"
    job.TaskGroups = append(job.TaskGroups, tg2)
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 2 {
        t.Fatalf("bad: %#v", plan)
    }

    // Ensure two allocations placed
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)
    if len(out) != 2 {
        t.Fatalf("bad: %#v", out)
    }

    if len(h.Evals) != 1 {
        t.Fatalf("incorrect number of updated evals: %#v", h.Evals)
    }
    outEval := h.Evals[0]

    // Ensure the eval has its spawned blocked eval
    if outEval.BlockedEval != h.CreateEvals[0].ID {
        t.Fatalf("bad: %#v", outEval)
    }

    // Ensure the plan failed to alloc one tg
    if outEval == nil || len(outEval.FailedTGAllocs) != 1 {
        t.Fatalf("bad: %#v", outEval)
    }

    metrics, ok := outEval.FailedTGAllocs[tg2.Name]
    if !ok {
        t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs)
    }

    // Check the coalesced failures
    if metrics.CoalescedFailures != tg2.Count-1 {
        t.Fatalf("bad: %#v", metrics)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

// This test just ensures the scheduler handles the eval type to avoid
// regressions.
func TestServiceSched_EvaluateMaxPlanEval(t *testing.T) {
    h := NewHarness(t)

    // Create a job and set the task group count to zero.
    job := mock.Job()
    job.TaskGroups[0].Count = 0
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock blocked evaluation
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Status:      structs.EvalStatusBlocked,
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerMaxPlans,
        JobID:       job.ID,
    }

    // Insert it into the state store
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure there was no plan
    if len(h.Plans) != 0 {
        t.Fatalf("bad: %#v", h.Plans)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_Plan_Partial_Progress(t *testing.T) {
    h := NewHarness(t)

    // Create a node
    node := mock.Node()
    noErr(t, h.State.UpsertNode(h.NextIndex(), node))

    // Create a job with a high resource ask so that all the allocations can't
    // be placed on a single node.
    job := mock.Job()
    job.TaskGroups[0].Count = 3
    job.TaskGroups[0].Tasks[0].Resources.CPU = 3600
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock evaluation to register the job
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }

    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan doesn't have annotations.
    if plan.Annotations != nil {
        t.Fatalf("expected no annotations")
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 1 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure only one allocation was placed
    if len(out) != 1 {
        t.Fatalf("bad: %#v", out)
    }

    queued := h.Evals[0].QueuedAllocations["web"]
    if queued != 2 {
        t.Fatalf("expected: %v, actual: %v", 2, queued)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_EvaluateBlockedEval(t *testing.T) {
    h := NewHarness(t)

    // Create a job
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock blocked evaluation
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Status:      structs.EvalStatusBlocked,
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
    }

    // Insert it into the state store
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure there was no plan
    if len(h.Plans) != 0 {
        t.Fatalf("bad: %#v", h.Plans)
    }

    // Ensure that the eval was reblocked
    if len(h.ReblockEvals) != 1 {
        t.Fatalf("bad: %#v", h.ReblockEvals)
    }
    if h.ReblockEvals[0].ID != eval.ID {
        t.Fatalf("expect same eval to be reblocked; got %q; want %q", h.ReblockEvals[0].ID, eval.ID)
    }

    // Ensure the eval status was not updated
    if len(h.Evals) != 0 {
        t.Fatalf("Existing eval should not have status set")
    }
}

func TestServiceSched_EvaluateBlockedEval_Finished(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    for i := 0; i < 10; i++ {
        node := mock.Node()
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Create a job
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    // Create a mock blocked evaluation
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Status:      structs.EvalStatusBlocked,
        Priority:    job.Priority,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
    }

    // Insert it into the state store
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan doesn't have annotations.
    if plan.Annotations != nil {
        t.Fatalf("expected no annotations")
    }

    // Ensure the eval has no spawned blocked eval
    if len(h.Evals) != 1 {
        t.Fatalf("bad: %#v", h.Evals)
    }
    if h.Evals[0].BlockedEval != "" {
        t.Fatalf("bad: %#v", h.Evals[0])
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 10 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure all allocations placed
    if len(out) != 10 {
        t.Fatalf("bad: %#v", out)
    }

    // Ensure the eval was not reblocked
    if len(h.ReblockEvals) != 0 {
        t.Fatalf("Existing eval should not have been reblocked as it placed all allocations")
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)

    // Ensure queued allocations is zero
    queued := h.Evals[0].QueuedAllocations["web"]
    if queued != 0 {
        t.Fatalf("expected queued: %v, actual: %v", 0, queued)
    }
}

func TestServiceSched_JobModify(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    var nodes []*structs.Node
    for i := 0; i < 10; i++ {
        node := mock.Node()
        nodes = append(nodes, node)
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Generate a fake job with allocations
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    var allocs []*structs.Allocation
    for i := 0; i < 10; i++ {
        alloc := mock.Alloc()
        alloc.Job = job
        alloc.JobID = job.ID
        alloc.NodeID = nodes[i].ID
        alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
        allocs = append(allocs, alloc)
    }
    noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

    // Add a few terminal status allocations; these should be ignored
    var terminal []*structs.Allocation
    for i := 0; i < 5; i++ {
        alloc := mock.Alloc()
        alloc.Job = job
        alloc.JobID = job.ID
        alloc.NodeID = nodes[i].ID
        alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
        alloc.DesiredStatus = structs.AllocDesiredStatusStop
        terminal = append(terminal, alloc)
    }
    noErr(t, h.State.UpsertAllocs(h.NextIndex(), terminal))

    // Update the job
    job2 := mock.Job()
    job2.ID = job.ID

    // Update the task such that it cannot be done in-place
    job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
    noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

    // Create a mock evaluation to deal with the job update
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    50,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan evicted all allocs
    var update []*structs.Allocation
    for _, updateList := range plan.NodeUpdate {
        update = append(update, updateList...)
    }
    if len(update) != len(allocs) {
        t.Fatalf("bad: %#v", plan)
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 10 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure all allocations placed
    out, _ = structs.FilterTerminalAllocs(out)
    if len(out) != 10 {
        t.Fatalf("bad: %#v", out)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

// Have a single node and submit a job. Increment the count such that all fit
// on the node but the node doesn't have enough resources to fit the new count +
// 1. This tests that we properly discount the resources of existing allocs.
func TestServiceSched_JobModify_IncrCount_NodeLimit(t *testing.T) {
    h := NewHarness(t)

    // Create one node
    node := mock.Node()
    node.Resources.CPU = 1000
    noErr(t, h.State.UpsertNode(h.NextIndex(), node))

    // Generate a fake job with one allocation
    job := mock.Job()
    job.TaskGroups[0].Tasks[0].Resources.CPU = 256
    job2 := job.Copy()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    var allocs []*structs.Allocation
    alloc := mock.Alloc()
    alloc.Job = job
    alloc.JobID = job.ID
    alloc.NodeID = node.ID
    alloc.Name = "my-job.web[0]"
    alloc.Resources.CPU = 256
    allocs = append(allocs, alloc)
    noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

    // Update the job to count 3
    job2.TaskGroups[0].Count = 3
    noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

    // Create a mock evaluation to deal with the job update
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    50,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan didn't evict the alloc
    var update []*structs.Allocation
    for _, updateList := range plan.NodeUpdate {
        update = append(update, updateList...)
    }
    if len(update) != 0 {
        t.Fatalf("bad: %#v", plan)
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 3 {
        t.Fatalf("bad: %#v", plan)
    }

    // Ensure the plan had no failures
    if len(h.Evals) != 1 {
        t.Fatalf("incorrect number of updated evals: %#v", h.Evals)
    }
    outEval := h.Evals[0]
    if outEval == nil || len(outEval.FailedTGAllocs) != 0 {
        t.Fatalf("bad: %#v", outEval)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure all allocations placed
    out, _ = structs.FilterTerminalAllocs(out)
    if len(out) != 3 {
        t.Fatalf("bad: %#v", out)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobModify_CountZero(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    var nodes []*structs.Node
    for i := 0; i < 10; i++ {
        node := mock.Node()
        nodes = append(nodes, node)
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Generate a fake job with allocations
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    var allocs []*structs.Allocation
    for i := 0; i < 10; i++ {
        alloc := mock.Alloc()
        alloc.Job = job
        alloc.JobID = job.ID
        alloc.NodeID = nodes[i].ID
        alloc.Name = structs.AllocName(alloc.JobID, alloc.TaskGroup, uint(i))
        allocs = append(allocs, alloc)
    }
    noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

    // Add a few terminal status allocations; these should be ignored
    var terminal []*structs.Allocation
    for i := 0; i < 5; i++ {
        alloc := mock.Alloc()
        alloc.Job = job
        alloc.JobID = job.ID
        alloc.NodeID = nodes[i].ID
        alloc.Name = structs.AllocName(alloc.JobID, alloc.TaskGroup, uint(i))
        alloc.DesiredStatus = structs.AllocDesiredStatusStop
        terminal = append(terminal, alloc)
    }
    noErr(t, h.State.UpsertAllocs(h.NextIndex(), terminal))

    // Update the job to be count zero
    job2 := mock.Job()
    job2.ID = job.ID
    job2.TaskGroups[0].Count = 0
    noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

    // Create a mock evaluation to deal with the job update
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    50,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan evicted all allocs
    var update []*structs.Allocation
    for _, updateList := range plan.NodeUpdate {
        update = append(update, updateList...)
    }
    if len(update) != len(allocs) {
        t.Fatalf("bad: %#v", plan)
    }

    // Ensure the plan didn't allocate anything
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 0 {
        t.Fatalf("bad: %#v", plan)
    }

    // Lookup the allocations by JobID
    ws := memdb.NewWatchSet()
    out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
    noErr(t, err)

    // Ensure no non-terminal allocations remain
    out, _ = structs.FilterTerminalAllocs(out)
    if len(out) != 0 {
        t.Fatalf("bad: %#v", out)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestServiceSched_JobModify_Rolling(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    var nodes []*structs.Node
    for i := 0; i < 10; i++ {
        node := mock.Node()
        nodes = append(nodes, node)
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Generate a fake job with allocations
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    var allocs []*structs.Allocation
    for i := 0; i < 10; i++ {
        alloc := mock.Alloc()
        alloc.Job = job
        alloc.JobID = job.ID
        alloc.NodeID = nodes[i].ID
        alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
        allocs = append(allocs, alloc)
    }
    noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

    // Update the job
    job2 := mock.Job()
    job2.ID = job.ID
    desiredUpdates := 4
    job2.TaskGroups[0].Update = &structs.UpdateStrategy{
        MaxParallel:     desiredUpdates,
        HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
        MinHealthyTime:  10 * time.Second,
        HealthyDeadline: 10 * time.Minute,
    }

    // Update the task such that it cannot be done in-place
    job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
    noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

    // Create a mock evaluation to deal with the job update
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    50,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan evicted only MaxParallel
    var update []*structs.Allocation
    for _, updateList := range plan.NodeUpdate {
        update = append(update, updateList...)
    }
    if len(update) != desiredUpdates {
        t.Fatalf("bad: got %d; want %d: %#v", len(update), desiredUpdates, plan)
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != desiredUpdates {
        t.Fatalf("bad: %#v", plan)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)

    // Check that the deployment id is attached to the eval
    if h.Evals[0].DeploymentID == "" {
        t.Fatalf("Eval not annotated with deployment id")
    }

    // Ensure a deployment was created
    if plan.Deployment == nil {
        t.Fatalf("bad: %#v", plan)
    }
    state, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name]
    if !ok {
        t.Fatalf("bad: %#v", plan)
    }
    if state.DesiredTotal != 10 || state.DesiredCanaries != 0 {
        t.Fatalf("bad: %#v", state)
    }
}

// This tests that the old allocation is stopped before placing.
// It is critical to test that the updated job attempts to place more
// allocations as this allows us to assert that destructive changes are done
// first.
func TestServiceSched_JobModify_Rolling_FullNode(t *testing.T) {
    h := NewHarness(t)

    // Create a node
    node := mock.Node()
    noErr(t, h.State.UpsertNode(h.NextIndex(), node))

    resourceAsk := node.Resources.Copy()
    resourceAsk.CPU -= node.Reserved.CPU
    resourceAsk.MemoryMB -= node.Reserved.MemoryMB
    resourceAsk.DiskMB -= node.Reserved.DiskMB
    resourceAsk.Networks = nil

    // Generate a fake job with one alloc that consumes the whole node
    job := mock.Job()
    job.TaskGroups[0].Count = 1
    job.TaskGroups[0].Tasks[0].Resources = resourceAsk
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    alloc := mock.Alloc()
    alloc.Resources = resourceAsk
    alloc.Job = job
    alloc.JobID = job.ID
    alloc.NodeID = node.ID
    alloc.Name = "my-job.web[0]"
    noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))

    // Update the job to place more versions of the task group, shrink the
    // resource ask, and force destructive updates
    job2 := job.Copy()
    job2.TaskGroups[0].Count = 5
    job2.TaskGroups[0].Update = &structs.UpdateStrategy{
        MaxParallel:     5,
        HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
        MinHealthyTime:  10 * time.Second,
        HealthyDeadline: 10 * time.Minute,
    }
    job2.TaskGroups[0].Tasks[0].Resources = mock.Alloc().Resources

    // Update the task such that it cannot be done in-place
    job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
    noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    50,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan evicted only MaxParallel
    var update []*structs.Allocation
    for _, updateList := range plan.NodeUpdate {
        update = append(update, updateList...)
    }
    if len(update) != 1 {
        t.Fatalf("bad: got %d; want %d: %#v", len(update), 1, plan)
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
    }
    if len(planned) != 1 {
        t.Fatalf("bad: %#v", plan)
    }

    h.AssertEvalStatus(t, structs.EvalStatusComplete)

    // Check that the deployment id is attached to the eval
    if h.Evals[0].DeploymentID == "" {
        t.Fatalf("Eval not annotated with deployment id")
    }

    // Ensure a deployment was created
    if plan.Deployment == nil {
        t.Fatalf("bad: %#v", plan)
    }
    state, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name]
    if !ok {
        t.Fatalf("bad: %#v", plan)
    }
    if state.DesiredTotal != 5 || state.DesiredCanaries != 0 {
        t.Fatalf("bad: %#v", state)
    }
}

func TestServiceSched_JobModify_Canaries(t *testing.T) {
    h := NewHarness(t)

    // Create some nodes
    var nodes []*structs.Node
    for i := 0; i < 10; i++ {
        node := mock.Node()
        nodes = append(nodes, node)
        noErr(t, h.State.UpsertNode(h.NextIndex(), node))
    }

    // Generate a fake job with allocations
    job := mock.Job()
    noErr(t, h.State.UpsertJob(h.NextIndex(), job))

    var allocs []*structs.Allocation
    for i := 0; i < 10; i++ {
        alloc := mock.Alloc()
        alloc.Job = job
        alloc.JobID = job.ID
        alloc.NodeID = nodes[i].ID
        alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
        allocs = append(allocs, alloc)
    }
    noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

    // Update the job
    job2 := mock.Job()
    job2.ID = job.ID
    desiredUpdates := 2
    job2.TaskGroups[0].Update = &structs.UpdateStrategy{
        MaxParallel:     desiredUpdates,
        Canary:          desiredUpdates,
        HealthCheck:     structs.UpdateStrategyHealthCheck_Checks,
        MinHealthyTime:  10 * time.Second,
        HealthyDeadline: 10 * time.Minute,
    }

    // Update the task such that it cannot be done in-place
    job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
    noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

    // Create a mock evaluation to deal with the job update
    eval := &structs.Evaluation{
        Namespace:   structs.DefaultNamespace,
        ID:          uuid.Generate(),
        Priority:    50,
        TriggeredBy: structs.EvalTriggerJobRegister,
        JobID:       job.ID,
        Status:      structs.EvalStatusPending,
    }
    noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

    // Process the evaluation
    err := h.Process(NewServiceScheduler, eval)
    if err != nil {
        t.Fatalf("err: %v", err)
    }

    // Ensure a single plan
    if len(h.Plans) != 1 {
        t.Fatalf("bad: %#v", h.Plans)
    }
    plan := h.Plans[0]

    // Ensure the plan evicted nothing
    var update []*structs.Allocation
    for _, updateList := range plan.NodeUpdate {
        update = append(update, updateList...)
    }
    if len(update) != 0 {
        t.Fatalf("bad: got %d; want %d: %#v", len(update), 0, plan)
    }

    // Ensure the plan allocated
    var planned []*structs.Allocation
    for _, allocList := range plan.NodeAllocation {
        planned = append(planned, allocList...)
1981 } 1982 if len(planned) != desiredUpdates { 1983 t.Fatalf("bad: %#v", plan) 1984 } 1985 for _, canary := range planned { 1986 if canary.DeploymentStatus == nil || !canary.DeploymentStatus.Canary { 1987 t.Fatalf("expected canary field to be set on canary alloc %q", canary.ID) 1988 } 1989 } 1990 1991 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1992 1993 // Check that the deployment id is attached to the eval 1994 if h.Evals[0].DeploymentID == "" { 1995 t.Fatalf("Eval not annotated with deployment id") 1996 } 1997 1998 // Ensure a deployment was created 1999 if plan.Deployment == nil { 2000 t.Fatalf("bad: %#v", plan) 2001 } 2002 state, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name] 2003 if !ok { 2004 t.Fatalf("bad: %#v", plan) 2005 } 2006 if state.DesiredTotal != 10 || state.DesiredCanaries != desiredUpdates { 2007 t.Fatalf("bad: %#v", state) 2008 } 2009 2010 // Assert the canaries were added to the placed list 2011 if len(state.PlacedCanaries) != desiredUpdates { 2012 t.Fatalf("bad: %#v", state) 2013 } 2014 } 2015 2016 func TestServiceSched_JobModify_InPlace(t *testing.T) { 2017 h := NewHarness(t) 2018 2019 // Create some nodes 2020 var nodes []*structs.Node 2021 for i := 0; i < 10; i++ { 2022 node := mock.Node() 2023 nodes = append(nodes, node) 2024 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2025 } 2026 2027 // Generate a fake job with allocations and create an older deployment 2028 job := mock.Job() 2029 d := mock.Deployment() 2030 d.JobID = job.ID 2031 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2032 noErr(t, h.State.UpsertDeployment(h.NextIndex(), d)) 2033 2034 // Create allocs that are part of the old deployment 2035 var allocs []*structs.Allocation 2036 for i := 0; i < 10; i++ { 2037 alloc := mock.Alloc() 2038 alloc.Job = job 2039 alloc.JobID = job.ID 2040 alloc.NodeID = nodes[i].ID 2041 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2042 alloc.DeploymentID = d.ID 2043 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)} 2044 allocs = append(allocs, alloc) 2045 } 2046 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2047 2048 // Update the job 2049 job2 := mock.Job() 2050 job2.ID = job.ID 2051 desiredUpdates := 4 2052 job2.TaskGroups[0].Update = &structs.UpdateStrategy{ 2053 MaxParallel: desiredUpdates, 2054 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 2055 MinHealthyTime: 10 * time.Second, 2056 HealthyDeadline: 10 * time.Minute, 2057 } 2058 noErr(t, h.State.UpsertJob(h.NextIndex(), job2)) 2059 2060 // Create a mock evaluation to handle the job update 2061 eval := &structs.Evaluation{ 2062 Namespace: structs.DefaultNamespace, 2063 ID: uuid.Generate(), 2064 Priority: 50, 2065 TriggeredBy: structs.EvalTriggerJobRegister, 2066 JobID: job.ID, 2067 Status: structs.EvalStatusPending, 2068 } 2069 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2070 2071 // Process the evaluation 2072 err := h.Process(NewServiceScheduler, eval) 2073 if err != nil { 2074 t.Fatalf("err: %v", err) 2075 } 2076 2077 // Ensure a single plan 2078 if len(h.Plans) != 1 { 2079 t.Fatalf("bad: %#v", h.Plans) 2080 } 2081 plan := h.Plans[0] 2082 2083 // Ensure the plan did not evict any allocs 2084 var update []*structs.Allocation 2085 for _, updateList := range plan.NodeUpdate { 2086 update = append(update, updateList...)
2087 } 2088 if len(update) != 0 { 2089 t.Fatalf("bad: %#v", plan) 2090 } 2091 2092 // Ensure the plan updated the existing allocs 2093 var planned []*structs.Allocation 2094 for _, allocList := range plan.NodeAllocation { 2095 planned = append(planned, allocList...) 2096 } 2097 if len(planned) != 10 { 2098 t.Fatalf("bad: %#v", plan) 2099 } 2100 for _, p := range planned { 2101 if p.Job != job2 { 2102 t.Fatalf("should update job") 2103 } 2104 } 2105 2106 // Lookup the allocations by JobID 2107 ws := memdb.NewWatchSet() 2108 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2109 noErr(t, err) 2110 2111 // Ensure all allocations placed 2112 if len(out) != 10 { 2113 t.Fatalf("bad: %#v", out) 2114 } 2115 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2116 2117 // Verify the network did not change 2118 rp := structs.Port{Label: "admin", Value: 5000} 2119 for _, alloc := range out { 2120 for _, resources := range alloc.TaskResources { 2121 if resources.Networks[0].ReservedPorts[0] != rp { 2122 t.Fatalf("bad: %#v", alloc) 2123 } 2124 } 2125 } 2126 2127 // Verify the deployment id was changed and health cleared 2128 for _, alloc := range out { 2129 if alloc.DeploymentID == d.ID { 2130 t.Fatalf("bad: deployment id not cleared") 2131 } else if alloc.DeploymentStatus != nil { 2132 t.Fatalf("bad: deployment status not cleared") 2133 } 2134 } 2135 } 2136 2137 func TestServiceSched_JobModify_DistinctProperty(t *testing.T) { 2138 h := NewHarness(t) 2139 2140 // Create some nodes 2141 var nodes []*structs.Node 2142 for i := 0; i < 10; i++ { 2143 node := mock.Node() 2144 node.Meta["rack"] = fmt.Sprintf("rack%d", i) 2145 nodes = append(nodes, node) 2146 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2147 } 2148 2149 // Create a job that uses distinct property and has count higher than what is 2150 // possible. 2151 job := mock.Job() 2152 job.TaskGroups[0].Count = 11 2153 job.Constraints = append(job.Constraints, 2154 &structs.Constraint{ 2155 Operand: structs.ConstraintDistinctProperty, 2156 LTarget: "${meta.rack}", 2157 }) 2158 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2159 2160 oldJob := job.Copy() 2161 oldJob.JobModifyIndex -= 1 2162 oldJob.TaskGroups[0].Count = 4 2163 2164 // Place 4 of 10 2165 var allocs []*structs.Allocation 2166 for i := 0; i < 4; i++ { 2167 alloc := mock.Alloc() 2168 alloc.Job = oldJob 2169 alloc.JobID = job.ID 2170 alloc.NodeID = nodes[i].ID 2171 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2172 allocs = append(allocs, alloc) 2173 } 2174 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2175 2176 // Create a mock evaluation to register the job 2177 eval := &structs.Evaluation{ 2178 Namespace: structs.DefaultNamespace, 2179 ID: uuid.Generate(), 2180 Priority: job.Priority, 2181 TriggeredBy: structs.EvalTriggerJobRegister, 2182 JobID: job.ID, 2183 Status: structs.EvalStatusPending, 2184 } 2185 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2186 2187 // Process the evaluation 2188 err := h.Process(NewServiceScheduler, eval) 2189 if err != nil { 2190 t.Fatalf("err: %v", err) 2191 } 2192 2193 // Ensure a single plan 2194 if len(h.Plans) != 1 { 2195 t.Fatalf("bad: %#v", h.Plans) 2196 } 2197 plan := h.Plans[0] 2198 2199 // Ensure the plan doesn't have annotations. 
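// Editor's note: with 10 rack values and a count of 11, the
// distinct_property constraint above can satisfy at most 10 placements; the
// eleventh should surface as a failed task-group allocation plus a blocked
// eval. Rough arithmetic, assuming one alloc per distinct ${meta.rack}
// value (illustrative only, not part of the original file):
//
//	racks, count := 10, 11
//	expectPlaced := racks         // one alloc per rack
//	expectFailed := count - racks // reported via FailedTGAllocs
//
// The assertions below check exactly this split.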
2200 if plan.Annotations != nil { 2201 t.Fatalf("expected no annotations") 2202 } 2203 2204 // Ensure the eval has spawned a blocked eval 2205 if len(h.CreateEvals) != 1 { 2206 t.Fatalf("bad: %#v", h.CreateEvals) 2207 } 2208 2209 // Ensure the plan failed to alloc 2210 outEval := h.Evals[0] 2211 if len(outEval.FailedTGAllocs) != 1 { 2212 t.Fatalf("bad: %+v", outEval) 2213 } 2214 2215 // Ensure the plan allocated 2216 var planned []*structs.Allocation 2217 for _, allocList := range plan.NodeAllocation { 2218 planned = append(planned, allocList...) 2219 } 2220 if len(planned) != 10 { 2221 t.Fatalf("bad: %#v", planned) 2222 } 2223 2224 // Lookup the allocations by JobID 2225 ws := memdb.NewWatchSet() 2226 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2227 noErr(t, err) 2228 2229 // Ensure all allocations placed 2230 if len(out) != 10 { 2231 t.Fatalf("bad: %#v", out) 2232 } 2233 2234 // Ensure a different node was used for each alloc. 2235 used := make(map[string]struct{}) 2236 for _, alloc := range out { 2237 if _, ok := used[alloc.NodeID]; ok { 2238 t.Fatalf("Node collision %v", alloc.NodeID) 2239 } 2240 used[alloc.NodeID] = struct{}{} 2241 } 2242 2243 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2244 } 2245 2246 func TestServiceSched_JobDeregister_Purged(t *testing.T) { 2247 h := NewHarness(t) 2248 2249 // Generate a fake job with allocations 2250 job := mock.Job() 2251 2252 var allocs []*structs.Allocation 2253 for i := 0; i < 10; i++ { 2254 alloc := mock.Alloc() 2255 alloc.Job = job 2256 alloc.JobID = job.ID 2257 allocs = append(allocs, alloc) 2258 } 2259 for _, alloc := range allocs { 2260 h.State.UpsertJobSummary(h.NextIndex(), mock.JobSummary(alloc.JobID)) 2261 } 2262 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2263 2264 // Create a mock evaluation to deregister the job 2265 eval := &structs.Evaluation{ 2266 Namespace: structs.DefaultNamespace, 2267 ID: uuid.Generate(), 2268 Priority: 50, 2269 TriggeredBy: structs.EvalTriggerJobDeregister, 2270 JobID: job.ID, 2271 Status: structs.EvalStatusPending, 2272 } 2273 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2274 2275 // Process the evaluation 2276 err := h.Process(NewServiceScheduler, eval) 2277 if err != nil { 2278 t.Fatalf("err: %v", err) 2279 } 2280 2281 // Ensure a single plan 2282 if len(h.Plans) != 1 { 2283 t.Fatalf("bad: %#v", h.Plans) 2284 } 2285 plan := h.Plans[0] 2286 2287 // Ensure the plan evicted all allocs (mock.Alloc uses this fixed node ID) 2288 if len(plan.NodeUpdate["12345678-abcd-efab-cdef-123456789abc"]) != len(allocs) { 2289 t.Fatalf("bad: %#v", plan) 2290 } 2291 2292 // Lookup the allocations by JobID 2293 ws := memdb.NewWatchSet() 2294 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2295 noErr(t, err) 2296 2297 // Ensure that the job field on the allocation is still populated 2298 for _, alloc := range out { 2299 if alloc.Job == nil { 2300 t.Fatalf("bad: %#v", alloc) 2301 } 2302 } 2303 2304 // Ensure no remaining allocations 2305 out, _ = structs.FilterTerminalAllocs(out) 2306 if len(out) != 0 { 2307 t.Fatalf("bad: %#v", out) 2308 } 2309 2310 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2311 } 2312 2313 func TestServiceSched_JobDeregister_Stopped(t *testing.T) { 2314 h := NewHarness(t) 2315 require := require.New(t) 2316 2317 // Generate a fake job with allocations 2318 job := mock.Job() 2319 job.Stop = true 2320 require.NoError(h.State.UpsertJob(h.NextIndex(), job)) 2321 2322 var allocs []*structs.Allocation 2323 for i := 0; i < 10; i++ { 2324 alloc := mock.Alloc() 2325
alloc.Job = job 2326 alloc.JobID = job.ID 2327 allocs = append(allocs, alloc) 2328 } 2329 require.NoError(h.State.UpsertAllocs(h.NextIndex(), allocs)) 2330 2331 // Create a summary where the queued allocs are set as we want to assert 2332 // they get zeroed out. 2333 summary := mock.JobSummary(job.ID) 2334 web := summary.Summary["web"] 2335 web.Queued = 2 2336 require.NoError(h.State.UpsertJobSummary(h.NextIndex(), summary)) 2337 2338 // Create a mock evaluation to deregister the job 2339 eval := &structs.Evaluation{ 2340 Namespace: structs.DefaultNamespace, 2341 ID: uuid.Generate(), 2342 Priority: 50, 2343 TriggeredBy: structs.EvalTriggerJobDeregister, 2344 JobID: job.ID, 2345 Status: structs.EvalStatusPending, 2346 } 2347 require.NoError(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2348 2349 // Process the evaluation 2350 require.NoError(h.Process(NewServiceScheduler, eval)) 2351 2352 // Ensure a single plan 2353 require.Len(h.Plans, 1) 2354 plan := h.Plans[0] 2355 2356 // Ensure the plan evicted all allocs 2357 require.Len(plan.NodeUpdate["12345678-abcd-efab-cdef-123456789abc"], len(allocs)) 2358 2359 // Lookup the allocations by JobID 2360 ws := memdb.NewWatchSet() 2361 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2362 require.NoError(err) 2363 2364 // Ensure that the job field on the allocation is still populated 2365 for _, alloc := range out { 2366 require.NotNil(alloc.Job) 2367 } 2368 2369 // Ensure no remaining allocations 2370 out, _ = structs.FilterTerminalAllocs(out) 2371 require.Empty(out) 2372 2373 // Assert the job summary is cleared out 2374 sout, err := h.State.JobSummaryByID(ws, job.Namespace, job.ID) 2375 require.NoError(err) 2376 require.NotNil(sout) 2377 require.Contains(sout.Summary, "web") 2378 webOut := sout.Summary["web"] 2379 require.Zero(webOut.Queued) 2380 2381 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2382 } 2383 2384 func TestServiceSched_NodeDown(t *testing.T) { 2385 h := NewHarness(t) 2386 2387 // Register a node 2388 node := mock.Node() 2389 node.Status = structs.NodeStatusDown 2390 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2391 2392 // Generate a fake job with allocations and an update policy.
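// Editor's note: for a down node, only allocs that are still non-terminal
// on the client should be rewritten to lost; terminal client statuses are
// preserved. A hedged sketch of that decision as a standalone predicate
// (assumed helper, not the scheduler's actual code):
//
//	func shouldMarkLost(a *structs.Allocation) bool {
//		switch a.ClientStatus {
//		case structs.AllocClientStatusComplete,
//			structs.AllocClientStatusFailed,
//			structs.AllocClientStatusLost:
//			return false // already terminal; leave untouched
//		default:
//			return true // running/pending work dies with the node
//		}
//	}
//
// This is why the test expects 7 of the 10 allocs (indexes 0-6) in
// plan.NodeUpdate while allocs[7] through allocs[9] keep their statuses.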
2393 job := mock.Job() 2394 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2395 2396 var allocs []*structs.Allocation 2397 for i := 0; i < 10; i++ { 2398 alloc := mock.Alloc() 2399 alloc.Job = job 2400 alloc.JobID = job.ID 2401 alloc.NodeID = node.ID 2402 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2403 allocs = append(allocs, alloc) 2404 } 2405 2406 // Cover each terminal case and ensure it doesn't change to lost 2407 allocs[7].DesiredStatus = structs.AllocDesiredStatusRun 2408 allocs[7].ClientStatus = structs.AllocClientStatusLost 2409 allocs[8].DesiredStatus = structs.AllocDesiredStatusRun 2410 allocs[8].ClientStatus = structs.AllocClientStatusFailed 2411 allocs[9].DesiredStatus = structs.AllocDesiredStatusRun 2412 allocs[9].ClientStatus = structs.AllocClientStatusComplete 2413 2414 // Mark some allocs as running 2415 for i := 0; i < 4; i++ { 2416 out := allocs[i] 2417 out.ClientStatus = structs.AllocClientStatusRunning 2418 } 2419 2420 // Mark appropriate allocs for migration 2421 for i := 0; i < 7; i++ { 2422 out := allocs[i] 2423 out.DesiredTransition.Migrate = helper.BoolToPtr(true) 2424 } 2425 2426 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2427 2428 // Create a mock evaluation to deal with the node update 2429 eval := &structs.Evaluation{ 2430 Namespace: structs.DefaultNamespace, 2431 ID: uuid.Generate(), 2432 Priority: 50, 2433 TriggeredBy: structs.EvalTriggerNodeUpdate, 2434 JobID: job.ID, 2435 NodeID: node.ID, 2436 Status: structs.EvalStatusPending, 2437 } 2438 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2439 2440 // Process the evaluation 2441 err := h.Process(NewServiceScheduler, eval) 2442 if err != nil { 2443 t.Fatalf("err: %v", err) 2444 } 2445 2446 // Ensure a single plan 2447 if len(h.Plans) != 1 { 2448 t.Fatalf("bad: %#v", h.Plans) 2449 } 2450 plan := h.Plans[0] 2451 2452 // Test the scheduler marked all non-terminal allocations as lost 2453 if len(plan.NodeUpdate[node.ID]) != 7 { 2454 t.Fatalf("bad: %#v", plan) 2455 } 2456 2457 for _, out := range plan.NodeUpdate[node.ID] { 2458 if out.ClientStatus != structs.AllocClientStatusLost || out.DesiredStatus != structs.AllocDesiredStatusStop { 2459 t.Fatalf("bad alloc: %#v", out) 2460 } 2461 } 2462 2463 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2464 } 2465 2466 func TestServiceSched_NodeUpdate(t *testing.T) { 2467 h := NewHarness(t) 2468 2469 // Register a node 2470 node := mock.Node() 2471 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2472 2473 // Generate a fake job with allocations and an update policy.
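// Editor's note: a node-update eval against a healthy node, where every
// alloc is still placeable, should produce no new placements, and the
// eval's QueuedAllocations entry for the group should be zero. Reading that
// count back in isolation (the same check the test makes further down;
// illustrative only):
//
//	queued, ok := h.Evals[0].QueuedAllocations["web"]
//	// expect ok == true and queued == 0: nothing needed to be placed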
2474 job := mock.Job() 2475 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2476 2477 var allocs []*structs.Allocation 2478 for i := 0; i < 10; i++ { 2479 alloc := mock.Alloc() 2480 alloc.Job = job 2481 alloc.JobID = job.ID 2482 alloc.NodeID = node.ID 2483 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2484 allocs = append(allocs, alloc) 2485 } 2486 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2487 2488 // Mark some allocs as running 2489 ws := memdb.NewWatchSet() 2490 for i := 0; i < 4; i++ { 2491 out, _ := h.State.AllocByID(ws, allocs[i].ID) 2492 out.ClientStatus = structs.AllocClientStatusRunning 2493 noErr(t, h.State.UpdateAllocsFromClient(h.NextIndex(), []*structs.Allocation{out})) 2494 } 2495 2496 // Create a mock evaluation which won't trigger any new placements 2497 eval := &structs.Evaluation{ 2498 Namespace: structs.DefaultNamespace, 2499 ID: uuid.Generate(), 2500 Priority: 50, 2501 TriggeredBy: structs.EvalTriggerNodeUpdate, 2502 JobID: job.ID, 2503 NodeID: node.ID, 2504 Status: structs.EvalStatusPending, 2505 } 2506 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2507 2508 // Process the evaluation 2509 err := h.Process(NewServiceScheduler, eval) 2510 if err != nil { 2511 t.Fatalf("err: %v", err) 2512 } 2513 if val, ok := h.Evals[0].QueuedAllocations["web"]; !ok || val != 0 { 2514 t.Fatalf("bad queued allocations: %v", h.Evals[0].QueuedAllocations) 2515 } 2516 2517 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2518 } 2519 2520 func TestServiceSched_NodeDrain(t *testing.T) { 2521 h := NewHarness(t) 2522 2523 // Register a draining node 2524 node := mock.Node() 2525 node.Drain = true 2526 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2527 2528 // Create some nodes 2529 for i := 0; i < 10; i++ { 2530 node := mock.Node() 2531 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2532 } 2533 2534 // Generate a fake job with allocations and an update policy. 2535 job := mock.Job() 2536 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2537 2538 var allocs []*structs.Allocation 2539 for i := 0; i < 10; i++ { 2540 alloc := mock.Alloc() 2541 alloc.Job = job 2542 alloc.JobID = job.ID 2543 alloc.NodeID = node.ID 2544 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2545 alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 2546 allocs = append(allocs, alloc) 2547 } 2548 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2549 2550 // Create a mock evaluation to deal with drain 2551 eval := &structs.Evaluation{ 2552 Namespace: structs.DefaultNamespace, 2553 ID: uuid.Generate(), 2554 Priority: 50, 2555 TriggeredBy: structs.EvalTriggerNodeUpdate, 2556 JobID: job.ID, 2557 NodeID: node.ID, 2558 Status: structs.EvalStatusPending, 2559 } 2560 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2561 2562 // Process the evaluation 2563 err := h.Process(NewServiceScheduler, eval) 2564 if err != nil { 2565 t.Fatalf("err: %v", err) 2566 } 2567 2568 // Ensure a single plan 2569 if len(h.Plans) != 1 { 2570 t.Fatalf("bad: %#v", h.Plans) 2571 } 2572 plan := h.Plans[0] 2573 2574 // Ensure the plan evicted all allocs 2575 if len(plan.NodeUpdate[node.ID]) != len(allocs) { 2576 t.Fatalf("bad: %#v", plan) 2577 } 2578 2579 // Ensure the plan allocated 2580 var planned []*structs.Allocation 2581 for _, allocList := range plan.NodeAllocation { 2582 planned = append(planned, allocList...) 
2583 } 2584 if len(planned) != 10 { 2585 t.Fatalf("bad: %#v", plan) 2586 } 2587 2588 // Lookup the allocations by JobID 2589 ws := memdb.NewWatchSet() 2590 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2591 noErr(t, err) 2592 2593 // Ensure all allocations placed 2594 out, _ = structs.FilterTerminalAllocs(out) 2595 if len(out) != 10 { 2596 t.Fatalf("bad: %#v", out) 2597 } 2598 2599 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2600 } 2601 2602 func TestServiceSched_NodeDrain_Down(t *testing.T) { 2603 h := NewHarness(t) 2604 2605 // Register a draining node 2606 node := mock.Node() 2607 node.Drain = true 2608 node.Status = structs.NodeStatusDown 2609 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2610 2611 // Generate a fake job with allocations 2612 job := mock.Job() 2613 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2614 2615 var allocs []*structs.Allocation 2616 for i := 0; i < 10; i++ { 2617 alloc := mock.Alloc() 2618 alloc.Job = job 2619 alloc.JobID = job.ID 2620 alloc.NodeID = node.ID 2621 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2622 allocs = append(allocs, alloc) 2623 } 2624 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2625 2626 // Set the client status of six allocs to the non-terminal stop value and mark them for migration 2627 var stop []*structs.Allocation 2628 for i := 0; i < 6; i++ { 2629 newAlloc := allocs[i].Copy() 2630 newAlloc.ClientStatus = structs.AllocDesiredStatusStop 2631 newAlloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 2632 stop = append(stop, newAlloc) 2633 } 2634 noErr(t, h.State.UpsertAllocs(h.NextIndex(), stop)) 2635 2636 // Mark some of the allocations as running 2637 var running []*structs.Allocation 2638 for i := 4; i < 6; i++ { 2639 newAlloc := stop[i].Copy() 2640 newAlloc.ClientStatus = structs.AllocClientStatusRunning 2641 running = append(running, newAlloc) 2642 } 2643 noErr(t, h.State.UpdateAllocsFromClient(h.NextIndex(), running)) 2644 2645 // Mark some of the allocations as complete 2646 var complete []*structs.Allocation 2647 for i := 6; i < 10; i++ { 2648 newAlloc := allocs[i].Copy() 2649 newAlloc.TaskStates = make(map[string]*structs.TaskState) 2650 newAlloc.TaskStates["web"] = &structs.TaskState{ 2651 State: structs.TaskStateDead, 2652 Events: []*structs.TaskEvent{ 2653 { 2654 Type: structs.TaskTerminated, 2655 ExitCode: 0, 2656 }, 2657 }, 2658 } 2659 newAlloc.ClientStatus = structs.AllocClientStatusComplete 2660 complete = append(complete, newAlloc) 2661 } 2662 noErr(t, h.State.UpdateAllocsFromClient(h.NextIndex(), complete)) 2663 2664 // Create a mock evaluation to deal with the node update 2665 eval := &structs.Evaluation{ 2666 Namespace: structs.DefaultNamespace, 2667 ID: uuid.Generate(), 2668 Priority: 50, 2669 TriggeredBy: structs.EvalTriggerNodeUpdate, 2670 JobID: job.ID, 2671 NodeID: node.ID, 2672 Status: structs.EvalStatusPending, 2673 } 2674 2675 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2676 2677 // Process the evaluation 2678 err := h.Process(NewServiceScheduler, eval) 2679 if err != nil { 2680 t.Fatalf("err: %v", err) 2681 } 2682 2683 // Ensure a single plan 2684 if len(h.Plans) != 1 { 2685 t.Fatalf("bad: %#v", h.Plans) 2686 } 2687 plan := h.Plans[0] 2688 2689 // Ensure the plan evicted non-terminal allocs 2690 if len(plan.NodeUpdate[node.ID]) != 6 { 2691 t.Fatalf("bad: %#v", plan) 2692 } 2693 2694 // Ensure that all the allocations which were in a running or pending 2695 // state have been marked as lost 2696 var lostAllocs []string 2697 for _, alloc := range plan.NodeUpdate[node.ID] { 2698 lostAllocs =
append(lostAllocs, alloc.ID) 2699 } 2700 sort.Strings(lostAllocs) 2701 2702 var expectedLostAllocs []string 2703 for i := 0; i < 6; i++ { 2704 expectedLostAllocs = append(expectedLostAllocs, allocs[i].ID) 2705 } 2706 sort.Strings(expectedLostAllocs) 2707 2708 if !reflect.DeepEqual(expectedLostAllocs, lostAllocs) { 2709 t.Fatalf("expected: %v, actual: %v", expectedLostAllocs, lostAllocs) 2710 } 2711 2712 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2713 } 2714 2715 func TestServiceSched_NodeDrain_Queued_Allocations(t *testing.T) { 2716 h := NewHarness(t) 2717 2718 // Register a draining node 2719 node := mock.Node() 2720 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2721 2722 // Generate a fake job with allocations and an update policy. 2723 job := mock.Job() 2724 job.TaskGroups[0].Count = 2 2725 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2726 2727 var allocs []*structs.Allocation 2728 for i := 0; i < 2; i++ { 2729 alloc := mock.Alloc() 2730 alloc.Job = job 2731 alloc.JobID = job.ID 2732 alloc.NodeID = node.ID 2733 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2734 alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 2735 allocs = append(allocs, alloc) 2736 } 2737 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2738 2739 node.Drain = true 2740 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2741 2742 // Create a mock evaluation to deal with drain 2743 eval := &structs.Evaluation{ 2744 Namespace: structs.DefaultNamespace, 2745 ID: uuid.Generate(), 2746 Priority: 50, 2747 TriggeredBy: structs.EvalTriggerNodeUpdate, 2748 JobID: job.ID, 2749 NodeID: node.ID, 2750 Status: structs.EvalStatusPending, 2751 } 2752 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2753 2754 // Process the evaluation 2755 err := h.Process(NewServiceScheduler, eval) 2756 if err != nil { 2757 t.Fatalf("err: %v", err) 2758 } 2759 2760 queued := h.Evals[0].QueuedAllocations["web"] 2761 if queued != 2 { 2762 t.Fatalf("expected: %v, actual: %v", 2, queued) 2763 } 2764 } 2765 2766 func TestServiceSched_RetryLimit(t *testing.T) { 2767 h := NewHarness(t) 2768 h.Planner = &RejectPlan{h} 2769 2770 // Create some nodes 2771 for i := 0; i < 10; i++ { 2772 node := mock.Node() 2773 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2774 } 2775 2776 // Create a job 2777 job := mock.Job() 2778 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2779 2780 // Create a mock evaluation to register the job 2781 eval := &structs.Evaluation{ 2782 Namespace: structs.DefaultNamespace, 2783 ID: uuid.Generate(), 2784 Priority: job.Priority, 2785 TriggeredBy: structs.EvalTriggerJobRegister, 2786 JobID: job.ID, 2787 Status: structs.EvalStatusPending, 2788 } 2789 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2790 2791 // Process the evaluation 2792 err := h.Process(NewServiceScheduler, eval) 2793 if err != nil { 2794 t.Fatalf("err: %v", err) 2795 } 2796 2797 // Ensure multiple plans 2798 if len(h.Plans) == 0 { 2799 t.Fatalf("bad: %#v", h.Plans) 2800 } 2801 2802 // Lookup the allocations by JobID 2803 ws := memdb.NewWatchSet() 2804 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2805 noErr(t, err) 2806 2807 // Ensure no allocations placed 2808 if len(out) != 0 { 2809 t.Fatalf("bad: %#v", out) 2810 } 2811 2812 // Should hit the retry limit 2813 h.AssertEvalStatus(t, structs.EvalStatusFailed) 2814 } 2815 2816 func TestServiceSched_Reschedule_OnceNow(t *testing.T) { 2817 h := NewHarness(t) 2818 2819 // Create some nodes 2820 var nodes []*structs.Node 2821 for i 
:= 0; i < 10; i++ { 2822 node := mock.Node() 2823 nodes = append(nodes, node) 2824 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2825 } 2826 2827 // Generate a fake job with allocations and an update policy. 2828 job := mock.Job() 2829 job.TaskGroups[0].Count = 2 2830 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 2831 Attempts: 1, 2832 Interval: 15 * time.Minute, 2833 Delay: 5 * time.Second, 2834 MaxDelay: 1 * time.Minute, 2835 DelayFunction: "constant", 2836 } 2837 tgName := job.TaskGroups[0].Name 2838 now := time.Now() 2839 2840 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2841 2842 var allocs []*structs.Allocation 2843 for i := 0; i < 2; i++ { 2844 alloc := mock.Alloc() 2845 alloc.Job = job 2846 alloc.JobID = job.ID 2847 alloc.NodeID = nodes[i].ID 2848 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2849 allocs = append(allocs, alloc) 2850 } 2851 // Mark one of the allocations as failed 2852 allocs[1].ClientStatus = structs.AllocClientStatusFailed 2853 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 2854 StartedAt: now.Add(-1 * time.Hour), 2855 FinishedAt: now.Add(-10 * time.Second)}} 2856 failedAllocID := allocs[1].ID 2857 successAllocID := allocs[0].ID 2858 2859 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2860 2861 // Create a mock evaluation 2862 eval := &structs.Evaluation{ 2863 Namespace: structs.DefaultNamespace, 2864 ID: uuid.Generate(), 2865 Priority: 50, 2866 TriggeredBy: structs.EvalTriggerNodeUpdate, 2867 JobID: job.ID, 2868 Status: structs.EvalStatusPending, 2869 } 2870 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2871 2872 // Process the evaluation 2873 err := h.Process(NewServiceScheduler, eval) 2874 if err != nil { 2875 t.Fatalf("err: %v", err) 2876 } 2877 2878 // Ensure multiple plans 2879 if len(h.Plans) == 0 { 2880 t.Fatalf("bad: %#v", h.Plans) 2881 } 2882 2883 // Lookup the allocations by JobID 2884 ws := memdb.NewWatchSet() 2885 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2886 noErr(t, err) 2887 2888 // Verify that one new allocation got created with its restart tracker info 2889 assert := assert.New(t) 2890 assert.Equal(3, len(out)) 2891 var newAlloc *structs.Allocation 2892 for _, alloc := range out { 2893 if alloc.ID != successAllocID && alloc.ID != failedAllocID { 2894 newAlloc = alloc 2895 } 2896 } 2897 assert.Equal(failedAllocID, newAlloc.PreviousAllocation) 2898 assert.Equal(1, len(newAlloc.RescheduleTracker.Events)) 2899 assert.Equal(failedAllocID, newAlloc.RescheduleTracker.Events[0].PrevAllocID) 2900 2901 // Mark this alloc as failed again, should not get rescheduled 2902 newAlloc.ClientStatus = structs.AllocClientStatusFailed 2903 2904 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{newAlloc})) 2905 2906 // Create another mock evaluation 2907 eval = &structs.Evaluation{ 2908 Namespace: structs.DefaultNamespace, 2909 ID: uuid.Generate(), 2910 Priority: 50, 2911 TriggeredBy: structs.EvalTriggerNodeUpdate, 2912 JobID: job.ID, 2913 Status: structs.EvalStatusPending, 2914 } 2915 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2916 2917 // Process the evaluation 2918 err = h.Process(NewServiceScheduler, eval) 2919 assert.Nil(err) 2920 // Verify no new allocs were created this time 2921 out, err = h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2922 noErr(t, err) 2923 assert.Equal(3, len(out)) 2924 2925 } 2926 2927 // Tests that alloc reschedulable at a future time creates a follow up eval 2928 func 
TestServiceSched_Reschedule_Later(t *testing.T) { 2929 h := NewHarness(t) 2930 require := require.New(t) 2931 // Create some nodes 2932 var nodes []*structs.Node 2933 for i := 0; i < 10; i++ { 2934 node := mock.Node() 2935 nodes = append(nodes, node) 2936 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 2937 } 2938 2939 // Generate a fake job with allocations and an update policy. 2940 job := mock.Job() 2941 job.TaskGroups[0].Count = 2 2942 delayDuration := 15 * time.Second 2943 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 2944 Attempts: 1, 2945 Interval: 15 * time.Minute, 2946 Delay: delayDuration, 2947 MaxDelay: 1 * time.Minute, 2948 DelayFunction: "constant", 2949 } 2950 tgName := job.TaskGroups[0].Name 2951 now := time.Now() 2952 2953 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 2954 2955 var allocs []*structs.Allocation 2956 for i := 0; i < 2; i++ { 2957 alloc := mock.Alloc() 2958 alloc.Job = job 2959 alloc.JobID = job.ID 2960 alloc.NodeID = nodes[i].ID 2961 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2962 allocs = append(allocs, alloc) 2963 } 2964 // Mark one of the allocations as failed 2965 allocs[1].ClientStatus = structs.AllocClientStatusFailed 2966 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 2967 StartedAt: now.Add(-1 * time.Hour), 2968 FinishedAt: now}} 2969 failedAllocID := allocs[1].ID 2970 2971 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2972 2973 // Create a mock evaluation 2974 eval := &structs.Evaluation{ 2975 Namespace: structs.DefaultNamespace, 2976 ID: uuid.Generate(), 2977 Priority: 50, 2978 TriggeredBy: structs.EvalTriggerNodeUpdate, 2979 JobID: job.ID, 2980 Status: structs.EvalStatusPending, 2981 } 2982 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2983 2984 // Process the evaluation 2985 err := h.Process(NewServiceScheduler, eval) 2986 if err != nil { 2987 t.Fatalf("err: %v", err) 2988 } 2989 2990 // Ensure multiple plans 2991 if len(h.Plans) == 0 { 2992 t.Fatalf("bad: %#v", h.Plans) 2993 } 2994 2995 // Lookup the allocations by JobID 2996 ws := memdb.NewWatchSet() 2997 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2998 noErr(t, err) 2999 3000 // Verify no new allocs were created 3001 require.Equal(2, len(out)) 3002 3003 // Verify follow up eval was created for the failed alloc 3004 alloc, err := h.State.AllocByID(ws, failedAllocID) 3005 require.Nil(err) 3006 require.NotEmpty(alloc.FollowupEvalID) 3007 3008 // Ensure there is a follow up eval. 3009 if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusPending { 3010 t.Fatalf("bad: %#v", h.CreateEvals) 3011 } 3012 followupEval := h.CreateEvals[0] 3013 require.Equal(now.Add(delayDuration), followupEval.WaitUntil) 3014 } 3015 3016 func TestServiceSched_Reschedule_MultipleNow(t *testing.T) { 3017 h := NewHarness(t) 3018 3019 // Create some nodes 3020 var nodes []*structs.Node 3021 for i := 0; i < 10; i++ { 3022 node := mock.Node() 3023 nodes = append(nodes, node) 3024 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3025 } 3026 3027 maxRestartAttempts := 3 3028 // Generate a fake job with allocations and an update policy. 
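// Editor's note: with Attempts set to maxRestartAttempts (3) over a
// 30-minute Interval, each pass of the loop in this test fails the newest
// alloc and re-evaluates, so state grows by one alloc and one
// reschedule-tracker event per pass. A walk-through of the bookkeeping,
// matching the expectedNumAllocs/expectedNumReschedTrackers counters
// defined further down (illustrative only):
//
//	// pass 0: 2 originals + 1 replacement = 3 allocs, 1 tracker event
//	// pass 1: 4 allocs, 2 tracker events
//	// pass 2: 5 allocs, 3 tracker events
//	// afterwards: attempts exhausted, no further replacements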
3029 job := mock.Job() 3030 job.TaskGroups[0].Count = 2 3031 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 3032 Attempts: maxRestartAttempts, 3033 Interval: 30 * time.Minute, 3034 Delay: 5 * time.Second, 3035 DelayFunction: "constant", 3036 } 3037 tgName := job.TaskGroups[0].Name 3038 now := time.Now() 3039 3040 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3041 3042 var allocs []*structs.Allocation 3043 for i := 0; i < 2; i++ { 3044 alloc := mock.Alloc() 3045 alloc.ClientStatus = structs.AllocClientStatusRunning 3046 alloc.Job = job 3047 alloc.JobID = job.ID 3048 alloc.NodeID = nodes[i].ID 3049 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3050 allocs = append(allocs, alloc) 3051 } 3052 // Mark one of the allocations as failed 3053 allocs[1].ClientStatus = structs.AllocClientStatusFailed 3054 allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 3055 StartedAt: now.Add(-1 * time.Hour), 3056 FinishedAt: now.Add(-10 * time.Second)}} 3057 3058 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3059 3060 // Create a mock evaluation 3061 eval := &structs.Evaluation{ 3062 Namespace: structs.DefaultNamespace, 3063 ID: uuid.Generate(), 3064 Priority: 50, 3065 TriggeredBy: structs.EvalTriggerNodeUpdate, 3066 JobID: job.ID, 3067 Status: structs.EvalStatusPending, 3068 } 3069 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3070 3071 expectedNumAllocs := 3 3072 expectedNumReschedTrackers := 1 3073 3074 failedAllocId := allocs[1].ID 3075 failedNodeID := allocs[1].NodeID 3076 3077 assert := assert.New(t) 3078 for i := 0; i < maxRestartAttempts; i++ { 3079 // Process the evaluation 3080 err := h.Process(NewServiceScheduler, eval) 3081 noErr(t, err) 3082 3083 // Ensure multiple plans 3084 if len(h.Plans) == 0 { 3085 t.Fatalf("bad: %#v", h.Plans) 3086 } 3087 3088 // Lookup the allocations by JobID 3089 ws := memdb.NewWatchSet() 3090 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3091 noErr(t, err) 3092 3093 // Verify that a new allocation got created with its restart tracker info 3094 assert.Equal(expectedNumAllocs, len(out)) 3095 3096 // Find the new alloc with ClientStatusPending 3097 var pendingAllocs []*structs.Allocation 3098 var prevFailedAlloc *structs.Allocation 3099 3100 for _, alloc := range out { 3101 if alloc.ClientStatus == structs.AllocClientStatusPending { 3102 pendingAllocs = append(pendingAllocs, alloc) 3103 } 3104 if alloc.ID == failedAllocId { 3105 prevFailedAlloc = alloc 3106 } 3107 } 3108 assert.Equal(1, len(pendingAllocs)) 3109 newAlloc := pendingAllocs[0] 3110 assert.Equal(expectedNumReschedTrackers, len(newAlloc.RescheduleTracker.Events)) 3111 3112 // Verify the previous NodeID in the most recent reschedule event 3113 reschedEvents := newAlloc.RescheduleTracker.Events 3114 assert.Equal(failedAllocId, reschedEvents[len(reschedEvents)-1].PrevAllocID) 3115 assert.Equal(failedNodeID, reschedEvents[len(reschedEvents)-1].PrevNodeID) 3116 3117 // Verify that the next alloc of the failed alloc is the newly rescheduled alloc 3118 assert.Equal(newAlloc.ID, prevFailedAlloc.NextAllocation) 3119 3120 // Mark this alloc as failed again 3121 newAlloc.ClientStatus = structs.AllocClientStatusFailed 3122 newAlloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 3123 StartedAt: now.Add(-12 * time.Second), 3124 FinishedAt: now.Add(-10 * time.Second)}} 3125 3126 failedAllocId = newAlloc.ID 3127 failedNodeID = newAlloc.NodeID 3128 3129 noErr(t, h.State.UpsertAllocs(h.NextIndex(), 
[]*structs.Allocation{newAlloc})) 3130 3131 // Create another mock evaluation 3132 eval = &structs.Evaluation{ 3133 Namespace: structs.DefaultNamespace, 3134 ID: uuid.Generate(), 3135 Priority: 50, 3136 TriggeredBy: structs.EvalTriggerNodeUpdate, 3137 JobID: job.ID, 3138 Status: structs.EvalStatusPending, 3139 } 3140 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3141 expectedNumAllocs += 1 3142 expectedNumReschedTrackers += 1 3143 } 3144 3145 // Process last eval again, should not reschedule 3146 err := h.Process(NewServiceScheduler, eval) 3147 assert.Nil(err) 3148 3149 // Verify no new allocs were created because restart attempts were exhausted 3150 ws := memdb.NewWatchSet() 3151 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3152 noErr(t, err) 3153 assert.Equal(5, len(out)) // 2 original, plus 3 reschedule attempts 3154 } 3155 3156 // Tests that old reschedule attempts are pruned 3157 func TestServiceSched_Reschedule_PruneEvents(t *testing.T) { 3158 h := NewHarness(t) 3159 3160 // Create some nodes 3161 var nodes []*structs.Node 3162 for i := 0; i < 10; i++ { 3163 node := mock.Node() 3164 nodes = append(nodes, node) 3165 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3166 } 3167 3168 // Generate a fake job with allocations and an update policy. 3169 job := mock.Job() 3170 job.TaskGroups[0].Count = 2 3171 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 3172 DelayFunction: "exponential", 3173 MaxDelay: 1 * time.Hour, 3174 Delay: 5 * time.Second, 3175 Unlimited: true, 3176 } 3177 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3178 3179 var allocs []*structs.Allocation 3180 for i := 0; i < 2; i++ { 3181 alloc := mock.Alloc() 3182 alloc.Job = job 3183 alloc.JobID = job.ID 3184 alloc.NodeID = nodes[i].ID 3185 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3186 allocs = append(allocs, alloc) 3187 } 3188 now := time.Now() 3189 // Mark allocations as failed with restart info 3190 allocs[1].TaskStates = map[string]*structs.TaskState{job.TaskGroups[0].Name: {State: "dead", 3191 StartedAt: now.Add(-1 * time.Hour), 3192 FinishedAt: now.Add(-15 * time.Minute)}} 3193 allocs[1].ClientStatus = structs.AllocClientStatusFailed 3194 3195 allocs[1].RescheduleTracker = &structs.RescheduleTracker{ 3196 Events: []*structs.RescheduleEvent{ 3197 {RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(), 3198 PrevAllocID: uuid.Generate(), 3199 PrevNodeID: uuid.Generate(), 3200 Delay: 5 * time.Second, 3201 }, 3202 {RescheduleTime: now.Add(-40 * time.Minute).UTC().UnixNano(), 3203 PrevAllocID: allocs[0].ID, 3204 PrevNodeID: uuid.Generate(), 3205 Delay: 10 * time.Second, 3206 }, 3207 {RescheduleTime: now.Add(-30 * time.Minute).UTC().UnixNano(), 3208 PrevAllocID: allocs[0].ID, 3209 PrevNodeID: uuid.Generate(), 3210 Delay: 20 * time.Second, 3211 }, 3212 {RescheduleTime: now.Add(-20 * time.Minute).UTC().UnixNano(), 3213 PrevAllocID: allocs[0].ID, 3214 PrevNodeID: uuid.Generate(), 3215 Delay: 40 * time.Second, 3216 }, 3217 {RescheduleTime: now.Add(-10 * time.Minute).UTC().UnixNano(), 3218 PrevAllocID: allocs[0].ID, 3219 PrevNodeID: uuid.Generate(), 3220 Delay: 80 * time.Second, 3221 }, 3222 {RescheduleTime: now.Add(-3 * time.Minute).UTC().UnixNano(), 3223 PrevAllocID: allocs[0].ID, 3224 PrevNodeID: uuid.Generate(), 3225 Delay: 160 * time.Second, 3226 }, 3227 }, 3228 } 3229 expectedFirstRescheduleEvent := allocs[1].RescheduleTracker.Events[1] 3230 expectedDelay := 320 * time.Second 3231 failedAllocID := allocs[1].ID 3232 successAllocID := allocs[0].ID 3233 
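// Editor's note: with an exponential DelayFunction starting at a 5s Delay,
// each attempt doubles the previous delay: 5s, 10s, 20s, 40s, 80s, 160s, so
// the next (seventh) attempt should get 320s, still under the 1h MaxDelay
// cap; that is what expectedDelay above encodes. A sketch of the
// progression (illustrative only):
//
//	delay := 5 * time.Second
//	for i := 0; i < 6; i++ {
//		delay *= 2 // 10s, 20s, 40s, 80s, 160s, 320s
//	}
//	// delay == 320*time.Second
//
// The tracker is pruned to the five most recent events plus the new one,
// which is why expectedFirstRescheduleEvent is the second of the six events
// constructed above.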
3234 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3235 3236 // Create a mock evaluation 3237 eval := &structs.Evaluation{ 3238 Namespace: structs.DefaultNamespace, 3239 ID: uuid.Generate(), 3240 Priority: 50, 3241 TriggeredBy: structs.EvalTriggerNodeUpdate, 3242 JobID: job.ID, 3243 Status: structs.EvalStatusPending, 3244 } 3245 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3246 3247 // Process the evaluation 3248 err := h.Process(NewServiceScheduler, eval) 3249 if err != nil { 3250 t.Fatalf("err: %v", err) 3251 } 3252 3253 // Ensure multiple plans 3254 if len(h.Plans) == 0 { 3255 t.Fatalf("bad: %#v", h.Plans) 3256 } 3257 3258 // Lookup the allocations by JobID 3259 ws := memdb.NewWatchSet() 3260 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3261 noErr(t, err) 3262 3263 // Verify that one new allocation got created with its restart tracker info 3264 assert := assert.New(t) 3265 assert.Equal(3, len(out)) 3266 var newAlloc *structs.Allocation 3267 for _, alloc := range out { 3268 if alloc.ID != successAllocID && alloc.ID != failedAllocID { 3269 newAlloc = alloc 3270 } 3271 } 3272 3273 assert.Equal(failedAllocID, newAlloc.PreviousAllocation) 3274 // Verify that the new alloc copied the last 5 reschedule attempts 3275 assert.Equal(6, len(newAlloc.RescheduleTracker.Events)) 3276 assert.Equal(expectedFirstRescheduleEvent, newAlloc.RescheduleTracker.Events[0]) 3277 3278 mostRecentRescheduleEvent := newAlloc.RescheduleTracker.Events[5] 3279 // Verify that the failed alloc ID is in the most recent reschedule event 3280 assert.Equal(failedAllocID, mostRecentRescheduleEvent.PrevAllocID) 3281 // Verify that the delay value was captured correctly 3282 assert.Equal(expectedDelay, mostRecentRescheduleEvent.Delay) 3283 3284 } 3285 3286 // Tests that deployments with failed allocs result in placements as long as the 3287 // deployment is running. 3288 func TestDeployment_FailedAllocs_Reschedule(t *testing.T) { 3289 for _, failedDeployment := range []bool{false, true} { 3290 t.Run(fmt.Sprintf("Failed Deployment: %v", failedDeployment), func(t *testing.T) { 3291 h := NewHarness(t) 3292 require := require.New(t) 3293 // Create some nodes 3294 var nodes []*structs.Node 3295 for i := 0; i < 10; i++ { 3296 node := mock.Node() 3297 nodes = append(nodes, node) 3298 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3299 } 3300 3301 // Generate a fake job with allocations and a reschedule policy. 
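// Editor's note: rescheduling a failed alloc that belongs to a deployment
// is gated on the deployment still being active; once the deployment is
// failed, no replacement should be placed. The two subtests drive both
// branches from the same setup (a restatement of the expectations checked
// at the end of the test; illustrative only):
//
//	if failedDeployment {
//		// expect len(h.Plans) == 0: reschedule suppressed
//	} else {
//		// expect one plan placing exactly one replacement
//	}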
3302 job := mock.Job() 3303 job.TaskGroups[0].Count = 2 3304 job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{ 3305 Attempts: 1, 3306 Interval: 15 * time.Minute, 3307 } 3308 jobIndex := h.NextIndex() 3309 require.Nil(h.State.UpsertJob(jobIndex, job)) 3310 3311 deployment := mock.Deployment() 3312 deployment.JobID = job.ID 3313 deployment.JobCreateIndex = jobIndex 3314 deployment.JobVersion = job.Version 3315 if failedDeployment { 3316 deployment.Status = structs.DeploymentStatusFailed 3317 } 3318 3319 require.Nil(h.State.UpsertDeployment(h.NextIndex(), deployment)) 3320 3321 var allocs []*structs.Allocation 3322 for i := 0; i < 2; i++ { 3323 alloc := mock.Alloc() 3324 alloc.Job = job 3325 alloc.JobID = job.ID 3326 alloc.NodeID = nodes[i].ID 3327 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3328 alloc.DeploymentID = deployment.ID 3329 allocs = append(allocs, alloc) 3330 } 3331 // Mark one of the allocations as failed in the past 3332 allocs[1].ClientStatus = structs.AllocClientStatusFailed 3333 allocs[1].TaskStates = map[string]*structs.TaskState{"web": {State: "start", 3334 StartedAt: time.Now().Add(-12 * time.Hour), 3335 FinishedAt: time.Now().Add(-10 * time.Hour)}} 3336 allocs[1].DesiredTransition.Reschedule = helper.BoolToPtr(true) 3337 3338 require.Nil(h.State.UpsertAllocs(h.NextIndex(), allocs)) 3339 3340 // Create a mock evaluation 3341 eval := &structs.Evaluation{ 3342 Namespace: structs.DefaultNamespace, 3343 ID: uuid.Generate(), 3344 Priority: 50, 3345 TriggeredBy: structs.EvalTriggerNodeUpdate, 3346 JobID: job.ID, 3347 Status: structs.EvalStatusPending, 3348 } 3349 require.Nil(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3350 3351 // Process the evaluation 3352 require.Nil(h.Process(NewServiceScheduler, eval)) 3353 3354 if failedDeployment { 3355 // Verify no plan created 3356 require.Len(h.Plans, 0) 3357 } else { 3358 require.Len(h.Plans, 1) 3359 plan := h.Plans[0] 3360 3361 // Ensure the plan allocated 3362 var planned []*structs.Allocation 3363 for _, allocList := range plan.NodeAllocation { 3364 planned = append(planned, allocList...) 
3365 } 3366 if len(planned) != 1 { 3367 t.Fatalf("bad: %#v", plan) 3368 } 3369 } 3370 }) 3371 } 3372 } 3373 3374 func TestBatchSched_Run_CompleteAlloc(t *testing.T) { 3375 h := NewHarness(t) 3376 3377 // Create a node 3378 node := mock.Node() 3379 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3380 3381 // Create a job 3382 job := mock.Job() 3383 job.Type = structs.JobTypeBatch 3384 job.TaskGroups[0].Count = 1 3385 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3386 3387 // Create a complete alloc 3388 alloc := mock.Alloc() 3389 alloc.Job = job 3390 alloc.JobID = job.ID 3391 alloc.NodeID = node.ID 3392 alloc.Name = "my-job.web[0]" 3393 alloc.ClientStatus = structs.AllocClientStatusComplete 3394 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3395 3396 // Create a mock evaluation to register the job 3397 eval := &structs.Evaluation{ 3398 Namespace: structs.DefaultNamespace, 3399 ID: uuid.Generate(), 3400 Priority: job.Priority, 3401 TriggeredBy: structs.EvalTriggerJobRegister, 3402 JobID: job.ID, 3403 Status: structs.EvalStatusPending, 3404 } 3405 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3406 3407 // Process the evaluation 3408 err := h.Process(NewBatchScheduler, eval) 3409 if err != nil { 3410 t.Fatalf("err: %v", err) 3411 } 3412 3413 // Ensure no plan as it should be a no-op 3414 if len(h.Plans) != 0 { 3415 t.Fatalf("bad: %#v", h.Plans) 3416 } 3417 3418 // Lookup the allocations by JobID 3419 ws := memdb.NewWatchSet() 3420 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3421 noErr(t, err) 3422 3423 // Ensure no allocations placed 3424 if len(out) != 1 { 3425 t.Fatalf("bad: %#v", out) 3426 } 3427 3428 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3429 } 3430 3431 func TestBatchSched_Run_FailedAlloc(t *testing.T) { 3432 h := NewHarness(t) 3433 3434 // Create a node 3435 node := mock.Node() 3436 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3437 3438 // Create a job 3439 job := mock.Job() 3440 job.Type = structs.JobTypeBatch 3441 job.TaskGroups[0].Count = 1 3442 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3443 3444 tgName := job.TaskGroups[0].Name 3445 now := time.Now() 3446 3447 // Create a failed alloc 3448 alloc := mock.Alloc() 3449 alloc.Job = job 3450 alloc.JobID = job.ID 3451 alloc.NodeID = node.ID 3452 alloc.Name = "my-job.web[0]" 3453 alloc.ClientStatus = structs.AllocClientStatusFailed 3454 alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 3455 StartedAt: now.Add(-1 * time.Hour), 3456 FinishedAt: now.Add(-10 * time.Second)}} 3457 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3458 3459 // Create a mock evaluation to register the job 3460 eval := &structs.Evaluation{ 3461 Namespace: structs.DefaultNamespace, 3462 ID: uuid.Generate(), 3463 Priority: job.Priority, 3464 TriggeredBy: structs.EvalTriggerJobRegister, 3465 JobID: job.ID, 3466 Status: structs.EvalStatusPending, 3467 } 3468 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3469 3470 // Process the evaluation 3471 err := h.Process(NewBatchScheduler, eval) 3472 if err != nil { 3473 t.Fatalf("err: %v", err) 3474 } 3475 3476 // Ensure a plan 3477 if len(h.Plans) != 1 { 3478 t.Fatalf("bad: %#v", h.Plans) 3479 } 3480 3481 // Lookup the allocations by JobID 3482 ws := memdb.NewWatchSet() 3483 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3484 noErr(t, err) 3485 3486 // Ensure a replacement alloc was placed. 
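// Editor's note: batch jobs reschedule failed allocs by default, and the
// failed alloc remains in state as a terminal record next to its
// replacement, so AllocsByJob should return both. Separating the two
// (illustrative only, using the alloc variable from this test):
//
//	replacements := 0
//	for _, a := range out {
//		if a.ID != alloc.ID {
//			replacements++ // anything that is not the original failed alloc
//		}
//	}
//	// expect replacements == 1 and therefore len(out) == 2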
3487 if len(out) != 2 { 3488 t.Fatalf("bad: %#v", out) 3489 } 3490 3491 // Ensure that the scheduler is recording the correct number of queued 3492 // allocations 3493 queued := h.Evals[0].QueuedAllocations["web"] 3494 if queued != 0 { 3495 t.Fatalf("expected: %v, actual: %v", 0, queued) 3496 } 3497 3498 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3499 } 3500 3501 func TestBatchSched_Run_LostAlloc(t *testing.T) { 3502 h := NewHarness(t) 3503 3504 // Create a node 3505 node := mock.Node() 3506 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3507 3508 // Create a job 3509 job := mock.Job() 3510 job.ID = "my-job" 3511 job.Type = structs.JobTypeBatch 3512 job.TaskGroups[0].Count = 3 3513 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3514 3515 // Desired = 3 3516 // Mark one as lost and then schedule 3517 // [(0, run, running), (1, run, running), (1, stop, lost)] 3518 3519 // Create two running allocations 3520 var allocs []*structs.Allocation 3521 for i := 0; i <= 1; i++ { 3522 alloc := mock.Alloc() 3523 alloc.Job = job 3524 alloc.JobID = job.ID 3525 alloc.NodeID = node.ID 3526 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3527 alloc.ClientStatus = structs.AllocClientStatusRunning 3528 allocs = append(allocs, alloc) 3529 } 3530 3531 // Create a stopped copy of my-job.web[1], the "lost" case above 3532 alloc := mock.Alloc() 3533 alloc.Job = job 3534 alloc.JobID = job.ID 3535 alloc.NodeID = node.ID 3536 alloc.Name = "my-job.web[1]" 3537 alloc.DesiredStatus = structs.AllocDesiredStatusStop 3538 alloc.ClientStatus = structs.AllocClientStatusComplete 3539 allocs = append(allocs, alloc) 3540 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3541 3542 // Create a mock evaluation to register the job 3543 eval := &structs.Evaluation{ 3544 Namespace: structs.DefaultNamespace, 3545 ID: uuid.Generate(), 3546 Priority: job.Priority, 3547 TriggeredBy: structs.EvalTriggerJobRegister, 3548 JobID: job.ID, 3549 Status: structs.EvalStatusPending, 3550 } 3551 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3552 3553 // Process the evaluation 3554 err := h.Process(NewBatchScheduler, eval) 3555 if err != nil { 3556 t.Fatalf("err: %v", err) 3557 } 3558 3559 // Ensure a plan 3560 if len(h.Plans) != 1 { 3561 t.Fatalf("bad: %#v", h.Plans) 3562 } 3563 3564 // Lookup the allocations by JobID 3565 ws := memdb.NewWatchSet() 3566 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3567 noErr(t, err) 3568 3569 // Ensure a replacement alloc was placed.
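// Editor's note: the expected total of four breaks down as the two running
// allocs, the stopped terminal copy of my-job.web[1], and one new placement
// to reach the group count of 3. The name histogram built below should
// therefore come out as (assumed breakdown, matching the expected map):
//
//	// my-job.web[0] -> 1 (running, kept)
//	// my-job.web[1] -> 2 (running copy kept + stopped terminal record)
//	// my-job.web[2] -> 1 (new placement to reach the count of 3)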
3570 if len(out) != 4 { 3571 t.Fatalf("bad: %#v", out) 3572 } 3573 3574 // Assert that we have the correct number of each alloc name 3575 expected := map[string]int{ 3576 "my-job.web[0]": 1, 3577 "my-job.web[1]": 2, 3578 "my-job.web[2]": 1, 3579 } 3580 actual := make(map[string]int, 3) 3581 for _, alloc := range out { 3582 actual[alloc.Name] += 1 3583 } 3584 require.Equal(t, actual, expected) 3585 3586 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3587 } 3588 3589 func TestBatchSched_Run_FailedAllocQueuedAllocations(t *testing.T) { 3590 h := NewHarness(t) 3591 3592 node := mock.Node() 3593 node.Drain = true 3594 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3595 3596 // Create a job 3597 job := mock.Job() 3598 job.Type = structs.JobTypeBatch 3599 job.TaskGroups[0].Count = 1 3600 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3601 3602 tgName := job.TaskGroups[0].Name 3603 now := time.Now() 3604 3605 // Create a failed alloc 3606 alloc := mock.Alloc() 3607 alloc.Job = job 3608 alloc.JobID = job.ID 3609 alloc.NodeID = node.ID 3610 alloc.Name = "my-job.web[0]" 3611 alloc.ClientStatus = structs.AllocClientStatusFailed 3612 alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 3613 StartedAt: now.Add(-1 * time.Hour), 3614 FinishedAt: now.Add(-10 * time.Second)}} 3615 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3616 3617 // Create a mock evaluation to register the job 3618 eval := &structs.Evaluation{ 3619 Namespace: structs.DefaultNamespace, 3620 ID: uuid.Generate(), 3621 Priority: job.Priority, 3622 TriggeredBy: structs.EvalTriggerJobRegister, 3623 JobID: job.ID, 3624 Status: structs.EvalStatusPending, 3625 } 3626 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3627 3628 // Process the evaluation 3629 err := h.Process(NewBatchScheduler, eval) 3630 if err != nil { 3631 t.Fatalf("err: %v", err) 3632 } 3633 3634 // Ensure that the scheduler is recording the correct number of queued 3635 // allocations 3636 queued := h.Evals[0].QueuedAllocations["web"] 3637 if queued != 1 { 3638 t.Fatalf("expected: %v, actual: %v", 1, queued) 3639 } 3640 } 3641 3642 func TestBatchSched_ReRun_SuccessfullyFinishedAlloc(t *testing.T) { 3643 h := NewHarness(t) 3644 3645 // Create two nodes, one that is drained and has a successfully finished 3646 // alloc and a fresh undrained one 3647 node := mock.Node() 3648 node.Drain = true 3649 node2 := mock.Node() 3650 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3651 noErr(t, h.State.UpsertNode(h.NextIndex(), node2)) 3652 3653 // Create a job 3654 job := mock.Job() 3655 job.Type = structs.JobTypeBatch 3656 job.TaskGroups[0].Count = 1 3657 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3658 3659 // Create a successful alloc 3660 alloc := mock.Alloc() 3661 alloc.Job = job 3662 alloc.JobID = job.ID 3663 alloc.NodeID = node.ID 3664 alloc.Name = "my-job.web[0]" 3665 alloc.ClientStatus = structs.AllocClientStatusComplete 3666 alloc.TaskStates = map[string]*structs.TaskState{ 3667 "web": { 3668 State: structs.TaskStateDead, 3669 Events: []*structs.TaskEvent{ 3670 { 3671 Type: structs.TaskTerminated, 3672 ExitCode: 0, 3673 }, 3674 }, 3675 }, 3676 } 3677 noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3678 3679 // Create a mock evaluation to rerun the job 3680 eval := &structs.Evaluation{ 3681 Namespace: structs.DefaultNamespace, 3682 ID: uuid.Generate(), 3683 Priority: job.Priority, 3684 TriggeredBy: structs.EvalTriggerJobRegister, 3685 JobID: job.ID, 3686 Status: 
structs.EvalStatusPending, 3687 } 3688 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3689 3690 // Process the evaluation 3691 err := h.Process(NewBatchScheduler, eval) 3692 if err != nil { 3693 t.Fatalf("err: %v", err) 3694 } 3695 3696 // Ensure no plan 3697 if len(h.Plans) != 0 { 3698 t.Fatalf("bad: %#v", h.Plans) 3699 } 3700 3701 // Lookup the allocations by JobID 3702 ws := memdb.NewWatchSet() 3703 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3704 noErr(t, err) 3705 3706 // Ensure no replacement alloc was placed. 3707 if len(out) != 1 { 3708 t.Fatalf("bad: %#v", out) 3709 } 3710 3711 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3712 } 3713 3714 // This test checks that terminal allocations that receive an in-place update 3715 // are not added to the plan 3716 func TestBatchSched_JobModify_InPlace_Terminal(t *testing.T) { 3717 h := NewHarness(t) 3718 3719 // Create some nodes 3720 var nodes []*structs.Node 3721 for i := 0; i < 10; i++ { 3722 node := mock.Node() 3723 nodes = append(nodes, node) 3724 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3725 } 3726 3727 // Generate a fake job with allocations 3728 job := mock.Job() 3729 job.Type = structs.JobTypeBatch 3730 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3731 3732 var allocs []*structs.Allocation 3733 for i := 0; i < 10; i++ { 3734 alloc := mock.Alloc() 3735 alloc.Job = job 3736 alloc.JobID = job.ID 3737 alloc.NodeID = nodes[i].ID 3738 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3739 alloc.ClientStatus = structs.AllocClientStatusComplete 3740 allocs = append(allocs, alloc) 3741 } 3742 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3743 3744 // Create a mock evaluation to trigger the job 3745 eval := &structs.Evaluation{ 3746 Namespace: structs.DefaultNamespace, 3747 ID: uuid.Generate(), 3748 Priority: 50, 3749 TriggeredBy: structs.EvalTriggerJobRegister, 3750 JobID: job.ID, 3751 Status: structs.EvalStatusPending, 3752 } 3753 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3754 3755 // Process the evaluation 3756 err := h.Process(NewBatchScheduler, eval) 3757 if err != nil { 3758 t.Fatalf("err: %v", err) 3759 } 3760 3761 // Ensure no plan 3762 if len(h.Plans) != 0 { 3763 t.Fatalf("bad: %#v", h.Plans[0]) 3764 } 3765 } 3766 3767 // This test ensures that terminal jobs from older versions are ignored.
3768 func TestBatchSched_JobModify_Destructive_Terminal(t *testing.T) { 3769 h := NewHarness(t) 3770 3771 // Create some nodes 3772 var nodes []*structs.Node 3773 for i := 0; i < 10; i++ { 3774 node := mock.Node() 3775 nodes = append(nodes, node) 3776 noErr(t, h.State.UpsertNode(h.NextIndex(), node)) 3777 } 3778 3779 // Generate a fake job with allocations 3780 job := mock.Job() 3781 job.Type = structs.JobTypeBatch 3782 noErr(t, h.State.UpsertJob(h.NextIndex(), job)) 3783 3784 var allocs []*structs.Allocation 3785 for i := 0; i < 10; i++ { 3786 alloc := mock.Alloc() 3787 alloc.Job = job 3788 alloc.JobID = job.ID 3789 alloc.NodeID = nodes[i].ID 3790 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3791 alloc.ClientStatus = structs.AllocClientStatusComplete 3792 allocs = append(allocs, alloc) 3793 } 3794 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3795 3796 // Update the job 3797 job2 := mock.Job() 3798 job2.ID = job.ID 3799 job2.Type = structs.JobTypeBatch 3800 job2.Version++ 3801 job2.TaskGroups[0].Tasks[0].Env = map[string]string{"foo": "bar"} 3802 noErr(t, h.State.UpsertJob(h.NextIndex(), job2)) 3803 3804 allocs = nil 3805 for i := 0; i < 10; i++ { 3806 alloc := mock.Alloc() 3807 alloc.Job = job2 3808 alloc.JobID = job2.ID 3809 alloc.NodeID = nodes[i].ID 3810 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3811 alloc.ClientStatus = structs.AllocClientStatusComplete 3812 alloc.TaskStates = map[string]*structs.TaskState{ 3813 "web": { 3814 State: structs.TaskStateDead, 3815 Events: []*structs.TaskEvent{ 3816 { 3817 Type: structs.TaskTerminated, 3818 ExitCode: 0, 3819 }, 3820 }, 3821 }, 3822 } 3823 allocs = append(allocs, alloc) 3824 } 3825 noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3826 3827 // Create a mock evaluation to register the job 3828 eval := &structs.Evaluation{ 3829 Namespace: structs.DefaultNamespace, 3830 ID: uuid.Generate(), 3831 Priority: 50, 3832 TriggeredBy: structs.EvalTriggerJobRegister, 3833 JobID: job.ID, 3834 Status: structs.EvalStatusPending, 3835 } 3836 noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3837 3838 // Process the evaluation 3839 err := h.Process(NewBatchScheduler, eval) 3840 if err != nil { 3841 t.Fatalf("err: %v", err) 3842 } 3843 3844 // Ensure no plan 3845 if len(h.Plans) != 0 { 3846 t.Fatalf("bad: %#v", h.Plans) 3847 } 3848 } 3849 3850 // This test asserts that an allocation from an old job that is running on a 3851 // drained node is cleaned up.

// This test asserts that an allocation from an old job that is running on a
// drained node is cleaned up.
func TestBatchSched_NodeDrain_Running_OldJob(t *testing.T) {
	h := NewHarness(t)

	// Create two nodes: one that is drained and has a running alloc from an
	// old job version, and a fresh undrained one
	node := mock.Node()
	node.Drain = true
	node2 := mock.Node()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	noErr(t, h.State.UpsertNode(h.NextIndex(), node2))

	// Create a job
	job := mock.Job()
	job.Type = structs.JobTypeBatch
	job.TaskGroups[0].Count = 1
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a running alloc
	alloc := mock.Alloc()
	alloc.Job = job
	alloc.JobID = job.ID
	alloc.NodeID = node.ID
	alloc.Name = "my-job.web[0]"
	alloc.ClientStatus = structs.AllocClientStatusRunning
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))

	// Create an updated job
	job2 := job.Copy()
	job2.TaskGroups[0].Tasks[0].Env = map[string]string{"foo": "bar"}
	job2.Version++
	noErr(t, h.State.UpsertJob(h.NextIndex(), job2))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewBatchScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	plan := h.Plans[0]

	// Ensure the plan evicted 1
	if len(plan.NodeUpdate[node.ID]) != 1 {
		t.Fatalf("bad: %#v", plan)
	}

	// Ensure the plan placed 1
	if len(plan.NodeAllocation[node2.ID]) != 1 {
		t.Fatalf("bad: %#v", plan)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
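
// planCounts is an editor-added convenience sketch (test-local, not part of
// the harness) tallying the two sides of a plan that the drain tests assert
// on: NodeUpdate holds evictions/stops per node and NodeAllocation holds new
// placements per node.
func planCounts(p *structs.Plan) (evicted, placed int) {
	for _, updates := range p.NodeUpdate {
		evicted += len(updates)
	}
	for _, placements := range p.NodeAllocation {
		placed += len(placements)
	}
	return evicted, placed
}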

// This test asserts that a completed allocation from a job on a drained node
// is ignored.
func TestBatchSched_NodeDrain_Complete(t *testing.T) {
	h := NewHarness(t)

	// Create two nodes: one that is drained and has a successfully finished
	// alloc, and a fresh undrained one
	node := mock.Node()
	node.Drain = true
	node2 := mock.Node()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	noErr(t, h.State.UpsertNode(h.NextIndex(), node2))

	// Create a job
	job := mock.Job()
	job.Type = structs.JobTypeBatch
	job.TaskGroups[0].Count = 1
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a complete alloc
	alloc := mock.Alloc()
	alloc.Job = job
	alloc.JobID = job.ID
	alloc.NodeID = node.ID
	alloc.Name = "my-job.web[0]"
	alloc.ClientStatus = structs.AllocClientStatusComplete
	alloc.TaskStates = make(map[string]*structs.TaskState)
	alloc.TaskStates["web"] = &structs.TaskState{
		State: structs.TaskStateDead,
		Events: []*structs.TaskEvent{
			{
				Type:     structs.TaskTerminated,
				ExitCode: 0,
			},
		},
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewBatchScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure no plan
	if len(h.Plans) != 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
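
// terminatedOK is an editor-added sketch (illustrative only) of the
// "successfully finished" task state these tests construct by hand: a dead
// task whose final event is a zero-exit termination.
func terminatedOK(ts *structs.TaskState) bool {
	if ts == nil || ts.State != structs.TaskStateDead {
		return false
	}
	n := len(ts.Events)
	return n > 0 && ts.Events[n-1].Type == structs.TaskTerminated && ts.Events[n-1].ExitCode == 0
}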

// This is a slightly odd test, but it ensures that we handle a scale-down of
// a task group's count and that it works even if all the allocs have the
// same name.
func TestBatchSched_ScaleDown_SameName(t *testing.T) {
	h := NewHarness(t)

	// Create a node
	node := mock.Node()
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Create a job
	job := mock.Job()
	job.Type = structs.JobTypeBatch
	job.TaskGroups[0].Count = 1
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a few running allocs
	var allocs []*structs.Allocation
	for i := 0; i < 5; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = node.ID
		alloc.Name = "my-job.web[0]"
		alloc.ClientStatus = structs.AllocClientStatusRunning
		allocs = append(allocs, alloc)
	}
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewBatchScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	plan := h.Plans[0]

	// Ensure the plan evicted 4 of the 5
	if len(plan.NodeUpdate[node.ID]) != 4 {
		t.Fatalf("bad: %#v", plan)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

func TestGenericSched_ChainedAlloc(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		noErr(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
	// Process the evaluation
	if err := h.Process(NewServiceScheduler, eval); err != nil {
		t.Fatalf("err: %v", err)
	}

	var allocIDs []string
	for _, allocList := range h.Plans[0].NodeAllocation {
		for _, alloc := range allocList {
			allocIDs = append(allocIDs, alloc.ID)
		}
	}
	sort.Strings(allocIDs)

	// Create a new harness to invoke the scheduler again
	h1 := NewHarnessWithState(t, h.State)
	job1 := mock.Job()
	job1.ID = job.ID
	job1.TaskGroups[0].Tasks[0].Env["foo"] = "bar"
	job1.TaskGroups[0].Count = 12
	noErr(t, h1.State.UpsertJob(h1.NextIndex(), job1))

	// Create a mock evaluation to update the job
	eval1 := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job1.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job1.ID,
		Status:      structs.EvalStatusPending,
	}
	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval1}))

	// Process the evaluation
	if err := h1.Process(NewServiceScheduler, eval1); err != nil {
		t.Fatalf("err: %v", err)
	}

	plan := h1.Plans[0]
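
	// Editor's note: a destructive update replaces each existing allocation
	// with a new one whose PreviousAllocation field records the allocation it
	// supersedes. With the count raised from 10 to 12 we therefore expect ten
	// chained replacements plus two brand-new allocations with no predecessor.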

	// Collect all the chained allocation ids and the new allocations which
	// don't have any chained allocations
	var prevAllocs []string
	var newAllocs []string
	for _, allocList := range plan.NodeAllocation {
		for _, alloc := range allocList {
			if alloc.PreviousAllocation == "" {
				newAllocs = append(newAllocs, alloc.ID)
				continue
			}
			prevAllocs = append(prevAllocs, alloc.PreviousAllocation)
		}
	}
	sort.Strings(prevAllocs)

	// Ensure that the new allocations have their corresponding original
	// allocation ids
	if !reflect.DeepEqual(prevAllocs, allocIDs) {
		t.Fatalf("expected: %v, actual: %v", allocIDs, prevAllocs)
	}

	// Ensure the two brand-new allocations don't have any chained allocations
	if len(newAllocs) != 2 {
		t.Fatalf("expected: %v, actual: %v", 2, len(newAllocs))
	}
}

func TestServiceSched_NodeDrain_Sticky(t *testing.T) {
	h := NewHarness(t)

	// Register a draining node
	node := mock.Node()
	node.Drain = true
	noErr(t, h.State.UpsertNode(h.NextIndex(), node))

	// Create an alloc on the draining node
	alloc := mock.Alloc()
	alloc.Name = "my-job.web[0]"
	alloc.NodeID = node.ID
	alloc.Job.TaskGroups[0].Count = 1
	alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true
	alloc.DesiredTransition.Migrate = helper.BoolToPtr(true)
	noErr(t, h.State.UpsertJob(h.NextIndex(), alloc.Job))
	noErr(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))

	// Create a mock evaluation to deal with drain
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       alloc.Job.ID,
		NodeID:      node.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan evicted all allocs
	if len(plan.NodeUpdate[node.ID]) != 1 {
		t.Fatalf("bad: %#v", plan)
	}

	// Ensure the plan didn't create any new allocations
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 0 {
		t.Fatalf("bad: %#v", plan)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
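
// Editor's note on the sticky test above: the only registered node is
// draining, and sticky ephemeral disks tie a replacement to its previous
// node, so the plan can evict the allocation but places nothing; the
// replacement presumably stays pending until a suitable node is available.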

// This test ensures that when a job is stopped, the scheduler properly
// cancels an outstanding deployment.
func TestServiceSched_CancelDeployment_Stopped(t *testing.T) {
	h := NewHarness(t)

	// Generate a fake job
	job := mock.Job()
	job.JobModifyIndex = job.CreateIndex + 1
	job.ModifyIndex = job.CreateIndex + 1
	job.Stop = true
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a deployment
	d := mock.Deployment()
	d.JobID = job.ID
	d.JobCreateIndex = job.CreateIndex
	d.JobModifyIndex = job.JobModifyIndex - 1
	noErr(t, h.State.UpsertDeployment(h.NextIndex(), d))

	// Create a mock evaluation to deregister the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobDeregister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan cancelled the existing deployment
	ws := memdb.NewWatchSet()
	out, err := h.State.LatestDeploymentByJobID(ws, job.Namespace, job.ID)
	noErr(t, err)

	if out == nil {
		t.Fatalf("No deployment for job")
	}
	if out.ID != d.ID {
		t.Fatalf("Latest deployment for job is different than original deployment")
	}
	if out.Status != structs.DeploymentStatusCancelled {
		t.Fatalf("Deployment status is %q, want %q", out.Status, structs.DeploymentStatusCancelled)
	}
	if out.StatusDescription != structs.DeploymentStatusDescriptionStoppedJob {
		t.Fatalf("Deployment status description is %q, want %q",
			out.StatusDescription, structs.DeploymentStatusDescriptionStoppedJob)
	}

	// Ensure the plan didn't allocate anything
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 0 {
		t.Fatalf("bad: %#v", plan)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}
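
// deploymentIsStale is an editor-added sketch (not the scheduler's actual
// logic) of the condition probed by the two cancellation tests surrounding
// it: a deployment is cancelled when its job has been stopped, or when it
// was created for an older modify index of the job.
func deploymentIsStale(d *structs.Deployment, j *structs.Job) bool {
	return j.Stop || d.JobModifyIndex < j.JobModifyIndex
}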

// This test ensures that when a job is updated and has an old deployment, the
// scheduler properly cancels the deployment.
func TestServiceSched_CancelDeployment_NewerJob(t *testing.T) {
	h := NewHarness(t)

	// Generate a fake job
	job := mock.Job()
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a deployment for an old version of the job
	d := mock.Deployment()
	d.JobID = job.ID
	noErr(t, h.State.UpsertDeployment(h.NextIndex(), d))

	// Upsert again to bump the job version
	noErr(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to kick the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	noErr(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan cancelled the existing deployment
	ws := memdb.NewWatchSet()
	out, err := h.State.LatestDeploymentByJobID(ws, job.Namespace, job.ID)
	noErr(t, err)

	if out == nil {
		t.Fatalf("No deployment for job")
	}
	if out.ID != d.ID {
		t.Fatalf("Latest deployment for job is different than original deployment")
	}
	if out.Status != structs.DeploymentStatusCancelled {
		t.Fatalf("Deployment status is %q, want %q", out.Status, structs.DeploymentStatusCancelled)
	}
	if out.StatusDescription != structs.DeploymentStatusDescriptionNewerJob {
		t.Fatalf("Deployment status description is %q, want %q",
			out.StatusDescription, structs.DeploymentStatusDescriptionNewerJob)
	}

	// Ensure the plan didn't allocate anything
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 0 {
		t.Fatalf("bad: %#v", plan)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

// Various table-driven tests for the carry-forward of past reschedule events.
func Test_updateRescheduleTracker(t *testing.T) {
	t1 := time.Now().UTC()
	alloc := mock.Alloc()
	prevAlloc := mock.Alloc()

	type testCase struct {
		desc                     string
		prevAllocEvents          []*structs.RescheduleEvent
		reschedPolicy            *structs.ReschedulePolicy
		expectedRescheduleEvents []*structs.RescheduleEvent
		reschedTime              time.Time
	}

	testCases := []testCase{
		{
			desc:            "No past events",
			prevAllocEvents: nil,
			reschedPolicy:   &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second},
			reschedTime:     t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
			},
		},
		{
			desc: "one past event, linear delay",
			prevAllocEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
			},
			reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second},
			reschedTime:   t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
			},
		},
		{
			desc: "one past event, fibonacci delay",
			prevAllocEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
			},
			reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 60 * time.Second},
			reschedTime:   t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
			},
		},
		{
			desc: "eight past events, fibonacci delay, unlimited",
			prevAllocEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          10 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          15 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          25 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          40 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          65 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          105 * time.Second,
				},
			},
			reschedPolicy: &structs.ReschedulePolicy{Unlimited: true, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 240 * time.Second},
			reschedTime:   t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          15 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          25 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          40 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          65 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          105 * time.Second,
				},
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          170 * time.Second,
				},
			},
		},
		{
			desc: "old attempts past interval, exponential delay, limited",
			prevAllocEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-2 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-70 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          10 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-30 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          20 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          40 * time.Second,
				},
			},
			reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 1 * time.Hour, Attempts: 5, Delay: 5 * time.Second, DelayFunction: "exponential", MaxDelay: 240 * time.Second},
			reschedTime:   t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-30 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          20 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          40 * time.Second,
				},
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          80 * time.Second,
				},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			require := require.New(t)
			prevAlloc.RescheduleTracker = &structs.RescheduleTracker{Events: tc.prevAllocEvents}
			prevAlloc.Job.LookupTaskGroup(prevAlloc.TaskGroup).ReschedulePolicy = tc.reschedPolicy
			updateRescheduleTracker(alloc, prevAlloc, tc.reschedTime)
			require.Equal(tc.expectedRescheduleEvents, alloc.RescheduleTracker.Events)
		})
	}
}
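
// nextFibDelay is an editor-added sketch (not the scheduler's actual
// implementation) of the "fibonacci" delay function exercised above: each
// delay is the sum of the previous two, capped at the policy's MaxDelay.
// From a 5s base the sequence runs 5, 5, 10, 15, 25, 40, 65, 105, so the
// next reschedule in the unlimited case carries 65s+105s = 170s, matching
// the expected events. Note also how the exponential, interval-limited case
// doubles the delay (20s, 40s, 80s) and drops the events that fall outside
// the one-hour Interval window (the -2h and -70m entries).
func nextFibDelay(prev, beforePrev, max time.Duration) time.Duration {
	// Cap the fibonacci growth at the policy's maximum delay.
	if d := prev + beforePrev; d < max {
		return d
	}
	return max
}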