github.com/aminovpavel/nomad@v0.11.8/scheduler/generic_sched_test.go (about) 1 package scheduler 2 3 import ( 4 "fmt" 5 "reflect" 6 "sort" 7 "testing" 8 "time" 9 10 memdb "github.com/hashicorp/go-memdb" 11 "github.com/hashicorp/nomad/helper" 12 "github.com/hashicorp/nomad/helper/uuid" 13 "github.com/hashicorp/nomad/nomad/mock" 14 "github.com/hashicorp/nomad/nomad/structs" 15 "github.com/hashicorp/nomad/testutil" 16 "github.com/stretchr/testify/assert" 17 "github.com/stretchr/testify/require" 18 ) 19 20 func TestServiceSched_JobRegister(t *testing.T) { 21 h := NewHarness(t) 22 23 // Create some nodes 24 for i := 0; i < 10; i++ { 25 node := mock.Node() 26 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 27 } 28 29 // Create a job 30 job := mock.Job() 31 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 32 33 // Create a mock evaluation to register the job 34 eval := &structs.Evaluation{ 35 Namespace: structs.DefaultNamespace, 36 ID: uuid.Generate(), 37 Priority: job.Priority, 38 TriggeredBy: structs.EvalTriggerJobRegister, 39 JobID: job.ID, 40 Status: structs.EvalStatusPending, 41 } 42 43 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 44 45 // Process the evaluation 46 err := h.Process(NewServiceScheduler, eval) 47 if err != nil { 48 t.Fatalf("err: %v", err) 49 } 50 51 // Ensure a single plan 52 if len(h.Plans) != 1 { 53 t.Fatalf("bad: %#v", h.Plans) 54 } 55 plan := h.Plans[0] 56 57 // Ensure the plan doesn't have annotations. 58 if plan.Annotations != nil { 59 t.Fatalf("expected no annotations") 60 } 61 62 // Ensure the eval has no spawned blocked eval 63 if len(h.CreateEvals) != 0 { 64 t.Fatalf("bad: %#v", h.CreateEvals) 65 if h.Evals[0].BlockedEval != "" { 66 t.Fatalf("bad: %#v", h.Evals[0]) 67 } 68 } 69 70 // Ensure the plan allocated 71 var planned []*structs.Allocation 72 for _, allocList := range plan.NodeAllocation { 73 planned = append(planned, allocList...) 
74 } 75 if len(planned) != 10 { 76 t.Fatalf("bad: %#v", plan) 77 } 78 79 // Lookup the allocations by JobID 80 ws := memdb.NewWatchSet() 81 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 82 require.NoError(t, err) 83 84 // Ensure all allocations placed 85 if len(out) != 10 { 86 t.Fatalf("bad: %#v", out) 87 } 88 89 // Ensure different ports were used. 90 used := make(map[int]map[string]struct{}) 91 for _, alloc := range out { 92 for _, resource := range alloc.TaskResources { 93 for _, port := range resource.Networks[0].DynamicPorts { 94 nodeMap, ok := used[port.Value] 95 if !ok { 96 nodeMap = make(map[string]struct{}) 97 used[port.Value] = nodeMap 98 } 99 if _, ok := nodeMap[alloc.NodeID]; ok { 100 t.Fatalf("Port collision on node %q %v", alloc.NodeID, port.Value) 101 } 102 nodeMap[alloc.NodeID] = struct{}{} 103 } 104 } 105 } 106 107 h.AssertEvalStatus(t, structs.EvalStatusComplete) 108 } 109 110 func TestServiceSched_JobRegister_StickyAllocs(t *testing.T) { 111 h := NewHarness(t) 112 113 // Create some nodes 114 for i := 0; i < 10; i++ { 115 node := mock.Node() 116 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 117 } 118 119 // Create a job 120 job := mock.Job() 121 job.TaskGroups[0].EphemeralDisk.Sticky = true 122 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 123 124 // Create a mock evaluation to register the job 125 eval := &structs.Evaluation{ 126 Namespace: structs.DefaultNamespace, 127 ID: uuid.Generate(), 128 Priority: job.Priority, 129 TriggeredBy: structs.EvalTriggerJobRegister, 130 JobID: job.ID, 131 Status: structs.EvalStatusPending, 132 } 133 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 134 135 // Process the evaluation 136 if err := h.Process(NewServiceScheduler, eval); err != nil { 137 t.Fatalf("err: %v", err) 138 } 139 140 // Ensure the plan allocated 141 plan := h.Plans[0] 142 planned := make(map[string]*structs.Allocation) 143 for _, allocList := range 
plan.NodeAllocation { 144 for _, alloc := range allocList { 145 planned[alloc.ID] = alloc 146 } 147 } 148 if len(planned) != 10 { 149 t.Fatalf("bad: %#v", plan) 150 } 151 152 // Update the job to force a rolling upgrade 153 updated := job.Copy() 154 updated.TaskGroups[0].Tasks[0].Resources.CPU += 10 155 require.NoError(t, h.State.UpsertJob(h.NextIndex(), updated)) 156 157 // Create a mock evaluation to handle the update 158 eval = &structs.Evaluation{ 159 Namespace: structs.DefaultNamespace, 160 ID: uuid.Generate(), 161 Priority: job.Priority, 162 TriggeredBy: structs.EvalTriggerNodeUpdate, 163 JobID: job.ID, 164 Status: structs.EvalStatusPending, 165 } 166 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 167 h1 := NewHarnessWithState(t, h.State) 168 if err := h1.Process(NewServiceScheduler, eval); err != nil { 169 t.Fatalf("err: %v", err) 170 } 171 172 // Ensure we have created only one new allocation 173 // Ensure a single plan 174 if len(h1.Plans) != 1 { 175 t.Fatalf("bad: %#v", h1.Plans) 176 } 177 plan = h1.Plans[0] 178 var newPlanned []*structs.Allocation 179 for _, allocList := range plan.NodeAllocation { 180 newPlanned = append(newPlanned, allocList...) 
181 } 182 if len(newPlanned) != 10 { 183 t.Fatalf("bad plan: %#v", plan) 184 } 185 // Ensure that the new allocations were placed on the same node as the older 186 // ones 187 for _, new := range newPlanned { 188 if new.PreviousAllocation == "" { 189 t.Fatalf("new alloc %q doesn't have a previous allocation", new.ID) 190 } 191 192 old, ok := planned[new.PreviousAllocation] 193 if !ok { 194 t.Fatalf("new alloc %q previous allocation doesn't match any prior placed alloc (%q)", new.ID, new.PreviousAllocation) 195 } 196 if new.NodeID != old.NodeID { 197 t.Fatalf("new alloc and old alloc node doesn't match; got %q; want %q", new.NodeID, old.NodeID) 198 } 199 } 200 } 201 202 func TestServiceSched_JobRegister_DiskConstraints(t *testing.T) { 203 h := NewHarness(t) 204 205 // Create a node 206 node := mock.Node() 207 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 208 209 // Create a job with count 2 and disk as 60GB so that only one allocation 210 // can fit 211 job := mock.Job() 212 job.TaskGroups[0].Count = 2 213 job.TaskGroups[0].EphemeralDisk.SizeMB = 88 * 1024 214 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 215 216 // Create a mock evaluation to register the job 217 eval := &structs.Evaluation{ 218 Namespace: structs.DefaultNamespace, 219 ID: uuid.Generate(), 220 Priority: job.Priority, 221 TriggeredBy: structs.EvalTriggerJobRegister, 222 JobID: job.ID, 223 Status: structs.EvalStatusPending, 224 } 225 226 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 227 228 // Process the evaluation 229 err := h.Process(NewServiceScheduler, eval) 230 if err != nil { 231 t.Fatalf("err: %v", err) 232 } 233 234 // Ensure a single plan 235 if len(h.Plans) != 1 { 236 t.Fatalf("bad: %#v", h.Plans) 237 } 238 plan := h.Plans[0] 239 240 // Ensure the plan doesn't have annotations. 
241 if plan.Annotations != nil { 242 t.Fatalf("expected no annotations") 243 } 244 245 // Ensure the eval has a blocked eval 246 if len(h.CreateEvals) != 1 { 247 t.Fatalf("bad: %#v", h.CreateEvals) 248 } 249 250 if h.CreateEvals[0].TriggeredBy != structs.EvalTriggerQueuedAllocs { 251 t.Fatalf("bad: %#v", h.CreateEvals[0]) 252 } 253 254 // Ensure the plan allocated only one allocation 255 var planned []*structs.Allocation 256 for _, allocList := range plan.NodeAllocation { 257 planned = append(planned, allocList...) 258 } 259 if len(planned) != 1 { 260 t.Fatalf("bad: %#v", plan) 261 } 262 263 // Lookup the allocations by JobID 264 ws := memdb.NewWatchSet() 265 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 266 require.NoError(t, err) 267 268 // Ensure only one allocation was placed 269 if len(out) != 1 { 270 t.Fatalf("bad: %#v", out) 271 } 272 273 h.AssertEvalStatus(t, structs.EvalStatusComplete) 274 } 275 276 func TestServiceSched_JobRegister_DistinctHosts(t *testing.T) { 277 h := NewHarness(t) 278 279 // Create some nodes 280 for i := 0; i < 10; i++ { 281 node := mock.Node() 282 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 283 } 284 285 // Create a job that uses distinct host and has count 1 higher than what is 286 // possible. 
287 job := mock.Job() 288 job.TaskGroups[0].Count = 11 289 job.Constraints = append(job.Constraints, &structs.Constraint{Operand: structs.ConstraintDistinctHosts}) 290 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 291 292 // Create a mock evaluation to register the job 293 eval := &structs.Evaluation{ 294 Namespace: structs.DefaultNamespace, 295 ID: uuid.Generate(), 296 Priority: job.Priority, 297 TriggeredBy: structs.EvalTriggerJobRegister, 298 JobID: job.ID, 299 Status: structs.EvalStatusPending, 300 } 301 302 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 303 304 // Process the evaluation 305 err := h.Process(NewServiceScheduler, eval) 306 if err != nil { 307 t.Fatalf("err: %v", err) 308 } 309 310 // Ensure a single plan 311 if len(h.Plans) != 1 { 312 t.Fatalf("bad: %#v", h.Plans) 313 } 314 plan := h.Plans[0] 315 316 // Ensure the eval has spawned blocked eval 317 if len(h.CreateEvals) != 1 { 318 t.Fatalf("bad: %#v", h.CreateEvals) 319 } 320 321 // Ensure the plan failed to alloc 322 outEval := h.Evals[0] 323 if len(outEval.FailedTGAllocs) != 1 { 324 t.Fatalf("bad: %+v", outEval) 325 } 326 327 // Ensure the plan allocated 328 var planned []*structs.Allocation 329 for _, allocList := range plan.NodeAllocation { 330 planned = append(planned, allocList...) 331 } 332 if len(planned) != 10 { 333 t.Fatalf("bad: %#v", plan) 334 } 335 336 // Lookup the allocations by JobID 337 ws := memdb.NewWatchSet() 338 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 339 require.NoError(t, err) 340 341 // Ensure all allocations placed 342 if len(out) != 10 { 343 t.Fatalf("bad: %#v", out) 344 } 345 346 // Ensure different node was used per. 
347 used := make(map[string]struct{}) 348 for _, alloc := range out { 349 if _, ok := used[alloc.NodeID]; ok { 350 t.Fatalf("Node collision %v", alloc.NodeID) 351 } 352 used[alloc.NodeID] = struct{}{} 353 } 354 355 h.AssertEvalStatus(t, structs.EvalStatusComplete) 356 } 357 358 func TestServiceSched_JobRegister_DistinctProperty(t *testing.T) { 359 h := NewHarness(t) 360 361 // Create some nodes 362 for i := 0; i < 10; i++ { 363 node := mock.Node() 364 rack := "rack2" 365 if i < 5 { 366 rack = "rack1" 367 } 368 node.Meta["rack"] = rack 369 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 370 } 371 372 // Create a job that uses distinct property and has count higher than what is 373 // possible. 374 job := mock.Job() 375 job.TaskGroups[0].Count = 8 376 job.Constraints = append(job.Constraints, 377 &structs.Constraint{ 378 Operand: structs.ConstraintDistinctProperty, 379 LTarget: "${meta.rack}", 380 RTarget: "2", 381 }) 382 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 383 384 // Create a mock evaluation to register the job 385 eval := &structs.Evaluation{ 386 Namespace: structs.DefaultNamespace, 387 ID: uuid.Generate(), 388 Priority: job.Priority, 389 TriggeredBy: structs.EvalTriggerJobRegister, 390 JobID: job.ID, 391 Status: structs.EvalStatusPending, 392 } 393 394 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 395 396 // Process the evaluation 397 err := h.Process(NewServiceScheduler, eval) 398 if err != nil { 399 t.Fatalf("err: %v", err) 400 } 401 402 // Ensure a single plan 403 if len(h.Plans) != 1 { 404 t.Fatalf("bad: %#v", h.Plans) 405 } 406 plan := h.Plans[0] 407 408 // Ensure the plan doesn't have annotations. 
409 if plan.Annotations != nil { 410 t.Fatalf("expected no annotations") 411 } 412 413 // Ensure the eval has spawned blocked eval 414 if len(h.CreateEvals) != 1 { 415 t.Fatalf("bad: %#v", h.CreateEvals) 416 } 417 418 // Ensure the plan failed to alloc 419 outEval := h.Evals[0] 420 if len(outEval.FailedTGAllocs) != 1 { 421 t.Fatalf("bad: %+v", outEval) 422 } 423 424 // Ensure the plan allocated 425 var planned []*structs.Allocation 426 for _, allocList := range plan.NodeAllocation { 427 planned = append(planned, allocList...) 428 } 429 if len(planned) != 4 { 430 t.Fatalf("bad: %#v", plan) 431 } 432 433 // Lookup the allocations by JobID 434 ws := memdb.NewWatchSet() 435 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 436 require.NoError(t, err) 437 438 // Ensure all allocations placed 439 if len(out) != 4 { 440 t.Fatalf("bad: %#v", out) 441 } 442 443 // Ensure each node was only used twice 444 used := make(map[string]uint64) 445 for _, alloc := range out { 446 if count, _ := used[alloc.NodeID]; count > 2 { 447 t.Fatalf("Node %v used too much: %d", alloc.NodeID, count) 448 } 449 used[alloc.NodeID]++ 450 } 451 452 h.AssertEvalStatus(t, structs.EvalStatusComplete) 453 } 454 455 func TestServiceSched_JobRegister_DistinctProperty_TaskGroup(t *testing.T) { 456 h := NewHarness(t) 457 458 // Create some nodes 459 for i := 0; i < 2; i++ { 460 node := mock.Node() 461 node.Meta["ssd"] = "true" 462 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 463 } 464 465 // Create a job that uses distinct property only on one task group. 
466 job := mock.Job() 467 job.TaskGroups = append(job.TaskGroups, job.TaskGroups[0].Copy()) 468 job.TaskGroups[0].Count = 1 469 job.TaskGroups[0].Constraints = append(job.TaskGroups[0].Constraints, 470 &structs.Constraint{ 471 Operand: structs.ConstraintDistinctProperty, 472 LTarget: "${meta.ssd}", 473 }) 474 475 job.TaskGroups[1].Name = "tg2" 476 job.TaskGroups[1].Count = 2 477 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 478 479 // Create a mock evaluation to register the job 480 eval := &structs.Evaluation{ 481 Namespace: structs.DefaultNamespace, 482 ID: uuid.Generate(), 483 Priority: job.Priority, 484 TriggeredBy: structs.EvalTriggerJobRegister, 485 JobID: job.ID, 486 Status: structs.EvalStatusPending, 487 } 488 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 489 490 // Process the evaluation 491 err := h.Process(NewServiceScheduler, eval) 492 if err != nil { 493 t.Fatalf("err: %v", err) 494 } 495 496 // Ensure a single plan 497 if len(h.Plans) != 1 { 498 t.Fatalf("bad: %#v", h.Plans) 499 } 500 plan := h.Plans[0] 501 502 // Ensure the plan doesn't have annotations. 503 if plan.Annotations != nil { 504 t.Fatalf("expected no annotations") 505 } 506 507 // Ensure the eval hasn't spawned blocked eval 508 if len(h.CreateEvals) != 0 { 509 t.Fatalf("bad: %#v", h.CreateEvals[0]) 510 } 511 512 // Ensure the plan allocated 513 var planned []*structs.Allocation 514 for _, allocList := range plan.NodeAllocation { 515 planned = append(planned, allocList...) 
516 } 517 if len(planned) != 3 { 518 t.Fatalf("bad: %#v", plan) 519 } 520 521 // Lookup the allocations by JobID 522 ws := memdb.NewWatchSet() 523 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 524 require.NoError(t, err) 525 526 // Ensure all allocations placed 527 if len(out) != 3 { 528 t.Fatalf("bad: %#v", out) 529 } 530 531 h.AssertEvalStatus(t, structs.EvalStatusComplete) 532 } 533 534 func TestServiceSched_JobRegister_DistinctProperty_TaskGroup_Incr(t *testing.T) { 535 h := NewHarness(t) 536 assert := assert.New(t) 537 538 // Create a job that uses distinct property over the node-id 539 job := mock.Job() 540 job.TaskGroups[0].Count = 3 541 job.TaskGroups[0].Constraints = append(job.TaskGroups[0].Constraints, 542 &structs.Constraint{ 543 Operand: structs.ConstraintDistinctProperty, 544 LTarget: "${node.unique.id}", 545 }) 546 assert.Nil(h.State.UpsertJob(h.NextIndex(), job), "UpsertJob") 547 548 // Create some nodes 549 var nodes []*structs.Node 550 for i := 0; i < 6; i++ { 551 node := mock.Node() 552 nodes = append(nodes, node) 553 assert.Nil(h.State.UpsertNode(h.NextIndex(), node), "UpsertNode") 554 } 555 556 // Create some allocations 557 var allocs []*structs.Allocation 558 for i := 0; i < 3; i++ { 559 alloc := mock.Alloc() 560 alloc.Job = job 561 alloc.JobID = job.ID 562 alloc.NodeID = nodes[i].ID 563 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 564 allocs = append(allocs, alloc) 565 } 566 assert.Nil(h.State.UpsertAllocs(h.NextIndex(), allocs), "UpsertAllocs") 567 568 // Update the count 569 job2 := job.Copy() 570 job2.TaskGroups[0].Count = 6 571 assert.Nil(h.State.UpsertJob(h.NextIndex(), job2), "UpsertJob") 572 573 // Create a mock evaluation to register the job 574 eval := &structs.Evaluation{ 575 Namespace: structs.DefaultNamespace, 576 ID: uuid.Generate(), 577 Priority: job.Priority, 578 TriggeredBy: structs.EvalTriggerJobRegister, 579 JobID: job.ID, 580 Status: structs.EvalStatusPending, 581 } 582 require.NoError(t, 
h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 583 584 // Process the evaluation 585 assert.Nil(h.Process(NewServiceScheduler, eval), "Process") 586 587 // Ensure a single plan 588 assert.Len(h.Plans, 1, "Number of plans") 589 plan := h.Plans[0] 590 591 // Ensure the plan doesn't have annotations. 592 assert.Nil(plan.Annotations, "Plan.Annotations") 593 594 // Ensure the eval hasn't spawned blocked eval 595 assert.Len(h.CreateEvals, 0, "Created Evals") 596 597 // Ensure the plan allocated 598 var planned []*structs.Allocation 599 for _, allocList := range plan.NodeAllocation { 600 planned = append(planned, allocList...) 601 } 602 assert.Len(planned, 6, "Planned Allocations") 603 604 // Lookup the allocations by JobID 605 ws := memdb.NewWatchSet() 606 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 607 assert.Nil(err, "AllocsByJob") 608 609 // Ensure all allocations placed 610 assert.Len(out, 6, "Placed Allocations") 611 612 h.AssertEvalStatus(t, structs.EvalStatusComplete) 613 } 614 615 // Test job registration with spread configured 616 func TestServiceSched_Spread(t *testing.T) { 617 assert := assert.New(t) 618 619 start := uint8(100) 620 step := uint8(10) 621 622 for i := 0; i < 10; i++ { 623 name := fmt.Sprintf("%d%% in dc1", start) 624 t.Run(name, func(t *testing.T) { 625 h := NewHarness(t) 626 remaining := uint8(100 - start) 627 // Create a job that uses spread over data center 628 job := mock.Job() 629 job.Datacenters = []string{"dc1", "dc2"} 630 job.TaskGroups[0].Count = 10 631 job.TaskGroups[0].Spreads = append(job.TaskGroups[0].Spreads, 632 &structs.Spread{ 633 Attribute: "${node.datacenter}", 634 Weight: 100, 635 SpreadTarget: []*structs.SpreadTarget{ 636 { 637 Value: "dc1", 638 Percent: start, 639 }, 640 { 641 Value: "dc2", 642 Percent: remaining, 643 }, 644 }, 645 }) 646 assert.Nil(h.State.UpsertJob(h.NextIndex(), job), "UpsertJob") 647 // Create some nodes, half in dc2 648 var nodes []*structs.Node 649 nodeMap := 
make(map[string]*structs.Node) 650 for i := 0; i < 10; i++ { 651 node := mock.Node() 652 if i%2 == 0 { 653 node.Datacenter = "dc2" 654 } 655 nodes = append(nodes, node) 656 assert.Nil(h.State.UpsertNode(h.NextIndex(), node), "UpsertNode") 657 nodeMap[node.ID] = node 658 } 659 660 // Create a mock evaluation to register the job 661 eval := &structs.Evaluation{ 662 Namespace: structs.DefaultNamespace, 663 ID: uuid.Generate(), 664 Priority: job.Priority, 665 TriggeredBy: structs.EvalTriggerJobRegister, 666 JobID: job.ID, 667 Status: structs.EvalStatusPending, 668 } 669 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 670 671 // Process the evaluation 672 assert.Nil(h.Process(NewServiceScheduler, eval), "Process") 673 674 // Ensure a single plan 675 assert.Len(h.Plans, 1, "Number of plans") 676 plan := h.Plans[0] 677 678 // Ensure the plan doesn't have annotations. 679 assert.Nil(plan.Annotations, "Plan.Annotations") 680 681 // Ensure the eval hasn't spawned blocked eval 682 assert.Len(h.CreateEvals, 0, "Created Evals") 683 684 // Ensure the plan allocated 685 var planned []*structs.Allocation 686 dcAllocsMap := make(map[string]int) 687 for nodeId, allocList := range plan.NodeAllocation { 688 planned = append(planned, allocList...) 
689 dc := nodeMap[nodeId].Datacenter 690 c := dcAllocsMap[dc] 691 c += len(allocList) 692 dcAllocsMap[dc] = c 693 } 694 assert.Len(planned, 10, "Planned Allocations") 695 696 expectedCounts := make(map[string]int) 697 expectedCounts["dc1"] = 10 - i 698 if i > 0 { 699 expectedCounts["dc2"] = i 700 } 701 require.Equal(t, expectedCounts, dcAllocsMap) 702 703 h.AssertEvalStatus(t, structs.EvalStatusComplete) 704 }) 705 start = start - step 706 } 707 } 708 709 // Test job registration with even spread across dc 710 func TestServiceSched_EvenSpread(t *testing.T) { 711 assert := assert.New(t) 712 713 h := NewHarness(t) 714 // Create a job that uses even spread over data center 715 job := mock.Job() 716 job.Datacenters = []string{"dc1", "dc2"} 717 job.TaskGroups[0].Count = 10 718 job.TaskGroups[0].Spreads = append(job.TaskGroups[0].Spreads, 719 &structs.Spread{ 720 Attribute: "${node.datacenter}", 721 Weight: 100, 722 }) 723 assert.Nil(h.State.UpsertJob(h.NextIndex(), job), "UpsertJob") 724 // Create some nodes, half in dc2 725 var nodes []*structs.Node 726 nodeMap := make(map[string]*structs.Node) 727 for i := 0; i < 10; i++ { 728 node := mock.Node() 729 if i%2 == 0 { 730 node.Datacenter = "dc2" 731 } 732 nodes = append(nodes, node) 733 assert.Nil(h.State.UpsertNode(h.NextIndex(), node), "UpsertNode") 734 nodeMap[node.ID] = node 735 } 736 737 // Create a mock evaluation to register the job 738 eval := &structs.Evaluation{ 739 Namespace: structs.DefaultNamespace, 740 ID: uuid.Generate(), 741 Priority: job.Priority, 742 TriggeredBy: structs.EvalTriggerJobRegister, 743 JobID: job.ID, 744 Status: structs.EvalStatusPending, 745 } 746 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 747 748 // Process the evaluation 749 assert.Nil(h.Process(NewServiceScheduler, eval), "Process") 750 751 // Ensure a single plan 752 assert.Len(h.Plans, 1, "Number of plans") 753 plan := h.Plans[0] 754 755 // Ensure the plan doesn't have annotations. 
756 assert.Nil(plan.Annotations, "Plan.Annotations") 757 758 // Ensure the eval hasn't spawned blocked eval 759 assert.Len(h.CreateEvals, 0, "Created Evals") 760 761 // Ensure the plan allocated 762 var planned []*structs.Allocation 763 dcAllocsMap := make(map[string]int) 764 for nodeId, allocList := range plan.NodeAllocation { 765 planned = append(planned, allocList...) 766 dc := nodeMap[nodeId].Datacenter 767 c := dcAllocsMap[dc] 768 c += len(allocList) 769 dcAllocsMap[dc] = c 770 } 771 assert.Len(planned, 10, "Planned Allocations") 772 773 // Expect even split allocs across datacenter 774 expectedCounts := make(map[string]int) 775 expectedCounts["dc1"] = 5 776 expectedCounts["dc2"] = 5 777 778 require.Equal(t, expectedCounts, dcAllocsMap) 779 780 h.AssertEvalStatus(t, structs.EvalStatusComplete) 781 } 782 783 func TestServiceSched_JobRegister_Annotate(t *testing.T) { 784 h := NewHarness(t) 785 786 // Create some nodes 787 for i := 0; i < 10; i++ { 788 node := mock.Node() 789 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 790 } 791 792 // Create a job 793 job := mock.Job() 794 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 795 796 // Create a mock evaluation to register the job 797 eval := &structs.Evaluation{ 798 Namespace: structs.DefaultNamespace, 799 ID: uuid.Generate(), 800 Priority: job.Priority, 801 TriggeredBy: structs.EvalTriggerJobRegister, 802 JobID: job.ID, 803 AnnotatePlan: true, 804 Status: structs.EvalStatusPending, 805 } 806 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 807 808 // Process the evaluation 809 err := h.Process(NewServiceScheduler, eval) 810 if err != nil { 811 t.Fatalf("err: %v", err) 812 } 813 814 // Ensure a single plan 815 if len(h.Plans) != 1 { 816 t.Fatalf("bad: %#v", h.Plans) 817 } 818 plan := h.Plans[0] 819 820 // Ensure the plan allocated 821 var planned []*structs.Allocation 822 for _, allocList := range plan.NodeAllocation { 823 planned = append(planned, 
allocList...) 824 } 825 if len(planned) != 10 { 826 t.Fatalf("bad: %#v", plan) 827 } 828 829 // Lookup the allocations by JobID 830 ws := memdb.NewWatchSet() 831 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 832 require.NoError(t, err) 833 834 // Ensure all allocations placed 835 if len(out) != 10 { 836 t.Fatalf("bad: %#v", out) 837 } 838 839 h.AssertEvalStatus(t, structs.EvalStatusComplete) 840 841 // Ensure the plan had annotations. 842 if plan.Annotations == nil { 843 t.Fatalf("expected annotations") 844 } 845 846 desiredTGs := plan.Annotations.DesiredTGUpdates 847 if l := len(desiredTGs); l != 1 { 848 t.Fatalf("incorrect number of task groups; got %v; want %v", l, 1) 849 } 850 851 desiredChanges, ok := desiredTGs["web"] 852 if !ok { 853 t.Fatalf("expected task group web to have desired changes") 854 } 855 856 expected := &structs.DesiredUpdates{Place: 10} 857 if !reflect.DeepEqual(desiredChanges, expected) { 858 t.Fatalf("Unexpected desired updates; got %#v; want %#v", desiredChanges, expected) 859 } 860 } 861 862 func TestServiceSched_JobRegister_CountZero(t *testing.T) { 863 h := NewHarness(t) 864 865 // Create some nodes 866 for i := 0; i < 10; i++ { 867 node := mock.Node() 868 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 869 } 870 871 // Create a job and set the task group count to zero. 
872 job := mock.Job() 873 job.TaskGroups[0].Count = 0 874 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 875 876 // Create a mock evaluation to register the job 877 eval := &structs.Evaluation{ 878 Namespace: structs.DefaultNamespace, 879 ID: uuid.Generate(), 880 Priority: job.Priority, 881 TriggeredBy: structs.EvalTriggerJobRegister, 882 JobID: job.ID, 883 Status: structs.EvalStatusPending, 884 } 885 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 886 887 // Process the evaluation 888 err := h.Process(NewServiceScheduler, eval) 889 if err != nil { 890 t.Fatalf("err: %v", err) 891 } 892 893 // Ensure there was no plan 894 if len(h.Plans) != 0 { 895 t.Fatalf("bad: %#v", h.Plans) 896 } 897 898 // Lookup the allocations by JobID 899 ws := memdb.NewWatchSet() 900 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 901 require.NoError(t, err) 902 903 // Ensure no allocations placed 904 if len(out) != 0 { 905 t.Fatalf("bad: %#v", out) 906 } 907 908 h.AssertEvalStatus(t, structs.EvalStatusComplete) 909 } 910 911 func TestServiceSched_JobRegister_AllocFail(t *testing.T) { 912 h := NewHarness(t) 913 914 // Create NO nodes 915 // Create a job 916 job := mock.Job() 917 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 918 919 // Create a mock evaluation to register the job 920 eval := &structs.Evaluation{ 921 Namespace: structs.DefaultNamespace, 922 ID: uuid.Generate(), 923 Priority: job.Priority, 924 TriggeredBy: structs.EvalTriggerJobRegister, 925 JobID: job.ID, 926 Status: structs.EvalStatusPending, 927 } 928 929 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 930 931 // Process the evaluation 932 err := h.Process(NewServiceScheduler, eval) 933 if err != nil { 934 t.Fatalf("err: %v", err) 935 } 936 937 // Ensure no plan 938 if len(h.Plans) != 0 { 939 t.Fatalf("bad: %#v", h.Plans) 940 } 941 942 // Ensure there is a follow up eval. 
943 if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked { 944 t.Fatalf("bad: %#v", h.CreateEvals) 945 } 946 947 if len(h.Evals) != 1 { 948 t.Fatalf("incorrect number of updated eval: %#v", h.Evals) 949 } 950 outEval := h.Evals[0] 951 952 // Ensure the eval has its spawned blocked eval 953 if outEval.BlockedEval != h.CreateEvals[0].ID { 954 t.Fatalf("bad: %#v", outEval) 955 } 956 957 // Ensure the plan failed to alloc 958 if outEval == nil || len(outEval.FailedTGAllocs) != 1 { 959 t.Fatalf("bad: %#v", outEval) 960 } 961 962 metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name] 963 if !ok { 964 t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs) 965 } 966 967 // Check the coalesced failures 968 if metrics.CoalescedFailures != 9 { 969 t.Fatalf("bad: %#v", metrics) 970 } 971 972 // Check the available nodes 973 if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 0 { 974 t.Fatalf("bad: %#v", metrics) 975 } 976 977 // Check queued allocations 978 queued := outEval.QueuedAllocations["web"] 979 if queued != 10 { 980 t.Fatalf("expected queued: %v, actual: %v", 10, queued) 981 } 982 h.AssertEvalStatus(t, structs.EvalStatusComplete) 983 } 984 985 func TestServiceSched_JobRegister_CreateBlockedEval(t *testing.T) { 986 h := NewHarness(t) 987 988 // Create a full node 989 node := mock.Node() 990 node.ReservedResources = &structs.NodeReservedResources{ 991 Cpu: structs.NodeReservedCpuResources{ 992 CpuShares: node.NodeResources.Cpu.CpuShares, 993 }, 994 } 995 node.ComputeClass() 996 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 997 998 // Create an ineligible node 999 node2 := mock.Node() 1000 node2.Attributes["kernel.name"] = "windows" 1001 node2.ComputeClass() 1002 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node2)) 1003 1004 // Create a jobs 1005 job := mock.Job() 1006 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1007 1008 // Create a mock evaluation to register the job 1009 eval := 
&structs.Evaluation{ 1010 Namespace: structs.DefaultNamespace, 1011 ID: uuid.Generate(), 1012 Priority: job.Priority, 1013 TriggeredBy: structs.EvalTriggerJobRegister, 1014 JobID: job.ID, 1015 Status: structs.EvalStatusPending, 1016 } 1017 1018 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1019 1020 // Process the evaluation 1021 err := h.Process(NewServiceScheduler, eval) 1022 if err != nil { 1023 t.Fatalf("err: %v", err) 1024 } 1025 1026 // Ensure no plan 1027 if len(h.Plans) != 0 { 1028 t.Fatalf("bad: %#v", h.Plans) 1029 } 1030 1031 // Ensure the plan has created a follow up eval. 1032 if len(h.CreateEvals) != 1 { 1033 t.Fatalf("bad: %#v", h.CreateEvals) 1034 } 1035 1036 created := h.CreateEvals[0] 1037 if created.Status != structs.EvalStatusBlocked { 1038 t.Fatalf("bad: %#v", created) 1039 } 1040 1041 classes := created.ClassEligibility 1042 if len(classes) != 2 || !classes[node.ComputedClass] || classes[node2.ComputedClass] { 1043 t.Fatalf("bad: %#v", classes) 1044 } 1045 1046 if created.EscapedComputedClass { 1047 t.Fatalf("bad: %#v", created) 1048 } 1049 1050 // Ensure there is a follow up eval. 
1051 if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusBlocked { 1052 t.Fatalf("bad: %#v", h.CreateEvals) 1053 } 1054 1055 if len(h.Evals) != 1 { 1056 t.Fatalf("incorrect number of updated eval: %#v", h.Evals) 1057 } 1058 outEval := h.Evals[0] 1059 1060 // Ensure the plan failed to alloc 1061 if outEval == nil || len(outEval.FailedTGAllocs) != 1 { 1062 t.Fatalf("bad: %#v", outEval) 1063 } 1064 1065 metrics, ok := outEval.FailedTGAllocs[job.TaskGroups[0].Name] 1066 if !ok { 1067 t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs) 1068 } 1069 1070 // Check the coalesced failures 1071 if metrics.CoalescedFailures != 9 { 1072 t.Fatalf("bad: %#v", metrics) 1073 } 1074 1075 // Check the available nodes 1076 if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 2 { 1077 t.Fatalf("bad: %#v", metrics) 1078 } 1079 1080 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1081 } 1082 1083 func TestServiceSched_JobRegister_FeasibleAndInfeasibleTG(t *testing.T) { 1084 h := NewHarness(t) 1085 1086 // Create one node 1087 node := mock.Node() 1088 node.NodeClass = "class_0" 1089 require.NoError(t, node.ComputeClass()) 1090 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 1091 1092 // Create a job that constrains on a node class 1093 job := mock.Job() 1094 job.TaskGroups[0].Count = 2 1095 job.TaskGroups[0].Constraints = append(job.Constraints, 1096 &structs.Constraint{ 1097 LTarget: "${node.class}", 1098 RTarget: "class_0", 1099 Operand: "=", 1100 }, 1101 ) 1102 tg2 := job.TaskGroups[0].Copy() 1103 tg2.Name = "web2" 1104 tg2.Constraints[1].RTarget = "class_1" 1105 job.TaskGroups = append(job.TaskGroups, tg2) 1106 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1107 1108 // Create a mock evaluation to register the job 1109 eval := &structs.Evaluation{ 1110 Namespace: structs.DefaultNamespace, 1111 ID: uuid.Generate(), 1112 Priority: job.Priority, 1113 TriggeredBy: structs.EvalTriggerJobRegister, 1114 JobID: job.ID, 1115 
Status: structs.EvalStatusPending, 1116 } 1117 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1118 // Process the evaluation 1119 err := h.Process(NewServiceScheduler, eval) 1120 if err != nil { 1121 t.Fatalf("err: %v", err) 1122 } 1123 1124 // Ensure a single plan 1125 if len(h.Plans) != 1 { 1126 t.Fatalf("bad: %#v", h.Plans) 1127 } 1128 plan := h.Plans[0] 1129 1130 // Ensure the plan allocated 1131 var planned []*structs.Allocation 1132 for _, allocList := range plan.NodeAllocation { 1133 planned = append(planned, allocList...) 1134 } 1135 if len(planned) != 2 { 1136 t.Fatalf("bad: %#v", plan) 1137 } 1138 1139 // Ensure two allocations placed 1140 ws := memdb.NewWatchSet() 1141 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 1142 require.NoError(t, err) 1143 if len(out) != 2 { 1144 t.Fatalf("bad: %#v", out) 1145 } 1146 1147 if len(h.Evals) != 1 { 1148 t.Fatalf("incorrect number of updated eval: %#v", h.Evals) 1149 } 1150 outEval := h.Evals[0] 1151 1152 // Ensure the eval has its spawned blocked eval 1153 if outEval.BlockedEval != h.CreateEvals[0].ID { 1154 t.Fatalf("bad: %#v", outEval) 1155 } 1156 1157 // Ensure the plan failed to alloc one tg 1158 if outEval == nil || len(outEval.FailedTGAllocs) != 1 { 1159 t.Fatalf("bad: %#v", outEval) 1160 } 1161 1162 metrics, ok := outEval.FailedTGAllocs[tg2.Name] 1163 if !ok { 1164 t.Fatalf("no failed metrics: %#v", outEval.FailedTGAllocs) 1165 } 1166 1167 // Check the coalesced failures 1168 if metrics.CoalescedFailures != tg2.Count-1 { 1169 t.Fatalf("bad: %#v", metrics) 1170 } 1171 1172 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1173 } 1174 1175 // This test just ensures the scheduler handles the eval type to avoid 1176 // regressions. 1177 func TestServiceSched_EvaluateMaxPlanEval(t *testing.T) { 1178 h := NewHarness(t) 1179 1180 // Create a job and set the task group count to zero. 
1181 job := mock.Job() 1182 job.TaskGroups[0].Count = 0 1183 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1184 1185 // Create a mock blocked evaluation 1186 eval := &structs.Evaluation{ 1187 Namespace: structs.DefaultNamespace, 1188 ID: uuid.Generate(), 1189 Status: structs.EvalStatusBlocked, 1190 Priority: job.Priority, 1191 TriggeredBy: structs.EvalTriggerMaxPlans, 1192 JobID: job.ID, 1193 } 1194 1195 // Insert it into the state store 1196 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1197 1198 // Process the evaluation 1199 err := h.Process(NewServiceScheduler, eval) 1200 if err != nil { 1201 t.Fatalf("err: %v", err) 1202 } 1203 1204 // Ensure there was no plan 1205 if len(h.Plans) != 0 { 1206 t.Fatalf("bad: %#v", h.Plans) 1207 } 1208 1209 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1210 } 1211 1212 func TestServiceSched_Plan_Partial_Progress(t *testing.T) { 1213 h := NewHarness(t) 1214 1215 // Create a node 1216 node := mock.Node() 1217 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 1218 1219 // Create a job with a high resource ask so that all the allocations can't 1220 // be placed on a single node. 
1221 job := mock.Job() 1222 job.TaskGroups[0].Count = 3 1223 job.TaskGroups[0].Tasks[0].Resources.CPU = 3600 1224 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1225 1226 // Create a mock evaluation to register the job 1227 eval := &structs.Evaluation{ 1228 Namespace: structs.DefaultNamespace, 1229 ID: uuid.Generate(), 1230 Priority: job.Priority, 1231 TriggeredBy: structs.EvalTriggerJobRegister, 1232 JobID: job.ID, 1233 Status: structs.EvalStatusPending, 1234 } 1235 1236 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1237 1238 // Process the evaluation 1239 err := h.Process(NewServiceScheduler, eval) 1240 if err != nil { 1241 t.Fatalf("err: %v", err) 1242 } 1243 1244 // Ensure a single plan 1245 if len(h.Plans) != 1 { 1246 t.Fatalf("bad: %#v", h.Plans) 1247 } 1248 plan := h.Plans[0] 1249 1250 // Ensure the plan doesn't have annotations. 1251 if plan.Annotations != nil { 1252 t.Fatalf("expected no annotations") 1253 } 1254 1255 // Ensure the plan allocated 1256 var planned []*structs.Allocation 1257 for _, allocList := range plan.NodeAllocation { 1258 planned = append(planned, allocList...) 
1259 } 1260 if len(planned) != 1 { 1261 t.Fatalf("bad: %#v", plan) 1262 } 1263 1264 // Lookup the allocations by JobID 1265 ws := memdb.NewWatchSet() 1266 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 1267 require.NoError(t, err) 1268 1269 // Ensure only one allocations placed 1270 if len(out) != 1 { 1271 t.Fatalf("bad: %#v", out) 1272 } 1273 1274 queued := h.Evals[0].QueuedAllocations["web"] 1275 if queued != 2 { 1276 t.Fatalf("expected: %v, actual: %v", 2, queued) 1277 } 1278 1279 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1280 } 1281 1282 func TestServiceSched_EvaluateBlockedEval(t *testing.T) { 1283 h := NewHarness(t) 1284 1285 // Create a job 1286 job := mock.Job() 1287 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1288 1289 // Create a mock blocked evaluation 1290 eval := &structs.Evaluation{ 1291 Namespace: structs.DefaultNamespace, 1292 ID: uuid.Generate(), 1293 Status: structs.EvalStatusBlocked, 1294 Priority: job.Priority, 1295 TriggeredBy: structs.EvalTriggerJobRegister, 1296 JobID: job.ID, 1297 } 1298 1299 // Insert it into the state store 1300 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1301 1302 // Process the evaluation 1303 err := h.Process(NewServiceScheduler, eval) 1304 if err != nil { 1305 t.Fatalf("err: %v", err) 1306 } 1307 1308 // Ensure there was no plan 1309 if len(h.Plans) != 0 { 1310 t.Fatalf("bad: %#v", h.Plans) 1311 } 1312 1313 // Ensure that the eval was reblocked 1314 if len(h.ReblockEvals) != 1 { 1315 t.Fatalf("bad: %#v", h.ReblockEvals) 1316 } 1317 if h.ReblockEvals[0].ID != eval.ID { 1318 t.Fatalf("expect same eval to be reblocked; got %q; want %q", h.ReblockEvals[0].ID, eval.ID) 1319 } 1320 1321 // Ensure the eval status was not updated 1322 if len(h.Evals) != 0 { 1323 t.Fatalf("Existing eval should not have status set") 1324 } 1325 } 1326 1327 func TestServiceSched_EvaluateBlockedEval_Finished(t *testing.T) { 1328 h := NewHarness(t) 1329 1330 // Create 
some nodes 1331 for i := 0; i < 10; i++ { 1332 node := mock.Node() 1333 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 1334 } 1335 1336 // Create a job and set the task group count to zero. 1337 job := mock.Job() 1338 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1339 1340 // Create a mock blocked evaluation 1341 eval := &structs.Evaluation{ 1342 Namespace: structs.DefaultNamespace, 1343 ID: uuid.Generate(), 1344 Status: structs.EvalStatusBlocked, 1345 Priority: job.Priority, 1346 TriggeredBy: structs.EvalTriggerJobRegister, 1347 JobID: job.ID, 1348 } 1349 1350 // Insert it into the state store 1351 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1352 1353 // Process the evaluation 1354 err := h.Process(NewServiceScheduler, eval) 1355 if err != nil { 1356 t.Fatalf("err: %v", err) 1357 } 1358 1359 // Ensure a single plan 1360 if len(h.Plans) != 1 { 1361 t.Fatalf("bad: %#v", h.Plans) 1362 } 1363 plan := h.Plans[0] 1364 1365 // Ensure the plan doesn't have annotations. 1366 if plan.Annotations != nil { 1367 t.Fatalf("expected no annotations") 1368 } 1369 1370 // Ensure the eval has no spawned blocked eval 1371 if len(h.Evals) != 1 { 1372 t.Fatalf("bad: %#v", h.Evals) 1373 if h.Evals[0].BlockedEval != "" { 1374 t.Fatalf("bad: %#v", h.Evals[0]) 1375 } 1376 } 1377 1378 // Ensure the plan allocated 1379 var planned []*structs.Allocation 1380 for _, allocList := range plan.NodeAllocation { 1381 planned = append(planned, allocList...) 
1382 } 1383 if len(planned) != 10 { 1384 t.Fatalf("bad: %#v", plan) 1385 } 1386 1387 // Lookup the allocations by JobID 1388 ws := memdb.NewWatchSet() 1389 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 1390 require.NoError(t, err) 1391 1392 // Ensure all allocations placed 1393 if len(out) != 10 { 1394 t.Fatalf("bad: %#v", out) 1395 } 1396 1397 // Ensure the eval was not reblocked 1398 if len(h.ReblockEvals) != 0 { 1399 t.Fatalf("Existing eval should not have been reblocked as it placed all allocations") 1400 } 1401 1402 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1403 1404 // Ensure queued allocations is zero 1405 queued := h.Evals[0].QueuedAllocations["web"] 1406 if queued != 0 { 1407 t.Fatalf("expected queued: %v, actual: %v", 0, queued) 1408 } 1409 } 1410 1411 func TestServiceSched_JobModify(t *testing.T) { 1412 h := NewHarness(t) 1413 1414 // Create some nodes 1415 var nodes []*structs.Node 1416 for i := 0; i < 10; i++ { 1417 node := mock.Node() 1418 nodes = append(nodes, node) 1419 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 1420 } 1421 1422 // Generate a fake job with allocations 1423 job := mock.Job() 1424 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1425 1426 var allocs []*structs.Allocation 1427 for i := 0; i < 10; i++ { 1428 alloc := mock.Alloc() 1429 alloc.Job = job 1430 alloc.JobID = job.ID 1431 alloc.NodeID = nodes[i].ID 1432 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 1433 allocs = append(allocs, alloc) 1434 } 1435 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 1436 1437 // Add a few terminal status allocations, these should be ignored 1438 var terminal []*structs.Allocation 1439 for i := 0; i < 5; i++ { 1440 alloc := mock.Alloc() 1441 alloc.Job = job 1442 alloc.JobID = job.ID 1443 alloc.NodeID = nodes[i].ID 1444 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 1445 alloc.DesiredStatus = structs.AllocDesiredStatusStop 1446 terminal = append(terminal, alloc) 1447 } 1448 
require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), terminal)) 1449 1450 // Update the job 1451 job2 := mock.Job() 1452 job2.ID = job.ID 1453 1454 // Update the task, such that it cannot be done in-place 1455 job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other" 1456 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job2)) 1457 1458 // Create a mock evaluation to deal with drain 1459 eval := &structs.Evaluation{ 1460 Namespace: structs.DefaultNamespace, 1461 ID: uuid.Generate(), 1462 Priority: 50, 1463 TriggeredBy: structs.EvalTriggerJobRegister, 1464 JobID: job.ID, 1465 Status: structs.EvalStatusPending, 1466 } 1467 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1468 1469 // Process the evaluation 1470 err := h.Process(NewServiceScheduler, eval) 1471 if err != nil { 1472 t.Fatalf("err: %v", err) 1473 } 1474 1475 // Ensure a single plan 1476 if len(h.Plans) != 1 { 1477 t.Fatalf("bad: %#v", h.Plans) 1478 } 1479 plan := h.Plans[0] 1480 1481 // Ensure the plan evicted all allocs 1482 var update []*structs.Allocation 1483 for _, updateList := range plan.NodeUpdate { 1484 update = append(update, updateList...) 1485 } 1486 if len(update) != len(allocs) { 1487 t.Fatalf("bad: %#v", plan) 1488 } 1489 1490 // Ensure the plan allocated 1491 var planned []*structs.Allocation 1492 for _, allocList := range plan.NodeAllocation { 1493 planned = append(planned, allocList...) 1494 } 1495 if len(planned) != 10 { 1496 t.Fatalf("bad: %#v", plan) 1497 } 1498 1499 // Lookup the allocations by JobID 1500 ws := memdb.NewWatchSet() 1501 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 1502 require.NoError(t, err) 1503 1504 // Ensure all allocations placed 1505 out, _ = structs.FilterTerminalAllocs(out) 1506 if len(out) != 10 { 1507 t.Fatalf("bad: %#v", out) 1508 } 1509 1510 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1511 } 1512 1513 // Have a single node and submit a job. 
Increment the count such that all fit 1514 // on the node but the node doesn't have enough resources to fit the new count + 1515 // 1. This tests that we properly discount the resources of existing allocs. 1516 func TestServiceSched_JobModify_IncrCount_NodeLimit(t *testing.T) { 1517 h := NewHarness(t) 1518 1519 // Create one node 1520 node := mock.Node() 1521 node.NodeResources.Cpu.CpuShares = 1000 1522 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 1523 1524 // Generate a fake job with one allocation 1525 job := mock.Job() 1526 job.TaskGroups[0].Tasks[0].Resources.CPU = 256 1527 job2 := job.Copy() 1528 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1529 1530 var allocs []*structs.Allocation 1531 alloc := mock.Alloc() 1532 alloc.Job = job 1533 alloc.JobID = job.ID 1534 alloc.NodeID = node.ID 1535 alloc.Name = "my-job.web[0]" 1536 alloc.AllocatedResources.Tasks["web"].Cpu.CpuShares = 256 1537 allocs = append(allocs, alloc) 1538 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 1539 1540 // Update the job to count 3 1541 job2.TaskGroups[0].Count = 3 1542 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job2)) 1543 1544 // Create a mock evaluation to deal with drain 1545 eval := &structs.Evaluation{ 1546 Namespace: structs.DefaultNamespace, 1547 ID: uuid.Generate(), 1548 Priority: 50, 1549 TriggeredBy: structs.EvalTriggerJobRegister, 1550 JobID: job.ID, 1551 Status: structs.EvalStatusPending, 1552 } 1553 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1554 1555 // Process the evaluation 1556 err := h.Process(NewServiceScheduler, eval) 1557 if err != nil { 1558 t.Fatalf("err: %v", err) 1559 } 1560 1561 // Ensure a single plan 1562 if len(h.Plans) != 1 { 1563 t.Fatalf("bad: %#v", h.Plans) 1564 } 1565 plan := h.Plans[0] 1566 1567 // Ensure the plan didn't evicted the alloc 1568 var update []*structs.Allocation 1569 for _, updateList := range plan.NodeUpdate { 1570 update = append(update, 
updateList...) 1571 } 1572 if len(update) != 0 { 1573 t.Fatalf("bad: %#v", plan) 1574 } 1575 1576 // Ensure the plan allocated 1577 var planned []*structs.Allocation 1578 for _, allocList := range plan.NodeAllocation { 1579 planned = append(planned, allocList...) 1580 } 1581 if len(planned) != 3 { 1582 t.Fatalf("bad: %#v", plan) 1583 } 1584 1585 // Ensure the plan had no failures 1586 if len(h.Evals) != 1 { 1587 t.Fatalf("incorrect number of updated eval: %#v", h.Evals) 1588 } 1589 outEval := h.Evals[0] 1590 if outEval == nil || len(outEval.FailedTGAllocs) != 0 { 1591 t.Fatalf("bad: %#v", outEval) 1592 } 1593 1594 // Lookup the allocations by JobID 1595 ws := memdb.NewWatchSet() 1596 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 1597 require.NoError(t, err) 1598 1599 // Ensure all allocations placed 1600 out, _ = structs.FilterTerminalAllocs(out) 1601 if len(out) != 3 { 1602 t.Fatalf("bad: %#v", out) 1603 } 1604 1605 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1606 } 1607 1608 func TestServiceSched_JobModify_CountZero(t *testing.T) { 1609 h := NewHarness(t) 1610 1611 // Create some nodes 1612 var nodes []*structs.Node 1613 for i := 0; i < 10; i++ { 1614 node := mock.Node() 1615 nodes = append(nodes, node) 1616 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 1617 } 1618 1619 // Generate a fake job with allocations 1620 job := mock.Job() 1621 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1622 1623 var allocs []*structs.Allocation 1624 for i := 0; i < 10; i++ { 1625 alloc := mock.Alloc() 1626 alloc.Job = job 1627 alloc.JobID = job.ID 1628 alloc.NodeID = nodes[i].ID 1629 alloc.Name = structs.AllocName(alloc.JobID, alloc.TaskGroup, uint(i)) 1630 allocs = append(allocs, alloc) 1631 } 1632 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 1633 1634 // Add a few terminal status allocations, these should be ignored 1635 var terminal []*structs.Allocation 1636 for i := 0; i < 5; i++ { 1637 alloc := mock.Alloc() 
1638 alloc.Job = job 1639 alloc.JobID = job.ID 1640 alloc.NodeID = nodes[i].ID 1641 alloc.Name = structs.AllocName(alloc.JobID, alloc.TaskGroup, uint(i)) 1642 alloc.DesiredStatus = structs.AllocDesiredStatusStop 1643 terminal = append(terminal, alloc) 1644 } 1645 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), terminal)) 1646 1647 // Update the job to be count zero 1648 job2 := mock.Job() 1649 job2.ID = job.ID 1650 job2.TaskGroups[0].Count = 0 1651 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job2)) 1652 1653 // Create a mock evaluation to deal with drain 1654 eval := &structs.Evaluation{ 1655 Namespace: structs.DefaultNamespace, 1656 ID: uuid.Generate(), 1657 Priority: 50, 1658 TriggeredBy: structs.EvalTriggerJobRegister, 1659 JobID: job.ID, 1660 Status: structs.EvalStatusPending, 1661 } 1662 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1663 1664 // Process the evaluation 1665 err := h.Process(NewServiceScheduler, eval) 1666 if err != nil { 1667 t.Fatalf("err: %v", err) 1668 } 1669 1670 // Ensure a single plan 1671 if len(h.Plans) != 1 { 1672 t.Fatalf("bad: %#v", h.Plans) 1673 } 1674 plan := h.Plans[0] 1675 1676 // Ensure the plan evicted all allocs 1677 var update []*structs.Allocation 1678 for _, updateList := range plan.NodeUpdate { 1679 update = append(update, updateList...) 1680 } 1681 if len(update) != len(allocs) { 1682 t.Fatalf("bad: %#v", plan) 1683 } 1684 1685 // Ensure the plan didn't allocated 1686 var planned []*structs.Allocation 1687 for _, allocList := range plan.NodeAllocation { 1688 planned = append(planned, allocList...) 
1689 } 1690 if len(planned) != 0 { 1691 t.Fatalf("bad: %#v", plan) 1692 } 1693 1694 // Lookup the allocations by JobID 1695 ws := memdb.NewWatchSet() 1696 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 1697 require.NoError(t, err) 1698 1699 // Ensure all allocations placed 1700 out, _ = structs.FilterTerminalAllocs(out) 1701 if len(out) != 0 { 1702 t.Fatalf("bad: %#v", out) 1703 } 1704 1705 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1706 } 1707 1708 func TestServiceSched_JobModify_Rolling(t *testing.T) { 1709 h := NewHarness(t) 1710 1711 // Create some nodes 1712 var nodes []*structs.Node 1713 for i := 0; i < 10; i++ { 1714 node := mock.Node() 1715 nodes = append(nodes, node) 1716 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 1717 } 1718 1719 // Generate a fake job with allocations 1720 job := mock.Job() 1721 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1722 1723 var allocs []*structs.Allocation 1724 for i := 0; i < 10; i++ { 1725 alloc := mock.Alloc() 1726 alloc.Job = job 1727 alloc.JobID = job.ID 1728 alloc.NodeID = nodes[i].ID 1729 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 1730 allocs = append(allocs, alloc) 1731 } 1732 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 1733 1734 // Update the job 1735 job2 := mock.Job() 1736 job2.ID = job.ID 1737 desiredUpdates := 4 1738 job2.TaskGroups[0].Update = &structs.UpdateStrategy{ 1739 MaxParallel: desiredUpdates, 1740 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 1741 MinHealthyTime: 10 * time.Second, 1742 HealthyDeadline: 10 * time.Minute, 1743 } 1744 1745 // Update the task, such that it cannot be done in-place 1746 job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other" 1747 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job2)) 1748 1749 // Create a mock evaluation to deal with drain 1750 eval := &structs.Evaluation{ 1751 Namespace: structs.DefaultNamespace, 1752 ID: uuid.Generate(), 1753 Priority: 50, 1754 TriggeredBy: 
structs.EvalTriggerJobRegister, 1755 JobID: job.ID, 1756 Status: structs.EvalStatusPending, 1757 } 1758 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1759 1760 // Process the evaluation 1761 err := h.Process(NewServiceScheduler, eval) 1762 if err != nil { 1763 t.Fatalf("err: %v", err) 1764 } 1765 1766 // Ensure a single plan 1767 if len(h.Plans) != 1 { 1768 t.Fatalf("bad: %#v", h.Plans) 1769 } 1770 plan := h.Plans[0] 1771 1772 // Ensure the plan evicted only MaxParallel 1773 var update []*structs.Allocation 1774 for _, updateList := range plan.NodeUpdate { 1775 update = append(update, updateList...) 1776 } 1777 if len(update) != desiredUpdates { 1778 t.Fatalf("bad: got %d; want %d: %#v", len(update), desiredUpdates, plan) 1779 } 1780 1781 // Ensure the plan allocated 1782 var planned []*structs.Allocation 1783 for _, allocList := range plan.NodeAllocation { 1784 planned = append(planned, allocList...) 1785 } 1786 if len(planned) != desiredUpdates { 1787 t.Fatalf("bad: %#v", plan) 1788 } 1789 1790 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1791 1792 // Check that the deployment id is attached to the eval 1793 if h.Evals[0].DeploymentID == "" { 1794 t.Fatalf("Eval not annotated with deployment id") 1795 } 1796 1797 // Ensure a deployment was created 1798 if plan.Deployment == nil { 1799 t.Fatalf("bad: %#v", plan) 1800 } 1801 state, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name] 1802 if !ok { 1803 t.Fatalf("bad: %#v", plan) 1804 } 1805 if state.DesiredTotal != 10 && state.DesiredCanaries != 0 { 1806 t.Fatalf("bad: %#v", state) 1807 } 1808 } 1809 1810 // This tests that the old allocation is stopped before placing. 1811 // It is critical to test that the updated job attempts to place more 1812 // allocations as this allows us to assert that destructive changes are done 1813 // first. 
1814 func TestServiceSched_JobModify_Rolling_FullNode(t *testing.T) { 1815 h := NewHarness(t) 1816 1817 // Create a node and clear the reserved resources 1818 node := mock.Node() 1819 node.ReservedResources = nil 1820 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 1821 1822 // Create a resource ask that is the same as the resources available on the 1823 // node 1824 cpu := node.NodeResources.Cpu.CpuShares 1825 mem := node.NodeResources.Memory.MemoryMB 1826 1827 request := &structs.Resources{ 1828 CPU: int(cpu), 1829 MemoryMB: int(mem), 1830 } 1831 allocated := &structs.AllocatedResources{ 1832 Tasks: map[string]*structs.AllocatedTaskResources{ 1833 "web": { 1834 Cpu: structs.AllocatedCpuResources{ 1835 CpuShares: cpu, 1836 }, 1837 Memory: structs.AllocatedMemoryResources{ 1838 MemoryMB: mem, 1839 }, 1840 }, 1841 }, 1842 } 1843 1844 // Generate a fake job with one alloc that consumes the whole node 1845 job := mock.Job() 1846 job.TaskGroups[0].Count = 1 1847 job.TaskGroups[0].Tasks[0].Resources = request 1848 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1849 1850 alloc := mock.Alloc() 1851 alloc.AllocatedResources = allocated 1852 alloc.Job = job 1853 alloc.JobID = job.ID 1854 alloc.NodeID = node.ID 1855 alloc.Name = "my-job.web[0]" 1856 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 1857 1858 // Update the job to place more versions of the task group, drop the count 1859 // and force destructive updates 1860 job2 := job.Copy() 1861 job2.TaskGroups[0].Count = 5 1862 job2.TaskGroups[0].Update = &structs.UpdateStrategy{ 1863 MaxParallel: 5, 1864 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 1865 MinHealthyTime: 10 * time.Second, 1866 HealthyDeadline: 10 * time.Minute, 1867 } 1868 job2.TaskGroups[0].Tasks[0].Resources = mock.Job().TaskGroups[0].Tasks[0].Resources 1869 1870 // Update the task, such that it cannot be done in-place 1871 job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other" 
1872 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job2)) 1873 1874 eval := &structs.Evaluation{ 1875 Namespace: structs.DefaultNamespace, 1876 ID: uuid.Generate(), 1877 Priority: 50, 1878 TriggeredBy: structs.EvalTriggerJobRegister, 1879 JobID: job.ID, 1880 Status: structs.EvalStatusPending, 1881 } 1882 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1883 1884 // Process the evaluation 1885 err := h.Process(NewServiceScheduler, eval) 1886 if err != nil { 1887 t.Fatalf("err: %v", err) 1888 } 1889 1890 // Ensure a single plan 1891 if len(h.Plans) != 1 { 1892 t.Fatalf("bad: %#v", h.Plans) 1893 } 1894 plan := h.Plans[0] 1895 1896 // Ensure the plan evicted only MaxParallel 1897 var update []*structs.Allocation 1898 for _, updateList := range plan.NodeUpdate { 1899 update = append(update, updateList...) 1900 } 1901 if len(update) != 1 { 1902 t.Fatalf("bad: got %d; want %d: %#v", len(update), 1, plan) 1903 } 1904 1905 // Ensure the plan allocated 1906 var planned []*structs.Allocation 1907 for _, allocList := range plan.NodeAllocation { 1908 planned = append(planned, allocList...) 
1909 } 1910 if len(planned) != 5 { 1911 t.Fatalf("bad: %#v", plan) 1912 } 1913 1914 h.AssertEvalStatus(t, structs.EvalStatusComplete) 1915 1916 // Check that the deployment id is attached to the eval 1917 if h.Evals[0].DeploymentID == "" { 1918 t.Fatalf("Eval not annotated with deployment id") 1919 } 1920 1921 // Ensure a deployment was created 1922 if plan.Deployment == nil { 1923 t.Fatalf("bad: %#v", plan) 1924 } 1925 state, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name] 1926 if !ok { 1927 t.Fatalf("bad: %#v", plan) 1928 } 1929 if state.DesiredTotal != 5 || state.DesiredCanaries != 0 { 1930 t.Fatalf("bad: %#v", state) 1931 } 1932 } 1933 1934 func TestServiceSched_JobModify_Canaries(t *testing.T) { 1935 h := NewHarness(t) 1936 1937 // Create some nodes 1938 var nodes []*structs.Node 1939 for i := 0; i < 10; i++ { 1940 node := mock.Node() 1941 nodes = append(nodes, node) 1942 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 1943 } 1944 1945 // Generate a fake job with allocations 1946 job := mock.Job() 1947 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 1948 1949 var allocs []*structs.Allocation 1950 for i := 0; i < 10; i++ { 1951 alloc := mock.Alloc() 1952 alloc.Job = job 1953 alloc.JobID = job.ID 1954 alloc.NodeID = nodes[i].ID 1955 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 1956 allocs = append(allocs, alloc) 1957 } 1958 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 1959 1960 // Update the job 1961 job2 := mock.Job() 1962 job2.ID = job.ID 1963 desiredUpdates := 2 1964 job2.TaskGroups[0].Update = &structs.UpdateStrategy{ 1965 MaxParallel: desiredUpdates, 1966 Canary: desiredUpdates, 1967 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 1968 MinHealthyTime: 10 * time.Second, 1969 HealthyDeadline: 10 * time.Minute, 1970 } 1971 1972 // Update the task, such that it cannot be done in-place 1973 job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other" 1974 require.NoError(t, 
h.State.UpsertJob(h.NextIndex(), job2)) 1975 1976 // Create a mock evaluation to deal with drain 1977 eval := &structs.Evaluation{ 1978 Namespace: structs.DefaultNamespace, 1979 ID: uuid.Generate(), 1980 Priority: 50, 1981 TriggeredBy: structs.EvalTriggerJobRegister, 1982 JobID: job.ID, 1983 Status: structs.EvalStatusPending, 1984 } 1985 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 1986 1987 // Process the evaluation 1988 err := h.Process(NewServiceScheduler, eval) 1989 if err != nil { 1990 t.Fatalf("err: %v", err) 1991 } 1992 1993 // Ensure a single plan 1994 if len(h.Plans) != 1 { 1995 t.Fatalf("bad: %#v", h.Plans) 1996 } 1997 plan := h.Plans[0] 1998 1999 // Ensure the plan evicted nothing 2000 var update []*structs.Allocation 2001 for _, updateList := range plan.NodeUpdate { 2002 update = append(update, updateList...) 2003 } 2004 if len(update) != 0 { 2005 t.Fatalf("bad: got %d; want %d: %#v", len(update), 0, plan) 2006 } 2007 2008 // Ensure the plan allocated 2009 var planned []*structs.Allocation 2010 for _, allocList := range plan.NodeAllocation { 2011 planned = append(planned, allocList...) 
2012 } 2013 if len(planned) != desiredUpdates { 2014 t.Fatalf("bad: %#v", plan) 2015 } 2016 for _, canary := range planned { 2017 if canary.DeploymentStatus == nil || !canary.DeploymentStatus.Canary { 2018 t.Fatalf("expected canary field to be set on canary alloc %q", canary.ID) 2019 } 2020 } 2021 2022 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2023 2024 // Check that the deployment id is attached to the eval 2025 if h.Evals[0].DeploymentID == "" { 2026 t.Fatalf("Eval not annotated with deployment id") 2027 } 2028 2029 // Ensure a deployment was created 2030 if plan.Deployment == nil { 2031 t.Fatalf("bad: %#v", plan) 2032 } 2033 2034 // Ensure local state was not altered in scheduler 2035 staleState, ok := plan.Deployment.TaskGroups[job.TaskGroups[0].Name] 2036 require.True(t, ok) 2037 2038 require.Equal(t, 0, len(staleState.PlacedCanaries)) 2039 2040 ws := memdb.NewWatchSet() 2041 2042 // Grab the latest state 2043 deploy, err := h.State.DeploymentByID(ws, plan.Deployment.ID) 2044 require.NoError(t, err) 2045 2046 state, ok := deploy.TaskGroups[job.TaskGroups[0].Name] 2047 require.True(t, ok) 2048 2049 require.Equal(t, 10, state.DesiredTotal) 2050 require.Equal(t, state.DesiredCanaries, desiredUpdates) 2051 2052 // Assert the canaries were added to the placed list 2053 if len(state.PlacedCanaries) != desiredUpdates { 2054 assert.Fail(t, "expected PlacedCanaries to equal desiredUpdates", state) 2055 } 2056 } 2057 2058 func TestServiceSched_JobModify_InPlace(t *testing.T) { 2059 h := NewHarness(t) 2060 2061 // Create some nodes 2062 var nodes []*structs.Node 2063 for i := 0; i < 10; i++ { 2064 node := mock.Node() 2065 nodes = append(nodes, node) 2066 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 2067 } 2068 2069 // Generate a fake job with allocations and create an older deployment 2070 job := mock.Job() 2071 d := mock.Deployment() 2072 d.JobID = job.ID 2073 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 2074 require.NoError(t, 
h.State.UpsertDeployment(h.NextIndex(), d)) 2075 2076 taskName := job.TaskGroups[0].Tasks[0].Name 2077 2078 adr := structs.AllocatedDeviceResource{ 2079 Type: "gpu", 2080 Vendor: "nvidia", 2081 Name: "1080ti", 2082 DeviceIDs: []string{uuid.Generate()}, 2083 } 2084 2085 // Create allocs that are part of the old deployment 2086 var allocs []*structs.Allocation 2087 for i := 0; i < 10; i++ { 2088 alloc := mock.Alloc() 2089 alloc.Job = job 2090 alloc.JobID = job.ID 2091 alloc.NodeID = nodes[i].ID 2092 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2093 alloc.DeploymentID = d.ID 2094 alloc.DeploymentStatus = &structs.AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)} 2095 alloc.AllocatedResources.Tasks[taskName].Devices = []*structs.AllocatedDeviceResource{&adr} 2096 allocs = append(allocs, alloc) 2097 } 2098 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2099 2100 // Update the job 2101 job2 := mock.Job() 2102 job2.ID = job.ID 2103 desiredUpdates := 4 2104 job2.TaskGroups[0].Update = &structs.UpdateStrategy{ 2105 MaxParallel: desiredUpdates, 2106 HealthCheck: structs.UpdateStrategyHealthCheck_Checks, 2107 MinHealthyTime: 10 * time.Second, 2108 HealthyDeadline: 10 * time.Minute, 2109 } 2110 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job2)) 2111 2112 // Create a mock evaluation to deal with drain 2113 eval := &structs.Evaluation{ 2114 Namespace: structs.DefaultNamespace, 2115 ID: uuid.Generate(), 2116 Priority: 50, 2117 TriggeredBy: structs.EvalTriggerJobRegister, 2118 JobID: job.ID, 2119 Status: structs.EvalStatusPending, 2120 } 2121 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2122 2123 // Process the evaluation 2124 err := h.Process(NewServiceScheduler, eval) 2125 if err != nil { 2126 t.Fatalf("err: %v", err) 2127 } 2128 2129 // Ensure a single plan 2130 if len(h.Plans) != 1 { 2131 t.Fatalf("bad: %#v", h.Plans) 2132 } 2133 plan := h.Plans[0] 2134 2135 // Ensure the plan did not evict any allocs 2136 
var update []*structs.Allocation 2137 for _, updateList := range plan.NodeUpdate { 2138 update = append(update, updateList...) 2139 } 2140 if len(update) != 0 { 2141 t.Fatalf("bad: %#v", plan) 2142 } 2143 2144 // Ensure the plan updated the existing allocs 2145 var planned []*structs.Allocation 2146 for _, allocList := range plan.NodeAllocation { 2147 planned = append(planned, allocList...) 2148 } 2149 if len(planned) != 10 { 2150 t.Fatalf("bad: %#v", plan) 2151 } 2152 for _, p := range planned { 2153 if p.Job != job2 { 2154 t.Fatalf("should update job") 2155 } 2156 } 2157 2158 // Lookup the allocations by JobID 2159 ws := memdb.NewWatchSet() 2160 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2161 require.NoError(t, err) 2162 2163 // Ensure all allocations placed 2164 if len(out) != 10 { 2165 t.Fatalf("bad: %#v", out) 2166 } 2167 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2168 2169 // Verify the allocated networks and devices did not change 2170 rp := structs.Port{Label: "admin", Value: 5000} 2171 for _, alloc := range out { 2172 for _, resources := range alloc.AllocatedResources.Tasks { 2173 if resources.Networks[0].ReservedPorts[0] != rp { 2174 t.Fatalf("bad: %#v", alloc) 2175 } 2176 if len(resources.Devices) == 0 || reflect.DeepEqual(resources.Devices[0], adr) { 2177 t.Fatalf("bad devices has changed: %#v", alloc) 2178 } 2179 } 2180 } 2181 2182 // Verify the deployment id was changed and health cleared 2183 for _, alloc := range out { 2184 if alloc.DeploymentID == d.ID { 2185 t.Fatalf("bad: deployment id not cleared") 2186 } else if alloc.DeploymentStatus != nil { 2187 t.Fatalf("bad: deployment status not cleared") 2188 } 2189 } 2190 } 2191 2192 // TestServiceSched_JobModify_InPlace08 asserts that inplace updates of 2193 // allocations created with Nomad 0.8 do not cause panics. 
//
// COMPAT(0.11) - While we do not guarantee that upgrades from 0.8 -> 0.10
// (skipping 0.9) are safe, we do want to avoid panics in the scheduler which
// cause unrecoverable server outages with no chance of recovery.
//
// Safe to remove in 0.11.0 as no one should ever be trying to upgrade from 0.8
// to 0.11!
func TestServiceSched_JobModify_InPlace08(t *testing.T) {
	h := NewHarness(t)

	// Create node
	node := mock.Node()
	require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))

	// Generate a fake job with 0.8 allocations
	job := mock.Job()
	job.TaskGroups[0].Count = 1
	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create 0.8 alloc
	alloc := mock.Alloc()
	alloc.Job = job.Copy()
	alloc.JobID = job.ID
	alloc.NodeID = node.ID
	alloc.AllocatedResources = nil // 0.8 didn't have this
	require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc}))

	// Update the job inplace (a service-tag-only change)
	job2 := job.Copy()

	job2.TaskGroups[0].Tasks[0].Services[0].Tags[0] = "newtag"
	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job2))

	// Create a mock evaluation
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	require.NoError(t, err)

	// Ensure a single plan
	require.Len(t, h.Plans, 1)
	plan := h.Plans[0]

	// Ensure the plan did not evict any allocs
	var update []*structs.Allocation
	for _, updateList := range plan.NodeUpdate {
		update = append(update, updateList...)
	}
	require.Zero(t, update)

	// Ensure the plan updated the existing alloc
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	require.Len(t, planned, 1)
	for _, p := range planned {
		require.Equal(t, job2, p.Job)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(t, err)

	// Ensure all allocations placed
	require.Len(t, out, 1)
	h.AssertEvalStatus(t, structs.EvalStatusComplete)

	newAlloc := out[0]

	// Verify AllocatedResources was set: the scheduler must backfill the
	// field that 0.8 allocs lacked rather than panic on its absence.
	require.NotNil(t, newAlloc.AllocatedResources)
}

// TestServiceSched_JobModify_DistinctProperty registers a job whose count (11)
// exceeds the number of nodes that can satisfy its distinct_property
// constraint (10 racks).
func TestServiceSched_JobModify_DistinctProperty(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes, each in its own rack
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		node.Meta["rack"] = fmt.Sprintf("rack%d", i)
		nodes = append(nodes, node)
		require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job that uses distinct property and has count higher than what is
	// possible.
	job := mock.Job()
	job.TaskGroups[0].Count = 11
	job.Constraints = append(job.Constraints,
		&structs.Constraint{
			Operand: structs.ConstraintDistinctProperty,
			LTarget: "${meta.rack}",
		})
	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	// A prior version of the job had a lower count
	oldJob := job.Copy()
	oldJob.JobModifyIndex -= 1
	oldJob.TaskGroups[0].Count = 4

	// Place 4 of 10
	var allocs []*structs.Allocation
	for i := 0; i < 4; i++ {
		alloc := mock.Alloc()
		alloc.Job = oldJob
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan doesn't have annotations.
	if plan.Annotations != nil {
		t.Fatalf("expected no annotations")
	}

	// Ensure the eval spawned exactly one blocked eval: count is 11 but only
	// 10 distinct racks exist, so one allocation cannot be placed. (The old
	// comment claimed no blocked eval, contradicting this assertion.)
	if len(h.CreateEvals) != 1 {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}

	// Ensure the plan failed to alloc
	outEval := h.Evals[0]
	if len(outEval.FailedTGAllocs) != 1 {
		t.Fatalf("bad: %+v", outEval)
	}

	// Ensure the plan allocated
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 10 {
		t.Fatalf("bad: %#v", planned)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(t, err)

	// Ensure all allocations placed
	if len(out) != 10 {
		t.Fatalf("bad: %#v", out)
	}

	// Ensure different node was used per.
	used := make(map[string]struct{})
	for _, alloc := range out {
		if _, ok := used[alloc.NodeID]; ok {
			t.Fatalf("Node collision %v", alloc.NodeID)
		}
		used[alloc.NodeID] = struct{}{}
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

// TestServiceSched_JobModify_NodeReschedulePenalty ensures that
// a failing allocation gets rescheduled with a penalty to the old
// node, but an updated job doesn't apply the penalty.
func TestServiceSched_JobModify_NodeReschedulePenalty(t *testing.T) {
	h := NewHarness(t)
	require := require.New(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		require.NoError(h.State.UpsertNode(h.NextIndex(), node))
	}

	// Generate a fake job with allocations and an update policy.
	job := mock.Job()
	job.TaskGroups[0].Count = 2
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      15 * time.Minute,
		Delay:         5 * time.Second,
		MaxDelay:      1 * time.Minute,
		DelayFunction: "constant",
	}
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	require.NoError(h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	// Mark one of the allocations as failed
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}
	failedAlloc := allocs[1]
	failedAllocID := failedAlloc.ID
	successAllocID := allocs[0].ID

	require.NoError(h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create and process a mock evaluation
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
	require.NoError(h.Process(NewServiceScheduler, eval))

	// Ensure we have one plan
	require.Equal(1, len(h.Plans))

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(err)

	// Verify that one new allocation got created with its restart tracker info
	require.Equal(3, len(out))
	// The replacement is whichever alloc is neither the original success nor
	// the original failure.
	var newAlloc *structs.Allocation
	for _, alloc := range out {
		if alloc.ID != successAllocID && alloc.ID != failedAllocID {
			newAlloc = alloc
		}
	}
	require.Equal(failedAllocID, newAlloc.PreviousAllocation)
	require.Equal(1, len(newAlloc.RescheduleTracker.Events))
	require.Equal(failedAllocID, newAlloc.RescheduleTracker.Events[0].PrevAllocID)

	// Verify that the node-reschedule penalty was applied to the new alloc
	for _, scoreMeta := range newAlloc.Metrics.ScoreMetaData {
		if scoreMeta.NodeID == failedAlloc.NodeID {
			require.Equal(-1.0, scoreMeta.Scores["node-reschedule-penalty"],
				"eval to replace failed alloc missing node-reshedule-penalty: %v",
				scoreMeta.Scores,
			)
		}
	}

	// Update the job, such that it cannot be done in-place
	job2 := job.Copy()
	job2.TaskGroups[0].Tasks[0].Config["command"] = "/bin/other"
	require.NoError(h.State.UpsertJob(h.NextIndex(), job2))

	// Create and process a mock evaluation
	eval = &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
	require.NoError(h.Process(NewServiceScheduler, eval))

	// Lookup the new allocations by JobID
	out, err = h.State.AllocsByJob(ws, job.Namespace, job2.ID, false)
	require.NoError(err)
	out, _ = structs.FilterTerminalAllocs(out)
	require.Equal(2, len(out))

	// No new allocs have node-reschedule-penalty
	for _, alloc := range out {
		require.Nil(alloc.RescheduleTracker)
		require.NotNil(alloc.Metrics)
		for _, scoreMeta := range alloc.Metrics.ScoreMetaData {
			if scoreMeta.NodeID != failedAlloc.NodeID {
				require.Equal(0.0, scoreMeta.Scores["node-reschedule-penalty"],
					"eval for updated job should not include node-reshedule-penalty: %v",
					scoreMeta.Scores,
				)
			}
		}
	}
}

// TestServiceSched_JobDeregister_Purged exercises deregistration of a purged
// job: the job itself is deliberately never upserted into state, only its
// allocations and summaries.
func TestServiceSched_JobDeregister_Purged(t *testing.T) {
	h := NewHarness(t)

	// Generate a fake job with allocations
	job := mock.Job()

	var allocs []*structs.Allocation
	for i := 0; i < 10; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		allocs = append(allocs, alloc)
	}
	for _, alloc := range allocs {
		h.State.UpsertJobSummary(h.NextIndex(), mock.JobSummary(alloc.JobID))
	}
	require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create a mock evaluation to deregister the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobDeregister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan evicted all nodes
	// NOTE(review): this hard-coded ID appears to be mock.Alloc's default
	// NodeID — confirm against the mock package.
	if len(plan.NodeUpdate["12345678-abcd-efab-cdef-123456789abc"]) != len(allocs) {
		t.Fatalf("bad: %#v", plan)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(t, err)

	// Ensure that the job field on the allocation is still populated
	for _, alloc := range out {
		if alloc.Job == nil {
			t.Fatalf("bad: %#v", alloc)
		}
	}

	// Ensure no remaining allocations
	out, _ = structs.FilterTerminalAllocs(out)
	if len(out) != 0 {
		t.Fatalf("bad: %#v", out)
	}

	h.AssertEvalStatus(t,
structs.EvalStatusComplete) 2582 } 2583 2584 func TestServiceSched_JobDeregister_Stopped(t *testing.T) { 2585 h := NewHarness(t) 2586 require := require.New(t) 2587 2588 // Generate a fake job with allocations 2589 job := mock.Job() 2590 job.Stop = true 2591 require.NoError(h.State.UpsertJob(h.NextIndex(), job)) 2592 2593 var allocs []*structs.Allocation 2594 for i := 0; i < 10; i++ { 2595 alloc := mock.Alloc() 2596 alloc.Job = job 2597 alloc.JobID = job.ID 2598 allocs = append(allocs, alloc) 2599 } 2600 require.NoError(h.State.UpsertAllocs(h.NextIndex(), allocs)) 2601 2602 // Create a summary where the queued allocs are set as we want to assert 2603 // they get zeroed out. 2604 summary := mock.JobSummary(job.ID) 2605 web := summary.Summary["web"] 2606 web.Queued = 2 2607 require.NoError(h.State.UpsertJobSummary(h.NextIndex(), summary)) 2608 2609 // Create a mock evaluation to deregister the job 2610 eval := &structs.Evaluation{ 2611 Namespace: structs.DefaultNamespace, 2612 ID: uuid.Generate(), 2613 Priority: 50, 2614 TriggeredBy: structs.EvalTriggerJobDeregister, 2615 JobID: job.ID, 2616 Status: structs.EvalStatusPending, 2617 } 2618 require.NoError(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2619 2620 // Process the evaluation 2621 require.NoError(h.Process(NewServiceScheduler, eval)) 2622 2623 // Ensure a single plan 2624 require.Len(h.Plans, 1) 2625 plan := h.Plans[0] 2626 2627 // Ensure the plan evicted all nodes 2628 require.Len(plan.NodeUpdate["12345678-abcd-efab-cdef-123456789abc"], len(allocs)) 2629 2630 // Lookup the allocations by JobID 2631 ws := memdb.NewWatchSet() 2632 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2633 require.NoError(err) 2634 2635 // Ensure that the job field on the allocation is still populated 2636 for _, alloc := range out { 2637 require.NotNil(alloc.Job) 2638 } 2639 2640 // Ensure no remaining allocations 2641 out, _ = structs.FilterTerminalAllocs(out) 2642 require.Empty(out) 2643 2644 
// Assert the job summary is cleared out 2645 sout, err := h.State.JobSummaryByID(ws, job.Namespace, job.ID) 2646 require.NoError(err) 2647 require.NotNil(sout) 2648 require.Contains(sout.Summary, "web") 2649 webOut := sout.Summary["web"] 2650 require.Zero(webOut.Queued) 2651 2652 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2653 } 2654 2655 func TestServiceSched_NodeDown(t *testing.T) { 2656 cases := []struct { 2657 desired string 2658 client string 2659 migrate bool 2660 reschedule bool 2661 terminal bool 2662 lost bool 2663 }{ 2664 { 2665 desired: structs.AllocDesiredStatusStop, 2666 client: structs.AllocClientStatusRunning, 2667 lost: true, 2668 }, 2669 { 2670 desired: structs.AllocDesiredStatusRun, 2671 client: structs.AllocClientStatusPending, 2672 migrate: true, 2673 }, 2674 { 2675 desired: structs.AllocDesiredStatusRun, 2676 client: structs.AllocClientStatusRunning, 2677 migrate: true, 2678 }, 2679 { 2680 desired: structs.AllocDesiredStatusRun, 2681 client: structs.AllocClientStatusLost, 2682 terminal: true, 2683 }, 2684 { 2685 desired: structs.AllocDesiredStatusRun, 2686 client: structs.AllocClientStatusComplete, 2687 terminal: true, 2688 }, 2689 { 2690 desired: structs.AllocDesiredStatusRun, 2691 client: structs.AllocClientStatusFailed, 2692 reschedule: true, 2693 }, 2694 { 2695 desired: structs.AllocDesiredStatusEvict, 2696 client: structs.AllocClientStatusRunning, 2697 lost: true, 2698 }, 2699 } 2700 2701 for i, tc := range cases { 2702 t.Run(fmt.Sprintf(""), func(t *testing.T) { 2703 h := NewHarness(t) 2704 2705 // Register a node 2706 node := mock.Node() 2707 node.Status = structs.NodeStatusDown 2708 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 2709 2710 // Generate a fake job with allocations and an update policy. 
2711 job := mock.Job() 2712 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 2713 2714 alloc := mock.Alloc() 2715 alloc.Job = job 2716 alloc.JobID = job.ID 2717 alloc.NodeID = node.ID 2718 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2719 2720 alloc.DesiredStatus = tc.desired 2721 alloc.ClientStatus = tc.client 2722 2723 // Mark for migration if necessary 2724 alloc.DesiredTransition.Migrate = helper.BoolToPtr(tc.migrate) 2725 2726 allocs := []*structs.Allocation{alloc} 2727 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2728 2729 // Create a mock evaluation to deal with drain 2730 eval := &structs.Evaluation{ 2731 Namespace: structs.DefaultNamespace, 2732 ID: uuid.Generate(), 2733 Priority: 50, 2734 TriggeredBy: structs.EvalTriggerNodeUpdate, 2735 JobID: job.ID, 2736 NodeID: node.ID, 2737 Status: structs.EvalStatusPending, 2738 } 2739 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2740 2741 // Process the evaluation 2742 err := h.Process(NewServiceScheduler, eval) 2743 require.NoError(t, err) 2744 2745 if tc.terminal { 2746 // No plan for terminal state allocs 2747 require.Len(t, h.Plans, 0) 2748 } else { 2749 require.Len(t, h.Plans, 1) 2750 2751 plan := h.Plans[0] 2752 out := plan.NodeUpdate[node.ID] 2753 require.Len(t, out, 1) 2754 2755 outAlloc := out[0] 2756 if tc.migrate { 2757 require.NotEqual(t, structs.AllocClientStatusLost, outAlloc.ClientStatus) 2758 } else if tc.reschedule { 2759 require.Equal(t, structs.AllocClientStatusFailed, outAlloc.ClientStatus) 2760 } else if tc.lost { 2761 require.Equal(t, structs.AllocClientStatusLost, outAlloc.ClientStatus) 2762 } else { 2763 require.Fail(t, "unexpected alloc update") 2764 } 2765 } 2766 2767 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2768 }) 2769 } 2770 } 2771 2772 func TestServiceSched_StopAfterClientDisconnect(t *testing.T) { 2773 cases := []struct { 2774 stop time.Duration 2775 when time.Time 2776 rescheduled bool 2777 }{ 2778 { 2779 
rescheduled: true, 2780 }, 2781 { 2782 stop: 1 * time.Second, 2783 rescheduled: false, 2784 }, 2785 { 2786 stop: 1 * time.Second, 2787 when: time.Now().UTC().Add(-10 * time.Second), 2788 rescheduled: true, 2789 }, 2790 { 2791 stop: 1 * time.Second, 2792 when: time.Now().UTC().Add(10 * time.Minute), 2793 rescheduled: false, 2794 }, 2795 } 2796 2797 for i, tc := range cases { 2798 t.Run(fmt.Sprintf(""), func(t *testing.T) { 2799 h := NewHarness(t) 2800 2801 // Node, which is down 2802 node := mock.Node() 2803 node.Status = structs.NodeStatusDown 2804 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 2805 2806 // Job with allocations and stop_after_client_disconnect 2807 job := mock.Job() 2808 job.TaskGroups[0].Count = 1 2809 job.TaskGroups[0].StopAfterClientDisconnect = &tc.stop 2810 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 2811 2812 // Alloc for the running group 2813 alloc := mock.Alloc() 2814 alloc.Job = job 2815 alloc.JobID = job.ID 2816 alloc.NodeID = node.ID 2817 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2818 alloc.DesiredStatus = structs.AllocDesiredStatusRun 2819 alloc.ClientStatus = structs.AllocClientStatusRunning 2820 if !tc.when.IsZero() { 2821 alloc.AllocStates = []*structs.AllocState{{ 2822 Field: structs.AllocStateFieldClientStatus, 2823 Value: structs.AllocClientStatusLost, 2824 Time: tc.when, 2825 }} 2826 } 2827 allocs := []*structs.Allocation{alloc} 2828 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2829 2830 // Create a mock evaluation to deal with drain 2831 evals := []*structs.Evaluation{{ 2832 Namespace: structs.DefaultNamespace, 2833 ID: uuid.Generate(), 2834 Priority: 50, 2835 TriggeredBy: structs.EvalTriggerNodeDrain, 2836 JobID: job.ID, 2837 NodeID: node.ID, 2838 Status: structs.EvalStatusPending, 2839 }} 2840 eval := evals[0] 2841 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), evals)) 2842 2843 // Process the evaluation 2844 err := h.Process(NewServiceScheduler, eval) 2845 
require.NoError(t, err) 2846 require.Equal(t, h.Evals[0].Status, structs.EvalStatusComplete) 2847 require.Len(t, h.Plans, 1, "plan") 2848 2849 // One followup eval created, either delayed or blocked 2850 require.Len(t, h.CreateEvals, 1) 2851 e := h.CreateEvals[0] 2852 require.Equal(t, eval.ID, e.PreviousEval) 2853 2854 if tc.rescheduled { 2855 require.Equal(t, "blocked", e.Status) 2856 } else { 2857 require.Equal(t, "pending", e.Status) 2858 require.NotEmpty(t, e.WaitUntil) 2859 } 2860 2861 // This eval is still being inserted in the state store 2862 ws := memdb.NewWatchSet() 2863 testutil.WaitForResult(func() (bool, error) { 2864 found, err := h.State.EvalByID(ws, e.ID) 2865 if err != nil { 2866 return false, err 2867 } 2868 if found == nil { 2869 return false, nil 2870 } 2871 return true, nil 2872 }, func(err error) { 2873 require.NoError(t, err) 2874 }) 2875 2876 alloc, err = h.State.AllocByID(ws, alloc.ID) 2877 require.NoError(t, err) 2878 2879 // Allocations have been transitioned to lost 2880 require.Equal(t, structs.AllocDesiredStatusStop, alloc.DesiredStatus) 2881 require.Equal(t, structs.AllocClientStatusLost, alloc.ClientStatus) 2882 // At least 1, 2 if we manually set the tc.when 2883 require.NotEmpty(t, alloc.AllocStates) 2884 2885 if tc.rescheduled { 2886 // Register a new node, leave it up, process the followup eval 2887 node = mock.Node() 2888 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 2889 require.NoError(t, h.Process(NewServiceScheduler, eval)) 2890 2891 as, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2892 require.NoError(t, err) 2893 2894 testutil.WaitForResult(func() (bool, error) { 2895 as, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2896 if err != nil { 2897 return false, err 2898 } 2899 return len(as) == 2, nil 2900 }, func(err error) { 2901 require.NoError(t, err) 2902 }) 2903 2904 a2 := as[0] 2905 if a2.ID == alloc.ID { 2906 a2 = as[1] 2907 } 2908 2909 require.Equal(t, 
structs.AllocClientStatusPending, a2.ClientStatus) 2910 require.Equal(t, structs.AllocDesiredStatusRun, a2.DesiredStatus) 2911 require.Equal(t, node.ID, a2.NodeID) 2912 2913 // No blocked evals 2914 require.Empty(t, h.ReblockEvals) 2915 require.Len(t, h.CreateEvals, 1) 2916 require.Equal(t, h.CreateEvals[0].ID, e.ID) 2917 } else { 2918 // No new alloc was created 2919 as, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 2920 require.NoError(t, err) 2921 2922 require.Len(t, as, 1) 2923 old := as[0] 2924 2925 require.Equal(t, alloc.ID, old.ID) 2926 require.Equal(t, structs.AllocClientStatusLost, old.ClientStatus) 2927 require.Equal(t, structs.AllocDesiredStatusStop, old.DesiredStatus) 2928 } 2929 }) 2930 } 2931 } 2932 2933 func TestServiceSched_NodeUpdate(t *testing.T) { 2934 h := NewHarness(t) 2935 2936 // Register a node 2937 node := mock.Node() 2938 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 2939 2940 // Generate a fake job with allocations and an update policy. 
2941 job := mock.Job() 2942 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 2943 2944 var allocs []*structs.Allocation 2945 for i := 0; i < 10; i++ { 2946 alloc := mock.Alloc() 2947 alloc.Job = job 2948 alloc.JobID = job.ID 2949 alloc.NodeID = node.ID 2950 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 2951 allocs = append(allocs, alloc) 2952 } 2953 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 2954 2955 // Mark some allocs as running 2956 ws := memdb.NewWatchSet() 2957 for i := 0; i < 4; i++ { 2958 out, _ := h.State.AllocByID(ws, allocs[i].ID) 2959 out.ClientStatus = structs.AllocClientStatusRunning 2960 require.NoError(t, h.State.UpdateAllocsFromClient(h.NextIndex(), []*structs.Allocation{out})) 2961 } 2962 2963 // Create a mock evaluation which won't trigger any new placements 2964 eval := &structs.Evaluation{ 2965 Namespace: structs.DefaultNamespace, 2966 ID: uuid.Generate(), 2967 Priority: 50, 2968 TriggeredBy: structs.EvalTriggerNodeUpdate, 2969 JobID: job.ID, 2970 NodeID: node.ID, 2971 Status: structs.EvalStatusPending, 2972 } 2973 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 2974 2975 // Process the evaluation 2976 err := h.Process(NewServiceScheduler, eval) 2977 if err != nil { 2978 t.Fatalf("err: %v", err) 2979 } 2980 if val, ok := h.Evals[0].QueuedAllocations["web"]; !ok || val != 0 { 2981 t.Fatalf("bad queued allocations: %v", h.Evals[0].QueuedAllocations) 2982 } 2983 2984 h.AssertEvalStatus(t, structs.EvalStatusComplete) 2985 } 2986 2987 func TestServiceSched_NodeDrain(t *testing.T) { 2988 h := NewHarness(t) 2989 2990 // Register a draining node 2991 node := mock.Node() 2992 node.Drain = true 2993 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 2994 2995 // Create some nodes 2996 for i := 0; i < 10; i++ { 2997 node := mock.Node() 2998 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 2999 } 3000 3001 // Generate a fake job with allocations and an update policy. 
3002 job := mock.Job() 3003 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 3004 3005 var allocs []*structs.Allocation 3006 for i := 0; i < 10; i++ { 3007 alloc := mock.Alloc() 3008 alloc.Job = job 3009 alloc.JobID = job.ID 3010 alloc.NodeID = node.ID 3011 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3012 alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 3013 allocs = append(allocs, alloc) 3014 } 3015 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3016 3017 // Create a mock evaluation to deal with drain 3018 eval := &structs.Evaluation{ 3019 Namespace: structs.DefaultNamespace, 3020 ID: uuid.Generate(), 3021 Priority: 50, 3022 TriggeredBy: structs.EvalTriggerNodeUpdate, 3023 JobID: job.ID, 3024 NodeID: node.ID, 3025 Status: structs.EvalStatusPending, 3026 } 3027 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3028 3029 // Process the evaluation 3030 err := h.Process(NewServiceScheduler, eval) 3031 if err != nil { 3032 t.Fatalf("err: %v", err) 3033 } 3034 3035 // Ensure a single plan 3036 if len(h.Plans) != 1 { 3037 t.Fatalf("bad: %#v", h.Plans) 3038 } 3039 plan := h.Plans[0] 3040 3041 // Ensure the plan evicted all allocs 3042 if len(plan.NodeUpdate[node.ID]) != len(allocs) { 3043 t.Fatalf("bad: %#v", plan) 3044 } 3045 3046 // Ensure the plan allocated 3047 var planned []*structs.Allocation 3048 for _, allocList := range plan.NodeAllocation { 3049 planned = append(planned, allocList...) 
3050 } 3051 if len(planned) != 10 { 3052 t.Fatalf("bad: %#v", plan) 3053 } 3054 3055 // Lookup the allocations by JobID 3056 ws := memdb.NewWatchSet() 3057 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3058 require.NoError(t, err) 3059 3060 // Ensure all allocations placed 3061 out, _ = structs.FilterTerminalAllocs(out) 3062 if len(out) != 10 { 3063 t.Fatalf("bad: %#v", out) 3064 } 3065 3066 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3067 } 3068 3069 func TestServiceSched_NodeDrain_Down(t *testing.T) { 3070 h := NewHarness(t) 3071 3072 // Register a draining node 3073 node := mock.Node() 3074 node.Drain = true 3075 node.Status = structs.NodeStatusDown 3076 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 3077 3078 // Generate a fake job with allocations 3079 job := mock.Job() 3080 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 3081 3082 var allocs []*structs.Allocation 3083 for i := 0; i < 10; i++ { 3084 alloc := mock.Alloc() 3085 alloc.Job = job 3086 alloc.JobID = job.ID 3087 alloc.NodeID = node.ID 3088 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3089 allocs = append(allocs, alloc) 3090 } 3091 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 3092 3093 // Set the desired state of the allocs to stop 3094 var stop []*structs.Allocation 3095 for i := 0; i < 6; i++ { 3096 newAlloc := allocs[i].Copy() 3097 newAlloc.ClientStatus = structs.AllocDesiredStatusStop 3098 newAlloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 3099 stop = append(stop, newAlloc) 3100 } 3101 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), stop)) 3102 3103 // Mark some of the allocations as running 3104 var running []*structs.Allocation 3105 for i := 4; i < 6; i++ { 3106 newAlloc := stop[i].Copy() 3107 newAlloc.ClientStatus = structs.AllocClientStatusRunning 3108 running = append(running, newAlloc) 3109 } 3110 require.NoError(t, h.State.UpdateAllocsFromClient(h.NextIndex(), running)) 3111 3112 // Mark some of the 
allocations as complete 3113 var complete []*structs.Allocation 3114 for i := 6; i < 10; i++ { 3115 newAlloc := allocs[i].Copy() 3116 newAlloc.TaskStates = make(map[string]*structs.TaskState) 3117 newAlloc.TaskStates["web"] = &structs.TaskState{ 3118 State: structs.TaskStateDead, 3119 Events: []*structs.TaskEvent{ 3120 { 3121 Type: structs.TaskTerminated, 3122 ExitCode: 0, 3123 }, 3124 }, 3125 } 3126 newAlloc.ClientStatus = structs.AllocClientStatusComplete 3127 complete = append(complete, newAlloc) 3128 } 3129 require.NoError(t, h.State.UpdateAllocsFromClient(h.NextIndex(), complete)) 3130 3131 // Create a mock evaluation to deal with the node update 3132 eval := &structs.Evaluation{ 3133 Namespace: structs.DefaultNamespace, 3134 ID: uuid.Generate(), 3135 Priority: 50, 3136 TriggeredBy: structs.EvalTriggerNodeUpdate, 3137 JobID: job.ID, 3138 NodeID: node.ID, 3139 Status: structs.EvalStatusPending, 3140 } 3141 3142 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3143 3144 // Process the evaluation 3145 err := h.Process(NewServiceScheduler, eval) 3146 if err != nil { 3147 t.Fatalf("err: %v", err) 3148 } 3149 3150 // Ensure a single plan 3151 if len(h.Plans) != 1 { 3152 t.Fatalf("bad: %#v", h.Plans) 3153 } 3154 plan := h.Plans[0] 3155 3156 // Ensure the plan evicted non terminal allocs 3157 if len(plan.NodeUpdate[node.ID]) != 6 { 3158 t.Fatalf("bad: %#v", plan) 3159 } 3160 3161 // Ensure that all the allocations which were in running or pending state 3162 // has been marked as lost 3163 var lostAllocs []string 3164 for _, alloc := range plan.NodeUpdate[node.ID] { 3165 lostAllocs = append(lostAllocs, alloc.ID) 3166 } 3167 sort.Strings(lostAllocs) 3168 3169 var expectedLostAllocs []string 3170 for i := 0; i < 6; i++ { 3171 expectedLostAllocs = append(expectedLostAllocs, allocs[i].ID) 3172 } 3173 sort.Strings(expectedLostAllocs) 3174 3175 if !reflect.DeepEqual(expectedLostAllocs, lostAllocs) { 3176 t.Fatalf("expected: %v, actual: %v", 
			expectedLostAllocs, lostAllocs)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

// Tests that allocations on a draining node that cannot be placed elsewhere
// (there is only one node, and it is draining) are reported as queued.
func TestServiceSched_NodeDrain_Queued_Allocations(t *testing.T) {
	h := NewHarness(t)

	// Register a draining node
	node := mock.Node()
	require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))

	// Generate a fake job with allocations and an update policy.
	job := mock.Job()
	job.TaskGroups[0].Count = 2
	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = node.ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		// Mark the alloc as eligible for migration off the draining node.
		alloc.DesiredTransition.Migrate = helper.BoolToPtr(true)
		allocs = append(allocs, alloc)
	}
	require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	node.Drain = true
	require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))

	// Create a mock evaluation to deal with drain
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		NodeID:      node.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Both allocations should be reported as queued since the only node is
	// draining and there is nowhere to place them.
	queued := h.Evals[0].QueuedAllocations["web"]
	if queued != 2 {
		t.Fatalf("expected: %v, actual: %v", 2, queued)
	}
}

// Tests that a scheduler whose plans are always rejected eventually gives up
// and marks the evaluation as failed instead of retrying forever.
func TestServiceSched_RetryLimit(t *testing.T) {
	h := NewHarness(t)
	// RejectPlan causes every submitted plan to be refused by the planner.
	h.Planner = &RejectPlan{h}

	// Create some nodes
	for i := 0; i < 10; i++ {
		node := mock.Node()
		require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Create a job
	job := mock.Job()
	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to register the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    job.Priority,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure multiple plans
	if len(h.Plans) == 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(t, err)

	// Ensure no allocations placed
	if len(out) != 0 {
		t.Fatalf("bad: %#v", out)
	}

	// Should hit the retry limit
	h.AssertEvalStatus(t, structs.EvalStatusFailed)
}

// Tests that a failed allocation with a one-attempt reschedule policy is
// rescheduled exactly once and not again after the replacement also fails.
func TestServiceSched_Reschedule_OnceNow(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Generate a fake job with allocations and an update policy.
	job := mock.Job()
	job.TaskGroups[0].Count = 2
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      15 * time.Minute,
		Delay:         5 * time.Second,
		MaxDelay:      1 * time.Minute,
		DelayFunction: "constant",
	}
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	// Mark one of the allocations as failed; FinishedAt is in the past so the
	// constant 5s reschedule delay has already elapsed.
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}
	failedAllocID := allocs[1].ID
	successAllocID := allocs[0].ID

	require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create a mock evaluation
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure multiple plans
	if len(h.Plans) == 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(t, err)

	// Verify that one new allocation got created with its restart tracker info
	assert := assert.New(t)
	assert.Equal(3, len(out))
	// The replacement is the one alloc that is neither of the two originals.
	var newAlloc *structs.Allocation
	for _, alloc := range out {
		if alloc.ID != successAllocID && alloc.ID != failedAllocID {
			newAlloc = alloc
		}
	}
	assert.Equal(failedAllocID, newAlloc.PreviousAllocation)
	assert.Equal(1, len(newAlloc.RescheduleTracker.Events))
	assert.Equal(failedAllocID, newAlloc.RescheduleTracker.Events[0].PrevAllocID)

	// Mark this alloc as failed again, should not get rescheduled
	newAlloc.ClientStatus = structs.AllocClientStatusFailed

	require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{newAlloc}))

	// Create another mock evaluation
	eval = &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err = h.Process(NewServiceScheduler, eval)
	assert.Nil(err)
	// Verify no new allocs were created this time; the single reschedule
	// attempt has been consumed.
	out, err = h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(t, err)
	assert.Equal(3, len(out))

}

// Tests that alloc reschedulable at a future time creates a follow up eval
func TestServiceSched_Reschedule_Later(t *testing.T) {
	h := NewHarness(t)
	require := require.New(t)
	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		require.NoError(h.State.UpsertNode(h.NextIndex(), node))
	}

	// Generate a fake job with allocations and an update policy.
	job := mock.Job()
	job.TaskGroups[0].Count = 2
	delayDuration := 15 * time.Second
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      1,
		Interval:      15 * time.Minute,
		Delay:         delayDuration,
		MaxDelay:      1 * time.Minute,
		DelayFunction: "constant",
	}
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	require.NoError(h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	// Mark one of the allocations as failed; FinishedAt is "now", so the 15s
	// reschedule delay has not yet elapsed and placement must be deferred.
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now}}
	failedAllocID := allocs[1].ID

	require.NoError(h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create a mock evaluation
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure multiple plans
	if len(h.Plans) == 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(err)

	// Verify no new allocs were created
	require.Equal(2, len(out))

	// Verify follow up eval was created for the failed alloc
	alloc, err := h.State.AllocByID(ws, failedAllocID)
	require.Nil(err)
	require.NotEmpty(alloc.FollowupEvalID)

	// Ensure there is a follow up eval.
	if len(h.CreateEvals) != 1 || h.CreateEvals[0].Status != structs.EvalStatusPending {
		t.Fatalf("bad: %#v", h.CreateEvals)
	}
	// The follow up eval must fire only after the configured delay.
	followupEval := h.CreateEvals[0]
	require.Equal(now.Add(delayDuration), followupEval.WaitUntil)
}

// Tests that an allocation that keeps failing is rescheduled once per attempt
// up to the policy's Attempts limit, and no further.
func TestServiceSched_Reschedule_MultipleNow(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	maxRestartAttempts := 3
	// Generate a fake job with allocations and an update policy.
	job := mock.Job()
	job.TaskGroups[0].Count = 2
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		Attempts:      maxRestartAttempts,
		Interval:      30 * time.Minute,
		Delay:         5 * time.Second,
		DelayFunction: "constant",
	}
	tgName := job.TaskGroups[0].Name
	now := time.Now()

	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.ClientStatus = structs.AllocClientStatusRunning
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	// Mark one of the allocations as failed
	allocs[1].ClientStatus = structs.AllocClientStatusFailed
	allocs[1].TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-10 * time.Second)}}

	require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create a mock evaluation
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	expectedNumAllocs := 3
	expectedNumReschedTrackers := 1

	failedAllocId := allocs[1].ID
	failedNodeID := allocs[1].NodeID

	assert := assert.New(t)
	// Fail-and-reschedule loop: each iteration fails the latest replacement
	// and verifies one more alloc plus one more reschedule-tracker event.
	for i := 0; i < maxRestartAttempts; i++ {
		// Process the evaluation
		err := h.Process(NewServiceScheduler, eval)
		require.NoError(t, err)

		// Ensure multiple plans
		if len(h.Plans) == 0 {
			t.Fatalf("bad: %#v", h.Plans)
		}

		// Lookup the allocations by JobID
		ws := memdb.NewWatchSet()
		out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
		require.NoError(t, err)

		// Verify that a new allocation got created with its restart tracker info
		assert.Equal(expectedNumAllocs, len(out))

		// Find the new alloc with ClientStatusPending
		var pendingAllocs []*structs.Allocation
		var prevFailedAlloc *structs.Allocation

		for _, alloc := range out {
			if alloc.ClientStatus == structs.AllocClientStatusPending {
				pendingAllocs = append(pendingAllocs, alloc)
			}
			if alloc.ID == failedAllocId {
				prevFailedAlloc = alloc
			}
		}
		assert.Equal(1, len(pendingAllocs))
		newAlloc := pendingAllocs[0]
		assert.Equal(expectedNumReschedTrackers, len(newAlloc.RescheduleTracker.Events))

		// Verify the previous NodeID in the most recent reschedule event
		reschedEvents := newAlloc.RescheduleTracker.Events
		assert.Equal(failedAllocId, reschedEvents[len(reschedEvents)-1].PrevAllocID)
		assert.Equal(failedNodeID, reschedEvents[len(reschedEvents)-1].PrevNodeID)

		// Verify that the next alloc of the failed alloc is the newly rescheduled alloc
		assert.Equal(newAlloc.ID, prevFailedAlloc.NextAllocation)

		// Mark this alloc as failed again
		newAlloc.ClientStatus = structs.AllocClientStatusFailed
		newAlloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead",
			StartedAt:  now.Add(-12 * time.Second),
			FinishedAt: now.Add(-10 * time.Second)}}

		failedAllocId = newAlloc.ID
		failedNodeID = newAlloc.NodeID

		require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{newAlloc}))

		// Create another mock evaluation
		eval = &structs.Evaluation{
			Namespace:   structs.DefaultNamespace,
			ID:          uuid.Generate(),
			Priority:    50,
			TriggeredBy: structs.EvalTriggerNodeUpdate,
			JobID:       job.ID,
			Status:      structs.EvalStatusPending,
		}
		require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))
		expectedNumAllocs += 1
		expectedNumReschedTrackers += 1
	}

	// Process last eval again, should not reschedule
	err := h.Process(NewServiceScheduler, eval)
	assert.Nil(err)

	// Verify no new allocs were created because restart attempts were exhausted
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(t, err)
	assert.Equal(5, len(out)) // 2 original, plus 3 reschedule attempts
}

// Tests that old reschedule attempts are pruned
func TestServiceSched_Reschedule_PruneEvents(t *testing.T) {
	h := NewHarness(t)

	// Create some nodes
	var nodes []*structs.Node
	for i := 0; i < 10; i++ {
		node := mock.Node()
		nodes = append(nodes, node)
		require.NoError(t, h.State.UpsertNode(h.NextIndex(), node))
	}

	// Generate a fake job with allocations and an update policy.
	job := mock.Job()
	job.TaskGroups[0].Count = 2
	job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
		DelayFunction: "exponential",
		MaxDelay:      1 * time.Hour,
		Delay:         5 * time.Second,
		Unlimited:     true,
	}
	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	var allocs []*structs.Allocation
	for i := 0; i < 2; i++ {
		alloc := mock.Alloc()
		alloc.Job = job
		alloc.JobID = job.ID
		alloc.NodeID = nodes[i].ID
		alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
		allocs = append(allocs, alloc)
	}
	now := time.Now()
	// Mark allocations as failed with restart info
	allocs[1].TaskStates = map[string]*structs.TaskState{job.TaskGroups[0].Name: {State: "dead",
		StartedAt:  now.Add(-1 * time.Hour),
		FinishedAt: now.Add(-15 * time.Minute)}}
	allocs[1].ClientStatus = structs.AllocClientStatusFailed

	// Seed six prior reschedule events with exponentially doubling delays
	// (5s..160s); the oldest is expected to be pruned on the next reschedule.
	allocs[1].RescheduleTracker = &structs.RescheduleTracker{
		Events: []*structs.RescheduleEvent{
			{RescheduleTime: now.Add(-1 * time.Hour).UTC().UnixNano(),
				PrevAllocID: uuid.Generate(),
				PrevNodeID:  uuid.Generate(),
				Delay:       5 * time.Second,
			},
			{RescheduleTime: now.Add(-40 * time.Minute).UTC().UnixNano(),
				PrevAllocID: allocs[0].ID,
				PrevNodeID:  uuid.Generate(),
				Delay:       10 * time.Second,
			},
			{RescheduleTime: now.Add(-30 * time.Minute).UTC().UnixNano(),
				PrevAllocID: allocs[0].ID,
				PrevNodeID:  uuid.Generate(),
				Delay:       20 * time.Second,
			},
			{RescheduleTime: now.Add(-20 * time.Minute).UTC().UnixNano(),
				PrevAllocID: allocs[0].ID,
				PrevNodeID:  uuid.Generate(),
				Delay:       40 * time.Second,
			},
			{RescheduleTime: now.Add(-10 * time.Minute).UTC().UnixNano(),
				PrevAllocID: allocs[0].ID,
				PrevNodeID:  uuid.Generate(),
				Delay:       80 * time.Second,
			},
			{RescheduleTime: now.Add(-3 * time.Minute).UTC().UnixNano(),
				PrevAllocID: allocs[0].ID,
				PrevNodeID:  uuid.Generate(),
				Delay:       160 * time.Second,
			},
		},
	}
	// After pruning the oldest event, the tracker should start at Events[1];
	// the next exponential delay doubles 160s to 320s.
	expectedFirstRescheduleEvent := allocs[1].RescheduleTracker.Events[1]
	expectedDelay := 320 * time.Second
	failedAllocID := allocs[1].ID
	successAllocID := allocs[0].ID

	require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs))

	// Create a mock evaluation
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerNodeUpdate,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}
	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure multiple plans
	if len(h.Plans) == 0 {
		t.Fatalf("bad: %#v", h.Plans)
	}

	// Lookup the allocations by JobID
	ws := memdb.NewWatchSet()
	out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false)
	require.NoError(t, err)

	// Verify that one new allocation got created with its restart tracker info
	assert := assert.New(t)
	assert.Equal(3, len(out))
	var newAlloc *structs.Allocation
	for _, alloc := range out {
		if alloc.ID != successAllocID && alloc.ID != failedAllocID {
			newAlloc = alloc
		}
	}

	assert.Equal(failedAllocID, newAlloc.PreviousAllocation)
	// Verify that the new alloc copied the last 5 reschedule attempts
	assert.Equal(6, len(newAlloc.RescheduleTracker.Events))
	assert.Equal(expectedFirstRescheduleEvent, newAlloc.RescheduleTracker.Events[0])

	mostRecentRescheduleEvent := newAlloc.RescheduleTracker.Events[5]
	// Verify that the failed alloc ID is in the most recent reschedule event
	assert.Equal(failedAllocID, mostRecentRescheduleEvent.PrevAllocID)
	// Verify that the delay value was captured correctly
	assert.Equal(expectedDelay, mostRecentRescheduleEvent.Delay)

}

// Tests that deployments with failed allocs result in placements as long as the
// deployment is running.
func TestDeployment_FailedAllocs_Reschedule(t *testing.T) {
	for _, failedDeployment := range []bool{false, true} {
		t.Run(fmt.Sprintf("Failed Deployment: %v", failedDeployment), func(t *testing.T) {
			h := NewHarness(t)
			require := require.New(t)
			// Create some nodes
			var nodes []*structs.Node
			for i := 0; i < 10; i++ {
				node := mock.Node()
				nodes = append(nodes, node)
				require.NoError(h.State.UpsertNode(h.NextIndex(), node))
			}

			// Generate a fake job with allocations and a reschedule policy.
			job := mock.Job()
			job.TaskGroups[0].Count = 2
			job.TaskGroups[0].ReschedulePolicy = &structs.ReschedulePolicy{
				Attempts: 1,
				Interval: 15 * time.Minute,
			}
			jobIndex := h.NextIndex()
			require.Nil(h.State.UpsertJob(jobIndex, job))

			// Tie a deployment to the job; in the failed case the scheduler
			// should refuse to place replacements.
			deployment := mock.Deployment()
			deployment.JobID = job.ID
			deployment.JobCreateIndex = jobIndex
			deployment.JobVersion = job.Version
			if failedDeployment {
				deployment.Status = structs.DeploymentStatusFailed
			}

			require.Nil(h.State.UpsertDeployment(h.NextIndex(), deployment))

			var allocs []*structs.Allocation
			for i := 0; i < 2; i++ {
				alloc := mock.Alloc()
				alloc.Job = job
				alloc.JobID = job.ID
				alloc.NodeID = nodes[i].ID
				alloc.Name = fmt.Sprintf("my-job.web[%d]", i)
				alloc.DeploymentID = deployment.ID
				allocs = append(allocs, alloc)
			}
			// Mark one of the allocations as failed in the past
			allocs[1].ClientStatus = structs.AllocClientStatusFailed
			allocs[1].TaskStates = map[string]*structs.TaskState{"web": {State: "start",
				StartedAt:  time.Now().Add(-12 * time.Hour),
				FinishedAt: time.Now().Add(-10 * time.Hour)}}
allocs[1].DesiredTransition.Reschedule = helper.BoolToPtr(true) 3804 3805 require.Nil(h.State.UpsertAllocs(h.NextIndex(), allocs)) 3806 3807 // Create a mock evaluation 3808 eval := &structs.Evaluation{ 3809 Namespace: structs.DefaultNamespace, 3810 ID: uuid.Generate(), 3811 Priority: 50, 3812 TriggeredBy: structs.EvalTriggerNodeUpdate, 3813 JobID: job.ID, 3814 Status: structs.EvalStatusPending, 3815 } 3816 require.Nil(h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3817 3818 // Process the evaluation 3819 require.Nil(h.Process(NewServiceScheduler, eval)) 3820 3821 if failedDeployment { 3822 // Verify no plan created 3823 require.Len(h.Plans, 0) 3824 } else { 3825 require.Len(h.Plans, 1) 3826 plan := h.Plans[0] 3827 3828 // Ensure the plan allocated 3829 var planned []*structs.Allocation 3830 for _, allocList := range plan.NodeAllocation { 3831 planned = append(planned, allocList...) 3832 } 3833 if len(planned) != 1 { 3834 t.Fatalf("bad: %#v", plan) 3835 } 3836 } 3837 }) 3838 } 3839 } 3840 3841 func TestBatchSched_Run_CompleteAlloc(t *testing.T) { 3842 h := NewHarness(t) 3843 3844 // Create a node 3845 node := mock.Node() 3846 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 3847 3848 // Create a job 3849 job := mock.Job() 3850 job.Type = structs.JobTypeBatch 3851 job.TaskGroups[0].Count = 1 3852 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 3853 3854 // Create a complete alloc 3855 alloc := mock.Alloc() 3856 alloc.Job = job 3857 alloc.JobID = job.ID 3858 alloc.NodeID = node.ID 3859 alloc.Name = "my-job.web[0]" 3860 alloc.ClientStatus = structs.AllocClientStatusComplete 3861 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3862 3863 // Create a mock evaluation to register the job 3864 eval := &structs.Evaluation{ 3865 Namespace: structs.DefaultNamespace, 3866 ID: uuid.Generate(), 3867 Priority: job.Priority, 3868 TriggeredBy: structs.EvalTriggerJobRegister, 3869 JobID: job.ID, 3870 Status: 
structs.EvalStatusPending, 3871 } 3872 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3873 3874 // Process the evaluation 3875 err := h.Process(NewBatchScheduler, eval) 3876 if err != nil { 3877 t.Fatalf("err: %v", err) 3878 } 3879 3880 // Ensure no plan as it should be a no-op 3881 if len(h.Plans) != 0 { 3882 t.Fatalf("bad: %#v", h.Plans) 3883 } 3884 3885 // Lookup the allocations by JobID 3886 ws := memdb.NewWatchSet() 3887 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3888 require.NoError(t, err) 3889 3890 // Ensure no allocations placed 3891 if len(out) != 1 { 3892 t.Fatalf("bad: %#v", out) 3893 } 3894 3895 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3896 } 3897 3898 func TestBatchSched_Run_FailedAlloc(t *testing.T) { 3899 h := NewHarness(t) 3900 3901 // Create a node 3902 node := mock.Node() 3903 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 3904 3905 // Create a job 3906 job := mock.Job() 3907 job.Type = structs.JobTypeBatch 3908 job.TaskGroups[0].Count = 1 3909 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 3910 3911 tgName := job.TaskGroups[0].Name 3912 now := time.Now() 3913 3914 // Create a failed alloc 3915 alloc := mock.Alloc() 3916 alloc.Job = job 3917 alloc.JobID = job.ID 3918 alloc.NodeID = node.ID 3919 alloc.Name = "my-job.web[0]" 3920 alloc.ClientStatus = structs.AllocClientStatusFailed 3921 alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 3922 StartedAt: now.Add(-1 * time.Hour), 3923 FinishedAt: now.Add(-10 * time.Second)}} 3924 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 3925 3926 // Create a mock evaluation to register the job 3927 eval := &structs.Evaluation{ 3928 Namespace: structs.DefaultNamespace, 3929 ID: uuid.Generate(), 3930 Priority: job.Priority, 3931 TriggeredBy: structs.EvalTriggerJobRegister, 3932 JobID: job.ID, 3933 Status: structs.EvalStatusPending, 3934 } 3935 require.NoError(t, 
h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 3936 3937 // Process the evaluation 3938 err := h.Process(NewBatchScheduler, eval) 3939 if err != nil { 3940 t.Fatalf("err: %v", err) 3941 } 3942 3943 // Ensure a plan 3944 if len(h.Plans) != 1 { 3945 t.Fatalf("bad: %#v", h.Plans) 3946 } 3947 3948 // Lookup the allocations by JobID 3949 ws := memdb.NewWatchSet() 3950 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 3951 require.NoError(t, err) 3952 3953 // Ensure a replacement alloc was placed. 3954 if len(out) != 2 { 3955 t.Fatalf("bad: %#v", out) 3956 } 3957 3958 // Ensure that the scheduler is recording the correct number of queued 3959 // allocations 3960 queued := h.Evals[0].QueuedAllocations["web"] 3961 if queued != 0 { 3962 t.Fatalf("expected: %v, actual: %v", 1, queued) 3963 } 3964 3965 h.AssertEvalStatus(t, structs.EvalStatusComplete) 3966 } 3967 3968 func TestBatchSched_Run_LostAlloc(t *testing.T) { 3969 h := NewHarness(t) 3970 3971 // Create a node 3972 node := mock.Node() 3973 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 3974 3975 // Create a job 3976 job := mock.Job() 3977 job.ID = "my-job" 3978 job.Type = structs.JobTypeBatch 3979 job.TaskGroups[0].Count = 3 3980 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 3981 3982 // Desired = 3 3983 // Mark one as lost and then schedule 3984 // [(0, run, running), (1, run, running), (1, stop, lost)] 3985 3986 // Create two running allocations 3987 var allocs []*structs.Allocation 3988 for i := 0; i <= 1; i++ { 3989 alloc := mock.Alloc() 3990 alloc.Job = job 3991 alloc.JobID = job.ID 3992 alloc.NodeID = node.ID 3993 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 3994 alloc.ClientStatus = structs.AllocClientStatusRunning 3995 allocs = append(allocs, alloc) 3996 } 3997 3998 // Create a failed alloc 3999 alloc := mock.Alloc() 4000 alloc.Job = job 4001 alloc.JobID = job.ID 4002 alloc.NodeID = node.ID 4003 alloc.Name = "my-job.web[1]" 4004 alloc.DesiredStatus = 
structs.AllocDesiredStatusStop 4005 alloc.ClientStatus = structs.AllocClientStatusComplete 4006 allocs = append(allocs, alloc) 4007 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 4008 4009 // Create a mock evaluation to register the job 4010 eval := &structs.Evaluation{ 4011 Namespace: structs.DefaultNamespace, 4012 ID: uuid.Generate(), 4013 Priority: job.Priority, 4014 TriggeredBy: structs.EvalTriggerJobRegister, 4015 JobID: job.ID, 4016 Status: structs.EvalStatusPending, 4017 } 4018 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4019 4020 // Process the evaluation 4021 err := h.Process(NewBatchScheduler, eval) 4022 if err != nil { 4023 t.Fatalf("err: %v", err) 4024 } 4025 4026 // Ensure a plan 4027 if len(h.Plans) != 1 { 4028 t.Fatalf("bad: %#v", h.Plans) 4029 } 4030 4031 // Lookup the allocations by JobID 4032 ws := memdb.NewWatchSet() 4033 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 4034 require.NoError(t, err) 4035 4036 // Ensure a replacement alloc was placed. 
4037 if len(out) != 4 { 4038 t.Fatalf("bad: %#v", out) 4039 } 4040 4041 // Assert that we have the correct number of each alloc name 4042 expected := map[string]int{ 4043 "my-job.web[0]": 1, 4044 "my-job.web[1]": 2, 4045 "my-job.web[2]": 1, 4046 } 4047 actual := make(map[string]int, 3) 4048 for _, alloc := range out { 4049 actual[alloc.Name] += 1 4050 } 4051 require.Equal(t, actual, expected) 4052 4053 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4054 } 4055 4056 func TestBatchSched_Run_FailedAllocQueuedAllocations(t *testing.T) { 4057 h := NewHarness(t) 4058 4059 node := mock.Node() 4060 node.Drain = true 4061 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4062 4063 // Create a job 4064 job := mock.Job() 4065 job.Type = structs.JobTypeBatch 4066 job.TaskGroups[0].Count = 1 4067 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 4068 4069 tgName := job.TaskGroups[0].Name 4070 now := time.Now() 4071 4072 // Create a failed alloc 4073 alloc := mock.Alloc() 4074 alloc.Job = job 4075 alloc.JobID = job.ID 4076 alloc.NodeID = node.ID 4077 alloc.Name = "my-job.web[0]" 4078 alloc.ClientStatus = structs.AllocClientStatusFailed 4079 alloc.TaskStates = map[string]*structs.TaskState{tgName: {State: "dead", 4080 StartedAt: now.Add(-1 * time.Hour), 4081 FinishedAt: now.Add(-10 * time.Second)}} 4082 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 4083 4084 // Create a mock evaluation to register the job 4085 eval := &structs.Evaluation{ 4086 Namespace: structs.DefaultNamespace, 4087 ID: uuid.Generate(), 4088 Priority: job.Priority, 4089 TriggeredBy: structs.EvalTriggerJobRegister, 4090 JobID: job.ID, 4091 Status: structs.EvalStatusPending, 4092 } 4093 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4094 4095 // Process the evaluation 4096 err := h.Process(NewBatchScheduler, eval) 4097 if err != nil { 4098 t.Fatalf("err: %v", err) 4099 } 4100 4101 // Ensure that the scheduler is 
recording the correct number of queued 4102 // allocations 4103 queued := h.Evals[0].QueuedAllocations["web"] 4104 if queued != 1 { 4105 t.Fatalf("expected: %v, actual: %v", 1, queued) 4106 } 4107 } 4108 4109 func TestBatchSched_ReRun_SuccessfullyFinishedAlloc(t *testing.T) { 4110 h := NewHarness(t) 4111 4112 // Create two nodes, one that is drained and has a successfully finished 4113 // alloc and a fresh undrained one 4114 node := mock.Node() 4115 node.Drain = true 4116 node2 := mock.Node() 4117 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4118 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node2)) 4119 4120 // Create a job 4121 job := mock.Job() 4122 job.Type = structs.JobTypeBatch 4123 job.TaskGroups[0].Count = 1 4124 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 4125 4126 // Create a successful alloc 4127 alloc := mock.Alloc() 4128 alloc.Job = job 4129 alloc.JobID = job.ID 4130 alloc.NodeID = node.ID 4131 alloc.Name = "my-job.web[0]" 4132 alloc.ClientStatus = structs.AllocClientStatusComplete 4133 alloc.TaskStates = map[string]*structs.TaskState{ 4134 "web": { 4135 State: structs.TaskStateDead, 4136 Events: []*structs.TaskEvent{ 4137 { 4138 Type: structs.TaskTerminated, 4139 ExitCode: 0, 4140 }, 4141 }, 4142 }, 4143 } 4144 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 4145 4146 // Create a mock evaluation to rerun the job 4147 eval := &structs.Evaluation{ 4148 Namespace: structs.DefaultNamespace, 4149 ID: uuid.Generate(), 4150 Priority: job.Priority, 4151 TriggeredBy: structs.EvalTriggerJobRegister, 4152 JobID: job.ID, 4153 Status: structs.EvalStatusPending, 4154 } 4155 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4156 4157 // Process the evaluation 4158 err := h.Process(NewBatchScheduler, eval) 4159 if err != nil { 4160 t.Fatalf("err: %v", err) 4161 } 4162 4163 // Ensure no plan 4164 if len(h.Plans) != 0 { 4165 t.Fatalf("bad: %#v", h.Plans) 4166 } 
4167 4168 // Lookup the allocations by JobID 4169 ws := memdb.NewWatchSet() 4170 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 4171 require.NoError(t, err) 4172 4173 // Ensure no replacement alloc was placed. 4174 if len(out) != 1 { 4175 t.Fatalf("bad: %#v", out) 4176 } 4177 4178 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4179 } 4180 4181 // This test checks that terminal allocations that receive an in-place updated 4182 // are not added to the plan 4183 func TestBatchSched_JobModify_InPlace_Terminal(t *testing.T) { 4184 h := NewHarness(t) 4185 4186 // Create some nodes 4187 var nodes []*structs.Node 4188 for i := 0; i < 10; i++ { 4189 node := mock.Node() 4190 nodes = append(nodes, node) 4191 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4192 } 4193 4194 // Generate a fake job with allocations 4195 job := mock.Job() 4196 job.Type = structs.JobTypeBatch 4197 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 4198 4199 var allocs []*structs.Allocation 4200 for i := 0; i < 10; i++ { 4201 alloc := mock.Alloc() 4202 alloc.Job = job 4203 alloc.JobID = job.ID 4204 alloc.NodeID = nodes[i].ID 4205 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 4206 alloc.ClientStatus = structs.AllocClientStatusComplete 4207 allocs = append(allocs, alloc) 4208 } 4209 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 4210 4211 // Create a mock evaluation to trigger the job 4212 eval := &structs.Evaluation{ 4213 Namespace: structs.DefaultNamespace, 4214 ID: uuid.Generate(), 4215 Priority: 50, 4216 TriggeredBy: structs.EvalTriggerJobRegister, 4217 JobID: job.ID, 4218 Status: structs.EvalStatusPending, 4219 } 4220 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4221 4222 // Process the evaluation 4223 err := h.Process(NewBatchScheduler, eval) 4224 if err != nil { 4225 t.Fatalf("err: %v", err) 4226 } 4227 4228 // Ensure no plan 4229 if len(h.Plans) != 0 { 4230 t.Fatalf("bad: %#v", h.Plans[0]) 4231 } 
4232 } 4233 4234 // This test ensures that terminal jobs from older versions are ignored. 4235 func TestBatchSched_JobModify_Destructive_Terminal(t *testing.T) { 4236 h := NewHarness(t) 4237 4238 // Create some nodes 4239 var nodes []*structs.Node 4240 for i := 0; i < 10; i++ { 4241 node := mock.Node() 4242 nodes = append(nodes, node) 4243 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4244 } 4245 4246 // Generate a fake job with allocations 4247 job := mock.Job() 4248 job.Type = structs.JobTypeBatch 4249 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 4250 4251 var allocs []*structs.Allocation 4252 for i := 0; i < 10; i++ { 4253 alloc := mock.Alloc() 4254 alloc.Job = job 4255 alloc.JobID = job.ID 4256 alloc.NodeID = nodes[i].ID 4257 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 4258 alloc.ClientStatus = structs.AllocClientStatusComplete 4259 allocs = append(allocs, alloc) 4260 } 4261 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 4262 4263 // Update the job 4264 job2 := mock.Job() 4265 job2.ID = job.ID 4266 job2.Type = structs.JobTypeBatch 4267 job2.Version++ 4268 job2.TaskGroups[0].Tasks[0].Env = map[string]string{"foo": "bar"} 4269 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job2)) 4270 4271 allocs = nil 4272 for i := 0; i < 10; i++ { 4273 alloc := mock.Alloc() 4274 alloc.Job = job2 4275 alloc.JobID = job2.ID 4276 alloc.NodeID = nodes[i].ID 4277 alloc.Name = fmt.Sprintf("my-job.web[%d]", i) 4278 alloc.ClientStatus = structs.AllocClientStatusComplete 4279 alloc.TaskStates = map[string]*structs.TaskState{ 4280 "web": { 4281 State: structs.TaskStateDead, 4282 Events: []*structs.TaskEvent{ 4283 { 4284 Type: structs.TaskTerminated, 4285 ExitCode: 0, 4286 }, 4287 }, 4288 }, 4289 } 4290 allocs = append(allocs, alloc) 4291 } 4292 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 4293 4294 // Create a mock evaluation to deal with drain 4295 eval := &structs.Evaluation{ 4296 Namespace: structs.DefaultNamespace, 
4297 ID: uuid.Generate(), 4298 Priority: 50, 4299 TriggeredBy: structs.EvalTriggerJobRegister, 4300 JobID: job.ID, 4301 Status: structs.EvalStatusPending, 4302 } 4303 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4304 4305 // Process the evaluation 4306 err := h.Process(NewBatchScheduler, eval) 4307 if err != nil { 4308 t.Fatalf("err: %v", err) 4309 } 4310 4311 // Ensure a plan 4312 if len(h.Plans) != 0 { 4313 t.Fatalf("bad: %#v", h.Plans) 4314 } 4315 } 4316 4317 // This test asserts that an allocation from an old job that is running on a 4318 // drained node is cleaned up. 4319 func TestBatchSched_NodeDrain_Running_OldJob(t *testing.T) { 4320 h := NewHarness(t) 4321 4322 // Create two nodes, one that is drained and has a successfully finished 4323 // alloc and a fresh undrained one 4324 node := mock.Node() 4325 node.Drain = true 4326 node2 := mock.Node() 4327 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4328 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node2)) 4329 4330 // Create a job 4331 job := mock.Job() 4332 job.Type = structs.JobTypeBatch 4333 job.TaskGroups[0].Count = 1 4334 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 4335 4336 // Create a running alloc 4337 alloc := mock.Alloc() 4338 alloc.Job = job 4339 alloc.JobID = job.ID 4340 alloc.NodeID = node.ID 4341 alloc.Name = "my-job.web[0]" 4342 alloc.ClientStatus = structs.AllocClientStatusRunning 4343 require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 4344 4345 // Create an update job 4346 job2 := job.Copy() 4347 job2.TaskGroups[0].Tasks[0].Env = map[string]string{"foo": "bar"} 4348 job2.Version++ 4349 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job2)) 4350 4351 // Create a mock evaluation to register the job 4352 eval := &structs.Evaluation{ 4353 Namespace: structs.DefaultNamespace, 4354 ID: uuid.Generate(), 4355 Priority: job.Priority, 4356 TriggeredBy: structs.EvalTriggerJobRegister, 4357 
JobID: job.ID, 4358 Status: structs.EvalStatusPending, 4359 } 4360 4361 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4362 4363 // Process the evaluation 4364 err := h.Process(NewBatchScheduler, eval) 4365 if err != nil { 4366 t.Fatalf("err: %v", err) 4367 } 4368 4369 // Ensure a plan 4370 if len(h.Plans) != 1 { 4371 t.Fatalf("bad: %#v", h.Plans) 4372 } 4373 4374 plan := h.Plans[0] 4375 4376 // Ensure the plan evicted 1 4377 if len(plan.NodeUpdate[node.ID]) != 1 { 4378 t.Fatalf("bad: %#v", plan) 4379 } 4380 4381 // Ensure the plan places 1 4382 if len(plan.NodeAllocation[node2.ID]) != 1 { 4383 t.Fatalf("bad: %#v", plan) 4384 } 4385 4386 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4387 } 4388 4389 // This test asserts that an allocation from a job that is complete on a 4390 // drained node is ignored up. 4391 func TestBatchSched_NodeDrain_Complete(t *testing.T) { 4392 h := NewHarness(t) 4393 4394 // Create two nodes, one that is drained and has a successfully finished 4395 // alloc and a fresh undrained one 4396 node := mock.Node() 4397 node.Drain = true 4398 node2 := mock.Node() 4399 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4400 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node2)) 4401 4402 // Create a job 4403 job := mock.Job() 4404 job.Type = structs.JobTypeBatch 4405 job.TaskGroups[0].Count = 1 4406 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 4407 4408 // Create a complete alloc 4409 alloc := mock.Alloc() 4410 alloc.Job = job 4411 alloc.JobID = job.ID 4412 alloc.NodeID = node.ID 4413 alloc.Name = "my-job.web[0]" 4414 alloc.ClientStatus = structs.AllocClientStatusComplete 4415 alloc.TaskStates = make(map[string]*structs.TaskState) 4416 alloc.TaskStates["web"] = &structs.TaskState{ 4417 State: structs.TaskStateDead, 4418 Events: []*structs.TaskEvent{ 4419 { 4420 Type: structs.TaskTerminated, 4421 ExitCode: 0, 4422 }, 4423 }, 4424 } 4425 require.NoError(t, 
h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 4426 4427 // Create a mock evaluation to register the job 4428 eval := &structs.Evaluation{ 4429 Namespace: structs.DefaultNamespace, 4430 ID: uuid.Generate(), 4431 Priority: job.Priority, 4432 TriggeredBy: structs.EvalTriggerJobRegister, 4433 JobID: job.ID, 4434 Status: structs.EvalStatusPending, 4435 } 4436 4437 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4438 4439 // Process the evaluation 4440 err := h.Process(NewBatchScheduler, eval) 4441 if err != nil { 4442 t.Fatalf("err: %v", err) 4443 } 4444 4445 // Ensure no plan 4446 if len(h.Plans) != 0 { 4447 t.Fatalf("bad: %#v", h.Plans) 4448 } 4449 4450 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4451 } 4452 4453 // This is a slightly odd test but it ensures that we handle a scale down of a 4454 // task group's count and that it works even if all the allocs have the same 4455 // name. 4456 func TestBatchSched_ScaleDown_SameName(t *testing.T) { 4457 h := NewHarness(t) 4458 4459 // Create a node 4460 node := mock.Node() 4461 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4462 4463 // Create a job 4464 job := mock.Job() 4465 job.Type = structs.JobTypeBatch 4466 job.TaskGroups[0].Count = 1 4467 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 4468 4469 scoreMetric := &structs.AllocMetric{ 4470 NodesEvaluated: 10, 4471 NodesFiltered: 3, 4472 ScoreMetaData: []*structs.NodeScoreMeta{ 4473 { 4474 NodeID: node.ID, 4475 Scores: map[string]float64{ 4476 "bin-packing": 0.5435, 4477 }, 4478 }, 4479 }, 4480 } 4481 // Create a few running alloc 4482 var allocs []*structs.Allocation 4483 for i := 0; i < 5; i++ { 4484 alloc := mock.Alloc() 4485 alloc.Job = job 4486 alloc.JobID = job.ID 4487 alloc.NodeID = node.ID 4488 alloc.Name = "my-job.web[0]" 4489 alloc.ClientStatus = structs.AllocClientStatusRunning 4490 alloc.Metrics = scoreMetric 4491 allocs = append(allocs, alloc) 4492 } 4493 
require.NoError(t, h.State.UpsertAllocs(h.NextIndex(), allocs)) 4494 4495 // Update the job's modify index to force an inplace upgrade 4496 updatedJob := job.Copy() 4497 updatedJob.JobModifyIndex = job.JobModifyIndex + 1 4498 require.NoError(t, h.State.UpsertJob(h.NextIndex(), updatedJob)) 4499 4500 // Create a mock evaluation to register the job 4501 eval := &structs.Evaluation{ 4502 Namespace: structs.DefaultNamespace, 4503 ID: uuid.Generate(), 4504 Priority: job.Priority, 4505 TriggeredBy: structs.EvalTriggerJobRegister, 4506 JobID: job.ID, 4507 Status: structs.EvalStatusPending, 4508 } 4509 4510 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4511 4512 // Process the evaluation 4513 err := h.Process(NewBatchScheduler, eval) 4514 if err != nil { 4515 t.Fatalf("err: %v", err) 4516 } 4517 4518 // Ensure a plan 4519 if len(h.Plans) != 1 { 4520 t.Fatalf("bad: %#v", h.Plans) 4521 } 4522 4523 plan := h.Plans[0] 4524 4525 require := require.New(t) 4526 // Ensure the plan evicted 4 of the 5 4527 require.Equal(4, len(plan.NodeUpdate[node.ID])) 4528 4529 // Ensure that the scheduler did not overwrite the original score metrics for the i 4530 for _, inPlaceAllocs := range plan.NodeAllocation { 4531 for _, alloc := range inPlaceAllocs { 4532 require.Equal(scoreMetric, alloc.Metrics) 4533 } 4534 } 4535 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4536 } 4537 4538 func TestGenericSched_AllocFit(t *testing.T) { 4539 testCases := []struct { 4540 Name string 4541 NodeCpu int64 4542 TaskResources structs.Resources 4543 MainTaskCount int 4544 InitTaskCount int 4545 SideTaskCount int 4546 ShouldPlaceAlloc bool 4547 }{ 4548 { 4549 Name: "simple init + sidecar", 4550 NodeCpu: 1200, 4551 TaskResources: structs.Resources{ 4552 CPU: 500, 4553 MemoryMB: 256, 4554 }, 4555 MainTaskCount: 1, 4556 InitTaskCount: 1, 4557 SideTaskCount: 1, 4558 ShouldPlaceAlloc: true, 4559 }, 4560 { 4561 Name: "too big init + sidecar", 4562 NodeCpu: 1200, 4563 
TaskResources: structs.Resources{ 4564 CPU: 700, 4565 MemoryMB: 256, 4566 }, 4567 MainTaskCount: 1, 4568 InitTaskCount: 1, 4569 SideTaskCount: 1, 4570 ShouldPlaceAlloc: false, 4571 }, 4572 { 4573 Name: "many init + sidecar", 4574 NodeCpu: 1200, 4575 TaskResources: structs.Resources{ 4576 CPU: 100, 4577 MemoryMB: 100, 4578 }, 4579 MainTaskCount: 3, 4580 InitTaskCount: 5, 4581 SideTaskCount: 5, 4582 ShouldPlaceAlloc: true, 4583 }, 4584 { 4585 Name: "too many init + sidecar", 4586 NodeCpu: 1200, 4587 TaskResources: structs.Resources{ 4588 CPU: 100, 4589 MemoryMB: 100, 4590 }, 4591 MainTaskCount: 10, 4592 InitTaskCount: 10, 4593 SideTaskCount: 10, 4594 ShouldPlaceAlloc: false, 4595 }, 4596 { 4597 Name: "too many too big", 4598 NodeCpu: 1200, 4599 TaskResources: structs.Resources{ 4600 CPU: 1000, 4601 MemoryMB: 100, 4602 }, 4603 MainTaskCount: 10, 4604 InitTaskCount: 10, 4605 SideTaskCount: 10, 4606 ShouldPlaceAlloc: false, 4607 }, 4608 } 4609 for _, testCase := range testCases { 4610 t.Run(testCase.Name, func(t *testing.T) { 4611 h := NewHarness(t) 4612 node := mock.Node() 4613 node.NodeResources.Cpu.CpuShares = testCase.NodeCpu 4614 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4615 4616 // Create a job with sidecar & init tasks 4617 job := mock.VariableLifecycleJob(testCase.TaskResources, testCase.MainTaskCount, testCase.InitTaskCount, testCase.SideTaskCount) 4618 4619 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 4620 4621 // Create a mock evaluation to register the job 4622 eval := &structs.Evaluation{ 4623 Namespace: structs.DefaultNamespace, 4624 ID: uuid.Generate(), 4625 Priority: job.Priority, 4626 TriggeredBy: structs.EvalTriggerJobRegister, 4627 JobID: job.ID, 4628 Status: structs.EvalStatusPending, 4629 } 4630 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4631 4632 // Process the evaluation 4633 err := h.Process(NewServiceScheduler, eval) 4634 require.NoError(t, err) 4635 4636 allocs := 0 4637 
if testCase.ShouldPlaceAlloc { 4638 allocs = 1 4639 } 4640 // Ensure no plan as it should be a no-op 4641 require.Len(t, h.Plans, allocs) 4642 4643 // Lookup the allocations by JobID 4644 ws := memdb.NewWatchSet() 4645 out, err := h.State.AllocsByJob(ws, job.Namespace, job.ID, false) 4646 require.NoError(t, err) 4647 4648 // Ensure no allocations placed 4649 require.Len(t, out, allocs) 4650 4651 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4652 }) 4653 } 4654 } 4655 4656 func TestGenericSched_ChainedAlloc(t *testing.T) { 4657 h := NewHarness(t) 4658 4659 // Create some nodes 4660 for i := 0; i < 10; i++ { 4661 node := mock.Node() 4662 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4663 } 4664 4665 // Create a job 4666 job := mock.Job() 4667 require.NoError(t, h.State.UpsertJob(h.NextIndex(), job)) 4668 4669 // Create a mock evaluation to register the job 4670 eval := &structs.Evaluation{ 4671 Namespace: structs.DefaultNamespace, 4672 ID: uuid.Generate(), 4673 Priority: job.Priority, 4674 TriggeredBy: structs.EvalTriggerJobRegister, 4675 JobID: job.ID, 4676 Status: structs.EvalStatusPending, 4677 } 4678 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4679 // Process the evaluation 4680 if err := h.Process(NewServiceScheduler, eval); err != nil { 4681 t.Fatalf("err: %v", err) 4682 } 4683 4684 var allocIDs []string 4685 for _, allocList := range h.Plans[0].NodeAllocation { 4686 for _, alloc := range allocList { 4687 allocIDs = append(allocIDs, alloc.ID) 4688 } 4689 } 4690 sort.Strings(allocIDs) 4691 4692 // Create a new harness to invoke the scheduler again 4693 h1 := NewHarnessWithState(t, h.State) 4694 job1 := mock.Job() 4695 job1.ID = job.ID 4696 job1.TaskGroups[0].Tasks[0].Env["foo"] = "bar" 4697 job1.TaskGroups[0].Count = 12 4698 require.NoError(t, h1.State.UpsertJob(h1.NextIndex(), job1)) 4699 4700 // Create a mock evaluation to update the job 4701 eval1 := &structs.Evaluation{ 4702 Namespace: 
structs.DefaultNamespace, 4703 ID: uuid.Generate(), 4704 Priority: job1.Priority, 4705 TriggeredBy: structs.EvalTriggerJobRegister, 4706 JobID: job1.ID, 4707 Status: structs.EvalStatusPending, 4708 } 4709 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval1})) 4710 4711 // Process the evaluation 4712 if err := h1.Process(NewServiceScheduler, eval1); err != nil { 4713 t.Fatalf("err: %v", err) 4714 } 4715 4716 plan := h1.Plans[0] 4717 4718 // Collect all the chained allocation ids and the new allocations which 4719 // don't have any chained allocations 4720 var prevAllocs []string 4721 var newAllocs []string 4722 for _, allocList := range plan.NodeAllocation { 4723 for _, alloc := range allocList { 4724 if alloc.PreviousAllocation == "" { 4725 newAllocs = append(newAllocs, alloc.ID) 4726 continue 4727 } 4728 prevAllocs = append(prevAllocs, alloc.PreviousAllocation) 4729 } 4730 } 4731 sort.Strings(prevAllocs) 4732 4733 // Ensure that the new allocations has their corresponding original 4734 // allocation ids 4735 if !reflect.DeepEqual(prevAllocs, allocIDs) { 4736 t.Fatalf("expected: %v, actual: %v", len(allocIDs), len(prevAllocs)) 4737 } 4738 4739 // Ensuring two new allocations don't have any chained allocations 4740 if len(newAllocs) != 2 { 4741 t.Fatalf("expected: %v, actual: %v", 2, len(newAllocs)) 4742 } 4743 } 4744 4745 func TestServiceSched_NodeDrain_Sticky(t *testing.T) { 4746 h := NewHarness(t) 4747 4748 // Register a draining node 4749 node := mock.Node() 4750 node.Drain = true 4751 require.NoError(t, h.State.UpsertNode(h.NextIndex(), node)) 4752 4753 // Create an alloc on the draining node 4754 alloc := mock.Alloc() 4755 alloc.Name = "my-job.web[0]" 4756 alloc.NodeID = node.ID 4757 alloc.Job.TaskGroups[0].Count = 1 4758 alloc.Job.TaskGroups[0].EphemeralDisk.Sticky = true 4759 alloc.DesiredTransition.Migrate = helper.BoolToPtr(true) 4760 require.NoError(t, h.State.UpsertJob(h.NextIndex(), alloc.Job)) 4761 require.NoError(t, 
h.State.UpsertAllocs(h.NextIndex(), []*structs.Allocation{alloc})) 4762 4763 // Create a mock evaluation to deal with drain 4764 eval := &structs.Evaluation{ 4765 Namespace: structs.DefaultNamespace, 4766 ID: uuid.Generate(), 4767 Priority: 50, 4768 TriggeredBy: structs.EvalTriggerNodeUpdate, 4769 JobID: alloc.Job.ID, 4770 NodeID: node.ID, 4771 Status: structs.EvalStatusPending, 4772 } 4773 4774 require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval})) 4775 4776 // Process the evaluation 4777 err := h.Process(NewServiceScheduler, eval) 4778 if err != nil { 4779 t.Fatalf("err: %v", err) 4780 } 4781 4782 // Ensure a single plan 4783 if len(h.Plans) != 1 { 4784 t.Fatalf("bad: %#v", h.Plans) 4785 } 4786 plan := h.Plans[0] 4787 4788 // Ensure the plan evicted all allocs 4789 if len(plan.NodeUpdate[node.ID]) != 1 { 4790 t.Fatalf("bad: %#v", plan) 4791 } 4792 4793 // Ensure the plan didn't create any new allocations 4794 var planned []*structs.Allocation 4795 for _, allocList := range plan.NodeAllocation { 4796 planned = append(planned, allocList...) 4797 } 4798 if len(planned) != 0 { 4799 t.Fatalf("bad: %#v", plan) 4800 } 4801 4802 h.AssertEvalStatus(t, structs.EvalStatusComplete) 4803 } 4804 4805 // This test ensures that when a job is stopped, the scheduler properly cancels 4806 // an outstanding deployment. 
func TestServiceSched_CancelDeployment_Stopped(t *testing.T) {
	h := NewHarness(t)

	// Generate a fake job that has been stopped, with a modify index newer
	// than the deployment created below
	job := mock.Job()
	job.JobModifyIndex = job.CreateIndex + 1
	job.ModifyIndex = job.CreateIndex + 1
	job.Stop = true
	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a deployment tied to the older version of the job
	d := mock.Deployment()
	d.JobID = job.ID
	d.JobCreateIndex = job.CreateIndex
	d.JobModifyIndex = job.JobModifyIndex - 1
	require.NoError(t, h.State.UpsertDeployment(h.NextIndex(), d))

	// Create a mock evaluation to deregister the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobDeregister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan cancelled the existing deployment
	ws := memdb.NewWatchSet()
	out, err := h.State.LatestDeploymentByJobID(ws, job.Namespace, job.ID)
	require.NoError(t, err)

	if out == nil {
		t.Fatalf("No deployment for job")
	}
	if out.ID != d.ID {
		t.Fatalf("Latest deployment for job is different than original deployment")
	}
	if out.Status != structs.DeploymentStatusCancelled {
		t.Fatalf("Deployment status is %q, want %q", out.Status, structs.DeploymentStatusCancelled)
	}
	if out.StatusDescription != structs.DeploymentStatusDescriptionStoppedJob {
		t.Fatalf("Deployment status description is %q, want %q",
			out.StatusDescription, structs.DeploymentStatusDescriptionStoppedJob)
	}

	// Ensure the plan didn't allocate anything
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 0 {
		t.Fatalf("bad: %#v", plan)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

// This test ensures that when a job is updated and had an old deployment, the scheduler properly cancels
// the deployment.
func TestServiceSched_CancelDeployment_NewerJob(t *testing.T) {
	h := NewHarness(t)

	// Generate a fake job
	job := mock.Job()
	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a deployment for an old version of the job
	d := mock.Deployment()
	d.JobID = job.ID
	require.NoError(t, h.State.UpsertDeployment(h.NextIndex(), d))

	// Upsert again to bump job version
	require.NoError(t, h.State.UpsertJob(h.NextIndex(), job))

	// Create a mock evaluation to kick the job
	eval := &structs.Evaluation{
		Namespace:   structs.DefaultNamespace,
		ID:          uuid.Generate(),
		Priority:    50,
		TriggeredBy: structs.EvalTriggerJobRegister,
		JobID:       job.ID,
		Status:      structs.EvalStatusPending,
	}

	require.NoError(t, h.State.UpsertEvals(h.NextIndex(), []*structs.Evaluation{eval}))

	// Process the evaluation
	err := h.Process(NewServiceScheduler, eval)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ensure a single plan
	if len(h.Plans) != 1 {
		t.Fatalf("bad: %#v", h.Plans)
	}
	plan := h.Plans[0]

	// Ensure the plan cancelled the existing deployment
	ws := memdb.NewWatchSet()
	out, err := h.State.LatestDeploymentByJobID(ws, job.Namespace, job.ID)
	require.NoError(t, err)

	if out == nil {
		t.Fatalf("No deployment for job")
	}
	if out.ID != d.ID {
		t.Fatalf("Latest deployment for job is different than original deployment")
	}
	if out.Status != structs.DeploymentStatusCancelled {
		t.Fatalf("Deployment status is %q, want %q", out.Status, structs.DeploymentStatusCancelled)
	}
	if out.StatusDescription != structs.DeploymentStatusDescriptionNewerJob {
		t.Fatalf("Deployment status description is %q, want %q",
			out.StatusDescription, structs.DeploymentStatusDescriptionNewerJob)
	}
	// Ensure the plan didn't allocate anything
	var planned []*structs.Allocation
	for _, allocList := range plan.NodeAllocation {
		planned = append(planned, allocList...)
	}
	if len(planned) != 0 {
		t.Fatalf("bad: %#v", plan)
	}

	h.AssertEvalStatus(t, structs.EvalStatusComplete)
}

// Various table driven tests for carry forward
// of past reschedule events
func Test_updateRescheduleTracker(t *testing.T) {

	t1 := time.Now().UTC()
	alloc := mock.Alloc()
	prevAlloc := mock.Alloc()

	// Each case installs prevAllocEvents/reschedPolicy on prevAlloc, invokes
	// updateRescheduleTracker at reschedTime, and compares the resulting
	// tracker events on alloc against expectedRescheduleEvents.
	type testCase struct {
		desc                     string
		prevAllocEvents          []*structs.RescheduleEvent
		reschedPolicy            *structs.ReschedulePolicy
		expectedRescheduleEvents []*structs.RescheduleEvent
		reschedTime              time.Time
	}

	testCases := []testCase{
		{
			desc:            "No past events",
			prevAllocEvents: nil,
			reschedPolicy:   &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second},
			reschedTime:     t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
			},
		},
		{
			desc: "one past event, linear delay",
			prevAllocEvents: []*structs.RescheduleEvent{
				{RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID: prevAlloc.ID,
					PrevNodeID:  prevAlloc.NodeID,
					Delay:       5 * time.Second}},
			reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second},
			reschedTime:   t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
			},
		},
		{
			desc: "one past event, fibonacci delay",
			prevAllocEvents: []*structs.RescheduleEvent{
				{RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID: prevAlloc.ID,
					PrevNodeID:  prevAlloc.NodeID,
					Delay:       5 * time.Second}},
			reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 24 * time.Hour, Attempts: 2, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 60 * time.Second},
			reschedTime:   t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
			},
		},
		{
			desc: "eight past events, fibonacci delay, unlimited",
			prevAllocEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          10 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          15 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          25 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          40 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          65 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          105 * time.Second,
				},
			},
			reschedPolicy: &structs.ReschedulePolicy{Unlimited: true, Delay: 5 * time.Second, DelayFunction: "fibonacci", MaxDelay: 240 * time.Second},
			reschedTime:   t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          15 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          25 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          40 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          65 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-1 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          105 * time.Second,
				},
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          170 * time.Second,
				},
			},
		},
		{
			desc: " old attempts past interval, exponential delay, limited",
			prevAllocEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-2 * time.Hour).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          5 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-70 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          10 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-30 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          20 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          40 * time.Second,
				},
			},
			reschedPolicy: &structs.ReschedulePolicy{Unlimited: false, Interval: 1 * time.Hour, Attempts: 5, Delay: 5 * time.Second, DelayFunction: "exponential", MaxDelay: 240 * time.Second},
			reschedTime:   t1,
			expectedRescheduleEvents: []*structs.RescheduleEvent{
				{
					RescheduleTime: t1.Add(-30 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          20 * time.Second,
				},
				{
					RescheduleTime: t1.Add(-10 * time.Minute).UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          40 * time.Second,
				},
				{
					RescheduleTime: t1.UnixNano(),
					PrevAllocID:    prevAlloc.ID,
					PrevNodeID:     prevAlloc.NodeID,
					Delay:          80 * time.Second,
				},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			require := require.New(t)
			prevAlloc.RescheduleTracker = &structs.RescheduleTracker{Events: tc.prevAllocEvents}
			prevAlloc.Job.LookupTaskGroup(prevAlloc.TaskGroup).ReschedulePolicy = tc.reschedPolicy
			updateRescheduleTracker(alloc, prevAlloc, tc.reschedTime)
			require.Equal(tc.expectedRescheduleEvents, alloc.RescheduleTracker.Events)
		})
	}

}