github.com/banmanh482/nomad@v0.11.8/scheduler/preemption_test.go (about) 1 package scheduler 2 3 import ( 4 "fmt" 5 "testing" 6 7 "strconv" 8 9 "github.com/hashicorp/nomad/helper/uuid" 10 "github.com/hashicorp/nomad/nomad/mock" 11 "github.com/hashicorp/nomad/nomad/structs" 12 psstructs "github.com/hashicorp/nomad/plugins/shared/structs" 13 "github.com/stretchr/testify/require" 14 ) 15 16 func TestResourceDistance(t *testing.T) { 17 resourceAsk := &structs.ComparableResources{ 18 Flattened: structs.AllocatedTaskResources{ 19 Cpu: structs.AllocatedCpuResources{ 20 CpuShares: 2048, 21 }, 22 Memory: structs.AllocatedMemoryResources{ 23 MemoryMB: 512, 24 }, 25 Networks: []*structs.NetworkResource{ 26 { 27 Device: "eth0", 28 MBits: 1024, 29 }, 30 }, 31 }, 32 Shared: structs.AllocatedSharedResources{ 33 DiskMB: 4096, 34 }, 35 } 36 37 type testCase struct { 38 allocResource *structs.ComparableResources 39 expectedDistance string 40 } 41 42 testCases := []*testCase{ 43 { 44 &structs.ComparableResources{ 45 Flattened: structs.AllocatedTaskResources{ 46 Cpu: structs.AllocatedCpuResources{ 47 CpuShares: 2048, 48 }, 49 Memory: structs.AllocatedMemoryResources{ 50 MemoryMB: 512, 51 }, 52 Networks: []*structs.NetworkResource{ 53 { 54 Device: "eth0", 55 MBits: 1024, 56 }, 57 }, 58 }, 59 Shared: structs.AllocatedSharedResources{ 60 DiskMB: 4096, 61 }, 62 }, 63 "0.000", 64 }, 65 { 66 &structs.ComparableResources{ 67 Flattened: structs.AllocatedTaskResources{ 68 Cpu: structs.AllocatedCpuResources{ 69 CpuShares: 1024, 70 }, 71 Memory: structs.AllocatedMemoryResources{ 72 MemoryMB: 400, 73 }, 74 Networks: []*structs.NetworkResource{ 75 { 76 Device: "eth0", 77 MBits: 1024, 78 }, 79 }, 80 }, 81 Shared: structs.AllocatedSharedResources{ 82 DiskMB: 1024, 83 }, 84 }, 85 "0.928", 86 }, 87 { 88 &structs.ComparableResources{ 89 Flattened: structs.AllocatedTaskResources{ 90 Cpu: structs.AllocatedCpuResources{ 91 CpuShares: 8192, 92 }, 93 Memory: structs.AllocatedMemoryResources{ 94 MemoryMB: 200, 95 }, 96 Networks: []*structs.NetworkResource{ 97 { 98 Device: "eth0", 99 MBits: 512, 100 }, 101 }, 102 }, 103 Shared: structs.AllocatedSharedResources{ 104 DiskMB: 1024, 105 }, 106 }, 107 "3.152", 108 }, 109 { 110 &structs.ComparableResources{ 111 Flattened: structs.AllocatedTaskResources{ 112 Cpu: structs.AllocatedCpuResources{ 113 CpuShares: 2048, 114 }, 115 Memory: structs.AllocatedMemoryResources{ 116 MemoryMB: 500, 117 }, 118 Networks: []*structs.NetworkResource{ 119 { 120 Device: "eth0", 121 MBits: 1024, 122 }, 123 }, 124 }, 125 Shared: structs.AllocatedSharedResources{ 126 DiskMB: 4096, 127 }, 128 }, 129 "0.023", 130 }, 131 } 132 133 for _, tc := range testCases { 134 t.Run("", func(t *testing.T) { 135 require := require.New(t) 136 actualDistance := fmt.Sprintf("%3.3f", basicResourceDistance(resourceAsk, tc.allocResource)) 137 require.Equal(tc.expectedDistance, actualDistance) 138 }) 139 140 } 141 142 } 143 144 func TestPreemption(t *testing.T) { 145 type testCase struct { 146 desc string 147 currentAllocations []*structs.Allocation 148 nodeReservedCapacity *structs.NodeReservedResources 149 nodeCapacity *structs.NodeResources 150 resourceAsk *structs.Resources 151 jobPriority int 152 currentPreemptions []*structs.Allocation 153 preemptedAllocIDs map[string]struct{} 154 } 155 156 highPrioJob := mock.Job() 157 highPrioJob.Priority = 100 158 159 lowPrioJob := mock.Job() 160 lowPrioJob.Priority = 30 161 162 lowPrioJob2 := mock.Job() 163 lowPrioJob2.Priority = 40 164 165 // Create some persistent alloc ids to use in test cases 166 allocIDs := []string{uuid.Generate(), uuid.Generate(), uuid.Generate(), uuid.Generate(), uuid.Generate(), uuid.Generate()} 167 168 var deviceIDs []string 169 for i := 0; i < 10; i++ { 170 deviceIDs = append(deviceIDs, "dev"+strconv.Itoa(i)) 171 } 172 173 defaultNodeResources := &structs.NodeResources{ 174 Cpu: structs.NodeCpuResources{ 175 CpuShares: 4000, 176 }, 177 Memory: structs.NodeMemoryResources{ 178 MemoryMB: 8192, 179 }, 180 Disk: structs.NodeDiskResources{ 181 DiskMB: 100 * 1024, 182 }, 183 Networks: []*structs.NetworkResource{ 184 { 185 Device: "eth0", 186 CIDR: "192.168.0.100/32", 187 MBits: 1000, 188 }, 189 }, 190 Devices: []*structs.NodeDeviceResource{ 191 { 192 Type: "gpu", 193 Vendor: "nvidia", 194 Name: "1080ti", 195 Attributes: map[string]*psstructs.Attribute{ 196 "memory": psstructs.NewIntAttribute(11, psstructs.UnitGiB), 197 "cuda_cores": psstructs.NewIntAttribute(3584, ""), 198 "graphics_clock": psstructs.NewIntAttribute(1480, psstructs.UnitMHz), 199 "memory_bandwidth": psstructs.NewIntAttribute(11, psstructs.UnitGBPerS), 200 }, 201 Instances: []*structs.NodeDevice{ 202 { 203 ID: deviceIDs[0], 204 Healthy: true, 205 }, 206 { 207 ID: deviceIDs[1], 208 Healthy: true, 209 }, 210 { 211 ID: deviceIDs[2], 212 Healthy: true, 213 }, 214 { 215 ID: deviceIDs[3], 216 Healthy: true, 217 }, 218 }, 219 }, 220 { 221 Type: "gpu", 222 Vendor: "nvidia", 223 Name: "2080ti", 224 Attributes: map[string]*psstructs.Attribute{ 225 "memory": psstructs.NewIntAttribute(11, psstructs.UnitGiB), 226 "cuda_cores": psstructs.NewIntAttribute(3584, ""), 227 "graphics_clock": psstructs.NewIntAttribute(1480, psstructs.UnitMHz), 228 "memory_bandwidth": psstructs.NewIntAttribute(11, psstructs.UnitGBPerS), 229 }, 230 Instances: []*structs.NodeDevice{ 231 { 232 ID: deviceIDs[4], 233 Healthy: true, 234 }, 235 { 236 ID: deviceIDs[5], 237 Healthy: true, 238 }, 239 { 240 ID: deviceIDs[6], 241 Healthy: true, 242 }, 243 { 244 ID: deviceIDs[7], 245 Healthy: true, 246 }, 247 { 248 ID: deviceIDs[8], 249 Healthy: true, 250 }, 251 }, 252 }, 253 { 254 Type: "fpga", 255 Vendor: "intel", 256 Name: "F100", 257 Attributes: map[string]*psstructs.Attribute{ 258 "memory": psstructs.NewIntAttribute(4, psstructs.UnitGiB), 259 }, 260 Instances: []*structs.NodeDevice{ 261 { 262 ID: "fpga1", 263 Healthy: true, 264 }, 265 { 266 ID: "fpga2", 267 Healthy: false, 268 }, 269 }, 270 }, 271 }, 272 } 273 274 reservedNodeResources := &structs.NodeReservedResources{ 275 Cpu: structs.NodeReservedCpuResources{ 276 CpuShares: 100, 277 }, 278 Memory: structs.NodeReservedMemoryResources{ 279 MemoryMB: 256, 280 }, 281 Disk: structs.NodeReservedDiskResources{ 282 DiskMB: 4 * 1024, 283 }, 284 } 285 286 testCases := []testCase{ 287 { 288 desc: "No preemption because existing allocs are not low priority", 289 currentAllocations: []*structs.Allocation{ 290 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 291 CPU: 3200, 292 MemoryMB: 7256, 293 DiskMB: 4 * 1024, 294 Networks: []*structs.NetworkResource{ 295 { 296 Device: "eth0", 297 IP: "192.168.0.100", 298 MBits: 50, 299 }, 300 }, 301 })}, 302 nodeReservedCapacity: reservedNodeResources, 303 nodeCapacity: defaultNodeResources, 304 jobPriority: 100, 305 resourceAsk: &structs.Resources{ 306 CPU: 2000, 307 MemoryMB: 256, 308 DiskMB: 4 * 1024, 309 Networks: []*structs.NetworkResource{ 310 { 311 Device: "eth0", 312 IP: "192.168.0.100", 313 ReservedPorts: []structs.Port{{Label: "ssh", Value: 22}}, 314 MBits: 1, 315 }, 316 }, 317 }, 318 }, 319 { 320 desc: "Preempting low priority allocs not enough to meet resource ask", 321 currentAllocations: []*structs.Allocation{ 322 createAlloc(allocIDs[0], lowPrioJob, &structs.Resources{ 323 CPU: 3200, 324 MemoryMB: 7256, 325 DiskMB: 4 * 1024, 326 Networks: []*structs.NetworkResource{ 327 { 328 Device: "eth0", 329 IP: "192.168.0.100", 330 MBits: 50, 331 }, 332 }, 333 })}, 334 nodeReservedCapacity: reservedNodeResources, 335 nodeCapacity: defaultNodeResources, 336 jobPriority: 100, 337 resourceAsk: &structs.Resources{ 338 CPU: 4000, 339 MemoryMB: 8192, 340 DiskMB: 4 * 1024, 341 Networks: []*structs.NetworkResource{ 342 { 343 Device: "eth0", 344 IP: "192.168.0.100", 345 ReservedPorts: []structs.Port{{Label: "ssh", Value: 22}}, 346 MBits: 1, 347 }, 348 }, 349 }, 350 }, 351 { 352 desc: "preemption impossible - static port needed is used by higher priority alloc", 353 currentAllocations: []*structs.Allocation{ 354 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 355 CPU: 1200, 356 MemoryMB: 2256, 357 DiskMB: 4 * 1024, 358 Networks: []*structs.NetworkResource{ 359 { 360 Device: "eth0", 361 IP: "192.168.0.100", 362 MBits: 150, 363 }, 364 }, 365 }), 366 createAlloc(allocIDs[1], highPrioJob, &structs.Resources{ 367 CPU: 200, 368 MemoryMB: 256, 369 DiskMB: 4 * 1024, 370 Networks: []*structs.NetworkResource{ 371 { 372 Device: "eth0", 373 IP: "192.168.0.200", 374 MBits: 600, 375 ReservedPorts: []structs.Port{ 376 { 377 Label: "db", 378 Value: 88, 379 }, 380 }, 381 }, 382 }, 383 }), 384 }, 385 nodeReservedCapacity: reservedNodeResources, 386 nodeCapacity: defaultNodeResources, 387 jobPriority: 100, 388 resourceAsk: &structs.Resources{ 389 CPU: 600, 390 MemoryMB: 1000, 391 DiskMB: 25 * 1024, 392 Networks: []*structs.NetworkResource{ 393 { 394 Device: "eth0", 395 IP: "192.168.0.100", 396 MBits: 700, 397 ReservedPorts: []structs.Port{ 398 { 399 Label: "db", 400 Value: 88, 401 }, 402 }, 403 }, 404 }, 405 }, 406 }, 407 { 408 desc: "preempt only from device that has allocation with unused reserved port", 409 currentAllocations: []*structs.Allocation{ 410 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 411 CPU: 1200, 412 MemoryMB: 2256, 413 DiskMB: 4 * 1024, 414 Networks: []*structs.NetworkResource{ 415 { 416 Device: "eth0", 417 IP: "192.168.0.100", 418 MBits: 150, 419 }, 420 }, 421 }), 422 createAlloc(allocIDs[1], highPrioJob, &structs.Resources{ 423 CPU: 200, 424 MemoryMB: 256, 425 DiskMB: 4 * 1024, 426 Networks: []*structs.NetworkResource{ 427 { 428 Device: "eth1", 429 IP: "192.168.0.200", 430 MBits: 600, 431 ReservedPorts: []structs.Port{ 432 { 433 Label: "db", 434 Value: 88, 435 }, 436 }, 437 }, 438 }, 439 }), 440 createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ 441 CPU: 200, 442 MemoryMB: 256, 443 DiskMB: 4 * 1024, 444 Networks: []*structs.NetworkResource{ 445 { 446 Device: "eth0", 447 IP: "192.168.0.200", 448 MBits: 600, 449 }, 450 }, 451 }), 452 }, 453 nodeReservedCapacity: reservedNodeResources, 454 // This test sets up a node with two NICs 455 nodeCapacity: &structs.NodeResources{ 456 Cpu: structs.NodeCpuResources{ 457 CpuShares: 4000, 458 }, 459 Memory: structs.NodeMemoryResources{ 460 MemoryMB: 8192, 461 }, 462 Disk: structs.NodeDiskResources{ 463 DiskMB: 100 * 1024, 464 }, 465 Networks: []*structs.NetworkResource{ 466 { 467 Device: "eth0", 468 CIDR: "192.168.0.100/32", 469 MBits: 1000, 470 }, 471 { 472 Device: "eth1", 473 CIDR: "192.168.1.100/32", 474 MBits: 1000, 475 }, 476 }, 477 }, 478 jobPriority: 100, 479 resourceAsk: &structs.Resources{ 480 CPU: 600, 481 MemoryMB: 1000, 482 DiskMB: 25 * 1024, 483 Networks: []*structs.NetworkResource{ 484 { 485 IP: "192.168.0.100", 486 MBits: 700, 487 ReservedPorts: []structs.Port{ 488 { 489 Label: "db", 490 Value: 88, 491 }, 492 }, 493 }, 494 }, 495 }, 496 preemptedAllocIDs: map[string]struct{}{ 497 allocIDs[2]: {}, 498 }, 499 }, 500 { 501 desc: "Combination of high/low priority allocs, without static ports", 502 currentAllocations: []*structs.Allocation{ 503 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 504 CPU: 2800, 505 MemoryMB: 2256, 506 DiskMB: 4 * 1024, 507 Networks: []*structs.NetworkResource{ 508 { 509 Device: "eth0", 510 IP: "192.168.0.100", 511 MBits: 150, 512 }, 513 }, 514 }), 515 createAllocWithTaskgroupNetwork(allocIDs[1], lowPrioJob, &structs.Resources{ 516 CPU: 200, 517 MemoryMB: 256, 518 DiskMB: 4 * 1024, 519 Networks: []*structs.NetworkResource{ 520 { 521 Device: "eth0", 522 IP: "192.168.0.200", 523 MBits: 200, 524 }, 525 }, 526 }, &structs.NetworkResource{ 527 Device: "eth0", 528 IP: "192.168.0.201", 529 MBits: 300, 530 }), 531 createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ 532 CPU: 200, 533 MemoryMB: 256, 534 DiskMB: 4 * 1024, 535 Networks: []*structs.NetworkResource{ 536 { 537 Device: "eth0", 538 IP: "192.168.0.100", 539 MBits: 300, 540 }, 541 }, 542 }), 543 createAlloc(allocIDs[3], lowPrioJob, &structs.Resources{ 544 CPU: 700, 545 MemoryMB: 256, 546 DiskMB: 4 * 1024, 547 }), 548 }, 549 nodeReservedCapacity: reservedNodeResources, 550 nodeCapacity: defaultNodeResources, 551 jobPriority: 100, 552 resourceAsk: &structs.Resources{ 553 CPU: 1100, 554 MemoryMB: 1000, 555 DiskMB: 25 * 1024, 556 Networks: []*structs.NetworkResource{ 557 { 558 Device: "eth0", 559 IP: "192.168.0.100", 560 MBits: 840, 561 }, 562 }, 563 }, 564 preemptedAllocIDs: map[string]struct{}{ 565 allocIDs[1]: {}, 566 allocIDs[2]: {}, 567 allocIDs[3]: {}, 568 }, 569 }, 570 { 571 desc: "preempt allocs with network devices", 572 currentAllocations: []*structs.Allocation{ 573 createAlloc(allocIDs[0], lowPrioJob, &structs.Resources{ 574 CPU: 2800, 575 MemoryMB: 2256, 576 DiskMB: 4 * 1024, 577 }), 578 createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ 579 CPU: 200, 580 MemoryMB: 256, 581 DiskMB: 4 * 1024, 582 Networks: []*structs.NetworkResource{ 583 { 584 Device: "eth0", 585 IP: "192.168.0.200", 586 MBits: 800, 587 }, 588 }, 589 }), 590 }, 591 nodeReservedCapacity: reservedNodeResources, 592 nodeCapacity: defaultNodeResources, 593 jobPriority: 100, 594 resourceAsk: &structs.Resources{ 595 CPU: 1100, 596 MemoryMB: 1000, 597 DiskMB: 25 * 1024, 598 Networks: []*structs.NetworkResource{ 599 { 600 Device: "eth0", 601 IP: "192.168.0.100", 602 MBits: 840, 603 }, 604 }, 605 }, 606 preemptedAllocIDs: map[string]struct{}{ 607 allocIDs[1]: {}, 608 }, 609 }, 610 { 611 desc: "ignore allocs with close enough priority for network devices", 612 currentAllocations: []*structs.Allocation{ 613 createAlloc(allocIDs[0], lowPrioJob, &structs.Resources{ 614 CPU: 2800, 615 MemoryMB: 2256, 616 DiskMB: 4 * 1024, 617 }), 618 createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ 619 CPU: 200, 620 MemoryMB: 256, 621 DiskMB: 4 * 1024, 622 Networks: []*structs.NetworkResource{ 623 { 624 Device: "eth0", 625 IP: "192.168.0.200", 626 MBits: 800, 627 }, 628 }, 629 }), 630 }, 631 nodeReservedCapacity: reservedNodeResources, 632 nodeCapacity: defaultNodeResources, 633 jobPriority: lowPrioJob.Priority + 5, 634 resourceAsk: &structs.Resources{ 635 CPU: 1100, 636 MemoryMB: 1000, 637 DiskMB: 25 * 1024, 638 Networks: []*structs.NetworkResource{ 639 { 640 Device: "eth0", 641 IP: "192.168.0.100", 642 MBits: 840, 643 }, 644 }, 645 }, 646 preemptedAllocIDs: nil, 647 }, 648 { 649 desc: "Preemption needed for all resources except network", 650 currentAllocations: []*structs.Allocation{ 651 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 652 CPU: 2800, 653 MemoryMB: 2256, 654 DiskMB: 40 * 1024, 655 Networks: []*structs.NetworkResource{ 656 { 657 Device: "eth0", 658 IP: "192.168.0.100", 659 MBits: 150, 660 }, 661 }, 662 }), 663 createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ 664 CPU: 200, 665 MemoryMB: 256, 666 DiskMB: 4 * 1024, 667 Networks: []*structs.NetworkResource{ 668 { 669 Device: "eth0", 670 IP: "192.168.0.200", 671 MBits: 50, 672 }, 673 }, 674 }), 675 createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ 676 CPU: 200, 677 MemoryMB: 512, 678 DiskMB: 25 * 1024, 679 }), 680 createAlloc(allocIDs[3], lowPrioJob, &structs.Resources{ 681 CPU: 700, 682 MemoryMB: 276, 683 DiskMB: 20 * 1024, 684 }), 685 }, 686 nodeReservedCapacity: reservedNodeResources, 687 nodeCapacity: defaultNodeResources, 688 jobPriority: 100, 689 resourceAsk: &structs.Resources{ 690 CPU: 1000, 691 MemoryMB: 3000, 692 DiskMB: 50 * 1024, 693 Networks: []*structs.NetworkResource{ 694 { 695 Device: "eth0", 696 IP: "192.168.0.100", 697 MBits: 50, 698 }, 699 }, 700 }, 701 preemptedAllocIDs: map[string]struct{}{ 702 allocIDs[1]: {}, 703 allocIDs[2]: {}, 704 allocIDs[3]: {}, 705 }, 706 }, 707 { 708 desc: "Only one low priority alloc needs to be preempted", 709 currentAllocations: []*structs.Allocation{ 710 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 711 CPU: 1200, 712 MemoryMB: 2256, 713 DiskMB: 4 * 1024, 714 Networks: []*structs.NetworkResource{ 715 { 716 Device: "eth0", 717 IP: "192.168.0.100", 718 MBits: 150, 719 }, 720 }, 721 }), 722 createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ 723 CPU: 200, 724 MemoryMB: 256, 725 DiskMB: 4 * 1024, 726 Networks: []*structs.NetworkResource{ 727 { 728 Device: "eth0", 729 IP: "192.168.0.100", 730 MBits: 500, 731 }, 732 }, 733 }), 734 createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ 735 CPU: 200, 736 MemoryMB: 256, 737 DiskMB: 4 * 1024, 738 Networks: []*structs.NetworkResource{ 739 { 740 Device: "eth0", 741 IP: "192.168.0.200", 742 MBits: 320, 743 }, 744 }, 745 }), 746 }, 747 nodeReservedCapacity: reservedNodeResources, 748 nodeCapacity: defaultNodeResources, 749 jobPriority: 100, 750 resourceAsk: &structs.Resources{ 751 CPU: 300, 752 MemoryMB: 500, 753 DiskMB: 5 * 1024, 754 Networks: []*structs.NetworkResource{ 755 { 756 Device: "eth0", 757 IP: "192.168.0.100", 758 MBits: 320, 759 }, 760 }, 761 }, 762 preemptedAllocIDs: map[string]struct{}{ 763 allocIDs[2]: {}, 764 }, 765 }, 766 { 767 desc: "one alloc meets static port need, another meets remaining mbits needed", 768 currentAllocations: []*structs.Allocation{ 769 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 770 CPU: 1200, 771 MemoryMB: 2256, 772 DiskMB: 4 * 1024, 773 Networks: []*structs.NetworkResource{ 774 { 775 Device: "eth0", 776 IP: "192.168.0.100", 777 MBits: 150, 778 }, 779 }, 780 }), 781 createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ 782 CPU: 200, 783 MemoryMB: 256, 784 DiskMB: 4 * 1024, 785 Networks: []*structs.NetworkResource{ 786 { 787 Device: "eth0", 788 IP: "192.168.0.200", 789 MBits: 500, 790 ReservedPorts: []structs.Port{ 791 { 792 Label: "db", 793 Value: 88, 794 }, 795 }, 796 }, 797 }, 798 }), 799 createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ 800 CPU: 200, 801 MemoryMB: 256, 802 DiskMB: 4 * 1024, 803 Networks: []*structs.NetworkResource{ 804 { 805 Device: "eth0", 806 IP: "192.168.0.100", 807 MBits: 200, 808 }, 809 }, 810 }), 811 }, 812 nodeReservedCapacity: reservedNodeResources, 813 nodeCapacity: defaultNodeResources, 814 jobPriority: 100, 815 resourceAsk: &structs.Resources{ 816 CPU: 2700, 817 MemoryMB: 1000, 818 DiskMB: 25 * 1024, 819 Networks: []*structs.NetworkResource{ 820 { 821 Device: "eth0", 822 IP: "192.168.0.100", 823 MBits: 800, 824 ReservedPorts: []structs.Port{ 825 { 826 Label: "db", 827 Value: 88, 828 }, 829 }, 830 }, 831 }, 832 }, 833 preemptedAllocIDs: map[string]struct{}{ 834 allocIDs[1]: {}, 835 allocIDs[2]: {}, 836 }, 837 }, 838 { 839 desc: "alloc that meets static port need also meets other needs", 840 currentAllocations: []*structs.Allocation{ 841 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 842 CPU: 1200, 843 MemoryMB: 2256, 844 DiskMB: 4 * 1024, 845 Networks: []*structs.NetworkResource{ 846 { 847 Device: "eth0", 848 IP: "192.168.0.100", 849 MBits: 150, 850 }, 851 }, 852 }), 853 createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ 854 CPU: 200, 855 MemoryMB: 256, 856 DiskMB: 4 * 1024, 857 Networks: []*structs.NetworkResource{ 858 { 859 Device: "eth0", 860 IP: "192.168.0.200", 861 MBits: 600, 862 ReservedPorts: []structs.Port{ 863 { 864 Label: "db", 865 Value: 88, 866 }, 867 }, 868 }, 869 }, 870 }), 871 createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ 872 CPU: 200, 873 MemoryMB: 256, 874 DiskMB: 4 * 1024, 875 Networks: []*structs.NetworkResource{ 876 { 877 Device: "eth0", 878 IP: "192.168.0.100", 879 MBits: 100, 880 }, 881 }, 882 }), 883 }, 884 nodeReservedCapacity: reservedNodeResources, 885 nodeCapacity: defaultNodeResources, 886 jobPriority: 100, 887 resourceAsk: &structs.Resources{ 888 CPU: 600, 889 MemoryMB: 1000, 890 DiskMB: 25 * 1024, 891 Networks: []*structs.NetworkResource{ 892 { 893 Device: "eth0", 894 IP: "192.168.0.100", 895 MBits: 700, 896 ReservedPorts: []structs.Port{ 897 { 898 Label: "db", 899 Value: 88, 900 }, 901 }, 902 }, 903 }, 904 }, 905 preemptedAllocIDs: map[string]struct{}{ 906 allocIDs[1]: {}, 907 }, 908 }, 909 { 910 desc: "alloc from job that has existing evictions not chosen for preemption", 911 currentAllocations: []*structs.Allocation{ 912 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 913 CPU: 1200, 914 MemoryMB: 2256, 915 DiskMB: 4 * 1024, 916 Networks: []*structs.NetworkResource{ 917 { 918 Device: "eth0", 919 IP: "192.168.0.100", 920 MBits: 150, 921 }, 922 }, 923 }), 924 createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ 925 CPU: 200, 926 MemoryMB: 256, 927 DiskMB: 4 * 1024, 928 Networks: []*structs.NetworkResource{ 929 { 930 Device: "eth0", 931 IP: "192.168.0.200", 932 MBits: 500, 933 }, 934 }, 935 }), 936 createAlloc(allocIDs[2], lowPrioJob2, &structs.Resources{ 937 CPU: 200, 938 MemoryMB: 256, 939 DiskMB: 4 * 1024, 940 Networks: []*structs.NetworkResource{ 941 { 942 Device: "eth0", 943 IP: "192.168.0.100", 944 MBits: 300, 945 }, 946 }, 947 }), 948 }, 949 nodeReservedCapacity: reservedNodeResources, 950 nodeCapacity: defaultNodeResources, 951 jobPriority: 100, 952 resourceAsk: &structs.Resources{ 953 CPU: 300, 954 MemoryMB: 500, 955 DiskMB: 5 * 1024, 956 Networks: []*structs.NetworkResource{ 957 { 958 Device: "eth0", 959 IP: "192.168.0.100", 960 MBits: 320, 961 }, 962 }, 963 }, 964 currentPreemptions: []*structs.Allocation{ 965 createAlloc(allocIDs[4], lowPrioJob2, &structs.Resources{ 966 CPU: 200, 967 MemoryMB: 256, 968 DiskMB: 4 * 1024, 969 Networks: []*structs.NetworkResource{ 970 { 971 Device: "eth0", 972 IP: "192.168.0.100", 973 MBits: 300, 974 }, 975 }, 976 }), 977 }, 978 preemptedAllocIDs: map[string]struct{}{ 979 allocIDs[1]: {}, 980 }, 981 }, 982 { 983 desc: "Preemption with one device instance per alloc", 984 // Add allocations that use two device instances 985 currentAllocations: []*structs.Allocation{ 986 createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ 987 CPU: 500, 988 MemoryMB: 512, 989 DiskMB: 4 * 1024, 990 }, &structs.AllocatedDeviceResource{ 991 Type: "gpu", 992 Vendor: "nvidia", 993 Name: "1080ti", 994 DeviceIDs: []string{deviceIDs[0]}, 995 }), 996 createAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{ 997 CPU: 200, 998 MemoryMB: 512, 999 DiskMB: 4 * 1024, 1000 }, &structs.AllocatedDeviceResource{ 1001 Type: "gpu", 1002 Vendor: "nvidia", 1003 Name: "1080ti", 1004 DeviceIDs: []string{deviceIDs[1]}, 1005 })}, 1006 nodeReservedCapacity: reservedNodeResources, 1007 nodeCapacity: defaultNodeResources, 1008 jobPriority: 100, 1009 resourceAsk: &structs.Resources{ 1010 CPU: 1000, 1011 MemoryMB: 512, 1012 DiskMB: 4 * 1024, 1013 Devices: []*structs.RequestedDevice{ 1014 { 1015 Name: "nvidia/gpu/1080ti", 1016 Count: 4, 1017 }, 1018 }, 1019 }, 1020 preemptedAllocIDs: map[string]struct{}{ 1021 allocIDs[0]: {}, 1022 allocIDs[1]: {}, 1023 }, 1024 }, 1025 { 1026 desc: "Preemption multiple devices used", 1027 currentAllocations: []*structs.Allocation{ 1028 createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ 1029 CPU: 500, 1030 MemoryMB: 512, 1031 DiskMB: 4 * 1024, 1032 }, &structs.AllocatedDeviceResource{ 1033 Type: "gpu", 1034 Vendor: "nvidia", 1035 Name: "1080ti", 1036 DeviceIDs: []string{deviceIDs[0], deviceIDs[1], deviceIDs[2], deviceIDs[3]}, 1037 }), 1038 createAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{ 1039 CPU: 200, 1040 MemoryMB: 512, 1041 DiskMB: 4 * 1024, 1042 }, &structs.AllocatedDeviceResource{ 1043 Type: "fpga", 1044 Vendor: "intel", 1045 Name: "F100", 1046 DeviceIDs: []string{"fpga1"}, 1047 })}, 1048 nodeReservedCapacity: reservedNodeResources, 1049 nodeCapacity: defaultNodeResources, 1050 jobPriority: 100, 1051 resourceAsk: &structs.Resources{ 1052 CPU: 1000, 1053 MemoryMB: 512, 1054 DiskMB: 4 * 1024, 1055 Devices: []*structs.RequestedDevice{ 1056 { 1057 Name: "nvidia/gpu/1080ti", 1058 Count: 4, 1059 }, 1060 }, 1061 }, 1062 preemptedAllocIDs: map[string]struct{}{ 1063 allocIDs[0]: {}, 1064 }, 1065 }, 1066 { 1067 // This test cases creates allocations across two GPUs 1068 // Both GPUs are eligible for the task, but only allocs sharing the 1069 // same device should be chosen for preemption 1070 desc: "Preemption with allocs across multiple devices that match", 1071 currentAllocations: []*structs.Allocation{ 1072 createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ 1073 CPU: 500, 1074 MemoryMB: 512, 1075 DiskMB: 4 * 1024, 1076 }, &structs.AllocatedDeviceResource{ 1077 Type: "gpu", 1078 Vendor: "nvidia", 1079 Name: "1080ti", 1080 DeviceIDs: []string{deviceIDs[0], deviceIDs[1]}, 1081 }), 1082 createAllocWithDevice(allocIDs[1], highPrioJob, &structs.Resources{ 1083 CPU: 200, 1084 MemoryMB: 100, 1085 DiskMB: 4 * 1024, 1086 }, &structs.AllocatedDeviceResource{ 1087 Type: "gpu", 1088 Vendor: "nvidia", 1089 Name: "1080ti", 1090 DeviceIDs: []string{deviceIDs[2]}, 1091 }), 1092 createAllocWithDevice(allocIDs[2], lowPrioJob, &structs.Resources{ 1093 CPU: 200, 1094 MemoryMB: 256, 1095 DiskMB: 4 * 1024, 1096 }, &structs.AllocatedDeviceResource{ 1097 Type: "gpu", 1098 Vendor: "nvidia", 1099 Name: "2080ti", 1100 DeviceIDs: []string{deviceIDs[4], deviceIDs[5]}, 1101 }), 1102 createAllocWithDevice(allocIDs[3], lowPrioJob, &structs.Resources{ 1103 CPU: 100, 1104 MemoryMB: 256, 1105 DiskMB: 4 * 1024, 1106 }, &structs.AllocatedDeviceResource{ 1107 Type: "gpu", 1108 Vendor: "nvidia", 1109 Name: "2080ti", 1110 DeviceIDs: []string{deviceIDs[6], deviceIDs[7]}, 1111 }), 1112 createAllocWithDevice(allocIDs[4], lowPrioJob, &structs.Resources{ 1113 CPU: 200, 1114 MemoryMB: 512, 1115 DiskMB: 4 * 1024, 1116 }, &structs.AllocatedDeviceResource{ 1117 Type: "fpga", 1118 Vendor: "intel", 1119 Name: "F100", 1120 DeviceIDs: []string{"fpga1"}, 1121 })}, 1122 nodeReservedCapacity: reservedNodeResources, 1123 nodeCapacity: defaultNodeResources, 1124 jobPriority: 100, 1125 resourceAsk: &structs.Resources{ 1126 CPU: 1000, 1127 MemoryMB: 512, 1128 DiskMB: 4 * 1024, 1129 Devices: []*structs.RequestedDevice{ 1130 { 1131 Name: "gpu", 1132 Count: 4, 1133 }, 1134 }, 1135 }, 1136 preemptedAllocIDs: map[string]struct{}{ 1137 allocIDs[2]: {}, 1138 allocIDs[3]: {}, 1139 }, 1140 }, 1141 { 1142 // This test cases creates allocations across two GPUs 1143 // Both GPUs are eligible for the task, but only allocs with the lower 1144 // priority are chosen 1145 desc: "Preemption with lower/higher priority combinations", 1146 currentAllocations: []*structs.Allocation{ 1147 createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ 1148 CPU: 500, 1149 MemoryMB: 512, 1150 DiskMB: 4 * 1024, 1151 }, &structs.AllocatedDeviceResource{ 1152 Type: "gpu", 1153 Vendor: "nvidia", 1154 Name: "1080ti", 1155 DeviceIDs: []string{deviceIDs[0], deviceIDs[1]}, 1156 }), 1157 createAllocWithDevice(allocIDs[1], lowPrioJob2, &structs.Resources{ 1158 CPU: 200, 1159 MemoryMB: 100, 1160 DiskMB: 4 * 1024, 1161 }, &structs.AllocatedDeviceResource{ 1162 Type: "gpu", 1163 Vendor: "nvidia", 1164 Name: "1080ti", 1165 DeviceIDs: []string{deviceIDs[2], deviceIDs[3]}, 1166 }), 1167 createAllocWithDevice(allocIDs[2], lowPrioJob, &structs.Resources{ 1168 CPU: 200, 1169 MemoryMB: 256, 1170 DiskMB: 4 * 1024, 1171 }, &structs.AllocatedDeviceResource{ 1172 Type: "gpu", 1173 Vendor: "nvidia", 1174 Name: "2080ti", 1175 DeviceIDs: []string{deviceIDs[4], deviceIDs[5]}, 1176 }), 1177 createAllocWithDevice(allocIDs[3], lowPrioJob, &structs.Resources{ 1178 CPU: 100, 1179 MemoryMB: 256, 1180 DiskMB: 4 * 1024, 1181 }, &structs.AllocatedDeviceResource{ 1182 Type: "gpu", 1183 Vendor: "nvidia", 1184 Name: "2080ti", 1185 DeviceIDs: []string{deviceIDs[6], deviceIDs[7]}, 1186 }), 1187 createAllocWithDevice(allocIDs[4], lowPrioJob, &structs.Resources{ 1188 CPU: 100, 1189 MemoryMB: 256, 1190 DiskMB: 4 * 1024, 1191 }, &structs.AllocatedDeviceResource{ 1192 Type: "gpu", 1193 Vendor: "nvidia", 1194 Name: "2080ti", 1195 DeviceIDs: []string{deviceIDs[8]}, 1196 }), 1197 createAllocWithDevice(allocIDs[5], lowPrioJob, &structs.Resources{ 1198 CPU: 200, 1199 MemoryMB: 512, 1200 DiskMB: 4 * 1024, 1201 }, &structs.AllocatedDeviceResource{ 1202 Type: "fpga", 1203 Vendor: "intel", 1204 Name: "F100", 1205 DeviceIDs: []string{"fpga1"}, 1206 })}, 1207 nodeReservedCapacity: reservedNodeResources, 1208 nodeCapacity: defaultNodeResources, 1209 jobPriority: 100, 1210 resourceAsk: &structs.Resources{ 1211 CPU: 1000, 1212 MemoryMB: 512, 1213 DiskMB: 4 * 1024, 1214 Devices: []*structs.RequestedDevice{ 1215 { 1216 Name: "gpu", 1217 Count: 4, 1218 }, 1219 }, 1220 }, 1221 preemptedAllocIDs: map[string]struct{}{ 1222 allocIDs[2]: {}, 1223 allocIDs[3]: {}, 1224 }, 1225 }, 1226 { 1227 desc: "Device preemption not possible due to more instances needed than available", 1228 currentAllocations: []*structs.Allocation{ 1229 createAllocWithDevice(allocIDs[0], lowPrioJob, &structs.Resources{ 1230 CPU: 500, 1231 MemoryMB: 512, 1232 DiskMB: 4 * 1024, 1233 }, &structs.AllocatedDeviceResource{ 1234 Type: "gpu", 1235 Vendor: "nvidia", 1236 Name: "1080ti", 1237 DeviceIDs: []string{deviceIDs[0], deviceIDs[1], deviceIDs[2], deviceIDs[3]}, 1238 }), 1239 createAllocWithDevice(allocIDs[1], lowPrioJob, &structs.Resources{ 1240 CPU: 200, 1241 MemoryMB: 512, 1242 DiskMB: 4 * 1024, 1243 }, &structs.AllocatedDeviceResource{ 1244 Type: "fpga", 1245 Vendor: "intel", 1246 Name: "F100", 1247 DeviceIDs: []string{"fpga1"}, 1248 })}, 1249 nodeReservedCapacity: reservedNodeResources, 1250 nodeCapacity: defaultNodeResources, 1251 jobPriority: 100, 1252 resourceAsk: &structs.Resources{ 1253 CPU: 1000, 1254 MemoryMB: 512, 1255 DiskMB: 4 * 1024, 1256 Devices: []*structs.RequestedDevice{ 1257 { 1258 Name: "gpu", 1259 Count: 6, 1260 }, 1261 }, 1262 }, 1263 }, 1264 // This test case exercises the code path for a final filtering step that tries to 1265 // minimize the number of preemptible allocations 1266 { 1267 desc: "Filter out allocs whose resource usage superset is also in the preemption list", 1268 currentAllocations: []*structs.Allocation{ 1269 createAlloc(allocIDs[0], highPrioJob, &structs.Resources{ 1270 CPU: 1800, 1271 MemoryMB: 2256, 1272 DiskMB: 4 * 1024, 1273 Networks: []*structs.NetworkResource{ 1274 { 1275 Device: "eth0", 1276 IP: "192.168.0.100", 1277 MBits: 150, 1278 }, 1279 }, 1280 }), 1281 createAlloc(allocIDs[1], lowPrioJob, &structs.Resources{ 1282 CPU: 1500, 1283 MemoryMB: 256, 1284 DiskMB: 5 * 1024, 1285 Networks: []*structs.NetworkResource{ 1286 { 1287 Device: "eth0", 1288 IP: "192.168.0.100", 1289 MBits: 100, 1290 }, 1291 }, 1292 }), 1293 createAlloc(allocIDs[2], lowPrioJob, &structs.Resources{ 1294 CPU: 600, 1295 MemoryMB: 256, 1296 DiskMB: 5 * 1024, 1297 Networks: []*structs.NetworkResource{ 1298 { 1299 Device: "eth0", 1300 IP: "192.168.0.200", 1301 MBits: 300, 1302 }, 1303 }, 1304 }), 1305 }, 1306 nodeReservedCapacity: reservedNodeResources, 1307 nodeCapacity: defaultNodeResources, 1308 jobPriority: 100, 1309 resourceAsk: &structs.Resources{ 1310 CPU: 1000, 1311 MemoryMB: 256, 1312 DiskMB: 5 * 1024, 1313 Networks: []*structs.NetworkResource{ 1314 { 1315 Device: "eth0", 1316 IP: "192.168.0.100", 1317 MBits: 50, 1318 }, 1319 }, 1320 }, 1321 preemptedAllocIDs: map[string]struct{}{ 1322 allocIDs[1]: {}, 1323 }, 1324 }, 1325 } 1326 1327 for _, tc := range testCases { 1328 t.Run(tc.desc, func(t *testing.T) { 1329 node := mock.Node() 1330 node.NodeResources = tc.nodeCapacity 1331 node.ReservedResources = tc.nodeReservedCapacity 1332 1333 state, ctx := testContext(t) 1334 1335 nodes := []*RankedNode{ 1336 { 1337 Node: node, 1338 }, 1339 } 1340 state.UpsertNode(1000, node) 1341 for _, alloc := range tc.currentAllocations { 1342 alloc.NodeID = node.ID 1343 } 1344 require := require.New(t) 1345 err := state.UpsertAllocs(1001, tc.currentAllocations) 1346 1347 require.Nil(err) 1348 if tc.currentPreemptions != nil { 1349 ctx.plan.NodePreemptions[node.ID] = tc.currentPreemptions 1350 } 1351 static := NewStaticRankIterator(ctx, nodes) 1352 binPackIter := NewBinPackIterator(ctx, static, true, tc.jobPriority, structs.SchedulerAlgorithmBinpack) 1353 job := mock.Job() 1354 job.Priority = tc.jobPriority 1355 binPackIter.SetJob(job) 1356 1357 taskGroup := &structs.TaskGroup{ 1358 EphemeralDisk: &structs.EphemeralDisk{}, 1359 Tasks: []*structs.Task{ 1360 { 1361 Name: "web", 1362 Resources: tc.resourceAsk, 1363 }, 1364 }, 1365 } 1366 1367 binPackIter.SetTaskGroup(taskGroup) 1368 option := binPackIter.Next() 1369 if tc.preemptedAllocIDs == nil { 1370 require.Nil(option) 1371 } else { 1372 require.NotNil(option) 1373 preemptedAllocs := option.PreemptedAllocs 1374 require.Equal(len(tc.preemptedAllocIDs), len(preemptedAllocs)) 1375 for _, alloc := range preemptedAllocs { 1376 _, ok := tc.preemptedAllocIDs[alloc.ID] 1377 require.Truef(ok, "alloc %s was preempted unexpectedly", alloc.ID) 1378 } 1379 } 1380 }) 1381 } 1382 } 1383 1384 // helper method to create allocations with given jobs and resources 1385 func createAlloc(id string, job *structs.Job, resource *structs.Resources) *structs.Allocation { 1386 return createAllocInner(id, job, resource, nil, nil) 1387 } 1388 1389 // helper method to create allocation with network at the task group level 1390 func createAllocWithTaskgroupNetwork(id string, job *structs.Job, resource *structs.Resources, tgNet *structs.NetworkResource) *structs.Allocation { 1391 return createAllocInner(id, job, resource, nil, tgNet) 1392 } 1393 1394 func createAllocWithDevice(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource) *structs.Allocation { 1395 return createAllocInner(id, job, resource, allocatedDevices, nil) 1396 } 1397 1398 func createAllocInner(id string, job *structs.Job, resource *structs.Resources, allocatedDevices *structs.AllocatedDeviceResource, tgNetwork *structs.NetworkResource) *structs.Allocation { 1399 alloc := &structs.Allocation{ 1400 ID: id, 1401 Job: job, 1402 JobID: job.ID, 1403 TaskResources: map[string]*structs.Resources{ 1404 "web": resource, 1405 }, 1406 Namespace: structs.DefaultNamespace, 1407 EvalID: uuid.Generate(), 1408 DesiredStatus: structs.AllocDesiredStatusRun, 1409 ClientStatus: structs.AllocClientStatusRunning, 1410 TaskGroup: "web", 1411 AllocatedResources: &structs.AllocatedResources{ 1412 Tasks: map[string]*structs.AllocatedTaskResources{ 1413 "web": { 1414 Cpu: structs.AllocatedCpuResources{ 1415 CpuShares: int64(resource.CPU), 1416 }, 1417 Memory: structs.AllocatedMemoryResources{ 1418 MemoryMB: int64(resource.MemoryMB), 1419 }, 1420 Networks: resource.Networks, 1421 }, 1422 }, 1423 }, 1424 } 1425 1426 if allocatedDevices != nil { 1427 alloc.AllocatedResources.Tasks["web"].Devices = []*structs.AllocatedDeviceResource{allocatedDevices} 1428 } 1429 1430 if tgNetwork != nil { 1431 alloc.AllocatedResources.Shared = structs.AllocatedSharedResources{ 1432 Networks: []*structs.NetworkResource{tgNetwork}, 1433 } 1434 } 1435 return alloc 1436 }