sigs.k8s.io/kueue@v0.6.2/pkg/scheduler/scheduler_test.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package scheduler 18 19 import ( 20 "context" 21 "errors" 22 "reflect" 23 "sort" 24 "sync" 25 "testing" 26 "time" 27 28 "github.com/google/go-cmp/cmp" 29 "github.com/google/go-cmp/cmp/cmpopts" 30 corev1 "k8s.io/api/core/v1" 31 "k8s.io/apimachinery/pkg/api/resource" 32 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 "k8s.io/apimachinery/pkg/runtime" 34 "k8s.io/apimachinery/pkg/types" 35 "k8s.io/apimachinery/pkg/util/sets" 36 "k8s.io/client-go/tools/record" 37 "k8s.io/utils/ptr" 38 "sigs.k8s.io/controller-runtime/pkg/client" 39 "sigs.k8s.io/controller-runtime/pkg/client/interceptor" 40 41 config "sigs.k8s.io/kueue/apis/config/v1beta1" 42 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 43 "sigs.k8s.io/kueue/pkg/cache" 44 "sigs.k8s.io/kueue/pkg/constants" 45 "sigs.k8s.io/kueue/pkg/features" 46 "sigs.k8s.io/kueue/pkg/queue" 47 "sigs.k8s.io/kueue/pkg/scheduler/flavorassigner" 48 "sigs.k8s.io/kueue/pkg/util/routine" 49 utiltesting "sigs.k8s.io/kueue/pkg/util/testing" 50 "sigs.k8s.io/kueue/pkg/workload" 51 ) 52 53 /* queueingTimeout is a fixed one-second duration shared by the tests in this file. NOTE(review): its call sites are outside this view; presumably it bounds how long a test waits on queue operations - confirm where it is used. */ const ( 54 queueingTimeout = time.Second 55 ) 56 57 /* cmpDump is a set of go-cmp options that sorts every []string in ascending order before comparison, so the order of elements in string slices does not produce a diff. NOTE(review): presumably applied when comparing dumped queue contents - confirm at the call sites. */ var cmpDump = []cmp.Option{ 58 cmpopts.SortSlices(func(a, b string) bool { return a < b }), 59 } 60 61 func TestSchedule(t *testing.T) { 62 now := time.Now() 63 resourceFlavors := []*kueue.ResourceFlavor{ 64 {ObjectMeta: metav1.ObjectMeta{Name: "default"}}, 
65 {ObjectMeta: metav1.ObjectMeta{Name: "on-demand"}}, 66 {ObjectMeta: metav1.ObjectMeta{Name: "spot"}}, 67 {ObjectMeta: metav1.ObjectMeta{Name: "model-a"}}, 68 } 69 clusterQueues := []kueue.ClusterQueue{ 70 *utiltesting.MakeClusterQueue("sales"). 71 NamespaceSelector(&metav1.LabelSelector{ 72 MatchExpressions: []metav1.LabelSelectorRequirement{{ 73 Key: "dep", 74 Operator: metav1.LabelSelectorOpIn, 75 Values: []string{"sales"}, 76 }}, 77 }). 78 QueueingStrategy(kueue.StrictFIFO). 79 ResourceGroup(*utiltesting.MakeFlavorQuotas("default"). 80 Resource(corev1.ResourceCPU, "50", "0").Obj()). 81 Obj(), 82 *utiltesting.MakeClusterQueue("eng-alpha"). 83 Cohort("eng"). 84 NamespaceSelector(&metav1.LabelSelector{ 85 MatchExpressions: []metav1.LabelSelectorRequirement{{ 86 Key: "dep", 87 Operator: metav1.LabelSelectorOpIn, 88 Values: []string{"eng"}, 89 }}, 90 }). 91 QueueingStrategy(kueue.StrictFIFO). 92 ResourceGroup( 93 *utiltesting.MakeFlavorQuotas("on-demand"). 94 Resource(corev1.ResourceCPU, "50", "50").Obj(), 95 *utiltesting.MakeFlavorQuotas("spot"). 96 Resource(corev1.ResourceCPU, "100", "0").Obj(), 97 ). 98 Obj(), 99 *utiltesting.MakeClusterQueue("eng-beta"). 100 Cohort("eng"). 101 NamespaceSelector(&metav1.LabelSelector{ 102 MatchExpressions: []metav1.LabelSelectorRequirement{{ 103 Key: "dep", 104 Operator: metav1.LabelSelectorOpIn, 105 Values: []string{"eng"}, 106 }}, 107 }). 108 QueueingStrategy(kueue.StrictFIFO). 109 Preemption(kueue.ClusterQueuePreemption{ 110 ReclaimWithinCohort: kueue.PreemptionPolicyAny, 111 WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, 112 }). 113 ResourceGroup( 114 *utiltesting.MakeFlavorQuotas("on-demand"). 115 Resource(corev1.ResourceCPU, "50", "10").Obj(), 116 *utiltesting.MakeFlavorQuotas("spot"). 117 Resource(corev1.ResourceCPU, "0", "100").Obj(), 118 ). 119 ResourceGroup( 120 *utiltesting.MakeFlavorQuotas("model-a"). 121 Resource("example.com/gpu", "20", "0").Obj(), 122 ). 
123 Obj(), 124 *utiltesting.MakeClusterQueue("flavor-nonexistent-cq"). 125 QueueingStrategy(kueue.StrictFIFO). 126 ResourceGroup(*utiltesting.MakeFlavorQuotas("nonexistent-flavor"). 127 Resource(corev1.ResourceCPU, "50").Obj()). 128 Obj(), 129 *utiltesting.MakeClusterQueue("lend-a"). 130 Cohort("lend"). 131 NamespaceSelector(&metav1.LabelSelector{ 132 MatchExpressions: []metav1.LabelSelectorRequirement{{ 133 Key: "dep", 134 Operator: metav1.LabelSelectorOpIn, 135 Values: []string{"lend"}, 136 }}, 137 }). 138 ResourceGroup(*utiltesting.MakeFlavorQuotas("default"). 139 Resource(corev1.ResourceCPU, "3", "", "2").Obj()). 140 Obj(), 141 *utiltesting.MakeClusterQueue("lend-b"). 142 Cohort("lend"). 143 NamespaceSelector(&metav1.LabelSelector{ 144 MatchExpressions: []metav1.LabelSelectorRequirement{{ 145 Key: "dep", 146 Operator: metav1.LabelSelectorOpIn, 147 Values: []string{"lend"}, 148 }}, 149 }). 150 ResourceGroup(*utiltesting.MakeFlavorQuotas("default"). 151 Resource(corev1.ResourceCPU, "2", "", "2").Obj()). 
152 Obj(), 153 } 154 queues := []kueue.LocalQueue{ 155 { 156 ObjectMeta: metav1.ObjectMeta{ 157 Namespace: "sales", 158 Name: "main", 159 }, 160 Spec: kueue.LocalQueueSpec{ 161 ClusterQueue: "sales", 162 }, 163 }, 164 { 165 ObjectMeta: metav1.ObjectMeta{ 166 Namespace: "sales", 167 Name: "blocked", 168 }, 169 Spec: kueue.LocalQueueSpec{ 170 ClusterQueue: "eng-alpha", 171 }, 172 }, 173 { 174 ObjectMeta: metav1.ObjectMeta{ 175 Namespace: "eng-alpha", 176 Name: "main", 177 }, 178 Spec: kueue.LocalQueueSpec{ 179 ClusterQueue: "eng-alpha", 180 }, 181 }, 182 { 183 ObjectMeta: metav1.ObjectMeta{ 184 Namespace: "eng-beta", 185 Name: "main", 186 }, 187 Spec: kueue.LocalQueueSpec{ 188 ClusterQueue: "eng-beta", 189 }, 190 }, 191 { 192 ObjectMeta: metav1.ObjectMeta{ 193 Namespace: "sales", 194 Name: "flavor-nonexistent-queue", 195 }, 196 Spec: kueue.LocalQueueSpec{ 197 ClusterQueue: "flavor-nonexistent-cq", 198 }, 199 }, 200 { 201 ObjectMeta: metav1.ObjectMeta{ 202 Namespace: "sales", 203 Name: "cq-nonexistent-queue", 204 }, 205 Spec: kueue.LocalQueueSpec{ 206 ClusterQueue: "nonexistent-cq", 207 }, 208 }, 209 { 210 ObjectMeta: metav1.ObjectMeta{ 211 Namespace: "lend", 212 Name: "lend-a-queue", 213 }, 214 Spec: kueue.LocalQueueSpec{ 215 ClusterQueue: "lend-a", 216 }, 217 }, 218 { 219 ObjectMeta: metav1.ObjectMeta{ 220 Namespace: "lend", 221 Name: "lend-b-queue", 222 }, 223 Spec: kueue.LocalQueueSpec{ 224 ClusterQueue: "lend-b", 225 }, 226 }, 227 } 228 cases := map[string]struct { 229 workloads []kueue.Workload 230 admissionError error 231 // wantAssignments is a summary of all the admissions in the cache after this cycle. 232 wantAssignments map[string]kueue.Admission 233 // wantScheduled is the subset of workloads that got scheduled/admitted in this cycle. 234 wantScheduled []string 235 // wantLeft is the workload keys that are left in the queues after this cycle. 
236 wantLeft map[string][]string 237 // wantInadmissibleLeft is the workload keys that are left in the inadmissible state after this cycle. 238 wantInadmissibleLeft map[string][]string 239 // wantPreempted is the keys of the workloads that get preempted in the scheduling cycle. 240 wantPreempted sets.Set[string] 241 242 // additional*Queues can hold any extra queues needed by the tc 243 additionalClusterQueues []kueue.ClusterQueue 244 additionalLocalQueues []kueue.LocalQueue 245 246 // disable partial admission 247 disablePartialAdmission bool 248 249 // enable lending limit 250 enableLendingLimit bool 251 252 // ignored if empty, the Message is ignored (it contains the duration) 253 wantEvents []utiltesting.EventRecord 254 }{ 255 "workload fits in single clusterQueue, with check state ready": { 256 workloads: []kueue.Workload{ 257 *utiltesting.MakeWorkload("foo", "sales"). 258 Queue("main"). 259 PodSets(*utiltesting.MakePodSet("one", 10). 260 Request(corev1.ResourceCPU, "1"). 261 Obj()). 262 AdmissionCheck(kueue.AdmissionCheckState{ 263 Name: "check", 264 State: kueue.CheckStateReady, 265 }). 
266 Obj(), 267 }, 268 wantAssignments: map[string]kueue.Admission{ 269 "sales/foo": { 270 ClusterQueue: "sales", 271 PodSetAssignments: []kueue.PodSetAssignment{ 272 { 273 Name: "one", 274 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 275 corev1.ResourceCPU: "default", 276 }, 277 ResourceUsage: corev1.ResourceList{ 278 corev1.ResourceCPU: resource.MustParse("10000m"), 279 }, 280 Count: ptr.To[int32](10), 281 }, 282 }, 283 }, 284 }, 285 wantScheduled: []string{"sales/foo"}, 286 wantEvents: []utiltesting.EventRecord{ 287 { 288 Key: types.NamespacedName{Namespace: "sales", Name: "foo"}, 289 Reason: "QuotaReserved", 290 EventType: corev1.EventTypeNormal, 291 }, 292 { 293 Key: types.NamespacedName{Namespace: "sales", Name: "foo"}, 294 Reason: "Admitted", 295 EventType: corev1.EventTypeNormal, 296 }, 297 }, 298 }, 299 "workload fits in single clusterQueue, with check state pending": { 300 workloads: []kueue.Workload{ 301 *utiltesting.MakeWorkload("foo", "sales"). 302 Queue("main"). 303 PodSets(*utiltesting.MakePodSet("one", 10). 304 Request(corev1.ResourceCPU, "1"). 305 Obj()). 306 AdmissionCheck(kueue.AdmissionCheckState{ 307 Name: "check", 308 State: kueue.CheckStatePending, 309 }). 
310 Obj(), 311 }, 312 wantAssignments: map[string]kueue.Admission{ 313 "sales/foo": { 314 ClusterQueue: "sales", 315 PodSetAssignments: []kueue.PodSetAssignment{ 316 { 317 Name: "one", 318 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 319 corev1.ResourceCPU: "default", 320 }, 321 ResourceUsage: corev1.ResourceList{ 322 corev1.ResourceCPU: resource.MustParse("10000m"), 323 }, 324 Count: ptr.To[int32](10), 325 }, 326 }, 327 }, 328 }, 329 wantScheduled: []string{"sales/foo"}, 330 wantEvents: []utiltesting.EventRecord{ 331 { 332 Key: types.NamespacedName{Namespace: "sales", Name: "foo"}, 333 Reason: "QuotaReserved", 334 EventType: corev1.EventTypeNormal, 335 }, 336 }, 337 }, 338 "error during admission": { 339 workloads: []kueue.Workload{ 340 *utiltesting.MakeWorkload("foo", "sales"). 341 Queue("main"). 342 PodSets(*utiltesting.MakePodSet("one", 10). 343 Request(corev1.ResourceCPU, "1"). 344 Obj()). 345 Obj(), 346 }, 347 admissionError: errors.New("admission"), 348 wantLeft: map[string][]string{ 349 "sales": {"sales/foo"}, 350 }, 351 }, 352 "single clusterQueue full": { 353 workloads: []kueue.Workload{ 354 *utiltesting.MakeWorkload("new", "sales"). 355 Queue("main"). 356 PodSets(*utiltesting.MakePodSet("one", 11). 357 Request(corev1.ResourceCPU, "1"). 358 Obj()). 359 Obj(), 360 *utiltesting.MakeWorkload("assigned", "sales"). 361 PodSets(*utiltesting.MakePodSet("one", 40). 362 Request(corev1.ResourceCPU, "1"). 363 Obj()). 364 ReserveQuota(utiltesting.MakeAdmission("sales", "one").Assignment(corev1.ResourceCPU, "default", "40000m").AssignmentPodCount(40).Obj()). 
365 Obj(), 366 }, 367 wantAssignments: map[string]kueue.Admission{ 368 "sales/assigned": { 369 ClusterQueue: "sales", 370 PodSetAssignments: []kueue.PodSetAssignment{ 371 { 372 Name: "one", 373 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 374 corev1.ResourceCPU: "default", 375 }, 376 ResourceUsage: corev1.ResourceList{ 377 corev1.ResourceCPU: resource.MustParse("40000m"), 378 }, 379 Count: ptr.To[int32](40), 380 }, 381 }, 382 }, 383 }, 384 wantLeft: map[string][]string{ 385 "sales": {"sales/new"}, 386 }, 387 }, 388 "failed to match clusterQueue selector": { 389 workloads: []kueue.Workload{ 390 *utiltesting.MakeWorkload("new", "sales"). 391 Queue("blocked"). 392 PodSets(*utiltesting.MakePodSet("one", 1). 393 Request(corev1.ResourceCPU, "1"). 394 Obj()). 395 Obj(), 396 }, 397 wantInadmissibleLeft: map[string][]string{ 398 "eng-alpha": {"sales/new"}, 399 }, 400 }, 401 "admit in different cohorts": { 402 workloads: []kueue.Workload{ 403 *utiltesting.MakeWorkload("new", "sales"). 404 Queue("main"). 405 PodSets(*utiltesting.MakePodSet("one", 1). 406 Request(corev1.ResourceCPU, "1"). 407 Obj()). 408 Obj(), 409 *utiltesting.MakeWorkload("new", "eng-alpha"). 410 Queue("main"). 411 PodSets(*utiltesting.MakePodSet("one", 51 /* Will borrow */). 412 Request(corev1.ResourceCPU, "1"). 413 Obj()). 
414 Obj(), 415 }, 416 wantAssignments: map[string]kueue.Admission{ 417 "sales/new": { 418 ClusterQueue: "sales", 419 PodSetAssignments: []kueue.PodSetAssignment{ 420 { 421 Name: "one", 422 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 423 corev1.ResourceCPU: "default", 424 }, 425 ResourceUsage: corev1.ResourceList{ 426 corev1.ResourceCPU: resource.MustParse("1000m"), 427 }, 428 Count: ptr.To[int32](1), 429 }, 430 }, 431 }, 432 "eng-alpha/new": { 433 ClusterQueue: "eng-alpha", 434 PodSetAssignments: []kueue.PodSetAssignment{ 435 { 436 Name: "one", 437 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 438 corev1.ResourceCPU: "on-demand", 439 }, 440 ResourceUsage: corev1.ResourceList{ 441 corev1.ResourceCPU: resource.MustParse("51000m"), 442 }, 443 Count: ptr.To[int32](51), 444 }, 445 }, 446 }, 447 }, 448 wantScheduled: []string{"sales/new", "eng-alpha/new"}, 449 }, 450 "admit in same cohort with no borrowing": { 451 workloads: []kueue.Workload{ 452 *utiltesting.MakeWorkload("new", "eng-alpha"). 453 Queue("main"). 454 PodSets(*utiltesting.MakePodSet("one", 40). 455 Request(corev1.ResourceCPU, "1"). 456 Obj()). 457 Obj(), 458 *utiltesting.MakeWorkload("new", "eng-beta"). 459 Queue("main"). 460 PodSets(*utiltesting.MakePodSet("one", 40). 461 Request(corev1.ResourceCPU, "1"). 462 Obj()). 
463 Obj(), 464 }, 465 wantAssignments: map[string]kueue.Admission{ 466 "eng-alpha/new": { 467 ClusterQueue: "eng-alpha", 468 PodSetAssignments: []kueue.PodSetAssignment{ 469 { 470 Name: "one", 471 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 472 corev1.ResourceCPU: "on-demand", 473 }, 474 ResourceUsage: corev1.ResourceList{ 475 corev1.ResourceCPU: resource.MustParse("40000m"), 476 }, 477 Count: ptr.To[int32](40), 478 }, 479 }, 480 }, 481 "eng-beta/new": { 482 ClusterQueue: "eng-beta", 483 PodSetAssignments: []kueue.PodSetAssignment{ 484 { 485 Name: "one", 486 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 487 corev1.ResourceCPU: "on-demand", 488 }, 489 ResourceUsage: corev1.ResourceList{ 490 corev1.ResourceCPU: resource.MustParse("40000m"), 491 }, 492 Count: ptr.To[int32](40), 493 }, 494 }, 495 }, 496 }, 497 wantScheduled: []string{"eng-alpha/new", "eng-beta/new"}, 498 }, 499 "assign multiple resources and flavors": { 500 workloads: []kueue.Workload{ 501 *utiltesting.MakeWorkload("new", "eng-beta"). 502 Queue("main"). 503 PodSets( 504 *utiltesting.MakePodSet("one", 10). 505 Request(corev1.ResourceCPU, "6"). 506 Request("example.com/gpu", "1"). 507 Obj(), 508 *utiltesting.MakePodSet("two", 40). 509 Request(corev1.ResourceCPU, "1"). 510 Obj(), 511 ). 
512 Obj(), 513 }, 514 wantAssignments: map[string]kueue.Admission{ 515 "eng-beta/new": { 516 ClusterQueue: "eng-beta", 517 PodSetAssignments: []kueue.PodSetAssignment{ 518 { 519 Name: "one", 520 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 521 corev1.ResourceCPU: "on-demand", 522 "example.com/gpu": "model-a", 523 }, 524 ResourceUsage: corev1.ResourceList{ 525 corev1.ResourceCPU: resource.MustParse("60000m"), 526 "example.com/gpu": resource.MustParse("10"), 527 }, 528 Count: ptr.To[int32](10), 529 }, 530 { 531 Name: "two", 532 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 533 corev1.ResourceCPU: "spot", 534 }, 535 ResourceUsage: corev1.ResourceList{ 536 corev1.ResourceCPU: resource.MustParse("40000m"), 537 }, 538 Count: ptr.To[int32](40), 539 }, 540 }, 541 }, 542 }, 543 wantScheduled: []string{"eng-beta/new"}, 544 }, 545 "cannot borrow if cohort was assigned and would result in overadmission": { 546 workloads: []kueue.Workload{ 547 *utiltesting.MakeWorkload("new", "eng-alpha"). 548 Queue("main"). 549 PodSets(*utiltesting.MakePodSet("one", 45). 550 Request(corev1.ResourceCPU, "1"). 551 Obj()). 552 Obj(), 553 *utiltesting.MakeWorkload("new", "eng-beta"). 554 Queue("main"). 555 PodSets(*utiltesting.MakePodSet("one", 56). 556 Request(corev1.ResourceCPU, "1"). 557 Obj()). 
558 Obj(), 559 }, 560 wantAssignments: map[string]kueue.Admission{ 561 "eng-alpha/new": { 562 ClusterQueue: "eng-alpha", 563 PodSetAssignments: []kueue.PodSetAssignment{ 564 { 565 Name: "one", 566 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 567 corev1.ResourceCPU: "on-demand", 568 }, 569 ResourceUsage: corev1.ResourceList{ 570 corev1.ResourceCPU: resource.MustParse("45000m"), 571 }, 572 Count: ptr.To[int32](45), 573 }, 574 }, 575 }, 576 }, 577 wantScheduled: []string{"eng-alpha/new"}, 578 wantLeft: map[string][]string{ 579 "eng-beta": {"eng-beta/new"}, 580 }, 581 }, 582 "can borrow if cohort was assigned and will not result in overadmission": { 583 workloads: []kueue.Workload{ 584 *utiltesting.MakeWorkload("new", "eng-alpha"). 585 Queue("main"). 586 PodSets(*utiltesting.MakePodSet("one", 45). 587 Request(corev1.ResourceCPU, "1"). 588 Obj()). 589 Obj(), 590 *utiltesting.MakeWorkload("new", "eng-beta"). 591 Queue("main"). 592 PodSets(*utiltesting.MakePodSet("one", 55). 593 Request(corev1.ResourceCPU, "1"). 594 Obj()). 
595 Obj(), 596 }, 597 wantAssignments: map[string]kueue.Admission{ 598 "eng-alpha/new": { 599 ClusterQueue: "eng-alpha", 600 PodSetAssignments: []kueue.PodSetAssignment{ 601 { 602 Name: "one", 603 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 604 corev1.ResourceCPU: "on-demand", 605 }, 606 ResourceUsage: corev1.ResourceList{ 607 corev1.ResourceCPU: resource.MustParse("45000m"), 608 }, 609 Count: ptr.To[int32](45), 610 }, 611 }, 612 }, 613 "eng-beta/new": { 614 ClusterQueue: "eng-beta", 615 PodSetAssignments: []kueue.PodSetAssignment{ 616 { 617 Name: "one", 618 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 619 corev1.ResourceCPU: "on-demand", 620 }, 621 ResourceUsage: corev1.ResourceList{ 622 corev1.ResourceCPU: resource.MustParse("55000m"), 623 }, 624 Count: ptr.To[int32](55), 625 }, 626 }, 627 }, 628 }, 629 wantScheduled: []string{"eng-alpha/new", "eng-beta/new"}, 630 }, 631 "can borrow if needs reclaim from cohort in different flavor": { 632 workloads: []kueue.Workload{ 633 *utiltesting.MakeWorkload("can-reclaim", "eng-alpha"). 634 Queue("main"). 635 Request(corev1.ResourceCPU, "100"). 636 Obj(), 637 *utiltesting.MakeWorkload("needs-to-borrow", "eng-beta"). 638 Queue("main"). 639 Request(corev1.ResourceCPU, "1"). 640 Obj(), 641 *utiltesting.MakeWorkload("user-on-demand", "eng-beta"). 642 Request(corev1.ResourceCPU, "50"). 643 ReserveQuota(utiltesting.MakeAdmission("eng-beta").Assignment(corev1.ResourceCPU, "on-demand", "50000m").Obj()). 644 Obj(), 645 *utiltesting.MakeWorkload("user-spot", "eng-beta"). 646 Request(corev1.ResourceCPU, "1"). 647 ReserveQuota(utiltesting.MakeAdmission("eng-beta").Assignment(corev1.ResourceCPU, "spot", "1000m").Obj()). 
648 Obj(), 649 }, 650 wantLeft: map[string][]string{ 651 "eng-alpha": {"eng-alpha/can-reclaim"}, 652 }, 653 wantAssignments: map[string]kueue.Admission{ 654 "eng-beta/user-spot": *utiltesting.MakeAdmission("eng-beta").Assignment(corev1.ResourceCPU, "spot", "1000m").Obj(), 655 "eng-beta/user-on-demand": *utiltesting.MakeAdmission("eng-beta").Assignment(corev1.ResourceCPU, "on-demand", "50000m").Obj(), 656 "eng-beta/needs-to-borrow": *utiltesting.MakeAdmission("eng-beta").Assignment(corev1.ResourceCPU, "on-demand", "1000m").Obj(), 657 }, 658 wantScheduled: []string{ 659 "eng-beta/needs-to-borrow", 660 }, 661 }, 662 "workload exceeds lending limit when borrow in cohort": { 663 workloads: []kueue.Workload{ 664 *utiltesting.MakeWorkload("a", "lend"). 665 Request(corev1.ResourceCPU, "2"). 666 ReserveQuota(utiltesting.MakeAdmission("lend-b").Assignment(corev1.ResourceCPU, "default", "2000m").Obj()). 667 Obj(), 668 *utiltesting.MakeWorkload("b", "lend"). 669 Queue("lend-b-queue"). 670 Request(corev1.ResourceCPU, "3"). 671 Obj(), 672 }, 673 wantAssignments: map[string]kueue.Admission{ 674 "lend/a": *utiltesting.MakeAdmission("lend-b").Assignment(corev1.ResourceCPU, "default", "2000m").Obj(), 675 }, 676 wantInadmissibleLeft: map[string][]string{ 677 "lend-b": {"lend/b"}, 678 }, 679 enableLendingLimit: true, 680 }, 681 "preempt workloads in ClusterQueue and cohort": { 682 workloads: []kueue.Workload{ 683 *utiltesting.MakeWorkload("preemptor", "eng-beta"). 684 Queue("main"). 685 Request(corev1.ResourceCPU, "20"). 686 Obj(), 687 *utiltesting.MakeWorkload("use-all-spot", "eng-alpha"). 688 Request(corev1.ResourceCPU, "100"). 689 ReserveQuota(utiltesting.MakeAdmission("eng-alpha").Assignment(corev1.ResourceCPU, "spot", "100000m").Obj()). 690 Obj(), 691 *utiltesting.MakeWorkload("low-1", "eng-beta"). 692 Priority(-1). 693 Request(corev1.ResourceCPU, "30"). 694 ReserveQuota(utiltesting.MakeAdmission("eng-beta").Assignment(corev1.ResourceCPU, "on-demand", "30000m").Obj()). 
695 Obj(), 696 *utiltesting.MakeWorkload("low-2", "eng-beta"). 697 Priority(-2). 698 Request(corev1.ResourceCPU, "10"). 699 ReserveQuota(utiltesting.MakeAdmission("eng-beta").Assignment(corev1.ResourceCPU, "on-demand", "10000m").Obj()). 700 Obj(), 701 *utiltesting.MakeWorkload("borrower", "eng-alpha"). 702 Request(corev1.ResourceCPU, "60"). 703 ReserveQuota(utiltesting.MakeAdmission("eng-alpha").Assignment(corev1.ResourceCPU, "on-demand", "60000m").Obj()). 704 Obj(), 705 }, 706 wantLeft: map[string][]string{ 707 // Preemptor is not admitted in this cycle. 708 "eng-beta": {"eng-beta/preemptor"}, 709 }, 710 wantPreempted: sets.New("eng-alpha/borrower", "eng-beta/low-2"), 711 wantAssignments: map[string]kueue.Admission{ 712 "eng-alpha/use-all-spot": *utiltesting.MakeAdmission("eng-alpha").Assignment(corev1.ResourceCPU, "spot", "100").Obj(), 713 "eng-beta/low-1": *utiltesting.MakeAdmission("eng-beta").Assignment(corev1.ResourceCPU, "on-demand", "30").Obj(), 714 // Removal from cache for the preempted workloads is deferred until we receive Workload updates 715 "eng-beta/low-2": *utiltesting.MakeAdmission("eng-beta").Assignment(corev1.ResourceCPU, "on-demand", "10").Obj(), 716 "eng-alpha/borrower": *utiltesting.MakeAdmission("eng-alpha").Assignment(corev1.ResourceCPU, "on-demand", "60").Obj(), 717 }, 718 }, 719 "multiple CQs need preemption": { 720 additionalClusterQueues: []kueue.ClusterQueue{ 721 *utiltesting.MakeClusterQueue("other-alpha"). 722 Cohort("other"). 723 ResourceGroup( 724 *utiltesting.MakeFlavorQuotas("on-demand"). 725 Resource(corev1.ResourceCPU, "50", "50").Obj(), 726 ). 727 Obj(), 728 *utiltesting.MakeClusterQueue("other-beta"). 729 Cohort("other"). 730 Preemption(kueue.ClusterQueuePreemption{ 731 ReclaimWithinCohort: kueue.PreemptionPolicyAny, 732 WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, 733 }). 734 ResourceGroup( 735 *utiltesting.MakeFlavorQuotas("on-demand"). 736 Resource(corev1.ResourceCPU, "50", "10").Obj(), 737 ). 
738 Obj(), 739 }, 740 additionalLocalQueues: []kueue.LocalQueue{ 741 *utiltesting.MakeLocalQueue("other", "eng-alpha").ClusterQueue("other-alpha").Obj(), 742 *utiltesting.MakeLocalQueue("other", "eng-beta").ClusterQueue("other-beta").Obj(), 743 }, 744 workloads: []kueue.Workload{ 745 *utiltesting.MakeWorkload("preemptor", "eng-beta"). 746 Priority(-1). 747 Queue("other"). 748 Request(corev1.ResourceCPU, "1"). 749 Obj(), 750 *utiltesting.MakeWorkload("pending", "eng-alpha"). 751 Priority(1). 752 Queue("other"). 753 Request(corev1.ResourceCPU, "1"). 754 Obj(), 755 *utiltesting.MakeWorkload("use-all", "eng-alpha"). 756 Request(corev1.ResourceCPU, "100"). 757 ReserveQuota(utiltesting.MakeAdmission("other-alpha").Assignment(corev1.ResourceCPU, "on-demand", "100").Obj()). 758 Obj(), 759 }, 760 wantLeft: map[string][]string{ 761 // Preemptor is not admitted in this cycle. 762 "other-beta": {"eng-beta/preemptor"}, 763 }, 764 wantInadmissibleLeft: map[string][]string{ 765 "other-alpha": {"eng-alpha/pending"}, 766 }, 767 wantPreempted: sets.New("eng-alpha/use-all"), 768 wantAssignments: map[string]kueue.Admission{ 769 // Removal from cache for the preempted workloads is deferred until we receive Workload updates 770 "eng-alpha/use-all": *utiltesting.MakeAdmission("other-alpha").Assignment(corev1.ResourceCPU, "on-demand", "100").Obj(), 771 }, 772 }, 773 "cannot borrow resource not listed in clusterQueue": { 774 workloads: []kueue.Workload{ 775 *utiltesting.MakeWorkload("new", "eng-alpha"). 776 Queue("main"). 777 Request("example.com/gpu", "1"). 778 Obj(), 779 }, 780 wantLeft: map[string][]string{ 781 "eng-alpha": {"eng-alpha/new"}, 782 }, 783 }, 784 "not enough resources to borrow, fallback to next flavor": { 785 workloads: []kueue.Workload{ 786 *utiltesting.MakeWorkload("new", "eng-alpha"). 787 Queue("main"). 788 PodSets(*utiltesting.MakePodSet("one", 60). 789 Request(corev1.ResourceCPU, "1"). 790 Obj()). 791 Obj(), 792 *utiltesting.MakeWorkload("existing", "eng-beta"). 
793 PodSets(*utiltesting.MakePodSet("one", 45). 794 Request(corev1.ResourceCPU, "1"). 795 Obj()). 796 ReserveQuota(utiltesting.MakeAdmission("eng-beta", "one").Assignment(corev1.ResourceCPU, "on-demand", "45000m").AssignmentPodCount(45).Obj()). 797 Obj(), 798 }, 799 wantAssignments: map[string]kueue.Admission{ 800 "eng-alpha/new": { 801 ClusterQueue: "eng-alpha", 802 PodSetAssignments: []kueue.PodSetAssignment{ 803 { 804 Name: "one", 805 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 806 corev1.ResourceCPU: "spot", 807 }, 808 ResourceUsage: corev1.ResourceList{ 809 corev1.ResourceCPU: resource.MustParse("60000m"), 810 }, 811 Count: ptr.To[int32](60), 812 }, 813 }, 814 }, 815 "eng-beta/existing": { 816 ClusterQueue: "eng-beta", 817 PodSetAssignments: []kueue.PodSetAssignment{ 818 { 819 Name: "one", 820 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 821 corev1.ResourceCPU: "on-demand", 822 }, 823 ResourceUsage: corev1.ResourceList{ 824 corev1.ResourceCPU: resource.MustParse("45000m"), 825 }, 826 Count: ptr.To[int32](45), 827 }, 828 }, 829 }, 830 }, 831 wantScheduled: []string{"eng-alpha/new"}, 832 }, 833 "workload should not fit in nonexistent clusterQueue": { 834 workloads: []kueue.Workload{ 835 *utiltesting.MakeWorkload("foo", "sales"). 836 Queue("cq-nonexistent-queue"). 837 Request(corev1.ResourceCPU, "1"). 838 Obj(), 839 }, 840 }, 841 "workload should not fit in clusterQueue with nonexistent flavor": { 842 workloads: []kueue.Workload{ 843 *utiltesting.MakeWorkload("foo", "sales"). 844 Queue("flavor-nonexistent-queue"). 845 Request(corev1.ResourceCPU, "1"). 846 Obj(), 847 }, 848 wantLeft: map[string][]string{ 849 "flavor-nonexistent-cq": {"sales/foo"}, 850 }, 851 }, 852 "no overadmission while borrowing": { 853 workloads: []kueue.Workload{ 854 *utiltesting.MakeWorkload("new", "eng-beta"). 855 Queue("main"). 856 Creation(now.Add(-2 * time.Second)). 857 PodSets(*utiltesting.MakePodSet("one", 50). 
858 Request(corev1.ResourceCPU, "1"). 859 Obj()). 860 Obj(), 861 *utiltesting.MakeWorkload("new-alpha", "eng-alpha"). 862 Queue("main"). 863 Creation(now.Add(-time.Second)). 864 PodSets(*utiltesting.MakePodSet("one", 1). 865 Request(corev1.ResourceCPU, "1"). 866 Obj()). 867 Obj(), 868 *utiltesting.MakeWorkload("new-gamma", "eng-gamma"). 869 Queue("main"). 870 Creation(now). 871 PodSets(*utiltesting.MakePodSet("one", 50). 872 Request(corev1.ResourceCPU, "1"). 873 Obj()). 874 Obj(), 875 *utiltesting.MakeWorkload("existing", "eng-gamma"). 876 PodSets( 877 *utiltesting.MakePodSet("borrow-on-demand", 51). 878 Request(corev1.ResourceCPU, "1"). 879 Obj(), 880 *utiltesting.MakePodSet("use-all-spot", 100). 881 Request(corev1.ResourceCPU, "1"). 882 Obj(), 883 ). 884 ReserveQuota(utiltesting.MakeAdmission("eng-gamma"). 885 PodSets( 886 kueue.PodSetAssignment{ 887 Name: "borrow-on-demand", 888 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 889 corev1.ResourceCPU: "on-demand", 890 }, 891 ResourceUsage: corev1.ResourceList{ 892 corev1.ResourceCPU: resource.MustParse("51"), 893 }, 894 Count: ptr.To[int32](51), 895 }, 896 kueue.PodSetAssignment{ 897 Name: "use-all-spot", 898 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 899 corev1.ResourceCPU: "spot", 900 }, 901 ResourceUsage: corev1.ResourceList{ 902 corev1.ResourceCPU: resource.MustParse("100"), 903 }, 904 Count: ptr.To[int32](100), 905 }, 906 ). 907 Obj()). 908 Obj(), 909 }, 910 additionalClusterQueues: []kueue.ClusterQueue{ 911 *utiltesting.MakeClusterQueue("eng-gamma"). 912 Cohort("eng"). 913 Preemption(kueue.ClusterQueuePreemption{ 914 ReclaimWithinCohort: kueue.PreemptionPolicyAny, 915 WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, 916 }). 917 ResourceGroup( 918 *utiltesting.MakeFlavorQuotas("on-demand"). 919 Resource(corev1.ResourceCPU, "50", "10").Obj(), 920 *utiltesting.MakeFlavorQuotas("spot"). 921 Resource(corev1.ResourceCPU, "0", "100").Obj(), 922 ). 
923 Obj(), 924 }, 925 additionalLocalQueues: []kueue.LocalQueue{ 926 { 927 ObjectMeta: metav1.ObjectMeta{ 928 Namespace: "eng-gamma", 929 Name: "main", 930 }, 931 Spec: kueue.LocalQueueSpec{ 932 ClusterQueue: "eng-gamma", 933 }, 934 }, 935 }, 936 wantAssignments: map[string]kueue.Admission{ 937 "eng-gamma/existing": *utiltesting.MakeAdmission("eng-gamma"). 938 PodSets( 939 kueue.PodSetAssignment{ 940 Name: "borrow-on-demand", 941 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 942 corev1.ResourceCPU: "on-demand", 943 }, 944 ResourceUsage: corev1.ResourceList{ 945 corev1.ResourceCPU: resource.MustParse("51"), 946 }, 947 Count: ptr.To[int32](51), 948 }, 949 kueue.PodSetAssignment{ 950 Name: "use-all-spot", 951 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 952 corev1.ResourceCPU: "spot", 953 }, 954 ResourceUsage: corev1.ResourceList{ 955 corev1.ResourceCPU: resource.MustParse("100"), 956 }, 957 Count: ptr.To[int32](100), 958 }, 959 ).Obj(), 960 "eng-beta/new": *utiltesting.MakeAdmission("eng-beta", "one").Assignment(corev1.ResourceCPU, "on-demand", "50").AssignmentPodCount(50).Obj(), 961 "eng-alpha/new-alpha": *utiltesting.MakeAdmission("eng-alpha", "one").Assignment(corev1.ResourceCPU, "on-demand", "1").AssignmentPodCount(1).Obj(), 962 }, 963 wantScheduled: []string{"eng-beta/new", "eng-alpha/new-alpha"}, 964 wantLeft: map[string][]string{ 965 "eng-gamma": {"eng-gamma/new-gamma"}, 966 }, 967 }, 968 "partial admission single variable pod set": { 969 workloads: []kueue.Workload{ 970 *utiltesting.MakeWorkload("new", "sales"). 971 Queue("main"). 972 PodSets(*utiltesting.MakePodSet("one", 50). 973 SetMinimumCount(20). 974 Request(corev1.ResourceCPU, "2"). 975 Obj()). 
976 Obj(), 977 }, 978 wantAssignments: map[string]kueue.Admission{ 979 "sales/new": { 980 ClusterQueue: "sales", 981 PodSetAssignments: []kueue.PodSetAssignment{ 982 { 983 Name: "one", 984 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 985 corev1.ResourceCPU: "default", 986 }, 987 ResourceUsage: corev1.ResourceList{ 988 corev1.ResourceCPU: resource.MustParse("50000m"), 989 }, 990 Count: ptr.To[int32](25), 991 }, 992 }, 993 }, 994 }, 995 wantScheduled: []string{"sales/new"}, 996 }, 997 "partial admission single variable pod set, preempt first": { 998 workloads: []kueue.Workload{ 999 *utiltesting.MakeWorkload("new", "eng-beta"). 1000 Queue("main"). 1001 Priority(4). 1002 PodSets(*utiltesting.MakePodSet("one", 20). 1003 SetMinimumCount(10). 1004 Request("example.com/gpu", "1"). 1005 Obj()). 1006 Obj(), 1007 *utiltesting.MakeWorkload("old", "eng-beta"). 1008 Priority(-4). 1009 PodSets(*utiltesting.MakePodSet("one", 10). 1010 Request("example.com/gpu", "1"). 1011 Obj()). 1012 ReserveQuota(utiltesting.MakeAdmission("eng-beta", "one").Assignment("example.com/gpu", "model-a", "10").AssignmentPodCount(10).Obj()). 1013 Obj(), 1014 }, 1015 wantAssignments: map[string]kueue.Admission{ 1016 "eng-beta/old": { 1017 ClusterQueue: "eng-beta", 1018 PodSetAssignments: []kueue.PodSetAssignment{ 1019 { 1020 Name: "one", 1021 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 1022 "example.com/gpu": "model-a", 1023 }, 1024 ResourceUsage: corev1.ResourceList{ 1025 "example.com/gpu": resource.MustParse("10"), 1026 }, 1027 Count: ptr.To[int32](10), 1028 }, 1029 }, 1030 }, 1031 }, 1032 wantPreempted: sets.New("eng-beta/old"), 1033 wantLeft: map[string][]string{ 1034 "eng-beta": {"eng-beta/new"}, 1035 }, 1036 }, 1037 "partial admission multiple variable pod sets": { 1038 workloads: []kueue.Workload{ 1039 *utiltesting.MakeWorkload("new", "sales"). 1040 Queue("main"). 1041 PodSets( 1042 *utiltesting.MakePodSet("one", 20). 1043 Request(corev1.ResourceCPU, "1"). 
1044 Obj(), 1045 *utiltesting.MakePodSet("two", 30). 1046 SetMinimumCount(10). 1047 Request(corev1.ResourceCPU, "1"). 1048 Obj(), 1049 *utiltesting.MakePodSet("three", 15). 1050 SetMinimumCount(5). 1051 Request(corev1.ResourceCPU, "1"). 1052 Obj(), 1053 ). 1054 Obj(), 1055 }, 1056 wantAssignments: map[string]kueue.Admission{ 1057 "sales/new": { 1058 ClusterQueue: "sales", 1059 PodSetAssignments: []kueue.PodSetAssignment{ 1060 { 1061 Name: "one", 1062 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 1063 corev1.ResourceCPU: "default", 1064 }, 1065 ResourceUsage: corev1.ResourceList{ 1066 corev1.ResourceCPU: resource.MustParse("20000m"), 1067 }, 1068 Count: ptr.To[int32](20), 1069 }, 1070 { 1071 Name: "two", 1072 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 1073 corev1.ResourceCPU: "default", 1074 }, 1075 ResourceUsage: corev1.ResourceList{ 1076 corev1.ResourceCPU: resource.MustParse("20000m"), 1077 }, 1078 Count: ptr.To[int32](20), 1079 }, 1080 { 1081 Name: "three", 1082 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 1083 corev1.ResourceCPU: "default", 1084 }, 1085 ResourceUsage: corev1.ResourceList{ 1086 corev1.ResourceCPU: resource.MustParse("10000m"), 1087 }, 1088 Count: ptr.To[int32](10), 1089 }, 1090 }, 1091 }, 1092 }, 1093 wantScheduled: []string{"sales/new"}, 1094 }, 1095 "partial admission disabled, multiple variable pod sets": { 1096 workloads: []kueue.Workload{ 1097 *utiltesting.MakeWorkload("new", "sales"). 1098 Queue("main"). 1099 PodSets( 1100 *utiltesting.MakePodSet("one", 20). 1101 Request(corev1.ResourceCPU, "1"). 1102 Obj(), 1103 *utiltesting.MakePodSet("two", 30). 1104 SetMinimumCount(10). 1105 Request(corev1.ResourceCPU, "1"). 1106 Obj(), 1107 *utiltesting.MakePodSet("three", 15). 1108 SetMinimumCount(5). 1109 Request(corev1.ResourceCPU, "1"). 1110 Obj(), 1111 ). 
1112 Obj(), 1113 }, 1114 wantLeft: map[string][]string{ 1115 "sales": {"sales/new"}, 1116 }, 1117 disablePartialAdmission: true, 1118 }, 1119 "two workloads can borrow different resources from the same flavor in the same cycle": { 1120 additionalClusterQueues: func() []kueue.ClusterQueue { 1121 preemption := kueue.ClusterQueuePreemption{ 1122 ReclaimWithinCohort: kueue.PreemptionPolicyAny, 1123 WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, 1124 } 1125 rg := *utiltesting.MakeFlavorQuotas("default").Resource("r1", "10", "10").Resource("r2", "10", "10").Obj() 1126 cq1 := *utiltesting.MakeClusterQueue("cq1").Cohort("co").Preemption(preemption).ResourceGroup(rg).Obj() 1127 cq2 := *utiltesting.MakeClusterQueue("cq2").Cohort("co").Preemption(preemption).ResourceGroup(rg).Obj() 1128 cq3 := *utiltesting.MakeClusterQueue("cq3").Cohort("co").Preemption(preemption).ResourceGroup(rg).Obj() 1129 return []kueue.ClusterQueue{cq1, cq2, cq3} 1130 }(), 1131 additionalLocalQueues: []kueue.LocalQueue{ 1132 *utiltesting.MakeLocalQueue("lq1", "sales").ClusterQueue("cq1").Obj(), 1133 *utiltesting.MakeLocalQueue("lq2", "sales").ClusterQueue("cq2").Obj(), 1134 *utiltesting.MakeLocalQueue("lq3", "sales").ClusterQueue("cq3").Obj(), 1135 }, 1136 workloads: []kueue.Workload{ 1137 *utiltesting.MakeWorkload("wl1", "sales").Queue("lq1").Priority(-1).PodSets( 1138 *utiltesting.MakePodSet("main", 1).Request("r1", "16").Obj(), 1139 ).Obj(), 1140 *utiltesting.MakeWorkload("wl2", "sales").Queue("lq2").Priority(-2).PodSets( 1141 *utiltesting.MakePodSet("main", 1).Request("r2", "16").Obj(), 1142 ).Obj(), 1143 }, 1144 wantScheduled: []string{"sales/wl1", "sales/wl2"}, 1145 wantAssignments: map[string]kueue.Admission{ 1146 "sales/wl1": *utiltesting.MakeAdmission("cq1", "main"). 1147 Assignment("r1", "default", "16").AssignmentPodCount(1). 1148 Obj(), 1149 "sales/wl2": *utiltesting.MakeAdmission("cq2", "main"). 1150 Assignment("r2", "default", "16").AssignmentPodCount(1). 
1151 Obj(), 1152 }, 1153 }, 1154 "two workloads can borrow the same resources from the same flavor in the same cycle if fits in the cohort quota": { 1155 additionalClusterQueues: func() []kueue.ClusterQueue { 1156 preemption := kueue.ClusterQueuePreemption{ 1157 ReclaimWithinCohort: kueue.PreemptionPolicyAny, 1158 WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, 1159 } 1160 rg := *utiltesting.MakeFlavorQuotas("default").Resource("r1", "10", "10").Resource("r2", "10", "10").Obj() 1161 cq1 := *utiltesting.MakeClusterQueue("cq1").Cohort("co").Preemption(preemption).ResourceGroup(rg).Obj() 1162 cq2 := *utiltesting.MakeClusterQueue("cq2").Cohort("co").Preemption(preemption).ResourceGroup(rg).Obj() 1163 cq3 := *utiltesting.MakeClusterQueue("cq3").Cohort("co").Preemption(preemption).ResourceGroup(rg).Obj() 1164 return []kueue.ClusterQueue{cq1, cq2, cq3} 1165 }(), 1166 additionalLocalQueues: []kueue.LocalQueue{ 1167 *utiltesting.MakeLocalQueue("lq1", "sales").ClusterQueue("cq1").Obj(), 1168 *utiltesting.MakeLocalQueue("lq2", "sales").ClusterQueue("cq2").Obj(), 1169 *utiltesting.MakeLocalQueue("lq3", "sales").ClusterQueue("cq3").Obj(), 1170 }, 1171 workloads: []kueue.Workload{ 1172 *utiltesting.MakeWorkload("wl1", "sales").Queue("lq1").Priority(-1).PodSets( 1173 *utiltesting.MakePodSet("main", 1).Request("r1", "16").Obj(), 1174 ).Obj(), 1175 *utiltesting.MakeWorkload("wl2", "sales").Queue("lq2").Priority(-2).PodSets( 1176 *utiltesting.MakePodSet("main", 1).Request("r1", "14").Obj(), 1177 ).Obj(), 1178 }, 1179 wantScheduled: []string{"sales/wl1", "sales/wl2"}, 1180 wantAssignments: map[string]kueue.Admission{ 1181 "sales/wl1": *utiltesting.MakeAdmission("cq1", "main"). 1182 Assignment("r1", "default", "16").AssignmentPodCount(1). 1183 Obj(), 1184 "sales/wl2": *utiltesting.MakeAdmission("cq2", "main"). 1185 Assignment("r1", "default", "14").AssignmentPodCount(1). 
1186 Obj(), 1187 }, 1188 }, 1189 "only one workload can borrow one resources from the same flavor in the same cycle if cohort quota cannot fit": { 1190 additionalClusterQueues: func() []kueue.ClusterQueue { 1191 preemption := kueue.ClusterQueuePreemption{ 1192 ReclaimWithinCohort: kueue.PreemptionPolicyAny, 1193 WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, 1194 } 1195 rg := *utiltesting.MakeFlavorQuotas("default").Resource("r1", "10", "10").Resource("r2", "10", "10").Obj() 1196 cq1 := *utiltesting.MakeClusterQueue("cq1").Cohort("co").Preemption(preemption).ResourceGroup(rg).Obj() 1197 cq2 := *utiltesting.MakeClusterQueue("cq2").Cohort("co").Preemption(preemption).ResourceGroup(rg).Obj() 1198 cq3 := *utiltesting.MakeClusterQueue("cq3").Cohort("co").Preemption(preemption).ResourceGroup(rg).Obj() 1199 return []kueue.ClusterQueue{cq1, cq2, cq3} 1200 }(), 1201 additionalLocalQueues: []kueue.LocalQueue{ 1202 *utiltesting.MakeLocalQueue("lq1", "sales").ClusterQueue("cq1").Obj(), 1203 *utiltesting.MakeLocalQueue("lq2", "sales").ClusterQueue("cq2").Obj(), 1204 *utiltesting.MakeLocalQueue("lq3", "sales").ClusterQueue("cq3").Obj(), 1205 }, 1206 workloads: []kueue.Workload{ 1207 *utiltesting.MakeWorkload("wl1", "sales").Queue("lq1").Priority(-1).PodSets( 1208 *utiltesting.MakePodSet("main", 1).Request("r1", "16").Obj(), 1209 ).Obj(), 1210 *utiltesting.MakeWorkload("wl2", "sales").Queue("lq2").Priority(-2).PodSets( 1211 *utiltesting.MakePodSet("main", 1).Request("r1", "16").Obj(), 1212 ).Obj(), 1213 }, 1214 wantScheduled: []string{"sales/wl1"}, 1215 wantAssignments: map[string]kueue.Admission{ 1216 "sales/wl1": *utiltesting.MakeAdmission("cq1", "main"). 1217 Assignment("r1", "default", "16").AssignmentPodCount(1). 
1218 Obj(), 1219 }, 1220 wantLeft: map[string][]string{ 1221 "cq2": {"sales/wl2"}, 1222 }, 1223 }, 1224 "preemption while borrowing, workload waiting for preemption should not block a borrowing workload in another CQ": { 1225 additionalClusterQueues: []kueue.ClusterQueue{ 1226 *utiltesting.MakeClusterQueue("cq_shared"). 1227 Cohort("preemption-while-borrowing"). 1228 ResourceGroup(*utiltesting.MakeFlavorQuotas("default"). 1229 Resource(corev1.ResourceCPU, "4", "0").Obj()). 1230 Obj(), 1231 *utiltesting.MakeClusterQueue("cq_a"). 1232 Cohort("preemption-while-borrowing"). 1233 Preemption(kueue.ClusterQueuePreemption{ 1234 ReclaimWithinCohort: kueue.PreemptionPolicyLowerPriority, 1235 BorrowWithinCohort: &kueue.BorrowWithinCohort{ 1236 Policy: kueue.BorrowWithinCohortPolicyLowerPriority, 1237 }, 1238 }). 1239 ResourceGroup( 1240 *utiltesting.MakeFlavorQuotas("default"). 1241 Resource(corev1.ResourceCPU, "0", "3").Obj(), 1242 ). 1243 Obj(), 1244 *utiltesting.MakeClusterQueue("cq_b"). 1245 Cohort("preemption-while-borrowing"). 1246 Preemption(kueue.ClusterQueuePreemption{ 1247 ReclaimWithinCohort: kueue.PreemptionPolicyLowerPriority, 1248 BorrowWithinCohort: &kueue.BorrowWithinCohort{ 1249 Policy: kueue.BorrowWithinCohortPolicyLowerPriority, 1250 }, 1251 }). 1252 ResourceGroup( 1253 *utiltesting.MakeFlavorQuotas("default"). 1254 Resource(corev1.ResourceCPU, "0").Obj(), 1255 ). 1256 Obj(), 1257 }, 1258 additionalLocalQueues: []kueue.LocalQueue{ 1259 { 1260 ObjectMeta: metav1.ObjectMeta{ 1261 Namespace: "eng-alpha", 1262 Name: "lq_a", 1263 }, 1264 Spec: kueue.LocalQueueSpec{ 1265 ClusterQueue: "cq_a", 1266 }, 1267 }, 1268 { 1269 ObjectMeta: metav1.ObjectMeta{ 1270 Namespace: "eng-beta", 1271 Name: "lq_b", 1272 }, 1273 Spec: kueue.LocalQueueSpec{ 1274 ClusterQueue: "cq_b", 1275 }, 1276 }, 1277 }, 1278 workloads: []kueue.Workload{ 1279 *utiltesting.MakeWorkload("a", "eng-alpha"). 1280 Queue("lq_a"). 1281 Creation(now.Add(time.Second)). 
1282 PodSets(*utiltesting.MakePodSet("main", 1). 1283 Request(corev1.ResourceCPU, "3"). 1284 Obj()). 1285 Obj(), 1286 *utiltesting.MakeWorkload("b", "eng-beta"). 1287 Queue("lq_b"). 1288 Creation(now.Add(2 * time.Second)). 1289 PodSets(*utiltesting.MakePodSet("main", 1). 1290 Request(corev1.ResourceCPU, "1"). 1291 Obj()). 1292 Obj(), 1293 *utiltesting.MakeWorkload("admitted_a", "eng-alpha"). 1294 Queue("lq_a"). 1295 PodSets( 1296 *utiltesting.MakePodSet("main", 1). 1297 Request(corev1.ResourceCPU, "2"). 1298 Obj(), 1299 ). 1300 ReserveQuota(utiltesting.MakeAdmission("cq_a"). 1301 PodSets( 1302 kueue.PodSetAssignment{ 1303 Name: "main", 1304 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 1305 corev1.ResourceCPU: "default", 1306 }, 1307 ResourceUsage: corev1.ResourceList{ 1308 corev1.ResourceCPU: resource.MustParse("2"), 1309 }, 1310 Count: ptr.To[int32](1), 1311 }, 1312 ). 1313 Obj()). 1314 Obj(), 1315 }, 1316 wantAssignments: map[string]kueue.Admission{ 1317 "eng-alpha/admitted_a": { 1318 ClusterQueue: "cq_a", 1319 PodSetAssignments: []kueue.PodSetAssignment{ 1320 { 1321 Name: "main", 1322 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 1323 corev1.ResourceCPU: "default", 1324 }, 1325 ResourceUsage: corev1.ResourceList{ 1326 corev1.ResourceCPU: resource.MustParse("2"), 1327 }, 1328 Count: ptr.To[int32](1), 1329 }, 1330 }, 1331 }, 1332 "eng-beta/b": { 1333 ClusterQueue: "cq_b", 1334 PodSetAssignments: []kueue.PodSetAssignment{ 1335 { 1336 Name: "main", 1337 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 1338 corev1.ResourceCPU: "default", 1339 }, 1340 ResourceUsage: corev1.ResourceList{ 1341 corev1.ResourceCPU: resource.MustParse("1"), 1342 }, 1343 Count: ptr.To[int32](1), 1344 }, 1345 }, 1346 }, 1347 }, 1348 wantScheduled: []string{ 1349 "eng-beta/b", 1350 }, 1351 wantInadmissibleLeft: map[string][]string{ 1352 "cq_a": {"eng-alpha/a"}, 1353 }, 1354 }, 1355 } 1356 1357 for name, tc := range cases { 1358 t.Run(name, 
func(t *testing.T) { 1359 if tc.enableLendingLimit { 1360 defer features.SetFeatureGateDuringTest(t, features.LendingLimit, true)() 1361 } 1362 if tc.disablePartialAdmission { 1363 defer features.SetFeatureGateDuringTest(t, features.PartialAdmission, false)() 1364 } 1365 ctx, _ := utiltesting.ContextWithLog(t) 1366 1367 allQueues := append(queues, tc.additionalLocalQueues...) 1368 allClusterQueues := append(clusterQueues, tc.additionalClusterQueues...) 1369 1370 clientBuilder := utiltesting.NewClientBuilder(). 1371 WithLists(&kueue.WorkloadList{Items: tc.workloads}, &kueue.LocalQueueList{Items: allQueues}). 1372 WithObjects( 1373 &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "eng-alpha", Labels: map[string]string{"dep": "eng"}}}, 1374 &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "eng-beta", Labels: map[string]string{"dep": "eng"}}}, 1375 &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "eng-gamma", Labels: map[string]string{"dep": "eng"}}}, 1376 &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "sales", Labels: map[string]string{"dep": "sales"}}}, 1377 &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "lend", Labels: map[string]string{"dep": "lend"}}}, 1378 ) 1379 cl := clientBuilder.Build() 1380 recorder := &utiltesting.EventRecorder{} 1381 cqCache := cache.New(cl) 1382 qManager := queue.NewManager(cl, cqCache) 1383 // Workloads are loaded into queues or clusterQueues as we add them. 
1384 for _, q := range allQueues { 1385 if err := qManager.AddLocalQueue(ctx, &q); err != nil { 1386 t.Fatalf("Inserting queue %s/%s in manager: %v", q.Namespace, q.Name, err) 1387 } 1388 } 1389 for i := range resourceFlavors { 1390 cqCache.AddOrUpdateResourceFlavor(resourceFlavors[i]) 1391 } 1392 for _, cq := range allClusterQueues { 1393 if err := cqCache.AddClusterQueue(ctx, &cq); err != nil { 1394 t.Fatalf("Inserting clusterQueue %s in cache: %v", cq.Name, err) 1395 } 1396 if err := qManager.AddClusterQueue(ctx, &cq); err != nil { 1397 t.Fatalf("Inserting clusterQueue %s in manager: %v", cq.Name, err) 1398 } 1399 } 1400 scheduler := New(qManager, cqCache, cl, recorder) 1401 gotScheduled := make(map[string]kueue.Admission) 1402 var mu sync.Mutex 1403 scheduler.applyAdmission = func(ctx context.Context, w *kueue.Workload) error { 1404 if tc.admissionError != nil { 1405 return tc.admissionError 1406 } 1407 mu.Lock() 1408 gotScheduled[workload.Key(w)] = *w.Status.Admission 1409 mu.Unlock() 1410 return nil 1411 } 1412 wg := sync.WaitGroup{} 1413 scheduler.setAdmissionRoutineWrapper(routine.NewWrapper( 1414 func() { wg.Add(1) }, 1415 func() { wg.Done() }, 1416 )) 1417 gotPreempted := sets.New[string]() 1418 scheduler.preemptor.OverrideApply(func(_ context.Context, w *kueue.Workload) error { 1419 mu.Lock() 1420 gotPreempted.Insert(workload.Key(w)) 1421 mu.Unlock() 1422 return nil 1423 }) 1424 1425 ctx, cancel := context.WithTimeout(ctx, queueingTimeout) 1426 go qManager.CleanUpOnContext(ctx) 1427 defer cancel() 1428 1429 scheduler.schedule(ctx) 1430 wg.Wait() 1431 1432 wantScheduled := make(map[string]kueue.Admission) 1433 for _, key := range tc.wantScheduled { 1434 wantScheduled[key] = tc.wantAssignments[key] 1435 } 1436 if diff := cmp.Diff(wantScheduled, gotScheduled); diff != "" { 1437 t.Errorf("Unexpected scheduled workloads (-want,+got):\n%s", diff) 1438 } 1439 1440 if diff := cmp.Diff(tc.wantPreempted, gotPreempted); diff != "" { 1441 t.Errorf("Unexpected 
preemptions (-want,+got):\n%s", diff)
			}

			// Verify assignments in cache.
			gotAssignments := make(map[string]kueue.Admission)
			snapshot := cqCache.Snapshot()
			for cqName, c := range snapshot.ClusterQueues {
				for name, w := range c.Workloads {
					if !workload.HasQuotaReservation(w.Obj) {
						t.Errorf("Workload %s is not admitted by a clusterQueue, but it is found as member of clusterQueue %s in the cache", name, cqName)
					} else if string(w.Obj.Status.Admission.ClusterQueue) != cqName {
						t.Errorf("Workload %s is admitted by clusterQueue %s, but it is found as member of clusterQueue %s in the cache", name, w.Obj.Status.Admission.ClusterQueue, cqName)
					} else {
						gotAssignments[name] = *w.Obj.Status.Admission
					}
				}
			}
			if len(gotAssignments) == 0 {
				gotAssignments = nil
			}
			if diff := cmp.Diff(tc.wantAssignments, gotAssignments); diff != "" {
				t.Errorf("Unexpected assigned clusterQueues in cache (-want,+got):\n%s", diff)
			}

			qDump := qManager.Dump()
			if diff := cmp.Diff(tc.wantLeft, qDump, cmpDump...); diff != "" {
				t.Errorf("Unexpected elements left in the queue (-want,+got):\n%s", diff)
			}
			qDumpInadmissible := qManager.DumpInadmissible()
			if diff := cmp.Diff(tc.wantInadmissibleLeft, qDumpInadmissible, cmpDump...); diff != "" {
				t.Errorf("Unexpected elements left in inadmissible workloads (-want,+got):\n%s", diff)
			}

			if len(tc.wantEvents) > 0 {
				if diff := cmp.Diff(tc.wantEvents, recorder.RecordedEvents, cmpopts.IgnoreFields(utiltesting.EventRecord{}, "Message")); diff != "" {
					t.Errorf("unexpected events (-want/+got):\n%s", diff)
				}
			}
		})
	}
}

// TestEntryOrdering checks the order that sort.Sort produces when a fixed set
// of entries is wrapped in entryOrdering. Each case sets the
// PrioritySortingWithinCohort feature gate and a pods-ready requeuing
// timestamp policy, sorts the entries, and compares the resulting workload
// names against wantOrder.
func TestEntryOrdering(t *testing.T) {
	now := time.Now()
	// input is the shared, read-only fixture. Subtests sort their own copy of
	// it so that the starting order is the same for every case.
	input := []entry{
		{
			Info: workload.Info{
				Obj: &kueue.Workload{ObjectMeta: metav1.ObjectMeta{
					Name:              "old_borrowing",
					CreationTimestamp: metav1.NewTime(now),
				}},
			},
			assignment: flavorassigner.Assignment{
				Borrowing: true,
			},
		},
		{
			Info: workload.Info{
				Obj: &kueue.Workload{ObjectMeta: metav1.ObjectMeta{
					Name:              "old",
					CreationTimestamp: metav1.NewTime(now.Add(time.Second)),
				}},
			},
		},
		{
			Info: workload.Info{
				Obj: &kueue.Workload{ObjectMeta: metav1.ObjectMeta{
					Name:              "new",
					CreationTimestamp: metav1.NewTime(now.Add(3 * time.Second)),
				}},
			},
		},
		{
			Info: workload.Info{
				Obj: &kueue.Workload{ObjectMeta: metav1.ObjectMeta{
					Name:              "high_pri_borrowing",
					CreationTimestamp: metav1.NewTime(now.Add(3 * time.Second)),
				}, Spec: kueue.WorkloadSpec{
					Priority: ptr.To[int32](1),
				}},
			},
			assignment: flavorassigner.Assignment{
				Borrowing: true,
			},
		},
		{
			Info: workload.Info{
				Obj: &kueue.Workload{ObjectMeta: metav1.ObjectMeta{
					Name:              "new_high_pri",
					CreationTimestamp: metav1.NewTime(now.Add(4 * time.Second)),
				}, Spec: kueue.WorkloadSpec{
					Priority: ptr.To[int32](1),
				}},
			},
		},
		{
			Info: workload.Info{
				Obj: &kueue.Workload{ObjectMeta: metav1.ObjectMeta{
					Name:              "new_borrowing",
					CreationTimestamp: metav1.NewTime(now.Add(3 * time.Second)),
				}},
			},
			assignment: flavorassigner.Assignment{
				Borrowing: true,
			},
		},
		{
			// Created at now+1s, evicted at now+2s: its position depends on
			// which of the two timestamps the ordering policy selects.
			Info: workload.Info{
				Obj: &kueue.Workload{
					ObjectMeta: metav1.ObjectMeta{
						Name:              "evicted_borrowing",
						CreationTimestamp: metav1.NewTime(now.Add(time.Second)),
					},
					Status: kueue.WorkloadStatus{
						Conditions: []metav1.Condition{
							{
								Type:               kueue.WorkloadEvicted,
								Status:             metav1.ConditionTrue,
								LastTransitionTime: metav1.NewTime(now.Add(2 * time.Second)),
								Reason:             kueue.WorkloadEvictedByPodsReadyTimeout,
							},
						},
					},
				},
			},
			assignment: flavorassigner.Assignment{
				Borrowing: true,
			},
		},
		{
			// Created at now, evicted at now+2s.
			Info: workload.Info{
				Obj: &kueue.Workload{
					ObjectMeta: metav1.ObjectMeta{
						Name:              "recently_evicted",
						CreationTimestamp: metav1.NewTime(now),
					},
					Status: kueue.WorkloadStatus{
						Conditions: []metav1.Condition{
							{
								Type:               kueue.WorkloadEvicted,
								Status:             metav1.ConditionTrue,
								LastTransitionTime: metav1.NewTime(now.Add(2 * time.Second)),
								Reason:             kueue.WorkloadEvictedByPodsReadyTimeout,
							},
						},
					},
				},
			},
		},
	}
	for _, tc := range []struct {
		name             string
		prioritySorting  bool
		workloadOrdering workload.Ordering
		wantOrder        []string
	}{
		{
			name:             "Priority sorting is enabled (default) using pods-ready Eviction timestamp (default)",
			prioritySorting:  true,
			workloadOrdering: workload.Ordering{PodsReadyRequeuingTimestamp: config.EvictionTimestamp},
			wantOrder:        []string{"new_high_pri", "old", "recently_evicted", "new", "high_pri_borrowing", "old_borrowing", "evicted_borrowing", "new_borrowing"},
		},
		{
			name:             "Priority sorting is enabled (default) using pods-ready Creation timestamp",
			prioritySorting:  true,
			workloadOrdering: workload.Ordering{PodsReadyRequeuingTimestamp: config.CreationTimestamp},
			wantOrder:        []string{"new_high_pri", "recently_evicted", "old", "new", "high_pri_borrowing", "old_borrowing", "evicted_borrowing", "new_borrowing"},
		},
		{
			name:             "Priority sorting is disabled using pods-ready Eviction timestamp",
			prioritySorting:  false,
			workloadOrdering: workload.Ordering{PodsReadyRequeuingTimestamp: config.EvictionTimestamp},
			wantOrder:        []string{"old", "recently_evicted", "new", "new_high_pri", "old_borrowing", "evicted_borrowing", "high_pri_borrowing", "new_borrowing"},
		},
		{
			name:             "Priority sorting is disabled using pods-ready Creation timestamp",
			prioritySorting:  false,
			workloadOrdering: workload.Ordering{PodsReadyRequeuingTimestamp: config.CreationTimestamp},
			wantOrder:        []string{"recently_evicted", "old", "new", "new_high_pri", "old_borrowing", "evicted_borrowing", "high_pri_borrowing", "new_borrowing"},
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			t.Cleanup(features.SetFeatureGateDuringTest(t, features.PrioritySortingWithinCohort, tc.prioritySorting))

			// sort.Sort reorders its argument in place and is not guaranteed
			// to be stable. Sorting the shared fixture directly would make
			// each case start from the previous case's output, so the result
			// for entries that compare equal could depend on which subtests
			// ran before this one. Sort a private copy instead.
			entries := make([]entry, len(input))
			copy(entries, input)
			sort.Sort(entryOrdering{
				entries:          entries,
				workloadOrdering: tc.workloadOrdering,
			})

			order := make([]string, len(entries))
			for i, e := range entries {
				order[i] = e.Obj.Name
			}
			if diff := cmp.Diff(tc.wantOrder, order); diff != "" {
				t.Errorf("%s: Unexpected order (-want,+got):\n%s", tc.name, diff)
			}
		})
	}
}

func TestLastSchedulingContext(t *testing.T) {
	resourceFlavors := []*kueue.ResourceFlavor{
		{ObjectMeta: metav1.ObjectMeta{Name: "on-demand"}},
		{ObjectMeta: metav1.ObjectMeta{Name: "spot"}},
	}
	clusterQueue := []kueue.ClusterQueue{
		*utiltesting.MakeClusterQueue("eng-alpha").
			QueueingStrategy(kueue.BestEffortFIFO).
			Preemption(kueue.ClusterQueuePreemption{
				WithinClusterQueue: kueue.PreemptionPolicyLowerPriority,
			}).
			FlavorFungibility(kueue.FlavorFungibility{
				WhenCanPreempt: kueue.Preempt,
			}).
			ResourceGroup(
				*utiltesting.MakeFlavorQuotas("on-demand").
					Resource(corev1.ResourceCPU, "50", "50").Obj(),
				*utiltesting.MakeFlavorQuotas("spot").
					Resource(corev1.ResourceCPU, "100", "0").Obj(),
			).Obj(),
	}
	clusterQueue_cohort := []kueue.ClusterQueue{
		*utiltesting.MakeClusterQueue("eng-cohort-alpha").
			Cohort("cohort").
			QueueingStrategy(kueue.StrictFIFO).
			Preemption(kueue.ClusterQueuePreemption{
				WithinClusterQueue:  kueue.PreemptionPolicyNever,
				ReclaimWithinCohort: kueue.PreemptionPolicyLowerPriority,
			}).
			FlavorFungibility(kueue.FlavorFungibility{
				WhenCanPreempt: kueue.Preempt,
				WhenCanBorrow:  kueue.Borrow,
			}).
1672 ResourceGroup( 1673 *utiltesting.MakeFlavorQuotas("on-demand"). 1674 Resource(corev1.ResourceCPU, "50", "50").Obj(), 1675 *utiltesting.MakeFlavorQuotas("spot"). 1676 Resource(corev1.ResourceCPU, "100", "0").Obj(), 1677 ).Obj(), 1678 *utiltesting.MakeClusterQueue("eng-cohort-beta"). 1679 Cohort("cohort"). 1680 QueueingStrategy(kueue.StrictFIFO). 1681 Preemption(kueue.ClusterQueuePreemption{ 1682 WithinClusterQueue: kueue.PreemptionPolicyNever, 1683 ReclaimWithinCohort: kueue.PreemptionPolicyLowerPriority, 1684 }). 1685 FlavorFungibility(kueue.FlavorFungibility{ 1686 WhenCanPreempt: kueue.Preempt, 1687 WhenCanBorrow: kueue.Borrow, 1688 }). 1689 ResourceGroup( 1690 *utiltesting.MakeFlavorQuotas("on-demand"). 1691 Resource(corev1.ResourceCPU, "50", "50").Obj(), 1692 *utiltesting.MakeFlavorQuotas("spot"). 1693 Resource(corev1.ResourceCPU, "100", "0").Obj(), 1694 ).Obj(), 1695 *utiltesting.MakeClusterQueue("eng-cohort-theta"). 1696 Cohort("cohort"). 1697 QueueingStrategy(kueue.StrictFIFO). 1698 Preemption(kueue.ClusterQueuePreemption{ 1699 WithinClusterQueue: kueue.PreemptionPolicyNever, 1700 ReclaimWithinCohort: kueue.PreemptionPolicyLowerPriority, 1701 }). 1702 FlavorFungibility(kueue.FlavorFungibility{ 1703 WhenCanPreempt: kueue.TryNextFlavor, 1704 WhenCanBorrow: kueue.TryNextFlavor, 1705 }). 1706 ResourceGroup( 1707 *utiltesting.MakeFlavorQuotas("on-demand"). 1708 Resource(corev1.ResourceCPU, "50", "50").Obj(), 1709 *utiltesting.MakeFlavorQuotas("spot"). 
1710 Resource(corev1.ResourceCPU, "100", "0").Obj(), 1711 ).Obj(), 1712 } 1713 1714 queues := []kueue.LocalQueue{ 1715 { 1716 ObjectMeta: metav1.ObjectMeta{ 1717 Namespace: "default", 1718 Name: "main", 1719 }, 1720 Spec: kueue.LocalQueueSpec{ 1721 ClusterQueue: "eng-alpha", 1722 }, 1723 }, 1724 { 1725 ObjectMeta: metav1.ObjectMeta{ 1726 Namespace: "default", 1727 Name: "main-alpha", 1728 }, 1729 Spec: kueue.LocalQueueSpec{ 1730 ClusterQueue: "eng-cohort-alpha", 1731 }, 1732 }, 1733 { 1734 ObjectMeta: metav1.ObjectMeta{ 1735 Namespace: "default", 1736 Name: "main-beta", 1737 }, 1738 Spec: kueue.LocalQueueSpec{ 1739 ClusterQueue: "eng-cohort-beta", 1740 }, 1741 }, 1742 { 1743 ObjectMeta: metav1.ObjectMeta{ 1744 Namespace: "default", 1745 Name: "main-theta", 1746 }, 1747 Spec: kueue.LocalQueueSpec{ 1748 ClusterQueue: "eng-cohort-theta", 1749 }, 1750 }, 1751 } 1752 wl := utiltesting.MakeWorkload("low-1", "default"). 1753 Queue("main"). 1754 Request(corev1.ResourceCPU, "50"). 1755 ReserveQuota(utiltesting.MakeAdmission("eng-alpha").Assignment(corev1.ResourceCPU, "on-demand", "50").Obj()). 1756 Admitted(true). 1757 Obj() 1758 cases := []struct { 1759 name string 1760 cqs []kueue.ClusterQueue 1761 admittedWorkloads []kueue.Workload 1762 workloads []kueue.Workload 1763 deleteWorkloads []kueue.Workload 1764 wantPreempted sets.Set[string] 1765 wantAdmissionsOnFirstSchedule map[string]kueue.Admission 1766 wantAdmissionsOnSecondSchedule map[string]kueue.Admission 1767 }{ 1768 { 1769 name: "scheduling context not changed: use next flavor if can't preempt", 1770 cqs: clusterQueue, 1771 admittedWorkloads: []kueue.Workload{ 1772 *wl, 1773 }, 1774 workloads: []kueue.Workload{ 1775 *utiltesting.MakeWorkload("new", "default"). 1776 Queue("main"). 1777 Request(corev1.ResourceCPU, "20"). 
1778 Obj(), 1779 }, 1780 deleteWorkloads: []kueue.Workload{}, 1781 wantPreempted: sets.Set[string]{}, 1782 wantAdmissionsOnFirstSchedule: map[string]kueue.Admission{}, 1783 wantAdmissionsOnSecondSchedule: map[string]kueue.Admission{ 1784 "default/new": *utiltesting.MakeAdmission("eng-alpha").Assignment(corev1.ResourceCPU, "spot", "20").Obj(), 1785 "default/low-1": *utiltesting.MakeAdmission("eng-alpha").Assignment(corev1.ResourceCPU, "on-demand", "50").Obj(), 1786 }, 1787 }, 1788 { 1789 name: "some workloads were deleted", 1790 cqs: clusterQueue, 1791 admittedWorkloads: []kueue.Workload{ 1792 *wl, 1793 }, 1794 workloads: []kueue.Workload{ 1795 *utiltesting.MakeWorkload("preemptor", "default"). 1796 Queue("main"). 1797 Request(corev1.ResourceCPU, "20"). 1798 Obj(), 1799 }, 1800 deleteWorkloads: []kueue.Workload{ 1801 *wl, 1802 }, 1803 wantPreempted: sets.Set[string]{}, 1804 wantAdmissionsOnFirstSchedule: map[string]kueue.Admission{}, 1805 wantAdmissionsOnSecondSchedule: map[string]kueue.Admission{ 1806 "default/preemptor": *utiltesting.MakeAdmission("eng-alpha").Assignment(corev1.ResourceCPU, "on-demand", "20").Obj(), 1807 }, 1808 }, 1809 { 1810 name: "borrow before next flavor", 1811 cqs: clusterQueue_cohort, 1812 admittedWorkloads: []kueue.Workload{ 1813 *utiltesting.MakeWorkload("placeholder", "default"). 1814 Request(corev1.ResourceCPU, "50"). 1815 ReserveQuota(utiltesting.MakeAdmission("eng-cohort-alpha").Assignment(corev1.ResourceCPU, "on-demand", "50").Obj()). 1816 Admitted(true). 1817 Obj(), 1818 }, 1819 workloads: []kueue.Workload{ 1820 *utiltesting.MakeWorkload("borrower", "default"). 1821 Queue("main-alpha"). 1822 Request(corev1.ResourceCPU, "20"). 1823 Obj(), 1824 *utiltesting.MakeWorkload("workload1", "default"). 1825 Queue("main-beta"). 1826 Request(corev1.ResourceCPU, "20"). 
1827 Obj(), 1828 }, 1829 deleteWorkloads: []kueue.Workload{}, 1830 wantPreempted: sets.Set[string]{}, 1831 wantAdmissionsOnFirstSchedule: map[string]kueue.Admission{ 1832 "default/workload1": *utiltesting.MakeAdmission("eng-cohort-beta").Assignment(corev1.ResourceCPU, "on-demand", "20").Obj(), 1833 "default/borrower": *utiltesting.MakeAdmission("eng-cohort-alpha").Assignment(corev1.ResourceCPU, "on-demand", "20").Obj(), 1834 }, 1835 wantAdmissionsOnSecondSchedule: map[string]kueue.Admission{ 1836 "default/placeholder": *utiltesting.MakeAdmission("eng-cohort-alpha").Assignment(corev1.ResourceCPU, "on-demand", "50").Obj(), 1837 "default/workload1": *utiltesting.MakeAdmission("eng-cohort-beta").Assignment(corev1.ResourceCPU, "on-demand", "20").Obj(), 1838 "default/borrower": *utiltesting.MakeAdmission("eng-cohort-alpha").Assignment(corev1.ResourceCPU, "on-demand", "20").Obj(), 1839 }, 1840 }, 1841 { 1842 name: "borrow after all flavors", 1843 cqs: clusterQueue_cohort, 1844 admittedWorkloads: []kueue.Workload{ 1845 *utiltesting.MakeWorkload("placeholder", "default"). 1846 Request(corev1.ResourceCPU, "50"). 1847 ReserveQuota(utiltesting.MakeAdmission("eng-cohort-alpha").Assignment(corev1.ResourceCPU, "on-demand", "50").Obj()). 1848 Admitted(true). 1849 Obj(), 1850 *utiltesting.MakeWorkload("placeholder1", "default"). 1851 Request(corev1.ResourceCPU, "50"). 1852 ReserveQuota(utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "on-demand", "50").Obj()). 1853 Admitted(true). 1854 Obj(), 1855 }, 1856 workloads: []kueue.Workload{ 1857 *utiltesting.MakeWorkload("workload", "default"). 1858 Queue("main-theta"). 1859 Request(corev1.ResourceCPU, "20"). 
1860 Obj(), 1861 }, 1862 deleteWorkloads: []kueue.Workload{}, 1863 wantPreempted: sets.Set[string]{}, 1864 wantAdmissionsOnFirstSchedule: map[string]kueue.Admission{ 1865 "default/workload": *utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "spot", "20").Obj(), 1866 }, 1867 wantAdmissionsOnSecondSchedule: map[string]kueue.Admission{ 1868 "default/placeholder": *utiltesting.MakeAdmission("eng-cohort-alpha").Assignment(corev1.ResourceCPU, "on-demand", "50").Obj(), 1869 "default/placeholder1": *utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "on-demand", "50").Obj(), 1870 "default/workload": *utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "spot", "20").Obj(), 1871 }, 1872 }, 1873 { 1874 name: "when the next flavor is full, but can borrow on first", 1875 cqs: clusterQueue_cohort, 1876 admittedWorkloads: []kueue.Workload{ 1877 *utiltesting.MakeWorkload("placeholder", "default"). 1878 Request(corev1.ResourceCPU, "40"). 1879 ReserveQuota(utiltesting.MakeAdmission("eng-cohort-alpha").Assignment(corev1.ResourceCPU, "on-demand", "40").Obj()). 1880 Admitted(true). 1881 Obj(), 1882 *utiltesting.MakeWorkload("placeholder1", "default"). 1883 Request(corev1.ResourceCPU, "40"). 1884 ReserveQuota(utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "on-demand", "40").Obj()). 1885 Admitted(true). 1886 Obj(), 1887 *utiltesting.MakeWorkload("placeholder2", "default"). 1888 Request(corev1.ResourceCPU, "100"). 1889 ReserveQuota(utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "spot", "100").Obj()). 1890 Admitted(true). 1891 Obj(), 1892 }, 1893 workloads: []kueue.Workload{ 1894 *utiltesting.MakeWorkload("workload", "default"). 1895 Queue("main-theta"). 1896 Request(corev1.ResourceCPU, "20"). 
1897 Obj(), 1898 }, 1899 deleteWorkloads: []kueue.Workload{}, 1900 wantPreempted: sets.Set[string]{}, 1901 wantAdmissionsOnFirstSchedule: map[string]kueue.Admission{ 1902 "default/workload": *utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "on-demand", "20").Obj(), 1903 }, 1904 wantAdmissionsOnSecondSchedule: map[string]kueue.Admission{ 1905 "default/placeholder": *utiltesting.MakeAdmission("eng-cohort-alpha").Assignment(corev1.ResourceCPU, "on-demand", "40").Obj(), 1906 "default/placeholder1": *utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "on-demand", "40").Obj(), 1907 "default/placeholder2": *utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "spot", "100").Obj(), 1908 "default/workload": *utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "on-demand", "20").Obj(), 1909 }, 1910 }, 1911 { 1912 name: "when the next flavor is full, but can preempt on first", 1913 cqs: clusterQueue_cohort, 1914 admittedWorkloads: []kueue.Workload{ 1915 *utiltesting.MakeWorkload("placeholder-alpha", "default"). 1916 Priority(-1). 1917 Request(corev1.ResourceCPU, "150"). 1918 ReserveQuota(utiltesting.MakeAdmission("eng-cohort-alpha").Assignment(corev1.ResourceCPU, "on-demand", "150").Obj()). 1919 Admitted(true). 1920 Obj(), 1921 *utiltesting.MakeWorkload("placeholder-theta-spot", "default"). 1922 Request(corev1.ResourceCPU, "100"). 1923 ReserveQuota(utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "spot", "100").Obj()). 1924 Admitted(true). 1925 Obj(), 1926 }, 1927 workloads: []kueue.Workload{ 1928 *utiltesting.MakeWorkload("new", "default"). 1929 Queue("main-theta"). 1930 Request(corev1.ResourceCPU, "20"). 
1931 Obj(), 1932 }, 1933 deleteWorkloads: []kueue.Workload{*utiltesting.MakeWorkload("placeholder-alpha", "default").Obj()}, 1934 wantPreempted: sets.New("default/placeholder-alpha"), 1935 wantAdmissionsOnFirstSchedule: map[string]kueue.Admission{}, 1936 wantAdmissionsOnSecondSchedule: map[string]kueue.Admission{ 1937 "default/placeholder-theta-spot": *utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "spot", "100").Obj(), 1938 "default/new": *utiltesting.MakeAdmission("eng-cohort-theta").Assignment(corev1.ResourceCPU, "on-demand", "20").Obj(), 1939 }, 1940 }, 1941 } 1942 1943 for _, tc := range cases { 1944 t.Run(tc.name, func(t *testing.T) { 1945 ctx, _ := utiltesting.ContextWithLog(t) 1946 scheme := runtime.NewScheme() 1947 1948 clientBuilder := utiltesting.NewClientBuilder(). 1949 WithLists(&kueue.WorkloadList{Items: tc.admittedWorkloads}, 1950 &kueue.WorkloadList{Items: tc.workloads}, 1951 &kueue.ClusterQueueList{Items: tc.cqs}, 1952 &kueue.LocalQueueList{Items: queues}). 1953 WithObjects( 1954 &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "default"}}, 1955 ) 1956 cl := clientBuilder.Build() 1957 broadcaster := record.NewBroadcaster() 1958 recorder := broadcaster.NewRecorder(scheme, 1959 corev1.EventSource{Component: constants.AdmissionName}) 1960 cqCache := cache.New(cl) 1961 qManager := queue.NewManager(cl, cqCache) 1962 // Workloads are loaded into queues or clusterQueues as we add them. 
1963 for _, q := range queues { 1964 if err := qManager.AddLocalQueue(ctx, &q); err != nil { 1965 t.Fatalf("Inserting queue %s/%s in manager: %v", q.Namespace, q.Name, err) 1966 } 1967 } 1968 for i := range resourceFlavors { 1969 cqCache.AddOrUpdateResourceFlavor(resourceFlavors[i]) 1970 } 1971 for _, cq := range tc.cqs { 1972 if err := cqCache.AddClusterQueue(ctx, &cq); err != nil { 1973 t.Fatalf("Inserting clusterQueue %s in cache: %v", cq.Name, err) 1974 } 1975 if err := qManager.AddClusterQueue(ctx, &cq); err != nil { 1976 t.Fatalf("Inserting clusterQueue %s in manager: %v", cq.Name, err) 1977 } 1978 } 1979 scheduler := New(qManager, cqCache, cl, recorder) 1980 gotScheduled := make(map[string]kueue.Admission) 1981 var mu sync.Mutex 1982 scheduler.applyAdmission = func(ctx context.Context, w *kueue.Workload) error { 1983 mu.Lock() 1984 gotScheduled[workload.Key(w)] = *w.Status.Admission 1985 mu.Unlock() 1986 return nil 1987 } 1988 wg := sync.WaitGroup{} 1989 scheduler.setAdmissionRoutineWrapper(routine.NewWrapper( 1990 func() { wg.Add(1) }, 1991 func() { wg.Done() }, 1992 )) 1993 gotPreempted := sets.New[string]() 1994 scheduler.preemptor.OverrideApply(func(_ context.Context, w *kueue.Workload) error { 1995 mu.Lock() 1996 gotPreempted.Insert(workload.Key(w)) 1997 mu.Unlock() 1998 return nil 1999 }) 2000 2001 ctx, cancel := context.WithTimeout(ctx, queueingTimeout) 2002 go qManager.CleanUpOnContext(ctx) 2003 defer cancel() 2004 2005 scheduler.schedule(ctx) 2006 wg.Wait() 2007 2008 if diff := cmp.Diff(tc.wantPreempted, gotPreempted); diff != "" { 2009 t.Errorf("Unexpected preemptions (-want,+got):\n%s", diff) 2010 } 2011 if diff := cmp.Diff(tc.wantAdmissionsOnFirstSchedule, gotScheduled); diff != "" { 2012 t.Errorf("Unexpected scheduled workloads (-want,+got):\n%s", diff) 2013 } 2014 2015 for _, wl := range tc.deleteWorkloads { 2016 err := cl.Delete(ctx, &wl) 2017 if err != nil { 2018 t.Errorf("Delete workload failed: %v", err) 2019 } 2020 err = 
cqCache.DeleteWorkload(&wl) 2021 if err != nil { 2022 t.Errorf("Delete workload failed: %v", err) 2023 } 2024 qManager.QueueAssociatedInadmissibleWorkloadsAfter(ctx, &wl, nil) 2025 } 2026 2027 scheduler.schedule(ctx) 2028 wg.Wait() 2029 2030 if diff := cmp.Diff(tc.wantPreempted, gotPreempted); diff != "" { 2031 t.Errorf("Unexpected preemptions (-want,+got):\n%s", diff) 2032 } 2033 // Verify assignments in cache. 2034 gotAssignments := make(map[string]kueue.Admission) 2035 snapshot := cqCache.Snapshot() 2036 for cqName, c := range snapshot.ClusterQueues { 2037 for name, w := range c.Workloads { 2038 if !workload.IsAdmitted(w.Obj) { 2039 t.Errorf("Workload %s is not admitted by a clusterQueue, but it is found as member of clusterQueue %s in the cache", name, cqName) 2040 } else if string(w.Obj.Status.Admission.ClusterQueue) != cqName { 2041 t.Errorf("Workload %s is admitted by clusterQueue %s, but it is found as member of clusterQueue %s in the cache", name, w.Obj.Status.Admission.ClusterQueue, cqName) 2042 } else { 2043 gotAssignments[name] = *w.Obj.Status.Admission 2044 } 2045 } 2046 } 2047 if diff := cmp.Diff(tc.wantAdmissionsOnSecondSchedule, gotAssignments); diff != "" { 2048 t.Errorf("Unexpected assigned clusterQueues in cache (-want,+got):\n%s", diff) 2049 } 2050 }) 2051 } 2052 } 2053 2054 var ignoreConditionTimestamps = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime") 2055 2056 func TestRequeueAndUpdate(t *testing.T) { 2057 cq := utiltesting.MakeClusterQueue("cq").Obj() 2058 q1 := utiltesting.MakeLocalQueue("q1", "ns1").ClusterQueue(cq.Name).Obj() 2059 w1 := utiltesting.MakeWorkload("w1", "ns1").Queue(q1.Name).Obj() 2060 2061 cases := []struct { 2062 name string 2063 e entry 2064 wantWorkloads map[string][]string 2065 wantInadmissible map[string][]string 2066 wantStatus kueue.WorkloadStatus 2067 wantStatusUpdates int 2068 }{ 2069 { 2070 name: "workload didn't fit", 2071 e: entry{ 2072 inadmissibleMsg: "didn't fit", 2073 }, 2074 wantStatus: 
kueue.WorkloadStatus{ 2075 Conditions: []metav1.Condition{ 2076 { 2077 Type: kueue.WorkloadQuotaReserved, 2078 Status: metav1.ConditionFalse, 2079 Reason: "Pending", 2080 Message: "didn't fit", 2081 }, 2082 }, 2083 }, 2084 wantInadmissible: map[string][]string{ 2085 "cq": {workload.Key(w1)}, 2086 }, 2087 wantStatusUpdates: 1, 2088 }, 2089 { 2090 name: "assumed", 2091 e: entry{ 2092 status: assumed, 2093 inadmissibleMsg: "", 2094 }, 2095 wantWorkloads: map[string][]string{ 2096 "cq": {workload.Key(w1)}, 2097 }, 2098 }, 2099 { 2100 name: "nominated", 2101 e: entry{ 2102 status: nominated, 2103 inadmissibleMsg: "failed to admit workload", 2104 }, 2105 wantWorkloads: map[string][]string{ 2106 "cq": {workload.Key(w1)}, 2107 }, 2108 }, 2109 { 2110 name: "skipped", 2111 e: entry{ 2112 status: skipped, 2113 inadmissibleMsg: "cohort used in this cycle", 2114 }, 2115 wantStatus: kueue.WorkloadStatus{ 2116 Conditions: []metav1.Condition{ 2117 { 2118 Type: kueue.WorkloadQuotaReserved, 2119 Status: metav1.ConditionFalse, 2120 Reason: "Pending", 2121 Message: "cohort used in this cycle", 2122 }, 2123 }, 2124 }, 2125 wantWorkloads: map[string][]string{ 2126 "cq": {workload.Key(w1)}, 2127 }, 2128 wantStatusUpdates: 1, 2129 }, 2130 } 2131 2132 for _, tc := range cases { 2133 t.Run(tc.name, func(t *testing.T) { 2134 ctx, log := utiltesting.ContextWithLog(t) 2135 scheme := runtime.NewScheme() 2136 2137 updates := 0 2138 objs := []client.Object{w1, q1, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "ns1"}}} 2139 cl := utiltesting.NewClientBuilder().WithInterceptorFuncs(interceptor.Funcs{ 2140 SubResourcePatch: func(ctx context.Context, client client.Client, subResourceName string, obj client.Object, patch client.Patch, opts ...client.SubResourcePatchOption) error { 2141 updates++ 2142 return client.SubResource(subResourceName).Patch(ctx, obj, patch, opts...) 
2143 }, 2144 }).WithObjects(objs...).WithStatusSubresource(objs...).Build() 2145 broadcaster := record.NewBroadcaster() 2146 recorder := broadcaster.NewRecorder(scheme, corev1.EventSource{Component: constants.AdmissionName}) 2147 cqCache := cache.New(cl) 2148 qManager := queue.NewManager(cl, cqCache) 2149 scheduler := New(qManager, cqCache, cl, recorder) 2150 if err := qManager.AddLocalQueue(ctx, q1); err != nil { 2151 t.Fatalf("Inserting queue %s/%s in manager: %v", q1.Namespace, q1.Name, err) 2152 } 2153 if err := qManager.AddClusterQueue(ctx, cq); err != nil { 2154 t.Fatalf("Inserting clusterQueue %s in manager: %v", cq.Name, err) 2155 } 2156 if err := cqCache.AddClusterQueue(ctx, cq); err != nil { 2157 t.Fatalf("Inserting clusterQueue %s to cache: %v", cq.Name, err) 2158 } 2159 if !cqCache.ClusterQueueActive(cq.Name) { 2160 t.Fatalf("Status of ClusterQueue %s should be active", cq.Name) 2161 } 2162 2163 wInfos := qManager.Heads(ctx) 2164 if len(wInfos) != 1 { 2165 t.Fatalf("Failed getting heads in cluster queue") 2166 } 2167 tc.e.Info = wInfos[0] 2168 scheduler.requeueAndUpdate(log, ctx, tc.e) 2169 2170 qDump := qManager.Dump() 2171 if diff := cmp.Diff(tc.wantWorkloads, qDump, cmpDump...); diff != "" { 2172 t.Errorf("Unexpected elements in the cluster queue (-want,+got):\n%s", diff) 2173 } 2174 2175 inadmissibleDump := qManager.DumpInadmissible() 2176 if diff := cmp.Diff(tc.wantInadmissible, inadmissibleDump, cmpDump...); diff != "" { 2177 t.Errorf("Unexpected elements in the inadmissible stage of the cluster queue (-want,+got):\n%s", diff) 2178 } 2179 2180 var updatedWl kueue.Workload 2181 if err := cl.Get(ctx, client.ObjectKeyFromObject(w1), &updatedWl); err != nil { 2182 t.Fatalf("Failed obtaining updated object: %v", err) 2183 } 2184 if diff := cmp.Diff(tc.wantStatus, updatedWl.Status, ignoreConditionTimestamps); diff != "" { 2185 t.Errorf("Unexpected status after updating (-want,+got):\n%s", diff) 2186 } 2187 // Make sure a second call doesn't make 
unnecessary updates. 2188 scheduler.requeueAndUpdate(log, ctx, tc.e) 2189 if updates != tc.wantStatusUpdates { 2190 t.Errorf("Observed %d status updates, want %d", updates, tc.wantStatusUpdates) 2191 } 2192 }) 2193 } 2194 } 2195 2196 func TestResourcesToReserve(t *testing.T) { 2197 resourceFlavors := []*kueue.ResourceFlavor{ 2198 {ObjectMeta: metav1.ObjectMeta{Name: "on-demand"}}, 2199 {ObjectMeta: metav1.ObjectMeta{Name: "spot"}}, 2200 {ObjectMeta: metav1.ObjectMeta{Name: "model-a"}}, 2201 {ObjectMeta: metav1.ObjectMeta{Name: "model-b"}}, 2202 } 2203 cq := utiltesting.MakeClusterQueue("cq"). 2204 Cohort("eng"). 2205 ResourceGroup( 2206 *utiltesting.MakeFlavorQuotas("on-demand"). 2207 Resource(corev1.ResourceMemory, "100").Obj(), 2208 *utiltesting.MakeFlavorQuotas("spot"). 2209 Resource(corev1.ResourceMemory, "0", "100").Obj(), 2210 ). 2211 ResourceGroup( 2212 *utiltesting.MakeFlavorQuotas("model-a"). 2213 Resource("gpu", "10", "0").Obj(), 2214 *utiltesting.MakeFlavorQuotas("model-b"). 2215 Resource("gpu", "10", "5").Obj(), 2216 ). 2217 QueueingStrategy(kueue.StrictFIFO). 
2218 Obj() 2219 2220 cases := []struct { 2221 name string 2222 assignmentMode flavorassigner.FlavorAssignmentMode 2223 borrowing bool 2224 assignmentUsage cache.FlavorResourceQuantities 2225 cqUsage cache.FlavorResourceQuantities 2226 wantReserved cache.FlavorResourceQuantities 2227 }{ 2228 { 2229 name: "Reserved memory and gpu less than assignment usage, assignment preempts", 2230 assignmentMode: flavorassigner.Preempt, 2231 assignmentUsage: cache.FlavorResourceQuantities{ 2232 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 50}, 2233 kueue.ResourceFlavorReference("model-a"): {"gpu": 6}, 2234 }, 2235 cqUsage: cache.FlavorResourceQuantities{ 2236 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 60}, 2237 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 50}, 2238 kueue.ResourceFlavorReference("model-a"): {"gpu": 6}, 2239 kueue.ResourceFlavorReference("model-b"): {"gpu": 2}, 2240 }, 2241 wantReserved: cache.FlavorResourceQuantities{ 2242 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 40}, 2243 kueue.ResourceFlavorReference("model-a"): {"gpu": 4}, 2244 }, 2245 }, 2246 { 2247 name: "Reserved memory equal assignment usage, assignment preempts", 2248 assignmentMode: flavorassigner.Preempt, 2249 assignmentUsage: cache.FlavorResourceQuantities{ 2250 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 30}, 2251 kueue.ResourceFlavorReference("model-a"): {"gpu": 2}, 2252 }, 2253 cqUsage: cache.FlavorResourceQuantities{ 2254 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 60}, 2255 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 50}, 2256 kueue.ResourceFlavorReference("model-a"): {"gpu": 2}, 2257 kueue.ResourceFlavorReference("model-b"): {"gpu": 2}, 2258 }, 2259 wantReserved: cache.FlavorResourceQuantities{ 2260 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 30}, 2261 kueue.ResourceFlavorReference("model-a"): {"gpu": 2}, 2262 }, 2263 
}, 2264 { 2265 name: "Reserved memory equal assignment usage, assigmnent fits", 2266 assignmentMode: flavorassigner.Fit, 2267 assignmentUsage: cache.FlavorResourceQuantities{ 2268 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 50}, 2269 kueue.ResourceFlavorReference("model-a"): {"gpu": 2}, 2270 }, 2271 cqUsage: cache.FlavorResourceQuantities{ 2272 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 60}, 2273 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 50}, 2274 kueue.ResourceFlavorReference("model-a"): {"gpu": 2}, 2275 kueue.ResourceFlavorReference("model-b"): {"gpu": 2}, 2276 }, 2277 wantReserved: cache.FlavorResourceQuantities{ 2278 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 50}, 2279 kueue.ResourceFlavorReference("model-a"): {"gpu": 2}, 2280 }, 2281 }, 2282 { 2283 name: "Reserved memory is 0, CQ is borrowing, assignment preempts without borrowing", 2284 assignmentMode: flavorassigner.Preempt, 2285 assignmentUsage: cache.FlavorResourceQuantities{ 2286 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 50}, 2287 kueue.ResourceFlavorReference("model-b"): {"gpu": 2}, 2288 }, 2289 cqUsage: cache.FlavorResourceQuantities{ 2290 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 60}, 2291 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 60}, 2292 kueue.ResourceFlavorReference("model-a"): {"gpu": 2}, 2293 kueue.ResourceFlavorReference("model-b"): {"gpu": 10}, 2294 }, 2295 wantReserved: cache.FlavorResourceQuantities{ 2296 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 0}, 2297 kueue.ResourceFlavorReference("model-b"): {"gpu": 0}, 2298 }, 2299 }, 2300 { 2301 name: "Reserved memory cut by nominal+borrowing quota, assignment preempts and borrows", 2302 assignmentMode: flavorassigner.Preempt, 2303 borrowing: true, 2304 assignmentUsage: cache.FlavorResourceQuantities{ 2305 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 
50}, 2306 kueue.ResourceFlavorReference("model-b"): {"gpu": 2}, 2307 }, 2308 cqUsage: cache.FlavorResourceQuantities{ 2309 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 60}, 2310 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 60}, 2311 kueue.ResourceFlavorReference("model-a"): {"gpu": 2}, 2312 kueue.ResourceFlavorReference("model-b"): {"gpu": 10}, 2313 }, 2314 wantReserved: cache.FlavorResourceQuantities{ 2315 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 40}, 2316 kueue.ResourceFlavorReference("model-b"): {"gpu": 2}, 2317 }, 2318 }, 2319 { 2320 name: "Reserved memory equal assignment usage, CQ borrowing limit is nil", 2321 assignmentMode: flavorassigner.Preempt, 2322 borrowing: true, 2323 assignmentUsage: cache.FlavorResourceQuantities{ 2324 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 50}, 2325 kueue.ResourceFlavorReference("model-b"): {"gpu": 2}, 2326 }, 2327 cqUsage: cache.FlavorResourceQuantities{ 2328 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 60}, 2329 kueue.ResourceFlavorReference("spot"): {corev1.ResourceMemory: 60}, 2330 kueue.ResourceFlavorReference("model-a"): {"gpu": 2}, 2331 kueue.ResourceFlavorReference("model-b"): {"gpu": 10}, 2332 }, 2333 wantReserved: cache.FlavorResourceQuantities{ 2334 kueue.ResourceFlavorReference("on-demand"): {corev1.ResourceMemory: 50}, 2335 kueue.ResourceFlavorReference("model-b"): {"gpu": 2}, 2336 }, 2337 }, 2338 } 2339 for _, tc := range cases { 2340 t.Run(tc.name, func(t *testing.T) { 2341 ctx, _ := utiltesting.ContextWithLog(t) 2342 assignment := flavorassigner.Assignment{ 2343 PodSets: []flavorassigner.PodSetAssignment{{ 2344 Name: "memory", 2345 Status: &flavorassigner.Status{}, 2346 Flavors: flavorassigner.ResourceAssignment{corev1.ResourceMemory: &flavorassigner.FlavorAssignment{Mode: tc.assignmentMode}}, 2347 }, 2348 { 2349 Name: "gpu", 2350 Status: &flavorassigner.Status{}, 2351 Flavors: 
flavorassigner.ResourceAssignment{"gpu": &flavorassigner.FlavorAssignment{Mode: tc.assignmentMode}}, 2352 }, 2353 }, 2354 Borrowing: tc.borrowing, 2355 Usage: tc.assignmentUsage, 2356 } 2357 e := &entry{assignment: assignment} 2358 cl := utiltesting.NewClientBuilder(). 2359 WithLists(&kueue.ClusterQueueList{Items: []kueue.ClusterQueue{*cq}}). 2360 Build() 2361 cqCache := cache.New(cl) 2362 for _, flavor := range resourceFlavors { 2363 cqCache.AddOrUpdateResourceFlavor(flavor) 2364 } 2365 err := cqCache.AddClusterQueue(ctx, cq) 2366 if err != nil { 2367 t.Errorf("Error when adding ClusterQueue to the cache: %v", err) 2368 } 2369 cachedCQ := cqCache.Snapshot().ClusterQueues["cq"] 2370 cachedCQ.Usage = tc.cqUsage 2371 2372 got := resourcesToReserve(e, cachedCQ) 2373 if !reflect.DeepEqual(tc.wantReserved, got) { 2374 t.Errorf("%s failed\n: Want reservedMem: %v, got: %v", tc.name, tc.wantReserved, got) 2375 } 2376 }) 2377 } 2378 }