sigs.k8s.io/kueue@v0.6.2/test/integration/scheduler/scheduler_test.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package scheduler 18 19 import ( 20 "github.com/google/go-cmp/cmp/cmpopts" 21 "github.com/onsi/ginkgo/v2" 22 "github.com/onsi/gomega" 23 corev1 "k8s.io/api/core/v1" 24 "k8s.io/apimachinery/pkg/api/meta" 25 "k8s.io/apimachinery/pkg/api/resource" 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 "k8s.io/apimachinery/pkg/types" 28 "k8s.io/utils/ptr" 29 "sigs.k8s.io/controller-runtime/pkg/client" 30 31 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 32 "sigs.k8s.io/kueue/pkg/features" 33 "sigs.k8s.io/kueue/pkg/metrics" 34 "sigs.k8s.io/kueue/pkg/util/testing" 35 "sigs.k8s.io/kueue/pkg/workload" 36 "sigs.k8s.io/kueue/test/util" 37 ) 38 39 // +kubebuilder:docs-gen:collapse=Imports 40 41 var _ = ginkgo.Describe("Scheduler", func() { 42 const ( 43 instanceKey = "cloud.provider.com/instance" 44 ) 45 46 var ( 47 ns *corev1.Namespace 48 onDemandFlavor *kueue.ResourceFlavor 49 spotTaintedFlavor *kueue.ResourceFlavor 50 spotUntaintedFlavor *kueue.ResourceFlavor 51 spotToleration corev1.Toleration 52 ) 53 54 ginkgo.BeforeEach(func() { 55 ns = &corev1.Namespace{ 56 ObjectMeta: metav1.ObjectMeta{ 57 GenerateName: "core-", 58 }, 59 } 60 _ = features.SetEnable(features.FlavorFungibility, true) 61 gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed()) 62 63 onDemandFlavor = testing.MakeResourceFlavor("on-demand").Label(instanceKey, "on-demand").Obj() 64 65 spotTaintedFlavor = testing.MakeResourceFlavor("spot-tainted"). 66 Label(instanceKey, "spot-tainted"). 67 Taint(corev1.Taint{ 68 Key: instanceKey, 69 Value: "spot-tainted", 70 Effect: corev1.TaintEffectNoSchedule, 71 }).Obj() 72 73 spotToleration = corev1.Toleration{ 74 Key: instanceKey, 75 Operator: corev1.TolerationOpEqual, 76 Value: spotTaintedFlavor.Name, 77 Effect: corev1.TaintEffectNoSchedule, 78 } 79 80 spotUntaintedFlavor = testing.MakeResourceFlavor("spot-untainted").Label(instanceKey, "spot-untainted").Obj() 81 }) 82 83 ginkgo.When("Scheduling workloads on clusterQueues", func() { 84 var ( 85 prodClusterQ *kueue.ClusterQueue 86 devClusterQ *kueue.ClusterQueue 87 podsCountClusterQ *kueue.ClusterQueue 88 podsCountOnlyClusterQ *kueue.ClusterQueue 89 preemptionClusterQ *kueue.ClusterQueue 90 prodQueue *kueue.LocalQueue 91 devQueue *kueue.LocalQueue 92 podsCountQueue *kueue.LocalQueue 93 podsCountOnlyQueue *kueue.LocalQueue 94 preemptionQueue *kueue.LocalQueue 95 cqsStopPolicy *kueue.StopPolicy 96 ) 97 98 ginkgo.JustBeforeEach(func() { 99 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).To(gomega.Succeed()) 100 gomega.Expect(k8sClient.Create(ctx, spotTaintedFlavor)).To(gomega.Succeed()) 101 gomega.Expect(k8sClient.Create(ctx, spotUntaintedFlavor)).To(gomega.Succeed()) 102 cqsStopPolicy := ptr.Deref(cqsStopPolicy, kueue.None) 103 104 prodClusterQ = testing.MakeClusterQueue("prod-cq"). 105 ResourceGroup( 106 *testing.MakeFlavorQuotas("spot-tainted").Resource(corev1.ResourceCPU, "5", "5").Obj(), 107 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 108 ). 109 Cohort("prod-cohort"). 110 StopPolicy(cqsStopPolicy). 111 Obj() 112 gomega.Expect(k8sClient.Create(ctx, prodClusterQ)).Should(gomega.Succeed()) 113 114 devClusterQ = testing.MakeClusterQueue("dev-clusterqueue"). 115 ResourceGroup( 116 *testing.MakeFlavorQuotas("spot-untainted").Resource(corev1.ResourceCPU, "5").Obj(), 117 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 118 ). 119 StopPolicy(cqsStopPolicy). 120 Obj() 121 gomega.Expect(k8sClient.Create(ctx, devClusterQ)).Should(gomega.Succeed()) 122 123 podsCountClusterQ = testing.MakeClusterQueue("pods-count-cq"). 124 ResourceGroup( 125 *testing.MakeFlavorQuotas("on-demand"). 126 Resource(corev1.ResourceCPU, "100"). 127 Resource(corev1.ResourcePods, "5"). 128 Obj(), 129 ). 130 StopPolicy(cqsStopPolicy). 131 Obj() 132 gomega.Expect(k8sClient.Create(ctx, podsCountClusterQ)).Should(gomega.Succeed()) 133 134 podsCountOnlyClusterQ = testing.MakeClusterQueue("pods-count-only-cq"). 135 ResourceGroup( 136 *testing.MakeFlavorQuotas("on-demand"). 137 Resource(corev1.ResourcePods, "5"). 138 Obj(), 139 ). 140 StopPolicy(cqsStopPolicy). 141 Obj() 142 gomega.Expect(k8sClient.Create(ctx, podsCountOnlyClusterQ)).Should(gomega.Succeed()) 143 144 preemptionClusterQ = testing.MakeClusterQueue("preemption-cq"). 145 ResourceGroup( 146 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "3").Obj(), 147 ). 148 Preemption(kueue.ClusterQueuePreemption{ 149 WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, 150 }). 151 StopPolicy(cqsStopPolicy). 152 Obj() 153 gomega.Expect(k8sClient.Create(ctx, preemptionClusterQ)).Should(gomega.Succeed()) 154 155 prodQueue = testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj() 156 gomega.Expect(k8sClient.Create(ctx, prodQueue)).Should(gomega.Succeed()) 157 158 devQueue = testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(devClusterQ.Name).Obj() 159 gomega.Expect(k8sClient.Create(ctx, devQueue)).Should(gomega.Succeed()) 160 161 podsCountQueue = testing.MakeLocalQueue("pods-count-queue", ns.Name).ClusterQueue(podsCountClusterQ.Name).Obj() 162 gomega.Expect(k8sClient.Create(ctx, podsCountQueue)).Should(gomega.Succeed()) 163 164 podsCountOnlyQueue = testing.MakeLocalQueue("pods-count-only-queue", ns.Name).ClusterQueue(podsCountOnlyClusterQ.Name).Obj() 165 gomega.Expect(k8sClient.Create(ctx, podsCountOnlyQueue)).Should(gomega.Succeed()) 166 167 preemptionQueue = testing.MakeLocalQueue("preemption-queue", ns.Name).ClusterQueue(preemptionClusterQ.Name).Obj() 168 gomega.Expect(k8sClient.Create(ctx, preemptionQueue)).Should(gomega.Succeed()) 169 }) 170 171 ginkgo.JustAfterEach(func() { 172 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 173 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, prodClusterQ, true) 174 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, devClusterQ, true) 175 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, podsCountClusterQ, true) 176 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, podsCountOnlyClusterQ, true) 177 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, preemptionClusterQ, true) 178 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 179 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotTaintedFlavor, true) 180 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotUntaintedFlavor, true) 181 }) 182 183 ginkgo.It("Should admit workloads as they fit in their ClusterQueue", func() { 184 ginkgo.By("checking the first prod workload gets admitted") 185 prodWl1 := testing.MakeWorkload("prod-wl1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2").Obj() 186 gomega.Expect(k8sClient.Create(ctx, prodWl1)).Should(gomega.Succeed()) 187 prodWl1Admission := testing.MakeAdmission(prodClusterQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "2").Obj() 188 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, prodWl1, prodWl1Admission) 189 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) 190 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) 191 util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1) 192 193 ginkgo.By("checking a second no-fit workload does not get admitted") 194 prodWl2 := testing.MakeWorkload("prod-wl2", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "5").Obj() 195 gomega.Expect(k8sClient.Create(ctx, prodWl2)).Should(gomega.Succeed()) 196 util.ExpectWorkloadsToBePending(ctx, k8sClient, prodWl2) 197 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 1) 198 199 ginkgo.By("checking a dev workload gets admitted") 200 devWl := testing.MakeWorkload("dev-wl", ns.Name).Queue(devQueue.Name).Request(corev1.ResourceCPU, "5").Obj() 201 gomega.Expect(k8sClient.Create(ctx, devWl)).Should(gomega.Succeed()) 202 spotUntaintedFlavorAdmission := testing.MakeAdmission(devClusterQ.Name).Assignment(corev1.ResourceCPU, "spot-untainted", "5").Obj() 203 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, devWl, spotUntaintedFlavorAdmission) 204 util.ExpectPendingWorkloadsMetric(devClusterQ, 0, 0) 205 util.ExpectReservingActiveWorkloadsMetric(devClusterQ, 1) 206 util.ExpectAdmittedWorkloadsTotalMetric(devClusterQ, 1) 207 208 ginkgo.By("checking the second workload gets admitted when the first workload finishes") 209 util.FinishWorkloads(ctx, k8sClient, prodWl1) 210 prodWl2Admission := testing.MakeAdmission(prodClusterQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "5").Obj() 211 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, prodWl2, prodWl2Admission) 212 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) 213 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) 214 util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2) 215 }) 216 217 ginkgo.It("Should admit workloads as number of pods allows it", func() { 218 wl1 := testing.MakeWorkload("wl1", ns.Name). 219 Queue(podsCountQueue.Name). 220 PodSets(*testing.MakePodSet("main", 3). 221 Request(corev1.ResourceCPU, "2"). 222 Obj()). 223 Obj() 224 225 ginkgo.By("checking the first workload gets created and admitted", func() { 226 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 227 wl1Admission := testing.MakeAdmission(podsCountClusterQ.Name). 228 Assignment(corev1.ResourceCPU, "on-demand", "6"). 229 Assignment(corev1.ResourcePods, "on-demand", "3"). 230 AssignmentPodCount(3). 231 Obj() 232 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, wl1Admission) 233 util.ExpectPendingWorkloadsMetric(podsCountClusterQ, 0, 0) 234 util.ExpectReservingActiveWorkloadsMetric(podsCountClusterQ, 1) 235 util.ExpectAdmittedWorkloadsTotalMetric(podsCountClusterQ, 1) 236 }) 237 238 wl2 := testing.MakeWorkload("wl2", ns.Name). 239 Queue(podsCountQueue.Name). 240 PodSets(*testing.MakePodSet("main", 3). 241 Request(corev1.ResourceCPU, "2"). 242 Obj()). 243 Obj() 244 245 wl3 := testing.MakeWorkload("wl3", ns.Name). 246 Queue(podsCountQueue.Name). 247 PodSets(*testing.MakePodSet("main", 2). 248 Request(corev1.ResourceCPU, "2"). 249 Obj()). 250 Obj() 251 252 ginkgo.By("creating the next two workloads", func() { 253 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 254 gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed()) 255 }) 256 257 ginkgo.By("checking the second workload is pending and the third admitted", func() { 258 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, podsCountClusterQ.Name, wl1, wl3) 259 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) 260 util.ExpectPendingWorkloadsMetric(podsCountClusterQ, 0, 1) 261 util.ExpectReservingActiveWorkloadsMetric(podsCountClusterQ, 2) 262 util.ExpectAdmittedWorkloadsTotalMetric(podsCountClusterQ, 2) 263 }) 264 265 ginkgo.By("finishing the first workload", func() { 266 util.FinishWorkloads(ctx, k8sClient, wl1) 267 }) 268 269 ginkgo.By("checking the second workload is also admitted", func() { 270 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, podsCountClusterQ.Name, wl2, wl3) 271 util.ExpectPendingWorkloadsMetric(podsCountClusterQ, 0, 0) 272 util.ExpectReservingActiveWorkloadsMetric(podsCountClusterQ, 2) 273 util.ExpectAdmittedWorkloadsTotalMetric(podsCountClusterQ, 3) 274 }) 275 }) 276 277 ginkgo.It("Should admit workloads as the number of pods (only) allows it", func() { 278 wl1 := testing.MakeWorkload("wl1", ns.Name). 279 Queue(podsCountOnlyQueue.Name). 280 PodSets(*testing.MakePodSet("main", 3). 281 Obj()). 282 Obj() 283 284 ginkgo.By("checking the first workload gets created and admitted", func() { 285 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 286 wl1Admission := testing.MakeAdmission(podsCountOnlyClusterQ.Name). 287 Assignment(corev1.ResourcePods, "on-demand", "3"). 288 AssignmentPodCount(3). 289 Obj() 290 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, wl1Admission) 291 util.ExpectPendingWorkloadsMetric(podsCountOnlyClusterQ, 0, 0) 292 util.ExpectReservingActiveWorkloadsMetric(podsCountOnlyClusterQ, 1) 293 util.ExpectAdmittedWorkloadsTotalMetric(podsCountOnlyClusterQ, 1) 294 }) 295 296 wl2 := testing.MakeWorkload("wl2", ns.Name). 297 Queue(podsCountOnlyQueue.Name). 298 PodSets(*testing.MakePodSet("main", 3). 299 Obj()). 300 Obj() 301 302 wl3 := testing.MakeWorkload("wl3", ns.Name). 303 Queue(podsCountOnlyQueue.Name). 304 PodSets(*testing.MakePodSet("main", 2). 305 Obj()). 306 Obj() 307 308 ginkgo.By("creating the next two workloads", func() { 309 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 310 gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed()) 311 }) 312 313 ginkgo.By("checking the second workload is pending and the third admitted", func() { 314 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, podsCountOnlyClusterQ.Name, wl1, wl3) 315 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) 316 util.ExpectPendingWorkloadsMetric(podsCountOnlyClusterQ, 0, 1) 317 util.ExpectReservingActiveWorkloadsMetric(podsCountOnlyClusterQ, 2) 318 util.ExpectAdmittedWorkloadsTotalMetric(podsCountOnlyClusterQ, 2) 319 }) 320 321 ginkgo.By("finishing the first workload", func() { 322 util.FinishWorkloads(ctx, k8sClient, wl1) 323 }) 324 325 ginkgo.By("checking the second workload is also admitted", func() { 326 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, podsCountOnlyClusterQ.Name, wl2, wl3) 327 util.ExpectPendingWorkloadsMetric(podsCountOnlyClusterQ, 0, 0) 328 util.ExpectReservingActiveWorkloadsMetric(podsCountOnlyClusterQ, 2) 329 util.ExpectAdmittedWorkloadsTotalMetric(podsCountOnlyClusterQ, 3) 330 }) 331 }) 332 333 ginkgo.It("Should admit workloads when resources are dynamically reclaimed", func() { 334 firstWl := testing.MakeWorkload("first-wl", ns.Name).Queue(preemptionQueue.Name). 335 PodSets( 336 *testing.MakePodSet("first", 1).Request(corev1.ResourceCPU, "1").Obj(), 337 *testing.MakePodSet("second", 1).Request(corev1.ResourceCPU, "1").Obj(), 338 *testing.MakePodSet("third", 1).Request(corev1.ResourceCPU, "1").Obj(), 339 ). 340 Obj() 341 ginkgo.By("Creating first workload", func() { 342 gomega.Expect(k8sClient.Create(ctx, firstWl)).Should(gomega.Succeed()) 343 344 util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, firstWl) 345 util.ExpectPendingWorkloadsMetric(preemptionClusterQ, 0, 0) 346 util.ExpectReservingActiveWorkloadsMetric(preemptionClusterQ, 1) 347 }) 348 349 ginkgo.By("Reclaim one pod from the first workload", func() { 350 gomega.Expect(workload.UpdateReclaimablePods(ctx, k8sClient, firstWl, []kueue.ReclaimablePod{{Name: "third", Count: 1}})).To(gomega.Succeed()) 351 352 util.ExpectPendingWorkloadsMetric(preemptionClusterQ, 0, 0) 353 util.ExpectReservingActiveWorkloadsMetric(preemptionClusterQ, 1) 354 }) 355 356 secondWl := testing.MakeWorkload("second-wl", ns.Name).Queue(preemptionQueue.Name). 357 PodSets( 358 *testing.MakePodSet("first", 1).Request(corev1.ResourceCPU, "1").Obj(), 359 *testing.MakePodSet("second", 1).Request(corev1.ResourceCPU, "1").Obj(), 360 *testing.MakePodSet("third", 1).Request(corev1.ResourceCPU, "1").Obj(), 361 ). 362 Priority(100). 363 Obj() 364 ginkgo.By("Creating the second workload", func() { 365 gomega.Expect(k8sClient.Create(ctx, secondWl)).Should(gomega.Succeed()) 366 367 util.FinishEvictionForWorkloads(ctx, k8sClient, firstWl) 368 util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, secondWl) 369 util.ExpectPendingWorkloadsMetric(preemptionClusterQ, 0, 1) 370 util.ExpectReservingActiveWorkloadsMetric(preemptionClusterQ, 1) 371 }) 372 373 ginkgo.By("Reclaim two pods from the second workload so that the first workload is resumed", func() { 374 gomega.Expect(workload.UpdateReclaimablePods(ctx, k8sClient, secondWl, []kueue.ReclaimablePod{{Name: "first", Count: 1}, {Name: "second", Count: 1}})).To(gomega.Succeed()) 375 376 util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, firstWl, secondWl) 377 util.ExpectPendingWorkloadsMetric(preemptionClusterQ, 0, 0) 378 util.ExpectReservingActiveWorkloadsMetric(preemptionClusterQ, 2) 379 }) 380 }) 381 382 ginkgo.When("Hold at startup", func() { 383 ginkgo.BeforeEach(func() { 384 cqsStopPolicy = ptr.To(kueue.Hold) 385 }) 386 ginkgo.AfterEach(func() { 387 cqsStopPolicy = nil 388 }) 389 ginkgo.It("Should admit workloads according to their priorities", func() { 390 const lowPrio, midPrio, highPrio = 0, 10, 100 391 392 wlLow := testing.MakeWorkload("wl-low-priority", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2").Priority(lowPrio).Obj() 393 gomega.Expect(k8sClient.Create(ctx, wlLow)).Should(gomega.Succeed()) 394 wlMid1 := testing.MakeWorkload("wl-mid-priority-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2").Priority(midPrio).Obj() 395 gomega.Expect(k8sClient.Create(ctx, wlMid1)).Should(gomega.Succeed()) 396 wlMid2 := testing.MakeWorkload("wl-mid-priority-2", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2").Priority(midPrio).Obj() 397 gomega.Expect(k8sClient.Create(ctx, wlMid2)).Should(gomega.Succeed()) 398 wlHigh1 := testing.MakeWorkload("wl-high-priority-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2").Priority(highPrio).Obj() 399 gomega.Expect(k8sClient.Create(ctx, wlHigh1)).Should(gomega.Succeed()) 400 wlHigh2 := testing.MakeWorkload("wl-high-priority-2", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2").Priority(highPrio).Obj() 401 gomega.Expect(k8sClient.Create(ctx, wlHigh2)).Should(gomega.Succeed()) 402 403 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 5) 404 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 0) 405 406 util.UnholdQueue(ctx, k8sClient, prodClusterQ) 407 408 ginkgo.By("checking the workloads with lower priority do not get admitted") 409 util.ExpectWorkloadsToBePending(ctx, k8sClient, wlLow, wlMid1, wlMid2) 410 411 ginkgo.By("checking the workloads with high priority get admitted") 412 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wlHigh1, wlHigh2) 413 414 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 3) 415 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2) 416 util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 2) 417 418 ginkgo.By("after the high priority workloads finish, only the mid priority workloads should be admitted") 419 util.FinishWorkloads(ctx, k8sClient, wlHigh1, wlHigh2) 420 421 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wlMid1, wlMid2) 422 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 1) 423 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2) 424 util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 4) 425 }) 426 }) 427 428 ginkgo.It("Should admit two small workloads after a big one finishes", func() { 429 bigWl := testing.MakeWorkload("big-wl", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "5").Obj() 430 ginkgo.By("Creating big workload") 431 gomega.Expect(k8sClient.Create(ctx, bigWl)).Should(gomega.Succeed()) 432 433 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, bigWl) 434 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) 435 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) 436 util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 1) 437 438 smallWl1 := testing.MakeWorkload("small-wl-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2.5").Obj() 439 smallWl2 := testing.MakeWorkload("small-wl-2", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2.5").Obj() 440 ginkgo.By("Creating two small workloads") 441 gomega.Expect(k8sClient.Create(ctx, smallWl1)).Should(gomega.Succeed()) 442 gomega.Expect(k8sClient.Create(ctx, smallWl2)).Should(gomega.Succeed()) 443 444 util.ExpectWorkloadsToBePending(ctx, k8sClient, smallWl1, smallWl2) 445 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 2) 446 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) 447 448 ginkgo.By("Marking the big workload as finished") 449 util.FinishWorkloads(ctx, k8sClient, bigWl) 450 451 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, smallWl1, smallWl2) 452 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) 453 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2) 454 util.ExpectAdmittedWorkloadsTotalMetric(prodClusterQ, 3) 455 }) 456 457 ginkgo.It("Reclaimed resources are not accounted during admission", func() { 458 wl := testing.MakeWorkload("first-wl", ns.Name).Queue(prodQueue.Name). 459 PodSets(*testing.MakePodSet("main", 2).Request(corev1.ResourceCPU, "3").Obj()). 460 Obj() 461 ginkgo.By("Creating the workload", func() { 462 gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) 463 464 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl) 465 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 1) 466 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 0) 467 }) 468 ginkgo.By("Mark one pod as reclaimable", func() { 469 gomega.Expect(workload.UpdateReclaimablePods(ctx, k8sClient, wl, []kueue.ReclaimablePod{{Name: "main", Count: 1}})).To(gomega.Succeed()) 470 471 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wl) 472 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) 473 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) 474 475 createWl := &kueue.Workload{} 476 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), createWl)).To(gomega.Succeed()) 477 gomega.Expect(*createWl.Status.Admission.PodSetAssignments[0].Count).To(gomega.Equal(int32(1))) 478 479 }) 480 }) 481 }) 482 483 ginkgo.When("Handling workloads events", func() { 484 var ( 485 cq *kueue.ClusterQueue 486 queue *kueue.LocalQueue 487 ) 488 489 ginkgo.BeforeEach(func() { 490 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 491 gomega.Expect(k8sClient.Create(ctx, spotTaintedFlavor)).Should(gomega.Succeed()) 492 493 cq = testing.MakeClusterQueue("cluster-queue"). 494 Cohort("prod"). 495 ResourceGroup( 496 *testing.MakeFlavorQuotas("spot-tainted").Resource(corev1.ResourceCPU, "5", "5").Obj(), 497 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 498 ).Obj() 499 gomega.Expect(k8sClient.Create(ctx, cq)).Should(gomega.Succeed()) 500 queue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(cq.Name).Obj() 501 gomega.Expect(k8sClient.Create(ctx, queue)).Should(gomega.Succeed()) 502 }) 503 504 ginkgo.AfterEach(func() { 505 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 506 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cq, true) 507 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 508 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotTaintedFlavor, true) 509 }) 510 511 ginkgo.It("Should re-enqueue by the delete event of workload belonging to the same ClusterQueue", func() { 512 ginkgo.By("First big workload starts") 513 wl1 := testing.MakeWorkload("on-demand-wl1", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "4").Obj() 514 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 515 expectWl1Admission := testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "on-demand", "4").Obj() 516 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectWl1Admission) 517 util.ExpectPendingWorkloadsMetric(cq, 0, 0) 518 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 519 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 520 521 ginkgo.By("Second big workload is pending") 522 wl2 := testing.MakeWorkload("on-demand-wl2", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "4").Obj() 523 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 524 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) 525 util.ExpectPendingWorkloadsMetric(cq, 0, 1) 526 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 527 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 528 529 ginkgo.By("Third small workload starts") 530 wl3 := testing.MakeWorkload("on-demand-wl3", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "1").Obj() 531 gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed()) 532 expectWl3Admission := testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "on-demand", "1").Obj() 533 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, expectWl3Admission) 534 util.ExpectPendingWorkloadsMetric(cq, 0, 1) 535 util.ExpectReservingActiveWorkloadsMetric(cq, 2) 536 util.ExpectAdmittedWorkloadsTotalMetric(cq, 2) 537 538 ginkgo.By("Second big workload starts after the first one is deleted") 539 gomega.Expect(k8sClient.Delete(ctx, wl1, client.PropagationPolicy(metav1.DeletePropagationBackground))).Should(gomega.Succeed()) 540 expectWl2Admission := testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "on-demand", "4").Obj() 541 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, expectWl2Admission) 542 util.ExpectPendingWorkloadsMetric(cq, 0, 0) 543 util.ExpectReservingActiveWorkloadsMetric(cq, 2) 544 util.ExpectAdmittedWorkloadsTotalMetric(cq, 3) 545 }) 546 547 ginkgo.It("Should re-enqueue by the delete event of workload belonging to the same Cohort", func() { 548 fooCQ := testing.MakeClusterQueue("foo-clusterqueue"). 549 Cohort(cq.Spec.Cohort). 550 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj()). 551 Obj() 552 gomega.Expect(k8sClient.Create(ctx, fooCQ)).Should(gomega.Succeed()) 553 defer func() { 554 gomega.Expect(util.DeleteClusterQueue(ctx, k8sClient, fooCQ)).Should(gomega.Succeed()) 555 }() 556 557 fooQ := testing.MakeLocalQueue("foo-queue", ns.Name).ClusterQueue(fooCQ.Name).Obj() 558 gomega.Expect(k8sClient.Create(ctx, fooQ)).Should(gomega.Succeed()) 559 560 ginkgo.By("First big workload starts") 561 wl1 := testing.MakeWorkload("on-demand-wl1", ns.Name).Queue(fooQ.Name).Request(corev1.ResourceCPU, "8").Obj() 562 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 563 expectAdmission := testing.MakeAdmission(fooCQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "8").Obj() 564 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectAdmission) 565 util.ExpectPendingWorkloadsMetric(fooCQ, 0, 0) 566 util.ExpectReservingActiveWorkloadsMetric(fooCQ, 1) 567 util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 1) 568 569 ginkgo.By("Second big workload is pending") 570 wl2 := testing.MakeWorkload("on-demand-wl2", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "8").Obj() 571 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 572 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) 573 util.ExpectPendingWorkloadsMetric(cq, 0, 1) 574 util.ExpectReservingActiveWorkloadsMetric(cq, 0) 575 util.ExpectAdmittedWorkloadsTotalMetric(cq, 0) 576 577 ginkgo.By("Third small workload starts") 578 wl3 := testing.MakeWorkload("on-demand-wl3", ns.Name).Queue(fooQ.Name).Request(corev1.ResourceCPU, "2").Obj() 579 gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed()) 580 expectAdmission = testing.MakeAdmission(fooCQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "2").Obj() 581 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, expectAdmission) 582 util.ExpectPendingWorkloadsMetric(fooCQ, 0, 0) 583 util.ExpectReservingActiveWorkloadsMetric(fooCQ, 2) 584 util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 2) 585 586 ginkgo.By("Second big workload starts after the first one is deleted") 587 gomega.Expect(k8sClient.Delete(ctx, wl1, client.PropagationPolicy(metav1.DeletePropagationBackground))).Should(gomega.Succeed()) 588 expectAdmission = testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "on-demand", "8").Obj() 589 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, expectAdmission) 590 util.ExpectPendingWorkloadsMetric(cq, 0, 0) 591 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 592 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 593 }) 594 }) 595 596 ginkgo.When("Handling clusterQueue events", func() { 597 var ( 598 cq *kueue.ClusterQueue 599 queue *kueue.LocalQueue 600 ) 601 602 ginkgo.BeforeEach(func() { 603 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 604 605 cq = testing.MakeClusterQueue("cluster-queue"). 606 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj()). 607 Obj() 608 gomega.Expect(k8sClient.Create(ctx, cq)).Should(gomega.Succeed()) 609 queue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(cq.Name).Obj() 610 gomega.Expect(k8sClient.Create(ctx, queue)).Should(gomega.Succeed()) 611 }) 612 613 ginkgo.AfterEach(func() { 614 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 615 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cq, true) 616 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 617 }) 618 ginkgo.It("Should re-enqueue by the update event of ClusterQueue", func() { 619 metrics.AdmissionAttemptsTotal.Reset() 620 wl := testing.MakeWorkload("on-demand-wl", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "6").Obj() 621 gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) 622 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl) 623 util.ExpectPendingWorkloadsMetric(cq, 0, 1) 624 util.ExpectReservingActiveWorkloadsMetric(cq, 0) 625 util.ExpectAdmittedWorkloadsTotalMetric(cq, 0) 626 util.ExpectAdmissionAttemptsMetric(1, 0) 627 628 ginkgo.By("updating ClusterQueue") 629 updatedCq := &kueue.ClusterQueue{} 630 631 gomega.Eventually(func() error { 632 err := k8sClient.Get(ctx, types.NamespacedName{Name: cq.Name}, updatedCq) 633 if err != nil { 634 return err 635 } 636 updatedCq.Spec.Cohort = "cohort" 637 updatedCq.Spec.ResourceGroups[0].Flavors[0].Resources[0] = kueue.ResourceQuota{ 638 Name: corev1.ResourceCPU, 639 NominalQuota: resource.MustParse("6"), 640 BorrowingLimit: ptr.To(resource.MustParse("0")), 641 } 642 return k8sClient.Update(ctx, updatedCq) 643 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 644 645 expectAdmission := testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "on-demand", "6").Obj() 646 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl, expectAdmission) 647 util.ExpectPendingWorkloadsMetric(cq, 0, 0) 648 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 649 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 650 util.ExpectAdmissionAttemptsMetric(1, 1) 651 }) 652 }) 653 654 ginkgo.When("Using clusterQueue NamespaceSelector", func() { 655 var ( 656 cq *kueue.ClusterQueue 657 queue *kueue.LocalQueue 658 nsFoo *corev1.Namespace 659 queueFoo *kueue.LocalQueue 660 ) 661 662 ginkgo.BeforeEach(func() { 663 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 664 665 cq = testing.MakeClusterQueue("cluster-queue-with-selector"). 666 NamespaceSelector(&metav1.LabelSelector{ 667 MatchExpressions: []metav1.LabelSelectorRequirement{ 668 { 669 Key: "dep", 670 Operator: metav1.LabelSelectorOpIn, 671 Values: []string{"eng"}, 672 }, 673 }, 674 }). 675 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj()). 676 Obj() 677 gomega.Expect(k8sClient.Create(ctx, cq)).Should(gomega.Succeed()) 678 679 queue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(cq.Name).Obj() 680 gomega.Expect(k8sClient.Create(ctx, queue)).Should(gomega.Succeed()) 681 682 nsFoo = &corev1.Namespace{ 683 ObjectMeta: metav1.ObjectMeta{ 684 GenerateName: "foo-", 685 }, 686 } 687 gomega.Expect(k8sClient.Create(ctx, nsFoo)).To(gomega.Succeed()) 688 queueFoo = testing.MakeLocalQueue("foo", nsFoo.Name).ClusterQueue(cq.Name).Obj() 689 gomega.Expect(k8sClient.Create(ctx, queueFoo)).Should(gomega.Succeed()) 690 }) 691 692 ginkgo.AfterEach(func() { 693 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 694 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cq, true) 695 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 696 }) 697 698 ginkgo.It("Should admit workloads from the selected namespaces", func() { 699 ginkgo.By("checking the workloads don't get admitted at first") 700 wl1 := testing.MakeWorkload("wl1", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "1").Obj() 701 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 702 wl2 := testing.MakeWorkload("wl2", nsFoo.Name).Queue(queueFoo.Name).Request(corev1.ResourceCPU, "1").Obj() 703 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 704 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl1, wl2) 705 util.ExpectPendingWorkloadsMetric(cq, 0, 2) 706 util.ExpectReservingActiveWorkloadsMetric(cq, 0) 707 util.ExpectAdmittedWorkloadsTotalMetric(cq, 0) 708 709 ginkgo.By("checking the first workload gets admitted after updating the namespace labels to match CQ selector") 710 ns.Labels = map[string]string{"dep": "eng"} 711 gomega.Expect(k8sClient.Update(ctx, ns)).Should(gomega.Succeed()) 712 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, cq.Name, wl1) 713 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 714 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 715 util.ExpectPendingWorkloadsMetric(cq, 0, 1) 716 }) 717 }) 718 719 ginkgo.When("Referencing resourceFlavors in clusterQueue", func() { 720 var ( 721 fooCQ *kueue.ClusterQueue 722 fooQ *kueue.LocalQueue 723 ) 724 725 ginkgo.BeforeEach(func() { 726 fooCQ = testing.MakeClusterQueue("foo-cq"). 727 QueueingStrategy(kueue.BestEffortFIFO). 728 ResourceGroup(*testing.MakeFlavorQuotas("foo-flavor").Resource(corev1.ResourceCPU, "15").Obj()). 729 Obj() 730 gomega.Expect(k8sClient.Create(ctx, fooCQ)).Should(gomega.Succeed()) 731 fooQ = testing.MakeLocalQueue("foo-queue", ns.Name).ClusterQueue(fooCQ.Name).Obj() 732 gomega.Expect(k8sClient.Create(ctx, fooQ)).Should(gomega.Succeed()) 733 }) 734 735 ginkgo.AfterEach(func() { 736 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 737 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, fooCQ, true) 738 }) 739 740 ginkgo.It("Should be inactive until the flavor is created", func() { 741 ginkgo.By("Creating one workload") 742 util.ExpectClusterQueueStatusMetric(fooCQ, metrics.CQStatusPending) 743 wl := testing.MakeWorkload("workload", ns.Name).Queue(fooQ.Name).Request(corev1.ResourceCPU, "1").Obj() 744 gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) 745 util.ExpectWorkloadsToBeFrozen(ctx, k8sClient, fooCQ.Name, wl) 746 util.ExpectPendingWorkloadsMetric(fooCQ, 0, 1) 747 util.ExpectReservingActiveWorkloadsMetric(fooCQ, 0) 748 util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 0) 749 750 ginkgo.By("Creating foo flavor") 751 fooFlavor := testing.MakeResourceFlavor("foo-flavor").Obj() 752 gomega.Expect(k8sClient.Create(ctx, fooFlavor)).Should(gomega.Succeed()) 753 defer func() { 754 gomega.Expect(util.DeleteResourceFlavor(ctx, k8sClient, fooFlavor)).To(gomega.Succeed()) 755 }() 756 util.ExpectClusterQueueStatusMetric(fooCQ, metrics.CQStatusActive) 757 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, fooCQ.Name, wl) 758 util.ExpectPendingWorkloadsMetric(fooCQ, 0, 0) 759 util.ExpectReservingActiveWorkloadsMetric(fooCQ, 1) 760 util.ExpectAdmittedWorkloadsTotalMetric(fooCQ, 1) 761 }) 762 }) 763 764 ginkgo.When("Using taints in resourceFlavors", func() { 765 var ( 766 cq *kueue.ClusterQueue 767 queue *kueue.LocalQueue 768 ) 769 770 ginkgo.BeforeEach(func() { 771 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 772 gomega.Expect(k8sClient.Create(ctx, spotTaintedFlavor)).Should(gomega.Succeed()) 773 774 cq = testing.MakeClusterQueue("cluster-queue"). 775 QueueingStrategy(kueue.BestEffortFIFO). 776 ResourceGroup( 777 *testing.MakeFlavorQuotas("spot-tainted").Resource(corev1.ResourceCPU, "5", "5").Obj(), 778 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 779 ). 780 Cohort("cohort"). 781 Obj() 782 gomega.Expect(k8sClient.Create(ctx, cq)).Should(gomega.Succeed()) 783 784 queue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(cq.Name).Obj() 785 gomega.Expect(k8sClient.Create(ctx, queue)).Should(gomega.Succeed()) 786 }) 787 788 ginkgo.AfterEach(func() { 789 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 790 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cq, true) 791 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 792 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotTaintedFlavor, true) 793 }) 794 795 ginkgo.It("Should schedule workloads on tolerated flavors", func() { 796 ginkgo.By("checking a workload without toleration starts on the non-tainted flavor") 797 wl1 := testing.MakeWorkload("on-demand-wl1", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "5").Obj() 798 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 799 800 expectAdmission := testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "on-demand", "5").Obj() 801 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectAdmission) 802 util.ExpectPendingWorkloadsMetric(cq, 0, 0) 803 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 804 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 805 806 ginkgo.By("checking a second workload without toleration doesn't start") 807 wl2 := testing.MakeWorkload("on-demand-wl2", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "5").Obj() 808 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 809 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) 810 util.ExpectPendingWorkloadsMetric(cq, 0, 1) 811 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 812 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 813 814 ginkgo.By("checking a third workload with toleration starts") 815 wl3 := testing.MakeWorkload("on-demand-wl3", ns.Name).Queue(queue.Name).Toleration(spotToleration).Request(corev1.ResourceCPU, "5").Obj() 816 gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed()) 817 818 expectAdmission = testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "spot-tainted", "5").Obj() 819 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, expectAdmission) 820 util.ExpectPendingWorkloadsMetric(cq, 0, 1) 821 util.ExpectReservingActiveWorkloadsMetric(cq, 2) 822 util.ExpectAdmittedWorkloadsTotalMetric(cq, 2) 823 }) 824 }) 825 826 ginkgo.When("Using affinity in resourceFlavors", func() { 827 var ( 828 cq *kueue.ClusterQueue 829 queue *kueue.LocalQueue 830 ) 831 832 ginkgo.BeforeEach(func() { 833 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 834 gomega.Expect(k8sClient.Create(ctx, spotUntaintedFlavor)).Should(gomega.Succeed()) 835 836 cq = testing.MakeClusterQueue("cluster-queue"). 837 ResourceGroup( 838 *testing.MakeFlavorQuotas("spot-untainted").Resource(corev1.ResourceCPU, "5").Obj(), 839 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 840 ).Obj() 841 gomega.Expect(k8sClient.Create(ctx, cq)).Should(gomega.Succeed()) 842 843 queue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(cq.Name).Obj() 844 gomega.Expect(k8sClient.Create(ctx, queue)).Should(gomega.Succeed()) 845 }) 846 847 ginkgo.AfterEach(func() { 848 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 849 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cq, true) 850 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 851 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotUntaintedFlavor, true) 852 }) 853 854 ginkgo.It("Should admit workloads with affinity to specific flavor", func() { 855 ginkgo.By("checking a workload without affinity gets admitted on the first flavor") 856 wl1 := testing.MakeWorkload("no-affinity-workload", ns.Name).Queue(queue.Name).Request(corev1.ResourceCPU, "1").Obj() 857 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 858 expectAdmission := testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "spot-untainted", "1").Obj() 859 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, expectAdmission) 860 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 861 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 862 util.ExpectPendingWorkloadsMetric(cq, 0, 0) 863 864 ginkgo.By("checking a second workload with affinity to on-demand gets admitted") 865 wl2 := testing.MakeWorkload("affinity-wl", ns.Name).Queue(queue.Name). 866 NodeSelector(map[string]string{instanceKey: onDemandFlavor.Name, "foo": "bar"}). 867 Request(corev1.ResourceCPU, "1").Obj() 868 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 869 gomega.Expect(len(wl2.Spec.PodSets[0].Template.Spec.NodeSelector)).Should(gomega.Equal(2)) 870 expectAdmission = testing.MakeAdmission(cq.Name).Assignment(corev1.ResourceCPU, "on-demand", "1").Obj() 871 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, expectAdmission) 872 util.ExpectPendingWorkloadsMetric(cq, 0, 0) 873 util.ExpectReservingActiveWorkloadsMetric(cq, 2) 874 util.ExpectAdmittedWorkloadsTotalMetric(cq, 2) 875 }) 876 }) 877 878 ginkgo.When("Creating objects out-of-order", func() { 879 var ( 880 cq *kueue.ClusterQueue 881 q *kueue.LocalQueue 882 w *kueue.Workload 883 ) 884 885 ginkgo.BeforeEach(func() { 886 cq = testing.MakeClusterQueue("cq"). 887 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj()). 888 Obj() 889 q = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(cq.Name).Obj() 890 w = testing.MakeWorkload("workload", ns.Name).Queue(q.Name).Request(corev1.ResourceCPU, "2").Obj() 891 }) 892 893 ginkgo.AfterEach(func() { 894 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 895 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cq, true) 896 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 897 }) 898 899 ginkgo.It("Should admit workload when creating ResourceFlavor->LocalQueue->Workload->ClusterQueue", func() { 900 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).To(gomega.Succeed()) 901 gomega.Expect(k8sClient.Create(ctx, q)).To(gomega.Succeed()) 902 gomega.Expect(k8sClient.Create(ctx, w)).To(gomega.Succeed()) 903 gomega.Expect(k8sClient.Create(ctx, cq)).To(gomega.Succeed()) 904 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, cq.Name, w) 905 }) 906 907 ginkgo.It("Should admit workload when creating Workload->ResourceFlavor->LocalQueue->ClusterQueue", func() { 908 gomega.Expect(k8sClient.Create(ctx, w)).To(gomega.Succeed()) 909 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).To(gomega.Succeed()) 910 gomega.Expect(k8sClient.Create(ctx, q)).To(gomega.Succeed()) 911 gomega.Expect(k8sClient.Create(ctx, cq)).To(gomega.Succeed()) 912 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, cq.Name, w) 913 }) 914 915 ginkgo.It("Should admit workload when creating Workload->ResourceFlavor->ClusterQueue->LocalQueue", func() { 916 gomega.Expect(k8sClient.Create(ctx, w)).To(gomega.Succeed()) 917 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).To(gomega.Succeed()) 918 gomega.Expect(k8sClient.Create(ctx, cq)).To(gomega.Succeed()) 919 gomega.Expect(k8sClient.Create(ctx, q)).To(gomega.Succeed()) 920 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, cq.Name, w) 921 }) 922 923 ginkgo.It("Should admit workload when creating Workload->ClusterQueue->LocalQueue->ResourceFlavor", func() { 924 gomega.Expect(k8sClient.Create(ctx, w)).To(gomega.Succeed()) 925 gomega.Expect(k8sClient.Create(ctx, cq)).To(gomega.Succeed()) 926 gomega.Expect(k8sClient.Create(ctx, q)).To(gomega.Succeed()) 927 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).To(gomega.Succeed()) 928 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, cq.Name, w) 929 }) 930 }) 931 932 ginkgo.When("Using cohorts for fair-sharing", func() { 933 var ( 934 prodCQ *kueue.ClusterQueue 935 devCQ *kueue.ClusterQueue 936 ) 937 938 ginkgo.BeforeEach(func() { 939 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 940 gomega.Expect(k8sClient.Create(ctx, spotTaintedFlavor)).Should(gomega.Succeed()) 941 gomega.Expect(k8sClient.Create(ctx, spotUntaintedFlavor)).Should(gomega.Succeed()) 942 }) 943 944 ginkgo.AfterEach(func() { 945 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 946 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, prodCQ, true) 947 if devCQ != nil { 948 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, devCQ, true) 949 } 950 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 951 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotTaintedFlavor, true) 952 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotUntaintedFlavor, true) 953 }) 954 955 ginkgo.It("Should admit workloads using borrowed ClusterQueue", func() { 956 prodCQ = testing.MakeClusterQueue("prod-cq"). 957 Cohort("all"). 958 ResourceGroup( 959 *testing.MakeFlavorQuotas("spot-tainted").Resource(corev1.ResourceCPU, "5", "0").Obj(), 960 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 961 ).Obj() 962 gomega.Expect(k8sClient.Create(ctx, prodCQ)).Should(gomega.Succeed()) 963 964 queue := testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(prodCQ.Name).Obj() 965 gomega.Expect(k8sClient.Create(ctx, queue)).Should(gomega.Succeed()) 966 967 ginkgo.By("checking a no-fit workload does not get admitted") 968 wl := testing.MakeWorkload("wl", ns.Name).Queue(queue.Name). 969 Request(corev1.ResourceCPU, "10").Toleration(spotToleration).Obj() 970 gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) 971 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl) 972 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 1) 973 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 0) 974 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 0) 975 976 ginkgo.By("checking the workload gets admitted when a fallback ClusterQueue gets added") 977 fallbackClusterQueue := testing.MakeClusterQueue("fallback-cq"). 978 Cohort(prodCQ.Spec.Cohort). 979 ResourceGroup( 980 *testing.MakeFlavorQuotas("spot-tainted").Resource(corev1.ResourceCPU, "5").Obj(), // prod-cq can't borrow this due to its borrowingLimit 981 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 982 ).Obj() 983 gomega.Expect(k8sClient.Create(ctx, fallbackClusterQueue)).Should(gomega.Succeed()) 984 defer func() { 985 gomega.Expect(util.DeleteClusterQueue(ctx, k8sClient, fallbackClusterQueue)).ToNot(gomega.HaveOccurred()) 986 }() 987 988 expectAdmission := testing.MakeAdmission(prodCQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "10").Obj() 989 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl, expectAdmission) 990 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) 991 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) 992 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) 993 }) 994 995 ginkgo.It("Should schedule workloads borrowing quota from ClusterQueues in the same Cohort", func() { 996 prodCQ = testing.MakeClusterQueue("prod-cq"). 997 Cohort("all"). 998 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5", "10").Obj()). 999 Obj() 1000 gomega.Expect(k8sClient.Create(ctx, prodCQ)).Should(gomega.Succeed()) 1001 1002 devCQ = testing.MakeClusterQueue("dev-cq"). 1003 Cohort("all"). 1004 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5", "10").Obj()). 1005 Obj() 1006 gomega.Expect(k8sClient.Create(ctx, devCQ)).Should(gomega.Succeed()) 1007 1008 prodQueue := testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodCQ.Name).Obj() 1009 gomega.Expect(k8sClient.Create(ctx, prodQueue)).Should(gomega.Succeed()) 1010 1011 devQueue := testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(devCQ.Name).Obj() 1012 gomega.Expect(k8sClient.Create(ctx, devQueue)).Should(gomega.Succeed()) 1013 wl1 := testing.MakeWorkload("wl-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "11").Obj() 1014 wl2 := testing.MakeWorkload("wl-2", ns.Name).Queue(devQueue.Name).Request(corev1.ResourceCPU, "11").Obj() 1015 1016 ginkgo.By("Creating two workloads") 1017 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 1018 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 1019 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl1, wl2) 1020 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 1) 1021 util.ExpectPendingWorkloadsMetric(devCQ, 0, 1) 1022 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 0) 1023 util.ExpectReservingActiveWorkloadsMetric(devCQ, 0) 1024 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 0) 1025 util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 0) 1026 1027 // Delay cluster queue creation to make sure workloads are in the same 1028 // scheduling cycle. 1029 testCQ := testing.MakeClusterQueue("test-cq"). 1030 Cohort("all"). 1031 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "15", "0").Obj()). 1032 Obj() 1033 gomega.Expect(k8sClient.Create(ctx, testCQ)).Should(gomega.Succeed()) 1034 defer func() { 1035 gomega.Expect(util.DeleteClusterQueue(ctx, k8sClient, testCQ)).Should(gomega.Succeed()) 1036 }() 1037 1038 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodCQ.Name, wl1) 1039 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, devCQ.Name, wl2) 1040 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) 1041 util.ExpectPendingWorkloadsMetric(devCQ, 0, 0) 1042 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) 1043 util.ExpectReservingActiveWorkloadsMetric(devCQ, 1) 1044 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) 1045 util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 1) 1046 }) 1047 1048 ginkgo.It("Should start workloads that are under min quota before borrowing", func() { 1049 prodCQ = testing.MakeClusterQueue("prod-cq"). 1050 Cohort("all"). 1051 QueueingStrategy(kueue.StrictFIFO). 1052 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "2").Obj()). 1053 Obj() 1054 gomega.Expect(k8sClient.Create(ctx, prodCQ)).To(gomega.Succeed()) 1055 1056 devCQ = testing.MakeClusterQueue("dev-cq"). 1057 Cohort("all"). 1058 QueueingStrategy(kueue.StrictFIFO). 1059 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "0").Obj()). 1060 Obj() 1061 gomega.Expect(k8sClient.Create(ctx, devCQ)).To(gomega.Succeed()) 1062 1063 prodQueue := testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodCQ.Name).Obj() 1064 gomega.Expect(k8sClient.Create(ctx, prodQueue)).To(gomega.Succeed()) 1065 1066 devQueue := testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(devCQ.Name).Obj() 1067 gomega.Expect(k8sClient.Create(ctx, devQueue)).To(gomega.Succeed()) 1068 1069 ginkgo.By("Creating two workloads for prod ClusterQueue") 1070 pWl1 := testing.MakeWorkload("p-wl-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "1").Obj() 1071 pWl2 := testing.MakeWorkload("p-wl-2", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "1").Obj() 1072 gomega.Expect(k8sClient.Create(ctx, pWl1)).To(gomega.Succeed()) 1073 gomega.Expect(k8sClient.Create(ctx, pWl2)).To(gomega.Succeed()) 1074 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodCQ.Name, pWl1, pWl2) 1075 1076 ginkgo.By("Creating a workload for each ClusterQueue") 1077 dWl1 := testing.MakeWorkload("d-wl-1", ns.Name).Queue(devQueue.Name).Request(corev1.ResourceCPU, "1").Obj() 1078 pWl3 := testing.MakeWorkload("p-wl-3", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "2").Obj() 1079 gomega.Expect(k8sClient.Create(ctx, dWl1)).To(gomega.Succeed()) 1080 gomega.Expect(k8sClient.Create(ctx, pWl3)).To(gomega.Succeed()) 1081 util.ExpectWorkloadsToBePending(ctx, k8sClient, dWl1, pWl3) 1082 1083 ginkgo.By("Finishing one workload for prod ClusterQueue") 1084 util.FinishWorkloads(ctx, k8sClient, pWl1) 1085 util.ExpectWorkloadsToBePending(ctx, k8sClient, dWl1, pWl3) 1086 1087 ginkgo.By("Finishing second workload for prod ClusterQueue") 1088 util.FinishWorkloads(ctx, k8sClient, pWl2) 1089 // The pWl3 workload gets accepted, even though it was created after dWl1. 1090 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodCQ.Name, pWl3) 1091 util.ExpectWorkloadsToBePending(ctx, k8sClient, dWl1) 1092 1093 ginkgo.By("Finishing third workload for prod ClusterQueue") 1094 util.FinishWorkloads(ctx, k8sClient, pWl3) 1095 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, devCQ.Name, dWl1) 1096 }) 1097 1098 ginkgo.It("Should try next flavor if can't preempt on first", func() { 1099 prodCQ = testing.MakeClusterQueue("prod-cq"). 1100 QueueingStrategy(kueue.StrictFIFO). 1101 Cohort("all"). 1102 ResourceGroup( 1103 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "2").Obj(), 1104 *testing.MakeFlavorQuotas("spot-untainted").Resource(corev1.ResourceCPU, "2").Obj()). 1105 Preemption(kueue.ClusterQueuePreemption{ 1106 WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, 1107 }). 1108 FlavorFungibility(kueue.FlavorFungibility{ 1109 WhenCanPreempt: kueue.Preempt, 1110 }). 1111 Obj() 1112 gomega.Expect(k8sClient.Create(ctx, prodCQ)).Should(gomega.Succeed()) 1113 1114 prodQueue := testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodCQ.Name).Obj() 1115 gomega.Expect(k8sClient.Create(ctx, prodQueue)).Should(gomega.Succeed()) 1116 1117 ginkgo.By("Creating 2 workloads and ensuring they are admitted") 1118 wl1 := testing.MakeWorkload("wl-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "1").Obj() 1119 wl2 := testing.MakeWorkload("wl-2", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "1").Obj() 1120 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 1121 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 1122 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, 1123 testing.MakeAdmission(prodCQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "1").Obj()) 1124 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, 1125 testing.MakeAdmission(prodCQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "1").Obj()) 1126 1127 ginkgo.By("Creating an additional workload that can't fit in the first flavor") 1128 wl3 := testing.MakeWorkload("wl-3", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "1").Obj() 1129 gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed()) 1130 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, 1131 testing.MakeAdmission(prodCQ.Name).Assignment(corev1.ResourceCPU, "spot-untainted", "1").Obj()) 1132 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) 1133 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 3) 1134 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 3) 1135 }) 1136 1137 ginkgo.It("Should try next flavor instead of borrowing", func() { 1138 prodCQ = testing.MakeClusterQueue("prod-cq"). 1139 Cohort("all"). 1140 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "10", "10").Obj()). 1141 Obj() 1142 gomega.Expect(k8sClient.Create(ctx, prodCQ)).Should(gomega.Succeed()) 1143 1144 devCQ = testing.MakeClusterQueue("dev-cq"). 1145 Cohort("all"). 1146 FlavorFungibility(kueue.FlavorFungibility{WhenCanBorrow: kueue.TryNextFlavor}). 1147 ResourceGroup( 1148 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "10", "10").Obj(), 1149 *testing.MakeFlavorQuotas("spot-tainted").Resource(corev1.ResourceCPU, "11").Obj()). 1150 Obj() 1151 gomega.Expect(k8sClient.Create(ctx, devCQ)).Should(gomega.Succeed()) 1152 1153 prodQueue := testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodCQ.Name).Obj() 1154 gomega.Expect(k8sClient.Create(ctx, prodQueue)).Should(gomega.Succeed()) 1155 1156 devQueue := testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(devCQ.Name).Obj() 1157 gomega.Expect(k8sClient.Create(ctx, devQueue)).Should(gomega.Succeed()) 1158 1159 ginkgo.By("Creating one workload") 1160 wl1 := testing.MakeWorkload("wl-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "9").Obj() 1161 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 1162 prodWl1Admission := testing.MakeAdmission(prodCQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "9").Obj() 1163 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl1, prodWl1Admission) 1164 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) 1165 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) 1166 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) 1167 1168 ginkgo.By("Creating another workload") 1169 wl2 := testing.MakeWorkload("wl-2", ns.Name).Queue(devQueue.Name).Request(corev1.ResourceCPU, "11").Toleration(spotToleration).Obj() 1170 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 1171 prodWl2Admission := testing.MakeAdmission(devCQ.Name).Assignment(corev1.ResourceCPU, "spot-tainted", "11").Obj() 1172 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl2, prodWl2Admission) 1173 util.ExpectPendingWorkloadsMetric(devCQ, 0, 0) 1174 util.ExpectReservingActiveWorkloadsMetric(devCQ, 1) 1175 util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 1) 1176 }) 1177 1178 ginkgo.It("Should preempt before try next flavor", func() { 1179 prodCQ = testing.MakeClusterQueue("prod-cq"). 1180 Cohort("all"). 1181 ResourceGroup( 1182 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "10", "10").Obj(), 1183 *testing.MakeFlavorQuotas("spot-untainted").Resource(corev1.ResourceCPU, "11").Obj()). 1184 Preemption(kueue.ClusterQueuePreemption{ 1185 ReclaimWithinCohort: kueue.PreemptionPolicyAny, 1186 }). 1187 FlavorFungibility(kueue.FlavorFungibility{WhenCanPreempt: kueue.Preempt}). 1188 Obj() 1189 gomega.Expect(k8sClient.Create(ctx, prodCQ)).Should(gomega.Succeed()) 1190 1191 devCQ = testing.MakeClusterQueue("dev-cq"). 1192 Cohort("all"). 1193 FlavorFungibility(kueue.FlavorFungibility{WhenCanBorrow: kueue.TryNextFlavor}). 1194 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "10", "10").Obj()). 1195 Obj() 1196 gomega.Expect(k8sClient.Create(ctx, devCQ)).Should(gomega.Succeed()) 1197 1198 prodQueue := testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodCQ.Name).Obj() 1199 gomega.Expect(k8sClient.Create(ctx, prodQueue)).Should(gomega.Succeed()) 1200 1201 devQueue := testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(devCQ.Name).Obj() 1202 gomega.Expect(k8sClient.Create(ctx, devQueue)).Should(gomega.Succeed()) 1203 1204 ginkgo.By("Creating two workloads") 1205 wl1 := testing.MakeWorkload("wl-1", ns.Name).Priority(0).Queue(devQueue.Name).Request(corev1.ResourceCPU, "9").Obj() 1206 wl2 := testing.MakeWorkload("wl-2", ns.Name).Priority(1).Queue(devQueue.Name).Request(corev1.ResourceCPU, "9").Obj() 1207 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 1208 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 1209 util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, wl1, wl2) 1210 1211 ginkgo.By("Creating another workload") 1212 wl3 := testing.MakeWorkload("wl-3", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "5").Obj() 1213 gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed()) 1214 util.ExpectWorkloadsToBePreempted(ctx, k8sClient, wl1) 1215 1216 util.FinishEvictionForWorkloads(ctx, k8sClient, wl1) 1217 1218 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl3, 1219 testing.MakeAdmission(prodCQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "5").Obj()) 1220 }) 1221 }) 1222 1223 ginkgo.When("Using cohorts for sharing when LendingLimit enabled", func() { 1224 var ( 1225 prodCQ *kueue.ClusterQueue 1226 devCQ *kueue.ClusterQueue 1227 ) 1228 1229 ginkgo.BeforeEach(func() { 1230 _ = features.SetEnable(features.LendingLimit, true) 1231 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 1232 }) 1233 1234 ginkgo.AfterEach(func() { 1235 _ = features.SetEnable(features.LendingLimit, false) 1236 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 1237 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, prodCQ, true) 1238 if devCQ != nil { 1239 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, devCQ, true) 1240 } 1241 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 1242 }) 1243 1244 ginkgo.It("Should admit workloads using borrowed ClusterQueue", func() { 1245 prodCQ = testing.MakeClusterQueue("prod-cq"). 1246 Cohort("all"). 1247 ResourceGroup( 1248 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5", "", "1").Obj(), 1249 ).Obj() 1250 gomega.Expect(k8sClient.Create(ctx, prodCQ)).Should(gomega.Succeed()) 1251 1252 queue := testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(prodCQ.Name).Obj() 1253 gomega.Expect(k8sClient.Create(ctx, queue)).Should(gomega.Succeed()) 1254 1255 ginkgo.By("checking a no-fit workload does not get admitted") 1256 wl := testing.MakeWorkload("wl", ns.Name).Queue(queue.Name). 1257 Request(corev1.ResourceCPU, "9").Obj() 1258 gomega.Expect(k8sClient.Create(ctx, wl)).Should(gomega.Succeed()) 1259 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl) 1260 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 1) 1261 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 0) 1262 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 0) 1263 1264 ginkgo.By("checking the workload gets admitted when another ClusterQueue gets added") 1265 devCQ := testing.MakeClusterQueue("dev-cq"). 1266 Cohort(prodCQ.Spec.Cohort). 1267 ResourceGroup( 1268 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5", "", "4").Obj(), 1269 ).Obj() 1270 gomega.Expect(k8sClient.Create(ctx, devCQ)).Should(gomega.Succeed()) 1271 defer func() { 1272 gomega.Expect(util.DeleteClusterQueue(ctx, k8sClient, devCQ)).ToNot(gomega.HaveOccurred()) 1273 }() 1274 1275 expectAdmission := testing.MakeAdmission(prodCQ.Name).Assignment(corev1.ResourceCPU, "on-demand", "9").Obj() 1276 util.ExpectWorkloadToBeAdmittedAs(ctx, k8sClient, wl, expectAdmission) 1277 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) 1278 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) 1279 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) 1280 }) 1281 1282 ginkgo.It("Should admit workloads after updating lending limit", func() { 1283 prodCQ = testing.MakeClusterQueue("prod-cq"). 1284 Cohort("all"). 1285 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5", "", "0").Obj()). 1286 Obj() 1287 gomega.Expect(k8sClient.Create(ctx, prodCQ)).Should(gomega.Succeed()) 1288 1289 devCQ = testing.MakeClusterQueue("dev-cq"). 1290 Cohort("all"). 1291 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5", "", "0").Obj()). 1292 Obj() 1293 gomega.Expect(k8sClient.Create(ctx, devCQ)).Should(gomega.Succeed()) 1294 1295 prodQueue := testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodCQ.Name).Obj() 1296 gomega.Expect(k8sClient.Create(ctx, prodQueue)).Should(gomega.Succeed()) 1297 1298 devQueue := testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(devCQ.Name).Obj() 1299 gomega.Expect(k8sClient.Create(ctx, devQueue)).Should(gomega.Succeed()) 1300 1301 wl1 := testing.MakeWorkload("wl-1", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "5").Obj() 1302 wl2 := testing.MakeWorkload("wl-2", ns.Name).Queue(prodQueue.Name).Request(corev1.ResourceCPU, "5").Obj() 1303 1304 ginkgo.By("Creating two workloads") 1305 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 1306 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 1307 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) 1308 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodCQ.Name, wl1) 1309 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 1) 1310 util.ExpectPendingWorkloadsMetric(devCQ, 0, 0) 1311 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 1) 1312 util.ExpectReservingActiveWorkloadsMetric(devCQ, 0) 1313 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 1) 1314 util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 0) 1315 1316 // Update lending limit of cluster queue 1317 gomega.Eventually(func() error { 1318 err := k8sClient.Get(ctx, types.NamespacedName{Name: devCQ.Name}, devCQ) 1319 if err != nil { 1320 return err 1321 } 1322 devCQ.Spec.ResourceGroups[0].Flavors[0].Resources[0] = kueue.ResourceQuota{ 1323 Name: corev1.ResourceCPU, 1324 NominalQuota: resource.MustParse("5"), 1325 LendingLimit: ptr.To(resource.MustParse("5")), 1326 } 1327 return k8sClient.Update(ctx, devCQ) 1328 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1329 1330 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodCQ.Name, wl2) 1331 util.ExpectPendingWorkloadsMetric(prodCQ, 0, 0) 1332 util.ExpectPendingWorkloadsMetric(devCQ, 0, 0) 1333 util.ExpectReservingActiveWorkloadsMetric(prodCQ, 2) 1334 util.ExpectReservingActiveWorkloadsMetric(devCQ, 0) 1335 util.ExpectAdmittedWorkloadsTotalMetric(prodCQ, 2) 1336 util.ExpectAdmittedWorkloadsTotalMetric(devCQ, 0) 1337 }) 1338 }) 1339 1340 ginkgo.When("Queueing with StrictFIFO", func() { 1341 var ( 1342 strictFIFOClusterQ *kueue.ClusterQueue 1343 matchingNS *corev1.Namespace 1344 chName string 1345 ) 1346 1347 ginkgo.BeforeEach(func() { 1348 chName = "cohort" 1349 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 1350 strictFIFOClusterQ = testing.MakeClusterQueue("strict-fifo-cq"). 1351 QueueingStrategy(kueue.StrictFIFO). 1352 NamespaceSelector(&metav1.LabelSelector{ 1353 MatchExpressions: []metav1.LabelSelectorRequirement{ 1354 { 1355 Key: "dep", 1356 Operator: metav1.LabelSelectorOpIn, 1357 Values: []string{"eng"}, 1358 }, 1359 }, 1360 }). 1361 ResourceGroup(*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5", "0").Obj()). 1362 Cohort(chName). 1363 Obj() 1364 gomega.Expect(k8sClient.Create(ctx, strictFIFOClusterQ)).Should(gomega.Succeed()) 1365 matchingNS = &corev1.Namespace{ 1366 ObjectMeta: metav1.ObjectMeta{ 1367 GenerateName: "foo-", 1368 Labels: map[string]string{"dep": "eng"}, 1369 }, 1370 } 1371 gomega.Expect(k8sClient.Create(ctx, matchingNS)).To(gomega.Succeed()) 1372 }) 1373 1374 ginkgo.AfterEach(func() { 1375 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, matchingNS)).To(gomega.Succeed()) 1376 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 1377 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, strictFIFOClusterQ, true) 1378 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 1379 }) 1380 1381 ginkgo.It("Should schedule workloads by their priority strictly", func() { 1382 strictFIFOQueue := testing.MakeLocalQueue("strict-fifo-q", matchingNS.Name).ClusterQueue(strictFIFOClusterQ.Name).Obj() 1383 1384 ginkgo.By("Creating workloads") 1385 wl1 := testing.MakeWorkload("wl1", matchingNS.Name).Queue(strictFIFOQueue. 1386 Name).Request(corev1.ResourceCPU, "2").Priority(100).Obj() 1387 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 1388 wl2 := testing.MakeWorkload("wl2", matchingNS.Name).Queue(strictFIFOQueue. 1389 Name).Request(corev1.ResourceCPU, "5").Priority(10).Obj() 1390 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 1391 // wl3 can't be scheduled before wl2 even though there is enough quota. 1392 wl3 := testing.MakeWorkload("wl3", matchingNS.Name).Queue(strictFIFOQueue. 1393 Name).Request(corev1.ResourceCPU, "1").Priority(1).Obj() 1394 gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed()) 1395 1396 gomega.Expect(k8sClient.Create(ctx, strictFIFOQueue)).Should(gomega.Succeed()) 1397 1398 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, strictFIFOClusterQ.Name, wl1) 1399 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) 1400 // wl3 doesn't even get a scheduling attempt, so can't check for conditions. 1401 gomega.Consistently(func() bool { 1402 lookupKey := types.NamespacedName{Name: wl3.Name, Namespace: wl3.Namespace} 1403 gomega.Expect(k8sClient.Get(ctx, lookupKey, wl3)).Should(gomega.Succeed()) 1404 return !workload.HasQuotaReservation(wl3) 1405 }, util.ConsistentDuration, util.Interval).Should(gomega.Equal(true)) 1406 util.ExpectPendingWorkloadsMetric(strictFIFOClusterQ, 2, 0) 1407 util.ExpectReservingActiveWorkloadsMetric(strictFIFOClusterQ, 1) 1408 util.ExpectAdmittedWorkloadsTotalMetric(strictFIFOClusterQ, 1) 1409 }) 1410 1411 ginkgo.It("Workloads not matching namespaceSelector should not block others", func() { 1412 notMatchingQueue := testing.MakeLocalQueue("not-matching-queue", ns.Name).ClusterQueue(strictFIFOClusterQ.Name).Obj() 1413 gomega.Expect(k8sClient.Create(ctx, notMatchingQueue)).Should(gomega.Succeed()) 1414 1415 matchingQueue := testing.MakeLocalQueue("matching-queue", matchingNS.Name).ClusterQueue(strictFIFOClusterQ.Name).Obj() 1416 gomega.Expect(k8sClient.Create(ctx, matchingQueue)).Should(gomega.Succeed()) 1417 1418 ginkgo.By("Creating workloads") 1419 wl1 := testing.MakeWorkload("wl1", matchingNS.Name).Queue(matchingQueue. 1420 Name).Request(corev1.ResourceCPU, "2").Priority(100).Obj() 1421 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 1422 wl2 := testing.MakeWorkload("wl2", ns.Name).Queue(notMatchingQueue. 1423 Name).Request(corev1.ResourceCPU, "5").Priority(10).Obj() 1424 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 1425 // wl2 can't block wl3 from getting scheduled. 1426 wl3 := testing.MakeWorkload("wl3", matchingNS.Name).Queue(matchingQueue. 1427 Name).Request(corev1.ResourceCPU, "1").Priority(1).Obj() 1428 gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed()) 1429 1430 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, strictFIFOClusterQ.Name, wl1, wl3) 1431 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl2) 1432 util.ExpectPendingWorkloadsMetric(strictFIFOClusterQ, 0, 1) 1433 }) 1434 1435 ginkgo.It("Pending workload with StrictFIFO doesn't block other CQ from borrowing from a third CQ", func() { 1436 ginkgo.By("Creating ClusterQueues and LocalQueues") 1437 cqTeamA := testing.MakeClusterQueue("team-a"). 1438 ResourceGroup(*testing.MakeFlavorQuotas(onDemandFlavor.Name).Resource(corev1.ResourceCPU, "5").Obj()). 1439 Cohort(chName). 1440 Obj() 1441 gomega.Expect(k8sClient.Create(ctx, cqTeamA)).Should(gomega.Succeed()) 1442 defer func() { 1443 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, matchingNS)).To(gomega.Succeed()) 1444 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cqTeamA, true) 1445 }() 1446 1447 strictFIFOLocalQueue := testing.MakeLocalQueue("strict-fifo-q", matchingNS.Name).ClusterQueue(strictFIFOClusterQ.Name).Obj() 1448 lqTeamA := testing.MakeLocalQueue("team-a-lq", matchingNS.Name).ClusterQueue(cqTeamA.Name).Obj() 1449 1450 gomega.Expect(k8sClient.Create(ctx, strictFIFOLocalQueue)).Should(gomega.Succeed()) 1451 gomega.Expect(k8sClient.Create(ctx, lqTeamA)).Should(gomega.Succeed()) 1452 1453 cqSharedResources := testing.MakeClusterQueue("shared-resources"). 1454 ResourceGroup( 1455 *testing.MakeFlavorQuotas(onDemandFlavor.Name).Resource(corev1.ResourceCPU, "5").Obj()). 1456 Cohort(chName). 1457 Obj() 1458 gomega.Expect(k8sClient.Create(ctx, cqSharedResources)).Should(gomega.Succeed()) 1459 defer func() { 1460 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, matchingNS)).To(gomega.Succeed()) 1461 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cqSharedResources, true) 1462 }() 1463 1464 ginkgo.By("Creating workloads") 1465 admittedWl1 := testing.MakeWorkload("wl", matchingNS.Name).Queue(strictFIFOLocalQueue. 1466 Name).Request(corev1.ResourceCPU, "3").Priority(10).Obj() 1467 gomega.Expect(k8sClient.Create(ctx, admittedWl1)).Should(gomega.Succeed()) 1468 1469 admittedWl2 := testing.MakeWorkload("player1-a", matchingNS.Name).Queue(lqTeamA. 1470 Name).Request(corev1.ResourceCPU, "5").Priority(1).Obj() 1471 gomega.Expect(k8sClient.Create(ctx, admittedWl2)).Should(gomega.Succeed()) 1472 1473 util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, admittedWl1, admittedWl2) 1474 gomega.Eventually(func() error { 1475 var cq kueue.ClusterQueue 1476 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cqTeamA), &cq)).To(gomega.Succeed()) 1477 cq.Spec.StopPolicy = ptr.To(kueue.Hold) 1478 return k8sClient.Update(ctx, &cq) 1479 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1480 1481 // pendingWl exceed nominal+borrowing quota and cannot preempt due to low priority. 1482 pendingWl := testing.MakeWorkload("pending-wl", matchingNS.Name).Queue(strictFIFOLocalQueue. 1483 Name).Request(corev1.ResourceCPU, "3").Priority(9).Obj() 1484 gomega.Expect(k8sClient.Create(ctx, pendingWl)).Should(gomega.Succeed()) 1485 1486 // borrowingWL can borrow shared resources, so it should be scheduled even if workloads 1487 // in other cluster queues are waiting to reclaim nominal resources. 1488 borrowingWl := testing.MakeWorkload("player2-a", matchingNS.Name).Queue(lqTeamA. 1489 Name).Request(corev1.ResourceCPU, "5").Priority(11).Obj() 1490 gomega.Expect(k8sClient.Create(ctx, borrowingWl)).Should(gomega.Succeed()) 1491 1492 // blockedWL wants to borrow resources from strictFIFO CQ, but should be blocked 1493 // from borrowing because there is a pending workload in strictFIFO CQ. 1494 blockedWl := testing.MakeWorkload("player3-a", matchingNS.Name).Queue(lqTeamA. 1495 Name).Request(corev1.ResourceCPU, "1").Priority(10).Obj() 1496 gomega.Expect(k8sClient.Create(ctx, blockedWl)).Should(gomega.Succeed()) 1497 1498 gomega.Eventually(func() error { 1499 var cq kueue.ClusterQueue 1500 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cqTeamA), &cq)).To(gomega.Succeed()) 1501 cq.Spec.StopPolicy = ptr.To(kueue.None) 1502 return k8sClient.Update(ctx, &cq) 1503 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1504 1505 util.ExpectWorkloadsToBePending(ctx, k8sClient, pendingWl, blockedWl) 1506 util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, borrowingWl) 1507 }) 1508 }) 1509 1510 ginkgo.When("Deleting clusterQueues", func() { 1511 var ( 1512 cq *kueue.ClusterQueue 1513 queue *kueue.LocalQueue 1514 ) 1515 1516 ginkgo.AfterEach(func() { 1517 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 1518 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cq, false) 1519 }) 1520 1521 ginkgo.It("Should not admit new created workloads", func() { 1522 ginkgo.By("Create clusterQueue") 1523 cq = testing.MakeClusterQueue("cluster-queue").Obj() 1524 gomega.Expect(k8sClient.Create(ctx, cq)).Should(gomega.Succeed()) 1525 queue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(cq.Name).Obj() 1526 gomega.Expect(k8sClient.Create(ctx, queue)).Should(gomega.Succeed()) 1527 1528 ginkgo.By("New created workloads should be admitted") 1529 wl1 := testing.MakeWorkload("workload1", ns.Name).Queue(queue.Name).Obj() 1530 gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed()) 1531 defer func() { 1532 gomega.Expect(util.DeleteWorkload(ctx, k8sClient, wl1)).To(gomega.Succeed()) 1533 }() 1534 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, cq.Name, wl1) 1535 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 1536 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 1537 1538 ginkgo.By("Delete clusterQueue") 1539 gomega.Expect(util.DeleteClusterQueue(ctx, k8sClient, cq)).To(gomega.Succeed()) 1540 gomega.Consistently(func() []string { 1541 var newCQ kueue.ClusterQueue 1542 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cq), &newCQ)).To(gomega.Succeed()) 1543 return newCQ.GetFinalizers() 1544 }, util.ConsistentDuration, util.Interval).Should(gomega.Equal([]string{kueue.ResourceInUseFinalizerName})) 1545 1546 ginkgo.By("New created workloads should be frozen") 1547 wl2 := testing.MakeWorkload("workload2", ns.Name).Queue(queue.Name).Obj() 1548 gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed()) 1549 defer func() { 1550 gomega.Expect(util.DeleteWorkload(ctx, k8sClient, wl2)).To(gomega.Succeed()) 1551 }() 1552 util.ExpectWorkloadsToBeFrozen(ctx, k8sClient, cq.Name, wl2) 1553 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 1554 util.ExpectAdmittedWorkloadsTotalMetric(cq, 1) 1555 util.ExpectPendingWorkloadsMetric(cq, 0, 1) 1556 }) 1557 }) 1558 1559 ginkgo.When("The workload's podSet resource requests are not valid", func() { 1560 var ( 1561 cq *kueue.ClusterQueue 1562 queue *kueue.LocalQueue 1563 ) 1564 1565 ginkgo.BeforeEach(func() { 1566 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).To(gomega.Succeed()) 1567 cq = testing.MakeClusterQueue("cluster-queue"). 1568 ResourceGroup( 1569 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 1570 ). 1571 Obj() 1572 gomega.Expect(k8sClient.Create(ctx, cq)).Should(gomega.Succeed()) 1573 queue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(cq.Name).Obj() 1574 gomega.Expect(k8sClient.Create(ctx, queue)).Should(gomega.Succeed()) 1575 }) 1576 1577 ginkgo.AfterEach(func() { 1578 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 1579 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cq, true) 1580 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 1581 }) 1582 1583 type testParams struct { 1584 reqCPU string 1585 limitCPU string 1586 minCPU string 1587 maxCPU string 1588 limitType corev1.LimitType 1589 wantedStatus string 1590 shouldBeAdmited bool 1591 } 1592 1593 ginkgo.DescribeTable("", func(tp testParams) { 1594 lrBuilder := testing.MakeLimitRange("limit", ns.Name) 1595 if tp.limitType != "" { 1596 lrBuilder.WithType(tp.limitType) 1597 } 1598 if tp.maxCPU != "" { 1599 lrBuilder.WithValue("Max", corev1.ResourceCPU, tp.maxCPU) 1600 } 1601 if tp.minCPU != "" { 1602 lrBuilder.WithValue("Min", corev1.ResourceCPU, tp.minCPU) 1603 } 1604 lr := lrBuilder.Obj() 1605 gomega.Expect(k8sClient.Create(ctx, lr)).To(gomega.Succeed()) 1606 1607 wlBuilder := testing.MakeWorkload("workload", ns.Name).Queue(queue.Name) 1608 1609 if tp.reqCPU != "" { 1610 wlBuilder.Request(corev1.ResourceCPU, tp.reqCPU) 1611 } 1612 if tp.limitCPU != "" { 1613 wlBuilder.Limit(corev1.ResourceCPU, tp.limitCPU) 1614 } 1615 1616 wl := wlBuilder.Obj() 1617 gomega.Expect(k8sClient.Create(ctx, wl)).To(gomega.Succeed()) 1618 1619 if tp.shouldBeAdmited { 1620 util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, cq.Name, wl) 1621 } else { 1622 gomega.Eventually(func() string { 1623 rwl := kueue.Workload{} 1624 if err := k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &rwl); err != nil { 1625 return "" 1626 } 1627 1628 cond := meta.FindStatusCondition(rwl.Status.Conditions, kueue.WorkloadQuotaReserved) 1629 if cond == nil { 1630 return "" 1631 } 1632 return cond.Message 1633 }, util.Timeout, util.Interval).Should(gomega.ContainSubstring(tp.wantedStatus)) 1634 } 1635 gomega.Expect(util.DeleteWorkload(ctx, k8sClient, wl)).To(gomega.Succeed()) 1636 gomega.Expect(k8sClient.Delete(ctx, lr)).To(gomega.Succeed()) 1637 }, 1638 ginkgo.Entry("request more that limits", testParams{reqCPU: "3", limitCPU: "2", wantedStatus: "resource validation failed:"}), 1639 ginkgo.Entry("request over container limits", testParams{reqCPU: "2", limitCPU: "3", maxCPU: "1", wantedStatus: "didn't satisfy LimitRange constraints:"}), 1640 ginkgo.Entry("request under container limits", testParams{reqCPU: "2", limitCPU: "3", minCPU: "3", wantedStatus: "didn't satisfy LimitRange constraints:"}), 1641 ginkgo.Entry("request over pod limits", testParams{reqCPU: "2", limitCPU: "3", maxCPU: "1", limitType: corev1.LimitTypePod, wantedStatus: "didn't satisfy LimitRange constraints:"}), 1642 ginkgo.Entry("request under pod limits", testParams{reqCPU: "2", limitCPU: "3", minCPU: "3", limitType: corev1.LimitTypePod, wantedStatus: "didn't satisfy LimitRange constraints:"}), 1643 ginkgo.Entry("valid", testParams{reqCPU: "2", limitCPU: "3", minCPU: "1", maxCPU: "4", shouldBeAdmited: true}), 1644 ) 1645 }) 1646 1647 ginkgo.When("Using clusterQueue stop policy", func() { 1648 var ( 1649 cq *kueue.ClusterQueue 1650 queue *kueue.LocalQueue 1651 ) 1652 1653 ginkgo.BeforeEach(func() { 1654 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 1655 cq = testing.MakeClusterQueue("cluster-queue"). 1656 ResourceGroup( 1657 *testing.MakeFlavorQuotas("on-demand"). 1658 Resource(corev1.ResourceCPU, "5", "5").Obj(), 1659 ). 1660 Cohort("cohort"). 1661 Obj() 1662 gomega.Expect(k8sClient.Create(ctx, cq)).To(gomega.Succeed()) 1663 queue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(cq.Name).Obj() 1664 gomega.Expect(k8sClient.Create(ctx, queue)).To(gomega.Succeed()) 1665 }) 1666 1667 ginkgo.AfterEach(func() { 1668 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, cq, true) 1669 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 1670 }) 1671 1672 ginkgo.It("Should evict workloads when stop policy is drain", func() { 1673 ginkgo.By("Creating first workload") 1674 wl1 := testing.MakeWorkload("one", ns.Name).Queue(queue.Name).Obj() 1675 gomega.Expect(k8sClient.Create(ctx, wl1)).To(gomega.Succeed()) 1676 util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, wl1) 1677 util.ExpectPendingWorkloadsMetric(cq, 0, 0) 1678 util.ExpectReservingActiveWorkloadsMetric(cq, 1) 1679 1680 ginkgo.By("Stopping the ClusterQueue") 1681 var clusterQueue kueue.ClusterQueue 1682 gomega.Eventually(func() error { 1683 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cq), &clusterQueue)).To(gomega.Succeed()) 1684 clusterQueue.Spec.StopPolicy = ptr.To(kueue.HoldAndDrain) 1685 return k8sClient.Update(ctx, &clusterQueue) 1686 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1687 1688 util.ExpectClusterQueueStatusMetric(cq, metrics.CQStatusPending) 1689 1690 ginkgo.By("Checking the condition of workload is evicted", func() { 1691 createdWl := kueue.Workload{} 1692 gomega.Eventually(func() *metav1.Condition { 1693 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl1), &createdWl)).To(gomega.Succeed()) 1694 return meta.FindStatusCondition(createdWl.Status.Conditions, kueue.WorkloadEvicted) 1695 }, util.Timeout, util.Interval).Should(gomega.BeComparableTo(&metav1.Condition{ 1696 Type: kueue.WorkloadEvicted, 1697 Status: metav1.ConditionTrue, 1698 Reason: kueue.WorkloadEvictedByClusterQueueStopped, 1699 Message: "The ClusterQueue is stopped", 1700 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime"))) 1701 }) 1702 1703 util.FinishEvictionForWorkloads(ctx, k8sClient, wl1) 1704 1705 ginkgo.By("Creating another workload") 1706 wl2 := testing.MakeWorkload("two", ns.Name).Queue(queue.Name).Obj() 1707 gomega.Expect(k8sClient.Create(ctx, wl2)).To(gomega.Succeed()) 1708 1709 util.ExpectPendingWorkloadsMetric(cq, 0, 2) 1710 1711 ginkgo.By("Restart the ClusterQueue by removing its stopPolicy") 1712 gomega.Eventually(func() error { 1713 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(cq), &clusterQueue)).To(gomega.Succeed()) 1714 clusterQueue.Spec.StopPolicy = nil 1715 return k8sClient.Update(ctx, &clusterQueue) 1716 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1717 1718 util.ExpectClusterQueueStatusMetric(cq, metrics.CQStatusActive) 1719 1720 util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, wl1, wl2) 1721 util.ExpectPendingWorkloadsMetric(cq, 0, 0) 1722 util.ExpectReservingActiveWorkloadsMetric(cq, 2) 1723 util.FinishWorkloads(ctx, k8sClient, wl1, wl2) 1724 }) 1725 }) 1726 })