sigs.k8s.io/kueue@v0.6.2/test/integration/controller/jobs/pod/pod_controller_test.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package pod 18 19 import ( 20 "fmt" 21 "strconv" 22 "time" 23 24 "github.com/google/go-cmp/cmp" 25 "github.com/google/go-cmp/cmp/cmpopts" 26 "github.com/onsi/ginkgo/v2" 27 "github.com/onsi/gomega" 28 batchv1 "k8s.io/api/batch/v1" 29 corev1 "k8s.io/api/core/v1" 30 apierrors "k8s.io/apimachinery/pkg/api/errors" 31 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 "k8s.io/apimachinery/pkg/types" 33 "k8s.io/utils/ptr" 34 "sigs.k8s.io/controller-runtime/pkg/client" 35 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 36 37 configapi "sigs.k8s.io/kueue/apis/config/v1beta1" 38 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 39 "sigs.k8s.io/kueue/pkg/controller/jobframework" 40 podcontroller "sigs.k8s.io/kueue/pkg/controller/jobs/pod" 41 "sigs.k8s.io/kueue/pkg/util/testing" 42 testingpod "sigs.k8s.io/kueue/pkg/util/testingjobs/pod" 43 "sigs.k8s.io/kueue/pkg/workload" 44 "sigs.k8s.io/kueue/test/integration/framework" 45 "sigs.k8s.io/kueue/test/util" 46 ) 47 48 const ( 49 podName = "test-pod" 50 instanceKey = "cloud.provider.com/instance" 51 ) 52 53 var ( 54 wlConditionCmpOpts = []cmp.Option{ 55 cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime", "Reason", "Message"), 56 } 57 ) 58 59 var _ = ginkgo.Describe("Pod controller", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() { 60 ginkgo.When("manageJobsWithoutQueueName is disabled", func() { 61 var defaultFlavor = testing.MakeResourceFlavor("default").Label("kubernetes.io/arch", "arm64").Obj() 62 var clusterQueue = testing.MakeClusterQueue("cluster-queue"). 63 ResourceGroup( 64 *testing.MakeFlavorQuotas(defaultFlavor.Name).Resource(corev1.ResourceCPU, "1").Obj(), 65 ).Obj() 66 67 ginkgo.BeforeAll(func() { 68 fwk = &framework.Framework{ 69 CRDPath: crdPath, 70 WebhookPath: webhookPath, 71 } 72 cfg = fwk.Init() 73 ctx, k8sClient = fwk.RunManager(cfg, managerSetup( 74 jobframework.WithManageJobsWithoutQueueName(false), 75 jobframework.WithKubeServerVersion(serverVersionFetcher), 76 jobframework.WithIntegrationOptions(corev1.SchemeGroupVersion.WithKind("Pod").String(), &configapi.PodIntegrationOptions{ 77 PodSelector: &metav1.LabelSelector{}, 78 NamespaceSelector: &metav1.LabelSelector{ 79 MatchExpressions: []metav1.LabelSelectorRequirement{ 80 { 81 Key: "kubernetes.io/metadata.name", 82 Operator: metav1.LabelSelectorOpNotIn, 83 Values: []string{"kube-system", "kueue-system"}, 84 }, 85 }, 86 }, 87 }), 88 )) 89 gomega.Expect(k8sClient.Create(ctx, defaultFlavor)).To(gomega.Succeed()) 90 gomega.Expect(k8sClient.Create(ctx, clusterQueue)).To(gomega.Succeed()) 91 }) 92 ginkgo.AfterAll(func() { 93 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, clusterQueue, true) 94 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, defaultFlavor, true) 95 fwk.Teardown() 96 }) 97 98 var ( 99 ns *corev1.Namespace 100 lookupKey types.NamespacedName 101 wlLookupKey types.NamespacedName 102 ) 103 104 ginkgo.BeforeEach(func() { 105 ns = &corev1.Namespace{ 106 ObjectMeta: metav1.ObjectMeta{ 107 GenerateName: "pod-namespace-", 108 }, 109 } 110 gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed()) 111 wlLookupKey = types.NamespacedName{Name: podcontroller.GetWorkloadNameForPod(podName), Namespace: ns.Name} 112 lookupKey = types.NamespacedName{Name: podName, Namespace: ns.Name} 113 }) 114 115 ginkgo.AfterEach(func() { 116 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 117 }) 118 119 ginkgo.When("Using single pod", func() { 120 ginkgo.It("Should reconcile the single pod with the queue name", func() { 121 pod := testingpod.MakePod(podName, ns.Name).Queue("test-queue").Obj() 122 gomega.Expect(k8sClient.Create(ctx, pod)).Should(gomega.Succeed()) 123 124 createdPod := &corev1.Pod{} 125 gomega.Eventually(func() error { 126 return k8sClient.Get(ctx, lookupKey, createdPod) 127 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 128 129 gomega.Expect(createdPod.Spec.SchedulingGates).To( 130 gomega.ContainElement(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}), 131 "Pod should have scheduling gate", 132 ) 133 134 gomega.Expect(createdPod.Labels).To( 135 gomega.HaveKeyWithValue("kueue.x-k8s.io/managed", "true"), 136 "Pod should have the label", 137 ) 138 139 gomega.Expect(createdPod.Finalizers).To(gomega.ContainElement("kueue.x-k8s.io/managed"), 140 "Pod should have finalizer") 141 142 ginkgo.By("checking that workload is created for pod with the queue name") 143 createdWorkload := &kueue.Workload{} 144 gomega.Eventually(func() error { 145 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 146 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 147 148 gomega.Expect(createdWorkload.Spec.PodSets).To(gomega.HaveLen(1)) 149 150 gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), 151 "The Workload should have .spec.queueName set") 152 153 ginkgo.By("checking the pod is unsuspended when workload is assigned") 154 155 clusterQueue := testing.MakeClusterQueue("cluster-queue"). 156 ResourceGroup( 157 *testing.MakeFlavorQuotas("default").Resource(corev1.ResourceCPU, "1").Obj(), 158 ).Obj() 159 admission := testing.MakeAdmission(clusterQueue.Name). 160 Assignment(corev1.ResourceCPU, "default", "1"). 161 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 162 Obj() 163 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 164 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 165 166 gomega.Eventually(func(g gomega.Gomega) bool { 167 ok, err := testing.CheckLatestEvent(ctx, k8sClient, "Started", corev1.EventTypeNormal, fmt.Sprintf("Admitted by clusterQueue %v", clusterQueue.Name)) 168 g.Expect(err).NotTo(gomega.HaveOccurred()) 169 return ok 170 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 171 172 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, lookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 173 174 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 175 gomega.Expect(createdWorkload.Status.Conditions).To(gomega.BeComparableTo( 176 []metav1.Condition{ 177 {Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionTrue}, 178 {Type: kueue.WorkloadAdmitted, Status: metav1.ConditionTrue}, 179 }, 180 wlConditionCmpOpts..., 181 )) 182 183 ginkgo.By("checking the workload is finished and the pod finalizer is removed when pod is succeeded") 184 util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, pod) 185 gomega.Eventually(func() []metav1.Condition { 186 err := k8sClient.Get(ctx, wlLookupKey, createdWorkload) 187 if err != nil { 188 return nil 189 } 190 return createdWorkload.Status.Conditions 191 }, util.Timeout, util.Interval).Should(gomega.ContainElement( 192 gomega.BeComparableTo( 193 metav1.Condition{Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue}, 194 wlConditionCmpOpts..., 195 ), 196 ), "Expected 'Finished' workload condition") 197 198 util.ExpectPodsFinalized(ctx, k8sClient, lookupKey) 199 }) 200 201 ginkgo.It("Should remove finalizers from Pods that are actively deleted after being admitted", func() { 202 pod := testingpod.MakePod(podName, ns.Name).Queue("test-queue").Obj() 203 gomega.Expect(k8sClient.Create(ctx, pod)).Should(gomega.Succeed()) 204 205 createdPod := &corev1.Pod{} 206 gomega.Eventually(func() error { 207 return k8sClient.Get(ctx, lookupKey, createdPod) 208 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 209 210 gomega.Expect(createdPod.Finalizers).To(gomega.ContainElement("kueue.x-k8s.io/managed"), 211 "Pod should have finalizer") 212 213 ginkgo.By("checking that workload is created for pod with the queue name") 214 createdWorkload := &kueue.Workload{} 215 gomega.Eventually(func() error { 216 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 217 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 218 219 ginkgo.By("checking the pod is unsuspended when workload is assigned") 220 clusterQueue := testing.MakeClusterQueue("cluster-queue"). 221 ResourceGroup( 222 *testing.MakeFlavorQuotas("default").Resource(corev1.ResourceCPU, "1").Obj(), 223 ).Obj() 224 admission := testing.MakeAdmission(clusterQueue.Name). 225 Assignment(corev1.ResourceCPU, "default", "1"). 226 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 227 Obj() 228 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 229 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 230 231 gomega.Eventually(func() []corev1.PodSchedulingGate { 232 if err := k8sClient.Get(ctx, lookupKey, createdPod); err != nil { 233 return nil 234 } 235 return createdPod.Spec.SchedulingGates 236 }, util.Timeout, util.Interval).Should(gomega.BeEmpty()) 237 238 ginkgo.By("checking that the finalizer is removed when the Pod is deleted early") 239 gomega.Expect(k8sClient.Delete(ctx, createdPod)).Should(gomega.Succeed()) 240 gomega.Eventually(func(g gomega.Gomega) error { 241 return k8sClient.Get(ctx, lookupKey, createdPod) 242 }, util.Timeout, util.Interval).Should(testing.BeNotFoundError()) 243 }) 244 245 ginkgo.When("A workload is evicted", func() { 246 const finalizerName = "kueue.x-k8s.io/integration-test" 247 var pod *corev1.Pod 248 249 ginkgo.BeforeEach(func() { 250 // A pod must have a dedicated finalizer since we need to verify the pod status 251 // after a workload is evicted. 252 pod = testingpod.MakePod(podName, ns.Name).Queue("test-queue").Finalizer(finalizerName).Obj() 253 ginkgo.By("Creating a pod with queue name") 254 gomega.Expect(k8sClient.Create(ctx, pod)).Should(gomega.Succeed()) 255 }) 256 ginkgo.AfterEach(func() { 257 gomega.Eventually(func(g gomega.Gomega) { 258 g.Expect(k8sClient.Get(ctx, lookupKey, pod)).Should(gomega.Succeed()) 259 controllerutil.RemoveFinalizer(pod, finalizerName) 260 g.Expect(k8sClient.Update(ctx, pod)).Should(gomega.Succeed()) 261 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 262 gomega.Eventually(func() bool { 263 return apierrors.IsNotFound(k8sClient.Get(ctx, lookupKey, &corev1.Pod{})) 264 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 265 }) 266 267 ginkgo.It("Should stop the single pod with the queue name", func() { 268 createdPod := &corev1.Pod{} 269 gomega.Eventually(func() error { 270 return k8sClient.Get(ctx, lookupKey, createdPod) 271 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 272 273 ginkgo.By("checking that workload is created for pod with the queue name") 274 createdWorkload := &kueue.Workload{} 275 gomega.Eventually(func() error { 276 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 277 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 278 279 gomega.Expect(createdWorkload.Spec.PodSets).To(gomega.HaveLen(1)) 280 281 gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") 282 283 ginkgo.By("checking that pod is unsuspended when workload is admitted") 284 clusterQueue := testing.MakeClusterQueue("cluster-queue"). 285 ResourceGroup( 286 *testing.MakeFlavorQuotas("default").Resource(corev1.ResourceCPU, "1").Obj(), 287 ).Obj() 288 admission := testing.MakeAdmission(clusterQueue.Name). 289 Assignment(corev1.ResourceCPU, "default", "1"). 290 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 291 Obj() 292 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 293 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 294 295 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, lookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 296 297 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 298 gomega.Expect(createdWorkload.Status.Conditions).Should(gomega.BeComparableTo( 299 []metav1.Condition{ 300 {Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionTrue}, 301 {Type: kueue.WorkloadAdmitted, Status: metav1.ConditionTrue}, 302 }, 303 wlConditionCmpOpts..., 304 )) 305 306 ginkgo.By("checking that pod is stopped when workload is evicted") 307 308 gomega.Expect( 309 workload.UpdateStatus(ctx, k8sClient, createdWorkload, kueue.WorkloadEvicted, metav1.ConditionTrue, 310 kueue.WorkloadEvictedByPreemption, "By test", "evict"), 311 ).Should(gomega.Succeed()) 312 util.FinishEvictionForWorkloads(ctx, k8sClient, createdWorkload) 313 314 gomega.Eventually(func(g gomega.Gomega) bool { 315 g.Expect(k8sClient.Get(ctx, lookupKey, createdPod)).To(gomega.Succeed()) 316 return createdPod.DeletionTimestamp.IsZero() 317 }, util.Timeout, util.Interval).Should(gomega.BeFalse(), "Expected pod to be deleted") 318 319 gomega.Expect(createdPod.Status.Conditions).Should(gomega.ContainElement( 320 gomega.BeComparableTo( 321 corev1.PodCondition{ 322 Type: "TerminationTarget", 323 Status: "True", 324 Reason: "StoppedByKueue", 325 Message: "By test", 326 }, 327 cmpopts.IgnoreFields(corev1.PodCondition{}, "LastTransitionTime"), 328 ), 329 )) 330 }) 331 }) 332 333 ginkgo.When("Pod owner is managed by Kueue", func() { 334 var pod *corev1.Pod 335 ginkgo.BeforeEach(func() { 336 pod = testingpod.MakePod(podName, ns.Name). 337 Queue("test-queue"). 338 OwnerReference("parent-job", batchv1.SchemeGroupVersion.WithKind("Job")). 339 Obj() 340 }) 341 342 ginkgo.It("Should skip the pod", func() { 343 gomega.Expect(k8sClient.Create(ctx, pod)).Should(gomega.Succeed()) 344 345 createdPod := &corev1.Pod{} 346 gomega.Eventually(func() error { 347 return k8sClient.Get(ctx, lookupKey, createdPod) 348 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 349 350 gomega.Expect(createdPod.Spec.SchedulingGates).NotTo( 351 gomega.ContainElement(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}), 352 "Pod shouldn't have scheduling gate", 353 ) 354 355 gomega.Expect(createdPod.Labels).NotTo( 356 gomega.HaveKeyWithValue("kueue.x-k8s.io/managed", "true"), 357 "Pod shouldn't have the label", 358 ) 359 360 gomega.Expect(createdPod.Finalizers).NotTo(gomega.ContainElement("kueue.x-k8s.io/managed"), 361 "Pod shouldn't have finalizer") 362 363 ginkgo.By(fmt.Sprintf("checking that workload '%s' is not created", wlLookupKey)) 364 createdWorkload := &kueue.Workload{} 365 366 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(testing.BeNotFoundError()) 367 }) 368 }) 369 370 ginkgo.When("the queue has admission checks", func() { 371 var ( 372 ns *corev1.Namespace 373 clusterQueueAc *kueue.ClusterQueue 374 localQueue *kueue.LocalQueue 375 testFlavor *kueue.ResourceFlavor 376 podLookupKey *types.NamespacedName 377 wlLookupKey *types.NamespacedName 378 admissionCheck *kueue.AdmissionCheck 379 ) 380 381 ginkgo.BeforeEach(func() { 382 ns = &corev1.Namespace{ 383 ObjectMeta: metav1.ObjectMeta{ 384 GenerateName: "pod-ac-namespace-", 385 }, 386 } 387 gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed()) 388 admissionCheck = testing.MakeAdmissionCheck("check").ControllerName("ac-controller").Obj() 389 gomega.Expect(k8sClient.Create(ctx, admissionCheck)).To(gomega.Succeed()) 390 util.SetAdmissionCheckActive(ctx, k8sClient, admissionCheck, metav1.ConditionTrue) 391 clusterQueueAc = testing.MakeClusterQueue("prod-cq-with-checks"). 392 ResourceGroup( 393 *testing.MakeFlavorQuotas("test-flavor").Resource(corev1.ResourceCPU, "5").Obj(), 394 ).AdmissionChecks("check").Obj() 395 gomega.Expect(k8sClient.Create(ctx, clusterQueueAc)).Should(gomega.Succeed()) 396 localQueue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(clusterQueueAc.Name).Obj() 397 gomega.Expect(k8sClient.Create(ctx, localQueue)).To(gomega.Succeed()) 398 testFlavor = testing.MakeResourceFlavor("test-flavor").Label(instanceKey, "test-flavor").Obj() 399 gomega.Expect(k8sClient.Create(ctx, testFlavor)).Should(gomega.Succeed()) 400 401 podLookupKey = &types.NamespacedName{Name: podName, Namespace: ns.Name} 402 wlLookupKey = &types.NamespacedName{Name: podcontroller.GetWorkloadNameForPod(podName), Namespace: ns.Name} 403 }) 404 405 ginkgo.AfterEach(func() { 406 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 407 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, clusterQueueAc, true) 408 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, testFlavor, true) 409 util.ExpectAdmissionCheckToBeDeleted(ctx, k8sClient, admissionCheck, true) 410 }) 411 412 ginkgo.It("labels and annotations should be propagated from admission check to job", func() { 413 createdPod := &corev1.Pod{} 414 createdWorkload := &kueue.Workload{} 415 416 ginkgo.By("creating the job with pod labels & annotations", func() { 417 job := testingpod.MakePod(podName, ns.Name). 418 Queue(localQueue.Name). 419 Request(corev1.ResourceCPU, "5"). 420 Annotation("old-ann-key", "old-ann-value"). 421 Label("old-label-key", "old-label-value"). 422 Obj() 423 gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed()) 424 }) 425 426 ginkgo.By("fetch the job and verify it is suspended as the checks are not ready", func() { 427 gomega.Eventually(func(g gomega.Gomega) []corev1.PodSchedulingGate { 428 g.Expect(k8sClient.Get(ctx, *podLookupKey, createdPod)).To(gomega.Succeed()) 429 return createdPod.Spec.SchedulingGates 430 }, util.Timeout, util.Interval).Should( 431 gomega.ContainElement(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}), 432 ) 433 }) 434 435 ginkgo.By("fetch the created workload", func() { 436 gomega.Eventually(func() error { 437 return k8sClient.Get(ctx, *wlLookupKey, createdWorkload) 438 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 439 }) 440 441 ginkgo.By("add labels & annotations to the admission check", func() { 442 gomega.Eventually(func() error { 443 var newWL kueue.Workload 444 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(createdWorkload), &newWL)).To(gomega.Succeed()) 445 workload.SetAdmissionCheckState(&newWL.Status.AdmissionChecks, kueue.AdmissionCheckState{ 446 Name: "check", 447 State: kueue.CheckStateReady, 448 PodSetUpdates: []kueue.PodSetUpdate{ 449 { 450 Name: "main", 451 Labels: map[string]string{ 452 "label1": "label-value1", 453 }, 454 Annotations: map[string]string{ 455 "ann1": "ann-value1", 456 }, 457 NodeSelector: map[string]string{ 458 "selector1": "selector-value1", 459 }, 460 }, 461 }, 462 }) 463 return k8sClient.Status().Update(ctx, &newWL) 464 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 465 }) 466 467 ginkgo.By("admit the workload", func() { 468 admission := testing.MakeAdmission(clusterQueueAc.Name). 469 Assignment(corev1.ResourceCPU, "test-flavor", "1"). 470 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 471 Obj() 472 gomega.Expect(k8sClient.Get(ctx, *wlLookupKey, createdWorkload)).Should(gomega.Succeed()) 473 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 474 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 475 }) 476 477 ginkgo.By("await for the job to be admitted", func() { 478 gomega.Eventually(func(g gomega.Gomega) []corev1.PodSchedulingGate { 479 g.Expect(k8sClient.Get(ctx, *podLookupKey, createdPod)). 480 To(gomega.Succeed()) 481 return createdPod.Spec.SchedulingGates 482 }, util.Timeout, util.Interval).Should(gomega.BeEmpty()) 483 }) 484 485 ginkgo.By("verify the PodSetUpdates are propagated to the running job", func() { 486 gomega.Expect(createdPod.Annotations).Should(gomega.HaveKeyWithValue("ann1", "ann-value1")) 487 gomega.Expect(createdPod.Annotations).Should(gomega.HaveKeyWithValue("old-ann-key", "old-ann-value")) 488 gomega.Expect(createdPod.Labels).Should(gomega.HaveKeyWithValue("label1", "label-value1")) 489 gomega.Expect(createdPod.Labels).Should(gomega.HaveKeyWithValue("old-label-key", "old-label-value")) 490 gomega.Expect(createdPod.Spec.NodeSelector).Should(gomega.HaveKeyWithValue(instanceKey, "test-flavor")) 491 gomega.Expect(createdPod.Spec.NodeSelector).Should(gomega.HaveKeyWithValue("selector1", "selector-value1")) 492 }) 493 }) 494 }) 495 }) 496 497 ginkgo.When("Using pod group", func() { 498 ginkgo.It("Should ungate pods when admitted and finalize Pods when succeeded", func() { 499 ginkgo.By("Creating pods with queue name") 500 pod1 := testingpod.MakePod("test-pod1", ns.Name). 501 Group("test-group"). 502 GroupTotalCount("2"). 503 Queue("test-queue"). 504 Obj() 505 pod2 := testingpod.MakePod("test-pod2", ns.Name). 506 Group("test-group"). 507 GroupTotalCount("2"). 508 Request(corev1.ResourceCPU, "1"). 509 Queue("test-queue"). 510 Obj() 511 pod1LookupKey := client.ObjectKeyFromObject(pod1) 512 pod2LookupKey := client.ObjectKeyFromObject(pod2) 513 514 gomega.Expect(k8sClient.Create(ctx, pod1)).Should(gomega.Succeed()) 515 gomega.Expect(k8sClient.Create(ctx, pod2)).Should(gomega.Succeed()) 516 517 ginkgo.By("checking that workload is created for the pod group with the queue name") 518 wlLookupKey := types.NamespacedName{ 519 Namespace: pod1.Namespace, 520 Name: "test-group", 521 } 522 createdWorkload := &kueue.Workload{} 523 gomega.Eventually(func() error { 524 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 525 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 526 527 gomega.Expect(createdWorkload.Spec.PodSets).To(gomega.HaveLen(2)) 528 gomega.Expect(createdWorkload.Spec.PodSets[0].Count).To(gomega.Equal(int32(1))) 529 gomega.Expect(createdWorkload.Spec.PodSets[1].Count).To(gomega.Equal(int32(1))) 530 531 gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") 532 533 ginkgo.By("checking that all pods in group are unsuspended when workload is admitted", func() { 534 admission := testing.MakeAdmission(clusterQueue.Name).PodSets( 535 kueue.PodSetAssignment{ 536 Name: "4b0469f7", 537 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 538 corev1.ResourceCPU: "default", 539 }, 540 Count: ptr.To[int32](1), 541 }, 542 kueue.PodSetAssignment{ 543 Name: "bf90803c", 544 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 545 corev1.ResourceCPU: "default", 546 }, 547 Count: ptr.To[int32](1), 548 }, 549 ).Obj() 550 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 551 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 552 553 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod1LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 554 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 555 556 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 557 gomega.Expect(createdWorkload.Status.Conditions).Should(gomega.BeComparableTo( 558 []metav1.Condition{ 559 {Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionTrue}, 560 {Type: kueue.WorkloadAdmitted, Status: metav1.ConditionTrue}, 561 }, 562 wlConditionCmpOpts..., 563 )) 564 }) 565 566 ginkgo.By("checking that pod group is finalized when all pods in the group succeed", func() { 567 util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, pod1, pod2) 568 569 gomega.Eventually(func() []metav1.Condition { 570 err := k8sClient.Get(ctx, wlLookupKey, createdWorkload) 571 if err != nil { 572 return nil 573 } 574 return createdWorkload.Status.Conditions 575 }, util.Timeout, util.Interval).Should(gomega.ContainElement( 576 gomega.BeComparableTo( 577 metav1.Condition{Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue}, 578 wlConditionCmpOpts..., 579 ), 580 ), "Expected 'Finished' workload condition") 581 582 util.ExpectPodsFinalized(ctx, k8sClient, pod1LookupKey, pod2LookupKey) 583 }) 584 }) 585 586 ginkgo.It("Should keep the running pod group with the queue name if workload is evicted", func() { 587 ginkgo.By("Creating pods with queue name") 588 pod1 := testingpod.MakePod("test-pod1", ns.Name). 589 Group("test-group"). 590 GroupTotalCount("2"). 591 Queue("test-queue"). 592 Obj() 593 pod2 := testingpod.MakePod("test-pod2", ns.Name). 594 Group("test-group"). 595 GroupTotalCount("2"). 596 Queue("test-queue"). 597 Obj() 598 pod1LookupKey := client.ObjectKeyFromObject(pod1) 599 pod2LookupKey := client.ObjectKeyFromObject(pod2) 600 601 gomega.Expect(k8sClient.Create(ctx, pod1)).Should(gomega.Succeed()) 602 gomega.Expect(k8sClient.Create(ctx, pod2)).Should(gomega.Succeed()) 603 604 ginkgo.By("checking that workload is created for the pod group with the queue name") 605 wlLookupKey := types.NamespacedName{ 606 Namespace: pod1.Namespace, 607 Name: "test-group", 608 } 609 createdWorkload := &kueue.Workload{} 610 gomega.Eventually(func() error { 611 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 612 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 613 614 gomega.Expect(createdWorkload.Spec.PodSets).To(gomega.HaveLen(1)) 615 gomega.Expect(createdWorkload.Spec.PodSets[0].Count).To(gomega.Equal(int32(2))) 616 gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") 617 originalWorkloadUID := createdWorkload.UID 618 619 admission := testing.MakeAdmission(clusterQueue.Name, "bf90803c"). 620 Assignment(corev1.ResourceCPU, "default", "1"). 621 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 622 Obj() 623 ginkgo.By("checking that all pods in group are unsuspended when workload is admitted", func() { 624 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 625 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 626 627 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod1LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 628 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 629 630 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 631 gomega.Expect(createdWorkload.Status.Conditions).Should(gomega.BeComparableTo( 632 []metav1.Condition{ 633 {Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionTrue}, 634 {Type: kueue.WorkloadAdmitted, Status: metav1.ConditionTrue}, 635 }, 636 wlConditionCmpOpts..., 637 )) 638 }) 639 640 ginkgo.By("set the pods as running", func() { 641 util.SetPodsPhase(ctx, k8sClient, corev1.PodRunning, pod1, pod2) 642 }) 643 644 createdPod := &corev1.Pod{} 645 ginkgo.By("checking that the Pods get a deletion timestamp when the workload is evicted", func() { 646 gomega.Expect(func() error { 647 w := createdWorkload.DeepCopy() 648 workload.SetEvictedCondition(w, "ByTest", "by test") 649 return workload.ApplyAdmissionStatus(ctx, k8sClient, w, false) 650 }()).Should(gomega.Succeed()) 651 652 gomega.Eventually(func(g gomega.Gomega) bool { 653 g.Expect(k8sClient.Get(ctx, pod1LookupKey, createdPod)).To(gomega.Succeed()) 654 return createdPod.DeletionTimestamp.IsZero() 655 }, util.ConsistentDuration, util.Interval).Should(gomega.BeFalse()) 656 657 gomega.Eventually(func(g gomega.Gomega) bool { 658 g.Expect(k8sClient.Get(ctx, pod2LookupKey, createdPod)).To(gomega.Succeed()) 659 return createdPod.DeletionTimestamp.IsZero() 660 }, util.ConsistentDuration, util.Interval).Should(gomega.BeFalse()) 661 }) 662 663 ginkgo.By("finish one pod and fail the other, the eviction should end", func() { 664 util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, pod1) 665 util.SetPodsPhase(ctx, k8sClient, corev1.PodFailed, pod2) 666 667 gomega.Eventually(func(g gomega.Gomega) { 668 g.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 669 g.Expect(createdWorkload.Status.Conditions).Should(gomega.ContainElement( 670 gomega.BeComparableTo(metav1.Condition{Type: kueue.WorkloadAdmitted, Status: metav1.ConditionFalse}, wlConditionCmpOpts...), 671 )) 672 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 673 }) 674 675 replacementPod := testingpod.MakePod("test-pod2-replace", ns.Name). 676 Group("test-group"). 677 GroupTotalCount("2"). 678 Queue("test-queue"). 679 Obj() 680 replacementPodLookupKey := client.ObjectKeyFromObject(replacementPod) 681 682 ginkgo.By("creating the replacement pod and readmitting the workload will unsuspended the replacement", func() { 683 gomega.Expect(k8sClient.Create(ctx, replacementPod)).Should(gomega.Succeed()) 684 685 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 686 gomega.Expect(createdWorkload.UID).To(gomega.Equal(originalWorkloadUID)) 687 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 688 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 689 690 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPodLookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 691 692 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 693 gomega.Expect(createdWorkload.Status.Conditions).Should(gomega.ContainElement( 694 gomega.BeComparableTo(metav1.Condition{Type: kueue.WorkloadAdmitted, Status: metav1.ConditionTrue}, wlConditionCmpOpts...), 695 )) 696 }) 697 698 ginkgo.By("finishing the replacement pod the workload should be finished", func() { 699 util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, replacementPod) 700 701 gomega.Eventually(func(g gomega.Gomega) { 702 g.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 703 g.Expect(createdWorkload.Status.Conditions).Should(gomega.ContainElement( 704 gomega.BeComparableTo(metav1.Condition{Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue}, wlConditionCmpOpts...), 705 )) 706 g.Expect(createdWorkload.OwnerReferences).Should(gomega.ContainElement(metav1.OwnerReference{ 707 APIVersion: "v1", 708 Kind: "Pod", 709 Name: replacementPod.Name, 710 UID: replacementPod.UID, 711 })) 712 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 713 }) 714 }) 715 716 ginkgo.It("Should keep the existing workload for pod replacement", func() { 717 ginkgo.By("Creating a single pod with queue and group names") 718 719 pod := testingpod.MakePod("test-pod", ns.Name). 720 Group("test-group"). 721 GroupTotalCount("1"). 722 Queue("test-queue"). 723 Obj() 724 gomega.Expect(k8sClient.Create(ctx, pod)).Should(gomega.Succeed()) 725 726 ginkgo.By("checking that workload is created for the pod group with the queue name") 727 wlLookupKey := types.NamespacedName{ 728 Namespace: pod.Namespace, 729 Name: "test-group", 730 } 731 createdWorkload := &kueue.Workload{} 732 gomega.Eventually(func() error { 733 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 734 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 735 736 gomega.Expect(createdWorkload.Spec.PodSets).To(gomega.HaveLen(1)) 737 gomega.Expect(createdWorkload.Spec.PodSets[0].Count).To(gomega.Equal(int32(1))) 738 gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") 739 740 ginkgo.By("checking that pod is unsuspended when workload is admitted") 741 admission := testing.MakeAdmission(clusterQueue.Name, "bf90803c"). 742 Assignment(corev1.ResourceCPU, "default", "1"). 743 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 744 Obj() 745 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 746 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 747 748 podLookupKey := types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace} 749 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, podLookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 750 751 // cache the uid of the workload it should be the same until the execution ends otherwise the workload was recreated 752 wlUid := createdWorkload.UID 753 754 ginkgo.By("Failing the running pod") 755 756 util.SetPodsPhase(ctx, k8sClient, corev1.PodFailed, pod) 757 createdPod := &corev1.Pod{} 758 gomega.Consistently(func(g gomega.Gomega) []string { 759 g.Expect(k8sClient.Get(ctx, podLookupKey, createdPod)).To(gomega.Succeed()) 760 return createdPod.Finalizers 761 }, util.ConsistentDuration, util.Interval).Should(gomega.ContainElement("kueue.x-k8s.io/managed"), "Pod should have finalizer") 762 gomega.Expect(createdPod.Status.Phase).To(gomega.Equal(corev1.PodFailed)) 763 764 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).Should(gomega.Succeed()) 765 gomega.Expect(createdWorkload.DeletionTimestamp.IsZero()).Should(gomega.BeTrue()) 766 767 ginkgo.By("Creating a replacement pod in the group") 768 replacementPod := testingpod.MakePod("replacement-test-pod", ns.Name). 769 Group("test-group"). 770 GroupTotalCount("1"). 771 Queue("test-queue"). 772 Obj() 773 gomega.Expect(k8sClient.Create(ctx, replacementPod)).Should(gomega.Succeed()) 774 replacementPodLookupKey := client.ObjectKeyFromObject(replacementPod) 775 776 ginkgo.By("Failing the replacement", func() { 777 util.ExpectPodsFinalized(ctx, k8sClient, podLookupKey) 778 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPodLookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 779 util.SetPodsPhase(ctx, k8sClient, corev1.PodFailed, replacementPod) 780 }) 781 782 ginkgo.By("Creating a second replacement pod in the group") 783 replacementPod2 := testingpod.MakePod("replacement-test-pod2", ns.Name). 784 Group("test-group"). 785 GroupTotalCount("1"). 786 Queue("test-queue"). 787 Obj() 788 gomega.Expect(k8sClient.Create(ctx, replacementPod2)).Should(gomega.Succeed()) 789 replacementPod2LookupKey := client.ObjectKeyFromObject(replacementPod) 790 791 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 792 util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, replacementPod2) 793 util.ExpectPodsFinalized(ctx, k8sClient, replacementPodLookupKey, replacementPod2LookupKey) 794 795 gomega.Eventually(func(g gomega.Gomega) []metav1.Condition { 796 g.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 797 g.Expect(createdWorkload.UID).To(gomega.Equal(wlUid)) 798 return createdWorkload.Status.Conditions 799 }, util.Timeout, util.Interval).Should(gomega.ContainElement( 800 gomega.BeComparableTo( 801 metav1.Condition{Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue}, 802 wlConditionCmpOpts..., 803 ), 804 ), "Expected 'Finished' workload condition") 805 }) 806 807 ginkgo.It("Should finish the group if one Pod has the `retriable-in-group: false` annotation", func() { 808 ginkgo.By("Creating pods with queue name") 809 pod1 := testingpod.MakePod("test-pod1", ns.Name). 810 Group("test-group"). 811 GroupTotalCount("2"). 812 Queue("test-queue"). 813 Obj() 814 pod2 := testingpod.MakePod("test-pod2", ns.Name). 815 Group("test-group"). 816 GroupTotalCount("2"). 817 Request(corev1.ResourceCPU, "1"). 818 Queue("test-queue"). 819 Obj() 820 pod1LookupKey := client.ObjectKeyFromObject(pod1) 821 pod2LookupKey := client.ObjectKeyFromObject(pod2) 822 823 gomega.Expect(k8sClient.Create(ctx, pod1)).Should(gomega.Succeed()) 824 gomega.Expect(k8sClient.Create(ctx, pod2)).Should(gomega.Succeed()) 825 826 ginkgo.By("checking that workload is created for the pod group with the queue name") 827 wlLookupKey := types.NamespacedName{ 828 Namespace: pod1.Namespace, 829 Name: "test-group", 830 } 831 createdWorkload := &kueue.Workload{} 832 gomega.Eventually(func() error { 833 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 834 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 835 836 gomega.Expect(createdWorkload.Spec.PodSets).To(gomega.HaveLen(2)) 837 gomega.Expect(createdWorkload.Spec.PodSets[0].Count).To(gomega.Equal(int32(1))) 838 gomega.Expect(createdWorkload.Spec.PodSets[1].Count).To(gomega.Equal(int32(1))) 839 gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") 840 841 createdPod := &corev1.Pod{} 842 ginkgo.By("checking that all pods in group are unsuspended when workload is admitted", func() { 843 admission := testing.MakeAdmission(clusterQueue.Name).PodSets( 844 kueue.PodSetAssignment{ 845 Name: "4b0469f7", 846 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 847 corev1.ResourceCPU: "default", 848 }, 849 Count: ptr.To[int32](1), 850 }, 851 kueue.PodSetAssignment{ 852 Name: "bf90803c", 853 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 854 corev1.ResourceCPU: "default", 855 }, 856 Count: ptr.To[int32](1), 857 }, 858 ).Obj() 859 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 860 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 861 862 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod1LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 863 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 864 865 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 866 gomega.Expect(createdWorkload.Status.Conditions).Should(gomega.BeComparableTo( 867 []metav1.Condition{ 868 {Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionTrue}, 869 {Type: kueue.WorkloadAdmitted, Status: metav1.ConditionTrue}, 870 }, 871 wlConditionCmpOpts..., 872 )) 873 }) 874 875 ginkgo.By("checking that the pod group is not finalized if the group has failed", func() { 876 util.SetPodsPhase(ctx, k8sClient, corev1.PodFailed, pod1, pod2) 877 878 gomega.Consistently(func(g gomega.Gomega) []string { 879 g.Expect(k8sClient.Get(ctx, pod1LookupKey, createdPod)).To(gomega.Succeed()) 880 return createdPod.Finalizers 881 }, util.ConsistentDuration, util.Interval).Should(gomega.ContainElement("kueue.x-k8s.io/managed"), 882 "Pod should have finalizer") 883 884 gomega.Consistently(func(g gomega.Gomega) []string { 885 g.Expect(k8sClient.Get(ctx, pod2LookupKey, createdPod)).To(gomega.Succeed()) 886 return createdPod.Finalizers 887 }, util.ConsistentDuration, util.Interval).Should(gomega.ContainElement("kueue.x-k8s.io/managed"), 888 "Pod should have finalizer") 889 }) 890 891 // Create replacement pod with 'retriable-in-group' = false annotation 892 replacementPod2 := testingpod.MakePod("replacement-test-pod2", ns.Name). 893 Group("test-group"). 894 GroupTotalCount("2"). 895 Image("test-image", nil). 896 Annotation("kueue.x-k8s.io/retriable-in-group", "false"). 897 Queue("test-queue"). 898 Obj() 899 gomega.Expect(k8sClient.Create(ctx, replacementPod2)).Should(gomega.Succeed()) 900 replacementPod2LookupKey := client.ObjectKeyFromObject(replacementPod2) 901 902 ginkgo.By("checking that unretriable replacement pod is allowed to run", func() { 903 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, replacementPod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 904 }) 905 906 ginkgo.By("checking that the replaced pod is finalized", func() { 907 util.ExpectPodsFinalized(ctx, k8sClient, pod1LookupKey) 908 }) 909 910 ginkgo.By("checking that pod group is finalized when unretriable pod has failed", func() { 911 util.SetPodsPhase(ctx, k8sClient, corev1.PodFailed, replacementPod2) 912 913 gomega.Eventually(func() []metav1.Condition { 914 err := k8sClient.Get(ctx, wlLookupKey, createdWorkload) 915 if err != nil { 916 return nil 917 } 918 return createdWorkload.Status.Conditions 919 }, util.Timeout, util.Interval).Should(gomega.ContainElement( 920 gomega.BeComparableTo( 921 metav1.Condition{Type: kueue.WorkloadFinished, Status: metav1.ConditionTrue}, 922 wlConditionCmpOpts..., 923 ), 924 ), "Expected 'Finished' workload condition") 925 926 util.ExpectPodsFinalized(ctx, k8sClient, pod2LookupKey, replacementPod2LookupKey) 927 }) 928 }) 929 930 ginkgo.It("Should finalize and delete excess pods", func() { 931 ginkgo.By("Creating pods with queue name") 932 pod1 := testingpod.MakePod("test-pod1", ns.Name). 933 Group("test-group"). 934 GroupTotalCount("2"). 935 Queue("test-queue"). 936 Obj() 937 pod2 := testingpod.MakePod("test-pod2", ns.Name). 938 Group("test-group"). 939 GroupTotalCount("2"). 940 Request(corev1.ResourceCPU, "1"). 941 Queue("test-queue"). 942 Obj() 943 excessBasePod := testingpod.MakePod("excess-pod", ns.Name). 944 Group("test-group"). 945 GroupTotalCount("2"). 946 Request(corev1.ResourceCPU, "1"). 947 Queue("test-queue") 948 949 pod1LookupKey := client.ObjectKeyFromObject(pod1) 950 pod2LookupKey := client.ObjectKeyFromObject(pod2) 951 excessPodLookupKey := client.ObjectKeyFromObject(excessBasePod.Obj()) 952 953 gomega.Expect(k8sClient.Create(ctx, pod1)).Should(gomega.Succeed()) 954 gomega.Expect(k8sClient.Create(ctx, pod2)).Should(gomega.Succeed()) 955 956 ginkgo.By("checking that workload is created") 957 wlLookupKey := types.NamespacedName{ 958 Namespace: pod1.Namespace, 959 Name: "test-group", 960 } 961 createdWorkload := &kueue.Workload{} 962 gomega.Eventually(func() error { 963 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 964 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 965 966 gomega.Expect(createdWorkload.Spec.PodSets).To(gomega.HaveLen(2)) 967 gomega.Expect(createdWorkload.Spec.PodSets[0].Count).To(gomega.Equal(int32(1))) 968 gomega.Expect(createdWorkload.Spec.PodSets[1].Count).To(gomega.Equal(int32(1))) 969 gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") 970 971 createdPod := &corev1.Pod{} 972 ginkgo.By("checking that excess pod is deleted before admission", func() { 973 // Make sure that at least a second passes between 974 // creation of pods to avoid flaky behavior. 975 time.Sleep(time.Second * 1) 976 977 excessPod := excessBasePod.Clone().Obj() 978 gomega.Expect(k8sClient.Create(ctx, excessPod)).Should(gomega.Succeed()) 979 980 gomega.Eventually(func() error { 981 return k8sClient.Get(ctx, excessPodLookupKey, createdPod) 982 }, util.Timeout, util.Interval).Should(testing.BeNotFoundError()) 983 }) 984 985 ginkgo.By("checking that all pods in group are unsuspended when workload is admitted", func() { 986 admission := testing.MakeAdmission(clusterQueue.Name).PodSets( 987 kueue.PodSetAssignment{ 988 Name: "4b0469f7", 989 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 990 corev1.ResourceCPU: "default", 991 }, 992 Count: ptr.To[int32](1), 993 }, 994 kueue.PodSetAssignment{ 995 Name: "bf90803c", 996 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 997 corev1.ResourceCPU: "default", 998 }, 999 Count: ptr.To[int32](1), 1000 }, 1001 ).Obj() 1002 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 1003 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 1004 1005 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod1LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 1006 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, pod2LookupKey, map[string]string{"kubernetes.io/arch": "arm64"}) 1007 1008 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 1009 gomega.Expect(createdWorkload.Status.Conditions).Should(gomega.BeComparableTo( 1010 []metav1.Condition{ 1011 {Type: kueue.WorkloadQuotaReserved, Status: metav1.ConditionTrue}, 1012 {Type: kueue.WorkloadAdmitted, Status: metav1.ConditionTrue}, 1013 }, 1014 wlConditionCmpOpts..., 1015 )) 1016 }) 1017 1018 ginkgo.By("checking that excess pod is deleted after admission", func() { 1019 excessPod := excessBasePod.Clone().Obj() 1020 gomega.Expect(k8sClient.Create(ctx, excessPod)).Should(gomega.Succeed()) 1021 1022 gomega.Eventually(func() error { 1023 return k8sClient.Get(ctx, excessPodLookupKey, createdPod) 1024 }, util.Timeout, util.Interval).Should(testing.BeNotFoundError()) 1025 }) 1026 }) 1027 1028 ginkgo.It("Should finalize all Succeeded Pods when deleted", func() { 1029 ginkgo.By("Creating pods with queue name") 1030 // Use a number of Pods big enough to cause conflicts when removing finalizers >50% of the time. 1031 const podCount = 7 1032 pods := make([]*corev1.Pod, podCount) 1033 for i := range pods { 1034 pods[i] = testingpod.MakePod(fmt.Sprintf("test-pod-%d", i), ns.Name). 1035 Group("test-group"). 1036 GroupTotalCount(strconv.Itoa(podCount)). 1037 Request(corev1.ResourceCPU, "1"). 1038 Queue("test-queue"). 1039 Obj() 1040 gomega.Expect(k8sClient.Create(ctx, pods[i])).Should(gomega.Succeed()) 1041 } 1042 1043 ginkgo.By("checking that workload is created for the pod group") 1044 wlLookupKey := types.NamespacedName{ 1045 Namespace: pods[0].Namespace, 1046 Name: "test-group", 1047 } 1048 createdWorkload := &kueue.Workload{} 1049 gomega.Eventually(func() error { 1050 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 1051 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1052 1053 ginkgo.By("Admitting workload", func() { 1054 admission := testing.MakeAdmission(clusterQueue.Name).PodSets( 1055 kueue.PodSetAssignment{ 1056 Name: "4b0469f7", 1057 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 1058 corev1.ResourceCPU: "default", 1059 }, 1060 Count: ptr.To[int32](podCount), 1061 }, 1062 ).Obj() 1063 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 1064 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 1065 1066 for i := range pods { 1067 util.ExpectPodUnsuspendedWithNodeSelectors(ctx, k8sClient, client.ObjectKeyFromObject(pods[i]), map[string]string{"kubernetes.io/arch": "arm64"}) 1068 } 1069 }) 1070 1071 ginkgo.By("Finishing and deleting Pods", func() { 1072 util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, pods...) 1073 for i := range pods { 1074 gomega.Expect(k8sClient.Delete(ctx, pods[i])).To(gomega.Succeed()) 1075 } 1076 1077 gomega.Eventually(func(g gomega.Gomega) { 1078 for i := range pods { 1079 key := types.NamespacedName{Namespace: ns.Name, Name: fmt.Sprintf("test-pod-%d", i)} 1080 g.Expect(k8sClient.Get(ctx, key, &corev1.Pod{})).To(testing.BeNotFoundError()) 1081 } 1082 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1083 }) 1084 1085 }) 1086 1087 ginkgo.It("Should finalize workload if pods are absent", func() { 1088 ginkgo.By("Creating pods with queue name") 1089 pod1 := testingpod.MakePod("test-pod1", ns.Name). 1090 Group("test-group"). 1091 GroupTotalCount("2"). 1092 Request(corev1.ResourceCPU, "1"). 1093 Queue("test-queue"). 1094 Obj() 1095 pod2 := testingpod.MakePod("test-pod2", ns.Name). 1096 Group("test-group"). 1097 GroupTotalCount("2"). 1098 Request(corev1.ResourceCPU, "2"). 1099 Queue("test-queue"). 1100 Obj() 1101 1102 gomega.Expect(k8sClient.Create(ctx, pod1)).Should(gomega.Succeed()) 1103 gomega.Expect(k8sClient.Create(ctx, pod2)).Should(gomega.Succeed()) 1104 1105 ginkgo.By("checking that workload is created for the pod group with the queue name") 1106 wlLookupKey := types.NamespacedName{ 1107 Namespace: pod1.Namespace, 1108 Name: "test-group", 1109 } 1110 createdWorkload := &kueue.Workload{} 1111 gomega.Eventually(func() error { 1112 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 1113 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1114 1115 gomega.Expect(createdWorkload.Spec.PodSets).To(gomega.HaveLen(2)) 1116 gomega.Expect(createdWorkload.Spec.PodSets[0].Count).To(gomega.Equal(int32(1))) 1117 gomega.Expect(createdWorkload.Spec.PodSets[1].Count).To(gomega.Equal(int32(1))) 1118 gomega.Expect(createdWorkload.Spec.QueueName).To(gomega.Equal("test-queue"), "The Workload should have .spec.queueName set") 1119 gomega.Expect(createdWorkload.ObjectMeta.Finalizers).To(gomega.ContainElement("kueue.x-k8s.io/resource-in-use"), 1120 "The Workload should have the finalizer") 1121 1122 ginkgo.By("checking that workload is finalized when all pods in the group are deleted", func() { 1123 createdPod := corev1.Pod{} 1124 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pod1), &createdPod)).To(gomega.Succeed()) 1125 controllerutil.RemoveFinalizer(&createdPod, "kueue.x-k8s.io/managed") 1126 gomega.Expect(k8sClient.Update(ctx, &createdPod)).To(gomega.Succeed()) 1127 gomega.Expect(k8sClient.Delete(ctx, &createdPod)).To(gomega.Succeed()) 1128 1129 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pod2), &createdPod)).To(gomega.Succeed()) 1130 controllerutil.RemoveFinalizer(&createdPod, "kueue.x-k8s.io/managed") 1131 gomega.Expect(k8sClient.Update(ctx, &createdPod)).To(gomega.Succeed()) 1132 gomega.Expect(k8sClient.Delete(ctx, &createdPod)).To(gomega.Succeed()) 1133 1134 createdWorkload := &kueue.Workload{} 1135 gomega.Eventually(func(g gomega.Gomega) []string { 1136 g.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 1137 return createdWorkload.Finalizers 1138 }, util.Timeout, util.Interval).Should(gomega.BeEmpty(), "Expected workload to be finalized") 1139 }) 1140 }) 1141 }) 1142 }) 1143 }) 1144 1145 var _ = ginkgo.Describe("Pod controller interacting with scheduler", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() { 1146 var ( 1147 ns *corev1.Namespace 1148 spotUntaintedFlavor *kueue.ResourceFlavor 1149 clusterQueue *kueue.ClusterQueue 1150 localQueue *kueue.LocalQueue 1151 ) 1152 1153 ginkgo.BeforeAll(func() { 1154 fwk = &framework.Framework{ 1155 CRDPath: crdPath, 1156 WebhookPath: webhookPath, 1157 } 1158 cfg = fwk.Init() 1159 ctx, k8sClient = fwk.RunManager(cfg, managerAndSchedulerSetup( 1160 jobframework.WithManageJobsWithoutQueueName(false), 1161 jobframework.WithIntegrationOptions(corev1.SchemeGroupVersion.WithKind("Pod").String(), &configapi.PodIntegrationOptions{ 1162 PodSelector: &metav1.LabelSelector{}, 1163 NamespaceSelector: &metav1.LabelSelector{ 1164 MatchExpressions: []metav1.LabelSelectorRequirement{ 1165 { 1166 Key: "kubernetes.io/metadata.name", 1167 Operator: metav1.LabelSelectorOpNotIn, 1168 Values: []string{"kube-system", "kueue-system"}, 1169 }, 1170 }, 1171 }, 1172 }), 1173 )) 1174 spotUntaintedFlavor = testing.MakeResourceFlavor("spot-untainted").Label(instanceKey, "spot-untainted").Obj() 1175 gomega.Expect(k8sClient.Create(ctx, spotUntaintedFlavor)).Should(gomega.Succeed()) 1176 1177 clusterQueue = testing.MakeClusterQueue("dev-clusterqueue"). 1178 ResourceGroup( 1179 *testing.MakeFlavorQuotas("spot-untainted").Resource(corev1.ResourceCPU, "5").Obj(), 1180 ).Obj() 1181 gomega.Expect(k8sClient.Create(ctx, clusterQueue)).Should(gomega.Succeed()) 1182 }) 1183 ginkgo.AfterAll(func() { 1184 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, clusterQueue, true) 1185 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotUntaintedFlavor, true) 1186 fwk.Teardown() 1187 }) 1188 1189 ginkgo.BeforeEach(func() { 1190 ns = &corev1.Namespace{ 1191 ObjectMeta: metav1.ObjectMeta{ 1192 GenerateName: "pod-sched-namespace-", 1193 }, 1194 } 1195 gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed()) 1196 1197 }) 1198 ginkgo.AfterEach(func() { 1199 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 1200 1201 gomega.Eventually(func(g gomega.Gomega) []kueue.Workload { 1202 var workloads kueue.WorkloadList 1203 g.Expect(k8sClient.List(ctx, &workloads, client.InNamespace(ns.Name))).To(gomega.Succeed()) 1204 return workloads.Items 1205 }, util.Timeout, util.Interval).Should( 1206 gomega.BeEmpty(), 1207 "All workloads have to be finalized and deleted before the next test starts", 1208 ) 1209 }) 1210 1211 ginkgo.It("Should schedule pods as they fit in their ClusterQueue", func() { 1212 ginkgo.By("creating localQueue") 1213 localQueue = testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(clusterQueue.Name).Obj() 1214 gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed()) 1215 1216 ginkgo.By("checking if dev pod starts") 1217 pod := testingpod.MakePod("dev-pod", ns.Name).Queue(localQueue.Name). 1218 Request(corev1.ResourceCPU, "2"). 1219 Obj() 1220 gomega.Expect(k8sClient.Create(ctx, pod)).Should(gomega.Succeed()) 1221 createdPod := &corev1.Pod{} 1222 gomega.Eventually(func(g gomega.Gomega) []corev1.PodSchedulingGate { 1223 g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, createdPod)). 1224 To(gomega.Succeed()) 1225 return createdPod.Spec.SchedulingGates 1226 }, util.Timeout, util.Interval).Should(gomega.BeEmpty()) 1227 gomega.Expect(createdPod.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotUntaintedFlavor.Name)) 1228 util.ExpectPendingWorkloadsMetric(clusterQueue, 0, 0) 1229 util.ExpectReservingActiveWorkloadsMetric(clusterQueue, 1) 1230 }) 1231 1232 ginkgo.It("Should schedule pod groups as they fit in their ClusterQueue", func() { 1233 ginkgo.By("creating localQueue") 1234 localQueue = testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(clusterQueue.Name).Obj() 1235 gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed()) 1236 1237 basePod := testingpod.MakePod("pod", ns.Name). 1238 Group("dev-pods"). 1239 GroupTotalCount("4"). 1240 Queue(localQueue.Name). 1241 Request(corev1.ResourceCPU, "1") 1242 1243 role1Pod1 := basePod. 1244 Clone(). 1245 Name("role1-pod1"). 1246 Obj() 1247 role1Pod2 := basePod. 1248 Clone(). 1249 Name("role1-pod2"). 1250 Obj() 1251 role2Pod1 := basePod. 1252 Clone(). 1253 Name("role2-pod1"). 1254 Request(corev1.ResourceCPU, "1.5"). 1255 Obj() 1256 role2Pod2 := basePod. 1257 Clone(). 1258 Name("role2-pod2"). 1259 Request(corev1.ResourceCPU, "1.5"). 1260 Obj() 1261 1262 ginkgo.By("creating the pods", func() { 1263 gomega.Expect(k8sClient.Create(ctx, role1Pod1)).Should(gomega.Succeed()) 1264 gomega.Expect(k8sClient.Create(ctx, role1Pod2)).Should(gomega.Succeed()) 1265 gomega.Expect(k8sClient.Create(ctx, role2Pod1)).Should(gomega.Succeed()) 1266 gomega.Expect(k8sClient.Create(ctx, role2Pod2)).Should(gomega.Succeed()) 1267 }) 1268 1269 // the composed workload is created 1270 wlKey := types.NamespacedName{ 1271 Namespace: role1Pod1.Namespace, 1272 Name: "dev-pods", 1273 } 1274 wl := &kueue.Workload{} 1275 1276 ginkgo.By("checking the composed workload is created", func() { 1277 gomega.Eventually(func(g gomega.Gomega) { 1278 g.Expect(k8sClient.Get(ctx, wlKey, wl)).Should(gomega.Succeed()) 1279 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1280 }) 1281 1282 createdPod := &corev1.Pod{} 1283 ginkgo.By("check the pods are unsuspended", func() { 1284 gomega.Eventually(func(g gomega.Gomega) []corev1.PodSchedulingGate { 1285 g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: role1Pod1.Name, Namespace: role1Pod1.Namespace}, createdPod)). 1286 To(gomega.Succeed()) 1287 return createdPod.Spec.SchedulingGates 1288 }, util.Timeout, util.Interval).Should(gomega.BeEmpty()) 1289 gomega.Expect(createdPod.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotUntaintedFlavor.Name)) 1290 gomega.Eventually(func(g gomega.Gomega) []corev1.PodSchedulingGate { 1291 g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: role1Pod2.Name, Namespace: role1Pod2.Namespace}, createdPod)). 1292 To(gomega.Succeed()) 1293 return createdPod.Spec.SchedulingGates 1294 }, util.Timeout, util.Interval).Should(gomega.BeEmpty()) 1295 gomega.Expect(createdPod.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotUntaintedFlavor.Name)) 1296 gomega.Eventually(func(g gomega.Gomega) []corev1.PodSchedulingGate { 1297 g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: role2Pod1.Name, Namespace: role2Pod1.Namespace}, createdPod)). 1298 To(gomega.Succeed()) 1299 return createdPod.Spec.SchedulingGates 1300 }, util.Timeout, util.Interval).Should(gomega.BeEmpty()) 1301 gomega.Expect(createdPod.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotUntaintedFlavor.Name)) 1302 gomega.Eventually(func(g gomega.Gomega) []corev1.PodSchedulingGate { 1303 g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: role2Pod2.Name, Namespace: role2Pod2.Namespace}, createdPod)). 1304 To(gomega.Succeed()) 1305 return createdPod.Spec.SchedulingGates 1306 }, util.Timeout, util.Interval).Should(gomega.BeEmpty()) 1307 gomega.Expect(createdPod.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotUntaintedFlavor.Name)) 1308 }) 1309 util.ExpectPendingWorkloadsMetric(clusterQueue, 0, 0) 1310 util.ExpectReservingActiveWorkloadsMetric(clusterQueue, 1) 1311 1312 util.SetPodsPhase(ctx, k8sClient, corev1.PodSucceeded, role1Pod1, role1Pod2, role2Pod1, role2Pod2) 1313 1314 ginkgo.By("checking pods are finalized", func() { 1315 gomega.Eventually(func(g gomega.Gomega) { 1316 util.ExpectPodsFinalized(ctx, k8sClient, 1317 client.ObjectKeyFromObject(role1Pod1), 1318 client.ObjectKeyFromObject(role1Pod2), 1319 client.ObjectKeyFromObject(role2Pod1), 1320 client.ObjectKeyFromObject(role2Pod2), 1321 ) 1322 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 1323 }) 1324 }) 1325 1326 ginkgo.When("The workload's admission is removed", func() { 1327 ginkgo.It("Should not restore the original node selectors", func() { 1328 localQueue := testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(clusterQueue.Name).Obj() 1329 pod := testingpod.MakePod("dev-pod", ns.Name).Queue(localQueue.Name). 1330 Request(corev1.ResourceCPU, "2"). 1331 Obj() 1332 lookupKey := types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace} 1333 createdPod := &corev1.Pod{} 1334 1335 ginkgo.By("creating a pod", func() { 1336 gomega.Expect(k8sClient.Create(ctx, pod)).Should(gomega.Succeed()) 1337 }) 1338 1339 ginkgo.By("checking if pod is suspended", func() { 1340 gomega.Eventually(func(g gomega.Gomega) []corev1.PodSchedulingGate { 1341 g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, createdPod)). 1342 To(gomega.Succeed()) 1343 return createdPod.Spec.SchedulingGates 1344 }, util.Timeout, util.Interval).Should( 1345 gomega.ContainElement(corev1.PodSchedulingGate{Name: "kueue.x-k8s.io/admission"}), 1346 ) 1347 }) 1348 1349 // backup the node selector 1350 originalNodeSelector := createdPod.Spec.NodeSelector 1351 1352 ginkgo.By("creating a localQueue", func() { 1353 gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed()) 1354 }) 1355 1356 ginkgo.By("checking if pod is unsuspended", func() { 1357 gomega.Eventually(func() []corev1.PodSchedulingGate { 1358 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdPod)).Should(gomega.Succeed()) 1359 return createdPod.Spec.SchedulingGates 1360 }, util.Timeout, util.Interval).Should(gomega.BeEmpty()) 1361 }) 1362 1363 ginkgo.By("checking if the node selector is updated", func() { 1364 gomega.Eventually(func() map[string]string { 1365 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdPod)).Should(gomega.Succeed()) 1366 return createdPod.Spec.NodeSelector 1367 }, util.Timeout, util.Interval).ShouldNot(gomega.Equal(originalNodeSelector)) 1368 }) 1369 1370 ginkgo.By("deleting the localQueue to prevent readmission", func() { 1371 gomega.Expect(util.DeleteLocalQueue(ctx, k8sClient, localQueue)).Should(gomega.Succeed()) 1372 }) 1373 1374 ginkgo.By("clearing the workload's admission to stop the job", func() { 1375 wl := &kueue.Workload{} 1376 wlKey := types.NamespacedName{Name: podcontroller.GetWorkloadNameForPod(pod.Name), Namespace: pod.Namespace} 1377 gomega.Expect(k8sClient.Get(ctx, wlKey, wl)).Should(gomega.Succeed()) 1378 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, wl, nil)).Should(gomega.Succeed()) 1379 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, wl) 1380 }) 1381 1382 ginkgo.By("checking if pods are deleted", func() { 1383 gomega.Eventually(func(g gomega.Gomega) error { 1384 return k8sClient.Get(ctx, lookupKey, createdPod) 1385 }, util.Timeout, util.Interval).Should(testing.BeNotFoundError()) 1386 }) 1387 }) 1388 }) 1389 })