sigs.k8s.io/kueue@v0.6.2/test/integration/controller/jobs/job/job_controller_test.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package job 18 19 import ( 20 "fmt" 21 "maps" 22 23 "github.com/google/go-cmp/cmp/cmpopts" 24 "github.com/onsi/ginkgo/v2" 25 "github.com/onsi/gomega" 26 batchv1 "k8s.io/api/batch/v1" 27 corev1 "k8s.io/api/core/v1" 28 apierrors "k8s.io/apimachinery/pkg/api/errors" 29 apimeta "k8s.io/apimachinery/pkg/api/meta" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/types" 32 "k8s.io/client-go/kubernetes/scheme" 33 "k8s.io/utils/ptr" 34 ctrl "sigs.k8s.io/controller-runtime" 35 "sigs.k8s.io/controller-runtime/pkg/client" 36 37 configapi "sigs.k8s.io/kueue/apis/config/v1beta1" 38 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 39 "sigs.k8s.io/kueue/pkg/controller/constants" 40 "sigs.k8s.io/kueue/pkg/controller/jobframework" 41 workloadjob "sigs.k8s.io/kueue/pkg/controller/jobs/job" 42 "sigs.k8s.io/kueue/pkg/util/testing" 43 testingjob "sigs.k8s.io/kueue/pkg/util/testingjobs/job" 44 "sigs.k8s.io/kueue/pkg/workload" 45 "sigs.k8s.io/kueue/test/integration/framework" 46 "sigs.k8s.io/kueue/test/util" 47 ) 48 49 const ( 50 parallelism = 4 51 jobName = "test-job" 52 instanceKey = "cloud.provider.com/instance" 53 priorityClassName = "test-priority-class" 54 priorityValue = 10 55 parentJobName = jobName + "-parent" 56 childJobName = jobName + "-child" 57 ) 58 59 var ( 60 ignoreConditionTimestamps = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime") 61 ) 62 63 // +kubebuilder:docs-gen:collapse=Imports 64 65 var _ = ginkgo.Describe("Job controller", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() { 66 67 ginkgo.BeforeAll(func() { 68 fwk = &framework.Framework{ 69 CRDPath: crdPath, 70 } 71 cfg = fwk.Init() 72 ctx, k8sClient = fwk.RunManager(cfg, managerSetup( 73 jobframework.WithManageJobsWithoutQueueName(true), 74 )) 75 }) 76 ginkgo.AfterAll(func() { 77 fwk.Teardown() 78 }) 79 80 var ( 81 ns *corev1.Namespace 82 wlLookupKey types.NamespacedName 83 parentWlLookupKey types.NamespacedName 84 childLookupKey types.NamespacedName 85 ) 86 87 ginkgo.BeforeEach(func() { 88 ns = &corev1.Namespace{ 89 ObjectMeta: metav1.ObjectMeta{ 90 GenerateName: "core-", 91 }, 92 } 93 gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed()) 94 wlLookupKey = types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(jobName), Namespace: ns.Name} 95 parentWlLookupKey = types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(parentJobName), Namespace: ns.Name} 96 childLookupKey = types.NamespacedName{Name: childJobName, Namespace: ns.Name} 97 }) 98 99 ginkgo.AfterEach(func() { 100 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 101 }) 102 103 ginkgo.It("Should reconcile workload and job for all jobs", func() { 104 ginkgo.By("checking the job gets suspended when created unsuspended") 105 priorityClass := testing.MakePriorityClass(priorityClassName). 
106 PriorityValue(int32(priorityValue)).Obj() 107 gomega.Expect(k8sClient.Create(ctx, priorityClass)).Should(gomega.Succeed()) 108 ginkgo.DeferCleanup(func() { 109 gomega.Expect(k8sClient.Delete(ctx, priorityClass)).To(gomega.Succeed()) 110 }) 111 job := testingjob.MakeJob(jobName, ns.Name).PriorityClass(priorityClassName).Obj() 112 gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed()) 113 lookupKey := types.NamespacedName{Name: jobName, Namespace: ns.Name} 114 createdJob := &batchv1.Job{} 115 gomega.Eventually(func() bool { 116 if err := k8sClient.Get(ctx, lookupKey, createdJob); err != nil { 117 return false 118 } 119 return createdJob.Spec.Suspend != nil && *createdJob.Spec.Suspend 120 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 121 122 ginkgo.By("checking the workload is created without queue assigned") 123 createdWorkload := &kueue.Workload{} 124 gomega.Eventually(func() error { 125 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 126 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 127 gomega.Expect(createdWorkload.Spec.QueueName).Should(gomega.Equal(""), "The Workload shouldn't have .spec.queueName set") 128 gomega.Expect(metav1.IsControlledBy(createdWorkload, job)).To(gomega.BeTrue(), "The Workload should be owned by the Job") 129 130 createdTime := createdWorkload.CreationTimestamp 131 132 ginkgo.By("checking the workload is created with priority and priorityName") 133 gomega.Expect(createdWorkload.Spec.PriorityClassName).Should(gomega.Equal(priorityClassName)) 134 gomega.Expect(*createdWorkload.Spec.Priority).Should(gomega.Equal(int32(priorityValue))) 135 136 ginkgo.By("checking the workload is updated with queue name when the job does") 137 jobQueueName := "test-queue" 138 createdJob.Annotations = map[string]string{constants.QueueAnnotation: jobQueueName} 139 gomega.Expect(k8sClient.Update(ctx, createdJob)).Should(gomega.Succeed()) 140 gomega.Eventually(func() bool { 141 if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil { 142 return false 143 } 144 return createdWorkload.Spec.QueueName == jobQueueName 145 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 146 147 ginkgo.By("updated workload should have the same created timestamp", func() { 148 gomega.Expect(createdWorkload.CreationTimestamp).Should(gomega.Equal(createdTime)) 149 }) 150 151 ginkgo.By("checking a second non-matching workload is deleted") 152 secondWl := &kueue.Workload{ 153 ObjectMeta: metav1.ObjectMeta{ 154 Name: workloadjob.GetWorkloadNameForJob("second-workload"), 155 Namespace: createdWorkload.Namespace, 156 }, 157 Spec: *createdWorkload.Spec.DeepCopy(), 158 } 159 gomega.Expect(ctrl.SetControllerReference(createdJob, secondWl, scheme.Scheme)).Should(gomega.Succeed()) 160 secondWl.Spec.PodSets[0].Count += 1 161 gomega.Expect(k8sClient.Create(ctx, secondWl)).Should(gomega.Succeed()) 162 gomega.Eventually(func() error { 163 wl := &kueue.Workload{} 164 key := types.NamespacedName{Name: secondWl.Name, Namespace: secondWl.Namespace} 165 return k8sClient.Get(ctx, key, wl) 166 }, util.Timeout, util.Interval).Should(testing.BeNotFoundError()) 167 // check the original wl is still there 168 gomega.Consistently(func() error { 169 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 170 }, util.ConsistentDuration, util.Interval).Should(gomega.Succeed()) 171 gomega.Eventually(func() bool { 172 ok, _ := testing.CheckLatestEvent(ctx, k8sClient, "DeletedWorkload", corev1.EventTypeNormal, fmt.Sprintf("Deleted not matching Workload: %v", 
workload.Key(secondWl))) 173 return ok 174 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 175 176 ginkgo.By("checking the job is unsuspended when workload is assigned") 177 onDemandFlavor := testing.MakeResourceFlavor("on-demand").Label(instanceKey, "on-demand").Obj() 178 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 179 spotFlavor := testing.MakeResourceFlavor("spot").Label(instanceKey, "spot").Obj() 180 gomega.Expect(k8sClient.Create(ctx, spotFlavor)).Should(gomega.Succeed()) 181 clusterQueue := testing.MakeClusterQueue("cluster-queue"). 182 ResourceGroup( 183 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 184 *testing.MakeFlavorQuotas("spot").Resource(corev1.ResourceCPU, "5").Obj(), 185 ).Obj() 186 admission := testing.MakeAdmission(clusterQueue.Name). 187 Assignment(corev1.ResourceCPU, "on-demand", "1m"). 188 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 189 Obj() 190 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 191 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 192 gomega.Eventually(func() bool { 193 if err := k8sClient.Get(ctx, lookupKey, createdJob); err != nil { 194 return false 195 } 196 return !*createdJob.Spec.Suspend 197 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 198 gomega.Eventually(func() bool { 199 ok, _ := testing.CheckLatestEvent(ctx, k8sClient, "Started", corev1.EventTypeNormal, fmt.Sprintf("Admitted by clusterQueue %v", clusterQueue.Name)) 200 return ok 201 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 202 gomega.Expect(len(createdJob.Spec.Template.Spec.NodeSelector)).Should(gomega.Equal(1)) 203 gomega.Expect(createdJob.Spec.Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(onDemandFlavor.Name)) 204 gomega.Consistently(func() bool { 205 if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil { 206 return false 207 } 208 return len(createdWorkload.Status.Conditions) == 2 209 }, util.ConsistentDuration, util.Interval).Should(gomega.BeTrue()) 210 211 // We need to set startTime to the job since the kube-controller-manager doesn't exist in envtest. 
212 ginkgo.By("setting startTime to the job") 213 now := metav1.Now() 214 createdJob.Status.StartTime = &now 215 gomega.Expect(k8sClient.Status().Update(ctx, createdJob)).Should(gomega.Succeed()) 216 217 ginkgo.By("checking the job gets suspended when parallelism changes and the added node selectors are removed") 218 newParallelism := int32(parallelism + 1) 219 createdJob.Spec.Parallelism = &newParallelism 220 gomega.Expect(k8sClient.Update(ctx, createdJob)).Should(gomega.Succeed()) 221 gomega.Eventually(func() bool { 222 if err := k8sClient.Get(ctx, lookupKey, createdJob); err != nil { 223 return false 224 } 225 return createdJob.Spec.Suspend != nil && *createdJob.Spec.Suspend && createdJob.Status.StartTime == nil && 226 len(createdJob.Spec.Template.Spec.NodeSelector) == 0 227 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 228 gomega.Eventually(func() bool { 229 ok, _ := testing.CheckLatestEvent(ctx, k8sClient, "DeletedWorkload", corev1.EventTypeNormal, fmt.Sprintf("Deleted not matching Workload: %v", wlLookupKey.String())) 230 return ok 231 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 232 233 ginkgo.By("checking the workload is updated with new count") 234 gomega.Eventually(func() bool { 235 if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil { 236 return false 237 } 238 return createdWorkload.Spec.PodSets[0].Count == newParallelism 239 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 240 gomega.Expect(createdWorkload.Status.Admission).Should(gomega.BeNil()) 241 242 ginkgo.By("checking the job is unsuspended and selectors added when workload is assigned again") 243 admission = testing.MakeAdmission(clusterQueue.Name). 244 Assignment(corev1.ResourceCPU, "spot", "1m"). 245 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 
246 Obj() 247 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 248 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 249 gomega.Eventually(func() bool { 250 if err := k8sClient.Get(ctx, lookupKey, createdJob); err != nil { 251 return false 252 } 253 return !*createdJob.Spec.Suspend 254 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 255 gomega.Expect(len(createdJob.Spec.Template.Spec.NodeSelector)).Should(gomega.Equal(1)) 256 gomega.Expect(createdJob.Spec.Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotFlavor.Name)) 257 gomega.Consistently(func() bool { 258 if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil { 259 return false 260 } 261 return len(createdWorkload.Status.Conditions) == 2 262 }, util.ConsistentDuration, util.Interval).Should(gomega.BeTrue()) 263 264 ginkgo.By("checking the workload is finished when job is completed") 265 createdJob.Status.Conditions = append(createdJob.Status.Conditions, 266 batchv1.JobCondition{ 267 Type: batchv1.JobComplete, 268 Status: corev1.ConditionTrue, 269 LastProbeTime: metav1.Now(), 270 LastTransitionTime: metav1.Now(), 271 }) 272 gomega.Expect(k8sClient.Status().Update(ctx, createdJob)).Should(gomega.Succeed()) 273 gomega.Eventually(func() bool { 274 err := k8sClient.Get(ctx, wlLookupKey, createdWorkload) 275 if err != nil || len(createdWorkload.Status.Conditions) == 1 { 276 return false 277 } 278 279 return apimeta.IsStatusConditionTrue(createdWorkload.Status.Conditions, kueue.WorkloadFinished) 280 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 281 }) 282 283 ginkgo.It("Should reconcile job when queueName set by annotation (deprecated)", func() { 284 ginkgo.By("checking the workload is created with correct queue name assigned") 285 jobQueueName := "test-queue" 286 job := testingjob.MakeJob(jobName, ns.Name).QueueNameAnnotation("test-queue").Obj() 287 gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed()) 288 createdWorkload := &kueue.Workload{} 289 gomega.Eventually(func() error { 290 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 291 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 292 gomega.Expect(createdWorkload.Spec.QueueName).Should(gomega.Equal(jobQueueName)) 293 }) 294 295 ginkgo.When("The parent-workload annotation is used", func() { 296 297 ginkgo.It("Should suspend a job if the parent workload does not exist", func() { 298 ginkgo.By("Creating the child job which uses the parent workload annotation") 299 childJob := testingjob.MakeJob(childJobName, ns.Name).Suspend(false).ParentWorkload("non-existing-parent-workload").Obj() 300 gomega.Expect(k8sClient.Create(ctx, childJob)).Should(gomega.Succeed()) 301 302 ginkgo.By("checking that the child job is suspended") 303 gomega.Eventually(func() *bool { 304 gomega.Expect(k8sClient.Get(ctx, childLookupKey, childJob)).Should(gomega.Succeed()) 305 return childJob.Spec.Suspend 306 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true))) 307 }) 308 309 ginkgo.It("Should not create child workload for a job with parent-workload annotation", func() { 310 ginkgo.By("creating the parent job") 311 parentJob := testingjob.MakeJob(parentJobName, ns.Name).Obj() 312 gomega.Expect(k8sClient.Create(ctx, parentJob)).Should(gomega.Succeed()) 313 314 ginkgo.By("waiting for the parent workload to be created") 315 parentWorkload := &kueue.Workload{} 316 gomega.Eventually(func() error { 317 return k8sClient.Get(ctx, parentWlLookupKey, 
parentWorkload) 318 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 319 320 ginkgo.By("Creating the child job which uses the parent workload annotation") 321 childJob := testingjob.MakeJob(childJobName, ns.Name).ParentWorkload(parentWorkload.Name).Obj() 322 gomega.Expect(k8sClient.Create(ctx, childJob)).Should(gomega.Succeed()) 323 324 ginkgo.By("Checking that the child workload is not created") 325 childWorkload := &kueue.Workload{} 326 childWlLookupKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(childJobName), Namespace: ns.Name} 327 gomega.Consistently(func() bool { 328 return apierrors.IsNotFound(k8sClient.Get(ctx, childWlLookupKey, childWorkload)) 329 }, util.ConsistentDuration, util.Interval).Should(gomega.BeTrue()) 330 }) 331 332 ginkgo.It("Should not update the queue name of the workload with an empty value that the child job has", func() { 333 jobQueueName := "test-queue" 334 335 ginkgo.By("creating the parent job with queue name") 336 parentJob := testingjob.MakeJob(parentJobName, ns.Name).Queue(jobQueueName).Obj() 337 gomega.Expect(k8sClient.Create(ctx, parentJob)).Should(gomega.Succeed()) 338 339 ginkgo.By("waiting for the parent workload to be created") 340 parentWorkload := &kueue.Workload{} 341 gomega.Eventually(func() error { 342 return k8sClient.Get(ctx, parentWlLookupKey, parentWorkload) 343 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 344 345 ginkgo.By("Creating the child job which uses the parent workload annotation") 346 childJob := testingjob.MakeJob(childJobName, ns.Name).ParentWorkload(parentWorkload.Name).Obj() 347 gomega.Expect(k8sClient.Create(ctx, childJob)).Should(gomega.Succeed()) 348 349 ginkgo.By("Checking that the queue name of the parent workload isn't updated with an empty value") 350 parentWorkload = &kueue.Workload{} 351 gomega.Consistently(func() bool { 352 if err := k8sClient.Get(ctx, parentWlLookupKey, parentWorkload); err != nil { 353 return true 354 } 355 return parentWorkload.Spec.QueueName == jobQueueName 356 }, util.ConsistentDuration, util.Interval).Should(gomega.BeTrue()) 357 }) 358 359 ginkgo.It("Should change the suspension status of the child job when the parent workload is not admitted", func() { 360 ginkgo.By("Create a resource flavor") 361 defaultFlavor := testing.MakeResourceFlavor("default").Label(instanceKey, "default").Obj() 362 gomega.Expect(k8sClient.Create(ctx, defaultFlavor)).Should(gomega.Succeed()) 363 364 ginkgo.By("creating the parent job") 365 parentJob := testingjob.MakeJob(parentJobName, ns.Name).Obj() 366 gomega.Expect(k8sClient.Create(ctx, parentJob)).Should(gomega.Succeed()) 367 368 ginkgo.By("waiting for the parent workload to be created") 369 parentWorkload := &kueue.Workload{} 370 gomega.Eventually(func() error { 371 return k8sClient.Get(ctx, parentWlLookupKey, parentWorkload) 372 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 373 374 ginkgo.By("Creating the child job with the parent-workload annotation") 375 childJob := testingjob.MakeJob(childJobName, ns.Name).ParentWorkload(parentWlLookupKey.Name).Suspend(false).Obj() 376 gomega.Expect(k8sClient.Create(ctx, childJob)).Should(gomega.Succeed()) 377 378 ginkgo.By("checking that the child job is suspended") 379 gomega.Eventually(func() *bool { 380 gomega.Expect(k8sClient.Get(ctx, childLookupKey, childJob)).Should(gomega.Succeed()) 381 return childJob.Spec.Suspend 382 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true))) 383 }) 384 }) 385 386 ginkgo.When("A prebuilt workload is used", func() { 387 
ginkgo.It("Should get suspended if the workload is not found", func() { 388 job := testingjob.MakeJob("job", ns.Name). 389 Queue("main"). 390 Label(constants.PrebuiltWorkloadLabel, "missing-workload"). 391 Obj() 392 gomega.Expect(k8sClient.Create(ctx, job)).To(gomega.Succeed()) 393 gomega.Eventually(func(g gomega.Gomega) { 394 createdJob := batchv1.Job{} 395 g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(job), &createdJob)).To(gomega.Succeed()) 396 g.Expect(ptr.Deref(createdJob.Spec.Suspend, false)).To(gomega.BeTrue()) 397 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 398 }) 399 400 ginkgo.It("Should take the ownership of the workload and continue the usual execution", func() { 401 container := corev1.Container{ 402 Name: "c", 403 Image: "pause", 404 } 405 testingjob.SetContainerDefaults(&container) 406 wl := testing.MakeWorkload("wl", ns.Name). 407 PodSets(*testing.MakePodSet("main", 1). 408 Containers(*container.DeepCopy()). 409 Obj()). 410 Obj() 411 gomega.Expect(k8sClient.Create(ctx, wl)).To(gomega.Succeed()) 412 gomega.Eventually(func(g gomega.Gomega) { 413 createdWl := kueue.Workload{} 414 g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &createdWl)).To(gomega.Succeed()) 415 g.Expect(createdWl.OwnerReferences).To(gomega.BeEmpty()) 416 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 417 418 job := testingjob.MakeJob("job", ns.Name). 419 Queue("main"). 420 Label(constants.PrebuiltWorkloadLabel, "wl"). 421 Containers(*container.DeepCopy()). 422 Obj() 423 gomega.Expect(k8sClient.Create(ctx, job)).To(gomega.Succeed()) 424 ginkgo.By("Checking the job gets suspended", func() { 425 gomega.Eventually(func(g gomega.Gomega) { 426 createdJob := batchv1.Job{} 427 g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(job), &createdJob)).To(gomega.Succeed()) 428 g.Expect(ptr.Deref(createdJob.Spec.Suspend, false)).To(gomega.BeTrue()) 429 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 430 }) 431 432 ginkgo.By("Check the job gets the ownership of the workload", func() { 433 gomega.Eventually(func(g gomega.Gomega) { 434 createdWl := kueue.Workload{} 435 g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &createdWl)).To(gomega.Succeed()) 436 437 g.Expect(createdWl.OwnerReferences).To(gomega.ContainElement( 438 gomega.BeComparableTo(metav1.OwnerReference{ 439 Name: job.Name, 440 UID: job.UID, 441 }, cmpopts.IgnoreFields(metav1.OwnerReference{}, "APIVersion", "Kind", "Controller", "BlockOwnerDeletion")))) 442 443 // The workload is not marked as finished. 
444 g.Expect(apimeta.IsStatusConditionTrue(createdWl.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeFalse()) 445 446 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 447 }) 448 449 ginkgo.By("Admitting the workload, the job should unsuspend", func() { 450 gomega.Eventually(func(g gomega.Gomega) { 451 createdWl := kueue.Workload{} 452 g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &createdWl)).To(gomega.Succeed()) 453 454 admission := testing.MakeAdmission("cq", container.Name).Obj() 455 g.Expect(util.SetQuotaReservation(ctx, k8sClient, wl, admission)).To(gomega.Succeed()) 456 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, wl) 457 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 458 459 ginkgo.By("Checking the job gets suspended", func() { 460 gomega.Eventually(func(g gomega.Gomega) { 461 createdJob := batchv1.Job{} 462 g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(job), &createdJob)).To(gomega.Succeed()) 463 g.Expect(ptr.Deref(createdJob.Spec.Suspend, true)).To(gomega.BeFalse()) 464 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 465 }) 466 }) 467 468 ginkgo.By("Finishing the job, the workload should be finish", func() { 469 createdJob := batchv1.Job{} 470 gomega.Eventually(func(g gomega.Gomega) { 471 g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(job), &createdJob)).To(gomega.Succeed()) 472 createdJob.Status.Succeeded = 1 473 createdJob.Status.Conditions = []batchv1.JobCondition{ 474 { 475 Type: batchv1.JobComplete, 476 Status: corev1.ConditionTrue, 477 LastProbeTime: metav1.Now(), 478 LastTransitionTime: metav1.Now(), 479 Reason: "ByTest", 480 Message: "by test", 481 }, 482 } 483 g.Expect(k8sClient.Status().Update(ctx, &createdJob)).To(gomega.Succeed()) 484 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 485 486 ginkgo.By("Checking the workload is finished", func() { 487 gomega.Eventually(func(g gomega.Gomega) { 488 createdWl := kueue.Workload{} 489 g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wl), &createdWl)).To(gomega.Succeed()) 490 491 g.Expect(createdWl.Status.Conditions).To(gomega.ContainElement( 492 gomega.BeComparableTo(metav1.Condition{ 493 Type: kueue.WorkloadFinished, 494 Status: metav1.ConditionTrue, 495 Reason: "JobFinished", 496 Message: "Job finished successfully", 497 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")))) 498 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 499 }) 500 }) 501 }) 502 503 }) 504 505 ginkgo.It("Should finish the preemption when the job becomes inactive", func() { 506 job := testingjob.MakeJob(jobName, ns.Name).Queue("q").Obj() 507 wl := &kueue.Workload{} 508 ginkgo.By("create the job and admit the workload", func() { 509 gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed()) 510 gomega.Eventually(func() error { return k8sClient.Get(ctx, wlLookupKey, wl) }, util.Timeout, util.Interval).Should(gomega.Succeed()) 511 admission := testing.MakeAdmission("q", job.Spec.Template.Spec.Containers[0].Name).Obj() 512 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, wl, admission)).To(gomega.Succeed()) 513 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, wl) 514 }) 515 516 ginkgo.By("wait for the job to be unsuspended", func() { 517 gomega.Eventually(func() bool { 518 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(job), job)).To(gomega.Succeed()) 519 return *job.Spec.Suspend 520 }, util.Timeout, util.Interval).Should(gomega.BeFalse()) 521 }) 522 523 ginkgo.By("mark the job as active", func() { 524 
gomega.Eventually(func() error { 525 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(job), job)).To(gomega.Succeed()) 526 job.Status.Active = 1 527 return k8sClient.Status().Update(ctx, job) 528 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 529 }) 530 531 ginkgo.By("preempt the workload", func() { 532 gomega.Eventually(func() error { 533 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, wl)).To(gomega.Succeed()) 534 return workload.UpdateStatus(ctx, k8sClient, wl, kueue.WorkloadEvicted, metav1.ConditionTrue, kueue.WorkloadEvictedByPreemption, "By test", "evict") 535 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 536 }) 537 538 ginkgo.By("wait for the job to be suspended", func() { 539 gomega.Eventually(func() bool { 540 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(job), job)).To(gomega.Succeed()) 541 return *job.Spec.Suspend 542 }, util.Timeout, util.Interval).Should(gomega.BeTrue()) 543 }) 544 545 ginkgo.By("the workload should stay admitted", func() { 546 gomega.Consistently(func() bool { 547 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, wl)).To(gomega.Succeed()) 548 return apimeta.IsStatusConditionTrue(wl.Status.Conditions, kueue.WorkloadQuotaReserved) 549 }, util.ConsistentDuration, util.Interval).Should(gomega.BeTrue()) 550 }) 551 552 ginkgo.By("mark the job as inactive", func() { 553 gomega.Eventually(func() error { 554 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(job), job)).To(gomega.Succeed()) 555 job.Status.Active = 0 556 return k8sClient.Status().Update(ctx, job) 557 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 558 }) 559 560 ginkgo.By("the workload should get unadmitted", func() { 561 util.ExpectWorkloadsToBePending(ctx, k8sClient, wl) 562 }) 563 }) 564 565 ginkgo.When("the queue has admission checks", func() { 566 var ( 567 clusterQueueAc *kueue.ClusterQueue 568 localQueue *kueue.LocalQueue 569 testFlavor *kueue.ResourceFlavor 570 jobLookupKey *types.NamespacedName 571 wlLookupKey *types.NamespacedName 572 admissionCheck *kueue.AdmissionCheck 573 ) 574 575 ginkgo.BeforeEach(func() { 576 admissionCheck = testing.MakeAdmissionCheck("check").ControllerName("ac-controller").Obj() 577 gomega.Expect(k8sClient.Create(ctx, admissionCheck)).To(gomega.Succeed()) 578 util.SetAdmissionCheckActive(ctx, k8sClient, admissionCheck, metav1.ConditionTrue) 579 clusterQueueAc = testing.MakeClusterQueue("prod-cq-with-checks"). 
580 ResourceGroup( 581 *testing.MakeFlavorQuotas("test-flavor").Resource(corev1.ResourceCPU, "5").Obj(), 582 ).AdmissionChecks("check").Obj() 583 gomega.Expect(k8sClient.Create(ctx, clusterQueueAc)).Should(gomega.Succeed()) 584 localQueue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(clusterQueueAc.Name).Obj() 585 gomega.Expect(k8sClient.Create(ctx, localQueue)).To(gomega.Succeed()) 586 testFlavor = testing.MakeResourceFlavor("test-flavor").Label(instanceKey, "test-flavor").Obj() 587 gomega.Expect(k8sClient.Create(ctx, testFlavor)).Should(gomega.Succeed()) 588 589 jobLookupKey = &types.NamespacedName{Name: jobName, Namespace: ns.Name} 590 wlLookupKey = &types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(jobName), Namespace: ns.Name} 591 }) 592 593 ginkgo.AfterEach(func() { 594 gomega.Expect(util.DeleteAdmissionCheck(ctx, k8sClient, admissionCheck)).To(gomega.Succeed()) 595 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, testFlavor, true) 596 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 597 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, clusterQueueAc, true) 598 }) 599 600 ginkgo.It("labels and annotations should be propagated from admission check to job", func() { 601 createdJob := &batchv1.Job{} 602 createdWorkload := &kueue.Workload{} 603 604 ginkgo.By("creating the job with pod labels & annotations", func() { 605 job := testingjob.MakeJob(jobName, ns.Name). 606 Queue(localQueue.Name). 607 Request(corev1.ResourceCPU, "5"). 608 PodAnnotation("old-ann-key", "old-ann-value"). 609 PodLabel("old-label-key", "old-label-value"). 610 Obj() 611 gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed()) 612 }) 613 614 ginkgo.By("fetch the job and verify it is suspended as the checks are not ready", func() { 615 gomega.Eventually(func() *bool { 616 gomega.Expect(k8sClient.Get(ctx, *jobLookupKey, createdJob)).Should(gomega.Succeed()) 617 return createdJob.Spec.Suspend 618 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true))) 619 }) 620 621 ginkgo.By("fetch the created workload", func() { 622 gomega.Eventually(func() error { 623 return k8sClient.Get(ctx, *wlLookupKey, createdWorkload) 624 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 625 }) 626 627 ginkgo.By("add labels & annotations to the workload admission check in PodSetUpdates", func() { 628 gomega.Eventually(func() error { 629 var newWL kueue.Workload 630 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(createdWorkload), &newWL)).To(gomega.Succeed()) 631 workload.SetAdmissionCheckState(&newWL.Status.AdmissionChecks, kueue.AdmissionCheckState{ 632 Name: "check", 633 State: kueue.CheckStateReady, 634 PodSetUpdates: []kueue.PodSetUpdate{ 635 { 636 Name: "main", 637 Labels: map[string]string{ 638 "label1": "label-value1", 639 }, 640 Annotations: map[string]string{ 641 "ann1": "ann-value1", 642 }, 643 NodeSelector: map[string]string{ 644 "selector1": "selector-value1", 645 }, 646 Tolerations: []corev1.Toleration{ 647 { 648 Key: "selector1", 649 Value: "selector-value1", 650 Operator: corev1.TolerationOpEqual, 651 Effect: corev1.TaintEffectNoSchedule, 652 }, 653 }, 654 }, 655 }, 656 }) 657 return k8sClient.Status().Update(ctx, &newWL) 658 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 659 }) 660 661 ginkgo.By("admit the workload", func() { 662 admission := testing.MakeAdmission(clusterQueueAc.Name). 663 Assignment(corev1.ResourceCPU, "test-flavor", "1"). 664 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 
665 Obj() 666 gomega.Expect(k8sClient.Get(ctx, *wlLookupKey, createdWorkload)).Should(gomega.Succeed()) 667 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 668 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 669 }) 670 671 ginkgo.By("await for the job to be admitted", func() { 672 gomega.Eventually(func() *bool { 673 gomega.Expect(k8sClient.Get(ctx, *jobLookupKey, createdJob)).Should(gomega.Succeed()) 674 return createdJob.Spec.Suspend 675 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false))) 676 }) 677 678 ginkgo.By("verify the PodSetUpdates are propagated to the running job", func() { 679 gomega.Expect(createdJob.Spec.Template.Annotations).Should(gomega.HaveKeyWithValue("ann1", "ann-value1")) 680 gomega.Expect(createdJob.Spec.Template.Annotations).Should(gomega.HaveKeyWithValue("old-ann-key", "old-ann-value")) 681 gomega.Expect(createdJob.Spec.Template.Labels).Should(gomega.HaveKeyWithValue("label1", "label-value1")) 682 gomega.Expect(createdJob.Spec.Template.Labels).Should(gomega.HaveKeyWithValue("old-label-key", "old-label-value")) 683 gomega.Expect(createdJob.Spec.Template.Spec.NodeSelector).Should(gomega.HaveKeyWithValue(instanceKey, "test-flavor")) 684 gomega.Expect(createdJob.Spec.Template.Spec.NodeSelector).Should(gomega.HaveKeyWithValue("selector1", "selector-value1")) 685 gomega.Expect(createdJob.Spec.Template.Spec.Tolerations).Should(gomega.BeComparableTo( 686 []corev1.Toleration{ 687 { 688 Key: "selector1", 689 Value: "selector-value1", 690 Operator: corev1.TolerationOpEqual, 691 Effect: corev1.TaintEffectNoSchedule, 692 }, 693 }, 694 )) 695 }) 696 697 ginkgo.By("delete the localQueue to prevent readmission", func() { 698 gomega.Expect(util.DeleteLocalQueue(ctx, k8sClient, localQueue)).Should(gomega.Succeed()) 699 }) 700 701 ginkgo.By("clear the workload's admission to stop the job", func() { 702 gomega.Expect(k8sClient.Get(ctx, *wlLookupKey, createdWorkload)).Should(gomega.Succeed()) 703 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, nil)).Should(gomega.Succeed()) 704 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 705 }) 706 707 ginkgo.By("await for the job to be suspended", func() { 708 gomega.Eventually(func() *bool { 709 gomega.Expect(k8sClient.Get(ctx, *jobLookupKey, createdJob)).Should(gomega.Succeed()) 710 return createdJob.Spec.Suspend 711 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true))) 712 }) 713 714 ginkgo.By("verify the PodSetUpdates are restored", func() { 715 // In case of batch/Job the stop is done with multiple API calls, suspended=true being 716 // done before the info restoration. We should retry the read if the Info is not restored. 
717 gomega.Eventually(func(g gomega.Gomega) { 718 g.Expect(k8sClient.Get(ctx, *jobLookupKey, createdJob)).Should(gomega.Succeed()) 719 g.Expect(createdJob.Spec.Template.Annotations).ShouldNot(gomega.HaveKey("ann1")) 720 g.Expect(createdJob.Spec.Template.Annotations).Should(gomega.HaveKeyWithValue("old-ann-key", "old-ann-value")) 721 g.Expect(createdJob.Spec.Template.Labels).ShouldNot(gomega.HaveKey("label1")) 722 g.Expect(createdJob.Spec.Template.Labels).Should(gomega.HaveKeyWithValue("old-label-key", "old-label-value")) 723 g.Expect(createdJob.Spec.Template.Spec.NodeSelector).ShouldNot(gomega.HaveKey(instanceKey)) 724 g.Expect(createdJob.Spec.Template.Spec.NodeSelector).ShouldNot(gomega.HaveKey("selector1")) 725 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 726 }) 727 }) 728 729 ginkgo.It("should not admit workload if there is a conflict in labels", func() { 730 createdJob := &batchv1.Job{} 731 createdWorkload := &kueue.Workload{} 732 733 ginkgo.By("creating the job with default priority", func() { 734 job := testingjob.MakeJob(jobName, ns.Name). 735 Queue(localQueue.Name). 736 Request(corev1.ResourceCPU, "5"). 737 PodLabel("label-key", "old-label-value"). 738 Obj() 739 gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed()) 740 }) 741 742 ginkgo.By("fetch the created job & workload", func() { 743 gomega.Eventually(func() *bool { 744 gomega.Expect(k8sClient.Get(ctx, *jobLookupKey, createdJob)).Should(gomega.Succeed()) 745 return createdJob.Spec.Suspend 746 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true))) 747 gomega.Eventually(func() error { 748 return k8sClient.Get(ctx, *wlLookupKey, createdWorkload) 749 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 750 }) 751 752 ginkgo.By("add a conflicting label to the admission check in PodSetUpdates", func() { 753 gomega.Eventually(func() error { 754 var newWL kueue.Workload 755 gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(createdWorkload), &newWL)).To(gomega.Succeed()) 756 workload.SetAdmissionCheckState(&newWL.Status.AdmissionChecks, kueue.AdmissionCheckState{ 757 Name: "check", 758 State: kueue.CheckStateReady, 759 PodSetUpdates: []kueue.PodSetUpdate{ 760 { 761 Name: "main", 762 Labels: map[string]string{ 763 "label-key": "new-label-value", 764 }, 765 }, 766 }, 767 }) 768 return k8sClient.Status().Update(ctx, &newWL) 769 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 770 }) 771 772 ginkgo.By("attempt to admit the workload", func() { 773 admission := testing.MakeAdmission(clusterQueueAc.Name). 774 Assignment(corev1.ResourceCPU, "test-flavor", "1"). 775 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 
776 Obj() 777 gomega.Expect(k8sClient.Get(ctx, *wlLookupKey, createdWorkload)).Should(gomega.Succeed()) 778 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 779 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 780 }) 781 782 ginkgo.By("verify the job is not started", func() { 783 gomega.Consistently(func() *bool { 784 gomega.Expect(k8sClient.Get(ctx, *jobLookupKey, createdJob)).Should(gomega.Succeed()) 785 return createdJob.Spec.Suspend 786 }, util.ConsistentDuration, util.Interval).Should(gomega.Equal(ptr.To(true))) 787 }) 788 789 ginkgo.By("verify the job has the old label value", func() { 790 gomega.Expect(createdJob.Spec.Template.Labels).Should(gomega.HaveKeyWithValue("label-key", "old-label-value")) 791 }) 792 }) 793 }) 794 }) 795 796 var _ = ginkgo.Describe("Job controller when waitForPodsReady enabled", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() { 797 type podsReadyTestSpec struct { 798 beforeJobStatus *batchv1.JobStatus 799 beforeCondition *metav1.Condition 800 jobStatus batchv1.JobStatus 801 suspended bool 802 wantCondition *metav1.Condition 803 } 804 805 var ( 806 ns *corev1.Namespace 807 defaultFlavor = testing.MakeResourceFlavor("default").Label(instanceKey, "default").Obj() 808 wlLookupKey types.NamespacedName 809 ) 810 811 ginkgo.BeforeAll(func() { 812 fwk = &framework.Framework{ 813 CRDPath: crdPath, 814 } 815 cfg = fwk.Init() 816 ctx, k8sClient = fwk.RunManager(cfg, managerSetup(jobframework.WithWaitForPodsReady(&configapi.WaitForPodsReady{Enable: true}))) 817 ginkgo.By("Create a resource flavor") 818 gomega.Expect(k8sClient.Create(ctx, defaultFlavor)).Should(gomega.Succeed()) 819 }) 820 ginkgo.AfterAll(func() { 821 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, defaultFlavor, true) 822 fwk.Teardown() 823 }) 824 825 ginkgo.BeforeEach(func() { 826 ns = &corev1.Namespace{ 827 ObjectMeta: metav1.ObjectMeta{ 828 GenerateName: "core-", 829 }, 830 } 831 gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed()) 832 wlLookupKey = types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(jobName), Namespace: ns.Name} 833 }) 834 835 ginkgo.AfterEach(func() { 836 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 837 }) 838 839 ginkgo.DescribeTable("Single job at different stages of progress towards completion", 840 func(podsReadyTestSpec podsReadyTestSpec) { 841 ginkgo.By("Create a job") 842 job := testingjob.MakeJob(jobName, ns.Name).Parallelism(2).Obj() 843 jobQueueName := "test-queue" 844 job.Annotations = map[string]string{constants.QueueAnnotation: jobQueueName} 845 gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed()) 846 lookupKey := types.NamespacedName{Name: jobName, Namespace: ns.Name} 847 createdJob := &batchv1.Job{} 848 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed()) 849 850 ginkgo.By("Fetch the workload created for the job") 851 createdWorkload := &kueue.Workload{} 852 gomega.Eventually(func() error { 853 return k8sClient.Get(ctx, wlLookupKey, createdWorkload) 854 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 855 856 ginkgo.By("Admit the workload created for the job") 857 admission := testing.MakeAdmission("foo"). 858 Assignment(corev1.ResourceCPU, "default", "1m"). 859 AssignmentPodCount(createdWorkload.Spec.PodSets[0].Count). 
860 Obj() 861 gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed()) 862 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 863 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).Should(gomega.Succeed()) 864 865 ginkgo.By("Await for the job to be unsuspended") 866 gomega.Eventually(func() *bool { 867 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed()) 868 return createdJob.Spec.Suspend 869 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false))) 870 871 if podsReadyTestSpec.beforeJobStatus != nil { 872 ginkgo.By("Update the job status to simulate its initial progress towards completion") 873 createdJob.Status = *podsReadyTestSpec.beforeJobStatus 874 gomega.Expect(k8sClient.Status().Update(ctx, createdJob)).Should(gomega.Succeed()) 875 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed()) 876 } 877 878 if podsReadyTestSpec.beforeCondition != nil { 879 ginkgo.By("Update the workload status") 880 gomega.Eventually(func() *metav1.Condition { 881 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).Should(gomega.Succeed()) 882 return apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadPodsReady) 883 }, util.Timeout, util.Interval).Should(gomega.BeComparableTo(podsReadyTestSpec.beforeCondition, ignoreConditionTimestamps)) 884 } 885 886 ginkgo.By("Update the job status to simulate its progress towards completion") 887 createdJob.Status = podsReadyTestSpec.jobStatus 888 gomega.Expect(k8sClient.Status().Update(ctx, createdJob)).Should(gomega.Succeed()) 889 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed()) 890 891 if podsReadyTestSpec.suspended { 892 ginkgo.By("Unset admission of the workload to suspend the job") 893 gomega.Eventually(func() error { 894 // the update may need to be retried due to a conflict as the workload gets 895 // also updated due to setting of the job status. 
896 if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil { 897 return err 898 } 899 return util.SetQuotaReservation(ctx, k8sClient, createdWorkload, nil) 900 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 901 util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload) 902 } 903 904 ginkgo.By("Verify the PodsReady condition is added") 905 gomega.Eventually(func() *metav1.Condition { 906 gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).Should(gomega.Succeed()) 907 return apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadPodsReady) 908 }, util.Timeout, util.Interval).Should(gomega.BeComparableTo(podsReadyTestSpec.wantCondition, ignoreConditionTimestamps)) 909 }, 910 ginkgo.Entry("No progress", podsReadyTestSpec{ 911 wantCondition: &metav1.Condition{ 912 Type: kueue.WorkloadPodsReady, 913 Status: metav1.ConditionFalse, 914 Reason: "PodsReady", 915 Message: "Not all pods are ready or succeeded", 916 }, 917 }), 918 ginkgo.Entry("Single pod ready", podsReadyTestSpec{ 919 jobStatus: batchv1.JobStatus{ 920 Ready: ptr.To[int32](1), 921 }, 922 wantCondition: &metav1.Condition{ 923 Type: kueue.WorkloadPodsReady, 924 Status: metav1.ConditionFalse, 925 Reason: "PodsReady", 926 Message: "Not all pods are ready or succeeded", 927 }, 928 }), 929 ginkgo.Entry("Single pod succeeded", podsReadyTestSpec{ 930 jobStatus: batchv1.JobStatus{ 931 Succeeded: 1, 932 }, 933 wantCondition: &metav1.Condition{ 934 Type: kueue.WorkloadPodsReady, 935 Status: metav1.ConditionFalse, 936 Reason: "PodsReady", 937 Message: "Not all pods are ready or succeeded", 938 }, 939 }), 940 ginkgo.Entry("All pods are ready", podsReadyTestSpec{ 941 jobStatus: batchv1.JobStatus{ 942 Ready: ptr.To[int32](2), 943 }, 944 wantCondition: &metav1.Condition{ 945 Type: kueue.WorkloadPodsReady, 946 Status: metav1.ConditionTrue, 947 Reason: "PodsReady", 948 Message: "All pods were ready or succeeded since the workload admission", 949 }, 950 }), 951 ginkgo.Entry("One pod ready, one succeeded", podsReadyTestSpec{ 952 jobStatus: batchv1.JobStatus{ 953 Ready: ptr.To[int32](1), 954 Succeeded: 1, 955 }, 956 wantCondition: &metav1.Condition{ 957 Type: kueue.WorkloadPodsReady, 958 Status: metav1.ConditionTrue, 959 Reason: "PodsReady", 960 Message: "All pods were ready or succeeded since the workload admission", 961 }, 962 }), 963 ginkgo.Entry("All pods are succeeded", podsReadyTestSpec{ 964 jobStatus: batchv1.JobStatus{ 965 Ready: ptr.To[int32](0), 966 Succeeded: 2, 967 }, 968 wantCondition: &metav1.Condition{ 969 Type: kueue.WorkloadPodsReady, 970 Status: metav1.ConditionTrue, 971 Reason: "PodsReady", 972 Message: "All pods were ready or succeeded since the workload admission", 973 }, 974 }), 975 ginkgo.Entry("All pods are succeeded; PodsReady=False before", podsReadyTestSpec{ 976 beforeCondition: &metav1.Condition{ 977 Type: kueue.WorkloadPodsReady, 978 Status: metav1.ConditionFalse, 979 Reason: "PodsReady", 980 Message: "Not all pods are ready or succeeded", 981 }, 982 jobStatus: batchv1.JobStatus{ 983 Ready: ptr.To[int32](0), 984 Succeeded: 2, 985 }, 986 wantCondition: &metav1.Condition{ 987 Type: kueue.WorkloadPodsReady, 988 Status: metav1.ConditionTrue, 989 Reason: "PodsReady", 990 Message: "All pods were ready or succeeded since the workload admission", 991 }, 992 }), 993 ginkgo.Entry("One ready pod, one failed; PodsReady=True before", podsReadyTestSpec{ 994 beforeJobStatus: &batchv1.JobStatus{ 995 Ready: ptr.To[int32](2), 996 }, 997 beforeCondition: &metav1.Condition{ 
998 Type: kueue.WorkloadPodsReady, 999 Status: metav1.ConditionTrue, 1000 Reason: "PodsReady", 1001 Message: "All pods were ready or succeeded since the workload admission", 1002 }, 1003 jobStatus: batchv1.JobStatus{ 1004 Ready: ptr.To[int32](1), 1005 Failed: 1, 1006 }, 1007 wantCondition: &metav1.Condition{ 1008 Type: kueue.WorkloadPodsReady, 1009 Status: metav1.ConditionTrue, 1010 Reason: "PodsReady", 1011 Message: "All pods were ready or succeeded since the workload admission", 1012 }, 1013 }), 1014 ginkgo.Entry("Job suspended without ready pods; but PodsReady=True before", podsReadyTestSpec{ 1015 beforeJobStatus: &batchv1.JobStatus{ 1016 Ready: ptr.To[int32](2), 1017 }, 1018 beforeCondition: &metav1.Condition{ 1019 Type: kueue.WorkloadPodsReady, 1020 Status: metav1.ConditionTrue, 1021 Reason: "PodsReady", 1022 Message: "All pods were ready or succeeded since the workload admission", 1023 }, 1024 jobStatus: batchv1.JobStatus{ 1025 Failed: 2, 1026 }, 1027 suspended: true, 1028 wantCondition: &metav1.Condition{ 1029 Type: kueue.WorkloadPodsReady, 1030 Status: metav1.ConditionFalse, 1031 Reason: "PodsReady", 1032 Message: "Not all pods are ready or succeeded", 1033 }, 1034 }), 1035 ginkgo.Entry("Job suspended with all pods ready; PodsReady=True before", podsReadyTestSpec{ 1036 beforeJobStatus: &batchv1.JobStatus{ 1037 Ready: ptr.To[int32](2), 1038 }, 1039 beforeCondition: &metav1.Condition{ 1040 Type: kueue.WorkloadPodsReady, 1041 Status: metav1.ConditionTrue, 1042 Reason: "PodsReady", 1043 Message: "All pods were ready or succeeded since the workload admission", 1044 }, 1045 jobStatus: batchv1.JobStatus{ 1046 Ready: ptr.To[int32](2), 1047 }, 1048 suspended: true, 1049 wantCondition: &metav1.Condition{ 1050 Type: kueue.WorkloadPodsReady, 1051 Status: metav1.ConditionFalse, 1052 Reason: "PodsReady", 1053 Message: "Not all pods are ready or succeeded", 1054 }, 1055 }), 1056 ) 1057 }) 1058 1059 var _ = ginkgo.Describe("Job controller interacting with scheduler", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() { 1060 var ( 1061 ns *corev1.Namespace 1062 onDemandFlavor *kueue.ResourceFlavor 1063 spotTaintedFlavor *kueue.ResourceFlavor 1064 spotUntaintedFlavor *kueue.ResourceFlavor 1065 prodClusterQ *kueue.ClusterQueue 1066 devClusterQ *kueue.ClusterQueue 1067 podsCountClusterQ *kueue.ClusterQueue 1068 prodLocalQ *kueue.LocalQueue 1069 devLocalQ *kueue.LocalQueue 1070 ) 1071 1072 ginkgo.BeforeAll(func() { 1073 fwk = &framework.Framework{ 1074 CRDPath: crdPath, 1075 } 1076 cfg = fwk.Init() 1077 ctx, k8sClient = fwk.RunManager(cfg, managerAndSchedulerSetup()) 1078 }) 1079 ginkgo.AfterAll(func() { 1080 fwk.Teardown() 1081 }) 1082 1083 ginkgo.BeforeEach(func() { 1084 ns = &corev1.Namespace{ 1085 ObjectMeta: metav1.ObjectMeta{ 1086 GenerateName: "core-", 1087 }, 1088 } 1089 gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed()) 1090 1091 onDemandFlavor = testing.MakeResourceFlavor("on-demand").Label(instanceKey, "on-demand").Obj() 1092 gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed()) 1093 1094 spotTaintedFlavor = testing.MakeResourceFlavor("spot-tainted"). 1095 Label(instanceKey, "spot-tainted"). 
1096 Taint(corev1.Taint{ 1097 Key: instanceKey, 1098 Value: "spot-tainted", 1099 Effect: corev1.TaintEffectNoSchedule, 1100 }).Obj() 1101 gomega.Expect(k8sClient.Create(ctx, spotTaintedFlavor)).Should(gomega.Succeed()) 1102 1103 spotUntaintedFlavor = testing.MakeResourceFlavor("spot-untainted").Label(instanceKey, "spot-untainted").Obj() 1104 gomega.Expect(k8sClient.Create(ctx, spotUntaintedFlavor)).Should(gomega.Succeed()) 1105 1106 prodClusterQ = testing.MakeClusterQueue("prod-cq"). 1107 Cohort("prod"). 1108 ResourceGroup( 1109 *testing.MakeFlavorQuotas("spot-tainted").Resource(corev1.ResourceCPU, "5", "0").Obj(), 1110 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 1111 ).Obj() 1112 gomega.Expect(k8sClient.Create(ctx, prodClusterQ)).Should(gomega.Succeed()) 1113 1114 devClusterQ = testing.MakeClusterQueue("dev-clusterqueue"). 1115 ResourceGroup( 1116 *testing.MakeFlavorQuotas("spot-untainted").Resource(corev1.ResourceCPU, "5").Obj(), 1117 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(), 1118 ). 1119 FlavorFungibility(kueue.FlavorFungibility{ 1120 WhenCanBorrow: kueue.Borrow, 1121 WhenCanPreempt: kueue.TryNextFlavor, 1122 }). 1123 Preemption(kueue.ClusterQueuePreemption{ 1124 WithinClusterQueue: kueue.PreemptionPolicyLowerPriority, 1125 }). 1126 Obj() 1127 gomega.Expect(k8sClient.Create(ctx, devClusterQ)).Should(gomega.Succeed()) 1128 podsCountClusterQ = testing.MakeClusterQueue("pods-clusterqueue"). 1129 ResourceGroup( 1130 *testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourcePods, "5").Obj(), 1131 ). 1132 Obj() 1133 gomega.Expect(k8sClient.Create(ctx, podsCountClusterQ)).Should(gomega.Succeed()) 1134 }) 1135 1136 ginkgo.AfterEach(func() { 1137 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed()) 1138 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, prodClusterQ, true) 1139 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, devClusterQ, true) 1140 util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, podsCountClusterQ, true) 1141 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true) 1142 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotTaintedFlavor, true) 1143 util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotUntaintedFlavor, true) 1144 }) 1145 1146 ginkgo.It("Should schedule jobs as they fit in their ClusterQueue", func() { 1147 ginkgo.By("creating localQueues") 1148 prodLocalQ = testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj() 1149 gomega.Expect(k8sClient.Create(ctx, prodLocalQ)).Should(gomega.Succeed()) 1150 devLocalQ = testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(devClusterQ.Name).Obj() 1151 gomega.Expect(k8sClient.Create(ctx, devLocalQ)).Should(gomega.Succeed()) 1152 1153 ginkgo.By("checking the first prod job starts") 1154 prodJob1 := testingjob.MakeJob("prod-job1", ns.Name).Queue(prodLocalQ.Name).Request(corev1.ResourceCPU, "2").Obj() 1155 gomega.Expect(k8sClient.Create(ctx, prodJob1)).Should(gomega.Succeed()) 1156 lookupKey1 := types.NamespacedName{Name: prodJob1.Name, Namespace: prodJob1.Namespace} 1157 createdProdJob1 := &batchv1.Job{} 1158 gomega.Eventually(func() *bool { 1159 gomega.Expect(k8sClient.Get(ctx, lookupKey1, createdProdJob1)).Should(gomega.Succeed()) 1160 return createdProdJob1.Spec.Suspend 1161 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false))) 1162 gomega.Expect(createdProdJob1.Spec.Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(onDemandFlavor.Name)) 
1163 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) 1164 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) 1165 1166 ginkgo.By("checking a second no-fit prod job does not start") 1167 prodJob2 := testingjob.MakeJob("prod-job2", ns.Name).Queue(prodLocalQ.Name).Request(corev1.ResourceCPU, "5").Obj() 1168 gomega.Expect(k8sClient.Create(ctx, prodJob2)).Should(gomega.Succeed()) 1169 lookupKey2 := types.NamespacedName{Name: prodJob2.Name, Namespace: prodJob2.Namespace} 1170 createdProdJob2 := &batchv1.Job{} 1171 gomega.Consistently(func() *bool { 1172 gomega.Expect(k8sClient.Get(ctx, lookupKey2, createdProdJob2)).Should(gomega.Succeed()) 1173 return createdProdJob2.Spec.Suspend 1174 }, util.ConsistentDuration, util.Interval).Should(gomega.Equal(ptr.To(true))) 1175 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 1) 1176 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) 1177 1178 ginkgo.By("checking a dev job starts") 1179 devJob := testingjob.MakeJob("dev-job", ns.Name).Queue(devLocalQ.Name).Request(corev1.ResourceCPU, "5").Obj() 1180 gomega.Expect(k8sClient.Create(ctx, devJob)).Should(gomega.Succeed()) 1181 createdDevJob := &batchv1.Job{} 1182 gomega.Eventually(func() *bool { 1183 gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: devJob.Name, Namespace: devJob.Namespace}, createdDevJob)). 1184 Should(gomega.Succeed()) 1185 return createdDevJob.Spec.Suspend 1186 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false))) 1187 gomega.Expect(createdDevJob.Spec.Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotUntaintedFlavor.Name)) 1188 util.ExpectPendingWorkloadsMetric(devClusterQ, 0, 0) 1189 util.ExpectReservingActiveWorkloadsMetric(devClusterQ, 1) 1190 1191 ginkgo.By("checking the second prod job starts when the first finishes") 1192 createdProdJob1.Status.Conditions = append(createdProdJob1.Status.Conditions, 1193 batchv1.JobCondition{ 1194 Type: batchv1.JobComplete, 1195 Status: corev1.ConditionTrue, 1196 LastProbeTime: metav1.Now(), 1197 LastTransitionTime: metav1.Now(), 1198 }) 1199 gomega.Expect(k8sClient.Status().Update(ctx, createdProdJob1)).Should(gomega.Succeed()) 1200 gomega.Eventually(func() *bool { 1201 gomega.Expect(k8sClient.Get(ctx, lookupKey2, createdProdJob2)).Should(gomega.Succeed()) 1202 return createdProdJob2.Spec.Suspend 1203 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false))) 1204 gomega.Expect(createdProdJob2.Spec.Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(onDemandFlavor.Name)) 1205 util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0) 1206 util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1) 1207 }) 1208 1209 ginkgo.It("Should unsuspend job iff localQueue is in the same namespace", func() { 1210 ginkgo.By("create another namespace") 1211 ns2 := &corev1.Namespace{ 1212 ObjectMeta: metav1.ObjectMeta{ 1213 GenerateName: "e2e-", 1214 }, 1215 } 1216 gomega.Expect(k8sClient.Create(ctx, ns2)).To(gomega.Succeed()) 1217 defer func() { 1218 gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns2)).To(gomega.Succeed()) 1219 }() 1220 1221 ginkgo.By("create a localQueue located in a different namespace as the job") 1222 localQueue := testing.MakeLocalQueue("local-queue", ns2.Name).Obj() 1223 localQueue.Spec.ClusterQueue = kueue.ClusterQueueReference(prodClusterQ.Name) 1224 1225 ginkgo.By("create a job") 1226 prodJob := testingjob.MakeJob("prod-job", ns.Name).Queue(localQueue.Name).Request(corev1.ResourceCPU, "2").Obj() 1227 gomega.Expect(k8sClient.Create(ctx, 
prodJob)).Should(gomega.Succeed()) 1228 1229 ginkgo.By("job should be suspend") 1230 lookupKey := types.NamespacedName{Name: prodJob.Name, Namespace: prodJob.Namespace} 1231 createdProdJob := &batchv1.Job{} 1232 gomega.Eventually(func() *bool { 1233 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdProdJob)).Should(gomega.Succeed()) 1234 return createdProdJob.Spec.Suspend 1235 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true))) 1236 1237 ginkgo.By("creating another localQueue of the same name and in the same namespace as the job") 1238 prodLocalQ = testing.MakeLocalQueue(localQueue.Name, ns.Name).ClusterQueue(prodClusterQ.Name).Obj() 1239 gomega.Expect(k8sClient.Create(ctx, prodLocalQ)).Should(gomega.Succeed()) 1240 1241 ginkgo.By("job should be unsuspended and NodeSelector properly set") 1242 gomega.Eventually(func() *bool { 1243 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdProdJob)).Should(gomega.Succeed()) 1244 return createdProdJob.Spec.Suspend 1245 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false))) 1246 1247 runningSelector := maps.Clone(createdProdJob.Spec.Template.Spec.NodeSelector) 1248 1249 gomega.Expect(runningSelector).To(gomega.Equal(map[string]string{instanceKey: "on-demand"})) 1250 }) 1251 1252 ginkgo.When("The workload's admission is removed", func() { 1253 ginkgo.It("Should restore the original node selectors", func() { 1254 localQueue := testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj() 1255 job := testingjob.MakeJob(jobName, ns.Name).Queue(localQueue.Name).Request(corev1.ResourceCPU, "2").Obj() 1256 lookupKey := types.NamespacedName{Name: job.Name, Namespace: job.Namespace} 1257 createdJob := &batchv1.Job{} 1258 1259 ginkgo.By("create a job", func() { 1260 gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed()) 1261 }) 1262 1263 ginkgo.By("job should be suspend", func() { 1264 gomega.Eventually(func() *bool { 1265 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed()) 1266 return createdJob.Spec.Suspend 1267 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true))) 1268 }) 1269 1270 // backup the podSet's node selector 1271 originalNodeSelector := createdJob.Spec.Template.Spec.NodeSelector 1272 1273 ginkgo.By("create a localQueue", func() { 1274 gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed()) 1275 }) 1276 1277 ginkgo.By("job should be unsuspended", func() { 1278 gomega.Eventually(func() *bool { 1279 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed()) 1280 return createdJob.Spec.Suspend 1281 }, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false))) 1282 }) 1283 1284 ginkgo.By("the node selector should be updated", func() { 1285 gomega.Eventually(func() map[string]string { 1286 gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed()) 1287 return createdJob.Spec.Template.Spec.NodeSelector 1288 }, util.Timeout, util.Interval).ShouldNot(gomega.Equal(originalNodeSelector)) 1289 }) 1290 1291 ginkgo.By("delete the localQueue to prevent readmission", func() { 1292 gomega.Expect(util.DeleteLocalQueue(ctx, k8sClient, localQueue)).Should(gomega.Succeed()) 1293 }) 1294 1295 ginkgo.By("clear the workload's admission to stop the job", func() { 1296 wl := &kueue.Workload{} 1297 wlKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(job.Name), Namespace: job.Namespace} 1298 gomega.Expect(k8sClient.Get(ctx, wlKey, wl)).Should(gomega.Succeed()) 1299 
				gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, wl, nil)).Should(gomega.Succeed())
				util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, wl)
			})

			ginkgo.By("the node selector should be restored", func() {
				gomega.Eventually(func() map[string]string {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.Template.Spec.NodeSelector
				}, util.Timeout, util.Interval).Should(gomega.Equal(originalNodeSelector))
			})
		})
	})

	ginkgo.When("The workload is deleted while admitted", func() {
		ginkgo.It("Should restore the original node selectors", func() {
			localQueue := testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj()
			job := testingjob.MakeJob(jobName, ns.Name).Queue(localQueue.Name).Request(corev1.ResourceCPU, "2").Suspend(false).Obj()
			lookupKey := types.NamespacedName{Name: job.Name, Namespace: job.Namespace}
			createdJob := &batchv1.Job{}

			ginkgo.By("create a job", func() {
				gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())
			})

			ginkgo.By("job should be suspended", func() {
				gomega.Eventually(func() *bool {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.Suspend
				}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To[bool](true)))
			})

			// back up the podSet's node selector
			originalNodeSelector := createdJob.Spec.Template.Spec.NodeSelector

			ginkgo.By("create a localQueue", func() {
				gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed())
			})

			ginkgo.By("job should be unsuspended", func() {
				gomega.Eventually(func() *bool {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.Suspend
				}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To[bool](false)))
			})

			ginkgo.By("the node selector should be updated", func() {
				gomega.Eventually(func() map[string]string {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.Template.Spec.NodeSelector
				}, util.Timeout, util.Interval).ShouldNot(gomega.Equal(originalNodeSelector))
			})

			ginkgo.By("delete the localQueue to prevent readmission", func() {
				gomega.Expect(util.DeleteLocalQueue(ctx, k8sClient, localQueue)).Should(gomega.Succeed())
			})

			ginkgo.By("deleting the workload", func() {
				wl := &kueue.Workload{}
				wlKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(job.Name), Namespace: job.Namespace}
				gomega.Expect(k8sClient.Get(ctx, wlKey, wl)).Should(gomega.Succeed())
				gomega.Expect(k8sClient.Delete(ctx, wl)).Should(gomega.Succeed())
			})

			ginkgo.By("the node selector should be restored", func() {
				gomega.Eventually(func() map[string]string {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.Template.Spec.NodeSelector
				}, util.Timeout, util.Interval).Should(gomega.Equal(originalNodeSelector))
			})
		})
	})

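	// While the owning Job exists, its Workload keeps the resource-in-use
	// finalizer; once the Job is deleted, the controller is expected to remove
	// the finalizer so the Workload can be cleaned up.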
	ginkgo.When("The job is deleted while admitted", func() {
		ginkgo.It("Its workload finalizer should be removed", func() {
			localQueue := testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj()
			job := testingjob.MakeJob(jobName, ns.Name).Queue(localQueue.Name).Request(corev1.ResourceCPU, "2").Suspend(false).Obj()
			lookupKey := types.NamespacedName{Name: job.Name, Namespace: job.Namespace}
			createdJob := &batchv1.Job{}

			ginkgo.By("create a job", func() {
				gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())
			})

			ginkgo.By("job should be suspended", func() {
				gomega.Eventually(func() *bool {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.Suspend
				}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To[bool](true)))
			})

			ginkgo.By("create a localQueue", func() {
				gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed())
			})

			ginkgo.By("job should be unsuspended", func() {
				gomega.Eventually(func() *bool {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.Suspend
				}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To[bool](false)))
			})

			wlKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(job.Name), Namespace: job.Namespace}
			ginkgo.By("checking the finalizer is set", func() {
				gomega.Eventually(func() []string {
					wl := &kueue.Workload{}
					gomega.Expect(k8sClient.Get(ctx, wlKey, wl)).Should(gomega.Succeed())
					return wl.Finalizers
				}, util.Timeout, util.Interval).Should(gomega.ContainElement(kueue.ResourceInUseFinalizerName))
			})

			ginkgo.By("deleting the job", func() {
				gomega.Expect(k8sClient.Delete(ctx, job)).Should(gomega.Succeed())
			})

			ginkgo.By("checking that its workload's finalizer is removed", func() {
				gomega.Eventually(func() []string {
					wl := &kueue.Workload{}
					err := k8sClient.Get(ctx, wlKey, wl)
					if err != nil {
						if apierrors.IsNotFound(err) {
							return []string{}
						}
						return []string{kueue.ResourceInUseFinalizerName}
					}
					return wl.Finalizers
				}, util.Timeout, util.Interval).ShouldNot(gomega.ContainElement(kueue.ResourceInUseFinalizerName))
			})
		})
	})

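	// As completions of job1 succeed, its Workload reports reclaimable pods,
	// which frees enough quota for the otherwise non-fitting job2 to be admitted.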
	ginkgo.It("Should allow reclaim of resources that are no longer needed", func() {
		ginkgo.By("creating localQueue", func() {
			prodLocalQ = testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj()
			gomega.Expect(k8sClient.Create(ctx, prodLocalQ)).Should(gomega.Succeed())
		})

		job1 := testingjob.MakeJob("job1", ns.Name).Queue(prodLocalQ.Name).
			Request(corev1.ResourceCPU, "2").
			Completions(5).
			Parallelism(2).
			Obj()
		lookupKey1 := types.NamespacedName{Name: job1.Name, Namespace: job1.Namespace}

		ginkgo.By("checking the first job starts", func() {
			gomega.Expect(k8sClient.Create(ctx, job1)).Should(gomega.Succeed())
			createdJob1 := &batchv1.Job{}
			gomega.Eventually(func() *bool {
				gomega.Expect(k8sClient.Get(ctx, lookupKey1, createdJob1)).Should(gomega.Succeed())
				return createdJob1.Spec.Suspend
			}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false)))
			gomega.Expect(createdJob1.Spec.Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(onDemandFlavor.Name))
			util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0)
			util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1)
		})

		job2 := testingjob.MakeJob("job2", ns.Name).Queue(prodLocalQ.Name).Request(corev1.ResourceCPU, "3").Obj()
		lookupKey2 := types.NamespacedName{Name: job2.Name, Namespace: job2.Namespace}

		ginkgo.By("checking a second no-fit job does not start", func() {
			gomega.Expect(k8sClient.Create(ctx, job2)).Should(gomega.Succeed())
			createdJob2 := &batchv1.Job{}
			gomega.Consistently(func() *bool {
				gomega.Expect(k8sClient.Get(ctx, lookupKey2, createdJob2)).Should(gomega.Succeed())
				return createdJob2.Spec.Suspend
			}, util.ConsistentDuration, util.Interval).Should(gomega.Equal(ptr.To(true)))
			util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 1)
			util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 1)
		})

		ginkgo.By("checking the second job starts when the first has fewer than two completions to go", func() {
			createdJob1 := &batchv1.Job{}
			gomega.Expect(k8sClient.Get(ctx, lookupKey1, createdJob1)).Should(gomega.Succeed())
			createdJob1.Status.Succeeded = 4
			gomega.Expect(k8sClient.Status().Update(ctx, createdJob1)).Should(gomega.Succeed())

			wl := &kueue.Workload{}
			wlKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(job1.Name), Namespace: job1.Namespace}
			gomega.Eventually(func() []kueue.ReclaimablePod {
				gomega.Expect(k8sClient.Get(ctx, wlKey, wl)).Should(gomega.Succeed())
				return wl.Status.ReclaimablePods
			}, util.Timeout, util.Interval).Should(gomega.BeComparableTo([]kueue.ReclaimablePod{{
				Name:  "main",
				Count: 1,
			}}))

			createdJob2 := &batchv1.Job{}
			gomega.Eventually(func() *bool {
				gomega.Expect(k8sClient.Get(ctx, lookupKey2, createdJob2)).Should(gomega.Succeed())
				return createdJob2.Spec.Suspend
			}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false)))
			gomega.Expect(createdJob2.Spec.Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(onDemandFlavor.Name))

			util.ExpectPendingWorkloadsMetric(prodClusterQ, 0, 0)
			util.ExpectReservingActiveWorkloadsMetric(prodClusterQ, 2)
		})
	})

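	// The next two cases drive preemption: a higher-priority job pinned to the
	// spot-untainted flavor evicts the low-priority job, which should then be
	// readmitted on the alternative on-demand flavor.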
	ginkgo.It("Should readmit preempted Job with priorityClass in alternative flavor", func() {
		devLocalQ = testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(devClusterQ.Name).Obj()
		gomega.Expect(k8sClient.Create(ctx, devLocalQ)).Should(gomega.Succeed())

		highPriorityClass := testing.MakePriorityClass("high").PriorityValue(100).Obj()
		gomega.Expect(k8sClient.Create(ctx, highPriorityClass)).Should(gomega.Succeed())
		ginkgo.DeferCleanup(func() {
			gomega.Expect(k8sClient.Delete(ctx, highPriorityClass)).To(gomega.Succeed())
		})

		lowJobKey := types.NamespacedName{Name: "low", Namespace: ns.Name}
		ginkgo.By("Low priority job is unsuspended and has nodeSelector", func() {
			job := testingjob.MakeJob("low", ns.Name).
				Queue(devLocalQ.Name).
				Parallelism(5).
				Request(corev1.ResourceCPU, "1").
				Obj()
			gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())

			expectJobUnsuspendedWithNodeSelectors(lowJobKey, map[string]string{
				instanceKey: "spot-untainted",
			})
		})

		ginkgo.By("High priority job preempts low priority job", func() {
			job := testingjob.MakeJob("high", ns.Name).
				Queue(devLocalQ.Name).
				PriorityClass("high").
				Parallelism(5).
				Request(corev1.ResourceCPU, "1").
				NodeSelector(instanceKey, "spot-untainted"). // target the same flavor to cause preemption
				Obj()
			gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())

			highJobKey := types.NamespacedName{Name: "high", Namespace: ns.Name}
			expectJobUnsuspendedWithNodeSelectors(highJobKey, map[string]string{
				instanceKey: "spot-untainted",
			})
		})

		ginkgo.By("Preempted job should be admitted on second flavor", func() {
			expectJobUnsuspendedWithNodeSelectors(lowJobKey, map[string]string{
				instanceKey: "on-demand",
			})
		})
	})

	ginkgo.It("Should readmit preempted Job with workloadPriorityClass in alternative flavor", func() {
		devLocalQ = testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(devClusterQ.Name).Obj()
		gomega.Expect(k8sClient.Create(ctx, devLocalQ)).Should(gomega.Succeed())

		highWorkloadPriorityClass := testing.MakeWorkloadPriorityClass("high-workload").PriorityValue(100).Obj()
		gomega.Expect(k8sClient.Create(ctx, highWorkloadPriorityClass)).Should(gomega.Succeed())
		ginkgo.DeferCleanup(func() {
			gomega.Expect(k8sClient.Delete(ctx, highWorkloadPriorityClass)).To(gomega.Succeed())
		})

		lowJobKey := types.NamespacedName{Name: "low", Namespace: ns.Name}
		ginkgo.By("Low priority job is unsuspended and has nodeSelector", func() {
			job := testingjob.MakeJob("low", ns.Name).
				Queue(devLocalQ.Name).
				Parallelism(5).
				Request(corev1.ResourceCPU, "1").
				Obj()
			gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())

			expectJobUnsuspendedWithNodeSelectors(lowJobKey, map[string]string{
				instanceKey: "spot-untainted",
			})
		})

		ginkgo.By("High priority job preempts low priority job", func() {
			job := testingjob.MakeJob("high", ns.Name).
				Queue(devLocalQ.Name).
				WorkloadPriorityClass("high-workload").
				Parallelism(5).
				Request(corev1.ResourceCPU, "1").
				NodeSelector(instanceKey, "spot-untainted"). // target the same flavor to cause preemption
				Obj()
			gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())

			highJobKey := types.NamespacedName{Name: "high", Namespace: ns.Name}
			expectJobUnsuspendedWithNodeSelectors(highJobKey, map[string]string{
				instanceKey: "spot-untainted",
			})
		})

		ginkgo.By("Preempted job should be admitted on second flavor", func() {
			expectJobUnsuspendedWithNodeSelectors(lowJobKey, map[string]string{
				instanceKey: "on-demand",
			})
		})
	})

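	// Partial admission: once the job carries JobMinParallelismAnnotation, it may
	// be admitted with a parallelism between that minimum and the requested value;
	// the controller scales spec.parallelism down accordingly and restores it when
	// the admission is cleared.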
	ginkgo.It("Should schedule jobs with partial admission", func() {
		prodLocalQ = testing.MakeLocalQueue("prod-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj()
		job1 := testingjob.MakeJob("job1", ns.Name).
			Queue(prodLocalQ.Name).
			Parallelism(5).
			Completions(6).
			Request(corev1.ResourceCPU, "2").
			Obj()
		jobKey := types.NamespacedName{Name: job1.Name, Namespace: job1.Namespace}
		wlKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(job1.Name), Namespace: job1.Namespace}

		ginkgo.By("creating the localQueue")
		gomega.Expect(k8sClient.Create(ctx, prodLocalQ)).Should(gomega.Succeed())

		ginkgo.By("creating the job")
		gomega.Expect(k8sClient.Create(ctx, job1)).Should(gomega.Succeed())

		createdJob := &batchv1.Job{}
		ginkgo.By("the job should stay suspended", func() {
			gomega.Consistently(func() *bool {
				gomega.Expect(k8sClient.Get(ctx, jobKey, createdJob)).Should(gomega.Succeed())
				return createdJob.Spec.Suspend
			}, util.ConsistentDuration, util.Interval).Should(gomega.Equal(ptr.To(true)))
		})

		ginkgo.By("enable partial admission", func() {
			gomega.Expect(k8sClient.Get(ctx, jobKey, createdJob)).Should(gomega.Succeed())
			if createdJob.Annotations == nil {
				createdJob.Annotations = map[string]string{
					workloadjob.JobMinParallelismAnnotation: "1",
				}
			} else {
				createdJob.Annotations[workloadjob.JobMinParallelismAnnotation] = "1"
			}

			gomega.Expect(k8sClient.Update(ctx, createdJob)).Should(gomega.Succeed())
		})

		wl := &kueue.Workload{}
		ginkgo.By("the job should be unsuspended with a lower parallelism", func() {
			gomega.Eventually(func() *bool {
				gomega.Expect(k8sClient.Get(ctx, jobKey, createdJob)).Should(gomega.Succeed())
				return createdJob.Spec.Suspend
			}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false)))
			gomega.Expect(*createdJob.Spec.Parallelism).To(gomega.BeEquivalentTo(2))

			gomega.Expect(k8sClient.Get(ctx, wlKey, wl)).To(gomega.Succeed())
			gomega.Expect(wl.Spec.PodSets[0].MinCount).ToNot(gomega.BeNil())
			gomega.Expect(*wl.Spec.PodSets[0].MinCount).To(gomega.BeEquivalentTo(1))
		})

		ginkgo.By("delete the localQueue to prevent readmission", func() {
			gomega.Expect(util.DeleteLocalQueue(ctx, k8sClient, prodLocalQ)).Should(gomega.Succeed())
		})

		ginkgo.By("clear the workload's admission to stop the job", func() {
			gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, wl, nil)).To(gomega.Succeed())
			util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, wl)
		})

		ginkgo.By("job should be suspended and its parallelism restored", func() {
			gomega.Eventually(func(g gomega.Gomega) {
				g.Expect(k8sClient.Get(ctx, jobKey, createdJob)).Should(gomega.Succeed())
				g.Expect(ptr.Deref(createdJob.Spec.Suspend, false)).To(gomega.BeTrue(), "the job should be suspended")
				g.Expect(ptr.Deref(createdJob.Spec.Parallelism, 0)).To(gomega.BeEquivalentTo(5))
			}, util.Timeout, util.Interval).Should(gomega.Succeed())
		})
	})

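	// Admission by pod count alone (no resource requests) should still inject the
	// assigned flavor's node selectors into the pod template.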
	ginkgo.It("Should set the flavor's node selectors if the job is admitted by pods count only", func() {
		localQ := testing.MakeLocalQueue("dev-queue", ns.Name).ClusterQueue(podsCountClusterQ.Name).Obj()
		gomega.Expect(k8sClient.Create(ctx, localQ)).Should(gomega.Succeed())
		ginkgo.By("Creating a job with no requests; the flavor's node selectors should be set when it is admitted", func() {
			job := testingjob.MakeJob("job", ns.Name).
				Queue(localQ.Name).
				Parallelism(2).
				Obj()
			gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())
			expectJobUnsuspendedWithNodeSelectors(client.ObjectKeyFromObject(job), map[string]string{
				instanceKey: "on-demand",
			})
		})
	})

	ginkgo.It("Should schedule updated job and update the workload", func() {
		localQueue := testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj()
		ginkgo.By("create a localQueue", func() {
			gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed())
		})

		job := testingjob.MakeJob(jobName, ns.Name).Queue(localQueue.Name).Request(corev1.ResourceCPU, "3").Parallelism(2).Suspend(false).Obj()
		lookupKey := types.NamespacedName{Name: job.Name, Namespace: job.Namespace}
		createdJob := &batchv1.Job{}

		ginkgo.By("creating the job that doesn't fit", func() {
			gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())
		})

		ginkgo.By("job should be suspended", func() {
			gomega.Eventually(func() *bool {
				gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
				return createdJob.Spec.Suspend
			}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true)))
		})

		wlLookupKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(jobName), Namespace: ns.Name}
		createdWorkload := util.AwaitAndVerifyCreatedWorkload(ctx, k8sClient, wlLookupKey, createdJob)
		createdTime := createdWorkload.CreationTimestamp

		createdJob.Spec.Parallelism = ptr.To[int32](1)

		ginkgo.By("updating the job", func() {
			gomega.Expect(k8sClient.Update(ctx, createdJob)).Should(gomega.Succeed())
		})

		createdWorkload = util.AwaitAndVerifyCreatedWorkload(ctx, k8sClient, wlLookupKey, createdJob)

		ginkgo.By("updated job should be unsuspended", func() {
			gomega.Eventually(func() *bool {
				gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
				return createdJob.Spec.Suspend
			}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false)))
		})

		ginkgo.By("updated workload should have the same created timestamp", func() {
			gomega.Expect(createdWorkload.CreationTimestamp).Should(gomega.Equal(createdTime))
		})
	})

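	// Setting the Workload's spec.active to false evicts and suspends the owning
	// job without requeueing it; flipping it back to true allows readmission.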
	ginkgo.When("Suspend a running Job without requeuing through Workload's spec.active field", func() {
		ginkgo.It("Should not readmit a job to the queue after Active is changed to false", func() {
			ginkgo.By("creating localQueue")
			localQueue := testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(prodClusterQ.Name).Obj()
			gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed())

			sampleJob := testingjob.MakeJob("job1", ns.Name).Queue(localQueue.Name).Request(corev1.ResourceCPU, "2").Obj()
			lookupKey1 := types.NamespacedName{Name: sampleJob.Name, Namespace: sampleJob.Namespace}
			wll := &kueue.Workload{}

			ginkgo.By("checking the job starts")
			gomega.Expect(k8sClient.Create(ctx, sampleJob)).Should(gomega.Succeed())

			createdJob := &batchv1.Job{}
			wlKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(sampleJob.Name), Namespace: sampleJob.Namespace}

			gomega.Eventually(func(g gomega.Gomega) {
				ginkgo.By("checking the job's suspend field is false")
				g.Expect(k8sClient.Get(ctx, lookupKey1, sampleJob)).Should(gomega.Succeed())
				g.Expect(sampleJob.Spec.Suspend).To(gomega.Equal(ptr.To(false)))
				ginkgo.By("checking the workload is admitted")
				g.Expect(k8sClient.Get(ctx, wlKey, wll)).Should(gomega.Succeed())
				util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, wll)
			}, util.Timeout, util.Interval).Should(gomega.Succeed())

			ginkgo.By("Change the Active field to suspend the job and check the job remains suspended and the workload unadmitted")
			// Changing Active to false
			wll.Spec.Active = ptr.To(false)
			gomega.Expect(k8sClient.Update(ctx, wll)).Should(gomega.Succeed())

			ginkgo.By("checking a second job starts after first job is suspended")
			sampleJob2 := testingjob.MakeJob("job2", ns.Name).Queue(localQueue.Name).Request(corev1.ResourceCPU, "2").Obj()

			lookupKey2 := types.NamespacedName{Name: sampleJob2.Name, Namespace: sampleJob2.Namespace}
			wll2 := &kueue.Workload{}

			gomega.Expect(k8sClient.Create(ctx, sampleJob2)).Should(gomega.Succeed())
			wlKey2 := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(sampleJob2.Name), Namespace: sampleJob2.Namespace}

			gomega.Eventually(func(g gomega.Gomega) {
				g.Expect(k8sClient.Get(ctx, lookupKey2, sampleJob2)).Should(gomega.Succeed())
				g.Expect(sampleJob2.Spec.Suspend).To(gomega.Equal(ptr.To(false)))
				g.Expect(k8sClient.Get(ctx, wlKey2, wll2)).Should(gomega.Succeed())
				util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, wll2)
			}, util.Timeout, util.Interval).Should(gomega.Succeed())

			// Checking job stays suspended
			ginkgo.By("checking job is suspended")
			gomega.Eventually(func() *bool {
				gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: sampleJob.Name, Namespace: sampleJob.Namespace}, createdJob)).
					Should(gomega.Succeed())
				return createdJob.Spec.Suspend
			}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true)))

			ginkgo.By("checking the first job and workload stay suspended and unadmitted")
			gomega.Consistently(func(g gomega.Gomega) {
				// Job should stay pending
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: sampleJob.Name, Namespace: sampleJob.Namespace}, createdJob)).
					Should(gomega.Succeed())
				g.Expect(createdJob.Spec.Suspend).To(gomega.Equal(ptr.To(true)))
				// Workload should get unadmitted
				g.Expect(k8sClient.Get(ctx, wlKey, wll)).Should(gomega.Succeed())
				util.ExpectWorkloadsToBePending(ctx, k8sClient, wll)
				// Workload should stay pending
				g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(wll), wll)).Should(gomega.Succeed())
				// Should have Evicted condition
				isEvicting := apimeta.IsStatusConditionTrue(wll.Status.Conditions, kueue.WorkloadEvicted)
				g.Expect(isEvicting).Should(gomega.BeTrue())
			}, util.ConsistentDuration, util.Interval).Should(gomega.Succeed())

			ginkgo.By("checking the first job becomes unsuspended after we update the Active field back to true")
			gomega.Eventually(func() error {
				gomega.Expect(k8sClient.Get(ctx, wlKey, wll)).Should(gomega.Succeed())
				wll.Spec.Active = ptr.To(true)
				return k8sClient.Update(ctx, wll)
			}, util.Timeout, util.Interval).Should(gomega.Succeed())

			gomega.Eventually(func(g gomega.Gomega) {
				g.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: sampleJob.Name, Namespace: sampleJob.Namespace}, createdJob)).
					Should(gomega.Succeed())
				g.Expect(createdJob.Spec.Suspend).To(gomega.Equal(ptr.To(false)))
			}, util.Timeout, util.Interval).Should(gomega.Succeed())
		})
	})
})

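// expectJobUnsuspendedWithNodeSelectors waits until the Job identified by key is
// unsuspended and its pod template has the expected node selectors.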
func expectJobUnsuspendedWithNodeSelectors(key types.NamespacedName, ns map[string]string) {
	job := &batchv1.Job{}
	gomega.EventuallyWithOffset(1, func() []any {
		gomega.Expect(k8sClient.Get(ctx, key, job)).To(gomega.Succeed())
		return []any{*job.Spec.Suspend, job.Spec.Template.Spec.NodeSelector}
	}, util.Timeout, util.Interval).Should(gomega.Equal([]any{false, ns}))
}