sigs.k8s.io/kueue@v0.6.2/test/integration/controller/jobs/mpijob/mpijob_controller_test.go

/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package mpijob

import (
	"fmt"

	"github.com/google/go-cmp/cmp/cmpopts"
	kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1"
	"github.com/onsi/ginkgo/v2"
	"github.com/onsi/gomega"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	apimeta "k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/utils/ptr"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"

	configapi "sigs.k8s.io/kueue/apis/config/v1beta1"
	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
	"sigs.k8s.io/kueue/pkg/controller/constants"
	"sigs.k8s.io/kueue/pkg/controller/jobframework"
	workloadmpijob "sigs.k8s.io/kueue/pkg/controller/jobs/mpijob"
	"sigs.k8s.io/kueue/pkg/util/testing"
	testingjob "sigs.k8s.io/kueue/pkg/util/testingjobs/job"
	testingmpijob "sigs.k8s.io/kueue/pkg/util/testingjobs/mpijob"
	"sigs.k8s.io/kueue/pkg/workload"
	"sigs.k8s.io/kueue/test/integration/framework"
	"sigs.k8s.io/kueue/test/util"
)

const (
	jobName           = "test-job"
	instanceKey       = "cloud.provider.com/instance"
	priorityClassName = "test-priority-class"
	priorityValue     = 10
)

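// ignoreConditionTimestamps drops LastTransitionTime when comparing
// conditions, since the controller sets it at update time.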
var (
	ignoreConditionTimestamps = cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")
)

// +kubebuilder:docs-gen:collapse=Imports

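// This suite runs the manager with ManageJobsWithoutQueueName enabled, so
// MPIJobs are reconciled even when they carry no queue annotation.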
var _ = ginkgo.Describe("Job controller", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() {

	ginkgo.BeforeAll(func() {
		fwk = &framework.Framework{
			CRDPath:     crdPath,
			DepCRDPaths: []string{mpiCrdPath},
		}

		cfg = fwk.Init()
		ctx, k8sClient = fwk.RunManager(cfg, managerSetup(false, jobframework.WithManageJobsWithoutQueueName(true)))
	})
	ginkgo.AfterAll(func() {
		fwk.Teardown()
	})

	var (
		ns          *corev1.Namespace
		wlLookupKey types.NamespacedName
	)
	ginkgo.BeforeEach(func() {
		ns = &corev1.Namespace{
			ObjectMeta: metav1.ObjectMeta{
				GenerateName: "core-",
			},
		}
		gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())
		wlLookupKey = types.NamespacedName{Name: workloadmpijob.GetWorkloadNameForMPIJob(jobName), Namespace: ns.Name}
	})
	ginkgo.AfterEach(func() {
		gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed())
	})

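	// This test walks a single MPIJob through the full Kueue lifecycle:
	// suspension on creation, workload creation, queue-name propagation,
	// admission, node-selector injection, resize, and completion.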
	ginkgo.It("Should reconcile MPIJobs", func() {
		ginkgo.By("checking the job gets suspended when created unsuspended")
		priorityClass := testing.MakePriorityClass(priorityClassName).
			PriorityValue(int32(priorityValue)).Obj()
		gomega.Expect(k8sClient.Create(ctx, priorityClass)).Should(gomega.Succeed())

		job := testingmpijob.MakeMPIJob(jobName, ns.Name).PriorityClass(priorityClassName).Obj()
		err := k8sClient.Create(ctx, job)
		gomega.Expect(err).To(gomega.Succeed())
		createdJob := &kubeflow.MPIJob{}

		gomega.Eventually(func() bool {
			if err := k8sClient.Get(ctx, types.NamespacedName{Name: jobName, Namespace: ns.Name}, createdJob); err != nil {
				return false
			}
			return createdJob.Spec.RunPolicy.Suspend != nil && *createdJob.Spec.RunPolicy.Suspend
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())

		ginkgo.By("checking the workload is created without queue assigned")
		createdWorkload := &kueue.Workload{}
		gomega.Eventually(func() error {
			return k8sClient.Get(ctx, wlLookupKey, createdWorkload)
		}, util.Timeout, util.Interval).Should(gomega.Succeed())
		gomega.Expect(createdWorkload.Spec.QueueName).Should(gomega.Equal(""), "The Workload shouldn't have .spec.queueName set")
		gomega.Expect(metav1.IsControlledBy(createdWorkload, createdJob)).To(gomega.BeTrue(), "The Workload should be owned by the Job")

		ginkgo.By("checking the workload is created with priority and priorityClassName")
		gomega.Expect(createdWorkload.Spec.PriorityClassName).Should(gomega.Equal(priorityClassName))
		gomega.Expect(*createdWorkload.Spec.Priority).Should(gomega.Equal(int32(priorityValue)))

		ginkgo.By("checking the workload is updated with the queue name when the job is")
		jobQueueName := "test-queue"
		createdJob.Annotations = map[string]string{constants.QueueAnnotation: jobQueueName}
		gomega.Expect(k8sClient.Update(ctx, createdJob)).Should(gomega.Succeed())
		gomega.Eventually(func() bool {
			if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil {
				return false
			}
			return createdWorkload.Spec.QueueName == jobQueueName
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())

		ginkgo.By("checking a second non-matching workload is deleted")
		secondWl := &kueue.Workload{
			ObjectMeta: metav1.ObjectMeta{
				Name:      workloadmpijob.GetWorkloadNameForMPIJob("second-workload"),
				Namespace: createdWorkload.Namespace,
			},
			Spec: *createdWorkload.Spec.DeepCopy(),
		}
		gomega.Expect(ctrl.SetControllerReference(createdJob, secondWl, scheme.Scheme)).Should(gomega.Succeed())
		secondWl.Spec.PodSets[0].Count += 1

		gomega.Expect(k8sClient.Create(ctx, secondWl)).Should(gomega.Succeed())
		gomega.Eventually(func() error {
			wl := &kueue.Workload{}
			key := types.NamespacedName{Name: secondWl.Name, Namespace: secondWl.Namespace}
			return k8sClient.Get(ctx, key, wl)
		}, util.Timeout, util.Interval).Should(testing.BeNotFoundError())
		// check the original wl is still there
		gomega.Eventually(func() error {
			return k8sClient.Get(ctx, wlLookupKey, createdWorkload)
		}, util.Timeout, util.Interval).Should(gomega.Succeed())

		ginkgo.By("checking the job is unsuspended when workload is assigned")
		onDemandFlavor := testing.MakeResourceFlavor("on-demand").Label(instanceKey, "on-demand").Obj()
		gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed())
		spotFlavor := testing.MakeResourceFlavor("spot").Label(instanceKey, "spot").Obj()
		gomega.Expect(k8sClient.Create(ctx, spotFlavor)).Should(gomega.Succeed())
		clusterQueue := testing.MakeClusterQueue("cluster-queue").
			ResourceGroup(
				*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(),
				*testing.MakeFlavorQuotas("spot").Resource(corev1.ResourceCPU, "5").Obj(),
			).Obj()
		admission := testing.MakeAdmission(clusterQueue.Name).
			PodSets(
				kueue.PodSetAssignment{
					Name: "Launcher",
					Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
						corev1.ResourceCPU: "on-demand",
					},
					Count: ptr.To(createdWorkload.Spec.PodSets[0].Count),
				},
				kueue.PodSetAssignment{
					Name: "Worker",
					Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
						corev1.ResourceCPU: "spot",
					},
					Count: ptr.To(createdWorkload.Spec.PodSets[1].Count),
				},
			).
			Obj()
		gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed())
		util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload)
		lookupKey := types.NamespacedName{Name: jobName, Namespace: ns.Name}
		gomega.Eventually(func() bool {
			if err := k8sClient.Get(ctx, lookupKey, createdJob); err != nil {
				return false
			}
			return !*createdJob.Spec.RunPolicy.Suspend
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())
		gomega.Eventually(func() bool {
			ok, _ := testing.CheckLatestEvent(ctx, k8sClient, "Started", corev1.EventTypeNormal, fmt.Sprintf("Admitted by clusterQueue %v", clusterQueue.Name))
			return ok
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())
		gomega.Expect(len(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.NodeSelector)).Should(gomega.Equal(1))
		gomega.Expect(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(onDemandFlavor.Name))
		gomega.Expect(len(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.NodeSelector)).Should(gomega.Equal(1))
		gomega.Expect(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotFlavor.Name))
		gomega.Eventually(func() bool {
			if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil {
				return false
			}
			return len(createdWorkload.Status.Conditions) == 2
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())

		ginkgo.By("checking the job gets suspended when parallelism changes and the added node selectors are removed")
		parallelism := ptr.Deref(job.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Replicas, 1)
		newParallelism := int32(parallelism + 1)
		createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Replicas = &newParallelism
		gomega.Expect(k8sClient.Update(ctx, createdJob)).Should(gomega.Succeed())
		gomega.Eventually(func() bool {
			if err := k8sClient.Get(ctx, lookupKey, createdJob); err != nil {
				return false
			}
			return createdJob.Spec.RunPolicy.Suspend != nil && *createdJob.Spec.RunPolicy.Suspend &&
				len(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.NodeSelector) == 0
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())
		gomega.Eventually(func() bool {
			ok, _ := testing.CheckLatestEvent(ctx, k8sClient, "DeletedWorkload", corev1.EventTypeNormal, fmt.Sprintf("Deleted not matching Workload: %v", wlLookupKey.String()))
			return ok
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())

		ginkgo.By("checking the workload is updated with new count")
		gomega.Eventually(func() bool {
			if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil {
				return false
			}
			return createdWorkload.Spec.PodSets[1].Count == newParallelism
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())
		gomega.Expect(createdWorkload.Status.Admission).Should(gomega.BeNil())

		ginkgo.By("checking the job is unsuspended and selectors added when workload is assigned again")
		admission = testing.MakeAdmission(clusterQueue.Name).
			PodSets(
				kueue.PodSetAssignment{
					Name: "Launcher",
					Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
						corev1.ResourceCPU: "on-demand",
					},
					Count: ptr.To(createdWorkload.Spec.PodSets[0].Count),
				},
				kueue.PodSetAssignment{
					Name: "Worker",
					Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
						corev1.ResourceCPU: "spot",
					},
					Count: ptr.To(createdWorkload.Spec.PodSets[1].Count),
				},
			).
			Obj()
		gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed())
		util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload)
		gomega.Eventually(func() bool {
			if err := k8sClient.Get(ctx, lookupKey, createdJob); err != nil {
				return false
			}
			return !*createdJob.Spec.RunPolicy.Suspend
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())
		gomega.Expect(len(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.NodeSelector)).Should(gomega.Equal(1))
		gomega.Expect(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(onDemandFlavor.Name))
		gomega.Expect(len(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.NodeSelector)).Should(gomega.Equal(1))
		gomega.Expect(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotFlavor.Name))
		gomega.Eventually(func() bool {
			if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil {
				return false
			}
			return len(createdWorkload.Status.Conditions) == 2
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())

		ginkgo.By("checking the workload is finished when job is completed")
		createdJob.Status.Conditions = append(createdJob.Status.Conditions,
			kubeflow.JobCondition{
				Type:               kubeflow.JobSucceeded,
				Status:             corev1.ConditionTrue,
				LastTransitionTime: metav1.Now(),
			})
		gomega.Expect(k8sClient.Status().Update(ctx, createdJob)).Should(gomega.Succeed())
		gomega.Eventually(func() bool {
			err := k8sClient.Get(ctx, wlLookupKey, createdWorkload)
			if err != nil || len(createdWorkload.Status.Conditions) == 2 {
				return false
			}

			return apimeta.IsStatusConditionTrue(createdWorkload.Status.Conditions, kueue.WorkloadFinished)
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())
	})

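	// The next block exercises AdmissionCheck PodSetUpdates: labels,
	// annotations, node selectors, and tolerations set by a check must be
	// applied to the launcher and worker templates on start, and restored
	// when the job is suspended again.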
	ginkgo.When("the queue has admission checks", func() {
		var (
			clusterQueueAc *kueue.ClusterQueue
			localQueue     *kueue.LocalQueue
			testFlavor     *kueue.ResourceFlavor
			jobLookupKey   *types.NamespacedName
			wlLookupKey    *types.NamespacedName
			admissionCheck *kueue.AdmissionCheck
		)

		ginkgo.BeforeEach(func() {
			admissionCheck = testing.MakeAdmissionCheck("check").ControllerName("ac-controller").Obj()
			gomega.Expect(k8sClient.Create(ctx, admissionCheck)).To(gomega.Succeed())
			util.SetAdmissionCheckActive(ctx, k8sClient, admissionCheck, metav1.ConditionTrue)
			clusterQueueAc = testing.MakeClusterQueue("prod-cq-with-checks").
				ResourceGroup(
					*testing.MakeFlavorQuotas("test-flavor").Resource(corev1.ResourceCPU, "5").Obj(),
				).AdmissionChecks("check").Obj()
			gomega.Expect(k8sClient.Create(ctx, clusterQueueAc)).Should(gomega.Succeed())
			localQueue = testing.MakeLocalQueue("queue", ns.Name).ClusterQueue(clusterQueueAc.Name).Obj()
			gomega.Expect(k8sClient.Create(ctx, localQueue)).To(gomega.Succeed())
			testFlavor = testing.MakeResourceFlavor("test-flavor").Label(instanceKey, "test-flavor").Obj()
			gomega.Expect(k8sClient.Create(ctx, testFlavor)).Should(gomega.Succeed())

			jobLookupKey = &types.NamespacedName{Name: jobName, Namespace: ns.Name}
			wlLookupKey = &types.NamespacedName{Name: workloadmpijob.GetWorkloadNameForMPIJob(jobName), Namespace: ns.Name}
		})

		ginkgo.AfterEach(func() {
			gomega.Expect(util.DeleteAdmissionCheck(ctx, k8sClient, admissionCheck)).To(gomega.Succeed())
			util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, testFlavor, true)
			gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed())
			util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, clusterQueueAc, true)
		})

		ginkgo.It("labels and annotations should be propagated from admission check to job", func() {
			createdJob := &kubeflow.MPIJob{}
			createdWorkload := &kueue.Workload{}

			ginkgo.By("creating the job with pod labels & annotations", func() {
				job := testingmpijob.MakeMPIJob(jobName, ns.Name).
					Queue(localQueue.Name).
					PodAnnotation(kubeflow.MPIReplicaTypeWorker, "old-ann-key", "old-ann-value").
					PodLabel(kubeflow.MPIReplicaTypeWorker, "old-label-key", "old-label-value").
					Obj()
				gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())
			})

			ginkgo.By("fetch the job and verify it is suspended as the checks are not ready", func() {
				gomega.Eventually(func() *bool {
					gomega.Expect(k8sClient.Get(ctx, *jobLookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.RunPolicy.Suspend
				}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true)))
			})

			ginkgo.By("fetch the created workload", func() {
				gomega.Eventually(func() error {
					return k8sClient.Get(ctx, *wlLookupKey, createdWorkload)
				}, util.Timeout, util.Interval).Should(gomega.Succeed())
			})

			ginkgo.By("add labels & annotations to the admission check", func() {
				gomega.Eventually(func() error {
					var newWL kueue.Workload
					gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(createdWorkload), &newWL)).To(gomega.Succeed())
					workload.SetAdmissionCheckState(&newWL.Status.AdmissionChecks, kueue.AdmissionCheckState{
						Name:  "check",
						State: kueue.CheckStateReady,
						PodSetUpdates: []kueue.PodSetUpdate{
							{
								Name: "launcher",
								Annotations: map[string]string{
									"ann1": "ann-value-for-launcher",
								},
								Labels: map[string]string{
									"label1": "label-value-for-launcher",
								},
								NodeSelector: map[string]string{
									"selector1": "selector-value-for-launcher",
								},
							},
							{
								Name: "worker",
								Annotations: map[string]string{
									"ann1": "ann-value1",
								},
								Labels: map[string]string{
									"label1": "label-value1",
								},
								NodeSelector: map[string]string{
									"selector1": "selector-value1",
								},
								Tolerations: []corev1.Toleration{
									{
										Key:      "selector1",
										Value:    "selector-value1",
										Operator: corev1.TolerationOpEqual,
										Effect:   corev1.TaintEffectNoSchedule,
									},
								},
							},
						},
					})
					return k8sClient.Status().Update(ctx, &newWL)
				}, util.Timeout, util.Interval).Should(gomega.Succeed())
			})

			ginkgo.By("admit the workload", func() {
				admission := testing.MakeAdmission(clusterQueueAc.Name).
					PodSets(
						kueue.PodSetAssignment{
							Name: "launcher",
							Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
								corev1.ResourceCPU: "test-flavor",
							},
							Count: ptr.To(createdWorkload.Spec.PodSets[0].Count),
						},
						kueue.PodSetAssignment{
							Name: "worker",
							Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
								corev1.ResourceCPU: "test-flavor",
							},
							Count: ptr.To(createdWorkload.Spec.PodSets[1].Count),
						},
					).
					Obj()
				gomega.Expect(k8sClient.Get(ctx, *wlLookupKey, createdWorkload)).Should(gomega.Succeed())
				gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed())
				util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload)
			})

			ginkgo.By("wait for the job to start", func() {
				gomega.Eventually(func() *bool {
					gomega.Expect(k8sClient.Get(ctx, *jobLookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.RunPolicy.Suspend
				}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false)))
			})

			ginkgo.By("verify the PodSetUpdates are propagated to the running job, for worker", func() {
				worker := createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template
				gomega.Expect(worker.Annotations).Should(gomega.HaveKeyWithValue("ann1", "ann-value1"))
				gomega.Expect(worker.Annotations).Should(gomega.HaveKeyWithValue("old-ann-key", "old-ann-value"))
				gomega.Expect(worker.Labels).Should(gomega.HaveKeyWithValue("label1", "label-value1"))
				gomega.Expect(worker.Labels).Should(gomega.HaveKeyWithValue("old-label-key", "old-label-value"))
				gomega.Expect(worker.Spec.NodeSelector).Should(gomega.HaveKeyWithValue(instanceKey, "test-flavor"))
				gomega.Expect(worker.Spec.NodeSelector).Should(gomega.HaveKeyWithValue("selector1", "selector-value1"))
				gomega.Expect(worker.Spec.Tolerations).Should(gomega.BeComparableTo(
					[]corev1.Toleration{
						{
							Key:      "selector1",
							Value:    "selector-value1",
							Operator: corev1.TolerationOpEqual,
							Effect:   corev1.TaintEffectNoSchedule,
						},
					},
				))
			})

			ginkgo.By("verify the PodSetUpdates are propagated to the running job, for launcher", func() {
				launcher := createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template
				gomega.Expect(launcher.Annotations).Should(gomega.HaveKeyWithValue("ann1", "ann-value-for-launcher"))
				gomega.Expect(launcher.Labels).Should(gomega.HaveKeyWithValue("label1", "label-value-for-launcher"))
				gomega.Expect(launcher.Spec.NodeSelector).Should(gomega.HaveKeyWithValue(instanceKey, "test-flavor"))
				gomega.Expect(launcher.Spec.NodeSelector).Should(gomega.HaveKeyWithValue("selector1", "selector-value-for-launcher"))
			})

			ginkgo.By("delete the localQueue to prevent readmission", func() {
				gomega.Expect(util.DeleteLocalQueue(ctx, k8sClient, localQueue)).Should(gomega.Succeed())
			})

			ginkgo.By("clear the workload's admission to stop the job", func() {
				gomega.Expect(k8sClient.Get(ctx, *wlLookupKey, createdWorkload)).Should(gomega.Succeed())
				gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, nil)).Should(gomega.Succeed())
				util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload)
			})

			ginkgo.By("wait for the job to be suspended", func() {
				gomega.Eventually(func() *bool {
					gomega.Expect(k8sClient.Get(ctx, *jobLookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.RunPolicy.Suspend
				}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true)))
			})

			ginkgo.By("verify the PodSetUpdates are restored for worker", func() {
				worker := createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template
				gomega.Expect(worker.Annotations).ShouldNot(gomega.HaveKey("ann1"))
				gomega.Expect(worker.Annotations).Should(gomega.HaveKeyWithValue("old-ann-key", "old-ann-value"))
				gomega.Expect(worker.Labels).ShouldNot(gomega.HaveKey("label1"))
				gomega.Expect(worker.Labels).Should(gomega.HaveKeyWithValue("old-label-key", "old-label-value"))
				gomega.Expect(worker.Spec.NodeSelector).ShouldNot(gomega.HaveKey(instanceKey))
				gomega.Expect(worker.Spec.NodeSelector).ShouldNot(gomega.HaveKey("selector1"))
			})

			ginkgo.By("verify the PodSetUpdates are restored for launcher", func() {
				launcher := createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template
				gomega.Expect(launcher.Annotations).ShouldNot(gomega.HaveKey("ann1"))
				gomega.Expect(launcher.Labels).ShouldNot(gomega.HaveKey("label1"))
				gomega.Expect(launcher.Spec.NodeSelector).ShouldNot(gomega.HaveKey(instanceKey))
				gomega.Expect(launcher.Spec.NodeSelector).ShouldNot(gomega.HaveKey("selector1"))
			})
		})
	})
})

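// This suite restarts the manager via managerSetup(true); per the Describe
// title, MPIJobs are only managed once they reference a queue, and child
// Jobs defer to the parent MPIJob's workload.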
var _ = ginkgo.Describe("Job controller for workloads when only jobs with queue are managed", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() {
	ginkgo.BeforeAll(func() {
		fwk = &framework.Framework{
			CRDPath:     crdPath,
			DepCRDPaths: []string{mpiCrdPath},
		}
		cfg = fwk.Init()
		ctx, k8sClient = fwk.RunManager(cfg, managerSetup(true))
	})
	ginkgo.AfterAll(func() {
		fwk.Teardown()
	})

	var (
		ns             *corev1.Namespace
		childLookupKey types.NamespacedName
		parentJobName  = jobName + "-parent"
		childJobName   = jobName + "-child"
	)
	ginkgo.BeforeEach(func() {
		ns = &corev1.Namespace{
			ObjectMeta: metav1.ObjectMeta{
				GenerateName: "core-",
			},
		}
		gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())
		childLookupKey = types.NamespacedName{Name: childJobName, Namespace: ns.Name}
	})
	ginkgo.AfterEach(func() {
		gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed())
	})

	ginkgo.It("Should reconcile jobs only when queue is set", func() {
		ginkgo.By("checking the workload is not created when queue name is not set")
		job := testingmpijob.MakeMPIJob(jobName, ns.Name).Obj()
		gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())
		lookupKey := types.NamespacedName{Name: jobName, Namespace: ns.Name}
		createdJob := &kubeflow.MPIJob{}
		gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())

		createdWorkload := &kueue.Workload{}
		wlLookupKey := types.NamespacedName{Name: workloadmpijob.GetWorkloadNameForMPIJob(jobName), Namespace: ns.Name}
		gomega.Eventually(func() bool {
			return apierrors.IsNotFound(k8sClient.Get(ctx, wlLookupKey, createdWorkload))
		}, util.Timeout, util.Interval).Should(gomega.BeTrue())

		ginkgo.By("checking the workload is created when queue name is set")
		jobQueueName := "test-queue"
		createdJob.Annotations = map[string]string{constants.QueueAnnotation: jobQueueName}
		gomega.Expect(k8sClient.Update(ctx, createdJob)).Should(gomega.Succeed())
		gomega.Eventually(func() error {
			return k8sClient.Get(ctx, wlLookupKey, createdWorkload)
		}, util.Timeout, util.Interval).Should(gomega.Succeed())
	})

	ginkgo.It("Should suspend a job if the parent workload does not exist", func() {
		ginkgo.By("Creating the parent job which has a queue name")
		parentJob := testingmpijob.MakeMPIJob(parentJobName, ns.Name).
			UID(parentJobName).
			Queue("test").
			Suspend(false).
			Obj()
		gomega.Expect(k8sClient.Create(ctx, parentJob)).Should(gomega.Succeed())

		ginkgo.By("Creating the child job which uses the parent workload annotation")
		childJob := testingjob.MakeJob(childJobName, ns.Name).
			OwnerReference(parentJobName, kubeflow.SchemeGroupVersionKind).
			Suspend(false).
			ParentWorkload("non-existing-parent-workload").
			Obj()
		gomega.Expect(k8sClient.Create(ctx, childJob)).Should(gomega.Succeed())

		ginkgo.By("checking that the child job is suspended")
		gomega.Eventually(func() *bool {
			gomega.Expect(k8sClient.Get(ctx, childLookupKey, childJob)).Should(gomega.Succeed())
			return childJob.Spec.Suspend
		}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true)))
	})

	ginkgo.It("Should not suspend a child job if the parent job doesn't have a queue name", func() {
		ginkgo.By("Creating the parent job which doesn't have a queue name")
		parentJob := testingmpijob.MakeMPIJob(parentJobName, ns.Name).
			UID(parentJobName).
			Suspend(false).
			Obj()
		gomega.Expect(k8sClient.Create(ctx, parentJob)).Should(gomega.Succeed())

		ginkgo.By("Creating the child job which has ownerReference with known existing workload owner")
		childJob := testingjob.MakeJob(childJobName, ns.Name).
			OwnerReference(parentJobName, kubeflow.SchemeGroupVersionKind).
			ParentWorkload(jobframework.GetWorkloadNameForOwnerWithGVK(parentJobName, kubeflow.SchemeGroupVersionKind)).
			Suspend(false).
			Obj()
		gomega.Expect(k8sClient.Create(ctx, childJob)).Should(gomega.Succeed())

		ginkgo.By("Checking that the child job isn't suspended")
		gomega.Eventually(func() *bool {
			gomega.Expect(k8sClient.Get(ctx, childLookupKey, childJob)).Should(gomega.Succeed())
			return childJob.Spec.Suspend
		}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false)))
	})
})

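// This suite enables waitForPodsReady, so the controller maintains a
// PodsReady condition on the workload based on the MPIJob's JobRunning
// status condition.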
var _ = ginkgo.Describe("Job controller when waitForPodsReady enabled", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() {
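	// podsReadyTestSpec describes one table entry: the job status to apply
	// (optionally in two steps), whether the workload gets suspended, and the
	// PodsReady condition expected before and after.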
	type podsReadyTestSpec struct {
		beforeJobStatus *kubeflow.JobStatus
		beforeCondition *metav1.Condition
		jobStatus       kubeflow.JobStatus
		suspended       bool
		wantCondition   *metav1.Condition
	}

	var (
		ns            *corev1.Namespace
		wlLookupKey   types.NamespacedName
		defaultFlavor = testing.MakeResourceFlavor("default").Label(instanceKey, "default").Obj()
	)

	ginkgo.BeforeAll(func() {
		fwk = &framework.Framework{
			CRDPath:     crdPath,
			DepCRDPaths: []string{mpiCrdPath},
		}
		cfg = fwk.Init()
		ctx, k8sClient = fwk.RunManager(cfg, managerSetup(false, jobframework.WithWaitForPodsReady(&configapi.WaitForPodsReady{Enable: true})))

		ginkgo.By("Create a resource flavor")
		gomega.Expect(k8sClient.Create(ctx, defaultFlavor)).Should(gomega.Succeed())
	})
	ginkgo.AfterAll(func() {
		util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, defaultFlavor, true)
		fwk.Teardown()
	})

	ginkgo.BeforeEach(func() {
		ns = &corev1.Namespace{
			ObjectMeta: metav1.ObjectMeta{
				GenerateName: "core-",
			},
		}
		gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())
		wlLookupKey = types.NamespacedName{Name: workloadmpijob.GetWorkloadNameForMPIJob(jobName), Namespace: ns.Name}
	})
	ginkgo.AfterEach(func() {
		gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed())
	})

	ginkgo.DescribeTable("Single job at different stages of progress towards completion",
		func(podsReadyTestSpec podsReadyTestSpec) {
			ginkgo.By("Create a job")
			job := testingmpijob.MakeMPIJob(jobName, ns.Name).Parallelism(2).Obj()
			jobQueueName := "test-queue"
			job.Annotations = map[string]string{constants.QueueAnnotation: jobQueueName}
			gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())
			lookupKey := types.NamespacedName{Name: jobName, Namespace: ns.Name}
			createdJob := &kubeflow.MPIJob{}
			gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())

			ginkgo.By("Fetch the workload created for the job")
			createdWorkload := &kueue.Workload{}
			gomega.Eventually(func() error {
				return k8sClient.Get(ctx, wlLookupKey, createdWorkload)
			}, util.Timeout, util.Interval).Should(gomega.Succeed())

			ginkgo.By("Admit the workload created for the job")
			admission := testing.MakeAdmission("foo").
				PodSets(
					kueue.PodSetAssignment{
						Name: "Launcher",
						Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
							corev1.ResourceCPU: "default",
						},
						Count: ptr.To(createdWorkload.Spec.PodSets[0].Count),
					},
					kueue.PodSetAssignment{
						Name: "Worker",
						Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{
							corev1.ResourceCPU: "default",
						},
						Count: ptr.To(createdWorkload.Spec.PodSets[1].Count),
					},
				).
				Obj()
			gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, createdWorkload, admission)).Should(gomega.Succeed())
			util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload)
			gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).Should(gomega.Succeed())

			ginkgo.By("Wait for the job to be unsuspended")
			gomega.Eventually(func() *bool {
				gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
				return createdJob.Spec.RunPolicy.Suspend
			}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false)))

			if podsReadyTestSpec.beforeJobStatus != nil {
				ginkgo.By("Update the job status to simulate its initial progress towards completion")
				createdJob.Status = *podsReadyTestSpec.beforeJobStatus
				gomega.Expect(k8sClient.Status().Update(ctx, createdJob)).Should(gomega.Succeed())
				gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
			}

			if podsReadyTestSpec.beforeCondition != nil {
				ginkgo.By("Update the workload status")
				gomega.Eventually(func() *metav1.Condition {
					gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).Should(gomega.Succeed())
					return apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadPodsReady)
				}, util.Timeout, util.Interval).Should(gomega.BeComparableTo(podsReadyTestSpec.beforeCondition, ignoreConditionTimestamps))
			}

			ginkgo.By("Update the job status to simulate its progress towards completion")
			createdJob.Status = podsReadyTestSpec.jobStatus
			gomega.Expect(k8sClient.Status().Update(ctx, createdJob)).Should(gomega.Succeed())
			gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())

			if podsReadyTestSpec.suspended {
				ginkgo.By("Unset admission of the workload to suspend the job")
				gomega.Eventually(func() error {
					// The update may need to be retried due to a conflict,
					// as the workload is also updated when the job status changes.
					if err := k8sClient.Get(ctx, wlLookupKey, createdWorkload); err != nil {
						return err
					}
					return util.SetQuotaReservation(ctx, k8sClient, createdWorkload, nil)
				}, util.Timeout, util.Interval).Should(gomega.Succeed())
				util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, createdWorkload)
			}

			ginkgo.By("Verify the PodsReady condition is added")
			gomega.Eventually(func() *metav1.Condition {
				gomega.Expect(k8sClient.Get(ctx, wlLookupKey, createdWorkload)).Should(gomega.Succeed())
				return apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadPodsReady)
			}, util.Timeout, util.Interval).Should(gomega.BeComparableTo(podsReadyTestSpec.wantCondition, ignoreConditionTimestamps))
		},
		ginkgo.Entry("No progress", podsReadyTestSpec{
			wantCondition: &metav1.Condition{
				Type:    kueue.WorkloadPodsReady,
				Status:  metav1.ConditionFalse,
				Reason:  "PodsReady",
				Message: "Not all pods are ready or succeeded",
			},
		}),
		ginkgo.Entry("Running MPIJob", podsReadyTestSpec{
			jobStatus: kubeflow.JobStatus{
				Conditions: []kubeflow.JobCondition{
					{
						Type:   kubeflow.JobRunning,
						Status: corev1.ConditionTrue,
						Reason: "Running",
					},
				},
			},
			wantCondition: &metav1.Condition{
				Type:    kueue.WorkloadPodsReady,
				Status:  metav1.ConditionTrue,
				Reason:  "PodsReady",
				Message: "All pods were ready or succeeded since the workload admission",
			},
		}),
		ginkgo.Entry("Running MPIJob; PodsReady=False before", podsReadyTestSpec{
			beforeCondition: &metav1.Condition{
				Type:    kueue.WorkloadPodsReady,
				Status:  metav1.ConditionFalse,
				Reason:  "PodsReady",
				Message: "Not all pods are ready or succeeded",
			},
			jobStatus: kubeflow.JobStatus{
				Conditions: []kubeflow.JobCondition{
					{
						Type:   kubeflow.JobRunning,
						Status: corev1.ConditionTrue,
						Reason: "Running",
					},
				},
			},
			wantCondition: &metav1.Condition{
				Type:    kueue.WorkloadPodsReady,
				Status:  metav1.ConditionTrue,
				Reason:  "PodsReady",
				Message: "All pods were ready or succeeded since the workload admission",
			},
		}),
		ginkgo.Entry("Job suspended; PodsReady=True before", podsReadyTestSpec{
			beforeJobStatus: &kubeflow.JobStatus{
				Conditions: []kubeflow.JobCondition{
					{
						Type:   kubeflow.JobRunning,
						Status: corev1.ConditionTrue,
						Reason: "Running",
					},
				},
			},
			beforeCondition: &metav1.Condition{
				Type:    kueue.WorkloadPodsReady,
				Status:  metav1.ConditionTrue,
				Reason:  "PodsReady",
				Message: "All pods were ready or succeeded since the workload admission",
			},
			jobStatus: kubeflow.JobStatus{
				Conditions: []kubeflow.JobCondition{
					{
						Type:   kubeflow.JobRunning,
						Status: corev1.ConditionFalse,
						Reason: "Suspended",
					},
				},
			},
			suspended: true,
			wantCondition: &metav1.Condition{
				Type:    kueue.WorkloadPodsReady,
				Status:  metav1.ConditionFalse,
				Reason:  "PodsReady",
				Message: "Not all pods are ready or succeeded",
			},
		}),
	)
})

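// This suite runs the manager together with the scheduler, so admission
// happens through real LocalQueue/ClusterQueue scheduling rather than by
// setting quota reservations directly.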
var _ = ginkgo.Describe("Job controller interacting with scheduler", ginkgo.Ordered, ginkgo.ContinueOnFailure, func() {
	var (
		ns                  *corev1.Namespace
		onDemandFlavor      *kueue.ResourceFlavor
		spotUntaintedFlavor *kueue.ResourceFlavor
		clusterQueue        *kueue.ClusterQueue
		localQueue          *kueue.LocalQueue
	)

	ginkgo.BeforeAll(func() {
		fwk = &framework.Framework{
			CRDPath:     crdPath,
			DepCRDPaths: []string{mpiCrdPath},
		}
		cfg = fwk.Init()
		ctx, k8sClient = fwk.RunManager(cfg, managerAndSchedulerSetup())
	})
	ginkgo.AfterAll(func() {
		fwk.Teardown()
	})

	ginkgo.BeforeEach(func() {
		ns = &corev1.Namespace{
			ObjectMeta: metav1.ObjectMeta{
				GenerateName: "core-",
			},
		}
		gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())

		onDemandFlavor = testing.MakeResourceFlavor("on-demand").Label(instanceKey, "on-demand").Obj()
		gomega.Expect(k8sClient.Create(ctx, onDemandFlavor)).Should(gomega.Succeed())

		spotUntaintedFlavor = testing.MakeResourceFlavor("spot-untainted").Label(instanceKey, "spot-untainted").Obj()
		gomega.Expect(k8sClient.Create(ctx, spotUntaintedFlavor)).Should(gomega.Succeed())

		clusterQueue = testing.MakeClusterQueue("dev-clusterqueue").
			ResourceGroup(
				*testing.MakeFlavorQuotas("spot-untainted").Resource(corev1.ResourceCPU, "5").Obj(),
				*testing.MakeFlavorQuotas("on-demand").Resource(corev1.ResourceCPU, "5").Obj(),
			).Obj()
		gomega.Expect(k8sClient.Create(ctx, clusterQueue)).Should(gomega.Succeed())
	})
	ginkgo.AfterEach(func() {
		gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed())
		util.ExpectClusterQueueToBeDeleted(ctx, k8sClient, clusterQueue, true)
		util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, onDemandFlavor, true)
		util.ExpectResourceFlavorToBeDeleted(ctx, k8sClient, spotUntaintedFlavor, true)
	})

	ginkgo.It("Should schedule jobs as they fit in their ClusterQueue", func() {
		ginkgo.By("creating localQueue")
		localQueue = testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(clusterQueue.Name).Obj()
		gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed())

		ginkgo.By("checking a dev job starts")
		job := testingmpijob.MakeMPIJob("dev-job", ns.Name).Queue(localQueue.Name).
			Request(kubeflow.MPIReplicaTypeLauncher, corev1.ResourceCPU, "3").
			Request(kubeflow.MPIReplicaTypeWorker, corev1.ResourceCPU, "4").
			Obj()
		gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())
		createdJob := &kubeflow.MPIJob{}
		gomega.Eventually(func() *bool {
			gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: job.Name, Namespace: job.Namespace}, createdJob)).
				Should(gomega.Succeed())
			return createdJob.Spec.RunPolicy.Suspend
		}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false)))
		gomega.Expect(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(spotUntaintedFlavor.Name))
		gomega.Expect(createdJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.NodeSelector[instanceKey]).Should(gomega.Equal(onDemandFlavor.Name))
		util.ExpectPendingWorkloadsMetric(clusterQueue, 0, 0)
		util.ExpectReservingActiveWorkloadsMetric(clusterQueue, 1)
	})

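	// When the admission is cleared again, the injected flavor node selectors
	// must be stripped so the job's original selectors are restored.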
	ginkgo.When("The workload's admission is removed", func() {
		ginkgo.It("Should restore the original node selectors", func() {
			localQueue := testing.MakeLocalQueue("local-queue", ns.Name).ClusterQueue(clusterQueue.Name).Obj()
			job := testingmpijob.MakeMPIJob(jobName, ns.Name).Queue(localQueue.Name).
				Request(kubeflow.MPIReplicaTypeLauncher, corev1.ResourceCPU, "3").
				Request(kubeflow.MPIReplicaTypeWorker, corev1.ResourceCPU, "4").
				Obj()
			lookupKey := types.NamespacedName{Name: job.Name, Namespace: job.Namespace}
			createdJob := &kubeflow.MPIJob{}

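			// nodeSelectors snapshots the node selectors of every replica
			// type, so they can be compared before and after admission.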
			nodeSelectors := func(j *kubeflow.MPIJob) map[kubeflow.MPIReplicaType]map[string]string {
				ret := map[kubeflow.MPIReplicaType]map[string]string{}
				for k := range j.Spec.MPIReplicaSpecs {
					ret[k] = j.Spec.MPIReplicaSpecs[k].Template.Spec.NodeSelector
				}
				return ret
			}

			ginkgo.By("create a job", func() {
				gomega.Expect(k8sClient.Create(ctx, job)).Should(gomega.Succeed())
			})

			ginkgo.By("job should be suspended", func() {
				gomega.Eventually(func() *bool {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.RunPolicy.Suspend
				}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(true)))
			})

			// back up the original node selectors
			originalNodeSelectors := nodeSelectors(createdJob)

			ginkgo.By("create a localQueue", func() {
				gomega.Expect(k8sClient.Create(ctx, localQueue)).Should(gomega.Succeed())
			})

			ginkgo.By("job should be unsuspended", func() {
				gomega.Eventually(func() *bool {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return createdJob.Spec.RunPolicy.Suspend
				}, util.Timeout, util.Interval).Should(gomega.Equal(ptr.To(false)))
			})

			ginkgo.By("the node selectors should be updated", func() {
				gomega.Eventually(func() map[kubeflow.MPIReplicaType]map[string]string {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return nodeSelectors(createdJob)
				}, util.Timeout, util.Interval).ShouldNot(gomega.Equal(originalNodeSelectors))
			})

			ginkgo.By("delete the localQueue to prevent readmission", func() {
				gomega.Expect(util.DeleteLocalQueue(ctx, k8sClient, localQueue)).Should(gomega.Succeed())
			})

			ginkgo.By("clear the workload's admission to stop the job", func() {
				wl := &kueue.Workload{}
				wlKey := types.NamespacedName{Name: workloadmpijob.GetWorkloadNameForMPIJob(job.Name), Namespace: job.Namespace}
				gomega.Expect(k8sClient.Get(ctx, wlKey, wl)).Should(gomega.Succeed())
				gomega.Expect(util.SetQuotaReservation(ctx, k8sClient, wl, nil)).Should(gomega.Succeed())
				util.SyncAdmittedConditionForWorkloads(ctx, k8sClient, wl)
			})

			ginkgo.By("the node selectors should be restored", func() {
				gomega.Eventually(func() map[kubeflow.MPIReplicaType]map[string]string {
					gomega.Expect(k8sClient.Get(ctx, lookupKey, createdJob)).Should(gomega.Succeed())
					return nodeSelectors(createdJob)
				}, util.Timeout, util.Interval).Should(gomega.Equal(originalNodeSelectors))
			})
		})
	})
})