sigs.k8s.io/kueue@v0.6.2/test/integration/multikueue/multikueue_test.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package multikueue 18 19 import ( 20 "github.com/google/go-cmp/cmp/cmpopts" 21 "github.com/onsi/ginkgo/v2" 22 "github.com/onsi/gomega" 23 batchv1 "k8s.io/api/batch/v1" 24 corev1 "k8s.io/api/core/v1" 25 apimeta "k8s.io/apimachinery/pkg/api/meta" 26 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 "k8s.io/apimachinery/pkg/types" 28 "sigs.k8s.io/controller-runtime/pkg/client" 29 jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2" 30 31 kueuealpha "sigs.k8s.io/kueue/apis/kueue/v1alpha1" 32 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 33 "sigs.k8s.io/kueue/pkg/controller/admissionchecks/multikueue" 34 workloadjob "sigs.k8s.io/kueue/pkg/controller/jobs/job" 35 workloadjobset "sigs.k8s.io/kueue/pkg/controller/jobs/jobset" 36 utiltesting "sigs.k8s.io/kueue/pkg/util/testing" 37 testingjob "sigs.k8s.io/kueue/pkg/util/testingjobs/job" 38 testingjobset "sigs.k8s.io/kueue/pkg/util/testingjobs/jobset" 39 "sigs.k8s.io/kueue/pkg/workload" 40 "sigs.k8s.io/kueue/test/util" 41 ) 42 43 var _ = ginkgo.Describe("Multikueue", func() { 44 var ( 45 managerNs *corev1.Namespace 46 worker1Ns *corev1.Namespace 47 worker2Ns *corev1.Namespace 48 49 managerMultikueueSecret1 *corev1.Secret 50 managerMultikueueSecret2 *corev1.Secret 51 workerCluster1 *kueuealpha.MultiKueueCluster 52 workerCluster2 *kueuealpha.MultiKueueCluster 53 managerMultiKueueConfig *kueuealpha.MultiKueueConfig 54 multikueueAC *kueue.AdmissionCheck 55 managerCq *kueue.ClusterQueue 56 managerLq *kueue.LocalQueue 57 58 worker1Cq *kueue.ClusterQueue 59 worker1Lq *kueue.LocalQueue 60 61 worker2Cq *kueue.ClusterQueue 62 worker2Lq *kueue.LocalQueue 63 ) 64 ginkgo.BeforeEach(func() { 65 managerNs = &corev1.Namespace{ 66 ObjectMeta: metav1.ObjectMeta{ 67 GenerateName: "multikueue-", 68 }, 69 } 70 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, managerNs)).To(gomega.Succeed()) 71 72 worker1Ns = &corev1.Namespace{ 73 ObjectMeta: metav1.ObjectMeta{ 74 Name: managerNs.Name, 75 }, 76 } 77 gomega.Expect(worker1TestCluster.client.Create(worker1TestCluster.ctx, worker1Ns)).To(gomega.Succeed()) 78 79 worker2Ns = &corev1.Namespace{ 80 ObjectMeta: metav1.ObjectMeta{ 81 Name: managerNs.Name, 82 }, 83 } 84 gomega.Expect(worker2TestCluster.client.Create(worker2TestCluster.ctx, worker2Ns)).To(gomega.Succeed()) 85 86 w1Kubeconfig, err := worker1TestCluster.kubeConfigBytes() 87 gomega.Expect(err).NotTo(gomega.HaveOccurred()) 88 89 w2Kubeconfig, err := worker2TestCluster.kubeConfigBytes() 90 gomega.Expect(err).NotTo(gomega.HaveOccurred()) 91 92 managerMultikueueSecret1 = &corev1.Secret{ 93 ObjectMeta: metav1.ObjectMeta{ 94 Name: "multikueue1", 95 Namespace: managersConfigNamespace.Name, 96 }, 97 Data: map[string][]byte{ 98 kueuealpha.MultiKueueConfigSecretKey: w1Kubeconfig, 99 }, 100 } 101 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, managerMultikueueSecret1)).To(gomega.Succeed()) 102 103 managerMultikueueSecret2 = &corev1.Secret{ 104 ObjectMeta: metav1.ObjectMeta{ 105 Name: "multikueue2", 106 Namespace: managersConfigNamespace.Name, 107 }, 108 Data: map[string][]byte{ 109 kueuealpha.MultiKueueConfigSecretKey: w2Kubeconfig, 110 }, 111 } 112 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, managerMultikueueSecret2)).To(gomega.Succeed()) 113 114 workerCluster1 = utiltesting.MakeMultiKueueCluster("worker1").KubeConfig(kueuealpha.SecretLocationType, managerMultikueueSecret1.Name).Obj() 115 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, workerCluster1)).To(gomega.Succeed()) 116 117 workerCluster2 = utiltesting.MakeMultiKueueCluster("worker2").KubeConfig(kueuealpha.SecretLocationType, managerMultikueueSecret2.Name).Obj() 118 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, workerCluster2)).To(gomega.Succeed()) 119 120 managerMultiKueueConfig = utiltesting.MakeMultiKueueConfig("multikueueconfig").Clusters(workerCluster1.Name, workerCluster2.Name).Obj() 121 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, managerMultiKueueConfig)).Should(gomega.Succeed()) 122 123 multikueueAC = utiltesting.MakeAdmissionCheck("ac1"). 124 ControllerName(multikueue.ControllerName). 125 Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", managerMultiKueueConfig.Name). 126 Obj() 127 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, multikueueAC)).Should(gomega.Succeed()) 128 129 ginkgo.By("wait for check active", func() { 130 updatetedAc := kueue.AdmissionCheck{} 131 acKey := client.ObjectKeyFromObject(multikueueAC) 132 gomega.Eventually(func(g gomega.Gomega) { 133 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, acKey, &updatetedAc)).To(gomega.Succeed()) 134 cond := apimeta.FindStatusCondition(updatetedAc.Status.Conditions, kueue.AdmissionCheckActive) 135 g.Expect(cond).NotTo(gomega.BeNil()) 136 g.Expect(cond.Status).To(gomega.Equal(metav1.ConditionTrue), "Reason: %s, Message: %q", cond.Reason, cond.Message) 137 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 138 }) 139 140 managerCq = utiltesting.MakeClusterQueue("q1"). 141 AdmissionChecks(multikueueAC.Name). 142 Obj() 143 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, managerCq)).Should(gomega.Succeed()) 144 145 managerLq = utiltesting.MakeLocalQueue(managerCq.Name, managerNs.Name).ClusterQueue(managerCq.Name).Obj() 146 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, managerLq)).Should(gomega.Succeed()) 147 148 worker1Cq = utiltesting.MakeClusterQueue("q1").Obj() 149 gomega.Expect(worker1TestCluster.client.Create(worker1TestCluster.ctx, worker1Cq)).Should(gomega.Succeed()) 150 worker1Lq = utiltesting.MakeLocalQueue(worker1Cq.Name, worker1Ns.Name).ClusterQueue(worker1Cq.Name).Obj() 151 gomega.Expect(worker1TestCluster.client.Create(worker1TestCluster.ctx, worker1Lq)).Should(gomega.Succeed()) 152 153 worker2Cq = utiltesting.MakeClusterQueue("q1").Obj() 154 gomega.Expect(worker2TestCluster.client.Create(worker2TestCluster.ctx, worker2Cq)).Should(gomega.Succeed()) 155 worker2Lq = utiltesting.MakeLocalQueue(worker2Cq.Name, worker2Ns.Name).ClusterQueue(worker2Cq.Name).Obj() 156 gomega.Expect(worker2TestCluster.client.Create(worker2TestCluster.ctx, worker2Lq)).Should(gomega.Succeed()) 157 }) 158 159 ginkgo.AfterEach(func() { 160 gomega.Expect(util.DeleteNamespace(managerTestCluster.ctx, managerTestCluster.client, managerNs)).To(gomega.Succeed()) 161 gomega.Expect(util.DeleteNamespace(worker1TestCluster.ctx, worker1TestCluster.client, worker1Ns)).To(gomega.Succeed()) 162 gomega.Expect(util.DeleteNamespace(worker2TestCluster.ctx, worker2TestCluster.client, worker2Ns)).To(gomega.Succeed()) 163 util.ExpectClusterQueueToBeDeleted(managerTestCluster.ctx, managerTestCluster.client, managerCq, true) 164 util.ExpectClusterQueueToBeDeleted(worker1TestCluster.ctx, worker1TestCluster.client, worker1Cq, true) 165 util.ExpectClusterQueueToBeDeleted(worker2TestCluster.ctx, worker2TestCluster.client, worker2Cq, true) 166 util.ExpectAdmissionCheckToBeDeleted(managerTestCluster.ctx, managerTestCluster.client, multikueueAC, true) 167 gomega.Expect(managerTestCluster.client.Delete(managerTestCluster.ctx, managerMultiKueueConfig)).To(gomega.Succeed()) 168 gomega.Expect(managerTestCluster.client.Delete(managerTestCluster.ctx, workerCluster1)).To(gomega.Succeed()) 169 gomega.Expect(managerTestCluster.client.Delete(managerTestCluster.ctx, workerCluster2)).To(gomega.Succeed()) 170 gomega.Expect(managerTestCluster.client.Delete(managerTestCluster.ctx, managerMultikueueSecret1)).To(gomega.Succeed()) 171 gomega.Expect(managerTestCluster.client.Delete(managerTestCluster.ctx, managerMultikueueSecret2)).To(gomega.Succeed()) 172 }) 173 ginkgo.It("Should properly manage the active condition of AdmissionChecks and MultiKueueClusters", func() { 174 ac := utiltesting.MakeAdmissionCheck("testing-ac"). 175 ControllerName(multikueue.ControllerName). 176 Parameters(kueuealpha.GroupVersion.Group, "MultiKueueConfig", "testing-config"). 177 Obj() 178 ginkgo.By("creating the admission check with missing config, it's set inactive", func() { 179 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, ac)).Should(gomega.Succeed()) 180 ginkgo.DeferCleanup(func() error { return managerTestCluster.client.Delete(managerTestCluster.ctx, ac) }) 181 182 ginkgo.By("wait for the check's active state update", func() { 183 updatetedAc := kueue.AdmissionCheck{} 184 acKey := client.ObjectKeyFromObject(ac) 185 gomega.Eventually(func(g gomega.Gomega) { 186 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, acKey, &updatetedAc)).To(gomega.Succeed()) 187 g.Expect(updatetedAc.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(metav1.Condition{ 188 Type: kueue.AdmissionCheckActive, 189 Status: metav1.ConditionFalse, 190 Reason: "Inactive", 191 Message: `Cannot load the AdmissionChecks parameters: MultiKueueConfig.kueue.x-k8s.io "testing-config" not found`, 192 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")))) 193 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 194 }) 195 }) 196 197 ginkgo.By("creating a config with duplicate clusters should fail", func() { 198 badConfig := utiltesting.MakeMultiKueueConfig("bad-config").Clusters("c1", "c2", "c1").Obj() 199 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, badConfig).Error()).Should(gomega.Equal( 200 `MultiKueueConfig.kueue.x-k8s.io "bad-config" is invalid: spec.clusters[2]: Duplicate value: "c1"`)) 201 }) 202 203 config := utiltesting.MakeMultiKueueConfig("testing-config").Clusters("testing-cluster").Obj() 204 ginkgo.By("creating the config, the admission check's state is updated", func() { 205 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, config)).Should(gomega.Succeed()) 206 ginkgo.DeferCleanup(func() error { return managerTestCluster.client.Delete(managerTestCluster.ctx, config) }) 207 208 ginkgo.By("wait for the check's active state update", func() { 209 updatetedAc := kueue.AdmissionCheck{} 210 acKey := client.ObjectKeyFromObject(ac) 211 gomega.Eventually(func(g gomega.Gomega) { 212 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, acKey, &updatetedAc)).To(gomega.Succeed()) 213 g.Expect(updatetedAc.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(metav1.Condition{ 214 Type: kueue.AdmissionCheckActive, 215 Status: metav1.ConditionFalse, 216 Reason: "Inactive", 217 Message: `Missing clusters: [testing-cluster]`, 218 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")))) 219 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 220 }) 221 }) 222 223 cluster := utiltesting.MakeMultiKueueCluster("testing-cluster").KubeConfig(kueuealpha.SecretLocationType, "testing-secret").Obj() 224 ginkgo.By("creating the cluster, its Active state is updated, the admission check's state is updated", func() { 225 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, cluster)).Should(gomega.Succeed()) 226 ginkgo.DeferCleanup(func() error { return managerTestCluster.client.Delete(managerTestCluster.ctx, cluster) }) 227 228 ginkgo.By("wait for the cluster's active state update", func() { 229 updatetedCluster := kueuealpha.MultiKueueCluster{} 230 clusterKey := client.ObjectKeyFromObject(cluster) 231 gomega.Eventually(func(g gomega.Gomega) { 232 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, clusterKey, &updatetedCluster)).To(gomega.Succeed()) 233 g.Expect(updatetedCluster.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(metav1.Condition{ 234 Type: kueuealpha.MultiKueueClusterActive, 235 Status: metav1.ConditionFalse, 236 Reason: "BadConfig", 237 Message: `Secret "testing-secret" not found`, 238 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")))) 239 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 240 }) 241 242 ginkgo.By("wait for the check's active state update", func() { 243 updatetedAc := kueue.AdmissionCheck{} 244 acKey := client.ObjectKeyFromObject(ac) 245 gomega.Eventually(func(g gomega.Gomega) { 246 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, acKey, &updatetedAc)).To(gomega.Succeed()) 247 g.Expect(updatetedAc.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(metav1.Condition{ 248 Type: kueue.AdmissionCheckActive, 249 Status: metav1.ConditionFalse, 250 Reason: "Inactive", 251 Message: `Inactive clusters: [testing-cluster]`, 252 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")))) 253 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 254 }) 255 }) 256 257 w1Kubeconfig, err := worker1TestCluster.kubeConfigBytes() 258 gomega.Expect(err).NotTo(gomega.HaveOccurred()) 259 secret := &corev1.Secret{ 260 ObjectMeta: metav1.ObjectMeta{ 261 Name: "testing-secret", 262 Namespace: managersConfigNamespace.Name, 263 }, 264 Data: map[string][]byte{ 265 kueuealpha.MultiKueueConfigSecretKey: w1Kubeconfig, 266 }, 267 } 268 269 ginkgo.By("creating the secret, the cluster and admission check become active", func() { 270 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, secret)).Should(gomega.Succeed()) 271 ginkgo.DeferCleanup(func() error { return managerTestCluster.client.Delete(managerTestCluster.ctx, secret) }) 272 273 ginkgo.By("wait for the cluster's active state update", func() { 274 updatetedCluster := kueuealpha.MultiKueueCluster{} 275 clusterKey := client.ObjectKeyFromObject(cluster) 276 gomega.Eventually(func(g gomega.Gomega) { 277 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, clusterKey, &updatetedCluster)).To(gomega.Succeed()) 278 g.Expect(updatetedCluster.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(metav1.Condition{ 279 Type: kueuealpha.MultiKueueClusterActive, 280 Status: metav1.ConditionTrue, 281 Reason: "Active", 282 Message: "Connected", 283 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")))) 284 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 285 }) 286 287 ginkgo.By("wait for the check's active state update", func() { 288 updatetedAc := kueue.AdmissionCheck{} 289 acKey := client.ObjectKeyFromObject(ac) 290 gomega.Eventually(func(g gomega.Gomega) { 291 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, acKey, &updatetedAc)).To(gomega.Succeed()) 292 g.Expect(updatetedAc.Status.Conditions).To(gomega.ContainElement(gomega.BeComparableTo(metav1.Condition{ 293 Type: kueue.AdmissionCheckActive, 294 Status: metav1.ConditionTrue, 295 Reason: "Active", 296 Message: "The admission check is active", 297 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")))) 298 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 299 }) 300 }) 301 }) 302 303 ginkgo.It("Should run a job on worker if admitted", func() { 304 job := testingjob.MakeJob("job", managerNs.Name). 305 Queue(managerLq.Name). 306 Obj() 307 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, job)).Should(gomega.Succeed()) 308 309 createdWorkload := &kueue.Workload{} 310 wlLookupKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(job.Name), Namespace: managerNs.Name} 311 312 ginkgo.By("setting workload reservation in the management cluster", func() { 313 admission := utiltesting.MakeAdmission(managerCq.Name).Obj() 314 gomega.Eventually(func(g gomega.Gomega) { 315 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 316 g.Expect(util.SetQuotaReservation(managerTestCluster.ctx, managerTestCluster.client, createdWorkload, admission)).To(gomega.Succeed()) 317 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 318 }) 319 320 ginkgo.By("checking the workload creation in the worker clusters", func() { 321 managerWl := &kueue.Workload{} 322 gomega.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, managerWl)).To(gomega.Succeed()) 323 gomega.Eventually(func(g gomega.Gomega) { 324 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 325 g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec)) 326 g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 327 g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec)) 328 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 329 }) 330 331 ginkgo.By("setting workload reservation in worker1, AC state is updated in manager and worker2 wl is removed", func() { 332 admission := utiltesting.MakeAdmission(managerCq.Name).Obj() 333 334 gomega.Eventually(func(g gomega.Gomega) { 335 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 336 g.Expect(util.SetQuotaReservation(worker1TestCluster.ctx, worker1TestCluster.client, createdWorkload, admission)).To(gomega.Succeed()) 337 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 338 339 gomega.Eventually(func(g gomega.Gomega) { 340 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 341 acs := workload.FindAdmissionCheck(createdWorkload.Status.AdmissionChecks, multikueueAC.Name) 342 g.Expect(acs).NotTo(gomega.BeNil()) 343 g.Expect(acs.State).To(gomega.Equal(kueue.CheckStatePending)) 344 g.Expect(acs.Message).To(gomega.Equal(`The workload got reservation on "worker1"`)) 345 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 346 347 gomega.Eventually(func(g gomega.Gomega) { 348 g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, wlLookupKey, createdWorkload)).To(utiltesting.BeNotFoundError()) 349 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 350 }) 351 352 ginkgo.By("finishing the worker job", func() { 353 gomega.Eventually(func(g gomega.Gomega) { 354 createdJob := batchv1.Job{} 355 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, client.ObjectKeyFromObject(job), &createdJob)).To(gomega.Succeed()) 356 createdJob.Status.Conditions = append(createdJob.Status.Conditions, batchv1.JobCondition{ 357 Type: batchv1.JobComplete, 358 Status: corev1.ConditionTrue, 359 LastProbeTime: metav1.Now(), 360 LastTransitionTime: metav1.Now(), 361 }) 362 g.Expect(worker1TestCluster.client.Status().Update(worker1TestCluster.ctx, &createdJob)).To(gomega.Succeed()) 363 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 364 365 gomega.Eventually(func(g gomega.Gomega) { 366 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 367 368 g.Expect(apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{ 369 Type: kueue.WorkloadFinished, 370 Status: metav1.ConditionTrue, 371 Reason: "JobFinished", 372 Message: `Job finished successfully`, 373 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime"))) 374 }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) 375 376 gomega.Eventually(func(g gomega.Gomega) { 377 createdWorkload := &kueue.Workload{} 378 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(utiltesting.BeNotFoundError()) 379 }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) 380 381 }) 382 }) 383 384 ginkgo.It("Should run a jobSet on worker if admitted", func() { 385 jobSet := testingjobset.MakeJobSet("job-set", managerNs.Name). 386 Queue(managerLq.Name). 387 ReplicatedJobs( 388 testingjobset.ReplicatedJobRequirements{ 389 Name: "replicated-job-1", 390 Replicas: 1, 391 Parallelism: 1, 392 Completions: 1, 393 }, testingjobset.ReplicatedJobRequirements{ 394 Name: "replicated-job-2", 395 Replicas: 3, 396 Parallelism: 1, 397 Completions: 1, 398 }, 399 ). 400 Obj() 401 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, jobSet)).Should(gomega.Succeed()) 402 403 createdWorkload := &kueue.Workload{} 404 wlLookupKey := types.NamespacedName{Name: workloadjobset.GetWorkloadNameForJobSet(jobSet.Name), Namespace: managerNs.Name} 405 406 admission := utiltesting.MakeAdmission(managerCq.Name).PodSets( 407 kueue.PodSetAssignment{ 408 Name: "replicated-job-1", 409 }, kueue.PodSetAssignment{ 410 Name: "replicated-job-2", 411 }, 412 ).Obj() 413 414 ginkgo.By("setting workload reservation in the management cluster", func() { 415 gomega.Eventually(func(g gomega.Gomega) { 416 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 417 g.Expect(util.SetQuotaReservation(managerTestCluster.ctx, managerTestCluster.client, createdWorkload, admission)).To(gomega.Succeed()) 418 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 419 }) 420 421 ginkgo.By("checking the workload creation in the worker clusters", func() { 422 managerWl := &kueue.Workload{} 423 gomega.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, managerWl)).To(gomega.Succeed()) 424 gomega.Eventually(func(g gomega.Gomega) { 425 g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 426 g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec)) 427 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 428 g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec)) 429 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 430 }) 431 432 ginkgo.By("setting workload reservation in worker2, the workload is admitted in manager amd worker1 wl is removed", func() { 433 gomega.Eventually(func(g gomega.Gomega) { 434 g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 435 g.Expect(util.SetQuotaReservation(worker2TestCluster.ctx, worker2TestCluster.client, createdWorkload, admission)).To(gomega.Succeed()) 436 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 437 438 gomega.Eventually(func(g gomega.Gomega) { 439 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 440 acs := workload.FindAdmissionCheck(createdWorkload.Status.AdmissionChecks, multikueueAC.Name) 441 g.Expect(acs).NotTo(gomega.BeNil()) 442 g.Expect(acs.State).To(gomega.Equal(kueue.CheckStateReady)) 443 g.Expect(acs.Message).To(gomega.Equal(`The workload got reservation on "worker2"`)) 444 445 g.Expect(apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadAdmitted)).To(gomega.BeComparableTo(&metav1.Condition{ 446 Type: kueue.WorkloadAdmitted, 447 Status: metav1.ConditionTrue, 448 Reason: "Admitted", 449 Message: "The workload is admitted", 450 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime"))) 451 452 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 453 454 gomega.Eventually(func(g gomega.Gomega) { 455 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(utiltesting.BeNotFoundError()) 456 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 457 }) 458 459 ginkgo.By("changing the status of the jobset in the worker, updates the manager's jobset status", func() { 460 gomega.Eventually(func(g gomega.Gomega) { 461 createdJobSet := jobset.JobSet{} 462 g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, client.ObjectKeyFromObject(jobSet), &createdJobSet)).To(gomega.Succeed()) 463 createdJobSet.Status.Restarts = 10 464 g.Expect(worker2TestCluster.client.Status().Update(worker2TestCluster.ctx, &createdJobSet)).To(gomega.Succeed()) 465 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 466 gomega.Eventually(func(g gomega.Gomega) { 467 createdJobSet := jobset.JobSet{} 468 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, client.ObjectKeyFromObject(jobSet), &createdJobSet)).To(gomega.Succeed()) 469 g.Expect(createdJobSet.Status.Restarts).To(gomega.Equal(int32(10))) 470 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 471 }) 472 473 ginkgo.By("finishing the worker jobSet, the manager's wl is marked as finished and the worker2 wl removed", func() { 474 gomega.Eventually(func(g gomega.Gomega) { 475 createdJobSet := jobset.JobSet{} 476 g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, client.ObjectKeyFromObject(jobSet), &createdJobSet)).To(gomega.Succeed()) 477 apimeta.SetStatusCondition(&createdJobSet.Status.Conditions, metav1.Condition{ 478 Type: string(jobset.JobSetCompleted), 479 Status: metav1.ConditionTrue, 480 Reason: "ByTest", 481 Message: "by test", 482 }) 483 g.Expect(worker2TestCluster.client.Status().Update(worker2TestCluster.ctx, &createdJobSet)).To(gomega.Succeed()) 484 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 485 486 gomega.Eventually(func(g gomega.Gomega) { 487 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 488 489 g.Expect(apimeta.FindStatusCondition(createdWorkload.Status.Conditions, kueue.WorkloadFinished)).To(gomega.BeComparableTo(&metav1.Condition{ 490 Type: kueue.WorkloadFinished, 491 Status: metav1.ConditionTrue, 492 Reason: "JobSetFinished", 493 Message: `JobSet finished successfully`, 494 }, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime"))) 495 }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) 496 497 gomega.Eventually(func(g gomega.Gomega) { 498 createdWorkload := &kueue.Workload{} 499 g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, wlLookupKey, createdWorkload)).To(utiltesting.BeNotFoundError()) 500 }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) 501 502 }) 503 }) 504 505 ginkgo.It("Should remove the worker's workload and job when managers job is deleted", func() { 506 job := testingjob.MakeJob("job", managerNs.Name). 507 Queue(managerLq.Name). 508 Obj() 509 gomega.Expect(managerTestCluster.client.Create(managerTestCluster.ctx, job)).Should(gomega.Succeed()) 510 511 createdWorkload := &kueue.Workload{} 512 wlLookupKey := types.NamespacedName{Name: workloadjob.GetWorkloadNameForJob(job.Name), Namespace: managerNs.Name} 513 514 ginkgo.By("setting workload reservation in the management cluster", func() { 515 admission := utiltesting.MakeAdmission(managerCq.Name).Obj() 516 gomega.Eventually(func(g gomega.Gomega) { 517 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 518 g.Expect(util.SetQuotaReservation(managerTestCluster.ctx, managerTestCluster.client, createdWorkload, admission)).To(gomega.Succeed()) 519 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 520 }) 521 522 ginkgo.By("checking the workload creation in the worker clusters", func() { 523 managerWl := &kueue.Workload{} 524 gomega.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, managerWl)).To(gomega.Succeed()) 525 gomega.Eventually(func(g gomega.Gomega) { 526 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 527 g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec)) 528 g.Expect(worker2TestCluster.client.Get(worker2TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 529 g.Expect(createdWorkload.Spec).To(gomega.BeComparableTo(managerWl.Spec)) 530 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 531 }) 532 533 ginkgo.By("setting workload reservation in worker1, the job is created in worker1", func() { 534 admission := utiltesting.MakeAdmission(managerCq.Name).Obj() 535 536 gomega.Eventually(func(g gomega.Gomega) { 537 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 538 g.Expect(util.SetQuotaReservation(worker1TestCluster.ctx, worker1TestCluster.client, createdWorkload, admission)).To(gomega.Succeed()) 539 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 540 541 gomega.Eventually(func(g gomega.Gomega) { 542 createdJob := batchv1.Job{} 543 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, client.ObjectKeyFromObject(job), &createdJob)).To(gomega.Succeed()) 544 }, util.Timeout, util.Interval).Should(gomega.Succeed()) 545 }) 546 547 ginkgo.By("removing the managers job and workload, the workload and job in worker1 are removed", func() { 548 gomega.Expect(managerTestCluster.client.Delete(managerTestCluster.ctx, job)).Should(gomega.Succeed()) 549 gomega.Eventually(func(g gomega.Gomega) { 550 g.Expect(managerTestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(gomega.Succeed()) 551 g.Expect(managerTestCluster.client.Delete(managerTestCluster.ctx, createdWorkload)).To(gomega.Succeed()) 552 553 }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) 554 555 gomega.Eventually(func(g gomega.Gomega) { 556 createdJob := batchv1.Job{} 557 g.Expect(worker1TestCluster.client.Get(managerTestCluster.ctx, wlLookupKey, createdWorkload)).To(utiltesting.BeNotFoundError()) 558 g.Expect(worker1TestCluster.client.Get(worker1TestCluster.ctx, client.ObjectKeyFromObject(job), &createdJob)).To(gomega.Succeed()) 559 }, util.LongTimeout, util.Interval).Should(gomega.Succeed()) 560 }) 561 }) 562 563 })