sigs.k8s.io/kueue@v0.6.2/pkg/controller/jobs/job/job_controller_test.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package job 18 19 import ( 20 "strings" 21 "testing" 22 "time" 23 24 "github.com/google/go-cmp/cmp" 25 "github.com/google/go-cmp/cmp/cmpopts" 26 batchv1 "k8s.io/api/batch/v1" 27 corev1 "k8s.io/api/core/v1" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/types" 30 "k8s.io/utils/ptr" 31 ctrl "sigs.k8s.io/controller-runtime" 32 "sigs.k8s.io/controller-runtime/pkg/client" 33 "sigs.k8s.io/controller-runtime/pkg/reconcile" 34 35 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 36 "sigs.k8s.io/kueue/pkg/constants" 37 controllerconsts "sigs.k8s.io/kueue/pkg/controller/constants" 38 "sigs.k8s.io/kueue/pkg/controller/jobframework" 39 "sigs.k8s.io/kueue/pkg/podset" 40 utiltesting "sigs.k8s.io/kueue/pkg/util/testing" 41 utiltestingjob "sigs.k8s.io/kueue/pkg/util/testingjobs/job" 42 ) 43 44 func TestPodsReady(t *testing.T) { 45 testcases := map[string]struct { 46 job Job 47 want bool 48 }{ 49 "parallelism = completions; no progress": { 50 job: Job{ 51 Spec: batchv1.JobSpec{ 52 Parallelism: ptr.To[int32](3), 53 Completions: ptr.To[int32](3), 54 }, 55 Status: batchv1.JobStatus{}, 56 }, 57 want: false, 58 }, 59 "parallelism = completions; not enough progress": { 60 job: Job{ 61 Spec: batchv1.JobSpec{ 62 Parallelism: ptr.To[int32](3), 63 Completions: ptr.To[int32](3), 64 }, 65 Status: batchv1.JobStatus{ 66 Ready: ptr.To[int32](1), 67 Succeeded: 1, 68 }, 69 }, 70 want: false, 71 }, 72 "parallelism = completions; all ready": { 73 job: Job{ 74 Spec: batchv1.JobSpec{ 75 Parallelism: ptr.To[int32](3), 76 Completions: ptr.To[int32](3), 77 }, 78 Status: batchv1.JobStatus{ 79 Ready: ptr.To[int32](3), 80 Succeeded: 0, 81 }, 82 }, 83 want: true, 84 }, 85 "parallelism = completions; some ready, some succeeded": { 86 job: Job{ 87 Spec: batchv1.JobSpec{ 88 Parallelism: ptr.To[int32](3), 89 Completions: ptr.To[int32](3), 90 }, 91 Status: batchv1.JobStatus{ 92 Ready: ptr.To[int32](2), 93 Succeeded: 1, 94 }, 95 }, 96 want: true, 97 }, 98 "parallelism = completions; all succeeded": { 99 job: Job{ 100 Spec: batchv1.JobSpec{ 101 Parallelism: ptr.To[int32](3), 102 Completions: ptr.To[int32](3), 103 }, 104 Status: batchv1.JobStatus{ 105 Succeeded: 3, 106 }, 107 }, 108 want: true, 109 }, 110 "parallelism < completions; reaching parallelism is enough": { 111 job: Job{ 112 Spec: batchv1.JobSpec{ 113 Parallelism: ptr.To[int32](2), 114 Completions: ptr.To[int32](3), 115 }, 116 Status: batchv1.JobStatus{ 117 Ready: ptr.To[int32](2), 118 }, 119 }, 120 want: true, 121 }, 122 "parallelism > completions; reaching completions is enough": { 123 job: Job{ 124 Spec: batchv1.JobSpec{ 125 Parallelism: ptr.To[int32](3), 126 Completions: ptr.To[int32](2), 127 }, 128 Status: batchv1.JobStatus{ 129 Ready: ptr.To[int32](2), 130 }, 131 }, 132 want: true, 133 }, 134 "parallelism specified only; not enough progress": { 135 job: Job{ 136 Spec: batchv1.JobSpec{ 137 Parallelism: ptr.To[int32](3), 138 }, 139 Status: batchv1.JobStatus{ 140 Ready: ptr.To[int32](2), 141 }, 142 }, 143 want: false, 144 }, 145 "parallelism specified only; all ready": { 146 job: Job{ 147 Spec: batchv1.JobSpec{ 148 Parallelism: ptr.To[int32](3), 149 }, 150 Status: batchv1.JobStatus{ 151 Ready: ptr.To[int32](3), 152 }, 153 }, 154 want: true, 155 }, 156 } 157 158 for name, tc := range testcases { 159 t.Run(name, func(t *testing.T) { 160 got := tc.job.PodsReady() 161 if tc.want != got { 162 t.Errorf("Unexpected response (want: %v, got: %v)", tc.want, got) 163 } 164 }) 165 } 166 } 167 168 func TestPodSetsInfo(t *testing.T) { 169 testcases := map[string]struct { 170 job *Job 171 runInfo, restoreInfo []podset.PodSetInfo 172 wantUnsuspended *batchv1.Job 173 wantRunError error 174 }{ 175 "append": { 176 job: (*Job)(utiltestingjob.MakeJob("job", "ns"). 177 Parallelism(1). 178 NodeSelector("orig-key", "orig-val"). 179 Toleration(corev1.Toleration{ 180 Key: "orig-t-key", 181 Operator: corev1.TolerationOpEqual, 182 Value: "orig-t-val", 183 Effect: corev1.TaintEffectNoSchedule, 184 }). 185 Obj()), 186 runInfo: []podset.PodSetInfo{ 187 { 188 NodeSelector: map[string]string{ 189 "new-key": "new-val", 190 }, 191 Tolerations: []corev1.Toleration{ 192 { 193 Key: "new-t-key", 194 Operator: corev1.TolerationOpEqual, 195 Value: "new-t-val", 196 Effect: corev1.TaintEffectNoSchedule, 197 }, 198 }, 199 }, 200 }, 201 wantUnsuspended: utiltestingjob.MakeJob("job", "ns"). 202 Parallelism(1). 203 NodeSelector("orig-key", "orig-val"). 204 NodeSelector("new-key", "new-val"). 205 Toleration(corev1.Toleration{ 206 Key: "orig-t-key", 207 Operator: corev1.TolerationOpEqual, 208 Value: "orig-t-val", 209 Effect: corev1.TaintEffectNoSchedule, 210 }). 211 Toleration(corev1.Toleration{ 212 Key: "new-t-key", 213 Operator: corev1.TolerationOpEqual, 214 Value: "new-t-val", 215 Effect: corev1.TaintEffectNoSchedule, 216 }). 217 Suspend(false). 218 Obj(), 219 restoreInfo: []podset.PodSetInfo{ 220 { 221 NodeSelector: map[string]string{ 222 "orig-key": "orig-val", 223 }, 224 Tolerations: []corev1.Toleration{ 225 { 226 Key: "orig-t-key", 227 Operator: corev1.TolerationOpEqual, 228 Value: "orig-t-val", 229 Effect: corev1.TaintEffectNoSchedule, 230 }, 231 }, 232 }, 233 }, 234 }, 235 "update": { 236 job: (*Job)(utiltestingjob.MakeJob("job", "ns"). 237 Parallelism(1). 238 NodeSelector("orig-key", "orig-val"). 239 Obj()), 240 runInfo: []podset.PodSetInfo{ 241 { 242 NodeSelector: map[string]string{ 243 "orig-key": "new-val", 244 }, 245 }, 246 }, 247 wantRunError: podset.ErrInvalidPodSetUpdate, 248 wantUnsuspended: utiltestingjob.MakeJob("job", "ns"). 249 Parallelism(1). 250 NodeSelector("orig-key", "orig-val"). 251 Suspend(false). 252 Obj(), 253 restoreInfo: []podset.PodSetInfo{ 254 { 255 NodeSelector: map[string]string{ 256 "orig-key": "orig-val", 257 }, 258 }, 259 }, 260 }, 261 "parallelism": { 262 job: (*Job)(utiltestingjob.MakeJob("job", "ns"). 263 Parallelism(5). 264 SetAnnotation(JobMinParallelismAnnotation, "2"). 265 Obj()), 266 runInfo: []podset.PodSetInfo{ 267 { 268 Count: 2, 269 }, 270 }, 271 wantUnsuspended: utiltestingjob.MakeJob("job", "ns"). 272 Parallelism(2). 273 SetAnnotation(JobMinParallelismAnnotation, "2"). 274 Suspend(false). 275 Obj(), 276 restoreInfo: []podset.PodSetInfo{ 277 { 278 Count: 5, 279 }, 280 }, 281 }, 282 "noInfoOnRun": { 283 job: (*Job)(utiltestingjob.MakeJob("job", "ns"). 284 Parallelism(5). 285 SetAnnotation(JobMinParallelismAnnotation, "2"). 286 Obj()), 287 runInfo: []podset.PodSetInfo{}, 288 wantUnsuspended: utiltestingjob.MakeJob("job", "ns"). 289 Parallelism(5). 290 SetAnnotation(JobMinParallelismAnnotation, "2"). 291 Suspend(false). 292 Obj(), 293 restoreInfo: []podset.PodSetInfo{ 294 { 295 Count: 5, 296 }, 297 }, 298 wantRunError: podset.ErrInvalidPodsetInfo, 299 }, 300 } 301 for name, tc := range testcases { 302 t.Run(name, func(t *testing.T) { 303 origSpec := *tc.job.Spec.DeepCopy() 304 305 gotErr := tc.job.RunWithPodSetsInfo(tc.runInfo) 306 307 if diff := cmp.Diff(tc.wantRunError, gotErr, cmpopts.EquateErrors()); diff != "" { 308 t.Errorf("node selectors mismatch (-want +got):\n%s", diff) 309 } 310 311 if diff := cmp.Diff(tc.job.Spec, tc.wantUnsuspended.Spec); diff != "" { 312 t.Errorf("node selectors mismatch (-want +got):\n%s", diff) 313 } 314 tc.job.RestorePodSetsInfo(tc.restoreInfo) 315 tc.job.Suspend() 316 if diff := cmp.Diff(tc.job.Spec, origSpec); diff != "" { 317 t.Errorf("node selectors mismatch (-want +got):\n%s", diff) 318 } 319 }) 320 } 321 } 322 323 func TestPodSets(t *testing.T) { 324 podTemplate := utiltestingjob.MakeJob("job", "ns").Spec.Template.DeepCopy() 325 cases := map[string]struct { 326 job *Job 327 wantPodSets []kueue.PodSet 328 }{ 329 "no partial admission": { 330 job: (*Job)(utiltestingjob.MakeJob("job", "ns").Parallelism(3).Obj()), 331 wantPodSets: []kueue.PodSet{ 332 { 333 Name: kueue.DefaultPodSetName, 334 Template: *podTemplate.DeepCopy(), 335 Count: 3, 336 }, 337 }, 338 }, 339 "partial admission": { 340 job: (*Job)(utiltestingjob.MakeJob("job", "ns").Parallelism(3).SetAnnotation(JobMinParallelismAnnotation, "2").Obj()), 341 wantPodSets: []kueue.PodSet{ 342 { 343 Name: kueue.DefaultPodSetName, 344 Template: *podTemplate.DeepCopy(), 345 Count: 3, 346 MinCount: ptr.To[int32](2), 347 }, 348 }, 349 }, 350 } 351 for name, tc := range cases { 352 t.Run(name, func(t *testing.T) { 353 gotPodSets := tc.job.PodSets() 354 if diff := cmp.Diff(tc.wantPodSets, gotPodSets); diff != "" { 355 t.Errorf("node selectors mismatch (-want +got):\n%s", diff) 356 } 357 }) 358 } 359 } 360 361 var ( 362 jobCmpOpts = []cmp.Option{ 363 cmpopts.EquateEmpty(), 364 cmpopts.IgnoreFields(batchv1.Job{}, "TypeMeta", "ObjectMeta.OwnerReferences", "ObjectMeta.ResourceVersion", "ObjectMeta.Annotations"), 365 } 366 workloadCmpOpts = []cmp.Option{ 367 cmpopts.EquateEmpty(), 368 cmpopts.SortSlices(func(a, b kueue.Workload) bool { 369 return a.Name < b.Name 370 }), 371 cmpopts.SortSlices(func(a, b metav1.Condition) bool { 372 return a.Type < b.Type 373 }), 374 cmpopts.IgnoreFields( 375 kueue.Workload{}, "TypeMeta", "ObjectMeta.OwnerReferences", 376 "ObjectMeta.Name", "ObjectMeta.ResourceVersion", 377 ), 378 cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime"), 379 cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime"), 380 } 381 workloadCmpOptsWithOwner = []cmp.Option{ 382 cmpopts.EquateEmpty(), 383 cmpopts.SortSlices(func(a, b kueue.Workload) bool { 384 return a.Name < b.Name 385 }), 386 cmpopts.SortSlices(func(a, b metav1.Condition) bool { 387 return a.Type < b.Type 388 }), 389 cmpopts.IgnoreFields( 390 kueue.Workload{}, "TypeMeta", "ObjectMeta.Name", "ObjectMeta.ResourceVersion", 391 ), 392 cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime"), 393 cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime"), 394 } 395 ) 396 397 func TestReconciler(t *testing.T) { 398 baseJobWrapper := utiltestingjob.MakeJob("job", "ns"). 399 Suspend(true). 400 Queue("foo"). 401 Parallelism(10). 402 Request(corev1.ResourceCPU, "1"). 403 Image("", nil) 404 405 baseWPCWrapper := utiltesting.MakeWorkloadPriorityClass("test-wpc"). 406 PriorityValue(100) 407 408 basePCWrapper := utiltesting.MakePriorityClass("test-pc"). 409 PriorityValue(200) 410 411 cases := map[string]struct { 412 reconcilerOptions []jobframework.Option 413 job batchv1.Job 414 workloads []kueue.Workload 415 priorityClasses []client.Object 416 wantJob batchv1.Job 417 wantWorkloads []kueue.Workload 418 wantEvents []utiltesting.EventRecord 419 wantErr error 420 }{ 421 "when workload is admitted the PodSetUpdates are propagated to job": { 422 job: *baseJobWrapper.Clone(). 423 Obj(), 424 wantJob: *baseJobWrapper.Clone(). 425 Suspend(false). 426 PodLabel("ac-key", "ac-value"). 427 Obj(), 428 workloads: []kueue.Workload{ 429 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 430 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 431 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 432 Admitted(true). 433 AdmissionCheck(kueue.AdmissionCheckState{ 434 Name: "check", 435 State: kueue.CheckStateReady, 436 PodSetUpdates: []kueue.PodSetUpdate{ 437 { 438 Name: "main", 439 Labels: map[string]string{ 440 "ac-key": "ac-value", 441 }, 442 }, 443 }, 444 }). 445 Obj(), 446 }, 447 wantWorkloads: []kueue.Workload{ 448 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 449 Finalizers(kueue.ResourceInUseFinalizerName). 450 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 451 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 452 Admitted(true). 453 AdmissionCheck(kueue.AdmissionCheckState{ 454 Name: "check", 455 State: kueue.CheckStateReady, 456 PodSetUpdates: []kueue.PodSetUpdate{ 457 { 458 Name: "main", 459 Labels: map[string]string{ 460 "ac-key": "ac-value", 461 }, 462 }, 463 }, 464 }). 465 Obj(), 466 }, 467 wantEvents: []utiltesting.EventRecord{ 468 { 469 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 470 EventType: "Normal", 471 Reason: "Started", 472 Message: "Admitted by clusterQueue cq", 473 }, 474 }, 475 }, 476 "when workload is admitted and spec.active is set to false, the workload's conditions is set to Evicted": { 477 job: *baseJobWrapper.Clone(). 478 Suspend(false). 479 Obj(), 480 wantJob: *baseJobWrapper.Clone(). 481 Suspend(false). 482 Obj(), 483 workloads: []kueue.Workload{ 484 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 485 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 486 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 487 Admitted(true). 488 Active(false). 489 AdmissionCheck(kueue.AdmissionCheckState{ 490 Name: "check", 491 State: kueue.CheckStateReady, 492 PodSetUpdates: []kueue.PodSetUpdate{ 493 { 494 Name: "main", 495 }, 496 }, 497 }). 498 Obj(), 499 }, 500 wantWorkloads: []kueue.Workload{ 501 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 502 Finalizers(kueue.ResourceInUseFinalizerName). 503 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 504 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 505 Admitted(true). 506 Active(false). 507 Condition(metav1.Condition{ 508 Type: kueue.WorkloadEvicted, 509 Status: metav1.ConditionTrue, 510 Reason: "InactiveWorkload", 511 Message: "The workload is deactivated", 512 }). 513 AdmissionCheck(kueue.AdmissionCheckState{ 514 Name: "check", 515 State: kueue.CheckStateReady, 516 PodSetUpdates: []kueue.PodSetUpdate{ 517 { 518 Name: "main", 519 }, 520 }, 521 }). 522 Obj(), 523 }, 524 }, 525 "when workload is evicted due to spec.active field being false, job gets suspended and quota is unset": { 526 job: *baseJobWrapper.Clone(). 527 Suspend(false). 528 Obj(), 529 wantJob: *baseJobWrapper.Clone(). 530 Suspend(true). 531 Obj(), 532 workloads: []kueue.Workload{ 533 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 534 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 535 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 536 Admitted(true). 537 Active(false). 538 Condition(metav1.Condition{ 539 Type: kueue.WorkloadEvicted, 540 Status: metav1.ConditionTrue, 541 Reason: "InactiveWorkload", 542 Message: "The workload is deactivated", 543 }). 544 AdmissionCheck(kueue.AdmissionCheckState{ 545 Name: "check", 546 State: kueue.CheckStateReady, 547 PodSetUpdates: []kueue.PodSetUpdate{ 548 { 549 Name: "main", 550 Labels: map[string]string{ 551 "ac-key": "ac-value", 552 }, 553 }, 554 }, 555 }). 556 Obj(), 557 }, 558 wantWorkloads: []kueue.Workload{ 559 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 560 Finalizers(kueue.ResourceInUseFinalizerName). 561 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 562 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 563 Admitted(true). 564 Active(false). 565 Condition(metav1.Condition{ 566 Type: kueue.WorkloadAdmitted, 567 Status: metav1.ConditionFalse, 568 Reason: "NoReservation", 569 Message: "The workload has no reservation", 570 }). 571 Condition(metav1.Condition{ 572 Type: kueue.WorkloadQuotaReserved, 573 Status: metav1.ConditionFalse, 574 Reason: "Pending", 575 Message: "The workload is deactivated", 576 }). 577 Condition(metav1.Condition{ 578 Type: kueue.WorkloadEvicted, 579 Status: metav1.ConditionTrue, 580 Reason: "InactiveWorkload", 581 Message: "The workload is deactivated", 582 }). 583 AdmissionCheck(kueue.AdmissionCheckState{ 584 Name: "check", 585 State: kueue.CheckStateReady, 586 PodSetUpdates: []kueue.PodSetUpdate{ 587 { 588 Name: "main", 589 Labels: map[string]string{ 590 "ac-key": "ac-value", 591 }, 592 }, 593 }, 594 }). 595 Obj(), 596 }, 597 wantEvents: []utiltesting.EventRecord{ 598 { 599 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 600 EventType: "Normal", 601 Reason: "Stopped", 602 Message: "The workload is deactivated", 603 }, 604 }, 605 }, 606 "when job is initially suspended, the Workload has active=false and it's not admitted, " + 607 "it should not get an evicted condition, but the job should remain suspended": { 608 job: *baseJobWrapper.Clone(). 609 Suspend(true). 610 Obj(), 611 wantJob: *baseJobWrapper.Clone(). 612 Suspend(true). 613 Obj(), 614 workloads: []kueue.Workload{ 615 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 616 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 617 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 618 Admitted(true). 619 Active(false). 620 Queue("foo"). 621 Condition(metav1.Condition{ 622 Type: kueue.WorkloadAdmitted, 623 Status: metav1.ConditionFalse, 624 Reason: "NoReservation", 625 Message: "The workload has no reservation", 626 }). 627 Condition(metav1.Condition{ 628 Type: kueue.WorkloadQuotaReserved, 629 Status: metav1.ConditionFalse, 630 Reason: "Pending", 631 Message: "The workload is deactivated", 632 }). 633 AdmissionCheck(kueue.AdmissionCheckState{ 634 Name: "check", 635 State: kueue.CheckStateReady, 636 PodSetUpdates: []kueue.PodSetUpdate{ 637 { 638 Name: "main", 639 Labels: map[string]string{ 640 "ac-key": "ac-value", 641 }, 642 }, 643 }, 644 }). 645 Obj(), 646 }, 647 wantWorkloads: []kueue.Workload{ 648 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 649 Finalizers(kueue.ResourceInUseFinalizerName). 650 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 651 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 652 Admitted(true). 653 Active(false). 654 Queue("foo"). 655 Condition(metav1.Condition{ 656 Type: kueue.WorkloadAdmitted, 657 Status: metav1.ConditionFalse, 658 Reason: "NoReservation", 659 Message: "The workload has no reservation", 660 }). 661 Condition(metav1.Condition{ 662 Type: kueue.WorkloadQuotaReserved, 663 Status: metav1.ConditionFalse, 664 Reason: "Pending", 665 Message: "The workload is deactivated", 666 }). 667 AdmissionCheck(kueue.AdmissionCheckState{ 668 Name: "check", 669 State: kueue.CheckStateReady, 670 PodSetUpdates: []kueue.PodSetUpdate{ 671 { 672 Name: "main", 673 Labels: map[string]string{ 674 "ac-key": "ac-value", 675 }, 676 }, 677 }, 678 }). 679 Obj(), 680 }, 681 }, 682 "when workload is admitted and PodSetUpdates conflict between admission checks on labels, the workload is finished with failure": { 683 job: *baseJobWrapper.Clone(). 684 Obj(), 685 wantJob: *baseJobWrapper.Clone(). 686 Suspend(true). 687 Obj(), 688 workloads: []kueue.Workload{ 689 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 690 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 691 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 692 Admitted(true). 693 AdmissionCheck(kueue.AdmissionCheckState{ 694 Name: "check1", 695 State: kueue.CheckStateReady, 696 PodSetUpdates: []kueue.PodSetUpdate{ 697 { 698 Name: "main", 699 Labels: map[string]string{ 700 "ac-key": "ac-value1", 701 }, 702 }, 703 }, 704 }). 705 AdmissionCheck(kueue.AdmissionCheckState{ 706 Name: "check2", 707 State: kueue.CheckStateReady, 708 PodSetUpdates: []kueue.PodSetUpdate{ 709 { 710 Name: "main", 711 Labels: map[string]string{ 712 "ac-key": "ac-value2", 713 }, 714 }, 715 }, 716 }). 717 Obj(), 718 }, 719 wantWorkloads: []kueue.Workload{ 720 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 721 Finalizers(kueue.ResourceInUseFinalizerName). 722 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 723 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 724 Admitted(true). 725 AdmissionCheck(kueue.AdmissionCheckState{ 726 Name: "check1", 727 State: kueue.CheckStateReady, 728 PodSetUpdates: []kueue.PodSetUpdate{ 729 { 730 Name: "main", 731 Labels: map[string]string{ 732 "ac-key": "ac-value1", 733 }, 734 }, 735 }, 736 }). 737 AdmissionCheck(kueue.AdmissionCheckState{ 738 Name: "check2", 739 State: kueue.CheckStateReady, 740 PodSetUpdates: []kueue.PodSetUpdate{ 741 { 742 Name: "main", 743 Labels: map[string]string{ 744 "ac-key": "ac-value2", 745 }, 746 }, 747 }, 748 }). 749 Condition(metav1.Condition{ 750 Type: kueue.WorkloadFinished, 751 Status: metav1.ConditionTrue, 752 Reason: "FailedToStart", 753 Message: `in admission check "check2": invalid admission check PodSetUpdate: conflict for labels: conflict for key=ac-key, value1=ac-value1, value2=ac-value2`, 754 }). 755 Obj(), 756 }, 757 }, 758 "when workload is admitted and PodSetUpdates conflict between admission checks on annotations, the workload is finished with failure": { 759 job: *baseJobWrapper.Clone(). 760 Obj(), 761 wantJob: *baseJobWrapper.Clone(). 762 Suspend(true). 763 Obj(), 764 workloads: []kueue.Workload{ 765 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 766 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 767 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 768 Admitted(true). 769 AdmissionCheck(kueue.AdmissionCheckState{ 770 Name: "check1", 771 State: kueue.CheckStateReady, 772 PodSetUpdates: []kueue.PodSetUpdate{ 773 { 774 Name: "main", 775 Annotations: map[string]string{ 776 "ac-key": "ac-value1", 777 }, 778 }, 779 }, 780 }). 781 AdmissionCheck(kueue.AdmissionCheckState{ 782 Name: "check2", 783 State: kueue.CheckStateReady, 784 PodSetUpdates: []kueue.PodSetUpdate{ 785 { 786 Name: "main", 787 Annotations: map[string]string{ 788 "ac-key": "ac-value2", 789 }, 790 }, 791 }, 792 }). 793 Obj(), 794 }, 795 wantWorkloads: []kueue.Workload{ 796 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 797 Finalizers(kueue.ResourceInUseFinalizerName). 798 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 799 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 800 Admitted(true). 801 AdmissionCheck(kueue.AdmissionCheckState{ 802 Name: "check1", 803 State: kueue.CheckStateReady, 804 PodSetUpdates: []kueue.PodSetUpdate{ 805 { 806 Name: "main", 807 Annotations: map[string]string{ 808 "ac-key": "ac-value1", 809 }, 810 }, 811 }, 812 }). 813 AdmissionCheck(kueue.AdmissionCheckState{ 814 Name: "check2", 815 State: kueue.CheckStateReady, 816 PodSetUpdates: []kueue.PodSetUpdate{ 817 { 818 Name: "main", 819 Annotations: map[string]string{ 820 "ac-key": "ac-value2", 821 }, 822 }, 823 }, 824 }). 825 Condition(metav1.Condition{ 826 Type: kueue.WorkloadFinished, 827 Status: metav1.ConditionTrue, 828 Reason: "FailedToStart", 829 Message: `in admission check "check2": invalid admission check PodSetUpdate: conflict for annotations: conflict for key=ac-key, value1=ac-value1, value2=ac-value2`, 830 }). 831 Obj(), 832 }, 833 }, 834 "when workload is admitted and PodSetUpdates conflict between admission checks on nodeSelector, the workload is finished with failure": { 835 job: *baseJobWrapper.Clone(). 836 Obj(), 837 wantJob: *baseJobWrapper.Clone(). 838 Suspend(true). 839 Obj(), 840 workloads: []kueue.Workload{ 841 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 842 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 843 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 844 Admitted(true). 845 AdmissionCheck(kueue.AdmissionCheckState{ 846 Name: "check1", 847 State: kueue.CheckStateReady, 848 PodSetUpdates: []kueue.PodSetUpdate{ 849 { 850 Name: "main", 851 NodeSelector: map[string]string{ 852 "ac-key": "ac-value1", 853 }, 854 }, 855 }, 856 }). 857 AdmissionCheck(kueue.AdmissionCheckState{ 858 Name: "check2", 859 State: kueue.CheckStateReady, 860 PodSetUpdates: []kueue.PodSetUpdate{ 861 { 862 Name: "main", 863 NodeSelector: map[string]string{ 864 "ac-key": "ac-value2", 865 }, 866 }, 867 }, 868 }). 869 Obj(), 870 }, 871 wantWorkloads: []kueue.Workload{ 872 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 873 Finalizers(kueue.ResourceInUseFinalizerName). 874 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 875 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 876 Admitted(true). 877 AdmissionCheck(kueue.AdmissionCheckState{ 878 Name: "check1", 879 State: kueue.CheckStateReady, 880 PodSetUpdates: []kueue.PodSetUpdate{ 881 { 882 Name: "main", 883 NodeSelector: map[string]string{ 884 "ac-key": "ac-value1", 885 }, 886 }, 887 }, 888 }). 889 AdmissionCheck(kueue.AdmissionCheckState{ 890 Name: "check2", 891 State: kueue.CheckStateReady, 892 PodSetUpdates: []kueue.PodSetUpdate{ 893 { 894 Name: "main", 895 NodeSelector: map[string]string{ 896 "ac-key": "ac-value2", 897 }, 898 }, 899 }, 900 }). 901 Condition(metav1.Condition{ 902 Type: kueue.WorkloadFinished, 903 Status: metav1.ConditionTrue, 904 Reason: "FailedToStart", 905 Message: `in admission check "check2": invalid admission check PodSetUpdate: conflict for nodeSelector: conflict for key=ac-key, value1=ac-value1, value2=ac-value2`, 906 }). 907 Obj(), 908 }, 909 }, 910 "when workload is admitted and PodSetUpdates conflict between admission check nodeSelector and current node selector, the workload is finished with failure": { 911 job: *baseJobWrapper.Clone(). 912 NodeSelector("provisioning", "spot"). 913 Obj(), 914 wantJob: *baseJobWrapper.Clone(). 915 Suspend(true). 916 NodeSelector("provisioning", "spot"). 917 Obj(), 918 workloads: []kueue.Workload{ 919 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 920 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 921 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 922 Admitted(true). 923 AdmissionCheck(kueue.AdmissionCheckState{ 924 Name: "check", 925 State: kueue.CheckStateReady, 926 PodSetUpdates: []kueue.PodSetUpdate{ 927 { 928 Name: "main", 929 NodeSelector: map[string]string{ 930 "provisioning": "on-demand", 931 }, 932 }, 933 }, 934 }). 935 Obj(), 936 }, 937 wantWorkloads: []kueue.Workload{ 938 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 939 Finalizers(kueue.ResourceInUseFinalizerName). 940 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 941 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 942 Admitted(true). 943 AdmissionCheck(kueue.AdmissionCheckState{ 944 Name: "check", 945 State: kueue.CheckStateReady, 946 PodSetUpdates: []kueue.PodSetUpdate{ 947 { 948 Name: "main", 949 NodeSelector: map[string]string{ 950 "provisioning": "on-demand", 951 }, 952 }, 953 }, 954 }). 955 Condition(metav1.Condition{ 956 Type: kueue.WorkloadFinished, 957 Status: metav1.ConditionTrue, 958 Reason: "FailedToStart", 959 Message: `invalid admission check PodSetUpdate: conflict for nodeSelector: conflict for key=provisioning, value1=spot, value2=on-demand`, 960 }). 961 Obj(), 962 }, 963 }, 964 "when workload is admitted the PodSetUpdates values matching for key": { 965 job: *baseJobWrapper.Clone(). 966 Obj(), 967 wantJob: *baseJobWrapper.Clone(). 968 Suspend(false). 969 PodAnnotation("annotation-key1", "common-value"). 970 PodAnnotation("annotation-key2", "only-in-check1"). 971 PodLabel("label-key1", "common-value"). 972 NodeSelector("node-selector-key1", "common-value"). 973 NodeSelector("node-selector-key2", "only-in-check2"). 974 Obj(), 975 workloads: []kueue.Workload{ 976 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 977 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 978 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 979 Admitted(true). 980 AdmissionCheck(kueue.AdmissionCheckState{ 981 Name: "check1", 982 State: kueue.CheckStateReady, 983 PodSetUpdates: []kueue.PodSetUpdate{ 984 { 985 Name: "main", 986 Labels: map[string]string{ 987 "label-key1": "common-value", 988 }, 989 Annotations: map[string]string{ 990 "annotation-key1": "common-value", 991 "annotation-key2": "only-in-check1", 992 }, 993 NodeSelector: map[string]string{ 994 "node-selector-key1": "common-value", 995 }, 996 }, 997 }, 998 }). 999 AdmissionCheck(kueue.AdmissionCheckState{ 1000 Name: "check2", 1001 State: kueue.CheckStateReady, 1002 PodSetUpdates: []kueue.PodSetUpdate{ 1003 { 1004 Name: "main", 1005 Labels: map[string]string{ 1006 "label-key1": "common-value", 1007 }, 1008 Annotations: map[string]string{ 1009 "annotation-key1": "common-value", 1010 }, 1011 NodeSelector: map[string]string{ 1012 "node-selector-key1": "common-value", 1013 "node-selector-key2": "only-in-check2", 1014 }, 1015 }, 1016 }, 1017 }). 1018 Obj(), 1019 }, 1020 wantWorkloads: []kueue.Workload{ 1021 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1022 Finalizers(kueue.ResourceInUseFinalizerName). 1023 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1024 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1025 Admitted(true). 1026 AdmissionCheck(kueue.AdmissionCheckState{ 1027 Name: "check1", 1028 State: kueue.CheckStateReady, 1029 PodSetUpdates: []kueue.PodSetUpdate{ 1030 { 1031 Name: "main", 1032 Labels: map[string]string{ 1033 "label-key1": "common-value", 1034 }, 1035 Annotations: map[string]string{ 1036 "annotation-key1": "common-value", 1037 "annotation-key2": "only-in-check1", 1038 }, 1039 NodeSelector: map[string]string{ 1040 "node-selector-key1": "common-value", 1041 }, 1042 }, 1043 }, 1044 }). 1045 AdmissionCheck(kueue.AdmissionCheckState{ 1046 Name: "check2", 1047 State: kueue.CheckStateReady, 1048 PodSetUpdates: []kueue.PodSetUpdate{ 1049 { 1050 Name: "main", 1051 Labels: map[string]string{ 1052 "label-key1": "common-value", 1053 }, 1054 Annotations: map[string]string{ 1055 "annotation-key1": "common-value", 1056 }, 1057 NodeSelector: map[string]string{ 1058 "node-selector-key1": "common-value", 1059 "node-selector-key2": "only-in-check2", 1060 }, 1061 }, 1062 }, 1063 }). 1064 Obj(), 1065 }, 1066 wantEvents: []utiltesting.EventRecord{ 1067 { 1068 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1069 EventType: "Normal", 1070 Reason: "Started", 1071 Message: "Admitted by clusterQueue cq", 1072 }, 1073 }, 1074 }, 1075 "suspended job with matching admitted workload is unsuspended": { 1076 reconcilerOptions: []jobframework.Option{ 1077 jobframework.WithManageJobsWithoutQueueName(true), 1078 }, 1079 job: *baseJobWrapper.DeepCopy(), 1080 wantJob: *baseJobWrapper.Clone(). 1081 Suspend(false). 1082 Obj(), 1083 workloads: []kueue.Workload{ 1084 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1085 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1086 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1087 Admitted(true). 1088 Obj(), 1089 }, 1090 wantWorkloads: []kueue.Workload{ 1091 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1092 Finalizers(kueue.ResourceInUseFinalizerName). 1093 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1094 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1095 Admitted(true). 1096 Obj(), 1097 }, 1098 wantEvents: []utiltesting.EventRecord{ 1099 { 1100 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1101 EventType: "Normal", 1102 Reason: "Started", 1103 Message: "Admitted by clusterQueue cq", 1104 }, 1105 }, 1106 }, 1107 "non-matching admitted workload is deleted": { 1108 reconcilerOptions: []jobframework.Option{ 1109 jobframework.WithManageJobsWithoutQueueName(true), 1110 }, 1111 job: *baseJobWrapper.DeepCopy(), 1112 wantJob: *baseJobWrapper.DeepCopy(), 1113 workloads: []kueue.Workload{ 1114 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1115 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 5).Request(corev1.ResourceCPU, "1").Obj()). 1116 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1117 Admitted(true). 1118 Obj(), 1119 }, 1120 wantErr: jobframework.ErrNoMatchingWorkloads, 1121 wantEvents: []utiltesting.EventRecord{ 1122 { 1123 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1124 EventType: "Normal", 1125 Reason: "DeletedWorkload", 1126 Message: "Deleted not matching Workload: ns/a", 1127 }, 1128 }, 1129 }, 1130 "non-matching non-admitted workload is updated": { 1131 reconcilerOptions: []jobframework.Option{ 1132 jobframework.WithManageJobsWithoutQueueName(true), 1133 }, 1134 job: *baseJobWrapper.DeepCopy(), 1135 wantJob: *baseJobWrapper.DeepCopy(), 1136 workloads: []kueue.Workload{ 1137 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1138 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 5).Request(corev1.ResourceCPU, "1").Obj()). 1139 Priority(0). 1140 Obj(), 1141 }, 1142 wantWorkloads: []kueue.Workload{ 1143 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1144 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1145 Queue("foo"). 1146 Priority(0). 1147 Obj(), 1148 }, 1149 wantEvents: []utiltesting.EventRecord{ 1150 { 1151 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1152 EventType: "Normal", 1153 Reason: "UpdatedWorkload", 1154 Message: "Updated not matching Workload for suspended job: ns/a", 1155 }, 1156 }, 1157 }, 1158 "suspended job with partial admission and admitted workload is unsuspended": { 1159 reconcilerOptions: []jobframework.Option{ 1160 jobframework.WithManageJobsWithoutQueueName(true), 1161 }, 1162 job: *baseJobWrapper.Clone(). 1163 SetAnnotation(JobMinParallelismAnnotation, "5"). 1164 Obj(), 1165 wantJob: *baseJobWrapper.Clone(). 1166 SetAnnotation(JobMinParallelismAnnotation, "5"). 1167 Suspend(false). 1168 Parallelism(8). 1169 Obj(), 1170 workloads: []kueue.Workload{ 1171 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1172 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).SetMinimumCount(5).Request(corev1.ResourceCPU, "1").Obj()). 1173 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(8).Obj()). 1174 Admitted(true). 1175 Obj(), 1176 }, 1177 wantWorkloads: []kueue.Workload{ 1178 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1179 Finalizers(kueue.ResourceInUseFinalizerName). 1180 PodSets( 1181 *utiltesting.MakePodSet(kueue.DefaultPodSetName, 10). 1182 SetMinimumCount(5). 1183 Request(corev1.ResourceCPU, "1"). 1184 Obj(), 1185 ). 1186 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(8).Obj()). 1187 Admitted(true). 1188 Obj(), 1189 }, 1190 wantEvents: []utiltesting.EventRecord{ 1191 { 1192 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1193 EventType: "Normal", 1194 Reason: "Started", 1195 Message: "Admitted by clusterQueue cq", 1196 }, 1197 }, 1198 }, 1199 "unsuspended job with partial admission and non-matching admitted workload is suspended and workload is deleted": { 1200 reconcilerOptions: []jobframework.Option{ 1201 jobframework.WithManageJobsWithoutQueueName(true), 1202 }, 1203 job: *baseJobWrapper.Clone(). 1204 SetAnnotation(JobMinParallelismAnnotation, "5"). 1205 Suspend(false). 1206 Obj(), 1207 wantJob: *baseJobWrapper.Clone(). 1208 SetAnnotation(JobMinParallelismAnnotation, "5"). 1209 Obj(), 1210 workloads: []kueue.Workload{ 1211 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1212 PodSets( 1213 *utiltesting.MakePodSet(kueue.DefaultPodSetName, 10). 1214 SetMinimumCount(5). 1215 Request(corev1.ResourceCPU, "1"). 1216 Obj(), 1217 ). 1218 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(8).Obj()). 1219 Admitted(true). 1220 Obj(), 1221 }, 1222 wantErr: jobframework.ErrNoMatchingWorkloads, 1223 wantEvents: []utiltesting.EventRecord{ 1224 { 1225 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1226 EventType: "Normal", 1227 Reason: "Stopped", 1228 Message: "No matching Workload; restoring pod templates according to existent Workload", 1229 }, 1230 { 1231 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1232 EventType: "Normal", 1233 Reason: "DeletedWorkload", 1234 Message: "Deleted not matching Workload: ns/a", 1235 }, 1236 }, 1237 }, 1238 "the workload is created when queue name is set": { 1239 job: *baseJobWrapper. 1240 Clone(). 1241 Suspend(false). 1242 Queue("test-queue"). 1243 UID("test-uid"). 1244 Obj(), 1245 wantJob: *baseJobWrapper. 1246 Clone(). 1247 Queue("test-queue"). 1248 UID("test-uid"). 1249 Obj(), 1250 wantWorkloads: []kueue.Workload{ 1251 *utiltesting.MakeWorkload("job", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1252 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1253 Queue("test-queue"). 1254 Priority(0). 1255 Labels(map[string]string{ 1256 controllerconsts.JobUIDLabel: "test-uid", 1257 }). 1258 Obj(), 1259 }, 1260 wantEvents: []utiltesting.EventRecord{ 1261 { 1262 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1263 EventType: "Normal", 1264 Reason: "Stopped", 1265 Message: "Missing Workload; unable to restore pod templates", 1266 }, 1267 { 1268 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1269 EventType: "Normal", 1270 Reason: "CreatedWorkload", 1271 Message: "Created Workload: ns/job-job-ed7d5", 1272 }, 1273 }, 1274 }, 1275 "the workload is updated when queue name has changed for suspended job": { 1276 job: *baseJobWrapper. 1277 Clone(). 1278 Suspend(true). 1279 Queue("test-queue-new"). 1280 UID("test-uid"). 1281 Obj(), 1282 wantJob: *baseJobWrapper. 1283 Clone(). 1284 Queue("test-queue-new"). 1285 UID("test-uid"). 1286 Obj(), 1287 workloads: []kueue.Workload{ 1288 *utiltesting.MakeWorkload("job", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1289 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1290 Queue("test-queue"). 1291 Priority(0). 1292 Labels(map[string]string{ 1293 controllerconsts.JobUIDLabel: "test-uid", 1294 }). 1295 Obj(), 1296 }, 1297 wantWorkloads: []kueue.Workload{ 1298 *utiltesting.MakeWorkload("job", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1299 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1300 Queue("test-queue-new"). 1301 Priority(0). 1302 Labels(map[string]string{ 1303 controllerconsts.JobUIDLabel: "test-uid", 1304 }). 1305 Obj(), 1306 }, 1307 }, 1308 "the workload is updated when priority class has changed for suspended job": { 1309 job: *baseJobWrapper. 1310 Clone(). 1311 Suspend(true). 1312 UID("test-uid"). 1313 Obj(), 1314 wantJob: *baseJobWrapper. 1315 Clone(). 1316 UID("test-uid"). 1317 Obj(), 1318 workloads: []kueue.Workload{ 1319 *utiltesting.MakeWorkload("job", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1320 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1321 Queue("foo"). 1322 Priority(0). 1323 PriorityClass("new-priority-class"). 1324 Labels(map[string]string{ 1325 controllerconsts.JobUIDLabel: "test-uid", 1326 }). 1327 Obj(), 1328 }, 1329 wantWorkloads: []kueue.Workload{ 1330 *utiltesting.MakeWorkload("job", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1331 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1332 Queue("foo"). 1333 Priority(0). 1334 PriorityClass("new-priority-class"). 1335 Labels(map[string]string{ 1336 controllerconsts.JobUIDLabel: "test-uid", 1337 }). 1338 Obj(), 1339 }, 1340 }, 1341 "the workload without uid label is created when job's uid is longer than 63 characters": { 1342 job: *baseJobWrapper. 1343 Clone(). 1344 Suspend(false). 1345 Queue("test-queue"). 1346 UID(strings.Repeat("long-uid", 8)). 1347 Obj(), 1348 wantJob: *baseJobWrapper. 1349 Clone(). 1350 Queue("test-queue"). 1351 UID(strings.Repeat("long-uid", 8)). 1352 Obj(), 1353 wantWorkloads: []kueue.Workload{ 1354 *utiltesting.MakeWorkload("job", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1355 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1356 Queue("test-queue"). 1357 Priority(0). 1358 Labels(map[string]string{}). 1359 Obj(), 1360 }, 1361 wantEvents: []utiltesting.EventRecord{ 1362 { 1363 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1364 EventType: "Normal", 1365 Reason: "Stopped", 1366 Message: "Missing Workload; unable to restore pod templates", 1367 }, 1368 { 1369 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1370 EventType: "Normal", 1371 Reason: "CreatedWorkload", 1372 Message: "Created Workload: ns/job-job-ed7d5", 1373 }, 1374 }, 1375 }, 1376 "the workload is not created when queue name is not set": { 1377 job: *utiltestingjob.MakeJob("job", "ns"). 1378 Suspend(false). 1379 Obj(), 1380 wantJob: *utiltestingjob.MakeJob("job", "ns"). 1381 Suspend(false). 1382 Obj(), 1383 }, 1384 "should get error if child job owner not found": { 1385 job: *utiltestingjob.MakeJob("job", "ns"). 1386 ParentWorkload("non-existing-parent-workload"). 1387 Obj(), 1388 wantJob: *utiltestingjob.MakeJob("job", "ns").Obj(), 1389 wantErr: jobframework.ErrChildJobOwnerNotFound, 1390 }, 1391 "should get error if workload owner is unknown": { 1392 job: *utiltestingjob.MakeJob("job", "ns"). 1393 ParentWorkload("non-existing-parent-workload"). 1394 OwnerReference("parent", batchv1.SchemeGroupVersion.WithKind("CronJob")). 1395 Obj(), 1396 wantJob: *utiltestingjob.MakeJob("job", "ns").Obj(), 1397 wantErr: jobframework.ErrUnknownWorkloadOwner, 1398 }, 1399 "non-standalone job is suspended if its parent workload is not found": { 1400 reconcilerOptions: []jobframework.Option{ 1401 jobframework.WithManageJobsWithoutQueueName(true), 1402 }, 1403 job: *baseJobWrapper. 1404 Clone(). 1405 Suspend(false). 1406 ParentWorkload("unit-test"). 1407 Obj(), 1408 wantJob: *baseJobWrapper. 1409 Clone(). 1410 ParentWorkload("unit-test"). 1411 Obj(), 1412 wantEvents: []utiltesting.EventRecord{ 1413 { 1414 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1415 EventType: "Normal", 1416 Reason: "Suspended", 1417 Message: "Kueue managed child job suspended", 1418 }, 1419 }, 1420 }, 1421 "non-standalone job is not suspended if its parent workload is admitted": { 1422 reconcilerOptions: []jobframework.Option{ 1423 jobframework.WithManageJobsWithoutQueueName(true), 1424 }, 1425 job: *baseJobWrapper. 1426 Clone(). 1427 Suspend(false). 1428 ParentWorkload("unit-test"). 1429 Obj(), 1430 wantJob: *baseJobWrapper. 1431 Clone(). 1432 Suspend(false). 1433 ParentWorkload("unit-test"). 1434 Obj(), 1435 workloads: []kueue.Workload{ 1436 *utiltesting.MakeWorkload("unit-test", "ns"). 1437 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).SetMinimumCount(5).Request(corev1.ResourceCPU, "1").Obj()). 1438 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1439 Admitted(true). 1440 Obj(), 1441 }, 1442 wantWorkloads: []kueue.Workload{ 1443 *utiltesting.MakeWorkload("unit-test", "ns"). 1444 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).SetMinimumCount(5).Request(corev1.ResourceCPU, "1").Obj()). 1445 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1446 Admitted(true). 1447 Obj(), 1448 }, 1449 }, 1450 "non-standalone job is suspended if its parent workload is found and not admitted": { 1451 reconcilerOptions: []jobframework.Option{ 1452 jobframework.WithManageJobsWithoutQueueName(true), 1453 }, 1454 job: *baseJobWrapper. 1455 Clone(). 1456 Suspend(false). 1457 ParentWorkload("parent-workload"). 1458 Obj(), 1459 wantJob: *baseJobWrapper. 1460 Clone(). 1461 ParentWorkload("unit-test"). 1462 Obj(), 1463 workloads: []kueue.Workload{ 1464 *utiltesting.MakeWorkload("parent-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1465 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).SetMinimumCount(5).Request(corev1.ResourceCPU, "1").Obj()). 1466 Obj(), 1467 }, 1468 wantWorkloads: []kueue.Workload{ 1469 *utiltesting.MakeWorkload("parent-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1470 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).SetMinimumCount(5).Request(corev1.ResourceCPU, "1").Obj()). 1471 Obj(), 1472 }, 1473 wantEvents: []utiltesting.EventRecord{ 1474 { 1475 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1476 EventType: "Normal", 1477 Reason: "Suspended", 1478 Message: "Kueue managed child job suspended", 1479 }, 1480 }, 1481 }, 1482 "non-standalone job is not suspended if its parent workload is admitted and queue name is set": { 1483 job: *baseJobWrapper. 1484 Clone(). 1485 Suspend(false). 1486 ParentWorkload("parent-workload"). 1487 Queue("test-queue"). 1488 Obj(), 1489 wantJob: *baseJobWrapper. 1490 Clone(). 1491 Suspend(false). 1492 ParentWorkload("parent-workload"). 1493 Queue("test-queue"). 1494 Obj(), 1495 workloads: []kueue.Workload{ 1496 *utiltesting.MakeWorkload("parent-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1497 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).SetMinimumCount(5).Request(corev1.ResourceCPU, "1").Obj()). 1498 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1499 Admitted(true). 1500 Obj(), 1501 }, 1502 wantWorkloads: []kueue.Workload{ 1503 *utiltesting.MakeWorkload("parent-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1504 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).SetMinimumCount(5).Request(corev1.ResourceCPU, "1").Obj()). 1505 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1506 Admitted(true). 1507 Obj(), 1508 }, 1509 }, 1510 "checking a second non-matching workload is deleted": { 1511 reconcilerOptions: []jobframework.Option{ 1512 jobframework.WithManageJobsWithoutQueueName(true), 1513 }, 1514 job: *baseJobWrapper. 1515 Clone(). 1516 Suspend(false). 1517 Parallelism(5). 1518 Obj(), 1519 wantJob: *baseJobWrapper. 1520 Clone(). 1521 Suspend(false). 1522 Parallelism(5). 1523 Obj(), 1524 workloads: []kueue.Workload{ 1525 *utiltesting.MakeWorkload("first-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1526 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 5).Request(corev1.ResourceCPU, "1").Obj()). 1527 ReserveQuota(utiltesting.MakeAdmission("cq").Obj()). 1528 Admitted(true). 1529 Obj(), 1530 *utiltesting.MakeWorkload("second-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1531 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).SetMinimumCount(5).Request(corev1.ResourceCPU, "1").Obj()). 1532 Obj(), 1533 }, 1534 wantWorkloads: []kueue.Workload{ 1535 *utiltesting.MakeWorkload("first-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1536 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 5).Request(corev1.ResourceCPU, "1").Obj()). 1537 ReserveQuota(utiltesting.MakeAdmission("cq").Obj()). 1538 Admitted(true). 1539 Obj(), 1540 }, 1541 wantErr: jobframework.ErrExtraWorkloads, 1542 wantEvents: []utiltesting.EventRecord{ 1543 { 1544 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1545 EventType: "Normal", 1546 Reason: "DeletedWorkload", 1547 Message: "Deleted not matching Workload: ns/second-workload", 1548 }, 1549 }, 1550 }, 1551 "when workload is evicted, suspend, reset startTime and restore node affinity": { 1552 job: *baseJobWrapper.Clone(). 1553 Suspend(false). 1554 StartTime(time.Now()). 1555 NodeSelector("provisioning", "spot"). 1556 Active(10). 1557 Obj(), 1558 workloads: []kueue.Workload{ 1559 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1560 Finalizers(kueue.ResourceInUseFinalizerName). 1561 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1562 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1563 Admitted(true). 1564 Condition(metav1.Condition{ 1565 Type: kueue.WorkloadEvicted, 1566 Status: metav1.ConditionTrue, 1567 }). 1568 Obj(), 1569 }, 1570 wantJob: *baseJobWrapper.Clone(). 1571 Suspend(true). 1572 Active(10). 1573 Obj(), 1574 wantWorkloads: []kueue.Workload{ 1575 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1576 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1577 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1578 Admitted(true). 1579 Condition(metav1.Condition{ 1580 Type: kueue.WorkloadEvicted, 1581 Status: metav1.ConditionTrue, 1582 }). 1583 Obj(), 1584 }, 1585 wantEvents: []utiltesting.EventRecord{ 1586 { 1587 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1588 EventType: "Normal", 1589 Reason: "Stopped", 1590 }, 1591 }, 1592 }, 1593 "when workload is evicted but suspended, reset startTime and restore node affinity": { 1594 job: *baseJobWrapper.Clone(). 1595 Suspend(true). 1596 StartTime(time.Now()). 1597 NodeSelector("provisioning", "spot"). 1598 Active(10). 1599 Obj(), 1600 workloads: []kueue.Workload{ 1601 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1602 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1603 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1604 Admitted(true). 1605 Condition(metav1.Condition{ 1606 Type: kueue.WorkloadEvicted, 1607 Status: metav1.ConditionTrue, 1608 }). 1609 Obj(), 1610 }, 1611 wantJob: *baseJobWrapper.Clone(). 1612 Suspend(true). 1613 Active(10). 1614 Obj(), 1615 wantWorkloads: []kueue.Workload{ 1616 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1617 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1618 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1619 Admitted(true). 1620 Condition(metav1.Condition{ 1621 Type: kueue.WorkloadEvicted, 1622 Status: metav1.ConditionTrue, 1623 }). 1624 Obj(), 1625 }, 1626 }, 1627 "when workload is evicted, suspended and startTime is reset, restore node affinity": { 1628 job: *baseJobWrapper.Clone(). 1629 Suspend(true). 1630 NodeSelector("provisioning", "spot"). 1631 Active(10). 1632 Obj(), 1633 workloads: []kueue.Workload{ 1634 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1635 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1636 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1637 Admitted(true). 1638 Condition(metav1.Condition{ 1639 Type: kueue.WorkloadEvicted, 1640 Status: metav1.ConditionTrue, 1641 }). 1642 Obj(), 1643 }, 1644 wantJob: *baseJobWrapper.Clone(). 1645 Suspend(true). 1646 Active(10). 1647 Obj(), 1648 wantWorkloads: []kueue.Workload{ 1649 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1650 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1651 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1652 Admitted(true). 1653 Condition(metav1.Condition{ 1654 Type: kueue.WorkloadEvicted, 1655 Status: metav1.ConditionTrue, 1656 }). 1657 Obj(), 1658 }, 1659 }, 1660 "when job completes, workload is marked as finished": { 1661 job: *baseJobWrapper.Clone(). 1662 Condition(batchv1.JobCondition{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}). 1663 Obj(), 1664 workloads: []kueue.Workload{ 1665 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1666 Finalizers(kueue.ResourceInUseFinalizerName). 1667 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1668 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1669 Admitted(true). 1670 Obj(), 1671 }, 1672 wantJob: *baseJobWrapper.Clone(). 1673 Condition(batchv1.JobCondition{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}). 1674 Obj(), 1675 wantWorkloads: []kueue.Workload{ 1676 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1677 Finalizers(kueue.ResourceInUseFinalizerName). 1678 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1679 ReserveQuota(utiltesting.MakeAdmission("cq").AssignmentPodCount(10).Obj()). 1680 Admitted(true). 1681 Condition(metav1.Condition{ 1682 Type: kueue.WorkloadFinished, 1683 Status: metav1.ConditionTrue, 1684 Reason: "JobFinished", 1685 Message: "Job finished successfully", 1686 }). 1687 Obj(), 1688 }, 1689 wantEvents: []utiltesting.EventRecord{ 1690 { 1691 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1692 EventType: "Normal", 1693 Reason: "FinishedWorkload", 1694 Message: "Workload 'ns/a' is declared finished", 1695 }, 1696 }, 1697 }, 1698 "when the workload is finished, its finalizer is removed": { 1699 job: *baseJobWrapper.Clone().Obj(), 1700 workloads: []kueue.Workload{ 1701 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1702 Finalizers(kueue.ResourceInUseFinalizerName). 1703 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1704 Condition(metav1.Condition{ 1705 Type: kueue.WorkloadFinished, 1706 Status: metav1.ConditionTrue, 1707 }). 1708 Obj(), 1709 }, 1710 wantJob: *baseJobWrapper.Clone().Obj(), 1711 wantWorkloads: []kueue.Workload{ 1712 *utiltesting.MakeWorkload("a", "ns"). 1713 PodSets(*utiltesting.MakePodSet("main", 10).Request(corev1.ResourceCPU, "1").Obj()). 1714 Condition(metav1.Condition{ 1715 Type: kueue.WorkloadFinished, 1716 Status: metav1.ConditionTrue, 1717 }). 1718 Obj(), 1719 }, 1720 wantEvents: []utiltesting.EventRecord{ 1721 { 1722 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1723 EventType: "Normal", 1724 Reason: "FinishedWorkload", 1725 Message: "Workload 'ns/a' is declared finished", 1726 }, 1727 }, 1728 }, 1729 "the workload is created when queue name is set, with workloadPriorityClass": { 1730 job: *baseJobWrapper. 1731 Clone(). 1732 Suspend(false). 1733 Queue("test-queue"). 1734 UID("test-uid"). 1735 WorkloadPriorityClass("test-wpc"). 1736 Obj(), 1737 priorityClasses: []client.Object{ 1738 baseWPCWrapper.Obj(), 1739 }, 1740 wantJob: *baseJobWrapper. 1741 Clone(). 1742 Queue("test-queue"). 1743 UID("test-uid"). 1744 WorkloadPriorityClass("test-wpc"). 1745 Obj(), 1746 wantWorkloads: []kueue.Workload{ 1747 *utiltesting.MakeWorkload("job", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1748 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").Obj()). 1749 Queue("test-queue"). 1750 PriorityClass("test-wpc"). 1751 Priority(100). 1752 PriorityClassSource(constants.WorkloadPriorityClassSource). 1753 Labels(map[string]string{ 1754 controllerconsts.JobUIDLabel: "test-uid", 1755 }). 1756 Obj(), 1757 }, 1758 wantEvents: []utiltesting.EventRecord{ 1759 { 1760 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1761 EventType: "Normal", 1762 Reason: "Stopped", 1763 Message: "Missing Workload; unable to restore pod templates", 1764 }, 1765 { 1766 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1767 EventType: "Normal", 1768 Reason: "CreatedWorkload", 1769 Message: "Created Workload: ns/job-job-ed7d5", 1770 }, 1771 }, 1772 }, 1773 "the workload is created when queue name is set, with PriorityClass": { 1774 job: *baseJobWrapper. 1775 Clone(). 1776 Suspend(false). 1777 Queue("test-queue"). 1778 UID("test-uid"). 1779 PriorityClass("test-pc"). 1780 Obj(), 1781 priorityClasses: []client.Object{ 1782 basePCWrapper.Obj(), 1783 }, 1784 wantJob: *baseJobWrapper. 1785 Clone(). 1786 Queue("test-queue"). 1787 UID("test-uid"). 1788 PriorityClass("test-pc"). 1789 Obj(), 1790 wantWorkloads: []kueue.Workload{ 1791 *utiltesting.MakeWorkload("job", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1792 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").PriorityClass("test-pc").Obj()). 1793 Queue("test-queue"). 1794 PriorityClass("test-pc"). 1795 Priority(200). 1796 PriorityClassSource(constants.PodPriorityClassSource). 1797 Labels(map[string]string{ 1798 controllerconsts.JobUIDLabel: "test-uid", 1799 }). 1800 Obj(), 1801 }, 1802 wantEvents: []utiltesting.EventRecord{ 1803 { 1804 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1805 EventType: "Normal", 1806 Reason: "Stopped", 1807 Message: "Missing Workload; unable to restore pod templates", 1808 }, 1809 { 1810 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1811 EventType: "Normal", 1812 Reason: "CreatedWorkload", 1813 Message: "Created Workload: ns/job-job-ed7d5", 1814 }, 1815 }, 1816 }, 1817 "the workload is created when queue name is set, with workloadPriorityClass and PriorityClass": { 1818 job: *baseJobWrapper. 1819 Clone(). 1820 Suspend(false). 1821 Queue("test-queue"). 1822 UID("test-uid"). 1823 WorkloadPriorityClass("test-wpc"). 1824 PriorityClass("test-pc"). 1825 Obj(), 1826 priorityClasses: []client.Object{ 1827 basePCWrapper.Obj(), baseWPCWrapper.Obj(), 1828 }, 1829 wantJob: *baseJobWrapper. 1830 Clone(). 1831 Queue("test-queue"). 1832 UID("test-uid"). 1833 WorkloadPriorityClass("test-wpc"). 1834 PriorityClass("test-pc"). 1835 Obj(), 1836 wantWorkloads: []kueue.Workload{ 1837 *utiltesting.MakeWorkload("job", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1838 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").PriorityClass("test-pc").Obj()). 1839 Queue("test-queue"). 1840 PriorityClass("test-wpc"). 1841 Priority(100). 1842 PriorityClassSource(constants.WorkloadPriorityClassSource). 1843 Labels(map[string]string{ 1844 controllerconsts.JobUIDLabel: "test-uid", 1845 }). 1846 Obj(), 1847 }, 1848 wantEvents: []utiltesting.EventRecord{ 1849 { 1850 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1851 EventType: "Normal", 1852 Reason: "Stopped", 1853 Message: "Missing Workload; unable to restore pod templates", 1854 }, 1855 { 1856 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1857 EventType: "Normal", 1858 Reason: "CreatedWorkload", 1859 Message: "Created Workload: ns/job-job-ed7d5", 1860 }, 1861 }, 1862 }, 1863 "the workload shouldn't be recreated for the completed job": { 1864 job: *baseJobWrapper.Clone(). 1865 Condition(batchv1.JobCondition{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}). 1866 Obj(), 1867 workloads: []kueue.Workload{}, 1868 wantJob: *baseJobWrapper.Clone(). 1869 Condition(batchv1.JobCondition{Type: batchv1.JobComplete, Status: corev1.ConditionTrue}). 1870 Obj(), 1871 wantWorkloads: []kueue.Workload{}, 1872 }, 1873 "when the prebuilt workload is missing, no new one is created and the job is suspended": { 1874 job: *baseJobWrapper. 1875 Clone(). 1876 Suspend(false). 1877 Label(controllerconsts.PrebuiltWorkloadLabel, "missing-workload"). 1878 UID("test-uid"). 1879 Obj(), 1880 wantJob: *baseJobWrapper. 1881 Clone(). 1882 Label(controllerconsts.PrebuiltWorkloadLabel, "missing-workload"). 1883 UID("test-uid"). 1884 Obj(), 1885 wantEvents: []utiltesting.EventRecord{ 1886 { 1887 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1888 EventType: "Normal", 1889 Reason: "Stopped", 1890 Message: "missing workload", 1891 }, 1892 }, 1893 }, 1894 "when the prebuilt workload exists its owner info is updated": { 1895 job: *baseJobWrapper. 1896 Clone(). 1897 Suspend(false). 1898 Label(controllerconsts.PrebuiltWorkloadLabel, "prebuilt-workload"). 1899 UID("test-uid"). 1900 Obj(), 1901 wantJob: *baseJobWrapper. 1902 Clone(). 1903 Label(controllerconsts.PrebuiltWorkloadLabel, "prebuilt-workload"). 1904 UID("test-uid"). 1905 Obj(), 1906 workloads: []kueue.Workload{ 1907 *utiltesting.MakeWorkload("prebuilt-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1908 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").PriorityClass("test-pc").Obj()). 1909 Queue("test-queue"). 1910 PriorityClass("test-wpc"). 1911 Priority(100). 1912 PriorityClassSource(constants.WorkloadPriorityClassSource). 1913 Obj(), 1914 }, 1915 wantWorkloads: []kueue.Workload{ 1916 *utiltesting.MakeWorkload("prebuilt-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1917 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").PriorityClass("test-pc").Obj()). 1918 Queue("test-queue"). 1919 PriorityClass("test-wpc"). 1920 Priority(100). 1921 PriorityClassSource(constants.WorkloadPriorityClassSource). 1922 Labels(map[string]string{ 1923 controllerconsts.JobUIDLabel: "test-uid", 1924 }). 1925 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "job", "test-uid"). 1926 Obj(), 1927 }, 1928 wantEvents: []utiltesting.EventRecord{ 1929 { 1930 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1931 EventType: "Normal", 1932 Reason: "Stopped", 1933 Message: "Not admitted by cluster queue", 1934 }, 1935 }, 1936 }, 1937 "when the prebuilt workload is owned by another object": { 1938 job: *baseJobWrapper. 1939 Clone(). 1940 Suspend(false). 1941 Label(controllerconsts.PrebuiltWorkloadLabel, "prebuilt-workload"). 1942 UID("test-uid"). 1943 Obj(), 1944 wantJob: *baseJobWrapper. 1945 Clone(). 1946 Label(controllerconsts.PrebuiltWorkloadLabel, "prebuilt-workload"). 1947 UID("test-uid"). 1948 Obj(), 1949 workloads: []kueue.Workload{ 1950 *utiltesting.MakeWorkload("prebuilt-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1951 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").PriorityClass("test-pc").Obj()). 1952 Queue("test-queue"). 1953 PriorityClass("test-wpc"). 1954 Priority(100). 1955 PriorityClassSource(constants.WorkloadPriorityClassSource). 1956 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "other-job", "other-uid"). 1957 Obj(), 1958 }, 1959 wantWorkloads: []kueue.Workload{ 1960 *utiltesting.MakeWorkload("prebuilt-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1961 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 10).Request(corev1.ResourceCPU, "1").PriorityClass("test-pc").Obj()). 1962 Queue("test-queue"). 1963 PriorityClass("test-wpc"). 1964 Priority(100). 1965 PriorityClassSource(constants.WorkloadPriorityClassSource). 1966 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "other-job", "other-uid"). 1967 Obj(), 1968 }, 1969 wantEvents: []utiltesting.EventRecord{ 1970 { 1971 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 1972 EventType: "Normal", 1973 Reason: "Stopped", 1974 Message: "missing workload", 1975 }, 1976 }, 1977 }, 1978 "when the prebuilt workload is not equivalent to the job": { 1979 job: *baseJobWrapper. 1980 Clone(). 1981 Suspend(false). 1982 Label(controllerconsts.PrebuiltWorkloadLabel, "prebuilt-workload"). 1983 UID("test-uid"). 1984 Obj(), 1985 wantJob: *baseJobWrapper. 1986 Clone(). 1987 Label(controllerconsts.PrebuiltWorkloadLabel, "prebuilt-workload"). 1988 UID("test-uid"). 1989 Obj(), 1990 workloads: []kueue.Workload{ 1991 *utiltesting.MakeWorkload("prebuilt-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 1992 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 1).Request(corev1.ResourceCPU, "1").PriorityClass("test-pc").Obj()). 1993 Queue("test-queue"). 1994 PriorityClass("test-wpc"). 1995 Priority(100). 1996 PriorityClassSource(constants.WorkloadPriorityClassSource). 1997 Obj(), 1998 }, 1999 wantWorkloads: []kueue.Workload{ 2000 *utiltesting.MakeWorkload("prebuilt-workload", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 2001 PodSets(*utiltesting.MakePodSet(kueue.DefaultPodSetName, 1).Request(corev1.ResourceCPU, "1").PriorityClass("test-pc").Obj()). 2002 Queue("test-queue"). 2003 PriorityClass("test-wpc"). 2004 Priority(100). 2005 PriorityClassSource(constants.WorkloadPriorityClassSource). 2006 Labels(map[string]string{ 2007 controllerconsts.JobUIDLabel: "test-uid", 2008 }). 2009 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "job", "test-uid"). 2010 Condition(metav1.Condition{ 2011 Type: kueue.WorkloadFinished, 2012 Status: metav1.ConditionTrue, 2013 Reason: "OutOfSync", 2014 Message: "The prebuilt workload is out of sync with its user job", 2015 }). 2016 Obj(), 2017 }, 2018 wantEvents: []utiltesting.EventRecord{ 2019 { 2020 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 2021 EventType: "Normal", 2022 Reason: "Stopped", 2023 Message: "missing workload", 2024 }, 2025 }, 2026 }, 2027 "the workload is not admitted and tolerations change": { 2028 job: *baseJobWrapper.Clone().Toleration(corev1.Toleration{ 2029 Key: "tolerationkey2", 2030 Operator: corev1.TolerationOpExists, 2031 Effect: corev1.TaintEffectNoSchedule, 2032 }). 2033 Obj(), 2034 wantJob: *baseJobWrapper.Clone().Toleration(corev1.Toleration{ 2035 Key: "tolerationkey2", 2036 Operator: corev1.TolerationOpExists, 2037 Effect: corev1.TaintEffectNoSchedule, 2038 }).Obj(), 2039 workloads: []kueue.Workload{ 2040 *utiltesting.MakeWorkload(GetWorkloadNameForJob(baseJobWrapper.Name), "ns"). 2041 Finalizers(kueue.ResourceInUseFinalizerName). 2042 Queue("foo"). 2043 PodSets( 2044 *utiltesting.MakePodSet("main", 10). 2045 Toleration(corev1.Toleration{ 2046 Key: "tolerationkey1", 2047 Operator: corev1.TolerationOpExists, 2048 Effect: corev1.TaintEffectNoSchedule, 2049 }). 2050 Request(corev1.ResourceCPU, "1"). 2051 Obj(), 2052 ). 2053 Labels(map[string]string{ 2054 controllerconsts.JobUIDLabel: "", 2055 }). 2056 Priority(0). 2057 Obj(), 2058 }, 2059 wantWorkloads: []kueue.Workload{ 2060 *utiltesting.MakeWorkload(GetWorkloadNameForJob(baseJobWrapper.Name), "ns"). 2061 Finalizers(kueue.ResourceInUseFinalizerName). 2062 Queue("foo"). 2063 PodSets( 2064 *utiltesting.MakePodSet("main", 10). 2065 Toleration(corev1.Toleration{ 2066 Key: "tolerationkey2", 2067 Operator: corev1.TolerationOpExists, 2068 Effect: corev1.TaintEffectNoSchedule, 2069 }). 2070 Request(corev1.ResourceCPU, "1"). 2071 Obj(), 2072 ). 2073 Labels(map[string]string{ 2074 controllerconsts.JobUIDLabel: "", 2075 }). 2076 Priority(0). 2077 Obj(), 2078 }, 2079 wantEvents: []utiltesting.EventRecord{ 2080 { 2081 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 2082 EventType: "Normal", 2083 Reason: "UpdatedWorkload", 2084 Message: "Updated not matching Workload for suspended job: ns/job-job-ed7d5", 2085 }, 2086 }, 2087 }, 2088 "the workload is admitted and tolerations change": { 2089 job: *baseJobWrapper.Clone().Toleration(corev1.Toleration{ 2090 Key: "tolerationkey2", 2091 Operator: corev1.TolerationOpExists, 2092 Effect: corev1.TaintEffectNoSchedule, 2093 }). 2094 Suspend(false). 2095 Obj(), 2096 wantJob: *baseJobWrapper.Clone().Toleration(corev1.Toleration{ 2097 Key: "tolerationkey1", 2098 Operator: corev1.TolerationOpExists, 2099 Effect: corev1.TaintEffectNoSchedule, 2100 }).Obj(), 2101 workloads: []kueue.Workload{ 2102 *utiltesting.MakeWorkload(GetWorkloadNameForJob(baseJobWrapper.Name), "ns"). 2103 Finalizers(kueue.ResourceInUseFinalizerName). 2104 Queue("foo"). 2105 PodSets( 2106 *utiltesting.MakePodSet("main", 10). 2107 Toleration(corev1.Toleration{ 2108 Key: "tolerationkey1", 2109 Operator: corev1.TolerationOpExists, 2110 Effect: corev1.TaintEffectNoSchedule, 2111 }). 2112 Request(corev1.ResourceCPU, "1"). 2113 Obj(), 2114 ). 2115 Labels(map[string]string{ 2116 controllerconsts.JobUIDLabel: "", 2117 }). 2118 Priority(0). 2119 ReserveQuota(utiltesting.MakeAdmission("cq").PodSets(kueue.PodSetAssignment{ 2120 Name: "main", 2121 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 2122 corev1.ResourceCPU: "default", 2123 }, 2124 Count: ptr.To[int32](10), 2125 }).Obj()). 2126 Obj(), 2127 }, 2128 wantEvents: []utiltesting.EventRecord{ 2129 { 2130 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 2131 EventType: "Normal", 2132 Reason: "Stopped", 2133 Message: "No matching Workload; restoring pod templates according to existent Workload", 2134 }, 2135 { 2136 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 2137 EventType: "Normal", 2138 Reason: "DeletedWorkload", 2139 Message: "Deleted not matching Workload: ns/job-job-ed7d5", 2140 }, 2141 }, 2142 wantErr: jobframework.ErrNoMatchingWorkloads, 2143 }, 2144 "the workload is admitted, job still suspended and tolerations change": { 2145 job: *baseJobWrapper.Clone().Toleration(corev1.Toleration{ 2146 Key: "tolerationkey2", 2147 Operator: corev1.TolerationOpExists, 2148 Effect: corev1.TaintEffectNoSchedule, 2149 }). 2150 Suspend(true). 2151 Obj(), 2152 wantJob: *baseJobWrapper.Clone().Toleration(corev1.Toleration{ 2153 Key: "tolerationkey2", 2154 Operator: corev1.TolerationOpExists, 2155 Effect: corev1.TaintEffectNoSchedule, 2156 }).Obj(), 2157 workloads: []kueue.Workload{ 2158 *utiltesting.MakeWorkload(GetWorkloadNameForJob(baseJobWrapper.Name), "ns"). 2159 Finalizers(kueue.ResourceInUseFinalizerName). 2160 Queue("foo"). 2161 PodSets( 2162 *utiltesting.MakePodSet("main", 10). 2163 Toleration(corev1.Toleration{ 2164 Key: "tolerationkey1", 2165 Operator: corev1.TolerationOpExists, 2166 Effect: corev1.TaintEffectNoSchedule, 2167 }). 2168 Request(corev1.ResourceCPU, "1"). 2169 Obj(), 2170 ). 2171 Labels(map[string]string{ 2172 controllerconsts.JobUIDLabel: "", 2173 }). 2174 Priority(0). 2175 ReserveQuota(utiltesting.MakeAdmission("cq").PodSets(kueue.PodSetAssignment{ 2176 Name: "main", 2177 Flavors: map[corev1.ResourceName]kueue.ResourceFlavorReference{ 2178 corev1.ResourceCPU: "default", 2179 }, 2180 Count: ptr.To[int32](10), 2181 }).Obj()). 2182 Obj(), 2183 }, 2184 wantEvents: []utiltesting.EventRecord{ 2185 { 2186 Key: types.NamespacedName{Name: "job", Namespace: "ns"}, 2187 EventType: "Normal", 2188 Reason: "DeletedWorkload", 2189 Message: "Deleted not matching Workload: ns/job-job-ed7d5", 2190 }, 2191 }, 2192 wantErr: jobframework.ErrNoMatchingWorkloads, 2193 }, 2194 } 2195 for name, tc := range cases { 2196 t.Run(name, func(t *testing.T) { 2197 ctx, _ := utiltesting.ContextWithLog(t) 2198 clientBuilder := utiltesting.NewClientBuilder() 2199 if err := SetupIndexes(ctx, utiltesting.AsIndexer(clientBuilder)); err != nil { 2200 t.Fatalf("Could not setup indexes: %v", err) 2201 } 2202 objs := append(tc.priorityClasses, &tc.job, utiltesting.MakeResourceFlavor("default").Obj()) 2203 kcBuilder := clientBuilder. 2204 WithObjects(objs...) 2205 2206 for i := range tc.workloads { 2207 kcBuilder = kcBuilder.WithStatusSubresource(&tc.workloads[i]) 2208 } 2209 2210 // For prebuilt workloads we are skipping the ownership setup in the test body and 2211 // expect the reconciler to do it. 2212 _, useesPrebuiltWorkload := tc.job.Labels[controllerconsts.PrebuiltWorkloadLabel] 2213 2214 kClient := kcBuilder.Build() 2215 for i := range tc.workloads { 2216 if !useesPrebuiltWorkload { 2217 if err := ctrl.SetControllerReference(&tc.job, &tc.workloads[i], kClient.Scheme()); err != nil { 2218 t.Fatalf("Could not setup owner reference in Workloads: %v", err) 2219 } 2220 } 2221 if err := kClient.Create(ctx, &tc.workloads[i]); err != nil { 2222 t.Fatalf("Could not create workload: %v", err) 2223 } 2224 } 2225 recorder := &utiltesting.EventRecorder{} 2226 reconciler := NewReconciler(kClient, recorder, tc.reconcilerOptions...) 2227 2228 jobKey := client.ObjectKeyFromObject(&tc.job) 2229 _, err := reconciler.Reconcile(ctx, reconcile.Request{ 2230 NamespacedName: jobKey, 2231 }) 2232 if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" { 2233 t.Errorf("Reconcile returned error (-want,+got):\n%s", diff) 2234 } 2235 2236 var gotJob batchv1.Job 2237 if err := kClient.Get(ctx, jobKey, &gotJob); err != nil { 2238 t.Fatalf("Could not get Job after reconcile: %v", err) 2239 } 2240 if diff := cmp.Diff(tc.wantJob, gotJob, jobCmpOpts...); diff != "" { 2241 t.Errorf("Job after reconcile (-want,+got):\n%s", diff) 2242 } 2243 var gotWorkloads kueue.WorkloadList 2244 if err := kClient.List(ctx, &gotWorkloads); err != nil { 2245 t.Fatalf("Could not get Workloads after reconcile: %v", err) 2246 } 2247 2248 wlCheckOpts := workloadCmpOpts 2249 if useesPrebuiltWorkload { 2250 wlCheckOpts = workloadCmpOptsWithOwner 2251 } 2252 2253 if diff := cmp.Diff(tc.wantWorkloads, gotWorkloads.Items, wlCheckOpts...); diff != "" { 2254 t.Errorf("Workloads after reconcile (-want,+got):\n%s", diff) 2255 } 2256 2257 if diff := cmp.Diff(tc.wantEvents, recorder.RecordedEvents); diff != "" { 2258 t.Errorf("unexpected events (-want/+got):\n%s", diff) 2259 } 2260 }) 2261 } 2262 }