sigs.k8s.io/kueue@v0.6.2/pkg/controller/core/workload_controller_test.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package core 18 19 import ( 20 "context" 21 "testing" 22 "time" 23 24 "github.com/google/go-cmp/cmp" 25 "github.com/google/go-cmp/cmp/cmpopts" 26 batchv1 "k8s.io/api/batch/v1" 27 v1 "k8s.io/api/core/v1" 28 "k8s.io/apimachinery/pkg/api/errors" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/types" 31 testingclock "k8s.io/utils/clock/testing" 32 "k8s.io/utils/ptr" 33 "sigs.k8s.io/controller-runtime/pkg/client" 34 "sigs.k8s.io/controller-runtime/pkg/reconcile" 35 36 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 37 "sigs.k8s.io/kueue/pkg/cache" 38 "sigs.k8s.io/kueue/pkg/queue" 39 utiltesting "sigs.k8s.io/kueue/pkg/util/testing" 40 ) 41 42 func TestAdmittedNotReadyWorkload(t *testing.T) { 43 now := time.Now() 44 minuteAgo := now.Add(-time.Minute) 45 fakeClock := testingclock.NewFakeClock(now) 46 47 testCases := map[string]struct { 48 workload kueue.Workload 49 podsReadyTimeout *time.Duration 50 wantCountingTowardsTimeout bool 51 wantRecheckAfter time.Duration 52 }{ 53 "workload without Admitted condition; not counting": { 54 workload: kueue.Workload{}, 55 }, 56 "workload with Admitted=True, no PodsReady; counting": { 57 workload: kueue.Workload{ 58 Status: kueue.WorkloadStatus{ 59 Admission: &kueue.Admission{}, 60 Conditions: []metav1.Condition{ 61 { 62 Type: kueue.WorkloadAdmitted, 63 Status: metav1.ConditionTrue, 64 LastTransitionTime: metav1.NewTime(minuteAgo), 65 }, 66 }, 67 }, 68 }, 69 podsReadyTimeout: ptr.To(5 * time.Minute), 70 wantCountingTowardsTimeout: true, 71 wantRecheckAfter: 4 * time.Minute, 72 }, 73 "workload with Admitted=True, no PodsReady, but no timeout configured; not counting": { 74 workload: kueue.Workload{ 75 Status: kueue.WorkloadStatus{ 76 Admission: &kueue.Admission{}, 77 Conditions: []metav1.Condition{ 78 { 79 Type: kueue.WorkloadAdmitted, 80 Status: metav1.ConditionTrue, 81 LastTransitionTime: metav1.NewTime(minuteAgo), 82 }, 83 }, 84 }, 85 }, 86 }, 87 "workload with Admitted=True, no PodsReady; timeout exceeded": { 88 workload: kueue.Workload{ 89 Status: kueue.WorkloadStatus{ 90 Admission: &kueue.Admission{}, 91 Conditions: []metav1.Condition{ 92 { 93 Type: kueue.WorkloadAdmitted, 94 Status: metav1.ConditionTrue, 95 LastTransitionTime: metav1.NewTime(now.Add(-7 * time.Minute)), 96 }, 97 }, 98 }, 99 }, 100 podsReadyTimeout: ptr.To(5 * time.Minute), 101 wantCountingTowardsTimeout: true, 102 }, 103 "workload with Admitted=True, PodsReady=False; counting since PodsReady.LastTransitionTime": { 104 workload: kueue.Workload{ 105 Status: kueue.WorkloadStatus{ 106 Admission: &kueue.Admission{}, 107 Conditions: []metav1.Condition{ 108 { 109 Type: kueue.WorkloadAdmitted, 110 Status: metav1.ConditionTrue, 111 LastTransitionTime: metav1.NewTime(minuteAgo), 112 }, 113 { 114 Type: kueue.WorkloadPodsReady, 115 Status: metav1.ConditionFalse, 116 LastTransitionTime: metav1.NewTime(now), 117 }, 118 }, 119 }, 120 }, 121 podsReadyTimeout: ptr.To(5 * time.Minute), 122 wantCountingTowardsTimeout: true, 123 wantRecheckAfter: 5 * time.Minute, 124 }, 125 "workload with Admitted=Unknown; not counting": { 126 workload: kueue.Workload{ 127 Status: kueue.WorkloadStatus{ 128 Admission: &kueue.Admission{}, 129 Conditions: []metav1.Condition{ 130 { 131 Type: kueue.WorkloadAdmitted, 132 Status: metav1.ConditionUnknown, 133 LastTransitionTime: metav1.NewTime(minuteAgo), 134 }, 135 }, 136 }, 137 }, 138 podsReadyTimeout: ptr.To(5 * time.Minute), 139 }, 140 "workload with Admitted=False, not counting": { 141 workload: kueue.Workload{ 142 Status: kueue.WorkloadStatus{ 143 Admission: &kueue.Admission{}, 144 Conditions: []metav1.Condition{ 145 { 146 Type: kueue.WorkloadAdmitted, 147 Status: metav1.ConditionUnknown, 148 LastTransitionTime: metav1.NewTime(minuteAgo), 149 }, 150 }, 151 }, 152 }, 153 podsReadyTimeout: ptr.To(5 * time.Minute), 154 }, 155 "workload with Admitted=True, PodsReady=True; not counting": { 156 workload: kueue.Workload{ 157 Status: kueue.WorkloadStatus{ 158 Admission: &kueue.Admission{}, 159 Conditions: []metav1.Condition{ 160 { 161 Type: kueue.WorkloadAdmitted, 162 Status: metav1.ConditionTrue, 163 LastTransitionTime: metav1.NewTime(minuteAgo), 164 }, 165 { 166 Type: kueue.WorkloadPodsReady, 167 Status: metav1.ConditionTrue, 168 LastTransitionTime: metav1.NewTime(now), 169 }, 170 }, 171 }, 172 }, 173 podsReadyTimeout: ptr.To(5 * time.Minute), 174 }, 175 } 176 177 for name, tc := range testCases { 178 t.Run(name, func(t *testing.T) { 179 wRec := WorkloadReconciler{podsReadyTimeout: tc.podsReadyTimeout} 180 countingTowardsTimeout, recheckAfter := wRec.admittedNotReadyWorkload(&tc.workload, fakeClock) 181 182 if tc.wantCountingTowardsTimeout != countingTowardsTimeout { 183 t.Errorf("Unexpected countingTowardsTimeout, want=%v, got=%v", tc.wantCountingTowardsTimeout, countingTowardsTimeout) 184 } 185 if tc.wantRecheckAfter != recheckAfter { 186 t.Errorf("Unexpected recheckAfter, want=%v, got=%v", tc.wantRecheckAfter, recheckAfter) 187 } 188 }) 189 } 190 } 191 192 func TestSyncCheckStates(t *testing.T) { 193 now := metav1.NewTime(time.Now()) 194 cases := map[string]struct { 195 states []kueue.AdmissionCheckState 196 list []string 197 wantStates []kueue.AdmissionCheckState 198 wantChange bool 199 ignoreTransitionTime bool 200 }{ 201 "nil conditions, nil list": {}, 202 "add to nil conditions": { 203 list: []string{"ac1", "ac2"}, 204 wantChange: true, 205 wantStates: []kueue.AdmissionCheckState{ 206 { 207 Name: "ac1", 208 State: kueue.CheckStatePending, 209 }, 210 { 211 Name: "ac2", 212 State: kueue.CheckStatePending, 213 }, 214 }, 215 ignoreTransitionTime: true, 216 }, 217 "add and remove": { 218 states: []kueue.AdmissionCheckState{ 219 { 220 Name: "ac0", 221 State: kueue.CheckStatePending, 222 }, 223 { 224 Name: "ac1", 225 State: kueue.CheckStatePending, 226 }, 227 }, 228 list: []string{"ac1", "ac2"}, 229 wantChange: true, 230 wantStates: []kueue.AdmissionCheckState{ 231 { 232 Name: "ac1", 233 State: kueue.CheckStatePending, 234 }, 235 { 236 Name: "ac2", 237 State: kueue.CheckStatePending, 238 }, 239 }, 240 ignoreTransitionTime: true, 241 }, 242 "cleanup": { 243 states: []kueue.AdmissionCheckState{ 244 { 245 Name: "ac0", 246 State: kueue.CheckStatePending, 247 }, 248 { 249 Name: "ac1", 250 State: kueue.CheckStatePending, 251 }, 252 }, 253 wantChange: true, 254 }, 255 "preserve conditions data": { 256 states: []kueue.AdmissionCheckState{ 257 { 258 Name: "ac0", 259 State: kueue.CheckStateReady, 260 Message: "Message one", 261 LastTransitionTime: *now.DeepCopy(), 262 }, 263 { 264 Name: "ac1", 265 State: kueue.CheckStatePending, 266 }, 267 }, 268 list: []string{"ac0", "ac1"}, 269 wantChange: false, 270 wantStates: []kueue.AdmissionCheckState{ 271 { 272 Name: "ac0", 273 State: kueue.CheckStateReady, 274 Message: "Message one", 275 LastTransitionTime: *now.DeepCopy(), 276 }, 277 { 278 Name: "ac1", 279 State: kueue.CheckStatePending, 280 }, 281 }, 282 }, 283 } 284 285 for name, tc := range cases { 286 t.Run(name, func(t *testing.T) { 287 gotStates, gotShouldChange := syncAdmissionCheckConditions(tc.states, tc.list) 288 289 if tc.wantChange != gotShouldChange { 290 t.Errorf("Unexpected should change, want=%v", tc.wantChange) 291 } 292 293 opts := []cmp.Option{} 294 if tc.ignoreTransitionTime { 295 opts = append(opts, cmpopts.IgnoreFields(kueue.AdmissionCheckState{}, "LastTransitionTime")) 296 } 297 if diff := cmp.Diff(tc.wantStates, gotStates, opts...); diff != "" { 298 t.Errorf("Unexpected conditions, (want-/got+): %s", diff) 299 } 300 }) 301 } 302 } 303 304 var ( 305 workloadCmpOpts = []cmp.Option{ 306 cmpopts.EquateEmpty(), 307 cmpopts.IgnoreFields( 308 kueue.Workload{}, "TypeMeta", "ObjectMeta.ResourceVersion", "Status.RequeueState.RequeueAt", 309 ), 310 cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime"), 311 cmpopts.SortSlices(func(a, b metav1.Condition) bool { return a.Type < b.Type }), 312 } 313 ) 314 315 func TestReconcile(t *testing.T) { 316 testStartTime := time.Now() 317 cases := map[string]struct { 318 workload *kueue.Workload 319 wantWorkload *kueue.Workload 320 wantError error 321 wantEvents []utiltesting.EventRecord 322 reconcilerOpts []Option 323 }{ 324 "admit": { 325 workload: utiltesting.MakeWorkload("wl", "ns"). 326 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 327 AdmissionCheck(kueue.AdmissionCheckState{ 328 Name: "check", 329 State: kueue.CheckStateReady, 330 }). 331 Obj(), 332 wantWorkload: utiltesting.MakeWorkload("wl", "ns"). 333 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 334 AdmissionCheck(kueue.AdmissionCheckState{ 335 Name: "check", 336 State: kueue.CheckStateReady, 337 }). 338 Condition(metav1.Condition{ 339 Type: "Admitted", 340 Status: "True", 341 Reason: "Admitted", 342 Message: "The workload is admitted", 343 }). 344 Obj(), 345 wantEvents: []utiltesting.EventRecord{ 346 { 347 Key: types.NamespacedName{Namespace: "ns", Name: "wl"}, 348 EventType: "Normal", 349 Reason: "Admitted", 350 }, 351 }, 352 }, 353 "already admitted": { 354 workload: utiltesting.MakeWorkload("wl", "ns"). 355 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 356 Admitted(true). 357 AdmissionCheck(kueue.AdmissionCheckState{ 358 Name: "check", 359 State: kueue.CheckStateReady, 360 }). 361 Obj(), 362 wantWorkload: utiltesting.MakeWorkload("wl", "ns"). 363 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 364 Admitted(true). 365 AdmissionCheck(kueue.AdmissionCheckState{ 366 Name: "check", 367 State: kueue.CheckStateReady, 368 }). 369 Obj(), 370 }, 371 "remove finalizer for finished workload": { 372 workload: utiltesting.MakeWorkload("unit-test", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 373 Condition(metav1.Condition{ 374 Type: "Finished", 375 Status: "True", 376 }). 377 DeletionTimestamp(testStartTime). 378 Obj(), 379 wantWorkload: nil, 380 }, 381 "don't remove finalizer for owned finished workload": { 382 workload: utiltesting.MakeWorkload("unit-test", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 383 Condition(metav1.Condition{ 384 Type: "Finished", 385 Status: "True", 386 }). 387 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "job", "test-uid"). 388 DeletionTimestamp(testStartTime). 389 Obj(), 390 wantWorkload: utiltesting.MakeWorkload("unit-test", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 391 Condition(metav1.Condition{ 392 Type: "Finished", 393 Status: "True", 394 }). 395 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "job", "test-uid"). 396 DeletionTimestamp(testStartTime). 397 Obj(), 398 }, 399 "unadmitted workload with rejected checks": { 400 workload: utiltesting.MakeWorkload("wl", "ns"). 401 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid"). 402 AdmissionCheck(kueue.AdmissionCheckState{ 403 Name: "check", 404 State: kueue.CheckStateRejected, 405 }). 406 Obj(), 407 wantWorkload: utiltesting.MakeWorkload("wl", "ns"). 408 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid"). 409 AdmissionCheck(kueue.AdmissionCheckState{ 410 Name: "check", 411 State: kueue.CheckStateRejected, 412 }). 413 Condition(metav1.Condition{ 414 Type: "Finished", 415 Status: "True", 416 Reason: "AdmissionChecksRejected", 417 Message: "Admission checks [check] are rejected", 418 }). 419 Obj(), 420 wantEvents: []utiltesting.EventRecord{ 421 { 422 Key: types.NamespacedName{Namespace: "ns", Name: "ownername"}, 423 EventType: "Normal", 424 Reason: "WorkloadFinished", 425 }, 426 }, 427 }, 428 "admitted workload with rejected checks": { 429 workload: utiltesting.MakeWorkload("wl", "ns"). 430 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 431 Admitted(true). 432 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid"). 433 AdmissionCheck(kueue.AdmissionCheckState{ 434 Name: "check", 435 State: kueue.CheckStateRejected, 436 }). 437 Obj(), 438 wantWorkload: utiltesting.MakeWorkload("wl", "ns"). 439 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 440 Admitted(true). 441 ControllerReference(batchv1.SchemeGroupVersion.WithKind("Job"), "ownername", "owneruid"). 442 AdmissionCheck(kueue.AdmissionCheckState{ 443 Name: "check", 444 State: kueue.CheckStateRejected, 445 }). 446 Condition(metav1.Condition{ 447 Type: "Evicted", 448 Status: "True", 449 Reason: "AdmissionCheck", 450 Message: "At least one admission check is false", 451 }). 452 Obj(), 453 }, 454 "increment re-queue count": { 455 reconcilerOpts: []Option{ 456 WithPodsReadyTimeout(ptr.To(3 * time.Second)), 457 WithRequeuingBackoffLimitCount(ptr.To[int32](100)), 458 }, 459 workload: utiltesting.MakeWorkload("wl", "ns"). 460 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 461 AdmissionCheck(kueue.AdmissionCheckState{ 462 Name: "check", 463 State: kueue.CheckStateReady, 464 }). 465 Condition(metav1.Condition{ // Override LastTransitionTime 466 Type: kueue.WorkloadAdmitted, 467 Status: metav1.ConditionTrue, 468 LastTransitionTime: metav1.NewTime(testStartTime.Add(-5 * time.Minute)), 469 Reason: "ByTest", 470 Message: "Admitted by ClusterQueue q1", 471 }). 472 Admitted(true). 473 RequeueState(ptr.To[int32](29), nil). 474 Obj(), 475 wantWorkload: utiltesting.MakeWorkload("wl", "ns"). 476 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 477 Admitted(true). 478 AdmissionCheck(kueue.AdmissionCheckState{ 479 Name: "check", 480 State: kueue.CheckStateReady, 481 }). 482 Condition(metav1.Condition{ 483 Type: kueue.WorkloadEvicted, 484 Status: metav1.ConditionTrue, 485 Reason: kueue.WorkloadEvictedByPodsReadyTimeout, 486 Message: "Exceeded the PodsReady timeout ns/wl", 487 }). 488 // 1.41284738^(30-1) = 22530.0558 489 RequeueState(ptr.To[int32](30), ptr.To(metav1.NewTime(testStartTime.Add(22530*time.Second).Truncate(time.Second)))). 490 Obj(), 491 }, 492 "deactivated workload": { 493 reconcilerOpts: []Option{ 494 WithPodsReadyTimeout(ptr.To(3 * time.Second)), 495 WithRequeuingBackoffLimitCount(ptr.To[int32](1)), 496 }, 497 workload: utiltesting.MakeWorkload("wl", "ns"). 498 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 499 AdmissionCheck(kueue.AdmissionCheckState{ 500 Name: "check", 501 State: kueue.CheckStateReady, 502 }). 503 Condition(metav1.Condition{ // Override LastTransitionTime 504 Type: kueue.WorkloadAdmitted, 505 Status: metav1.ConditionTrue, 506 LastTransitionTime: metav1.NewTime(testStartTime.Add(-5 * time.Minute)), 507 Reason: "ByTest", 508 Message: "Admitted by ClusterQueue q1", 509 }). 510 Admitted(true). 511 RequeueState(ptr.To[int32](1), ptr.To(metav1.NewTime(testStartTime.Add(1*time.Second).Truncate(time.Second)))). 512 Obj(), 513 wantWorkload: utiltesting.MakeWorkload("wl", "ns"). 514 Active(false). 515 ReserveQuota(utiltesting.MakeAdmission("q1").Obj()). 516 Admitted(true). 517 AdmissionCheck(kueue.AdmissionCheckState{ 518 Name: "check", 519 State: kueue.CheckStateReady, 520 }). 521 RequeueState(ptr.To[int32](1), ptr.To(metav1.NewTime(testStartTime.Add(1*time.Second).Truncate(time.Second)))). 522 Obj(), 523 wantEvents: []utiltesting.EventRecord{{ 524 Key: types.NamespacedName{Name: "wl", Namespace: "ns"}, 525 EventType: v1.EventTypeNormal, 526 Reason: kueue.WorkloadEvictedByDeactivation, 527 Message: "Deactivated Workload \"ns/wl\" by reached re-queue backoffLimitCount", 528 }}, 529 }, 530 } 531 for name, tc := range cases { 532 t.Run(name, func(t *testing.T) { 533 objs := []client.Object{tc.workload} 534 clientBuilder := utiltesting.NewClientBuilder().WithObjects(objs...).WithStatusSubresource(objs...) 535 cl := clientBuilder.Build() 536 recorder := &utiltesting.EventRecorder{} 537 538 cqCache := cache.New(cl) 539 qManager := queue.NewManager(cl, cqCache) 540 reconciler := NewWorkloadReconciler(cl, qManager, cqCache, recorder, tc.reconcilerOpts...) 541 542 ctxWithLogger, _ := utiltesting.ContextWithLog(t) 543 ctx, ctxCancel := context.WithCancel(ctxWithLogger) 544 defer ctxCancel() 545 546 _, gotError := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: client.ObjectKeyFromObject(tc.workload)}) 547 548 if diff := cmp.Diff(tc.wantError, gotError); diff != "" { 549 t.Errorf("unexpected reconcile error (-want/+got):\n%s", diff) 550 } 551 552 gotWorkload := &kueue.Workload{} 553 if err := cl.Get(ctx, client.ObjectKeyFromObject(tc.workload), gotWorkload); err != nil { 554 if tc.wantWorkload != nil && !errors.IsNotFound(err) { 555 t.Fatalf("Could not get Workloads after reconcile: %v", err) 556 } 557 gotWorkload = nil 558 } 559 560 if diff := cmp.Diff(tc.wantWorkload, gotWorkload, workloadCmpOpts...); diff != "" { 561 t.Errorf("Workloads after reconcile (-want,+got):\n%s", diff) 562 } 563 564 if tc.wantWorkload != nil { 565 if requeueState := tc.wantWorkload.Status.RequeueState; requeueState != nil && requeueState.RequeueAt != nil { 566 gotRequeueState := gotWorkload.Status.RequeueState 567 if gotRequeueState != nil && gotRequeueState.RequeueAt != nil { 568 // We verify the got requeueAt if the got requeueAt is after the desired requeueAt 569 // since the requeueAt is included in positive seconds of random jitter. 570 // Additionally, we need to verify the requeueAt by "Equal" function 571 // as the "After" function evaluates the nanoseconds despite the metav1.Time is seconds level precision. 572 if !gotRequeueState.RequeueAt.After(requeueState.RequeueAt.Time) && !gotRequeueState.RequeueAt.Equal(requeueState.RequeueAt) { 573 t.Errorf("Unexpected requeueState.requeueAt; gotRequeueAt %v needs to be after requeueAt %v", requeueState.RequeueAt, gotRequeueState.RequeueAt) 574 } 575 } else { 576 t.Errorf("Unexpected nil requeueState.requeuAt; requeueState.requeueAt shouldn't be nil") 577 } 578 } 579 } 580 581 if diff := cmp.Diff(tc.wantEvents, recorder.RecordedEvents, cmpopts.IgnoreFields(utiltesting.EventRecord{}, "Message")); diff != "" { 582 t.Errorf("unexpected events (-want/+got):\n%s", diff) 583 } 584 }) 585 } 586 }