sigs.k8s.io/kueue@v0.6.2/pkg/controller/jobs/raycluster/raycluster_controller_test.go (about) 1 /* 2 Copyright 2024 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package raycluster 18 19 import ( 20 "testing" 21 22 "github.com/google/go-cmp/cmp" 23 "github.com/google/go-cmp/cmp/cmpopts" 24 rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" 25 corev1 "k8s.io/api/core/v1" 26 "k8s.io/apimachinery/pkg/api/resource" 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 "k8s.io/client-go/tools/record" 29 ctrl "sigs.k8s.io/controller-runtime" 30 "sigs.k8s.io/controller-runtime/pkg/client" 31 "sigs.k8s.io/controller-runtime/pkg/reconcile" 32 33 kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" 34 "sigs.k8s.io/kueue/pkg/controller/jobframework" 35 "sigs.k8s.io/kueue/pkg/podset" 36 utiltesting "sigs.k8s.io/kueue/pkg/util/testing" 37 testingrayutil "sigs.k8s.io/kueue/pkg/util/testingjobs/raycluster" 38 ) 39 40 var ( 41 jobCmpOpts = cmp.Options{ 42 cmpopts.EquateEmpty(), 43 cmpopts.IgnoreFields(rayv1.RayCluster{}, "TypeMeta", "ObjectMeta"), 44 } 45 workloadCmpOpts = cmp.Options{ 46 cmpopts.EquateEmpty(), 47 cmpopts.IgnoreFields(kueue.Workload{}, "TypeMeta", "ObjectMeta"), 48 cmpopts.IgnoreFields(kueue.WorkloadSpec{}, "Priority"), 49 cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime"), 50 cmpopts.IgnoreFields(kueue.PodSet{}, "Template"), 51 } 52 ) 53 54 func TestReconciler(t *testing.T) { 55 baseJobWrapper := testingrayutil.MakeCluster("job", "ns"). 56 Suspend(true). 57 Queue("foo"). 58 RequestHead(corev1.ResourceCPU, "10"). 59 RequestWorkerGroup(corev1.ResourceCPU, "10") 60 61 cases := map[string]struct { 62 reconcilerOptions []jobframework.Option 63 job rayv1.RayCluster 64 initObjects []client.Object 65 workloads []kueue.Workload 66 priorityClasses []client.Object 67 wantJob rayv1.RayCluster 68 wantWorkloads []kueue.Workload 69 runInfo []podset.PodSetInfo 70 wantErr error 71 }{ 72 "when workload is admitted, cluster is unsuspended": { 73 initObjects: []client.Object{ 74 utiltesting.MakeResourceFlavor("unit-test-flavor").Label("kubernetes.io/arch", "arm64").Obj(), 75 }, 76 job: *baseJobWrapper.Clone(). 77 Obj(), 78 wantJob: *baseJobWrapper.Clone(). 79 Suspend(false). 80 NodeSelectorHeadGroup("kubernetes.io/arch", "arm64"). 81 Obj(), 82 workloads: []kueue.Workload{ 83 *utiltesting.MakeWorkload("test", "ns"). 84 Finalizers(kueue.ResourceInUseFinalizerName). 85 PodSets( 86 kueue.PodSet{ 87 Name: "head", 88 Count: int32(1), 89 Template: corev1.PodTemplateSpec{ 90 Spec: corev1.PodSpec{ 91 92 RestartPolicy: corev1.RestartPolicyNever, 93 Containers: []corev1.Container{ 94 { 95 Name: "head-container", 96 Resources: corev1.ResourceRequirements{ 97 Requests: make(corev1.ResourceList), 98 }, 99 }, 100 }, 101 }, 102 }, 103 }, 104 kueue.PodSet{ 105 Name: "workers-group-0", 106 Count: int32(1), 107 Template: corev1.PodTemplateSpec{ 108 Spec: corev1.PodSpec{ 109 RestartPolicy: corev1.RestartPolicyNever, 110 111 Containers: []corev1.Container{ 112 { 113 Name: "worker-container", 114 Resources: corev1.ResourceRequirements{ 115 Requests: corev1.ResourceList{ 116 corev1.ResourceCPU: resource.MustParse("10"), 117 }, 118 }, 119 }, 120 }, 121 }, 122 }, 123 }). 124 Request(corev1.ResourceCPU, "10"). 125 ReserveQuota( 126 utiltesting.MakeAdmission("cq", "head", "workers-group-0"). 127 Assignment(corev1.ResourceCPU, "unit-test-flavor", "1"). 128 AssignmentPodCount(1). 129 Obj(), 130 ). 131 Admitted(true). 132 AdmissionCheck(kueue.AdmissionCheckState{ 133 Name: "check", 134 State: kueue.CheckStateReady, 135 PodSetUpdates: []kueue.PodSetUpdate{ 136 { 137 Name: "head", 138 }, 139 { 140 Name: "workers-group-0", 141 }, 142 }, 143 }). 144 Obj(), 145 }, 146 wantWorkloads: []kueue.Workload{ 147 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 148 Finalizers(kueue.ResourceInUseFinalizerName). 149 PodSets(kueue.PodSet{ 150 Name: "head", 151 Count: int32(1), 152 Template: corev1.PodTemplateSpec{ 153 Spec: corev1.PodSpec{ 154 RestartPolicy: corev1.RestartPolicyNever, 155 Containers: []corev1.Container{ 156 { 157 Name: "head-container", 158 Resources: corev1.ResourceRequirements{ 159 Requests: make(corev1.ResourceList), 160 }, 161 }, 162 }, 163 }, 164 }, 165 }, 166 kueue.PodSet{ 167 Name: "workers-group-0", 168 Count: int32(1), 169 Template: corev1.PodTemplateSpec{ 170 Spec: corev1.PodSpec{ 171 RestartPolicy: corev1.RestartPolicyNever, 172 Containers: []corev1.Container{ 173 { 174 Name: "worker-container", 175 Resources: corev1.ResourceRequirements{ 176 Requests: make(corev1.ResourceList), 177 }, 178 }, 179 }, 180 }, 181 }, 182 }). 183 ReserveQuota( 184 utiltesting.MakeAdmission("cq", "head", "workers-group-0"). 185 Assignment(corev1.ResourceCPU, "unit-test-flavor", "1"). 186 AssignmentPodCount(1). 187 Obj(), 188 ). 189 Admitted(true). 190 AdmissionCheck(kueue.AdmissionCheckState{ 191 Name: "check", 192 State: kueue.CheckStateReady, 193 PodSetUpdates: []kueue.PodSetUpdate{ 194 { 195 Name: "head", 196 }, 197 { 198 Name: "workers-group-0", 199 }, 200 }, 201 }). 202 Obj(), 203 }, 204 }, 205 "when workload is admitted but workload's conditions is Evicted, suspend it and restore node selector": { 206 initObjects: []client.Object{ 207 utiltesting.MakeResourceFlavor("unit-test-flavor").Label("kubernetes.io/arch", "arm64").Obj(), 208 }, 209 job: *baseJobWrapper.Clone(). 210 Suspend(false). 211 NodeSelectorHeadGroup("kubernetes.io/arch", "arm64"). 212 Obj(), 213 wantJob: *baseJobWrapper.Clone(). 214 Suspend(true). 215 Obj(), 216 workloads: []kueue.Workload{ 217 *utiltesting.MakeWorkload("test", "ns"). 218 Finalizers(kueue.ResourceInUseFinalizerName). 219 PodSets( 220 kueue.PodSet{ 221 Name: "head", 222 Count: int32(1), 223 Template: corev1.PodTemplateSpec{ 224 Spec: corev1.PodSpec{ 225 RestartPolicy: corev1.RestartPolicyNever, 226 Containers: []corev1.Container{ 227 { 228 Name: "head-container", 229 Resources: corev1.ResourceRequirements{ 230 Requests: make(corev1.ResourceList), 231 }, 232 }, 233 }, 234 }, 235 }, 236 }, 237 kueue.PodSet{ 238 Name: "workers-group-0", 239 Count: int32(1), 240 Template: corev1.PodTemplateSpec{ 241 Spec: corev1.PodSpec{ 242 RestartPolicy: corev1.RestartPolicyNever, 243 Containers: []corev1.Container{ 244 { 245 Name: "worker-container", 246 Resources: corev1.ResourceRequirements{ 247 Requests: corev1.ResourceList{ 248 corev1.ResourceCPU: resource.MustParse("10"), 249 }, 250 }, 251 }, 252 }, 253 }, 254 }, 255 }, 256 ). 257 Request(corev1.ResourceCPU, "10"). 258 ReserveQuota(utiltesting.MakeAdmission("cq", "head", "workers-group-0").AssignmentPodCount(1).Obj()). 259 Condition(metav1.Condition{ 260 Type: kueue.WorkloadEvicted, 261 Status: metav1.ConditionTrue, 262 }). 263 Admitted(true). 264 Obj(), 265 }, 266 wantWorkloads: []kueue.Workload{ 267 *utiltesting.MakeWorkload("a", "ns").Finalizers(kueue.ResourceInUseFinalizerName). 268 PodSets( 269 kueue.PodSet{ 270 Name: "head", 271 Count: int32(1), 272 Template: corev1.PodTemplateSpec{ 273 Spec: corev1.PodSpec{ 274 RestartPolicy: corev1.RestartPolicyNever, 275 Containers: []corev1.Container{ 276 { 277 Name: "head-container", 278 Resources: corev1.ResourceRequirements{ 279 Requests: make(corev1.ResourceList), 280 }, 281 }, 282 }, 283 }, 284 }, 285 }, 286 kueue.PodSet{ 287 Name: "workers-group-0", 288 Count: int32(1), 289 Template: corev1.PodTemplateSpec{ 290 Spec: corev1.PodSpec{ 291 RestartPolicy: corev1.RestartPolicyNever, 292 Containers: []corev1.Container{ 293 { 294 Name: "worker-container", 295 Resources: corev1.ResourceRequirements{ 296 Requests: make(corev1.ResourceList), 297 }, 298 }, 299 }, 300 }, 301 }, 302 }). 303 ReserveQuota(utiltesting.MakeAdmission("cq", "head", "workers-group-0").AssignmentPodCount(1).Obj()). 304 Condition(metav1.Condition{ 305 Type: kueue.WorkloadEvicted, 306 Status: metav1.ConditionTrue, 307 }). 308 Condition(metav1.Condition{ 309 Type: kueue.WorkloadQuotaReserved, 310 Status: metav1.ConditionFalse, 311 Reason: "Pending", 312 }). 313 Admitted(true). 314 Condition(metav1.Condition{ 315 Type: kueue.WorkloadAdmitted, 316 Status: metav1.ConditionFalse, 317 Reason: "NoReservation", 318 Message: "The workload has no reservation", 319 }). 320 Obj(), 321 }, 322 }, 323 } 324 for name, tc := range cases { 325 t.Run(name, func(t *testing.T) { 326 327 ctx, _ := utiltesting.ContextWithLog(t) 328 clientBuilder := utiltesting.NewClientBuilder(rayv1.AddToScheme) 329 330 if err := SetupIndexes(ctx, utiltesting.AsIndexer(clientBuilder)); err != nil { 331 t.Fatalf("Could not setup indexes: %v", err) 332 } 333 objs := append(tc.priorityClasses, &tc.job) 334 kcBuilder := clientBuilder.WithObjects(objs...) 335 336 for i := range tc.workloads { 337 kcBuilder = kcBuilder.WithStatusSubresource(&tc.workloads[i]) 338 } 339 340 kcBuilder = clientBuilder.WithObjects(tc.initObjects...) 341 342 kClient := kcBuilder.Build() 343 for i := range tc.workloads { 344 if err := ctrl.SetControllerReference(&tc.job, &tc.workloads[i], kClient.Scheme()); err != nil { 345 t.Fatalf("Could not setup owner reference in Workloads: %v", err) 346 } 347 if err := kClient.Create(ctx, &tc.workloads[i]); err != nil { 348 t.Fatalf("Could not create workload: %v", err) 349 } 350 } 351 recorder := record.NewBroadcaster().NewRecorder(kClient.Scheme(), corev1.EventSource{Component: "test"}) 352 reconciler := NewReconciler(kClient, recorder, tc.reconcilerOptions...) 353 354 jobKey := client.ObjectKeyFromObject(&tc.job) 355 _, err := reconciler.Reconcile(ctx, reconcile.Request{ 356 NamespacedName: jobKey, 357 }) 358 if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" { 359 t.Errorf("Reconcile returned error (-want,+got):\n%s", diff) 360 } 361 362 var gotJob rayv1.RayCluster 363 if err := kClient.Get(ctx, jobKey, &gotJob); err != nil { 364 t.Fatalf("Could not get Job after reconcile: %v", err) 365 } 366 if diff := cmp.Diff(tc.wantJob, gotJob, jobCmpOpts...); diff != "" { 367 t.Errorf("Job after reconcile (-want,+got):\n%s", diff) 368 } 369 var gotWorkloads kueue.WorkloadList 370 if err := kClient.List(ctx, &gotWorkloads); err != nil { 371 t.Fatalf("Could not get Workloads after reconcile: %v", err) 372 } 373 if diff := cmp.Diff(tc.wantWorkloads, gotWorkloads.Items, workloadCmpOpts...); diff != "" { 374 t.Errorf("Workloads after reconcile (-want,+got):\n%s", diff) 375 } 376 }) 377 } 378 }