sigs.k8s.io/kueue@v0.6.2/pkg/controller/jobs/raycluster/raycluster_webhook.go (about) 1 /* 2 Copyright 2024 The Kubernetes Authors. 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 http://www.apache.org/licenses/LICENSE-2.0 7 Unless required by applicable law or agreed to in writing, software 8 distributed under the License is distributed on an "AS IS" BASIS, 9 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 See the License for the specific language governing permissions and 11 limitations under the License. 12 */ 13 14 package raycluster 15 16 import ( 17 "context" 18 "fmt" 19 20 rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" 21 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 22 "k8s.io/apimachinery/pkg/runtime" 23 "k8s.io/apimachinery/pkg/util/validation/field" 24 "k8s.io/klog/v2" 25 "k8s.io/utils/ptr" 26 27 ctrl "sigs.k8s.io/controller-runtime" 28 "sigs.k8s.io/controller-runtime/pkg/webhook" 29 "sigs.k8s.io/controller-runtime/pkg/webhook/admission" 30 31 "sigs.k8s.io/kueue/pkg/controller/constants" 32 "sigs.k8s.io/kueue/pkg/controller/jobframework" 33 ) 34 35 type RayClusterWebhook struct { 36 manageJobsWithoutQueueName bool 37 } 38 39 // SetupRayClusterWebhook configures the webhook for rayv1 RayCluster. 40 func SetupRayClusterWebhook(mgr ctrl.Manager, opts ...jobframework.Option) error { 41 options := jobframework.ProcessOptions(opts...) 42 for _, opt := range opts { 43 opt(&options) 44 } 45 wh := &RayClusterWebhook{ 46 manageJobsWithoutQueueName: options.ManageJobsWithoutQueueName, 47 } 48 return ctrl.NewWebhookManagedBy(mgr). 49 For(&rayv1.RayCluster{}). 50 WithDefaulter(wh). 51 WithValidator(wh). 52 Complete() 53 } 54 55 // +kubebuilder:webhook:path=/mutate-ray-io-v1-raycluster,mutating=true,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayclusters,verbs=create,versions=v1,name=mraycluster.kb.io,admissionReviewVersions=v1 56 57 var _ webhook.CustomDefaulter = &RayClusterWebhook{} 58 59 // Default implements webhook.CustomDefaulter so a webhook will be registered for the type 60 func (w *RayClusterWebhook) Default(ctx context.Context, obj runtime.Object) error { 61 job := fromObject(obj) 62 log := ctrl.LoggerFrom(ctx).WithName("raycluster-webhook") 63 log.V(10).Info("Applying defaults", "job", klog.KObj(job)) 64 65 // We don't want to double count for a ray cluster created by a RayJob 66 if owner := metav1.GetControllerOf(job.Object()); owner != nil && jobframework.IsOwnerManagedByKueue(owner) { 67 log.Info("RayCluster is owned by RayJob") 68 if job.Annotations == nil { 69 job.Annotations = make(map[string]string) 70 } 71 if pwName, err := jobframework.GetWorkloadNameForOwnerRef(owner); err != nil { 72 return err 73 } else { 74 job.Annotations[constants.ParentWorkloadAnnotation] = pwName 75 } 76 return nil 77 } 78 79 jobframework.ApplyDefaultForSuspend((*RayCluster)(job), w.manageJobsWithoutQueueName) 80 return nil 81 } 82 83 // +kubebuilder:webhook:path=/validate-ray-io-v1-raycluster,mutating=false,failurePolicy=fail,sideEffects=None,groups=ray.io,resources=rayclusters,verbs=create;update,versions=v1,name=vraycluster.kb.io,admissionReviewVersions=v1 84 85 var _ webhook.CustomValidator = &RayClusterWebhook{} 86 87 // ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type 88 func (w *RayClusterWebhook) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { 89 job := obj.(*rayv1.RayCluster) 90 log := ctrl.LoggerFrom(ctx).WithName("raycluster-webhook") 91 log.V(10).Info("Validating create", "job", klog.KObj(job)) 92 return nil, w.validateCreate(job).ToAggregate() 93 } 94 95 func (w *RayClusterWebhook) validateCreate(job *rayv1.RayCluster) field.ErrorList { 96 var allErrors field.ErrorList 97 kueueJob := (*RayCluster)(job) 98 99 if w.manageJobsWithoutQueueName || jobframework.QueueName(kueueJob) != "" { 100 spec := &job.Spec 101 specPath := field.NewPath("spec") 102 103 // TODO revisit once Support dynamically sized (elastic) jobs #77 is implemented 104 // Should not use auto scaler. Once the resources are reserved by queue the cluster should do it's best to use them. 105 if ptr.Deref(spec.EnableInTreeAutoscaling, false) { 106 allErrors = append(allErrors, field.Invalid(specPath.Child("enableInTreeAutoscaling"), spec.EnableInTreeAutoscaling, "a kueue managed job should not use autoscaling")) 107 } 108 109 // Should limit the worker count to 8 - 1 (max podSets num - cluster head) 110 if len(spec.WorkerGroupSpecs) > 7 { 111 allErrors = append(allErrors, field.TooMany(specPath.Child("workerGroupSpecs"), len(spec.WorkerGroupSpecs), 7)) 112 } 113 114 // None of the workerGroups should be named "head" 115 for i := range spec.WorkerGroupSpecs { 116 if spec.WorkerGroupSpecs[i].GroupName == headGroupPodSetName { 117 allErrors = append(allErrors, field.Forbidden(specPath.Child("workerGroupSpecs").Index(i).Child("groupName"), fmt.Sprintf("%q is reserved for the head group", headGroupPodSetName))) 118 } 119 } 120 } 121 122 allErrors = append(allErrors, jobframework.ValidateCreateForQueueName(kueueJob)...) 123 return allErrors 124 } 125 126 // ValidateUpdate implements webhook.CustomValidator so a webhook will be registered for the type 127 func (w *RayClusterWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) { 128 oldJob := oldObj.(*rayv1.RayCluster) 129 newJob := newObj.(*rayv1.RayCluster) 130 log := ctrl.LoggerFrom(ctx).WithName("raycluster-webhook") 131 if w.manageJobsWithoutQueueName || jobframework.QueueName((*RayCluster)(newJob)) != "" { 132 log.Info("Validating update", "job", klog.KObj(newJob)) 133 allErrors := jobframework.ValidateUpdateForQueueName((*RayCluster)(oldJob), (*RayCluster)(newJob)) 134 allErrors = append(allErrors, w.validateCreate(newJob)...) 135 allErrors = append(allErrors, jobframework.ValidateUpdateForWorkloadPriorityClassName((*RayCluster)(oldJob), (*RayCluster)(newJob))...) 136 return nil, allErrors.ToAggregate() 137 } 138 return nil, nil 139 } 140 141 // ValidateDelete implements webhook.CustomValidator so a webhook will be registered for the type 142 func (w *RayClusterWebhook) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error) { 143 return nil, nil 144 }