volcano.sh/volcano@v1.9.0/pkg/webhooks/admission/jobs/validate/admit_job.go (about) 1 /* 2 Copyright 2018 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package validate 18 19 import ( 20 "context" 21 "fmt" 22 "strings" 23 24 admissionv1 "k8s.io/api/admission/v1" 25 whv1 "k8s.io/api/admissionregistration/v1" 26 v1 "k8s.io/api/core/v1" 27 apiequality "k8s.io/apimachinery/pkg/api/equality" 28 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/apimachinery/pkg/util/validation" 30 "k8s.io/apimachinery/pkg/util/validation/field" 31 "k8s.io/klog/v2" 32 k8score "k8s.io/kubernetes/pkg/apis/core" 33 k8scorev1 "k8s.io/kubernetes/pkg/apis/core/v1" 34 k8scorevalid "k8s.io/kubernetes/pkg/apis/core/validation" 35 "k8s.io/kubernetes/pkg/capabilities" 36 37 "volcano.sh/apis/pkg/apis/batch/v1alpha1" 38 schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1" 39 "volcano.sh/volcano/pkg/controllers/job/helpers" 40 jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers" 41 "volcano.sh/volcano/pkg/controllers/job/plugins" 42 controllerMpi "volcano.sh/volcano/pkg/controllers/job/plugins/distributed-framework/mpi" 43 "volcano.sh/volcano/pkg/webhooks/router" 44 "volcano.sh/volcano/pkg/webhooks/schema" 45 "volcano.sh/volcano/pkg/webhooks/util" 46 ) 47 48 func init() { 49 capabilities.Initialize(capabilities.Capabilities{ 50 AllowPrivileged: true, 51 PrivilegedSources: capabilities.PrivilegedSources{ 52 HostNetworkSources: []string{}, 53 HostPIDSources: []string{}, 54 HostIPCSources: []string{}, 55 }, 56 }) 57 router.RegisterAdmission(service) 58 } 59 60 var service = &router.AdmissionService{ 61 Path: "/jobs/validate", 62 Func: AdmitJobs, 63 64 Config: config, 65 66 ValidatingConfig: &whv1.ValidatingWebhookConfiguration{ 67 Webhooks: []whv1.ValidatingWebhook{{ 68 Name: "validatejob.volcano.sh", 69 Rules: []whv1.RuleWithOperations{ 70 { 71 Operations: []whv1.OperationType{whv1.Create, whv1.Update}, 72 Rule: whv1.Rule{ 73 APIGroups: []string{"batch.volcano.sh"}, 74 APIVersions: []string{"v1alpha1"}, 75 Resources: []string{"jobs"}, 76 }, 77 }, 78 }, 79 }}, 80 }, 81 } 82 83 var config = &router.AdmissionServiceConfig{} 84 85 // AdmitJobs is to admit jobs and return response. 86 func AdmitJobs(ar admissionv1.AdmissionReview) *admissionv1.AdmissionResponse { 87 klog.V(3).Infof("admitting jobs -- %s", ar.Request.Operation) 88 89 job, err := schema.DecodeJob(ar.Request.Object, ar.Request.Resource) 90 if err != nil { 91 return util.ToAdmissionResponse(err) 92 } 93 var msg string 94 reviewResponse := admissionv1.AdmissionResponse{} 95 reviewResponse.Allowed = true 96 97 switch ar.Request.Operation { 98 case admissionv1.Create: 99 msg = validateJobCreate(job, &reviewResponse) 100 case admissionv1.Update: 101 oldJob, err := schema.DecodeJob(ar.Request.OldObject, ar.Request.Resource) 102 if err != nil { 103 return util.ToAdmissionResponse(err) 104 } 105 err = validateJobUpdate(oldJob, job) 106 if err != nil { 107 return util.ToAdmissionResponse(err) 108 } 109 default: 110 err := fmt.Errorf("expect operation to be 'CREATE' or 'UPDATE'") 111 return util.ToAdmissionResponse(err) 112 } 113 114 if !reviewResponse.Allowed { 115 reviewResponse.Result = &metav1.Status{Message: strings.TrimSpace(msg)} 116 } 117 return &reviewResponse 118 } 119 120 func validateJobCreate(job *v1alpha1.Job, reviewResponse *admissionv1.AdmissionResponse) string { 121 var msg string 122 taskNames := map[string]string{} 123 var totalReplicas int32 124 125 if job.Spec.MinAvailable < 0 { 126 reviewResponse.Allowed = false 127 return "job 'minAvailable' must be >= 0." 128 } 129 130 if job.Spec.MaxRetry < 0 { 131 reviewResponse.Allowed = false 132 return "'maxRetry' cannot be less than zero." 133 } 134 135 if job.Spec.TTLSecondsAfterFinished != nil && *job.Spec.TTLSecondsAfterFinished < 0 { 136 reviewResponse.Allowed = false 137 return "'ttlSecondsAfterFinished' cannot be less than zero." 138 } 139 140 if len(job.Spec.Tasks) == 0 { 141 reviewResponse.Allowed = false 142 return "No task specified in job spec" 143 } 144 145 if _, ok := job.Spec.Plugins[controllerMpi.MPIPluginName]; ok { 146 mp := controllerMpi.NewInstance(job.Spec.Plugins[controllerMpi.MPIPluginName]) 147 masterIndex := helpers.GetTaskIndexUnderJob(mp.GetMasterName(), job) 148 workerIndex := helpers.GetTaskIndexUnderJob(mp.GetWorkerName(), job) 149 if masterIndex == -1 { 150 reviewResponse.Allowed = false 151 return "The specified mpi master task was not found" 152 } 153 if workerIndex == -1 { 154 reviewResponse.Allowed = false 155 return "The specified mpi worker task was not found" 156 } 157 } 158 159 hasDependenciesBetweenTasks := false 160 for index, task := range job.Spec.Tasks { 161 if task.DependsOn != nil { 162 hasDependenciesBetweenTasks = true 163 } 164 165 if task.Replicas < 0 { 166 msg += fmt.Sprintf(" 'replicas' < 0 in task: %s, job: %s;", task.Name, job.Name) 167 } 168 169 if task.MinAvailable != nil { 170 if *task.MinAvailable < 0 { 171 msg += fmt.Sprintf(" 'minAvailable' < 0 in task: %s, job: %s;", task.Name, job.Name) 172 } else if *task.MinAvailable > task.Replicas { 173 msg += fmt.Sprintf(" 'minAvailable' is greater than 'replicas' in task: %s, job: %s;", task.Name, job.Name) 174 } 175 } 176 177 // count replicas 178 totalReplicas += task.Replicas 179 180 // validate task name 181 if errMsgs := validation.IsDNS1123Label(task.Name); len(errMsgs) > 0 { 182 msg += fmt.Sprintf(" %v;", errMsgs) 183 } 184 185 // duplicate task name 186 if _, found := taskNames[task.Name]; found { 187 msg += fmt.Sprintf(" duplicated task name %s;", task.Name) 188 break 189 } else { 190 taskNames[task.Name] = task.Name 191 } 192 193 if err := validatePolicies(task.Policies, field.NewPath("spec.tasks.policies")); err != nil { 194 msg += err.Error() + fmt.Sprintf(" valid events are %v, valid actions are %v;", 195 getValidEvents(), getValidActions()) 196 } 197 podName := jobhelpers.MakePodName(job.Name, task.Name, index) 198 msg += validateK8sPodNameLength(podName) 199 msg += validateTaskTemplate(task, job, index) 200 } 201 202 msg += validateJobName(job) 203 204 if totalReplicas < job.Spec.MinAvailable { 205 msg += " job 'minAvailable' should not be greater than total replicas in tasks;" 206 } 207 208 if err := validatePolicies(job.Spec.Policies, field.NewPath("spec.policies")); err != nil { 209 msg = msg + err.Error() + fmt.Sprintf(" valid events are %v, valid actions are %v;", 210 getValidEvents(), getValidActions()) 211 } 212 213 // invalid job plugins 214 if len(job.Spec.Plugins) != 0 { 215 for name := range job.Spec.Plugins { 216 if _, found := plugins.GetPluginBuilder(name); !found { 217 msg += fmt.Sprintf(" unable to find job plugin: %s;", name) 218 } 219 } 220 } 221 222 if err := validateIO(job.Spec.Volumes); err != nil { 223 msg += err.Error() 224 } 225 226 queue, err := config.VolcanoClient.SchedulingV1beta1().Queues().Get(context.TODO(), job.Spec.Queue, metav1.GetOptions{}) 227 if err != nil { 228 msg += fmt.Sprintf(" unable to find job queue: %v;", err) 229 } else if queue.Status.State != schedulingv1beta1.QueueStateOpen { 230 msg += fmt.Sprintf(" can only submit job to queue with state `Open`, "+ 231 "queue `%s` status is `%s`;", queue.Name, queue.Status.State) 232 } 233 234 if hasDependenciesBetweenTasks { 235 _, isDag := topoSort(job) 236 if !isDag { 237 msg += " job has dependencies between tasks, but doesn't form a directed acyclic graph(DAG);" 238 } 239 } 240 241 if msg != "" { 242 reviewResponse.Allowed = false 243 } 244 245 return msg 246 } 247 248 func validateJobUpdate(old, new *v1alpha1.Job) error { 249 var totalReplicas int32 250 for _, task := range new.Spec.Tasks { 251 if task.Replicas < 0 { 252 return fmt.Errorf("'replicas' must be >= 0 in task: %s", task.Name) 253 } 254 255 if task.MinAvailable != nil { 256 if *task.MinAvailable < 0 { 257 return fmt.Errorf("'minAvailable' must be >= 0 in task: %s", task.Name) 258 } else if *task.MinAvailable > task.Replicas { 259 return fmt.Errorf("'minAvailable' must be <= 'replicas' in task: %s", task.Name) 260 } 261 } 262 263 // count replicas 264 totalReplicas += task.Replicas 265 } 266 if new.Spec.MinAvailable > totalReplicas { 267 return fmt.Errorf("job 'minAvailable' must not be greater than total replicas") 268 } 269 if new.Spec.MinAvailable < 0 { 270 return fmt.Errorf("job 'minAvailable' must be >= 0") 271 } 272 273 if len(old.Spec.Tasks) != len(new.Spec.Tasks) { 274 return fmt.Errorf("job updates may not add or remove tasks") 275 } 276 // other fields under spec are not allowed to mutate 277 new.Spec.MinAvailable = old.Spec.MinAvailable 278 new.Spec.PriorityClassName = old.Spec.PriorityClassName 279 for i := range new.Spec.Tasks { 280 new.Spec.Tasks[i].Replicas = old.Spec.Tasks[i].Replicas 281 new.Spec.Tasks[i].MinAvailable = old.Spec.Tasks[i].MinAvailable 282 } 283 284 // job controller will update the pvc name if not provided 285 for i := range new.Spec.Volumes { 286 if new.Spec.Volumes[i].VolumeClaim != nil { 287 new.Spec.Volumes[i].VolumeClaimName = "" 288 } 289 } 290 for i := range old.Spec.Volumes { 291 if old.Spec.Volumes[i].VolumeClaim != nil { 292 old.Spec.Volumes[i].VolumeClaimName = "" 293 } 294 } 295 296 if !apiequality.Semantic.DeepEqual(new.Spec, old.Spec) { 297 return fmt.Errorf("job updates may not change fields other than `minAvailable`, `tasks[*].replicas under spec`") 298 } 299 300 return nil 301 } 302 303 func validateTaskTemplate(task v1alpha1.TaskSpec, job *v1alpha1.Job, index int) string { 304 var v1PodTemplate v1.PodTemplate 305 v1PodTemplate.Template = *task.Template.DeepCopy() 306 k8scorev1.SetObjectDefaults_PodTemplate(&v1PodTemplate) 307 308 var coreTemplateSpec k8score.PodTemplateSpec 309 k8scorev1.Convert_v1_PodTemplateSpec_To_core_PodTemplateSpec(&v1PodTemplate.Template, &coreTemplateSpec, nil) 310 311 corePodTemplate := k8score.PodTemplate{ 312 ObjectMeta: metav1.ObjectMeta{ 313 Name: task.Name, 314 Namespace: job.Namespace, 315 }, 316 Template: coreTemplateSpec, 317 } 318 319 opts := k8scorevalid.PodValidationOptions{} 320 if allErrs := k8scorevalid.ValidatePodTemplate(&corePodTemplate, opts); len(allErrs) > 0 { 321 msg := fmt.Sprintf("spec.task[%d].", index) 322 for index := range allErrs { 323 msg += allErrs[index].Error() + ". " 324 } 325 return msg 326 } 327 328 msg := validateTaskTopoPolicy(task, index) 329 if msg != "" { 330 return msg 331 } 332 333 return "" 334 } 335 336 func validateK8sPodNameLength(podName string) string { 337 if errMsgs := validation.IsQualifiedName(podName); len(errMsgs) > 0 { 338 return fmt.Sprintf("create pod with name %s validate failed %v;", podName, errMsgs) 339 } 340 return "" 341 } 342 343 func validateJobName(job *v1alpha1.Job) string { 344 if errMsgs := validation.IsQualifiedName(job.Name); len(errMsgs) > 0 { 345 return fmt.Sprintf("create job with name %s validate failed %v", job.Name, errMsgs) 346 } 347 return "" 348 } 349 350 func validateTaskTopoPolicy(task v1alpha1.TaskSpec, index int) string { 351 if task.TopologyPolicy == "" || task.TopologyPolicy == v1alpha1.None { 352 return "" 353 } 354 355 template := task.Template.DeepCopy() 356 357 for id, container := range template.Spec.Containers { 358 if len(container.Resources.Requests) == 0 { 359 template.Spec.Containers[id].Resources.Requests = container.Resources.Limits.DeepCopy() 360 } 361 } 362 363 for id, container := range template.Spec.InitContainers { 364 if len(container.Resources.Requests) == 0 { 365 template.Spec.InitContainers[id].Resources.Requests = container.Resources.Limits.DeepCopy() 366 } 367 } 368 369 for id, container := range append(template.Spec.Containers, template.Spec.InitContainers...) { 370 requestNum := guaranteedCPUs(container) 371 if requestNum == 0 { 372 return fmt.Sprintf("the cpu request isn't an integer in spec.task[%d] container[%d].", 373 index, id) 374 } 375 } 376 377 return "" 378 } 379 380 func guaranteedCPUs(container v1.Container) int { 381 cpuQuantity := container.Resources.Requests[v1.ResourceCPU] 382 if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() { 383 return 0 384 } 385 386 return int(cpuQuantity.Value()) 387 }