volcano.sh/volcano@v1.9.0/pkg/webhooks/admission/jobs/validate/admit_job.go (about)

     1  /*
     2  Copyright 2018 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package validate
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"strings"
    23  
    24  	admissionv1 "k8s.io/api/admission/v1"
    25  	whv1 "k8s.io/api/admissionregistration/v1"
    26  	v1 "k8s.io/api/core/v1"
    27  	apiequality "k8s.io/apimachinery/pkg/api/equality"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/util/validation"
    30  	"k8s.io/apimachinery/pkg/util/validation/field"
    31  	"k8s.io/klog/v2"
    32  	k8score "k8s.io/kubernetes/pkg/apis/core"
    33  	k8scorev1 "k8s.io/kubernetes/pkg/apis/core/v1"
    34  	k8scorevalid "k8s.io/kubernetes/pkg/apis/core/validation"
    35  	"k8s.io/kubernetes/pkg/capabilities"
    36  
    37  	"volcano.sh/apis/pkg/apis/batch/v1alpha1"
    38  	schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    39  	"volcano.sh/volcano/pkg/controllers/job/helpers"
    40  	jobhelpers "volcano.sh/volcano/pkg/controllers/job/helpers"
    41  	"volcano.sh/volcano/pkg/controllers/job/plugins"
    42  	controllerMpi "volcano.sh/volcano/pkg/controllers/job/plugins/distributed-framework/mpi"
    43  	"volcano.sh/volcano/pkg/webhooks/router"
    44  	"volcano.sh/volcano/pkg/webhooks/schema"
    45  	"volcano.sh/volcano/pkg/webhooks/util"
    46  )
    47  
    48  func init() {
    49  	capabilities.Initialize(capabilities.Capabilities{
    50  		AllowPrivileged: true,
    51  		PrivilegedSources: capabilities.PrivilegedSources{
    52  			HostNetworkSources: []string{},
    53  			HostPIDSources:     []string{},
    54  			HostIPCSources:     []string{},
    55  		},
    56  	})
    57  	router.RegisterAdmission(service)
    58  }
    59  
    60  var service = &router.AdmissionService{
    61  	Path: "/jobs/validate",
    62  	Func: AdmitJobs,
    63  
    64  	Config: config,
    65  
    66  	ValidatingConfig: &whv1.ValidatingWebhookConfiguration{
    67  		Webhooks: []whv1.ValidatingWebhook{{
    68  			Name: "validatejob.volcano.sh",
    69  			Rules: []whv1.RuleWithOperations{
    70  				{
    71  					Operations: []whv1.OperationType{whv1.Create, whv1.Update},
    72  					Rule: whv1.Rule{
    73  						APIGroups:   []string{"batch.volcano.sh"},
    74  						APIVersions: []string{"v1alpha1"},
    75  						Resources:   []string{"jobs"},
    76  					},
    77  				},
    78  			},
    79  		}},
    80  	},
    81  }
    82  
    83  var config = &router.AdmissionServiceConfig{}
    84  
    85  // AdmitJobs is to admit jobs and return response.
    86  func AdmitJobs(ar admissionv1.AdmissionReview) *admissionv1.AdmissionResponse {
    87  	klog.V(3).Infof("admitting jobs -- %s", ar.Request.Operation)
    88  
    89  	job, err := schema.DecodeJob(ar.Request.Object, ar.Request.Resource)
    90  	if err != nil {
    91  		return util.ToAdmissionResponse(err)
    92  	}
    93  	var msg string
    94  	reviewResponse := admissionv1.AdmissionResponse{}
    95  	reviewResponse.Allowed = true
    96  
    97  	switch ar.Request.Operation {
    98  	case admissionv1.Create:
    99  		msg = validateJobCreate(job, &reviewResponse)
   100  	case admissionv1.Update:
   101  		oldJob, err := schema.DecodeJob(ar.Request.OldObject, ar.Request.Resource)
   102  		if err != nil {
   103  			return util.ToAdmissionResponse(err)
   104  		}
   105  		err = validateJobUpdate(oldJob, job)
   106  		if err != nil {
   107  			return util.ToAdmissionResponse(err)
   108  		}
   109  	default:
   110  		err := fmt.Errorf("expect operation to be 'CREATE' or 'UPDATE'")
   111  		return util.ToAdmissionResponse(err)
   112  	}
   113  
   114  	if !reviewResponse.Allowed {
   115  		reviewResponse.Result = &metav1.Status{Message: strings.TrimSpace(msg)}
   116  	}
   117  	return &reviewResponse
   118  }
   119  
   120  func validateJobCreate(job *v1alpha1.Job, reviewResponse *admissionv1.AdmissionResponse) string {
   121  	var msg string
   122  	taskNames := map[string]string{}
   123  	var totalReplicas int32
   124  
   125  	if job.Spec.MinAvailable < 0 {
   126  		reviewResponse.Allowed = false
   127  		return "job 'minAvailable' must be >= 0."
   128  	}
   129  
   130  	if job.Spec.MaxRetry < 0 {
   131  		reviewResponse.Allowed = false
   132  		return "'maxRetry' cannot be less than zero."
   133  	}
   134  
   135  	if job.Spec.TTLSecondsAfterFinished != nil && *job.Spec.TTLSecondsAfterFinished < 0 {
   136  		reviewResponse.Allowed = false
   137  		return "'ttlSecondsAfterFinished' cannot be less than zero."
   138  	}
   139  
   140  	if len(job.Spec.Tasks) == 0 {
   141  		reviewResponse.Allowed = false
   142  		return "No task specified in job spec"
   143  	}
   144  
   145  	if _, ok := job.Spec.Plugins[controllerMpi.MPIPluginName]; ok {
   146  		mp := controllerMpi.NewInstance(job.Spec.Plugins[controllerMpi.MPIPluginName])
   147  		masterIndex := helpers.GetTaskIndexUnderJob(mp.GetMasterName(), job)
   148  		workerIndex := helpers.GetTaskIndexUnderJob(mp.GetWorkerName(), job)
   149  		if masterIndex == -1 {
   150  			reviewResponse.Allowed = false
   151  			return "The specified mpi master task was not found"
   152  		}
   153  		if workerIndex == -1 {
   154  			reviewResponse.Allowed = false
   155  			return "The specified mpi worker task was not found"
   156  		}
   157  	}
   158  
   159  	hasDependenciesBetweenTasks := false
   160  	for index, task := range job.Spec.Tasks {
   161  		if task.DependsOn != nil {
   162  			hasDependenciesBetweenTasks = true
   163  		}
   164  
   165  		if task.Replicas < 0 {
   166  			msg += fmt.Sprintf(" 'replicas' < 0 in task: %s, job: %s;", task.Name, job.Name)
   167  		}
   168  
   169  		if task.MinAvailable != nil {
   170  			if *task.MinAvailable < 0 {
   171  				msg += fmt.Sprintf(" 'minAvailable' < 0 in task: %s, job: %s;", task.Name, job.Name)
   172  			} else if *task.MinAvailable > task.Replicas {
   173  				msg += fmt.Sprintf(" 'minAvailable' is greater than 'replicas' in task: %s, job: %s;", task.Name, job.Name)
   174  			}
   175  		}
   176  
   177  		// count replicas
   178  		totalReplicas += task.Replicas
   179  
   180  		// validate task name
   181  		if errMsgs := validation.IsDNS1123Label(task.Name); len(errMsgs) > 0 {
   182  			msg += fmt.Sprintf(" %v;", errMsgs)
   183  		}
   184  
   185  		// duplicate task name
   186  		if _, found := taskNames[task.Name]; found {
   187  			msg += fmt.Sprintf(" duplicated task name %s;", task.Name)
   188  			break
   189  		} else {
   190  			taskNames[task.Name] = task.Name
   191  		}
   192  
   193  		if err := validatePolicies(task.Policies, field.NewPath("spec.tasks.policies")); err != nil {
   194  			msg += err.Error() + fmt.Sprintf(" valid events are %v, valid actions are %v;",
   195  				getValidEvents(), getValidActions())
   196  		}
   197  		podName := jobhelpers.MakePodName(job.Name, task.Name, index)
   198  		msg += validateK8sPodNameLength(podName)
   199  		msg += validateTaskTemplate(task, job, index)
   200  	}
   201  
   202  	msg += validateJobName(job)
   203  
   204  	if totalReplicas < job.Spec.MinAvailable {
   205  		msg += " job 'minAvailable' should not be greater than total replicas in tasks;"
   206  	}
   207  
   208  	if err := validatePolicies(job.Spec.Policies, field.NewPath("spec.policies")); err != nil {
   209  		msg = msg + err.Error() + fmt.Sprintf(" valid events are %v, valid actions are %v;",
   210  			getValidEvents(), getValidActions())
   211  	}
   212  
   213  	// invalid job plugins
   214  	if len(job.Spec.Plugins) != 0 {
   215  		for name := range job.Spec.Plugins {
   216  			if _, found := plugins.GetPluginBuilder(name); !found {
   217  				msg += fmt.Sprintf(" unable to find job plugin: %s;", name)
   218  			}
   219  		}
   220  	}
   221  
   222  	if err := validateIO(job.Spec.Volumes); err != nil {
   223  		msg += err.Error()
   224  	}
   225  
   226  	queue, err := config.VolcanoClient.SchedulingV1beta1().Queues().Get(context.TODO(), job.Spec.Queue, metav1.GetOptions{})
   227  	if err != nil {
   228  		msg += fmt.Sprintf(" unable to find job queue: %v;", err)
   229  	} else if queue.Status.State != schedulingv1beta1.QueueStateOpen {
   230  		msg += fmt.Sprintf(" can only submit job to queue with state `Open`, "+
   231  			"queue `%s` status is `%s`;", queue.Name, queue.Status.State)
   232  	}
   233  
   234  	if hasDependenciesBetweenTasks {
   235  		_, isDag := topoSort(job)
   236  		if !isDag {
   237  			msg += " job has dependencies between tasks, but doesn't form a directed acyclic graph(DAG);"
   238  		}
   239  	}
   240  
   241  	if msg != "" {
   242  		reviewResponse.Allowed = false
   243  	}
   244  
   245  	return msg
   246  }
   247  
   248  func validateJobUpdate(old, new *v1alpha1.Job) error {
   249  	var totalReplicas int32
   250  	for _, task := range new.Spec.Tasks {
   251  		if task.Replicas < 0 {
   252  			return fmt.Errorf("'replicas' must be >= 0 in task: %s", task.Name)
   253  		}
   254  
   255  		if task.MinAvailable != nil {
   256  			if *task.MinAvailable < 0 {
   257  				return fmt.Errorf("'minAvailable' must be >= 0 in task: %s", task.Name)
   258  			} else if *task.MinAvailable > task.Replicas {
   259  				return fmt.Errorf("'minAvailable' must be <= 'replicas' in task: %s", task.Name)
   260  			}
   261  		}
   262  
   263  		// count replicas
   264  		totalReplicas += task.Replicas
   265  	}
   266  	if new.Spec.MinAvailable > totalReplicas {
   267  		return fmt.Errorf("job 'minAvailable' must not be greater than total replicas")
   268  	}
   269  	if new.Spec.MinAvailable < 0 {
   270  		return fmt.Errorf("job 'minAvailable' must be >= 0")
   271  	}
   272  
   273  	if len(old.Spec.Tasks) != len(new.Spec.Tasks) {
   274  		return fmt.Errorf("job updates may not add or remove tasks")
   275  	}
   276  	// other fields under spec are not allowed to mutate
   277  	new.Spec.MinAvailable = old.Spec.MinAvailable
   278  	new.Spec.PriorityClassName = old.Spec.PriorityClassName
   279  	for i := range new.Spec.Tasks {
   280  		new.Spec.Tasks[i].Replicas = old.Spec.Tasks[i].Replicas
   281  		new.Spec.Tasks[i].MinAvailable = old.Spec.Tasks[i].MinAvailable
   282  	}
   283  
   284  	// job controller will update the pvc name if not provided
   285  	for i := range new.Spec.Volumes {
   286  		if new.Spec.Volumes[i].VolumeClaim != nil {
   287  			new.Spec.Volumes[i].VolumeClaimName = ""
   288  		}
   289  	}
   290  	for i := range old.Spec.Volumes {
   291  		if old.Spec.Volumes[i].VolumeClaim != nil {
   292  			old.Spec.Volumes[i].VolumeClaimName = ""
   293  		}
   294  	}
   295  
   296  	if !apiequality.Semantic.DeepEqual(new.Spec, old.Spec) {
   297  		return fmt.Errorf("job updates may not change fields other than `minAvailable`, `tasks[*].replicas under spec`")
   298  	}
   299  
   300  	return nil
   301  }
   302  
   303  func validateTaskTemplate(task v1alpha1.TaskSpec, job *v1alpha1.Job, index int) string {
   304  	var v1PodTemplate v1.PodTemplate
   305  	v1PodTemplate.Template = *task.Template.DeepCopy()
   306  	k8scorev1.SetObjectDefaults_PodTemplate(&v1PodTemplate)
   307  
   308  	var coreTemplateSpec k8score.PodTemplateSpec
   309  	k8scorev1.Convert_v1_PodTemplateSpec_To_core_PodTemplateSpec(&v1PodTemplate.Template, &coreTemplateSpec, nil)
   310  
   311  	corePodTemplate := k8score.PodTemplate{
   312  		ObjectMeta: metav1.ObjectMeta{
   313  			Name:      task.Name,
   314  			Namespace: job.Namespace,
   315  		},
   316  		Template: coreTemplateSpec,
   317  	}
   318  
   319  	opts := k8scorevalid.PodValidationOptions{}
   320  	if allErrs := k8scorevalid.ValidatePodTemplate(&corePodTemplate, opts); len(allErrs) > 0 {
   321  		msg := fmt.Sprintf("spec.task[%d].", index)
   322  		for index := range allErrs {
   323  			msg += allErrs[index].Error() + ". "
   324  		}
   325  		return msg
   326  	}
   327  
   328  	msg := validateTaskTopoPolicy(task, index)
   329  	if msg != "" {
   330  		return msg
   331  	}
   332  
   333  	return ""
   334  }
   335  
   336  func validateK8sPodNameLength(podName string) string {
   337  	if errMsgs := validation.IsQualifiedName(podName); len(errMsgs) > 0 {
   338  		return fmt.Sprintf("create pod with name %s validate failed %v;", podName, errMsgs)
   339  	}
   340  	return ""
   341  }
   342  
   343  func validateJobName(job *v1alpha1.Job) string {
   344  	if errMsgs := validation.IsQualifiedName(job.Name); len(errMsgs) > 0 {
   345  		return fmt.Sprintf("create job with name %s validate failed %v", job.Name, errMsgs)
   346  	}
   347  	return ""
   348  }
   349  
   350  func validateTaskTopoPolicy(task v1alpha1.TaskSpec, index int) string {
   351  	if task.TopologyPolicy == "" || task.TopologyPolicy == v1alpha1.None {
   352  		return ""
   353  	}
   354  
   355  	template := task.Template.DeepCopy()
   356  
   357  	for id, container := range template.Spec.Containers {
   358  		if len(container.Resources.Requests) == 0 {
   359  			template.Spec.Containers[id].Resources.Requests = container.Resources.Limits.DeepCopy()
   360  		}
   361  	}
   362  
   363  	for id, container := range template.Spec.InitContainers {
   364  		if len(container.Resources.Requests) == 0 {
   365  			template.Spec.InitContainers[id].Resources.Requests = container.Resources.Limits.DeepCopy()
   366  		}
   367  	}
   368  
   369  	for id, container := range append(template.Spec.Containers, template.Spec.InitContainers...) {
   370  		requestNum := guaranteedCPUs(container)
   371  		if requestNum == 0 {
   372  			return fmt.Sprintf("the cpu request isn't  an integer in spec.task[%d] container[%d].",
   373  				index, id)
   374  		}
   375  	}
   376  
   377  	return ""
   378  }
   379  
   380  func guaranteedCPUs(container v1.Container) int {
   381  	cpuQuantity := container.Resources.Requests[v1.ResourceCPU]
   382  	if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
   383  		return 0
   384  	}
   385  
   386  	return int(cpuQuantity.Value())
   387  }