sigs.k8s.io/kueue@v0.6.2/pkg/util/testingjobs/mxjob/wrappers.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package mxjob
    18  
    19  import (
    20  	kftraining "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
    21  	corev1 "k8s.io/api/core/v1"
    22  	"k8s.io/apimachinery/pkg/api/resource"
    23  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    24  	"k8s.io/apimachinery/pkg/types"
    25  	"k8s.io/utils/ptr"
    26  
    27  	"sigs.k8s.io/kueue/pkg/controller/constants"
    28  )
    29  
    30  // MXJobWrapper wraps a Job.
    31  type MXJobWrapper struct{ kftraining.MXJob }
    32  
    33  // MakeMXJob creates a wrapper for a suspended job with a single container and parallelism=1.
    34  func MakeMXJob(name, ns string) *MXJobWrapper {
    35  	return &MXJobWrapper{kftraining.MXJob{
    36  		ObjectMeta: metav1.ObjectMeta{
    37  			Name:        name,
    38  			Namespace:   ns,
    39  			Annotations: make(map[string]string, 1),
    40  		},
    41  		Spec: kftraining.MXJobSpec{
    42  			JobMode: kftraining.MXTrain,
    43  			RunPolicy: kftraining.RunPolicy{
    44  				Suspend: ptr.To(true),
    45  			},
    46  			MXReplicaSpecs: map[kftraining.ReplicaType]*kftraining.ReplicaSpec{
    47  				kftraining.MXJobReplicaTypeScheduler: {
    48  					Replicas: ptr.To[int32](1),
    49  					Template: corev1.PodTemplateSpec{
    50  						Spec: corev1.PodSpec{
    51  							RestartPolicy: "Never",
    52  							Containers: []corev1.Container{
    53  								{
    54  									Name:      "c",
    55  									Image:     "pause",
    56  									Command:   []string{},
    57  									Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}},
    58  								},
    59  							},
    60  							NodeSelector: map[string]string{},
    61  						},
    62  					},
    63  				},
    64  				kftraining.MXJobReplicaTypeServer: {
    65  					Replicas: ptr.To[int32](1),
    66  					Template: corev1.PodTemplateSpec{
    67  						Spec: corev1.PodSpec{
    68  							RestartPolicy: "Never",
    69  							Containers: []corev1.Container{
    70  								{
    71  									Name:      "c",
    72  									Image:     "pause",
    73  									Command:   []string{},
    74  									Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}},
    75  								},
    76  							},
    77  							NodeSelector: map[string]string{},
    78  						},
    79  					},
    80  				},
    81  				kftraining.MXJobReplicaTypeWorker: {
    82  					Replicas: ptr.To[int32](1),
    83  					Template: corev1.PodTemplateSpec{
    84  						Spec: corev1.PodSpec{
    85  							RestartPolicy: "Never",
    86  							Containers: []corev1.Container{
    87  								{
    88  									Name:      "c",
    89  									Image:     "pause",
    90  									Command:   []string{},
    91  									Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}},
    92  								},
    93  							},
    94  							NodeSelector: map[string]string{},
    95  						},
    96  					},
    97  				},
    98  			},
    99  		},
   100  	}}
   101  }
   102  
   103  // PriorityClass updates job priorityclass.
   104  func (j *MXJobWrapper) PriorityClass(pc string) *MXJobWrapper {
   105  	if j.Spec.RunPolicy.SchedulingPolicy == nil {
   106  		j.Spec.RunPolicy.SchedulingPolicy = &kftraining.SchedulingPolicy{}
   107  	}
   108  	j.Spec.RunPolicy.SchedulingPolicy.PriorityClass = pc
   109  	return j
   110  }
   111  
   112  // WorkloadPriorityClass updates job workloadpriorityclass.
   113  func (j *MXJobWrapper) WorkloadPriorityClass(wpc string) *MXJobWrapper {
   114  	if j.Labels == nil {
   115  		j.Labels = make(map[string]string)
   116  	}
   117  	j.Labels[constants.WorkloadPriorityClassLabel] = wpc
   118  	return j
   119  }
   120  
   121  // Obj returns the inner Job.
   122  func (j *MXJobWrapper) Obj() *kftraining.MXJob {
   123  	return &j.MXJob
   124  }
   125  
   126  // Queue updates the queue name of the job.
   127  func (j *MXJobWrapper) Queue(queue string) *MXJobWrapper {
   128  	if j.Labels == nil {
   129  		j.Labels = make(map[string]string)
   130  	}
   131  	j.Labels[constants.QueueLabel] = queue
   132  	return j
   133  }
   134  
   135  // Request adds a resource request to the default container.
   136  func (j *MXJobWrapper) Request(replicaType kftraining.ReplicaType, r corev1.ResourceName, v string) *MXJobWrapper {
   137  	j.Spec.MXReplicaSpecs[replicaType].Template.Spec.Containers[0].Resources.Requests[r] = resource.MustParse(v)
   138  	return j
   139  }
   140  
   141  // Image updates images of the job.
   142  func (j *MXJobWrapper) Image(image string) *MXJobWrapper {
   143  	j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeScheduler].Template.Spec.Containers[0].Image = image
   144  	j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeServer].Template.Spec.Containers[0].Image = image
   145  	j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeWorker].Template.Spec.Containers[0].Image = image
   146  	return j
   147  }
   148  
   149  // Args updates args of the job.
   150  func (j *MXJobWrapper) Args(args []string) *MXJobWrapper {
   151  	j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeScheduler].Template.Spec.Containers[0].Args = args
   152  	j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeServer].Template.Spec.Containers[0].Args = args
   153  	j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeWorker].Template.Spec.Containers[0].Args = args
   154  	return j
   155  }
   156  
   157  // Parallelism updates job parallelism.
   158  func (j *MXJobWrapper) Parallelism(workerParallelism, psParallelism int32) *MXJobWrapper {
   159  	j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeWorker].Replicas = ptr.To(workerParallelism)
   160  	j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeServer].Replicas = ptr.To(psParallelism)
   161  	return j
   162  }
   163  
   164  // Suspend updates the suspend status of the job.
   165  func (j *MXJobWrapper) Suspend(s bool) *MXJobWrapper {
   166  	j.Spec.RunPolicy.Suspend = &s
   167  	return j
   168  }
   169  
   170  // UID updates the uid of the job.
   171  func (j *MXJobWrapper) UID(uid string) *MXJobWrapper {
   172  	j.ObjectMeta.UID = types.UID(uid)
   173  	return j
   174  }
   175  
   176  // NodeSelector updates the nodeSelector of job.
   177  func (j *MXJobWrapper) NodeSelector(k, v string) *MXJobWrapper {
   178  	return j.RoleNodeSelector(kftraining.MXJobReplicaTypeServer, k, v).
   179  		RoleNodeSelector(kftraining.MXJobReplicaTypeWorker, k, v)
   180  }
   181  
   182  // NodeSelector updates the nodeSelector of job.
   183  func (j *MXJobWrapper) RoleNodeSelector(role kftraining.ReplicaType, k, v string) *MXJobWrapper {
   184  	if j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector == nil {
   185  		j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector = make(map[string]string)
   186  	}
   187  	j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector[k] = v
   188  	return j
   189  }
   190  
   191  // Active updates the replicaStatus for Active of job.
   192  func (j *MXJobWrapper) Active(rType kftraining.ReplicaType, c int32) *MXJobWrapper {
   193  	if j.Status.ReplicaStatuses == nil {
   194  		j.Status.ReplicaStatuses = make(map[kftraining.ReplicaType]*kftraining.ReplicaStatus)
   195  	}
   196  	j.Status.ReplicaStatuses[rType] = &kftraining.ReplicaStatus{
   197  		Active: c,
   198  	}
   199  	return j
   200  }
   201  
   202  // StatusConditions updates status conditions of the MXJob.
   203  func (j *MXJobWrapper) StatusConditions(conditions ...kftraining.JobCondition) *MXJobWrapper {
   204  	j.Status.Conditions = conditions
   205  	return j
   206  }