sigs.k8s.io/kueue@v0.6.2/pkg/util/testingjobs/mxjob/wrappers.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package mxjob 18 19 import ( 20 kftraining "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" 21 corev1 "k8s.io/api/core/v1" 22 "k8s.io/apimachinery/pkg/api/resource" 23 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 24 "k8s.io/apimachinery/pkg/types" 25 "k8s.io/utils/ptr" 26 27 "sigs.k8s.io/kueue/pkg/controller/constants" 28 ) 29 30 // MXJobWrapper wraps a Job. 31 type MXJobWrapper struct{ kftraining.MXJob } 32 33 // MakeMXJob creates a wrapper for a suspended job with a single container and parallelism=1. 34 func MakeMXJob(name, ns string) *MXJobWrapper { 35 return &MXJobWrapper{kftraining.MXJob{ 36 ObjectMeta: metav1.ObjectMeta{ 37 Name: name, 38 Namespace: ns, 39 Annotations: make(map[string]string, 1), 40 }, 41 Spec: kftraining.MXJobSpec{ 42 JobMode: kftraining.MXTrain, 43 RunPolicy: kftraining.RunPolicy{ 44 Suspend: ptr.To(true), 45 }, 46 MXReplicaSpecs: map[kftraining.ReplicaType]*kftraining.ReplicaSpec{ 47 kftraining.MXJobReplicaTypeScheduler: { 48 Replicas: ptr.To[int32](1), 49 Template: corev1.PodTemplateSpec{ 50 Spec: corev1.PodSpec{ 51 RestartPolicy: "Never", 52 Containers: []corev1.Container{ 53 { 54 Name: "c", 55 Image: "pause", 56 Command: []string{}, 57 Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}}, 58 }, 59 }, 60 NodeSelector: map[string]string{}, 61 }, 62 }, 63 }, 64 kftraining.MXJobReplicaTypeServer: { 65 Replicas: ptr.To[int32](1), 66 Template: corev1.PodTemplateSpec{ 67 Spec: corev1.PodSpec{ 68 RestartPolicy: "Never", 69 Containers: []corev1.Container{ 70 { 71 Name: "c", 72 Image: "pause", 73 Command: []string{}, 74 Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}}, 75 }, 76 }, 77 NodeSelector: map[string]string{}, 78 }, 79 }, 80 }, 81 kftraining.MXJobReplicaTypeWorker: { 82 Replicas: ptr.To[int32](1), 83 Template: corev1.PodTemplateSpec{ 84 Spec: corev1.PodSpec{ 85 RestartPolicy: "Never", 86 Containers: []corev1.Container{ 87 { 88 Name: "c", 89 Image: "pause", 90 Command: []string{}, 91 Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}}, 92 }, 93 }, 94 NodeSelector: map[string]string{}, 95 }, 96 }, 97 }, 98 }, 99 }, 100 }} 101 } 102 103 // PriorityClass updates job priorityclass. 104 func (j *MXJobWrapper) PriorityClass(pc string) *MXJobWrapper { 105 if j.Spec.RunPolicy.SchedulingPolicy == nil { 106 j.Spec.RunPolicy.SchedulingPolicy = &kftraining.SchedulingPolicy{} 107 } 108 j.Spec.RunPolicy.SchedulingPolicy.PriorityClass = pc 109 return j 110 } 111 112 // WorkloadPriorityClass updates job workloadpriorityclass. 113 func (j *MXJobWrapper) WorkloadPriorityClass(wpc string) *MXJobWrapper { 114 if j.Labels == nil { 115 j.Labels = make(map[string]string) 116 } 117 j.Labels[constants.WorkloadPriorityClassLabel] = wpc 118 return j 119 } 120 121 // Obj returns the inner Job. 122 func (j *MXJobWrapper) Obj() *kftraining.MXJob { 123 return &j.MXJob 124 } 125 126 // Queue updates the queue name of the job. 127 func (j *MXJobWrapper) Queue(queue string) *MXJobWrapper { 128 if j.Labels == nil { 129 j.Labels = make(map[string]string) 130 } 131 j.Labels[constants.QueueLabel] = queue 132 return j 133 } 134 135 // Request adds a resource request to the default container. 136 func (j *MXJobWrapper) Request(replicaType kftraining.ReplicaType, r corev1.ResourceName, v string) *MXJobWrapper { 137 j.Spec.MXReplicaSpecs[replicaType].Template.Spec.Containers[0].Resources.Requests[r] = resource.MustParse(v) 138 return j 139 } 140 141 // Image updates images of the job. 142 func (j *MXJobWrapper) Image(image string) *MXJobWrapper { 143 j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeScheduler].Template.Spec.Containers[0].Image = image 144 j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeServer].Template.Spec.Containers[0].Image = image 145 j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeWorker].Template.Spec.Containers[0].Image = image 146 return j 147 } 148 149 // Args updates args of the job. 150 func (j *MXJobWrapper) Args(args []string) *MXJobWrapper { 151 j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeScheduler].Template.Spec.Containers[0].Args = args 152 j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeServer].Template.Spec.Containers[0].Args = args 153 j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeWorker].Template.Spec.Containers[0].Args = args 154 return j 155 } 156 157 // Parallelism updates job parallelism. 158 func (j *MXJobWrapper) Parallelism(workerParallelism, psParallelism int32) *MXJobWrapper { 159 j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeWorker].Replicas = ptr.To(workerParallelism) 160 j.Spec.MXReplicaSpecs[kftraining.MXJobReplicaTypeServer].Replicas = ptr.To(psParallelism) 161 return j 162 } 163 164 // Suspend updates the suspend status of the job. 165 func (j *MXJobWrapper) Suspend(s bool) *MXJobWrapper { 166 j.Spec.RunPolicy.Suspend = &s 167 return j 168 } 169 170 // UID updates the uid of the job. 171 func (j *MXJobWrapper) UID(uid string) *MXJobWrapper { 172 j.ObjectMeta.UID = types.UID(uid) 173 return j 174 } 175 176 // NodeSelector updates the nodeSelector of job. 177 func (j *MXJobWrapper) NodeSelector(k, v string) *MXJobWrapper { 178 return j.RoleNodeSelector(kftraining.MXJobReplicaTypeServer, k, v). 179 RoleNodeSelector(kftraining.MXJobReplicaTypeWorker, k, v) 180 } 181 182 // NodeSelector updates the nodeSelector of job. 183 func (j *MXJobWrapper) RoleNodeSelector(role kftraining.ReplicaType, k, v string) *MXJobWrapper { 184 if j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector == nil { 185 j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector = make(map[string]string) 186 } 187 j.Spec.MXReplicaSpecs[role].Template.Spec.NodeSelector[k] = v 188 return j 189 } 190 191 // Active updates the replicaStatus for Active of job. 192 func (j *MXJobWrapper) Active(rType kftraining.ReplicaType, c int32) *MXJobWrapper { 193 if j.Status.ReplicaStatuses == nil { 194 j.Status.ReplicaStatuses = make(map[kftraining.ReplicaType]*kftraining.ReplicaStatus) 195 } 196 j.Status.ReplicaStatuses[rType] = &kftraining.ReplicaStatus{ 197 Active: c, 198 } 199 return j 200 } 201 202 // StatusConditions updates status conditions of the MXJob. 203 func (j *MXJobWrapper) StatusConditions(conditions ...kftraining.JobCondition) *MXJobWrapper { 204 j.Status.Conditions = conditions 205 return j 206 }