sigs.k8s.io/kueue@v0.6.2/pkg/util/testingjobs/tfjob/wrappers_tfjob.go (about) 1 /* 2 Copyright 2023 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package testing 18 19 import ( 20 kftraining "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" 21 corev1 "k8s.io/api/core/v1" 22 "k8s.io/apimachinery/pkg/api/resource" 23 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 24 "k8s.io/apimachinery/pkg/types" 25 "k8s.io/utils/ptr" 26 27 "sigs.k8s.io/kueue/pkg/controller/constants" 28 ) 29 30 // TFJobWrapper wraps a Job. 31 type TFJobWrapper struct{ kftraining.TFJob } 32 33 // MakeTFJob creates a wrapper for a suspended job with a single container and parallelism=1. 34 func MakeTFJob(name, ns string) *TFJobWrapper { 35 return &TFJobWrapper{kftraining.TFJob{ 36 ObjectMeta: metav1.ObjectMeta{ 37 Name: name, 38 Namespace: ns, 39 Annotations: make(map[string]string, 1), 40 }, 41 Spec: kftraining.TFJobSpec{ 42 RunPolicy: kftraining.RunPolicy{ 43 Suspend: ptr.To(true), 44 }, 45 TFReplicaSpecs: map[kftraining.ReplicaType]*kftraining.ReplicaSpec{ 46 kftraining.TFJobReplicaTypeChief: { 47 Replicas: ptr.To[int32](1), 48 Template: corev1.PodTemplateSpec{ 49 Spec: corev1.PodSpec{ 50 RestartPolicy: "Never", 51 Containers: []corev1.Container{ 52 { 53 Name: "c", 54 Image: "pause", 55 Command: []string{}, 56 Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}}, 57 }, 58 }, 59 NodeSelector: map[string]string{}, 60 }, 61 }, 62 }, 63 kftraining.TFJobReplicaTypePS: { 64 Replicas: ptr.To[int32](1), 65 Template: corev1.PodTemplateSpec{ 66 Spec: corev1.PodSpec{ 67 RestartPolicy: "Never", 68 Containers: []corev1.Container{ 69 { 70 Name: "c", 71 Image: "pause", 72 Command: []string{}, 73 Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}}, 74 }, 75 }, 76 NodeSelector: map[string]string{}, 77 }, 78 }, 79 }, 80 kftraining.TFJobReplicaTypeWorker: { 81 Replicas: ptr.To[int32](1), 82 Template: corev1.PodTemplateSpec{ 83 Spec: corev1.PodSpec{ 84 RestartPolicy: "Never", 85 Containers: []corev1.Container{ 86 { 87 Name: "c", 88 Image: "pause", 89 Command: []string{}, 90 Resources: corev1.ResourceRequirements{Requests: corev1.ResourceList{}}, 91 }, 92 }, 93 NodeSelector: map[string]string{}, 94 }, 95 }, 96 }, 97 }, 98 }, 99 }} 100 } 101 102 // PriorityClass updates job priorityclass. 103 func (j *TFJobWrapper) PriorityClass(pc string) *TFJobWrapper { 104 if j.Spec.RunPolicy.SchedulingPolicy == nil { 105 j.Spec.RunPolicy.SchedulingPolicy = &kftraining.SchedulingPolicy{} 106 } 107 j.Spec.RunPolicy.SchedulingPolicy.PriorityClass = pc 108 return j 109 } 110 111 // WorkloadPriorityClass updates job workloadpriorityclass. 112 func (j *TFJobWrapper) WorkloadPriorityClass(wpc string) *TFJobWrapper { 113 if j.Labels == nil { 114 j.Labels = make(map[string]string) 115 } 116 j.Labels[constants.WorkloadPriorityClassLabel] = wpc 117 return j 118 } 119 120 // Obj returns the inner Job. 121 func (j *TFJobWrapper) Obj() *kftraining.TFJob { 122 return &j.TFJob 123 } 124 125 // Queue updates the queue name of the job. 126 func (j *TFJobWrapper) Queue(queue string) *TFJobWrapper { 127 if j.Labels == nil { 128 j.Labels = make(map[string]string) 129 } 130 j.Labels[constants.QueueLabel] = queue 131 return j 132 } 133 134 // Request adds a resource request to the default container. 135 func (j *TFJobWrapper) Request(replicaType kftraining.ReplicaType, r corev1.ResourceName, v string) *TFJobWrapper { 136 j.Spec.TFReplicaSpecs[replicaType].Template.Spec.Containers[0].Resources.Requests[r] = resource.MustParse(v) 137 return j 138 } 139 140 // Parallelism updates job parallelism. 141 func (j *TFJobWrapper) Parallelism(workerParallelism, psParallelism int32) *TFJobWrapper { 142 j.Spec.TFReplicaSpecs[kftraining.TFJobReplicaTypeWorker].Replicas = ptr.To(workerParallelism) 143 j.Spec.TFReplicaSpecs[kftraining.TFJobReplicaTypePS].Replicas = ptr.To(psParallelism) 144 return j 145 } 146 147 // Suspend updates the suspend status of the job. 148 func (j *TFJobWrapper) Suspend(s bool) *TFJobWrapper { 149 j.Spec.RunPolicy.Suspend = &s 150 return j 151 } 152 153 // UID updates the uid of the job. 154 func (j *TFJobWrapper) UID(uid string) *TFJobWrapper { 155 j.ObjectMeta.UID = types.UID(uid) 156 return j 157 }