// Source: github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/control/pod_control.go

// Copyright 2019 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package control

import (
	"context"
	"fmt"
	"sync"

	commonutil "github.com/kubeflow/training-operator/pkg/util"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/record"
)

// Reasons for pod events.
const (
	// FailedCreatePodReason is added in an event and in a job condition
	// when a pod for a replica set fails to be created.
	FailedCreatePodReason = "FailedCreatePod"
	// SuccessfulCreatePodReason is added in an event when a pod for a job
	// is successfully created.
	SuccessfulCreatePodReason = "SuccessfulCreatePod"
	// FailedDeletePodReason is added in an event and in a job condition
	// when a pod for a replica set fails to be deleted.
	FailedDeletePodReason = "FailedDeletePod"
	// SuccessfulDeletePodReason is added in an event when a pod for a job
	// is successfully deleted.
	SuccessfulDeletePodReason = "SuccessfulDeletePod"
)

// PodControlInterface knows how to create, delete and patch pods. It is
// declared as an interface so that tests can substitute a fake implementation.
type PodControlInterface interface {
	// CreatePods creates new pods according to the spec.
	CreatePods(namespace string, template *v1.PodTemplateSpec, object runtime.Object) error
	// CreatePodsOnNode creates a new pod according to the spec on the specified node,
	// and sets the ControllerRef.
	CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error
	// CreatePodsWithControllerRef creates new pods according to the spec, and sets object as the pod's controller.
	CreatePodsWithControllerRef(namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error
	// DeletePod deletes the pod identified by podID.
	DeletePod(namespace string, podID string, object runtime.Object) error
	// PatchPod patches the pod identified by namespace and name with data.
	PatchPod(namespace, name string, data []byte) error
}

// RealPodControl is the default implementation of PodControlInterface.
68 type RealPodControl struct { 69 KubeClient clientset.Interface 70 Recorder record.EventRecorder 71 } 72 73 var _ PodControlInterface = &RealPodControl{} 74 75 func getPodsLabelSet(template *v1.PodTemplateSpec) labels.Set { 76 desiredLabels := make(labels.Set) 77 for k, v := range template.Labels { 78 desiredLabels[k] = v 79 } 80 return desiredLabels 81 } 82 83 func getPodsFinalizers(template *v1.PodTemplateSpec) []string { 84 desiredFinalizers := make([]string, len(template.Finalizers)) 85 copy(desiredFinalizers, template.Finalizers) 86 return desiredFinalizers 87 } 88 89 func getPodsAnnotationSet(template *v1.PodTemplateSpec) labels.Set { 90 desiredAnnotations := make(labels.Set) 91 for k, v := range template.Annotations { 92 desiredAnnotations[k] = v 93 } 94 return desiredAnnotations 95 } 96 97 func (r RealPodControl) CreatePods(namespace string, template *v1.PodTemplateSpec, object runtime.Object) error { 98 return r.createPods("", namespace, template, object, nil) 99 } 100 101 func (r RealPodControl) CreatePodsWithControllerRef(namespace string, template *v1.PodTemplateSpec, controllerObject runtime.Object, controllerRef *metav1.OwnerReference) error { 102 if err := ValidateControllerRef(controllerRef); err != nil { 103 return err 104 } 105 return r.createPods("", namespace, template, controllerObject, controllerRef) 106 } 107 108 func (r RealPodControl) CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error { 109 if err := ValidateControllerRef(controllerRef); err != nil { 110 return err 111 } 112 return r.createPods(nodeName, namespace, template, object, controllerRef) 113 } 114 115 func (r RealPodControl) PatchPod(namespace, name string, data []byte) error { 116 _, err := r.KubeClient.CoreV1().Pods(namespace).Patch(context.TODO(), name, types.StrategicMergePatchType, data, metav1.PatchOptions{}) 117 return err 118 } 119 120 func GetPodFromTemplate(template 
*v1.PodTemplateSpec, parentObject runtime.Object, controllerRef *metav1.OwnerReference) (*v1.Pod, error) { 121 desiredLabels := getPodsLabelSet(template) 122 desiredFinalizers := getPodsFinalizers(template) 123 desiredAnnotations := getPodsAnnotationSet(template) 124 125 pod := &v1.Pod{ 126 ObjectMeta: metav1.ObjectMeta{ 127 Labels: desiredLabels, 128 Annotations: desiredAnnotations, 129 Name: template.Name, 130 Finalizers: desiredFinalizers, 131 }, 132 } 133 if controllerRef != nil { 134 pod.OwnerReferences = append(pod.OwnerReferences, *controllerRef) 135 } 136 pod.Spec = *template.Spec.DeepCopy() 137 return pod, nil 138 } 139 140 func (r RealPodControl) createPods(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error { 141 pod, err := GetPodFromTemplate(template, object, controllerRef) 142 if err != nil { 143 return err 144 } 145 if len(nodeName) != 0 { 146 pod.Spec.NodeName = nodeName 147 } 148 if labels.Set(pod.Labels).AsSelectorPreValidated().Empty() { 149 return fmt.Errorf("unable to create pods, no labels") 150 } 151 logger := commonutil.LoggerForPod(pod, object.GetObjectKind().GroupVersionKind().Kind) 152 if newPod, err := r.KubeClient.CoreV1().Pods(namespace).Create(context.TODO(), pod, metav1.CreateOptions{}); err != nil { 153 r.Recorder.Eventf(object, v1.EventTypeWarning, FailedCreatePodReason, "Error creating: %v", err) 154 return err 155 } else { 156 accessor, err := meta.Accessor(object) 157 if err != nil { 158 logger.Errorf("parentObject does not have ObjectMeta, %v", err) 159 return nil 160 } 161 logger.Infof("Controller %v created pod %v", accessor.GetName(), newPod.Name) 162 r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulCreatePodReason, "Created pod: %v", newPod.Name) 163 } 164 return nil 165 } 166 167 func (r RealPodControl) DeletePod(namespace string, podID string, object runtime.Object) error { 168 accessor, err := meta.Accessor(object) 169 if err != nil { 170 
return fmt.Errorf("object does not have ObjectMeta, %v", err) 171 } 172 logger := commonutil.LoggerForJob(accessor) 173 pod, err := r.KubeClient.CoreV1().Pods(namespace).Get(context.TODO(), podID, metav1.GetOptions{}) 174 if err != nil { 175 if errors.IsNotFound(err) { 176 return nil 177 } 178 return err 179 } 180 if pod.DeletionTimestamp != nil { 181 logger.Infof("pod %s/%s is terminating, skip deleting", pod.Namespace, pod.Name) 182 return nil 183 } 184 logger.Infof("Controller %v deleting pod %v/%v", accessor.GetName(), namespace, podID) 185 // delete options 186 if err := r.KubeClient.CoreV1().Pods(namespace).Delete(context.TODO(), podID, metav1.DeleteOptions{}); err != nil { 187 r.Recorder.Eventf(object, v1.EventTypeWarning, FailedDeletePodReason, "Error deleting: %v", err) 188 return fmt.Errorf("unable to delete pods: %v", err) 189 } else { 190 r.Recorder.Eventf(object, v1.EventTypeNormal, SuccessfulDeletePodReason, "Deleted pod: %v", podID) 191 } 192 return nil 193 } 194 195 type FakePodControl struct { 196 sync.Mutex 197 Templates []v1.PodTemplateSpec 198 ControllerRefs []metav1.OwnerReference 199 DeletePodName []string 200 Patches [][]byte 201 Err error 202 CreateLimit int 203 CreateCallCount int 204 } 205 206 var _ PodControlInterface = &FakePodControl{} 207 208 func (f *FakePodControl) PatchPod(namespace, name string, data []byte) error { 209 f.Lock() 210 defer f.Unlock() 211 f.Patches = append(f.Patches, data) 212 if f.Err != nil { 213 return f.Err 214 } 215 return nil 216 } 217 218 func (f *FakePodControl) CreatePods(namespace string, spec *v1.PodTemplateSpec, object runtime.Object) error { 219 f.Lock() 220 defer f.Unlock() 221 f.CreateCallCount++ 222 if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit { 223 return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount) 224 } 225 f.Templates = append(f.Templates, *spec) 226 if f.Err != nil { 227 return f.Err 228 } 229 return nil 230 } 231 232 
func (f *FakePodControl) CreatePodsWithControllerRef(namespace string, spec *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error { 233 f.Lock() 234 defer f.Unlock() 235 f.CreateCallCount++ 236 if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit { 237 return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount) 238 } 239 f.Templates = append(f.Templates, *spec) 240 f.ControllerRefs = append(f.ControllerRefs, *controllerRef) 241 if f.Err != nil { 242 return f.Err 243 } 244 return nil 245 } 246 247 func (f *FakePodControl) CreatePodsOnNode(nodeName, namespace string, template *v1.PodTemplateSpec, object runtime.Object, controllerRef *metav1.OwnerReference) error { 248 f.Lock() 249 defer f.Unlock() 250 f.CreateCallCount++ 251 if f.CreateLimit != 0 && f.CreateCallCount > f.CreateLimit { 252 return fmt.Errorf("not creating pod, limit %d already reached (create call %d)", f.CreateLimit, f.CreateCallCount) 253 } 254 f.Templates = append(f.Templates, *template) 255 f.ControllerRefs = append(f.ControllerRefs, *controllerRef) 256 if f.Err != nil { 257 return f.Err 258 } 259 return nil 260 } 261 262 func (f *FakePodControl) DeletePod(namespace string, podID string, object runtime.Object) error { 263 f.Lock() 264 defer f.Unlock() 265 f.DeletePodName = append(f.DeletePodName, podID) 266 if f.Err != nil { 267 return f.Err 268 } 269 return nil 270 } 271 272 func (f *FakePodControl) Clear() { 273 f.Lock() 274 defer f.Unlock() 275 f.DeletePodName = []string{} 276 f.Templates = []v1.PodTemplateSpec{} 277 f.ControllerRefs = []metav1.OwnerReference{} 278 f.Patches = [][]byte{} 279 f.CreateLimit = 0 280 f.CreateCallCount = 0 281 }