github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/common/job_test.go (about) 1 /* 2 Copyright 2023 The Kubeflow Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package common 18 19 import ( 20 "context" 21 "testing" 22 "time" 23 24 "github.com/google/go-cmp/cmp" 25 apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1" 26 "github.com/kubeflow/training-operator/pkg/controller.v1/control" 27 testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1" 28 corev1 "k8s.io/api/core/v1" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/runtime" 31 "k8s.io/client-go/kubernetes/fake" 32 "k8s.io/client-go/tools/record" 33 ) 34 35 func TestDeletePodsAndServices(T *testing.T) { 36 pods := []runtime.Object{ 37 newPod("runningPod", corev1.PodRunning), 38 newPod("succeededPod", corev1.PodSucceeded), 39 } 40 services := []runtime.Object{ 41 newService("runningPod"), 42 newService("succeededPod"), 43 } 44 45 cases := map[string]struct { 46 cleanPodPolicy apiv1.CleanPodPolicy 47 jobCondition apiv1.JobConditionType 48 wantPods *corev1.PodList 49 wantService *corev1.ServiceList 50 }{ 51 "Succeeded job and cleanPodPolicy is Running": { 52 cleanPodPolicy: apiv1.CleanPodPolicyRunning, 53 jobCondition: apiv1.JobSucceeded, 54 wantPods: &corev1.PodList{ 55 Items: []corev1.Pod{ 56 *pods[1].(*corev1.Pod), 57 }, 58 }, 59 wantService: &corev1.ServiceList{ 60 Items: []corev1.Service{ 61 *services[1].(*corev1.Service), 62 }, 63 }, 64 }, 65 "Suspended job and cleanPodPolicy is Running": { 66 cleanPodPolicy: apiv1.CleanPodPolicyRunning, 67 jobCondition: apiv1.JobSuspended, 68 wantPods: &corev1.PodList{}, 69 wantService: &corev1.ServiceList{}, 70 }, 71 "Finished job and cleanPodPolicy is All": { 72 cleanPodPolicy: apiv1.CleanPodPolicyAll, 73 jobCondition: apiv1.JobSucceeded, 74 wantPods: &corev1.PodList{}, 75 wantService: &corev1.ServiceList{}, 76 }, 77 "Finished job and cleanPodPolicy is None": { 78 cleanPodPolicy: apiv1.CleanPodPolicyNone, 79 jobCondition: apiv1.JobFailed, 80 wantPods: &corev1.PodList{ 81 Items: []corev1.Pod{ 82 *pods[0].(*corev1.Pod), 83 *pods[1].(*corev1.Pod), 84 }, 85 }, 86 wantService: &corev1.ServiceList{ 87 Items: []corev1.Service{ 88 *services[0].(*corev1.Service), 89 *services[1].(*corev1.Service), 90 }, 91 }, 92 }, 93 "Suspended job and cleanPodPolicy is None": { 94 cleanPodPolicy: apiv1.CleanPodPolicyNone, 95 jobCondition: apiv1.JobSuspended, 96 wantPods: &corev1.PodList{}, 97 wantService: &corev1.ServiceList{}, 98 }, 99 } 100 for name, tc := range cases { 101 T.Run(name, func(t *testing.T) { 102 fakeClient := fake.NewSimpleClientset(append(pods, services...)...) 103 jobController := JobController{ 104 PodControl: control.RealPodControl{KubeClient: fakeClient, Recorder: &record.FakeRecorder{}}, 105 ServiceControl: control.RealServiceControl{KubeClient: fakeClient, Recorder: &record.FakeRecorder{}}, 106 } 107 108 var inPods []*corev1.Pod 109 for i := range pods { 110 inPods = append(inPods, pods[i].(*corev1.Pod)) 111 } 112 runPolicy := &apiv1.RunPolicy{ 113 CleanPodPolicy: &tc.cleanPodPolicy, 114 } 115 jobStatus := apiv1.JobStatus{ 116 Conditions: []apiv1.JobCondition{ 117 { 118 Type: tc.jobCondition, 119 Status: corev1.ConditionTrue, 120 }, 121 }, 122 } 123 if err := jobController.DeletePodsAndServices(&testjobv1.TestJob{}, runPolicy, jobStatus, inPods); err != nil { 124 T.Errorf("Failed to delete pods and services: %v", err) 125 } 126 gotPods, err := fakeClient.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{}) 127 if err != nil { 128 t.Errorf("Failed to list pods: %v", err) 129 } 130 if diff := cmp.Diff(tc.wantPods, gotPods); len(diff) != 0 { 131 t.Errorf("Unexpected pods after running DeletePodsAndServices (-want,+got):%s\n", diff) 132 } 133 gotServices, err := fakeClient.CoreV1().Services("").List(context.Background(), metav1.ListOptions{}) 134 if err != nil { 135 t.Errorf("Failed to list services: %v", err) 136 } 137 if diff := cmp.Diff(tc.wantService, gotServices); len(diff) != 0 { 138 t.Errorf("Unexpected services after running DeletePodsAndServices (-want,+got):%s\n", diff) 139 } 140 }) 141 } 142 } 143 144 func TestPastBackoffLimit(T *testing.T) { 145 backoffLimitExceededPod := newPod("runningPodWithBackoff", corev1.PodRunning) 146 backoffLimitExceededPod.Status.ContainerStatuses = []corev1.ContainerStatus{ 147 {RestartCount: 3}, 148 } 149 allPods := []*corev1.Pod{ 150 newPod("runningPod", corev1.PodRunning), 151 newPod("succeededPod", corev1.PodSucceeded), 152 backoffLimitExceededPod, 153 } 154 cases := map[string]struct { 155 pods []*corev1.Pod 156 backOffLimit int32 157 wantPastBackOffLimit bool 158 }{ 159 "backOffLimit is 0": { 160 pods: allPods[:2], 161 backOffLimit: 0, 162 wantPastBackOffLimit: false, 163 }, 164 "backOffLimit is 3": { 165 pods: allPods, 166 backOffLimit: 3, 167 wantPastBackOffLimit: true, 168 }, 169 } 170 for name, tc := range cases { 171 T.Run(name, func(t *testing.T) { 172 jobController := JobController{} 173 runPolicy := &apiv1.RunPolicy{ 174 BackoffLimit: &tc.backOffLimit, 175 } 176 replica := map[apiv1.ReplicaType]*apiv1.ReplicaSpec{ 177 "test": {RestartPolicy: apiv1.RestartPolicyOnFailure}, 178 } 179 got, err := jobController.PastBackoffLimit("test-job", runPolicy, replica, tc.pods) 180 if err != nil { 181 t.Errorf("Failaed to do PastBackoffLimit: %v", err) 182 } 183 if tc.wantPastBackOffLimit != got { 184 t.Errorf("Unexpected pastBackoffLimit: \nwant: %v\ngot: %v\n", tc.wantPastBackOffLimit, got) 185 } 186 }) 187 } 188 } 189 190 func TestPastActiveDeadline(T *testing.T) { 191 cases := map[string]struct { 192 activeDeadlineSeconds int64 193 wantPastActiveDeadlineSeconds bool 194 }{ 195 "activeDeadlineSeconds is 0": { 196 activeDeadlineSeconds: 0, 197 wantPastActiveDeadlineSeconds: true, 198 }, 199 "activeDeadlineSeconds is 2": { 200 activeDeadlineSeconds: 2, 201 wantPastActiveDeadlineSeconds: false, 202 }, 203 } 204 for name, tc := range cases { 205 T.Run(name, func(t *testing.T) { 206 jobController := JobController{} 207 runPolicy := &apiv1.RunPolicy{ 208 ActiveDeadlineSeconds: &tc.activeDeadlineSeconds, 209 } 210 jobStatus := apiv1.JobStatus{ 211 StartTime: &metav1.Time{ 212 Time: time.Now(), 213 }, 214 } 215 if got := jobController.PastActiveDeadline(runPolicy, jobStatus); tc.wantPastActiveDeadlineSeconds != got { 216 t.Errorf("Unexpected PastActiveDeadline: \nwant: %v\ngot: %v\n", tc.wantPastActiveDeadlineSeconds, got) 217 } 218 }) 219 } 220 } 221 222 func newPod(name string, phase corev1.PodPhase) *corev1.Pod { 223 pod := &corev1.Pod{ 224 ObjectMeta: metav1.ObjectMeta{ 225 Name: name, 226 Labels: map[string]string{ 227 apiv1.ReplicaTypeLabel: "test", 228 }, 229 }, 230 Status: corev1.PodStatus{ 231 Phase: phase, 232 }, 233 } 234 return pod 235 } 236 237 func newService(name string) *corev1.Service { 238 service := &corev1.Service{ 239 ObjectMeta: metav1.ObjectMeta{ 240 Name: name, 241 }, 242 } 243 return service 244 }