github.com/kubeflow/training-operator@v1.7.0/pkg/controller.v1/common/job_test.go (about)

     1  /*
     2  Copyright 2023 The Kubeflow Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package common
    18  
    19  import (
    20  	"context"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/google/go-cmp/cmp"
    25  	apiv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
    26  	"github.com/kubeflow/training-operator/pkg/controller.v1/control"
    27  	testjobv1 "github.com/kubeflow/training-operator/test_job/apis/test_job/v1"
    28  	corev1 "k8s.io/api/core/v1"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/apimachinery/pkg/runtime"
    31  	"k8s.io/client-go/kubernetes/fake"
    32  	"k8s.io/client-go/tools/record"
    33  )
    34  
    35  func TestDeletePodsAndServices(T *testing.T) {
    36  	pods := []runtime.Object{
    37  		newPod("runningPod", corev1.PodRunning),
    38  		newPod("succeededPod", corev1.PodSucceeded),
    39  	}
    40  	services := []runtime.Object{
    41  		newService("runningPod"),
    42  		newService("succeededPod"),
    43  	}
    44  
    45  	cases := map[string]struct {
    46  		cleanPodPolicy apiv1.CleanPodPolicy
    47  		jobCondition   apiv1.JobConditionType
    48  		wantPods       *corev1.PodList
    49  		wantService    *corev1.ServiceList
    50  	}{
    51  		"Succeeded job and cleanPodPolicy is Running": {
    52  			cleanPodPolicy: apiv1.CleanPodPolicyRunning,
    53  			jobCondition:   apiv1.JobSucceeded,
    54  			wantPods: &corev1.PodList{
    55  				Items: []corev1.Pod{
    56  					*pods[1].(*corev1.Pod),
    57  				},
    58  			},
    59  			wantService: &corev1.ServiceList{
    60  				Items: []corev1.Service{
    61  					*services[1].(*corev1.Service),
    62  				},
    63  			},
    64  		},
    65  		"Suspended job and cleanPodPolicy is Running": {
    66  			cleanPodPolicy: apiv1.CleanPodPolicyRunning,
    67  			jobCondition:   apiv1.JobSuspended,
    68  			wantPods:       &corev1.PodList{},
    69  			wantService:    &corev1.ServiceList{},
    70  		},
    71  		"Finished job and cleanPodPolicy is All": {
    72  			cleanPodPolicy: apiv1.CleanPodPolicyAll,
    73  			jobCondition:   apiv1.JobSucceeded,
    74  			wantPods:       &corev1.PodList{},
    75  			wantService:    &corev1.ServiceList{},
    76  		},
    77  		"Finished job and cleanPodPolicy is None": {
    78  			cleanPodPolicy: apiv1.CleanPodPolicyNone,
    79  			jobCondition:   apiv1.JobFailed,
    80  			wantPods: &corev1.PodList{
    81  				Items: []corev1.Pod{
    82  					*pods[0].(*corev1.Pod),
    83  					*pods[1].(*corev1.Pod),
    84  				},
    85  			},
    86  			wantService: &corev1.ServiceList{
    87  				Items: []corev1.Service{
    88  					*services[0].(*corev1.Service),
    89  					*services[1].(*corev1.Service),
    90  				},
    91  			},
    92  		},
    93  		"Suspended job and cleanPodPolicy is None": {
    94  			cleanPodPolicy: apiv1.CleanPodPolicyNone,
    95  			jobCondition:   apiv1.JobSuspended,
    96  			wantPods:       &corev1.PodList{},
    97  			wantService:    &corev1.ServiceList{},
    98  		},
    99  	}
   100  	for name, tc := range cases {
   101  		T.Run(name, func(t *testing.T) {
   102  			fakeClient := fake.NewSimpleClientset(append(pods, services...)...)
   103  			jobController := JobController{
   104  				PodControl:     control.RealPodControl{KubeClient: fakeClient, Recorder: &record.FakeRecorder{}},
   105  				ServiceControl: control.RealServiceControl{KubeClient: fakeClient, Recorder: &record.FakeRecorder{}},
   106  			}
   107  
   108  			var inPods []*corev1.Pod
   109  			for i := range pods {
   110  				inPods = append(inPods, pods[i].(*corev1.Pod))
   111  			}
   112  			runPolicy := &apiv1.RunPolicy{
   113  				CleanPodPolicy: &tc.cleanPodPolicy,
   114  			}
   115  			jobStatus := apiv1.JobStatus{
   116  				Conditions: []apiv1.JobCondition{
   117  					{
   118  						Type:   tc.jobCondition,
   119  						Status: corev1.ConditionTrue,
   120  					},
   121  				},
   122  			}
   123  			if err := jobController.DeletePodsAndServices(&testjobv1.TestJob{}, runPolicy, jobStatus, inPods); err != nil {
   124  				T.Errorf("Failed to delete pods and services: %v", err)
   125  			}
   126  			gotPods, err := fakeClient.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{})
   127  			if err != nil {
   128  				t.Errorf("Failed to list pods: %v", err)
   129  			}
   130  			if diff := cmp.Diff(tc.wantPods, gotPods); len(diff) != 0 {
   131  				t.Errorf("Unexpected pods after running DeletePodsAndServices (-want,+got):%s\n", diff)
   132  			}
   133  			gotServices, err := fakeClient.CoreV1().Services("").List(context.Background(), metav1.ListOptions{})
   134  			if err != nil {
   135  				t.Errorf("Failed to list services: %v", err)
   136  			}
   137  			if diff := cmp.Diff(tc.wantService, gotServices); len(diff) != 0 {
   138  				t.Errorf("Unexpected services after running DeletePodsAndServices (-want,+got):%s\n", diff)
   139  			}
   140  		})
   141  	}
   142  }
   143  
   144  func TestPastBackoffLimit(T *testing.T) {
   145  	backoffLimitExceededPod := newPod("runningPodWithBackoff", corev1.PodRunning)
   146  	backoffLimitExceededPod.Status.ContainerStatuses = []corev1.ContainerStatus{
   147  		{RestartCount: 3},
   148  	}
   149  	allPods := []*corev1.Pod{
   150  		newPod("runningPod", corev1.PodRunning),
   151  		newPod("succeededPod", corev1.PodSucceeded),
   152  		backoffLimitExceededPod,
   153  	}
   154  	cases := map[string]struct {
   155  		pods                 []*corev1.Pod
   156  		backOffLimit         int32
   157  		wantPastBackOffLimit bool
   158  	}{
   159  		"backOffLimit is 0": {
   160  			pods:                 allPods[:2],
   161  			backOffLimit:         0,
   162  			wantPastBackOffLimit: false,
   163  		},
   164  		"backOffLimit is 3": {
   165  			pods:                 allPods,
   166  			backOffLimit:         3,
   167  			wantPastBackOffLimit: true,
   168  		},
   169  	}
   170  	for name, tc := range cases {
   171  		T.Run(name, func(t *testing.T) {
   172  			jobController := JobController{}
   173  			runPolicy := &apiv1.RunPolicy{
   174  				BackoffLimit: &tc.backOffLimit,
   175  			}
   176  			replica := map[apiv1.ReplicaType]*apiv1.ReplicaSpec{
   177  				"test": {RestartPolicy: apiv1.RestartPolicyOnFailure},
   178  			}
   179  			got, err := jobController.PastBackoffLimit("test-job", runPolicy, replica, tc.pods)
   180  			if err != nil {
   181  				t.Errorf("Failaed to do PastBackoffLimit: %v", err)
   182  			}
   183  			if tc.wantPastBackOffLimit != got {
   184  				t.Errorf("Unexpected pastBackoffLimit: \nwant: %v\ngot: %v\n", tc.wantPastBackOffLimit, got)
   185  			}
   186  		})
   187  	}
   188  }
   189  
   190  func TestPastActiveDeadline(T *testing.T) {
   191  	cases := map[string]struct {
   192  		activeDeadlineSeconds         int64
   193  		wantPastActiveDeadlineSeconds bool
   194  	}{
   195  		"activeDeadlineSeconds is 0": {
   196  			activeDeadlineSeconds:         0,
   197  			wantPastActiveDeadlineSeconds: true,
   198  		},
   199  		"activeDeadlineSeconds is 2": {
   200  			activeDeadlineSeconds:         2,
   201  			wantPastActiveDeadlineSeconds: false,
   202  		},
   203  	}
   204  	for name, tc := range cases {
   205  		T.Run(name, func(t *testing.T) {
   206  			jobController := JobController{}
   207  			runPolicy := &apiv1.RunPolicy{
   208  				ActiveDeadlineSeconds: &tc.activeDeadlineSeconds,
   209  			}
   210  			jobStatus := apiv1.JobStatus{
   211  				StartTime: &metav1.Time{
   212  					Time: time.Now(),
   213  				},
   214  			}
   215  			if got := jobController.PastActiveDeadline(runPolicy, jobStatus); tc.wantPastActiveDeadlineSeconds != got {
   216  				t.Errorf("Unexpected PastActiveDeadline: \nwant: %v\ngot: %v\n", tc.wantPastActiveDeadlineSeconds, got)
   217  			}
   218  		})
   219  	}
   220  }
   221  
   222  func newPod(name string, phase corev1.PodPhase) *corev1.Pod {
   223  	pod := &corev1.Pod{
   224  		ObjectMeta: metav1.ObjectMeta{
   225  			Name: name,
   226  			Labels: map[string]string{
   227  				apiv1.ReplicaTypeLabel: "test",
   228  			},
   229  		},
   230  		Status: corev1.PodStatus{
   231  			Phase: phase,
   232  		},
   233  	}
   234  	return pod
   235  }
   236  
   237  func newService(name string) *corev1.Service {
   238  	service := &corev1.Service{
   239  		ObjectMeta: metav1.ObjectMeta{
   240  			Name: name,
   241  		},
   242  	}
   243  	return service
   244  }