volcano.sh/volcano@v1.9.0/test/e2e/jobp/job_restart.go (about) 1 /* 2 Copyright 2021 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package jobp 18 19 import ( 20 "context" 21 22 . "github.com/onsi/ginkgo/v2" 23 . "github.com/onsi/gomega" 24 v1 "k8s.io/api/core/v1" 25 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 vcbatch "volcano.sh/apis/pkg/apis/batch/v1alpha1" 27 "volcano.sh/apis/pkg/apis/bus/v1alpha1" 28 jobctl "volcano.sh/volcano/pkg/controllers/job" 29 30 e2eutil "volcano.sh/volcano/test/e2e/util" 31 ) 32 33 var _ = Describe("Test job restart", func() { 34 It("Retain failed pod on last Retry", func() { 35 ctx := e2eutil.InitTestContext(e2eutil.Options{}) 36 defer e2eutil.CleanupTestContext(ctx) 37 38 jobName := "last-retry-job" 39 By("create job") 40 var minSuccess int32 = 2 41 job := e2eutil.CreateJob(ctx, &e2eutil.JobSpec{ 42 Name: jobName, 43 MinSuccess: &minSuccess, 44 Min: 2, 45 Policies: []vcbatch.LifecyclePolicy{ 46 {Event: v1alpha1.PodEvictedEvent, Action: v1alpha1.RestartJobAction}, 47 {Event: v1alpha1.PodFailedEvent, Action: v1alpha1.RestartJobAction}, 48 }, 49 MaxRetry: 2, 50 Tasks: []e2eutil.TaskSpec{ 51 { 52 Name: "running-task", 53 Img: e2eutil.DefaultBusyBoxImage, 54 Min: 1, 55 Rep: 1, 56 Command: "sleep 1000", 57 RestartPolicy: v1.RestartPolicyNever, 58 MaxRetry: 1, 59 }, 60 { 61 Name: "failed-task", 62 Img: e2eutil.DefaultBusyBoxImage, 63 Min: 1, 64 Rep: 1, 65 Command: "sh fake.sh", 66 RestartPolicy: v1.RestartPolicyNever, 67 MaxRetry: 1, 68 }, 69 }, 70 }) 71 72 // wait job failed 73 err := e2eutil.WaitJobStates(ctx, job, []vcbatch.JobPhase{vcbatch.Failed}, e2eutil.FiveMinute) 74 Expect(err).NotTo(HaveOccurred()) 75 76 // check job restart count 77 curjob, err := e2eutil.VcClient.BatchV1alpha1().Jobs(job.Namespace).Get(context.TODO(), jobName, metav1.GetOptions{}) 78 Expect(err).NotTo(HaveOccurred()) 79 Expect(curjob.Status.RetryCount).Should(Equal(int32(2))) 80 81 // wait running pod deleted 82 err = e2eutil.WaitPodGone(ctx, jobctl.MakePodName(jobName, "running-task", 0), job.Namespace) 83 Expect(err).NotTo(HaveOccurred()) 84 85 // failed pod should be existing 86 pod, err := e2eutil.KubeClient.CoreV1().Pods(job.Namespace).Get(context.TODO(), jobctl.MakePodName(jobName, "failed-task", 0), metav1.GetOptions{}) 87 Expect(err).NotTo(HaveOccurred()) 88 Expect(pod.DeletionTimestamp).Should(BeNil()) 89 }) 90 91 It("Retain succeeded pod when job complete", func() { 92 ctx := e2eutil.InitTestContext(e2eutil.Options{}) 93 defer e2eutil.CleanupTestContext(ctx) 94 95 jobName := "complete-retry-job" 96 By("create job") 97 var minSuccess int32 = 2 98 job := e2eutil.CreateJob(ctx, &e2eutil.JobSpec{ 99 Name: jobName, 100 MinSuccess: &minSuccess, 101 Min: 2, 102 Policies: []vcbatch.LifecyclePolicy{ 103 {Event: v1alpha1.PodEvictedEvent, Action: v1alpha1.RestartJobAction}, 104 {Event: v1alpha1.PodFailedEvent, Action: v1alpha1.RestartJobAction}, 105 }, 106 MaxRetry: 1, 107 Tasks: []e2eutil.TaskSpec{ 108 { 109 Name: "running-task", 110 Img: e2eutil.DefaultBusyBoxImage, 111 Min: 1, 112 Rep: 1, 113 Command: "sleep 1000", 114 RestartPolicy: v1.RestartPolicyNever, 115 MaxRetry: 1, 116 }, 117 { 118 Name: "succeeded-task", 119 Img: e2eutil.DefaultBusyBoxImage, 120 Policies: []vcbatch.LifecyclePolicy{ 121 {Event: v1alpha1.TaskCompletedEvent, Action: v1alpha1.CompleteJobAction}, 122 }, 123 Min: 1, 124 Rep: 1, 125 Command: "sleep 1", 126 RestartPolicy: v1.RestartPolicyNever, 127 MaxRetry: 1, 128 }, 129 }, 130 }) 131 132 // wait job failed 133 err := e2eutil.WaitJobStates(ctx, job, []vcbatch.JobPhase{vcbatch.Completed}, e2eutil.FiveMinute) 134 Expect(err).NotTo(HaveOccurred()) 135 136 // check job restart count 137 curjob, err := e2eutil.VcClient.BatchV1alpha1().Jobs(job.Namespace).Get(context.TODO(), jobName, metav1.GetOptions{}) 138 Expect(err).NotTo(HaveOccurred()) 139 Expect(curjob.Status.RetryCount).Should(Equal(int32(0))) 140 141 // wait running pod deleted 142 err = e2eutil.WaitPodGone(ctx, jobctl.MakePodName(jobName, "running-task", 0), job.Namespace) 143 Expect(err).NotTo(HaveOccurred()) 144 145 // succeeded pod should be existing 146 pod, err := e2eutil.KubeClient.CoreV1().Pods(job.Namespace).Get(context.TODO(), jobctl.MakePodName(jobName, "succeeded-task", 0), metav1.GetOptions{}) 147 Expect(err).NotTo(HaveOccurred()) 148 Expect(pod.DeletionTimestamp).Should(BeNil()) 149 }) 150 })