volcano.sh/volcano@v1.9.0/test/e2e/jobseq/queue_job_status.go (about)

     1  /*
     2  Copyright 2022 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package jobseq
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"strconv"
    23  
    24  	. "github.com/onsi/ginkgo/v2"
    25  	. "github.com/onsi/gomega"
    26  	corev1 "k8s.io/api/core/v1"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  
    29  	e2eutil "volcano.sh/volcano/test/e2e/util"
    30  )
    31  
    32  var _ = Describe("Queue Job Status Transition", func() {
    33  
    34  	It("Transform from inqueque to running should succeed", func() {
    35  		By("Prepare 2 job")
    36  		var ctx *e2eutil.TestContext
    37  		var q1 string
    38  		var rep int32
    39  		q1 = "queue-jobs-status-transition"
    40  		ctx = e2eutil.InitTestContext(e2eutil.Options{
    41  			Queues: []string{q1},
    42  		})
    43  		slot := e2eutil.HalfCPU
    44  		rep = e2eutil.ClusterSize(ctx, slot)
    45  
    46  		if rep < 4 {
    47  			err := fmt.Errorf("You need at least 2 logical cpu for this test case, please skip 'Queue Job Status Transition' when you see this message")
    48  			Expect(err).NotTo(HaveOccurred())
    49  		}
    50  
    51  		for i := 0; i < 2; i++ {
    52  			spec := &e2eutil.JobSpec{
    53  				Tasks: []e2eutil.TaskSpec{
    54  					{
    55  						Name: "queue-job",
    56  						Img:  e2eutil.DefaultNginxImage,
    57  						Req:  slot,
    58  						Min:  rep,
    59  						Rep:  rep,
    60  					},
    61  				},
    62  			}
    63  			spec.Name = "queue-job-status-transition-test-job-" + strconv.Itoa(i)
    64  			spec.Queue = q1
    65  			e2eutil.CreateJob(ctx, spec)
    66  		}
    67  
    68  		By("Verify queue have pod groups inqueue")
    69  		err := e2eutil.WaitQueueStatus(func() (bool, error) {
    70  			queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
    71  			Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q1)
    72  			return queue.Status.Inqueue > 0, nil
    73  		})
    74  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue inqueue")
    75  
    76  		By("Verify queue have pod groups running")
    77  		err = e2eutil.WaitQueueStatus(func() (bool, error) {
    78  			queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
    79  			Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q1)
    80  			return queue.Status.Running > 0, nil
    81  		})
    82  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
    83  		e2eutil.CleanupTestContext(ctx)
    84  	})
    85  
    86  	It("Transform from running to pending should succeed", func() {
    87  		By("Prepare 2 job")
    88  		var ctx *e2eutil.TestContext
    89  		var q1 string
    90  		var podNamespace string
    91  		var rep int32
    92  		var firstJobName string
    93  
    94  		q1 = "queue-jobs-status-transition"
    95  		ctx = e2eutil.InitTestContext(e2eutil.Options{
    96  			Queues: []string{q1},
    97  		})
    98  		podNamespace = ctx.Namespace
    99  		slot := e2eutil.HalfCPU
   100  		rep = e2eutil.ClusterSize(ctx, slot)
   101  
   102  		if rep < 4 {
   103  			err := fmt.Errorf("You need at least 2 logical cpu for this test case, please skip 'Queue Job Status Transition' when you see this message")
   104  			Expect(err).NotTo(HaveOccurred())
   105  		}
   106  
   107  		for i := 0; i < 2; i++ {
   108  			spec := &e2eutil.JobSpec{
   109  				Tasks: []e2eutil.TaskSpec{
   110  					{
   111  						Name: "queue-job",
   112  						Img:  e2eutil.DefaultNginxImage,
   113  						Req:  slot,
   114  						Min:  rep,
   115  						Rep:  rep,
   116  					},
   117  				},
   118  			}
   119  			spec.Name = "queue-job-status-transition-test-job-" + strconv.Itoa(i)
   120  			if i == 0 {
   121  				firstJobName = spec.Name
   122  			}
   123  			spec.Queue = q1
   124  			e2eutil.CreateJob(ctx, spec)
   125  		}
   126  
   127  		By("Verify queue have pod groups running")
   128  		err := e2eutil.WaitQueueStatus(func() (bool, error) {
   129  			queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
   130  			Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q1)
   131  			return queue.Status.Running > 0, nil
   132  		})
   133  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   134  
   135  		clusterPods, err := ctx.Kubeclient.CoreV1().Pods(podNamespace).List(context.TODO(), metav1.ListOptions{})
   136  		for _, pod := range clusterPods.Items {
   137  			if pod.Labels["volcano.sh/job-name"] == firstJobName {
   138  				err = ctx.Kubeclient.CoreV1().Pods(podNamespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
   139  				Expect(err).NotTo(HaveOccurred(), "Failed to delete pod %s", pod.Name)
   140  			}
   141  		}
   142  
   143  		By("Verify queue have pod groups Pending")
   144  		err = e2eutil.WaitQueueStatus(func() (bool, error) {
   145  			queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
   146  			Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q1)
   147  			return queue.Status.Pending > 0, nil
   148  		})
   149  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue Pending")
   150  		e2eutil.CleanupTestContext(ctx)
   151  	})
   152  
   153  	It("Transform from running to unknown should succeed", func() {
   154  		By("Prepare 2 job")
   155  		var ctx *e2eutil.TestContext
   156  		var q1 string
   157  		var podNamespace string
   158  		var rep int32
   159  
   160  		q1 = "queue-jobs-status-transition"
   161  		ctx = e2eutil.InitTestContext(e2eutil.Options{
   162  			Queues: []string{q1},
   163  		})
   164  		podNamespace = ctx.Namespace
   165  		slot := e2eutil.HalfCPU
   166  		rep = e2eutil.ClusterSize(ctx, slot)
   167  
   168  		if rep < 4 {
   169  			err := fmt.Errorf("You need at least 2 logical cpu for this test case, please skip 'Queue Job Status Transition' when you see this message")
   170  			Expect(err).NotTo(HaveOccurred())
   171  		}
   172  
   173  		for i := 0; i < 2; i++ {
   174  			spec := &e2eutil.JobSpec{
   175  				Tasks: []e2eutil.TaskSpec{
   176  					{
   177  						Name: "queue-job",
   178  						Img:  e2eutil.DefaultNginxImage,
   179  						Req:  slot,
   180  						Min:  rep,
   181  						Rep:  rep,
   182  					},
   183  				},
   184  			}
   185  			spec.Name = "queue-job-status-transition-test-job-" + strconv.Itoa(i)
   186  			spec.Queue = q1
   187  			e2eutil.CreateJob(ctx, spec)
   188  		}
   189  
   190  		By("Verify queue have pod groups running")
   191  		err := e2eutil.WaitQueueStatus(func() (bool, error) {
   192  			queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
   193  			Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q1)
   194  			return queue.Status.Running > 0, nil
   195  		})
   196  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   197  
   198  		By("Delete some of pod which will case pod group status transform from running to unknown.")
   199  		podDeleteNum := 0
   200  
   201  		err = e2eutil.WaitPodPhaseRunningMoreThanNum(ctx, podNamespace, 2)
   202  		Expect(err).NotTo(HaveOccurred(), "Failed waiting for pods")
   203  
   204  		clusterPods, err := ctx.Kubeclient.CoreV1().Pods(podNamespace).List(context.TODO(), metav1.ListOptions{})
   205  		for _, pod := range clusterPods.Items {
   206  			if pod.Status.Phase == corev1.PodRunning {
   207  				err = ctx.Kubeclient.CoreV1().Pods(podNamespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
   208  				Expect(err).NotTo(HaveOccurred(), "Failed to delete pod %s", pod.Name)
   209  				podDeleteNum = podDeleteNum + 1
   210  			}
   211  			if podDeleteNum >= int(rep/2) {
   212  				break
   213  			}
   214  		}
   215  
   216  		By("Verify queue have pod groups unknown")
   217  		err = e2eutil.WaitQueueStatus(func() (bool, error) {
   218  			queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
   219  			Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", q1)
   220  			return queue.Status.Unknown > 0, nil
   221  		})
   222  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue unknown")
   223  		e2eutil.CleanupTestContext(ctx)
   224  	})
   225  })