volcano.sh/volcano@v1.9.0/test/e2e/schedulingaction/reclaim.go (about)

     1  /*
     2  Copyright 2021 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package schedulingaction
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"time"
    23  
    24  	. "github.com/onsi/ginkgo/v2"
    25  	. "github.com/onsi/gomega"
    26  	"gopkg.in/yaml.v2"
    27  	v1 "k8s.io/api/core/v1"
    28  	"k8s.io/apimachinery/pkg/api/resource"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/apimachinery/pkg/labels"
    31  
    32  	batchv1alpha1 "volcano.sh/apis/pkg/apis/batch/v1alpha1"
    33  	schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    34  
    35  	e2eutil "volcano.sh/volcano/test/e2e/util"
    36  )
    37  
    38  var _ = Describe("Reclaim E2E Test", func() {
    39  
    40  	CreateReclaimJob := func(ctx *e2eutil.TestContext, req v1.ResourceList, name string, queue string, pri string, nodeName string, waitTaskReady bool) (*batchv1alpha1.Job, error) {
    41  		job := &e2eutil.JobSpec{
    42  			Tasks: []e2eutil.TaskSpec{
    43  				{
    44  					Img:    e2eutil.DefaultNginxImage,
    45  					Req:    req,
    46  					Min:    1,
    47  					Rep:    1,
    48  					Labels: map[string]string{schedulingv1beta1.PodPreemptable: "true"},
    49  				},
    50  			},
    51  			Name:     name,
    52  			Queue:    queue,
    53  			NodeName: nodeName,
    54  		}
    55  		if pri != "" {
    56  			job.Pri = pri
    57  		}
    58  		batchJob, err := e2eutil.CreateJobInner(ctx, job)
    59  		if err != nil {
    60  			return nil, err
    61  		}
    62  		if waitTaskReady {
    63  			err = e2eutil.WaitTasksReady(ctx, batchJob, 1)
    64  		}
    65  		return batchJob, err
    66  	}
    67  
    68  	WaitQueueStatus := func(ctx *e2eutil.TestContext, status string, num int32, queue string) error {
    69  		err := e2eutil.WaitQueueStatus(func() (bool, error) {
    70  			queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), queue, metav1.GetOptions{})
    71  			Expect(err).NotTo(HaveOccurred(), "Get queue %s failed", queue)
    72  			switch status {
    73  			case "Running":
    74  				return queue.Status.Running == num, nil
    75  			case "Open":
    76  				return queue.Status.State == schedulingv1beta1.QueueStateOpen, nil
    77  			case "Pending":
    78  				return queue.Status.Pending == num, nil
    79  			case "Inqueue":
    80  				return queue.Status.Inqueue == num, nil
    81  			default:
    82  				return false, nil
    83  			}
    84  		})
    85  		return err
    86  	}
    87  
    88  	It("Reclaim Case 1: New queue with job created no reclaim when resource is enough", func() {
    89  		q1 := e2eutil.DefaultQueue
    90  		q2 := "reclaim-q2"
    91  		ctx := e2eutil.InitTestContext(e2eutil.Options{
    92  			Queues:             []string{q2},
    93  			NodesNumLimit:      4,
    94  			NodesResourceLimit: e2eutil.CPU1Mem1,
    95  		})
    96  
    97  		defer e2eutil.CleanupTestContext(ctx)
    98  
    99  		By("Setup initial jobs")
   100  
   101  		_, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true)
   102  		Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")
   103  
   104  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true)
   105  		Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")
   106  
   107  		By("Create new coming queue and job")
   108  		q3 := "reclaim-q3"
   109  		ctx.Queues = append(ctx.Queues, q3)
   110  		e2eutil.CreateQueues(ctx)
   111  
   112  		err = WaitQueueStatus(ctx, "Open", 1, q1)
   113  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")
   114  
   115  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "", true)
   116  		Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")
   117  
   118  		By("Make sure all job running")
   119  
   120  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   121  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   122  
   123  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   124  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   125  
   126  		err = WaitQueueStatus(ctx, "Running", 1, q3)
   127  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   128  
   129  	})
   130  
   131  	It("Reclaim Case 3: New queue with job created no reclaim when job.PodGroup.Status.Phase pending", func() {
   132  		q1 := e2eutil.DefaultQueue
   133  		q2 := "reclaim-q2"
   134  		j1 := "reclaim-j1"
   135  		j2 := "reclaim-j2"
   136  		j3 := "reclaim-j3"
   137  
   138  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   139  			Queues:             []string{q2},
   140  			NodesNumLimit:      3,
   141  			NodesResourceLimit: e2eutil.CPU1Mem1,
   142  			PriorityClasses: map[string]int32{
   143  				"low-priority":  10,
   144  				"high-priority": 10000,
   145  			},
   146  		})
   147  
   148  		defer e2eutil.CleanupTestContext(ctx)
   149  
   150  		By("Setup initial jobs")
   151  
   152  		_, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, j1, q1, "", "", true)
   153  		Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")
   154  
   155  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, j2, q2, "", "", true)
   156  		Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")
   157  
   158  		By("Create new coming queue and job")
   159  		q3 := "reclaim-q3"
   160  		ctx.Queues = append(ctx.Queues, q3)
   161  		e2eutil.CreateQueues(ctx)
   162  
   163  		err = WaitQueueStatus(ctx, "Open", 1, q1)
   164  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")
   165  
   166  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, j3, q3, "", "", true)
   167  		Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")
   168  
   169  		// delete pod of job3 to make sure reclaim-j3 podgroup is pending
   170  		listOptions := metav1.ListOptions{
   171  			LabelSelector: labels.Set(map[string]string{batchv1alpha1.JobNameKey: j3}).String(),
   172  		}
   173  
   174  		job3pods, err := ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).List(context.TODO(), listOptions)
   175  		Expect(err).NotTo(HaveOccurred(), "Get %s pod failed", j3)
   176  
   177  		By("Make sure q1 q2 with job running in it.")
   178  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   179  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   180  
   181  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   182  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   183  
   184  		for _, pod := range job3pods.Items {
   185  			err = ctx.Kubeclient.CoreV1().Pods(pod.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
   186  			Expect(err).NotTo(HaveOccurred(), "Failed to delete pod %s", pod.Name)
   187  		}
   188  
   189  		By("Q3 pending when we delete it.")
   190  		err = WaitQueueStatus(ctx, "Pending", 1, q3)
   191  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue pending")
   192  	})
   193  
   194  	It("Reclaim Case 4: New queue with job created no reclaim when new queue is not created", func() {
   195  		q1 := e2eutil.DefaultQueue
   196  		q2 := "reclaim-q2"
   197  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   198  			Queues:             []string{q2},
   199  			NodesNumLimit:      3,
   200  			NodesResourceLimit: e2eutil.CPU1Mem1,
   201  			PriorityClasses: map[string]int32{
   202  				"low-priority":  10,
   203  				"high-priority": 10000,
   204  			},
   205  		})
   206  
   207  		defer e2eutil.CleanupTestContext(ctx)
   208  
   209  		By("Setup initial jobs")
   210  
   211  		_, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true)
   212  		Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")
   213  
   214  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true)
   215  		Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")
   216  
   217  		By("Create new coming job")
   218  		q3 := "reclaim-q3"
   219  
   220  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "", true)
   221  		Expect(err).Should(HaveOccurred(), "job3 create failed when queue3 is not created")
   222  
   223  		By("Make sure all job running")
   224  
   225  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   226  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   227  
   228  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   229  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   230  	})
   231  
   232  	// As we agreed, this is not intended behavior, actually, it is a bug.
   233  	It("Reclaim Case 5: New queue with job created no reclaim when job or task is low-priority", func() {
   234  		q1 := e2eutil.DefaultQueue
   235  		q2 := "reclaim-q2"
   236  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   237  			Queues:             []string{q2},
   238  			NodesNumLimit:      3,
   239  			NodesResourceLimit: e2eutil.CPU1Mem1,
   240  			PriorityClasses: map[string]int32{
   241  				"low-priority":  10,
   242  				"high-priority": 10000,
   243  			},
   244  		})
   245  
   246  		defer e2eutil.CleanupTestContext(ctx)
   247  
   248  		By("Setup initial jobs")
   249  
   250  		_, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "high-priority", "", true)
   251  		Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")
   252  
   253  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "high-priority", "", true)
   254  		Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")
   255  
   256  		By("Create new coming queue and job")
   257  		q3 := "reclaim-q3"
   258  
   259  		err = WaitQueueStatus(ctx, "Open", 1, q1)
   260  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")
   261  
   262  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "low-priority", "", true)
   263  		Expect(err).Should(HaveOccurred(), "job3 create failed when queue3 is not created")
   264  
   265  		By("Make sure all job running")
   266  
   267  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   268  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   269  
   270  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   271  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   272  	})
   273  
   274  	It("Reclaim Case 6: New queue with job created no reclaim when overused", func() {
   275  		q1 := e2eutil.DefaultQueue
   276  		q2 := "reclaim-q2"
   277  		q3 := "reclaim-q3"
   278  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   279  			Queues:             []string{q2, q3},
   280  			NodesNumLimit:      3,
   281  			NodesResourceLimit: e2eutil.CPU1Mem1,
   282  			PriorityClasses: map[string]int32{
   283  				"low-priority":  10,
   284  				"high-priority": 10000,
   285  			},
   286  		})
   287  
   288  		defer e2eutil.CleanupTestContext(ctx)
   289  
   290  		By("Setup initial jobs")
   291  
   292  		_, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true)
   293  		Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")
   294  
   295  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true)
   296  		Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")
   297  
   298  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "", true)
   299  		Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")
   300  
   301  		By("Create job4 to testing overused cases.")
   302  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j4", q3, "", "", false)
   303  		Expect(err).NotTo(HaveOccurred(), "Wait for job4 failed")
   304  
   305  		time.Sleep(10 * time.Second)
   306  		By("Make sure all job running")
   307  
   308  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   309  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   310  
   311  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   312  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   313  
   314  		err = WaitQueueStatus(ctx, "Running", 1, q3)
   315  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   316  
   317  		err = WaitQueueStatus(ctx, "Inqueue", 1, q3)
   318  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue Inqueue")
   319  	})
   320  
   321  	It("Reclaim Case 7:  New queue with job created no reclaim when job not satisfied with predicates", func() {
   322  		q1 := e2eutil.DefaultQueue
   323  		q2 := "reclaim-q2"
   324  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   325  			Queues:             []string{q2},
   326  			NodesNumLimit:      3,
   327  			NodesResourceLimit: e2eutil.CPU1Mem1,
   328  		})
   329  
   330  		defer e2eutil.CleanupTestContext(ctx)
   331  
   332  		By("Setup initial jobs")
   333  
   334  		_, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true)
   335  		Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")
   336  
   337  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true)
   338  		Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")
   339  
   340  		By("Create new coming queue and job")
   341  		q3 := "reclaim-q3"
   342  		ctx.Queues = append(ctx.Queues, q3)
   343  		e2eutil.CreateQueues(ctx)
   344  
   345  		err = WaitQueueStatus(ctx, "Open", 1, q1)
   346  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")
   347  
   348  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "fake-node", false)
   349  		Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")
   350  
   351  		time.Sleep(10 * time.Second)
   352  		By("Make sure all job running")
   353  
   354  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   355  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   356  
   357  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   358  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   359  
   360  		// TODO: it is a bug : the job status is pending but podgroup status is running
   361  		err = WaitQueueStatus(ctx, "Running", 1, q3)
   362  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue Running")
   363  
   364  	})
   365  
   366  	It("Reclaim Case 8: New queue with job created no reclaim when task resources less than reclaimable resource", func() {
   367  		q1 := e2eutil.DefaultQueue
   368  		q2 := "reclaim-q2"
   369  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   370  			Queues:             []string{q2},
   371  			NodesNumLimit:      3,
   372  			NodesResourceLimit: e2eutil.CPU1Mem1,
   373  			PriorityClasses: map[string]int32{
   374  				"low-priority":  10,
   375  				"high-priority": 10000,
   376  			},
   377  		})
   378  
   379  		defer e2eutil.CleanupTestContext(ctx)
   380  
   381  		By("Setup initial jobs")
   382  
   383  		_, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true)
   384  		Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")
   385  
   386  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true)
   387  		Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")
   388  
   389  		By("Create new coming queue and job")
   390  		q3 := "reclaim-q3"
   391  		ctx.Queues = append(ctx.Queues, q3)
   392  		e2eutil.CreateQueues(ctx)
   393  
   394  		err = WaitQueueStatus(ctx, "Open", 1, q1)
   395  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")
   396  
   397  		job := &e2eutil.JobSpec{
   398  			Tasks: []e2eutil.TaskSpec{
   399  				{
   400  					Img: e2eutil.DefaultNginxImage,
   401  					Req: e2eutil.CPU4Mem4,
   402  					Min: 1,
   403  					Rep: 1,
   404  				},
   405  			},
   406  			Name:  "reclaim-j4",
   407  			Queue: q3,
   408  		}
   409  		e2eutil.CreateJob(ctx, job)
   410  
   411  		time.Sleep(10 * time.Second)
   412  		By("Make sure all job running")
   413  
   414  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   415  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   416  
   417  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   418  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   419  
   420  		err = WaitQueueStatus(ctx, "Inqueue", 1, q3)
   421  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue Inqueue")
   422  	})
   423  
   424  	It("Reclaim Case 9:  New queue with job created, all queues.spec.reclaimable is false, no reclaim", func() {
   425  		q1 := e2eutil.DefaultQueue
   426  		q2 := "reclaim-q2"
   427  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   428  			Queues:             []string{q2},
   429  			NodesNumLimit:      3,
   430  			NodesResourceLimit: e2eutil.CPU1Mem1,
   431  		})
   432  
   433  		defer e2eutil.CleanupTestContext(ctx)
   434  
   435  		By("Setup initial jobs")
   436  
   437  		_, err := CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j1", q1, "", "", true)
   438  		Expect(err).NotTo(HaveOccurred(), "Wait for job1 failed")
   439  
   440  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j2", q2, "", "", true)
   441  		Expect(err).NotTo(HaveOccurred(), "Wait for job2 failed")
   442  
   443  		By("Create new coming queue and job")
   444  		q3 := "reclaim-q3"
   445  		ctx.Queues = append(ctx.Queues, q3)
   446  		e2eutil.CreateQueues(ctx)
   447  
   448  		e2eutil.SetQueueReclaimable(ctx, []string{q1, q2}, false)
   449  		defer e2eutil.SetQueueReclaimable(ctx, []string{q1}, true)
   450  
   451  		err = WaitQueueStatus(ctx, "Open", 1, q1)
   452  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue open")
   453  
   454  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "", "", true)
   455  		Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")
   456  
   457  		By("Make sure all job running")
   458  
   459  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   460  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   461  
   462  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   463  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   464  
   465  		err = WaitQueueStatus(ctx, "Running", 1, q3)
   466  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue running")
   467  
   468  	})
   469  
   470  	// Reclaim rely on priority is a bug here.
   471  	It("Reclaim Case 10: Multi reclaimed queue", func() {
   472  		q1 := e2eutil.DefaultQueue
   473  		q2 := "reclaim-q2"
   474  		q3 := "reclaim-q3"
   475  		q4 := "reclaim-q4"
   476  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   477  			Queues:             []string{q2, q3, q4},
   478  			NodesNumLimit:      4,
   479  			NodesResourceLimit: e2eutil.CPU1Mem1,
   480  			PriorityClasses: map[string]int32{
   481  				"low-priority":  10,
   482  				"high-priority": 10000,
   483  			},
   484  		})
   485  
   486  		defer e2eutil.CleanupTestContext(ctx)
   487  
   488  		By("Setup initial jobs")
   489  
   490  		spec := &e2eutil.JobSpec{
   491  			Tasks: []e2eutil.TaskSpec{
   492  				{
   493  					Img:    e2eutil.DefaultNginxImage,
   494  					Req:    e2eutil.CPU1Mem1,
   495  					Min:    1,
   496  					Rep:    2,
   497  					Labels: map[string]string{schedulingv1beta1.PodPreemptable: "true"},
   498  				},
   499  			},
   500  		}
   501  
   502  		spec.Name = "reclaim-j1"
   503  		spec.Queue = q1
   504  		spec.Pri = "low-priority"
   505  		job1 := e2eutil.CreateJob(ctx, spec)
   506  		err := e2eutil.WaitJobReady(ctx, job1)
   507  		Expect(err).NotTo(HaveOccurred())
   508  
   509  		spec.Name = "reclaim-j2"
   510  		spec.Queue = q2
   511  		spec.Pri = "low-priority"
   512  		job2 := e2eutil.CreateJob(ctx, spec)
   513  		err = e2eutil.WaitJobReady(ctx, job2)
   514  		Expect(err).NotTo(HaveOccurred())
   515  
   516  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   517  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue1 running")
   518  
   519  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   520  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue2 running")
   521  
   522  		By("Create coming jobs")
   523  
   524  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j3", q3, "high-priority", "", true)
   525  		Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")
   526  
   527  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j4", q4, "high-priority", "", true)
   528  		Expect(err).NotTo(HaveOccurred(), "Wait for job4 failed")
   529  
   530  		By("Make sure all job running")
   531  
   532  		err = WaitQueueStatus(ctx, "Running", 1, q3)
   533  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue3 running")
   534  
   535  		err = WaitQueueStatus(ctx, "Running", 1, q4)
   536  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue4 running")
   537  
   538  	})
   539  
   540  	// Reclaim for capacity plugin.
   541  	It("Capacity Reclaim Case 11: Multi reclaimed queue", func() {
   542  		// First replace proportion with capacity plugin.
   543  		cmc := e2eutil.NewConfigMapCase("volcano-system", "integration-scheduler-configmap")
   544  		cmc.ChangeBy(func(data map[string]string) (changed bool, changedBefore map[string]string) {
   545  			vcScheConfStr, ok := data["volcano-scheduler-ci.conf"]
   546  			Expect(ok).To(BeTrue())
   547  
   548  			schedulerConf := &e2eutil.SchedulerConfiguration{}
   549  			err := yaml.Unmarshal([]byte(vcScheConfStr), schedulerConf)
   550  			Expect(err).NotTo(HaveOccurred())
   551  			for _, tier := range schedulerConf.Tiers {
   552  				for i, plugin := range tier.Plugins {
   553  					if plugin.Name == "proportion" {
   554  						tier.Plugins[i].Name = "capacity"
   555  						break
   556  					}
   557  				}
   558  			}
   559  
   560  			newVCScheConfBytes, err := yaml.Marshal(schedulerConf)
   561  			Expect(err).NotTo(HaveOccurred())
   562  
   563  			changed = true
   564  			changedBefore = make(map[string]string)
   565  			changedBefore["volcano-scheduler-ci.conf"] = vcScheConfStr
   566  			data["volcano-scheduler-ci.conf"] = string(newVCScheConfBytes)
   567  			return
   568  		})
   569  		defer cmc.UndoChanged()
   570  
   571  		q1 := "reclaim-q1"
   572  		q2 := "reclaim-q2"
   573  		q3 := "reclaim-q3"
   574  		q4 := "reclaim-q4"
   575  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   576  			Queues:        []string{q1, q2, q3, q4},
   577  			NodesNumLimit: 4,
   578  			DeservedResource: map[string]v1.ResourceList{
   579  				q1: {v1.ResourceCPU: *resource.NewQuantity(1, resource.DecimalSI), v1.ResourceMemory: *resource.NewQuantity(1024*1024*1024, resource.BinarySI)},
   580  				q2: {v1.ResourceCPU: *resource.NewQuantity(1, resource.DecimalSI), v1.ResourceMemory: *resource.NewQuantity(1024*1024*1024, resource.BinarySI)},
   581  				q3: {v1.ResourceCPU: *resource.NewQuantity(2, resource.DecimalSI), v1.ResourceMemory: *resource.NewQuantity(2*1024*1024*1024, resource.BinarySI)},
   582  				q4: {v1.ResourceCPU: *resource.NewQuantity(4, resource.DecimalSI), v1.ResourceMemory: *resource.NewQuantity(4*1024*1024*1024, resource.BinarySI)},
   583  			},
   584  			NodesResourceLimit: e2eutil.CPU2Mem2,
   585  			PriorityClasses: map[string]int32{
   586  				"low-priority":  10,
   587  				"high-priority": 10000,
   588  			},
   589  		})
   590  
   591  		defer e2eutil.CleanupTestContext(ctx)
   592  
   593  		By("Setup initial jobs")
   594  
   595  		spec := &e2eutil.JobSpec{
   596  			Tasks: []e2eutil.TaskSpec{
   597  				{
   598  					Img:    e2eutil.DefaultNginxImage,
   599  					Req:    e2eutil.CPU1Mem1,
   600  					Min:    1,
   601  					Rep:    4,
   602  					Labels: map[string]string{schedulingv1beta1.PodPreemptable: "true"},
   603  				},
   604  			},
   605  		}
   606  
   607  		spec.Name = "reclaim-j1"
   608  		spec.Queue = q1
   609  		spec.Pri = "low-priority"
   610  		job1 := e2eutil.CreateJob(ctx, spec)
   611  		err := e2eutil.WaitJobReady(ctx, job1)
   612  		Expect(err).NotTo(HaveOccurred())
   613  
   614  		spec.Name = "reclaim-j2"
   615  		spec.Queue = q2
   616  		spec.Pri = "low-priority"
   617  		job2 := e2eutil.CreateJob(ctx, spec)
   618  		err = e2eutil.WaitJobReady(ctx, job2)
   619  		Expect(err).NotTo(HaveOccurred())
   620  
   621  		err = WaitQueueStatus(ctx, "Running", 1, q1)
   622  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue1 running")
   623  
   624  		err = WaitQueueStatus(ctx, "Running", 1, q2)
   625  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue2 running")
   626  
   627  		By("Create coming jobs")
   628  
   629  		_, err = CreateReclaimJob(ctx, e2eutil.CPU2Mem2, "reclaim-j3", q3, "high-priority", "", true)
   630  		Expect(err).NotTo(HaveOccurred(), "Wait for job3 failed")
   631  
   632  		_, err = CreateReclaimJob(ctx, e2eutil.CPU2Mem2, "reclaim-j4", q4, "high-priority", "", true)
   633  		Expect(err).NotTo(HaveOccurred(), "Wait for job4 failed")
   634  
   635  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j5", q4, "high-priority", "", true)
   636  		Expect(err).NotTo(HaveOccurred(), "Wait for job5 failed")
   637  
   638  		_, err = CreateReclaimJob(ctx, e2eutil.CPU1Mem1, "reclaim-j6", q4, "high-priority", "", true)
   639  		Expect(err).NotTo(HaveOccurred(), "Wait for job6 failed")
   640  
   641  		By("Make sure all job running")
   642  
   643  		err = WaitQueueStatus(ctx, "Running", 1, q3)
   644  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue3 running")
   645  
   646  		err = WaitQueueStatus(ctx, "Running", 3, q4)
   647  		Expect(err).NotTo(HaveOccurred(), "Error waiting for queue4 running")
   648  
   649  	})
   650  
   651  	It("Reclaim", func() {
   652  		Skip("skip: the case has some problem")
   653  		q1, q2 := "reclaim-q1", "reclaim-q2"
   654  		ctx := e2eutil.InitTestContext(e2eutil.Options{
   655  			Queues: []string{q1, q2},
   656  			PriorityClasses: map[string]int32{
   657  				"low-priority":  10,
   658  				"high-priority": 10000,
   659  			},
   660  		})
   661  		defer e2eutil.CleanupTestContext(ctx)
   662  
   663  		slot := e2eutil.OneCPU
   664  		rep := e2eutil.ClusterSize(ctx, slot)
   665  
   666  		spec := &e2eutil.JobSpec{
   667  			Tasks: []e2eutil.TaskSpec{
   668  				{
   669  					Img:    e2eutil.DefaultNginxImage,
   670  					Req:    slot,
   671  					Min:    1,
   672  					Rep:    rep,
   673  					Labels: map[string]string{schedulingv1beta1.PodPreemptable: "true"},
   674  				},
   675  			},
   676  		}
   677  
   678  		spec.Name = "q1-qj-1"
   679  		spec.Queue = q1
   680  		spec.Pri = "low-priority"
   681  		job1 := e2eutil.CreateJob(ctx, spec)
   682  		err := e2eutil.WaitJobReady(ctx, job1)
   683  		Expect(err).NotTo(HaveOccurred())
   684  
   685  		err = e2eutil.WaitQueueStatus(func() (bool, error) {
   686  			queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
   687  			Expect(err).NotTo(HaveOccurred())
   688  			return queue.Status.Running == 1, nil
   689  		})
   690  		Expect(err).NotTo(HaveOccurred())
   691  
   692  		expected := int(rep) / 2
   693  		// Reduce one pod to tolerate decimal fraction.
   694  		if expected > 1 {
   695  			expected--
   696  		} else {
   697  			err := fmt.Errorf("expected replica <%d> is too small", expected)
   698  			Expect(err).NotTo(HaveOccurred())
   699  		}
   700  
   701  		spec.Name = "q2-qj-2"
   702  		spec.Queue = q2
   703  		spec.Pri = "high-priority"
   704  		job2 := e2eutil.CreateJob(ctx, spec)
   705  		err = e2eutil.WaitTasksReady(ctx, job2, expected)
   706  		Expect(err).NotTo(HaveOccurred())
   707  
   708  		err = e2eutil.WaitTasksReady(ctx, job1, expected)
   709  		Expect(err).NotTo(HaveOccurred())
   710  
   711  		// Test Queue status
   712  		spec = &e2eutil.JobSpec{
   713  			Name:  "q1-qj-2",
   714  			Queue: q1,
   715  			Tasks: []e2eutil.TaskSpec{
   716  				{
   717  					Img: e2eutil.DefaultNginxImage,
   718  					Req: slot,
   719  					Min: rep * 2,
   720  					Rep: rep * 2,
   721  				},
   722  			},
   723  		}
   724  		job3 := e2eutil.CreateJob(ctx, spec)
   725  		err = e2eutil.WaitJobStatePending(ctx, job3)
   726  		Expect(err).NotTo(HaveOccurred())
   727  		err = e2eutil.WaitQueueStatus(func() (bool, error) {
   728  			queue, err := ctx.Vcclient.SchedulingV1beta1().Queues().Get(context.TODO(), q1, metav1.GetOptions{})
   729  			Expect(err).NotTo(HaveOccurred())
   730  			return queue.Status.Pending == 1, nil
   731  		})
   732  		Expect(err).NotTo(HaveOccurred())
   733  	})
   734  })