volcano.sh/volcano@v1.9.0/pkg/scheduler/actions/preempt/preempt_test.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package preempt
    18  
    19  import (
    20  	"reflect"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/agiledragon/gomonkey/v2"
    25  	v1 "k8s.io/api/core/v1"
    26  	schedulingv1 "k8s.io/api/scheduling/v1"
    27  	"k8s.io/client-go/tools/record"
    28  
    29  	schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    30  	"volcano.sh/volcano/cmd/scheduler/app/options"
    31  	"volcano.sh/volcano/pkg/scheduler/api"
    32  	"volcano.sh/volcano/pkg/scheduler/cache"
    33  	"volcano.sh/volcano/pkg/scheduler/conf"
    34  	"volcano.sh/volcano/pkg/scheduler/framework"
    35  	"volcano.sh/volcano/pkg/scheduler/plugins/conformance"
    36  	"volcano.sh/volcano/pkg/scheduler/plugins/gang"
    37  	"volcano.sh/volcano/pkg/scheduler/plugins/priority"
    38  	"volcano.sh/volcano/pkg/scheduler/plugins/proportion"
    39  	"volcano.sh/volcano/pkg/scheduler/util"
    40  )
    41  
    42  func TestPreempt(t *testing.T) {
    43  	var tmp *cache.SchedulerCache
    44  	patchUpdateQueueStatus := gomonkey.ApplyMethod(reflect.TypeOf(tmp), "UpdateQueueStatus", func(scCache *cache.SchedulerCache, queue *api.QueueInfo) error {
    45  		return nil
    46  	})
    47  	defer patchUpdateQueueStatus.Reset()
    48  
    49  	framework.RegisterPluginBuilder(conformance.PluginName, conformance.New)
    50  	framework.RegisterPluginBuilder(gang.PluginName, gang.New)
    51  	framework.RegisterPluginBuilder(priority.PluginName, priority.New)
    52  	framework.RegisterPluginBuilder(proportion.PluginName, proportion.New)
    53  	options.ServerOpts = &options.ServerOption{
    54  		MinNodesToFind:             100,
    55  		MinPercentageOfNodesToFind: 5,
    56  		PercentageOfNodesToFind:    100,
    57  	}
    58  	defer framework.CleanupPluginBuilders()
    59  
    60  	tests := []struct {
    61  		name      string
    62  		podGroups []*schedulingv1beta1.PodGroup
    63  		pods      []*v1.Pod
    64  		nodes     []*v1.Node
    65  		queues    []*schedulingv1beta1.Queue
    66  		expected  int
    67  	}{
    68  		{
    69  			name: "do not preempt if there are enough idle resources",
    70  			podGroups: []*schedulingv1beta1.PodGroup{
    71  				util.BuildPodGroup("pg1", "c1", "q1", 3, map[string]int32{"": 3}, schedulingv1beta1.PodGroupInqueue),
    72  			},
    73  			pods: []*v1.Pod{
    74  				util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", make(map[string]string), make(map[string]string)),
    75  				util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", make(map[string]string), make(map[string]string)),
    76  				util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("1", "1G"), "pg1", make(map[string]string), make(map[string]string)),
    77  			},
    78  			// If there are enough idle resources on the node, then there is no need to preempt anything.
    79  			nodes: []*v1.Node{
    80  				util.BuildNode("n1", api.BuildResourceList("10", "10G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)),
    81  			},
    82  			queues: []*schedulingv1beta1.Queue{
    83  				util.BuildQueue("q1", 1, nil),
    84  			},
    85  			expected: 0,
    86  		},
    87  		{
    88  			name: "do not preempt if job is pipelined",
    89  			podGroups: []*schedulingv1beta1.PodGroup{
    90  				util.BuildPodGroup("pg1", "c1", "q1", 1, map[string]int32{"": 2}, schedulingv1beta1.PodGroupInqueue),
    91  				util.BuildPodGroup("pg2", "c1", "q1", 1, map[string]int32{"": 2}, schedulingv1beta1.PodGroupInqueue),
    92  			},
    93  			// Both pg1 and pg2 jobs are pipelined, because enough pods are already running.
    94  			pods: []*v1.Pod{
    95  				util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", make(map[string]string), make(map[string]string)),
    96  				util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", make(map[string]string), make(map[string]string)),
    97  				util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg2", make(map[string]string), make(map[string]string)),
    98  				util.BuildPod("c1", "preemptor2", "", v1.PodPending, api.BuildResourceList("1", "1G"), "pg2", make(map[string]string), make(map[string]string)),
    99  			},
   100  			// All resources on the node will be in use.
   101  			nodes: []*v1.Node{
   102  				util.BuildNode("n1", api.BuildResourceList("3", "3G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)),
   103  			},
   104  			queues: []*schedulingv1beta1.Queue{
   105  				util.BuildQueue("q1", 1, nil),
   106  			},
   107  			expected: 0,
   108  		},
   109  		{
   110  			name: "preempt one task of different job to fit both jobs on one node",
   111  			podGroups: []*schedulingv1beta1.PodGroup{
   112  				util.BuildPodGroupWithPrio("pg1", "c1", "q1", 1, map[string]int32{"": 2}, schedulingv1beta1.PodGroupInqueue, "low-priority"),
   113  				util.BuildPodGroupWithPrio("pg2", "c1", "q1", 1, map[string]int32{"": 2}, schedulingv1beta1.PodGroupInqueue, "high-priority"),
   114  			},
   115  			pods: []*v1.Pod{
   116  				util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   117  				util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", make(map[string]string), make(map[string]string)),
   118  				util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("1", "1G"), "pg2", make(map[string]string), make(map[string]string)),
   119  				util.BuildPod("c1", "preemptor2", "", v1.PodPending, api.BuildResourceList("1", "1G"), "pg2", make(map[string]string), make(map[string]string)),
   120  			},
   121  			nodes: []*v1.Node{
   122  				util.BuildNode("n1", api.BuildResourceList("2", "2G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)),
   123  			},
   124  			queues: []*schedulingv1beta1.Queue{
   125  				util.BuildQueue("q1", 1, nil),
   126  			},
   127  			expected: 1,
   128  		},
   129  		{
   130  			name: "preempt enough tasks to fit large task of different job",
   131  			podGroups: []*schedulingv1beta1.PodGroup{
   132  				util.BuildPodGroupWithPrio("pg1", "c1", "q1", 1, map[string]int32{"": 3}, schedulingv1beta1.PodGroupInqueue, "low-priority"),
   133  				util.BuildPodGroupWithPrio("pg2", "c1", "q1", 1, map[string]int32{"": 1}, schedulingv1beta1.PodGroupInqueue, "high-priority"),
   134  			},
   135  			// There are 3 cpus and 3G of memory idle and 3 tasks running each consuming 1 cpu and 1G of memory.
   136  			// Big task requiring 5 cpus and 5G of memory should preempt 2 of 3 running tasks to fit into the node.
   137  			pods: []*v1.Pod{
   138  				util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   139  				util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   140  				util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   141  				util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("5", "5G"), "pg2", make(map[string]string), make(map[string]string)),
   142  			},
   143  			nodes: []*v1.Node{
   144  				util.BuildNode("n1", api.BuildResourceList("6", "6G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)),
   145  			},
   146  			queues: []*schedulingv1beta1.Queue{
   147  				util.BuildQueue("q1", 1, nil),
   148  			},
   149  			expected: 2,
   150  		},
   151  		{
   152  			// case about #3161
   153  			name: "preempt low priority job in same queue",
   154  			podGroups: []*schedulingv1beta1.PodGroup{
   155  				util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"),
   156  				util.BuildPodGroupWithPrio("pg2", "c1", "q1", 1, map[string]int32{"": 1}, schedulingv1beta1.PodGroupInqueue, "high-priority"),
   157  			},
   158  			pods: []*v1.Pod{
   159  				util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("3", "3G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   160  				util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("3", "3G"), "pg2", make(map[string]string), make(map[string]string)),
   161  			},
   162  			nodes: []*v1.Node{
   163  				util.BuildNode("n1", api.BuildResourceList("12", "12G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)),
   164  			},
   165  			queues: []*schedulingv1beta1.Queue{
   166  				util.BuildQueue("q1", 1, api.BuildResourceList("4", "4G")),
   167  			},
   168  			expected: 1,
   169  		},
   170  		{
   171  			// case about #3161
   172  			name: "preempt low priority job in same queue: allocatable and has enough resource, don't preempt",
   173  			podGroups: []*schedulingv1beta1.PodGroup{
   174  				util.BuildPodGroupWithPrio("pg1", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"),
   175  				util.BuildPodGroupWithPrio("pg2", "c1", "q1", 1, map[string]int32{"": 1}, schedulingv1beta1.PodGroupInqueue, "high-priority"),
   176  			},
   177  			pods: []*v1.Pod{
   178  				util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("3", "3G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   179  				util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("3", "3G"), "pg2", make(map[string]string), make(map[string]string)),
   180  			},
   181  			nodes: []*v1.Node{
   182  				util.BuildNode("n1", api.BuildResourceList("12", "12G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)),
   183  			},
   184  			queues: []*schedulingv1beta1.Queue{
   185  				util.BuildQueue("q1", 1, api.BuildResourceList("6", "6G")),
   186  			},
   187  			expected: 0,
   188  		},
   189  		{
   190  			// case about issue #2232
   191  			name: "preempt low priority job in same queue",
   192  			podGroups: []*schedulingv1beta1.PodGroup{
   193  				util.BuildPodGroupWithPrio("pg1", "c1", "q1", 1, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"),
   194  				util.BuildPodGroupWithPrio("pg2", "c1", "q1", 1, map[string]int32{"": 1}, schedulingv1beta1.PodGroupInqueue, "high-priority"),
   195  			},
   196  			pods: []*v1.Pod{
   197  				util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   198  				util.BuildPod("c1", "preemptee2", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   199  				util.BuildPod("c1", "preemptee3", "n1", v1.PodRunning, api.BuildResourceList("1", "1G"), "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   200  				util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("1", "1G"), "pg2", make(map[string]string), make(map[string]string)),
   201  			},
   202  			nodes: []*v1.Node{
   203  				util.BuildNode("n1", api.BuildResourceList("12", "12G", []api.ScalarResource{{Name: "pods", Value: "10"}}...), make(map[string]string)),
   204  			},
   205  			queues: []*schedulingv1beta1.Queue{
   206  				util.BuildQueue("q1", 1, api.BuildResourceList("3", "3G")),
   207  			},
   208  			expected: 1,
   209  		},
   210  		{
   211  			// case about #3335
   212  			name: "unBestEffort high-priority pod preempt BestEffort low-priority pod in same queue",
   213  			podGroups: []*schedulingv1beta1.PodGroup{
   214  				util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"),
   215  				util.BuildPodGroupWithPrio("pg2", "c1", "q1", 1, map[string]int32{"": 1}, schedulingv1beta1.PodGroupInqueue, "high-priority"),
   216  			},
   217  			pods: []*v1.Pod{
   218  				util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, v1.ResourceList{}, "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   219  				util.BuildPod("c1", "preemptor1", "", v1.PodPending, api.BuildResourceList("3", "3G"), "pg2", make(map[string]string), make(map[string]string)),
   220  			},
   221  			nodes: []*v1.Node{
   222  				util.BuildNode("n1", api.BuildResourceList("12", "12G", []api.ScalarResource{{Name: "pods", Value: "1"}}...), make(map[string]string)),
   223  			},
   224  			queues: []*schedulingv1beta1.Queue{
   225  				util.BuildQueue("q1", 1, api.BuildResourceList("6", "6G")),
   226  			},
   227  			expected: 1,
   228  		},
   229  		{
   230  			// case about #3335
   231  			name: "BestEffort high-priority pod preempt BestEffort low-priority pod in same queue",
   232  			podGroups: []*schedulingv1beta1.PodGroup{
   233  				util.BuildPodGroupWithPrio("pg1", "c1", "q1", 0, map[string]int32{}, schedulingv1beta1.PodGroupInqueue, "low-priority"),
   234  				util.BuildPodGroupWithPrio("pg2", "c1", "q1", 1, map[string]int32{"": 1}, schedulingv1beta1.PodGroupInqueue, "high-priority"),
   235  			},
   236  			pods: []*v1.Pod{
   237  				util.BuildPod("c1", "preemptee1", "n1", v1.PodRunning, v1.ResourceList{}, "pg1", map[string]string{schedulingv1beta1.PodPreemptable: "true"}, make(map[string]string)),
   238  				util.BuildPod("c1", "preemptor1", "", v1.PodPending, v1.ResourceList{}, "pg2", make(map[string]string), make(map[string]string)),
   239  			},
   240  			nodes: []*v1.Node{
   241  				util.BuildNode("n1", api.BuildResourceList("12", "12G", []api.ScalarResource{{Name: "pods", Value: "1"}}...), make(map[string]string)),
   242  			},
   243  			queues: []*schedulingv1beta1.Queue{
   244  				util.BuildQueue("q1", 1, api.BuildResourceList("6", "6G")),
   245  			},
   246  			expected: 1,
   247  		},
   248  	}
   249  
   250  	preempt := New()
   251  
   252  	for _, test := range tests {
   253  		t.Run(test.name, func(t *testing.T) {
   254  			binder := &util.FakeBinder{
   255  				Binds:   map[string]string{},
   256  				Channel: make(chan string),
   257  			}
   258  			evictor := &util.FakeEvictor{
   259  				Channel: make(chan string),
   260  			}
   261  			schedulerCache := &cache.SchedulerCache{
   262  				Nodes:           make(map[string]*api.NodeInfo),
   263  				Jobs:            make(map[api.JobID]*api.JobInfo),
   264  				Queues:          make(map[api.QueueID]*api.QueueInfo),
   265  				Binder:          binder,
   266  				Evictor:         evictor,
   267  				StatusUpdater:   &util.FakeStatusUpdater{},
   268  				VolumeBinder:    &util.FakeVolumeBinder{},
   269  				PriorityClasses: make(map[string]*schedulingv1.PriorityClass),
   270  
   271  				Recorder: record.NewFakeRecorder(100),
   272  			}
   273  			schedulerCache.PriorityClasses["high-priority"] = &schedulingv1.PriorityClass{
   274  				Value: 100000,
   275  			}
   276  			schedulerCache.PriorityClasses["low-priority"] = &schedulingv1.PriorityClass{
   277  				Value: 10,
   278  			}
   279  			for _, node := range test.nodes {
   280  				schedulerCache.AddOrUpdateNode(node)
   281  			}
   282  			for _, pod := range test.pods {
   283  				schedulerCache.AddPod(pod)
   284  			}
   285  
   286  			for _, ss := range test.podGroups {
   287  				schedulerCache.AddPodGroupV1beta1(ss)
   288  			}
   289  
   290  			for _, q := range test.queues {
   291  				schedulerCache.AddQueueV1beta1(q)
   292  			}
   293  
   294  			trueValue := true
   295  			ssn := framework.OpenSession(schedulerCache, []conf.Tier{
   296  				{
   297  					Plugins: []conf.PluginOption{
   298  						{
   299  							Name:               conformance.PluginName,
   300  							EnabledPreemptable: &trueValue,
   301  						},
   302  						{
   303  							Name:                gang.PluginName,
   304  							EnabledPreemptable:  &trueValue,
   305  							EnabledJobPipelined: &trueValue,
   306  							EnabledJobStarving:  &trueValue,
   307  						},
   308  						{
   309  							Name:                priority.PluginName,
   310  							EnabledTaskOrder:    &trueValue,
   311  							EnabledJobOrder:     &trueValue,
   312  							EnabledPreemptable:  &trueValue,
   313  							EnabledJobPipelined: &trueValue,
   314  							EnabledJobStarving:  &trueValue,
   315  						},
   316  						{
   317  							Name:               proportion.PluginName,
   318  							EnabledOverused:    &trueValue,
   319  							EnabledAllocatable: &trueValue,
   320  							EnabledQueueOrder:  &trueValue,
   321  						},
   322  					},
   323  				},
   324  			}, nil)
   325  			defer framework.CloseSession(ssn)
   326  
   327  			preempt.Execute(ssn)
   328  
   329  			for i := 0; i < test.expected; i++ {
   330  				select {
   331  				case <-evictor.Channel:
   332  				case <-time.After(time.Second):
   333  					t.Errorf("not enough evictions")
   334  				}
   335  			}
   336  			select {
   337  			case key, opened := <-evictor.Channel:
   338  				if opened {
   339  					t.Errorf("unexpected eviction: %s", key)
   340  				}
   341  			case <-time.After(50 * time.Millisecond):
   342  				// TODO: Active waiting here is not optimal, but there is no better way currently.
   343  				//	 Ideally we would like to wait for evict and bind request goroutines to finish first.
   344  			}
   345  		})
   346  	}
   347  }