volcano.sh/volcano@v1.9.0/pkg/scheduler/plugins/proportion/proportion_test.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  Licensed under the Apache License, Version 2.0 (the "License");
     4  you may not use this file except in compliance with the License.
     5  You may obtain a copy of the License at
     6      http://www.apache.org/licenses/LICENSE-2.0
     7  Unless required by applicable law or agreed to in writing, software
     8  distributed under the License is distributed on an "AS IS" BASIS,
     9  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    10  See the License for the specific language governing permissions and
    11  limitations under the License.
    12  */
    13  
    14  package proportion
    15  
    16  import (
    17  	"io"
    18  	"net/http"
    19  	"reflect"
    20  	"strconv"
    21  	"strings"
    22  	"testing"
    23  	"time"
    24  
    25  	"github.com/agiledragon/gomonkey/v2"
    26  	"github.com/prometheus/client_golang/prometheus/promhttp"
    27  	apiv1 "k8s.io/api/core/v1"
    28  	schedulingv1 "k8s.io/api/scheduling/v1"
    29  	"k8s.io/apimachinery/pkg/api/resource"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	"k8s.io/client-go/tools/record"
    32  	"k8s.io/client-go/util/workqueue"
    33  
    34  	schedulingv1beta1 "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
    35  
    36  	"volcano.sh/volcano/cmd/scheduler/app/options"
    37  	"volcano.sh/volcano/pkg/scheduler/actions/allocate"
    38  	"volcano.sh/volcano/pkg/scheduler/api"
    39  	"volcano.sh/volcano/pkg/scheduler/cache"
    40  	"volcano.sh/volcano/pkg/scheduler/conf"
    41  	"volcano.sh/volcano/pkg/scheduler/framework"
    42  	"volcano.sh/volcano/pkg/scheduler/plugins/gang"
    43  	"volcano.sh/volcano/pkg/scheduler/plugins/priority"
    44  	"volcano.sh/volcano/pkg/scheduler/util"
    45  )
    46  
    47  func getWorkerAffinity() *apiv1.Affinity {
    48  	return &apiv1.Affinity{
    49  		PodAntiAffinity: &apiv1.PodAntiAffinity{
    50  			RequiredDuringSchedulingIgnoredDuringExecution: []apiv1.PodAffinityTerm{
    51  				{
    52  					LabelSelector: &metav1.LabelSelector{
    53  						MatchExpressions: []metav1.LabelSelectorRequirement{
    54  							{
    55  								Key:      "role",
    56  								Operator: "In",
    57  								Values:   []string{"worker"},
    58  							},
    59  						},
    60  					},
    61  					TopologyKey: "kubernetes.io/hostname",
    62  				},
    63  			},
    64  		},
    65  	}
    66  }
    67  
    68  func getLocalMetrics() int {
    69  	var data int
    70  
    71  	url := "http://127.0.0.1:8081/metrics"
    72  	method := "GET"
    73  
    74  	client := &http.Client{}
    75  	req, err := http.NewRequest(method, url, nil)
    76  
    77  	if err != nil {
    78  		return data
    79  	}
    80  	req.Header.Add("Authorization", "8cbdb37a-b880-4f2e-844c-e420858ea7eb")
    81  
    82  	res, err := client.Do(req)
    83  	if err != nil {
    84  		return data
    85  	}
    86  	defer res.Body.Close()
    87  
    88  	body, err := io.ReadAll(res.Body)
    89  	if err != nil {
    90  		return data
    91  	}
    92  
    93  	split := strings.Split(string(body), "\n")
    94  	for _, v := range split {
    95  		if !strings.Contains(v, "#") && (strings.Contains(v, "volcano_queue_allocated_memory_bytes") || strings.Contains(v, "volcano_queue_allocated_milli_cpu")) {
    96  			data, _ = strconv.Atoi(strings.Split(v, " ")[1])
    97  		}
    98  	}
    99  
   100  	return data
   101  }
   102  
   103  func TestProportion(t *testing.T) {
   104  	c := make(chan bool, 1)
   105  	var tmp *cache.SchedulerCache
   106  	patches := gomonkey.ApplyMethod(reflect.TypeOf(tmp), "AddBindTask", func(scCache *cache.SchedulerCache, task *api.TaskInfo) error {
   107  		scCache.Binder.Bind(nil, []*api.TaskInfo{task})
   108  		return nil
   109  	})
   110  	defer patches.Reset()
   111  
   112  	patchUpdateQueueStatus := gomonkey.ApplyMethod(reflect.TypeOf(tmp), "UpdateQueueStatus", func(scCache *cache.SchedulerCache, queue *api.QueueInfo) error {
   113  		return nil
   114  	})
   115  	defer patchUpdateQueueStatus.Reset()
   116  
   117  	framework.RegisterPluginBuilder(PluginName, New)
   118  	framework.RegisterPluginBuilder(gang.PluginName, gang.New)
   119  	framework.RegisterPluginBuilder(priority.PluginName, priority.New)
   120  	options.ServerOpts = options.NewServerOption()
   121  	defer framework.CleanupPluginBuilders()
   122  
   123  	// Running pods
   124  	w1 := util.BuildPod("ns1", "worker-1", "", apiv1.PodRunning, api.BuildResourceList("3", "3k"), "pg1", map[string]string{"role": "worker"}, map[string]string{"selector": "worker"})
   125  	w2 := util.BuildPod("ns1", "worker-2", "", apiv1.PodRunning, api.BuildResourceList("5", "5k"), "pg1", map[string]string{"role": "worker"}, map[string]string{})
   126  	w3 := util.BuildPod("ns1", "worker-3", "", apiv1.PodRunning, api.BuildResourceList("4", "4k"), "pg2", map[string]string{"role": "worker"}, map[string]string{})
   127  	w4 := util.BuildPod("ns1", "rdma-demo", "", apiv1.PodRunning, api.BuildResourceList("1", "1k", []api.ScalarResource{{Name: "nvidia.com/gpu", Value: "1"}, {Name: "rdma/hca", Value: "1"}}...), "pg3", map[string]string{}, map[string]string{})
   128  	w1.Spec.Affinity = getWorkerAffinity()
   129  	w2.Spec.Affinity = getWorkerAffinity()
   130  	w3.Spec.Affinity = getWorkerAffinity()
   131  
   132  	// nodes
   133  	n1 := util.BuildNode("node1", api.BuildResourceList("4", "4k", []api.ScalarResource{{Name: "pods", Value: "10"}}...), map[string]string{"selector": "worker"})
   134  	n2 := util.BuildNode("node2", api.BuildResourceList("3", "3k", []api.ScalarResource{{Name: "pods", Value: "10"}}...), map[string]string{})
   135  	n3 := util.BuildNode("node3", api.BuildResourceList("4", "4k", []api.ScalarResource{{Name: "pods", Value: "10"}, {Name: "nvidia.com/gpu", Value: "8"}, {Name: "rdma/hca", Value: "1k"}}...), map[string]string{})
   136  	n1.Status.Allocatable["pods"] = resource.MustParse("15")
   137  	n2.Status.Allocatable["pods"] = resource.MustParse("15")
   138  	n3.Status.Allocatable["pods"] = resource.MustParse("15")
   139  	n1.Labels["kubernetes.io/hostname"] = "node1"
   140  	n2.Labels["kubernetes.io/hostname"] = "node2"
   141  	n3.Labels["kubernetes.io/hostname"] = "node3"
   142  
   143  	// priority
   144  	p1 := &schedulingv1.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: "p1"}, Value: 1}
   145  	p2 := &schedulingv1.PriorityClass{ObjectMeta: metav1.ObjectMeta{Name: "p2"}, Value: 2}
   146  	// podgroup
   147  	pg1 := &schedulingv1beta1.PodGroup{
   148  		ObjectMeta: metav1.ObjectMeta{
   149  			Namespace: "ns1",
   150  			Name:      "pg1",
   151  		},
   152  		Spec: schedulingv1beta1.PodGroupSpec{
   153  			Queue:             "q1",
   154  			MinMember:         int32(2),
   155  			PriorityClassName: p2.Name,
   156  		},
   157  	}
   158  	pg2 := &schedulingv1beta1.PodGroup{
   159  		ObjectMeta: metav1.ObjectMeta{
   160  			Namespace: "ns1",
   161  			Name:      "pg2",
   162  		},
   163  		Spec: schedulingv1beta1.PodGroupSpec{
   164  			Queue:             "q1",
   165  			MinMember:         int32(1),
   166  			PriorityClassName: p1.Name,
   167  		},
   168  	}
   169  	pgRes3 := api.BuildResourceList("1", "1k", []api.ScalarResource{{Name: "nvidia.com/gpu", Value: "1"}, {Name: "rdma/hca", Value: "1"}}...)
   170  	pg3 := &schedulingv1beta1.PodGroup{
   171  		ObjectMeta: metav1.ObjectMeta{
   172  			Namespace: "ns1",
   173  			Name:      "pg3",
   174  		},
   175  		Spec: schedulingv1beta1.PodGroupSpec{
   176  			Queue:             "q2",
   177  			MinMember:         int32(1),
   178  			PriorityClassName: p1.Name,
   179  			MinResources:      &pgRes3,
   180  		},
   181  	}
   182  
   183  	// queue
   184  	queue1 := &schedulingv1beta1.Queue{
   185  		ObjectMeta: metav1.ObjectMeta{
   186  			Name: "q1",
   187  		},
   188  	}
   189  
   190  	// queue
   191  	queue2 := &schedulingv1beta1.Queue{
   192  		ObjectMeta: metav1.ObjectMeta{
   193  			Name: "q2",
   194  		},
   195  		Spec: schedulingv1beta1.QueueSpec{
   196  			Capability: api.BuildResourceList("2", "2k", []api.ScalarResource{{Name: "pods", Value: "10"}, {Name: "nvidia.com/gpu", Value: "4"}}...),
   197  		},
   198  	}
   199  
   200  	// tests
   201  	tests := []struct {
   202  		name     string
   203  		pods     []*apiv1.Pod
   204  		nodes    []*apiv1.Node
   205  		pcs      []*schedulingv1.PriorityClass
   206  		pgs      []*schedulingv1beta1.PodGroup
   207  		expected map[string]string
   208  	}{
   209  		{
   210  			name:  "pod-deallocate",
   211  			pods:  []*apiv1.Pod{w1, w2, w3},
   212  			nodes: []*apiv1.Node{n1, n2},
   213  			pcs:   []*schedulingv1.PriorityClass{p1, p2},
   214  			pgs:   []*schedulingv1beta1.PodGroup{pg1, pg2},
   215  			expected: map[string]string{ // podKey -> node
   216  				"ns1/worker-3": "node1",
   217  			},
   218  		},
   219  		{
   220  			name:  "realcapability-test",
   221  			pods:  []*apiv1.Pod{w1, w2, w3, w4},
   222  			nodes: []*apiv1.Node{n1, n2, n3},
   223  			pcs:   []*schedulingv1.PriorityClass{p1, p2},
   224  			pgs:   []*schedulingv1beta1.PodGroup{pg1, pg2, pg3},
   225  			expected: map[string]string{ // podKey -> node
   226  				"ns1/rdma-demo": "node3",
   227  			},
   228  		},
   229  	}
   230  
   231  	for _, test := range tests {
   232  		// initialize schedulerCache
   233  		binder := &util.FakeBinder{
   234  			Binds:   map[string]string{},
   235  			Channel: make(chan string),
   236  		}
   237  		recorder := record.NewFakeRecorder(100)
   238  		go func() {
   239  			for {
   240  				event := <-recorder.Events
   241  				t.Logf("%s: [Event] %s", test.name, event)
   242  			}
   243  		}()
   244  		schedulerCache := &cache.SchedulerCache{
   245  			Nodes:           make(map[string]*api.NodeInfo),
   246  			Jobs:            make(map[api.JobID]*api.JobInfo),
   247  			PriorityClasses: make(map[string]*schedulingv1.PriorityClass),
   248  			Queues:          make(map[api.QueueID]*api.QueueInfo),
   249  			Binder:          binder,
   250  			StatusUpdater:   &util.FakeStatusUpdater{},
   251  			VolumeBinder:    &util.FakeVolumeBinder{},
   252  			Recorder:        recorder,
   253  		}
   254  		// deletedJobs to DeletedJobs
   255  		schedulerCache.DeletedJobs = workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
   256  
   257  		for _, node := range test.nodes {
   258  			schedulerCache.AddOrUpdateNode(node)
   259  		}
   260  		for _, pod := range test.pods {
   261  			schedulerCache.AddPod(pod)
   262  		}
   263  		for _, pc := range test.pcs {
   264  			schedulerCache.PriorityClasses[pc.Name] = pc
   265  		}
   266  		for _, pg := range test.pgs {
   267  			pg.Status = schedulingv1beta1.PodGroupStatus{
   268  				Phase: schedulingv1beta1.PodGroupInqueue,
   269  			}
   270  			schedulerCache.AddPodGroupV1beta1(pg)
   271  		}
   272  		schedulerCache.AddQueueV1beta1(queue1)
   273  		schedulerCache.AddQueueV1beta1(queue2)
   274  		// session
   275  		trueValue := true
   276  
   277  		num := 1
   278  		// proportion
   279  		go func() {
   280  			for {
   281  				select {
   282  				default:
   283  					ssn := framework.OpenSession(schedulerCache, []conf.Tier{
   284  						{
   285  							Plugins: []conf.PluginOption{
   286  								{
   287  									Name:             PluginName,
   288  									EnabledPredicate: &trueValue,
   289  								},
   290  								{
   291  									Name:                gang.PluginName,
   292  									EnabledJobReady:     &trueValue,
   293  									EnabledJobPipelined: &trueValue,
   294  								},
   295  								{
   296  									Name:            priority.PluginName,
   297  									EnabledJobOrder: &trueValue,
   298  								},
   299  							},
   300  						},
   301  					}, nil)
   302  
   303  					allocator := allocate.New()
   304  					allocator.Execute(ssn)
   305  					framework.CloseSession(ssn)
   306  					time.Sleep(time.Second * 3)
   307  					if num == 1 {
   308  						metrics := getLocalMetrics()
   309  						if metrics == 12000 {
   310  							t.Logf("init queue_allocated metrics is ok,%v", metrics)
   311  						}
   312  						schedulerCache.DeletePodGroupV1beta1(pg1)
   313  					} else if num == 2 {
   314  						metrics := getLocalMetrics()
   315  						if metrics == 4000 {
   316  							t.Logf("after delete vcjob pg1, queue_allocated metrics is ok,%v", metrics)
   317  						}
   318  						schedulerCache.DeletePodGroupV1beta1(pg2)
   319  					} else {
   320  						metrics := getLocalMetrics()
   321  						if metrics != 0 {
   322  							t.Errorf("after delete vcjob pg2, queue_allocated metrics is fail,%v", metrics)
   323  							c <- false
   324  							return
   325  						}
   326  						t.Logf("after delete vcjob pg2, queue_allocated metrics is ok,%v", metrics)
   327  						c <- true
   328  					}
   329  					num++
   330  				}
   331  			}
   332  		}()
   333  
   334  		go func() {
   335  			http.Handle("/metrics", promhttp.Handler())
   336  			err := http.ListenAndServe(":8081", nil)
   337  			if err != nil {
   338  				t.Errorf("ListenAndServe() err = %v", err.Error())
   339  			}
   340  		}()
   341  
   342  		for {
   343  			select {
   344  			case res := <-c:
   345  				if !res {
   346  					t.Error("TestProportion failed")
   347  				} else {
   348  					t.Log("TestProportion successful")
   349  				}
   350  				return
   351  			}
   352  
   353  		}
   354  	}
   355  }