volcano.sh/volcano@v1.9.0/test/e2e/util/util.go (about)

     1  /*
     2  Copyright 2021 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package util
    18  
    19  import (
    20  	"context"
    21  	"os"
    22  	"path/filepath"
    23  	"strconv"
    24  	"time"
    25  
    26  	lagencyerror "errors"
    27  
    28  	. "github.com/onsi/ginkgo/v2"
    29  	. "github.com/onsi/gomega"
    30  
    31  	corev1 "k8s.io/api/core/v1"
    32  	v1 "k8s.io/api/core/v1"
    33  	schedv1 "k8s.io/api/scheduling/v1"
    34  	"k8s.io/apimachinery/pkg/api/errors"
    35  	"k8s.io/apimachinery/pkg/api/resource"
    36  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    37  	"k8s.io/apimachinery/pkg/labels"
    38  	"k8s.io/apimachinery/pkg/util/wait"
    39  	"k8s.io/client-go/kubernetes"
    40  
    41  	vcclient "volcano.sh/apis/pkg/client/clientset/versioned"
    42  
    43  	"volcano.sh/volcano/pkg/controllers/job/helpers"
    44  	schedulerapi "volcano.sh/volcano/pkg/scheduler/api"
    45  )
    46  
// Shared fixtures for the e2e helpers.
//
// The *Minute values are fixed time.Duration spans. The remaining entries are
// v1.ResourceList literals: the *CPU values carry only a "cpu" quantity
// (expressed in millicores, e.g. "1000m"), while the CPUnMemn values carry a
// "cpu" quantity together with a "memory" quantity in Mi.
//
// These are variables rather than constants, so callers are able to reassign
// them; nothing in this section does so.
var (
	OneMinute  = 1 * time.Minute
	TwoMinute  = 2 * time.Minute
	FiveMinute = 5 * time.Minute
	TenMinute  = 10 * time.Minute
	OneCPU     = v1.ResourceList{"cpu": resource.MustParse("1000m")}
	TwoCPU     = v1.ResourceList{"cpu": resource.MustParse("2000m")}
	ThreeCPU   = v1.ResourceList{"cpu": resource.MustParse("3000m")}
	ThirtyCPU  = v1.ResourceList{"cpu": resource.MustParse("30000m")}
	HalfCPU    = v1.ResourceList{"cpu": resource.MustParse("500m")}
	CPU1Mem1   = v1.ResourceList{"cpu": resource.MustParse("1000m"), "memory": resource.MustParse("1024Mi")}
	CPU2Mem2   = v1.ResourceList{"cpu": resource.MustParse("2000m"), "memory": resource.MustParse("2048Mi")}
	CPU4Mem4   = v1.ResourceList{"cpu": resource.MustParse("4000m"), "memory": resource.MustParse("4096Mi")}
)
    61  
// Names and values shared across the e2e suites.
//
// WorkerPriority/MasterPriority pair a priority-class name with the matching
// *PriorityValue constant. None of these constants is referenced by the
// helpers in this section, so the notes below are inferred from the names and
// literal values only:
//   - TimeOutMessage looks like the text of a polling timeout error
//     (NOTE(review): presumably compared against errors from the wait
//     package; confirm at the call sites).
//   - NodeFieldSelectorKeyNodeName is the field path "metadata.name".
//   - SchedulerName, DefaultQueue, ExecuteAction and NumStress are consumed
//     elsewhere; verify their meaning against the callers.
const (
	TimeOutMessage               = "timed out waiting for the condition"
	WorkerPriority               = "worker-pri"
	WorkerPriorityValue          = -50
	MasterPriority               = "master-pri"
	MasterPriorityValue          = 100
	NodeFieldSelectorKeyNodeName = "metadata.name"
	SchedulerName                = "volcano"
	ExecuteAction                = "ExecuteAction"
	DefaultQueue                 = "default"
	NumStress                    = 10
)
    74  
// Container image references (repository:tag) used as defaults by the e2e
// helpers. Within this section only DefaultNginxImage is referenced, as the
// image of the placeholder pods built by createPlaceHolder.
const (
	DefaultBusyBoxImage = "busybox:1.24"
	DefaultNginxImage   = "nginx:1.14"
	DefaultMPIImage     = "volcanosh/example-mpi:0.0.3"
	DefaultTFImage      = "volcanosh/dist-mnist-tf-example:0.0.1"
	// "volcanosh/pytorch-mnist-v1beta1-9ee8fda-example:0.0.1" is from "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-9ee8fda"
	DefaultPytorchImage = "volcanosh/pytorch-mnist-v1beta1-9ee8fda-example:0.0.1"
)
    83  
    84  func CPUResource(request string) v1.ResourceList {
    85  	return v1.ResourceList{v1.ResourceCPU: resource.MustParse(request)}
    86  }
    87  
    88  func HomeDir() string {
    89  	if h := os.Getenv("HOME"); h != "" {
    90  		return h
    91  	}
    92  	return os.Getenv("USERPROFILE") // windows
    93  }
    94  
    95  func MasterURL() string {
    96  	if m := os.Getenv("MASTER"); m != "" {
    97  		return m
    98  	}
    99  	return ""
   100  }
   101  
   102  func KubeconfigPath(home string) string {
   103  	if m := os.Getenv("KUBECONFIG"); m != "" {
   104  		return m
   105  	}
   106  	return filepath.Join(home, ".kube", "config") // default kubeconfig path is $HOME/.kube/config
   107  }
   108  
   109  // VolcanoCliBinary function gets the volcano cli binary.
   110  func VolcanoCliBinary() string {
   111  	if bin := os.Getenv("VC_BIN"); bin != "" {
   112  		return filepath.Join(bin, "vcctl")
   113  	}
   114  	return ""
   115  }
   116  
// TestContext carries the clients and per-test cluster state that the e2e
// helpers operate on. InitTestContext builds it and CleanupTestContext tears
// down what it describes.
//
// Kubeclient and Vcclient are copied from the package-level KubeClient and
// VcClient variables. Namespace is the namespace created for the test and
// deleted again on cleanup. Queues, DeservedResource and PriorityClasses are
// copied from Options; PriorityClasses maps a priority-class name to its
// value and drives createPriorityClasses/deletePriorityClasses, while Queues
// and DeservedResource are presumably consumed by CreateQueues/deleteQueues
// (defined outside this section). UsingPlaceHolder is set by InitTestContext
// when placeholder setup is requested and makes CleanupTestContext delete the
// placeholder pods.
type TestContext struct {
	Kubeclient *kubernetes.Clientset
	Vcclient   *vcclient.Clientset

	Namespace        string
	Queues           []string
	DeservedResource map[string]v1.ResourceList
	PriorityClasses  map[string]int32
	UsingPlaceHolder bool
}
   127  
// Options configures InitTestContext.
//
// Namespace names the namespace to create; when empty, InitTestContext
// replaces it with helpers.GenRandomStr(8). Queues, DeservedResource and
// PriorityClasses are copied unchanged into the resulting TestContext.
// NodesNumLimit and NodesResourceLimit together request placeholder setup:
// setPlaceHolderForSchedulerTesting is invoked only when NodesNumLimit is
// non-zero and NodesResourceLimit is non-nil.
type Options struct {
	Namespace          string
	Queues             []string
	DeservedResource   map[string]v1.ResourceList
	PriorityClasses    map[string]int32
	NodesNumLimit      int
	NodesResourceLimit v1.ResourceList
}
   136  
// VcClient and KubeClient are the package-wide Volcano and Kubernetes
// clientsets. InitTestContext copies them into every TestContext it builds.
// Nothing in this section assigns them, so they are presumably initialised by
// the suite setup before any test context is created.
var VcClient *vcclient.Clientset
var KubeClient *kubernetes.Clientset
   139  
   140  func InitTestContext(o Options) *TestContext {
   141  	By("Initializing test context")
   142  
   143  	if o.Namespace == "" {
   144  		o.Namespace = helpers.GenRandomStr(8)
   145  	}
   146  	ctx := &TestContext{
   147  		Namespace:        o.Namespace,
   148  		Queues:           o.Queues,
   149  		DeservedResource: o.DeservedResource,
   150  		PriorityClasses:  o.PriorityClasses,
   151  		Vcclient:         VcClient,
   152  		Kubeclient:       KubeClient,
   153  		UsingPlaceHolder: false,
   154  	}
   155  
   156  	_, err := ctx.Kubeclient.CoreV1().Namespaces().Create(context.TODO(),
   157  		&v1.Namespace{
   158  			ObjectMeta: metav1.ObjectMeta{
   159  				Name: ctx.Namespace,
   160  			},
   161  		},
   162  		metav1.CreateOptions{},
   163  	)
   164  	Expect(err).NotTo(HaveOccurred(), "failed to create namespace")
   165  
   166  	CreateQueues(ctx)
   167  	createPriorityClasses(ctx)
   168  
   169  	if o.NodesNumLimit != 0 && o.NodesResourceLimit != nil {
   170  		setPlaceHolderForSchedulerTesting(ctx, o.NodesResourceLimit, o.NodesNumLimit)
   171  		ctx.UsingPlaceHolder = true
   172  	}
   173  
   174  	return ctx
   175  }
   176  
   177  func NamespaceNotExist(ctx *TestContext) wait.ConditionFunc {
   178  	return NamespaceNotExistWithName(ctx, ctx.Namespace)
   179  }
   180  
   181  func NamespaceNotExistWithName(ctx *TestContext, name string) wait.ConditionFunc {
   182  	return func() (bool, error) {
   183  		_, err := ctx.Kubeclient.CoreV1().Namespaces().Get(context.TODO(), name, metav1.GetOptions{})
   184  		if err != nil && errors.IsNotFound(err) {
   185  			return true, nil
   186  		}
   187  		return false, nil
   188  	}
   189  }
   190  
   191  func FileExist(name string) bool {
   192  	if _, err := os.Stat(name); err != nil {
   193  		if os.IsNotExist(err) {
   194  			return false
   195  		}
   196  	}
   197  	return true
   198  }
   199  
   200  func CleanupTestContext(ctx *TestContext) {
   201  	By("Cleaning up test context")
   202  
   203  	foreground := metav1.DeletePropagationForeground
   204  	err := ctx.Kubeclient.CoreV1().Namespaces().Delete(context.TODO(), ctx.Namespace, metav1.DeleteOptions{
   205  		PropagationPolicy: &foreground,
   206  	})
   207  	Expect(err).NotTo(HaveOccurred(), "failed to delete namespace")
   208  
   209  	deleteQueues(ctx)
   210  	deletePriorityClasses(ctx)
   211  
   212  	if ctx.UsingPlaceHolder {
   213  		deletePlaceHolder(ctx)
   214  	}
   215  
   216  	// Wait for namespace deleted.
   217  	err = wait.Poll(100*time.Millisecond, FiveMinute, NamespaceNotExist(ctx))
   218  	Expect(err).NotTo(HaveOccurred(), "failed to wait for namespace deleted")
   219  }
   220  
   221  func createPriorityClasses(cxt *TestContext) {
   222  	for name, value := range cxt.PriorityClasses {
   223  		_, err := cxt.Kubeclient.SchedulingV1().PriorityClasses().Create(context.TODO(),
   224  			&schedv1.PriorityClass{
   225  				ObjectMeta: metav1.ObjectMeta{
   226  					Name: name,
   227  				},
   228  				Value:         value,
   229  				GlobalDefault: false,
   230  			},
   231  			metav1.CreateOptions{})
   232  		Expect(err).NotTo(HaveOccurred(), "failed to create priority class: %s", name)
   233  	}
   234  }
   235  
   236  func deletePriorityClasses(cxt *TestContext) {
   237  	for name := range cxt.PriorityClasses {
   238  		err := cxt.Kubeclient.SchedulingV1().PriorityClasses().Delete(context.TODO(), name, metav1.DeleteOptions{})
   239  		Expect(err).NotTo(HaveOccurred())
   240  	}
   241  }
   242  
   243  func setPlaceHolderForSchedulerTesting(ctx *TestContext, req v1.ResourceList, reqNum int) (bool, error) {
   244  	if !satisfyMinNodesRequirements(ctx, reqNum) {
   245  		return false, lagencyerror.New("Failed to setup environment, you need to have at least " + strconv.Itoa(reqNum) + " worker node.")
   246  	}
   247  
   248  	nodes, err := ctx.Kubeclient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
   249  	Expect(err).NotTo(HaveOccurred())
   250  
   251  	pods, err := ctx.Kubeclient.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{})
   252  	Expect(err).NotTo(HaveOccurred())
   253  
   254  	used := map[string]*schedulerapi.Resource{}
   255  
   256  	for _, pod := range pods.Items {
   257  		nodeName := pod.Spec.NodeName
   258  		if len(nodeName) == 0 || pod.DeletionTimestamp != nil {
   259  			continue
   260  		}
   261  
   262  		if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
   263  			continue
   264  		}
   265  
   266  		if _, found := used[nodeName]; !found {
   267  			used[nodeName] = schedulerapi.EmptyResource()
   268  		}
   269  
   270  		for _, c := range pod.Spec.Containers {
   271  			resource := schedulerapi.NewResource(c.Resources.Requests)
   272  			used[nodeName].Add(resource)
   273  		}
   274  	}
   275  
   276  	// var minCPU, minMemory
   277  	minCPU := req.Cpu()
   278  	minMemory := req.Memory()
   279  	resourceRichNode := 0
   280  
   281  	// init placeholders
   282  	placeHolders := map[string]v1.ResourceList{}
   283  
   284  	for _, node := range nodes.Items {
   285  		if len(node.Spec.Taints) != 0 {
   286  			continue
   287  		}
   288  		minCPUMilli := float64(minCPU.MilliValue())
   289  		minMemoryValue := float64(minMemory.Value())
   290  		currentAllocatable := schedulerapi.NewResource(node.Status.Allocatable)
   291  
   292  		if res, found := used[node.Name]; found {
   293  			currentAllocatable.Sub(res)
   294  		}
   295  
   296  		phCPU := currentAllocatable.MilliCPU
   297  		phMemory := currentAllocatable.Memory
   298  
   299  		if minCPUMilli <= currentAllocatable.MilliCPU && minMemoryValue <= currentAllocatable.Memory {
   300  			resourceRichNode = resourceRichNode + 1
   301  			if resourceRichNode <= reqNum {
   302  				phCPU = currentAllocatable.MilliCPU - minCPUMilli
   303  				phMemory = currentAllocatable.Memory - minMemoryValue
   304  			}
   305  		}
   306  
   307  		phCPUQuantity := resource.NewMilliQuantity(int64(phCPU), resource.BinarySI)
   308  		phMemoryQuantity := resource.NewQuantity(int64(phMemory), resource.BinarySI)
   309  		placeHolders[node.Name] = v1.ResourceList{"cpu": *phCPUQuantity, "memory": *phMemoryQuantity}
   310  	}
   311  
   312  	if resourceRichNode < reqNum {
   313  		return false, lagencyerror.New("Failed to setup environment, you need to have at least " + strconv.Itoa(len(req)) + " worker node.")
   314  	}
   315  
   316  	for nodeName, res := range placeHolders {
   317  		err := createPlaceHolder(ctx, res, nodeName)
   318  		Expect(err).NotTo(HaveOccurred())
   319  	}
   320  
   321  	return true, nil
   322  }
   323  
   324  func createPlaceHolder(ctx *TestContext, phr v1.ResourceList, nodeName string) error {
   325  	pod := &corev1.Pod{
   326  		ObjectMeta: metav1.ObjectMeta{
   327  			Name:      nodeName + "-placeholder",
   328  			Namespace: ctx.Namespace,
   329  			Labels: map[string]string{
   330  				"role": "placeholder",
   331  			},
   332  		},
   333  		Spec: corev1.PodSpec{
   334  			Containers: []corev1.Container{
   335  				{
   336  					Name: "placeholder",
   337  					Resources: corev1.ResourceRequirements{
   338  						Requests: phr,
   339  						Limits:   phr,
   340  					},
   341  					Image: DefaultNginxImage,
   342  				},
   343  			},
   344  			NodeName: nodeName,
   345  		},
   346  	}
   347  	_, err := ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{})
   348  	return err
   349  }
   350  
   351  func deletePlaceHolder(ctx *TestContext) {
   352  	listOptions := metav1.ListOptions{
   353  		LabelSelector: labels.Set(map[string]string{"role": "placeholder"}).String(),
   354  	}
   355  	podList, err := ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).List(context.TODO(), listOptions)
   356  	Expect(err).NotTo(HaveOccurred(), "failed to list pods")
   357  
   358  	for _, pod := range podList.Items {
   359  		err := ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{})
   360  		Expect(err).NotTo(HaveOccurred(), "failed to delete pod %s", pod.Name)
   361  	}
   362  }