volcano.sh/volcano@v1.9.0/test/e2e/util/util.go (about) 1 /* 2 Copyright 2021 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package util 18 19 import ( 20 "context" 21 "os" 22 "path/filepath" 23 "strconv" 24 "time" 25 26 lagencyerror "errors" 27 28 . "github.com/onsi/ginkgo/v2" 29 . "github.com/onsi/gomega" 30 31 corev1 "k8s.io/api/core/v1" 32 v1 "k8s.io/api/core/v1" 33 schedv1 "k8s.io/api/scheduling/v1" 34 "k8s.io/apimachinery/pkg/api/errors" 35 "k8s.io/apimachinery/pkg/api/resource" 36 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 37 "k8s.io/apimachinery/pkg/labels" 38 "k8s.io/apimachinery/pkg/util/wait" 39 "k8s.io/client-go/kubernetes" 40 41 vcclient "volcano.sh/apis/pkg/client/clientset/versioned" 42 43 "volcano.sh/volcano/pkg/controllers/job/helpers" 44 schedulerapi "volcano.sh/volcano/pkg/scheduler/api" 45 ) 46 47 var ( 48 OneMinute = 1 * time.Minute 49 TwoMinute = 2 * time.Minute 50 FiveMinute = 5 * time.Minute 51 TenMinute = 10 * time.Minute 52 OneCPU = v1.ResourceList{"cpu": resource.MustParse("1000m")} 53 TwoCPU = v1.ResourceList{"cpu": resource.MustParse("2000m")} 54 ThreeCPU = v1.ResourceList{"cpu": resource.MustParse("3000m")} 55 ThirtyCPU = v1.ResourceList{"cpu": resource.MustParse("30000m")} 56 HalfCPU = v1.ResourceList{"cpu": resource.MustParse("500m")} 57 CPU1Mem1 = v1.ResourceList{"cpu": resource.MustParse("1000m"), "memory": resource.MustParse("1024Mi")} 58 CPU2Mem2 = v1.ResourceList{"cpu": resource.MustParse("2000m"), "memory": resource.MustParse("2048Mi")} 59 CPU4Mem4 = v1.ResourceList{"cpu": resource.MustParse("4000m"), "memory": resource.MustParse("4096Mi")} 60 ) 61 62 const ( 63 TimeOutMessage = "timed out waiting for the condition" 64 WorkerPriority = "worker-pri" 65 WorkerPriorityValue = -50 66 MasterPriority = "master-pri" 67 MasterPriorityValue = 100 68 NodeFieldSelectorKeyNodeName = "metadata.name" 69 SchedulerName = "volcano" 70 ExecuteAction = "ExecuteAction" 71 DefaultQueue = "default" 72 NumStress = 10 73 ) 74 75 const ( 76 DefaultBusyBoxImage = "busybox:1.24" 77 DefaultNginxImage = "nginx:1.14" 78 DefaultMPIImage = "volcanosh/example-mpi:0.0.3" 79 DefaultTFImage = "volcanosh/dist-mnist-tf-example:0.0.1" 80 // "volcanosh/pytorch-mnist-v1beta1-9ee8fda-example:0.0.1" is from "docker.io/kubeflowkatib/pytorch-mnist:v1beta1-9ee8fda" 81 DefaultPytorchImage = "volcanosh/pytorch-mnist-v1beta1-9ee8fda-example:0.0.1" 82 ) 83 84 func CPUResource(request string) v1.ResourceList { 85 return v1.ResourceList{v1.ResourceCPU: resource.MustParse(request)} 86 } 87 88 func HomeDir() string { 89 if h := os.Getenv("HOME"); h != "" { 90 return h 91 } 92 return os.Getenv("USERPROFILE") // windows 93 } 94 95 func MasterURL() string { 96 if m := os.Getenv("MASTER"); m != "" { 97 return m 98 } 99 return "" 100 } 101 102 func KubeconfigPath(home string) string { 103 if m := os.Getenv("KUBECONFIG"); m != "" { 104 return m 105 } 106 return filepath.Join(home, ".kube", "config") // default kubeconfig path is $HOME/.kube/config 107 } 108 109 // VolcanoCliBinary function gets the volcano cli binary. 110 func VolcanoCliBinary() string { 111 if bin := os.Getenv("VC_BIN"); bin != "" { 112 return filepath.Join(bin, "vcctl") 113 } 114 return "" 115 } 116 117 type TestContext struct { 118 Kubeclient *kubernetes.Clientset 119 Vcclient *vcclient.Clientset 120 121 Namespace string 122 Queues []string 123 DeservedResource map[string]v1.ResourceList 124 PriorityClasses map[string]int32 125 UsingPlaceHolder bool 126 } 127 128 type Options struct { 129 Namespace string 130 Queues []string 131 DeservedResource map[string]v1.ResourceList 132 PriorityClasses map[string]int32 133 NodesNumLimit int 134 NodesResourceLimit v1.ResourceList 135 } 136 137 var VcClient *vcclient.Clientset 138 var KubeClient *kubernetes.Clientset 139 140 func InitTestContext(o Options) *TestContext { 141 By("Initializing test context") 142 143 if o.Namespace == "" { 144 o.Namespace = helpers.GenRandomStr(8) 145 } 146 ctx := &TestContext{ 147 Namespace: o.Namespace, 148 Queues: o.Queues, 149 DeservedResource: o.DeservedResource, 150 PriorityClasses: o.PriorityClasses, 151 Vcclient: VcClient, 152 Kubeclient: KubeClient, 153 UsingPlaceHolder: false, 154 } 155 156 _, err := ctx.Kubeclient.CoreV1().Namespaces().Create(context.TODO(), 157 &v1.Namespace{ 158 ObjectMeta: metav1.ObjectMeta{ 159 Name: ctx.Namespace, 160 }, 161 }, 162 metav1.CreateOptions{}, 163 ) 164 Expect(err).NotTo(HaveOccurred(), "failed to create namespace") 165 166 CreateQueues(ctx) 167 createPriorityClasses(ctx) 168 169 if o.NodesNumLimit != 0 && o.NodesResourceLimit != nil { 170 setPlaceHolderForSchedulerTesting(ctx, o.NodesResourceLimit, o.NodesNumLimit) 171 ctx.UsingPlaceHolder = true 172 } 173 174 return ctx 175 } 176 177 func NamespaceNotExist(ctx *TestContext) wait.ConditionFunc { 178 return NamespaceNotExistWithName(ctx, ctx.Namespace) 179 } 180 181 func NamespaceNotExistWithName(ctx *TestContext, name string) wait.ConditionFunc { 182 return func() (bool, error) { 183 _, err := ctx.Kubeclient.CoreV1().Namespaces().Get(context.TODO(), name, metav1.GetOptions{}) 184 if err != nil && errors.IsNotFound(err) { 185 return true, nil 186 } 187 return false, nil 188 } 189 } 190 191 func FileExist(name string) bool { 192 if _, err := os.Stat(name); err != nil { 193 if os.IsNotExist(err) { 194 return false 195 } 196 } 197 return true 198 } 199 200 func CleanupTestContext(ctx *TestContext) { 201 By("Cleaning up test context") 202 203 foreground := metav1.DeletePropagationForeground 204 err := ctx.Kubeclient.CoreV1().Namespaces().Delete(context.TODO(), ctx.Namespace, metav1.DeleteOptions{ 205 PropagationPolicy: &foreground, 206 }) 207 Expect(err).NotTo(HaveOccurred(), "failed to delete namespace") 208 209 deleteQueues(ctx) 210 deletePriorityClasses(ctx) 211 212 if ctx.UsingPlaceHolder { 213 deletePlaceHolder(ctx) 214 } 215 216 // Wait for namespace deleted. 217 err = wait.Poll(100*time.Millisecond, FiveMinute, NamespaceNotExist(ctx)) 218 Expect(err).NotTo(HaveOccurred(), "failed to wait for namespace deleted") 219 } 220 221 func createPriorityClasses(cxt *TestContext) { 222 for name, value := range cxt.PriorityClasses { 223 _, err := cxt.Kubeclient.SchedulingV1().PriorityClasses().Create(context.TODO(), 224 &schedv1.PriorityClass{ 225 ObjectMeta: metav1.ObjectMeta{ 226 Name: name, 227 }, 228 Value: value, 229 GlobalDefault: false, 230 }, 231 metav1.CreateOptions{}) 232 Expect(err).NotTo(HaveOccurred(), "failed to create priority class: %s", name) 233 } 234 } 235 236 func deletePriorityClasses(cxt *TestContext) { 237 for name := range cxt.PriorityClasses { 238 err := cxt.Kubeclient.SchedulingV1().PriorityClasses().Delete(context.TODO(), name, metav1.DeleteOptions{}) 239 Expect(err).NotTo(HaveOccurred()) 240 } 241 } 242 243 func setPlaceHolderForSchedulerTesting(ctx *TestContext, req v1.ResourceList, reqNum int) (bool, error) { 244 if !satisfyMinNodesRequirements(ctx, reqNum) { 245 return false, lagencyerror.New("Failed to setup environment, you need to have at least " + strconv.Itoa(reqNum) + " worker node.") 246 } 247 248 nodes, err := ctx.Kubeclient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{}) 249 Expect(err).NotTo(HaveOccurred()) 250 251 pods, err := ctx.Kubeclient.CoreV1().Pods(metav1.NamespaceAll).List(context.TODO(), metav1.ListOptions{}) 252 Expect(err).NotTo(HaveOccurred()) 253 254 used := map[string]*schedulerapi.Resource{} 255 256 for _, pod := range pods.Items { 257 nodeName := pod.Spec.NodeName 258 if len(nodeName) == 0 || pod.DeletionTimestamp != nil { 259 continue 260 } 261 262 if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed { 263 continue 264 } 265 266 if _, found := used[nodeName]; !found { 267 used[nodeName] = schedulerapi.EmptyResource() 268 } 269 270 for _, c := range pod.Spec.Containers { 271 resource := schedulerapi.NewResource(c.Resources.Requests) 272 used[nodeName].Add(resource) 273 } 274 } 275 276 // var minCPU, minMemory 277 minCPU := req.Cpu() 278 minMemory := req.Memory() 279 resourceRichNode := 0 280 281 // init placeholders 282 placeHolders := map[string]v1.ResourceList{} 283 284 for _, node := range nodes.Items { 285 if len(node.Spec.Taints) != 0 { 286 continue 287 } 288 minCPUMilli := float64(minCPU.MilliValue()) 289 minMemoryValue := float64(minMemory.Value()) 290 currentAllocatable := schedulerapi.NewResource(node.Status.Allocatable) 291 292 if res, found := used[node.Name]; found { 293 currentAllocatable.Sub(res) 294 } 295 296 phCPU := currentAllocatable.MilliCPU 297 phMemory := currentAllocatable.Memory 298 299 if minCPUMilli <= currentAllocatable.MilliCPU && minMemoryValue <= currentAllocatable.Memory { 300 resourceRichNode = resourceRichNode + 1 301 if resourceRichNode <= reqNum { 302 phCPU = currentAllocatable.MilliCPU - minCPUMilli 303 phMemory = currentAllocatable.Memory - minMemoryValue 304 } 305 } 306 307 phCPUQuantity := resource.NewMilliQuantity(int64(phCPU), resource.BinarySI) 308 phMemoryQuantity := resource.NewQuantity(int64(phMemory), resource.BinarySI) 309 placeHolders[node.Name] = v1.ResourceList{"cpu": *phCPUQuantity, "memory": *phMemoryQuantity} 310 } 311 312 if resourceRichNode < reqNum { 313 return false, lagencyerror.New("Failed to setup environment, you need to have at least " + strconv.Itoa(len(req)) + " worker node.") 314 } 315 316 for nodeName, res := range placeHolders { 317 err := createPlaceHolder(ctx, res, nodeName) 318 Expect(err).NotTo(HaveOccurred()) 319 } 320 321 return true, nil 322 } 323 324 func createPlaceHolder(ctx *TestContext, phr v1.ResourceList, nodeName string) error { 325 pod := &corev1.Pod{ 326 ObjectMeta: metav1.ObjectMeta{ 327 Name: nodeName + "-placeholder", 328 Namespace: ctx.Namespace, 329 Labels: map[string]string{ 330 "role": "placeholder", 331 }, 332 }, 333 Spec: corev1.PodSpec{ 334 Containers: []corev1.Container{ 335 { 336 Name: "placeholder", 337 Resources: corev1.ResourceRequirements{ 338 Requests: phr, 339 Limits: phr, 340 }, 341 Image: DefaultNginxImage, 342 }, 343 }, 344 NodeName: nodeName, 345 }, 346 } 347 _, err := ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).Create(context.TODO(), pod, metav1.CreateOptions{}) 348 return err 349 } 350 351 func deletePlaceHolder(ctx *TestContext) { 352 listOptions := metav1.ListOptions{ 353 LabelSelector: labels.Set(map[string]string{"role": "placeholder"}).String(), 354 } 355 podList, err := ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).List(context.TODO(), listOptions) 356 Expect(err).NotTo(HaveOccurred(), "failed to list pods") 357 358 for _, pod := range podList.Items { 359 err := ctx.Kubeclient.CoreV1().Pods(ctx.Namespace).Delete(context.TODO(), pod.Name, metav1.DeleteOptions{}) 360 Expect(err).NotTo(HaveOccurred(), "failed to delete pod %s", pod.Name) 361 } 362 }