volcano.sh/volcano@v1.9.0/pkg/scheduler/api/devices/nvidia/vgpu/utils.go (about) 1 /* 2 Copyright 2023 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vgpu 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "os" 24 "path/filepath" 25 "strconv" 26 "strings" 27 28 v1 "k8s.io/api/core/v1" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 k8stypes "k8s.io/apimachinery/pkg/types" 31 "k8s.io/client-go/kubernetes" 32 "k8s.io/client-go/rest" 33 "k8s.io/client-go/tools/clientcmd" 34 "k8s.io/klog/v2" 35 ) 36 37 var kubeClient kubernetes.Interface 38 39 func init() { 40 var err error 41 kubeClient, err = NewClient() 42 if err != nil { 43 klog.Errorf("init kubeclient in hamivgpu failed: %s", err.Error()) 44 } else { 45 klog.V(3).Infoln("init kubeclient success") 46 } 47 } 48 49 // NewClient connects to an API server 50 func NewClient() (kubernetes.Interface, error) { 51 kubeConfig := os.Getenv("KUBECONFIG") 52 if kubeConfig == "" { 53 kubeConfig = filepath.Join(os.Getenv("HOME"), ".kube", "config") 54 } 55 config, err := rest.InClusterConfig() 56 if err != nil { 57 config, err = clientcmd.BuildConfigFromFlags("", kubeConfig) 58 if err != nil { 59 return nil, err 60 } 61 } 62 client, err := kubernetes.NewForConfig(config) 63 kubeClient = client 64 return client, err 65 } 66 67 func patchNodeAnnotations(node *v1.Node, annotations map[string]string) error { 68 type patchMetadata struct { 69 Annotations map[string]string `json:"annotations,omitempty"` 70 } 71 type patchPod struct { 72 Metadata patchMetadata `json:"metadata"` 73 //Spec patchSpec `json:"spec,omitempty"` 74 } 75 76 p := patchPod{} 77 p.Metadata.Annotations = annotations 78 79 bytes, err := json.Marshal(p) 80 if err != nil { 81 return err 82 } 83 _, err = kubeClient.CoreV1().Nodes(). 84 Patch(context.Background(), node.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{}) 85 if err != nil { 86 klog.Errorf("patch pod %v failed, %v", node.Name, err) 87 } 88 return err 89 } 90 91 func decodeNodeDevices(name string, str string) *GPUDevices { 92 if !strings.Contains(str, ":") { 93 return nil 94 } 95 tmp := strings.Split(str, ":") 96 retval := &GPUDevices{ 97 Name: name, 98 Device: make(map[int]*GPUDevice), 99 Score: float64(0), 100 } 101 for index, val := range tmp { 102 if strings.Contains(val, ",") { 103 items := strings.Split(val, ",") 104 count, _ := strconv.Atoi(items[1]) 105 devmem, _ := strconv.Atoi(items[2]) 106 health, _ := strconv.ParseBool(items[4]) 107 i := GPUDevice{ 108 ID: index, 109 UUID: items[0], 110 Number: uint(count), 111 Memory: uint(devmem), 112 Type: items[3], 113 Health: health, 114 } 115 retval.Device[index] = &i 116 } 117 } 118 return retval 119 } 120 121 func encodeContainerDevices(cd []ContainerDevice) string { 122 tmp := "" 123 for _, val := range cd { 124 tmp += val.UUID + "," + val.Type + "," + strconv.Itoa(int(val.Usedmem)) + "," + strconv.Itoa(int(val.Usedcores)) + ":" 125 } 126 klog.V(4).Infoln("Encoded container Devices=", tmp) 127 return tmp 128 //return strings.Join(cd, ",") 129 } 130 131 func encodePodDevices(pd []ContainerDevices) string { 132 var ss []string 133 for _, cd := range pd { 134 ss = append(ss, encodeContainerDevices(cd)) 135 } 136 return strings.Join(ss, ";") 137 } 138 139 func decodeContainerDevices(str string) ContainerDevices { 140 if len(str) == 0 { 141 return ContainerDevices{} 142 } 143 cd := strings.Split(str, ":") 144 contdev := ContainerDevices{} 145 tmpdev := ContainerDevice{} 146 //fmt.Println("before container device", str) 147 if len(str) == 0 { 148 return contdev 149 } 150 for _, val := range cd { 151 if strings.Contains(val, ",") { 152 //fmt.Println("cd is ", val) 153 tmpstr := strings.Split(val, ",") 154 tmpdev.UUID = tmpstr[0] 155 tmpdev.Type = tmpstr[1] 156 devmem, _ := strconv.ParseInt(tmpstr[2], 10, 32) 157 tmpdev.Usedmem = int32(devmem) 158 devcores, _ := strconv.ParseInt(tmpstr[3], 10, 32) 159 tmpdev.Usedcores = int32(devcores) 160 contdev = append(contdev, tmpdev) 161 } 162 } 163 //fmt.Println("Decoded container device", contdev) 164 return contdev 165 } 166 167 func decodePodDevices(str string) []ContainerDevices { 168 if len(str) == 0 { 169 return []ContainerDevices{} 170 } 171 var pd []ContainerDevices 172 for _, s := range strings.Split(str, ";") { 173 cd := decodeContainerDevices(s) 174 pd = append(pd, cd) 175 } 176 return pd 177 } 178 179 func checkVGPUResourcesInPod(pod *v1.Pod) bool { 180 for _, container := range pod.Spec.Containers { 181 _, ok := container.Resources.Limits[VolcanoVGPUMemory] 182 if ok { 183 return true 184 } 185 _, ok = container.Resources.Limits[VolcanoVGPUNumber] 186 if ok { 187 return true 188 } 189 } 190 return false 191 } 192 193 func resourcereqs(pod *v1.Pod) []ContainerDeviceRequest { 194 resourceName := v1.ResourceName(VolcanoVGPUNumber) 195 resourceMem := v1.ResourceName(VolcanoVGPUMemory) 196 resourceMemPercentage := v1.ResourceName(VolcanoVGPUMemoryPercentage) 197 resourceCores := v1.ResourceName(VolcanoVGPUCores) 198 counts := []ContainerDeviceRequest{} 199 //Count Nvidia GPU 200 for i := 0; i < len(pod.Spec.Containers); i++ { 201 singledevice := false 202 v, ok := pod.Spec.Containers[i].Resources.Limits[resourceName] 203 if !ok { 204 v, ok = pod.Spec.Containers[i].Resources.Limits[resourceMem] 205 singledevice = true 206 } 207 if ok { 208 n := int64(1) 209 if !singledevice { 210 n, _ = v.AsInt64() 211 } 212 memnum := 0 213 mem, ok := pod.Spec.Containers[i].Resources.Limits[resourceMem] 214 if !ok { 215 mem, ok = pod.Spec.Containers[i].Resources.Requests[resourceMem] 216 } 217 if ok { 218 memnums, ok := mem.AsInt64() 219 if ok { 220 memnum = int(memnums) 221 } 222 } 223 mempnum := int32(101) 224 mem, ok = pod.Spec.Containers[i].Resources.Limits[resourceMemPercentage] 225 if !ok { 226 mem, ok = pod.Spec.Containers[i].Resources.Requests[resourceMemPercentage] 227 } 228 if ok { 229 mempnums, ok := mem.AsInt64() 230 if ok { 231 mempnum = int32(mempnums) 232 } 233 } 234 if mempnum == 101 && memnum == 0 { 235 mempnum = 100 236 } 237 corenum := 0 238 core, ok := pod.Spec.Containers[i].Resources.Limits[resourceCores] 239 if !ok { 240 core, ok = pod.Spec.Containers[i].Resources.Requests[resourceCores] 241 } 242 if ok { 243 corenums, ok := core.AsInt64() 244 if ok { 245 corenum = int(corenums) 246 } 247 } 248 counts = append(counts, ContainerDeviceRequest{ 249 Nums: int32(n), 250 Type: "NVIDIA", 251 Memreq: int32(memnum), 252 MemPercentagereq: int32(mempnum), 253 Coresreq: int32(corenum), 254 }) 255 } 256 } 257 klog.V(3).Infoln("counts=", counts) 258 return counts 259 } 260 261 func checkGPUtype(annos map[string]string, cardtype string) bool { 262 inuse, ok := annos[GPUInUse] 263 if ok { 264 if !strings.Contains(inuse, ",") { 265 if strings.Contains(strings.ToUpper(cardtype), strings.ToUpper(inuse)) { 266 return true 267 } 268 } else { 269 for _, val := range strings.Split(inuse, ",") { 270 if strings.Contains(strings.ToUpper(cardtype), strings.ToUpper(val)) { 271 return true 272 } 273 } 274 } 275 return false 276 } 277 nouse, ok := annos[GPUNoUse] 278 if ok { 279 if !strings.Contains(nouse, ",") { 280 if strings.Contains(strings.ToUpper(cardtype), strings.ToUpper(nouse)) { 281 return true 282 } 283 } else { 284 for _, val := range strings.Split(nouse, ",") { 285 if strings.Contains(strings.ToUpper(cardtype), strings.ToUpper(val)) { 286 return false 287 } 288 } 289 } 290 return true 291 } 292 return true 293 } 294 295 func checkType(annos map[string]string, d GPUDevice, n ContainerDeviceRequest) bool { 296 //General type check, NVIDIA->NVIDIA MLU->MLU 297 if !strings.Contains(d.Type, n.Type) { 298 return false 299 } 300 if n.Type == NvidiaGPUDevice { 301 return checkGPUtype(annos, d.Type) 302 } 303 klog.Errorf("Unrecognized device %v", n.Type) 304 return false 305 } 306 307 func getGPUDeviceSnapShot(snap *GPUDevices) *GPUDevices { 308 ret := GPUDevices{ 309 Name: snap.Name, 310 Device: make(map[int]*GPUDevice), 311 Score: float64(0), 312 } 313 for index, val := range snap.Device { 314 if val != nil { 315 ret.Device[index] = &GPUDevice{ 316 ID: val.ID, 317 UUID: val.UUID, 318 PodMap: val.PodMap, 319 Memory: val.Memory, 320 Number: val.Number, 321 Type: val.Type, 322 Health: val.Health, 323 UsedNum: val.UsedNum, 324 UsedMem: val.UsedMem, 325 UsedCore: val.UsedCore, 326 } 327 } 328 } 329 return &ret 330 } 331 332 // checkNodeGPUSharingPredicate checks if a pod with gpu requirement can be scheduled on a node. 333 func checkNodeGPUSharingPredicateAndScore(pod *v1.Pod, gssnap *GPUDevices, replicate bool, schedulePolicy string) (bool, []ContainerDevices, float64, error) { 334 // no gpu sharing request 335 score := float64(0) 336 if !checkVGPUResourcesInPod(pod) { 337 return true, []ContainerDevices{}, 0, nil 338 } 339 ctrReq := resourcereqs(pod) 340 if len(ctrReq) == 0 { 341 return true, []ContainerDevices{}, 0, nil 342 } 343 var gs *GPUDevices 344 if replicate { 345 gs = getGPUDeviceSnapShot(gssnap) 346 } else { 347 gs = gssnap 348 } 349 ctrdevs := []ContainerDevices{} 350 for _, val := range ctrReq { 351 devs := []ContainerDevice{} 352 if int(val.Nums) > len(gs.Device) { 353 return false, []ContainerDevices{}, 0, fmt.Errorf("no enough gpu cards on node %s", gs.Name) 354 } 355 klog.V(3).InfoS("Allocating device for container", "request", val) 356 357 for i := len(gs.Device) - 1; i >= 0; i-- { 358 klog.V(3).InfoS("Scoring pod request", "memReq", val.Memreq, "memPercentageReq", val.MemPercentagereq, "coresReq", val.Coresreq, "Nums", val.Nums, "Index", i, "ID", gs.Device[i].ID) 359 klog.V(3).InfoS("Current Device", "Index", i, "TotalMemory", gs.Device[i].Memory, "UsedMemory", gs.Device[i].UsedMem, "UsedCores", gs.Device[i].UsedNum) 360 if gs.Device[i].Number <= uint(gs.Device[i].UsedNum) { 361 continue 362 } 363 if val.MemPercentagereq != 101 && val.Memreq == 0 { 364 val.Memreq = int32(gs.Device[i].Memory * uint(val.MemPercentagereq/100)) 365 } 366 if gs.Device[i].Memory-gs.Device[i].UsedMem < uint(val.Memreq) { 367 continue 368 } 369 if 100-gs.Device[i].UsedCore < uint(val.Coresreq) { 370 continue 371 } 372 // Coresreq=100 indicates it want this card exclusively 373 if val.Coresreq == 100 && gs.Device[i].UsedNum > 0 { 374 continue 375 } 376 // You can't allocate core=0 job to an already full GPU 377 if gs.Device[i].UsedCore == 100 && val.Coresreq == 0 { 378 continue 379 } 380 if !checkType(pod.Annotations, *gs.Device[i], val) { 381 klog.Errorln("failed checktype", gs.Device[i].Type, val.Type) 382 continue 383 } 384 //total += gs.Devices[i].Count 385 //free += node.Devices[i].Count - node.Devices[i].Used 386 if val.Nums > 0 { 387 klog.V(3).InfoS("device fitted", "ID", gs.Device[i].ID) 388 val.Nums-- 389 gs.Device[i].UsedNum++ 390 gs.Device[i].UsedMem += uint(val.Memreq) 391 gs.Device[i].UsedCore += uint(val.Coresreq) 392 devs = append(devs, ContainerDevice{ 393 UUID: gs.Device[i].UUID, 394 Type: val.Type, 395 Usedmem: val.Memreq, 396 Usedcores: val.Coresreq, 397 }) 398 switch schedulePolicy { 399 case binpackPolicy: 400 score += binpackMultiplier * (float64(gs.Device[i].UsedMem) / float64(gs.Device[i].Memory)) 401 case spreadPolicy: 402 if gs.Device[i].UsedNum == 1 { 403 score += spreadMultiplier 404 } 405 default: 406 score = float64(0) 407 } 408 } 409 if val.Nums == 0 { 410 break 411 } 412 } 413 if val.Nums > 0 { 414 return false, []ContainerDevices{}, 0, fmt.Errorf("not enough gpu fitted on this node") 415 } 416 ctrdevs = append(ctrdevs, devs) 417 } 418 return true, ctrdevs, score, nil 419 } 420 421 func patchPodAnnotations(pod *v1.Pod, annotations map[string]string) error { 422 type patchMetadata struct { 423 Annotations map[string]string `json:"annotations,omitempty"` 424 } 425 type patchPod struct { 426 Metadata patchMetadata `json:"metadata"` 427 //Spec patchSpec `json:"spec,omitempty"` 428 } 429 430 p := patchPod{} 431 p.Metadata.Annotations = annotations 432 433 bytes, err := json.Marshal(p) 434 if err != nil { 435 return err 436 } 437 _, err = kubeClient.CoreV1().Pods(pod.Namespace). 438 Patch(context.Background(), pod.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{}) 439 if err != nil { 440 klog.Errorf("patch pod %v failed, %v", pod.Name, err) 441 } 442 /* 443 Can't modify Env of pods here 444 445 patch1 := addGPUIndexPatch() 446 _, err = s.kubeClient.CoreV1().Pods(pod.Namespace). 447 Patch(context.Background(), pod.Name, k8stypes.JSONPatchType, []byte(patch1), metav1.PatchOptions{}) 448 if err != nil { 449 klog.Infof("Patch1 pod %v failed, %v", pod.Name, err) 450 }*/ 451 452 return err 453 }