volcano.sh/volcano@v1.9.0/pkg/scheduler/api/devices/nvidia/gpushare/share.go (about) 1 /* 2 Copyright 2023 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package gpushare 18 19 import ( 20 "fmt" 21 "sort" 22 "strconv" 23 "strings" 24 "time" 25 26 v1 "k8s.io/api/core/v1" 27 "k8s.io/klog/v2" 28 ) 29 30 // getDevicesIdleGPUMemory returns all the idle GPU memory by gpu card. 31 func getDevicesIdleGPUMemory(gs *GPUDevices) map[int]uint { 32 devicesAllGPUMemory := getDevicesAllGPUMemory(gs) 33 devicesUsedGPUMemory := getDevicesUsedGPUMemory(gs) 34 res := map[int]uint{} 35 for id, allMemory := range devicesAllGPUMemory { 36 if usedMemory, found := devicesUsedGPUMemory[id]; found { 37 res[id] = allMemory - usedMemory 38 } else { 39 res[id] = allMemory 40 } 41 } 42 return res 43 } 44 45 func getDevicesUsedGPUMemory(gs *GPUDevices) map[int]uint { 46 res := map[int]uint{} 47 for _, device := range gs.Device { 48 res[device.ID] = device.getUsedGPUMemory() 49 } 50 return res 51 } 52 53 func getDevicesAllGPUMemory(gs *GPUDevices) map[int]uint { 54 res := map[int]uint{} 55 for _, device := range gs.Device { 56 res[device.ID] = device.Memory 57 } 58 return res 59 } 60 61 // GetDevicesIdleGPU returns all the idle gpu card. 62 func getDevicesIdleGPUs(gs *GPUDevices) []int { 63 res := []int{} 64 for _, device := range gs.Device { 65 if device.isIdleGPU() { 66 res = append(res, device.ID) 67 } 68 } 69 return res 70 } 71 72 // getUnhealthyGPUs returns all the unhealthy GPU id. 73 func getUnhealthyGPUs(gs *GPUDevices, node *v1.Node) (unhealthyGPUs []int) { 74 unhealthyGPUs = []int{} 75 devicesStr, ok := node.Annotations[UnhealthyGPUIDs] 76 77 if !ok { 78 return 79 } 80 81 idsStr := strings.Split(devicesStr, ",") 82 for _, sid := range idsStr { 83 id, err := strconv.Atoi(sid) 84 if err != nil { 85 klog.Warningf("Failed to parse unhealthy gpu id %s due to %v", sid, err) 86 } else { 87 unhealthyGPUs = append(unhealthyGPUs, id) 88 } 89 } 90 return 91 } 92 93 // GetGPUIndex returns the index list of gpu cards 94 func GetGPUIndex(pod *v1.Pod) []int { 95 if len(pod.Annotations) == 0 { 96 return nil 97 } 98 99 value, found := pod.Annotations[GPUIndex] 100 if !found { 101 return nil 102 } 103 104 ids := strings.Split(value, ",") 105 if len(ids) == 0 { 106 klog.Errorf("invalid gpu index annotation %s=%s", GPUIndex, value) 107 return nil 108 } 109 110 idSlice := make([]int, len(ids)) 111 for idx, id := range ids { 112 j, err := strconv.Atoi(id) 113 if err != nil { 114 klog.Errorf("invalid %s=%s", GPUIndex, value) 115 return nil 116 } 117 idSlice[idx] = j 118 } 119 return idSlice 120 } 121 122 // checkNodeGPUSharingPredicate checks if a pod with gpu requirement can be scheduled on a node. 123 func checkNodeGPUSharingPredicate(pod *v1.Pod, gs *GPUDevices) (bool, error) { 124 // no gpu sharing request 125 if getGPUMemoryOfPod(pod) <= 0 { 126 return true, nil 127 } 128 ids := predicateGPUbyMemory(pod, gs) 129 if len(ids) == 0 { 130 return false, fmt.Errorf("no enough gpu memory on node %s", gs.Name) 131 } 132 return true, nil 133 } 134 135 func checkNodeGPUNumberPredicate(pod *v1.Pod, gs *GPUDevices) (bool, error) { 136 //no gpu number request 137 if getGPUNumberOfPod(pod) <= 0 { 138 return true, nil 139 } 140 ids := predicateGPUbyNumber(pod, gs) 141 if len(ids) == 0 { 142 return false, fmt.Errorf("no enough gpu number on node %s", gs.Name) 143 } 144 return true, nil 145 } 146 147 // predicateGPUbyMemory returns the available GPU ID 148 func predicateGPUbyMemory(pod *v1.Pod, gs *GPUDevices) []int { 149 gpuRequest := getGPUMemoryOfPod(pod) 150 allocatableGPUs := getDevicesIdleGPUMemory(gs) 151 152 var devIDs []int 153 154 for devID := range allocatableGPUs { 155 if availableGPU, ok := allocatableGPUs[devID]; ok && availableGPU >= gpuRequest { 156 devIDs = append(devIDs, devID) 157 } 158 } 159 sort.Ints(devIDs) 160 return devIDs 161 } 162 163 // predicateGPU returns the available GPU IDs 164 func predicateGPUbyNumber(pod *v1.Pod, gs *GPUDevices) []int { 165 gpuRequest := getGPUNumberOfPod(pod) 166 allocatableGPUs := getDevicesIdleGPUs(gs) 167 168 if len(allocatableGPUs) < gpuRequest { 169 klog.Errorf("Not enough gpu cards") 170 return nil 171 } 172 173 return allocatableGPUs[:gpuRequest] 174 } 175 176 func escapeJSONPointer(p string) string { 177 // Escaping reference name using https://tools.ietf.org/html/rfc6901 178 p = strings.Replace(p, "~", "~0", -1) 179 p = strings.Replace(p, "/", "~1", -1) 180 return p 181 } 182 183 // AddGPUIndexPatch returns the patch adding GPU index 184 func AddGPUIndexPatch(ids []int) string { 185 idsstring := strings.Trim(strings.Replace(fmt.Sprint(ids), " ", ",", -1), "[]") 186 return fmt.Sprintf(`[{"op": "add", "path": "/metadata/annotations/%s", "value":"%d"},`+ 187 `{"op": "add", "path": "/metadata/annotations/%s", "value": "%s"}]`, 188 escapeJSONPointer(PredicateTime), time.Now().UnixNano(), 189 escapeJSONPointer(GPUIndex), idsstring) 190 } 191 192 // RemoveGPUIndexPatch returns the patch removing GPU index 193 func RemoveGPUIndexPatch() string { 194 return fmt.Sprintf(`[{"op": "remove", "path": "/metadata/annotations/%s"},`+ 195 `{"op": "remove", "path": "/metadata/annotations/%s"}]`, escapeJSONPointer(PredicateTime), escapeJSONPointer(GPUIndex)) 196 } 197 198 // getUsedGPUMemory calculates the used memory of the device. 199 func (g *GPUDevice) getUsedGPUMemory() uint { 200 res := uint(0) 201 for _, pod := range g.PodMap { 202 if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed { 203 continue 204 } else { 205 gpuRequest := getGPUMemoryOfPod(pod) 206 res += gpuRequest 207 } 208 } 209 return res 210 } 211 212 // isIdleGPU check if the device is idled. 213 func (g *GPUDevice) isIdleGPU() bool { 214 return g.PodMap == nil || len(g.PodMap) == 0 215 } 216 217 // getGPUMemoryPod returns the GPU memory required by the pod. 218 func getGPUMemoryOfPod(pod *v1.Pod) uint { 219 var initMem uint 220 for _, container := range pod.Spec.InitContainers { 221 res := getGPUMemoryOfContainer(container.Resources) 222 if initMem < res { 223 initMem = res 224 } 225 } 226 227 var mem uint 228 for _, container := range pod.Spec.Containers { 229 mem += getGPUMemoryOfContainer(container.Resources) 230 } 231 232 if mem > initMem { 233 return mem 234 } 235 return initMem 236 } 237 238 // getGPUMemoryOfContainer returns the GPU memory required by the container. 239 func getGPUMemoryOfContainer(resources v1.ResourceRequirements) uint { 240 var mem uint 241 if val, ok := resources.Limits[VolcanoGPUResource]; ok { 242 mem = uint(val.Value()) 243 } 244 return mem 245 } 246 247 // getGPUNumberOfPod returns the number of GPUs required by the pod. 248 func getGPUNumberOfPod(pod *v1.Pod) int { 249 var gpus int 250 for _, container := range pod.Spec.Containers { 251 gpus += getGPUNumberOfContainer(container.Resources) 252 } 253 254 var initGPUs int 255 for _, container := range pod.Spec.InitContainers { 256 res := getGPUNumberOfContainer(container.Resources) 257 if initGPUs < res { 258 initGPUs = res 259 } 260 } 261 262 if gpus > initGPUs { 263 return gpus 264 } 265 return initGPUs 266 } 267 268 // getGPUNumberOfContainer returns the number of GPUs required by the container. 269 func getGPUNumberOfContainer(resources v1.ResourceRequirements) int { 270 var gpus int 271 if val, ok := resources.Limits[VolcanoGPUNumber]; ok { 272 gpus = int(val.Value()) 273 } 274 return gpus 275 }