volcano.sh/volcano@v1.9.0/pkg/scheduler/api/devices/nvidia/vgpu/device_info.go (about) 1 /* 2 Copyright 2023 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vgpu 18 19 import ( 20 "fmt" 21 "strconv" 22 "strings" 23 "time" 24 25 "github.com/pkg/errors" 26 v1 "k8s.io/api/core/v1" 27 "k8s.io/client-go/kubernetes" 28 "k8s.io/klog/v2" 29 30 "volcano.sh/volcano/pkg/scheduler/api/devices" 31 "volcano.sh/volcano/pkg/scheduler/plugins/util/nodelock" 32 ) 33 34 // GPUDevice include gpu id, memory and the pods that are sharing it. 35 type GPUDevice struct { 36 // GPU ID 37 ID int 38 // GPU Unique ID 39 UUID string 40 // The pods that are sharing this GPU 41 PodMap map[string]*v1.Pod 42 // memory per card 43 Memory uint 44 // max sharing number 45 Number uint 46 // type of this number 47 Type string 48 // Health condition of this GPU 49 Health bool 50 // number of allocated 51 UsedNum uint 52 // number of device memory allocated 53 UsedMem uint 54 // number of core used 55 UsedCore uint 56 } 57 58 type GPUDevices struct { 59 Name string 60 61 // We cache score in filter step according to schedulePolicy, to avoid recalculating in score 62 Score float64 63 64 Device map[int]*GPUDevice 65 } 66 67 // NewGPUDevice creates a device 68 func NewGPUDevice(id int, mem uint) *GPUDevice { 69 return &GPUDevice{ 70 ID: id, 71 Memory: mem, 72 PodMap: map[string]*v1.Pod{}, 73 UsedNum: 0, 74 UsedMem: 0, 75 UsedCore: 0, 76 } 77 } 78 79 func NewGPUDevices(name string, node *v1.Node) *GPUDevices { 80 if node == nil { 81 return nil 82 } 83 annos, ok := node.Annotations[VolcanoVGPURegister] 84 if !ok { 85 return nil 86 } 87 handshake, ok := node.Annotations[VolcanoVGPUHandshake] 88 if !ok { 89 return nil 90 } 91 nodedevices := decodeNodeDevices(name, annos) 92 if len(nodedevices.Device) == 0 { 93 return nil 94 } 95 for _, val := range nodedevices.Device { 96 klog.V(4).Infoln("name=", nodedevices.Name, "val=", *val) 97 } 98 99 // We have to handshake here in order to avoid time-inconsistency between scheduler and nodes 100 if strings.Contains(handshake, "Requesting") { 101 formertime, _ := time.Parse("2006.01.02 15:04:05", strings.Split(handshake, "_")[1]) 102 if time.Now().After(formertime.Add(time.Second * 60)) { 103 klog.Infof("node %v device %s leave", node.Name, handshake) 104 105 tmppat := make(map[string]string) 106 tmppat[VolcanoVGPUHandshake] = "Deleted_" + time.Now().Format("2006.01.02 15:04:05") 107 patchNodeAnnotations(node, tmppat) 108 return nil 109 } 110 } else if strings.Contains(handshake, "Deleted") { 111 return nil 112 } else { 113 tmppat := make(map[string]string) 114 tmppat[VolcanoVGPUHandshake] = "Requesting_" + time.Now().Format("2006.01.02 15:04:05") 115 patchNodeAnnotations(node, tmppat) 116 } 117 return nodedevices 118 } 119 120 func (gs *GPUDevices) ScoreNode(pod *v1.Pod, schedulePolicy string) float64 { 121 /* TODO: we need a base score to be campatable with preemption, it means a node without evicting a task has 122 a higher score than those needs to evict a task */ 123 124 // Use cached stored in filter state in order to avoid recalculating. 125 return gs.Score 126 } 127 128 func (gs *GPUDevices) GetIgnoredDevices() []string { 129 return []string{VolcanoVGPUMemory, VolcanoVGPUMemoryPercentage, VolcanoVGPUCores} 130 } 131 132 // AddResource adds the pod to GPU pool if it is assigned 133 func (gs *GPUDevices) AddResource(pod *v1.Pod) { 134 ids, ok := pod.Annotations[AssignedIDsAnnotations] 135 if !ok { 136 return 137 } 138 podDev := decodePodDevices(ids) 139 for _, val := range podDev { 140 for _, deviceused := range val { 141 if gs == nil { 142 break 143 } 144 for index, gsdevice := range gs.Device { 145 if gsdevice.UUID == deviceused.UUID { 146 klog.V(4).Infoln("VGPU recording pod", pod.Name, "device", deviceused) 147 gs.Device[index].UsedMem += uint(deviceused.Usedmem) 148 gs.Device[index].UsedNum++ 149 gs.Device[index].UsedCore += uint(deviceused.Usedcores) 150 } 151 } 152 } 153 } 154 gs.GetStatus() 155 } 156 157 // SubResource frees the gpu hold by the pod 158 func (gs *GPUDevices) SubResource(pod *v1.Pod) { 159 ids, ok := pod.Annotations[AssignedIDsAnnotations] 160 if !ok { 161 return 162 } 163 podDev := decodePodDevices(ids) 164 for _, val := range podDev { 165 for _, deviceused := range val { 166 if gs == nil { 167 break 168 } 169 for index, gsdevice := range gs.Device { 170 if gsdevice.UUID == deviceused.UUID { 171 klog.V(4).Infoln("VGPU subsctracting pod", pod.Name, "device", deviceused) 172 gs.Device[index].UsedMem -= uint(deviceused.Usedmem) 173 gs.Device[index].UsedNum-- 174 gs.Device[index].UsedCore -= uint(deviceused.Usedcores) 175 } 176 } 177 } 178 } 179 } 180 181 func (gs *GPUDevices) HasDeviceRequest(pod *v1.Pod) bool { 182 if VGPUEnable && checkVGPUResourcesInPod(pod) { 183 return true 184 } 185 return false 186 } 187 188 func (gs *GPUDevices) Release(kubeClient kubernetes.Interface, pod *v1.Pod) error { 189 // Nothing needs to be done here 190 return nil 191 } 192 193 func (gs *GPUDevices) FilterNode(pod *v1.Pod, schedulePolicy string) (int, string, error) { 194 if VGPUEnable { 195 klog.V(4).Infoln("hami-vgpu DeviceSharing starts filtering pods", pod.Name) 196 fit, _, score, err := checkNodeGPUSharingPredicateAndScore(pod, gs, true, schedulePolicy) 197 if err != nil || !fit { 198 klog.Errorln("deviceSharing err=", err.Error()) 199 return devices.Unschedulable, fmt.Sprintf("hami-vgpuDeviceSharing %s", err.Error()), err 200 } 201 gs.Score = score 202 klog.V(4).Infoln("hami-vgpu DeviceSharing successfully filters pods") 203 } 204 return devices.Success, "", nil 205 } 206 207 func (gs *GPUDevices) Allocate(kubeClient kubernetes.Interface, pod *v1.Pod) error { 208 if VGPUEnable { 209 klog.V(4).Infoln("hami-vgpu DeviceSharing:Into AllocateToPod", pod.Name) 210 fit, device, _, err := checkNodeGPUSharingPredicateAndScore(pod, gs, false, "") 211 if err != nil || !fit { 212 klog.Errorln("DeviceSharing err=", err.Error()) 213 return err 214 } 215 if NodeLockEnable { 216 nodelock.UseClient(kubeClient) 217 err = nodelock.LockNode(gs.Name, DeviceName) 218 if err != nil { 219 return errors.Errorf("node %s locked for lockname gpushare %s", gs.Name, err.Error()) 220 } 221 } 222 223 annotations := make(map[string]string) 224 annotations[AssignedNodeAnnotations] = gs.Name 225 annotations[AssignedTimeAnnotations] = strconv.FormatInt(time.Now().Unix(), 10) 226 annotations[AssignedIDsAnnotations] = encodePodDevices(device) 227 annotations[AssignedIDsToAllocateAnnotations] = annotations[AssignedIDsAnnotations] 228 229 annotations[DeviceBindPhase] = "allocating" 230 annotations[BindTimeAnnotations] = strconv.FormatInt(time.Now().Unix(), 10) 231 err = patchPodAnnotations(pod, annotations) 232 if err != nil { 233 return err 234 } 235 gs.GetStatus() 236 klog.V(3).Infoln("DeviceSharing:Allocate Success") 237 } 238 return nil 239 }