volcano.sh/volcano@v1.9.0/pkg/scheduler/api/devices/nvidia/gpushare/share.go (about)

     1  /*
     2  Copyright 2023 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package gpushare
    18  
    19  import (
    20  	"fmt"
    21  	"sort"
    22  	"strconv"
    23  	"strings"
    24  	"time"
    25  
    26  	v1 "k8s.io/api/core/v1"
    27  	"k8s.io/klog/v2"
    28  )
    29  
    30  // getDevicesIdleGPUMemory returns all the idle GPU memory by gpu card.
    31  func getDevicesIdleGPUMemory(gs *GPUDevices) map[int]uint {
    32  	devicesAllGPUMemory := getDevicesAllGPUMemory(gs)
    33  	devicesUsedGPUMemory := getDevicesUsedGPUMemory(gs)
    34  	res := map[int]uint{}
    35  	for id, allMemory := range devicesAllGPUMemory {
    36  		if usedMemory, found := devicesUsedGPUMemory[id]; found {
    37  			res[id] = allMemory - usedMemory
    38  		} else {
    39  			res[id] = allMemory
    40  		}
    41  	}
    42  	return res
    43  }
    44  
    45  func getDevicesUsedGPUMemory(gs *GPUDevices) map[int]uint {
    46  	res := map[int]uint{}
    47  	for _, device := range gs.Device {
    48  		res[device.ID] = device.getUsedGPUMemory()
    49  	}
    50  	return res
    51  }
    52  
    53  func getDevicesAllGPUMemory(gs *GPUDevices) map[int]uint {
    54  	res := map[int]uint{}
    55  	for _, device := range gs.Device {
    56  		res[device.ID] = device.Memory
    57  	}
    58  	return res
    59  }
    60  
    61  // GetDevicesIdleGPU returns all the idle gpu card.
    62  func getDevicesIdleGPUs(gs *GPUDevices) []int {
    63  	res := []int{}
    64  	for _, device := range gs.Device {
    65  		if device.isIdleGPU() {
    66  			res = append(res, device.ID)
    67  		}
    68  	}
    69  	return res
    70  }
    71  
    72  // getUnhealthyGPUs returns all the unhealthy GPU id.
    73  func getUnhealthyGPUs(gs *GPUDevices, node *v1.Node) (unhealthyGPUs []int) {
    74  	unhealthyGPUs = []int{}
    75  	devicesStr, ok := node.Annotations[UnhealthyGPUIDs]
    76  
    77  	if !ok {
    78  		return
    79  	}
    80  
    81  	idsStr := strings.Split(devicesStr, ",")
    82  	for _, sid := range idsStr {
    83  		id, err := strconv.Atoi(sid)
    84  		if err != nil {
    85  			klog.Warningf("Failed to parse unhealthy gpu id %s due to %v", sid, err)
    86  		} else {
    87  			unhealthyGPUs = append(unhealthyGPUs, id)
    88  		}
    89  	}
    90  	return
    91  }
    92  
    93  // GetGPUIndex returns the index list of gpu cards
    94  func GetGPUIndex(pod *v1.Pod) []int {
    95  	if len(pod.Annotations) == 0 {
    96  		return nil
    97  	}
    98  
    99  	value, found := pod.Annotations[GPUIndex]
   100  	if !found {
   101  		return nil
   102  	}
   103  
   104  	ids := strings.Split(value, ",")
   105  	if len(ids) == 0 {
   106  		klog.Errorf("invalid gpu index annotation %s=%s", GPUIndex, value)
   107  		return nil
   108  	}
   109  
   110  	idSlice := make([]int, len(ids))
   111  	for idx, id := range ids {
   112  		j, err := strconv.Atoi(id)
   113  		if err != nil {
   114  			klog.Errorf("invalid %s=%s", GPUIndex, value)
   115  			return nil
   116  		}
   117  		idSlice[idx] = j
   118  	}
   119  	return idSlice
   120  }
   121  
   122  // checkNodeGPUSharingPredicate checks if a pod with gpu requirement can be scheduled on a node.
   123  func checkNodeGPUSharingPredicate(pod *v1.Pod, gs *GPUDevices) (bool, error) {
   124  	// no gpu sharing request
   125  	if getGPUMemoryOfPod(pod) <= 0 {
   126  		return true, nil
   127  	}
   128  	ids := predicateGPUbyMemory(pod, gs)
   129  	if len(ids) == 0 {
   130  		return false, fmt.Errorf("no enough gpu memory on node %s", gs.Name)
   131  	}
   132  	return true, nil
   133  }
   134  
   135  func checkNodeGPUNumberPredicate(pod *v1.Pod, gs *GPUDevices) (bool, error) {
   136  	//no gpu number request
   137  	if getGPUNumberOfPod(pod) <= 0 {
   138  		return true, nil
   139  	}
   140  	ids := predicateGPUbyNumber(pod, gs)
   141  	if len(ids) == 0 {
   142  		return false, fmt.Errorf("no enough gpu number on node %s", gs.Name)
   143  	}
   144  	return true, nil
   145  }
   146  
   147  // predicateGPUbyMemory returns the available GPU ID
   148  func predicateGPUbyMemory(pod *v1.Pod, gs *GPUDevices) []int {
   149  	gpuRequest := getGPUMemoryOfPod(pod)
   150  	allocatableGPUs := getDevicesIdleGPUMemory(gs)
   151  
   152  	var devIDs []int
   153  
   154  	for devID := range allocatableGPUs {
   155  		if availableGPU, ok := allocatableGPUs[devID]; ok && availableGPU >= gpuRequest {
   156  			devIDs = append(devIDs, devID)
   157  		}
   158  	}
   159  	sort.Ints(devIDs)
   160  	return devIDs
   161  }
   162  
   163  // predicateGPU returns the available GPU IDs
   164  func predicateGPUbyNumber(pod *v1.Pod, gs *GPUDevices) []int {
   165  	gpuRequest := getGPUNumberOfPod(pod)
   166  	allocatableGPUs := getDevicesIdleGPUs(gs)
   167  
   168  	if len(allocatableGPUs) < gpuRequest {
   169  		klog.Errorf("Not enough gpu cards")
   170  		return nil
   171  	}
   172  
   173  	return allocatableGPUs[:gpuRequest]
   174  }
   175  
   176  func escapeJSONPointer(p string) string {
   177  	// Escaping reference name using https://tools.ietf.org/html/rfc6901
   178  	p = strings.Replace(p, "~", "~0", -1)
   179  	p = strings.Replace(p, "/", "~1", -1)
   180  	return p
   181  }
   182  
   183  // AddGPUIndexPatch returns the patch adding GPU index
   184  func AddGPUIndexPatch(ids []int) string {
   185  	idsstring := strings.Trim(strings.Replace(fmt.Sprint(ids), " ", ",", -1), "[]")
   186  	return fmt.Sprintf(`[{"op": "add", "path": "/metadata/annotations/%s", "value":"%d"},`+
   187  		`{"op": "add", "path": "/metadata/annotations/%s", "value": "%s"}]`,
   188  		escapeJSONPointer(PredicateTime), time.Now().UnixNano(),
   189  		escapeJSONPointer(GPUIndex), idsstring)
   190  }
   191  
   192  // RemoveGPUIndexPatch returns the patch removing GPU index
   193  func RemoveGPUIndexPatch() string {
   194  	return fmt.Sprintf(`[{"op": "remove", "path": "/metadata/annotations/%s"},`+
   195  		`{"op": "remove", "path": "/metadata/annotations/%s"}]`, escapeJSONPointer(PredicateTime), escapeJSONPointer(GPUIndex))
   196  }
   197  
   198  // getUsedGPUMemory calculates the used memory of the device.
   199  func (g *GPUDevice) getUsedGPUMemory() uint {
   200  	res := uint(0)
   201  	for _, pod := range g.PodMap {
   202  		if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed {
   203  			continue
   204  		} else {
   205  			gpuRequest := getGPUMemoryOfPod(pod)
   206  			res += gpuRequest
   207  		}
   208  	}
   209  	return res
   210  }
   211  
   212  // isIdleGPU check if the device is idled.
   213  func (g *GPUDevice) isIdleGPU() bool {
   214  	return g.PodMap == nil || len(g.PodMap) == 0
   215  }
   216  
   217  // getGPUMemoryPod returns the GPU memory required by the pod.
   218  func getGPUMemoryOfPod(pod *v1.Pod) uint {
   219  	var initMem uint
   220  	for _, container := range pod.Spec.InitContainers {
   221  		res := getGPUMemoryOfContainer(container.Resources)
   222  		if initMem < res {
   223  			initMem = res
   224  		}
   225  	}
   226  
   227  	var mem uint
   228  	for _, container := range pod.Spec.Containers {
   229  		mem += getGPUMemoryOfContainer(container.Resources)
   230  	}
   231  
   232  	if mem > initMem {
   233  		return mem
   234  	}
   235  	return initMem
   236  }
   237  
   238  // getGPUMemoryOfContainer returns the GPU memory required by the container.
   239  func getGPUMemoryOfContainer(resources v1.ResourceRequirements) uint {
   240  	var mem uint
   241  	if val, ok := resources.Limits[VolcanoGPUResource]; ok {
   242  		mem = uint(val.Value())
   243  	}
   244  	return mem
   245  }
   246  
   247  // getGPUNumberOfPod returns the number of GPUs required by the pod.
   248  func getGPUNumberOfPod(pod *v1.Pod) int {
   249  	var gpus int
   250  	for _, container := range pod.Spec.Containers {
   251  		gpus += getGPUNumberOfContainer(container.Resources)
   252  	}
   253  
   254  	var initGPUs int
   255  	for _, container := range pod.Spec.InitContainers {
   256  		res := getGPUNumberOfContainer(container.Resources)
   257  		if initGPUs < res {
   258  			initGPUs = res
   259  		}
   260  	}
   261  
   262  	if gpus > initGPUs {
   263  		return gpus
   264  	}
   265  	return initGPUs
   266  }
   267  
   268  // getGPUNumberOfContainer returns the number of GPUs required by the container.
   269  func getGPUNumberOfContainer(resources v1.ResourceRequirements) int {
   270  	var gpus int
   271  	if val, ok := resources.Limits[VolcanoGPUNumber]; ok {
   272  		gpus = int(val.Value())
   273  	}
   274  	return gpus
   275  }