volcano.sh/volcano@v1.9.0/pkg/scheduler/api/devices/nvidia/vgpu/utils.go

volcano.sh/volcano@v1.9.0/pkg/scheduler/api/devices/nvidia/vgpu/utils.go (about)

     1  /*
     2  Copyright 2023 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vgpu
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"strconv"
    26  	"strings"
    27  
    28  	v1 "k8s.io/api/core/v1"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	k8stypes "k8s.io/apimachinery/pkg/types"
    31  	"k8s.io/client-go/kubernetes"
    32  	"k8s.io/client-go/rest"
    33  	"k8s.io/client-go/tools/clientcmd"
    34  	"k8s.io/klog/v2"
    35  )
    36  
    37  var kubeClient kubernetes.Interface
    38  
    39  func init() {
    40  	var err error
    41  	kubeClient, err = NewClient()
    42  	if err != nil {
    43  		klog.Errorf("init kubeclient in hamivgpu failed: %s", err.Error())
    44  	} else {
    45  		klog.V(3).Infoln("init kubeclient success")
    46  	}
    47  }
    48  
    49  // NewClient connects to an API server
    50  func NewClient() (kubernetes.Interface, error) {
    51  	kubeConfig := os.Getenv("KUBECONFIG")
    52  	if kubeConfig == "" {
    53  		kubeConfig = filepath.Join(os.Getenv("HOME"), ".kube", "config")
    54  	}
    55  	config, err := rest.InClusterConfig()
    56  	if err != nil {
    57  		config, err = clientcmd.BuildConfigFromFlags("", kubeConfig)
    58  		if err != nil {
    59  			return nil, err
    60  		}
    61  	}
    62  	client, err := kubernetes.NewForConfig(config)
    63  	kubeClient = client
    64  	return client, err
    65  }
    66  
    67  func patchNodeAnnotations(node *v1.Node, annotations map[string]string) error {
    68  	type patchMetadata struct {
    69  		Annotations map[string]string `json:"annotations,omitempty"`
    70  	}
    71  	type patchPod struct {
    72  		Metadata patchMetadata `json:"metadata"`
    73  		//Spec     patchSpec     `json:"spec,omitempty"`
    74  	}
    75  
    76  	p := patchPod{}
    77  	p.Metadata.Annotations = annotations
    78  
    79  	bytes, err := json.Marshal(p)
    80  	if err != nil {
    81  		return err
    82  	}
    83  	_, err = kubeClient.CoreV1().Nodes().
    84  		Patch(context.Background(), node.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{})
    85  	if err != nil {
    86  		klog.Errorf("patch pod %v failed, %v", node.Name, err)
    87  	}
    88  	return err
    89  }
    90  
    91  func decodeNodeDevices(name string, str string) *GPUDevices {
    92  	if !strings.Contains(str, ":") {
    93  		return nil
    94  	}
    95  	tmp := strings.Split(str, ":")
    96  	retval := &GPUDevices{
    97  		Name:   name,
    98  		Device: make(map[int]*GPUDevice),
    99  		Score:  float64(0),
   100  	}
   101  	for index, val := range tmp {
   102  		if strings.Contains(val, ",") {
   103  			items := strings.Split(val, ",")
   104  			count, _ := strconv.Atoi(items[1])
   105  			devmem, _ := strconv.Atoi(items[2])
   106  			health, _ := strconv.ParseBool(items[4])
   107  			i := GPUDevice{
   108  				ID:     index,
   109  				UUID:   items[0],
   110  				Number: uint(count),
   111  				Memory: uint(devmem),
   112  				Type:   items[3],
   113  				Health: health,
   114  			}
   115  			retval.Device[index] = &i
   116  		}
   117  	}
   118  	return retval
   119  }
   120  
   121  func encodeContainerDevices(cd []ContainerDevice) string {
   122  	tmp := ""
   123  	for _, val := range cd {
   124  		tmp += val.UUID + "," + val.Type + "," + strconv.Itoa(int(val.Usedmem)) + "," + strconv.Itoa(int(val.Usedcores)) + ":"
   125  	}
   126  	klog.V(4).Infoln("Encoded container Devices=", tmp)
   127  	return tmp
   128  	//return strings.Join(cd, ",")
   129  }
   130  
   131  func encodePodDevices(pd []ContainerDevices) string {
   132  	var ss []string
   133  	for _, cd := range pd {
   134  		ss = append(ss, encodeContainerDevices(cd))
   135  	}
   136  	return strings.Join(ss, ";")
   137  }
   138  
   139  func decodeContainerDevices(str string) ContainerDevices {
   140  	if len(str) == 0 {
   141  		return ContainerDevices{}
   142  	}
   143  	cd := strings.Split(str, ":")
   144  	contdev := ContainerDevices{}
   145  	tmpdev := ContainerDevice{}
   146  	//fmt.Println("before container device", str)
   147  	if len(str) == 0 {
   148  		return contdev
   149  	}
   150  	for _, val := range cd {
   151  		if strings.Contains(val, ",") {
   152  			//fmt.Println("cd is ", val)
   153  			tmpstr := strings.Split(val, ",")
   154  			tmpdev.UUID = tmpstr[0]
   155  			tmpdev.Type = tmpstr[1]
   156  			devmem, _ := strconv.ParseInt(tmpstr[2], 10, 32)
   157  			tmpdev.Usedmem = int32(devmem)
   158  			devcores, _ := strconv.ParseInt(tmpstr[3], 10, 32)
   159  			tmpdev.Usedcores = int32(devcores)
   160  			contdev = append(contdev, tmpdev)
   161  		}
   162  	}
   163  	//fmt.Println("Decoded container device", contdev)
   164  	return contdev
   165  }
   166  
   167  func decodePodDevices(str string) []ContainerDevices {
   168  	if len(str) == 0 {
   169  		return []ContainerDevices{}
   170  	}
   171  	var pd []ContainerDevices
   172  	for _, s := range strings.Split(str, ";") {
   173  		cd := decodeContainerDevices(s)
   174  		pd = append(pd, cd)
   175  	}
   176  	return pd
   177  }
   178  
   179  func checkVGPUResourcesInPod(pod *v1.Pod) bool {
   180  	for _, container := range pod.Spec.Containers {
   181  		_, ok := container.Resources.Limits[VolcanoVGPUMemory]
   182  		if ok {
   183  			return true
   184  		}
   185  		_, ok = container.Resources.Limits[VolcanoVGPUNumber]
   186  		if ok {
   187  			return true
   188  		}
   189  	}
   190  	return false
   191  }
   192  
   193  func resourcereqs(pod *v1.Pod) []ContainerDeviceRequest {
   194  	resourceName := v1.ResourceName(VolcanoVGPUNumber)
   195  	resourceMem := v1.ResourceName(VolcanoVGPUMemory)
   196  	resourceMemPercentage := v1.ResourceName(VolcanoVGPUMemoryPercentage)
   197  	resourceCores := v1.ResourceName(VolcanoVGPUCores)
   198  	counts := []ContainerDeviceRequest{}
   199  	//Count Nvidia GPU
   200  	for i := 0; i < len(pod.Spec.Containers); i++ {
   201  		singledevice := false
   202  		v, ok := pod.Spec.Containers[i].Resources.Limits[resourceName]
   203  		if !ok {
   204  			v, ok = pod.Spec.Containers[i].Resources.Limits[resourceMem]
   205  			singledevice = true
   206  		}
   207  		if ok {
   208  			n := int64(1)
   209  			if !singledevice {
   210  				n, _ = v.AsInt64()
   211  			}
   212  			memnum := 0
   213  			mem, ok := pod.Spec.Containers[i].Resources.Limits[resourceMem]
   214  			if !ok {
   215  				mem, ok = pod.Spec.Containers[i].Resources.Requests[resourceMem]
   216  			}
   217  			if ok {
   218  				memnums, ok := mem.AsInt64()
   219  				if ok {
   220  					memnum = int(memnums)
   221  				}
   222  			}
   223  			mempnum := int32(101)
   224  			mem, ok = pod.Spec.Containers[i].Resources.Limits[resourceMemPercentage]
   225  			if !ok {
   226  				mem, ok = pod.Spec.Containers[i].Resources.Requests[resourceMemPercentage]
   227  			}
   228  			if ok {
   229  				mempnums, ok := mem.AsInt64()
   230  				if ok {
   231  					mempnum = int32(mempnums)
   232  				}
   233  			}
   234  			if mempnum == 101 && memnum == 0 {
   235  				mempnum = 100
   236  			}
   237  			corenum := 0
   238  			core, ok := pod.Spec.Containers[i].Resources.Limits[resourceCores]
   239  			if !ok {
   240  				core, ok = pod.Spec.Containers[i].Resources.Requests[resourceCores]
   241  			}
   242  			if ok {
   243  				corenums, ok := core.AsInt64()
   244  				if ok {
   245  					corenum = int(corenums)
   246  				}
   247  			}
   248  			counts = append(counts, ContainerDeviceRequest{
   249  				Nums:             int32(n),
   250  				Type:             "NVIDIA",
   251  				Memreq:           int32(memnum),
   252  				MemPercentagereq: int32(mempnum),
   253  				Coresreq:         int32(corenum),
   254  			})
   255  		}
   256  	}
   257  	klog.V(3).Infoln("counts=", counts)
   258  	return counts
   259  }
   260  
   261  func checkGPUtype(annos map[string]string, cardtype string) bool {
   262  	inuse, ok := annos[GPUInUse]
   263  	if ok {
   264  		if !strings.Contains(inuse, ",") {
   265  			if strings.Contains(strings.ToUpper(cardtype), strings.ToUpper(inuse)) {
   266  				return true
   267  			}
   268  		} else {
   269  			for _, val := range strings.Split(inuse, ",") {
   270  				if strings.Contains(strings.ToUpper(cardtype), strings.ToUpper(val)) {
   271  					return true
   272  				}
   273  			}
   274  		}
   275  		return false
   276  	}
   277  	nouse, ok := annos[GPUNoUse]
   278  	if ok {
   279  		if !strings.Contains(nouse, ",") {
   280  			if strings.Contains(strings.ToUpper(cardtype), strings.ToUpper(nouse)) {
   281  				return true
   282  			}
   283  		} else {
   284  			for _, val := range strings.Split(nouse, ",") {
   285  				if strings.Contains(strings.ToUpper(cardtype), strings.ToUpper(val)) {
   286  					return false
   287  				}
   288  			}
   289  		}
   290  		return true
   291  	}
   292  	return true
   293  }
   294  
   295  func checkType(annos map[string]string, d GPUDevice, n ContainerDeviceRequest) bool {
   296  	//General type check, NVIDIA->NVIDIA MLU->MLU
   297  	if !strings.Contains(d.Type, n.Type) {
   298  		return false
   299  	}
   300  	if n.Type == NvidiaGPUDevice {
   301  		return checkGPUtype(annos, d.Type)
   302  	}
   303  	klog.Errorf("Unrecognized device %v", n.Type)
   304  	return false
   305  }
   306  
   307  func getGPUDeviceSnapShot(snap *GPUDevices) *GPUDevices {
   308  	ret := GPUDevices{
   309  		Name:   snap.Name,
   310  		Device: make(map[int]*GPUDevice),
   311  		Score:  float64(0),
   312  	}
   313  	for index, val := range snap.Device {
   314  		if val != nil {
   315  			ret.Device[index] = &GPUDevice{
   316  				ID:       val.ID,
   317  				UUID:     val.UUID,
   318  				PodMap:   val.PodMap,
   319  				Memory:   val.Memory,
   320  				Number:   val.Number,
   321  				Type:     val.Type,
   322  				Health:   val.Health,
   323  				UsedNum:  val.UsedNum,
   324  				UsedMem:  val.UsedMem,
   325  				UsedCore: val.UsedCore,
   326  			}
   327  		}
   328  	}
   329  	return &ret
   330  }
   331  
   332  // checkNodeGPUSharingPredicate checks if a pod with gpu requirement can be scheduled on a node.
   333  func checkNodeGPUSharingPredicateAndScore(pod *v1.Pod, gssnap *GPUDevices, replicate bool, schedulePolicy string) (bool, []ContainerDevices, float64, error) {
   334  	// no gpu sharing request
   335  	score := float64(0)
   336  	if !checkVGPUResourcesInPod(pod) {
   337  		return true, []ContainerDevices{}, 0, nil
   338  	}
   339  	ctrReq := resourcereqs(pod)
   340  	if len(ctrReq) == 0 {
   341  		return true, []ContainerDevices{}, 0, nil
   342  	}
   343  	var gs *GPUDevices
   344  	if replicate {
   345  		gs = getGPUDeviceSnapShot(gssnap)
   346  	} else {
   347  		gs = gssnap
   348  	}
   349  	ctrdevs := []ContainerDevices{}
   350  	for _, val := range ctrReq {
   351  		devs := []ContainerDevice{}
   352  		if int(val.Nums) > len(gs.Device) {
   353  			return false, []ContainerDevices{}, 0, fmt.Errorf("no enough gpu cards on node %s", gs.Name)
   354  		}
   355  		klog.V(3).InfoS("Allocating device for container", "request", val)
   356  
   357  		for i := len(gs.Device) - 1; i >= 0; i-- {
   358  			klog.V(3).InfoS("Scoring pod request", "memReq", val.Memreq, "memPercentageReq", val.MemPercentagereq, "coresReq", val.Coresreq, "Nums", val.Nums, "Index", i, "ID", gs.Device[i].ID)
   359  			klog.V(3).InfoS("Current Device", "Index", i, "TotalMemory", gs.Device[i].Memory, "UsedMemory", gs.Device[i].UsedMem, "UsedCores", gs.Device[i].UsedNum)
   360  			if gs.Device[i].Number <= uint(gs.Device[i].UsedNum) {
   361  				continue
   362  			}
   363  			if val.MemPercentagereq != 101 && val.Memreq == 0 {
   364  				val.Memreq = int32(gs.Device[i].Memory * uint(val.MemPercentagereq/100))
   365  			}
   366  			if gs.Device[i].Memory-gs.Device[i].UsedMem < uint(val.Memreq) {
   367  				continue
   368  			}
   369  			if 100-gs.Device[i].UsedCore < uint(val.Coresreq) {
   370  				continue
   371  			}
   372  			// Coresreq=100 indicates it want this card exclusively
   373  			if val.Coresreq == 100 && gs.Device[i].UsedNum > 0 {
   374  				continue
   375  			}
   376  			// You can't allocate core=0 job to an already full GPU
   377  			if gs.Device[i].UsedCore == 100 && val.Coresreq == 0 {
   378  				continue
   379  			}
   380  			if !checkType(pod.Annotations, *gs.Device[i], val) {
   381  				klog.Errorln("failed checktype", gs.Device[i].Type, val.Type)
   382  				continue
   383  			}
   384  			//total += gs.Devices[i].Count
   385  			//free += node.Devices[i].Count - node.Devices[i].Used
   386  			if val.Nums > 0 {
   387  				klog.V(3).InfoS("device fitted", "ID", gs.Device[i].ID)
   388  				val.Nums--
   389  				gs.Device[i].UsedNum++
   390  				gs.Device[i].UsedMem += uint(val.Memreq)
   391  				gs.Device[i].UsedCore += uint(val.Coresreq)
   392  				devs = append(devs, ContainerDevice{
   393  					UUID:      gs.Device[i].UUID,
   394  					Type:      val.Type,
   395  					Usedmem:   val.Memreq,
   396  					Usedcores: val.Coresreq,
   397  				})
   398  				switch schedulePolicy {
   399  				case binpackPolicy:
   400  					score += binpackMultiplier * (float64(gs.Device[i].UsedMem) / float64(gs.Device[i].Memory))
   401  				case spreadPolicy:
   402  					if gs.Device[i].UsedNum == 1 {
   403  						score += spreadMultiplier
   404  					}
   405  				default:
   406  					score = float64(0)
   407  				}
   408  			}
   409  			if val.Nums == 0 {
   410  				break
   411  			}
   412  		}
   413  		if val.Nums > 0 {
   414  			return false, []ContainerDevices{}, 0, fmt.Errorf("not enough gpu fitted on this node")
   415  		}
   416  		ctrdevs = append(ctrdevs, devs)
   417  	}
   418  	return true, ctrdevs, score, nil
   419  }
   420  
   421  func patchPodAnnotations(pod *v1.Pod, annotations map[string]string) error {
   422  	type patchMetadata struct {
   423  		Annotations map[string]string `json:"annotations,omitempty"`
   424  	}
   425  	type patchPod struct {
   426  		Metadata patchMetadata `json:"metadata"`
   427  		//Spec     patchSpec     `json:"spec,omitempty"`
   428  	}
   429  
   430  	p := patchPod{}
   431  	p.Metadata.Annotations = annotations
   432  
   433  	bytes, err := json.Marshal(p)
   434  	if err != nil {
   435  		return err
   436  	}
   437  	_, err = kubeClient.CoreV1().Pods(pod.Namespace).
   438  		Patch(context.Background(), pod.Name, k8stypes.StrategicMergePatchType, bytes, metav1.PatchOptions{})
   439  	if err != nil {
   440  		klog.Errorf("patch pod %v failed, %v", pod.Name, err)
   441  	}
   442  	/*
   443  		Can't modify Env of pods here
   444  
   445  		patch1 := addGPUIndexPatch()
   446  		_, err = s.kubeClient.CoreV1().Pods(pod.Namespace).
   447  			Patch(context.Background(), pod.Name, k8stypes.JSONPatchType, []byte(patch1), metav1.PatchOptions{})
   448  		if err != nil {
   449  			klog.Infof("Patch1 pod %v failed, %v", pod.Name, err)
   450  		}*/
   451  
   452  	return err
   453  }