volcano.sh/volcano@v1.9.0/pkg/scheduler/api/devices/nvidia/vgpu/device_info.go

volcano.sh/volcano@v1.9.0/pkg/scheduler/api/devices/nvidia/vgpu/device_info.go (about)

     1  /*
     2  Copyright 2023 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vgpu
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/pkg/errors"
    26  	v1 "k8s.io/api/core/v1"
    27  	"k8s.io/client-go/kubernetes"
    28  	"k8s.io/klog/v2"
    29  
    30  	"volcano.sh/volcano/pkg/scheduler/api/devices"
    31  	"volcano.sh/volcano/pkg/scheduler/plugins/util/nodelock"
    32  )
    33  
// GPUDevice describes a single GPU card: its identifiers, its capacity and
// health, the pods sharing it, and the usage counters kept for it.
type GPUDevice struct {
	// GPU ID
	ID int
	// GPU Unique ID. AddResource and SubResource match this value against the
	// device UUIDs decoded from a pod's annotations.
	UUID string
	// The pods that are sharing this GPU
	// NOTE(review): the key format is not visible in this file — confirm.
	PodMap map[string]*v1.Pod
	// memory per card
	Memory uint
	// max sharing number
	Number uint
	// type of this number
	// NOTE(review): presumably the type/model of this card ("number" looks
	// like a typo) — confirm against the code that fills this field.
	Type string
	// Health condition of this GPU
	Health bool
	// number of allocated; AddResource increments it and SubResource
	// decrements it once per matching device entry of a pod.
	UsedNum uint
	// number of device memory allocated
	UsedMem uint
	// number of core used
	UsedCore uint
}
    57  
// GPUDevices groups the GPU devices known for one node together with the
// score cached for that node during filtering.
type GPUDevices struct {
	// Name is used by Allocate as the node name when locking the node and
	// when writing the assigned-node annotation on the pod.
	Name string

	// We cache score in filter step according to schedulePolicy, to avoid recalculating in score
	Score float64

	// Device holds the node's GPU devices.
	// NOTE(review): the int key is presumably the GPU ID — confirm.
	Device map[int]*GPUDevice
}
    66  
    67  // NewGPUDevice creates a device
    68  func NewGPUDevice(id int, mem uint) *GPUDevice {
    69  	return &GPUDevice{
    70  		ID:       id,
    71  		Memory:   mem,
    72  		PodMap:   map[string]*v1.Pod{},
    73  		UsedNum:  0,
    74  		UsedMem:  0,
    75  		UsedCore: 0,
    76  	}
    77  }
    78  
    79  func NewGPUDevices(name string, node *v1.Node) *GPUDevices {
    80  	if node == nil {
    81  		return nil
    82  	}
    83  	annos, ok := node.Annotations[VolcanoVGPURegister]
    84  	if !ok {
    85  		return nil
    86  	}
    87  	handshake, ok := node.Annotations[VolcanoVGPUHandshake]
    88  	if !ok {
    89  		return nil
    90  	}
    91  	nodedevices := decodeNodeDevices(name, annos)
    92  	if len(nodedevices.Device) == 0 {
    93  		return nil
    94  	}
    95  	for _, val := range nodedevices.Device {
    96  		klog.V(4).Infoln("name=", nodedevices.Name, "val=", *val)
    97  	}
    98  
    99  	// We have to handshake here in order to avoid time-inconsistency between scheduler and nodes
   100  	if strings.Contains(handshake, "Requesting") {
   101  		formertime, _ := time.Parse("2006.01.02 15:04:05", strings.Split(handshake, "_")[1])
   102  		if time.Now().After(formertime.Add(time.Second * 60)) {
   103  			klog.Infof("node %v device %s leave", node.Name, handshake)
   104  
   105  			tmppat := make(map[string]string)
   106  			tmppat[VolcanoVGPUHandshake] = "Deleted_" + time.Now().Format("2006.01.02 15:04:05")
   107  			patchNodeAnnotations(node, tmppat)
   108  			return nil
   109  		}
   110  	} else if strings.Contains(handshake, "Deleted") {
   111  		return nil
   112  	} else {
   113  		tmppat := make(map[string]string)
   114  		tmppat[VolcanoVGPUHandshake] = "Requesting_" + time.Now().Format("2006.01.02 15:04:05")
   115  		patchNodeAnnotations(node, tmppat)
   116  	}
   117  	return nodedevices
   118  }
   119  
   120  func (gs *GPUDevices) ScoreNode(pod *v1.Pod, schedulePolicy string) float64 {
   121  	/* TODO: we need a base score to be campatable with preemption, it means a node without evicting a task has
   122  	a higher score than those needs to evict a task */
   123  
   124  	// Use cached stored in filter state in order to avoid recalculating.
   125  	return gs.Score
   126  }
   127  
   128  func (gs *GPUDevices) GetIgnoredDevices() []string {
   129  	return []string{VolcanoVGPUMemory, VolcanoVGPUMemoryPercentage, VolcanoVGPUCores}
   130  }
   131  
   132  // AddResource adds the pod to GPU pool if it is assigned
   133  func (gs *GPUDevices) AddResource(pod *v1.Pod) {
   134  	ids, ok := pod.Annotations[AssignedIDsAnnotations]
   135  	if !ok {
   136  		return
   137  	}
   138  	podDev := decodePodDevices(ids)
   139  	for _, val := range podDev {
   140  		for _, deviceused := range val {
   141  			if gs == nil {
   142  				break
   143  			}
   144  			for index, gsdevice := range gs.Device {
   145  				if gsdevice.UUID == deviceused.UUID {
   146  					klog.V(4).Infoln("VGPU recording pod", pod.Name, "device", deviceused)
   147  					gs.Device[index].UsedMem += uint(deviceused.Usedmem)
   148  					gs.Device[index].UsedNum++
   149  					gs.Device[index].UsedCore += uint(deviceused.Usedcores)
   150  				}
   151  			}
   152  		}
   153  	}
   154  	gs.GetStatus()
   155  }
   156  
   157  // SubResource frees the gpu hold by the pod
   158  func (gs *GPUDevices) SubResource(pod *v1.Pod) {
   159  	ids, ok := pod.Annotations[AssignedIDsAnnotations]
   160  	if !ok {
   161  		return
   162  	}
   163  	podDev := decodePodDevices(ids)
   164  	for _, val := range podDev {
   165  		for _, deviceused := range val {
   166  			if gs == nil {
   167  				break
   168  			}
   169  			for index, gsdevice := range gs.Device {
   170  				if gsdevice.UUID == deviceused.UUID {
   171  					klog.V(4).Infoln("VGPU subsctracting pod", pod.Name, "device", deviceused)
   172  					gs.Device[index].UsedMem -= uint(deviceused.Usedmem)
   173  					gs.Device[index].UsedNum--
   174  					gs.Device[index].UsedCore -= uint(deviceused.Usedcores)
   175  				}
   176  			}
   177  		}
   178  	}
   179  }
   180  
   181  func (gs *GPUDevices) HasDeviceRequest(pod *v1.Pod) bool {
   182  	if VGPUEnable && checkVGPUResourcesInPod(pod) {
   183  		return true
   184  	}
   185  	return false
   186  }
   187  
   188  func (gs *GPUDevices) Release(kubeClient kubernetes.Interface, pod *v1.Pod) error {
   189  	// Nothing needs to be done here
   190  	return nil
   191  }
   192  
   193  func (gs *GPUDevices) FilterNode(pod *v1.Pod, schedulePolicy string) (int, string, error) {
   194  	if VGPUEnable {
   195  		klog.V(4).Infoln("hami-vgpu DeviceSharing starts filtering pods", pod.Name)
   196  		fit, _, score, err := checkNodeGPUSharingPredicateAndScore(pod, gs, true, schedulePolicy)
   197  		if err != nil || !fit {
   198  			klog.Errorln("deviceSharing err=", err.Error())
   199  			return devices.Unschedulable, fmt.Sprintf("hami-vgpuDeviceSharing %s", err.Error()), err
   200  		}
   201  		gs.Score = score
   202  		klog.V(4).Infoln("hami-vgpu DeviceSharing successfully filters pods")
   203  	}
   204  	return devices.Success, "", nil
   205  }
   206  
   207  func (gs *GPUDevices) Allocate(kubeClient kubernetes.Interface, pod *v1.Pod) error {
   208  	if VGPUEnable {
   209  		klog.V(4).Infoln("hami-vgpu DeviceSharing:Into AllocateToPod", pod.Name)
   210  		fit, device, _, err := checkNodeGPUSharingPredicateAndScore(pod, gs, false, "")
   211  		if err != nil || !fit {
   212  			klog.Errorln("DeviceSharing err=", err.Error())
   213  			return err
   214  		}
   215  		if NodeLockEnable {
   216  			nodelock.UseClient(kubeClient)
   217  			err = nodelock.LockNode(gs.Name, DeviceName)
   218  			if err != nil {
   219  				return errors.Errorf("node %s locked for lockname gpushare %s", gs.Name, err.Error())
   220  			}
   221  		}
   222  
   223  		annotations := make(map[string]string)
   224  		annotations[AssignedNodeAnnotations] = gs.Name
   225  		annotations[AssignedTimeAnnotations] = strconv.FormatInt(time.Now().Unix(), 10)
   226  		annotations[AssignedIDsAnnotations] = encodePodDevices(device)
   227  		annotations[AssignedIDsToAllocateAnnotations] = annotations[AssignedIDsAnnotations]
   228  
   229  		annotations[DeviceBindPhase] = "allocating"
   230  		annotations[BindTimeAnnotations] = strconv.FormatInt(time.Now().Unix(), 10)
   231  		err = patchPodAnnotations(pod, annotations)
   232  		if err != nil {
   233  			return err
   234  		}
   235  		gs.GetStatus()
   236  		klog.V(3).Infoln("DeviceSharing:Allocate Success")
   237  	}
   238  	return nil
   239  }