volcano.sh/volcano@v1.9.0/pkg/scheduler/api/numa_info.go (about)

     1  /*
     2  Copyright 2021 The Volcano Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package api
    18  
    19  import (
    20  	"encoding/json"
    21  
    22  	v1 "k8s.io/api/core/v1"
    23  	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
    24  	"k8s.io/utils/cpuset"
    25  
    26  	nodeinfov1alpha1 "volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1"
    27  )
    28  
// NumaChgFlag indicates how the NUMA information of a node has changed.
// The defined values are NumaInfoResetFlag, NumaInfoMoreFlag and
// NumaInfoLessFlag.
type NumaChgFlag int
    31  
const (
	// NumaInfoResetFlag indicates a reset operation.
	NumaInfoResetFlag NumaChgFlag = 0b00
	// NumaInfoMoreFlag indicates that the received allocatable resource is increasing.
	NumaInfoMoreFlag NumaChgFlag = 0b11
	// NumaInfoLessFlag indicates that the received allocatable resource is decreasing.
	NumaInfoLessFlag NumaChgFlag = 0b10
	// DefaultMaxNodeScore is the default maximum node score. Unlike the flags
	// above it is an untyped constant, not a NumaChgFlag.
	DefaultMaxNodeScore = 100
)
    42  
// PodResourceDecision is the resource allocation determined by the scheduler
// and passed to the kubelet through a pod annotation.
type PodResourceDecision struct {
	// NUMAResources is the resource list with NUMA info, indexed by NUMA ID.
	// It is serialized under the JSON key "numa" and omitted when empty.
	NUMAResources map[int]v1.ResourceList `json:"numa,omitempty"`
}
    49  
// ResourceInfo is the allocatable information for a single resource.
//
// Allocatable is the set that NumatopoInfo.Allocate shrinks and
// NumatopoInfo.Release grows. AllocatablePerNuma and UsedPerNuma are keyed by
// NUMA ID; UsedPerNuma is adjusted by NumatopoInfo.AddTask and
// NumatopoInfo.RemoveTask.
//
// NOTE(review): the unit of Capacity is not visible in this file; confirm it
// against the code that populates this struct.
type ResourceInfo struct {
	Allocatable        cpuset.CPUSet
	Capacity           int
	AllocatablePerNuma map[int]float64 // key: NUMA ID
	UsedPerNuma        map[int]float64 // key: NUMA ID
}
    57  
// NumatopoInfo is the information about the topology manager on a node.
//
// NumaResMap is keyed by resource name; its entries are the ones updated by
// Allocate, Release, AddTask and RemoveTask. CPUDetail is the source of the
// NUMA node IDs returned by GenerateNumaNodes.
//
// NOTE(review): Policies presumably maps each topology policy name to its
// configured value, and ResReserved presumably holds the resources reserved on
// the node; neither is read in this file, so confirm against the producers.
type NumatopoInfo struct {
	Namespace   string
	Name        string
	Policies    map[nodeinfov1alpha1.PolicyName]string
	NumaResMap  map[string]*ResourceInfo
	CPUDetail   topology.CPUDetails
	ResReserved v1.ResourceList
}
    67  
    68  // DeepCopy used to copy NumatopoInfo
    69  func (info *NumatopoInfo) DeepCopy() *NumatopoInfo {
    70  	numaInfo := &NumatopoInfo{
    71  		Namespace:   info.Namespace,
    72  		Name:        info.Name,
    73  		Policies:    make(map[nodeinfov1alpha1.PolicyName]string),
    74  		NumaResMap:  make(map[string]*ResourceInfo),
    75  		CPUDetail:   topology.CPUDetails{},
    76  		ResReserved: make(v1.ResourceList),
    77  	}
    78  
    79  	policies := info.Policies
    80  	for name, policy := range policies {
    81  		numaInfo.Policies[name] = policy
    82  	}
    83  
    84  	for resName, resInfo := range info.NumaResMap {
    85  		tmpInfo := &ResourceInfo{
    86  			AllocatablePerNuma: make(map[int]float64),
    87  			UsedPerNuma:        make(map[int]float64),
    88  		}
    89  		tmpInfo.Capacity = resInfo.Capacity
    90  		tmpInfo.Allocatable = resInfo.Allocatable.Clone()
    91  
    92  		for numaID, data := range resInfo.AllocatablePerNuma {
    93  			tmpInfo.AllocatablePerNuma[numaID] = data
    94  		}
    95  
    96  		for numaID, data := range resInfo.UsedPerNuma {
    97  			tmpInfo.UsedPerNuma[numaID] = data
    98  		}
    99  
   100  		numaInfo.NumaResMap[resName] = tmpInfo
   101  	}
   102  
   103  	cpuDetail := info.CPUDetail
   104  	for cpuID, detail := range cpuDetail {
   105  		numaInfo.CPUDetail[cpuID] = detail
   106  	}
   107  
   108  	resReserved := info.ResReserved
   109  	for resName, res := range resReserved {
   110  		numaInfo.ResReserved[resName] = res
   111  	}
   112  
   113  	return numaInfo
   114  }
   115  
   116  // Compare is the function to show the change of the resource on kubelet
   117  // return val:
   118  // - true : the resource on kubelet is getting more or no change
   119  // - false :  the resource on kubelet is getting less
   120  func (info *NumatopoInfo) Compare(newInfo *NumatopoInfo) bool {
   121  	for resName := range info.NumaResMap {
   122  		oldSize := info.NumaResMap[resName].Allocatable.Size()
   123  		newSize := newInfo.NumaResMap[resName].Allocatable.Size()
   124  		if oldSize <= newSize {
   125  			return true
   126  		}
   127  	}
   128  
   129  	return false
   130  }
   131  
   132  // Allocate is the function to remove the allocated resource
   133  func (info *NumatopoInfo) Allocate(resSets ResNumaSets) {
   134  	for resName := range resSets {
   135  		info.NumaResMap[resName].Allocatable = info.NumaResMap[resName].Allocatable.Difference(resSets[resName])
   136  	}
   137  }
   138  
   139  // Release is the function to reclaim the allocated resource
   140  func (info *NumatopoInfo) Release(resSets ResNumaSets) {
   141  	for resName := range resSets {
   142  		info.NumaResMap[resName].Allocatable = info.NumaResMap[resName].Allocatable.Union(resSets[resName])
   143  	}
   144  }
   145  
   146  func GetPodResourceNumaInfo(ti *TaskInfo) map[int]v1.ResourceList {
   147  	if ti.NumaInfo != nil && len(ti.NumaInfo.ResMap) > 0 {
   148  		return ti.NumaInfo.ResMap
   149  	}
   150  
   151  	if _, ok := ti.Pod.Annotations[topologyDecisionAnnotation]; !ok {
   152  		return nil
   153  	}
   154  
   155  	decision := PodResourceDecision{}
   156  	err := json.Unmarshal([]byte(ti.Pod.Annotations[topologyDecisionAnnotation]), &decision)
   157  	if err != nil {
   158  		return nil
   159  	}
   160  
   161  	return decision.NUMAResources
   162  }
   163  
   164  // AddTask is the function to update the used resource of per numa node
   165  func (info *NumatopoInfo) AddTask(ti *TaskInfo) {
   166  	numaInfo := GetPodResourceNumaInfo(ti)
   167  	if numaInfo == nil {
   168  		return
   169  	}
   170  
   171  	for numaID, resList := range numaInfo {
   172  		for resName, quantity := range resList {
   173  			info.NumaResMap[string(resName)].UsedPerNuma[numaID] += ResQuantity2Float64(resName, quantity)
   174  		}
   175  	}
   176  }
   177  
   178  // RemoveTask is the function to update the used resource of per numa node
   179  func (info *NumatopoInfo) RemoveTask(ti *TaskInfo) {
   180  	decision := GetPodResourceNumaInfo(ti)
   181  	if decision == nil {
   182  		return
   183  	}
   184  
   185  	for numaID, resList := range ti.NumaInfo.ResMap {
   186  		for resName, quantity := range resList {
   187  			info.NumaResMap[string(resName)].UsedPerNuma[numaID] -= ResQuantity2Float64(resName, quantity)
   188  		}
   189  	}
   190  }
   191  
   192  // GenerateNodeResNumaSets return the idle resource sets of all node
   193  func GenerateNodeResNumaSets(nodes map[string]*NodeInfo) map[string]ResNumaSets {
   194  	nodeSlice := make(map[string]ResNumaSets)
   195  	for _, node := range nodes {
   196  		if node.NumaSchedulerInfo == nil {
   197  			continue
   198  		}
   199  
   200  		resMaps := make(ResNumaSets)
   201  		for resName, resMap := range node.NumaSchedulerInfo.NumaResMap {
   202  			resMaps[resName] = resMap.Allocatable.Clone()
   203  		}
   204  
   205  		nodeSlice[node.Name] = resMaps
   206  	}
   207  
   208  	return nodeSlice
   209  }
   210  
   211  // GenerateNumaNodes return the numa IDs of all node
   212  func GenerateNumaNodes(nodes map[string]*NodeInfo) map[string][]int {
   213  	nodeNumaMap := make(map[string][]int)
   214  
   215  	for _, node := range nodes {
   216  		if node.NumaSchedulerInfo == nil {
   217  			continue
   218  		}
   219  
   220  		nodeNumaMap[node.Name] = node.NumaSchedulerInfo.CPUDetail.NUMANodes().List()
   221  	}
   222  
   223  	return nodeNumaMap
   224  }
   225  
// ResNumaSets maps a resource name to the cpuset.CPUSet holding the set for
// that resource.
type ResNumaSets map[string]cpuset.CPUSet
   228  
   229  // Allocate is to remove the allocated resource which is assigned to task
   230  func (resSets ResNumaSets) Allocate(taskSets ResNumaSets) {
   231  	for resName := range taskSets {
   232  		if _, ok := resSets[resName]; !ok {
   233  			continue
   234  		}
   235  		resSets[resName] = resSets[resName].Difference(taskSets[resName])
   236  	}
   237  }
   238  
   239  // Release is to reclaim the allocated resource which is assigned to task
   240  func (resSets ResNumaSets) Release(taskSets ResNumaSets) {
   241  	for resName := range taskSets {
   242  		if _, ok := resSets[resName]; !ok {
   243  			continue
   244  		}
   245  		resSets[resName] = resSets[resName].Union(taskSets[resName])
   246  	}
   247  }
   248  
   249  // Clone is the copy action
   250  func (resSets ResNumaSets) Clone() ResNumaSets {
   251  	newSets := make(ResNumaSets)
   252  	for resName := range resSets {
   253  		newSets[resName] = resSets[resName].Clone()
   254  	}
   255  
   256  	return newSets
   257  }
   258  
// ScoredNode is the wrapper for a node during scoring: it pairs the name of
// the node with the score assigned to it.
type ScoredNode struct {
	NodeName string
	Score    int64
}