volcano.sh/volcano@v1.9.0/pkg/scheduler/api/numa_info.go (about) 1 /* 2 Copyright 2021 The Volcano Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package api 18 19 import ( 20 "encoding/json" 21 22 v1 "k8s.io/api/core/v1" 23 "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology" 24 "k8s.io/utils/cpuset" 25 26 nodeinfov1alpha1 "volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1" 27 ) 28 29 // NumaChgFlag indicate node numainfo changed status 30 type NumaChgFlag int 31 32 const ( 33 // NumaInfoResetFlag indicate reset operate 34 NumaInfoResetFlag NumaChgFlag = 0b00 35 // NumaInfoMoreFlag indicate the received allocatable resource is getting more 36 NumaInfoMoreFlag NumaChgFlag = 0b11 37 // NumaInfoLessFlag indicate the received allocatable resource is getting less 38 NumaInfoLessFlag NumaChgFlag = 0b10 39 // DefaultMaxNodeScore indicates the default max node score 40 DefaultMaxNodeScore = 100 41 ) 42 43 // PodResourceDecision is resource allocation determinated by scheduler, 44 // and passed to kubelet through pod annotation. 45 type PodResourceDecision struct { 46 // NUMAResources is resource list with numa info indexed by numa id. 47 NUMAResources map[int]v1.ResourceList `json:"numa,omitempty"` 48 } 49 50 // ResourceInfo is the allocatable information for the resource 51 type ResourceInfo struct { 52 Allocatable cpuset.CPUSet 53 Capacity int 54 AllocatablePerNuma map[int]float64 // key: NUMA ID 55 UsedPerNuma map[int]float64 // key: NUMA ID 56 } 57 58 // NumatopoInfo is the information about topology manager on the node 59 type NumatopoInfo struct { 60 Namespace string 61 Name string 62 Policies map[nodeinfov1alpha1.PolicyName]string 63 NumaResMap map[string]*ResourceInfo 64 CPUDetail topology.CPUDetails 65 ResReserved v1.ResourceList 66 } 67 68 // DeepCopy used to copy NumatopoInfo 69 func (info *NumatopoInfo) DeepCopy() *NumatopoInfo { 70 numaInfo := &NumatopoInfo{ 71 Namespace: info.Namespace, 72 Name: info.Name, 73 Policies: make(map[nodeinfov1alpha1.PolicyName]string), 74 NumaResMap: make(map[string]*ResourceInfo), 75 CPUDetail: topology.CPUDetails{}, 76 ResReserved: make(v1.ResourceList), 77 } 78 79 policies := info.Policies 80 for name, policy := range policies { 81 numaInfo.Policies[name] = policy 82 } 83 84 for resName, resInfo := range info.NumaResMap { 85 tmpInfo := &ResourceInfo{ 86 AllocatablePerNuma: make(map[int]float64), 87 UsedPerNuma: make(map[int]float64), 88 } 89 tmpInfo.Capacity = resInfo.Capacity 90 tmpInfo.Allocatable = resInfo.Allocatable.Clone() 91 92 for numaID, data := range resInfo.AllocatablePerNuma { 93 tmpInfo.AllocatablePerNuma[numaID] = data 94 } 95 96 for numaID, data := range resInfo.UsedPerNuma { 97 tmpInfo.UsedPerNuma[numaID] = data 98 } 99 100 numaInfo.NumaResMap[resName] = tmpInfo 101 } 102 103 cpuDetail := info.CPUDetail 104 for cpuID, detail := range cpuDetail { 105 numaInfo.CPUDetail[cpuID] = detail 106 } 107 108 resReserved := info.ResReserved 109 for resName, res := range resReserved { 110 numaInfo.ResReserved[resName] = res 111 } 112 113 return numaInfo 114 } 115 116 // Compare is the function to show the change of the resource on kubelet 117 // return val: 118 // - true : the resource on kubelet is getting more or no change 119 // - false : the resource on kubelet is getting less 120 func (info *NumatopoInfo) Compare(newInfo *NumatopoInfo) bool { 121 for resName := range info.NumaResMap { 122 oldSize := info.NumaResMap[resName].Allocatable.Size() 123 newSize := newInfo.NumaResMap[resName].Allocatable.Size() 124 if oldSize <= newSize { 125 return true 126 } 127 } 128 129 return false 130 } 131 132 // Allocate is the function to remove the allocated resource 133 func (info *NumatopoInfo) Allocate(resSets ResNumaSets) { 134 for resName := range resSets { 135 info.NumaResMap[resName].Allocatable = info.NumaResMap[resName].Allocatable.Difference(resSets[resName]) 136 } 137 } 138 139 // Release is the function to reclaim the allocated resource 140 func (info *NumatopoInfo) Release(resSets ResNumaSets) { 141 for resName := range resSets { 142 info.NumaResMap[resName].Allocatable = info.NumaResMap[resName].Allocatable.Union(resSets[resName]) 143 } 144 } 145 146 func GetPodResourceNumaInfo(ti *TaskInfo) map[int]v1.ResourceList { 147 if ti.NumaInfo != nil && len(ti.NumaInfo.ResMap) > 0 { 148 return ti.NumaInfo.ResMap 149 } 150 151 if _, ok := ti.Pod.Annotations[topologyDecisionAnnotation]; !ok { 152 return nil 153 } 154 155 decision := PodResourceDecision{} 156 err := json.Unmarshal([]byte(ti.Pod.Annotations[topologyDecisionAnnotation]), &decision) 157 if err != nil { 158 return nil 159 } 160 161 return decision.NUMAResources 162 } 163 164 // AddTask is the function to update the used resource of per numa node 165 func (info *NumatopoInfo) AddTask(ti *TaskInfo) { 166 numaInfo := GetPodResourceNumaInfo(ti) 167 if numaInfo == nil { 168 return 169 } 170 171 for numaID, resList := range numaInfo { 172 for resName, quantity := range resList { 173 info.NumaResMap[string(resName)].UsedPerNuma[numaID] += ResQuantity2Float64(resName, quantity) 174 } 175 } 176 } 177 178 // RemoveTask is the function to update the used resource of per numa node 179 func (info *NumatopoInfo) RemoveTask(ti *TaskInfo) { 180 decision := GetPodResourceNumaInfo(ti) 181 if decision == nil { 182 return 183 } 184 185 for numaID, resList := range ti.NumaInfo.ResMap { 186 for resName, quantity := range resList { 187 info.NumaResMap[string(resName)].UsedPerNuma[numaID] -= ResQuantity2Float64(resName, quantity) 188 } 189 } 190 } 191 192 // GenerateNodeResNumaSets return the idle resource sets of all node 193 func GenerateNodeResNumaSets(nodes map[string]*NodeInfo) map[string]ResNumaSets { 194 nodeSlice := make(map[string]ResNumaSets) 195 for _, node := range nodes { 196 if node.NumaSchedulerInfo == nil { 197 continue 198 } 199 200 resMaps := make(ResNumaSets) 201 for resName, resMap := range node.NumaSchedulerInfo.NumaResMap { 202 resMaps[resName] = resMap.Allocatable.Clone() 203 } 204 205 nodeSlice[node.Name] = resMaps 206 } 207 208 return nodeSlice 209 } 210 211 // GenerateNumaNodes return the numa IDs of all node 212 func GenerateNumaNodes(nodes map[string]*NodeInfo) map[string][]int { 213 nodeNumaMap := make(map[string][]int) 214 215 for _, node := range nodes { 216 if node.NumaSchedulerInfo == nil { 217 continue 218 } 219 220 nodeNumaMap[node.Name] = node.NumaSchedulerInfo.CPUDetail.NUMANodes().List() 221 } 222 223 return nodeNumaMap 224 } 225 226 // ResNumaSets is the set map of the resource 227 type ResNumaSets map[string]cpuset.CPUSet 228 229 // Allocate is to remove the allocated resource which is assigned to task 230 func (resSets ResNumaSets) Allocate(taskSets ResNumaSets) { 231 for resName := range taskSets { 232 if _, ok := resSets[resName]; !ok { 233 continue 234 } 235 resSets[resName] = resSets[resName].Difference(taskSets[resName]) 236 } 237 } 238 239 // Release is to reclaim the allocated resource which is assigned to task 240 func (resSets ResNumaSets) Release(taskSets ResNumaSets) { 241 for resName := range taskSets { 242 if _, ok := resSets[resName]; !ok { 243 continue 244 } 245 resSets[resName] = resSets[resName].Union(taskSets[resName]) 246 } 247 } 248 249 // Clone is the copy action 250 func (resSets ResNumaSets) Clone() ResNumaSets { 251 newSets := make(ResNumaSets) 252 for resName := range resSets { 253 newSets[resName] = resSets[resName].Clone() 254 } 255 256 return newSets 257 } 258 259 // ScoredNode is the wrapper for node during Scoring. 260 type ScoredNode struct { 261 NodeName string 262 Score int64 263 }