k8s.io/kubernetes@v1.29.3/test/e2e_node/numa_alignment.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package e2enode 18 19 import ( 20 "fmt" 21 "os" 22 "path/filepath" 23 "sort" 24 "strconv" 25 "strings" 26 27 v1 "k8s.io/api/core/v1" 28 "k8s.io/utils/cpuset" 29 30 "k8s.io/kubernetes/test/e2e/framework" 31 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 32 ) 33 34 type numaPodResources struct { 35 CPUToNUMANode map[int]int 36 PCIDevsToNUMANode map[string]int 37 } 38 39 func (R *numaPodResources) CheckAlignment() bool { 40 nodeNum := -1 // not set 41 for _, cpuNode := range R.CPUToNUMANode { 42 if nodeNum == -1 { 43 nodeNum = cpuNode 44 } else if nodeNum != cpuNode { 45 return false 46 } 47 } 48 for _, devNode := range R.PCIDevsToNUMANode { 49 if nodeNum != devNode { 50 return false 51 } 52 } 53 return true 54 } 55 56 func (R *numaPodResources) String() string { 57 var b strings.Builder 58 // To store the keys in slice in sorted order 59 var cpuKeys []int 60 for ck := range R.CPUToNUMANode { 61 cpuKeys = append(cpuKeys, ck) 62 } 63 sort.Ints(cpuKeys) 64 for _, k := range cpuKeys { 65 nodeNum := R.CPUToNUMANode[k] 66 b.WriteString(fmt.Sprintf("CPU cpu#%03d=%02d\n", k, nodeNum)) 67 } 68 var pciKeys []string 69 for pk := range R.PCIDevsToNUMANode { 70 pciKeys = append(pciKeys, pk) 71 } 72 sort.Strings(pciKeys) 73 for _, k := range pciKeys { 74 nodeNum := R.PCIDevsToNUMANode[k] 75 b.WriteString(fmt.Sprintf("PCI %s=%02d\n", k, nodeNum)) 76 } 77 return b.String() 78 } 79 80 func getCPUsPerNUMANode(nodeNum int) ([]int, error) { 81 nodeCPUList, err := os.ReadFile(fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", nodeNum)) 82 if err != nil { 83 return nil, err 84 } 85 cpus, err := cpuset.Parse(strings.TrimSpace(string(nodeCPUList))) 86 if err != nil { 87 return nil, err 88 } 89 return cpus.List(), nil 90 } 91 92 func getCPUToNUMANodeMapFromEnv(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, environ map[string]string, numaNodes int) (map[int]int, error) { 93 var cpuIDs []int 94 cpuListAllowedEnvVar := "CPULIST_ALLOWED" 95 96 for name, value := range environ { 97 if name == cpuListAllowedEnvVar { 98 cpus, err := cpuset.Parse(value) 99 if err != nil { 100 return nil, err 101 } 102 cpuIDs = cpus.List() 103 } 104 } 105 if len(cpuIDs) == 0 { 106 return nil, fmt.Errorf("variable %q not found in environ", cpuListAllowedEnvVar) 107 } 108 109 cpusPerNUMA := make(map[int][]int) 110 for numaNode := 0; numaNode < numaNodes; numaNode++ { 111 nodeCPUList := e2epod.ExecCommandInContainer(f, pod.Name, cnt.Name, 112 "/bin/cat", fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", numaNode)) 113 114 cpus, err := cpuset.Parse(nodeCPUList) 115 if err != nil { 116 return nil, err 117 } 118 cpusPerNUMA[numaNode] = cpus.List() 119 } 120 121 // CPU IDs -> NUMA Node ID 122 CPUToNUMANode := make(map[int]int) 123 for nodeNum, cpus := range cpusPerNUMA { 124 for _, cpu := range cpus { 125 CPUToNUMANode[cpu] = nodeNum 126 } 127 } 128 129 // filter out only the allowed CPUs 130 CPUMap := make(map[int]int) 131 for _, cpuID := range cpuIDs { 132 _, ok := CPUToNUMANode[cpuID] 133 if !ok { 134 return nil, fmt.Errorf("CPU %d not found on NUMA map: %v", cpuID, CPUToNUMANode) 135 } 136 CPUMap[cpuID] = CPUToNUMANode[cpuID] 137 } 138 return CPUMap, nil 139 } 140 141 func getPCIDeviceToNumaNodeMapFromEnv(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, environ map[string]string) (map[string]int, error) { 142 pciDevPrefix := "PCIDEVICE_" 143 // at this point we don't care which plugin selected the device, 144 // we only need to know which devices were assigned to the POD. 145 // Hence, do prefix search for the variable and fetch the device(s). 146 147 NUMAPerDev := make(map[string]int) 148 for name, value := range environ { 149 if !strings.HasPrefix(name, pciDevPrefix) { 150 continue 151 } 152 153 // a single plugin can allocate more than a single device 154 pciDevs := strings.Split(value, ",") 155 for _, pciDev := range pciDevs { 156 pciDevNUMANode := e2epod.ExecCommandInContainer(f, pod.Name, cnt.Name, 157 "/bin/cat", fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", pciDev)) 158 NUMAPerDev[pciDev] = numaNodeFromSysFsEntry(pciDevNUMANode) 159 } 160 } 161 return NUMAPerDev, nil 162 } 163 164 func makeEnvMap(logs string) (map[string]string, error) { 165 podEnv := strings.Split(logs, "\n") 166 envMap := make(map[string]string) 167 for _, envVar := range podEnv { 168 if len(envVar) == 0 { 169 continue 170 } 171 pair := strings.SplitN(envVar, "=", 2) 172 if len(pair) != 2 { 173 return nil, fmt.Errorf("unable to split %q", envVar) 174 } 175 envMap[pair[0]] = pair[1] 176 } 177 return envMap, nil 178 } 179 180 type testEnvInfo struct { 181 numaNodes int 182 sriovResourceName string 183 policy string 184 scope string 185 } 186 187 func containerWantsDevices(cnt *v1.Container, envInfo *testEnvInfo) bool { 188 _, found := cnt.Resources.Requests[v1.ResourceName(envInfo.sriovResourceName)] 189 return found 190 } 191 192 func checkNUMAAlignment(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, logs string, envInfo *testEnvInfo) (*numaPodResources, error) { 193 var err error 194 podEnv, err := makeEnvMap(logs) 195 if err != nil { 196 return nil, err 197 } 198 199 CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, cnt, podEnv, envInfo.numaNodes) 200 if err != nil { 201 return nil, err 202 } 203 204 PCIDevsToNUMANode, err := getPCIDeviceToNumaNodeMapFromEnv(f, pod, cnt, podEnv) 205 if err != nil { 206 return nil, err 207 } 208 209 if containerWantsDevices(cnt, envInfo) && len(PCIDevsToNUMANode) == 0 { 210 return nil, fmt.Errorf("no PCI devices found in environ") 211 } 212 numaRes := numaPodResources{ 213 CPUToNUMANode: CPUToNUMANode, 214 PCIDevsToNUMANode: PCIDevsToNUMANode, 215 } 216 aligned := numaRes.CheckAlignment() 217 if !aligned { 218 err = fmt.Errorf("NUMA resources not aligned") 219 } 220 return &numaRes, err 221 } 222 223 type pciDeviceInfo struct { 224 Address string 225 NUMANode int 226 IsPhysFn bool 227 IsVFn bool 228 } 229 230 func getPCIDeviceInfo(sysPCIDir string) ([]pciDeviceInfo, error) { 231 var pciDevs []pciDeviceInfo 232 233 entries, err := os.ReadDir(sysPCIDir) 234 if err != nil { 235 return nil, err 236 } 237 238 for _, entry := range entries { 239 isPhysFn := false 240 isVFn := false 241 if _, err := os.Stat(filepath.Join(sysPCIDir, entry.Name(), "sriov_numvfs")); err == nil { 242 isPhysFn = true 243 } else if !os.IsNotExist(err) { 244 // unexpected error. Bail out 245 return nil, err 246 } 247 if _, err := os.Stat(filepath.Join(sysPCIDir, entry.Name(), "physfn")); err == nil { 248 isVFn = true 249 } else if !os.IsNotExist(err) { 250 // unexpected error. Bail out 251 return nil, err 252 } 253 254 content, err := os.ReadFile(filepath.Join(sysPCIDir, entry.Name(), "numa_node")) 255 if err != nil { 256 return nil, err 257 } 258 259 pciDevs = append(pciDevs, pciDeviceInfo{ 260 Address: entry.Name(), 261 NUMANode: numaNodeFromSysFsEntry(string(content)), 262 IsPhysFn: isPhysFn, 263 IsVFn: isVFn, 264 }) 265 } 266 267 return pciDevs, nil 268 } 269 270 func numaNodeFromSysFsEntry(content string) int { 271 nodeNum, err := strconv.Atoi(strings.TrimSpace(content)) 272 framework.ExpectNoError(err, "error detecting the device numa_node from sysfs: %v", err) 273 return nodeNum 274 }