k8s.io/kubernetes@v1.29.3/test/e2e_node/numa_alignment.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package e2enode
    18  
    19  import (
    20  	"fmt"
    21  	"os"
    22  	"path/filepath"
    23  	"sort"
    24  	"strconv"
    25  	"strings"
    26  
    27  	v1 "k8s.io/api/core/v1"
    28  	"k8s.io/utils/cpuset"
    29  
    30  	"k8s.io/kubernetes/test/e2e/framework"
    31  	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
    32  )
    33  
    34  type numaPodResources struct {
    35  	CPUToNUMANode     map[int]int
    36  	PCIDevsToNUMANode map[string]int
    37  }
    38  
    39  func (R *numaPodResources) CheckAlignment() bool {
    40  	nodeNum := -1 // not set
    41  	for _, cpuNode := range R.CPUToNUMANode {
    42  		if nodeNum == -1 {
    43  			nodeNum = cpuNode
    44  		} else if nodeNum != cpuNode {
    45  			return false
    46  		}
    47  	}
    48  	for _, devNode := range R.PCIDevsToNUMANode {
    49  		if nodeNum != devNode {
    50  			return false
    51  		}
    52  	}
    53  	return true
    54  }
    55  
    56  func (R *numaPodResources) String() string {
    57  	var b strings.Builder
    58  	// To store the keys in slice in sorted order
    59  	var cpuKeys []int
    60  	for ck := range R.CPUToNUMANode {
    61  		cpuKeys = append(cpuKeys, ck)
    62  	}
    63  	sort.Ints(cpuKeys)
    64  	for _, k := range cpuKeys {
    65  		nodeNum := R.CPUToNUMANode[k]
    66  		b.WriteString(fmt.Sprintf("CPU cpu#%03d=%02d\n", k, nodeNum))
    67  	}
    68  	var pciKeys []string
    69  	for pk := range R.PCIDevsToNUMANode {
    70  		pciKeys = append(pciKeys, pk)
    71  	}
    72  	sort.Strings(pciKeys)
    73  	for _, k := range pciKeys {
    74  		nodeNum := R.PCIDevsToNUMANode[k]
    75  		b.WriteString(fmt.Sprintf("PCI %s=%02d\n", k, nodeNum))
    76  	}
    77  	return b.String()
    78  }
    79  
    80  func getCPUsPerNUMANode(nodeNum int) ([]int, error) {
    81  	nodeCPUList, err := os.ReadFile(fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", nodeNum))
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  	cpus, err := cpuset.Parse(strings.TrimSpace(string(nodeCPUList)))
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  	return cpus.List(), nil
    90  }
    91  
    92  func getCPUToNUMANodeMapFromEnv(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, environ map[string]string, numaNodes int) (map[int]int, error) {
    93  	var cpuIDs []int
    94  	cpuListAllowedEnvVar := "CPULIST_ALLOWED"
    95  
    96  	for name, value := range environ {
    97  		if name == cpuListAllowedEnvVar {
    98  			cpus, err := cpuset.Parse(value)
    99  			if err != nil {
   100  				return nil, err
   101  			}
   102  			cpuIDs = cpus.List()
   103  		}
   104  	}
   105  	if len(cpuIDs) == 0 {
   106  		return nil, fmt.Errorf("variable %q not found in environ", cpuListAllowedEnvVar)
   107  	}
   108  
   109  	cpusPerNUMA := make(map[int][]int)
   110  	for numaNode := 0; numaNode < numaNodes; numaNode++ {
   111  		nodeCPUList := e2epod.ExecCommandInContainer(f, pod.Name, cnt.Name,
   112  			"/bin/cat", fmt.Sprintf("/sys/devices/system/node/node%d/cpulist", numaNode))
   113  
   114  		cpus, err := cpuset.Parse(nodeCPUList)
   115  		if err != nil {
   116  			return nil, err
   117  		}
   118  		cpusPerNUMA[numaNode] = cpus.List()
   119  	}
   120  
   121  	// CPU IDs -> NUMA Node ID
   122  	CPUToNUMANode := make(map[int]int)
   123  	for nodeNum, cpus := range cpusPerNUMA {
   124  		for _, cpu := range cpus {
   125  			CPUToNUMANode[cpu] = nodeNum
   126  		}
   127  	}
   128  
   129  	// filter out only the allowed CPUs
   130  	CPUMap := make(map[int]int)
   131  	for _, cpuID := range cpuIDs {
   132  		_, ok := CPUToNUMANode[cpuID]
   133  		if !ok {
   134  			return nil, fmt.Errorf("CPU %d not found on NUMA map: %v", cpuID, CPUToNUMANode)
   135  		}
   136  		CPUMap[cpuID] = CPUToNUMANode[cpuID]
   137  	}
   138  	return CPUMap, nil
   139  }
   140  
   141  func getPCIDeviceToNumaNodeMapFromEnv(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, environ map[string]string) (map[string]int, error) {
   142  	pciDevPrefix := "PCIDEVICE_"
   143  	// at this point we don't care which plugin selected the device,
   144  	// we only need to know which devices were assigned to the POD.
   145  	// Hence, do prefix search for the variable and fetch the device(s).
   146  
   147  	NUMAPerDev := make(map[string]int)
   148  	for name, value := range environ {
   149  		if !strings.HasPrefix(name, pciDevPrefix) {
   150  			continue
   151  		}
   152  
   153  		// a single plugin can allocate more than a single device
   154  		pciDevs := strings.Split(value, ",")
   155  		for _, pciDev := range pciDevs {
   156  			pciDevNUMANode := e2epod.ExecCommandInContainer(f, pod.Name, cnt.Name,
   157  				"/bin/cat", fmt.Sprintf("/sys/bus/pci/devices/%s/numa_node", pciDev))
   158  			NUMAPerDev[pciDev] = numaNodeFromSysFsEntry(pciDevNUMANode)
   159  		}
   160  	}
   161  	return NUMAPerDev, nil
   162  }
   163  
   164  func makeEnvMap(logs string) (map[string]string, error) {
   165  	podEnv := strings.Split(logs, "\n")
   166  	envMap := make(map[string]string)
   167  	for _, envVar := range podEnv {
   168  		if len(envVar) == 0 {
   169  			continue
   170  		}
   171  		pair := strings.SplitN(envVar, "=", 2)
   172  		if len(pair) != 2 {
   173  			return nil, fmt.Errorf("unable to split %q", envVar)
   174  		}
   175  		envMap[pair[0]] = pair[1]
   176  	}
   177  	return envMap, nil
   178  }
   179  
   180  type testEnvInfo struct {
   181  	numaNodes         int
   182  	sriovResourceName string
   183  	policy            string
   184  	scope             string
   185  }
   186  
   187  func containerWantsDevices(cnt *v1.Container, envInfo *testEnvInfo) bool {
   188  	_, found := cnt.Resources.Requests[v1.ResourceName(envInfo.sriovResourceName)]
   189  	return found
   190  }
   191  
   192  func checkNUMAAlignment(f *framework.Framework, pod *v1.Pod, cnt *v1.Container, logs string, envInfo *testEnvInfo) (*numaPodResources, error) {
   193  	var err error
   194  	podEnv, err := makeEnvMap(logs)
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  
   199  	CPUToNUMANode, err := getCPUToNUMANodeMapFromEnv(f, pod, cnt, podEnv, envInfo.numaNodes)
   200  	if err != nil {
   201  		return nil, err
   202  	}
   203  
   204  	PCIDevsToNUMANode, err := getPCIDeviceToNumaNodeMapFromEnv(f, pod, cnt, podEnv)
   205  	if err != nil {
   206  		return nil, err
   207  	}
   208  
   209  	if containerWantsDevices(cnt, envInfo) && len(PCIDevsToNUMANode) == 0 {
   210  		return nil, fmt.Errorf("no PCI devices found in environ")
   211  	}
   212  	numaRes := numaPodResources{
   213  		CPUToNUMANode:     CPUToNUMANode,
   214  		PCIDevsToNUMANode: PCIDevsToNUMANode,
   215  	}
   216  	aligned := numaRes.CheckAlignment()
   217  	if !aligned {
   218  		err = fmt.Errorf("NUMA resources not aligned")
   219  	}
   220  	return &numaRes, err
   221  }
   222  
   223  type pciDeviceInfo struct {
   224  	Address  string
   225  	NUMANode int
   226  	IsPhysFn bool
   227  	IsVFn    bool
   228  }
   229  
   230  func getPCIDeviceInfo(sysPCIDir string) ([]pciDeviceInfo, error) {
   231  	var pciDevs []pciDeviceInfo
   232  
   233  	entries, err := os.ReadDir(sysPCIDir)
   234  	if err != nil {
   235  		return nil, err
   236  	}
   237  
   238  	for _, entry := range entries {
   239  		isPhysFn := false
   240  		isVFn := false
   241  		if _, err := os.Stat(filepath.Join(sysPCIDir, entry.Name(), "sriov_numvfs")); err == nil {
   242  			isPhysFn = true
   243  		} else if !os.IsNotExist(err) {
   244  			// unexpected error. Bail out
   245  			return nil, err
   246  		}
   247  		if _, err := os.Stat(filepath.Join(sysPCIDir, entry.Name(), "physfn")); err == nil {
   248  			isVFn = true
   249  		} else if !os.IsNotExist(err) {
   250  			// unexpected error. Bail out
   251  			return nil, err
   252  		}
   253  
   254  		content, err := os.ReadFile(filepath.Join(sysPCIDir, entry.Name(), "numa_node"))
   255  		if err != nil {
   256  			return nil, err
   257  		}
   258  
   259  		pciDevs = append(pciDevs, pciDeviceInfo{
   260  			Address:  entry.Name(),
   261  			NUMANode: numaNodeFromSysFsEntry(string(content)),
   262  			IsPhysFn: isPhysFn,
   263  			IsVFn:    isVFn,
   264  		})
   265  	}
   266  
   267  	return pciDevs, nil
   268  }
   269  
   270  func numaNodeFromSysFsEntry(content string) int {
   271  	nodeNum, err := strconv.Atoi(strings.TrimSpace(content))
   272  	framework.ExpectNoError(err, "error detecting the device numa_node from sysfs: %v", err)
   273  	return nodeNum
   274  }