github.com/google/cadvisor@v0.49.1/utils/sysfs/sysfs.go (about)

     1  // Copyright 2014 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sysfs
    16  
import (
	"bytes"
	"fmt"
	"math/bits"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"

	"k8s.io/klog/v2"
)
    29  
const (
	// Fixed locations read by realSysFs. cacheDir is a prefix rather than a
	// directory: the CPU number is appended directly to it (".../cpu/cpu<N>").
	blockDir     = "/sys/block"
	cacheDir     = "/sys/devices/system/cpu/cpu"
	netDir       = "/sys/class/net"
	dmiDir       = "/sys/class/dmi"
	ppcDevTree   = "/proc/device-tree"
	s390xDevTree = "/etc" // s390/s390x changes: GetSystemUUID falls back to machine-id in this directory

	// meminfoFile is the name of the memory information file inside a NUMA node directory.
	meminfoFile = "meminfo"

	// distanceFile is the name of the distance file inside a NUMA node directory.
	distanceFile = "distance"

	// sysFsCPUTopology is the name of the topology subdirectory of a CPU directory.
	sysFsCPUTopology = "topology"

	// CPUPhysicalPackageID is a physical package id of cpu#. Typically corresponds to a physical socket number,
	// but the actual value is architecture and platform dependent.
	CPUPhysicalPackageID = "physical_package_id"
	// CPUCoreID is the CPU core ID of cpu#. Typically it is the hardware platform's identifier
	// (rather than the kernel's). The actual value is architecture and platform dependent.
	CPUCoreID = "core_id"

	// coreIDFilePath and packageIDFilePath are suffixes (with a leading "/")
	// appended to a CPU directory path to locate the respective topology file.
	coreIDFilePath    = "/" + sysFsCPUTopology + "/core_id"
	packageIDFilePath = "/" + sysFsCPUTopology + "/physical_package_id"

	// Glob patterns used to match CPU and NUMA node directory names.

	cpuDirPattern  = "cpu*[0-9]"
	nodeDirPattern = "node*[0-9]"

	// HugePagesNrFile is the name of the nr_hugepages file in sysfs.
	HugePagesNrFile = "nr_hugepages"
)
    62  
var (
	// nodeDir is the directory that holds the NUMA node entries. The trailing
	// slash matters: GetNodesPaths appends nodeDirPattern to it without adding
	// a separator.
	// NOTE(review): presumably a var rather than a const so that tests can
	// point it elsewhere — confirm.
	nodeDir = "/sys/devices/system/node/"
)
    66  
// CacheInfo describes one CPU cache as assembled by GetCacheInfo from the
// files of a cache directory.
type CacheInfo struct {
	// cache id, parsed from the cache's "id" file
	Id int
	// size in bytes (the "size" file value in K, multiplied by 1024)
	Size uint64
	// cache type - instruction, data, unified (trimmed contents of the "type" file)
	Type string
	// distance from cpus in a multi-level hierarchy, parsed from the "level" file
	Level int
	// number of cpus that can access this cache (bits set in "shared_cpu_map").
	Cpus int
}
    79  
// SysFs abstracts the lowest level calls to sysfs.
type SysFs interface {
	// GetNodesPaths returns the paths of the NUMA node directories.
	GetNodesPaths() ([]string, error)
	// GetCPUsPaths returns the paths to CPUs in the provided directory,
	// e.g. /sys/devices/system/node/node0 or /sys/devices/system/cpu
	GetCPUsPaths(cpusPath string) ([]string, error)
	// GetCoreID returns the physical core id for the specified CPU.
	// NOTE(review): despite the parameter name, realSysFs.GetCoreID treats the
	// argument as the CPU directory and appends the topology/core_id suffix itself.
	GetCoreID(coreIDFilePath string) (string, error)
	// GetCPUPhysicalPackageID returns the physical package id for the specified CPU directory.
	GetCPUPhysicalPackageID(cpuPath string) (string, error)
	// GetMemInfo returns the contents of the meminfo file of the specified NUMA node directory.
	GetMemInfo(nodeDir string) (string, error)
	// GetHugePagesInfo returns the entries of the specified hugepages directory.
	GetHugePagesInfo(hugePagesDirectory string) ([]os.FileInfo, error)
	// GetHugePagesNr returns the nr_hugepages value of the named entry in the specified directory.
	GetHugePagesNr(hugePagesDirectory string, hugePageName string) (string, error)
	// GetBlockDevices returns directory information for available block devices.
	GetBlockDevices() ([]os.FileInfo, error)
	// GetBlockDeviceSize returns the size of a given block device.
	GetBlockDeviceSize(string) (string, error)
	// GetBlockDeviceScheduler returns the scheduler type for the block device.
	GetBlockDeviceScheduler(string) (string, error)
	// GetBlockDeviceNumbers returns the device major:minor number string.
	GetBlockDeviceNumbers(string) (string, error)
	// IsBlockDeviceHidden reports whether the device is "hidden" (meaning it
	// will not have a device handle).
	// This is the case with native nvme multipathing.
	IsBlockDeviceHidden(string) (bool, error)

	// GetNetworkDevices returns directory information for available network devices.
	GetNetworkDevices() ([]os.FileInfo, error)
	// GetNetworkAddress returns the contents of the named device's address file.
	GetNetworkAddress(string) (string, error)
	// GetNetworkMtu returns the contents of the named device's mtu file.
	GetNetworkMtu(string) (string, error)
	// GetNetworkSpeed returns the contents of the named device's speed file.
	GetNetworkSpeed(string) (string, error)
	// GetNetworkStatValue returns the numeric value of statistic stat for device dev.
	GetNetworkStatValue(dev string, stat string) (uint64, error)

	// GetCaches returns directory information for available caches accessible to the given cpu.
	GetCaches(id int) ([]os.FileInfo, error)
	// GetCacheInfo returns information for a cache accessible from the given cpu.
	GetCacheInfo(cpu int, cache string) (CacheInfo, error)

	// GetSystemUUID returns an identifier for the machine.
	GetSystemUUID() (string, error)

	// GetDistances returns distance array
	GetDistances(string) (string, error)

	// IsCPUOnline reports whether the CPU in directory dir is online from the
	// kernel hotplug mechanism standpoint.
	// See: https://www.kernel.org/doc/html/latest/core-api/cpu_hotplug.html
	IsCPUOnline(dir string) bool
}
   128  
// realSysFs is the SysFs implementation returned by NewRealSysFs; it reads
// the files directly with the os package.
type realSysFs struct {
	// cpuPath is the directory whose "online" file IsCPUOnline consults.
	cpuPath string
}
   132  
   133  func NewRealSysFs() SysFs {
   134  	return &realSysFs{
   135  		cpuPath: "/sys/devices/system/cpu",
   136  	}
   137  }
   138  
   139  func (fs *realSysFs) GetNodesPaths() ([]string, error) {
   140  	pathPattern := fmt.Sprintf("%s%s", nodeDir, nodeDirPattern)
   141  	return filepath.Glob(pathPattern)
   142  }
   143  
   144  func (fs *realSysFs) GetCPUsPaths(cpusPath string) ([]string, error) {
   145  	pathPattern := fmt.Sprintf("%s/%s", cpusPath, cpuDirPattern)
   146  	return filepath.Glob(pathPattern)
   147  }
   148  
   149  func (fs *realSysFs) GetCoreID(cpuPath string) (string, error) {
   150  	coreIDFilePath := fmt.Sprintf("%s%s", cpuPath, coreIDFilePath)
   151  	coreID, err := os.ReadFile(coreIDFilePath)
   152  	if err != nil {
   153  		return "", err
   154  	}
   155  	return strings.TrimSpace(string(coreID)), err
   156  }
   157  
   158  func (fs *realSysFs) GetCPUPhysicalPackageID(cpuPath string) (string, error) {
   159  	packageIDFilePath := fmt.Sprintf("%s%s", cpuPath, packageIDFilePath)
   160  	packageID, err := os.ReadFile(packageIDFilePath)
   161  	if err != nil {
   162  		return "", err
   163  	}
   164  	return strings.TrimSpace(string(packageID)), err
   165  }
   166  
   167  func (fs *realSysFs) GetMemInfo(nodePath string) (string, error) {
   168  	meminfoPath := fmt.Sprintf("%s/%s", nodePath, meminfoFile)
   169  	meminfo, err := os.ReadFile(meminfoPath)
   170  	if err != nil {
   171  		return "", err
   172  	}
   173  	return strings.TrimSpace(string(meminfo)), err
   174  }
   175  
   176  func (fs *realSysFs) GetDistances(nodePath string) (string, error) {
   177  	distancePath := fmt.Sprintf("%s/%s", nodePath, distanceFile)
   178  	distance, err := os.ReadFile(distancePath)
   179  	if err != nil {
   180  		return "", err
   181  	}
   182  	return strings.TrimSpace(string(distance)), err
   183  }
   184  
   185  func (fs *realSysFs) GetHugePagesInfo(hugePagesDirectory string) ([]os.FileInfo, error) {
   186  	dirs, err := os.ReadDir(hugePagesDirectory)
   187  	if err != nil {
   188  		return nil, err
   189  	}
   190  	return toFileInfo(dirs)
   191  }
   192  
   193  func (fs *realSysFs) GetHugePagesNr(hugepagesDirectory string, hugePageName string) (string, error) {
   194  	hugePageFilePath := fmt.Sprintf("%s%s/%s", hugepagesDirectory, hugePageName, HugePagesNrFile)
   195  	hugePageFile, err := os.ReadFile(hugePageFilePath)
   196  	if err != nil {
   197  		return "", err
   198  	}
   199  	return strings.TrimSpace(string(hugePageFile)), err
   200  }
   201  
   202  func (fs *realSysFs) GetBlockDevices() ([]os.FileInfo, error) {
   203  	dirs, err := os.ReadDir(blockDir)
   204  	if err != nil {
   205  		return nil, err
   206  	}
   207  	return toFileInfo(dirs)
   208  }
   209  
   210  func (fs *realSysFs) GetBlockDeviceNumbers(name string) (string, error) {
   211  	dev, err := os.ReadFile(path.Join(blockDir, name, "/dev"))
   212  	if err != nil {
   213  		return "", err
   214  	}
   215  	return string(dev), nil
   216  }
   217  
   218  func (fs *realSysFs) IsBlockDeviceHidden(name string) (bool, error) {
   219  	// See: https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-block
   220  	//      https://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
   221  	//        - c8487d854ba5 ("lsblk: Ignore hidden devices")
   222  	devHiddenPath := path.Join(blockDir, name, "/hidden")
   223  	hidden, err := os.ReadFile(devHiddenPath)
   224  	if err != nil && os.IsNotExist(err) {
   225  		// older OS may not have /hidden sysfs entry, so for sure
   226  		// it is not a hidden device...
   227  		return false, nil
   228  	}
   229  	if err != nil {
   230  		return false, fmt.Errorf("failed to read %s: %w", devHiddenPath, err)
   231  	}
   232  	if string(hidden) == "1" {
   233  		return true, nil
   234  	}
   235  	return false, nil
   236  }
   237  
   238  func (fs *realSysFs) GetBlockDeviceScheduler(name string) (string, error) {
   239  	sched, err := os.ReadFile(path.Join(blockDir, name, "/queue/scheduler"))
   240  	if err != nil {
   241  		return "", err
   242  	}
   243  	return string(sched), nil
   244  }
   245  
   246  func (fs *realSysFs) GetBlockDeviceSize(name string) (string, error) {
   247  	size, err := os.ReadFile(path.Join(blockDir, name, "/size"))
   248  	if err != nil {
   249  		return "", err
   250  	}
   251  	return string(size), nil
   252  }
   253  
   254  func (fs *realSysFs) GetNetworkDevices() ([]os.FileInfo, error) {
   255  	dirs, err := os.ReadDir(netDir)
   256  	if err != nil {
   257  		return nil, err
   258  	}
   259  	files, err := toFileInfo(dirs)
   260  	if err != nil {
   261  		return nil, err
   262  	}
   263  
   264  	// Filter out non-directory & non-symlink files
   265  	filtered := []os.FileInfo{}
   266  	for _, f := range files {
   267  		if f.Mode()|os.ModeSymlink != 0 {
   268  			f, err = os.Stat(path.Join(netDir, f.Name()))
   269  			if err != nil {
   270  				continue
   271  			}
   272  		}
   273  		if f.IsDir() {
   274  			filtered = append(filtered, f)
   275  		}
   276  	}
   277  	return filtered, nil
   278  }
   279  
   280  func (fs *realSysFs) GetNetworkAddress(name string) (string, error) {
   281  	address, err := os.ReadFile(path.Join(netDir, name, "/address"))
   282  	if err != nil {
   283  		return "", err
   284  	}
   285  	return string(address), nil
   286  }
   287  
   288  func (fs *realSysFs) GetNetworkMtu(name string) (string, error) {
   289  	mtu, err := os.ReadFile(path.Join(netDir, name, "/mtu"))
   290  	if err != nil {
   291  		return "", err
   292  	}
   293  	return string(mtu), nil
   294  }
   295  
   296  func (fs *realSysFs) GetNetworkSpeed(name string) (string, error) {
   297  	speed, err := os.ReadFile(path.Join(netDir, name, "/speed"))
   298  	if err != nil {
   299  		return "", err
   300  	}
   301  	return string(speed), nil
   302  }
   303  
   304  func (fs *realSysFs) GetNetworkStatValue(dev string, stat string) (uint64, error) {
   305  	statPath := path.Join(netDir, dev, "/statistics", stat)
   306  	out, err := os.ReadFile(statPath)
   307  	if err != nil {
   308  		return 0, fmt.Errorf("failed to read stat from %q for device %q", statPath, dev)
   309  	}
   310  	var s uint64
   311  	n, err := fmt.Sscanf(string(out), "%d", &s)
   312  	if err != nil || n != 1 {
   313  		return 0, fmt.Errorf("could not parse value from %q for file %s", string(out), statPath)
   314  	}
   315  	return s, nil
   316  }
   317  
   318  func (fs *realSysFs) GetCaches(id int) ([]os.FileInfo, error) {
   319  	cpuPath := fmt.Sprintf("%s%d/cache", cacheDir, id)
   320  	dir, err := os.ReadDir(cpuPath)
   321  	if err != nil {
   322  		return nil, err
   323  	}
   324  	return toFileInfo(dir)
   325  }
   326  
   327  func toFileInfo(dirs []os.DirEntry) ([]os.FileInfo, error) {
   328  	info := []os.FileInfo{}
   329  	for _, dir := range dirs {
   330  		fI, err := dir.Info()
   331  		if err != nil {
   332  			return nil, err
   333  		}
   334  		info = append(info, fI)
   335  	}
   336  	return info, nil
   337  }
   338  
   339  func bitCount(i uint64) (count int) {
   340  	for i != 0 {
   341  		if i&1 == 1 {
   342  			count++
   343  		}
   344  		i >>= 1
   345  	}
   346  	return
   347  }
   348  
   349  func getCPUCount(cache string) (count int, err error) {
   350  	out, err := os.ReadFile(path.Join(cache, "/shared_cpu_map"))
   351  	if err != nil {
   352  		return 0, err
   353  	}
   354  	masks := strings.Split(string(out), ",")
   355  	for _, mask := range masks {
   356  		// convert hex string to uint64
   357  		m, err := strconv.ParseUint(strings.TrimSpace(mask), 16, 64)
   358  		if err != nil {
   359  			return 0, fmt.Errorf("failed to parse cpu map %q: %v", string(out), err)
   360  		}
   361  		count += bitCount(m)
   362  	}
   363  	return
   364  }
   365  
   366  func (fs *realSysFs) GetCacheInfo(cpu int, name string) (CacheInfo, error) {
   367  	cachePath := fmt.Sprintf("%s%d/cache/%s", cacheDir, cpu, name)
   368  	out, err := os.ReadFile(path.Join(cachePath, "/id"))
   369  	if err != nil {
   370  		return CacheInfo{}, err
   371  	}
   372  	var id int
   373  	n, err := fmt.Sscanf(string(out), "%d", &id)
   374  	if err != nil || n != 1 {
   375  		return CacheInfo{}, err
   376  	}
   377  
   378  	out, err = os.ReadFile(path.Join(cachePath, "/size"))
   379  	if err != nil {
   380  		return CacheInfo{}, err
   381  	}
   382  	var size uint64
   383  	n, err = fmt.Sscanf(string(out), "%dK", &size)
   384  	if err != nil || n != 1 {
   385  		return CacheInfo{}, err
   386  	}
   387  	// convert to bytes
   388  	size = size * 1024
   389  	out, err = os.ReadFile(path.Join(cachePath, "/level"))
   390  	if err != nil {
   391  		return CacheInfo{}, err
   392  	}
   393  	var level int
   394  	n, err = fmt.Sscanf(string(out), "%d", &level)
   395  	if err != nil || n != 1 {
   396  		return CacheInfo{}, err
   397  	}
   398  
   399  	out, err = os.ReadFile(path.Join(cachePath, "/type"))
   400  	if err != nil {
   401  		return CacheInfo{}, err
   402  	}
   403  	cacheType := strings.TrimSpace(string(out))
   404  	cpuCount, err := getCPUCount(cachePath)
   405  	if err != nil {
   406  		return CacheInfo{}, err
   407  	}
   408  	return CacheInfo{
   409  		Id:    id,
   410  		Size:  size,
   411  		Level: level,
   412  		Type:  cacheType,
   413  		Cpus:  cpuCount,
   414  	}, nil
   415  }
   416  
   417  func (fs *realSysFs) GetSystemUUID() (string, error) {
   418  	if id, err := os.ReadFile(path.Join(dmiDir, "id", "product_uuid")); err == nil {
   419  		return strings.TrimSpace(string(id)), nil
   420  	} else if id, err = os.ReadFile(path.Join(ppcDevTree, "system-id")); err == nil {
   421  		return strings.TrimSpace(strings.TrimRight(string(id), "\000")), nil
   422  	} else if id, err = os.ReadFile(path.Join(ppcDevTree, "vm,uuid")); err == nil {
   423  		return strings.TrimSpace(strings.TrimRight(string(id), "\000")), nil
   424  	} else if id, err = os.ReadFile(path.Join(s390xDevTree, "machine-id")); err == nil {
   425  		return strings.TrimSpace(string(id)), nil
   426  	} else {
   427  		return "", err
   428  	}
   429  }
   430  
   431  func (fs *realSysFs) IsCPUOnline(cpuPath string) bool {
   432  	cpuOnlinePath, err := filepath.Abs(fs.cpuPath + "/online")
   433  	if err != nil {
   434  		klog.V(1).Infof("Unable to get absolute path for %s", cpuPath)
   435  		return false
   436  	}
   437  
   438  	// Quick check to determine if file exists: if it does not then kernel CPU hotplug is disabled and all CPUs are online.
   439  	_, err = os.Stat(cpuOnlinePath)
   440  	if err != nil && os.IsNotExist(err) {
   441  		return true
   442  	}
   443  	if err != nil {
   444  		klog.V(1).Infof("Unable to stat %s: %s", cpuOnlinePath, err)
   445  	}
   446  
   447  	cpuID, err := getCPUID(cpuPath)
   448  	if err != nil {
   449  		klog.V(1).Infof("Unable to get CPU ID from path %s: %s", cpuPath, err)
   450  		return false
   451  	}
   452  
   453  	isOnline, err := isCPUOnline(cpuOnlinePath, cpuID)
   454  	if err != nil {
   455  		klog.V(1).Infof("Unable to get online CPUs list: %s", err)
   456  		return false
   457  	}
   458  	return isOnline
   459  }
   460  
   461  func getCPUID(dir string) (uint16, error) {
   462  	regex := regexp.MustCompile("cpu([0-9]+)")
   463  	matches := regex.FindStringSubmatch(dir)
   464  	if len(matches) == 2 {
   465  		id, err := strconv.Atoi(matches[1])
   466  		if err != nil {
   467  			return 0, err
   468  		}
   469  		return uint16(id), nil
   470  	}
   471  	return 0, fmt.Errorf("can't get CPU ID from %s", dir)
   472  }
   473  
   474  // isCPUOnline is copied from github.com/opencontainers/runc/libcontainer/cgroups/fs and modified to suite cAdvisor
   475  // needs as Apache 2.0 license allows.
   476  // It parses CPU list (such as: 0,3-5,10) into a struct that allows to determine quickly if CPU or particular ID is online.
   477  // see: https://github.com/opencontainers/runc/blob/ab27e12cebf148aa5d1ee3ad13d9fc7ae12bf0b6/libcontainer/cgroups/fs/cpuset.go#L45
   478  func isCPUOnline(path string, cpuID uint16) (bool, error) {
   479  	fileContent, err := os.ReadFile(path)
   480  	if err != nil {
   481  		return false, err
   482  	}
   483  	if len(fileContent) == 0 {
   484  		return false, fmt.Errorf("%s found to be empty", path)
   485  	}
   486  
   487  	cpuList := strings.TrimSpace(string(fileContent))
   488  	for _, s := range strings.Split(cpuList, ",") {
   489  		splitted := strings.SplitN(s, "-", 3)
   490  		switch len(splitted) {
   491  		case 3:
   492  			return false, fmt.Errorf("invalid values in %s", path)
   493  		case 2:
   494  			min, err := strconv.ParseUint(splitted[0], 10, 16)
   495  			if err != nil {
   496  				return false, err
   497  			}
   498  			max, err := strconv.ParseUint(splitted[1], 10, 16)
   499  			if err != nil {
   500  				return false, err
   501  			}
   502  			if min > max {
   503  				return false, fmt.Errorf("invalid values in %s", path)
   504  			}
   505  			// Return true, if the CPU under consideration is in the range of online CPUs.
   506  			if cpuID >= uint16(min) && cpuID <= uint16(max) {
   507  				return true, nil
   508  			}
   509  		case 1:
   510  			value, err := strconv.ParseUint(s, 10, 16)
   511  			if err != nil {
   512  				return false, err
   513  			}
   514  			if uint16(value) == cpuID {
   515  				return true, nil
   516  			}
   517  		}
   518  	}
   519  
   520  	return false, nil
   521  }
   522  
   523  // Looks for sysfs cpu path containing given CPU property, e.g. core_id or physical_package_id
   524  // and returns number of unique values of given property, exemplary usage: getting number of CPU physical cores
   525  func GetUniqueCPUPropertyCount(cpuAttributesPath string, propertyName string) int {
   526  	absCPUAttributesPath, err := filepath.Abs(cpuAttributesPath)
   527  	if err != nil {
   528  		klog.Errorf("Cannot make %s absolute", cpuAttributesPath)
   529  		return 0
   530  	}
   531  	pathPattern := absCPUAttributesPath + "/cpu*[0-9]"
   532  	sysCPUPaths, err := filepath.Glob(pathPattern)
   533  	if err != nil {
   534  		klog.Errorf("Cannot find files matching pattern (pathPattern: %s),  number of unique %s set to 0", pathPattern, propertyName)
   535  		return 0
   536  	}
   537  	cpuOnlinePath, err := filepath.Abs(cpuAttributesPath + "/online")
   538  	if err != nil {
   539  		klog.V(1).Infof("Unable to get absolute path for %s", cpuAttributesPath+"/../online")
   540  		return 0
   541  	}
   542  
   543  	if err != nil {
   544  		klog.V(1).Infof("Unable to get online CPUs list: %s", err)
   545  		return 0
   546  	}
   547  	uniques := make(map[string]bool)
   548  	for _, sysCPUPath := range sysCPUPaths {
   549  		cpuID, err := getCPUID(sysCPUPath)
   550  		if err != nil {
   551  			klog.V(1).Infof("Unable to get CPU ID from path %s: %s", sysCPUPath, err)
   552  			return 0
   553  		}
   554  		isOnline, err := isCPUOnline(cpuOnlinePath, cpuID)
   555  		if err != nil && !os.IsNotExist(err) {
   556  			klog.V(1).Infof("Unable to determine CPU online state: %s", err)
   557  			continue
   558  		}
   559  		if !isOnline && !os.IsNotExist(err) {
   560  			continue
   561  		}
   562  		propertyPath := filepath.Join(sysCPUPath, sysFsCPUTopology, propertyName)
   563  		propertyVal, err := os.ReadFile(propertyPath)
   564  		if err != nil {
   565  			klog.Warningf("Cannot open %s, assuming 0 for %s of CPU %d", propertyPath, propertyName, cpuID)
   566  			propertyVal = []byte("0")
   567  		}
   568  		packagePath := filepath.Join(sysCPUPath, sysFsCPUTopology, CPUPhysicalPackageID)
   569  		packageVal, err := os.ReadFile(packagePath)
   570  		if err != nil {
   571  			klog.Warningf("Cannot open %s, assuming 0 %s of CPU %d", packagePath, CPUPhysicalPackageID, cpuID)
   572  			packageVal = []byte("0")
   573  
   574  		}
   575  		uniques[fmt.Sprintf("%s_%s", bytes.TrimSpace(propertyVal), bytes.TrimSpace(packageVal))] = true
   576  	}
   577  	return len(uniques)
   578  }