github.com/google/cadvisor@v0.49.1/utils/sysinfo/sysinfo.go (about)

     1  // Copyright 2014 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sysinfo
    16  
import (
	"fmt"
	"os"
	"regexp"
	"sort"
	"strconv"
	"strings"

	info "github.com/google/cadvisor/info/v1"
	"github.com/google/cadvisor/utils/sysfs"

	"k8s.io/klog/v2"
)
    29  
var (
	// schedulerRegExp captures the text enclosed in square brackets. It is
	// applied to the block device scheduler string; the bracketed entry is
	// what gets stored as the disk's scheduler.
	schedulerRegExp      = regexp.MustCompile(`.*\[(.*)\].*`)
	// nodeDirRegExp captures the digits following "node/node" in a node
	// directory path. The group uses `\d*`, so it may capture an empty string.
	nodeDirRegExp        = regexp.MustCompile(`node/node(\d*)`)
	// cpuDirRegExp captures the digits following "/cpu" in a CPU directory path.
	cpuDirRegExp         = regexp.MustCompile(`/cpu(\d+)`)
	// memoryCapacityRegexp captures the number between "MemTotal:" and " kB".
	memoryCapacityRegexp = regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`)

	// cpusPath is the directory queried for CPU paths when no NUMA node
	// directories are reported.
	// NOTE(review): declared as a variable rather than a constant, presumably
	// so it can be overridden (e.g. in tests) — confirm.
	cpusPath = "/sys/devices/system/cpu"
)
    38  
const (
	// cacheLevel2 is the threshold used by addCacheInfo: caches with a level
	// above it are recorded as node-level or uncore caches, the others as
	// core-level caches.
	cacheLevel2  = 2
	// hugepagesDir is the sub-path appended to a node directory to locate
	// that node's huge pages entries.
	hugepagesDir = "hugepages/"
)
    43  
    44  // Get information about block devices present on the system.
    45  // Uses the passed in system interface to retrieve the low level OS information.
    46  func GetBlockDeviceInfo(sysfs sysfs.SysFs) (map[string]info.DiskInfo, error) {
    47  	disks, err := sysfs.GetBlockDevices()
    48  	if err != nil {
    49  		return nil, err
    50  	}
    51  
    52  	diskMap := make(map[string]info.DiskInfo)
    53  	for _, disk := range disks {
    54  		name := disk.Name()
    55  		// Ignore non-disk devices.
    56  		// TODO(rjnagal): Maybe just match hd, sd, and dm prefixes.
    57  		if strings.HasPrefix(name, "loop") || strings.HasPrefix(name, "ram") || strings.HasPrefix(name, "sr") {
    58  			continue
    59  		}
    60  		// Ignore "hidden" devices (i.e. nvme path device sysfs entries).
    61  		// These devices are in the form of /dev/nvme$Xc$Yn$Z and will
    62  		// not have a device handle (i.e. "hidden")
    63  		isHidden, err := sysfs.IsBlockDeviceHidden(name)
    64  		if err != nil {
    65  			return nil, err
    66  		}
    67  		if isHidden {
    68  			continue
    69  		}
    70  		diskInfo := info.DiskInfo{
    71  			Name: name,
    72  		}
    73  		dev, err := sysfs.GetBlockDeviceNumbers(name)
    74  		if err != nil {
    75  			return nil, err
    76  		}
    77  		n, err := fmt.Sscanf(dev, "%d:%d", &diskInfo.Major, &diskInfo.Minor)
    78  		if err != nil || n != 2 {
    79  			return nil, fmt.Errorf("could not parse device numbers from %s for device %s", dev, name)
    80  		}
    81  		out, err := sysfs.GetBlockDeviceSize(name)
    82  		if err != nil {
    83  			return nil, err
    84  		}
    85  		// Remove trailing newline before conversion.
    86  		size, err := strconv.ParseUint(strings.TrimSpace(out), 10, 64)
    87  		if err != nil {
    88  			return nil, err
    89  		}
    90  		// size is in 512 bytes blocks.
    91  		diskInfo.Size = size * 512
    92  
    93  		diskInfo.Scheduler = "none"
    94  		blkSched, err := sysfs.GetBlockDeviceScheduler(name)
    95  		if err == nil {
    96  			matches := schedulerRegExp.FindSubmatch([]byte(blkSched))
    97  			if len(matches) >= 2 {
    98  				diskInfo.Scheduler = string(matches[1])
    99  			}
   100  		}
   101  		device := fmt.Sprintf("%d:%d", diskInfo.Major, diskInfo.Minor)
   102  		diskMap[device] = diskInfo
   103  	}
   104  	return diskMap, nil
   105  }
   106  
   107  // Get information about network devices present on the system.
   108  func GetNetworkDevices(sysfs sysfs.SysFs) ([]info.NetInfo, error) {
   109  	devs, err := sysfs.GetNetworkDevices()
   110  	if err != nil {
   111  		return nil, err
   112  	}
   113  	netDevices := []info.NetInfo{}
   114  	for _, dev := range devs {
   115  		name := dev.Name()
   116  		// Ignore docker, loopback, and veth devices.
   117  		ignoredDevices := []string{"lo", "veth", "docker", "nerdctl"}
   118  		ignored := false
   119  		for _, prefix := range ignoredDevices {
   120  			if strings.HasPrefix(name, prefix) {
   121  				ignored = true
   122  				break
   123  			}
   124  		}
   125  		if ignored {
   126  			continue
   127  		}
   128  		address, err := sysfs.GetNetworkAddress(name)
   129  		if err != nil {
   130  			return nil, err
   131  		}
   132  		mtuStr, err := sysfs.GetNetworkMtu(name)
   133  		if err != nil {
   134  			return nil, err
   135  		}
   136  		var mtu int64
   137  		n, err := fmt.Sscanf(mtuStr, "%d", &mtu)
   138  		if err != nil || n != 1 {
   139  			return nil, fmt.Errorf("could not parse mtu from %s for device %s", mtuStr, name)
   140  		}
   141  		netInfo := info.NetInfo{
   142  			Name:       name,
   143  			MacAddress: strings.TrimSpace(address),
   144  			Mtu:        mtu,
   145  		}
   146  		speed, err := sysfs.GetNetworkSpeed(name)
   147  		// Some devices don't set speed.
   148  		if err == nil {
   149  			var s int64
   150  			n, err := fmt.Sscanf(speed, "%d", &s)
   151  			if err != nil || n != 1 {
   152  				return nil, fmt.Errorf("could not parse speed from %s for device %s", speed, name)
   153  			}
   154  			netInfo.Speed = s
   155  		}
   156  		netDevices = append(netDevices, netInfo)
   157  	}
   158  	return netDevices, nil
   159  }
   160  
   161  // GetHugePagesInfo returns information about pre-allocated huge pages
   162  // hugepagesDirectory should be top directory of hugepages
   163  // Such as: /sys/kernel/mm/hugepages/
   164  func GetHugePagesInfo(sysFs sysfs.SysFs, hugepagesDirectory string) ([]info.HugePagesInfo, error) {
   165  	var hugePagesInfo []info.HugePagesInfo
   166  	files, err := sysFs.GetHugePagesInfo(hugepagesDirectory)
   167  	if err != nil {
   168  		// treat as non-fatal since kernels and machine can be
   169  		// configured to disable hugepage support
   170  		return hugePagesInfo, nil
   171  	}
   172  
   173  	for _, st := range files {
   174  		nameArray := strings.Split(st.Name(), "-")
   175  		pageSizeArray := strings.Split(nameArray[1], "kB")
   176  		pageSize, err := strconv.ParseUint(string(pageSizeArray[0]), 10, 64)
   177  		if err != nil {
   178  			return hugePagesInfo, err
   179  		}
   180  
   181  		val, err := sysFs.GetHugePagesNr(hugepagesDirectory, st.Name())
   182  		if err != nil {
   183  			return hugePagesInfo, err
   184  		}
   185  		var numPages uint64
   186  		// we use sscanf as the file as a new-line that trips up ParseUint
   187  		// it returns the number of tokens successfully parsed, so if
   188  		// n != 1, it means we were unable to parse a number from the file
   189  		n, err := fmt.Sscanf(string(val), "%d", &numPages)
   190  		if err != nil || n != 1 {
   191  			return hugePagesInfo, fmt.Errorf("could not parse file nr_hugepage for %s, contents %q", st.Name(), string(val))
   192  		}
   193  
   194  		hugePagesInfo = append(hugePagesInfo, info.HugePagesInfo{
   195  			NumPages: numPages,
   196  			PageSize: pageSize,
   197  		})
   198  	}
   199  	return hugePagesInfo, nil
   200  }
   201  
   202  // GetNodesInfo returns information about NUMA nodes and their topology
   203  func GetNodesInfo(sysFs sysfs.SysFs) ([]info.Node, int, error) {
   204  	nodes := []info.Node{}
   205  	allLogicalCoresCount := 0
   206  
   207  	nodesDirs, err := sysFs.GetNodesPaths()
   208  	if err != nil {
   209  		return nil, 0, err
   210  	}
   211  
   212  	if len(nodesDirs) == 0 {
   213  		klog.V(4).Info("Nodes topology is not available, providing CPU topology")
   214  		return getCPUTopology(sysFs)
   215  	}
   216  
   217  	for _, nodeDir := range nodesDirs {
   218  		id, err := getMatchedInt(nodeDirRegExp, nodeDir)
   219  		if err != nil {
   220  			return nil, 0, err
   221  		}
   222  		node := info.Node{Id: id}
   223  
   224  		cpuDirs, err := sysFs.GetCPUsPaths(nodeDir)
   225  		if len(cpuDirs) == 0 {
   226  			klog.Warningf("Found node without any CPU, nodeDir: %s, number of cpuDirs %d, err: %v", nodeDir, len(cpuDirs), err)
   227  		} else {
   228  			cores, err := getCoresInfo(sysFs, cpuDirs)
   229  			if err != nil {
   230  				return nil, 0, err
   231  			}
   232  			node.Cores = cores
   233  			for _, core := range cores {
   234  				allLogicalCoresCount += len(core.Threads)
   235  			}
   236  		}
   237  
   238  		// On some Linux platforms(such as Arm64 guest kernel), cache info may not exist.
   239  		// So, we should ignore error here.
   240  		err = addCacheInfo(sysFs, &node)
   241  		if err != nil {
   242  			klog.V(1).Infof("Found node without cache information, nodeDir: %s", nodeDir)
   243  		}
   244  
   245  		node.Memory, err = getNodeMemInfo(sysFs, nodeDir)
   246  		if err != nil {
   247  			return nil, 0, err
   248  		}
   249  
   250  		hugepagesDirectory := fmt.Sprintf("%s/%s", nodeDir, hugepagesDir)
   251  		node.HugePages, err = GetHugePagesInfo(sysFs, hugepagesDirectory)
   252  		if err != nil {
   253  			return nil, 0, err
   254  		}
   255  
   256  		node.Distances, err = getDistances(sysFs, nodeDir)
   257  		if err != nil {
   258  			return nil, 0, err
   259  		}
   260  
   261  		nodes = append(nodes, node)
   262  	}
   263  	return nodes, allLogicalCoresCount, err
   264  }
   265  
   266  func getCPUTopology(sysFs sysfs.SysFs) ([]info.Node, int, error) {
   267  	nodes := []info.Node{}
   268  
   269  	cpusPaths, err := sysFs.GetCPUsPaths(cpusPath)
   270  	if err != nil {
   271  		return nil, 0, err
   272  	}
   273  	cpusCount := len(cpusPaths)
   274  
   275  	if cpusCount == 0 {
   276  		err = fmt.Errorf("Any CPU is not available, cpusPath: %s", cpusPath)
   277  		return nil, 0, err
   278  	}
   279  
   280  	cpusByPhysicalPackageID, err := getCpusByPhysicalPackageID(sysFs, cpusPaths)
   281  	if err != nil {
   282  		return nil, 0, err
   283  	}
   284  
   285  	if len(cpusByPhysicalPackageID) == 0 {
   286  		klog.Warningf("Cannot read any physical package id for any CPU")
   287  		return nil, cpusCount, nil
   288  	}
   289  
   290  	for physicalPackageID, cpus := range cpusByPhysicalPackageID {
   291  		node := info.Node{Id: physicalPackageID}
   292  
   293  		cores, err := getCoresInfo(sysFs, cpus)
   294  		if err != nil {
   295  			return nil, 0, err
   296  		}
   297  		node.Cores = cores
   298  
   299  		// On some Linux platforms(such as Arm64 guest kernel), cache info may not exist.
   300  		// So, we should ignore error here.
   301  		err = addCacheInfo(sysFs, &node)
   302  		if err != nil {
   303  			klog.V(1).Infof("Found cpu without cache information, cpuPath: %s", cpus)
   304  		}
   305  		nodes = append(nodes, node)
   306  	}
   307  	return nodes, cpusCount, nil
   308  }
   309  
   310  func getCpusByPhysicalPackageID(sysFs sysfs.SysFs, cpusPaths []string) (map[int][]string, error) {
   311  	cpuPathsByPhysicalPackageID := make(map[int][]string)
   312  	for _, cpuPath := range cpusPaths {
   313  
   314  		rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuPath)
   315  		if os.IsNotExist(err) {
   316  			klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuPath, err)
   317  			continue
   318  		} else if err != nil {
   319  			return nil, err
   320  		}
   321  
   322  		physicalPackageID, err := strconv.Atoi(rawPhysicalPackageID)
   323  		if err != nil {
   324  			return nil, err
   325  		}
   326  
   327  		if _, ok := cpuPathsByPhysicalPackageID[physicalPackageID]; !ok {
   328  			cpuPathsByPhysicalPackageID[physicalPackageID] = make([]string, 0)
   329  		}
   330  
   331  		cpuPathsByPhysicalPackageID[physicalPackageID] = append(cpuPathsByPhysicalPackageID[physicalPackageID], cpuPath)
   332  	}
   333  	return cpuPathsByPhysicalPackageID, nil
   334  }
   335  
   336  // addCacheInfo adds information about cache for NUMA node
   337  func addCacheInfo(sysFs sysfs.SysFs, node *info.Node) error {
   338  	for coreID, core := range node.Cores {
   339  		threadID := core.Threads[0] //get any thread for core
   340  		caches, err := GetCacheInfo(sysFs, threadID)
   341  		if err != nil {
   342  			return err
   343  		}
   344  
   345  		numThreadsPerCore := len(core.Threads)
   346  		numThreadsPerNode := len(node.Cores) * numThreadsPerCore
   347  
   348  		for _, cache := range caches {
   349  			c := info.Cache{
   350  				Id:    cache.Id,
   351  				Size:  cache.Size,
   352  				Level: cache.Level,
   353  				Type:  cache.Type,
   354  			}
   355  			if cache.Level > cacheLevel2 {
   356  				if cache.Cpus == numThreadsPerNode {
   357  					// Add a node level cache.
   358  					cacheFound := false
   359  					for _, nodeCache := range node.Caches {
   360  						if nodeCache == c {
   361  							cacheFound = true
   362  						}
   363  					}
   364  					if !cacheFound {
   365  						node.Caches = append(node.Caches, c)
   366  					}
   367  				} else {
   368  					// Add uncore cache, for architecture in which l3 cache only shared among some cores.
   369  					uncoreCacheFound := false
   370  					for _, uncoreCache := range node.Cores[coreID].UncoreCaches {
   371  						if uncoreCache == c {
   372  							uncoreCacheFound = true
   373  						}
   374  					}
   375  					if !uncoreCacheFound {
   376  						node.Cores[coreID].UncoreCaches = append(node.Cores[coreID].UncoreCaches, c)
   377  					}
   378  				}
   379  			} else if cache.Cpus == numThreadsPerCore {
   380  				// Add core level cache
   381  				node.Cores[coreID].Caches = append(node.Cores[coreID].Caches, c)
   382  			}
   383  			// Ignore unknown caches.
   384  		}
   385  	}
   386  	return nil
   387  }
   388  
   389  // getNodeMemInfo returns information about total memory for NUMA node
   390  func getNodeMemInfo(sysFs sysfs.SysFs, nodeDir string) (uint64, error) {
   391  	rawMem, err := sysFs.GetMemInfo(nodeDir)
   392  	if err != nil {
   393  		//Ignore if per-node info is not available.
   394  		klog.Warningf("Found node without memory information, nodeDir: %s", nodeDir)
   395  		return 0, nil
   396  	}
   397  	matches := memoryCapacityRegexp.FindStringSubmatch(rawMem)
   398  	if len(matches) != 2 {
   399  		return 0, fmt.Errorf("failed to match regexp in output: %q", string(rawMem))
   400  	}
   401  	memory, err := strconv.ParseUint(matches[1], 10, 64)
   402  	if err != nil {
   403  		return 0, err
   404  	}
   405  	memory = memory * 1024 // Convert to bytes
   406  	return uint64(memory), nil
   407  }
   408  
   409  // getDistances returns information about distances between NUMA nodes
   410  func getDistances(sysFs sysfs.SysFs, nodeDir string) ([]uint64, error) {
   411  	rawDistance, err := sysFs.GetDistances(nodeDir)
   412  	if err != nil {
   413  		//Ignore if per-node info is not available.
   414  		klog.Warningf("Found node without distance information, nodeDir: %s", nodeDir)
   415  		return nil, nil
   416  	}
   417  
   418  	distances := []uint64{}
   419  	for _, distance := range strings.Split(rawDistance, " ") {
   420  		distanceUint, err := strconv.ParseUint(distance, 10, 64)
   421  		if err != nil {
   422  			return nil, fmt.Errorf("cannot convert %s to int", distance)
   423  		}
   424  		distances = append(distances, distanceUint)
   425  	}
   426  
   427  	return distances, nil
   428  }
   429  
   430  // getCoresInfo returns information about physical cores
   431  func getCoresInfo(sysFs sysfs.SysFs, cpuDirs []string) ([]info.Core, error) {
   432  	cores := make([]info.Core, 0, len(cpuDirs))
   433  	for _, cpuDir := range cpuDirs {
   434  		cpuID, err := getMatchedInt(cpuDirRegExp, cpuDir)
   435  		if err != nil {
   436  			return nil, fmt.Errorf("unexpected format of CPU directory, cpuDirRegExp %s, cpuDir: %s", cpuDirRegExp, cpuDir)
   437  		}
   438  		if !sysFs.IsCPUOnline(cpuDir) {
   439  			continue
   440  		}
   441  
   442  		rawPhysicalID, err := sysFs.GetCoreID(cpuDir)
   443  		if os.IsNotExist(err) {
   444  			klog.Warningf("Cannot read core id for %s, core_id file does not exist, err: %s", cpuDir, err)
   445  			continue
   446  		} else if err != nil {
   447  			return nil, err
   448  		}
   449  		physicalID, err := strconv.Atoi(rawPhysicalID)
   450  		if err != nil {
   451  			return nil, err
   452  		}
   453  
   454  		rawPhysicalPackageID, err := sysFs.GetCPUPhysicalPackageID(cpuDir)
   455  		if os.IsNotExist(err) {
   456  			klog.Warningf("Cannot read physical package id for %s, physical_package_id file does not exist, err: %s", cpuDir, err)
   457  			continue
   458  		} else if err != nil {
   459  			return nil, err
   460  		}
   461  
   462  		physicalPackageID, err := strconv.Atoi(rawPhysicalPackageID)
   463  		if err != nil {
   464  			return nil, err
   465  		}
   466  
   467  		coreIDx := -1
   468  		for id, core := range cores {
   469  			if core.Id == physicalID && core.SocketID == physicalPackageID {
   470  				coreIDx = id
   471  			}
   472  		}
   473  		if coreIDx == -1 {
   474  			cores = append(cores, info.Core{})
   475  			coreIDx = len(cores) - 1
   476  		}
   477  		desiredCore := &cores[coreIDx]
   478  
   479  		desiredCore.Id = physicalID
   480  		desiredCore.SocketID = physicalPackageID
   481  
   482  		if len(desiredCore.Threads) == 0 {
   483  			desiredCore.Threads = []int{cpuID}
   484  		} else {
   485  			desiredCore.Threads = append(desiredCore.Threads, cpuID)
   486  		}
   487  
   488  	}
   489  	return cores, nil
   490  }
   491  
   492  // GetCacheInfo return information about a cache accessible from the given cpu thread
   493  func GetCacheInfo(sysFs sysfs.SysFs, id int) ([]sysfs.CacheInfo, error) {
   494  	caches, err := sysFs.GetCaches(id)
   495  	if err != nil {
   496  		return nil, err
   497  	}
   498  
   499  	info := []sysfs.CacheInfo{}
   500  	for _, cache := range caches {
   501  		if !strings.HasPrefix(cache.Name(), "index") {
   502  			continue
   503  		}
   504  		cacheInfo, err := sysFs.GetCacheInfo(id, cache.Name())
   505  		if err != nil {
   506  			return nil, err
   507  		}
   508  		info = append(info, cacheInfo)
   509  	}
   510  	return info, nil
   511  }
   512  
   513  func getNetworkStats(name string, sysFs sysfs.SysFs) (info.InterfaceStats, error) {
   514  	var stats info.InterfaceStats
   515  	var err error
   516  	stats.Name = name
   517  	stats.RxBytes, err = sysFs.GetNetworkStatValue(name, "rx_bytes")
   518  	if err != nil {
   519  		return stats, err
   520  	}
   521  	stats.RxPackets, err = sysFs.GetNetworkStatValue(name, "rx_packets")
   522  	if err != nil {
   523  		return stats, err
   524  	}
   525  	stats.RxErrors, err = sysFs.GetNetworkStatValue(name, "rx_errors")
   526  	if err != nil {
   527  		return stats, err
   528  	}
   529  	stats.RxDropped, err = sysFs.GetNetworkStatValue(name, "rx_dropped")
   530  	if err != nil {
   531  		return stats, err
   532  	}
   533  	stats.TxBytes, err = sysFs.GetNetworkStatValue(name, "tx_bytes")
   534  	if err != nil {
   535  		return stats, err
   536  	}
   537  	stats.TxPackets, err = sysFs.GetNetworkStatValue(name, "tx_packets")
   538  	if err != nil {
   539  		return stats, err
   540  	}
   541  	stats.TxErrors, err = sysFs.GetNetworkStatValue(name, "tx_errors")
   542  	if err != nil {
   543  		return stats, err
   544  	}
   545  	stats.TxDropped, err = sysFs.GetNetworkStatValue(name, "tx_dropped")
   546  	if err != nil {
   547  		return stats, err
   548  	}
   549  	return stats, nil
   550  }
   551  
   552  func GetSystemUUID(sysFs sysfs.SysFs) (string, error) {
   553  	return sysFs.GetSystemUUID()
   554  }
   555  
   556  func getMatchedInt(rgx *regexp.Regexp, str string) (int, error) {
   557  	matches := rgx.FindStringSubmatch(str)
   558  	if len(matches) != 2 {
   559  		return 0, fmt.Errorf("failed to match regexp, str: %s", str)
   560  	}
   561  	valInt, err := strconv.Atoi(matches[1])
   562  	if err != nil {
   563  		return 0, err
   564  	}
   565  	return valInt, nil
   566  }
   567  
   568  // GetSocketFromCPU returns Socket ID of passed CPU. If is not present, returns -1.
   569  func GetSocketFromCPU(topology []info.Node, cpu int) int {
   570  	for _, node := range topology {
   571  		found, coreID := node.FindCoreByThread(cpu)
   572  		if found {
   573  			return node.Cores[coreID].SocketID
   574  		}
   575  	}
   576  	return -1
   577  }
   578  
   579  // GetOnlineCPUs returns available cores.
   580  func GetOnlineCPUs(topology []info.Node) []int {
   581  	onlineCPUs := make([]int, 0)
   582  	for _, node := range topology {
   583  		for _, core := range node.Cores {
   584  			onlineCPUs = append(onlineCPUs, core.Threads...)
   585  		}
   586  	}
   587  	return onlineCPUs
   588  }