github.com/google/cadvisor@v0.49.1/utils/sysfs/sysfs.go (about) 1 // Copyright 2014 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sysfs 16 17 import ( 18 "bytes" 19 "fmt" 20 "os" 21 "path" 22 "path/filepath" 23 "regexp" 24 "strconv" 25 "strings" 26 27 "k8s.io/klog/v2" 28 ) 29 30 const ( 31 blockDir = "/sys/block" 32 cacheDir = "/sys/devices/system/cpu/cpu" 33 netDir = "/sys/class/net" 34 dmiDir = "/sys/class/dmi" 35 ppcDevTree = "/proc/device-tree" 36 s390xDevTree = "/etc" // s390/s390x changes 37 38 meminfoFile = "meminfo" 39 40 distanceFile = "distance" 41 42 sysFsCPUTopology = "topology" 43 44 // CPUPhysicalPackageID is a physical package id of cpu#. Typically corresponds to a physical socket number, 45 // but the actual value is architecture and platform dependent. 46 CPUPhysicalPackageID = "physical_package_id" 47 // CPUCoreID is the CPU core ID of cpu#. Typically it is the hardware platform's identifier 48 // (rather than the kernel's). The actual value is architecture and platform dependent. 
49 CPUCoreID = "core_id" 50 51 coreIDFilePath = "/" + sysFsCPUTopology + "/core_id" 52 packageIDFilePath = "/" + sysFsCPUTopology + "/physical_package_id" 53 54 // memory size calculations 55 56 cpuDirPattern = "cpu*[0-9]" 57 nodeDirPattern = "node*[0-9]" 58 59 //HugePagesNrFile name of nr_hugepages file in sysfs 60 HugePagesNrFile = "nr_hugepages" 61 ) 62 63 var ( 64 nodeDir = "/sys/devices/system/node/" 65 ) 66 67 type CacheInfo struct { 68 // cache id 69 Id int 70 // size in bytes 71 Size uint64 72 // cache type - instruction, data, unified 73 Type string 74 // distance from cpus in a multi-level hierarchy 75 Level int 76 // number of cpus that can access this cache. 77 Cpus int 78 } 79 80 // Abstracts the lowest level calls to sysfs. 81 type SysFs interface { 82 // Get NUMA nodes paths 83 GetNodesPaths() ([]string, error) 84 // Get paths to CPUs in provided directory e.g. /sys/devices/system/node/node0 or /sys/devices/system/cpu 85 GetCPUsPaths(cpusPath string) ([]string, error) 86 // Get physical core id for specified CPU 87 GetCoreID(coreIDFilePath string) (string, error) 88 // Get physical package id for specified CPU 89 GetCPUPhysicalPackageID(cpuPath string) (string, error) 90 // Get total memory for specified NUMA node 91 GetMemInfo(nodeDir string) (string, error) 92 // Get hugepages from specified directory 93 GetHugePagesInfo(hugePagesDirectory string) ([]os.FileInfo, error) 94 // Get hugepage_nr from specified directory 95 GetHugePagesNr(hugePagesDirectory string, hugePageName string) (string, error) 96 // Get directory information for available block devices. 97 GetBlockDevices() ([]os.FileInfo, error) 98 // Get Size of a given block device. 99 GetBlockDeviceSize(string) (string, error) 100 // Get scheduler type for the block device. 101 GetBlockDeviceScheduler(string) (string, error) 102 // Get device major:minor number string. 
103 GetBlockDeviceNumbers(string) (string, error) 104 // Is the device "hidden" (meaning will not have a device handle) 105 // This is the case with native nvme multipathing. 106 IsBlockDeviceHidden(string) (bool, error) 107 108 GetNetworkDevices() ([]os.FileInfo, error) 109 GetNetworkAddress(string) (string, error) 110 GetNetworkMtu(string) (string, error) 111 GetNetworkSpeed(string) (string, error) 112 GetNetworkStatValue(dev string, stat string) (uint64, error) 113 114 // Get directory information for available caches accessible to given cpu. 115 GetCaches(id int) ([]os.FileInfo, error) 116 // Get information for a cache accessible from the given cpu. 117 GetCacheInfo(cpu int, cache string) (CacheInfo, error) 118 119 GetSystemUUID() (string, error) 120 121 // GetDistances returns distance array 122 GetDistances(string) (string, error) 123 124 // IsCPUOnline determines if CPU status from kernel hotplug machanism standpoint. 125 // See: https://www.kernel.org/doc/html/latest/core-api/cpu_hotplug.html 126 IsCPUOnline(dir string) bool 127 } 128 129 type realSysFs struct { 130 cpuPath string 131 } 132 133 func NewRealSysFs() SysFs { 134 return &realSysFs{ 135 cpuPath: "/sys/devices/system/cpu", 136 } 137 } 138 139 func (fs *realSysFs) GetNodesPaths() ([]string, error) { 140 pathPattern := fmt.Sprintf("%s%s", nodeDir, nodeDirPattern) 141 return filepath.Glob(pathPattern) 142 } 143 144 func (fs *realSysFs) GetCPUsPaths(cpusPath string) ([]string, error) { 145 pathPattern := fmt.Sprintf("%s/%s", cpusPath, cpuDirPattern) 146 return filepath.Glob(pathPattern) 147 } 148 149 func (fs *realSysFs) GetCoreID(cpuPath string) (string, error) { 150 coreIDFilePath := fmt.Sprintf("%s%s", cpuPath, coreIDFilePath) 151 coreID, err := os.ReadFile(coreIDFilePath) 152 if err != nil { 153 return "", err 154 } 155 return strings.TrimSpace(string(coreID)), err 156 } 157 158 func (fs *realSysFs) GetCPUPhysicalPackageID(cpuPath string) (string, error) { 159 packageIDFilePath := 
fmt.Sprintf("%s%s", cpuPath, packageIDFilePath) 160 packageID, err := os.ReadFile(packageIDFilePath) 161 if err != nil { 162 return "", err 163 } 164 return strings.TrimSpace(string(packageID)), err 165 } 166 167 func (fs *realSysFs) GetMemInfo(nodePath string) (string, error) { 168 meminfoPath := fmt.Sprintf("%s/%s", nodePath, meminfoFile) 169 meminfo, err := os.ReadFile(meminfoPath) 170 if err != nil { 171 return "", err 172 } 173 return strings.TrimSpace(string(meminfo)), err 174 } 175 176 func (fs *realSysFs) GetDistances(nodePath string) (string, error) { 177 distancePath := fmt.Sprintf("%s/%s", nodePath, distanceFile) 178 distance, err := os.ReadFile(distancePath) 179 if err != nil { 180 return "", err 181 } 182 return strings.TrimSpace(string(distance)), err 183 } 184 185 func (fs *realSysFs) GetHugePagesInfo(hugePagesDirectory string) ([]os.FileInfo, error) { 186 dirs, err := os.ReadDir(hugePagesDirectory) 187 if err != nil { 188 return nil, err 189 } 190 return toFileInfo(dirs) 191 } 192 193 func (fs *realSysFs) GetHugePagesNr(hugepagesDirectory string, hugePageName string) (string, error) { 194 hugePageFilePath := fmt.Sprintf("%s%s/%s", hugepagesDirectory, hugePageName, HugePagesNrFile) 195 hugePageFile, err := os.ReadFile(hugePageFilePath) 196 if err != nil { 197 return "", err 198 } 199 return strings.TrimSpace(string(hugePageFile)), err 200 } 201 202 func (fs *realSysFs) GetBlockDevices() ([]os.FileInfo, error) { 203 dirs, err := os.ReadDir(blockDir) 204 if err != nil { 205 return nil, err 206 } 207 return toFileInfo(dirs) 208 } 209 210 func (fs *realSysFs) GetBlockDeviceNumbers(name string) (string, error) { 211 dev, err := os.ReadFile(path.Join(blockDir, name, "/dev")) 212 if err != nil { 213 return "", err 214 } 215 return string(dev), nil 216 } 217 218 func (fs *realSysFs) IsBlockDeviceHidden(name string) (bool, error) { 219 // See: https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-block 220 // 
https://git.kernel.org/pub/scm/utils/util-linux/util-linux.git 221 // - c8487d854ba5 ("lsblk: Ignore hidden devices") 222 devHiddenPath := path.Join(blockDir, name, "/hidden") 223 hidden, err := os.ReadFile(devHiddenPath) 224 if err != nil && os.IsNotExist(err) { 225 // older OS may not have /hidden sysfs entry, so for sure 226 // it is not a hidden device... 227 return false, nil 228 } 229 if err != nil { 230 return false, fmt.Errorf("failed to read %s: %w", devHiddenPath, err) 231 } 232 if string(hidden) == "1" { 233 return true, nil 234 } 235 return false, nil 236 } 237 238 func (fs *realSysFs) GetBlockDeviceScheduler(name string) (string, error) { 239 sched, err := os.ReadFile(path.Join(blockDir, name, "/queue/scheduler")) 240 if err != nil { 241 return "", err 242 } 243 return string(sched), nil 244 } 245 246 func (fs *realSysFs) GetBlockDeviceSize(name string) (string, error) { 247 size, err := os.ReadFile(path.Join(blockDir, name, "/size")) 248 if err != nil { 249 return "", err 250 } 251 return string(size), nil 252 } 253 254 func (fs *realSysFs) GetNetworkDevices() ([]os.FileInfo, error) { 255 dirs, err := os.ReadDir(netDir) 256 if err != nil { 257 return nil, err 258 } 259 files, err := toFileInfo(dirs) 260 if err != nil { 261 return nil, err 262 } 263 264 // Filter out non-directory & non-symlink files 265 filtered := []os.FileInfo{} 266 for _, f := range files { 267 if f.Mode()|os.ModeSymlink != 0 { 268 f, err = os.Stat(path.Join(netDir, f.Name())) 269 if err != nil { 270 continue 271 } 272 } 273 if f.IsDir() { 274 filtered = append(filtered, f) 275 } 276 } 277 return filtered, nil 278 } 279 280 func (fs *realSysFs) GetNetworkAddress(name string) (string, error) { 281 address, err := os.ReadFile(path.Join(netDir, name, "/address")) 282 if err != nil { 283 return "", err 284 } 285 return string(address), nil 286 } 287 288 func (fs *realSysFs) GetNetworkMtu(name string) (string, error) { 289 mtu, err := os.ReadFile(path.Join(netDir, name, "/mtu")) 290 if 
err != nil { 291 return "", err 292 } 293 return string(mtu), nil 294 } 295 296 func (fs *realSysFs) GetNetworkSpeed(name string) (string, error) { 297 speed, err := os.ReadFile(path.Join(netDir, name, "/speed")) 298 if err != nil { 299 return "", err 300 } 301 return string(speed), nil 302 } 303 304 func (fs *realSysFs) GetNetworkStatValue(dev string, stat string) (uint64, error) { 305 statPath := path.Join(netDir, dev, "/statistics", stat) 306 out, err := os.ReadFile(statPath) 307 if err != nil { 308 return 0, fmt.Errorf("failed to read stat from %q for device %q", statPath, dev) 309 } 310 var s uint64 311 n, err := fmt.Sscanf(string(out), "%d", &s) 312 if err != nil || n != 1 { 313 return 0, fmt.Errorf("could not parse value from %q for file %s", string(out), statPath) 314 } 315 return s, nil 316 } 317 318 func (fs *realSysFs) GetCaches(id int) ([]os.FileInfo, error) { 319 cpuPath := fmt.Sprintf("%s%d/cache", cacheDir, id) 320 dir, err := os.ReadDir(cpuPath) 321 if err != nil { 322 return nil, err 323 } 324 return toFileInfo(dir) 325 } 326 327 func toFileInfo(dirs []os.DirEntry) ([]os.FileInfo, error) { 328 info := []os.FileInfo{} 329 for _, dir := range dirs { 330 fI, err := dir.Info() 331 if err != nil { 332 return nil, err 333 } 334 info = append(info, fI) 335 } 336 return info, nil 337 } 338 339 func bitCount(i uint64) (count int) { 340 for i != 0 { 341 if i&1 == 1 { 342 count++ 343 } 344 i >>= 1 345 } 346 return 347 } 348 349 func getCPUCount(cache string) (count int, err error) { 350 out, err := os.ReadFile(path.Join(cache, "/shared_cpu_map")) 351 if err != nil { 352 return 0, err 353 } 354 masks := strings.Split(string(out), ",") 355 for _, mask := range masks { 356 // convert hex string to uint64 357 m, err := strconv.ParseUint(strings.TrimSpace(mask), 16, 64) 358 if err != nil { 359 return 0, fmt.Errorf("failed to parse cpu map %q: %v", string(out), err) 360 } 361 count += bitCount(m) 362 } 363 return 364 } 365 366 func (fs *realSysFs) GetCacheInfo(cpu 
int, name string) (CacheInfo, error) { 367 cachePath := fmt.Sprintf("%s%d/cache/%s", cacheDir, cpu, name) 368 out, err := os.ReadFile(path.Join(cachePath, "/id")) 369 if err != nil { 370 return CacheInfo{}, err 371 } 372 var id int 373 n, err := fmt.Sscanf(string(out), "%d", &id) 374 if err != nil || n != 1 { 375 return CacheInfo{}, err 376 } 377 378 out, err = os.ReadFile(path.Join(cachePath, "/size")) 379 if err != nil { 380 return CacheInfo{}, err 381 } 382 var size uint64 383 n, err = fmt.Sscanf(string(out), "%dK", &size) 384 if err != nil || n != 1 { 385 return CacheInfo{}, err 386 } 387 // convert to bytes 388 size = size * 1024 389 out, err = os.ReadFile(path.Join(cachePath, "/level")) 390 if err != nil { 391 return CacheInfo{}, err 392 } 393 var level int 394 n, err = fmt.Sscanf(string(out), "%d", &level) 395 if err != nil || n != 1 { 396 return CacheInfo{}, err 397 } 398 399 out, err = os.ReadFile(path.Join(cachePath, "/type")) 400 if err != nil { 401 return CacheInfo{}, err 402 } 403 cacheType := strings.TrimSpace(string(out)) 404 cpuCount, err := getCPUCount(cachePath) 405 if err != nil { 406 return CacheInfo{}, err 407 } 408 return CacheInfo{ 409 Id: id, 410 Size: size, 411 Level: level, 412 Type: cacheType, 413 Cpus: cpuCount, 414 }, nil 415 } 416 417 func (fs *realSysFs) GetSystemUUID() (string, error) { 418 if id, err := os.ReadFile(path.Join(dmiDir, "id", "product_uuid")); err == nil { 419 return strings.TrimSpace(string(id)), nil 420 } else if id, err = os.ReadFile(path.Join(ppcDevTree, "system-id")); err == nil { 421 return strings.TrimSpace(strings.TrimRight(string(id), "\000")), nil 422 } else if id, err = os.ReadFile(path.Join(ppcDevTree, "vm,uuid")); err == nil { 423 return strings.TrimSpace(strings.TrimRight(string(id), "\000")), nil 424 } else if id, err = os.ReadFile(path.Join(s390xDevTree, "machine-id")); err == nil { 425 return strings.TrimSpace(string(id)), nil 426 } else { 427 return "", err 428 } 429 } 430 431 func (fs *realSysFs) 
IsCPUOnline(cpuPath string) bool { 432 cpuOnlinePath, err := filepath.Abs(fs.cpuPath + "/online") 433 if err != nil { 434 klog.V(1).Infof("Unable to get absolute path for %s", cpuPath) 435 return false 436 } 437 438 // Quick check to determine if file exists: if it does not then kernel CPU hotplug is disabled and all CPUs are online. 439 _, err = os.Stat(cpuOnlinePath) 440 if err != nil && os.IsNotExist(err) { 441 return true 442 } 443 if err != nil { 444 klog.V(1).Infof("Unable to stat %s: %s", cpuOnlinePath, err) 445 } 446 447 cpuID, err := getCPUID(cpuPath) 448 if err != nil { 449 klog.V(1).Infof("Unable to get CPU ID from path %s: %s", cpuPath, err) 450 return false 451 } 452 453 isOnline, err := isCPUOnline(cpuOnlinePath, cpuID) 454 if err != nil { 455 klog.V(1).Infof("Unable to get online CPUs list: %s", err) 456 return false 457 } 458 return isOnline 459 } 460 461 func getCPUID(dir string) (uint16, error) { 462 regex := regexp.MustCompile("cpu([0-9]+)") 463 matches := regex.FindStringSubmatch(dir) 464 if len(matches) == 2 { 465 id, err := strconv.Atoi(matches[1]) 466 if err != nil { 467 return 0, err 468 } 469 return uint16(id), nil 470 } 471 return 0, fmt.Errorf("can't get CPU ID from %s", dir) 472 } 473 474 // isCPUOnline is copied from github.com/opencontainers/runc/libcontainer/cgroups/fs and modified to suite cAdvisor 475 // needs as Apache 2.0 license allows. 476 // It parses CPU list (such as: 0,3-5,10) into a struct that allows to determine quickly if CPU or particular ID is online. 
// see: https://github.com/opencontainers/runc/blob/ab27e12cebf148aa5d1ee3ad13d9fc7ae12bf0b6/libcontainer/cgroups/fs/cpuset.go#L45
//
// The file at path is expected to hold a comma-separated list whose elements
// are either a single CPU ID or an inclusive "first-last" range. The function
// returns true as soon as an element containing cpuID is found, and false with
// a nil error when no element contains it. It returns an error when the file
// cannot be read, is empty or whitespace-only, or when an element examined
// before a match is malformed.
func isCPUOnline(path string, cpuID uint16) (bool, error) {
	fileContent, err := os.ReadFile(path)
	if err != nil {
		// Returned unwrapped on purpose: callers test it with os.IsNotExist.
		return false, err
	}

	// Trim before the emptiness check so a file holding only a newline is
	// reported as empty instead of failing later with a bare strconv error.
	cpuList := strings.TrimSpace(string(fileContent))
	if cpuList == "" {
		return false, fmt.Errorf("%s found to be empty", path)
	}

	for _, s := range strings.Split(cpuList, ",") {
		bounds := strings.SplitN(s, "-", 3)
		switch len(bounds) {
		case 3:
			// More than one dash in a single element.
			return false, fmt.Errorf("invalid values in %s", path)
		case 2:
			first, err := strconv.ParseUint(bounds[0], 10, 16)
			if err != nil {
				return false, err
			}
			last, err := strconv.ParseUint(bounds[1], 10, 16)
			if err != nil {
				return false, err
			}
			if first > last {
				return false, fmt.Errorf("invalid values in %s", path)
			}
			// Return true, if the CPU under consideration is in the range of online CPUs.
			if id := uint64(cpuID); id >= first && id <= last {
				return true, nil
			}
		case 1:
			value, err := strconv.ParseUint(s, 10, 16)
			if err != nil {
				return false, err
			}
			if value == uint64(cpuID) {
				return true, nil
			}
		}
	}

	return false, nil
}

// Looks for sysfs cpu path containing given CPU property, e.g.
core_id or physical_package_id 524 // and returns number of unique values of given property, exemplary usage: getting number of CPU physical cores 525 func GetUniqueCPUPropertyCount(cpuAttributesPath string, propertyName string) int { 526 absCPUAttributesPath, err := filepath.Abs(cpuAttributesPath) 527 if err != nil { 528 klog.Errorf("Cannot make %s absolute", cpuAttributesPath) 529 return 0 530 } 531 pathPattern := absCPUAttributesPath + "/cpu*[0-9]" 532 sysCPUPaths, err := filepath.Glob(pathPattern) 533 if err != nil { 534 klog.Errorf("Cannot find files matching pattern (pathPattern: %s), number of unique %s set to 0", pathPattern, propertyName) 535 return 0 536 } 537 cpuOnlinePath, err := filepath.Abs(cpuAttributesPath + "/online") 538 if err != nil { 539 klog.V(1).Infof("Unable to get absolute path for %s", cpuAttributesPath+"/../online") 540 return 0 541 } 542 543 if err != nil { 544 klog.V(1).Infof("Unable to get online CPUs list: %s", err) 545 return 0 546 } 547 uniques := make(map[string]bool) 548 for _, sysCPUPath := range sysCPUPaths { 549 cpuID, err := getCPUID(sysCPUPath) 550 if err != nil { 551 klog.V(1).Infof("Unable to get CPU ID from path %s: %s", sysCPUPath, err) 552 return 0 553 } 554 isOnline, err := isCPUOnline(cpuOnlinePath, cpuID) 555 if err != nil && !os.IsNotExist(err) { 556 klog.V(1).Infof("Unable to determine CPU online state: %s", err) 557 continue 558 } 559 if !isOnline && !os.IsNotExist(err) { 560 continue 561 } 562 propertyPath := filepath.Join(sysCPUPath, sysFsCPUTopology, propertyName) 563 propertyVal, err := os.ReadFile(propertyPath) 564 if err != nil { 565 klog.Warningf("Cannot open %s, assuming 0 for %s of CPU %d", propertyPath, propertyName, cpuID) 566 propertyVal = []byte("0") 567 } 568 packagePath := filepath.Join(sysCPUPath, sysFsCPUTopology, CPUPhysicalPackageID) 569 packageVal, err := os.ReadFile(packagePath) 570 if err != nil { 571 klog.Warningf("Cannot open %s, assuming 0 %s of CPU %d", packagePath, CPUPhysicalPackageID, 
cpuID) 572 packageVal = []byte("0") 573 574 } 575 uniques[fmt.Sprintf("%s_%s", bytes.TrimSpace(propertyVal), bytes.TrimSpace(packageVal))] = true 576 } 577 return len(uniques) 578 }