github.com/google/cadvisor@v0.49.1/resctrl/utils.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  // Copyright 2021 Google Inc. All Rights Reserved.
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  // Utilities.
    19  package resctrl
    20  
    21  import (
    22  	"bufio"
    23  	"bytes"
    24  	"fmt"
    25  	"os"
    26  	"path/filepath"
    27  	"strconv"
    28  	"strings"
    29  
    30  	"github.com/opencontainers/runc/libcontainer/cgroups"
    31  	"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
    32  	"github.com/opencontainers/runc/libcontainer/intelrdt"
    33  )
    34  
const (
	// cpuCgroup is the subdirectory joined onto the cgroup mountpoint by Setup
	// when the host is not in cgroup v2 unified mode.
	cpuCgroup = "cpu"
	// rootContainer is the container name for which prepareMonitoringGroup
	// returns the resctrl root itself.
	rootContainer = "/"
	// monitoringGroupDir is the directory, inside a control group, under which
	// new monitoring groups are created. It has the same value as
	// monGroupsDirName.
	monitoringGroupDir = "mon_groups"
	// processTask is the per-process subdirectory whose entries are listed by
	// getAllProcessThreads.
	processTask = "task"

	// Names of entries found inside a resctrl group directory. They are listed
	// in groupDirectories so findGroup does not treat them as nested groups.
	cpusFileName             = "cpus"
	cpusListFileName         = "cpus_list"
	schemataFileName         = "schemata"
	tasksFileName            = "tasks"
	modeFileName             = "mode"
	sizeFileName             = "size"
	infoDirName              = "info"
	monDataDirName           = "mon_data"
	monGroupsDirName         = "mon_groups"

	// Error message fragments shared by several functions in this file.
	noPidsPassedError        = "there are no pids passed"
	noContainerNameError     = "there are no container name passed"
	noControlGroupFoundError = "couldn't find control group matching container"

	// Names of the statistics files read from every directory under mon_data.
	llcOccupancyFileName     = "llc_occupancy"
	mbmLocalBytesFileName    = "mbm_local_bytes"
	mbmTotalBytesFileName    = "mbm_total_bytes"

	// containerPrefix is the leading character stripped from a container name
	// before it is turned into a monitoring group directory name.
	containerPrefix          = '/'
	minContainerNameLen      = 2 // "/<container_name>" e.g. "/a"
	// unavailable is the statistics file content that readStatFrom reports as
	// an error instead of parsing.
	unavailable              = "Unavailable"
	// monGroupPrefix is prepended to the name of every monitoring group
	// directory created by prepareMonitoringGroup.
	monGroupPrefix           = "cadvisor"
)
    60  
var (
	// rootResctrl is the resctrl root path; Setup assigns it from
	// intelrdt.Root.
	rootResctrl          = ""
	// pidsPath is the cgroup directory that getPids joins container names
	// onto; Setup assigns it.
	pidsPath             = ""
	// processPath is the directory holding per-process entries. It is
	// switched to "/rootfs/proc" by prepareMonitoringGroup when called with
	// inHostNamespace == false.
	processPath          = "/proc"
	// enabledMBM and enabledCMT select which statistics files
	// getIntelRDTStatsFrom reads; Setup assigns both.
	enabledMBM           = false
	enabledCMT           = false
	// isResctrlInitialized is set to true at the end of a successful Setup.
	isResctrlInitialized = false
	// groupDirectories is the set of entry names that findGroup skips when it
	// enumerates a directory looking for groups.
	groupDirectories     = map[string]struct{}{
		cpusFileName:     {},
		cpusListFileName: {},
		infoDirName:      {},
		monDataDirName:   {},
		monGroupsDirName: {},
		schemataFileName: {},
		tasksFileName:    {},
		modeFileName:     {},
		sizeFileName:     {},
	}
)
    80  
    81  func Setup() error {
    82  	var err error
    83  	rootResctrl, err = intelrdt.Root()
    84  	if err != nil {
    85  		return fmt.Errorf("unable to initialize resctrl: %v", err)
    86  	}
    87  
    88  	if cgroups.IsCgroup2UnifiedMode() {
    89  		pidsPath = fs2.UnifiedMountpoint
    90  	} else {
    91  		pidsPath = filepath.Join(fs2.UnifiedMountpoint, cpuCgroup)
    92  	}
    93  
    94  	enabledMBM = intelrdt.IsMBMEnabled()
    95  	enabledCMT = intelrdt.IsCMTEnabled()
    96  
    97  	isResctrlInitialized = true
    98  
    99  	return nil
   100  }
   101  
   102  func prepareMonitoringGroup(containerName string, getContainerPids func() ([]string, error), inHostNamespace bool) (string, error) {
   103  	if containerName == rootContainer {
   104  		return rootResctrl, nil
   105  	}
   106  
   107  	pids, err := getContainerPids()
   108  	if err != nil {
   109  		return "", err
   110  	}
   111  
   112  	if len(pids) == 0 {
   113  		return "", fmt.Errorf("couldn't obtain %q container pids: there is no pids in cgroup", containerName)
   114  	}
   115  
   116  	// Firstly, find the control group to which the container belongs.
   117  	// Consider the root group.
   118  	controlGroupPath, err := findGroup(rootResctrl, pids, true, false)
   119  	if err != nil {
   120  		return "", fmt.Errorf("%q %q: %q", noControlGroupFoundError, containerName, err)
   121  	}
   122  	if controlGroupPath == "" {
   123  		return "", fmt.Errorf("%q %q", noControlGroupFoundError, containerName)
   124  	}
   125  
   126  	// Check if there is any monitoring group.
   127  	monGroupPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), pids, false, true)
   128  	if err != nil {
   129  		return "", fmt.Errorf("couldn't find monitoring group matching %q container: %v", containerName, err)
   130  	}
   131  
   132  	// Prepare new one if not exists.
   133  	if monGroupPath == "" {
   134  		// Remove leading prefix.
   135  		// e.g. /my/container -> my/container
   136  		if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix {
   137  			containerName = containerName[1:]
   138  		}
   139  
   140  		// Add own prefix and use `-` instead `/`.
   141  		// e.g. my/container -> cadvisor-my-container
   142  		properContainerName := fmt.Sprintf("%s-%s", monGroupPrefix, strings.Replace(containerName, "/", "-", -1))
   143  		monGroupPath = filepath.Join(controlGroupPath, monitoringGroupDir, properContainerName)
   144  
   145  		err = os.MkdirAll(monGroupPath, os.ModePerm)
   146  		if err != nil {
   147  			return "", fmt.Errorf("couldn't create monitoring group directory for %q container: %w", containerName, err)
   148  		}
   149  
   150  		if !inHostNamespace {
   151  			processPath = "/rootfs/proc"
   152  		}
   153  
   154  		for _, pid := range pids {
   155  			processThreads, err := getAllProcessThreads(filepath.Join(processPath, pid, processTask))
   156  			if err != nil {
   157  				return "", err
   158  			}
   159  			for _, thread := range processThreads {
   160  				err = intelrdt.WriteIntelRdtTasks(monGroupPath, thread)
   161  				if err != nil {
   162  					secondError := os.Remove(monGroupPath)
   163  					if secondError != nil {
   164  						return "", fmt.Errorf(
   165  							"coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v",
   166  							containerName, err, containerName, secondError)
   167  					}
   168  					return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err)
   169  				}
   170  			}
   171  		}
   172  	}
   173  
   174  	return monGroupPath, nil
   175  }
   176  
   177  func getPids(containerName string) ([]int, error) {
   178  	if len(containerName) == 0 {
   179  		// No container name passed.
   180  		return nil, fmt.Errorf(noContainerNameError)
   181  	}
   182  	pids, err := cgroups.GetAllPids(filepath.Join(pidsPath, containerName))
   183  	if err != nil {
   184  		return nil, fmt.Errorf("couldn't obtain pids for %q container: %v", containerName, err)
   185  	}
   186  	return pids, nil
   187  }
   188  
   189  // getAllProcessThreads obtains all available processes from directory.
   190  // e.g. ls /proc/4215/task/ -> 4215, 4216, 4217, 4218
   191  // func will return [4215, 4216, 4217, 4218].
   192  func getAllProcessThreads(path string) ([]int, error) {
   193  	processThreads := make([]int, 0)
   194  
   195  	threadDirs, err := os.ReadDir(path)
   196  	if err != nil {
   197  		return processThreads, err
   198  	}
   199  
   200  	for _, dir := range threadDirs {
   201  		pid, err := strconv.Atoi(dir.Name())
   202  		if err != nil {
   203  			return nil, fmt.Errorf("couldn't parse %q dir: %v", dir.Name(), err)
   204  		}
   205  		processThreads = append(processThreads, pid)
   206  	}
   207  
   208  	return processThreads, nil
   209  }
   210  
   211  // findGroup returns the path of a control/monitoring group in which the pids are.
   212  func findGroup(group string, pids []string, includeGroup bool, exclusive bool) (string, error) {
   213  	if len(pids) == 0 {
   214  		return "", fmt.Errorf(noPidsPassedError)
   215  	}
   216  
   217  	availablePaths := make([]string, 0)
   218  	if includeGroup {
   219  		availablePaths = append(availablePaths, group)
   220  	}
   221  
   222  	files, err := os.ReadDir(group)
   223  	for _, file := range files {
   224  		if _, ok := groupDirectories[file.Name()]; !ok {
   225  			availablePaths = append(availablePaths, filepath.Join(group, file.Name()))
   226  		}
   227  	}
   228  	if err != nil {
   229  		return "", fmt.Errorf("couldn't obtain groups paths: %w", err)
   230  	}
   231  
   232  	for _, path := range availablePaths {
   233  		groupFound, err := arePIDsInGroup(path, pids, exclusive)
   234  		if err != nil {
   235  			return "", err
   236  		}
   237  		if groupFound {
   238  			return path, nil
   239  		}
   240  	}
   241  
   242  	return "", nil
   243  }
   244  
   245  // arePIDsInGroup returns true if all of the pids are within control group.
   246  func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) {
   247  	if len(pids) == 0 {
   248  		return false, fmt.Errorf("couldn't obtain pids from %q path: %v", path, noPidsPassedError)
   249  	}
   250  
   251  	tasks, err := readTasksFile(filepath.Join(path, tasksFileName))
   252  	if err != nil {
   253  		return false, err
   254  	}
   255  
   256  	any := false
   257  	for _, pid := range pids {
   258  		_, ok := tasks[pid]
   259  		if !ok {
   260  			// There are missing pids within group.
   261  			if any {
   262  				return false, fmt.Errorf("there should be all pids in group")
   263  			}
   264  			return false, nil
   265  		}
   266  		any = true
   267  	}
   268  
   269  	// Check if there should be only passed pids in group.
   270  	if exclusive {
   271  		if len(tasks) != len(pids) {
   272  			return false, fmt.Errorf("group should have container pids only")
   273  		}
   274  	}
   275  
   276  	return true, nil
   277  }
   278  
   279  // readTasksFile returns pids map from given tasks path.
   280  func readTasksFile(tasksPath string) (map[string]struct{}, error) {
   281  	tasks := make(map[string]struct{})
   282  
   283  	tasksFile, err := os.Open(tasksPath)
   284  	if err != nil {
   285  		return tasks, fmt.Errorf("couldn't read tasks file from %q path: %w", tasksPath, err)
   286  	}
   287  	defer tasksFile.Close()
   288  
   289  	scanner := bufio.NewScanner(tasksFile)
   290  	for scanner.Scan() {
   291  		tasks[scanner.Text()] = struct{}{}
   292  	}
   293  
   294  	if err := scanner.Err(); err != nil {
   295  		return tasks, fmt.Errorf("couldn't obtain pids from %q path: %w", tasksPath, err)
   296  	}
   297  
   298  	return tasks, nil
   299  }
   300  
   301  func readStatFrom(path string, vendorID string) (uint64, error) {
   302  	context, err := os.ReadFile(path)
   303  	if err != nil {
   304  		return 0, err
   305  	}
   306  
   307  	contextString := string(bytes.TrimSpace(context))
   308  
   309  	if contextString == unavailable {
   310  		err := fmt.Errorf("\"Unavailable\" value from file %q", path)
   311  		if vendorID == "AuthenticAMD" {
   312  			kernelBugzillaLink := "https://bugzilla.kernel.org/show_bug.cgi?id=213311"
   313  			err = fmt.Errorf("%v, possible bug: %q", err, kernelBugzillaLink)
   314  		}
   315  		return 0, err
   316  	}
   317  
   318  	stat, err := strconv.ParseUint(contextString, 10, 64)
   319  	if err != nil {
   320  		return stat, fmt.Errorf("unable to parse %q as a uint from file %q", string(context), path)
   321  	}
   322  
   323  	return stat, nil
   324  }
   325  
   326  func getIntelRDTStatsFrom(path string, vendorID string) (intelrdt.Stats, error) {
   327  	stats := intelrdt.Stats{}
   328  
   329  	statsDirectories, err := filepath.Glob(filepath.Join(path, monDataDirName, "*"))
   330  	if err != nil {
   331  		return stats, err
   332  	}
   333  
   334  	if len(statsDirectories) == 0 {
   335  		return stats, fmt.Errorf("there is no mon_data stats directories: %q", path)
   336  	}
   337  
   338  	var cmtStats []intelrdt.CMTNumaNodeStats
   339  	var mbmStats []intelrdt.MBMNumaNodeStats
   340  
   341  	for _, dir := range statsDirectories {
   342  		if enabledCMT {
   343  			llcOccupancy, err := readStatFrom(filepath.Join(dir, llcOccupancyFileName), vendorID)
   344  			if err != nil {
   345  				return stats, err
   346  			}
   347  			cmtStats = append(cmtStats, intelrdt.CMTNumaNodeStats{LLCOccupancy: llcOccupancy})
   348  		}
   349  		if enabledMBM {
   350  			mbmTotalBytes, err := readStatFrom(filepath.Join(dir, mbmTotalBytesFileName), vendorID)
   351  			if err != nil {
   352  				return stats, err
   353  			}
   354  			mbmLocalBytes, err := readStatFrom(filepath.Join(dir, mbmLocalBytesFileName), vendorID)
   355  			if err != nil {
   356  				return stats, err
   357  			}
   358  			mbmStats = append(mbmStats, intelrdt.MBMNumaNodeStats{
   359  				MBMTotalBytes: mbmTotalBytes,
   360  				MBMLocalBytes: mbmLocalBytes,
   361  			})
   362  		}
   363  	}
   364  
   365  	stats.CMTStats = &cmtStats
   366  	stats.MBMStats = &mbmStats
   367  
   368  	return stats, nil
   369  }