github.com/google/cadvisor@v0.49.1/resctrl/utils.go (about) 1 //go:build linux 2 // +build linux 3 4 // Copyright 2021 Google Inc. All Rights Reserved. 5 // 6 // Licensed under the Apache License, Version 2.0 (the "License"); 7 // you may not use this file except in compliance with the License. 8 // You may obtain a copy of the License at 9 // 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // 12 // Unless required by applicable law or agreed to in writing, software 13 // distributed under the License is distributed on an "AS IS" BASIS, 14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 // See the License for the specific language governing permissions and 16 // limitations under the License. 17 18 // Utilities. 19 package resctrl 20 21 import ( 22 "bufio" 23 "bytes" 24 "fmt" 25 "os" 26 "path/filepath" 27 "strconv" 28 "strings" 29 30 "github.com/opencontainers/runc/libcontainer/cgroups" 31 "github.com/opencontainers/runc/libcontainer/cgroups/fs2" 32 "github.com/opencontainers/runc/libcontainer/intelrdt" 33 ) 34 35 const ( 36 cpuCgroup = "cpu" 37 rootContainer = "/" 38 monitoringGroupDir = "mon_groups" 39 processTask = "task" 40 cpusFileName = "cpus" 41 cpusListFileName = "cpus_list" 42 schemataFileName = "schemata" 43 tasksFileName = "tasks" 44 modeFileName = "mode" 45 sizeFileName = "size" 46 infoDirName = "info" 47 monDataDirName = "mon_data" 48 monGroupsDirName = "mon_groups" 49 noPidsPassedError = "there are no pids passed" 50 noContainerNameError = "there are no container name passed" 51 noControlGroupFoundError = "couldn't find control group matching container" 52 llcOccupancyFileName = "llc_occupancy" 53 mbmLocalBytesFileName = "mbm_local_bytes" 54 mbmTotalBytesFileName = "mbm_total_bytes" 55 containerPrefix = '/' 56 minContainerNameLen = 2 // "/<container_name>" e.g. "/a" 57 unavailable = "Unavailable" 58 monGroupPrefix = "cadvisor" 59 ) 60 61 var ( 62 rootResctrl = "" 63 pidsPath = "" 64 processPath = "/proc" 65 enabledMBM = false 66 enabledCMT = false 67 isResctrlInitialized = false 68 groupDirectories = map[string]struct{}{ 69 cpusFileName: {}, 70 cpusListFileName: {}, 71 infoDirName: {}, 72 monDataDirName: {}, 73 monGroupsDirName: {}, 74 schemataFileName: {}, 75 tasksFileName: {}, 76 modeFileName: {}, 77 sizeFileName: {}, 78 } 79 ) 80 81 func Setup() error { 82 var err error 83 rootResctrl, err = intelrdt.Root() 84 if err != nil { 85 return fmt.Errorf("unable to initialize resctrl: %v", err) 86 } 87 88 if cgroups.IsCgroup2UnifiedMode() { 89 pidsPath = fs2.UnifiedMountpoint 90 } else { 91 pidsPath = filepath.Join(fs2.UnifiedMountpoint, cpuCgroup) 92 } 93 94 enabledMBM = intelrdt.IsMBMEnabled() 95 enabledCMT = intelrdt.IsCMTEnabled() 96 97 isResctrlInitialized = true 98 99 return nil 100 } 101 102 func prepareMonitoringGroup(containerName string, getContainerPids func() ([]string, error), inHostNamespace bool) (string, error) { 103 if containerName == rootContainer { 104 return rootResctrl, nil 105 } 106 107 pids, err := getContainerPids() 108 if err != nil { 109 return "", err 110 } 111 112 if len(pids) == 0 { 113 return "", fmt.Errorf("couldn't obtain %q container pids: there is no pids in cgroup", containerName) 114 } 115 116 // Firstly, find the control group to which the container belongs. 117 // Consider the root group. 118 controlGroupPath, err := findGroup(rootResctrl, pids, true, false) 119 if err != nil { 120 return "", fmt.Errorf("%q %q: %q", noControlGroupFoundError, containerName, err) 121 } 122 if controlGroupPath == "" { 123 return "", fmt.Errorf("%q %q", noControlGroupFoundError, containerName) 124 } 125 126 // Check if there is any monitoring group. 127 monGroupPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), pids, false, true) 128 if err != nil { 129 return "", fmt.Errorf("couldn't find monitoring group matching %q container: %v", containerName, err) 130 } 131 132 // Prepare new one if not exists. 133 if monGroupPath == "" { 134 // Remove leading prefix. 135 // e.g. /my/container -> my/container 136 if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix { 137 containerName = containerName[1:] 138 } 139 140 // Add own prefix and use `-` instead `/`. 141 // e.g. my/container -> cadvisor-my-container 142 properContainerName := fmt.Sprintf("%s-%s", monGroupPrefix, strings.Replace(containerName, "/", "-", -1)) 143 monGroupPath = filepath.Join(controlGroupPath, monitoringGroupDir, properContainerName) 144 145 err = os.MkdirAll(monGroupPath, os.ModePerm) 146 if err != nil { 147 return "", fmt.Errorf("couldn't create monitoring group directory for %q container: %w", containerName, err) 148 } 149 150 if !inHostNamespace { 151 processPath = "/rootfs/proc" 152 } 153 154 for _, pid := range pids { 155 processThreads, err := getAllProcessThreads(filepath.Join(processPath, pid, processTask)) 156 if err != nil { 157 return "", err 158 } 159 for _, thread := range processThreads { 160 err = intelrdt.WriteIntelRdtTasks(monGroupPath, thread) 161 if err != nil { 162 secondError := os.Remove(monGroupPath) 163 if secondError != nil { 164 return "", fmt.Errorf( 165 "coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v", 166 containerName, err, containerName, secondError) 167 } 168 return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err) 169 } 170 } 171 } 172 } 173 174 return monGroupPath, nil 175 } 176 177 func getPids(containerName string) ([]int, error) { 178 if len(containerName) == 0 { 179 // No container name passed. 180 return nil, fmt.Errorf(noContainerNameError) 181 } 182 pids, err := cgroups.GetAllPids(filepath.Join(pidsPath, containerName)) 183 if err != nil { 184 return nil, fmt.Errorf("couldn't obtain pids for %q container: %v", containerName, err) 185 } 186 return pids, nil 187 } 188 189 // getAllProcessThreads obtains all available processes from directory. 190 // e.g. ls /proc/4215/task/ -> 4215, 4216, 4217, 4218 191 // func will return [4215, 4216, 4217, 4218]. 192 func getAllProcessThreads(path string) ([]int, error) { 193 processThreads := make([]int, 0) 194 195 threadDirs, err := os.ReadDir(path) 196 if err != nil { 197 return processThreads, err 198 } 199 200 for _, dir := range threadDirs { 201 pid, err := strconv.Atoi(dir.Name()) 202 if err != nil { 203 return nil, fmt.Errorf("couldn't parse %q dir: %v", dir.Name(), err) 204 } 205 processThreads = append(processThreads, pid) 206 } 207 208 return processThreads, nil 209 } 210 211 // findGroup returns the path of a control/monitoring group in which the pids are. 212 func findGroup(group string, pids []string, includeGroup bool, exclusive bool) (string, error) { 213 if len(pids) == 0 { 214 return "", fmt.Errorf(noPidsPassedError) 215 } 216 217 availablePaths := make([]string, 0) 218 if includeGroup { 219 availablePaths = append(availablePaths, group) 220 } 221 222 files, err := os.ReadDir(group) 223 for _, file := range files { 224 if _, ok := groupDirectories[file.Name()]; !ok { 225 availablePaths = append(availablePaths, filepath.Join(group, file.Name())) 226 } 227 } 228 if err != nil { 229 return "", fmt.Errorf("couldn't obtain groups paths: %w", err) 230 } 231 232 for _, path := range availablePaths { 233 groupFound, err := arePIDsInGroup(path, pids, exclusive) 234 if err != nil { 235 return "", err 236 } 237 if groupFound { 238 return path, nil 239 } 240 } 241 242 return "", nil 243 } 244 245 // arePIDsInGroup returns true if all of the pids are within control group. 246 func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) { 247 if len(pids) == 0 { 248 return false, fmt.Errorf("couldn't obtain pids from %q path: %v", path, noPidsPassedError) 249 } 250 251 tasks, err := readTasksFile(filepath.Join(path, tasksFileName)) 252 if err != nil { 253 return false, err 254 } 255 256 any := false 257 for _, pid := range pids { 258 _, ok := tasks[pid] 259 if !ok { 260 // There are missing pids within group. 261 if any { 262 return false, fmt.Errorf("there should be all pids in group") 263 } 264 return false, nil 265 } 266 any = true 267 } 268 269 // Check if there should be only passed pids in group. 270 if exclusive { 271 if len(tasks) != len(pids) { 272 return false, fmt.Errorf("group should have container pids only") 273 } 274 } 275 276 return true, nil 277 } 278 279 // readTasksFile returns pids map from given tasks path. 280 func readTasksFile(tasksPath string) (map[string]struct{}, error) { 281 tasks := make(map[string]struct{}) 282 283 tasksFile, err := os.Open(tasksPath) 284 if err != nil { 285 return tasks, fmt.Errorf("couldn't read tasks file from %q path: %w", tasksPath, err) 286 } 287 defer tasksFile.Close() 288 289 scanner := bufio.NewScanner(tasksFile) 290 for scanner.Scan() { 291 tasks[scanner.Text()] = struct{}{} 292 } 293 294 if err := scanner.Err(); err != nil { 295 return tasks, fmt.Errorf("couldn't obtain pids from %q path: %w", tasksPath, err) 296 } 297 298 return tasks, nil 299 } 300 301 func readStatFrom(path string, vendorID string) (uint64, error) { 302 context, err := os.ReadFile(path) 303 if err != nil { 304 return 0, err 305 } 306 307 contextString := string(bytes.TrimSpace(context)) 308 309 if contextString == unavailable { 310 err := fmt.Errorf("\"Unavailable\" value from file %q", path) 311 if vendorID == "AuthenticAMD" { 312 kernelBugzillaLink := "https://bugzilla.kernel.org/show_bug.cgi?id=213311" 313 err = fmt.Errorf("%v, possible bug: %q", err, kernelBugzillaLink) 314 } 315 return 0, err 316 } 317 318 stat, err := strconv.ParseUint(contextString, 10, 64) 319 if err != nil { 320 return stat, fmt.Errorf("unable to parse %q as a uint from file %q", string(context), path) 321 } 322 323 return stat, nil 324 } 325 326 func getIntelRDTStatsFrom(path string, vendorID string) (intelrdt.Stats, error) { 327 stats := intelrdt.Stats{} 328 329 statsDirectories, err := filepath.Glob(filepath.Join(path, monDataDirName, "*")) 330 if err != nil { 331 return stats, err 332 } 333 334 if len(statsDirectories) == 0 { 335 return stats, fmt.Errorf("there is no mon_data stats directories: %q", path) 336 } 337 338 var cmtStats []intelrdt.CMTNumaNodeStats 339 var mbmStats []intelrdt.MBMNumaNodeStats 340 341 for _, dir := range statsDirectories { 342 if enabledCMT { 343 llcOccupancy, err := readStatFrom(filepath.Join(dir, llcOccupancyFileName), vendorID) 344 if err != nil { 345 return stats, err 346 } 347 cmtStats = append(cmtStats, intelrdt.CMTNumaNodeStats{LLCOccupancy: llcOccupancy}) 348 } 349 if enabledMBM { 350 mbmTotalBytes, err := readStatFrom(filepath.Join(dir, mbmTotalBytesFileName), vendorID) 351 if err != nil { 352 return stats, err 353 } 354 mbmLocalBytes, err := readStatFrom(filepath.Join(dir, mbmLocalBytesFileName), vendorID) 355 if err != nil { 356 return stats, err 357 } 358 mbmStats = append(mbmStats, intelrdt.MBMNumaNodeStats{ 359 MBMTotalBytes: mbmTotalBytes, 360 MBMLocalBytes: mbmLocalBytes, 361 }) 362 } 363 } 364 365 stats.CMTStats = &cmtStats 366 stats.MBMStats = &mbmStats 367 368 return stats, nil 369 }