k8s.io/kubernetes@v1.29.3/pkg/kubelet/cm/cgroup_manager_linux.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package cm 18 19 import ( 20 "errors" 21 "fmt" 22 "os" 23 "path" 24 "path/filepath" 25 "strconv" 26 "strings" 27 "sync" 28 "time" 29 30 libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups" 31 "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" 32 "github.com/opencontainers/runc/libcontainer/cgroups/manager" 33 cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd" 34 libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs" 35 v1 "k8s.io/api/core/v1" 36 "k8s.io/klog/v2" 37 v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" 38 39 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 40 "k8s.io/apimachinery/pkg/util/sets" 41 cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util" 42 "k8s.io/kubernetes/pkg/kubelet/metrics" 43 ) 44 45 const ( 46 // systemdSuffix is the cgroup name suffix for systemd 47 systemdSuffix string = ".slice" 48 // Cgroup2MemoryMin is memory.min for cgroup v2 49 Cgroup2MemoryMin string = "memory.min" 50 // Cgroup2MemoryHigh is memory.high for cgroup v2 51 Cgroup2MemoryHigh string = "memory.high" 52 Cgroup2MaxCpuLimit string = "max" 53 Cgroup2MaxSwapFilename string = "memory.swap.max" 54 ) 55 56 var RootCgroupName = CgroupName([]string{}) 57 58 // NewCgroupName composes a new cgroup name. 59 // Use RootCgroupName as base to start at the root. 60 // This function does some basic check for invalid characters at the name. 61 func NewCgroupName(base CgroupName, components ...string) CgroupName { 62 for _, component := range components { 63 // Forbit using "_" in internal names. When remapping internal 64 // names to systemd cgroup driver, we want to remap "-" => "_", 65 // so we forbid "_" so that we can always reverse the mapping. 66 if strings.Contains(component, "/") || strings.Contains(component, "_") { 67 panic(fmt.Errorf("invalid character in component [%q] of CgroupName", component)) 68 } 69 } 70 return CgroupName(append(append([]string{}, base...), components...)) 71 } 72 73 func escapeSystemdCgroupName(part string) string { 74 return strings.Replace(part, "-", "_", -1) 75 } 76 77 func unescapeSystemdCgroupName(part string) string { 78 return strings.Replace(part, "_", "-", -1) 79 } 80 81 // cgroupName.ToSystemd converts the internal cgroup name to a systemd name. 82 // For example, the name {"kubepods", "burstable", "pod1234-abcd-5678-efgh"} becomes 83 // "/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod1234_abcd_5678_efgh.slice" 84 // This function always expands the systemd name into the cgroupfs form. If only 85 // the last part is needed, use path.Base(...) on it to discard the rest. 86 func (cgroupName CgroupName) ToSystemd() string { 87 if len(cgroupName) == 0 || (len(cgroupName) == 1 && cgroupName[0] == "") { 88 return "/" 89 } 90 newparts := []string{} 91 for _, part := range cgroupName { 92 part = escapeSystemdCgroupName(part) 93 newparts = append(newparts, part) 94 } 95 96 result, err := cgroupsystemd.ExpandSlice(strings.Join(newparts, "-") + systemdSuffix) 97 if err != nil { 98 // Should never happen... 99 panic(fmt.Errorf("error converting cgroup name [%v] to systemd format: %v", cgroupName, err)) 100 } 101 return result 102 } 103 104 func ParseSystemdToCgroupName(name string) CgroupName { 105 driverName := path.Base(name) 106 driverName = strings.TrimSuffix(driverName, systemdSuffix) 107 parts := strings.Split(driverName, "-") 108 result := []string{} 109 for _, part := range parts { 110 result = append(result, unescapeSystemdCgroupName(part)) 111 } 112 return CgroupName(result) 113 } 114 115 func (cgroupName CgroupName) ToCgroupfs() string { 116 return "/" + path.Join(cgroupName...) 117 } 118 119 func ParseCgroupfsToCgroupName(name string) CgroupName { 120 components := strings.Split(strings.TrimPrefix(name, "/"), "/") 121 if len(components) == 1 && components[0] == "" { 122 components = []string{} 123 } 124 return CgroupName(components) 125 } 126 127 func IsSystemdStyleName(name string) bool { 128 return strings.HasSuffix(name, systemdSuffix) 129 } 130 131 // CgroupSubsystems holds information about the mounted cgroup subsystems 132 type CgroupSubsystems struct { 133 // Cgroup subsystem mounts. 134 // e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"] 135 Mounts []libcontainercgroups.Mount 136 137 // Cgroup subsystem to their mount location. 138 // e.g.: "cpu" -> "/sys/fs/cgroup/cpu" 139 MountPoints map[string]string 140 } 141 142 // cgroupManagerImpl implements the CgroupManager interface. 143 // Its a stateless object which can be used to 144 // update,create or delete any number of cgroups 145 // It relies on runc/libcontainer cgroup managers. 146 type cgroupManagerImpl struct { 147 // subsystems holds information about all the 148 // mounted cgroup subsystems on the node 149 subsystems *CgroupSubsystems 150 151 // useSystemd tells if systemd cgroup manager should be used. 152 useSystemd bool 153 } 154 155 // Make sure that cgroupManagerImpl implements the CgroupManager interface 156 var _ CgroupManager = &cgroupManagerImpl{} 157 158 // NewCgroupManager is a factory method that returns a CgroupManager 159 func NewCgroupManager(cs *CgroupSubsystems, cgroupDriver string) CgroupManager { 160 return &cgroupManagerImpl{ 161 subsystems: cs, 162 useSystemd: cgroupDriver == "systemd", 163 } 164 } 165 166 // Name converts the cgroup to the driver specific value in cgroupfs form. 167 // This always returns a valid cgroupfs path even when systemd driver is in use! 168 func (m *cgroupManagerImpl) Name(name CgroupName) string { 169 if m.useSystemd { 170 return name.ToSystemd() 171 } 172 return name.ToCgroupfs() 173 } 174 175 // CgroupName converts the literal cgroupfs name on the host to an internal identifier. 176 func (m *cgroupManagerImpl) CgroupName(name string) CgroupName { 177 if m.useSystemd { 178 return ParseSystemdToCgroupName(name) 179 } 180 return ParseCgroupfsToCgroupName(name) 181 } 182 183 // buildCgroupPaths builds a path to each cgroup subsystem for the specified name. 184 func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string { 185 cgroupFsAdaptedName := m.Name(name) 186 cgroupPaths := make(map[string]string, len(m.subsystems.MountPoints)) 187 for key, val := range m.subsystems.MountPoints { 188 cgroupPaths[key] = path.Join(val, cgroupFsAdaptedName) 189 } 190 return cgroupPaths 191 } 192 193 // buildCgroupUnifiedPath builds a path to the specified name. 194 func (m *cgroupManagerImpl) buildCgroupUnifiedPath(name CgroupName) string { 195 cgroupFsAdaptedName := m.Name(name) 196 return path.Join(cmutil.CgroupRoot, cgroupFsAdaptedName) 197 } 198 199 // libctCgroupConfig converts CgroupConfig to libcontainer's Cgroup config. 200 func (m *cgroupManagerImpl) libctCgroupConfig(in *CgroupConfig, needResources bool) *libcontainerconfigs.Cgroup { 201 config := &libcontainerconfigs.Cgroup{ 202 Systemd: m.useSystemd, 203 } 204 if needResources { 205 config.Resources = m.toResources(in.ResourceParameters) 206 } else { 207 config.Resources = &libcontainerconfigs.Resources{} 208 } 209 210 if !config.Systemd { 211 // For fs cgroup manager, we can either set Path or Name and Parent. 212 // Setting Path is easier. 213 config.Path = in.Name.ToCgroupfs() 214 215 return config 216 } 217 218 // For systemd, we have to set Name and Parent, as they are needed to talk to systemd. 219 // Setting Path is optional as it can be deduced from Name and Parent. 220 221 // TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead. 222 // It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent) 223 // and split it appropriately, using essentially the logic below. 224 // This was done for cgroupfs in opencontainers/runc#497 but a counterpart 225 // for systemd was never introduced. 226 dir, base := path.Split(in.Name.ToSystemd()) 227 if dir == "/" { 228 dir = "-.slice" 229 } else { 230 dir = path.Base(dir) 231 } 232 config.Parent = dir 233 config.Name = base 234 235 return config 236 } 237 238 // Validate checks if all subsystem cgroups already exist 239 func (m *cgroupManagerImpl) Validate(name CgroupName) error { 240 if libcontainercgroups.IsCgroup2UnifiedMode() { 241 cgroupPath := m.buildCgroupUnifiedPath(name) 242 neededControllers := getSupportedUnifiedControllers() 243 enabledControllers, err := readUnifiedControllers(cgroupPath) 244 if err != nil { 245 return fmt.Errorf("could not read controllers for cgroup %q: %w", name, err) 246 } 247 difference := neededControllers.Difference(enabledControllers) 248 if difference.Len() > 0 { 249 return fmt.Errorf("cgroup %q has some missing controllers: %v", name, strings.Join(sets.List(difference), ", ")) 250 } 251 return nil // valid V2 cgroup 252 } 253 254 // Get map of all cgroup paths on the system for the particular cgroup 255 cgroupPaths := m.buildCgroupPaths(name) 256 257 // the presence of alternative control groups not known to runc confuses 258 // the kubelet existence checks. 259 // ideally, we would have a mechanism in runc to support Exists() logic 260 // scoped to the set control groups it understands. this is being discussed 261 // in https://github.com/opencontainers/runc/issues/1440 262 // once resolved, we can remove this code. 263 allowlistControllers := sets.New[string]("cpu", "cpuacct", "cpuset", "memory", "systemd", "pids") 264 265 if _, ok := m.subsystems.MountPoints["hugetlb"]; ok { 266 allowlistControllers.Insert("hugetlb") 267 } 268 var missingPaths []string 269 // If even one cgroup path doesn't exist, then the cgroup doesn't exist. 270 for controller, path := range cgroupPaths { 271 // ignore mounts we don't care about 272 if !allowlistControllers.Has(controller) { 273 continue 274 } 275 if !libcontainercgroups.PathExists(path) { 276 missingPaths = append(missingPaths, path) 277 } 278 } 279 280 if len(missingPaths) > 0 { 281 return fmt.Errorf("cgroup %q has some missing paths: %v", name, strings.Join(missingPaths, ", ")) 282 } 283 284 return nil // valid V1 cgroup 285 } 286 287 // Exists checks if all subsystem cgroups already exist 288 func (m *cgroupManagerImpl) Exists(name CgroupName) bool { 289 return m.Validate(name) == nil 290 } 291 292 // Destroy destroys the specified cgroup 293 func (m *cgroupManagerImpl) Destroy(cgroupConfig *CgroupConfig) error { 294 start := time.Now() 295 defer func() { 296 metrics.CgroupManagerDuration.WithLabelValues("destroy").Observe(metrics.SinceInSeconds(start)) 297 }() 298 299 libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, false) 300 manager, err := manager.New(libcontainerCgroupConfig) 301 if err != nil { 302 return err 303 } 304 305 // Delete cgroups using libcontainers Managers Destroy() method 306 if err = manager.Destroy(); err != nil { 307 return fmt.Errorf("unable to destroy cgroup paths for cgroup %v : %v", cgroupConfig.Name, err) 308 } 309 310 return nil 311 } 312 313 // getCPUWeight converts from the range [2, 262144] to [1, 10000] 314 func getCPUWeight(cpuShares *uint64) uint64 { 315 if cpuShares == nil { 316 return 0 317 } 318 if *cpuShares >= 262144 { 319 return 10000 320 } 321 return 1 + ((*cpuShares-2)*9999)/262142 322 } 323 324 // readUnifiedControllers reads the controllers available at the specified cgroup 325 func readUnifiedControllers(path string) (sets.Set[string], error) { 326 controllersFileContent, err := os.ReadFile(filepath.Join(path, "cgroup.controllers")) 327 if err != nil { 328 return nil, err 329 } 330 controllers := strings.Fields(string(controllersFileContent)) 331 return sets.New(controllers...), nil 332 } 333 334 var ( 335 availableRootControllersOnce sync.Once 336 availableRootControllers sets.Set[string] 337 ) 338 339 // getSupportedUnifiedControllers returns a set of supported controllers when running on cgroup v2 340 func getSupportedUnifiedControllers() sets.Set[string] { 341 // This is the set of controllers used by the Kubelet 342 supportedControllers := sets.New("cpu", "cpuset", "memory", "hugetlb", "pids") 343 // Memoize the set of controllers that are present in the root cgroup 344 availableRootControllersOnce.Do(func() { 345 var err error 346 availableRootControllers, err = readUnifiedControllers(cmutil.CgroupRoot) 347 if err != nil { 348 panic(fmt.Errorf("cannot read cgroup controllers at %s", cmutil.CgroupRoot)) 349 } 350 }) 351 // Return the set of controllers that are supported both by the Kubelet and by the kernel 352 return supportedControllers.Intersection(availableRootControllers) 353 } 354 355 func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources { 356 resources := &libcontainerconfigs.Resources{ 357 SkipDevices: true, 358 SkipFreezeOnSet: true, 359 } 360 if resourceConfig == nil { 361 return resources 362 } 363 if resourceConfig.Memory != nil { 364 resources.Memory = *resourceConfig.Memory 365 } 366 if resourceConfig.CPUShares != nil { 367 if libcontainercgroups.IsCgroup2UnifiedMode() { 368 resources.CpuWeight = getCPUWeight(resourceConfig.CPUShares) 369 } else { 370 resources.CpuShares = *resourceConfig.CPUShares 371 } 372 } 373 if resourceConfig.CPUQuota != nil { 374 resources.CpuQuota = *resourceConfig.CPUQuota 375 } 376 if resourceConfig.CPUPeriod != nil { 377 resources.CpuPeriod = *resourceConfig.CPUPeriod 378 } 379 if resourceConfig.PidsLimit != nil { 380 resources.PidsLimit = *resourceConfig.PidsLimit 381 } 382 383 m.maybeSetHugetlb(resourceConfig, resources) 384 385 // Ideally unified is used for all the resources when running on cgroup v2. 386 // It doesn't make difference for the memory.max limit, but for e.g. the cpu controller 387 // you can specify the correct setting without relying on the conversions performed by the OCI runtime. 388 if resourceConfig.Unified != nil && libcontainercgroups.IsCgroup2UnifiedMode() { 389 resources.Unified = make(map[string]string) 390 for k, v := range resourceConfig.Unified { 391 resources.Unified[k] = v 392 } 393 } 394 return resources 395 } 396 397 func (m *cgroupManagerImpl) maybeSetHugetlb(resourceConfig *ResourceConfig, resources *libcontainerconfigs.Resources) { 398 // Check if hugetlb is supported. 399 if libcontainercgroups.IsCgroup2UnifiedMode() { 400 if !getSupportedUnifiedControllers().Has("hugetlb") { 401 klog.V(6).InfoS("Optional subsystem not supported: hugetlb") 402 return 403 } 404 } else if _, ok := m.subsystems.MountPoints["hugetlb"]; !ok { 405 klog.V(6).InfoS("Optional subsystem not supported: hugetlb") 406 return 407 } 408 409 // For each page size enumerated, set that value. 410 pageSizes := sets.New[string]() 411 for pageSize, limit := range resourceConfig.HugePageLimit { 412 sizeString, err := v1helper.HugePageUnitSizeFromByteSize(pageSize) 413 if err != nil { 414 klog.InfoS("Invalid pageSize", "err", err) 415 continue 416 } 417 resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{ 418 Pagesize: sizeString, 419 Limit: uint64(limit), 420 }) 421 pageSizes.Insert(sizeString) 422 } 423 // for each page size omitted, limit to 0 424 for _, pageSize := range libcontainercgroups.HugePageSizes() { 425 if pageSizes.Has(pageSize) { 426 continue 427 } 428 resources.HugetlbLimit = append(resources.HugetlbLimit, &libcontainerconfigs.HugepageLimit{ 429 Pagesize: pageSize, 430 Limit: uint64(0), 431 }) 432 } 433 } 434 435 // Update updates the cgroup with the specified Cgroup Configuration 436 func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error { 437 start := time.Now() 438 defer func() { 439 metrics.CgroupManagerDuration.WithLabelValues("update").Observe(metrics.SinceInSeconds(start)) 440 }() 441 442 libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, true) 443 manager, err := manager.New(libcontainerCgroupConfig) 444 if err != nil { 445 return fmt.Errorf("failed to create cgroup manager: %v", err) 446 } 447 return manager.Set(libcontainerCgroupConfig.Resources) 448 } 449 450 // Create creates the specified cgroup 451 func (m *cgroupManagerImpl) Create(cgroupConfig *CgroupConfig) error { 452 start := time.Now() 453 defer func() { 454 metrics.CgroupManagerDuration.WithLabelValues("create").Observe(metrics.SinceInSeconds(start)) 455 }() 456 457 libcontainerCgroupConfig := m.libctCgroupConfig(cgroupConfig, true) 458 manager, err := manager.New(libcontainerCgroupConfig) 459 if err != nil { 460 return err 461 } 462 463 // Apply(-1) is a hack to create the cgroup directories for each resource 464 // subsystem. The function [cgroups.Manager.apply()] applies cgroup 465 // configuration to the process with the specified pid. 466 // It creates cgroup files for each subsystems and writes the pid 467 // in the tasks file. We use the function to create all the required 468 // cgroup files but not attach any "real" pid to the cgroup. 469 if err := manager.Apply(-1); err != nil { 470 return err 471 } 472 473 // it may confuse why we call set after we do apply, but the issue is that runc 474 // follows a similar pattern. it's needed to ensure cpu quota is set properly. 475 if err := manager.Set(libcontainerCgroupConfig.Resources); err != nil { 476 utilruntime.HandleError(fmt.Errorf("cgroup manager.Set failed: %w", err)) 477 } 478 479 return nil 480 } 481 482 // Scans through all subsystems to find pids associated with specified cgroup. 483 func (m *cgroupManagerImpl) Pids(name CgroupName) []int { 484 // we need the driver specific name 485 cgroupFsName := m.Name(name) 486 487 // Get a list of processes that we need to kill 488 pidsToKill := sets.New[int]() 489 var pids []int 490 for _, val := range m.subsystems.MountPoints { 491 dir := path.Join(val, cgroupFsName) 492 _, err := os.Stat(dir) 493 if os.IsNotExist(err) { 494 // The subsystem pod cgroup is already deleted 495 // do nothing, continue 496 continue 497 } 498 // Get a list of pids that are still charged to the pod's cgroup 499 pids, err = getCgroupProcs(dir) 500 if err != nil { 501 continue 502 } 503 pidsToKill.Insert(pids...) 504 505 // WalkFunc which is called for each file and directory in the pod cgroup dir 506 visitor := func(path string, info os.FileInfo, err error) error { 507 if err != nil { 508 klog.V(4).InfoS("Cgroup manager encountered error scanning cgroup path", "path", path, "err", err) 509 return filepath.SkipDir 510 } 511 if !info.IsDir() { 512 return nil 513 } 514 pids, err = getCgroupProcs(path) 515 if err != nil { 516 klog.V(4).InfoS("Cgroup manager encountered error getting procs for cgroup path", "path", path, "err", err) 517 return filepath.SkipDir 518 } 519 pidsToKill.Insert(pids...) 520 return nil 521 } 522 // Walk through the pod cgroup directory to check if 523 // container cgroups haven't been GCed yet. Get attached processes to 524 // all such unwanted containers under the pod cgroup 525 if err = filepath.Walk(dir, visitor); err != nil { 526 klog.V(4).InfoS("Cgroup manager encountered error scanning pids for directory", "path", dir, "err", err) 527 } 528 } 529 return sets.List(pidsToKill) 530 } 531 532 // ReduceCPULimits reduces the cgroup's cpu shares to the lowest possible value 533 func (m *cgroupManagerImpl) ReduceCPULimits(cgroupName CgroupName) error { 534 // Set lowest possible CpuShares value for the cgroup 535 minimumCPUShares := uint64(MinShares) 536 resources := &ResourceConfig{ 537 CPUShares: &minimumCPUShares, 538 } 539 containerConfig := &CgroupConfig{ 540 Name: cgroupName, 541 ResourceParameters: resources, 542 } 543 return m.Update(containerConfig) 544 } 545 546 // MemoryUsage returns the current memory usage of the specified cgroup, 547 // as read from cgroupfs. 548 func (m *cgroupManagerImpl) MemoryUsage(name CgroupName) (int64, error) { 549 var path, file string 550 if libcontainercgroups.IsCgroup2UnifiedMode() { 551 path = m.buildCgroupUnifiedPath(name) 552 file = "memory.current" 553 } else { 554 mp, ok := m.subsystems.MountPoints["memory"] 555 if !ok { // should not happen 556 return -1, errors.New("no cgroup v1 mountpoint for memory controller found") 557 } 558 path = mp + "/" + m.Name(name) 559 file = "memory.usage_in_bytes" 560 } 561 val, err := fscommon.GetCgroupParamUint(path, file) 562 return int64(val), err 563 } 564 565 // Convert cgroup v1 cpu.shares value to cgroup v2 cpu.weight 566 // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 567 func CpuSharesToCpuWeight(cpuShares uint64) uint64 { 568 return uint64((((cpuShares - 2) * 9999) / 262142) + 1) 569 } 570 571 // Convert cgroup v2 cpu.weight value to cgroup v1 cpu.shares 572 // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2 573 func CpuWeightToCpuShares(cpuWeight uint64) uint64 { 574 return uint64((((cpuWeight - 1) * 262142) / 9999) + 2) 575 } 576 577 func getCgroupv1CpuConfig(cgroupPath string) (*ResourceConfig, error) { 578 cpuQuotaStr, errQ := fscommon.GetCgroupParamString(cgroupPath, "cpu.cfs_quota_us") 579 if errQ != nil { 580 return nil, fmt.Errorf("failed to read CPU quota for cgroup %v: %v", cgroupPath, errQ) 581 } 582 cpuQuota, errInt := strconv.ParseInt(cpuQuotaStr, 10, 64) 583 if errInt != nil { 584 return nil, fmt.Errorf("failed to convert CPU quota as integer for cgroup %v: %v", cgroupPath, errInt) 585 } 586 cpuPeriod, errP := fscommon.GetCgroupParamUint(cgroupPath, "cpu.cfs_period_us") 587 if errP != nil { 588 return nil, fmt.Errorf("failed to read CPU period for cgroup %v: %v", cgroupPath, errP) 589 } 590 cpuShares, errS := fscommon.GetCgroupParamUint(cgroupPath, "cpu.shares") 591 if errS != nil { 592 return nil, fmt.Errorf("failed to read CPU shares for cgroup %v: %v", cgroupPath, errS) 593 } 594 return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuQuota, CPUPeriod: &cpuPeriod}, nil 595 } 596 597 func getCgroupv2CpuConfig(cgroupPath string) (*ResourceConfig, error) { 598 var cpuLimitStr, cpuPeriodStr string 599 cpuLimitAndPeriod, err := fscommon.GetCgroupParamString(cgroupPath, "cpu.max") 600 if err != nil { 601 return nil, fmt.Errorf("failed to read cpu.max file for cgroup %v: %v", cgroupPath, err) 602 } 603 numItems, errScan := fmt.Sscanf(cpuLimitAndPeriod, "%s %s", &cpuLimitStr, &cpuPeriodStr) 604 if errScan != nil || numItems != 2 { 605 return nil, fmt.Errorf("failed to correctly parse content of cpu.max file ('%s') for cgroup %v: %v", 606 cpuLimitAndPeriod, cgroupPath, errScan) 607 } 608 cpuLimit := int64(-1) 609 if cpuLimitStr != Cgroup2MaxCpuLimit { 610 cpuLimit, err = strconv.ParseInt(cpuLimitStr, 10, 64) 611 if err != nil { 612 return nil, fmt.Errorf("failed to convert CPU limit as integer for cgroup %v: %v", cgroupPath, err) 613 } 614 } 615 cpuPeriod, errPeriod := strconv.ParseUint(cpuPeriodStr, 10, 64) 616 if errPeriod != nil { 617 return nil, fmt.Errorf("failed to convert CPU period as integer for cgroup %v: %v", cgroupPath, errPeriod) 618 } 619 cpuWeight, errWeight := fscommon.GetCgroupParamUint(cgroupPath, "cpu.weight") 620 if errWeight != nil { 621 return nil, fmt.Errorf("failed to read CPU weight for cgroup %v: %v", cgroupPath, errWeight) 622 } 623 cpuShares := CpuWeightToCpuShares(cpuWeight) 624 return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuLimit, CPUPeriod: &cpuPeriod}, nil 625 } 626 627 func getCgroupCpuConfig(cgroupPath string) (*ResourceConfig, error) { 628 if libcontainercgroups.IsCgroup2UnifiedMode() { 629 return getCgroupv2CpuConfig(cgroupPath) 630 } else { 631 return getCgroupv1CpuConfig(cgroupPath) 632 } 633 } 634 635 func getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) { 636 memLimitFile := "memory.limit_in_bytes" 637 if libcontainercgroups.IsCgroup2UnifiedMode() { 638 memLimitFile = "memory.max" 639 } 640 memLimit, err := fscommon.GetCgroupParamUint(cgroupPath, memLimitFile) 641 if err != nil { 642 return nil, fmt.Errorf("failed to read %s for cgroup %v: %v", memLimitFile, cgroupPath, err) 643 } 644 mLim := int64(memLimit) 645 //TODO(vinaykul,InPlacePodVerticalScaling): Add memory request support 646 return &ResourceConfig{Memory: &mLim}, nil 647 648 } 649 650 // Get the resource config values applied to the cgroup for specified resource type 651 func (m *cgroupManagerImpl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) { 652 cgroupPaths := m.buildCgroupPaths(name) 653 cgroupResourcePath, found := cgroupPaths[string(resource)] 654 if !found { 655 return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) 656 } 657 switch resource { 658 case v1.ResourceCPU: 659 return getCgroupCpuConfig(cgroupResourcePath) 660 case v1.ResourceMemory: 661 return getCgroupMemoryConfig(cgroupResourcePath) 662 } 663 return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name) 664 } 665 666 func setCgroupv1CpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error { 667 var cpuQuotaStr, cpuPeriodStr, cpuSharesStr string 668 if resourceConfig.CPUQuota != nil { 669 cpuQuotaStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) 670 if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_quota_us"), []byte(cpuQuotaStr), 0700); err != nil { 671 return fmt.Errorf("failed to write %v to %v: %v", cpuQuotaStr, cgroupPath, err) 672 } 673 } 674 if resourceConfig.CPUPeriod != nil { 675 cpuPeriodStr = strconv.FormatUint(*resourceConfig.CPUPeriod, 10) 676 if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_period_us"), []byte(cpuPeriodStr), 0700); err != nil { 677 return fmt.Errorf("failed to write %v to %v: %v", cpuPeriodStr, cgroupPath, err) 678 } 679 } 680 if resourceConfig.CPUShares != nil { 681 cpuSharesStr = strconv.FormatUint(*resourceConfig.CPUShares, 10) 682 if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.shares"), []byte(cpuSharesStr), 0700); err != nil { 683 return fmt.Errorf("failed to write %v to %v: %v", cpuSharesStr, cgroupPath, err) 684 } 685 } 686 return nil 687 } 688 689 func setCgroupv2CpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error { 690 if resourceConfig.CPUQuota != nil { 691 if resourceConfig.CPUPeriod == nil { 692 return fmt.Errorf("CpuPeriod must be specified in order to set CpuLimit") 693 } 694 cpuLimitStr := Cgroup2MaxCpuLimit 695 if *resourceConfig.CPUQuota > -1 { 696 cpuLimitStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10) 697 } 698 cpuPeriodStr := strconv.FormatUint(*resourceConfig.CPUPeriod, 10) 699 cpuMaxStr := fmt.Sprintf("%s %s", cpuLimitStr, cpuPeriodStr) 700 if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.max"), []byte(cpuMaxStr), 0700); err != nil { 701 return fmt.Errorf("failed to write %v to %v: %v", cpuMaxStr, cgroupPath, err) 702 } 703 } 704 if resourceConfig.CPUShares != nil { 705 cpuWeight := CpuSharesToCpuWeight(*resourceConfig.CPUShares) 706 cpuWeightStr := strconv.FormatUint(cpuWeight, 10) 707 if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.weight"), []byte(cpuWeightStr), 0700); err != nil { 708 return fmt.Errorf("failed to write %v to %v: %v", cpuWeightStr, cgroupPath, err) 709 } 710 } 711 return nil 712 } 713 714 func setCgroupCpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error { 715 if libcontainercgroups.IsCgroup2UnifiedMode() { 716 return setCgroupv2CpuConfig(cgroupPath, resourceConfig) 717 } else { 718 return setCgroupv1CpuConfig(cgroupPath, resourceConfig) 719 } 720 } 721 722 func setCgroupMemoryConfig(cgroupPath string, resourceConfig *ResourceConfig) error { 723 memLimitFile := "memory.limit_in_bytes" 724 if libcontainercgroups.IsCgroup2UnifiedMode() { 725 memLimitFile = "memory.max" 726 } 727 memLimit := strconv.FormatInt(*resourceConfig.Memory, 10) 728 if err := os.WriteFile(filepath.Join(cgroupPath, memLimitFile), []byte(memLimit), 0700); err != nil { 729 return fmt.Errorf("failed to write %v to %v/%v: %v", memLimit, cgroupPath, memLimitFile, err) 730 } 731 //TODO(vinaykul,InPlacePodVerticalScaling): Add memory request support 732 return nil 733 } 734 735 // Set resource config for the specified resource type on the cgroup 736 func (m *cgroupManagerImpl) SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error { 737 cgroupPaths := m.buildCgroupPaths(name) 738 cgroupResourcePath, found := cgroupPaths[string(resource)] 739 if !found { 740 return fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name) 741 } 742 switch resource { 743 case v1.ResourceCPU: 744 return setCgroupCpuConfig(cgroupResourcePath, resourceConfig) 745 case v1.ResourceMemory: 746 return setCgroupMemoryConfig(cgroupResourcePath, resourceConfig) 747 } 748 return nil 749 }