github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/runsc/cgroup/cgroup.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package cgroup provides an interface to read and write configuration to 16 // cgroup. 17 package cgroup 18 19 import ( 20 "bufio" 21 "context" 22 "encoding/json" 23 "errors" 24 "fmt" 25 "io" 26 "io/ioutil" 27 "os" 28 "path/filepath" 29 "strconv" 30 "strings" 31 "time" 32 33 "github.com/cenkalti/backoff" 34 specs "github.com/opencontainers/runtime-spec/specs-go" 35 "golang.org/x/sync/errgroup" 36 "golang.org/x/sys/unix" 37 "github.com/nicocha30/gvisor-ligolo/pkg/cleanup" 38 "github.com/nicocha30/gvisor-ligolo/pkg/log" 39 ) 40 41 const ( 42 cgroupv1FsName = "cgroup" 43 cgroupv2FsName = "cgroup2" 44 45 // procRoot is the procfs root this module uses. 46 procRoot = "/proc" 47 48 // cgroupRoot is the cgroupfs root this module uses. 49 cgroupRoot = "/sys/fs/cgroup" 50 ) 51 52 var controllers = map[string]controller{ 53 "blkio": &blockIO{}, 54 "cpu": &cpu{}, 55 "cpuset": &cpuSet{}, 56 "hugetlb": &hugeTLB{}, 57 "memory": &memory{}, 58 "net_cls": &networkClass{}, 59 "net_prio": &networkPrio{}, 60 "pids": &pids{}, 61 62 // These controllers either don't have anything in the OCI spec or is 63 // irrelevant for a sandbox. 64 "cpuacct": &noop{}, 65 "devices": &noop{}, 66 "freezer": &noop{}, 67 "perf_event": &noop{}, 68 "rdma": &noop{}, 69 "systemd": &noop{}, 70 } 71 72 // IsOnlyV2 checks whether cgroups V2 is enabled and V1 is not. 73 func IsOnlyV2() bool { 74 var stat unix.Statfs_t 75 if err := unix.Statfs(cgroupRoot, &stat); err != nil { 76 // It's not used for anything important, assume not V2 on failure. 77 return false 78 } 79 return stat.Type == unix.CGROUP2_SUPER_MAGIC 80 } 81 82 func setOptionalValueInt(path, name string, val *int64) error { 83 if val == nil || *val == 0 { 84 return nil 85 } 86 str := strconv.FormatInt(*val, 10) 87 return setValue(path, name, str) 88 } 89 90 func setOptionalValueUint(path, name string, val *uint64) error { 91 if val == nil || *val == 0 { 92 return nil 93 } 94 str := strconv.FormatUint(*val, 10) 95 return setValue(path, name, str) 96 } 97 98 func setOptionalValueUint32(path, name string, val *uint32) error { 99 if val == nil || *val == 0 { 100 return nil 101 } 102 str := strconv.FormatUint(uint64(*val), 10) 103 return setValue(path, name, str) 104 } 105 106 func setOptionalValueUint16(path, name string, val *uint16) error { 107 if val == nil || *val == 0 { 108 return nil 109 } 110 str := strconv.FormatUint(uint64(*val), 10) 111 return setValue(path, name, str) 112 } 113 114 func setValue(path, name, data string) error { 115 fullpath := filepath.Join(path, name) 116 log.Debugf("Setting %q to %q", fullpath, data) 117 return writeFile(fullpath, []byte(data), 0700) 118 } 119 120 // writeFile is similar to ioutil.WriteFile() but doesn't create the file if it 121 // doesn't exist. 122 func writeFile(path string, data []byte, perm os.FileMode) error { 123 f, err := os.OpenFile(path, os.O_WRONLY|os.O_TRUNC, perm) 124 if err != nil { 125 return err 126 } 127 defer f.Close() 128 129 _, err = f.Write(data) 130 return err 131 } 132 133 func getValue(path, name string) (string, error) { 134 fullpath := filepath.Join(path, name) 135 out, err := ioutil.ReadFile(fullpath) 136 if err != nil { 137 return "", err 138 } 139 return string(out), nil 140 } 141 142 func getInt(path, name string) (int, error) { 143 s, err := getValue(path, name) 144 if err != nil { 145 return 0, err 146 } 147 return strconv.Atoi(strings.TrimSpace(s)) 148 } 149 150 // fillFromAncestor sets the value of a cgroup file from the first ancestor 151 // that has content. It does nothing if the file in 'path' has already been set. 152 func fillFromAncestor(path string) (string, error) { 153 out, err := ioutil.ReadFile(path) 154 if err != nil { 155 return "", err 156 } 157 val := strings.TrimSpace(string(out)) 158 if val != "" { 159 // File is set, stop here. 160 return val, nil 161 } 162 163 // File is not set, recurse to parent and then set here. 164 name := filepath.Base(path) 165 parent := filepath.Dir(filepath.Dir(path)) 166 val, err = fillFromAncestor(filepath.Join(parent, name)) 167 if err != nil { 168 return "", err 169 } 170 171 if err := writeFile(path, []byte(val), 0700); err != nil { 172 return "", nil 173 } 174 return val, nil 175 } 176 177 // countCpuset returns the number of CPU in a string formatted like: 178 // 179 // "0-2,7,12-14 # bits 0, 1, 2, 7, 12, 13, and 14 set" - man 7 cpuset 180 func countCpuset(cpuset string) (int, error) { 181 var count int 182 for _, p := range strings.Split(cpuset, ",") { 183 interval := strings.Split(p, "-") 184 switch len(interval) { 185 case 1: 186 if _, err := strconv.Atoi(interval[0]); err != nil { 187 return 0, err 188 } 189 count++ 190 191 case 2: 192 start, err := strconv.Atoi(interval[0]) 193 if err != nil { 194 return 0, err 195 } 196 end, err := strconv.Atoi(interval[1]) 197 if err != nil { 198 return 0, err 199 } 200 if start < 0 || end < 0 || start > end { 201 return 0, fmt.Errorf("invalid cpuset: %q", p) 202 } 203 count += end - start + 1 204 205 default: 206 return 0, fmt.Errorf("invalid cpuset: %q", p) 207 } 208 } 209 return count, nil 210 } 211 212 // loadPaths loads cgroup paths for given 'pid', may be set to 'self'. 213 func loadPaths(pid string) (map[string]string, error) { 214 procCgroup, err := os.Open(filepath.Join(procRoot, pid, "cgroup")) 215 if err != nil { 216 return nil, err 217 } 218 defer procCgroup.Close() 219 220 // Load mountinfo for the current process, because it's where cgroups is 221 // being accessed from. 222 mountinfo, err := os.Open(filepath.Join(procRoot, "self/mountinfo")) 223 if err != nil { 224 return nil, err 225 } 226 defer mountinfo.Close() 227 228 return loadPathsHelper(procCgroup, mountinfo, IsOnlyV2()) 229 } 230 231 func loadPathsHelper(cgroup, mountinfo io.Reader, unified bool) (map[string]string, error) { 232 paths := make(map[string]string) 233 234 scanner := bufio.NewScanner(cgroup) 235 for scanner.Scan() { 236 // Format: ID:[name=]controller1,controller2:path 237 // Example: 2:cpu,cpuacct:/user.slice 238 tokens := strings.Split(scanner.Text(), ":") 239 if len(tokens) != 3 { 240 return nil, fmt.Errorf("invalid cgroups file, line: %q", scanner.Text()) 241 } 242 if len(tokens[1]) == 0 && unified { 243 paths[cgroup2Key] = tokens[2] 244 continue 245 } 246 if len(tokens[1]) == 0 { 247 continue 248 } 249 for _, ctrlr := range strings.Split(tokens[1], ",") { 250 // Remove prefix for cgroups with no controller, eg. systemd. 251 ctrlr = strings.TrimPrefix(ctrlr, "name=") 252 // Discard unknown controllers. 253 if _, ok := controllers[ctrlr]; ok { 254 paths[ctrlr] = tokens[2] 255 } 256 } 257 } 258 if err := scanner.Err(); err != nil { 259 return nil, err 260 } 261 262 // For nested containers, in /proc/[pid]/cgroup we see paths from host, 263 // which don't exist in container, so recover the container paths here by 264 // double-checking with /proc/[pid]/mountinfo 265 mountScanner := bufio.NewScanner(mountinfo) 266 haveCg2Path := false 267 for mountScanner.Scan() { 268 // Format: ID parent major:minor root mount-point options opt-fields - fs-type source super-options 269 // Example: 39 32 0:34 / /sys/fs/cgroup/devices rw,noexec shared:18 - cgroup cgroup rw,devices 270 fields := strings.Fields(mountScanner.Text()) 271 if len(fields) < 9 { 272 // Skip mounts that are not cgroup mounts. 273 continue 274 } 275 switch fields[len(fields)-3] { 276 case cgroupv1FsName: 277 // Cgroup controller type is in the super-options field. 278 superOptions := strings.Split(fields[len(fields)-1], ",") 279 for _, opt := range superOptions { 280 // Remove prefix for cgroups with no controller, eg. systemd. 281 opt = strings.TrimPrefix(opt, "name=") 282 283 // Only considers cgroup controllers that are registered, and skip other 284 // irrelevant options, e.g. rw. 285 if cgroupPath, ok := paths[opt]; ok { 286 rootDir := fields[3] 287 if rootDir != "/" { 288 // When cgroup is in submount, remove repeated path components from 289 // cgroup path to avoid duplicating them. 290 relCgroupPath, err := filepath.Rel(rootDir, cgroupPath) 291 if err != nil { 292 return nil, err 293 } 294 paths[opt] = relCgroupPath 295 } 296 } 297 } 298 case cgroupv2FsName: 299 if cgroupPath, ok := paths[cgroup2Key]; !haveCg2Path && ok { 300 root := fields[3] 301 relCgroupPath, err := filepath.Rel(root, cgroupPath) 302 if err != nil { 303 return nil, err 304 } 305 haveCg2Path = true 306 paths[cgroup2Key] = relCgroupPath 307 } 308 } 309 } 310 if err := mountScanner.Err(); err != nil { 311 return nil, err 312 } 313 314 return paths, nil 315 } 316 317 // Cgroup represents a cgroup configuration. 318 type Cgroup interface { 319 Install(res *specs.LinuxResources) error 320 Uninstall() error 321 Join() (func(), error) 322 CPUQuota() (float64, error) 323 CPUUsage() (uint64, error) 324 NumCPU() (int, error) 325 MemoryLimit() (uint64, error) 326 MakePath(controllerName string) string 327 } 328 329 // cgroupV1 represents a group inside all controllers. For example: 330 // 331 // Name='/foo/bar' maps to /sys/fs/cgroup/<controller>/foo/bar on 332 // all controllers. 333 // 334 // If Name is relative, it uses the parent cgroup path to determine the 335 // location. For example: 336 // 337 // Name='foo/bar' and Parent[ctrl]="/user.slice", then it will map to 338 // /sys/fs/cgroup/<ctrl>/user.slice/foo/bar 339 type cgroupV1 struct { 340 Name string `json:"name"` 341 Parents map[string]string `json:"parents"` 342 Own map[string]bool `json:"own"` 343 } 344 345 // NewFromSpec creates a new Cgroup instance if the spec includes a cgroup path. 346 // Returns nil otherwise. Cgroup paths are loaded based on the current process. 347 // If useSystemd is true, the Cgroup will be created and managed with 348 // systemd. This requires systemd (>=v244) to be running on the host and the 349 // cgroup path to be in the form `slice:prefix:name`. 350 func NewFromSpec(spec *specs.Spec, useSystemd bool) (Cgroup, error) { 351 if spec.Linux == nil || spec.Linux.CgroupsPath == "" { 352 return nil, nil 353 } 354 return NewFromPath(spec.Linux.CgroupsPath, useSystemd) 355 } 356 357 // NewFromPath creates a new Cgroup instance from the specified relative path. 358 // Cgroup paths are loaded based on the current process. 359 // If useSystemd is true, the Cgroup will be created and managed with 360 // systemd. This requires systemd (>=v244) to be running on the host and the 361 // cgroup path to be in the form `slice:prefix:name`. 362 func NewFromPath(cgroupsPath string, useSystemd bool) (Cgroup, error) { 363 return new("self", cgroupsPath, useSystemd) 364 } 365 366 // NewFromPid loads cgroup for the given process. 367 // If useSystemd is true, the Cgroup will be created and managed with 368 // systemd. This requires systemd (>=v244) to be running on the host and the 369 // cgroup path to be in the form `slice:prefix:name`. 370 func NewFromPid(pid int, useSystemd bool) (Cgroup, error) { 371 return new(strconv.Itoa(pid), "", useSystemd) 372 } 373 374 func new(pid, cgroupsPath string, useSystemd bool) (Cgroup, error) { 375 var ( 376 parents map[string]string 377 err error 378 cg Cgroup 379 ) 380 381 // If path is relative, load cgroup paths for the process to build the 382 // relative paths. 383 if !filepath.IsAbs(cgroupsPath) { 384 parents, err = loadPaths(pid) 385 if err != nil { 386 return nil, fmt.Errorf("finding current cgroups: %w", err) 387 } 388 } 389 390 if IsOnlyV2() { 391 // The cgroupsPath is in a special `slice:prefix:name` format for systemd 392 // that should not be modified. 393 if p, ok := parents[cgroup2Key]; ok && !useSystemd { 394 // The cgroup of current pid will have tasks in it and we can't use 395 // that, instead, use the its parent which should not have tasks in it. 396 cgroupsPath = filepath.Join(filepath.Dir(p), cgroupsPath) 397 } 398 // Assume that for v2, cgroup is always mounted at cgroupRoot. 399 cg, err = newCgroupV2(cgroupRoot, cgroupsPath, useSystemd) 400 if err != nil { 401 return nil, err 402 } 403 } else { 404 cg = &cgroupV1{ 405 Name: cgroupsPath, 406 Parents: parents, 407 Own: make(map[string]bool), 408 } 409 } 410 log.Debugf("New cgroup for pid: %s, %T: %+v", pid, cg, cg) 411 return cg, nil 412 } 413 414 // CgroupJSON is a wrapper for Cgroup that can be encoded to JSON. 415 type CgroupJSON struct { 416 Cgroup Cgroup 417 } 418 419 type cgroupJSONv1 struct { 420 Cgroup *cgroupV1 `json:"cgroupv1"` 421 } 422 423 type cgroupJSONv2 struct { 424 Cgroup *cgroupV2 `json:"cgroupv2"` 425 } 426 427 type cgroupJSONSystemd struct { 428 Cgroup *cgroupSystemd `json:"cgroupsystemd"` 429 } 430 431 type cgroupJSONUnknown struct { 432 Cgroup any `json:"cgroupunknown"` 433 } 434 435 // UnmarshalJSON implements json.Unmarshaler.UnmarshalJSON 436 func (c *CgroupJSON) UnmarshalJSON(data []byte) error { 437 m := map[string]json.RawMessage{} 438 if err := json.Unmarshal(data, &m); err != nil { 439 return err 440 } 441 442 var cg Cgroup 443 if rm, ok := m["cgroupv1"]; ok { 444 cg = &cgroupV1{} 445 if err := json.Unmarshal(rm, cg); err != nil { 446 return err 447 } 448 } else if rm, ok := m["cgroupv2"]; ok { 449 cg = &cgroupV2{} 450 if err := json.Unmarshal(rm, cg); err != nil { 451 return err 452 } 453 } else if rm, ok := m["cgroupsystemd"]; ok { 454 cg = &cgroupSystemd{} 455 if err := json.Unmarshal(rm, cg); err != nil { 456 return err 457 } 458 } 459 c.Cgroup = cg 460 return nil 461 } 462 463 // MarshalJSON implements json.Marshaler.MarshalJSON 464 func (c *CgroupJSON) MarshalJSON() ([]byte, error) { 465 if c.Cgroup == nil { 466 return json.Marshal(cgroupJSONUnknown{}) 467 } 468 switch c.Cgroup.(type) { 469 case *cgroupV1: 470 return json.Marshal(cgroupJSONv1{Cgroup: c.Cgroup.(*cgroupV1)}) 471 case *cgroupV2: 472 return json.Marshal(cgroupJSONv2{Cgroup: c.Cgroup.(*cgroupV2)}) 473 case *cgroupSystemd: 474 return json.Marshal(cgroupJSONSystemd{Cgroup: c.Cgroup.(*cgroupSystemd)}) 475 } 476 return nil, nil 477 } 478 479 // Install creates and configures cgroups according to 'res'. If cgroup path 480 // already exists, it means that the caller has already provided a 481 // pre-configured cgroups, and 'res' is ignored. 482 func (c *cgroupV1) Install(res *specs.LinuxResources) error { 483 log.Debugf("Installing cgroup path %q", c.Name) 484 485 // Clean up partially created cgroups on error. Errors during cleanup itself 486 // are ignored. 487 clean := cleanup.Make(func() { _ = c.Uninstall() }) 488 defer clean.Clean() 489 490 // Controllers can be symlinks to a group of controllers (e.g. cpu,cpuacct). 491 // So first check what directories need to be created. Otherwise, when 492 // the directory for one of the controllers in a group is created, it will 493 // make it seem like the directory already existed and it's not owned by the 494 // other controllers in the group. 495 var missing []string 496 for key := range controllers { 497 path := c.MakePath(key) 498 if _, err := os.Stat(path); err != nil { 499 missing = append(missing, key) 500 } else { 501 log.Debugf("Using pre-created cgroup %q: %q", key, path) 502 } 503 } 504 for _, key := range missing { 505 ctrlr := controllers[key] 506 507 if skip, err := createController(c, key); skip && ctrlr.optional() { 508 if err := ctrlr.skip(res); err != nil { 509 return err 510 } 511 log.Infof("Skipping cgroup %q, err: %v", key, err) 512 continue 513 } else if err != nil { 514 return err 515 } 516 517 // Only set controllers that were created by me. 518 c.Own[key] = true 519 path := c.MakePath(key) 520 if err := ctrlr.set(res, path); err != nil { 521 return err 522 } 523 } 524 clean.Release() 525 return nil 526 } 527 528 // createController creates the controller directory, checking that the 529 // controller is enabled in the system. It returns a boolean indicating whether 530 // the controller should be skipped (e.g. controller is disabled). In case it 531 // should be skipped, it also returns the error it got. 532 func createController(c Cgroup, name string) (bool, error) { 533 ctrlrPath := filepath.Join(cgroupRoot, name) 534 if _, err := os.Stat(ctrlrPath); err != nil { 535 return os.IsNotExist(err), err 536 } 537 538 path := c.MakePath(name) 539 log.Debugf("Creating cgroup %q: %q", name, path) 540 if err := os.MkdirAll(path, 0755); err != nil { 541 return errors.Is(err, unix.EROFS), err 542 } 543 return false, nil 544 } 545 546 // Uninstall removes the settings done in Install(). If cgroup path already 547 // existed when Install() was called, Uninstall is a noop. 548 func (c *cgroupV1) Uninstall() error { 549 log.Debugf("Deleting cgroup %q", c.Name) 550 g, ctx := errgroup.WithContext(context.Background()) 551 for key := range controllers { 552 if !c.Own[key] { 553 // cgroup is managed by caller, don't touch it. 554 continue 555 } 556 path := c.MakePath(key) 557 log.Debugf("Removing cgroup controller for key=%q path=%q", key, path) 558 559 // If we try to remove the cgroup too soon after killing the sandbox we 560 // might get EBUSY, so we retry for a few seconds until it succeeds. 561 ctx, cancel := context.WithTimeout(ctx, 5*time.Second) 562 defer cancel() 563 b := backoff.WithContext(backoff.NewConstantBackOff(100*time.Millisecond), ctx) 564 fn := func() error { 565 err := unix.Rmdir(path) 566 if os.IsNotExist(err) { 567 return nil 568 } 569 return err 570 } 571 // Run deletions in parallel to remove all directories even if there are 572 // failures/timeouts in other directories. 573 g.Go(func() error { 574 if err := backoff.Retry(fn, b); err != nil { 575 return fmt.Errorf("removing cgroup path %q: %w", path, err) 576 } 577 return nil 578 }) 579 } 580 return g.Wait() 581 } 582 583 // Join adds the current process to the all controllers. Returns function that 584 // restores cgroup to the original state. 585 func (c *cgroupV1) Join() (func(), error) { 586 // First save the current state so it can be restored. 587 paths, err := loadPaths("self") 588 if err != nil { 589 return nil, err 590 } 591 var undoPaths []string 592 for ctrlr, path := range paths { 593 // Skip controllers we don't handle. 594 if _, ok := controllers[ctrlr]; ok { 595 fullPath := filepath.Join(cgroupRoot, ctrlr, path) 596 undoPaths = append(undoPaths, fullPath) 597 } 598 } 599 600 cu := cleanup.Make(func() { 601 for _, path := range undoPaths { 602 log.Debugf("Restoring cgroup %q", path) 603 // Writing the value 0 to a cgroup.procs file causes 604 // the writing process to be moved to the corresponding 605 // cgroup. - cgroups(7). 606 if err := setValue(path, "cgroup.procs", "0"); err != nil { 607 log.Warningf("Error restoring cgroup %q: %v", path, err) 608 } 609 } 610 }) 611 defer cu.Clean() 612 613 // Now join the cgroups. 614 for key, ctrlr := range controllers { 615 path := c.MakePath(key) 616 log.Debugf("Joining cgroup %q", path) 617 // Writing the value 0 to a cgroup.procs file causes the writing process to 618 // be moved to the corresponding cgroup - cgroups(7). 619 if err := setValue(path, "cgroup.procs", "0"); err != nil { 620 if ctrlr.optional() && os.IsNotExist(err) { 621 continue 622 } 623 return nil, err 624 } 625 } 626 return cu.Release(), nil 627 } 628 629 // CPUQuota returns the CFS CPU quota. 630 func (c *cgroupV1) CPUQuota() (float64, error) { 631 path := c.MakePath("cpu") 632 quota, err := getInt(path, "cpu.cfs_quota_us") 633 if err != nil { 634 return -1, err 635 } 636 period, err := getInt(path, "cpu.cfs_period_us") 637 if err != nil { 638 return -1, err 639 } 640 if quota <= 0 || period <= 0 { 641 return -1, err 642 } 643 return float64(quota) / float64(period), nil 644 } 645 646 // CPUUsage returns the total CPU usage of the cgroup. 647 func (c *cgroupV1) CPUUsage() (uint64, error) { 648 path := c.MakePath("cpuacct") 649 usage, err := getValue(path, "cpuacct.usage") 650 if err != nil { 651 return 0, err 652 } 653 return strconv.ParseUint(strings.TrimSpace(usage), 10, 64) 654 } 655 656 // NumCPU returns the number of CPUs configured in 'cpuset/cpuset.cpus'. 657 func (c *cgroupV1) NumCPU() (int, error) { 658 path := c.MakePath("cpuset") 659 cpuset, err := getValue(path, "cpuset.cpus") 660 if err != nil { 661 return 0, err 662 } 663 return countCpuset(strings.TrimSpace(cpuset)) 664 } 665 666 // MemoryLimit returns the memory limit. 667 func (c *cgroupV1) MemoryLimit() (uint64, error) { 668 path := c.MakePath("memory") 669 limStr, err := getValue(path, "memory.limit_in_bytes") 670 if err != nil { 671 return 0, err 672 } 673 return strconv.ParseUint(strings.TrimSpace(limStr), 10, 64) 674 } 675 676 // MakePath builds a path to the given controller. 677 func (c *cgroupV1) MakePath(controllerName string) string { 678 path := c.Name 679 if parent, ok := c.Parents[controllerName]; ok { 680 path = filepath.Join(parent, c.Name) 681 } 682 return filepath.Join(cgroupRoot, controllerName, path) 683 } 684 685 type controller interface { 686 // optional controllers don't fail if not found. 687 optional() bool 688 // set applies resource limits to controller. 689 set(*specs.LinuxResources, string) error 690 // skip is called when controller is not found to check if it can be safely 691 // skipped or not based on the spec. 692 skip(*specs.LinuxResources) error 693 } 694 695 type noop struct{} 696 697 func (n *noop) optional() bool { 698 return true 699 } 700 701 func (*noop) set(*specs.LinuxResources, string) error { 702 return nil 703 } 704 705 func (n *noop) skip(*specs.LinuxResources) error { 706 return nil 707 } 708 709 type mandatory struct{} 710 711 func (*mandatory) optional() bool { 712 return false 713 } 714 715 func (*mandatory) skip(*specs.LinuxResources) error { 716 panic("cgroup controller is not optional") 717 } 718 719 type memory struct { 720 mandatory 721 } 722 723 func (*memory) set(spec *specs.LinuxResources, path string) error { 724 if spec == nil || spec.Memory == nil { 725 return nil 726 } 727 if err := setOptionalValueInt(path, "memory.limit_in_bytes", spec.Memory.Limit); err != nil { 728 return err 729 } 730 if err := setOptionalValueInt(path, "memory.soft_limit_in_bytes", spec.Memory.Reservation); err != nil { 731 return err 732 } 733 if err := setOptionalValueInt(path, "memory.memsw.limit_in_bytes", spec.Memory.Swap); err != nil { 734 return err 735 } 736 if err := setOptionalValueInt(path, "memory.kmem.limit_in_bytes", spec.Memory.Kernel); err != nil { 737 return err 738 } 739 if err := setOptionalValueInt(path, "memory.kmem.tcp.limit_in_bytes", spec.Memory.KernelTCP); err != nil { 740 return err 741 } 742 if err := setOptionalValueUint(path, "memory.swappiness", spec.Memory.Swappiness); err != nil { 743 return err 744 } 745 746 if spec.Memory.DisableOOMKiller != nil && *spec.Memory.DisableOOMKiller { 747 if err := setValue(path, "memory.oom_control", "1"); err != nil { 748 return err 749 } 750 } 751 return nil 752 } 753 754 type cpu struct { 755 mandatory 756 } 757 758 func (*cpu) set(spec *specs.LinuxResources, path string) error { 759 if spec == nil || spec.CPU == nil { 760 return nil 761 } 762 if err := setOptionalValueUint(path, "cpu.shares", spec.CPU.Shares); err != nil { 763 return err 764 } 765 if err := setOptionalValueInt(path, "cpu.cfs_quota_us", spec.CPU.Quota); err != nil { 766 return err 767 } 768 if err := setOptionalValueUint(path, "cpu.cfs_period_us", spec.CPU.Period); err != nil { 769 return err 770 } 771 if err := setOptionalValueUint(path, "cpu.rt_period_us", spec.CPU.RealtimePeriod); err != nil { 772 return err 773 } 774 return setOptionalValueInt(path, "cpu.rt_runtime_us", spec.CPU.RealtimeRuntime) 775 } 776 777 type cpuSet struct { 778 mandatory 779 } 780 781 func (*cpuSet) set(spec *specs.LinuxResources, path string) error { 782 // cpuset.cpus and mems are required fields, but are not set on a new cgroup. 783 // If not set in the spec, get it from one of the ancestors cgroup. 784 if spec == nil || spec.CPU == nil || spec.CPU.Cpus == "" { 785 if _, err := fillFromAncestor(filepath.Join(path, "cpuset.cpus")); err != nil { 786 return err 787 } 788 } else { 789 if err := setValue(path, "cpuset.cpus", spec.CPU.Cpus); err != nil { 790 return err 791 } 792 } 793 794 if spec == nil || spec.CPU == nil || spec.CPU.Mems == "" { 795 _, err := fillFromAncestor(filepath.Join(path, "cpuset.mems")) 796 return err 797 } 798 return setValue(path, "cpuset.mems", spec.CPU.Mems) 799 } 800 801 type blockIO struct { 802 mandatory 803 } 804 805 func (*blockIO) set(spec *specs.LinuxResources, path string) error { 806 if spec == nil || spec.BlockIO == nil { 807 return nil 808 } 809 810 if err := setOptionalValueUint16(path, "blkio.weight", spec.BlockIO.Weight); err != nil { 811 return err 812 } 813 if err := setOptionalValueUint16(path, "blkio.leaf_weight", spec.BlockIO.LeafWeight); err != nil { 814 return err 815 } 816 817 for _, dev := range spec.BlockIO.WeightDevice { 818 if dev.Weight != nil { 819 val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, *dev.Weight) 820 if err := setValue(path, "blkio.weight_device", val); err != nil { 821 return err 822 } 823 } 824 if dev.LeafWeight != nil { 825 val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, *dev.LeafWeight) 826 if err := setValue(path, "blkio.leaf_weight_device", val); err != nil { 827 return err 828 } 829 } 830 } 831 if err := setThrottle(path, "blkio.throttle.read_bps_device", spec.BlockIO.ThrottleReadBpsDevice); err != nil { 832 return err 833 } 834 if err := setThrottle(path, "blkio.throttle.write_bps_device", spec.BlockIO.ThrottleWriteBpsDevice); err != nil { 835 return err 836 } 837 if err := setThrottle(path, "blkio.throttle.read_iops_device", spec.BlockIO.ThrottleReadIOPSDevice); err != nil { 838 return err 839 } 840 return setThrottle(path, "blkio.throttle.write_iops_device", spec.BlockIO.ThrottleWriteIOPSDevice) 841 } 842 843 func setThrottle(path, name string, devs []specs.LinuxThrottleDevice) error { 844 for _, dev := range devs { 845 val := fmt.Sprintf("%d:%d %d", dev.Major, dev.Minor, dev.Rate) 846 if err := setValue(path, name, val); err != nil { 847 return err 848 } 849 } 850 return nil 851 } 852 853 type networkClass struct{} 854 855 func (*networkClass) optional() bool { 856 return true 857 } 858 859 func (*networkClass) set(spec *specs.LinuxResources, path string) error { 860 if spec == nil || spec.Network == nil { 861 return nil 862 } 863 return setOptionalValueUint32(path, "net_cls.classid", spec.Network.ClassID) 864 } 865 866 func (*networkClass) skip(spec *specs.LinuxResources) error { 867 if spec != nil && spec.Network != nil && spec.Network.ClassID != nil { 868 return fmt.Errorf("Network.ClassID set but net_cls cgroup controller not found") 869 } 870 return nil 871 } 872 873 type networkPrio struct{} 874 875 func (*networkPrio) optional() bool { 876 return true 877 } 878 879 func (*networkPrio) set(spec *specs.LinuxResources, path string) error { 880 if spec == nil || spec.Network == nil { 881 return nil 882 } 883 for _, prio := range spec.Network.Priorities { 884 val := fmt.Sprintf("%s %d", prio.Name, prio.Priority) 885 if err := setValue(path, "net_prio.ifpriomap", val); err != nil { 886 return err 887 } 888 } 889 return nil 890 } 891 892 func (*networkPrio) skip(spec *specs.LinuxResources) error { 893 if spec != nil && spec.Network != nil && len(spec.Network.Priorities) > 0 { 894 return fmt.Errorf("Network.Priorities set but net_prio cgroup controller not found") 895 } 896 return nil 897 } 898 899 type pids struct{} 900 901 func (*pids) optional() bool { 902 return true 903 } 904 905 func (*pids) skip(spec *specs.LinuxResources) error { 906 if spec != nil && spec.Pids != nil && spec.Pids.Limit > 0 { 907 return fmt.Errorf("Pids.Limit set but pids cgroup controller not found") 908 } 909 return nil 910 } 911 912 func (*pids) set(spec *specs.LinuxResources, path string) error { 913 if spec == nil || spec.Pids == nil || spec.Pids.Limit <= 0 { 914 return nil 915 } 916 val := strconv.FormatInt(spec.Pids.Limit, 10) 917 return setValue(path, "pids.max", val) 918 } 919 920 type hugeTLB struct{} 921 922 func (*hugeTLB) optional() bool { 923 return true 924 } 925 926 func (*hugeTLB) skip(spec *specs.LinuxResources) error { 927 if spec != nil && len(spec.HugepageLimits) > 0 { 928 return fmt.Errorf("HugepageLimits set but hugetlb cgroup controller not found") 929 } 930 return nil 931 } 932 933 func (*hugeTLB) set(spec *specs.LinuxResources, path string) error { 934 if spec == nil { 935 return nil 936 } 937 for _, limit := range spec.HugepageLimits { 938 name := fmt.Sprintf("hugetlb.%s.limit_in_bytes", limit.Pagesize) 939 val := strconv.FormatUint(limit.Limit, 10) 940 if err := setValue(path, name, val); err != nil { 941 return err 942 } 943 } 944 return nil 945 }