github.com/containerd/nerdctl@v1.7.7/pkg/cmd/container/run_cgroup_linux.go (about)

     1  /*
     2     Copyright The containerd Authors.
     3  
     4     Licensed under the Apache License, Version 2.0 (the "License");
     5     you may not use this file except in compliance with the License.
     6     You may obtain a copy of the License at
     7  
     8         http://www.apache.org/licenses/LICENSE-2.0
     9  
    10     Unless required by applicable law or agreed to in writing, software
    11     distributed under the License is distributed on an "AS IS" BASIS,
    12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13     See the License for the specific language governing permissions and
    14     limitations under the License.
    15  */
    16  
    17  package container
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"path/filepath"
    24  	"strings"
    25  
    26  	"github.com/containerd/containerd/containers"
    27  	"github.com/containerd/containerd/oci"
    28  	"github.com/containerd/log"
    29  	"github.com/containerd/nerdctl/pkg/api/types"
    30  	"github.com/containerd/nerdctl/pkg/infoutil"
    31  	"github.com/containerd/nerdctl/pkg/rootlessutil"
    32  	"github.com/docker/go-units"
    33  	"github.com/opencontainers/runtime-spec/specs-go"
    34  )
    35  
// customMemoryOptions carries the optional memory settings that
// withCustomMemoryResources writes into the spec's Linux memory resources.
// A nil field is left untouched in the spec.
type customMemoryOptions struct {
	// MemoryReservation is stored as the memory Reservation when non-nil.
	MemoryReservation *int64
	// MemorySwappiness is stored as the memory Swappiness when non-nil.
	MemorySwappiness  *uint64
	// disableOOMKiller is stored as the memory DisableOOMKiller when non-nil.
	disableOOMKiller  *bool
}
    41  
    42  func generateCgroupOpts(id string, options types.ContainerCreateOptions) ([]oci.SpecOpts, error) {
    43  	if options.KernelMemory != "" {
    44  		log.L.Warnf("The --kernel-memory flag is no longer supported. This flag is a noop.")
    45  	}
    46  
    47  	if options.Memory == "" && options.OomKillDisable {
    48  		log.L.Warn("Disabling the OOM killer on containers without setting a '-m/--memory' limit may be dangerous.")
    49  	}
    50  
    51  	if options.GOptions.CgroupManager == "none" {
    52  		if !rootlessutil.IsRootless() {
    53  			return nil, errors.New(`cgroup-manager "none" is only supported for rootless`)
    54  		}
    55  
    56  		if options.CPUs > 0.0 || options.Memory != "" || options.MemorySwap != "" || options.PidsLimit > 0 {
    57  			log.L.Warn(`cgroup manager is set to "none", discarding resource limit requests. ` +
    58  				"(Hint: enable cgroup v2 with systemd: https://rootlesscontaine.rs/getting-started/common/cgroup2/)")
    59  		}
    60  		if options.CgroupParent != "" {
    61  			log.L.Warnf(`cgroup manager is set to "none", ignoring cgroup parent %q`+
    62  				"(Hint: enable cgroup v2 with systemd: https://rootlesscontaine.rs/getting-started/common/cgroup2/)", options.CgroupParent)
    63  		}
    64  		return []oci.SpecOpts{oci.WithCgroup("")}, nil
    65  	}
    66  
    67  	var opts []oci.SpecOpts // nolint: prealloc
    68  	path, err := generateCgroupPath(id, options.GOptions.CgroupManager, options.CgroupParent)
    69  	if err != nil {
    70  		return nil, err
    71  	}
    72  	if path != "" {
    73  		opts = append(opts, oci.WithCgroup(path))
    74  	}
    75  
    76  	// cpus: from https://github.com/containerd/containerd/blob/v1.4.3/cmd/ctr/commands/run/run_unix.go#L187-L193
    77  	if options.CPUs > 0.0 {
    78  		var (
    79  			period = uint64(100000)
    80  			quota  = int64(options.CPUs * 100000.0)
    81  		)
    82  		opts = append(opts, oci.WithCPUCFS(quota, period))
    83  	}
    84  
    85  	if options.CPUShares != 0 {
    86  		opts = append(opts, oci.WithCPUShares(options.CPUShares))
    87  	}
    88  
    89  	if options.CPUSetCPUs != "" {
    90  		opts = append(opts, oci.WithCPUs(options.CPUSetCPUs))
    91  	}
    92  	if options.CPUQuota != -1 || options.CPUPeriod != 0 {
    93  		if options.CPUs > 0.0 {
    94  			return nil, errors.New("cpus and quota/period should be used separately")
    95  		}
    96  		opts = append(opts, oci.WithCPUCFS(options.CPUQuota, options.CPUPeriod))
    97  	}
    98  	if options.CPUSetMems != "" {
    99  		opts = append(opts, oci.WithCPUsMems(options.CPUSetMems))
   100  	}
   101  
   102  	var mem64 int64
   103  	if options.Memory != "" {
   104  		mem64, err = units.RAMInBytes(options.Memory)
   105  		if err != nil {
   106  			return nil, fmt.Errorf("failed to parse memory bytes %q: %w", options.Memory, err)
   107  		}
   108  		opts = append(opts, oci.WithMemoryLimit(uint64(mem64)))
   109  	}
   110  
   111  	var memReserve64 int64
   112  	if options.MemoryReservation != "" {
   113  		memReserve64, err = units.RAMInBytes(options.MemoryReservation)
   114  		if err != nil {
   115  			return nil, fmt.Errorf("failed to parse memory bytes %q: %w", options.MemoryReservation, err)
   116  		}
   117  	}
   118  	var memSwap64 int64
   119  	if options.MemorySwap != "" {
   120  		if options.MemorySwap == "-1" {
   121  			memSwap64 = -1
   122  		} else {
   123  			memSwap64, err = units.RAMInBytes(options.MemorySwap)
   124  			if err != nil {
   125  				return nil, fmt.Errorf("failed to parse memory-swap bytes %q: %w", options.MemorySwap, err)
   126  			}
   127  			if mem64 > 0 && memSwap64 > 0 && memSwap64 < mem64 {
   128  				return nil, fmt.Errorf("minimum memoryswap limit should be larger than memory limit, see usage")
   129  			}
   130  		}
   131  	} else {
   132  		// if `--memory-swap` is unset, the container can use as much swap as the `--memory` setting.
   133  		memSwap64 = mem64 * 2
   134  	}
   135  	if memSwap64 == 0 {
   136  		// if --memory-swap is set to 0, the setting is ignored, and the value is treated as unset.
   137  		memSwap64 = mem64 * 2
   138  	}
   139  	if memSwap64 != 0 {
   140  		opts = append(opts, oci.WithMemorySwap(memSwap64))
   141  	}
   142  	if mem64 > 0 && memReserve64 > 0 && mem64 < memReserve64 {
   143  		return nil, fmt.Errorf("minimum memory limit can not be less than memory reservation limit, see usage")
   144  	}
   145  	if options.MemorySwappiness64 > 100 || options.MemorySwappiness64 < -1 {
   146  		return nil, fmt.Errorf("invalid value: %v, valid memory swappiness range is 0-100", options.MemorySwappiness64)
   147  	}
   148  
   149  	var customMemRes customMemoryOptions
   150  	if memReserve64 >= 0 && options.MemoryReservationChanged {
   151  		customMemRes.MemoryReservation = &memReserve64
   152  	}
   153  	if options.MemorySwappiness64 >= 0 && options.MemorySwappiness64Changed {
   154  		memSwapinessUint64 := uint64(options.MemorySwappiness64)
   155  		customMemRes.MemorySwappiness = &memSwapinessUint64
   156  	}
   157  	if options.OomKillDisable {
   158  		customMemRes.disableOOMKiller = &options.OomKillDisable
   159  	}
   160  	opts = append(opts, withCustomMemoryResources(customMemRes))
   161  
   162  	if options.PidsLimit > 0 {
   163  		opts = append(opts, oci.WithPidsLimit(options.PidsLimit))
   164  	}
   165  
   166  	if len(options.CgroupConf) > 0 && infoutil.CgroupsVersion() == "1" {
   167  		return nil, errors.New("cannot use --cgroup-conf without cgroup v2")
   168  	}
   169  
   170  	unifieds := make(map[string]string)
   171  	for _, unified := range options.CgroupConf {
   172  		splitUnified := strings.SplitN(unified, "=", 2)
   173  		if len(splitUnified) < 2 {
   174  			return nil, errors.New("--cgroup-conf must be formatted KEY=VALUE")
   175  		}
   176  		unifieds[splitUnified[0]] = splitUnified[1]
   177  	}
   178  	opts = append(opts, withUnified(unifieds))
   179  
   180  	if options.BlkioWeight != 0 && !infoutil.BlockIOWeight(options.GOptions.CgroupManager) {
   181  		log.L.Warn("kernel support for cgroup blkio weight missing, weight discarded")
   182  		options.BlkioWeight = 0
   183  	}
   184  	if options.BlkioWeight > 0 && options.BlkioWeight < 10 || options.BlkioWeight > 1000 {
   185  		return nil, errors.New("range of blkio weight is from 10 to 1000")
   186  	}
   187  	opts = append(opts, withBlkioWeight(options.BlkioWeight))
   188  
   189  	switch options.Cgroupns {
   190  	case "private":
   191  		ns := specs.LinuxNamespace{
   192  			Type: specs.CgroupNamespace,
   193  		}
   194  		opts = append(opts, oci.WithLinuxNamespace(ns))
   195  	case "host":
   196  		opts = append(opts, oci.WithHostNamespace(specs.CgroupNamespace))
   197  	default:
   198  		return nil, fmt.Errorf("unknown cgroupns mode %q", options.Cgroupns)
   199  	}
   200  
   201  	for _, f := range options.Device {
   202  		devPath, mode, err := ParseDevice(f)
   203  		if err != nil {
   204  			return nil, fmt.Errorf("failed to parse device %q: %w", f, err)
   205  		}
   206  		opts = append(opts, oci.WithLinuxDevice(devPath, mode))
   207  	}
   208  	return opts, nil
   209  }
   210  
   211  func generateCgroupPath(id, cgroupManager, cgroupParent string) (string, error) {
   212  	var (
   213  		path         string
   214  		usingSystemd = cgroupManager == "systemd"
   215  		slice        = "system.slice"
   216  		scopePrefix  = ":nerdctl:"
   217  	)
   218  	if rootlessutil.IsRootlessChild() {
   219  		slice = "user.slice"
   220  	}
   221  
   222  	if cgroupParent == "" {
   223  		if usingSystemd {
   224  			// "slice:prefix:name"
   225  			path = slice + scopePrefix + id
   226  		}
   227  		// Nothing to do for the non-systemd case if a parent wasn't supplied,
   228  		// containerd already sets a default cgroup path as /<namespace>/<containerID>
   229  		return path, nil
   230  	}
   231  
   232  	// If the user asked for a cgroup parent, we will use systemd,
   233  	// Docker uses the following:
   234  	// parent + prefix (in our case, nerdctl) + containerID.
   235  	//
   236  	// In the non systemd case, it's just /parent/containerID
   237  	if usingSystemd {
   238  		if len(cgroupParent) <= 6 || !strings.HasSuffix(cgroupParent, ".slice") {
   239  			return "", errors.New(`cgroup-parent for systemd cgroup should be a valid slice named as "xxx.slice"`)
   240  		}
   241  		path = cgroupParent + scopePrefix + id
   242  	} else {
   243  		path = filepath.Join(cgroupParent, id)
   244  	}
   245  
   246  	return path, nil
   247  }
   248  
   249  // ParseDevice parses the give device string into hostDevPath and mode(defaults: "rwm").
   250  func ParseDevice(s string) (hostDevPath string, mode string, err error) {
   251  	mode = "rwm"
   252  	split := strings.Split(s, ":")
   253  	var containerDevPath string
   254  	switch len(split) {
   255  	case 1: // e.g. "/dev/sda1"
   256  		hostDevPath = split[0]
   257  		containerDevPath = hostDevPath
   258  	case 2: // e.g., "/dev/sda1:rwm", or "/dev/sda1:/dev/sda1
   259  		hostDevPath = split[0]
   260  		if !strings.Contains(split[1], "/") {
   261  			containerDevPath = hostDevPath
   262  			mode = split[1]
   263  		} else {
   264  			containerDevPath = split[1]
   265  		}
   266  	case 3: // e.g., "/dev/sda1:/dev/sda1:rwm"
   267  		hostDevPath = split[0]
   268  		containerDevPath = split[1]
   269  		mode = split[2]
   270  	default:
   271  		return "", "", errors.New("too many `:` symbols")
   272  	}
   273  
   274  	if containerDevPath != hostDevPath {
   275  		return "", "", errors.New("changing the path inside the container is not supported yet")
   276  	}
   277  
   278  	if !filepath.IsAbs(hostDevPath) {
   279  		return "", "", fmt.Errorf("%q is not an absolute path", hostDevPath)
   280  	}
   281  
   282  	if err := validateDeviceMode(mode); err != nil {
   283  		return "", "", err
   284  	}
   285  	return hostDevPath, mode, nil
   286  }
   287  
   288  func validateDeviceMode(mode string) error {
   289  	for _, r := range mode {
   290  		switch r {
   291  		case 'r', 'w', 'm':
   292  		default:
   293  			return fmt.Errorf("invalid mode %q: unexpected rune %v", mode, r)
   294  		}
   295  	}
   296  	return nil
   297  }
   298  
   299  func withUnified(unified map[string]string) oci.SpecOpts {
   300  	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *oci.Spec) (err error) {
   301  		if unified == nil {
   302  			return nil
   303  		}
   304  		s.Linux.Resources.Unified = make(map[string]string)
   305  		for k, v := range unified {
   306  			s.Linux.Resources.Unified[k] = v
   307  		}
   308  		return nil
   309  	}
   310  }
   311  
   312  func withBlkioWeight(blkioWeight uint16) oci.SpecOpts {
   313  	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *oci.Spec) error {
   314  		if blkioWeight == 0 {
   315  			return nil
   316  		}
   317  		s.Linux.Resources.BlockIO = &specs.LinuxBlockIO{Weight: &blkioWeight}
   318  		return nil
   319  	}
   320  }
   321  
   322  func withCustomMemoryResources(memoryOptions customMemoryOptions) oci.SpecOpts {
   323  	return func(_ context.Context, _ oci.Client, _ *containers.Container, s *oci.Spec) error {
   324  		if s.Linux != nil {
   325  			if s.Linux.Resources == nil {
   326  				s.Linux.Resources = &specs.LinuxResources{}
   327  			}
   328  			if s.Linux.Resources.Memory == nil {
   329  				s.Linux.Resources.Memory = &specs.LinuxMemory{}
   330  			}
   331  			if memoryOptions.disableOOMKiller != nil {
   332  				s.Linux.Resources.Memory.DisableOOMKiller = memoryOptions.disableOOMKiller
   333  			}
   334  			if memoryOptions.MemorySwappiness != nil {
   335  				s.Linux.Resources.Memory.Swappiness = memoryOptions.MemorySwappiness
   336  			}
   337  			if memoryOptions.MemoryReservation != nil {
   338  				s.Linux.Resources.Memory.Reservation = memoryOptions.MemoryReservation
   339  			}
   340  		}
   341  		return nil
   342  	}
   343  }