github.com/containerd/nerdctl@v1.7.7/pkg/cmd/container/run_cgroup_linux.go (about) 1 /* 2 Copyright The containerd Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package container 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "path/filepath" 24 "strings" 25 26 "github.com/containerd/containerd/containers" 27 "github.com/containerd/containerd/oci" 28 "github.com/containerd/log" 29 "github.com/containerd/nerdctl/pkg/api/types" 30 "github.com/containerd/nerdctl/pkg/infoutil" 31 "github.com/containerd/nerdctl/pkg/rootlessutil" 32 "github.com/docker/go-units" 33 "github.com/opencontainers/runtime-spec/specs-go" 34 ) 35 36 type customMemoryOptions struct { 37 MemoryReservation *int64 38 MemorySwappiness *uint64 39 disableOOMKiller *bool 40 } 41 42 func generateCgroupOpts(id string, options types.ContainerCreateOptions) ([]oci.SpecOpts, error) { 43 if options.KernelMemory != "" { 44 log.L.Warnf("The --kernel-memory flag is no longer supported. This flag is a noop.") 45 } 46 47 if options.Memory == "" && options.OomKillDisable { 48 log.L.Warn("Disabling the OOM killer on containers without setting a '-m/--memory' limit may be dangerous.") 49 } 50 51 if options.GOptions.CgroupManager == "none" { 52 if !rootlessutil.IsRootless() { 53 return nil, errors.New(`cgroup-manager "none" is only supported for rootless`) 54 } 55 56 if options.CPUs > 0.0 || options.Memory != "" || options.MemorySwap != "" || options.PidsLimit > 0 { 57 log.L.Warn(`cgroup manager is set to "none", discarding resource limit requests. ` + 58 "(Hint: enable cgroup v2 with systemd: https://rootlesscontaine.rs/getting-started/common/cgroup2/)") 59 } 60 if options.CgroupParent != "" { 61 log.L.Warnf(`cgroup manager is set to "none", ignoring cgroup parent %q`+ 62 "(Hint: enable cgroup v2 with systemd: https://rootlesscontaine.rs/getting-started/common/cgroup2/)", options.CgroupParent) 63 } 64 return []oci.SpecOpts{oci.WithCgroup("")}, nil 65 } 66 67 var opts []oci.SpecOpts // nolint: prealloc 68 path, err := generateCgroupPath(id, options.GOptions.CgroupManager, options.CgroupParent) 69 if err != nil { 70 return nil, err 71 } 72 if path != "" { 73 opts = append(opts, oci.WithCgroup(path)) 74 } 75 76 // cpus: from https://github.com/containerd/containerd/blob/v1.4.3/cmd/ctr/commands/run/run_unix.go#L187-L193 77 if options.CPUs > 0.0 { 78 var ( 79 period = uint64(100000) 80 quota = int64(options.CPUs * 100000.0) 81 ) 82 opts = append(opts, oci.WithCPUCFS(quota, period)) 83 } 84 85 if options.CPUShares != 0 { 86 opts = append(opts, oci.WithCPUShares(options.CPUShares)) 87 } 88 89 if options.CPUSetCPUs != "" { 90 opts = append(opts, oci.WithCPUs(options.CPUSetCPUs)) 91 } 92 if options.CPUQuota != -1 || options.CPUPeriod != 0 { 93 if options.CPUs > 0.0 { 94 return nil, errors.New("cpus and quota/period should be used separately") 95 } 96 opts = append(opts, oci.WithCPUCFS(options.CPUQuota, options.CPUPeriod)) 97 } 98 if options.CPUSetMems != "" { 99 opts = append(opts, oci.WithCPUsMems(options.CPUSetMems)) 100 } 101 102 var mem64 int64 103 if options.Memory != "" { 104 mem64, err = units.RAMInBytes(options.Memory) 105 if err != nil { 106 return nil, fmt.Errorf("failed to parse memory bytes %q: %w", options.Memory, err) 107 } 108 opts = append(opts, oci.WithMemoryLimit(uint64(mem64))) 109 } 110 111 var memReserve64 int64 112 if options.MemoryReservation != "" { 113 memReserve64, err = units.RAMInBytes(options.MemoryReservation) 114 if err != nil { 115 return nil, fmt.Errorf("failed to parse memory bytes %q: %w", options.MemoryReservation, err) 116 } 117 } 118 var memSwap64 int64 119 if options.MemorySwap != "" { 120 if options.MemorySwap == "-1" { 121 memSwap64 = -1 122 } else { 123 memSwap64, err = units.RAMInBytes(options.MemorySwap) 124 if err != nil { 125 return nil, fmt.Errorf("failed to parse memory-swap bytes %q: %w", options.MemorySwap, err) 126 } 127 if mem64 > 0 && memSwap64 > 0 && memSwap64 < mem64 { 128 return nil, fmt.Errorf("minimum memoryswap limit should be larger than memory limit, see usage") 129 } 130 } 131 } else { 132 // if `--memory-swap` is unset, the container can use as much swap as the `--memory` setting. 133 memSwap64 = mem64 * 2 134 } 135 if memSwap64 == 0 { 136 // if --memory-swap is set to 0, the setting is ignored, and the value is treated as unset. 137 memSwap64 = mem64 * 2 138 } 139 if memSwap64 != 0 { 140 opts = append(opts, oci.WithMemorySwap(memSwap64)) 141 } 142 if mem64 > 0 && memReserve64 > 0 && mem64 < memReserve64 { 143 return nil, fmt.Errorf("minimum memory limit can not be less than memory reservation limit, see usage") 144 } 145 if options.MemorySwappiness64 > 100 || options.MemorySwappiness64 < -1 { 146 return nil, fmt.Errorf("invalid value: %v, valid memory swappiness range is 0-100", options.MemorySwappiness64) 147 } 148 149 var customMemRes customMemoryOptions 150 if memReserve64 >= 0 && options.MemoryReservationChanged { 151 customMemRes.MemoryReservation = &memReserve64 152 } 153 if options.MemorySwappiness64 >= 0 && options.MemorySwappiness64Changed { 154 memSwapinessUint64 := uint64(options.MemorySwappiness64) 155 customMemRes.MemorySwappiness = &memSwapinessUint64 156 } 157 if options.OomKillDisable { 158 customMemRes.disableOOMKiller = &options.OomKillDisable 159 } 160 opts = append(opts, withCustomMemoryResources(customMemRes)) 161 162 if options.PidsLimit > 0 { 163 opts = append(opts, oci.WithPidsLimit(options.PidsLimit)) 164 } 165 166 if len(options.CgroupConf) > 0 && infoutil.CgroupsVersion() == "1" { 167 return nil, errors.New("cannot use --cgroup-conf without cgroup v2") 168 } 169 170 unifieds := make(map[string]string) 171 for _, unified := range options.CgroupConf { 172 splitUnified := strings.SplitN(unified, "=", 2) 173 if len(splitUnified) < 2 { 174 return nil, errors.New("--cgroup-conf must be formatted KEY=VALUE") 175 } 176 unifieds[splitUnified[0]] = splitUnified[1] 177 } 178 opts = append(opts, withUnified(unifieds)) 179 180 if options.BlkioWeight != 0 && !infoutil.BlockIOWeight(options.GOptions.CgroupManager) { 181 log.L.Warn("kernel support for cgroup blkio weight missing, weight discarded") 182 options.BlkioWeight = 0 183 } 184 if options.BlkioWeight > 0 && options.BlkioWeight < 10 || options.BlkioWeight > 1000 { 185 return nil, errors.New("range of blkio weight is from 10 to 1000") 186 } 187 opts = append(opts, withBlkioWeight(options.BlkioWeight)) 188 189 switch options.Cgroupns { 190 case "private": 191 ns := specs.LinuxNamespace{ 192 Type: specs.CgroupNamespace, 193 } 194 opts = append(opts, oci.WithLinuxNamespace(ns)) 195 case "host": 196 opts = append(opts, oci.WithHostNamespace(specs.CgroupNamespace)) 197 default: 198 return nil, fmt.Errorf("unknown cgroupns mode %q", options.Cgroupns) 199 } 200 201 for _, f := range options.Device { 202 devPath, mode, err := ParseDevice(f) 203 if err != nil { 204 return nil, fmt.Errorf("failed to parse device %q: %w", f, err) 205 } 206 opts = append(opts, oci.WithLinuxDevice(devPath, mode)) 207 } 208 return opts, nil 209 } 210 211 func generateCgroupPath(id, cgroupManager, cgroupParent string) (string, error) { 212 var ( 213 path string 214 usingSystemd = cgroupManager == "systemd" 215 slice = "system.slice" 216 scopePrefix = ":nerdctl:" 217 ) 218 if rootlessutil.IsRootlessChild() { 219 slice = "user.slice" 220 } 221 222 if cgroupParent == "" { 223 if usingSystemd { 224 // "slice:prefix:name" 225 path = slice + scopePrefix + id 226 } 227 // Nothing to do for the non-systemd case if a parent wasn't supplied, 228 // containerd already sets a default cgroup path as /<namespace>/<containerID> 229 return path, nil 230 } 231 232 // If the user asked for a cgroup parent, we will use systemd, 233 // Docker uses the following: 234 // parent + prefix (in our case, nerdctl) + containerID. 235 // 236 // In the non systemd case, it's just /parent/containerID 237 if usingSystemd { 238 if len(cgroupParent) <= 6 || !strings.HasSuffix(cgroupParent, ".slice") { 239 return "", errors.New(`cgroup-parent for systemd cgroup should be a valid slice named as "xxx.slice"`) 240 } 241 path = cgroupParent + scopePrefix + id 242 } else { 243 path = filepath.Join(cgroupParent, id) 244 } 245 246 return path, nil 247 } 248 249 // ParseDevice parses the give device string into hostDevPath and mode(defaults: "rwm"). 250 func ParseDevice(s string) (hostDevPath string, mode string, err error) { 251 mode = "rwm" 252 split := strings.Split(s, ":") 253 var containerDevPath string 254 switch len(split) { 255 case 1: // e.g. "/dev/sda1" 256 hostDevPath = split[0] 257 containerDevPath = hostDevPath 258 case 2: // e.g., "/dev/sda1:rwm", or "/dev/sda1:/dev/sda1 259 hostDevPath = split[0] 260 if !strings.Contains(split[1], "/") { 261 containerDevPath = hostDevPath 262 mode = split[1] 263 } else { 264 containerDevPath = split[1] 265 } 266 case 3: // e.g., "/dev/sda1:/dev/sda1:rwm" 267 hostDevPath = split[0] 268 containerDevPath = split[1] 269 mode = split[2] 270 default: 271 return "", "", errors.New("too many `:` symbols") 272 } 273 274 if containerDevPath != hostDevPath { 275 return "", "", errors.New("changing the path inside the container is not supported yet") 276 } 277 278 if !filepath.IsAbs(hostDevPath) { 279 return "", "", fmt.Errorf("%q is not an absolute path", hostDevPath) 280 } 281 282 if err := validateDeviceMode(mode); err != nil { 283 return "", "", err 284 } 285 return hostDevPath, mode, nil 286 } 287 288 func validateDeviceMode(mode string) error { 289 for _, r := range mode { 290 switch r { 291 case 'r', 'w', 'm': 292 default: 293 return fmt.Errorf("invalid mode %q: unexpected rune %v", mode, r) 294 } 295 } 296 return nil 297 } 298 299 func withUnified(unified map[string]string) oci.SpecOpts { 300 return func(_ context.Context, _ oci.Client, _ *containers.Container, s *oci.Spec) (err error) { 301 if unified == nil { 302 return nil 303 } 304 s.Linux.Resources.Unified = make(map[string]string) 305 for k, v := range unified { 306 s.Linux.Resources.Unified[k] = v 307 } 308 return nil 309 } 310 } 311 312 func withBlkioWeight(blkioWeight uint16) oci.SpecOpts { 313 return func(_ context.Context, _ oci.Client, _ *containers.Container, s *oci.Spec) error { 314 if blkioWeight == 0 { 315 return nil 316 } 317 s.Linux.Resources.BlockIO = &specs.LinuxBlockIO{Weight: &blkioWeight} 318 return nil 319 } 320 } 321 322 func withCustomMemoryResources(memoryOptions customMemoryOptions) oci.SpecOpts { 323 return func(_ context.Context, _ oci.Client, _ *containers.Container, s *oci.Spec) error { 324 if s.Linux != nil { 325 if s.Linux.Resources == nil { 326 s.Linux.Resources = &specs.LinuxResources{} 327 } 328 if s.Linux.Resources.Memory == nil { 329 s.Linux.Resources.Memory = &specs.LinuxMemory{} 330 } 331 if memoryOptions.disableOOMKiller != nil { 332 s.Linux.Resources.Memory.DisableOOMKiller = memoryOptions.disableOOMKiller 333 } 334 if memoryOptions.MemorySwappiness != nil { 335 s.Linux.Resources.Memory.Swappiness = memoryOptions.MemorySwappiness 336 } 337 if memoryOptions.MemoryReservation != nil { 338 s.Linux.Resources.Memory.Reservation = memoryOptions.MemoryReservation 339 } 340 } 341 return nil 342 } 343 }