github.com/containerd/containerd@v22.0.0-20200918172823-438c87b8e050+incompatible/cmd/ctr/commands/run/run_unix.go (about) 1 // +build !windows 2 3 /* 4 Copyright The containerd Authors. 5 6 Licensed under the Apache License, Version 2.0 (the "License"); 7 you may not use this file except in compliance with the License. 8 You may obtain a copy of the License at 9 10 http://www.apache.org/licenses/LICENSE-2.0 11 12 Unless required by applicable law or agreed to in writing, software 13 distributed under the License is distributed on an "AS IS" BASIS, 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 See the License for the specific language governing permissions and 16 limitations under the License. 17 */ 18 19 package run 20 21 import ( 22 gocontext "context" 23 "fmt" 24 "path/filepath" 25 "strconv" 26 "strings" 27 28 "github.com/containerd/containerd" 29 "github.com/containerd/containerd/cmd/ctr/commands" 30 "github.com/containerd/containerd/contrib/nvidia" 31 "github.com/containerd/containerd/contrib/seccomp" 32 "github.com/containerd/containerd/oci" 33 "github.com/containerd/containerd/platforms" 34 "github.com/containerd/containerd/runtime/v2/runc/options" 35 "github.com/opencontainers/runtime-spec/specs-go" 36 "github.com/pkg/errors" 37 "github.com/sirupsen/logrus" 38 "github.com/urfave/cli" 39 ) 40 41 var platformRunFlags = []cli.Flag{ 42 cli.StringFlag{ 43 Name: "runc-binary", 44 Usage: "specify runc-compatible binary", 45 }, 46 cli.StringFlag{ 47 Name: "runc-root", 48 Usage: "specify runc-compatible root", 49 }, 50 cli.BoolFlag{ 51 Name: "runc-systemd-cgroup", 52 Usage: "start runc with systemd cgroup manager", 53 }, 54 cli.StringFlag{ 55 Name: "uidmap", 56 Usage: "run inside a user namespace with the specified UID mapping range; specified with the format `container-uid:host-uid:length`", 57 }, 58 cli.StringFlag{ 59 Name: "gidmap", 60 Usage: "run inside a user namespace with the specified GID mapping range; specified with the format `container-gid:host-gid:length`", 61 }, 62 cli.BoolFlag{ 63 Name: "remap-labels", 64 Usage: "provide the user namespace ID remapping to the snapshotter via label options; requires snapshotter support", 65 }, 66 cli.Float64Flag{ 67 Name: "cpus", 68 Usage: "set the CFS cpu quota", 69 Value: 0.0, 70 }, 71 } 72 73 // NewContainer creates a new container 74 func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli.Context) (containerd.Container, error) { 75 var ( 76 id string 77 config = context.IsSet("config") 78 ) 79 if config { 80 id = context.Args().First() 81 } else { 82 id = context.Args().Get(1) 83 } 84 85 var ( 86 opts []oci.SpecOpts 87 cOpts []containerd.NewContainerOpts 88 spec containerd.NewContainerOpts 89 ) 90 91 cOpts = append(cOpts, containerd.WithContainerLabels(commands.LabelArgs(context.StringSlice("label")))) 92 if config { 93 opts = append(opts, oci.WithSpecFromFile(context.String("config"))) 94 } else { 95 var ( 96 ref = context.Args().First() 97 //for container's id is Args[1] 98 args = context.Args()[2:] 99 ) 100 opts = append(opts, oci.WithDefaultSpec(), oci.WithDefaultUnixDevices) 101 if ef := context.String("env-file"); ef != "" { 102 opts = append(opts, oci.WithEnvFile(ef)) 103 } 104 opts = append(opts, oci.WithEnv(context.StringSlice("env"))) 105 opts = append(opts, withMounts(context)) 106 107 if context.Bool("rootfs") { 108 rootfs, err := filepath.Abs(ref) 109 if err != nil { 110 return nil, err 111 } 112 opts = append(opts, oci.WithRootFSPath(rootfs)) 113 } else { 114 snapshotter := context.String("snapshotter") 115 var image containerd.Image 116 i, err := client.ImageService().Get(ctx, ref) 117 if err != nil { 118 return nil, err 119 } 120 if ps := context.String("platform"); ps != "" { 121 platform, err := platforms.Parse(ps) 122 if err != nil { 123 return nil, err 124 } 125 image = containerd.NewImageWithPlatform(client, i, platforms.Only(platform)) 126 } else { 127 image = containerd.NewImage(client, i) 128 } 129 130 unpacked, err := image.IsUnpacked(ctx, snapshotter) 131 if err != nil { 132 return nil, err 133 } 134 if !unpacked { 135 if err := image.Unpack(ctx, snapshotter); err != nil { 136 return nil, err 137 } 138 } 139 opts = append(opts, oci.WithImageConfig(image)) 140 cOpts = append(cOpts, 141 containerd.WithImage(image), 142 containerd.WithSnapshotter(snapshotter)) 143 if uidmap, gidmap := context.String("uidmap"), context.String("gidmap"); uidmap != "" && gidmap != "" { 144 uidMap, err := parseIDMapping(uidmap) 145 if err != nil { 146 return nil, err 147 } 148 gidMap, err := parseIDMapping(gidmap) 149 if err != nil { 150 return nil, err 151 } 152 opts = append(opts, 153 oci.WithUserNamespace([]specs.LinuxIDMapping{uidMap}, []specs.LinuxIDMapping{gidMap})) 154 // use snapshotter opts or the remapped snapshot support to shift the filesystem 155 // currently the only snapshotter known to support the labels is fuse-overlayfs: 156 // https://github.com/AkihiroSuda/containerd-fuse-overlayfs 157 if context.Bool("remap-labels") { 158 cOpts = append(cOpts, containerd.WithNewSnapshot(id, image, 159 containerd.WithRemapperLabels(0, uidMap.HostID, 0, gidMap.HostID, uidMap.Size))) 160 } else { 161 cOpts = append(cOpts, containerd.WithRemappedSnapshot(id, image, uidMap.HostID, gidMap.HostID)) 162 } 163 } else { 164 // Even when "read-only" is set, we don't use KindView snapshot here. (#1495) 165 // We pass writable snapshot to the OCI runtime, and the runtime remounts it as read-only, 166 // after creating some mount points on demand. 167 cOpts = append(cOpts, containerd.WithNewSnapshot(id, image)) 168 } 169 cOpts = append(cOpts, containerd.WithImageStopSignal(image, "SIGTERM")) 170 } 171 if context.Bool("read-only") { 172 opts = append(opts, oci.WithRootFSReadonly()) 173 } 174 if len(args) > 0 { 175 opts = append(opts, oci.WithProcessArgs(args...)) 176 } 177 if cwd := context.String("cwd"); cwd != "" { 178 opts = append(opts, oci.WithProcessCwd(cwd)) 179 } 180 if context.Bool("tty") { 181 opts = append(opts, oci.WithTTY) 182 } 183 if context.Bool("privileged") { 184 opts = append(opts, oci.WithPrivileged, oci.WithAllDevicesAllowed, oci.WithHostDevices) 185 } 186 if context.Bool("net-host") { 187 opts = append(opts, oci.WithHostNamespace(specs.NetworkNamespace), oci.WithHostHostsFile, oci.WithHostResolvconf) 188 } 189 190 seccompProfile := context.String("seccomp-profile") 191 192 if !context.Bool("seccomp") && seccompProfile != "" { 193 return nil, fmt.Errorf("seccomp must be set to true, if using a custom seccomp-profile") 194 } 195 196 if context.Bool("seccomp") { 197 if seccompProfile != "" { 198 opts = append(opts, seccomp.WithProfile(seccompProfile)) 199 } else { 200 opts = append(opts, seccomp.WithDefaultProfile()) 201 } 202 } 203 204 if cpus := context.Float64("cpus"); cpus > 0.0 { 205 var ( 206 period = uint64(100000) 207 quota = int64(cpus * 100000.0) 208 ) 209 opts = append(opts, oci.WithCPUCFS(quota, period)) 210 } 211 212 quota := context.Int64("cpu-quota") 213 period := context.Uint64("cpu-period") 214 if quota != -1 || period != 0 { 215 if cpus := context.Float64("cpus"); cpus > 0.0 { 216 return nil, errors.New("cpus and quota/period should be used separately") 217 } 218 opts = append(opts, oci.WithCPUCFS(quota, period)) 219 } 220 221 joinNs := context.StringSlice("with-ns") 222 for _, ns := range joinNs { 223 parts := strings.Split(ns, ":") 224 if len(parts) != 2 { 225 return nil, errors.New("joining a Linux namespace using --with-ns requires the format 'nstype:path'") 226 } 227 if !validNamespace(parts[0]) { 228 return nil, errors.New("the Linux namespace type specified in --with-ns is not valid: " + parts[0]) 229 } 230 opts = append(opts, oci.WithLinuxNamespace(specs.LinuxNamespace{ 231 Type: specs.LinuxNamespaceType(parts[0]), 232 Path: parts[1], 233 })) 234 } 235 if context.IsSet("gpus") { 236 opts = append(opts, nvidia.WithGPUs(nvidia.WithDevices(context.Int("gpus")), nvidia.WithAllCapabilities)) 237 } 238 if context.IsSet("allow-new-privs") { 239 opts = append(opts, oci.WithNewPrivileges) 240 } 241 if context.IsSet("cgroup") { 242 // NOTE: can be set to "" explicitly for disabling cgroup. 243 opts = append(opts, oci.WithCgroup(context.String("cgroup"))) 244 } 245 limit := context.Uint64("memory-limit") 246 if limit != 0 { 247 opts = append(opts, oci.WithMemoryLimit(limit)) 248 } 249 for _, dev := range context.StringSlice("device") { 250 opts = append(opts, oci.WithLinuxDevice(dev, "rwm")) 251 } 252 } 253 254 runtimeOpts, err := getRuntimeOptions(context) 255 if err != nil { 256 return nil, err 257 } 258 cOpts = append(cOpts, containerd.WithRuntime(context.String("runtime"), runtimeOpts)) 259 260 opts = append(opts, oci.WithAnnotations(commands.LabelArgs(context.StringSlice("label")))) 261 var s specs.Spec 262 spec = containerd.WithSpec(&s, opts...) 263 264 cOpts = append(cOpts, spec) 265 266 // oci.WithImageConfig (WithUsername, WithUserID) depends on access to rootfs for resolving via 267 // the /etc/{passwd,group} files. So cOpts needs to have precedence over opts. 268 return client.NewContainer(ctx, id, cOpts...) 269 } 270 271 func getRuncOptions(context *cli.Context) (*options.Options, error) { 272 runtimeOpts := &options.Options{} 273 if runcBinary := context.String("runc-binary"); runcBinary != "" { 274 runtimeOpts.BinaryName = runcBinary 275 } 276 if context.Bool("runc-systemd-cgroup") { 277 if context.String("cgroup") == "" { 278 // runc maps "machine.slice:foo:deadbeef" to "/machine.slice/foo-deadbeef.scope" 279 return nil, errors.New("option --runc-systemd-cgroup requires --cgroup to be set, e.g. \"machine.slice:foo:deadbeef\"") 280 } 281 runtimeOpts.SystemdCgroup = true 282 } 283 if root := context.String("runc-root"); root != "" { 284 runtimeOpts.Root = root 285 } 286 287 return runtimeOpts, nil 288 } 289 290 func getRuntimeOptions(context *cli.Context) (interface{}, error) { 291 // validate first 292 if (context.String("runc-binary") != "" || context.Bool("runc-systemd-cgroup")) && 293 context.String("runtime") != "io.containerd.runc.v2" { 294 return nil, errors.New("specifying runc-binary and runc-systemd-cgroup is only supported for \"io.containerd.runc.v2\" runtime") 295 } 296 297 if context.String("runtime") == "io.containerd.runc.v2" { 298 return getRuncOptions(context) 299 } 300 301 return nil, nil 302 } 303 304 func getNewTaskOpts(context *cli.Context) []containerd.NewTaskOpts { 305 var ( 306 tOpts []containerd.NewTaskOpts 307 ) 308 if context.Bool("no-pivot") { 309 tOpts = append(tOpts, containerd.WithNoPivotRoot) 310 } 311 if uidmap := context.String("uidmap"); uidmap != "" { 312 uidMap, err := parseIDMapping(uidmap) 313 if err != nil { 314 logrus.WithError(err).Warn("unable to parse uidmap; defaulting to uid 0 IO ownership") 315 } 316 tOpts = append(tOpts, containerd.WithUIDOwner(uidMap.HostID)) 317 } 318 if gidmap := context.String("gidmap"); gidmap != "" { 319 gidMap, err := parseIDMapping(gidmap) 320 if err != nil { 321 logrus.WithError(err).Warn("unable to parse gidmap; defaulting to gid 0 IO ownership") 322 } 323 tOpts = append(tOpts, containerd.WithGIDOwner(gidMap.HostID)) 324 } 325 return tOpts 326 } 327 328 func parseIDMapping(mapping string) (specs.LinuxIDMapping, error) { 329 parts := strings.Split(mapping, ":") 330 if len(parts) != 3 { 331 return specs.LinuxIDMapping{}, errors.New("user namespace mappings require the format `container-id:host-id:size`") 332 } 333 cID, err := strconv.ParseUint(parts[0], 0, 32) 334 if err != nil { 335 return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid container id for user namespace remapping") 336 } 337 hID, err := strconv.ParseUint(parts[1], 0, 32) 338 if err != nil { 339 return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid host id for user namespace remapping") 340 } 341 size, err := strconv.ParseUint(parts[2], 0, 32) 342 if err != nil { 343 return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid size for user namespace remapping") 344 } 345 return specs.LinuxIDMapping{ 346 ContainerID: uint32(cID), 347 HostID: uint32(hID), 348 Size: uint32(size), 349 }, nil 350 } 351 352 func validNamespace(ns string) bool { 353 linuxNs := specs.LinuxNamespaceType(ns) 354 switch linuxNs { 355 case specs.PIDNamespace, 356 specs.NetworkNamespace, 357 specs.UTSNamespace, 358 specs.MountNamespace, 359 specs.UserNamespace, 360 specs.IPCNamespace, 361 specs.CgroupNamespace: 362 return true 363 default: 364 return false 365 } 366 }