github.com/lalkh/containerd@v1.4.3/cmd/ctr/commands/run/run_unix.go (about) 1 // +build !windows 2 3 /* 4 Copyright The containerd Authors. 5 6 Licensed under the Apache License, Version 2.0 (the "License"); 7 you may not use this file except in compliance with the License. 8 You may obtain a copy of the License at 9 10 http://www.apache.org/licenses/LICENSE-2.0 11 12 Unless required by applicable law or agreed to in writing, software 13 distributed under the License is distributed on an "AS IS" BASIS, 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 See the License for the specific language governing permissions and 16 limitations under the License. 17 */ 18 19 package run 20 21 import ( 22 gocontext "context" 23 "path/filepath" 24 "strconv" 25 "strings" 26 27 "github.com/containerd/containerd" 28 "github.com/containerd/containerd/cmd/ctr/commands" 29 "github.com/containerd/containerd/contrib/nvidia" 30 "github.com/containerd/containerd/contrib/seccomp" 31 "github.com/containerd/containerd/oci" 32 "github.com/containerd/containerd/platforms" 33 "github.com/containerd/containerd/runtime/v2/runc/options" 34 "github.com/opencontainers/runtime-spec/specs-go" 35 "github.com/pkg/errors" 36 "github.com/sirupsen/logrus" 37 "github.com/urfave/cli" 38 ) 39 40 var platformRunFlags = []cli.Flag{ 41 cli.StringFlag{ 42 Name: "runc-binary", 43 Usage: "specify runc-compatible binary", 44 }, 45 cli.BoolFlag{ 46 Name: "runc-systemd-cgroup", 47 Usage: "start runc with systemd cgroup manager", 48 }, 49 cli.StringFlag{ 50 Name: "uidmap", 51 Usage: "run inside a user namespace with the specified UID mapping range; specified with the format `container-uid:host-uid:length`", 52 }, 53 cli.StringFlag{ 54 Name: "gidmap", 55 Usage: "run inside a user namespace with the specified GID mapping range; specified with the format `container-gid:host-gid:length`", 56 }, 57 cli.BoolFlag{ 58 Name: "remap-labels", 59 Usage: "provide the user namespace ID remapping to the snapshotter via label options; requires snapshotter support", 60 }, 61 cli.Float64Flag{ 62 Name: "cpus", 63 Usage: "set the CFS cpu qouta", 64 Value: 0.0, 65 }, 66 } 67 68 // NewContainer creates a new container 69 func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli.Context) (containerd.Container, error) { 70 var ( 71 id string 72 config = context.IsSet("config") 73 ) 74 if config { 75 id = context.Args().First() 76 } else { 77 id = context.Args().Get(1) 78 } 79 80 var ( 81 opts []oci.SpecOpts 82 cOpts []containerd.NewContainerOpts 83 spec containerd.NewContainerOpts 84 ) 85 86 cOpts = append(cOpts, containerd.WithContainerLabels(commands.LabelArgs(context.StringSlice("label")))) 87 if config { 88 opts = append(opts, oci.WithSpecFromFile(context.String("config"))) 89 } else { 90 var ( 91 ref = context.Args().First() 92 //for container's id is Args[1] 93 args = context.Args()[2:] 94 ) 95 opts = append(opts, oci.WithDefaultSpec(), oci.WithDefaultUnixDevices) 96 if ef := context.String("env-file"); ef != "" { 97 opts = append(opts, oci.WithEnvFile(ef)) 98 } 99 opts = append(opts, oci.WithEnv(context.StringSlice("env"))) 100 opts = append(opts, withMounts(context)) 101 102 if context.Bool("rootfs") { 103 rootfs, err := filepath.Abs(ref) 104 if err != nil { 105 return nil, err 106 } 107 opts = append(opts, oci.WithRootFSPath(rootfs)) 108 } else { 109 snapshotter := context.String("snapshotter") 110 var image containerd.Image 111 i, err := client.ImageService().Get(ctx, ref) 112 if err != nil { 113 return nil, err 114 } 115 if ps := context.String("platform"); ps != "" { 116 platform, err := platforms.Parse(ps) 117 if err != nil { 118 return nil, err 119 } 120 image = containerd.NewImageWithPlatform(client, i, platforms.Only(platform)) 121 } else { 122 image = containerd.NewImage(client, i) 123 } 124 125 unpacked, err := image.IsUnpacked(ctx, snapshotter) 126 if err != nil { 127 return nil, err 128 } 129 if !unpacked { 130 if err := image.Unpack(ctx, snapshotter); err != nil { 131 return nil, err 132 } 133 } 134 opts = append(opts, oci.WithImageConfig(image)) 135 cOpts = append(cOpts, 136 containerd.WithImage(image), 137 containerd.WithSnapshotter(snapshotter)) 138 if uidmap, gidmap := context.String("uidmap"), context.String("gidmap"); uidmap != "" && gidmap != "" { 139 uidMap, err := parseIDMapping(uidmap) 140 if err != nil { 141 return nil, err 142 } 143 gidMap, err := parseIDMapping(gidmap) 144 if err != nil { 145 return nil, err 146 } 147 opts = append(opts, 148 oci.WithUserNamespace([]specs.LinuxIDMapping{uidMap}, []specs.LinuxIDMapping{gidMap})) 149 // use snapshotter opts or the remapped snapshot support to shift the filesystem 150 // currently the only snapshotter known to support the labels is fuse-overlayfs: 151 // https://github.com/AkihiroSuda/containerd-fuse-overlayfs 152 if context.Bool("remap-labels") { 153 cOpts = append(cOpts, containerd.WithNewSnapshot(id, image, 154 containerd.WithRemapperLabels(0, uidMap.HostID, 0, gidMap.HostID, uidMap.Size))) 155 } else { 156 cOpts = append(cOpts, containerd.WithRemappedSnapshot(id, image, uidMap.HostID, gidMap.HostID)) 157 } 158 } else { 159 // Even when "read-only" is set, we don't use KindView snapshot here. (#1495) 160 // We pass writable snapshot to the OCI runtime, and the runtime remounts it as read-only, 161 // after creating some mount points on demand. 162 cOpts = append(cOpts, containerd.WithNewSnapshot(id, image)) 163 } 164 cOpts = append(cOpts, containerd.WithImageStopSignal(image, "SIGTERM")) 165 } 166 if context.Bool("read-only") { 167 opts = append(opts, oci.WithRootFSReadonly()) 168 } 169 if len(args) > 0 { 170 opts = append(opts, oci.WithProcessArgs(args...)) 171 } 172 if cwd := context.String("cwd"); cwd != "" { 173 opts = append(opts, oci.WithProcessCwd(cwd)) 174 } 175 if context.Bool("tty") { 176 opts = append(opts, oci.WithTTY) 177 } 178 if context.Bool("privileged") { 179 opts = append(opts, oci.WithPrivileged, oci.WithAllDevicesAllowed, oci.WithHostDevices) 180 } 181 if context.Bool("net-host") { 182 opts = append(opts, oci.WithHostNamespace(specs.NetworkNamespace), oci.WithHostHostsFile, oci.WithHostResolvconf) 183 } 184 if context.Bool("seccomp") { 185 opts = append(opts, seccomp.WithDefaultProfile()) 186 } 187 if cpus := context.Float64("cpus"); cpus > 0.0 { 188 var ( 189 period = uint64(100000) 190 quota = int64(cpus * 100000.0) 191 ) 192 opts = append(opts, oci.WithCPUCFS(quota, period)) 193 } 194 195 quota := context.Int64("cpu-quota") 196 period := context.Uint64("cpu-period") 197 if quota != -1 || period != 0 { 198 if cpus := context.Float64("cpus"); cpus > 0.0 { 199 return nil, errors.New("cpus and quota/period should be used separately") 200 } 201 opts = append(opts, oci.WithCPUCFS(quota, period)) 202 } 203 204 joinNs := context.StringSlice("with-ns") 205 for _, ns := range joinNs { 206 parts := strings.Split(ns, ":") 207 if len(parts) != 2 { 208 return nil, errors.New("joining a Linux namespace using --with-ns requires the format 'nstype:path'") 209 } 210 if !validNamespace(parts[0]) { 211 return nil, errors.New("the Linux namespace type specified in --with-ns is not valid: " + parts[0]) 212 } 213 opts = append(opts, oci.WithLinuxNamespace(specs.LinuxNamespace{ 214 Type: specs.LinuxNamespaceType(parts[0]), 215 Path: parts[1], 216 })) 217 } 218 if context.IsSet("gpus") { 219 opts = append(opts, nvidia.WithGPUs(nvidia.WithDevices(context.Int("gpus")), nvidia.WithAllCapabilities)) 220 } 221 if context.IsSet("allow-new-privs") { 222 opts = append(opts, oci.WithNewPrivileges) 223 } 224 if context.IsSet("cgroup") { 225 // NOTE: can be set to "" explicitly for disabling cgroup. 226 opts = append(opts, oci.WithCgroup(context.String("cgroup"))) 227 } 228 limit := context.Uint64("memory-limit") 229 if limit != 0 { 230 opts = append(opts, oci.WithMemoryLimit(limit)) 231 } 232 for _, dev := range context.StringSlice("device") { 233 opts = append(opts, oci.WithLinuxDevice(dev, "rwm")) 234 } 235 } 236 237 runtimeOpts, err := getRuntimeOptions(context) 238 if err != nil { 239 return nil, err 240 } 241 cOpts = append(cOpts, containerd.WithRuntime(context.String("runtime"), runtimeOpts)) 242 243 opts = append(opts, oci.WithAnnotations(commands.LabelArgs(context.StringSlice("label")))) 244 var s specs.Spec 245 spec = containerd.WithSpec(&s, opts...) 246 247 cOpts = append(cOpts, spec) 248 249 // oci.WithImageConfig (WithUsername, WithUserID) depends on access to rootfs for resolving via 250 // the /etc/{passwd,group} files. So cOpts needs to have precedence over opts. 251 return client.NewContainer(ctx, id, cOpts...) 252 } 253 254 func getRuncOptions(context *cli.Context) (*options.Options, error) { 255 runtimeOpts := &options.Options{} 256 if runcBinary := context.String("runc-binary"); runcBinary != "" { 257 runtimeOpts.BinaryName = runcBinary 258 } 259 if context.Bool("runc-systemd-cgroup") { 260 if context.String("cgroup") == "" { 261 // runc maps "machine.slice:foo:deadbeef" to "/machine.slice/foo-deadbeef.scope" 262 return nil, errors.New("option --runc-systemd-cgroup requires --cgroup to be set, e.g. \"machine.slice:foo:deadbeef\"") 263 } 264 runtimeOpts.SystemdCgroup = true 265 } 266 267 return runtimeOpts, nil 268 } 269 270 func getRuntimeOptions(context *cli.Context) (interface{}, error) { 271 // validate first 272 if (context.String("runc-binary") != "" || context.Bool("runc-systemd-cgroup")) && 273 context.String("runtime") != "io.containerd.runc.v2" { 274 return nil, errors.New("specifying runc-binary and runc-systemd-cgroup is only supported for \"io.containerd.runc.v2\" runtime") 275 } 276 277 if context.String("runtime") == "io.containerd.runc.v2" { 278 return getRuncOptions(context) 279 } 280 281 return nil, nil 282 } 283 284 func getNewTaskOpts(context *cli.Context) []containerd.NewTaskOpts { 285 var ( 286 tOpts []containerd.NewTaskOpts 287 ) 288 if context.Bool("no-pivot") { 289 tOpts = append(tOpts, containerd.WithNoPivotRoot) 290 } 291 if uidmap := context.String("uidmap"); uidmap != "" { 292 uidMap, err := parseIDMapping(uidmap) 293 if err != nil { 294 logrus.WithError(err).Warn("unable to parse uidmap; defaulting to uid 0 IO ownership") 295 } 296 tOpts = append(tOpts, containerd.WithUIDOwner(uidMap.HostID)) 297 } 298 if gidmap := context.String("gidmap"); gidmap != "" { 299 gidMap, err := parseIDMapping(gidmap) 300 if err != nil { 301 logrus.WithError(err).Warn("unable to parse gidmap; defaulting to gid 0 IO ownership") 302 } 303 tOpts = append(tOpts, containerd.WithGIDOwner(gidMap.HostID)) 304 } 305 return tOpts 306 } 307 308 func parseIDMapping(mapping string) (specs.LinuxIDMapping, error) { 309 parts := strings.Split(mapping, ":") 310 if len(parts) != 3 { 311 return specs.LinuxIDMapping{}, errors.New("user namespace mappings require the format `container-id:host-id:size`") 312 } 313 cID, err := strconv.ParseUint(parts[0], 0, 32) 314 if err != nil { 315 return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid container id for user namespace remapping") 316 } 317 hID, err := strconv.ParseUint(parts[1], 0, 32) 318 if err != nil { 319 return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid host id for user namespace remapping") 320 } 321 size, err := strconv.ParseUint(parts[2], 0, 32) 322 if err != nil { 323 return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid size for user namespace remapping") 324 } 325 return specs.LinuxIDMapping{ 326 ContainerID: uint32(cID), 327 HostID: uint32(hID), 328 Size: uint32(size), 329 }, nil 330 } 331 332 func validNamespace(ns string) bool { 333 linuxNs := specs.LinuxNamespaceType(ns) 334 switch linuxNs { 335 case specs.PIDNamespace, 336 specs.NetworkNamespace, 337 specs.UTSNamespace, 338 specs.MountNamespace, 339 specs.UserNamespace, 340 specs.IPCNamespace, 341 specs.CgroupNamespace: 342 return true 343 default: 344 return false 345 } 346 }