github.com/containerd/Containerd@v1.4.13/cmd/ctr/commands/run/run_unix.go (about)

     1  // +build !windows
     2  
     3  /*
     4     Copyright The containerd Authors.
     5  
     6     Licensed under the Apache License, Version 2.0 (the "License");
     7     you may not use this file except in compliance with the License.
     8     You may obtain a copy of the License at
     9  
    10         http://www.apache.org/licenses/LICENSE-2.0
    11  
    12     Unless required by applicable law or agreed to in writing, software
    13     distributed under the License is distributed on an "AS IS" BASIS,
    14     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15     See the License for the specific language governing permissions and
    16     limitations under the License.
    17  */
    18  
    19  package run
    20  
    21  import (
    22  	gocontext "context"
    23  	"path/filepath"
    24  	"strconv"
    25  	"strings"
    26  
    27  	"github.com/containerd/containerd"
    28  	"github.com/containerd/containerd/cmd/ctr/commands"
    29  	"github.com/containerd/containerd/contrib/nvidia"
    30  	"github.com/containerd/containerd/contrib/seccomp"
    31  	"github.com/containerd/containerd/oci"
    32  	"github.com/containerd/containerd/platforms"
    33  	"github.com/containerd/containerd/runtime/v2/runc/options"
    34  	"github.com/opencontainers/runtime-spec/specs-go"
    35  	"github.com/pkg/errors"
    36  	"github.com/sirupsen/logrus"
    37  	"github.com/urfave/cli"
    38  )
    39  
    40  var platformRunFlags = []cli.Flag{
    41  	cli.StringFlag{
    42  		Name:  "runc-binary",
    43  		Usage: "specify runc-compatible binary",
    44  	},
    45  	cli.BoolFlag{
    46  		Name:  "runc-systemd-cgroup",
    47  		Usage: "start runc with systemd cgroup manager",
    48  	},
    49  	cli.StringFlag{
    50  		Name:  "uidmap",
    51  		Usage: "run inside a user namespace with the specified UID mapping range; specified with the format `container-uid:host-uid:length`",
    52  	},
    53  	cli.StringFlag{
    54  		Name:  "gidmap",
    55  		Usage: "run inside a user namespace with the specified GID mapping range; specified with the format `container-gid:host-gid:length`",
    56  	},
    57  	cli.BoolFlag{
    58  		Name:  "remap-labels",
    59  		Usage: "provide the user namespace ID remapping to the snapshotter via label options; requires snapshotter support",
    60  	},
    61  	cli.Float64Flag{
    62  		Name:  "cpus",
    63  		Usage: "set the CFS cpu qouta",
    64  		Value: 0.0,
    65  	},
    66  }
    67  
    68  // NewContainer creates a new container
    69  func NewContainer(ctx gocontext.Context, client *containerd.Client, context *cli.Context) (containerd.Container, error) {
    70  	var (
    71  		id     string
    72  		config = context.IsSet("config")
    73  	)
    74  	if config {
    75  		id = context.Args().First()
    76  	} else {
    77  		id = context.Args().Get(1)
    78  	}
    79  
    80  	var (
    81  		opts  []oci.SpecOpts
    82  		cOpts []containerd.NewContainerOpts
    83  		spec  containerd.NewContainerOpts
    84  	)
    85  
    86  	cOpts = append(cOpts, containerd.WithContainerLabels(commands.LabelArgs(context.StringSlice("label"))))
    87  	if config {
    88  		opts = append(opts, oci.WithSpecFromFile(context.String("config")))
    89  	} else {
    90  		var (
    91  			ref = context.Args().First()
    92  			//for container's id is Args[1]
    93  			args = context.Args()[2:]
    94  		)
    95  		opts = append(opts, oci.WithDefaultSpec(), oci.WithDefaultUnixDevices)
    96  		if ef := context.String("env-file"); ef != "" {
    97  			opts = append(opts, oci.WithEnvFile(ef))
    98  		}
    99  		opts = append(opts, oci.WithEnv(context.StringSlice("env")))
   100  		opts = append(opts, withMounts(context))
   101  
   102  		if context.Bool("rootfs") {
   103  			rootfs, err := filepath.Abs(ref)
   104  			if err != nil {
   105  				return nil, err
   106  			}
   107  			opts = append(opts, oci.WithRootFSPath(rootfs))
   108  		} else {
   109  			snapshotter := context.String("snapshotter")
   110  			var image containerd.Image
   111  			i, err := client.ImageService().Get(ctx, ref)
   112  			if err != nil {
   113  				return nil, err
   114  			}
   115  			if ps := context.String("platform"); ps != "" {
   116  				platform, err := platforms.Parse(ps)
   117  				if err != nil {
   118  					return nil, err
   119  				}
   120  				image = containerd.NewImageWithPlatform(client, i, platforms.Only(platform))
   121  			} else {
   122  				image = containerd.NewImage(client, i)
   123  			}
   124  
   125  			unpacked, err := image.IsUnpacked(ctx, snapshotter)
   126  			if err != nil {
   127  				return nil, err
   128  			}
   129  			if !unpacked {
   130  				if err := image.Unpack(ctx, snapshotter); err != nil {
   131  					return nil, err
   132  				}
   133  			}
   134  			opts = append(opts, oci.WithImageConfig(image))
   135  			cOpts = append(cOpts,
   136  				containerd.WithImage(image),
   137  				containerd.WithSnapshotter(snapshotter))
   138  			if uidmap, gidmap := context.String("uidmap"), context.String("gidmap"); uidmap != "" && gidmap != "" {
   139  				uidMap, err := parseIDMapping(uidmap)
   140  				if err != nil {
   141  					return nil, err
   142  				}
   143  				gidMap, err := parseIDMapping(gidmap)
   144  				if err != nil {
   145  					return nil, err
   146  				}
   147  				opts = append(opts,
   148  					oci.WithUserNamespace([]specs.LinuxIDMapping{uidMap}, []specs.LinuxIDMapping{gidMap}))
   149  				// use snapshotter opts or the remapped snapshot support to shift the filesystem
   150  				// currently the only snapshotter known to support the labels is fuse-overlayfs:
   151  				// https://github.com/AkihiroSuda/containerd-fuse-overlayfs
   152  				if context.Bool("remap-labels") {
   153  					cOpts = append(cOpts, containerd.WithNewSnapshot(id, image,
   154  						containerd.WithRemapperLabels(0, uidMap.HostID, 0, gidMap.HostID, uidMap.Size)))
   155  				} else {
   156  					cOpts = append(cOpts, containerd.WithRemappedSnapshot(id, image, uidMap.HostID, gidMap.HostID))
   157  				}
   158  			} else {
   159  				// Even when "read-only" is set, we don't use KindView snapshot here. (#1495)
   160  				// We pass writable snapshot to the OCI runtime, and the runtime remounts it as read-only,
   161  				// after creating some mount points on demand.
   162  				cOpts = append(cOpts, containerd.WithNewSnapshot(id, image))
   163  			}
   164  			cOpts = append(cOpts, containerd.WithImageStopSignal(image, "SIGTERM"))
   165  		}
   166  		if context.Bool("read-only") {
   167  			opts = append(opts, oci.WithRootFSReadonly())
   168  		}
   169  		if len(args) > 0 {
   170  			opts = append(opts, oci.WithProcessArgs(args...))
   171  		}
   172  		if cwd := context.String("cwd"); cwd != "" {
   173  			opts = append(opts, oci.WithProcessCwd(cwd))
   174  		}
   175  		if context.Bool("tty") {
   176  			opts = append(opts, oci.WithTTY)
   177  		}
   178  		if context.Bool("privileged") {
   179  			opts = append(opts, oci.WithPrivileged, oci.WithAllDevicesAllowed, oci.WithHostDevices)
   180  		}
   181  		if context.Bool("net-host") {
   182  			opts = append(opts, oci.WithHostNamespace(specs.NetworkNamespace), oci.WithHostHostsFile, oci.WithHostResolvconf)
   183  		}
   184  		if context.Bool("seccomp") {
   185  			opts = append(opts, seccomp.WithDefaultProfile())
   186  		}
   187  		if cpus := context.Float64("cpus"); cpus > 0.0 {
   188  			var (
   189  				period = uint64(100000)
   190  				quota  = int64(cpus * 100000.0)
   191  			)
   192  			opts = append(opts, oci.WithCPUCFS(quota, period))
   193  		}
   194  
   195  		quota := context.Int64("cpu-quota")
   196  		period := context.Uint64("cpu-period")
   197  		if quota != -1 || period != 0 {
   198  			if cpus := context.Float64("cpus"); cpus > 0.0 {
   199  				return nil, errors.New("cpus and quota/period should be used separately")
   200  			}
   201  			opts = append(opts, oci.WithCPUCFS(quota, period))
   202  		}
   203  
   204  		joinNs := context.StringSlice("with-ns")
   205  		for _, ns := range joinNs {
   206  			parts := strings.Split(ns, ":")
   207  			if len(parts) != 2 {
   208  				return nil, errors.New("joining a Linux namespace using --with-ns requires the format 'nstype:path'")
   209  			}
   210  			if !validNamespace(parts[0]) {
   211  				return nil, errors.New("the Linux namespace type specified in --with-ns is not valid: " + parts[0])
   212  			}
   213  			opts = append(opts, oci.WithLinuxNamespace(specs.LinuxNamespace{
   214  				Type: specs.LinuxNamespaceType(parts[0]),
   215  				Path: parts[1],
   216  			}))
   217  		}
   218  		if context.IsSet("gpus") {
   219  			opts = append(opts, nvidia.WithGPUs(nvidia.WithDevices(context.Int("gpus")), nvidia.WithAllCapabilities))
   220  		}
   221  		if context.IsSet("allow-new-privs") {
   222  			opts = append(opts, oci.WithNewPrivileges)
   223  		}
   224  		if context.IsSet("cgroup") {
   225  			// NOTE: can be set to "" explicitly for disabling cgroup.
   226  			opts = append(opts, oci.WithCgroup(context.String("cgroup")))
   227  		}
   228  		limit := context.Uint64("memory-limit")
   229  		if limit != 0 {
   230  			opts = append(opts, oci.WithMemoryLimit(limit))
   231  		}
   232  		for _, dev := range context.StringSlice("device") {
   233  			opts = append(opts, oci.WithLinuxDevice(dev, "rwm"))
   234  		}
   235  	}
   236  
   237  	runtimeOpts, err := getRuntimeOptions(context)
   238  	if err != nil {
   239  		return nil, err
   240  	}
   241  	cOpts = append(cOpts, containerd.WithRuntime(context.String("runtime"), runtimeOpts))
   242  
   243  	opts = append(opts, oci.WithAnnotations(commands.LabelArgs(context.StringSlice("label"))))
   244  	var s specs.Spec
   245  	spec = containerd.WithSpec(&s, opts...)
   246  
   247  	cOpts = append(cOpts, spec)
   248  
   249  	// oci.WithImageConfig (WithUsername, WithUserID) depends on access to rootfs for resolving via
   250  	// the /etc/{passwd,group} files. So cOpts needs to have precedence over opts.
   251  	return client.NewContainer(ctx, id, cOpts...)
   252  }
   253  
   254  func getRuncOptions(context *cli.Context) (*options.Options, error) {
   255  	runtimeOpts := &options.Options{}
   256  	if runcBinary := context.String("runc-binary"); runcBinary != "" {
   257  		runtimeOpts.BinaryName = runcBinary
   258  	}
   259  	if context.Bool("runc-systemd-cgroup") {
   260  		if context.String("cgroup") == "" {
   261  			// runc maps "machine.slice:foo:deadbeef" to "/machine.slice/foo-deadbeef.scope"
   262  			return nil, errors.New("option --runc-systemd-cgroup requires --cgroup to be set, e.g. \"machine.slice:foo:deadbeef\"")
   263  		}
   264  		runtimeOpts.SystemdCgroup = true
   265  	}
   266  
   267  	return runtimeOpts, nil
   268  }
   269  
   270  func getRuntimeOptions(context *cli.Context) (interface{}, error) {
   271  	// validate first
   272  	if (context.String("runc-binary") != "" || context.Bool("runc-systemd-cgroup")) &&
   273  		context.String("runtime") != "io.containerd.runc.v2" {
   274  		return nil, errors.New("specifying runc-binary and runc-systemd-cgroup is only supported for \"io.containerd.runc.v2\" runtime")
   275  	}
   276  
   277  	if context.String("runtime") == "io.containerd.runc.v2" {
   278  		return getRuncOptions(context)
   279  	}
   280  
   281  	return nil, nil
   282  }
   283  
   284  func getNewTaskOpts(context *cli.Context) []containerd.NewTaskOpts {
   285  	var (
   286  		tOpts []containerd.NewTaskOpts
   287  	)
   288  	if context.Bool("no-pivot") {
   289  		tOpts = append(tOpts, containerd.WithNoPivotRoot)
   290  	}
   291  	if uidmap := context.String("uidmap"); uidmap != "" {
   292  		uidMap, err := parseIDMapping(uidmap)
   293  		if err != nil {
   294  			logrus.WithError(err).Warn("unable to parse uidmap; defaulting to uid 0 IO ownership")
   295  		}
   296  		tOpts = append(tOpts, containerd.WithUIDOwner(uidMap.HostID))
   297  	}
   298  	if gidmap := context.String("gidmap"); gidmap != "" {
   299  		gidMap, err := parseIDMapping(gidmap)
   300  		if err != nil {
   301  			logrus.WithError(err).Warn("unable to parse gidmap; defaulting to gid 0 IO ownership")
   302  		}
   303  		tOpts = append(tOpts, containerd.WithGIDOwner(gidMap.HostID))
   304  	}
   305  	return tOpts
   306  }
   307  
   308  func parseIDMapping(mapping string) (specs.LinuxIDMapping, error) {
   309  	parts := strings.Split(mapping, ":")
   310  	if len(parts) != 3 {
   311  		return specs.LinuxIDMapping{}, errors.New("user namespace mappings require the format `container-id:host-id:size`")
   312  	}
   313  	cID, err := strconv.ParseUint(parts[0], 0, 32)
   314  	if err != nil {
   315  		return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid container id for user namespace remapping")
   316  	}
   317  	hID, err := strconv.ParseUint(parts[1], 0, 32)
   318  	if err != nil {
   319  		return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid host id for user namespace remapping")
   320  	}
   321  	size, err := strconv.ParseUint(parts[2], 0, 32)
   322  	if err != nil {
   323  		return specs.LinuxIDMapping{}, errors.Wrapf(err, "invalid size for user namespace remapping")
   324  	}
   325  	return specs.LinuxIDMapping{
   326  		ContainerID: uint32(cID),
   327  		HostID:      uint32(hID),
   328  		Size:        uint32(size),
   329  	}, nil
   330  }
   331  
   332  func validNamespace(ns string) bool {
   333  	linuxNs := specs.LinuxNamespaceType(ns)
   334  	switch linuxNs {
   335  	case specs.PIDNamespace,
   336  		specs.NetworkNamespace,
   337  		specs.UTSNamespace,
   338  		specs.MountNamespace,
   339  		specs.UserNamespace,
   340  		specs.IPCNamespace,
   341  		specs.CgroupNamespace:
   342  		return true
   343  	default:
   344  		return false
   345  	}
   346  }