github.com/opencontainers/umoci@v0.4.8-0.20240508124516-656e4836fb0d/oci/config/convert/default.go (about)

     1  /*
     2   * umoci: Umoci Modifies Open Containers' Images
     3   * Copyright (C) 2016-2020 SUSE LLC
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *    http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  package convert
    19  
    20  import (
    21  	"strings"
    22  
    23  	"github.com/blang/semver/v4"
    24  	rspec "github.com/opencontainers/runtime-spec/specs-go"
    25  	"github.com/pkg/errors"
    26  )
    27  
    28  // FIXME: We currently use an unreleased version of the runtime-spec and so we
    29  // have to modify the version string because OCI specifications use "-dev" as
    30  // suffix for not-yet-released versions but in such a way that it produces
    31  // incorrect behaviour. This is compounded with the fact that runtime-tools
    32  // cannot handle any version other than the single version they were compiled
    33  // with.
    34  //
    35  // For instance, 1.0.2-dev is the development version after the release of
    36  // 1.0.2, but according to SemVer 1.0.2-dev should be considered older than
    37  // 1.0.2 (it has a pre-release tag) -- the specs should be using 1.0.2+dev.
    38  var curSpecVersion = semver.MustParse(strings.TrimSuffix(rspec.Version, "-dev"))
    39  
    40  // Example returns an example spec file, used as a "good sane default".
    41  // XXX: Really we should just use runc's directly.
    42  func Example() rspec.Spec {
    43  	return rspec.Spec{
    44  		Version: curSpecVersion.String(),
    45  		Root: &rspec.Root{
    46  			Path:     "rootfs",
    47  			Readonly: false,
    48  		},
    49  		Process: &rspec.Process{
    50  			Terminal: true,
    51  			User:     rspec.User{},
    52  			Args: []string{
    53  				"sh",
    54  			},
    55  			Env: []string{
    56  				"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
    57  				"TERM=xterm",
    58  			},
    59  			Cwd:             "/",
    60  			NoNewPrivileges: true,
    61  			Capabilities: &rspec.LinuxCapabilities{
    62  				Bounding: []string{
    63  					"CAP_AUDIT_WRITE",
    64  					"CAP_KILL",
    65  					"CAP_NET_BIND_SERVICE",
    66  				},
    67  				Permitted: []string{
    68  					"CAP_AUDIT_WRITE",
    69  					"CAP_KILL",
    70  					"CAP_NET_BIND_SERVICE",
    71  				},
    72  				Inheritable: []string{
    73  					"CAP_AUDIT_WRITE",
    74  					"CAP_KILL",
    75  					"CAP_NET_BIND_SERVICE",
    76  				},
    77  				Ambient: []string{
    78  					"CAP_AUDIT_WRITE",
    79  					"CAP_KILL",
    80  					"CAP_NET_BIND_SERVICE",
    81  				},
    82  				Effective: []string{
    83  					"CAP_AUDIT_WRITE",
    84  					"CAP_KILL",
    85  					"CAP_NET_BIND_SERVICE",
    86  				},
    87  			},
    88  			Rlimits: []rspec.POSIXRlimit{
    89  				{
    90  					Type: "RLIMIT_NOFILE",
    91  					Hard: uint64(1024),
    92  					Soft: uint64(1024),
    93  				},
    94  			},
    95  		},
    96  		Hostname: "umoci-default",
    97  		Mounts: []rspec.Mount{
    98  			{
    99  				Destination: "/proc",
   100  				Type:        "proc",
   101  				Source:      "proc",
   102  				Options:     nil,
   103  			},
   104  			{
   105  				Destination: "/dev",
   106  				Type:        "tmpfs",
   107  				Source:      "tmpfs",
   108  				Options:     []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
   109  			},
   110  			{
   111  				Destination: "/dev/pts",
   112  				Type:        "devpts",
   113  				Source:      "devpts",
   114  				Options:     []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
   115  			},
   116  			{
   117  				Destination: "/dev/shm",
   118  				Type:        "tmpfs",
   119  				Source:      "shm",
   120  				Options:     []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
   121  			},
   122  			{
   123  				Destination: "/dev/mqueue",
   124  				Type:        "mqueue",
   125  				Source:      "mqueue",
   126  				Options:     []string{"nosuid", "noexec", "nodev"},
   127  			},
   128  			{
   129  				Destination: "/sys",
   130  				Type:        "sysfs",
   131  				Source:      "sysfs",
   132  				Options:     []string{"nosuid", "noexec", "nodev", "ro"},
   133  			},
   134  			{
   135  				Destination: "/sys/fs/cgroup",
   136  				Type:        "cgroup",
   137  				Source:      "cgroup",
   138  				Options:     []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
   139  			},
   140  		},
   141  		Linux: &rspec.Linux{
   142  			MaskedPaths: []string{
   143  				"/proc/kcore",
   144  				"/proc/latency_stats",
   145  				"/proc/timer_list",
   146  				"/proc/timer_stats",
   147  				"/proc/sched_debug",
   148  				"/sys/firmware",
   149  				"/proc/scsi",
   150  			},
   151  			ReadonlyPaths: []string{
   152  				"/proc/asound",
   153  				"/proc/bus",
   154  				"/proc/fs",
   155  				"/proc/irq",
   156  				"/proc/sys",
   157  				"/proc/sysrq-trigger",
   158  			},
   159  			Resources: &rspec.LinuxResources{
   160  				Devices: []rspec.LinuxDeviceCgroup{
   161  					{
   162  						Allow:  false,
   163  						Access: "rwm",
   164  					},
   165  				},
   166  			},
   167  			Namespaces: []rspec.LinuxNamespace{
   168  				{
   169  					Type: "cgroup",
   170  				},
   171  				{
   172  					Type: "pid",
   173  				},
   174  				{
   175  					Type: "network",
   176  				},
   177  				{
   178  					Type: "ipc",
   179  				},
   180  				{
   181  					Type: "uts",
   182  				},
   183  				{
   184  					Type: "mount",
   185  				},
   186  			},
   187  		},
   188  	}
   189  }
   190  
   191  // ToRootless converts a specification to a version that works with rootless
   192  // containers. This is done by removing options and other settings that clash
   193  // with unprivileged user namespaces.
   194  func ToRootless(spec *rspec.Spec) error {
   195  	var namespaces []rspec.LinuxNamespace
   196  
   197  	// Remove additional groups.
   198  	spec.Process.User.AdditionalGids = nil
   199  
   200  	// Remove networkns from the spec.
   201  	for _, ns := range spec.Linux.Namespaces {
   202  		switch ns.Type {
   203  		case rspec.NetworkNamespace, rspec.UserNamespace:
   204  			// Do nothing.
   205  		default:
   206  			namespaces = append(namespaces, ns)
   207  		}
   208  	}
   209  	// Add userns to the spec.
   210  	namespaces = append(namespaces, rspec.LinuxNamespace{
   211  		Type: rspec.UserNamespace,
   212  	})
   213  	spec.Linux.Namespaces = namespaces
   214  
   215  	// Fix up mounts.
   216  	var mounts []rspec.Mount
   217  	for _, mount := range spec.Mounts {
   218  		// Ignore all mounts that are under /sys.
   219  		if strings.HasPrefix(mount.Destination, "/sys") {
   220  			continue
   221  		}
   222  
   223  		// Remove all gid= and uid= mappings.
   224  		var options []string
   225  		for _, option := range mount.Options {
   226  			if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
   227  				options = append(options, option)
   228  			}
   229  		}
   230  
   231  		mount.Options = options
   232  		mounts = append(mounts, mount)
   233  	}
   234  	// Add the sysfs mount as an rbind.
   235  	mounts = append(mounts, rspec.Mount{
   236  		// NOTE: "type: bind" is silly here, see opencontainers/runc#2035.
   237  		Type:        "bind",
   238  		Source:      "/sys",
   239  		Destination: "/sys",
   240  		Options:     []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
   241  	})
   242  	// Add /etc/resolv.conf as an rbind.
   243  	const resolvConf = "/etc/resolv.conf"
   244  	// If we are using user namespaces, then we must make sure that we don't
   245  	// drop any of the CL_UNPRIVILEGED "locked" flags of the source "mount"
   246  	// when we bind-mount. The reason for this is that at the point when runc
   247  	// sets up the root filesystem, it is already inside a user namespace, and
   248  	// thus cannot change any flags that are locked.
   249  	unprivOpts, err := getUnprivilegedMountFlags(resolvConf)
   250  	if err != nil {
   251  		return errors.Wrapf(err, "inspecting mount flags of %s", resolvConf)
   252  	}
   253  	mounts = append(mounts, rspec.Mount{
   254  		// NOTE: "type: bind" is silly here, see opencontainers/runc#2035.
   255  		Type:        "bind",
   256  		Destination: resolvConf,
   257  		Source:      resolvConf,
   258  		Options:     append(unprivOpts, []string{"rbind", "ro"}...),
   259  	})
   260  	spec.Mounts = mounts
   261  
   262  	// Remove cgroup settings.
   263  	spec.Linux.Resources = nil
   264  	return nil
   265  }