// Source: github.com/opencontainers/umoci@v0.4.8-0.20240508124516-656e4836fb0d/oci/config/convert/default.go

/*
 * umoci: Umoci Modifies Open Containers' Images
 * Copyright (C) 2016-2020 SUSE LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package convert

import (
	"strings"

	"github.com/blang/semver/v4"
	rspec "github.com/opencontainers/runtime-spec/specs-go"
	"github.com/pkg/errors"
)

// curSpecVersion is rspec.Version with any trailing "-dev" removed, parsed as
// a semantic version. Example uses it to fill in the Version field of the
// default spec.
//
// FIXME: We currently use an unreleased version of the runtime-spec and so we
// have to modify the version string because OCI specifications use "-dev" as
// suffix for not-yet-released versions but in such a way that it produces
// incorrect behaviour. This is compounded with the fact that runtime-tools
// cannot handle any version other than the single version they were compiled
// with.
//
// For instance, 1.0.2-dev is the development version after the release of
// 1.0.2, but according to SemVer 1.0.2-dev should be considered older than
// 1.0.2 (it has a pre-release tag) -- the specs should be using 1.0.2+dev.
var curSpecVersion = semver.MustParse(strings.TrimSuffix(rspec.Version, "-dev"))

// Example returns an example spec file, used as a "good sane default".
// XXX: Really we should just use runc's directly.
42 func Example() rspec.Spec { 43 return rspec.Spec{ 44 Version: curSpecVersion.String(), 45 Root: &rspec.Root{ 46 Path: "rootfs", 47 Readonly: false, 48 }, 49 Process: &rspec.Process{ 50 Terminal: true, 51 User: rspec.User{}, 52 Args: []string{ 53 "sh", 54 }, 55 Env: []string{ 56 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 57 "TERM=xterm", 58 }, 59 Cwd: "/", 60 NoNewPrivileges: true, 61 Capabilities: &rspec.LinuxCapabilities{ 62 Bounding: []string{ 63 "CAP_AUDIT_WRITE", 64 "CAP_KILL", 65 "CAP_NET_BIND_SERVICE", 66 }, 67 Permitted: []string{ 68 "CAP_AUDIT_WRITE", 69 "CAP_KILL", 70 "CAP_NET_BIND_SERVICE", 71 }, 72 Inheritable: []string{ 73 "CAP_AUDIT_WRITE", 74 "CAP_KILL", 75 "CAP_NET_BIND_SERVICE", 76 }, 77 Ambient: []string{ 78 "CAP_AUDIT_WRITE", 79 "CAP_KILL", 80 "CAP_NET_BIND_SERVICE", 81 }, 82 Effective: []string{ 83 "CAP_AUDIT_WRITE", 84 "CAP_KILL", 85 "CAP_NET_BIND_SERVICE", 86 }, 87 }, 88 Rlimits: []rspec.POSIXRlimit{ 89 { 90 Type: "RLIMIT_NOFILE", 91 Hard: uint64(1024), 92 Soft: uint64(1024), 93 }, 94 }, 95 }, 96 Hostname: "umoci-default", 97 Mounts: []rspec.Mount{ 98 { 99 Destination: "/proc", 100 Type: "proc", 101 Source: "proc", 102 Options: nil, 103 }, 104 { 105 Destination: "/dev", 106 Type: "tmpfs", 107 Source: "tmpfs", 108 Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"}, 109 }, 110 { 111 Destination: "/dev/pts", 112 Type: "devpts", 113 Source: "devpts", 114 Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"}, 115 }, 116 { 117 Destination: "/dev/shm", 118 Type: "tmpfs", 119 Source: "shm", 120 Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"}, 121 }, 122 { 123 Destination: "/dev/mqueue", 124 Type: "mqueue", 125 Source: "mqueue", 126 Options: []string{"nosuid", "noexec", "nodev"}, 127 }, 128 { 129 Destination: "/sys", 130 Type: "sysfs", 131 Source: "sysfs", 132 Options: []string{"nosuid", "noexec", "nodev", "ro"}, 133 }, 134 { 135 Destination: 
"/sys/fs/cgroup", 136 Type: "cgroup", 137 Source: "cgroup", 138 Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, 139 }, 140 }, 141 Linux: &rspec.Linux{ 142 MaskedPaths: []string{ 143 "/proc/kcore", 144 "/proc/latency_stats", 145 "/proc/timer_list", 146 "/proc/timer_stats", 147 "/proc/sched_debug", 148 "/sys/firmware", 149 "/proc/scsi", 150 }, 151 ReadonlyPaths: []string{ 152 "/proc/asound", 153 "/proc/bus", 154 "/proc/fs", 155 "/proc/irq", 156 "/proc/sys", 157 "/proc/sysrq-trigger", 158 }, 159 Resources: &rspec.LinuxResources{ 160 Devices: []rspec.LinuxDeviceCgroup{ 161 { 162 Allow: false, 163 Access: "rwm", 164 }, 165 }, 166 }, 167 Namespaces: []rspec.LinuxNamespace{ 168 { 169 Type: "cgroup", 170 }, 171 { 172 Type: "pid", 173 }, 174 { 175 Type: "network", 176 }, 177 { 178 Type: "ipc", 179 }, 180 { 181 Type: "uts", 182 }, 183 { 184 Type: "mount", 185 }, 186 }, 187 }, 188 } 189 } 190 191 // ToRootless converts a specification to a version that works with rootless 192 // containers. This is done by removing options and other settings that clash 193 // with unprivileged user namespaces. 194 func ToRootless(spec *rspec.Spec) error { 195 var namespaces []rspec.LinuxNamespace 196 197 // Remove additional groups. 198 spec.Process.User.AdditionalGids = nil 199 200 // Remove networkns from the spec. 201 for _, ns := range spec.Linux.Namespaces { 202 switch ns.Type { 203 case rspec.NetworkNamespace, rspec.UserNamespace: 204 // Do nothing. 205 default: 206 namespaces = append(namespaces, ns) 207 } 208 } 209 // Add userns to the spec. 210 namespaces = append(namespaces, rspec.LinuxNamespace{ 211 Type: rspec.UserNamespace, 212 }) 213 spec.Linux.Namespaces = namespaces 214 215 // Fix up mounts. 216 var mounts []rspec.Mount 217 for _, mount := range spec.Mounts { 218 // Ignore all mounts that are under /sys. 219 if strings.HasPrefix(mount.Destination, "/sys") { 220 continue 221 } 222 223 // Remove all gid= and uid= mappings. 
224 var options []string 225 for _, option := range mount.Options { 226 if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") { 227 options = append(options, option) 228 } 229 } 230 231 mount.Options = options 232 mounts = append(mounts, mount) 233 } 234 // Add the sysfs mount as an rbind. 235 mounts = append(mounts, rspec.Mount{ 236 // NOTE: "type: bind" is silly here, see opencontainers/runc#2035. 237 Type: "bind", 238 Source: "/sys", 239 Destination: "/sys", 240 Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, 241 }) 242 // Add /etc/resolv.conf as an rbind. 243 const resolvConf = "/etc/resolv.conf" 244 // If we are using user namespaces, then we must make sure that we don't 245 // drop any of the CL_UNPRIVILEGED "locked" flags of the source "mount" 246 // when we bind-mount. The reason for this is that at the point when runc 247 // sets up the root filesystem, it is already inside a user namespace, and 248 // thus cannot change any flags that are locked. 249 unprivOpts, err := getUnprivilegedMountFlags(resolvConf) 250 if err != nil { 251 return errors.Wrapf(err, "inspecting mount flags of %s", resolvConf) 252 } 253 mounts = append(mounts, rspec.Mount{ 254 // NOTE: "type: bind" is silly here, see opencontainers/runc#2035. 255 Type: "bind", 256 Destination: resolvConf, 257 Source: resolvConf, 258 Options: append(unprivOpts, []string{"rbind", "ro"}...), 259 }) 260 spec.Mounts = mounts 261 262 // Remove cgroup settings. 263 spec.Linux.Resources = nil 264 return nil 265 }