github.com/dctrud/umoci@v0.4.3-0.20191016193643-05a1d37de015/oci/layer/unpack.go (about) 1 /* 2 * umoci: Umoci Modifies Open Containers' Images 3 * Copyright (C) 2016, 2017, 2018 SUSE LLC. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package layer 19 20 import ( 21 "archive/tar" 22 // Import is necessary for go-digest. 23 _ "crypto/sha256" 24 "fmt" 25 "io" 26 "io/ioutil" 27 "os" 28 "path/filepath" 29 "runtime" 30 "strings" 31 "time" 32 33 "github.com/apex/log" 34 gzip "github.com/klauspost/pgzip" 35 "github.com/openSUSE/umoci/oci/cas" 36 "github.com/openSUSE/umoci/oci/casext" 37 iconv "github.com/openSUSE/umoci/oci/config/convert" 38 "github.com/openSUSE/umoci/pkg/fseval" 39 "github.com/openSUSE/umoci/pkg/idtools" 40 "github.com/openSUSE/umoci/pkg/system" 41 "github.com/opencontainers/go-digest" 42 ispec "github.com/opencontainers/image-spec/specs-go/v1" 43 rspec "github.com/opencontainers/runtime-spec/specs-go" 44 rgen "github.com/opencontainers/runtime-tools/generate" 45 "github.com/pkg/errors" 46 "golang.org/x/net/context" 47 "golang.org/x/sys/unix" 48 ) 49 50 // UnpackLayer unpacks the tar stream representing an OCI layer at the given 51 // root. It ensures that the state of the root is as close as possible to the 52 // state used to create the layer. If an error is returned, the state of root 53 // is undefined (unpacking is not guaranteed to be atomic). 54 func UnpackLayer(root string, layer io.Reader, opt *MapOptions) error { 55 var mapOptions MapOptions 56 if opt != nil { 57 mapOptions = *opt 58 } 59 te := NewTarExtractor(mapOptions) 60 tr := tar.NewReader(layer) 61 for { 62 hdr, err := tr.Next() 63 if err == io.EOF { 64 break 65 } 66 if err != nil { 67 return errors.Wrap(err, "read next entry") 68 } 69 if err := te.UnpackEntry(root, hdr, tr); err != nil { 70 return errors.Wrapf(err, "unpack entry: %s", hdr.Name) 71 } 72 } 73 return nil 74 } 75 76 // RootfsName is the name of the rootfs directory inside the bundle path when 77 // generated. 78 const RootfsName = "rootfs" 79 80 // isLayerType returns if the given MediaType is the media type of an image 81 // layer blob. This includes both distributable and non-distributable images. 82 func isLayerType(mediaType string) bool { 83 return mediaType == ispec.MediaTypeImageLayer || mediaType == ispec.MediaTypeImageLayerNonDistributable || 84 mediaType == ispec.MediaTypeImageLayerGzip || mediaType == ispec.MediaTypeImageLayerNonDistributableGzip 85 } 86 87 func needsGunzip(mediaType string) bool { 88 return mediaType == ispec.MediaTypeImageLayerGzip || mediaType == ispec.MediaTypeImageLayerNonDistributableGzip 89 } 90 91 // UnpackManifest extracts all of the layers in the given manifest, as well as 92 // generating a runtime bundle and configuration. The rootfs is extracted to 93 // <bundle>/<layer.RootfsName>. 94 // 95 // FIXME: This interface is ugly. 96 func UnpackManifest(ctx context.Context, engine cas.Engine, bundle string, manifest ispec.Manifest, opt *MapOptions) (err error) { 97 // Create the bundle directory. We only error out if config.json or rootfs/ 98 // already exists, because we cannot be sure that the user intended us to 99 // extract over an existing bundle. 100 if err := os.MkdirAll(bundle, 0755); err != nil { 101 return errors.Wrap(err, "mkdir bundle") 102 } 103 // We change the mode of the bundle directory to 0700. A user can easily 104 // change this after-the-fact, but we do this explicitly to avoid cases 105 // where an unprivileged user could recurse into an otherwise unsafe image 106 // (giving them potential root access through setuid binaries for example). 107 if err := os.Chmod(bundle, 0700); err != nil { 108 return errors.Wrap(err, "chmod bundle 0700") 109 } 110 111 configPath := filepath.Join(bundle, "config.json") 112 rootfsPath := filepath.Join(bundle, RootfsName) 113 114 if _, err := os.Lstat(configPath); !os.IsNotExist(err) { 115 if err == nil { 116 err = fmt.Errorf("config.json already exists") 117 } 118 return errors.Wrap(err, "bundle path empty") 119 } 120 121 defer func() { 122 if err != nil { 123 fsEval := fseval.DefaultFsEval 124 if opt != nil && opt.Rootless { 125 fsEval = fseval.RootlessFsEval 126 } 127 // It's too late to care about errors. 128 _ = fsEval.RemoveAll(rootfsPath) 129 } 130 }() 131 132 log.Infof("unpack rootfs: %s", rootfsPath) 133 if err := UnpackRootfs(ctx, engine, rootfsPath, manifest, opt); err != nil { 134 return errors.Wrap(err, "unpack rootfs") 135 } 136 137 // Generate a runtime configuration file from ispec.Image. 138 configFile, err := os.Create(configPath) 139 if err != nil { 140 return errors.Wrap(err, "open config.json") 141 } 142 defer configFile.Close() 143 144 if err := UnpackRuntimeJSON(ctx, engine, configFile, rootfsPath, manifest, opt); err != nil { 145 return errors.Wrap(err, "unpack config.json") 146 } 147 return nil 148 } 149 150 // UnpackRootfs extracts all of the layers in the given manifest. 151 // Some verification is done during image extraction. 152 func UnpackRootfs(ctx context.Context, engine cas.Engine, rootfsPath string, manifest ispec.Manifest, opt *MapOptions) (err error) { 153 engineExt := casext.NewEngine(engine) 154 155 if _, err := os.Lstat(rootfsPath); !os.IsNotExist(err) { 156 if err == nil { 157 err = fmt.Errorf("%s already exists", rootfsPath) 158 } 159 return err 160 } 161 162 if err := os.Mkdir(rootfsPath, 0755); err != nil { 163 return errors.Wrap(err, "mkdir rootfs") 164 } 165 166 // In order to avoid having a broken rootfs in the case of an error, we 167 // remove the rootfs. In the case of rootless this is particularly 168 // important (`rm -rf` won't work on most distro rootfs's). 169 defer func() { 170 if err != nil { 171 fsEval := fseval.DefaultFsEval 172 if opt != nil && opt.Rootless { 173 fsEval = fseval.RootlessFsEval 174 } 175 // It's too late to care about errors. 176 _ = fsEval.RemoveAll(rootfsPath) 177 } 178 }() 179 180 // Make sure that the owner is correct. 181 rootUID, err := idtools.ToHost(0, opt.UIDMappings) 182 if err != nil { 183 return errors.Wrap(err, "ensure rootuid has mapping") 184 } 185 rootGID, err := idtools.ToHost(0, opt.GIDMappings) 186 if err != nil { 187 return errors.Wrap(err, "ensure rootgid has mapping") 188 } 189 if err := os.Lchown(rootfsPath, rootUID, rootGID); err != nil { 190 return errors.Wrap(err, "chown rootfs") 191 } 192 193 // Currently, many different images in the wild don't specify what the 194 // atime/mtime of the root directory is. This is a huge pain because it 195 // means that we can't ensure consistent unpacking. In order to get around 196 // this, we first set the mtime of the root directory to the Unix epoch 197 // (which is as good of an arbitrary choice as any). 198 epoch := time.Unix(0, 0) 199 if err := system.Lutimes(rootfsPath, epoch, epoch); err != nil { 200 return errors.Wrap(err, "set initial root time") 201 } 202 203 // In order to verify the DiffIDs as we extract layers, we have to get the 204 // .Config blob first. But we can't extract it (generate the runtime 205 // config) until after we have the full rootfs generated. 206 configBlob, err := engineExt.FromDescriptor(ctx, manifest.Config) 207 if err != nil { 208 return errors.Wrap(err, "get config blob") 209 } 210 defer configBlob.Close() 211 if configBlob.MediaType != ispec.MediaTypeImageConfig { 212 return errors.Errorf("unpack rootfs: config blob is not correct mediatype %s: %s", ispec.MediaTypeImageConfig, configBlob.MediaType) 213 } 214 config, ok := configBlob.Data.(ispec.Image) 215 if !ok { 216 // Should _never_ be reached. 217 return errors.Errorf("[internal error] unknown config blob type: %s", configBlob.MediaType) 218 } 219 220 // We can't understand non-layer images. 221 if config.RootFS.Type != "layers" { 222 return errors.Errorf("unpack rootfs: config: unsupported rootfs.type: %s", config.RootFS.Type) 223 } 224 225 // Layer extraction. 226 for idx, layerDescriptor := range manifest.Layers { 227 layerDiffID := config.RootFS.DiffIDs[idx] 228 log.Infof("unpack layer: %s", layerDescriptor.Digest) 229 230 layerBlob, err := engineExt.FromDescriptor(ctx, layerDescriptor) 231 if err != nil { 232 return errors.Wrap(err, "get layer blob") 233 } 234 defer layerBlob.Close() 235 if !isLayerType(layerBlob.MediaType) { 236 return errors.Errorf("unpack rootfs: layer %s: blob is not correct mediatype: %s", layerBlob.Digest, layerBlob.MediaType) 237 } 238 layerData, ok := layerBlob.Data.(io.ReadCloser) 239 if !ok { 240 // Should _never_ be reached. 241 return errors.Errorf("[internal error] layerBlob was not an io.ReadCloser") 242 } 243 244 layerRaw := layerData 245 if needsGunzip(layerBlob.MediaType) { 246 // We have to extract a gzip'd version of the above layer. Also note 247 // that we have to check the DiffID we're extracting (which is the 248 // sha256 sum of the *uncompressed* layer). 249 layerRaw, err = gzip.NewReader(layerData) 250 if err != nil { 251 return errors.Wrap(err, "create gzip reader") 252 } 253 } 254 255 layerDigester := digest.SHA256.Digester() 256 layer := io.TeeReader(layerRaw, layerDigester.Hash()) 257 258 if err := UnpackLayer(rootfsPath, layer, opt); err != nil { 259 return errors.Wrap(err, "unpack layer") 260 } 261 // Different tar implementations can have different levels of redundant 262 // padding and other similar weird behaviours. While on paper they are 263 // all entirely valid archives, Go's tar.Reader implementation doesn't 264 // guarantee that the entire stream will be consumed (which can result 265 // in the later diff_id check failing because the digester didn't get 266 // the whole uncompressed stream). Just blindly consume anything left 267 // in the layer. 268 _, _ = io.Copy(ioutil.Discard, layer) 269 // XXX: Is it possible this breaks in the error path? 270 layerData.Close() 271 272 layerDigest := layerDigester.Digest() 273 if layerDigest != layerDiffID { 274 return errors.Errorf("unpack manifest: layer %s: diffid mismatch: got %s expected %s", layerDescriptor.Digest, layerDigest, layerDiffID) 275 } 276 } 277 278 return nil 279 } 280 281 // UnpackRuntimeJSON converts a given manifest's configuration to a runtime 282 // configuration and writes it to the given writer. If rootfs is specified, it 283 // is sourced during the configuration generation (for conversion of 284 // Config.User and other similar jobs -- which will error out if the user could 285 // not be parsed). If rootfs is not specified (is an empty string) then all 286 // conversions that require sourcing the rootfs will be set to their default 287 // values. 288 // 289 // XXX: I don't like this API. It has way too many arguments. 290 func UnpackRuntimeJSON(ctx context.Context, engine cas.Engine, configFile io.Writer, rootfs string, manifest ispec.Manifest, opt *MapOptions) error { 291 engineExt := casext.NewEngine(engine) 292 293 var mapOptions MapOptions 294 if opt != nil { 295 mapOptions = *opt 296 } 297 298 // In order to verify the DiffIDs as we extract layers, we have to get the 299 // .Config blob first. But we can't extract it (generate the runtime 300 // config) until after we have the full rootfs generated. 301 configBlob, err := engineExt.FromDescriptor(ctx, manifest.Config) 302 if err != nil { 303 return errors.Wrap(err, "get config blob") 304 } 305 defer configBlob.Close() 306 if configBlob.MediaType != ispec.MediaTypeImageConfig { 307 return errors.Errorf("unpack manifest: config blob is not correct mediatype %s: %s", ispec.MediaTypeImageConfig, configBlob.MediaType) 308 } 309 config, ok := configBlob.Data.(ispec.Image) 310 if !ok { 311 // Should _never_ be reached. 312 return errors.Errorf("[internal error] unknown config blob type: %s", configBlob.MediaType) 313 } 314 315 g, err := rgen.New(runtime.GOOS) 316 if err != nil { 317 return errors.Wrap(err, "create config.json generator") 318 } 319 if err := iconv.MutateRuntimeSpec(g, rootfs, config); err != nil { 320 return errors.Wrap(err, "generate config.json") 321 } 322 323 // Add UIDMapping / GIDMapping options. 324 if len(mapOptions.UIDMappings) > 0 || len(mapOptions.GIDMappings) > 0 { 325 g.AddOrReplaceLinuxNamespace("user", "") 326 } 327 g.ClearLinuxUIDMappings() 328 for _, m := range mapOptions.UIDMappings { 329 g.AddLinuxUIDMapping(m.HostID, m.ContainerID, m.Size) 330 } 331 g.ClearLinuxGIDMappings() 332 for _, m := range mapOptions.GIDMappings { 333 g.AddLinuxGIDMapping(m.HostID, m.ContainerID, m.Size) 334 } 335 if mapOptions.Rootless { 336 ToRootless(g.Spec()) 337 const resolvConf = "/etc/resolv.conf" 338 // If we are using user namespaces, then we must make sure that we 339 // don't drop any of the CL_UNPRIVILEGED "locked" flags of the source 340 // "mount" when we bind-mount. The reason for this is that at the point 341 // when runc sets up the root filesystem, it is already inside a user 342 // namespace, and thus cannot change any flags that are locked. 343 unprivOpts, err := getUnprivilegedMountFlags(resolvConf) 344 if err != nil { 345 return errors.Wrapf(err, "inspecting mount flags of %s", resolvConf) 346 } 347 g.AddMount(rspec.Mount{ 348 Destination: resolvConf, 349 Source: resolvConf, 350 Type: "none", 351 Options: append(unprivOpts, []string{"bind", "ro"}...), 352 }) 353 } 354 355 // Save the config.json. 356 if err := g.Save(configFile, rgen.ExportOptions{}); err != nil { 357 return errors.Wrap(err, "write config.json") 358 } 359 return nil 360 } 361 362 // ToRootless converts a specification to a version that works with rootless 363 // containers. This is done by removing options and other settings that clash 364 // with unprivileged user namespaces. 365 func ToRootless(spec *rspec.Spec) { 366 var namespaces []rspec.LinuxNamespace 367 368 // Remove additional groups. 369 spec.Process.User.AdditionalGids = nil 370 371 // Remove networkns from the spec. 372 for _, ns := range spec.Linux.Namespaces { 373 switch ns.Type { 374 case rspec.NetworkNamespace, rspec.UserNamespace: 375 // Do nothing. 376 default: 377 namespaces = append(namespaces, ns) 378 } 379 } 380 // Add userns to the spec. 381 namespaces = append(namespaces, rspec.LinuxNamespace{ 382 Type: rspec.UserNamespace, 383 }) 384 spec.Linux.Namespaces = namespaces 385 386 // Fix up mounts. 387 var mounts []rspec.Mount 388 for _, mount := range spec.Mounts { 389 // Ignore all mounts that are under /sys. 390 if strings.HasPrefix(mount.Destination, "/sys") { 391 continue 392 } 393 394 // Remove all gid= and uid= mappings. 395 var options []string 396 for _, option := range mount.Options { 397 if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") { 398 options = append(options, option) 399 } 400 } 401 402 mount.Options = options 403 mounts = append(mounts, mount) 404 } 405 // Add the sysfs mount as an rbind. 406 mounts = append(mounts, rspec.Mount{ 407 Source: "/sys", 408 Destination: "/sys", 409 Type: "none", 410 Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, 411 }) 412 spec.Mounts = mounts 413 414 // Remove cgroup settings. 415 spec.Linux.Resources = nil 416 } 417 418 // Get the set of mount flags that are set on the mount that contains the given 419 // path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that 420 // bind-mounting "with options" will not fail with user namespaces, due to 421 // kernel restrictions that require user namespace mounts to preserve 422 // CL_UNPRIVILEGED locked flags. 423 // 424 // Ported from https://github.com/moby/moby/pull/35205 425 func getUnprivilegedMountFlags(path string) ([]string, error) { 426 var statfs unix.Statfs_t 427 if err := unix.Statfs(path, &statfs); err != nil { 428 return nil, err 429 } 430 431 // The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048. 432 unprivilegedFlags := map[uint64]string{ 433 unix.MS_RDONLY: "ro", 434 unix.MS_NODEV: "nodev", 435 unix.MS_NOEXEC: "noexec", 436 unix.MS_NOSUID: "nosuid", 437 unix.MS_NOATIME: "noatime", 438 unix.MS_RELATIME: "relatime", 439 unix.MS_NODIRATIME: "nodiratime", 440 } 441 442 var flags []string 443 for mask, flag := range unprivilegedFlags { 444 if uint64(statfs.Flags)&mask == mask { 445 flags = append(flags, flag) 446 } 447 } 448 449 return flags, nil 450 }