github.com/dctrud/umoci@v0.4.3-0.20191016193643-05a1d37de015/oci/layer/unpack.go (about)

     1  /*
     2   * umoci: Umoci Modifies Open Containers' Images
     3   * Copyright (C) 2016, 2017, 2018 SUSE LLC.
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *    http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  package layer
    19  
    20  import (
    21  	"archive/tar"
    22  	// Import is necessary for go-digest.
    23  	_ "crypto/sha256"
    24  	"fmt"
    25  	"io"
    26  	"io/ioutil"
    27  	"os"
    28  	"path/filepath"
    29  	"runtime"
    30  	"strings"
    31  	"time"
    32  
    33  	"github.com/apex/log"
    34  	gzip "github.com/klauspost/pgzip"
    35  	"github.com/openSUSE/umoci/oci/cas"
    36  	"github.com/openSUSE/umoci/oci/casext"
    37  	iconv "github.com/openSUSE/umoci/oci/config/convert"
    38  	"github.com/openSUSE/umoci/pkg/fseval"
    39  	"github.com/openSUSE/umoci/pkg/idtools"
    40  	"github.com/openSUSE/umoci/pkg/system"
    41  	"github.com/opencontainers/go-digest"
    42  	ispec "github.com/opencontainers/image-spec/specs-go/v1"
    43  	rspec "github.com/opencontainers/runtime-spec/specs-go"
    44  	rgen "github.com/opencontainers/runtime-tools/generate"
    45  	"github.com/pkg/errors"
    46  	"golang.org/x/net/context"
    47  	"golang.org/x/sys/unix"
    48  )
    49  
    50  // UnpackLayer unpacks the tar stream representing an OCI layer at the given
    51  // root. It ensures that the state of the root is as close as possible to the
    52  // state used to create the layer. If an error is returned, the state of root
    53  // is undefined (unpacking is not guaranteed to be atomic).
    54  func UnpackLayer(root string, layer io.Reader, opt *MapOptions) error {
    55  	var mapOptions MapOptions
    56  	if opt != nil {
    57  		mapOptions = *opt
    58  	}
    59  	te := NewTarExtractor(mapOptions)
    60  	tr := tar.NewReader(layer)
    61  	for {
    62  		hdr, err := tr.Next()
    63  		if err == io.EOF {
    64  			break
    65  		}
    66  		if err != nil {
    67  			return errors.Wrap(err, "read next entry")
    68  		}
    69  		if err := te.UnpackEntry(root, hdr, tr); err != nil {
    70  			return errors.Wrapf(err, "unpack entry: %s", hdr.Name)
    71  		}
    72  	}
    73  	return nil
    74  }
    75  
    76  // RootfsName is the name of the rootfs directory inside the bundle path when
    77  // generated.
    78  const RootfsName = "rootfs"
    79  
    80  // isLayerType returns if the given MediaType is the media type of an image
    81  // layer blob. This includes both distributable and non-distributable images.
    82  func isLayerType(mediaType string) bool {
    83  	return mediaType == ispec.MediaTypeImageLayer || mediaType == ispec.MediaTypeImageLayerNonDistributable ||
    84  		mediaType == ispec.MediaTypeImageLayerGzip || mediaType == ispec.MediaTypeImageLayerNonDistributableGzip
    85  }
    86  
    87  func needsGunzip(mediaType string) bool {
    88  	return mediaType == ispec.MediaTypeImageLayerGzip || mediaType == ispec.MediaTypeImageLayerNonDistributableGzip
    89  }
    90  
    91  // UnpackManifest extracts all of the layers in the given manifest, as well as
    92  // generating a runtime bundle and configuration. The rootfs is extracted to
    93  // <bundle>/<layer.RootfsName>.
    94  //
    95  // FIXME: This interface is ugly.
    96  func UnpackManifest(ctx context.Context, engine cas.Engine, bundle string, manifest ispec.Manifest, opt *MapOptions) (err error) {
    97  	// Create the bundle directory. We only error out if config.json or rootfs/
    98  	// already exists, because we cannot be sure that the user intended us to
    99  	// extract over an existing bundle.
   100  	if err := os.MkdirAll(bundle, 0755); err != nil {
   101  		return errors.Wrap(err, "mkdir bundle")
   102  	}
   103  	// We change the mode of the bundle directory to 0700. A user can easily
   104  	// change this after-the-fact, but we do this explicitly to avoid cases
   105  	// where an unprivileged user could recurse into an otherwise unsafe image
   106  	// (giving them potential root access through setuid binaries for example).
   107  	if err := os.Chmod(bundle, 0700); err != nil {
   108  		return errors.Wrap(err, "chmod bundle 0700")
   109  	}
   110  
   111  	configPath := filepath.Join(bundle, "config.json")
   112  	rootfsPath := filepath.Join(bundle, RootfsName)
   113  
   114  	if _, err := os.Lstat(configPath); !os.IsNotExist(err) {
   115  		if err == nil {
   116  			err = fmt.Errorf("config.json already exists")
   117  		}
   118  		return errors.Wrap(err, "bundle path empty")
   119  	}
   120  
   121  	defer func() {
   122  		if err != nil {
   123  			fsEval := fseval.DefaultFsEval
   124  			if opt != nil && opt.Rootless {
   125  				fsEval = fseval.RootlessFsEval
   126  			}
   127  			// It's too late to care about errors.
   128  			_ = fsEval.RemoveAll(rootfsPath)
   129  		}
   130  	}()
   131  
   132  	log.Infof("unpack rootfs: %s", rootfsPath)
   133  	if err := UnpackRootfs(ctx, engine, rootfsPath, manifest, opt); err != nil {
   134  		return errors.Wrap(err, "unpack rootfs")
   135  	}
   136  
   137  	// Generate a runtime configuration file from ispec.Image.
   138  	configFile, err := os.Create(configPath)
   139  	if err != nil {
   140  		return errors.Wrap(err, "open config.json")
   141  	}
   142  	defer configFile.Close()
   143  
   144  	if err := UnpackRuntimeJSON(ctx, engine, configFile, rootfsPath, manifest, opt); err != nil {
   145  		return errors.Wrap(err, "unpack config.json")
   146  	}
   147  	return nil
   148  }
   149  
   150  // UnpackRootfs extracts all of the layers in the given manifest.
   151  // Some verification is done during image extraction.
   152  func UnpackRootfs(ctx context.Context, engine cas.Engine, rootfsPath string, manifest ispec.Manifest, opt *MapOptions) (err error) {
   153  	engineExt := casext.NewEngine(engine)
   154  
   155  	if _, err := os.Lstat(rootfsPath); !os.IsNotExist(err) {
   156  		if err == nil {
   157  			err = fmt.Errorf("%s already exists", rootfsPath)
   158  		}
   159  		return err
   160  	}
   161  
   162  	if err := os.Mkdir(rootfsPath, 0755); err != nil {
   163  		return errors.Wrap(err, "mkdir rootfs")
   164  	}
   165  
   166  	// In order to avoid having a broken rootfs in the case of an error, we
   167  	// remove the rootfs. In the case of rootless this is particularly
   168  	// important (`rm -rf` won't work on most distro rootfs's).
   169  	defer func() {
   170  		if err != nil {
   171  			fsEval := fseval.DefaultFsEval
   172  			if opt != nil && opt.Rootless {
   173  				fsEval = fseval.RootlessFsEval
   174  			}
   175  			// It's too late to care about errors.
   176  			_ = fsEval.RemoveAll(rootfsPath)
   177  		}
   178  	}()
   179  
   180  	// Make sure that the owner is correct.
   181  	rootUID, err := idtools.ToHost(0, opt.UIDMappings)
   182  	if err != nil {
   183  		return errors.Wrap(err, "ensure rootuid has mapping")
   184  	}
   185  	rootGID, err := idtools.ToHost(0, opt.GIDMappings)
   186  	if err != nil {
   187  		return errors.Wrap(err, "ensure rootgid has mapping")
   188  	}
   189  	if err := os.Lchown(rootfsPath, rootUID, rootGID); err != nil {
   190  		return errors.Wrap(err, "chown rootfs")
   191  	}
   192  
   193  	// Currently, many different images in the wild don't specify what the
   194  	// atime/mtime of the root directory is. This is a huge pain because it
   195  	// means that we can't ensure consistent unpacking. In order to get around
   196  	// this, we first set the mtime of the root directory to the Unix epoch
   197  	// (which is as good of an arbitrary choice as any).
   198  	epoch := time.Unix(0, 0)
   199  	if err := system.Lutimes(rootfsPath, epoch, epoch); err != nil {
   200  		return errors.Wrap(err, "set initial root time")
   201  	}
   202  
   203  	// In order to verify the DiffIDs as we extract layers, we have to get the
   204  	// .Config blob first. But we can't extract it (generate the runtime
   205  	// config) until after we have the full rootfs generated.
   206  	configBlob, err := engineExt.FromDescriptor(ctx, manifest.Config)
   207  	if err != nil {
   208  		return errors.Wrap(err, "get config blob")
   209  	}
   210  	defer configBlob.Close()
   211  	if configBlob.MediaType != ispec.MediaTypeImageConfig {
   212  		return errors.Errorf("unpack rootfs: config blob is not correct mediatype %s: %s", ispec.MediaTypeImageConfig, configBlob.MediaType)
   213  	}
   214  	config, ok := configBlob.Data.(ispec.Image)
   215  	if !ok {
   216  		// Should _never_ be reached.
   217  		return errors.Errorf("[internal error] unknown config blob type: %s", configBlob.MediaType)
   218  	}
   219  
   220  	// We can't understand non-layer images.
   221  	if config.RootFS.Type != "layers" {
   222  		return errors.Errorf("unpack rootfs: config: unsupported rootfs.type: %s", config.RootFS.Type)
   223  	}
   224  
   225  	// Layer extraction.
   226  	for idx, layerDescriptor := range manifest.Layers {
   227  		layerDiffID := config.RootFS.DiffIDs[idx]
   228  		log.Infof("unpack layer: %s", layerDescriptor.Digest)
   229  
   230  		layerBlob, err := engineExt.FromDescriptor(ctx, layerDescriptor)
   231  		if err != nil {
   232  			return errors.Wrap(err, "get layer blob")
   233  		}
   234  		defer layerBlob.Close()
   235  		if !isLayerType(layerBlob.MediaType) {
   236  			return errors.Errorf("unpack rootfs: layer %s: blob is not correct mediatype: %s", layerBlob.Digest, layerBlob.MediaType)
   237  		}
   238  		layerData, ok := layerBlob.Data.(io.ReadCloser)
   239  		if !ok {
   240  			// Should _never_ be reached.
   241  			return errors.Errorf("[internal error] layerBlob was not an io.ReadCloser")
   242  		}
   243  
   244  		layerRaw := layerData
   245  		if needsGunzip(layerBlob.MediaType) {
   246  			// We have to extract a gzip'd version of the above layer. Also note
   247  			// that we have to check the DiffID we're extracting (which is the
   248  			// sha256 sum of the *uncompressed* layer).
   249  			layerRaw, err = gzip.NewReader(layerData)
   250  			if err != nil {
   251  				return errors.Wrap(err, "create gzip reader")
   252  			}
   253  		}
   254  
   255  		layerDigester := digest.SHA256.Digester()
   256  		layer := io.TeeReader(layerRaw, layerDigester.Hash())
   257  
   258  		if err := UnpackLayer(rootfsPath, layer, opt); err != nil {
   259  			return errors.Wrap(err, "unpack layer")
   260  		}
   261  		// Different tar implementations can have different levels of redundant
   262  		// padding and other similar weird behaviours. While on paper they are
   263  		// all entirely valid archives, Go's tar.Reader implementation doesn't
   264  		// guarantee that the entire stream will be consumed (which can result
   265  		// in the later diff_id check failing because the digester didn't get
   266  		// the whole uncompressed stream). Just blindly consume anything left
   267  		// in the layer.
   268  		_, _ = io.Copy(ioutil.Discard, layer)
   269  		// XXX: Is it possible this breaks in the error path?
   270  		layerData.Close()
   271  
   272  		layerDigest := layerDigester.Digest()
   273  		if layerDigest != layerDiffID {
   274  			return errors.Errorf("unpack manifest: layer %s: diffid mismatch: got %s expected %s", layerDescriptor.Digest, layerDigest, layerDiffID)
   275  		}
   276  	}
   277  
   278  	return nil
   279  }
   280  
   281  // UnpackRuntimeJSON converts a given manifest's configuration to a runtime
   282  // configuration and writes it to the given writer. If rootfs is specified, it
   283  // is sourced during the configuration generation (for conversion of
   284  // Config.User and other similar jobs -- which will error out if the user could
   285  // not be parsed). If rootfs is not specified (is an empty string) then all
   286  // conversions that require sourcing the rootfs will be set to their default
   287  // values.
   288  //
   289  // XXX: I don't like this API. It has way too many arguments.
   290  func UnpackRuntimeJSON(ctx context.Context, engine cas.Engine, configFile io.Writer, rootfs string, manifest ispec.Manifest, opt *MapOptions) error {
   291  	engineExt := casext.NewEngine(engine)
   292  
   293  	var mapOptions MapOptions
   294  	if opt != nil {
   295  		mapOptions = *opt
   296  	}
   297  
   298  	// In order to verify the DiffIDs as we extract layers, we have to get the
   299  	// .Config blob first. But we can't extract it (generate the runtime
   300  	// config) until after we have the full rootfs generated.
   301  	configBlob, err := engineExt.FromDescriptor(ctx, manifest.Config)
   302  	if err != nil {
   303  		return errors.Wrap(err, "get config blob")
   304  	}
   305  	defer configBlob.Close()
   306  	if configBlob.MediaType != ispec.MediaTypeImageConfig {
   307  		return errors.Errorf("unpack manifest: config blob is not correct mediatype %s: %s", ispec.MediaTypeImageConfig, configBlob.MediaType)
   308  	}
   309  	config, ok := configBlob.Data.(ispec.Image)
   310  	if !ok {
   311  		// Should _never_ be reached.
   312  		return errors.Errorf("[internal error] unknown config blob type: %s", configBlob.MediaType)
   313  	}
   314  
   315  	g, err := rgen.New(runtime.GOOS)
   316  	if err != nil {
   317  		return errors.Wrap(err, "create config.json generator")
   318  	}
   319  	if err := iconv.MutateRuntimeSpec(g, rootfs, config); err != nil {
   320  		return errors.Wrap(err, "generate config.json")
   321  	}
   322  
   323  	// Add UIDMapping / GIDMapping options.
   324  	if len(mapOptions.UIDMappings) > 0 || len(mapOptions.GIDMappings) > 0 {
   325  		g.AddOrReplaceLinuxNamespace("user", "")
   326  	}
   327  	g.ClearLinuxUIDMappings()
   328  	for _, m := range mapOptions.UIDMappings {
   329  		g.AddLinuxUIDMapping(m.HostID, m.ContainerID, m.Size)
   330  	}
   331  	g.ClearLinuxGIDMappings()
   332  	for _, m := range mapOptions.GIDMappings {
   333  		g.AddLinuxGIDMapping(m.HostID, m.ContainerID, m.Size)
   334  	}
   335  	if mapOptions.Rootless {
   336  		ToRootless(g.Spec())
   337  		const resolvConf = "/etc/resolv.conf"
   338  		// If we are using user namespaces, then we must make sure that we
   339  		// don't drop any of the CL_UNPRIVILEGED "locked" flags of the source
   340  		// "mount" when we bind-mount. The reason for this is that at the point
   341  		// when runc sets up the root filesystem, it is already inside a user
   342  		// namespace, and thus cannot change any flags that are locked.
   343  		unprivOpts, err := getUnprivilegedMountFlags(resolvConf)
   344  		if err != nil {
   345  			return errors.Wrapf(err, "inspecting mount flags of %s", resolvConf)
   346  		}
   347  		g.AddMount(rspec.Mount{
   348  			Destination: resolvConf,
   349  			Source:      resolvConf,
   350  			Type:        "none",
   351  			Options:     append(unprivOpts, []string{"bind", "ro"}...),
   352  		})
   353  	}
   354  
   355  	// Save the config.json.
   356  	if err := g.Save(configFile, rgen.ExportOptions{}); err != nil {
   357  		return errors.Wrap(err, "write config.json")
   358  	}
   359  	return nil
   360  }
   361  
   362  // ToRootless converts a specification to a version that works with rootless
   363  // containers. This is done by removing options and other settings that clash
   364  // with unprivileged user namespaces.
   365  func ToRootless(spec *rspec.Spec) {
   366  	var namespaces []rspec.LinuxNamespace
   367  
   368  	// Remove additional groups.
   369  	spec.Process.User.AdditionalGids = nil
   370  
   371  	// Remove networkns from the spec.
   372  	for _, ns := range spec.Linux.Namespaces {
   373  		switch ns.Type {
   374  		case rspec.NetworkNamespace, rspec.UserNamespace:
   375  			// Do nothing.
   376  		default:
   377  			namespaces = append(namespaces, ns)
   378  		}
   379  	}
   380  	// Add userns to the spec.
   381  	namespaces = append(namespaces, rspec.LinuxNamespace{
   382  		Type: rspec.UserNamespace,
   383  	})
   384  	spec.Linux.Namespaces = namespaces
   385  
   386  	// Fix up mounts.
   387  	var mounts []rspec.Mount
   388  	for _, mount := range spec.Mounts {
   389  		// Ignore all mounts that are under /sys.
   390  		if strings.HasPrefix(mount.Destination, "/sys") {
   391  			continue
   392  		}
   393  
   394  		// Remove all gid= and uid= mappings.
   395  		var options []string
   396  		for _, option := range mount.Options {
   397  			if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
   398  				options = append(options, option)
   399  			}
   400  		}
   401  
   402  		mount.Options = options
   403  		mounts = append(mounts, mount)
   404  	}
   405  	// Add the sysfs mount as an rbind.
   406  	mounts = append(mounts, rspec.Mount{
   407  		Source:      "/sys",
   408  		Destination: "/sys",
   409  		Type:        "none",
   410  		Options:     []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
   411  	})
   412  	spec.Mounts = mounts
   413  
   414  	// Remove cgroup settings.
   415  	spec.Linux.Resources = nil
   416  }
   417  
   418  // Get the set of mount flags that are set on the mount that contains the given
   419  // path and are locked by CL_UNPRIVILEGED. This is necessary to ensure that
   420  // bind-mounting "with options" will not fail with user namespaces, due to
   421  // kernel restrictions that require user namespace mounts to preserve
   422  // CL_UNPRIVILEGED locked flags.
   423  //
   424  // Ported from https://github.com/moby/moby/pull/35205
   425  func getUnprivilegedMountFlags(path string) ([]string, error) {
   426  	var statfs unix.Statfs_t
   427  	if err := unix.Statfs(path, &statfs); err != nil {
   428  		return nil, err
   429  	}
   430  
   431  	// The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048.
   432  	unprivilegedFlags := map[uint64]string{
   433  		unix.MS_RDONLY:     "ro",
   434  		unix.MS_NODEV:      "nodev",
   435  		unix.MS_NOEXEC:     "noexec",
   436  		unix.MS_NOSUID:     "nosuid",
   437  		unix.MS_NOATIME:    "noatime",
   438  		unix.MS_RELATIME:   "relatime",
   439  		unix.MS_NODIRATIME: "nodiratime",
   440  	}
   441  
   442  	var flags []string
   443  	for mask, flag := range unprivilegedFlags {
   444  		if uint64(statfs.Flags)&mask == mask {
   445  			flags = append(flags, flag)
   446  		}
   447  	}
   448  
   449  	return flags, nil
   450  }