github.com/opencontainers/umoci@v0.4.8-0.20240508124516-656e4836fb0d/oci/layer/unpack.go (about)

     1  /*
     2   * umoci: Umoci Modifies Open Containers' Images
     3   * Copyright (C) 2016-2020 SUSE LLC
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *    http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  package layer
    19  
    20  import (
    21  	"archive/tar"
    22  	"context"
    23  	"encoding/json"
    24  	"fmt"
    25  	"io"
    26  	"io/ioutil"
    27  	"os"
    28  	"path/filepath"
    29  	"time"
    30  
    31  	// Import is necessary for go-digest.
    32  	_ "crypto/sha256"
    33  
    34  	"github.com/apex/log"
    35  	gzip "github.com/klauspost/pgzip"
    36  	"github.com/opencontainers/go-digest"
    37  	ispec "github.com/opencontainers/image-spec/specs-go/v1"
    38  	rspec "github.com/opencontainers/runtime-spec/specs-go"
    39  	"github.com/opencontainers/umoci/oci/cas"
    40  	"github.com/opencontainers/umoci/oci/casext"
    41  	iconv "github.com/opencontainers/umoci/oci/config/convert"
    42  	"github.com/opencontainers/umoci/pkg/fseval"
    43  	"github.com/opencontainers/umoci/pkg/idtools"
    44  	"github.com/opencontainers/umoci/pkg/system"
    45  	"github.com/pkg/errors"
    46  )
    47  
    48  // AfterLayerUnpackCallback is called after each layer is unpacked.
    49  type AfterLayerUnpackCallback func(manifest ispec.Manifest, desc ispec.Descriptor) error
    50  
    51  // UnpackLayer unpacks the tar stream representing an OCI layer at the given
    52  // root. It ensures that the state of the root is as close as possible to the
    53  // state used to create the layer. If an error is returned, the state of root
    54  // is undefined (unpacking is not guaranteed to be atomic).
    55  func UnpackLayer(root string, layer io.Reader, opt *UnpackOptions) error {
    56  	var unpackOptions UnpackOptions
    57  	if opt != nil {
    58  		unpackOptions = *opt
    59  	}
    60  	te := NewTarExtractor(unpackOptions)
    61  	tr := tar.NewReader(layer)
    62  	for {
    63  		hdr, err := tr.Next()
    64  		if err == io.EOF {
    65  			break
    66  		}
    67  		if err != nil {
    68  			return errors.Wrap(err, "read next entry")
    69  		}
    70  		if err := te.UnpackEntry(root, hdr, tr); err != nil {
    71  			return errors.Wrapf(err, "unpack entry: %s", hdr.Name)
    72  		}
    73  	}
    74  	return nil
    75  }
    76  
// RootfsName is the name of the rootfs directory inside the bundle path when
// generated. UnpackManifest joins it onto the bundle path to decide where the
// image's layers are extracted.
const RootfsName = "rootfs"
    80  
    81  // isLayerType returns if the given MediaType is the media type of an image
    82  // layer blob. This includes both distributable and non-distributable images.
    83  func isLayerType(mediaType string) bool {
    84  	return mediaType == ispec.MediaTypeImageLayer || mediaType == ispec.MediaTypeImageLayerNonDistributable ||
    85  		mediaType == ispec.MediaTypeImageLayerGzip || mediaType == ispec.MediaTypeImageLayerNonDistributableGzip
    86  }
    87  
    88  func needsGunzip(mediaType string) bool {
    89  	return mediaType == ispec.MediaTypeImageLayerGzip || mediaType == ispec.MediaTypeImageLayerNonDistributableGzip
    90  }
    91  
    92  // UnpackManifest extracts all of the layers in the given manifest, as well as
    93  // generating a runtime bundle and configuration. The rootfs is extracted to
    94  // <bundle>/<layer.RootfsName>.
    95  //
    96  // FIXME: This interface is ugly.
    97  func UnpackManifest(ctx context.Context, engine cas.Engine, bundle string, manifest ispec.Manifest, opt *UnpackOptions) (err error) {
    98  	// Create the bundle directory. We only error out if config.json or rootfs/
    99  	// already exists, because we cannot be sure that the user intended us to
   100  	// extract over an existing bundle.
   101  	if err := os.MkdirAll(bundle, 0755); err != nil {
   102  		return errors.Wrap(err, "mkdir bundle")
   103  	}
   104  	// We change the mode of the bundle directory to 0700. A user can easily
   105  	// change this after-the-fact, but we do this explicitly to avoid cases
   106  	// where an unprivileged user could recurse into an otherwise unsafe image
   107  	// (giving them potential root access through setuid binaries for example).
   108  	if err := os.Chmod(bundle, 0700); err != nil {
   109  		return errors.Wrap(err, "chmod bundle 0700")
   110  	}
   111  
   112  	configPath := filepath.Join(bundle, "config.json")
   113  	rootfsPath := filepath.Join(bundle, RootfsName)
   114  
   115  	if _, err := os.Lstat(configPath); !os.IsNotExist(err) {
   116  		if err == nil {
   117  			return errors.Errorf("config.json already exists in %s", bundle)
   118  		}
   119  		return errors.Wrap(err, "problem accessing bundle config")
   120  	}
   121  
   122  	defer func() {
   123  		if err != nil {
   124  			fsEval := fseval.Default
   125  			if opt != nil && opt.MapOptions.Rootless {
   126  				fsEval = fseval.Rootless
   127  			}
   128  			// It's too late to care about errors.
   129  			// #nosec G104
   130  			_ = fsEval.RemoveAll(rootfsPath)
   131  		}
   132  	}()
   133  
   134  	if _, err := os.Lstat(rootfsPath); !os.IsNotExist(err) && opt.StartFrom.MediaType == "" {
   135  		if err == nil {
   136  			err = fmt.Errorf("%s already exists", rootfsPath)
   137  		}
   138  		return errors.Wrapf(err, "detecting rootfs")
   139  	}
   140  
   141  	log.Infof("unpack rootfs: %s", rootfsPath)
   142  	if err := UnpackRootfs(ctx, engine, rootfsPath, manifest, opt); err != nil {
   143  		return errors.Wrap(err, "unpack rootfs")
   144  	}
   145  
   146  	// Generate a runtime configuration file from ispec.Image.
   147  	configFile, err := os.Create(configPath)
   148  	if err != nil {
   149  		return errors.Wrap(err, "open config.json")
   150  	}
   151  	defer configFile.Close()
   152  
   153  	if err := UnpackRuntimeJSON(ctx, engine, configFile, rootfsPath, manifest, &opt.MapOptions); err != nil {
   154  		return errors.Wrap(err, "unpack config.json")
   155  	}
   156  	return nil
   157  }
   158  
   159  // UnpackRootfs extracts all of the layers in the given manifest.
   160  // Some verification is done during image extraction.
   161  func UnpackRootfs(ctx context.Context, engine cas.Engine, rootfsPath string, manifest ispec.Manifest, opt *UnpackOptions) (err error) {
   162  	engineExt := casext.NewEngine(engine)
   163  
   164  	if err := os.Mkdir(rootfsPath, 0755); err != nil && !os.IsExist(err) {
   165  		return errors.Wrap(err, "mkdir rootfs")
   166  	}
   167  
   168  	// In order to avoid having a broken rootfs in the case of an error, we
   169  	// remove the rootfs. In the case of rootless this is particularly
   170  	// important (`rm -rf` won't work on most distro rootfs's).
   171  	defer func() {
   172  		if err != nil {
   173  			fsEval := fseval.Default
   174  			if opt != nil && opt.MapOptions.Rootless {
   175  				fsEval = fseval.Rootless
   176  			}
   177  			// It's too late to care about errors.
   178  			// #nosec G104
   179  			_ = fsEval.RemoveAll(rootfsPath)
   180  		}
   181  	}()
   182  
   183  	// Make sure that the owner is correct.
   184  	rootUID, err := idtools.ToHost(0, opt.MapOptions.UIDMappings)
   185  	if err != nil {
   186  		return errors.Wrap(err, "ensure rootuid has mapping")
   187  	}
   188  	rootGID, err := idtools.ToHost(0, opt.MapOptions.GIDMappings)
   189  	if err != nil {
   190  		return errors.Wrap(err, "ensure rootgid has mapping")
   191  	}
   192  	if err := os.Lchown(rootfsPath, rootUID, rootGID); err != nil {
   193  		return errors.Wrap(err, "chown rootfs")
   194  	}
   195  
   196  	// Currently, many different images in the wild don't specify what the
   197  	// atime/mtime of the root directory is. This is a huge pain because it
   198  	// means that we can't ensure consistent unpacking. In order to get around
   199  	// this, we first set the mtime of the root directory to the Unix epoch
   200  	// (which is as good of an arbitrary choice as any).
   201  	epoch := time.Unix(0, 0)
   202  	if err := system.Lutimes(rootfsPath, epoch, epoch); err != nil {
   203  		return errors.Wrap(err, "set initial root time")
   204  	}
   205  
   206  	// In order to verify the DiffIDs as we extract layers, we have to get the
   207  	// .Config blob first. But we can't extract it (generate the runtime
   208  	// config) until after we have the full rootfs generated.
   209  	configBlob, err := engineExt.FromDescriptor(ctx, manifest.Config)
   210  	if err != nil {
   211  		return errors.Wrap(err, "get config blob")
   212  	}
   213  	defer configBlob.Close()
   214  	if configBlob.Descriptor.MediaType != ispec.MediaTypeImageConfig {
   215  		return errors.Errorf("unpack rootfs: config blob is not correct mediatype %s: %s", ispec.MediaTypeImageConfig, configBlob.Descriptor.MediaType)
   216  	}
   217  	config, ok := configBlob.Data.(ispec.Image)
   218  	if !ok {
   219  		// Should _never_ be reached.
   220  		return errors.Errorf("[internal error] unknown config blob type: %s", configBlob.Descriptor.MediaType)
   221  	}
   222  
   223  	// We can't understand non-layer images.
   224  	if config.RootFS.Type != "layers" {
   225  		return errors.Errorf("unpack rootfs: config: unsupported rootfs.type: %s", config.RootFS.Type)
   226  	}
   227  
   228  	// Layer extraction.
   229  	found := false
   230  	for idx, layerDescriptor := range manifest.Layers {
   231  		if !found && opt.StartFrom.MediaType != "" && layerDescriptor.Digest.String() != opt.StartFrom.Digest.String() {
   232  			continue
   233  		}
   234  		found = true
   235  
   236  		layerDiffID := config.RootFS.DiffIDs[idx]
   237  		log.Infof("unpack layer: %s", layerDescriptor.Digest)
   238  
   239  		layerBlob, err := engineExt.FromDescriptor(ctx, layerDescriptor)
   240  		if err != nil {
   241  			return errors.Wrap(err, "get layer blob")
   242  		}
   243  		defer layerBlob.Close()
   244  		if !isLayerType(layerBlob.Descriptor.MediaType) {
   245  			return errors.Errorf("unpack rootfs: layer %s: blob is not correct mediatype: %s", layerBlob.Descriptor.Digest, layerBlob.Descriptor.MediaType)
   246  		}
   247  		layerData, ok := layerBlob.Data.(io.ReadCloser)
   248  		if !ok {
   249  			// Should _never_ be reached.
   250  			return errors.Errorf("[internal error] layerBlob was not an io.ReadCloser")
   251  		}
   252  
   253  		layerRaw := layerData
   254  		if needsGunzip(layerBlob.Descriptor.MediaType) {
   255  			// We have to extract a gzip'd version of the above layer. Also note
   256  			// that we have to check the DiffID we're extracting (which is the
   257  			// sha256 sum of the *uncompressed* layer).
   258  			layerRaw, err = gzip.NewReader(layerData)
   259  			if err != nil {
   260  				return errors.Wrap(err, "create gzip reader")
   261  			}
   262  		}
   263  
   264  		layerDigester := digest.SHA256.Digester()
   265  		layer := io.TeeReader(layerRaw, layerDigester.Hash())
   266  
   267  		if err := UnpackLayer(rootfsPath, layer, opt); err != nil {
   268  			return errors.Wrap(err, "unpack layer")
   269  		}
   270  		// Different tar implementations can have different levels of redundant
   271  		// padding and other similar weird behaviours. While on paper they are
   272  		// all entirely valid archives, Go's tar.Reader implementation doesn't
   273  		// guarantee that the entire stream will be consumed (which can result
   274  		// in the later diff_id check failing because the digester didn't get
   275  		// the whole uncompressed stream). Just blindly consume anything left
   276  		// in the layer.
   277  		if n, err := system.Copy(ioutil.Discard, layer); err != nil {
   278  			return errors.Wrap(err, "discard trailing archive bits")
   279  		} else if n != 0 {
   280  			log.Debugf("unpack manifest: layer %s: ignoring %d trailing 'junk' bytes in the tar stream -- probably from GNU tar", layerDescriptor.Digest, n)
   281  		}
   282  		// Same goes for compressed layers -- it seems like some gzip
   283  		// implementations add trailing NUL bytes, which Go doesn't slurp up.
   284  		// Just eat up the rest of the remaining bytes and discard them.
   285  		//
   286  		// FIXME: We use layerData here because pgzip returns io.EOF from
   287  		// WriteTo, which causes havoc with system.Copy. Ideally we would use
   288  		// layerRaw. See <https://github.com/klauspost/pgzip/issues/38>.
   289  		if n, err := system.Copy(ioutil.Discard, layerData); err != nil {
   290  			return errors.Wrap(err, "discard trailing raw bits")
   291  		} else if n != 0 {
   292  			log.Warnf("unpack manifest: layer %s: ignoring %d trailing 'junk' bytes in the blob stream -- this may indicate a bug in the tool which built this image", layerDescriptor.Digest, n)
   293  		}
   294  		if err := layerData.Close(); err != nil {
   295  			return errors.Wrap(err, "close layer data")
   296  		}
   297  
   298  		layerDigest := layerDigester.Digest()
   299  		if layerDigest != layerDiffID {
   300  			return errors.Errorf("unpack manifest: layer %s: diffid mismatch: got %s expected %s", layerDescriptor.Digest, layerDigest, layerDiffID)
   301  		}
   302  
   303  		if opt.AfterLayerUnpack != nil {
   304  			if err := opt.AfterLayerUnpack(manifest, layerDescriptor); err != nil {
   305  				return err
   306  			}
   307  		}
   308  	}
   309  
   310  	return nil
   311  }
   312  
   313  // UnpackRuntimeJSON converts a given manifest's configuration to a runtime
   314  // configuration and writes it to the given writer. If rootfs is specified, it
   315  // is sourced during the configuration generation (for conversion of
   316  // Config.User and other similar jobs -- which will error out if the user could
   317  // not be parsed). If rootfs is not specified (is an empty string) then all
   318  // conversions that require sourcing the rootfs will be set to their default
   319  // values.
   320  //
   321  // XXX: I don't like this API. It has way too many arguments.
   322  func UnpackRuntimeJSON(ctx context.Context, engine cas.Engine, configFile io.Writer, rootfs string, manifest ispec.Manifest, opt *MapOptions) error {
   323  	engineExt := casext.NewEngine(engine)
   324  
   325  	var mapOptions MapOptions
   326  	if opt != nil {
   327  		mapOptions = *opt
   328  	}
   329  
   330  	// In order to verify the DiffIDs as we extract layers, we have to get the
   331  	// .Config blob first. But we can't extract it (generate the runtime
   332  	// config) until after we have the full rootfs generated.
   333  	configBlob, err := engineExt.FromDescriptor(ctx, manifest.Config)
   334  	if err != nil {
   335  		return errors.Wrap(err, "get config blob")
   336  	}
   337  	defer configBlob.Close()
   338  	if configBlob.Descriptor.MediaType != ispec.MediaTypeImageConfig {
   339  		return errors.Errorf("unpack manifest: config blob is not correct mediatype %s: %s", ispec.MediaTypeImageConfig, configBlob.Descriptor.MediaType)
   340  	}
   341  	config, ok := configBlob.Data.(ispec.Image)
   342  	if !ok {
   343  		// Should _never_ be reached.
   344  		return errors.Errorf("[internal error] unknown config blob type: %s", configBlob.Descriptor.MediaType)
   345  	}
   346  
   347  	spec, err := iconv.ToRuntimeSpec(rootfs, config)
   348  	if err != nil {
   349  		return errors.Wrap(err, "generate config.json")
   350  	}
   351  
   352  	// Add UIDMapping / GIDMapping options.
   353  	if len(mapOptions.UIDMappings) > 0 || len(mapOptions.GIDMappings) > 0 {
   354  		var namespaces []rspec.LinuxNamespace
   355  		for _, ns := range spec.Linux.Namespaces {
   356  			if ns.Type == "user" {
   357  				continue
   358  			}
   359  			namespaces = append(namespaces, ns)
   360  		}
   361  		spec.Linux.Namespaces = append(namespaces, rspec.LinuxNamespace{
   362  			Type: "user",
   363  		})
   364  	}
   365  	spec.Linux.UIDMappings = mapOptions.UIDMappings
   366  	spec.Linux.GIDMappings = mapOptions.GIDMappings
   367  	if mapOptions.Rootless {
   368  		if err := iconv.ToRootless(&spec); err != nil {
   369  			return errors.Wrap(err, "convert spec to rootless")
   370  		}
   371  	}
   372  
   373  	// Save the config.json.
   374  	enc := json.NewEncoder(configFile)
   375  	enc.SetIndent("", "\t")
   376  	return errors.Wrap(enc.Encode(spec), "write config.json")
   377  }