github.com/opencontainers/umoci@v0.4.8-0.20240508124516-656e4836fb0d/oci/layer/tar_generate.go (about)

     1  /*
     2   * umoci: Umoci Modifies Open Containers' Images
     3   * Copyright (C) 2016-2020 SUSE LLC
     4   *
     5   * Licensed under the Apache License, Version 2.0 (the "License");
     6   * you may not use this file except in compliance with the License.
     7   * You may obtain a copy of the License at
     8   *
     9   *    http://www.apache.org/licenses/LICENSE-2.0
    10   *
    11   * Unless required by applicable law or agreed to in writing, software
    12   * distributed under the License is distributed on an "AS IS" BASIS,
    13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14   * See the License for the specific language governing permissions and
    15   * limitations under the License.
    16   */
    17  
    18  package layer
    19  
    20  import (
    21  	"archive/tar"
    22  	"io"
    23  	"os"
    24  	"path/filepath"
    25  	"strings"
    26  
    27  	"github.com/apex/log"
    28  	"github.com/opencontainers/umoci/pkg/fseval"
    29  	"github.com/opencontainers/umoci/pkg/system"
    30  	"github.com/opencontainers/umoci/pkg/testutils"
    31  	"github.com/pkg/errors"
    32  	"golang.org/x/sys/unix"
    33  )
    34  
    35  // ignoreXattrs is the set of xattr names that should be ignored when
    36  // creating a new image layer, because they are host-specific and/or would be a
    37  // bad idea to unpack. They are also excluded from Lclearxattr when extracting
    38  // an archive.
    39  //
    40  // XXX: Maybe we should make this configurable so users can manually
    41  // blacklist (or even whitelist) xattrs that they actually want included?
    42  // Like how GNU tar's xattr setup works.
    43  var ignoreXattrs = map[string]struct{}{
    44  	// SELinux doesn't allow you to set SELinux policies generically. They're
    45  	// also host-specific. So just ignore them during extraction.
    46  	"security.selinux": {},
    47  
    48  	// NFSv4 ACLs are very system-specific and shouldn't be touched by us, nor
    49  	// should they be included in images.
    50  	"system.nfs4_acl": {},
    51  
    52  	// In order to support overlayfs whiteout mode, we shouldn't un-set
    53  	// this after we've set it when writing out the whiteouts.
    54  	"trusted.overlay.opaque": {},
    55  
    56  	// We don't want to include these xattrs in the image, because they're
    57  	// only relevant based on how the build overlay is constructed and will
    58  	// not be true on the target system once the image is unpacked (e.g.
    59  	// inodes might be different, impure status won't be true, etc.).
    60  	"trusted.overlay.redirect": {},
    61  	"trusted.overlay.origin":   {},
    62  	"trusted.overlay.impure":   {},
    63  	"trusted.overlay.nlink":    {},
    64  	"trusted.overlay.upper":    {},
    65  	"trusted.overlay.metacopy": {},
    66  }
    67  
    68  func init() {
    69  	// Test binaries register a fake forbidden xattr that is writable without privileges.
    70  	if !testutils.IsTestBinary() {
    71  		return
    72  	}
    73  	ignoreXattrs["user.UMOCI:forbidden_xattr"] = struct{}{}
    74  }
    75  
    76  // tarGenerator is a helper for generating layer diff tars. It should be noted
    77  // that when using tarGenerator.Add{File,Whiteout} it is recommended to do it
    78  // in lexicographic order.
    79  type tarGenerator struct {
    80  	tw *tar.Writer // tw is the tar writer that all entries are written to.
    81  
    82  	// mapOptions is the set of mapping options for modifying entries before
    83  	// they're added to the layer.
    84  	mapOptions MapOptions
    85  
    86  	// inodes maps each inode number seen so far to its first entry name (hardlinks).
    87  	inodes map[uint64]string
    88  
    89  	// fsEval is the fseval.FsEval used to read the filesystem being archived.
    90  	fsEval fseval.FsEval
    91  
    92  	// XXX: Should we add a safety check to make sure we don't generate two of
    93  	//      the same path in a tar archive? This is not permitted by the spec.
    94  }
    95  
    96  // newTarGenerator returns a tarGenerator that writes its archive to w and
    97  // keeps opt as its mapping options. Rootless mode selects fseval.Rootless.
    98  func newTarGenerator(w io.Writer, opt MapOptions) *tarGenerator {
    99  	tg := &tarGenerator{
   100  		tw:         tar.NewWriter(w),
   101  		mapOptions: opt,
   102  		inodes:     make(map[uint64]string),
   103  		fsEval:     fseval.Default,
   104  	}
   105  	if opt.Rootless {
   106  		// Swap the default evaluator for the rootless one.
   107  		tg.fsEval = fseval.Rootless
   108  	}
   109  	return tg
   110  }
   111  
   112  // normalise converts rawPath into the cleaned, relative form used for tar entry
   113  // names, appending a trailing '/' when isDir is set. It returns an error if
   114  // the path looks unsafe, i.e. it would lexically resolve outside the archive.
   115  func normalise(rawPath string, isDir bool) (string, error) {
   116  	cleaned := CleanPath(rawPath)
   117  
   118  	// A bare "." is returned untouched (no trailing slash is added).
   119  	if cleaned == "." {
   120  		return ".", nil
   121  	}
   122  
   123  	// Entry names are relative, so drop the leading slash of absolute paths.
   124  	if filepath.IsAbs(cleaned) {
   125  		cleaned = strings.TrimPrefix(cleaned, "/")
   126  	}
   127  
   128  	// Paranoia: joining onto "/" lexically cleans the path, so any difference
   129  	// means the name could still resolve outside of the tar archive.
   130  	if rooted := filepath.Join("/", cleaned); rooted != "/"+cleaned {
   131  		return "", errors.Errorf("escape warning: generated path is outside tar root: %s", rawPath)
   132  	}
   133  
   134  	// Some older tooling assumes directory names end in '/', even though this
   135  	// is no longer necessary with current tar formats.
   136  	if isDir {
   137  		return cleaned + "/", nil
   138  	}
   139  
   140  	return cleaned, nil
   141  }
   142  
   143  // AddFile adds the file at path on the filesystem to the tar archive as the
   144  // entry name. It copies the relevant stat information and xattrs (other than
   145  // those in ignoreXattrs) and tracks inodes so that further paths to an
   146  // already-added inode become hardlink entries. This should be functionally
   147  // equivalent to adding entries with GNU tar.
   148  // An error is returned if path cannot be inspected or read, if name is unsafe
   149  // or has a whiteout-prefixed basename, or if writing to the archive fails.
   150  func (tg *tarGenerator) AddFile(name, path string) error {
   151  	fi, err := tg.fsEval.Lstat(path)
   152  	if err != nil {
   153  		return errors.Wrap(err, "add file lstat")
   154  	}
   155  
   156  	linkname := ""
   157  	if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
   158  		if linkname, err = tg.fsEval.Readlink(path); err != nil {
   159  			return errors.Wrap(err, "add file readlink")
   160  		}
   161  	}
   162  
   163  	hdr, err := tar.FileInfoHeader(fi, linkname)
   164  	if err != nil {
   165  		return errors.Wrap(err, "convert fi to hdr")
   166  	}
   167  	hdr.Xattrs = map[string]string{}
   168  	// Usually incorrect for containers and was added in Go 1.10 causing
   169  	// changes to our output on a compiler bump...
   170  	hdr.Uname = ""
   171  	hdr.Gname = ""
   172  
   173  	name, err = normalise(name, fi.IsDir())
   174  	if err != nil {
   175  		return errors.Wrap(err, "normalise path")
   176  	}
   177  	hdr.Name = name
   178  
   179  	// Make sure that we don't include any files with the name ".wh.". This
   180  	// will almost certainly confuse some users (unfortunately) but there's
   181  	// nothing we can do to store such files on-disk.
   182  	if strings.HasPrefix(filepath.Base(name), whPrefix) {
   183  		return errors.Errorf("invalid path has whiteout prefix %q: %s", whPrefix, name)
   184  	}
   185  
   186  	// FIXME: Do we need to ensure that the parent paths have all been added to
   187  	//        the archive? No tar specification seems to mandate it, but some
   188  	//        people might rely on it. Implementing it means getting FileInfo
   189  	//        for the directory from somewhere, without wasting space on an
   190  	//        entry that will be overwritten later.
   191  
   192  	// Different systems have different special things they need to set within
   193  	// a tar header. Device numbers, for example, are important to be set by us.
   194  	statx, err := tg.fsEval.Lstatx(path)
   195  	if err != nil {
   196  		return errors.Wrapf(err, "lstatx %q", path)
   197  	}
   198  	updateHeader(hdr, statx)
   199  
   200  	// Set up xattrs externally to updateHeader to keep its signature simple.
   201  	// XXX: This should probably be moved to a function in tar_unix.go.
   202  	names, err := tg.fsEval.Llistxattr(path)
   203  	if err != nil {
   204  		if errors.Cause(err) != unix.EOPNOTSUPP {
   205  			return errors.Wrap(err, "get xattr list")
   206  		}
   207  		names = []string{}
   208  	}
   209  	for _, xattr := range names {
   210  		// Some xattrs need to be skipped for sanity reasons, such as
   211  		// security.selinux, because they are very much host-specific and
   212  		// carrying them to other hosts would be a really bad idea.
   213  		if _, ignore := ignoreXattrs[xattr]; ignore {
   214  			continue
   215  		}
   216  		// TODO: We should translate all v3 capabilities into root-owned
   217  		//       capabilities here. But we don't have Go code for that yet
   218  		//       (we'd need to use libcap to parse it).
   219  		value, err := tg.fsEval.Lgetxattr(path, xattr)
   220  		if err != nil {
   221  			v := errors.Cause(err)
   222  			log.Debugf("failure reading xattr %q from list on %q: %q", xattr, path, v)
   223  			if v != unix.EOPNOTSUPP && v != unix.ENODATA {
   224  				// XXX: I'm not sure if we're unprivileged whether Lgetxattr can
   225  				//      fail with EPERM. If it can, we should ignore it (like when
   226  				//      we try to clear xattrs).
   227  				return errors.Wrapf(err, "get xattr: %s", xattr)
   228  			}
   229  			// Tolerated failure: skip it rather than warn about an "empty" xattr.
   230  			continue
   231  		}
   232  		// https://golang.org/issues/20698 -- We don't just error out here
   233  		// because it's not _really_ a fatal error. Currently it's unclear
   234  		// whether the stdlib will correctly handle reading or disable writing
   235  		// of these PAX headers so we have to track this ourselves.
   236  		if len(value) <= 0 {
   237  			log.Warnf("ignoring empty-valued xattr %s: disallowed by PAX standard", xattr)
   238  			continue
   239  		}
   240  		// Note that Go strings can actually be arbitrary byte sequences, so
   241  		// this conversion (while it might look a bit wrong) is actually fine.
   242  		hdr.Xattrs[xattr] = string(value)
   243  	}
   244  
   245  	// Handle hardlinks. Not all systems have the concept of an inode, but only
   246  	// GNU/Linux is catered for here right now.
   247  	if oldpath, ok := tg.inodes[statx.Ino]; ok {
   248  		// We just hit a hardlink, so we just have to change the header.
   249  		hdr.Typeflag = tar.TypeLink
   250  		hdr.Linkname = oldpath
   251  		hdr.Size = 0
   252  	} else {
   253  		tg.inodes[statx.Ino] = name
   254  	}
   255  
   256  	// Apply any header mappings.
   257  	if err := mapHeader(hdr, tg.mapOptions); err != nil {
   258  		return errors.Wrap(err, "map header")
   259  	}
   260  	if err := tg.tw.WriteHeader(hdr); err != nil {
   261  		return errors.Wrap(err, "write header")
   262  	}
   263  
   264  	// Write the contents of regular files.
   265  	if hdr.Typeflag == tar.TypeReg {
   266  		fh, err := tg.fsEval.Open(path)
   267  		if err != nil {
   268  			return errors.Wrap(err, "open file")
   269  		}
   270  		defer fh.Close()
   271  
   272  		n, err := system.Copy(tg.tw, fh)
   273  		if err != nil {
   274  			return errors.Wrap(err, "copy to layer")
   275  		}
   276  		if n != hdr.Size {
   277  			return errors.Wrap(io.ErrShortWrite, "copy to layer")
   278  		}
   279  	}
   280  
   281  	return nil
   282  }
   283  
   284  // whPrefix is the whiteout prefix, which is used to signify "special" files in
   285  // an OCI image layer archive. An expanded filesystem image cannot contain
   286  // files that have a basename starting with this prefix.
   287  const whPrefix = ".wh."
   288  
   289  // whOpaque is the *full* basename of a special file which indicates that all
   290  // of its siblings in the directory are to be dropped in the "lower" layer.
   291  const whOpaque = whPrefix + whPrefix + ".opq" // i.e. ".wh..wh..opq"
   292  
   293  // addWhiteout adds a whiteout file for the given name inside the tar archive.
   294  // It's not recommended to add a file with AddFile and then white it out. If
   295  // you specify opaque, then the whiteout created is an opaque whiteout *for the
   296  // directory path* given.
   297  func (tg *tarGenerator) addWhiteout(name string, opaque bool) error {
   298  	name, err := normalise(name, false)
   299  	if err != nil {
   300  		return errors.Wrap(err, "normalise path")
   301  	}
   302  
   303  	// Disallow having a whiteout of a whiteout, purely for our own sanity.
   304  	dir, file := filepath.Split(name)
   305  	if strings.HasPrefix(file, whPrefix) {
   306  		return errors.Errorf("invalid path has whiteout prefix %q: %s", whPrefix, name)
   307  	}
   308  
   309  	// Figure out the whiteout name.
   310  	whiteout := filepath.Join(dir, whPrefix+file)
   311  	if opaque {
   312  		whiteout = filepath.Join(name, whOpaque)
   313  	}
   314  
   315  	// Add a dummy header for the whiteout file.
   316  	return errors.Wrap(tg.tw.WriteHeader(&tar.Header{
   317  		Name: whiteout,
   318  		Size: 0,
   319  	}), "write whiteout header")
   320  }
   321  
   322  // AddWhiteout adds a regular (non-opaque) whiteout entry for the provided path.
   323  func (tg *tarGenerator) AddWhiteout(name string) error {
   324  	return tg.addWhiteout(name, false)
   325  }
   326  
   327  // AddOpaqueWhiteout adds an opaque whiteout entry for the provided directory path.
   328  func (tg *tarGenerator) AddOpaqueWhiteout(name string) error {
   329  	return tg.addWhiteout(name, true)
   330  }