github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/engine/pkg/archive/copy.go (about)

     1  package archive // import "github.com/docker/docker/pkg/archive"
     2  
     3  import (
     4  	"archive/tar"
     5  	"errors"
     6  	"io"
     7  	"os"
     8  	"path/filepath"
     9  	"strings"
    10  
    11  	"github.com/docker/docker/pkg/system"
    12  	"github.com/sirupsen/logrus"
    13  )
    14  
// Errors used or returned by this file.
var (
	// ErrNotDirectory is returned by CopyInfoDestinationPath when the
	// destination does not exist and its parent path is not a directory.
	ErrNotDirectory = errors.New("not a directory")
	// ErrDirNotExists is returned by PrepareArchiveCopy when the destination
	// does not exist, is asserted to be a directory (trailing separator or
	// `.` segment), and the source content is not a directory.
	ErrDirNotExists = errors.New("no such directory")
	// ErrCannotCopyDir is returned by PrepareArchiveCopy when the source is a
	// directory but the destination already exists as a non-directory.
	ErrCannotCopyDir = errors.New("cannot copy directory")
	// ErrInvalidCopySource is not referenced in the visible part of this
	// file; presumably it is returned by callers that validate the archive
	// content against the source info — TODO confirm.
	ErrInvalidCopySource = errors.New("invalid copy source content")
)
    22  
    23  // PreserveTrailingDotOrSeparator returns the given cleaned path (after
    24  // processing using any utility functions from the path or filepath stdlib
    25  // packages) and appends a trailing `/.` or `/` if its corresponding  original
    26  // path (from before being processed by utility functions from the path or
    27  // filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned
    28  // path already ends in a `.` path segment, then another is not added. If the
    29  // clean path already ends in the separator, then another is not added.
    30  func PreserveTrailingDotOrSeparator(cleanedPath string, originalPath string, sep byte) string {
    31  	// Ensure paths are in platform semantics
    32  	cleanedPath = strings.Replace(cleanedPath, "/", string(sep), -1)
    33  	originalPath = strings.Replace(originalPath, "/", string(sep), -1)
    34  
    35  	if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) {
    36  		if !hasTrailingPathSeparator(cleanedPath, sep) {
    37  			// Add a separator if it doesn't already end with one (a cleaned
    38  			// path would only end in a separator if it is the root).
    39  			cleanedPath += string(sep)
    40  		}
    41  		cleanedPath += "."
    42  	}
    43  
    44  	if !hasTrailingPathSeparator(cleanedPath, sep) && hasTrailingPathSeparator(originalPath, sep) {
    45  		cleanedPath += string(sep)
    46  	}
    47  
    48  	return cleanedPath
    49  }
    50  
    51  // assertsDirectory returns whether the given path is
    52  // asserted to be a directory, i.e., the path ends with
    53  // a trailing '/' or `/.`, assuming a path separator of `/`.
    54  func assertsDirectory(path string, sep byte) bool {
    55  	return hasTrailingPathSeparator(path, sep) || specifiesCurrentDir(path)
    56  }
    57  
    58  // hasTrailingPathSeparator returns whether the given
    59  // path ends with the system's path separator character.
    60  func hasTrailingPathSeparator(path string, sep byte) bool {
    61  	return len(path) > 0 && path[len(path)-1] == sep
    62  }
    63  
    64  // specifiesCurrentDir returns whether the given path specifies
    65  // a "current directory", i.e., the last path segment is `.`.
    66  func specifiesCurrentDir(path string) bool {
    67  	return filepath.Base(path) == "."
    68  }
    69  
    70  // SplitPathDirEntry splits the given path between its directory name and its
    71  // basename by first cleaning the path but preserves a trailing "." if the
    72  // original path specified the current directory.
    73  func SplitPathDirEntry(path string) (dir, base string) {
    74  	cleanedPath := filepath.Clean(filepath.FromSlash(path))
    75  
    76  	if specifiesCurrentDir(path) {
    77  		cleanedPath += string(os.PathSeparator) + "."
    78  	}
    79  
    80  	return filepath.Dir(cleanedPath), filepath.Base(cleanedPath)
    81  }
    82  
    83  // TarResource archives the resource described by the given CopyInfo to a Tar
    84  // archive. A non-nil error is returned if sourcePath does not exist or is
    85  // asserted to be a directory but exists as another type of file.
    86  //
    87  // This function acts as a convenient wrapper around TarWithOptions, which
    88  // requires a directory as the source path. TarResource accepts either a
    89  // directory or a file path and correctly sets the Tar options.
    90  func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) {
    91  	return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName)
    92  }
    93  
    94  // TarResourceRebase is like TarResource but renames the first path element of
    95  // items in the resulting tar archive to match the given rebaseName if not "".
    96  func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) {
    97  	sourcePath = normalizePath(sourcePath)
    98  	if _, err = os.Lstat(sourcePath); err != nil {
    99  		// Catches the case where the source does not exist or is not a
   100  		// directory if asserted to be a directory, as this also causes an
   101  		// error.
   102  		return
   103  	}
   104  
   105  	// Separate the source path between its directory and
   106  	// the entry in that directory which we are archiving.
   107  	sourceDir, sourceBase := SplitPathDirEntry(sourcePath)
   108  	opts := TarResourceRebaseOpts(sourceBase, rebaseName)
   109  
   110  	logrus.Debugf("copying %q from %q", sourceBase, sourceDir)
   111  	return TarWithOptions(sourceDir, opts)
   112  }
   113  
   114  // TarResourceRebaseOpts does not preform the Tar, but instead just creates the rebase
   115  // parameters to be sent to TarWithOptions (the TarOptions struct)
   116  func TarResourceRebaseOpts(sourceBase string, rebaseName string) *TarOptions {
   117  	filter := []string{sourceBase}
   118  	return &TarOptions{
   119  		Compression:      Uncompressed,
   120  		IncludeFiles:     filter,
   121  		IncludeSourceDir: true,
   122  		RebaseNames: map[string]string{
   123  			sourceBase: rebaseName,
   124  		},
   125  	}
   126  }
   127  
// CopyInfo holds basic info about the source
// or destination path of a copy operation.
type CopyInfo struct {
	// Path is the path of the resource. The CopyInfo* constructors in this
	// file store the path after symlink resolution here.
	Path       string
	// Exists is true when the path was successfully stat'ed; it is left
	// false for a destination that does not exist yet.
	Exists     bool
	// IsDir reports whether the stat'ed path is a directory.
	IsDir      bool
	// RebaseName, when non-empty, is the name used in place of the base of
	// Path when archive entries are rebased. It is only populated for
	// sources (by CopyInfoSourcePath).
	RebaseName string
}
   136  
   137  // CopyInfoSourcePath stats the given path to create a CopyInfo
   138  // struct representing that resource for the source of an archive copy
   139  // operation. The given path should be an absolute local path. A source path
   140  // has all symlinks evaluated that appear before the last path separator ("/"
   141  // on Unix). As it is to be a copy source, the path must exist.
   142  func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) {
   143  	// normalize the file path and then evaluate the symbol link
   144  	// we will use the target file instead of the symbol link if
   145  	// followLink is set
   146  	path = normalizePath(path)
   147  
   148  	resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink)
   149  	if err != nil {
   150  		return CopyInfo{}, err
   151  	}
   152  
   153  	stat, err := os.Lstat(resolvedPath)
   154  	if err != nil {
   155  		return CopyInfo{}, err
   156  	}
   157  
   158  	return CopyInfo{
   159  		Path:       resolvedPath,
   160  		Exists:     true,
   161  		IsDir:      stat.IsDir(),
   162  		RebaseName: rebaseName,
   163  	}, nil
   164  }
   165  
   166  // CopyInfoDestinationPath stats the given path to create a CopyInfo
   167  // struct representing that resource for the destination of an archive copy
   168  // operation. The given path should be an absolute local path.
   169  func CopyInfoDestinationPath(path string) (info CopyInfo, err error) {
   170  	maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot.
   171  	path = normalizePath(path)
   172  	originalPath := path
   173  
   174  	stat, err := os.Lstat(path)
   175  
   176  	if err == nil && stat.Mode()&os.ModeSymlink == 0 {
   177  		// The path exists and is not a symlink.
   178  		return CopyInfo{
   179  			Path:   path,
   180  			Exists: true,
   181  			IsDir:  stat.IsDir(),
   182  		}, nil
   183  	}
   184  
   185  	// While the path is a symlink.
   186  	for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ {
   187  		if n > maxSymlinkIter {
   188  			// Don't follow symlinks more than this arbitrary number of times.
   189  			return CopyInfo{}, errors.New("too many symlinks in " + originalPath)
   190  		}
   191  
   192  		// The path is a symbolic link. We need to evaluate it so that the
   193  		// destination of the copy operation is the link target and not the
   194  		// link itself. This is notably different than CopyInfoSourcePath which
   195  		// only evaluates symlinks before the last appearing path separator.
   196  		// Also note that it is okay if the last path element is a broken
   197  		// symlink as the copy operation should create the target.
   198  		var linkTarget string
   199  
   200  		linkTarget, err = os.Readlink(path)
   201  		if err != nil {
   202  			return CopyInfo{}, err
   203  		}
   204  
   205  		if !system.IsAbs(linkTarget) {
   206  			// Join with the parent directory.
   207  			dstParent, _ := SplitPathDirEntry(path)
   208  			linkTarget = filepath.Join(dstParent, linkTarget)
   209  		}
   210  
   211  		path = linkTarget
   212  		stat, err = os.Lstat(path)
   213  	}
   214  
   215  	if err != nil {
   216  		// It's okay if the destination path doesn't exist. We can still
   217  		// continue the copy operation if the parent directory exists.
   218  		if !os.IsNotExist(err) {
   219  			return CopyInfo{}, err
   220  		}
   221  
   222  		// Ensure destination parent dir exists.
   223  		dstParent, _ := SplitPathDirEntry(path)
   224  
   225  		parentDirStat, err := os.Stat(dstParent)
   226  		if err != nil {
   227  			return CopyInfo{}, err
   228  		}
   229  		if !parentDirStat.IsDir() {
   230  			return CopyInfo{}, ErrNotDirectory
   231  		}
   232  
   233  		return CopyInfo{Path: path}, nil
   234  	}
   235  
   236  	// The path exists after resolving symlinks.
   237  	return CopyInfo{
   238  		Path:   path,
   239  		Exists: true,
   240  		IsDir:  stat.IsDir(),
   241  	}, nil
   242  }
   243  
   244  // PrepareArchiveCopy prepares the given srcContent archive, which should
   245  // contain the archived resource described by srcInfo, to the destination
   246  // described by dstInfo. Returns the possibly modified content archive along
   247  // with the path to the destination directory which it should be extracted to.
   248  func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) {
   249  	// Ensure in platform semantics
   250  	srcInfo.Path = normalizePath(srcInfo.Path)
   251  	dstInfo.Path = normalizePath(dstInfo.Path)
   252  
   253  	// Separate the destination path between its directory and base
   254  	// components in case the source archive contents need to be rebased.
   255  	dstDir, dstBase := SplitPathDirEntry(dstInfo.Path)
   256  	_, srcBase := SplitPathDirEntry(srcInfo.Path)
   257  
   258  	switch {
   259  	case dstInfo.Exists && dstInfo.IsDir:
   260  		// The destination exists as a directory. No alteration
   261  		// to srcContent is needed as its contents can be
   262  		// simply extracted to the destination directory.
   263  		return dstInfo.Path, io.NopCloser(srcContent), nil
   264  	case dstInfo.Exists && srcInfo.IsDir:
   265  		// The destination exists as some type of file and the source
   266  		// content is a directory. This is an error condition since
   267  		// you cannot copy a directory to an existing file location.
   268  		return "", nil, ErrCannotCopyDir
   269  	case dstInfo.Exists:
   270  		// The destination exists as some type of file and the source content
   271  		// is also a file. The source content entry will have to be renamed to
   272  		// have a basename which matches the destination path's basename.
   273  		if len(srcInfo.RebaseName) != 0 {
   274  			srcBase = srcInfo.RebaseName
   275  		}
   276  		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
   277  	case srcInfo.IsDir:
   278  		// The destination does not exist and the source content is an archive
   279  		// of a directory. The archive should be extracted to the parent of
   280  		// the destination path instead, and when it is, the directory that is
   281  		// created as a result should take the name of the destination path.
   282  		// The source content entries will have to be renamed to have a
   283  		// basename which matches the destination path's basename.
   284  		if len(srcInfo.RebaseName) != 0 {
   285  			srcBase = srcInfo.RebaseName
   286  		}
   287  		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
   288  	case assertsDirectory(dstInfo.Path, os.PathSeparator):
   289  		// The destination does not exist and is asserted to be created as a
   290  		// directory, but the source content is not a directory. This is an
   291  		// error condition since you cannot create a directory from a file
   292  		// source.
   293  		return "", nil, ErrDirNotExists
   294  	default:
   295  		// The last remaining case is when the destination does not exist, is
   296  		// not asserted to be a directory, and the source content is not an
   297  		// archive of a directory. It this case, the destination file will need
   298  		// to be created when the archive is extracted and the source content
   299  		// entry will have to be renamed to have a basename which matches the
   300  		// destination path's basename.
   301  		if len(srcInfo.RebaseName) != 0 {
   302  			srcBase = srcInfo.RebaseName
   303  		}
   304  		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
   305  	}
   306  
   307  }
   308  
   309  // RebaseArchiveEntries rewrites the given srcContent archive replacing
   310  // an occurrence of oldBase with newBase at the beginning of entry names.
   311  func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser {
   312  	if oldBase == string(os.PathSeparator) {
   313  		// If oldBase specifies the root directory, use an empty string as
   314  		// oldBase instead so that newBase doesn't replace the path separator
   315  		// that all paths will start with.
   316  		oldBase = ""
   317  	}
   318  
   319  	rebased, w := io.Pipe()
   320  
   321  	go func() {
   322  		srcTar := tar.NewReader(srcContent)
   323  		rebasedTar := tar.NewWriter(w)
   324  
   325  		for {
   326  			hdr, err := srcTar.Next()
   327  			if err == io.EOF {
   328  				// Signals end of archive.
   329  				rebasedTar.Close()
   330  				w.Close()
   331  				return
   332  			}
   333  			if err != nil {
   334  				w.CloseWithError(err)
   335  				return
   336  			}
   337  
   338  			// srcContent tar stream, as served by TarWithOptions(), is
   339  			// definitely in PAX format, but tar.Next() mistakenly guesses it
   340  			// as USTAR, which creates a problem: if the newBase is >100
   341  			// characters long, WriteHeader() returns an error like
   342  			// "archive/tar: cannot encode header: Format specifies USTAR; and USTAR cannot encode Name=...".
   343  			//
   344  			// To fix, set the format to PAX here. See docker/for-linux issue #484.
   345  			hdr.Format = tar.FormatPAX
   346  			hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1)
   347  			if hdr.Typeflag == tar.TypeLink {
   348  				hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1)
   349  			}
   350  
   351  			if err = rebasedTar.WriteHeader(hdr); err != nil {
   352  				w.CloseWithError(err)
   353  				return
   354  			}
   355  
   356  			// Ignoring GoSec G110. See https://github.com/securego/gosec/pull/433
   357  			// and https://cure53.de/pentest-report_opa.pdf, which recommends to
   358  			// replace io.Copy with io.CopyN7. The latter allows to specify the
   359  			// maximum number of bytes that should be read. By properly defining
   360  			// the limit, it can be assured that a GZip compression bomb cannot
   361  			// easily cause a Denial-of-Service.
   362  			// After reviewing with @tonistiigi and @cpuguy83, this should not
   363  			// affect us, because here we do not read into memory, hence should
   364  			// not be vulnerable to this code consuming memory.
   365  			//nolint:gosec // G110: Potential DoS vulnerability via decompression bomb (gosec)
   366  			if _, err = io.Copy(rebasedTar, srcTar); err != nil {
   367  				w.CloseWithError(err)
   368  				return
   369  			}
   370  		}
   371  	}()
   372  
   373  	return rebased
   374  }
   375  
   376  // TODO @gupta-ak. These might have to be changed in the future to be
   377  // continuity driver aware as well to support LCOW.
   378  
   379  // CopyResource performs an archive copy from the given source path to the
   380  // given destination path. The source path MUST exist and the destination
   381  // path's parent directory must exist.
   382  func CopyResource(srcPath, dstPath string, followLink bool) error {
   383  	var (
   384  		srcInfo CopyInfo
   385  		err     error
   386  	)
   387  
   388  	// Ensure in platform semantics
   389  	srcPath = normalizePath(srcPath)
   390  	dstPath = normalizePath(dstPath)
   391  
   392  	// Clean the source and destination paths.
   393  	srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath, os.PathSeparator)
   394  	dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath, os.PathSeparator)
   395  
   396  	if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil {
   397  		return err
   398  	}
   399  
   400  	content, err := TarResource(srcInfo)
   401  	if err != nil {
   402  		return err
   403  	}
   404  	defer content.Close()
   405  
   406  	return CopyTo(content, srcInfo, dstPath)
   407  }
   408  
   409  // CopyTo handles extracting the given content whose
   410  // entries should be sourced from srcInfo to dstPath.
   411  func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error {
   412  	// The destination path need not exist, but CopyInfoDestinationPath will
   413  	// ensure that at least the parent directory exists.
   414  	dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath))
   415  	if err != nil {
   416  		return err
   417  	}
   418  
   419  	dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo)
   420  	if err != nil {
   421  		return err
   422  	}
   423  	defer copyArchive.Close()
   424  
   425  	options := &TarOptions{
   426  		NoLchown:             true,
   427  		NoOverwriteDirNonDir: true,
   428  	}
   429  
   430  	return Untar(copyArchive, dstDir, options)
   431  }
   432  
   433  // ResolveHostSourcePath decides real path need to be copied with parameters such as
   434  // whether to follow symbol link or not, if followLink is true, resolvedPath will return
   435  // link target of any symbol link file, else it will only resolve symlink of directory
   436  // but return symbol link file itself without resolving.
   437  func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) {
   438  	if followLink {
   439  		resolvedPath, err = filepath.EvalSymlinks(path)
   440  		if err != nil {
   441  			return
   442  		}
   443  
   444  		resolvedPath, rebaseName = GetRebaseName(path, resolvedPath)
   445  	} else {
   446  		dirPath, basePath := filepath.Split(path)
   447  
   448  		// if not follow symbol link, then resolve symbol link of parent dir
   449  		var resolvedDirPath string
   450  		resolvedDirPath, err = filepath.EvalSymlinks(dirPath)
   451  		if err != nil {
   452  			return
   453  		}
   454  		// resolvedDirPath will have been cleaned (no trailing path separators) so
   455  		// we can manually join it with the base path element.
   456  		resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath
   457  		if hasTrailingPathSeparator(path, os.PathSeparator) &&
   458  			filepath.Base(path) != filepath.Base(resolvedPath) {
   459  			rebaseName = filepath.Base(path)
   460  		}
   461  	}
   462  	return resolvedPath, rebaseName, nil
   463  }
   464  
   465  // GetRebaseName normalizes and compares path and resolvedPath,
   466  // return completed resolved path and rebased file name
   467  func GetRebaseName(path, resolvedPath string) (string, string) {
   468  	// linkTarget will have been cleaned (no trailing path separators and dot) so
   469  	// we can manually join it with them
   470  	var rebaseName string
   471  	if specifiesCurrentDir(path) &&
   472  		!specifiesCurrentDir(resolvedPath) {
   473  		resolvedPath += string(filepath.Separator) + "."
   474  	}
   475  
   476  	if hasTrailingPathSeparator(path, os.PathSeparator) &&
   477  		!hasTrailingPathSeparator(resolvedPath, os.PathSeparator) {
   478  		resolvedPath += string(filepath.Separator)
   479  	}
   480  
   481  	if filepath.Base(path) != filepath.Base(resolvedPath) {
   482  		// In the case where the path had a trailing separator and a symlink
   483  		// evaluation has changed the last path component, we will need to
   484  		// rebase the name in the archive that is being copied to match the
   485  		// originally requested name.
   486  		rebaseName = filepath.Base(path)
   487  	}
   488  	return resolvedPath, rebaseName
   489  }