github.com/cdoern/storage@v1.12.13/pkg/archive/copy.go (about)

     1  package archive
     2  
     3  import (
     4  	"archive/tar"
     5  	"errors"
     6  	"io"
     7  	"io/ioutil"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  
    12  	"github.com/containers/storage/pkg/system"
    13  	"github.com/sirupsen/logrus"
    14  )
    15  
    16  // Errors used or returned by this file.
    17  var (
    18  	ErrNotDirectory      = errors.New("not a directory")
    19  	ErrDirNotExists      = errors.New("no such directory")
    20  	ErrCannotCopyDir     = errors.New("cannot copy directory")
    21  	ErrInvalidCopySource = errors.New("invalid copy source content")
    22  )
    23  
    24  // PreserveTrailingDotOrSeparator returns the given cleaned path (after
    25  // processing using any utility functions from the path or filepath stdlib
    26  // packages) and appends a trailing `/.` or `/` if its corresponding  original
    27  // path (from before being processed by utility functions from the path or
    28  // filepath stdlib packages) ends with a trailing `/.` or `/`. If the cleaned
    29  // path already ends in a `.` path segment, then another is not added. If the
    30  // clean path already ends in a path separator, then another is not added.
    31  func PreserveTrailingDotOrSeparator(cleanedPath, originalPath string) string {
    32  	// Ensure paths are in platform semantics
    33  	cleanedPath = normalizePath(cleanedPath)
    34  	originalPath = normalizePath(originalPath)
    35  
    36  	if !specifiesCurrentDir(cleanedPath) && specifiesCurrentDir(originalPath) {
    37  		if !hasTrailingPathSeparator(cleanedPath) {
    38  			// Add a separator if it doesn't already end with one (a cleaned
    39  			// path would only end in a separator if it is the root).
    40  			cleanedPath += string(filepath.Separator)
    41  		}
    42  		cleanedPath += "."
    43  	}
    44  
    45  	if !hasTrailingPathSeparator(cleanedPath) && hasTrailingPathSeparator(originalPath) {
    46  		cleanedPath += string(filepath.Separator)
    47  	}
    48  
    49  	return cleanedPath
    50  }
    51  
    52  // assertsDirectory returns whether the given path is
    53  // asserted to be a directory, i.e., the path ends with
    54  // a trailing '/' or `/.`, assuming a path separator of `/`.
    55  func assertsDirectory(path string) bool {
    56  	return hasTrailingPathSeparator(path) || specifiesCurrentDir(path)
    57  }
    58  
    59  // hasTrailingPathSeparator returns whether the given
    60  // path ends with the system's path separator character.
    61  func hasTrailingPathSeparator(path string) bool {
    62  	return len(path) > 0 && os.IsPathSeparator(path[len(path)-1])
    63  }
    64  
    65  // specifiesCurrentDir returns whether the given path specifies
    66  // a "current directory", i.e., the last path segment is `.`.
    67  func specifiesCurrentDir(path string) bool {
    68  	return filepath.Base(path) == "."
    69  }
    70  
    71  // SplitPathDirEntry splits the given path between its directory name and its
    72  // basename by first cleaning the path but preserves a trailing "." if the
    73  // original path specified the current directory.
    74  func SplitPathDirEntry(path string) (dir, base string) {
    75  	cleanedPath := filepath.Clean(normalizePath(path))
    76  
    77  	if specifiesCurrentDir(path) {
    78  		cleanedPath += string(filepath.Separator) + "."
    79  	}
    80  
    81  	return filepath.Dir(cleanedPath), filepath.Base(cleanedPath)
    82  }
    83  
    84  // TarResource archives the resource described by the given CopyInfo to a Tar
    85  // archive. A non-nil error is returned if sourcePath does not exist or is
    86  // asserted to be a directory but exists as another type of file.
    87  //
    88  // This function acts as a convenient wrapper around TarWithOptions, which
    89  // requires a directory as the source path. TarResource accepts either a
    90  // directory or a file path and correctly sets the Tar options.
    91  func TarResource(sourceInfo CopyInfo) (content io.ReadCloser, err error) {
    92  	return TarResourceRebase(sourceInfo.Path, sourceInfo.RebaseName)
    93  }
    94  
    95  // TarResourceRebase is like TarResource but renames the first path element of
    96  // items in the resulting tar archive to match the given rebaseName if not "".
    97  func TarResourceRebase(sourcePath, rebaseName string) (content io.ReadCloser, err error) {
    98  	sourcePath = normalizePath(sourcePath)
    99  	if _, err = os.Lstat(sourcePath); err != nil {
   100  		// Catches the case where the source does not exist or is not a
   101  		// directory if asserted to be a directory, as this also causes an
   102  		// error.
   103  		return
   104  	}
   105  
   106  	// Separate the source path between its directory and
   107  	// the entry in that directory which we are archiving.
   108  	sourceDir, sourceBase := SplitPathDirEntry(sourcePath)
   109  
   110  	filter := []string{sourceBase}
   111  
   112  	logrus.Debugf("copying %q from %q", sourceBase, sourceDir)
   113  
   114  	return TarWithOptions(sourceDir, &TarOptions{
   115  		Compression:      Uncompressed,
   116  		IncludeFiles:     filter,
   117  		IncludeSourceDir: true,
   118  		RebaseNames: map[string]string{
   119  			sourceBase: rebaseName,
   120  		},
   121  	})
   122  }
   123  
// CopyInfo holds basic info about the source
// or destination path of a copy operation.
type CopyInfo struct {
	Path       string // path of the resource; the helpers in this file store the symlink-resolved path here
	Exists     bool   // true when the path was successfully stat'ed
	IsDir      bool   // true when the stat'ed path is a directory
	RebaseName string // if non-empty, the name archive entries should be rebased from/to instead of the path's base name
}
   132  
   133  // CopyInfoSourcePath stats the given path to create a CopyInfo
   134  // struct representing that resource for the source of an archive copy
   135  // operation. The given path should be an absolute local path. A source path
   136  // has all symlinks evaluated that appear before the last path separator ("/"
   137  // on Unix). As it is to be a copy source, the path must exist.
   138  func CopyInfoSourcePath(path string, followLink bool) (CopyInfo, error) {
   139  	// normalize the file path and then evaluate the symbol link
   140  	// we will use the target file instead of the symbol link if
   141  	// followLink is set
   142  	path = normalizePath(path)
   143  
   144  	resolvedPath, rebaseName, err := ResolveHostSourcePath(path, followLink)
   145  	if err != nil {
   146  		return CopyInfo{}, err
   147  	}
   148  
   149  	stat, err := os.Lstat(resolvedPath)
   150  	if err != nil {
   151  		return CopyInfo{}, err
   152  	}
   153  
   154  	return CopyInfo{
   155  		Path:       resolvedPath,
   156  		Exists:     true,
   157  		IsDir:      stat.IsDir(),
   158  		RebaseName: rebaseName,
   159  	}, nil
   160  }
   161  
   162  // CopyInfoDestinationPath stats the given path to create a CopyInfo
   163  // struct representing that resource for the destination of an archive copy
   164  // operation. The given path should be an absolute local path.
   165  func CopyInfoDestinationPath(path string) (info CopyInfo, err error) {
   166  	maxSymlinkIter := 10 // filepath.EvalSymlinks uses 255, but 10 already seems like a lot.
   167  	path = normalizePath(path)
   168  	originalPath := path
   169  
   170  	stat, err := os.Lstat(path)
   171  
   172  	if err == nil && stat.Mode()&os.ModeSymlink == 0 {
   173  		// The path exists and is not a symlink.
   174  		return CopyInfo{
   175  			Path:   path,
   176  			Exists: true,
   177  			IsDir:  stat.IsDir(),
   178  		}, nil
   179  	}
   180  
   181  	// While the path is a symlink.
   182  	for n := 0; err == nil && stat.Mode()&os.ModeSymlink != 0; n++ {
   183  		if n > maxSymlinkIter {
   184  			// Don't follow symlinks more than this arbitrary number of times.
   185  			return CopyInfo{}, errors.New("too many symlinks in " + originalPath)
   186  		}
   187  
   188  		// The path is a symbolic link. We need to evaluate it so that the
   189  		// destination of the copy operation is the link target and not the
   190  		// link itself. This is notably different than CopyInfoSourcePath which
   191  		// only evaluates symlinks before the last appearing path separator.
   192  		// Also note that it is okay if the last path element is a broken
   193  		// symlink as the copy operation should create the target.
   194  		var linkTarget string
   195  
   196  		linkTarget, err = os.Readlink(path)
   197  		if err != nil {
   198  			return CopyInfo{}, err
   199  		}
   200  
   201  		if !system.IsAbs(linkTarget) {
   202  			// Join with the parent directory.
   203  			dstParent, _ := SplitPathDirEntry(path)
   204  			linkTarget = filepath.Join(dstParent, linkTarget)
   205  		}
   206  
   207  		path = linkTarget
   208  		stat, err = os.Lstat(path)
   209  	}
   210  
   211  	if err != nil {
   212  		// It's okay if the destination path doesn't exist. We can still
   213  		// continue the copy operation if the parent directory exists.
   214  		if !os.IsNotExist(err) {
   215  			return CopyInfo{}, err
   216  		}
   217  
   218  		// Ensure destination parent dir exists.
   219  		dstParent, _ := SplitPathDirEntry(path)
   220  
   221  		parentDirStat, err := os.Lstat(dstParent)
   222  		if err != nil {
   223  			return CopyInfo{}, err
   224  		}
   225  		if !parentDirStat.IsDir() {
   226  			return CopyInfo{}, ErrNotDirectory
   227  		}
   228  
   229  		return CopyInfo{Path: path}, nil
   230  	}
   231  
   232  	// The path exists after resolving symlinks.
   233  	return CopyInfo{
   234  		Path:   path,
   235  		Exists: true,
   236  		IsDir:  stat.IsDir(),
   237  	}, nil
   238  }
   239  
   240  // PrepareArchiveCopy prepares the given srcContent archive, which should
   241  // contain the archived resource described by srcInfo, to the destination
   242  // described by dstInfo. Returns the possibly modified content archive along
   243  // with the path to the destination directory which it should be extracted to.
   244  func PrepareArchiveCopy(srcContent io.Reader, srcInfo, dstInfo CopyInfo) (dstDir string, content io.ReadCloser, err error) {
   245  	// Ensure in platform semantics
   246  	srcInfo.Path = normalizePath(srcInfo.Path)
   247  	dstInfo.Path = normalizePath(dstInfo.Path)
   248  
   249  	// Separate the destination path between its directory and base
   250  	// components in case the source archive contents need to be rebased.
   251  	dstDir, dstBase := SplitPathDirEntry(dstInfo.Path)
   252  	_, srcBase := SplitPathDirEntry(srcInfo.Path)
   253  
   254  	switch {
   255  	case dstInfo.Exists && dstInfo.IsDir:
   256  		// The destination exists as a directory. No alteration
   257  		// to srcContent is needed as its contents can be
   258  		// simply extracted to the destination directory.
   259  		return dstInfo.Path, ioutil.NopCloser(srcContent), nil
   260  	case dstInfo.Exists && srcInfo.IsDir:
   261  		// The destination exists as some type of file and the source
   262  		// content is a directory. This is an error condition since
   263  		// you cannot copy a directory to an existing file location.
   264  		return "", nil, ErrCannotCopyDir
   265  	case dstInfo.Exists:
   266  		// The destination exists as some type of file and the source content
   267  		// is also a file. The source content entry will have to be renamed to
   268  		// have a basename which matches the destination path's basename.
   269  		if len(srcInfo.RebaseName) != 0 {
   270  			srcBase = srcInfo.RebaseName
   271  		}
   272  		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
   273  	case srcInfo.IsDir:
   274  		// The destination does not exist and the source content is an archive
   275  		// of a directory. The archive should be extracted to the parent of
   276  		// the destination path instead, and when it is, the directory that is
   277  		// created as a result should take the name of the destination path.
   278  		// The source content entries will have to be renamed to have a
   279  		// basename which matches the destination path's basename.
   280  		if len(srcInfo.RebaseName) != 0 {
   281  			srcBase = srcInfo.RebaseName
   282  		}
   283  		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
   284  	case assertsDirectory(dstInfo.Path):
   285  		// The destination does not exist and is asserted to be created as a
   286  		// directory, but the source content is not a directory. This is an
   287  		// error condition since you cannot create a directory from a file
   288  		// source.
   289  		return "", nil, ErrDirNotExists
   290  	default:
   291  		// The last remaining case is when the destination does not exist, is
   292  		// not asserted to be a directory, and the source content is not an
   293  		// archive of a directory. It this case, the destination file will need
   294  		// to be created when the archive is extracted and the source content
   295  		// entry will have to be renamed to have a basename which matches the
   296  		// destination path's basename.
   297  		if len(srcInfo.RebaseName) != 0 {
   298  			srcBase = srcInfo.RebaseName
   299  		}
   300  		return dstDir, RebaseArchiveEntries(srcContent, srcBase, dstBase), nil
   301  	}
   302  
   303  }
   304  
   305  // RebaseArchiveEntries rewrites the given srcContent archive replacing
   306  // an occurrence of oldBase with newBase at the beginning of entry names.
   307  func RebaseArchiveEntries(srcContent io.Reader, oldBase, newBase string) io.ReadCloser {
   308  	if oldBase == string(os.PathSeparator) {
   309  		// If oldBase specifies the root directory, use an empty string as
   310  		// oldBase instead so that newBase doesn't replace the path separator
   311  		// that all paths will start with.
   312  		oldBase = ""
   313  	}
   314  
   315  	rebased, w := io.Pipe()
   316  
   317  	go func() {
   318  		srcTar := tar.NewReader(srcContent)
   319  		rebasedTar := tar.NewWriter(w)
   320  
   321  		for {
   322  			hdr, err := srcTar.Next()
   323  			if err == io.EOF {
   324  				// Signals end of archive.
   325  				rebasedTar.Close()
   326  				w.Close()
   327  				return
   328  			}
   329  			if err != nil {
   330  				w.CloseWithError(err)
   331  				return
   332  			}
   333  
   334  			hdr.Name = strings.Replace(hdr.Name, oldBase, newBase, 1)
   335  			if hdr.Typeflag == tar.TypeLink {
   336  				hdr.Linkname = strings.Replace(hdr.Linkname, oldBase, newBase, 1)
   337  			}
   338  
   339  			if err = rebasedTar.WriteHeader(hdr); err != nil {
   340  				w.CloseWithError(err)
   341  				return
   342  			}
   343  
   344  			if _, err = io.Copy(rebasedTar, srcTar); err != nil {
   345  				w.CloseWithError(err)
   346  				return
   347  			}
   348  		}
   349  	}()
   350  
   351  	return rebased
   352  }
   353  
   354  // CopyResource performs an archive copy from the given source path to the
   355  // given destination path. The source path MUST exist and the destination
   356  // path's parent directory must exist.
   357  func CopyResource(srcPath, dstPath string, followLink bool) error {
   358  	var (
   359  		srcInfo CopyInfo
   360  		err     error
   361  	)
   362  
   363  	// Ensure in platform semantics
   364  	srcPath = normalizePath(srcPath)
   365  	dstPath = normalizePath(dstPath)
   366  
   367  	// Clean the source and destination paths.
   368  	srcPath = PreserveTrailingDotOrSeparator(filepath.Clean(srcPath), srcPath)
   369  	dstPath = PreserveTrailingDotOrSeparator(filepath.Clean(dstPath), dstPath)
   370  
   371  	if srcInfo, err = CopyInfoSourcePath(srcPath, followLink); err != nil {
   372  		return err
   373  	}
   374  
   375  	content, err := TarResource(srcInfo)
   376  	if err != nil {
   377  		return err
   378  	}
   379  	defer content.Close()
   380  
   381  	return CopyTo(content, srcInfo, dstPath)
   382  }
   383  
   384  // CopyTo handles extracting the given content whose
   385  // entries should be sourced from srcInfo to dstPath.
   386  func CopyTo(content io.Reader, srcInfo CopyInfo, dstPath string) error {
   387  	// The destination path need not exist, but CopyInfoDestinationPath will
   388  	// ensure that at least the parent directory exists.
   389  	dstInfo, err := CopyInfoDestinationPath(normalizePath(dstPath))
   390  	if err != nil {
   391  		return err
   392  	}
   393  
   394  	dstDir, copyArchive, err := PrepareArchiveCopy(content, srcInfo, dstInfo)
   395  	if err != nil {
   396  		return err
   397  	}
   398  	defer copyArchive.Close()
   399  
   400  	options := &TarOptions{
   401  		NoLchown:             true,
   402  		NoOverwriteDirNonDir: true,
   403  	}
   404  
   405  	return Untar(copyArchive, dstDir, options)
   406  }
   407  
   408  // ResolveHostSourcePath decides real path need to be copied with parameters such as
   409  // whether to follow symbol link or not, if followLink is true, resolvedPath will return
   410  // link target of any symbol link file, else it will only resolve symlink of directory
   411  // but return symbol link file itself without resolving.
   412  func ResolveHostSourcePath(path string, followLink bool) (resolvedPath, rebaseName string, err error) {
   413  	if followLink {
   414  		resolvedPath, err = filepath.EvalSymlinks(path)
   415  		if err != nil {
   416  			return
   417  		}
   418  
   419  		resolvedPath, rebaseName = GetRebaseName(path, resolvedPath)
   420  	} else {
   421  		dirPath, basePath := filepath.Split(path)
   422  
   423  		// if not follow symbol link, then resolve symbol link of parent dir
   424  		var resolvedDirPath string
   425  		resolvedDirPath, err = filepath.EvalSymlinks(dirPath)
   426  		if err != nil {
   427  			return
   428  		}
   429  		// resolvedDirPath will have been cleaned (no trailing path separators) so
   430  		// we can manually join it with the base path element.
   431  		resolvedPath = resolvedDirPath + string(filepath.Separator) + basePath
   432  		if hasTrailingPathSeparator(path) && filepath.Base(path) != filepath.Base(resolvedPath) {
   433  			rebaseName = filepath.Base(path)
   434  		}
   435  	}
   436  	return resolvedPath, rebaseName, nil
   437  }
   438  
   439  // GetRebaseName normalizes and compares path and resolvedPath,
   440  // return completed resolved path and rebased file name
   441  func GetRebaseName(path, resolvedPath string) (string, string) {
   442  	// linkTarget will have been cleaned (no trailing path separators and dot) so
   443  	// we can manually join it with them
   444  	var rebaseName string
   445  	if specifiesCurrentDir(path) && !specifiesCurrentDir(resolvedPath) {
   446  		resolvedPath += string(filepath.Separator) + "."
   447  	}
   448  
   449  	if hasTrailingPathSeparator(path) && !hasTrailingPathSeparator(resolvedPath) {
   450  		resolvedPath += string(filepath.Separator)
   451  	}
   452  
   453  	if filepath.Base(path) != filepath.Base(resolvedPath) {
   454  		// In the case where the path had a trailing separator and a symlink
   455  		// evaluation has changed the last path component, we will need to
   456  		// rebase the name in the archive that is being copied to match the
   457  		// originally requested name.
   458  		rebaseName = filepath.Base(path)
   459  	}
   460  	return resolvedPath, rebaseName
   461  }