github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/engine/builder/dockerfile/copy.go (about)

     1  package dockerfile // import "github.com/docker/docker/builder/dockerfile"
     2  
     3  import (
     4  	"archive/tar"
     5  	"fmt"
     6  	"io"
     7  	"mime"
     8  	"net/http"
     9  	"net/url"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"sort"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/docker/docker/builder"
    18  	"github.com/docker/docker/builder/remotecontext"
    19  	"github.com/docker/docker/pkg/archive"
    20  	"github.com/docker/docker/pkg/containerfs"
    21  	"github.com/docker/docker/pkg/idtools"
    22  	"github.com/docker/docker/pkg/ioutils"
    23  	"github.com/docker/docker/pkg/progress"
    24  	"github.com/docker/docker/pkg/streamformatter"
    25  	"github.com/docker/docker/pkg/system"
    26  	"github.com/docker/docker/pkg/urlutil"
    27  	specs "github.com/opencontainers/image-spec/specs-go/v1"
    28  	"github.com/pkg/errors"
    29  )
    30  
// unnamedFilename is the placeholder file name given to a downloaded source
// when no file name could be determined for it.
const unnamedFilename = "__unnamed__"
    32  
// pathCache is the key/value store the copier uses to remember the hash that
// was computed for a source path of an image.
type pathCache interface {
	// Load returns the value stored under key; ok reports whether an entry
	// was found.
	Load(key interface{}) (value interface{}, ok bool)
	// Store records value under key.
	Store(key, value interface{})
}
    37  
// copyInfo is a data object which stores the metadata about each source file in
// a copyInstruction.
type copyInfo struct {
	root         containerfs.ContainerFS // filesystem that path is resolved against (see fullPath)
	path         string                  // path of the source inside root
	hash         string                  // hash computed for the source; files carry a "file:" prefix
	noDecompress bool                    // when true the source is never unpacked as an archive
}
    46  
    47  func (c copyInfo) fullPath() (string, error) {
    48  	return c.root.ResolveScopedPath(c.path, true)
    49  }
    50  
    51  func newCopyInfoFromSource(source builder.Source, path string, hash string) copyInfo {
    52  	return copyInfo{root: source.Root(), path: path, hash: hash}
    53  }
    54  
// newCopyInfos returns its variadic arguments as a slice of copyInfo.
func newCopyInfos(copyInfos ...copyInfo) []copyInfo {
	return copyInfos
}
    58  
// copyInstruction is a fully parsed COPY or ADD command that is passed to
// Builder.performCopy to copy files into the image filesystem
type copyInstruction struct {
	cmdName                 string     // name of the command; used in error messages
	infos                   []copyInfo // resolved sources to copy
	dest                    string     // destination path, converted to the target OS path form
	chownStr                string     // NOTE(review): presumably the raw --chown value; set outside this file
	allowLocalDecompression bool       // NOTE(review): presumably enables archive extraction for ADD; set outside this file
	preserveOwnership       bool       // NOTE(review): not read in this file; confirm semantics with the caller
}
    69  
// copier reads a raw COPY or ADD command, fetches remote sources using a downloader,
// and creates a copyInstruction
type copier struct {
	// imageSource, when non-nil, is the image to copy from; calcCopyInfo then
	// mounts a layer from it and replaces source with that layer's root.
	imageSource *imageMount
	// source is the context that local (non-URL) paths are resolved in.
	source builder.Source
	// pathCache stores hashes keyed by image ID + path.
	pathCache pathCache
	// download fetches URL sources.
	download sourceDownloader
	// platform supplies the OS whose path semantics are applied to the
	// destination.
	platform *specs.Platform
	// for cleanup. TODO: having copier.cleanup() is error prone and hard to
	// follow. Code calling performCopy should manage the lifecycle of its params.
	// Copier should take override source as input, not imageMount.
	activeLayer builder.RWLayer
	// tmpPaths lists the temporary download directories removed by Cleanup.
	tmpPaths []string
}
    84  
    85  func copierFromDispatchRequest(req dispatchRequest, download sourceDownloader, imageSource *imageMount) copier {
    86  	platform := req.builder.platform
    87  	if platform == nil {
    88  		// May be nil if not explicitly set in API/dockerfile
    89  		platform = &specs.Platform{}
    90  	}
    91  	if platform.OS == "" {
    92  		// Default to the dispatch requests operating system if not explicit in API/dockerfile
    93  		platform.OS = req.state.operatingSystem
    94  	}
    95  	if platform.OS == "" {
    96  		// This is a failsafe just in case. Shouldn't be hit.
    97  		platform.OS = runtime.GOOS
    98  	}
    99  
   100  	return copier{
   101  		source:      req.source,
   102  		pathCache:   req.builder.pathCache,
   103  		download:    download,
   104  		imageSource: imageSource,
   105  		platform:    platform,
   106  	}
   107  
   108  }
   109  
   110  func (o *copier) createCopyInstruction(args []string, cmdName string) (copyInstruction, error) {
   111  	inst := copyInstruction{cmdName: cmdName}
   112  	last := len(args) - 1
   113  
   114  	// Work in platform-specific filepath semantics
   115  	// TODO: This OS switch for paths is NOT correct and should not be supported.
   116  	// Maintained for backwards compatibility
   117  	pathOS := runtime.GOOS
   118  	if o.platform != nil {
   119  		pathOS = o.platform.OS
   120  	}
   121  	inst.dest = fromSlash(args[last], pathOS)
   122  	separator := string(separator(pathOS))
   123  	infos, err := o.getCopyInfosForSourcePaths(args[0:last], inst.dest)
   124  	if err != nil {
   125  		return inst, errors.Wrapf(err, "%s failed", cmdName)
   126  	}
   127  	if len(infos) > 1 && !strings.HasSuffix(inst.dest, separator) {
   128  		return inst, errors.Errorf("When using %s with more than one source file, the destination must be a directory and end with a /", cmdName)
   129  	}
   130  	inst.infos = infos
   131  	return inst, nil
   132  }
   133  
   134  // getCopyInfosForSourcePaths iterates over the source files and calculate the info
   135  // needed to copy (e.g. hash value if cached)
   136  // The dest is used in case source is URL (and ends with "/")
   137  func (o *copier) getCopyInfosForSourcePaths(sources []string, dest string) ([]copyInfo, error) {
   138  	var infos []copyInfo
   139  	for _, orig := range sources {
   140  		subinfos, err := o.getCopyInfoForSourcePath(orig, dest)
   141  		if err != nil {
   142  			return nil, err
   143  		}
   144  		infos = append(infos, subinfos...)
   145  	}
   146  
   147  	if len(infos) == 0 {
   148  		return nil, errors.New("no source files were specified")
   149  	}
   150  	return infos, nil
   151  }
   152  
   153  func (o *copier) getCopyInfoForSourcePath(orig, dest string) ([]copyInfo, error) {
   154  	if !urlutil.IsURL(orig) {
   155  		return o.calcCopyInfo(orig, true)
   156  	}
   157  
   158  	remote, path, err := o.download(orig)
   159  	if err != nil {
   160  		return nil, err
   161  	}
   162  	// If path == "" then we are unable to determine filename from src
   163  	// We have to make sure dest is available
   164  	if path == "" {
   165  		if strings.HasSuffix(dest, "/") {
   166  			return nil, errors.Errorf("cannot determine filename for source %s", orig)
   167  		}
   168  		path = unnamedFilename
   169  	}
   170  	o.tmpPaths = append(o.tmpPaths, remote.Root().Path())
   171  
   172  	hash, err := remote.Hash(path)
   173  	ci := newCopyInfoFromSource(remote, path, hash)
   174  	ci.noDecompress = true // data from http shouldn't be extracted even on ADD
   175  	return newCopyInfos(ci), err
   176  }
   177  
   178  // Cleanup removes any temporary directories created as part of downloading
   179  // remote files.
   180  func (o *copier) Cleanup() {
   181  	for _, path := range o.tmpPaths {
   182  		os.RemoveAll(path)
   183  	}
   184  	o.tmpPaths = []string{}
   185  	if o.activeLayer != nil {
   186  		o.activeLayer.Release()
   187  		o.activeLayer = nil
   188  	}
   189  }
   190  
// calcCopyInfo resolves a local (non-URL) source path into copyInfo records.
//
// When the copier has an image source, a read-write layer is created from it
// on first use and o.source is replaced by a lazy source rooted at that
// layer. The path is then validated, converted to the source OS form and
// stripped of a leading separator or "./". Wildcard paths (when
// allowWildcards is true) are expanded by copyWithWildcards. Otherwise the
// path cache is consulted for image sources, then the path is treated as a
// single file, and finally as a directory whose hash is derived from the
// hashes of its entries.
//
// TODO: allowWildcards can probably be removed by refactoring this function further.
func (o *copier) calcCopyInfo(origPath string, allowWildcards bool) ([]copyInfo, error) {
	imageSource := o.imageSource

	// TODO: do this when creating copier. Requires validateCopySourcePath
	// (and other below) to be aware of the different sources. Why is it only
	// done on image Source?
	if imageSource != nil && o.activeLayer == nil {
		// this needs to be protected against repeated calls as wildcard copy
		// will call it multiple times for a single COPY
		var err error
		rwLayer, err := imageSource.NewRWLayer()
		if err != nil {
			return nil, err
		}
		// Remember the layer so Cleanup can release it.
		o.activeLayer = rwLayer

		o.source, err = remotecontext.NewLazySource(rwLayer.Root())
		if err != nil {
			return nil, errors.Wrapf(err, "failed to create context for copy from %s", rwLayer.Root().Path())
		}
	}

	if o.source == nil {
		return nil, errors.Errorf("missing build context")
	}

	root := o.source.Root()

	if err := validateCopySourcePath(imageSource, origPath, root.OS()); err != nil {
		return nil, err
	}

	// Work in source OS specific filepath semantics
	// For LCOW, this is NOT the daemon OS.
	origPath = root.FromSlash(origPath)
	origPath = strings.TrimPrefix(origPath, string(root.Separator()))
	origPath = strings.TrimPrefix(origPath, "."+string(root.Separator()))

	// Deal with wildcards
	if allowWildcards && containsWildcards(origPath, root.OS()) {
		return o.copyWithWildcards(origPath)
	}

	if imageSource != nil && imageSource.ImageID() != "" {
		// return a cached copy if one exists
		if h, ok := o.pathCache.Load(imageSource.ImageID() + origPath); ok {
			return newCopyInfos(newCopyInfoFromSource(o.source, origPath, h.(string))), nil
		}
	}

	// Deal with the single file case
	copyInfo, err := copyInfoForFile(o.source, origPath)
	switch {
	case imageSource == nil && errors.Is(err, os.ErrNotExist):
		// Only build-context sources get the .dockerignore hint.
		return nil, errors.Wrapf(err, "file not found in build context or excluded by .dockerignore")
	case err != nil:
		return nil, err
	case copyInfo.hash != "":
		// A non-empty hash means the path is a regular file (directories
		// come back from copyInfoForFile with an empty copyInfo).
		o.storeInPathCache(imageSource, origPath, copyInfo.hash)
		return newCopyInfos(copyInfo), err
	}

	// The path is a directory: hash it from the hashes of its entries.
	// TODO: remove, handle dirs in Hash()
	subfiles, err := walkSource(o.source, origPath)
	if err != nil {
		return nil, err
	}

	hash := hashStringSlice("dir", subfiles)
	o.storeInPathCache(imageSource, origPath, hash)
	return newCopyInfos(newCopyInfoFromSource(o.source, origPath, hash)), nil
}
   264  
   265  func containsWildcards(name, platform string) bool {
   266  	isWindows := platform == "windows"
   267  	for i := 0; i < len(name); i++ {
   268  		ch := name[i]
   269  		if ch == '\\' && !isWindows {
   270  			i++
   271  		} else if ch == '*' || ch == '?' || ch == '[' {
   272  			return true
   273  		}
   274  	}
   275  	return false
   276  }
   277  
   278  func (o *copier) storeInPathCache(im *imageMount, path string, hash string) {
   279  	if im != nil {
   280  		o.pathCache.Store(im.ImageID()+path, hash)
   281  	}
   282  }
   283  
   284  func (o *copier) copyWithWildcards(origPath string) ([]copyInfo, error) {
   285  	root := o.source.Root()
   286  	var copyInfos []copyInfo
   287  	if err := root.Walk(root.Path(), func(path string, info os.FileInfo, err error) error {
   288  		if err != nil {
   289  			return err
   290  		}
   291  		rel, err := remotecontext.Rel(root, path)
   292  		if err != nil {
   293  			return err
   294  		}
   295  
   296  		if rel == "." {
   297  			return nil
   298  		}
   299  		if match, _ := root.Match(origPath, rel); !match {
   300  			return nil
   301  		}
   302  
   303  		// Note we set allowWildcards to false in case the name has
   304  		// a * in it
   305  		subInfos, err := o.calcCopyInfo(rel, false)
   306  		if err != nil {
   307  			return err
   308  		}
   309  		copyInfos = append(copyInfos, subInfos...)
   310  		return nil
   311  	}); err != nil {
   312  		return nil, err
   313  	}
   314  	return copyInfos, nil
   315  }
   316  
   317  func copyInfoForFile(source builder.Source, path string) (copyInfo, error) {
   318  	fi, err := remotecontext.StatAt(source, path)
   319  	if err != nil {
   320  		if errors.Is(err, os.ErrNotExist) {
   321  			// return the relative path in the error, which is more user-friendly than the full path to the tmp-dir
   322  			return copyInfo{}, errors.WithStack(&os.PathError{Op: "stat", Path: path, Err: os.ErrNotExist})
   323  		}
   324  		return copyInfo{}, err
   325  	}
   326  
   327  	if fi.IsDir() {
   328  		return copyInfo{}, nil
   329  	}
   330  	hash, err := source.Hash(path)
   331  	if err != nil {
   332  		return copyInfo{}, err
   333  	}
   334  	return newCopyInfoFromSource(source, path, "file:"+hash), nil
   335  }
   336  
   337  // TODO: dedupe with copyWithWildcards()
   338  func walkSource(source builder.Source, origPath string) ([]string, error) {
   339  	fp, err := remotecontext.FullPath(source, origPath)
   340  	if err != nil {
   341  		return nil, err
   342  	}
   343  	// Must be a dir
   344  	var subfiles []string
   345  	err = source.Root().Walk(fp, func(path string, info os.FileInfo, err error) error {
   346  		if err != nil {
   347  			return err
   348  		}
   349  		rel, err := remotecontext.Rel(source.Root(), path)
   350  		if err != nil {
   351  			return err
   352  		}
   353  		if rel == "." {
   354  			return nil
   355  		}
   356  		hash, err := source.Hash(rel)
   357  		if err != nil {
   358  			return nil
   359  		}
   360  		// we already checked handleHash above
   361  		subfiles = append(subfiles, hash)
   362  		return nil
   363  	})
   364  	if err != nil {
   365  		return nil, err
   366  	}
   367  
   368  	sort.Strings(subfiles)
   369  	return subfiles, nil
   370  }
   371  
// sourceDownloader fetches the source behind a URL. It returns the source
// holding the download, the file name of the download inside that source
// (possibly empty, see downloadSource), and an error.
type sourceDownloader func(string) (builder.Source, string, error)
   373  
   374  func newRemoteSourceDownloader(output, stdout io.Writer) sourceDownloader {
   375  	return func(url string) (builder.Source, string, error) {
   376  		return downloadSource(output, stdout, url)
   377  	}
   378  }
   379  
// errOnSourceDownload is a sourceDownloader that rejects every URL; it always
// returns a nil source, an empty path and an error stating that COPY does not
// accept URL sources.
func errOnSourceDownload(_ string) (builder.Source, string, error) {
	return nil, "", errors.New("source can't be a URL for COPY")
}
   383  
   384  func getFilenameForDownload(path string, resp *http.Response) string {
   385  	// Guess filename based on source
   386  	if path != "" && !strings.HasSuffix(path, "/") {
   387  		if filename := filepath.Base(filepath.FromSlash(path)); filename != "" {
   388  			return filename
   389  		}
   390  	}
   391  
   392  	// Guess filename based on Content-Disposition
   393  	if contentDisposition := resp.Header.Get("Content-Disposition"); contentDisposition != "" {
   394  		if _, params, err := mime.ParseMediaType(contentDisposition); err == nil {
   395  			if params["filename"] != "" && !strings.HasSuffix(params["filename"], "/") {
   396  				if filename := filepath.Base(filepath.FromSlash(params["filename"])); filename != "" {
   397  					return filename
   398  				}
   399  			}
   400  		}
   401  	}
   402  	return ""
   403  }
   404  
   405  func downloadSource(output io.Writer, stdout io.Writer, srcURL string) (remote builder.Source, p string, err error) {
   406  	u, err := url.Parse(srcURL)
   407  	if err != nil {
   408  		return
   409  	}
   410  
   411  	resp, err := remotecontext.GetWithStatusError(srcURL)
   412  	if err != nil {
   413  		return
   414  	}
   415  
   416  	filename := getFilenameForDownload(u.Path, resp)
   417  
   418  	// Prepare file in a tmp dir
   419  	tmpDir, err := ioutils.TempDir("", "docker-remote")
   420  	if err != nil {
   421  		return
   422  	}
   423  	defer func() {
   424  		if err != nil {
   425  			os.RemoveAll(tmpDir)
   426  		}
   427  	}()
   428  	// If filename is empty, the returned filename will be "" but
   429  	// the tmp filename will be created as "__unnamed__"
   430  	tmpFileName := filename
   431  	if filename == "" {
   432  		tmpFileName = unnamedFilename
   433  	}
   434  	tmpFileName = filepath.Join(tmpDir, tmpFileName)
   435  	tmpFile, err := os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
   436  	if err != nil {
   437  		return
   438  	}
   439  
   440  	progressOutput := streamformatter.NewJSONProgressOutput(output, true)
   441  	progressReader := progress.NewProgressReader(resp.Body, progressOutput, resp.ContentLength, "", "Downloading")
   442  	// Download and dump result to tmp file
   443  	// TODO: add filehash directly
   444  	if _, err = io.Copy(tmpFile, progressReader); err != nil {
   445  		tmpFile.Close()
   446  		return
   447  	}
   448  	// TODO: how important is this random blank line to the output?
   449  	fmt.Fprintln(stdout)
   450  
   451  	// Set the mtime to the Last-Modified header value if present
   452  	// Otherwise just remove atime and mtime
   453  	mTime := time.Time{}
   454  
   455  	lastMod := resp.Header.Get("Last-Modified")
   456  	if lastMod != "" {
   457  		// If we can't parse it then just let it default to 'zero'
   458  		// otherwise use the parsed time value
   459  		if parsedMTime, err := http.ParseTime(lastMod); err == nil {
   460  			mTime = parsedMTime
   461  		}
   462  	}
   463  
   464  	tmpFile.Close()
   465  
   466  	if err = system.Chtimes(tmpFileName, mTime, mTime); err != nil {
   467  		return
   468  	}
   469  
   470  	lc, err := remotecontext.NewLazySource(containerfs.NewLocalContainerFS(tmpDir))
   471  	return lc, filename, err
   472  }
   473  
// copyFileOptions carries the settings applied by performCopyForInfo.
type copyFileOptions struct {
	decompress bool              // allow archive sources to be extracted instead of copied
	identity   *idtools.Identity // when non-nil, ownership applied to the copied files
	archiver   Archiver          // performs the actual copy / untar operations
}
   479  
// copyEndpoint is one side (source or destination) of a copy: a path together
// with the filesystem driver it lives on.
type copyEndpoint struct {
	driver containerfs.Driver
	path   string
}
   484  
   485  func performCopyForInfo(dest copyInfo, source copyInfo, options copyFileOptions) error {
   486  	srcPath, err := source.fullPath()
   487  	if err != nil {
   488  		return err
   489  	}
   490  
   491  	destPath, err := dest.fullPath()
   492  	if err != nil {
   493  		return err
   494  	}
   495  
   496  	archiver := options.archiver
   497  
   498  	srcEndpoint := &copyEndpoint{driver: source.root, path: srcPath}
   499  	destEndpoint := &copyEndpoint{driver: dest.root, path: destPath}
   500  
   501  	src, err := source.root.Stat(srcPath)
   502  	if err != nil {
   503  		return errors.Wrapf(err, "source path not found")
   504  	}
   505  	if src.IsDir() {
   506  		return copyDirectory(archiver, srcEndpoint, destEndpoint, options.identity)
   507  	}
   508  	if options.decompress && isArchivePath(source.root, srcPath) && !source.noDecompress {
   509  		return archiver.UntarPath(srcPath, destPath)
   510  	}
   511  
   512  	destExistsAsDir, err := isExistingDirectory(destEndpoint)
   513  	if err != nil {
   514  		return err
   515  	}
   516  	// dest.path must be used because destPath has already been cleaned of any
   517  	// trailing slash
   518  	if endsInSlash(dest.root, dest.path) || destExistsAsDir {
   519  		// source.path must be used to get the correct filename when the source
   520  		// is a symlink
   521  		destPath = dest.root.Join(destPath, source.root.Base(source.path))
   522  		destEndpoint = &copyEndpoint{driver: dest.root, path: destPath}
   523  	}
   524  	return copyFile(archiver, srcEndpoint, destEndpoint, options.identity)
   525  }
   526  
   527  func isArchivePath(driver containerfs.ContainerFS, path string) bool {
   528  	file, err := driver.Open(path)
   529  	if err != nil {
   530  		return false
   531  	}
   532  	defer file.Close()
   533  	rdr, err := archive.DecompressStream(file)
   534  	if err != nil {
   535  		return false
   536  	}
   537  	r := tar.NewReader(rdr)
   538  	_, err = r.Next()
   539  	return err == nil
   540  }
   541  
   542  func copyDirectory(archiver Archiver, source, dest *copyEndpoint, identity *idtools.Identity) error {
   543  	destExists, err := isExistingDirectory(dest)
   544  	if err != nil {
   545  		return errors.Wrapf(err, "failed to query destination path")
   546  	}
   547  
   548  	if err := archiver.CopyWithTar(source.path, dest.path); err != nil {
   549  		return errors.Wrapf(err, "failed to copy directory")
   550  	}
   551  	if identity != nil {
   552  		// TODO: @gupta-ak. Investigate how LCOW permission mappings will work.
   553  		return fixPermissions(source.path, dest.path, *identity, !destExists)
   554  	}
   555  	return nil
   556  }
   557  
   558  func copyFile(archiver Archiver, source, dest *copyEndpoint, identity *idtools.Identity) error {
   559  	if runtime.GOOS == "windows" && dest.driver.OS() == "linux" {
   560  		// LCOW
   561  		if err := dest.driver.MkdirAll(dest.driver.Dir(dest.path), 0755); err != nil {
   562  			return errors.Wrapf(err, "failed to create new directory")
   563  		}
   564  	} else {
   565  		// Normal containers
   566  		if identity == nil {
   567  			// Use system.MkdirAll here, which is a custom version of os.MkdirAll
   568  			// modified for use on Windows to handle volume GUID paths. These paths
   569  			// are of the form \\?\Volume{<GUID>}\<path>. An example would be:
   570  			// \\?\Volume{dae8d3ac-b9a1-11e9-88eb-e8554b2ba1db}\bin\busybox.exe
   571  
   572  			if err := system.MkdirAll(filepath.Dir(dest.path), 0755); err != nil {
   573  				return err
   574  			}
   575  		} else {
   576  			if err := idtools.MkdirAllAndChownNew(filepath.Dir(dest.path), 0755, *identity); err != nil {
   577  				return errors.Wrapf(err, "failed to create new directory")
   578  			}
   579  		}
   580  	}
   581  
   582  	if err := archiver.CopyFileWithTar(source.path, dest.path); err != nil {
   583  		return errors.Wrapf(err, "failed to copy file")
   584  	}
   585  	if identity != nil {
   586  		// TODO: @gupta-ak. Investigate how LCOW permission mappings will work.
   587  		return fixPermissions(source.path, dest.path, *identity, false)
   588  	}
   589  	return nil
   590  }
   591  
   592  func endsInSlash(driver containerfs.Driver, path string) bool {
   593  	return strings.HasSuffix(path, string(driver.Separator()))
   594  }
   595  
   596  // isExistingDirectory returns true if the path exists and is a directory
   597  func isExistingDirectory(point *copyEndpoint) (bool, error) {
   598  	destStat, err := point.driver.Stat(point.path)
   599  	switch {
   600  	case errors.Is(err, os.ErrNotExist):
   601  		return false, nil
   602  	case err != nil:
   603  		return false, err
   604  	}
   605  	return destStat.IsDir(), nil
   606  }