github.com/moby/docker@v26.1.3+incompatible/builder/dockerfile/copy.go (about)

     1  package dockerfile // import "github.com/docker/docker/builder/dockerfile"
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"mime"
     7  	"net/http"
     8  	"net/url"
     9  	"os"
    10  	"path/filepath"
    11  	"sort"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/docker/docker/builder"
    16  	"github.com/docker/docker/builder/remotecontext"
    17  	"github.com/docker/docker/builder/remotecontext/urlutil"
    18  	"github.com/docker/docker/pkg/archive"
    19  	"github.com/docker/docker/pkg/idtools"
    20  	"github.com/docker/docker/pkg/longpath"
    21  	"github.com/docker/docker/pkg/progress"
    22  	"github.com/docker/docker/pkg/streamformatter"
    23  	"github.com/docker/docker/pkg/system"
    24  	"github.com/moby/buildkit/frontend/dockerfile/instructions"
    25  	"github.com/moby/sys/symlink"
    26  	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
    27  	"github.com/pkg/errors"
    28  )
    29  
// unnamedFilename is the placeholder file name given to a downloaded source
// when no file name could be derived for it.
const unnamedFilename = "__unnamed__"
    31  
// pathCache is the key/value store the copier uses to remember the hashes it
// computed for paths copied from an image. In this file the keys are the image
// ID concatenated with the source path and the values are hash strings.
type pathCache interface {
	// Load looks up key; ok reports whether a value was found.
	Load(key interface{}) (value interface{}, ok bool)
	// Store records value under key.
	Store(key, value interface{})
}
    36  
// copyInfo is a data object which stores the metadata about each source file in
// a copyInstruction
type copyInfo struct {
	// root is the directory that path is relative to; symlink resolution in
	// fullPath is scoped to it.
	root         string
	// path is the location of the file or directory below root.
	path         string
	// hash is the content hash recorded for path ("file:"-prefixed for
	// regular files hashed by copyInfoForFile).
	hash         string
	// noDecompress, when true, stops performCopyForInfo from unpacking the
	// source even if decompression was requested.
	noDecompress bool
}
    45  
    46  func (c copyInfo) fullPath() (string, error) {
    47  	return symlink.FollowSymlinkInScope(filepath.Join(c.root, c.path), c.root)
    48  }
    49  
    50  func newCopyInfoFromSource(source builder.Source, path string, hash string) copyInfo {
    51  	return copyInfo{root: source.Root(), path: path, hash: hash}
    52  }
    53  
    54  func newCopyInfos(copyInfos ...copyInfo) []copyInfo {
    55  	return copyInfos
    56  }
    57  
// copyInstruction is a fully parsed COPY or ADD command that is passed to
// Builder.performCopy to copy files into the image filesystem
type copyInstruction struct {
	// cmdName is the instruction name as passed to createCopyInstruction; it
	// is used in error messages.
	cmdName                 string
	// infos holds one entry per resolved source.
	infos                   []copyInfo
	// dest is the destination path converted to OS-specific separators.
	dest                    string
	// The remaining fields are not set anywhere in this file; presumably the
	// dispatcher fills them in from the instruction's flags. TODO confirm.
	chownStr                string
	allowLocalDecompression bool
	preserveOwnership       bool
}
    68  
// copier reads a raw COPY or ADD command, fetches remote sources using a downloader,
// and creates a copyInstruction
type copier struct {
	// imageSource, when non-nil, makes calcCopyInfo resolve sources against a
	// read-write layer created from that image mount.
	imageSource *imageMount
	// source is what local paths are resolved against. calcCopyInfo replaces
	// it with a lazy source rooted at the active layer when imageSource is set.
	source      builder.Source
	// pathCache remembers hashes of paths copied from an image.
	pathCache   pathCache
	// download fetches URL sources.
	download    sourceDownloader
	// platform is obtained from the builder when the copier is created; it is
	// not read anywhere in this file.
	platform    ocispec.Platform
	// for cleanup. TODO: having copier.cleanup() is error prone and hard to
	// follow. Code calling performCopy should manage the lifecycle of its params.
	// Copier should take override source as input, not imageMount.
	activeLayer builder.RWLayer
	// tmpPaths lists the roots of downloaded sources; Cleanup removes them.
	tmpPaths    []string
}
    83  
    84  func copierFromDispatchRequest(req dispatchRequest, download sourceDownloader, imageSource *imageMount) copier {
    85  	platform := req.builder.getPlatform(req.state)
    86  
    87  	return copier{
    88  		source:      req.source,
    89  		pathCache:   req.builder.pathCache,
    90  		download:    download,
    91  		imageSource: imageSource,
    92  		platform:    platform,
    93  	}
    94  }
    95  
    96  func (o *copier) createCopyInstruction(sourcesAndDest instructions.SourcesAndDest, cmdName string) (copyInstruction, error) {
    97  	inst := copyInstruction{
    98  		cmdName: cmdName,
    99  		dest:    filepath.FromSlash(sourcesAndDest.DestPath),
   100  	}
   101  	infos, err := o.getCopyInfosForSourcePaths(sourcesAndDest.SourcePaths, inst.dest)
   102  	if err != nil {
   103  		return inst, errors.Wrapf(err, "%s failed", cmdName)
   104  	}
   105  	if len(infos) > 1 && !strings.HasSuffix(inst.dest, string(os.PathSeparator)) {
   106  		return inst, errors.Errorf("When using %s with more than one source file, the destination must be a directory and end with a /", cmdName)
   107  	}
   108  	inst.infos = infos
   109  	return inst, nil
   110  }
   111  
   112  // getCopyInfosForSourcePaths iterates over the source files and calculate the info
   113  // needed to copy (e.g. hash value if cached)
   114  // The dest is used in case source is URL (and ends with "/")
   115  func (o *copier) getCopyInfosForSourcePaths(sources []string, dest string) ([]copyInfo, error) {
   116  	var infos []copyInfo
   117  	for _, orig := range sources {
   118  		subinfos, err := o.getCopyInfoForSourcePath(orig, dest)
   119  		if err != nil {
   120  			return nil, err
   121  		}
   122  		infos = append(infos, subinfos...)
   123  	}
   124  
   125  	if len(infos) == 0 {
   126  		return nil, errors.New("no source files were specified")
   127  	}
   128  	return infos, nil
   129  }
   130  
   131  func (o *copier) getCopyInfoForSourcePath(orig, dest string) ([]copyInfo, error) {
   132  	if !urlutil.IsURL(orig) {
   133  		return o.calcCopyInfo(orig, true)
   134  	}
   135  
   136  	remote, path, err := o.download(orig)
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  	// If path == "" then we are unable to determine filename from src
   141  	// We have to make sure dest is available
   142  	if path == "" {
   143  		if strings.HasSuffix(dest, "/") {
   144  			return nil, errors.Errorf("cannot determine filename for source %s", orig)
   145  		}
   146  		path = unnamedFilename
   147  	}
   148  	o.tmpPaths = append(o.tmpPaths, remote.Root())
   149  
   150  	hash, err := remote.Hash(path)
   151  	ci := newCopyInfoFromSource(remote, path, hash)
   152  	ci.noDecompress = true // data from http shouldn't be extracted even on ADD
   153  	return newCopyInfos(ci), err
   154  }
   155  
   156  // Cleanup removes any temporary directories created as part of downloading
   157  // remote files.
   158  func (o *copier) Cleanup() {
   159  	for _, path := range o.tmpPaths {
   160  		os.RemoveAll(path)
   161  	}
   162  	o.tmpPaths = []string{}
   163  	if o.activeLayer != nil {
   164  		o.activeLayer.Release()
   165  		o.activeLayer = nil
   166  	}
   167  }
   168  
   169  // TODO: allowWildcards can probably be removed by refactoring this function further.
   170  func (o *copier) calcCopyInfo(origPath string, allowWildcards bool) ([]copyInfo, error) {
   171  	imageSource := o.imageSource
   172  	if err := validateCopySourcePath(imageSource, origPath); err != nil {
   173  		return nil, err
   174  	}
   175  
   176  	// TODO: do this when creating copier. Requires validateCopySourcePath
   177  	// (and other below) to be aware of the difference sources. Why is it only
   178  	// done on image Source?
   179  	if imageSource != nil && o.activeLayer == nil {
   180  		// this needs to be protected against repeated calls as wildcard copy
   181  		// will call it multiple times for a single COPY
   182  		var err error
   183  		rwLayer, err := imageSource.NewRWLayer()
   184  		if err != nil {
   185  			return nil, err
   186  		}
   187  		o.activeLayer = rwLayer
   188  
   189  		o.source, err = remotecontext.NewLazySource(rwLayer.Root())
   190  		if err != nil {
   191  			return nil, errors.Wrapf(err, "failed to create context for copy from %s", rwLayer.Root())
   192  		}
   193  	}
   194  
   195  	if o.source == nil {
   196  		return nil, errors.Errorf("missing build context")
   197  	}
   198  
   199  	// Work in daemon-specific OS filepath semantics
   200  	origPath = filepath.FromSlash(origPath)
   201  	origPath = strings.TrimPrefix(origPath, string(os.PathSeparator))
   202  	origPath = strings.TrimPrefix(origPath, "."+string(os.PathSeparator))
   203  
   204  	// Deal with wildcards
   205  	if allowWildcards && containsWildcards(origPath) {
   206  		return o.copyWithWildcards(origPath)
   207  	}
   208  
   209  	if imageSource != nil && imageSource.ImageID() != "" {
   210  		// return a cached copy if one exists
   211  		if h, ok := o.pathCache.Load(imageSource.ImageID() + origPath); ok {
   212  			return newCopyInfos(newCopyInfoFromSource(o.source, origPath, h.(string))), nil
   213  		}
   214  	}
   215  
   216  	// Deal with the single file case
   217  	copyInfo, err := copyInfoForFile(o.source, origPath)
   218  	switch {
   219  	case imageSource == nil && errors.Is(err, os.ErrNotExist):
   220  		return nil, errors.Wrapf(err, "file not found in build context or excluded by .dockerignore")
   221  	case err != nil:
   222  		return nil, err
   223  	case copyInfo.hash != "":
   224  		o.storeInPathCache(imageSource, origPath, copyInfo.hash)
   225  		return newCopyInfos(copyInfo), err
   226  	}
   227  
   228  	// TODO: remove, handle dirs in Hash()
   229  	subfiles, err := walkSource(o.source, origPath)
   230  	if err != nil {
   231  		return nil, err
   232  	}
   233  
   234  	hash := hashStringSlice("dir", subfiles)
   235  	o.storeInPathCache(imageSource, origPath, hash)
   236  	return newCopyInfos(newCopyInfoFromSource(o.source, origPath, hash)), nil
   237  }
   238  
   239  func (o *copier) storeInPathCache(im *imageMount, path string, hash string) {
   240  	if im != nil {
   241  		o.pathCache.Store(im.ImageID()+path, hash)
   242  	}
   243  }
   244  
   245  func (o *copier) copyWithWildcards(origPath string) ([]copyInfo, error) {
   246  	root := o.source.Root()
   247  	var copyInfos []copyInfo
   248  	if err := filepath.WalkDir(root, func(path string, _ os.DirEntry, err error) error {
   249  		if err != nil {
   250  			return err
   251  		}
   252  		rel, err := remotecontext.Rel(root, path)
   253  		if err != nil {
   254  			return err
   255  		}
   256  
   257  		if rel == "." {
   258  			return nil
   259  		}
   260  		if match, _ := filepath.Match(origPath, rel); !match {
   261  			return nil
   262  		}
   263  
   264  		// Note we set allowWildcards to false in case the name has
   265  		// a * in it
   266  		subInfos, err := o.calcCopyInfo(rel, false)
   267  		if err != nil {
   268  			return err
   269  		}
   270  		copyInfos = append(copyInfos, subInfos...)
   271  		return nil
   272  	}); err != nil {
   273  		return nil, err
   274  	}
   275  	return copyInfos, nil
   276  }
   277  
   278  func copyInfoForFile(source builder.Source, path string) (copyInfo, error) {
   279  	fi, err := remotecontext.StatAt(source, path)
   280  	if err != nil {
   281  		if errors.Is(err, os.ErrNotExist) {
   282  			// return the relative path in the error, which is more user-friendly than the full path to the tmp-dir
   283  			return copyInfo{}, errors.WithStack(&os.PathError{Op: "stat", Path: path, Err: os.ErrNotExist})
   284  		}
   285  		return copyInfo{}, err
   286  	}
   287  
   288  	if fi.IsDir() {
   289  		return copyInfo{}, nil
   290  	}
   291  	hash, err := source.Hash(path)
   292  	if err != nil {
   293  		return copyInfo{}, err
   294  	}
   295  	return newCopyInfoFromSource(source, path, "file:"+hash), nil
   296  }
   297  
   298  // TODO: dedupe with copyWithWildcards()
   299  func walkSource(source builder.Source, origPath string) ([]string, error) {
   300  	fp, err := remotecontext.FullPath(source, origPath)
   301  	if err != nil {
   302  		return nil, err
   303  	}
   304  	// Must be a dir
   305  	var subfiles []string
   306  	err = filepath.WalkDir(fp, func(path string, _ os.DirEntry, err error) error {
   307  		if err != nil {
   308  			return err
   309  		}
   310  		rel, err := remotecontext.Rel(source.Root(), path)
   311  		if err != nil {
   312  			return err
   313  		}
   314  		if rel == "." {
   315  			return nil
   316  		}
   317  		hash, err := source.Hash(rel)
   318  		if err != nil {
   319  			return nil
   320  		}
   321  		// we already checked handleHash above
   322  		subfiles = append(subfiles, hash)
   323  		return nil
   324  	})
   325  	if err != nil {
   326  		return nil, err
   327  	}
   328  
   329  	sort.Strings(subfiles)
   330  	return subfiles, nil
   331  }
   332  
// sourceDownloader fetches the source at the given URL. It returns a source
// containing the download and the file name within it; the name is empty when
// no file name could be determined.
type sourceDownloader func(string) (builder.Source, string, error)
   334  
   335  func newRemoteSourceDownloader(output, stdout io.Writer) sourceDownloader {
   336  	return func(url string) (builder.Source, string, error) {
   337  		return downloadSource(output, stdout, url)
   338  	}
   339  }
   340  
   341  func errOnSourceDownload(_ string) (builder.Source, string, error) {
   342  	return nil, "", errors.New("source can't be a URL for COPY")
   343  }
   344  
   345  func getFilenameForDownload(path string, resp *http.Response) string {
   346  	// Guess filename based on source
   347  	if path != "" && !strings.HasSuffix(path, "/") {
   348  		if filename := filepath.Base(filepath.FromSlash(path)); filename != "" {
   349  			return filename
   350  		}
   351  	}
   352  
   353  	// Guess filename based on Content-Disposition
   354  	if contentDisposition := resp.Header.Get("Content-Disposition"); contentDisposition != "" {
   355  		if _, params, err := mime.ParseMediaType(contentDisposition); err == nil {
   356  			if params["filename"] != "" && !strings.HasSuffix(params["filename"], "/") {
   357  				if filename := filepath.Base(filepath.FromSlash(params["filename"])); filename != "" {
   358  					return filename
   359  				}
   360  			}
   361  		}
   362  	}
   363  	return ""
   364  }
   365  
   366  func downloadSource(output io.Writer, stdout io.Writer, srcURL string) (remote builder.Source, p string, err error) {
   367  	u, err := url.Parse(srcURL)
   368  	if err != nil {
   369  		return
   370  	}
   371  
   372  	resp, err := remotecontext.GetWithStatusError(srcURL)
   373  	if err != nil {
   374  		return
   375  	}
   376  
   377  	filename := getFilenameForDownload(u.Path, resp)
   378  
   379  	// Prepare file in a tmp dir
   380  	tmpDir, err := longpath.MkdirTemp("", "docker-remote")
   381  	if err != nil {
   382  		return
   383  	}
   384  	defer func() {
   385  		if err != nil {
   386  			os.RemoveAll(tmpDir)
   387  		}
   388  	}()
   389  	// If filename is empty, the returned filename will be "" but
   390  	// the tmp filename will be created as "__unnamed__"
   391  	tmpFileName := filename
   392  	if filename == "" {
   393  		tmpFileName = unnamedFilename
   394  	}
   395  	tmpFileName = filepath.Join(tmpDir, tmpFileName)
   396  	tmpFile, err := os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o600)
   397  	if err != nil {
   398  		return
   399  	}
   400  
   401  	progressOutput := streamformatter.NewJSONProgressOutput(output, true)
   402  	progressReader := progress.NewProgressReader(resp.Body, progressOutput, resp.ContentLength, "", "Downloading")
   403  	// Download and dump result to tmp file
   404  	// TODO: add filehash directly
   405  	if _, err = io.Copy(tmpFile, progressReader); err != nil {
   406  		tmpFile.Close()
   407  		return
   408  	}
   409  	// TODO: how important is this random blank line to the output?
   410  	fmt.Fprintln(stdout)
   411  
   412  	// Set the mtime to the Last-Modified header value if present
   413  	// Otherwise just remove atime and mtime
   414  	mTime := time.Time{}
   415  
   416  	lastMod := resp.Header.Get("Last-Modified")
   417  	if lastMod != "" {
   418  		// If we can't parse it then just let it default to 'zero'
   419  		// otherwise use the parsed time value
   420  		if parsedMTime, err := http.ParseTime(lastMod); err == nil {
   421  			mTime = parsedMTime
   422  		}
   423  	}
   424  
   425  	tmpFile.Close()
   426  
   427  	if err = system.Chtimes(tmpFileName, mTime, mTime); err != nil {
   428  		return
   429  	}
   430  
   431  	lc, err := remotecontext.NewLazySource(tmpDir)
   432  	return lc, filename, err
   433  }
   434  
// copyFileOptions carries the settings performCopyForInfo applies to a copy.
type copyFileOptions struct {
	// decompress requests that archive sources be unpacked into the
	// destination instead of being copied as files.
	decompress bool
	// identity, when non-nil, is the owner applied to what is copied and to
	// directories created for it.
	identity   *idtools.Identity
	// archiver performs the actual copy and untar operations.
	archiver   *archive.Archiver
}
   440  
   441  func performCopyForInfo(dest copyInfo, source copyInfo, options copyFileOptions) error {
   442  	srcPath, err := source.fullPath()
   443  	if err != nil {
   444  		return err
   445  	}
   446  
   447  	destPath, err := dest.fullPath()
   448  	if err != nil {
   449  		return err
   450  	}
   451  
   452  	archiver := options.archiver
   453  
   454  	src, err := os.Stat(srcPath)
   455  	if err != nil {
   456  		return errors.Wrapf(err, "source path not found")
   457  	}
   458  	if src.IsDir() {
   459  		return copyDirectory(archiver, srcPath, destPath, options.identity)
   460  	}
   461  	if options.decompress && archive.IsArchivePath(srcPath) && !source.noDecompress {
   462  		f, err := os.Open(srcPath)
   463  		if err != nil {
   464  			return err
   465  		}
   466  		defer f.Close()
   467  		options := &archive.TarOptions{
   468  			IDMap:            archiver.IDMapping,
   469  			BestEffortXattrs: true,
   470  		}
   471  		return archiver.Untar(f, destPath, options)
   472  	}
   473  
   474  	destExistsAsDir, err := isExistingDirectory(destPath)
   475  	if err != nil {
   476  		return err
   477  	}
   478  	// dest.path must be used because destPath has already been cleaned of any
   479  	// trailing slash
   480  	if endsInSlash(dest.path) || destExistsAsDir {
   481  		// source.path must be used to get the correct filename when the source
   482  		// is a symlink
   483  		destPath = filepath.Join(destPath, filepath.Base(source.path))
   484  	}
   485  	return copyFile(archiver, srcPath, destPath, options.identity)
   486  }
   487  
   488  func copyDirectory(archiver *archive.Archiver, source, dest string, identity *idtools.Identity) error {
   489  	destExists, err := isExistingDirectory(dest)
   490  	if err != nil {
   491  		return errors.Wrapf(err, "failed to query destination path")
   492  	}
   493  
   494  	if err := archiver.CopyWithTar(source, dest); err != nil {
   495  		return errors.Wrapf(err, "failed to copy directory")
   496  	}
   497  	if identity != nil {
   498  		return fixPermissions(source, dest, *identity, !destExists)
   499  	}
   500  	return nil
   501  }
   502  
   503  func copyFile(archiver *archive.Archiver, source, dest string, identity *idtools.Identity) error {
   504  	if identity == nil {
   505  		// Use system.MkdirAll here, which is a custom version of os.MkdirAll
   506  		// modified for use on Windows to handle volume GUID paths. These paths
   507  		// are of the form \\?\Volume{<GUID>}\<path>. An example would be:
   508  		// \\?\Volume{dae8d3ac-b9a1-11e9-88eb-e8554b2ba1db}\bin\busybox.exe
   509  		if err := system.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
   510  			return err
   511  		}
   512  	} else {
   513  		if err := idtools.MkdirAllAndChownNew(filepath.Dir(dest), 0o755, *identity); err != nil {
   514  			return errors.Wrapf(err, "failed to create new directory")
   515  		}
   516  	}
   517  
   518  	if err := archiver.CopyFileWithTar(source, dest); err != nil {
   519  		return errors.Wrapf(err, "failed to copy file")
   520  	}
   521  	if identity != nil {
   522  		return fixPermissions(source, dest, *identity, false)
   523  	}
   524  	return nil
   525  }
   526  
   527  func endsInSlash(path string) bool {
   528  	return strings.HasSuffix(path, string(filepath.Separator))
   529  }
   530  
   531  // isExistingDirectory returns true if the path exists and is a directory
   532  func isExistingDirectory(path string) (bool, error) {
   533  	destStat, err := os.Stat(path)
   534  	switch {
   535  	case errors.Is(err, os.ErrNotExist):
   536  		return false, nil
   537  	case err != nil:
   538  		return false, err
   539  	}
   540  	return destStat.IsDir(), nil
   541  }