github.com/rawahars/moby@v24.0.4+incompatible/builder/dockerfile/copy.go (about)

     1  package dockerfile // import "github.com/docker/docker/builder/dockerfile"
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"mime"
     7  	"net/http"
     8  	"net/url"
     9  	"os"
    10  	"path/filepath"
    11  	"runtime"
    12  	"sort"
    13  	"strings"
    14  	"time"
    15  
    16  	"github.com/docker/docker/builder"
    17  	"github.com/docker/docker/builder/remotecontext"
    18  	"github.com/docker/docker/builder/remotecontext/urlutil"
    19  	"github.com/docker/docker/pkg/archive"
    20  	"github.com/docker/docker/pkg/containerfs"
    21  	"github.com/docker/docker/pkg/idtools"
    22  	"github.com/docker/docker/pkg/longpath"
    23  	"github.com/docker/docker/pkg/progress"
    24  	"github.com/docker/docker/pkg/streamformatter"
    25  	"github.com/docker/docker/pkg/system"
    26  	"github.com/moby/buildkit/frontend/dockerfile/instructions"
    27  	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
    28  	"github.com/pkg/errors"
    29  )
    30  
// unnamedFilename is the placeholder file name used for a downloaded source
// when no file name could be derived for it: downloadSource stores the
// payload under this name in its temporary directory, and
// getCopyInfoForSourcePath uses it as the source path in that case.
const unnamedFilename = "__unnamed__"
    32  
    33  type pathCache interface {
    34  	Load(key interface{}) (value interface{}, ok bool)
    35  	Store(key, value interface{})
    36  }
    37  
    38  // copyInfo is a data object which stores the metadata about each source file in
    39  // a copyInstruction
    40  type copyInfo struct {
    41  	root         string
    42  	path         string
    43  	hash         string
    44  	noDecompress bool
    45  }
    46  
    47  func (c copyInfo) fullPath() (string, error) {
    48  	return containerfs.ResolveScopedPath(c.root, c.path)
    49  }
    50  
    51  func newCopyInfoFromSource(source builder.Source, path string, hash string) copyInfo {
    52  	return copyInfo{root: source.Root(), path: path, hash: hash}
    53  }
    54  
    55  func newCopyInfos(copyInfos ...copyInfo) []copyInfo {
    56  	return copyInfos
    57  }
    58  
    59  // copyInstruction is a fully parsed COPY or ADD command that is passed to
    60  // Builder.performCopy to copy files into the image filesystem
    61  type copyInstruction struct {
    62  	cmdName                 string
    63  	infos                   []copyInfo
    64  	dest                    string
    65  	chownStr                string
    66  	allowLocalDecompression bool
    67  	preserveOwnership       bool
    68  }
    69  
// copier reads a raw COPY or ADD command, fetches remote sources using a downloader,
// and creates a copyInstruction
type copier struct {
	// imageSource, when non-nil, makes calcCopyInfo mount a read-write layer
	// from it and use that layer's root as the copy source.
	imageSource *imageMount
	// source is what local (non-URL) paths are resolved against. It is
	// replaced by calcCopyInfo when imageSource is set.
	source      builder.Source
	// pathCache stores hashes keyed by image ID + source path.
	pathCache   pathCache
	// download fetches sources given as URLs.
	download    sourceDownloader
	// platform is filled in by copierFromDispatchRequest; its OS is never
	// empty after construction through that function.
	platform    *ocispec.Platform
	// for cleanup. TODO: having copier.cleanup() is error prone and hard to
	// follow. Code calling performCopy should manage the lifecycle of its params.
	// Copier should take override source as input, not imageMount.
	//
	// activeLayer is the layer created from imageSource; Cleanup releases it.
	activeLayer builder.RWLayer
	// tmpPaths lists download directories that Cleanup removes.
	tmpPaths    []string
}
    84  
    85  func copierFromDispatchRequest(req dispatchRequest, download sourceDownloader, imageSource *imageMount) copier {
    86  	platform := req.builder.platform
    87  	if platform == nil {
    88  		// May be nil if not explicitly set in API/dockerfile
    89  		platform = &ocispec.Platform{}
    90  	}
    91  	if platform.OS == "" {
    92  		// Default to the dispatch requests operating system if not explicit in API/dockerfile
    93  		platform.OS = req.state.operatingSystem
    94  	}
    95  	if platform.OS == "" {
    96  		// This is a failsafe just in case. Shouldn't be hit.
    97  		platform.OS = runtime.GOOS
    98  	}
    99  
   100  	return copier{
   101  		source:      req.source,
   102  		pathCache:   req.builder.pathCache,
   103  		download:    download,
   104  		imageSource: imageSource,
   105  		platform:    platform,
   106  	}
   107  }
   108  
   109  func (o *copier) createCopyInstruction(sourcesAndDest instructions.SourcesAndDest, cmdName string) (copyInstruction, error) {
   110  	inst := copyInstruction{
   111  		cmdName: cmdName,
   112  		dest:    filepath.FromSlash(sourcesAndDest.DestPath),
   113  	}
   114  	infos, err := o.getCopyInfosForSourcePaths(sourcesAndDest.SourcePaths, inst.dest)
   115  	if err != nil {
   116  		return inst, errors.Wrapf(err, "%s failed", cmdName)
   117  	}
   118  	if len(infos) > 1 && !strings.HasSuffix(inst.dest, string(os.PathSeparator)) {
   119  		return inst, errors.Errorf("When using %s with more than one source file, the destination must be a directory and end with a /", cmdName)
   120  	}
   121  	inst.infos = infos
   122  	return inst, nil
   123  }
   124  
   125  // getCopyInfosForSourcePaths iterates over the source files and calculate the info
   126  // needed to copy (e.g. hash value if cached)
   127  // The dest is used in case source is URL (and ends with "/")
   128  func (o *copier) getCopyInfosForSourcePaths(sources []string, dest string) ([]copyInfo, error) {
   129  	var infos []copyInfo
   130  	for _, orig := range sources {
   131  		subinfos, err := o.getCopyInfoForSourcePath(orig, dest)
   132  		if err != nil {
   133  			return nil, err
   134  		}
   135  		infos = append(infos, subinfos...)
   136  	}
   137  
   138  	if len(infos) == 0 {
   139  		return nil, errors.New("no source files were specified")
   140  	}
   141  	return infos, nil
   142  }
   143  
   144  func (o *copier) getCopyInfoForSourcePath(orig, dest string) ([]copyInfo, error) {
   145  	if !urlutil.IsURL(orig) {
   146  		return o.calcCopyInfo(orig, true)
   147  	}
   148  
   149  	remote, path, err := o.download(orig)
   150  	if err != nil {
   151  		return nil, err
   152  	}
   153  	// If path == "" then we are unable to determine filename from src
   154  	// We have to make sure dest is available
   155  	if path == "" {
   156  		if strings.HasSuffix(dest, "/") {
   157  			return nil, errors.Errorf("cannot determine filename for source %s", orig)
   158  		}
   159  		path = unnamedFilename
   160  	}
   161  	o.tmpPaths = append(o.tmpPaths, remote.Root())
   162  
   163  	hash, err := remote.Hash(path)
   164  	ci := newCopyInfoFromSource(remote, path, hash)
   165  	ci.noDecompress = true // data from http shouldn't be extracted even on ADD
   166  	return newCopyInfos(ci), err
   167  }
   168  
   169  // Cleanup removes any temporary directories created as part of downloading
   170  // remote files.
   171  func (o *copier) Cleanup() {
   172  	for _, path := range o.tmpPaths {
   173  		os.RemoveAll(path)
   174  	}
   175  	o.tmpPaths = []string{}
   176  	if o.activeLayer != nil {
   177  		o.activeLayer.Release()
   178  		o.activeLayer = nil
   179  	}
   180  }
   181  
   182  // TODO: allowWildcards can probably be removed by refactoring this function further.
   183  func (o *copier) calcCopyInfo(origPath string, allowWildcards bool) ([]copyInfo, error) {
   184  	imageSource := o.imageSource
   185  	if err := validateCopySourcePath(imageSource, origPath); err != nil {
   186  		return nil, err
   187  	}
   188  
   189  	// TODO: do this when creating copier. Requires validateCopySourcePath
   190  	// (and other below) to be aware of the difference sources. Why is it only
   191  	// done on image Source?
   192  	if imageSource != nil && o.activeLayer == nil {
   193  		// this needs to be protected against repeated calls as wildcard copy
   194  		// will call it multiple times for a single COPY
   195  		var err error
   196  		rwLayer, err := imageSource.NewRWLayer()
   197  		if err != nil {
   198  			return nil, err
   199  		}
   200  		o.activeLayer = rwLayer
   201  
   202  		o.source, err = remotecontext.NewLazySource(rwLayer.Root())
   203  		if err != nil {
   204  			return nil, errors.Wrapf(err, "failed to create context for copy from %s", rwLayer.Root())
   205  		}
   206  	}
   207  
   208  	if o.source == nil {
   209  		return nil, errors.Errorf("missing build context")
   210  	}
   211  
   212  	// Work in daemon-specific OS filepath semantics
   213  	origPath = filepath.FromSlash(origPath)
   214  	origPath = strings.TrimPrefix(origPath, string(os.PathSeparator))
   215  	origPath = strings.TrimPrefix(origPath, "."+string(os.PathSeparator))
   216  
   217  	// Deal with wildcards
   218  	if allowWildcards && containsWildcards(origPath) {
   219  		return o.copyWithWildcards(origPath)
   220  	}
   221  
   222  	if imageSource != nil && imageSource.ImageID() != "" {
   223  		// return a cached copy if one exists
   224  		if h, ok := o.pathCache.Load(imageSource.ImageID() + origPath); ok {
   225  			return newCopyInfos(newCopyInfoFromSource(o.source, origPath, h.(string))), nil
   226  		}
   227  	}
   228  
   229  	// Deal with the single file case
   230  	copyInfo, err := copyInfoForFile(o.source, origPath)
   231  	switch {
   232  	case imageSource == nil && errors.Is(err, os.ErrNotExist):
   233  		return nil, errors.Wrapf(err, "file not found in build context or excluded by .dockerignore")
   234  	case err != nil:
   235  		return nil, err
   236  	case copyInfo.hash != "":
   237  		o.storeInPathCache(imageSource, origPath, copyInfo.hash)
   238  		return newCopyInfos(copyInfo), err
   239  	}
   240  
   241  	// TODO: remove, handle dirs in Hash()
   242  	subfiles, err := walkSource(o.source, origPath)
   243  	if err != nil {
   244  		return nil, err
   245  	}
   246  
   247  	hash := hashStringSlice("dir", subfiles)
   248  	o.storeInPathCache(imageSource, origPath, hash)
   249  	return newCopyInfos(newCopyInfoFromSource(o.source, origPath, hash)), nil
   250  }
   251  
   252  func (o *copier) storeInPathCache(im *imageMount, path string, hash string) {
   253  	if im != nil {
   254  		o.pathCache.Store(im.ImageID()+path, hash)
   255  	}
   256  }
   257  
   258  func (o *copier) copyWithWildcards(origPath string) ([]copyInfo, error) {
   259  	root := o.source.Root()
   260  	var copyInfos []copyInfo
   261  	if err := filepath.WalkDir(root, func(path string, _ os.DirEntry, err error) error {
   262  		if err != nil {
   263  			return err
   264  		}
   265  		rel, err := remotecontext.Rel(root, path)
   266  		if err != nil {
   267  			return err
   268  		}
   269  
   270  		if rel == "." {
   271  			return nil
   272  		}
   273  		if match, _ := filepath.Match(origPath, rel); !match {
   274  			return nil
   275  		}
   276  
   277  		// Note we set allowWildcards to false in case the name has
   278  		// a * in it
   279  		subInfos, err := o.calcCopyInfo(rel, false)
   280  		if err != nil {
   281  			return err
   282  		}
   283  		copyInfos = append(copyInfos, subInfos...)
   284  		return nil
   285  	}); err != nil {
   286  		return nil, err
   287  	}
   288  	return copyInfos, nil
   289  }
   290  
   291  func copyInfoForFile(source builder.Source, path string) (copyInfo, error) {
   292  	fi, err := remotecontext.StatAt(source, path)
   293  	if err != nil {
   294  		if errors.Is(err, os.ErrNotExist) {
   295  			// return the relative path in the error, which is more user-friendly than the full path to the tmp-dir
   296  			return copyInfo{}, errors.WithStack(&os.PathError{Op: "stat", Path: path, Err: os.ErrNotExist})
   297  		}
   298  		return copyInfo{}, err
   299  	}
   300  
   301  	if fi.IsDir() {
   302  		return copyInfo{}, nil
   303  	}
   304  	hash, err := source.Hash(path)
   305  	if err != nil {
   306  		return copyInfo{}, err
   307  	}
   308  	return newCopyInfoFromSource(source, path, "file:"+hash), nil
   309  }
   310  
   311  // TODO: dedupe with copyWithWildcards()
   312  func walkSource(source builder.Source, origPath string) ([]string, error) {
   313  	fp, err := remotecontext.FullPath(source, origPath)
   314  	if err != nil {
   315  		return nil, err
   316  	}
   317  	// Must be a dir
   318  	var subfiles []string
   319  	err = filepath.WalkDir(fp, func(path string, _ os.DirEntry, err error) error {
   320  		if err != nil {
   321  			return err
   322  		}
   323  		rel, err := remotecontext.Rel(source.Root(), path)
   324  		if err != nil {
   325  			return err
   326  		}
   327  		if rel == "." {
   328  			return nil
   329  		}
   330  		hash, err := source.Hash(rel)
   331  		if err != nil {
   332  			return nil
   333  		}
   334  		// we already checked handleHash above
   335  		subfiles = append(subfiles, hash)
   336  		return nil
   337  	})
   338  	if err != nil {
   339  		return nil, err
   340  	}
   341  
   342  	sort.Strings(subfiles)
   343  	return subfiles, nil
   344  }
   345  
   346  type sourceDownloader func(string) (builder.Source, string, error)
   347  
   348  func newRemoteSourceDownloader(output, stdout io.Writer) sourceDownloader {
   349  	return func(url string) (builder.Source, string, error) {
   350  		return downloadSource(output, stdout, url)
   351  	}
   352  }
   353  
   354  func errOnSourceDownload(_ string) (builder.Source, string, error) {
   355  	return nil, "", errors.New("source can't be a URL for COPY")
   356  }
   357  
   358  func getFilenameForDownload(path string, resp *http.Response) string {
   359  	// Guess filename based on source
   360  	if path != "" && !strings.HasSuffix(path, "/") {
   361  		if filename := filepath.Base(filepath.FromSlash(path)); filename != "" {
   362  			return filename
   363  		}
   364  	}
   365  
   366  	// Guess filename based on Content-Disposition
   367  	if contentDisposition := resp.Header.Get("Content-Disposition"); contentDisposition != "" {
   368  		if _, params, err := mime.ParseMediaType(contentDisposition); err == nil {
   369  			if params["filename"] != "" && !strings.HasSuffix(params["filename"], "/") {
   370  				if filename := filepath.Base(filepath.FromSlash(params["filename"])); filename != "" {
   371  					return filename
   372  				}
   373  			}
   374  		}
   375  	}
   376  	return ""
   377  }
   378  
   379  func downloadSource(output io.Writer, stdout io.Writer, srcURL string) (remote builder.Source, p string, err error) {
   380  	u, err := url.Parse(srcURL)
   381  	if err != nil {
   382  		return
   383  	}
   384  
   385  	resp, err := remotecontext.GetWithStatusError(srcURL)
   386  	if err != nil {
   387  		return
   388  	}
   389  
   390  	filename := getFilenameForDownload(u.Path, resp)
   391  
   392  	// Prepare file in a tmp dir
   393  	tmpDir, err := longpath.MkdirTemp("", "docker-remote")
   394  	if err != nil {
   395  		return
   396  	}
   397  	defer func() {
   398  		if err != nil {
   399  			os.RemoveAll(tmpDir)
   400  		}
   401  	}()
   402  	// If filename is empty, the returned filename will be "" but
   403  	// the tmp filename will be created as "__unnamed__"
   404  	tmpFileName := filename
   405  	if filename == "" {
   406  		tmpFileName = unnamedFilename
   407  	}
   408  	tmpFileName = filepath.Join(tmpDir, tmpFileName)
   409  	tmpFile, err := os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
   410  	if err != nil {
   411  		return
   412  	}
   413  
   414  	progressOutput := streamformatter.NewJSONProgressOutput(output, true)
   415  	progressReader := progress.NewProgressReader(resp.Body, progressOutput, resp.ContentLength, "", "Downloading")
   416  	// Download and dump result to tmp file
   417  	// TODO: add filehash directly
   418  	if _, err = io.Copy(tmpFile, progressReader); err != nil {
   419  		tmpFile.Close()
   420  		return
   421  	}
   422  	// TODO: how important is this random blank line to the output?
   423  	fmt.Fprintln(stdout)
   424  
   425  	// Set the mtime to the Last-Modified header value if present
   426  	// Otherwise just remove atime and mtime
   427  	mTime := time.Time{}
   428  
   429  	lastMod := resp.Header.Get("Last-Modified")
   430  	if lastMod != "" {
   431  		// If we can't parse it then just let it default to 'zero'
   432  		// otherwise use the parsed time value
   433  		if parsedMTime, err := http.ParseTime(lastMod); err == nil {
   434  			mTime = parsedMTime
   435  		}
   436  	}
   437  
   438  	tmpFile.Close()
   439  
   440  	if err = system.Chtimes(tmpFileName, mTime, mTime); err != nil {
   441  		return
   442  	}
   443  
   444  	lc, err := remotecontext.NewLazySource(tmpDir)
   445  	return lc, filename, err
   446  }
   447  
// copyFileOptions bundles the settings performCopyForInfo needs for copying a
// single source.
type copyFileOptions struct {
	// decompress allows archive sources to be extracted into the destination
	// (unless the source itself is marked noDecompress).
	decompress bool
	// identity, when non-nil, is passed to fixPermissions after the copy and
	// is used when creating the destination's parent directory for files.
	identity   *idtools.Identity
	// archiver performs the actual copy/extract operations.
	archiver   *archive.Archiver
}
   453  
   454  func performCopyForInfo(dest copyInfo, source copyInfo, options copyFileOptions) error {
   455  	srcPath, err := source.fullPath()
   456  	if err != nil {
   457  		return err
   458  	}
   459  
   460  	destPath, err := dest.fullPath()
   461  	if err != nil {
   462  		return err
   463  	}
   464  
   465  	archiver := options.archiver
   466  
   467  	src, err := os.Stat(srcPath)
   468  	if err != nil {
   469  		return errors.Wrapf(err, "source path not found")
   470  	}
   471  	if src.IsDir() {
   472  		return copyDirectory(archiver, srcPath, destPath, options.identity)
   473  	}
   474  	if options.decompress && archive.IsArchivePath(srcPath) && !source.noDecompress {
   475  		return archiver.UntarPath(srcPath, destPath)
   476  	}
   477  
   478  	destExistsAsDir, err := isExistingDirectory(destPath)
   479  	if err != nil {
   480  		return err
   481  	}
   482  	// dest.path must be used because destPath has already been cleaned of any
   483  	// trailing slash
   484  	if endsInSlash(dest.path) || destExistsAsDir {
   485  		// source.path must be used to get the correct filename when the source
   486  		// is a symlink
   487  		destPath = filepath.Join(destPath, filepath.Base(source.path))
   488  	}
   489  	return copyFile(archiver, srcPath, destPath, options.identity)
   490  }
   491  
   492  func copyDirectory(archiver *archive.Archiver, source, dest string, identity *idtools.Identity) error {
   493  	destExists, err := isExistingDirectory(dest)
   494  	if err != nil {
   495  		return errors.Wrapf(err, "failed to query destination path")
   496  	}
   497  
   498  	if err := archiver.CopyWithTar(source, dest); err != nil {
   499  		return errors.Wrapf(err, "failed to copy directory")
   500  	}
   501  	if identity != nil {
   502  		return fixPermissions(source, dest, *identity, !destExists)
   503  	}
   504  	return nil
   505  }
   506  
   507  func copyFile(archiver *archive.Archiver, source, dest string, identity *idtools.Identity) error {
   508  	if identity == nil {
   509  		// Use system.MkdirAll here, which is a custom version of os.MkdirAll
   510  		// modified for use on Windows to handle volume GUID paths. These paths
   511  		// are of the form \\?\Volume{<GUID>}\<path>. An example would be:
   512  		// \\?\Volume{dae8d3ac-b9a1-11e9-88eb-e8554b2ba1db}\bin\busybox.exe
   513  		if err := system.MkdirAll(filepath.Dir(dest), 0755); err != nil {
   514  			return err
   515  		}
   516  	} else {
   517  		if err := idtools.MkdirAllAndChownNew(filepath.Dir(dest), 0755, *identity); err != nil {
   518  			return errors.Wrapf(err, "failed to create new directory")
   519  		}
   520  	}
   521  
   522  	if err := archiver.CopyFileWithTar(source, dest); err != nil {
   523  		return errors.Wrapf(err, "failed to copy file")
   524  	}
   525  	if identity != nil {
   526  		return fixPermissions(source, dest, *identity, false)
   527  	}
   528  	return nil
   529  }
   530  
   531  func endsInSlash(path string) bool {
   532  	return strings.HasSuffix(path, string(filepath.Separator))
   533  }
   534  
   535  // isExistingDirectory returns true if the path exists and is a directory
   536  func isExistingDirectory(path string) (bool, error) {
   537  	destStat, err := os.Stat(path)
   538  	switch {
   539  	case errors.Is(err, os.ErrNotExist):
   540  		return false, nil
   541  	case err != nil:
   542  		return false, err
   543  	}
   544  	return destStat.IsDir(), nil
   545  }