github.com/rumpl/bof@v23.0.0-rc.2+incompatible/builder/dockerfile/copy.go (about)

     1  package dockerfile // import "github.com/docker/docker/builder/dockerfile"
     2  
     3  import (
     4  	"archive/tar"
     5  	"fmt"
     6  	"io"
     7  	"mime"
     8  	"net/http"
     9  	"net/url"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"sort"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/docker/docker/builder"
    18  	"github.com/docker/docker/builder/remotecontext"
    19  	"github.com/docker/docker/builder/remotecontext/urlutil"
    20  	"github.com/docker/docker/pkg/archive"
    21  	"github.com/docker/docker/pkg/containerfs"
    22  	"github.com/docker/docker/pkg/idtools"
    23  	"github.com/docker/docker/pkg/ioutils"
    24  	"github.com/docker/docker/pkg/progress"
    25  	"github.com/docker/docker/pkg/streamformatter"
    26  	"github.com/docker/docker/pkg/system"
    27  	"github.com/moby/buildkit/frontend/dockerfile/instructions"
    28  	specs "github.com/opencontainers/image-spec/specs-go/v1"
    29  	"github.com/pkg/errors"
    30  )
    31  
// unnamedFilename is the placeholder file name given to a downloaded source
// when no name can be derived from the URL path or the response headers.
const unnamedFilename = "__unnamed__"
    33  
// pathCache is the key/value store in which the copier remembers hashes it has
// already computed. In this file keys are an image ID concatenated with a
// source path and values are hash strings (see storeInPathCache).
type pathCache interface {
	// Load returns the value stored for key; ok is false when there is none.
	Load(key interface{}) (value interface{}, ok bool)
	// Store records value under key.
	Store(key, value interface{})
}
    38  
// copyInfo is a data object which stores the metadata about each source file in
// a copyInstruction
type copyInfo struct {
	// root is the filesystem that path is resolved against (see fullPath).
	root containerfs.ContainerFS
	// path is the location of the file or directory inside root.
	path string
	// hash identifies the content: "file:"-prefixed for regular files, or a
	// hash computed over the hashes of the entries for directories.
	hash string
	// noDecompress, when true, prevents performCopyForInfo from extracting the
	// source even if decompression was requested.
	noDecompress bool
}
    47  
    48  func (c copyInfo) fullPath() (string, error) {
    49  	return c.root.ResolveScopedPath(c.path, true)
    50  }
    51  
    52  func newCopyInfoFromSource(source builder.Source, path string, hash string) copyInfo {
    53  	return copyInfo{root: source.Root(), path: path, hash: hash}
    54  }
    55  
    56  func newCopyInfos(copyInfos ...copyInfo) []copyInfo {
    57  	return copyInfos
    58  }
    59  
// copyInstruction is a fully parsed COPY or ADD command that is passed to
// Builder.performCopy to copy files into the image filesystem
type copyInstruction struct {
	// cmdName is the name of the originating command; it is used in error
	// messages produced by createCopyInstruction.
	cmdName string
	// infos holds one entry per resolved source file or directory.
	infos []copyInfo
	// dest is the destination path, already converted with filepath.FromSlash.
	dest string
	// The remaining fields are not populated by createCopyInstruction.
	// NOTE(review): presumably set by the dispatcher before performCopy runs —
	// confirm against the caller.
	chownStr                string
	allowLocalDecompression bool
	preserveOwnership       bool
}
    70  
// copier reads a raw COPY or ADD command, fetches remote sources using a downloader,
// and creates a copyInstruction
type copier struct {
	// imageSource, when non-nil, is the image to copy from; calcCopyInfo then
	// creates a RW layer from it and points source at that layer.
	imageSource *imageMount
	// source is where non-URL source paths are looked up.
	source builder.Source
	// pathCache stores hashes already computed for paths of an image source.
	pathCache pathCache
	// download fetches a URL source and reports the file name it determined.
	download sourceDownloader
	// platform is set by copierFromDispatchRequest, which guarantees a
	// non-empty OS.
	platform *specs.Platform
	// for cleanup. TODO: having copier.cleanup() is error prone and hard to
	// follow. Code calling performCopy should manage the lifecycle of its params.
	// Copier should take override source as input, not imageMount.
	//
	// activeLayer is the RW layer created by calcCopyInfo; Cleanup releases it.
	activeLayer builder.RWLayer
	// tmpPaths lists temporary download directories removed by Cleanup.
	tmpPaths []string
}
    85  
    86  func copierFromDispatchRequest(req dispatchRequest, download sourceDownloader, imageSource *imageMount) copier {
    87  	platform := req.builder.platform
    88  	if platform == nil {
    89  		// May be nil if not explicitly set in API/dockerfile
    90  		platform = &specs.Platform{}
    91  	}
    92  	if platform.OS == "" {
    93  		// Default to the dispatch requests operating system if not explicit in API/dockerfile
    94  		platform.OS = req.state.operatingSystem
    95  	}
    96  	if platform.OS == "" {
    97  		// This is a failsafe just in case. Shouldn't be hit.
    98  		platform.OS = runtime.GOOS
    99  	}
   100  
   101  	return copier{
   102  		source:      req.source,
   103  		pathCache:   req.builder.pathCache,
   104  		download:    download,
   105  		imageSource: imageSource,
   106  		platform:    platform,
   107  	}
   108  }
   109  
   110  func (o *copier) createCopyInstruction(sourcesAndDest instructions.SourcesAndDest, cmdName string) (copyInstruction, error) {
   111  	inst := copyInstruction{
   112  		cmdName: cmdName,
   113  		dest:    filepath.FromSlash(sourcesAndDest.DestPath),
   114  	}
   115  	infos, err := o.getCopyInfosForSourcePaths(sourcesAndDest.SourcePaths, inst.dest)
   116  	if err != nil {
   117  		return inst, errors.Wrapf(err, "%s failed", cmdName)
   118  	}
   119  	if len(infos) > 1 && !strings.HasSuffix(inst.dest, string(os.PathSeparator)) {
   120  		return inst, errors.Errorf("When using %s with more than one source file, the destination must be a directory and end with a /", cmdName)
   121  	}
   122  	inst.infos = infos
   123  	return inst, nil
   124  }
   125  
   126  // getCopyInfosForSourcePaths iterates over the source files and calculate the info
   127  // needed to copy (e.g. hash value if cached)
   128  // The dest is used in case source is URL (and ends with "/")
   129  func (o *copier) getCopyInfosForSourcePaths(sources []string, dest string) ([]copyInfo, error) {
   130  	var infos []copyInfo
   131  	for _, orig := range sources {
   132  		subinfos, err := o.getCopyInfoForSourcePath(orig, dest)
   133  		if err != nil {
   134  			return nil, err
   135  		}
   136  		infos = append(infos, subinfos...)
   137  	}
   138  
   139  	if len(infos) == 0 {
   140  		return nil, errors.New("no source files were specified")
   141  	}
   142  	return infos, nil
   143  }
   144  
   145  func (o *copier) getCopyInfoForSourcePath(orig, dest string) ([]copyInfo, error) {
   146  	if !urlutil.IsURL(orig) {
   147  		return o.calcCopyInfo(orig, true)
   148  	}
   149  
   150  	remote, path, err := o.download(orig)
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  	// If path == "" then we are unable to determine filename from src
   155  	// We have to make sure dest is available
   156  	if path == "" {
   157  		if strings.HasSuffix(dest, "/") {
   158  			return nil, errors.Errorf("cannot determine filename for source %s", orig)
   159  		}
   160  		path = unnamedFilename
   161  	}
   162  	o.tmpPaths = append(o.tmpPaths, remote.Root().Path())
   163  
   164  	hash, err := remote.Hash(path)
   165  	ci := newCopyInfoFromSource(remote, path, hash)
   166  	ci.noDecompress = true // data from http shouldn't be extracted even on ADD
   167  	return newCopyInfos(ci), err
   168  }
   169  
   170  // Cleanup removes any temporary directories created as part of downloading
   171  // remote files.
   172  func (o *copier) Cleanup() {
   173  	for _, path := range o.tmpPaths {
   174  		os.RemoveAll(path)
   175  	}
   176  	o.tmpPaths = []string{}
   177  	if o.activeLayer != nil {
   178  		o.activeLayer.Release()
   179  		o.activeLayer = nil
   180  	}
   181  }
   182  
// calcCopyInfo resolves a single non-URL source path into the copy infos to
// copy, computing a hash for each (or loading it from the path cache).
//
// When the copier has an imageSource, the first call creates a RW layer from
// it and replaces o.source with a lazy source rooted at that layer; the layer
// stays in o.activeLayer until Cleanup releases it. origPath is converted to
// OS path form and a leading path separator, then a leading "./", is trimmed.
// If allowWildcards is true and the path contains wildcards, the work is
// delegated to copyWithWildcards. Otherwise a regular file yields the
// "file:"-prefixed hash from copyInfoForFile, and a directory yields a hash
// computed by hashStringSlice over the sorted hashes of its entries.
//
// TODO: allowWildcards can probably be removed by refactoring this function further.
func (o *copier) calcCopyInfo(origPath string, allowWildcards bool) ([]copyInfo, error) {
	imageSource := o.imageSource
	if err := validateCopySourcePath(imageSource, origPath); err != nil {
		return nil, err
	}

	// TODO: do this when creating copier. Requires validateCopySourcePath
	// (and other below) to be aware of the difference sources. Why is it only
	// done on image Source?
	if imageSource != nil && o.activeLayer == nil {
		// this needs to be protected against repeated calls as wildcard copy
		// will call it multiple times for a single COPY
		var err error
		rwLayer, err := imageSource.NewRWLayer()
		if err != nil {
			return nil, err
		}
		// Record the layer right away so Cleanup releases it even if creating
		// the lazy source below fails.
		o.activeLayer = rwLayer

		o.source, err = remotecontext.NewLazySource(rwLayer.Root())
		if err != nil {
			return nil, errors.Wrapf(err, "failed to create context for copy from %s", rwLayer.Root().Path())
		}
	}

	if o.source == nil {
		return nil, errors.Errorf("missing build context")
	}

	// Work in daemon-specific OS filepath semantics
	origPath = filepath.FromSlash(origPath)
	origPath = strings.TrimPrefix(origPath, string(os.PathSeparator))
	origPath = strings.TrimPrefix(origPath, "."+string(os.PathSeparator))

	// Deal with wildcards
	if allowWildcards && containsWildcards(origPath) {
		return o.copyWithWildcards(origPath)
	}

	// The cache is only consulted when the image has a non-empty ID; the key is
	// the image ID concatenated with the normalized path.
	if imageSource != nil && imageSource.ImageID() != "" {
		// return a cached copy if one exists
		if h, ok := o.pathCache.Load(imageSource.ImageID() + origPath); ok {
			return newCopyInfos(newCopyInfoFromSource(o.source, origPath, h.(string))), nil
		}
	}

	// Deal with the single file case
	copyInfo, err := copyInfoForFile(o.source, origPath)
	switch {
	case imageSource == nil && errors.Is(err, os.ErrNotExist):
		// Only build-context lookups get the .dockerignore hint.
		return nil, errors.Wrapf(err, "file not found in build context or excluded by .dockerignore")
	case err != nil:
		return nil, err
	case copyInfo.hash != "":
		// A non-empty hash means origPath is a regular file; err is nil here.
		o.storeInPathCache(imageSource, origPath, copyInfo.hash)
		return newCopyInfos(copyInfo), err
	}

	// copyInfoForFile returned an empty copyInfo, i.e. origPath is a directory.
	// TODO: remove, handle dirs in Hash()
	subfiles, err := walkSource(o.source, origPath)
	if err != nil {
		return nil, err
	}

	hash := hashStringSlice("dir", subfiles)
	o.storeInPathCache(imageSource, origPath, hash)
	return newCopyInfos(newCopyInfoFromSource(o.source, origPath, hash)), nil
}
   252  
   253  func (o *copier) storeInPathCache(im *imageMount, path string, hash string) {
   254  	if im != nil {
   255  		o.pathCache.Store(im.ImageID()+path, hash)
   256  	}
   257  }
   258  
   259  func (o *copier) copyWithWildcards(origPath string) ([]copyInfo, error) {
   260  	root := o.source.Root()
   261  	var copyInfos []copyInfo
   262  	if err := root.Walk(root.Path(), func(path string, info os.FileInfo, err error) error {
   263  		if err != nil {
   264  			return err
   265  		}
   266  		rel, err := remotecontext.Rel(root, path)
   267  		if err != nil {
   268  			return err
   269  		}
   270  
   271  		if rel == "." {
   272  			return nil
   273  		}
   274  		if match, _ := root.Match(origPath, rel); !match {
   275  			return nil
   276  		}
   277  
   278  		// Note we set allowWildcards to false in case the name has
   279  		// a * in it
   280  		subInfos, err := o.calcCopyInfo(rel, false)
   281  		if err != nil {
   282  			return err
   283  		}
   284  		copyInfos = append(copyInfos, subInfos...)
   285  		return nil
   286  	}); err != nil {
   287  		return nil, err
   288  	}
   289  	return copyInfos, nil
   290  }
   291  
   292  func copyInfoForFile(source builder.Source, path string) (copyInfo, error) {
   293  	fi, err := remotecontext.StatAt(source, path)
   294  	if err != nil {
   295  		if errors.Is(err, os.ErrNotExist) {
   296  			// return the relative path in the error, which is more user-friendly than the full path to the tmp-dir
   297  			return copyInfo{}, errors.WithStack(&os.PathError{Op: "stat", Path: path, Err: os.ErrNotExist})
   298  		}
   299  		return copyInfo{}, err
   300  	}
   301  
   302  	if fi.IsDir() {
   303  		return copyInfo{}, nil
   304  	}
   305  	hash, err := source.Hash(path)
   306  	if err != nil {
   307  		return copyInfo{}, err
   308  	}
   309  	return newCopyInfoFromSource(source, path, "file:"+hash), nil
   310  }
   311  
   312  // TODO: dedupe with copyWithWildcards()
   313  func walkSource(source builder.Source, origPath string) ([]string, error) {
   314  	fp, err := remotecontext.FullPath(source, origPath)
   315  	if err != nil {
   316  		return nil, err
   317  	}
   318  	// Must be a dir
   319  	var subfiles []string
   320  	err = source.Root().Walk(fp, func(path string, info os.FileInfo, err error) error {
   321  		if err != nil {
   322  			return err
   323  		}
   324  		rel, err := remotecontext.Rel(source.Root(), path)
   325  		if err != nil {
   326  			return err
   327  		}
   328  		if rel == "." {
   329  			return nil
   330  		}
   331  		hash, err := source.Hash(rel)
   332  		if err != nil {
   333  			return nil
   334  		}
   335  		// we already checked handleHash above
   336  		subfiles = append(subfiles, hash)
   337  		return nil
   338  	})
   339  	if err != nil {
   340  		return nil, err
   341  	}
   342  
   343  	sort.Strings(subfiles)
   344  	return subfiles, nil
   345  }
   346  
// sourceDownloader fetches the source at the given URL. It returns the source
// holding the downloaded content and the file name determined for it, which is
// empty when no name could be determined.
type sourceDownloader func(string) (builder.Source, string, error)
   348  
   349  func newRemoteSourceDownloader(output, stdout io.Writer) sourceDownloader {
   350  	return func(url string) (builder.Source, string, error) {
   351  		return downloadSource(output, stdout, url)
   352  	}
   353  }
   354  
   355  func errOnSourceDownload(_ string) (builder.Source, string, error) {
   356  	return nil, "", errors.New("source can't be a URL for COPY")
   357  }
   358  
   359  func getFilenameForDownload(path string, resp *http.Response) string {
   360  	// Guess filename based on source
   361  	if path != "" && !strings.HasSuffix(path, "/") {
   362  		if filename := filepath.Base(filepath.FromSlash(path)); filename != "" {
   363  			return filename
   364  		}
   365  	}
   366  
   367  	// Guess filename based on Content-Disposition
   368  	if contentDisposition := resp.Header.Get("Content-Disposition"); contentDisposition != "" {
   369  		if _, params, err := mime.ParseMediaType(contentDisposition); err == nil {
   370  			if params["filename"] != "" && !strings.HasSuffix(params["filename"], "/") {
   371  				if filename := filepath.Base(filepath.FromSlash(params["filename"])); filename != "" {
   372  					return filename
   373  				}
   374  			}
   375  		}
   376  	}
   377  	return ""
   378  }
   379  
   380  func downloadSource(output io.Writer, stdout io.Writer, srcURL string) (remote builder.Source, p string, err error) {
   381  	u, err := url.Parse(srcURL)
   382  	if err != nil {
   383  		return
   384  	}
   385  
   386  	resp, err := remotecontext.GetWithStatusError(srcURL)
   387  	if err != nil {
   388  		return
   389  	}
   390  
   391  	filename := getFilenameForDownload(u.Path, resp)
   392  
   393  	// Prepare file in a tmp dir
   394  	tmpDir, err := ioutils.TempDir("", "docker-remote")
   395  	if err != nil {
   396  		return
   397  	}
   398  	defer func() {
   399  		if err != nil {
   400  			os.RemoveAll(tmpDir)
   401  		}
   402  	}()
   403  	// If filename is empty, the returned filename will be "" but
   404  	// the tmp filename will be created as "__unnamed__"
   405  	tmpFileName := filename
   406  	if filename == "" {
   407  		tmpFileName = unnamedFilename
   408  	}
   409  	tmpFileName = filepath.Join(tmpDir, tmpFileName)
   410  	tmpFile, err := os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
   411  	if err != nil {
   412  		return
   413  	}
   414  
   415  	progressOutput := streamformatter.NewJSONProgressOutput(output, true)
   416  	progressReader := progress.NewProgressReader(resp.Body, progressOutput, resp.ContentLength, "", "Downloading")
   417  	// Download and dump result to tmp file
   418  	// TODO: add filehash directly
   419  	if _, err = io.Copy(tmpFile, progressReader); err != nil {
   420  		tmpFile.Close()
   421  		return
   422  	}
   423  	// TODO: how important is this random blank line to the output?
   424  	fmt.Fprintln(stdout)
   425  
   426  	// Set the mtime to the Last-Modified header value if present
   427  	// Otherwise just remove atime and mtime
   428  	mTime := time.Time{}
   429  
   430  	lastMod := resp.Header.Get("Last-Modified")
   431  	if lastMod != "" {
   432  		// If we can't parse it then just let it default to 'zero'
   433  		// otherwise use the parsed time value
   434  		if parsedMTime, err := http.ParseTime(lastMod); err == nil {
   435  			mTime = parsedMTime
   436  		}
   437  	}
   438  
   439  	tmpFile.Close()
   440  
   441  	if err = system.Chtimes(tmpFileName, mTime, mTime); err != nil {
   442  		return
   443  	}
   444  
   445  	lc, err := remotecontext.NewLazySource(containerfs.NewLocalContainerFS(tmpDir))
   446  	return lc, filename, err
   447  }
   448  
// copyFileOptions carries the settings performCopyForInfo applies to one copy.
type copyFileOptions struct {
	// decompress allows a source file that is a tar archive to be extracted
	// into the destination instead of being copied as a file.
	decompress bool
	// identity, when non-nil, is passed to fixPermissions after the copy and
	// used when creating missing parent directories.
	identity *idtools.Identity
	// archiver performs the actual copy or extraction.
	archiver Archiver
}
   454  
// copyEndpoint is one side of a copy: a path together with the filesystem
// driver used to inspect it.
type copyEndpoint struct {
	driver containerfs.Driver
	path   string
}
   459  
// performCopyForInfo copies one resolved source into dest.
//
// Both paths are first resolved with fullPath. A directory source is copied
// with copyDirectory. A regular file is extracted into the destination with
// archiver.UntarPath when options.decompress is set, isArchivePath reports it
// as an archive, and source.noDecompress is false. Any other file is copied
// with copyFile; if dest.path ends in a separator or the destination is an
// existing directory, the file is placed inside it under the base name of
// source.path.
func performCopyForInfo(dest copyInfo, source copyInfo, options copyFileOptions) error {
	srcPath, err := source.fullPath()
	if err != nil {
		return err
	}

	destPath, err := dest.fullPath()
	if err != nil {
		return err
	}

	archiver := options.archiver

	srcEndpoint := &copyEndpoint{driver: source.root, path: srcPath}
	destEndpoint := &copyEndpoint{driver: dest.root, path: destPath}

	src, err := source.root.Stat(srcPath)
	if err != nil {
		return errors.Wrapf(err, "source path not found")
	}
	if src.IsDir() {
		return copyDirectory(archiver, srcEndpoint, destEndpoint, options.identity)
	}
	// Extraction is skipped for sources flagged noDecompress.
	if options.decompress && isArchivePath(source.root, srcPath) && !source.noDecompress {
		return archiver.UntarPath(srcPath, destPath)
	}

	destExistsAsDir, err := isExistingDirectory(destEndpoint)
	if err != nil {
		return err
	}
	// dest.path must be used because destPath has already been cleaned of any
	// trailing slash
	if endsInSlash(dest.root, dest.path) || destExistsAsDir {
		// source.path must be used to get the correct filename when the source
		// is a symlink
		destPath = dest.root.Join(destPath, source.root.Base(source.path))
		destEndpoint = &copyEndpoint{driver: dest.root, path: destPath}
	}
	return copyFile(archiver, srcEndpoint, destEndpoint, options.identity)
}
   501  
   502  func isArchivePath(driver containerfs.ContainerFS, path string) bool {
   503  	file, err := driver.Open(path)
   504  	if err != nil {
   505  		return false
   506  	}
   507  	defer file.Close()
   508  	rdr, err := archive.DecompressStream(file)
   509  	if err != nil {
   510  		return false
   511  	}
   512  	r := tar.NewReader(rdr)
   513  	_, err = r.Next()
   514  	return err == nil
   515  }
   516  
   517  func copyDirectory(archiver Archiver, source, dest *copyEndpoint, identity *idtools.Identity) error {
   518  	destExists, err := isExistingDirectory(dest)
   519  	if err != nil {
   520  		return errors.Wrapf(err, "failed to query destination path")
   521  	}
   522  
   523  	if err := archiver.CopyWithTar(source.path, dest.path); err != nil {
   524  		return errors.Wrapf(err, "failed to copy directory")
   525  	}
   526  	if identity != nil {
   527  		return fixPermissions(source.path, dest.path, *identity, !destExists)
   528  	}
   529  	return nil
   530  }
   531  
   532  func copyFile(archiver Archiver, source, dest *copyEndpoint, identity *idtools.Identity) error {
   533  	if identity == nil {
   534  		// Use system.MkdirAll here, which is a custom version of os.MkdirAll
   535  		// modified for use on Windows to handle volume GUID paths. These paths
   536  		// are of the form \\?\Volume{<GUID>}\<path>. An example would be:
   537  		// \\?\Volume{dae8d3ac-b9a1-11e9-88eb-e8554b2ba1db}\bin\busybox.exe
   538  		if err := system.MkdirAll(filepath.Dir(dest.path), 0755); err != nil {
   539  			return err
   540  		}
   541  	} else {
   542  		if err := idtools.MkdirAllAndChownNew(filepath.Dir(dest.path), 0755, *identity); err != nil {
   543  			return errors.Wrapf(err, "failed to create new directory")
   544  		}
   545  	}
   546  
   547  	if err := archiver.CopyFileWithTar(source.path, dest.path); err != nil {
   548  		return errors.Wrapf(err, "failed to copy file")
   549  	}
   550  	if identity != nil {
   551  		return fixPermissions(source.path, dest.path, *identity, false)
   552  	}
   553  	return nil
   554  }
   555  
   556  func endsInSlash(driver containerfs.Driver, path string) bool {
   557  	return strings.HasSuffix(path, string(driver.Separator()))
   558  }
   559  
   560  // isExistingDirectory returns true if the path exists and is a directory
   561  func isExistingDirectory(point *copyEndpoint) (bool, error) {
   562  	destStat, err := point.driver.Stat(point.path)
   563  	switch {
   564  	case errors.Is(err, os.ErrNotExist):
   565  		return false, nil
   566  	case err != nil:
   567  		return false, err
   568  	}
   569  	return destStat.IsDir(), nil
   570  }