github.com/buildpacks/pack@v0.33.3-0.20240516162812-884dd1837311/pkg/archive/archive.go (about)

     1  // Package archive defines a set of functions for reading and writing directories and files in a number of tar formats.
     2  package archive // import "github.com/buildpacks/pack/pkg/archive"
     3  
     4  import (
     5  	"archive/tar"
     6  	"archive/zip"
     7  	"io"
     8  	"io/fs"
     9  	"os"
    10  	"path/filepath"
    11  	"time"
    12  
    13  	"github.com/docker/docker/pkg/ioutils"
    14  	"github.com/pkg/errors"
    15  
    16  	"github.com/buildpacks/pack/internal/paths"
    17  )
    18  
    19  var NormalizedDateTime time.Time
    20  var Umask fs.FileMode
    21  
    22  func init() {
    23  	NormalizedDateTime = time.Date(1980, time.January, 1, 0, 0, 1, 0, time.UTC)
    24  }
    25  
// TarWriter is the set of tar-writing operations used by this package: emit an
// entry header, stream that entry's bytes, and close the archive. The standard
// library's *tar.Writer is the implementation returned by the default factory.
type TarWriter interface {
	// WriteHeader starts a new entry described by hdr.
	WriteHeader(hdr *tar.Header) error
	// Write appends b to the entry started by the most recent WriteHeader call.
	Write(b []byte) (int, error)
	// Close finishes the archive.
	Close() error
}
    31  
// TarWriterFactory creates TarWriters, letting callers choose the tar
// implementation used when generating archives.
type TarWriterFactory interface {
	// NewWriter returns a TarWriter whose output is written to the given io.Writer.
	NewWriter(io.Writer) TarWriter
}
    35  
// defaultTarWriterFactory is the stateless TarWriterFactory backed by the
// standard library's archive/tar package.
type defaultTarWriterFactory struct{}

// DefaultTarWriterFactory returns a TarWriterFactory that produces standard
// library tar writers.
func DefaultTarWriterFactory() TarWriterFactory {
	return defaultTarWriterFactory{}
}

// NewWriter returns a *tar.Writer that writes to w.
func (defaultTarWriterFactory) NewWriter(w io.Writer) TarWriter {
	return tar.NewWriter(w)
}
    45  
    46  func ReadDirAsTar(srcDir, basePath string, uid, gid int, mode int64, normalizeModTime, includeRoot bool, fileFilter func(string) bool) io.ReadCloser {
    47  	return GenerateTar(func(tw TarWriter) error {
    48  		return WriteDirToTar(tw, srcDir, basePath, uid, gid, mode, normalizeModTime, includeRoot, fileFilter)
    49  	})
    50  }
    51  
    52  func ReadZipAsTar(srcPath, basePath string, uid, gid int, mode int64, normalizeModTime bool, fileFilter func(string) bool) io.ReadCloser {
    53  	return GenerateTar(func(tw TarWriter) error {
    54  		return WriteZipToTar(tw, srcPath, basePath, uid, gid, mode, normalizeModTime, fileFilter)
    55  	})
    56  }
    57  
    58  func GenerateTar(genFn func(TarWriter) error) io.ReadCloser {
    59  	return GenerateTarWithWriter(genFn, DefaultTarWriterFactory())
    60  }
    61  
    62  // GenerateTarWithWriter returns a reader to a tar from a generator function using a writer from the provided factory.
    63  // Note that the generator will not fully execute until the reader is fully read from. Any errors returned by the
    64  // generator will be returned when reading the reader.
    65  func GenerateTarWithWriter(genFn func(TarWriter) error, twf TarWriterFactory) io.ReadCloser {
    66  	errChan := make(chan error)
    67  	pr, pw := io.Pipe()
    68  
    69  	go func() {
    70  		tw := twf.NewWriter(pw)
    71  		defer func() {
    72  			if r := recover(); r != nil {
    73  				tw.Close()
    74  				pw.CloseWithError(errors.Errorf("panic: %v", r))
    75  			}
    76  		}()
    77  
    78  		err := genFn(tw)
    79  
    80  		closeErr := tw.Close()
    81  		closeErr = aggregateError(closeErr, pw.CloseWithError(err))
    82  
    83  		errChan <- closeErr
    84  	}()
    85  
    86  	return ioutils.NewReadCloserWrapper(pr, func() error {
    87  		var completeErr error
    88  
    89  		// closing the reader ensures that if anything attempts
    90  		// further reading it doesn't block waiting for content
    91  		if err := pr.Close(); err != nil {
    92  			completeErr = aggregateError(completeErr, err)
    93  		}
    94  
    95  		// wait until everything closes properly
    96  		if err := <-errChan; err != nil {
    97  			completeErr = aggregateError(completeErr, err)
    98  		}
    99  
   100  		return completeErr
   101  	})
   102  }
   103  
   104  func aggregateError(base, addition error) error {
   105  	if addition == nil {
   106  		return base
   107  	}
   108  
   109  	if base == nil {
   110  		return addition
   111  	}
   112  
   113  	return errors.Wrap(addition, base.Error())
   114  }
   115  
   116  func CreateSingleFileTarReader(path, txt string) io.ReadCloser {
   117  	tarBuilder := TarBuilder{}
   118  	tarBuilder.AddFile(path, 0644, NormalizedDateTime, []byte(txt))
   119  	return tarBuilder.Reader(DefaultTarWriterFactory())
   120  }
   121  
   122  func CreateSingleFileTar(tarFile, path, txt string) error {
   123  	tarBuilder := TarBuilder{}
   124  	tarBuilder.AddFile(path, 0644, NormalizedDateTime, []byte(txt))
   125  	return tarBuilder.WriteToPath(tarFile, DefaultTarWriterFactory())
   126  }
   127  
   128  // ErrEntryNotExist is an error returned if an entry path doesn't exist
   129  var ErrEntryNotExist = errors.New("not exist")
   130  
   131  // IsEntryNotExist detects whether a given error is of type ErrEntryNotExist
   132  func IsEntryNotExist(err error) bool {
   133  	return err == ErrEntryNotExist || errors.Cause(err) == ErrEntryNotExist
   134  }
   135  
   136  // ReadTarEntry reads and returns a tar file
   137  func ReadTarEntry(rc io.Reader, entryPath string) (*tar.Header, []byte, error) {
   138  	canonicalEntryPath := paths.CanonicalTarPath(entryPath)
   139  	tr := tar.NewReader(rc)
   140  	for {
   141  		header, err := tr.Next()
   142  		if err == io.EOF {
   143  			break
   144  		}
   145  		if err != nil {
   146  			return nil, nil, errors.Wrap(err, "failed to get next tar entry")
   147  		}
   148  
   149  		if paths.CanonicalTarPath(header.Name) == canonicalEntryPath {
   150  			buf, err := io.ReadAll(tr)
   151  			if err != nil {
   152  				return nil, nil, errors.Wrapf(err, "failed to read contents of '%s'", entryPath)
   153  			}
   154  
   155  			return header, buf, nil
   156  		}
   157  	}
   158  
   159  	return nil, nil, errors.Wrapf(ErrEntryNotExist, "could not find entry path '%s'", entryPath)
   160  }
   161  
   162  // WriteDirToTar writes the contents of a directory to a tar writer. `basePath` is the "location" in the tar the
   163  // contents will be placed. The includeRoot param sets the permissions and metadata on the root file.
   164  func WriteDirToTar(tw TarWriter, srcDir, basePath string, uid, gid int, mode int64, normalizeModTime, includeRoot bool, fileFilter func(string) bool) error {
   165  	if includeRoot {
   166  		mode := modePermIfNegativeMode(mode)
   167  		err := writeRootHeader(tw, basePath, mode, uid, gid, normalizeModTime)
   168  		if err != nil {
   169  			return err
   170  		}
   171  	}
   172  
   173  	hardLinkFiles := map[uint64]string{}
   174  	return filepath.Walk(srcDir, func(file string, fi os.FileInfo, err error) error {
   175  		var relPath string
   176  		if fileFilter != nil {
   177  			relPath, err = filepath.Rel(srcDir, file)
   178  			if err != nil {
   179  				return err
   180  			}
   181  			if !fileFilter(relPath) {
   182  				return nil
   183  			}
   184  		}
   185  
   186  		if err != nil {
   187  			return err
   188  		}
   189  
   190  		if relPath == "" {
   191  			relPath, err = filepath.Rel(srcDir, file)
   192  			if err != nil {
   193  				return err
   194  			}
   195  		}
   196  		if relPath == "." {
   197  			return nil
   198  		}
   199  
   200  		if hasModeSocket(fi) != 0 {
   201  			return nil
   202  		}
   203  
   204  		var header *tar.Header
   205  		if hasModeSymLink(fi) {
   206  			if header, err = getHeaderFromSymLink(file, fi); err != nil {
   207  				return err
   208  			}
   209  		} else {
   210  			if header, err = tar.FileInfoHeader(fi, fi.Name()); err != nil {
   211  				return err
   212  			}
   213  		}
   214  
   215  		header.Name = getHeaderNameFromBaseAndRelPath(basePath, relPath)
   216  		if err = processHardLinks(file, fi, hardLinkFiles, header); err != nil {
   217  			return err
   218  		}
   219  
   220  		err = writeHeader(header, uid, gid, mode, normalizeModTime, tw)
   221  		if err != nil {
   222  			return err
   223  		}
   224  
   225  		if hasRegularMode(fi) && header.Size > 0 {
   226  			f, err := os.Open(filepath.Clean(file))
   227  			if err != nil {
   228  				return err
   229  			}
   230  			defer f.Close()
   231  
   232  			if _, err := io.Copy(tw, f); err != nil {
   233  				return err
   234  			}
   235  		}
   236  
   237  		return nil
   238  	})
   239  }
   240  
   241  // processHardLinks determine if the given file has hard-links associated with it, the given hardLinkFiles map keeps track
   242  // of any previous hard-link previously processed. In case the hard-link was already found, the header will be updated with
   243  // the previous information otherwise the new hard-link found will be tracked into the map
   244  func processHardLinks(file string, fi os.FileInfo, hardLinkFiles map[uint64]string, header *tar.Header) error {
   245  	var (
   246  		err       error
   247  		hardlinks bool
   248  		inode     uint64
   249  	)
   250  	if hardlinks, err = hasHardlinks(fi, file); err != nil {
   251  		return err
   252  	}
   253  	if hardlinks {
   254  		inode, err = getInodeFromStat(fi.Sys(), file)
   255  		if err != nil {
   256  			return err
   257  		}
   258  
   259  		if processedPath, ok := hardLinkFiles[inode]; ok {
   260  			header.Typeflag = tar.TypeLink
   261  			header.Linkname = processedPath
   262  			header.Size = 0
   263  		} else {
   264  			hardLinkFiles[inode] = header.Name
   265  		}
   266  	}
   267  	return nil
   268  }
   269  
   270  // WriteZipToTar writes the contents of a zip file to a tar writer.
   271  func WriteZipToTar(tw TarWriter, srcZip, basePath string, uid, gid int, mode int64, normalizeModTime bool, fileFilter func(string) bool) error {
   272  	zipReader, err := zip.OpenReader(srcZip)
   273  	if err != nil {
   274  		return err
   275  	}
   276  	defer zipReader.Close()
   277  
   278  	var fileMode int64
   279  	for _, f := range zipReader.File {
   280  		if fileFilter != nil && !fileFilter(f.Name) {
   281  			continue
   282  		}
   283  
   284  		fileMode = mode
   285  		if isFatFile(f.FileHeader) {
   286  			fileMode = 0777
   287  		}
   288  
   289  		var header *tar.Header
   290  		if f.Mode()&os.ModeSymlink != 0 {
   291  			target, err := func() (string, error) {
   292  				r, err := f.Open()
   293  				if err != nil {
   294  					return "", nil
   295  				}
   296  				defer r.Close()
   297  
   298  				// contents is the target of the symlink
   299  				target, err := io.ReadAll(r)
   300  				if err != nil {
   301  					return "", err
   302  				}
   303  
   304  				return string(target), nil
   305  			}()
   306  
   307  			if err != nil {
   308  				return err
   309  			}
   310  
   311  			header, err = tar.FileInfoHeader(f.FileInfo(), target)
   312  			if err != nil {
   313  				return err
   314  			}
   315  		} else {
   316  			header, err = tar.FileInfoHeader(f.FileInfo(), f.Name)
   317  			if err != nil {
   318  				return err
   319  			}
   320  		}
   321  
   322  		header.Name = filepath.ToSlash(filepath.Join(basePath, f.Name))
   323  		finalizeHeader(header, uid, gid, fileMode, normalizeModTime)
   324  
   325  		if err := tw.WriteHeader(header); err != nil {
   326  			return err
   327  		}
   328  
   329  		if f.Mode().IsRegular() {
   330  			err := func() error {
   331  				fi, err := f.Open()
   332  				if err != nil {
   333  					return err
   334  				}
   335  				defer fi.Close()
   336  
   337  				_, err = io.Copy(tw, fi)
   338  				return err
   339  			}()
   340  
   341  			if err != nil {
   342  				return err
   343  			}
   344  		}
   345  	}
   346  
   347  	return nil
   348  }
   349  
   350  // NormalizeHeader normalizes a tar.Header
   351  //
   352  // Normalizes the following:
   353  //   - ModTime
   354  //   - GID
   355  //   - UID
   356  //   - User Name
   357  //   - Group Name
   358  func NormalizeHeader(header *tar.Header, normalizeModTime bool) {
   359  	if normalizeModTime {
   360  		header.ModTime = NormalizedDateTime
   361  	}
   362  	header.Uid = 0
   363  	header.Gid = 0
   364  	header.Uname = ""
   365  	header.Gname = ""
   366  }
   367  
   368  // IsZip detects whether or not a File is a zip directory
   369  func IsZip(path string) (bool, error) {
   370  	r, err := zip.OpenReader(path)
   371  
   372  	switch {
   373  	case err == nil:
   374  		r.Close()
   375  		return true, nil
   376  	case err == zip.ErrFormat:
   377  		return false, nil
   378  	default:
   379  		return false, err
   380  	}
   381  }
   382  
   383  func isFatFile(header zip.FileHeader) bool {
   384  	var (
   385  		creatorFAT  uint16 = 0 // nolint:revive
   386  		creatorVFAT uint16 = 14
   387  	)
   388  
   389  	// This identifies FAT files, based on the `zip` source: https://golang.org/src/archive/zip/struct.go
   390  	firstByte := header.CreatorVersion >> 8
   391  	return firstByte == creatorFAT || firstByte == creatorVFAT
   392  }
   393  
   394  func finalizeHeader(header *tar.Header, uid, gid int, mode int64, normalizeModTime bool) {
   395  	NormalizeHeader(header, normalizeModTime)
   396  	if mode != -1 {
   397  		header.Mode = mode
   398  	}
   399  	header.Uid = uid
   400  	header.Gid = gid
   401  }
   402  
   403  func hasRegularMode(fi os.FileInfo) bool {
   404  	return fi.Mode().IsRegular()
   405  }
   406  
   407  func getHeaderNameFromBaseAndRelPath(basePath string, relPath string) string {
   408  	return filepath.ToSlash(filepath.Join(basePath, relPath))
   409  }
   410  
   411  func writeHeader(header *tar.Header, uid int, gid int, mode int64, normalizeModTime bool, tw TarWriter) error {
   412  	finalizeHeader(header, uid, gid, mode, normalizeModTime)
   413  
   414  	if err := tw.WriteHeader(header); err != nil {
   415  		return err
   416  	}
   417  
   418  	return nil
   419  }
   420  
   421  func getHeaderFromSymLink(file string, fi os.FileInfo) (*tar.Header, error) {
   422  	target, err := os.Readlink(file)
   423  	if err != nil {
   424  		return nil, err
   425  	}
   426  
   427  	// Ensure that symlinks have Linux link names, independent of source OS
   428  	header, err := tar.FileInfoHeader(fi, filepath.ToSlash(target))
   429  	if err != nil {
   430  		return nil, err
   431  	}
   432  	return header, nil
   433  }
   434  
   435  func hasModeSymLink(fi os.FileInfo) bool {
   436  	return fi.Mode()&os.ModeSymlink != 0
   437  }
   438  
   439  func hasModeSocket(fi os.FileInfo) fs.FileMode {
   440  	return fi.Mode() & os.ModeSocket
   441  }
   442  
   443  func writeRootHeader(tw TarWriter, basePath string, mode int64, uid int, gid int, normalizeModTime bool) error {
   444  	rootHeader := &tar.Header{
   445  		Typeflag: tar.TypeDir,
   446  		Name:     basePath,
   447  		Mode:     mode,
   448  	}
   449  
   450  	finalizeHeader(rootHeader, uid, gid, mode, normalizeModTime)
   451  
   452  	if err := tw.WriteHeader(rootHeader); err != nil {
   453  		return err
   454  	}
   455  
   456  	return nil
   457  }
   458  
   459  func modePermIfNegativeMode(mode int64) int64 {
   460  	if mode == -1 {
   461  		return int64(fs.ModePerm)
   462  	}
   463  	return mode
   464  }