github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/build/tar.go

package build

import (
	"archive/tar"
	"bytes"
	"context"
	"io"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"

	"github.com/pkg/errors"

	"github.com/tilt-dev/tilt/internal/build/moby"
	"github.com/tilt-dev/tilt/internal/dockerfile"
	"github.com/tilt-dev/tilt/pkg/logger"
	"github.com/tilt-dev/tilt/pkg/model"
)

type ArchiveBuilder struct {
	tw     *tar.Writer
	filter model.PathMatcher
	paths  []string // local paths archived

	// A shared I/O buffer to help with file copying.
	copyBuf *bytes.Buffer
}

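// NewArchiveBuilder returns an ArchiveBuilder that streams tar entries to
// writer, skipping any path matched by filter. Typical use (illustrative
// sketch; error handling elided):
//
//	var buf bytes.Buffer
//	ab := NewArchiveBuilder(&buf, model.EmptyMatcher)
//	_ = ab.ArchivePathsIfExist(ctx, mappings)
//	_ = ab.Close()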
func NewArchiveBuilder(writer io.Writer, filter model.PathMatcher) *ArchiveBuilder {
	tw := tar.NewWriter(writer)
	if filter == nil {
		filter = model.EmptyMatcher
	}

	return &ArchiveBuilder{tw: tw, filter: filter, copyBuf: bytes.NewBuffer(nil)}
}

func (a *ArchiveBuilder) Close() error {
	return a.tw.Close()
}

// NOTE(dmiller): sometimes users will have very large UID/GIDs that will cause
// archive/tar to switch to PAX format, which will trip this Docker bug:
// https://github.com/docker/cli/issues/1459
// To prevent this, simply clear these out before adding to tar.
func clearUIDAndGID(h *tar.Header) {
	h.Uid = 0
	h.Gid = 0
}

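// archiveDf adds the Dockerfile contents to the archive as a regular file
// named "Dockerfile" at the archive root.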
func (a *ArchiveBuilder) archiveDf(ctx context.Context, df dockerfile.Dockerfile) error {
	tarHeader := &tar.Header{
		Name:       "Dockerfile",
		Typeflag:   tar.TypeReg,
		Size:       int64(len(df)),
		Mode:       0644,
		ModTime:    time.Now(),
		AccessTime: time.Now(),
		ChangeTime: time.Now(),
	}
	clearUIDAndGID(tarHeader)
	err := a.tw.WriteHeader(tarHeader)
	if err != nil {
		return err
	}
	_, err = a.tw.Write([]byte(df))
	if err != nil {
		return err
	}

	return nil
}

// ArchivePathsIfExist creates a tar archive of all local files in `paths`. It quietly skips any paths that don't exist.
func (a *ArchiveBuilder) ArchivePathsIfExist(ctx context.Context, paths []PathMapping) error {
	// In order to handle overlapping syncs, we
	// 1) collect all the entries,
	// 2) de-dupe them, with last-one-wins semantics
	// 3) write all the entries
	//
	// It's not obvious that this is the correct behavior. A better approach
	// (that's more in line with how syncs work) might ignore files in earlier
	// path mappings when we know they're going to be "synced" over.
	// There's a bunch of subtle product decisions about how overlapping path
	// mappings work that we're not sure about.
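	//
	// For example, if one mapping syncs ./a -> /app and a later mapping syncs
	// ./b -> /app, and both directories contain config.yaml, the archive keeps
	// the entry from ./b (the later mapping).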
	entries := []archiveEntry{}
	for _, p := range paths {
		newEntries, err := a.entriesForPath(ctx, p.LocalPath, p.ContainerPath)
		if err != nil {
			return errors.Wrapf(err, "tarPath '%s'", p.LocalPath)
		}

		entries = append(entries, newEntries...)
	}

	entries = dedupeEntries(entries)
	for _, entry := range entries {
		err := a.writeEntry(entry)
		if err != nil {
			return errors.Wrapf(err, "tarPath '%s'", entry.path)
		}
		a.paths = append(a.paths, entry.path)
	}
	return nil
}

// Paths returns the local paths that were archived.
func (a *ArchiveBuilder) Paths() []string {
	return a.paths
}

type archiveEntry struct {
	path   string
	info   os.FileInfo
	header *tar.Header
}

// entriesForPath computes the tar entries for the given source path at the
// given container dest (recursively for directories),
// e.g. tarring my_dir --> dest d: d/file_a, d/file_b.
// If the source path does not exist, it quietly skips it and returns no error.
func (a *ArchiveBuilder) entriesForPath(ctx context.Context, localPath, containerPath string) ([]archiveEntry, error) {
	localInfo, err := os.Stat(localPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil
		}
		return nil, errors.Wrapf(err, "%s: stat", localPath)
	}

	localPathIsDir := localInfo.IsDir()
	if localPathIsDir {
		// Make sure we can trim this off filenames to get valid relative filepaths
		if !strings.HasSuffix(localPath, string(filepath.Separator)) {
			localPath += string(filepath.Separator)
		}
	}

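	// Tar entry names are relative to the archive root, so strip any leading
	// slash from the container path.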
	containerPath = strings.TrimPrefix(containerPath, "/")

	result := make([]archiveEntry, 0)
	err = filepath.Walk(localPath, func(curLocalPath string, info os.FileInfo, err error) error {
		if err != nil {
			return errors.Wrapf(err, "error walking to %s", curLocalPath)
		}

		matches, err := a.filter.Matches(curLocalPath)
		if err != nil {
			return err
		}
		if matches {
			if info.IsDir() && curLocalPath != localPath {
				shouldSkip, err := a.filter.MatchesEntireDir(curLocalPath)
				if err != nil {
					return err
				}
				if shouldSkip {
					return filepath.SkipDir
				}
			}
			return nil
		}

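		// Preserve symlinks by recording the link target rather than
		// following it.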
		linkname := ""
		if info.Mode()&os.ModeSymlink != 0 {
			var err error
			linkname, err = os.Readlink(curLocalPath)
			if err != nil {
				return err
			}
		}

		header, err := tar.FileInfoHeader(info, linkname)
		if err != nil {
			// Not all types of files are allowed in a tarball. That's OK.
			// Mimic the Docker behavior and just skip the file.
			logger.Get(ctx).Debugf("Skipping file %s: %v", curLocalPath, err)
			return nil
		}

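		// Normalize the permission bits the way Docker's archive code does
		// (notably, filling in exec bits on Windows).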
		header.Mode = int64(moby.ChmodTarEntry(os.FileMode(header.Mode)))

		clearUIDAndGID(header)

		if localPathIsDir {
			// Name of file in tar should be relative to source directory...
			tmp, err := filepath.Rel(localPath, curLocalPath)
			if err != nil {
				return errors.Wrapf(err, "making rel path source:%s path:%s", localPath, curLocalPath)
			}
			// ...and live inside `dest`
			header.Name = path.Join(containerPath, filepath.ToSlash(tmp))
		} else if strings.HasSuffix(containerPath, "/") {
			header.Name = containerPath + filepath.Base(curLocalPath)
		} else {
			header.Name = containerPath
		}
		header.Name = path.Clean(header.Name)
		result = append(result, archiveEntry{
			path:   curLocalPath,
			info:   info,
			header: header,
		})

		return nil
	})
	if err != nil {
		return nil, err
	}
	return result, nil
}

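// writeEntry writes a single entry's header to the tar stream, plus its
// contents if it's a regular file.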
func (a *ArchiveBuilder) writeEntry(entry archiveEntry) error {
	path := entry.path
	header := entry.header

	if header.Typeflag != tar.TypeReg {
		// anything other than a regular file (e.g. dir, symlink) just needs the header
		if err := a.tw.WriteHeader(header); err != nil {
			return errors.Wrapf(err, "%s: writing header", path)
		}
		return nil
	}

	file, err := os.Open(path)
	if err != nil {
		// In case the file has been deleted since we last looked at it.
		if os.IsNotExist(err) {
			return nil
		}
		return errors.Wrapf(err, "%s: open", path)
	}

	defer func() {
		_ = file.Close()
	}()

	// The size header must match the number of contents bytes.
	//
	// There is room for a race condition here if something writes to the file
	// after we've read the file size.
	//
	// For small files, we avoid this by first copying the file into a buffer,
	// and using the size of the buffer to populate the header.
	//
	// For larger files, we don't want to copy the whole thing into a buffer,
	// because that would blow up heap size. There is some danger that this
	// will lead to a spurious error when the tar writer validates the sizes.
	// That error will be disruptive but will be handled as best as we
	// can downstream.
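	//
	// 5000000 bytes (~5 MB) is the cutoff below which a file is buffered
	// in full.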
	useBuf := header.Size < 5000000
	if useBuf {
		a.copyBuf.Reset()
		_, err = io.Copy(a.copyBuf, file)
		if err != nil && err != io.EOF {
			return errors.Wrapf(err, "%s: copying contents", path)
		}
		header.Size = int64(a.copyBuf.Len())
	}

	// wait to write the header until _after_ the file is successfully opened
	// to avoid generating an invalid tar entry that has a header but no contents
	// in the case the file has been deleted
	err = a.tw.WriteHeader(header)
	if err != nil {
		return errors.Wrapf(err, "%s: writing header", path)
	}

	if useBuf {
		_, err = io.Copy(a.tw, a.copyBuf)
	} else {
		_, err = io.Copy(a.tw, file)
	}

	if err != nil && err != io.EOF {
		return errors.Wrapf(err, "%s: copying contents", path)
	}

	// explicitly flush so that if the entry is invalid we will detect it now and
	// provide a more meaningful error
	if err := a.tw.Flush(); err != nil {
		return errors.Wrapf(err, "%s: flush", path)
	}
	return nil
}

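// tarContextAndUpdateDf writes a Docker build context to writer: the synced
// paths first, followed by the Dockerfile itself.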
func tarContextAndUpdateDf(ctx context.Context, writer io.Writer, df dockerfile.Dockerfile, paths []PathMapping, filter model.PathMatcher) error {
	ab := NewArchiveBuilder(writer, filter)
	err := ab.ArchivePathsIfExist(ctx, paths)
	if err != nil {
		return errors.Wrap(err, "archivePaths")
	}

	err = ab.archiveDf(ctx, df)
	if err != nil {
		_ = ab.Close()
		return errors.Wrap(err, "archiveDf")
	}

	return ab.Close()
}

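// TarPath archives the contents of the given local path into writer, rooted
// at the top of the archive.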
func TarPath(ctx context.Context, writer io.Writer, path string) error {
	ab := NewArchiveBuilder(writer, model.EmptyMatcher)
	err := ab.ArchivePathsIfExist(ctx, []PathMapping{
		{
			LocalPath:     path,
			ContainerPath: ".",
		},
	})
	if err != nil {
		_ = ab.Close()
		return errors.Wrap(err, "TarPath")
	}

	return ab.Close()
}

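// TarArchiveForPaths streams a tar archive of the given path mappings.
// The returned ReadCloser is the read end of a pipe: the archive is written
// on a goroutine, and any archiving error is surfaced to the reader via
// CloseWithError.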
func TarArchiveForPaths(ctx context.Context, toArchive []PathMapping, filter model.PathMatcher) io.ReadCloser {
	pr, pw := io.Pipe()
	go tarArchiveForPaths(ctx, pw, toArchive, filter)
	return pr
}

func tarArchiveForPaths(ctx context.Context, pw *io.PipeWriter, toArchive []PathMapping, filter model.PathMatcher) {
	ab := NewArchiveBuilder(pw, filter)
	err := ab.ArchivePathsIfExist(ctx, toArchive)
	if err != nil {
		_ = pw.CloseWithError(errors.Wrap(err, "archivePathsIfExist"))
	} else {
		// propagate errors from tar.Writer's Close() because it performs a final
		// Flush() and any errors mean the tar is invalid
		if err := ab.Close(); err != nil {
			_ = pw.CloseWithError(errors.Wrap(err, "tar close"))
		} else {
			_ = pw.Close()
		}
	}
}

// Dedupe the entries with last-entry-wins semantics.
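// For example, entries named [a, b, a] dedupe to [b, a]: the later "a"
// wins and keeps its later position.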
func dedupeEntries(entries []archiveEntry) []archiveEntry {
	seenIndex := make(map[string]int, len(entries))
	result := make([]archiveEntry, 0, len(entries))
	for i, entry := range entries {
		seenIndex[entry.header.Name] = i
	}
	for i, entry := range entries {
		if seenIndex[entry.header.Name] == i {
			result = append(result, entry)
		}
	}
	return result
}