github.com/tilt-dev/tilt@v0.33.15-0.20240515162809-0a22ed45d8a0/internal/build/tar.go (about) 1 package build 2 3 import ( 4 "archive/tar" 5 "bytes" 6 "context" 7 "io" 8 "os" 9 "path" 10 "path/filepath" 11 "strings" 12 "time" 13 14 "github.com/pkg/errors" 15 16 "github.com/tilt-dev/tilt/internal/build/moby" 17 "github.com/tilt-dev/tilt/internal/dockerfile" 18 "github.com/tilt-dev/tilt/pkg/logger" 19 "github.com/tilt-dev/tilt/pkg/model" 20 ) 21 22 type ArchiveBuilder struct { 23 tw *tar.Writer 24 filter model.PathMatcher 25 paths []string // local paths archived 26 27 // A shared I/O buffer to help with file copying. 28 copyBuf *bytes.Buffer 29 } 30 31 func NewArchiveBuilder(writer io.Writer, filter model.PathMatcher) *ArchiveBuilder { 32 tw := tar.NewWriter(writer) 33 if filter == nil { 34 filter = model.EmptyMatcher 35 } 36 37 return &ArchiveBuilder{tw: tw, filter: filter, copyBuf: bytes.NewBuffer(nil)} 38 } 39 40 func (a *ArchiveBuilder) Close() error { 41 return a.tw.Close() 42 } 43 44 // NOTE(dmiller) sometimes users will have very large UID/GIDs that will cause 45 // archive/tar to switch to PAX format, which will trip this Docker bug: 46 // https://github.com/docker/cli/issues/1459 47 // To prevent this, simply clear these out before adding to tar. 48 func clearUIDAndGID(h *tar.Header) { 49 h.Uid = 0 50 h.Gid = 0 51 } 52 53 func (a *ArchiveBuilder) archiveDf(ctx context.Context, df dockerfile.Dockerfile) error { 54 tarHeader := &tar.Header{ 55 Name: "Dockerfile", 56 Typeflag: tar.TypeReg, 57 Size: int64(len(df)), 58 Mode: 0644, 59 ModTime: time.Now(), 60 AccessTime: time.Now(), 61 ChangeTime: time.Now(), 62 } 63 clearUIDAndGID(tarHeader) 64 err := a.tw.WriteHeader(tarHeader) 65 if err != nil { 66 return err 67 } 68 _, err = a.tw.Write([]byte(df)) 69 if err != nil { 70 return err 71 } 72 73 return nil 74 } 75 76 // ArchivePathsIfExist creates a tar archive of all local files in `paths`. It quietly skips any paths that don't exist. 77 func (a *ArchiveBuilder) ArchivePathsIfExist(ctx context.Context, paths []PathMapping) error { 78 // In order to handle overlapping syncs, we 79 // 1) collect all the entries, 80 // 2) de-dupe them, with last-one-wins semantics 81 // 3) write all the entries 82 // 83 // It's not obvious that this is the correct behavior. A better approach 84 // (that's more in-line with how syncs work) might ignore files in earlier 85 // path mappings when we know they're going to be "synced" over. 86 // There's a bunch of subtle product decisions about how overlapping path 87 // mappings work that we're not sure about. 88 entries := []archiveEntry{} 89 for _, p := range paths { 90 newEntries, err := a.entriesForPath(ctx, p.LocalPath, p.ContainerPath) 91 if err != nil { 92 return errors.Wrapf(err, "tarPath '%s'", p.LocalPath) 93 } 94 95 entries = append(entries, newEntries...) 96 } 97 98 entries = dedupeEntries(entries) 99 for _, entry := range entries { 100 err := a.writeEntry(entry) 101 if err != nil { 102 return errors.Wrapf(err, "tarPath '%s'", entry.path) 103 } 104 a.paths = append(a.paths, entry.path) 105 } 106 return nil 107 } 108 109 // Local paths that were archived 110 func (a *ArchiveBuilder) Paths() []string { 111 return a.paths 112 } 113 114 type archiveEntry struct { 115 path string 116 info os.FileInfo 117 header *tar.Header 118 } 119 120 // tarPath writes the given source path into tarWriter at the given dest (recursively for directories). 121 // e.g. tarring my_dir --> dest d: d/file_a, d/file_b 122 // If source path does not exist, quietly skips it and returns no err 123 func (a *ArchiveBuilder) entriesForPath(ctx context.Context, localPath, containerPath string) ([]archiveEntry, error) { 124 localInfo, err := os.Stat(localPath) 125 if err != nil { 126 if os.IsNotExist(err) { 127 return nil, nil 128 } 129 return nil, errors.Wrapf(err, "%s: stat", localPath) 130 } 131 132 localPathIsDir := localInfo.IsDir() 133 if localPathIsDir { 134 // Make sure we can trim this off filenames to get valid relative filepaths 135 if !strings.HasSuffix(localPath, string(filepath.Separator)) { 136 localPath += string(filepath.Separator) 137 } 138 } 139 140 containerPath = strings.TrimPrefix(containerPath, "/") 141 142 result := make([]archiveEntry, 0) 143 err = filepath.Walk(localPath, func(curLocalPath string, info os.FileInfo, err error) error { 144 if err != nil { 145 return errors.Wrapf(err, "error walking to %s", curLocalPath) 146 } 147 148 matches, err := a.filter.Matches(curLocalPath) 149 if err != nil { 150 return err 151 } 152 if matches { 153 if info.IsDir() && curLocalPath != localPath { 154 shouldSkip, err := a.filter.MatchesEntireDir(curLocalPath) 155 if err != nil { 156 return err 157 } 158 if shouldSkip { 159 return filepath.SkipDir 160 } 161 } 162 return nil 163 } 164 165 linkname := "" 166 if info.Mode()&os.ModeSymlink != 0 { 167 var err error 168 linkname, err = os.Readlink(curLocalPath) 169 if err != nil { 170 return err 171 } 172 } 173 174 header, err := tar.FileInfoHeader(info, linkname) 175 if err != nil { 176 // Not all types of files are allowed in a tarball. That's OK. 177 // Mimic the Docker behavior and just skip the file. 178 logger.Get(ctx).Debugf("Skipping file %s: %v", curLocalPath, err) 179 return nil 180 } 181 182 header.Mode = int64(moby.ChmodTarEntry(os.FileMode(header.Mode))) 183 184 clearUIDAndGID(header) 185 186 if localPathIsDir { 187 // Name of file in tar should be relative to source directory... 188 tmp, err := filepath.Rel(localPath, curLocalPath) 189 if err != nil { 190 return errors.Wrapf(err, "making rel path source:%s path:%s", localPath, curLocalPath) 191 } 192 // ...and live inside `dest` 193 header.Name = path.Join(containerPath, filepath.ToSlash(tmp)) 194 } else if strings.HasSuffix(containerPath, "/") { 195 header.Name = containerPath + filepath.Base(curLocalPath) 196 } else { 197 header.Name = containerPath 198 } 199 header.Name = path.Clean(header.Name) 200 result = append(result, archiveEntry{ 201 path: curLocalPath, 202 info: info, 203 header: header, 204 }) 205 206 return nil 207 }) 208 if err != nil { 209 return nil, err 210 } 211 return result, nil 212 } 213 214 func (a *ArchiveBuilder) writeEntry(entry archiveEntry) error { 215 path := entry.path 216 header := entry.header 217 218 if header.Typeflag != tar.TypeReg { 219 // anything other than a regular file (e.g. dir, symlink) just needs the header 220 if err := a.tw.WriteHeader(header); err != nil { 221 return errors.Wrapf(err, "%s: writing header", path) 222 } 223 return nil 224 } 225 226 file, err := os.Open(path) 227 if err != nil { 228 // In case the file has been deleted since we last looked at it. 229 if os.IsNotExist(err) { 230 return nil 231 } 232 return errors.Wrapf(err, "%s: open", path) 233 } 234 235 defer func() { 236 _ = file.Close() 237 }() 238 239 // The size header must match the number of contents bytes. 240 // 241 // There is room for a race condition here if something writes to the file 242 // after we've read the file size. 243 // 244 // For small files, we avoid this by first copying the file into a buffer, 245 // and using the size of the buffer to populate the header. 246 // 247 // For larger files, we don't want to copy the whole thing into a buffer, 248 // because that would blow up heap size. There is some danger that this 249 // will lead to a spurious error when the tar writer validates the sizes. 250 // That error will be disruptive but will be handled as best as we 251 // can downstream. 252 useBuf := header.Size < 5000000 253 if useBuf { 254 a.copyBuf.Reset() 255 _, err = io.Copy(a.copyBuf, file) 256 if err != nil && err != io.EOF { 257 return errors.Wrapf(err, "%s: copying Contents", path) 258 } 259 header.Size = int64(len(a.copyBuf.Bytes())) 260 } 261 262 // wait to write the header until _after_ the file is successfully opened 263 // to avoid generating an invalid tar entry that has a header but no contents 264 // in the case the file has been deleted 265 err = a.tw.WriteHeader(header) 266 if err != nil { 267 return errors.Wrapf(err, "%s: writing header", path) 268 } 269 270 if useBuf { 271 _, err = io.Copy(a.tw, a.copyBuf) 272 } else { 273 _, err = io.Copy(a.tw, file) 274 } 275 276 if err != nil && err != io.EOF { 277 return errors.Wrapf(err, "%s: copying Contents", path) 278 } 279 280 // explicitly flush so that if the entry is invalid we will detect it now and 281 // provide a more meaningful error 282 if err := a.tw.Flush(); err != nil { 283 return errors.Wrapf(err, "%s: flush", path) 284 } 285 return nil 286 } 287 288 func tarContextAndUpdateDf(ctx context.Context, writer io.Writer, df dockerfile.Dockerfile, paths []PathMapping, filter model.PathMatcher) error { 289 ab := NewArchiveBuilder(writer, filter) 290 err := ab.ArchivePathsIfExist(ctx, paths) 291 if err != nil { 292 return errors.Wrap(err, "archivePaths") 293 } 294 295 err = ab.archiveDf(ctx, df) 296 if err != nil { 297 _ = ab.Close() 298 return errors.Wrap(err, "archiveDf") 299 } 300 301 return ab.Close() 302 } 303 304 func TarPath(ctx context.Context, writer io.Writer, path string) error { 305 ab := NewArchiveBuilder(writer, model.EmptyMatcher) 306 err := ab.ArchivePathsIfExist(ctx, []PathMapping{ 307 { 308 LocalPath: path, 309 ContainerPath: ".", 310 }, 311 }) 312 if err != nil { 313 _ = ab.Close() 314 return errors.Wrap(err, "TarPath") 315 } 316 317 return ab.Close() 318 } 319 320 func TarArchiveForPaths(ctx context.Context, toArchive []PathMapping, filter model.PathMatcher) io.ReadCloser { 321 pr, pw := io.Pipe() 322 go tarArchiveForPaths(ctx, pw, toArchive, filter) 323 return pr 324 } 325 326 func tarArchiveForPaths(ctx context.Context, pw *io.PipeWriter, toArchive []PathMapping, filter model.PathMatcher) { 327 ab := NewArchiveBuilder(pw, filter) 328 err := ab.ArchivePathsIfExist(ctx, toArchive) 329 if err != nil { 330 _ = pw.CloseWithError(errors.Wrap(err, "archivePathsIfExists")) 331 } else { 332 // propagate errors from the TarWriter::Close() because it performs a final 333 // Flush() and any errors mean the tar is invalid 334 if err := ab.Close(); err != nil { 335 _ = pw.CloseWithError(errors.Wrap(err, "tar close")) 336 } else { 337 _ = pw.Close() 338 } 339 } 340 } 341 342 // Dedupe the entries with last-entry-wins semantics. 343 func dedupeEntries(entries []archiveEntry) []archiveEntry { 344 seenIndex := make(map[string]int, len(entries)) 345 result := make([]archiveEntry, 0, len(entries)) 346 for i, entry := range entries { 347 seenIndex[entry.header.Name] = i 348 } 349 for i, entry := range entries { 350 if seenIndex[entry.header.Name] == i { 351 result = append(result, entry) 352 } 353 } 354 return result 355 }