github.com/anchore/syft@v1.38.2/internal/file/tar_file_traversal.go (about) 1 package file 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 9 "github.com/bmatcuk/doublestar/v4" 10 "github.com/mholt/archives" 11 12 "github.com/anchore/syft/internal" 13 ) 14 15 // TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern. 16 func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error { 17 tarReader, err := os.Open(archivePath) 18 if err != nil { 19 return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err) 20 } 21 defer internal.CloseAndLogError(tarReader, archivePath) 22 23 format, _, err := IdentifyArchive(ctx, archivePath, tarReader) 24 if err != nil { 25 return fmt.Errorf("failed to identify tar compression format: %w", err) 26 } 27 28 extractor, ok := format.(archives.Extractor) 29 if !ok { 30 return fmt.Errorf("file format does not support extraction: %s", archivePath) 31 } 32 33 return extractor.Extract(ctx, tarReader, visitor) 34 } 35 36 // ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted. 37 func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) { 38 results := make(map[string]Opener) 39 40 // don't allow for full traversal, only select traversal from given paths 41 if len(globs) == 0 { 42 return results, nil 43 } 44 45 visitor := func(_ context.Context, file archives.FileInfo) error { 46 // ignore directories 47 if file.IsDir() { 48 return nil 49 } 50 51 // ignore any filename that doesn't match the given globs... 52 if !matchesAnyGlob(file.Name(), globs...) { 53 return nil 54 } 55 56 // we have a file we want to extract.... 57 tempFilePrefix := filepath.Base(filepath.Clean(file.Name())) + "-" 58 tempFile, err := os.CreateTemp(dir, tempFilePrefix) 59 if err != nil { 60 return fmt.Errorf("unable to create temp file: %w", err) 61 } 62 // we shouldn't try and keep the tempFile open as the returned result may have several files, which takes up 63 // resources (leading to "too many open files"). Instead we'll return a file opener to the caller which 64 // provides a ReadCloser. It is up to the caller to handle closing the file explicitly. 65 defer tempFile.Close() 66 67 packedFile, err := file.Open() 68 if err != nil { 69 return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err) 70 } 71 defer internal.CloseAndLogError(packedFile, archivePath) 72 73 if err := safeCopy(tempFile, packedFile); err != nil { 74 return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err) 75 } 76 77 results[file.Name()] = Opener{path: tempFile.Name()} 78 79 return nil 80 } 81 82 return results, TraverseFilesInTar(ctx, archivePath, visitor) 83 } 84 85 func matchesAnyGlob(name string, globs ...string) bool { 86 for _, glob := range globs { 87 if matches, err := doublestar.PathMatch(glob, name); err == nil && matches { 88 return true 89 } 90 } 91 return false 92 }