github.com/anchore/syft@v1.38.2/internal/file/tar_file_traversal.go (about)

     1  package file
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  
     9  	"github.com/bmatcuk/doublestar/v4"
    10  	"github.com/mholt/archives"
    11  
    12  	"github.com/anchore/syft/internal"
    13  )
    14  
    15  // TraverseFilesInTar enumerates all paths stored within a tar archive using the visitor pattern.
    16  func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archives.FileHandler) error {
    17  	tarReader, err := os.Open(archivePath)
    18  	if err != nil {
    19  		return fmt.Errorf("unable to open tar archive (%s): %w", archivePath, err)
    20  	}
    21  	defer internal.CloseAndLogError(tarReader, archivePath)
    22  
    23  	format, _, err := IdentifyArchive(ctx, archivePath, tarReader)
    24  	if err != nil {
    25  		return fmt.Errorf("failed to identify tar compression format: %w", err)
    26  	}
    27  
    28  	extractor, ok := format.(archives.Extractor)
    29  	if !ok {
    30  		return fmt.Errorf("file format does not support extraction: %s", archivePath)
    31  	}
    32  
    33  	return extractor.Extract(ctx, tarReader, visitor)
    34  }
    35  
    36  // ExtractGlobsFromTarToUniqueTempFile extracts paths matching the given globs within the given archive to a temporary directory, returning file openers for each file extracted.
    37  func ExtractGlobsFromTarToUniqueTempFile(ctx context.Context, archivePath, dir string, globs ...string) (map[string]Opener, error) {
    38  	results := make(map[string]Opener)
    39  
    40  	// don't allow for full traversal, only select traversal from given paths
    41  	if len(globs) == 0 {
    42  		return results, nil
    43  	}
    44  
    45  	visitor := func(_ context.Context, file archives.FileInfo) error {
    46  		// ignore directories
    47  		if file.IsDir() {
    48  			return nil
    49  		}
    50  
    51  		// ignore any filename that doesn't match the given globs...
    52  		if !matchesAnyGlob(file.Name(), globs...) {
    53  			return nil
    54  		}
    55  
    56  		// we have a file we want to extract....
    57  		tempFilePrefix := filepath.Base(filepath.Clean(file.Name())) + "-"
    58  		tempFile, err := os.CreateTemp(dir, tempFilePrefix)
    59  		if err != nil {
    60  			return fmt.Errorf("unable to create temp file: %w", err)
    61  		}
    62  		// we shouldn't try and keep the tempFile open as the returned result may have several files, which takes up
    63  		// resources (leading to "too many open files"). Instead we'll return a file opener to the caller which
    64  		// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
    65  		defer tempFile.Close()
    66  
    67  		packedFile, err := file.Open()
    68  		if err != nil {
    69  			return fmt.Errorf("unable to read file=%q from tar=%q: %w", file.NameInArchive, archivePath, err)
    70  		}
    71  		defer internal.CloseAndLogError(packedFile, archivePath)
    72  
    73  		if err := safeCopy(tempFile, packedFile); err != nil {
    74  			return fmt.Errorf("unable to copy source=%q for tar=%q: %w", file.Name(), archivePath, err)
    75  		}
    76  
    77  		results[file.Name()] = Opener{path: tempFile.Name()}
    78  
    79  		return nil
    80  	}
    81  
    82  	return results, TraverseFilesInTar(ctx, archivePath, visitor)
    83  }
    84  
    85  func matchesAnyGlob(name string, globs ...string) bool {
    86  	for _, glob := range globs {
    87  		if matches, err := doublestar.PathMatch(glob, name); err == nil && matches {
    88  			return true
    89  		}
    90  	}
    91  	return false
    92  }