github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go (about)

     1  package java
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  
     7  	intFile "github.com/anchore/syft/internal/file"
     8  	"github.com/anchore/syft/syft/artifact"
     9  	"github.com/anchore/syft/syft/file"
    10  	"github.com/anchore/syft/syft/pkg"
    11  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    12  )
    13  
    14  var genericTarGlobs = []string{
    15  	"**/*.tar",
    16  	// gzipped tar
    17  	"**/*.tar.gz",
    18  	"**/*.tgz",
    19  	// bzip2
    20  	"**/*.tar.bz",
    21  	"**/*.tar.bz2",
    22  	"**/*.tbz",
    23  	"**/*.tbz2",
    24  	// brotli
    25  	"**/*.tar.br",
    26  	"**/*.tbr",
    27  	// lz4
    28  	"**/*.tar.lz4",
    29  	"**/*.tlz4",
    30  	// sz
    31  	"**/*.tar.sz",
    32  	"**/*.tsz",
    33  	// xz
    34  	"**/*.tar.xz",
    35  	"**/*.txz",
    36  	// zst
    37  	"**/*.tar.zst",
    38  	"**/*.tzst",
    39  	"**/*.tar.zstd",
    40  	"**/*.tzstd",
    41  }
    42  
    43  // TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/anchore/syft/issues/246)
    44  
// parseTarWrappedJavaArchive is a parser function for java archive contents contained within arbitrary tar files.
// note: for compressed tars this is an extremely expensive operation and can lead to performance degradation.
// Unlike zip, tar has no central directory header, so getting a file listing within the archive requires
// decompressing the entire archive and seeking through all of the entries.
    49  
// genericTarWrappedJavaArchiveParser carries the archive cataloger configuration
// that parseTarWrappedJavaArchive forwards when discovering java packages
// inside a tar archive.
type genericTarWrappedJavaArchiveParser struct {
	// cfg is handed unchanged to discoverPkgsFromTar on every parse.
	cfg ArchiveCatalogerConfig
}
    53  
    54  func newGenericTarWrappedJavaArchiveParser(cfg ArchiveCatalogerConfig) genericTarWrappedJavaArchiveParser {
    55  	return genericTarWrappedJavaArchiveParser{
    56  		cfg: cfg,
    57  	}
    58  }
    59  
    60  func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    61  	contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.Path(), reader)
    62  	// note: even on error, we should always run cleanup functions
    63  	defer cleanupFn()
    64  	if err != nil {
    65  		return nil, nil, err
    66  	}
    67  
    68  	// look for java archives within the tar archive
    69  	return discoverPkgsFromTar(ctx, reader.Location, archivePath, contentPath, gtp.cfg)
    70  }
    71  
    72  func discoverPkgsFromTar(ctx context.Context, location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) {
    73  	openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
    74  	if err != nil {
    75  		return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
    76  	}
    77  
    78  	return discoverPkgsFromOpeners(ctx, location, openers, nil, cfg)
    79  }