github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/java/tar_wrapped_archive_parser.go (about)

     1  package java
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/anchore/syft/syft/artifact"
     7  	"github.com/anchore/syft/syft/file"
     8  	"github.com/anchore/syft/syft/pkg"
     9  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    10  	intFile "github.com/lineaje-labs/syft/internal/file"
    11  )
    12  
    13  var genericTarGlobs = []string{
    14  	"**/*.tar",
    15  	// gzipped tar
    16  	"**/*.tar.gz",
    17  	"**/*.tgz",
    18  	// bzip2
    19  	"**/*.tar.bz",
    20  	"**/*.tar.bz2",
    21  	"**/*.tbz",
    22  	"**/*.tbz2",
    23  	// brotli
    24  	"**/*.tar.br",
    25  	"**/*.tbr",
    26  	// lz4
    27  	"**/*.tar.lz4",
    28  	"**/*.tlz4",
    29  	// sz
    30  	"**/*.tar.sz",
    31  	"**/*.tsz",
    32  	// xz
    33  	"**/*.tar.xz",
    34  	"**/*.txz",
    35  	// zst
    36  	"**/*.tar.zst",
    37  	"**/*.tzst",
    38  	"**/*.tar.zstd",
    39  	"**/*.tzstd",
    40  }
    41  
    42  // TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/anchore/syft/issues/246)
    43  
// parseTarWrappedJavaArchive is a parser function for java archive contents contained within arbitrary tar files.
// note: for compressed tars this is an extremely expensive operation and can lead to performance degradation. This is
// because tar has no central directory header (unlike, say, zip), which means that in order to get a file listing
// within the archive you must decompress the entire archive and seek through all of the entries.
    48  
    49  type genericTarWrappedJavaArchiveParser struct {
    50  	cfg ArchiveCatalogerConfig
    51  }
    52  
    53  func newGenericTarWrappedJavaArchiveParser(cfg ArchiveCatalogerConfig) genericTarWrappedJavaArchiveParser {
    54  	return genericTarWrappedJavaArchiveParser{
    55  		cfg: cfg,
    56  	}
    57  }
    58  
    59  func (gtp genericTarWrappedJavaArchiveParser) parseTarWrappedJavaArchive(
    60  	_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser,
    61  ) ([]pkg.Package, []artifact.Relationship, error) {
    62  	contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.Path(), reader)
    63  	// note: even on error, we should always run cleanup functions
    64  	defer cleanupFn()
    65  	if err != nil {
    66  		return nil, nil, err
    67  	}
    68  
    69  	// look for java archives within the tar archive
    70  	return discoverPkgsFromTar(reader.Location, archivePath, contentPath, gtp.cfg)
    71  }
    72  
    73  func discoverPkgsFromTar(
    74  	location file.Location, archivePath, contentPath string, cfg ArchiveCatalogerConfig,
    75  ) ([]pkg.Package, []artifact.Relationship, error) {
    76  	openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
    77  	if err != nil {
    78  		return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
    79  	}
    80  
    81  	return discoverPkgsFromOpeners(location, openers, nil, cfg)
    82  }