github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/java/tar_wrapped_archive_parser.go (about)

     1  package java
     2  
     3  import (
     4  	"fmt"
     5  
     6  	"github.com/nextlinux/gosbom/gosbom/artifact"
     7  	"github.com/nextlinux/gosbom/gosbom/file"
     8  	"github.com/nextlinux/gosbom/gosbom/pkg"
     9  	"github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic"
    10  	intFile "github.com/nextlinux/gosbom/internal/file"
    11  )
    12  
    13  var genericTarGlobs = []string{
    14  	"**/*.tar",
    15  	// gzipped tar
    16  	"**/*.tar.gz",
    17  	"**/*.tgz",
    18  	// bzip2
    19  	"**/*.tar.bz",
    20  	"**/*.tar.bz2",
    21  	"**/*.tbz",
    22  	"**/*.tbz2",
    23  	// brotli
    24  	"**/*.tar.br",
    25  	"**/*.tbr",
    26  	// lz4
    27  	"**/*.tar.lz4",
    28  	"**/*.tlz4",
    29  	// sz
    30  	"**/*.tar.sz",
    31  	"**/*.tsz",
    32  	// xz
    33  	"**/*.tar.xz",
    34  	"**/*.txz",
    35  	// zst
    36  	"**/*.tar.zst",
    37  	"**/*.tzst",
    38  	"**/*.tar.zstd",
    39  	"**/*.tzstd",
    40  }
    41  
    42  // TODO: when the generic archive cataloger is implemented, this should be removed (https://github.com/nextlinux/gosbom/issues/246)
    43  
    44  // parseTarWrappedJavaArchive is a parser function for java archive contents contained within arbitrary tar files.
    45  // note: for compressed tars this is an extremely expensive operation and can lead to performance degradation. This is
    46  // due to the fact that there is no central directory header (say as in zip), which means that in order to get
    47  // a file listing within the archive you must decompress the entire archive and seek through all of the entries.
    48  func parseTarWrappedJavaArchive(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    49  	contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(reader.AccessPath(), reader)
    50  	// note: even on error, we should always run cleanup functions
    51  	defer cleanupFn()
    52  	if err != nil {
    53  		return nil, nil, err
    54  	}
    55  
    56  	// look for java archives within the tar archive
    57  	return discoverPkgsFromTar(reader.Location, archivePath, contentPath)
    58  }
    59  
    60  func discoverPkgsFromTar(location file.Location, archivePath, contentPath string) ([]pkg.Package, []artifact.Relationship, error) {
    61  	openers, err := intFile.ExtractGlobsFromTarToUniqueTempFile(archivePath, contentPath, archiveFormatGlobs...)
    62  	if err != nil {
    63  		return nil, nil, fmt.Errorf("unable to extract files from tar: %w", err)
    64  	}
    65  
    66  	return discoverPkgsFromOpeners(location, openers, nil)
    67  }