github.com/anchore/syft@v1.38.2/syft/pkg/package.go (about)

     1  /*
     2  Package pkg provides the data structures for a package, a package catalog, package types, and domain-specific metadata.
     3  */
     4  package pkg
     5  
     6  import (
     7  	"fmt"
     8  	"sort"
     9  	"strings"
    10  
    11  	"github.com/anchore/syft/internal/log"
    12  	"github.com/anchore/syft/syft/artifact"
    13  	"github.com/anchore/syft/syft/cpe"
    14  	"github.com/anchore/syft/syft/file"
    15  )
    16  
    17  // Package represents an application or library that has been bundled into a distributable format.
    18  // TODO: if we ignore FoundBy for ID generation should we merge the field to show it was found in two places?
    19  type Package struct {
    20  	// id is a content-addressable identifier for this package, computed from most attribute values (applied recursively); it is assigned by SetID or OverrideID and read back through ID
    21  	id artifact.ID `hash:"ignore"`
    22  
    23  	// Name is the package name (IsValid requires it to be non-empty)
    24  	Name string
    25  
    26  	// Version is the package version
    27  	Version string
    28  
    29  	// FoundBy is the specific cataloger that discovered this package (note: tagged to be ignored when hashing)
    30  	FoundBy string `hash:"ignore" cyclonedx:"foundBy"`
    31  
    32  	// Locations are the locations that led to the discovery of this package (note: not necessarily the locations that make up the package)
    33  	Locations file.LocationSet
    34  
    35  	// Licenses are the licenses discovered from the package metadata
    36  	Licenses LicenseSet
    37  
    38  	// Language is the language this package was written in (e.g. JavaScript, Python, etc.)
    39  	Language Language `hash:"ignore" cyclonedx:"language"`
    40  
    41  	// Type is the ecosystem the package belongs to (e.g. Npm, Yarn, Python, Rpm, Deb, etc.)
    42  	Type Type `cyclonedx:"type"`
    43  
    44  	// CPEs are all possible Common Platform Enumeration identifiers (note: NOT included in ID since derived from other fields)
    45  	CPEs []cpe.CPE `hash:"ignore"`
    46  
    47  	// PURL is the Package URL (see https://github.com/package-url/purl-spec)
    48  	PURL string `hash:"ignore"`
    49  
    50  	// Metadata is additional data found while parsing the package source (Less falls back to its %#v rendering as a final tiebreaker)
    51  	Metadata any
    52  }
    53  
    54  func (p *Package) OverrideID(id artifact.ID) {
    55  	p.id = id
    56  }
    57  
    58  func (p *Package) SetID() {
    59  	id, err := artifact.IDByHash(p)
    60  	if err != nil {
    61  		// TODO: what to do in this case?
    62  		log.Debugf("unable to get fingerprint of package=%s@%s: %+v", p.Name, p.Version, err)
    63  		return
    64  	}
    65  	p.id = id
    66  }
    67  
    68  func (p Package) ID() artifact.ID {
    69  	return p.id
    70  }
    71  
    72  // Stringer to represent a package.
    73  func (p Package) String() string {
    74  	return fmt.Sprintf("Pkg(name=%q version=%q type=%q id=%q)", p.Name, p.Version, p.Type, p.id)
    75  }
    76  
    77  func (p *Package) merge(other Package) error {
    78  	if p.id != other.id {
    79  		return fmt.Errorf("cannot merge packages with different IDs: %q vs %q", p.id, other.id)
    80  	}
    81  
    82  	if p.PURL != other.PURL {
    83  		log.Debugf("merging packages have with different pURLs: %q=%q vs %q=%q", p.id, p.PURL, other.id, other.PURL)
    84  	}
    85  
    86  	p.Locations.Add(other.Locations.ToSlice()...)
    87  	p.Licenses.Add(other.Licenses.ToSlice()...)
    88  
    89  	p.CPEs = cpe.Merge(p.CPEs, other.CPEs)
    90  
    91  	if p.PURL == "" {
    92  		p.PURL = other.PURL
    93  	}
    94  	return nil
    95  }
    96  
    97  // IsValid checks whether a package has the minimum necessary info
    98  // which is a non-empty name.
    99  // The nil-check was added as a helper as often, in this code base, packages
   100  // move between callers as pointers.
   101  // CycloneDX and SPDX define Name as the minimum required info for a valid package:
   102  // * https://spdx.github.io/spdx-spec/package-information/#73-package-version-field
   103  // * https://cyclonedx.org/docs/1.4/json/#components_items_name
   104  func IsValid(p *Package) bool {
   105  	return p != nil && p.Name != ""
   106  }
   107  
   108  //nolint:gocognit
   109  func Less(i, j Package) bool {
   110  	if i.Name == j.Name {
   111  		if i.Version == j.Version {
   112  			iLocations := i.Locations.ToSlice()
   113  			jLocations := j.Locations.ToSlice()
   114  			if i.Type == j.Type {
   115  				maxLen := len(iLocations)
   116  				if len(jLocations) > maxLen {
   117  					maxLen = len(jLocations)
   118  				}
   119  				for l := 0; l < maxLen; l++ {
   120  					if len(iLocations) < l+1 || len(jLocations) < l+1 {
   121  						if len(iLocations) == len(jLocations) {
   122  							break
   123  						}
   124  						return len(iLocations) < len(jLocations)
   125  					}
   126  					if iLocations[l].RealPath == jLocations[l].RealPath {
   127  						continue
   128  					}
   129  					return iLocations[l].RealPath < jLocations[l].RealPath
   130  				}
   131  				// compare remaining metadata as a final fallback
   132  				// note: we cannot guarantee that IDs (which digests the metadata) are stable enough to sort on
   133  				// when there are potentially missing elements there is too much reduction in the dimensions to
   134  				// lean on ID comparison. The best fallback is to look at the string representation of the metadata.
   135  				return strings.Compare(fmt.Sprintf("%#v", i.Metadata), fmt.Sprintf("%#v", j.Metadata)) < 0
   136  			}
   137  			return i.Type < j.Type
   138  		}
   139  		return i.Version < j.Version
   140  	}
   141  	return i.Name < j.Name
   142  }
   143  func Sort(pkgs []Package) {
   144  	sort.SliceStable(pkgs, func(i, j int) bool {
   145  		return Less(pkgs[i], pkgs[j])
   146  	})
   147  }