github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/formats/common/spdxhelpers/to_syft_model.go (about)

     1  package spdxhelpers
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"net/url"
     7  	"path"
     8  	"regexp"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"github.com/spdx/tools-golang/spdx"
    13  	"github.com/spdx/tools-golang/spdx/v2/common"
    14  
    15  	"github.com/anchore/packageurl-go"
    16  	"github.com/kastenhq/syft/internal/log"
    17  	"github.com/kastenhq/syft/internal/spdxlicense"
    18  	"github.com/kastenhq/syft/syft/artifact"
    19  	"github.com/kastenhq/syft/syft/cpe"
    20  	"github.com/kastenhq/syft/syft/file"
    21  	"github.com/kastenhq/syft/syft/formats/common/util"
    22  	"github.com/kastenhq/syft/syft/license"
    23  	"github.com/kastenhq/syft/syft/linux"
    24  	"github.com/kastenhq/syft/syft/pkg"
    25  	"github.com/kastenhq/syft/syft/sbom"
    26  	"github.com/kastenhq/syft/syft/source"
    27  )
    28  
    29  func ToSyftModel(doc *spdx.Document) (*sbom.SBOM, error) {
    30  	if doc == nil {
    31  		return nil, errors.New("cannot convert SPDX document to Syft model because document is nil")
    32  	}
    33  
    34  	spdxIDMap := make(map[string]any)
    35  
    36  	s := &sbom.SBOM{
    37  		Source: extractSource(spdxIDMap, doc),
    38  		Artifacts: sbom.Artifacts{
    39  			Packages:          pkg.NewCollection(),
    40  			FileMetadata:      map[file.Coordinates]file.Metadata{},
    41  			FileDigests:       map[file.Coordinates][]file.Digest{},
    42  			LinuxDistribution: findLinuxReleaseByPURL(doc),
    43  		},
    44  	}
    45  
    46  	collectSyftPackages(s, spdxIDMap, doc.Packages)
    47  
    48  	collectSyftFiles(s, spdxIDMap, doc)
    49  
    50  	s.Relationships = toSyftRelationships(spdxIDMap, doc)
    51  
    52  	return s, nil
    53  }
    54  
    55  func isDirectory(name string) bool {
    56  	if name == "." || name == ".." || strings.HasSuffix(name, "/") || !strings.Contains(path.Base(name), ".") {
    57  		return true
    58  	}
    59  	return false
    60  }
    61  
    62  func removePackage(packages []*spdx.Package, remove *spdx.Package) (pkgs []*spdx.Package) {
    63  	for _, p := range packages {
    64  		if p == remove {
    65  			continue
    66  		}
    67  		pkgs = append(pkgs, p)
    68  	}
    69  	return
    70  }
    71  
    72  func removeRelationships(relationships []*spdx.Relationship, spdxID spdx.ElementID) (relations []*spdx.Relationship) {
    73  	for _, r := range relationships {
    74  		if r.RefA.ElementRefID == spdxID || r.RefB.ElementRefID == spdxID {
    75  			continue
    76  		}
    77  		relations = append(relations, r)
    78  	}
    79  	return
    80  }
    81  
    82  func findRootPackages(doc *spdx.Document) (out []*spdx.Package) {
    83  	for _, p := range doc.Packages {
    84  		for _, r := range doc.Relationships {
    85  			describes := r.RefA.ElementRefID == "DOCUMENT" &&
    86  				r.Relationship == spdx.RelationshipDescribes &&
    87  				r.RefB.ElementRefID == p.PackageSPDXIdentifier
    88  
    89  			describedBy := r.RefB.ElementRefID == "DOCUMENT" &&
    90  				r.Relationship == spdx.RelationshipDescribedBy &&
    91  				r.RefA.ElementRefID == p.PackageSPDXIdentifier
    92  
    93  			if !describes && !describedBy {
    94  				continue
    95  			}
    96  
    97  			out = append(out, p)
    98  		}
    99  	}
   100  	return
   101  }
   102  
   103  func extractSource(spdxIDMap map[string]any, doc *spdx.Document) source.Description {
   104  	src := extractSourceFromNamespace(doc.DocumentNamespace)
   105  
   106  	rootPackages := findRootPackages(doc)
   107  
   108  	if len(rootPackages) != 1 {
   109  		return src
   110  	}
   111  
   112  	p := rootPackages[0]
   113  
   114  	switch p.PrimaryPackagePurpose {
   115  	case spdxPrimaryPurposeContainer:
   116  		src = containerSource(p)
   117  	case spdxPrimaryPurposeFile:
   118  		src = fileSource(p)
   119  	default:
   120  		return src
   121  	}
   122  
   123  	spdxIDMap[string(p.PackageSPDXIdentifier)] = src
   124  
   125  	doc.Packages = removePackage(doc.Packages, p)
   126  	doc.Relationships = removeRelationships(doc.Relationships, p.PackageSPDXIdentifier)
   127  
   128  	return src
   129  }
   130  
   131  func containerSource(p *spdx.Package) source.Description {
   132  	id := string(p.PackageSPDXIdentifier)
   133  
   134  	container := p.PackageName
   135  	v := p.PackageVersion
   136  	if v != "" {
   137  		container += ":" + v
   138  	}
   139  
   140  	digest := ""
   141  	if len(p.PackageChecksums) > 0 {
   142  		c := p.PackageChecksums[0]
   143  		digest = fmt.Sprintf("%s:%s", fromChecksumAlgorithm(c.Algorithm), c.Value)
   144  	}
   145  	return source.Description{
   146  		ID:      id,
   147  		Name:    p.PackageName,
   148  		Version: p.PackageVersion,
   149  		Metadata: source.StereoscopeImageSourceMetadata{
   150  			UserInput:      container,
   151  			ID:             id,
   152  			Layers:         nil, // TODO handle formats with nested layer packages like Tern and K8s BOM tool
   153  			ManifestDigest: digest,
   154  		},
   155  	}
   156  }
   157  
   158  func fileSource(p *spdx.Package) source.Description {
   159  	typeRegex := regexp.MustCompile("^DocumentRoot-([^-]+)-.*$")
   160  	typeName := typeRegex.ReplaceAllString(string(p.PackageSPDXIdentifier), "$1")
   161  
   162  	var version string
   163  	var metadata any
   164  	switch {
   165  	case typeName == prefixDirectory:
   166  		// is a Syft SBOM, explicitly a directory source
   167  		metadata, version = directorySourceMetadata(p)
   168  	case typeName == prefixFile:
   169  		// is a Syft SBOM, explicitly a file source
   170  		metadata, version = fileSourceMetadata(p)
   171  	case isDirectory(p.PackageName):
   172  		// is a non-Syft SBOM, which looks like a directory
   173  		metadata, version = directorySourceMetadata(p)
   174  	default:
   175  		// is a non-Syft SBOM, which is probably a file
   176  		metadata, version = fileSourceMetadata(p)
   177  	}
   178  
   179  	return source.Description{
   180  		ID:       string(p.PackageSPDXIdentifier),
   181  		Name:     p.PackageName,
   182  		Version:  version,
   183  		Metadata: metadata,
   184  	}
   185  }
   186  
   187  func fileSourceMetadata(p *spdx.Package) (any, string) {
   188  	version := p.PackageVersion
   189  
   190  	m := source.FileSourceMetadata{
   191  		Path: p.PackageName,
   192  	}
   193  	// if this is a Syft SBOM, we might have output a digest as the version
   194  	checksum := toChecksum(p.PackageVersion)
   195  	for _, d := range p.PackageChecksums {
   196  		if checksum != nil && checksum.Value == d.Value {
   197  			version = ""
   198  		}
   199  		m.Digests = append(m.Digests, file.Digest{
   200  			Algorithm: fromChecksumAlgorithm(d.Algorithm),
   201  			Value:     d.Value,
   202  		})
   203  	}
   204  
   205  	return m, version
   206  }
   207  
   208  func directorySourceMetadata(p *spdx.Package) (any, string) {
   209  	return source.DirectorySourceMetadata{
   210  		Path: p.PackageName,
   211  		Base: "",
   212  	}, p.PackageVersion
   213  }
   214  
   215  // NOTE(jonas): SPDX doesn't inform what an SBOM is about,
   216  // image, directory, for example. This is our best effort to determine
   217  // the scheme. Syft-generated SBOMs have in the namespace
   218  // field a type encoded, which we try to identify here.
   219  func extractSourceFromNamespace(ns string) source.Description {
   220  	u, err := url.Parse(ns)
   221  	if err != nil {
   222  		return source.Description{
   223  			Metadata: nil,
   224  		}
   225  	}
   226  
   227  	parts := strings.Split(u.Path, "/")
   228  	for _, p := range parts {
   229  		switch p {
   230  		case inputFile:
   231  			return source.Description{
   232  				Metadata: source.FileSourceMetadata{},
   233  			}
   234  		case inputImage:
   235  			return source.Description{
   236  				Metadata: source.StereoscopeImageSourceMetadata{},
   237  			}
   238  		case inputDirectory:
   239  			return source.Description{
   240  				Metadata: source.DirectorySourceMetadata{},
   241  			}
   242  		}
   243  	}
   244  	return source.Description{}
   245  }
   246  
   247  func findLinuxReleaseByPURL(doc *spdx.Document) *linux.Release {
   248  	for _, p := range doc.Packages {
   249  		purlValue := findPURLValue(p)
   250  		if purlValue == "" {
   251  			continue
   252  		}
   253  		purl, err := packageurl.FromString(purlValue)
   254  		if err != nil {
   255  			log.Warnf("unable to parse purl: %s", purlValue)
   256  			continue
   257  		}
   258  		distro := findQualifierValue(purl, pkg.PURLQualifierDistro)
   259  		if distro != "" {
   260  			parts := strings.Split(distro, "-")
   261  			name := parts[0]
   262  			version := ""
   263  			if len(parts) > 1 {
   264  				version = parts[1]
   265  			}
   266  			return &linux.Release{
   267  				PrettyName: name,
   268  				Name:       name,
   269  				ID:         name,
   270  				IDLike:     []string{name},
   271  				Version:    version,
   272  				VersionID:  version,
   273  			}
   274  		}
   275  	}
   276  
   277  	return nil
   278  }
   279  
   280  func collectSyftPackages(s *sbom.SBOM, spdxIDMap map[string]any, packages []*spdx.Package) {
   281  	for _, p := range packages {
   282  		syftPkg := toSyftPackage(p)
   283  		spdxIDMap[string(p.PackageSPDXIdentifier)] = syftPkg
   284  		s.Artifacts.Packages.Add(syftPkg)
   285  	}
   286  }
   287  
   288  func collectSyftFiles(s *sbom.SBOM, spdxIDMap map[string]any, doc *spdx.Document) {
   289  	for _, f := range doc.Files {
   290  		l := toSyftLocation(f)
   291  		spdxIDMap[string(f.FileSPDXIdentifier)] = l
   292  
   293  		s.Artifacts.FileMetadata[l.Coordinates] = toFileMetadata(f)
   294  		s.Artifacts.FileDigests[l.Coordinates] = toFileDigests(f)
   295  	}
   296  }
   297  
   298  func toFileDigests(f *spdx.File) (digests []file.Digest) {
   299  	for _, digest := range f.Checksums {
   300  		digests = append(digests, file.Digest{
   301  			Algorithm: fromChecksumAlgorithm(digest.Algorithm),
   302  			Value:     digest.Value,
   303  		})
   304  	}
   305  	return digests
   306  }
   307  
   308  func fromChecksumAlgorithm(algorithm common.ChecksumAlgorithm) string {
   309  	return strings.ToLower(string(algorithm))
   310  }
   311  
   312  func toFileMetadata(f *spdx.File) (meta file.Metadata) {
   313  	// FIXME Syft is currently lossy due to the SPDX 2.2.1 spec not supporting arbitrary mimetypes
   314  	for _, typ := range f.FileTypes {
   315  		switch FileType(typ) {
   316  		case ImageFileType:
   317  			meta.MIMEType = "image/"
   318  		case VideoFileType:
   319  			meta.MIMEType = "video/"
   320  		case ApplicationFileType:
   321  			meta.MIMEType = "application/"
   322  		case TextFileType:
   323  			meta.MIMEType = "text/"
   324  		case AudioFileType:
   325  			meta.MIMEType = "audio/"
   326  		case BinaryFileType:
   327  		case ArchiveFileType:
   328  		case OtherFileType:
   329  		}
   330  	}
   331  	return meta
   332  }
   333  
   334  func toSyftRelationships(spdxIDMap map[string]any, doc *spdx.Document) []artifact.Relationship {
   335  	var out []artifact.Relationship
   336  	for _, r := range doc.Relationships {
   337  		// FIXME what to do with r.RefA.DocumentRefID and r.RefA.SpecialID
   338  		if r.RefA.DocumentRefID != "" && requireAndTrimPrefix(r.RefA.DocumentRefID, "DocumentRef-") != string(doc.SPDXIdentifier) {
   339  			log.Debugf("ignoring relationship to external document: %+v", r)
   340  			continue
   341  		}
   342  		a := spdxIDMap[string(r.RefA.ElementRefID)]
   343  		b := spdxIDMap[string(r.RefB.ElementRefID)]
   344  		from, fromOk := a.(pkg.Package)
   345  		toPackage, toPackageOk := b.(pkg.Package)
   346  		toLocation, toLocationOk := b.(file.Location)
   347  		if !fromOk || !(toPackageOk || toLocationOk) {
   348  			log.Debugf("unable to find valid relationship mapping from SPDX, ignoring: (from: %+v) (to: %+v)", a, b)
   349  			continue
   350  		}
   351  		var to artifact.Identifiable
   352  		var typ artifact.RelationshipType
   353  		if toLocationOk {
   354  			switch RelationshipType(r.Relationship) {
   355  			case ContainsRelationship:
   356  				typ = artifact.ContainsRelationship
   357  				to = toLocation
   358  			case OtherRelationship:
   359  				// Encoding uses a specifically formatted comment...
   360  				if strings.Index(r.RelationshipComment, string(artifact.EvidentByRelationship)) == 0 {
   361  					typ = artifact.EvidentByRelationship
   362  					to = toLocation
   363  				}
   364  			}
   365  		} else {
   366  			switch RelationshipType(r.Relationship) {
   367  			case ContainsRelationship:
   368  				typ = artifact.ContainsRelationship
   369  				to = toPackage
   370  			case OtherRelationship:
   371  				// Encoding uses a specifically formatted comment...
   372  				if strings.Index(r.RelationshipComment, string(artifact.OwnershipByFileOverlapRelationship)) == 0 {
   373  					typ = artifact.OwnershipByFileOverlapRelationship
   374  					to = toPackage
   375  				}
   376  			}
   377  		}
   378  		if typ != "" && to != nil {
   379  			out = append(out, artifact.Relationship{
   380  				From: from,
   381  				To:   to,
   382  				Type: typ,
   383  			})
   384  		}
   385  	}
   386  	return out
   387  }
   388  
   389  func toSyftCoordinates(f *spdx.File) file.Coordinates {
   390  	const layerIDPrefix = "layerID: "
   391  	var fileSystemID string
   392  	if strings.Index(f.FileComment, layerIDPrefix) == 0 {
   393  		fileSystemID = strings.TrimPrefix(f.FileComment, layerIDPrefix)
   394  	}
   395  	if strings.Index(string(f.FileSPDXIdentifier), layerIDPrefix) == 0 {
   396  		fileSystemID = strings.TrimPrefix(string(f.FileSPDXIdentifier), layerIDPrefix)
   397  	}
   398  	return file.Coordinates{
   399  		RealPath:     f.FileName,
   400  		FileSystemID: fileSystemID,
   401  	}
   402  }
   403  
   404  func toSyftLocation(f *spdx.File) file.Location {
   405  	l := file.NewVirtualLocationFromCoordinates(toSyftCoordinates(f), f.FileName)
   406  	return l
   407  }
   408  
   409  func requireAndTrimPrefix(val interface{}, prefix string) string {
   410  	if v, ok := val.(string); ok {
   411  		if i := strings.Index(v, prefix); i == 0 {
   412  			return strings.Replace(v, prefix, "", 1)
   413  		}
   414  	}
   415  	return ""
   416  }
   417  
   418  type pkgInfo struct {
   419  	purl packageurl.PackageURL
   420  	typ  pkg.Type
   421  	lang pkg.Language
   422  }
   423  
   424  func (p *pkgInfo) qualifierValue(name string) string {
   425  	return findQualifierValue(p.purl, name)
   426  }
   427  
   428  func findQualifierValue(purl packageurl.PackageURL, qualifier string) string {
   429  	for _, q := range purl.Qualifiers {
   430  		if q.Key == qualifier {
   431  			return q.Value
   432  		}
   433  	}
   434  	return ""
   435  }
   436  
   437  func extractPkgInfo(p *spdx.Package) pkgInfo {
   438  	pu := findPURLValue(p)
   439  	purl, err := packageurl.FromString(pu)
   440  	if err != nil {
   441  		return pkgInfo{}
   442  	}
   443  	return pkgInfo{
   444  		purl,
   445  		pkg.TypeByName(purl.Type),
   446  		pkg.LanguageByName(purl.Type),
   447  	}
   448  }
   449  
   450  func toSyftPackage(p *spdx.Package) pkg.Package {
   451  	info := extractPkgInfo(p)
   452  	metadataType, metadata := extractMetadata(p, info)
   453  	sP := &pkg.Package{
   454  		Type:         info.typ,
   455  		Name:         p.PackageName,
   456  		Version:      p.PackageVersion,
   457  		Licenses:     pkg.NewLicenseSet(parseSPDXLicenses(p)...),
   458  		CPEs:         extractCPEs(p),
   459  		PURL:         purlValue(info.purl),
   460  		Language:     info.lang,
   461  		MetadataType: metadataType,
   462  		Metadata:     metadata,
   463  	}
   464  
   465  	sP.SetID()
   466  
   467  	return *sP
   468  }
   469  
   470  func purlValue(purl packageurl.PackageURL) string {
   471  	val := purl.String()
   472  	if _, err := packageurl.FromString(val); err != nil {
   473  		return ""
   474  	}
   475  	return val
   476  }
   477  
   478  func parseSPDXLicenses(p *spdx.Package) []pkg.License {
   479  	licenses := make([]pkg.License, 0)
   480  
   481  	// concluded
   482  	if p.PackageLicenseConcluded != NOASSERTION && p.PackageLicenseConcluded != NONE && p.PackageLicenseConcluded != "" {
   483  		l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseConcluded))
   484  		l.Type = license.Concluded
   485  		licenses = append(licenses, l)
   486  	}
   487  
   488  	// declared
   489  	if p.PackageLicenseDeclared != NOASSERTION && p.PackageLicenseDeclared != NONE && p.PackageLicenseDeclared != "" {
   490  		l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseDeclared))
   491  		l.Type = license.Declared
   492  		licenses = append(licenses, l)
   493  	}
   494  
   495  	return licenses
   496  }
   497  
   498  func cleanSPDXID(id string) string {
   499  	return strings.TrimPrefix(id, spdxlicense.LicenseRefPrefix)
   500  }
   501  
   502  //nolint:funlen
   503  func extractMetadata(p *spdx.Package, info pkgInfo) (pkg.MetadataType, interface{}) {
   504  	arch := info.qualifierValue(pkg.PURLQualifierArch)
   505  	upstreamValue := info.qualifierValue(pkg.PURLQualifierUpstream)
   506  	upstream := strings.SplitN(upstreamValue, "@", 2)
   507  	upstreamName := upstream[0]
   508  	upstreamVersion := ""
   509  	if len(upstream) > 1 {
   510  		upstreamVersion = upstream[1]
   511  	}
   512  	supplier := ""
   513  	if p.PackageSupplier != nil {
   514  		supplier = p.PackageSupplier.Supplier
   515  	}
   516  	originator := ""
   517  	if p.PackageOriginator != nil {
   518  		originator = p.PackageOriginator.Originator
   519  	}
   520  	switch info.typ {
   521  	case pkg.ApkPkg:
   522  		return pkg.ApkMetadataType, pkg.ApkMetadata{
   523  			Package:       p.PackageName,
   524  			OriginPackage: upstreamName,
   525  			Maintainer:    supplier,
   526  			Version:       p.PackageVersion,
   527  			Architecture:  arch,
   528  			URL:           p.PackageHomePage,
   529  			Description:   p.PackageDescription,
   530  		}
   531  	case pkg.RpmPkg:
   532  		converted, err := strconv.Atoi(info.qualifierValue(pkg.PURLQualifierEpoch))
   533  		var epoch *int
   534  		if err != nil {
   535  			epoch = nil
   536  		} else {
   537  			epoch = &converted
   538  		}
   539  		return pkg.RpmMetadataType, pkg.RpmMetadata{
   540  			Name:      p.PackageName,
   541  			Version:   p.PackageVersion,
   542  			Epoch:     epoch,
   543  			Arch:      arch,
   544  			SourceRpm: upstreamValue,
   545  			Vendor:    originator,
   546  		}
   547  	case pkg.DebPkg:
   548  		return pkg.DpkgMetadataType, pkg.DpkgMetadata{
   549  			Package:       p.PackageName,
   550  			Source:        upstreamName,
   551  			Version:       p.PackageVersion,
   552  			SourceVersion: upstreamVersion,
   553  			Architecture:  arch,
   554  			Maintainer:    originator,
   555  		}
   556  	case pkg.JavaPkg:
   557  		var digests []file.Digest
   558  		for _, value := range p.PackageChecksums {
   559  			digests = append(digests, file.Digest{Algorithm: fromChecksumAlgorithm(value.Algorithm), Value: value.Value})
   560  		}
   561  		return pkg.JavaMetadataType, pkg.JavaMetadata{
   562  			ArchiveDigests: digests,
   563  		}
   564  	case pkg.GoModulePkg:
   565  		var h1Digest string
   566  		for _, value := range p.PackageChecksums {
   567  			digest, err := util.HDigestFromSHA(fromChecksumAlgorithm(value.Algorithm), value.Value)
   568  			if err != nil {
   569  				log.Debugf("invalid h1digest: %v %v", value, err)
   570  				continue
   571  			}
   572  			h1Digest = digest
   573  			break
   574  		}
   575  		return pkg.GolangBinMetadataType, pkg.GolangBinMetadata{
   576  			H1Digest: h1Digest,
   577  		}
   578  	}
   579  	return pkg.UnknownMetadataType, nil
   580  }
   581  
   582  func findPURLValue(p *spdx.Package) string {
   583  	for _, r := range p.PackageExternalReferences {
   584  		if r.RefType == string(PurlExternalRefType) {
   585  			return r.Locator
   586  		}
   587  	}
   588  	return ""
   589  }
   590  
   591  func extractCPEs(p *spdx.Package) (cpes []cpe.CPE) {
   592  	for _, r := range p.PackageExternalReferences {
   593  		if r.RefType == string(Cpe23ExternalRefType) {
   594  			c, err := cpe.New(r.Locator)
   595  			if err != nil {
   596  				log.Warnf("unable to extract SPDX CPE=%q: %+v", r.Locator, err)
   597  				continue
   598  			}
   599  			cpes = append(cpes, c)
   600  		}
   601  	}
   602  	return cpes
   603  }