github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/formats/common/spdxhelpers/to_gosbom_model.go (about)

     1  package spdxhelpers
     2  
     3  import (
     4  	"errors"
     5  	"net/url"
     6  	"strconv"
     7  	"strings"
     8  
     9  	"github.com/nextlinux/gosbom/gosbom/artifact"
    10  	"github.com/nextlinux/gosbom/gosbom/cpe"
    11  	"github.com/nextlinux/gosbom/gosbom/file"
    12  	"github.com/nextlinux/gosbom/gosbom/formats/common/util"
    13  	"github.com/nextlinux/gosbom/gosbom/license"
    14  	"github.com/nextlinux/gosbom/gosbom/linux"
    15  	"github.com/nextlinux/gosbom/gosbom/pkg"
    16  	"github.com/nextlinux/gosbom/gosbom/sbom"
    17  	"github.com/nextlinux/gosbom/gosbom/source"
    18  	"github.com/nextlinux/gosbom/internal/log"
    19  	"github.com/spdx/tools-golang/spdx"
    20  
    21  	"github.com/anchore/packageurl-go"
    22  )
    23  
    24  func ToGosbomModel(doc *spdx.Document) (*sbom.SBOM, error) {
    25  	if doc == nil {
    26  		return nil, errors.New("cannot convert SPDX document to Gosbom model because document is nil")
    27  	}
    28  
    29  	spdxIDMap := make(map[string]interface{})
    30  
    31  	src := source.Metadata{Scheme: source.UnknownScheme}
    32  	src.Scheme = extractSchemeFromNamespace(doc.DocumentNamespace)
    33  
    34  	s := &sbom.SBOM{
    35  		Source: src,
    36  		Artifacts: sbom.Artifacts{
    37  			Packages:          pkg.NewCollection(),
    38  			FileMetadata:      map[file.Coordinates]file.Metadata{},
    39  			FileDigests:       map[file.Coordinates][]file.Digest{},
    40  			LinuxDistribution: findLinuxReleaseByPURL(doc),
    41  		},
    42  	}
    43  
    44  	collectGosbomPackages(s, spdxIDMap, doc)
    45  
    46  	collectGosbomFiles(s, spdxIDMap, doc)
    47  
    48  	s.Relationships = toGosbomRelationships(spdxIDMap, doc)
    49  
    50  	return s, nil
    51  }
    52  
    53  // NOTE(jonas): SPDX doesn't inform what an SBOM is about,
    54  // image, directory, for example. This is our best effort to determine
    55  // the scheme. Gosbom-generated SBOMs have in the namespace
    56  // field a type encoded, which we try to identify here.
    57  func extractSchemeFromNamespace(ns string) source.Scheme {
    58  	u, err := url.Parse(ns)
    59  	if err != nil {
    60  		return source.UnknownScheme
    61  	}
    62  
    63  	parts := strings.Split(u.Path, "/")
    64  	for _, p := range parts {
    65  		switch p {
    66  		case inputFile:
    67  			return source.FileScheme
    68  		case inputImage:
    69  			return source.ImageScheme
    70  		case inputDirectory:
    71  			return source.DirectoryScheme
    72  		}
    73  	}
    74  	return source.UnknownScheme
    75  }
    76  
    77  func findLinuxReleaseByPURL(doc *spdx.Document) *linux.Release {
    78  	for _, p := range doc.Packages {
    79  		purlValue := findPURLValue(p)
    80  		if purlValue == "" {
    81  			continue
    82  		}
    83  		purl, err := packageurl.FromString(purlValue)
    84  		if err != nil {
    85  			log.Warnf("unable to parse purl: %s", purlValue)
    86  			continue
    87  		}
    88  		distro := findQualifierValue(purl, pkg.PURLQualifierDistro)
    89  		if distro != "" {
    90  			parts := strings.Split(distro, "-")
    91  			name := parts[0]
    92  			version := ""
    93  			if len(parts) > 1 {
    94  				version = parts[1]
    95  			}
    96  			return &linux.Release{
    97  				PrettyName: name,
    98  				Name:       name,
    99  				ID:         name,
   100  				IDLike:     []string{name},
   101  				Version:    version,
   102  				VersionID:  version,
   103  			}
   104  		}
   105  	}
   106  
   107  	return nil
   108  }
   109  
   110  func collectGosbomPackages(s *sbom.SBOM, spdxIDMap map[string]interface{}, doc *spdx.Document) {
   111  	for _, p := range doc.Packages {
   112  		gosbomPkg := toGosbomPackage(p)
   113  		spdxIDMap[string(p.PackageSPDXIdentifier)] = gosbomPkg
   114  		s.Artifacts.Packages.Add(*gosbomPkg)
   115  	}
   116  }
   117  
   118  func collectGosbomFiles(s *sbom.SBOM, spdxIDMap map[string]interface{}, doc *spdx.Document) {
   119  	for _, f := range doc.Files {
   120  		l := toGosbomLocation(f)
   121  		spdxIDMap[string(f.FileSPDXIdentifier)] = l
   122  
   123  		s.Artifacts.FileMetadata[l.Coordinates] = toFileMetadata(f)
   124  		s.Artifacts.FileDigests[l.Coordinates] = toFileDigests(f)
   125  	}
   126  }
   127  
   128  func toFileDigests(f *spdx.File) (digests []file.Digest) {
   129  	for _, digest := range f.Checksums {
   130  		digests = append(digests, file.Digest{
   131  			Algorithm: string(digest.Algorithm),
   132  			Value:     digest.Value,
   133  		})
   134  	}
   135  	return digests
   136  }
   137  
   138  func toFileMetadata(f *spdx.File) (meta file.Metadata) {
   139  	// FIXME Gosbom is currently lossy due to the SPDX 2.2.1 spec not supporting arbitrary mimetypes
   140  	for _, typ := range f.FileTypes {
   141  		switch FileType(typ) {
   142  		case ImageFileType:
   143  			meta.MIMEType = "image/"
   144  		case VideoFileType:
   145  			meta.MIMEType = "video/"
   146  		case ApplicationFileType:
   147  			meta.MIMEType = "application/"
   148  		case TextFileType:
   149  			meta.MIMEType = "text/"
   150  		case AudioFileType:
   151  			meta.MIMEType = "audio/"
   152  		case BinaryFileType:
   153  		case ArchiveFileType:
   154  		case OtherFileType:
   155  		}
   156  	}
   157  	return meta
   158  }
   159  
   160  func toGosbomRelationships(spdxIDMap map[string]interface{}, doc *spdx.Document) []artifact.Relationship {
   161  	var out []artifact.Relationship
   162  	for _, r := range doc.Relationships {
   163  		// FIXME what to do with r.RefA.DocumentRefID and  r.RefA.SpecialID
   164  		if r.RefA.DocumentRefID != "" && requireAndTrimPrefix(r.RefA.DocumentRefID, "DocumentRef-") != string(doc.SPDXIdentifier) {
   165  			log.Debugf("ignoring relationship to external document: %+v", r)
   166  			continue
   167  		}
   168  		a := spdxIDMap[string(r.RefA.ElementRefID)]
   169  		b := spdxIDMap[string(r.RefB.ElementRefID)]
   170  		from, fromOk := a.(*pkg.Package)
   171  		toPackage, toPackageOk := b.(*pkg.Package)
   172  		toLocation, toLocationOk := b.(*file.Location)
   173  		if !fromOk || !(toPackageOk || toLocationOk) {
   174  			log.Debugf("unable to find valid relationship mapping from SPDX 2.2 JSON, ignoring: (from: %+v) (to: %+v)", a, b)
   175  			continue
   176  		}
   177  		var to artifact.Identifiable
   178  		var typ artifact.RelationshipType
   179  		if toLocationOk {
   180  			switch RelationshipType(r.Relationship) {
   181  			case ContainsRelationship:
   182  				typ = artifact.ContainsRelationship
   183  				to = toLocation
   184  			case OtherRelationship:
   185  				// Encoding uses a specifically formatted comment...
   186  				if strings.Index(r.RelationshipComment, string(artifact.EvidentByRelationship)) == 0 {
   187  					typ = artifact.EvidentByRelationship
   188  					to = toLocation
   189  				}
   190  			}
   191  		} else {
   192  			switch RelationshipType(r.Relationship) {
   193  			case ContainsRelationship:
   194  				typ = artifact.ContainsRelationship
   195  				to = toPackage
   196  			case OtherRelationship:
   197  				// Encoding uses a specifically formatted comment...
   198  				if strings.Index(r.RelationshipComment, string(artifact.OwnershipByFileOverlapRelationship)) == 0 {
   199  					typ = artifact.OwnershipByFileOverlapRelationship
   200  					to = toPackage
   201  				}
   202  			}
   203  		}
   204  		if typ != "" && to != nil {
   205  			out = append(out, artifact.Relationship{
   206  				From: from,
   207  				To:   to,
   208  				Type: typ,
   209  			})
   210  		}
   211  	}
   212  	return out
   213  }
   214  
   215  func toGosbomCoordinates(f *spdx.File) file.Coordinates {
   216  	const layerIDPrefix = "layerID: "
   217  	var fileSystemID string
   218  	if strings.Index(f.FileComment, layerIDPrefix) == 0 {
   219  		fileSystemID = strings.TrimPrefix(f.FileComment, layerIDPrefix)
   220  	}
   221  	if strings.Index(string(f.FileSPDXIdentifier), layerIDPrefix) == 0 {
   222  		fileSystemID = strings.TrimPrefix(string(f.FileSPDXIdentifier), layerIDPrefix)
   223  	}
   224  	return file.Coordinates{
   225  		RealPath:     f.FileName,
   226  		FileSystemID: fileSystemID,
   227  	}
   228  }
   229  
   230  func toGosbomLocation(f *spdx.File) *file.Location {
   231  	l := file.NewVirtualLocationFromCoordinates(toGosbomCoordinates(f), f.FileName)
   232  	return &l
   233  }
   234  
   235  func requireAndTrimPrefix(val interface{}, prefix string) string {
   236  	if v, ok := val.(string); ok {
   237  		if i := strings.Index(v, prefix); i == 0 {
   238  			return strings.Replace(v, prefix, "", 1)
   239  		}
   240  	}
   241  	return ""
   242  }
   243  
   244  type pkgInfo struct {
   245  	purl packageurl.PackageURL
   246  	typ  pkg.Type
   247  	lang pkg.Language
   248  }
   249  
   250  func (p *pkgInfo) qualifierValue(name string) string {
   251  	return findQualifierValue(p.purl, name)
   252  }
   253  
   254  func findQualifierValue(purl packageurl.PackageURL, qualifier string) string {
   255  	for _, q := range purl.Qualifiers {
   256  		if q.Key == qualifier {
   257  			return q.Value
   258  		}
   259  	}
   260  	return ""
   261  }
   262  
   263  func extractPkgInfo(p *spdx.Package) pkgInfo {
   264  	pu := findPURLValue(p)
   265  	purl, err := packageurl.FromString(pu)
   266  	if err != nil {
   267  		return pkgInfo{}
   268  	}
   269  	return pkgInfo{
   270  		purl,
   271  		pkg.TypeByName(purl.Type),
   272  		pkg.LanguageByName(purl.Type),
   273  	}
   274  }
   275  
   276  func toGosbomPackage(p *spdx.Package) *pkg.Package {
   277  	info := extractPkgInfo(p)
   278  	metadataType, metadata := extractMetadata(p, info)
   279  	sP := pkg.Package{
   280  		Type:         info.typ,
   281  		Name:         p.PackageName,
   282  		Version:      p.PackageVersion,
   283  		Licenses:     pkg.NewLicenseSet(parseSPDXLicenses(p)...),
   284  		CPEs:         extractCPEs(p),
   285  		PURL:         info.purl.String(),
   286  		Language:     info.lang,
   287  		MetadataType: metadataType,
   288  		Metadata:     metadata,
   289  	}
   290  
   291  	sP.SetID()
   292  
   293  	return &sP
   294  }
   295  
   296  func parseSPDXLicenses(p *spdx.Package) []pkg.License {
   297  	licenses := make([]pkg.License, 0)
   298  
   299  	// concluded
   300  	if p.PackageLicenseConcluded != NOASSERTION && p.PackageLicenseConcluded != NONE && p.PackageLicenseConcluded != "" {
   301  		l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseConcluded))
   302  		l.Type = license.Concluded
   303  		licenses = append(licenses, l)
   304  	}
   305  
   306  	// declared
   307  	if p.PackageLicenseDeclared != NOASSERTION && p.PackageLicenseDeclared != NONE && p.PackageLicenseDeclared != "" {
   308  		l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseDeclared))
   309  		l.Type = license.Declared
   310  		licenses = append(licenses, l)
   311  	}
   312  
   313  	return licenses
   314  }
   315  
   316  func cleanSPDXID(id string) string {
   317  	if strings.HasPrefix(id, "LicenseRef-") {
   318  		return strings.TrimPrefix(id, "LicenseRef-")
   319  	}
   320  	return id
   321  }
   322  
   323  //nolint:funlen
   324  func extractMetadata(p *spdx.Package, info pkgInfo) (pkg.MetadataType, interface{}) {
   325  	arch := info.qualifierValue(pkg.PURLQualifierArch)
   326  	upstreamValue := info.qualifierValue(pkg.PURLQualifierUpstream)
   327  	upstream := strings.SplitN(upstreamValue, "@", 2)
   328  	upstreamName := upstream[0]
   329  	upstreamVersion := ""
   330  	if len(upstream) > 1 {
   331  		upstreamVersion = upstream[1]
   332  	}
   333  	supplier := ""
   334  	if p.PackageSupplier != nil {
   335  		supplier = p.PackageSupplier.Supplier
   336  	}
   337  	originator := ""
   338  	if p.PackageOriginator != nil {
   339  		originator = p.PackageOriginator.Originator
   340  	}
   341  	switch info.typ {
   342  	case pkg.ApkPkg:
   343  		return pkg.ApkMetadataType, pkg.ApkMetadata{
   344  			Package:       p.PackageName,
   345  			OriginPackage: upstreamName,
   346  			Maintainer:    supplier,
   347  			Version:       p.PackageVersion,
   348  			Architecture:  arch,
   349  			URL:           p.PackageHomePage,
   350  			Description:   p.PackageDescription,
   351  		}
   352  	case pkg.RpmPkg:
   353  		converted, err := strconv.Atoi(info.qualifierValue(pkg.PURLQualifierEpoch))
   354  		var epoch *int
   355  		if err != nil {
   356  			epoch = nil
   357  		} else {
   358  			epoch = &converted
   359  		}
   360  		return pkg.RpmMetadataType, pkg.RpmMetadata{
   361  			Name:      p.PackageName,
   362  			Version:   p.PackageVersion,
   363  			Epoch:     epoch,
   364  			Arch:      arch,
   365  			SourceRpm: upstreamValue,
   366  			Vendor:    originator,
   367  		}
   368  	case pkg.DebPkg:
   369  		return pkg.DpkgMetadataType, pkg.DpkgMetadata{
   370  			Package:       p.PackageName,
   371  			Source:        upstreamName,
   372  			Version:       p.PackageVersion,
   373  			SourceVersion: upstreamVersion,
   374  			Architecture:  arch,
   375  			Maintainer:    originator,
   376  		}
   377  	case pkg.JavaPkg:
   378  		var digests []file.Digest
   379  		for _, value := range p.PackageChecksums {
   380  			digests = append(digests, file.Digest{Algorithm: string(value.Algorithm), Value: value.Value})
   381  		}
   382  		return pkg.JavaMetadataType, pkg.JavaMetadata{
   383  			ArchiveDigests: digests,
   384  		}
   385  	case pkg.GoModulePkg:
   386  		var h1Digest string
   387  		for _, value := range p.PackageChecksums {
   388  			digest, err := util.HDigestFromSHA(string(value.Algorithm), value.Value)
   389  			if err != nil {
   390  				log.Debugf("invalid h1digest: %v %v", value, err)
   391  				continue
   392  			}
   393  			h1Digest = digest
   394  			break
   395  		}
   396  		return pkg.GolangBinMetadataType, pkg.GolangBinMetadata{
   397  			H1Digest: h1Digest,
   398  		}
   399  	}
   400  	return pkg.UnknownMetadataType, nil
   401  }
   402  
   403  func findPURLValue(p *spdx.Package) string {
   404  	for _, r := range p.PackageExternalReferences {
   405  		if r.RefType == string(PurlExternalRefType) {
   406  			return r.Locator
   407  		}
   408  	}
   409  	return ""
   410  }
   411  
   412  func extractCPEs(p *spdx.Package) (cpes []cpe.CPE) {
   413  	for _, r := range p.PackageExternalReferences {
   414  		if r.RefType == string(Cpe23ExternalRefType) {
   415  			c, err := cpe.New(r.Locator)
   416  			if err != nil {
   417  				log.Warnf("unable to extract SPDX CPE=%q: %+v", r.Locator, err)
   418  				continue
   419  			}
   420  			cpes = append(cpes, c)
   421  		}
   422  	}
   423  	return cpes
   424  }