github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/sbom/spdx/marshal.go (about)

     1  package spdx
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  	"strconv"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/mitchellh/hashstructure/v2"
    11  	"github.com/samber/lo"
    12  	"github.com/spdx/tools-golang/spdx"
    13  	"github.com/spdx/tools-golang/spdx/v2/common"
    14  	spdxutils "github.com/spdx/tools-golang/utils"
    15  	"golang.org/x/exp/maps"
    16  	"golang.org/x/xerrors"
    17  
    18  	"github.com/devseccon/trivy/pkg/clock"
    19  	"github.com/devseccon/trivy/pkg/digest"
    20  	ftypes "github.com/devseccon/trivy/pkg/fanal/types"
    21  	"github.com/devseccon/trivy/pkg/licensing"
    22  	"github.com/devseccon/trivy/pkg/licensing/expression"
    23  	"github.com/devseccon/trivy/pkg/log"
    24  	"github.com/devseccon/trivy/pkg/purl"
    25  	"github.com/devseccon/trivy/pkg/scanner/utils"
    26  	"github.com/devseccon/trivy/pkg/types"
    27  	"github.com/devseccon/trivy/pkg/uuid"
    28  )
    29  
// Document-level values used while assembling the SPDX document.
//
// DocumentSPDXIdentifier is the SPDX identifier of the document itself and the
// left-hand side of the relationship that describes the root package.
// DocumentNamespace is the base that every generated document namespace
// starts with. CreatorOrganization and CreatorTool populate the creators in
// the creation info. noneField is the placeholder returned when a package has
// no licenses or when no download location applies.
const (
	DocumentSPDXIdentifier = "DOCUMENT"
	DocumentNamespace      = "http://aquasecurity.github.io/trivy"
	CreatorOrganization    = "aquasecurity"
	CreatorTool            = "trivy"
	noneField              = "NONE"
)
    37  
const (
	// Category and reference type of the external reference that carries a package URL.
	CategoryPackageManager = "PACKAGE-MANAGER"
	RefTypePurl            = "purl"

	// Attribution-text key under which the report's schema version is stored on the root package.
	PropertySchemaVersion = "SchemaVersion"

	// Image properties (attribution-text keys written on the root package)
	PropertySize       = "Size"
	PropertyImageID    = "ImageID"
	PropertyRepoDigest = "RepoDigest"
	PropertyDiffID     = "DiffID"
	PropertyRepoTag    = "RepoTag"

	// Package properties (attribution-text keys written on each package)
	PropertyPkgID       = "PkgID"
	PropertyLayerDiffID = "LayerDiffID"
	PropertyLayerDigest = "LayerDigest"
	// Package Purpose fields (values assigned to PrimaryPackagePurpose)
	PackagePurposeOS          = "OPERATING-SYSTEM"
	PackagePurposeContainer   = "CONTAINER"
	PackagePurposeSource      = "SOURCE"
	PackagePurposeApplication = "APPLICATION"
	PackagePurposeLibrary     = "LIBRARY"

	// Package supplier values: the placeholder used when a package has no
	// maintainer, and the supplier type used when it has one.
	PackageSupplierNoAssertion  = "NOASSERTION"
	PackageSupplierOrganization = "Organization"

	// Relationship types, re-exported from the SPDX common package.
	RelationShipContains  = common.TypeRelationshipContains
	RelationShipDescribe  = common.TypeRelationshipDescribe
	RelationShipDependsOn = common.TypeRelationshipDependsOn

	// Element type prefixes for SPDX identifiers (see elementID).
	ElementOperatingSystem = "OperatingSystem"
	ElementApplication     = "Application"
	ElementPackage         = "Package"
	ElementFile            = "File"
)
    74  
var (
	// SourcePackagePrefix starts the PackageSourceInfo text written for an OS
	// package that has a source package name.
	SourcePackagePrefix = "built package from"
)
    78  
// Marshaler converts a Trivy report into an SPDX document.
type Marshaler struct {
	format     spdx.Document // initialised to the zero document by NewMarshaler
	hasher     Hash          // hash function used to derive SPDX element IDs
	appVersion string        // Trivy version; needed for the `creator` field
}
    84  
// Hash is the signature of the function used to hash a value into an ID.
// NewMarshaler uses hashstructure.Hash by default; WithHasher overrides it.
type Hash func(v interface{}, format hashstructure.Format, opts *hashstructure.HashOptions) (uint64, error)
    86  
// marshalOption is a functional option that NewMarshaler applies to the
// Marshaler it is constructing.
type marshalOption func(*Marshaler)
    88  
    89  func WithHasher(hasher Hash) marshalOption {
    90  	return func(opts *Marshaler) {
    91  		opts.hasher = hasher
    92  	}
    93  }
    94  
    95  func NewMarshaler(version string, opts ...marshalOption) *Marshaler {
    96  	m := &Marshaler{
    97  		format:     spdx.Document{},
    98  		hasher:     hashstructure.Hash,
    99  		appVersion: version,
   100  	}
   101  
   102  	for _, opt := range opts {
   103  		opt(m)
   104  	}
   105  
   106  	return m
   107  }
   108  
   109  func (m *Marshaler) Marshal(r types.Report) (*spdx.Document, error) {
   110  	var relationShips []*spdx.Relationship
   111  	packages := make(map[spdx.ElementID]*spdx.Package)
   112  	pkgDownloadLocation := getPackageDownloadLocation(r.ArtifactType, r.ArtifactName)
   113  
   114  	// Root package contains OS, OS packages, language-specific packages and so on.
   115  	rootPkg, err := m.rootPackage(r, pkgDownloadLocation)
   116  	if err != nil {
   117  		return nil, xerrors.Errorf("failed to generate a root package: %w", err)
   118  	}
   119  	packages[rootPkg.PackageSPDXIdentifier] = rootPkg
   120  	relationShips = append(relationShips,
   121  		relationShip(DocumentSPDXIdentifier, rootPkg.PackageSPDXIdentifier, RelationShipDescribe),
   122  	)
   123  
   124  	var spdxFiles []*spdx.File
   125  
   126  	for _, result := range r.Results {
   127  		if len(result.Packages) == 0 {
   128  			continue
   129  		}
   130  		parentPackage, err := m.resultToSpdxPackage(result, r.Metadata.OS, pkgDownloadLocation)
   131  		if err != nil {
   132  			return nil, xerrors.Errorf("failed to parse result: %w", err)
   133  		}
   134  		packages[parentPackage.PackageSPDXIdentifier] = &parentPackage
   135  		relationShips = append(relationShips,
   136  			relationShip(rootPkg.PackageSPDXIdentifier, parentPackage.PackageSPDXIdentifier, RelationShipContains),
   137  		)
   138  
   139  		for _, pkg := range result.Packages {
   140  			spdxPackage, err := m.pkgToSpdxPackage(result.Type, pkgDownloadLocation, result.Class, r.Metadata, pkg)
   141  			if err != nil {
   142  				return nil, xerrors.Errorf("failed to parse package: %w", err)
   143  			}
   144  			packages[spdxPackage.PackageSPDXIdentifier] = &spdxPackage
   145  			relationShips = append(relationShips,
   146  				relationShip(parentPackage.PackageSPDXIdentifier, spdxPackage.PackageSPDXIdentifier, RelationShipContains),
   147  			)
   148  			files, err := m.pkgFiles(pkg)
   149  			if err != nil {
   150  				return nil, xerrors.Errorf("package file error: %w", err)
   151  			} else if files == nil {
   152  				continue
   153  			}
   154  
   155  			spdxFiles = append(spdxFiles, files...)
   156  			for _, file := range files {
   157  				relationShips = append(relationShips,
   158  					relationShip(spdxPackage.PackageSPDXIdentifier, file.FileSPDXIdentifier, RelationShipContains),
   159  				)
   160  			}
   161  
   162  			verificationCode, err := spdxutils.GetVerificationCode(files, "")
   163  			if err != nil {
   164  				return nil, xerrors.Errorf("package verification error: %w", err)
   165  			}
   166  
   167  			spdxPackage.FilesAnalyzed = true
   168  			spdxPackage.PackageVerificationCode = &verificationCode
   169  		}
   170  	}
   171  
   172  	return &spdx.Document{
   173  		SPDXVersion:       spdx.Version,
   174  		DataLicense:       spdx.DataLicense,
   175  		SPDXIdentifier:    DocumentSPDXIdentifier,
   176  		DocumentName:      r.ArtifactName,
   177  		DocumentNamespace: getDocumentNamespace(r, m),
   178  		CreationInfo: &spdx.CreationInfo{
   179  			Creators: []common.Creator{
   180  				{
   181  					Creator:     CreatorOrganization,
   182  					CreatorType: "Organization",
   183  				},
   184  				{
   185  					Creator:     fmt.Sprintf("%s-%s", CreatorTool, m.appVersion),
   186  					CreatorType: "Tool",
   187  				},
   188  			},
   189  			Created: clock.Now().UTC().Format(time.RFC3339),
   190  		},
   191  		Packages:      toPackages(packages),
   192  		Relationships: relationShips,
   193  		Files:         spdxFiles,
   194  	}, nil
   195  }
   196  
   197  func toPackages(packages map[spdx.ElementID]*spdx.Package) []*spdx.Package {
   198  	ret := maps.Values(packages)
   199  	sort.Slice(ret, func(i, j int) bool {
   200  		if ret[i].PackageName != ret[j].PackageName {
   201  			return ret[i].PackageName < ret[j].PackageName
   202  		}
   203  		return ret[i].PackageSPDXIdentifier < ret[j].PackageSPDXIdentifier
   204  	})
   205  	return ret
   206  }
   207  
   208  func (m *Marshaler) resultToSpdxPackage(result types.Result, os *ftypes.OS, pkgDownloadLocation string) (spdx.Package, error) {
   209  	switch result.Class {
   210  	case types.ClassOSPkg:
   211  		osPkg, err := m.osPackage(os, pkgDownloadLocation)
   212  		if err != nil {
   213  			return spdx.Package{}, xerrors.Errorf("failed to parse operating system package: %w", err)
   214  		}
   215  		return osPkg, nil
   216  	case types.ClassLangPkg:
   217  		langPkg, err := m.langPackage(result.Target, pkgDownloadLocation, result.Type)
   218  		if err != nil {
   219  			return spdx.Package{}, xerrors.Errorf("failed to parse application package: %w", err)
   220  		}
   221  		return langPkg, nil
   222  	default:
   223  		// unsupported packages
   224  		return spdx.Package{}, nil
   225  	}
   226  }
   227  
   228  func (m *Marshaler) parseFile(filePath string, d digest.Digest) (spdx.File, error) {
   229  	pkgID, err := calcPkgID(m.hasher, filePath)
   230  	if err != nil {
   231  		return spdx.File{}, xerrors.Errorf("failed to get %s package ID: %w", filePath, err)
   232  	}
   233  	file := spdx.File{
   234  		FileSPDXIdentifier: spdx.ElementID(fmt.Sprintf("File-%s", pkgID)),
   235  		FileName:           filePath,
   236  		Checksums:          digestToSpdxFileChecksum(d),
   237  	}
   238  	return file, nil
   239  }
   240  
   241  func (m *Marshaler) rootPackage(r types.Report, pkgDownloadLocation string) (*spdx.Package, error) {
   242  	var externalReferences []*spdx.PackageExternalReference
   243  	attributionTexts := []string{attributionText(PropertySchemaVersion, strconv.Itoa(r.SchemaVersion))}
   244  
   245  	// When the target is a container image, add PURL to the external references of the root package.
   246  	if p, err := purl.NewPackageURL(purl.TypeOCI, r.Metadata, ftypes.Package{}); err != nil {
   247  		return nil, xerrors.Errorf("failed to new package url for oci: %w", err)
   248  	} else if p != nil {
   249  		externalReferences = append(externalReferences, purlExternalReference(p.ToString()))
   250  	}
   251  
   252  	if r.Metadata.ImageID != "" {
   253  		attributionTexts = appendAttributionText(attributionTexts, PropertyImageID, r.Metadata.ImageID)
   254  	}
   255  	if r.Metadata.Size != 0 {
   256  		attributionTexts = appendAttributionText(attributionTexts, PropertySize, strconv.FormatInt(r.Metadata.Size, 10))
   257  	}
   258  
   259  	for _, d := range r.Metadata.RepoDigests {
   260  		attributionTexts = appendAttributionText(attributionTexts, PropertyRepoDigest, d)
   261  	}
   262  	for _, d := range r.Metadata.DiffIDs {
   263  		attributionTexts = appendAttributionText(attributionTexts, PropertyDiffID, d)
   264  	}
   265  	for _, t := range r.Metadata.RepoTags {
   266  		attributionTexts = appendAttributionText(attributionTexts, PropertyRepoTag, t)
   267  	}
   268  
   269  	pkgID, err := calcPkgID(m.hasher, fmt.Sprintf("%s-%s", r.ArtifactName, r.ArtifactType))
   270  	if err != nil {
   271  		return nil, xerrors.Errorf("failed to get %s package ID: %w", pkgID, err)
   272  	}
   273  
   274  	pkgPurpose := PackagePurposeSource
   275  	if r.ArtifactType == ftypes.ArtifactContainerImage {
   276  		pkgPurpose = PackagePurposeContainer
   277  	}
   278  
   279  	return &spdx.Package{
   280  		PackageName:               r.ArtifactName,
   281  		PackageSPDXIdentifier:     elementID(camelCase(string(r.ArtifactType)), pkgID),
   282  		PackageDownloadLocation:   pkgDownloadLocation,
   283  		PackageAttributionTexts:   attributionTexts,
   284  		PackageExternalReferences: externalReferences,
   285  		PrimaryPackagePurpose:     pkgPurpose,
   286  	}, nil
   287  }
   288  
   289  func (m *Marshaler) osPackage(osFound *ftypes.OS, pkgDownloadLocation string) (spdx.Package, error) {
   290  	if osFound == nil {
   291  		return spdx.Package{}, nil
   292  	}
   293  
   294  	pkgID, err := calcPkgID(m.hasher, osFound)
   295  	if err != nil {
   296  		return spdx.Package{}, xerrors.Errorf("failed to get os metadata package ID: %w", err)
   297  	}
   298  
   299  	return spdx.Package{
   300  		PackageName:             string(osFound.Family),
   301  		PackageVersion:          osFound.Name,
   302  		PackageSPDXIdentifier:   elementID(ElementOperatingSystem, pkgID),
   303  		PackageDownloadLocation: pkgDownloadLocation,
   304  		PrimaryPackagePurpose:   PackagePurposeOS,
   305  	}, nil
   306  }
   307  
   308  func (m *Marshaler) langPackage(target, pkgDownloadLocation string, appType ftypes.LangType) (spdx.Package, error) {
   309  	pkgID, err := calcPkgID(m.hasher, fmt.Sprintf("%s-%s", target, appType))
   310  	if err != nil {
   311  		return spdx.Package{}, xerrors.Errorf("failed to get %s package ID: %w", target, err)
   312  	}
   313  
   314  	return spdx.Package{
   315  		PackageName:             string(appType),
   316  		PackageSourceInfo:       target, // TODO: Files seems better
   317  		PackageSPDXIdentifier:   elementID(ElementApplication, pkgID),
   318  		PackageDownloadLocation: pkgDownloadLocation,
   319  		PrimaryPackagePurpose:   PackagePurposeApplication,
   320  	}, nil
   321  }
   322  
   323  func (m *Marshaler) pkgToSpdxPackage(t ftypes.TargetType, pkgDownloadLocation string, class types.ResultClass, metadata types.Metadata, pkg ftypes.Package) (spdx.Package, error) {
   324  	license := GetLicense(pkg)
   325  
   326  	pkgID, err := calcPkgID(m.hasher, pkg)
   327  	if err != nil {
   328  		return spdx.Package{}, xerrors.Errorf("failed to get %s package ID: %w", pkg.Name, err)
   329  	}
   330  
   331  	var pkgSrcInfo string
   332  	if class == types.ClassOSPkg && pkg.SrcName != "" {
   333  		pkgSrcInfo = fmt.Sprintf("%s: %s %s", SourcePackagePrefix, pkg.SrcName, utils.FormatSrcVersion(pkg))
   334  	}
   335  
   336  	packageURL, err := purl.NewPackageURL(t, metadata, pkg)
   337  	if err != nil {
   338  		return spdx.Package{}, xerrors.Errorf("failed to parse purl (%s): %w", pkg.Name, err)
   339  	}
   340  
   341  	var pkgExtRefs []*spdx.PackageExternalReference
   342  	if packageURL != nil {
   343  		pkgExtRefs = []*spdx.PackageExternalReference{purlExternalReference(packageURL.String())}
   344  	}
   345  
   346  	var attrTexts []string
   347  	attrTexts = appendAttributionText(attrTexts, PropertyPkgID, pkg.ID)
   348  	attrTexts = appendAttributionText(attrTexts, PropertyLayerDigest, pkg.Layer.Digest)
   349  	attrTexts = appendAttributionText(attrTexts, PropertyLayerDiffID, pkg.Layer.DiffID)
   350  
   351  	supplier := &spdx.Supplier{Supplier: PackageSupplierNoAssertion}
   352  	if pkg.Maintainer != "" {
   353  		supplier = &spdx.Supplier{
   354  			SupplierType: PackageSupplierOrganization, // Always use "Organization" at the moment as it is difficult to distinguish between "Person" or "Organization".
   355  			Supplier:     pkg.Maintainer,
   356  		}
   357  	}
   358  
   359  	var checksum []spdx.Checksum
   360  	if pkg.Digest != "" && class == types.ClassOSPkg {
   361  		checksum = digestToSpdxFileChecksum(pkg.Digest)
   362  	}
   363  
   364  	return spdx.Package{
   365  		PackageName:             pkg.Name,
   366  		PackageVersion:          utils.FormatVersion(pkg),
   367  		PackageSPDXIdentifier:   elementID(ElementPackage, pkgID),
   368  		PackageDownloadLocation: pkgDownloadLocation,
   369  		PackageSourceInfo:       pkgSrcInfo,
   370  
   371  		// The Declared License is what the authors of a project believe govern the package
   372  		PackageLicenseConcluded: license,
   373  
   374  		// The Concluded License field is the license the SPDX file creator believes governs the package
   375  		PackageLicenseDeclared: license,
   376  
   377  		PackageExternalReferences: pkgExtRefs,
   378  		PackageAttributionTexts:   attrTexts,
   379  		PrimaryPackagePurpose:     PackagePurposeLibrary,
   380  		PackageSupplier:           supplier,
   381  		PackageChecksums:          checksum,
   382  	}, nil
   383  }
   384  
   385  func (m *Marshaler) pkgFiles(pkg ftypes.Package) ([]*spdx.File, error) {
   386  	if pkg.FilePath == "" {
   387  		return nil, nil
   388  	}
   389  
   390  	file, err := m.parseFile(pkg.FilePath, pkg.Digest)
   391  	if err != nil {
   392  		return nil, xerrors.Errorf("failed to parse file: %w", err)
   393  	}
   394  	return []*spdx.File{
   395  		&file,
   396  	}, nil
   397  }
   398  
   399  func elementID(elementType, pkgID string) spdx.ElementID {
   400  	return spdx.ElementID(fmt.Sprintf("%s-%s", elementType, pkgID))
   401  }
   402  
   403  func relationShip(refA, refB spdx.ElementID, operator string) *spdx.Relationship {
   404  	ref := spdx.Relationship{
   405  		RefA:         common.MakeDocElementID("", string(refA)),
   406  		RefB:         common.MakeDocElementID("", string(refB)),
   407  		Relationship: operator,
   408  	}
   409  	return &ref
   410  }
   411  
   412  func appendAttributionText(attributionTexts []string, key, value string) []string {
   413  	if value == "" {
   414  		return attributionTexts
   415  	}
   416  	return append(attributionTexts, attributionText(key, value))
   417  }
   418  
   419  func attributionText(key, value string) string {
   420  	return fmt.Sprintf("%s: %s", key, value)
   421  }
   422  
   423  func purlExternalReference(packageURL string) *spdx.PackageExternalReference {
   424  	return &spdx.PackageExternalReference{
   425  		Category: CategoryPackageManager,
   426  		RefType:  RefTypePurl,
   427  		Locator:  packageURL,
   428  	}
   429  }
   430  
   431  func GetLicense(p ftypes.Package) string {
   432  	if len(p.Licenses) == 0 {
   433  		return noneField
   434  	}
   435  
   436  	license := strings.Join(lo.Map(p.Licenses, func(license string, index int) string {
   437  		// e.g. GPL-3.0-with-autoconf-exception
   438  		license = strings.ReplaceAll(license, "-with-", " WITH ")
   439  		license = strings.ReplaceAll(license, "-WITH-", " WITH ")
   440  
   441  		return fmt.Sprintf("(%s)", license)
   442  	}), " AND ")
   443  	s, err := expression.Normalize(license, licensing.Normalize, expression.NormalizeForSPDX)
   444  	if err != nil {
   445  		// Not fail on the invalid license
   446  		log.Logger.Warnf("Unable to marshal SPDX licenses %q", license)
   447  		return ""
   448  	}
   449  	return s
   450  }
   451  
   452  func getDocumentNamespace(r types.Report, m *Marshaler) string {
   453  	return fmt.Sprintf("%s/%s/%s-%s",
   454  		DocumentNamespace,
   455  		string(r.ArtifactType),
   456  		strings.ReplaceAll(strings.ReplaceAll(r.ArtifactName, "https://", ""), "http://", ""), // remove http(s):// prefix when scanning repos
   457  		uuid.New().String(),
   458  	)
   459  }
   460  
   461  func calcPkgID(h Hash, v interface{}) (string, error) {
   462  	f, err := h(v, hashstructure.FormatV2, &hashstructure.HashOptions{
   463  		ZeroNil:      true,
   464  		SlicesAsSets: true,
   465  	})
   466  	if err != nil {
   467  		return "", xerrors.Errorf("could not build package ID for %+v: %w", v, err)
   468  	}
   469  
   470  	return fmt.Sprintf("%x", f), nil
   471  }
   472  
   473  func camelCase(inputUnderScoreStr string) (camelCase string) {
   474  	isToUpper := false
   475  	for k, v := range inputUnderScoreStr {
   476  		if k == 0 {
   477  			camelCase = strings.ToUpper(string(inputUnderScoreStr[0]))
   478  		} else {
   479  			if isToUpper {
   480  				camelCase += strings.ToUpper(string(v))
   481  				isToUpper = false
   482  			} else {
   483  				if v == '_' {
   484  					isToUpper = true
   485  				} else {
   486  					camelCase += string(v)
   487  				}
   488  			}
   489  		}
   490  	}
   491  	return
   492  }
   493  
   494  func getPackageDownloadLocation(t ftypes.ArtifactType, artifactName string) string {
   495  	location := noneField
   496  	// this field is used for git/mercurial/subversion/bazaar:
   497  	// https://spdx.github.io/spdx-spec/v2.2.2/package-information/#77-package-download-location-field
   498  	if t == ftypes.ArtifactRepository {
   499  		// Trivy currently only supports git repositories. Format examples:
   500  		// git+https://git.myproject.org/MyProject.git
   501  		// git+http://git.myproject.org/MyProject
   502  		location = fmt.Sprintf("git+%s", artifactName)
   503  	}
   504  	return location
   505  }
   506  
   507  func digestToSpdxFileChecksum(d digest.Digest) []common.Checksum {
   508  	if d == "" {
   509  		return nil
   510  	}
   511  
   512  	var alg spdx.ChecksumAlgorithm
   513  	switch d.Algorithm() {
   514  	case digest.SHA1:
   515  		alg = spdx.SHA1
   516  	case digest.SHA256:
   517  		alg = spdx.SHA256
   518  	case digest.MD5:
   519  		alg = spdx.MD5
   520  	default:
   521  		return nil
   522  	}
   523  
   524  	return []spdx.Checksum{
   525  		{
   526  			Algorithm: alg,
   527  			Value:     d.Encoded(),
   528  		},
   529  	}
   530  }