github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/formats/common/spdxhelpers/to_format_model.go (about)

     1  //nolint:gosec // sha1 is used as a required hash function for SPDX, not a crypto function
     2  package spdxhelpers
     3  
     4  import (
     5  	"crypto/sha1"
     6  	"fmt"
     7  	"path"
     8  	"sort"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/docker/distribution/reference"
    13  	"github.com/spdx/tools-golang/spdx"
    14  	"golang.org/x/exp/maps"
    15  	"golang.org/x/exp/slices"
    16  
    17  	"github.com/anchore/packageurl-go"
    18  	"github.com/anchore/syft/internal"
    19  	"github.com/anchore/syft/internal/log"
    20  	"github.com/anchore/syft/internal/spdxlicense"
    21  	"github.com/anchore/syft/syft/artifact"
    22  	"github.com/anchore/syft/syft/file"
    23  	"github.com/anchore/syft/syft/formats/common/util"
    24  	"github.com/anchore/syft/syft/pkg"
    25  	"github.com/anchore/syft/syft/sbom"
    26  	"github.com/anchore/syft/syft/source"
    27  )
    28  
const (
	// noAssertion is the literal used in this file for mandatory SPDX fields that are not
	// determined (package copyright text, concluded file license).
	noAssertion = "NOASSERTION"

	// primary package purpose values assigned to the document root package, selected by the
	// kind of source metadata.
	spdxPrimaryPurposeContainer = "CONTAINER"
	spdxPrimaryPurposeFile      = "FILE"
	spdxPrimaryPurposeOther     = "OTHER"

	// prefixes embedded in the root package SPDX identifier ("DocumentRoot-<prefix>-<name>"),
	// selected by the kind of source metadata.
	prefixImage     = "Image"
	prefixDirectory = "Directory"
	prefixFile      = "File"
	prefixUnknown   = "Unknown"
)
    41  
    42  // ToFormatModel creates and populates a new SPDX document struct that follows the SPDX 2.3
    43  // spec from the given SBOM model.
    44  //
    45  //nolint:funlen
    46  func ToFormatModel(s sbom.SBOM) *spdx.Document {
    47  	name, namespace := DocumentNameAndNamespace(s.Source, s.Descriptor)
    48  
    49  	packages := toPackages(s.Artifacts.Packages, s)
    50  
    51  	relationships := toRelationships(s.RelationshipsSorted())
    52  
    53  	// for valid SPDX we need a document describes relationship
    54  	describesID := spdx.ElementID("DOCUMENT")
    55  
    56  	rootPackage := toRootPackage(s.Source)
    57  	if rootPackage != nil {
    58  		describesID = rootPackage.PackageSPDXIdentifier
    59  
    60  		// add all relationships from the document root to all other packages
    61  		relationships = append(relationships, toRootRelationships(rootPackage, packages)...)
    62  
    63  		// append the root package
    64  		packages = append(packages, rootPackage)
    65  	}
    66  
    67  	// add a relationship for the package the document describes
    68  	documentDescribesRelationship := &spdx.Relationship{
    69  		RefA: spdx.DocElementID{
    70  			ElementRefID: "DOCUMENT",
    71  		},
    72  		Relationship: string(DescribesRelationship),
    73  		RefB: spdx.DocElementID{
    74  			ElementRefID: describesID,
    75  		},
    76  	}
    77  
    78  	// add the root document relationship
    79  	relationships = append(relationships, documentDescribesRelationship)
    80  
    81  	return &spdx.Document{
    82  		// 6.1: SPDX Version; should be in the format "SPDX-x.x"
    83  		// Cardinality: mandatory, one
    84  		SPDXVersion: spdx.Version,
    85  
    86  		// 6.2: Data License; should be "CC0-1.0"
    87  		// Cardinality: mandatory, one
    88  		DataLicense: spdx.DataLicense,
    89  
    90  		// 6.3: SPDX Identifier; should be "DOCUMENT" to represent mandatory identifier of SPDXRef-DOCUMENT
    91  		// Cardinality: mandatory, one
    92  		SPDXIdentifier: "DOCUMENT",
    93  
    94  		// 6.4: Document Name
    95  		// Cardinality: mandatory, one
    96  		DocumentName: name,
    97  
    98  		// 6.5: Document Namespace
    99  		// Cardinality: mandatory, one
   100  		// Purpose: Provide an SPDX document specific namespace as a unique absolute Uniform Resource
   101  		// Identifier (URI) as specified in RFC-3986, with the exception of the ‘#’ delimiter. The SPDX
   102  		// Document URI cannot contain a URI "part" (e.g. the "#" character), since the ‘#’ is used in SPDX
   103  		// element URIs (packages, files, snippets, etc) to separate the document namespace from the
   104  		// element’s SPDX identifier. Additionally, a scheme (e.g. “https:”) is required.
   105  
   106  		// The URI must be unique for the SPDX document including the specific version of the SPDX document.
   107  		// If the SPDX document is updated, thereby creating a new version, a new URI for the updated
   108  		// document must be used. There can only be one URI for an SPDX document and only one SPDX document
   109  		// for a given URI.
   110  
   111  		// Note that the URI does not have to be accessible. It is only intended to provide a unique ID.
   112  		// In many cases, the URI will point to a web accessible document, but this should not be assumed
   113  		// to be the case.
   114  
   115  		DocumentNamespace: namespace,
   116  
   117  		// 6.6: External Document References
   118  		// Cardinality: optional, one or many
   119  		ExternalDocumentReferences: nil,
   120  
   121  		// 6.11: Document Comment
   122  		// Cardinality: optional, one
   123  		DocumentComment: "",
   124  
   125  		CreationInfo: &spdx.CreationInfo{
   126  			// 6.7: License List Version
   127  			// Cardinality: optional, one
   128  			LicenseListVersion: spdxlicense.Version,
   129  
   130  			// 6.8: Creators: may have multiple keys for Person, Organization
   131  			//      and/or Tool
   132  			// Cardinality: mandatory, one or many
   133  			Creators: []spdx.Creator{
   134  				{
   135  					Creator:     "Anchore, Inc",
   136  					CreatorType: "Organization",
   137  				},
   138  				{
   139  					Creator:     s.Descriptor.Name + "-" + s.Descriptor.Version,
   140  					CreatorType: "Tool",
   141  				},
   142  			},
   143  
   144  			// 6.9: Created: data format YYYY-MM-DDThh:mm:ssZ
   145  			// Cardinality: mandatory, one
   146  			Created: time.Now().UTC().Format(time.RFC3339),
   147  
   148  			// 6.10: Creator Comment
   149  			// Cardinality: optional, one
   150  			CreatorComment: "",
   151  		},
   152  		Packages:      packages,
   153  		Files:         toFiles(s),
   154  		Relationships: relationships,
   155  		OtherLicenses: toOtherLicenses(s.Artifacts.Packages),
   156  	}
   157  }
   158  
   159  func toRootRelationships(rootPackage *spdx.Package, packages []*spdx.Package) (out []*spdx.Relationship) {
   160  	for _, p := range packages {
   161  		out = append(out, &spdx.Relationship{
   162  			RefA: spdx.DocElementID{
   163  				ElementRefID: rootPackage.PackageSPDXIdentifier,
   164  			},
   165  			Relationship: string(ContainsRelationship),
   166  			RefB: spdx.DocElementID{
   167  				ElementRefID: p.PackageSPDXIdentifier,
   168  			},
   169  		})
   170  	}
   171  	return
   172  }
   173  
   174  //nolint:funlen
   175  func toRootPackage(s source.Description) *spdx.Package {
   176  	var prefix string
   177  
   178  	name := s.Name
   179  	version := s.Version
   180  
   181  	var purl *packageurl.PackageURL
   182  	purpose := ""
   183  	var checksums []spdx.Checksum
   184  	switch m := s.Metadata.(type) {
   185  	case source.StereoscopeImageSourceMetadata:
   186  		prefix = prefixImage
   187  		purpose = spdxPrimaryPurposeContainer
   188  
   189  		qualifiers := packageurl.Qualifiers{
   190  			{
   191  				Key:   "arch",
   192  				Value: m.Architecture,
   193  			},
   194  		}
   195  
   196  		ref, _ := reference.Parse(m.UserInput)
   197  		if ref, ok := ref.(reference.NamedTagged); ok {
   198  			qualifiers = append(qualifiers, packageurl.Qualifier{
   199  				Key:   "tag",
   200  				Value: ref.Tag(),
   201  			})
   202  		}
   203  
   204  		c := toChecksum(m.ManifestDigest)
   205  		if c != nil {
   206  			checksums = append(checksums, *c)
   207  			purl = &packageurl.PackageURL{
   208  				Type:       "oci",
   209  				Name:       s.Name,
   210  				Version:    m.ManifestDigest,
   211  				Qualifiers: qualifiers,
   212  			}
   213  		}
   214  
   215  	case source.DirectorySourceMetadata:
   216  		prefix = prefixDirectory
   217  		purpose = spdxPrimaryPurposeFile
   218  
   219  	case source.FileSourceMetadata:
   220  		prefix = prefixFile
   221  		purpose = spdxPrimaryPurposeFile
   222  
   223  		for _, d := range m.Digests {
   224  			checksums = append(checksums, spdx.Checksum{
   225  				Algorithm: toChecksumAlgorithm(d.Algorithm),
   226  				Value:     d.Value,
   227  			})
   228  		}
   229  	default:
   230  		prefix = prefixUnknown
   231  		purpose = spdxPrimaryPurposeOther
   232  
   233  		if name == "" {
   234  			name = s.ID
   235  		}
   236  	}
   237  
   238  	p := &spdx.Package{
   239  		PackageName:               name,
   240  		PackageSPDXIdentifier:     spdx.ElementID(SanitizeElementID(fmt.Sprintf("DocumentRoot-%s-%s", prefix, name))),
   241  		PackageVersion:            version,
   242  		PackageChecksums:          checksums,
   243  		PackageExternalReferences: nil,
   244  		PrimaryPackagePurpose:     purpose,
   245  		PackageSupplier: &spdx.Supplier{
   246  			Supplier: NOASSERTION,
   247  		},
   248  		PackageDownloadLocation: NOASSERTION,
   249  	}
   250  
   251  	if purl != nil {
   252  		p.PackageExternalReferences = []*spdx.PackageExternalReference{
   253  			{
   254  				Category: string(PackageManagerReferenceCategory),
   255  				RefType:  string(PurlExternalRefType),
   256  				Locator:  purl.String(),
   257  			},
   258  		}
   259  	}
   260  
   261  	return p
   262  }
   263  
   264  func toSPDXID(identifiable artifact.Identifiable) spdx.ElementID {
   265  	maxLen := 40
   266  	id := ""
   267  	switch it := identifiable.(type) {
   268  	case pkg.Package:
   269  		switch {
   270  		case it.Type != "" && it.Name != "":
   271  			id = fmt.Sprintf("Package-%s-%s-%s", it.Type, it.Name, it.ID())
   272  		case it.Name != "":
   273  			id = fmt.Sprintf("Package-%s-%s", it.Name, it.ID())
   274  		case it.Type != "":
   275  			id = fmt.Sprintf("Package-%s-%s", it.Type, it.ID())
   276  		default:
   277  			id = fmt.Sprintf("Package-%s", it.ID())
   278  		}
   279  	case file.Coordinates:
   280  		p := ""
   281  		parts := strings.Split(it.RealPath, "/")
   282  		for i := len(parts); i > 0; i-- {
   283  			part := parts[i-1]
   284  			if len(part) == 0 {
   285  				continue
   286  			}
   287  			if i < len(parts) && len(p)+len(part)+3 > maxLen {
   288  				p = "..." + p
   289  				break
   290  			}
   291  			p = path.Join(part, p)
   292  		}
   293  		id = fmt.Sprintf("File-%s-%s", p, it.ID())
   294  	default:
   295  		id = string(identifiable.ID())
   296  	}
   297  	// NOTE: the spdx library prepend SPDXRef-, so we don't do it here
   298  	return spdx.ElementID(SanitizeElementID(id))
   299  }
   300  
   301  // packages populates all Package Information from the package Collection (see https://spdx.github.io/spdx-spec/3-package-information/)
   302  //
   303  //nolint:funlen
   304  func toPackages(catalog *pkg.Collection, sbom sbom.SBOM) (results []*spdx.Package) {
   305  	for _, p := range catalog.Sorted() {
   306  		// name should be guaranteed to be unique, but semantically useful and stable
   307  		id := toSPDXID(p)
   308  
   309  		// If the Concluded License is not the same as the Declared License, a written explanation should be provided
   310  		// in the Comments on License field (section 7.16). With respect to NOASSERTION, a written explanation in
   311  		// the Comments on License field (section 7.16) is preferred.
   312  		// extract these correctly to the spdx license format
   313  		concluded, declared := License(p)
   314  
   315  		// two ways to get filesAnalyzed == true:
   316  		// 1. syft has generated a sha1 digest for the package itself - usually in the java cataloger
   317  		// 2. syft has generated a sha1 digest for the package's contents
   318  		packageChecksums, filesAnalyzed := toPackageChecksums(p)
   319  
   320  		packageVerificationCode := newPackageVerificationCode(p, sbom)
   321  		if packageVerificationCode != nil {
   322  			filesAnalyzed = true
   323  		}
   324  
   325  		// invalid SPDX document state
   326  		if filesAnalyzed && packageVerificationCode == nil {
   327  			// this is an invalid document state
   328  			// we reset the filesAnalyzed flag to false to avoid
   329  			// cases where a package digest was generated but there was
   330  			// not enough metadata to generate a verification code regarding the files
   331  			filesAnalyzed = false
   332  		}
   333  
   334  		results = append(results, &spdx.Package{
   335  			// NOT PART OF SPEC
   336  			// flag: does this "package" contain files that were in fact "unpackaged",
   337  			// e.g. included directly in the Document without being in a Package?
   338  			IsUnpackaged: false,
   339  
   340  			// 7.1: Package Name
   341  			// Cardinality: mandatory, one
   342  			PackageName: p.Name,
   343  
   344  			// 7.2: Package SPDX Identifier: "SPDXRef-[idstring]"
   345  			// Cardinality: mandatory, one
   346  			PackageSPDXIdentifier: id,
   347  
   348  			// 7.3: Package Version
   349  			// Cardinality: optional, one
   350  			PackageVersion: p.Version,
   351  
   352  			// 7.4: Package File Name
   353  			// Cardinality: optional, one
   354  			PackageFileName: "",
   355  
   356  			// 7.5: Package Supplier: may have single result for either Person or Organization,
   357  			//                        or NOASSERTION
   358  			// Cardinality: optional, one
   359  
   360  			// 7.6: Package Originator: may have single result for either Person or Organization,
   361  			//                          or NOASSERTION
   362  			// Cardinality: optional, one
   363  			PackageSupplier: toPackageSupplier(p),
   364  
   365  			PackageOriginator: toPackageOriginator(p),
   366  
   367  			// 7.7: Package Download Location
   368  			// Cardinality: mandatory, one
   369  			// NONE if there is no download location whatsoever.
   370  			// NOASSERTION if:
   371  			//   (i) the SPDX file creator has attempted to but cannot reach a reasonable objective determination;
   372  			//   (ii) the SPDX file creator has made no attempt to determine this field; or
   373  			//   (iii) the SPDX file creator has intentionally provided no information (no meaning should be implied by doing so).
   374  			PackageDownloadLocation: DownloadLocation(p),
   375  
   376  			// 7.8: FilesAnalyzed
   377  			// Cardinality: optional, one; default value is "true" if omitted
   378  
   379  			// Purpose: Indicates whether the file content of this package has been available for or subjected to
   380  			// analysis when creating the SPDX document. If false, indicates packages that represent metadata or
   381  			// URI references to a project, product, artifact, distribution or a component. If false, the package
   382  			// must not contain any files.
   383  
   384  			// Intent: A package can refer to a project, product, artifact, distribution or a component that is
   385  			// external to the SPDX document.
   386  			FilesAnalyzed: filesAnalyzed,
   387  			// NOT PART OF SPEC: did FilesAnalyzed tag appear?
   388  			IsFilesAnalyzedTagPresent: true,
   389  
   390  			// 7.9: Package Verification Code
   391  			// Cardinality: optional, one if filesAnalyzed is true / omitted;
   392  			//              zero (must be omitted) if filesAnalyzed is false
   393  			PackageVerificationCode: packageVerificationCode,
   394  
   395  			// 7.10: Package Checksum: may have keys for SHA1, SHA256 and/or MD5
   396  			// Cardinality: optional, one or many
   397  
   398  			// 7.10.1 Purpose: Provide an independently reproducible mechanism that permits unique identification of
   399  			// a specific package that correlates to the data in this SPDX file. This identifier enables a recipient
   400  			// to determine if any file in the original package has been changed. If the SPDX file is to be included
   401  			// in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the
   402  			// checksum by default.
   403  			PackageChecksums: packageChecksums,
   404  
   405  			// 7.11: Package Home Page
   406  			// Cardinality: optional, one
   407  			PackageHomePage: Homepage(p),
   408  
   409  			// 7.12: Source Information
   410  			// Cardinality: optional, one
   411  			PackageSourceInfo: SourceInfo(p),
   412  
   413  			// 7.13: Concluded License: SPDX License Expression, "NONE" or "NOASSERTION"
   414  			// Cardinality: mandatory, one
   415  			// Purpose: Contain the license the SPDX file creator has concluded as governing the
   416  			// package or alternative values, if the governing license cannot be determined.
   417  			PackageLicenseConcluded: concluded,
   418  
   419  			// 7.14: All Licenses Info from Files: SPDX License Expression, "NONE" or "NOASSERTION"
   420  			// Cardinality: mandatory, one or many if filesAnalyzed is true / omitted;
   421  			//              zero (must be omitted) if filesAnalyzed is false
   422  			PackageLicenseInfoFromFiles: nil,
   423  
   424  			// 7.15: Declared License: SPDX License Expression, "NONE" or "NOASSERTION"
   425  			// Cardinality: mandatory, one
   426  			// Purpose: List the licenses that have been declared by the authors of the package.
   427  			// Any license information that does not originate from the package authors, e.g. license
   428  			// information from a third party repository, should not be included in this field.
   429  			PackageLicenseDeclared: declared,
   430  
   431  			// 7.16: Comments on License
   432  			// Cardinality: optional, one
   433  			PackageLicenseComments: "",
   434  
   435  			// 7.17: Copyright Text: copyright notice(s) text, "NONE" or "NOASSERTION"
   436  			// Cardinality: mandatory, one
   437  			// Purpose: IdentifyFormat the copyright holders of the package, as well as any dates present. This will be a free form text field extracted from package information files. The options to populate this field are limited to:
   438  			//
   439  			// Any text related to a copyright notice, even if not complete;
   440  			// NONE if the package contains no copyright information whatsoever; or
   441  			// NOASSERTION, if
   442  			//   (i) the SPDX document creator has made no attempt to determine this field; or
   443  			//   (ii) the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so).
   444  			//
   445  			PackageCopyrightText: noAssertion,
   446  
   447  			// 7.18: Package Summary Description
   448  			// Cardinality: optional, one
   449  			PackageSummary: "",
   450  
   451  			// 7.19: Package Detailed Description
   452  			// Cardinality: optional, one
   453  			PackageDescription: Description(p),
   454  
   455  			// 7.20: Package Comment
   456  			// Cardinality: optional, one
   457  			PackageComment: "",
   458  
   459  			// 7.21: Package External Reference
   460  			// Cardinality: optional, one or many
   461  			PackageExternalReferences: formatSPDXExternalRefs(p),
   462  
   463  			// 7.22: Package External Reference Comment
   464  			// Cardinality: conditional (optional, one) for each External Reference
   465  			// contained within PackageExternalReference2_1 struct, if present
   466  
   467  			// 7.23: Package Attribution Text
   468  			// Cardinality: optional, one or many
   469  			PackageAttributionTexts: nil,
   470  		})
   471  	}
   472  	return results
   473  }
   474  
   475  func toPackageChecksums(p pkg.Package) ([]spdx.Checksum, bool) {
   476  	filesAnalyzed := false
   477  	var checksums []spdx.Checksum
   478  	switch meta := p.Metadata.(type) {
   479  	// we generate digest for some Java packages
   480  	// spdx.github.io/spdx-spec/package-information/#710-package-checksum-field
   481  	case pkg.JavaMetadata:
   482  		// if syft has generated the digest here then filesAnalyzed is true
   483  		if len(meta.ArchiveDigests) > 0 {
   484  			filesAnalyzed = true
   485  			for _, digest := range meta.ArchiveDigests {
   486  				algo := strings.ToUpper(digest.Algorithm)
   487  				checksums = append(checksums, spdx.Checksum{
   488  					Algorithm: spdx.ChecksumAlgorithm(algo),
   489  					Value:     digest.Value,
   490  				})
   491  			}
   492  		}
   493  	case pkg.GolangBinMetadata:
   494  		// because the H1 digest is found in the Golang metadata we cannot claim that the files were analyzed
   495  		algo, hexStr, err := util.HDigestToSHA(meta.H1Digest)
   496  		if err != nil {
   497  			log.Debugf("invalid h1digest: %s: %v", meta.H1Digest, err)
   498  			break
   499  		}
   500  		algo = strings.ToUpper(algo)
   501  		checksums = append(checksums, spdx.Checksum{
   502  			Algorithm: spdx.ChecksumAlgorithm(algo),
   503  			Value:     hexStr,
   504  		})
   505  	}
   506  	return checksums, filesAnalyzed
   507  }
   508  
   509  func toPackageOriginator(p pkg.Package) *spdx.Originator {
   510  	kind, originator := Originator(p)
   511  	if kind == "" || originator == "" {
   512  		return nil
   513  	}
   514  	return &spdx.Originator{
   515  		Originator:     originator,
   516  		OriginatorType: kind,
   517  	}
   518  }
   519  
   520  func toPackageSupplier(p pkg.Package) *spdx.Supplier {
   521  	// this uses the Originator function for now until
   522  	// a better distinction can be made for supplier
   523  	kind, supplier := Originator(p)
   524  	if kind == "" || supplier == "" {
   525  		return &spdx.Supplier{
   526  			Supplier: NOASSERTION,
   527  		}
   528  	}
   529  	return &spdx.Supplier{
   530  		Supplier:     supplier,
   531  		SupplierType: kind,
   532  	}
   533  }
   534  
   535  func formatSPDXExternalRefs(p pkg.Package) (refs []*spdx.PackageExternalReference) {
   536  	for _, ref := range ExternalRefs(p) {
   537  		refs = append(refs, &spdx.PackageExternalReference{
   538  			Category:           string(ref.ReferenceCategory),
   539  			RefType:            string(ref.ReferenceType),
   540  			Locator:            ref.ReferenceLocator,
   541  			ExternalRefComment: ref.Comment,
   542  		})
   543  	}
   544  	return refs
   545  }
   546  
   547  func toRelationships(relationships []artifact.Relationship) (result []*spdx.Relationship) {
   548  	for _, r := range relationships {
   549  		exists, relationshipType, comment := lookupRelationship(r.Type)
   550  
   551  		if !exists {
   552  			log.Debugf("unable to convert relationship to SPDX, dropping: %+v", r)
   553  			continue
   554  		}
   555  
   556  		// FIXME: we are only currently including Package -> * relationships
   557  		if _, ok := r.From.(pkg.Package); !ok {
   558  			log.Debugf("skipping non-package relationship: %+v", r)
   559  			continue
   560  		}
   561  
   562  		result = append(result, &spdx.Relationship{
   563  			RefA: spdx.DocElementID{
   564  				ElementRefID: toSPDXID(r.From),
   565  			},
   566  			Relationship: string(relationshipType),
   567  			RefB: spdx.DocElementID{
   568  				ElementRefID: toSPDXID(r.To),
   569  			},
   570  			RelationshipComment: comment,
   571  		})
   572  	}
   573  	return result
   574  }
   575  
   576  func lookupRelationship(ty artifact.RelationshipType) (bool, RelationshipType, string) {
   577  	switch ty {
   578  	case artifact.ContainsRelationship:
   579  		return true, ContainsRelationship, ""
   580  	case artifact.DependencyOfRelationship:
   581  		return true, DependencyOfRelationship, ""
   582  	case artifact.OwnershipByFileOverlapRelationship:
   583  		return true, OtherRelationship, fmt.Sprintf("%s: indicates that the parent package claims ownership of a child package since the parent metadata indicates overlap with a location that a cataloger found the child package by", ty)
   584  	case artifact.EvidentByRelationship:
   585  		return true, OtherRelationship, fmt.Sprintf("%s: indicates the package's existence is evident by the given file", ty)
   586  	}
   587  	return false, "", ""
   588  }
   589  
   590  func toFiles(s sbom.SBOM) (results []*spdx.File) {
   591  	artifacts := s.Artifacts
   592  
   593  	for _, coordinates := range s.AllCoordinates() {
   594  		var metadata *file.Metadata
   595  		if metadataForLocation, exists := artifacts.FileMetadata[coordinates]; exists {
   596  			metadata = &metadataForLocation
   597  		}
   598  
   599  		var digests []file.Digest
   600  		if digestsForLocation, exists := artifacts.FileDigests[coordinates]; exists {
   601  			digests = digestsForLocation
   602  		}
   603  
   604  		// if we don't have any metadata or digests for this location
   605  		// then the file is most likely a symlink or non-regular file
   606  		// for now we include a 0 sha1 digest as requested by the spdx spec
   607  		// TODO: update location code in core SBOM so that we can map complex links
   608  		// back to their real file digest location.
   609  		if len(digests) == 0 {
   610  			digests = append(digests, file.Digest{Algorithm: "sha1", Value: "0000000000000000000000000000000000000000"})
   611  		}
   612  
   613  		// TODO: add file classifications (?) and content as a snippet
   614  
   615  		var comment string
   616  		if coordinates.FileSystemID != "" {
   617  			comment = fmt.Sprintf("layerID: %s", coordinates.FileSystemID)
   618  		}
   619  
   620  		results = append(results, &spdx.File{
   621  			FileSPDXIdentifier: toSPDXID(coordinates),
   622  			FileComment:        comment,
   623  			// required, no attempt made to determine license information
   624  			LicenseConcluded: noAssertion,
   625  			Checksums:        toFileChecksums(digests),
   626  			FileName:         coordinates.RealPath,
   627  			FileTypes:        toFileTypes(metadata),
   628  		})
   629  	}
   630  
   631  	// sort by real path then virtual path to ensure the result is stable across multiple runs
   632  	sort.SliceStable(results, func(i, j int) bool {
   633  		if results[i].FileName == results[j].FileName {
   634  			return results[i].FileSPDXIdentifier < results[j].FileSPDXIdentifier
   635  		}
   636  		return results[i].FileName < results[j].FileName
   637  	})
   638  	return results
   639  }
   640  
   641  func toFileChecksums(digests []file.Digest) (checksums []spdx.Checksum) {
   642  	checksums = make([]spdx.Checksum, 0, len(digests))
   643  	for _, digest := range digests {
   644  		checksums = append(checksums, spdx.Checksum{
   645  			Algorithm: toChecksumAlgorithm(digest.Algorithm),
   646  			Value:     digest.Value,
   647  		})
   648  	}
   649  	return checksums
   650  }
   651  
   652  // toChecksum takes a checksum in the format <algorithm>:<hash> and returns an spdx.Checksum or nil if the string is invalid
   653  func toChecksum(algorithmHash string) *spdx.Checksum {
   654  	parts := strings.Split(algorithmHash, ":")
   655  	if len(parts) < 2 {
   656  		return nil
   657  	}
   658  	return &spdx.Checksum{
   659  		Algorithm: toChecksumAlgorithm(parts[0]),
   660  		Value:     parts[1],
   661  	}
   662  }
   663  
   664  func toChecksumAlgorithm(algorithm string) spdx.ChecksumAlgorithm {
   665  	// this needs to be an uppercase version of our algorithm
   666  	return spdx.ChecksumAlgorithm(strings.ToUpper(algorithm))
   667  }
   668  
   669  func toFileTypes(metadata *file.Metadata) (ty []string) {
   670  	if metadata == nil {
   671  		return nil
   672  	}
   673  
   674  	mimeTypePrefix := strings.Split(metadata.MIMEType, "/")[0]
   675  	switch mimeTypePrefix {
   676  	case "image":
   677  		ty = append(ty, string(ImageFileType))
   678  	case "video":
   679  		ty = append(ty, string(VideoFileType))
   680  	case "application":
   681  		ty = append(ty, string(ApplicationFileType))
   682  	case "text":
   683  		ty = append(ty, string(TextFileType))
   684  	case "audio":
   685  		ty = append(ty, string(AudioFileType))
   686  	}
   687  
   688  	if internal.IsExecutable(metadata.MIMEType) {
   689  		ty = append(ty, string(BinaryFileType))
   690  	}
   691  
   692  	if internal.IsArchive(metadata.MIMEType) {
   693  		ty = append(ty, string(ArchiveFileType))
   694  	}
   695  
   696  	// TODO: add support for source, spdx, and documentation file types
   697  	if len(ty) == 0 {
   698  		ty = append(ty, string(OtherFileType))
   699  	}
   700  
   701  	return ty
   702  }
   703  
   704  // other licenses are for licenses from the pkg.Package that do not have an SPDXExpression
   705  // field. The spdxexpression field is only filled given a validated Value field.
   706  func toOtherLicenses(catalog *pkg.Collection) []*spdx.OtherLicense {
   707  	licenses := map[string]spdxLicense{}
   708  
   709  	for p := range catalog.Enumerate() {
   710  		declaredLicenses, concludedLicenses := parseLicenses(p.Licenses.ToSlice())
   711  		for _, l := range declaredLicenses {
   712  			if l.value != "" {
   713  				licenses[l.id] = l
   714  			}
   715  		}
   716  		for _, l := range concludedLicenses {
   717  			if l.value != "" {
   718  				licenses[l.id] = l
   719  			}
   720  		}
   721  	}
   722  
   723  	var result []*spdx.OtherLicense
   724  
   725  	ids := maps.Keys(licenses)
   726  	slices.Sort(ids)
   727  	for _, id := range ids {
   728  		license := licenses[id]
   729  		result = append(result, &spdx.OtherLicense{
   730  			LicenseIdentifier: license.id,
   731  			ExtractedText:     license.value,
   732  		})
   733  	}
   734  	return result
   735  }
   736  
   737  // TODO: handle SPDX excludes file case
   738  // f file is an "excludes" file, skip it /* exclude SPDX analysis file(s) */
   739  // see: https://spdx.github.io/spdx-spec/v2.3/package-information/#79-package-verification-code-field
   740  // the above link contains the SPDX algorithm for a package verification code
   741  func newPackageVerificationCode(p pkg.Package, sbom sbom.SBOM) *spdx.PackageVerificationCode {
   742  	// key off of the contains relationship;
   743  	// spdx validator will fail if a package claims to contain a file but no sha1 provided
   744  	// if a sha1 for a file is provided then the validator will fail if the package does not have
   745  	// a package verification code
   746  	coordinates := sbom.CoordinatesForPackage(p, artifact.ContainsRelationship)
   747  	var digests []file.Digest
   748  	for _, c := range coordinates {
   749  		digest := sbom.Artifacts.FileDigests[c]
   750  		if len(digest) == 0 {
   751  			continue
   752  		}
   753  
   754  		var d file.Digest
   755  		for _, digest := range digest {
   756  			if digest.Algorithm == "sha1" {
   757  				d = digest
   758  				break
   759  			}
   760  		}
   761  		digests = append(digests, d)
   762  	}
   763  
   764  	if len(digests) == 0 {
   765  		return nil
   766  	}
   767  
   768  	// sort templist in ascending order by SHA1 value
   769  	sort.SliceStable(digests, func(i, j int) bool {
   770  		return digests[i].Value < digests[j].Value
   771  	})
   772  
   773  	// filelist = templist with "/n"s removed. /* ordered sequence of SHA1 values with no separators
   774  	var b strings.Builder
   775  	for _, digest := range digests {
   776  		b.WriteString(digest.Value)
   777  	}
   778  
   779  	//nolint:gosec
   780  	hasher := sha1.New()
   781  	_, _ = hasher.Write([]byte(b.String()))
   782  	return &spdx.PackageVerificationCode{
   783  		// 7.9.1: Package Verification Code Value
   784  		// Cardinality: mandatory, one
   785  		Value: fmt.Sprintf("%+x", hasher.Sum(nil)),
   786  	}
   787  }