github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/format/common/spdxhelpers/to_format_model.go (about)

     1  //nolint:gosec // sha1 is used as a required hash function for SPDX, not a crypto function
     2  package spdxhelpers
     3  
     4  import (
     5  	"crypto/sha1"
     6  	"fmt"
     7  	"path"
     8  	"slices"
     9  	"sort"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/distribution/reference"
    14  	"github.com/spdx/tools-golang/spdx"
    15  
    16  	"github.com/anchore/packageurl-go"
    17  	"github.com/anchore/syft/syft/artifact"
    18  	"github.com/anchore/syft/syft/file"
    19  	"github.com/anchore/syft/syft/format/common/util"
    20  	"github.com/anchore/syft/syft/pkg"
    21  	"github.com/anchore/syft/syft/sbom"
    22  	"github.com/anchore/syft/syft/source"
    23  	"github.com/lineaje-labs/syft/internal"
    24  	"github.com/lineaje-labs/syft/internal/log"
    25  	"github.com/lineaje-labs/syft/internal/spdxlicense"
    26  )
    27  
const (
	// noAssertion is the literal written into SPDX fields this file makes no claim about
	// (package copyright text, file concluded license).
	noAssertion = "NOASSERTION"

	// values used for the root package's PrimaryPackagePurpose, selected by source metadata type
	spdxPrimaryPurposeContainer = "CONTAINER"
	spdxPrimaryPurposeFile      = "FILE"
	spdxPrimaryPurposeOther     = "OTHER"

	// prefixes embedded in the root package's SPDX identifier ("DocumentRoot-<prefix>-<name>")
	prefixImage     = "Image"
	prefixDirectory = "Directory"
	prefixFile      = "File"
	prefixUnknown   = "Unknown"
)
    40  
    41  // ToFormatModel creates and populates a new SPDX document struct that follows the SPDX 2.3
    42  // spec from the given SBOM model.
    43  //
    44  //nolint:funlen
    45  func ToFormatModel(s sbom.SBOM) *spdx.Document {
    46  	name, namespace := DocumentNameAndNamespace(s.Source, s.Descriptor)
    47  
    48  	packages := toPackages(s.Artifacts.Packages, s)
    49  
    50  	relationships := toRelationships(s.RelationshipsSorted())
    51  
    52  	// for valid SPDX we need a document describes relationship
    53  	describesID := spdx.ElementID("DOCUMENT")
    54  
    55  	rootPackage := toRootPackage(s.Source)
    56  	if rootPackage != nil {
    57  		describesID = rootPackage.PackageSPDXIdentifier
    58  
    59  		// add all relationships from the document root to all other packages
    60  		relationships = append(relationships, toRootRelationships(rootPackage, packages)...)
    61  
    62  		// append the root package
    63  		packages = append(packages, rootPackage)
    64  	}
    65  
    66  	// add a relationship for the package the document describes
    67  	documentDescribesRelationship := &spdx.Relationship{
    68  		RefA: spdx.DocElementID{
    69  			ElementRefID: "DOCUMENT",
    70  		},
    71  		Relationship: string(DescribesRelationship),
    72  		RefB: spdx.DocElementID{
    73  			ElementRefID: describesID,
    74  		},
    75  	}
    76  
    77  	// add the root document relationship
    78  	relationships = append(relationships, documentDescribesRelationship)
    79  
    80  	return &spdx.Document{
    81  		// 6.1: SPDX Version; should be in the format "SPDX-x.x"
    82  		// Cardinality: mandatory, one
    83  		SPDXVersion: spdx.Version,
    84  
    85  		// 6.2: Data License; should be "CC0-1.0"
    86  		// Cardinality: mandatory, one
    87  		DataLicense: spdx.DataLicense,
    88  
    89  		// 6.3: SPDX Identifier; should be "DOCUMENT" to represent mandatory identifier of SPDXRef-DOCUMENT
    90  		// Cardinality: mandatory, one
    91  		SPDXIdentifier: "DOCUMENT",
    92  
    93  		// 6.4: Document Name
    94  		// Cardinality: mandatory, one
    95  		DocumentName: name,
    96  
    97  		// 6.5: Document Namespace
    98  		// Cardinality: mandatory, one
    99  		// Purpose: Provide an SPDX document specific namespace as a unique absolute Uniform Resource
   100  		// Identifier (URI) as specified in RFC-3986, with the exception of the ‘#’ delimiter. The SPDX
   101  		// Document URI cannot contain a URI "part" (e.g. the "#" character), since the ‘#’ is used in SPDX
   102  		// element URIs (packages, files, snippets, etc) to separate the document namespace from the
   103  		// element’s SPDX identifier. Additionally, a scheme (e.g. “https:”) is required.
   104  
   105  		// The URI must be unique for the SPDX document including the specific version of the SPDX document.
   106  		// If the SPDX document is updated, thereby creating a new version, a new URI for the updated
   107  		// document must be used. There can only be one URI for an SPDX document and only one SPDX document
   108  		// for a given URI.
   109  
   110  		// Note that the URI does not have to be accessible. It is only intended to provide a unique ID.
   111  		// In many cases, the URI will point to a web accessible document, but this should not be assumed
   112  		// to be the case.
   113  
   114  		DocumentNamespace: namespace,
   115  
   116  		// 6.6: External Document References
   117  		// Cardinality: optional, one or many
   118  		ExternalDocumentReferences: nil,
   119  
   120  		// 6.11: Document Comment
   121  		// Cardinality: optional, one
   122  		DocumentComment: "",
   123  
   124  		CreationInfo: &spdx.CreationInfo{
   125  			// 6.7: License List Version
   126  			// Cardinality: optional, one
   127  			LicenseListVersion: spdxlicense.Version,
   128  
   129  			// 6.8: Creators: may have multiple keys for Person, Organization
   130  			//      and/or Tool
   131  			// Cardinality: mandatory, one or many
   132  			Creators: []spdx.Creator{
   133  				{
   134  					Creator:     "Anchore, Inc",
   135  					CreatorType: "Organization",
   136  				},
   137  				{
   138  					Creator:     s.Descriptor.Name + "-" + s.Descriptor.Version,
   139  					CreatorType: "Tool",
   140  				},
   141  			},
   142  
   143  			// 6.9: Created: data format YYYY-MM-DDThh:mm:ssZ
   144  			// Cardinality: mandatory, one
   145  			Created: time.Now().UTC().Format(time.RFC3339),
   146  
   147  			// 6.10: Creator Comment
   148  			// Cardinality: optional, one
   149  			CreatorComment: "",
   150  		},
   151  		Packages:      packages,
   152  		Files:         toFiles(s),
   153  		Relationships: relationships,
   154  		OtherLicenses: toOtherLicenses(s.Artifacts.Packages),
   155  	}
   156  }
   157  
   158  func toRootRelationships(rootPackage *spdx.Package, packages []*spdx.Package) (out []*spdx.Relationship) {
   159  	for _, p := range packages {
   160  		out = append(out, &spdx.Relationship{
   161  			RefA: spdx.DocElementID{
   162  				ElementRefID: rootPackage.PackageSPDXIdentifier,
   163  			},
   164  			Relationship: string(ContainsRelationship),
   165  			RefB: spdx.DocElementID{
   166  				ElementRefID: p.PackageSPDXIdentifier,
   167  			},
   168  		})
   169  	}
   170  	return
   171  }
   172  
   173  //nolint:funlen
   174  func toRootPackage(s source.Description) *spdx.Package {
   175  	var prefix string
   176  
   177  	name := s.Name
   178  	version := s.Version
   179  
   180  	var purl *packageurl.PackageURL
   181  	purpose := ""
   182  	var checksums []spdx.Checksum
   183  	switch m := s.Metadata.(type) {
   184  	case source.StereoscopeImageSourceMetadata:
   185  		prefix = prefixImage
   186  		purpose = spdxPrimaryPurposeContainer
   187  
   188  		qualifiers := packageurl.Qualifiers{
   189  			{
   190  				Key:   "arch",
   191  				Value: m.Architecture,
   192  			},
   193  		}
   194  
   195  		ref, _ := reference.Parse(m.UserInput)
   196  		if ref, ok := ref.(reference.NamedTagged); ok {
   197  			qualifiers = append(qualifiers, packageurl.Qualifier{
   198  				Key:   "tag",
   199  				Value: ref.Tag(),
   200  			})
   201  		}
   202  
   203  		c := toChecksum(m.ManifestDigest)
   204  		if c != nil {
   205  			checksums = append(checksums, *c)
   206  			purl = &packageurl.PackageURL{
   207  				Type:       "oci",
   208  				Name:       s.Name,
   209  				Version:    m.ManifestDigest,
   210  				Qualifiers: qualifiers,
   211  			}
   212  		}
   213  
   214  	case source.DirectorySourceMetadata:
   215  		prefix = prefixDirectory
   216  		purpose = spdxPrimaryPurposeFile
   217  
   218  	case source.FileSourceMetadata:
   219  		prefix = prefixFile
   220  		purpose = spdxPrimaryPurposeFile
   221  
   222  		for _, d := range m.Digests {
   223  			checksums = append(checksums, spdx.Checksum{
   224  				Algorithm: toChecksumAlgorithm(d.Algorithm),
   225  				Value:     d.Value,
   226  			})
   227  		}
   228  	default:
   229  		prefix = prefixUnknown
   230  		purpose = spdxPrimaryPurposeOther
   231  
   232  		if name == "" {
   233  			name = s.ID
   234  		}
   235  	}
   236  
   237  	p := &spdx.Package{
   238  		PackageName:               name,
   239  		PackageSPDXIdentifier:     spdx.ElementID(SanitizeElementID(fmt.Sprintf("DocumentRoot-%s-%s", prefix, name))),
   240  		PackageVersion:            version,
   241  		PackageChecksums:          checksums,
   242  		PackageExternalReferences: nil,
   243  		PrimaryPackagePurpose:     purpose,
   244  		PackageSupplier: &spdx.Supplier{
   245  			Supplier: NOASSERTION,
   246  		},
   247  		PackageDownloadLocation: NOASSERTION,
   248  	}
   249  
   250  	if purl != nil {
   251  		p.PackageExternalReferences = []*spdx.PackageExternalReference{
   252  			{
   253  				Category: string(PackageManagerReferenceCategory),
   254  				RefType:  string(PurlExternalRefType),
   255  				Locator:  purl.String(),
   256  			},
   257  		}
   258  	}
   259  
   260  	return p
   261  }
   262  
   263  func toSPDXID(identifiable artifact.Identifiable) spdx.ElementID {
   264  	maxLen := 40
   265  	id := ""
   266  	switch it := identifiable.(type) {
   267  	case pkg.Package:
   268  		switch {
   269  		case it.Type != "" && it.Name != "":
   270  			id = fmt.Sprintf("Package-%s-%s-%s", it.Type, it.Name, it.ID())
   271  		case it.Name != "":
   272  			id = fmt.Sprintf("Package-%s-%s", it.Name, it.ID())
   273  		case it.Type != "":
   274  			id = fmt.Sprintf("Package-%s-%s", it.Type, it.ID())
   275  		default:
   276  			id = fmt.Sprintf("Package-%s", it.ID())
   277  		}
   278  	case file.Coordinates:
   279  		p := ""
   280  		parts := strings.Split(it.RealPath, "/")
   281  		for i := len(parts); i > 0; i-- {
   282  			part := parts[i-1]
   283  			if len(part) == 0 {
   284  				continue
   285  			}
   286  			if i < len(parts) && len(p)+len(part)+3 > maxLen {
   287  				p = "..." + p
   288  				break
   289  			}
   290  			p = path.Join(part, p)
   291  		}
   292  		id = fmt.Sprintf("File-%s-%s", p, it.ID())
   293  	default:
   294  		id = string(identifiable.ID())
   295  	}
   296  	// NOTE: the spdx library prepend SPDXRef-, so we don't do it here
   297  	return spdx.ElementID(SanitizeElementID(id))
   298  }
   299  
   300  // packages populates all Package Information from the package Collection (see https://spdx.github.io/spdx-spec/3-package-information/)
   301  //
   302  //nolint:funlen
   303  func toPackages(catalog *pkg.Collection, sbom sbom.SBOM) (results []*spdx.Package) {
   304  	for _, p := range catalog.Sorted() {
   305  		// name should be guaranteed to be unique, but semantically useful and stable
   306  		id := toSPDXID(p)
   307  
   308  		// If the Concluded License is not the same as the Declared License, a written explanation should be provided
   309  		// in the Comments on License field (section 7.16). With respect to NOASSERTION, a written explanation in
   310  		// the Comments on License field (section 7.16) is preferred.
   311  		// extract these correctly to the spdx license format
   312  		concluded, declared := License(p)
   313  
   314  		// two ways to get filesAnalyzed == true:
   315  		// 1. syft has generated a sha1 digest for the package itself - usually in the java cataloger
   316  		// 2. syft has generated a sha1 digest for the package's contents
   317  		packageChecksums, filesAnalyzed := toPackageChecksums(p)
   318  
   319  		packageVerificationCode := newPackageVerificationCode(p, sbom)
   320  		if packageVerificationCode != nil {
   321  			filesAnalyzed = true
   322  		}
   323  
   324  		// invalid SPDX document state
   325  		if filesAnalyzed && packageVerificationCode == nil {
   326  			// this is an invalid document state
   327  			// we reset the filesAnalyzed flag to false to avoid
   328  			// cases where a package digest was generated but there was
   329  			// not enough metadata to generate a verification code regarding the files
   330  			filesAnalyzed = false
   331  		}
   332  
   333  		results = append(results, &spdx.Package{
   334  			// NOT PART OF SPEC
   335  			// flag: does this "package" contain files that were in fact "unpackaged",
   336  			// e.g. included directly in the Document without being in a Package?
   337  			IsUnpackaged: false,
   338  
   339  			// 7.1: Package Name
   340  			// Cardinality: mandatory, one
   341  			PackageName: p.Name,
   342  
   343  			// 7.2: Package SPDX Identifier: "SPDXRef-[idstring]"
   344  			// Cardinality: mandatory, one
   345  			PackageSPDXIdentifier: id,
   346  
   347  			// 7.3: Package Version
   348  			// Cardinality: optional, one
   349  			PackageVersion: p.Version,
   350  
   351  			// 7.4: Package File Name
   352  			// Cardinality: optional, one
   353  			PackageFileName: "",
   354  
   355  			// 7.5: Package Supplier: may have single result for either Person or Organization,
   356  			//                        or NOASSERTION
   357  			// Cardinality: optional, one
   358  
   359  			// 7.6: Package Originator: may have single result for either Person or Organization,
   360  			//                          or NOASSERTION
   361  			// Cardinality: optional, one
   362  			PackageSupplier: toPackageSupplier(p),
   363  
   364  			PackageOriginator: toPackageOriginator(p),
   365  
   366  			// 7.7: Package Download Location
   367  			// Cardinality: mandatory, one
   368  			// NONE if there is no download location whatsoever.
   369  			// NOASSERTION if:
   370  			//   (i) the SPDX file creator has attempted to but cannot reach a reasonable objective determination;
   371  			//   (ii) the SPDX file creator has made no attempt to determine this field; or
   372  			//   (iii) the SPDX file creator has intentionally provided no information (no meaning should be implied by doing so).
   373  			PackageDownloadLocation: DownloadLocation(p),
   374  
   375  			// 7.8: FilesAnalyzed
   376  			// Cardinality: optional, one; default value is "true" if omitted
   377  
   378  			// Purpose: Indicates whether the file content of this package has been available for or subjected to
   379  			// analysis when creating the SPDX document. If false, indicates packages that represent metadata or
   380  			// URI references to a project, product, artifact, distribution or a component. If false, the package
   381  			// must not contain any files.
   382  
   383  			// Intent: A package can refer to a project, product, artifact, distribution or a component that is
   384  			// external to the SPDX document.
   385  			FilesAnalyzed: filesAnalyzed,
   386  			// NOT PART OF SPEC: did FilesAnalyzed tag appear?
   387  			IsFilesAnalyzedTagPresent: true,
   388  
   389  			// 7.9: Package Verification Code
   390  			// Cardinality: optional, one if filesAnalyzed is true / omitted;
   391  			//              zero (must be omitted) if filesAnalyzed is false
   392  			PackageVerificationCode: packageVerificationCode,
   393  
   394  			// 7.10: Package Checksum: may have keys for SHA1, SHA256 and/or MD5
   395  			// Cardinality: optional, one or many
   396  
   397  			// 7.10.1 Purpose: Provide an independently reproducible mechanism that permits unique identification of
   398  			// a specific package that correlates to the data in this SPDX file. This identifier enables a recipient
   399  			// to determine if any file in the original package has been changed. If the SPDX file is to be included
   400  			// in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the
   401  			// checksum by default.
   402  			PackageChecksums: packageChecksums,
   403  
   404  			// 7.11: Package Home Page
   405  			// Cardinality: optional, one
   406  			PackageHomePage: Homepage(p),
   407  
   408  			// 7.12: Source Information
   409  			// Cardinality: optional, one
   410  			PackageSourceInfo: SourceInfo(p),
   411  
   412  			// 7.13: Concluded License: SPDX License Expression, "NONE" or "NOASSERTION"
   413  			// Cardinality: mandatory, one
   414  			// Purpose: Contain the license the SPDX file creator has concluded as governing the
   415  			// package or alternative values, if the governing license cannot be determined.
   416  			PackageLicenseConcluded: concluded,
   417  
   418  			// 7.14: All Licenses Info from Files: SPDX License Expression, "NONE" or "NOASSERTION"
   419  			// Cardinality: mandatory, one or many if filesAnalyzed is true / omitted;
   420  			//              zero (must be omitted) if filesAnalyzed is false
   421  			PackageLicenseInfoFromFiles: nil,
   422  
   423  			// 7.15: Declared License: SPDX License Expression, "NONE" or "NOASSERTION"
   424  			// Cardinality: mandatory, one
   425  			// Purpose: List the licenses that have been declared by the authors of the package.
   426  			// Any license information that does not originate from the package authors, e.g. license
   427  			// information from a third party repository, should not be included in this field.
   428  			PackageLicenseDeclared: declared,
   429  
   430  			// 7.16: Comments on License
   431  			// Cardinality: optional, one
   432  			PackageLicenseComments: "",
   433  
   434  			// 7.17: Copyright Text: copyright notice(s) text, "NONE" or "NOASSERTION"
   435  			// Cardinality: mandatory, one
   436  			// Purpose: IdentifyFormat the copyright holders of the package, as well as any dates present. This will be a free form text field extracted from package information files. The options to populate this field are limited to:
   437  			//
   438  			// Any text related to a copyright notice, even if not complete;
   439  			// NONE if the package contains no copyright information whatsoever; or
   440  			// NOASSERTION, if
   441  			//   (i) the SPDX document creator has made no attempt to determine this field; or
   442  			//   (ii) the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so).
   443  			//
   444  			PackageCopyrightText: noAssertion,
   445  
   446  			// 7.18: Package Summary Description
   447  			// Cardinality: optional, one
   448  			PackageSummary: "",
   449  
   450  			// 7.19: Package Detailed Description
   451  			// Cardinality: optional, one
   452  			PackageDescription: Description(p),
   453  
   454  			// 7.20: Package Comment
   455  			// Cardinality: optional, one
   456  			PackageComment: "",
   457  
   458  			// 7.21: Package External Reference
   459  			// Cardinality: optional, one or many
   460  			PackageExternalReferences: formatSPDXExternalRefs(p),
   461  
   462  			// 7.22: Package External Reference Comment
   463  			// Cardinality: conditional (optional, one) for each External Reference
   464  			// contained within PackageExternalReference2_1 struct, if present
   465  
   466  			// 7.23: Package Attribution Text
   467  			// Cardinality: optional, one or many
   468  			PackageAttributionTexts: nil,
   469  		})
   470  	}
   471  	return results
   472  }
   473  
   474  func toPackageChecksums(p pkg.Package) ([]spdx.Checksum, bool) {
   475  	filesAnalyzed := false
   476  	var checksums []spdx.Checksum
   477  	switch meta := p.Metadata.(type) {
   478  	// we generate digest for some Java packages
   479  	// spdx.github.io/spdx-spec/package-information/#710-package-checksum-field
   480  	case pkg.JavaArchive:
   481  		// if syft has generated the digest here then filesAnalyzed is true
   482  		if len(meta.ArchiveDigests) > 0 {
   483  			filesAnalyzed = true
   484  			for _, digest := range meta.ArchiveDigests {
   485  				algo := strings.ToUpper(digest.Algorithm)
   486  				checksums = append(checksums, spdx.Checksum{
   487  					Algorithm: spdx.ChecksumAlgorithm(algo),
   488  					Value:     digest.Value,
   489  				})
   490  			}
   491  		}
   492  	case pkg.GolangBinaryBuildinfoEntry:
   493  		// because the H1 digest is found in the Golang metadata we cannot claim that the files were analyzed
   494  		algo, hexStr, err := util.HDigestToSHA(meta.H1Digest)
   495  		if err != nil {
   496  			log.Debugf("invalid h1digest: %s: %v", meta.H1Digest, err)
   497  			break
   498  		}
   499  		algo = strings.ToUpper(algo)
   500  		checksums = append(checksums, spdx.Checksum{
   501  			Algorithm: spdx.ChecksumAlgorithm(algo),
   502  			Value:     hexStr,
   503  		})
   504  	}
   505  	return checksums, filesAnalyzed
   506  }
   507  
   508  func toPackageOriginator(p pkg.Package) *spdx.Originator {
   509  	kind, originator := Originator(p)
   510  	if kind == "" || originator == "" {
   511  		return nil
   512  	}
   513  	return &spdx.Originator{
   514  		Originator:     originator,
   515  		OriginatorType: kind,
   516  	}
   517  }
   518  
   519  func toPackageSupplier(p pkg.Package) *spdx.Supplier {
   520  	// this uses the Originator function for now until
   521  	// a better distinction can be made for supplier
   522  	kind, supplier := Originator(p)
   523  	if kind == "" || supplier == "" {
   524  		return &spdx.Supplier{
   525  			Supplier: NOASSERTION,
   526  		}
   527  	}
   528  	return &spdx.Supplier{
   529  		Supplier:     supplier,
   530  		SupplierType: kind,
   531  	}
   532  }
   533  
   534  func formatSPDXExternalRefs(p pkg.Package) (refs []*spdx.PackageExternalReference) {
   535  	for _, ref := range ExternalRefs(p) {
   536  		refs = append(refs, &spdx.PackageExternalReference{
   537  			Category:           string(ref.ReferenceCategory),
   538  			RefType:            string(ref.ReferenceType),
   539  			Locator:            ref.ReferenceLocator,
   540  			ExternalRefComment: ref.Comment,
   541  		})
   542  	}
   543  	return refs
   544  }
   545  
   546  func toRelationships(relationships []artifact.Relationship) (result []*spdx.Relationship) {
   547  	for _, r := range relationships {
   548  		exists, relationshipType, comment := lookupRelationship(r.Type)
   549  
   550  		if !exists {
   551  			log.Debugf("unable to convert relationship to SPDX, dropping: %+v", r)
   552  			continue
   553  		}
   554  
   555  		// FIXME: we are only currently including Package -> * relationships
   556  		if _, ok := r.From.(pkg.Package); !ok {
   557  			log.Debugf("skipping non-package relationship: %+v", r)
   558  			continue
   559  		}
   560  
   561  		result = append(result, &spdx.Relationship{
   562  			RefA: spdx.DocElementID{
   563  				ElementRefID: toSPDXID(r.From),
   564  			},
   565  			Relationship: string(relationshipType),
   566  			RefB: spdx.DocElementID{
   567  				ElementRefID: toSPDXID(r.To),
   568  			},
   569  			RelationshipComment: comment,
   570  		})
   571  	}
   572  	return result
   573  }
   574  
   575  func lookupRelationship(ty artifact.RelationshipType) (bool, RelationshipType, string) {
   576  	switch ty {
   577  	case artifact.ContainsRelationship:
   578  		return true, ContainsRelationship, ""
   579  	case artifact.DependencyOfRelationship:
   580  		return true, DependencyOfRelationship, ""
   581  	case artifact.OwnershipByFileOverlapRelationship:
   582  		return true, OtherRelationship, fmt.Sprintf("%s: indicates that the parent package claims ownership of a child package since the parent metadata indicates overlap with a location that a cataloger found the child package by", ty)
   583  	case artifact.EvidentByRelationship:
   584  		return true, OtherRelationship, fmt.Sprintf("%s: indicates the package's existence is evident by the given file", ty)
   585  	}
   586  	return false, "", ""
   587  }
   588  
   589  func toFiles(s sbom.SBOM) (results []*spdx.File) {
   590  	artifacts := s.Artifacts
   591  
   592  	for _, coordinates := range s.AllCoordinates() {
   593  		var metadata *file.Metadata
   594  		if metadataForLocation, exists := artifacts.FileMetadata[coordinates]; exists {
   595  			metadata = &metadataForLocation
   596  		}
   597  
   598  		var digests []file.Digest
   599  		if digestsForLocation, exists := artifacts.FileDigests[coordinates]; exists {
   600  			digests = digestsForLocation
   601  		}
   602  
   603  		// if we don't have any metadata or digests for this location
   604  		// then the file is most likely a symlink or non-regular file
   605  		// for now we include a 0 sha1 digest as requested by the spdx spec
   606  		// TODO: update location code in core SBOM so that we can map complex links
   607  		// back to their real file digest location.
   608  		if len(digests) == 0 {
   609  			digests = append(digests, file.Digest{Algorithm: "sha1", Value: "0000000000000000000000000000000000000000"})
   610  		}
   611  
   612  		// TODO: add file classifications (?) and content as a snippet
   613  
   614  		var comment string
   615  		if coordinates.FileSystemID != "" {
   616  			comment = fmt.Sprintf("layerID: %s", coordinates.FileSystemID)
   617  		}
   618  
   619  		results = append(results, &spdx.File{
   620  			FileSPDXIdentifier: toSPDXID(coordinates),
   621  			FileComment:        comment,
   622  			// required, no attempt made to determine license information
   623  			LicenseConcluded: noAssertion,
   624  			Checksums:        toFileChecksums(digests),
   625  			FileName:         coordinates.RealPath,
   626  			FileTypes:        toFileTypes(metadata),
   627  		})
   628  	}
   629  
   630  	// sort by real path then virtual path to ensure the result is stable across multiple runs
   631  	sort.SliceStable(results, func(i, j int) bool {
   632  		if results[i].FileName == results[j].FileName {
   633  			return results[i].FileSPDXIdentifier < results[j].FileSPDXIdentifier
   634  		}
   635  		return results[i].FileName < results[j].FileName
   636  	})
   637  	return results
   638  }
   639  
   640  func toFileChecksums(digests []file.Digest) (checksums []spdx.Checksum) {
   641  	checksums = make([]spdx.Checksum, 0, len(digests))
   642  	for _, digest := range digests {
   643  		checksums = append(checksums, spdx.Checksum{
   644  			Algorithm: toChecksumAlgorithm(digest.Algorithm),
   645  			Value:     digest.Value,
   646  		})
   647  	}
   648  	return checksums
   649  }
   650  
   651  // toChecksum takes a checksum in the format <algorithm>:<hash> and returns an spdx.Checksum or nil if the string is invalid
   652  func toChecksum(algorithmHash string) *spdx.Checksum {
   653  	parts := strings.Split(algorithmHash, ":")
   654  	if len(parts) < 2 {
   655  		return nil
   656  	}
   657  	return &spdx.Checksum{
   658  		Algorithm: toChecksumAlgorithm(parts[0]),
   659  		Value:     parts[1],
   660  	}
   661  }
   662  
   663  func toChecksumAlgorithm(algorithm string) spdx.ChecksumAlgorithm {
   664  	// this needs to be an uppercase version of our algorithm
   665  	return spdx.ChecksumAlgorithm(strings.ToUpper(algorithm))
   666  }
   667  
   668  func toFileTypes(metadata *file.Metadata) (ty []string) {
   669  	if metadata == nil {
   670  		return nil
   671  	}
   672  
   673  	mimeTypePrefix := strings.Split(metadata.MIMEType, "/")[0]
   674  	switch mimeTypePrefix {
   675  	case "image":
   676  		ty = append(ty, string(ImageFileType))
   677  	case "video":
   678  		ty = append(ty, string(VideoFileType))
   679  	case "application":
   680  		ty = append(ty, string(ApplicationFileType))
   681  	case "text":
   682  		ty = append(ty, string(TextFileType))
   683  	case "audio":
   684  		ty = append(ty, string(AudioFileType))
   685  	}
   686  
   687  	if internal.IsExecutable(metadata.MIMEType) {
   688  		ty = append(ty, string(BinaryFileType))
   689  	}
   690  
   691  	if internal.IsArchive(metadata.MIMEType) {
   692  		ty = append(ty, string(ArchiveFileType))
   693  	}
   694  
   695  	// TODO: add support for source, spdx, and documentation file types
   696  	if len(ty) == 0 {
   697  		ty = append(ty, string(OtherFileType))
   698  	}
   699  
   700  	return ty
   701  }
   702  
   703  // other licenses are for licenses from the pkg.Package that do not have an SPDXExpression
   704  // field. The spdxexpression field is only filled given a validated Value field.
   705  func toOtherLicenses(catalog *pkg.Collection) []*spdx.OtherLicense {
   706  	licenses := map[string]spdxLicense{}
   707  
   708  	for p := range catalog.Enumerate() {
   709  		declaredLicenses, concludedLicenses := parseLicenses(p.Licenses.ToSlice())
   710  		for _, l := range declaredLicenses {
   711  			if l.value != "" {
   712  				licenses[l.id] = l
   713  			}
   714  		}
   715  		for _, l := range concludedLicenses {
   716  			if l.value != "" {
   717  				licenses[l.id] = l
   718  			}
   719  		}
   720  	}
   721  
   722  	var result []*spdx.OtherLicense
   723  
   724  	var ids []string
   725  	for licenseID := range licenses {
   726  		ids = append(ids, licenseID)
   727  	}
   728  
   729  	slices.Sort(ids)
   730  	for _, id := range ids {
   731  		license := licenses[id]
   732  		result = append(result, &spdx.OtherLicense{
   733  			LicenseIdentifier: license.id,
   734  			ExtractedText:     license.value,
   735  		})
   736  	}
   737  	return result
   738  }
   739  
   740  // TODO: handle SPDX excludes file case
   741  // f file is an "excludes" file, skip it /* exclude SPDX analysis file(s) */
   742  // see: https://spdx.github.io/spdx-spec/v2.3/package-information/#79-package-verification-code-field
   743  // the above link contains the SPDX algorithm for a package verification code
   744  func newPackageVerificationCode(p pkg.Package, sbom sbom.SBOM) *spdx.PackageVerificationCode {
   745  	// key off of the contains relationship;
   746  	// spdx validator will fail if a package claims to contain a file but no sha1 provided
   747  	// if a sha1 for a file is provided then the validator will fail if the package does not have
   748  	// a package verification code
   749  	coordinates := sbom.CoordinatesForPackage(p, artifact.ContainsRelationship)
   750  	var digests []file.Digest
   751  	for _, c := range coordinates {
   752  		digest := sbom.Artifacts.FileDigests[c]
   753  		if len(digest) == 0 {
   754  			continue
   755  		}
   756  
   757  		var d file.Digest
   758  		for _, digest := range digest {
   759  			if digest.Algorithm == "sha1" {
   760  				d = digest
   761  				break
   762  			}
   763  		}
   764  		digests = append(digests, d)
   765  	}
   766  
   767  	if len(digests) == 0 {
   768  		return nil
   769  	}
   770  
   771  	// sort templist in ascending order by SHA1 value
   772  	sort.SliceStable(digests, func(i, j int) bool {
   773  		return digests[i].Value < digests[j].Value
   774  	})
   775  
   776  	// filelist = templist with "/n"s removed. /* ordered sequence of SHA1 values with no separators
   777  	var b strings.Builder
   778  	for _, digest := range digests {
   779  		b.WriteString(digest.Value)
   780  	}
   781  
   782  	//nolint:gosec
   783  	hasher := sha1.New()
   784  	_, _ = hasher.Write([]byte(b.String()))
   785  	return &spdx.PackageVerificationCode{
   786  		// 7.9.1: Package Verification Code Value
   787  		// Cardinality: mandatory, one
   788  		Value: fmt.Sprintf("%+x", hasher.Sum(nil)),
   789  	}
   790  }