github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/format/common/spdxhelpers/to_format_model.go (about)

     1  //nolint:gosec // sha1 is used as a required hash function for SPDX, not a crypto function
     2  package spdxhelpers
     3  
     4  import (
     5  	"crypto/sha1"
     6  	"fmt"
     7  	"path"
     8  	"slices"
     9  	"sort"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/distribution/reference"
    14  	"github.com/spdx/tools-golang/spdx"
    15  
    16  	"github.com/anchore/packageurl-go"
    17  	"github.com/anchore/syft/internal/log"
    18  	"github.com/anchore/syft/internal/mimetype"
    19  	"github.com/anchore/syft/internal/spdxlicense"
    20  	"github.com/anchore/syft/syft/artifact"
    21  	"github.com/anchore/syft/syft/file"
    22  	"github.com/anchore/syft/syft/format/internal/spdxutil/helpers"
    23  	"github.com/anchore/syft/syft/pkg"
    24  	"github.com/anchore/syft/syft/sbom"
    25  	"github.com/anchore/syft/syft/source"
    26  )
    27  
const (
	// noAssertion is the literal used in this file for fields where no value was determined
	// (e.g. copyright text and concluded file licenses).
	noAssertion = "NOASSERTION"

	// values assigned to the root package's PrimaryPackagePurpose, chosen by source type
	spdxPrimaryPurposeContainer = "CONTAINER"
	spdxPrimaryPurposeFile      = "FILE"
	spdxPrimaryPurposeOther     = "OTHER"

	// prefixes embedded in the root package's SPDX identifier ("DocumentRoot-<prefix>-<name>"),
	// chosen by source type
	prefixImage     = "Image"
	prefixDirectory = "Directory"
	prefixFile      = "File"
	prefixUnknown   = "Unknown"
)
    40  
    41  // ToFormatModel creates and populates a new SPDX document struct that follows the SPDX 2.3
    42  // spec from the given SBOM model.
    43  //
    44  //nolint:funlen
    45  func ToFormatModel(s sbom.SBOM) *spdx.Document {
    46  	name, namespace := helpers.DocumentNameAndNamespace(s.Source, s.Descriptor)
    47  
    48  	packages := toPackages(s.Artifacts.Packages, s)
    49  
    50  	relationships := toRelationships(s.RelationshipsSorted())
    51  
    52  	// for valid SPDX we need a document describes relationship
    53  	describesID := spdx.ElementID("DOCUMENT")
    54  
    55  	rootPackage := toRootPackage(s.Source)
    56  	if rootPackage != nil {
    57  		describesID = rootPackage.PackageSPDXIdentifier
    58  
    59  		// add all relationships from the document root to all other packages
    60  		relationships = append(relationships, toRootRelationships(rootPackage, packages)...)
    61  
    62  		// append the root package
    63  		packages = append(packages, rootPackage)
    64  	}
    65  
    66  	// add a relationship for the package the document describes
    67  	documentDescribesRelationship := &spdx.Relationship{
    68  		RefA: spdx.DocElementID{
    69  			ElementRefID: "DOCUMENT",
    70  		},
    71  		Relationship: string(helpers.DescribesRelationship),
    72  		RefB: spdx.DocElementID{
    73  			ElementRefID: describesID,
    74  		},
    75  	}
    76  
    77  	// add the root document relationship
    78  	relationships = append(relationships, documentDescribesRelationship)
    79  
    80  	return &spdx.Document{
    81  		// 6.1: SPDX Version; should be in the format "SPDX-x.x"
    82  		// Cardinality: mandatory, one
    83  		SPDXVersion: spdx.Version,
    84  
    85  		// 6.2: Data License; should be "CC0-1.0"
    86  		// Cardinality: mandatory, one
    87  		DataLicense: spdx.DataLicense,
    88  
    89  		// 6.3: SPDX Identifier; should be "DOCUMENT" to represent mandatory identifier of SPDXRef-DOCUMENT
    90  		// Cardinality: mandatory, one
    91  		SPDXIdentifier: "DOCUMENT",
    92  
    93  		// 6.4: Document Name
    94  		// Cardinality: mandatory, one
    95  		DocumentName: name,
    96  
    97  		// 6.5: Document Namespace
    98  		// Cardinality: mandatory, one
    99  		// Purpose: Provide an SPDX document specific namespace as a unique absolute Uniform Resource
   100  		// Identifier (URI) as specified in RFC-3986, with the exception of the ‘#’ delimiter. The SPDX
   101  		// Document URI cannot contain a URI "part" (e.g. the "#" character), since the ‘#’ is used in SPDX
   102  		// element URIs (packages, files, snippets, etc) to separate the document namespace from the
   103  		// element’s SPDX identifier. Additionally, a scheme (e.g. “https:”) is required.
   104  
   105  		// The URI must be unique for the SPDX document including the specific version of the SPDX document.
   106  		// If the SPDX document is updated, thereby creating a new version, a new URI for the updated
   107  		// document must be used. There can only be one URI for an SPDX document and only one SPDX document
   108  		// for a given URI.
   109  
   110  		// Note that the URI does not have to be accessible. It is only intended to provide a unique ID.
   111  		// In many cases, the URI will point to a web accessible document, but this should not be assumed
   112  		// to be the case.
   113  
   114  		DocumentNamespace: namespace,
   115  
   116  		// 6.6: External Document References
   117  		// Cardinality: optional, one or many
   118  		ExternalDocumentReferences: nil,
   119  
   120  		// 6.11: Document Comment
   121  		// Cardinality: optional, one
   122  		DocumentComment: "",
   123  
   124  		CreationInfo: &spdx.CreationInfo{
   125  			// 6.7: License List Version
   126  			// Cardinality: optional, one
   127  			LicenseListVersion: spdxlicense.Version,
   128  
   129  			// 6.8: Creators: may have multiple keys for Person, Organization
   130  			//      and/or Tool
   131  			// Cardinality: mandatory, one or many
   132  			Creators: []spdx.Creator{
   133  				{
   134  					Creator:     "Anchore, Inc",
   135  					CreatorType: "Organization",
   136  				},
   137  				{
   138  					Creator:     s.Descriptor.Name + "-" + s.Descriptor.Version,
   139  					CreatorType: "Tool",
   140  				},
   141  			},
   142  
   143  			// 6.9: Created: data format YYYY-MM-DDThh:mm:ssZ
   144  			// Cardinality: mandatory, one
   145  			Created: time.Now().UTC().Format(time.RFC3339),
   146  
   147  			// 6.10: Creator Comment
   148  			// Cardinality: optional, one
   149  			CreatorComment: "",
   150  		},
   151  		Packages:      packages,
   152  		Files:         toFiles(s),
   153  		Relationships: relationships,
   154  		OtherLicenses: toOtherLicenses(s.Artifacts.Packages),
   155  	}
   156  }
   157  
   158  func toRootRelationships(rootPackage *spdx.Package, packages []*spdx.Package) (out []*spdx.Relationship) {
   159  	for _, p := range packages {
   160  		out = append(out, &spdx.Relationship{
   161  			RefA: spdx.DocElementID{
   162  				ElementRefID: rootPackage.PackageSPDXIdentifier,
   163  			},
   164  			Relationship: string(helpers.ContainsRelationship),
   165  			RefB: spdx.DocElementID{
   166  				ElementRefID: p.PackageSPDXIdentifier,
   167  			},
   168  		})
   169  	}
   170  	return
   171  }
   172  
   173  //nolint:funlen
   174  func toRootPackage(s source.Description) *spdx.Package {
   175  	var prefix string
   176  
   177  	name := s.Name
   178  	version := s.Version
   179  
   180  	var purl *packageurl.PackageURL
   181  	purpose := ""
   182  	var checksums []spdx.Checksum
   183  	switch m := s.Metadata.(type) {
   184  	case source.ImageMetadata:
   185  		prefix = prefixImage
   186  		purpose = spdxPrimaryPurposeContainer
   187  
   188  		qualifiers := packageurl.Qualifiers{
   189  			{
   190  				Key:   "arch",
   191  				Value: m.Architecture,
   192  			},
   193  		}
   194  
   195  		ref, _ := reference.Parse(m.UserInput)
   196  		if ref, ok := ref.(reference.NamedTagged); ok {
   197  			qualifiers = append(qualifiers, packageurl.Qualifier{
   198  				Key:   "tag",
   199  				Value: ref.Tag(),
   200  			})
   201  		}
   202  
   203  		c := toChecksum(m.ManifestDigest)
   204  		if c != nil {
   205  			checksums = append(checksums, *c)
   206  			purl = &packageurl.PackageURL{
   207  				Type:       "oci",
   208  				Name:       s.Name,
   209  				Version:    m.ManifestDigest,
   210  				Qualifiers: qualifiers,
   211  			}
   212  		}
   213  
   214  	case source.DirectoryMetadata:
   215  		prefix = prefixDirectory
   216  		purpose = spdxPrimaryPurposeFile
   217  
   218  	case source.FileMetadata:
   219  		prefix = prefixFile
   220  		purpose = spdxPrimaryPurposeFile
   221  
   222  		for _, d := range m.Digests {
   223  			checksums = append(checksums, spdx.Checksum{
   224  				Algorithm: toChecksumAlgorithm(d.Algorithm),
   225  				Value:     d.Value,
   226  			})
   227  		}
   228  	default:
   229  		prefix = prefixUnknown
   230  		purpose = spdxPrimaryPurposeOther
   231  
   232  		if name == "" {
   233  			name = s.ID
   234  		}
   235  	}
   236  
   237  	p := &spdx.Package{
   238  		PackageName:               name,
   239  		PackageSPDXIdentifier:     spdx.ElementID(helpers.SanitizeElementID(fmt.Sprintf("DocumentRoot-%s-%s", prefix, name))),
   240  		PackageVersion:            version,
   241  		PackageChecksums:          checksums,
   242  		PackageExternalReferences: nil,
   243  		PrimaryPackagePurpose:     purpose,
   244  		PackageSupplier: &spdx.Supplier{
   245  			Supplier: helpers.NOASSERTION,
   246  		},
   247  		PackageDownloadLocation: helpers.NOASSERTION,
   248  		PackageLicenseConcluded: helpers.NOASSERTION,
   249  		PackageLicenseDeclared:  helpers.NOASSERTION,
   250  	}
   251  
   252  	if purl != nil {
   253  		p.PackageExternalReferences = []*spdx.PackageExternalReference{
   254  			{
   255  				Category: string(helpers.PackageManagerReferenceCategory),
   256  				RefType:  string(helpers.PurlExternalRefType),
   257  				Locator:  purl.String(),
   258  			},
   259  		}
   260  	}
   261  
   262  	return p
   263  }
   264  
   265  func toSPDXID(identifiable artifact.Identifiable) spdx.ElementID {
   266  	maxLen := 40
   267  	id := ""
   268  	switch it := identifiable.(type) {
   269  	case pkg.Package:
   270  		switch {
   271  		case it.Type != "" && it.Name != "":
   272  			id = fmt.Sprintf("Package-%s-%s-%s", it.Type, it.Name, it.ID())
   273  		case it.Name != "":
   274  			id = fmt.Sprintf("Package-%s-%s", it.Name, it.ID())
   275  		case it.Type != "":
   276  			id = fmt.Sprintf("Package-%s-%s", it.Type, it.ID())
   277  		default:
   278  			id = fmt.Sprintf("Package-%s", it.ID())
   279  		}
   280  	case file.Coordinates:
   281  		p := ""
   282  		parts := strings.Split(it.RealPath, "/")
   283  		for i := len(parts); i > 0; i-- {
   284  			part := parts[i-1]
   285  			if len(part) == 0 {
   286  				continue
   287  			}
   288  			if i < len(parts) && len(p)+len(part)+3 > maxLen {
   289  				p = "..." + p
   290  				break
   291  			}
   292  			p = path.Join(part, p)
   293  		}
   294  		id = fmt.Sprintf("File-%s-%s", p, it.ID())
   295  	default:
   296  		id = string(identifiable.ID())
   297  	}
   298  	// NOTE: the spdx library prepend SPDXRef-, so we don't do it here
   299  	return spdx.ElementID(helpers.SanitizeElementID(id))
   300  }
   301  
   302  // packages populates all Package Information from the package Collection (see https://spdx.github.io/spdx-spec/3-package-information/)
   303  //
   304  //nolint:funlen
   305  func toPackages(catalog *pkg.Collection, sbom sbom.SBOM) (results []*spdx.Package) {
   306  	for _, p := range catalog.Sorted() {
   307  		// name should be guaranteed to be unique, but semantically useful and stable
   308  		id := toSPDXID(p)
   309  
   310  		// If the Concluded License is not the same as the Declared License, a written explanation should be provided
   311  		// in the Comments on License field (section 7.16). With respect to NOASSERTION, a written explanation in
   312  		// the Comments on License field (section 7.16) is preferred.
   313  		// extract these correctly to the spdx license format
   314  		concluded, declared := helpers.License(p)
   315  
   316  		// two ways to get filesAnalyzed == true:
   317  		// 1. syft has generated a sha1 digest for the package itself - usually in the java cataloger
   318  		// 2. syft has generated a sha1 digest for the package's contents
   319  		packageChecksums, filesAnalyzed := toPackageChecksums(p)
   320  
   321  		packageVerificationCode := newPackageVerificationCode(p, sbom)
   322  		if packageVerificationCode != nil {
   323  			filesAnalyzed = true
   324  		}
   325  
   326  		// invalid SPDX document state
   327  		if filesAnalyzed && packageVerificationCode == nil {
   328  			// this is an invalid document state
   329  			// we reset the filesAnalyzed flag to false to avoid
   330  			// cases where a package digest was generated but there was
   331  			// not enough metadata to generate a verification code regarding the files
   332  			filesAnalyzed = false
   333  		}
   334  
   335  		results = append(results, &spdx.Package{
   336  			// NOT PART OF SPEC
   337  			// flag: does this "package" contain files that were in fact "unpackaged",
   338  			// e.g. included directly in the Document without being in a Package?
   339  			IsUnpackaged: false,
   340  
   341  			// 7.1: Package Name
   342  			// Cardinality: mandatory, one
   343  			PackageName: p.Name,
   344  
   345  			// 7.2: Package SPDX Identifier: "SPDXRef-[idstring]"
   346  			// Cardinality: mandatory, one
   347  			PackageSPDXIdentifier: id,
   348  
   349  			// 7.3: Package Version
   350  			// Cardinality: optional, one
   351  			PackageVersion: p.Version,
   352  
   353  			// 7.4: Package File Name
   354  			// Cardinality: optional, one
   355  			PackageFileName: "",
   356  
   357  			// 7.5: Package Supplier: may have single result for either Person or Organization,
   358  			//                        or NOASSERTION
   359  			// Cardinality: optional, one
   360  
   361  			// 7.6: Package Originator: may have single result for either Person or Organization,
   362  			//                          or NOASSERTION
   363  			// Cardinality: optional, one
   364  			PackageSupplier: toPackageSupplier(p),
   365  
   366  			PackageOriginator: toPackageOriginator(p),
   367  
   368  			// 7.7: Package Download Location
   369  			// Cardinality: mandatory, one
   370  			// NONE if there is no download location whatsoever.
   371  			// NOASSERTION if:
   372  			//   (i) the SPDX file creator has attempted to but cannot reach a reasonable objective determination;
   373  			//   (ii) the SPDX file creator has made no attempt to determine this field; or
   374  			//   (iii) the SPDX file creator has intentionally provided no information (no meaning should be implied by doing so).
   375  			PackageDownloadLocation: helpers.DownloadLocation(p),
   376  
   377  			// 7.8: FilesAnalyzed
   378  			// Cardinality: optional, one; default value is "true" if omitted
   379  
   380  			// Purpose: Indicates whether the file content of this package has been available for or subjected to
   381  			// analysis when creating the SPDX document. If false, indicates packages that represent metadata or
   382  			// URI references to a project, product, artifact, distribution or a component. If false, the package
   383  			// must not contain any files.
   384  
   385  			// Intent: A package can refer to a project, product, artifact, distribution or a component that is
   386  			// external to the SPDX document.
   387  			FilesAnalyzed: filesAnalyzed,
   388  			// NOT PART OF SPEC: did FilesAnalyzed tag appear?
   389  			IsFilesAnalyzedTagPresent: true,
   390  
   391  			// 7.9: Package Verification Code
   392  			// Cardinality: optional, one if filesAnalyzed is true / omitted;
   393  			//              zero (must be omitted) if filesAnalyzed is false
   394  			PackageVerificationCode: packageVerificationCode,
   395  
   396  			// 7.10: Package Checksum: may have keys for SHA1, SHA256 and/or MD5
   397  			// Cardinality: optional, one or many
   398  
   399  			// 7.10.1 Purpose: Provide an independently reproducible mechanism that permits unique identification of
   400  			// a specific package that correlates to the data in this SPDX file. This identifier enables a recipient
   401  			// to determine if any file in the original package has been changed. If the SPDX file is to be included
   402  			// in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the
   403  			// checksum by default.
   404  			PackageChecksums: packageChecksums,
   405  
   406  			// 7.11: Package Home Page
   407  			// Cardinality: optional, one
   408  			PackageHomePage: helpers.Homepage(p),
   409  
   410  			// 7.12: Source Information
   411  			// Cardinality: optional, one
   412  			PackageSourceInfo: helpers.SourceInfo(p),
   413  
   414  			// 7.13: Concluded License: SPDX License Expression, "NONE" or "NOASSERTION"
   415  			// Cardinality: mandatory, one
   416  			// Purpose: Contain the license the SPDX file creator has concluded as governing the
   417  			// package or alternative values, if the governing license cannot be determined.
   418  			PackageLicenseConcluded: concluded,
   419  
   420  			// 7.14: All Licenses Info from Files: SPDX License Expression, "NONE" or "NOASSERTION"
   421  			// Cardinality: mandatory, one or many if filesAnalyzed is true / omitted;
   422  			//              zero (must be omitted) if filesAnalyzed is false
   423  			PackageLicenseInfoFromFiles: nil,
   424  
   425  			// 7.15: Declared License: SPDX License Expression, "NONE" or "NOASSERTION"
   426  			// Cardinality: mandatory, one
   427  			// Purpose: List the licenses that have been declared by the authors of the package.
   428  			// Any license information that does not originate from the package authors, e.g. license
   429  			// information from a third party repository, should not be included in this field.
   430  			PackageLicenseDeclared: declared,
   431  
   432  			// 7.16: Comments on License
   433  			// Cardinality: optional, one
   434  			PackageLicenseComments: "",
   435  
   436  			// 7.17: Copyright Text: copyright notice(s) text, "NONE" or "NOASSERTION"
   437  			// Cardinality: mandatory, one
   438  			// Purpose: IdentifyFormat the copyright holders of the package, as well as any dates present. This will be a free form text field extracted from package information files. The options to populate this field are limited to:
   439  			//
   440  			// Any text related to a copyright notice, even if not complete;
   441  			// NONE if the package contains no copyright information whatsoever; or
   442  			// NOASSERTION, if
   443  			//   (i) the SPDX document creator has made no attempt to determine this field; or
   444  			//   (ii) the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so).
   445  			//
   446  			PackageCopyrightText: noAssertion,
   447  
   448  			// 7.18: Package Summary Description
   449  			// Cardinality: optional, one
   450  			PackageSummary: "",
   451  
   452  			// 7.19: Package Detailed Description
   453  			// Cardinality: optional, one
   454  			PackageDescription: helpers.Description(p),
   455  
   456  			// 7.20: Package Comment
   457  			// Cardinality: optional, one
   458  			PackageComment: "",
   459  
   460  			// 7.21: Package External Reference
   461  			// Cardinality: optional, one or many
   462  			PackageExternalReferences: formatSPDXExternalRefs(p),
   463  
   464  			// 7.22: Package External Reference Comment
   465  			// Cardinality: conditional (optional, one) for each External Reference
   466  			// contained within PackageExternalReference2_1 struct, if present
   467  
   468  			// 7.23: Package Attribution Text
   469  			// Cardinality: optional, one or many
   470  			PackageAttributionTexts: nil,
   471  		})
   472  	}
   473  	return results
   474  }
   475  
   476  func toPackageChecksums(p pkg.Package) ([]spdx.Checksum, bool) {
   477  	filesAnalyzed := false
   478  	var checksums []spdx.Checksum
   479  	switch meta := p.Metadata.(type) {
   480  	// we generate digest for some Java packages
   481  	// spdx.github.io/spdx-spec/package-information/#710-package-checksum-field
   482  	case pkg.JavaArchive:
   483  		// if syft has generated the digest here then filesAnalyzed is true
   484  		if len(meta.ArchiveDigests) > 0 {
   485  			filesAnalyzed = true
   486  			for _, digest := range meta.ArchiveDigests {
   487  				algo := strings.ToUpper(digest.Algorithm)
   488  				checksums = append(checksums, spdx.Checksum{
   489  					Algorithm: spdx.ChecksumAlgorithm(algo),
   490  					Value:     digest.Value,
   491  				})
   492  			}
   493  		}
   494  	case pkg.GolangBinaryBuildinfoEntry:
   495  		// because the H1 digest is found in the Golang metadata we cannot claim that the files were analyzed
   496  		algo, hexStr, err := helpers.HDigestToSHA(meta.H1Digest)
   497  		if err != nil {
   498  			log.Debugf("invalid h1digest: %s: %v", meta.H1Digest, err)
   499  			break
   500  		}
   501  		algo = strings.ToUpper(algo)
   502  		checksums = append(checksums, spdx.Checksum{
   503  			Algorithm: spdx.ChecksumAlgorithm(algo),
   504  			Value:     hexStr,
   505  		})
   506  	}
   507  	return checksums, filesAnalyzed
   508  }
   509  
   510  func toPackageOriginator(p pkg.Package) *spdx.Originator {
   511  	kind, originator := helpers.Originator(p)
   512  	if kind == "" || originator == "" {
   513  		return nil
   514  	}
   515  	return &spdx.Originator{
   516  		Originator:     originator,
   517  		OriginatorType: kind,
   518  	}
   519  }
   520  
   521  func toPackageSupplier(p pkg.Package) *spdx.Supplier {
   522  	kind, supplier := helpers.Supplier(p)
   523  	if kind == "" || supplier == "" {
   524  		return &spdx.Supplier{
   525  			Supplier: helpers.NOASSERTION,
   526  		}
   527  	}
   528  	return &spdx.Supplier{
   529  		Supplier:     supplier,
   530  		SupplierType: kind,
   531  	}
   532  }
   533  
   534  func formatSPDXExternalRefs(p pkg.Package) (refs []*spdx.PackageExternalReference) {
   535  	for _, ref := range helpers.ExternalRefs(p) {
   536  		refs = append(refs, &spdx.PackageExternalReference{
   537  			Category:           string(ref.ReferenceCategory),
   538  			RefType:            string(ref.ReferenceType),
   539  			Locator:            ref.ReferenceLocator,
   540  			ExternalRefComment: ref.Comment,
   541  		})
   542  	}
   543  	return refs
   544  }
   545  
   546  func toRelationships(relationships []artifact.Relationship) (result []*spdx.Relationship) {
   547  	for _, r := range relationships {
   548  		exists, relationshipType, comment := lookupRelationship(r.Type)
   549  
   550  		if !exists {
   551  			log.Debugf("unable to convert relationship to SPDX, dropping: %+v", r)
   552  			continue
   553  		}
   554  
   555  		// FIXME: we are only currently including Package -> * relationships
   556  		if _, ok := r.From.(pkg.Package); !ok {
   557  			log.Debugf("skipping non-package relationship: %+v", r)
   558  			continue
   559  		}
   560  
   561  		result = append(result, &spdx.Relationship{
   562  			RefA: spdx.DocElementID{
   563  				ElementRefID: toSPDXID(r.From),
   564  			},
   565  			Relationship: string(relationshipType),
   566  			RefB: spdx.DocElementID{
   567  				ElementRefID: toSPDXID(r.To),
   568  			},
   569  			RelationshipComment: comment,
   570  		})
   571  	}
   572  	return result
   573  }
   574  
   575  func lookupRelationship(ty artifact.RelationshipType) (bool, helpers.RelationshipType, string) {
   576  	switch ty {
   577  	case artifact.ContainsRelationship:
   578  		return true, helpers.ContainsRelationship, ""
   579  	case artifact.DependencyOfRelationship:
   580  		return true, helpers.DependencyOfRelationship, ""
   581  	case artifact.OwnershipByFileOverlapRelationship:
   582  		return true, helpers.OtherRelationship, fmt.Sprintf("%s: indicates that the parent package claims ownership of a child package since the parent metadata indicates overlap with a location that a cataloger found the child package by", ty)
   583  	case artifact.EvidentByRelationship:
   584  		return true, helpers.OtherRelationship, fmt.Sprintf("%s: indicates the package's existence is evident by the given file", ty)
   585  	}
   586  	return false, "", ""
   587  }
   588  
   589  func toFiles(s sbom.SBOM) (results []*spdx.File) {
   590  	artifacts := s.Artifacts
   591  
   592  	for _, coordinates := range s.AllCoordinates() {
   593  		var metadata *file.Metadata
   594  		if metadataForLocation, exists := artifacts.FileMetadata[coordinates]; exists {
   595  			metadata = &metadataForLocation
   596  		}
   597  
   598  		var digests []file.Digest
   599  		if digestsForLocation, exists := artifacts.FileDigests[coordinates]; exists {
   600  			digests = digestsForLocation
   601  		}
   602  
   603  		// if we don't have any metadata or digests for this location
   604  		// then the file is most likely a symlink or non-regular file
   605  		// for now we include a 0 sha1 digest as requested by the spdx spec
   606  		// TODO: update location code in core SBOM so that we can map complex links
   607  		// back to their real file digest location.
   608  		if len(digests) == 0 {
   609  			digests = append(digests, file.Digest{Algorithm: "sha1", Value: "0000000000000000000000000000000000000000"})
   610  		}
   611  
   612  		// TODO: add file classifications (?) and content as a snippet
   613  
   614  		var comment string
   615  		if coordinates.FileSystemID != "" {
   616  			comment = fmt.Sprintf("layerID: %s", coordinates.FileSystemID)
   617  		}
   618  
   619  		results = append(results, &spdx.File{
   620  			FileSPDXIdentifier: toSPDXID(coordinates),
   621  			FileComment:        comment,
   622  			// required, no attempt made to determine license information
   623  			LicenseConcluded: noAssertion,
   624  			Checksums:        toFileChecksums(digests),
   625  			FileName:         coordinates.RealPath,
   626  			FileTypes:        toFileTypes(metadata),
   627  			LicenseInfoInFiles: []string{ // required in SPDX 2.2
   628  				helpers.NOASSERTION,
   629  			},
   630  		})
   631  	}
   632  
   633  	// sort by real path then virtual path to ensure the result is stable across multiple runs
   634  	sort.SliceStable(results, func(i, j int) bool {
   635  		if results[i].FileName == results[j].FileName {
   636  			return results[i].FileSPDXIdentifier < results[j].FileSPDXIdentifier
   637  		}
   638  		return results[i].FileName < results[j].FileName
   639  	})
   640  	return results
   641  }
   642  
   643  func toFileChecksums(digests []file.Digest) (checksums []spdx.Checksum) {
   644  	checksums = make([]spdx.Checksum, 0, len(digests))
   645  	for _, digest := range digests {
   646  		checksums = append(checksums, spdx.Checksum{
   647  			Algorithm: toChecksumAlgorithm(digest.Algorithm),
   648  			Value:     digest.Value,
   649  		})
   650  	}
   651  	return checksums
   652  }
   653  
   654  // toChecksum takes a checksum in the format <algorithm>:<hash> and returns an spdx.Checksum or nil if the string is invalid
   655  func toChecksum(algorithmHash string) *spdx.Checksum {
   656  	parts := strings.Split(algorithmHash, ":")
   657  	if len(parts) < 2 {
   658  		return nil
   659  	}
   660  	return &spdx.Checksum{
   661  		Algorithm: toChecksumAlgorithm(parts[0]),
   662  		Value:     parts[1],
   663  	}
   664  }
   665  
   666  func toChecksumAlgorithm(algorithm string) spdx.ChecksumAlgorithm {
   667  	// this needs to be an uppercase version of our algorithm
   668  	return spdx.ChecksumAlgorithm(strings.ToUpper(algorithm))
   669  }
   670  
   671  func toFileTypes(metadata *file.Metadata) (ty []string) {
   672  	if metadata == nil {
   673  		return nil
   674  	}
   675  
   676  	mimeTypePrefix := strings.Split(metadata.MIMEType, "/")[0]
   677  	switch mimeTypePrefix {
   678  	case "image":
   679  		ty = append(ty, string(helpers.ImageFileType))
   680  	case "video":
   681  		ty = append(ty, string(helpers.VideoFileType))
   682  	case "application":
   683  		ty = append(ty, string(helpers.ApplicationFileType))
   684  	case "text":
   685  		ty = append(ty, string(helpers.TextFileType))
   686  	case "audio":
   687  		ty = append(ty, string(helpers.AudioFileType))
   688  	}
   689  
   690  	if mimetype.IsExecutable(metadata.MIMEType) {
   691  		ty = append(ty, string(helpers.BinaryFileType))
   692  	}
   693  
   694  	if mimetype.IsArchive(metadata.MIMEType) {
   695  		ty = append(ty, string(helpers.ArchiveFileType))
   696  	}
   697  
   698  	// TODO: add support for source, spdx, and documentation file types
   699  	if len(ty) == 0 {
   700  		ty = append(ty, string(helpers.OtherFileType))
   701  	}
   702  
   703  	return ty
   704  }
   705  
   706  // other licenses are for licenses from the pkg.Package that do not have an SPDXExpression
   707  // field. The spdxexpression field is only filled given a validated Value field.
   708  func toOtherLicenses(catalog *pkg.Collection) []*spdx.OtherLicense {
   709  	licenses := map[string]helpers.SPDXLicense{}
   710  
   711  	for p := range catalog.Enumerate() {
   712  		declaredLicenses, concludedLicenses := helpers.ParseLicenses(p.Licenses.ToSlice())
   713  		for _, l := range declaredLicenses {
   714  			if l.Value != "" {
   715  				licenses[l.ID] = l
   716  			}
   717  		}
   718  		for _, l := range concludedLicenses {
   719  			if l.Value != "" {
   720  				licenses[l.ID] = l
   721  			}
   722  		}
   723  	}
   724  
   725  	var result []*spdx.OtherLicense
   726  
   727  	var ids []string
   728  	for licenseID := range licenses {
   729  		ids = append(ids, licenseID)
   730  	}
   731  
   732  	slices.Sort(ids)
   733  	for _, id := range ids {
   734  		license := licenses[id]
   735  		result = append(result, &spdx.OtherLicense{
   736  			LicenseIdentifier: license.ID,
   737  			ExtractedText:     license.Value,
   738  		})
   739  	}
   740  	return result
   741  }
   742  
   743  // TODO: handle SPDX excludes file case
   744  // f file is an "excludes" file, skip it /* exclude SPDX analysis file(s) */
   745  // see: https://spdx.github.io/spdx-spec/v2.3/package-information/#79-package-verification-code-field
   746  // the above link contains the SPDX algorithm for a package verification code
   747  func newPackageVerificationCode(p pkg.Package, sbom sbom.SBOM) *spdx.PackageVerificationCode {
   748  	// key off of the contains relationship;
   749  	// spdx validator will fail if a package claims to contain a file but no sha1 provided
   750  	// if a sha1 for a file is provided then the validator will fail if the package does not have
   751  	// a package verification code
   752  	coordinates := sbom.CoordinatesForPackage(p, artifact.ContainsRelationship)
   753  	var digests []file.Digest
   754  	for _, c := range coordinates {
   755  		digest := sbom.Artifacts.FileDigests[c]
   756  		if len(digest) == 0 {
   757  			continue
   758  		}
   759  
   760  		var d file.Digest
   761  		for _, digest := range digest {
   762  			if digest.Algorithm == "sha1" {
   763  				d = digest
   764  				break
   765  			}
   766  		}
   767  		digests = append(digests, d)
   768  	}
   769  
   770  	if len(digests) == 0 {
   771  		return nil
   772  	}
   773  
   774  	// sort templist in ascending order by SHA1 value
   775  	sort.SliceStable(digests, func(i, j int) bool {
   776  		return digests[i].Value < digests[j].Value
   777  	})
   778  
   779  	// filelist = templist with "/n"s removed. /* ordered sequence of SHA1 values with no separators
   780  	var b strings.Builder
   781  	for _, digest := range digests {
   782  		b.WriteString(digest.Value)
   783  	}
   784  
   785  	//nolint:gosec
   786  	hasher := sha1.New()
   787  	_, _ = hasher.Write([]byte(b.String()))
   788  	return &spdx.PackageVerificationCode{
   789  		// 7.9.1: Package Verification Code Value
   790  		// Cardinality: mandatory, one
   791  		Value: fmt.Sprintf("%+x", hasher.Sum(nil)),
   792  	}
   793  }