github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/formats/common/spdxhelpers/to_format_model.go (about) 1 //nolint:gosec // sha1 is used as a required hash function for SPDX, not a crypto function 2 package spdxhelpers 3 4 import ( 5 "crypto/sha1" 6 "fmt" 7 "path" 8 "sort" 9 "strings" 10 "time" 11 12 "github.com/nextlinux/gosbom/gosbom/artifact" 13 "github.com/nextlinux/gosbom/gosbom/file" 14 "github.com/nextlinux/gosbom/gosbom/formats/common/util" 15 "github.com/nextlinux/gosbom/gosbom/pkg" 16 "github.com/nextlinux/gosbom/gosbom/sbom" 17 "github.com/nextlinux/gosbom/internal" 18 "github.com/nextlinux/gosbom/internal/log" 19 "github.com/nextlinux/gosbom/internal/spdxlicense" 20 "github.com/spdx/tools-golang/spdx" 21 "golang.org/x/exp/maps" 22 "golang.org/x/exp/slices" 23 ) 24 25 const ( 26 noAssertion = "NOASSERTION" 27 ) 28 29 // ToFormatModel creates and populates a new SPDX document struct that follows the SPDX 2.3 30 // spec from the given SBOM model. 31 // 32 //nolint:funlen 33 func ToFormatModel(s sbom.SBOM) *spdx.Document { 34 name, namespace := DocumentNameAndNamespace(s.Source) 35 relationships := toRelationships(s.RelationshipsSorted()) 36 37 // for valid SPDX we need a document describes relationship 38 // TODO: remove this placeholder after deciding on correct behavior 39 // for the primary package purpose field: 40 // https://spdx.github.io/spdx-spec/v2.3/package-information/#724-primary-package-purpose-field 41 documentDescribesRelationship := &spdx.Relationship{ 42 RefA: spdx.DocElementID{ 43 ElementRefID: "DOCUMENT", 44 }, 45 Relationship: string(DescribesRelationship), 46 RefB: spdx.DocElementID{ 47 ElementRefID: "DOCUMENT", 48 }, 49 RelationshipComment: "", 50 } 51 52 relationships = append(relationships, documentDescribesRelationship) 53 54 return &spdx.Document{ 55 // 6.1: SPDX Version; should be in the format "SPDX-x.x" 56 // Cardinality: mandatory, one 57 SPDXVersion: spdx.Version, 58 59 // 6.2: Data License; should be "CC0-1.0" 60 // Cardinality: mandatory, one 61 DataLicense: spdx.DataLicense, 62 63 // 6.3: SPDX Identifier; should be "DOCUMENT" to represent mandatory identifier of SPDXRef-DOCUMENT 64 // Cardinality: mandatory, one 65 SPDXIdentifier: "DOCUMENT", 66 67 // 6.4: Document Name 68 // Cardinality: mandatory, one 69 DocumentName: name, 70 71 // 6.5: Document Namespace 72 // Cardinality: mandatory, one 73 // Purpose: Provide an SPDX document specific namespace as a unique absolute Uniform Resource 74 // Identifier (URI) as specified in RFC-3986, with the exception of the ‘#’ delimiter. The SPDX 75 // Document URI cannot contain a URI "part" (e.g. the "#" character), since the ‘#’ is used in SPDX 76 // element URIs (packages, files, snippets, etc) to separate the document namespace from the 77 // element’s SPDX identifier. Additionally, a scheme (e.g. “https:”) is required. 78 79 // The URI must be unique for the SPDX document including the specific version of the SPDX document. 80 // If the SPDX document is updated, thereby creating a new version, a new URI for the updated 81 // document must be used. There can only be one URI for an SPDX document and only one SPDX document 82 // for a given URI. 83 84 // Note that the URI does not have to be accessible. It is only intended to provide a unique ID. 85 // In many cases, the URI will point to a web accessible document, but this should not be assumed 86 // to be the case. 87 88 DocumentNamespace: namespace, 89 90 // 6.6: External Document References 91 // Cardinality: optional, one or many 92 ExternalDocumentReferences: nil, 93 94 // 6.11: Document Comment 95 // Cardinality: optional, one 96 DocumentComment: "", 97 98 CreationInfo: &spdx.CreationInfo{ 99 // 6.7: License List Version 100 // Cardinality: optional, one 101 LicenseListVersion: spdxlicense.Version, 102 103 // 6.8: Creators: may have multiple keys for Person, Organization 104 // and/or Tool 105 // Cardinality: mandatory, one or many 106 Creators: []spdx.Creator{ 107 { 108 Creator: "Nextlinux, Inc", 109 CreatorType: "Organization", 110 }, 111 { 112 Creator: internal.ApplicationName + "-" + s.Descriptor.Version, 113 CreatorType: "Tool", 114 }, 115 }, 116 117 // 6.9: Created: data format YYYY-MM-DDThh:mm:ssZ 118 // Cardinality: mandatory, one 119 Created: time.Now().UTC().Format(time.RFC3339), 120 121 // 6.10: Creator Comment 122 // Cardinality: optional, one 123 CreatorComment: "", 124 }, 125 Packages: toPackages(s.Artifacts.Packages, s), 126 Files: toFiles(s), 127 Relationships: relationships, 128 OtherLicenses: toOtherLicenses(s.Artifacts.Packages), 129 } 130 } 131 132 func toSPDXID(identifiable artifact.Identifiable) spdx.ElementID { 133 maxLen := 40 134 id := "" 135 switch it := identifiable.(type) { 136 case pkg.Package: 137 id = SanitizeElementID(fmt.Sprintf("Package-%s-%s-%s", it.Type, it.Name, it.ID())) 138 case file.Coordinates: 139 p := "" 140 parts := strings.Split(it.RealPath, "/") 141 for i := len(parts); i > 0; i-- { 142 part := parts[i-1] 143 if len(part) == 0 { 144 continue 145 } 146 if i < len(parts) && len(p)+len(part)+3 > maxLen { 147 p = "..." + p 148 break 149 } 150 p = path.Join(part, p) 151 } 152 id = SanitizeElementID(fmt.Sprintf("File-%s-%s", p, it.ID())) 153 default: 154 id = string(identifiable.ID()) 155 } 156 // NOTE: the spdx library prepend SPDXRef-, so we don't do it here 157 return spdx.ElementID(id) 158 } 159 160 // packages populates all Package Information from the package Collection (see https://spdx.github.io/spdx-spec/3-package-information/) 161 // 162 //nolint:funlen 163 func toPackages(catalog *pkg.Collection, sbom sbom.SBOM) (results []*spdx.Package) { 164 for _, p := range catalog.Sorted() { 165 // name should be guaranteed to be unique, but semantically useful and stable 166 id := toSPDXID(p) 167 168 // If the Concluded License is not the same as the Declared License, a written explanation should be provided 169 // in the Comments on License field (section 7.16). With respect to NOASSERTION, a written explanation in 170 // the Comments on License field (section 7.16) is preferred. 171 // extract these correctly to the spdx license format 172 concluded, declared := License(p) 173 174 // two ways to get filesAnalyzed == true: 175 // 1. gosbom has generated a sha1 digest for the package itself - usually in the java cataloger 176 // 2. gosbom has generated a sha1 digest for the package's contents 177 packageChecksums, filesAnalyzed := toPackageChecksums(p) 178 179 packageVerificationCode := newPackageVerificationCode(p, sbom) 180 if packageVerificationCode != nil { 181 filesAnalyzed = true 182 } 183 184 // invalid SPDX document state 185 if filesAnalyzed && packageVerificationCode == nil { 186 // this is an invalid document state 187 // we reset the filesAnalyzed flag to false to avoid 188 // cases where a package digest was generated but there was 189 // not enough metadata to generate a verification code regarding the files 190 filesAnalyzed = false 191 } 192 193 results = append(results, &spdx.Package{ 194 // NOT PART OF SPEC 195 // flag: does this "package" contain files that were in fact "unpackaged", 196 // e.g. included directly in the Document without being in a Package? 197 IsUnpackaged: false, 198 199 // 7.1: Package Name 200 // Cardinality: mandatory, one 201 PackageName: p.Name, 202 203 // 7.2: Package SPDX Identifier: "SPDXRef-[idstring]" 204 // Cardinality: mandatory, one 205 PackageSPDXIdentifier: id, 206 207 // 7.3: Package Version 208 // Cardinality: optional, one 209 PackageVersion: p.Version, 210 211 // 7.4: Package File Name 212 // Cardinality: optional, one 213 PackageFileName: "", 214 215 // 7.5: Package Supplier: may have single result for either Person or Organization, 216 // or NOASSERTION 217 // Cardinality: optional, one 218 219 // 7.6: Package Originator: may have single result for either Person or Organization, 220 // or NOASSERTION 221 // Cardinality: optional, one 222 PackageSupplier: nil, 223 224 PackageOriginator: toPackageOriginator(p), 225 226 // 7.7: Package Download Location 227 // Cardinality: mandatory, one 228 // NONE if there is no download location whatsoever. 229 // NOASSERTION if: 230 // (i) the SPDX file creator has attempted to but cannot reach a reasonable objective determination; 231 // (ii) the SPDX file creator has made no attempt to determine this field; or 232 // (iii) the SPDX file creator has intentionally provided no information (no meaning should be implied by doing so). 233 PackageDownloadLocation: DownloadLocation(p), 234 235 // 7.8: FilesAnalyzed 236 // Cardinality: optional, one; default value is "true" if omitted 237 238 // Purpose: Indicates whether the file content of this package has been available for or subjected to 239 // analysis when creating the SPDX document. If false, indicates packages that represent metadata or 240 // URI references to a project, product, artifact, distribution or a component. If false, the package 241 // must not contain any files. 242 243 // Intent: A package can refer to a project, product, artifact, distribution or a component that is 244 // external to the SPDX document. 245 FilesAnalyzed: filesAnalyzed, 246 // NOT PART OF SPEC: did FilesAnalyzed tag appear? 247 IsFilesAnalyzedTagPresent: true, 248 249 // 7.9: Package Verification Code 250 // Cardinality: optional, one if filesAnalyzed is true / omitted; 251 // zero (must be omitted) if filesAnalyzed is false 252 PackageVerificationCode: packageVerificationCode, 253 254 // 7.10: Package Checksum: may have keys for SHA1, SHA256 and/or MD5 255 // Cardinality: optional, one or many 256 257 // 7.10.1 Purpose: Provide an independently reproducible mechanism that permits unique identification of 258 // a specific package that correlates to the data in this SPDX file. This identifier enables a recipient 259 // to determine if any file in the original package has been changed. If the SPDX file is to be included 260 // in a package, this value should not be calculated. The SHA-1 algorithm will be used to provide the 261 // checksum by default. 262 PackageChecksums: packageChecksums, 263 264 // 7.11: Package Home Page 265 // Cardinality: optional, one 266 PackageHomePage: Homepage(p), 267 268 // 7.12: Source Information 269 // Cardinality: optional, one 270 PackageSourceInfo: SourceInfo(p), 271 272 // 7.13: Concluded License: SPDX License Expression, "NONE" or "NOASSERTION" 273 // Cardinality: mandatory, one 274 // Purpose: Contain the license the SPDX file creator has concluded as governing the 275 // package or alternative values, if the governing license cannot be determined. 276 PackageLicenseConcluded: concluded, 277 278 // 7.14: All Licenses Info from Files: SPDX License Expression, "NONE" or "NOASSERTION" 279 // Cardinality: mandatory, one or many if filesAnalyzed is true / omitted; 280 // zero (must be omitted) if filesAnalyzed is false 281 PackageLicenseInfoFromFiles: nil, 282 283 // 7.15: Declared License: SPDX License Expression, "NONE" or "NOASSERTION" 284 // Cardinality: mandatory, one 285 // Purpose: List the licenses that have been declared by the authors of the package. 286 // Any license information that does not originate from the package authors, e.g. license 287 // information from a third party repository, should not be included in this field. 288 PackageLicenseDeclared: declared, 289 290 // 7.16: Comments on License 291 // Cardinality: optional, one 292 PackageLicenseComments: "", 293 294 // 7.17: Copyright Text: copyright notice(s) text, "NONE" or "NOASSERTION" 295 // Cardinality: mandatory, one 296 // Purpose: IdentifyFormat the copyright holders of the package, as well as any dates present. This will be a free form text field extracted from package information files. The options to populate this field are limited to: 297 // 298 // Any text related to a copyright notice, even if not complete; 299 // NONE if the package contains no copyright information whatsoever; or 300 // NOASSERTION, if 301 // (i) the SPDX document creator has made no attempt to determine this field; or 302 // (ii) the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so). 303 // 304 PackageCopyrightText: noAssertion, 305 306 // 7.18: Package Summary Description 307 // Cardinality: optional, one 308 PackageSummary: "", 309 310 // 7.19: Package Detailed Description 311 // Cardinality: optional, one 312 PackageDescription: Description(p), 313 314 // 7.20: Package Comment 315 // Cardinality: optional, one 316 PackageComment: "", 317 318 // 7.21: Package External Reference 319 // Cardinality: optional, one or many 320 PackageExternalReferences: formatSPDXExternalRefs(p), 321 322 // 7.22: Package External Reference Comment 323 // Cardinality: conditional (optional, one) for each External Reference 324 // contained within PackageExternalReference2_1 struct, if present 325 326 // 7.23: Package Attribution Text 327 // Cardinality: optional, one or many 328 PackageAttributionTexts: nil, 329 }) 330 } 331 return results 332 } 333 334 func toPackageChecksums(p pkg.Package) ([]spdx.Checksum, bool) { 335 filesAnalyzed := false 336 var checksums []spdx.Checksum 337 switch meta := p.Metadata.(type) { 338 // we generate digest for some Java packages 339 // spdx.github.io/spdx-spec/package-information/#710-package-checksum-field 340 case pkg.JavaMetadata: 341 // if gosbom has generated the digest here then filesAnalyzed is true 342 if len(meta.ArchiveDigests) > 0 { 343 filesAnalyzed = true 344 for _, digest := range meta.ArchiveDigests { 345 algo := strings.ToUpper(digest.Algorithm) 346 checksums = append(checksums, spdx.Checksum{ 347 Algorithm: spdx.ChecksumAlgorithm(algo), 348 Value: digest.Value, 349 }) 350 } 351 } 352 case pkg.GolangBinMetadata: 353 // because the H1 digest is found in the Golang metadata we cannot claim that the files were analyzed 354 algo, hexStr, err := util.HDigestToSHA(meta.H1Digest) 355 if err != nil { 356 log.Debugf("invalid h1digest: %s: %v", meta.H1Digest, err) 357 break 358 } 359 algo = strings.ToUpper(algo) 360 checksums = append(checksums, spdx.Checksum{ 361 Algorithm: spdx.ChecksumAlgorithm(algo), 362 Value: hexStr, 363 }) 364 } 365 return checksums, filesAnalyzed 366 } 367 368 func toPackageOriginator(p pkg.Package) *spdx.Originator { 369 kind, originator := Originator(p) 370 if kind == "" || originator == "" { 371 return nil 372 } 373 return &spdx.Originator{ 374 Originator: originator, 375 OriginatorType: kind, 376 } 377 } 378 379 func formatSPDXExternalRefs(p pkg.Package) (refs []*spdx.PackageExternalReference) { 380 for _, ref := range ExternalRefs(p) { 381 refs = append(refs, &spdx.PackageExternalReference{ 382 Category: string(ref.ReferenceCategory), 383 RefType: string(ref.ReferenceType), 384 Locator: ref.ReferenceLocator, 385 ExternalRefComment: ref.Comment, 386 }) 387 } 388 return refs 389 } 390 391 func toRelationships(relationships []artifact.Relationship) (result []*spdx.Relationship) { 392 for _, r := range relationships { 393 exists, relationshipType, comment := lookupRelationship(r.Type) 394 395 if !exists { 396 log.Debugf("unable to convert relationship to SPDX, dropping: %+v", r) 397 continue 398 } 399 400 // FIXME: we are only currently including Package -> * relationships 401 if _, ok := r.From.(pkg.Package); !ok { 402 log.Debugf("skipping non-package relationship: %+v", r) 403 continue 404 } 405 406 result = append(result, &spdx.Relationship{ 407 RefA: spdx.DocElementID{ 408 ElementRefID: toSPDXID(r.From), 409 }, 410 Relationship: string(relationshipType), 411 RefB: spdx.DocElementID{ 412 ElementRefID: toSPDXID(r.To), 413 }, 414 RelationshipComment: comment, 415 }) 416 } 417 return result 418 } 419 420 func lookupRelationship(ty artifact.RelationshipType) (bool, RelationshipType, string) { 421 switch ty { 422 case artifact.ContainsRelationship: 423 return true, ContainsRelationship, "" 424 case artifact.DependencyOfRelationship: 425 return true, DependencyOfRelationship, "" 426 case artifact.OwnershipByFileOverlapRelationship: 427 return true, OtherRelationship, fmt.Sprintf("%s: indicates that the parent package claims ownership of a child package since the parent metadata indicates overlap with a location that a cataloger found the child package by", ty) 428 case artifact.EvidentByRelationship: 429 return true, OtherRelationship, fmt.Sprintf("%s: indicates the package's existence is evident by the given file", ty) 430 } 431 return false, "", "" 432 } 433 434 func toFiles(s sbom.SBOM) (results []*spdx.File) { 435 artifacts := s.Artifacts 436 437 for _, coordinates := range s.AllCoordinates() { 438 var metadata *file.Metadata 439 if metadataForLocation, exists := artifacts.FileMetadata[coordinates]; exists { 440 metadata = &metadataForLocation 441 } 442 443 var digests []file.Digest 444 if digestsForLocation, exists := artifacts.FileDigests[coordinates]; exists { 445 digests = digestsForLocation 446 } 447 448 // if we don't have any metadata or digests for this location 449 // then the file is most likely a symlink or non-regular file 450 // for now we include a 0 sha1 digest as requested by the spdx spec 451 // TODO: update location code in core SBOM so that we can map complex links 452 // back to their real file digest location. 453 if len(digests) == 0 { 454 digests = append(digests, file.Digest{Algorithm: "sha1", Value: "0000000000000000000000000000000000000000"}) 455 } 456 457 // TODO: add file classifications (?) and content as a snippet 458 459 var comment string 460 if coordinates.FileSystemID != "" { 461 comment = fmt.Sprintf("layerID: %s", coordinates.FileSystemID) 462 } 463 464 results = append(results, &spdx.File{ 465 FileSPDXIdentifier: toSPDXID(coordinates), 466 FileComment: comment, 467 // required, no attempt made to determine license information 468 LicenseConcluded: noAssertion, 469 Checksums: toFileChecksums(digests), 470 FileName: coordinates.RealPath, 471 FileTypes: toFileTypes(metadata), 472 }) 473 } 474 475 // sort by real path then virtual path to ensure the result is stable across multiple runs 476 sort.SliceStable(results, func(i, j int) bool { 477 if results[i].FileName == results[j].FileName { 478 return results[i].FileSPDXIdentifier < results[j].FileSPDXIdentifier 479 } 480 return results[i].FileName < results[j].FileName 481 }) 482 return results 483 } 484 485 func toFileChecksums(digests []file.Digest) (checksums []spdx.Checksum) { 486 checksums = make([]spdx.Checksum, 0, len(digests)) 487 for _, digest := range digests { 488 checksums = append(checksums, spdx.Checksum{ 489 Algorithm: toChecksumAlgorithm(digest.Algorithm), 490 Value: digest.Value, 491 }) 492 } 493 return checksums 494 } 495 496 func toChecksumAlgorithm(algorithm string) spdx.ChecksumAlgorithm { 497 // this needs to be an uppercase version of our algorithm 498 return spdx.ChecksumAlgorithm(strings.ToUpper(algorithm)) 499 } 500 501 func toFileTypes(metadata *file.Metadata) (ty []string) { 502 if metadata == nil { 503 return nil 504 } 505 506 mimeTypePrefix := strings.Split(metadata.MIMEType, "/")[0] 507 switch mimeTypePrefix { 508 case "image": 509 ty = append(ty, string(ImageFileType)) 510 case "video": 511 ty = append(ty, string(VideoFileType)) 512 case "application": 513 ty = append(ty, string(ApplicationFileType)) 514 case "text": 515 ty = append(ty, string(TextFileType)) 516 case "audio": 517 ty = append(ty, string(AudioFileType)) 518 } 519 520 if internal.IsExecutable(metadata.MIMEType) { 521 ty = append(ty, string(BinaryFileType)) 522 } 523 524 if internal.IsArchive(metadata.MIMEType) { 525 ty = append(ty, string(ArchiveFileType)) 526 } 527 528 // TODO: add support for source, spdx, and documentation file types 529 if len(ty) == 0 { 530 ty = append(ty, string(OtherFileType)) 531 } 532 533 return ty 534 } 535 536 // other licenses are for licenses from the pkg.Package that do not have an SPDXExpression 537 // field. The spdxexpression field is only filled given a validated Value field. 538 func toOtherLicenses(catalog *pkg.Collection) []*spdx.OtherLicense { 539 licenses := map[string]bool{} 540 for _, p := range catalog.Sorted() { 541 declaredLicenses, concludedLicenses := parseLicenses(p.Licenses.ToSlice()) 542 for _, license := range declaredLicenses { 543 if strings.HasPrefix(license, spdxlicense.LicenseRefPrefix) { 544 licenses[license] = true 545 } 546 } 547 for _, license := range concludedLicenses { 548 if strings.HasPrefix(license, spdxlicense.LicenseRefPrefix) { 549 licenses[license] = true 550 } 551 } 552 } 553 554 var result []*spdx.OtherLicense 555 556 sorted := maps.Keys(licenses) 557 slices.Sort(sorted) 558 for _, license := range sorted { 559 // separate the found value from the prefix 560 // this only contains licenses that are not found on the SPDX License List 561 name := strings.TrimPrefix(license, spdxlicense.LicenseRefPrefix) 562 result = append(result, &spdx.OtherLicense{ 563 LicenseIdentifier: SanitizeElementID(license), 564 ExtractedText: name, 565 }) 566 } 567 return result 568 } 569 570 // TODO: handle SPDX excludes file case 571 // f file is an "excludes" file, skip it /* exclude SPDX analysis file(s) */ 572 // see: https://spdx.github.io/spdx-spec/v2.3/package-information/#79-package-verification-code-field 573 // the above link contains the SPDX algorithm for a package verification code 574 func newPackageVerificationCode(p pkg.Package, sbom sbom.SBOM) *spdx.PackageVerificationCode { 575 // key off of the contains relationship; 576 // spdx validator will fail if a package claims to contain a file but no sha1 provided 577 // if a sha1 for a file is provided then the validator will fail if the package does not have 578 // a package verification code 579 coordinates := sbom.CoordinatesForPackage(p, artifact.ContainsRelationship) 580 var digests []file.Digest 581 for _, c := range coordinates { 582 digest := sbom.Artifacts.FileDigests[c] 583 if len(digest) == 0 { 584 continue 585 } 586 587 var d file.Digest 588 for _, digest := range digest { 589 if digest.Algorithm == "sha1" { 590 d = digest 591 break 592 } 593 } 594 digests = append(digests, d) 595 } 596 597 if len(digests) == 0 { 598 return nil 599 } 600 601 // sort templist in ascending order by SHA1 value 602 sort.SliceStable(digests, func(i, j int) bool { 603 return digests[i].Value < digests[j].Value 604 }) 605 606 // filelist = templist with "/n"s removed. /* ordered sequence of SHA1 values with no separators 607 var b strings.Builder 608 for _, digest := range digests { 609 b.WriteString(digest.Value) 610 } 611 612 //nolint:gosec 613 hasher := sha1.New() 614 _, _ = hasher.Write([]byte(b.String())) 615 return &spdx.PackageVerificationCode{ 616 // 7.9.1: Package Verification Code Value 617 // Cardinality: mandatory, one 618 Value: fmt.Sprintf("%+x", hasher.Sum(nil)), 619 } 620 }