github.com/anchore/syft@v1.38.2/syft/format/common/spdxhelpers/to_syft_model.go (about) 1 package spdxhelpers 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "net/url" 8 "path" 9 "regexp" 10 "strconv" 11 "strings" 12 13 "github.com/scylladb/go-set/strset" 14 "github.com/spdx/tools-golang/spdx" 15 "github.com/spdx/tools-golang/spdx/v2/common" 16 17 "github.com/anchore/packageurl-go" 18 "github.com/anchore/syft/internal/log" 19 "github.com/anchore/syft/internal/spdxlicense" 20 "github.com/anchore/syft/syft/artifact" 21 "github.com/anchore/syft/syft/cpe" 22 "github.com/anchore/syft/syft/file" 23 "github.com/anchore/syft/syft/format/internal" 24 "github.com/anchore/syft/syft/format/internal/spdxutil/helpers" 25 "github.com/anchore/syft/syft/license" 26 "github.com/anchore/syft/syft/linux" 27 "github.com/anchore/syft/syft/pkg" 28 "github.com/anchore/syft/syft/sbom" 29 "github.com/anchore/syft/syft/source" 30 ) 31 32 func ToSyftModel(doc *spdx.Document) (*sbom.SBOM, error) { 33 if doc == nil { 34 return nil, errors.New("cannot convert SPDX document to Syft model because document is nil") 35 } 36 37 spdxIDMap := make(map[string]any) 38 39 s := &sbom.SBOM{ 40 Source: extractSource(spdxIDMap, doc), 41 Artifacts: sbom.Artifacts{ 42 Packages: pkg.NewCollection(), 43 FileMetadata: map[file.Coordinates]file.Metadata{}, 44 FileDigests: map[file.Coordinates][]file.Digest{}, 45 LinuxDistribution: findLinuxReleaseByPURL(doc), 46 }, 47 } 48 49 collectSyftPackages(s, spdxIDMap, doc) 50 51 collectSyftFiles(s, spdxIDMap, doc) 52 53 s.Relationships = toSyftRelationships(spdxIDMap, doc) 54 55 return s, nil 56 } 57 58 func isDirectory(name string) bool { 59 if name == "." || name == ".." || strings.HasSuffix(name, "/") || !strings.Contains(path.Base(name), ".") { 60 return true 61 } 62 return false 63 } 64 65 func removePackage(packages []*spdx.Package, remove *spdx.Package) (pkgs []*spdx.Package) { 66 for _, p := range packages { 67 if p == remove { 68 continue 69 } 70 pkgs = append(pkgs, p) 71 } 72 return 73 } 74 75 func removeRelationships(relationships []*spdx.Relationship, spdxID spdx.ElementID) (relations []*spdx.Relationship) { 76 for _, r := range relationships { 77 if r.RefA.ElementRefID == spdxID || r.RefB.ElementRefID == spdxID { 78 continue 79 } 80 relations = append(relations, r) 81 } 82 return 83 } 84 85 func findRootPackages(doc *spdx.Document) (out []*spdx.Package) { 86 for _, p := range doc.Packages { 87 for _, r := range doc.Relationships { 88 describes := r.RefA.ElementRefID == "DOCUMENT" && 89 r.Relationship == spdx.RelationshipDescribes && 90 r.RefB.ElementRefID == p.PackageSPDXIdentifier 91 92 describedBy := r.RefB.ElementRefID == "DOCUMENT" && 93 r.Relationship == spdx.RelationshipDescribedBy && 94 r.RefA.ElementRefID == p.PackageSPDXIdentifier 95 96 if !describes && !describedBy { 97 continue 98 } 99 100 out = append(out, p) 101 } 102 } 103 return 104 } 105 106 func extractSource(spdxIDMap map[string]any, doc *spdx.Document) source.Description { 107 src := extractSourceFromNamespace(doc.DocumentNamespace) 108 109 rootPackages := findRootPackages(doc) 110 111 if len(rootPackages) != 1 { 112 return src 113 } 114 115 p := rootPackages[0] 116 117 switch p.PrimaryPackagePurpose { 118 case spdxPrimaryPurposeContainer: 119 src = containerSource(p) 120 case spdxPrimaryPurposeFile: 121 src = fileSource(p) 122 default: 123 return src 124 } 125 126 spdxIDMap[string(p.PackageSPDXIdentifier)] = src 127 128 doc.Packages = removePackage(doc.Packages, p) 129 doc.Relationships = removeRelationships(doc.Relationships, p.PackageSPDXIdentifier) 130 131 return src 132 } 133 134 func containerSource(p *spdx.Package) source.Description { 135 id := string(p.PackageSPDXIdentifier) 136 137 container := p.PackageName 138 v := p.PackageVersion 139 if v != "" { 140 container += ":" + v 141 } 142 143 digest := "" 144 if len(p.PackageChecksums) > 0 { 145 c := p.PackageChecksums[0] 146 digest = fmt.Sprintf("%s:%s", fromChecksumAlgorithm(c.Algorithm), c.Value) 147 } 148 149 supplier := "" 150 if p.PackageSupplier != nil { 151 // we also don't want NOASSERTION transferred to the syft format 152 // NOASSERTION == "" 153 if p.PackageSupplier.Supplier != helpers.NOASSERTION && p.PackageSupplier.SupplierType == helpers.SUPPLIERORG { 154 supplier = p.PackageSupplier.Supplier 155 } 156 } 157 158 return source.Description{ 159 ID: id, 160 Name: p.PackageName, 161 Version: p.PackageVersion, 162 Supplier: supplier, 163 Metadata: source.ImageMetadata{ 164 UserInput: container, 165 ID: id, 166 Layers: nil, // TODO handle formats with nested layer packages like Tern and K8s BOM tool 167 ManifestDigest: digest, 168 }, 169 } 170 } 171 172 func fileSource(p *spdx.Package) source.Description { 173 typeRegex := regexp.MustCompile("^DocumentRoot-([^-]+)-.*$") 174 typeName := typeRegex.ReplaceAllString(string(p.PackageSPDXIdentifier), "$1") 175 176 var version string 177 var metadata any 178 switch { 179 case typeName == prefixDirectory: 180 // is a Syft SBOM, explicitly a directory source 181 metadata, version = directorySourceMetadata(p) 182 case typeName == prefixFile: 183 // is a Syft SBOM, explicitly a file source 184 metadata, version = fileSourceMetadata(p) 185 case isDirectory(p.PackageName): 186 // is a non-Syft SBOM, which looks like a directory 187 metadata, version = directorySourceMetadata(p) 188 default: 189 // is a non-Syft SBOM, which is probably a file 190 metadata, version = fileSourceMetadata(p) 191 } 192 193 supplier := "" 194 if p.PackageSupplier.Supplier != helpers.NOASSERTION { 195 supplier = p.PackageSupplier.Supplier 196 } 197 198 return source.Description{ 199 ID: string(p.PackageSPDXIdentifier), 200 Name: p.PackageName, 201 Version: version, 202 Supplier: supplier, 203 Metadata: metadata, 204 } 205 } 206 207 func fileSourceMetadata(p *spdx.Package) (any, string) { 208 version := p.PackageVersion 209 210 m := source.FileMetadata{ 211 Path: p.PackageName, 212 } 213 // if this is a Syft SBOM, we might have output a digest as the version 214 checksum := toChecksum(p.PackageVersion) 215 for _, d := range p.PackageChecksums { 216 if checksum != nil && checksum.Value == d.Value { 217 version = "" 218 } 219 m.Digests = append(m.Digests, file.Digest{ 220 Algorithm: fromChecksumAlgorithm(d.Algorithm), 221 Value: d.Value, 222 }) 223 } 224 225 return m, version 226 } 227 228 func directorySourceMetadata(p *spdx.Package) (any, string) { 229 return source.DirectoryMetadata{ 230 Path: p.PackageName, 231 Base: "", 232 }, p.PackageVersion 233 } 234 235 // NOTE(jonas): SPDX doesn't inform what an SBOM is about, 236 // image, directory, for example. This is our best effort to determine 237 // the scheme. Syft-generated SBOMs have in the namespace 238 // field a type encoded, which we try to identify here. 239 func extractSourceFromNamespace(ns string) source.Description { 240 u, err := url.Parse(ns) 241 if err != nil { 242 return source.Description{ 243 Metadata: nil, 244 } 245 } 246 247 parts := strings.Split(u.Path, "/") 248 for _, p := range parts { 249 switch p { 250 case helpers.InputFile: 251 return source.Description{ 252 Metadata: source.FileMetadata{}, 253 } 254 case helpers.InputImage: 255 return source.Description{ 256 Metadata: source.ImageMetadata{}, 257 } 258 case helpers.InputDirectory: 259 return source.Description{ 260 Metadata: source.DirectoryMetadata{}, 261 } 262 } 263 } 264 return source.Description{} 265 } 266 267 func findLinuxReleaseByPURL(doc *spdx.Document) *linux.Release { 268 for _, p := range doc.Packages { 269 purlValue := findPURLValue(p) 270 if purlValue == "" { 271 continue 272 } 273 purl, err := packageurl.FromString(purlValue) 274 if err != nil { 275 log.Warnf("unable to parse purl: %s", purlValue) 276 continue 277 } 278 distro := findQualifierValue(purl, pkg.PURLQualifierDistro) 279 if distro != "" { 280 parts := strings.Split(distro, "-") 281 name := parts[0] 282 version := "" 283 if len(parts) > 1 { 284 version = parts[1] 285 } 286 return &linux.Release{ 287 PrettyName: name, 288 Name: name, 289 ID: name, 290 IDLike: []string{name}, 291 Version: version, 292 VersionID: version, 293 } 294 } 295 } 296 297 return nil 298 } 299 300 func collectSyftPackages(s *sbom.SBOM, spdxIDMap map[string]any, doc *spdx.Document) { 301 skipIDs := packageIDsToSkip(doc) 302 for _, p := range doc.Packages { 303 if p == nil || skipIDs.Has(string(p.PackageSPDXIdentifier)) { 304 continue 305 } 306 syftPkg := toSyftPackage(p) 307 spdxIDMap[string(p.PackageSPDXIdentifier)] = syftPkg 308 s.Artifacts.Packages.Add(syftPkg) 309 } 310 } 311 312 func collectSyftFiles(s *sbom.SBOM, spdxIDMap map[string]any, doc *spdx.Document) { 313 for _, p := range doc.Packages { 314 for _, f := range p.Files { 315 l := toSyftLocation(f) 316 spdxIDMap[string(f.FileSPDXIdentifier)] = l 317 318 s.Artifacts.FileMetadata[l.Coordinates] = toFileMetadata(f) 319 s.Artifacts.FileDigests[l.Coordinates] = toFileDigests(f) 320 } 321 } 322 323 for _, f := range doc.Files { 324 l := toSyftLocation(f) 325 spdxIDMap[string(f.FileSPDXIdentifier)] = l 326 327 s.Artifacts.FileMetadata[l.Coordinates] = toFileMetadata(f) 328 s.Artifacts.FileDigests[l.Coordinates] = toFileDigests(f) 329 } 330 } 331 332 func toFileDigests(f *spdx.File) (digests []file.Digest) { 333 for _, digest := range f.Checksums { 334 digests = append(digests, file.Digest{ 335 Algorithm: fromChecksumAlgorithm(digest.Algorithm), 336 Value: digest.Value, 337 }) 338 } 339 return digests 340 } 341 342 func fromChecksumAlgorithm(algorithm common.ChecksumAlgorithm) string { 343 return strings.ToLower(string(algorithm)) 344 } 345 346 func toFileMetadata(f *spdx.File) (meta file.Metadata) { 347 // FIXME Syft is currently lossy due to the SPDX 2.2.1 spec not supporting arbitrary mimetypes 348 for _, typ := range f.FileTypes { 349 switch helpers.FileType(typ) { 350 case helpers.ImageFileType: 351 meta.MIMEType = "image/" 352 case helpers.VideoFileType: 353 meta.MIMEType = "video/" 354 case helpers.ApplicationFileType: 355 meta.MIMEType = "application/" 356 case helpers.TextFileType: 357 meta.MIMEType = "text/" 358 case helpers.AudioFileType: 359 meta.MIMEType = "audio/" 360 case helpers.BinaryFileType: 361 case helpers.ArchiveFileType: 362 case helpers.OtherFileType: 363 } 364 } 365 return meta 366 } 367 368 func toSyftRelationships(spdxIDMap map[string]any, doc *spdx.Document) []artifact.Relationship { 369 out := collectDocRelationships(spdxIDMap, doc) 370 371 out = append(out, collectPackageFileRelationships(spdxIDMap, doc)...) 372 373 return out 374 } 375 376 func collectDocRelationships(spdxIDMap map[string]any, doc *spdx.Document) (out []artifact.Relationship) { 377 for _, r := range doc.Relationships { 378 // FIXME what to do with r.RefA.DocumentRefID and r.RefA.SpecialID 379 if r.RefA.DocumentRefID != "" && requireAndTrimPrefix(r.RefA.DocumentRefID, "DocumentRef-") != string(doc.SPDXIdentifier) { 380 log.Debugf("ignoring relationship to external document: %+v", r) 381 continue 382 } 383 a := spdxIDMap[string(r.RefA.ElementRefID)] 384 b := spdxIDMap[string(r.RefB.ElementRefID)] 385 from, fromOk := a.(pkg.Package) 386 toPackage, toPackageOk := b.(pkg.Package) 387 toLocation, toLocationOk := b.(file.Location) 388 //nolint:staticcheck 389 if !fromOk || !(toPackageOk || toLocationOk) { 390 log.Debugf("unable to find valid relationship mapping from SPDX, ignoring: (from: %+v) (to: %+v)", a, b) 391 continue 392 } 393 var to artifact.Identifiable 394 var typ artifact.RelationshipType 395 if toLocationOk { 396 switch helpers.RelationshipType(r.Relationship) { 397 case helpers.ContainsRelationship: 398 typ = artifact.ContainsRelationship 399 to = toLocation 400 case helpers.OtherRelationship: 401 // Encoding uses a specifically formatted comment... 402 if strings.Index(r.RelationshipComment, string(artifact.EvidentByRelationship)) == 0 { 403 typ = artifact.EvidentByRelationship 404 to = toLocation 405 } 406 } 407 } else { 408 switch helpers.RelationshipType(r.Relationship) { 409 case helpers.DependencyOfRelationship: 410 typ = artifact.DependencyOfRelationship 411 to = toPackage 412 case helpers.DependsOnRelationship: 413 typ = artifact.DependencyOfRelationship 414 to = from 415 from = toPackage 416 case helpers.ContainsRelationship: 417 typ = artifact.ContainsRelationship 418 to = toPackage 419 case helpers.OtherRelationship: 420 // Encoding uses a specifically formatted comment... 421 if strings.Index(r.RelationshipComment, string(artifact.OwnershipByFileOverlapRelationship)) == 0 { 422 typ = artifact.OwnershipByFileOverlapRelationship 423 to = toPackage 424 } 425 } 426 } 427 if typ != "" && to != nil { 428 out = append(out, artifact.Relationship{ 429 From: from, 430 To: to, 431 Type: typ, 432 }) 433 } 434 } 435 return out 436 } 437 438 // collectPackageFileRelationships add relationships for direct files 439 func collectPackageFileRelationships(spdxIDMap map[string]any, doc *spdx.Document) (out []artifact.Relationship) { 440 for _, p := range doc.Packages { 441 a := spdxIDMap[string(p.PackageSPDXIdentifier)] 442 from, fromOk := a.(pkg.Package) 443 if !fromOk { 444 continue 445 } 446 for _, f := range p.Files { 447 b := spdxIDMap[string(f.FileSPDXIdentifier)] 448 to, toLocationOk := b.(file.Location) 449 if !toLocationOk { 450 continue 451 } 452 out = append(out, artifact.Relationship{ 453 From: from, 454 To: to, 455 Type: artifact.ContainsRelationship, 456 }) 457 } 458 } 459 return out 460 } 461 462 func toSyftCoordinates(f *spdx.File) file.Coordinates { 463 const layerIDPrefix = "layerID: " 464 var fileSystemID string 465 if strings.Index(f.FileComment, layerIDPrefix) == 0 { 466 fileSystemID = strings.TrimPrefix(f.FileComment, layerIDPrefix) 467 } 468 if strings.Index(string(f.FileSPDXIdentifier), layerIDPrefix) == 0 { 469 fileSystemID = strings.TrimPrefix(string(f.FileSPDXIdentifier), layerIDPrefix) 470 } 471 return file.Coordinates{ 472 RealPath: f.FileName, 473 FileSystemID: fileSystemID, 474 } 475 } 476 477 func toSyftLocation(f *spdx.File) file.Location { 478 l := file.NewVirtualLocationFromCoordinates(toSyftCoordinates(f), f.FileName) 479 return l 480 } 481 482 func requireAndTrimPrefix(val interface{}, prefix string) string { 483 if v, ok := val.(string); ok { 484 if i := strings.Index(v, prefix); i == 0 { 485 return strings.Replace(v, prefix, "", 1) 486 } 487 } 488 return "" 489 } 490 491 type pkgInfo struct { 492 purl packageurl.PackageURL 493 typ pkg.Type 494 lang pkg.Language 495 } 496 497 func (p *pkgInfo) qualifierValue(name string) string { 498 return findQualifierValue(p.purl, name) 499 } 500 501 func findQualifierValue(purl packageurl.PackageURL, qualifier string) string { 502 for _, q := range purl.Qualifiers { 503 if q.Key == qualifier { 504 return q.Value 505 } 506 } 507 return "" 508 } 509 510 func extractPkgInfo(p *spdx.Package) pkgInfo { 511 pu := findPURLValue(p) 512 purl, err := packageurl.FromString(pu) 513 if err != nil { 514 return pkgInfo{} 515 } 516 return pkgInfo{ 517 purl, 518 pkg.TypeByName(purl.Type), 519 pkg.LanguageByName(purl.Type), 520 } 521 } 522 523 func toSyftPackage(p *spdx.Package) pkg.Package { 524 info := extractPkgInfo(p) 525 sP := &pkg.Package{ 526 Type: info.typ, 527 Name: p.PackageName, 528 Version: p.PackageVersion, 529 Licenses: pkg.NewLicenseSet(parseSPDXLicenses(p)...), 530 CPEs: extractCPEs(p), 531 PURL: purlValue(info.purl), 532 Language: info.lang, 533 Metadata: extractMetadata(p, info), 534 } 535 536 internal.Backfill(sP) 537 538 if p.PackageSPDXIdentifier != "" { 539 // always prefer the IDs from the SBOM over derived IDs 540 sP.OverrideID(artifact.ID(p.PackageSPDXIdentifier)) 541 } else { 542 sP.SetID() 543 } 544 545 return *sP 546 } 547 548 func purlValue(purl packageurl.PackageURL) string { 549 val := purl.String() 550 if _, err := packageurl.FromString(val); err != nil { 551 return "" 552 } 553 return val 554 } 555 556 func parseSPDXLicenses(p *spdx.Package) []pkg.License { 557 licenses := make([]pkg.License, 0) 558 559 // concluded 560 if p.PackageLicenseConcluded != helpers.NOASSERTION && p.PackageLicenseConcluded != helpers.NONE && p.PackageLicenseConcluded != "" { 561 l := pkg.NewLicenseWithContext(context.TODO(), cleanSPDXID(p.PackageLicenseConcluded)) 562 l.Type = license.Concluded 563 licenses = append(licenses, l) 564 } 565 566 // declared 567 if p.PackageLicenseDeclared != helpers.NOASSERTION && p.PackageLicenseDeclared != helpers.NONE && p.PackageLicenseDeclared != "" { 568 l := pkg.NewLicenseWithContext(context.TODO(), cleanSPDXID(p.PackageLicenseDeclared)) 569 l.Type = license.Declared 570 licenses = append(licenses, l) 571 } 572 573 return licenses 574 } 575 576 func cleanSPDXID(id string) string { 577 return strings.TrimPrefix(id, spdxlicense.LicenseRefPrefix) 578 } 579 580 //nolint:funlen 581 func extractMetadata(p *spdx.Package, info pkgInfo) any { 582 arch := info.qualifierValue(pkg.PURLQualifierArch) 583 upstreamValue := info.qualifierValue(pkg.PURLQualifierUpstream) 584 upstream := strings.SplitN(upstreamValue, "@", 2) 585 upstreamName := upstream[0] 586 upstreamVersion := "" 587 if len(upstream) > 1 { 588 upstreamVersion = upstream[1] 589 } 590 supplier := "" 591 if p.PackageSupplier != nil { 592 supplier = p.PackageSupplier.Supplier 593 } 594 originator := "" 595 if p.PackageOriginator != nil { 596 originator = p.PackageOriginator.Originator 597 } 598 switch info.typ { 599 case pkg.ApkPkg: 600 return pkg.ApkDBEntry{ 601 Package: p.PackageName, 602 OriginPackage: upstreamName, 603 Maintainer: supplier, 604 Version: p.PackageVersion, 605 Architecture: arch, 606 URL: p.PackageHomePage, 607 Description: p.PackageDescription, 608 } 609 case pkg.RpmPkg: 610 converted, err := strconv.Atoi(info.qualifierValue(pkg.PURLQualifierEpoch)) 611 var epoch *int 612 if err != nil { 613 epoch = nil 614 } else { 615 epoch = &converted 616 } 617 return pkg.RpmDBEntry{ 618 Name: p.PackageName, 619 Version: p.PackageVersion, 620 Epoch: epoch, 621 Arch: arch, 622 SourceRpm: upstreamValue, 623 Vendor: originator, 624 } 625 case pkg.DebPkg: 626 return pkg.DpkgDBEntry{ 627 Package: p.PackageName, 628 Source: upstreamName, 629 Version: p.PackageVersion, 630 SourceVersion: upstreamVersion, 631 Architecture: arch, 632 Maintainer: originator, 633 } 634 case pkg.JavaPkg: 635 var digests []file.Digest 636 for _, value := range p.PackageChecksums { 637 digests = append(digests, file.Digest{Algorithm: fromChecksumAlgorithm(value.Algorithm), Value: value.Value}) 638 } 639 return pkg.JavaArchive{ 640 ArchiveDigests: digests, 641 } 642 case pkg.GoModulePkg: 643 var h1Digest string 644 for _, value := range p.PackageChecksums { 645 digest, err := helpers.HDigestFromSHA(fromChecksumAlgorithm(value.Algorithm), value.Value) 646 if err != nil { 647 log.Debugf("invalid h1digest: %v %v", value, err) 648 continue 649 } 650 h1Digest = digest 651 break 652 } 653 return pkg.GolangBinaryBuildinfoEntry{ 654 H1Digest: h1Digest, 655 } 656 } 657 return nil 658 } 659 660 func findPURLValue(p *spdx.Package) string { 661 for _, r := range p.PackageExternalReferences { 662 if r.RefType == string(helpers.PurlExternalRefType) { 663 return r.Locator 664 } 665 } 666 return "" 667 } 668 669 func extractCPEs(p *spdx.Package) (cpes []cpe.CPE) { 670 for _, r := range p.PackageExternalReferences { 671 if r.RefType == string(helpers.Cpe23ExternalRefType) { 672 c, err := cpe.New(r.Locator, cpe.DeclaredSource) 673 if err != nil { 674 log.Warnf("unable to extract SPDX CPE=%q: %+v", r.Locator, err) 675 continue 676 } 677 cpes = append(cpes, c) 678 } 679 } 680 return cpes 681 } 682 683 // packageIDsToSkip returns a set of packageIDs that should not be imported 684 func packageIDsToSkip(doc *spdx.Document) *strset.Set { 685 skipIDs := strset.New() 686 for i := 0; i < len(doc.Relationships); i++ { 687 r := doc.Relationships[i] 688 if r != nil && r.Relationship == spdx.RelationshipGeneratedFrom { 689 skipIDs.Add(string(r.RefB.ElementRefID)) 690 } 691 } 692 return skipIDs 693 }