github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/format/common/spdxhelpers/to_syft_model.go (about) 1 package spdxhelpers 2 3 import ( 4 "errors" 5 "fmt" 6 "net/url" 7 "path" 8 "regexp" 9 "strconv" 10 "strings" 11 12 "github.com/spdx/tools-golang/spdx" 13 "github.com/spdx/tools-golang/spdx/v2/common" 14 15 "github.com/anchore/packageurl-go" 16 "github.com/anchore/syft/syft/artifact" 17 "github.com/anchore/syft/syft/cpe" 18 "github.com/anchore/syft/syft/file" 19 "github.com/anchore/syft/syft/format/common/util" 20 "github.com/anchore/syft/syft/license" 21 "github.com/anchore/syft/syft/linux" 22 "github.com/anchore/syft/syft/pkg" 23 "github.com/anchore/syft/syft/sbom" 24 "github.com/anchore/syft/syft/source" 25 "github.com/lineaje-labs/syft/internal/log" 26 "github.com/lineaje-labs/syft/internal/spdxlicense" 27 ) 28 29 func ToSyftModel(doc *spdx.Document) (*sbom.SBOM, error) { 30 if doc == nil { 31 return nil, errors.New("cannot convert SPDX document to Syft model because document is nil") 32 } 33 34 spdxIDMap := make(map[string]any) 35 36 s := &sbom.SBOM{ 37 Source: extractSource(spdxIDMap, doc), 38 Artifacts: sbom.Artifacts{ 39 Packages: pkg.NewCollection(), 40 FileMetadata: map[file.Coordinates]file.Metadata{}, 41 FileDigests: map[file.Coordinates][]file.Digest{}, 42 LinuxDistribution: findLinuxReleaseByPURL(doc), 43 }, 44 } 45 46 collectSyftPackages(s, spdxIDMap, doc.Packages) 47 48 collectSyftFiles(s, spdxIDMap, doc) 49 50 s.Relationships = toSyftRelationships(spdxIDMap, doc) 51 52 return s, nil 53 } 54 55 func isDirectory(name string) bool { 56 if name == "." || name == ".." || strings.HasSuffix(name, "/") || !strings.Contains(path.Base(name), ".") { 57 return true 58 } 59 return false 60 } 61 62 func removePackage(packages []*spdx.Package, remove *spdx.Package) (pkgs []*spdx.Package) { 63 for _, p := range packages { 64 if p == remove { 65 continue 66 } 67 pkgs = append(pkgs, p) 68 } 69 return 70 } 71 72 func removeRelationships(relationships []*spdx.Relationship, spdxID spdx.ElementID) (relations []*spdx.Relationship) { 73 for _, r := range relationships { 74 if r.RefA.ElementRefID == spdxID || r.RefB.ElementRefID == spdxID { 75 continue 76 } 77 relations = append(relations, r) 78 } 79 return 80 } 81 82 func findRootPackages(doc *spdx.Document) (out []*spdx.Package) { 83 for _, p := range doc.Packages { 84 for _, r := range doc.Relationships { 85 describes := r.RefA.ElementRefID == "DOCUMENT" && 86 r.Relationship == spdx.RelationshipDescribes && 87 r.RefB.ElementRefID == p.PackageSPDXIdentifier 88 89 describedBy := r.RefB.ElementRefID == "DOCUMENT" && 90 r.Relationship == spdx.RelationshipDescribedBy && 91 r.RefA.ElementRefID == p.PackageSPDXIdentifier 92 93 if !describes && !describedBy { 94 continue 95 } 96 97 out = append(out, p) 98 } 99 } 100 return 101 } 102 103 func extractSource(spdxIDMap map[string]any, doc *spdx.Document) source.Description { 104 src := extractSourceFromNamespace(doc.DocumentNamespace) 105 106 rootPackages := findRootPackages(doc) 107 108 if len(rootPackages) != 1 { 109 return src 110 } 111 112 p := rootPackages[0] 113 114 switch p.PrimaryPackagePurpose { 115 case spdxPrimaryPurposeContainer: 116 src = containerSource(p) 117 case spdxPrimaryPurposeFile: 118 src = fileSource(p) 119 default: 120 return src 121 } 122 123 spdxIDMap[string(p.PackageSPDXIdentifier)] = src 124 125 doc.Packages = removePackage(doc.Packages, p) 126 doc.Relationships = removeRelationships(doc.Relationships, p.PackageSPDXIdentifier) 127 128 return src 129 } 130 131 func containerSource(p *spdx.Package) source.Description { 132 id := string(p.PackageSPDXIdentifier) 133 134 container := p.PackageName 135 v := p.PackageVersion 136 if v != "" { 137 container += ":" + v 138 } 139 140 digest := "" 141 if len(p.PackageChecksums) > 0 { 142 c := p.PackageChecksums[0] 143 digest = fmt.Sprintf("%s:%s", fromChecksumAlgorithm(c.Algorithm), c.Value) 144 } 145 return source.Description{ 146 ID: id, 147 Name: p.PackageName, 148 Version: p.PackageVersion, 149 Metadata: source.StereoscopeImageSourceMetadata{ 150 UserInput: container, 151 ID: id, 152 Layers: nil, // TODO handle formats with nested layer packages like Tern and K8s BOM tool 153 ManifestDigest: digest, 154 }, 155 } 156 } 157 158 func fileSource(p *spdx.Package) source.Description { 159 typeRegex := regexp.MustCompile("^DocumentRoot-([^-]+)-.*$") 160 typeName := typeRegex.ReplaceAllString(string(p.PackageSPDXIdentifier), "$1") 161 162 var version string 163 var metadata any 164 switch { 165 case typeName == prefixDirectory: 166 // is a Syft SBOM, explicitly a directory source 167 metadata, version = directorySourceMetadata(p) 168 case typeName == prefixFile: 169 // is a Syft SBOM, explicitly a file source 170 metadata, version = fileSourceMetadata(p) 171 case isDirectory(p.PackageName): 172 // is a non-Syft SBOM, which looks like a directory 173 metadata, version = directorySourceMetadata(p) 174 default: 175 // is a non-Syft SBOM, which is probably a file 176 metadata, version = fileSourceMetadata(p) 177 } 178 179 return source.Description{ 180 ID: string(p.PackageSPDXIdentifier), 181 Name: p.PackageName, 182 Version: version, 183 Metadata: metadata, 184 } 185 } 186 187 func fileSourceMetadata(p *spdx.Package) (any, string) { 188 version := p.PackageVersion 189 190 m := source.FileSourceMetadata{ 191 Path: p.PackageName, 192 } 193 // if this is a Syft SBOM, we might have output a digest as the version 194 checksum := toChecksum(p.PackageVersion) 195 for _, d := range p.PackageChecksums { 196 if checksum != nil && checksum.Value == d.Value { 197 version = "" 198 } 199 m.Digests = append(m.Digests, file.Digest{ 200 Algorithm: fromChecksumAlgorithm(d.Algorithm), 201 Value: d.Value, 202 }) 203 } 204 205 return m, version 206 } 207 208 func directorySourceMetadata(p *spdx.Package) (any, string) { 209 return source.DirectorySourceMetadata{ 210 Path: p.PackageName, 211 Base: "", 212 }, p.PackageVersion 213 } 214 215 // NOTE(jonas): SPDX doesn't inform what an SBOM is about, 216 // image, directory, for example. This is our best effort to determine 217 // the scheme. Syft-generated SBOMs have in the namespace 218 // field a type encoded, which we try to identify here. 219 func extractSourceFromNamespace(ns string) source.Description { 220 u, err := url.Parse(ns) 221 if err != nil { 222 return source.Description{ 223 Metadata: nil, 224 } 225 } 226 227 parts := strings.Split(u.Path, "/") 228 for _, p := range parts { 229 switch p { 230 case inputFile: 231 return source.Description{ 232 Metadata: source.FileSourceMetadata{}, 233 } 234 case inputImage: 235 return source.Description{ 236 Metadata: source.StereoscopeImageSourceMetadata{}, 237 } 238 case inputDirectory: 239 return source.Description{ 240 Metadata: source.DirectorySourceMetadata{}, 241 } 242 } 243 } 244 return source.Description{} 245 } 246 247 func findLinuxReleaseByPURL(doc *spdx.Document) *linux.Release { 248 for _, p := range doc.Packages { 249 purlValue := findPURLValue(p) 250 if purlValue == "" { 251 continue 252 } 253 purl, err := packageurl.FromString(purlValue) 254 if err != nil { 255 log.Warnf("unable to parse purl: %s", purlValue) 256 continue 257 } 258 distro := findQualifierValue(purl, pkg.PURLQualifierDistro) 259 if distro != "" { 260 parts := strings.Split(distro, "-") 261 name := parts[0] 262 version := "" 263 if len(parts) > 1 { 264 version = parts[1] 265 } 266 return &linux.Release{ 267 PrettyName: name, 268 Name: name, 269 ID: name, 270 IDLike: []string{name}, 271 Version: version, 272 VersionID: version, 273 } 274 } 275 } 276 277 return nil 278 } 279 280 func collectSyftPackages(s *sbom.SBOM, spdxIDMap map[string]any, packages []*spdx.Package) { 281 for _, p := range packages { 282 syftPkg := toSyftPackage(p) 283 spdxIDMap[string(p.PackageSPDXIdentifier)] = syftPkg 284 s.Artifacts.Packages.Add(syftPkg) 285 } 286 } 287 288 func collectSyftFiles(s *sbom.SBOM, spdxIDMap map[string]any, doc *spdx.Document) { 289 for _, p := range doc.Packages { 290 for _, f := range p.Files { 291 l := toSyftLocation(f) 292 spdxIDMap[string(f.FileSPDXIdentifier)] = l 293 294 s.Artifacts.FileMetadata[l.Coordinates] = toFileMetadata(f) 295 s.Artifacts.FileDigests[l.Coordinates] = toFileDigests(f) 296 } 297 } 298 299 for _, f := range doc.Files { 300 l := toSyftLocation(f) 301 spdxIDMap[string(f.FileSPDXIdentifier)] = l 302 303 s.Artifacts.FileMetadata[l.Coordinates] = toFileMetadata(f) 304 s.Artifacts.FileDigests[l.Coordinates] = toFileDigests(f) 305 } 306 } 307 308 func toFileDigests(f *spdx.File) (digests []file.Digest) { 309 for _, digest := range f.Checksums { 310 digests = append(digests, file.Digest{ 311 Algorithm: fromChecksumAlgorithm(digest.Algorithm), 312 Value: digest.Value, 313 }) 314 } 315 return digests 316 } 317 318 func fromChecksumAlgorithm(algorithm common.ChecksumAlgorithm) string { 319 return strings.ToLower(string(algorithm)) 320 } 321 322 func toFileMetadata(f *spdx.File) (meta file.Metadata) { 323 // FIXME Syft is currently lossy due to the SPDX 2.2.1 spec not supporting arbitrary mimetypes 324 for _, typ := range f.FileTypes { 325 switch FileType(typ) { 326 case ImageFileType: 327 meta.MIMEType = "image/" 328 case VideoFileType: 329 meta.MIMEType = "video/" 330 case ApplicationFileType: 331 meta.MIMEType = "application/" 332 case TextFileType: 333 meta.MIMEType = "text/" 334 case AudioFileType: 335 meta.MIMEType = "audio/" 336 case BinaryFileType: 337 case ArchiveFileType: 338 case OtherFileType: 339 } 340 } 341 return meta 342 } 343 344 func toSyftRelationships(spdxIDMap map[string]any, doc *spdx.Document) []artifact.Relationship { 345 out := collectDocRelationships(spdxIDMap, doc) 346 347 out = append(out, collectPackageFileRelationships(spdxIDMap, doc)...) 348 349 return out 350 } 351 352 func collectDocRelationships(spdxIDMap map[string]any, doc *spdx.Document) (out []artifact.Relationship) { 353 for _, r := range doc.Relationships { 354 // FIXME what to do with r.RefA.DocumentRefID and r.RefA.SpecialID 355 if r.RefA.DocumentRefID != "" && requireAndTrimPrefix(r.RefA.DocumentRefID, "DocumentRef-") != string(doc.SPDXIdentifier) { 356 log.Debugf("ignoring relationship to external document: %+v", r) 357 continue 358 } 359 a := spdxIDMap[string(r.RefA.ElementRefID)] 360 b := spdxIDMap[string(r.RefB.ElementRefID)] 361 from, fromOk := a.(pkg.Package) 362 toPackage, toPackageOk := b.(pkg.Package) 363 toLocation, toLocationOk := b.(file.Location) 364 if !fromOk || !(toPackageOk || toLocationOk) { 365 log.Debugf("unable to find valid relationship mapping from SPDX, ignoring: (from: %+v) (to: %+v)", a, b) 366 continue 367 } 368 var to artifact.Identifiable 369 var typ artifact.RelationshipType 370 if toLocationOk { 371 switch RelationshipType(r.Relationship) { 372 case ContainsRelationship: 373 typ = artifact.ContainsRelationship 374 to = toLocation 375 case OtherRelationship: 376 // Encoding uses a specifically formatted comment... 377 if strings.Index(r.RelationshipComment, string(artifact.EvidentByRelationship)) == 0 { 378 typ = artifact.EvidentByRelationship 379 to = toLocation 380 } 381 } 382 } else { 383 switch RelationshipType(r.Relationship) { 384 case ContainsRelationship: 385 typ = artifact.ContainsRelationship 386 to = toPackage 387 case OtherRelationship: 388 // Encoding uses a specifically formatted comment... 389 if strings.Index(r.RelationshipComment, string(artifact.OwnershipByFileOverlapRelationship)) == 0 { 390 typ = artifact.OwnershipByFileOverlapRelationship 391 to = toPackage 392 } 393 } 394 } 395 if typ != "" && to != nil { 396 out = append(out, artifact.Relationship{ 397 From: from, 398 To: to, 399 Type: typ, 400 }) 401 } 402 } 403 return out 404 } 405 406 // collectPackageFileRelationships add relationships for direct files 407 func collectPackageFileRelationships(spdxIDMap map[string]any, doc *spdx.Document) (out []artifact.Relationship) { 408 for _, p := range doc.Packages { 409 a := spdxIDMap[string(p.PackageSPDXIdentifier)] 410 from, fromOk := a.(pkg.Package) 411 if !fromOk { 412 continue 413 } 414 for _, f := range p.Files { 415 b := spdxIDMap[string(f.FileSPDXIdentifier)] 416 to, toLocationOk := b.(file.Location) 417 if !toLocationOk { 418 continue 419 } 420 out = append(out, artifact.Relationship{ 421 From: from, 422 To: to, 423 Type: artifact.ContainsRelationship, 424 }) 425 } 426 } 427 return out 428 } 429 430 func toSyftCoordinates(f *spdx.File) file.Coordinates { 431 const layerIDPrefix = "layerID: " 432 var fileSystemID string 433 if strings.Index(f.FileComment, layerIDPrefix) == 0 { 434 fileSystemID = strings.TrimPrefix(f.FileComment, layerIDPrefix) 435 } 436 if strings.Index(string(f.FileSPDXIdentifier), layerIDPrefix) == 0 { 437 fileSystemID = strings.TrimPrefix(string(f.FileSPDXIdentifier), layerIDPrefix) 438 } 439 return file.Coordinates{ 440 RealPath: f.FileName, 441 FileSystemID: fileSystemID, 442 } 443 } 444 445 func toSyftLocation(f *spdx.File) file.Location { 446 l := file.NewVirtualLocationFromCoordinates(toSyftCoordinates(f), f.FileName) 447 return l 448 } 449 450 func requireAndTrimPrefix(val interface{}, prefix string) string { 451 if v, ok := val.(string); ok { 452 if i := strings.Index(v, prefix); i == 0 { 453 return strings.Replace(v, prefix, "", 1) 454 } 455 } 456 return "" 457 } 458 459 type pkgInfo struct { 460 purl packageurl.PackageURL 461 typ pkg.Type 462 lang pkg.Language 463 } 464 465 func (p *pkgInfo) qualifierValue(name string) string { 466 return findQualifierValue(p.purl, name) 467 } 468 469 func findQualifierValue(purl packageurl.PackageURL, qualifier string) string { 470 for _, q := range purl.Qualifiers { 471 if q.Key == qualifier { 472 return q.Value 473 } 474 } 475 return "" 476 } 477 478 func extractPkgInfo(p *spdx.Package) pkgInfo { 479 pu := findPURLValue(p) 480 purl, err := packageurl.FromString(pu) 481 if err != nil { 482 return pkgInfo{} 483 } 484 return pkgInfo{ 485 purl, 486 pkg.TypeByName(purl.Type), 487 pkg.LanguageByName(purl.Type), 488 } 489 } 490 491 func toSyftPackage(p *spdx.Package) pkg.Package { 492 info := extractPkgInfo(p) 493 sP := &pkg.Package{ 494 Type: info.typ, 495 Name: p.PackageName, 496 Version: p.PackageVersion, 497 Licenses: pkg.NewLicenseSet(parseSPDXLicenses(p)...), 498 CPEs: extractCPEs(p), 499 PURL: purlValue(info.purl), 500 Language: info.lang, 501 Metadata: extractMetadata(p, info), 502 } 503 504 sP.SetID() 505 506 return *sP 507 } 508 509 func purlValue(purl packageurl.PackageURL) string { 510 val := purl.String() 511 if _, err := packageurl.FromString(val); err != nil { 512 return "" 513 } 514 return val 515 } 516 517 func parseSPDXLicenses(p *spdx.Package) []pkg.License { 518 licenses := make([]pkg.License, 0) 519 520 // concluded 521 if p.PackageLicenseConcluded != NOASSERTION && p.PackageLicenseConcluded != NONE && p.PackageLicenseConcluded != "" { 522 l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseConcluded)) 523 l.Type = license.Concluded 524 licenses = append(licenses, l) 525 } 526 527 // declared 528 if p.PackageLicenseDeclared != NOASSERTION && p.PackageLicenseDeclared != NONE && p.PackageLicenseDeclared != "" { 529 l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseDeclared)) 530 l.Type = license.Declared 531 licenses = append(licenses, l) 532 } 533 534 return licenses 535 } 536 537 func cleanSPDXID(id string) string { 538 return strings.TrimPrefix(id, spdxlicense.LicenseRefPrefix) 539 } 540 541 //nolint:funlen 542 func extractMetadata(p *spdx.Package, info pkgInfo) any { 543 arch := info.qualifierValue(pkg.PURLQualifierArch) 544 upstreamValue := info.qualifierValue(pkg.PURLQualifierUpstream) 545 upstream := strings.SplitN(upstreamValue, "@", 2) 546 upstreamName := upstream[0] 547 upstreamVersion := "" 548 if len(upstream) > 1 { 549 upstreamVersion = upstream[1] 550 } 551 supplier := "" 552 if p.PackageSupplier != nil { 553 supplier = p.PackageSupplier.Supplier 554 } 555 originator := "" 556 if p.PackageOriginator != nil { 557 originator = p.PackageOriginator.Originator 558 } 559 switch info.typ { 560 case pkg.ApkPkg: 561 return pkg.ApkDBEntry{ 562 Package: p.PackageName, 563 OriginPackage: upstreamName, 564 Maintainer: supplier, 565 Version: p.PackageVersion, 566 Architecture: arch, 567 URL: p.PackageHomePage, 568 Description: p.PackageDescription, 569 } 570 case pkg.RpmPkg: 571 converted, err := strconv.Atoi(info.qualifierValue(pkg.PURLQualifierEpoch)) 572 var epoch *int 573 if err != nil { 574 epoch = nil 575 } else { 576 epoch = &converted 577 } 578 return pkg.RpmDBEntry{ 579 Name: p.PackageName, 580 Version: p.PackageVersion, 581 Epoch: epoch, 582 Arch: arch, 583 SourceRpm: upstreamValue, 584 Vendor: originator, 585 } 586 case pkg.DebPkg: 587 return pkg.DpkgDBEntry{ 588 Package: p.PackageName, 589 Source: upstreamName, 590 Version: p.PackageVersion, 591 SourceVersion: upstreamVersion, 592 Architecture: arch, 593 Maintainer: originator, 594 } 595 case pkg.JavaPkg: 596 var digests []file.Digest 597 for _, value := range p.PackageChecksums { 598 digests = append(digests, file.Digest{Algorithm: fromChecksumAlgorithm(value.Algorithm), Value: value.Value}) 599 } 600 return pkg.JavaArchive{ 601 ArchiveDigests: digests, 602 } 603 case pkg.GoModulePkg: 604 var h1Digest string 605 for _, value := range p.PackageChecksums { 606 digest, err := util.HDigestFromSHA(fromChecksumAlgorithm(value.Algorithm), value.Value) 607 if err != nil { 608 log.Debugf("invalid h1digest: %v %v", value, err) 609 continue 610 } 611 h1Digest = digest 612 break 613 } 614 return pkg.GolangBinaryBuildinfoEntry{ 615 H1Digest: h1Digest, 616 } 617 } 618 return nil 619 } 620 621 func findPURLValue(p *spdx.Package) string { 622 for _, r := range p.PackageExternalReferences { 623 if r.RefType == string(PurlExternalRefType) { 624 return r.Locator 625 } 626 } 627 return "" 628 } 629 630 func extractCPEs(p *spdx.Package) (cpes []cpe.CPE) { 631 for _, r := range p.PackageExternalReferences { 632 if r.RefType == string(Cpe23ExternalRefType) { 633 c, err := cpe.New(r.Locator) 634 if err != nil { 635 log.Warnf("unable to extract SPDX CPE=%q: %+v", r.Locator, err) 636 continue 637 } 638 cpes = append(cpes, c) 639 } 640 } 641 return cpes 642 }