github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/formats/common/spdxhelpers/to_syft_model.go (about) 1 package spdxhelpers 2 3 import ( 4 "errors" 5 "fmt" 6 "net/url" 7 "path" 8 "regexp" 9 "strconv" 10 "strings" 11 12 "github.com/spdx/tools-golang/spdx" 13 "github.com/spdx/tools-golang/spdx/v2/common" 14 15 "github.com/anchore/packageurl-go" 16 "github.com/kastenhq/syft/internal/log" 17 "github.com/kastenhq/syft/internal/spdxlicense" 18 "github.com/kastenhq/syft/syft/artifact" 19 "github.com/kastenhq/syft/syft/cpe" 20 "github.com/kastenhq/syft/syft/file" 21 "github.com/kastenhq/syft/syft/formats/common/util" 22 "github.com/kastenhq/syft/syft/license" 23 "github.com/kastenhq/syft/syft/linux" 24 "github.com/kastenhq/syft/syft/pkg" 25 "github.com/kastenhq/syft/syft/sbom" 26 "github.com/kastenhq/syft/syft/source" 27 ) 28 29 func ToSyftModel(doc *spdx.Document) (*sbom.SBOM, error) { 30 if doc == nil { 31 return nil, errors.New("cannot convert SPDX document to Syft model because document is nil") 32 } 33 34 spdxIDMap := make(map[string]any) 35 36 s := &sbom.SBOM{ 37 Source: extractSource(spdxIDMap, doc), 38 Artifacts: sbom.Artifacts{ 39 Packages: pkg.NewCollection(), 40 FileMetadata: map[file.Coordinates]file.Metadata{}, 41 FileDigests: map[file.Coordinates][]file.Digest{}, 42 LinuxDistribution: findLinuxReleaseByPURL(doc), 43 }, 44 } 45 46 collectSyftPackages(s, spdxIDMap, doc.Packages) 47 48 collectSyftFiles(s, spdxIDMap, doc) 49 50 s.Relationships = toSyftRelationships(spdxIDMap, doc) 51 52 return s, nil 53 } 54 55 func isDirectory(name string) bool { 56 if name == "." || name == ".." || strings.HasSuffix(name, "/") || !strings.Contains(path.Base(name), ".") { 57 return true 58 } 59 return false 60 } 61 62 func removePackage(packages []*spdx.Package, remove *spdx.Package) (pkgs []*spdx.Package) { 63 for _, p := range packages { 64 if p == remove { 65 continue 66 } 67 pkgs = append(pkgs, p) 68 } 69 return 70 } 71 72 func removeRelationships(relationships []*spdx.Relationship, spdxID spdx.ElementID) (relations []*spdx.Relationship) { 73 for _, r := range relationships { 74 if r.RefA.ElementRefID == spdxID || r.RefB.ElementRefID == spdxID { 75 continue 76 } 77 relations = append(relations, r) 78 } 79 return 80 } 81 82 func findRootPackages(doc *spdx.Document) (out []*spdx.Package) { 83 for _, p := range doc.Packages { 84 for _, r := range doc.Relationships { 85 describes := r.RefA.ElementRefID == "DOCUMENT" && 86 r.Relationship == spdx.RelationshipDescribes && 87 r.RefB.ElementRefID == p.PackageSPDXIdentifier 88 89 describedBy := r.RefB.ElementRefID == "DOCUMENT" && 90 r.Relationship == spdx.RelationshipDescribedBy && 91 r.RefA.ElementRefID == p.PackageSPDXIdentifier 92 93 if !describes && !describedBy { 94 continue 95 } 96 97 out = append(out, p) 98 } 99 } 100 return 101 } 102 103 func extractSource(spdxIDMap map[string]any, doc *spdx.Document) source.Description { 104 src := extractSourceFromNamespace(doc.DocumentNamespace) 105 106 rootPackages := findRootPackages(doc) 107 108 if len(rootPackages) != 1 { 109 return src 110 } 111 112 p := rootPackages[0] 113 114 switch p.PrimaryPackagePurpose { 115 case spdxPrimaryPurposeContainer: 116 src = containerSource(p) 117 case spdxPrimaryPurposeFile: 118 src = fileSource(p) 119 default: 120 return src 121 } 122 123 spdxIDMap[string(p.PackageSPDXIdentifier)] = src 124 125 doc.Packages = removePackage(doc.Packages, p) 126 doc.Relationships = removeRelationships(doc.Relationships, p.PackageSPDXIdentifier) 127 128 return src 129 } 130 131 func containerSource(p *spdx.Package) source.Description { 132 id := string(p.PackageSPDXIdentifier) 133 134 container := p.PackageName 135 v := p.PackageVersion 136 if v != "" { 137 container += ":" + v 138 } 139 140 digest := "" 141 if len(p.PackageChecksums) > 0 { 142 c := p.PackageChecksums[0] 143 digest = fmt.Sprintf("%s:%s", fromChecksumAlgorithm(c.Algorithm), c.Value) 144 } 145 return source.Description{ 146 ID: id, 147 Name: p.PackageName, 148 Version: p.PackageVersion, 149 Metadata: source.StereoscopeImageSourceMetadata{ 150 UserInput: container, 151 ID: id, 152 Layers: nil, // TODO handle formats with nested layer packages like Tern and K8s BOM tool 153 ManifestDigest: digest, 154 }, 155 } 156 } 157 158 func fileSource(p *spdx.Package) source.Description { 159 typeRegex := regexp.MustCompile("^DocumentRoot-([^-]+)-.*$") 160 typeName := typeRegex.ReplaceAllString(string(p.PackageSPDXIdentifier), "$1") 161 162 var version string 163 var metadata any 164 switch { 165 case typeName == prefixDirectory: 166 // is a Syft SBOM, explicitly a directory source 167 metadata, version = directorySourceMetadata(p) 168 case typeName == prefixFile: 169 // is a Syft SBOM, explicitly a file source 170 metadata, version = fileSourceMetadata(p) 171 case isDirectory(p.PackageName): 172 // is a non-Syft SBOM, which looks like a directory 173 metadata, version = directorySourceMetadata(p) 174 default: 175 // is a non-Syft SBOM, which is probably a file 176 metadata, version = fileSourceMetadata(p) 177 } 178 179 return source.Description{ 180 ID: string(p.PackageSPDXIdentifier), 181 Name: p.PackageName, 182 Version: version, 183 Metadata: metadata, 184 } 185 } 186 187 func fileSourceMetadata(p *spdx.Package) (any, string) { 188 version := p.PackageVersion 189 190 m := source.FileSourceMetadata{ 191 Path: p.PackageName, 192 } 193 // if this is a Syft SBOM, we might have output a digest as the version 194 checksum := toChecksum(p.PackageVersion) 195 for _, d := range p.PackageChecksums { 196 if checksum != nil && checksum.Value == d.Value { 197 version = "" 198 } 199 m.Digests = append(m.Digests, file.Digest{ 200 Algorithm: fromChecksumAlgorithm(d.Algorithm), 201 Value: d.Value, 202 }) 203 } 204 205 return m, version 206 } 207 208 func directorySourceMetadata(p *spdx.Package) (any, string) { 209 return source.DirectorySourceMetadata{ 210 Path: p.PackageName, 211 Base: "", 212 }, p.PackageVersion 213 } 214 215 // NOTE(jonas): SPDX doesn't inform what an SBOM is about, 216 // image, directory, for example. This is our best effort to determine 217 // the scheme. Syft-generated SBOMs have in the namespace 218 // field a type encoded, which we try to identify here. 219 func extractSourceFromNamespace(ns string) source.Description { 220 u, err := url.Parse(ns) 221 if err != nil { 222 return source.Description{ 223 Metadata: nil, 224 } 225 } 226 227 parts := strings.Split(u.Path, "/") 228 for _, p := range parts { 229 switch p { 230 case inputFile: 231 return source.Description{ 232 Metadata: source.FileSourceMetadata{}, 233 } 234 case inputImage: 235 return source.Description{ 236 Metadata: source.StereoscopeImageSourceMetadata{}, 237 } 238 case inputDirectory: 239 return source.Description{ 240 Metadata: source.DirectorySourceMetadata{}, 241 } 242 } 243 } 244 return source.Description{} 245 } 246 247 func findLinuxReleaseByPURL(doc *spdx.Document) *linux.Release { 248 for _, p := range doc.Packages { 249 purlValue := findPURLValue(p) 250 if purlValue == "" { 251 continue 252 } 253 purl, err := packageurl.FromString(purlValue) 254 if err != nil { 255 log.Warnf("unable to parse purl: %s", purlValue) 256 continue 257 } 258 distro := findQualifierValue(purl, pkg.PURLQualifierDistro) 259 if distro != "" { 260 parts := strings.Split(distro, "-") 261 name := parts[0] 262 version := "" 263 if len(parts) > 1 { 264 version = parts[1] 265 } 266 return &linux.Release{ 267 PrettyName: name, 268 Name: name, 269 ID: name, 270 IDLike: []string{name}, 271 Version: version, 272 VersionID: version, 273 } 274 } 275 } 276 277 return nil 278 } 279 280 func collectSyftPackages(s *sbom.SBOM, spdxIDMap map[string]any, packages []*spdx.Package) { 281 for _, p := range packages { 282 syftPkg := toSyftPackage(p) 283 spdxIDMap[string(p.PackageSPDXIdentifier)] = syftPkg 284 s.Artifacts.Packages.Add(syftPkg) 285 } 286 } 287 288 func collectSyftFiles(s *sbom.SBOM, spdxIDMap map[string]any, doc *spdx.Document) { 289 for _, f := range doc.Files { 290 l := toSyftLocation(f) 291 spdxIDMap[string(f.FileSPDXIdentifier)] = l 292 293 s.Artifacts.FileMetadata[l.Coordinates] = toFileMetadata(f) 294 s.Artifacts.FileDigests[l.Coordinates] = toFileDigests(f) 295 } 296 } 297 298 func toFileDigests(f *spdx.File) (digests []file.Digest) { 299 for _, digest := range f.Checksums { 300 digests = append(digests, file.Digest{ 301 Algorithm: fromChecksumAlgorithm(digest.Algorithm), 302 Value: digest.Value, 303 }) 304 } 305 return digests 306 } 307 308 func fromChecksumAlgorithm(algorithm common.ChecksumAlgorithm) string { 309 return strings.ToLower(string(algorithm)) 310 } 311 312 func toFileMetadata(f *spdx.File) (meta file.Metadata) { 313 // FIXME Syft is currently lossy due to the SPDX 2.2.1 spec not supporting arbitrary mimetypes 314 for _, typ := range f.FileTypes { 315 switch FileType(typ) { 316 case ImageFileType: 317 meta.MIMEType = "image/" 318 case VideoFileType: 319 meta.MIMEType = "video/" 320 case ApplicationFileType: 321 meta.MIMEType = "application/" 322 case TextFileType: 323 meta.MIMEType = "text/" 324 case AudioFileType: 325 meta.MIMEType = "audio/" 326 case BinaryFileType: 327 case ArchiveFileType: 328 case OtherFileType: 329 } 330 } 331 return meta 332 } 333 334 func toSyftRelationships(spdxIDMap map[string]any, doc *spdx.Document) []artifact.Relationship { 335 var out []artifact.Relationship 336 for _, r := range doc.Relationships { 337 // FIXME what to do with r.RefA.DocumentRefID and r.RefA.SpecialID 338 if r.RefA.DocumentRefID != "" && requireAndTrimPrefix(r.RefA.DocumentRefID, "DocumentRef-") != string(doc.SPDXIdentifier) { 339 log.Debugf("ignoring relationship to external document: %+v", r) 340 continue 341 } 342 a := spdxIDMap[string(r.RefA.ElementRefID)] 343 b := spdxIDMap[string(r.RefB.ElementRefID)] 344 from, fromOk := a.(pkg.Package) 345 toPackage, toPackageOk := b.(pkg.Package) 346 toLocation, toLocationOk := b.(file.Location) 347 if !fromOk || !(toPackageOk || toLocationOk) { 348 log.Debugf("unable to find valid relationship mapping from SPDX, ignoring: (from: %+v) (to: %+v)", a, b) 349 continue 350 } 351 var to artifact.Identifiable 352 var typ artifact.RelationshipType 353 if toLocationOk { 354 switch RelationshipType(r.Relationship) { 355 case ContainsRelationship: 356 typ = artifact.ContainsRelationship 357 to = toLocation 358 case OtherRelationship: 359 // Encoding uses a specifically formatted comment... 360 if strings.Index(r.RelationshipComment, string(artifact.EvidentByRelationship)) == 0 { 361 typ = artifact.EvidentByRelationship 362 to = toLocation 363 } 364 } 365 } else { 366 switch RelationshipType(r.Relationship) { 367 case ContainsRelationship: 368 typ = artifact.ContainsRelationship 369 to = toPackage 370 case OtherRelationship: 371 // Encoding uses a specifically formatted comment... 372 if strings.Index(r.RelationshipComment, string(artifact.OwnershipByFileOverlapRelationship)) == 0 { 373 typ = artifact.OwnershipByFileOverlapRelationship 374 to = toPackage 375 } 376 } 377 } 378 if typ != "" && to != nil { 379 out = append(out, artifact.Relationship{ 380 From: from, 381 To: to, 382 Type: typ, 383 }) 384 } 385 } 386 return out 387 } 388 389 func toSyftCoordinates(f *spdx.File) file.Coordinates { 390 const layerIDPrefix = "layerID: " 391 var fileSystemID string 392 if strings.Index(f.FileComment, layerIDPrefix) == 0 { 393 fileSystemID = strings.TrimPrefix(f.FileComment, layerIDPrefix) 394 } 395 if strings.Index(string(f.FileSPDXIdentifier), layerIDPrefix) == 0 { 396 fileSystemID = strings.TrimPrefix(string(f.FileSPDXIdentifier), layerIDPrefix) 397 } 398 return file.Coordinates{ 399 RealPath: f.FileName, 400 FileSystemID: fileSystemID, 401 } 402 } 403 404 func toSyftLocation(f *spdx.File) file.Location { 405 l := file.NewVirtualLocationFromCoordinates(toSyftCoordinates(f), f.FileName) 406 return l 407 } 408 409 func requireAndTrimPrefix(val interface{}, prefix string) string { 410 if v, ok := val.(string); ok { 411 if i := strings.Index(v, prefix); i == 0 { 412 return strings.Replace(v, prefix, "", 1) 413 } 414 } 415 return "" 416 } 417 418 type pkgInfo struct { 419 purl packageurl.PackageURL 420 typ pkg.Type 421 lang pkg.Language 422 } 423 424 func (p *pkgInfo) qualifierValue(name string) string { 425 return findQualifierValue(p.purl, name) 426 } 427 428 func findQualifierValue(purl packageurl.PackageURL, qualifier string) string { 429 for _, q := range purl.Qualifiers { 430 if q.Key == qualifier { 431 return q.Value 432 } 433 } 434 return "" 435 } 436 437 func extractPkgInfo(p *spdx.Package) pkgInfo { 438 pu := findPURLValue(p) 439 purl, err := packageurl.FromString(pu) 440 if err != nil { 441 return pkgInfo{} 442 } 443 return pkgInfo{ 444 purl, 445 pkg.TypeByName(purl.Type), 446 pkg.LanguageByName(purl.Type), 447 } 448 } 449 450 func toSyftPackage(p *spdx.Package) pkg.Package { 451 info := extractPkgInfo(p) 452 metadataType, metadata := extractMetadata(p, info) 453 sP := &pkg.Package{ 454 Type: info.typ, 455 Name: p.PackageName, 456 Version: p.PackageVersion, 457 Licenses: pkg.NewLicenseSet(parseSPDXLicenses(p)...), 458 CPEs: extractCPEs(p), 459 PURL: purlValue(info.purl), 460 Language: info.lang, 461 MetadataType: metadataType, 462 Metadata: metadata, 463 } 464 465 sP.SetID() 466 467 return *sP 468 } 469 470 func purlValue(purl packageurl.PackageURL) string { 471 val := purl.String() 472 if _, err := packageurl.FromString(val); err != nil { 473 return "" 474 } 475 return val 476 } 477 478 func parseSPDXLicenses(p *spdx.Package) []pkg.License { 479 licenses := make([]pkg.License, 0) 480 481 // concluded 482 if p.PackageLicenseConcluded != NOASSERTION && p.PackageLicenseConcluded != NONE && p.PackageLicenseConcluded != "" { 483 l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseConcluded)) 484 l.Type = license.Concluded 485 licenses = append(licenses, l) 486 } 487 488 // declared 489 if p.PackageLicenseDeclared != NOASSERTION && p.PackageLicenseDeclared != NONE && p.PackageLicenseDeclared != "" { 490 l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseDeclared)) 491 l.Type = license.Declared 492 licenses = append(licenses, l) 493 } 494 495 return licenses 496 } 497 498 func cleanSPDXID(id string) string { 499 return strings.TrimPrefix(id, spdxlicense.LicenseRefPrefix) 500 } 501 502 //nolint:funlen 503 func extractMetadata(p *spdx.Package, info pkgInfo) (pkg.MetadataType, interface{}) { 504 arch := info.qualifierValue(pkg.PURLQualifierArch) 505 upstreamValue := info.qualifierValue(pkg.PURLQualifierUpstream) 506 upstream := strings.SplitN(upstreamValue, "@", 2) 507 upstreamName := upstream[0] 508 upstreamVersion := "" 509 if len(upstream) > 1 { 510 upstreamVersion = upstream[1] 511 } 512 supplier := "" 513 if p.PackageSupplier != nil { 514 supplier = p.PackageSupplier.Supplier 515 } 516 originator := "" 517 if p.PackageOriginator != nil { 518 originator = p.PackageOriginator.Originator 519 } 520 switch info.typ { 521 case pkg.ApkPkg: 522 return pkg.ApkMetadataType, pkg.ApkMetadata{ 523 Package: p.PackageName, 524 OriginPackage: upstreamName, 525 Maintainer: supplier, 526 Version: p.PackageVersion, 527 Architecture: arch, 528 URL: p.PackageHomePage, 529 Description: p.PackageDescription, 530 } 531 case pkg.RpmPkg: 532 converted, err := strconv.Atoi(info.qualifierValue(pkg.PURLQualifierEpoch)) 533 var epoch *int 534 if err != nil { 535 epoch = nil 536 } else { 537 epoch = &converted 538 } 539 return pkg.RpmMetadataType, pkg.RpmMetadata{ 540 Name: p.PackageName, 541 Version: p.PackageVersion, 542 Epoch: epoch, 543 Arch: arch, 544 SourceRpm: upstreamValue, 545 Vendor: originator, 546 } 547 case pkg.DebPkg: 548 return pkg.DpkgMetadataType, pkg.DpkgMetadata{ 549 Package: p.PackageName, 550 Source: upstreamName, 551 Version: p.PackageVersion, 552 SourceVersion: upstreamVersion, 553 Architecture: arch, 554 Maintainer: originator, 555 } 556 case pkg.JavaPkg: 557 var digests []file.Digest 558 for _, value := range p.PackageChecksums { 559 digests = append(digests, file.Digest{Algorithm: fromChecksumAlgorithm(value.Algorithm), Value: value.Value}) 560 } 561 return pkg.JavaMetadataType, pkg.JavaMetadata{ 562 ArchiveDigests: digests, 563 } 564 case pkg.GoModulePkg: 565 var h1Digest string 566 for _, value := range p.PackageChecksums { 567 digest, err := util.HDigestFromSHA(fromChecksumAlgorithm(value.Algorithm), value.Value) 568 if err != nil { 569 log.Debugf("invalid h1digest: %v %v", value, err) 570 continue 571 } 572 h1Digest = digest 573 break 574 } 575 return pkg.GolangBinMetadataType, pkg.GolangBinMetadata{ 576 H1Digest: h1Digest, 577 } 578 } 579 return pkg.UnknownMetadataType, nil 580 } 581 582 func findPURLValue(p *spdx.Package) string { 583 for _, r := range p.PackageExternalReferences { 584 if r.RefType == string(PurlExternalRefType) { 585 return r.Locator 586 } 587 } 588 return "" 589 } 590 591 func extractCPEs(p *spdx.Package) (cpes []cpe.CPE) { 592 for _, r := range p.PackageExternalReferences { 593 if r.RefType == string(Cpe23ExternalRefType) { 594 c, err := cpe.New(r.Locator) 595 if err != nil { 596 log.Warnf("unable to extract SPDX CPE=%q: %+v", r.Locator, err) 597 continue 598 } 599 cpes = append(cpes, c) 600 } 601 } 602 return cpes 603 }