github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/formats/common/spdxhelpers/to_gosbom_model.go (about) 1 package spdxhelpers 2 3 import ( 4 "errors" 5 "net/url" 6 "strconv" 7 "strings" 8 9 "github.com/nextlinux/gosbom/gosbom/artifact" 10 "github.com/nextlinux/gosbom/gosbom/cpe" 11 "github.com/nextlinux/gosbom/gosbom/file" 12 "github.com/nextlinux/gosbom/gosbom/formats/common/util" 13 "github.com/nextlinux/gosbom/gosbom/license" 14 "github.com/nextlinux/gosbom/gosbom/linux" 15 "github.com/nextlinux/gosbom/gosbom/pkg" 16 "github.com/nextlinux/gosbom/gosbom/sbom" 17 "github.com/nextlinux/gosbom/gosbom/source" 18 "github.com/nextlinux/gosbom/internal/log" 19 "github.com/spdx/tools-golang/spdx" 20 21 "github.com/anchore/packageurl-go" 22 ) 23 24 func ToGosbomModel(doc *spdx.Document) (*sbom.SBOM, error) { 25 if doc == nil { 26 return nil, errors.New("cannot convert SPDX document to Gosbom model because document is nil") 27 } 28 29 spdxIDMap := make(map[string]interface{}) 30 31 src := source.Metadata{Scheme: source.UnknownScheme} 32 src.Scheme = extractSchemeFromNamespace(doc.DocumentNamespace) 33 34 s := &sbom.SBOM{ 35 Source: src, 36 Artifacts: sbom.Artifacts{ 37 Packages: pkg.NewCollection(), 38 FileMetadata: map[file.Coordinates]file.Metadata{}, 39 FileDigests: map[file.Coordinates][]file.Digest{}, 40 LinuxDistribution: findLinuxReleaseByPURL(doc), 41 }, 42 } 43 44 collectGosbomPackages(s, spdxIDMap, doc) 45 46 collectGosbomFiles(s, spdxIDMap, doc) 47 48 s.Relationships = toGosbomRelationships(spdxIDMap, doc) 49 50 return s, nil 51 } 52 53 // NOTE(jonas): SPDX doesn't inform what an SBOM is about, 54 // image, directory, for example. This is our best effort to determine 55 // the scheme. Gosbom-generated SBOMs have in the namespace 56 // field a type encoded, which we try to identify here. 57 func extractSchemeFromNamespace(ns string) source.Scheme { 58 u, err := url.Parse(ns) 59 if err != nil { 60 return source.UnknownScheme 61 } 62 63 parts := strings.Split(u.Path, "/") 64 for _, p := range parts { 65 switch p { 66 case inputFile: 67 return source.FileScheme 68 case inputImage: 69 return source.ImageScheme 70 case inputDirectory: 71 return source.DirectoryScheme 72 } 73 } 74 return source.UnknownScheme 75 } 76 77 func findLinuxReleaseByPURL(doc *spdx.Document) *linux.Release { 78 for _, p := range doc.Packages { 79 purlValue := findPURLValue(p) 80 if purlValue == "" { 81 continue 82 } 83 purl, err := packageurl.FromString(purlValue) 84 if err != nil { 85 log.Warnf("unable to parse purl: %s", purlValue) 86 continue 87 } 88 distro := findQualifierValue(purl, pkg.PURLQualifierDistro) 89 if distro != "" { 90 parts := strings.Split(distro, "-") 91 name := parts[0] 92 version := "" 93 if len(parts) > 1 { 94 version = parts[1] 95 } 96 return &linux.Release{ 97 PrettyName: name, 98 Name: name, 99 ID: name, 100 IDLike: []string{name}, 101 Version: version, 102 VersionID: version, 103 } 104 } 105 } 106 107 return nil 108 } 109 110 func collectGosbomPackages(s *sbom.SBOM, spdxIDMap map[string]interface{}, doc *spdx.Document) { 111 for _, p := range doc.Packages { 112 gosbomPkg := toGosbomPackage(p) 113 spdxIDMap[string(p.PackageSPDXIdentifier)] = gosbomPkg 114 s.Artifacts.Packages.Add(*gosbomPkg) 115 } 116 } 117 118 func collectGosbomFiles(s *sbom.SBOM, spdxIDMap map[string]interface{}, doc *spdx.Document) { 119 for _, f := range doc.Files { 120 l := toGosbomLocation(f) 121 spdxIDMap[string(f.FileSPDXIdentifier)] = l 122 123 s.Artifacts.FileMetadata[l.Coordinates] = toFileMetadata(f) 124 s.Artifacts.FileDigests[l.Coordinates] = toFileDigests(f) 125 } 126 } 127 128 func toFileDigests(f *spdx.File) (digests []file.Digest) { 129 for _, digest := range f.Checksums { 130 digests = append(digests, file.Digest{ 131 Algorithm: string(digest.Algorithm), 132 Value: digest.Value, 133 }) 134 } 135 return digests 136 } 137 138 func toFileMetadata(f *spdx.File) (meta file.Metadata) { 139 // FIXME Gosbom is currently lossy due to the SPDX 2.2.1 spec not supporting arbitrary mimetypes 140 for _, typ := range f.FileTypes { 141 switch FileType(typ) { 142 case ImageFileType: 143 meta.MIMEType = "image/" 144 case VideoFileType: 145 meta.MIMEType = "video/" 146 case ApplicationFileType: 147 meta.MIMEType = "application/" 148 case TextFileType: 149 meta.MIMEType = "text/" 150 case AudioFileType: 151 meta.MIMEType = "audio/" 152 case BinaryFileType: 153 case ArchiveFileType: 154 case OtherFileType: 155 } 156 } 157 return meta 158 } 159 160 func toGosbomRelationships(spdxIDMap map[string]interface{}, doc *spdx.Document) []artifact.Relationship { 161 var out []artifact.Relationship 162 for _, r := range doc.Relationships { 163 // FIXME what to do with r.RefA.DocumentRefID and r.RefA.SpecialID 164 if r.RefA.DocumentRefID != "" && requireAndTrimPrefix(r.RefA.DocumentRefID, "DocumentRef-") != string(doc.SPDXIdentifier) { 165 log.Debugf("ignoring relationship to external document: %+v", r) 166 continue 167 } 168 a := spdxIDMap[string(r.RefA.ElementRefID)] 169 b := spdxIDMap[string(r.RefB.ElementRefID)] 170 from, fromOk := a.(*pkg.Package) 171 toPackage, toPackageOk := b.(*pkg.Package) 172 toLocation, toLocationOk := b.(*file.Location) 173 if !fromOk || !(toPackageOk || toLocationOk) { 174 log.Debugf("unable to find valid relationship mapping from SPDX 2.2 JSON, ignoring: (from: %+v) (to: %+v)", a, b) 175 continue 176 } 177 var to artifact.Identifiable 178 var typ artifact.RelationshipType 179 if toLocationOk { 180 switch RelationshipType(r.Relationship) { 181 case ContainsRelationship: 182 typ = artifact.ContainsRelationship 183 to = toLocation 184 case OtherRelationship: 185 // Encoding uses a specifically formatted comment... 186 if strings.Index(r.RelationshipComment, string(artifact.EvidentByRelationship)) == 0 { 187 typ = artifact.EvidentByRelationship 188 to = toLocation 189 } 190 } 191 } else { 192 switch RelationshipType(r.Relationship) { 193 case ContainsRelationship: 194 typ = artifact.ContainsRelationship 195 to = toPackage 196 case OtherRelationship: 197 // Encoding uses a specifically formatted comment... 198 if strings.Index(r.RelationshipComment, string(artifact.OwnershipByFileOverlapRelationship)) == 0 { 199 typ = artifact.OwnershipByFileOverlapRelationship 200 to = toPackage 201 } 202 } 203 } 204 if typ != "" && to != nil { 205 out = append(out, artifact.Relationship{ 206 From: from, 207 To: to, 208 Type: typ, 209 }) 210 } 211 } 212 return out 213 } 214 215 func toGosbomCoordinates(f *spdx.File) file.Coordinates { 216 const layerIDPrefix = "layerID: " 217 var fileSystemID string 218 if strings.Index(f.FileComment, layerIDPrefix) == 0 { 219 fileSystemID = strings.TrimPrefix(f.FileComment, layerIDPrefix) 220 } 221 if strings.Index(string(f.FileSPDXIdentifier), layerIDPrefix) == 0 { 222 fileSystemID = strings.TrimPrefix(string(f.FileSPDXIdentifier), layerIDPrefix) 223 } 224 return file.Coordinates{ 225 RealPath: f.FileName, 226 FileSystemID: fileSystemID, 227 } 228 } 229 230 func toGosbomLocation(f *spdx.File) *file.Location { 231 l := file.NewVirtualLocationFromCoordinates(toGosbomCoordinates(f), f.FileName) 232 return &l 233 } 234 235 func requireAndTrimPrefix(val interface{}, prefix string) string { 236 if v, ok := val.(string); ok { 237 if i := strings.Index(v, prefix); i == 0 { 238 return strings.Replace(v, prefix, "", 1) 239 } 240 } 241 return "" 242 } 243 244 type pkgInfo struct { 245 purl packageurl.PackageURL 246 typ pkg.Type 247 lang pkg.Language 248 } 249 250 func (p *pkgInfo) qualifierValue(name string) string { 251 return findQualifierValue(p.purl, name) 252 } 253 254 func findQualifierValue(purl packageurl.PackageURL, qualifier string) string { 255 for _, q := range purl.Qualifiers { 256 if q.Key == qualifier { 257 return q.Value 258 } 259 } 260 return "" 261 } 262 263 func extractPkgInfo(p *spdx.Package) pkgInfo { 264 pu := findPURLValue(p) 265 purl, err := packageurl.FromString(pu) 266 if err != nil { 267 return pkgInfo{} 268 } 269 return pkgInfo{ 270 purl, 271 pkg.TypeByName(purl.Type), 272 pkg.LanguageByName(purl.Type), 273 } 274 } 275 276 func toGosbomPackage(p *spdx.Package) *pkg.Package { 277 info := extractPkgInfo(p) 278 metadataType, metadata := extractMetadata(p, info) 279 sP := pkg.Package{ 280 Type: info.typ, 281 Name: p.PackageName, 282 Version: p.PackageVersion, 283 Licenses: pkg.NewLicenseSet(parseSPDXLicenses(p)...), 284 CPEs: extractCPEs(p), 285 PURL: info.purl.String(), 286 Language: info.lang, 287 MetadataType: metadataType, 288 Metadata: metadata, 289 } 290 291 sP.SetID() 292 293 return &sP 294 } 295 296 func parseSPDXLicenses(p *spdx.Package) []pkg.License { 297 licenses := make([]pkg.License, 0) 298 299 // concluded 300 if p.PackageLicenseConcluded != NOASSERTION && p.PackageLicenseConcluded != NONE && p.PackageLicenseConcluded != "" { 301 l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseConcluded)) 302 l.Type = license.Concluded 303 licenses = append(licenses, l) 304 } 305 306 // declared 307 if p.PackageLicenseDeclared != NOASSERTION && p.PackageLicenseDeclared != NONE && p.PackageLicenseDeclared != "" { 308 l := pkg.NewLicense(cleanSPDXID(p.PackageLicenseDeclared)) 309 l.Type = license.Declared 310 licenses = append(licenses, l) 311 } 312 313 return licenses 314 } 315 316 func cleanSPDXID(id string) string { 317 if strings.HasPrefix(id, "LicenseRef-") { 318 return strings.TrimPrefix(id, "LicenseRef-") 319 } 320 return id 321 } 322 323 //nolint:funlen 324 func extractMetadata(p *spdx.Package, info pkgInfo) (pkg.MetadataType, interface{}) { 325 arch := info.qualifierValue(pkg.PURLQualifierArch) 326 upstreamValue := info.qualifierValue(pkg.PURLQualifierUpstream) 327 upstream := strings.SplitN(upstreamValue, "@", 2) 328 upstreamName := upstream[0] 329 upstreamVersion := "" 330 if len(upstream) > 1 { 331 upstreamVersion = upstream[1] 332 } 333 supplier := "" 334 if p.PackageSupplier != nil { 335 supplier = p.PackageSupplier.Supplier 336 } 337 originator := "" 338 if p.PackageOriginator != nil { 339 originator = p.PackageOriginator.Originator 340 } 341 switch info.typ { 342 case pkg.ApkPkg: 343 return pkg.ApkMetadataType, pkg.ApkMetadata{ 344 Package: p.PackageName, 345 OriginPackage: upstreamName, 346 Maintainer: supplier, 347 Version: p.PackageVersion, 348 Architecture: arch, 349 URL: p.PackageHomePage, 350 Description: p.PackageDescription, 351 } 352 case pkg.RpmPkg: 353 converted, err := strconv.Atoi(info.qualifierValue(pkg.PURLQualifierEpoch)) 354 var epoch *int 355 if err != nil { 356 epoch = nil 357 } else { 358 epoch = &converted 359 } 360 return pkg.RpmMetadataType, pkg.RpmMetadata{ 361 Name: p.PackageName, 362 Version: p.PackageVersion, 363 Epoch: epoch, 364 Arch: arch, 365 SourceRpm: upstreamValue, 366 Vendor: originator, 367 } 368 case pkg.DebPkg: 369 return pkg.DpkgMetadataType, pkg.DpkgMetadata{ 370 Package: p.PackageName, 371 Source: upstreamName, 372 Version: p.PackageVersion, 373 SourceVersion: upstreamVersion, 374 Architecture: arch, 375 Maintainer: originator, 376 } 377 case pkg.JavaPkg: 378 var digests []file.Digest 379 for _, value := range p.PackageChecksums { 380 digests = append(digests, file.Digest{Algorithm: string(value.Algorithm), Value: value.Value}) 381 } 382 return pkg.JavaMetadataType, pkg.JavaMetadata{ 383 ArchiveDigests: digests, 384 } 385 case pkg.GoModulePkg: 386 var h1Digest string 387 for _, value := range p.PackageChecksums { 388 digest, err := util.HDigestFromSHA(string(value.Algorithm), value.Value) 389 if err != nil { 390 log.Debugf("invalid h1digest: %v %v", value, err) 391 continue 392 } 393 h1Digest = digest 394 break 395 } 396 return pkg.GolangBinMetadataType, pkg.GolangBinMetadata{ 397 H1Digest: h1Digest, 398 } 399 } 400 return pkg.UnknownMetadataType, nil 401 } 402 403 func findPURLValue(p *spdx.Package) string { 404 for _, r := range p.PackageExternalReferences { 405 if r.RefType == string(PurlExternalRefType) { 406 return r.Locator 407 } 408 } 409 return "" 410 } 411 412 func extractCPEs(p *spdx.Package) (cpes []cpe.CPE) { 413 for _, r := range p.PackageExternalReferences { 414 if r.RefType == string(Cpe23ExternalRefType) { 415 c, err := cpe.New(r.Locator) 416 if err != nil { 417 log.Warnf("unable to extract SPDX CPE=%q: %+v", r.Locator, err) 418 continue 419 } 420 cpes = append(cpes, c) 421 } 422 } 423 return cpes 424 }