github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/sbom/spdx/spdx.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package spdx extracts software dependencies from an SPDX SBOM. 16 package spdx 17 18 import ( 19 "context" 20 "errors" 21 "io" 22 "path/filepath" 23 "strings" 24 25 "github.com/google/osv-scalibr/extractor" 26 "github.com/google/osv-scalibr/extractor/filesystem" 27 spdxmeta "github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx/metadata" 28 "github.com/google/osv-scalibr/inventory" 29 "github.com/google/osv-scalibr/log" 30 "github.com/google/osv-scalibr/plugin" 31 "github.com/google/osv-scalibr/purl" 32 "github.com/spdx/tools-golang/json" 33 "github.com/spdx/tools-golang/rdf" 34 "github.com/spdx/tools-golang/spdx" 35 "github.com/spdx/tools-golang/tagvalue" 36 "github.com/spdx/tools-golang/yaml" 37 ) 38 39 const ( 40 // Name is the unique name of this extractor. 41 Name = "sbom/spdx" 42 ) 43 44 // Extractor extracts software dependencies from an spdx SBOM. 45 type Extractor struct{} 46 47 // New returns a new instance of the extractor. 48 func New() filesystem.Extractor { return &Extractor{} } 49 50 // Name of the extractor. 51 func (e Extractor) Name() string { return Name } 52 53 // Version of the extractor. 54 func (e Extractor) Version() int { return 0 } 55 56 // Requirements of the extractor. 57 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 58 59 type extractFunc = func(io.Reader) (*spdx.Document, error) 60 61 // Format support based on https://spdx.dev/resources/use/#documents 62 var extensionHandlers = map[string]extractFunc{ 63 ".spdx.json": json.Read, 64 ".spdx": tagvalue.Read, 65 ".spdx.yml": yaml.Read, 66 ".spdx.rdf": rdf.Read, 67 ".spdx.rdf.xml": rdf.Read, 68 // No support for .xsl files because those are too ambiguous and could be many other things. 69 } 70 71 // FileRequired returns true if the specified file is a supported spdx file. 72 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 73 _, isSupported := findExtractor(api.Path()) 74 return isSupported 75 } 76 77 // Extract parses the SPDX SBOM and returns a list purls from the SBOM. 78 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 79 var parseSbom, isSupported = findExtractor(input.Path) 80 81 if !isSupported { 82 return inventory.Inventory{}, errors.New("sbom/spdx extractor: Invalid file format, only JSON, YAML, RDF, and TagValue are supported") 83 } 84 85 spdxDoc, err := parseSbom(input.Reader) 86 87 if err != nil { 88 return inventory.Inventory{}, err 89 } 90 91 pkgs := e.convertSpdxDocToPackage(spdxDoc, input.Path) 92 return inventory.Inventory{Packages: pkgs}, nil 93 } 94 95 func findExtractor(path string) (extractFunc, bool) { 96 // For Windows 97 path = filepath.ToSlash(path) 98 99 for key := range extensionHandlers { 100 if hasFileExtension(path, key) { 101 return extensionHandlers[key], true 102 } 103 } 104 105 return nil, false 106 } 107 108 func (e Extractor) convertSpdxDocToPackage(spdxDoc *spdx.Document, path string) []*extractor.Package { 109 results := []*extractor.Package{} 110 111 for _, spdxPkg := range spdxDoc.Packages { 112 pkg := &extractor.Package{ 113 Locations: []string{path}, 114 Metadata: &spdxmeta.Metadata{}, 115 } 116 m := pkg.Metadata.(*spdxmeta.Metadata) 117 for _, extRef := range spdxPkg.PackageExternalReferences { 118 // TODO(b/280991231): Support all RefTypes 119 if extRef.RefType == "cpe23Type" || extRef.RefType == "http://spdx.org/rdf/references/cpe23Type" { 120 m.CPEs = append(m.CPEs, extRef.Locator) 121 if len(pkg.Name) == 0 { 122 pkg.Name = extRef.Locator 123 } 124 } else if extRef.RefType == "purl" || extRef.RefType == "http://spdx.org/rdf/references/purl" { 125 if m.PURL != nil { 126 log.Warnf("Multiple PURLs found for same package: %q and %q", m.PURL, extRef.Locator) 127 } 128 packageURL, err := purl.FromString(extRef.Locator) 129 pkg.Name = packageURL.Name 130 if err != nil { 131 log.Warnf("Invalid PURL %q for package: %q", extRef.Locator, spdxPkg.PackageName) 132 } else { 133 m.PURL = &packageURL 134 pkg.PURLType = packageURL.Type 135 } 136 } 137 } 138 pkg.Metadata = m 139 if m.PURL == nil && len(m.CPEs) == 0 { 140 log.Warnf("Neither CPE nor PURL found for package: %+v", spdxPkg) 141 continue 142 } 143 results = append(results, pkg) 144 } 145 146 return results 147 } 148 149 func hasFileExtension(path string, extension string) bool { 150 return strings.HasSuffix(strings.ToLower(path), extension) 151 }