github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/sbom/cdx/cdx.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package cdx extracts software dependencies from an CycloneDX SBOM. 16 package cdx 17 18 import ( 19 "context" 20 "errors" 21 "io" 22 "path/filepath" 23 "strings" 24 25 "github.com/CycloneDX/cyclonedx-go" 26 "github.com/google/osv-scalibr/extractor" 27 "github.com/google/osv-scalibr/extractor/filesystem" 28 cdxmeta "github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx/metadata" 29 "github.com/google/osv-scalibr/inventory" 30 "github.com/google/osv-scalibr/log" 31 "github.com/google/osv-scalibr/plugin" 32 "github.com/google/osv-scalibr/purl" 33 ) 34 35 const ( 36 // Name is the unique name of this extractor. 37 Name = "sbom/cdx" 38 ) 39 40 // Extractor extracts software dependencies from an CycloneDX SBOM. 41 type Extractor struct{} 42 43 // New returns a new instance of the extractor. 44 func New() filesystem.Extractor { return &Extractor{} } 45 46 // Name of the extractor. 47 func (e Extractor) Name() string { return Name } 48 49 // Version of the extractor. 50 func (e Extractor) Version() int { return 0 } 51 52 // Requirements of the extractor. 53 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 54 55 type extractFunc = func(io.Reader) (cyclonedx.BOM, error) 56 57 // https://cyclonedx.org/specification/overview/#recognized-file-patterns 58 var cdxExtensions = map[string]cyclonedx.BOMFileFormat{ 59 ".cdx.json": cyclonedx.BOMFileFormatJSON, 60 ".cdx.xml": cyclonedx.BOMFileFormatXML, 61 } 62 63 var cdxNames = map[string]cyclonedx.BOMFileFormat{ 64 "bom.json": cyclonedx.BOMFileFormatJSON, 65 "bom.xml": cyclonedx.BOMFileFormatXML, 66 } 67 68 // FileRequired returns true if the specified file is a supported cdx file. 69 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 70 return findExtractor(api.Path()) != nil 71 } 72 73 // Extract parses the CycloneDX SBOM and returns a list purls from the SBOM. 74 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 75 var cdxExtractor = findExtractor(input.Path) 76 77 if cdxExtractor == nil { 78 return inventory.Inventory{}, errors.New("sbom/cdx extractor: Invalid file format, only JSON and XML are supported") 79 } 80 81 cdxBOM, err := cdxExtractor(input.Reader) 82 if err != nil { 83 return inventory.Inventory{}, err 84 } 85 86 pkgs := e.convertCdxBomToPackage(&cdxBOM, input.Path) 87 return inventory.Inventory{Packages: pkgs}, nil 88 } 89 90 func findExtractor(path string) extractFunc { 91 // For Windows 92 path = filepath.ToSlash(path) 93 94 for ext, format := range cdxExtensions { 95 if hasFileExtension(path, ext) { 96 return func(rdr io.Reader) (cyclonedx.BOM, error) { 97 var cdxBOM cyclonedx.BOM 98 return cdxBOM, cyclonedx.NewBOMDecoder(rdr, format).Decode(&cdxBOM) 99 } 100 } 101 } 102 103 for name, format := range cdxNames { 104 if strings.ToLower(filepath.Base(path)) == name { 105 return func(rdr io.Reader) (cyclonedx.BOM, error) { 106 var cdxBOM cyclonedx.BOM 107 return cdxBOM, cyclonedx.NewBOMDecoder(rdr, format).Decode(&cdxBOM) 108 } 109 } 110 } 111 112 return nil 113 } 114 115 func enumerateComponents(components []cyclonedx.Component, results *[]*extractor.Package) { 116 for _, cdxPkg := range components { 117 inv := convertComponentToInventory(cdxPkg) 118 if inv != nil { 119 *results = append(*results, inv) 120 } 121 if cdxPkg.Components != nil { 122 enumerateComponents(*cdxPkg.Components, results) 123 } 124 } 125 } 126 127 func (e Extractor) convertCdxBomToPackage(cdxBom *cyclonedx.BOM, path string) []*extractor.Package { 128 results := []*extractor.Package{} 129 130 if cdxBom == nil || cdxBom.Components == nil { 131 return results 132 } 133 134 enumerateComponents(*cdxBom.Components, &results) 135 136 for p := range results { 137 results[p].Locations = []string{path} 138 } 139 140 return results 141 } 142 143 func convertComponentToInventory(cdxPkg cyclonedx.Component) *extractor.Package { 144 pkg := &extractor.Package{ 145 Metadata: &cdxmeta.Metadata{}, 146 } 147 m := pkg.Metadata.(*cdxmeta.Metadata) 148 pkg.Name = cdxPkg.Name 149 pkg.Version = cdxPkg.Version 150 if cdxPkg.CPE != "" { 151 m.CPEs = append(m.CPEs, cdxPkg.CPE) 152 } 153 if cdxPkg.PackageURL != "" { 154 packageURL, err := purl.FromString(cdxPkg.PackageURL) 155 if err != nil { 156 log.Warnf("Invalid PURL %q for package ref: %q", cdxPkg.PackageURL, cdxPkg.BOMRef) 157 } else { 158 m.PURL = &packageURL 159 pkg.PURLType = packageURL.Type 160 if pkg.Name == "" { 161 pkg.Name = packageURL.Name 162 } 163 if pkg.Version == "" { 164 pkg.Version = packageURL.Version 165 } 166 } 167 } 168 if cdxPkg.Evidence != nil && cdxPkg.Evidence.Occurrences != nil { 169 for _, occ := range *cdxPkg.Evidence.Occurrences { 170 if occ.Location != "" { 171 m.CDXLocations = append(m.CDXLocations, occ.Location) 172 } 173 } 174 } 175 pkg.Metadata = m 176 if m.PURL == nil && len(m.CPEs) == 0 { 177 log.Warnf("Neither CPE nor PURL found for package: %+v", cdxPkg) 178 return nil 179 } 180 181 return pkg 182 } 183 184 func hasFileExtension(path string, extension string) bool { 185 return strings.HasSuffix(strings.ToLower(path), extension) 186 }