github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/sbom/cdx/cdx.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package cdx extracts software dependencies from a CycloneDX SBOM.
    16  package cdx
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"io"
    22  	"path/filepath"
    23  	"strings"
    24  
    25  	"github.com/CycloneDX/cyclonedx-go"
    26  	"github.com/google/osv-scalibr/extractor"
    27  	"github.com/google/osv-scalibr/extractor/filesystem"
    28  	cdxmeta "github.com/google/osv-scalibr/extractor/filesystem/sbom/cdx/metadata"
    29  	"github.com/google/osv-scalibr/inventory"
    30  	"github.com/google/osv-scalibr/log"
    31  	"github.com/google/osv-scalibr/plugin"
    32  	"github.com/google/osv-scalibr/purl"
    33  )
    34  
    35  const (
    36  	// Name is the unique name of this extractor.
    37  	Name = "sbom/cdx"
    38  )
    39  
// Extractor extracts software dependencies from a CycloneDX SBOM.
// It has no fields, so it carries no per-instance state.
type Extractor struct{}
    42  
    43  // New returns a new instance of the extractor.
    44  func New() filesystem.Extractor { return &Extractor{} }
    45  
    46  // Name of the extractor.
    47  func (e Extractor) Name() string { return Name }
    48  
    49  // Version of the extractor.
    50  func (e Extractor) Version() int { return 0 }
    51  
    52  // Requirements of the extractor.
    53  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    54  
// extractFunc is the signature of the format-specific decoders returned by
// findExtractor: it reads from the given reader and returns the resulting
// cyclonedx.BOM, or an error.
type extractFunc = func(io.Reader) (cyclonedx.BOM, error)
    56  
// cdxExtensions maps the file-name suffixes recognized as CycloneDX SBOMs to
// the BOM format used to decode them. Keys are lowercase because paths are
// lowercased before the suffix comparison.
// https://cyclonedx.org/specification/overview/#recognized-file-patterns
var cdxExtensions = map[string]cyclonedx.BOMFileFormat{
	".cdx.json": cyclonedx.BOMFileFormatJSON,
	".cdx.xml":  cyclonedx.BOMFileFormatXML,
}
    62  
// cdxNames maps the base file names recognized as CycloneDX SBOMs to the BOM
// format used to decode them. Keys are lowercase because the base name of a
// path is lowercased before it is compared against them.
var cdxNames = map[string]cyclonedx.BOMFileFormat{
	"bom.json": cyclonedx.BOMFileFormatJSON,
	"bom.xml":  cyclonedx.BOMFileFormatXML,
}
    67  
    68  // FileRequired returns true if the specified file is a supported cdx file.
    69  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    70  	return findExtractor(api.Path()) != nil
    71  }
    72  
    73  // Extract parses the CycloneDX SBOM and returns a list purls from the SBOM.
    74  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
    75  	var cdxExtractor = findExtractor(input.Path)
    76  
    77  	if cdxExtractor == nil {
    78  		return inventory.Inventory{}, errors.New("sbom/cdx extractor: Invalid file format, only JSON and XML are supported")
    79  	}
    80  
    81  	cdxBOM, err := cdxExtractor(input.Reader)
    82  	if err != nil {
    83  		return inventory.Inventory{}, err
    84  	}
    85  
    86  	pkgs := e.convertCdxBomToPackage(&cdxBOM, input.Path)
    87  	return inventory.Inventory{Packages: pkgs}, nil
    88  }
    89  
    90  func findExtractor(path string) extractFunc {
    91  	// For Windows
    92  	path = filepath.ToSlash(path)
    93  
    94  	for ext, format := range cdxExtensions {
    95  		if hasFileExtension(path, ext) {
    96  			return func(rdr io.Reader) (cyclonedx.BOM, error) {
    97  				var cdxBOM cyclonedx.BOM
    98  				return cdxBOM, cyclonedx.NewBOMDecoder(rdr, format).Decode(&cdxBOM)
    99  			}
   100  		}
   101  	}
   102  
   103  	for name, format := range cdxNames {
   104  		if strings.ToLower(filepath.Base(path)) == name {
   105  			return func(rdr io.Reader) (cyclonedx.BOM, error) {
   106  				var cdxBOM cyclonedx.BOM
   107  				return cdxBOM, cyclonedx.NewBOMDecoder(rdr, format).Decode(&cdxBOM)
   108  			}
   109  		}
   110  	}
   111  
   112  	return nil
   113  }
   114  
   115  func enumerateComponents(components []cyclonedx.Component, results *[]*extractor.Package) {
   116  	for _, cdxPkg := range components {
   117  		inv := convertComponentToInventory(cdxPkg)
   118  		if inv != nil {
   119  			*results = append(*results, inv)
   120  		}
   121  		if cdxPkg.Components != nil {
   122  			enumerateComponents(*cdxPkg.Components, results)
   123  		}
   124  	}
   125  }
   126  
   127  func (e Extractor) convertCdxBomToPackage(cdxBom *cyclonedx.BOM, path string) []*extractor.Package {
   128  	results := []*extractor.Package{}
   129  
   130  	if cdxBom == nil || cdxBom.Components == nil {
   131  		return results
   132  	}
   133  
   134  	enumerateComponents(*cdxBom.Components, &results)
   135  
   136  	for p := range results {
   137  		results[p].Locations = []string{path}
   138  	}
   139  
   140  	return results
   141  }
   142  
   143  func convertComponentToInventory(cdxPkg cyclonedx.Component) *extractor.Package {
   144  	pkg := &extractor.Package{
   145  		Metadata: &cdxmeta.Metadata{},
   146  	}
   147  	m := pkg.Metadata.(*cdxmeta.Metadata)
   148  	pkg.Name = cdxPkg.Name
   149  	pkg.Version = cdxPkg.Version
   150  	if cdxPkg.CPE != "" {
   151  		m.CPEs = append(m.CPEs, cdxPkg.CPE)
   152  	}
   153  	if cdxPkg.PackageURL != "" {
   154  		packageURL, err := purl.FromString(cdxPkg.PackageURL)
   155  		if err != nil {
   156  			log.Warnf("Invalid PURL %q for package ref: %q", cdxPkg.PackageURL, cdxPkg.BOMRef)
   157  		} else {
   158  			m.PURL = &packageURL
   159  			pkg.PURLType = packageURL.Type
   160  			if pkg.Name == "" {
   161  				pkg.Name = packageURL.Name
   162  			}
   163  			if pkg.Version == "" {
   164  				pkg.Version = packageURL.Version
   165  			}
   166  		}
   167  	}
   168  	if cdxPkg.Evidence != nil && cdxPkg.Evidence.Occurrences != nil {
   169  		for _, occ := range *cdxPkg.Evidence.Occurrences {
   170  			if occ.Location != "" {
   171  				m.CDXLocations = append(m.CDXLocations, occ.Location)
   172  			}
   173  		}
   174  	}
   175  	pkg.Metadata = m
   176  	if m.PURL == nil && len(m.CPEs) == 0 {
   177  		log.Warnf("Neither CPE nor PURL found for package: %+v", cdxPkg)
   178  		return nil
   179  	}
   180  
   181  	return pkg
   182  }
   183  
   184  func hasFileExtension(path string, extension string) bool {
   185  	return strings.HasSuffix(strings.ToLower(path), extension)
   186  }