github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/sbom/spdx/spdx.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package spdx extracts software dependencies from an SPDX SBOM.
    16  package spdx
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"io"
    22  	"path/filepath"
    23  	"strings"
    24  
    25  	"github.com/google/osv-scalibr/extractor"
    26  	"github.com/google/osv-scalibr/extractor/filesystem"
    27  	spdxmeta "github.com/google/osv-scalibr/extractor/filesystem/sbom/spdx/metadata"
    28  	"github.com/google/osv-scalibr/inventory"
    29  	"github.com/google/osv-scalibr/log"
    30  	"github.com/google/osv-scalibr/plugin"
    31  	"github.com/google/osv-scalibr/purl"
    32  	"github.com/spdx/tools-golang/json"
    33  	"github.com/spdx/tools-golang/rdf"
    34  	"github.com/spdx/tools-golang/spdx"
    35  	"github.com/spdx/tools-golang/tagvalue"
    36  	"github.com/spdx/tools-golang/yaml"
    37  )
    38  
    39  const (
    40  	// Name is the unique name of this extractor.
    41  	Name = "sbom/spdx"
    42  )
    43  
    44  // Extractor extracts software dependencies from an spdx SBOM.
    45  type Extractor struct{}
    46  
    47  // New returns a new instance of the extractor.
    48  func New() filesystem.Extractor { return &Extractor{} }
    49  
    50  // Name of the extractor.
    51  func (e Extractor) Name() string { return Name }
    52  
    53  // Version of the extractor.
    54  func (e Extractor) Version() int { return 0 }
    55  
    56  // Requirements of the extractor.
    57  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
    58  
    59  type extractFunc = func(io.Reader) (*spdx.Document, error)
    60  
    61  // Format support based on https://spdx.dev/resources/use/#documents
    62  var extensionHandlers = map[string]extractFunc{
    63  	".spdx.json":    json.Read,
    64  	".spdx":         tagvalue.Read,
    65  	".spdx.yml":     yaml.Read,
    66  	".spdx.rdf":     rdf.Read,
    67  	".spdx.rdf.xml": rdf.Read,
    68  	// No support for .xsl files because those are too ambiguous and could be many other things.
    69  }
    70  
    71  // FileRequired returns true if the specified file is a supported spdx file.
    72  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
    73  	_, isSupported := findExtractor(api.Path())
    74  	return isSupported
    75  }
    76  
    77  // Extract parses the SPDX SBOM and returns a list purls from the SBOM.
    78  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
    79  	var parseSbom, isSupported = findExtractor(input.Path)
    80  
    81  	if !isSupported {
    82  		return inventory.Inventory{}, errors.New("sbom/spdx extractor: Invalid file format, only JSON, YAML, RDF, and TagValue are supported")
    83  	}
    84  
    85  	spdxDoc, err := parseSbom(input.Reader)
    86  
    87  	if err != nil {
    88  		return inventory.Inventory{}, err
    89  	}
    90  
    91  	pkgs := e.convertSpdxDocToPackage(spdxDoc, input.Path)
    92  	return inventory.Inventory{Packages: pkgs}, nil
    93  }
    94  
    95  func findExtractor(path string) (extractFunc, bool) {
    96  	// For Windows
    97  	path = filepath.ToSlash(path)
    98  
    99  	for key := range extensionHandlers {
   100  		if hasFileExtension(path, key) {
   101  			return extensionHandlers[key], true
   102  		}
   103  	}
   104  
   105  	return nil, false
   106  }
   107  
   108  func (e Extractor) convertSpdxDocToPackage(spdxDoc *spdx.Document, path string) []*extractor.Package {
   109  	results := []*extractor.Package{}
   110  
   111  	for _, spdxPkg := range spdxDoc.Packages {
   112  		pkg := &extractor.Package{
   113  			Locations: []string{path},
   114  			Metadata:  &spdxmeta.Metadata{},
   115  		}
   116  		m := pkg.Metadata.(*spdxmeta.Metadata)
   117  		for _, extRef := range spdxPkg.PackageExternalReferences {
   118  			// TODO(b/280991231): Support all RefTypes
   119  			if extRef.RefType == "cpe23Type" || extRef.RefType == "http://spdx.org/rdf/references/cpe23Type" {
   120  				m.CPEs = append(m.CPEs, extRef.Locator)
   121  				if len(pkg.Name) == 0 {
   122  					pkg.Name = extRef.Locator
   123  				}
   124  			} else if extRef.RefType == "purl" || extRef.RefType == "http://spdx.org/rdf/references/purl" {
   125  				if m.PURL != nil {
   126  					log.Warnf("Multiple PURLs found for same package: %q and %q", m.PURL, extRef.Locator)
   127  				}
   128  				packageURL, err := purl.FromString(extRef.Locator)
   129  				pkg.Name = packageURL.Name
   130  				if err != nil {
   131  					log.Warnf("Invalid PURL %q for package: %q", extRef.Locator, spdxPkg.PackageName)
   132  				} else {
   133  					m.PURL = &packageURL
   134  					pkg.PURLType = packageURL.Type
   135  				}
   136  			}
   137  		}
   138  		pkg.Metadata = m
   139  		if m.PURL == nil && len(m.CPEs) == 0 {
   140  			log.Warnf("Neither CPE nor PURL found for package: %+v", spdxPkg)
   141  			continue
   142  		}
   143  		results = append(results, pkg)
   144  	}
   145  
   146  	return results
   147  }
   148  
   149  func hasFileExtension(path string, extension string) bool {
   150  	return strings.HasSuffix(strings.ToLower(path), extension)
   151  }