github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/sbom/cyclonedx/unmarshal.go (about)

     1  package cyclonedx
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"sort"
     9  	"strconv"
    10  
    11  	cdx "github.com/CycloneDX/cyclonedx-go"
    12  	"github.com/package-url/packageurl-go"
    13  	"github.com/samber/lo"
    14  	"golang.org/x/exp/maps"
    15  	"golang.org/x/xerrors"
    16  
    17  	ftypes "github.com/devseccon/trivy/pkg/fanal/types"
    18  	"github.com/devseccon/trivy/pkg/log"
    19  	"github.com/devseccon/trivy/pkg/purl"
    20  	"github.com/devseccon/trivy/pkg/sbom/cyclonedx/core"
    21  	"github.com/devseccon/trivy/pkg/types"
    22  )
    23  
    24  var (
    25  	ErrPURLEmpty = errors.New("purl empty error")
    26  )
    27  
// BOM embeds a types.SBOM and populates it from a decoded CycloneDX document
// (see UnmarshalJSON). The unexported maps are lookup tables rebuilt by parseSBOM.
type BOM struct {
	*types.SBOM

	dependencies map[string][]string      // BOM-Ref -> BOM-Refs listed as its dependencies
	components   map[string]cdx.Component // BOM-Ref -> component, metadata component included
}
    34  
    35  func DecodeJSON(r io.Reader) (*cdx.BOM, error) {
    36  	bom := cdx.NewBOM()
    37  	decoder := cdx.NewBOMDecoder(r, cdx.BOMFileFormatJSON)
    38  	if err := decoder.Decode(bom); err != nil {
    39  		return nil, xerrors.Errorf("CycloneDX decode error: %w", err)
    40  	}
    41  	return bom, nil
    42  }
    43  
    44  func (c *BOM) UnmarshalJSON(b []byte) error {
    45  	log.Logger.Debug("Unmarshaling CycloneDX JSON...")
    46  	if c.SBOM == nil {
    47  		c.SBOM = &types.SBOM{}
    48  	}
    49  	bom, err := DecodeJSON(bytes.NewReader(b))
    50  	if err != nil {
    51  		return xerrors.Errorf("CycloneDX decode error: %w", err)
    52  	}
    53  
    54  	if !core.IsTrivySBOM(bom) {
    55  		log.Logger.Warnf("Third-party SBOM may lead to inaccurate vulnerability detection")
    56  		log.Logger.Warnf("Recommend using Trivy to generate SBOMs")
    57  	}
    58  
    59  	if err = c.parseSBOM(bom); err != nil {
    60  		return xerrors.Errorf("failed to parse sbom: %w", err)
    61  	}
    62  
    63  	sort.Slice(c.Applications, func(i, j int) bool {
    64  		if c.Applications[i].Type != c.Applications[j].Type {
    65  			return c.Applications[i].Type < c.Applications[j].Type
    66  		}
    67  		return c.Applications[i].FilePath < c.Applications[j].FilePath
    68  	})
    69  
    70  	var metadata ftypes.Metadata
    71  	if bom.Metadata != nil {
    72  		metadata.Timestamp = bom.Metadata.Timestamp
    73  		if bom.Metadata.Component != nil {
    74  			metadata.Component = toTrivyCdxComponent(lo.FromPtr(bom.Metadata.Component))
    75  		}
    76  	}
    77  
    78  	var components []ftypes.Component
    79  	for _, component := range lo.FromPtr(bom.Components) {
    80  		components = append(components, toTrivyCdxComponent(component))
    81  	}
    82  
    83  	// Keep the original SBOM
    84  	c.CycloneDX = &ftypes.CycloneDX{
    85  		BOMFormat:    bom.BOMFormat,
    86  		SpecVersion:  ftypes.SpecVersion(bom.SpecVersion),
    87  		SerialNumber: bom.SerialNumber,
    88  		Version:      bom.Version,
    89  		Metadata:     metadata,
    90  		Components:   components,
    91  	}
    92  	return nil
    93  }
    94  
    95  func (c *BOM) parseSBOM(bom *cdx.BOM) error {
    96  	c.dependencies = dependencyMap(bom.Dependencies)
    97  	c.components = componentMap(bom.Metadata, bom.Components)
    98  	var seen = make(map[string]struct{})
    99  	for bomRef := range c.dependencies {
   100  		component := c.components[bomRef]
   101  		switch component.Type {
   102  		case cdx.ComponentTypeOS: // OS info and OS packages
   103  			seen[component.BOMRef] = struct{}{}
   104  			c.OS = toOS(component)
   105  			pkgInfo, err := c.parseOSPkgs(component, seen)
   106  			if err != nil {
   107  				return xerrors.Errorf("failed to parse os packages: %w", err)
   108  			}
   109  			c.Packages = append(c.Packages, pkgInfo)
   110  		case cdx.ComponentTypeApplication: // It would be a lock file in a CycloneDX report generated by Trivy
   111  			if core.LookupProperty(component.Properties, PropertyType) == "" {
   112  				continue
   113  			}
   114  			app, err := c.parseLangPkgs(component, seen)
   115  			if err != nil {
   116  				return xerrors.Errorf("failed to parse language packages: %w", err)
   117  			}
   118  			c.Applications = append(c.Applications, *app)
   119  		case cdx.ComponentTypeLibrary:
   120  			// It is an individual package not associated with any lock files and should be processed later.
   121  			// e.g. .gemspec, .egg and .wheel
   122  			continue
   123  		}
   124  	}
   125  
   126  	var libComponents []cdx.Component
   127  	for ref, component := range c.components {
   128  		if _, ok := seen[ref]; ok {
   129  			continue
   130  		}
   131  		if component.Type == cdx.ComponentTypeLibrary || component.PackageURL != "" {
   132  			libComponents = append(libComponents, component)
   133  		}
   134  
   135  		// For third-party SBOMs.
   136  		// If there are no operating-system dependent libraries, make them implicitly dependent.
   137  		if component.Type == cdx.ComponentTypeOS {
   138  			if lo.IsNotEmpty(c.OS) {
   139  				return xerrors.New("multiple OSes are not supported")
   140  			}
   141  			c.OS = toOS(component)
   142  		}
   143  	}
   144  
   145  	pkgInfos, aggregatedApps, err := aggregatePkgs(libComponents)
   146  	if err != nil {
   147  		return xerrors.Errorf("failed to aggregate packages: %w", err)
   148  	}
   149  
   150  	// For third party SBOMs.
   151  	// If a package that depends on the operating-system did not exist,
   152  	// but an os package is found during aggregate, it is used.
   153  	if len(c.Packages) == 0 && len(pkgInfos) != 0 {
   154  		if !c.OS.Detected() {
   155  			log.Logger.Warnf("Ignore the OS package as no OS information is found.")
   156  		} else {
   157  			c.Packages = pkgInfos
   158  		}
   159  	}
   160  	c.Applications = append(c.Applications, aggregatedApps...)
   161  
   162  	return nil
   163  }
   164  
   165  func (c *BOM) parseOSPkgs(component cdx.Component, seen map[string]struct{}) (ftypes.PackageInfo, error) {
   166  	components := c.walkDependencies(component.BOMRef, make(map[string]struct{}))
   167  	pkgs, err := parsePkgs(components, seen)
   168  	if err != nil {
   169  		return ftypes.PackageInfo{}, xerrors.Errorf("failed to parse os package: %w", err)
   170  	}
   171  
   172  	return ftypes.PackageInfo{
   173  		Packages: pkgs,
   174  	}, nil
   175  }
   176  
   177  func (c *BOM) parseLangPkgs(component cdx.Component, seen map[string]struct{}) (*ftypes.Application, error) {
   178  	components := c.walkDependencies(component.BOMRef, make(map[string]struct{}))
   179  	components = lo.UniqBy(components, func(c cdx.Component) string {
   180  		return c.BOMRef
   181  	})
   182  
   183  	app := toApplication(component)
   184  	pkgs, err := parsePkgs(components, seen)
   185  	if err != nil {
   186  		return nil, xerrors.Errorf("failed to parse language-specific packages: %w", err)
   187  	}
   188  	app.Libraries = pkgs
   189  
   190  	return app, nil
   191  }
   192  
   193  func parsePkgs(components []cdx.Component, seen map[string]struct{}) ([]ftypes.Package, error) {
   194  	var pkgs []ftypes.Package
   195  	for _, com := range components {
   196  		seen[com.BOMRef] = struct{}{}
   197  		pkgURL, pkg, err := toPackage(com)
   198  		if errors.Is(err, ErrPURLEmpty) {
   199  			continue
   200  		} else if err != nil {
   201  			return nil, xerrors.Errorf("failed to parse language package: %w", err)
   202  		}
   203  
   204  		// Skip unsupported package types
   205  		if pkgURL.Class() == types.ClassUnknown {
   206  			continue
   207  		}
   208  		pkgs = append(pkgs, *pkg)
   209  	}
   210  	return pkgs, nil
   211  }
   212  
   213  // walkDependencies takes all nested dependencies of the root component.
   214  func (c *BOM) walkDependencies(rootRef string, uniqComponents map[string]struct{}) []cdx.Component {
   215  	// e.g. Library A, B, C, D and E will be returned as dependencies of Application 1.
   216  	// type: Application 1
   217  	//   - type: Library A
   218  	//     - type: Library B
   219  	//   - type: Application 2
   220  	//     - type: Library C
   221  	//     - type: Application 3
   222  	//       - type: Library D
   223  	//       - type: Library E
   224  	var components []cdx.Component
   225  	for _, dep := range c.dependencies[rootRef] {
   226  		component, ok := c.components[dep]
   227  		if !ok {
   228  			continue
   229  		}
   230  
   231  		// there are cases of looped components:
   232  		// type: Application 1
   233  		//  - type: Library A
   234  		//    - type: Library B
   235  		// 	    - type: Library A
   236  		// ...
   237  		// use uniqComponents to fix infinite loop
   238  		if _, ok = uniqComponents[dep]; ok {
   239  			continue
   240  		}
   241  		uniqComponents[dep] = struct{}{}
   242  
   243  		// Take only 'Libraries'
   244  		if component.Type == cdx.ComponentTypeLibrary {
   245  			components = append(components, component)
   246  		}
   247  
   248  		components = append(components, c.walkDependencies(dep, uniqComponents)...)
   249  	}
   250  	return components
   251  }
   252  
   253  func componentMap(metadata *cdx.Metadata, components *[]cdx.Component) map[string]cdx.Component {
   254  	cmap := make(map[string]cdx.Component)
   255  
   256  	for _, component := range lo.FromPtr(components) {
   257  		cmap[component.BOMRef] = component
   258  	}
   259  	if metadata != nil && metadata.Component != nil {
   260  		cmap[metadata.Component.BOMRef] = *metadata.Component
   261  	}
   262  	return cmap
   263  }
   264  
   265  func dependencyMap(deps *[]cdx.Dependency) map[string][]string {
   266  	depMap := make(map[string][]string)
   267  
   268  	for _, dep := range lo.FromPtr(deps) {
   269  		if _, ok := depMap[dep.Ref]; ok {
   270  			continue
   271  		}
   272  		var refs []string
   273  		if dep.Dependencies != nil {
   274  			refs = append(refs, *dep.Dependencies...)
   275  		}
   276  
   277  		depMap[dep.Ref] = refs
   278  	}
   279  	return depMap
   280  }
   281  
   282  func aggregatePkgs(libs []cdx.Component) ([]ftypes.PackageInfo, []ftypes.Application, error) {
   283  	osPkgMap := make(map[string]ftypes.Packages)
   284  	langPkgMap := make(map[ftypes.LangType]ftypes.Packages)
   285  	for _, lib := range libs {
   286  		pkgURL, pkg, err := toPackage(lib)
   287  		if errors.Is(err, ErrPURLEmpty) {
   288  			continue
   289  		} else if err != nil {
   290  			return nil, nil, xerrors.Errorf("failed to parse the component: %w", err)
   291  		}
   292  
   293  		switch pkgURL.Class() {
   294  		case types.ClassOSPkg:
   295  			osPkgMap[pkgURL.Type] = append(osPkgMap[pkgURL.Type], *pkg)
   296  		case types.ClassLangPkg:
   297  			langType := pkgURL.LangType()
   298  			langPkgMap[langType] = append(langPkgMap[langType], *pkg)
   299  		}
   300  	}
   301  
   302  	if len(osPkgMap) > 1 {
   303  		return nil, nil, xerrors.Errorf("multiple types of OS packages in SBOM are not supported (%q)",
   304  			maps.Keys(osPkgMap))
   305  	}
   306  
   307  	var osPkgs ftypes.PackageInfo
   308  	for _, pkgs := range osPkgMap {
   309  		// Just take the first element
   310  		sort.Sort(pkgs)
   311  		osPkgs = ftypes.PackageInfo{Packages: pkgs}
   312  		break
   313  	}
   314  
   315  	var apps []ftypes.Application
   316  	for pkgType, pkgs := range langPkgMap {
   317  		sort.Sort(pkgs)
   318  		apps = append(apps, ftypes.Application{
   319  			Type:      pkgType,
   320  			Libraries: pkgs,
   321  		})
   322  	}
   323  	return []ftypes.PackageInfo{osPkgs}, apps, nil
   324  }
   325  
   326  func toOS(component cdx.Component) ftypes.OS {
   327  	return ftypes.OS{
   328  		Family: ftypes.OSType(component.Name),
   329  		Name:   component.Version,
   330  	}
   331  }
   332  
   333  func toApplication(component cdx.Component) *ftypes.Application {
   334  	return &ftypes.Application{
   335  		Type:     ftypes.LangType(core.LookupProperty(component.Properties, PropertyType)),
   336  		FilePath: component.Name,
   337  	}
   338  }
   339  
   340  func toPackage(component cdx.Component) (*purl.PackageURL, *ftypes.Package, error) {
   341  	if component.PackageURL == "" {
   342  		log.Logger.Warnf("Skip the component (BOM-Ref: %s) as the PURL is empty", component.BOMRef)
   343  		return nil, nil, ErrPURLEmpty
   344  	}
   345  	p, err := purl.FromString(component.PackageURL)
   346  	if err != nil {
   347  		return nil, nil, xerrors.Errorf("failed to parse purl: %w", err)
   348  	}
   349  
   350  	pkg := p.Package()
   351  	// Trivy's marshall loses case-sensitivity in PURL used in SBOM for packages (Go, Npm, PyPI),
   352  	// so we have to use an original package name
   353  	pkg.Name = getPackageName(p.Type, pkg.Name, component)
   354  	pkg.Ref = component.BOMRef
   355  
   356  	for _, license := range lo.FromPtr(component.Licenses) {
   357  		pkg.Licenses = append(pkg.Licenses, license.Expression)
   358  	}
   359  
   360  	for key, value := range core.UnmarshalProperties(component.Properties) {
   361  		switch key {
   362  		case PropertyPkgID:
   363  			pkg.ID = value
   364  		case PropertySrcName:
   365  			pkg.SrcName = value
   366  		case PropertySrcVersion:
   367  			pkg.SrcVersion = value
   368  		case PropertySrcRelease:
   369  			pkg.SrcRelease = value
   370  		case PropertySrcEpoch:
   371  			pkg.SrcEpoch, err = strconv.Atoi(value)
   372  			if err != nil {
   373  				return nil, nil, xerrors.Errorf("failed to parse source epoch: %w", err)
   374  			}
   375  		case PropertyModularitylabel:
   376  			pkg.Modularitylabel = value
   377  		case PropertyLayerDiffID:
   378  			pkg.Layer.DiffID = value
   379  		case PropertyLayerDigest:
   380  			pkg.Layer.Digest = value
   381  		case PropertyFilePath:
   382  			pkg.FilePath = value
   383  		}
   384  	}
   385  
   386  	if p.Class() == types.ClassOSPkg {
   387  		// Fill source package information for components in third-party SBOMs .
   388  		if pkg.SrcName == "" {
   389  			pkg.SrcName = pkg.Name
   390  		}
   391  		if pkg.SrcVersion == "" {
   392  			pkg.SrcVersion = pkg.Version
   393  		}
   394  		if pkg.SrcRelease == "" {
   395  			pkg.SrcRelease = pkg.Release
   396  		}
   397  		if pkg.SrcEpoch == 0 {
   398  			pkg.SrcEpoch = pkg.Epoch
   399  		}
   400  	}
   401  
   402  	return p, pkg, nil
   403  }
   404  
   405  func toTrivyCdxComponent(component cdx.Component) ftypes.Component {
   406  	return ftypes.Component{
   407  		BOMRef:     component.BOMRef,
   408  		MIMEType:   component.MIMEType,
   409  		Type:       ftypes.ComponentType(component.Type),
   410  		Name:       component.Name,
   411  		Version:    component.Version,
   412  		PackageURL: component.PackageURL,
   413  	}
   414  }
   415  
   416  func getPackageName(typ, pkgNameFromPurl string, component cdx.Component) string {
   417  	if typ == packageurl.TypeMaven {
   418  		// Jar uses `Group` field for `GroupID`
   419  		if component.Group != "" {
   420  			return fmt.Sprintf("%s:%s", component.Group, component.Name)
   421  		} else {
   422  			// use name derived from purl if `Group` doesn't exist
   423  			return pkgNameFromPurl
   424  		}
   425  	}
   426  	return component.Name
   427  }