github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/sbom/spdx/unmarshal.go (about)

     1  package spdx
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"sort"
     9  	"strings"
    10  
    11  	version "github.com/knqyf263/go-rpm-version"
    12  	"github.com/package-url/packageurl-go"
    13  	"github.com/samber/lo"
    14  	"github.com/spdx/tools-golang/json"
    15  	"github.com/spdx/tools-golang/spdx"
    16  	"github.com/spdx/tools-golang/spdx/v2/common"
    17  	"github.com/spdx/tools-golang/tagvalue"
    18  	"golang.org/x/xerrors"
    19  
    20  	ftypes "github.com/devseccon/trivy/pkg/fanal/types"
    21  	"github.com/devseccon/trivy/pkg/purl"
    22  	"github.com/devseccon/trivy/pkg/types"
    23  )
    24  
    25  var (
    26  	errUnknownPackageFormat = xerrors.New("unknown package format")
    27  )
    28  
// SPDX wraps types.SBOM so that it can be populated from an SPDX document,
// either from JSON (UnmarshalJSON) or from tag-value input (TVDecoder.Decode).
type SPDX struct {
	// SBOM receives the decoded OS, packages, applications and the original
	// SPDX document.
	*types.SBOM
}
    32  
    33  func NewTVDecoder(r io.Reader) *TVDecoder {
    34  	return &TVDecoder{r: r}
    35  }
    36  
    37  type TVDecoder struct {
    38  	r io.Reader
    39  }
    40  
    41  func (tv *TVDecoder) Decode(v interface{}) error {
    42  	spdxDocument, err := tagvalue.Read(tv.r)
    43  	if err != nil {
    44  		return xerrors.Errorf("failed to load tag-value spdx: %w", err)
    45  	}
    46  
    47  	a, ok := v.(*SPDX)
    48  	if !ok {
    49  		return xerrors.Errorf("invalid struct type tag-value decoder needed SPDX struct")
    50  	}
    51  	err = a.unmarshal(spdxDocument)
    52  	if err != nil {
    53  		return xerrors.Errorf("failed to unmarshal spdx: %w", err)
    54  	}
    55  
    56  	return nil
    57  }
    58  
    59  func (s *SPDX) UnmarshalJSON(b []byte) error {
    60  	spdxDocument, err := json.Read(bytes.NewReader(b))
    61  	if err != nil {
    62  		return xerrors.Errorf("failed to load spdx json: %w", err)
    63  	}
    64  	err = s.unmarshal(spdxDocument)
    65  	if err != nil {
    66  		return xerrors.Errorf("failed to unmarshal spdx: %w", err)
    67  	}
    68  	return nil
    69  }
    70  
    71  func (s *SPDX) unmarshal(spdxDocument *spdx.Document) error {
    72  	var osPkgs []ftypes.Package
    73  	apps := make(map[common.ElementID]*ftypes.Application)
    74  	packageSPDXIdentifierMap := createPackageSPDXIdentifierMap(spdxDocument.Packages)
    75  	packageFilePaths := getPackageFilePaths(spdxDocument)
    76  
    77  	// Hold packages that are not processed by relationships
    78  	orphanPkgs := createPackageSPDXIdentifierMap(spdxDocument.Packages)
    79  
    80  	relationships := lo.Filter(spdxDocument.Relationships, func(rel *spdx.Relationship, _ int) bool {
    81  		// Skip the DESCRIBES relationship.
    82  		return rel.Relationship != common.TypeRelationshipDescribe && rel.Relationship != "DESCRIBE"
    83  	})
    84  
    85  	// Package relationships would be as belows:
    86  	// - Root (container image, filesystem, etc.)
    87  	//   - Operating System (debian 10)
    88  	//     - OS package A
    89  	//     - OS package B
    90  	//   - Application 1 (package-lock.json)
    91  	//     - Node.js package A
    92  	//     - Node.js package B
    93  	//   - Application 2 (Pipfile.lock)
    94  	//     - Python package A
    95  	//     - Python package B
    96  	for _, rel := range relationships {
    97  		pkgA := packageSPDXIdentifierMap[rel.RefA.ElementRefID]
    98  		pkgB := packageSPDXIdentifierMap[rel.RefB.ElementRefID]
    99  
   100  		if pkgA == nil || pkgB == nil {
   101  			// Skip the missing pkg relationship.
   102  			continue
   103  		}
   104  
   105  		switch {
   106  		// Relationship: root package => OS
   107  		case isOperatingSystem(pkgB.PackageSPDXIdentifier):
   108  			s.SBOM.OS = parseOS(*pkgB)
   109  			delete(orphanPkgs, pkgB.PackageSPDXIdentifier)
   110  		// Relationship: OS => OS package
   111  		case isOperatingSystem(pkgA.PackageSPDXIdentifier):
   112  			pkg, _, err := parsePkg(*pkgB, packageFilePaths)
   113  			if errors.Is(err, errUnknownPackageFormat) {
   114  				continue
   115  			} else if err != nil {
   116  				return xerrors.Errorf("failed to parse os package: %w", err)
   117  			}
   118  			osPkgs = append(osPkgs, *pkg)
   119  			delete(orphanPkgs, pkgB.PackageSPDXIdentifier)
   120  		// Relationship: root package => application
   121  		case isApplication(pkgB.PackageSPDXIdentifier):
   122  			// pass
   123  		// Relationship: application => language-specific package
   124  		case isApplication(pkgA.PackageSPDXIdentifier):
   125  			app, ok := apps[pkgA.PackageSPDXIdentifier]
   126  			if !ok {
   127  				app = initApplication(*pkgA)
   128  				apps[pkgA.PackageSPDXIdentifier] = app
   129  			}
   130  
   131  			lib, _, err := parsePkg(*pkgB, packageFilePaths)
   132  			if errors.Is(err, errUnknownPackageFormat) {
   133  				continue
   134  			} else if err != nil {
   135  				return xerrors.Errorf("failed to parse language-specific package: %w", err)
   136  			}
   137  			app.Libraries = append(app.Libraries, *lib)
   138  
   139  			// They are no longer orphan packages
   140  			delete(orphanPkgs, pkgA.PackageSPDXIdentifier)
   141  			delete(orphanPkgs, pkgB.PackageSPDXIdentifier)
   142  		}
   143  	}
   144  
   145  	// Fill OS packages
   146  	if len(osPkgs) > 0 {
   147  		s.Packages = []ftypes.PackageInfo{{Packages: osPkgs}}
   148  	}
   149  
   150  	// Fill applications
   151  	for _, app := range apps {
   152  		s.SBOM.Applications = append(s.SBOM.Applications, *app)
   153  	}
   154  
   155  	// Fallback for when there are no effective relationships.
   156  	if err := s.parsePackages(orphanPkgs); err != nil {
   157  		return err
   158  	}
   159  
   160  	// Keep the original document
   161  	s.SPDX = spdxDocument
   162  	return nil
   163  }
   164  
   165  // parsePackages processes the packages and categorizes them into OS packages and application packages.
   166  // Note that all language-specific packages are treated as a single application.
   167  func (s *SPDX) parsePackages(pkgs map[common.ElementID]*spdx.Package) error {
   168  	var (
   169  		osPkgs []ftypes.Package
   170  		apps   = make(map[ftypes.LangType]ftypes.Application)
   171  	)
   172  
   173  	for _, p := range pkgs {
   174  		pkg, pkgURL, err := parsePkg(*p, nil)
   175  		if errors.Is(err, errUnknownPackageFormat) {
   176  			continue
   177  		} else if err != nil {
   178  			return xerrors.Errorf("failed to parse package: %w", err)
   179  		}
   180  		switch pkgURL.Class() {
   181  		case types.ClassOSPkg:
   182  			osPkgs = append(osPkgs, *pkg)
   183  		case types.ClassLangPkg:
   184  			// Language-specific packages
   185  			pkgType := pkgURL.LangType()
   186  			app, ok := apps[pkgType]
   187  			if !ok {
   188  				app.Type = pkgType
   189  			}
   190  			app.Libraries = append(app.Libraries, *pkg)
   191  			apps[pkgType] = app
   192  		}
   193  	}
   194  	if len(osPkgs) > 0 {
   195  		s.Packages = []ftypes.PackageInfo{{Packages: osPkgs}}
   196  	}
   197  	for _, app := range apps {
   198  		sort.Sort(app.Libraries)
   199  		s.SBOM.Applications = append(s.SBOM.Applications, app)
   200  	}
   201  	return nil
   202  }
   203  
   204  func createPackageSPDXIdentifierMap(packages []*spdx.Package) map[common.ElementID]*spdx.Package {
   205  	return lo.SliceToMap(packages, func(pkg *spdx.Package) (common.ElementID, *spdx.Package) {
   206  		return pkg.PackageSPDXIdentifier, pkg
   207  	})
   208  }
   209  
   210  func createFileSPDXIdentifierMap(files []*spdx.File) map[string]*spdx.File {
   211  	ret := make(map[string]*spdx.File)
   212  	for _, file := range files {
   213  		ret[string(file.FileSPDXIdentifier)] = file
   214  	}
   215  	return ret
   216  }
   217  
   218  func isOperatingSystem(elementID spdx.ElementID) bool {
   219  	return strings.HasPrefix(string(elementID), ElementOperatingSystem)
   220  }
   221  
   222  func isApplication(elementID spdx.ElementID) bool {
   223  	return strings.HasPrefix(string(elementID), ElementApplication)
   224  }
   225  
   226  func isFile(elementID spdx.ElementID) bool {
   227  	return strings.HasPrefix(string(elementID), ElementFile)
   228  }
   229  
   230  func initApplication(pkg spdx.Package) *ftypes.Application {
   231  	app := &ftypes.Application{Type: ftypes.LangType(pkg.PackageName)}
   232  	switch app.Type {
   233  	case ftypes.NodePkg, ftypes.PythonPkg, ftypes.GemSpec, ftypes.Jar, ftypes.CondaPkg:
   234  		app.FilePath = ""
   235  	default:
   236  		app.FilePath = pkg.PackageSourceInfo
   237  	}
   238  
   239  	return app
   240  }
   241  
   242  func parseOS(pkg spdx.Package) ftypes.OS {
   243  	return ftypes.OS{
   244  		Family: ftypes.OSType(pkg.PackageName),
   245  		Name:   pkg.PackageVersion,
   246  	}
   247  }
   248  
   249  func parsePkg(spdxPkg spdx.Package, packageFilePaths map[string]string) (*ftypes.Package, *purl.PackageURL, error) {
   250  	pkg, pkgURL, err := parseExternalReferences(spdxPkg.PackageExternalReferences)
   251  	if err != nil {
   252  		return nil, nil, xerrors.Errorf("external references error: %w", err)
   253  	}
   254  
   255  	if spdxPkg.PackageLicenseDeclared != "NONE" {
   256  		pkg.Licenses = strings.Split(spdxPkg.PackageLicenseDeclared, ",")
   257  	}
   258  
   259  	if strings.HasPrefix(spdxPkg.PackageSourceInfo, SourcePackagePrefix) {
   260  		srcPkgName := strings.TrimPrefix(spdxPkg.PackageSourceInfo, fmt.Sprintf("%s: ", SourcePackagePrefix))
   261  		pkg.SrcEpoch, pkg.SrcName, pkg.SrcVersion, pkg.SrcRelease, err = parseSourceInfo(pkgURL.Type, srcPkgName)
   262  		if err != nil {
   263  			return nil, nil, xerrors.Errorf("failed to parse source info: %w", err)
   264  		}
   265  	}
   266  
   267  	if path, ok := packageFilePaths[string(spdxPkg.PackageSPDXIdentifier)]; ok {
   268  		pkg.FilePath = path
   269  	} else if len(spdxPkg.Files) > 0 {
   270  		// Take the first file name
   271  		pkg.FilePath = spdxPkg.Files[0].FileName
   272  	}
   273  
   274  	pkg.ID = lookupAttributionTexts(spdxPkg.PackageAttributionTexts, PropertyPkgID)
   275  	pkg.Layer.Digest = lookupAttributionTexts(spdxPkg.PackageAttributionTexts, PropertyLayerDigest)
   276  	pkg.Layer.DiffID = lookupAttributionTexts(spdxPkg.PackageAttributionTexts, PropertyLayerDiffID)
   277  
   278  	return pkg, pkgURL, nil
   279  }
   280  
   281  func parseExternalReferences(refs []*spdx.PackageExternalReference) (*ftypes.Package, *purl.PackageURL, error) {
   282  	for _, ref := range refs {
   283  		// Extract the package information from PURL
   284  		if ref.RefType != RefTypePurl || ref.Category != CategoryPackageManager {
   285  			continue
   286  		}
   287  
   288  		packageURL, err := purl.FromString(ref.Locator)
   289  		if err != nil {
   290  			return nil, nil, xerrors.Errorf("failed to parse purl from string: %w", err)
   291  		}
   292  		pkg := packageURL.Package()
   293  		pkg.Ref = ref.Locator
   294  		return pkg, packageURL, nil
   295  	}
   296  	return nil, nil, errUnknownPackageFormat
   297  }
   298  
   299  func lookupAttributionTexts(attributionTexts []string, key string) string {
   300  	for _, text := range attributionTexts {
   301  		if strings.HasPrefix(text, key) {
   302  			return strings.TrimPrefix(text, fmt.Sprintf("%s: ", key))
   303  		}
   304  	}
   305  	return ""
   306  }
   307  
   308  func parseSourceInfo(pkgType, sourceInfo string) (epoch int, name, ver, rel string, err error) {
   309  	srcNameVersion := strings.TrimPrefix(sourceInfo, fmt.Sprintf("%s: ", SourcePackagePrefix))
   310  	ss := strings.Split(srcNameVersion, " ")
   311  	if len(ss) != 2 {
   312  		return 0, "", "", "", xerrors.Errorf("invalid source info (%s)", sourceInfo)
   313  	}
   314  	name = ss[0]
   315  	if pkgType == packageurl.TypeRPM {
   316  		v := version.NewVersion(ss[1])
   317  		epoch = v.Epoch()
   318  		ver = v.Version()
   319  		rel = v.Release()
   320  	} else {
   321  		ver = ss[1]
   322  	}
   323  	return epoch, name, ver, rel, nil
   324  }
   325  
   326  // getPackageFilePaths parses Relationships and finds filepaths for packages
   327  func getPackageFilePaths(spdxDocument *spdx.Document) map[string]string {
   328  	packageFilePaths := make(map[string]string)
   329  	fileSPDXIdentifierMap := createFileSPDXIdentifierMap(spdxDocument.Files)
   330  	for _, rel := range spdxDocument.Relationships {
   331  		if rel.Relationship != common.TypeRelationshipContains && rel.Relationship != "CONTAIN" {
   332  			// Skip the DESCRIBES relationship.
   333  			continue
   334  		}
   335  
   336  		// hasFiles field is deprecated
   337  		// https://github.com/spdx/tools-golang/issues/171
   338  		// hasFiles values converted in Relationships
   339  		// https://github.com/spdx/tools-golang/pull/201
   340  		if isFile(rel.RefB.ElementRefID) {
   341  			file, ok := fileSPDXIdentifierMap[string(rel.RefB.ElementRefID)]
   342  			if ok {
   343  				// Save filePaths for packages
   344  				// Insert filepath will be later
   345  				packageFilePaths[string(rel.RefA.ElementRefID)] = file.FileName
   346  			}
   347  			continue
   348  		}
   349  	}
   350  	return packageFilePaths
   351  }