github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/sbom/spdx/unmarshal.go (about) 1 package spdx 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "io" 8 "sort" 9 "strings" 10 11 version "github.com/knqyf263/go-rpm-version" 12 "github.com/package-url/packageurl-go" 13 "github.com/samber/lo" 14 "github.com/spdx/tools-golang/json" 15 "github.com/spdx/tools-golang/spdx" 16 "github.com/spdx/tools-golang/spdx/v2/common" 17 "github.com/spdx/tools-golang/tagvalue" 18 "golang.org/x/xerrors" 19 20 ftypes "github.com/devseccon/trivy/pkg/fanal/types" 21 "github.com/devseccon/trivy/pkg/purl" 22 "github.com/devseccon/trivy/pkg/types" 23 ) 24 25 var ( 26 errUnknownPackageFormat = xerrors.New("unknown package format") 27 ) 28 29 type SPDX struct { 30 *types.SBOM 31 } 32 33 func NewTVDecoder(r io.Reader) *TVDecoder { 34 return &TVDecoder{r: r} 35 } 36 37 type TVDecoder struct { 38 r io.Reader 39 } 40 41 func (tv *TVDecoder) Decode(v interface{}) error { 42 spdxDocument, err := tagvalue.Read(tv.r) 43 if err != nil { 44 return xerrors.Errorf("failed to load tag-value spdx: %w", err) 45 } 46 47 a, ok := v.(*SPDX) 48 if !ok { 49 return xerrors.Errorf("invalid struct type tag-value decoder needed SPDX struct") 50 } 51 err = a.unmarshal(spdxDocument) 52 if err != nil { 53 return xerrors.Errorf("failed to unmarshal spdx: %w", err) 54 } 55 56 return nil 57 } 58 59 func (s *SPDX) UnmarshalJSON(b []byte) error { 60 spdxDocument, err := json.Read(bytes.NewReader(b)) 61 if err != nil { 62 return xerrors.Errorf("failed to load spdx json: %w", err) 63 } 64 err = s.unmarshal(spdxDocument) 65 if err != nil { 66 return xerrors.Errorf("failed to unmarshal spdx: %w", err) 67 } 68 return nil 69 } 70 71 func (s *SPDX) unmarshal(spdxDocument *spdx.Document) error { 72 var osPkgs []ftypes.Package 73 apps := make(map[common.ElementID]*ftypes.Application) 74 packageSPDXIdentifierMap := createPackageSPDXIdentifierMap(spdxDocument.Packages) 75 packageFilePaths := getPackageFilePaths(spdxDocument) 76 77 // Hold packages that are not processed by relationships 78 orphanPkgs := createPackageSPDXIdentifierMap(spdxDocument.Packages) 79 80 relationships := lo.Filter(spdxDocument.Relationships, func(rel *spdx.Relationship, _ int) bool { 81 // Skip the DESCRIBES relationship. 82 return rel.Relationship != common.TypeRelationshipDescribe && rel.Relationship != "DESCRIBE" 83 }) 84 85 // Package relationships would be as belows: 86 // - Root (container image, filesystem, etc.) 87 // - Operating System (debian 10) 88 // - OS package A 89 // - OS package B 90 // - Application 1 (package-lock.json) 91 // - Node.js package A 92 // - Node.js package B 93 // - Application 2 (Pipfile.lock) 94 // - Python package A 95 // - Python package B 96 for _, rel := range relationships { 97 pkgA := packageSPDXIdentifierMap[rel.RefA.ElementRefID] 98 pkgB := packageSPDXIdentifierMap[rel.RefB.ElementRefID] 99 100 if pkgA == nil || pkgB == nil { 101 // Skip the missing pkg relationship. 102 continue 103 } 104 105 switch { 106 // Relationship: root package => OS 107 case isOperatingSystem(pkgB.PackageSPDXIdentifier): 108 s.SBOM.OS = parseOS(*pkgB) 109 delete(orphanPkgs, pkgB.PackageSPDXIdentifier) 110 // Relationship: OS => OS package 111 case isOperatingSystem(pkgA.PackageSPDXIdentifier): 112 pkg, _, err := parsePkg(*pkgB, packageFilePaths) 113 if errors.Is(err, errUnknownPackageFormat) { 114 continue 115 } else if err != nil { 116 return xerrors.Errorf("failed to parse os package: %w", err) 117 } 118 osPkgs = append(osPkgs, *pkg) 119 delete(orphanPkgs, pkgB.PackageSPDXIdentifier) 120 // Relationship: root package => application 121 case isApplication(pkgB.PackageSPDXIdentifier): 122 // pass 123 // Relationship: application => language-specific package 124 case isApplication(pkgA.PackageSPDXIdentifier): 125 app, ok := apps[pkgA.PackageSPDXIdentifier] 126 if !ok { 127 app = initApplication(*pkgA) 128 apps[pkgA.PackageSPDXIdentifier] = app 129 } 130 131 lib, _, err := parsePkg(*pkgB, packageFilePaths) 132 if errors.Is(err, errUnknownPackageFormat) { 133 continue 134 } else if err != nil { 135 return xerrors.Errorf("failed to parse language-specific package: %w", err) 136 } 137 app.Libraries = append(app.Libraries, *lib) 138 139 // They are no longer orphan packages 140 delete(orphanPkgs, pkgA.PackageSPDXIdentifier) 141 delete(orphanPkgs, pkgB.PackageSPDXIdentifier) 142 } 143 } 144 145 // Fill OS packages 146 if len(osPkgs) > 0 { 147 s.Packages = []ftypes.PackageInfo{{Packages: osPkgs}} 148 } 149 150 // Fill applications 151 for _, app := range apps { 152 s.SBOM.Applications = append(s.SBOM.Applications, *app) 153 } 154 155 // Fallback for when there are no effective relationships. 156 if err := s.parsePackages(orphanPkgs); err != nil { 157 return err 158 } 159 160 // Keep the original document 161 s.SPDX = spdxDocument 162 return nil 163 } 164 165 // parsePackages processes the packages and categorizes them into OS packages and application packages. 166 // Note that all language-specific packages are treated as a single application. 167 func (s *SPDX) parsePackages(pkgs map[common.ElementID]*spdx.Package) error { 168 var ( 169 osPkgs []ftypes.Package 170 apps = make(map[ftypes.LangType]ftypes.Application) 171 ) 172 173 for _, p := range pkgs { 174 pkg, pkgURL, err := parsePkg(*p, nil) 175 if errors.Is(err, errUnknownPackageFormat) { 176 continue 177 } else if err != nil { 178 return xerrors.Errorf("failed to parse package: %w", err) 179 } 180 switch pkgURL.Class() { 181 case types.ClassOSPkg: 182 osPkgs = append(osPkgs, *pkg) 183 case types.ClassLangPkg: 184 // Language-specific packages 185 pkgType := pkgURL.LangType() 186 app, ok := apps[pkgType] 187 if !ok { 188 app.Type = pkgType 189 } 190 app.Libraries = append(app.Libraries, *pkg) 191 apps[pkgType] = app 192 } 193 } 194 if len(osPkgs) > 0 { 195 s.Packages = []ftypes.PackageInfo{{Packages: osPkgs}} 196 } 197 for _, app := range apps { 198 sort.Sort(app.Libraries) 199 s.SBOM.Applications = append(s.SBOM.Applications, app) 200 } 201 return nil 202 } 203 204 func createPackageSPDXIdentifierMap(packages []*spdx.Package) map[common.ElementID]*spdx.Package { 205 return lo.SliceToMap(packages, func(pkg *spdx.Package) (common.ElementID, *spdx.Package) { 206 return pkg.PackageSPDXIdentifier, pkg 207 }) 208 } 209 210 func createFileSPDXIdentifierMap(files []*spdx.File) map[string]*spdx.File { 211 ret := make(map[string]*spdx.File) 212 for _, file := range files { 213 ret[string(file.FileSPDXIdentifier)] = file 214 } 215 return ret 216 } 217 218 func isOperatingSystem(elementID spdx.ElementID) bool { 219 return strings.HasPrefix(string(elementID), ElementOperatingSystem) 220 } 221 222 func isApplication(elementID spdx.ElementID) bool { 223 return strings.HasPrefix(string(elementID), ElementApplication) 224 } 225 226 func isFile(elementID spdx.ElementID) bool { 227 return strings.HasPrefix(string(elementID), ElementFile) 228 } 229 230 func initApplication(pkg spdx.Package) *ftypes.Application { 231 app := &ftypes.Application{Type: ftypes.LangType(pkg.PackageName)} 232 switch app.Type { 233 case ftypes.NodePkg, ftypes.PythonPkg, ftypes.GemSpec, ftypes.Jar, ftypes.CondaPkg: 234 app.FilePath = "" 235 default: 236 app.FilePath = pkg.PackageSourceInfo 237 } 238 239 return app 240 } 241 242 func parseOS(pkg spdx.Package) ftypes.OS { 243 return ftypes.OS{ 244 Family: ftypes.OSType(pkg.PackageName), 245 Name: pkg.PackageVersion, 246 } 247 } 248 249 func parsePkg(spdxPkg spdx.Package, packageFilePaths map[string]string) (*ftypes.Package, *purl.PackageURL, error) { 250 pkg, pkgURL, err := parseExternalReferences(spdxPkg.PackageExternalReferences) 251 if err != nil { 252 return nil, nil, xerrors.Errorf("external references error: %w", err) 253 } 254 255 if spdxPkg.PackageLicenseDeclared != "NONE" { 256 pkg.Licenses = strings.Split(spdxPkg.PackageLicenseDeclared, ",") 257 } 258 259 if strings.HasPrefix(spdxPkg.PackageSourceInfo, SourcePackagePrefix) { 260 srcPkgName := strings.TrimPrefix(spdxPkg.PackageSourceInfo, fmt.Sprintf("%s: ", SourcePackagePrefix)) 261 pkg.SrcEpoch, pkg.SrcName, pkg.SrcVersion, pkg.SrcRelease, err = parseSourceInfo(pkgURL.Type, srcPkgName) 262 if err != nil { 263 return nil, nil, xerrors.Errorf("failed to parse source info: %w", err) 264 } 265 } 266 267 if path, ok := packageFilePaths[string(spdxPkg.PackageSPDXIdentifier)]; ok { 268 pkg.FilePath = path 269 } else if len(spdxPkg.Files) > 0 { 270 // Take the first file name 271 pkg.FilePath = spdxPkg.Files[0].FileName 272 } 273 274 pkg.ID = lookupAttributionTexts(spdxPkg.PackageAttributionTexts, PropertyPkgID) 275 pkg.Layer.Digest = lookupAttributionTexts(spdxPkg.PackageAttributionTexts, PropertyLayerDigest) 276 pkg.Layer.DiffID = lookupAttributionTexts(spdxPkg.PackageAttributionTexts, PropertyLayerDiffID) 277 278 return pkg, pkgURL, nil 279 } 280 281 func parseExternalReferences(refs []*spdx.PackageExternalReference) (*ftypes.Package, *purl.PackageURL, error) { 282 for _, ref := range refs { 283 // Extract the package information from PURL 284 if ref.RefType != RefTypePurl || ref.Category != CategoryPackageManager { 285 continue 286 } 287 288 packageURL, err := purl.FromString(ref.Locator) 289 if err != nil { 290 return nil, nil, xerrors.Errorf("failed to parse purl from string: %w", err) 291 } 292 pkg := packageURL.Package() 293 pkg.Ref = ref.Locator 294 return pkg, packageURL, nil 295 } 296 return nil, nil, errUnknownPackageFormat 297 } 298 299 func lookupAttributionTexts(attributionTexts []string, key string) string { 300 for _, text := range attributionTexts { 301 if strings.HasPrefix(text, key) { 302 return strings.TrimPrefix(text, fmt.Sprintf("%s: ", key)) 303 } 304 } 305 return "" 306 } 307 308 func parseSourceInfo(pkgType, sourceInfo string) (epoch int, name, ver, rel string, err error) { 309 srcNameVersion := strings.TrimPrefix(sourceInfo, fmt.Sprintf("%s: ", SourcePackagePrefix)) 310 ss := strings.Split(srcNameVersion, " ") 311 if len(ss) != 2 { 312 return 0, "", "", "", xerrors.Errorf("invalid source info (%s)", sourceInfo) 313 } 314 name = ss[0] 315 if pkgType == packageurl.TypeRPM { 316 v := version.NewVersion(ss[1]) 317 epoch = v.Epoch() 318 ver = v.Version() 319 rel = v.Release() 320 } else { 321 ver = ss[1] 322 } 323 return epoch, name, ver, rel, nil 324 } 325 326 // getPackageFilePaths parses Relationships and finds filepaths for packages 327 func getPackageFilePaths(spdxDocument *spdx.Document) map[string]string { 328 packageFilePaths := make(map[string]string) 329 fileSPDXIdentifierMap := createFileSPDXIdentifierMap(spdxDocument.Files) 330 for _, rel := range spdxDocument.Relationships { 331 if rel.Relationship != common.TypeRelationshipContains && rel.Relationship != "CONTAIN" { 332 // Skip the DESCRIBES relationship. 333 continue 334 } 335 336 // hasFiles field is deprecated 337 // https://github.com/spdx/tools-golang/issues/171 338 // hasFiles values converted in Relationships 339 // https://github.com/spdx/tools-golang/pull/201 340 if isFile(rel.RefB.ElementRefID) { 341 file, ok := fileSPDXIdentifierMap[string(rel.RefB.ElementRefID)] 342 if ok { 343 // Save filePaths for packages 344 // Insert filepath will be later 345 packageFilePaths[string(rel.RefA.ElementRefID)] = file.FileName 346 } 347 continue 348 } 349 } 350 return packageFilePaths 351 }