github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/dotnet/deps_binary_cataloger.go (about)

     1  package dotnet
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"path"
     7  	"regexp"
     8  	"sort"
     9  	"strings"
    10  
    11  	"github.com/scylladb/go-set/strset"
    12  
    13  	"github.com/anchore/syft/internal"
    14  	"github.com/anchore/syft/internal/relationship"
    15  	"github.com/anchore/syft/internal/unknown"
    16  	"github.com/anchore/syft/syft/artifact"
    17  	"github.com/anchore/syft/syft/file"
    18  	"github.com/anchore/syft/syft/pkg"
    19  )
    20  
// Glob patterns handed to the file resolver to discover .NET evidence.
const (
	depsJSONGlob = "**/*.deps.json" // deps.json documents, searched by findDepsJSON
	dllGlob      = "**/*.dll"       // PE file candidates, searched by findPEFiles
	exeGlob      = "**/*.exe"       // PE file candidates, searched by findPEFiles
)
    26  
// depsBinaryCataloger will search for both deps.json evidence and PE file evidence to create packages. All packages
// from both sources are raised up, but with one merge operation applied: if a deps.json package reference can be
// correlated with a PE file, the PE file is attached to the package as supporting evidence.
type depsBinaryCataloger struct {
	// config controls which deps.json packages are kept (e.g. whether a package must be backed by a DLL).
	config CatalogerConfig
}
    33  
    34  func (c depsBinaryCataloger) Name() string {
    35  	return "dotnet-deps-binary-cataloger"
    36  }
    37  
    38  func (c depsBinaryCataloger) Catalog(_ context.Context, resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { //nolint:funlen
    39  	depJSONDocs, unknowns, err := findDepsJSON(resolver)
    40  	if err != nil {
    41  		return nil, nil, err
    42  	}
    43  
    44  	peFiles, ldpeUnknownErr, err := findPEFiles(resolver)
    45  	if err != nil {
    46  		return nil, nil, err
    47  	}
    48  	if ldpeUnknownErr != nil {
    49  		unknowns = unknown.Join(unknowns, ldpeUnknownErr)
    50  	}
    51  
    52  	// partition the logical PE files by location and pair them with the logicalDepsJSON
    53  	pairedDepsJSONs, remainingPeFiles, remainingDepsJSONs := partitionPEs(depJSONDocs, peFiles)
    54  
    55  	var pkgs []pkg.Package
    56  	var relationships []artifact.Relationship
    57  
    58  	depDocGroups := [][]logicalDepsJSON{pairedDepsJSONs}
    59  
    60  	if !c.config.DepPackagesMustHaveDLL {
    61  		depDocGroups = append(depDocGroups, remainingDepsJSONs)
    62  	}
    63  
    64  	var roots []*pkg.Package
    65  	for _, docs := range depDocGroups {
    66  		for _, doc := range docs {
    67  			rts, ps, rs := packagesFromLogicalDepsJSON(doc, c.config)
    68  			if rts != nil {
    69  				roots = append(roots, rts)
    70  			}
    71  			pkgs = append(pkgs, ps...)
    72  			relationships = append(relationships, rs...)
    73  		}
    74  	}
    75  
    76  	// track existing runtime packages so we don't create duplicates
    77  	existingRuntimeVersions := strset.New()
    78  	var runtimePkgs []*pkg.Package
    79  	for i := range pkgs {
    80  		p := &pkgs[i]
    81  		if p.Type != pkg.DotnetPkg {
    82  			continue
    83  		}
    84  		if isRuntime(p.Name) {
    85  			existingRuntimeVersions.Add(p.Version)
    86  			runtimePkgs = append(runtimePkgs, p)
    87  		}
    88  	}
    89  
    90  	runtimes := make(map[string][]file.Location)
    91  	for _, pe := range remainingPeFiles {
    92  		runtimeVer, isRuntimePkg := isRuntimePackageLocation(pe.Location)
    93  		if isRuntimePkg {
    94  			runtimes[runtimeVer] = append(runtimes[runtimeVer], pe.Location)
    95  			// we should never catalog runtime DLLs as packages themselves, instead there should be a single logical package
    96  			continue
    97  		}
    98  		pkgs = append(pkgs, newDotnetBinaryPackage(pe.VersionResources, pe.Location))
    99  	}
   100  
   101  	// if we found any runtime DLLs we ignored, then make packages for each version found
   102  	for version, locs := range runtimes {
   103  		if len(locs) == 0 || existingRuntimeVersions.Has(version) {
   104  			continue
   105  		}
   106  		rtp := pkg.Package{
   107  			Name:      "Microsoft.NETCore.App",
   108  			Version:   version,
   109  			Type:      pkg.DotnetPkg,
   110  			CPEs:      runtimeCPEs(version),
   111  			Locations: file.NewLocationSet(locs...),
   112  		}
   113  		pkgs = append(pkgs, rtp)
   114  		runtimePkgs = append(runtimePkgs, &rtp)
   115  	}
   116  
   117  	// create a relationship from every runtime package to every root package...
   118  	for _, root := range roots {
   119  		for _, runtimePkg := range runtimePkgs {
   120  			relationships = append(relationships, artifact.Relationship{
   121  				From: *runtimePkg,
   122  				To:   *root,
   123  				Type: artifact.DependencyOfRelationship,
   124  			})
   125  		}
   126  	}
   127  
   128  	// in the process of creating root-to-runtime relationships, we may have created duplicate relationships. Use the relationship index to deduplicate.
   129  	return pkgs, relationship.NewIndex(relationships...).All(), unknowns
   130  }
   131  
// runtimeDLLPathPattern matches paths containing "/Microsoft.NETCore.App/<major>.<minor>.<patch>/<name>.dll" and
// captures the three-part version in the named group "version". The pattern is not anchored, so it matches
// anywhere within the path.
var runtimeDLLPathPattern = regexp.MustCompile(`/Microsoft\.NETCore\.App/(?P<version>\d+\.\d+\.\d+)/[^/]+\.dll`)
   133  
   134  func isRuntimePackageLocation(loc file.Location) (string, bool) {
   135  	// we should look at the realpath to see if it is a "**/Microsoft.NETCore.App/\d+.\d+.\d+/*.dll"
   136  	// and if so treat it as a runtime package
   137  	if match := runtimeDLLPathPattern.FindStringSubmatch(loc.RealPath); match != nil {
   138  		versionIndex := runtimeDLLPathPattern.SubexpIndex("version")
   139  		if versionIndex != -1 {
   140  			version := match[versionIndex]
   141  			return version, true
   142  		}
   143  	}
   144  
   145  	return "", false
   146  }
   147  
   148  // partitionPEs pairs PE files with the deps.json based on directory containment.
   149  func partitionPEs(depJsons []logicalDepsJSON, peFiles []logicalPE) ([]logicalDepsJSON, []logicalPE, []logicalDepsJSON) {
   150  	// sort deps.json paths from longest to shortest. This is so we are processing the most specific match first.
   151  	sort.Slice(depJsons, func(i, j int) bool {
   152  		return depJsons[i].Location.RealPath > depJsons[j].Location.RealPath
   153  	})
   154  
   155  	// we should be processing PE files in a stable order
   156  	sort.Slice(peFiles, func(i, j int) bool {
   157  		return peFiles[i].Location.RealPath > peFiles[j].Location.RealPath
   158  	})
   159  
   160  	peFilesByPath := make(map[file.Coordinates][]logicalPE)
   161  	var remainingPeFiles []logicalPE
   162  	for _, pe := range peFiles {
   163  		var found bool
   164  		for i := range depJsons {
   165  			dep := &depJsons[i]
   166  			if isParentOf(dep.Location.RealPath, pe.Location.RealPath) && attachAssociatedExecutables(dep, pe) {
   167  				peFilesByPath[dep.Location.Coordinates] = append(peFilesByPath[dep.Location.Coordinates], pe)
   168  				found = true
   169  				// note: we cannot break from the dep JSON search since the same binary could be associated with multiple packages
   170  				// across multiple deps.json files.
   171  			}
   172  		}
   173  		if !found {
   174  			remainingPeFiles = append(remainingPeFiles, pe)
   175  		}
   176  	}
   177  
   178  	var pairedDepsJSON []logicalDepsJSON
   179  	var remainingDepsJSON []logicalDepsJSON
   180  
   181  	for _, dep := range depJsons {
   182  		if _, ok := peFilesByPath[dep.Location.Coordinates]; !ok {
   183  			remainingDepsJSON = append(remainingDepsJSON, dep)
   184  		} else {
   185  			pairedDepsJSON = append(pairedDepsJSON, dep)
   186  		}
   187  	}
   188  
   189  	return pairedDepsJSON, remainingPeFiles, remainingDepsJSON
   190  }
   191  
   192  // attachAssociatedExecutables looks for PE files matching runtime or resource entries
   193  // and attaches them to the appropriate package.
   194  func attachAssociatedExecutables(dep *logicalDepsJSON, pe logicalPE) bool {
   195  	appDir := path.Dir(dep.Location.RealPath)
   196  	relativeDllPath := strings.TrimPrefix(strings.TrimPrefix(pe.Location.RealPath, appDir), "/")
   197  
   198  	var found bool
   199  	for key, p := range dep.PackagesByNameVersion {
   200  		if targetPath, ok := p.RuntimePathsByRelativeDLLPath[relativeDllPath]; ok {
   201  			pe.TargetPath = targetPath
   202  			p.Executables = append(p.Executables, pe)
   203  			dep.PackagesByNameVersion[key] = p // update the map with the modified package
   204  			found = true
   205  			continue
   206  		}
   207  
   208  		if targetPath, ok := p.ResourcePathsByRelativeDLLPath[relativeDllPath]; ok {
   209  			pe.TargetPath = targetPath
   210  			p.Executables = append(p.Executables, pe)
   211  			dep.PackagesByNameVersion[key] = p // update the map with the modified package
   212  			found = true
   213  			continue
   214  		}
   215  
   216  		if targetPath, ok := p.CompilePathsByRelativeDLLPath[relativeDllPath]; ok {
   217  			pe.TargetPath = targetPath
   218  			p.Executables = append(p.Executables, pe)
   219  			dep.PackagesByNameVersion[key] = p // update the map with the modified package
   220  			found = true
   221  			continue
   222  		}
   223  
   224  		if p.NativePaths.Has(relativeDllPath) {
   225  			pe.TargetPath = relativeDllPath
   226  			p.Executables = append(p.Executables, pe)
   227  			dep.PackagesByNameVersion[key] = p // update the map with the modified package
   228  			found = true
   229  			continue
   230  		}
   231  	}
   232  	return found
   233  }
   234  
   235  // isParentOf checks if parentFile's directory is a prefix of childFile's directory.
   236  func isParentOf(parentFile, childFile string) bool {
   237  	parentDir := path.Dir(parentFile)
   238  	childDir := path.Dir(childFile)
   239  	return strings.HasPrefix(childDir, parentDir)
   240  }
   241  
   242  // packagesFromDepsJSON creates packages from a list of logicalDepsJSON documents.
   243  func packagesFromDepsJSON(docs []logicalDepsJSON, config CatalogerConfig) ([]pkg.Package, []artifact.Relationship) {
   244  	var pkgs []pkg.Package
   245  	var relationships []artifact.Relationship
   246  	for _, ldj := range docs {
   247  		_, ps, rs := packagesFromLogicalDepsJSON(ldj, config)
   248  		pkgs = append(pkgs, ps...)
   249  		relationships = append(relationships, rs...)
   250  	}
   251  	return pkgs, relationships
   252  }
   253  
   254  // packagesFromLogicalDepsJSON converts a logicalDepsJSON (using the new map type) into catalog packages.
   255  func packagesFromLogicalDepsJSON(doc logicalDepsJSON, config CatalogerConfig) (*pkg.Package, []pkg.Package, []artifact.Relationship) {
   256  	var rootPkg *pkg.Package
   257  	if rootLpkg, hasRoot := doc.RootPackage(); hasRoot {
   258  		rootPkg = newDotnetDepsPackage(rootLpkg, doc.Location)
   259  	}
   260  
   261  	var pkgs []pkg.Package
   262  	pkgMap := make(map[string]pkg.Package)
   263  	if rootPkg != nil {
   264  		pkgs = append(pkgs, *rootPkg)
   265  		pkgMap[createNameAndVersion(rootPkg.Name, rootPkg.Version)] = *rootPkg
   266  	}
   267  
   268  	nameVersions := doc.PackageNameVersions.List()
   269  	sort.Strings(nameVersions)
   270  
   271  	// process each non-root package
   272  	skippedDepPkgs := make(map[string]logicalDepsJSONPackage)
   273  	for _, nameVersion := range nameVersions {
   274  		name, version := extractNameAndVersion(nameVersion)
   275  		if rootPkg != nil && name == rootPkg.Name && version == rootPkg.Version {
   276  			continue
   277  		}
   278  		lp := doc.PackagesByNameVersion[nameVersion]
   279  		if config.DepPackagesMustHaveDLL && !lp.FoundDLLs(config.PropagateDLLClaimsToParents) {
   280  			// could not find a paired DLL and the user required this...
   281  			skippedDepPkgs[nameVersion] = lp
   282  			continue
   283  		}
   284  
   285  		// check to see if we should skip this package because it does not claim a DLL (or has not dependency that claims a DLL)
   286  		if config.DepPackagesMustClaimDLL && !lp.ClaimsDLLs(config.PropagateDLLClaimsToParents) {
   287  			if config.RelaxDLLClaimsWhenBundlingDetected && !doc.BundlingDetected || !config.RelaxDLLClaimsWhenBundlingDetected {
   288  				// could not find a runtime or resource path and the user required this...
   289  				// and there is no evidence of a bundler in the dependencies (e.g. ILRepack)
   290  				skippedDepPkgs[nameVersion] = lp
   291  				continue
   292  			}
   293  		}
   294  
   295  		dotnetPkg := newDotnetDepsPackage(lp, doc.Location)
   296  		if dotnetPkg != nil {
   297  			pkgs = append(pkgs, *dotnetPkg)
   298  			pkgMap[nameVersion] = *dotnetPkg
   299  		}
   300  	}
   301  	rels := relationshipsFromLogicalDepsJSON(doc, pkgMap, skippedDepPkgs)
   302  
   303  	// ensure that any libman packages are associated with the all root packages
   304  	for _, libmanPkg := range doc.LibmanPackages {
   305  		pkgs = append(pkgs, libmanPkg)
   306  		if rootPkg == nil {
   307  			continue
   308  		}
   309  		rels = append(rels, artifact.Relationship{
   310  			From: libmanPkg,
   311  			To:   *rootPkg,
   312  			Type: artifact.DependencyOfRelationship,
   313  		})
   314  	}
   315  
   316  	return rootPkg, pkgs, rels
   317  }
   318  
   319  // relationshipsFromLogicalDepsJSON creates relationships from a logicalDepsJSON document for only the given syft packages.
   320  // It is possible that the document describes more packages than that is provided as syft packages, in which cases
   321  // those relationships will not be created. If there are any skipped packages, we still want to logically represent
   322  // dependency relationships, jumping over the skipped packages.
   323  func relationshipsFromLogicalDepsJSON(doc logicalDepsJSON, pkgMap map[string]pkg.Package, skipped map[string]logicalDepsJSONPackage) []artifact.Relationship {
   324  	var relationships []artifact.Relationship
   325  	for _, lp := range doc.PackagesByNameVersion {
   326  		if lp.Targets == nil {
   327  			continue
   328  		}
   329  		for _, depNameVersion := range lp.dependencyNameVersions() {
   330  			thisPkg, ok := pkgMap[lp.NameVersion]
   331  			if !ok {
   332  				continue
   333  			}
   334  
   335  			var depPkgs []pkg.Package
   336  			depPkg, ok := pkgMap[depNameVersion]
   337  			if !ok {
   338  				skippedDepPkg, ok := skipped[depNameVersion]
   339  				if !ok {
   340  					// this package wasn't explicitly skipped, so it could be a malformed deps.json file
   341  					// ignore this case and do not create a relationships
   342  					continue
   343  				}
   344  				// we have a skipped package, so we need to create a relationship but looking a the nearest
   345  				// package with an associated PE file for even dependency listed on the skipped package.
   346  				// Take note that the skipped dependency's dependency could also be skipped, so we need to
   347  				// do this recursively.
   348  				depPkgs = findNearestDependencyPackages(skippedDepPkg, pkgMap, skipped, strset.New())
   349  			} else {
   350  				depPkgs = append(depPkgs, depPkg)
   351  			}
   352  
   353  			for _, d := range depPkgs {
   354  				rel := artifact.Relationship{
   355  					From: d,
   356  					To:   thisPkg,
   357  					Type: artifact.DependencyOfRelationship,
   358  				}
   359  				relationships = append(relationships, rel)
   360  			}
   361  		}
   362  	}
   363  
   364  	relationship.Sort(relationships)
   365  	return relationships
   366  }
   367  
   368  func findNearestDependencyPackages(skippedDep logicalDepsJSONPackage, pkgMap map[string]pkg.Package, skipped map[string]logicalDepsJSONPackage, processed *strset.Set) []pkg.Package {
   369  	var nearestPkgs []pkg.Package
   370  
   371  	// if we have already processed this package, skip it to avoid infinite recursion
   372  	if processed.Has(skippedDep.NameVersion) {
   373  		return nearestPkgs
   374  	}
   375  
   376  	processed.Add(skippedDep.NameVersion)
   377  
   378  	for _, depNameVersion := range skippedDep.dependencyNameVersions() {
   379  		depPkg, ok := pkgMap[depNameVersion]
   380  		if !ok {
   381  			skippedDepPkg, ok := skipped[depNameVersion]
   382  			if !ok {
   383  				// this package wasn't explicitly skipped, so it could be a malformed deps.json file
   384  				// ignore this case and do not create a relationships
   385  				continue
   386  			}
   387  
   388  			nearestPkgs = append(nearestPkgs, findNearestDependencyPackages(skippedDepPkg, pkgMap, skipped, processed)...)
   389  		} else {
   390  			nearestPkgs = append(nearestPkgs, depPkg)
   391  		}
   392  	}
   393  	return nearestPkgs
   394  }
   395  
   396  // findDepsJSON locates and parses all deps.json files.
   397  func findDepsJSON(resolver file.Resolver) ([]logicalDepsJSON, error, error) {
   398  	locs, err := resolver.FilesByGlob(depsJSONGlob)
   399  	if err != nil {
   400  		return nil, nil, fmt.Errorf("unable to find deps.json files: %w", err)
   401  	}
   402  
   403  	var depsJSONs []logicalDepsJSON
   404  	var unknownErr error
   405  	for _, loc := range locs {
   406  		dj, err := readDepsJSON(resolver, loc)
   407  		if err != nil {
   408  			unknownErr = unknown.Append(unknownErr, loc, err)
   409  			continue
   410  		}
   411  
   412  		libman, err := findLibmanJSON(resolver, loc)
   413  		if err != nil {
   414  			unknownErr = unknown.Append(unknownErr, loc, err)
   415  			libman = nil
   416  		}
   417  
   418  		depsJSONs = append(depsJSONs, getLogicalDepsJSON(*dj, libman))
   419  	}
   420  
   421  	return depsJSONs, unknownErr, nil
   422  }
   423  
   424  // readDepsJSON reads and parses a single deps.json file.
   425  func readDepsJSON(resolver file.Resolver, loc file.Location) (*depsJSON, error) {
   426  	reader, err := resolver.FileContentsByLocation(loc)
   427  	if err != nil {
   428  		return nil, unknown.New(loc, fmt.Errorf("unable to read deps.json file: %w", err))
   429  	}
   430  	defer internal.CloseAndLogError(reader, loc.RealPath)
   431  
   432  	dj, err := newDepsJSON(file.NewLocationReadCloser(loc, reader))
   433  	if err != nil {
   434  		return nil, unknown.New(loc, fmt.Errorf("unable to parse deps.json file: %w", err))
   435  	}
   436  
   437  	if dj == nil {
   438  		return nil, unknown.New(loc, fmt.Errorf("expected to find packages in deps.json but did not: %q", loc.RealPath))
   439  	}
   440  
   441  	return dj, nil
   442  }
   443  
   444  // findPEFiles locates and parses all PE files (dll/exe).
   445  func findPEFiles(resolver file.Resolver) ([]logicalPE, error, error) {
   446  	peLocs, err := resolver.FilesByGlob(dllGlob, exeGlob)
   447  	if err != nil {
   448  		return nil, nil, fmt.Errorf("unable to find PE files: %w", err)
   449  	}
   450  
   451  	var peFiles []logicalPE
   452  	var unknownErr error
   453  	for _, loc := range peLocs {
   454  		ldpe, err := readPEFile(resolver, loc)
   455  		if err != nil {
   456  			unknownErr = unknown.Append(unknownErr, loc, err)
   457  			continue
   458  		}
   459  		if ldpe == nil {
   460  			continue
   461  		}
   462  		peFiles = append(peFiles, *ldpe)
   463  	}
   464  
   465  	return peFiles, unknownErr, nil
   466  }
   467  
   468  // readPEFile reads and parses a single PE file.
   469  func readPEFile(resolver file.Resolver, loc file.Location) (*logicalPE, error) {
   470  	reader, err := resolver.FileContentsByLocation(loc)
   471  	if err != nil {
   472  		return nil, unknown.New(loc, fmt.Errorf("unable to read PE file: %w", err))
   473  	}
   474  	defer internal.CloseAndLogError(reader, loc.RealPath)
   475  
   476  	ldpe, err := readLogicalPE(file.NewLocationReadCloser(loc, reader))
   477  	if err != nil {
   478  		return nil, unknown.New(loc, fmt.Errorf("unable to parse PE file: %w", err))
   479  	}
   480  
   481  	if ldpe == nil {
   482  		return nil, nil
   483  	}
   484  
   485  	if !ldpe.CLR.HasEvidenceOfCLR() {
   486  		// this is not a .NET binary
   487  		return nil, nil
   488  	}
   489  
   490  	return ldpe, nil
   491  }