github.com/anchore/syft@v1.38.2/internal/relationship/by_file_ownership.go (about)

     1  package relationship
     2  
     3  import (
     4  	"sort"
     5  
     6  	"github.com/bmatcuk/doublestar/v4"
     7  	"github.com/scylladb/go-set/strset"
     8  
     9  	"github.com/anchore/syft/internal/log"
    10  	"github.com/anchore/syft/internal/sbomsync"
    11  	"github.com/anchore/syft/syft/artifact"
    12  	"github.com/anchore/syft/syft/file"
    13  	"github.com/anchore/syft/syft/pkg"
    14  	"github.com/anchore/syft/syft/sbom"
    15  )
    16  
// altRpmDBGlob allows db matches against new locations introduced in fedora:{36,37}.
// It matches files named Packages, Packages.db, or rpmdb.sqlite located directly inside any
// directory named "rpm".
// See https://github.com/anchore/syft/issues/1077 for larger context.
const altRpmDBGlob = "**/rpm/{Packages,Packages.db,rpmdb.sqlite}"
    20  
// globsForbiddenFromBeingOwned lists path globs that are excluded when deriving ownership-by-file-overlap
// relationships: a resolved path matching any of these globs is skipped and never produces an ownership claim.
var globsForbiddenFromBeingOwned = []string{
	// any OS DBs should automatically be ignored to prevent cyclic issues (e.g. the "rpm" RPM owns the path to the
	// RPM DB, so if not ignored that package would own all other packages on the system).
	pkg.ApkDBGlob,
	pkg.DpkgDBGlob,
	pkg.RpmDBGlob,
	altRpmDBGlob,
	// DEB packages share common copyright info between them; sharing these paths does not imply ownership.
	"/usr/share/doc/**/copyright",
}
    31  
// ownershipByFilesMetadata is the Data payload attached to each ownership-by-file-overlap relationship
// created in this file.
type ownershipByFilesMetadata struct {
	// Files holds the overlapping paths that justify the relationship; the code that builds the
	// relationship sorts them before storing them here.
	Files []string `json:"files"`
}
    35  
    36  func ByFileOwnershipOverlapWorker(resolver file.Resolver, accessor sbomsync.Accessor) {
    37  	var relationships []artifact.Relationship
    38  
    39  	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
    40  		relationships = byFileOwnershipOverlap(resolver, s.Artifacts.Packages)
    41  	})
    42  
    43  	accessor.WriteToSBOM(func(s *sbom.SBOM) {
    44  		s.Relationships = append(s.Relationships, relationships...)
    45  	})
    46  }
    47  
    48  // byFileOwnershipOverlap creates a package-to-package relationship based on discovering which packages have
    49  // evidence locations that overlap with ownership claim from another package's package manager metadata.
    50  func byFileOwnershipOverlap(resolver file.Resolver, catalog *pkg.Collection) []artifact.Relationship {
    51  	var relationships = findOwnershipByFilesRelationships(resolver, catalog)
    52  
    53  	var edges []artifact.Relationship
    54  	for parentID, children := range relationships {
    55  		for childID, files := range children {
    56  			fs := files.List()
    57  			sort.Strings(fs)
    58  
    59  			parent := catalog.Package(parentID) // TODO: this is potentially expensive
    60  			child := catalog.Package(childID)   // TODO: this is potentially expensive
    61  
    62  			if parent == nil {
    63  				log.Tracef("parent package not found: %v", parentID)
    64  				continue
    65  			}
    66  
    67  			if child == nil {
    68  				log.Tracef("child package not found: %v", childID)
    69  				continue
    70  			}
    71  
    72  			edges = append(edges, artifact.Relationship{
    73  				From: *parent,
    74  				To:   *child,
    75  				Type: artifact.OwnershipByFileOverlapRelationship,
    76  				Data: ownershipByFilesMetadata{
    77  					Files: fs,
    78  				},
    79  			})
    80  		}
    81  	}
    82  
    83  	return edges
    84  }
    85  
    86  // findOwnershipByFilesRelationships find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of
    87  // a package is found to be owned by another (from the owner's .Metadata.Files[]).
    88  func findOwnershipByFilesRelationships(resolver file.Resolver, catalog *pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set { //nolint:gocognit
    89  	var relationships = make(map[artifact.ID]map[artifact.ID]*strset.Set)
    90  
    91  	if catalog == nil {
    92  		return relationships
    93  	}
    94  
    95  	// Build a map of real paths to packages that directly own them. We'll use this
    96  	// to check if a file is already owned by the same type of package when we're
    97  	// determining ownership via symlink.
    98  	directOwnership := directOwnersByPath(catalog)
    99  
   100  	// Now establish relationships, considering symlinks
   101  	for _, candidateOwnerPkg := range catalog.Sorted() {
   102  		id := candidateOwnerPkg.ID()
   103  		if candidateOwnerPkg.Metadata == nil {
   104  			continue
   105  		}
   106  
   107  		// check to see if this is a file owner
   108  		pkgFileOwner, ok := candidateOwnerPkg.Metadata.(pkg.FileOwner)
   109  		if !ok {
   110  			continue
   111  		}
   112  
   113  		for _, ownedFilePath := range pkgFileOwner.OwnedFiles() {
   114  			if ownedFilePath == "" {
   115  				continue
   116  			}
   117  
   118  			// find paths that result in a hit (includes resolving symlinks)
   119  			resolvedPaths := resolvePaths(ownedFilePath, resolver)
   120  
   121  			for _, resolvedPath := range resolvedPaths {
   122  				if matchesAny(resolvedPath, globsForbiddenFromBeingOwned) {
   123  					// we skip over known exceptions to file ownership, such as the RPM package owning
   124  					// the RPM DB path, otherwise the RPM package would "own" all RPMs, which is not intended
   125  					continue
   126  				}
   127  
   128  				// Skip claiming ownership via symlink if another package of the same type
   129  				// directly owns this real path. This is the specific fix for the issue where a
   130  				// symlink shouldn't allow a package to claim ownership when another package of
   131  				// the same type directly owns the real file.
   132  				if resolvedPath != ownedFilePath { // This is a resolved symlink path
   133  					// Check if another package of the same type directly owns this path
   134  					if paths := directOwnership[candidateOwnerPkg.Type]; paths != nil && paths.Has(resolvedPath) {
   135  						// Skip this path - a package of the same type directly owns it
   136  						continue
   137  					}
   138  				}
   139  
   140  				// look for package(s) in the catalog that may be owned by this package and mark the relationship
   141  				for _, subPackage := range catalog.PackagesByPath(resolvedPath) {
   142  					subID := subPackage.ID()
   143  					if subID == id {
   144  						continue
   145  					}
   146  					if _, exists := relationships[id]; !exists {
   147  						relationships[id] = make(map[artifact.ID]*strset.Set)
   148  					}
   149  
   150  					if _, exists := relationships[id][subID]; !exists {
   151  						relationships[id][subID] = strset.New()
   152  					}
   153  					relationships[id][subID].Add(resolvedPath)
   154  				}
   155  			}
   156  		}
   157  	}
   158  
   159  	return relationships
   160  }
   161  
   162  func directOwnersByPath(catalog *pkg.Collection) map[pkg.Type]*strset.Set {
   163  	directOwnership := map[pkg.Type]*strset.Set{}
   164  
   165  	// First, identify direct ownership of all files
   166  	for _, p := range catalog.Sorted() {
   167  		if p.Metadata == nil {
   168  			continue
   169  		}
   170  
   171  		// check to see if this is a file owner
   172  		pkgFileOwner, ok := p.Metadata.(pkg.FileOwner)
   173  		if !ok {
   174  			continue
   175  		}
   176  
   177  		for _, ownedFilePath := range pkgFileOwner.OwnedFiles() {
   178  			if ownedFilePath == "" {
   179  				continue
   180  			}
   181  
   182  			// Register direct ownership
   183  			paths := directOwnership[p.Type]
   184  			if paths == nil {
   185  				paths = strset.New()
   186  				directOwnership[p.Type] = paths
   187  			}
   188  			paths.Add(ownedFilePath)
   189  		}
   190  	}
   191  
   192  	return directOwnership
   193  }
   194  
   195  func resolvePaths(ownedFilePath string, resolver file.Resolver) []string {
   196  	// though we have a string path, we need to resolve symlinks and other filesystem oddities since we cannot assume this is a real path
   197  	var locs []file.Location
   198  	var err error
   199  	if resolver != nil {
   200  		locs, err = resolver.FilesByPath(ownedFilePath)
   201  		if err != nil {
   202  			log.WithFields("error", err, "path", ownedFilePath).Trace("unable to find path for owned file")
   203  			locs = nil
   204  		}
   205  	}
   206  
   207  	ownedFilePaths := strset.New(ownedFilePath)
   208  	for _, loc := range locs {
   209  		ownedFilePaths.Add(loc.RealPath)
   210  	}
   211  	return ownedFilePaths.List()
   212  }
   213  
   214  func matchesAny(s string, globs []string) bool {
   215  	for _, g := range globs {
   216  		matches, err := doublestar.Match(g, s)
   217  		if err != nil {
   218  			log.Errorf("failed to match glob=%q : %+v", g, err)
   219  		}
   220  		if matches {
   221  			return true
   222  		}
   223  	}
   224  	return false
   225  }