github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/internal/relationship/by_file_ownership.go (about)

     1  package relationship
     2  
     3  import (
     4  	"sort"
     5  
     6  	"github.com/bmatcuk/doublestar/v4"
     7  	"github.com/scylladb/go-set/strset"
     8  
     9  	"github.com/anchore/syft/internal/log"
    10  	"github.com/anchore/syft/internal/sbomsync"
    11  	"github.com/anchore/syft/syft/artifact"
    12  	"github.com/anchore/syft/syft/pkg"
    13  	"github.com/anchore/syft/syft/sbom"
    14  )
    15  
// altRpmDBGlob allows RPM DB matches against the new locations introduced in fedora:{36,37}.
// The pattern matches a file named Packages, Packages.db, or rpmdb.sqlite that sits directly
// inside a directory named "rpm".
// See https://github.com/anchore/syft/issues/1077 for larger context.
const altRpmDBGlob = "**/rpm/{Packages,Packages.db,rpmdb.sqlite}"
    19  
// globsForbiddenFromBeingOwned lists glob patterns for paths that must never be treated as evidence of
// ownership: an owned file path matching any of these patterns is skipped when searching for overlaps.
var globsForbiddenFromBeingOwned = []string{
	// any OS DBs should automatically be ignored to prevent cyclic issues (e.g. the "rpm" RPM owns the path to the
	// RPM DB, so if not ignored that package would own all other packages on the system).
	pkg.ApkDBGlob,
	pkg.DpkgDBGlob,
	pkg.RpmDBGlob,
	altRpmDBGlob,
	// DEB packages share common copyright files between them; sharing these paths does not imply ownership.
	"/usr/share/doc/**/copyright",
}
    30  
// ownershipByFilesMetadata is the payload stored in the Data field of ownership-by-file-overlap relationships.
type ownershipByFilesMetadata struct {
	// Files holds the paths claimed by the owning package that are also locations of the owned package.
	Files []string `json:"files"`
}
    34  
    35  func byFileOwnershipOverlapWorker(accessor sbomsync.Accessor) {
    36  	var relationships []artifact.Relationship
    37  
    38  	accessor.ReadFromSBOM(func(s *sbom.SBOM) {
    39  		relationships = byFileOwnershipOverlap(s.Artifacts.Packages)
    40  	})
    41  
    42  	accessor.WriteToSBOM(func(s *sbom.SBOM) {
    43  		s.Relationships = append(s.Relationships, relationships...)
    44  	})
    45  }
    46  
    47  // byFileOwnershipOverlap creates a package-to-package relationship based on discovering which packages have
    48  // evidence locations that overlap with ownership claim from another package's package manager metadata.
    49  func byFileOwnershipOverlap(catalog *pkg.Collection) []artifact.Relationship {
    50  	var relationships = findOwnershipByFilesRelationships(catalog)
    51  
    52  	var edges []artifact.Relationship
    53  	for parentID, children := range relationships {
    54  		for childID, files := range children {
    55  			fs := files.List()
    56  			sort.Strings(fs)
    57  
    58  			parent := catalog.Package(parentID) // TODO: this is potentially expensive
    59  			child := catalog.Package(childID)   // TODO: this is potentially expensive
    60  
    61  			if parent == nil {
    62  				log.Tracef("parent package not found: %v", parentID)
    63  				continue
    64  			}
    65  
    66  			if child == nil {
    67  				log.Tracef("child package not found: %v", childID)
    68  				continue
    69  			}
    70  
    71  			edges = append(edges, artifact.Relationship{
    72  				From: *parent,
    73  				To:   *child,
    74  				Type: artifact.OwnershipByFileOverlapRelationship,
    75  				Data: ownershipByFilesMetadata{
    76  					Files: fs,
    77  				},
    78  			})
    79  		}
    80  	}
    81  
    82  	return edges
    83  }
    84  
    85  // findOwnershipByFilesRelationships find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of
    86  // a package is found to be owned by another (from the owner's .Metadata.Files[]).
    87  func findOwnershipByFilesRelationships(catalog *pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set {
    88  	var relationships = make(map[artifact.ID]map[artifact.ID]*strset.Set)
    89  
    90  	if catalog == nil {
    91  		return relationships
    92  	}
    93  
    94  	for _, candidateOwnerPkg := range catalog.Sorted() {
    95  		id := candidateOwnerPkg.ID()
    96  		if candidateOwnerPkg.Metadata == nil {
    97  			continue
    98  		}
    99  
   100  		// check to see if this is a file owner
   101  		pkgFileOwner, ok := candidateOwnerPkg.Metadata.(pkg.FileOwner)
   102  		if !ok {
   103  			continue
   104  		}
   105  		for _, ownedFilePath := range pkgFileOwner.OwnedFiles() {
   106  			if matchesAny(ownedFilePath, globsForbiddenFromBeingOwned) {
   107  				// we skip over known exceptions to file ownership, such as the RPM package owning
   108  				// the RPM DB path, otherwise the RPM package would "own" all RPMs, which is not intended
   109  				continue
   110  			}
   111  
   112  			// look for package(s) in the catalog that may be owned by this package and mark the relationship
   113  			for _, subPackage := range catalog.PackagesByPath(ownedFilePath) {
   114  				subID := subPackage.ID()
   115  				if subID == id {
   116  					continue
   117  				}
   118  				if _, exists := relationships[id]; !exists {
   119  					relationships[id] = make(map[artifact.ID]*strset.Set)
   120  				}
   121  
   122  				if _, exists := relationships[id][subID]; !exists {
   123  					relationships[id][subID] = strset.New()
   124  				}
   125  				relationships[id][subID].Add(ownedFilePath)
   126  			}
   127  		}
   128  	}
   129  
   130  	return relationships
   131  }
   132  
   133  func matchesAny(s string, globs []string) bool {
   134  	for _, g := range globs {
   135  		matches, err := doublestar.Match(g, s)
   136  		if err != nil {
   137  			log.Errorf("failed to match glob=%q : %+v", g, err)
   138  		}
   139  		if matches {
   140  			return true
   141  		}
   142  	}
   143  	return false
   144  }