github.com/anchore/syft@v1.38.2/internal/relationship/by_file_ownership.go (about) 1 package relationship 2 3 import ( 4 "sort" 5 6 "github.com/bmatcuk/doublestar/v4" 7 "github.com/scylladb/go-set/strset" 8 9 "github.com/anchore/syft/internal/log" 10 "github.com/anchore/syft/internal/sbomsync" 11 "github.com/anchore/syft/syft/artifact" 12 "github.com/anchore/syft/syft/file" 13 "github.com/anchore/syft/syft/pkg" 14 "github.com/anchore/syft/syft/sbom" 15 ) 16 17 // altRpmDBGlob allows db matches against new locations introduced in fedora:{36,37} 18 // See https://github.com/anchore/syft/issues/1077 for larger context 19 const altRpmDBGlob = "**/rpm/{Packages,Packages.db,rpmdb.sqlite}" 20 21 var globsForbiddenFromBeingOwned = []string{ 22 // any OS DBs should automatically be ignored to prevent cyclic issues (e.g. the "rpm" RPM owns the path to the 23 // RPM DB, so if not ignored that package would own all other packages on the system). 24 pkg.ApkDBGlob, 25 pkg.DpkgDBGlob, 26 pkg.RpmDBGlob, 27 altRpmDBGlob, 28 // DEB packages share common copyright info between, this does not mean that sharing these paths implies ownership. 29 "/usr/share/doc/**/copyright", 30 } 31 32 type ownershipByFilesMetadata struct { 33 Files []string `json:"files"` 34 } 35 36 func ByFileOwnershipOverlapWorker(resolver file.Resolver, accessor sbomsync.Accessor) { 37 var relationships []artifact.Relationship 38 39 accessor.ReadFromSBOM(func(s *sbom.SBOM) { 40 relationships = byFileOwnershipOverlap(resolver, s.Artifacts.Packages) 41 }) 42 43 accessor.WriteToSBOM(func(s *sbom.SBOM) { 44 s.Relationships = append(s.Relationships, relationships...) 45 }) 46 } 47 48 // byFileOwnershipOverlap creates a package-to-package relationship based on discovering which packages have 49 // evidence locations that overlap with ownership claim from another package's package manager metadata. 50 func byFileOwnershipOverlap(resolver file.Resolver, catalog *pkg.Collection) []artifact.Relationship { 51 var relationships = findOwnershipByFilesRelationships(resolver, catalog) 52 53 var edges []artifact.Relationship 54 for parentID, children := range relationships { 55 for childID, files := range children { 56 fs := files.List() 57 sort.Strings(fs) 58 59 parent := catalog.Package(parentID) // TODO: this is potentially expensive 60 child := catalog.Package(childID) // TODO: this is potentially expensive 61 62 if parent == nil { 63 log.Tracef("parent package not found: %v", parentID) 64 continue 65 } 66 67 if child == nil { 68 log.Tracef("child package not found: %v", childID) 69 continue 70 } 71 72 edges = append(edges, artifact.Relationship{ 73 From: *parent, 74 To: *child, 75 Type: artifact.OwnershipByFileOverlapRelationship, 76 Data: ownershipByFilesMetadata{ 77 Files: fs, 78 }, 79 }) 80 } 81 } 82 83 return edges 84 } 85 86 // findOwnershipByFilesRelationships find overlaps in file ownership with a file that defines another package. Specifically, a .Location.Path of 87 // a package is found to be owned by another (from the owner's .Metadata.Files[]). 88 func findOwnershipByFilesRelationships(resolver file.Resolver, catalog *pkg.Collection) map[artifact.ID]map[artifact.ID]*strset.Set { //nolint:gocognit 89 var relationships = make(map[artifact.ID]map[artifact.ID]*strset.Set) 90 91 if catalog == nil { 92 return relationships 93 } 94 95 // Build a map of real paths to packages that directly own them. We'll use this 96 // to check if a file is already owned by the same type of package when we're 97 // determining ownership via symlink. 98 directOwnership := directOwnersByPath(catalog) 99 100 // Now establish relationships, considering symlinks 101 for _, candidateOwnerPkg := range catalog.Sorted() { 102 id := candidateOwnerPkg.ID() 103 if candidateOwnerPkg.Metadata == nil { 104 continue 105 } 106 107 // check to see if this is a file owner 108 pkgFileOwner, ok := candidateOwnerPkg.Metadata.(pkg.FileOwner) 109 if !ok { 110 continue 111 } 112 113 for _, ownedFilePath := range pkgFileOwner.OwnedFiles() { 114 if ownedFilePath == "" { 115 continue 116 } 117 118 // find paths that result in a hit (includes resolving symlinks) 119 resolvedPaths := resolvePaths(ownedFilePath, resolver) 120 121 for _, resolvedPath := range resolvedPaths { 122 if matchesAny(resolvedPath, globsForbiddenFromBeingOwned) { 123 // we skip over known exceptions to file ownership, such as the RPM package owning 124 // the RPM DB path, otherwise the RPM package would "own" all RPMs, which is not intended 125 continue 126 } 127 128 // Skip claiming ownership via symlink if another package of the same type 129 // directly owns this real path. This is the specific fix for the issue where a 130 // symlink shouldn't allow a package to claim ownership when another package of 131 // the same type directly owns the real file. 132 if resolvedPath != ownedFilePath { // This is a resolved symlink path 133 // Check if another package of the same type directly owns this path 134 if paths := directOwnership[candidateOwnerPkg.Type]; paths != nil && paths.Has(resolvedPath) { 135 // Skip this path - a package of the same type directly owns it 136 continue 137 } 138 } 139 140 // look for package(s) in the catalog that may be owned by this package and mark the relationship 141 for _, subPackage := range catalog.PackagesByPath(resolvedPath) { 142 subID := subPackage.ID() 143 if subID == id { 144 continue 145 } 146 if _, exists := relationships[id]; !exists { 147 relationships[id] = make(map[artifact.ID]*strset.Set) 148 } 149 150 if _, exists := relationships[id][subID]; !exists { 151 relationships[id][subID] = strset.New() 152 } 153 relationships[id][subID].Add(resolvedPath) 154 } 155 } 156 } 157 } 158 159 return relationships 160 } 161 162 func directOwnersByPath(catalog *pkg.Collection) map[pkg.Type]*strset.Set { 163 directOwnership := map[pkg.Type]*strset.Set{} 164 165 // First, identify direct ownership of all files 166 for _, p := range catalog.Sorted() { 167 if p.Metadata == nil { 168 continue 169 } 170 171 // check to see if this is a file owner 172 pkgFileOwner, ok := p.Metadata.(pkg.FileOwner) 173 if !ok { 174 continue 175 } 176 177 for _, ownedFilePath := range pkgFileOwner.OwnedFiles() { 178 if ownedFilePath == "" { 179 continue 180 } 181 182 // Register direct ownership 183 paths := directOwnership[p.Type] 184 if paths == nil { 185 paths = strset.New() 186 directOwnership[p.Type] = paths 187 } 188 paths.Add(ownedFilePath) 189 } 190 } 191 192 return directOwnership 193 } 194 195 func resolvePaths(ownedFilePath string, resolver file.Resolver) []string { 196 // though we have a string path, we need to resolve symlinks and other filesystem oddities since we cannot assume this is a real path 197 var locs []file.Location 198 var err error 199 if resolver != nil { 200 locs, err = resolver.FilesByPath(ownedFilePath) 201 if err != nil { 202 log.WithFields("error", err, "path", ownedFilePath).Trace("unable to find path for owned file") 203 locs = nil 204 } 205 } 206 207 ownedFilePaths := strset.New(ownedFilePath) 208 for _, loc := range locs { 209 ownedFilePaths.Add(loc.RealPath) 210 } 211 return ownedFilePaths.List() 212 } 213 214 func matchesAny(s string, globs []string) bool { 215 for _, g := range globs { 216 matches, err := doublestar.Match(g, s) 217 if err != nil { 218 log.Errorf("failed to match glob=%q : %+v", g, err) 219 } 220 if matches { 221 return true 222 } 223 } 224 return false 225 }