github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/collection.go (about) 1 package pkg 2 3 import ( 4 "sync" 5 6 "github.com/jinzhu/copier" 7 "github.com/scylladb/go-set/strset" 8 9 "github.com/anchore/syft/internal/log" 10 "github.com/anchore/syft/syft/artifact" 11 ) 12 13 // Collection represents a collection of Packages. 14 type Collection struct { 15 byID map[artifact.ID]Package 16 idsByName map[string]orderedIDSet 17 idsByType map[Type]orderedIDSet 18 idsByPath map[string]orderedIDSet // note: this is real path or virtual path 19 lock sync.RWMutex 20 } 21 22 // NewCollection returns a new empty Collection 23 func NewCollection(pkgs ...Package) *Collection { 24 c := Collection{ 25 byID: make(map[artifact.ID]Package), 26 idsByName: make(map[string]orderedIDSet), 27 idsByType: make(map[Type]orderedIDSet), 28 idsByPath: make(map[string]orderedIDSet), 29 } 30 31 for _, p := range pkgs { 32 c.Add(p) 33 } 34 35 return &c 36 } 37 38 // PackageCount returns the total number of packages that have been added. 39 func (c *Collection) PackageCount() int { 40 c.lock.RLock() 41 defer c.lock.RUnlock() 42 43 return len(c.byID) 44 } 45 46 // Package returns the package with the given ID. 47 func (c *Collection) Package(id artifact.ID) *Package { 48 c.lock.RLock() 49 defer c.lock.RUnlock() 50 51 v, exists := c.byID[id] 52 if !exists { 53 return nil 54 } 55 var p Package 56 if err := copier.Copy(&p, &v); err != nil { 57 log.Warnf("unable to copy package id=%q name=%q: %+v", id, v.Name, err) 58 return nil 59 } 60 p.id = v.id 61 return &p 62 } 63 64 // PackagesByPath returns all packages that were discovered from the given path. 65 func (c *Collection) PackagesByPath(path string) []Package { 66 c.lock.RLock() 67 defer c.lock.RUnlock() 68 69 return c.packages(c.idsByPath[path].slice) 70 } 71 72 // PackagesByName returns all packages that were discovered with a matching name. 73 func (c *Collection) PackagesByName(name string) []Package { 74 c.lock.RLock() 75 defer c.lock.RUnlock() 76 77 return c.packages(c.idsByName[name].slice) 78 } 79 80 // Packages returns all packages for the given ID. 81 func (c *Collection) Packages(ids []artifact.ID) (result []Package) { 82 c.lock.RLock() 83 defer c.lock.RUnlock() 84 85 return c.packages(ids) 86 } 87 88 func (c *Collection) packages(ids []artifact.ID) (result []Package) { 89 // note: read lock must be held by caller 90 91 for _, i := range ids { 92 p, exists := c.byID[i] 93 if exists { 94 result = append(result, p) 95 } 96 } 97 return result 98 } 99 100 // Add n packages to the collection. 101 func (c *Collection) Add(pkgs ...Package) { 102 for _, p := range pkgs { 103 c.add(p) 104 } 105 } 106 107 // Add a package to the Collection. 108 func (c *Collection) add(p Package) { 109 c.lock.Lock() 110 defer c.lock.Unlock() 111 112 id := p.ID() 113 if id == "" { 114 log.Warnf("found package with empty ID while adding to the collection: %+v", p) 115 p.SetID() 116 id = p.ID() 117 } 118 119 if existing, exists := c.byID[id]; exists { 120 // there is already a package with this fingerprint merge the existing record with the new one 121 if err := existing.merge(p); err != nil { 122 log.Warnf("failed to merge packages: %+v", err) 123 } else { 124 c.byID[id] = existing 125 c.addPathsToIndex(p) 126 } 127 return 128 } 129 130 c.addToIndex(p) 131 } 132 133 func (c *Collection) addToIndex(p Package) { 134 // note: write lock must be held by caller 135 136 c.byID[p.id] = p 137 c.addNameToIndex(p) 138 c.addTypeToIndex(p) 139 c.addPathsToIndex(p) 140 } 141 142 func (c *Collection) addNameToIndex(p Package) { 143 // note: write lock must be held by caller 144 145 nameIndex := c.idsByName[p.Name] 146 nameIndex.add(p.id) 147 c.idsByName[p.Name] = nameIndex 148 } 149 150 func (c *Collection) addTypeToIndex(p Package) { 151 // note: write lock must be held by caller 152 153 typeIndex := c.idsByType[p.Type] 154 typeIndex.add(p.id) 155 c.idsByType[p.Type] = typeIndex 156 } 157 158 func (c *Collection) addPathsToIndex(p Package) { 159 // note: write lock must be held by caller 160 161 observedPaths := strset.New() 162 for _, l := range p.Locations.ToSlice() { 163 if l.RealPath != "" && !observedPaths.Has(l.RealPath) { 164 c.addPathToIndex(p.id, l.RealPath) 165 observedPaths.Add(l.RealPath) 166 } 167 if l.AccessPath != "" && l.RealPath != l.AccessPath && !observedPaths.Has(l.AccessPath) { 168 c.addPathToIndex(p.id, l.AccessPath) 169 observedPaths.Add(l.AccessPath) 170 } 171 } 172 } 173 174 func (c *Collection) addPathToIndex(id artifact.ID, path string) { 175 // note: write lock must be held by caller 176 177 pathIndex := c.idsByPath[path] 178 pathIndex.add(id) 179 c.idsByPath[path] = pathIndex 180 } 181 182 func (c *Collection) Delete(ids ...artifact.ID) { 183 c.lock.Lock() 184 defer c.lock.Unlock() 185 186 for _, id := range ids { 187 p, exists := c.byID[id] 188 if !exists { 189 return 190 } 191 192 delete(c.byID, id) 193 c.deleteNameFromIndex(p) 194 c.deleteTypeFromIndex(p) 195 c.deletePathsFromIndex(p) 196 } 197 } 198 199 func (c *Collection) deleteNameFromIndex(p Package) { 200 // note: write lock must be held by caller 201 202 nameIndex := c.idsByName[p.Name] 203 nameIndex.delete(p.id) 204 c.idsByName[p.Name] = nameIndex 205 } 206 207 func (c *Collection) deleteTypeFromIndex(p Package) { 208 // note: write lock must be held by caller 209 210 typeIndex := c.idsByType[p.Type] 211 typeIndex.delete(p.id) 212 c.idsByType[p.Type] = typeIndex 213 } 214 215 func (c *Collection) deletePathsFromIndex(p Package) { 216 // note: write lock must be held by caller 217 218 observedPaths := strset.New() 219 for _, l := range p.Locations.ToSlice() { 220 if l.RealPath != "" && !observedPaths.Has(l.RealPath) { 221 c.deletePathFromIndex(p.id, l.RealPath) 222 observedPaths.Add(l.RealPath) 223 } 224 if l.AccessPath != "" && l.RealPath != l.AccessPath && !observedPaths.Has(l.AccessPath) { 225 c.deletePathFromIndex(p.id, l.AccessPath) 226 observedPaths.Add(l.AccessPath) 227 } 228 } 229 } 230 231 func (c *Collection) deletePathFromIndex(id artifact.ID, path string) { 232 // note: write lock must be held by caller 233 234 pathIndex := c.idsByPath[path] 235 pathIndex.delete(id) 236 if len(pathIndex.slice) == 0 { 237 delete(c.idsByPath, path) 238 } else { 239 c.idsByPath[path] = pathIndex 240 } 241 } 242 243 // Enumerate all packages for the given type(s), enumerating all packages if no type is specified. 244 func (c *Collection) Enumerate(types ...Type) <-chan Package { 245 channel := make(chan Package) 246 go func() { 247 defer close(channel) 248 249 if c == nil { 250 // we should allow enumerating from a collection that was never created (which will result in no packages enumerated) 251 return 252 } 253 254 c.lock.RLock() 255 defer c.lock.RUnlock() 256 257 for ty, ids := range c.idsByType { 258 if len(types) != 0 { 259 found := false 260 typeCheck: 261 for _, t := range types { 262 if t == ty { 263 found = true 264 break typeCheck 265 } 266 } 267 if !found { 268 continue 269 } 270 } 271 for _, id := range ids.slice { 272 p := c.Package(id) 273 if p != nil { 274 channel <- *p 275 } 276 } 277 } 278 }() 279 return channel 280 } 281 282 // Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type 283 // is specified. 284 func (c *Collection) Sorted(types ...Type) (pkgs []Package) { 285 for p := range c.Enumerate(types...) { 286 pkgs = append(pkgs, p) 287 } 288 289 Sort(pkgs) 290 291 return pkgs 292 } 293 294 type orderedIDSet struct { 295 slice []artifact.ID 296 } 297 298 func (s *orderedIDSet) add(ids ...artifact.ID) { 299 loopNewIDs: 300 for _, newID := range ids { 301 for _, existingID := range s.slice { 302 if existingID == newID { 303 continue loopNewIDs 304 } 305 } 306 s.slice = append(s.slice, newID) 307 } 308 } 309 310 func (s *orderedIDSet) delete(id artifact.ID) { 311 for i, existingID := range s.slice { 312 if existingID == id { 313 s.slice = append(s.slice[:i], s.slice[i+1:]...) 314 return 315 } 316 } 317 }