github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/collection.go (about)

     1  package pkg
     2  
     3  import (
     4  	"sync"
     5  
     6  	"github.com/jinzhu/copier"
     7  	"github.com/scylladb/go-set/strset"
     8  
     9  	"github.com/anchore/syft/internal/log"
    10  	"github.com/anchore/syft/syft/artifact"
    11  )
    12  
    13  // Collection represents a collection of Packages.
    14  type Collection struct {
    15  	byID      map[artifact.ID]Package
    16  	idsByName map[string]orderedIDSet
    17  	idsByType map[Type]orderedIDSet
    18  	idsByPath map[string]orderedIDSet // note: this is real path or virtual path
    19  	lock      sync.RWMutex
    20  }
    21  
    22  // NewCollection returns a new empty Collection
    23  func NewCollection(pkgs ...Package) *Collection {
    24  	c := Collection{
    25  		byID:      make(map[artifact.ID]Package),
    26  		idsByName: make(map[string]orderedIDSet),
    27  		idsByType: make(map[Type]orderedIDSet),
    28  		idsByPath: make(map[string]orderedIDSet),
    29  	}
    30  
    31  	for _, p := range pkgs {
    32  		c.Add(p)
    33  	}
    34  
    35  	return &c
    36  }
    37  
    38  // PackageCount returns the total number of packages that have been added.
    39  func (c *Collection) PackageCount() int {
    40  	c.lock.RLock()
    41  	defer c.lock.RUnlock()
    42  
    43  	return len(c.byID)
    44  }
    45  
    46  // Package returns the package with the given ID.
    47  func (c *Collection) Package(id artifact.ID) *Package {
    48  	c.lock.RLock()
    49  	defer c.lock.RUnlock()
    50  
    51  	v, exists := c.byID[id]
    52  	if !exists {
    53  		return nil
    54  	}
    55  	var p Package
    56  	if err := copier.Copy(&p, &v); err != nil {
    57  		log.Warnf("unable to copy package id=%q name=%q: %+v", id, v.Name, err)
    58  		return nil
    59  	}
    60  	p.id = v.id
    61  	return &p
    62  }
    63  
    64  // PackagesByPath returns all packages that were discovered from the given path.
    65  func (c *Collection) PackagesByPath(path string) []Package {
    66  	c.lock.RLock()
    67  	defer c.lock.RUnlock()
    68  
    69  	return c.packages(c.idsByPath[path].slice)
    70  }
    71  
    72  // PackagesByName returns all packages that were discovered with a matching name.
    73  func (c *Collection) PackagesByName(name string) []Package {
    74  	c.lock.RLock()
    75  	defer c.lock.RUnlock()
    76  
    77  	return c.packages(c.idsByName[name].slice)
    78  }
    79  
    80  // Packages returns all packages for the given ID.
    81  func (c *Collection) Packages(ids []artifact.ID) (result []Package) {
    82  	c.lock.RLock()
    83  	defer c.lock.RUnlock()
    84  
    85  	return c.packages(ids)
    86  }
    87  
    88  func (c *Collection) packages(ids []artifact.ID) (result []Package) {
    89  	// note: read lock must be held by caller
    90  
    91  	for _, i := range ids {
    92  		p, exists := c.byID[i]
    93  		if exists {
    94  			result = append(result, p)
    95  		}
    96  	}
    97  	return result
    98  }
    99  
   100  // Add n packages to the collection.
   101  func (c *Collection) Add(pkgs ...Package) {
   102  	for _, p := range pkgs {
   103  		c.add(p)
   104  	}
   105  }
   106  
   107  // Add a package to the Collection.
   108  func (c *Collection) add(p Package) {
   109  	c.lock.Lock()
   110  	defer c.lock.Unlock()
   111  
   112  	id := p.ID()
   113  	if id == "" {
   114  		log.Warnf("found package with empty ID while adding to the collection: %+v", p)
   115  		p.SetID()
   116  		id = p.ID()
   117  	}
   118  
   119  	if existing, exists := c.byID[id]; exists {
   120  		// there is already a package with this fingerprint merge the existing record with the new one
   121  		if err := existing.merge(p); err != nil {
   122  			log.Warnf("failed to merge packages: %+v", err)
   123  		} else {
   124  			c.byID[id] = existing
   125  			c.addPathsToIndex(p)
   126  		}
   127  		return
   128  	}
   129  
   130  	c.addToIndex(p)
   131  }
   132  
   133  func (c *Collection) addToIndex(p Package) {
   134  	// note: write lock must be held by caller
   135  
   136  	c.byID[p.id] = p
   137  	c.addNameToIndex(p)
   138  	c.addTypeToIndex(p)
   139  	c.addPathsToIndex(p)
   140  }
   141  
   142  func (c *Collection) addNameToIndex(p Package) {
   143  	// note: write lock must be held by caller
   144  
   145  	nameIndex := c.idsByName[p.Name]
   146  	nameIndex.add(p.id)
   147  	c.idsByName[p.Name] = nameIndex
   148  }
   149  
   150  func (c *Collection) addTypeToIndex(p Package) {
   151  	// note: write lock must be held by caller
   152  
   153  	typeIndex := c.idsByType[p.Type]
   154  	typeIndex.add(p.id)
   155  	c.idsByType[p.Type] = typeIndex
   156  }
   157  
   158  func (c *Collection) addPathsToIndex(p Package) {
   159  	// note: write lock must be held by caller
   160  
   161  	observedPaths := strset.New()
   162  	for _, l := range p.Locations.ToSlice() {
   163  		if l.RealPath != "" && !observedPaths.Has(l.RealPath) {
   164  			c.addPathToIndex(p.id, l.RealPath)
   165  			observedPaths.Add(l.RealPath)
   166  		}
   167  		if l.AccessPath != "" && l.RealPath != l.AccessPath && !observedPaths.Has(l.AccessPath) {
   168  			c.addPathToIndex(p.id, l.AccessPath)
   169  			observedPaths.Add(l.AccessPath)
   170  		}
   171  	}
   172  }
   173  
   174  func (c *Collection) addPathToIndex(id artifact.ID, path string) {
   175  	// note: write lock must be held by caller
   176  
   177  	pathIndex := c.idsByPath[path]
   178  	pathIndex.add(id)
   179  	c.idsByPath[path] = pathIndex
   180  }
   181  
   182  func (c *Collection) Delete(ids ...artifact.ID) {
   183  	c.lock.Lock()
   184  	defer c.lock.Unlock()
   185  
   186  	for _, id := range ids {
   187  		p, exists := c.byID[id]
   188  		if !exists {
   189  			return
   190  		}
   191  
   192  		delete(c.byID, id)
   193  		c.deleteNameFromIndex(p)
   194  		c.deleteTypeFromIndex(p)
   195  		c.deletePathsFromIndex(p)
   196  	}
   197  }
   198  
   199  func (c *Collection) deleteNameFromIndex(p Package) {
   200  	// note: write lock must be held by caller
   201  
   202  	nameIndex := c.idsByName[p.Name]
   203  	nameIndex.delete(p.id)
   204  	c.idsByName[p.Name] = nameIndex
   205  }
   206  
   207  func (c *Collection) deleteTypeFromIndex(p Package) {
   208  	// note: write lock must be held by caller
   209  
   210  	typeIndex := c.idsByType[p.Type]
   211  	typeIndex.delete(p.id)
   212  	c.idsByType[p.Type] = typeIndex
   213  }
   214  
   215  func (c *Collection) deletePathsFromIndex(p Package) {
   216  	// note: write lock must be held by caller
   217  
   218  	observedPaths := strset.New()
   219  	for _, l := range p.Locations.ToSlice() {
   220  		if l.RealPath != "" && !observedPaths.Has(l.RealPath) {
   221  			c.deletePathFromIndex(p.id, l.RealPath)
   222  			observedPaths.Add(l.RealPath)
   223  		}
   224  		if l.AccessPath != "" && l.RealPath != l.AccessPath && !observedPaths.Has(l.AccessPath) {
   225  			c.deletePathFromIndex(p.id, l.AccessPath)
   226  			observedPaths.Add(l.AccessPath)
   227  		}
   228  	}
   229  }
   230  
   231  func (c *Collection) deletePathFromIndex(id artifact.ID, path string) {
   232  	// note: write lock must be held by caller
   233  
   234  	pathIndex := c.idsByPath[path]
   235  	pathIndex.delete(id)
   236  	if len(pathIndex.slice) == 0 {
   237  		delete(c.idsByPath, path)
   238  	} else {
   239  		c.idsByPath[path] = pathIndex
   240  	}
   241  }
   242  
   243  // Enumerate all packages for the given type(s), enumerating all packages if no type is specified.
   244  func (c *Collection) Enumerate(types ...Type) <-chan Package {
   245  	channel := make(chan Package)
   246  	go func() {
   247  		defer close(channel)
   248  
   249  		if c == nil {
   250  			// we should allow enumerating from a collection that was never created (which will result in no packages enumerated)
   251  			return
   252  		}
   253  
   254  		c.lock.RLock()
   255  		defer c.lock.RUnlock()
   256  
   257  		for ty, ids := range c.idsByType {
   258  			if len(types) != 0 {
   259  				found := false
   260  			typeCheck:
   261  				for _, t := range types {
   262  					if t == ty {
   263  						found = true
   264  						break typeCheck
   265  					}
   266  				}
   267  				if !found {
   268  					continue
   269  				}
   270  			}
   271  			for _, id := range ids.slice {
   272  				p := c.Package(id)
   273  				if p != nil {
   274  					channel <- *p
   275  				}
   276  			}
   277  		}
   278  	}()
   279  	return channel
   280  }
   281  
   282  // Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type
   283  // is specified.
   284  func (c *Collection) Sorted(types ...Type) (pkgs []Package) {
   285  	for p := range c.Enumerate(types...) {
   286  		pkgs = append(pkgs, p)
   287  	}
   288  
   289  	Sort(pkgs)
   290  
   291  	return pkgs
   292  }
   293  
   294  type orderedIDSet struct {
   295  	slice []artifact.ID
   296  }
   297  
   298  func (s *orderedIDSet) add(ids ...artifact.ID) {
   299  loopNewIDs:
   300  	for _, newID := range ids {
   301  		for _, existingID := range s.slice {
   302  			if existingID == newID {
   303  				continue loopNewIDs
   304  			}
   305  		}
   306  		s.slice = append(s.slice, newID)
   307  	}
   308  }
   309  
   310  func (s *orderedIDSet) delete(id artifact.ID) {
   311  	for i, existingID := range s.slice {
   312  		if existingID == id {
   313  			s.slice = append(s.slice[:i], s.slice[i+1:]...)
   314  			return
   315  		}
   316  	}
   317  }