github.com/anchore/syft@v1.38.2/syft/pkg/collection.go (about)

     1  package pkg
     2  
     3  import (
     4  	"sync"
     5  
     6  	"github.com/jinzhu/copier"
     7  	"github.com/scylladb/go-set/strset"
     8  
     9  	"github.com/anchore/syft/internal/log"
    10  	"github.com/anchore/syft/syft/artifact"
    11  )
    12  
// Collection represents a collection of Packages.
// Packages are stored by ID and additionally indexed by name, type, and path.
// The exported accessors take the read lock and add/Delete take the write lock
// before touching any of the maps below.
type Collection struct {
	// byID holds every stored package keyed by its artifact ID.
	byID      map[artifact.ID]Package
	// idsByName lists the IDs of the packages sharing a given Package.Name.
	idsByName map[string]orderedIDSet
	// idsByType lists the IDs of the packages sharing a given Package.Type.
	idsByType map[Type]orderedIDSet
	// idsByPath lists the IDs of the packages that have a location with the given
	// RealPath or AccessPath.
	idsByPath map[string]orderedIDSet // note: this is real path or virtual path
	// lock guards all of the maps above.
	lock      sync.RWMutex
}
    21  
    22  // NewCollection returns a new empty Collection
    23  func NewCollection(pkgs ...Package) *Collection {
    24  	c := Collection{
    25  		byID:      make(map[artifact.ID]Package),
    26  		idsByName: make(map[string]orderedIDSet),
    27  		idsByType: make(map[Type]orderedIDSet),
    28  		idsByPath: make(map[string]orderedIDSet),
    29  	}
    30  
    31  	for _, p := range pkgs {
    32  		c.Add(p)
    33  	}
    34  
    35  	return &c
    36  }
    37  
    38  // PackageCount returns the total number of packages that have been added.
    39  func (c *Collection) PackageCount() int {
    40  	c.lock.RLock()
    41  	defer c.lock.RUnlock()
    42  
    43  	return len(c.byID)
    44  }
    45  
    46  // Package returns the package with the given ID.
    47  func (c *Collection) Package(id artifact.ID) *Package {
    48  	c.lock.RLock()
    49  	defer c.lock.RUnlock()
    50  
    51  	v, exists := c.byID[id]
    52  	if !exists {
    53  		return nil
    54  	}
    55  	var p Package
    56  	if err := copier.Copy(&p, &v); err != nil {
    57  		log.Debugf("unable to copy package id=%q name=%q: %+v", id, v.Name, err)
    58  		return nil
    59  	}
    60  	p.id = v.id
    61  	return &p
    62  }
    63  
    64  // PackagesByPath returns all packages that were discovered from the given path.
    65  func (c *Collection) PackagesByPath(path string) []Package {
    66  	c.lock.RLock()
    67  	defer c.lock.RUnlock()
    68  
    69  	return c.packages(c.idsByPath[path].slice)
    70  }
    71  
    72  // PackagesByName returns all packages that were discovered with a matching name.
    73  func (c *Collection) PackagesByName(name string) []Package {
    74  	c.lock.RLock()
    75  	defer c.lock.RUnlock()
    76  
    77  	return c.packages(c.idsByName[name].slice)
    78  }
    79  
    80  // Packages returns all packages for the given ID.
    81  func (c *Collection) Packages(ids []artifact.ID) (result []Package) {
    82  	c.lock.RLock()
    83  	defer c.lock.RUnlock()
    84  
    85  	return c.packages(ids)
    86  }
    87  
    88  func (c *Collection) packages(ids []artifact.ID) (result []Package) {
    89  	// note: read lock must be held by caller
    90  
    91  	for _, i := range ids {
    92  		p, exists := c.byID[i]
    93  		if exists {
    94  			result = append(result, p)
    95  		}
    96  	}
    97  	return result
    98  }
    99  
   100  // Add n packages to the collection.
   101  func (c *Collection) Add(pkgs ...Package) {
   102  	for _, p := range pkgs {
   103  		c.add(p)
   104  	}
   105  }
   106  
   107  // Add a package to the Collection.
   108  func (c *Collection) add(p Package) {
   109  	c.lock.Lock()
   110  	defer c.lock.Unlock()
   111  
   112  	id := p.ID()
   113  	if id == "" {
   114  		log.Debugf("found package with empty ID while adding to the collection: %+v", p)
   115  		p.SetID()
   116  		id = p.ID()
   117  	}
   118  
   119  	if existing, exists := c.byID[id]; exists {
   120  		// there is already a package with this fingerprint merge the existing record with the new one
   121  		if err := existing.merge(p); err != nil {
   122  			log.Debugf("failed to merge packages: %+v", err)
   123  		} else {
   124  			c.byID[id] = existing
   125  			c.addPathsToIndex(p)
   126  		}
   127  		return
   128  	}
   129  
   130  	c.addToIndex(p)
   131  }
   132  
   133  func (c *Collection) addToIndex(p Package) {
   134  	// note: write lock must be held by caller
   135  
   136  	c.byID[p.id] = p
   137  	c.addNameToIndex(p)
   138  	c.addTypeToIndex(p)
   139  	c.addPathsToIndex(p)
   140  }
   141  
   142  func (c *Collection) addNameToIndex(p Package) {
   143  	// note: write lock must be held by caller
   144  
   145  	nameIndex := c.idsByName[p.Name]
   146  	nameIndex.add(p.id)
   147  	c.idsByName[p.Name] = nameIndex
   148  }
   149  
   150  func (c *Collection) addTypeToIndex(p Package) {
   151  	// note: write lock must be held by caller
   152  
   153  	typeIndex := c.idsByType[p.Type]
   154  	typeIndex.add(p.id)
   155  	c.idsByType[p.Type] = typeIndex
   156  }
   157  
   158  func (c *Collection) addPathsToIndex(p Package) {
   159  	// note: write lock must be held by caller
   160  
   161  	observedPaths := strset.New()
   162  	for _, l := range p.Locations.ToSlice() {
   163  		if l.RealPath != "" && !observedPaths.Has(l.RealPath) {
   164  			c.addPathToIndex(p.id, l.RealPath)
   165  			observedPaths.Add(l.RealPath)
   166  		}
   167  		if l.AccessPath != "" && l.RealPath != l.AccessPath && !observedPaths.Has(l.AccessPath) {
   168  			c.addPathToIndex(p.id, l.AccessPath)
   169  			observedPaths.Add(l.AccessPath)
   170  		}
   171  	}
   172  }
   173  
   174  func (c *Collection) addPathToIndex(id artifact.ID, path string) {
   175  	// note: write lock must be held by caller
   176  
   177  	pathIndex := c.idsByPath[path]
   178  	pathIndex.add(id)
   179  	c.idsByPath[path] = pathIndex
   180  }
   181  
   182  func (c *Collection) Delete(ids ...artifact.ID) {
   183  	c.lock.Lock()
   184  	defer c.lock.Unlock()
   185  
   186  	for _, id := range ids {
   187  		p, exists := c.byID[id]
   188  		if !exists {
   189  			return
   190  		}
   191  
   192  		delete(c.byID, id)
   193  		c.deleteNameFromIndex(p)
   194  		c.deleteTypeFromIndex(p)
   195  		c.deletePathsFromIndex(p)
   196  	}
   197  }
   198  
   199  func (c *Collection) deleteNameFromIndex(p Package) {
   200  	// note: write lock must be held by caller
   201  
   202  	nameIndex := c.idsByName[p.Name]
   203  	nameIndex.delete(p.id)
   204  	if len(nameIndex.slice) == 0 {
   205  		delete(c.idsByName, p.Name)
   206  	} else {
   207  		c.idsByName[p.Name] = nameIndex
   208  	}
   209  }
   210  
   211  func (c *Collection) deleteTypeFromIndex(p Package) {
   212  	// note: write lock must be held by caller
   213  
   214  	typeIndex := c.idsByType[p.Type]
   215  	typeIndex.delete(p.id)
   216  	if len(typeIndex.slice) == 0 {
   217  		delete(c.idsByType, p.Type)
   218  	} else {
   219  		c.idsByType[p.Type] = typeIndex
   220  	}
   221  }
   222  
   223  func (c *Collection) deletePathsFromIndex(p Package) {
   224  	// note: write lock must be held by caller
   225  
   226  	observedPaths := strset.New()
   227  	for _, l := range p.Locations.ToSlice() {
   228  		if l.RealPath != "" && !observedPaths.Has(l.RealPath) {
   229  			c.deletePathFromIndex(p.id, l.RealPath)
   230  			observedPaths.Add(l.RealPath)
   231  		}
   232  		if l.AccessPath != "" && l.RealPath != l.AccessPath && !observedPaths.Has(l.AccessPath) {
   233  			c.deletePathFromIndex(p.id, l.AccessPath)
   234  			observedPaths.Add(l.AccessPath)
   235  		}
   236  	}
   237  }
   238  
   239  func (c *Collection) deletePathFromIndex(id artifact.ID, path string) {
   240  	// note: write lock must be held by caller
   241  
   242  	pathIndex := c.idsByPath[path]
   243  	pathIndex.delete(id)
   244  	if len(pathIndex.slice) == 0 {
   245  		delete(c.idsByPath, path)
   246  	} else {
   247  		c.idsByPath[path] = pathIndex
   248  	}
   249  }
   250  
   251  // Enumerate all packages for the given type(s), enumerating all packages if no type is specified.
   252  func (c *Collection) Enumerate(types ...Type) <-chan Package {
   253  	channel := make(chan Package)
   254  	go func() {
   255  		defer close(channel)
   256  
   257  		if c == nil {
   258  			// we should allow enumerating from a collection that was never created (which will result in no packages enumerated)
   259  			return
   260  		}
   261  
   262  		c.lock.RLock()
   263  		defer c.lock.RUnlock()
   264  
   265  		for ty, ids := range c.idsByType {
   266  			if len(types) != 0 {
   267  				found := false
   268  			typeCheck:
   269  				for _, t := range types {
   270  					if t == ty {
   271  						found = true
   272  						break typeCheck
   273  					}
   274  				}
   275  				if !found {
   276  					continue
   277  				}
   278  			}
   279  			for _, id := range ids.slice {
   280  				p := c.Package(id)
   281  				if p != nil {
   282  					channel <- *p
   283  				}
   284  			}
   285  		}
   286  	}()
   287  	return channel
   288  }
   289  
   290  // Sorted enumerates all packages for the given types sorted by package name. Enumerates all packages if no type
   291  // is specified.
   292  func (c *Collection) Sorted(types ...Type) (pkgs []Package) {
   293  	for p := range c.Enumerate(types...) {
   294  		pkgs = append(pkgs, p)
   295  	}
   296  
   297  	Sort(pkgs)
   298  
   299  	return pkgs
   300  }
   301  
// orderedIDSet is a set of artifact IDs that keeps its members in insertion order.
// Its add method only appends IDs that are not already present, and delete removes
// the first matching ID.
type orderedIDSet struct {
	// slice holds the member IDs in the order they were first added.
	slice []artifact.ID
}
   305  
   306  func (s *orderedIDSet) add(ids ...artifact.ID) {
   307  loopNewIDs:
   308  	for _, newID := range ids {
   309  		for _, existingID := range s.slice {
   310  			if existingID == newID {
   311  				continue loopNewIDs
   312  			}
   313  		}
   314  		s.slice = append(s.slice, newID)
   315  	}
   316  }
   317  
   318  func (s *orderedIDSet) delete(id artifact.ID) {
   319  	for i, existingID := range s.slice {
   320  		if existingID == id {
   321  			s.slice = append(s.slice[:i], s.slice[i+1:]...)
   322  			return
   323  		}
   324  	}
   325  }