github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/lib.go (about)

     1  /*
     2  Package syft is a "one-stop-shop" for helper utilities for all major functionality provided by child packages of the syft library.
     3  
     4  Here is what the main execution path for syft does:
     5  
     6   1. Parse a user image string to get a stereoscope image.Source object
     7   2. Invoke all catalogers to catalog the image, adding discovered packages to a single catalog object
     8   3. Invoke one or more encoders to output contents of the catalog
     9  
    10  A Source object encapsulates the image object to be cataloged and the user options (catalog all layers vs. squashed layer),
    11  providing a way to inspect paths and file content within the image. The Source object, not the image object, is used
    12  throughout the main execution path. This abstraction allows for decoupling of what is cataloged (a docker image, an OCI
    13  image, a filesystem, etc) and how it is cataloged (the individual catalogers).
    14  
    15  Similar to the cataloging process, Linux distribution identification is also performed based on what is discovered within the image.
    16  */
    17  package syft
    18  
    19  import (
    20  	"fmt"
    21  
    22  	"github.com/wagoodman/go-partybus"
    23  
    24  	"github.com/anchore/go-logger"
    25  	"github.com/anchore/syft/internal/bus"
    26  	"github.com/anchore/syft/internal/log"
    27  	"github.com/anchore/syft/syft/artifact"
    28  	"github.com/anchore/syft/syft/linux"
    29  	"github.com/anchore/syft/syft/pkg"
    30  	"github.com/anchore/syft/syft/pkg/cataloger"
    31  	"github.com/anchore/syft/syft/source"
    32  )
    33  
    34  // CatalogPackages takes an inventory of packages from the given image from a particular perspective
    35  // (e.g. squashed source, all-layers source). Returns the discovered  set of packages, the identified Linux
    36  // distribution, and the source object used to wrap the data source.
    37  func CatalogPackages(src source.Source, cfg cataloger.Config) (*pkg.Collection, []artifact.Relationship, *linux.Release, error) {
    38  	resolver, err := src.FileResolver(cfg.Search.Scope)
    39  	if err != nil {
    40  		return nil, nil, nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err)
    41  	}
    42  
    43  	// find the distro
    44  	release := linux.IdentifyRelease(resolver)
    45  	if release != nil {
    46  		log.Infof("identified distro: %s", release.String())
    47  	} else {
    48  		log.Info("could not identify distro")
    49  	}
    50  
    51  	// if the catalogers have been configured, use them regardless of input type
    52  	var catalogers []pkg.Cataloger
    53  	if len(cfg.Catalogers) > 0 {
    54  		catalogers = cataloger.AllCatalogers(cfg)
    55  	} else {
    56  		// otherwise conditionally use the correct set of loggers based on the input type (container image or directory)
    57  
    58  		// TODO: this is bad, we should not be using the concrete type to determine the cataloger set
    59  		// instead this should be a caller concern (pass the catalogers you want to use). The SBOM build PR will do this.
    60  		switch src.(type) {
    61  		case *source.StereoscopeImageSource:
    62  			log.Info("cataloging an image")
    63  			catalogers = cataloger.ImageCatalogers(cfg)
    64  		case *source.FileSource:
    65  			log.Info("cataloging a file")
    66  			catalogers = cataloger.AllCatalogers(cfg)
    67  		case *source.DirectorySource:
    68  			log.Info("cataloging a directory")
    69  			catalogers = cataloger.DirectoryCatalogers(cfg)
    70  		default:
    71  			return nil, nil, nil, fmt.Errorf("unsupported source type: %T", src)
    72  		}
    73  	}
    74  
    75  	catalog, relationships, err := cataloger.Catalog(resolver, release, cfg.Parallelism, catalogers...)
    76  
    77  	// apply exclusions to the package catalog
    78  	// default config value for this is true
    79  	// https://github.com/anchore/syft/issues/931
    80  	if cfg.ExcludeBinaryOverlapByOwnership {
    81  		for _, r := range relationships {
    82  			if cataloger.ExcludeBinaryByFileOwnershipOverlap(r, catalog) {
    83  				catalog.Delete(r.To.ID())
    84  				relationships = removeRelationshipsByID(relationships, r.To.ID())
    85  			}
    86  		}
    87  	}
    88  
    89  	// no need to consider source relationships for os -> binary exclusions
    90  	relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...)
    91  	return catalog, relationships, release, err
    92  }
    93  
    94  func removeRelationshipsByID(relationships []artifact.Relationship, id artifact.ID) []artifact.Relationship {
    95  	var filtered []artifact.Relationship
    96  	for _, r := range relationships {
    97  		if r.To.ID() != id && r.From.ID() != id {
    98  			filtered = append(filtered, r)
    99  		}
   100  	}
   101  	return filtered
   102  }
   103  
   104  func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship {
   105  	relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method?
   106  	for p := range c.Enumerate() {
   107  		relationships = append(relationships, artifact.Relationship{
   108  			From: src,
   109  			To:   p,
   110  			Type: artifact.ContainsRelationship,
   111  		})
   112  	}
   113  
   114  	return relationships
   115  }
   116  
   117  // SetLogger sets the logger object used for all syft logging calls.
   118  func SetLogger(logger logger.Logger) {
   119  	log.Set(logger)
   120  }
   121  
   122  // SetBus sets the event bus for all syft library bus publish events onto (in-library subscriptions are not allowed).
   123  func SetBus(b *partybus.Bus) {
   124  	bus.Set(b)
   125  }