github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/lib.go (about)

     1  /*
     2  Package syft is a "one-stop-shop" for helper utilities for all major functionality provided by child packages of the syft library.
     3  
     4  Here is what the main execution path for syft does:
     5  
     6   1. Parse a user image string to get a stereoscope image.Source object
     7   2. Invoke all catalogers to catalog the image, adding discovered packages to a single catalog object
     8   3. Invoke one or more encoders to output contents of the catalog
     9  
    10  A Source object encapsulates the image object to be cataloged and the user options (catalog all layers vs. squashed layer),
    11  providing a way to inspect paths and file content within the image. The Source object, not the image object, is used
    12  throughout the main execution path. This abstraction allows for decoupling of what is cataloged (a docker image, an OCI
    13  image, a filesystem, etc) and how it is cataloged (the individual catalogers).
    14  
    15  Similar to the cataloging process, Linux distribution identification is also performed based on what is discovered within the image.
    16  */
    17  package syft
    18  
    19  import (
    20  	"fmt"
    21  
    22  	"github.com/wagoodman/go-partybus"
    23  
    24  	"github.com/anchore/go-logger"
    25  	"github.com/anchore/syft/syft/artifact"
    26  	"github.com/anchore/syft/syft/linux"
    27  	"github.com/anchore/syft/syft/pkg"
    28  	"github.com/anchore/syft/syft/pkg/cataloger"
    29  	"github.com/anchore/syft/syft/source"
    30  	"github.com/lineaje-labs/syft/internal/bus"
    31  	"github.com/lineaje-labs/syft/internal/log"
    32  )
    33  
    34  // CatalogPackages takes an inventory of packages from the given image from a particular perspective
    35  // (e.g. squashed source, all-layers source). Returns the discovered  set of packages, the identified Linux
    36  // distribution, and the source object used to wrap the data source.
    37  func CatalogPackages(
    38  	src source.Source, cfg cataloger.Config,
    39  ) (*pkg.Collection, []artifact.Relationship, *linux.Release, error) {
    40  	resolver, err := src.FileResolver(cfg.Search.Scope)
    41  	if err != nil {
    42  		return nil, nil, nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err)
    43  	}
    44  
    45  	// find the distro
    46  	release := linux.IdentifyRelease(resolver)
    47  	if release != nil {
    48  		log.Infof("identified distro: %s", release.String())
    49  	} else {
    50  		log.Info("could not identify distro")
    51  	}
    52  
    53  	// if the catalogers have been configured, use them regardless of input type
    54  	var catalogers []pkg.Cataloger
    55  	if len(cfg.Catalogers) > 0 {
    56  		catalogers = cataloger.AllCatalogers(cfg)
    57  	} else {
    58  		// otherwise conditionally use the correct set of loggers based on the input type (container image or directory)
    59  
    60  		// TODO: this is bad, we should not be using the concrete type to determine the cataloger set
    61  		// instead this should be a caller concern (pass the catalogers you want to use). The SBOM build PR will do this.
    62  		switch src.(type) {
    63  		case *source.StereoscopeImageSource:
    64  			log.Info("cataloging an image")
    65  			catalogers = cataloger.ImageCatalogers(cfg)
    66  		case *source.FileSource:
    67  			log.Info("cataloging a file")
    68  			catalogers = cataloger.AllCatalogers(cfg)
    69  		case *source.DirectorySource:
    70  			log.Info("cataloging a directory")
    71  			catalogers = cataloger.DirectoryCatalogers(cfg)
    72  		default:
    73  			return nil, nil, nil, fmt.Errorf("unsupported source type: %T", src)
    74  		}
    75  	}
    76  
    77  	catalog, relationships, err := cataloger.Catalog(resolver, release, cfg.Parallelism, catalogers...)
    78  
    79  	// apply exclusions to the package catalog
    80  	// default config value for this is true
    81  	// https://github.com/anchore/syft/issues/931
    82  	if cfg.ExcludeBinaryOverlapByOwnership {
    83  		for _, r := range relationships {
    84  			if cataloger.ExcludeBinaryByFileOwnershipOverlap(r, catalog) {
    85  				catalog.Delete(r.To.ID())
    86  				relationships = removeRelationshipsByID(relationships, r.To.ID())
    87  			}
    88  		}
    89  	}
    90  
    91  	// no need to consider source relationships for os -> binary exclusions
    92  	relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...)
    93  	return catalog, relationships, release, err
    94  }
    95  
    96  func removeRelationshipsByID(relationships []artifact.Relationship, id artifact.ID) []artifact.Relationship {
    97  	var filtered []artifact.Relationship
    98  	for _, r := range relationships {
    99  		if r.To.ID() != id && r.From.ID() != id {
   100  			filtered = append(filtered, r)
   101  		}
   102  	}
   103  	return filtered
   104  }
   105  
   106  func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship {
   107  	relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method?
   108  	for p := range c.Enumerate() {
   109  		relationships = append(relationships, artifact.Relationship{
   110  			From: src,
   111  			To:   p,
   112  			Type: artifact.ContainsRelationship,
   113  		})
   114  	}
   115  
   116  	return relationships
   117  }
   118  
   119  // SetLogger sets the logger object used for all syft logging calls.
   120  func SetLogger(logger logger.Logger) {
   121  	log.Set(logger)
   122  }
   123  
   124  // SetBus sets the event bus for all syft library bus publish events onto (in-library subscriptions are not allowed).
   125  func SetBus(b *partybus.Bus) {
   126  	bus.Set(b)
   127  }