github.com/anchore/syft@v1.38.2/syft/create_sbom_config.go (about)

     1  package syft
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"runtime/debug"
     8  	"strings"
     9  
    10  	"github.com/anchore/syft/internal/log"
    11  	"github.com/anchore/syft/internal/task"
    12  	"github.com/anchore/syft/syft/cataloging"
    13  	"github.com/anchore/syft/syft/cataloging/filecataloging"
    14  	"github.com/anchore/syft/syft/cataloging/pkgcataloging"
    15  	"github.com/anchore/syft/syft/file"
    16  	"github.com/anchore/syft/syft/sbom"
    17  	"github.com/anchore/syft/syft/source"
    18  )
    19  
    20  // CreateSBOMConfig specifies all parameters needed for creating an SBOM.
    21  type CreateSBOMConfig struct {
    22  	// required configuration input to specify how cataloging should be performed
    23  	Compliance         cataloging.ComplianceConfig
    24  	Search             cataloging.SearchConfig
    25  	Relationships      cataloging.RelationshipsConfig
    26  	Unknowns           cataloging.UnknownsConfig
    27  	DataGeneration     cataloging.DataGenerationConfig
    28  	Packages           pkgcataloging.Config
    29  	Licenses           cataloging.LicenseConfig
    30  	Files              filecataloging.Config
    31  	Parallelism        int
    32  	CatalogerSelection cataloging.SelectionRequest
    33  
    34  	// audit what tool is being used to generate the SBOM
    35  	ToolName          string
    36  	ToolVersion       string
    37  	ToolConfiguration interface{}
    38  
    39  	packageTaskFactories       task.Factories
    40  	packageCatalogerReferences []pkgcataloging.CatalogerReference
    41  }
    42  
    43  func DefaultCreateSBOMConfig() *CreateSBOMConfig {
    44  	return &CreateSBOMConfig{
    45  		Compliance:           cataloging.DefaultComplianceConfig(),
    46  		Search:               cataloging.DefaultSearchConfig(),
    47  		Relationships:        cataloging.DefaultRelationshipsConfig(),
    48  		DataGeneration:       cataloging.DefaultDataGenerationConfig(),
    49  		Packages:             pkgcataloging.DefaultConfig(),
    50  		Licenses:             cataloging.DefaultLicenseConfig(),
    51  		Files:                filecataloging.DefaultConfig(),
    52  		Parallelism:          0, // use default: run in parallel based on number of CPUs
    53  		packageTaskFactories: task.DefaultPackageTaskFactories(),
    54  
    55  		// library consumers are free to override the tool values to fit their needs, however, we have some sane defaults
    56  		// to ensure that SBOMs generated don't have missing tool metadata.
    57  		ToolName:    "syft",
    58  		ToolVersion: syftVersion(),
    59  	}
    60  }
    61  
    62  func syftVersion() string {
    63  	// extract the syft version from the go module info from the current binary that is running. This is useful for
    64  	// library consumers to at least encode the version of syft that was used to generate the SBOM. Note: we don't
    65  	// use the version info from main because it's baked in with ldflags, which we don't control for library consumers.
    66  	// This approach won't work in all cases though, such as when the binary is stripped of the buildinfo section.
    67  
    68  	buildInfo, ok := debug.ReadBuildInfo()
    69  	if !ok {
    70  		return ""
    71  	}
    72  
    73  	for _, d := range buildInfo.Deps {
    74  		if d.Path == "github.com/anchore/syft" && d.Version != "(devel)" {
    75  			return d.Version
    76  		}
    77  	}
    78  
    79  	return ""
    80  }
    81  
    82  // WithTool allows for setting the specific name, version, and any additional configuration that is not captured
    83  // in the syft default API configuration. This could cover inputs for catalogers that were user-provided, thus,
    84  // is not visible to the syft API, but would be useful to see in the SBOM output.
    85  func (c *CreateSBOMConfig) WithTool(name, version string, cfg ...any) *CreateSBOMConfig {
    86  	c.ToolName = name
    87  	c.ToolVersion = version
    88  	c.ToolConfiguration = cfg
    89  	return c
    90  }
    91  
    92  // WithParallelism allows for setting the number of concurrent cataloging tasks that can be performed at once
    93  func (c *CreateSBOMConfig) WithParallelism(p int) *CreateSBOMConfig {
    94  	c.Parallelism = p
    95  	return c
    96  }
    97  
    98  // WithComplianceConfig allows for setting the specific compliance configuration for cataloging.
    99  func (c *CreateSBOMConfig) WithComplianceConfig(cfg cataloging.ComplianceConfig) *CreateSBOMConfig {
   100  	c.Compliance = cfg
   101  	return c
   102  }
   103  
   104  // WithSearchConfig allows for setting the specific search configuration for cataloging.
   105  func (c *CreateSBOMConfig) WithSearchConfig(cfg cataloging.SearchConfig) *CreateSBOMConfig {
   106  	c.Search = cfg
   107  	return c
   108  }
   109  
   110  // WithRelationshipsConfig allows for defining the specific relationships that should be captured during cataloging.
   111  func (c *CreateSBOMConfig) WithRelationshipsConfig(cfg cataloging.RelationshipsConfig) *CreateSBOMConfig {
   112  	c.Relationships = cfg
   113  	return c
   114  }
   115  
   116  // WithUnknownsConfig allows for defining the specific behavior dealing with unknowns
   117  func (c *CreateSBOMConfig) WithUnknownsConfig(cfg cataloging.UnknownsConfig) *CreateSBOMConfig {
   118  	c.Unknowns = cfg
   119  	return c
   120  }
   121  
   122  // WithDataGenerationConfig allows for defining what data elements that cannot be discovered from the underlying
   123  // target being scanned that should be generated after package creation.
   124  func (c *CreateSBOMConfig) WithDataGenerationConfig(cfg cataloging.DataGenerationConfig) *CreateSBOMConfig {
   125  	c.DataGeneration = cfg
   126  	return c
   127  }
   128  
   129  // WithPackagesConfig allows for defining any specific package cataloging behavior for syft-implemented catalogers.
   130  func (c *CreateSBOMConfig) WithPackagesConfig(cfg pkgcataloging.Config) *CreateSBOMConfig {
   131  	c.Packages = cfg
   132  	return c
   133  }
   134  
   135  // WithLicenseConfig allows for defining any specific license cataloging behavior for syft-implemented catalogers.
   136  func (c *CreateSBOMConfig) WithLicenseConfig(cfg cataloging.LicenseConfig) *CreateSBOMConfig {
   137  	c.Licenses = cfg
   138  	return c
   139  }
   140  
   141  // WithFilesConfig allows for defining file-based cataloging parameters.
   142  func (c *CreateSBOMConfig) WithFilesConfig(cfg filecataloging.Config) *CreateSBOMConfig {
   143  	c.Files = cfg
   144  	return c
   145  }
   146  
   147  // WithoutFiles allows for disabling file cataloging altogether.
   148  func (c *CreateSBOMConfig) WithoutFiles() *CreateSBOMConfig {
   149  	c.Files = filecataloging.Config{
   150  		Selection: file.NoFilesSelection,
   151  		Hashers:   nil,
   152  	}
   153  	return c
   154  }
   155  
   156  // WithCatalogerSelection allows for adding to, removing from, or sub-selecting the final set of catalogers by name or tag.
   157  func (c *CreateSBOMConfig) WithCatalogerSelection(selection cataloging.SelectionRequest) *CreateSBOMConfig {
   158  	c.CatalogerSelection = selection
   159  	return c
   160  }
   161  
   162  // WithoutCatalogers removes all catalogers from the final set of catalogers. This is useful if you want to only use
   163  // user-provided catalogers (without the default syft-provided catalogers).
   164  func (c *CreateSBOMConfig) WithoutCatalogers() *CreateSBOMConfig {
   165  	c.packageTaskFactories = nil
   166  	c.packageCatalogerReferences = nil
   167  	return c
   168  }
   169  
   170  // WithCatalogers allows for adding user-provided catalogers to the final set of catalogers that will always be run
   171  // regardless of the source type or any cataloger selections provided.
   172  func (c *CreateSBOMConfig) WithCatalogers(catalogerRefs ...pkgcataloging.CatalogerReference) *CreateSBOMConfig {
   173  	for i := range catalogerRefs {
   174  		// ensure that all package catalogers have the package tag
   175  		catalogerRefs[i].Tags = append(catalogerRefs[i].Tags, pkgcataloging.PackageTag)
   176  	}
   177  	c.packageCatalogerReferences = append(c.packageCatalogerReferences, catalogerRefs...)
   178  
   179  	return c
   180  }
   181  
   182  // makeTaskGroups considers the entire configuration and finalizes the set of tasks to be run. Tasks are run in
   183  // groups, where each task in a group can be run concurrently, while tasks in different groups must be run serially.
   184  // The final set of task groups is returned along with a cataloger manifest that describes the catalogers that were
   185  // selected and the tokens that were sensitive to this selection (both for adding and removing from the final set).
   186  func (c *CreateSBOMConfig) makeTaskGroups(src source.Description) ([][]task.Task, *catalogerManifest, error) {
   187  	var taskGroups [][]task.Task
   188  
   189  	// generate package and file tasks based on the configuration
   190  	environmentTasks := c.environmentTasks()
   191  	scopeTasks := c.scopeTasks()
   192  	relationshipsTasks := c.relationshipTasks(src)
   193  	unknownTasks := c.unknownsTasks()
   194  	osFeatureDetectionTasks := c.osFeatureDetectionTasks()
   195  
   196  	pkgTasks, fileTasks, selectionEvidence, err := c.selectTasks(src)
   197  	if err != nil {
   198  		return nil, nil, err
   199  	}
   200  
   201  	// combine the user-provided and configured tasks
   202  	if c.Files.Selection == file.FilesOwnedByPackageSelection {
   203  		// special case: we need the package info when we are cataloging files owned by packages
   204  		taskGroups = append(taskGroups, pkgTasks, fileTasks)
   205  	} else {
   206  		taskGroups = append(taskGroups, append(pkgTasks, fileTasks...))
   207  	}
   208  
   209  	// all scope work must be done after all nodes (files and packages) have been cataloged and before the relationship
   210  	if len(scopeTasks) > 0 {
   211  		taskGroups = append(taskGroups, scopeTasks)
   212  	}
   213  
   214  	// all relationship work must be done after all nodes (files and packages) have been cataloged
   215  	if len(relationshipsTasks) > 0 {
   216  		taskGroups = append(taskGroups, relationshipsTasks)
   217  	}
   218  
   219  	// all unknowns tasks should happen after all scanning is complete
   220  	if len(unknownTasks) > 0 {
   221  		taskGroups = append(taskGroups, unknownTasks)
   222  	}
   223  
   224  	// osFeatureDetectionTasks should happen after package scanning is complete
   225  	if len(osFeatureDetectionTasks) > 0 {
   226  		taskGroups = append(taskGroups, osFeatureDetectionTasks)
   227  	}
   228  
   229  	// identifying the environment (i.e. the linux release) must be done first as this is required for package cataloging
   230  	taskGroups = append(
   231  		[][]task.Task{
   232  			environmentTasks,
   233  		},
   234  		taskGroups...,
   235  	)
   236  
   237  	var allTasks []task.Task
   238  	allTasks = append(allTasks, pkgTasks...)
   239  	allTasks = append(allTasks, fileTasks...)
   240  
   241  	return taskGroups, &catalogerManifest{
   242  		Requested: selectionEvidence.Request,
   243  		Used:      formatTaskNames(allTasks),
   244  	}, nil
   245  }
   246  
   247  // fileTasks returns the set of tasks that should be run to catalog files.
   248  func (c *CreateSBOMConfig) fileTasks(cfg task.CatalogingFactoryConfig) ([]task.Task, error) {
   249  	tsks, err := task.DefaultFileTaskFactories().Tasks(cfg)
   250  	if err != nil {
   251  		return nil, fmt.Errorf("unable to create file cataloger tasks: %w", err)
   252  	}
   253  
   254  	return tsks, nil
   255  }
   256  
   257  // selectTasks returns the set of tasks that should be run to catalog packages and files.
   258  func (c *CreateSBOMConfig) selectTasks(src source.Description) ([]task.Task, []task.Task, *task.Selection, error) {
   259  	cfg := task.CatalogingFactoryConfig{
   260  		SearchConfig:         c.Search,
   261  		RelationshipsConfig:  c.Relationships,
   262  		DataGenerationConfig: c.DataGeneration,
   263  		PackagesConfig:       c.Packages,
   264  		LicenseConfig:        c.Licenses,
   265  		ComplianceConfig:     c.Compliance,
   266  		FilesConfig:          c.Files,
   267  	}
   268  
   269  	persistentPkgTasks, selectablePkgTasks, err := c.allPackageTasks(cfg)
   270  	if err != nil {
   271  		return nil, nil, nil, fmt.Errorf("unable to create package cataloger tasks: %w", err)
   272  	}
   273  
   274  	req, err := finalTaskSelectionRequest(c.CatalogerSelection, src)
   275  	if err != nil {
   276  		return nil, nil, nil, err
   277  	}
   278  
   279  	selectableFileTasks, err := c.fileTasks(cfg)
   280  	if err != nil {
   281  		return nil, nil, nil, err
   282  	}
   283  
   284  	taskGroups := [][]task.Task{
   285  		selectablePkgTasks,
   286  		selectableFileTasks,
   287  	}
   288  
   289  	finalTaskGroups, selection, err := task.SelectInGroups(taskGroups, *req)
   290  	if err != nil {
   291  		return nil, nil, nil, err
   292  	}
   293  
   294  	if deprecatedNames := deprecatedTasks(finalTaskGroups); len(deprecatedNames) > 0 {
   295  		log.WithFields("catalogers", strings.Join(deprecatedNames, ", ")).Warn("deprecated catalogers are being used (please remove them from your configuration)")
   296  	}
   297  
   298  	finalPkgTasks := finalTaskGroups[0]
   299  	finalFileTasks := finalTaskGroups[1]
   300  
   301  	finalPkgTasks = append(finalPkgTasks, persistentPkgTasks...)
   302  
   303  	if len(finalPkgTasks) == 0 && len(finalFileTasks) == 0 {
   304  		return nil, nil, nil, fmt.Errorf("no catalogers selected")
   305  	}
   306  
   307  	logTaskNames(finalPkgTasks, "package cataloger")
   308  	logTaskNames(finalFileTasks, "file cataloger")
   309  
   310  	if len(finalPkgTasks) == 0 && len(finalFileTasks) == 0 {
   311  		return nil, nil, nil, fmt.Errorf("no catalogers selected")
   312  	}
   313  
   314  	if len(finalPkgTasks) == 0 {
   315  		log.Debug("no package catalogers selected")
   316  	}
   317  
   318  	if len(finalFileTasks) == 0 {
   319  		if c.Files.Selection != file.NoFilesSelection {
   320  			log.Warnf("no file catalogers selected but file selection is configured as %q (this may be unintentional)", c.Files.Selection)
   321  		} else {
   322  			log.Debug("no file catalogers selected")
   323  		}
   324  	}
   325  
   326  	return finalPkgTasks, finalFileTasks, &selection, nil
   327  }
   328  
   329  func deprecatedTasks(taskGroups [][]task.Task) []string {
   330  	// we want to identify any deprecated catalogers that are being used but default selections will always additionally select `file`
   331  	// catalogers. For this reason, we must explicitly remove `file` catalogers in the selection request. This means if we
   332  	// deprecate a file cataloger we will need special processing.
   333  	_, selection, err := task.SelectInGroups(taskGroups, cataloging.SelectionRequest{DefaultNamesOrTags: []string{pkgcataloging.DeprecatedTag}, RemoveNamesOrTags: []string{filecataloging.FileTag}})
   334  	if err != nil {
   335  		// ignore the error, as it is not critical
   336  		return nil
   337  	}
   338  	return selection.Result.List()
   339  }
   340  
   341  func logTaskNames(tasks []task.Task, kind string) {
   342  	// log as tree output (like tree command)
   343  	log.Debugf("selected %d %s tasks", len(tasks), kind)
   344  	names := formatTaskNames(tasks)
   345  	for idx, t := range names {
   346  		if idx == len(tasks)-1 {
   347  			log.Tracef("└── %s", t)
   348  		} else {
   349  			log.Tracef("├── %s", t)
   350  		}
   351  	}
   352  }
   353  
   354  func finalTaskSelectionRequest(req cataloging.SelectionRequest, src source.Description) (*cataloging.SelectionRequest, error) {
   355  	if len(req.DefaultNamesOrTags) == 0 {
   356  		defaultTags, err := findDefaultTags(src)
   357  		if err != nil {
   358  			return nil, fmt.Errorf("unable to determine default cataloger tag: %w", err)
   359  		}
   360  
   361  		req.DefaultNamesOrTags = append(req.DefaultNamesOrTags, defaultTags...)
   362  
   363  		req.RemoveNamesOrTags = replaceDefaultTagReferences(defaultTags, req.RemoveNamesOrTags)
   364  		req.SubSelectTags = replaceDefaultTagReferences(defaultTags, req.SubSelectTags)
   365  	}
   366  
   367  	return &req, nil
   368  }
   369  
   370  func (c *CreateSBOMConfig) allPackageTasks(cfg task.CatalogingFactoryConfig) ([]task.Task, []task.Task, error) {
   371  	persistentPackageTasks, selectablePackageTasks, err := c.userPackageTasks(cfg)
   372  	if err != nil {
   373  		return nil, nil, err
   374  	}
   375  
   376  	tsks, err := c.packageTaskFactories.Tasks(cfg)
   377  	if err != nil {
   378  		return nil, nil, fmt.Errorf("unable to create package cataloger tasks: %w", err)
   379  	}
   380  
   381  	return persistentPackageTasks, append(tsks, selectablePackageTasks...), nil
   382  }
   383  
   384  func (c *CreateSBOMConfig) userPackageTasks(cfg task.CatalogingFactoryConfig) ([]task.Task, []task.Task, error) {
   385  	var (
   386  		persistentPackageTasks []task.Task
   387  		selectablePackageTasks []task.Task
   388  	)
   389  
   390  	for _, catalogerRef := range c.packageCatalogerReferences {
   391  		if catalogerRef.Cataloger == nil {
   392  			return nil, nil, errors.New("provided cataloger reference without a cataloger")
   393  		}
   394  		if catalogerRef.AlwaysEnabled {
   395  			persistentPackageTasks = append(persistentPackageTasks, task.NewPackageTask(cfg, catalogerRef.Cataloger, catalogerRef.Tags...))
   396  			continue
   397  		}
   398  		if len(catalogerRef.Tags) == 0 {
   399  			return nil, nil, errors.New("provided cataloger reference without tags")
   400  		}
   401  		selectablePackageTasks = append(selectablePackageTasks, task.NewPackageTask(cfg, catalogerRef.Cataloger, catalogerRef.Tags...))
   402  	}
   403  
   404  	return persistentPackageTasks, selectablePackageTasks, nil
   405  }
   406  
   407  // scopeTasks returns the set of tasks that should be run to generate additional scope information
   408  func (c *CreateSBOMConfig) scopeTasks() []task.Task {
   409  	var tsks []task.Task
   410  	if c.Search.Scope == source.DeepSquashedScope {
   411  		if t := task.NewDeepSquashedScopeCleanupTask(); t != nil {
   412  			tsks = append(tsks, t)
   413  		}
   414  	}
   415  	return tsks
   416  }
   417  
   418  // relationshipTasks returns the set of tasks that should be run to generate additional relationships as well as
   419  // prune existing relationships.
   420  func (c *CreateSBOMConfig) relationshipTasks(src source.Description) []task.Task {
   421  	var tsks []task.Task
   422  
   423  	if t := task.NewRelationshipsTask(c.Relationships, src); t != nil {
   424  		tsks = append(tsks, t)
   425  	}
   426  	return tsks
   427  }
   428  
   429  // environmentTasks returns the set of tasks that should be run to identify what is being scanned or the context
   430  // of where it is being scanned. Today this is used to identify the linux distribution release for container images
   431  // being scanned.
   432  func (c *CreateSBOMConfig) environmentTasks() []task.Task {
   433  	var tsks []task.Task
   434  
   435  	if t := task.NewEnvironmentTask(); t != nil {
   436  		tsks = append(tsks, t)
   437  	}
   438  	return tsks
   439  }
   440  
   441  // unknownsTasks returns a set of tasks that perform any necessary post-processing
   442  // to identify SBOM elements as unknowns
   443  func (c *CreateSBOMConfig) unknownsTasks() []task.Task {
   444  	var tasks []task.Task
   445  
   446  	if t := task.NewUnknownsLabelerTask(c.Unknowns); t != nil {
   447  		tasks = append(tasks, t)
   448  	}
   449  
   450  	return tasks
   451  }
   452  
   453  // osFeatureDetectionTasks returns a set of tasks that perform post-processing feature detection and update the SBOM accordingly
   454  func (c *CreateSBOMConfig) osFeatureDetectionTasks() []task.Task {
   455  	var tasks []task.Task
   456  
   457  	if t := task.NewOSFeatureDetectionTask(); t != nil {
   458  		tasks = append(tasks, t)
   459  	}
   460  
   461  	return tasks
   462  }
   463  
   464  func (c *CreateSBOMConfig) validate() error {
   465  	if c.Relationships.ExcludeBinaryPackagesWithFileOwnershipOverlap {
   466  		if !c.Relationships.PackageFileOwnershipOverlap {
   467  			return fmt.Errorf("invalid configuration: to exclude binary packages based on file ownership overlap relationships, cataloging file ownership overlap relationships must be enabled")
   468  		}
   469  	}
   470  	return nil
   471  }
   472  
   473  // Create creates an SBOM from the given source with the current SBOM configuration.
   474  func (c *CreateSBOMConfig) Create(ctx context.Context, src source.Source) (*sbom.SBOM, error) {
   475  	return CreateSBOM(ctx, src, c)
   476  }
   477  
   478  func findDefaultTags(src source.Description) ([]string, error) {
   479  	switch m := src.Metadata.(type) {
   480  	case source.ImageMetadata:
   481  		return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
   482  	case source.FileMetadata, source.DirectoryMetadata:
   483  		return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
   484  	case source.SnapMetadata:
   485  		return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
   486  	default:
   487  		return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
   488  	}
   489  }
   490  
   491  func replaceDefaultTagReferences(defaultTags []string, lst []string) []string {
   492  	for i, tag := range lst {
   493  		if strings.ToLower(tag) == "default" {
   494  			switch len(defaultTags) {
   495  			case 0:
   496  				lst[i] = ""
   497  			case 1:
   498  				lst[i] = defaultTags[0]
   499  			default:
   500  				// remove the default tag and add the individual tags
   501  				lst = append(lst[:i], append(defaultTags, lst[i+1:]...)...)
   502  			}
   503  		}
   504  	}
   505  	return lst
   506  }