github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/internal/task/package_task_factory.go (about)

     1  package task
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sort"
     7  	"strings"
     8  	"unicode"
     9  
    10  	"github.com/scylladb/go-set/strset"
    11  
    12  	"github.com/anchore/syft/internal/bus"
    13  	"github.com/anchore/syft/internal/log"
    14  	"github.com/anchore/syft/internal/sbomsync"
    15  	"github.com/anchore/syft/syft/artifact"
    16  	"github.com/anchore/syft/syft/cataloging"
    17  	"github.com/anchore/syft/syft/cataloging/pkgcataloging"
    18  	"github.com/anchore/syft/syft/event/monitor"
    19  	"github.com/anchore/syft/syft/file"
    20  	"github.com/anchore/syft/syft/pkg"
    21  	"github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
    22  )
    23  
// packageTaskFactory builds a Task from the given cataloging configuration. A factory may return a nil
// Task, in which case PackageTaskFactories.Tasks skips it.
type packageTaskFactory func(cfg CatalogingFactoryConfig) Task
    25  
// PackageTaskFactories is an ordered collection of package task factories. Use the Tasks method to
// instantiate all of them against a concrete configuration.
type PackageTaskFactories []packageTaskFactory
    27  
// CatalogingFactoryConfig bundles the configuration that is handed to every package task factory (and, through
// it, to the cataloger constructors) as well as to NewPackageTask.
//
// Within this file only two options are read directly, both by NewPackageTask:
//   - DataGenerationConfig.GenerateCPEs: whether CPEs are appended to each discovered package.
//   - RelationshipsConfig.PackageFileOwnership: whether package-to-file ownership relationships are created.
//
// SearchConfig and PackagesConfig are not read here; presumably they are consumed by the individual cataloger
// constructors that receive this config (NOTE(review): confirm against the cataloger factories).
type CatalogingFactoryConfig struct {
	SearchConfig         cataloging.SearchConfig
	RelationshipsConfig  cataloging.RelationshipsConfig
	DataGenerationConfig cataloging.DataGenerationConfig
	PackagesConfig       pkgcataloging.Config
}
    34  
    35  func DefaultCatalogingFactoryConfig() CatalogingFactoryConfig {
    36  	return CatalogingFactoryConfig{
    37  		SearchConfig:         cataloging.DefaultSearchConfig(),
    38  		RelationshipsConfig:  cataloging.DefaultRelationshipsConfig(),
    39  		DataGenerationConfig: cataloging.DefaultDataGenerationConfig(),
    40  		PackagesConfig:       pkgcataloging.DefaultConfig(),
    41  	}
    42  }
    43  
    44  func newPackageTaskFactory(catalogerFactory func(CatalogingFactoryConfig) pkg.Cataloger, tags ...string) packageTaskFactory {
    45  	return func(cfg CatalogingFactoryConfig) Task {
    46  		return NewPackageTask(cfg, catalogerFactory(cfg), tags...)
    47  	}
    48  }
    49  
    50  func newSimplePackageTaskFactory(catalogerFactory func() pkg.Cataloger, tags ...string) packageTaskFactory {
    51  	return func(cfg CatalogingFactoryConfig) Task {
    52  		return NewPackageTask(cfg, catalogerFactory(), tags...)
    53  	}
    54  }
    55  
    56  func (f PackageTaskFactories) Tasks(cfg CatalogingFactoryConfig) ([]Task, error) {
    57  	var allTasks []Task
    58  	taskNames := strset.New()
    59  	duplicateTaskNames := strset.New()
    60  	var err error
    61  	for _, factory := range f {
    62  		tsk := factory(cfg)
    63  		if tsk == nil {
    64  			continue
    65  		}
    66  		tskName := tsk.Name()
    67  		if taskNames.Has(tskName) {
    68  			duplicateTaskNames.Add(tskName)
    69  		}
    70  
    71  		allTasks = append(allTasks, tsk)
    72  		taskNames.Add(tskName)
    73  	}
    74  	if duplicateTaskNames.Size() > 0 {
    75  		names := duplicateTaskNames.List()
    76  		sort.Strings(names)
    77  		err = fmt.Errorf("duplicate cataloger task names: %v", strings.Join(names, ", "))
    78  	}
    79  
    80  	return allTasks, err
    81  }
    82  
    83  // NewPackageTask creates a Task function for a generic pkg.Cataloger, honoring the common configuration options.
    84  //
    85  //nolint:funlen
    86  func NewPackageTask(cfg CatalogingFactoryConfig, c pkg.Cataloger, tags ...string) Task {
    87  	fn := func(ctx context.Context, resolver file.Resolver, sbom sbomsync.Builder) error {
    88  		catalogerName := c.Name()
    89  		log.WithFields("name", catalogerName).Trace("starting package cataloger")
    90  
    91  		info := monitor.GenericTask{
    92  			Title: monitor.Title{
    93  				Default: prettyName(catalogerName),
    94  			},
    95  			ID:            catalogerName,
    96  			ParentID:      monitor.PackageCatalogingTaskID,
    97  			Context:       "",
    98  			HideOnSuccess: true,
    99  		}
   100  
   101  		t := bus.StartCatalogerTask(info, -1, "")
   102  
   103  		pkgs, relationships, err := c.Catalog(ctx, resolver)
   104  		if err != nil {
   105  			return fmt.Errorf("unable to catalog packages with %q: %w", c.Name(), err)
   106  		}
   107  
   108  		log.WithFields("cataloger", c.Name()).Debugf("discovered %d packages", len(pkgs))
   109  
   110  		for i, p := range pkgs {
   111  			if cfg.DataGenerationConfig.GenerateCPEs {
   112  				// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
   113  				// we might have binary classified CPE already with the package so we want to append here
   114  				dictionaryCPEs, ok := cpe.DictionaryFind(p)
   115  				if ok {
   116  					log.Tracef("used CPE dictionary to find CPEs for %s package %q: %s", p.Type, p.Name, dictionaryCPEs)
   117  					p.CPEs = append(p.CPEs, dictionaryCPEs...)
   118  				} else {
   119  					p.CPEs = append(p.CPEs, cpe.Generate(p)...)
   120  				}
   121  			}
   122  
   123  			// if we were not able to identify the language we have an opportunity
   124  			// to try and get this value from the PURL. Worst case we assert that
   125  			// we could not identify the language at either stage and set UnknownLanguage
   126  			if p.Language == "" {
   127  				p.Language = pkg.LanguageFromPURL(p.PURL)
   128  			}
   129  
   130  			if cfg.RelationshipsConfig.PackageFileOwnership {
   131  				// create file-to-package relationships for files owned by the package
   132  				owningRelationships, err := packageFileOwnershipRelationships(p, resolver)
   133  				if err != nil {
   134  					log.Warnf("unable to create any package-file relationships for package name=%q type=%q: %w", p.Name, p.Type, err)
   135  				} else {
   136  					relationships = append(relationships, owningRelationships...)
   137  				}
   138  			}
   139  
   140  			pkgs[i] = p
   141  		}
   142  
   143  		sbom.AddPackages(pkgs...)
   144  		sbom.AddRelationships(relationships...)
   145  		t.Add(int64(len(pkgs)))
   146  
   147  		t.SetCompleted()
   148  		log.WithFields("name", c.Name()).Trace("package cataloger completed")
   149  
   150  		return nil
   151  	}
   152  	tags = append(tags, pkgcataloging.PackageTag)
   153  
   154  	return NewTask(c.Name(), fn, tags...)
   155  }
   156  
   157  func prettyName(s string) string {
   158  	if s == "" {
   159  		return ""
   160  	}
   161  
   162  	// Convert first character to uppercase
   163  	r := []rune(s)
   164  	r[0] = unicode.ToUpper(r[0])
   165  
   166  	return strings.ReplaceAll(string(r), "-", " ")
   167  }
   168  
   169  func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) {
   170  	fileOwner, ok := p.Metadata.(pkg.FileOwner)
   171  	if !ok {
   172  		return nil, nil
   173  	}
   174  
   175  	locations := map[artifact.ID]file.Location{}
   176  
   177  	for _, path := range fileOwner.OwnedFiles() {
   178  		pathRefs, err := resolver.FilesByPath(path)
   179  		if err != nil {
   180  			return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err)
   181  		}
   182  
   183  		if len(pathRefs) == 0 {
   184  			// ideally we want to warn users about missing files from a package, however, it is very common for
   185  			// container image authors to delete files that are not needed in order to keep image sizes small. Adding
   186  			// a warning here would be needlessly noisy (even for popular base images).
   187  			continue
   188  		}
   189  
   190  		for _, ref := range pathRefs {
   191  			if oldRef, ok := locations[ref.Coordinates.ID()]; ok {
   192  				log.Debugf("found path duplicate of %s", oldRef.RealPath)
   193  			}
   194  			locations[ref.Coordinates.ID()] = ref
   195  		}
   196  	}
   197  
   198  	var relationships []artifact.Relationship
   199  	for _, location := range locations {
   200  		relationships = append(relationships, artifact.Relationship{
   201  			From: p,
   202  			To:   location.Coordinates,
   203  			Type: artifact.ContainsRelationship,
   204  		})
   205  	}
   206  	return relationships, nil
   207  }