github.com/anchore/syft@v1.38.2/internal/task/package_task_factory.go (about)

     1  package task
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  	"unicode"
     8  
     9  	"github.com/anchore/syft/internal/bus"
    10  	"github.com/anchore/syft/internal/log"
    11  	"github.com/anchore/syft/internal/relationship"
    12  	"github.com/anchore/syft/internal/sbomsync"
    13  	"github.com/anchore/syft/syft/artifact"
    14  	"github.com/anchore/syft/syft/cataloging"
    15  	"github.com/anchore/syft/syft/cataloging/pkgcataloging"
    16  	"github.com/anchore/syft/syft/cpe"
    17  	"github.com/anchore/syft/syft/event/monitor"
    18  	"github.com/anchore/syft/syft/file"
    19  	"github.com/anchore/syft/syft/pkg"
    20  	cpeutils "github.com/anchore/syft/syft/pkg/cataloger/common/cpe"
    21  )
    22  
    23  func newPackageTaskFactory(catalogerFactory func(CatalogingFactoryConfig) pkg.Cataloger, tags ...string) factory {
    24  	return func(cfg CatalogingFactoryConfig) Task {
    25  		return NewPackageTask(cfg, catalogerFactory(cfg), tags...)
    26  	}
    27  }
    28  
    29  func newSimplePackageTaskFactory(catalogerFactory func() pkg.Cataloger, tags ...string) factory {
    30  	return func(cfg CatalogingFactoryConfig) Task {
    31  		return NewPackageTask(cfg, catalogerFactory(), tags...)
    32  	}
    33  }
    34  
    35  // NewPackageTask creates a Task function for a generic pkg.Cataloger, honoring the common configuration options.
    36  func NewPackageTask(cfg CatalogingFactoryConfig, c pkg.Cataloger, tags ...string) Task {
    37  	fn := func(ctx context.Context, resolver file.Resolver, sbom sbomsync.Builder) error {
    38  		catalogerName := c.Name()
    39  		log.WithFields("name", catalogerName).Trace("starting package cataloger")
    40  
    41  		info := monitor.GenericTask{
    42  			Title: monitor.Title{
    43  				Default: prettyName(catalogerName),
    44  			},
    45  			ID:            catalogerName,
    46  			ParentID:      monitor.PackageCatalogingTaskID,
    47  			Context:       "",
    48  			HideOnSuccess: true,
    49  		}
    50  
    51  		t := bus.StartCatalogerTask(info, -1, "")
    52  
    53  		pkgs, relationships, err := c.Catalog(ctx, resolver)
    54  
    55  		log.WithFields("cataloger", catalogerName).Debugf("discovered %d packages", len(pkgs))
    56  
    57  		pkgs, relationships = finalizePkgCatalogerResults(cfg, resolver, catalogerName, pkgs, relationships)
    58  
    59  		pkgs, relationships = applyCompliance(cfg.ComplianceConfig, pkgs, relationships)
    60  
    61  		sbom.AddPackages(pkgs...)
    62  		sbom.AddRelationships(relationships...)
    63  		t.Add(int64(len(pkgs)))
    64  
    65  		t.SetCompleted()
    66  		log.WithFields("name", catalogerName).Trace("package cataloger completed")
    67  
    68  		return err
    69  	}
    70  	tags = append(tags, pkgcataloging.PackageTag)
    71  
    72  	return NewTask(c.Name(), fn, tags...)
    73  }
    74  
    75  func finalizePkgCatalogerResults(cfg CatalogingFactoryConfig, resolver file.PathResolver, catalogerName string, pkgs []pkg.Package, relationships []artifact.Relationship) ([]pkg.Package, []artifact.Relationship) {
    76  	for i, p := range pkgs {
    77  		if p.FoundBy == "" {
    78  			p.FoundBy = catalogerName
    79  		}
    80  
    81  		if cfg.DataGenerationConfig.GenerateCPEs && !hasAuthoritativeCPE(p.CPEs) {
    82  			// generate CPEs (note: this is excluded from package ID, so is safe to mutate)
    83  			// we might have binary classified CPE already with the package so we want to append here
    84  			dictionaryCPEs, ok := cpeutils.DictionaryFind(p)
    85  			if ok {
    86  				log.Tracef("used CPE dictionary to find CPEs for %s package %q: %s", p.Type, p.Name, dictionaryCPEs)
    87  				p.CPEs = append(p.CPEs, dictionaryCPEs...)
    88  			} else {
    89  				p.CPEs = append(p.CPEs, cpeutils.Generate(p)...)
    90  			}
    91  		}
    92  
    93  		// if we were not able to identify the language we have an opportunity
    94  		// to try and get this value from the PURL. Worst case we assert that
    95  		// we could not identify the language at either stage and set UnknownLanguage
    96  		if p.Language == "" {
    97  			p.Language = pkg.LanguageFromPURL(p.PURL)
    98  		}
    99  
   100  		if cfg.RelationshipsConfig.PackageFileOwnership {
   101  			// create file-to-package relationships for files owned by the package
   102  			owningRelationships, err := packageFileOwnershipRelationships(p, resolver)
   103  			if err != nil {
   104  				log.Debugf("unable to create any package-file relationships for package name=%q type=%q: %v", p.Name, p.Type, err)
   105  			} else {
   106  				relationships = append(relationships, owningRelationships...)
   107  			}
   108  		}
   109  
   110  		// we want to know if the user wants to preserve license content or not in the final SBOM
   111  		// note: this looks incorrect, but pkg.License.Content is NOT used to compute the Package ID
   112  		// this does NOT change the reproducibility of the Package ID
   113  		applyLicenseContentRules(&p, cfg.LicenseConfig)
   114  
   115  		pkgs[i] = p
   116  	}
   117  	return pkgs, relationships
   118  }
   119  
   120  type packageReplacement struct {
   121  	original artifact.ID
   122  	pkg      pkg.Package
   123  }
   124  
   125  func applyCompliance(cfg cataloging.ComplianceConfig, pkgs []pkg.Package, relationships []artifact.Relationship) ([]pkg.Package, []artifact.Relationship) {
   126  	remainingPkgs, droppedPkgs, replacements := filterNonCompliantPackages(pkgs, cfg)
   127  
   128  	relIdx := relationship.NewIndex(relationships...)
   129  	for _, p := range droppedPkgs {
   130  		relIdx.Remove(p.ID())
   131  	}
   132  
   133  	for _, replacement := range replacements {
   134  		relIdx.Replace(replacement.original, replacement.pkg)
   135  	}
   136  
   137  	return remainingPkgs, relIdx.All()
   138  }
   139  
   140  func filterNonCompliantPackages(pkgs []pkg.Package, cfg cataloging.ComplianceConfig) ([]pkg.Package, []pkg.Package, []packageReplacement) {
   141  	var remainingPkgs, droppedPkgs []pkg.Package
   142  	var replacements []packageReplacement
   143  	for _, p := range pkgs {
   144  		keep, replacement := applyComplianceRules(&p, cfg)
   145  		if keep {
   146  			remainingPkgs = append(remainingPkgs, p)
   147  		} else {
   148  			droppedPkgs = append(droppedPkgs, p)
   149  		}
   150  		if replacement != nil {
   151  			replacements = append(replacements, *replacement)
   152  		}
   153  	}
   154  
   155  	return remainingPkgs, droppedPkgs, replacements
   156  }
   157  
   158  func applyComplianceRules(p *pkg.Package, cfg cataloging.ComplianceConfig) (bool, *packageReplacement) {
   159  	var drop bool
   160  	var replacement *packageReplacement
   161  
   162  	applyComplianceRule := func(value, fieldName string, action cataloging.ComplianceAction) bool {
   163  		if strings.TrimSpace(value) != "" {
   164  			return false
   165  		}
   166  
   167  		loc := "unknown"
   168  		locs := p.Locations.ToSlice()
   169  		if len(locs) > 0 {
   170  			loc = locs[0].Path()
   171  		}
   172  		switch action {
   173  		case cataloging.ComplianceActionDrop:
   174  			log.WithFields("pkg", p.String(), "location", loc).Debugf("package with missing %s, dropping", fieldName)
   175  			drop = true
   176  
   177  		case cataloging.ComplianceActionStub:
   178  			log.WithFields("pkg", p.String(), "location", loc).Debugf("package with missing %s, stubbing with default value", fieldName)
   179  			return true
   180  
   181  		case cataloging.ComplianceActionKeep:
   182  			log.WithFields("pkg", p.String(), "location", loc, "field", fieldName).Trace("package with missing field, taking no action")
   183  		}
   184  		return false
   185  	}
   186  
   187  	ogID := p.ID()
   188  
   189  	if applyComplianceRule(p.Name, "name", cfg.MissingName) {
   190  		p.Name = cataloging.UnknownStubValue
   191  		p.SetID()
   192  	}
   193  
   194  	if applyComplianceRule(p.Version, "version", cfg.MissingVersion) {
   195  		p.Version = cataloging.UnknownStubValue
   196  		p.SetID()
   197  	}
   198  
   199  	newID := p.ID()
   200  	if newID != ogID {
   201  		replacement = &packageReplacement{
   202  			original: ogID,
   203  			pkg:      *p,
   204  		}
   205  	}
   206  
   207  	return !drop, replacement
   208  }
   209  
   210  func hasAuthoritativeCPE(cpes []cpe.CPE) bool {
   211  	for _, c := range cpes {
   212  		if c.Source != cpe.GeneratedSource {
   213  			return true
   214  		}
   215  	}
   216  	return false
   217  }
   218  
   219  func prettyName(s string) string {
   220  	if s == "" {
   221  		return ""
   222  	}
   223  
   224  	// Convert first character to uppercase
   225  	r := []rune(s)
   226  	r[0] = unicode.ToUpper(r[0])
   227  
   228  	return strings.ReplaceAll(string(r), "-", " ")
   229  }
   230  
   231  func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) {
   232  	fileOwner, ok := p.Metadata.(pkg.FileOwner)
   233  	if !ok {
   234  		return nil, nil
   235  	}
   236  
   237  	locations := map[artifact.ID]file.Location{}
   238  
   239  	for _, path := range fileOwner.OwnedFiles() {
   240  		pathRefs, err := resolver.FilesByPath(path)
   241  		if err != nil {
   242  			return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err)
   243  		}
   244  
   245  		if len(pathRefs) == 0 {
   246  			// ideally we want to warn users about missing files from a package, however, it is very common for
   247  			// container image authors to delete files that are not needed in order to keep image sizes small. Adding
   248  			// a warning here would be needlessly noisy (even for popular base images).
   249  			continue
   250  		}
   251  
   252  		for _, ref := range pathRefs {
   253  			if oldRef, ok := locations[ref.ID()]; ok {
   254  				log.Debugf("found path duplicate of %s", oldRef.RealPath)
   255  			}
   256  			locations[ref.ID()] = ref
   257  		}
   258  	}
   259  
   260  	var relationships []artifact.Relationship
   261  	for _, location := range locations {
   262  		relationships = append(relationships, artifact.Relationship{
   263  			From: p,
   264  			To:   location.Coordinates,
   265  			Type: artifact.ContainsRelationship,
   266  		})
   267  	}
   268  	return relationships, nil
   269  }
   270  
   271  func applyLicenseContentRules(p *pkg.Package, cfg cataloging.LicenseConfig) {
   272  	if p.Licenses.Empty() {
   273  		return
   274  	}
   275  
   276  	licenses := p.Licenses.ToSlice()
   277  	for i := range licenses {
   278  		l := &licenses[i]
   279  		switch cfg.IncludeContent {
   280  		case cataloging.LicenseContentIncludeUnknown:
   281  			// we have an SPDX expression, which means this is NOT an unknown license
   282  			// remove the content, we are only including content for unknown licenses by default
   283  			if l.SPDXExpression != "" {
   284  				licenses[i].Contents = ""
   285  			}
   286  		case cataloging.LicenseContentIncludeAll:
   287  			// always include the content
   288  		default:
   289  			// clear it all out
   290  			licenses[i].Contents = ""
   291  		}
   292  	}
   293  
   294  	p.Licenses = pkg.NewLicenseSet(licenses...)
   295  }