github.com/anchore/syft@v1.38.2/internal/task/package_task_factory.go (about) 1 package task 2 3 import ( 4 "context" 5 "fmt" 6 "strings" 7 "unicode" 8 9 "github.com/anchore/syft/internal/bus" 10 "github.com/anchore/syft/internal/log" 11 "github.com/anchore/syft/internal/relationship" 12 "github.com/anchore/syft/internal/sbomsync" 13 "github.com/anchore/syft/syft/artifact" 14 "github.com/anchore/syft/syft/cataloging" 15 "github.com/anchore/syft/syft/cataloging/pkgcataloging" 16 "github.com/anchore/syft/syft/cpe" 17 "github.com/anchore/syft/syft/event/monitor" 18 "github.com/anchore/syft/syft/file" 19 "github.com/anchore/syft/syft/pkg" 20 cpeutils "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" 21 ) 22 23 func newPackageTaskFactory(catalogerFactory func(CatalogingFactoryConfig) pkg.Cataloger, tags ...string) factory { 24 return func(cfg CatalogingFactoryConfig) Task { 25 return NewPackageTask(cfg, catalogerFactory(cfg), tags...) 26 } 27 } 28 29 func newSimplePackageTaskFactory(catalogerFactory func() pkg.Cataloger, tags ...string) factory { 30 return func(cfg CatalogingFactoryConfig) Task { 31 return NewPackageTask(cfg, catalogerFactory(), tags...) 32 } 33 } 34 35 // NewPackageTask creates a Task function for a generic pkg.Cataloger, honoring the common configuration options. 36 func NewPackageTask(cfg CatalogingFactoryConfig, c pkg.Cataloger, tags ...string) Task { 37 fn := func(ctx context.Context, resolver file.Resolver, sbom sbomsync.Builder) error { 38 catalogerName := c.Name() 39 log.WithFields("name", catalogerName).Trace("starting package cataloger") 40 41 info := monitor.GenericTask{ 42 Title: monitor.Title{ 43 Default: prettyName(catalogerName), 44 }, 45 ID: catalogerName, 46 ParentID: monitor.PackageCatalogingTaskID, 47 Context: "", 48 HideOnSuccess: true, 49 } 50 51 t := bus.StartCatalogerTask(info, -1, "") 52 53 pkgs, relationships, err := c.Catalog(ctx, resolver) 54 55 log.WithFields("cataloger", catalogerName).Debugf("discovered %d packages", len(pkgs)) 56 57 pkgs, relationships = finalizePkgCatalogerResults(cfg, resolver, catalogerName, pkgs, relationships) 58 59 pkgs, relationships = applyCompliance(cfg.ComplianceConfig, pkgs, relationships) 60 61 sbom.AddPackages(pkgs...) 62 sbom.AddRelationships(relationships...) 63 t.Add(int64(len(pkgs))) 64 65 t.SetCompleted() 66 log.WithFields("name", catalogerName).Trace("package cataloger completed") 67 68 return err 69 } 70 tags = append(tags, pkgcataloging.PackageTag) 71 72 return NewTask(c.Name(), fn, tags...) 73 } 74 75 func finalizePkgCatalogerResults(cfg CatalogingFactoryConfig, resolver file.PathResolver, catalogerName string, pkgs []pkg.Package, relationships []artifact.Relationship) ([]pkg.Package, []artifact.Relationship) { 76 for i, p := range pkgs { 77 if p.FoundBy == "" { 78 p.FoundBy = catalogerName 79 } 80 81 if cfg.DataGenerationConfig.GenerateCPEs && !hasAuthoritativeCPE(p.CPEs) { 82 // generate CPEs (note: this is excluded from package ID, so is safe to mutate) 83 // we might have binary classified CPE already with the package so we want to append here 84 dictionaryCPEs, ok := cpeutils.DictionaryFind(p) 85 if ok { 86 log.Tracef("used CPE dictionary to find CPEs for %s package %q: %s", p.Type, p.Name, dictionaryCPEs) 87 p.CPEs = append(p.CPEs, dictionaryCPEs...) 88 } else { 89 p.CPEs = append(p.CPEs, cpeutils.Generate(p)...) 90 } 91 } 92 93 // if we were not able to identify the language we have an opportunity 94 // to try and get this value from the PURL. Worst case we assert that 95 // we could not identify the language at either stage and set UnknownLanguage 96 if p.Language == "" { 97 p.Language = pkg.LanguageFromPURL(p.PURL) 98 } 99 100 if cfg.RelationshipsConfig.PackageFileOwnership { 101 // create file-to-package relationships for files owned by the package 102 owningRelationships, err := packageFileOwnershipRelationships(p, resolver) 103 if err != nil { 104 log.Debugf("unable to create any package-file relationships for package name=%q type=%q: %v", p.Name, p.Type, err) 105 } else { 106 relationships = append(relationships, owningRelationships...) 107 } 108 } 109 110 // we want to know if the user wants to preserve license content or not in the final SBOM 111 // note: this looks incorrect, but pkg.License.Content is NOT used to compute the Package ID 112 // this does NOT change the reproducibility of the Package ID 113 applyLicenseContentRules(&p, cfg.LicenseConfig) 114 115 pkgs[i] = p 116 } 117 return pkgs, relationships 118 } 119 120 type packageReplacement struct { 121 original artifact.ID 122 pkg pkg.Package 123 } 124 125 func applyCompliance(cfg cataloging.ComplianceConfig, pkgs []pkg.Package, relationships []artifact.Relationship) ([]pkg.Package, []artifact.Relationship) { 126 remainingPkgs, droppedPkgs, replacements := filterNonCompliantPackages(pkgs, cfg) 127 128 relIdx := relationship.NewIndex(relationships...) 129 for _, p := range droppedPkgs { 130 relIdx.Remove(p.ID()) 131 } 132 133 for _, replacement := range replacements { 134 relIdx.Replace(replacement.original, replacement.pkg) 135 } 136 137 return remainingPkgs, relIdx.All() 138 } 139 140 func filterNonCompliantPackages(pkgs []pkg.Package, cfg cataloging.ComplianceConfig) ([]pkg.Package, []pkg.Package, []packageReplacement) { 141 var remainingPkgs, droppedPkgs []pkg.Package 142 var replacements []packageReplacement 143 for _, p := range pkgs { 144 keep, replacement := applyComplianceRules(&p, cfg) 145 if keep { 146 remainingPkgs = append(remainingPkgs, p) 147 } else { 148 droppedPkgs = append(droppedPkgs, p) 149 } 150 if replacement != nil { 151 replacements = append(replacements, *replacement) 152 } 153 } 154 155 return remainingPkgs, droppedPkgs, replacements 156 } 157 158 func applyComplianceRules(p *pkg.Package, cfg cataloging.ComplianceConfig) (bool, *packageReplacement) { 159 var drop bool 160 var replacement *packageReplacement 161 162 applyComplianceRule := func(value, fieldName string, action cataloging.ComplianceAction) bool { 163 if strings.TrimSpace(value) != "" { 164 return false 165 } 166 167 loc := "unknown" 168 locs := p.Locations.ToSlice() 169 if len(locs) > 0 { 170 loc = locs[0].Path() 171 } 172 switch action { 173 case cataloging.ComplianceActionDrop: 174 log.WithFields("pkg", p.String(), "location", loc).Debugf("package with missing %s, dropping", fieldName) 175 drop = true 176 177 case cataloging.ComplianceActionStub: 178 log.WithFields("pkg", p.String(), "location", loc).Debugf("package with missing %s, stubbing with default value", fieldName) 179 return true 180 181 case cataloging.ComplianceActionKeep: 182 log.WithFields("pkg", p.String(), "location", loc, "field", fieldName).Trace("package with missing field, taking no action") 183 } 184 return false 185 } 186 187 ogID := p.ID() 188 189 if applyComplianceRule(p.Name, "name", cfg.MissingName) { 190 p.Name = cataloging.UnknownStubValue 191 p.SetID() 192 } 193 194 if applyComplianceRule(p.Version, "version", cfg.MissingVersion) { 195 p.Version = cataloging.UnknownStubValue 196 p.SetID() 197 } 198 199 newID := p.ID() 200 if newID != ogID { 201 replacement = &packageReplacement{ 202 original: ogID, 203 pkg: *p, 204 } 205 } 206 207 return !drop, replacement 208 } 209 210 func hasAuthoritativeCPE(cpes []cpe.CPE) bool { 211 for _, c := range cpes { 212 if c.Source != cpe.GeneratedSource { 213 return true 214 } 215 } 216 return false 217 } 218 219 func prettyName(s string) string { 220 if s == "" { 221 return "" 222 } 223 224 // Convert first character to uppercase 225 r := []rune(s) 226 r[0] = unicode.ToUpper(r[0]) 227 228 return strings.ReplaceAll(string(r), "-", " ") 229 } 230 231 func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) { 232 fileOwner, ok := p.Metadata.(pkg.FileOwner) 233 if !ok { 234 return nil, nil 235 } 236 237 locations := map[artifact.ID]file.Location{} 238 239 for _, path := range fileOwner.OwnedFiles() { 240 pathRefs, err := resolver.FilesByPath(path) 241 if err != nil { 242 return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err) 243 } 244 245 if len(pathRefs) == 0 { 246 // ideally we want to warn users about missing files from a package, however, it is very common for 247 // container image authors to delete files that are not needed in order to keep image sizes small. Adding 248 // a warning here would be needlessly noisy (even for popular base images). 249 continue 250 } 251 252 for _, ref := range pathRefs { 253 if oldRef, ok := locations[ref.ID()]; ok { 254 log.Debugf("found path duplicate of %s", oldRef.RealPath) 255 } 256 locations[ref.ID()] = ref 257 } 258 } 259 260 var relationships []artifact.Relationship 261 for _, location := range locations { 262 relationships = append(relationships, artifact.Relationship{ 263 From: p, 264 To: location.Coordinates, 265 Type: artifact.ContainsRelationship, 266 }) 267 } 268 return relationships, nil 269 } 270 271 func applyLicenseContentRules(p *pkg.Package, cfg cataloging.LicenseConfig) { 272 if p.Licenses.Empty() { 273 return 274 } 275 276 licenses := p.Licenses.ToSlice() 277 for i := range licenses { 278 l := &licenses[i] 279 switch cfg.IncludeContent { 280 case cataloging.LicenseContentIncludeUnknown: 281 // we have an SPDX expression, which means this is NOT an unknown license 282 // remove the content, we are only including content for unknown licenses by default 283 if l.SPDXExpression != "" { 284 licenses[i].Contents = "" 285 } 286 case cataloging.LicenseContentIncludeAll: 287 // always include the content 288 default: 289 // clear it all out 290 licenses[i].Contents = "" 291 } 292 } 293 294 p.Licenses = pkg.NewLicenseSet(licenses...) 295 }