github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/internal/task/package_task_factory.go (about) 1 package task 2 3 import ( 4 "context" 5 "fmt" 6 "sort" 7 "strings" 8 "unicode" 9 10 "github.com/scylladb/go-set/strset" 11 12 "github.com/anchore/syft/internal/bus" 13 "github.com/anchore/syft/internal/log" 14 "github.com/anchore/syft/internal/sbomsync" 15 "github.com/anchore/syft/syft/artifact" 16 "github.com/anchore/syft/syft/cataloging" 17 "github.com/anchore/syft/syft/cataloging/pkgcataloging" 18 "github.com/anchore/syft/syft/event/monitor" 19 "github.com/anchore/syft/syft/file" 20 "github.com/anchore/syft/syft/pkg" 21 "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" 22 ) 23 24 type packageTaskFactory func(cfg CatalogingFactoryConfig) Task 25 26 type PackageTaskFactories []packageTaskFactory 27 28 type CatalogingFactoryConfig struct { 29 SearchConfig cataloging.SearchConfig 30 RelationshipsConfig cataloging.RelationshipsConfig 31 DataGenerationConfig cataloging.DataGenerationConfig 32 PackagesConfig pkgcataloging.Config 33 } 34 35 func DefaultCatalogingFactoryConfig() CatalogingFactoryConfig { 36 return CatalogingFactoryConfig{ 37 SearchConfig: cataloging.DefaultSearchConfig(), 38 RelationshipsConfig: cataloging.DefaultRelationshipsConfig(), 39 DataGenerationConfig: cataloging.DefaultDataGenerationConfig(), 40 PackagesConfig: pkgcataloging.DefaultConfig(), 41 } 42 } 43 44 func newPackageTaskFactory(catalogerFactory func(CatalogingFactoryConfig) pkg.Cataloger, tags ...string) packageTaskFactory { 45 return func(cfg CatalogingFactoryConfig) Task { 46 return NewPackageTask(cfg, catalogerFactory(cfg), tags...) 47 } 48 } 49 50 func newSimplePackageTaskFactory(catalogerFactory func() pkg.Cataloger, tags ...string) packageTaskFactory { 51 return func(cfg CatalogingFactoryConfig) Task { 52 return NewPackageTask(cfg, catalogerFactory(), tags...) 53 } 54 } 55 56 func (f PackageTaskFactories) Tasks(cfg CatalogingFactoryConfig) ([]Task, error) { 57 var allTasks []Task 58 taskNames := strset.New() 59 duplicateTaskNames := strset.New() 60 var err error 61 for _, factory := range f { 62 tsk := factory(cfg) 63 if tsk == nil { 64 continue 65 } 66 tskName := tsk.Name() 67 if taskNames.Has(tskName) { 68 duplicateTaskNames.Add(tskName) 69 } 70 71 allTasks = append(allTasks, tsk) 72 taskNames.Add(tskName) 73 } 74 if duplicateTaskNames.Size() > 0 { 75 names := duplicateTaskNames.List() 76 sort.Strings(names) 77 err = fmt.Errorf("duplicate cataloger task names: %v", strings.Join(names, ", ")) 78 } 79 80 return allTasks, err 81 } 82 83 // NewPackageTask creates a Task function for a generic pkg.Cataloger, honoring the common configuration options. 84 // 85 //nolint:funlen 86 func NewPackageTask(cfg CatalogingFactoryConfig, c pkg.Cataloger, tags ...string) Task { 87 fn := func(ctx context.Context, resolver file.Resolver, sbom sbomsync.Builder) error { 88 catalogerName := c.Name() 89 log.WithFields("name", catalogerName).Trace("starting package cataloger") 90 91 info := monitor.GenericTask{ 92 Title: monitor.Title{ 93 Default: prettyName(catalogerName), 94 }, 95 ID: catalogerName, 96 ParentID: monitor.PackageCatalogingTaskID, 97 Context: "", 98 HideOnSuccess: true, 99 } 100 101 t := bus.StartCatalogerTask(info, -1, "") 102 103 pkgs, relationships, err := c.Catalog(ctx, resolver) 104 if err != nil { 105 return fmt.Errorf("unable to catalog packages with %q: %w", c.Name(), err) 106 } 107 108 log.WithFields("cataloger", c.Name()).Debugf("discovered %d packages", len(pkgs)) 109 110 for i, p := range pkgs { 111 if cfg.DataGenerationConfig.GenerateCPEs { 112 // generate CPEs (note: this is excluded from package ID, so is safe to mutate) 113 // we might have binary classified CPE already with the package so we want to append here 114 dictionaryCPEs, ok := cpe.DictionaryFind(p) 115 if ok { 116 log.Tracef("used CPE dictionary to find CPEs for %s package %q: %s", p.Type, p.Name, dictionaryCPEs) 117 p.CPEs = append(p.CPEs, dictionaryCPEs...) 118 } else { 119 p.CPEs = append(p.CPEs, cpe.Generate(p)...) 120 } 121 } 122 123 // if we were not able to identify the language we have an opportunity 124 // to try and get this value from the PURL. Worst case we assert that 125 // we could not identify the language at either stage and set UnknownLanguage 126 if p.Language == "" { 127 p.Language = pkg.LanguageFromPURL(p.PURL) 128 } 129 130 if cfg.RelationshipsConfig.PackageFileOwnership { 131 // create file-to-package relationships for files owned by the package 132 owningRelationships, err := packageFileOwnershipRelationships(p, resolver) 133 if err != nil { 134 log.Warnf("unable to create any package-file relationships for package name=%q type=%q: %w", p.Name, p.Type, err) 135 } else { 136 relationships = append(relationships, owningRelationships...) 137 } 138 } 139 140 pkgs[i] = p 141 } 142 143 sbom.AddPackages(pkgs...) 144 sbom.AddRelationships(relationships...) 145 t.Add(int64(len(pkgs))) 146 147 t.SetCompleted() 148 log.WithFields("name", c.Name()).Trace("package cataloger completed") 149 150 return nil 151 } 152 tags = append(tags, pkgcataloging.PackageTag) 153 154 return NewTask(c.Name(), fn, tags...) 155 } 156 157 func prettyName(s string) string { 158 if s == "" { 159 return "" 160 } 161 162 // Convert first character to uppercase 163 r := []rune(s) 164 r[0] = unicode.ToUpper(r[0]) 165 166 return strings.ReplaceAll(string(r), "-", " ") 167 } 168 169 func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) { 170 fileOwner, ok := p.Metadata.(pkg.FileOwner) 171 if !ok { 172 return nil, nil 173 } 174 175 locations := map[artifact.ID]file.Location{} 176 177 for _, path := range fileOwner.OwnedFiles() { 178 pathRefs, err := resolver.FilesByPath(path) 179 if err != nil { 180 return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err) 181 } 182 183 if len(pathRefs) == 0 { 184 // ideally we want to warn users about missing files from a package, however, it is very common for 185 // container image authors to delete files that are not needed in order to keep image sizes small. Adding 186 // a warning here would be needlessly noisy (even for popular base images). 187 continue 188 } 189 190 for _, ref := range pathRefs { 191 if oldRef, ok := locations[ref.Coordinates.ID()]; ok { 192 log.Debugf("found path duplicate of %s", oldRef.RealPath) 193 } 194 locations[ref.Coordinates.ID()] = ref 195 } 196 } 197 198 var relationships []artifact.Relationship 199 for _, location := range locations { 200 relationships = append(relationships, artifact.Relationship{ 201 From: p, 202 To: location.Coordinates, 203 Type: artifact.ContainsRelationship, 204 }) 205 } 206 return relationships, nil 207 }