github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/catalog.go (about) 1 package cataloger 2 3 import ( 4 "fmt" 5 "math" 6 "runtime/debug" 7 "sync" 8 9 "github.com/hashicorp/go-multierror" 10 "github.com/wagoodman/go-partybus" 11 "github.com/wagoodman/go-progress" 12 13 "github.com/anchore/syft/internal/bus" 14 "github.com/anchore/syft/internal/log" 15 "github.com/anchore/syft/syft/artifact" 16 "github.com/anchore/syft/syft/event" 17 "github.com/anchore/syft/syft/file" 18 "github.com/anchore/syft/syft/linux" 19 "github.com/anchore/syft/syft/pkg" 20 "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" 21 ) 22 23 // Monitor provides progress-related data for observing the progress of a Catalog() call (published on the event bus). 24 type Monitor struct { 25 FilesProcessed progress.Monitorable // the number of files selected and contents analyzed from all registered catalogers 26 PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers 27 } 28 29 // catalogResult provides the result of running a single cataloger against source 30 type catalogResult struct { 31 Packages []pkg.Package 32 Relationships []artifact.Relationship 33 // Discovered may sometimes be more than len(packages) 34 Discovered int64 35 Error error 36 } 37 38 // newMonitor creates a new Monitor object and publishes the object on the bus as a PackageCatalogerStarted event. 39 func newMonitor() (*progress.Manual, *progress.Manual) { 40 filesProcessed := progress.Manual{} 41 packagesDiscovered := progress.Manual{} 42 43 bus.Publish(partybus.Event{ 44 Type: event.PackageCatalogerStarted, 45 Value: Monitor{ 46 FilesProcessed: progress.Monitorable(&filesProcessed), 47 PackagesDiscovered: progress.Monitorable(&packagesDiscovered), 48 }, 49 }) 50 return &filesProcessed, &packagesDiscovered 51 } 52 53 func runCataloger(cataloger pkg.Cataloger, resolver file.Resolver) (catalogerResult *catalogResult, err error) { 54 // handle individual cataloger panics 55 defer func() { 56 if e := recover(); e != nil { 57 err = fmt.Errorf("%v at:\n%s", e, string(debug.Stack())) 58 } 59 }() 60 61 catalogerResult = new(catalogResult) 62 63 // find packages from the underlying raw data 64 log.WithFields("cataloger", cataloger.Name()).Trace("cataloging started") 65 packages, relationships, err := cataloger.Catalog(resolver) 66 if err != nil { 67 log.WithFields("cataloger", cataloger.Name()).Warn("error while cataloging") 68 return catalogerResult, err 69 } 70 71 catalogedPackages := len(packages) 72 73 log.WithFields("cataloger", cataloger.Name()).Debugf("discovered %d packages", catalogedPackages) 74 catalogerResult.Discovered = int64(catalogedPackages) 75 76 for _, p := range packages { 77 // generate CPEs (note: this is excluded from package ID, so is safe to mutate) 78 // we might have binary classified CPE already with the package so we want to append here 79 80 dictionaryCPE, ok := cpe.DictionaryFind(p) 81 if ok { 82 log.Debugf("used CPE dictionary to find CPE for %s package %q: %s", p.Type, p.Name, dictionaryCPE.BindToFmtString()) 83 p.CPEs = append(p.CPEs, dictionaryCPE) 84 } else { 85 p.CPEs = append(p.CPEs, cpe.Generate(p)...) 86 } 87 88 // if we were not able to identify the language we have an opportunity 89 // to try and get this value from the PURL. Worst case we assert that 90 // we could not identify the language at either stage and set UnknownLanguage 91 if p.Language == "" { 92 p.Language = pkg.LanguageFromPURL(p.PURL) 93 } 94 95 // create file-to-package relationships for files owned by the package 96 owningRelationships, err := packageFileOwnershipRelationships(p, resolver) 97 if err != nil { 98 log.WithFields("cataloger", cataloger.Name(), "package", p.Name, "error", err).Warnf("unable to create any package-file relationships") 99 } else { 100 catalogerResult.Relationships = append(catalogerResult.Relationships, owningRelationships...) 101 } 102 catalogerResult.Packages = append(catalogerResult.Packages, p) 103 } 104 catalogerResult.Relationships = append(catalogerResult.Relationships, relationships...) 105 log.WithFields("cataloger", cataloger.Name()).Trace("cataloging complete") 106 return catalogerResult, err 107 } 108 109 // Catalog a given source (container image or filesystem) with the given catalogers, returning all discovered packages. 110 // In order to efficiently retrieve contents from a underlying container image the content fetch requests are 111 // done in bulk. Specifically, all files of interest are collected from each catalogers and accumulated into a single 112 // request. 113 // 114 //nolint:funlen 115 func Catalog(resolver file.Resolver, _ *linux.Release, parallelism int, catalogers ...pkg.Cataloger) (*pkg.Collection, []artifact.Relationship, error) { 116 catalog := pkg.NewCollection() 117 var allRelationships []artifact.Relationship 118 119 filesProcessed, packagesDiscovered := newMonitor() 120 defer filesProcessed.SetCompleted() 121 defer packagesDiscovered.SetCompleted() 122 123 // perform analysis, accumulating errors for each failed analysis 124 var errs error 125 126 nCatalogers := len(catalogers) 127 128 // we do not need more parallelism than there are `catalogers`. 129 parallelism = int(math.Min(float64(nCatalogers), math.Max(1.0, float64(parallelism)))) 130 log.WithFields("parallelism", parallelism, "catalogers", nCatalogers).Debug("cataloging packages") 131 132 jobs := make(chan pkg.Cataloger, nCatalogers) 133 results := make(chan *catalogResult, nCatalogers) 134 discoveredPackages := make(chan int64, nCatalogers) 135 136 waitGroup := sync.WaitGroup{} 137 138 for i := 0; i < parallelism; i++ { 139 waitGroup.Add(1) 140 141 go func() { 142 defer waitGroup.Done() 143 144 // wait for / get the next cataloger job available. 145 for cataloger := range jobs { 146 result, err := runCataloger(cataloger, resolver) 147 148 // ensure we set the error to be aggregated 149 result.Error = err 150 151 discoveredPackages <- result.Discovered 152 153 results <- result 154 } 155 }() 156 } 157 158 // dynamically show updated discovered package status 159 go func() { 160 for discovered := range discoveredPackages { 161 packagesDiscovered.Add(discovered) 162 } 163 }() 164 165 // Enqueue the jobs 166 for _, cataloger := range catalogers { 167 jobs <- cataloger 168 } 169 close(jobs) 170 171 // Wait for the jobs to finish 172 waitGroup.Wait() 173 close(results) 174 close(discoveredPackages) 175 176 // collect the results 177 for result := range results { 178 if result.Error != nil { 179 errs = multierror.Append(errs, result.Error) 180 } 181 for _, p := range result.Packages { 182 catalog.Add(p) 183 } 184 allRelationships = append(allRelationships, result.Relationships...) 185 } 186 187 allRelationships = append(allRelationships, pkg.NewRelationships(catalog)...) 188 189 return catalog, allRelationships, errs 190 } 191 192 func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) { 193 fileOwner, ok := p.Metadata.(pkg.FileOwner) 194 if !ok { 195 return nil, nil 196 } 197 198 locations := map[artifact.ID]file.Location{} 199 200 for _, path := range fileOwner.OwnedFiles() { 201 pathRefs, err := resolver.FilesByPath(path) 202 if err != nil { 203 return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err) 204 } 205 206 if len(pathRefs) == 0 { 207 // ideally we want to warn users about missing files from a package, however, it is very common for 208 // container image authors to delete files that are not needed in order to keep image sizes small. Adding 209 // a warning here would be needlessly noisy (even for popular base images). 210 continue 211 } 212 213 for _, ref := range pathRefs { 214 if oldRef, ok := locations[ref.Coordinates.ID()]; ok { 215 log.Debugf("found path duplicate of %s", oldRef.RealPath) 216 } 217 locations[ref.Coordinates.ID()] = ref 218 } 219 } 220 221 var relationships []artifact.Relationship 222 for _, location := range locations { 223 relationships = append(relationships, artifact.Relationship{ 224 From: p, 225 To: location.Coordinates, 226 Type: artifact.ContainsRelationship, 227 }) 228 } 229 return relationships, nil 230 }