github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/catalog.go (about) 1 package cataloger 2 3 import ( 4 "fmt" 5 "math" 6 "runtime/debug" 7 "sync" 8 9 "github.com/hashicorp/go-multierror" 10 "github.com/wagoodman/go-partybus" 11 "github.com/wagoodman/go-progress" 12 13 "github.com/anchore/syft/syft/artifact" 14 "github.com/anchore/syft/syft/event" 15 "github.com/anchore/syft/syft/file" 16 "github.com/anchore/syft/syft/linux" 17 "github.com/anchore/syft/syft/pkg" 18 "github.com/anchore/syft/syft/pkg/cataloger/common/cpe" 19 "github.com/lineaje-labs/syft/internal/bus" 20 "github.com/lineaje-labs/syft/internal/log" 21 ) 22 23 // Monitor provides progress-related data for observing the progress of a Catalog() call (published on the event bus). 24 type Monitor struct { 25 FilesProcessed progress.Monitorable // the number of files selected and contents analyzed from all registered catalogers 26 PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers 27 } 28 29 // catalogResult provides the result of running a single cataloger against source 30 type catalogResult struct { 31 Packages []pkg.Package 32 Relationships []artifact.Relationship 33 // Discovered may sometimes be more than len(packages) 34 Discovered int64 35 Error error 36 } 37 38 // newMonitor creates a new Monitor object and publishes the object on the bus as a PackageCatalogerStarted event. 39 func newMonitor() (*progress.Manual, *progress.Manual) { 40 filesProcessed := progress.Manual{} 41 packagesDiscovered := progress.Manual{} 42 43 bus.Publish(partybus.Event{ 44 Type: event.PackageCatalogerStarted, 45 Value: Monitor{ 46 FilesProcessed: progress.Monitorable(&filesProcessed), 47 PackagesDiscovered: progress.Monitorable(&packagesDiscovered), 48 }, 49 }) 50 return &filesProcessed, &packagesDiscovered 51 } 52 53 func runCataloger(cataloger pkg.Cataloger, resolver file.Resolver) (catalogerResult *catalogResult, err error) { 54 // handle individual cataloger panics 55 defer func() { 56 if e := recover(); e != nil { 57 err = fmt.Errorf("%v at:\n%s", e, string(debug.Stack())) 58 } 59 }() 60 61 catalogerResult = new(catalogResult) 62 63 // find packages from the underlying raw data 64 log.WithFields("cataloger", cataloger.Name()).Trace("cataloging started") 65 packages, relationships, err := cataloger.Catalog(resolver) 66 if err != nil { 67 log.WithFields("cataloger", cataloger.Name()).Warn("error while cataloging") 68 return catalogerResult, err 69 } 70 71 catalogedPackages := len(packages) 72 73 log.WithFields("cataloger", cataloger.Name()).Debugf("discovered %d packages", catalogedPackages) 74 catalogerResult.Discovered = int64(catalogedPackages) 75 76 for _, p := range packages { 77 // generate CPEs (note: this is excluded from package ID, so is safe to mutate) 78 // we might have binary classified CPE already with the package so we want to append here 79 80 dictionaryCPE, ok := cpe.DictionaryFind(p) 81 if ok { 82 log.Debugf("used CPE dictionary to find CPE for %s package %q: %s", p.Type, p.Name, dictionaryCPE.BindToFmtString()) 83 p.CPEs = append(p.CPEs, dictionaryCPE) 84 } else { 85 p.CPEs = append(p.CPEs, cpe.Generate(p)...) 86 } 87 88 // if we were not able to identify the language we have an opportunity 89 // to try and get this value from the PURL. Worst case we assert that 90 // we could not identify the language at either stage and set UnknownLanguage 91 if p.Language == "" { 92 p.Language = pkg.LanguageFromPURL(p.PURL) 93 } 94 95 // create file-to-package relationships for files owned by the package 96 owningRelationships, err := packageFileOwnershipRelationships(p, resolver) 97 if err != nil { 98 log.WithFields("cataloger", cataloger.Name(), "package", p.Name, "error", err).Warnf("unable to create any package-file relationships") 99 } else { 100 catalogerResult.Relationships = append(catalogerResult.Relationships, owningRelationships...) 101 } 102 catalogerResult.Packages = append(catalogerResult.Packages, p) 103 } 104 catalogerResult.Relationships = append(catalogerResult.Relationships, relationships...) 105 log.WithFields("cataloger", cataloger.Name()).Trace("cataloging complete") 106 return catalogerResult, err 107 } 108 109 // Catalog a given source (container image or filesystem) with the given catalogers, returning all discovered packages. 110 // In order to efficiently retrieve contents from a underlying container image the content fetch requests are 111 // done in bulk. Specifically, all files of interest are collected from each catalogers and accumulated into a single 112 // request. 113 // 114 //nolint:funlen 115 func Catalog( 116 resolver file.Resolver, _ *linux.Release, parallelism int, catalogers ...pkg.Cataloger, 117 ) (*pkg.Collection, []artifact.Relationship, error) { 118 catalog := pkg.NewCollection() 119 var allRelationships []artifact.Relationship 120 121 filesProcessed, packagesDiscovered := newMonitor() 122 defer filesProcessed.SetCompleted() 123 defer packagesDiscovered.SetCompleted() 124 125 // perform analysis, accumulating errors for each failed analysis 126 var errs error 127 128 nCatalogers := len(catalogers) 129 130 // we do not need more parallelism than there are `catalogers`. 131 parallelism = int(math.Min(float64(nCatalogers), math.Max(1.0, float64(parallelism)))) 132 log.WithFields("parallelism", parallelism, "catalogers", nCatalogers).Debug("cataloging packages") 133 134 jobs := make(chan pkg.Cataloger, nCatalogers) 135 results := make(chan *catalogResult, nCatalogers) 136 discoveredPackages := make(chan int64, nCatalogers) 137 138 waitGroup := sync.WaitGroup{} 139 140 for i := 0; i < parallelism; i++ { 141 waitGroup.Add(1) 142 143 go func() { 144 defer waitGroup.Done() 145 146 // wait for / get the next cataloger job available. 147 for cataloger := range jobs { 148 result, err := runCataloger(cataloger, resolver) 149 150 // ensure we set the error to be aggregated 151 result.Error = err 152 153 discoveredPackages <- result.Discovered 154 155 results <- result 156 } 157 }() 158 } 159 160 // dynamically show updated discovered package status 161 go func() { 162 for discovered := range discoveredPackages { 163 packagesDiscovered.Add(discovered) 164 } 165 }() 166 167 // Enqueue the jobs 168 for _, cataloger := range catalogers { 169 jobs <- cataloger 170 } 171 close(jobs) 172 173 // Wait for the jobs to finish 174 waitGroup.Wait() 175 close(results) 176 close(discoveredPackages) 177 178 // collect the results 179 for result := range results { 180 if result.Error != nil { 181 errs = multierror.Append(errs, result.Error) 182 } 183 for _, p := range result.Packages { 184 catalog.Add(p) 185 } 186 allRelationships = append(allRelationships, result.Relationships...) 187 } 188 189 allRelationships = append(allRelationships, pkg.NewRelationships(catalog)...) 190 191 return catalog, allRelationships, errs 192 } 193 194 func packageFileOwnershipRelationships(p pkg.Package, resolver file.PathResolver) ([]artifact.Relationship, error) { 195 fileOwner, ok := p.Metadata.(pkg.FileOwner) 196 if !ok { 197 return nil, nil 198 } 199 200 locations := map[artifact.ID]file.Location{} 201 202 for _, path := range fileOwner.OwnedFiles() { 203 pathRefs, err := resolver.FilesByPath(path) 204 if err != nil { 205 return nil, fmt.Errorf("unable to find path for path=%q: %w", path, err) 206 } 207 208 if len(pathRefs) == 0 { 209 // ideally we want to warn users about missing files from a package, however, it is very common for 210 // container image authors to delete files that are not needed in order to keep image sizes small. Adding 211 // a warning here would be needlessly noisy (even for popular base images). 212 continue 213 } 214 215 for _, ref := range pathRefs { 216 if oldRef, ok := locations[ref.Coordinates.ID()]; ok { 217 log.Debugf("found path duplicate of %s", oldRef.RealPath) 218 } 219 locations[ref.Coordinates.ID()] = ref 220 } 221 } 222 223 var relationships []artifact.Relationship 224 for _, location := range locations { 225 relationships = append(relationships, artifact.Relationship{ 226 From: p, 227 To: location.Coordinates, 228 Type: artifact.ContainsRelationship, 229 }) 230 } 231 return relationships, nil 232 }