github.com/quay/claircore@v1.5.28/indexer/layerscanner.go (about) 1 package indexer 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "net" 8 "runtime" 9 10 "github.com/quay/zlog" 11 "golang.org/x/sync/errgroup" 12 13 "github.com/quay/claircore" 14 ) 15 16 type LayerScanner struct { 17 store Store 18 19 // Maximum allowed in-flight scanners per Scan call 20 inflight int 21 22 // Pre-constructed and configured scanners. 23 ps []PackageScanner 24 ds []DistributionScanner 25 rs []RepositoryScanner 26 fis []FileScanner 27 } 28 29 // NewLayerScanner is the constructor for a LayerScanner. 30 // 31 // The provided Context is only used for the duration of the call. 32 func NewLayerScanner(ctx context.Context, concurrent int, opts *Options) (*LayerScanner, error) { 33 ctx = zlog.ContextWithValues(ctx, "component", "indexer.NewLayerScanner") 34 zlog.Info(ctx).Msg("NewLayerScanner: constructing a new layer-scanner") 35 switch { 36 case concurrent < 1: 37 zlog.Warn(ctx). 38 Int("value", concurrent). 39 Msg("rectifying nonsense 'concurrent' argument") 40 fallthrough 41 case concurrent == 0: 42 concurrent = runtime.GOMAXPROCS(0) 43 } 44 45 ps, ds, rs, fs, err := EcosystemsToScanners(ctx, opts.Ecosystems) 46 if err != nil { 47 return nil, fmt.Errorf("failed to extract scanners from ecosystems: %v", err) 48 } 49 50 return &LayerScanner{ 51 store: opts.Store, 52 inflight: concurrent, 53 ps: configAndFilter(ctx, opts, ps), 54 ds: configAndFilter(ctx, opts, ds), 55 rs: configAndFilter(ctx, opts, rs), 56 fis: configAndFilter(ctx, opts, fs), 57 }, nil 58 } 59 60 func configAndFilter[S VersionedScanner](ctx context.Context, opts *Options, ss []S) []S { 61 i := 0 62 for _, s := range ss { 63 n := s.Name() 64 var cfgMap map[string]func(interface{}) error 65 switch k := s.Kind(); k { 66 case "package": 67 cfgMap = opts.ScannerConfig.Package 68 case "repository": 69 cfgMap = opts.ScannerConfig.Repo 70 case "distribution": 71 cfgMap = opts.ScannerConfig.Dist 72 case "file": 73 cfgMap = opts.ScannerConfig.File 74 default: 75 zlog.Warn(ctx). 76 Str("kind", k). 77 Str("scanner", n). 78 Msg("unknown scanner kind") 79 continue 80 } 81 82 f, haveCfg := cfgMap[n] 83 if !haveCfg { 84 f = func(interface{}) error { return nil } 85 } 86 cs, csOK := interface{}(s).(ConfigurableScanner) 87 rs, rsOK := interface{}(s).(RPCScanner) 88 switch { 89 case haveCfg && !csOK && !rsOK: 90 zlog.Warn(ctx). 91 Str("scanner", n). 92 Msg("configuration present for an unconfigurable scanner, skipping") 93 case csOK && rsOK: 94 fallthrough 95 case !csOK && rsOK: 96 if err := rs.Configure(ctx, f, opts.Client); err != nil { 97 zlog.Error(ctx). 98 Str("scanner", n). 99 Err(err). 100 Msg("configuration failed") 101 continue 102 } 103 case csOK && !rsOK: 104 if err := cs.Configure(ctx, f); err != nil { 105 zlog.Error(ctx). 106 Str("scanner", n). 107 Err(err). 108 Msg("configuration failed") 109 continue 110 } 111 } 112 ss[i] = s 113 i++ 114 } 115 ss = ss[:i] 116 return ss 117 } 118 119 // Scan performs a concurrency controlled scan of each layer by each configured 120 // scanner, indexing the results on successful completion. 121 // 122 // Scan will launch all layer scan goroutines immediately and then only allow 123 // the configured limit to proceed. 124 // 125 // The provided Context controls cancellation for all scanners. The first error 126 // reported halts all work and is returned from Scan. 127 func (ls *LayerScanner) Scan(ctx context.Context, manifest claircore.Digest, layers []*claircore.Layer) error { 128 ctx = zlog.ContextWithValues(ctx, 129 "component", "indexer/LayerScanner.Scan", 130 "manifest", manifest.String()) 131 132 g, ctx := errgroup.WithContext(ctx) 133 // Using the goroutine's built-in limit is worst-case the same as using an 134 // external semaphore (spawn N goroutines and immediately wait on M of them, 135 // waits cancelling when the first error is returned) but putting the 136 // Context check in the "Layers" loop means we only spawn max 3 extra goroutines 137 // that will immediately return. 138 g.SetLimit(ls.inflight) 139 // Launch is a closure to capture the loop variables and then call the 140 // scanLayer method. 141 launch := func(l *claircore.Layer, s VersionedScanner) func() error { 142 return func() error { 143 select { 144 case <-ctx.Done(): 145 return context.Cause(ctx) 146 default: 147 } 148 if err := ls.scanLayer(ctx, l, s); err != nil { 149 return fmt.Errorf("layer %q: %w", l.Hash, err) 150 } 151 return nil 152 } 153 } 154 dedupe := make(map[string]struct{}) 155 Layers: 156 for _, l := range layers { 157 select { 158 case <-ctx.Done(): 159 break Layers 160 default: 161 } 162 if _, ok := dedupe[l.Hash.String()]; ok { 163 continue 164 } 165 dedupe[l.Hash.String()] = struct{}{} 166 for _, s := range ls.ps { 167 g.Go(launch(l, s)) 168 } 169 for _, s := range ls.ds { 170 g.Go(launch(l, s)) 171 } 172 for _, s := range ls.rs { 173 g.Go(launch(l, s)) 174 } 175 for _, s := range ls.fis { 176 g.Go(launch(l, s)) 177 } 178 } 179 180 return g.Wait() 181 } 182 183 // ScanLayer (along with the result type) handles an individual (scanner, layer) 184 // pair. 185 func (ls *LayerScanner) scanLayer(ctx context.Context, l *claircore.Layer, s VersionedScanner) error { 186 ctx = zlog.ContextWithValues(ctx, 187 "component", "indexer/LayerScanner.scanLayer", 188 "scanner", s.Name(), 189 "kind", s.Kind(), 190 "layer", l.Hash.String()) 191 zlog.Debug(ctx).Msg("scan start") 192 defer zlog.Debug(ctx).Msg("scan done") 193 194 ok, err := ls.store.LayerScanned(ctx, l.Hash, s) 195 if err != nil { 196 return err 197 } 198 if ok { 199 zlog.Debug(ctx).Msg("layer already scanned") 200 return nil 201 } 202 203 var result result 204 if err := result.Do(ctx, s, l); err != nil { 205 return err 206 } 207 208 if err = result.Store(ctx, ls.store, s, l); err != nil { 209 return err 210 } 211 212 if err = ls.store.SetLayerScanned(ctx, l.Hash, s); err != nil { 213 return fmt.Errorf("could not set layer scanned: %w", err) 214 } 215 216 return nil 217 } 218 219 // Result is a type that handles the kind-specific bits of the scan process. 220 type result struct { 221 pkgs []*claircore.Package 222 dists []*claircore.Distribution 223 repos []*claircore.Repository 224 files []claircore.File 225 } 226 227 // Do asserts the Scanner back to having a Scan method, and then calls it. 228 // 229 // The success value is captured and the error value is returned by Do. 230 func (r *result) Do(ctx context.Context, s VersionedScanner, l *claircore.Layer) error { 231 var err error 232 switch s := s.(type) { 233 case PackageScanner: 234 r.pkgs, err = s.Scan(ctx, l) 235 if sc, ok := s.(DefaultRepoScanner); ok { 236 if len(r.pkgs) > 0 { 237 r.repos = append(r.repos, sc.DefaultRepository(ctx)) 238 } 239 } 240 case DistributionScanner: 241 r.dists, err = s.Scan(ctx, l) 242 case RepositoryScanner: 243 r.repos, err = s.Scan(ctx, l) 244 case FileScanner: 245 r.files, err = s.Scan(ctx, l) 246 default: 247 panic(fmt.Sprintf("programmer error: unknown type %T used as scanner", s)) 248 } 249 250 var addrErr *net.AddrError 251 switch { 252 case errors.Is(err, nil): 253 case errors.As(err, &addrErr): 254 zlog.Warn(ctx).Str("scanner", s.Name()).Err(err).Msg("scanner not able to access resources") 255 return nil 256 default: 257 zlog.Info(ctx).Err(err).Send() 258 } 259 260 return err 261 } 262 263 // Store calls the properly typed store method on whatever value was captured in 264 // the result. 265 func (r *result) Store(ctx context.Context, store Store, s VersionedScanner, l *claircore.Layer) error { 266 if r.pkgs != nil { 267 zlog.Debug(ctx).Int("count", len(r.pkgs)).Msg("scan returned packages") 268 if err := store.IndexPackages(ctx, r.pkgs, l, s); err != nil { 269 return err 270 } 271 } 272 if r.dists != nil { 273 zlog.Debug(ctx).Int("count", len(r.dists)).Msg("scan returned dists") 274 if err := store.IndexDistributions(ctx, r.dists, l, s); err != nil { 275 return err 276 } 277 } 278 if r.repos != nil { 279 zlog.Debug(ctx).Int("count", len(r.repos)).Msg("scan returned repos") 280 if err := store.IndexRepositories(ctx, r.repos, l, s); err != nil { 281 return err 282 } 283 } 284 if r.files != nil { 285 zlog.Debug(ctx).Int("count", len(r.files)).Msg("scan returned files") 286 if err := store.IndexFiles(ctx, r.files, l, s); err != nil { 287 return err 288 } 289 } 290 return nil 291 }