github.com/quay/claircore@v1.5.28/indexer/layerscanner.go (about)

     1  package indexer
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"net"
     8  	"runtime"
     9  
    10  	"github.com/quay/zlog"
    11  	"golang.org/x/sync/errgroup"
    12  
    13  	"github.com/quay/claircore"
    14  )
    15  
    16  type LayerScanner struct {
    17  	store Store
    18  
    19  	// Maximum allowed in-flight scanners per Scan call
    20  	inflight int
    21  
    22  	// Pre-constructed and configured scanners.
    23  	ps  []PackageScanner
    24  	ds  []DistributionScanner
    25  	rs  []RepositoryScanner
    26  	fis []FileScanner
    27  }
    28  
    29  // NewLayerScanner is the constructor for a LayerScanner.
    30  //
    31  // The provided Context is only used for the duration of the call.
    32  func NewLayerScanner(ctx context.Context, concurrent int, opts *Options) (*LayerScanner, error) {
    33  	ctx = zlog.ContextWithValues(ctx, "component", "indexer.NewLayerScanner")
    34  	zlog.Info(ctx).Msg("NewLayerScanner: constructing a new layer-scanner")
    35  	switch {
    36  	case concurrent < 1:
    37  		zlog.Warn(ctx).
    38  			Int("value", concurrent).
    39  			Msg("rectifying nonsense 'concurrent' argument")
    40  		fallthrough
    41  	case concurrent == 0:
    42  		concurrent = runtime.GOMAXPROCS(0)
    43  	}
    44  
    45  	ps, ds, rs, fs, err := EcosystemsToScanners(ctx, opts.Ecosystems)
    46  	if err != nil {
    47  		return nil, fmt.Errorf("failed to extract scanners from ecosystems: %v", err)
    48  	}
    49  
    50  	return &LayerScanner{
    51  		store:    opts.Store,
    52  		inflight: concurrent,
    53  		ps:       configAndFilter(ctx, opts, ps),
    54  		ds:       configAndFilter(ctx, opts, ds),
    55  		rs:       configAndFilter(ctx, opts, rs),
    56  		fis:      configAndFilter(ctx, opts, fs),
    57  	}, nil
    58  }
    59  
    60  func configAndFilter[S VersionedScanner](ctx context.Context, opts *Options, ss []S) []S {
    61  	i := 0
    62  	for _, s := range ss {
    63  		n := s.Name()
    64  		var cfgMap map[string]func(interface{}) error
    65  		switch k := s.Kind(); k {
    66  		case "package":
    67  			cfgMap = opts.ScannerConfig.Package
    68  		case "repository":
    69  			cfgMap = opts.ScannerConfig.Repo
    70  		case "distribution":
    71  			cfgMap = opts.ScannerConfig.Dist
    72  		case "file":
    73  			cfgMap = opts.ScannerConfig.File
    74  		default:
    75  			zlog.Warn(ctx).
    76  				Str("kind", k).
    77  				Str("scanner", n).
    78  				Msg("unknown scanner kind")
    79  			continue
    80  		}
    81  
    82  		f, haveCfg := cfgMap[n]
    83  		if !haveCfg {
    84  			f = func(interface{}) error { return nil }
    85  		}
    86  		cs, csOK := interface{}(s).(ConfigurableScanner)
    87  		rs, rsOK := interface{}(s).(RPCScanner)
    88  		switch {
    89  		case haveCfg && !csOK && !rsOK:
    90  			zlog.Warn(ctx).
    91  				Str("scanner", n).
    92  				Msg("configuration present for an unconfigurable scanner, skipping")
    93  		case csOK && rsOK:
    94  			fallthrough
    95  		case !csOK && rsOK:
    96  			if err := rs.Configure(ctx, f, opts.Client); err != nil {
    97  				zlog.Error(ctx).
    98  					Str("scanner", n).
    99  					Err(err).
   100  					Msg("configuration failed")
   101  				continue
   102  			}
   103  		case csOK && !rsOK:
   104  			if err := cs.Configure(ctx, f); err != nil {
   105  				zlog.Error(ctx).
   106  					Str("scanner", n).
   107  					Err(err).
   108  					Msg("configuration failed")
   109  				continue
   110  			}
   111  		}
   112  		ss[i] = s
   113  		i++
   114  	}
   115  	ss = ss[:i]
   116  	return ss
   117  }
   118  
   119  // Scan performs a concurrency controlled scan of each layer by each configured
   120  // scanner, indexing the results on successful completion.
   121  //
   122  // Scan will launch all layer scan goroutines immediately and then only allow
   123  // the configured limit to proceed.
   124  //
   125  // The provided Context controls cancellation for all scanners. The first error
   126  // reported halts all work and is returned from Scan.
   127  func (ls *LayerScanner) Scan(ctx context.Context, manifest claircore.Digest, layers []*claircore.Layer) error {
   128  	ctx = zlog.ContextWithValues(ctx,
   129  		"component", "indexer/LayerScanner.Scan",
   130  		"manifest", manifest.String())
   131  
   132  	g, ctx := errgroup.WithContext(ctx)
   133  	// Using the goroutine's built-in limit is worst-case the same as using an
   134  	// external semaphore (spawn N goroutines and immediately wait on M of them,
   135  	// waits cancelling when the first error is returned) but putting the
   136  	// Context check in the "Layers" loop means we only spawn max 3 extra goroutines
   137  	// that will immediately return.
   138  	g.SetLimit(ls.inflight)
   139  	// Launch is a closure to capture the loop variables and then call the
   140  	// scanLayer method.
   141  	launch := func(l *claircore.Layer, s VersionedScanner) func() error {
   142  		return func() error {
   143  			select {
   144  			case <-ctx.Done():
   145  				return context.Cause(ctx)
   146  			default:
   147  			}
   148  			if err := ls.scanLayer(ctx, l, s); err != nil {
   149  				return fmt.Errorf("layer %q: %w", l.Hash, err)
   150  			}
   151  			return nil
   152  		}
   153  	}
   154  	dedupe := make(map[string]struct{})
   155  Layers:
   156  	for _, l := range layers {
   157  		select {
   158  		case <-ctx.Done():
   159  			break Layers
   160  		default:
   161  		}
   162  		if _, ok := dedupe[l.Hash.String()]; ok {
   163  			continue
   164  		}
   165  		dedupe[l.Hash.String()] = struct{}{}
   166  		for _, s := range ls.ps {
   167  			g.Go(launch(l, s))
   168  		}
   169  		for _, s := range ls.ds {
   170  			g.Go(launch(l, s))
   171  		}
   172  		for _, s := range ls.rs {
   173  			g.Go(launch(l, s))
   174  		}
   175  		for _, s := range ls.fis {
   176  			g.Go(launch(l, s))
   177  		}
   178  	}
   179  
   180  	return g.Wait()
   181  }
   182  
   183  // ScanLayer (along with the result type) handles an individual (scanner, layer)
   184  // pair.
   185  func (ls *LayerScanner) scanLayer(ctx context.Context, l *claircore.Layer, s VersionedScanner) error {
   186  	ctx = zlog.ContextWithValues(ctx,
   187  		"component", "indexer/LayerScanner.scanLayer",
   188  		"scanner", s.Name(),
   189  		"kind", s.Kind(),
   190  		"layer", l.Hash.String())
   191  	zlog.Debug(ctx).Msg("scan start")
   192  	defer zlog.Debug(ctx).Msg("scan done")
   193  
   194  	ok, err := ls.store.LayerScanned(ctx, l.Hash, s)
   195  	if err != nil {
   196  		return err
   197  	}
   198  	if ok {
   199  		zlog.Debug(ctx).Msg("layer already scanned")
   200  		return nil
   201  	}
   202  
   203  	var result result
   204  	if err := result.Do(ctx, s, l); err != nil {
   205  		return err
   206  	}
   207  
   208  	if err = result.Store(ctx, ls.store, s, l); err != nil {
   209  		return err
   210  	}
   211  
   212  	if err = ls.store.SetLayerScanned(ctx, l.Hash, s); err != nil {
   213  		return fmt.Errorf("could not set layer scanned: %w", err)
   214  	}
   215  
   216  	return nil
   217  }
   218  
   219  // Result is a type that handles the kind-specific bits of the scan process.
   220  type result struct {
   221  	pkgs  []*claircore.Package
   222  	dists []*claircore.Distribution
   223  	repos []*claircore.Repository
   224  	files []claircore.File
   225  }
   226  
   227  // Do asserts the Scanner back to having a Scan method, and then calls it.
   228  //
   229  // The success value is captured and the error value is returned by Do.
   230  func (r *result) Do(ctx context.Context, s VersionedScanner, l *claircore.Layer) error {
   231  	var err error
   232  	switch s := s.(type) {
   233  	case PackageScanner:
   234  		r.pkgs, err = s.Scan(ctx, l)
   235  		if sc, ok := s.(DefaultRepoScanner); ok {
   236  			if len(r.pkgs) > 0 {
   237  				r.repos = append(r.repos, sc.DefaultRepository(ctx))
   238  			}
   239  		}
   240  	case DistributionScanner:
   241  		r.dists, err = s.Scan(ctx, l)
   242  	case RepositoryScanner:
   243  		r.repos, err = s.Scan(ctx, l)
   244  	case FileScanner:
   245  		r.files, err = s.Scan(ctx, l)
   246  	default:
   247  		panic(fmt.Sprintf("programmer error: unknown type %T used as scanner", s))
   248  	}
   249  
   250  	var addrErr *net.AddrError
   251  	switch {
   252  	case errors.Is(err, nil):
   253  	case errors.As(err, &addrErr):
   254  		zlog.Warn(ctx).Str("scanner", s.Name()).Err(err).Msg("scanner not able to access resources")
   255  		return nil
   256  	default:
   257  		zlog.Info(ctx).Err(err).Send()
   258  	}
   259  
   260  	return err
   261  }
   262  
   263  // Store calls the properly typed store method on whatever value was captured in
   264  // the result.
   265  func (r *result) Store(ctx context.Context, store Store, s VersionedScanner, l *claircore.Layer) error {
   266  	if r.pkgs != nil {
   267  		zlog.Debug(ctx).Int("count", len(r.pkgs)).Msg("scan returned packages")
   268  		if err := store.IndexPackages(ctx, r.pkgs, l, s); err != nil {
   269  			return err
   270  		}
   271  	}
   272  	if r.dists != nil {
   273  		zlog.Debug(ctx).Int("count", len(r.dists)).Msg("scan returned dists")
   274  		if err := store.IndexDistributions(ctx, r.dists, l, s); err != nil {
   275  			return err
   276  		}
   277  	}
   278  	if r.repos != nil {
   279  		zlog.Debug(ctx).Int("count", len(r.repos)).Msg("scan returned repos")
   280  		if err := store.IndexRepositories(ctx, r.repos, l, s); err != nil {
   281  			return err
   282  		}
   283  	}
   284  	if r.files != nil {
   285  		zlog.Debug(ctx).Int("count", len(r.files)).Msg("scan returned files")
   286  		if err := store.IndexFiles(ctx, r.files, l, s); err != nil {
   287  			return err
   288  		}
   289  	}
   290  	return nil
   291  }