zotregistry.dev/zot@v1.4.4-0.20240314164342-eec277e14d20/pkg/extensions/search/cve/scan.go (about)

     1  package cveinfo
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  
     8  	"zotregistry.dev/zot/pkg/log"
     9  	mTypes "zotregistry.dev/zot/pkg/meta/types"
    10  	reqCtx "zotregistry.dev/zot/pkg/requestcontext"
    11  	"zotregistry.dev/zot/pkg/scheduler"
    12  )
    13  
    14  func NewScanTaskGenerator(
    15  	metaDB mTypes.MetaDB,
    16  	scanner Scanner,
    17  	logC log.Logger,
    18  ) scheduler.TaskGenerator {
    19  	sublogger := logC.With().Str("component", "cve").Logger()
    20  
    21  	return &scanTaskGenerator{
    22  		log:        log.Logger{Logger: sublogger},
    23  		metaDB:     metaDB,
    24  		scanner:    scanner,
    25  		lock:       &sync.Mutex{},
    26  		scanErrors: map[string]error{},
    27  		scheduled:  map[string]bool{},
    28  		done:       false,
    29  	}
    30  }
    31  
    32  // scanTaskGenerator takes all manifests from repodb and runs the CVE scanner on them.
    33  // If the scanner already has results cached for a specific manifests, or it cannot be
    34  // scanned, the manifest will be skipped.
    35  // If there are no manifests missing from the cache, the generator finishes.
    36  type scanTaskGenerator struct {
    37  	log        log.Logger
    38  	metaDB     mTypes.MetaDB
    39  	scanner    Scanner
    40  	lock       *sync.Mutex
    41  	scanErrors map[string]error
    42  	scheduled  map[string]bool
    43  	done       bool
    44  }
    45  
    46  func (gen *scanTaskGenerator) getMatcherFunc() mTypes.FilterFunc {
    47  	return func(repoMeta mTypes.RepoMeta, imageMeta mTypes.ImageMeta) bool {
    48  		// Note this matcher will return information based on scan status of manifests
    49  		// An index scan aggregates results of manifest scans
    50  		// If at least one of its manifests can be scanned,
    51  		// the index and its tag will be returned by the caller function too
    52  		repoName := repoMeta.Name
    53  		manifestDigest := imageMeta.Digest.String()
    54  
    55  		if gen.isScheduled(manifestDigest) {
    56  			// We skip this manifest as it has already scheduled
    57  			return false
    58  		}
    59  
    60  		if gen.hasError(manifestDigest) {
    61  			// We skip this manifest as it has already been scanned and errored
    62  			// This is to prevent the generator attempting to run a scan
    63  			// in a loop of the same image which would consistently fail
    64  			return false
    65  		}
    66  
    67  		if gen.scanner.IsResultCached(manifestDigest) {
    68  			// We skip this manifest, it was already scanned
    69  			return false
    70  		}
    71  
    72  		ok, err := gen.scanner.IsImageFormatScannable(repoName, manifestDigest)
    73  		if !ok || err != nil {
    74  			// We skip this manifest, we cannot scan it
    75  			return false
    76  		}
    77  
    78  		return true
    79  	}
    80  }
    81  
    82  func (gen *scanTaskGenerator) addError(digest string, err error) {
    83  	gen.lock.Lock()
    84  	defer gen.lock.Unlock()
    85  
    86  	gen.scanErrors[digest] = err
    87  }
    88  
    89  func (gen *scanTaskGenerator) hasError(digest string) bool {
    90  	gen.lock.Lock()
    91  	defer gen.lock.Unlock()
    92  
    93  	_, ok := gen.scanErrors[digest]
    94  
    95  	return ok
    96  }
    97  
    98  func (gen *scanTaskGenerator) setScheduled(digest string, isScheduled bool) {
    99  	gen.lock.Lock()
   100  	defer gen.lock.Unlock()
   101  
   102  	if _, ok := gen.scheduled[digest]; ok && !isScheduled {
   103  		delete(gen.scheduled, digest)
   104  	} else if isScheduled {
   105  		gen.scheduled[digest] = true
   106  	}
   107  }
   108  
   109  func (gen *scanTaskGenerator) isScheduled(digest string) bool {
   110  	gen.lock.Lock()
   111  	defer gen.lock.Unlock()
   112  
   113  	_, ok := gen.scheduled[digest]
   114  
   115  	return ok
   116  }
   117  
   118  func (gen *scanTaskGenerator) Name() string {
   119  	return "CVEScanGenerator"
   120  }
   121  
   122  func (gen *scanTaskGenerator) Next() (scheduler.Task, error) {
   123  	// metaRB requires us to use a context for authorization
   124  	userAc := reqCtx.NewUserAccessControl()
   125  	userAc.SetUsername("scheduler")
   126  	userAc.SetIsAdmin(true)
   127  	ctx := userAc.DeriveContext(context.Background())
   128  
   129  	// Obtain a list of repos with un-scanned scannable manifests
   130  	// We may implement a method to return just 1 match at some point
   131  	imageMeta, err := gen.metaDB.FilterTags(ctx, mTypes.AcceptAllRepoTag, gen.getMatcherFunc())
   132  	if err != nil {
   133  		// Do not crash the generator for potential metadb inconsistencies
   134  		// as there may be scannable images not yet scanned
   135  		gen.log.Warn().Err(err).Msg("failed to obtain repo metadata during scheduled cve scan")
   136  	}
   137  
   138  	// no imageMeta are returned, all results are in already in cache
   139  	// or manifests cannot be scanned
   140  	if len(imageMeta) == 0 {
   141  		gen.log.Info().Msg("finished scanning available images during scheduled cve scan")
   142  
   143  		gen.done = true
   144  
   145  		return nil, nil
   146  	}
   147  
   148  	// Since imageMeta will always contain just un-scanned images we can pick
   149  	// any image out of the resulting matches
   150  	digest := imageMeta[0].Digest.String()
   151  
   152  	// Mark the digest as scheduled so it is skipped on next generator run
   153  	gen.setScheduled(digest, true)
   154  
   155  	return newScanTask(gen, imageMeta[0].Repo, digest), nil
   156  }
   157  
   158  func (gen *scanTaskGenerator) IsDone() bool {
   159  	return gen.done
   160  }
   161  
   162  func (gen *scanTaskGenerator) IsReady() bool {
   163  	return true
   164  }
   165  
   166  func (gen *scanTaskGenerator) Reset() {
   167  	gen.lock.Lock()
   168  	defer gen.lock.Unlock()
   169  
   170  	gen.scheduled = map[string]bool{}
   171  	gen.scanErrors = map[string]error{}
   172  	gen.done = false
   173  }
   174  
   175  type scanTask struct {
   176  	generator *scanTaskGenerator
   177  	repo      string
   178  	digest    string
   179  }
   180  
   181  func newScanTask(generator *scanTaskGenerator, repo string, digest string) *scanTask {
   182  	return &scanTask{generator, repo, digest}
   183  }
   184  
   185  func (st *scanTask) DoWork(ctx context.Context) error {
   186  	// When work finished clean this entry from the generator
   187  	defer st.generator.setScheduled(st.digest, false)
   188  
   189  	image := st.repo + "@" + st.digest
   190  
   191  	// We cache the results internally in the scanner
   192  	// so we can discard the actual results for now
   193  	if _, err := st.generator.scanner.ScanImage(ctx, image); err != nil {
   194  		st.generator.log.Error().Err(err).Str("image", image).Msg("failed to perform scheduled cve scan for image")
   195  		st.generator.addError(st.digest, err)
   196  
   197  		return err
   198  	}
   199  
   200  	st.generator.log.Debug().Str("image", image).Msg("scheduled cve scan completed successfully for image")
   201  
   202  	return nil
   203  }
   204  
   205  func (st *scanTask) String() string {
   206  	return fmt.Sprintf("{Name: \"%s\", repo: \"%s\", digest: \"%s\"}",
   207  		st.Name(), st.repo, st.digest)
   208  }
   209  
   210  func (st *scanTask) Name() string {
   211  	return "ScanTask"
   212  }