zotregistry.io/zot@v1.4.4-0.20231124084042-02a8ed785457/pkg/extensions/search/cve/scan.go (about)

     1  package cveinfo
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  
     7  	"zotregistry.io/zot/pkg/log"
     8  	mTypes "zotregistry.io/zot/pkg/meta/types"
     9  	reqCtx "zotregistry.io/zot/pkg/requestcontext"
    10  	"zotregistry.io/zot/pkg/scheduler"
    11  )
    12  
    13  func NewScanTaskGenerator(
    14  	metaDB mTypes.MetaDB,
    15  	scanner Scanner,
    16  	log log.Logger,
    17  ) scheduler.TaskGenerator {
    18  	return &scanTaskGenerator{
    19  		log:        log,
    20  		metaDB:     metaDB,
    21  		scanner:    scanner,
    22  		lock:       &sync.Mutex{},
    23  		scanErrors: map[string]error{},
    24  		scheduled:  map[string]bool{},
    25  		done:       false,
    26  	}
    27  }
    28  
// scanTaskGenerator takes all manifests from metaDB and runs the CVE scanner on them.
// If the scanner already has results cached for a specific manifest, or it cannot be
// scanned, the manifest will be skipped.
// If there are no manifests missing from the cache, the generator finishes.
//
// State kept between calls to Next:
//   - scanErrors records, per manifest digest, the error returned by a failed scan,
//     so that the matcher does not select that manifest again.
//   - scheduled holds the digests that were handed out as tasks and have not
//     finished yet.
//   - done is set by Next when no manifest matches, and cleared by Reset.
//
// lock guards scanErrors and scheduled; Reset also holds it while clearing done.
type scanTaskGenerator struct {
	log        log.Logger
	metaDB     mTypes.MetaDB
	scanner    Scanner
	lock       *sync.Mutex
	scanErrors map[string]error
	scheduled  map[string]bool
	done       bool
}
    42  
    43  func (gen *scanTaskGenerator) getMatcherFunc() mTypes.FilterFunc {
    44  	return func(repoMeta mTypes.RepoMeta, imageMeta mTypes.ImageMeta) bool {
    45  		// Note this matcher will return information based on scan status of manifests
    46  		// An index scan aggregates results of manifest scans
    47  		// If at least one of its manifests can be scanned,
    48  		// the index and its tag will be returned by the caller function too
    49  		repoName := repoMeta.Name
    50  		manifestDigest := imageMeta.Digest.String()
    51  
    52  		if gen.isScheduled(manifestDigest) {
    53  			// We skip this manifest as it has already scheduled
    54  			return false
    55  		}
    56  
    57  		if gen.hasError(manifestDigest) {
    58  			// We skip this manifest as it has already been scanned and errored
    59  			// This is to prevent the generator attempting to run a scan
    60  			// in a loop of the same image which would consistently fail
    61  			return false
    62  		}
    63  
    64  		if gen.scanner.IsResultCached(manifestDigest) {
    65  			// We skip this manifest, it was already scanned
    66  			return false
    67  		}
    68  
    69  		ok, err := gen.scanner.IsImageFormatScannable(repoName, manifestDigest)
    70  		if !ok || err != nil {
    71  			// We skip this manifest, we cannot scan it
    72  			return false
    73  		}
    74  
    75  		return true
    76  	}
    77  }
    78  
// addError records err as the scan failure for the manifest identified by
// digest, replacing any error stored for it earlier. hasError reports true for
// this digest until Reset replaces the map.
func (gen *scanTaskGenerator) addError(digest string, err error) {
	// scanErrors is only touched while holding gen.lock
	gen.lock.Lock()
	defer gen.lock.Unlock()

	gen.scanErrors[digest] = err
}
    85  
    86  func (gen *scanTaskGenerator) hasError(digest string) bool {
    87  	gen.lock.Lock()
    88  	defer gen.lock.Unlock()
    89  
    90  	_, ok := gen.scanErrors[digest]
    91  
    92  	return ok
    93  }
    94  
    95  func (gen *scanTaskGenerator) setScheduled(digest string, isScheduled bool) {
    96  	gen.lock.Lock()
    97  	defer gen.lock.Unlock()
    98  
    99  	if _, ok := gen.scheduled[digest]; ok && !isScheduled {
   100  		delete(gen.scheduled, digest)
   101  	} else if isScheduled {
   102  		gen.scheduled[digest] = true
   103  	}
   104  }
   105  
   106  func (gen *scanTaskGenerator) isScheduled(digest string) bool {
   107  	gen.lock.Lock()
   108  	defer gen.lock.Unlock()
   109  
   110  	_, ok := gen.scheduled[digest]
   111  
   112  	return ok
   113  }
   114  
   115  func (gen *scanTaskGenerator) Next() (scheduler.Task, error) {
   116  	// metaRB requires us to use a context for authorization
   117  	userAc := reqCtx.NewUserAccessControl()
   118  	userAc.SetUsername("scheduler")
   119  	userAc.SetIsAdmin(true)
   120  	ctx := userAc.DeriveContext(context.Background())
   121  
   122  	// Obtain a list of repos with un-scanned scannable manifests
   123  	// We may implement a method to return just 1 match at some point
   124  	imageMeta, err := gen.metaDB.FilterTags(ctx, mTypes.AcceptAllRepoTag, gen.getMatcherFunc())
   125  	if err != nil {
   126  		// Do not crash the generator for potential metadb inconsistencies
   127  		// as there may be scannable images not yet scanned
   128  		gen.log.Warn().Err(err).Msg("Scheduled CVE scan: error while obtaining repo metadata")
   129  	}
   130  
   131  	// no imageMeta are returned, all results are in already in cache
   132  	// or manifests cannot be scanned
   133  	if len(imageMeta) == 0 {
   134  		gen.log.Info().Msg("Scheduled CVE scan: finished for available images")
   135  
   136  		gen.done = true
   137  
   138  		return nil, nil
   139  	}
   140  
   141  	// Since imageMeta will always contain just un-scanned images we can pick
   142  	// any image out of the resulting matches
   143  	digest := imageMeta[0].Digest.String()
   144  
   145  	// Mark the digest as scheduled so it is skipped on next generator run
   146  	gen.setScheduled(digest, true)
   147  
   148  	return newScanTask(gen, imageMeta[0].Repo, digest), nil
   149  }
   150  
   151  func (gen *scanTaskGenerator) IsDone() bool {
   152  	return gen.done
   153  }
   154  
// IsReady unconditionally returns true; it does not inspect any generator state.
func (gen *scanTaskGenerator) IsReady() bool {
	return true
}
   158  
   159  func (gen *scanTaskGenerator) Reset() {
   160  	gen.lock.Lock()
   161  	defer gen.lock.Unlock()
   162  
   163  	gen.scheduled = map[string]bool{}
   164  	gen.scanErrors = map[string]error{}
   165  	gen.done = false
   166  }
   167  
// scanTask is the unit of work created by scanTaskGenerator.Next: a CVE scan of
// the single image referenced as repo@digest.
type scanTask struct {
	generator *scanTaskGenerator // generator that created the task; DoWork reports back to it
	repo      string             // repository part of the image reference
	digest    string             // manifest digest part of the image reference
}
   173  
   174  func newScanTask(generator *scanTaskGenerator, repo string, digest string) *scanTask {
   175  	return &scanTask{generator, repo, digest}
   176  }
   177  
   178  func (st *scanTask) DoWork(ctx context.Context) error {
   179  	// When work finished clean this entry from the generator
   180  	defer st.generator.setScheduled(st.digest, false)
   181  
   182  	image := st.repo + "@" + st.digest
   183  
   184  	// We cache the results internally in the scanner
   185  	// so we can discard the actual results for now
   186  	if _, err := st.generator.scanner.ScanImage(ctx, image); err != nil {
   187  		st.generator.log.Error().Err(err).Str("image", image).Msg("Scheduled CVE scan errored for image")
   188  		st.generator.addError(st.digest, err)
   189  
   190  		return err
   191  	}
   192  
   193  	st.generator.log.Debug().Str("image", image).Msg("Scheduled CVE scan completed successfully for image")
   194  
   195  	return nil
   196  }