zotregistry.io/zot@v1.4.4-0.20231124084042-02a8ed785457/pkg/extensions/search/cve/scan.go (about) 1 package cveinfo 2 3 import ( 4 "context" 5 "sync" 6 7 "zotregistry.io/zot/pkg/log" 8 mTypes "zotregistry.io/zot/pkg/meta/types" 9 reqCtx "zotregistry.io/zot/pkg/requestcontext" 10 "zotregistry.io/zot/pkg/scheduler" 11 ) 12 13 func NewScanTaskGenerator( 14 metaDB mTypes.MetaDB, 15 scanner Scanner, 16 log log.Logger, 17 ) scheduler.TaskGenerator { 18 return &scanTaskGenerator{ 19 log: log, 20 metaDB: metaDB, 21 scanner: scanner, 22 lock: &sync.Mutex{}, 23 scanErrors: map[string]error{}, 24 scheduled: map[string]bool{}, 25 done: false, 26 } 27 } 28 29 // scanTaskGenerator takes all manifests from repodb and runs the CVE scanner on them. 30 // If the scanner already has results cached for a specific manifests, or it cannot be 31 // scanned, the manifest will be skipped. 32 // If there are no manifests missing from the cache, the generator finishes. 33 type scanTaskGenerator struct { 34 log log.Logger 35 metaDB mTypes.MetaDB 36 scanner Scanner 37 lock *sync.Mutex 38 scanErrors map[string]error 39 scheduled map[string]bool 40 done bool 41 } 42 43 func (gen *scanTaskGenerator) getMatcherFunc() mTypes.FilterFunc { 44 return func(repoMeta mTypes.RepoMeta, imageMeta mTypes.ImageMeta) bool { 45 // Note this matcher will return information based on scan status of manifests 46 // An index scan aggregates results of manifest scans 47 // If at least one of its manifests can be scanned, 48 // the index and its tag will be returned by the caller function too 49 repoName := repoMeta.Name 50 manifestDigest := imageMeta.Digest.String() 51 52 if gen.isScheduled(manifestDigest) { 53 // We skip this manifest as it has already scheduled 54 return false 55 } 56 57 if gen.hasError(manifestDigest) { 58 // We skip this manifest as it has already been scanned and errored 59 // This is to prevent the generator attempting to run a scan 60 // in a loop of the same image which would consistently fail 61 return false 62 } 63 64 if gen.scanner.IsResultCached(manifestDigest) { 65 // We skip this manifest, it was already scanned 66 return false 67 } 68 69 ok, err := gen.scanner.IsImageFormatScannable(repoName, manifestDigest) 70 if !ok || err != nil { 71 // We skip this manifest, we cannot scan it 72 return false 73 } 74 75 return true 76 } 77 } 78 79 func (gen *scanTaskGenerator) addError(digest string, err error) { 80 gen.lock.Lock() 81 defer gen.lock.Unlock() 82 83 gen.scanErrors[digest] = err 84 } 85 86 func (gen *scanTaskGenerator) hasError(digest string) bool { 87 gen.lock.Lock() 88 defer gen.lock.Unlock() 89 90 _, ok := gen.scanErrors[digest] 91 92 return ok 93 } 94 95 func (gen *scanTaskGenerator) setScheduled(digest string, isScheduled bool) { 96 gen.lock.Lock() 97 defer gen.lock.Unlock() 98 99 if _, ok := gen.scheduled[digest]; ok && !isScheduled { 100 delete(gen.scheduled, digest) 101 } else if isScheduled { 102 gen.scheduled[digest] = true 103 } 104 } 105 106 func (gen *scanTaskGenerator) isScheduled(digest string) bool { 107 gen.lock.Lock() 108 defer gen.lock.Unlock() 109 110 _, ok := gen.scheduled[digest] 111 112 return ok 113 } 114 115 func (gen *scanTaskGenerator) Next() (scheduler.Task, error) { 116 // metaRB requires us to use a context for authorization 117 userAc := reqCtx.NewUserAccessControl() 118 userAc.SetUsername("scheduler") 119 userAc.SetIsAdmin(true) 120 ctx := userAc.DeriveContext(context.Background()) 121 122 // Obtain a list of repos with un-scanned scannable manifests 123 // We may implement a method to return just 1 match at some point 124 imageMeta, err := gen.metaDB.FilterTags(ctx, mTypes.AcceptAllRepoTag, gen.getMatcherFunc()) 125 if err != nil { 126 // Do not crash the generator for potential metadb inconsistencies 127 // as there may be scannable images not yet scanned 128 gen.log.Warn().Err(err).Msg("Scheduled CVE scan: error while obtaining repo metadata") 129 } 130 131 // no imageMeta are returned, all results are in already in cache 132 // or manifests cannot be scanned 133 if len(imageMeta) == 0 { 134 gen.log.Info().Msg("Scheduled CVE scan: finished for available images") 135 136 gen.done = true 137 138 return nil, nil 139 } 140 141 // Since imageMeta will always contain just un-scanned images we can pick 142 // any image out of the resulting matches 143 digest := imageMeta[0].Digest.String() 144 145 // Mark the digest as scheduled so it is skipped on next generator run 146 gen.setScheduled(digest, true) 147 148 return newScanTask(gen, imageMeta[0].Repo, digest), nil 149 } 150 151 func (gen *scanTaskGenerator) IsDone() bool { 152 return gen.done 153 } 154 155 func (gen *scanTaskGenerator) IsReady() bool { 156 return true 157 } 158 159 func (gen *scanTaskGenerator) Reset() { 160 gen.lock.Lock() 161 defer gen.lock.Unlock() 162 163 gen.scheduled = map[string]bool{} 164 gen.scanErrors = map[string]error{} 165 gen.done = false 166 } 167 168 type scanTask struct { 169 generator *scanTaskGenerator 170 repo string 171 digest string 172 } 173 174 func newScanTask(generator *scanTaskGenerator, repo string, digest string) *scanTask { 175 return &scanTask{generator, repo, digest} 176 } 177 178 func (st *scanTask) DoWork(ctx context.Context) error { 179 // When work finished clean this entry from the generator 180 defer st.generator.setScheduled(st.digest, false) 181 182 image := st.repo + "@" + st.digest 183 184 // We cache the results internally in the scanner 185 // so we can discard the actual results for now 186 if _, err := st.generator.scanner.ScanImage(ctx, image); err != nil { 187 st.generator.log.Error().Err(err).Str("image", image).Msg("Scheduled CVE scan errored for image") 188 st.generator.addError(st.digest, err) 189 190 return err 191 } 192 193 st.generator.log.Debug().Str("image", image).Msg("Scheduled CVE scan completed successfully for image") 194 195 return nil 196 }