zotregistry.dev/zot@v1.4.4-0.20240314164342-eec277e14d20/pkg/extensions/search/cve/scan.go (about) 1 package cveinfo 2 3 import ( 4 "context" 5 "fmt" 6 "sync" 7 8 "zotregistry.dev/zot/pkg/log" 9 mTypes "zotregistry.dev/zot/pkg/meta/types" 10 reqCtx "zotregistry.dev/zot/pkg/requestcontext" 11 "zotregistry.dev/zot/pkg/scheduler" 12 ) 13 14 func NewScanTaskGenerator( 15 metaDB mTypes.MetaDB, 16 scanner Scanner, 17 logC log.Logger, 18 ) scheduler.TaskGenerator { 19 sublogger := logC.With().Str("component", "cve").Logger() 20 21 return &scanTaskGenerator{ 22 log: log.Logger{Logger: sublogger}, 23 metaDB: metaDB, 24 scanner: scanner, 25 lock: &sync.Mutex{}, 26 scanErrors: map[string]error{}, 27 scheduled: map[string]bool{}, 28 done: false, 29 } 30 } 31 32 // scanTaskGenerator takes all manifests from repodb and runs the CVE scanner on them. 33 // If the scanner already has results cached for a specific manifests, or it cannot be 34 // scanned, the manifest will be skipped. 35 // If there are no manifests missing from the cache, the generator finishes. 36 type scanTaskGenerator struct { 37 log log.Logger 38 metaDB mTypes.MetaDB 39 scanner Scanner 40 lock *sync.Mutex 41 scanErrors map[string]error 42 scheduled map[string]bool 43 done bool 44 } 45 46 func (gen *scanTaskGenerator) getMatcherFunc() mTypes.FilterFunc { 47 return func(repoMeta mTypes.RepoMeta, imageMeta mTypes.ImageMeta) bool { 48 // Note this matcher will return information based on scan status of manifests 49 // An index scan aggregates results of manifest scans 50 // If at least one of its manifests can be scanned, 51 // the index and its tag will be returned by the caller function too 52 repoName := repoMeta.Name 53 manifestDigest := imageMeta.Digest.String() 54 55 if gen.isScheduled(manifestDigest) { 56 // We skip this manifest as it has already scheduled 57 return false 58 } 59 60 if gen.hasError(manifestDigest) { 61 // We skip this manifest as it has already been scanned and errored 62 // This is to prevent the generator attempting to run a scan 63 // in a loop of the same image which would consistently fail 64 return false 65 } 66 67 if gen.scanner.IsResultCached(manifestDigest) { 68 // We skip this manifest, it was already scanned 69 return false 70 } 71 72 ok, err := gen.scanner.IsImageFormatScannable(repoName, manifestDigest) 73 if !ok || err != nil { 74 // We skip this manifest, we cannot scan it 75 return false 76 } 77 78 return true 79 } 80 } 81 82 func (gen *scanTaskGenerator) addError(digest string, err error) { 83 gen.lock.Lock() 84 defer gen.lock.Unlock() 85 86 gen.scanErrors[digest] = err 87 } 88 89 func (gen *scanTaskGenerator) hasError(digest string) bool { 90 gen.lock.Lock() 91 defer gen.lock.Unlock() 92 93 _, ok := gen.scanErrors[digest] 94 95 return ok 96 } 97 98 func (gen *scanTaskGenerator) setScheduled(digest string, isScheduled bool) { 99 gen.lock.Lock() 100 defer gen.lock.Unlock() 101 102 if _, ok := gen.scheduled[digest]; ok && !isScheduled { 103 delete(gen.scheduled, digest) 104 } else if isScheduled { 105 gen.scheduled[digest] = true 106 } 107 } 108 109 func (gen *scanTaskGenerator) isScheduled(digest string) bool { 110 gen.lock.Lock() 111 defer gen.lock.Unlock() 112 113 _, ok := gen.scheduled[digest] 114 115 return ok 116 } 117 118 func (gen *scanTaskGenerator) Name() string { 119 return "CVEScanGenerator" 120 } 121 122 func (gen *scanTaskGenerator) Next() (scheduler.Task, error) { 123 // metaRB requires us to use a context for authorization 124 userAc := reqCtx.NewUserAccessControl() 125 userAc.SetUsername("scheduler") 126 userAc.SetIsAdmin(true) 127 ctx := userAc.DeriveContext(context.Background()) 128 129 // Obtain a list of repos with un-scanned scannable manifests 130 // We may implement a method to return just 1 match at some point 131 imageMeta, err := gen.metaDB.FilterTags(ctx, mTypes.AcceptAllRepoTag, gen.getMatcherFunc()) 132 if err != nil { 133 // Do not crash the generator for potential metadb inconsistencies 134 // as there may be scannable images not yet scanned 135 gen.log.Warn().Err(err).Msg("failed to obtain repo metadata during scheduled cve scan") 136 } 137 138 // no imageMeta are returned, all results are in already in cache 139 // or manifests cannot be scanned 140 if len(imageMeta) == 0 { 141 gen.log.Info().Msg("finished scanning available images during scheduled cve scan") 142 143 gen.done = true 144 145 return nil, nil 146 } 147 148 // Since imageMeta will always contain just un-scanned images we can pick 149 // any image out of the resulting matches 150 digest := imageMeta[0].Digest.String() 151 152 // Mark the digest as scheduled so it is skipped on next generator run 153 gen.setScheduled(digest, true) 154 155 return newScanTask(gen, imageMeta[0].Repo, digest), nil 156 } 157 158 func (gen *scanTaskGenerator) IsDone() bool { 159 return gen.done 160 } 161 162 func (gen *scanTaskGenerator) IsReady() bool { 163 return true 164 } 165 166 func (gen *scanTaskGenerator) Reset() { 167 gen.lock.Lock() 168 defer gen.lock.Unlock() 169 170 gen.scheduled = map[string]bool{} 171 gen.scanErrors = map[string]error{} 172 gen.done = false 173 } 174 175 type scanTask struct { 176 generator *scanTaskGenerator 177 repo string 178 digest string 179 } 180 181 func newScanTask(generator *scanTaskGenerator, repo string, digest string) *scanTask { 182 return &scanTask{generator, repo, digest} 183 } 184 185 func (st *scanTask) DoWork(ctx context.Context) error { 186 // When work finished clean this entry from the generator 187 defer st.generator.setScheduled(st.digest, false) 188 189 image := st.repo + "@" + st.digest 190 191 // We cache the results internally in the scanner 192 // so we can discard the actual results for now 193 if _, err := st.generator.scanner.ScanImage(ctx, image); err != nil { 194 st.generator.log.Error().Err(err).Str("image", image).Msg("failed to perform scheduled cve scan for image") 195 st.generator.addError(st.digest, err) 196 197 return err 198 } 199 200 st.generator.log.Debug().Str("image", image).Msg("scheduled cve scan completed successfully for image") 201 202 return nil 203 } 204 205 func (st *scanTask) String() string { 206 return fmt.Sprintf("{Name: \"%s\", repo: \"%s\", digest: \"%s\"}", 207 st.Name(), st.repo, st.digest) 208 } 209 210 func (st *scanTask) Name() string { 211 return "ScanTask" 212 }