github.com/quay/claircore@v1.5.28/enricher/cvss/cvss.go (about) 1 // Package cvss provides a cvss enricher. 2 package cvss 3 4 import ( 5 "bytes" 6 "compress/gzip" 7 "context" 8 "encoding/json" 9 "errors" 10 "fmt" 11 "io" 12 "net/http" 13 "net/url" 14 "regexp" 15 "sort" 16 "strings" 17 "time" 18 19 "github.com/quay/zlog" 20 21 "github.com/quay/claircore" 22 "github.com/quay/claircore/libvuln/driver" 23 "github.com/quay/claircore/pkg/tmp" 24 ) 25 26 var ( 27 _ driver.Enricher = (*Enricher)(nil) 28 _ driver.EnrichmentUpdater = (*Enricher)(nil) 29 30 defaultFeed *url.URL 31 ) 32 33 const ( 34 // Type is the type of data returned from the Enricher's Enrich method. 35 Type = `message/vnd.clair.map.vulnerability; enricher=clair.cvss schema=https://csrc.nist.gov/schema/nvd/feed/1.1/cvss-v3.x.json` 36 // DefaultFeeds is the default place to look for CVE feeds. 37 // 38 // The enricher expects the structure to mirror that found here: files 39 // organized by year, prefixed with `nvdcve-1.1-` and with `.meta` and 40 // `.json.gz` extensions. 41 // 42 //doc:url updater 43 DefaultFeeds = `https://nvd.nist.gov/feeds/json/cve/1.1/` 44 45 // This appears above and must be the same. 46 name = `clair.cvss` 47 48 // First year for the yearly CVE feeds: https://nvd.nist.gov/vuln/data-feeds 49 firstYear = 2002 50 ) 51 52 func init() { 53 var err error 54 defaultFeed, err = url.Parse(DefaultFeeds) 55 if err != nil { 56 panic(err) 57 } 58 } 59 60 // Enricher provides CVSS data as enrichments to a VulnerabilityReport. 61 // 62 // Configure must be called before any other methods. 63 type Enricher struct { 64 driver.NoopUpdater 65 c *http.Client 66 feed *url.URL 67 } 68 69 // Config is the configuration for Enricher. 70 type Config struct { 71 FeedRoot *string `json:"feed_root" yaml:"feed_root"` 72 } 73 74 // Configure implements driver.Configurable. 75 func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c *http.Client) error { 76 var cfg Config 77 e.c = c 78 if err := f(&cfg); err != nil { 79 return err 80 } 81 if cfg.FeedRoot != nil { 82 if !strings.HasSuffix(*cfg.FeedRoot, "/") { 83 return fmt.Errorf("URL missing trailing slash: %q", *cfg.FeedRoot) 84 } 85 u, err := url.Parse(*cfg.FeedRoot) 86 if err != nil { 87 return err 88 } 89 e.feed = u 90 } else { 91 var err error 92 e.feed, err = defaultFeed.Parse(".") 93 if err != nil { 94 panic("programmer error: " + err.Error()) 95 } 96 } 97 return nil 98 } 99 100 func metafileURL(root *url.URL, yr int) (*url.URL, error) { 101 return root.Parse(fmt.Sprintf("nvdcve-1.1-%d.meta", yr)) 102 } 103 104 func gzURL(root *url.URL, yr int) (*url.URL, error) { 105 return root.Parse(fmt.Sprintf("nvdcve-1.1-%d.json.gz", yr)) 106 } 107 108 // Name implements driver.Enricher and driver.EnrichmentUpdater. 109 func (*Enricher) Name() string { return name } 110 111 // FetchEnrichment implements driver.EnrichmentUpdater. 112 func (e *Enricher) FetchEnrichment(ctx context.Context, hint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) { 113 ctx = zlog.ContextWithValues(ctx, "component", "enricher/cvss/Enricher/FetchEnrichment") 114 115 // year → sha256 116 prev := make(map[int]string) 117 if err := json.Unmarshal([]byte(hint), &prev); err != nil && hint != "" { 118 return nil, driver.Fingerprint(""), err 119 } 120 cur := make(map[int]string, len(prev)) 121 yrs := make([]int, 0) 122 123 for y, lim := firstYear, time.Now().Year(); y <= lim; y++ { 124 yrs = append(yrs, y) 125 u, err := metafileURL(e.feed, y) 126 if err != nil { 127 return nil, hint, err 128 } 129 zlog.Debug(ctx). 130 Int("year", y). 131 Stringer("url", u). 132 Msg("fetching meta file") 133 req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) 134 if err != nil { 135 return nil, hint, err 136 } 137 res, err := e.c.Do(req) 138 if err != nil { 139 return nil, hint, err 140 } 141 var buf bytes.Buffer 142 _, err = io.Copy(&buf, res.Body) 143 res.Body.Close() // Don't defer because we're in a loop. 144 if err != nil { 145 return nil, hint, err 146 } 147 var mf metafile 148 if err := mf.Parse(&buf); err != nil { 149 return nil, hint, err 150 } 151 zlog.Debug(ctx). 152 Int("year", y). 153 Stringer("url", u). 154 Time("mod", mf.LastModified). 155 Msg("parsed meta file") 156 cur[y] = strings.ToUpper(mf.SHA256) 157 } 158 159 doFetch := false 160 for _, y := range yrs { 161 if prev[y] != cur[y] { 162 zlog.Info(ctx). 163 Int("year", y). 164 Msg("change detected") 165 doFetch = true 166 break 167 } 168 } 169 if !doFetch { 170 return nil, hint, driver.Unchanged 171 } 172 173 out, err := tmp.NewFile("", "cvss.") 174 if err != nil { 175 return nil, hint, err 176 } 177 var success bool 178 defer func() { 179 if !success { 180 if err := out.Close(); err != nil { 181 zlog.Warn(ctx).Err(err).Msg("unable to close spool") 182 } 183 } 184 }() 185 // Doing this serially is slower, but much less complicated than using an 186 // ErrGroup or the like. 187 // 188 // It may become an issue in 25-30 years. 189 for _, y := range yrs { 190 u, err := gzURL(e.feed, y) 191 if err != nil { 192 return nil, hint, fmt.Errorf("bad URL: %w", err) 193 } 194 req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) 195 if err != nil { 196 return nil, hint, fmt.Errorf("unable to create request: %w", err) 197 } 198 zlog.Debug(ctx). 199 Int("year", y). 200 Stringer("url", u). 201 Msg("requesting json") 202 res, err := e.c.Do(req) 203 if err != nil { 204 return nil, hint, fmt.Errorf("unable to do request: %w", err) 205 } 206 gz, err := gzip.NewReader(res.Body) 207 if err != nil { 208 res.Body.Close() 209 return nil, hint, fmt.Errorf("unable to create gzip reader: %w", err) 210 } 211 f, err := newItemFeed(y, gz) 212 gz.Close() 213 res.Body.Close() 214 if err != nil { 215 return nil, hint, fmt.Errorf("unable to process item feed: %w", err) 216 } 217 if err := f.WriteCVSS(ctx, out); err != nil { 218 return nil, hint, fmt.Errorf("unable to write item feed: %w", err) 219 } 220 } 221 if _, err := out.Seek(0, io.SeekStart); err != nil { 222 return nil, hint, fmt.Errorf("unable to reset item feed: %w", err) 223 } 224 success = true 225 226 nh, err := json.Marshal(cur) 227 if err != nil { 228 panic(fmt.Errorf("unable to serialize new hint: %w", err)) 229 } 230 return out, driver.Fingerprint(nh), nil 231 } 232 233 // ParseEnrichment implements driver.EnrichmentUpdater. 234 func (e *Enricher) ParseEnrichment(ctx context.Context, rc io.ReadCloser) ([]driver.EnrichmentRecord, error) { 235 ctx = zlog.ContextWithValues(ctx, "component", "enricher/cvss/Enricher/ParseEnrichment") 236 // Our Fetch method actually has all the smarts w/r/t to constructing the 237 // records, so this is just decoding in a loop. 238 defer rc.Close() 239 var err error 240 dec := json.NewDecoder(rc) 241 ret := make([]driver.EnrichmentRecord, 0, 1024) // Wild guess at initial capacity. 242 // This is going to allocate like mad, hold onto your butts. 243 for err == nil { 244 ret = append(ret, driver.EnrichmentRecord{}) 245 err = dec.Decode(&ret[len(ret)-1]) 246 } 247 zlog.Debug(ctx). 248 Int("count", len(ret)). 249 Msg("decoded enrichments") 250 if err != nil && !errors.Is(err, io.EOF) { 251 return nil, err 252 } 253 return ret, nil 254 } 255 256 // This is a slightly more relaxed version of the validation pattern in the NVD 257 // JSON schema: https://csrc.nist.gov/schema/nvd/feed/1.1/CVE_JSON_4.0_min_1.1.schema 258 // 259 // It allows for "CVE" to be case insensitive and for dashes and underscores 260 // between the different segments. 261 var cveRegexp = regexp.MustCompile(`(?i:cve)[-_][0-9]{4}[-_][0-9]{4,}`) 262 263 // Enrich implements driver.Enricher. 264 func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *claircore.VulnerabilityReport) (string, []json.RawMessage, error) { 265 ctx = zlog.ContextWithValues(ctx, "component", "enricher/cvss/Enricher/Enrich") 266 267 // We return any CVSS blobs for CVEs mentioned in the free-form parts of the 268 // vulnerability. 269 m := make(map[string][]json.RawMessage) 270 271 erCache := make(map[string][]driver.EnrichmentRecord) 272 for id, v := range r.Vulnerabilities { 273 t := make(map[string]struct{}) 274 ctx := zlog.ContextWithValues(ctx, 275 "vuln", v.Name) 276 for _, elem := range []string{ 277 v.Description, 278 v.Name, 279 v.Links, 280 } { 281 for _, m := range cveRegexp.FindAllString(elem, -1) { 282 t[m] = struct{}{} 283 } 284 } 285 if len(t) == 0 { 286 continue 287 } 288 ts := make([]string, 0, len(t)) 289 for m := range t { 290 ts = append(ts, m) 291 } 292 zlog.Debug(ctx). 293 Strs("cve", ts). 294 Msg("found CVEs") 295 296 sort.Strings(ts) 297 cveKey := strings.Join(ts, "_") 298 rec, ok := erCache[cveKey] 299 if !ok { 300 var err error 301 rec, err = g.GetEnrichment(ctx, ts) 302 if err != nil { 303 return "", nil, err 304 } 305 erCache[cveKey] = rec 306 } 307 zlog.Debug(ctx). 308 Int("count", len(rec)). 309 Msg("found records") 310 for _, r := range rec { 311 m[id] = append(m[id], r.Enrichment) 312 } 313 } 314 if len(m) == 0 { 315 return Type, nil, nil 316 } 317 b, err := json.Marshal(m) 318 if err != nil { 319 return Type, nil, err 320 } 321 return Type, []json.RawMessage{b}, nil 322 }