github.com/quay/claircore@v1.5.28/enricher/cvss/cvss.go (about)

     1  // Package cvss provides a cvss enricher.
     2  package cvss
     3  
     4  import (
     5  	"bytes"
     6  	"compress/gzip"
     7  	"context"
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"net/http"
    13  	"net/url"
    14  	"regexp"
    15  	"sort"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/quay/zlog"
    20  
    21  	"github.com/quay/claircore"
    22  	"github.com/quay/claircore/libvuln/driver"
    23  	"github.com/quay/claircore/pkg/tmp"
    24  )
    25  
var (
	// Compile-time assertions that *Enricher satisfies both driver interfaces.
	_ driver.Enricher          = (*Enricher)(nil)
	_ driver.EnrichmentUpdater = (*Enricher)(nil)

	// defaultFeed is DefaultFeeds parsed into a URL. It is populated by init
	// and used by Configure when no feed root is configured.
	defaultFeed *url.URL
)
    32  
const (
	// Type is the type of data returned from the Enricher's Enrich method.
	Type = `message/vnd.clair.map.vulnerability; enricher=clair.cvss schema=https://csrc.nist.gov/schema/nvd/feed/1.1/cvss-v3.x.json`
	// DefaultFeeds is the default place to look for CVE feeds.
	//
	// The enricher expects the structure to mirror that found here: files
	// organized by year, prefixed with `nvdcve-1.1-` and with `.meta` and
	// `.json.gz` extensions.
	//
	//doc:url updater
	DefaultFeeds = `https://nvd.nist.gov/feeds/json/cve/1.1/`

	// Name is the value reported by the Enricher's Name method. The same
	// string appears in the "enricher=" parameter of Type above, and the two
	// must be kept identical.
	name = `clair.cvss`

	// First year for the yearly CVE feeds: https://nvd.nist.gov/vuln/data-feeds
	firstYear = 2002
)
    51  
    52  func init() {
    53  	var err error
    54  	defaultFeed, err = url.Parse(DefaultFeeds)
    55  	if err != nil {
    56  		panic(err)
    57  	}
    58  }
    59  
// Enricher provides CVSS data as enrichments to a VulnerabilityReport.
//
// Configure must be called before any other methods: it is what populates
// the HTTP client and feed root that FetchEnrichment relies on.
type Enricher struct {
	driver.NoopUpdater
	c    *http.Client // HTTP client used for every feed request; set by Configure.
	feed *url.URL     // Root URL the yearly file names are resolved against; set by Configure.
}
    68  
// Config is the configuration for Enricher.
type Config struct {
	// FeedRoot, when non-nil, replaces DefaultFeeds as the root URL the feed
	// files are fetched from. Configure rejects a value that does not end in
	// a "/".
	FeedRoot *string `json:"feed_root" yaml:"feed_root"`
}
    73  
    74  // Configure implements driver.Configurable.
    75  func (e *Enricher) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c *http.Client) error {
    76  	var cfg Config
    77  	e.c = c
    78  	if err := f(&cfg); err != nil {
    79  		return err
    80  	}
    81  	if cfg.FeedRoot != nil {
    82  		if !strings.HasSuffix(*cfg.FeedRoot, "/") {
    83  			return fmt.Errorf("URL missing trailing slash: %q", *cfg.FeedRoot)
    84  		}
    85  		u, err := url.Parse(*cfg.FeedRoot)
    86  		if err != nil {
    87  			return err
    88  		}
    89  		e.feed = u
    90  	} else {
    91  		var err error
    92  		e.feed, err = defaultFeed.Parse(".")
    93  		if err != nil {
    94  			panic("programmer error: " + err.Error())
    95  		}
    96  	}
    97  	return nil
    98  }
    99  
   100  func metafileURL(root *url.URL, yr int) (*url.URL, error) {
   101  	return root.Parse(fmt.Sprintf("nvdcve-1.1-%d.meta", yr))
   102  }
   103  
   104  func gzURL(root *url.URL, yr int) (*url.URL, error) {
   105  	return root.Parse(fmt.Sprintf("nvdcve-1.1-%d.json.gz", yr))
   106  }
   107  
   108  // Name implements driver.Enricher and driver.EnrichmentUpdater.
   109  func (*Enricher) Name() string { return name }
   110  
   111  // FetchEnrichment implements driver.EnrichmentUpdater.
   112  func (e *Enricher) FetchEnrichment(ctx context.Context, hint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) {
   113  	ctx = zlog.ContextWithValues(ctx, "component", "enricher/cvss/Enricher/FetchEnrichment")
   114  
   115  	// year → sha256
   116  	prev := make(map[int]string)
   117  	if err := json.Unmarshal([]byte(hint), &prev); err != nil && hint != "" {
   118  		return nil, driver.Fingerprint(""), err
   119  	}
   120  	cur := make(map[int]string, len(prev))
   121  	yrs := make([]int, 0)
   122  
   123  	for y, lim := firstYear, time.Now().Year(); y <= lim; y++ {
   124  		yrs = append(yrs, y)
   125  		u, err := metafileURL(e.feed, y)
   126  		if err != nil {
   127  			return nil, hint, err
   128  		}
   129  		zlog.Debug(ctx).
   130  			Int("year", y).
   131  			Stringer("url", u).
   132  			Msg("fetching meta file")
   133  		req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
   134  		if err != nil {
   135  			return nil, hint, err
   136  		}
   137  		res, err := e.c.Do(req)
   138  		if err != nil {
   139  			return nil, hint, err
   140  		}
   141  		var buf bytes.Buffer
   142  		_, err = io.Copy(&buf, res.Body)
   143  		res.Body.Close() // Don't defer because we're in a loop.
   144  		if err != nil {
   145  			return nil, hint, err
   146  		}
   147  		var mf metafile
   148  		if err := mf.Parse(&buf); err != nil {
   149  			return nil, hint, err
   150  		}
   151  		zlog.Debug(ctx).
   152  			Int("year", y).
   153  			Stringer("url", u).
   154  			Time("mod", mf.LastModified).
   155  			Msg("parsed meta file")
   156  		cur[y] = strings.ToUpper(mf.SHA256)
   157  	}
   158  
   159  	doFetch := false
   160  	for _, y := range yrs {
   161  		if prev[y] != cur[y] {
   162  			zlog.Info(ctx).
   163  				Int("year", y).
   164  				Msg("change detected")
   165  			doFetch = true
   166  			break
   167  		}
   168  	}
   169  	if !doFetch {
   170  		return nil, hint, driver.Unchanged
   171  	}
   172  
   173  	out, err := tmp.NewFile("", "cvss.")
   174  	if err != nil {
   175  		return nil, hint, err
   176  	}
   177  	var success bool
   178  	defer func() {
   179  		if !success {
   180  			if err := out.Close(); err != nil {
   181  				zlog.Warn(ctx).Err(err).Msg("unable to close spool")
   182  			}
   183  		}
   184  	}()
   185  	// Doing this serially is slower, but much less complicated than using an
   186  	// ErrGroup or the like.
   187  	//
   188  	// It may become an issue in 25-30 years.
   189  	for _, y := range yrs {
   190  		u, err := gzURL(e.feed, y)
   191  		if err != nil {
   192  			return nil, hint, fmt.Errorf("bad URL: %w", err)
   193  		}
   194  		req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
   195  		if err != nil {
   196  			return nil, hint, fmt.Errorf("unable to create request: %w", err)
   197  		}
   198  		zlog.Debug(ctx).
   199  			Int("year", y).
   200  			Stringer("url", u).
   201  			Msg("requesting json")
   202  		res, err := e.c.Do(req)
   203  		if err != nil {
   204  			return nil, hint, fmt.Errorf("unable to do request: %w", err)
   205  		}
   206  		gz, err := gzip.NewReader(res.Body)
   207  		if err != nil {
   208  			res.Body.Close()
   209  			return nil, hint, fmt.Errorf("unable to create gzip reader: %w", err)
   210  		}
   211  		f, err := newItemFeed(y, gz)
   212  		gz.Close()
   213  		res.Body.Close()
   214  		if err != nil {
   215  			return nil, hint, fmt.Errorf("unable to process item feed: %w", err)
   216  		}
   217  		if err := f.WriteCVSS(ctx, out); err != nil {
   218  			return nil, hint, fmt.Errorf("unable to write item feed: %w", err)
   219  		}
   220  	}
   221  	if _, err := out.Seek(0, io.SeekStart); err != nil {
   222  		return nil, hint, fmt.Errorf("unable to reset item feed: %w", err)
   223  	}
   224  	success = true
   225  
   226  	nh, err := json.Marshal(cur)
   227  	if err != nil {
   228  		panic(fmt.Errorf("unable to serialize new hint: %w", err))
   229  	}
   230  	return out, driver.Fingerprint(nh), nil
   231  }
   232  
   233  // ParseEnrichment implements driver.EnrichmentUpdater.
   234  func (e *Enricher) ParseEnrichment(ctx context.Context, rc io.ReadCloser) ([]driver.EnrichmentRecord, error) {
   235  	ctx = zlog.ContextWithValues(ctx, "component", "enricher/cvss/Enricher/ParseEnrichment")
   236  	// Our Fetch method actually has all the smarts w/r/t to constructing the
   237  	// records, so this is just decoding in a loop.
   238  	defer rc.Close()
   239  	var err error
   240  	dec := json.NewDecoder(rc)
   241  	ret := make([]driver.EnrichmentRecord, 0, 1024) // Wild guess at initial capacity.
   242  	// This is going to allocate like mad, hold onto your butts.
   243  	for err == nil {
   244  		ret = append(ret, driver.EnrichmentRecord{})
   245  		err = dec.Decode(&ret[len(ret)-1])
   246  	}
   247  	zlog.Debug(ctx).
   248  		Int("count", len(ret)).
   249  		Msg("decoded enrichments")
   250  	if err != nil && !errors.Is(err, io.EOF) {
   251  		return nil, err
   252  	}
   253  	return ret, nil
   254  }
   255  
// This is a slightly more relaxed version of the validation pattern in the NVD
// JSON schema: https://csrc.nist.gov/schema/nvd/feed/1.1/CVE_JSON_4.0_min_1.1.schema
//
// It allows for "CVE" to be case insensitive and for dashes and underscores
// between the different segments.
//
// The pattern is not anchored, so it also matches identifiers embedded in
// longer text.
var cveRegexp = regexp.MustCompile(`(?i:cve)[-_][0-9]{4}[-_][0-9]{4,}`)
   262  
   263  // Enrich implements driver.Enricher.
   264  func (e *Enricher) Enrich(ctx context.Context, g driver.EnrichmentGetter, r *claircore.VulnerabilityReport) (string, []json.RawMessage, error) {
   265  	ctx = zlog.ContextWithValues(ctx, "component", "enricher/cvss/Enricher/Enrich")
   266  
   267  	// We return any CVSS blobs for CVEs mentioned in the free-form parts of the
   268  	// vulnerability.
   269  	m := make(map[string][]json.RawMessage)
   270  
   271  	erCache := make(map[string][]driver.EnrichmentRecord)
   272  	for id, v := range r.Vulnerabilities {
   273  		t := make(map[string]struct{})
   274  		ctx := zlog.ContextWithValues(ctx,
   275  			"vuln", v.Name)
   276  		for _, elem := range []string{
   277  			v.Description,
   278  			v.Name,
   279  			v.Links,
   280  		} {
   281  			for _, m := range cveRegexp.FindAllString(elem, -1) {
   282  				t[m] = struct{}{}
   283  			}
   284  		}
   285  		if len(t) == 0 {
   286  			continue
   287  		}
   288  		ts := make([]string, 0, len(t))
   289  		for m := range t {
   290  			ts = append(ts, m)
   291  		}
   292  		zlog.Debug(ctx).
   293  			Strs("cve", ts).
   294  			Msg("found CVEs")
   295  
   296  		sort.Strings(ts)
   297  		cveKey := strings.Join(ts, "_")
   298  		rec, ok := erCache[cveKey]
   299  		if !ok {
   300  			var err error
   301  			rec, err = g.GetEnrichment(ctx, ts)
   302  			if err != nil {
   303  				return "", nil, err
   304  			}
   305  			erCache[cveKey] = rec
   306  		}
   307  		zlog.Debug(ctx).
   308  			Int("count", len(rec)).
   309  			Msg("found records")
   310  		for _, r := range rec {
   311  			m[id] = append(m[id], r.Enrichment)
   312  		}
   313  	}
   314  	if len(m) == 0 {
   315  		return Type, nil, nil
   316  	}
   317  	b, err := json.Marshal(m)
   318  	if err != nil {
   319  		return Type, nil, err
   320  	}
   321  	return Type, []json.RawMessage{b}, nil
   322  }