github.com/quay/claircore@v1.5.28/debian/updater.go (about)

     1  package debian
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"context"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io"
    10  	"net/http"
    11  	"net/textproto"
    12  	"net/url"
    13  	"path"
    14  	"regexp"
    15  	"strconv"
    16  	"strings"
    17  
    18  	"github.com/quay/zlog"
    19  
    20  	"github.com/quay/claircore/libvuln/driver"
    21  	"github.com/quay/claircore/pkg/tmp"
    22  )
    23  
    24  //doc:url updater
const (
	// defaultMirror is the mirror root used by Factory.Configure when no
	// mirror_url is configured.
	defaultMirror = `https://deb.debian.org/`
	// defaultJSON is the JSON vulnerability feed used by Factory.Configure
	// when no json_url is configured.
	defaultJSON = `https://security-tracker.debian.org/tracker/data/json`
)
    29  
// Compile-time checks that Factory and updater satisfy the driver interfaces
// they are expected to implement.
var (
	_ driver.UpdaterSetFactory = (*Factory)(nil)
	_ driver.Configurable      = (*Factory)(nil)
	_ driver.Updater           = (*updater)(nil)
	_ driver.Configurable      = (*updater)(nil)
)
    36  
// Factory creates Updaters for all Debian distributions that exist
// in the mirror, and have entries in the JSON security tracker.
//
// [Configure] must be called before [UpdaterSet].
type Factory struct {
	// c is the HTTP client handed to Configure; it is used for every request
	// the Factory makes.
	c *http.Client
	// mirror is the "debian/" directory resolved against the configured (or
	// default) mirror root.
	mirror *url.URL
	// json is the configured (or default) JSON vulnerability feed URL.
	json *url.URL
}
    46  
    47  // NewFactory constructs a Factory.
    48  //
    49  // [Configure] must be called before [UpdaterSet].
    50  func NewFactory(_ context.Context) (*Factory, error) {
    51  	f := &Factory{}
    52  	return f, nil
    53  }
    54  
    55  // Configure implements [driver.Configurable].
    56  func (f *Factory) Configure(_ context.Context, cf driver.ConfigUnmarshaler, c *http.Client) error {
    57  	f.c = c
    58  	var cfg FactoryConfig
    59  	if err := cf(&cfg); err != nil {
    60  		return fmt.Errorf("debian: factory configuration error: %w", err)
    61  	}
    62  
    63  	if cfg.ArchiveURL != "" || cfg.OVALURL != "" {
    64  		return fmt.Errorf("debian: neither archive_url nor oval_url should be populated anymore; use json_url and mirror_url instead")
    65  	}
    66  
    67  	u, err := url.Parse(defaultMirror)
    68  	if cfg.MirrorURL != "" {
    69  		u, err = url.Parse(cfg.MirrorURL)
    70  	}
    71  	if err != nil {
    72  		return fmt.Errorf("debian: bad mirror URL: %w", err)
    73  	}
    74  	f.mirror, err = u.Parse("debian/")
    75  	if err != nil {
    76  		return fmt.Errorf("debian: bad mirror URL: %w", err)
    77  	}
    78  
    79  	f.json, err = url.Parse(defaultJSON)
    80  	if cfg.JSONURL != "" {
    81  		f.json, err = url.Parse(cfg.JSONURL)
    82  	}
    83  	if err != nil {
    84  		return fmt.Errorf("debian: bad JSON URL: %w", err)
    85  	}
    86  
    87  	return nil
    88  }
    89  
// FactoryConfig is the configuration honored by the Factory.
//
// The "mirror" URLs expect to find HTML at "dists/" formatted like
// the HTML from the Debian project (that is to say, HTML containing relative links
// to distribution directories).
//
// The "mirror" URL needs a trailing slash.
//
// The "JSON" URL expects to find a JSON array of packages mapped to related vulnerabilities.
type FactoryConfig struct {
	// ArchiveURL is a URL to a Debian archive.
	//
	// Deprecated: Only MirrorURL should be used. Configure returns an error
	// if this field is populated.
	ArchiveURL string `json:"archive_url" yaml:"archive_url"`
	// MirrorURL is the root URL of a Debian mirror. The "debian/" directory
	// is resolved relative to it. If empty, the default mirror is used.
	MirrorURL string `json:"mirror_url" yaml:"mirror_url"`
	// OVALURL is a URL to a collection of OVAL XML documents.
	//
	// Deprecated: Use JSONURL instead. Configure returns an error if this
	// field is populated.
	OVALURL string `json:"oval_url" yaml:"oval_url"`
	// JSONURL is a URL to a JSON vulnerability feed. If empty, the default
	// feed URL is used.
	JSONURL string `json:"json_url" yaml:"json_url"`
}
   112  
var (
	// LinkRegexp is a bad regexp to extract link targets.
	// This will break if Debian's codenames include a double-quote in the future.
	//
	// The single capture group holds the value of the href attribute.
	linkRegexp = regexp.MustCompile(`href="([^"]+)"`)
	// SkipList is a list of strings that, experimentally, indicate the string
	// is not a codename.
	//
	// Entries are matched as substrings of the link target, not as whole
	// names.
	skipList = []string{
		"-", "Debian", "sid", "stable", "testing", "experimental", "README", "updates", "backports",
	}
)
   123  
   124  // UpdaterSet implements [driver.UpdaterSetFactory].
   125  func (f *Factory) UpdaterSet(ctx context.Context) (driver.UpdaterSet, error) {
   126  	s := driver.NewUpdaterSet()
   127  
   128  	if err := f.findReleases(ctx, f.mirror); err != nil {
   129  		return s, fmt.Errorf("debian: examining remote: %w", err)
   130  	}
   131  
   132  	// TODO: Consider returning stub if Last-Modified has not updated.
   133  	u := &updater{
   134  		jsonURL: f.json.String(),
   135  	}
   136  
   137  	if err := s.Add(u); err != nil {
   138  		return s, fmt.Errorf("debian: unable to add updater: %w", err)
   139  	}
   140  
   141  	return s, nil
   142  }
   143  
   144  // FindReleases is split out as a method to make it easier to examine the mirror and the archive.
   145  func (f *Factory) findReleases(ctx context.Context, u *url.URL) error {
   146  	dir, err := u.Parse("dists/")
   147  	if err != nil {
   148  		return fmt.Errorf("debian: unable to construct URL: %w", err)
   149  	}
   150  	req, err := http.NewRequestWithContext(ctx, http.MethodGet, dir.String(), nil)
   151  	if err != nil {
   152  		return fmt.Errorf("debian: unable to construct request: %w", err)
   153  	}
   154  	res, err := f.c.Do(req)
   155  	if err != nil {
   156  		return fmt.Errorf("debian: unable to do request: %w", err)
   157  	}
   158  	defer res.Body.Close()
   159  	switch res.StatusCode {
   160  	case http.StatusOK:
   161  	default:
   162  		return fmt.Errorf("debian: unexpected status fetching %q: %s", dir.String(), res.Status)
   163  	}
   164  	var buf bytes.Buffer
   165  	if _, err := buf.ReadFrom(res.Body); err != nil {
   166  		return fmt.Errorf("debian: unable to read dists listing: %w", err)
   167  	}
   168  	ms := linkRegexp.FindAllStringSubmatch(buf.String(), -1)
   169  
   170  Listing:
   171  	for _, m := range ms {
   172  		dist := m[1]
   173  		switch {
   174  		case dist == "":
   175  			continue
   176  		case dist[0] == '/', dist[0] == '?':
   177  			continue
   178  		}
   179  		for _, s := range skipList {
   180  			if strings.Contains(dist, s) {
   181  				continue Listing
   182  			}
   183  		}
   184  		dist = strings.Trim(dist, "/")
   185  		rf, err := dir.Parse(path.Join(dist, `Release`))
   186  		if err != nil {
   187  			zlog.Info(ctx).
   188  				Err(err).
   189  				Stringer("context", dir).
   190  				Str("target", path.Join(dist, `Release`)).
   191  				Msg("unable to construct URL")
   192  			continue
   193  		}
   194  		req, err := http.NewRequestWithContext(ctx, http.MethodGet, rf.String(), nil)
   195  		if err != nil {
   196  			zlog.Info(ctx).
   197  				Err(err).
   198  				Stringer("url", rf).
   199  				Msg("unable to construct request")
   200  			continue
   201  		}
   202  		req.Header.Set("range", "bytes=0-512")
   203  		res, err := f.c.Do(req)
   204  		if err != nil {
   205  			zlog.Info(ctx).
   206  				Err(err).
   207  				Stringer("url", rf).
   208  				Msg("unable to do request")
   209  			continue
   210  		}
   211  		buf.Reset()
   212  		buf.ReadFrom(res.Body)
   213  		res.Body.Close()
   214  		switch res.StatusCode {
   215  		case http.StatusPartialContent, http.StatusOK:
   216  		case http.StatusNotFound: // Probably extremely old, it's fine.
   217  			continue
   218  		default:
   219  			zlog.Info(ctx).
   220  				Str("status", res.Status).
   221  				Stringer("url", rf).
   222  				Msg("unexpected response")
   223  			continue
   224  		}
   225  		tp := textproto.NewReader(bufio.NewReader(io.MultiReader(&buf, bytes.NewReader([]byte("\r\n\r\n")))))
   226  		h, err := tp.ReadMIMEHeader()
   227  		if err != nil {
   228  			zlog.Info(ctx).Err(err).Msg("unable to read MIME-ish headers")
   229  			continue
   230  		}
   231  		sv := h.Get("Version")
   232  		if sv == "" {
   233  			zlog.Debug(ctx).Str("dist", dist).Msg("no version assigned, skipping")
   234  			continue
   235  		}
   236  		vs := strings.Split(sv, ".")
   237  		if len(vs) == 1 {
   238  			zlog.Debug(ctx).Str("dist", dist).Msg("no version assigned, skipping")
   239  			continue
   240  		}
   241  		ver, err := strconv.ParseInt(vs[0], 10, 32)
   242  		if err != nil {
   243  			zlog.Info(ctx).Err(err).Msg("unable to parse version")
   244  			continue
   245  		}
   246  
   247  		mkDist(dist, int(ver))
   248  	}
   249  
   250  	return nil
   251  }
   252  
// Updater implements [driver.Updater].
type updater struct {
	// jsonURL is the URL from which to fetch JSON vulnerability data
	jsonURL string

	// c is the HTTP client supplied to Configure and used by Fetch.
	c *http.Client
}
   260  
// UpdaterConfig is the configuration for the updater.
type UpdaterConfig struct {
	// OVALURL is read from the "url" key. Configure returns an error if it is
	// populated.
	//
	// Deprecated: Use JSONURL instead.
	OVALURL string `json:"url" yaml:"url"`
	// JSONURL, when non-empty, overrides the URL the updater fetches the JSON
	// vulnerability data from.
	JSONURL string `json:"json_url" yaml:"json_url"`
	// Deprecated: DistsURL and DistsURLs are unused. Configure returns an
	// error if DistsURL is populated.
	DistsURL  string            `json:"dists_url" yaml:"dists_url"`
	DistsURLs []json.RawMessage `json:"dists_urls" yaml:"dists_urls"`
}
   270  
   271  // Name implements [driver.Updater].
   272  func (u *updater) Name() string {
   273  	return "debian/updater"
   274  }
   275  
   276  // Configure implements [driver.Configurable].
   277  func (u *updater) Configure(ctx context.Context, f driver.ConfigUnmarshaler, c *http.Client) error {
   278  	ctx = zlog.ContextWithValues(ctx, "component", "debian/Updater.Configure")
   279  	u.c = c
   280  	var cfg UpdaterConfig
   281  	if err := f(&cfg); err != nil {
   282  		return err
   283  	}
   284  
   285  	if cfg.DistsURL != "" || cfg.OVALURL != "" {
   286  		zlog.Error(ctx).Msg("configured with deprecated URLs")
   287  		return fmt.Errorf("debian: neither url nor dists_url should be used anymore; use json_url and dists_urls instead")
   288  	}
   289  
   290  	if cfg.JSONURL != "" {
   291  		u.jsonURL = cfg.JSONURL
   292  		zlog.Info(ctx).
   293  			Msg("configured JSON database URL")
   294  	}
   295  
   296  	return nil
   297  }
   298  
   299  // Fetch implements [driver.Fetcher].
   300  func (u *updater) Fetch(ctx context.Context, fingerprint driver.Fingerprint) (io.ReadCloser, driver.Fingerprint, error) {
   301  	ctx = zlog.ContextWithValues(ctx,
   302  		"component", "debian/Updater.Fetch",
   303  		"database", u.jsonURL)
   304  
   305  	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.jsonURL, nil)
   306  	if err != nil {
   307  		return nil, "", fmt.Errorf("failed to create request")
   308  	}
   309  	if fingerprint != "" {
   310  		req.Header.Set("If-Modified-Since", string(fingerprint))
   311  	}
   312  
   313  	// fetch JSON database
   314  	resp, err := u.c.Do(req)
   315  	if resp != nil {
   316  		defer resp.Body.Close()
   317  	}
   318  	if err != nil {
   319  		return nil, "", fmt.Errorf("failed to retrieve JSON database: %v", err)
   320  	}
   321  
   322  	fp := resp.Header.Get("Last-Modified")
   323  
   324  	switch resp.StatusCode {
   325  	case http.StatusOK:
   326  		if fingerprint == "" || fp != string(fingerprint) {
   327  			zlog.Info(ctx).Msg("fetching latest JSON database")
   328  			break
   329  		}
   330  		fallthrough
   331  	case http.StatusNotModified:
   332  		return nil, fingerprint, driver.Unchanged
   333  	default:
   334  		return nil, "", fmt.Errorf("unexpected response: %v", resp.Status)
   335  	}
   336  
   337  	f, err := tmp.NewFile("", "debian.")
   338  	if err != nil {
   339  		return nil, "", err
   340  	}
   341  
   342  	var success bool
   343  	defer func() {
   344  		if !success {
   345  			if err := f.Close(); err != nil {
   346  				zlog.Warn(ctx).Err(err).Msg("unable to close spool")
   347  			}
   348  		}
   349  	}()
   350  	if _, err := io.Copy(f, resp.Body); err != nil {
   351  		return nil, "", fmt.Errorf("failed to read http body: %w", err)
   352  	}
   353  	if _, err := f.Seek(0, io.SeekStart); err != nil {
   354  		return nil, "", fmt.Errorf("failed to seek body: %w", err)
   355  	}
   356  	zlog.Info(ctx).Msg("fetched latest json database successfully")
   357  
   358  	success = true
   359  	return f, driver.Fingerprint(fp), err
   360  }