github.com/quay/claircore@v1.5.28/updater/updater.go (about)

     1  package updater
     2  
     3  import (
     4  	"archive/zip"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"io/fs"
    10  	"net/http"
    11  	"os"
    12  	"runtime"
    13  	"runtime/pprof"
    14  	"sort"
    15  	"strings"
    16  	"sync"
    17  
    18  	"github.com/google/uuid"
    19  	"github.com/quay/zlog"
    20  	"golang.org/x/sync/errgroup"
    21  
    22  	driver "github.com/quay/claircore/updater/driver/v1"
    23  )
    24  
    25  // Updater coordinates running Updaters and saving the results.
    26  //
    27  // Close must be called, or the program may panic.
    28  type Updater struct {
    29  	store     Store
    30  	locker    Locker
    31  	client    *http.Client
    32  	configs   driver.Configs
    33  	factories []driver.UpdaterFactory
    34  }
    35  
    36  // New returns an Updater ready to use.
    37  //
    38  // None of the resources passed in the Options struct have any of their cleanup
    39  // methods called, and need to be safe for use by multiple goroutines.
    40  func New(ctx context.Context, opts *Options) (*Updater, error) {
    41  	if opts.Store == nil {
    42  		return nil, errors.New("updater: no Store implementation provided")
    43  	}
    44  	if opts.Client == nil {
    45  		return nil, errors.New("updater: no http.Client provided")
    46  	}
    47  
    48  	u := &Updater{
    49  		store:     opts.Store,
    50  		locker:    opts.Locker,
    51  		client:    opts.Client,
    52  		configs:   opts.Configs,
    53  		factories: opts.Factories,
    54  	}
    55  
    56  	if opts.Locker == nil {
    57  		zlog.Warn(ctx).Msg("no locker passed, using process-local locking")
    58  		u.locker = newLocalLocker()
    59  	}
    60  	if opts.Configs == nil {
    61  		zlog.Info(ctx).Msg("no updater configuration passed")
    62  		u.configs = make(driver.Configs)
    63  	}
    64  	if opts.Factories == nil {
    65  		zlog.Warn(ctx).Msg("no updater factories provided, this may be a misconfiguration")
    66  	}
    67  
    68  	_, file, line, _ := runtime.Caller(1)
    69  	runtime.SetFinalizer(u, func(u *Updater) {
    70  		panic(fmt.Sprintf("%s:%d: Updater not closed", file, line))
    71  	})
    72  	return u, nil
    73  }
    74  
    75  // Close releases any resources held by the Updater.
    76  func (u *Updater) Close() error {
    77  	runtime.SetFinalizer(u, nil)
    78  	u.store = nil
    79  	u.locker = nil
    80  	u.client = nil
    81  	u.configs = nil
    82  	u.factories = nil
    83  	return nil
    84  }
    85  
    86  // Options contains the needed options for an Updater.
    87  //
    88  // The Store and Client members are required. The others are optional, but
    89  // should only be omitted in specific circumstances.
    90  type Options struct {
    91  	// This should disallow an unkeyed literal and means that additions to the
    92  	// struct shouldn't cause compilation errors.
    93  	_forceKeys struct{}
    94  	// Store is the interface used to persist parsed data.
    95  	Store Store
    96  	// Client is the http.Client all the Updaters will use.
    97  	Client *http.Client
    98  
    99  	// Locker provides system-wide locks. If multiple Updater processes are
   100  	// running, this should be backed by a distributed lock manager.
   101  	Locker Locker
   102  	// Configs holds configuration functions for Updaters.
   103  	Configs driver.Configs
   104  	// Factories is a slice of UpdaterFactories that are used to construct
   105  	// Updaters on demand.
   106  	Factories []driver.UpdaterFactory
   107  }
   108  
   109  // All the internal machinery deals with this taggedUpdater type, so that we
   110  // only have to call the Name method once.
   111  //
   112  // This is to avoid having lots of labeled calls, as *all* calls to updaters
   113  // should be labeled to help in debugging stray goroutines.
   114  type taggedUpdater struct {
   115  	Name    string
   116  	Updater driver.Updater
   117  }
   118  
   119  // Run constructs new updaters, runs them, and stores the results.
   120  //
   121  // Errors reported from the Updater itself will return the error immediately,
   122  // but errors reported from updaters are collected and reported once all
   123  // updaters have run.
   124  //
   125  // Run should be preferred to explicit Fetch and Parse calls, because knowing
   126  // that both methods will be running in the same process allows for better
   127  // resource usage.
   128  func (u *Updater) Run(ctx context.Context, strict bool) error {
   129  	var (
   130  		us  []taggedUpdater
   131  		ops []driver.UpdateOperation
   132  	)
   133  	sg, sctx := errgroup.WithContext(ctx)
   134  	sg.Go(func() (err error) {
   135  		us, err = u.updaters(sctx, u.configs)
   136  		return err
   137  	})
   138  	sg.Go(func() (err error) {
   139  		ops, err = u.store.GetLatestUpdateOperations(sctx)
   140  		return err
   141  	})
   142  	if err := sg.Wait(); err != nil {
   143  		return err
   144  	}
   145  	pfps := make(map[string]driver.Fingerprint, len(ops))
   146  	for _, op := range ops {
   147  		pfps[op.Updater] = op.Fingerprint
   148  	}
   149  
   150  	var wg sync.WaitGroup
   151  	lim := runtime.GOMAXPROCS(0)
   152  	wg.Add(lim)
   153  	feed, errCh := make(chan taggedUpdater), make(chan error)
   154  	var errs []error
   155  
   156  	eg, ctx := errgroup.WithContext(ctx)
   157  	eg.Go(feeder(ctx, feed, us))
   158  	eg.Go(func() error {
   159  		wg.Wait()
   160  		close(errCh)
   161  		return nil
   162  	})
   163  	eg.Go(func() error {
   164  		for err := range errCh {
   165  			errs = append(errs, err)
   166  		}
   167  		return nil
   168  	})
   169  	for i := 0; i < lim; i++ {
   170  		eg.Go(func() error {
   171  			defer wg.Done()
   172  			spool, err := os.CreateTemp(tmpDir, tmpPattern)
   173  			if err != nil {
   174  				zlog.Warn(ctx).Err(err).Msg("unable to create spool file")
   175  				return err
   176  			}
   177  			spoolname := spool.Name()
   178  			defer func() {
   179  				if err := os.Remove(spoolname); err != nil {
   180  					zlog.Warn(ctx).Str("filename", spoolname).Err(err).Msg("unable to remove spool file")
   181  				}
   182  				if err := spool.Close(); err != nil {
   183  					zlog.Warn(ctx).Str("filename", spoolname).Err(err).Msg("error closing spool file")
   184  				}
   185  			}()
   186  			var updErr *updaterError
   187  			for upd := range feed {
   188  				err := u.fetchAndParse(ctx, spool, pfps, upd)
   189  				switch {
   190  				case errors.Is(err, nil):
   191  				case errors.As(err, &updErr):
   192  					zlog.Debug(ctx).Err(updErr).Msg("updater error")
   193  					errCh <- updErr.Unwrap()
   194  				default:
   195  					return err
   196  				}
   197  			}
   198  			return nil
   199  		})
   200  	}
   201  	if err := eg.Wait(); err != nil {
   202  		return err
   203  	}
   204  	// Print or return errors.
   205  	if len(errs) != 0 {
   206  		if strict {
   207  			return errors.Join(errs...)
   208  		}
   209  		zlog.Info(ctx).Errs("errors", errs).Msg("updater errors")
   210  	}
   211  	return nil
   212  }
   213  
   214  // In all cases, calls into Updaters should be done with the goroutine labels
   215  // set. This allows an execution trace to help narrow down any orphaned
   216  // goroutines.
   217  
   218  func (u *Updater) updaters(ctx context.Context, cfg driver.Configs) ([]taggedUpdater, error) {
   219  	var r []taggedUpdater
   220  	dedup := make(map[string]struct{})
   221  	for _, fac := range u.factories {
   222  		var key string
   223  		pprof.Do(ctx, pprof.Labels("task", "factory_name"), func(_ context.Context) {
   224  			key = fac.Name()
   225  		})
   226  		var set []driver.Updater
   227  		var err error
   228  		pprof.Do(ctx, pprof.Labels("task", "factory_create", "factory", key), func(ctx context.Context) {
   229  			set, err = fac.Create(ctx, cfg[key])
   230  		})
   231  		if err != nil {
   232  			zlog.Info(ctx).Err(err).Msg("factory errored")
   233  			continue
   234  		}
   235  		for _, upd := range set {
   236  			var name string
   237  			pprof.Do(ctx, pprof.Labels("task", "updater_name"), func(_ context.Context) {
   238  				name = upd.Name()
   239  			})
   240  			if strings.Contains(name, "/") {
   241  				zlog.Info(ctx).Str("updater", name).Msg("name contains invalid character: /")
   242  				continue
   243  			}
   244  			if _, ok := dedup[name]; ok {
   245  				zlog.Info(ctx).Str("updater", name).Msg("updater already exists")
   246  				continue
   247  			}
   248  			dedup[name] = struct{}{}
   249  			r = append(r, taggedUpdater{
   250  				Name:    name,
   251  				Updater: upd,
   252  			})
   253  		}
   254  	}
   255  	sort.Slice(r, func(i, j int) bool { return r[i].Name < r[j].Name })
   256  	return r, nil
   257  }
   258  
   259  func (u *Updater) fetchOne(ctx context.Context, tu taggedUpdater, pfp driver.Fingerprint, out io.Writer) (fp driver.Fingerprint, err error) {
   260  	name := tu.Name
   261  	ctx = zlog.ContextWithValues(ctx, "updater", name)
   262  	zlog.Info(ctx).Msg("fetch start")
   263  	defer zlog.Info(ctx).Msg("fetch done")
   264  	lctx, done := u.locker.TryLock(ctx, name)
   265  	defer done()
   266  	if err := lctx.Err(); err != nil {
   267  		if pErr := ctx.Err(); pErr != nil {
   268  			zlog.Debug(ctx).Err(err).Msg("parent context canceled")
   269  			return fp, nil
   270  		}
   271  		zlog.Info(ctx).Err(err).Msg("lock acquisition failed, skipping")
   272  		return fp, err
   273  	}
   274  	ctx = lctx
   275  
   276  	zw := zip.NewWriter(out)
   277  	defer func() {
   278  		if err := zw.Close(); err != nil {
   279  			zlog.Warn(ctx).Err(err).Msg("unable to close zip writer")
   280  		}
   281  	}()
   282  	if len(pfp) != 0 {
   283  		zlog.Debug(ctx).Str("fingerprint", string(pfp)).Msg("found previous fingerprint")
   284  	}
   285  	pprof.Do(ctx, pprof.Labels("task", "updater_fetch", "updater", name), func(ctx context.Context) {
   286  		fp, err = tu.Updater.Fetch(ctx, zw, pfp, u.client)
   287  	})
   288  	return fp, err
   289  }
   290  
   291  func (u *Updater) parseOne(ctx context.Context, tu taggedUpdater, in fs.FS) (*parseResult, error) {
   292  	var (
   293  		any bool
   294  		res parseResult
   295  		err error
   296  	)
   297  	name := tu.Name
   298  	ctx = zlog.ContextWithValues(ctx, "updater", name)
   299  	zlog.Info(ctx).Msg("parse start")
   300  	defer zlog.Info(ctx).Msg("parse done")
   301  
   302  	pprof.Do(ctx, pprof.Labels("task", "updater_parse", "updater", name), func(ctx context.Context) {
   303  		ctx = zlog.ContextWithValues(ctx, "updater", name)
   304  		upd := tu.Updater
   305  		if p, ok := upd.(driver.VulnerabilityParser); ok {
   306  			zlog.Debug(ctx).Msg("implements VulnerabilityParser")
   307  			any = true
   308  			res.Vulnerabilities, err = p.ParseVulnerability(ctx, in)
   309  			if err != nil {
   310  				return
   311  			}
   312  			zlog.Debug(ctx).
   313  				Err(err).
   314  				Int("ct", len(res.Vulnerabilities.Vulnerability)).
   315  				Msg("found vulnerabilities")
   316  		}
   317  		if p, ok := upd.(driver.EnrichmentParser); ok {
   318  			zlog.Debug(ctx).Msg("implements EnrichmentParser")
   319  			any = true
   320  			res.Enrichments, err = p.ParseEnrichment(ctx, in)
   321  			if err != nil {
   322  				return
   323  			}
   324  			zlog.Debug(ctx).
   325  				Err(err).
   326  				Int("ct", len(res.Enrichments)).
   327  				Msg("found enrichments")
   328  		}
   329  	})
   330  	if !any {
   331  		return nil, errors.New("did nothing")
   332  	}
   333  	return &res, nil
   334  }
   335  
   336  type parseResult struct {
   337  	Vulnerabilities *driver.ParsedVulnerabilities
   338  	Enrichments     []driver.EnrichmentRecord
   339  }
   340  
   341  func (u *Updater) fetchAndParse(ctx context.Context, spool *os.File, pfps map[string]driver.Fingerprint, tu taggedUpdater) error {
   342  	spoolname := spool.Name()
   343  	name := tu.Name
   344  	ctx = zlog.ContextWithValues(ctx, "updater", name)
   345  	if _, err := spool.Seek(0, io.SeekStart); err != nil {
   346  		zlog.Error(ctx).Str("filename", spoolname).Err(err).Msg("unable to seek to start")
   347  		return err
   348  	}
   349  	fp, err := u.fetchOne(ctx, tu, pfps[name], spool)
   350  	switch {
   351  	case errors.Is(err, nil):
   352  	case errors.Is(err, driver.ErrUnchanged):
   353  		zlog.Debug(ctx).Msg("unchanged")
   354  		return nil
   355  	default:
   356  		return updaterErr(err)
   357  	}
   358  	sz, err := spool.Seek(0, io.SeekCurrent)
   359  	if err != nil {
   360  		zlog.Error(ctx).Str("filename", spoolname).Err(err).Msg("unable to seek spoolfile")
   361  		return err
   362  	}
   363  	z, err := zip.NewReader(spool, sz)
   364  	if err != nil {
   365  		zlog.Error(ctx).Str("filename", spoolname).Err(err).Msg("unable to create zip reader")
   366  		return err
   367  	}
   368  	res, err := u.parseOne(ctx, tu, z)
   369  	if err != nil {
   370  		return updaterErr(err)
   371  	}
   372  	ref := uuid.New()
   373  
   374  	pprof.Do(ctx, pprof.Labels("updater", name), func(ctx context.Context) {
   375  		if len(res.Vulnerabilities.Vulnerability) != 0 {
   376  			err = u.store.UpdateVulnerabilities(ctx, ref, name, fp, res.Vulnerabilities)
   377  			if err != nil {
   378  				return
   379  			}
   380  			zlog.Info(ctx).Stringer("ref", ref).Msg("updated vulnerabilites")
   381  		}
   382  		if len(res.Enrichments) != 0 {
   383  			err = u.store.UpdateEnrichments(ctx, ref, name, fp, res.Enrichments)
   384  			if err != nil {
   385  				return
   386  			}
   387  			zlog.Info(ctx).Stringer("ref", ref).Msg("updated enrichments")
   388  		}
   389  	})
   390  	if err != nil {
   391  		return err
   392  	}
   393  	return nil
   394  }
   395  
   396  // UpdaterErr returns an *updaterError wrapping "e".
   397  //
   398  // This is used to signal when an error came from an updater.
   399  func updaterErr(e error) error {
   400  	return &updaterError{orig: e}
   401  }
   402  
   403  type updaterError struct {
   404  	orig error
   405  }
   406  
   407  func (u *updaterError) Error() string {
   408  	return u.orig.Error()
   409  }
   410  
   411  func (u *updaterError) Unwrap() error {
   412  	return u.orig
   413  }
   414  
   415  // Feeder sends "us" down "ch" and closes it when done, while respecting the
   416  // Context's timeout.
   417  func feeder(ctx context.Context, ch chan<- taggedUpdater, us []taggedUpdater) func() error {
   418  	return func() error {
   419  		defer close(ch)
   420  		for _, u := range us {
   421  			select {
   422  			case <-ctx.Done():
   423  				return ctx.Err()
   424  			case ch <- u:
   425  			}
   426  		}
   427  		return nil
   428  	}
   429  }
   430  
   431  const (
   432  	tmpDir     = ``
   433  	tmpPattern = `updater.spool.*`
   434  )