github.com/quay/claircore@v1.5.28/libvuln/jsonblob/jsonblob.go (about)

     1  // Package jsonblob implements a JSON-backed recording of update operations to
     2  // replay later.
     3  package jsonblob
     4  
     5  import (
     6  	"bufio"
     7  	"context"
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"sort"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/google/uuid"
    18  
    19  	"github.com/quay/claircore"
    20  	"github.com/quay/claircore/datastore"
    21  	"github.com/quay/claircore/libvuln/driver"
    22  )
    23  
// Compile-time assertion that *Store satisfies the datastore.Updater interface.
var _ datastore.Updater = (*Store)(nil)
    25  
    26  // New constructs an empty Store.
    27  func New() (*Store, error) {
    28  	s := Store{}
    29  	s.ops = make(map[string][]driver.UpdateOperation)
    30  	s.entry = make(map[uuid.UUID]*Entry)
    31  	s.latest = make(map[driver.UpdateKind]uuid.UUID)
    32  	return &s, nil
    33  }
    34  
// A Store buffers update operations.
//
// Store opens files in the OS-specified "temp" directories. If updates are
// sufficiently large, this may need to be adjusted. See [os.TempDir] for how to
// do so.
//
// The embedded RWMutex is taken by the Store's methods before they touch the
// maps below.
type Store struct {
	sync.RWMutex
	// entry maps an update reference to the Entry recorded for it.
	entry map[uuid.UUID]*Entry
	// ops maps an updater name to its recorded operations; new operations are
	// prepended.
	ops map[string][]driver.UpdateOperation
	// latest holds the most recently recorded reference for each update kind.
	latest map[driver.UpdateKind]uuid.UUID
}
    46  
    47  // Load reads in all the records serialized in the provided [io.Reader].
    48  func Load(ctx context.Context, r io.Reader) (*Loader, error) {
    49  	l := Loader{
    50  		dec: json.NewDecoder(r),
    51  		cur: uuid.Nil,
    52  	}
    53  	return &l, nil
    54  }
    55  
// Loader is an iterator that returns a series of [Entry].
//
// Users should call [*Loader.Next] until it reports false, then check for
// errors via [*Loader.Err].
type Loader struct {
	// err is the most recent error; once it is non-nil, Next reports false.
	err error
	// e is the Entry handed out by the Entry method.
	e *Entry

	// dec reads successive diskEntry values from the input.
	dec *json.Decoder
	// next is the Entry currently being accumulated.
	next *Entry
	// de is the scratch diskEntry reused for every Decode call.
	de diskEntry
	// cur is the Ref of the records currently being gathered into next.
	cur uuid.UUID
}
    69  
    70  // Next reports whether there's an [Entry] to be processed.
    71  func (l *Loader) Next() bool {
    72  	if l.err != nil {
    73  		return false
    74  	}
    75  
    76  	for l.err = l.dec.Decode(&l.de); l.err == nil; l.err = l.dec.Decode(&l.de) {
    77  		id := l.de.Ref
    78  		// If we just hit a new Entry, promote the current one.
    79  		if id != l.cur {
    80  			l.e = l.next
    81  			l.next = &Entry{}
    82  			l.next.Updater = l.de.Updater
    83  			l.next.Fingerprint = l.de.Fingerprint
    84  			l.next.Date = l.de.Date
    85  		}
    86  		switch l.de.Kind {
    87  		case driver.VulnerabilityKind:
    88  			vuln := claircore.Vulnerability{}
    89  			if err := json.Unmarshal(l.de.Vuln.buf, &vuln); err != nil {
    90  				l.err = err
    91  				return false
    92  			}
    93  			l.next.Vuln = append(l.next.Vuln, &vuln)
    94  		case driver.EnrichmentKind:
    95  			en := driver.EnrichmentRecord{}
    96  			if err := json.Unmarshal(l.de.Enrichment.buf, &en); err != nil {
    97  				l.err = err
    98  				return false
    99  			}
   100  			l.next.Enrichment = append(l.next.Enrichment, en)
   101  		}
   102  		// If this was an initial diskEntry, promote the ref.
   103  		if id != l.cur {
   104  			l.cur = id
   105  			// If we have an Entry ready, report that.
   106  			if l.e != nil {
   107  				return true
   108  			}
   109  		}
   110  	}
   111  	l.e = l.next
   112  	return true
   113  }
   114  
// Entry returns the latest loaded [Entry].
//
// The value is only updated by [*Loader.Next]; it is nil before any Entry has
// been loaded.
func (l *Loader) Entry() *Entry {
	return l.e
}
   119  
   120  // Err is the latest encountered error.
   121  func (l *Loader) Err() error {
   122  	// Don't report EOF as an error.
   123  	if errors.Is(l.err, io.EOF) {
   124  		return nil
   125  	}
   126  	return l.err
   127  }
   128  
   129  // Store writes out the contents of the receiver to the provided [io.Writer].
   130  // It's the inverse of [Load].
   131  //
   132  // Store may only be called once for a series of [Store.UpdateVulnerabilities] and
   133  // [Store.UpdateEnrichments] calls, as it deallocates resources as it writes them.
   134  //
   135  // It should be possible to call this as often as needed to flush resources to
   136  // disk.
   137  func (s *Store) Store(w io.Writer) error {
   138  	s.RLock()
   139  	defer s.RUnlock()
   140  	enc := json.NewEncoder(w)
   141  	enc.SetEscapeHTML(false)
   142  	write := func(id uuid.UUID, e CommonEntry) func(driver.UpdateKind, *os.File, int) error {
   143  		return func(k driver.UpdateKind, f *os.File, ct int) error {
   144  			if f == nil {
   145  				return nil
   146  			}
   147  			defer f.Close()
   148  			shim := newBufShim(f)
   149  			defer shim.Close()
   150  			for i := 0; i < ct; i++ {
   151  				dent := diskEntry{
   152  					CommonEntry: e,
   153  					Ref:         id,
   154  					Kind:        k,
   155  				}
   156  				switch k {
   157  				case driver.EnrichmentKind:
   158  					dent.Enrichment = shim
   159  				case driver.VulnerabilityKind:
   160  					dent.Vuln = shim
   161  				default:
   162  					panic(fmt.Sprintf("programmer error: unknown kind: %v", k))
   163  				}
   164  				if err := enc.Encode(&dent); err != nil {
   165  					return err
   166  				}
   167  			}
   168  			return nil
   169  		}
   170  	}
   171  
   172  	for id, e := range s.entry {
   173  		f := write(id, e.CommonEntry)
   174  		verr := f(driver.VulnerabilityKind, e.vulns, e.vulnCt)
   175  		eerr := f(driver.EnrichmentKind, e.enrichments, e.enrichmentCt)
   176  		delete(s.entry, id)
   177  		if err := errors.Join(verr, eerr); err != nil {
   178  			return err
   179  		}
   180  	}
   181  	return nil
   182  }
   183  
   184  // BufShim treats every call to [bufShim.MarshalJSON] as a [bufio.Scanner.Scan]
   185  // call.
   186  //
   187  // Note this type is very weird, in that it can only be used for _either_ an
   188  // Unmarshal or a Marshal, not both. Doing both on the same structure will
   189  // silently do the wrong thing.
   190  type bufShim struct {
   191  	s   *bufio.Scanner
   192  	buf []byte
   193  }
   194  
   195  func newBufShim(r io.Reader) *bufShim {
   196  	s := new(bufShim)
   197  	s.s = bufio.NewScanner(r)
   198  	s.buf = getBuf()
   199  	s.s.Buffer(s.buf, len(s.buf))
   200  	return s
   201  }
   202  
   203  func (s *bufShim) MarshalJSON() ([]byte, error) {
   204  	if !s.s.Scan() {
   205  		return nil, s.s.Err()
   206  	}
   207  	return s.s.Bytes(), nil
   208  }
   209  
   210  func (s *bufShim) UnmarshalJSON(b []byte) error {
   211  	s.buf = append(s.buf[0:0], b...)
   212  	return nil
   213  }
   214  
   215  func (s *bufShim) Close() error {
   216  	putBuf(s.buf)
   217  	return nil
   218  }
   219  
// Entry is a record of all information needed to record a vulnerability at a
// later date.
//
// Entries produced by a [Loader] have Vuln and Enrichment populated. Entries
// held by a [Store] keep their records in the unexported file fields instead.
type Entry struct {
	CommonEntry
	Vuln       []*claircore.Vulnerability
	Enrichment []driver.EnrichmentRecord

	// These are hacks to prevent excessive memory consumption.
	//
	// Each file holds the JSON-encoded records, one per Encode call, and the
	// matching count says how many records were written to it.
	vulns        *os.File
	vulnCt       int
	enrichments  *os.File
	enrichmentCt int
}
   233  
// CommonEntry is an embedded type that's shared between the "normal" [Entry] type
// and the on-disk JSON produced by the [Store.Store] method.
type CommonEntry struct {
	// Updater is the name the update was recorded under.
	Updater string
	// Fingerprint is the fingerprint supplied with the update.
	Fingerprint driver.Fingerprint
	// Date is when the update was recorded.
	Date time.Time
}
   241  
// DiskEntry is a single vulnerability or enrichment. It's made from unpacking an
// Entry's slice and adding an uuid for grouping back into an Entry upon read.
//
// "Vuln" and "Enrichment" are populated from the backing disk immediately
// before being serialized.
type diskEntry struct {
	CommonEntry
	// Ref is the update reference shared by all records of one Entry.
	Ref uuid.UUID
	// Vuln and Enrichment carry the raw record; which one is used follows Kind.
	Vuln       *bufShim `json:",omitempty"`
	Enrichment *bufShim `json:",omitempty"`
	// Kind says whether this record is a vulnerability or an enrichment.
	Kind driver.UpdateKind
}
   254  
// Entries returns a map containing all the Entries stored by calls to
// UpdateVulnerabilities.
//
// It is unsafe for modification because it does not return a copy of the map.
// The read lock is only held while the map reference is fetched, so it does
// not protect the caller's later use of the map.
func (s *Store) Entries() map[uuid.UUID]*Entry {
	// BUG(hank) [Store.Entries] reports seemingly-empty entries when populated
	// via [Store.UpdateVulnerabilities].
	s.RLock()
	defer s.RUnlock()
	return s.entry
}
   266  
   267  // UpdateVulnerabilities records all provided vulnerabilities.
   268  func (s *Store) UpdateVulnerabilities(ctx context.Context, updater string, fingerprint driver.Fingerprint, vulns []*claircore.Vulnerability) (uuid.UUID, error) {
   269  	now := time.Now()
   270  	buf, err := diskBuf(ctx)
   271  	if err != nil {
   272  		return uuid.Nil, err
   273  	}
   274  
   275  	enc := json.NewEncoder(buf)
   276  	enc.SetEscapeHTML(false)
   277  	for _, v := range vulns {
   278  		if err := enc.Encode(v); err != nil {
   279  			return uuid.Nil, err
   280  		}
   281  	}
   282  	if _, err := buf.Seek(0, io.SeekStart); err != nil {
   283  		return uuid.Nil, err
   284  	}
   285  
   286  	e := Entry{
   287  		vulns:  buf,
   288  		vulnCt: len(vulns),
   289  	}
   290  	e.Date = now
   291  	e.Updater = updater
   292  	e.Fingerprint = fingerprint
   293  	ref := uuid.New()
   294  	s.Lock()
   295  	defer s.Unlock()
   296  	for {
   297  		if _, exist := s.entry[ref]; !exist {
   298  			break
   299  		}
   300  		ref = uuid.New()
   301  	}
   302  	s.entry[ref] = &e
   303  	s.latest[driver.VulnerabilityKind] = ref
   304  	s.ops[updater] = append([]driver.UpdateOperation{{
   305  		Ref:         ref,
   306  		Date:        now,
   307  		Fingerprint: fingerprint,
   308  		Updater:     updater,
   309  		Kind:        driver.VulnerabilityKind,
   310  	}}, s.ops[updater]...)
   311  	return ref, nil
   312  }
   313  
   314  // Copyops assumes all locks are taken care of.
   315  func (s *Store) copyops(ty driver.UpdateKind, us ...string) map[string][]driver.UpdateOperation {
   316  	ns := make(map[string]struct{})
   317  	for _, n := range us {
   318  		ns[n] = struct{}{}
   319  	}
   320  	m := make(map[string][]driver.UpdateOperation, len(s.ops))
   321  	for k, v := range s.ops {
   322  		// If we were passed a set of names and this wasn't in it, pass.
   323  		// If we weren't passed a set of names, do the copy for everything.
   324  		if _, ok := ns[k]; len(ns) != 0 && !ok {
   325  			continue
   326  		}
   327  		n := make([]driver.UpdateOperation, len(v))
   328  		copy(n, v)
   329  		// Filter our copy by type, in place.
   330  		i := 0
   331  		for _, op := range n {
   332  			if op.Kind == ty {
   333  				n[i] = op
   334  				i++
   335  			}
   336  		}
   337  		n = n[:i]
   338  		sort.Slice(n, func(i, j int) bool { return n[i].Date.Before(n[j].Date) })
   339  		m[k] = n
   340  	}
   341  	return m
   342  }
   343  
// GetUpdateOperations returns a list of UpdateOperations in date descending
// order for the given updaters.
//
// The returned map is keyed by Updater implementation's unique names.
//
// If no updaters are specified, all UpdateOperations are returned.
//
// Only operations of kind k are included. The result is a copy built by
// copyops under the read lock, and the returned error is always nil.
func (s *Store) GetUpdateOperations(_ context.Context, k driver.UpdateKind, us ...string) (map[string][]driver.UpdateOperation, error) {
	s.RLock()
	defer s.RUnlock()
	return s.copyops(k, us...), nil
}
   355  
// GetLatestUpdateRefs reports the latest update reference for every known
// updater.
//
// NOTE(review): this returns every recorded operation of kind k for each
// updater, not only the newest one — confirm callers expect that.
//
// The returned error is always nil.
func (s *Store) GetLatestUpdateRefs(_ context.Context, k driver.UpdateKind) (map[string][]driver.UpdateOperation, error) {
	s.RLock()
	defer s.RUnlock()
	return s.copyops(k), nil
}
   363  
// GetLatestUpdateRef reports the latest update reference of any known
// updater.
//
// The zero UUID is returned if nothing of kind k has been recorded. The
// returned error is always nil.
func (s *Store) GetLatestUpdateRef(_ context.Context, k driver.UpdateKind) (uuid.UUID, error) {
	s.RLock()
	defer s.RUnlock()
	return s.latest[k], nil
}
   371  
// DeleteUpdateOperations is unimplemented.
//
// It deletes nothing and always returns 0 and a nil error.
func (s *Store) DeleteUpdateOperations(context.Context, ...uuid.UUID) (int64, error) {
	return 0, nil
}
   376  
// GetUpdateDiff is unimplemented.
//
// It always returns a nil diff and a nil error, so callers must check the
// diff for nil before using it.
func (s *Store) GetUpdateDiff(ctx context.Context, prev, cur uuid.UUID) (*driver.UpdateDiff, error) {
	return nil, nil
}
   381  
// Initialized reports whether the Store currently holds at least one Entry.
//
// The returned error is always nil.
func (s *Store) Initialized(context.Context) (bool, error) {
	s.RLock()
	defer s.RUnlock()
	return len(s.entry) != 0, nil
}
   388  
// GC is unimplemented.
//
// It collects nothing and always returns 0 and a nil error.
func (s *Store) GC(_ context.Context, _ int) (int64, error) {
	return 0, nil
}
   393  
   394  // UpdateEnrichments creates a new EnrichmentUpdateOperation, inserts the provided
   395  // EnrichmentRecord(s), and ensures enrichments from previous updates are not
   396  // queries by clients.
   397  func (s *Store) UpdateEnrichments(ctx context.Context, kind string, fp driver.Fingerprint, es []driver.EnrichmentRecord) (uuid.UUID, error) {
   398  	now := time.Now()
   399  	buf, err := diskBuf(ctx)
   400  	if err != nil {
   401  		return uuid.Nil, err
   402  	}
   403  
   404  	enc := json.NewEncoder(buf)
   405  	enc.SetEscapeHTML(false)
   406  	for _, v := range es {
   407  		if err := enc.Encode(v); err != nil {
   408  			return uuid.Nil, err
   409  		}
   410  	}
   411  	if _, err := buf.Seek(0, io.SeekStart); err != nil {
   412  		return uuid.Nil, err
   413  	}
   414  
   415  	e := Entry{
   416  		enrichments:  buf,
   417  		enrichmentCt: len(es),
   418  	}
   419  	e.Date = now
   420  	e.Updater = kind
   421  	e.Fingerprint = fp
   422  	ref := uuid.New()
   423  	s.Lock()
   424  	defer s.Unlock()
   425  	for {
   426  		if _, exist := s.entry[ref]; !exist {
   427  			break
   428  		}
   429  		ref = uuid.New()
   430  	}
   431  	s.latest[driver.EnrichmentKind] = ref
   432  	s.entry[ref] = &e
   433  	s.ops[kind] = append([]driver.UpdateOperation{{
   434  		Ref:         ref,
   435  		Date:        now,
   436  		Fingerprint: fp,
   437  		Updater:     kind,
   438  		Kind:        driver.EnrichmentKind,
   439  	}}, s.ops[kind]...)
   440  	return ref, nil
   441  }
   442  
// RecordUpdaterStatus is unimplemented.
//
// All arguments are ignored and nil is always returned.
func (s *Store) RecordUpdaterStatus(ctx context.Context, updaterName string, updateTime time.Time, fingerprint driver.Fingerprint, updaterError error) error {
	return nil
}
   447  
// RecordUpdaterSetStatus is unimplemented.
//
// All arguments are ignored and nil is always returned.
func (s *Store) RecordUpdaterSetStatus(ctx context.Context, updaterSet string, updateTime time.Time) error {
	return nil
}
   452  
// DeltaUpdateVulnerabilities is a noop.
//
// Nothing is recorded; it always returns uuid.Nil and a nil error.
func (s *Store) DeltaUpdateVulnerabilities(ctx context.Context, updater string, fingerprint driver.Fingerprint, vulns []*claircore.Vulnerability, deleted []string) (uuid.UUID, error) {
	return uuid.Nil, nil
}
   457  
// UpdateEnrichmentsIter is unimplemented.
//
// It always returns uuid.Nil and [errors.ErrUnsupported].
func (s *Store) UpdateEnrichmentsIter(_ context.Context, _ string, _ driver.Fingerprint, _ datastore.EnrichmentIter) (uuid.UUID, error) {
	return uuid.Nil, errors.ErrUnsupported
}
   462  
// UpdateVulnerabilitiesIter is unimplemented.
//
// It always returns uuid.Nil and [errors.ErrUnsupported].
func (s *Store) UpdateVulnerabilitiesIter(_ context.Context, _ string, _ driver.Fingerprint, _ datastore.VulnerabilityIter) (uuid.UUID, error) {
	return uuid.Nil, errors.ErrUnsupported
}
   467  
   468  var bufPool sync.Pool
   469  
   470  func getBuf() []byte {
   471  	const sz = 1 << 20 // 1MiB
   472  	b, ok := bufPool.Get().([]byte)
   473  	if !ok {
   474  		b = make([]byte, sz)
   475  	}
   476  	return b
   477  }
   478  func putBuf(b []byte) {
   479  	bufPool.Put(b)
   480  }