github.com/quay/claircore@v1.5.28/datastore/postgres/gc.go

package postgres

import (
	"context"
	"errors"
	"fmt"
	"runtime"
	"strings"
	"time"

	"github.com/google/uuid"
	"github.com/jackc/pgtype"
	"github.com/jackc/pgx/v4/pgxpool"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/quay/zlog"
	"golang.org/x/sync/semaphore"
)

var (
	gcCounter = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "claircore",
			Subsystem: "vulnstore",
			Name:      "gc_total",
			Help:      "Total number of database queries issued in the GC method.",
		},
		[]string{"query", "success"},
	)
	gcDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "claircore",
			Subsystem: "vulnstore",
			Name:      "gc_duration_seconds",
			Help:      "The duration of all queries issued in the GC method",
		},
		[]string{"query"},
	)
)

const (
	// GCThrottle sets a limit for the number of deleted update operations
	// (and subsequent cascade deletes in the uo_vuln table) that can occur in a GC run.
	GCThrottle = 50
)
// GC is split into two phases. First, it identifies any update operations
// which are older than the provided keep value and deletes them.
//
// Next, it performs updater-based deletions of any vulns from the vuln table
// which are no longer referenced by update operations.
//
// The GC is throttled to avoid overloading the database with cascade deletes.
// If a full GC is required, run this method until the returned int64 value
// is 0.
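//
// A complete GC pass might look like the following sketch; the store, ctx, and
// keep names are illustrative, supplied by the caller rather than this package:
//
//	for {
//		// GC returns the number of eligible update operations still
//		// remaining after this throttled run.
//		remaining, err := store.GC(ctx, keep)
//		if err != nil {
//			return err
//		}
//		if remaining == 0 {
//			break
//		}
//	}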
func (s *MatcherStore) GC(ctx context.Context, keep int) (int64, error) {
	// obtain update operations eligible for deletion
	ops, totalOps, err := eligibleUpdateOpts(ctx, s.pool, keep)
	if err != nil {
		return 0, err
	}

	// delete them, but no more than GCThrottle per run
	if totalOps >= GCThrottle {
		ops = ops[:GCThrottle]
	}

	deletedOps, err := s.DeleteUpdateOperations(ctx, ops...)
	if err != nil {
		return totalOps - deletedOps, err
	}

	// get all updaters we know about.
	updaters, err := distinctUpdaters(ctx, s.pool)
	if err != nil {
		return totalOps - deletedOps, err
	}

	// issue concurrent updater-based deletions for known updaters,
	// limiting concurrency to the number of available CPUs.
	cpus := int64(runtime.GOMAXPROCS(0))
	sem := semaphore.NewWeighted(cpus)

	errC := make(chan error, len(updaters))

	for _, updater := range updaters {
		err = sem.Acquire(ctx, 1)
		if err != nil {
			break
		}
		go func(u string) {
			defer sem.Release(1)
			err := vulnCleanup(ctx, s.pool, u)
			if err != nil {
				errC <- err
			}
		}(updater)
	}

	// unconditionally wait for all in-flight goroutines to return.
	// the use of context.Background and the lack of error checking are intentional:
	// all in-flight goroutines are guaranteed to release their semaphore weights.
	sem.Acquire(context.Background(), cpus)

	close(errC)
	if len(errC) > 0 {
		b := strings.Builder{}
		b.WriteString("encountered the following errors during gc: \n")
		for e := range errC {
			b.WriteString(e.Error() + "\n")
		}
		return totalOps - deletedOps, errors.New(b.String())
	}
	return totalOps - deletedOps, nil
}

// distinctUpdaters returns all updaters which have registered an update
// operation.
func distinctUpdaters(ctx context.Context, pool *pgxpool.Pool) ([]string, error) {
	const (
		// will always contain at least two update operations
		selectUpdaters = `
SELECT DISTINCT(updater) FROM update_operation;
`
	)
	rows, err := pool.Query(ctx, selectUpdaters)
	if err != nil {
		return nil, fmt.Errorf("error selecting distinct updaters: %v", err)
	}
	defer rows.Close()

	var updaters []string
	for rows.Next() {
		var updater string
		err := rows.Scan(&updater)
		if err != nil {
			return nil, fmt.Errorf("error scanning updater: %v", err)
		}
		updaters = append(updaters, updater)
	}
	if rows.Err() != nil {
		return nil, rows.Err()
	}
	return updaters, nil
}

// eligibleUpdateOpts returns the refs of update operations which, per updater,
// exceed the specified keep value.
func eligibleUpdateOpts(ctx context.Context, pool *pgxpool.Pool, keep int) ([]uuid.UUID, int64, error) {
	const (
		// this query will return rows of UUID arrays.
		// each returned array holds the UUIDs of one updater's operations
		// beyond the provided keep value.
		updateOps = `
WITH ordered_ops AS (
    SELECT array_agg(ref ORDER BY date DESC) AS refs FROM update_operation GROUP BY updater
)
SELECT ordered_ops.refs[$1:]
FROM ordered_ops
WHERE array_length(ordered_ops.refs, 1) > $2;
`
	)

	// gather any update operations exceeding our keep value.
	// keep+1 is used because PG's array slicing is one-based and inclusive;
	// we want to grab all items after our keep value.
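	// For example, with keep = 2 and an updater whose refs array (newest
	// first) is {a, b, c, d, e}, refs[3:] yields {c, d, e}: the two newest
	// operations are kept and the rest become eligible for deletion. (The
	// letters stand in for UUID refs.)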
	m := []uuid.UUID{}

	start := time.Now()
	rows, err := pool.Query(ctx, updateOps, keep+1, keep)
	if err != nil {
		gcCounter.WithLabelValues("updateOps", "false").Inc()
		return nil, 0, fmt.Errorf("error querying for update operations: %v", err)
	}

	gcCounter.WithLabelValues("updateOps", "true").Inc()
	gcDuration.WithLabelValues("updateOps").Observe(time.Since(start).Seconds())

	defer rows.Close()
	for rows.Next() {
		// pgx will not scan directly into a []uuid.UUID
		tmp := pgtype.UUIDArray{}
		err := rows.Scan(&tmp)
		if err != nil {
			return nil, 0, fmt.Errorf("error scanning update operations: %w", err)
		}
		for _, u := range tmp.Elements {
			m = append(m, u.Bytes) // this works since a [16]byte value is assignable to uuid.UUID
		}
	}
	if rows.Err() != nil {
		return nil, 0, rows.Err()
	}
	return m, int64(len(m)), nil
}

func vulnCleanup(ctx context.Context, pool *pgxpool.Pool, updater string) error {
	const (
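		// deleteOrphanedVulns deletes the given updater's rows from the vuln
		// table that are no longer referenced by any update operation: v2 is
		// the updater's vulns LEFT JOINed to uo_vuln, the IS NULL filter keeps
		// only the unreferenced rows, and the self-join on id deletes those
		// rows from vuln (v1).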
		deleteOrphanedVulns = `
DELETE FROM vuln v1 USING
	vuln v2
	LEFT JOIN uo_vuln uvl
		ON v2.id = uvl.vuln
	WHERE uvl.vuln IS NULL
	AND v2.updater = $1
AND v1.id = v2.id;
`
	)

	start := time.Now()
	ctx = zlog.ContextWithValues(ctx, "updater", updater)
	zlog.Debug(ctx).
		Msg("starting clean up")
	res, err := pool.Exec(ctx, deleteOrphanedVulns, updater)
	if err != nil {
		gcCounter.WithLabelValues("deleteVulns", "false").Inc()
		return fmt.Errorf("failed while executing vuln delete: %w", err)
	}
	zlog.Debug(ctx).Int64("rows affected", res.RowsAffected()).Msg("vulns deleted")
	gcCounter.WithLabelValues("deleteVulns", "true").Inc()
	gcDuration.WithLabelValues("deleteVulns").Observe(time.Since(start).Seconds())

	return nil
}