github.com/quay/claircore@v1.5.28/datastore/postgres/updatevulnerabilities.go (about)

     1  package postgres
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"crypto/md5"
     7  	"fmt"
     8  	"strconv"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/google/uuid"
    13  	"github.com/prometheus/client_golang/prometheus"
    14  	"github.com/prometheus/client_golang/prometheus/promauto"
    15  	"github.com/quay/zlog"
    16  
    17  	"github.com/quay/claircore"
    18  	"github.com/quay/claircore/datastore"
    19  	"github.com/quay/claircore/libvuln/driver"
    20  	"github.com/quay/claircore/pkg/microbatch"
    21  )
    22  
    23  var (
    24  	zeroRepo claircore.Repository
    25  	zeroDist claircore.Distribution
    26  )
    27  
    28  var (
    29  	updateVulnerabilitiesCounter = promauto.NewCounterVec(
    30  		prometheus.CounterOpts{
    31  			Namespace: "claircore",
    32  			Subsystem: "vulnstore",
    33  			Name:      "updatevulnerabilities_total",
    34  			Help:      "Total number of database queries issued in the updateVulnerabilities method.",
    35  		},
    36  		[]string{"query", "is_delta"},
    37  	)
    38  	updateVulnerabilitiesDuration = promauto.NewHistogramVec(
    39  		prometheus.HistogramOpts{
    40  			Namespace: "claircore",
    41  			Subsystem: "vulnstore",
    42  			Name:      "updatevulnerabilities_duration_seconds",
    43  			Help:      "The duration of all queries issued in the updateVulnerabilities method",
    44  		},
    45  		[]string{"query", "is_delta"},
    46  	)
    47  )
    48  
    49  // UpdateVulnerabilitiesIter implements vulnstore.Updater.
    50  func (s *MatcherStore) UpdateVulnerabilitiesIter(ctx context.Context, updater string, fp driver.Fingerprint, it datastore.VulnerabilityIter) (uuid.UUID, error) {
    51  	ctx = zlog.ContextWithValues(ctx, "component", "datastore/postgres/MatcherStore.UpdateVulnerabilitiesIter")
    52  	return s.updateVulnerabilities(ctx, updater, fp, it, nil)
    53  }
    54  
    55  // UpdateVulnerabilities implements vulnstore.Updater.
    56  //
    57  // It creates a new UpdateOperation for this update call, inserts the
    58  // provided vulnerabilities and computes a diff comprising the removed
    59  // and added vulnerabilities for this UpdateOperation.
    60  func (s *MatcherStore) UpdateVulnerabilities(ctx context.Context, updater string, fp driver.Fingerprint, vulns []*claircore.Vulnerability) (uuid.UUID, error) {
    61  	ctx = zlog.ContextWithValues(ctx, "component", "datastore/postgres/MatcherStore.UpdateVulnerabilities")
    62  	iterVulns := func(yield func(*claircore.Vulnerability, error) bool) {
    63  		for i := range vulns {
    64  			if !yield(vulns[i], nil) {
    65  				break
    66  			}
    67  		}
    68  	}
    69  	return s.updateVulnerabilities(ctx, updater, fp, iterVulns, nil)
    70  }
    71  
    72  // DeltaUpdateVulnerabilities implements vulnstore.Updater.
    73  //
    74  // It is similar to UpdateVulnerabilities but support processing of
    75  // partial data as opposed to needing an entire vulnerability database
    76  // Order of operations:
    77  //   - Create a new UpdateOperation
    78  //   - Query existing vulnerabilities for the updater
    79  //   - Discount and vulnerabilities with newer updates and deleted vulnerabilities
    80  //   - Update the associated updateOperation for the remaining existing vulnerabilities
    81  //   - Insert the new vulnerabilities
    82  //   - Associate new vulnerabilities with new updateOperation
    83  func (s *MatcherStore) DeltaUpdateVulnerabilities(ctx context.Context, updater string, fingerprint driver.Fingerprint, vulns []*claircore.Vulnerability, deletedVulns []string) (uuid.UUID, error) {
    84  	ctx = zlog.ContextWithValues(ctx, "component", "datastore/postgres/MatcherStore.DeltaUpdateVulnerabilities")
    85  	iterVulns := func(yield func(*claircore.Vulnerability, error) bool) {
    86  		for i := range vulns {
    87  			if !yield(vulns[i], nil) {
    88  				break
    89  			}
    90  		}
    91  	}
    92  	delVulns := func(yield func(string, error) bool) {
    93  		for _, s := range deletedVulns {
    94  			if !yield(s, nil) {
    95  				break
    96  			}
    97  		}
    98  	}
    99  	return s.updateVulnerabilities(ctx, updater, fingerprint, iterVulns, delVulns)
   100  }
   101  
   102  func (s *MatcherStore) updateVulnerabilities(ctx context.Context, updater string, fingerprint driver.Fingerprint, vulnIter datastore.VulnerabilityIter, delIter datastore.Iter[string]) (uuid.UUID, error) {
   103  	const (
   104  		// Create makes a new update operation and returns the reference and ID.
   105  		create = `INSERT INTO update_operation (updater, fingerprint, kind) VALUES ($1, $2, 'vulnerability') RETURNING id, ref;`
   106  		// Select existing vulnerabilities that are associated with the latest_update_operation.
   107  		selectExisting = `
   108  		SELECT
   109  			"name",
   110  			"vuln"."id"
   111  		FROM
   112  			"vuln"
   113  			INNER JOIN "uo_vuln" ON ("vuln"."id" = "uo_vuln"."vuln")
   114  			INNER JOIN "latest_update_operations" ON (
   115  			"latest_update_operations"."id" = "uo_vuln"."uo"
   116  			)
   117  		WHERE
   118  			(
   119  			"latest_update_operations"."kind" = 'vulnerability'
   120  			)
   121  		AND
   122  			(
   123  			"vuln"."updater" = $1
   124  			)`
   125  		// assocExisting associates existing vulnerabilities with new update operations
   126  		assocExisting = `INSERT INTO uo_vuln (uo, vuln) VALUES ($1, $2) ON CONFLICT DO NOTHING;`
   127  		// Insert attempts to create a new vulnerability. It fails silently.
   128  		insert = `
   129  		INSERT INTO vuln (
   130  			hash_kind, hash,
   131  			name, updater, description, issued, links, severity, normalized_severity,
   132  			package_name, package_version, package_module, package_arch, package_kind,
   133  			dist_id, dist_name, dist_version, dist_version_code_name, dist_version_id, dist_arch, dist_cpe, dist_pretty_name,
   134  			repo_name, repo_key, repo_uri,
   135  			fixed_in_version, arch_operation, version_kind, vulnerable_range
   136  		) VALUES (
   137  		  $1, $2,
   138  		  $3, $4, $5, $6, $7, $8, $9,
   139  		  $10, $11, $12, $13, $14,
   140  		  $15, $16, $17, $18, $19, $20, $21, $22,
   141  		  $23, $24, $25,
   142  		  $26, $27, $28, VersionRange($29, $30)
   143  		)
   144  		ON CONFLICT (hash_kind, hash) DO NOTHING;`
   145  		// Assoc associates an update operation and a vulnerability. It fails
   146  		// silently.
   147  		assoc = `
   148  		INSERT INTO uo_vuln (uo, vuln) VALUES (
   149  			$3,
   150  			(SELECT id FROM vuln WHERE hash_kind = $1 AND hash = $2))
   151  		ON CONFLICT DO NOTHING;`
   152  		refreshView = `REFRESH MATERIALIZED VIEW CONCURRENTLY latest_update_operations;`
   153  	)
   154  
   155  	var uoID uint64
   156  	var ref uuid.UUID
   157  
   158  	start := time.Now()
   159  
   160  	tx, err := s.pool.Begin(ctx)
   161  	if err != nil {
   162  		return uuid.Nil, fmt.Errorf("unable to start transaction: %w", err)
   163  	}
   164  	defer tx.Rollback(ctx)
   165  
   166  	if err := tx.QueryRow(ctx, create, updater, string(fingerprint)).Scan(&uoID, &ref); err != nil {
   167  		return uuid.Nil, fmt.Errorf("failed to create update_operation: %w", err)
   168  	}
   169  
   170  	delta := delIter != nil
   171  	updateVulnerabilitiesCounter.WithLabelValues("create", strconv.FormatBool(delta)).Add(1)
   172  	updateVulnerabilitiesDuration.WithLabelValues("create", strconv.FormatBool(delta)).Observe(time.Since(start).Seconds())
   173  
   174  	zlog.Debug(ctx).
   175  		Str("ref", ref.String()).
   176  		Msg("update_operation created")
   177  
   178  	if delta {
   179  		ctx = zlog.ContextWithValues(ctx, "mode", "delta")
   180  		// Get existing vulns
   181  		// The reason this still works even though the new update_operation
   182  		// is already created is because the latest_update_operation view isn't updated until
   183  		// the end of this function.
   184  		start = time.Now()
   185  		rows, err := s.pool.Query(ctx, selectExisting, updater)
   186  		if err != nil {
   187  			return uuid.Nil, fmt.Errorf("failed to get existing vulns: %w", err)
   188  		}
   189  		defer rows.Close()
   190  		updateVulnerabilitiesCounter.WithLabelValues("selectExisting", strconv.FormatBool(delta)).Add(1)
   191  		updateVulnerabilitiesDuration.WithLabelValues("selectExisting", strconv.FormatBool(delta)).Observe(time.Since(start).Seconds())
   192  
   193  		oldVulns := make(map[string][]string)
   194  		for rows.Next() {
   195  			var tmpID int64
   196  			var ID, name string
   197  			err := rows.Scan(
   198  				&name,
   199  				&tmpID,
   200  			)
   201  
   202  			ID = strconv.FormatInt(tmpID, 10)
   203  			if err != nil {
   204  				return uuid.Nil, fmt.Errorf("failed to scan vulnerability: %w", err)
   205  			}
   206  			oldVulns[name] = append(oldVulns[name], ID)
   207  		}
   208  		if err := rows.Err(); err != nil {
   209  			return uuid.Nil, fmt.Errorf("error reading existing vulnerabilities: %w", err)
   210  		}
   211  
   212  		if len(oldVulns) > 0 {
   213  			vulnIter(func(v *claircore.Vulnerability, _ error) bool {
   214  				// If we have an existing vuln in the new batch
   215  				// delete it from the oldVulns map so it doesn't
   216  				// get associated with the new update_operation.
   217  				delete(oldVulns, v.Name)
   218  				return true
   219  			})
   220  			delIter(func(delName string, _ error) bool {
   221  				// If we have an existing vuln that has been signaled
   222  				// as deleted by the updater then delete it so it doesn't
   223  				// get associated with the new update_operation.
   224  				delete(oldVulns, delName)
   225  				return true
   226  			})
   227  		}
   228  		start = time.Now()
   229  		// Associate already existing vulnerabilities with new update_operation.
   230  		for _, vs := range oldVulns {
   231  			for _, vID := range vs {
   232  				_, err := tx.Exec(ctx, assocExisting, uoID, vID)
   233  				if err != nil {
   234  					return uuid.Nil, fmt.Errorf("could not update old vulnerability with new UO: %w", err)
   235  				}
   236  			}
   237  		}
   238  		updateVulnerabilitiesCounter.WithLabelValues("assocExisting", strconv.FormatBool(delta)).Add(float64(len(oldVulns)))
   239  		updateVulnerabilitiesDuration.WithLabelValues("assocExisting", strconv.FormatBool(delta)).Observe(time.Since(start).Seconds())
   240  
   241  	}
   242  
   243  	// batch insert vulnerabilities
   244  	skipCt := 0
   245  	vulnCt := 0
   246  	start = time.Now()
   247  
   248  	mBatcher := microbatch.NewInsert(tx, 2000, time.Minute)
   249  
   250  	vulnIter(func(vuln *claircore.Vulnerability, iterErr error) bool {
   251  		if iterErr != nil {
   252  			err = iterErr
   253  			return false
   254  		}
   255  		vulnCt++
   256  		if vuln.Package == nil || vuln.Package.Name == "" {
   257  			skipCt++
   258  			return true
   259  		}
   260  
   261  		pkg := vuln.Package
   262  		dist := vuln.Dist
   263  		repo := vuln.Repo
   264  		if dist == nil {
   265  			dist = &zeroDist
   266  		}
   267  		if repo == nil {
   268  			repo = &zeroRepo
   269  		}
   270  		hashKind, hash := md5Vuln(vuln)
   271  		vKind, vrLower, vrUpper := rangefmt(vuln.Range)
   272  
   273  		err = mBatcher.Queue(ctx, insert,
   274  			hashKind, hash,
   275  			vuln.Name, vuln.Updater, vuln.Description, vuln.Issued, vuln.Links, vuln.Severity, vuln.NormalizedSeverity,
   276  			pkg.Name, pkg.Version, pkg.Module, pkg.Arch, pkg.Kind,
   277  			dist.DID, dist.Name, dist.Version, dist.VersionCodeName, dist.VersionID, dist.Arch, dist.CPE, dist.PrettyName,
   278  			repo.Name, repo.Key, repo.URI,
   279  			vuln.FixedInVersion, vuln.ArchOperation, vKind, vrLower, vrUpper,
   280  		)
   281  		if err != nil {
   282  			err = fmt.Errorf("failed to queue vulnerability: %w", err)
   283  			return false
   284  		}
   285  
   286  		err = mBatcher.Queue(ctx, assoc, hashKind, hash, uoID)
   287  		if err != nil {
   288  			err = fmt.Errorf("failed to queue association: %w", err)
   289  			return false
   290  		}
   291  
   292  		return true
   293  	})
   294  	if err != nil {
   295  		return uuid.Nil, fmt.Errorf("iterating on vulnerabilities: %w", err)
   296  	}
   297  	if err := mBatcher.Done(ctx); err != nil {
   298  		return uuid.Nil, fmt.Errorf("failed to finish batch vulnerability insert: %w", err)
   299  	}
   300  
   301  	updateVulnerabilitiesCounter.WithLabelValues("insert_batch", strconv.FormatBool(delta)).Add(1)
   302  	updateVulnerabilitiesDuration.WithLabelValues("insert_batch", strconv.FormatBool(delta)).Observe(time.Since(start).Seconds())
   303  
   304  	if err := tx.Commit(ctx); err != nil {
   305  		return uuid.Nil, fmt.Errorf("failed to commit transaction: %w", err)
   306  	}
   307  	if _, err = s.pool.Exec(ctx, refreshView); err != nil {
   308  		return uuid.Nil, fmt.Errorf("could not refresh latest_update_operations: %w", err)
   309  	}
   310  
   311  	zlog.Debug(ctx).
   312  		Str("ref", ref.String()).
   313  		Int("skipped", skipCt).
   314  		Int("inserted", vulnCt-skipCt).
   315  		Msg("update_operation committed")
   316  	return ref, nil
   317  }
   318  
   319  // Md5Vuln creates an md5 hash from the members of the passed-in Vulnerability,
   320  // giving us a stable, context-free identifier for this revision of the
   321  // Vulnerability.
   322  func md5Vuln(v *claircore.Vulnerability) (string, []byte) {
   323  	var b bytes.Buffer
   324  	b.WriteString(v.Name)
   325  	b.WriteString(v.Description)
   326  	b.WriteString(v.Issued.String())
   327  	b.WriteString(v.Links)
   328  	b.WriteString(v.Severity)
   329  	if v.Package != nil {
   330  		b.WriteString(v.Package.Name)
   331  		b.WriteString(v.Package.Version)
   332  		b.WriteString(v.Package.Module)
   333  		b.WriteString(v.Package.Arch)
   334  		b.WriteString(v.Package.Kind)
   335  	}
   336  	if v.Dist != nil {
   337  		b.WriteString(v.Dist.DID)
   338  		b.WriteString(v.Dist.Name)
   339  		b.WriteString(v.Dist.Version)
   340  		b.WriteString(v.Dist.VersionCodeName)
   341  		b.WriteString(v.Dist.VersionID)
   342  		b.WriteString(v.Dist.Arch)
   343  		b.WriteString(v.Dist.CPE.BindFS())
   344  		b.WriteString(v.Dist.PrettyName)
   345  	}
   346  	if v.Repo != nil {
   347  		b.WriteString(v.Repo.Name)
   348  		b.WriteString(v.Repo.Key)
   349  		b.WriteString(v.Repo.URI)
   350  	}
   351  	b.WriteString(v.ArchOperation.String())
   352  	b.WriteString(v.FixedInVersion)
   353  	if k, l, u := rangefmt(v.Range); k != nil {
   354  		b.WriteString(*k)
   355  		b.WriteString(l)
   356  		b.WriteString(u)
   357  	}
   358  	s := md5.Sum(b.Bytes())
   359  	return "md5", s[:]
   360  }
   361  
   362  func rangefmt(r *claircore.Range) (kind *string, lower, upper string) {
   363  	lower, upper = "{}", "{}"
   364  	if r == nil || r.Lower.Kind != r.Upper.Kind {
   365  		return kind, lower, upper
   366  	}
   367  
   368  	kind = &r.Lower.Kind // Just tested the both kinds are the same.
   369  	v := &r.Lower
   370  	var buf strings.Builder
   371  	b := make([]byte, 0, 16) // 16 byte wide scratch buffer
   372  
   373  	buf.WriteByte('{')
   374  	for i := 0; i < 10; i++ {
   375  		if i != 0 {
   376  			buf.WriteByte(',')
   377  		}
   378  		buf.Write(strconv.AppendInt(b, int64(v.V[i]), 10))
   379  	}
   380  	buf.WriteByte('}')
   381  	lower = buf.String()
   382  	buf.Reset()
   383  	v = &r.Upper
   384  	buf.WriteByte('{')
   385  	for i := 0; i < 10; i++ {
   386  		if i != 0 {
   387  			buf.WriteByte(',')
   388  		}
   389  		buf.Write(strconv.AppendInt(b, int64(v.V[i]), 10))
   390  	}
   391  	buf.WriteByte('}')
   392  	upper = buf.String()
   393  
   394  	return kind, lower, upper
   395  }