github.com/quay/claircore@v1.5.28/datastore/postgres/gc.go (about) 1 package postgres 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "runtime" 8 "strings" 9 "time" 10 11 "github.com/google/uuid" 12 "github.com/jackc/pgtype" 13 "github.com/jackc/pgx/v4/pgxpool" 14 "github.com/prometheus/client_golang/prometheus" 15 "github.com/prometheus/client_golang/prometheus/promauto" 16 "github.com/quay/zlog" 17 "golang.org/x/sync/semaphore" 18 ) 19 20 var ( 21 gcCounter = promauto.NewCounterVec( 22 prometheus.CounterOpts{ 23 Namespace: "claircore", 24 Subsystem: "vulnstore", 25 Name: "gc_total", 26 Help: "Total number of database queries issued in the GC method.", 27 }, 28 []string{"query", "success"}, 29 ) 30 gcDuration = promauto.NewHistogramVec( 31 prometheus.HistogramOpts{ 32 Namespace: "claircore", 33 Subsystem: "vulnstore", 34 Name: "gc_duration_seconds", 35 Help: "The duration of all queries issued in the GC method", 36 }, 37 []string{"query"}, 38 ) 39 ) 40 41 const ( 42 // GCThrottle sets a limit for the number of deleted update operations 43 // (and subsequent cascade deletes in the uo_vuln table) that can occur in a GC run. 44 GCThrottle = 50 45 ) 46 47 // GC is split into two phases, first it will identify any update operations 48 // which are older then the provided keep value and delete these. 49 // 50 // Next it will perform updater based deletions of any vulns from the vuln table 51 // which are not longer referenced by update operations. 52 // 53 // The GC is throttled to not overload the database with cascade deletes. 54 // If a full GC is required run this method until the returned int64 value 55 // is 0. 56 func (s *MatcherStore) GC(ctx context.Context, keep int) (int64, error) { 57 // obtain update operations which need deletin' 58 ops, totalOps, err := eligibleUpdateOpts(ctx, s.pool, keep) 59 if err != nil { 60 return 0, err 61 } 62 63 // delete em', but not too many... 64 if totalOps >= GCThrottle { 65 ops = ops[:GCThrottle] 66 } 67 68 deletedOps, err := s.DeleteUpdateOperations(ctx, ops...) 69 if err != nil { 70 return totalOps - deletedOps, err 71 } 72 73 // get all updaters we know about. 74 updaters, err := distinctUpdaters(ctx, s.pool) 75 if err != nil { 76 return totalOps - deletedOps, err 77 } 78 79 // issue concurrent updater-based deletion for known updaters 80 // limit concurrency by available goroutines. 81 cpus := int64(runtime.GOMAXPROCS(0)) 82 sem := semaphore.NewWeighted(cpus) 83 84 errC := make(chan error, len(updaters)) 85 86 for _, updater := range updaters { 87 err = sem.Acquire(ctx, 1) 88 if err != nil { 89 break 90 } 91 go func(u string) { 92 defer sem.Release(1) 93 err := vulnCleanup(ctx, s.pool, u) 94 if err != nil { 95 errC <- err 96 } 97 }(updater) 98 } 99 100 // unconditionally wait for all in-flight go routines to return. 101 // the use of context.Background and lack of error checking is intentional. 102 // all in-flight go routines are guarantee to release their sems. 103 sem.Acquire(context.Background(), cpus) 104 105 close(errC) 106 if len(errC) > 0 { 107 b := strings.Builder{} 108 b.WriteString("encountered the following errors during gc: \n") 109 for e := range errC { 110 b.WriteString(e.Error() + "\n") 111 } 112 return totalOps - deletedOps, errors.New(b.String()) 113 } 114 return totalOps - deletedOps, nil 115 } 116 117 // distinctUpdaters returns all updaters which have registered an update 118 // operation. 119 func distinctUpdaters(ctx context.Context, pool *pgxpool.Pool) ([]string, error) { 120 const ( 121 // will always contain at least two update operations 122 selectUpdaters = ` 123 SELECT DISTINCT(updater) FROM update_operation; 124 ` 125 ) 126 rows, err := pool.Query(ctx, selectUpdaters) 127 if err != nil { 128 return nil, fmt.Errorf("error selecting distinct updaters: %v", err) 129 } 130 defer rows.Close() 131 132 var updaters []string 133 for rows.Next() { 134 var updater string 135 err := rows.Scan(&updater) 136 switch err { 137 case nil: 138 // hop out 139 default: 140 return nil, fmt.Errorf("error scanning updater: %v", err) 141 } 142 updaters = append(updaters, updater) 143 } 144 if rows.Err() != nil { 145 return nil, rows.Err() 146 } 147 return updaters, nil 148 } 149 150 // eligibleUpdateOpts returns a list of update operation refs which exceed the specified 151 // keep value. 152 func eligibleUpdateOpts(ctx context.Context, pool *pgxpool.Pool, keep int) ([]uuid.UUID, int64, error) { 153 const ( 154 // this query will return rows of UUID arrays. 155 // each returned array are the UUIDs which exceed the provided keep value 156 updateOps = ` 157 WITH ordered_ops AS ( 158 SELECT array_agg(ref ORDER BY date DESC) AS refs FROM update_operation GROUP BY updater 159 ) 160 SELECT ordered_ops.refs[$1:] 161 FROM ordered_ops 162 WHERE array_length(ordered_ops.refs, 1) > $2; 163 ` 164 ) 165 166 // gather any update operations exceeding our keep value. 167 // keep+1 is used because PG's array slicing is inclusive, 168 // we want to grab all items once after our keep value. 169 m := []uuid.UUID{} 170 171 start := time.Now() 172 rows, err := pool.Query(ctx, updateOps, keep+1, keep) 173 switch err { 174 case nil: 175 default: 176 gcCounter.WithLabelValues("updateOps", "false").Inc() 177 return nil, 0, fmt.Errorf("error querying for update operations: %v", err) 178 } 179 180 gcCounter.WithLabelValues("updateOps", "true").Inc() 181 gcDuration.WithLabelValues("updateOps").Observe(time.Since(start).Seconds()) 182 183 defer rows.Close() 184 for rows.Next() { 185 // pgx will not scan directly into a []uuid.UUID 186 tmp := pgtype.UUIDArray{} 187 err := rows.Scan(&tmp) 188 if err != nil { 189 return nil, 0, fmt.Errorf("error scanning update operations: %w", err) 190 } 191 for _, u := range tmp.Elements { 192 m = append(m, u.Bytes) // this works since [16]byte value is assignable to uuid.UUID 193 } 194 } 195 if rows.Err() != nil { 196 return nil, 0, rows.Err() 197 } 198 return m, int64(len(m)), nil 199 } 200 201 func vulnCleanup(ctx context.Context, pool *pgxpool.Pool, updater string) error { 202 const ( 203 deleteOrphanedVulns = ` 204 DELETE FROM vuln v1 USING 205 vuln v2 206 LEFT JOIN uo_vuln uvl 207 ON v2.id = uvl.vuln 208 WHERE uvl.vuln IS NULL 209 AND v2.updater = $1 210 AND v1.id = v2.id; 211 ` 212 ) 213 214 start := time.Now() 215 ctx = zlog.ContextWithValues(ctx, "updater", updater) 216 zlog.Debug(ctx). 217 Msg("starting clean up") 218 res, err := pool.Exec(ctx, deleteOrphanedVulns, updater) 219 if err != nil { 220 gcCounter.WithLabelValues("deleteVulns", "false").Inc() 221 return fmt.Errorf("failed while exec'ing vuln delete: %w", err) 222 } 223 zlog.Debug(ctx).Int64("rows affected", res.RowsAffected()).Msg("vulns deleted") 224 gcCounter.WithLabelValues("deleteVulns", "true").Inc() 225 gcDuration.WithLabelValues("deleteVulns").Observe(time.Since(start).Seconds()) 226 227 return nil 228 }