go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/purger/purger.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package purger deletes expired test results from Spanner. 16 package purger 17 18 import ( 19 "context" 20 "time" 21 22 "cloud.google.com/go/spanner" 23 24 "go.chromium.org/luci/common/errors" 25 "go.chromium.org/luci/common/logging" 26 "go.chromium.org/luci/server" 27 "go.chromium.org/luci/server/span" 28 29 "go.chromium.org/luci/resultdb/internal/artifacts" 30 "go.chromium.org/luci/resultdb/internal/cron" 31 "go.chromium.org/luci/resultdb/internal/invocations" 32 "go.chromium.org/luci/resultdb/internal/spanutil" 33 ) 34 35 // Options is purger server configuration. 36 type Options struct { 37 // ForceCronInterval forces minimum interval in cron jobs. 38 // Useful in integration tests to reduce the test time. 39 ForceCronInterval time.Duration 40 } 41 42 // InitServer initializes a purger server. 43 func InitServer(srv *server.Server, opts Options) { 44 srv.RunInBackground("resultdb.purge", func(ctx context.Context) { 45 minInterval := time.Minute 46 if opts.ForceCronInterval > 0 { 47 minInterval = opts.ForceCronInterval 48 } 49 run(ctx, minInterval) 50 }) 51 } 52 53 // run continuously purges expired test results. 54 // It blocks until context is canceled. 55 func run(ctx context.Context, minInterval time.Duration) { 56 maxShard, err := invocations.CurrentMaxShard(ctx) 57 switch { 58 case err == spanutil.ErrNoResults: 59 maxShard = invocations.Shards - 1 60 case err != nil: 61 panic(errors.Annotate(err, "failed to determine number of shards").Err()) 62 } 63 64 // Start one cron job for each shard of the database. 65 cron.Group(ctx, maxShard+1, minInterval, purgeOneShard) 66 } 67 68 func purgeOneShard(ctx context.Context, shard int) error { 69 st := spanner.NewStatement(` 70 SELECT InvocationId 71 FROM Invocations@{FORCE_INDEX=InvocationsByExpectedTestResultsExpiration, spanner_emulator.disable_query_null_filtered_index_check=true} 72 WHERE ShardId = @shardId 73 AND ExpectedTestResultsExpirationTime IS NOT NULL 74 AND ExpectedTestResultsExpirationTime <= CURRENT_TIMESTAMP() 75 `) 76 st.Params["shardId"] = shard 77 return spanutil.Query(span.Single(ctx), st, func(row *spanner.Row) error { 78 var id invocations.ID 79 if err := spanutil.FromSpanner(row, &id); err != nil { 80 return err 81 } 82 83 if err := purgeOneInvocation(ctx, id); err != nil { 84 logging.Errorf(ctx, "failed to process %s: %s", id, err) 85 } 86 return nil 87 }) 88 } 89 90 func purgeOneInvocation(ctx context.Context, invID invocations.ID) error { 91 ctx, cancel := span.ReadOnlyTransaction(ctx) 92 defer cancel() 93 94 // Check that invocation hasn't been purged already. 95 var expirationTime spanner.NullTime 96 var realm spanner.NullString 97 err := invocations.ReadColumns(ctx, invID, map[string]any{ 98 "ExpectedTestResultsExpirationTime": &expirationTime, 99 "Realm": &realm, 100 }) 101 if err != nil { 102 return err 103 } 104 if expirationTime.IsNull() { 105 // Invocation was purged by other worker. 106 return nil 107 } 108 109 // Stream rows that need to be purged and delete them in batches. 110 // Note that we cannot use Partitioned UPDATE here because its time complexity 111 // is currently O(table size). 112 // Also Partitioned DML does not support JOINs which we need to purge both 113 // test results and artifacts. 114 var ms []*spanner.Mutation 115 count := 0 116 err = rowsToPurge(ctx, invID, func(table string, key spanner.Key) error { 117 count++ 118 ms = append(ms, spanner.Delete(table, key)) 119 // Flush if the batch is too large. 120 // Cloud Spanner limitation is 20k mutations per txn. 121 // One deletion is one mutation. 122 // Flush at 19k boundary. 123 if len(ms) > 19000 { 124 if _, err := span.Apply(ctx, ms); err != nil { 125 return err 126 } 127 spanutil.IncRowCount(ctx, len(ms), spanutil.TestResults, spanutil.Deleted, realm.StringVal) 128 ms = ms[:0] 129 } 130 return nil 131 }) 132 if err != nil { 133 return err 134 } 135 136 // Flush the last batch. 137 if len(ms) > 0 { 138 if _, err := span.Apply(ctx, ms); err != nil { 139 return err 140 } 141 spanutil.IncRowCount(ctx, len(ms), spanutil.TestResults, spanutil.Deleted, realm.StringVal) 142 } 143 144 // Set the invocation's result expiration to null. 145 if err := unsetInvocationResultsExpiration(ctx, invID); err != nil { 146 return err 147 } 148 149 logging.Debugf(ctx, "Deleted %d test results in %s", count, invID.Name()) 150 return nil 151 } 152 153 // rowsToPurge calls f for rows that should be purged. 154 func rowsToPurge(ctx context.Context, inv invocations.ID, f func(table string, key spanner.Key) error) error { 155 st := spanner.NewStatement(` 156 WITH DoNotPurge AS ( 157 SELECT DISTINCT TestId, VariantHash 158 FROM TestResults@{FORCE_INDEX=UnexpectedTestResults, spanner_emulator.disable_query_null_filtered_index_check=true} 159 WHERE InvocationId = @invocationId 160 AND IsUnexpected = TRUE 161 ) 162 SELECT tr.TestId, tr.ResultId, art.ArtifactId 163 FROM TestResults tr 164 LEFT JOIN DoNotPurge dnp ON tr.TestId = dnp.TestId AND tr.VariantHash = dnp.VariantHash 165 LEFT JOIN Artifacts art 166 ON art.InvocationId = tr.InvocationId AND FORMAT("tr/%s/%s", tr.TestId, tr.ResultId) = art.ParentId 167 WHERE tr.InvocationId = @invocationId 168 AND dnp.VariantHash IS NULL 169 `) 170 171 st.Params["invocationId"] = inv 172 173 var lastTestID, lastResultID string 174 return spanutil.Query(ctx, st, func(row *spanner.Row) error { 175 var testID, resultID string 176 var artifactID spanner.NullString 177 if err := row.Columns(&testID, &resultID, &artifactID); err != nil { 178 return err 179 } 180 181 // Given that we join by TestId and ResultId, result rows with the same 182 // test id and result id will be contiguous. 183 // This is not guaranteed, but happens in practice. 184 // Even if we encounter (testID, resultID) that we've deleted before, this 185 // is OK because a Spanner Delete ignores absence of the target row. 186 // Ultimately, this is an optimization + code simplfication. 187 if testID != lastTestID || resultID != lastResultID { 188 if err := f("TestResults", inv.Key(testID, resultID)); err != nil { 189 return err 190 } 191 192 lastTestID = testID 193 lastResultID = resultID 194 } 195 196 if artifactID.Valid { 197 parentID := artifacts.ParentID(testID, resultID) 198 if err := f("Artifacts", inv.Key(parentID, artifactID)); err != nil { 199 return err 200 } 201 } 202 203 return nil 204 }) 205 } 206 207 func unsetInvocationResultsExpiration(ctx context.Context, id invocations.ID) error { 208 _, err := span.Apply(ctx, []*spanner.Mutation{ 209 spanutil.UpdateMap("Invocations", map[string]any{ 210 "InvocationID": id, 211 "ExpectedTestResultsExpirationTime": nil, 212 }), 213 }) 214 return err 215 }