go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/purger/purger.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package purger deletes expired test results from Spanner.
    16  package purger
    17  
    18  import (
    19  	"context"
    20  	"time"
    21  
    22  	"cloud.google.com/go/spanner"
    23  
    24  	"go.chromium.org/luci/common/errors"
    25  	"go.chromium.org/luci/common/logging"
    26  	"go.chromium.org/luci/server"
    27  	"go.chromium.org/luci/server/span"
    28  
    29  	"go.chromium.org/luci/resultdb/internal/artifacts"
    30  	"go.chromium.org/luci/resultdb/internal/cron"
    31  	"go.chromium.org/luci/resultdb/internal/invocations"
    32  	"go.chromium.org/luci/resultdb/internal/spanutil"
    33  )
    34  
    35  // Options is purger server configuration.
    36  type Options struct {
    37  	// ForceCronInterval forces minimum interval in cron jobs.
    38  	// Useful in integration tests to reduce the test time.
    39  	ForceCronInterval time.Duration
    40  }
    41  
    42  // InitServer initializes a purger server.
    43  func InitServer(srv *server.Server, opts Options) {
    44  	srv.RunInBackground("resultdb.purge", func(ctx context.Context) {
    45  		minInterval := time.Minute
    46  		if opts.ForceCronInterval > 0 {
    47  			minInterval = opts.ForceCronInterval
    48  		}
    49  		run(ctx, minInterval)
    50  	})
    51  }
    52  
    53  // run continuously purges expired test results.
    54  // It blocks until context is canceled.
    55  func run(ctx context.Context, minInterval time.Duration) {
    56  	maxShard, err := invocations.CurrentMaxShard(ctx)
    57  	switch {
    58  	case err == spanutil.ErrNoResults:
    59  		maxShard = invocations.Shards - 1
    60  	case err != nil:
    61  		panic(errors.Annotate(err, "failed to determine number of shards").Err())
    62  	}
    63  
    64  	// Start one cron job for each shard of the database.
    65  	cron.Group(ctx, maxShard+1, minInterval, purgeOneShard)
    66  }
    67  
    68  func purgeOneShard(ctx context.Context, shard int) error {
    69  	st := spanner.NewStatement(`
    70  		SELECT InvocationId
    71  		FROM Invocations@{FORCE_INDEX=InvocationsByExpectedTestResultsExpiration, spanner_emulator.disable_query_null_filtered_index_check=true}
    72  		WHERE ShardId = @shardId
    73  		AND ExpectedTestResultsExpirationTime IS NOT NULL
    74  		AND ExpectedTestResultsExpirationTime <= CURRENT_TIMESTAMP()
    75  	`)
    76  	st.Params["shardId"] = shard
    77  	return spanutil.Query(span.Single(ctx), st, func(row *spanner.Row) error {
    78  		var id invocations.ID
    79  		if err := spanutil.FromSpanner(row, &id); err != nil {
    80  			return err
    81  		}
    82  
    83  		if err := purgeOneInvocation(ctx, id); err != nil {
    84  			logging.Errorf(ctx, "failed to process %s: %s", id, err)
    85  		}
    86  		return nil
    87  	})
    88  }
    89  
    90  func purgeOneInvocation(ctx context.Context, invID invocations.ID) error {
    91  	ctx, cancel := span.ReadOnlyTransaction(ctx)
    92  	defer cancel()
    93  
    94  	// Check that invocation hasn't been purged already.
    95  	var expirationTime spanner.NullTime
    96  	var realm spanner.NullString
    97  	err := invocations.ReadColumns(ctx, invID, map[string]any{
    98  		"ExpectedTestResultsExpirationTime": &expirationTime,
    99  		"Realm":                             &realm,
   100  	})
   101  	if err != nil {
   102  		return err
   103  	}
   104  	if expirationTime.IsNull() {
   105  		// Invocation was purged by other worker.
   106  		return nil
   107  	}
   108  
   109  	// Stream rows that need to be purged and delete them in batches.
   110  	// Note that we cannot use Partitioned UPDATE here because its time complexity
   111  	// is currently O(table size).
   112  	// Also Partitioned DML does not support JOINs which we need to purge both
   113  	// test results and artifacts.
   114  	var ms []*spanner.Mutation
   115  	count := 0
   116  	err = rowsToPurge(ctx, invID, func(table string, key spanner.Key) error {
   117  		count++
   118  		ms = append(ms, spanner.Delete(table, key))
   119  		// Flush if the batch is too large.
   120  		// Cloud Spanner limitation is 20k mutations per txn.
   121  		// One deletion is one mutation.
   122  		// Flush at 19k boundary.
   123  		if len(ms) > 19000 {
   124  			if _, err := span.Apply(ctx, ms); err != nil {
   125  				return err
   126  			}
   127  			spanutil.IncRowCount(ctx, len(ms), spanutil.TestResults, spanutil.Deleted, realm.StringVal)
   128  			ms = ms[:0]
   129  		}
   130  		return nil
   131  	})
   132  	if err != nil {
   133  		return err
   134  	}
   135  
   136  	// Flush the last batch.
   137  	if len(ms) > 0 {
   138  		if _, err := span.Apply(ctx, ms); err != nil {
   139  			return err
   140  		}
   141  		spanutil.IncRowCount(ctx, len(ms), spanutil.TestResults, spanutil.Deleted, realm.StringVal)
   142  	}
   143  
   144  	// Set the invocation's result expiration to null.
   145  	if err := unsetInvocationResultsExpiration(ctx, invID); err != nil {
   146  		return err
   147  	}
   148  
   149  	logging.Debugf(ctx, "Deleted %d test results in %s", count, invID.Name())
   150  	return nil
   151  }
   152  
   153  // rowsToPurge calls f for rows that should be purged.
   154  func rowsToPurge(ctx context.Context, inv invocations.ID, f func(table string, key spanner.Key) error) error {
   155  	st := spanner.NewStatement(`
   156  		WITH DoNotPurge AS (
   157  			SELECT DISTINCT TestId, VariantHash
   158  			FROM TestResults@{FORCE_INDEX=UnexpectedTestResults, spanner_emulator.disable_query_null_filtered_index_check=true}
   159  			WHERE InvocationId = @invocationId
   160  			  AND IsUnexpected = TRUE
   161  		)
   162  		SELECT tr.TestId, tr.ResultId, art.ArtifactId
   163  		FROM TestResults tr
   164  		LEFT JOIN DoNotPurge dnp ON tr.TestId = dnp.TestId AND tr.VariantHash = dnp.VariantHash
   165  		LEFT JOIN Artifacts art
   166  		  ON art.InvocationId = tr.InvocationId AND FORMAT("tr/%s/%s", tr.TestId, tr.ResultId) = art.ParentId
   167  		WHERE tr.InvocationId = @invocationId
   168  			AND dnp.VariantHash IS NULL
   169  	`)
   170  
   171  	st.Params["invocationId"] = inv
   172  
   173  	var lastTestID, lastResultID string
   174  	return spanutil.Query(ctx, st, func(row *spanner.Row) error {
   175  		var testID, resultID string
   176  		var artifactID spanner.NullString
   177  		if err := row.Columns(&testID, &resultID, &artifactID); err != nil {
   178  			return err
   179  		}
   180  
   181  		// Given that we join by TestId and ResultId, result rows with the same
   182  		// test id and result id will be contiguous.
   183  		// This is not guaranteed, but happens in practice.
   184  		// Even if we encounter (testID, resultID) that we've deleted before, this
   185  		// is OK because a Spanner Delete ignores absence of the target row.
   186  		// Ultimately, this is an optimization + code simplfication.
   187  		if testID != lastTestID || resultID != lastResultID {
   188  			if err := f("TestResults", inv.Key(testID, resultID)); err != nil {
   189  				return err
   190  			}
   191  
   192  			lastTestID = testID
   193  			lastResultID = resultID
   194  		}
   195  
   196  		if artifactID.Valid {
   197  			parentID := artifacts.ParentID(testID, resultID)
   198  			if err := f("Artifacts", inv.Key(parentID, artifactID)); err != nil {
   199  				return err
   200  			}
   201  		}
   202  
   203  		return nil
   204  	})
   205  }
   206  
   207  func unsetInvocationResultsExpiration(ctx context.Context, id invocations.ID) error {
   208  	_, err := span.Apply(ctx, []*spanner.Mutation{
   209  		spanutil.UpdateMap("Invocations", map[string]any{
   210  			"InvocationID":                      id,
   211  			"ExpectedTestResultsExpirationTime": nil,
   212  		}),
   213  	})
   214  	return err
   215  }