go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/resultdb/internal/services/deadlineenforcer/deadline_enforcer.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
// Package deadlineenforcer finalizes invocations with overdue deadlines.
    16  package deadlineenforcer
    17  
    18  import (
    19  	"context"
    20  	"time"
    21  
    22  	"cloud.google.com/go/spanner"
    23  	"google.golang.org/protobuf/types/known/timestamppb"
    24  
    25  	"go.chromium.org/luci/common/clock"
    26  	"go.chromium.org/luci/common/errors"
    27  	"go.chromium.org/luci/common/tsmon/field"
    28  	"go.chromium.org/luci/common/tsmon/metric"
    29  	"go.chromium.org/luci/common/tsmon/types"
    30  	"go.chromium.org/luci/server"
    31  	"go.chromium.org/luci/server/span"
    32  
    33  	"go.chromium.org/luci/resultdb/internal/cron"
    34  	"go.chromium.org/luci/resultdb/internal/invocations"
    35  	"go.chromium.org/luci/resultdb/internal/spanutil"
    36  	"go.chromium.org/luci/resultdb/internal/tasks"
    37  )
    38  
// maxInvocationsPerShardToEnforceAtOnce is the page size used when querying a
// single shard for overdue invocations. enforceOneShard keeps requesting pages
// of this size until a page comes back with a different (smaller) row count.
const maxInvocationsPerShardToEnforceAtOnce = 100
    40  
var (
	// timeOverdue tracks the delay between invocations expiring and being
	// picked up by deadlineenforcer.
	//
	// The value is reported in milliseconds and is keyed by the invocation's
	// realm. enforce records it as "now minus ActiveDeadline" after the
	// finalization transaction for the invocation succeeds.
	timeOverdue = metric.NewCumulativeDistribution(
		"resultdb/deadlineenforcer/delay",
		"Delay between invocation expiration and forced finalization",
		&types.MetricMetadata{Units: types.Milliseconds},
		// NOTE(review): nil presumably selects tsmon's default bucketer — confirm.
		nil,
		field.String("realm"),
	)

	// overdueInvocationsFinalized counts invocations finalized by the
	// deadlineenforcer service.
	//
	// It is keyed by realm and incremented by one per invocation whose
	// finalization transaction succeeds.
	overdueInvocationsFinalized = metric.NewCounter(
		"resultdb/deadlineenforcer/finalized_invocations",
		"Invocations finalized by deadline enforcer",
		&types.MetricMetadata{Units: "invocations"},
		field.String("realm"),
	)
)
    61  
// Options are for configuring the deadline enforcer.
type Options struct {
	// ForceCronInterval forces minimum interval in cron jobs.
	// Useful in integration tests to reduce the test time.
	//
	// Only positive values take effect; when it is zero or negative,
	// InitServer uses a minimum interval of one minute.
	ForceCronInterval time.Duration
}
    68  
    69  // InitServer initializes a deadline enforcer server.
    70  func InitServer(srv *server.Server, opts Options) {
    71  	srv.RunInBackground("resultdb.deadlineenforcer", func(ctx context.Context) {
    72  		minInterval := time.Minute
    73  		if opts.ForceCronInterval > 0 {
    74  			minInterval = opts.ForceCronInterval
    75  		}
    76  		run(ctx, minInterval)
    77  	})
    78  }
    79  
    80  // run continuously finalizes expired invocations.
    81  // It blocks until context is canceled.
    82  func run(ctx context.Context, minInterval time.Duration) {
    83  	maxShard, err := invocations.CurrentMaxShard(ctx)
    84  	switch {
    85  	case err == spanutil.ErrNoResults:
    86  		maxShard = invocations.Shards - 1
    87  	case err != nil:
    88  		panic(errors.Annotate(err, "failed to determine number of shards").Err())
    89  	}
    90  
    91  	// Start one cron job for each shard of the database.
    92  	cron.Group(ctx, maxShard+1, minInterval, enforceOneShard)
    93  }
    94  
    95  func enforceOneShard(ctx context.Context, shard int) error {
    96  	limit := maxInvocationsPerShardToEnforceAtOnce
    97  	for {
    98  		cnt, err := enforce(ctx, shard, limit)
    99  		if err != nil {
   100  			return err
   101  		}
   102  		if cnt != limit {
   103  			// The last page wasn't full, there likely aren't any more
   104  			// overdue invocations for now.
   105  			break
   106  		}
   107  	}
   108  	return nil
   109  }
   110  
   111  func enforce(ctx context.Context, shard, limit int) (int, error) {
   112  	st := spanner.NewStatement(`
   113  		SELECT InvocationId, ActiveDeadline, Realm
   114  		FROM Invocations@{FORCE_INDEX=InvocationsByActiveDeadline, spanner_emulator.disable_query_null_filtered_index_check=true}
   115  		WHERE ShardId = @shardId
   116  			AND ActiveDeadline <= CURRENT_TIMESTAMP()
   117  		LIMIT @limit
   118  	`)
   119  	st.Params["shardId"] = shard
   120  	st.Params["limit"] = limit
   121  	rowCount := 0
   122  
   123  	ctx, cancel := span.ReadOnlyTransaction(ctx)
   124  	defer cancel()
   125  	err := spanutil.Query(ctx, st, func(row *spanner.Row) error {
   126  		rowCount++
   127  		var id invocations.ID
   128  		var ts *timestamppb.Timestamp
   129  		var realm string
   130  		if err := spanutil.FromSpanner(row, &id, &ts, &realm); err != nil {
   131  			return err
   132  		}
   133  		// TODO(crbug.com/1207606): Increase parallelism.
   134  		_, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error {
   135  			tasks.StartInvocationFinalization(ctx, id, true)
   136  			return nil
   137  		})
   138  		if err == nil {
   139  			overdueInvocationsFinalized.Add(ctx, 1, realm)
   140  			timeOverdue.Add(ctx, float64(clock.Now(ctx).Sub(ts.AsTime()).Milliseconds()), realm)
   141  		}
   142  		return err
   143  	})
   144  	return rowCount, err
   145  }