go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/model/tasktorun.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package model
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"time"
    21  
    22  	"go.chromium.org/luci/gae/service/datastore"
    23  )
    24  
    25  const (
    26  	// TaskToRunShards is the number of TaskToRun entity kinds to shard across.
    27  	TaskToRunShards = 16
    28  )
    29  
// TaskToRun defines a TaskRequest slice ready to be scheduled on a bot.
//
// Each TaskRequest results in one or more TaskToRun entities (one per slice).
// They are created sequentially, one by one, as the task progresses through its
// slices. Each TaskToRun is eventually either picked up by a bot for execution
// or expires. Each TaskToRun picked up for execution has a TaskRunResult
// entity (expired ones don't).
//
// A TaskToRun can either be in "native mode" (dispatched via the native
// Swarming scheduler implemented in Python code base) or in "RBE mode"
// (dispatched via the remote RBE scheduler service). This is controlled by
// the RBEReservation field.
//
// A TaskToRun (regardless of mode) can be in one of two states:
//
// 1. "reapable":
//   - Native mode: QueueNumber and Expiration are both set.
//   - RBE mode: ClaimID is unset and Expiration is set.
//
// 2. "consumed":
//   - Native mode: QueueNumber and Expiration are both unset.
//   - RBE mode: ClaimID is set and Expiration is unset.
//
// The entity starts its life in the reapable state and then transitions to the
// consumed state either by being picked up by a bot for execution or when it
// expires. The consumed state is final.
//
// The key ID is (see TaskToRunID):
// - the lower 4 bits are the try number. The only supported value is 1 now.
// - the next 5 bits are TaskResultSummary.CurrentTaskSlice (shifted by 4 bits).
// - the rest is 0.
//
// This entity is stored using a bunch of different shards. The shard number is
// derived deterministically by calculating dimensions hash % TaskToRunShards,
// see TaskToRunKey.
type TaskToRun struct {
	// Extra holds entity properties that didn't match any declared ones below.
	//
	// Should normally be empty.
	Extra datastore.PropertyMap `gae:"-,extra"`

	// Key identifies the task and its slice, see TaskToRunKey().
	//
	// Note that the kind is TaskToRunShard<index>, see TaskToRunKind().
	Key *datastore.Key `gae:"$key"`

	// Created is used to know when the entity is enqueued.
	//
	// The very first TaskToRun has the same value as TaskRequest.Created, but the
	// following ones (when using multiple task slices) have Created set at the
	// time they are created.
	//
	// Used in both native and RBE mode.
	Created time.Time `gae:"created_ts,noindex"`

	// Dimensions is a copy of dimensions from the corresponding task slice of
	// TaskRequest.
	//
	// It is used to quickly check if a bot can reap this TaskToRun right after
	// fetching it from a datastore query.
	//
	// Used in both native and RBE mode.
	Dimensions TaskDimensions `gae:"dimensions"`

	// RBEReservation is the RBE reservation name that is (or will be) handling
	// this TaskToRun.
	//
	// If set, then the TaskToRun is in RBE mode. If not, then it is in native
	// mode. A TaskToRun in RBE mode is always (transactionally) created together
	// with a Task Queue task to actually dispatch it to the RBE scheduler.
	RBEReservation string `gae:"rbe_reservation,noindex"`

	// Expiration is the scheduling deadline for this TaskToRun.
	//
	// It is based on TaskSlice.Expiration. It is used to figure out when to
	// fall back to the next task slice. It is scanned by a cron job and thus
	// needs to be indexed.
	//
	// It is unset when the TaskToRun is claimed, canceled or expires.
	//
	// Used in both native and RBE mode.
	Expiration datastore.Optional[time.Time, datastore.Indexed] `gae:"expiration_ts"`

	// QueueNumber is a magical number by which bots and tasks find one another.
	//
	// Used only in native mode. Always unset and unused in RBE mode.
	//
	// Priority and request creation timestamp are mixed together so that queries
	// ordering the results by this field sort by priority first, and then by
	// timestamp.
	//
	// Gets unset when the TaskToRun is consumed.
	QueueNumber datastore.Optional[int64, datastore.Indexed] `gae:"queue_number"`

	// ClaimID is set if some bot claimed this TaskToRun and will execute it.
	//
	// Used only in RBE mode. Always unset in native mode.
	//
	// It is an opaque ID supplied by the bot when it attempts to claim this
	// entity. If the TaskToRun is already claimed and ClaimID matches the one
	// supplied by the bot, then it means this bot has actually claimed the entity
	// already and now just retries the call.
	//
	// Never gets unset once set.
	ClaimID datastore.Optional[string, datastore.Unindexed] `gae:"claim_id"`

	// ExpirationDelay is a delay from Expiration to the actual expiry time.
	//
	// This is set during the expiration process if the last task slice expired
	// by reaching its deadline. Unset if the last slice expired because there
	// were no bots that could run it.
	//
	// Exclusively for monitoring.
	ExpirationDelay datastore.Optional[float64, datastore.Unindexed] `gae:"expiration_delay"`
}
   145  
   146  // IsReapable returns true if the TaskToRun is still pending.
   147  func (t *TaskToRun) IsReapable() bool {
   148  	return t.Expiration.IsSet()
   149  }
   150  
   151  // TaskToRunKey builds a TaskToRun key given the task request key, the entity
   152  // kind shard index and the task to run ID.
   153  func TaskToRunKey(ctx context.Context, taskReq *datastore.Key, shardIdx int32, ttrID int64) *datastore.Key {
   154  	return datastore.NewKey(ctx, TaskToRunKind(shardIdx), "", ttrID, taskReq)
   155  }
   156  
   157  // TaskToRunKind returns the TaskToRun entity kind name given a shard index.
   158  func TaskToRunKind(shardIdx int32) string {
   159  	return fmt.Sprintf("TaskToRunShard%d", shardIdx)
   160  }