go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/engine/utils.go (about)

     1  // Copyright 2017 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package engine
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sort"
    21  	"sync"
    22  	"time"
    23  
    24  	"google.golang.org/protobuf/proto"
    25  
    26  	"go.chromium.org/luci/gae/service/datastore"
    27  	"go.chromium.org/luci/gae/service/memcache"
    28  
    29  	"go.chromium.org/luci/common/clock"
    30  	"go.chromium.org/luci/common/errors"
    31  	"go.chromium.org/luci/common/logging"
    32  	"go.chromium.org/luci/common/retry/transient"
    33  
    34  	"go.chromium.org/luci/scheduler/appengine/internal"
    35  )
    36  
    37  // assertInTransaction panics if the context is not transactional.
    38  func assertInTransaction(c context.Context) {
    39  	if datastore.CurrentTransaction(c) == nil {
    40  		panic("expecting to be called from inside a transaction")
    41  	}
    42  }
    43  
    44  // assertNotInTransaction panics if the context is transactional.
    45  func assertNotInTransaction(c context.Context) {
    46  	if datastore.CurrentTransaction(c) != nil {
    47  		panic("expecting to be called from outside transactions")
    48  	}
    49  }
    50  
    51  // debugLog mutates a string by appending a line to it.
    52  func debugLog(c context.Context, str *string, format string, args ...any) {
    53  	prefix := clock.Now(c).UTC().Format("[15:04:05.000] ")
    54  	*str += prefix + fmt.Sprintf(format+"\n", args...)
    55  }
    56  
    57  // defaultTransactionOptions is used for all transactions.
    58  //
    59  // Almost all transactions done by the scheduler service happen in background
    60  // task queues, it is fine to retry more there.
    61  var defaultTransactionOptions = datastore.TransactionOptions{
    62  	Attempts: 10,
    63  }
    64  
    65  // abortTransaction makes the error abort the transaction (even if it is marked
    66  // as transient).
    67  //
    68  // See runTxn for more info. This is used primarily by errUpdateConflict.
    69  var abortTransaction = errors.BoolTag{Key: errors.NewTagKey("this error aborts the transaction")}
    70  
    71  // runTxn runs a datastore transaction retrying the body on transient errors or
    72  // when encountering a commit conflict.
    73  //
    74  // It will NOT retry errors (even if transient) marked with abortTransaction
    75  // tag. This is primarily used to tag errors that are transient at a level
    76  // higher than the transaction: errors marked with both transient.Tag and
    77  // abortTransaction are not retried by runTxn, but may be retried by something
    78  // on top (like Task Queue).
    79  func runTxn(c context.Context, cb func(context.Context) error) error {
    80  	var attempt int
    81  	var innerErr error
    82  
    83  	err := datastore.RunInTransaction(c, func(c context.Context) error {
    84  		attempt++
    85  		if attempt != 1 {
    86  			if innerErr != nil {
    87  				logging.Warningf(c, "Retrying the transaction after the error: %s", innerErr)
    88  			} else {
    89  				logging.Warningf(c, "Retrying the transaction: failed to commit")
    90  			}
    91  		}
    92  		innerErr = cb(c)
    93  		if transient.Tag.In(innerErr) && !abortTransaction.In(innerErr) {
    94  			return datastore.ErrConcurrentTransaction // causes a retry
    95  		}
    96  		return innerErr
    97  	}, &defaultTransactionOptions)
    98  
    99  	if err != nil {
   100  		logging.WithError(err).Errorf(c, "Transaction failed")
   101  		if innerErr != nil {
   102  			return innerErr
   103  		}
   104  		// Here it can only be a commit error (i.e. produced by RunInTransaction
   105  		// itself, not by its callback). We treat them as transient.
   106  		return transient.Tag.Apply(err)
   107  	}
   108  
   109  	return nil
   110  }
   111  
   112  // runIsolatedTxn is like runTxn, except it executes the callback in a new
   113  // isolated transaction (even if the original context is already transactional).
   114  func runIsolatedTxn(c context.Context, cb func(context.Context) error) error {
   115  	return runTxn(datastore.WithoutTransaction(c), cb)
   116  }
   117  
   118  // equalSortedLists returns true if lists contain the same sequence of strings.
   119  func equalSortedLists(a, b []string) bool {
   120  	if len(a) != len(b) {
   121  		return false
   122  	}
   123  	for i, s := range a {
   124  		if s != b[i] {
   125  			return false
   126  		}
   127  	}
   128  	return true
   129  }
   130  
   131  // equalInt64Lists returns true if two lists of int64 are equal.
   132  //
   133  // Order is important.
   134  func equalInt64Lists(a, b []int64) bool {
   135  	if len(a) != len(b) {
   136  		return false
   137  	}
   138  	for i, s := range a {
   139  		if s != b[i] {
   140  			return false
   141  		}
   142  	}
   143  	return true
   144  }
   145  
   146  // marshalTriggersList serializes list of triggers.
   147  //
   148  // Panics on errors.
   149  func marshalTriggersList(t []*internal.Trigger) []byte {
   150  	if len(t) == 0 {
   151  		return nil
   152  	}
   153  	blob, err := proto.Marshal(&internal.TriggerList{Triggers: t})
   154  	if err != nil {
   155  		panic(err)
   156  	}
   157  	return blob
   158  }
   159  
   160  // unmarshalTriggersList deserializes list of triggers.
   161  func unmarshalTriggersList(blob []byte) ([]*internal.Trigger, error) {
   162  	if len(blob) == 0 {
   163  		return nil, nil
   164  	}
   165  	list := internal.TriggerList{}
   166  	if err := proto.Unmarshal(blob, &list); err != nil {
   167  		return nil, err
   168  	}
   169  	return list.Triggers, nil
   170  }
   171  
   172  // mutateTriggersList deserializes the list, calls a callback, which modifies
   173  // the list and serializes it back.
   174  func mutateTriggersList(blob *[]byte, cb func(*[]*internal.Trigger)) error {
   175  	list, err := unmarshalTriggersList(*blob)
   176  	if err != nil {
   177  		return err
   178  	}
   179  	cb(&list)
   180  	*blob = marshalTriggersList(list)
   181  	return nil
   182  }
   183  
   184  // sortTriggers sorts the triggers by time, most recent last.
   185  func sortTriggers(t []*internal.Trigger) {
   186  	sort.Slice(t, func(i, j int) bool { return isTriggerOlder(t[i], t[j]) })
   187  }
   188  
   189  // isTriggerOlder returns true if t1 is older than t2.
   190  //
   191  // Compares IDs in case of a tie.
   192  func isTriggerOlder(t1, t2 *internal.Trigger) bool {
   193  	ts1 := t1.Created.AsTime()
   194  	ts2 := t2.Created.AsTime()
   195  	switch {
   196  	case ts1.After(ts2):
   197  		return false
   198  	case ts2.After(ts1):
   199  		return true
   200  	default: // equal timestamps
   201  		if t1.OrderInBatch != t2.OrderInBatch {
   202  			return t1.OrderInBatch < t2.OrderInBatch
   203  		}
   204  		return t1.Id < t2.Id
   205  	}
   206  }
   207  
   208  // marshalTimersList serializes list of timers.
   209  //
   210  // Panics on errors.
   211  func marshalTimersList(t []*internal.Timer) []byte {
   212  	if len(t) == 0 {
   213  		return nil
   214  	}
   215  	blob, err := proto.Marshal(&internal.TimerList{Timers: t})
   216  	if err != nil {
   217  		panic(err)
   218  	}
   219  	return blob
   220  }
   221  
   222  // unmarshalTimersList deserializes list of timers.
   223  func unmarshalTimersList(blob []byte) ([]*internal.Timer, error) {
   224  	if len(blob) == 0 {
   225  		return nil, nil
   226  	}
   227  	list := internal.TimerList{}
   228  	if err := proto.Unmarshal(blob, &list); err != nil {
   229  		return nil, err
   230  	}
   231  	return list.Timers, nil
   232  }
   233  
   234  // mutateTimersList deserializes the list, calls a callback, which modifies
   235  // the list and serializes it back.
   236  func mutateTimersList(blob *[]byte, cb func(*[]*internal.Timer)) error {
   237  	list, err := unmarshalTimersList(*blob)
   238  	if err != nil {
   239  		return err
   240  	}
   241  	cb(&list)
   242  	*blob = marshalTimersList(list)
   243  	return nil
   244  }
   245  
   246  // marshalFinishedInvs marshals list of invocations into FinishedInvocationList.
   247  //
   248  // Panics on errors.
   249  func marshalFinishedInvs(invs []*internal.FinishedInvocation) []byte {
   250  	if len(invs) == 0 {
   251  		return nil
   252  	}
   253  	blob, err := proto.Marshal(&internal.FinishedInvocationList{Invocations: invs})
   254  	if err != nil {
   255  		panic(err)
   256  	}
   257  	return blob
   258  }
   259  
   260  // unmarshalFinishedInvs unmarshals FinishedInvocationList proto message.
   261  func unmarshalFinishedInvs(raw []byte) ([]*internal.FinishedInvocation, error) {
   262  	if len(raw) == 0 {
   263  		return nil, nil
   264  	}
   265  	invs := internal.FinishedInvocationList{}
   266  	if err := proto.Unmarshal(raw, &invs); err != nil {
   267  		return nil, err
   268  	}
   269  	return invs.Invocations, nil
   270  }
   271  
   272  // filteredFinishedInvocations unmarshals FinishedInvocationList and filters
   273  // it to keep only entries whose Finished timestamp is newer than 'oldest'.
   274  func filteredFinishedInvs(raw []byte, oldest time.Time) ([]*internal.FinishedInvocation, error) {
   275  	invs, err := unmarshalFinishedInvs(raw)
   276  	if err != nil {
   277  		return nil, err
   278  	}
   279  	filtered := make([]*internal.FinishedInvocation, 0, len(invs))
   280  	for _, inv := range invs {
   281  		if inv.Finished.AsTime().After(oldest) {
   282  			filtered = append(filtered, inv)
   283  		}
   284  	}
   285  	return filtered, nil
   286  }
   287  
// opsCache "remembers" recently executed operations, and skips executing them
// if they were already done.
//
// Expected cardinality of a set of all possible actions should be small (we
// store the cache in memory).
//
// The zero value is ready to use: the map is allocated on the first setFlag
// call.
type opsCache struct {
	// lock guards doneFlags: getFlag takes it for reading, setFlag for writing.
	lock      sync.RWMutex
	// doneFlags holds 'true' for each key whose operation is known to be done.
	// It stays nil until the first flag is set.
	doneFlags map[string]bool
}
   297  
   298  // Do calls callback only if it wasn't called before.
   299  //
   300  // Works on best effort basis: callback can and will be called multiple times
   301  // (just not the every time 'Do' is called).
   302  //
   303  // Keeps "done" flag in local memory and in memcache (using 'key' as
   304  // identifier). The callback should be idempotent, since it still may be called
   305  // multiple times if multiple processes attempt to execute the action at once.
   306  func (o *opsCache) Do(c context.Context, key string, cb func() error) error {
   307  	// Check the local cache.
   308  	if o.getFlag(key) {
   309  		return nil
   310  	}
   311  
   312  	// Check the global cache.
   313  	switch _, err := memcache.GetKey(c, key); {
   314  	case err == nil:
   315  		o.setFlag(key)
   316  		return nil
   317  	case err == memcache.ErrCacheMiss:
   318  		break
   319  	default:
   320  		logging.WithError(err).Warningf(c, "opsCache failed to check memcache, will proceed executing op")
   321  	}
   322  
   323  	// Do it.
   324  	if err := cb(); err != nil {
   325  		return err
   326  	}
   327  
   328  	// Store in the local cache.
   329  	o.setFlag(key)
   330  
   331  	// Store in the global cache. Ignore errors, it's not a big deal.
   332  	item := memcache.NewItem(c, key)
   333  	item.SetValue([]byte("ok"))
   334  	item.SetExpiration(24 * time.Hour)
   335  	if err := memcache.Set(c, item); err != nil {
   336  		logging.WithError(err).Warningf(c, "opsCache failed to write item to memcache")
   337  	}
   338  
   339  	return nil
   340  }
   341  
   342  func (o *opsCache) getFlag(key string) bool {
   343  	o.lock.RLock()
   344  	defer o.lock.RUnlock()
   345  	return o.doneFlags[key]
   346  }
   347  
   348  func (o *opsCache) setFlag(key string) {
   349  	o.lock.Lock()
   350  	defer o.lock.Unlock()
   351  	if o.doneFlags == nil {
   352  		o.doneFlags = map[string]bool{}
   353  	}
   354  	o.doneFlags[key] = true
   355  }