go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/cv/internal/common/lease/lease.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package lease
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"strings"
    22  	"time"
    23  
    24  	"go.chromium.org/luci/common/clock"
    25  	"go.chromium.org/luci/common/data/rand/mathrand"
    26  	"go.chromium.org/luci/common/errors"
    27  	"go.chromium.org/luci/common/retry"
    28  	"go.chromium.org/luci/common/retry/transient"
    29  	"go.chromium.org/luci/gae/service/datastore"
    30  )
    31  
    32  // ResourceID is an ID identifying external resource (e.g. a Gerrit CL).
    33  //
    34  // It is in the format of "type/value" where 'type' is the type of the
    35  // resource and 'value' is the string id which identifies the resource.
    36  type ResourceID string
    37  
    38  func (id ResourceID) isValid() bool {
    39  	if i := strings.IndexByte(string(id), '/'); i > 0 && i < len(id)-1 {
    40  		return true
    41  	}
    42  	return false
    43  }
    44  
// Application contains the information needed to apply for a Lease.
type Application struct {
	// ResourceID is the id of the resource that this Lease will operate on.
	//
	// Required and MUST be valid (see the comment of `ResourceID` for the
	// expected "type/value" format).
	ResourceID ResourceID
	// Holder has the privilege to mutate the resource before Lease expiration.
	//
	// Required.
	Holder string
	// Payload is used to record the mutation that the Lease holder intends to
	// perform during the Lease period.
	Payload []byte
	// ExpireTime is the time that this Lease expires.
	//
	// It will be converted to UTC and truncated to millisecond precision in
	// the resulting Lease.
	//
	// Required, MUST be larger than the current time.
	ExpireTime time.Time
}
    65  
    66  func (a *Application) validate(ctx context.Context) error {
    67  	switch {
    68  	case a == nil:
    69  		return errors.Reason("nil lease application").Err()
    70  	case !a.ResourceID.isValid():
    71  		return errors.Reason("invalid ResourceID: %q", a.ResourceID).Err()
    72  	case a.Holder == "":
    73  		return errors.Reason("empty lease Holder").Err()
    74  	}
    75  	return nil
    76  }
    77  
// AlreadyInLeaseErr is returned when the resource is currently in lease.
type AlreadyInLeaseErr struct {
	// ResourceID is the ID of the target resource.
	ResourceID ResourceID
	// ExpireTime is the time the current lease on the target resource expires.
	ExpireTime time.Time
	// Holder is the holder of the current lease on the target resource.
	Holder string
}
    87  
    88  // Error implements `error`.
    89  func (e *AlreadyInLeaseErr) Error() string {
    90  	return fmt.Sprintf("Resource %q is currently leased by %s until %s", e.ResourceID, e.Holder, e.ExpireTime)
    91  }
    92  
    93  // IsAlreadyInLeaseErr detects and returns `AlreadyInLeaseErr` in the given err.
    94  func IsAlreadyInLeaseErr(err error) (*AlreadyInLeaseErr, bool) {
    95  	var ret *AlreadyInLeaseErr
    96  	errors.WalkLeaves(err, func(leaf error) bool {
    97  		if e, ok := leaf.(*AlreadyInLeaseErr); ok {
    98  			ret = e
    99  			return false
   100  		}
   101  		return true
   102  	})
   103  	return ret, ret != nil
   104  }
   105  
// tokenLen is the length, in bytes, of the random Token stored in a Lease.
const tokenLen = 8

// Lease is like a mutex on external resource with expiration time.
//
// It is stored in datastore under the "Lease" kind, keyed by ResourceID.
type Lease struct {
	_kind string `gae:"$kind,Lease"`
	// ResourceID is the id of the resource that this lease will operate on.
	ResourceID ResourceID `gae:"$id"`
	// Holder has the privilege to mutate the resource before lease expiration.
	Holder string `gae:",noindex"`
	// Payload is used to record the mutation that the lease holder intends to
	// perform during the lease period.
	Payload []byte `gae:",noindex"`
	// ExpireTime is the time (in ms precision) this Lease expires.
	ExpireTime time.Time `gae:",noindex"`
	// Token is randomly generated for each successful lease application and
	// extension.
	//
	// It is used for fast equality check, i.e. to tell whether the Lease
	// stored in datastore is still the one held by the caller.
	Token []byte `gae:",noindex"`
}
   126  
   127  // Expired tells whether the Lease has expired or not.
   128  //
   129  // A nil Lease is always expired.
   130  func (l *Lease) Expired(ctx context.Context) bool {
   131  	if l == nil {
   132  		return true
   133  	}
   134  	return clock.Now(ctx).After(l.ExpireTime)
   135  }
   136  
   137  // Extend extends the Lease by additional duration.
   138  //
   139  // Returns AlreadyInLeaseErr if this resource is not in the same lease as
   140  // provided.
   141  // The result expireTime will be truncated to millisecond.
   142  func (l *Lease) Extend(ctx context.Context, addition time.Duration) error {
   143  	switch {
   144  	case addition < 0:
   145  		return errors.Reason("expected positive additional duration; got %s", addition).Err()
   146  	case l.Expired(ctx):
   147  		return errors.New("can't extend an expired lease")
   148  	}
   149  
   150  	extended := *l
   151  	extended.ExpireTime = l.ExpireTime.UTC().Add(addition).Truncate(time.Millisecond)
   152  	extended.Token = make([]byte, tokenLen)
   153  	if _, err := mathrand.Read(ctx, extended.Token); err != nil {
   154  		return errors.Annotate(err, "failed to generate token for the extension").Err()
   155  	}
   156  
   157  	var innerErr error
   158  	finalErr := datastore.RunInTransaction(ctx, func(ctx context.Context) (err error) {
   159  		defer func() { innerErr = err }()
   160  		cur, err := Load(ctx, l.ResourceID)
   161  		switch {
   162  		case err != nil:
   163  			return errors.Annotate(err, "failed to fetch lease for resource %s", l.ResourceID).Tag(transient.Tag).Err()
   164  		case cur == nil:
   165  			return errors.New("target lease doesn't exist in datastore")
   166  		case !bytes.Equal(cur.Token, l.Token):
   167  			return &AlreadyInLeaseErr{
   168  				ExpireTime: cur.ExpireTime,
   169  				Holder:     cur.Holder,
   170  				ResourceID: cur.ResourceID,
   171  			}
   172  		}
   173  		if err := datastore.Put(ctx, &extended); err != nil {
   174  			return errors.Annotate(err, "failed to put lease for resource %s", l.ResourceID).Tag(transient.Tag).Err()
   175  		}
   176  		return nil
   177  	}, nil)
   178  
   179  	switch {
   180  	case innerErr != nil:
   181  		return innerErr
   182  	case finalErr != nil:
   183  		return errors.Annotate(finalErr, "failed to extend lease for resource %s", l.ResourceID).Tag(transient.Tag).Err()
   184  	}
   185  	*l = extended
   186  	return nil
   187  }
   188  
   189  // Terminate terminates the lease.
   190  //
   191  // Returns AlreadyInLeaseErr if the provided lease doesn't currently hold the
   192  // resource.
   193  func (l *Lease) Terminate(ctx context.Context) error {
   194  	var innerErr error
   195  	finalErr := datastore.RunInTransaction(ctx, func(ctx context.Context) (err error) {
   196  		defer func() { innerErr = err }()
   197  		cur, err := Load(ctx, l.ResourceID)
   198  		switch {
   199  		case err != nil:
   200  			return errors.Annotate(err, "failed to fetch lease for resource %s", l.ResourceID).Tag(transient.Tag).Err()
   201  		case cur == nil:
   202  			return nil // lease is already terminated
   203  		case !bytes.Equal(cur.Token, l.Token):
   204  			return &AlreadyInLeaseErr{
   205  				ExpireTime: cur.ExpireTime,
   206  				Holder:     cur.Holder,
   207  				ResourceID: cur.ResourceID,
   208  			}
   209  		}
   210  		if err := datastore.Delete(ctx, l); err != nil {
   211  			return errors.Annotate(err, "failed to delete lease for resource %s", l.ResourceID).Tag(transient.Tag).Err()
   212  		}
   213  		return nil
   214  	}, nil)
   215  
   216  	switch {
   217  	case innerErr != nil:
   218  		return innerErr
   219  	case finalErr != nil:
   220  		return errors.Annotate(finalErr, "failed to terminate lease for resource %s", l.ResourceID).Tag(transient.Tag).Err()
   221  	}
   222  	return nil
   223  }
   224  
   225  // Load loads the latest Lease (may already be expired) for given resource.
   226  //
   227  // Returns nil Lease if no Lease can be found for the resource.
   228  func Load(ctx context.Context, rid ResourceID) (*Lease, error) {
   229  	ret := &Lease{ResourceID: rid}
   230  	switch err := datastore.Get(ctx, ret); {
   231  	case err == datastore.ErrNoSuchEntity:
   232  		return nil, nil
   233  	case err != nil:
   234  		return nil, err
   235  	default:
   236  		return ret, nil
   237  	}
   238  }
   239  
   240  // TryApply checks if the Lease application will go through given the latest
   241  // Lease on the resource.
   242  //
   243  // Returns non-nil error if the application will fail. Otherwise, returns nil
   244  // error and the new Lease assuming applications succeeds.
   245  //
   246  // MUST be called in a datastore transaction and the latest Lease MUST be
   247  // loaded in the same transaction.
   248  func TryApply(ctx context.Context, latestLease *Lease, app Application) (*Lease, error) {
   249  	if datastore.CurrentTransaction(ctx) == nil {
   250  		panic("must be called in transaction context")
   251  	}
   252  	if err := app.validate(ctx); err != nil {
   253  		return nil, err
   254  	}
   255  	if !latestLease.Expired(ctx) {
   256  		return nil, &AlreadyInLeaseErr{
   257  			ExpireTime: latestLease.ExpireTime,
   258  			Holder:     latestLease.Holder,
   259  			ResourceID: latestLease.ResourceID,
   260  		}
   261  	}
   262  	ret := &Lease{
   263  		ResourceID: app.ResourceID,
   264  		Holder:     app.Holder,
   265  		Payload:    app.Payload,
   266  		ExpireTime: app.ExpireTime.UTC().Truncate(time.Millisecond),
   267  		Token:      make([]byte, tokenLen),
   268  	}
   269  	if _, err := mathrand.Read(ctx, ret.Token); err != nil {
   270  		return nil, err
   271  	}
   272  	return ret, nil
   273  }
   274  
   275  // Apply applies for a new lease.
   276  //
   277  // Returns AlreadyInLeaseErr if the lease on this resource hasn't expired yet.
   278  func Apply(ctx context.Context, app Application) (*Lease, error) {
   279  	if err := app.validate(ctx); err != nil {
   280  		return nil, err
   281  	}
   282  	rid := app.ResourceID
   283  	var ret *Lease
   284  	var innerErr error
   285  	finalErr := datastore.RunInTransaction(ctx, func(ctx context.Context) (err error) {
   286  		defer func() { innerErr = err }()
   287  		cur, err := Load(ctx, rid)
   288  		if err != nil {
   289  			return errors.Annotate(err, "failed to fetch lease for resource %s", rid).Tag(transient.Tag).Err()
   290  		}
   291  		ret, err = TryApply(ctx, cur, app)
   292  		if err != nil {
   293  			return err
   294  		}
   295  		if err := datastore.Put(ctx, ret); err != nil {
   296  			return errors.Annotate(err, "failed to put Lease for resource %s", rid).Tag(transient.Tag).Err()
   297  		}
   298  		return nil
   299  	}, nil)
   300  	switch {
   301  	case innerErr != nil:
   302  		return nil, innerErr
   303  	case finalErr != nil:
   304  		return nil, errors.Annotate(finalErr, "failed to create lease for resource %s", rid).Tag(transient.Tag).Err()
   305  	}
   306  	return ret, nil
   307  }
   308  
   309  // RetryIfLeased returns a retry.Factory that generates an iterator that
   310  // retries on AlreadyInLeaseErr.
   311  //
   312  // If the error != AlreadyInLease, the `next` iterator is used to compute
   313  // the delays for retries.
   314  //
   315  // If the error == AlreadyInLease, it tags AlreadyInLease as transient, and
   316  // passes it to the `next` iterator. Then, it chooses a shorter delay
   317  // between the time until lease expiry and the delay from the `next` iterator.
   318  //
   319  // If the `next` delay == retry.Stop, the iterator always returns retry.Stop,
   320  // whether the error was AlreadyInLease or not.
   321  func RetryIfLeased(next retry.Factory) retry.Factory {
   322  	return func() retry.Iterator {
   323  		var inner retry.Iterator
   324  		if next != nil {
   325  			inner = next()
   326  		}
   327  		return &retryIfLeasedIterator{inner: inner}
   328  	}
   329  }
   330  
// retryIfLeasedIterator retries on AlreadyInLeaseErr, waiting for the shorter
// of the time until the lease expires and the delay the inner iterator would
// generate.
//
// If the error is not AlreadyInLeaseErr, it uses the inner iterator to
// determine if it should continue the iteration and how long the delay should
// be, if so.
type retryIfLeasedIterator struct {
	// inner is the wrapped iterator. It may be nil.
	inner retry.Iterator
}
   340  
   341  // Next implements retry.Iterator
   342  func (c retryIfLeasedIterator) Next(ctx context.Context, err error) time.Duration {
   343  	if info, isLeasedErr := IsAlreadyInLeaseErr(err); isLeasedErr {
   344  		timeToExpire := clock.Until(ctx, info.ExpireTime)
   345  		if c.inner == nil {
   346  			return timeToExpire
   347  		}
   348  
   349  		switch innerNext := c.inner.Next(ctx, transient.Tag.Apply(err)); {
   350  		case innerNext == retry.Stop:
   351  			return retry.Stop
   352  		case timeToExpire < innerNext:
   353  			return timeToExpire
   354  		default:
   355  			return innerNext
   356  		}
   357  	}
   358  
   359  	if c.inner == nil {
   360  		return retry.Stop
   361  	}
   362  	return c.inner.Next(ctx, err)
   363  }