go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/tq/txn/datastore/lessor.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package datastore
    16  
    17  import (
    18  	"context"
    19  	"time"
    20  
    21  	"go.chromium.org/luci/common/clock"
    22  	"go.chromium.org/luci/common/errors"
    23  	"go.chromium.org/luci/common/logging"
    24  	"go.chromium.org/luci/common/retry/transient"
    25  	ds "go.chromium.org/luci/gae/service/datastore"
    26  
    27  	"go.chromium.org/luci/server/tq/internal/lessor"
    28  	"go.chromium.org/luci/server/tq/internal/partition"
    29  )
    30  
    31  // dsLessor implements lessor.Lessor on top of Cloud Datastore.
    32  type dsLessor struct {
    33  }
    34  
    35  // WithLease acquires the lease and executes WithLeaseCB.
    36  // The obtained lease duration may be shorter than requested.
    37  // The obtained lease may be only for some parts of the desired Partition.
    38  func (l *dsLessor) WithLease(ctx context.Context, sectionID string, part *partition.Partition, dur time.Duration, clbk lessor.WithLeaseCB) error {
    39  	expiresAt := clock.Now(ctx).Add(dur)
    40  	if d, ok := ctx.Deadline(); ok && expiresAt.After(d) {
    41  		expiresAt = d
    42  	}
    43  	expiresAt = ds.RoundTime(expiresAt)
    44  
    45  	lease, err := l.acquire(ctx, sectionID, part, expiresAt)
    46  	if err != nil {
    47  		return err
    48  	}
    49  	defer lease.remove(ctx) // failure to remove is logged & ignored.
    50  
    51  	lctx, cancel := clock.WithDeadline(ctx, lease.ExpiresAt)
    52  	defer cancel()
    53  	clbk(lctx, lease.parts)
    54  	return nil
    55  }
    56  
    57  func (*dsLessor) acquire(ctx context.Context, sectionID string, desired *partition.Partition, expiresAt time.Time) (*lease, error) {
    58  	var acquired *lease
    59  	deletedExpired := 0
    60  	err := ds.RunInTransaction(ctx, func(ctx context.Context) error {
    61  		deletedExpired = 0 // reset in case of retries.
    62  		active, expired, err := loadAll(ctx, sectionID)
    63  		if err != nil {
    64  			return err
    65  		}
    66  		if len(expired) > 0 {
    67  			// Deleting >= 1 lease every time a new one is created suffices to avoid
    68  			// accumulating garbage above O(active leases).
    69  			if len(expired) > 50 {
    70  				expired = expired[:50]
    71  			}
    72  			if err = ds.Delete(ctx, expired); err != nil {
    73  				return errors.Annotate(err, "failed to remove %d expired leases", len(expired)).Err()
    74  			}
    75  			deletedExpired = len(expired)
    76  		}
    77  		parts, err := availableForLease(desired, active)
    78  		if err != nil {
    79  			return errors.Annotate(err, "failed to decode available leases").Err()
    80  		}
    81  		acquired, err = save(ctx, sectionID, expiresAt, parts)
    82  		return err
    83  	}, &ds.TransactionOptions{Attempts: 5})
    84  	if err != nil {
    85  		return nil, errors.Annotate(err, "failed to transact a lease").Tag(transient.Tag).Err()
    86  	}
    87  	if deletedExpired > 0 {
    88  		// If this is logged frequently, something is wrong either with the leasing
    89  		// process or the lessees are holding to lease longer than they should.
    90  		logging.Warningf(ctx, "deleted %d expired leases", deletedExpired)
    91  	}
    92  	return acquired, nil
    93  }
    94  
    95  func leasesRootKey(ctx context.Context, sectionID string) *ds.Key {
    96  	return ds.NewKey(ctx, "tq.LeasesRoot", sectionID, 0, nil)
    97  }
    98  
// lease is a datastore entity of kind "tq.Lease" describing a lease on a set
// of partitions that is valid until ExpiresAt.
//
// Leases are stored as children of the tq.LeasesRoot key of their section
// (see leasesRootKey). SerializedParts holds the leased partitions in their
// String() form, as written by save(); they are decoded back with
// partition.FromString when computing what is still available for lease.
//
// A lease with zero Id was never saved to datastore (a noop lease).
type lease struct {
	_kind string `gae:"$kind,tq.Lease"`

	Id              int64     `gae:"$id"`     // autoassigned. If not set, implies a noop lease.
	Parent          *ds.Key   `gae:"$parent"` // tq.LeasesRoot entity.
	SerializedParts []string  `gae:",noindex"`
	ExpiresAt       time.Time `gae:",noindex"` // precision up to microseconds.

	// Set only when lease object is created in save().
	parts partition.SortedPartitions `gae:"-"`
}
   110  
   111  func save(ctx context.Context, sectionID string, expiresAt time.Time, parts partition.SortedPartitions) (*lease, error) {
   112  	if len(parts) == 0 {
   113  		return &lease{
   114  			ExpiresAt: expiresAt,
   115  			parts:     parts,
   116  		}, nil // no need to save noop lease.
   117  	}
   118  
   119  	l := &lease{
   120  		// ID will be autoassgined.
   121  		Parent:          leasesRootKey(ctx, sectionID),
   122  		SerializedParts: make([]string, len(parts)),
   123  		ExpiresAt:       expiresAt.UTC(),
   124  		parts:           parts,
   125  	}
   126  	for i, p := range parts {
   127  		l.SerializedParts[i] = p.String()
   128  	}
   129  	if err := ds.Put(ctx, l); err != nil {
   130  		return nil, errors.Annotate(err, "failed to save a new lease").Tag(transient.Tag).Err()
   131  	}
   132  	return l, nil
   133  }
   134  
   135  func (l *lease) remove(ctx context.Context) {
   136  	if l.Id == 0 {
   137  		return // noop leases are not saved.
   138  	}
   139  	if err := ds.Delete(ctx, l); err != nil {
   140  		// Log only. Once lease expires, it'll garbage-collected next time a new
   141  		// lease is acquired for the same sectionID.
   142  		logging.Warningf(ctx, "failed to remove lease %v", l)
   143  	}
   144  }
   145  
   146  func loadAll(ctx context.Context, sectionID string) (active, expired []*lease, err error) {
   147  	var all []*lease
   148  	q := ds.NewQuery("tq.Lease").Ancestor(leasesRootKey(ctx, sectionID))
   149  	if err := ds.GetAll(ctx, q, &all); err != nil {
   150  		return nil, nil, errors.Annotate(err, "failed to fetch leases").Tag(transient.Tag).Err()
   151  	}
   152  	// Partition active leases in the front and expired at the end of the slice.
   153  	i, j := 0, len(all)
   154  	now := clock.Now(ctx)
   155  	for i < j {
   156  		if all[i].ExpiresAt.After(now) {
   157  			i++
   158  			continue
   159  		}
   160  		j--
   161  		all[i], all[j] = all[j], all[i]
   162  	}
   163  	return all[:i], all[i:], nil
   164  }
   165  
   166  func availableForLease(desired *partition.Partition, active []*lease) (partition.SortedPartitions, error) {
   167  	builder := partition.NewSortedPartitionsBuilder(desired)
   168  	// Exclude from desired all partitions under currently active leases.
   169  	// TODO(tandrii): constrain number of partitions per lease to avoid excessive
   170  	// runtime here.
   171  	for _, l := range active {
   172  		for _, s := range l.SerializedParts {
   173  			p, err := partition.FromString(s)
   174  			if err != nil {
   175  				return nil, err
   176  			}
   177  			builder.Exclude(p)
   178  			if builder.IsEmpty() {
   179  				break
   180  			}
   181  		}
   182  	}
   183  	return builder.Result(), nil
   184  }