go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/tq/txn/spanner/lessor.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package spanner
    16  
    17  import (
    18  	"context"
    19  	"math"
    20  	"time"
    21  
    22  	"cloud.google.com/go/spanner"
    23  
    24  	"go.chromium.org/luci/common/clock"
    25  	"go.chromium.org/luci/common/errors"
    26  	"go.chromium.org/luci/common/logging"
    27  	"go.chromium.org/luci/common/retry/transient"
    28  
    29  	"go.chromium.org/luci/server/span"
    30  	"go.chromium.org/luci/server/tq/internal/lessor"
    31  	"go.chromium.org/luci/server/tq/internal/partition"
    32  )
    33  
    34  // spanLessor implements lessor.Lessor on top of Cloud Spanner.
    35  type spanLessor struct {
    36  }
    37  
    38  // WithLease acquires the lease and executes WithLeaseCB.
    39  // The obtained lease duration may be shorter than requested.
    40  // The obtained lease may be only for some parts of the desired Partition.
    41  func (l *spanLessor) WithLease(ctx context.Context, sectionID string, part *partition.Partition, dur time.Duration, clbk lessor.WithLeaseCB) error {
    42  	expiresAt := clock.Now(ctx).Add(dur)
    43  	if d, ok := ctx.Deadline(); ok && expiresAt.After(d) {
    44  		expiresAt = d
    45  	}
    46  
    47  	lease, err := l.acquire(ctx, sectionID, part, expiresAt)
    48  	if err != nil {
    49  		return err
    50  	}
    51  	defer lease.remove(ctx) // failure to remove is logged & ignored.
    52  
    53  	lctx, cancel := clock.WithDeadline(ctx, lease.ExpiresAt)
    54  	defer cancel()
    55  	clbk(lctx, lease.parts)
    56  	return nil
    57  }
    58  
    59  func (*spanLessor) acquire(ctx context.Context, sectionID string, desired *partition.Partition, expiresAt time.Time) (*lease, error) {
    60  	var acquired *lease
    61  	deletedExpired := 0
    62  
    63  	_, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error {
    64  		deletedExpired = 0 // reset in case of retries.
    65  		all, err := loadAll(ctx, sectionID)
    66  		if err != nil {
    67  			return errors.Annotate(err, "failed to read leases").Err()
    68  		}
    69  		active, expired := activeAndExpired(ctx, all)
    70  		if len(expired) > 0 {
    71  			// Deleting >= 1 lease every time a new one is created suffices to avoid
    72  			// accumulating garbage above O(active leases).
    73  			if len(expired) > 50 {
    74  				expired = expired[:50]
    75  			}
    76  			remove(ctx, expired)
    77  			deletedExpired = len(expired)
    78  		}
    79  		parts, err := availableForLease(desired, active)
    80  		if err != nil {
    81  			return errors.Annotate(err, "failed to decode available leases").Err()
    82  		}
    83  		acquired = save(ctx, sectionID, expiresAt, parts, maxLeaseID(all))
    84  		return nil
    85  	})
    86  	if err != nil {
    87  		return nil, errors.Annotate(err, "failed to transact a lease").Tag(transient.Tag).Err()
    88  	}
    89  	if deletedExpired > 0 {
    90  		// If this is logged frequently, something is wrong either with the leasing
    91  		// process or the lessees are holding to lease longer than they should.
    92  		logging.Warningf(ctx, "deleted %d expired leases", deletedExpired)
    93  	}
    94  	return acquired, nil
    95  }
    96  
// lease is a single lease record.
//
// The exported fields carry the same names as the TQLeases columns that
// save() writes and query() reads.
type lease struct {
	// SectionID is the section this lease belongs to.
	SectionID string
	// LeaseID identifies the lease within its section. It stays 0 for no-op
	// leases that save() never writes; remove() skips such leases.
	LeaseID int64
	// SerializedParts holds the String() form of each leased partition.
	SerializedParts []string
	// ExpiresAt is the time after which the lease is no longer active.
	ExpiresAt time.Time

	// Set only when lease object is created in save().
	parts partition.SortedPartitions
}
   106  
   107  func save(ctx context.Context, sectionID string, expiresAt time.Time, parts partition.SortedPartitions, max int64) *lease {
   108  	if len(parts) == 0 {
   109  		return &lease{
   110  			ExpiresAt: expiresAt,
   111  			parts:     parts,
   112  		} // no need to save noop lease.
   113  	}
   114  
   115  	l := &lease{
   116  		SectionID:       sectionID,
   117  		SerializedParts: make([]string, len(parts)),
   118  		ExpiresAt:       expiresAt.UTC(),
   119  		parts:           parts,
   120  	}
   121  	for i, p := range parts {
   122  		l.SerializedParts[i] = p.String()
   123  	}
   124  
   125  	// Strictly increase the leaseID until it reaches to math.MaxInt64 then
   126  	// go back and increase from 1 again.
   127  	var leaseID int64
   128  	switch {
   129  	case max < math.MaxInt64:
   130  		leaseID = max + 1
   131  	default:
   132  		leaseID = 1
   133  	}
   134  
   135  	l.LeaseID = leaseID
   136  	m := spanner.InsertMap("TQLeases", map[string]any{
   137  		"SectionID":       l.SectionID,
   138  		"LeaseID":         leaseID,
   139  		"SerializedParts": l.SerializedParts,
   140  		"ExpiresAt":       l.ExpiresAt,
   141  	})
   142  	span.BufferWrite(ctx, m)
   143  
   144  	return l
   145  }
   146  
   147  func (l *lease) remove(ctx context.Context) {
   148  	if l.LeaseID == 0 {
   149  		return
   150  	}
   151  
   152  	_, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error {
   153  		remove(ctx, []*lease{l})
   154  		return nil
   155  	})
   156  	if err != nil {
   157  		// Log only. Once lease expires, it'll garbage-collected next time a new
   158  		// lease is acquired for the same sectionID.
   159  		logging.Warningf(ctx, "failed to remove lease %v", l)
   160  	}
   161  }
   162  
   163  func query(ctx context.Context, sectionID string) ([]*lease, error) {
   164  	st := spanner.NewStatement(`
   165  		SELECT SectionID, LeaseID, SerializedParts, ExpiresAt
   166  		FROM TQLeases
   167  		WHERE SectionID = @sectionID
   168  	`)
   169  	st.Params = map[string]any{
   170  		"sectionID": sectionID,
   171  	}
   172  
   173  	var all []*lease
   174  	err := span.Query(ctx, st).Do(
   175  		func(row *spanner.Row) error {
   176  			l := &lease{}
   177  			if err := row.Columns(&l.SectionID, &l.LeaseID, &l.SerializedParts, &l.ExpiresAt); err != nil {
   178  				return err
   179  			}
   180  			all = append(all, l)
   181  			return nil
   182  		},
   183  	)
   184  	return all, err
   185  }
   186  
   187  func loadAll(ctx context.Context, sectionID string) ([]*lease, error) {
   188  	all, err := query(ctx, sectionID)
   189  	if err != nil {
   190  		return nil, errors.Annotate(err, "failed to fetch leases").Tag(transient.Tag).Err()
   191  	}
   192  	return all, nil
   193  }
   194  
   195  func activeAndExpired(ctx context.Context, all []*lease) (active, expired []*lease) {
   196  	// Partition active leases in the front and expired at the end of the slice.
   197  	i, j := 0, len(all)
   198  	now := clock.Now(ctx)
   199  	for i < j {
   200  		if all[i].ExpiresAt.After(now) {
   201  			i++
   202  			continue
   203  		}
   204  		j--
   205  		all[i], all[j] = all[j], all[i]
   206  	}
   207  	return all[:i], all[i:]
   208  }
   209  
   210  func maxLeaseID(all []*lease) int64 {
   211  	var max int64 = 0
   212  	for _, l := range all {
   213  		if l.LeaseID > max {
   214  			max = l.LeaseID
   215  		}
   216  	}
   217  	return max
   218  }
   219  
   220  func availableForLease(desired *partition.Partition, active []*lease) (partition.SortedPartitions, error) {
   221  	builder := partition.NewSortedPartitionsBuilder(desired)
   222  	// Exclude from desired all partitions under currently active leases.
   223  	// TODO(tandrii): constrain number of partitions per lease to avoid excessive
   224  	// runtime here.
   225  	for _, l := range active {
   226  		for _, s := range l.SerializedParts {
   227  			p, err := partition.FromString(s)
   228  			if err != nil {
   229  				return nil, err
   230  			}
   231  			builder.Exclude(p)
   232  			if builder.IsEmpty() {
   233  				break
   234  			}
   235  		}
   236  	}
   237  	return builder.Result(), nil
   238  }
   239  
   240  func remove(ctx context.Context, ls []*lease) {
   241  	ms := make([]*spanner.Mutation, 0, len(ls))
   242  	for _, l := range ls {
   243  		if l.LeaseID == 0 {
   244  			continue
   245  		}
   246  		m := spanner.Delete("TQLeases", spanner.Key{l.SectionID, l.LeaseID})
   247  		ms = append(ms, m)
   248  	}
   249  	span.BufferWrite(ctx, ms...)
   250  }