github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_range_lease.go

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // This file contains replica methods related to range leases.
    12  //
    13  // Here be dragons: The lease system (especially for epoch-based
    14  // leases) relies on multiple interlocking conditional puts (here and
    15  // in NodeLiveness). Reads (to get expected values) and conditional
    16  // puts have to happen in a certain order, leading to surprising
    17  // dependencies at a distance (for example, there's a LeaseStatus
    18  // object that gets plumbed most of the way through this file.
    19  // LeaseStatus bundles the results of multiple checks with the time at
    20  // which they were performed, so that timestamp must be used for later
    21  // operations). The current arrangement is not perfect, and some
    22  // opportunities for improvement appear, but any changes must be made
    23  // very carefully.
    24  //
    25  // NOTE(bdarnell): The biggest problem with the current code is that
    26  // with epoch-based leases, we may do two separate slow operations
    27  // (IncrementEpoch/Heartbeat and RequestLease/AdminTransferLease). In
    28  // the organization that was inherited from expiration-based leases,
    29  // we prepare the arguments we're going to use for the lease
    30  // operations before performing the liveness operations, and by the
    31  // time the liveness operations complete those may be stale.
    32  //
    33  // Therefore, my suggested refactoring would be to move the liveness
    34  // operations earlier in the process, soon after the initial
    35  // leaseStatus call. If a liveness operation is required, do it and
    36  // start over, with a fresh leaseStatus.
    37  //
    38  // This could also allow the liveness operations to be coalesced per
    39  // node instead of having each range separately queue up redundant
    40  // liveness operations. (The InitOrJoin model predates the
    41  // singleflight package; could we simplify things by using it?)
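        //
        // For illustration only (this is not the current implementation), that
        // suggested flow would look roughly like:
        //
        //   status := r.leaseStatus(lease, now, minProposedTS)
        //   if livenessOpNeeded(status) {      // hypothetical helper: heartbeat or epoch bump
        //       performLivenessOp(ctx, status) // hypothetical helper
        //       status = r.leaseStatus(lease, r.store.Clock().Now(), minProposedTS) // start over
        //   }
        //   // ...then build and propose the RequestLease/TransferLease using status.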
    42  
    43  package kvserver
    44  
    45  import (
    46  	"context"
    47  	"fmt"
    48  	"time"
    49  
    50  	"github.com/cockroachdb/cockroach/pkg/base"
    51  	"github.com/cockroachdb/cockroach/pkg/keys"
    52  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
    53  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    54  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    55  	"github.com/cockroachdb/cockroach/pkg/util/log"
    56  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    57  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    58  	"github.com/cockroachdb/errors"
    59  	"github.com/cockroachdb/logtags"
    60  	"github.com/opentracing/opentracing-go"
    61  )
    62  
    63  var leaseStatusLogLimiter = log.Every(5 * time.Second)
    64  
    65  // leaseRequestHandle is a handle to an asynchronous lease request.
    66  type leaseRequestHandle struct {
    67  	p *pendingLeaseRequest
    68  	c chan *roachpb.Error
    69  }
    70  
    71  // C returns the channel on which the lease request's result will be sent.
    72  func (h *leaseRequestHandle) C() <-chan *roachpb.Error {
    73  	if h.c == nil {
    74  		panic("handle already canceled")
    75  	}
    76  	return h.c
    77  }
    78  
    79  // Cancel cancels the request handle. It also cancels the asynchronous
    80  // lease request task if its reference count drops to zero.
    81  func (h *leaseRequestHandle) Cancel() {
    82  	h.p.repl.mu.Lock()
    83  	defer h.p.repl.mu.Unlock()
    84  	if len(h.c) == 0 {
    85  		// Our lease request is ongoing...
    86  		// Unregister handle.
    87  		delete(h.p.llHandles, h)
    88  		// Cancel request, if necessary.
    89  		if len(h.p.llHandles) == 0 {
    90  			h.p.cancelLocked()
    91  		}
    92  	}
    93  	// Mark handle as canceled.
    94  	h.c = nil
    95  }
    96  
    97  // resolve notifies the handle of the request's result.
    98  //
    99  // Requires repl.mu is exclusively locked.
   100  func (h *leaseRequestHandle) resolve(pErr *roachpb.Error) { h.c <- pErr }
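
        // Hypothetical usage sketch (not part of the original file): this is how
        // handles returned by InitOrJoinRequest/JoinRequest are consumed elsewhere in
        // this file (see AdminTransferLease and redirectOnOrAcquireLease): block on
        // C() for the result, and Cancel() the handle if the caller's context is done
        // first. The function name is illustrative only.
        func exampleWaitForLeaseResult(ctx context.Context, h *leaseRequestHandle) error {
        	select {
        	case pErr := <-h.C():
        		// pErr is nil on success; GoError converts a nil *roachpb.Error to nil.
        		return pErr.GoError()
        	case <-ctx.Done():
        		// Stop waiting and drop our interest in the coalesced request.
        		h.Cancel()
        		return ctx.Err()
        	}
        }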
   101  
   102  // pendingLeaseRequest coalesces RequestLease requests and lets
   103  // callers join an in-progress lease request and wait for the result.
   104  // The actual execution of the RequestLease Raft request is delegated
   105  // to a replica.
   106  //
   107  // There are two types of leases: expiration-based and epoch-based.
   108  // Expiration-based leases are considered valid as long as the wall
   109  // time is less than the lease expiration timestamp minus the maximum
   110  // clock offset. Epoch-based leases do not expire, but rely on the
   111  // leaseholder maintaining its node liveness record (also a lease, but
   112  // at the node level). All ranges up to and including the node
   113  // liveness table must use expiration-based leases to avoid any
   114  // circular dependencies.
   115  //
   116  // Methods are not thread-safe; a pendingLeaseRequest is logically part
   117  // of the replica it references, so replica.mu should be used to
   118  // synchronize all calls.
   119  type pendingLeaseRequest struct {
   120  	// The replica that the pendingLeaseRequest is a part of.
   121  	repl *Replica
   122  	// Set of request handles attached to the lease acquisition.
   123  	// All accesses require repl.mu to be exclusively locked.
   124  	llHandles map[*leaseRequestHandle]struct{}
   125  	// cancelLocked is a context cancellation function for the async lease
   126  	// request, if one exists. It cancels an ongoing lease request and cleans up
   127  	// the request's state, including setting the cancelLocked function itself to
   128  	// nil. It is called when a lease request is canceled because all handles
   129  	// cancel, or when a lease request completes. If nil, then no request is in
   130  	// progress. repl.mu must be exclusively locked to call the function.
   131  	cancelLocked func()
   132  	// nextLease is the pending RequestLease request, if any. It can be used to
   133  	// figure out if we're in the process of extending our own lease, or
   134  	// transferring it to another replica.
   135  	nextLease roachpb.Lease
   136  }
   137  
   138  func makePendingLeaseRequest(repl *Replica) pendingLeaseRequest {
   139  	return pendingLeaseRequest{
   140  		repl:      repl,
   141  		llHandles: make(map[*leaseRequestHandle]struct{}),
   142  	}
   143  }
   144  
   145  // RequestPending returns the pending Lease, if one is in progress.
   146  // The second return value is true if a lease request is pending.
   147  //
   148  // Requires repl.mu is read locked.
   149  func (p *pendingLeaseRequest) RequestPending() (roachpb.Lease, bool) {
   150  	pending := p.cancelLocked != nil
   151  	if pending {
   152  		return p.nextLease, true
   153  	}
   154  	return roachpb.Lease{}, false
   155  }
   156  
   157  // InitOrJoinRequest executes a RequestLease command asynchronously and returns a
   158  // handle on which the result will be posted. If there's already a request in
   159  // progress, we join in waiting for the results of that request.
   160  // It is an error to call InitOrJoinRequest() while a request is in progress
   161  // naming another replica as lease holder.
   162  //
   163  // replica is used to schedule and execute async work (proposing a RequestLease
   164  // command). replica.mu is locked when delivering results, so calls from the
   165  // replica happen either before or after a result for a pending request has
   166  // happened.
   167  //
   168  // The new lease will be a successor to the one in the status
   169  // argument, and its fields will be used to fill in the expected
   170  // values for liveness and lease operations.
   171  //
   172  // transfer needs to be set if the request represents a lease transfer (as
   173  // opposed to an extension, or acquiring the lease when none is held).
   174  //
   175  // Requires repl.mu is exclusively locked.
   176  func (p *pendingLeaseRequest) InitOrJoinRequest(
   177  	ctx context.Context,
   178  	nextLeaseHolder roachpb.ReplicaDescriptor,
   179  	status kvserverpb.LeaseStatus,
   180  	startKey roachpb.Key,
   181  	transfer bool,
   182  ) *leaseRequestHandle {
   183  	if nextLease, ok := p.RequestPending(); ok {
   184  		if nextLease.Replica.ReplicaID == nextLeaseHolder.ReplicaID {
   185  			// Join a pending request asking for the same replica to become lease
   186  			// holder.
   187  			return p.JoinRequest()
   188  		}
   189  
   190  		// We can't join the request in progress.
   191  		// TODO(nvanbenschoten): should this return a LeaseRejectedError? Should
   192  		// it cancel and replace the request in progress? Reconsider.
   193  		return p.newResolvedHandle(roachpb.NewErrorf(
   194  			"request for different replica in progress (requesting: %+v, in progress: %+v)",
   195  			nextLeaseHolder.ReplicaID, nextLease.Replica.ReplicaID))
   196  	}
   197  
   198  	// No request in progress. Let's propose a Lease command asynchronously.
   199  	llHandle := p.newHandle()
   200  	reqHeader := roachpb.RequestHeader{
   201  		Key: startKey,
   202  	}
   203  	var leaseReq roachpb.Request
   204  	now := p.repl.store.Clock().Now()
   205  	reqLease := roachpb.Lease{
   206  		// It's up to us to ensure that Lease.Start is greater than the
   207  		// end time of the previous lease. This means that if status
   208  		// refers to an expired epoch lease, we must increment the epoch
   209  		// *at status.Timestamp* before we can propose this lease.
   210  		//
   211  		// Note that the server may decrease our proposed start time if it
   212  		// decides that it is safe to do so (for example, this happens
   213  		// when renewing an expiration-based lease), but it will never
   214  		// increase it (and a start timestamp that is too low is unsafe
   215  		// because it results in incorrect initialization of the timestamp
   216  		// cache on the new leaseholder).
   217  		Start:      status.Timestamp,
   218  		Replica:    nextLeaseHolder,
   219  		ProposedTS: &now,
   220  	}
   221  
   222  	if p.repl.requiresExpiringLeaseRLocked() {
   223  		reqLease.Expiration = &hlc.Timestamp{}
   224  		*reqLease.Expiration = status.Timestamp.Add(int64(p.repl.store.cfg.RangeLeaseActiveDuration()), 0)
   225  	} else {
   226  		// Get the liveness for the next lease holder and set the epoch in the lease request.
   227  		liveness, err := p.repl.store.cfg.NodeLiveness.GetLiveness(nextLeaseHolder.NodeID)
   228  		if err != nil {
   229  			llHandle.resolve(roachpb.NewError(&roachpb.LeaseRejectedError{
   230  				Existing:  status.Lease,
   231  				Requested: reqLease,
   232  				Message:   fmt.Sprintf("couldn't request lease for %+v: %v", nextLeaseHolder, err),
   233  			}))
   234  			return llHandle
   235  		}
   236  		reqLease.Epoch = liveness.Epoch
   237  	}
   238  
   239  	if transfer {
   240  		leaseReq = &roachpb.TransferLeaseRequest{
   241  			RequestHeader: reqHeader,
   242  			Lease:         reqLease,
   243  			PrevLease:     status.Lease,
   244  		}
   245  	} else {
   246  		minProposedTS := p.repl.mu.minLeaseProposedTS
   247  		leaseReq = &roachpb.RequestLeaseRequest{
   248  			RequestHeader: reqHeader,
   249  			Lease:         reqLease,
   250  			// PrevLease must match for our lease to be accepted. If another
   251  			// lease is applied between our previous call to leaseStatus and
   252  			// our lease request applying, it will be rejected.
   253  			PrevLease:     status.Lease,
   254  			MinProposedTS: &minProposedTS,
   255  		}
   256  	}
   257  
   258  	if err := p.requestLeaseAsync(ctx, nextLeaseHolder, reqLease, status, leaseReq); err != nil {
   259  		// We failed to start the asynchronous task. Send a blank NotLeaseHolderError
   260  		// back to indicate that we have no idea who the range lease holder might
   261  		// be; we've withdrawn from active duty.
   262  		llHandle.resolve(roachpb.NewError(
   263  			newNotLeaseHolderError(nil, p.repl.store.StoreID(), p.repl.mu.state.Desc)))
   264  		return llHandle
   265  	}
   266  	// InitOrJoinRequest requires that repl.mu is exclusively locked. requestLeaseAsync
   267  	// also requires this lock to send results on all waiter channels. This means that
   268  // no results will be sent until we've released the lock, so there's no race between
   269  	// adding our new channel to p.llHandles below and requestLeaseAsync sending results
   270  	// on all channels in p.llHandles. The same logic applies to p.nextLease.
   271  	p.llHandles[llHandle] = struct{}{}
   272  	p.nextLease = reqLease
   273  	return llHandle
   274  }
   275  
   276  // requestLeaseAsync sends a transfer lease or lease request to the
   277  // specified replica. The request is sent in an async task.
   278  //
   279  // The status argument is used as the expected value for liveness operations.
   280  // reqLease and leaseReq must be consistent with the LeaseStatus.
   281  func (p *pendingLeaseRequest) requestLeaseAsync(
   282  	parentCtx context.Context,
   283  	nextLeaseHolder roachpb.ReplicaDescriptor,
   284  	reqLease roachpb.Lease,
   285  	status kvserverpb.LeaseStatus,
   286  	leaseReq roachpb.Request,
   287  ) error {
   288  	const opName = "request range lease"
   289  	var sp opentracing.Span
   290  	tr := p.repl.AmbientContext.Tracer
   291  	if parentSp := opentracing.SpanFromContext(parentCtx); parentSp != nil {
   292  		// We use FollowsFrom because the lease request's span can outlive the
   293  		// parent request. This is possible if parentCtx is canceled after others
   294  		// have coalesced on to this lease request (see leaseRequestHandle.Cancel).
   295  		// TODO(andrei): we should use Tracer.StartChildSpan() for efficiency,
   296  		// except that one does not currently support FollowsFrom relationships.
   297  		sp = tr.StartSpan(
   298  			opName,
   299  			opentracing.FollowsFrom(parentSp.Context()),
   300  			tracing.LogTagsFromCtx(parentCtx),
   301  		)
   302  	} else {
   303  		sp = tr.(*tracing.Tracer).StartRootSpan(
   304  			opName, logtags.FromContext(parentCtx), tracing.NonRecordableSpan)
   305  	}
   306  
   307  	// Create a new context *without* a timeout. Instead, we multiplex the
   308  	// cancellation of all contexts onto this new one, only canceling it if all
   309  	// coalesced requests timeout/cancel. p.cancelLocked (defined below) is the
   310  	// cancel function that must be called; calling just cancel is insufficient.
   311  	ctx := p.repl.AnnotateCtx(context.Background())
   312  	ctx = opentracing.ContextWithSpan(ctx, sp)
   313  	ctx, cancel := context.WithCancel(ctx)
   314  
   315  	// Make sure we clean up the context and request state. This will be called
   316  	// either when the request completes cleanly or when it is terminated early.
   317  	p.cancelLocked = func() {
   318  		cancel()
   319  		p.cancelLocked = nil
   320  		p.nextLease = roachpb.Lease{}
   321  	}
   322  
   323  	err := p.repl.store.Stopper().RunAsyncTask(
   324  		ctx, "storage.pendingLeaseRequest: requesting lease", func(ctx context.Context) {
   325  			defer sp.Finish()
   326  
   327  			// If requesting an epoch-based lease & current state is expired,
   328  			// potentially heartbeat our own liveness or increment epoch of
   329  			// prior owner. Note we only do this if the previous lease was
   330  			// epoch-based.
   331  			var pErr *roachpb.Error
   332  			if reqLease.Type() == roachpb.LeaseEpoch && status.State == kvserverpb.LeaseState_EXPIRED &&
   333  				status.Lease.Type() == roachpb.LeaseEpoch {
   334  				var err error
   335  				// If this replica is previous & next lease holder, manually heartbeat to become live.
   336  				if status.Lease.OwnedBy(nextLeaseHolder.StoreID) &&
   337  					p.repl.store.StoreID() == nextLeaseHolder.StoreID {
   338  					if err = p.repl.store.cfg.NodeLiveness.Heartbeat(ctx, status.Liveness); err != nil {
   339  						log.Errorf(ctx, "%v", err)
   340  					}
   341  				} else if status.Liveness.Epoch == status.Lease.Epoch {
   342  					// If not owner, increment epoch if necessary to invalidate lease.
   343  					// However, we only do so in the event that the next leaseholder is
   344  					// considered live at this time. If not, there's no sense in
   345  					// incrementing the expired leaseholder's epoch.
   346  					if live, liveErr := p.repl.store.cfg.NodeLiveness.IsLive(nextLeaseHolder.NodeID); !live || liveErr != nil {
   347  						err = errors.Errorf("not incrementing epoch on n%d because next leaseholder (n%d) not live (err = %v)",
   348  							status.Liveness.NodeID, nextLeaseHolder.NodeID, liveErr)
   349  						if log.V(1) {
   350  							log.Infof(ctx, "%v", err)
   351  						}
   352  					} else if err = p.repl.store.cfg.NodeLiveness.IncrementEpoch(ctx, status.Liveness); err != nil {
   353  						// If we get ErrEpochAlreadyIncremented, someone else beat
   354  						// us to it. This proves that the target node is truly
   355  						// dead *now*, but it doesn't prove that it was dead at
   356  						// status.Timestamp (which we've encoded into our lease
   357  						// request). It's possible that the node was temporarily
   358  						// considered dead but revived without having its epoch
   359  						// incremented, i.e. that it was in fact live at
   360  						// status.Timestamp.
   361  						//
   362  						// It would be incorrect to simply proceed to sending our
   363  						// lease request since our lease.Start may precede the
   364  						// effective end timestamp of the predecessor lease (the
   365  						// expiration of the last successful heartbeat before the
   366  						// epoch increment), and so under this lease this node's
   367  						// timestamp cache would not necessarily reflect all reads
   368  						// served by the prior leaseholder.
   369  						//
   370  						// It would be correct to bump the timestamp in the lease
   371  						// request and proceed, but that just sets up another race
   372  						// between this node and the one that already incremented
   373  						// the epoch. They're probably going to beat us this time
   374  						// too, so just return the NotLeaseHolderError here
   375  						// instead of trying to fix up the timestamps and submit
   376  						// the lease request.
   377  						//
   378  						// ErrEpochAlreadyIncremented is not an unusual situation,
   379  						// so we don't log it as an error.
   380  						//
   381  						// https://github.com/cockroachdb/cockroach/issues/35986
   382  						if !errors.Is(err, ErrEpochAlreadyIncremented) {
   383  							log.Errorf(ctx, "%v", err)
   384  						}
   385  					}
   386  				}
   387  				// Set error for propagation to all waiters below.
   388  				if err != nil {
   389  					// TODO(bdarnell): is status.Lease really what we want to put in the NotLeaseHolderError here?
   390  					pErr = roachpb.NewError(newNotLeaseHolderError(&status.Lease, p.repl.store.StoreID(), p.repl.Desc()))
   391  				}
   392  			}
   393  
   394  			// Send the RequestLeaseRequest or TransferLeaseRequest and wait for the new
   395  			// lease to be applied.
   396  			if pErr == nil {
   397  				ba := roachpb.BatchRequest{}
   398  				ba.Timestamp = p.repl.store.Clock().Now()
   399  				ba.RangeID = p.repl.RangeID
   400  				ba.Add(leaseReq)
   401  				_, pErr = p.repl.Send(ctx, ba)
   402  			}
   403  			// We reset our state below regardless of whether we've gotten an error or
   404  			// not, but note that an error is ambiguous - there's no guarantee that the
   405  			// transfer will not still apply. That's OK, however, as the "in transfer"
   406  			// state maintained by the pendingLeaseRequest is not relied on for
   407  			// correctness (see repl.mu.minLeaseProposedTS), and resetting the state
   408  			// is beneficial as it'll allow the replica to attempt to transfer again or
   409  			// extend the existing lease in the future.
   410  
   411  			p.repl.mu.Lock()
   412  			defer p.repl.mu.Unlock()
   413  			if ctx.Err() != nil {
   414  				// We were canceled and this request was already cleaned up
   415  				// under lock. At this point, another async request could be
   416  				// active so we don't want to do anything else.
   417  				return
   418  			}
   419  
   420  			// Send result of lease to all waiter channels and cleanup request.
   421  			for llHandle := range p.llHandles {
   422  				// Don't send the same transaction object twice; this can lead to races.
   423  				if pErr != nil {
   424  					pErrClone := *pErr
   425  					pErrClone.SetTxn(pErr.GetTxn())
   426  					llHandle.resolve(&pErrClone)
   427  				} else {
   428  					llHandle.resolve(nil)
   429  				}
   430  				delete(p.llHandles, llHandle)
   431  			}
   432  			p.cancelLocked()
   433  		})
   434  	if err != nil {
   435  		p.cancelLocked()
   436  		sp.Finish()
   437  		return err
   438  	}
   439  	return nil
   440  }
   441  
   442  // JoinRequest adds one more waiter to the currently pending request.
   443  // It is the caller's responsibility to ensure that there is a pending request,
   444  // and that the request is compatible with whatever the caller is currently
   445  // wanting to do (i.e. the request is naming the intended node as the next
   446  // lease holder).
   447  //
   448  // Requires repl.mu is exclusively locked.
   449  func (p *pendingLeaseRequest) JoinRequest() *leaseRequestHandle {
   450  	llHandle := p.newHandle()
   451  	if _, ok := p.RequestPending(); !ok {
   452  		llHandle.resolve(roachpb.NewErrorf("no request in progress"))
   453  		return llHandle
   454  	}
   455  	p.llHandles[llHandle] = struct{}{}
   456  	return llHandle
   457  }
   458  
   459  // TransferInProgress returns the next lease, if the replica is in the process
   460  // of transferring away its range lease. This next lease indicates the next
   461  // lease holder. The second return value is true if a transfer is in progress.
   462  // Note that the return values are best-effort and shouldn't be relied upon for
   463  // correctness: if a previous transfer has returned an error, TransferInProgress
   464  // will return `false`, but that doesn't necessarily mean that the transfer
   465  // cannot still apply (see replica.mu.minLeaseProposedTS).
   466  //
   467  // It is assumed that the replica owning this pendingLeaseRequest owns the
   468  // LeaderLease.
   469  //
   470  // replicaID is the ID of the parent replica.
   471  //
   472  // Requires repl.mu is read locked.
   473  func (p *pendingLeaseRequest) TransferInProgress(
   474  	replicaID roachpb.ReplicaID,
   475  ) (roachpb.Lease, bool) {
   476  	if nextLease, ok := p.RequestPending(); ok {
   477  		// Is the lease being transferred? (as opposed to just extended)
   478  		if replicaID != nextLease.Replica.ReplicaID {
   479  			return nextLease, true
   480  		}
   481  	}
   482  	return roachpb.Lease{}, false
   483  }
   484  
   485  // newHandle creates a new leaseRequestHandle referencing the pending lease
   486  // request.
   487  func (p *pendingLeaseRequest) newHandle() *leaseRequestHandle {
   488  	return &leaseRequestHandle{
   489  		p: p,
   490  		c: make(chan *roachpb.Error, 1),
   491  	}
   492  }
   493  
   494  // newResolvedHandle creates a new leaseRequestHandle referencing the pending
   495  // lease request. It then resolves the handle with the provided error.
   496  func (p *pendingLeaseRequest) newResolvedHandle(pErr *roachpb.Error) *leaseRequestHandle {
   497  	h := p.newHandle()
   498  	h.resolve(pErr)
   499  	return h
   500  }
   501  
   502  // leaseStatus returns lease status. If the lease is epoch-based,
   503  // the liveness field will be set to the liveness used to compute
   504  // its state, unless state == LeaseState_ERROR.
   505  //
   506  // - The lease is considered valid if the timestamp is covered by the
   507  //   supplied lease. This is determined differently depending on the
   508  //   lease properties. For expiration-based leases, the timestamp is
   509  //   covered if it's less than the expiration (minus the maximum
   510  //   clock offset). For epoch-based "node liveness" leases, the lease
   511  //   epoch must match the owner node's liveness epoch -AND- the
   512  //   timestamp must be within the node's liveness expiration (also
   513  //   minus the maximum clock offset).
   514  //
   515  //   To be valid, a lease which contains a valid ProposedTS must have
   516  //   a proposed timestamp greater than the minimum proposed timestamp,
   517  //   which prevents a restarted process from serving commands, since
   518  //   the spanlatch manager has been wiped through the restart.
   519  //
   520  // - The lease is considered in stasis if the timestamp is within the
   521  //   maximum clock offset window of the lease expiration.
   522  //
   523  // - The lease is considered expired in all other cases.
   524  //
   525  // The maximum clock offset must always be taken into consideration to
   526  // avoid a failure of linearizability on a single register during
   527  // lease changes. Without that stasis period, the following could
   528  // occur:
   529  //
   530  // * a range lease gets committed on the new lease holder (but not the old).
   531  // * client proposes and commits a write on new lease holder (with a
   532  //   timestamp just greater than the expiration of the old lease).
   533  // * client tries to read what it wrote, but hits a slow coordinator
   534  //   (which assigns a timestamp covered by the old lease).
   535  // * the read is served by the old lease holder (which has not
   536  //   processed the change in lease holdership).
   537  // * the client fails to read their own write.
   538  func (r *Replica) leaseStatus(
   539  	lease roachpb.Lease, timestamp, minProposedTS hlc.Timestamp,
   540  ) kvserverpb.LeaseStatus {
   541  	status := kvserverpb.LeaseStatus{Timestamp: timestamp, Lease: lease}
   542  	var expiration hlc.Timestamp
   543  	if lease.Type() == roachpb.LeaseExpiration {
   544  		expiration = lease.GetExpiration()
   545  	} else {
   546  		var err error
   547  		status.Liveness, err = r.store.cfg.NodeLiveness.GetLiveness(lease.Replica.NodeID)
   548  		if err != nil || status.Liveness.Epoch < lease.Epoch {
   549  			// If lease validity can't be determined (e.g. gossip is down
   550  			// and liveness info isn't available for owner), we can neither
   551  			// use the lease nor do we want to attempt to acquire it.
   552  			if err != nil {
   553  				if leaseStatusLogLimiter.ShouldLog() {
   554  					log.Warningf(context.TODO(), "can't determine lease status due to node liveness error: %+v", err)
   555  				}
   556  			}
   557  			status.State = kvserverpb.LeaseState_ERROR
   558  			return status
   559  		}
   560  		if status.Liveness.Epoch > lease.Epoch {
   561  			status.State = kvserverpb.LeaseState_EXPIRED
   562  			return status
   563  		}
   564  		expiration = hlc.Timestamp(status.Liveness.Expiration)
   565  	}
   566  	maxOffset := r.store.Clock().MaxOffset()
   567  	stasis := expiration.Add(-int64(maxOffset), 0)
   568  	if timestamp.Less(stasis) {
   569  		status.State = kvserverpb.LeaseState_VALID
   570  		// If the replica owns the lease, additionally verify that the lease's
   571  		// proposed timestamp is not earlier than the min proposed timestamp.
   572  		if lease.Replica.StoreID == r.store.StoreID() &&
   573  			lease.ProposedTS != nil && lease.ProposedTS.Less(minProposedTS) {
   574  			status.State = kvserverpb.LeaseState_PROSCRIBED
   575  		}
   576  	} else if timestamp.Less(expiration) {
   577  		status.State = kvserverpb.LeaseState_STASIS
   578  	} else {
   579  		status.State = kvserverpb.LeaseState_EXPIRED
   580  	}
   581  	return status
   582  }
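
        // Worked example (illustrative numbers only): with expiration = 100 and a
        // maximum clock offset of 10, the stasis point is 90. A request timestamp of
        // 85 yields LeaseState_VALID (or PROSCRIBED if the lease is owned locally and
        // its ProposedTS precedes minProposedTS), 95 yields LeaseState_STASIS, and
        // 105 yields LeaseState_EXPIRED, per the timestamp.Less(stasis) and
        // timestamp.Less(expiration) checks above.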
   583  
   584  // requiresExpiringLeaseRLocked returns whether this range uses an
   585  // expiration-based lease; false if epoch-based. Ranges located before or
   586  // including the node liveness table must use expiration leases to avoid
   587  // circular dependencies on the node liveness table.
   588  func (r *Replica) requiresExpiringLeaseRLocked() bool {
   589  	return r.store.cfg.NodeLiveness == nil || !r.store.cfg.EnableEpochRangeLeases ||
   590  		r.mu.state.Desc.StartKey.Less(roachpb.RKey(keys.NodeLivenessKeyMax))
   591  }
   592  
   593  // requestLeaseLocked executes a request to obtain or extend a lease
   594  // asynchronously and returns a handle on which the result will be posted. If
   595  // there's already a request in progress, we join in waiting for the results of
   596  // that request. Unless an error is returned, the obtained lease will be valid
   597  // for a time interval containing the requested timestamp.
   598  // If a transfer is in progress, a NotLeaseHolderError directing to the recipient
   599  // is sent on the returned handle's channel.
   600  func (r *Replica) requestLeaseLocked(
   601  	ctx context.Context, status kvserverpb.LeaseStatus,
   602  ) *leaseRequestHandle {
   603  	if r.store.TestingKnobs().LeaseRequestEvent != nil {
   604  		r.store.TestingKnobs().LeaseRequestEvent(status.Timestamp)
   605  	}
   606  	// Propose a Raft command to get a lease for this replica.
   607  	repDesc, err := r.getReplicaDescriptorRLocked()
   608  	if err != nil {
   609  		return r.mu.pendingLeaseRequest.newResolvedHandle(roachpb.NewError(err))
   610  	}
   611  	if transferLease, ok := r.mu.pendingLeaseRequest.TransferInProgress(repDesc.ReplicaID); ok {
   612  		return r.mu.pendingLeaseRequest.newResolvedHandle(roachpb.NewError(
   613  			newNotLeaseHolderError(&transferLease, r.store.StoreID(), r.mu.state.Desc)))
   614  	}
   615  	if r.store.IsDraining() {
   616  		// We've retired from active duty.
   617  		return r.mu.pendingLeaseRequest.newResolvedHandle(roachpb.NewError(
   618  			newNotLeaseHolderError(nil, r.store.StoreID(), r.mu.state.Desc)))
   619  	}
   620  	return r.mu.pendingLeaseRequest.InitOrJoinRequest(
   621  		ctx, repDesc, status, r.mu.state.Desc.StartKey.AsRawKey(), false /* transfer */)
   622  }
   623  
   624  // AdminTransferLease transfers the LeaderLease to another replica. A
   625  // valid LeaseStatus must be supplied. Only the current holder of the
   626  // LeaderLease can do a transfer, because it needs to stop serving
   627  // reads and proposing Raft commands (CPut is a read) after sending
   628  // the transfer command. If it did not stop serving reads immediately,
   629  // it would potentially serve reads with timestamps greater than the
   630  // start timestamp of the new (transferred) lease. More subtly, the
   631  // replica can't even serve reads or propose commands with timestamps
   632  // lower than the start of the new lease because it could lead to read
   633  // your own write violations (see comments on the stasis period in
   634  // IsLeaseValid). We could, in principle, serve reads more than the
   635  // maximum clock offset in the past.
   636  //
   637  // The method waits for any in-progress lease extension to be done, and it also
   638  // blocks until the transfer is done. If a transfer is already in progress,
   639  // this method joins in waiting for it to complete if it's transferring to the
   640  // same replica. Otherwise, a NotLeaseHolderError is returned.
   641  func (r *Replica) AdminTransferLease(ctx context.Context, target roachpb.StoreID) error {
   642  	// initTransferHelper inits a transfer if no extension is in progress.
   643  	// It returns a channel for waiting for the result of a pending
   644  	// extension (if any is in progress) and a channel for waiting for the
   645  	// transfer (if it was successfully initiated).
   646  	var nextLeaseHolder roachpb.ReplicaDescriptor
   647  	initTransferHelper := func() (extension, transfer *leaseRequestHandle, err error) {
   648  		r.mu.Lock()
   649  		defer r.mu.Unlock()
   650  
   651  		status := r.leaseStatus(*r.mu.state.Lease, r.store.Clock().Now(), r.mu.minLeaseProposedTS)
   652  		if status.Lease.OwnedBy(target) {
   653  			// The target is already the lease holder. Nothing to do.
   654  			return nil, nil, nil
   655  		}
   656  		desc := r.mu.state.Desc
   657  		if !status.Lease.OwnedBy(r.store.StoreID()) {
   658  			return nil, nil, newNotLeaseHolderError(&status.Lease, r.store.StoreID(), desc)
   659  		}
   660  		// Verify the target is a replica of the range.
   661  		var ok bool
   662  		if nextLeaseHolder, ok = desc.GetReplicaDescriptor(target); !ok {
   663  			return nil, nil, errors.Errorf("unable to find store %d in range %+v", target, desc)
   664  		}
   665  
   666  		// For now, don't allow replicas of type LEARNER to be leaseholders, see
   667  		// comments in RequestLease and TransferLease for why.
   668  		//
   669  		// TODO(dan): We shouldn't need this, the checks in RequestLease and
   670  		// TransferLease are the canonical ones and should be sufficient. Sadly, the
   671  		// `r.mu.minLeaseProposedTS = status.Timestamp` line below will likely play
   672  		// badly with that. This would be an issue even without learners, but
   673  		// omitting this check would make it worse. Fixme.
   674  		if t := nextLeaseHolder.GetType(); t != roachpb.VOTER_FULL {
   675  			return nil, nil, errors.Errorf(`cannot transfer lease to replica of type %s`, t)
   676  		}
   677  
   678  		if nextLease, ok := r.mu.pendingLeaseRequest.RequestPending(); ok &&
   679  			nextLease.Replica != nextLeaseHolder {
   680  			repDesc, err := r.getReplicaDescriptorRLocked()
   681  			if err != nil {
   682  				return nil, nil, err
   683  			}
   684  			if nextLease.Replica == repDesc {
   685  				// There's an extension in progress. Let's wait for it to succeed and
   686  				// try again.
   687  				return r.mu.pendingLeaseRequest.JoinRequest(), nil, nil
   688  			}
   689  			// Another transfer is in progress, and it's not transferring to the
   690  			// same replica we'd like.
   691  			return nil, nil, newNotLeaseHolderError(&nextLease, r.store.StoreID(), desc)
   692  		}
   693  		// Stop using the current lease.
   694  		r.mu.minLeaseProposedTS = status.Timestamp
   695  		transfer = r.mu.pendingLeaseRequest.InitOrJoinRequest(
   696  			ctx, nextLeaseHolder, status, desc.StartKey.AsRawKey(), true, /* transfer */
   697  		)
   698  		return nil, transfer, nil
   699  	}
   700  
   701  	// Loop while there's an extension in progress.
   702  	for {
   703  		// See if there's an extension in progress that we have to wait for.
   704  		// If there isn't, request a transfer.
   705  		extension, transfer, err := initTransferHelper()
   706  		if err != nil {
   707  			return err
   708  		}
   709  		if extension == nil {
   710  			if transfer == nil {
   711  				// The target is us and we're the lease holder.
   712  				return nil
   713  			}
   714  			select {
   715  			case pErr := <-transfer.C():
   716  				return pErr.GoError()
   717  			case <-ctx.Done():
   718  				transfer.Cancel()
   719  				return ctx.Err()
   720  			}
   721  		}
   722  		// Wait for the in-progress extension without holding the mutex.
   723  		if r.store.TestingKnobs().LeaseTransferBlockedOnExtensionEvent != nil {
   724  			r.store.TestingKnobs().LeaseTransferBlockedOnExtensionEvent(nextLeaseHolder)
   725  		}
   726  		select {
   727  		case <-extension.C():
   728  			continue
   729  		case <-ctx.Done():
   730  			extension.Cancel()
   731  			return ctx.Err()
   732  		}
   733  	}
   734  }
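
        // exampleAdminTransferLease is a hypothetical, illustrative caller (not part
        // of the original file). AdminTransferLease blocks until any in-progress
        // extension and then the transfer itself resolve, so a caller would normally
        // bound the call with a context deadline, as sketched here.
        func exampleAdminTransferLease(r *Replica, target roachpb.StoreID) error {
        	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
        	defer cancel()
        	return r.AdminTransferLease(ctx, target)
        }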
   735  
   736  // GetLease returns the lease and, if available, the proposed next lease.
   737  func (r *Replica) GetLease() (roachpb.Lease, roachpb.Lease) {
   738  	r.mu.RLock()
   739  	defer r.mu.RUnlock()
   740  	return r.getLeaseRLocked()
   741  }
   742  
   743  func (r *Replica) getLeaseRLocked() (roachpb.Lease, roachpb.Lease) {
   744  	if nextLease, ok := r.mu.pendingLeaseRequest.RequestPending(); ok {
   745  		return *r.mu.state.Lease, nextLease
   746  	}
   747  	return *r.mu.state.Lease, roachpb.Lease{}
   748  }
   749  
   750  // OwnsValidLease returns whether this replica is the current valid
   751  // leaseholder. Note that this method does not check to see if a transfer is
   752  // pending, but returns the status of the current lease and ownership at the
   753  // specified point in time.
   754  func (r *Replica) OwnsValidLease(ts hlc.Timestamp) bool {
   755  	r.mu.RLock()
   756  	defer r.mu.RUnlock()
   757  	return r.ownsValidLeaseRLocked(ts)
   758  }
   759  
   760  func (r *Replica) ownsValidLeaseRLocked(ts hlc.Timestamp) bool {
   761  	return r.mu.state.Lease.OwnedBy(r.store.StoreID()) &&
   762  		r.leaseStatus(*r.mu.state.Lease, ts, r.mu.minLeaseProposedTS).State == kvserverpb.LeaseState_VALID
   763  }
   764  
   765  // IsLeaseValid returns true if the replica's lease is owned by this
   766  // replica and is valid (not expired, not in stasis).
   767  func (r *Replica) IsLeaseValid(lease roachpb.Lease, ts hlc.Timestamp) bool {
   768  	r.mu.RLock()
   769  	defer r.mu.RUnlock()
   770  	return r.isLeaseValidRLocked(lease, ts)
   771  }
   772  
   773  func (r *Replica) isLeaseValidRLocked(lease roachpb.Lease, ts hlc.Timestamp) bool {
   774  	return r.leaseStatus(lease, ts, r.mu.minLeaseProposedTS).State == kvserverpb.LeaseState_VALID
   775  }
   776  
   777  // newNotLeaseHolderError returns a NotLeaseHolderError initialized with the
   778  // replica for the holder (if any) of the given lease.
   779  //
   780  // Note that this error can be generated on the Raft processing goroutine, so
   781  // its output should be completely determined by its parameters.
   782  func newNotLeaseHolderError(
   783  	l *roachpb.Lease, proposerStoreID roachpb.StoreID, rangeDesc *roachpb.RangeDescriptor,
   784  ) *roachpb.NotLeaseHolderError {
   785  	err := &roachpb.NotLeaseHolderError{
   786  		RangeID: rangeDesc.RangeID,
   787  	}
   788  	if proposerStoreID != 0 {
   789  		err.Replica, _ = rangeDesc.GetReplicaDescriptor(proposerStoreID)
   790  	}
   791  	if l != nil {
   792  		// Normally, we return the lease-holding Replica here. However, in the
   793  		// case in which a leader removes itself, we want the followers to
   794  		// avoid handing out a misleading clue (which in itself shouldn't be
   795  		// overly disruptive as the lease would expire and then this method
   796  		// shouldn't be called for it any more, but at the very least it
   797  		// could catch tests in a loop, presumably due to manual clocks).
   798  		_, stillMember := rangeDesc.GetReplicaDescriptor(l.Replica.StoreID)
   799  		if stillMember {
   800  			err.LeaseHolder = &l.Replica
   801  			err.Lease = l
   802  		}
   803  	}
   804  	return err
   805  }
   806  
   807  // leaseGoodToGo is a fast-path for lease checks which verifies that an
   808  // existing lease is valid and owned by the current store. This method should
   809  // not be called directly. Use redirectOnOrAcquireLease instead.
   810  func (r *Replica) leaseGoodToGo(ctx context.Context) (kvserverpb.LeaseStatus, bool) {
   811  	timestamp := r.store.Clock().Now()
   812  	r.mu.RLock()
   813  	defer r.mu.RUnlock()
   814  
   815  	if r.requiresExpiringLeaseRLocked() {
   816  		// Slow-path for expiration-based leases.
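        		// Expiration-based leases always take the slow path so that the eager
        		// renewal check in redirectOnOrAcquireLease gets a chance to run; the
        		// fast path would otherwise skip it.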
   817  		return kvserverpb.LeaseStatus{}, false
   818  	}
   819  
   820  	status := r.leaseStatus(*r.mu.state.Lease, timestamp, r.mu.minLeaseProposedTS)
   821  	if status.State == kvserverpb.LeaseState_VALID && status.Lease.OwnedBy(r.store.StoreID()) {
   822  		// We own the lease...
   823  		if repDesc, err := r.getReplicaDescriptorRLocked(); err == nil {
   824  			if _, ok := r.mu.pendingLeaseRequest.TransferInProgress(repDesc.ReplicaID); !ok {
   825  				// ...and there is no transfer pending.
   826  				return status, true
   827  			}
   828  		}
   829  	}
   830  	return kvserverpb.LeaseStatus{}, false
   831  }
   832  
   833  // redirectOnOrAcquireLease checks whether this replica has the lease at the
   834  // current timestamp. If it does, returns the lease and its status. If
   835  // another replica currently holds the lease, redirects by returning
   836  // NotLeaseHolderError. If the lease is expired, a renewal is synchronously
   837  // requested. Leases are eagerly renewed when a request with a timestamp
   838  // within rangeLeaseRenewalDuration of the lease expiration is served.
   839  //
   840  // TODO(spencer): for write commands, don't wait while requesting
   841  //  the range lease. If the lease acquisition fails, the write cmd
   842  //  will fail as well. If it succeeds, as is likely, then the write
   843  //  will not incur latency waiting for the command to complete.
   844  //  Reads, however, must wait.
   845  //
   846  // TODO(rangeLeaseRenewalDuration): what is rangeLeaseRenewalDuration
   847  //  referring to? It appears to have rotted.
   848  func (r *Replica) redirectOnOrAcquireLease(
   849  	ctx context.Context,
   850  ) (kvserverpb.LeaseStatus, *roachpb.Error) {
   851  	if status, ok := r.leaseGoodToGo(ctx); ok {
   852  		return status, nil
   853  	}
   854  
   855  	// Loop until the lease is held or the replica ascertains the actual
   856  	// lease holder. Returns also on context.Done() (timeout or cancellation).
   857  	var status kvserverpb.LeaseStatus
   858  	for attempt := 1; ; attempt++ {
   859  		timestamp := r.store.Clock().Now()
   860  		llHandle, pErr := func() (*leaseRequestHandle, *roachpb.Error) {
   861  			r.mu.Lock()
   862  			defer r.mu.Unlock()
   863  
   864  			status = r.leaseStatus(*r.mu.state.Lease, timestamp, r.mu.minLeaseProposedTS)
   865  			switch status.State {
   866  			case kvserverpb.LeaseState_ERROR:
   867  				// Lease state couldn't be determined.
   868  				log.VEventf(ctx, 2, "lease state couldn't be determined")
   869  				return nil, roachpb.NewError(
   870  					newNotLeaseHolderError(nil, r.store.StoreID(), r.mu.state.Desc))
   871  
   872  			case kvserverpb.LeaseState_VALID, kvserverpb.LeaseState_STASIS:
   873  				if !status.Lease.OwnedBy(r.store.StoreID()) {
   874  					_, stillMember := r.mu.state.Desc.GetReplicaDescriptor(status.Lease.Replica.StoreID)
   875  					if !stillMember {
   876  						// This would be the situation in which the lease holder gets removed when
   877  						// holding the lease, or in which a lease request erroneously gets accepted
   878  						// for a replica that is not in the replica set. Neither of the two can
   879  						// happen in normal usage since appropriate mechanisms have been added:
   880  						//
   881  						// 1. Only the lease holder (at the time) schedules removal of a replica,
   882  						// but the lease can change hands and so the situation in which a follower
   883  						// coordinates a replica removal of the (new) lease holder is possible (if
   884  						// unlikely) in practice. In this situation, the new lease holder would at
   885  						// some point be asked to propose the replica change's EndTxn to Raft. A
   886  						// check has been added that prevents proposals that amount to the removal
   887  						// of the proposer's (and hence lease holder's) Replica, preventing this
   888  						// scenario.
   889  						//
   890  						// 2. A lease is accepted for a Replica that has been removed. Without
   891  						// precautions, this could happen because lease requests are special in
   892  						// that they are the only command that is proposed on a follower (other
   893  						// commands may be proposed from followers, but not successfully so). For
   894  						// all proposals, processRaftCommand checks that their ProposalLease is
   895  						// compatible with the active lease for the log position. For commands
   896  						// proposed on the lease holder, the spanlatch manager then serializes
   897  						// everything. But lease requests get created on followers based on their
   898  						// local state and thus without being sequenced through latching. Thus
   899  						// a recently removed follower (unaware of its own removal) could submit
   900  						// a proposal for the lease (correctly using as a ProposerLease the last
   901  						// active lease), and would receive it given the up-to-date ProposerLease.
   902  						// Hence, an extra check is in order: processRaftCommand makes sure that
   903  						// lease requests for a replica not in the descriptor are bounced.
   904  						//
   905  						// However, this is possible if the `cockroach debug
   906  						// unsafe-remove-dead-replicas` command has been used, so
   907  						// this is just a logged error instead of a fatal
   908  						// assertion.
   909  						log.Errorf(ctx, "lease %s owned by replica %+v that no longer exists",
   910  							status.Lease, status.Lease.Replica)
   911  					}
   912  					// Otherwise, if the lease is currently held by another replica, redirect
   913  					// to the holder.
   914  					return nil, roachpb.NewError(
   915  						newNotLeaseHolderError(&status.Lease, r.store.StoreID(), r.mu.state.Desc))
   916  				}
   917  				// Check that we're not in the process of transferring the lease away.
   918  				// If we are transferring the lease away, we can't serve reads or
   919  				// propose Raft commands - see comments on TransferLease.
   920  				// TODO(andrei): If the lease is being transferred, consider returning a
   921  				// new error type so the client backs off until the transfer is
   922  				// completed.
   923  				repDesc, err := r.getReplicaDescriptorRLocked()
   924  				if err != nil {
   925  					return nil, roachpb.NewError(err)
   926  				}
   927  				if transferLease, ok := r.mu.pendingLeaseRequest.TransferInProgress(
   928  					repDesc.ReplicaID); ok {
   929  					return nil, roachpb.NewError(
   930  						newNotLeaseHolderError(&transferLease, r.store.StoreID(), r.mu.state.Desc))
   931  				}
   932  
   933  				// If the lease is in stasis, we can't serve requests until we've
   934  				// renewed the lease, so we return the handle to block on renewal.
   935  				// Otherwise, we don't need to wait for the extension and simply
   936  				// ignore the returned handle (whose channel is buffered) and continue.
   937  				if status.State == kvserverpb.LeaseState_STASIS {
   938  					return r.requestLeaseLocked(ctx, status), nil
   939  				}
   940  
   941  				// Extend the lease if this range uses expiration-based
   942  				// leases, the lease is in need of renewal, and there's not
   943  				// already an extension pending.
   944  				_, requestPending := r.mu.pendingLeaseRequest.RequestPending()
   945  				if !requestPending && r.requiresExpiringLeaseRLocked() {
   946  					renewal := status.Lease.Expiration.Add(-r.store.cfg.RangeLeaseRenewalDuration().Nanoseconds(), 0)
   947  					if renewal.LessEq(timestamp) {
   948  						if log.V(2) {
   949  							log.Infof(ctx, "extending lease %s at %s", status.Lease, timestamp)
   950  						}
   951  						// We had an active lease to begin with, but we want to trigger
   952  						// a lease extension. We explicitly ignore the returned handle
   953  						// as we won't block on it.
   954  						_ = r.requestLeaseLocked(ctx, status)
   955  					}
   956  				}
   957  
   958  			case kvserverpb.LeaseState_EXPIRED:
   959  				// No active lease: Request renewal if a renewal is not already pending.
   960  				log.VEventf(ctx, 2, "request range lease (attempt #%d)", attempt)
   961  				return r.requestLeaseLocked(ctx, status), nil
   962  
   963  			case kvserverpb.LeaseState_PROSCRIBED:
   964  				// Lease proposed timestamp is earlier than the min proposed
   965  				// timestamp limit this replica must observe. If this store
   966  				// owns the lease, re-request. Otherwise, redirect.
   967  				if status.Lease.OwnedBy(r.store.StoreID()) {
   968  					log.VEventf(ctx, 2, "request range lease (attempt #%d)", attempt)
   969  					return r.requestLeaseLocked(ctx, status), nil
   970  				}
   971  				// If lease is currently held by another, redirect to holder.
   972  				return nil, roachpb.NewError(
   973  					newNotLeaseHolderError(&status.Lease, r.store.StoreID(), r.mu.state.Desc))
   974  			}
   975  
   976  			// Return a nil handle to signal that we have a valid lease.
   977  			return nil, nil
   978  		}()
   979  		if pErr != nil {
   980  			return kvserverpb.LeaseStatus{}, pErr
   981  		}
   982  		if llHandle == nil {
   983  			// We own a valid lease.
   984  			return status, nil
   985  		}
   986  
   987  		// Wait for the range lease to finish, or the context to expire.
   988  		pErr = func() (pErr *roachpb.Error) {
   989  			slowTimer := timeutil.NewTimer()
   990  			defer slowTimer.Stop()
   991  			slowTimer.Reset(base.SlowRequestThreshold)
   992  			tBegin := timeutil.Now()
   993  			for {
   994  				select {
   995  				case pErr = <-llHandle.C():
   996  					if pErr != nil {
   997  						switch tErr := pErr.GetDetail().(type) {
   998  						case *roachpb.AmbiguousResultError:
   999  							// This can happen if the RequestLease command we sent has been
  1000  							// applied locally through a snapshot: the RequestLeaseRequest
  1001  							// cannot be reproposed so we get this ambiguity.
  1002  							// We'll just loop around.
  1003  							return nil
  1004  						case *roachpb.LeaseRejectedError:
  1005  							if tErr.Existing.OwnedBy(r.store.StoreID()) {
  1006  								// The RequestLease command we sent was rejected because another
  1007  								// lease was applied in the meantime, but we own that other
  1008  								// lease. So, loop until the current node becomes aware that
  1009  								// it's the leaseholder.
  1010  								return nil
  1011  							}
  1012  
  1013  							// Getting a LeaseRejectedError back means someone else got there
  1014  							// first, or the lease request was somehow invalid due to a concurrent
  1015  							// change. That concurrent change could have been that this replica was
  1016  							// removed (see processRaftCommand), so check for that case before
  1017  							// falling back to a NotLeaseHolderError.
  1018  							var err error
  1019  							if _, descErr := r.GetReplicaDescriptor(); descErr != nil {
  1020  								err = descErr
  1021  							} else if lease, _ := r.GetLease(); !r.IsLeaseValid(lease, r.store.Clock().Now()) {
  1022  								err = newNotLeaseHolderError(nil, r.store.StoreID(), r.Desc())
  1023  							} else {
  1024  								err = newNotLeaseHolderError(&lease, r.store.StoreID(), r.Desc())
  1025  							}
  1026  							pErr = roachpb.NewError(err)
  1027  						}
  1028  						return pErr
  1029  					}
  1030  					log.Eventf(ctx, "lease acquisition succeeded: %+v", status.Lease)
  1031  					return nil
  1032  				case <-slowTimer.C:
  1033  					slowTimer.Read = true
  1034  					log.Warningf(ctx, "have been waiting %s attempting to acquire lease",
  1035  						base.SlowRequestThreshold)
  1036  					r.store.metrics.SlowLeaseRequests.Inc(1)
  1037  					defer func() {
  1038  						r.store.metrics.SlowLeaseRequests.Dec(1)
  1039  						log.Infof(ctx, "slow lease acquisition finished after %s with error %v after %d attempts", timeutil.Since(tBegin), pErr, attempt)
  1040  					}()
  1041  				case <-ctx.Done():
  1042  					llHandle.Cancel()
  1043  					log.VErrEventf(ctx, 2, "lease acquisition failed: %s", ctx.Err())
  1044  					return roachpb.NewError(newNotLeaseHolderError(nil, r.store.StoreID(), r.Desc()))
  1045  				case <-r.store.Stopper().ShouldStop():
  1046  					llHandle.Cancel()
  1047  					return roachpb.NewError(newNotLeaseHolderError(nil, r.store.StoreID(), r.Desc()))
  1048  				}
  1049  			}
  1050  		}()
  1051  		if pErr != nil {
  1052  			return kvserverpb.LeaseStatus{}, pErr
  1053  		}
  1054  	}
  1055  }
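
        // exampleRedirectOnOrAcquireLease is a hypothetical sketch (not part of the
        // original file) of the calling pattern for redirectOnOrAcquireLease on the
        // request path: a nil error means this replica holds a valid lease and the
        // returned LeaseStatus can be used to evaluate the request; otherwise the
        // error is typically a NotLeaseHolderError that the client gateway uses to
        // retry against the indicated replica.
        func exampleRedirectOnOrAcquireLease(ctx context.Context, r *Replica) (kvserverpb.LeaseStatus, error) {
        	status, pErr := r.redirectOnOrAcquireLease(ctx)
        	if pErr != nil {
        		return kvserverpb.LeaseStatus{}, pErr.GoError()
        	}
        	return status, nil
        }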