github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/concurrency/lock_table_waiter.go

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package concurrency
    12  
    13  import (
    14  	"context"
    15  	"math"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/intentresolver"
    19  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
    20  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    21  	"github.com/cockroachdb/cockroach/pkg/settings"
    22  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    23  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    24  	"github.com/cockroachdb/cockroach/pkg/util/log"
    25  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    26  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    27  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    28  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    29  	"github.com/cockroachdb/errors"
    30  )
    31  
    32  // LockTableLivenessPushDelay sets the delay before pushing in order to detect
    33  // coordinator failures of conflicting transactions.
    34  var LockTableLivenessPushDelay = settings.RegisterDurationSetting(
    35  	"kv.lock_table.coordinator_liveness_push_delay",
    36  	"the delay before pushing in order to detect coordinator failures of conflicting transactions",
    37  	// This is set to a short duration to ensure that we quickly detect failed
    38  	// transaction coordinators that have abandoned one or many locks. We don't
    39  	// want to wait out a long timeout on each of these locks to detect that
    40  	// they are abandoned. However, we also don't want to push immediately in
    41  	// cases where the lock is going to be resolved shortly.
    42  	//
    43  	// We could increase this default to somewhere on the order of the
    44  	// transaction heartbeat timeout (5s) if we had a better way to avoid paying
    45  	// the cost on each of a transaction's abandoned locks and instead only pay
    46  	// it once per abandoned transaction per range or per node. This could come
    47  	// in a few different forms, including:
    48  	// - a per-store cache of recently detected abandoned transaction IDs
    49  	// - a per-range reverse index from transaction ID to locked keys
    50  	//
     51  	// TODO(nvanbenschoten): increase this default value.
    52  	10*time.Millisecond,
    53  )
    54  
    55  // LockTableDeadlockDetectionPushDelay sets the delay before pushing in order to
    56  // detect dependency cycles between transactions.
    57  var LockTableDeadlockDetectionPushDelay = settings.RegisterDurationSetting(
    58  	"kv.lock_table.deadlock_detection_push_delay",
    59  	"the delay before pushing in order to detect dependency cycles between transactions",
     60  	// This is set to a medium duration to ensure that deadlocks caused by
    61  	// dependency cycles between transactions are eventually detected, but that
    62  	// the deadlock detection does not impose any overhead in the vastly common
    63  	// case where there are no dependency cycles. We optimistically assume that
    64  	// deadlocks are not common in production applications and wait locally on
    65  	// locks for a while before checking for a deadlock. Increasing this value
    66  	// reduces the amount of time wasted in needless deadlock checks, but slows
    67  	// down reporting of real deadlock scenarios.
    68  	//
    69  	// The value is analogous to Postgres' deadlock_timeout setting, which has a
    70  	// default value of 1s:
    71  	//  https://www.postgresql.org/docs/current/runtime-config-locks.html#GUC-DEADLOCK-TIMEOUT.
    72  	//
    73  	// We could increase this default to somewhere around 250ms - 1000ms if we
    74  	// confirmed that we do not observe deadlocks in any of the workloads that
    75  	// we care about. When doing so, we should be conscious that even once
    76  	// distributed deadlock detection begins, there is some latency proportional
    77  	// to the length of the dependency cycle before the deadlock is detected.
    78  	//
     79  	// TODO(nvanbenschoten): increase this default value.
    80  	100*time.Millisecond,
    81  )
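
// The delays above are combined in WaitOn below: the waiter uses the minimum
// of the delays that apply to it, and pushes immediately when the pushee has
// the minimum priority or the pusher has the maximum priority. The following
// sketch restates that logic; the function itself is illustrative and not
// part of the original file.
func effectivePushDelaySketch(
	st *cluster.Settings,
	livenessPush, deadlockPush bool,
	pusher *roachpb.Transaction,
	pushee *enginepb.TxnMeta,
) time.Duration {
	delay := time.Duration(math.MaxInt64)
	if livenessPush {
		delay = minDuration(delay, LockTableLivenessPushDelay.Get(&st.SV))
	}
	if deadlockPush {
		delay = minDuration(delay, LockTableDeadlockDetectionPushDelay.Get(&st.SV))
	}
	if hasMinPriority(pushee) || hasMaxPriority(pusher) {
		delay = 0
	}
	return delay
}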
    82  
    83  // lockTableWaiterImpl is an implementation of lockTableWaiter.
    84  type lockTableWaiterImpl struct {
    85  	st      *cluster.Settings
    86  	stopper *stop.Stopper
    87  	ir      IntentResolver
    88  	lm      LockManager
    89  
    90  	// finalizedTxnCache is a small LRU cache that tracks transactions that
    91  	// were pushed and found to be finalized (COMMITTED or ABORTED). It is
    92  	// used as an optimization to avoid repeatedly pushing the transaction
    93  	// record when cleaning up the intents of an abandoned transaction.
    94  	//
    95  	// NOTE: it probably makes sense to maintain a single finalizedTxnCache
    96  	// across all Ranges on a Store instead of an individual cache per
    97  	// Range. For now, we don't do this because we don't share any state
    98  	// between separate concurrency.Manager instances.
    99  	finalizedTxnCache txnCache
   100  
    101  	// When set, WriteIntentErrors are propagated instead of pushing
   102  	// conflicting transactions.
   103  	disableTxnPushing bool
   104  }
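
// newLockTableWaiterSketch is a hypothetical constructor, not part of the
// original file, shown only to make the wiring explicit: the enclosing
// concurrency manager is expected to supply the cluster settings, stopper,
// intent resolver, and lock manager that the waiter depends on.
func newLockTableWaiterSketch(
	st *cluster.Settings, stopper *stop.Stopper, ir IntentResolver, lm LockManager,
) *lockTableWaiterImpl {
	return &lockTableWaiterImpl{st: st, stopper: stopper, ir: ir, lm: lm}
}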
   105  
   106  // IntentResolver is an interface used by lockTableWaiterImpl to push
   107  // transactions and to resolve intents. It contains only the subset of the
   108  // intentresolver.IntentResolver interface that lockTableWaiterImpl needs.
   109  type IntentResolver interface {
   110  	// PushTransaction pushes the provided transaction. The method will push the
   111  	// provided pushee transaction immediately, if possible. Otherwise, it will
   112  	// block until the pushee transaction is finalized or eventually can be
   113  	// pushed successfully.
   114  	PushTransaction(
   115  		context.Context, *enginepb.TxnMeta, roachpb.Header, roachpb.PushTxnType,
   116  	) (*roachpb.Transaction, *Error)
   117  
   118  	// ResolveIntent synchronously resolves the provided intent.
   119  	ResolveIntent(context.Context, roachpb.LockUpdate, intentresolver.ResolveOptions) *Error
   120  
   121  	// ResolveIntents synchronously resolves the provided batch of intents.
   122  	ResolveIntents(context.Context, []roachpb.LockUpdate, intentresolver.ResolveOptions) *Error
   123  }
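
// mockIntentResolverSketch is a hypothetical, minimal implementation of the
// IntentResolver interface (not part of the original file). It illustrates
// the shape of a test double that could be handed to lockTableWaiterImpl:
// each function field lets a test script the behavior of the corresponding
// method.
type mockIntentResolverSketch struct {
	pushTxn func(*enginepb.TxnMeta) (*roachpb.Transaction, *Error)
	resolve func([]roachpb.LockUpdate) *Error
}

func (m *mockIntentResolverSketch) PushTransaction(
	_ context.Context, txn *enginepb.TxnMeta, _ roachpb.Header, _ roachpb.PushTxnType,
) (*roachpb.Transaction, *Error) {
	return m.pushTxn(txn)
}

func (m *mockIntentResolverSketch) ResolveIntent(
	_ context.Context, intent roachpb.LockUpdate, _ intentresolver.ResolveOptions,
) *Error {
	return m.resolve([]roachpb.LockUpdate{intent})
}

func (m *mockIntentResolverSketch) ResolveIntents(
	_ context.Context, intents []roachpb.LockUpdate, _ intentresolver.ResolveOptions,
) *Error {
	return m.resolve(intents)
}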
   124  
   125  // WaitOn implements the lockTableWaiter interface.
   126  func (w *lockTableWaiterImpl) WaitOn(
   127  	ctx context.Context, req Request, guard lockTableGuard,
   128  ) (err *Error) {
   129  	newStateC := guard.NewStateChan()
   130  	ctxDoneC := ctx.Done()
   131  	shouldQuiesceC := w.stopper.ShouldQuiesce()
   132  	// Used to delay liveness and deadlock detection pushes.
   133  	var timer *timeutil.Timer
   134  	var timerC <-chan time.Time
   135  	var timerWaitingState waitingState
   136  	// Used to defer the resolution of duplicate intents. Intended to allow
   137  	// batching of intent resolution while cleaning up after abandoned txns. A
   138  	// request may begin deferring intent resolution and then be forced to wait
   139  	// again on other locks. This is ok, as the request that deferred intent
   140  	// resolution will often be the new reservation holder for those intents'
   141  	// keys. Even when this is not the case (e.g. the request is read-only so it
   142  	// can't hold reservations), any other requests that slip ahead will simply
   143  	// re-discover the intent(s) during evaluation and resolve them themselves.
   144  	var deferredResolution []roachpb.LockUpdate
   145  	defer w.resolveDeferredIntents(ctx, &err, &deferredResolution)
   146  	for {
   147  		select {
   148  		case <-newStateC:
   149  			timerC = nil
   150  			state := guard.CurState()
   151  			switch state.kind {
   152  			case waitFor, waitForDistinguished:
   153  				// waitFor indicates that the request is waiting on another
   154  				// transaction. This transaction may be the lock holder of a
   155  				// conflicting lock or the head of a lock-wait queue that the
   156  				// request is a part of.
   157  				//
   158  				// waitForDistinguished is like waitFor, except it instructs the
   159  				// waiter to quickly push the conflicting transaction after a short
   160  				// liveness push delay instead of waiting out the full deadlock
   161  				// detection push delay. The lockTable guarantees that there is
   162  				// always at least one request in the waitForDistinguished state for
   163  				// each lock that has any waiters.
   164  				//
   165  				// The purpose of the waitForDistinguished state is to avoid waiting
   166  				// out the longer deadlock detection delay before recognizing and
   167  				// recovering from the failure of a transaction coordinator for
   168  				// *each* of that transaction's previously written intents.
   169  				livenessPush := state.kind == waitForDistinguished
   170  				deadlockPush := true
   171  
   172  				// If the conflict is a reservation holder and not a held lock then
   173  				// there's no need to perform a liveness push - the request must be
   174  				// alive or its context would have been canceled and it would have
   175  				// exited its lock wait-queues.
   176  				if !state.held {
   177  					livenessPush = false
   178  				}
   179  
   180  				// For non-transactional requests, there's no need to perform
   181  				// deadlock detection because a non-transactional request can
   182  				// not be part of a dependency cycle. Non-transactional requests
   183  				// cannot hold locks or reservations.
   184  				if req.Txn == nil {
   185  					deadlockPush = false
   186  				}
   187  
   188  				// If the request doesn't want to perform a push for either
   189  				// reason, continue waiting.
   190  				if !livenessPush && !deadlockPush {
   191  					continue
   192  				}
   193  
   194  				// If we know that a lock holder is already finalized (COMMITTED
   195  				// or ABORTED), there's no reason to push it again. Instead, we
   196  				// can skip directly to intent resolution.
   197  				//
    198  				// As an optimization, we defer the intent resolution until
   199  				// we're done waiting on all conflicting locks in this function.
   200  				// This allows us to accumulate a group of intents to resolve
   201  				// and send them together as a batch.
   202  				//
   203  				// Remember that if the lock is held, there will be at least one
   204  				// waiter with livenessPush = true (the distinguished waiter),
   205  				// so at least one request will enter this branch and perform
   206  				// the cleanup on behalf of all other waiters.
   207  				if livenessPush {
   208  					if pusheeTxn, ok := w.finalizedTxnCache.get(state.txn.ID); ok {
   209  						resolve := roachpb.MakeLockUpdate(pusheeTxn, roachpb.Span{Key: state.key})
   210  						deferredResolution = append(deferredResolution, resolve)
   211  
   212  						// Inform the LockManager that the lock has been updated with a
   213  						// finalized status so that it gets removed from the lockTable
   214  						// and we are allowed to proceed.
   215  						//
   216  						// For unreplicated locks, this is all that is needed - the
   217  						// lockTable is the source of truth so, once removed, the
   218  						// unreplicated lock is gone. It is perfectly valid for us to
   219  						// instruct the lock to be released because we know that the
   220  						// lock's owner is finalized.
   221  						//
   222  						// For replicated locks, this is a bit of a lie. The lock hasn't
   223  						// actually been updated yet, but we will be conducting intent
   224  						// resolution in the future (before we observe the corresponding
   225  						// MVCC state). This is safe because we already handle cases
   226  						// where locks exist only in the MVCC keyspace and not in the
   227  						// lockTable.
   228  						//
   229  						// In the future, we'd like to make this more explicit.
   230  						// Specifically, we'd like to augment the lockTable with an
   231  						// understanding of finalized but not yet resolved locks. These
   232  						// locks will allow conflicting transactions to proceed with
   233  						// evaluation without the need to first remove all traces of
   234  						// them via a round of replication. This is discussed in more
   235  						// detail in #41720. Specifically, see mention of "contention
   236  						// footprint" and COMMITTED_BUT_NOT_REMOVABLE.
   237  						w.lm.OnLockUpdated(ctx, &deferredResolution[len(deferredResolution)-1])
   238  						continue
   239  					}
   240  				}
   241  
   242  				// The request should push to detect abandoned locks due to
   243  				// failed transaction coordinators, detect deadlocks between
   244  				// transactions, or both, but only after delay. This delay
   245  				// avoids unnecessary push traffic when the conflicting
   246  				// transaction is continuing to make forward progress.
   247  				delay := time.Duration(math.MaxInt64)
   248  				if livenessPush {
   249  					delay = minDuration(delay, LockTableLivenessPushDelay.Get(&w.st.SV))
   250  				}
   251  				if deadlockPush {
   252  					delay = minDuration(delay, LockTableDeadlockDetectionPushDelay.Get(&w.st.SV))
   253  				}
   254  
   255  				// However, if the pushee has the minimum priority or if the
   256  				// pusher has the maximum priority, push immediately.
   257  				// TODO(nvanbenschoten): flesh these interactions out more and
   258  				// add some testing.
   259  				if hasMinPriority(state.txn) || hasMaxPriority(req.Txn) {
   260  					delay = 0
   261  				}
   262  
   263  				if delay > 0 {
   264  					if timer == nil {
   265  						timer = timeutil.NewTimer()
   266  						defer timer.Stop()
   267  					}
   268  					timer.Reset(delay)
   269  					timerC = timer.C
   270  				} else {
   271  					// If we don't want to delay the push, don't use a real timer.
   272  					// Doing so is both a waste of resources and, more importantly,
   273  					// makes TestConcurrencyManagerBasic flaky because there's no
   274  					// guarantee that the timer will fire before the goroutine enters
   275  					// a "select" waiting state on the next iteration of this loop.
   276  					timerC = closedTimerC
   277  				}
   278  				timerWaitingState = state
   279  
   280  			case waitElsewhere:
   281  				// The lockTable has hit a memory limit and is no longer maintaining
   282  				// proper lock wait-queues.
   283  				if !state.held {
   284  					// If the lock is not held, exit immediately. Requests will
   285  					// be ordered when acquiring latches.
   286  					return nil
   287  				}
   288  				// The waiting request is still not safe to proceed with
   289  				// evaluation because there is still a transaction holding the
   290  				// lock. It should push the transaction it is blocked on
   291  				// immediately to wait in that transaction's txnWaitQueue. Once
   292  				// this completes, the request should stop waiting on this
   293  				// lockTableGuard, as it will no longer observe lock-table state
   294  				// transitions.
   295  				return w.pushLockTxn(ctx, req, state)
   296  
   297  			case waitSelf:
   298  				// Another request from the same transaction is the reservation
   299  				// holder of this lock wait-queue. This can only happen when the
   300  				// request's transaction is sending multiple requests concurrently.
   301  				// Proceed with waiting without pushing anyone.
   302  
   303  			case doneWaiting:
   304  				// The request has waited for all conflicting locks to be released
   305  				// and is at the front of any lock wait-queues. It can now stop
   306  				// waiting, re-acquire latches, and check the lockTable again for
    307  				// any new conflicts. If it finds none, it can proceed with
   308  				// evaluation.
   309  				return nil
   310  
   311  			default:
   312  				panic("unexpected waiting state")
   313  			}
   314  
   315  		case <-timerC:
   316  			// If the request was in the waitFor or waitForDistinguished states
   317  			// and did not observe any update to its state for the entire delay,
   318  			// it should push. It may be the case that the transaction is part
   319  			// of a dependency cycle or that the lock holder's coordinator node
   320  			// has crashed.
   321  			timerC = nil
   322  			if timer != nil {
   323  				timer.Read = true
   324  			}
   325  
   326  			// If the request is conflicting with a held lock then it pushes its
   327  			// holder synchronously - there is no way it will be able to proceed
   328  			// until the lock's transaction undergoes a state transition (either
   329  			// completing or being pushed) and then updates the lock's state
   330  			// through intent resolution. The request has a dependency on the
   331  			// entire conflicting transaction.
   332  			//
   333  			// However, if the request is conflicting with another request (a
   334  			// reservation holder) then it pushes the reservation holder
    335  			// asynchronously while continuing to listen to state transitions in
   336  			// the lockTable. This allows the request to cancel its push if the
   337  			// conflicting reservation exits the lock wait-queue without leaving
   338  			// behind a lock. In this case, the request has a dependency on the
   339  			// conflicting request but not necessarily the entire conflicting
   340  			// transaction.
   341  			if timerWaitingState.held {
   342  				err = w.pushLockTxn(ctx, req, timerWaitingState)
   343  			} else {
   344  				// It would be more natural to launch an async task for the push
   345  				// and continue listening on this goroutine for lockTable state
   346  				// transitions, but doing so is harder to test against. Instead,
   347  				// we launch an async task to listen to lockTable state and
   348  				// synchronously push. If the watcher goroutine detects a
   349  				// lockTable change, it cancels the context on the push.
   350  				pushCtx, pushCancel := context.WithCancel(ctx)
   351  				go w.watchForNotifications(pushCtx, pushCancel, newStateC)
   352  				err = w.pushRequestTxn(pushCtx, req, timerWaitingState)
   353  				if errors.Is(pushCtx.Err(), context.Canceled) {
   354  					// Ignore the context canceled error. If this was for the
   355  					// parent context then we'll notice on the next select.
   356  					err = nil
   357  				}
   358  				pushCancel()
   359  			}
   360  			if err != nil {
   361  				return err
   362  			}
   363  
   364  		case <-ctxDoneC:
   365  			return roachpb.NewError(ctx.Err())
   366  
   367  		case <-shouldQuiesceC:
   368  			return roachpb.NewError(&roachpb.NodeUnavailableError{})
   369  		}
   370  	}
   371  }
   372  
   373  // WaitOnLock implements the lockTableWaiter interface.
   374  func (w *lockTableWaiterImpl) WaitOnLock(
   375  	ctx context.Context, req Request, intent *roachpb.Intent,
   376  ) *Error {
   377  	sa, _, err := findAccessInSpans(intent.Key, req.LockSpans)
   378  	if err != nil {
   379  		return roachpb.NewError(err)
   380  	}
   381  	return w.pushLockTxn(ctx, req, waitingState{
   382  		kind:        waitFor,
   383  		txn:         &intent.Txn,
   384  		key:         intent.Key,
   385  		held:        true,
   386  		guardAccess: sa,
   387  	})
   388  }
   389  
   390  // ClearCaches implements the lockTableWaiter interface.
   391  func (w *lockTableWaiterImpl) ClearCaches() {
   392  	w.finalizedTxnCache.clear()
   393  }
   394  
   395  // pushLockTxn pushes the holder of the provided lock.
   396  //
   397  // The method blocks until the lock holder transaction experiences a state
   398  // transition such that it no longer conflicts with the pusher's request. The
   399  // method then synchronously updates the lock to trigger a state transition in
   400  // the lockTable that will free up the request to proceed. If the method returns
   401  // successfully then the caller can expect to have an updated waitingState.
   402  func (w *lockTableWaiterImpl) pushLockTxn(
   403  	ctx context.Context, req Request, ws waitingState,
   404  ) *Error {
   405  	if w.disableTxnPushing {
   406  		return roachpb.NewError(&roachpb.WriteIntentError{
   407  			Intents: []roachpb.Intent{roachpb.MakeIntent(ws.txn, ws.key)},
   408  		})
   409  	}
   410  
   411  	// Determine which form of push to use. For read-write conflicts, try to
   412  	// push the lock holder's timestamp forward so the read request can read
   413  	// under the lock. For write-write conflicts, try to abort the lock holder
   414  	// entirely so the write request can revoke and replace the lock with its
   415  	// own lock.
   416  	h := w.pushHeader(req)
   417  	var pushType roachpb.PushTxnType
   418  	switch ws.guardAccess {
   419  	case spanset.SpanReadOnly:
   420  		pushType = roachpb.PUSH_TIMESTAMP
   421  		log.VEventf(ctx, 3, "pushing timestamp of txn %s above %s", ws.txn.ID.Short(), h.Timestamp)
   422  	case spanset.SpanReadWrite:
   423  		pushType = roachpb.PUSH_ABORT
   424  		log.VEventf(ctx, 3, "pushing txn %s to abort", ws.txn.ID.Short())
   425  	}
   426  
   427  	pusheeTxn, err := w.ir.PushTransaction(ctx, ws.txn, h, pushType)
   428  	if err != nil {
   429  		return err
   430  	}
   431  
   432  	// If the transaction is finalized, add it to the finalizedTxnCache. This
   433  	// avoids needing to push it again if we find another one of its locks and
   434  	// allows for batching of intent resolution.
   435  	if pusheeTxn.Status.IsFinalized() {
   436  		w.finalizedTxnCache.add(pusheeTxn)
   437  	}
   438  
   439  	// If the push succeeded then the lock holder transaction must have
   440  	// experienced a state transition such that it no longer conflicts with
   441  	// the pusher's request. This state transition could have been any of the
   442  	// following, each of which would be captured in the pusheeTxn proto:
   443  	// 1. the pushee was committed
   444  	// 2. the pushee was aborted
   445  	// 3. the pushee was pushed to a higher provisional commit timestamp such
   446  	//    that once its locks are updated to reflect this, they will no longer
   447  	//    conflict with the pusher request. This is only applicable if pushType
   448  	//    is PUSH_TIMESTAMP.
   449  	// 4. the pushee rolled back all sequence numbers that it held the
   450  	//    conflicting lock at. This allows the lock to be revoked entirely.
   451  	//    TODO(nvanbenschoten): we do not currently detect this case. Doing so
   452  	//    would not be useful until we begin eagerly updating a transaction's
   453  	//    record upon rollbacks to savepoints.
   454  	//
   455  	// Update the conflicting lock to trigger the desired state transition in
   456  	// the lockTable itself, which will allow the request to proceed.
   457  	//
   458  	// We always poison due to limitations of the API: not poisoning equals
   459  	// clearing the AbortSpan, and if our pushee transaction first got pushed
   460  	// for timestamp (by us), then (by someone else) aborted and poisoned, and
    461  	// then we run the below code, we're clearing the AbortSpan illegally.
   462  	// Furthermore, even if our pushType is not PUSH_ABORT, we may have ended up
   463  	// with the responsibility to abort the intents (for example if we find the
   464  	// transaction aborted). To do better here, we need per-intent information
   465  	// on whether we need to poison.
   466  	resolve := roachpb.MakeLockUpdate(pusheeTxn, roachpb.Span{Key: ws.key})
   467  	opts := intentresolver.ResolveOptions{Poison: true}
   468  	return w.ir.ResolveIntent(ctx, resolve, opts)
   469  }
   470  
   471  // pushRequestTxn pushes the owner of the provided request.
   472  //
   473  // The method blocks until either the pusher's transaction is aborted or the
   474  // pushee's transaction is finalized (committed or aborted). If the pusher's
    475  // transaction is aborted then the method will return an error and the
    476  // pusher should exit its lock wait-queues. If the pushee's transaction is
    477  // finalized then the method will return no error. The pushee is
   478  // expected to notice that it has been aborted during its next attempt to push
   479  // another transaction and will exit its lock wait-queues.
   480  //
   481  // However, the method responds to context cancelation and will terminate the
   482  // push attempt if its context is canceled. This allows the caller to revoke a
   483  // push if it determines that the pushee is no longer blocking the request. The
   484  // caller is expected to terminate the push if it observes any state transitions
   485  // in the lockTable. As such, the push is only expected to be allowed to run to
   486  // completion in cases where requests are truly deadlocked.
   487  func (w *lockTableWaiterImpl) pushRequestTxn(
   488  	ctx context.Context, req Request, ws waitingState,
   489  ) *Error {
   490  	// Regardless of whether the waiting request is reading from or writing to a
   491  	// key, it always performs a PUSH_ABORT when pushing a conflicting request
   492  	// because it wants to block until either a) the pushee or the pusher is
   493  	// aborted due to a deadlock or b) the request exits the lock wait-queue and
   494  	// the caller of this function cancels the push.
   495  	h := w.pushHeader(req)
   496  	pushType := roachpb.PUSH_ABORT
   497  	log.VEventf(ctx, 3, "pushing txn %s to detect request deadlock", ws.txn.ID.Short())
   498  
   499  	_, err := w.ir.PushTransaction(ctx, ws.txn, h, pushType)
   500  	if err != nil {
   501  		return err
   502  	}
   503  
   504  	// Even if the push succeeded and aborted the other transaction to break a
   505  	// deadlock, there's nothing for the pusher to clean up. The conflicting
   506  	// request will quickly exit the lock wait-queue and release its reservation
   507  	// once it notices that it is aborted and the pusher will be free to proceed
   508  	// because it was not waiting on any locks. If the pusher's request does end
   509  	// up hitting a lock which the pushee fails to clean up, it will perform the
   510  	// cleanup itself using pushLockTxn.
   511  	//
   512  	// It may appear that there is a bug here in the handling of request-only
   513  	// dependency cycles. If such a cycle was broken by simultaneously aborting
    514  	// the transactions responsible for each of the requests, there would be no
   515  	// guarantee that an aborted pusher would notice that its own transaction
   516  	// was aborted before it notices that its pushee's transaction was aborted.
   517  	// For example, in the simplest case, imagine two requests deadlocked on
   518  	// each other. If their transactions are both aborted and each push notices
   519  	// the pushee is aborted first, they will both return here triumphantly and
   520  	// wait for the other to exit its lock wait-queues, leading to deadlock.
   521  	// Even if they eventually pushed each other again, there would be no
   522  	// guarantee that the same thing wouldn't happen.
   523  	//
   524  	// However, such a situation is not possible in practice because such a
   525  	// dependency cycle is never constructed by the lockTable. The lockTable
   526  	// assigns each request a monotonically increasing sequence number upon its
   527  	// initial entrance to the lockTable. This sequence number is used to
   528  	// straighten out dependency chains of requests such that a request only
   529  	// waits on conflicting requests with lower sequence numbers than its own
   530  	// sequence number. This behavior guarantees that request-only dependency
   531  	// cycles are never constructed by the lockTable. Put differently, all
   532  	// dependency cycles must include at least one dependency on a lock and,
   533  	// therefore, one call to pushLockTxn. Unlike pushRequestTxn, pushLockTxn
   534  	// actively removes the conflicting lock and removes the dependency when it
   535  	// determines that its pushee transaction is aborted. This means that the
   536  	// call to pushLockTxn will continue to make forward progress in the case of
   537  	// a simultaneous abort of all transactions behind the members of the cycle,
   538  	// preventing such a hypothesized deadlock from ever materializing.
   539  	//
   540  	// Example:
   541  	//
   542  	//  req(1, txn1), req(1, txn2) are both waiting on a lock held by txn3, and
   543  	//  they respectively hold a reservation on key "a" and key "b". req(2, txn2)
   544  	//  queues up behind the reservation on key "a" and req(2, txn1) queues up
   545  	//  behind the reservation on key "b". Now the dependency cycle between txn1
   546  	//  and txn2 only involves requests, but some of the requests here also
   547  	//  depend on a lock. So when both txn1, txn2 are aborted, the req(1, txn1),
   548  	//  req(1, txn2) are guaranteed to eventually notice through self-directed
   549  	//  QueryTxn requests and will exit the lockTable, allowing req(2, txn1) and
   550  	//  req(2, txn2) to get the reservation and now they no longer depend on each
   551  	//  other.
   552  	//
   553  	return nil
   554  }
   555  
   556  func (w *lockTableWaiterImpl) pushHeader(req Request) roachpb.Header {
   557  	h := roachpb.Header{
   558  		Timestamp:    req.readConflictTimestamp(),
   559  		UserPriority: req.Priority,
   560  	}
   561  	if req.Txn != nil {
   562  		// We are going to hand the header (and thus the transaction proto) to
   563  		// the RPC framework, after which it must not be changed (since that
   564  		// could race). Since the subsequent execution of the original request
   565  		// might mutate the transaction, make a copy here. See #9130.
   566  		h.Txn = req.Txn.Clone()
   567  	}
   568  	return h
   569  }
   570  
   571  // resolveDeferredIntents resolves the batch of intents if the provided error is
   572  // nil. The batch of intents may be resolved more efficiently than if they were
   573  // resolved individually.
   574  func (w *lockTableWaiterImpl) resolveDeferredIntents(
   575  	ctx context.Context, err **Error, deferredResolution *[]roachpb.LockUpdate,
   576  ) {
   577  	if (*err != nil) || (len(*deferredResolution) == 0) {
   578  		return
   579  	}
   580  	// See pushLockTxn for an explanation of these options.
   581  	opts := intentresolver.ResolveOptions{Poison: true}
   582  	*err = w.ir.ResolveIntents(ctx, *deferredResolution, opts)
   583  }
   584  
   585  // watchForNotifications selects on the provided channel and watches for any
   586  // updates. If the channel is ever notified, it calls the provided context
   587  // cancelation function and exits.
   588  func (w *lockTableWaiterImpl) watchForNotifications(
   589  	ctx context.Context, cancel func(), newStateC chan struct{},
   590  ) {
   591  	select {
   592  	case <-newStateC:
   593  		// Re-signal the channel.
   594  		select {
   595  		case newStateC <- struct{}{}:
   596  		default:
   597  		}
   598  		// Cancel the context of the async task.
   599  		cancel()
   600  	case <-ctx.Done():
   601  	}
   602  }
   603  
   604  // txnCache is a small LRU cache that holds Transaction objects.
   605  //
   606  // The zero value of this struct is ready for use.
   607  type txnCache struct {
   608  	mu   syncutil.Mutex
   609  	txns [8]*roachpb.Transaction // [MRU, ..., LRU]
   610  }
   611  
   612  func (c *txnCache) get(id uuid.UUID) (*roachpb.Transaction, bool) {
   613  	c.mu.Lock()
   614  	defer c.mu.Unlock()
   615  	if idx := c.getIdxLocked(id); idx >= 0 {
   616  		txn := c.txns[idx]
   617  		c.moveFrontLocked(txn, idx)
   618  		return txn, true
   619  	}
   620  	return nil, false
   621  }
   622  
   623  func (c *txnCache) add(txn *roachpb.Transaction) {
   624  	c.mu.Lock()
   625  	defer c.mu.Unlock()
   626  	if idx := c.getIdxLocked(txn.ID); idx >= 0 {
   627  		c.moveFrontLocked(txn, idx)
   628  	} else {
   629  		c.insertFrontLocked(txn)
   630  	}
   631  }
   632  
   633  func (c *txnCache) clear() {
   634  	c.mu.Lock()
   635  	defer c.mu.Unlock()
   636  	for i := range c.txns {
   637  		c.txns[i] = nil
   638  	}
   639  }
   640  
   641  func (c *txnCache) getIdxLocked(id uuid.UUID) int {
   642  	for i, txn := range c.txns {
   643  		if txn != nil && txn.ID == id {
   644  			return i
   645  		}
   646  	}
   647  	return -1
   648  }
   649  
   650  func (c *txnCache) moveFrontLocked(txn *roachpb.Transaction, cur int) {
   651  	copy(c.txns[1:cur+1], c.txns[:cur])
   652  	c.txns[0] = txn
   653  }
   654  
   655  func (c *txnCache) insertFrontLocked(txn *roachpb.Transaction) {
   656  	copy(c.txns[1:], c.txns[:])
   657  	c.txns[0] = txn
   658  }
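
// txnCacheEvictionSketch illustrates the cache's LRU behavior (the function
// is not part of the original file): insertFrontLocked shifts every entry one
// slot toward the back, so once more transactions have been added than there
// are slots, the least recently used entry is no longer retrievable.
func txnCacheEvictionSketch() {
	var c txnCache // the zero value is ready for use
	first := &roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4()}}
	c.add(first)
	// Fill every slot with newer transactions, evicting the oldest entry.
	for i := 0; i < len(c.txns); i++ {
		c.add(&roachpb.Transaction{TxnMeta: enginepb.TxnMeta{ID: uuid.MakeV4()}})
	}
	if _, ok := c.get(first.ID); ok {
		panic("expected the oldest transaction to have been evicted")
	}
}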
   659  
   660  func hasMinPriority(txn *enginepb.TxnMeta) bool {
   661  	return txn != nil && txn.Priority == enginepb.MinTxnPriority
   662  }
   663  
   664  func hasMaxPriority(txn *roachpb.Transaction) bool {
   665  	return txn != nil && txn.Priority == enginepb.MaxTxnPriority
   666  }
   667  
   668  func minDuration(a, b time.Duration) time.Duration {
   669  	if a < b {
   670  		return a
   671  	}
   672  	return b
   673  }
   674  
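// closedTimerC is a timer channel that is closed during package
// initialization (see init below). Receiving from it succeeds immediately,
// so WaitOn uses it in place of a real timer when a push should be attempted
// without any delay.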
   675  var closedTimerC chan time.Time
   676  
   677  func init() {
   678  	closedTimerC = make(chan time.Time)
   679  	close(closedTimerC)
   680  }