github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvclient/kvcoord/txn_coord_sender.go

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvcoord
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"runtime/debug"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/keys"
    21  	"github.com/cockroachdb/cockroach/pkg/kv"
    22  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    23  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    24  	"github.com/cockroachdb/cockroach/pkg/util/duration"
    25  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    26  	"github.com/cockroachdb/cockroach/pkg/util/log"
    27  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    28  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    29  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    30  	"github.com/cockroachdb/errors"
    31  	"github.com/cockroachdb/logtags"
    32  )
    33  
    34  const (
    35  	opTxnCoordSender = "txn coordinator send"
    36  )
    37  
    38  // txnState represents states relating to whether an EndTxn request needs
    39  // to be sent.
    40  //go:generate stringer -type=txnState
    41  type txnState int
    42  
    43  const (
    44  	// txnPending is the normal state for ongoing transactions.
    45  	txnPending txnState = iota
    46  
    47  	// txnError means that a batch encountered a non-retriable error. Further
    48  	// batches except EndTxn(commit=false) will be rejected.
    49  	txnError
    50  
    51  	// txnFinalized means that an EndTxn(commit=true) has been executed
    52  	// successfully, or an EndTxn(commit=false) was sent - regardless of
    53  	// whether it executed successfully or not. Further batches except
    54  	// EndTxn(commit=false) will be rejected; a second rollback is allowed
    55  	// in case the first one fails.
    56  	// TODO(andrei): we'd probably benefit from splitting this state into at least
    57  	// two - transaction definitely cleaned up, and transaction potentially
    58  	// cleaned up.
    59  	txnFinalized
    60  )
    61  
    62  // A TxnCoordSender is the production implementation of client.TxnSender. It is
    63  // a Sender which wraps a lower-level Sender (a DistSender) to which it sends
    64  // commands. It works on behalf of the client to keep a transaction's state
    65  // (e.g. intents) and to perform periodic heartbeating of the transaction
    66  // when necessary.  Unlike other senders, TxnCoordSender is not a
    67  // singleton - an instance is created for every transaction by the
    68  // TxnCoordSenderFactory.
    69  //
    70  // Among the functions it performs are:
    71  // - Heartbeating of the transaction record. Note that heartbeating is done only
    72  // from the root transaction coordinator, in the event that multiple
    73  // coordinators are active (i.e. in a distributed SQL flow).
    74  // - Accumulating lock spans.
    75  // - Attaching lock spans to EndTxn requests, for cleanup.
    76  // - Handling retriable errors by either bumping the transaction's epoch or, in
    77  // case of TransactionAbortedErrors, cleaning up the transaction (in this case,
    78  // the client.Txn is expected to create a new TxnCoordSender instance
    79  // transparently for the higher-level client).
    80  //
    81  // Since it is stateful, the TxnCoordSender needs to understand when a
    82  // transaction is "finished" and the state can be destroyed. As such there's a
    83  // contract that the client.Txn needs to obey. Read-only transactions don't matter
    84  // - they're stateless. For the others, once an intent write is sent by the
    85  // client, the TxnCoordSender considers the transaction completed in the
    86  // following situations:
    87  // - A batch containing an EndTxn (commit or rollback) succeeds.
    88  // - A batch containing an EndTxn(commit=false) succeeds or fails. Only
    89  // more rollback attempts can follow a rollback attempt.
    90  // - A batch returns a TransactionAbortedError. As mentioned above, the client
    91  // is expected to create a new TxnCoordSender for the next transaction attempt.
    92  //
    93  // Note that "1PC" batches (i.e. batches containing both a Begin and an
    94  // EndTxn) are no exception to the contract - if the batch fails, the
    95  // client is expected to send a rollback (or perform another transaction attempt
    96  // in case of retriable errors).
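        //
        // As a rough illustration of the contract above (a sketch only, assuming the
        // usual kv.DB / kv.Txn client helpers; none of these names appear in this
        // file), a typical interaction looks like:
        //
        //   func runTxn(ctx context.Context, db *kv.DB) error {
        //       return db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
        //           // An intent write makes the TxnCoordSender stateful.
        //           if err := txn.Put(ctx, "k", "v"); err != nil {
        //               // db.Txn retries retriable errors and otherwise rolls back,
        //               // which satisfies the rollback requirement described above.
        //               return err
        //           }
        //           // On success, db.Txn issues the EndTxn(commit=true).
        //           return nil
        //       })
        //   }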
    97  type TxnCoordSender struct {
    98  	mu struct {
    99  		syncutil.Mutex
   100  
   101  		txnState txnState
   102  		// storedErr is set when txnState == txnError. This storedErr is returned to
   103  		// clients on Send().
   104  		storedErr *roachpb.Error
   105  
   106  		// active is set whenever the transaction has sent any requests. Rolling
   107  		// back to a savepoint taken before the TxnCoordSender became active resets
   108  		// the field to false.
   109  		active bool
   110  
   111  		// closed is set once this transaction has either committed or rolled back
   112  		// (including when the heartbeat loop cleans it up asynchronously). If the
   113  		// client sends anything other than a rollback, it will get an error
   114  		// (a retryable TransactionAbortedError in case of the async abort).
   115  		closed bool
   116  
   117  		// txn is the Transaction proto attached to all the requests and updated on
   118  		// all the responses.
   119  		txn roachpb.Transaction
   120  
   121  		// userPriority is the txn's priority. Used when restarting the transaction.
   122  		// This field is only populated on rootTxns.
   123  		userPriority roachpb.UserPriority
   124  	}
   125  
   126  	// A pointer member to the creating factory provides access to
   127  	// immutable factory settings.
   128  	*TxnCoordSenderFactory
   129  
   130  	// An ordered stack of pluggable request interceptors that can transform
   131  	// batch requests and responses while each maintains its own targeted state.
   132  	// The stack is stored in a slice backed by the interceptorAlloc.arr and each
   133  	// txnInterceptor implementation is embedded in the interceptorAlloc struct,
   134  	// so the entire stack is allocated together with the TxnCoordSender without
   135  	// any additional heap allocations.
   136  	interceptorStack []txnInterceptor
   137  	interceptorAlloc struct {
   138  		arr [6]txnInterceptor
   139  		txnHeartbeater
   140  		txnSeqNumAllocator
   141  		txnPipeliner
   142  		txnSpanRefresher
   143  		txnCommitter
   144  		txnMetricRecorder
   145  		txnLockGatekeeper // not in interceptorStack array.
   146  	}
   147  
   148  	// typ specifies whether this transaction is the top level,
   149  	// or one of potentially many distributed transactions.
   150  	typ kv.TxnType
   151  }
   152  
   153  var _ kv.TxnSender = &TxnCoordSender{}
   154  
   155  // txnInterceptors are pluggable request interceptors that transform requests
   156  // and responses and can perform operations in the context of a transaction. A
   157  // TxnCoordSender maintains a stack of txnInterceptors that it calls into under
   158  // lock whenever it sends a request.
   159  type txnInterceptor interface {
   160  	lockedSender
   161  
   162  	// setWrapped sets the txnInterceptor wrapped lockedSender.
   163  	setWrapped(wrapped lockedSender)
   164  
   165  	// populateLeafInputState populates the given input payload
   166  	// for a LeafTxn.
   167  	populateLeafInputState(*roachpb.LeafTxnInputState)
   168  
   169  	// populateLeafFinalState populates the final payload
   170  	// for a LeafTxn to bring back into a RootTxn.
   171  	populateLeafFinalState(*roachpb.LeafTxnFinalState)
   172  
   173  	// importLeafFinalState updates any internal state held inside the
   174  	// interceptor from the given LeafTxn final state.
   175  	importLeafFinalState(context.Context, *roachpb.LeafTxnFinalState)
   176  
   177  	// epochBumpedLocked resets the interceptor in the case of a txn epoch
   178  	// increment.
   179  	epochBumpedLocked()
   180  
   181  	// createSavepointLocked is used to populate a savepoint with all the state
   182  	// that needs to be restored on a rollback.
   183  	createSavepointLocked(context.Context, *savepoint)
   184  
   185  	// rollbackToSavepointLocked is used to restore the state previously saved by
   186  	// createSavepointLocked().
   187  	rollbackToSavepointLocked(context.Context, savepoint)
   188  
   189  	// closeLocked closes the interceptor. It is called when the TxnCoordSender
   190  	// shuts down due to either a txn commit or a txn abort. The method will
   191  	// be called exactly once from cleanupTxnLocked.
   192  	closeLocked()
   193  }
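
        // As a minimal sketch (no such interceptor exists in this file), a
        // pass-through txnInterceptor only needs to forward requests to its wrapped
        // lockedSender; every other hook can be a no-op:
        //
        //   type noopInterceptor struct {
        //       wrapped lockedSender
        //   }
        //
        //   func (n *noopInterceptor) SendLocked(
        //       ctx context.Context, ba roachpb.BatchRequest,
        //   ) (*roachpb.BatchResponse, *roachpb.Error) {
        //       return n.wrapped.SendLocked(ctx, ba)
        //   }
        //
        //   func (n *noopInterceptor) setWrapped(w lockedSender)                            { n.wrapped = w }
        //   func (n *noopInterceptor) populateLeafInputState(*roachpb.LeafTxnInputState)    {}
        //   func (n *noopInterceptor) populateLeafFinalState(*roachpb.LeafTxnFinalState)    {}
        //   func (n *noopInterceptor) importLeafFinalState(context.Context, *roachpb.LeafTxnFinalState) {}
        //   func (n *noopInterceptor) epochBumpedLocked()                                   {}
        //   func (n *noopInterceptor) createSavepointLocked(context.Context, *savepoint)    {}
        //   func (n *noopInterceptor) rollbackToSavepointLocked(context.Context, savepoint) {}
        //   func (n *noopInterceptor) closeLocked()                                         {}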
   194  
   195  func newRootTxnCoordSender(
   196  	tcf *TxnCoordSenderFactory, txn *roachpb.Transaction, pri roachpb.UserPriority,
   197  ) kv.TxnSender {
   198  	txn.AssertInitialized(context.TODO())
   199  
   200  	if txn.Status != roachpb.PENDING {
   201  		log.Fatalf(context.TODO(), "unexpected non-pending txn in RootTransactionalSender: %s", txn)
   202  	}
   203  	if txn.Sequence != 0 {
   204  		log.Fatalf(context.TODO(), "cannot initialize root txn with seq != 0: %s", txn)
   205  	}
   206  
   207  	tcs := &TxnCoordSender{
   208  		typ:                   kv.RootTxn,
   209  		TxnCoordSenderFactory: tcf,
   210  	}
   211  	tcs.mu.txnState = txnPending
   212  	tcs.mu.userPriority = pri
   213  
   214  	// Create a stack of request/response interceptors. All of the objects in
   215  	// this stack are pre-allocated on the TxnCoordSender struct, so this just
   216  	// initializes the interceptors and pieces them together. It then adds a
   217  	// txnLockGatekeeper at the bottom of the stack to connect it with the
   218  	// TxnCoordSender's wrapped sender. First, each of the interceptor objects
   219  	// is initialized.
   220  	tcs.interceptorAlloc.txnHeartbeater.init(
   221  		tcf.AmbientContext,
   222  		tcs.stopper,
   223  		tcs.clock,
   224  		&tcs.metrics,
   225  		tcs.heartbeatInterval,
   226  		&tcs.interceptorAlloc.txnLockGatekeeper,
   227  		&tcs.mu.Mutex,
   228  		&tcs.mu.txn,
   229  	)
   230  	tcs.interceptorAlloc.txnCommitter = txnCommitter{
   231  		st:      tcf.st,
   232  		stopper: tcs.stopper,
   233  		mu:      &tcs.mu.Mutex,
   234  	}
   235  	tcs.interceptorAlloc.txnMetricRecorder = txnMetricRecorder{
   236  		metrics: &tcs.metrics,
   237  		clock:   tcs.clock,
   238  		txn:     &tcs.mu.txn,
   239  	}
   240  	tcs.initCommonInterceptors(tcf, txn, kv.RootTxn)
   241  
   242  	// Once the interceptors are initialized, piece them all together in the
   243  	// correct order.
   244  	tcs.interceptorAlloc.arr = [...]txnInterceptor{
   245  		&tcs.interceptorAlloc.txnHeartbeater,
   246  		// Various interceptors below rely on sequence number allocation,
   247  		// so the sequence number allocator is near the top of the stack.
   248  		&tcs.interceptorAlloc.txnSeqNumAllocator,
   249  		// The pipeliner sits above the span refresher because it will
   250  		// never generate transaction retry errors that could be avoided
   251  		// with a refresh.
   252  		&tcs.interceptorAlloc.txnPipeliner,
   253  		// The span refresher may resend entire batches to avoid transaction
   254  		// retries. Because of that, we need to be careful which interceptors
   255  		// sit below it in the stack.
   256  		&tcs.interceptorAlloc.txnSpanRefresher,
   257  		// The committer sits beneath the span refresher so that any
   258  		// retryable errors that it generates have a chance of being
   259  		// "refreshed away" without the need for a txn restart. Because the
   260  		// span refresher can re-issue batches, it needs to be careful about
   261  		// what parts of the batch it mutates. Any mutation needs to be
   262  		// idempotent and should avoid writing to memory when not changing
   263  		// it to avoid looking like a data race.
   264  		&tcs.interceptorAlloc.txnCommitter,
   265  		// The metrics recorder sits at the bottom of the stack so that it
   266  		// can observe all transformations performed by other interceptors.
   267  		&tcs.interceptorAlloc.txnMetricRecorder,
   268  	}
   269  	tcs.interceptorStack = tcs.interceptorAlloc.arr[:]
   270  
   271  	tcs.connectInterceptors()
   272  
   273  	tcs.mu.txn.Update(txn)
   274  	return tcs
   275  }
   276  
   277  func (tc *TxnCoordSender) initCommonInterceptors(
   278  	tcf *TxnCoordSenderFactory, txn *roachpb.Transaction, typ kv.TxnType,
   279  ) {
   280  	var riGen rangeIteratorFactory
   281  	if ds, ok := tcf.wrapped.(*DistSender); ok {
   282  		riGen.ds = ds
   283  	}
   284  	tc.interceptorAlloc.txnPipeliner = txnPipeliner{
   285  		st:    tcf.st,
   286  		riGen: riGen,
   287  	}
   288  	tc.interceptorAlloc.txnSpanRefresher = txnSpanRefresher{
   289  		st:    tcf.st,
   290  		knobs: &tcf.testingKnobs,
   291  		riGen: riGen,
   292  		// We can only allow refresh span retries on root transactions
   293  		// because those are the only places where we have all of the
   294  		// refresh spans. If this is a leaf, as in a distributed sql flow,
   295  		// we need to propagate the error to the root for an epoch restart.
   296  		canAutoRetry:                  typ == kv.RootTxn,
   297  		refreshSuccess:                tc.metrics.RefreshSuccess,
   298  		refreshFail:                   tc.metrics.RefreshFail,
   299  		refreshFailWithCondensedSpans: tc.metrics.RefreshFailWithCondensedSpans,
   300  		refreshMemoryLimitExceeded:    tc.metrics.RefreshMemoryLimitExceeded,
   301  	}
   302  	tc.interceptorAlloc.txnLockGatekeeper = txnLockGatekeeper{
   303  		wrapped:                 tc.wrapped,
   304  		mu:                      &tc.mu.Mutex,
   305  		allowConcurrentRequests: typ == kv.LeafTxn,
   306  	}
   307  	tc.interceptorAlloc.txnSeqNumAllocator.writeSeq = txn.Sequence
   308  }
   309  
   310  func (tc *TxnCoordSender) connectInterceptors() {
   311  	for i, reqInt := range tc.interceptorStack {
   312  		if i < len(tc.interceptorStack)-1 {
   313  			reqInt.setWrapped(tc.interceptorStack[i+1])
   314  		} else {
   315  			reqInt.setWrapped(&tc.interceptorAlloc.txnLockGatekeeper)
   316  		}
   317  	}
   318  }
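
        // For a root transaction, connectInterceptors therefore produces the chain:
        //
        //   txnHeartbeater -> txnSeqNumAllocator -> txnPipeliner -> txnSpanRefresher ->
        //       txnCommitter -> txnMetricRecorder -> txnLockGatekeeper -> wrapped sender
        //
        // with the txnLockGatekeeper releasing the TxnCoordSender's mutex before
        // handing the batch to the wrapped sender (typically a DistSender).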
   319  
   320  func newLeafTxnCoordSender(
   321  	tcf *TxnCoordSenderFactory, tis *roachpb.LeafTxnInputState,
   322  ) kv.TxnSender {
   323  	txn := &tis.Txn
   324  	// 19.2 roots might have this flag set. In 20.1, the flag is only set by the
   325  	// server and terminated by the client in the span refresher interceptor. If
   326  	// the root is a 19.2 node, we reset the flag because it only confuses
   327  	// that interceptor and provides no benefit.
   328  	txn.WriteTooOld = false
   329  	txn.AssertInitialized(context.TODO())
   330  
   331  	// Deal with requests from 19.2 nodes which did not set ReadTimestamp.
   332  	if txn.ReadTimestamp.Less(txn.DeprecatedOrigTimestamp) {
   333  		txn.ReadTimestamp = txn.DeprecatedOrigTimestamp
   334  	}
   335  
   336  	if txn.Status != roachpb.PENDING {
   337  		log.Fatalf(context.TODO(), "unexpected non-pending txn in LeafTransactionalSender: %s", tis)
   338  	}
   339  
   340  	tcs := &TxnCoordSender{
   341  		typ:                   kv.LeafTxn,
   342  		TxnCoordSenderFactory: tcf,
   343  	}
   344  	tcs.mu.txnState = txnPending
   345  	// No need to initialize tcs.mu.userPriority here,
   346  	// as this field is only used in root txns.
   347  
   348  	// Create a stack of request/response interceptors. All of the objects in
   349  	// this stack are pre-allocated on the TxnCoordSender struct, so this just
   350  	// initializes the interceptors and pieces them together. It then adds a
   351  	// txnLockGatekeeper at the bottom of the stack to connect it with the
   352  	// TxnCoordSender's wrapped sender. First, each of the interceptor objects
   353  	// is initialized.
   354  	tcs.initCommonInterceptors(tcf, txn, kv.LeafTxn)
   355  
   356  	// Per-interceptor leaf initialization. If/when more interceptors
   357  	// need leaf initialization, this should be turned into an interface
   358  	// method on txnInterceptor with a loop here.
   359  	tcs.interceptorAlloc.txnPipeliner.initializeLeaf(tis)
   360  	tcs.interceptorAlloc.txnSeqNumAllocator.initializeLeaf(tis)
   361  
   362  	// Once the interceptors are initialized, piece them all together in the
   363  	// correct order.
   364  	tcs.interceptorAlloc.arr = [cap(tcs.interceptorAlloc.arr)]txnInterceptor{
   365  		// LeafTxns never perform writes so the sequence number allocator
   366  		// should never increment its sequence number counter over its
   367  		// lifetime, but it still plays the important role of assigning each
   368  		// read request the latest sequence number.
   369  		&tcs.interceptorAlloc.txnSeqNumAllocator,
   370  		// The pipeliner is needed on leaves to ensure that in-flight writes
   371  		// are chained onto by reads that should see them.
   372  		&tcs.interceptorAlloc.txnPipeliner,
   373  		// The span refresher may be needed for accumulating the spans to
   374  		// be reported to the Root. See also: #24798.
   375  		//
   376  		// Note: this interceptor must be the last in the list; it is
   377  		// only conditionally included in the stack. See below.
   378  		&tcs.interceptorAlloc.txnSpanRefresher,
   379  	}
   380  	// All other interceptors are absent from a LeafTxn's interceptor stack
   381  	// because they do not serve a role on leaves.
   382  
   383  	// If the root has informed us that the read spans are not needed by
   384  	// the root, we don't need the txnSpanRefresher.
   385  	if tis.RefreshInvalid {
   386  		tcs.interceptorStack = tcs.interceptorAlloc.arr[:2]
   387  	} else {
   388  		tcs.interceptorStack = tcs.interceptorAlloc.arr[:3]
   389  	}
   390  
   391  	tcs.connectInterceptors()
   392  
   393  	tcs.mu.txn.Update(txn)
   394  	return tcs
   395  }
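
        // As a rough sketch of the root/leaf round trip (the calls below are normally
        // issued by the distributed SQL machinery, and the variable names are
        // illustrative only):
        //
        //   // On the gateway, from the RootTxn's sender:
        //   tis, err := rootSender.GetLeafTxnInputState(ctx, kv.OnlyPending)
        //   // ... ship tis to the remote node ...
        //
        //   // On the remote node:
        //   leafSender := newLeafTxnCoordSender(tcf, &tis)
        //   // ... the leaf issues read-only batches through leafSender.Send ...
        //   tfs, err := leafSender.GetLeafTxnFinalState(ctx, kv.AnyTxnStatus)
        //
        //   // Back on the gateway:
        //   rootSender.UpdateRootWithLeafFinalState(ctx, &tfs)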
   396  
   397  // DisablePipelining is part of the client.TxnSender interface.
   398  func (tc *TxnCoordSender) DisablePipelining() error {
   399  	tc.mu.Lock()
   400  	defer tc.mu.Unlock()
   401  	if tc.mu.active {
   402  		return errors.Errorf("cannot disable pipelining on a running transaction")
   403  	}
   404  	tc.interceptorAlloc.txnPipeliner.disabled = true
   405  	return nil
   406  }
   407  
   408  func generateTxnDeadlineExceededErr(
   409  	txn *roachpb.Transaction, deadline hlc.Timestamp,
   410  ) *roachpb.Error {
   411  	exceededBy := txn.WriteTimestamp.GoTime().Sub(deadline.GoTime())
   412  	extraMsg := fmt.Sprintf(
   413  		"txn timestamp pushed too much; deadline exceeded by %s (%s > %s)",
   414  		exceededBy, txn.WriteTimestamp, deadline)
   415  	return roachpb.NewErrorWithTxn(
   416  		roachpb.NewTransactionRetryError(roachpb.RETRY_COMMIT_DEADLINE_EXCEEDED, extraMsg), txn)
   417  }
   418  
   419  // commitReadOnlyTxnLocked "commits" a read-only txn. It is equivalent to, but
   420  // cheaper than, sending an EndTxnRequest. A read-only txn doesn't have a
   421  // transaction record, so there's no need to send any request to the server. An
   422  // EndTxnRequest for a read-only txn is elided by the txnCommitter interceptor.
   423  // However, calling this and short-circuiting even earlier is even more efficient
   424  // (and shows in benchmarks).
   425  // TODO(nvanbenschoten): we could have this call into txnCommitter's
   426  // sendLockedWithElidedEndTxn method, but we would want to confirm
   427  // that doing so doesn't cut into the speed-up we see from this fast-path.
   428  func (tc *TxnCoordSender) commitReadOnlyTxnLocked(
   429  	ctx context.Context, ba roachpb.BatchRequest,
   430  ) *roachpb.Error {
   431  	deadline := ba.Requests[0].GetEndTxn().Deadline
   432  	if deadline != nil && deadline.LessEq(tc.mu.txn.WriteTimestamp) {
   433  		txn := tc.mu.txn.Clone()
   434  		pErr := generateTxnDeadlineExceededErr(txn, *deadline)
   435  		// We need to bump the epoch and transform this retriable error.
   436  		ba.Txn = txn
   437  		return tc.updateStateLocked(ctx, ba, nil /* br */, pErr)
   438  	}
   439  	tc.mu.txnState = txnFinalized
   440  	// Mark the transaction as committed so that, in case this commit is done by
   441  	// the closure passed to db.Txn(), db.Txn() doesn't attempt to commit again.
   442  	// Also so that the correct metric gets incremented.
   443  	tc.mu.txn.Status = roachpb.COMMITTED
   444  	tc.cleanupTxnLocked(ctx)
   445  	return nil
   446  }
   447  
   448  // Send is part of the client.TxnSender interface.
   449  func (tc *TxnCoordSender) Send(
   450  	ctx context.Context, ba roachpb.BatchRequest,
   451  ) (*roachpb.BatchResponse, *roachpb.Error) {
   452  	// NOTE: The locking here is unusual. Although it might look like it, we are
   453  	// NOT holding the lock continuously for the duration of the Send. We lock
   454  	// here, and unlock at the bottom of the interceptor stack, in the
   455  	// txnLockGatekeeper. Then we lock again in that interceptor when the response
   456  	// comes, and unlock again in the defer below.
   457  	tc.mu.Lock()
   458  	defer tc.mu.Unlock()
   459  	tc.mu.active = true
   460  
   461  	if pErr := tc.maybeRejectClientLocked(ctx, &ba); pErr != nil {
   462  		return nil, pErr
   463  	}
   464  
   465  	if ba.IsSingleEndTxnRequest() && !tc.interceptorAlloc.txnPipeliner.hasAcquiredLocks() {
   466  		return nil, tc.commitReadOnlyTxnLocked(ctx, ba)
   467  	}
   468  
   469  	startNs := tc.clock.PhysicalNow()
   470  
   471  	ctx, sp := tc.AnnotateCtxWithSpan(ctx, opTxnCoordSender)
   472  	defer sp.Finish()
   473  
   474  	// Associate the txnID with the trace.
   475  	if tc.mu.txn.ID == (uuid.UUID{}) {
   476  		log.Fatalf(ctx, "cannot send transactional request through unbound TxnCoordSender")
   477  	}
   478  	if !tracing.IsBlackHoleSpan(sp) {
   479  		sp.SetBaggageItem("txnID", tc.mu.txn.ID.String())
   480  	}
   481  	ctx = logtags.AddTag(ctx, "txn", uuid.ShortStringer(tc.mu.txn.ID))
   482  	if log.V(2) {
   483  		ctx = logtags.AddTag(ctx, "ts", tc.mu.txn.WriteTimestamp)
   484  	}
   485  
   486  	// It doesn't make sense to use inconsistent reads in a transaction. However,
   487  	// we still need to accept it as a parameter for this to compile.
   488  	if ba.ReadConsistency != roachpb.CONSISTENT {
   489  		return nil, roachpb.NewErrorf("cannot use %s ReadConsistency in txn",
   490  			ba.ReadConsistency)
   491  	}
   492  
   493  	lastIndex := len(ba.Requests) - 1
   494  	if lastIndex < 0 {
   495  		return nil, nil
   496  	}
   497  
   498  	// Clone the Txn's Proto so that future modifications can be made without
   499  	// worrying about synchronization.
   500  	ba.Txn = tc.mu.txn.Clone()
   501  
   502  	// Send the command through the txnInterceptor stack.
   503  	br, pErr := tc.interceptorStack[0].SendLocked(ctx, ba)
   504  
   505  	pErr = tc.updateStateLocked(ctx, ba, br, pErr)
   506  
   507  	// If we succeeded in committing, or we attempted a rollback, we move to
   508  	// txnFinalized.
   509  	if req, ok := ba.GetArg(roachpb.EndTxn); ok {
   510  		etReq := req.(*roachpb.EndTxnRequest)
   511  		if etReq.Commit {
   512  			if pErr == nil {
   513  				tc.mu.txnState = txnFinalized
   514  				tc.cleanupTxnLocked(ctx)
   515  				tc.maybeSleepForLinearizable(ctx, br, startNs)
   516  			}
   517  		} else {
   518  			// Rollbacks always move us to txnFinalized.
   519  			tc.mu.txnState = txnFinalized
   520  			tc.cleanupTxnLocked(ctx)
   521  		}
   522  	}
   523  
   524  	if pErr != nil {
   525  		return nil, pErr
   526  	}
   527  
   528  	if br != nil && br.Error != nil {
   529  		panic(roachpb.ErrorUnexpectedlySet(nil /* culprit */, br))
   530  	}
   531  
   532  	return br, nil
   533  }
   534  
   535  // maybeSleepForLinearizable sleeps if the linearizable flag is set. We want to
   536  // make sure that all the clocks in the system are past the commit timestamp of
   537  // the transaction. This is guaranteed if either:
   538  // - the commit timestamp is MaxOffset behind startNs
   539  // - MaxOffset ns were spent in this function when returning to the
   540  // client.
   541  // Below we choose the option that involves less waiting, which is likely the
   542  // first one unless a transaction commits with an odd timestamp.
   543  func (tc *TxnCoordSender) maybeSleepForLinearizable(
   544  	ctx context.Context, br *roachpb.BatchResponse, startNs int64,
   545  ) {
   546  	if tsNS := br.Txn.WriteTimestamp.WallTime; startNs > tsNS {
   547  		startNs = tsNS
   548  	}
   549  	sleepNS := tc.clock.MaxOffset() -
   550  		time.Duration(tc.clock.PhysicalNow()-startNs)
   551  
   552  	if tc.linearizable && sleepNS > 0 {
   553  		// TODO(andrei): perhaps we shouldn't sleep with the lock held.
   554  		log.VEventf(ctx, 2, "%v: waiting %s on EndTxn for linearizability",
   555  			br.Txn.Short(), duration.Truncate(sleepNS, time.Millisecond))
   556  		time.Sleep(sleepNS)
   557  	}
   558  }
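
        // As a worked example (numbers for illustration only): with MaxOffset = 500ms,
        // a commit timestamp 200ms older than startNs, and 100ms elapsed since startNs,
        // startNs is first pulled back to the commit timestamp, so
        // sleepNS = 500ms - (200ms + 100ms) = 200ms; a transaction whose commit
        // timestamp is already MaxOffset or more in the past does not sleep at all.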
   559  
   560  // maybeRejectClientLocked checks whether the transaction is in a state that
   561  // prevents it from continuing, such as the heartbeat loop having detected
   562  // that the transaction has been aborted.
   563  //
   564  // ba is the batch that the client is trying to send. It's inspected because
   565  // rollbacks are always allowed. Can be nil.
   566  func (tc *TxnCoordSender) maybeRejectClientLocked(
   567  	ctx context.Context, ba *roachpb.BatchRequest,
   568  ) *roachpb.Error {
   569  	if ba != nil && ba.IsSingleAbortTxnRequest() {
   570  		// As a special case, we allow rollbacks to be sent at any time. Any
   571  		// rollback attempt moves the TxnCoordSender state to txnFinalized, but higher
   572  		// layers are free to retry rollbacks if they want (and they do, for
   573  		// example, when the context was canceled while txn.Rollback() was running).
   574  		return nil
   575  	}
   576  
   577  	// Check the transaction coordinator state.
   578  	switch tc.mu.txnState {
   579  	case txnPending:
   580  		// All good.
   581  	case txnError:
   582  		return tc.mu.storedErr
   583  	case txnFinalized:
   584  		msg := fmt.Sprintf("client already committed or rolled back the transaction. "+
   585  			"Trying to execute: %s", ba.Summary())
   586  		stack := string(debug.Stack())
   587  		log.Errorf(ctx, "%s. stack:\n%s", msg, stack)
   588  		return roachpb.NewErrorWithTxn(roachpb.NewTransactionStatusError(msg), &tc.mu.txn)
   589  	}
   590  
   591  	// Check the transaction proto state, along with any finalized transaction
   592  	// status observed by the transaction heartbeat loop.
   593  	protoStatus := tc.mu.txn.Status
   594  	hbObservedStatus := tc.interceptorAlloc.txnHeartbeater.mu.finalObservedStatus
   595  	switch {
   596  	case protoStatus == roachpb.ABORTED:
   597  		// The transaction was rolled back synchronously.
   598  		fallthrough
   599  	case protoStatus != roachpb.COMMITTED && hbObservedStatus == roachpb.ABORTED:
   600  		// The transaction heartbeat observed an aborted transaction record and
   601  		// this was not due to a synchronous transaction commit and transaction
   602  		// record garbage collection.
   603  		// See the comment on txnHeartbeater.mu.finalObservedStatus for more details.
   604  		abortedErr := roachpb.NewErrorWithTxn(
   605  			roachpb.NewTransactionAbortedError(roachpb.ABORT_REASON_CLIENT_REJECT), &tc.mu.txn)
   606  		if tc.typ == kv.LeafTxn {
   607  			// Leaf txns return raw retriable errors (which get handled by the
   608  			// root) rather than TransactionRetryWithProtoRefreshError.
   609  			return abortedErr
   610  		}
   611  		// Root txns handle retriable errors.
   612  		newTxn := roachpb.PrepareTransactionForRetry(
   613  			ctx, abortedErr, roachpb.NormalUserPriority, tc.clock)
   614  		return roachpb.NewError(roachpb.NewTransactionRetryWithProtoRefreshError(
   615  			abortedErr.Message, tc.mu.txn.ID, newTxn))
   616  	case protoStatus != roachpb.PENDING || hbObservedStatus != roachpb.PENDING:
   617  		// The transaction proto is in an unexpected state.
   618  		return roachpb.NewErrorf(
   619  			"unexpected txn state: %s; heartbeat observed status: %s", tc.mu.txn, hbObservedStatus)
   620  	default:
   621  		// All good.
   622  	}
   623  	return nil
   624  }
   625  
   626  // cleanupTxnLocked closes all the interceptors.
   627  func (tc *TxnCoordSender) cleanupTxnLocked(ctx context.Context) {
   628  	if tc.mu.closed {
   629  		return
   630  	}
   631  	tc.mu.closed = true
   632  	// Close each interceptor.
   633  	for _, reqInt := range tc.interceptorStack {
   634  		reqInt.closeLocked()
   635  	}
   636  }
   637  
   638  // UpdateStateOnRemoteRetryableErr is part of the TxnSender interface.
   639  func (tc *TxnCoordSender) UpdateStateOnRemoteRetryableErr(
   640  	ctx context.Context, pErr *roachpb.Error,
   641  ) *roachpb.Error {
   642  	tc.mu.Lock()
   643  	defer tc.mu.Unlock()
   644  	return roachpb.NewError(tc.handleRetryableErrLocked(ctx, pErr))
   645  }
   646  
   647  // handleRetryableErrLocked takes a retriable error and creates a
   648  // TransactionRetryWithProtoRefreshError containing the transaction that needs
   649  // to be used by the next attempt. It also handles various aspects of updating
   650  // the TxnCoordSender's state. Depending on the error, the TxnCoordSender might
   651  // not be usable afterwards (in case of TransactionAbortedError). The caller is
   652  // expected to check the ID of the resulting transaction. If the TxnCoordSender
   653  // can still be used, it will have been prepared for a new epoch.
   654  func (tc *TxnCoordSender) handleRetryableErrLocked(
   655  	ctx context.Context, pErr *roachpb.Error,
   656  ) *roachpb.TransactionRetryWithProtoRefreshError {
   657  	// If the error is a transaction retry error, update metrics to
   658  	// reflect the reason for the restart. More details about the
   659  	// different error types are documented above on the metaRestart
   660  	// variables.
   661  	switch tErr := pErr.GetDetail().(type) {
   662  	case *roachpb.TransactionRetryError:
   663  		switch tErr.Reason {
   664  		case roachpb.RETRY_WRITE_TOO_OLD:
   665  			tc.metrics.RestartsWriteTooOld.Inc()
   666  		case roachpb.RETRY_SERIALIZABLE:
   667  			tc.metrics.RestartsSerializable.Inc()
   668  		case roachpb.RETRY_ASYNC_WRITE_FAILURE:
   669  			tc.metrics.RestartsAsyncWriteFailure.Inc()
   670  		default:
   671  			tc.metrics.RestartsUnknown.Inc()
   672  		}
   673  
   674  	case *roachpb.WriteTooOldError:
   675  		tc.metrics.RestartsWriteTooOldMulti.Inc()
   676  
   677  	case *roachpb.ReadWithinUncertaintyIntervalError:
   678  		tc.metrics.RestartsReadWithinUncertainty.Inc()
   679  
   680  	case *roachpb.TransactionAbortedError:
   681  		tc.metrics.RestartsTxnAborted.Inc()
   682  
   683  	case *roachpb.TransactionPushError:
   684  		tc.metrics.RestartsTxnPush.Inc()
   685  
   686  	default:
   687  		tc.metrics.RestartsUnknown.Inc()
   688  	}
   689  	errTxnID := pErr.GetTxn().ID
   690  	newTxn := roachpb.PrepareTransactionForRetry(ctx, pErr, tc.mu.userPriority, tc.clock)
   691  
   692  	// We'll pass a TransactionRetryWithProtoRefreshError up to the next layer.
   693  	retErr := roachpb.NewTransactionRetryWithProtoRefreshError(
   694  		pErr.Message,
   695  		errTxnID, // the id of the transaction that encountered the error
   696  		newTxn)
   697  
   698  	// If the ID changed, it means we had to start a new transaction and the
   699  	// old one is toast. This TxnCoordSender cannot be used any more - future
   700  	// Send() calls will be rejected; the client is supposed to create a new
   701  	// one.
   702  	if errTxnID != newTxn.ID {
   703  		// Remember that this txn is aborted to reject future requests.
   704  		tc.mu.txn.Status = roachpb.ABORTED
   705  		// Abort the old txn. The client is not supposed to use this
   706  		// TxnCoordSender any more.
   707  		tc.interceptorAlloc.txnHeartbeater.abortTxnAsyncLocked(ctx)
   708  		tc.cleanupTxnLocked(ctx)
   709  		return retErr
   710  	}
   711  
   712  	// This is where we get a new epoch.
   713  	tc.mu.txn.Update(&newTxn)
   714  
   715  	// Reset state as this is a retryable txn error that is incrementing
   716  	// the transaction's epoch.
   717  	log.VEventf(ctx, 2, "resetting epoch-based coordinator state on retry")
   718  	for _, reqInt := range tc.interceptorStack {
   719  		reqInt.epochBumpedLocked()
   720  	}
   721  	return retErr
   722  }
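
        // As a rough sketch of the caller-side handling described above (the actual
        // plumbing lives in client.Txn, not in this file), the returned error is
        // inspected roughly like this:
        //
        //   retryErr := tc.handleRetryableErrLocked(ctx, pErr)
        //   if retryErr.Transaction.ID != retryErr.TxnID {
        //       // The old transaction was aborted: this TxnCoordSender is done and
        //       // client.Txn transparently swaps in a new one before retrying.
        //   } else {
        //       // Same transaction, new epoch: the retry uses the updated proto
        //       // carried inside retryErr.
        //   }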
   723  
   724  // updateStateLocked updates the transaction state in both the success and error
   725  // cases. It also updates retryable errors with the updated transaction for use
   726  // by client restarts.
   727  func (tc *TxnCoordSender) updateStateLocked(
   728  	ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, pErr *roachpb.Error,
   729  ) *roachpb.Error {
   730  
   731  	// We handle a couple of different cases:
   732  	// 1) A successful response. If that response carries a transaction proto,
   733  	// we'll use it to update our proto.
   734  	// 2) A non-retriable error. We move to the txnError state and we cleanup. If
   735  	// the error carries a transaction in it, we update our proto with it
   736  	// (although Andrei doesn't know if that serves any purpose).
   737  	// 3) A retriable error. We "handle" it, in the sense that we call
   738  	// handleRetryableErrLocked() to transform the error. If the error instructs
   739  	// the client to start a new transaction (i.e. TransactionAbortedError), then
   740  	// the current transaction is automatically rolled-back. Otherwise, we update
   741  	// our proto for a new epoch.
   742  	// NOTE: We'd love to move to state txnError in case of a new error but alas
   743  	// with the current interface we can't: there's no way for the client to ack
   744  	// the receipt of the error and control the switching to the new epoch. This
   745  	// is a major problem of the current txn interface - it means that concurrent
   746  	// users of a txn might operate at the wrong epoch if they race with the
   747  	// receipt of such an error.
   748  
   749  	if pErr == nil {
   750  		tc.mu.txn.Update(br.Txn)
   751  		return nil
   752  	}
   753  
   754  	if pErr.TransactionRestart != roachpb.TransactionRestart_NONE {
   755  		if tc.typ == kv.LeafTxn {
   756  			// Leaves handle retriable errors differently than roots. The leaf
   757  			// transaction is not supposed to be used any more after a retriable
   758  			// error. Separately, the error needs to make its way back to the root.
   759  
   760  			// From now on, clients will get this error whenever they Send(). We want
   761  			// clients to get the same retriable error so we don't wrap it in
   762  			// TxnAlreadyEncounteredErrorError as we do elsewhere.
   763  			tc.mu.txnState = txnError
   764  			tc.mu.storedErr = pErr
   765  
   766  			// Cleanup.
   767  			tc.mu.txn.Update(pErr.GetTxn())
   768  			tc.cleanupTxnLocked(ctx)
   769  			return pErr
   770  		}
   771  
   772  		txnID := ba.Txn.ID
   773  		errTxnID := pErr.GetTxn().ID // The ID of the txn that needs to be restarted.
   774  		if errTxnID != txnID {
   775  			// KV should not return errors for transactions other than the one in
   776  			// the BatchRequest.
   777  			log.Fatalf(ctx, "retryable error for the wrong txn. ba.Txn: %s. pErr: %s",
   778  				ba.Txn, pErr)
   779  		}
   780  		return roachpb.NewError(tc.handleRetryableErrLocked(ctx, pErr))
   781  	}
   782  
   783  	// This is the non-retriable error case.
   784  
   785  	// Most errors cause the transaction to not accept further requests (except a
   786  	// rollback), but some errors are safe to allow continuing (in particular
   787  	// ConditionFailedError). For example, SQL can recover by rolling back to a
   788  	// savepoint.
   789  	if roachpb.ErrPriority(pErr.GetDetail()) != roachpb.ErrorScoreUnambiguousError {
   790  		tc.mu.txnState = txnError
   791  		tc.mu.storedErr = roachpb.NewError(&roachpb.TxnAlreadyEncounteredErrorError{
   792  			PrevError: pErr.String(),
   793  		})
   794  	}
   795  
   796  	// Update our transaction with any information the error has.
   797  	if errTxn := pErr.GetTxn(); errTxn != nil {
   798  		if errTxn.Status == roachpb.COMMITTED {
   799  			sanityCheckCommittedErr(ctx, pErr, ba)
   800  		}
   801  		tc.mu.txn.Update(errTxn)
   802  	}
   803  	return pErr
   804  }
   805  
   806  // sanityCheckCommittedErr verifies the circumstances in which we're receiving
   807  // an error indicating a COMMITTED transaction. Only rollbacks should be
   808  // encountering such errors. Marking a transaction as explicitly-committed can
   809  // also encounter these errors, but those errors don't make it to the
   810  // TxnCoordSender.
   811  func sanityCheckCommittedErr(ctx context.Context, pErr *roachpb.Error, ba roachpb.BatchRequest) {
   812  	errTxn := pErr.GetTxn()
   813  	if errTxn == nil || errTxn.Status != roachpb.COMMITTED {
   814  		// We shouldn't have been called.
   815  		return
   816  	}
   817  	// The only case in which an error can have a COMMITTED transaction in it is
   818  	// when the request was a rollback. Rollbacks can race with commits if a
   819  	// context timeout expires while a commit request is in flight.
   820  	if ba.IsSingleAbortTxnRequest() {
   821  		return
   822  	}
   823  	// Finding out about our transaction being committed indicates a serious bug.
   824  	// Requests are not supposed to be sent on transactions after they are
   825  	// committed.
   826  	log.Fatalf(ctx, "transaction unexpectedly committed: %s. ba: %s. txn: %s.", pErr, ba, errTxn)
   827  }
   828  
   829  // setTxnAnchorKeyLocked sets the key at which to anchor the transaction record. The
   830  // transaction anchor key defaults to the first key written in a transaction.
   831  func (tc *TxnCoordSender) setTxnAnchorKeyLocked(key roachpb.Key) error {
   832  	if len(tc.mu.txn.Key) != 0 {
   833  		return errors.Errorf("transaction anchor key already set")
   834  	}
   835  	tc.mu.txn.Key = key
   836  	return nil
   837  }
   838  
   839  // AnchorOnSystemConfigRange is part of the client.TxnSender interface.
   840  func (tc *TxnCoordSender) AnchorOnSystemConfigRange() error {
   841  	tc.mu.Lock()
   842  	defer tc.mu.Unlock()
   843  	// Allow this to be called more than once.
   844  	if bytes.Equal(tc.mu.txn.Key, keys.SystemConfigSpan.Key) {
   845  		return nil
   846  	}
   847  	// The system-config trigger must be run on the system-config range which
   848  	// means any transaction with the trigger set needs to be anchored to the
   849  	// system-config range.
   850  	return tc.setTxnAnchorKeyLocked(keys.SystemConfigSpan.Key)
   851  }
   852  
   853  // TxnStatus is part of the client.TxnSender interface.
   854  func (tc *TxnCoordSender) TxnStatus() roachpb.TransactionStatus {
   855  	tc.mu.Lock()
   856  	defer tc.mu.Unlock()
   857  	return tc.mu.txn.Status
   858  }
   859  
   860  // SetUserPriority is part of the client.TxnSender interface.
   861  func (tc *TxnCoordSender) SetUserPriority(pri roachpb.UserPriority) error {
   862  	tc.mu.Lock()
   863  	defer tc.mu.Unlock()
   864  	if tc.mu.active && pri != tc.mu.userPriority {
   865  		return errors.New("cannot change the user priority of a running transaction")
   866  	}
   867  	tc.mu.userPriority = pri
   868  	tc.mu.txn.Priority = roachpb.MakePriority(pri)
   869  	return nil
   870  }
   871  
   872  // SetDebugName is part of the client.TxnSender interface.
   873  func (tc *TxnCoordSender) SetDebugName(name string) {
   874  	tc.mu.Lock()
   875  	defer tc.mu.Unlock()
   876  
   877  	if tc.mu.txn.Name == name {
   878  		return
   879  	}
   880  
   881  	if tc.mu.active {
   882  		panic("cannot change the debug name of a running transaction")
   883  	}
   884  	tc.mu.txn.Name = name
   885  }
   886  
   887  // String is part of the client.TxnSender interface.
   888  func (tc *TxnCoordSender) String() string {
   889  	tc.mu.Lock()
   890  	defer tc.mu.Unlock()
   891  	return tc.mu.txn.String()
   892  }
   893  
   894  // ReadTimestamp is part of the client.TxnSender interface.
   895  func (tc *TxnCoordSender) ReadTimestamp() hlc.Timestamp {
   896  	tc.mu.Lock()
   897  	defer tc.mu.Unlock()
   898  	return tc.mu.txn.ReadTimestamp
   899  }
   900  
   901  // ProvisionalCommitTimestamp is part of the client.TxnSender interface.
   902  func (tc *TxnCoordSender) ProvisionalCommitTimestamp() hlc.Timestamp {
   903  	tc.mu.Lock()
   904  	defer tc.mu.Unlock()
   905  	return tc.mu.txn.WriteTimestamp
   906  }
   907  
   908  // CommitTimestamp is part of the client.TxnSender interface.
   909  func (tc *TxnCoordSender) CommitTimestamp() hlc.Timestamp {
   910  	tc.mu.Lock()
   911  	defer tc.mu.Unlock()
   912  	txn := &tc.mu.txn
   913  	tc.mu.txn.CommitTimestampFixed = true
   914  	return txn.ReadTimestamp
   915  }
   916  
   917  // CommitTimestampFixed is part of the client.TxnSender interface.
   918  func (tc *TxnCoordSender) CommitTimestampFixed() bool {
   919  	tc.mu.Lock()
   920  	defer tc.mu.Unlock()
   921  	return tc.mu.txn.CommitTimestampFixed
   922  }
   923  
   924  // SetFixedTimestamp is part of the client.TxnSender interface.
   925  func (tc *TxnCoordSender) SetFixedTimestamp(ctx context.Context, ts hlc.Timestamp) {
   926  	tc.mu.Lock()
   927  	tc.mu.txn.ReadTimestamp = ts
   928  	tc.mu.txn.WriteTimestamp = ts
   929  	tc.mu.txn.MaxTimestamp = ts
   930  	tc.mu.txn.CommitTimestampFixed = true
   931  
   932  	// Set the MinTimestamp to the minimum of the existing MinTimestamp and the fixed
   933  	// timestamp. This ensures that the MinTimestamp is always <= the other timestamps.
   934  	tc.mu.txn.MinTimestamp.Backward(ts)
   935  
   936  	// For backwards compatibility with 19.2, set the DeprecatedOrigTimestamp too.
   937  	tc.mu.txn.DeprecatedOrigTimestamp = ts
   938  
   939  	tc.mu.Unlock()
   940  }
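
        // As a rough sketch (driven through the kv.Txn wrapper, which is not part of
        // this file), fixed-timestamp transactions are used for historical reads at a
        // single snapshot:
        //
        //   txn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */)
        //   txn.SetFixedTimestamp(ctx, ts) // every read observes exactly ts
        //   kvs, err := txn.Scan(ctx, startKey, endKey, 0 /* maxRows */)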
   941  
   942  // ManualRestart is part of the client.TxnSender interface.
   943  func (tc *TxnCoordSender) ManualRestart(
   944  	ctx context.Context, pri roachpb.UserPriority, ts hlc.Timestamp,
   945  ) {
   946  	tc.mu.Lock()
   947  	defer tc.mu.Unlock()
   948  
   949  	if tc.mu.txnState == txnFinalized {
   950  		log.Fatalf(ctx, "ManualRestart called on finalized txn: %s", tc.mu.txn)
   951  	}
   952  
   953  	// Invalidate any writes performed by any workers after the retry updated
   954  	// the txn's proto but before we synchronized (some of these writes might
   955  	// have been performed at the wrong epoch).
   956  	tc.mu.txn.Restart(pri, 0 /* upgradePriority */, ts)
   957  
   958  	for _, reqInt := range tc.interceptorStack {
   959  		reqInt.epochBumpedLocked()
   960  	}
   961  
   962  	// The txn might have entered the txnError state after the epoch was bumped.
   963  	// Reset the state for the retry.
   964  	tc.mu.txnState = txnPending
   965  }
   966  
   967  // IsSerializablePushAndRefreshNotPossible is part of the client.TxnSender interface.
   968  func (tc *TxnCoordSender) IsSerializablePushAndRefreshNotPossible() bool {
   969  	tc.mu.Lock()
   970  	defer tc.mu.Unlock()
   971  
   972  	isTxnPushed := tc.mu.txn.WriteTimestamp != tc.mu.txn.ReadTimestamp
   973  	refreshAttemptNotPossible := tc.interceptorAlloc.txnSpanRefresher.refreshInvalid ||
   974  		tc.mu.txn.CommitTimestampFixed
   975  	// We check CommitTimestampFixed here because, if that's set, refreshing
   976  	// of reads is not performed.
   977  	return isTxnPushed && refreshAttemptNotPossible
   978  }
   979  
   980  // Epoch is part of the client.TxnSender interface.
   981  func (tc *TxnCoordSender) Epoch() enginepb.TxnEpoch {
   982  	return tc.mu.txn.Epoch
   983  }
   984  
   985  // IsTracking returns true if the heartbeat loop is running.
   986  func (tc *TxnCoordSender) IsTracking() bool {
   987  	tc.mu.Lock()
   988  	defer tc.mu.Unlock()
   989  	return tc.interceptorAlloc.txnHeartbeater.heartbeatLoopRunningLocked()
   990  }
   991  
   992  // Active returns true if requests were sent already. Rolling back to a
   993  // savepoint taken before any requests were sent resets this to false.
   994  func (tc *TxnCoordSender) Active() bool {
   995  	tc.mu.Lock()
   996  	defer tc.mu.Unlock()
   997  	return tc.mu.active
   998  }
   999  
  1000  // GetLeafTxnInputState is part of the client.TxnSender interface.
  1001  func (tc *TxnCoordSender) GetLeafTxnInputState(
  1002  	ctx context.Context, opt kv.TxnStatusOpt,
  1003  ) (roachpb.LeafTxnInputState, error) {
  1004  	tc.mu.Lock()
  1005  	defer tc.mu.Unlock()
  1006  
  1007  	if err := tc.checkTxnStatusLocked(ctx, opt); err != nil {
  1008  		return roachpb.LeafTxnInputState{}, err
  1009  	}
  1010  
  1011  	// Copy mutable state so access is safe for the caller.
  1012  	var tis roachpb.LeafTxnInputState
  1013  	tis.Txn = tc.mu.txn
  1014  	for _, reqInt := range tc.interceptorStack {
  1015  		reqInt.populateLeafInputState(&tis)
  1016  	}
  1017  
  1018  	// Also mark the TxnCoordSender as "active".  This prevents changing
  1019  	// the priority after a leaf has been created. It also conservatively
  1020  	// ensures that Active() returns true if a command might be executing
  1021  	// concurrently on a leaf.
  1022  	tc.mu.active = true
  1023  
  1024  	return tis, nil
  1025  }
  1026  
  1027  // GetLeafTxnFinalState is part of the client.TxnSender interface.
  1028  func (tc *TxnCoordSender) GetLeafTxnFinalState(
  1029  	ctx context.Context, opt kv.TxnStatusOpt,
  1030  ) (roachpb.LeafTxnFinalState, error) {
  1031  	tc.mu.Lock()
  1032  	defer tc.mu.Unlock()
  1033  
  1034  	if err := tc.checkTxnStatusLocked(ctx, opt); err != nil {
  1035  		return roachpb.LeafTxnFinalState{}, err
  1036  	}
  1037  
  1038  	var tfs roachpb.LeafTxnFinalState
  1039  
  1040  	// For compatibility with pre-20.1 nodes: populate the command
  1041  	// count.
  1042  	// TODO(knz,andrei): Remove this and the command count
  1043  	// field in 20.2.
  1044  	if tc.mu.active {
  1045  		tfs.DeprecatedCommandCount = 1
  1046  	}
  1047  
  1048  	// Copy mutable state so access is safe for the caller.
  1049  	tfs.Txn = tc.mu.txn
  1050  	for _, reqInt := range tc.interceptorStack {
  1051  		reqInt.populateLeafFinalState(&tfs)
  1052  	}
  1053  
  1054  	return tfs, nil
  1055  }
  1056  
  1057  func (tc *TxnCoordSender) checkTxnStatusLocked(ctx context.Context, opt kv.TxnStatusOpt) error {
  1058  	switch opt {
  1059  	case kv.AnyTxnStatus:
  1060  		// Nothing to check.
  1061  	case kv.OnlyPending:
  1062  		// Check the coordinator's proto status.
  1063  		rejectErr := tc.maybeRejectClientLocked(ctx, nil /* ba */)
  1064  		if rejectErr != nil {
  1065  			return rejectErr.GoError()
  1066  		}
  1067  	default:
  1068  		panic("unreachable")
  1069  	}
  1070  	return nil
  1071  }
  1072  
  1073  // UpdateRootWithLeafFinalState is part of the client.TxnSender interface.
  1074  func (tc *TxnCoordSender) UpdateRootWithLeafFinalState(
  1075  	ctx context.Context, tfs *roachpb.LeafTxnFinalState,
  1076  ) {
  1077  	tc.mu.Lock()
  1078  	defer tc.mu.Unlock()
  1079  
  1080  	if tc.mu.txn.ID == (uuid.UUID{}) {
  1081  		log.Fatalf(ctx, "cannot UpdateRootWithLeafFinalState on unbound TxnCoordSender. input id: %s", tfs.Txn.ID)
  1082  	}
  1083  
  1084  	// Sanity check: don't combine if the tfs is for a different txn ID.
  1085  	if tc.mu.txn.ID != tfs.Txn.ID {
  1086  		return
  1087  	}
  1088  
  1089  	// If the LeafTxnFinalState is telling us the transaction has been
  1090  	// aborted, it's better if we don't ingest it. Ingesting it would
  1091  	// possibly put us in an inconsistent state, with an ABORTED proto
  1092  	// but with the heartbeat loop still running. It presumably follows
  1093  	// a TxnAbortedError that was also received. If that error was also
  1094  	// passed to us, then we've already aborted the txn and importing
  1095  	// the leaf state would be OK. However, as it stands, if the
  1096  	// TxnAbortedError followed a non-retriable error, then we don't get
  1097  	// the aborted error (in fact, we don't get either of the errors;
  1098  	// the client is responsible for rolling back).
  1099  	//
  1100  	// TODO(andrei): A better design would be to abort the txn as soon
  1101  	// as any error is received from DistSQL, which would eliminate
  1102  	// qualms about what error comes first.
  1103  	if tfs.Txn.Status != roachpb.PENDING {
  1104  		return
  1105  	}
  1106  
  1107  	tc.mu.txn.Update(&tfs.Txn)
  1108  	for _, reqInt := range tc.interceptorStack {
  1109  		reqInt.importLeafFinalState(ctx, tfs)
  1110  	}
  1111  }
  1112  
  1113  // TestingCloneTxn is part of the client.TxnSender interface.
  1114  // This is for use by tests only. To derive leaf TxnCoordSenders,
  1115  // use GetLeafTxnInputState instead.
  1116  func (tc *TxnCoordSender) TestingCloneTxn() *roachpb.Transaction {
  1117  	tc.mu.Lock()
  1118  	defer tc.mu.Unlock()
  1119  	return tc.mu.txn.Clone()
  1120  }
  1121  
  1122  // PrepareRetryableError is part of the client.TxnSender interface.
  1123  func (tc *TxnCoordSender) PrepareRetryableError(ctx context.Context, msg string) error {
  1124  	tc.mu.Lock()
  1125  	defer tc.mu.Unlock()
  1126  	return roachpb.NewTransactionRetryWithProtoRefreshError(
  1127  		msg, tc.mu.txn.ID, tc.mu.txn)
  1128  }
  1129  
  1130  // Step is part of the TxnSender interface.
  1131  func (tc *TxnCoordSender) Step(ctx context.Context) error {
  1132  	if tc.typ != kv.RootTxn {
  1133  		return errors.WithContextTags(
  1134  			errors.AssertionFailedf("cannot call Step() in leaf txn"), ctx)
  1135  	}
  1136  	tc.mu.Lock()
  1137  	defer tc.mu.Unlock()
  1138  	return tc.interceptorAlloc.txnSeqNumAllocator.stepLocked(ctx)
  1139  }
  1140  
  1141  // ConfigureStepping is part of the TxnSender interface.
  1142  func (tc *TxnCoordSender) ConfigureStepping(
  1143  	ctx context.Context, mode kv.SteppingMode,
  1144  ) (prevMode kv.SteppingMode) {
  1145  	if tc.typ != kv.RootTxn {
  1146  		panic(errors.WithContextTags(
  1147  			errors.AssertionFailedf("cannot call ConfigureStepping() in leaf txn"), ctx))
  1148  	}
  1149  	tc.mu.Lock()
  1150  	defer tc.mu.Unlock()
  1151  	return tc.interceptorAlloc.txnSeqNumAllocator.configureSteppingLocked(mode)
  1152  }
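
        // As a rough sketch of how the stepping API is driven (the call sites live in
        // the SQL layer, not in this file): a root transaction enables stepping and
        // then advances its read snapshot between statements:
        //
        //   prev := rootSender.ConfigureStepping(ctx, kv.SteppingEnabled)
        //   defer rootSender.ConfigureStepping(ctx, prev)
        //
        //   // ... execute one statement's reads and writes ...
        //   if err := rootSender.Step(ctx); err != nil {
        //       return err
        //   }
        //   // Reads performed after the Step observe the writes made before it.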
  1153  
  1154  // GetSteppingMode is part of the TxnSender interface.
  1155  func (tc *TxnCoordSender) GetSteppingMode(ctx context.Context) (curMode kv.SteppingMode) {
  1156  	curMode = kv.SteppingDisabled
  1157  	if tc.interceptorAlloc.txnSeqNumAllocator.steppingModeEnabled {
  1158  		curMode = kv.SteppingEnabled
  1159  	}
  1160  	return curMode
  1161  }