github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/batcheval/cmd_push_txn.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package batcheval
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/keys"
    18  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
    19  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
    20  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/storage"
    23  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    24  	"github.com/cockroachdb/cockroach/pkg/util/log"
    25  	"github.com/cockroachdb/errors"
    26  )
    27  
    28  func init() {
    29  	RegisterReadWriteCommand(roachpb.PushTxn, declareKeysPushTransaction, PushTxn)
    30  }
    31  
    32  func declareKeysPushTransaction(
    33  	_ *roachpb.RangeDescriptor,
    34  	header roachpb.Header,
    35  	req roachpb.Request,
    36  	latchSpans, _ *spanset.SpanSet,
    37  ) {
    38  	pr := req.(*roachpb.PushTxnRequest)
    39  	latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{Key: keys.TransactionKey(pr.PusheeTxn.Key, pr.PusheeTxn.ID)})
    40  	latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{Key: keys.AbortSpanKey(header.RangeID, pr.PusheeTxn.ID)})
    41  }
    42  
    43  // PushTxn resolves conflicts between concurrent txns (or between
    44  // a non-transactional reader or writer and a txn) in several ways,
    45  // depending on the statuses and priorities of the conflicting
    46  // transactions. The PushTxn operation is invoked by a "pusher"
    47  // (args.PusherTxn -- the writer trying to abort a conflicting txn
    48  // or the reader trying to push a conflicting txn's commit timestamp
    49  // forward), who attempts to resolve a conflict with a "pushee"
    50  // (args.PusheeTxn -- the pushee txn whose intent(s) caused the
    51  // conflict). A pusher is either transactional, in which case
    52  // PusherTxn is completely initialized, or not, in which case the
    53  // PusherTxn has only the priority set.
    54  //
    55  // The request arrives and immediately tries to determine the current
    56  // disposition of the pushee transaction by reading its transaction
    57  // record. If it finds one, it continues with the push. If not, it
    58  // uses knowledge from the existence of the conflicting intent to
    59  // determine the current state of the pushee. It's possible that the
    60  // transaction record is missing either because it hasn't been written
    61  // yet or because it has already been GCed after being finalized. Once
    62  // the request determines which case its in, it decides whether to
    63  // continue with the push. There are a number of different outcomes
    64  // that a push can result in, based on the state that the pushee's
    65  // transaction record is found in:
    66  //
    67  // Txn already committed/aborted: If the pushee txn is committed or
    68  // aborted return success.
    69  //
    70  // Txn record expired: If the pushee txn is pending, its last
    71  // heartbeat timestamp is observed to determine the latest client
    72  // activity. This heartbeat is forwarded by the conflicting intent's
    73  // timestamp because that timestamp also indicates definitive client
    74  // activity. This time of "last activity" is compared against the
    75  // current time to determine whether the transaction has expired.
    76  // If so, it is aborted. NOTE: the intent timestamp used is not
    77  // updated on intent pushes. This is important because it allows us
    78  // to use its timestamp as an indication of recent activity. If this
    79  // is ever changed, we don't run the risk of any correctness violations,
    80  // but we do make it possible for intent pushes to look like client
    81  // activity and extend the waiting period until a transaction is
    82  // considered expired. This waiting period is a "courtesy" - if we
    83  // simply aborted txns right away then we would see worse performance
    84  // under contention, but everything would still be correct.
    85  //
    86  // Txn record not expired: If the pushee txn is not expired, its
    87  // priority is compared against the pusher's (see CanPushWithPriority).
    88  //
    89  // Push cannot proceed: a TransactionPushError is returned.
    90  //
    91  // Push can proceed but txn record staging: if the transaction record
    92  // is STAGING then it can't be changed by a pusher without going through
    93  // the transaction recovery process. An IndeterminateCommitError is returned
    94  // to kick off recovery.
    95  //
    96  // Push can proceed: the pushee's transaction record is modified and
    97  // rewritten, based on the value of args.PushType. If args.PushType
    98  // is PUSH_ABORT, txn.Status is set to ABORTED. If args.PushType is
    99  // PUSH_TIMESTAMP, txn.Timestamp is set to just after args.PushTo.
   100  //
   101  // If the pushee is aborted, its timestamp will be forwarded to match
   102  // its last client activity timestamp (i.e. last heartbeat), if available.
   103  // This is done so that the updated timestamp populates the AbortSpan when
   104  // the pusher proceeds to resolve intents, allowing the GC queue to purge
   105  // records for which the transaction coordinator must have found out via
   106  // its heartbeats that the transaction has failed.
   107  func PushTxn(
   108  	ctx context.Context, readWriter storage.ReadWriter, cArgs CommandArgs, resp roachpb.Response,
   109  ) (result.Result, error) {
   110  	args := cArgs.Args.(*roachpb.PushTxnRequest)
   111  	h := cArgs.Header
   112  	reply := resp.(*roachpb.PushTxnResponse)
   113  
   114  	if h.Txn != nil {
   115  		return result.Result{}, ErrTransactionUnsupported
   116  	}
   117  	if h.Timestamp.Less(args.PushTo) {
   118  		// Verify that the PushTxn's timestamp is not less than the timestamp that
   119  		// the request intends to push the transaction to. Transactions should not
   120  		// be pushed into the future or their effect may not be fully reflected in
   121  		// a future leaseholder's timestamp cache. This is analogous to how reads
   122  		// should not be performed at a timestamp in the future.
   123  		return result.Result{}, errors.Errorf("request timestamp %s less than PushTo timestamp %s", h.Timestamp, args.PushTo)
   124  	}
   125  	if h.Timestamp.Less(args.PusheeTxn.WriteTimestamp) {
   126  		// This condition must hold for the timestamp cache access/update to be safe.
   127  		return result.Result{}, errors.Errorf("request timestamp %s less than pushee txn timestamp %s", h.Timestamp, args.PusheeTxn.WriteTimestamp)
   128  	}
   129  	now := cArgs.EvalCtx.Clock().Now()
   130  	if now.Less(h.Timestamp) {
   131  		// The batch's timestamp should have been used to update the clock.
   132  		return result.Result{}, errors.Errorf("request timestamp %s less than current clock time %s", h.Timestamp, now)
   133  	}
   134  	if !bytes.Equal(args.Key, args.PusheeTxn.Key) {
   135  		return result.Result{}, errors.Errorf("request key %s should match pushee txn key %s", args.Key, args.PusheeTxn.Key)
   136  	}
   137  	key := keys.TransactionKey(args.PusheeTxn.Key, args.PusheeTxn.ID)
   138  
   139  	// Fetch existing transaction; if missing, we're allowed to abort.
   140  	var existTxn roachpb.Transaction
   141  	ok, err := storage.MVCCGetProto(ctx, readWriter, key, hlc.Timestamp{}, &existTxn, storage.MVCCGetOptions{})
   142  	if err != nil {
   143  		return result.Result{}, err
   144  	} else if !ok {
   145  		log.VEventf(ctx, 2, "pushee txn record not found")
   146  		// There are three cases in which there is no transaction record:
   147  		//
   148  		// * the pushee is still active but its transaction record has not
   149  		//   been written yet. This is fairly common because transactions
   150  		//   do not eagerly write their transaction record before writing
   151  		//   intents, which another reader or writer might stumble upon and
   152  		//   be forced to push.
   153  		// * the pushee resolved its intents synchronously on successful commit;
   154  		//   in this case, the transaction record of the pushee is also removed.
   155  		//   Note that in this case, the intent which prompted this PushTxn
   156  		//   doesn't exist any more.
   157  		// * the pushee timed out or was aborted and the intent not cleaned up,
   158  		//   but the transaction record was garbage collected.
   159  		//
   160  		// To determine which case we're in, we check whether the transaction could
   161  		// ever write a transaction record. We do this by using the metadata from
   162  		// the intent and attempting to synthesize a transaction record while
   163  		// verifying that it would be possible for the transaction record to ever be
   164  		// written. If a transaction record for the transaction could be written in
   165  		// the future then we must be in the first case. If one could not be written
   166  		// then we know we're in either the second or the third case.
   167  		reply.PusheeTxn = SynthesizeTxnFromMeta(cArgs.EvalCtx, args.PusheeTxn)
   168  		if reply.PusheeTxn.Status == roachpb.ABORTED {
   169  			// If the transaction is uncommittable, we don't even need to
   170  			// persist an ABORTED transaction record, we can just consider it
   171  			// aborted. This is good because it allows us to obey the invariant
   172  			// that only the transaction's own coordinator can create its
   173  			// transaction record.
   174  			result := result.Result{}
   175  			result.Local.UpdatedTxns = []*roachpb.Transaction{&reply.PusheeTxn}
   176  			return result, nil
   177  		}
   178  	} else {
   179  		// Start with the persisted transaction record.
   180  		reply.PusheeTxn = existTxn
   181  	}
   182  
   183  	// If already committed or aborted, return success.
   184  	if reply.PusheeTxn.Status.IsFinalized() {
   185  		// Trivial noop.
   186  		return result.Result{}, nil
   187  	}
   188  
   189  	// If we're trying to move the timestamp forward, and it's already
   190  	// far enough forward, return success.
   191  	if args.PushType == roachpb.PUSH_TIMESTAMP && args.PushTo.LessEq(reply.PusheeTxn.WriteTimestamp) {
   192  		// Trivial noop.
   193  		return result.Result{}, nil
   194  	}
   195  
   196  	// The pusher might be aware of a newer version of the pushee.
   197  	increasedEpochOrTimestamp := false
   198  	if reply.PusheeTxn.WriteTimestamp.Less(args.PusheeTxn.WriteTimestamp) {
   199  		reply.PusheeTxn.WriteTimestamp = args.PusheeTxn.WriteTimestamp
   200  		increasedEpochOrTimestamp = true
   201  	}
   202  	if reply.PusheeTxn.Epoch < args.PusheeTxn.Epoch {
   203  		reply.PusheeTxn.Epoch = args.PusheeTxn.Epoch
   204  		increasedEpochOrTimestamp = true
   205  	}
   206  	reply.PusheeTxn.UpgradePriority(args.PusheeTxn.Priority)
   207  
   208  	// If the pusher is aware that the pushee's currently recorded attempt at a
   209  	// parallel commit failed, either because it found intents at a higher
   210  	// timestamp than the parallel commit attempt or because it found intents at
   211  	// a higher epoch than the parallel commit attempt, it should not consider
   212  	// the pushee to be performing a parallel commit. Its commit status is not
   213  	// indeterminate.
   214  	if increasedEpochOrTimestamp && reply.PusheeTxn.Status == roachpb.STAGING {
   215  		reply.PusheeTxn.Status = roachpb.PENDING
   216  		reply.PusheeTxn.InFlightWrites = nil
   217  	}
   218  
   219  	pushType := args.PushType
   220  	var pusherWins bool
   221  	var reason string
   222  
   223  	switch {
   224  	case txnwait.IsExpired(now, &reply.PusheeTxn):
   225  		reason = "pushee is expired"
   226  		// When cleaning up, actually clean up (as opposed to simply pushing
   227  		// the garbage in the path of future writers).
   228  		pushType = roachpb.PUSH_ABORT
   229  		pusherWins = true
   230  	case pushType == roachpb.PUSH_TOUCH:
   231  		// If just attempting to cleanup old or already-committed txns,
   232  		// pusher always fails.
   233  		pusherWins = false
   234  	case CanPushWithPriority(&args.PusherTxn, &reply.PusheeTxn):
   235  		reason = "pusher has priority"
   236  		pusherWins = true
   237  	case args.Force:
   238  		reason = "forced push"
   239  		pusherWins = true
   240  	}
   241  
   242  	if log.V(1) && reason != "" {
   243  		s := "pushed"
   244  		if !pusherWins {
   245  			s = "failed to push"
   246  		}
   247  		log.Infof(ctx, "%s %s (push type=%s) %s: %s (pushee last active: %s)",
   248  			args.PusherTxn.Short(), log.Safe(s),
   249  			log.Safe(pushType),
   250  			args.PusheeTxn.Short(),
   251  			log.Safe(reason),
   252  			reply.PusheeTxn.LastActive())
   253  	}
   254  
   255  	// If the pushed transaction is in the staging state, we can't change its
   256  	// record without first going through the transaction recovery process and
   257  	// attempting to finalize it.
   258  	recoverOnFailedPush := cArgs.EvalCtx.EvalKnobs().RecoverIndeterminateCommitsOnFailedPushes
   259  	if reply.PusheeTxn.Status == roachpb.STAGING && (pusherWins || recoverOnFailedPush) {
   260  		err := roachpb.NewIndeterminateCommitError(reply.PusheeTxn)
   261  		log.VEventf(ctx, 1, "%v", err)
   262  		return result.Result{}, err
   263  	}
   264  
   265  	if !pusherWins {
   266  		err := roachpb.NewTransactionPushError(reply.PusheeTxn)
   267  		log.VEventf(ctx, 1, "%v", err)
   268  		return result.Result{}, err
   269  	}
   270  
   271  	// Upgrade priority of pushed transaction to one less than pusher's.
   272  	reply.PusheeTxn.UpgradePriority(args.PusherTxn.Priority - 1)
   273  
   274  	// Determine what to do with the pushee, based on the push type.
   275  	switch pushType {
   276  	case roachpb.PUSH_ABORT:
   277  		// If aborting the transaction, set the new status.
   278  		reply.PusheeTxn.Status = roachpb.ABORTED
   279  		// If the transaction record was already present, forward the timestamp
   280  		// to accommodate AbortSpan GC. See method comment for details.
   281  		if ok {
   282  			reply.PusheeTxn.WriteTimestamp.Forward(reply.PusheeTxn.LastActive())
   283  		}
   284  	case roachpb.PUSH_TIMESTAMP:
   285  		// Otherwise, update timestamp to be one greater than the request's
   286  		// timestamp. This new timestamp will be use to update the read timestamp
   287  		// cache. If the transaction record was not already present then we rely on
   288  		// the timestamp cache to prevent the record from ever being written with a
   289  		// timestamp beneath this timestamp.
   290  		reply.PusheeTxn.WriteTimestamp.Forward(args.PushTo)
   291  	default:
   292  		return result.Result{}, errors.Errorf("unexpected push type: %v", pushType)
   293  	}
   294  
   295  	// If the transaction record was already present, persist the updates to it.
   296  	// If not, then we don't want to create it. This could allow for finalized
   297  	// transactions to be revived. Instead, we obey the invariant that only the
   298  	// transaction's own coordinator can issue requests that create its
   299  	// transaction record. To ensure that a timestamp push or an abort is
   300  	// respected for transactions without transaction records, we rely on markers
   301  	// in the timestamp cache.
   302  	if ok {
   303  		txnRecord := reply.PusheeTxn.AsRecord()
   304  		if err := storage.MVCCPutProto(ctx, readWriter, cArgs.Stats, key, hlc.Timestamp{}, nil, &txnRecord); err != nil {
   305  			return result.Result{}, err
   306  		}
   307  	}
   308  
   309  	result := result.Result{}
   310  	result.Local.UpdatedTxns = []*roachpb.Transaction{&reply.PusheeTxn}
   311  	return result, nil
   312  }