github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_evaluate.go

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvserver
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
    18  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
    19  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
    20  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/storage"
    23  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    24  	"github.com/cockroachdb/cockroach/pkg/util/errorutil"
    25  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    26  	"github.com/cockroachdb/cockroach/pkg/util/log"
    27  	"github.com/cockroachdb/errors"
    28  	"github.com/kr/pretty"
    29  	"golang.org/x/time/rate"
    30  )
    31  
    32  var sentryIssue46720Limiter = rate.NewLimiter(0.1, 1) // 1 every 10s
    33  
    34  // optimizePuts searches for contiguous runs of Put & CPut commands in
    35  // the supplied request union. Any run which exceeds a minimum length
    36  // threshold employs a full order iterator to determine whether the
    37  // range of keys being written is empty. If so, then the run can be
    38  // set to put "blindly", meaning no iterator need be used to read
    39  // existing values during the MVCC write.
    40  // The caller should use the returned slice (which is either equal to
    41  // the input slice, or has been shallow-copied appropriately to avoid
    42  // mutating the original requests).
    43  func optimizePuts(
    44  	reader storage.Reader, origReqs []roachpb.RequestUnion, distinctSpans bool,
    45  ) []roachpb.RequestUnion {
    46  	var minKey, maxKey roachpb.Key
    47  	var unique map[string]struct{}
    48  	if !distinctSpans {
    49  		unique = make(map[string]struct{}, len(origReqs))
    50  	}
    51  	// Returns false on occurrence of a duplicate key.
    52  	maybeAddPut := func(key roachpb.Key) bool {
    53  		// Note that casting the byte slice key to a string does not allocate.
    54  		if unique != nil {
    55  			if _, ok := unique[string(key)]; ok {
    56  				return false
    57  			}
    58  			unique[string(key)] = struct{}{}
    59  		}
    60  		if minKey == nil || bytes.Compare(key, minKey) < 0 {
    61  			minKey = key
    62  		}
    63  		if maxKey == nil || bytes.Compare(key, maxKey) > 0 {
    64  			maxKey = key
    65  		}
    66  		return true
    67  	}
    68  
    69  	firstUnoptimizedIndex := len(origReqs)
    70  	for i, r := range origReqs {
    71  		switch t := r.GetInner().(type) {
    72  		case *roachpb.PutRequest:
    73  			if maybeAddPut(t.Key) {
    74  				continue
    75  			}
    76  		case *roachpb.ConditionalPutRequest:
    77  			if maybeAddPut(t.Key) {
    78  				continue
    79  			}
    80  		case *roachpb.InitPutRequest:
    81  			if maybeAddPut(t.Key) {
    82  				continue
    83  			}
    84  		}
    85  		firstUnoptimizedIndex = i
    86  		break
    87  	}
    88  
    89  	if firstUnoptimizedIndex < optimizePutThreshold { // don't bother if below this threshold
    90  		return origReqs
    91  	}
    92  	iter := reader.NewIterator(storage.IterOptions{
    93  		// We want to include maxKey in our scan. Since UpperBound is exclusive, we
    94  		// need to set it to the key after maxKey.
    95  		UpperBound: maxKey.Next(),
    96  	})
    97  	defer iter.Close()
    98  
    99  	// If there are enough puts in the run to justify calling seek,
   100  	// we can determine whether any part of the range being written
   101  	// is "virgin" and set the puts to write blindly.
   102  	// Find the first non-empty key in the run.
   103  	iter.SeekGE(storage.MakeMVCCMetadataKey(minKey))
   104  	var iterKey roachpb.Key
   105  	if ok, err := iter.Valid(); err != nil {
   106  		// TODO(bdarnell): return an error here instead of silently
   107  		// running without the optimization?
   108  		log.Errorf(context.TODO(), "Seek returned error; disabling blind-put optimization: %+v", err)
   109  		return origReqs
   110  	} else if ok && bytes.Compare(iter.Key().Key, maxKey) <= 0 {
   111  		iterKey = iter.Key().Key
   112  	}
   113  	// Set the prefix of the run which is being written to virgin
   114  	// keyspace to "blindly" put values.
   115  	reqs := append([]roachpb.RequestUnion(nil), origReqs...)
   116  	for i := range reqs[:firstUnoptimizedIndex] {
   117  		inner := reqs[i].GetInner()
   118  		if iterKey == nil || bytes.Compare(iterKey, inner.Header().Key) > 0 {
   119  			switch t := inner.(type) {
   120  			case *roachpb.PutRequest:
   121  				shallow := *t
   122  				shallow.Blind = true
   123  				reqs[i].MustSetInner(&shallow)
   124  			case *roachpb.ConditionalPutRequest:
   125  				shallow := *t
   126  				shallow.Blind = true
   127  				reqs[i].MustSetInner(&shallow)
   128  			case *roachpb.InitPutRequest:
   129  				shallow := *t
   130  				shallow.Blind = true
   131  				reqs[i].MustSetInner(&shallow)
   132  			default:
   133  				log.Fatalf(context.TODO(), "unexpected non-put request: %s", t)
   134  			}
   135  		}
   136  	}
   137  	return reqs
   138  }
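
         // The optimization above reduces to three steps: find the longest prefix of
         // put-like requests while tracking the min and max key and rejecting
         // duplicates, seek once to find the first existing key in [minKey, maxKey],
         // and mark every request whose key sorts before that key as a blind write.
         // What follows is a minimal, self-contained sketch of that last step over a
         // simplified request type; fakePut and markBlindPrefix are hypothetical
         // illustrations, not part of this package.

         // fakePut is a stripped-down stand-in for the put-like requests above.
         type fakePut struct {
         	Key   []byte
         	Blind bool
         }

         // markBlindPrefix flags the puts whose keys sort strictly before
         // firstExisting, the first key found in the written span. A nil firstExisting
         // means the span is empty, so every put in the run can be made blind.
         func markBlindPrefix(run []fakePut, firstExisting []byte) {
         	for i := range run {
         		if firstExisting == nil || bytes.Compare(firstExisting, run[i].Key) > 0 {
         			run[i].Blind = true
         		}
         	}
         }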
   139  
   140  // evaluateBatch evaluates a batch request by splitting it up into its
   141  // individual commands, passing them to evaluateCommand, and combining
   142  // the results.
   143  func evaluateBatch(
   144  	ctx context.Context,
   145  	idKey kvserverbase.CmdIDKey,
   146  	readWriter storage.ReadWriter,
   147  	rec batcheval.EvalContext,
   148  	ms *enginepb.MVCCStats,
   149  	ba *roachpb.BatchRequest,
   150  	readOnly bool,
   151  ) (_ *roachpb.BatchResponse, _ result.Result, retErr *roachpb.Error) {
   152  
   153  	defer func() {
   154  		// Ensure that errors don't carry the WriteTooOld flag set. The client
   155  		// handles non-error responses with the WriteTooOld flag set, and errors
   156  		// with this flag set confuse it.
   157  		if retErr != nil && retErr.GetTxn() != nil {
   158  			retErr.GetTxn().WriteTooOld = false
   159  		}
   160  	}()
   161  
   162  	// NB: Don't mutate BatchRequest directly.
   163  	baReqs := ba.Requests
   164  	baHeader := ba.Header
   165  	br := ba.CreateReply()
   166  
   167  	// Optimize any contiguous sequences of put and conditional put ops.
   168  	if len(baReqs) >= optimizePutThreshold && !readOnly {
   169  		baReqs = optimizePuts(readWriter, baReqs, baHeader.DistinctSpans)
   170  	}
   171  
   172  	// Create a clone of the transaction to store the new txn state produced on
   173  	// the return/error path.
   174  	if baHeader.Txn != nil {
   175  		baHeader.Txn = baHeader.Txn.Clone()
   176  
   177  		// Check whether this transaction has been aborted, if applicable. This
    178  		// applies to reads and writes once the transaction has begun to
   179  		// acquire locks (see #2231 for more about why we check for aborted
   180  		// transactions on reads). Note that 1PC transactions have had their
   181  		// transaction field cleared by this point so we do not execute this
   182  		// check in that case.
   183  		if baHeader.Txn.IsLocking() {
   184  			// We don't check the abort span for a couple of special requests:
   185  			// - if the request is asking to abort the transaction, then don't check the
   186  			// AbortSpan; we don't want the request to be rejected if the transaction
   187  			// has already been aborted.
   188  			// - heartbeats don't check the abort span. If the txn is aborted, they'll
   189  			// return an aborted proto in their otherwise successful response.
   190  			// TODO(nvanbenschoten): Let's remove heartbeats from this whitelist when
   191  			// we rationalize the TODO in txnHeartbeater.heartbeat.
   192  			if !ba.IsSingleAbortTxnRequest() && !ba.IsSingleHeartbeatTxnRequest() {
   193  				if pErr := checkIfTxnAborted(ctx, rec, readWriter, *baHeader.Txn); pErr != nil {
   194  					return nil, result.Result{}, pErr
   195  				}
   196  			}
   197  		}
   198  	}
   199  
   200  	var mergedResult result.Result
   201  
   202  	// WriteTooOldErrors have particular handling. When a request encounters the
   203  	// error, we'd like to lay down an intent in order to avoid writers being
   204  	// starved. So, for blind writes, we swallow the error and instead we set the
   205  	// WriteTooOld flag on the response. For non-blind writes (e.g. CPut), we
   206  	// can't do that and so we just return the WriteTooOldError - see note on
   207  	// IsRead() stanza below. Upon receiving either a WriteTooOldError or a
   208  	// response with the WriteTooOld flag set, the client will attempt to bump
   209  	// the txn's read timestamp through a refresh. If successful, the client
   210  	// will retry this batch (in both cases).
   211  	//
    212  	// In any case, evaluation of the current batch always continues after a
    213  	// WriteTooOldError in order to find out whether there are more conflicts
    214  	// and to choose a final write timestamp (see the sketch after evaluateBatch).
   215  	var writeTooOldState struct {
   216  		err *roachpb.WriteTooOldError
   217  		// cantDeferWTOE is set when a WriteTooOldError cannot be deferred past the
   218  		// end of the current batch.
   219  		cantDeferWTOE bool
   220  	}
   221  
   222  	for index, union := range baReqs {
   223  		// Execute the command.
   224  		args := union.GetInner()
   225  
   226  		if baHeader.Txn != nil {
   227  			// Set the Request's sequence number on the TxnMeta for this
   228  			// request. The MVCC layer (currently) uses TxnMeta to
   229  			// pass input arguments, such as the seqnum at which a
   230  			// request operates.
   231  			baHeader.Txn.Sequence = args.Header().Sequence
   232  		}
   233  
   234  		// Note that responses are populated even when an error is returned.
   235  		// TODO(tschottdorf): Change that. IIRC there is nontrivial use of it currently.
   236  		reply := br.Responses[index].GetInner()
   237  
   238  		var curResult result.Result
   239  		var pErr *roachpb.Error
   240  
   241  		curResult, pErr = evaluateCommand(
   242  			ctx, idKey, index, readWriter, rec, ms, baHeader, args, reply)
   243  
   244  		// If an EndTxn wants to restart because of a write too old, we
   245  		// might have a better error to return to the client.
   246  		retErr, ok := pErr.GetDetail().(*roachpb.TransactionRetryError)
   247  		if ok && retErr.Reason == roachpb.RETRY_WRITE_TOO_OLD &&
   248  			args.Method() == roachpb.EndTxn && writeTooOldState.err != nil {
   249  			pErr.SetDetail(writeTooOldState.err)
    250  			// Don't defer this error. We could perhaps rely on the client observing
    251  			// the WriteTooOld flag and retrying the batch, but we choose not to.
   252  			writeTooOldState.cantDeferWTOE = true
   253  		}
   254  
   255  		if err := mergedResult.MergeAndDestroy(curResult); err != nil {
   256  			// TODO(tschottdorf): see whether we really need to pass nontrivial
   257  			// Result up on error and if so, formalize that.
   258  			log.Fatalf(
   259  				ctx,
   260  				"unable to absorb Result: %s\ndiff(new, old): %s",
   261  				err, pretty.Diff(curResult, mergedResult),
   262  			)
   263  		}
   264  
   265  		if pErr != nil {
   266  			// Initialize the error index.
   267  			pErr.SetErrorIndex(int32(index))
   268  
   269  			switch tErr := pErr.GetDetail().(type) {
   270  			case *roachpb.WriteTooOldError:
   271  				// We got a WriteTooOldError. We continue on to run all
   272  				// commands in the batch in order to determine the highest
   273  				// timestamp for more efficient retries. If the batch is
   274  				// transactional, we continue to lay down intents so that
   275  				// other concurrent overlapping transactions are forced
   276  				// through intent resolution and the chances of this batch
   277  				// succeeding when it will be retried are increased.
   278  				if writeTooOldState.err != nil {
   279  					writeTooOldState.err.ActualTimestamp.Forward(
   280  						tErr.ActualTimestamp)
   281  				} else {
   282  					writeTooOldState.err = tErr
   283  				}
   284  
   285  				// For read-write requests that observe key-value state, we don't have
   286  				// the option of leaving an intent behind when they encounter a
   287  				// WriteTooOldError, so we have to return an error instead of a response
   288  				// with the WriteTooOld flag set (which would also leave intents
   289  				// behind). These requests need to be re-evaluated at the bumped
   290  				// timestamp in order for their results to be valid. The current
    291  				// evaluation produced a result that could well be different from
   292  				// what the request would return if it were evaluated at the bumped
   293  				// timestamp, which would cause the request to be rejected if it were
   294  				// sent again with the same sequence number after a refresh.
   295  				//
   296  				// Similarly, for read-only requests that encounter a WriteTooOldError,
   297  				// we don't have the option of returning a response with the WriteTooOld
   298  				// flag set because a response is not even generated in tandem with the
   299  				// WriteTooOldError. We could fix this and then allow WriteTooOldErrors
   300  				// to be deferred in these cases, but doing so would buy more into the
    301  				// extremely error-prone approach of returning responses and errors
   302  				// together throughout the MVCC read path. Doing so is not desirable as
   303  				// it has repeatedly caused bugs in the past. Instead, we'd like to get
   304  				// rid of this pattern entirely and instead address the TODO below.
   305  				//
   306  				// TODO(andrei): What we really want to do here is either speculatively
   307  				// evaluate the request at the bumped timestamp and return that
   308  				// speculative result, or leave behind an unreplicated lock that won't
    309  				// prevent the request from being evaluated again at the same sequence number
   310  				// but at a bumped timestamp.
   311  				if !roachpb.IsBlindWrite(args) {
   312  					writeTooOldState.cantDeferWTOE = true
   313  				}
   314  
   315  				if baHeader.Txn != nil {
   316  					log.VEventf(ctx, 2, "setting WriteTooOld because of key: %s. wts: %s -> %s",
   317  						args.Header().Key, baHeader.Txn.WriteTimestamp, tErr.ActualTimestamp)
   318  					baHeader.Txn.WriteTimestamp.Forward(tErr.ActualTimestamp)
   319  					baHeader.Txn.WriteTooOld = true
   320  				} else {
   321  					// For non-transactional requests, there's nowhere to defer the error
   322  					// to. And the request has to fail because non-transactional batches
   323  					// should read and write at the same timestamp.
   324  					writeTooOldState.cantDeferWTOE = true
   325  				}
   326  
   327  				// Clear pErr; we're done processing the WTOE for now and we'll return
   328  				// to considering it below after we've evaluated all requests.
   329  				pErr = nil
   330  			default:
   331  				return nil, mergedResult, pErr
   332  			}
   333  		}
   334  
    335  		// If the last request was carried out with a limit, subtract the number of
    336  		// results from the limit going forward. Exhausting the limit results in a
    337  		// limit of -1, which makes sure that we still execute the rest of the
    338  		// batch, but with limit-aware operations returning no data (sketched below).
   339  		if limit, retResults := baHeader.MaxSpanRequestKeys, reply.Header().NumKeys; limit > 0 {
   340  			if retResults > limit {
   341  				index, retResults, limit := index, retResults, limit // don't alloc unless branch taken
   342  				err := errorutil.UnexpectedWithIssueErrorf(46652,
   343  					"received %d results, limit was %d (original limit: %d, batch=%s idx=%d)",
   344  					errors.Safe(retResults), errors.Safe(limit),
   345  					errors.Safe(ba.Header.MaxSpanRequestKeys),
   346  					errors.Safe(ba.Summary()), errors.Safe(index))
   347  				if sentryIssue46720Limiter.Allow() {
   348  					log.Errorf(ctx, "%v", err)
   349  					errorutil.SendReport(ctx, &rec.ClusterSettings().SV, err)
   350  				}
   351  				return nil, mergedResult, roachpb.NewError(err)
   352  			} else if retResults < limit {
   353  				baHeader.MaxSpanRequestKeys -= retResults
   354  			} else {
   355  				// They were equal, so drop to -1 instead of zero (which would
   356  				// mean "no limit").
   357  				baHeader.MaxSpanRequestKeys = -1
   358  			}
   359  		} else if limit < 0 {
   360  			if retResults > 0 {
   361  				index, retResults := index, retResults // don't alloc unless branch taken
   362  				log.Fatalf(ctx,
   363  					"received %d results, limit was exhausted (original limit: %d, batch=%s idx=%d)",
   364  					errors.Safe(retResults), errors.Safe(ba.Header.MaxSpanRequestKeys),
   365  					errors.Safe(ba.Summary()), errors.Safe(index))
   366  			}
   367  		}
   368  		// Same as for MaxSpanRequestKeys above, keep track of the limit and
   369  		// make sure to fall through to -1 instead of hitting zero (which
   370  		// means no limit).
   371  		if baHeader.TargetBytes > 0 {
   372  			retBytes := reply.Header().NumBytes
   373  			if baHeader.TargetBytes > retBytes {
   374  				baHeader.TargetBytes -= retBytes
   375  			} else {
   376  				baHeader.TargetBytes = -1
   377  			}
   378  		}
   379  
   380  		// If transactional, we use ba.Txn for each individual command and
   381  		// accumulate updates to it. Once accumulated, we then remove the Txn
   382  		// from each individual response.
   383  		// TODO(spencer,tschottdorf): need copy-on-write behavior for the
   384  		//   updated batch transaction / timestamp.
   385  		if baHeader.Txn != nil {
   386  			if header := reply.Header(); header.Txn != nil {
   387  				baHeader.Txn.Update(header.Txn)
   388  				header.Txn = nil
   389  				reply.SetHeader(header)
   390  			}
   391  		}
   392  	}
   393  
   394  	if writeTooOldState.err != nil {
   395  		if baHeader.Txn != nil && baHeader.Txn.Status.IsCommittedOrStaging() {
   396  			log.Fatalf(ctx, "committed txn with writeTooOld err: %s", writeTooOldState.err)
   397  		}
   398  	}
   399  
   400  	// If there's a write too old error that we can't defer, return it.
   401  	if writeTooOldState.cantDeferWTOE {
   402  		return nil, mergedResult, roachpb.NewErrorWithTxn(writeTooOldState.err, baHeader.Txn)
   403  	}
   404  
   405  	// Update the batch response timestamp field to the timestamp at which the
   406  	// batch's reads were evaluated.
   407  	if baHeader.Txn != nil {
   408  		// If transactional, send out the final transaction entry with the reply.
   409  		br.Txn = baHeader.Txn
   410  		// Note that br.Txn.ReadTimestamp might be higher than baHeader.Timestamp if
   411  		// we had an EndTxn that decided that it can refresh to something higher
   412  		// than baHeader.Timestamp because there were no refresh spans.
   413  		if br.Txn.ReadTimestamp.Less(baHeader.Timestamp) {
   414  			log.Fatalf(ctx, "br.Txn.ReadTimestamp < ba.Timestamp (%s < %s). ba: %s",
   415  				br.Txn.ReadTimestamp, baHeader.Timestamp, ba)
   416  		}
   417  		br.Timestamp = br.Txn.ReadTimestamp
   418  	} else {
   419  		br.Timestamp = baHeader.Timestamp
   420  	}
   421  
   422  	return br, mergedResult, nil
   423  }
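
         // The WriteTooOldError deferral above is, at its core, a small fold over the
         // per-request outcomes: keep the highest conflicting timestamp seen so far
         // plus a sticky "cannot defer" bit, then decide at the end of the batch
         // whether to return the error or a response with the WriteTooOld flag set.
         // Below is a minimal sketch of that bookkeeping with plain integer
         // timestamps; wtoeTracker, observeConflict, and mustReturnError are
         // hypothetical names, not part of this package.

         // wtoeTracker accumulates write-too-old conflicts across a batch.
         type wtoeTracker struct {
         	conflictTS int64 // highest conflicting timestamp seen; 0 means no conflict
         	cantDefer  bool  // set when some request cannot swallow the error
         }

         // observeConflict folds one conflict into the tracker. blind reports whether
         // the offending request was a blind write, which may defer the error.
         func (w *wtoeTracker) observeConflict(ts int64, blind bool) {
         	if ts > w.conflictTS {
         		w.conflictTS = ts
         	}
         	if !blind {
         		w.cantDefer = true
         	}
         }

         // mustReturnError reports whether the batch has to fail with the accumulated
         // error (true) or may succeed with the WriteTooOld flag left for the client.
         func (w *wtoeTracker) mustReturnError() bool {
         	return w.conflictTS != 0 && w.cantDefer
         }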
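
         // The MaxSpanRequestKeys and TargetBytes bookkeeping above follows one
         // pattern: a positive budget is decremented by what each response consumed,
         // and an exactly exhausted budget drops to -1 rather than 0, because 0 is
         // the wire encoding for "no limit". A minimal sketch of that arithmetic;
         // nextBudget is a hypothetical helper, not part of this package.

         // nextBudget returns the remaining budget after a response consumed used
         // units, preserving the conventions above: 0 means unlimited, and an
         // exhausted budget becomes -1 so later requests still run but return no data.
         func nextBudget(budget, used int64) int64 {
         	switch {
         	case budget <= 0:
         		return budget // unlimited (0) or already exhausted (-1): unchanged
         	case used < budget:
         		return budget - used
         	default:
         		return -1 // fully consumed; avoid 0, which would mean "no limit"
         	}
         }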
   424  
    425  // evaluateCommand delegates to the eval method for the given
    426  // roachpb.Request, as registered with batcheval.LookupCommand.
    427  // The returned Result may be partially valid
    428  // even if an error is returned.
   429  func evaluateCommand(
   430  	ctx context.Context,
   431  	raftCmdID kvserverbase.CmdIDKey,
   432  	index int,
   433  	readWriter storage.ReadWriter,
   434  	rec batcheval.EvalContext,
   435  	ms *enginepb.MVCCStats,
   436  	h roachpb.Header,
   437  	args roachpb.Request,
   438  	reply roachpb.Response,
   439  ) (result.Result, *roachpb.Error) {
   440  	// If a unittest filter was installed, check for an injected error; otherwise, continue.
   441  	if filter := rec.EvalKnobs().TestingEvalFilter; filter != nil {
   442  		filterArgs := kvserverbase.FilterArgs{
   443  			Ctx:   ctx,
   444  			CmdID: raftCmdID,
   445  			Index: index,
   446  			Sid:   rec.StoreID(),
   447  			Req:   args,
   448  			Hdr:   h,
   449  		}
   450  		if pErr := filter(filterArgs); pErr != nil {
   451  			if pErr.GetTxn() == nil {
   452  				pErr.SetTxn(h.Txn)
   453  			}
   454  			log.Infof(ctx, "test injecting error: %s", pErr)
   455  			return result.Result{}, pErr
   456  		}
   457  	}
   458  
   459  	var err error
   460  	var pd result.Result
   461  
   462  	if cmd, ok := batcheval.LookupCommand(args.Method()); ok {
   463  		cArgs := batcheval.CommandArgs{
   464  			EvalCtx: rec,
   465  			Header:  h,
   466  			Args:    args,
   467  			Stats:   ms,
   468  		}
   469  
   470  		if cmd.EvalRW != nil {
   471  			pd, err = cmd.EvalRW(ctx, readWriter, cArgs, reply)
   472  		} else {
   473  			pd, err = cmd.EvalRO(ctx, readWriter, cArgs, reply)
   474  		}
   475  	} else {
   476  		err = errors.Errorf("unrecognized command %s", args.Method())
   477  	}
   478  
   479  	if h.ReturnRangeInfo {
   480  		returnRangeInfo(reply, rec)
   481  	}
   482  
   483  	// TODO(peter): We'd like to assert that the hlc clock is always updated
   484  	// correctly, but various tests insert versioned data without going through
   485  	// the proper channels. See TestPushTxnUpgradeExistingTxn for an example.
   486  	//
   487  	// if header.Txn != nil && h.Timestamp.LessEq(header.Txn.Timestamp) {
   488  	// 	if now := r.store.Clock().Now(); now.Less(header.Txn.Timestamp) {
   489  	// 		log.Fatalf(ctx, "hlc clock not updated: %s < %s", now, header.Txn.Timestamp)
   490  	// 	}
   491  	// }
   492  
   493  	if log.V(2) {
   494  		log.Infof(ctx, "evaluated %s command %+v: %+v, err=%v", args.Method(), args, reply, err)
   495  	}
   496  
   497  	// Create a roachpb.Error by initializing txn from the request/response header.
   498  	var pErr *roachpb.Error
   499  	if err != nil {
   500  		txn := reply.Header().Txn
   501  		if txn == nil {
   502  			txn = h.Txn
   503  		}
   504  		pErr = roachpb.NewErrorWithTxn(err, txn)
   505  	}
   506  
   507  	return pd, pErr
   508  }
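
         // evaluateCommand is, structurally, a dispatch table lookup: find the command
         // registered for the request's method and call its read-write or read-only
         // eval function. A minimal sketch of that registry pattern; miniCmd,
         // miniRegistry, and dispatch are hypothetical names standing in for
         // batcheval.LookupCommand and friends, not part of this package.

         // miniCmd mirrors the shape of a registered command: exactly one of the two
         // eval functions is expected to be set.
         type miniCmd struct {
         	evalRW func(ctx context.Context, req string) (string, error)
         	evalRO func(ctx context.Context, req string) (string, error)
         }

         // miniRegistry maps a method name to its command.
         var miniRegistry = map[string]miniCmd{}

         // dispatch evaluates req with the registered command, preferring the
         // read-write variant when present and failing on unknown methods.
         func dispatch(ctx context.Context, method, req string) (string, error) {
         	cmd, ok := miniRegistry[method]
         	if !ok {
         		return "", errors.Errorf("unrecognized command %s", method)
         	}
         	if cmd.evalRW != nil {
         		return cmd.evalRW(ctx, req)
         	}
         	return cmd.evalRO(ctx, req)
         }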
   509  
    510  // returnRangeInfo populates RangeInfos in the response. The caller is
    511  // expected to have checked that the batch requested them (ReturnRangeInfo).
   512  func returnRangeInfo(reply roachpb.Response, rec batcheval.EvalContext) {
   513  	header := reply.Header()
   514  	lease, _ := rec.GetLease()
   515  	desc := rec.Desc()
   516  	header.RangeInfos = []roachpb.RangeInfo{
   517  		{
   518  			Desc:  *desc,
   519  			Lease: lease,
   520  		},
   521  	}
   522  	reply.SetHeader(header)
   523  }
   524  
   525  // canDoServersideRetry looks at the error produced by evaluating ba (or the
   526  // WriteTooOldFlag in br.Txn if there's no error) and decides if it's possible
    527  // to retry the batch evaluation at a higher timestamp. Retrying is possible
    528  // for certain retriable errors that ask for a higher timestamp: for
    529  // transactional requests, retrying is possible if the transaction has not
    530  // performed any prior reads that need refreshing.
   531  //
   532  // deadline, if not nil, specifies the highest timestamp (exclusive) at which
    533  // the request can be evaluated. If ba is a transactional request, then deadline
    534  // cannot be specified; a transaction's deadline comes from its EndTxn request.
   535  //
   536  // If true is returned, ba and ba.Txn will have been updated with the new
   537  // timestamp.
   538  func canDoServersideRetry(
   539  	ctx context.Context,
   540  	pErr *roachpb.Error,
   541  	ba *roachpb.BatchRequest,
   542  	br *roachpb.BatchResponse,
   543  	latchSpans *spanset.SpanSet,
   544  	deadline *hlc.Timestamp,
   545  ) bool {
   546  	if ba.Txn != nil {
   547  		if deadline != nil {
   548  			log.Fatal(ctx, "deadline passed for transactional request")
   549  		}
   550  		canFwdRTS := ba.CanForwardReadTimestamp
   551  		if etArg, ok := ba.GetArg(roachpb.EndTxn); ok {
   552  			// If the request provided an EndTxn request, also check its
   553  			// CanCommitAtHigherTimestamp flag. This ensures that we're backwards
    554  			// compatible and gives us a chance to make sure that these flags stay
    555  			// in sync until the CanCommitAtHigherTimestamp flag is migrated away.
   556  			et := etArg.(*roachpb.EndTxnRequest)
   557  			canFwdCTS := batcheval.CanForwardCommitTimestampWithoutRefresh(ba.Txn, et)
   558  			if canFwdRTS && !canFwdCTS {
   559  				log.Fatalf(ctx, "unexpected mismatch between Batch.CanForwardReadTimestamp "+
   560  					"(%+v) and EndTxn.CanCommitAtHigherTimestamp (%+v)", ba, et)
   561  			}
   562  			canFwdRTS = canFwdCTS
   563  			deadline = et.Deadline
   564  		}
   565  		if !canFwdRTS {
   566  			return false
   567  		}
   568  	}
   569  	var newTimestamp hlc.Timestamp
   570  
   571  	if pErr != nil {
   572  		switch tErr := pErr.GetDetail().(type) {
   573  		case *roachpb.WriteTooOldError:
   574  			// Locking scans hit WriteTooOld errors if they encounter values at
   575  			// timestamps higher than their read timestamps. The encountered
   576  			// timestamps are guaranteed to be greater than the txn's read
   577  			// timestamp, but not its write timestamp. So, when determining what
   578  			// the new timestamp should be, we make sure to not regress the
   579  			// txn's write timestamp.
   580  			newTimestamp = tErr.ActualTimestamp
   581  			if ba.Txn != nil {
   582  				newTimestamp.Forward(pErr.GetTxn().WriteTimestamp)
   583  			}
   584  		case *roachpb.TransactionRetryError:
   585  			if ba.Txn == nil {
   586  				// TODO(andrei): I don't know if TransactionRetryError is possible for
   587  				// non-transactional batches, but some tests inject them for 1PC
   588  				// transactions. I'm not sure how to deal with them, so let's not retry.
   589  				return false
   590  			}
   591  			newTimestamp = pErr.GetTxn().WriteTimestamp
   592  		default:
   593  			// TODO(andrei): Handle other retriable errors too.
   594  			return false
   595  		}
   596  	} else {
   597  		if !br.Txn.WriteTooOld {
   598  			log.Fatalf(ctx, "programming error: expected the WriteTooOld flag to be set")
   599  		}
   600  		newTimestamp = br.Txn.WriteTimestamp
   601  	}
   602  
   603  	if deadline != nil && deadline.LessEq(newTimestamp) {
   604  		return false
   605  	}
   606  	return tryBumpBatchTimestamp(ctx, ba, newTimestamp, latchSpans)
   607  }
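
         // Choosing the retry timestamp above reduces to: take the timestamp the error
         // (or the WriteTooOld flag) asks for, never regress the transaction's current
         // write timestamp, and refuse to retry once the exclusive deadline is
         // reached. A minimal sketch of that decision with plain integer timestamps;
         // retryTimestamp is a hypothetical helper, not part of this package.

         // retryTimestamp returns the timestamp a server-side retry would use and
         // whether the retry is allowed under the deadline. txnWriteTS is 0 for
         // non-transactional requests; deadline is 0 when no deadline applies.
         func retryTimestamp(requiredTS, txnWriteTS, deadline int64) (int64, bool) {
         	newTS := requiredTS
         	if txnWriteTS > newTS {
         		newTS = txnWriteTS // don't regress the txn's write timestamp
         	}
         	if deadline != 0 && deadline <= newTS {
         		return 0, false // the deadline is exclusive: reaching it forbids a retry
         	}
         	return newTS, true
         }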