github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvclient/kvcoord/dist_sender_server_test.go

     1  // Copyright 2015 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvcoord_test
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"regexp"
    18  	"sort"
    19  	"strings"
    20  	"sync/atomic"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/cockroachdb/cockroach/pkg/base"
    25  	"github.com/cockroachdb/cockroach/pkg/gossip"
    26  	"github.com/cockroachdb/cockroach/pkg/keys"
    27  	"github.com/cockroachdb/cockroach/pkg/kv"
    28  	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
    29  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    30  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
    31  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    32  	"github.com/cockroachdb/cockroach/pkg/rpc/nodedialer"
    33  	"github.com/cockroachdb/cockroach/pkg/server"
    34  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    35  	"github.com/cockroachdb/cockroach/pkg/storage"
    36  	"github.com/cockroachdb/cockroach/pkg/testutils"
    37  	"github.com/cockroachdb/cockroach/pkg/testutils/kvclientutils"
    38  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    39  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    40  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    41  	"github.com/cockroachdb/cockroach/pkg/util/log"
    42  	"github.com/cockroachdb/errors"
    43  	"github.com/stretchr/testify/require"
    44  )
    45  
    46  // NOTE: these tests are in package kvcoord_test to avoid a circular
    47  // dependency between the server and kv packages. These tests rely on
    48  // starting a TestServer, which creates a "real" node and employs a
    49  // distributed sender server-side.
    50  
    51  func strToValue(s string) *roachpb.Value {
    52  	v := roachpb.MakeValueFromBytes([]byte(s))
    53  	return &v
    54  }
    55  
    56  func startNoSplitMergeServer(t *testing.T) (serverutils.TestServerInterface, *kv.DB) {
    57  	s, _, db := serverutils.StartServer(t, base.TestServerArgs{
    58  		Knobs: base.TestingKnobs{
    59  			Store: &kvserver.StoreTestingKnobs{
    60  				DisableSplitQueue: true,
    61  				DisableMergeQueue: true,
    62  			},
    63  		},
    64  	})
    65  	return s, db
    66  }
    67  
    68  // TestRangeLookupWithOpenTransaction verifies that range lookups are
    69  // done in such a way (e.g. using inconsistent reads) that they
    70  // proceed in the event that a write intent is extant at the meta
    71  // index record being read.
    72  func TestRangeLookupWithOpenTransaction(t *testing.T) {
    73  	defer leaktest.AfterTest(t)()
    74  	s, _ := startNoSplitMergeServer(t)
    75  	defer s.Stopper().Stop(context.Background())
    76  
    77  	// Create an intent on the meta1 record by writing directly to the
    78  	// engine.
    79  	key := testutils.MakeKey(keys.Meta1Prefix, roachpb.KeyMax)
    80  	now := s.Clock().Now()
    81  	txn := roachpb.MakeTransaction("txn", roachpb.Key("foobar"), 0, now, 0)
    82  	if err := storage.MVCCPutProto(
    83  		context.Background(), s.(*server.TestServer).Engines()[0],
    84  		nil, key, now, &txn, &roachpb.RangeDescriptor{}); err != nil {
    85  		t.Fatal(err)
    86  	}
    87  
    88  	// Create a new DistSender and kv.DB so that the Get below is guaranteed
    89  	// not to hit the range descriptor cache, forcing a RangeLookup operation.
    90  	ambient := log.AmbientContext{Tracer: s.ClusterSettings().Tracer}
    91  	ds := kvcoord.NewDistSender(
    92  		kvcoord.DistSenderConfig{
    93  			AmbientCtx: ambient,
    94  			Clock:      s.Clock(),
    95  			RPCContext: s.RPCContext(),
    96  			NodeDialer: nodedialer.New(s.RPCContext(), gossip.AddressResolver(s.(*server.TestServer).Gossip())),
    97  			Settings:   cluster.MakeTestingClusterSettings(),
    98  		},
    99  		s.(*server.TestServer).Gossip(),
   100  	)
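        	// kv.NewDB below needs a TxnSenderFactory, so wrap the fresh DistSender
        	// in a TxnCoordSenderFactory.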
   101  	tsf := kvcoord.NewTxnCoordSenderFactory(
   102  		kvcoord.TxnCoordSenderFactoryConfig{
   103  			AmbientCtx: ambient,
   104  			Clock:      s.Clock(),
   105  			Stopper:    s.Stopper(),
   106  		},
   107  		ds,
   108  	)
   109  	db := kv.NewDB(ambient, tsf, s.Clock())
   110  
   111  	// Now, with an intent pending, attempt to read from an arbitrary
   112  	// key. This will cause the distributed sender to
   113  	// do a range lookup, which will encounter the intent. We're
   114  	// verifying here that the range lookup doesn't fail with a write
   115  	// intent error. If it did, it would go into a deadloop attempting
   116  	// to push the transaction, which in turn requires another range
   117  	// lookup, etc, ad nauseam.
   118  	if _, err := db.Get(context.Background(), "a"); err != nil {
   119  		t.Fatal(err)
   120  	}
   121  }
   122  
   123  // setupMultipleRanges splits the keyspace of the database backing the
   124  // supplied DB client at the given keys. It returns an error if any of
   125  // the splits fail.
   126  func setupMultipleRanges(ctx context.Context, db *kv.DB, splitAt ...string) error {
   127  	// Split the keyspace at the given keys.
   128  	for _, key := range splitAt {
   129  		if err := db.AdminSplit(ctx, key /* spanKey */, key /* splitKey */, hlc.MaxTimestamp /* expirationTime */); err != nil {
   130  			return err
   131  		}
   132  	}
   133  	return nil
   134  }
   135  
   136  type checkResultsMode int
   137  
   138  const (
   139  	// Strict means that the expected results must be passed exactly.
   140  	Strict checkResultsMode = iota
   141  	// AcceptPrefix means that a superset of the expected results may be passed.
   142  	// The actual results for each scan must be a prefix of the passed-in values.
   143  	AcceptPrefix
   144  )
   145  
   146  type checkOptions struct {
   147  	mode     checkResultsMode
   148  	expCount int
   149  }
   150  
   151  // checks the keys returned from a Scan/ReverseScan.
   152  //
   153  // Args:
   154  // expSatisfied: A set of indexes into spans representing the scans that
   155  // 	have been completed and don't need a ResumeSpan. For these scans, having no
   156  // 	results and also no resume span is acceptable by this function.
   157  // resultsMode: Specifies how strict the result checking is supposed to
   158  // 	be (Strict or AcceptPrefix).
   159  // expCount: If resultsMode == AcceptPrefix, this is the total number of
   160  // 	expected results. Ignored for resultsMode == Strict.
   161  func checkSpanResults(
   162  	t *testing.T,
   163  	spans [][]string,
   164  	results []kv.Result,
   165  	expResults [][]string,
   166  	expSatisfied map[int]struct{},
   167  	opt checkOptions,
   168  ) {
   169  	t.Helper()
   170  	if len(expResults) != len(results) {
   171  		t.Fatalf("only got %d results, wanted %d", len(results), len(expResults))
   172  	}
   173  	// Ensure all the keys returned align properly with what is expected.
   174  	count := 0
   175  	for i, res := range results {
   176  		count += len(res.Rows)
   177  		if opt.mode == Strict {
   178  			if len(res.Rows) != len(expResults[i]) {
   179  				t.Fatalf("scan %d (%s): expected %d rows, got %d (%s)",
   180  					i, spans[i], len(expResults[i]), len(res.Rows), res)
   181  			}
   182  		}
   183  		for j, kv := range res.Rows {
   184  			if key, expKey := string(kv.Key), expResults[i][j]; key != expKey {
   185  				t.Fatalf("scan %d (%s) expected result %d to be %q; got %q",
   186  					i, spans[i], j, expKey, key)
   187  			}
   188  		}
   189  	}
   190  	if opt.mode == AcceptPrefix && count != opt.expCount {
   191  		// Check that the bound was respected.
   192  		t.Errorf("count = %d, expCount = %d", count, opt.expCount)
   193  	}
   194  }
   195  
   196  // checks ResumeSpan returned in a ScanResponse.
   197  func checkResumeSpanScanResults(
   198  	t *testing.T,
   199  	spans [][]string,
   200  	results []kv.Result,
   201  	expResults [][]string,
   202  	expSatisfied map[int]struct{},
   203  	opt checkOptions,
   204  ) {
   205  	t.Helper()
   206  	for i, res := range results {
   207  		rowLen := len(res.Rows)
   208  		// Check that satisfied scans don't have resume spans.
   209  		if _, satisfied := expSatisfied[i]; satisfied {
   210  			if res.ResumeSpan != nil {
   211  				t.Fatalf("satisfied scan %d (%s) has ResumeSpan: %v",
   212  					i, spans[i], res.ResumeSpan)
   213  			}
   214  			continue
   215  		}
   216  
   217  		if res.ResumeReason == roachpb.RESUME_UNKNOWN {
   218  			t.Fatalf("scan %d (%s): no resume reason. resume span: %+v",
   219  				i, spans[i], res.ResumeSpan)
   220  		}
   221  
   222  		// The scan is not expected to be satisfied, so there must be a resume span.
   223  		// The resume span should be identical to the original request if no
   224  		// results have been produced, or should continue after the last result
   225  		// otherwise.
   226  		resumeKey := string(res.ResumeSpan.Key)
   227  		if res.ResumeReason != roachpb.RESUME_KEY_LIMIT {
   228  			t.Fatalf("scan %d (%s): unexpected resume reason %s",
   229  				i, spans[i], res.ResumeReason)
   230  		}
   231  		if rowLen == 0 {
   232  			if resumeKey != spans[i][0] {
   233  				t.Fatalf("scan %d: expected resume %s, got: %s",
   234  					i, spans[i][0], resumeKey)
   235  			}
   236  		} else {
   237  			lastRes := expResults[i][rowLen-1]
   238  			if resumeKey <= lastRes {
   239  				t.Fatalf("scan %d: expected resume %s to be above last result %s",
   240  					i, resumeKey, lastRes)
   241  			}
   242  		}
   243  
   244  		// The EndKey must be untouched.
   245  		if key, expKey := string(res.ResumeSpan.EndKey), spans[i][1]; key != expKey {
   246  			t.Errorf("scan %d: expected resume endkey to be %q; got %q", i, expKey, key)
   247  		}
   248  	}
   249  }
   250  
   251  // check ResumeSpan returned in a ReverseScanResponse.
   252  func checkResumeSpanReverseScanResults(
   253  	t *testing.T,
   254  	spans [][]string,
   255  	results []kv.Result,
   256  	expResults [][]string,
   257  	expSatisfied map[int]struct{},
   258  	opt checkOptions,
   259  ) {
   260  	t.Helper()
   261  	for i, res := range results {
   262  		rowLen := len(res.Rows)
   263  		// Check that satisfied scans don't have resume spans.
   264  		if _, satisfied := expSatisfied[i]; satisfied {
   265  			if res.ResumeSpan != nil {
   266  				t.Fatalf("satisfied scan %d has ResumeSpan: %v", i, res.ResumeSpan)
   267  			}
   268  			continue
   269  		}
   270  
   271  		// The scan is not expected to be satisfied, so there must be a resume span.
   272  		// The resume span should be identical to the original request if no
   273  		// results have been produced, or should continue after the last result
   274  		// otherwise.
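        		// A reverse scan progresses backwards from the end of its span, so the
        		// resume key to inspect is the ResumeSpan's EndKey.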
   275  		resumeKey := string(res.ResumeSpan.EndKey)
   276  		if res.ResumeReason != roachpb.RESUME_KEY_LIMIT {
   277  			t.Fatalf("scan %d (%s): unexpected resume reason %s",
   278  				i, spans[i], res.ResumeReason)
   279  		}
   280  		if rowLen == 0 {
   281  			if resumeKey != spans[i][1] {
   282  				t.Fatalf("scan %d (%s) expected resume %s, got: %s",
   283  					i, spans[i], spans[i][1], resumeKey)
   284  			}
   285  		} else {
   286  			lastRes := expResults[i][rowLen-1]
   287  			if resumeKey >= lastRes {
   288  				t.Fatalf("scan %d: expected resume %s to be below last result %s",
   289  					i, resumeKey, lastRes)
   290  			}
   291  		}
   292  
   293  		// The Key must be untouched.
   294  		if key, expKey := string(res.ResumeSpan.Key), spans[i][0]; key != expKey {
   295  			t.Errorf("scan %d: expected resume key to be %q; got %q", i, expKey, key)
   296  		}
   297  	}
   298  }
   299  
   300  // check an entire scan result including the ResumeSpan.
   301  func checkScanResults(
   302  	t *testing.T,
   303  	spans [][]string,
   304  	results []kv.Result,
   305  	expResults [][]string,
   306  	expSatisfied map[int]struct{},
   307  	opt checkOptions,
   308  ) {
   309  	checkSpanResults(t, spans, results, expResults, expSatisfied, opt)
   310  	checkResumeSpanScanResults(t, spans, results, expResults, expSatisfied, opt)
   311  }
   312  
   313  // check an entire reverse scan result including the ResumeSpan.
   314  func checkReverseScanResults(
   315  	t *testing.T,
   316  	spans [][]string,
   317  	results []kv.Result,
   318  	expResults [][]string,
   319  	expSatisfied map[int]struct{},
   320  	opt checkOptions,
   321  ) {
   322  	checkSpanResults(t, spans, results, expResults, expSatisfied, opt)
   323  	checkResumeSpanReverseScanResults(t, spans, results, expResults, expSatisfied, opt)
   324  }
   325  
   326  // Tests multiple scans, forward and reverse, across many ranges with multiple
   327  // bounds.
   328  func TestMultiRangeBoundedBatchScan(t *testing.T) {
   329  	defer leaktest.AfterTest(t)()
   330  	s, _ := startNoSplitMergeServer(t)
   331  	defer s.Stopper().Stop(context.Background())
   332  	ctx := context.Background()
   333  
   334  	db := s.DB()
   335  	splits := []string{"a", "b", "c", "d", "e", "f"}
   336  	if err := setupMultipleRanges(ctx, db, splits...); err != nil {
   337  		t.Fatal(err)
   338  	}
   339  	keys := []string{"a1", "a2", "a3", "b1", "b2", "c1", "c2", "d1", "f1", "f2", "f3"}
   340  	for _, key := range keys {
   341  		if err := db.Put(ctx, key, "value"); err != nil {
   342  			t.Fatal(err)
   343  		}
   344  	}
   345  
   346  	scans := [][]string{{"a", "c"}, {"b", "c2"}, {"c", "g"}, {"f1a", "f2a"}}
   347  	// These are the expected results if there is no bound.
   348  	expResults := [][]string{
   349  		{"a1", "a2", "a3", "b1", "b2"},
   350  		{"b1", "b2", "c1"},
   351  		{"c1", "c2", "d1", "f1", "f2", "f3"},
   352  		{"f2"},
   353  	}
   354  	var expResultsReverse [][]string
   355  	for _, res := range expResults {
   356  		var rres []string
   357  		for i := len(res) - 1; i >= 0; i-- {
   358  			rres = append(rres, res[i])
   359  		}
   360  		expResultsReverse = append(expResultsReverse, rres)
   361  	}
   362  
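        	// maxExpCount is the total number of rows expected across all scans
        	// when no key limit is in effect.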
   363  	maxExpCount := 0
   364  	for _, res := range expResults {
   365  		maxExpCount += len(res)
   366  	}
   367  
   368  	// Compute the `bound` at which each scan is satisfied. We take advantage
   369  	// of the fact that, in this test, each scan is satisfied as soon as its
   370  	// last expected result is generated; in other words, the last result for
   371  	// each scan is in the same range as the scan's end key.
   372  	// This loopy code "simulates" the way the DistSender operates in the face
   373  	// of overlapping spans that cross ranges and have key limits: the batch is
   374  	// run range by range and, within a range, scans are satisfied in the order
   375  	// in which they appear in the batch.
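        	// satisfiedBoundThreshold[i] is the smallest bound at which scan i is
        	// fully satisfied, computed separately for forward and reverse order.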
   376  	satisfiedBoundThreshold := make([]int, len(expResults))
   377  	satisfiedBoundThresholdReverse := make([]int, len(expResults))
   378  	remaining := make(map[int]int)
   379  	for i := range expResults {
   380  		remaining[i] = len(expResults[i])
   381  	}
   382  	const maxBound int = 20
   383  	var r int
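        	// Bracket the split points with sentinels ("" below the first key and
        	// "zzzzzz" above the last) so the loops below also cover the first and
        	// last data ranges.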
   384  	splits = append([]string{""}, splits...)
   385  	splits = append(splits, "zzzzzz")
   386  	for s := 1; s < len(splits)-1; s++ {
   387  		firstK := sort.SearchStrings(keys, splits[s])
   388  		lastK := sort.SearchStrings(keys, splits[s+1]) - 1
   389  		for j, res := range expResults {
   390  			for _, expK := range res {
   391  				for k := firstK; k <= lastK; k++ {
   392  					if keys[k] == expK {
   393  						r++
   394  						remaining[j]--
   395  						if remaining[j] == 0 {
   396  							satisfiedBoundThreshold[j] = r
   397  						}
   398  						break
   399  					}
   400  				}
   401  			}
   402  		}
   403  	}
   404  	// Compute the thresholds for the reverse scans.
   405  	r = 0
   406  	for i := range expResults {
   407  		remaining[i] = len(expResults[i])
   408  	}
   409  	for s := len(splits) - 1; s > 0; s-- {
   410  		// The split contains keys [lastK..firstK].
   411  		firstK := sort.SearchStrings(keys, splits[s]) - 1
   412  		lastK := sort.SearchStrings(keys, splits[s-1])
   413  		for j, res := range expResultsReverse {
   414  			for expIdx := len(res) - 1; expIdx >= 0; expIdx-- {
   415  				expK := res[expIdx]
   416  				for k := firstK; k >= lastK; k-- {
   417  					if keys[k] == expK {
   418  						r++
   419  						remaining[j]--
   420  						if remaining[j] == 0 {
   421  							satisfiedBoundThresholdReverse[j] = r
   422  						}
   423  						break
   424  					}
   425  				}
   426  			}
   427  		}
   428  	}
   429  
   430  	for _, reverse := range []bool{false, true} {
   431  		for bound := 1; bound <= maxBound; bound++ {
   432  			t.Run(fmt.Sprintf("reverse=%t,bound=%d", reverse, bound), func(t *testing.T) {
   433  				b := &kv.Batch{}
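        				// MaxSpanRequestKeys caps the total number of rows returned across
        				// all requests in the batch, not per request.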
   434  				b.Header.MaxSpanRequestKeys = int64(bound)
   435  
   436  				for _, span := range scans {
   437  					if !reverse {
   438  						b.Scan(span[0], span[1])
   439  					} else {
   440  						b.ReverseScan(span[0], span[1])
   441  					}
   442  				}
   443  				if err := db.Run(ctx, b); err != nil {
   444  					t.Fatal(err)
   445  				}
   446  
   447  				expCount := maxExpCount
   448  				if bound < maxExpCount {
   449  					expCount = bound
   450  				}
   451  				// Compute the satisfied scans.
   452  				expSatisfied := make(map[int]struct{})
   453  				for i := range b.Results {
   454  					var threshold int
   455  					if !reverse {
   456  						threshold = satisfiedBoundThreshold[i]
   457  					} else {
   458  						threshold = satisfiedBoundThresholdReverse[i]
   459  					}
   460  					if bound >= threshold {
   461  						expSatisfied[i] = struct{}{}
   462  					}
   463  				}
   464  				opt := checkOptions{mode: AcceptPrefix, expCount: expCount}
   465  				if !reverse {
   466  					checkScanResults(
   467  						t, scans, b.Results, expResults, expSatisfied, opt)
   468  				} else {
   469  					checkReverseScanResults(
   470  						t, scans, b.Results, expResultsReverse, expSatisfied, opt)
   471  				}
   472  
   473  				// Re-query using the resume spans that were returned; check that all
   474  				// spans are read properly.
   475  				if bound < maxExpCount {
   476  					newB := &kv.Batch{}
   477  					for _, res := range b.Results {
   478  						if res.ResumeSpan != nil {
   479  							if !reverse {
   480  								newB.Scan(res.ResumeSpan.Key, res.ResumeSpan.EndKey)
   481  							} else {
   482  								newB.ReverseScan(res.ResumeSpan.Key, res.ResumeSpan.EndKey)
   483  							}
   484  						}
   485  					}
   486  					if err := db.Run(ctx, newB); err != nil {
   487  						t.Fatal(err)
   488  					}
   489  					// Add the results to the previous results.
   490  					j := 0
   491  					for i, res := range b.Results {
   492  						if res.ResumeSpan != nil {
   493  							b.Results[i].Rows = append(b.Results[i].Rows, newB.Results[j].Rows...)
   494  							b.Results[i].ResumeSpan = newB.Results[j].ResumeSpan
   495  							j++
   496  						}
   497  					}
   498  					for i := range b.Results {
   499  						expSatisfied[i] = struct{}{}
   500  					}
   501  					// Check that the scan results contain all the expected results.
   502  					opt = checkOptions{mode: Strict}
   503  					if !reverse {
   504  						checkScanResults(
   505  							t, scans, b.Results, expResults, expSatisfied, opt)
   506  					} else {
   507  						checkReverseScanResults(
   508  							t, scans, b.Results, expResultsReverse, expSatisfied, opt)
   509  					}
   510  				}
   511  			})
   512  		}
   513  	}
   514  }
   515  
   516  // TestMultiRangeBoundedBatchScanPartialResponses runs multiple scan requests
   517  // either out-of-order or over overlapping key spans and shows how the batch
   518  // responses can contain partial responses.
   519  func TestMultiRangeBoundedBatchScanPartialResponses(t *testing.T) {
   520  	defer leaktest.AfterTest(t)()
   521  	s, _ := startNoSplitMergeServer(t)
   522  	ctx := context.Background()
   523  	defer s.Stopper().Stop(ctx)
   524  
   525  	db := s.DB()
   526  	if err := setupMultipleRanges(ctx, db, "a", "b", "c", "d", "e", "f"); err != nil {
   527  		t.Fatal(err)
   528  	}
   529  
   530  	for _, key := range []string{"a1", "a2", "a3", "b1", "b2", "b3", "c1", "c2", "c3"} {
   531  		if err := db.Put(ctx, key, "value"); err != nil {
   532  			t.Fatal(err)
   533  		}
   534  	}
   535  
   536  	for _, tc := range []struct {
   537  		name         string
   538  		bound        int64
   539  		spans        [][]string
   540  		expResults   [][]string
   541  		expSatisfied []int
   542  	}{
   543  		{
   544  			name:  "unsorted, non-overlapping, neither satisfied",
   545  			bound: 6,
   546  			spans: [][]string{
   547  				{"b1", "d"}, {"a", "b1"},
   548  			},
   549  			expResults: [][]string{
   550  				{"b1", "b2", "b3"}, {"a1", "a2", "a3"},
   551  			},
   552  		},
   553  		{
   554  			name:  "unsorted, non-overlapping, first satisfied",
   555  			bound: 6,
   556  			spans: [][]string{
   557  				{"b1", "c"}, {"a", "b1"},
   558  			},
   559  			expResults: [][]string{
   560  				{"b1", "b2", "b3"}, {"a1", "a2", "a3"},
   561  			},
   562  			expSatisfied: []int{0},
   563  		},
   564  		{
   565  			name:  "unsorted, non-overlapping, second satisfied",
   566  			bound: 6,
   567  			spans: [][]string{
   568  				{"b1", "d"}, {"a", "b"},
   569  			},
   570  			expResults: [][]string{
   571  				{"b1", "b2", "b3"}, {"a1", "a2", "a3"},
   572  			},
   573  			expSatisfied: []int{1},
   574  		},
   575  		{
   576  			name:  "unsorted, non-overlapping, both satisfied",
   577  			bound: 6,
   578  			spans: [][]string{
   579  				{"b1", "c"}, {"a", "b"},
   580  			},
   581  			expResults: [][]string{
   582  				{"b1", "b2", "b3"}, {"a1", "a2", "a3"},
   583  			},
   584  			expSatisfied: []int{0, 1},
   585  		},
   586  		{
   587  			name:  "sorted, overlapping, neither satisfied",
   588  			bound: 7,
   589  			spans: [][]string{
   590  				{"a", "d"}, {"b", "g"},
   591  			},
   592  			expResults: [][]string{
   593  				{"a1", "a2", "a3", "b1", "b2", "b3"}, {"b1"},
   594  			},
   595  		},
   596  		{
   597  			name:  "sorted, overlapping, first satisfied",
   598  			bound: 7,
   599  			spans: [][]string{
   600  				{"a", "c"}, {"b", "g"},
   601  			},
   602  			expResults: [][]string{
   603  				{"a1", "a2", "a3", "b1", "b2", "b3"}, {"b1"},
   604  			},
   605  			expSatisfied: []int{0},
   606  		},
   607  		{
   608  			name:  "sorted, overlapping, second satisfied",
   609  			bound: 9,
   610  			spans: [][]string{
   611  				{"a", "d"}, {"b", "c"},
   612  			},
   613  			expResults: [][]string{
   614  				{"a1", "a2", "a3", "b1", "b2", "b3"}, {"b1", "b2", "b3"},
   615  			},
   616  			expSatisfied: []int{1},
   617  		},
   618  		{
   619  			name:  "sorted, overlapping, both satisfied",
   620  			bound: 9,
   621  			spans: [][]string{
   622  				{"a", "c"}, {"b", "c"},
   623  			},
   624  			expResults: [][]string{
   625  				{"a1", "a2", "a3", "b1", "b2", "b3"}, {"b1", "b2", "b3"},
   626  			},
   627  			expSatisfied: []int{0, 1},
   628  		},
   629  		{
   630  			name:  "unsorted, overlapping, neither satisfied",
   631  			bound: 7,
   632  			spans: [][]string{
   633  				{"b", "g"}, {"a", "d"},
   634  			},
   635  			expResults: [][]string{
   636  				{"b1", "b2", "b3"}, {"a1", "a2", "a3", "b1"},
   637  			},
   638  		},
   639  		{
   640  			name:  "unsorted, overlapping, first satisfied",
   641  			bound: 7,
   642  			spans: [][]string{
   643  				{"b", "c"}, {"a", "d"},
   644  			},
   645  			expResults: [][]string{
   646  				{"b1", "b2", "b3"}, {"a1", "a2", "a3", "b1"},
   647  			},
   648  			expSatisfied: []int{0},
   649  		},
   650  		{
   651  			name:  "unsorted, overlapping, second satisfied",
   652  			bound: 7,
   653  			spans: [][]string{
   654  				{"b", "g"}, {"a", "b2"},
   655  			},
   656  			expResults: [][]string{
   657  				{"b1", "b2", "b3"}, {"a1", "a2", "a3", "b1"},
   658  			},
   659  			expSatisfied: []int{1},
   660  		},
   661  		{
   662  			name:  "unsorted, overlapping, both satisfied",
   663  			bound: 7,
   664  			spans: [][]string{
   665  				{"b", "c"}, {"a", "b2"},
   666  			},
   667  			expResults: [][]string{
   668  				{"b1", "b2", "b3"}, {"a1", "a2", "a3", "b1"},
   669  			},
   670  			expSatisfied: []int{0, 1},
   671  		},
   672  		{
   673  			name:  "unsorted, overlapping, unreached",
   674  			bound: 7,
   675  			spans: [][]string{
   676  				{"b", "g"}, {"c", "f"}, {"a", "d"},
   677  			},
   678  			expResults: [][]string{
   679  				{"b1", "b2", "b3"}, {}, {"a1", "a2", "a3", "b1"},
   680  			},
   681  		},
   682  	} {
   683  		t.Run(tc.name, func(t *testing.T) {
   684  			b := &kv.Batch{}
   685  			b.Header.MaxSpanRequestKeys = tc.bound
   686  			for _, span := range tc.spans {
   687  				b.Scan(span[0], span[1])
   688  			}
   689  			if err := db.Run(ctx, b); err != nil {
   690  				t.Fatal(err)
   691  			}
   692  
   693  			expSatisfied := make(map[int]struct{})
   694  			for _, exp := range tc.expSatisfied {
   695  				expSatisfied[exp] = struct{}{}
   696  			}
   697  			opts := checkOptions{mode: Strict}
   698  			checkScanResults(t, tc.spans, b.Results, tc.expResults, expSatisfied, opts)
   699  		})
   700  	}
   701  }
   702  
   703  // checks the results of a DelRange.
   704  func checkDelRangeResults(
   705  	t *testing.T,
   706  	spans [][]string,
   707  	results []kv.Result,
   708  	expResults [][]string,
   709  	expSatisfied map[int]struct{},
   710  ) {
   711  	checkDelRangeSpanResults(t, results, expResults)
   712  	checkResumeSpanDelRangeResults(t, spans, results, expResults, expSatisfied)
   713  }
   714  
   715  // checks the keys returned from a DelRange.
   716  func checkDelRangeSpanResults(t *testing.T, results []kv.Result, expResults [][]string) {
   717  	t.Helper()
   718  	require.Equal(t, len(expResults), len(results))
   719  	for i, res := range results {
   720  		require.Equal(t, len(expResults[i]), len(res.Keys))
   721  		for j, key := range res.Keys {
   722  			require.Equal(t, expResults[i][j], string(key))
   723  		}
   724  	}
   725  }
   726  
   727  // checks the ResumeSpan in the DelRange results.
   728  func checkResumeSpanDelRangeResults(
   729  	t *testing.T,
   730  	spans [][]string,
   731  	results []kv.Result,
   732  	expResults [][]string,
   733  	expSatisfied map[int]struct{},
   734  ) {
   735  	t.Helper()
   736  	for i, res := range results {
   737  		keyLen := len(res.Keys)
   738  		// Check that satisfied requests don't have resume spans.
   739  		if _, satisfied := expSatisfied[i]; satisfied {
   740  			require.Nil(t, res.ResumeSpan)
   741  			continue
   742  		}
   743  
   744  		// Check ResumeSpan when request has been processed.
   745  		require.NotNil(t, res.ResumeSpan)
   746  		require.Equal(t, roachpb.RESUME_KEY_LIMIT, res.ResumeReason)
   747  
   748  		// Keys can be empty if the limit was exhausted before this request deleted anything.
   749  		if keyLen == 0 {
   750  			// The request was not processed; the resume span key >= first seen key.
   751  			require.LessOrEqual(t, spans[i][0], string(res.ResumeSpan.Key), "ResumeSpan.Key")
   752  		} else {
   753  			// The next start key is always greater than the last key seen.
   754  			lastRes := expResults[i][keyLen-1]
   755  			require.Less(t, lastRes, string(res.ResumeSpan.Key), "ResumeSpan.Key")
   756  		}
   757  		// The EndKey is untouched.
   758  		require.Equal(t, spans[i][1], string(res.ResumeSpan.EndKey), "ResumeSpan.EndKey")
   759  	}
   760  }
   761  
   762  // Tests multiple delete range requests across many ranges with multiple bounds.
   763  func TestMultiRangeBoundedBatchDelRange(t *testing.T) {
   764  	defer leaktest.AfterTest(t)()
   765  	s, _ := startNoSplitMergeServer(t)
   766  	ctx := context.Background()
   767  	defer s.Stopper().Stop(ctx)
   768  
   769  	db := s.DB()
   770  	if err := setupMultipleRanges(ctx, db, "a", "b", "c", "d", "e", "f", "g", "h"); err != nil {
   771  		t.Fatal(err)
   772  	}
   773  
   774  	expResultsWithoutBound := [][]string{
   775  		{"a1", "a2", "a3", "b1", "b2"},
   776  		{"c1", "c2", "d1"},
   777  		{"g1", "g2"},
   778  	}
   779  
   780  	for bound := 1; bound <= 20; bound++ {
   781  		t.Run(fmt.Sprintf("bound=%d", bound), func(t *testing.T) {
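        			// Re-populate the keys on every iteration, since the DelRange batch
        			// below removes them.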
   782  			for _, key := range []string{"a1", "a2", "a3", "b1", "b2", "c1", "c2", "d1", "f1", "f2", "f3", "g1", "g2", "h1"} {
   783  				if err := db.Put(ctx, key, "value"); err != nil {
   784  					t.Fatal(err)
   785  				}
   786  			}
   787  
   788  			b := &kv.Batch{}
   789  			b.Header.MaxSpanRequestKeys = int64(bound)
   790  			spans := [][]string{{"a", "c"}, {"c", "e"}, {"g", "h"}}
   791  			for _, span := range spans {
   792  				b.DelRange(span[0], span[1], true /* returnKeys */)
   793  			}
   794  			if err := db.Run(ctx, b); err != nil {
   795  				t.Fatal(err)
   796  			}
   797  
   798  			require.Equal(t, len(expResultsWithoutBound), len(b.Results))
   799  
   800  			expResults := make([][]string, len(expResultsWithoutBound))
   801  			expSatisfied := make(map[int]struct{})
   802  			var count int
   803  		Loop:
   804  			for i, expRes := range expResultsWithoutBound {
   805  				for _, key := range expRes {
   806  					if count == bound {
   807  						break Loop
   808  					}
   809  					expResults[i] = append(expResults[i], key)
   810  					count++
   811  				}
   812  				// NB: only works because requests are sorted and non-overlapping.
   813  				expSatisfied[i] = struct{}{}
   814  			}
   815  
   816  			checkDelRangeResults(t, spans, b.Results, expResults, expSatisfied)
   817  		})
   818  	}
   819  }
   820  
   821  // TestMultiRangeBoundedBatchDelRangePartialResponses runs multiple delete range
   822  // requests either out-of-order or over overlapping key spans and shows how the
   823  // batch responses can contain partial responses.
   824  func TestMultiRangeBoundedBatchDelRangePartialResponses(t *testing.T) {
   825  	defer leaktest.AfterTest(t)()
   826  	s, _ := startNoSplitMergeServer(t)
   827  	ctx := context.Background()
   828  	defer s.Stopper().Stop(ctx)
   829  
   830  	db := s.DB()
   831  	if err := setupMultipleRanges(ctx, db, "a", "b", "c", "d", "e", "f"); err != nil {
   832  		t.Fatal(err)
   833  	}
   834  
   835  	for _, tc := range []struct {
   836  		name         string
   837  		bound        int64
   838  		spans        [][]string
   839  		expResults   [][]string
   840  		expSatisfied []int
   841  	}{
   842  		{
   843  			name:  "unsorted, non-overlapping, neither satisfied",
   844  			bound: 6,
   845  			spans: [][]string{
   846  				{"b1", "d"}, {"a", "b1"},
   847  			},
   848  			expResults: [][]string{
   849  				{"b1", "b2", "b3"}, {"a1", "a2", "a3"},
   850  			},
   851  		},
   852  		{
   853  			name:  "unsorted, non-overlapping, first satisfied",
   854  			bound: 6,
   855  			spans: [][]string{
   856  				{"b1", "c"}, {"a", "b1"},
   857  			},
   858  			expResults: [][]string{
   859  				{"b1", "b2", "b3"}, {"a1", "a2", "a3"},
   860  			},
   861  			expSatisfied: []int{0},
   862  		},
   863  		{
   864  			name:  "unsorted, non-overlapping, second satisfied",
   865  			bound: 6,
   866  			spans: [][]string{
   867  				{"b1", "d"}, {"a", "b"},
   868  			},
   869  			expResults: [][]string{
   870  				{"b1", "b2", "b3"}, {"a1", "a2", "a3"},
   871  			},
   872  			expSatisfied: []int{1},
   873  		},
   874  		{
   875  			name:  "unsorted, non-overlapping, both satisfied",
   876  			bound: 6,
   877  			spans: [][]string{
   878  				{"b1", "c"}, {"a", "b"},
   879  			},
   880  			expResults: [][]string{
   881  				{"b1", "b2", "b3"}, {"a1", "a2", "a3"},
   882  			},
   883  			expSatisfied: []int{0, 1},
   884  		},
   885  		{
   886  			// NOTE: the first request will have already deleted the keys, so
   887  			// the second request has no keys to delete.
   888  			name:  "sorted, overlapping, neither satisfied",
   889  			bound: 7,
   890  			spans: [][]string{
   891  				{"a", "d"}, {"b", "g"},
   892  			},
   893  			expResults: [][]string{
   894  				{"a1", "a2", "a3", "b1", "b2", "b3", "c1"}, {},
   895  			},
   896  		},
   897  		{
   898  			name:  "sorted, overlapping, first satisfied",
   899  			bound: 7,
   900  			spans: [][]string{
   901  				{"a", "c"}, {"b", "g"},
   902  			},
   903  			expResults: [][]string{
   904  				{"a1", "a2", "a3", "b1", "b2", "b3"}, {"c1"},
   905  			},
   906  			expSatisfied: []int{0},
   907  		},
   908  		{
   909  			name:  "sorted, overlapping, second satisfied",
   910  			bound: 7,
   911  			spans: [][]string{
   912  				{"a", "d"}, {"b", "c"},
   913  			},
   914  			expResults: [][]string{
   915  				{"a1", "a2", "a3", "b1", "b2", "b3", "c1"}, {},
   916  			},
   917  			expSatisfied: []int{1},
   918  		},
   919  		{
   920  			name:  "sorted, overlapping, both satisfied",
   921  			bound: 7,
   922  			spans: [][]string{
   923  				{"a", "c"}, {"b", "c"},
   924  			},
   925  			expResults: [][]string{
   926  				{"a1", "a2", "a3", "b1", "b2", "b3"}, {},
   927  			},
   928  			expSatisfied: []int{0, 1},
   929  		},
   930  		{
   931  			name:  "unsorted, overlapping, neither satisfied",
   932  			bound: 7,
   933  			spans: [][]string{
   934  				{"b", "g"}, {"a", "d"},
   935  			},
   936  			expResults: [][]string{
   937  				{"b1", "b2", "b3", "c1"}, {"a1", "a2", "a3"},
   938  			},
   939  		},
   940  		{
   941  			name:  "unsorted, overlapping, first satisfied",
   942  			bound: 7,
   943  			spans: [][]string{
   944  				{"b", "c"}, {"a", "d"},
   945  			},
   946  			expResults: [][]string{
   947  				{"b1", "b2", "b3"}, {"a1", "a2", "a3", "c1"},
   948  			},
   949  			expSatisfied: []int{0},
   950  		},
   951  		{
   952  			name:  "unsorted, overlapping, second satisfied",
   953  			bound: 7,
   954  			spans: [][]string{
   955  				{"b", "g"}, {"a", "b2"},
   956  			},
   957  			expResults: [][]string{
   958  				{"b1", "b2", "b3", "c1"}, {"a1", "a2", "a3"},
   959  			},
   960  			expSatisfied: []int{1},
   961  		},
   962  		{
   963  			name:  "unsorted, overlapping, both satisfied",
   964  			bound: 7,
   965  			spans: [][]string{
   966  				{"b", "c"}, {"a", "b2"},
   967  			},
   968  			expResults: [][]string{
   969  				{"b1", "b2", "b3"}, {"a1", "a2", "a3"},
   970  			},
   971  			expSatisfied: []int{0, 1},
   972  		},
   973  		{
   974  			name:  "unsorted, overlapping, unreached",
   975  			bound: 6,
   976  			spans: [][]string{
   977  				{"b", "g"}, {"c", "f"}, {"a", "d"},
   978  			},
   979  			expResults: [][]string{
   980  				{"b1", "b2", "b3"}, {}, {"a1", "a2", "a3"},
   981  			},
   982  		},
   983  	} {
   984  		t.Run(tc.name, func(t *testing.T) {
   985  			// Re-write all keys before each subtest.
   986  			for _, key := range []string{"a1", "a2", "a3", "b1", "b2", "b3", "c1", "c2", "c3", "d1", "d2", "d3"} {
   987  				if err := db.Put(ctx, key, "value"); err != nil {
   988  					t.Fatal(err)
   989  				}
   990  			}
   991  
   992  			b := &kv.Batch{}
   993  			b.Header.MaxSpanRequestKeys = tc.bound
   994  			for _, span := range tc.spans {
   995  				b.DelRange(span[0], span[1], true /* returnKeys */)
   996  			}
   997  			if err := db.Run(ctx, b); err != nil {
   998  				t.Fatal(err)
   999  			}
  1000  
  1001  			expSatisfied := make(map[int]struct{})
  1002  			for _, exp := range tc.expSatisfied {
  1003  				expSatisfied[exp] = struct{}{}
  1004  			}
  1005  			checkDelRangeResults(t, tc.spans, b.Results, tc.expResults, expSatisfied)
  1006  		})
  1007  	}
  1008  }
  1009  
  1010  // Test that a bounded range delete request that gets terminated at a range
  1011  // boundary uses the range boundary as the start key in the response ResumeSpan.
  1012  func TestMultiRangeBoundedBatchDelRangeBoundary(t *testing.T) {
  1013  	defer leaktest.AfterTest(t)()
  1014  	s, _ := startNoSplitMergeServer(t)
  1015  	ctx := context.Background()
  1016  	defer s.Stopper().Stop(ctx)
  1017  
  1018  	db := s.DB()
  1019  	if err := setupMultipleRanges(ctx, db, "a", "b"); err != nil {
  1020  		t.Fatal(err)
  1021  	}
  1022  	for _, key := range []string{"a1", "a2", "a3", "b1", "b2"} {
  1023  		if err := db.Put(ctx, key, "value"); err != nil {
  1024  			t.Fatal(err)
  1025  		}
  1026  	}
  1027  
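        	// With a limit of 3, the DelRange consumes exactly the three keys in
        	// ["a", "b") and stops at the range boundary, so the ResumeSpan is
        	// expected to start at "b".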
  1028  	b := &kv.Batch{}
  1029  	b.Header.MaxSpanRequestKeys = 3
  1030  	b.DelRange("a", "c", true /* returnKeys */)
  1031  	if err := db.Run(ctx, b); err != nil {
  1032  		t.Fatal(err)
  1033  	}
  1034  	if len(b.Results) != 1 {
  1035  		t.Fatalf("%d results returned", len(b.Results))
  1036  	}
  1037  	if string(b.Results[0].ResumeSpan.Key) != "b" || string(b.Results[0].ResumeSpan.EndKey) != "c" {
  1038  		t.Fatalf("received ResumeSpan %+v", b.Results[0].ResumeSpan)
  1039  	}
  1040  
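        	// With a limit of 1, only "b1" is deleted; the ResumeSpan is expected to
        	// continue at the next key, "b2".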
  1041  	b = &kv.Batch{}
  1042  	b.Header.MaxSpanRequestKeys = 1
  1043  	b.DelRange("b", "c", true /* returnKeys */)
  1044  	if err := db.Run(ctx, b); err != nil {
  1045  		t.Fatal(err)
  1046  	}
  1047  	if len(b.Results) != 1 {
  1048  		t.Fatalf("%d results returned", len(b.Results))
  1049  	}
  1050  	if string(b.Results[0].ResumeSpan.Key) != "b2" || string(b.Results[0].ResumeSpan.EndKey) != "c" {
  1051  		t.Fatalf("received ResumeSpan %+v", b.Results[0].ResumeSpan)
  1052  	}
  1053  }
  1054  
  1055  // TestMultiRangeEmptyAfterTruncate exercises a code path in which a
  1056  // multi-range request, after truncation, has no requests left for one of the
  1057  // ranges it spans. In that case, that range is skipped.
  1058  func TestMultiRangeEmptyAfterTruncate(t *testing.T) {
  1059  	defer leaktest.AfterTest(t)()
  1060  	s, _ := startNoSplitMergeServer(t)
  1061  	ctx := context.Background()
  1062  	defer s.Stopper().Stop(ctx)
  1063  	db := s.DB()
  1064  	if err := setupMultipleRanges(ctx, db, "c", "d"); err != nil {
  1065  		t.Fatal(err)
  1066  	}
  1067  
  1068  	// Delete the keys within a transaction. The range [c,d) doesn't have
  1069  	// any active requests.
  1070  	if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1071  		b := txn.NewBatch()
  1072  		b.DelRange("a", "b", false /* returnKeys */)
  1073  		b.DelRange("e", "f", false /* returnKeys */)
  1074  		return txn.CommitInBatch(ctx, b)
  1075  	}); err != nil {
  1076  		t.Fatalf("unexpected error on transactional DeleteRange: %s", err)
  1077  	}
  1078  }
  1079  
  1080  // TestMultiRequestBatchWithFwdAndReverseRequests verifies that batches with a limit containing both forward and reverse scans are disallowed.
  1081  func TestMultiRequestBatchWithFwdAndReverseRequests(t *testing.T) {
  1082  	defer leaktest.AfterTest(t)()
  1083  	s, _ := startNoSplitMergeServer(t)
  1084  	ctx := context.Background()
  1085  	defer s.Stopper().Stop(ctx)
  1086  	db := s.DB()
  1087  	if err := setupMultipleRanges(ctx, db, "a", "b"); err != nil {
  1088  		t.Fatal(err)
  1089  	}
  1090  	b := &kv.Batch{}
  1091  	b.Header.MaxSpanRequestKeys = 100
  1092  	b.Scan("a", "b")
  1093  	b.ReverseScan("a", "b")
  1094  	if err := db.Run(ctx, b); !testutils.IsError(
  1095  		err, "batch with limit contains both forward and reverse scans",
  1096  	) {
  1097  		t.Fatal(err)
  1098  	}
  1099  }
  1100  
  1101  // TestMultiRangeScanReverseScanDeleteResolve verifies that Scan, ReverseScan,
  1102  // DeleteRange and ResolveIntentRange work across ranges.
  1103  func TestMultiRangeScanReverseScanDeleteResolve(t *testing.T) {
  1104  	defer leaktest.AfterTest(t)()
  1105  	s, _ := startNoSplitMergeServer(t)
  1106  	ctx := context.Background()
  1107  	defer s.Stopper().Stop(ctx)
  1108  	db := s.DB()
  1109  	if err := setupMultipleRanges(ctx, db, "b"); err != nil {
  1110  		t.Fatal(err)
  1111  	}
  1112  
  1113  	// Write keys before, at, and after the split key.
  1114  	for _, key := range []string{"a", "b", "c"} {
  1115  		if err := db.Put(ctx, key, "value"); err != nil {
  1116  			t.Fatal(err)
  1117  		}
  1118  	}
  1119  	// Scan to retrieve the keys just written.
  1120  	if rows, err := db.Scan(ctx, "a", "q", 0); err != nil {
  1121  		t.Fatalf("unexpected error on Scan: %s", err)
  1122  	} else if l := len(rows); l != 3 {
  1123  		t.Errorf("expected 3 rows; got %d", l)
  1124  	}
  1125  
  1126  	// Scan in reverse order to retrieve the keys just written.
  1127  	if rows, err := db.ReverseScan(ctx, "a", "q", 0); err != nil {
  1128  		t.Fatalf("unexpected error on ReverseScan: %s", err)
  1129  	} else if l := len(rows); l != 3 {
  1130  		t.Errorf("expected 3 rows; got %d", l)
  1131  	}
  1132  
  1133  	// Delete the keys within a transaction. Implicitly, the intents are
  1134  	// resolved via ResolveIntentRange upon completion.
  1135  	if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1136  		b := txn.NewBatch()
  1137  		b.DelRange("a", "d", false /* returnKeys */)
  1138  		return txn.CommitInBatch(ctx, b)
  1139  	}); err != nil {
  1140  		t.Fatalf("unexpected error on transactional DeleteRange: %s", err)
  1141  	}
  1142  
  1143  	// Scan consistently to make sure the intents are gone.
  1144  	if rows, err := db.Scan(ctx, "a", "q", 0); err != nil {
  1145  		t.Fatalf("unexpected error on Scan: %s", err)
  1146  	} else if l := len(rows); l != 0 {
  1147  		t.Errorf("expected 0 rows; got %d", l)
  1148  	}
  1149  
  1150  	// ReverseScan consistently to make sure the intents are gone.
  1151  	if rows, err := db.ReverseScan(ctx, "a", "q", 0); err != nil {
  1152  		t.Fatalf("unexpected error on ReverseScan: %s", err)
  1153  	} else if l := len(rows); l != 0 {
  1154  		t.Errorf("expected 0 rows; got %d", l)
  1155  	}
  1156  }
  1157  
  1158  // TestMultiRangeScanReverseScanInconsistent verifies that a Scan/ReverseScan
  1159  // across ranges that doesn't require read consistency will set a timestamp
  1160  // using the clock local to the distributed sender.
  1161  func TestMultiRangeScanReverseScanInconsistent(t *testing.T) {
  1162  	defer leaktest.AfterTest(t)()
  1163  
  1164  	for _, rc := range []roachpb.ReadConsistencyType{
  1165  		roachpb.READ_UNCOMMITTED,
  1166  		roachpb.INCONSISTENT,
  1167  	} {
  1168  		t.Run(rc.String(), func(t *testing.T) {
  1169  			s, _ := startNoSplitMergeServer(t)
  1170  			ctx := context.Background()
  1171  			defer s.Stopper().Stop(ctx)
  1172  			db := s.DB()
  1173  			if err := setupMultipleRanges(ctx, db, "b"); err != nil {
  1174  				t.Fatal(err)
  1175  			}
  1176  
  1177  			// Write keys "a" and "b", the latter of which is the first key in the
  1178  			// second range.
  1179  			keys := [2]string{"a", "b"}
  1180  			ts := [2]hlc.Timestamp{}
  1181  			for i, key := range keys {
  1182  				b := &kv.Batch{}
  1183  				b.Put(key, "value")
  1184  				if err := db.Run(ctx, b); err != nil {
  1185  					t.Fatal(err)
  1186  				}
  1187  				ts[i] = s.Clock().Now()
  1188  				log.Infof(ctx, "%d: %s %d", i, key, ts[i])
  1189  				if i == 0 {
  1190  					testutils.SucceedsSoon(t, func() error {
  1191  						// Enforce that when we write the second key, it's written
  1192  						// with a strictly higher timestamp. We're dropping logical
  1193  						// ticks and the clock may just have been pushed into the
  1194  						// future, so that's necessary. See #3122.
  1195  						if ts[0].WallTime >= s.Clock().Now().WallTime {
  1196  							return errors.New("time stands still")
  1197  						}
  1198  						return nil
  1199  					})
  1200  				}
  1201  			}
  1202  
  1203  			// Do an inconsistent Scan/ReverseScan from a new DistSender and verify
  1204  			// it does the read at its local clock and doesn't receive an
  1205  			// OpRequiresTxnError. We set the local clock to a timestamp just above
  1206  			// that of the first write to verify it's used to read only key "a".
  1207  			for i, request := range []roachpb.Request{
  1208  				roachpb.NewScan(roachpb.Key("a"), roachpb.Key("c"), false),
  1209  				roachpb.NewReverseScan(roachpb.Key("a"), roachpb.Key("c"), false),
  1210  			} {
  1211  				manual := hlc.NewManualClock(ts[0].WallTime + 1)
  1212  				clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
  1213  				ds := kvcoord.NewDistSender(
  1214  					kvcoord.DistSenderConfig{
  1215  						AmbientCtx: log.AmbientContext{Tracer: s.ClusterSettings().Tracer},
  1216  						Clock:      clock,
  1217  						RPCContext: s.RPCContext(),
  1218  						NodeDialer: nodedialer.New(s.RPCContext(), gossip.AddressResolver(s.(*server.TestServer).Gossip())),
  1219  						Settings:   cluster.MakeTestingClusterSettings(),
  1220  					},
  1221  					s.(*server.TestServer).Gossip(),
  1222  				)
  1223  
  1224  				reply, err := kv.SendWrappedWith(context.Background(), ds, roachpb.Header{
  1225  					ReadConsistency: rc,
  1226  				}, request)
  1227  				if err != nil {
  1228  					t.Fatal(err)
  1229  				}
  1230  
  1231  				var rows []roachpb.KeyValue
  1232  				switch r := reply.(type) {
  1233  				case *roachpb.ScanResponse:
  1234  					rows = r.Rows
  1235  				case *roachpb.ReverseScanResponse:
  1236  					rows = r.Rows
  1237  				default:
  1238  					t.Fatalf("unexpected response %T: %v", reply, reply)
  1239  				}
  1240  
  1241  				if l := len(rows); l != 1 {
  1242  					t.Fatalf("%d: expected 1 row; got %d\n%v", i, l, rows)
  1243  				}
  1244  				if key := string(rows[0].Key); keys[0] != key {
  1245  					t.Errorf("expected key %q; got %q", keys[0], key)
  1246  				}
  1247  			}
  1248  		})
  1249  	}
  1250  }
  1251  
  1252  // TestParallelSender splits the keyspace 10 times and verifies that a
  1253  // scan across all of the ranges and a batch of puts, one to each range,
  1254  // both use the parallelizing dist sender.
  1255  func TestParallelSender(t *testing.T) {
  1256  	defer leaktest.AfterTest(t)()
  1257  	s, db := startNoSplitMergeServer(t)
  1258  	defer s.Stopper().Stop(context.Background())
  1259  	ctx := context.Background()
  1260  
  1261  	// Split into multiple ranges.
  1262  	splitKeys := []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}
  1263  	for _, key := range splitKeys {
  1264  		if err := db.AdminSplit(context.Background(), key, key, hlc.MaxTimestamp /* expirationTime */); err != nil {
  1265  			t.Fatal(err)
  1266  		}
  1267  	}
  1268  
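        	// AsyncSentCount counts the partial batches that the DistSender sent out
        	// in parallel.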
  1269  	getPSCount := func() int64 {
  1270  		return s.DistSenderI().(*kvcoord.DistSender).Metrics().AsyncSentCount.Count()
  1271  	}
  1272  	psCount := getPSCount()
  1273  
  1274  	// Batch writes to each range.
  1275  	if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1276  		b := txn.NewBatch()
  1277  		for _, key := range splitKeys {
  1278  			b.Put(key, "val")
  1279  		}
  1280  		return txn.CommitInBatch(ctx, b)
  1281  	}); err != nil {
  1282  		t.Errorf("unexpected error on batch put: %s", err)
  1283  	}
  1284  	newPSCount := getPSCount()
  1285  	if c := newPSCount - psCount; c < 9 {
  1286  		t.Errorf("expected at least 9 parallel sends; got %d", c)
  1287  	}
  1288  	psCount = newPSCount
  1289  
  1290  	// Scan across all rows.
  1291  	if rows, err := db.Scan(context.Background(), "a", "z", 0); err != nil {
  1292  		t.Fatalf("unexpected error on Scan: %s", err)
  1293  	} else if l := len(rows); l != len(splitKeys) {
  1294  		t.Fatalf("expected %d rows; got %d", len(splitKeys), l)
  1295  	}
  1296  	newPSCount = getPSCount()
  1297  	if c := newPSCount - psCount; c < 9 {
  1298  		t.Errorf("expected at least 9 parallel sends; got %d", c)
  1299  	}
  1300  }
  1301  
  1302  func initReverseScanTestEnv(s serverutils.TestServerInterface, t *testing.T) *kv.DB {
  1303  	db := s.DB()
  1304  
  1305  	// Set up multiple ranges:
  1306  	// ["", "b"), ["b", "e"), ["e", "g"), and ["g", "\xff\xff").
  1307  	for _, key := range []string{"b", "e", "g"} {
  1308  		// Split the keyspace at the given key.
  1309  		if err := db.AdminSplit(context.Background(), key, key, hlc.MaxTimestamp /* expirationTime */); err != nil {
  1310  			t.Fatal(err)
  1311  		}
  1312  	}
  1313  	// Write keys before, at, and after the split key.
  1314  	for _, key := range []string{"a", "b", "c", "d", "e", "f", "g", "h"} {
  1315  		if err := db.Put(context.Background(), key, "value"); err != nil {
  1316  			t.Fatal(err)
  1317  		}
  1318  	}
  1319  	return db
  1320  }
  1321  
  1322  // TestSingleRangeReverseScan verifies that ReverseScan gets the right results
  1323  // on a single range.
  1324  func TestSingleRangeReverseScan(t *testing.T) {
  1325  	defer leaktest.AfterTest(t)()
  1326  	s, _ := startNoSplitMergeServer(t)
  1327  	defer s.Stopper().Stop(context.Background())
  1328  	db := initReverseScanTestEnv(s, t)
  1329  	ctx := context.Background()
  1330  
  1331  	// Case 1: Request.EndKey is in the middle of the range.
  1332  	if rows, err := db.ReverseScan(ctx, "b", "d", 0); err != nil {
  1333  		t.Fatalf("unexpected error on ReverseScan: %s", err)
  1334  	} else if l := len(rows); l != 2 {
  1335  		t.Errorf("expected 2 rows; got %d", l)
  1336  	}
  1337  	if rows, err := db.ReverseScan(ctx, "b", "d", 1); err != nil {
  1338  		t.Fatalf("unexpected error on ReverseScan: %s", err)
  1339  	} else if l := len(rows); l != 1 {
  1340  		t.Errorf("expected 1 rows; got %d", l)
  1341  	}
  1342  
  1343  	// Case 2: Request.EndKey is equal to the EndKey of the range.
  1344  	if rows, pErr := db.ReverseScan(ctx, "e", "g", 0); pErr != nil {
  1345  		t.Fatalf("unexpected error on ReverseScan: %s", pErr)
  1346  	} else if l := len(rows); l != 2 {
  1347  		t.Errorf("expected 2 rows; got %d", l)
  1348  	}
  1349  	// Case 3: Test keys.TableDataMin. Expected to return "g" and "h".
  1350  	wanted := 2
  1351  	if rows, pErr := db.ReverseScan(ctx, "g", keys.TableDataMin, 0); pErr != nil {
  1352  		t.Fatalf("unexpected error on ReverseScan: %s", pErr)
  1353  	} else if l := len(rows); l != wanted {
  1354  		t.Errorf("expected %d rows; got %d", wanted, l)
  1355  	}
  1356  	// Case 4: Test keys.SystemMax.
  1357  	// This span covers the system DB keys. Note sql.GetInitialSystemValues
  1358  	// returns one key before keys.SystemMax, but our scan includes one key
  1359  	// (\xffa) created for the test.
  1360  	if rows, pErr := db.ReverseScan(ctx, keys.SystemMax, "b", 0); pErr != nil {
  1361  		t.Fatalf("unexpected error on ReverseScan: %s", pErr)
  1362  	} else if l := len(rows); l != 1 {
  1363  		t.Errorf("expected 1 row; got %d", l)
  1364  	}
  1365  }
  1366  
  1367  // TestMultiRangeReverseScan verifies that ReverseScan gets the right results
  1368  // across multiple ranges.
  1369  func TestMultiRangeReverseScan(t *testing.T) {
  1370  	defer leaktest.AfterTest(t)()
  1371  	s, _ := startNoSplitMergeServer(t)
  1372  	defer s.Stopper().Stop(context.Background())
  1373  	db := initReverseScanTestEnv(s, t)
  1374  	ctx := context.Background()
  1375  
  1376  	// Case 1: Request.EndKey is in the middle of the range.
  1377  	if rows, pErr := db.ReverseScan(ctx, "a", "d", 0); pErr != nil {
  1378  		t.Fatalf("unexpected error on ReverseScan: %s", pErr)
  1379  	} else if l := len(rows); l != 3 {
  1380  		t.Errorf("expected 3 rows; got %d", l)
  1381  	}
  1382  	if rows, pErr := db.ReverseScan(ctx, "a", "d", 2); pErr != nil {
  1383  		t.Fatalf("unexpected error on ReverseScan: %s", pErr)
  1384  	} else if l := len(rows); l != 2 {
  1385  		t.Errorf("expected 2 rows; got %d", l)
  1386  	}
  1387  	// Case 2: Request.EndKey is equal to the EndKey of the range.
  1388  	if rows, pErr := db.ReverseScan(ctx, "d", "g", 0); pErr != nil {
  1389  		t.Fatalf("unexpected error on ReverseScan: %s", pErr)
  1390  	} else if l := len(rows); l != 3 {
  1391  		t.Errorf("expected 3 rows; got %d", l)
  1392  	}
  1393  }
  1394  
  1395  // TestBatchPutWithConcurrentSplit creates a batch with a series of put
  1396  // requests and splits the middle of the range in order to trigger
  1397  // reentrant invocation of DistSender.divideAndSendBatchToRanges. See
  1398  // #12603 for more details.
  1399  func TestBatchPutWithConcurrentSplit(t *testing.T) {
  1400  	defer leaktest.AfterTest(t)()
  1401  	s, db := startNoSplitMergeServer(t)
  1402  	defer s.Stopper().Stop(context.Background())
  1403  
  1404  	// Split first using the default client and scan to make sure that
  1405  	// the range descriptor cache reflects the split.
  1406  	for _, key := range []string{"b", "f"} {
  1407  		if err := db.AdminSplit(context.Background(), key, key, hlc.MaxTimestamp /* expirationTime */); err != nil {
  1408  			t.Fatal(err)
  1409  		}
  1410  	}
  1411  	if rows, err := db.Scan(context.Background(), "a", "z", 0); err != nil {
  1412  		t.Fatal(err)
  1413  	} else if l := len(rows); l != 0 {
  1414  		t.Fatalf("expected empty keyspace; got %d rows", l)
  1415  	}
  1416  
  1417  	// Now, split further at the given keys, but use a new dist sender so
  1418  	// we don't update the caches on the default dist sender-backed client.
  1419  	ds := kvcoord.NewDistSender(
  1420  		kvcoord.DistSenderConfig{
  1421  			AmbientCtx: log.AmbientContext{Tracer: s.ClusterSettings().Tracer},
  1422  			Clock:      s.Clock(),
  1423  			RPCContext: s.RPCContext(),
  1424  			NodeDialer: nodedialer.New(s.RPCContext(), gossip.AddressResolver(s.(*server.TestServer).Gossip())),
  1425  			Settings:   cluster.MakeTestingClusterSettings(),
  1426  		}, s.(*server.TestServer).Gossip(),
  1427  	)
  1428  	for _, key := range []string{"c"} {
  1429  		req := &roachpb.AdminSplitRequest{
  1430  			RequestHeader: roachpb.RequestHeader{
  1431  				Key: roachpb.Key(key),
  1432  			},
  1433  			SplitKey:       roachpb.Key(key),
  1434  			ExpirationTime: hlc.MaxTimestamp,
  1435  		}
  1436  		if _, err := kv.SendWrapped(context.Background(), ds, req); err != nil {
  1437  			t.Fatal(err)
  1438  		}
  1439  	}
  1440  
  1441  	// Execute a batch on the default sender. Since its cache will not
  1442  	// have been updated to reflect the new splits, it will discover
  1443  	// them partway through and need to reinvoke divideAndSendBatchToRanges.
  1444  	b := &kv.Batch{}
  1445  	for i, key := range []string{"a1", "b1", "c1", "d1", "f1"} {
  1446  		b.Put(key, fmt.Sprintf("value-%d", i))
  1447  	}
  1448  	if err := db.Run(context.Background(), b); err != nil {
  1449  		t.Fatal(err)
  1450  	}
  1451  }
  1452  
  1453  // TestReverseScanWithSplitAndMerge verifies that ReverseScan gets the right results
  1454  // across multiple ranges while range splits and merges happen.
  1455  func TestReverseScanWithSplitAndMerge(t *testing.T) {
  1456  	defer leaktest.AfterTest(t)()
  1457  	s, _ := startNoSplitMergeServer(t)
  1458  	defer s.Stopper().Stop(context.Background())
  1459  	db := initReverseScanTestEnv(s, t)
  1460  
  1461  	// Case 1: An encounter with a range split.
  1462  	// Split the range ["b", "e") at "c".
  1463  	if err := db.AdminSplit(context.Background(), "c", "c", hlc.MaxTimestamp /* expirationTime */); err != nil {
  1464  		t.Fatal(err)
  1465  	}
  1466  
  1467  	// The ReverseScan will run into a stale descriptor.
  1468  	if rows, err := db.ReverseScan(context.Background(), "a", "d", 0); err != nil {
  1469  		t.Fatalf("unexpected error on ReverseScan: %s", err)
  1470  	} else if l := len(rows); l != 3 {
  1471  		t.Errorf("expected 3 rows; got %d", l)
  1472  	}
  1473  
  1474  	// Case 2: An encounter with a range merge.
  1475  	// Merge the ranges ["e", "g") and ["g", "\xff\xff").
  1476  	if err := db.AdminMerge(context.Background(), "e"); err != nil {
  1477  		t.Fatal(err)
  1478  	}
  1479  	if rows, err := db.ReverseScan(context.Background(), "d", "g", 0); err != nil {
  1480  		t.Fatalf("unexpected error on ReverseScan: %s", err)
  1481  	} else if l := len(rows); l != 3 {
  1482  		t.Errorf("expected 3 rows; got %d", l)
  1483  	}
  1484  }
  1485  
  1486  func TestBadRequest(t *testing.T) {
  1487  	defer leaktest.AfterTest(t)()
  1488  	t.Skip("TODO(andreimatei): This last assertion in this test was broken by #33150. " +
  1489  		"I suspect the reason is that there is no longer a single Range " +
  1490  		"that spans [KeyMin, z), so we're not hitting the error.")
  1491  	s, db := startNoSplitMergeServer(t)
  1492  	defer s.Stopper().Stop(context.Background())
  1493  	ctx := context.Background()
  1494  
  1495  	// Write key "a".
  1496  	if err := db.Put(ctx, "a", "value"); err != nil {
  1497  		t.Fatal(err)
  1498  	}
  1499  
  1500  	if _, err := db.Scan(ctx, "a", "a", 0); !testutils.IsError(err, "must be greater than start") {
  1501  		t.Fatalf("unexpected error on scan with startkey == endkey: %v", err)
  1502  	}
  1503  
  1504  	if _, err := db.ReverseScan(ctx, "a", "a", 0); !testutils.IsError(err, "must be greater than start") {
  1505  		t.Fatalf("unexpected error on reverse scan with startkey == endkey: %v", err)
  1506  	}
  1507  
  1508  	if err := db.DelRange(ctx, "x", "a"); !testutils.IsError(err, "must be greater than start") {
  1509  		t.Fatalf("unexpected error on deletion on [x, a): %v", err)
  1510  	}
  1511  
  1512  	if err := db.DelRange(ctx, "", "z"); !testutils.IsError(err, "must be greater than LocalMax") {
  1513  		t.Fatalf("unexpected error on deletion on [KeyMin, z): %v", err)
  1514  	}
  1515  }
  1516  
  1517  // TestPropagateTxnOnError verifies that DistSender.Send properly propagates the
  1518  // txn data to the next iteration. The test uses the txn.ObservedTimestamps field
  1519  // to verify that.
  1520  func TestPropagateTxnOnError(t *testing.T) {
  1521  	defer leaktest.AfterTest(t)()
  1522  
  1523  	// Inject these two observed timestamps into the parts of the batch's
  1524  	// response that do not result in an error. Even though the batch as a
  1525  	// whole results in an error, the transaction should still propagate this
  1526  	// information.
  1527  	ot1 := roachpb.ObservedTimestamp{NodeID: 7, Timestamp: hlc.Timestamp{WallTime: 15}}
  1528  	ot2 := roachpb.ObservedTimestamp{NodeID: 8, Timestamp: hlc.Timestamp{WallTime: 16}}
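	// containsObservedTSs reports whether the transaction proto has picked up
	// both injected observed timestamps.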
  1529  	containsObservedTSs := func(txn *roachpb.Transaction) bool {
  1530  		contains := func(ot roachpb.ObservedTimestamp) bool {
  1531  			for _, ts := range txn.ObservedTimestamps {
  1532  				if ts.Equal(ot) {
  1533  					return true
  1534  				}
  1535  			}
  1536  			return false
  1537  		}
  1538  		return contains(ot1) && contains(ot2)
  1539  	}
  1540  
  1541  	// Set up a filter so that the first CPut operation will get a
  1542  	// ReadWithinUncertaintyIntervalError and so that the Put
  1543  	// operations on either side of the CPut will each return with
  1544  	// a new observed timestamp.
  1545  	keyA, keyB, keyC := roachpb.Key("a"), roachpb.Key("b"), roachpb.Key("c")
  1546  	var numCPuts int32
  1547  	var storeKnobs kvserver.StoreTestingKnobs
  1548  	storeKnobs.EvalKnobs.TestingEvalFilter =
  1549  		func(fArgs kvserverbase.FilterArgs) *roachpb.Error {
  1550  			k := fArgs.Req.Header().Key
  1551  			switch fArgs.Req.(type) {
  1552  			case *roachpb.PutRequest:
  1553  				if k.Equal(keyA) {
  1554  					fArgs.Hdr.Txn.UpdateObservedTimestamp(ot1.NodeID, ot1.Timestamp)
  1555  				} else if k.Equal(keyC) {
  1556  					fArgs.Hdr.Txn.UpdateObservedTimestamp(ot2.NodeID, ot2.Timestamp)
  1557  				}
  1558  			case *roachpb.ConditionalPutRequest:
  1559  				if k.Equal(keyB) {
  1560  					if atomic.AddInt32(&numCPuts, 1) == 1 {
  1561  						pErr := roachpb.NewReadWithinUncertaintyIntervalError(hlc.Timestamp{}, hlc.Timestamp{}, nil)
  1562  						return roachpb.NewErrorWithTxn(pErr, fArgs.Hdr.Txn)
  1563  					}
  1564  				}
  1565  			}
  1566  			return nil
  1567  		}
  1568  
  1569  	s, _, _ := serverutils.StartServer(t,
  1570  		base.TestServerArgs{Knobs: base.TestingKnobs{Store: &storeKnobs}})
  1571  	ctx := context.Background()
  1572  	defer s.Stopper().Stop(ctx)
  1573  
  1574  	db := s.DB()
  1575  	if err := setupMultipleRanges(ctx, db, "b", "c"); err != nil {
  1576  		t.Fatal(err)
  1577  	}
  1578  
  1579  	// Set the initial value on the target key "b".
  1580  	origVal := roachpb.MakeValueFromString("val")
  1581  	if err := db.Put(ctx, keyB, &origVal); err != nil {
  1582  		t.Fatal(err)
  1583  	}
  1584  	// After using origVal as an arg to CPut, we're not allowed to modify it.
  1585  	// Passing it back to CPut again (which is the whole point of keeping it
  1586  	// around) will clear and re-init the checksum, so defensively copy it before
  1587  	// we save it.
  1588  	origVal = roachpb.Value{RawBytes: append([]byte(nil), origVal.RawBytes...)}
  1589  
  1590  	// The following txn creates a batch request that is split into three
  1591  	// requests: Put, CPut, and Put. The CPut operation will get a
  1592  	// ReadWithinUncertaintyIntervalError and the txn will be retried.
  1593  	epoch := 0
  1594  	if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1595  		// Observe the commit timestamp to prevent refreshes.
  1596  		_ = txn.CommitTimestamp()
  1597  
  1598  		epoch++
  1599  		proto := txn.TestingCloneTxn()
  1600  		if epoch >= 2 {
  1601  			// ObservedTimestamps must contain the timestamp returned from the
  1602  			// Put operation.
  1603  			if !containsObservedTSs(proto) {
  1604  				t.Errorf("expected observed timestamp, found: %v", proto.ObservedTimestamps)
  1605  			}
  1606  		} else {
  1607  			// ObservedTimestamps must not contain the timestamp returned from
  1608  			// the Put operation.
  1609  			if containsObservedTSs(proto) {
  1610  				t.Errorf("unexpected observed timestamp, found: %v", proto.ObservedTimestamps)
  1611  			}
  1612  		}
  1613  
  1614  		b := txn.NewBatch()
  1615  		b.Put(keyA, "val")
  1616  		b.CPut(keyB, "new_val", &origVal)
  1617  		b.Put(keyC, "val2")
  1618  		err := txn.CommitInBatch(ctx, b)
  1619  		if epoch == 1 {
  1620  			if retErr := (*roachpb.TransactionRetryWithProtoRefreshError)(nil); errors.As(err, &retErr) {
  1621  				if !testutils.IsError(retErr, "ReadWithinUncertaintyIntervalError") {
  1622  					t.Errorf("expected ReadWithinUncertaintyIntervalError, but got: %v", retErr)
  1623  				}
  1624  			} else {
  1625  				t.Errorf("expected a retryable error, but got: %v", err)
  1626  			}
  1627  		}
  1628  		return err
  1629  	}); err != nil {
  1630  		t.Errorf("unexpected error on transactional Puts: %s", err)
  1631  	}
  1632  
  1633  	if epoch != 2 {
  1634  		t.Errorf("unexpected epoch; the txn must be retried exactly once, but got %d", epoch)
  1635  	}
  1636  }
  1637  
  1638  // TestTxnStarvation pits a transaction against an adversarial
  1639  // concurrent writer which will continually cause write-too-old
  1640  // errors unless the transaction is able to lay down intents on
  1641  // retry.
  1642  func TestTxnStarvation(t *testing.T) {
  1643  	defer leaktest.AfterTest(t)()
  1644  	s, _, _ := serverutils.StartServer(t, base.TestServerArgs{})
  1645  	ctx := context.Background()
  1646  	defer s.Stopper().Stop(ctx)
  1647  
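	// haveWritten signals that the adversarial writer has laid down at least
	// one value, txnDone tells it to stop, and errCh carries its final error.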
  1648  	haveWritten := make(chan struct{})
  1649  	txnDone := make(chan struct{})
  1650  	errCh := make(chan error)
  1651  
  1652  	// Busy write new values to the same key.
  1653  	go func() {
  1654  		for i := 0; ; i++ {
  1655  			if err := s.DB().Put(ctx, "key", fmt.Sprintf("%10d", i)); err != nil {
  1656  				errCh <- err
  1657  				return
  1658  			}
  1659  			// Signal after the first write.
  1660  			if i == 0 {
  1661  				close(haveWritten)
  1662  			}
  1663  			select {
  1664  			case <-txnDone:
  1665  				errCh <- nil
  1666  				return
  1667  			default:
  1668  			}
  1669  		}
  1670  	}()
  1671  
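	// The txn should need at most one restart: its first attempt may hit a
	// write-too-old error from the concurrent writer, but the retry lays down
	// an intent that prevents it from being starved indefinitely.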
  1672  	epoch := 0
  1673  	if err := s.DB().Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1674  		epoch++
  1675  		<-haveWritten
  1676  		time.Sleep(1 * time.Millisecond)
  1677  		b := txn.NewBatch()
  1678  		b.Put("key", "txn-value")
  1679  		return txn.CommitInBatch(ctx, b)
  1680  	}); err != nil {
  1681  		t.Fatal(err)
  1682  	}
  1683  	close(txnDone)
  1684  
  1685  	if epoch > 2 {
  1686  		t.Fatalf("expected at most two epochs; got %d", epoch)
  1687  	}
  1688  
  1689  	if err := <-errCh; err != nil {
  1690  		t.Fatal(err)
  1691  	}
  1692  }
  1693  
  1694  // Test that, if the TxnCoordSender gets a TransactionAbortedError, it sends an
  1695  // EndTxn with Poison=true (the poisoning is so that concurrent readers don't
  1696  // miss their writes).
  1697  func TestAsyncAbortPoisons(t *testing.T) {
  1698  	defer leaktest.AfterTest(t)()
  1699  
  1700  	// Add a testing request filter which intercepts the aborted txn's EndTxn
  1701  	// request and reports, via commitCh, whether it carried the Poison flag.
  1702  	var storeKnobs kvserver.StoreTestingKnobs
  1703  	keyA, keyB := roachpb.Key("a"), roachpb.Key("b")
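	// commitCh receives an error if the EndTxn is missing the Poison flag and
	// is closed once a properly poisoned EndTxn is observed.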
  1704  	commitCh := make(chan error, 1)
  1705  	storeKnobs.TestingRequestFilter = func(_ context.Context, ba roachpb.BatchRequest) *roachpb.Error {
  1706  		for _, req := range ba.Requests {
  1707  			switch r := req.GetInner().(type) {
  1708  			case *roachpb.EndTxnRequest:
  1709  				if r.Key.Equal(keyA) {
  1710  					if r.Poison {
  1711  						close(commitCh)
  1712  					} else {
  1713  						commitCh <- fmt.Errorf("EndTxn didn't have expected Poison flag")
  1714  					}
  1715  				}
  1716  			}
  1717  		}
  1718  		return nil
  1719  	}
  1720  	s, _, _ := serverutils.StartServer(t,
  1721  		base.TestServerArgs{Knobs: base.TestingKnobs{Store: &storeKnobs}})
  1722  	ctx := context.Background()
  1723  	defer s.Stopper().Stop(ctx)
  1724  
  1725  	// Operate directly on the server's DB; this test does not split the keyspace.
  1726  	db := s.DB()
  1727  
  1728  	// Write a value to key "a" in a transaction that is left open.
  1729  	txn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */)
  1730  	b := txn.NewBatch()
  1731  	b.Put(keyA, []byte("value"))
  1732  	if err := txn.Run(ctx, b); err != nil {
  1733  		t.Fatal(err)
  1734  	}
  1735  
  1736  	// Run a high-priority txn that will abort the previous one.
  1737  	if err := db.Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
  1738  		if err := txn.SetUserPriority(roachpb.MaxUserPriority); err != nil {
  1739  			return err
  1740  		}
  1741  		// Write to keyB first to locate this txn's record on a different key
  1742  		// than the initial txn's record. This allows the request filter to
  1743  		// trivially ignore this transaction.
  1744  		if err := txn.Put(ctx, keyB, []byte("value2")); err != nil {
  1745  			return err
  1746  		}
  1747  		return txn.Put(ctx, keyA, []byte("value2"))
  1748  	}); err != nil {
  1749  		t.Fatal(err)
  1750  	}
  1751  
  1752  	expErr := regexp.QuoteMeta("TransactionAbortedError(ABORT_REASON_ABORT_SPAN)")
  1753  	if _, err := txn.Get(ctx, keyA); !testutils.IsError(err, expErr) {
  1754  		t.Fatalf("expected %s, got: %v", expErr, err)
  1755  	}
  1756  	if err := <-commitCh; err != nil {
  1757  		t.Fatal(err)
  1758  	}
  1759  }
  1760  
  1761  // TestTxnCoordSenderRetries verifies that the txn coord sender
  1762  // can automatically retry transactions in many different cases,
  1763  // but still fail in others, depending on different conditions.
  1764  func TestTxnCoordSenderRetries(t *testing.T) {
  1765  	defer leaktest.AfterTest(t)()
  1766  
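	// filterFn holds the eval filter for the currently running test case; each
	// subtest atomically swaps its own filter in and out.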
  1767  	var filterFn atomic.Value
  1768  	var storeKnobs kvserver.StoreTestingKnobs
  1769  	storeKnobs.EvalKnobs.TestingEvalFilter =
  1770  		func(fArgs kvserverbase.FilterArgs) *roachpb.Error {
  1771  			fnVal := filterFn.Load()
  1772  			if fn, ok := fnVal.(func(kvserverbase.FilterArgs) *roachpb.Error); ok && fn != nil {
  1773  				return fn(fArgs)
  1774  			}
  1775  			return nil
  1776  		}
  1777  
  1778  	var refreshSpansCondenseFilter atomic.Value
  1779  	s, _, _ := serverutils.StartServer(t,
  1780  		base.TestServerArgs{Knobs: base.TestingKnobs{
  1781  			Store: &storeKnobs,
  1782  			KVClient: &kvcoord.ClientTestingKnobs{
  1783  				CondenseRefreshSpansFilter: func() bool {
  1784  					fnVal := refreshSpansCondenseFilter.Load()
  1785  					if fn, ok := fnVal.(func() bool); ok {
  1786  						return fn()
  1787  					}
  1788  					return true
  1789  				},
  1790  			}}})
  1791  
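	// disableCondensingRefreshSpans makes the CondenseRefreshSpansFilter knob
	// return false; test cases install it when they want refresh spans tracked
	// without being condensed to fit the MaxTxnRefreshSpansBytes budget.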
  1792  	disableCondensingRefreshSpans := func() bool { return false }
  1793  
  1794  	ctx := context.Background()
  1795  	defer s.Stopper().Stop(ctx)
  1796  
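	// newUncertaintyFilter returns an eval filter that injects a
	// ReadWithinUncertaintyIntervalError into the first transactional request
	// that touches the given key; subsequent requests pass through untouched.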
  1797  	newUncertaintyFilter := func(key roachpb.Key) func(kvserverbase.FilterArgs) *roachpb.Error {
  1798  		var count int32
  1799  		return func(fArgs kvserverbase.FilterArgs) *roachpb.Error {
  1800  			if (fArgs.Req.Header().Key.Equal(key) ||
  1801  				fArgs.Req.Header().Span().ContainsKey(key)) && fArgs.Hdr.Txn != nil {
  1802  				if atomic.AddInt32(&count, 1) > 1 {
  1803  					return nil
  1804  				}
  1805  				err := roachpb.NewReadWithinUncertaintyIntervalError(
  1806  					fArgs.Hdr.Timestamp, s.Clock().Now(), fArgs.Hdr.Txn)
  1807  				return roachpb.NewErrorWithTxn(err, fArgs.Hdr.Txn)
  1808  			}
  1809  			return nil
  1810  		}
  1811  	}
  1812  
  1813  	// Set up two userspace ranges: /Min-b, b-/Max.
  1814  	db := s.DB()
  1815  	if err := setupMultipleRanges(ctx, db, "b"); err != nil {
  1816  		t.Fatal(err)
  1817  	}
  1818  
  1819  	testCases := []struct {
  1820  		name                       string
  1821  		beforeTxnStart             func(context.Context, *kv.DB) error  // called before the txn starts
  1822  		afterTxnStart              func(context.Context, *kv.DB) error  // called after the txn chooses a timestamp
  1823  		retryable                  func(context.Context, *kv.Txn) error // called during the txn; may be retried
  1824  		filter                     func(kvserverbase.FilterArgs) *roachpb.Error // eval filter installed for this case, if any
  1825  		refreshSpansCondenseFilter func() bool // overrides condensing of refresh spans, if set
  1826  		priorReads                 bool        // if set, perform a read before running retryable
  1827  		tsLeaked                   bool        // if set, observe the commit ts to prevent internal retries
  1828  		// If both of these are false, no retries.
  1829  		txnCoordRetry bool
  1830  		clientRetry   bool
  1831  		expFailure    string // regexp pattern to match on error if not empty
  1832  	}{
  1833  		{
  1834  			name: "forwarded timestamp with get and put",
  1835  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1836  				_, err := db.Get(ctx, "a") // read key to set ts cache
  1837  				return err
  1838  			},
  1839  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1840  				return txn.Put(ctx, "a", "put") // put to advance txn ts
  1841  			},
  1842  		},
  1843  		{
  1844  			name: "forwarded timestamp with get and put after timestamp leaked",
  1845  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1846  				_, err := db.Get(ctx, "a") // read key to set ts cache
  1847  				return err
  1848  			},
  1849  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1850  				return txn.Put(ctx, "a", "put") // put to advance txn ts
  1851  			},
  1852  			tsLeaked:    true,
  1853  			clientRetry: true,
  1854  		},
  1855  		{
  1856  			name: "forwarded timestamp with get and initput",
  1857  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1858  				_, err := db.Get(ctx, "a") // read key to set ts cache
  1859  				return err
  1860  			},
  1861  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1862  				return txn.InitPut(ctx, "a", "put", false /* failOnTombstones */) // put to advance txn ts
  1863  			},
  1864  		},
  1865  		{
  1866  			name: "forwarded timestamp with get and cput",
  1867  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1868  				_, err := db.Get(ctx, "a") // read key to set ts cache
  1869  				return err
  1870  			},
  1871  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1872  				return txn.CPut(ctx, "a", "cput", strToValue("put")) // cput to advance txn ts, set update span
  1873  			},
  1874  		},
  1875  		{
  1876  			name: "forwarded timestamp with get and cput after timestamp leaked",
  1877  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  1878  				return db.Put(ctx, "a", "put")
  1879  			},
  1880  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1881  				_, err := db.Get(ctx, "a") // read key to set ts cache
  1882  				return err
  1883  			},
  1884  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1885  				return txn.CPut(ctx, "a", "cput", strToValue("put")) // cput to advance txn ts, set update span
  1886  			},
  1887  			tsLeaked:    true,
  1888  			clientRetry: true,
  1889  		},
  1890  		{
  1891  			name: "forwarded timestamp with scan and cput",
  1892  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1893  				_, err := db.Scan(ctx, "a", "az", 0) // scan sets ts cache
  1894  				return err
  1895  			},
  1896  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1897  				return txn.CPut(ctx, "ab", "cput", nil) // cput advances, sets update span
  1898  			},
  1899  		},
  1900  		{
  1901  			name: "forwarded timestamp with delete range",
  1902  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1903  				_, err := db.Get(ctx, "a") // read key to set ts cache
  1904  				return err
  1905  			},
  1906  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1907  				return txn.DelRange(ctx, "a", "b")
  1908  			},
  1909  			// Expect a transaction coord retry, which should succeed.
  1910  			txnCoordRetry: true,
  1911  		},
  1912  		{
  1913  			name: "forwarded timestamp with put in batch commit",
  1914  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1915  				_, err := db.Get(ctx, "a") // set ts cache
  1916  				return err
  1917  			},
  1918  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1919  				b := txn.NewBatch()
  1920  				b.Put("a", "put")
  1921  				return txn.CommitInBatch(ctx, b)
  1922  			},
  1923  			// No retries, 1pc commit.
  1924  		},
  1925  		{
  1926  			name: "forwarded timestamp with cput in batch commit",
  1927  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  1928  				return db.Put(ctx, "a", "orig")
  1929  			},
  1930  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1931  				_, err := db.Get(ctx, "a") // set ts cache
  1932  				return err
  1933  			},
  1934  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1935  				b := txn.NewBatch()
  1936  				b.CPut("a", "cput", strToValue("orig"))
  1937  				return txn.CommitInBatch(ctx, b)
  1938  			},
  1939  			// No retries, 1pc commit.
  1940  		},
  1941  		{
  1942  			name: "forwarded timestamp with get in batch commit",
  1943  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1944  				_, err := db.Get(ctx, "a") // set ts cache
  1945  				return err
  1946  			},
  1947  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1948  				// Advance timestamp.
  1949  				if err := txn.Put(ctx, "a", "put"); err != nil {
  1950  					return err
  1951  				}
  1952  				b := txn.NewBatch()
  1953  				b.Get("a2")
  1954  				return txn.CommitInBatch(ctx, b)
  1955  			},
  1956  			// Read-only request (Get) prevents server-side refresh.
  1957  			txnCoordRetry: true,
  1958  		},
  1959  		{
  1960  			name: "forwarded timestamp with scan in batch commit",
  1961  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1962  				_, err := db.Get(ctx, "a") // set ts cache
  1963  				return err
  1964  			},
  1965  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1966  				// Advance timestamp.
  1967  				if err := txn.Put(ctx, "a", "put"); err != nil {
  1968  					return err
  1969  				}
  1970  				b := txn.NewBatch()
  1971  				b.Scan("a2", "a3")
  1972  				return txn.CommitInBatch(ctx, b)
  1973  			},
  1974  			// Read-only request (Scan) prevents server-side refresh.
  1975  			txnCoordRetry: true,
  1976  		},
  1977  		{
  1978  			// If we've exhausted the limit for tracking refresh spans but we
  1979  			// already refreshed, keep running the txn.
  1980  			name:                       "forwarded timestamp with too many refreshes, read only",
  1981  			refreshSpansCondenseFilter: disableCondensingRefreshSpans,
  1982  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  1983  				return db.Put(ctx, "a", "value")
  1984  			},
  1985  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  1986  				// Make the batch large enough that, if we accounted for all
  1987  				// of its spans, we would exceed the limit on refresh spans.
  1988  				// This is not an issue because we refresh before tracking
  1989  				// their spans.
  1990  				keybase := strings.Repeat("a", 1024)
  1991  				maxRefreshBytes := kvcoord.MaxTxnRefreshSpansBytes.Get(&s.ClusterSettings().SV)
  1992  				scanToExceed := int(maxRefreshBytes) / len(keybase)
  1993  				b := txn.NewBatch()
  1994  				// Hit the uncertainty error at the beginning of the batch.
  1995  				b.Get("a")
  1996  				for i := 0; i < scanToExceed; i++ {
  1997  					key := roachpb.Key(fmt.Sprintf("%s%10d", keybase, i))
  1998  					b.Scan(key, key.Next())
  1999  				}
  2000  				return txn.Run(ctx, b)
  2001  			},
  2002  			filter: newUncertaintyFilter(roachpb.Key([]byte("a"))),
  2003  			// Expect a transaction coord retry, which should succeed.
  2004  			txnCoordRetry: true,
  2005  		},
  2006  		{
  2007  			// Even if accounting for the refresh spans would have exhausted the
  2008  			// limit for tracking refresh spans and our transaction's timestamp
  2009  			// has been pushed, if we successfully commit then we won't hit an
  2010  			// error. This is the case even if the final batch itself causes a
  2011  			// no-op refresh because the txn has no refresh spans.
  2012  			name: "forwarded timestamp with too many refreshes in batch commit " +
  2013  				"with no-op refresh",
  2014  			refreshSpansCondenseFilter: disableCondensingRefreshSpans,
  2015  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2016  				_, err := db.Get(ctx, "a") // set ts cache
  2017  				return err
  2018  			},
  2019  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2020  				// Advance timestamp.
  2021  				if err := txn.Put(ctx, "a", "put"); err != nil {
  2022  					return err
  2023  				}
  2024  				// Make the final batch large enough such that if we accounted
  2025  				// for all of its spans then we would exceed the limit on
  2026  				// refresh spans. This is not an issue because we never need to
  2027  				// account for them. The txn has no refresh spans, so it can
  2028  				// forward its timestamp while committing.
  2029  				keybase := strings.Repeat("a", 1024)
  2030  				maxRefreshBytes := kvcoord.MaxTxnRefreshSpansBytes.Get(&s.ClusterSettings().SV)
  2031  				scanToExceed := int(maxRefreshBytes) / len(keybase)
  2032  				b := txn.NewBatch()
  2033  				for i := 0; i < scanToExceed; i++ {
  2034  					key := roachpb.Key(fmt.Sprintf("%s%10d", keybase, i))
  2035  					b.Scan(key, key.Next())
  2036  				}
  2037  				return txn.CommitInBatch(ctx, b)
  2038  			},
  2039  			txnCoordRetry: true,
  2040  		},
  2041  		{
  2042  			// Even if accounting for the refresh spans would have exhausted the
  2043  			// limit for tracking refresh spans and our transaction's timestamp
  2044  			// has been pushed, if we successfully commit then we won't hit an
  2045  			// error. This is the case even if the final batch itself causes a
  2046  			// real refresh.
  2047  			name: "forwarded timestamp with too many refreshes in batch commit " +
  2048  				"with refresh",
  2049  			refreshSpansCondenseFilter: disableCondensingRefreshSpans,
  2050  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2051  				_, err := db.Get(ctx, "a") // set ts cache
  2052  				return err
  2053  			},
  2054  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2055  				// Advance timestamp. This also creates a refresh span which
  2056  				// will prevent the txn from committing without a refresh.
  2057  				if err := txn.DelRange(ctx, "a", "b"); err != nil {
  2058  					return err
  2059  				}
  2060  				// Make the final batch large enough such that if we accounted
  2061  				// for all of its spans then we would exceed the limit on
  2062  				// refresh spans. This is not an issue because we never need to
  2063  				// account for them until the final batch, at which time we
  2064  				// perform a span refresh and successfully commit.
  2065  				keybase := strings.Repeat("a", 1024)
  2066  				maxRefreshBytes := kvcoord.MaxTxnRefreshSpansBytes.Get(&s.ClusterSettings().SV)
  2067  				scanToExceed := int(maxRefreshBytes) / len(keybase)
  2068  				b := txn.NewBatch()
  2069  				for i := 0; i < scanToExceed; i++ {
  2070  					key := roachpb.Key(fmt.Sprintf("%s%10d", keybase, i))
  2071  					b.Scan(key, key.Next())
  2072  				}
  2073  				return txn.CommitInBatch(ctx, b)
  2074  			},
  2075  			txnCoordRetry: true,
  2076  		},
  2077  		{
  2078  			name: "write too old with put",
  2079  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2080  				return db.Put(ctx, "a", "put")
  2081  			},
  2082  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2083  				return txn.Put(ctx, "a", "put")
  2084  			},
  2085  		},
  2086  		{
  2087  			name: "write too old with put after prior read",
  2088  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2089  				return db.Put(ctx, "a", "put")
  2090  			},
  2091  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2092  				return txn.Put(ctx, "a", "put")
  2093  			},
  2094  			priorReads:    true,
  2095  			txnCoordRetry: true,
  2096  		},
  2097  		{
  2098  			name: "write too old with put after timestamp leaked",
  2099  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2100  				return db.Put(ctx, "a", "put")
  2101  			},
  2102  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2103  				return txn.Put(ctx, "a", "put")
  2104  			},
  2105  			tsLeaked:    true,
  2106  			clientRetry: true,
  2107  		},
  2108  		{
  2109  			name: "write too old with get in the clear",
  2110  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2111  				return db.Put(ctx, "a", "put")
  2112  			},
  2113  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2114  				if _, err := txn.Get(ctx, "b"); err != nil {
  2115  					return err
  2116  				}
  2117  				return txn.Put(ctx, "a", "put")
  2118  			},
  2119  			txnCoordRetry: true,
  2120  		},
  2121  		{
  2122  			name: "write too old with get conflict",
  2123  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2124  				return db.Put(ctx, "a", "put")
  2125  			},
  2126  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2127  				if _, err := txn.Get(ctx, "a"); err != nil {
  2128  					return err
  2129  				}
  2130  				return txn.Put(ctx, "a", "put")
  2131  			},
  2132  			clientRetry: true,
  2133  		},
  2134  		{
  2135  			name: "write too old with multiple puts to same key",
  2136  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2137  				return db.Put(ctx, "a", "value1")
  2138  			},
  2139  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2140  				// Get so we must refresh when txn timestamp moves forward.
  2141  				if _, err := txn.Get(ctx, "a"); err != nil {
  2142  					return err
  2143  				}
  2144  				// Now, Put a new value to "a" out of band from the txn.
  2145  				if err := txn.DB().Put(ctx, "a", "value2"); err != nil {
  2146  					return err
  2147  				}
  2148  				// On the first txn Put, we will get a WriteTooOld flag set,
  2149  				// but lay down the intent and continue the txn.
  2150  				if err := txn.Put(ctx, "a", "txn-value1"); err != nil {
  2151  					return err
  2152  				}
  2153  				// Write again to make sure the timestamp of the second intent
  2154  				// is correctly set to the txn's advanced timestamp. There was
  2155  				// previously a bug where the txn's DeprecatedOrigTimestamp would be
  2156  				// used, and so the refresh caused by the WriteTooOld flag would
  2157  				// miss the out-of-band Put's value (see #23032).
  2158  				return txn.Put(ctx, "a", "txn-value2")
  2159  			},
  2160  			clientRetry: true, // expect a client-side retry as refresh should fail
  2161  		},
  2162  		{
  2163  			name: "write too old with cput matching newer value",
  2164  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2165  				return db.Put(ctx, "a", "value")
  2166  			},
  2167  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2168  				return db.Put(ctx, "a", "put")
  2169  			},
  2170  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2171  				return txn.CPut(ctx, "a", "cput", strToValue("put"))
  2172  			},
  2173  			txnCoordRetry: false,              // fails on first attempt at cput
  2174  			expFailure:    "unexpected value", // the failure we get is a condition failed error
  2175  		},
  2176  		{
  2177  			name: "write too old with cput matching older value",
  2178  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2179  				return db.Put(ctx, "a", "value")
  2180  			},
  2181  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2182  				return db.Put(ctx, "a", "put")
  2183  			},
  2184  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2185  				return txn.CPut(ctx, "a", "cput", strToValue("value"))
  2186  			},
  2187  			txnCoordRetry: false,              // non-matching value means we fail txn coord retry
  2188  			expFailure:    "unexpected value", // the failure we get is a condition failed error
  2189  		},
  2190  		{
  2191  			name: "write too old with cput matching older and newer values",
  2192  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2193  				return db.Put(ctx, "a", "value")
  2194  			},
  2195  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2196  				return db.Put(ctx, "a", "value")
  2197  			},
  2198  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2199  				return txn.CPut(ctx, "a", "cput", strToValue("value"))
  2200  			},
  2201  		},
  2202  		{
  2203  			name: "write too old with cput matching older and newer values after prior read",
  2204  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2205  				return db.Put(ctx, "a", "value")
  2206  			},
  2207  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2208  				return db.Put(ctx, "a", "value")
  2209  			},
  2210  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2211  				return txn.CPut(ctx, "a", "cput", strToValue("value"))
  2212  			},
  2213  			priorReads:    true,
  2214  			txnCoordRetry: true,
  2215  		},
  2216  		{
  2217  			name: "write too old with increment",
  2218  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2219  				_, err := db.Inc(ctx, "inc1", 1)
  2220  				return err
  2221  			},
  2222  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2223  				_, err := db.Inc(ctx, "inc1", 1)
  2224  				return err
  2225  			},
  2226  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2227  				val, err := txn.Inc(ctx, "inc1", 1)
  2228  				if err != nil {
  2229  					return err
  2230  				}
  2231  				if vInt := val.ValueInt(); vInt != 3 {
  2232  					return errors.Errorf("expected val=3; got %d", vInt)
  2233  				}
  2234  				return nil
  2235  			},
  2236  		},
  2237  		{
  2238  			name: "write too old with increment after prior read",
  2239  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2240  				_, err := db.Inc(ctx, "inc2", 1)
  2241  				return err
  2242  			},
  2243  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2244  				_, err := db.Inc(ctx, "inc2", 1)
  2245  				return err
  2246  			},
  2247  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2248  				val, err := txn.Inc(ctx, "inc2", 1)
  2249  				if err != nil {
  2250  					return err
  2251  				}
  2252  				if vInt := val.ValueInt(); vInt != 3 {
  2253  					return errors.Errorf("expected val=3; got %d", vInt)
  2254  				}
  2255  				return nil
  2256  			},
  2257  			priorReads:    true,
  2258  			txnCoordRetry: true,
  2259  		},
  2260  		{
  2261  			name: "write too old with initput",
  2262  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2263  				return db.Put(ctx, "iput", "put")
  2264  			},
  2265  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2266  				return txn.InitPut(ctx, "iput", "put", false)
  2267  			},
  2268  		},
  2269  		{
  2270  			name: "write too old with initput after prior read",
  2271  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2272  				return db.Put(ctx, "iput", "put")
  2273  			},
  2274  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2275  				return txn.InitPut(ctx, "iput", "put", false)
  2276  			},
  2277  			priorReads:    true,
  2278  			txnCoordRetry: true, // fails on first attempt at the initput with write too old
  2279  			// Succeeds on second attempt.
  2280  		},
  2281  		{
  2282  			name: "write too old with initput matching older and newer values",
  2283  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2284  				return db.Put(ctx, "iput", "put")
  2285  			},
  2286  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2287  				return db.Put(ctx, "iput", "put")
  2288  			},
  2289  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2290  				return txn.InitPut(ctx, "iput", "put", false)
  2291  			},
  2292  		},
  2293  		{
  2294  			name: "write too old with initput matching older and newer values after prior read",
  2295  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2296  				return db.Put(ctx, "iput", "put")
  2297  			},
  2298  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2299  				return db.Put(ctx, "iput", "put")
  2300  			},
  2301  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2302  				return txn.InitPut(ctx, "iput", "put", false)
  2303  			},
  2304  			priorReads: true,
  2305  			// Expect a transaction coord retry, which should succeed.
  2306  			txnCoordRetry: true,
  2307  		},
  2308  		{
  2309  			name: "write too old with initput matching older value",
  2310  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2311  				return db.Put(ctx, "iput", "put1")
  2312  			},
  2313  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2314  				return db.Put(ctx, "iput", "put2")
  2315  			},
  2316  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2317  				return txn.InitPut(ctx, "iput", "put1", false)
  2318  			},
  2319  			txnCoordRetry: false,              // non-matching value means we fail txn coord retry
  2320  			expFailure:    "unexpected value", // the failure we get is a condition failed error
  2321  		},
  2322  		{
  2323  			name: "write too old with initput matching newer value",
  2324  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2325  				return db.Put(ctx, "iput", "put1")
  2326  			},
  2327  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2328  				return db.Put(ctx, "iput", "put2")
  2329  			},
  2330  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2331  				return txn.InitPut(ctx, "iput", "put2", false)
  2332  			},
  2333  			// No txn coord retry as we get condition failed error.
  2334  			expFailure: "unexpected value", // the failure we get is a condition failed error
  2335  		},
  2336  		{
  2337  			name: "write too old with initput failing on tombstone before",
  2338  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2339  				return db.Del(ctx, "iput")
  2340  			},
  2341  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2342  				return db.Put(ctx, "iput", "put2")
  2343  			},
  2344  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2345  				return txn.InitPut(ctx, "iput", "put2", true)
  2346  			},
  2347  			expFailure: "unexpected value", // condition failed error when failing on tombstones
  2348  		},
  2349  		{
  2350  			name: "write too old with initput failing on tombstone after",
  2351  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2352  				return db.Put(ctx, "iput", "put")
  2353  			},
  2354  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2355  				return db.Del(ctx, "iput")
  2356  			},
  2357  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2358  				return txn.InitPut(ctx, "iput", "put", true)
  2359  			},
  2360  			txnCoordRetry: false,              // non-matching value means we fail txn coord retry
  2361  			expFailure:    "unexpected value", // condition failed error when failing on tombstones
  2362  		},
  2363  		{
  2364  			name: "write too old with locking read",
  2365  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2366  				return db.Put(ctx, "a", "put")
  2367  			},
  2368  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2369  				_, err := txn.ScanForUpdate(ctx, "a", "a\x00", 0)
  2370  				return err
  2371  			},
  2372  		},
  2373  		{
  2374  			name: "write too old with locking read after prior read",
  2375  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2376  				return db.Put(ctx, "a", "put")
  2377  			},
  2378  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2379  				_, err := txn.ScanForUpdate(ctx, "a", "a\x00", 0)
  2380  				return err
  2381  			},
  2382  			priorReads:    true,
  2383  			txnCoordRetry: true,
  2384  		},
  2385  		{
  2386  			// This test sends a 1PC batch with Put+EndTxn.
  2387  			// The Put gets a write too old error but, since there's no refresh spans,
  2388  			// the commit succeeds.
  2389  			name: "write too old with put in batch commit",
  2390  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2391  				return db.Put(ctx, "a", "put")
  2392  			},
  2393  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2394  				b := txn.NewBatch()
  2395  				b.Put("a", "new-put")
  2396  				return txn.CommitInBatch(ctx, b) // will be a 1PC, won't get auto retry
  2397  			},
  2398  			// No retries, 1pc commit.
  2399  		},
  2400  		{
  2401  			// This test is like the previous one in that the commit batch succeeds at
  2402  			// an updated timestamp, but this time the EndTxn puts the
  2403  			// transaction in the STAGING state instead of COMMITTED because there had
  2404  			// been a previous write in a different batch. Like above, the commit is
  2405  			// successful since there are no refresh spans (the request will succeed
  2406  			// after a server-side refresh).
  2407  			name: "write too old in staging commit",
  2408  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2409  				return db.Put(ctx, "a", "orig")
  2410  			},
  2411  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2412  				return db.Put(ctx, "a", "put")
  2413  			},
  2414  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2415  				if err := txn.Put(ctx, "another", "another put"); err != nil {
  2416  					return err
  2417  				}
  2418  				b := txn.NewBatch()
  2419  				b.Put("a", "final value")
  2420  				return txn.CommitInBatch(ctx, b)
  2421  			},
  2422  			// The request will succeed after a server-side refresh.
  2423  			txnCoordRetry: false,
  2424  		},
  2425  		{
  2426  			name: "write too old with cput in batch commit",
  2427  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2428  				return db.Put(ctx, "a", "orig")
  2429  			},
  2430  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2431  				return db.Put(ctx, "a", "put")
  2432  			},
  2433  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2434  				b := txn.NewBatch()
  2435  				b.CPut("a", "cput", strToValue("put"))
  2436  				return txn.CommitInBatch(ctx, b) // will be a 1PC, won't get auto retry
  2437  			},
  2438  			// No client-side retries, 1PC commit. On the server-side, the batch is
  2439  			// evaluated twice: once at the original timestamp, where it gets a
  2440  			// WriteTooOldError, and then once at the pushed timestamp. The
  2441  			// server-side retry is enabled by the fact that there have not been any
  2442  			// previous reads and so the transaction can commit at a pushed timestamp.
  2443  		},
  2444  		{
  2445  			// This test is like the previous one, except the 1PC batch cannot commit
  2446  			// at the updated timestamp.
  2447  			name: "write too old with failed cput in batch commit",
  2448  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2449  				return db.Put(ctx, "a", "orig")
  2450  			},
  2451  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2452  				return db.Put(ctx, "a", "put")
  2453  			},
  2454  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2455  				b := txn.NewBatch()
  2456  				b.CPut("a", "cput", strToValue("orig"))
  2457  				return txn.CommitInBatch(ctx, b) // will be a 1PC, won't get auto retry
  2458  			},
  2459  			expFailure: "unexpected value", // The CPut cannot succeed.
  2460  		},
  2461  		{
  2462  			name: "multi-range batch with forwarded timestamp",
  2463  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2464  				_, err := db.Get(ctx, "c") // set ts cache
  2465  				return err
  2466  			},
  2467  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2468  				b := txn.NewBatch()
  2469  				b.Put("a", "put")
  2470  				b.Put("c", "put")
  2471  				return txn.CommitInBatch(ctx, b)
  2472  			},
  2473  			txnCoordRetry: true,
  2474  		},
  2475  		{
  2476  			name: "multi-range batch with forwarded timestamp and cput",
  2477  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2478  				return db.Put(ctx, "a", "value")
  2479  			},
  2480  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2481  				_, err := db.Get(ctx, "a") // set ts cache
  2482  				return err
  2483  			},
  2484  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2485  				b := txn.NewBatch()
  2486  				b.CPut("a", "cput", strToValue("value"))
  2487  				b.Put("c", "put")
  2488  				return txn.CommitInBatch(ctx, b) // both puts will succeed, no retry
  2489  			},
  2490  		},
  2491  		{
  2492  			name: "multi-range batch with forwarded timestamp and cput and get",
  2493  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2494  				return db.Put(ctx, "a", "value")
  2495  			},
  2496  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2497  				_, err := db.Get(ctx, "a") // set ts cache
  2498  				return err
  2499  			},
  2500  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2501  				if _, err := txn.Get(ctx, "b"); err != nil { // Get triggers retry
  2502  					return err
  2503  				}
  2504  				b := txn.NewBatch()
  2505  				b.CPut("a", "cput", strToValue("value"))
  2506  				b.Put("c", "put")
  2507  				return txn.CommitInBatch(ctx, b) // both puts will succeed, et will retry from get
  2508  			},
  2509  			txnCoordRetry: true,
  2510  		},
  2511  		{
  2512  			name: "multi-range batch with forwarded timestamp and cput and delete range",
  2513  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2514  				return db.Put(ctx, "c", "value")
  2515  			},
  2516  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2517  				_, err := db.Get(ctx, "a") // set ts cache
  2518  				return err
  2519  			},
  2520  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2521  				b := txn.NewBatch()
  2522  				b.DelRange("a", "b", false /* returnKeys */)
  2523  				b.CPut("c", "cput", strToValue("value"))
  2524  				return txn.CommitInBatch(ctx, b) // both writes will succeed, et will retry
  2525  			},
  2526  			txnCoordRetry: true,
  2527  		},
  2528  		{
  2529  			name: "multi-range batch with write too old",
  2530  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2531  				return db.Put(ctx, "c", "value")
  2532  			},
  2533  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2534  				b := txn.NewBatch()
  2535  				b.Put("a", "put")
  2536  				b.Put("c", "put")
  2537  				return txn.CommitInBatch(ctx, b) // put to c will return WriteTooOldError
  2538  			},
  2539  			txnCoordRetry: true,
  2540  		},
  2541  		{
  2542  			name: "multi-range batch with write too old and failed cput",
  2543  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2544  				return db.Put(ctx, "a", "orig")
  2545  			},
  2546  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2547  				return db.Put(ctx, "a", "value")
  2548  			},
  2549  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2550  				b := txn.NewBatch()
  2551  				b.CPut("a", "cput", strToValue("orig"))
  2552  				b.Put("c", "put")
  2553  				return txn.CommitInBatch(ctx, b)
  2554  			},
  2555  			txnCoordRetry: false,              // non-matching value means we fail txn coord retry
  2556  			expFailure:    "unexpected value", // the failure we get is a condition failed error
  2557  		},
  2558  		{
  2559  			name: "multi-range batch with write too old and successful cput",
  2560  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2561  				return db.Put(ctx, "a", "orig")
  2562  			},
  2563  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2564  				return db.Put(ctx, "a", "orig")
  2565  			},
  2566  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2567  				b := txn.NewBatch()
  2568  				b.CPut("a", "cput", strToValue("orig"))
  2569  				b.Put("c", "put")
  2570  				return txn.CommitInBatch(ctx, b)
  2571  			},
  2572  			// We expect the request to succeed after a server-side retry.
  2573  			txnCoordRetry: false,
  2574  		},
  2575  		{
  2576  			// This test checks the behavior of batches that were split by the
  2577  			// DistSender. We'll check that the whole batch is retried after a
  2578  			// successful refresh, and that previously-successful prefix sub-batches
  2579  			// are not refreshed (but are retried instead).
  2580  			name: "multi-range with scan getting updated results after refresh",
  2581  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2582  				// Write to "a". This value will not be seen by the Get the first time
  2583  				// it's evaluated, but it will be seen when it's retried at a bumped
  2584  				// timestamp. In particular, this verifies that the get is not
  2585  				// refreshed, for this would fail (and lead to a client-side retry
  2586  				// instead of one at the txn coord sender).
  2587  				if err := db.Put(ctx, "a", "newval"); err != nil {
  2588  					return err
  2589  				}
  2590  				// "b" is on a different range, so this put will cause a
  2591  				// WriteTooOldError on the 2nd sub-batch. The error will cause a
  2592  				// refresh.
  2593  				return db.Put(ctx, "b", "newval2")
  2594  			},
  2595  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2596  				b := txn.NewBatch()
  2597  				b.Get("a")
  2598  				b.Put("b", "put2")
  2599  				err := txn.Run(ctx, b)
  2600  				if err != nil {
  2601  					return err
  2602  				}
  2603  				gr := b.RawResponse().Responses[0].GetGet()
  2604  				if b, err := gr.Value.GetBytes(); err != nil {
  2605  					return err
  2606  				} else if !bytes.Equal(b, []byte("newval")) {
  2607  					return fmt.Errorf("expected \"newval\", got: %v", b)
  2608  				}
  2609  				return txn.Commit(ctx)
  2610  			},
  2611  			txnCoordRetry: true,
  2612  		},
  2613  		{
  2614  			name: "cput within uncertainty interval",
  2615  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2616  				return db.Put(ctx, "a", "value")
  2617  			},
  2618  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2619  				return txn.CPut(ctx, "a", "cput", strToValue("value"))
  2620  			},
  2621  			filter:        newUncertaintyFilter(roachpb.Key([]byte("a"))),
  2622  			txnCoordRetry: true,
  2623  		},
  2624  		{
  2625  			name: "cput within uncertainty interval after timestamp leaked",
  2626  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2627  				return db.Put(ctx, "a", "value")
  2628  			},
  2629  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2630  				return txn.CPut(ctx, "a", "cput", strToValue("value"))
  2631  			},
  2632  			filter:      newUncertaintyFilter(roachpb.Key([]byte("a"))),
  2633  			clientRetry: true,
  2634  			tsLeaked:    true,
  2635  		},
  2636  		{
  2637  			name: "reads within uncertainty interval",
  2638  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2639  				return db.Put(ctx, "a", "value")
  2640  			},
  2641  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2642  				if _, err := txn.Get(ctx, "aa"); err != nil {
  2643  					return err
  2644  				}
  2645  				if _, err := txn.Get(ctx, "ab"); err != nil {
  2646  					return err
  2647  				}
  2648  				if _, err := txn.Get(ctx, "ac"); err != nil {
  2649  					return err
  2650  				}
  2651  				return txn.CPut(ctx, "a", "cput", strToValue("value"))
  2652  			},
  2653  			filter:        newUncertaintyFilter(roachpb.Key([]byte("ac"))),
  2654  			txnCoordRetry: true,
  2655  		},
  2656  		{
  2657  			name: "reads within uncertainty interval and violating concurrent put",
  2658  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2659  				return db.Put(ctx, "a", "value")
  2660  			},
  2661  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2662  				return db.Put(ctx, "ab", "value")
  2663  			},
  2664  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2665  				if _, err := txn.Get(ctx, "aa"); err != nil {
  2666  					return err
  2667  				}
  2668  				if _, err := txn.Get(ctx, "ab"); err != nil {
  2669  					return err
  2670  				}
  2671  				if _, err := txn.Get(ctx, "ac"); err != nil {
  2672  					return err
  2673  				}
  2674  				return nil
  2675  			},
  2676  			filter:      newUncertaintyFilter(roachpb.Key([]byte("ac"))),
  2677  			clientRetry: true, // note this txn is read-only but still restarts
  2678  		},
  2679  		{
  2680  			name: "multi-range batch with uncertainty interval error",
  2681  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2682  				return db.Put(ctx, "c", "value")
  2683  			},
  2684  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2685  				if err := txn.Put(ctx, "a", "put"); err != nil {
  2686  					return err
  2687  				}
  2688  				b := txn.NewBatch()
  2689  				b.CPut("c", "cput", strToValue("value"))
  2690  				return txn.CommitInBatch(ctx, b)
  2691  			},
  2692  			filter:        newUncertaintyFilter(roachpb.Key([]byte("c"))),
  2693  			txnCoordRetry: true,
  2694  		},
  2695  		{
  2696  			name: "multi-range batch with uncertainty interval error and get conflict",
  2697  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2698  				return db.Put(ctx, "a", "init")
  2699  			},
  2700  			afterTxnStart: func(ctx context.Context, db *kv.DB) error {
  2701  				if err := db.Put(ctx, "b", "value"); err != nil {
  2702  					return err
  2703  				}
  2704  				return db.Put(ctx, "a", "value")
  2705  			},
  2706  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2707  				if _, err := txn.Get(ctx, "b"); err != nil {
  2708  					return err
  2709  				}
  2710  				b := txn.NewBatch()
  2711  				b.CPut("a", "cput", strToValue("value"))
  2712  				return txn.CommitInBatch(ctx, b)
  2713  			},
  2714  			filter:      newUncertaintyFilter(roachpb.Key([]byte("a"))),
  2715  			clientRetry: true, // will fail because of conflict on refresh span for the Get
  2716  		},
  2717  		{
  2718  			name: "multi-range batch with uncertainty interval error and mixed success",
  2719  			beforeTxnStart: func(ctx context.Context, db *kv.DB) error {
  2720  				return db.Put(ctx, "c", "value")
  2721  			},
  2722  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2723  				b := txn.NewBatch()
  2724  				b.Put("a", "put")
  2725  				b.CPut("c", "cput", strToValue("value"))
  2726  				return txn.CommitInBatch(ctx, b)
  2727  			},
  2728  			filter: newUncertaintyFilter(roachpb.Key([]byte("c"))),
  2729  			// Expect a transaction coord retry, which should succeed.
  2730  			txnCoordRetry: true,
  2731  		},
  2732  		{
  2733  			name: "multi-range scan with uncertainty interval error",
  2734  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2735  				_, err := txn.Scan(ctx, "a", "d", 0)
  2736  				return err
  2737  			},
  2738  			filter: newUncertaintyFilter(roachpb.Key([]byte("c"))),
  2739  			// Expect a transaction coord retry, which should succeed.
  2740  			txnCoordRetry: true,
  2741  		},
  2742  		{
  2743  			name: "multi-range delete range with uncertainty interval error",
  2744  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2745  				return txn.DelRange(ctx, "a", "d")
  2746  			},
  2747  			filter: newUncertaintyFilter(roachpb.Key([]byte("c"))),
  2748  			// Expect a transaction coord retry, which should succeed.
  2749  			txnCoordRetry: true,
  2750  		},
  2751  		{
  2752  			name: "missing pipelined write caught on chain",
  2753  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2754  				if err := txn.Put(ctx, "a", "put"); err != nil {
  2755  					return err
  2756  				}
  2757  				// Simulate a failed intent write by resolving the intent
  2758  				// directly. This should be picked up by the transaction's
  2759  				// QueryIntent when chaining on to the pipelined write to
  2760  				// key "a".
  2761  				var ba roachpb.BatchRequest
  2762  				ba.Add(&roachpb.ResolveIntentRequest{
  2763  					RequestHeader: roachpb.RequestHeader{
  2764  						Key: roachpb.Key("a"),
  2765  					},
  2766  					IntentTxn: txn.TestingCloneTxn().TxnMeta,
  2767  					Status:    roachpb.ABORTED,
  2768  				})
  2769  				if _, pErr := txn.DB().NonTransactionalSender().Send(ctx, ba); pErr != nil {
  2770  					return pErr.GoError()
  2771  				}
  2772  				_, err := txn.Get(ctx, "a")
  2773  				return err
  2774  			},
  2775  			// The missing intent write results in a RETRY_ASYNC_WRITE_FAILURE error.
  2776  			clientRetry: true,
  2777  		},
  2778  		{
  2779  			name: "missing pipelined write caught on commit",
  2780  			retryable: func(ctx context.Context, txn *kv.Txn) error {
  2781  				if err := txn.Put(ctx, "a", "put"); err != nil {
  2782  					return err
  2783  				}
  2784  				// Simulate a failed intent write by resolving the intent
  2785  				// directly. This should be picked up by the transaction's
  2786  				// pre-commit QueryIntent for the pipelined write to key "a".
  2787  				var ba roachpb.BatchRequest
  2788  				ba.Add(&roachpb.ResolveIntentRequest{
  2789  					RequestHeader: roachpb.RequestHeader{
  2790  						Key: roachpb.Key("a"),
  2791  					},
  2792  					IntentTxn: txn.TestingCloneTxn().TxnMeta,
  2793  					Status:    roachpb.ABORTED,
  2794  				})
  2795  				if _, pErr := txn.DB().NonTransactionalSender().Send(ctx, ba); pErr != nil {
  2796  					return pErr.GoError()
  2797  				}
  2798  				return nil // commit
  2799  			},
  2800  			// The missing intent write results in a RETRY_ASYNC_WRITE_FAILURE error.
  2801  			clientRetry: true,
  2802  		},
  2803  	}
  2804  
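	// Run each case as its own subtest: beforeTxnStart runs outside the txn,
	// afterTxnStart runs inside the txn's first epoch (after it has chosen a
	// timestamp), and retryable is the body under test. The expected retry
	// behavior is then verified via the refresh metrics and the epoch count.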
  2805  	for _, tc := range testCases {
  2806  		t.Run(tc.name, func(t *testing.T) {
  2807  			if tc.beforeTxnStart != nil {
  2808  				if err := tc.beforeTxnStart(ctx, db); err != nil {
  2809  					t.Fatalf("failed beforeTxnStart: %s", err)
  2810  				}
  2811  			}
  2812  
  2813  			if tc.filter != nil {
  2814  				filterFn.Store(tc.filter)
  2815  				defer filterFn.Store((func(kvserverbase.FilterArgs) *roachpb.Error)(nil))
  2816  			}
  2817  			if tc.refreshSpansCondenseFilter != nil {
  2818  				refreshSpansCondenseFilter.Store(tc.refreshSpansCondenseFilter)
  2819  				defer refreshSpansCondenseFilter.Store((func() bool)(nil))
  2820  			}
  2821  
  2822  			var metrics kvcoord.TxnMetrics
  2823  			var lastRefreshes int64
  2824  			var hadClientRetry bool
  2825  			epoch := 0
  2826  			if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  2827  				if tc.priorReads {
  2828  					_, err := txn.Get(ctx, "prior read")
  2829  					if err != nil {
  2830  						t.Fatalf("unexpected error during prior read: %v", err)
  2831  					}
  2832  				}
  2833  				if tc.tsLeaked {
  2834  					// Read the commit timestamp so the expectation is that
  2835  					// Read the commit timestamp. Once the timestamp has leaked to the
  2836  					// client, this transaction can no longer be restarted internally.
  2837  				}
  2838  				if epoch > 0 {
  2839  					if !tc.clientRetry {
  2840  						t.Fatal("expected txn coord sender to retry, but got client-side retry")
  2841  					}
  2842  					hadClientRetry = true
  2843  					// We expected a new epoch and got it; return success.
  2844  					return nil
  2845  				}
  2846  				defer func() { epoch++ }()
  2847  
  2848  				if tc.afterTxnStart != nil {
  2849  					if err := tc.afterTxnStart(ctx, db); err != nil {
  2850  						t.Fatalf("failed afterTxnStart: %s", err)
  2851  					}
  2852  				}
  2853  
  2854  				metrics = txn.Sender().(*kvcoord.TxnCoordSender).TxnCoordSenderFactory.Metrics()
  2855  				lastRefreshes = metrics.RefreshSuccess.Count()
  2856  
  2857  				return tc.retryable(ctx, txn)
  2858  			}); err != nil {
  2859  				if len(tc.expFailure) == 0 || !testutils.IsError(err, tc.expFailure) {
  2860  					t.Fatal(err)
  2861  				}
  2862  			} else {
  2863  				if len(tc.expFailure) > 0 {
  2864  					t.Errorf("expected failure %q", tc.expFailure)
  2865  				}
  2866  			}
  2867  			// Verify auto retry metric. Because there's a chance that splits
  2868  			// from the cluster setup are still ongoing and can experience
  2869  			// their own retries, this might increase by more than one, so we
  2870  			// can only check here that it's >= 1.
  2871  			refreshes := metrics.RefreshSuccess.Count() - lastRefreshes
  2872  			if tc.txnCoordRetry && refreshes == 0 {
  2873  				t.Errorf("expected [at least] one txn coord sender auto retry; got %d", refreshes)
  2874  			} else if !tc.txnCoordRetry && refreshes != 0 {
  2875  				t.Errorf("expected no txn coord sender auto retries; got %d", refreshes)
  2876  			}
  2877  			if tc.clientRetry && !hadClientRetry {
  2878  				t.Errorf("expected but did not experience client retry")
  2879  			} else if !tc.clientRetry && hadClientRetry {
  2880  				t.Errorf("did not expect but experienced client retry")
  2881  			}
  2882  		})
  2883  	}
  2884  }
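
// exampleEpochCountingTxn is a minimal, hedged sketch (added for illustration;
// it is not used by the tests above, and the key/value names are made up) of
// the contract the driver loop above relies on: kv.DB.Txn re-invokes its
// closure on client-side retryable errors, so counting invocations ("epochs")
// distinguishes client-side retries from retries handled transparently inside
// the TxnCoordSender.
func exampleEpochCountingTxn(ctx context.Context, db *kv.DB) (epochs int, err error) {
	err = db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
		// Each invocation of the closure corresponds to a new attempt (epoch).
		epochs++
		if err := txn.Put(ctx, "example-key", "example-value"); err != nil {
			return err
		}
		_, getErr := txn.Get(ctx, "example-key")
		return getErr
	})
	return epochs, err
}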
  2885  
  2886  // Test that, even though at the kvserver level requests are not idempotent
  2887  // across an EndTxn, a TxnCoordSender retry of the final batch after a refresh
  2888  // still works fine. We check that a transaction is not considered implicitly
  2889  // committed through a combination of writes from a previous attempt of the
  2890  // EndTxn batch and a STAGING txn record written by a newer attempt of that
  2891  // batch.
  2892  // Namely, the scenario is as follows:
  2893  // 1. client sends CPut(a) + CPut(b) + EndTxn. The CPut(a) is split by the
  2894  //    DistSender from the rest. Note that the parallel commit mechanism is in
  2895  //    effect here.
  2896  // 2. One of the two sides gets a WriteTooOldError, the other succeeds.
  2897  //    The client needs to refresh.
  2898  // 3. The refresh succeeds.
  2899  // 4. The client resends the whole batch (note that we don't keep track of the
  2900  //    previous partial success).
  2901  // 5. The batch is split again, and one of the two sides fails.
  2902  //
  2903  // This test checks that, for the different combinations of failures across the
  2904  // two attempts of the request, the transaction is not erroneously considered to
  2905  // be committed. We don't want an intent laid down by the first attempt to
  2906  // satisfy a STAGING record from the 2nd attempt, or the other way around (an
  2907  // intent written in the 2nd attempt satisfying a STAGING record written on the
  2908  // first attempt). See subtests for more details and the wouldBeImplicitlyCommitted sketch below.
  2909  func TestTxnCoordSenderRetriesAcrossEndTxn(t *testing.T) {
  2910  	defer leaktest.AfterTest(t)()
  2911  
  2912  	var filterFn atomic.Value
  2913  	var storeKnobs kvserver.StoreTestingKnobs
  2914  	storeKnobs.EvalKnobs.TestingEvalFilter =
  2915  		func(fArgs kvserverbase.FilterArgs) *roachpb.Error {
  2916  			fnVal := filterFn.Load()
  2917  			if fn, ok := fnVal.(func(kvserverbase.FilterArgs) *roachpb.Error); ok && fn != nil {
  2918  				return fn(fArgs)
  2919  			}
  2920  			return nil
  2921  		}
  2922  
  2923  	// The left side is CPut(a), the right side is CPut(b)+EndTxn(STAGING).
  2924  	type side int
  2925  	const (
  2926  		left side = iota
  2927  		right
  2928  	)
  2929  
  2930  	testCases := []struct {
  2931  		// sidePushedOnFirstAttempt controls which sub-batch will return a
  2932  		// WriteTooOldError on the first attempt.
  2933  		sidePushedOnFirstAttempt    side
  2934  		sideRejectedOnSecondAttempt side
  2935  		txnRecExpectation           kvclientutils.PushExpectation
  2936  	}{
  2937  		{
  2938  			// On the first attempt, the left side succeeds in laying down an intent,
  2939  			// while the right side fails. On the 2nd attempt, the right side succeeds
  2940  			// while the left side fails.
  2941  			//
  2942  			// The point of this test is to check that the txn is not considered to be
  2943  			// implicitly committed at this point. Handling this scenario requires
  2944  			// special care. If we didn't do anything, then we'd end up with a STAGING
  2945  			// txn record (from the second attempt of the request) and an intent on
  2946  			// "a" from the first attempt. That intent would have a lower timestamp
  2947  			// than the txn record and so the txn would be considered implicitly
  2948  			// committed. If the txn were to be considered implicitly committed, and
  2949  			// the intent on "a" was resolved, then the write on "a" (when it eventually
  2950  			// evaluates) might return wrong results, or be pushed, or generally get
  2951  			// very confused about how its own transaction got committed already.
  2952  			//
  2953  			// We handle this scenario by disabling the parallel commit on the
  2954  			// request's 2nd attempt. Thus, the EndTxn will be split from all the
  2955  			// other requests, and the txn record is never written if anything fails.
  2956  			sidePushedOnFirstAttempt:    right,
  2957  			sideRejectedOnSecondAttempt: left,
  2958  			// The first attempt of the right side contains a parallel commit (i.e. an
  2959  			// EndTxn), but fails. The 2nd attempt of the right side will no longer
  2960  			// contain an EndTxn, as explained above. So we expect the txn record to
  2961  			// not exist.
  2962  			txnRecExpectation: kvclientutils.ExpectPusheeTxnRecordNotFound,
  2963  		},
  2964  		{
  2965  			// On the first attempt, the right side succeeds in writing a STAGING txn
  2966  			// record, but the left side fails. On the second attempt, the right side
  2967  			// is rejected.
  2968  			//
  2969  			// The point of this test is to check that the txn is not considered
  2970  			// implicitly committed at this point. All the intents are in place for
  2971  			// the txn to be considered committed, but we rely on the fact that the
  2972  			// intent on "a" has a timestamp that's too high (it gets the timestamp
  2973  			// from the 2nd attempt, after a refresh, but the STAGING txn record has
  2974  			// an older timestamp). If the txn were to be considered implicitly
  2975  			// committed, it'd be bad as we are returning an error to the client
  2976  			// telling it that the EndTxn failed.
  2977  			sidePushedOnFirstAttempt:    left,
  2978  			sideRejectedOnSecondAttempt: right,
  2979  			// The first attempt of the right side writes a STAGING txn record, so we
  2980  			// expect to perform txn recovery.
  2981  			txnRecExpectation: kvclientutils.ExpectPusheeTxnRecovery,
  2982  		},
  2983  	}
  2984  
  2985  	for _, tc := range testCases {
  2986  		t.Run("", func(t *testing.T) {
  2987  			s, _, db := serverutils.StartServer(t,
  2988  				base.TestServerArgs{Knobs: base.TestingKnobs{Store: &storeKnobs}})
  2989  			ctx := context.Background()
  2990  			defer s.Stopper().Stop(ctx)
  2991  
  2992  			keyA, keyA1, keyB, keyB1 := roachpb.Key("a"), roachpb.Key("a1"), roachpb.Key("b"), roachpb.Key("b1")
  2993  			require.NoError(t, setupMultipleRanges(ctx, db, string(keyB)))
  2994  
  2995  			origValA := roachpb.MakeValueFromString("initA")
  2996  			require.NoError(t, db.Put(ctx, keyA, &origValA))
  2997  			origValB := roachpb.MakeValueFromString("initB")
  2998  			require.NoError(t, db.Put(ctx, keyB, &origValB))
  2999  
  3000  			txn := db.NewTxn(ctx, "test txn")
  3001  
  3002  			// Do a write to anchor the txn on b's range.
  3003  			require.NoError(t, txn.Put(ctx, keyB1, "b1"))
  3004  
  3005  			// Take a snapshot of the txn early. We'll use it when verifying if the txn is
  3006  			// implicitly committed. If we didn't use this early snapshot and, instead,
  3007  			// used the transaction with a bumped timestamp, then the push code would
  3008  			// infer that the txn is not implicitly committed without actually running the
  3009  			// recovery procedure. Using this snapshot mimics a pusher that ran into an
  3010  			// old intent.
  3011  			origTxn := txn.TestingCloneTxn()
  3012  
  3013  			// Do a read to prevent the txn from performing server-side refreshes.
  3014  			_, err := txn.Get(ctx, keyA1)
  3015  			require.NoError(t, err)
  3016  
  3017  			// After the txn started, do a conflicting read. This will cause one of
  3018  			// the txn's upcoming CPuts to return a WriteTooOldError on the first
  3019  			// attempt, causing in turn a refresh and a retry. Note that, being
  3020  			// CPuts, the pushed writes don't defer the error by returning the
  3021  			// WriteTooOld flag instead of a WriteTooOldError.
  3022  			var readKey roachpb.Key
  3023  			if tc.sidePushedOnFirstAttempt == left {
  3024  				readKey = keyA
  3025  			} else {
  3026  				readKey = keyB
  3027  			}
  3028  			_, err = db.Get(ctx, readKey)
  3029  			require.NoError(t, err)
  3030  
  3031  			b := txn.NewBatch()
  3032  			b.CPut(keyA, "a", &origValA)
  3033  			b.CPut(keyB, "b", &origValB)
  3034  
  3035  			var secondAttemptRejectKey roachpb.Key
  3036  			if tc.sideRejectedOnSecondAttempt == left {
  3037  				secondAttemptRejectKey = keyA
  3038  			} else {
  3039  				secondAttemptRejectKey = keyB
  3040  			}
  3041  
  3042  			// Install a filter which will reject requests touching
  3043  			// secondAttemptRejectKey on the retry.
  3044  			var count int32
  3045  			filterFn.Store(func(args kvserverbase.FilterArgs) *roachpb.Error {
  3046  				put, ok := args.Req.(*roachpb.ConditionalPutRequest)
  3047  				if !ok {
  3048  					return nil
  3049  				}
  3050  				if !put.Key.Equal(secondAttemptRejectKey) {
  3051  					return nil
  3052  				}
  3053  				count++
  3054  				// Reject the request touching secondAttemptRejectKey on its 2nd attempt.
  3055  				if count == 2 {
  3056  					return roachpb.NewErrorf("injected error; test rejecting request")
  3057  				}
  3058  				return nil
  3059  			})
  3060  
  3061  			require.Regexp(t, "injected", txn.CommitInBatch(ctx, b))
  3062  			err = kvclientutils.CheckPushResult(
  3063  				ctx, db, *origTxn, kvclientutils.ExpectAborted, tc.txnRecExpectation)
  3064  			require.NoError(t, err)
  3065  		})
  3066  	}
  3067  }
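
// wouldBeImplicitlyCommitted is a hedged sketch, for illustration only, of the
// parallel-commit condition the scenarios above reason about: a transaction
// with a STAGING record is implicitly committed only if every one of its
// in-flight writes succeeded at a timestamp no greater than the staging
// record's timestamp. (The real recovery check also matches intents on txn ID,
// epoch, and sequence number; this helper is not part of the production code.)
// An intent left over from an earlier attempt at a lower timestamp could
// therefore appear to satisfy a newer STAGING record, which is the situation
// the first subtest guards against.
func wouldBeImplicitlyCommitted(stagingTS hlc.Timestamp, intentTimestamps []hlc.Timestamp) bool {
	for _, ts := range intentTimestamps {
		if stagingTS.Less(ts) {
			// This write only succeeded above the staging timestamp, so the
			// parallel commit did not complete; recovery would abort the
			// STAGING record instead (the second subtest's situation).
			return false
		}
	}
	return true
}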
  3068  
  3069  // Test that we're being smart about the timestamp ranges that need to be
  3070  // refreshed: when spans are refreshed, they only need to be checked for writes
  3071  // above the timestamp to which they were previously refreshed, not from the
  3072  // transaction's original read timestamp. To wit, the following scenario should
  3073  // NOT result in a failed refresh:
  3074  // - txn starts at ts 100
  3075  // - someone else writes "a" @ 200
  3076  // - txn attempts to write "a" and is pushed to (200,1). The refresh succeeds.
  3077  // - txn reads something that has a value in [100,200]. For example, "a", which
  3078  //   it just wrote.
  3079  // - someone else writes "b" @ 300
  3080  // - txn attempts to write "b" and is pushed to (300,1). This refresh must also
  3081  //   succeed. If this Refresh request would check for values in the range
  3082  //   [100-300], it would fail (as it would find a@200). But since it only checks
  3083  //   for values in the range [200-300] (i.e. values written beyond the timestamp
  3084  // that was refreshed before), we're good. See the refreshWindowSketch below.
  3085  func TestRefreshNoFalsePositive(t *testing.T) {
  3086  	defer leaktest.AfterTest(t)()
  3087  	ctx := context.Background()
  3088  	s, _, db := serverutils.StartServer(t, base.TestServerArgs{})
  3089  	defer s.Stopper().Stop(ctx)
  3090  
  3091  	txn := db.NewTxn(ctx, "test")
  3092  	origTimestamp := txn.ReadTimestamp()
  3093  	log.Infof(ctx, "test txn starting @ %s", origTimestamp)
  3094  	require.NoError(t, db.Put(ctx, "a", "test"))
  3095  	// Attempt to overwrite "a", which will result in a push.
  3096  	require.NoError(t, txn.Put(ctx, "a", "test2"))
  3097  	afterPush := txn.ReadTimestamp()
  3098  	require.True(t, origTimestamp.Less(afterPush))
  3099  	log.Infof(ctx, "txn pushed to %s", afterPush)
  3100  
  3101  	// Read a so that we have to refresh it when we're pushed again.
  3102  	_, err := txn.Get(ctx, "a")
  3103  	require.NoError(t, err)
  3104  
  3105  	require.NoError(t, db.Put(ctx, "b", "test"))
  3106  
  3107  	// Attempt to overwrite b, which will result in another push. The point of the
  3108  	// test is to check that this push succeeds in refreshing "a".
  3109  	log.Infof(ctx, "test txn writing b")
  3110  	require.NoError(t, txn.Put(ctx, "b", "test2"))
  3111  	require.True(t, afterPush.Less(txn.ReadTimestamp()))
  3112  	log.Infof(ctx, "txn pushed to %s", txn.ReadTimestamp())
  3113  
  3114  	require.NoError(t, txn.Commit(ctx))
  3115  }
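
// refreshWindowSketch is a hedged, hypothetical sketch (not the TxnCoordSender
// implementation) of the property exercised by TestRefreshNoFalsePositive:
// after a push, a transaction's read spans only need to be re-validated for
// writes in the window between the timestamp they were previously refreshed to
// and the new, pushed timestamp, not all the way back to the original read
// timestamp. Using the narrower window is what lets the second refresh in the
// test above ignore the write on "a" that caused the first push.
func refreshWindowSketch(origReadTS, refreshedTo, pushedTo hlc.Timestamp) (from, to hlc.Timestamp) {
	from = origReadTS
	if from.Less(refreshedTo) {
		// Earlier refreshes already verified that no conflicting writes landed
		// in [origReadTS, refreshedTo], so that part of the interval can be
		// skipped.
		from = refreshedTo
	}
	return from, pushedTo
}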