github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvclient/kvcoord/dist_sender_test.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvcoord
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"reflect"
    18  	"regexp"
    19  	"sort"
    20  	"strconv"
    21  	"sync"
    22  	"sync/atomic"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/cockroachdb/cockroach/pkg/base"
    27  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    28  	"github.com/cockroachdb/cockroach/pkg/gossip"
    29  	"github.com/cockroachdb/cockroach/pkg/gossip/simulation"
    30  	"github.com/cockroachdb/cockroach/pkg/keys"
    31  	"github.com/cockroachdb/cockroach/pkg/kv"
    32  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    33  	"github.com/cockroachdb/cockroach/pkg/rpc"
    34  	"github.com/cockroachdb/cockroach/pkg/rpc/nodedialer"
    35  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    36  	"github.com/cockroachdb/cockroach/pkg/testutils"
    37  	"github.com/cockroachdb/cockroach/pkg/util"
    38  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    39  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    40  	"github.com/cockroachdb/cockroach/pkg/util/log"
    41  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    42  	"github.com/cockroachdb/cockroach/pkg/util/retry"
    43  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    44  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    45  	"github.com/cockroachdb/cockroach/pkg/util/uuid"
    46  	"github.com/cockroachdb/errors"
    47  	"github.com/stretchr/testify/assert"
    48  	"github.com/stretchr/testify/require"
    49  )
    50  
    51  var (
    52  	//
    53  	// Meta RangeDescriptors
    54  	//
    55  	testMetaEndKey = roachpb.RKey(keys.SystemPrefix)
    56  	// single meta1 and meta2 range with one replica.
    57  	testMetaRangeDescriptor = roachpb.RangeDescriptor{
    58  		RangeID:  1,
    59  		StartKey: roachpb.RKeyMin,
    60  		EndKey:   testMetaEndKey,
    61  		InternalReplicas: []roachpb.ReplicaDescriptor{
    62  			{
    63  				NodeID:  1,
    64  				StoreID: 1,
    65  			},
    66  		},
    67  	}
    68  
    69  	//
    70  	// User-Space RangeDescriptors
    71  	//
    72  	// single user-space descriptor with one replica.
    73  	testUserRangeDescriptor = roachpb.RangeDescriptor{
    74  		RangeID:  2,
    75  		StartKey: testMetaEndKey,
    76  		EndKey:   roachpb.RKeyMax,
    77  		InternalReplicas: []roachpb.ReplicaDescriptor{
    78  			{
    79  				NodeID:  1,
    80  				StoreID: 1,
    81  			},
    82  		},
    83  	}
    84  	// single user-space descriptor with three replicas.
    85  	testUserRangeDescriptor3Replicas = roachpb.RangeDescriptor{
    86  		RangeID:  2,
    87  		StartKey: testMetaEndKey,
    88  		EndKey:   roachpb.RKeyMax,
    89  		InternalReplicas: []roachpb.ReplicaDescriptor{
    90  			{
    91  				NodeID:  1,
    92  				StoreID: 1,
    93  			},
    94  			{
    95  				NodeID:  2,
    96  				StoreID: 2,
    97  			},
    98  			{
    99  				NodeID:  3,
   100  				StoreID: 3,
   101  			},
   102  		},
   103  	}
   104  )
   105  
   106  var testAddress = util.NewUnresolvedAddr("tcp", "node1")
   107  
   108  // simpleSendFn is the function type used to dispatch RPC calls in simpleTransportAdapter
   109  type simpleSendFn func(
   110  	context.Context,
   111  	SendOptions,
   112  	ReplicaSlice,
   113  	roachpb.BatchRequest,
   114  ) (*roachpb.BatchResponse, error)
   115  
   116  // stubRPCSendFn is an rpcSendFn that simply creates a reply for the
   117  // BatchRequest without performing an RPC call or triggering any
   118  // test instrumentation.
   119  var stubRPCSendFn simpleSendFn = func(
   120  	_ context.Context, _ SendOptions, _ ReplicaSlice, args roachpb.BatchRequest,
   121  ) (*roachpb.BatchResponse, error) {
   122  	return args.CreateReply(), nil
   123  }
   124  
   125  // adaptSimpleTransport converts the RPCSend functions used in these
   126  // tests to the newer transport interface.
   127  func adaptSimpleTransport(fn simpleSendFn) TransportFactory {
   128  	return func(
   129  		opts SendOptions,
   130  		nodeDialer *nodedialer.Dialer,
   131  		replicas ReplicaSlice,
   132  	) (Transport, error) {
   133  		return &simpleTransportAdapter{
   134  			fn:       fn,
   135  			opts:     opts,
   136  			replicas: replicas}, nil
   137  	}
   138  }
   139  
   140  type simpleTransportAdapter struct {
   141  	fn          simpleSendFn
   142  	opts        SendOptions
   143  	replicas    ReplicaSlice
   144  	nextReplica int
   145  }
   146  
   147  func (l *simpleTransportAdapter) IsExhausted() bool {
   148  	return l.nextReplica >= len(l.replicas)
   149  }
   150  
   151  func (l *simpleTransportAdapter) SendNext(
   152  	ctx context.Context, ba roachpb.BatchRequest,
   153  ) (*roachpb.BatchResponse, error) {
   154  	ba.Replica = l.replicas[l.nextReplica].ReplicaDescriptor
   155  	l.nextReplica++
   156  	return l.fn(ctx, l.opts, l.replicas, ba)
   157  }
   158  
   159  func (l *simpleTransportAdapter) NextInternalClient(
   160  	ctx context.Context,
   161  ) (context.Context, roachpb.InternalClient, error) {
   162  	panic("unimplemented")
   163  }
   164  
   165  func (l *simpleTransportAdapter) NextReplica() roachpb.ReplicaDescriptor {
   166  	if !l.IsExhausted() {
   167  		return l.replicas[l.nextReplica].ReplicaDescriptor
   168  	}
   169  	return roachpb.ReplicaDescriptor{}
   170  }
   171  
   172  func (*simpleTransportAdapter) MoveToFront(roachpb.ReplicaDescriptor) {
   173  }
   174  
   175  func makeGossip(t *testing.T, stopper *stop.Stopper, rpcContext *rpc.Context) *gossip.Gossip {
   176  	server := rpc.NewServer(rpcContext)
   177  
   178  	const nodeID = 1
   179  	g := gossip.NewTest(nodeID, rpcContext, server, stopper, metric.NewRegistry(), zonepb.DefaultZoneConfigRef())
   180  	if err := g.SetNodeDescriptor(newNodeDesc(nodeID)); err != nil {
   181  		t.Fatal(err)
   182  	}
   183  	if err := g.AddInfo(gossip.KeySentinel, nil, time.Hour); err != nil {
   184  		t.Fatal(err)
   185  	}
   186  
   187  	return g
   188  }
   189  
   190  func newNodeDesc(nodeID roachpb.NodeID) *roachpb.NodeDescriptor {
   191  	return &roachpb.NodeDescriptor{
   192  		NodeID:  nodeID,
   193  		Address: util.MakeUnresolvedAddr("tcp", fmt.Sprintf("invalid.invalid:%d", nodeID)),
   194  	}
   195  }
   196  
   197  // TestSendRPCOrder verifies that sendRPC correctly takes into account the
   198  // lease holder, attributes and required consistency to determine where to send
   199  // remote requests.
   200  func TestSendRPCOrder(t *testing.T) {
   201  	defer leaktest.AfterTest(t)()
   202  	stopper := stop.NewStopper()
   203  	ctx := context.Background()
   204  	defer stopper.Stop(ctx)
   205  
   206  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
   207  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
   208  	g := makeGossip(t, stopper, rpcContext)
   209  	rangeID := roachpb.RangeID(99)
   210  
   211  	nodeTiers := map[int32][]roachpb.Tier{
   212  		1: {}, // The local node, set in each test case.
   213  		2: {roachpb.Tier{Key: "country", Value: "us"}, roachpb.Tier{Key: "region", Value: "west"}},
   214  		3: {roachpb.Tier{Key: "country", Value: "eu"}, roachpb.Tier{Key: "city", Value: "dublin"}},
   215  		4: {roachpb.Tier{Key: "country", Value: "us"}, roachpb.Tier{Key: "region", Value: "east"}, roachpb.Tier{Key: "city", Value: "nyc"}},
   216  		5: {roachpb.Tier{Key: "country", Value: "us"}, roachpb.Tier{Key: "region", Value: "east"}, roachpb.Tier{Key: "city", Value: "mia"}},
   217  	}
   218  
   219  	// Gets filled below to identify the replica by its address.
   220  	makeVerifier := func(expAddrs []roachpb.NodeID) func(SendOptions, ReplicaSlice) error {
   221  		return func(o SendOptions, replicas ReplicaSlice) error {
   222  			var actualAddrs []roachpb.NodeID
   223  			for i, r := range replicas {
   224  				if len(expAddrs) <= i {
   225  					return errors.Errorf("got unexpected address: %s", r.NodeDesc.Address)
   226  				}
   227  				if expAddrs[i] == 0 {
   228  					actualAddrs = append(actualAddrs, 0)
   229  				} else {
   230  					actualAddrs = append(actualAddrs, r.NodeDesc.NodeID)
   231  				}
   232  			}
   233  			if !reflect.DeepEqual(expAddrs, actualAddrs) {
   234  				return errors.Errorf("expected %d, but found %d", expAddrs, actualAddrs)
   235  			}
   236  			return nil
   237  		}
   238  	}
   239  
   240  	testCases := []struct {
   241  		args        roachpb.Request
   242  		tiers       []roachpb.Tier
   243  		expReplica  []roachpb.NodeID
   244  		leaseHolder int32 // 0 for not caching a lease holder.
   245  		// Naming is somewhat off, as eventually consistent reads usually
   246  		// do not have to go to the lease holder when a node has a read lease.
   247  		// Would really want CONSENSUS here, but that is not implemented.
   248  		// Likely a test setup here will never have a read lease, but good
   249  		// to keep in mind.
   250  		consistent bool
   251  	}{
   252  		// Inconsistent Scan without matching attributes.
   253  		{
   254  			args:       &roachpb.ScanRequest{},
   255  			tiers:      []roachpb.Tier{},
   256  			expReplica: []roachpb.NodeID{1, 2, 3, 4, 5},
   257  		},
   258  		// Inconsistent Scan with matching attributes.
   259  		// Should move the two nodes matching the attributes to the front and
   260  		// go stable.
   261  		{
   262  			args:  &roachpb.ScanRequest{},
   263  			tiers: nodeTiers[5],
   264  			// Compare only the first two resulting addresses.
   265  			expReplica: []roachpb.NodeID{5, 4, 0, 0, 0},
   266  		},
   267  
   268  		// Scan without matching attributes that requires but does not find
   269  		// a lease holder.
   270  		{
   271  			args:       &roachpb.ScanRequest{},
   272  			tiers:      []roachpb.Tier{},
   273  			expReplica: []roachpb.NodeID{1, 2, 3, 4, 5},
   274  			consistent: true,
   275  		},
   276  		// Put without matching attributes that requires but does not find lease holder.
   277  		// Should go random and not change anything.
   278  		{
   279  			args:       &roachpb.PutRequest{},
   280  			tiers:      []roachpb.Tier{{Key: "nomatch", Value: ""}},
   281  			expReplica: []roachpb.NodeID{1, 2, 3, 4, 5},
   282  		},
   283  		// Put with matching attributes but no lease holder.
   284  		// Should move the two nodes matching the attributes to the front.
   285  		{
   286  			args:  &roachpb.PutRequest{},
   287  			tiers: append(nodeTiers[5], roachpb.Tier{Key: "irrelevant", Value: ""}),
   288  			// Compare only the first two resulting addresses.
   289  			expReplica: []roachpb.NodeID{5, 4, 0, 0, 0},
   290  		},
   291  		// Put with matching attributes that finds the lease holder (node 3).
   292  		// Should address the lease holder and the two nodes matching the attributes
   293  		// (the last and second to last) in that order.
   294  		{
   295  			args:  &roachpb.PutRequest{},
   296  			tiers: append(nodeTiers[5], roachpb.Tier{Key: "irrelevant", Value: ""}),
   297  			// Compare only the first resulting address as we have a lease holder
   298  			// and that means we're only trying to send there.
   299  			expReplica:  []roachpb.NodeID{2, 0, 0, 0, 0},
   300  			leaseHolder: 2,
   301  		},
   302  		// Inconsistent Get without matching attributes but lease holder (node 3). Should just
   303  		// go random as the lease holder does not matter.
   304  		{
   305  			args:        &roachpb.GetRequest{},
   306  			tiers:       []roachpb.Tier{},
   307  			expReplica:  []roachpb.NodeID{1, 2, 3, 4, 5},
   308  			leaseHolder: 2,
   309  		},
   310  	}
   311  
   312  	descriptor := roachpb.RangeDescriptor{
   313  		StartKey:      roachpb.RKeyMin,
   314  		EndKey:        roachpb.RKeyMax,
   315  		RangeID:       rangeID,
   316  		NextReplicaID: 1,
   317  	}
   318  	for i := int32(1); i <= 5; i++ {
   319  		addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d:1", i))
   320  		nd := &roachpb.NodeDescriptor{
   321  			NodeID:  roachpb.NodeID(i),
   322  			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
   323  			Locality: roachpb.Locality{
   324  				Tiers: nodeTiers[i],
   325  			},
   326  		}
   327  		if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil {
   328  			t.Fatal(err)
   329  		}
   330  		descriptor.AddReplica(roachpb.NodeID(i), roachpb.StoreID(i), roachpb.VOTER_FULL)
   331  	}
   332  
   333  	// Stub to be changed in each test case.
   334  	var verifyCall func(SendOptions, ReplicaSlice) error
   335  
   336  	var testFn simpleSendFn = func(
   337  		_ context.Context,
   338  		opts SendOptions,
   339  		replicas ReplicaSlice,
   340  		args roachpb.BatchRequest,
   341  	) (*roachpb.BatchResponse, error) {
   342  		if err := verifyCall(opts, replicas); err != nil {
   343  			return nil, err
   344  		}
   345  		return args.CreateReply(), nil
   346  	}
   347  
   348  	cfg := DistSenderConfig{
   349  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
   350  		Clock:      clock,
   351  		RPCContext: rpcContext,
   352  		TestingKnobs: ClientTestingKnobs{
   353  			TransportFactory: adaptSimpleTransport(testFn),
   354  		},
   355  		RangeDescriptorDB: mockRangeDescriptorDBForDescs(descriptor),
   356  		NodeDialer:        nodedialer.New(rpcContext, gossip.AddressResolver(g)),
   357  		Settings:          cluster.MakeTestingClusterSettings(),
   358  	}
   359  
   360  	ds := NewDistSender(cfg, g)
   361  
   362  	for n, tc := range testCases {
   363  		t.Run("", func(t *testing.T) {
   364  			verifyCall = makeVerifier(tc.expReplica)
   365  
   366  			{
   367  				// The local node needs to get its attributes during sendRPC.
   368  				nd := &roachpb.NodeDescriptor{
   369  					NodeID:  6,
   370  					Address: util.MakeUnresolvedAddr("tcp", fmt.Sprintf("invalid.invalid:6")),
   371  					Locality: roachpb.Locality{
   372  						Tiers: tc.tiers,
   373  					},
   374  				}
   375  				g.NodeID.Reset(nd.NodeID)
   376  				if err := g.SetNodeDescriptor(nd); err != nil {
   377  					t.Fatal(err)
   378  				}
   379  			}
   380  
   381  			ds.leaseHolderCache.Update(
   382  				ctx, rangeID, roachpb.StoreID(0),
   383  			)
   384  			if tc.leaseHolder > 0 {
   385  				ds.leaseHolderCache.Update(
   386  					ctx, rangeID, descriptor.InternalReplicas[tc.leaseHolder-1].StoreID,
   387  				)
   388  			}
   389  
   390  			args := tc.args
   391  			{
   392  				header := args.Header()
   393  				header.Key = roachpb.Key("a")
   394  				args.SetHeader(header)
   395  			}
   396  			if roachpb.IsRange(args) {
   397  				header := args.Header()
   398  				header.EndKey = args.Header().Key.Next()
   399  				args.SetHeader(header)
   400  			}
   401  			consistency := roachpb.CONSISTENT
   402  			if !tc.consistent {
   403  				consistency = roachpb.INCONSISTENT
   404  			}
   405  			// Kill the cached NodeDescriptor, enforcing a lookup from Gossip.
   406  			ds.nodeDescriptor = nil
   407  			if _, err := kv.SendWrappedWith(ctx, ds, roachpb.Header{
   408  				RangeID:         rangeID, // Not used in this test, but why not.
   409  				ReadConsistency: consistency,
   410  			}, args); err != nil {
   411  				t.Errorf("%d: %s", n, err)
   412  			}
   413  		})
   414  	}
   415  }
   416  
   417  // MockRangeDescriptorDB is an implementation of RangeDescriptorDB. Unlike
   418  // DistSender's implementation, MockRangeDescriptorDB does not call back into
   419  // the RangeDescriptorCache by default to perform RangeLookups. Because of this,
   420  // tests should not rely on that behavior and should implement it themselves if
   421  // they need it.
   422  type MockRangeDescriptorDB func(roachpb.RKey, bool) (rs, preRs []roachpb.RangeDescriptor, err error)
   423  
   424  func (mdb MockRangeDescriptorDB) RangeLookup(
   425  	ctx context.Context, key roachpb.RKey, useReverseScan bool,
   426  ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) {
   427  	return mdb(key, useReverseScan)
   428  }
   429  
   430  func (mdb MockRangeDescriptorDB) FirstRange() (*roachpb.RangeDescriptor, error) {
   431  	rs, _, err := mdb(roachpb.RKey(roachpb.KeyMin), false)
   432  	if err != nil || len(rs) == 0 {
   433  		return nil, err
   434  	}
   435  	return &rs[0], nil
   436  }
   437  
   438  // withMetaRecursion returns a new MockRangeDescriptorDB that will behave the
   439  // same as the receiver, but will also recurse into the provided
   440  // RangeDescriptorCache on each lookup to simulate the use of a descriptor's
   441  // parent descriptor during the RangeLookup scan. This is important for tests
   442  // that expect the RangeLookup for a user space descriptor to trigger a lookup
   443  // for a meta descriptor.
   444  func (mdb MockRangeDescriptorDB) withMetaRecursion(
   445  	rdc *RangeDescriptorCache,
   446  ) MockRangeDescriptorDB {
   447  	return func(key roachpb.RKey, useReverseScan bool) (rs, preRs []roachpb.RangeDescriptor, err error) {
   448  		metaKey := keys.RangeMetaKey(key)
   449  		if !metaKey.Equal(roachpb.RKeyMin) {
   450  			_, _, err := rdc.LookupRangeDescriptorWithEvictionToken(context.Background(), metaKey, nil, useReverseScan)
   451  			if err != nil {
   452  				return nil, nil, err
   453  			}
   454  		}
   455  		return mdb(key, useReverseScan)
   456  	}
   457  }
   458  
   459  // withMetaRecursion calls MockRangeDescriptorDB.withMetaRecursion on the
   460  // DistSender's RangeDescriptorDB.
   461  func (ds *DistSender) withMetaRecursion() *DistSender {
   462  	ds.rangeCache.db = ds.rangeCache.db.(MockRangeDescriptorDB).withMetaRecursion(ds.rangeCache)
   463  	return ds
   464  }
   465  
   466  func mockRangeDescriptorDBForDescs(descs ...roachpb.RangeDescriptor) MockRangeDescriptorDB {
   467  	return MockRangeDescriptorDB(func(key roachpb.RKey, useReverseScan bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) {
   468  		var matchingDescs []roachpb.RangeDescriptor
   469  		for _, desc := range descs {
   470  			contains := desc.ContainsKey
   471  			if useReverseScan {
   472  				contains = desc.ContainsKeyInverted
   473  			}
   474  			if contains(key) {
   475  				matchingDescs = append(matchingDescs, desc)
   476  			}
   477  		}
   478  		switch len(matchingDescs) {
   479  		case 0:
   480  			panic(fmt.Sprintf("found no matching descriptors for key %s", key))
   481  		case 1:
   482  			return matchingDescs, nil, nil
   483  		default:
   484  			panic(fmt.Sprintf("found multiple matching descriptors for key %s: %v", key, matchingDescs))
   485  		}
   486  	})
   487  }
   488  
   489  var defaultMockRangeDescriptorDB = mockRangeDescriptorDBForDescs(
   490  	testMetaRangeDescriptor,
   491  	testUserRangeDescriptor,
   492  )
   493  var threeReplicaMockRangeDescriptorDB = mockRangeDescriptorDBForDescs(
   494  	testMetaRangeDescriptor,
   495  	testUserRangeDescriptor3Replicas,
   496  )
   497  
   498  func TestImmutableBatchArgs(t *testing.T) {
   499  	defer leaktest.AfterTest(t)()
   500  	stopper := stop.NewStopper()
   501  	defer stopper.Stop(context.Background())
   502  
   503  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
   504  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
   505  	g := makeGossip(t, stopper, rpcContext)
   506  	var testFn simpleSendFn = func(
   507  		_ context.Context,
   508  		_ SendOptions,
   509  		_ ReplicaSlice,
   510  		args roachpb.BatchRequest,
   511  	) (*roachpb.BatchResponse, error) {
   512  		reply := args.CreateReply()
   513  		reply.Txn = args.Txn.Clone()
   514  		reply.Txn.WriteTimestamp = hlc.MaxTimestamp
   515  		return reply, nil
   516  	}
   517  
   518  	cfg := DistSenderConfig{
   519  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
   520  		Clock:      clock,
   521  		RPCContext: rpcContext,
   522  		TestingKnobs: ClientTestingKnobs{
   523  			TransportFactory: adaptSimpleTransport(testFn),
   524  		},
   525  		RangeDescriptorDB: defaultMockRangeDescriptorDB,
   526  		NodeDialer:        nodedialer.New(rpcContext, gossip.AddressResolver(g)),
   527  		Settings:          cluster.MakeTestingClusterSettings(),
   528  	}
   529  
   530  	ds := NewDistSender(cfg, g)
   531  
   532  	txn := roachpb.MakeTransaction(
   533  		"test", nil /* baseKey */, roachpb.NormalUserPriority,
   534  		clock.Now(), clock.MaxOffset().Nanoseconds(),
   535  	)
   536  	origTxnTs := txn.WriteTimestamp
   537  
   538  	// An optimization does copy-on-write if we haven't observed anything,
   539  	// so make sure we're not in that case.
   540  	txn.UpdateObservedTimestamp(1, hlc.MaxTimestamp)
   541  
   542  	put := roachpb.NewPut(roachpb.Key("don't"), roachpb.Value{})
   543  	if _, pErr := kv.SendWrappedWith(context.Background(), ds, roachpb.Header{
   544  		Txn: &txn,
   545  	}, put); pErr != nil {
   546  		t.Fatal(pErr)
   547  	}
   548  
   549  	if txn.WriteTimestamp != origTxnTs {
   550  		t.Fatal("Transaction was mutated by DistSender")
   551  	}
   552  }
   553  
   554  // TestRetryOnNotLeaseHolderError verifies that the DistSender correctly updates the
   555  // lease holder cache and retries when receiving a NotLeaseHolderError.
   556  func TestRetryOnNotLeaseHolderError(t *testing.T) {
   557  	defer leaktest.AfterTest(t)()
   558  	stopper := stop.NewStopper()
   559  	defer stopper.Stop(context.Background())
   560  
   561  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
   562  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
   563  	g := makeGossip(t, stopper, rpcContext)
   564  	leaseHolder := roachpb.ReplicaDescriptor{
   565  		NodeID:  99,
   566  		StoreID: 999,
   567  	}
   568  	first := true
   569  
   570  	var testFn simpleSendFn = func(
   571  		_ context.Context,
   572  		_ SendOptions,
   573  		_ ReplicaSlice,
   574  		args roachpb.BatchRequest,
   575  	) (*roachpb.BatchResponse, error) {
   576  		reply := &roachpb.BatchResponse{}
   577  		if first {
   578  			reply.Error = roachpb.NewError(
   579  				&roachpb.NotLeaseHolderError{LeaseHolder: &leaseHolder})
   580  			first = false
   581  			return reply, nil
   582  		}
   583  		// Return an error to avoid activating a code path that would
   584  		// populate the leaseholder cache from the successful response.
   585  		// That's not what this test wants to test.
   586  		reply.Error = roachpb.NewErrorf("boom")
   587  		return reply, nil
   588  	}
   589  
   590  	cfg := DistSenderConfig{
   591  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
   592  		Clock:      clock,
   593  		RPCContext: rpcContext,
   594  		TestingKnobs: ClientTestingKnobs{
   595  			TransportFactory: adaptSimpleTransport(testFn),
   596  		},
   597  		RangeDescriptorDB: defaultMockRangeDescriptorDB,
   598  		NodeDialer:        nodedialer.New(rpcContext, gossip.AddressResolver(g)),
   599  		Settings:          cluster.MakeTestingClusterSettings(),
   600  	}
   601  	ds := NewDistSender(cfg, g)
   602  	v := roachpb.MakeValueFromString("value")
   603  	put := roachpb.NewPut(roachpb.Key("a"), v)
   604  	if _, pErr := kv.SendWrapped(context.Background(), ds, put); !testutils.IsPError(pErr, "boom") {
   605  		t.Fatalf("unexpected error: %v", pErr)
   606  	}
   607  	if first {
   608  		t.Errorf("The command did not retry")
   609  	}
   610  	rangeID := roachpb.RangeID(2)
   611  	if cur, ok := ds.leaseHolderCache.Lookup(context.Background(), rangeID); !ok {
   612  		t.Errorf("lease holder cache was not updated: expected %+v", leaseHolder)
   613  	} else if cur != leaseHolder.StoreID {
   614  		t.Errorf("lease holder cache was not updated: expected %d, got %d", leaseHolder.StoreID, cur)
   615  	}
   616  }
   617  
   618  // TestBackoffOnNotLeaseHolderErrorDuringTransfer verifies that the DistSender
   619  // backs off upon receiving multiple NotLeaseHolderErrors without observing an
   620  // increase in LeaseSequence.
   621  func TestBackoffOnNotLeaseHolderErrorDuringTransfer(t *testing.T) {
   622  	defer leaktest.AfterTest(t)()
   623  	stopper := stop.NewStopper()
   624  	defer stopper.Stop(context.Background())
   625  
   626  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
   627  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
   628  	g := makeGossip(t, stopper, rpcContext)
   629  	leaseHolders := testUserRangeDescriptor3Replicas.InternalReplicas
   630  	for _, n := range leaseHolders {
   631  		if err := g.AddInfoProto(
   632  			gossip.MakeNodeIDKey(n.NodeID),
   633  			newNodeDesc(n.NodeID),
   634  			gossip.NodeDescriptorTTL,
   635  		); err != nil {
   636  			t.Fatal(err)
   637  		}
   638  	}
   639  	var sequences []roachpb.LeaseSequence
   640  	var testFn simpleSendFn = func(
   641  		_ context.Context,
   642  		_ SendOptions,
   643  		_ ReplicaSlice,
   644  		args roachpb.BatchRequest,
   645  	) (*roachpb.BatchResponse, error) {
   646  		reply := &roachpb.BatchResponse{}
   647  		if len(sequences) > 0 {
   648  			seq := sequences[0]
   649  			sequences = sequences[1:]
   650  			lease := roachpb.Lease{
   651  				Sequence: seq,
   652  				Replica:  leaseHolders[int(seq)%2],
   653  			}
   654  			reply.Error = roachpb.NewError(
   655  				&roachpb.NotLeaseHolderError{
   656  					Replica:     leaseHolders[int(seq)%2],
   657  					LeaseHolder: &leaseHolders[(int(seq)+1)%2],
   658  					Lease:       &lease,
   659  				})
   660  			return reply, nil
   661  		}
   662  		// Return an error to bail out of retries.
   663  		reply.Error = roachpb.NewErrorf("boom")
   664  		return reply, nil
   665  	}
   666  
   667  	cfg := DistSenderConfig{
   668  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
   669  		Clock:      clock,
   670  		RPCContext: rpcContext,
   671  		TestingKnobs: ClientTestingKnobs{
   672  			TransportFactory: adaptSimpleTransport(testFn),
   673  		},
   674  		RangeDescriptorDB: threeReplicaMockRangeDescriptorDB,
   675  		NodeDialer:        nodedialer.New(rpcContext, gossip.AddressResolver(g)),
   676  		RPCRetryOptions: &retry.Options{
   677  			InitialBackoff: time.Microsecond,
   678  			MaxBackoff:     time.Microsecond,
   679  		},
   680  		Settings: cluster.MakeTestingClusterSettings(),
   681  	}
   682  	for i, c := range []struct {
   683  		leaseSequences []roachpb.LeaseSequence
   684  		expected       int64
   685  	}{
   686  		{[]roachpb.LeaseSequence{1, 0, 1, 2}, 2},
   687  		{[]roachpb.LeaseSequence{0}, 0},
   688  		{[]roachpb.LeaseSequence{1, 0, 1, 2, 1}, 3},
   689  	} {
   690  		sequences = c.leaseSequences
   691  		ds := NewDistSender(cfg, g)
   692  		v := roachpb.MakeValueFromString("value")
   693  		put := roachpb.NewPut(roachpb.Key("a"), v)
   694  		if _, pErr := kv.SendWrapped(context.Background(), ds, put); !testutils.IsPError(pErr, "boom") {
   695  			t.Fatalf("%d: unexpected error: %v", i, pErr)
   696  		}
   697  		if got := ds.Metrics().InLeaseTransferBackoffs.Count(); got != c.expected {
   698  			t.Fatalf("%d: expected %d backoffs, got %d", i, c.expected, got)
   699  		}
   700  	}
   701  }
   702  
   703  // This test verifies that when we have a cached leaseholder that is down
   704  // it is ejected from the cache.
   705  func TestDistSenderDownNodeEvictLeaseholder(t *testing.T) {
   706  	defer leaktest.AfterTest(t)()
   707  
   708  	ctx := context.Background()
   709  	stopper := stop.NewStopper()
   710  	defer stopper.Stop(ctx)
   711  
   712  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
   713  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
   714  	g := makeGossip(t, stopper, rpcContext)
   715  	if err := g.AddInfoProto(
   716  		gossip.MakeNodeIDKey(roachpb.NodeID(2)),
   717  		newNodeDesc(2),
   718  		gossip.NodeDescriptorTTL,
   719  	); err != nil {
   720  		t.Fatal(err)
   721  	}
   722  
   723  	var contacted1, contacted2 bool
   724  
   725  	transport := func(
   726  		ctx context.Context,
   727  		opts SendOptions,
   728  		replicas ReplicaSlice,
   729  		ba roachpb.BatchRequest,
   730  	) (*roachpb.BatchResponse, error) {
   731  		switch ba.Replica.StoreID {
   732  		case 1:
   733  			contacted1 = true
   734  			return nil, errors.New("mock RPC error")
   735  		case 2:
   736  			contacted2 = true
   737  			return ba.CreateReply(), nil
   738  		default:
   739  			panic("unexpected replica: " + ba.Replica.String())
   740  		}
   741  	}
   742  
   743  	cfg := DistSenderConfig{
   744  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
   745  		Clock:      clock,
   746  		RPCContext: rpcContext,
   747  		TestingKnobs: ClientTestingKnobs{
   748  			TransportFactory: adaptSimpleTransport(transport),
   749  		},
   750  		RangeDescriptorDB: mockRangeDescriptorDBForDescs(
   751  			roachpb.RangeDescriptor{
   752  				RangeID:  1,
   753  				StartKey: roachpb.RKeyMin,
   754  				EndKey:   roachpb.RKeyMax,
   755  				InternalReplicas: []roachpb.ReplicaDescriptor{
   756  					{
   757  						NodeID:  1,
   758  						StoreID: 1,
   759  					},
   760  					{
   761  						NodeID:  2,
   762  						StoreID: 2,
   763  					},
   764  				},
   765  			}),
   766  		NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
   767  		Settings:   cluster.MakeTestingClusterSettings(),
   768  	}
   769  
   770  	ds := NewDistSender(cfg, g)
   771  	ds.LeaseHolderCache().Update(ctx, roachpb.RangeID(1), roachpb.StoreID(1))
   772  
   773  	var ba roachpb.BatchRequest
   774  	ba.RangeID = 1
   775  	get := &roachpb.GetRequest{}
   776  	get.Key = roachpb.Key("a")
   777  	ba.Add(get)
   778  
   779  	if _, pErr := ds.Send(ctx, ba); pErr != nil {
   780  		t.Fatal(pErr)
   781  	}
   782  
   783  	if !contacted1 || !contacted2 {
   784  		t.Errorf("contacted n1: %t, contacted n2: %t", contacted1, contacted2)
   785  	}
   786  
   787  	if storeID, ok := ds.LeaseHolderCache().Lookup(ctx, roachpb.RangeID(1)); !ok {
   788  		t.Fatalf("expected new leaseholder to be cached")
   789  	} else if exp := roachpb.StoreID(2); storeID != exp {
   790  		t.Fatalf("expected lease holder for r1 to be cached as s%d, but got s%d", exp, storeID)
   791  	}
   792  }
   793  
   794  // TestRetryOnDescriptorLookupError verifies that the DistSender retries a descriptor
   795  // lookup on any error.
   796  func TestRetryOnDescriptorLookupError(t *testing.T) {
   797  	defer leaktest.AfterTest(t)()
   798  	stopper := stop.NewStopper()
   799  	defer stopper.Stop(context.Background())
   800  
   801  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
   802  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
   803  	g := makeGossip(t, stopper, rpcContext)
   804  
   805  	errs := []error{
   806  		errors.New("boom"),
   807  		nil,
   808  	}
   809  
   810  	cfg := DistSenderConfig{
   811  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
   812  		Clock:      clock,
   813  		RPCContext: rpcContext,
   814  		TestingKnobs: ClientTestingKnobs{
   815  			TransportFactory: adaptSimpleTransport(stubRPCSendFn),
   816  		},
   817  		RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) {
   818  			// Don't return an error on the FirstRange lookup.
   819  			if key.Equal(roachpb.KeyMin) {
   820  				return []roachpb.RangeDescriptor{testMetaRangeDescriptor}, nil, nil
   821  			}
   822  
   823  			// Return next error and truncate the prefix of the errors array.
   824  			err := errs[0]
   825  			errs = errs[1:]
   826  			return []roachpb.RangeDescriptor{testUserRangeDescriptor}, nil, err
   827  		}),
   828  		NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
   829  		Settings:   cluster.MakeTestingClusterSettings(),
   830  	}
   831  	ds := NewDistSender(cfg, g)
   832  	put := roachpb.NewPut(roachpb.Key("a"), roachpb.MakeValueFromString("value"))
   833  	// Error on descriptor lookup, second attempt successful.
   834  	if _, pErr := kv.SendWrapped(context.Background(), ds, put); pErr != nil {
   835  		t.Errorf("unexpected error: %s", pErr)
   836  	}
   837  	if len(errs) != 0 {
   838  		t.Fatalf("expected more descriptor lookups, leftover errs: %+v", errs)
   839  	}
   840  }
   841  
   842  // TestEvictOnFirstRangeGossip verifies that we evict the first range
   843  // descriptor from the descriptor cache when a gossip update is received for
   844  // the first range.
   845  func TestEvictOnFirstRangeGossip(t *testing.T) {
   846  	defer leaktest.AfterTest(t)()
   847  
   848  	stopper := stop.NewStopper()
   849  	defer stopper.Stop(context.Background())
   850  
   851  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
   852  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
   853  	g := makeGossip(t, stopper, rpcContext)
   854  
   855  	sender := func(
   856  		_ context.Context, ba roachpb.BatchRequest,
   857  	) (*roachpb.BatchResponse, *roachpb.Error) {
   858  		return ba.CreateReply(), nil
   859  	}
   860  
   861  	desc := roachpb.RangeDescriptor{
   862  		RangeID:  1,
   863  		StartKey: roachpb.RKeyMin,
   864  		EndKey:   roachpb.RKeyMax,
   865  		InternalReplicas: []roachpb.ReplicaDescriptor{
   866  			{
   867  				NodeID:  1,
   868  				StoreID: 1,
   869  			},
   870  		},
   871  	}
   872  
   873  	var numFirstRange int32
   874  	rDB := MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) (
   875  		[]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error,
   876  	) {
   877  		if key.Equal(roachpb.KeyMin) {
   878  			atomic.AddInt32(&numFirstRange, 1)
   879  		}
   880  		return []roachpb.RangeDescriptor{desc}, nil, nil
   881  	})
   882  
   883  	cfg := DistSenderConfig{
   884  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
   885  		Clock:      clock,
   886  		RPCContext: rpcContext,
   887  		TestingKnobs: ClientTestingKnobs{
   888  			TransportFactory: SenderTransportFactory(
   889  				tracing.NewTracer(),
   890  				kv.SenderFunc(sender),
   891  			),
   892  		},
   893  		RangeDescriptorDB: rDB,
   894  		NodeDialer:        nodedialer.New(rpcContext, gossip.AddressResolver(g)),
   895  		Settings:          cluster.MakeTestingClusterSettings(),
   896  	}
   897  
   898  	ds := NewDistSender(cfg, g).withMetaRecursion()
   899  
   900  	anyKey := roachpb.Key("anything")
   901  	rAnyKey := keys.MustAddr(anyKey)
   902  
   903  	call := func() {
   904  		if _, _, err := ds.rangeCache.LookupRangeDescriptorWithEvictionToken(
   905  			context.Background(), rAnyKey, nil, false,
   906  		); err != nil {
   907  			t.Fatal(err)
   908  		}
   909  	}
   910  
   911  	// Perform multiple calls and check that the first range is only looked up
   912  	// once, with subsequent calls hitting the cache.
   913  	//
   914  	// This potentially races with the cache-evicting gossip callback on the
   915  	// first range, so it is important that the first range descriptor's state
   916  	// in gossip is stable from this point forward.
   917  	for i := 0; i < 3; i++ {
   918  		call()
   919  		if num := atomic.LoadInt32(&numFirstRange); num != 1 {
   920  			t.Fatalf("expected one first range lookup, got %d", num)
   921  		}
   922  	}
   923  	// Tweak the descriptor so that the gossip callback will be invoked.
   924  	desc.Generation = 1
   925  	if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &desc, 0); err != nil {
   926  		t.Fatal(err)
   927  	}
   928  
   929  	// Once Gossip fires the callbacks, we should see a cache eviction and thus,
   930  	// a new cache hit.
   931  	testutils.SucceedsSoon(t, func() error {
   932  		call()
   933  		if exp, act := int32(2), atomic.LoadInt32(&numFirstRange); exp != act {
   934  			return errors.Errorf("expected %d first range lookups, got %d", exp, act)
   935  		}
   936  		return nil
   937  	})
   938  }
   939  
   940  func TestEvictCacheOnError(t *testing.T) {
   941  	defer leaktest.AfterTest(t)()
   942  	// The first attempt gets a BatchResponse with replicaError in the header, if
   943  	// replicaError set. If not set, the first attempt gets an RPC error. The
   944  	// second attempt, if any, succeeds.
   945  	// Currently lease holder and cached range descriptor are treated equally.
   946  	// TODO(bdarnell): refactor to cover different types of retryable errors.
   947  	const errString = "boom"
   948  	testDesc := roachpb.RangeDescriptor{
   949  		RangeID:  1,
   950  		StartKey: testMetaEndKey,
   951  		EndKey:   roachpb.RKeyMax,
   952  		InternalReplicas: []roachpb.ReplicaDescriptor{
   953  			{
   954  				NodeID:  1,
   955  				StoreID: 1,
   956  			},
   957  		},
   958  	}
   959  
   960  	testCases := []struct {
   961  		canceledCtx            bool
   962  		replicaError           error
   963  		shouldClearLeaseHolder bool
   964  		shouldClearReplica     bool
   965  	}{
   966  		{false, errors.New(errString), false, false},                                     // non-retryable replica error
   967  		{false, &roachpb.RangeKeyMismatchError{MismatchedRange: testDesc}, false, false}, // RangeKeyMismatch replica error
   968  		{false, &roachpb.RangeNotFoundError{}, false, false},                             // RangeNotFound replica error
   969  		{false, nil, false, false},                                                       // RPC error
   970  		{true, nil, false, false},                                                        // canceled context
   971  	}
   972  
   973  	for i, tc := range testCases {
   974  		stopper := stop.NewStopper()
   975  		defer stopper.Stop(context.Background())
   976  
   977  		clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
   978  		rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
   979  		g := makeGossip(t, stopper, rpcContext)
   980  		leaseHolder := roachpb.ReplicaDescriptor{
   981  			NodeID:  99,
   982  			StoreID: 999,
   983  		}
   984  		first := true
   985  
   986  		ctx, cancel := context.WithCancel(context.Background())
   987  
   988  		var testFn simpleSendFn = func(
   989  			ctx context.Context,
   990  			_ SendOptions,
   991  			_ ReplicaSlice,
   992  			args roachpb.BatchRequest,
   993  		) (*roachpb.BatchResponse, error) {
   994  			if !first {
   995  				return args.CreateReply(), nil
   996  			}
   997  			first = false
   998  			if tc.canceledCtx {
   999  				cancel()
  1000  				return nil, ctx.Err()
  1001  			}
  1002  			if tc.replicaError == nil {
  1003  				return nil, errors.New(errString)
  1004  			}
  1005  			reply := &roachpb.BatchResponse{}
  1006  			reply.Error = roachpb.NewError(tc.replicaError)
  1007  			return reply, nil
  1008  		}
  1009  
  1010  		cfg := DistSenderConfig{
  1011  			AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1012  			Clock:      clock,
  1013  			RPCContext: rpcContext,
  1014  			TestingKnobs: ClientTestingKnobs{
  1015  				TransportFactory: adaptSimpleTransport(testFn),
  1016  			},
  1017  			RangeDescriptorDB: defaultMockRangeDescriptorDB,
  1018  			NodeDialer:        nodedialer.New(rpcContext, gossip.AddressResolver(g)),
  1019  			Settings:          cluster.MakeTestingClusterSettings(),
  1020  		}
  1021  		ds := NewDistSender(cfg, g)
  1022  		ds.leaseHolderCache.Update(context.Background(), 1, leaseHolder.StoreID)
  1023  		key := roachpb.Key("a")
  1024  		put := roachpb.NewPut(key, roachpb.MakeValueFromString("value"))
  1025  
  1026  		if _, pErr := kv.SendWrapped(ctx, ds, put); pErr != nil && !testutils.IsPError(pErr, errString) && !testutils.IsError(pErr.GoError(), ctx.Err().Error()) {
  1027  			t.Errorf("put encountered unexpected error: %s", pErr)
  1028  		}
  1029  		if _, ok := ds.leaseHolderCache.Lookup(context.Background(), 1); ok != !tc.shouldClearLeaseHolder {
  1030  			t.Errorf("%d: lease holder cache eviction: shouldClearLeaseHolder=%t, but value is %t", i, tc.shouldClearLeaseHolder, ok)
  1031  		}
  1032  		cachedDesc := ds.rangeCache.GetCachedRangeDescriptor(roachpb.RKey(key), false /* inverted */)
  1033  		if cachedDesc == nil != tc.shouldClearReplica {
  1034  			t.Errorf("%d: unexpected second replica lookup behavior: wanted=%t", i, tc.shouldClearReplica)
  1035  		}
  1036  	}
  1037  }
  1038  
  1039  func TestEvictCacheOnUnknownLeaseHolder(t *testing.T) {
  1040  	defer leaktest.AfterTest(t)()
  1041  	stopper := stop.NewStopper()
  1042  	defer stopper.Stop(context.Background())
  1043  
  1044  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1045  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1046  	g := makeGossip(t, stopper, rpcContext)
  1047  
  1048  	// Gossip the two nodes referred to in testUserRangeDescriptor3Replicas.
  1049  	for i := 2; i <= 3; i++ {
  1050  		nd := newNodeDesc(roachpb.NodeID(i))
  1051  		if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil {
  1052  			t.Fatal(err)
  1053  		}
  1054  	}
  1055  
  1056  	var count int32
  1057  	var testFn simpleSendFn = func(
  1058  		_ context.Context,
  1059  		_ SendOptions,
  1060  		_ ReplicaSlice,
  1061  		args roachpb.BatchRequest,
  1062  	) (*roachpb.BatchResponse, error) {
  1063  		var err error
  1064  		switch count {
  1065  		case 0, 1:
  1066  			err = &roachpb.NotLeaseHolderError{LeaseHolder: &roachpb.ReplicaDescriptor{NodeID: 99, StoreID: 999}}
  1067  		case 2:
  1068  			err = roachpb.NewRangeNotFoundError(0, 0)
  1069  		default:
  1070  			return args.CreateReply(), nil
  1071  		}
  1072  		count++
  1073  		reply := &roachpb.BatchResponse{}
  1074  		reply.Error = roachpb.NewError(err)
  1075  		return reply, nil
  1076  	}
  1077  
  1078  	cfg := DistSenderConfig{
  1079  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1080  		Clock:      clock,
  1081  		RPCContext: rpcContext,
  1082  		TestingKnobs: ClientTestingKnobs{
  1083  			TransportFactory: adaptSimpleTransport(testFn),
  1084  		},
  1085  		RangeDescriptorDB: threeReplicaMockRangeDescriptorDB,
  1086  		NodeDialer:        nodedialer.New(rpcContext, gossip.AddressResolver(g)),
  1087  		Settings:          cluster.MakeTestingClusterSettings(),
  1088  	}
  1089  	ds := NewDistSender(cfg, g)
  1090  	key := roachpb.Key("a")
  1091  	put := roachpb.NewPut(key, roachpb.MakeValueFromString("value"))
  1092  
  1093  	if _, pErr := kv.SendWrapped(context.Background(), ds, put); pErr != nil {
  1094  		t.Errorf("put encountered unexpected error: %s", pErr)
  1095  	}
  1096  	if count != 3 {
  1097  		t.Errorf("expected three retries; got %d", count)
  1098  	}
  1099  }
  1100  
  1101  // TestRetryOnWrongReplicaError sets up a DistSender on a minimal gossip
  1102  // network and a mock of Send, and verifies that the DistSender correctly
  1103  // retries upon encountering a stale entry in its range descriptor cache.
  1104  func TestRetryOnWrongReplicaError(t *testing.T) {
  1105  	defer leaktest.AfterTest(t)()
  1106  	stopper := stop.NewStopper()
  1107  	defer stopper.Stop(context.Background())
  1108  
  1109  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1110  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1111  	g := makeGossip(t, stopper, rpcContext)
  1112  	if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &testMetaRangeDescriptor, time.Hour); err != nil {
  1113  		t.Fatal(err)
  1114  	}
  1115  
  1116  	// Updated below, after it has first been returned.
  1117  	badEndKey := roachpb.RKey("m")
  1118  	newRangeDescriptor := testUserRangeDescriptor
  1119  	goodEndKey := newRangeDescriptor.EndKey
  1120  	newRangeDescriptor.EndKey = badEndKey
  1121  	descStale := true
  1122  
  1123  	var testFn simpleSendFn = func(
  1124  		_ context.Context,
  1125  		_ SendOptions,
  1126  		_ ReplicaSlice,
  1127  		ba roachpb.BatchRequest,
  1128  	) (*roachpb.BatchResponse, error) {
  1129  		rs, err := keys.Range(ba.Requests)
  1130  		if err != nil {
  1131  			t.Fatal(err)
  1132  		}
  1133  		if kv.TestingIsRangeLookup(ba) {
  1134  			if bytes.HasPrefix(rs.Key, keys.Meta1Prefix) {
  1135  				br := &roachpb.BatchResponse{}
  1136  				r := &roachpb.ScanResponse{}
  1137  				var kv roachpb.KeyValue
  1138  				if err := kv.Value.SetProto(&testMetaRangeDescriptor); err != nil {
  1139  					t.Fatal(err)
  1140  				}
  1141  				r.Rows = append(r.Rows, kv)
  1142  				br.Add(r)
  1143  				return br, nil
  1144  			}
  1145  
  1146  			if !descStale && bytes.HasPrefix(rs.Key, keys.Meta2Prefix) {
  1147  				t.Fatalf("unexpected extra lookup for non-stale replica descriptor at %s", rs.Key)
  1148  			}
  1149  
  1150  			br := &roachpb.BatchResponse{}
  1151  			r := &roachpb.ScanResponse{}
  1152  			var kv roachpb.KeyValue
  1153  			if err := kv.Value.SetProto(&newRangeDescriptor); err != nil {
  1154  				t.Fatal(err)
  1155  			}
  1156  			r.Rows = append(r.Rows, kv)
  1157  			br.Add(r)
  1158  			// If we just returned the stale descriptor, set up returning the
  1159  			// good one next time.
  1160  			if bytes.HasPrefix(rs.Key, keys.Meta2Prefix) {
  1161  				if newRangeDescriptor.EndKey.Equal(badEndKey) {
  1162  					newRangeDescriptor.EndKey = goodEndKey
  1163  				} else {
  1164  					descStale = false
  1165  				}
  1166  			}
  1167  			return br, nil
  1168  		}
  1169  		// When the Scan first turns up, update the descriptor for future
  1170  		// range descriptor lookups.
  1171  		if !newRangeDescriptor.EndKey.Equal(goodEndKey) {
  1172  			return nil, &roachpb.RangeKeyMismatchError{
  1173  				RequestStartKey: rs.Key.AsRawKey(),
  1174  				RequestEndKey:   rs.EndKey.AsRawKey(),
  1175  			}
  1176  		}
  1177  		return ba.CreateReply(), nil
  1178  	}
  1179  
  1180  	cfg := DistSenderConfig{
  1181  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1182  		Clock:      clock,
  1183  		RPCContext: rpcContext,
  1184  		TestingKnobs: ClientTestingKnobs{
  1185  			TransportFactory: adaptSimpleTransport(testFn),
  1186  		},
  1187  		NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
  1188  		Settings:   cluster.MakeTestingClusterSettings(),
  1189  	}
  1190  	ds := NewDistSender(cfg, g)
  1191  	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), false)
  1192  	if _, err := kv.SendWrapped(context.Background(), ds, scan); err != nil {
  1193  		t.Errorf("scan encountered error: %s", err)
  1194  	}
  1195  }
  1196  
  1197  // TestRetryOnWrongReplicaErrorWithSuggestion sets up a DistSender on a
  1198  // minimal gossip network and a mock of Send, and verifies that the DistSender
  1199  // correctly retries upon encountering a stale entry in its range descriptor cache
  1200  // without needing to perform a second RangeLookup when the mismatch error
  1201  // provides a suggestion.
  1202  func TestRetryOnWrongReplicaErrorWithSuggestion(t *testing.T) {
  1203  	defer leaktest.AfterTest(t)()
  1204  	stopper := stop.NewStopper()
  1205  	defer stopper.Stop(context.Background())
  1206  
  1207  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1208  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1209  	g := makeGossip(t, stopper, rpcContext)
  1210  	if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &testMetaRangeDescriptor, time.Hour); err != nil {
  1211  		t.Fatal(err)
  1212  	}
  1213  
  1214  	// The test is gonna send the request first to staleDesc, but it reaches the
  1215  	// rhsDesc, which redirects it to lhsDesc.
  1216  	staleDesc := testUserRangeDescriptor
  1217  	lhsDesc := testUserRangeDescriptor
  1218  	lhsDesc.EndKey = roachpb.RKey("m")
  1219  	lhsDesc.RangeID = staleDesc.RangeID + 1
  1220  	lhsDesc.Generation = staleDesc.Generation + 1
  1221  	rhsDesc := testUserRangeDescriptor
  1222  	rhsDesc.StartKey = roachpb.RKey("m")
  1223  	rhsDesc.RangeID = staleDesc.RangeID + 2
  1224  	rhsDesc.Generation = staleDesc.Generation + 2
  1225  	firstLookup := true
  1226  
  1227  	var testFn simpleSendFn = func(
  1228  		_ context.Context,
  1229  		_ SendOptions,
  1230  		_ ReplicaSlice,
  1231  		ba roachpb.BatchRequest,
  1232  	) (*roachpb.BatchResponse, error) {
  1233  		rs, err := keys.Range(ba.Requests)
  1234  		if err != nil {
  1235  			t.Fatal(err)
  1236  		}
  1237  		if kv.TestingIsRangeLookup(ba) {
  1238  			if bytes.HasPrefix(rs.Key, keys.Meta1Prefix) {
  1239  				br := &roachpb.BatchResponse{}
  1240  				r := &roachpb.ScanResponse{}
  1241  				var kv roachpb.KeyValue
  1242  				if err := kv.Value.SetProto(&testMetaRangeDescriptor); err != nil {
  1243  					t.Fatal(err)
  1244  				}
  1245  				r.Rows = append(r.Rows, kv)
  1246  				br.Add(r)
  1247  				return br, nil
  1248  			}
  1249  
  1250  			if !firstLookup {
  1251  				t.Fatalf("unexpected extra lookup for non-stale replica descriptor at %s", rs.Key)
  1252  			}
  1253  			firstLookup = false
  1254  
  1255  			br := &roachpb.BatchResponse{}
  1256  			r := &roachpb.ScanResponse{}
  1257  			var kv roachpb.KeyValue
  1258  			if err := kv.Value.SetProto(&staleDesc); err != nil {
  1259  				t.Fatal(err)
  1260  			}
  1261  			r.Rows = append(r.Rows, kv)
  1262  			br.Add(r)
  1263  			return br, nil
  1264  		}
  1265  
  1266  		// When the Scan first turns up, provide the correct descriptor as a
  1267  		// suggestion for future range descriptor lookups.
  1268  		if ba.RangeID == staleDesc.RangeID {
  1269  			var br roachpb.BatchResponse
  1270  			br.Error = roachpb.NewError(&roachpb.RangeKeyMismatchError{
  1271  				RequestStartKey: rs.Key.AsRawKey(),
  1272  				RequestEndKey:   rs.EndKey.AsRawKey(),
  1273  				MismatchedRange: rhsDesc,
  1274  				SuggestedRange:  &lhsDesc,
  1275  			})
  1276  			return &br, nil
  1277  		} else if ba.RangeID != lhsDesc.RangeID {
  1278  			t.Fatalf("unexpected RangeID %d provided in request %v. expected: %s", ba.RangeID, ba, lhsDesc.RangeID)
  1279  		}
  1280  		return ba.CreateReply(), nil
  1281  	}
  1282  
  1283  	cfg := DistSenderConfig{
  1284  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1285  		Clock:      clock,
  1286  		RPCContext: rpcContext,
  1287  		TestingKnobs: ClientTestingKnobs{
  1288  			TransportFactory: adaptSimpleTransport(testFn),
  1289  		},
  1290  		NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
  1291  		Settings:   cluster.MakeTestingClusterSettings(),
  1292  	}
  1293  	ds := NewDistSender(cfg, g)
  1294  	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), false)
  1295  	if _, err := kv.SendWrapped(context.Background(), ds, scan); err != nil {
  1296  		t.Errorf("scan encountered error: %s", err)
  1297  	}
  1298  }
  1299  
  1300  func TestGetFirstRangeDescriptor(t *testing.T) {
  1301  	defer leaktest.AfterTest(t)()
  1302  	stopper := stop.NewStopper()
  1303  	defer stopper.Stop(context.Background())
  1304  
  1305  	n := simulation.NewNetwork(stopper, 3, true, zonepb.DefaultZoneConfigRef())
  1306  	for _, node := range n.Nodes {
  1307  		// TODO(spencer): remove the use of gossip/simulation here.
  1308  		node.Gossip.EnableSimulationCycler(false)
  1309  	}
  1310  	n.Start()
  1311  	ds := NewDistSender(DistSenderConfig{
  1312  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1313  		RPCContext: n.RPCContext,
  1314  		NodeDialer: nodedialer.New(n.RPCContext, gossip.AddressResolver(n.Nodes[0].Gossip)),
  1315  		Settings:   cluster.MakeTestingClusterSettings(),
  1316  	}, n.Nodes[0].Gossip)
  1317  	if _, err := ds.FirstRange(); err == nil {
  1318  		t.Errorf("expected not to find first range descriptor")
  1319  	}
  1320  	expectedDesc := &roachpb.RangeDescriptor{}
  1321  	expectedDesc.StartKey = roachpb.RKey("a")
  1322  	expectedDesc.EndKey = roachpb.RKey("c")
  1323  
  1324  	// Add first RangeDescriptor to a node different from the node for
  1325  	// this dist sender and ensure that this dist sender has the
  1326  	// information within a given time.
  1327  	if err := n.Nodes[1].Gossip.AddInfoProto(gossip.KeyFirstRangeDescriptor, expectedDesc, time.Hour); err != nil {
  1328  		t.Fatal(err)
  1329  	}
  1330  	const maxCycles = 25
  1331  	n.SimulateNetwork(func(cycle int, network *simulation.Network) bool {
  1332  		desc, err := ds.FirstRange()
  1333  		if err != nil {
  1334  			if cycle >= maxCycles {
  1335  				t.Errorf("could not get range descriptor after %d cycles", cycle)
  1336  				return false
  1337  			}
  1338  			return true
  1339  		}
  1340  		if !bytes.Equal(desc.StartKey, expectedDesc.StartKey) ||
  1341  			!bytes.Equal(desc.EndKey, expectedDesc.EndKey) {
  1342  			t.Errorf("expected first range descriptor %v, instead was %v",
  1343  				expectedDesc, desc)
  1344  		}
  1345  		return false
  1346  	})
  1347  }
  1348  
  1349  // TestSendRPCRetry verifies that sendRPC failed on first address but succeed on
  1350  // second address, the second reply should be successfully returned back.
  1351  func TestSendRPCRetry(t *testing.T) {
  1352  	defer leaktest.AfterTest(t)()
  1353  	stopper := stop.NewStopper()
  1354  	defer stopper.Stop(context.Background())
  1355  
  1356  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1357  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1358  	g := makeGossip(t, stopper, rpcContext)
  1359  	if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil {
  1360  		t.Fatal(err)
  1361  	}
  1362  
  1363  	// Fill RangeDescriptor with 2 replicas.
  1364  	var descriptor = roachpb.RangeDescriptor{
  1365  		RangeID:  1,
  1366  		StartKey: roachpb.RKey("a"),
  1367  		EndKey:   roachpb.RKey("z"),
  1368  	}
  1369  	for i := 1; i <= 2; i++ {
  1370  		addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i))
  1371  		nd := &roachpb.NodeDescriptor{
  1372  			NodeID:  roachpb.NodeID(i),
  1373  			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
  1374  		}
  1375  		if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil {
  1376  			t.Fatal(err)
  1377  		}
  1378  
  1379  		descriptor.InternalReplicas = append(descriptor.InternalReplicas, roachpb.ReplicaDescriptor{
  1380  			NodeID:  roachpb.NodeID(i),
  1381  			StoreID: roachpb.StoreID(i),
  1382  		})
  1383  	}
  1384  	descDB := mockRangeDescriptorDBForDescs(
  1385  		testMetaRangeDescriptor,
  1386  		descriptor,
  1387  	)
  1388  
  1389  	var testFn simpleSendFn = func(
  1390  		_ context.Context,
  1391  		_ SendOptions,
  1392  		_ ReplicaSlice,
  1393  		args roachpb.BatchRequest,
  1394  	) (*roachpb.BatchResponse, error) {
  1395  		batchReply := &roachpb.BatchResponse{}
  1396  		reply := &roachpb.ScanResponse{}
  1397  		batchReply.Add(reply)
  1398  		reply.Rows = append([]roachpb.KeyValue{}, roachpb.KeyValue{Key: roachpb.Key("b"), Value: roachpb.Value{}})
  1399  		return batchReply, nil
  1400  	}
  1401  	cfg := DistSenderConfig{
  1402  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1403  		Clock:      clock,
  1404  		RPCContext: rpcContext,
  1405  		TestingKnobs: ClientTestingKnobs{
  1406  			TransportFactory: adaptSimpleTransport(testFn),
  1407  		},
  1408  		RangeDescriptorDB: descDB,
  1409  		Settings:          cluster.MakeTestingClusterSettings(),
  1410  	}
  1411  	ds := NewDistSender(cfg, g)
  1412  	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), false)
  1413  	sr, err := kv.SendWrappedWith(context.Background(), ds, roachpb.Header{MaxSpanRequestKeys: 1}, scan)
  1414  	if err != nil {
  1415  		t.Fatal(err)
  1416  	}
  1417  	if l := len(sr.(*roachpb.ScanResponse).Rows); l != 1 {
  1418  		t.Fatalf("expected 1 row; got %d", l)
  1419  	}
  1420  }
  1421  
  1422  // This test reproduces the main problem in:
  1423  // https://github.com/cockroachdb/cockroach/issues/30613.
  1424  // by verifying that if a RangeNotFoundError is returned from a Replica,
  1425  // the next Replica is tried.
  1426  func TestSendRPCRangeNotFoundError(t *testing.T) {
  1427  	defer leaktest.AfterTest(t)()
  1428  	stopper := stop.NewStopper()
  1429  	defer stopper.Stop(context.Background())
  1430  
  1431  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1432  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1433  	g := makeGossip(t, stopper, rpcContext)
  1434  	if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil {
  1435  		t.Fatal(err)
  1436  	}
  1437  
  1438  	// Fill RangeDescriptor with three replicas.
  1439  	var descriptor = roachpb.RangeDescriptor{
  1440  		RangeID:       1,
  1441  		StartKey:      roachpb.RKey("a"),
  1442  		EndKey:        roachpb.RKey("z"),
  1443  		NextReplicaID: 1,
  1444  	}
  1445  	for i := 1; i <= 3; i++ {
  1446  		addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i))
  1447  		nd := &roachpb.NodeDescriptor{
  1448  			NodeID:  roachpb.NodeID(i),
  1449  			Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()),
  1450  		}
  1451  		if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil {
  1452  			t.Fatal(err)
  1453  		}
  1454  
  1455  		descriptor.AddReplica(roachpb.NodeID(i), roachpb.StoreID(i), roachpb.VOTER_FULL)
  1456  	}
  1457  	descDB := mockRangeDescriptorDBForDescs(
  1458  		testMetaRangeDescriptor,
  1459  		descriptor,
  1460  	)
  1461  
  1462  	seen := map[roachpb.ReplicaID]struct{}{}
  1463  	var leaseholderStoreID roachpb.StoreID
  1464  	var ds *DistSender
  1465  	var testFn simpleSendFn = func(
  1466  		_ context.Context,
  1467  		_ SendOptions,
  1468  		_ ReplicaSlice,
  1469  		ba roachpb.BatchRequest,
  1470  	) (*roachpb.BatchResponse, error) {
  1471  		br := ba.CreateReply()
  1472  		if _, ok := seen[ba.Replica.ReplicaID]; ok {
  1473  			br.Error = roachpb.NewErrorf("visited replica %+v twice", ba.Replica)
  1474  			return br, nil
  1475  		}
  1476  		seen[ba.Replica.ReplicaID] = struct{}{}
  1477  		if len(seen) <= 2 {
  1478  			if len(seen) == 1 {
  1479  				// Add to the leaseholder cache to verify that the response evicts it.
  1480  				ds.leaseHolderCache.Update(context.Background(), ba.RangeID, ba.Replica.StoreID)
  1481  			}
  1482  			br.Error = roachpb.NewError(roachpb.NewRangeNotFoundError(ba.RangeID, ba.Replica.StoreID))
  1483  			return br, nil
  1484  		}
  1485  		leaseholderStoreID = ba.Replica.StoreID
  1486  		return br, nil
  1487  	}
  1488  	cfg := DistSenderConfig{
  1489  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1490  		Clock:      clock,
  1491  		RPCContext: rpcContext,
  1492  		TestingKnobs: ClientTestingKnobs{
  1493  			TransportFactory: adaptSimpleTransport(testFn),
  1494  		},
  1495  		RangeDescriptorDB: descDB,
  1496  		Settings:          cluster.MakeTestingClusterSettings(),
  1497  	}
  1498  	ds = NewDistSender(cfg, g)
  1499  	get := roachpb.NewGet(roachpb.Key("b"))
  1500  	_, err := kv.SendWrapped(context.Background(), ds, get)
  1501  	if err != nil {
  1502  		t.Fatal(err)
  1503  	}
  1504  	if storeID, found := ds.leaseHolderCache.Lookup(context.Background(), roachpb.RangeID(1)); !found {
  1505  		t.Fatal("expected a cached leaseholder")
  1506  	} else if storeID != leaseholderStoreID {
  1507  		t.Fatalf("unexpected cached leaseholder s%d, expected s%d", storeID, leaseholderStoreID)
  1508  	}
  1509  }
  1510  
  1511  // TestGetNodeDescriptor checks that the Node descriptor automatically gets
  1512  // looked up from Gossip.
  1513  func TestGetNodeDescriptor(t *testing.T) {
  1514  	defer leaktest.AfterTest(t)()
  1515  	stopper := stop.NewStopper()
  1516  	defer stopper.Stop(context.Background())
  1517  
  1518  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1519  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1520  	g := makeGossip(t, stopper, rpcContext)
  1521  	ds := NewDistSender(DistSenderConfig{
  1522  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1523  		RPCContext: rpcContext,
  1524  		Clock:      clock,
  1525  		Settings:   cluster.MakeTestingClusterSettings(),
  1526  	}, g)
  1527  	g.NodeID.Reset(5)
  1528  	if err := g.SetNodeDescriptor(newNodeDesc(5)); err != nil {
  1529  		t.Fatal(err)
  1530  	}
  1531  	testutils.SucceedsSoon(t, func() error {
  1532  		desc := ds.getNodeDescriptor()
  1533  		if desc != nil && desc.NodeID == 5 {
  1534  			return nil
  1535  		}
  1536  		return errors.Errorf("wanted NodeID 5, got %v", desc)
  1537  	})
  1538  }
  1539  
  1540  func TestMultiRangeGapReverse(t *testing.T) {
  1541  	defer leaktest.AfterTest(t)()
  1542  	stopper := stop.NewStopper()
  1543  	defer stopper.Stop(context.Background())
  1544  
  1545  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1546  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1547  	g := makeGossip(t, stopper, rpcContext)
  1548  
  1549  	var descs []roachpb.RangeDescriptor
  1550  	splits := []roachpb.Key{roachpb.Key("a"), roachpb.Key("b"), roachpb.Key("c"), roachpb.Key("d")}
  1551  	for i, split := range splits {
  1552  		var startKey roachpb.RKey
  1553  		if i > 0 {
  1554  			startKey = descs[i-1].EndKey
  1555  		}
  1556  		descs = append(descs, roachpb.RangeDescriptor{
  1557  			RangeID:  roachpb.RangeID(i + 1),
  1558  			StartKey: startKey,
  1559  			EndKey:   keys.MustAddr(split),
  1560  			InternalReplicas: []roachpb.ReplicaDescriptor{
  1561  				{
  1562  					NodeID:  1,
  1563  					StoreID: 1,
  1564  				},
  1565  			},
  1566  		})
  1567  	}
  1568  
  1569  	sender := kv.SenderFunc(
  1570  		func(_ context.Context, args roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1571  			rb := args.CreateReply()
  1572  			return rb, nil
  1573  		})
  1574  
  1575  	rdb := MockRangeDescriptorDB(func(key roachpb.RKey, reverse bool) (
  1576  		[]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error,
  1577  	) {
  1578  		n := sort.Search(len(descs), func(i int) bool {
  1579  			if !reverse {
  1580  				return key.Less(descs[i].EndKey)
  1581  			}
  1582  			// In reverse mode, the range boundary behavior is "inverted".
  1583  			// If we scan [a,z) in reverse mode, we'd look up key z.
  1584  			return !descs[i].EndKey.Less(key) // key <= EndKey
  1585  		})
  1586  		if n < 0 {
  1587  			n = 0
  1588  		}
  1589  		if n >= len(descs) {
  1590  			panic(fmt.Sprintf("didn't set up descriptor for key %q", key))
  1591  		}
  1592  		return descs[n : n+1], nil, nil
  1593  	})
  1594  
  1595  	cfg := DistSenderConfig{
  1596  		AmbientCtx:        log.AmbientContext{Tracer: tracing.NewTracer()},
  1597  		Clock:             clock,
  1598  		RPCContext:        rpcContext,
  1599  		RangeDescriptorDB: rdb,
  1600  		TestingKnobs: ClientTestingKnobs{
  1601  			TransportFactory: SenderTransportFactory(
  1602  				tracing.NewTracer(),
  1603  				sender,
  1604  			),
  1605  		},
  1606  		Settings: cluster.MakeTestingClusterSettings(),
  1607  	}
  1608  
  1609  	ds := NewDistSender(cfg, g)
  1610  
  1611  	txn := roachpb.MakeTransaction("foo", nil, 1.0, clock.Now(), 0)
  1612  
  1613  	var ba roachpb.BatchRequest
  1614  	ba.Txn = &txn
  1615  	ba.Add(roachpb.NewReverseScan(splits[0], splits[1], false))
  1616  	ba.Add(roachpb.NewReverseScan(splits[2], splits[3], false))
  1617  
  1618  	// Before fixing https://github.com/cockroachdb/cockroach/issues/18174, this
  1619  	// would error with:
  1620  	//
  1621  	// truncation resulted in empty batch on {b-c}: ReverseScan ["a","b"), ReverseScan ["c","d")
  1622  	if _, pErr := ds.Send(context.Background(), ba); pErr != nil {
  1623  		t.Fatal(pErr)
  1624  	}
  1625  }
  1626  
  1627  // TestMultiRangeMergeStaleDescriptor simulates the situation in which the
  1628  // DistSender executes a multi-range scan which encounters the stale descriptor
  1629  // of a range which has since incorporated its right neighbor by means of a
  1630  // merge. It is verified that the DistSender scans the correct keyrange exactly
  1631  // once.
  1632  func TestMultiRangeMergeStaleDescriptor(t *testing.T) {
  1633  	defer leaktest.AfterTest(t)()
  1634  	stopper := stop.NewStopper()
  1635  	defer stopper.Stop(context.Background())
  1636  
  1637  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1638  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1639  	g := makeGossip(t, stopper, rpcContext)
  1640  	// Assume we have two ranges, [a-b) and [b-KeyMax).
  1641  	merged := false
  1642  	// The stale first range descriptor which is unaware of the merge.
  1643  	var firstRange = roachpb.RangeDescriptor{
  1644  		RangeID:  2,
  1645  		StartKey: roachpb.RKey("a"),
  1646  		EndKey:   roachpb.RKey("b"),
  1647  		InternalReplicas: []roachpb.ReplicaDescriptor{
  1648  			{
  1649  				NodeID:  1,
  1650  				StoreID: 1,
  1651  			},
  1652  		},
  1653  	}
  1654  	// The merged descriptor, which will be looked up after having processed
  1655  	// the stale range [a,b).
  1656  	var mergedRange = roachpb.RangeDescriptor{
  1657  		RangeID:  2,
  1658  		StartKey: roachpb.RKey("a"),
  1659  		EndKey:   roachpb.RKeyMax,
  1660  		InternalReplicas: []roachpb.ReplicaDescriptor{
  1661  			{
  1662  				NodeID:  1,
  1663  				StoreID: 1,
  1664  			},
  1665  		},
  1666  	}
  1667  	// Assume we have two key-value pairs, a=1 and c=2.
  1668  	existingKVs := []roachpb.KeyValue{
  1669  		{Key: roachpb.Key("a"), Value: roachpb.MakeValueFromString("1")},
  1670  		{Key: roachpb.Key("c"), Value: roachpb.MakeValueFromString("2")},
  1671  	}
  1672  	var testFn simpleSendFn = func(
  1673  		_ context.Context,
  1674  		_ SendOptions,
  1675  		_ ReplicaSlice,
  1676  		ba roachpb.BatchRequest,
  1677  	) (*roachpb.BatchResponse, error) {
  1678  		rs, err := keys.Range(ba.Requests)
  1679  		if err != nil {
  1680  			t.Fatal(err)
  1681  		}
  1682  		batchReply := &roachpb.BatchResponse{}
  1683  		reply := &roachpb.ScanResponse{}
  1684  		batchReply.Add(reply)
  1685  		results := []roachpb.KeyValue{}
  1686  		for _, curKV := range existingKVs {
  1687  			curKeyAddr, err := keys.Addr(curKV.Key)
  1688  			if err != nil {
  1689  				t.Fatal(err)
  1690  			}
  1691  			if rs.Key.Less(curKeyAddr.Next()) && curKeyAddr.Less(rs.EndKey) {
  1692  				results = append(results, curKV)
  1693  			}
  1694  		}
  1695  		reply.Rows = results
  1696  		return batchReply, nil
  1697  	}
  1698  	cfg := DistSenderConfig{
  1699  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1700  		Clock:      clock,
  1701  		RPCContext: rpcContext,
  1702  		TestingKnobs: ClientTestingKnobs{
  1703  			TransportFactory: adaptSimpleTransport(testFn),
  1704  		},
  1705  		RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) {
  1706  			if key.Less(testMetaRangeDescriptor.EndKey) {
  1707  				return []roachpb.RangeDescriptor{testMetaRangeDescriptor}, nil, nil
  1708  			}
  1709  			if !merged {
  1710  				// Assume a range merge operation happened.
  1711  				merged = true
  1712  				return []roachpb.RangeDescriptor{firstRange}, nil, nil
  1713  			}
  1714  			return []roachpb.RangeDescriptor{mergedRange}, nil, nil
  1715  		}),
  1716  		Settings: cluster.MakeTestingClusterSettings(),
  1717  	}
  1718  	ds := NewDistSender(cfg, g)
  1719  	scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), false)
  1720  	// Set the Txn info to avoid an OpRequiresTxnError.
  1721  	reply, err := kv.SendWrappedWith(context.Background(), ds, roachpb.Header{
  1722  		MaxSpanRequestKeys: 10,
  1723  		Txn:                &roachpb.Transaction{},
  1724  	}, scan)
  1725  	if err != nil {
  1726  		t.Fatalf("scan encountered error: %s", err)
  1727  	}
  1728  	sr := reply.(*roachpb.ScanResponse)
  1729  	if !reflect.DeepEqual(existingKVs, sr.Rows) {
  1730  		t.Fatalf("expect get %v, actual get %v", existingKVs, sr.Rows)
  1731  	}
  1732  }
  1733  
  1734  // TestRangeLookupOptionOnReverseScan verifies that a lookup triggered by a
  1735  // ReverseScan request has the useReverseScan specified.
  1736  func TestRangeLookupOptionOnReverseScan(t *testing.T) {
  1737  	defer leaktest.AfterTest(t)()
  1738  	stopper := stop.NewStopper()
  1739  	defer stopper.Stop(context.Background())
  1740  
  1741  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1742  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1743  	g := makeGossip(t, stopper, rpcContext)
  1744  	cfg := DistSenderConfig{
  1745  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1746  		Clock:      clock,
  1747  		RPCContext: rpcContext,
  1748  		TestingKnobs: ClientTestingKnobs{
  1749  			TransportFactory: adaptSimpleTransport(stubRPCSendFn),
  1750  		},
  1751  		RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, useReverseScan bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) {
  1752  			if !key.Equal(roachpb.KeyMin) && !useReverseScan {
  1753  				t.Fatalf("expected UseReverseScan to be set")
  1754  			}
  1755  			if key.Less(testMetaRangeDescriptor.EndKey) {
  1756  				return []roachpb.RangeDescriptor{testMetaRangeDescriptor}, nil, nil
  1757  			}
  1758  			return []roachpb.RangeDescriptor{testUserRangeDescriptor}, nil, nil
  1759  		}),
  1760  		Settings: cluster.MakeTestingClusterSettings(),
  1761  	}
  1762  	ds := NewDistSender(cfg, g)
  1763  	rScan := &roachpb.ReverseScanRequest{
  1764  		RequestHeader: roachpb.RequestHeader{Key: roachpb.Key("a"), EndKey: roachpb.Key("b")},
  1765  	}
  1766  	if _, err := kv.SendWrapped(context.Background(), ds, rScan); err != nil {
  1767  		t.Fatal(err)
  1768  	}
  1769  }
  1770  
  1771  // TestClockUpdateOnResponse verifies that the DistSender picks up
  1772  // the timestamp of the remote party embedded in responses.
  1773  func TestClockUpdateOnResponse(t *testing.T) {
  1774  	defer leaktest.AfterTest(t)()
  1775  	stopper := stop.NewStopper()
  1776  	defer stopper.Stop(context.Background())
  1777  
  1778  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1779  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1780  	g := makeGossip(t, stopper, rpcContext)
  1781  	cfg := DistSenderConfig{
  1782  		AmbientCtx:        log.AmbientContext{Tracer: tracing.NewTracer()},
  1783  		Clock:             clock,
  1784  		RPCContext:        rpcContext,
  1785  		RangeDescriptorDB: defaultMockRangeDescriptorDB,
  1786  		NodeDialer:        nodedialer.New(rpcContext, gossip.AddressResolver(g)),
  1787  		Settings:          cluster.MakeTestingClusterSettings(),
  1788  	}
  1789  	ds := NewDistSender(cfg, g)
  1790  
  1791  	expectedErr := roachpb.NewError(errors.New("boom"))
  1792  
  1793  	// Prepare the test function
  1794  	put := roachpb.NewPut(roachpb.Key("a"), roachpb.MakeValueFromString("value"))
  1795  	doCheck := func(sender kv.Sender, fakeTime hlc.Timestamp) {
  1796  		ds.transportFactory = SenderTransportFactory(tracing.NewTracer(), sender)
  1797  		_, err := kv.SendWrapped(context.Background(), ds, put)
  1798  		if err != nil && err != expectedErr {
  1799  			t.Fatal(err)
  1800  		}
  1801  		newTime := ds.clock.Now()
  1802  		if newTime.Less(fakeTime) {
  1803  			t.Fatalf("clock was not advanced: expected >= %s; got %s", fakeTime, newTime)
  1804  		}
  1805  	}
  1806  
  1807  	// Test timestamp propagation on valid BatchResults.
  1808  	fakeTime := ds.clock.Now().Add(10000000000 /*10s*/, 0)
  1809  	replyNormal := kv.SenderFunc(
  1810  		func(_ context.Context, args roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1811  			rb := args.CreateReply()
  1812  			rb.Now = fakeTime
  1813  			return rb, nil
  1814  		})
  1815  	doCheck(replyNormal, fakeTime)
  1816  
  1817  	// Test timestamp propagation on errors.
  1818  	fakeTime = ds.clock.Now().Add(10000000000 /*10s*/, 0)
  1819  	replyError := kv.SenderFunc(
  1820  		func(_ context.Context, _ roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1821  			pErr := expectedErr
  1822  			pErr.Now = fakeTime
  1823  			return nil, pErr
  1824  		})
  1825  	doCheck(replyError, fakeTime)
  1826  }
  1827  
  1828  // TestTruncateWithSpanAndDescriptor verifies that a batch request is truncated with a
  1829  // range span and the range of a descriptor found in cache.
  1830  func TestTruncateWithSpanAndDescriptor(t *testing.T) {
  1831  	defer leaktest.AfterTest(t)()
  1832  	stopper := stop.NewStopper()
  1833  	defer stopper.Stop(context.Background())
  1834  
  1835  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1836  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1837  	g := makeGossip(t, stopper, rpcContext)
  1838  	if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil {
  1839  		t.Fatal(err)
  1840  	}
  1841  	nd := &roachpb.NodeDescriptor{
  1842  		NodeID:  roachpb.NodeID(1),
  1843  		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
  1844  	}
  1845  	if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil {
  1846  		t.Fatal(err)
  1847  	}
  1848  
  1849  	// Fill MockRangeDescriptorDB with two descriptors. When a
  1850  	// range descriptor is looked up by key "b", return the second
  1851  	// descriptor whose range is ["a", "c") and partially overlaps
  1852  	// with the first descriptor's range.
  1853  	var descriptor1 = roachpb.RangeDescriptor{
  1854  		RangeID:  2,
  1855  		StartKey: testMetaEndKey,
  1856  		EndKey:   roachpb.RKey("b"),
  1857  		InternalReplicas: []roachpb.ReplicaDescriptor{
  1858  			{
  1859  				NodeID:  1,
  1860  				StoreID: 1,
  1861  			},
  1862  		},
  1863  	}
  1864  	var descriptor2 = roachpb.RangeDescriptor{
  1865  		RangeID:  3,
  1866  		StartKey: roachpb.RKey("a"),
  1867  		EndKey:   roachpb.RKey("c"),
  1868  		InternalReplicas: []roachpb.ReplicaDescriptor{
  1869  			{
  1870  				NodeID:  1,
  1871  				StoreID: 1,
  1872  			},
  1873  		},
  1874  	}
  1875  	descDB := MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) {
  1876  		if key.Less(testMetaRangeDescriptor.EndKey) {
  1877  			return []roachpb.RangeDescriptor{testMetaRangeDescriptor}, nil, nil
  1878  		}
  1879  		desc := descriptor1
  1880  		if key.Equal(roachpb.RKey("b")) {
  1881  			desc = descriptor2
  1882  		}
  1883  		return []roachpb.RangeDescriptor{desc}, nil, nil
  1884  	})
  1885  
  1886  	// Define our rpcSend stub which checks the span of the batch
  1887  	// requests. Because of parallelization, there's no guarantee
  1888  	// on the ordering of requests.
  1889  	var haveA, haveB bool
  1890  	sendStub := func(
  1891  		_ context.Context,
  1892  		_ SendOptions,
  1893  		_ ReplicaSlice,
  1894  		ba roachpb.BatchRequest,
  1895  	) (*roachpb.BatchResponse, error) {
  1896  		rs, err := keys.Range(ba.Requests)
  1897  		if err != nil {
  1898  			t.Fatal(err)
  1899  		}
  1900  		if rs.Key.Equal(roachpb.RKey("a")) && rs.EndKey.Equal(roachpb.RKey("a").Next()) {
  1901  			haveA = true
  1902  		} else if rs.Key.Equal(roachpb.RKey("b")) && rs.EndKey.Equal(roachpb.RKey("b").Next()) {
  1903  			haveB = true
  1904  		} else {
  1905  			t.Fatalf("Unexpected span %s", rs)
  1906  		}
  1907  
  1908  		batchReply := &roachpb.BatchResponse{}
  1909  		reply := &roachpb.PutResponse{}
  1910  		batchReply.Add(reply)
  1911  		return batchReply, nil
  1912  	}
  1913  
  1914  	cfg := DistSenderConfig{
  1915  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1916  		Clock:      clock,
  1917  		RPCContext: rpcContext,
  1918  		TestingKnobs: ClientTestingKnobs{
  1919  			TransportFactory: adaptSimpleTransport(sendStub),
  1920  		},
  1921  		RangeDescriptorDB: descDB,
  1922  		Settings:          cluster.MakeTestingClusterSettings(),
  1923  	}
  1924  	ds := NewDistSender(cfg, g)
  1925  
  1926  	// Send a batch request containing two puts. In the first
  1927  	// attempt, the span of the descriptor found in the cache is
  1928  	// ["a", "b"). The request is truncated to contain only the put
  1929  	// on "a".
  1930  	//
  1931  	// In the second attempt, The range of the descriptor found in
  1932  	// the cache is ["a", "c"), but the put on "a" will not be
  1933  	// present. The request is truncated to contain only the put on "b".
  1934  	ba := roachpb.BatchRequest{}
  1935  	ba.Txn = &roachpb.Transaction{Name: "test"}
  1936  	{
  1937  		val := roachpb.MakeValueFromString("val")
  1938  		ba.Add(roachpb.NewPut(keys.MakeRangeKeyPrefix(roachpb.RKey("a")), val))
  1939  	}
  1940  	{
  1941  		val := roachpb.MakeValueFromString("val")
  1942  		ba.Add(roachpb.NewPut(keys.MakeRangeKeyPrefix(roachpb.RKey("b")), val))
  1943  	}
  1944  
  1945  	if _, pErr := ds.Send(context.Background(), ba); pErr != nil {
  1946  		t.Fatal(pErr)
  1947  	}
  1948  
  1949  	if !haveA || !haveB {
  1950  		t.Errorf("expected two requests for \"a\" and \"b\": %t, %t", haveA, haveB)
  1951  	}
  1952  }
  1953  
  1954  // TestTruncateWithLocalSpanAndDescriptor verifies that a batch request with local keys
  1955  // is truncated with a range span and the range of a descriptor found in cache.
  1956  func TestTruncateWithLocalSpanAndDescriptor(t *testing.T) {
  1957  	defer leaktest.AfterTest(t)()
  1958  	stopper := stop.NewStopper()
  1959  	defer stopper.Stop(context.Background())
  1960  
  1961  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1962  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  1963  	g := makeGossip(t, stopper, rpcContext)
  1964  	if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil {
  1965  		t.Fatal(err)
  1966  	}
  1967  	nd := &roachpb.NodeDescriptor{
  1968  		NodeID:  roachpb.NodeID(1),
  1969  		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
  1970  	}
  1971  	if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil {
  1972  		t.Fatal(err)
  1973  	}
  1974  
  1975  	// Fill MockRangeDescriptorDB with two descriptors.
  1976  	var descriptor1 = roachpb.RangeDescriptor{
  1977  		RangeID:  2,
  1978  		StartKey: testMetaEndKey,
  1979  		EndKey:   roachpb.RKey("b"),
  1980  		InternalReplicas: []roachpb.ReplicaDescriptor{
  1981  			{
  1982  				NodeID:  1,
  1983  				StoreID: 1,
  1984  			},
  1985  		},
  1986  	}
  1987  	var descriptor2 = roachpb.RangeDescriptor{
  1988  		RangeID:  3,
  1989  		StartKey: roachpb.RKey("b"),
  1990  		EndKey:   roachpb.RKey("c"),
  1991  		InternalReplicas: []roachpb.ReplicaDescriptor{
  1992  			{
  1993  				NodeID:  1,
  1994  				StoreID: 1,
  1995  			},
  1996  		},
  1997  	}
  1998  	var descriptor3 = roachpb.RangeDescriptor{
  1999  		RangeID:  4,
  2000  		StartKey: roachpb.RKey("c"),
  2001  		EndKey:   roachpb.RKeyMax,
  2002  		InternalReplicas: []roachpb.ReplicaDescriptor{
  2003  			{
  2004  				NodeID:  1,
  2005  				StoreID: 1,
  2006  			},
  2007  		},
  2008  	}
  2009  	descDB := mockRangeDescriptorDBForDescs(
  2010  		testMetaRangeDescriptor,
  2011  		descriptor1,
  2012  		descriptor2,
  2013  		descriptor3,
  2014  	)
  2015  
  2016  	// Define our rpcSend stub which checks the span of the batch
  2017  	// requests.
  2018  	haveRequest := []bool{false, false, false}
  2019  	sendStub := func(
  2020  		_ context.Context,
  2021  		_ SendOptions,
  2022  		_ ReplicaSlice,
  2023  		ba roachpb.BatchRequest,
  2024  	) (*roachpb.BatchResponse, error) {
  2025  		h := ba.Requests[0].GetInner().Header()
  2026  		if h.Key.Equal(keys.RangeDescriptorKey(roachpb.RKey("a"))) && h.EndKey.Equal(keys.MakeRangeKeyPrefix(roachpb.RKey("b"))) {
  2027  			haveRequest[0] = true
  2028  		} else if h.Key.Equal(keys.MakeRangeKeyPrefix(roachpb.RKey("b"))) && h.EndKey.Equal(keys.MakeRangeKeyPrefix(roachpb.RKey("c"))) {
  2029  			haveRequest[1] = true
  2030  		} else if h.Key.Equal(keys.MakeRangeKeyPrefix(roachpb.RKey("c"))) && h.EndKey.Equal(keys.RangeDescriptorKey(roachpb.RKey("c"))) {
  2031  			haveRequest[2] = true
  2032  		} else {
  2033  			t.Fatalf("Unexpected span [%s,%s)", h.Key, h.EndKey)
  2034  		}
  2035  
  2036  		batchReply := &roachpb.BatchResponse{}
  2037  		reply := &roachpb.ScanResponse{}
  2038  		batchReply.Add(reply)
  2039  		return batchReply, nil
  2040  	}
  2041  
  2042  	cfg := DistSenderConfig{
  2043  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  2044  		Clock:      clock,
  2045  		RPCContext: rpcContext,
  2046  		TestingKnobs: ClientTestingKnobs{
  2047  			TransportFactory: adaptSimpleTransport(sendStub),
  2048  		},
  2049  		RangeDescriptorDB: descDB,
  2050  		Settings:          cluster.MakeTestingClusterSettings(),
  2051  	}
  2052  	ds := NewDistSender(cfg, g)
  2053  
  2054  	// Send a batch request contains two scans. In the first
  2055  	// attempt, the range of the descriptor found in the cache is
  2056  	// ["", "b"). The request is truncated to contain only the scan
  2057  	// on local keys that address up to "b".
  2058  	//
  2059  	// In the second attempt, The range of the descriptor found in
  2060  	// the cache is ["b", "d"), The request is truncated to contain
  2061  	// only the scan on local keys that address from "b" to "d".
  2062  	ba := roachpb.BatchRequest{}
  2063  	ba.Txn = &roachpb.Transaction{Name: "test"}
  2064  	ba.Add(roachpb.NewScan(
  2065  		keys.RangeDescriptorKey(roachpb.RKey("a")),
  2066  		keys.RangeDescriptorKey(roachpb.RKey("c")),
  2067  		false /* forUpdate */))
  2068  
  2069  	if _, pErr := ds.Send(context.Background(), ba); pErr != nil {
  2070  		t.Fatal(pErr)
  2071  	}
  2072  	for i, found := range haveRequest {
  2073  		if !found {
  2074  			t.Errorf("request %d not received", i)
  2075  		}
  2076  	}
  2077  }
  2078  
  2079  // TestMultiRangeWithEndTxn verifies that when a chunk of batch looks like it's
  2080  // going to be dispatched to more than one range, it will be split up if it
  2081  // contains an EndTxn that is not performing a parallel commit. However, it will
  2082  // not be split up if it contains an EndTxn that is performing a parallel
  2083  // commit.
  2084  func TestMultiRangeWithEndTxn(t *testing.T) {
  2085  	defer leaktest.AfterTest(t)()
  2086  	stopper := stop.NewStopper()
  2087  	defer stopper.Stop(context.Background())
  2088  
  2089  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  2090  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  2091  	g := makeGossip(t, stopper, rpcContext)
  2092  	testCases := []struct {
  2093  		put1, put2, et roachpb.Key
  2094  		parCommit      bool
  2095  		exp            [][]roachpb.Method
  2096  	}{
  2097  		{
  2098  			// Everything hits the first range, so we get a 1PC txn.
  2099  			put1:      roachpb.Key("a1"),
  2100  			put2:      roachpb.Key("a2"),
  2101  			et:        roachpb.Key("a3"),
  2102  			parCommit: false,
  2103  			exp:       [][]roachpb.Method{{roachpb.Put, roachpb.Put, roachpb.EndTxn}},
  2104  		},
  2105  		{
  2106  			// Everything hits the first range, so we get a 1PC txn.
  2107  			// Parallel commit doesn't matter.
  2108  			put1:      roachpb.Key("a1"),
  2109  			put2:      roachpb.Key("a2"),
  2110  			et:        roachpb.Key("a3"),
  2111  			parCommit: true,
  2112  			exp:       [][]roachpb.Method{{roachpb.Put, roachpb.Put, roachpb.EndTxn}},
  2113  		},
  2114  		{
  2115  			// Only EndTxn hits the second range.
  2116  			put1:      roachpb.Key("a1"),
  2117  			put2:      roachpb.Key("a2"),
  2118  			et:        roachpb.Key("b"),
  2119  			parCommit: false,
  2120  			exp:       [][]roachpb.Method{{roachpb.Put, roachpb.Put}, {roachpb.EndTxn}},
  2121  		},
  2122  		{
  2123  			// Only EndTxn hits the second range. However, since the EndTxn is
  2124  			// performing a parallel commit, it is sent in parallel, which we
  2125  			// can't detect directly because the EndTxn batch is sent to the
  2126  			// second range and a strict ordering of batches is enforced by
  2127  			// DisableParallelBatches.
  2128  			put1:      roachpb.Key("a1"),
  2129  			put2:      roachpb.Key("a2"),
  2130  			et:        roachpb.Key("b"),
  2131  			parCommit: true,
  2132  			exp:       [][]roachpb.Method{{roachpb.Put, roachpb.Put}, {roachpb.EndTxn}},
  2133  		},
  2134  		{
  2135  			// One write hits the second range, so EndTxn has to be split off.
  2136  			// In this case, going in the usual order without splitting off
  2137  			// would actually be fine, but it doesn't seem worth optimizing at
  2138  			// this point.
  2139  			put1:      roachpb.Key("a1"),
  2140  			put2:      roachpb.Key("b1"),
  2141  			et:        roachpb.Key("a1"),
  2142  			parCommit: false,
  2143  			exp:       [][]roachpb.Method{{roachpb.Put}, {roachpb.Put}, {roachpb.EndTxn}},
  2144  		},
  2145  		{
  2146  			// One write hits the second range. Again, EndTxn does not need to
  2147  			// be split off because it is performing a parallel commit, so the
  2148  			// only split is due to the range boundary.
  2149  			put1:      roachpb.Key("a1"),
  2150  			put2:      roachpb.Key("b1"),
  2151  			et:        roachpb.Key("a1"),
  2152  			parCommit: true,
  2153  			exp:       [][]roachpb.Method{{roachpb.Put, roachpb.EndTxn}, {roachpb.Put}},
  2154  		},
  2155  		{
  2156  			// Both writes go to the second range, but not EndTxn. It is split
  2157  			// from the writes and sent after.
  2158  			put1:      roachpb.Key("b1"),
  2159  			put2:      roachpb.Key("b2"),
  2160  			et:        roachpb.Key("a1"),
  2161  			parCommit: false,
  2162  			exp:       [][]roachpb.Method{{roachpb.Put, roachpb.Put}, {roachpb.EndTxn}},
  2163  		},
  2164  		{
  2165  			// Both writes go to the second range, but not EndTxn. Since the
  2166  			// EndTxn is performing a parallel commit, it is sent in parallel.
  2167  			// We can tell this because the EndTxn batch is sent to the first
  2168  			// range and ends up being delivered first, unlike in the previous
  2169  			// case.
  2170  			put1:      roachpb.Key("b1"),
  2171  			put2:      roachpb.Key("b2"),
  2172  			et:        roachpb.Key("a1"),
  2173  			parCommit: true,
  2174  			exp:       [][]roachpb.Method{{roachpb.EndTxn}, {roachpb.Put, roachpb.Put}},
  2175  		},
  2176  	}
  2177  
  2178  	if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil {
  2179  		t.Fatal(err)
  2180  	}
  2181  	nd := &roachpb.NodeDescriptor{
  2182  		NodeID:  roachpb.NodeID(1),
  2183  		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
  2184  	}
  2185  	if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil {
  2186  		t.Fatal(err)
  2187  
  2188  	}
  2189  
  2190  	// Fill MockRangeDescriptorDB with two descriptors.
  2191  	var descriptor1 = roachpb.RangeDescriptor{
  2192  		RangeID:  2,
  2193  		StartKey: testMetaEndKey,
  2194  		EndKey:   roachpb.RKey("b"),
  2195  		InternalReplicas: []roachpb.ReplicaDescriptor{
  2196  			{
  2197  				NodeID:  1,
  2198  				StoreID: 1,
  2199  			},
  2200  		},
  2201  	}
  2202  	var descriptor2 = roachpb.RangeDescriptor{
  2203  		RangeID:  3,
  2204  		StartKey: roachpb.RKey("b"),
  2205  		EndKey:   roachpb.RKeyMax,
  2206  		InternalReplicas: []roachpb.ReplicaDescriptor{
  2207  			{
  2208  				NodeID:  1,
  2209  				StoreID: 1,
  2210  			},
  2211  		},
  2212  	}
  2213  	descDB := mockRangeDescriptorDBForDescs(
  2214  		testMetaRangeDescriptor,
  2215  		descriptor1,
  2216  		descriptor2,
  2217  	)
  2218  
  2219  	for i, test := range testCases {
  2220  		var act [][]roachpb.Method
  2221  		var testFn simpleSendFn = func(
  2222  			_ context.Context,
  2223  			_ SendOptions,
  2224  			_ ReplicaSlice,
  2225  			ba roachpb.BatchRequest,
  2226  		) (*roachpb.BatchResponse, error) {
  2227  			var cur []roachpb.Method
  2228  			for _, union := range ba.Requests {
  2229  				cur = append(cur, union.GetInner().Method())
  2230  			}
  2231  			act = append(act, cur)
  2232  			return ba.CreateReply(), nil
  2233  		}
  2234  
  2235  		cfg := DistSenderConfig{
  2236  			AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  2237  			Clock:      clock,
  2238  			RPCContext: rpcContext,
  2239  			TestingKnobs: ClientTestingKnobs{
  2240  				TransportFactory: adaptSimpleTransport(testFn),
  2241  			},
  2242  			RangeDescriptorDB: descDB,
  2243  			Settings:          cluster.MakeTestingClusterSettings(),
  2244  		}
  2245  		ds := NewDistSender(cfg, g)
  2246  		ds.DisableParallelBatches()
  2247  
  2248  		// Send a batch request containing two puts.
  2249  		var ba roachpb.BatchRequest
  2250  		ba.Txn = &roachpb.Transaction{Name: "test"}
  2251  		ba.Add(roachpb.NewPut(test.put1, roachpb.MakeValueFromString("val1")))
  2252  		ba.Add(roachpb.NewPut(test.put2, roachpb.MakeValueFromString("val2")))
  2253  		et := &roachpb.EndTxnRequest{
  2254  			RequestHeader: roachpb.RequestHeader{Key: test.et},
  2255  			Commit:        true,
  2256  		}
  2257  		if test.parCommit {
  2258  			et.InFlightWrites = []roachpb.SequencedWrite{
  2259  				{Key: test.put1, Sequence: 1}, {Key: test.put2, Sequence: 2},
  2260  			}
  2261  		}
  2262  		ba.Add(et)
  2263  
  2264  		if _, pErr := ds.Send(context.Background(), ba); pErr != nil {
  2265  			t.Fatal(pErr)
  2266  		}
  2267  
  2268  		for j, batchMethods := range act {
  2269  			if !reflect.DeepEqual(test.exp[j], batchMethods) {
  2270  				t.Fatalf("test %d: expected [%d] %v, got %v", i, j, test.exp[j], batchMethods)
  2271  			}
  2272  		}
  2273  	}
  2274  }
  2275  
  2276  // TestParallelCommitSplitFromQueryIntents verifies that a parallel-committing
  2277  // batch is split into sub-batches - one containing all pre-commit QueryIntent
  2278  // requests and one containing everything else.
  2279  //
  2280  // The test only uses a single range, so it only tests the split of ranges in
  2281  // divideAndSendParallelCommit. See TestMultiRangeWithEndTxn for a test that
  2282  // verifies proper behavior of batches containing EndTxn requests which span
  2283  // ranges.
  2284  func TestParallelCommitSplitFromQueryIntents(t *testing.T) {
  2285  	defer leaktest.AfterTest(t)()
  2286  	stopper := stop.NewStopper()
  2287  	defer stopper.Stop(context.Background())
  2288  
  2289  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  2290  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  2291  	g := makeGossip(t, stopper, rpcContext)
  2292  
  2293  	keyA, keyB := roachpb.Key("a"), roachpb.Key("ab")
  2294  	put1 := roachpb.NewPut(keyA, roachpb.MakeValueFromString("val1"))
  2295  	put2 := roachpb.NewPut(keyB, roachpb.MakeValueFromString("val2"))
  2296  	qi := &roachpb.QueryIntentRequest{RequestHeader: roachpb.RequestHeader{Key: keyA}}
  2297  	et := &roachpb.EndTxnRequest{
  2298  		RequestHeader: roachpb.RequestHeader{Key: keyA},
  2299  		Commit:        true,
  2300  	}
  2301  	etPar := &roachpb.EndTxnRequest{
  2302  		RequestHeader:  roachpb.RequestHeader{Key: keyA},
  2303  		Commit:         true,
  2304  		InFlightWrites: []roachpb.SequencedWrite{{Key: keyA, Sequence: 1}, {Key: keyB, Sequence: 2}},
  2305  	}
  2306  
  2307  	testCases := []struct {
  2308  		name string
  2309  		reqs []roachpb.Request
  2310  		exp  [][]roachpb.Method
  2311  	}{
  2312  		{
  2313  			name: "no parallel commits or query intents",
  2314  			reqs: []roachpb.Request{put1, put2, et},
  2315  			exp:  [][]roachpb.Method{{roachpb.Put, roachpb.Put, roachpb.EndTxn}},
  2316  		},
  2317  		{
  2318  			name: "no parallel commits, but regular and pre-commit query intents",
  2319  			reqs: []roachpb.Request{qi, put1, put2, qi, et},
  2320  			exp: [][]roachpb.Method{
  2321  				{roachpb.QueryIntent, roachpb.Put, roachpb.Put, roachpb.QueryIntent, roachpb.EndTxn},
  2322  			},
  2323  		},
  2324  		{
  2325  			name: "parallel commits without query intents",
  2326  			reqs: []roachpb.Request{put1, put2, etPar},
  2327  			exp:  [][]roachpb.Method{{roachpb.Put, roachpb.Put, roachpb.EndTxn}},
  2328  		},
  2329  		{
  2330  			name: "parallel commits with pre-commit query intents",
  2331  			reqs: []roachpb.Request{put1, put2, qi, qi, etPar},
  2332  			exp: [][]roachpb.Method{
  2333  				{roachpb.QueryIntent, roachpb.QueryIntent},
  2334  				{roachpb.Put, roachpb.Put, roachpb.EndTxn},
  2335  			},
  2336  		},
  2337  		{
  2338  			name: "parallel commits with regular query intents",
  2339  			reqs: []roachpb.Request{qi, put1, qi, put2, etPar},
  2340  			exp: [][]roachpb.Method{
  2341  				{roachpb.QueryIntent, roachpb.Put, roachpb.QueryIntent, roachpb.Put, roachpb.EndTxn},
  2342  			},
  2343  		},
  2344  		{
  2345  			name: "parallel commits with regular and pre-commit query intents",
  2346  			reqs: []roachpb.Request{qi, put1, put2, qi, qi, qi, etPar},
  2347  			exp: [][]roachpb.Method{
  2348  				{roachpb.QueryIntent, roachpb.QueryIntent, roachpb.QueryIntent},
  2349  				{roachpb.QueryIntent, roachpb.Put, roachpb.Put, roachpb.EndTxn},
  2350  			},
  2351  		},
  2352  	}
  2353  	for _, test := range testCases {
  2354  		t.Run(test.name, func(t *testing.T) {
  2355  			var act [][]roachpb.Method
  2356  			var testFn simpleSendFn = func(
  2357  				_ context.Context,
  2358  				_ SendOptions,
  2359  				_ ReplicaSlice,
  2360  				ba roachpb.BatchRequest,
  2361  			) (*roachpb.BatchResponse, error) {
  2362  				var cur []roachpb.Method
  2363  				for _, union := range ba.Requests {
  2364  					cur = append(cur, union.GetInner().Method())
  2365  				}
  2366  				act = append(act, cur)
  2367  				return ba.CreateReply(), nil
  2368  			}
  2369  
  2370  			cfg := DistSenderConfig{
  2371  				AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  2372  				Clock:      clock,
  2373  				RPCContext: rpcContext,
  2374  				TestingKnobs: ClientTestingKnobs{
  2375  					TransportFactory: adaptSimpleTransport(testFn),
  2376  				},
  2377  				RangeDescriptorDB: defaultMockRangeDescriptorDB,
  2378  				Settings:          cluster.MakeTestingClusterSettings(),
  2379  			}
  2380  			ds := NewDistSender(cfg, g)
  2381  			ds.DisableParallelBatches()
  2382  
  2383  			// Send a batch request containing the requests.
  2384  			var ba roachpb.BatchRequest
  2385  			ba.Txn = &roachpb.Transaction{Name: "test"}
  2386  			ba.Add(test.reqs...)
  2387  
  2388  			if _, pErr := ds.Send(context.Background(), ba); pErr != nil {
  2389  				t.Fatal(pErr)
  2390  			}
  2391  
  2392  			for j, batchMethods := range act {
  2393  				if !reflect.DeepEqual(test.exp[j], batchMethods) {
  2394  					t.Fatalf("expected [%d] %v, got %v", j, test.exp[j], batchMethods)
  2395  				}
  2396  			}
  2397  		})
  2398  	}
  2399  }
  2400  
  2401  // TestParallelCommitsDetectIntentMissingCause tests the functionality in
  2402  // DistSender.detectIntentMissingDueToIntentResolution.
  2403  func TestParallelCommitsDetectIntentMissingCause(t *testing.T) {
  2404  	defer leaktest.AfterTest(t)()
  2405  	stopper := stop.NewStopper()
  2406  	defer stopper.Stop(context.Background())
  2407  
  2408  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  2409  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  2410  	g := makeGossip(t, stopper, rpcContext)
  2411  
  2412  	key := roachpb.Key("a")
  2413  	txn := roachpb.MakeTransaction(
  2414  		"test", key, roachpb.NormalUserPriority,
  2415  		clock.Now(), clock.MaxOffset().Nanoseconds(),
  2416  	)
  2417  
  2418  	testCases := []struct {
  2419  		name       string
  2420  		queryTxnFn func() (roachpb.TransactionStatus, error)
  2421  		expErr     string
  2422  	}{
  2423  		{
  2424  			name: "transaction record PENDING, real intent missing error",
  2425  			queryTxnFn: func() (roachpb.TransactionStatus, error) {
  2426  				return roachpb.PENDING, nil
  2427  			},
  2428  			expErr: "intent missing",
  2429  		},
  2430  		{
  2431  			name: "transaction record STAGING, real intent missing error",
  2432  			queryTxnFn: func() (roachpb.TransactionStatus, error) {
  2433  				return roachpb.STAGING, nil
  2434  			},
  2435  			expErr: "intent missing",
  2436  		},
  2437  		{
  2438  			name: "transaction record COMMITTED, intent missing error caused by intent resolution",
  2439  			queryTxnFn: func() (roachpb.TransactionStatus, error) {
  2440  				return roachpb.COMMITTED, nil
  2441  			},
  2442  		},
  2443  		{
  2444  			name: "transaction record ABORTED, ambiguous intent missing error",
  2445  			queryTxnFn: func() (roachpb.TransactionStatus, error) {
  2446  				return roachpb.ABORTED, nil
  2447  			},
  2448  			expErr: "result is ambiguous (intent missing and record aborted)",
  2449  		},
  2450  		{
  2451  			name: "QueryTxn error, unresolved ambiguity",
  2452  			queryTxnFn: func() (roachpb.TransactionStatus, error) {
  2453  				return 0, errors.New("unable to query txn")
  2454  			},
  2455  			expErr: "result is ambiguous (error=unable to query txn [intent missing])",
  2456  		},
  2457  	}
  2458  	for _, test := range testCases {
  2459  		t.Run(test.name, func(t *testing.T) {
  2460  			var testFn simpleSendFn = func(
  2461  				_ context.Context,
  2462  				_ SendOptions,
  2463  				_ ReplicaSlice,
  2464  				ba roachpb.BatchRequest,
  2465  			) (*roachpb.BatchResponse, error) {
  2466  				br := ba.CreateReply()
  2467  				switch ba.Requests[0].GetInner().Method() {
  2468  				case roachpb.QueryIntent:
  2469  					br.Error = roachpb.NewError(roachpb.NewIntentMissingError(key, nil))
  2470  				case roachpb.QueryTxn:
  2471  					status, err := test.queryTxnFn()
  2472  					if err != nil {
  2473  						br.Error = roachpb.NewError(err)
  2474  					} else {
  2475  						respTxn := txn
  2476  						respTxn.Status = status
  2477  						br.Responses[0].GetQueryTxn().QueriedTxn = respTxn
  2478  					}
  2479  				case roachpb.EndTxn:
  2480  					br.Txn = ba.Txn.Clone()
  2481  					br.Txn.Status = roachpb.STAGING
  2482  				}
  2483  				return br, nil
  2484  			}
  2485  
  2486  			cfg := DistSenderConfig{
  2487  				AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  2488  				Clock:      clock,
  2489  				RPCContext: rpcContext,
  2490  				TestingKnobs: ClientTestingKnobs{
  2491  					TransportFactory: adaptSimpleTransport(testFn),
  2492  				},
  2493  				RangeDescriptorDB: defaultMockRangeDescriptorDB,
  2494  				Settings:          cluster.MakeTestingClusterSettings(),
  2495  			}
  2496  			ds := NewDistSender(cfg, g)
  2497  
  2498  			// Send a parallel commit batch request.
  2499  			var ba roachpb.BatchRequest
  2500  			ba.Txn = txn.Clone()
  2501  			ba.Add(&roachpb.QueryIntentRequest{
  2502  				RequestHeader:  roachpb.RequestHeader{Key: key},
  2503  				Txn:            txn.TxnMeta,
  2504  				ErrorIfMissing: true,
  2505  			})
  2506  			ba.Add(&roachpb.EndTxnRequest{
  2507  				RequestHeader:  roachpb.RequestHeader{Key: key},
  2508  				Commit:         true,
  2509  				InFlightWrites: []roachpb.SequencedWrite{{Key: key, Sequence: 1}},
  2510  			})
  2511  
  2512  			// Verify that the response is expected.
  2513  			_, pErr := ds.Send(context.Background(), ba)
  2514  			if test.expErr == "" {
  2515  				if pErr != nil {
  2516  					t.Fatalf("unexpected error %v", pErr)
  2517  				}
  2518  			} else {
  2519  				if !testutils.IsPError(pErr, regexp.QuoteMeta(test.expErr)) {
  2520  					t.Fatalf("expected error %q; found %v", test.expErr, pErr)
  2521  				}
  2522  			}
  2523  		})
  2524  	}
  2525  }
  2526  
  2527  func TestCountRanges(t *testing.T) {
  2528  	defer leaktest.AfterTest(t)()
  2529  	stopper := stop.NewStopper()
  2530  	defer stopper.Stop(context.Background())
  2531  
  2532  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  2533  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  2534  	g := makeGossip(t, stopper, rpcContext)
  2535  	// Create a slice of fake descriptors.
  2536  	const numDescriptors = 9
  2537  	const firstKeyBoundary = 'a'
  2538  	var descriptors [numDescriptors]roachpb.RangeDescriptor
  2539  	for i := range descriptors {
  2540  		startKey := testMetaEndKey
  2541  		if i > 0 {
  2542  			startKey = roachpb.RKey(string(firstKeyBoundary + i - 1))
  2543  		}
  2544  		endKey := roachpb.RKeyMax
  2545  		if i < len(descriptors)-1 {
  2546  			endKey = roachpb.RKey(string(firstKeyBoundary + i))
  2547  		}
  2548  
  2549  		descriptors[i] = roachpb.RangeDescriptor{
  2550  			RangeID:  roachpb.RangeID(i + 2),
  2551  			StartKey: startKey,
  2552  			EndKey:   endKey,
  2553  			InternalReplicas: []roachpb.ReplicaDescriptor{
  2554  				{
  2555  					NodeID:  1,
  2556  					StoreID: 1,
  2557  				},
  2558  			},
  2559  		}
  2560  	}
  2561  
  2562  	// Mock out descriptor DB and sender function.
  2563  	descDB := mockRangeDescriptorDBForDescs(append(descriptors[:], testMetaRangeDescriptor)...)
  2564  	cfg := DistSenderConfig{
  2565  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  2566  		Clock:      clock,
  2567  		RPCContext: rpcContext,
  2568  		TestingKnobs: ClientTestingKnobs{
  2569  			TransportFactory: adaptSimpleTransport(stubRPCSendFn),
  2570  		},
  2571  		RangeDescriptorDB: descDB,
  2572  		Settings:          cluster.MakeTestingClusterSettings(),
  2573  	}
  2574  	ds := NewDistSender(cfg, g)
  2575  
  2576  	// Verify counted ranges.
  2577  	keyIn := func(desc roachpb.RangeDescriptor) roachpb.RKey {
  2578  		return append(desc.StartKey, 'a')
  2579  	}
  2580  	testcases := []struct {
  2581  		key    roachpb.RKey
  2582  		endKey roachpb.RKey
  2583  		count  int64
  2584  	}{
  2585  		{testMetaEndKey, roachpb.RKey(string(firstKeyBoundary)), 1},
  2586  		{testMetaEndKey, keyIn(descriptors[0]), 1},
  2587  		{testMetaEndKey, descriptors[len(descriptors)-1].StartKey, numDescriptors - 1},
  2588  		{descriptors[0].EndKey, roachpb.RKeyMax, numDescriptors - 1},
  2589  		// Everything from the min key to a key within the last range.
  2590  		{testMetaEndKey, keyIn(descriptors[len(descriptors)-1]), numDescriptors},
  2591  		{testMetaEndKey, roachpb.RKeyMax, numDescriptors},
  2592  	}
  2593  	for i, tc := range testcases {
  2594  		count, pErr := ds.CountRanges(context.Background(), roachpb.RSpan{Key: tc.key, EndKey: tc.endKey})
  2595  		if pErr != nil {
  2596  			t.Fatalf("%d: %s", i, pErr)
  2597  		}
  2598  		if a, e := count, tc.count; a != e {
  2599  			t.Errorf("%d: # of ranges %d != expected %d", i, a, e)
  2600  		}
  2601  	}
  2602  }
  2603  
  2604  func TestSenderTransport(t *testing.T) {
  2605  	defer leaktest.AfterTest(t)()
  2606  	transport, err := SenderTransportFactory(
  2607  		tracing.NewTracer(),
  2608  		kv.SenderFunc(
  2609  			func(
  2610  				_ context.Context,
  2611  				_ roachpb.BatchRequest,
  2612  			) (r *roachpb.BatchResponse, e *roachpb.Error) {
  2613  				return
  2614  			},
  2615  		))(SendOptions{}, &nodedialer.Dialer{}, ReplicaSlice{{}})
  2616  	if err != nil {
  2617  		t.Fatal(err)
  2618  	}
  2619  	_, err = transport.SendNext(context.Background(), roachpb.BatchRequest{})
  2620  	if err != nil {
  2621  		t.Fatal(err)
  2622  	}
  2623  	if !transport.IsExhausted() {
  2624  		t.Fatalf("transport is not exhausted")
  2625  	}
  2626  }
  2627  
  2628  func TestGatewayNodeID(t *testing.T) {
  2629  	defer leaktest.AfterTest(t)()
  2630  	stopper := stop.NewStopper()
  2631  	defer stopper.Stop(context.Background())
  2632  
  2633  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  2634  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  2635  	g := makeGossip(t, stopper, rpcContext)
  2636  	const expNodeID = 42
  2637  	nd := newNodeDesc(expNodeID)
  2638  	g.NodeID.Reset(nd.NodeID)
  2639  	if err := g.SetNodeDescriptor(nd); err != nil {
  2640  		t.Fatal(err)
  2641  	}
  2642  	if err := g.AddInfoProto(gossip.MakeNodeIDKey(expNodeID), nd, time.Hour); err != nil {
  2643  		t.Fatal(err)
  2644  	}
  2645  
  2646  	var observedNodeID roachpb.NodeID
  2647  	var testFn simpleSendFn = func(
  2648  		_ context.Context,
  2649  		_ SendOptions,
  2650  		_ ReplicaSlice,
  2651  		ba roachpb.BatchRequest,
  2652  	) (*roachpb.BatchResponse, error) {
  2653  		observedNodeID = ba.Header.GatewayNodeID
  2654  		return ba.CreateReply(), nil
  2655  	}
  2656  
  2657  	cfg := DistSenderConfig{
  2658  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  2659  		Clock:      clock,
  2660  		RPCContext: rpcContext,
  2661  		TestingKnobs: ClientTestingKnobs{
  2662  			TransportFactory: adaptSimpleTransport(testFn),
  2663  		},
  2664  		RangeDescriptorDB: defaultMockRangeDescriptorDB,
  2665  		Settings:          cluster.MakeTestingClusterSettings(),
  2666  	}
  2667  	ds := NewDistSender(cfg, g)
  2668  	var ba roachpb.BatchRequest
  2669  	ba.Add(roachpb.NewPut(roachpb.Key("a"), roachpb.MakeValueFromString("value")))
  2670  	if _, err := ds.Send(context.Background(), ba); err != nil {
  2671  		t.Fatalf("put encountered error: %s", err)
  2672  	}
  2673  	if observedNodeID != expNodeID {
  2674  		t.Errorf("got GatewayNodeID=%d, want %d", observedNodeID, expNodeID)
  2675  	}
  2676  }
  2677  
  2678  // TestMultipleErrorsMerged tests that DistSender prioritizes errors that are
  2679  // returned from concurrent partial batches and returns the "best" one after
  2680  // merging the transaction metadata passed on the errors.
  2681  func TestMultipleErrorsMerged(t *testing.T) {
  2682  	defer leaktest.AfterTest(t)()
  2683  	stopper := stop.NewStopper()
  2684  	defer stopper.Stop(context.Background())
  2685  
  2686  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  2687  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  2688  	g := makeGossip(t, stopper, rpcContext)
  2689  
  2690  	if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil {
  2691  		t.Fatal(err)
  2692  	}
  2693  	nd := &roachpb.NodeDescriptor{
  2694  		NodeID:  roachpb.NodeID(1),
  2695  		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
  2696  	}
  2697  	if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil {
  2698  		t.Fatal(err)
  2699  	}
  2700  
  2701  	// Fill MockRangeDescriptorDB with two descriptors.
  2702  	var descriptor1 = roachpb.RangeDescriptor{
  2703  		RangeID:  2,
  2704  		StartKey: testMetaEndKey,
  2705  		EndKey:   roachpb.RKey("b"),
  2706  		InternalReplicas: []roachpb.ReplicaDescriptor{
  2707  			{
  2708  				NodeID:  1,
  2709  				StoreID: 1,
  2710  			},
  2711  		},
  2712  	}
  2713  	var descriptor2 = roachpb.RangeDescriptor{
  2714  		RangeID:  3,
  2715  		StartKey: roachpb.RKey("b"),
  2716  		EndKey:   roachpb.RKeyMax,
  2717  		InternalReplicas: []roachpb.ReplicaDescriptor{
  2718  			{
  2719  				NodeID:  1,
  2720  				StoreID: 1,
  2721  			},
  2722  		},
  2723  	}
  2724  	descDB := mockRangeDescriptorDBForDescs(
  2725  		testMetaRangeDescriptor,
  2726  		descriptor1,
  2727  		descriptor2,
  2728  	)
  2729  
  2730  	txn := roachpb.MakeTransaction(
  2731  		"test", nil /* baseKey */, roachpb.NormalUserPriority,
  2732  		clock.Now(), clock.MaxOffset().Nanoseconds(),
  2733  	)
  2734  	// We're also going to check that the highest bumped WriteTimestamp makes it
  2735  	// to the merged error.
  2736  	err1WriteTimestamp := txn.WriteTimestamp.Add(100, 0)
  2737  	err2WriteTimestamp := txn.WriteTimestamp.Add(200, 0)
  2738  
  2739  	retryErr := roachpb.NewTransactionRetryError(roachpb.RETRY_SERIALIZABLE, "test err")
  2740  	abortErr := roachpb.NewTransactionAbortedError(roachpb.ABORT_REASON_ABORTED_RECORD_FOUND)
  2741  	conditionFailedErr := &roachpb.ConditionFailedError{}
  2742  	sendErr := &roachpb.SendError{}
  2743  	ambiguousErr := &roachpb.AmbiguousResultError{}
  2744  	randomErr := &roachpb.IntegerOverflowError{}
  2745  
  2746  	testCases := []struct {
  2747  		err1, err2 error
  2748  		expErr     string
  2749  	}{
  2750  		{
  2751  			err1:   retryErr,
  2752  			err2:   nil,
  2753  			expErr: "TransactionRetryError: retry txn (RETRY_SERIALIZABLE - test err)",
  2754  		},
  2755  		{
  2756  			err1:   abortErr,
  2757  			err2:   nil,
  2758  			expErr: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)",
  2759  		},
  2760  		{
  2761  			err1:   conditionFailedErr,
  2762  			err2:   nil,
  2763  			expErr: "unexpected value",
  2764  		},
  2765  		{
  2766  			err1:   retryErr,
  2767  			err2:   retryErr,
  2768  			expErr: "TransactionRetryError: retry txn (RETRY_SERIALIZABLE - test err)",
  2769  		},
  2770  		{
  2771  			err1:   retryErr,
  2772  			err2:   abortErr,
  2773  			expErr: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)",
  2774  		},
  2775  		{
  2776  			err1:   abortErr,
  2777  			err2:   abortErr,
  2778  			expErr: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)",
  2779  		},
  2780  		{
  2781  			err1:   retryErr,
  2782  			err2:   conditionFailedErr,
  2783  			expErr: "unexpected value",
  2784  		},
  2785  		{
  2786  			err1:   abortErr,
  2787  			err2:   conditionFailedErr,
  2788  			expErr: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)",
  2789  		},
  2790  		{
  2791  			err1:   conditionFailedErr,
  2792  			err2:   conditionFailedErr,
  2793  			expErr: "unexpected value",
  2794  		},
  2795  		// ConditionFailedError has a low score since it's "not ambiguous". We want
  2796  		// ambiguity to be infectious, so most things have a higher score.
  2797  		{
  2798  			err1:   conditionFailedErr,
  2799  			err2:   ambiguousErr,
  2800  			expErr: "result is ambiguous",
  2801  		},
  2802  		{
  2803  			err1:   conditionFailedErr,
  2804  			err2:   sendErr,
  2805  			expErr: "failed to send RPC",
  2806  		},
  2807  		{
  2808  			err1:   conditionFailedErr,
  2809  			err2:   randomErr,
  2810  			expErr: "results in overflow",
  2811  		},
  2812  	}
  2813  	for i, tc := range testCases {
  2814  		t.Run(strconv.Itoa(i), func(t *testing.T) {
  2815  			// We run every test case twice, to make sure error merging is commutative.
  2816  			testutils.RunTrueAndFalse(t, "reverse", func(t *testing.T, reverse bool) {
  2817  				if reverse {
  2818  					// Switch the order of errors.
  2819  					err1 := tc.err1
  2820  					err2 := tc.err2
  2821  					tc.err1 = err2
  2822  					tc.err2 = err1
  2823  				}
  2824  
  2825  				var testFn simpleSendFn = func(
  2826  					_ context.Context,
  2827  					_ SendOptions,
  2828  					_ ReplicaSlice,
  2829  					ba roachpb.BatchRequest,
  2830  				) (*roachpb.BatchResponse, error) {
  2831  					reply := ba.CreateReply()
  2832  					if delRng := ba.Requests[0].GetDeleteRange(); delRng == nil {
  2833  						return nil, errors.Errorf("expected DeleteRange request, found %v", ba.Requests[0])
  2834  					} else if delRng.Key.Equal(roachpb.Key("a")) {
  2835  						if tc.err1 != nil {
  2836  							errTxn := ba.Txn.Clone()
  2837  							errTxn.WriteTimestamp = err1WriteTimestamp
  2838  							reply.Error = roachpb.NewErrorWithTxn(tc.err1, errTxn)
  2839  						}
  2840  					} else if delRng.Key.Equal(roachpb.Key("b")) {
  2841  						if tc.err2 != nil {
  2842  							errTxn := ba.Txn.Clone()
  2843  							errTxn.WriteTimestamp = err2WriteTimestamp
  2844  							reply.Error = roachpb.NewErrorWithTxn(tc.err2, errTxn)
  2845  						}
  2846  					} else {
  2847  						return nil, errors.Errorf("unexpected DeleteRange boundaries")
  2848  					}
  2849  					return reply, nil
  2850  				}
  2851  
  2852  				cfg := DistSenderConfig{
  2853  					AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  2854  					Clock:      clock,
  2855  					RPCContext: rpcContext,
  2856  					TestingKnobs: ClientTestingKnobs{
  2857  						TransportFactory: adaptSimpleTransport(testFn),
  2858  					},
  2859  					RangeDescriptorDB: descDB,
  2860  					Settings:          cluster.MakeTestingClusterSettings(),
  2861  					RPCRetryOptions:   &retry.Options{MaxRetries: 1},
  2862  				}
  2863  				ds := NewDistSender(cfg, g)
  2864  
  2865  				var ba roachpb.BatchRequest
  2866  				ba.Txn = txn.Clone()
  2867  				ba.Add(roachpb.NewDeleteRange(roachpb.Key("a"), roachpb.Key("c"), false /* returnKeys */))
  2868  
  2869  				expWriteTimestamp := txn.WriteTimestamp
  2870  				if tc.err1 != nil {
  2871  					expWriteTimestamp = err1WriteTimestamp
  2872  				}
  2873  				if tc.err2 != nil {
  2874  					expWriteTimestamp = err2WriteTimestamp
  2875  				}
  2876  
  2877  				if _, pErr := ds.Send(context.Background(), ba); pErr == nil {
  2878  					t.Fatalf("expected an error to be returned from distSender")
  2879  				} else if !testutils.IsPError(pErr, regexp.QuoteMeta(tc.expErr)) {
  2880  					t.Fatalf("expected error %q; found %v", tc.expErr, pErr)
  2881  				} else if !pErr.GetTxn().WriteTimestamp.Equal(expWriteTimestamp) {
  2882  					t.Fatalf("expected bumped ts %s, got: %s", expWriteTimestamp, pErr.GetTxn().WriteTimestamp)
  2883  				}
  2884  			})
  2885  		})
  2886  	}
  2887  }
  2888  
  2889  // Regression test for #20067.
  2890  // If a batch is partitioned into multiple partial batches, the
  2891  // roachpb.Error.Index of each batch should correspond to its original index in
  2892  // the overall batch.
  2893  func TestErrorIndexAlignment(t *testing.T) {
  2894  	defer leaktest.AfterTest(t)()
  2895  	stopper := stop.NewStopper()
  2896  	defer stopper.Stop(context.Background())
  2897  
  2898  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  2899  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  2900  	g := makeGossip(t, stopper, rpcContext)
  2901  
  2902  	if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil {
  2903  		t.Fatal(err)
  2904  	}
  2905  	nd := &roachpb.NodeDescriptor{
  2906  		NodeID:  roachpb.NodeID(1),
  2907  		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
  2908  	}
  2909  	if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil {
  2910  		t.Fatal(err)
  2911  	}
  2912  
  2913  	// Fill MockRangeDescriptorDB with two descriptors.
  2914  	var descriptor1 = roachpb.RangeDescriptor{
  2915  		RangeID:  2,
  2916  		StartKey: testMetaEndKey,
  2917  		EndKey:   roachpb.RKey("b"),
  2918  		InternalReplicas: []roachpb.ReplicaDescriptor{
  2919  			{
  2920  				NodeID:  1,
  2921  				StoreID: 1,
  2922  			},
  2923  		},
  2924  	}
  2925  	var descriptor2 = roachpb.RangeDescriptor{
  2926  		RangeID:  3,
  2927  		StartKey: roachpb.RKey("b"),
  2928  		EndKey:   roachpb.RKey("c"),
  2929  		InternalReplicas: []roachpb.ReplicaDescriptor{
  2930  			{
  2931  				NodeID:  1,
  2932  				StoreID: 1,
  2933  			},
  2934  		},
  2935  	}
  2936  	var descriptor3 = roachpb.RangeDescriptor{
  2937  		RangeID:  4,
  2938  		StartKey: roachpb.RKey("c"),
  2939  		EndKey:   roachpb.RKeyMax,
  2940  		InternalReplicas: []roachpb.ReplicaDescriptor{
  2941  			{
  2942  				NodeID:  1,
  2943  				StoreID: 1,
  2944  			},
  2945  		},
  2946  	}
  2947  
  2948  	// The 1st partial batch has 1 request.
  2949  	// The 2nd partial batch has 2 requests.
  2950  	// The 3rd partial batch has 1 request.
  2951  	// Each test case returns an error for the first request of the nth
  2952  	// partial batch.
  2953  	testCases := []struct {
  2954  		// The nth request to return an error.
  2955  		nthPartialBatch  int
  2956  		expectedFinalIdx int32
  2957  	}{
  2958  		{0, 0},
  2959  		{1, 1},
  2960  		{2, 3},
  2961  	}
  2962  
  2963  	descDB := mockRangeDescriptorDBForDescs(
  2964  		testMetaRangeDescriptor,
  2965  		descriptor1,
  2966  		descriptor2,
  2967  		descriptor3,
  2968  	)
  2969  
  2970  	for i, tc := range testCases {
  2971  		t.Run(strconv.Itoa(i), func(t *testing.T) {
  2972  			nthRequest := 0
  2973  
  2974  			var testFn simpleSendFn = func(
  2975  				_ context.Context,
  2976  				_ SendOptions,
  2977  				_ ReplicaSlice,
  2978  				ba roachpb.BatchRequest,
  2979  			) (*roachpb.BatchResponse, error) {
  2980  				reply := ba.CreateReply()
  2981  				if nthRequest == tc.nthPartialBatch {
  2982  					reply.Error = &roachpb.Error{
  2983  						// The relative index is always 0 since
  2984  						// we return an error for the first
  2985  						// request of the nthPartialBatch.
  2986  						Index: &roachpb.ErrPosition{Index: 0},
  2987  					}
  2988  				}
  2989  				nthRequest++
  2990  				return reply, nil
  2991  			}
  2992  
  2993  			cfg := DistSenderConfig{
  2994  				AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  2995  				Clock:      clock,
  2996  				RPCContext: rpcContext,
  2997  				TestingKnobs: ClientTestingKnobs{
  2998  					TransportFactory: adaptSimpleTransport(testFn),
  2999  				},
  3000  				RangeDescriptorDB: descDB,
  3001  				Settings:          cluster.MakeTestingClusterSettings(),
  3002  			}
  3003  			ds := NewDistSender(cfg, g)
  3004  			ds.DisableParallelBatches()
  3005  
  3006  			var ba roachpb.BatchRequest
  3007  			ba.Txn = &roachpb.Transaction{Name: "test"}
  3008  			// First batch has 1 request.
  3009  			val := roachpb.MakeValueFromString("val")
  3010  			ba.Add(roachpb.NewPut(roachpb.Key("a"), val))
  3011  
  3012  			// Second batch has 2 requests.
  3013  			val = roachpb.MakeValueFromString("val")
  3014  			ba.Add(roachpb.NewPut(roachpb.Key("b"), val))
  3015  			val = roachpb.MakeValueFromString("val")
  3016  			ba.Add(roachpb.NewPut(roachpb.Key("bb"), val))
  3017  
  3018  			// Third batch has 1 request.
  3019  			val = roachpb.MakeValueFromString("val")
  3020  			ba.Add(roachpb.NewPut(roachpb.Key("c"), val))
  3021  
  3022  			_, pErr := ds.Send(context.Background(), ba)
  3023  			if pErr == nil {
  3024  				t.Fatalf("expected an error to be returned from distSender")
  3025  			}
  3026  			if pErr.Index.Index != tc.expectedFinalIdx {
  3027  				t.Errorf("expected error index to be %d, instead got %d", tc.expectedFinalIdx, pErr.Index.Index)
  3028  			}
  3029  		})
  3030  	}
  3031  }
  3032  
  3033  // TestCanSendToFollower tests that the DistSender abides by the result it
  3034  // get from CanSendToFollower.
  3035  func TestCanSendToFollower(t *testing.T) {
  3036  	defer leaktest.AfterTest(t)()
  3037  	stopper := stop.NewStopper()
  3038  	defer stopper.Stop(context.Background())
  3039  
  3040  	old := CanSendToFollower
  3041  	defer func() { CanSendToFollower = old }()
  3042  	canSend := true
  3043  	CanSendToFollower = func(_ uuid.UUID, _ *cluster.Settings, ba roachpb.BatchRequest) bool {
  3044  		return !ba.IsLocking() && canSend
  3045  	}
  3046  
  3047  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  3048  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  3049  	g := makeGossip(t, stopper, rpcContext)
  3050  	leaseHolders := testUserRangeDescriptor3Replicas.InternalReplicas
  3051  	for _, n := range leaseHolders {
  3052  		if err := g.AddInfoProto(
  3053  			gossip.MakeNodeIDKey(n.NodeID),
  3054  			newNodeDesc(n.NodeID),
  3055  			gossip.NodeDescriptorTTL,
  3056  		); err != nil {
  3057  			t.Fatal(err)
  3058  		}
  3059  	}
  3060  	var sentTo ReplicaInfo
  3061  	var testFn simpleSendFn = func(
  3062  		_ context.Context,
  3063  		_ SendOptions,
  3064  		r ReplicaSlice,
  3065  		args roachpb.BatchRequest,
  3066  	) (*roachpb.BatchResponse, error) {
  3067  		sentTo = r[0]
  3068  		return args.CreateReply(), nil
  3069  	}
  3070  	cfg := DistSenderConfig{
  3071  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  3072  		Clock:      clock,
  3073  		RPCContext: rpcContext,
  3074  		TestingKnobs: ClientTestingKnobs{
  3075  			TransportFactory: adaptSimpleTransport(testFn),
  3076  		},
  3077  		RangeDescriptorDB: threeReplicaMockRangeDescriptorDB,
  3078  		NodeDialer:        nodedialer.New(rpcContext, gossip.AddressResolver(g)),
  3079  		RPCRetryOptions: &retry.Options{
  3080  			InitialBackoff: time.Microsecond,
  3081  			MaxBackoff:     time.Microsecond,
  3082  		},
  3083  		Settings: cluster.MakeTestingClusterSettings(),
  3084  	}
  3085  	for i, c := range []struct {
  3086  		canSendToFollower bool
  3087  		header            roachpb.Header
  3088  		msg               roachpb.Request
  3089  		expectedNode      roachpb.NodeID
  3090  	}{
  3091  		{
  3092  			true,
  3093  			roachpb.Header{
  3094  				Txn: &roachpb.Transaction{},
  3095  			},
  3096  			roachpb.NewPut(roachpb.Key("a"), roachpb.Value{}),
  3097  			2,
  3098  		},
  3099  		{
  3100  			true,
  3101  			roachpb.Header{
  3102  				Txn: &roachpb.Transaction{},
  3103  			},
  3104  			roachpb.NewGet(roachpb.Key("a")),
  3105  			1,
  3106  		},
  3107  		{
  3108  			true,
  3109  			roachpb.Header{},
  3110  			roachpb.NewGet(roachpb.Key("a")),
  3111  			1,
  3112  		},
  3113  		{
  3114  			false,
  3115  			roachpb.Header{},
  3116  			roachpb.NewGet(roachpb.Key("a")),
  3117  			2,
  3118  		},
  3119  	} {
  3120  		t.Run("", func(t *testing.T) {
  3121  			sentTo = ReplicaInfo{}
  3122  			canSend = c.canSendToFollower
  3123  			ds := NewDistSender(cfg, g)
  3124  			ds.clusterID = &base.ClusterIDContainer{}
  3125  			// set 2 to be the leaseholder
  3126  			ds.LeaseHolderCache().Update(context.Background(), 2 /* rangeID */, 2 /* storeID */)
  3127  			_, pErr := kv.SendWrappedWith(context.Background(), ds, c.header, c.msg)
  3128  			require.Nil(t, pErr)
  3129  			if sentTo.NodeID != c.expectedNode {
  3130  				t.Fatalf("%d: unexpected replica: %v != %v", i, sentTo.NodeID, c.expectedNode)
  3131  			}
  3132  			// Check that the leaseholder cache doesn't change, even if the request is
  3133  			// served by a follower. This tests a regression for a bug we've had where
  3134  			// we were always updating the leaseholder cache on successful RPCs
  3135  			// because we erroneously assumed that a success must come from the
  3136  			// leaseholder.
  3137  			storeID, ok := ds.LeaseHolderCache().Lookup(context.Background(), 2 /* rangeID */)
  3138  			require.True(t, ok)
  3139  			require.Equal(t, roachpb.StoreID(2), storeID)
  3140  		})
  3141  	}
  3142  }
  3143  
  3144  // TestEvictMetaRange tests that a query on a stale meta2 range should evict it
  3145  // from the cache.
  3146  func TestEvictMetaRange(t *testing.T) {
  3147  	defer leaktest.AfterTest(t)()
  3148  	stopper := stop.NewStopper()
  3149  	defer stopper.Stop(context.Background())
  3150  
  3151  	testutils.RunTrueAndFalse(t, "hasSuggestedRange", func(t *testing.T, hasSuggestedRange bool) {
  3152  		splitKey := keys.RangeMetaKey(roachpb.RKey("b"))
  3153  
  3154  		testMeta1RangeDescriptor := testMetaRangeDescriptor
  3155  		testMeta1RangeDescriptor.EndKey = roachpb.RKey(keys.Meta2Prefix)
  3156  
  3157  		testMeta2RangeDescriptor1 := testMetaRangeDescriptor
  3158  		testMeta2RangeDescriptor1.RangeID = 2
  3159  		testMeta2RangeDescriptor1.StartKey = roachpb.RKey(keys.Meta2Prefix)
  3160  
  3161  		testMeta2RangeDescriptor2 := testMetaRangeDescriptor
  3162  		testMeta2RangeDescriptor2.RangeID = 3
  3163  		testMeta2RangeDescriptor2.StartKey = roachpb.RKey(keys.Meta2Prefix)
  3164  
  3165  		testUserRangeDescriptor1 := roachpb.RangeDescriptor{
  3166  			RangeID:  4,
  3167  			StartKey: roachpb.RKey("a"),
  3168  			EndKey:   roachpb.RKey("b"),
  3169  			InternalReplicas: []roachpb.ReplicaDescriptor{
  3170  				{
  3171  					NodeID:  1,
  3172  					StoreID: 1,
  3173  				},
  3174  			},
  3175  		}
  3176  
  3177  		testUserRangeDescriptor2 := roachpb.RangeDescriptor{
  3178  			RangeID:  5,
  3179  			StartKey: roachpb.RKey("b"),
  3180  			EndKey:   roachpb.RKey("c"),
  3181  			InternalReplicas: []roachpb.ReplicaDescriptor{
  3182  				{
  3183  					NodeID:  1,
  3184  					StoreID: 1,
  3185  				},
  3186  			},
  3187  		}
  3188  
  3189  		clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  3190  		rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  3191  		g := makeGossip(t, stopper, rpcContext)
  3192  		if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &testMeta1RangeDescriptor, time.Hour); err != nil {
  3193  			t.Fatal(err)
  3194  		}
  3195  
  3196  		isStale := false
  3197  
  3198  		var testFn simpleSendFn = func(
  3199  			_ context.Context,
  3200  			_ SendOptions,
  3201  			_ ReplicaSlice,
  3202  			ba roachpb.BatchRequest,
  3203  		) (*roachpb.BatchResponse, error) {
  3204  			rs, err := keys.Range(ba.Requests)
  3205  			if err != nil {
  3206  				t.Fatal(err)
  3207  			}
  3208  			if !kv.TestingIsRangeLookup(ba) {
  3209  				return ba.CreateReply(), nil
  3210  			}
  3211  
  3212  			if bytes.HasPrefix(rs.Key, keys.Meta1Prefix) {
  3213  				// Querying meta 1 range.
  3214  				br := &roachpb.BatchResponse{}
  3215  				r := &roachpb.ScanResponse{}
  3216  				var kv roachpb.KeyValue
  3217  				if rs.Key.Equal(keys.RangeMetaKey(keys.RangeMetaKey(roachpb.RKey("a")).Next()).Next()) {
  3218  					// Scan request is [/Meta1/a - /Meta2), so return the first meta1
  3219  					// range.
  3220  					if err := kv.Value.SetProto(&testMeta2RangeDescriptor1); err != nil {
  3221  						t.Fatal(err)
  3222  					}
  3223  				} else {
  3224  					// Scan request is [/Meta1/b - /Meta2), so return the second meta1
  3225  					// range. This is needed when no SuggestedRange is returned from the
  3226  					// RangeKeyMismatch error and an additional lookup is needed to
  3227  					// determine the correct meta2 range descriptor.
  3228  					if err := kv.Value.SetProto(&testMeta2RangeDescriptor2); err != nil {
  3229  						t.Fatal(err)
  3230  					}
  3231  				}
  3232  				r.Rows = append(r.Rows, kv)
  3233  				br.Add(r)
  3234  				return br, nil
  3235  			}
  3236  			// Querying meta2 range.
  3237  			br := &roachpb.BatchResponse{}
  3238  			r := &roachpb.ScanResponse{}
  3239  			var kv roachpb.KeyValue
  3240  			if rs.Key.Equal(keys.RangeMetaKey(roachpb.RKey("a")).Next()) {
  3241  				// Scan request is [/Meta2/a - /Meta2/b), so return the first
  3242  				// user range descriptor.
  3243  				if err := kv.Value.SetProto(&testUserRangeDescriptor1); err != nil {
  3244  					t.Fatal(err)
  3245  				}
  3246  			} else if isStale {
  3247  				// Scan request is [/Meta2/b - /Meta2/c). Since we simulate a split of
  3248  				// [/Meta2 - /System) into [/Meta2 - /Meta2/a) and [/Meta2/b - /System)
  3249  				// and we sent the batch request to the stale cached meta2 range
  3250  				// descriptor [/Meta2 - /Meta2/a), we return a RangeKeyMismatchError. We
  3251  				// test for two cases here:
  3252  				// 1) The SuggestedRange is supplied and the correct meta2 range is
  3253  				//    directly inserted into the cache.
  3254  				// 2) The SuggestedRange is not supplied and we have to do an additional
  3255  				//    lookup in meta1 to determine the correct meta2 range.
  3256  
  3257  				// Simulate a split.
  3258  				testMeta2RangeDescriptor1.EndKey = splitKey
  3259  				testMeta2RangeDescriptor2.StartKey = splitKey
  3260  				isStale = false
  3261  
  3262  				reply := ba.CreateReply()
  3263  				// Return a RangeKeyMismatchError to simulate the range being stale.
  3264  				err := &roachpb.RangeKeyMismatchError{
  3265  					RequestStartKey: rs.Key.AsRawKey(),
  3266  					RequestEndKey:   rs.EndKey.AsRawKey(),
  3267  					MismatchedRange: testMeta2RangeDescriptor1,
  3268  				}
  3269  				if hasSuggestedRange {
  3270  					err.SuggestedRange = &testMeta2RangeDescriptor2
  3271  				}
  3272  				reply.Error = roachpb.NewError(err)
  3273  				return reply, nil
  3274  			} else {
  3275  				// Scan request is [/Meta2/b - /Meta2/c) and the range descriptor is
  3276  				// not stale, so return the second user range descriptor.
  3277  				if err := kv.Value.SetProto(&testUserRangeDescriptor2); err != nil {
  3278  					t.Fatal(err)
  3279  				}
  3280  			}
  3281  			r.Rows = append(r.Rows, kv)
  3282  			br.Add(r)
  3283  			return br, nil
  3284  		}
  3285  
  3286  		cfg := DistSenderConfig{
  3287  			AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  3288  			Clock:      clock,
  3289  			RPCContext: rpcContext,
  3290  			TestingKnobs: ClientTestingKnobs{
  3291  				TransportFactory: adaptSimpleTransport(testFn),
  3292  			},
  3293  			NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
  3294  			Settings:   cluster.MakeTestingClusterSettings(),
  3295  		}
  3296  		ds := NewDistSender(cfg, g)
  3297  
  3298  		scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("b"), false)
  3299  		if _, pErr := kv.SendWrapped(context.Background(), ds, scan); pErr != nil {
  3300  			t.Fatalf("scan encountered error: %s", pErr)
  3301  		}
  3302  
  3303  		// Verify that there is one meta2 cached range.
  3304  		cachedRange := ds.rangeCache.GetCachedRangeDescriptor(keys.RangeMetaKey(roachpb.RKey("a")), false)
  3305  		if !cachedRange.StartKey.Equal(keys.Meta2Prefix) || !cachedRange.EndKey.Equal(testMetaEndKey) {
  3306  			t.Fatalf("expected cached meta2 range to be [%s, %s), actual [%s, %s)",
  3307  				keys.Meta2Prefix, testMetaEndKey, cachedRange.StartKey, cachedRange.EndKey)
  3308  		}
  3309  
  3310  		// Simulate a split on the meta2 range and mark it as stale.
  3311  		isStale = true
  3312  
  3313  		scan = roachpb.NewScan(roachpb.Key("b"), roachpb.Key("c"), false)
  3314  		if _, pErr := kv.SendWrapped(context.Background(), ds, scan); pErr != nil {
  3315  			t.Fatalf("scan encountered error: %s", pErr)
  3316  		}
  3317  
  3318  		// Verify that there are two meta2 cached ranges.
  3319  		cachedRange = ds.rangeCache.GetCachedRangeDescriptor(keys.RangeMetaKey(roachpb.RKey("a")), false)
  3320  		if !cachedRange.StartKey.Equal(keys.Meta2Prefix) || !cachedRange.EndKey.Equal(splitKey) {
  3321  			t.Fatalf("expected cached meta2 range to be [%s, %s), actual [%s, %s)",
  3322  				keys.Meta2Prefix, splitKey, cachedRange.StartKey, cachedRange.EndKey)
  3323  		}
  3324  		cachedRange = ds.rangeCache.GetCachedRangeDescriptor(keys.RangeMetaKey(roachpb.RKey("b")), false)
  3325  		if !cachedRange.StartKey.Equal(splitKey) || !cachedRange.EndKey.Equal(testMetaEndKey) {
  3326  			t.Fatalf("expected cached meta2 range to be [%s, %s), actual [%s, %s)",
  3327  				splitKey, testMetaEndKey, cachedRange.StartKey, cachedRange.EndKey)
  3328  		}
  3329  	})
  3330  }
  3331  
  3332  // TestConnectionClass verifies that the dist sender constructs a transport with
  3333  // the appropriate class for a given resolved range.
  3334  func TestConnectionClass(t *testing.T) {
  3335  	defer leaktest.AfterTest(t)()
  3336  	stopper := stop.NewStopper()
  3337  	defer stopper.Stop(context.Background())
  3338  	// Create a mock range descriptor DB that can resolve made up meta1, node
  3339  	// liveness and user ranges.
  3340  	rDB := MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) (
  3341  		[]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error,
  3342  	) {
  3343  		if key.Equal(roachpb.KeyMin) {
  3344  			return []roachpb.RangeDescriptor{{
  3345  				RangeID:  1,
  3346  				StartKey: roachpb.RKeyMin,
  3347  				EndKey:   roachpb.RKey(keys.NodeLivenessPrefix),
  3348  				InternalReplicas: []roachpb.ReplicaDescriptor{
  3349  					{NodeID: 1, StoreID: 1},
  3350  				},
  3351  			}}, nil, nil
  3352  		} else if bytes.HasPrefix(key, keys.NodeLivenessPrefix) {
  3353  			return []roachpb.RangeDescriptor{{
  3354  				RangeID:  2,
  3355  				StartKey: roachpb.RKey(keys.NodeLivenessPrefix),
  3356  				EndKey:   roachpb.RKey(keys.NodeLivenessKeyMax),
  3357  				InternalReplicas: []roachpb.ReplicaDescriptor{
  3358  					{NodeID: 1, StoreID: 1},
  3359  				},
  3360  			}}, nil, nil
  3361  		}
  3362  		return []roachpb.RangeDescriptor{{
  3363  			RangeID:  3,
  3364  			StartKey: roachpb.RKey(keys.NodeLivenessKeyMax),
  3365  			EndKey:   roachpb.RKeyMax,
  3366  			InternalReplicas: []roachpb.ReplicaDescriptor{
  3367  				{NodeID: 1, StoreID: 1},
  3368  			},
  3369  		}}, nil, nil
  3370  	})
  3371  	// Verify that the request carries the class we expect it to for its span.
  3372  	verifyClass := func(class rpc.ConnectionClass, args roachpb.BatchRequest) {
  3373  		span, err := keys.Range(args.Requests)
  3374  		if assert.Nil(t, err) {
  3375  			assert.Equalf(t, rpc.ConnectionClassForKey(span.Key), class,
  3376  				"unexpected class for span key %v", span.Key)
  3377  		}
  3378  	}
  3379  	var testFn simpleSendFn = func(
  3380  		_ context.Context,
  3381  		opts SendOptions,
  3382  		replicas ReplicaSlice,
  3383  		args roachpb.BatchRequest,
  3384  	) (*roachpb.BatchResponse, error) {
  3385  		verifyClass(opts.class, args)
  3386  		return args.CreateReply(), nil
  3387  	}
  3388  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  3389  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  3390  	g := makeGossip(t, stopper, rpcContext)
  3391  	cfg := DistSenderConfig{
  3392  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  3393  		Clock:      clock,
  3394  		RPCContext: rpcContext,
  3395  		TestingKnobs: ClientTestingKnobs{
  3396  			TransportFactory: adaptSimpleTransport(testFn),
  3397  		},
  3398  		NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
  3399  		RPCRetryOptions: &retry.Options{
  3400  			MaxRetries: 1,
  3401  		},
  3402  		RangeDescriptorDB: rDB,
  3403  		Settings:          cluster.MakeTestingClusterSettings(),
  3404  	}
  3405  	ds := NewDistSender(cfg, g)
  3406  
  3407  	// Check the three important cases to ensure they are sent with the correct
  3408  	// ConnectionClass.
  3409  	for _, key := range []roachpb.Key{
  3410  		keys.Meta1Prefix,
  3411  		keys.NodeLivenessKey(1),
  3412  		keys.SystemSQLCodec.TablePrefix(1234), // A non-system table
  3413  	} {
  3414  		t.Run(key.String(), func(t *testing.T) {
  3415  			var ba roachpb.BatchRequest
  3416  			ba.Add(&roachpb.GetRequest{
  3417  				RequestHeader: roachpb.RequestHeader{
  3418  					Key: key,
  3419  				},
  3420  			})
  3421  			_, err := ds.Send(context.Background(), ba)
  3422  			require.Nil(t, err)
  3423  		})
  3424  	}
  3425  }
  3426  
  3427  // TestEvictionTokenCoalesce tests when two separate batch requests are a part
  3428  // of the same stale range descriptor, they are coalesced when the range lookup
  3429  // is retried.
  3430  func TestEvictionTokenCoalesce(t *testing.T) {
  3431  	defer leaktest.AfterTest(t)()
  3432  	stopper := stop.NewStopper()
  3433  	defer stopper.Stop(context.Background())
  3434  
  3435  	initGen := int64(1)
  3436  	testUserRangeDescriptor := roachpb.RangeDescriptor{
  3437  		RangeID:  2,
  3438  		StartKey: roachpb.RKey("a"),
  3439  		EndKey:   roachpb.RKey("d"),
  3440  		InternalReplicas: []roachpb.ReplicaDescriptor{
  3441  			{
  3442  				NodeID:  1,
  3443  				StoreID: 1,
  3444  			},
  3445  		},
  3446  		Generation: initGen,
  3447  	}
  3448  
  3449  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  3450  	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
  3451  	g := makeGossip(t, stopper, rpcContext)
  3452  	if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &testMetaRangeDescriptor, time.Hour); err != nil {
  3453  		t.Fatal(err)
  3454  	}
  3455  
  3456  	sendErrors := int32(0)
  3457  	var queriedMetaKeys sync.Map
  3458  
  3459  	var ds *DistSender
  3460  	var testFn simpleSendFn = func(
  3461  		_ context.Context,
  3462  		_ SendOptions,
  3463  		_ ReplicaSlice,
  3464  		ba roachpb.BatchRequest,
  3465  	) (*roachpb.BatchResponse, error) {
  3466  		rs, err := keys.Range(ba.Requests)
  3467  		br := ba.CreateReply()
  3468  		if err != nil {
  3469  			br.Error = roachpb.NewError(err)
  3470  			return br, nil
  3471  		}
  3472  		if !kv.TestingIsRangeLookup(ba) {
  3473  			// Return a SendError so DistSender retries the first range lookup in the
  3474  			// user key-space for both batches.
  3475  			if atomic.AddInt32(&sendErrors, 1) <= 2 {
  3476  				br.Error = roachpb.NewError(&roachpb.SendError{})
  3477  				return br, nil
  3478  			}
  3479  			return br, nil
  3480  		}
  3481  
  3482  		if bytes.HasPrefix(rs.Key, keys.Meta1Prefix) {
  3483  			// Querying meta 1 range.
  3484  			br = &roachpb.BatchResponse{}
  3485  			r := &roachpb.ScanResponse{}
  3486  			var kv roachpb.KeyValue
  3487  			if err := kv.Value.SetProto(&testMetaRangeDescriptor); err != nil {
  3488  				br.Error = roachpb.NewError(err)
  3489  				return br, nil
  3490  			}
  3491  			r.Rows = append(r.Rows, kv)
  3492  			br.Add(r)
  3493  			return br, nil
  3494  		}
  3495  		// Querying meta2 range.
  3496  		br = &roachpb.BatchResponse{}
  3497  		r := &roachpb.ScanResponse{}
  3498  		var kv roachpb.KeyValue
  3499  		if err := kv.Value.SetProto(&testUserRangeDescriptor); err != nil {
  3500  			br.Error = roachpb.NewError(err)
  3501  			return br, nil
  3502  		}
  3503  		r.Rows = append(r.Rows, kv)
  3504  		br.Add(r)
  3505  		// The first query for each batch request key of the meta1 range should be
  3506  		// in separate requests because there is no prior eviction token.
  3507  		if _, ok := queriedMetaKeys.Load(string(rs.Key)); ok {
  3508  			// Wait until we have two in-flight requests.
  3509  			if err := testutils.SucceedsSoonError(func() error {
  3510  				// Since the previously fetched RangeDescriptor was ["a", "d"), the request keys
  3511  				// would be coalesced to "a".
  3512  				numCalls := ds.rangeCache.lookupRequests.NumCalls(fmt.Sprintf("a:false:%d", initGen))
  3513  				if numCalls != 2 {
  3514  					return errors.Errorf("expected %d in-flight requests, got %d", 2, numCalls)
  3515  				}
  3516  				return nil
  3517  			}); err != nil {
  3518  				br.Error = roachpb.NewError(err)
  3519  				return br, nil
  3520  			}
  3521  		}
  3522  		queriedMetaKeys.Store(string(rs.Key), struct{}{})
  3523  		return br, nil
  3524  	}
  3525  
  3526  	cfg := DistSenderConfig{
  3527  		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  3528  		Clock:      clock,
  3529  		RPCContext: rpcContext,
  3530  		TestingKnobs: ClientTestingKnobs{
  3531  			TransportFactory: adaptSimpleTransport(testFn),
  3532  		},
  3533  		NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
  3534  		RPCRetryOptions: &retry.Options{
  3535  			MaxRetries: 1,
  3536  		},
  3537  		Settings: cluster.MakeTestingClusterSettings(),
  3538  	}
  3539  	ds = NewDistSender(cfg, g)
  3540  
  3541  	var batchWaitGroup sync.WaitGroup
  3542  	putFn := func(key, value string) {
  3543  		defer batchWaitGroup.Done()
  3544  		put := roachpb.NewPut(roachpb.Key(key), roachpb.MakeValueFromString("c"))
  3545  		if _, pErr := kv.SendWrapped(context.Background(), ds, put); pErr != nil {
  3546  			t.Errorf("put encountered error: %s", pErr)
  3547  		}
  3548  	}
  3549  	batchWaitGroup.Add(2)
  3550  	go putFn("b", "b")
  3551  	go putFn("c", "c")
  3552  	batchWaitGroup.Wait()
  3553  }
  3554  
  3555  func TestDistSenderSlowLogMessage(t *testing.T) {
  3556  	defer leaktest.AfterTest(t)()
  3557  	const (
  3558  		dur      = 8158 * time.Millisecond
  3559  		attempts = 120
  3560  	)
  3561  	desc := &roachpb.RangeDescriptor{RangeID: 9, StartKey: roachpb.RKey("x")}
  3562  	{
  3563  		exp := `have been waiting 8.16s (120 attempts) for RPC to` +
  3564  			` r9:{-} [<no replicas>, next=0, gen=0]: boom`
  3565  		act := slowRangeRPCWarningStr(
  3566  			dur,
  3567  			120,
  3568  			desc,
  3569  			roachpb.NewError(errors.New("boom")))
  3570  
  3571  		require.Equal(t, exp, act)
  3572  	}
  3573  
  3574  	{
  3575  		exp := `slow RPC finished after 8.16s (120 attempts)`
  3576  		act := slowRangeRPCReturnWarningStr(dur, attempts)
  3577  		require.Equal(t, exp, act)
  3578  	}
  3579  }