github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvclient/kvcoord/dist_sender_test.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvcoord 12 13 import ( 14 "bytes" 15 "context" 16 "fmt" 17 "reflect" 18 "regexp" 19 "sort" 20 "strconv" 21 "sync" 22 "sync/atomic" 23 "testing" 24 "time" 25 26 "github.com/cockroachdb/cockroach/pkg/base" 27 "github.com/cockroachdb/cockroach/pkg/config/zonepb" 28 "github.com/cockroachdb/cockroach/pkg/gossip" 29 "github.com/cockroachdb/cockroach/pkg/gossip/simulation" 30 "github.com/cockroachdb/cockroach/pkg/keys" 31 "github.com/cockroachdb/cockroach/pkg/kv" 32 "github.com/cockroachdb/cockroach/pkg/roachpb" 33 "github.com/cockroachdb/cockroach/pkg/rpc" 34 "github.com/cockroachdb/cockroach/pkg/rpc/nodedialer" 35 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 36 "github.com/cockroachdb/cockroach/pkg/testutils" 37 "github.com/cockroachdb/cockroach/pkg/util" 38 "github.com/cockroachdb/cockroach/pkg/util/hlc" 39 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 40 "github.com/cockroachdb/cockroach/pkg/util/log" 41 "github.com/cockroachdb/cockroach/pkg/util/metric" 42 "github.com/cockroachdb/cockroach/pkg/util/retry" 43 "github.com/cockroachdb/cockroach/pkg/util/stop" 44 "github.com/cockroachdb/cockroach/pkg/util/tracing" 45 "github.com/cockroachdb/cockroach/pkg/util/uuid" 46 "github.com/cockroachdb/errors" 47 "github.com/stretchr/testify/assert" 48 "github.com/stretchr/testify/require" 49 ) 50 51 var ( 52 // 53 // Meta RangeDescriptors 54 // 55 testMetaEndKey = roachpb.RKey(keys.SystemPrefix) 56 // single meta1 and meta2 range with one replica. 
57 testMetaRangeDescriptor = roachpb.RangeDescriptor{ 58 RangeID: 1, 59 StartKey: roachpb.RKeyMin, 60 EndKey: testMetaEndKey, 61 InternalReplicas: []roachpb.ReplicaDescriptor{ 62 { 63 NodeID: 1, 64 StoreID: 1, 65 }, 66 }, 67 } 68 69 // 70 // User-Space RangeDescriptors 71 // 72 // single user-space descriptor with one replica. 73 testUserRangeDescriptor = roachpb.RangeDescriptor{ 74 RangeID: 2, 75 StartKey: testMetaEndKey, 76 EndKey: roachpb.RKeyMax, 77 InternalReplicas: []roachpb.ReplicaDescriptor{ 78 { 79 NodeID: 1, 80 StoreID: 1, 81 }, 82 }, 83 } 84 // single user-space descriptor with three replicas. 85 testUserRangeDescriptor3Replicas = roachpb.RangeDescriptor{ 86 RangeID: 2, 87 StartKey: testMetaEndKey, 88 EndKey: roachpb.RKeyMax, 89 InternalReplicas: []roachpb.ReplicaDescriptor{ 90 { 91 NodeID: 1, 92 StoreID: 1, 93 }, 94 { 95 NodeID: 2, 96 StoreID: 2, 97 }, 98 { 99 NodeID: 3, 100 StoreID: 3, 101 }, 102 }, 103 } 104 ) 105 106 var testAddress = util.NewUnresolvedAddr("tcp", "node1") 107 108 // simpleSendFn is the function type used to dispatch RPC calls in simpleTransportAdapter 109 type simpleSendFn func( 110 context.Context, 111 SendOptions, 112 ReplicaSlice, 113 roachpb.BatchRequest, 114 ) (*roachpb.BatchResponse, error) 115 116 // stubRPCSendFn is an rpcSendFn that simply creates a reply for the 117 // BatchRequest without performing an RPC call or triggering any 118 // test instrumentation. 119 var stubRPCSendFn simpleSendFn = func( 120 _ context.Context, _ SendOptions, _ ReplicaSlice, args roachpb.BatchRequest, 121 ) (*roachpb.BatchResponse, error) { 122 return args.CreateReply(), nil 123 } 124 125 // adaptSimpleTransport converts the RPCSend functions used in these 126 // tests to the newer transport interface. 
127 func adaptSimpleTransport(fn simpleSendFn) TransportFactory { 128 return func( 129 opts SendOptions, 130 nodeDialer *nodedialer.Dialer, 131 replicas ReplicaSlice, 132 ) (Transport, error) { 133 return &simpleTransportAdapter{ 134 fn: fn, 135 opts: opts, 136 replicas: replicas}, nil 137 } 138 } 139 140 type simpleTransportAdapter struct { 141 fn simpleSendFn 142 opts SendOptions 143 replicas ReplicaSlice 144 nextReplica int 145 } 146 147 func (l *simpleTransportAdapter) IsExhausted() bool { 148 return l.nextReplica >= len(l.replicas) 149 } 150 151 func (l *simpleTransportAdapter) SendNext( 152 ctx context.Context, ba roachpb.BatchRequest, 153 ) (*roachpb.BatchResponse, error) { 154 ba.Replica = l.replicas[l.nextReplica].ReplicaDescriptor 155 l.nextReplica++ 156 return l.fn(ctx, l.opts, l.replicas, ba) 157 } 158 159 func (l *simpleTransportAdapter) NextInternalClient( 160 ctx context.Context, 161 ) (context.Context, roachpb.InternalClient, error) { 162 panic("unimplemented") 163 } 164 165 func (l *simpleTransportAdapter) NextReplica() roachpb.ReplicaDescriptor { 166 if !l.IsExhausted() { 167 return l.replicas[l.nextReplica].ReplicaDescriptor 168 } 169 return roachpb.ReplicaDescriptor{} 170 } 171 172 func (*simpleTransportAdapter) MoveToFront(roachpb.ReplicaDescriptor) { 173 } 174 175 func makeGossip(t *testing.T, stopper *stop.Stopper, rpcContext *rpc.Context) *gossip.Gossip { 176 server := rpc.NewServer(rpcContext) 177 178 const nodeID = 1 179 g := gossip.NewTest(nodeID, rpcContext, server, stopper, metric.NewRegistry(), zonepb.DefaultZoneConfigRef()) 180 if err := g.SetNodeDescriptor(newNodeDesc(nodeID)); err != nil { 181 t.Fatal(err) 182 } 183 if err := g.AddInfo(gossip.KeySentinel, nil, time.Hour); err != nil { 184 t.Fatal(err) 185 } 186 187 return g 188 } 189 190 func newNodeDesc(nodeID roachpb.NodeID) *roachpb.NodeDescriptor { 191 return &roachpb.NodeDescriptor{ 192 NodeID: nodeID, 193 Address: util.MakeUnresolvedAddr("tcp", 
fmt.Sprintf("invalid.invalid:%d", nodeID)), 194 } 195 } 196 197 // TestSendRPCOrder verifies that sendRPC correctly takes into account the 198 // lease holder, attributes and required consistency to determine where to send 199 // remote requests. 200 func TestSendRPCOrder(t *testing.T) { 201 defer leaktest.AfterTest(t)() 202 stopper := stop.NewStopper() 203 ctx := context.Background() 204 defer stopper.Stop(ctx) 205 206 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 207 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 208 g := makeGossip(t, stopper, rpcContext) 209 rangeID := roachpb.RangeID(99) 210 211 nodeTiers := map[int32][]roachpb.Tier{ 212 1: {}, // The local node, set in each test case. 213 2: {roachpb.Tier{Key: "country", Value: "us"}, roachpb.Tier{Key: "region", Value: "west"}}, 214 3: {roachpb.Tier{Key: "country", Value: "eu"}, roachpb.Tier{Key: "city", Value: "dublin"}}, 215 4: {roachpb.Tier{Key: "country", Value: "us"}, roachpb.Tier{Key: "region", Value: "east"}, roachpb.Tier{Key: "city", Value: "nyc"}}, 216 5: {roachpb.Tier{Key: "country", Value: "us"}, roachpb.Tier{Key: "region", Value: "east"}, roachpb.Tier{Key: "city", Value: "mia"}}, 217 } 218 219 // Gets filled below to identify the replica by its address. 
220 makeVerifier := func(expAddrs []roachpb.NodeID) func(SendOptions, ReplicaSlice) error { 221 return func(o SendOptions, replicas ReplicaSlice) error { 222 var actualAddrs []roachpb.NodeID 223 for i, r := range replicas { 224 if len(expAddrs) <= i { 225 return errors.Errorf("got unexpected address: %s", r.NodeDesc.Address) 226 } 227 if expAddrs[i] == 0 { 228 actualAddrs = append(actualAddrs, 0) 229 } else { 230 actualAddrs = append(actualAddrs, r.NodeDesc.NodeID) 231 } 232 } 233 if !reflect.DeepEqual(expAddrs, actualAddrs) { 234 return errors.Errorf("expected %d, but found %d", expAddrs, actualAddrs) 235 } 236 return nil 237 } 238 } 239 240 testCases := []struct { 241 args roachpb.Request 242 tiers []roachpb.Tier 243 expReplica []roachpb.NodeID 244 leaseHolder int32 // 0 for not caching a lease holder. 245 // Naming is somewhat off, as eventually consistent reads usually 246 // do not have to go to the lease holder when a node has a read lease. 247 // Would really want CONSENSUS here, but that is not implemented. 248 // Likely a test setup here will never have a read lease, but good 249 // to keep in mind. 250 consistent bool 251 }{ 252 // Inconsistent Scan without matching attributes. 253 { 254 args: &roachpb.ScanRequest{}, 255 tiers: []roachpb.Tier{}, 256 expReplica: []roachpb.NodeID{1, 2, 3, 4, 5}, 257 }, 258 // Inconsistent Scan with matching attributes. 259 // Should move the two nodes matching the attributes to the front and 260 // go stable. 261 { 262 args: &roachpb.ScanRequest{}, 263 tiers: nodeTiers[5], 264 // Compare only the first two resulting addresses. 265 expReplica: []roachpb.NodeID{5, 4, 0, 0, 0}, 266 }, 267 268 // Scan without matching attributes that requires but does not find 269 // a lease holder. 270 { 271 args: &roachpb.ScanRequest{}, 272 tiers: []roachpb.Tier{}, 273 expReplica: []roachpb.NodeID{1, 2, 3, 4, 5}, 274 consistent: true, 275 }, 276 // Put without matching attributes that requires but does not find lease holder. 
277 // Should go random and not change anything. 278 { 279 args: &roachpb.PutRequest{}, 280 tiers: []roachpb.Tier{{Key: "nomatch", Value: ""}}, 281 expReplica: []roachpb.NodeID{1, 2, 3, 4, 5}, 282 }, 283 // Put with matching attributes but no lease holder. 284 // Should move the two nodes matching the attributes to the front. 285 { 286 args: &roachpb.PutRequest{}, 287 tiers: append(nodeTiers[5], roachpb.Tier{Key: "irrelevant", Value: ""}), 288 // Compare only the first two resulting addresses. 289 expReplica: []roachpb.NodeID{5, 4, 0, 0, 0}, 290 }, 291 // Put with matching attributes that finds the lease holder (node 3). 292 // Should address the lease holder and the two nodes matching the attributes 293 // (the last and second to last) in that order. 294 { 295 args: &roachpb.PutRequest{}, 296 tiers: append(nodeTiers[5], roachpb.Tier{Key: "irrelevant", Value: ""}), 297 // Compare only the first resulting address as we have a lease holder 298 // and that means we're only trying to send there. 299 expReplica: []roachpb.NodeID{2, 0, 0, 0, 0}, 300 leaseHolder: 2, 301 }, 302 // Inconsistent Get without matching attributes but lease holder (node 3). Should just 303 // go random as the lease holder does not matter. 
304 { 305 args: &roachpb.GetRequest{}, 306 tiers: []roachpb.Tier{}, 307 expReplica: []roachpb.NodeID{1, 2, 3, 4, 5}, 308 leaseHolder: 2, 309 }, 310 } 311 312 descriptor := roachpb.RangeDescriptor{ 313 StartKey: roachpb.RKeyMin, 314 EndKey: roachpb.RKeyMax, 315 RangeID: rangeID, 316 NextReplicaID: 1, 317 } 318 for i := int32(1); i <= 5; i++ { 319 addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d:1", i)) 320 nd := &roachpb.NodeDescriptor{ 321 NodeID: roachpb.NodeID(i), 322 Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), 323 Locality: roachpb.Locality{ 324 Tiers: nodeTiers[i], 325 }, 326 } 327 if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil { 328 t.Fatal(err) 329 } 330 descriptor.AddReplica(roachpb.NodeID(i), roachpb.StoreID(i), roachpb.VOTER_FULL) 331 } 332 333 // Stub to be changed in each test case. 334 var verifyCall func(SendOptions, ReplicaSlice) error 335 336 var testFn simpleSendFn = func( 337 _ context.Context, 338 opts SendOptions, 339 replicas ReplicaSlice, 340 args roachpb.BatchRequest, 341 ) (*roachpb.BatchResponse, error) { 342 if err := verifyCall(opts, replicas); err != nil { 343 return nil, err 344 } 345 return args.CreateReply(), nil 346 } 347 348 cfg := DistSenderConfig{ 349 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 350 Clock: clock, 351 RPCContext: rpcContext, 352 TestingKnobs: ClientTestingKnobs{ 353 TransportFactory: adaptSimpleTransport(testFn), 354 }, 355 RangeDescriptorDB: mockRangeDescriptorDBForDescs(descriptor), 356 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 357 Settings: cluster.MakeTestingClusterSettings(), 358 } 359 360 ds := NewDistSender(cfg, g) 361 362 for n, tc := range testCases { 363 t.Run("", func(t *testing.T) { 364 verifyCall = makeVerifier(tc.expReplica) 365 366 { 367 // The local node needs to get its attributes during sendRPC. 
368 nd := &roachpb.NodeDescriptor{ 369 NodeID: 6, 370 Address: util.MakeUnresolvedAddr("tcp", fmt.Sprintf("invalid.invalid:6")), 371 Locality: roachpb.Locality{ 372 Tiers: tc.tiers, 373 }, 374 } 375 g.NodeID.Reset(nd.NodeID) 376 if err := g.SetNodeDescriptor(nd); err != nil { 377 t.Fatal(err) 378 } 379 } 380 381 ds.leaseHolderCache.Update( 382 ctx, rangeID, roachpb.StoreID(0), 383 ) 384 if tc.leaseHolder > 0 { 385 ds.leaseHolderCache.Update( 386 ctx, rangeID, descriptor.InternalReplicas[tc.leaseHolder-1].StoreID, 387 ) 388 } 389 390 args := tc.args 391 { 392 header := args.Header() 393 header.Key = roachpb.Key("a") 394 args.SetHeader(header) 395 } 396 if roachpb.IsRange(args) { 397 header := args.Header() 398 header.EndKey = args.Header().Key.Next() 399 args.SetHeader(header) 400 } 401 consistency := roachpb.CONSISTENT 402 if !tc.consistent { 403 consistency = roachpb.INCONSISTENT 404 } 405 // Kill the cached NodeDescriptor, enforcing a lookup from Gossip. 406 ds.nodeDescriptor = nil 407 if _, err := kv.SendWrappedWith(ctx, ds, roachpb.Header{ 408 RangeID: rangeID, // Not used in this test, but why not. 409 ReadConsistency: consistency, 410 }, args); err != nil { 411 t.Errorf("%d: %s", n, err) 412 } 413 }) 414 } 415 } 416 417 // MockRangeDescriptorDB is an implementation of RangeDescriptorDB. Unlike 418 // DistSender's implementation, MockRangeDescriptorDB does not call back into 419 // the RangeDescriptorCache by default to perform RangeLookups. Because of this, 420 // tests should not rely on that behavior and should implement it themselves if 421 // they need it. 
422 type MockRangeDescriptorDB func(roachpb.RKey, bool) (rs, preRs []roachpb.RangeDescriptor, err error) 423 424 func (mdb MockRangeDescriptorDB) RangeLookup( 425 ctx context.Context, key roachpb.RKey, useReverseScan bool, 426 ) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) { 427 return mdb(key, useReverseScan) 428 } 429 430 func (mdb MockRangeDescriptorDB) FirstRange() (*roachpb.RangeDescriptor, error) { 431 rs, _, err := mdb(roachpb.RKey(roachpb.KeyMin), false) 432 if err != nil || len(rs) == 0 { 433 return nil, err 434 } 435 return &rs[0], nil 436 } 437 438 // withMetaRecursion returns a new MockRangeDescriptorDB that will behave the 439 // same as the receiver, but will also recurse into the provided 440 // RangeDescriptorCache on each lookup to simulate the use of a descriptor's 441 // parent descriptor during the RangeLookup scan. This is important for tests 442 // that expect the RangeLookup for a user space descriptor to trigger a lookup 443 // for a meta descriptor. 444 func (mdb MockRangeDescriptorDB) withMetaRecursion( 445 rdc *RangeDescriptorCache, 446 ) MockRangeDescriptorDB { 447 return func(key roachpb.RKey, useReverseScan bool) (rs, preRs []roachpb.RangeDescriptor, err error) { 448 metaKey := keys.RangeMetaKey(key) 449 if !metaKey.Equal(roachpb.RKeyMin) { 450 _, _, err := rdc.LookupRangeDescriptorWithEvictionToken(context.Background(), metaKey, nil, useReverseScan) 451 if err != nil { 452 return nil, nil, err 453 } 454 } 455 return mdb(key, useReverseScan) 456 } 457 } 458 459 // withMetaRecursion calls MockRangeDescriptorDB.withMetaRecursion on the 460 // DistSender's RangeDescriptorDB. 
461 func (ds *DistSender) withMetaRecursion() *DistSender { 462 ds.rangeCache.db = ds.rangeCache.db.(MockRangeDescriptorDB).withMetaRecursion(ds.rangeCache) 463 return ds 464 } 465 466 func mockRangeDescriptorDBForDescs(descs ...roachpb.RangeDescriptor) MockRangeDescriptorDB { 467 return MockRangeDescriptorDB(func(key roachpb.RKey, useReverseScan bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) { 468 var matchingDescs []roachpb.RangeDescriptor 469 for _, desc := range descs { 470 contains := desc.ContainsKey 471 if useReverseScan { 472 contains = desc.ContainsKeyInverted 473 } 474 if contains(key) { 475 matchingDescs = append(matchingDescs, desc) 476 } 477 } 478 switch len(matchingDescs) { 479 case 0: 480 panic(fmt.Sprintf("found no matching descriptors for key %s", key)) 481 case 1: 482 return matchingDescs, nil, nil 483 default: 484 panic(fmt.Sprintf("found multiple matching descriptors for key %s: %v", key, matchingDescs)) 485 } 486 }) 487 } 488 489 var defaultMockRangeDescriptorDB = mockRangeDescriptorDBForDescs( 490 testMetaRangeDescriptor, 491 testUserRangeDescriptor, 492 ) 493 var threeReplicaMockRangeDescriptorDB = mockRangeDescriptorDBForDescs( 494 testMetaRangeDescriptor, 495 testUserRangeDescriptor3Replicas, 496 ) 497 498 func TestImmutableBatchArgs(t *testing.T) { 499 defer leaktest.AfterTest(t)() 500 stopper := stop.NewStopper() 501 defer stopper.Stop(context.Background()) 502 503 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 504 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 505 g := makeGossip(t, stopper, rpcContext) 506 var testFn simpleSendFn = func( 507 _ context.Context, 508 _ SendOptions, 509 _ ReplicaSlice, 510 args roachpb.BatchRequest, 511 ) (*roachpb.BatchResponse, error) { 512 reply := args.CreateReply() 513 reply.Txn = args.Txn.Clone() 514 reply.Txn.WriteTimestamp = hlc.MaxTimestamp 515 return reply, nil 516 } 517 518 cfg := DistSenderConfig{ 519 AmbientCtx: log.AmbientContext{Tracer: 
tracing.NewTracer()}, 520 Clock: clock, 521 RPCContext: rpcContext, 522 TestingKnobs: ClientTestingKnobs{ 523 TransportFactory: adaptSimpleTransport(testFn), 524 }, 525 RangeDescriptorDB: defaultMockRangeDescriptorDB, 526 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 527 Settings: cluster.MakeTestingClusterSettings(), 528 } 529 530 ds := NewDistSender(cfg, g) 531 532 txn := roachpb.MakeTransaction( 533 "test", nil /* baseKey */, roachpb.NormalUserPriority, 534 clock.Now(), clock.MaxOffset().Nanoseconds(), 535 ) 536 origTxnTs := txn.WriteTimestamp 537 538 // An optimization does copy-on-write if we haven't observed anything, 539 // so make sure we're not in that case. 540 txn.UpdateObservedTimestamp(1, hlc.MaxTimestamp) 541 542 put := roachpb.NewPut(roachpb.Key("don't"), roachpb.Value{}) 543 if _, pErr := kv.SendWrappedWith(context.Background(), ds, roachpb.Header{ 544 Txn: &txn, 545 }, put); pErr != nil { 546 t.Fatal(pErr) 547 } 548 549 if txn.WriteTimestamp != origTxnTs { 550 t.Fatal("Transaction was mutated by DistSender") 551 } 552 } 553 554 // TestRetryOnNotLeaseHolderError verifies that the DistSender correctly updates the 555 // lease holder cache and retries when receiving a NotLeaseHolderError. 
556 func TestRetryOnNotLeaseHolderError(t *testing.T) { 557 defer leaktest.AfterTest(t)() 558 stopper := stop.NewStopper() 559 defer stopper.Stop(context.Background()) 560 561 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 562 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 563 g := makeGossip(t, stopper, rpcContext) 564 leaseHolder := roachpb.ReplicaDescriptor{ 565 NodeID: 99, 566 StoreID: 999, 567 } 568 first := true 569 570 var testFn simpleSendFn = func( 571 _ context.Context, 572 _ SendOptions, 573 _ ReplicaSlice, 574 args roachpb.BatchRequest, 575 ) (*roachpb.BatchResponse, error) { 576 reply := &roachpb.BatchResponse{} 577 if first { 578 reply.Error = roachpb.NewError( 579 &roachpb.NotLeaseHolderError{LeaseHolder: &leaseHolder}) 580 first = false 581 return reply, nil 582 } 583 // Return an error to avoid activating a code path that would 584 // populate the leaseholder cache from the successful response. 585 // That's not what this test wants to test. 586 reply.Error = roachpb.NewErrorf("boom") 587 return reply, nil 588 } 589 590 cfg := DistSenderConfig{ 591 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 592 Clock: clock, 593 RPCContext: rpcContext, 594 TestingKnobs: ClientTestingKnobs{ 595 TransportFactory: adaptSimpleTransport(testFn), 596 }, 597 RangeDescriptorDB: defaultMockRangeDescriptorDB, 598 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 599 Settings: cluster.MakeTestingClusterSettings(), 600 } 601 ds := NewDistSender(cfg, g) 602 v := roachpb.MakeValueFromString("value") 603 put := roachpb.NewPut(roachpb.Key("a"), v) 604 if _, pErr := kv.SendWrapped(context.Background(), ds, put); !testutils.IsPError(pErr, "boom") { 605 t.Fatalf("unexpected error: %v", pErr) 606 } 607 if first { 608 t.Errorf("The command did not retry") 609 } 610 rangeID := roachpb.RangeID(2) 611 if cur, ok := ds.leaseHolderCache.Lookup(context.Background(), rangeID); !ok { 612 t.Errorf("lease holder cache was not updated: 
expected %+v", leaseHolder) 613 } else if cur != leaseHolder.StoreID { 614 t.Errorf("lease holder cache was not updated: expected %d, got %d", leaseHolder.StoreID, cur) 615 } 616 } 617 618 // TestBackoffOnNotLeaseHolderErrorDuringTransfer verifies that the DistSender 619 // backs off upon receiving multiple NotLeaseHolderErrors without observing an 620 // increase in LeaseSequence. 621 func TestBackoffOnNotLeaseHolderErrorDuringTransfer(t *testing.T) { 622 defer leaktest.AfterTest(t)() 623 stopper := stop.NewStopper() 624 defer stopper.Stop(context.Background()) 625 626 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 627 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 628 g := makeGossip(t, stopper, rpcContext) 629 leaseHolders := testUserRangeDescriptor3Replicas.InternalReplicas 630 for _, n := range leaseHolders { 631 if err := g.AddInfoProto( 632 gossip.MakeNodeIDKey(n.NodeID), 633 newNodeDesc(n.NodeID), 634 gossip.NodeDescriptorTTL, 635 ); err != nil { 636 t.Fatal(err) 637 } 638 } 639 var sequences []roachpb.LeaseSequence 640 var testFn simpleSendFn = func( 641 _ context.Context, 642 _ SendOptions, 643 _ ReplicaSlice, 644 args roachpb.BatchRequest, 645 ) (*roachpb.BatchResponse, error) { 646 reply := &roachpb.BatchResponse{} 647 if len(sequences) > 0 { 648 seq := sequences[0] 649 sequences = sequences[1:] 650 lease := roachpb.Lease{ 651 Sequence: seq, 652 Replica: leaseHolders[int(seq)%2], 653 } 654 reply.Error = roachpb.NewError( 655 &roachpb.NotLeaseHolderError{ 656 Replica: leaseHolders[int(seq)%2], 657 LeaseHolder: &leaseHolders[(int(seq)+1)%2], 658 Lease: &lease, 659 }) 660 return reply, nil 661 } 662 // Return an error to bail out of retries. 
663 reply.Error = roachpb.NewErrorf("boom") 664 return reply, nil 665 } 666 667 cfg := DistSenderConfig{ 668 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 669 Clock: clock, 670 RPCContext: rpcContext, 671 TestingKnobs: ClientTestingKnobs{ 672 TransportFactory: adaptSimpleTransport(testFn), 673 }, 674 RangeDescriptorDB: threeReplicaMockRangeDescriptorDB, 675 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 676 RPCRetryOptions: &retry.Options{ 677 InitialBackoff: time.Microsecond, 678 MaxBackoff: time.Microsecond, 679 }, 680 Settings: cluster.MakeTestingClusterSettings(), 681 } 682 for i, c := range []struct { 683 leaseSequences []roachpb.LeaseSequence 684 expected int64 685 }{ 686 {[]roachpb.LeaseSequence{1, 0, 1, 2}, 2}, 687 {[]roachpb.LeaseSequence{0}, 0}, 688 {[]roachpb.LeaseSequence{1, 0, 1, 2, 1}, 3}, 689 } { 690 sequences = c.leaseSequences 691 ds := NewDistSender(cfg, g) 692 v := roachpb.MakeValueFromString("value") 693 put := roachpb.NewPut(roachpb.Key("a"), v) 694 if _, pErr := kv.SendWrapped(context.Background(), ds, put); !testutils.IsPError(pErr, "boom") { 695 t.Fatalf("%d: unexpected error: %v", i, pErr) 696 } 697 if got := ds.Metrics().InLeaseTransferBackoffs.Count(); got != c.expected { 698 t.Fatalf("%d: expected %d backoffs, got %d", i, c.expected, got) 699 } 700 } 701 } 702 703 // This test verifies that when we have a cached leaseholder that is down 704 // it is ejected from the cache. 
705 func TestDistSenderDownNodeEvictLeaseholder(t *testing.T) { 706 defer leaktest.AfterTest(t)() 707 708 ctx := context.Background() 709 stopper := stop.NewStopper() 710 defer stopper.Stop(ctx) 711 712 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 713 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 714 g := makeGossip(t, stopper, rpcContext) 715 if err := g.AddInfoProto( 716 gossip.MakeNodeIDKey(roachpb.NodeID(2)), 717 newNodeDesc(2), 718 gossip.NodeDescriptorTTL, 719 ); err != nil { 720 t.Fatal(err) 721 } 722 723 var contacted1, contacted2 bool 724 725 transport := func( 726 ctx context.Context, 727 opts SendOptions, 728 replicas ReplicaSlice, 729 ba roachpb.BatchRequest, 730 ) (*roachpb.BatchResponse, error) { 731 switch ba.Replica.StoreID { 732 case 1: 733 contacted1 = true 734 return nil, errors.New("mock RPC error") 735 case 2: 736 contacted2 = true 737 return ba.CreateReply(), nil 738 default: 739 panic("unexpected replica: " + ba.Replica.String()) 740 } 741 } 742 743 cfg := DistSenderConfig{ 744 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 745 Clock: clock, 746 RPCContext: rpcContext, 747 TestingKnobs: ClientTestingKnobs{ 748 TransportFactory: adaptSimpleTransport(transport), 749 }, 750 RangeDescriptorDB: mockRangeDescriptorDBForDescs( 751 roachpb.RangeDescriptor{ 752 RangeID: 1, 753 StartKey: roachpb.RKeyMin, 754 EndKey: roachpb.RKeyMax, 755 InternalReplicas: []roachpb.ReplicaDescriptor{ 756 { 757 NodeID: 1, 758 StoreID: 1, 759 }, 760 { 761 NodeID: 2, 762 StoreID: 2, 763 }, 764 }, 765 }), 766 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 767 Settings: cluster.MakeTestingClusterSettings(), 768 } 769 770 ds := NewDistSender(cfg, g) 771 ds.LeaseHolderCache().Update(ctx, roachpb.RangeID(1), roachpb.StoreID(1)) 772 773 var ba roachpb.BatchRequest 774 ba.RangeID = 1 775 get := &roachpb.GetRequest{} 776 get.Key = roachpb.Key("a") 777 ba.Add(get) 778 779 if _, pErr := ds.Send(ctx, ba); pErr != nil { 780 
t.Fatal(pErr) 781 } 782 783 if !contacted1 || !contacted2 { 784 t.Errorf("contacted n1: %t, contacted n2: %t", contacted1, contacted2) 785 } 786 787 if storeID, ok := ds.LeaseHolderCache().Lookup(ctx, roachpb.RangeID(1)); !ok { 788 t.Fatalf("expected new leaseholder to be cached") 789 } else if exp := roachpb.StoreID(2); storeID != exp { 790 t.Fatalf("expected lease holder for r1 to be cached as s%d, but got s%d", exp, storeID) 791 } 792 } 793 794 // TestRetryOnDescriptorLookupError verifies that the DistSender retries a descriptor 795 // lookup on any error. 796 func TestRetryOnDescriptorLookupError(t *testing.T) { 797 defer leaktest.AfterTest(t)() 798 stopper := stop.NewStopper() 799 defer stopper.Stop(context.Background()) 800 801 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 802 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 803 g := makeGossip(t, stopper, rpcContext) 804 805 errs := []error{ 806 errors.New("boom"), 807 nil, 808 } 809 810 cfg := DistSenderConfig{ 811 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 812 Clock: clock, 813 RPCContext: rpcContext, 814 TestingKnobs: ClientTestingKnobs{ 815 TransportFactory: adaptSimpleTransport(stubRPCSendFn), 816 }, 817 RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) { 818 // Don't return an error on the FirstRange lookup. 819 if key.Equal(roachpb.KeyMin) { 820 return []roachpb.RangeDescriptor{testMetaRangeDescriptor}, nil, nil 821 } 822 823 // Return next error and truncate the prefix of the errors array. 
824 err := errs[0] 825 errs = errs[1:] 826 return []roachpb.RangeDescriptor{testUserRangeDescriptor}, nil, err 827 }), 828 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 829 Settings: cluster.MakeTestingClusterSettings(), 830 } 831 ds := NewDistSender(cfg, g) 832 put := roachpb.NewPut(roachpb.Key("a"), roachpb.MakeValueFromString("value")) 833 // Error on descriptor lookup, second attempt successful. 834 if _, pErr := kv.SendWrapped(context.Background(), ds, put); pErr != nil { 835 t.Errorf("unexpected error: %s", pErr) 836 } 837 if len(errs) != 0 { 838 t.Fatalf("expected more descriptor lookups, leftover errs: %+v", errs) 839 } 840 } 841 842 // TestEvictOnFirstRangeGossip verifies that we evict the first range 843 // descriptor from the descriptor cache when a gossip update is received for 844 // the first range. 845 func TestEvictOnFirstRangeGossip(t *testing.T) { 846 defer leaktest.AfterTest(t)() 847 848 stopper := stop.NewStopper() 849 defer stopper.Stop(context.Background()) 850 851 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 852 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 853 g := makeGossip(t, stopper, rpcContext) 854 855 sender := func( 856 _ context.Context, ba roachpb.BatchRequest, 857 ) (*roachpb.BatchResponse, *roachpb.Error) { 858 return ba.CreateReply(), nil 859 } 860 861 desc := roachpb.RangeDescriptor{ 862 RangeID: 1, 863 StartKey: roachpb.RKeyMin, 864 EndKey: roachpb.RKeyMax, 865 InternalReplicas: []roachpb.ReplicaDescriptor{ 866 { 867 NodeID: 1, 868 StoreID: 1, 869 }, 870 }, 871 } 872 873 var numFirstRange int32 874 rDB := MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) ( 875 []roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error, 876 ) { 877 if key.Equal(roachpb.KeyMin) { 878 atomic.AddInt32(&numFirstRange, 1) 879 } 880 return []roachpb.RangeDescriptor{desc}, nil, nil 881 }) 882 883 cfg := DistSenderConfig{ 884 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 885 Clock: clock, 886 
RPCContext: rpcContext, 887 TestingKnobs: ClientTestingKnobs{ 888 TransportFactory: SenderTransportFactory( 889 tracing.NewTracer(), 890 kv.SenderFunc(sender), 891 ), 892 }, 893 RangeDescriptorDB: rDB, 894 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 895 Settings: cluster.MakeTestingClusterSettings(), 896 } 897 898 ds := NewDistSender(cfg, g).withMetaRecursion() 899 900 anyKey := roachpb.Key("anything") 901 rAnyKey := keys.MustAddr(anyKey) 902 903 call := func() { 904 if _, _, err := ds.rangeCache.LookupRangeDescriptorWithEvictionToken( 905 context.Background(), rAnyKey, nil, false, 906 ); err != nil { 907 t.Fatal(err) 908 } 909 } 910 911 // Perform multiple calls and check that the first range is only looked up 912 // once, with subsequent calls hitting the cache. 913 // 914 // This potentially races with the cache-evicting gossip callback on the 915 // first range, so it is important that the first range descriptor's state 916 // in gossip is stable from this point forward. 917 for i := 0; i < 3; i++ { 918 call() 919 if num := atomic.LoadInt32(&numFirstRange); num != 1 { 920 t.Fatalf("expected one first range lookup, got %d", num) 921 } 922 } 923 // Tweak the descriptor so that the gossip callback will be invoked. 924 desc.Generation = 1 925 if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &desc, 0); err != nil { 926 t.Fatal(err) 927 } 928 929 // Once Gossip fires the callbacks, we should see a cache eviction and thus, 930 // a new cache hit. 931 testutils.SucceedsSoon(t, func() error { 932 call() 933 if exp, act := int32(2), atomic.LoadInt32(&numFirstRange); exp != act { 934 return errors.Errorf("expected %d first range lookups, got %d", exp, act) 935 } 936 return nil 937 }) 938 } 939 940 func TestEvictCacheOnError(t *testing.T) { 941 defer leaktest.AfterTest(t)() 942 // The first attempt gets a BatchResponse with replicaError in the header, if 943 // replicaError set. If not set, the first attempt gets an RPC error. 
The 944 // second attempt, if any, succeeds. 945 // Currently lease holder and cached range descriptor are treated equally. 946 // TODO(bdarnell): refactor to cover different types of retryable errors. 947 const errString = "boom" 948 testDesc := roachpb.RangeDescriptor{ 949 RangeID: 1, 950 StartKey: testMetaEndKey, 951 EndKey: roachpb.RKeyMax, 952 InternalReplicas: []roachpb.ReplicaDescriptor{ 953 { 954 NodeID: 1, 955 StoreID: 1, 956 }, 957 }, 958 } 959 960 testCases := []struct { 961 canceledCtx bool 962 replicaError error 963 shouldClearLeaseHolder bool 964 shouldClearReplica bool 965 }{ 966 {false, errors.New(errString), false, false}, // non-retryable replica error 967 {false, &roachpb.RangeKeyMismatchError{MismatchedRange: testDesc}, false, false}, // RangeKeyMismatch replica error 968 {false, &roachpb.RangeNotFoundError{}, false, false}, // RangeNotFound replica error 969 {false, nil, false, false}, // RPC error 970 {true, nil, false, false}, // canceled context 971 } 972 973 for i, tc := range testCases { 974 stopper := stop.NewStopper() 975 defer stopper.Stop(context.Background()) 976 977 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 978 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 979 g := makeGossip(t, stopper, rpcContext) 980 leaseHolder := roachpb.ReplicaDescriptor{ 981 NodeID: 99, 982 StoreID: 999, 983 } 984 first := true 985 986 ctx, cancel := context.WithCancel(context.Background()) 987 988 var testFn simpleSendFn = func( 989 ctx context.Context, 990 _ SendOptions, 991 _ ReplicaSlice, 992 args roachpb.BatchRequest, 993 ) (*roachpb.BatchResponse, error) { 994 if !first { 995 return args.CreateReply(), nil 996 } 997 first = false 998 if tc.canceledCtx { 999 cancel() 1000 return nil, ctx.Err() 1001 } 1002 if tc.replicaError == nil { 1003 return nil, errors.New(errString) 1004 } 1005 reply := &roachpb.BatchResponse{} 1006 reply.Error = roachpb.NewError(tc.replicaError) 1007 return reply, nil 1008 } 1009 1010 cfg := 
DistSenderConfig{ 1011 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1012 Clock: clock, 1013 RPCContext: rpcContext, 1014 TestingKnobs: ClientTestingKnobs{ 1015 TransportFactory: adaptSimpleTransport(testFn), 1016 }, 1017 RangeDescriptorDB: defaultMockRangeDescriptorDB, 1018 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 1019 Settings: cluster.MakeTestingClusterSettings(), 1020 } 1021 ds := NewDistSender(cfg, g) 1022 ds.leaseHolderCache.Update(context.Background(), 1, leaseHolder.StoreID) 1023 key := roachpb.Key("a") 1024 put := roachpb.NewPut(key, roachpb.MakeValueFromString("value")) 1025 1026 if _, pErr := kv.SendWrapped(ctx, ds, put); pErr != nil && !testutils.IsPError(pErr, errString) && !testutils.IsError(pErr.GoError(), ctx.Err().Error()) { 1027 t.Errorf("put encountered unexpected error: %s", pErr) 1028 } 1029 if _, ok := ds.leaseHolderCache.Lookup(context.Background(), 1); ok != !tc.shouldClearLeaseHolder { 1030 t.Errorf("%d: lease holder cache eviction: shouldClearLeaseHolder=%t, but value is %t", i, tc.shouldClearLeaseHolder, ok) 1031 } 1032 cachedDesc := ds.rangeCache.GetCachedRangeDescriptor(roachpb.RKey(key), false /* inverted */) 1033 if cachedDesc == nil != tc.shouldClearReplica { 1034 t.Errorf("%d: unexpected second replica lookup behavior: wanted=%t", i, tc.shouldClearReplica) 1035 } 1036 } 1037 } 1038 1039 func TestEvictCacheOnUnknownLeaseHolder(t *testing.T) { 1040 defer leaktest.AfterTest(t)() 1041 stopper := stop.NewStopper() 1042 defer stopper.Stop(context.Background()) 1043 1044 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1045 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1046 g := makeGossip(t, stopper, rpcContext) 1047 1048 // Gossip the two nodes referred to in testUserRangeDescriptor3Replicas. 
1049 for i := 2; i <= 3; i++ { 1050 nd := newNodeDesc(roachpb.NodeID(i)) 1051 if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil { 1052 t.Fatal(err) 1053 } 1054 } 1055 1056 var count int32 1057 var testFn simpleSendFn = func( 1058 _ context.Context, 1059 _ SendOptions, 1060 _ ReplicaSlice, 1061 args roachpb.BatchRequest, 1062 ) (*roachpb.BatchResponse, error) { 1063 var err error 1064 switch count { 1065 case 0, 1: 1066 err = &roachpb.NotLeaseHolderError{LeaseHolder: &roachpb.ReplicaDescriptor{NodeID: 99, StoreID: 999}} 1067 case 2: 1068 err = roachpb.NewRangeNotFoundError(0, 0) 1069 default: 1070 return args.CreateReply(), nil 1071 } 1072 count++ 1073 reply := &roachpb.BatchResponse{} 1074 reply.Error = roachpb.NewError(err) 1075 return reply, nil 1076 } 1077 1078 cfg := DistSenderConfig{ 1079 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1080 Clock: clock, 1081 RPCContext: rpcContext, 1082 TestingKnobs: ClientTestingKnobs{ 1083 TransportFactory: adaptSimpleTransport(testFn), 1084 }, 1085 RangeDescriptorDB: threeReplicaMockRangeDescriptorDB, 1086 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 1087 Settings: cluster.MakeTestingClusterSettings(), 1088 } 1089 ds := NewDistSender(cfg, g) 1090 key := roachpb.Key("a") 1091 put := roachpb.NewPut(key, roachpb.MakeValueFromString("value")) 1092 1093 if _, pErr := kv.SendWrapped(context.Background(), ds, put); pErr != nil { 1094 t.Errorf("put encountered unexpected error: %s", pErr) 1095 } 1096 if count != 3 { 1097 t.Errorf("expected three retries; got %d", count) 1098 } 1099 } 1100 1101 // TestRetryOnWrongReplicaError sets up a DistSender on a minimal gossip 1102 // network and a mock of Send, and verifies that the DistSender correctly 1103 // retries upon encountering a stale entry in its range descriptor cache. 
1104 func TestRetryOnWrongReplicaError(t *testing.T) { 1105 defer leaktest.AfterTest(t)() 1106 stopper := stop.NewStopper() 1107 defer stopper.Stop(context.Background()) 1108 1109 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1110 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1111 g := makeGossip(t, stopper, rpcContext) 1112 if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &testMetaRangeDescriptor, time.Hour); err != nil { 1113 t.Fatal(err) 1114 } 1115 1116 // Updated below, after it has first been returned. 1117 badEndKey := roachpb.RKey("m") 1118 newRangeDescriptor := testUserRangeDescriptor 1119 goodEndKey := newRangeDescriptor.EndKey 1120 newRangeDescriptor.EndKey = badEndKey 1121 descStale := true 1122 1123 var testFn simpleSendFn = func( 1124 _ context.Context, 1125 _ SendOptions, 1126 _ ReplicaSlice, 1127 ba roachpb.BatchRequest, 1128 ) (*roachpb.BatchResponse, error) { 1129 rs, err := keys.Range(ba.Requests) 1130 if err != nil { 1131 t.Fatal(err) 1132 } 1133 if kv.TestingIsRangeLookup(ba) { 1134 if bytes.HasPrefix(rs.Key, keys.Meta1Prefix) { 1135 br := &roachpb.BatchResponse{} 1136 r := &roachpb.ScanResponse{} 1137 var kv roachpb.KeyValue 1138 if err := kv.Value.SetProto(&testMetaRangeDescriptor); err != nil { 1139 t.Fatal(err) 1140 } 1141 r.Rows = append(r.Rows, kv) 1142 br.Add(r) 1143 return br, nil 1144 } 1145 1146 if !descStale && bytes.HasPrefix(rs.Key, keys.Meta2Prefix) { 1147 t.Fatalf("unexpected extra lookup for non-stale replica descriptor at %s", rs.Key) 1148 } 1149 1150 br := &roachpb.BatchResponse{} 1151 r := &roachpb.ScanResponse{} 1152 var kv roachpb.KeyValue 1153 if err := kv.Value.SetProto(&newRangeDescriptor); err != nil { 1154 t.Fatal(err) 1155 } 1156 r.Rows = append(r.Rows, kv) 1157 br.Add(r) 1158 // If we just returned the stale descriptor, set up returning the 1159 // good one next time. 
1160 if bytes.HasPrefix(rs.Key, keys.Meta2Prefix) { 1161 if newRangeDescriptor.EndKey.Equal(badEndKey) { 1162 newRangeDescriptor.EndKey = goodEndKey 1163 } else { 1164 descStale = false 1165 } 1166 } 1167 return br, nil 1168 } 1169 // When the Scan first turns up, update the descriptor for future 1170 // range descriptor lookups. 1171 if !newRangeDescriptor.EndKey.Equal(goodEndKey) { 1172 return nil, &roachpb.RangeKeyMismatchError{ 1173 RequestStartKey: rs.Key.AsRawKey(), 1174 RequestEndKey: rs.EndKey.AsRawKey(), 1175 } 1176 } 1177 return ba.CreateReply(), nil 1178 } 1179 1180 cfg := DistSenderConfig{ 1181 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1182 Clock: clock, 1183 RPCContext: rpcContext, 1184 TestingKnobs: ClientTestingKnobs{ 1185 TransportFactory: adaptSimpleTransport(testFn), 1186 }, 1187 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 1188 Settings: cluster.MakeTestingClusterSettings(), 1189 } 1190 ds := NewDistSender(cfg, g) 1191 scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), false) 1192 if _, err := kv.SendWrapped(context.Background(), ds, scan); err != nil { 1193 t.Errorf("scan encountered error: %s", err) 1194 } 1195 } 1196 1197 // TestRetryOnWrongReplicaErrorWithSuggestion sets up a DistSender on a 1198 // minimal gossip network and a mock of Send, and verifies that the DistSender 1199 // correctly retries upon encountering a stale entry in its range descriptor cache 1200 // without needing to perform a second RangeLookup when the mismatch error 1201 // provides a suggestion. 
1202 func TestRetryOnWrongReplicaErrorWithSuggestion(t *testing.T) { 1203 defer leaktest.AfterTest(t)() 1204 stopper := stop.NewStopper() 1205 defer stopper.Stop(context.Background()) 1206 1207 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1208 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1209 g := makeGossip(t, stopper, rpcContext) 1210 if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &testMetaRangeDescriptor, time.Hour); err != nil { 1211 t.Fatal(err) 1212 } 1213 1214 // The test is gonna send the request first to staleDesc, but it reaches the 1215 // rhsDesc, which redirects it to lhsDesc. 1216 staleDesc := testUserRangeDescriptor 1217 lhsDesc := testUserRangeDescriptor 1218 lhsDesc.EndKey = roachpb.RKey("m") 1219 lhsDesc.RangeID = staleDesc.RangeID + 1 1220 lhsDesc.Generation = staleDesc.Generation + 1 1221 rhsDesc := testUserRangeDescriptor 1222 rhsDesc.StartKey = roachpb.RKey("m") 1223 rhsDesc.RangeID = staleDesc.RangeID + 2 1224 rhsDesc.Generation = staleDesc.Generation + 2 1225 firstLookup := true 1226 1227 var testFn simpleSendFn = func( 1228 _ context.Context, 1229 _ SendOptions, 1230 _ ReplicaSlice, 1231 ba roachpb.BatchRequest, 1232 ) (*roachpb.BatchResponse, error) { 1233 rs, err := keys.Range(ba.Requests) 1234 if err != nil { 1235 t.Fatal(err) 1236 } 1237 if kv.TestingIsRangeLookup(ba) { 1238 if bytes.HasPrefix(rs.Key, keys.Meta1Prefix) { 1239 br := &roachpb.BatchResponse{} 1240 r := &roachpb.ScanResponse{} 1241 var kv roachpb.KeyValue 1242 if err := kv.Value.SetProto(&testMetaRangeDescriptor); err != nil { 1243 t.Fatal(err) 1244 } 1245 r.Rows = append(r.Rows, kv) 1246 br.Add(r) 1247 return br, nil 1248 } 1249 1250 if !firstLookup { 1251 t.Fatalf("unexpected extra lookup for non-stale replica descriptor at %s", rs.Key) 1252 } 1253 firstLookup = false 1254 1255 br := &roachpb.BatchResponse{} 1256 r := &roachpb.ScanResponse{} 1257 var kv roachpb.KeyValue 1258 if err := kv.Value.SetProto(&staleDesc); err != nil { 1259 
t.Fatal(err) 1260 } 1261 r.Rows = append(r.Rows, kv) 1262 br.Add(r) 1263 return br, nil 1264 } 1265 1266 // When the Scan first turns up, provide the correct descriptor as a 1267 // suggestion for future range descriptor lookups. 1268 if ba.RangeID == staleDesc.RangeID { 1269 var br roachpb.BatchResponse 1270 br.Error = roachpb.NewError(&roachpb.RangeKeyMismatchError{ 1271 RequestStartKey: rs.Key.AsRawKey(), 1272 RequestEndKey: rs.EndKey.AsRawKey(), 1273 MismatchedRange: rhsDesc, 1274 SuggestedRange: &lhsDesc, 1275 }) 1276 return &br, nil 1277 } else if ba.RangeID != lhsDesc.RangeID { 1278 t.Fatalf("unexpected RangeID %d provided in request %v. expected: %s", ba.RangeID, ba, lhsDesc.RangeID) 1279 } 1280 return ba.CreateReply(), nil 1281 } 1282 1283 cfg := DistSenderConfig{ 1284 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1285 Clock: clock, 1286 RPCContext: rpcContext, 1287 TestingKnobs: ClientTestingKnobs{ 1288 TransportFactory: adaptSimpleTransport(testFn), 1289 }, 1290 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 1291 Settings: cluster.MakeTestingClusterSettings(), 1292 } 1293 ds := NewDistSender(cfg, g) 1294 scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), false) 1295 if _, err := kv.SendWrapped(context.Background(), ds, scan); err != nil { 1296 t.Errorf("scan encountered error: %s", err) 1297 } 1298 } 1299 1300 func TestGetFirstRangeDescriptor(t *testing.T) { 1301 defer leaktest.AfterTest(t)() 1302 stopper := stop.NewStopper() 1303 defer stopper.Stop(context.Background()) 1304 1305 n := simulation.NewNetwork(stopper, 3, true, zonepb.DefaultZoneConfigRef()) 1306 for _, node := range n.Nodes { 1307 // TODO(spencer): remove the use of gossip/simulation here. 
1308 node.Gossip.EnableSimulationCycler(false) 1309 } 1310 n.Start() 1311 ds := NewDistSender(DistSenderConfig{ 1312 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1313 RPCContext: n.RPCContext, 1314 NodeDialer: nodedialer.New(n.RPCContext, gossip.AddressResolver(n.Nodes[0].Gossip)), 1315 Settings: cluster.MakeTestingClusterSettings(), 1316 }, n.Nodes[0].Gossip) 1317 if _, err := ds.FirstRange(); err == nil { 1318 t.Errorf("expected not to find first range descriptor") 1319 } 1320 expectedDesc := &roachpb.RangeDescriptor{} 1321 expectedDesc.StartKey = roachpb.RKey("a") 1322 expectedDesc.EndKey = roachpb.RKey("c") 1323 1324 // Add first RangeDescriptor to a node different from the node for 1325 // this dist sender and ensure that this dist sender has the 1326 // information within a given time. 1327 if err := n.Nodes[1].Gossip.AddInfoProto(gossip.KeyFirstRangeDescriptor, expectedDesc, time.Hour); err != nil { 1328 t.Fatal(err) 1329 } 1330 const maxCycles = 25 1331 n.SimulateNetwork(func(cycle int, network *simulation.Network) bool { 1332 desc, err := ds.FirstRange() 1333 if err != nil { 1334 if cycle >= maxCycles { 1335 t.Errorf("could not get range descriptor after %d cycles", cycle) 1336 return false 1337 } 1338 return true 1339 } 1340 if !bytes.Equal(desc.StartKey, expectedDesc.StartKey) || 1341 !bytes.Equal(desc.EndKey, expectedDesc.EndKey) { 1342 t.Errorf("expected first range descriptor %v, instead was %v", 1343 expectedDesc, desc) 1344 } 1345 return false 1346 }) 1347 } 1348 1349 // TestSendRPCRetry verifies that sendRPC failed on first address but succeed on 1350 // second address, the second reply should be successfully returned back. 
1351 func TestSendRPCRetry(t *testing.T) { 1352 defer leaktest.AfterTest(t)() 1353 stopper := stop.NewStopper() 1354 defer stopper.Stop(context.Background()) 1355 1356 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1357 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1358 g := makeGossip(t, stopper, rpcContext) 1359 if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil { 1360 t.Fatal(err) 1361 } 1362 1363 // Fill RangeDescriptor with 2 replicas. 1364 var descriptor = roachpb.RangeDescriptor{ 1365 RangeID: 1, 1366 StartKey: roachpb.RKey("a"), 1367 EndKey: roachpb.RKey("z"), 1368 } 1369 for i := 1; i <= 2; i++ { 1370 addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i)) 1371 nd := &roachpb.NodeDescriptor{ 1372 NodeID: roachpb.NodeID(i), 1373 Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), 1374 } 1375 if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil { 1376 t.Fatal(err) 1377 } 1378 1379 descriptor.InternalReplicas = append(descriptor.InternalReplicas, roachpb.ReplicaDescriptor{ 1380 NodeID: roachpb.NodeID(i), 1381 StoreID: roachpb.StoreID(i), 1382 }) 1383 } 1384 descDB := mockRangeDescriptorDBForDescs( 1385 testMetaRangeDescriptor, 1386 descriptor, 1387 ) 1388 1389 var testFn simpleSendFn = func( 1390 _ context.Context, 1391 _ SendOptions, 1392 _ ReplicaSlice, 1393 args roachpb.BatchRequest, 1394 ) (*roachpb.BatchResponse, error) { 1395 batchReply := &roachpb.BatchResponse{} 1396 reply := &roachpb.ScanResponse{} 1397 batchReply.Add(reply) 1398 reply.Rows = append([]roachpb.KeyValue{}, roachpb.KeyValue{Key: roachpb.Key("b"), Value: roachpb.Value{}}) 1399 return batchReply, nil 1400 } 1401 cfg := DistSenderConfig{ 1402 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1403 Clock: clock, 1404 RPCContext: rpcContext, 1405 TestingKnobs: ClientTestingKnobs{ 1406 TransportFactory: adaptSimpleTransport(testFn), 1407 }, 1408 RangeDescriptorDB: descDB, 1409 Settings: 
cluster.MakeTestingClusterSettings(), 1410 } 1411 ds := NewDistSender(cfg, g) 1412 scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), false) 1413 sr, err := kv.SendWrappedWith(context.Background(), ds, roachpb.Header{MaxSpanRequestKeys: 1}, scan) 1414 if err != nil { 1415 t.Fatal(err) 1416 } 1417 if l := len(sr.(*roachpb.ScanResponse).Rows); l != 1 { 1418 t.Fatalf("expected 1 row; got %d", l) 1419 } 1420 } 1421 1422 // This test reproduces the main problem in: 1423 // https://github.com/cockroachdb/cockroach/issues/30613. 1424 // by verifying that if a RangeNotFoundError is returned from a Replica, 1425 // the next Replica is tried. 1426 func TestSendRPCRangeNotFoundError(t *testing.T) { 1427 defer leaktest.AfterTest(t)() 1428 stopper := stop.NewStopper() 1429 defer stopper.Stop(context.Background()) 1430 1431 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1432 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1433 g := makeGossip(t, stopper, rpcContext) 1434 if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil { 1435 t.Fatal(err) 1436 } 1437 1438 // Fill RangeDescriptor with three replicas. 
1439 var descriptor = roachpb.RangeDescriptor{ 1440 RangeID: 1, 1441 StartKey: roachpb.RKey("a"), 1442 EndKey: roachpb.RKey("z"), 1443 NextReplicaID: 1, 1444 } 1445 for i := 1; i <= 3; i++ { 1446 addr := util.MakeUnresolvedAddr("tcp", fmt.Sprintf("node%d", i)) 1447 nd := &roachpb.NodeDescriptor{ 1448 NodeID: roachpb.NodeID(i), 1449 Address: util.MakeUnresolvedAddr(addr.Network(), addr.String()), 1450 } 1451 if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(i)), nd, time.Hour); err != nil { 1452 t.Fatal(err) 1453 } 1454 1455 descriptor.AddReplica(roachpb.NodeID(i), roachpb.StoreID(i), roachpb.VOTER_FULL) 1456 } 1457 descDB := mockRangeDescriptorDBForDescs( 1458 testMetaRangeDescriptor, 1459 descriptor, 1460 ) 1461 1462 seen := map[roachpb.ReplicaID]struct{}{} 1463 var leaseholderStoreID roachpb.StoreID 1464 var ds *DistSender 1465 var testFn simpleSendFn = func( 1466 _ context.Context, 1467 _ SendOptions, 1468 _ ReplicaSlice, 1469 ba roachpb.BatchRequest, 1470 ) (*roachpb.BatchResponse, error) { 1471 br := ba.CreateReply() 1472 if _, ok := seen[ba.Replica.ReplicaID]; ok { 1473 br.Error = roachpb.NewErrorf("visited replica %+v twice", ba.Replica) 1474 return br, nil 1475 } 1476 seen[ba.Replica.ReplicaID] = struct{}{} 1477 if len(seen) <= 2 { 1478 if len(seen) == 1 { 1479 // Add to the leaseholder cache to verify that the response evicts it. 
1480 ds.leaseHolderCache.Update(context.Background(), ba.RangeID, ba.Replica.StoreID) 1481 } 1482 br.Error = roachpb.NewError(roachpb.NewRangeNotFoundError(ba.RangeID, ba.Replica.StoreID)) 1483 return br, nil 1484 } 1485 leaseholderStoreID = ba.Replica.StoreID 1486 return br, nil 1487 } 1488 cfg := DistSenderConfig{ 1489 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1490 Clock: clock, 1491 RPCContext: rpcContext, 1492 TestingKnobs: ClientTestingKnobs{ 1493 TransportFactory: adaptSimpleTransport(testFn), 1494 }, 1495 RangeDescriptorDB: descDB, 1496 Settings: cluster.MakeTestingClusterSettings(), 1497 } 1498 ds = NewDistSender(cfg, g) 1499 get := roachpb.NewGet(roachpb.Key("b")) 1500 _, err := kv.SendWrapped(context.Background(), ds, get) 1501 if err != nil { 1502 t.Fatal(err) 1503 } 1504 if storeID, found := ds.leaseHolderCache.Lookup(context.Background(), roachpb.RangeID(1)); !found { 1505 t.Fatal("expected a cached leaseholder") 1506 } else if storeID != leaseholderStoreID { 1507 t.Fatalf("unexpected cached leaseholder s%d, expected s%d", storeID, leaseholderStoreID) 1508 } 1509 } 1510 1511 // TestGetNodeDescriptor checks that the Node descriptor automatically gets 1512 // looked up from Gossip. 
1513 func TestGetNodeDescriptor(t *testing.T) { 1514 defer leaktest.AfterTest(t)() 1515 stopper := stop.NewStopper() 1516 defer stopper.Stop(context.Background()) 1517 1518 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1519 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1520 g := makeGossip(t, stopper, rpcContext) 1521 ds := NewDistSender(DistSenderConfig{ 1522 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1523 RPCContext: rpcContext, 1524 Clock: clock, 1525 Settings: cluster.MakeTestingClusterSettings(), 1526 }, g) 1527 g.NodeID.Reset(5) 1528 if err := g.SetNodeDescriptor(newNodeDesc(5)); err != nil { 1529 t.Fatal(err) 1530 } 1531 testutils.SucceedsSoon(t, func() error { 1532 desc := ds.getNodeDescriptor() 1533 if desc != nil && desc.NodeID == 5 { 1534 return nil 1535 } 1536 return errors.Errorf("wanted NodeID 5, got %v", desc) 1537 }) 1538 } 1539 1540 func TestMultiRangeGapReverse(t *testing.T) { 1541 defer leaktest.AfterTest(t)() 1542 stopper := stop.NewStopper() 1543 defer stopper.Stop(context.Background()) 1544 1545 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1546 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1547 g := makeGossip(t, stopper, rpcContext) 1548 1549 var descs []roachpb.RangeDescriptor 1550 splits := []roachpb.Key{roachpb.Key("a"), roachpb.Key("b"), roachpb.Key("c"), roachpb.Key("d")} 1551 for i, split := range splits { 1552 var startKey roachpb.RKey 1553 if i > 0 { 1554 startKey = descs[i-1].EndKey 1555 } 1556 descs = append(descs, roachpb.RangeDescriptor{ 1557 RangeID: roachpb.RangeID(i + 1), 1558 StartKey: startKey, 1559 EndKey: keys.MustAddr(split), 1560 InternalReplicas: []roachpb.ReplicaDescriptor{ 1561 { 1562 NodeID: 1, 1563 StoreID: 1, 1564 }, 1565 }, 1566 }) 1567 } 1568 1569 sender := kv.SenderFunc( 1570 func(_ context.Context, args roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { 1571 rb := args.CreateReply() 1572 return rb, nil 1573 }) 1574 1575 rdb := 
MockRangeDescriptorDB(func(key roachpb.RKey, reverse bool) ( 1576 []roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error, 1577 ) { 1578 n := sort.Search(len(descs), func(i int) bool { 1579 if !reverse { 1580 return key.Less(descs[i].EndKey) 1581 } 1582 // In reverse mode, the range boundary behavior is "inverted". 1583 // If we scan [a,z) in reverse mode, we'd look up key z. 1584 return !descs[i].EndKey.Less(key) // key <= EndKey 1585 }) 1586 if n < 0 { 1587 n = 0 1588 } 1589 if n >= len(descs) { 1590 panic(fmt.Sprintf("didn't set up descriptor for key %q", key)) 1591 } 1592 return descs[n : n+1], nil, nil 1593 }) 1594 1595 cfg := DistSenderConfig{ 1596 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1597 Clock: clock, 1598 RPCContext: rpcContext, 1599 RangeDescriptorDB: rdb, 1600 TestingKnobs: ClientTestingKnobs{ 1601 TransportFactory: SenderTransportFactory( 1602 tracing.NewTracer(), 1603 sender, 1604 ), 1605 }, 1606 Settings: cluster.MakeTestingClusterSettings(), 1607 } 1608 1609 ds := NewDistSender(cfg, g) 1610 1611 txn := roachpb.MakeTransaction("foo", nil, 1.0, clock.Now(), 0) 1612 1613 var ba roachpb.BatchRequest 1614 ba.Txn = &txn 1615 ba.Add(roachpb.NewReverseScan(splits[0], splits[1], false)) 1616 ba.Add(roachpb.NewReverseScan(splits[2], splits[3], false)) 1617 1618 // Before fixing https://github.com/cockroachdb/cockroach/issues/18174, this 1619 // would error with: 1620 // 1621 // truncation resulted in empty batch on {b-c}: ReverseScan ["a","b"), ReverseScan ["c","d") 1622 if _, pErr := ds.Send(context.Background(), ba); pErr != nil { 1623 t.Fatal(pErr) 1624 } 1625 } 1626 1627 // TestMultiRangeMergeStaleDescriptor simulates the situation in which the 1628 // DistSender executes a multi-range scan which encounters the stale descriptor 1629 // of a range which has since incorporated its right neighbor by means of a 1630 // merge. It is verified that the DistSender scans the correct keyrange exactly 1631 // once. 
1632 func TestMultiRangeMergeStaleDescriptor(t *testing.T) { 1633 defer leaktest.AfterTest(t)() 1634 stopper := stop.NewStopper() 1635 defer stopper.Stop(context.Background()) 1636 1637 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1638 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1639 g := makeGossip(t, stopper, rpcContext) 1640 // Assume we have two ranges, [a-b) and [b-KeyMax). 1641 merged := false 1642 // The stale first range descriptor which is unaware of the merge. 1643 var firstRange = roachpb.RangeDescriptor{ 1644 RangeID: 2, 1645 StartKey: roachpb.RKey("a"), 1646 EndKey: roachpb.RKey("b"), 1647 InternalReplicas: []roachpb.ReplicaDescriptor{ 1648 { 1649 NodeID: 1, 1650 StoreID: 1, 1651 }, 1652 }, 1653 } 1654 // The merged descriptor, which will be looked up after having processed 1655 // the stale range [a,b). 1656 var mergedRange = roachpb.RangeDescriptor{ 1657 RangeID: 2, 1658 StartKey: roachpb.RKey("a"), 1659 EndKey: roachpb.RKeyMax, 1660 InternalReplicas: []roachpb.ReplicaDescriptor{ 1661 { 1662 NodeID: 1, 1663 StoreID: 1, 1664 }, 1665 }, 1666 } 1667 // Assume we have two key-value pairs, a=1 and c=2. 
1668 existingKVs := []roachpb.KeyValue{ 1669 {Key: roachpb.Key("a"), Value: roachpb.MakeValueFromString("1")}, 1670 {Key: roachpb.Key("c"), Value: roachpb.MakeValueFromString("2")}, 1671 } 1672 var testFn simpleSendFn = func( 1673 _ context.Context, 1674 _ SendOptions, 1675 _ ReplicaSlice, 1676 ba roachpb.BatchRequest, 1677 ) (*roachpb.BatchResponse, error) { 1678 rs, err := keys.Range(ba.Requests) 1679 if err != nil { 1680 t.Fatal(err) 1681 } 1682 batchReply := &roachpb.BatchResponse{} 1683 reply := &roachpb.ScanResponse{} 1684 batchReply.Add(reply) 1685 results := []roachpb.KeyValue{} 1686 for _, curKV := range existingKVs { 1687 curKeyAddr, err := keys.Addr(curKV.Key) 1688 if err != nil { 1689 t.Fatal(err) 1690 } 1691 if rs.Key.Less(curKeyAddr.Next()) && curKeyAddr.Less(rs.EndKey) { 1692 results = append(results, curKV) 1693 } 1694 } 1695 reply.Rows = results 1696 return batchReply, nil 1697 } 1698 cfg := DistSenderConfig{ 1699 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1700 Clock: clock, 1701 RPCContext: rpcContext, 1702 TestingKnobs: ClientTestingKnobs{ 1703 TransportFactory: adaptSimpleTransport(testFn), 1704 }, 1705 RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) { 1706 if key.Less(testMetaRangeDescriptor.EndKey) { 1707 return []roachpb.RangeDescriptor{testMetaRangeDescriptor}, nil, nil 1708 } 1709 if !merged { 1710 // Assume a range merge operation happened. 1711 merged = true 1712 return []roachpb.RangeDescriptor{firstRange}, nil, nil 1713 } 1714 return []roachpb.RangeDescriptor{mergedRange}, nil, nil 1715 }), 1716 Settings: cluster.MakeTestingClusterSettings(), 1717 } 1718 ds := NewDistSender(cfg, g) 1719 scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("d"), false) 1720 // Set the Txn info to avoid an OpRequiresTxnError. 
1721 reply, err := kv.SendWrappedWith(context.Background(), ds, roachpb.Header{ 1722 MaxSpanRequestKeys: 10, 1723 Txn: &roachpb.Transaction{}, 1724 }, scan) 1725 if err != nil { 1726 t.Fatalf("scan encountered error: %s", err) 1727 } 1728 sr := reply.(*roachpb.ScanResponse) 1729 if !reflect.DeepEqual(existingKVs, sr.Rows) { 1730 t.Fatalf("expect get %v, actual get %v", existingKVs, sr.Rows) 1731 } 1732 } 1733 1734 // TestRangeLookupOptionOnReverseScan verifies that a lookup triggered by a 1735 // ReverseScan request has the useReverseScan specified. 1736 func TestRangeLookupOptionOnReverseScan(t *testing.T) { 1737 defer leaktest.AfterTest(t)() 1738 stopper := stop.NewStopper() 1739 defer stopper.Stop(context.Background()) 1740 1741 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1742 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1743 g := makeGossip(t, stopper, rpcContext) 1744 cfg := DistSenderConfig{ 1745 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1746 Clock: clock, 1747 RPCContext: rpcContext, 1748 TestingKnobs: ClientTestingKnobs{ 1749 TransportFactory: adaptSimpleTransport(stubRPCSendFn), 1750 }, 1751 RangeDescriptorDB: MockRangeDescriptorDB(func(key roachpb.RKey, useReverseScan bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) { 1752 if !key.Equal(roachpb.KeyMin) && !useReverseScan { 1753 t.Fatalf("expected UseReverseScan to be set") 1754 } 1755 if key.Less(testMetaRangeDescriptor.EndKey) { 1756 return []roachpb.RangeDescriptor{testMetaRangeDescriptor}, nil, nil 1757 } 1758 return []roachpb.RangeDescriptor{testUserRangeDescriptor}, nil, nil 1759 }), 1760 Settings: cluster.MakeTestingClusterSettings(), 1761 } 1762 ds := NewDistSender(cfg, g) 1763 rScan := &roachpb.ReverseScanRequest{ 1764 RequestHeader: roachpb.RequestHeader{Key: roachpb.Key("a"), EndKey: roachpb.Key("b")}, 1765 } 1766 if _, err := kv.SendWrapped(context.Background(), ds, rScan); err != nil { 1767 t.Fatal(err) 1768 } 1769 } 1770 1771 // 
TestClockUpdateOnResponse verifies that the DistSender picks up 1772 // the timestamp of the remote party embedded in responses. 1773 func TestClockUpdateOnResponse(t *testing.T) { 1774 defer leaktest.AfterTest(t)() 1775 stopper := stop.NewStopper() 1776 defer stopper.Stop(context.Background()) 1777 1778 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1779 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1780 g := makeGossip(t, stopper, rpcContext) 1781 cfg := DistSenderConfig{ 1782 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1783 Clock: clock, 1784 RPCContext: rpcContext, 1785 RangeDescriptorDB: defaultMockRangeDescriptorDB, 1786 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 1787 Settings: cluster.MakeTestingClusterSettings(), 1788 } 1789 ds := NewDistSender(cfg, g) 1790 1791 expectedErr := roachpb.NewError(errors.New("boom")) 1792 1793 // Prepare the test function 1794 put := roachpb.NewPut(roachpb.Key("a"), roachpb.MakeValueFromString("value")) 1795 doCheck := func(sender kv.Sender, fakeTime hlc.Timestamp) { 1796 ds.transportFactory = SenderTransportFactory(tracing.NewTracer(), sender) 1797 _, err := kv.SendWrapped(context.Background(), ds, put) 1798 if err != nil && err != expectedErr { 1799 t.Fatal(err) 1800 } 1801 newTime := ds.clock.Now() 1802 if newTime.Less(fakeTime) { 1803 t.Fatalf("clock was not advanced: expected >= %s; got %s", fakeTime, newTime) 1804 } 1805 } 1806 1807 // Test timestamp propagation on valid BatchResults. 1808 fakeTime := ds.clock.Now().Add(10000000000 /*10s*/, 0) 1809 replyNormal := kv.SenderFunc( 1810 func(_ context.Context, args roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { 1811 rb := args.CreateReply() 1812 rb.Now = fakeTime 1813 return rb, nil 1814 }) 1815 doCheck(replyNormal, fakeTime) 1816 1817 // Test timestamp propagation on errors. 
1818 fakeTime = ds.clock.Now().Add(10000000000 /*10s*/, 0) 1819 replyError := kv.SenderFunc( 1820 func(_ context.Context, _ roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) { 1821 pErr := expectedErr 1822 pErr.Now = fakeTime 1823 return nil, pErr 1824 }) 1825 doCheck(replyError, fakeTime) 1826 } 1827 1828 // TestTruncateWithSpanAndDescriptor verifies that a batch request is truncated with a 1829 // range span and the range of a descriptor found in cache. 1830 func TestTruncateWithSpanAndDescriptor(t *testing.T) { 1831 defer leaktest.AfterTest(t)() 1832 stopper := stop.NewStopper() 1833 defer stopper.Stop(context.Background()) 1834 1835 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1836 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1837 g := makeGossip(t, stopper, rpcContext) 1838 if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil { 1839 t.Fatal(err) 1840 } 1841 nd := &roachpb.NodeDescriptor{ 1842 NodeID: roachpb.NodeID(1), 1843 Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()), 1844 } 1845 if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil { 1846 t.Fatal(err) 1847 } 1848 1849 // Fill MockRangeDescriptorDB with two descriptors. When a 1850 // range descriptor is looked up by key "b", return the second 1851 // descriptor whose range is ["a", "c") and partially overlaps 1852 // with the first descriptor's range. 
1853 var descriptor1 = roachpb.RangeDescriptor{ 1854 RangeID: 2, 1855 StartKey: testMetaEndKey, 1856 EndKey: roachpb.RKey("b"), 1857 InternalReplicas: []roachpb.ReplicaDescriptor{ 1858 { 1859 NodeID: 1, 1860 StoreID: 1, 1861 }, 1862 }, 1863 } 1864 var descriptor2 = roachpb.RangeDescriptor{ 1865 RangeID: 3, 1866 StartKey: roachpb.RKey("a"), 1867 EndKey: roachpb.RKey("c"), 1868 InternalReplicas: []roachpb.ReplicaDescriptor{ 1869 { 1870 NodeID: 1, 1871 StoreID: 1, 1872 }, 1873 }, 1874 } 1875 descDB := MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) ([]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error) { 1876 if key.Less(testMetaRangeDescriptor.EndKey) { 1877 return []roachpb.RangeDescriptor{testMetaRangeDescriptor}, nil, nil 1878 } 1879 desc := descriptor1 1880 if key.Equal(roachpb.RKey("b")) { 1881 desc = descriptor2 1882 } 1883 return []roachpb.RangeDescriptor{desc}, nil, nil 1884 }) 1885 1886 // Define our rpcSend stub which checks the span of the batch 1887 // requests. Because of parallelization, there's no guarantee 1888 // on the ordering of requests. 
1889 var haveA, haveB bool 1890 sendStub := func( 1891 _ context.Context, 1892 _ SendOptions, 1893 _ ReplicaSlice, 1894 ba roachpb.BatchRequest, 1895 ) (*roachpb.BatchResponse, error) { 1896 rs, err := keys.Range(ba.Requests) 1897 if err != nil { 1898 t.Fatal(err) 1899 } 1900 if rs.Key.Equal(roachpb.RKey("a")) && rs.EndKey.Equal(roachpb.RKey("a").Next()) { 1901 haveA = true 1902 } else if rs.Key.Equal(roachpb.RKey("b")) && rs.EndKey.Equal(roachpb.RKey("b").Next()) { 1903 haveB = true 1904 } else { 1905 t.Fatalf("Unexpected span %s", rs) 1906 } 1907 1908 batchReply := &roachpb.BatchResponse{} 1909 reply := &roachpb.PutResponse{} 1910 batchReply.Add(reply) 1911 return batchReply, nil 1912 } 1913 1914 cfg := DistSenderConfig{ 1915 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 1916 Clock: clock, 1917 RPCContext: rpcContext, 1918 TestingKnobs: ClientTestingKnobs{ 1919 TransportFactory: adaptSimpleTransport(sendStub), 1920 }, 1921 RangeDescriptorDB: descDB, 1922 Settings: cluster.MakeTestingClusterSettings(), 1923 } 1924 ds := NewDistSender(cfg, g) 1925 1926 // Send a batch request containing two puts. In the first 1927 // attempt, the span of the descriptor found in the cache is 1928 // ["a", "b"). The request is truncated to contain only the put 1929 // on "a". 1930 // 1931 // In the second attempt, The range of the descriptor found in 1932 // the cache is ["a", "c"), but the put on "a" will not be 1933 // present. The request is truncated to contain only the put on "b". 
1934 ba := roachpb.BatchRequest{} 1935 ba.Txn = &roachpb.Transaction{Name: "test"} 1936 { 1937 val := roachpb.MakeValueFromString("val") 1938 ba.Add(roachpb.NewPut(keys.MakeRangeKeyPrefix(roachpb.RKey("a")), val)) 1939 } 1940 { 1941 val := roachpb.MakeValueFromString("val") 1942 ba.Add(roachpb.NewPut(keys.MakeRangeKeyPrefix(roachpb.RKey("b")), val)) 1943 } 1944 1945 if _, pErr := ds.Send(context.Background(), ba); pErr != nil { 1946 t.Fatal(pErr) 1947 } 1948 1949 if !haveA || !haveB { 1950 t.Errorf("expected two requests for \"a\" and \"b\": %t, %t", haveA, haveB) 1951 } 1952 } 1953 1954 // TestTruncateWithLocalSpanAndDescriptor verifies that a batch request with local keys 1955 // is truncated with a range span and the range of a descriptor found in cache. 1956 func TestTruncateWithLocalSpanAndDescriptor(t *testing.T) { 1957 defer leaktest.AfterTest(t)() 1958 stopper := stop.NewStopper() 1959 defer stopper.Stop(context.Background()) 1960 1961 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 1962 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 1963 g := makeGossip(t, stopper, rpcContext) 1964 if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil { 1965 t.Fatal(err) 1966 } 1967 nd := &roachpb.NodeDescriptor{ 1968 NodeID: roachpb.NodeID(1), 1969 Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()), 1970 } 1971 if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil { 1972 t.Fatal(err) 1973 } 1974 1975 // Fill MockRangeDescriptorDB with two descriptors. 
1976 var descriptor1 = roachpb.RangeDescriptor{ 1977 RangeID: 2, 1978 StartKey: testMetaEndKey, 1979 EndKey: roachpb.RKey("b"), 1980 InternalReplicas: []roachpb.ReplicaDescriptor{ 1981 { 1982 NodeID: 1, 1983 StoreID: 1, 1984 }, 1985 }, 1986 } 1987 var descriptor2 = roachpb.RangeDescriptor{ 1988 RangeID: 3, 1989 StartKey: roachpb.RKey("b"), 1990 EndKey: roachpb.RKey("c"), 1991 InternalReplicas: []roachpb.ReplicaDescriptor{ 1992 { 1993 NodeID: 1, 1994 StoreID: 1, 1995 }, 1996 }, 1997 } 1998 var descriptor3 = roachpb.RangeDescriptor{ 1999 RangeID: 4, 2000 StartKey: roachpb.RKey("c"), 2001 EndKey: roachpb.RKeyMax, 2002 InternalReplicas: []roachpb.ReplicaDescriptor{ 2003 { 2004 NodeID: 1, 2005 StoreID: 1, 2006 }, 2007 }, 2008 } 2009 descDB := mockRangeDescriptorDBForDescs( 2010 testMetaRangeDescriptor, 2011 descriptor1, 2012 descriptor2, 2013 descriptor3, 2014 ) 2015 2016 // Define our rpcSend stub which checks the span of the batch 2017 // requests. 2018 haveRequest := []bool{false, false, false} 2019 sendStub := func( 2020 _ context.Context, 2021 _ SendOptions, 2022 _ ReplicaSlice, 2023 ba roachpb.BatchRequest, 2024 ) (*roachpb.BatchResponse, error) { 2025 h := ba.Requests[0].GetInner().Header() 2026 if h.Key.Equal(keys.RangeDescriptorKey(roachpb.RKey("a"))) && h.EndKey.Equal(keys.MakeRangeKeyPrefix(roachpb.RKey("b"))) { 2027 haveRequest[0] = true 2028 } else if h.Key.Equal(keys.MakeRangeKeyPrefix(roachpb.RKey("b"))) && h.EndKey.Equal(keys.MakeRangeKeyPrefix(roachpb.RKey("c"))) { 2029 haveRequest[1] = true 2030 } else if h.Key.Equal(keys.MakeRangeKeyPrefix(roachpb.RKey("c"))) && h.EndKey.Equal(keys.RangeDescriptorKey(roachpb.RKey("c"))) { 2031 haveRequest[2] = true 2032 } else { 2033 t.Fatalf("Unexpected span [%s,%s)", h.Key, h.EndKey) 2034 } 2035 2036 batchReply := &roachpb.BatchResponse{} 2037 reply := &roachpb.ScanResponse{} 2038 batchReply.Add(reply) 2039 return batchReply, nil 2040 } 2041 2042 cfg := DistSenderConfig{ 2043 AmbientCtx: log.AmbientContext{Tracer: 
tracing.NewTracer()}, 2044 Clock: clock, 2045 RPCContext: rpcContext, 2046 TestingKnobs: ClientTestingKnobs{ 2047 TransportFactory: adaptSimpleTransport(sendStub), 2048 }, 2049 RangeDescriptorDB: descDB, 2050 Settings: cluster.MakeTestingClusterSettings(), 2051 } 2052 ds := NewDistSender(cfg, g) 2053 2054 // Send a batch request contains two scans. In the first 2055 // attempt, the range of the descriptor found in the cache is 2056 // ["", "b"). The request is truncated to contain only the scan 2057 // on local keys that address up to "b". 2058 // 2059 // In the second attempt, The range of the descriptor found in 2060 // the cache is ["b", "d"), The request is truncated to contain 2061 // only the scan on local keys that address from "b" to "d". 2062 ba := roachpb.BatchRequest{} 2063 ba.Txn = &roachpb.Transaction{Name: "test"} 2064 ba.Add(roachpb.NewScan( 2065 keys.RangeDescriptorKey(roachpb.RKey("a")), 2066 keys.RangeDescriptorKey(roachpb.RKey("c")), 2067 false /* forUpdate */)) 2068 2069 if _, pErr := ds.Send(context.Background(), ba); pErr != nil { 2070 t.Fatal(pErr) 2071 } 2072 for i, found := range haveRequest { 2073 if !found { 2074 t.Errorf("request %d not received", i) 2075 } 2076 } 2077 } 2078 2079 // TestMultiRangeWithEndTxn verifies that when a chunk of batch looks like it's 2080 // going to be dispatched to more than one range, it will be split up if it 2081 // contains an EndTxn that is not performing a parallel commit. However, it will 2082 // not be split up if it contains an EndTxn that is performing a parallel 2083 // commit. 
2084 func TestMultiRangeWithEndTxn(t *testing.T) { 2085 defer leaktest.AfterTest(t)() 2086 stopper := stop.NewStopper() 2087 defer stopper.Stop(context.Background()) 2088 2089 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 2090 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 2091 g := makeGossip(t, stopper, rpcContext) 2092 testCases := []struct { 2093 put1, put2, et roachpb.Key 2094 parCommit bool 2095 exp [][]roachpb.Method 2096 }{ 2097 { 2098 // Everything hits the first range, so we get a 1PC txn. 2099 put1: roachpb.Key("a1"), 2100 put2: roachpb.Key("a2"), 2101 et: roachpb.Key("a3"), 2102 parCommit: false, 2103 exp: [][]roachpb.Method{{roachpb.Put, roachpb.Put, roachpb.EndTxn}}, 2104 }, 2105 { 2106 // Everything hits the first range, so we get a 1PC txn. 2107 // Parallel commit doesn't matter. 2108 put1: roachpb.Key("a1"), 2109 put2: roachpb.Key("a2"), 2110 et: roachpb.Key("a3"), 2111 parCommit: true, 2112 exp: [][]roachpb.Method{{roachpb.Put, roachpb.Put, roachpb.EndTxn}}, 2113 }, 2114 { 2115 // Only EndTxn hits the second range. 2116 put1: roachpb.Key("a1"), 2117 put2: roachpb.Key("a2"), 2118 et: roachpb.Key("b"), 2119 parCommit: false, 2120 exp: [][]roachpb.Method{{roachpb.Put, roachpb.Put}, {roachpb.EndTxn}}, 2121 }, 2122 { 2123 // Only EndTxn hits the second range. However, since the EndTxn is 2124 // performing a parallel commit, it is sent in parallel, which we 2125 // can't detect directly because the EndTxn batch is sent to the 2126 // second range and a strict ordering of batches is enforced by 2127 // DisableParallelBatches. 2128 put1: roachpb.Key("a1"), 2129 put2: roachpb.Key("a2"), 2130 et: roachpb.Key("b"), 2131 parCommit: true, 2132 exp: [][]roachpb.Method{{roachpb.Put, roachpb.Put}, {roachpb.EndTxn}}, 2133 }, 2134 { 2135 // One write hits the second range, so EndTxn has to be split off. 
2136 // In this case, going in the usual order without splitting off 2137 // would actually be fine, but it doesn't seem worth optimizing at 2138 // this point. 2139 put1: roachpb.Key("a1"), 2140 put2: roachpb.Key("b1"), 2141 et: roachpb.Key("a1"), 2142 parCommit: false, 2143 exp: [][]roachpb.Method{{roachpb.Put}, {roachpb.Put}, {roachpb.EndTxn}}, 2144 }, 2145 { 2146 // One write hits the second range. Again, EndTxn does not need to 2147 // be split off because it is performing a parallel commit, so the 2148 // only split is due to the range boundary. 2149 put1: roachpb.Key("a1"), 2150 put2: roachpb.Key("b1"), 2151 et: roachpb.Key("a1"), 2152 parCommit: true, 2153 exp: [][]roachpb.Method{{roachpb.Put, roachpb.EndTxn}, {roachpb.Put}}, 2154 }, 2155 { 2156 // Both writes go to the second range, but not EndTxn. It is split 2157 // from the writes and sent after. 2158 put1: roachpb.Key("b1"), 2159 put2: roachpb.Key("b2"), 2160 et: roachpb.Key("a1"), 2161 parCommit: false, 2162 exp: [][]roachpb.Method{{roachpb.Put, roachpb.Put}, {roachpb.EndTxn}}, 2163 }, 2164 { 2165 // Both writes go to the second range, but not EndTxn. Since the 2166 // EndTxn is performing a parallel commit, it is sent in parallel. 2167 // We can tell this because the EndTxn batch is sent to the first 2168 // range and ends up being delivered first, unlike in the previous 2169 // case. 
2170 put1: roachpb.Key("b1"), 2171 put2: roachpb.Key("b2"), 2172 et: roachpb.Key("a1"), 2173 parCommit: true, 2174 exp: [][]roachpb.Method{{roachpb.EndTxn}, {roachpb.Put, roachpb.Put}}, 2175 }, 2176 } 2177 2178 if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil { 2179 t.Fatal(err) 2180 } 2181 nd := &roachpb.NodeDescriptor{ 2182 NodeID: roachpb.NodeID(1), 2183 Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()), 2184 } 2185 if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil { 2186 t.Fatal(err) 2187 2188 } 2189 2190 // Fill MockRangeDescriptorDB with two descriptors. 2191 var descriptor1 = roachpb.RangeDescriptor{ 2192 RangeID: 2, 2193 StartKey: testMetaEndKey, 2194 EndKey: roachpb.RKey("b"), 2195 InternalReplicas: []roachpb.ReplicaDescriptor{ 2196 { 2197 NodeID: 1, 2198 StoreID: 1, 2199 }, 2200 }, 2201 } 2202 var descriptor2 = roachpb.RangeDescriptor{ 2203 RangeID: 3, 2204 StartKey: roachpb.RKey("b"), 2205 EndKey: roachpb.RKeyMax, 2206 InternalReplicas: []roachpb.ReplicaDescriptor{ 2207 { 2208 NodeID: 1, 2209 StoreID: 1, 2210 }, 2211 }, 2212 } 2213 descDB := mockRangeDescriptorDBForDescs( 2214 testMetaRangeDescriptor, 2215 descriptor1, 2216 descriptor2, 2217 ) 2218 2219 for i, test := range testCases { 2220 var act [][]roachpb.Method 2221 var testFn simpleSendFn = func( 2222 _ context.Context, 2223 _ SendOptions, 2224 _ ReplicaSlice, 2225 ba roachpb.BatchRequest, 2226 ) (*roachpb.BatchResponse, error) { 2227 var cur []roachpb.Method 2228 for _, union := range ba.Requests { 2229 cur = append(cur, union.GetInner().Method()) 2230 } 2231 act = append(act, cur) 2232 return ba.CreateReply(), nil 2233 } 2234 2235 cfg := DistSenderConfig{ 2236 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 2237 Clock: clock, 2238 RPCContext: rpcContext, 2239 TestingKnobs: ClientTestingKnobs{ 2240 TransportFactory: adaptSimpleTransport(testFn), 2241 }, 2242 RangeDescriptorDB: descDB, 2243 Settings: 
cluster.MakeTestingClusterSettings(), 2244 } 2245 ds := NewDistSender(cfg, g) 2246 ds.DisableParallelBatches() 2247 2248 // Send a batch request containing two puts. 2249 var ba roachpb.BatchRequest 2250 ba.Txn = &roachpb.Transaction{Name: "test"} 2251 ba.Add(roachpb.NewPut(test.put1, roachpb.MakeValueFromString("val1"))) 2252 ba.Add(roachpb.NewPut(test.put2, roachpb.MakeValueFromString("val2"))) 2253 et := &roachpb.EndTxnRequest{ 2254 RequestHeader: roachpb.RequestHeader{Key: test.et}, 2255 Commit: true, 2256 } 2257 if test.parCommit { 2258 et.InFlightWrites = []roachpb.SequencedWrite{ 2259 {Key: test.put1, Sequence: 1}, {Key: test.put2, Sequence: 2}, 2260 } 2261 } 2262 ba.Add(et) 2263 2264 if _, pErr := ds.Send(context.Background(), ba); pErr != nil { 2265 t.Fatal(pErr) 2266 } 2267 2268 for j, batchMethods := range act { 2269 if !reflect.DeepEqual(test.exp[j], batchMethods) { 2270 t.Fatalf("test %d: expected [%d] %v, got %v", i, j, test.exp[j], batchMethods) 2271 } 2272 } 2273 } 2274 } 2275 2276 // TestParallelCommitSplitFromQueryIntents verifies that a parallel-committing 2277 // batch is split into sub-batches - one containing all pre-commit QueryIntent 2278 // requests and one containing everything else. 2279 // 2280 // The test only uses a single range, so it only tests the split of ranges in 2281 // divideAndSendParallelCommit. See TestMultiRangeWithEndTxn for a test that 2282 // verifies proper behavior of batches containing EndTxn requests which span 2283 // ranges. 
2284 func TestParallelCommitSplitFromQueryIntents(t *testing.T) { 2285 defer leaktest.AfterTest(t)() 2286 stopper := stop.NewStopper() 2287 defer stopper.Stop(context.Background()) 2288 2289 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 2290 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 2291 g := makeGossip(t, stopper, rpcContext) 2292 2293 keyA, keyB := roachpb.Key("a"), roachpb.Key("ab") 2294 put1 := roachpb.NewPut(keyA, roachpb.MakeValueFromString("val1")) 2295 put2 := roachpb.NewPut(keyB, roachpb.MakeValueFromString("val2")) 2296 qi := &roachpb.QueryIntentRequest{RequestHeader: roachpb.RequestHeader{Key: keyA}} 2297 et := &roachpb.EndTxnRequest{ 2298 RequestHeader: roachpb.RequestHeader{Key: keyA}, 2299 Commit: true, 2300 } 2301 etPar := &roachpb.EndTxnRequest{ 2302 RequestHeader: roachpb.RequestHeader{Key: keyA}, 2303 Commit: true, 2304 InFlightWrites: []roachpb.SequencedWrite{{Key: keyA, Sequence: 1}, {Key: keyB, Sequence: 2}}, 2305 } 2306 2307 testCases := []struct { 2308 name string 2309 reqs []roachpb.Request 2310 exp [][]roachpb.Method 2311 }{ 2312 { 2313 name: "no parallel commits or query intents", 2314 reqs: []roachpb.Request{put1, put2, et}, 2315 exp: [][]roachpb.Method{{roachpb.Put, roachpb.Put, roachpb.EndTxn}}, 2316 }, 2317 { 2318 name: "no parallel commits, but regular and pre-commit query intents", 2319 reqs: []roachpb.Request{qi, put1, put2, qi, et}, 2320 exp: [][]roachpb.Method{ 2321 {roachpb.QueryIntent, roachpb.Put, roachpb.Put, roachpb.QueryIntent, roachpb.EndTxn}, 2322 }, 2323 }, 2324 { 2325 name: "parallel commits without query intents", 2326 reqs: []roachpb.Request{put1, put2, etPar}, 2327 exp: [][]roachpb.Method{{roachpb.Put, roachpb.Put, roachpb.EndTxn}}, 2328 }, 2329 { 2330 name: "parallel commits with pre-commit query intents", 2331 reqs: []roachpb.Request{put1, put2, qi, qi, etPar}, 2332 exp: [][]roachpb.Method{ 2333 {roachpb.QueryIntent, roachpb.QueryIntent}, 2334 {roachpb.Put, roachpb.Put, roachpb.EndTxn}, 2335 
}, 2336 }, 2337 { 2338 name: "parallel commits with regular query intents", 2339 reqs: []roachpb.Request{qi, put1, qi, put2, etPar}, 2340 exp: [][]roachpb.Method{ 2341 {roachpb.QueryIntent, roachpb.Put, roachpb.QueryIntent, roachpb.Put, roachpb.EndTxn}, 2342 }, 2343 }, 2344 { 2345 name: "parallel commits with regular and pre-commit query intents", 2346 reqs: []roachpb.Request{qi, put1, put2, qi, qi, qi, etPar}, 2347 exp: [][]roachpb.Method{ 2348 {roachpb.QueryIntent, roachpb.QueryIntent, roachpb.QueryIntent}, 2349 {roachpb.QueryIntent, roachpb.Put, roachpb.Put, roachpb.EndTxn}, 2350 }, 2351 }, 2352 } 2353 for _, test := range testCases { 2354 t.Run(test.name, func(t *testing.T) { 2355 var act [][]roachpb.Method 2356 var testFn simpleSendFn = func( 2357 _ context.Context, 2358 _ SendOptions, 2359 _ ReplicaSlice, 2360 ba roachpb.BatchRequest, 2361 ) (*roachpb.BatchResponse, error) { 2362 var cur []roachpb.Method 2363 for _, union := range ba.Requests { 2364 cur = append(cur, union.GetInner().Method()) 2365 } 2366 act = append(act, cur) 2367 return ba.CreateReply(), nil 2368 } 2369 2370 cfg := DistSenderConfig{ 2371 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 2372 Clock: clock, 2373 RPCContext: rpcContext, 2374 TestingKnobs: ClientTestingKnobs{ 2375 TransportFactory: adaptSimpleTransport(testFn), 2376 }, 2377 RangeDescriptorDB: defaultMockRangeDescriptorDB, 2378 Settings: cluster.MakeTestingClusterSettings(), 2379 } 2380 ds := NewDistSender(cfg, g) 2381 ds.DisableParallelBatches() 2382 2383 // Send a batch request containing the requests. 2384 var ba roachpb.BatchRequest 2385 ba.Txn = &roachpb.Transaction{Name: "test"} 2386 ba.Add(test.reqs...) 
2387 2388 if _, pErr := ds.Send(context.Background(), ba); pErr != nil { 2389 t.Fatal(pErr) 2390 } 2391 2392 for j, batchMethods := range act { 2393 if !reflect.DeepEqual(test.exp[j], batchMethods) { 2394 t.Fatalf("expected [%d] %v, got %v", j, test.exp[j], batchMethods) 2395 } 2396 } 2397 }) 2398 } 2399 } 2400 2401 // TestParallelCommitsDetectIntentMissingCause tests the functionality in 2402 // DistSender.detectIntentMissingDueToIntentResolution. 2403 func TestParallelCommitsDetectIntentMissingCause(t *testing.T) { 2404 defer leaktest.AfterTest(t)() 2405 stopper := stop.NewStopper() 2406 defer stopper.Stop(context.Background()) 2407 2408 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 2409 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 2410 g := makeGossip(t, stopper, rpcContext) 2411 2412 key := roachpb.Key("a") 2413 txn := roachpb.MakeTransaction( 2414 "test", key, roachpb.NormalUserPriority, 2415 clock.Now(), clock.MaxOffset().Nanoseconds(), 2416 ) 2417 2418 testCases := []struct { 2419 name string 2420 queryTxnFn func() (roachpb.TransactionStatus, error) 2421 expErr string 2422 }{ 2423 { 2424 name: "transaction record PENDING, real intent missing error", 2425 queryTxnFn: func() (roachpb.TransactionStatus, error) { 2426 return roachpb.PENDING, nil 2427 }, 2428 expErr: "intent missing", 2429 }, 2430 { 2431 name: "transaction record STAGING, real intent missing error", 2432 queryTxnFn: func() (roachpb.TransactionStatus, error) { 2433 return roachpb.STAGING, nil 2434 }, 2435 expErr: "intent missing", 2436 }, 2437 { 2438 name: "transaction record COMMITTED, intent missing error caused by intent resolution", 2439 queryTxnFn: func() (roachpb.TransactionStatus, error) { 2440 return roachpb.COMMITTED, nil 2441 }, 2442 }, 2443 { 2444 name: "transaction record ABORTED, ambiguous intent missing error", 2445 queryTxnFn: func() (roachpb.TransactionStatus, error) { 2446 return roachpb.ABORTED, nil 2447 }, 2448 expErr: "result is ambiguous (intent missing 
and record aborted)", 2449 }, 2450 { 2451 name: "QueryTxn error, unresolved ambiguity", 2452 queryTxnFn: func() (roachpb.TransactionStatus, error) { 2453 return 0, errors.New("unable to query txn") 2454 }, 2455 expErr: "result is ambiguous (error=unable to query txn [intent missing])", 2456 }, 2457 } 2458 for _, test := range testCases { 2459 t.Run(test.name, func(t *testing.T) { 2460 var testFn simpleSendFn = func( 2461 _ context.Context, 2462 _ SendOptions, 2463 _ ReplicaSlice, 2464 ba roachpb.BatchRequest, 2465 ) (*roachpb.BatchResponse, error) { 2466 br := ba.CreateReply() 2467 switch ba.Requests[0].GetInner().Method() { 2468 case roachpb.QueryIntent: 2469 br.Error = roachpb.NewError(roachpb.NewIntentMissingError(key, nil)) 2470 case roachpb.QueryTxn: 2471 status, err := test.queryTxnFn() 2472 if err != nil { 2473 br.Error = roachpb.NewError(err) 2474 } else { 2475 respTxn := txn 2476 respTxn.Status = status 2477 br.Responses[0].GetQueryTxn().QueriedTxn = respTxn 2478 } 2479 case roachpb.EndTxn: 2480 br.Txn = ba.Txn.Clone() 2481 br.Txn.Status = roachpb.STAGING 2482 } 2483 return br, nil 2484 } 2485 2486 cfg := DistSenderConfig{ 2487 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 2488 Clock: clock, 2489 RPCContext: rpcContext, 2490 TestingKnobs: ClientTestingKnobs{ 2491 TransportFactory: adaptSimpleTransport(testFn), 2492 }, 2493 RangeDescriptorDB: defaultMockRangeDescriptorDB, 2494 Settings: cluster.MakeTestingClusterSettings(), 2495 } 2496 ds := NewDistSender(cfg, g) 2497 2498 // Send a parallel commit batch request. 
2499 var ba roachpb.BatchRequest 2500 ba.Txn = txn.Clone() 2501 ba.Add(&roachpb.QueryIntentRequest{ 2502 RequestHeader: roachpb.RequestHeader{Key: key}, 2503 Txn: txn.TxnMeta, 2504 ErrorIfMissing: true, 2505 }) 2506 ba.Add(&roachpb.EndTxnRequest{ 2507 RequestHeader: roachpb.RequestHeader{Key: key}, 2508 Commit: true, 2509 InFlightWrites: []roachpb.SequencedWrite{{Key: key, Sequence: 1}}, 2510 }) 2511 2512 // Verify that the response is expected. 2513 _, pErr := ds.Send(context.Background(), ba) 2514 if test.expErr == "" { 2515 if pErr != nil { 2516 t.Fatalf("unexpected error %v", pErr) 2517 } 2518 } else { 2519 if !testutils.IsPError(pErr, regexp.QuoteMeta(test.expErr)) { 2520 t.Fatalf("expected error %q; found %v", test.expErr, pErr) 2521 } 2522 } 2523 }) 2524 } 2525 } 2526 2527 func TestCountRanges(t *testing.T) { 2528 defer leaktest.AfterTest(t)() 2529 stopper := stop.NewStopper() 2530 defer stopper.Stop(context.Background()) 2531 2532 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 2533 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 2534 g := makeGossip(t, stopper, rpcContext) 2535 // Create a slice of fake descriptors. 2536 const numDescriptors = 9 2537 const firstKeyBoundary = 'a' 2538 var descriptors [numDescriptors]roachpb.RangeDescriptor 2539 for i := range descriptors { 2540 startKey := testMetaEndKey 2541 if i > 0 { 2542 startKey = roachpb.RKey(string(firstKeyBoundary + i - 1)) 2543 } 2544 endKey := roachpb.RKeyMax 2545 if i < len(descriptors)-1 { 2546 endKey = roachpb.RKey(string(firstKeyBoundary + i)) 2547 } 2548 2549 descriptors[i] = roachpb.RangeDescriptor{ 2550 RangeID: roachpb.RangeID(i + 2), 2551 StartKey: startKey, 2552 EndKey: endKey, 2553 InternalReplicas: []roachpb.ReplicaDescriptor{ 2554 { 2555 NodeID: 1, 2556 StoreID: 1, 2557 }, 2558 }, 2559 } 2560 } 2561 2562 // Mock out descriptor DB and sender function. 2563 descDB := mockRangeDescriptorDBForDescs(append(descriptors[:], testMetaRangeDescriptor)...) 
2564 cfg := DistSenderConfig{ 2565 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 2566 Clock: clock, 2567 RPCContext: rpcContext, 2568 TestingKnobs: ClientTestingKnobs{ 2569 TransportFactory: adaptSimpleTransport(stubRPCSendFn), 2570 }, 2571 RangeDescriptorDB: descDB, 2572 Settings: cluster.MakeTestingClusterSettings(), 2573 } 2574 ds := NewDistSender(cfg, g) 2575 2576 // Verify counted ranges. 2577 keyIn := func(desc roachpb.RangeDescriptor) roachpb.RKey { 2578 return append(desc.StartKey, 'a') 2579 } 2580 testcases := []struct { 2581 key roachpb.RKey 2582 endKey roachpb.RKey 2583 count int64 2584 }{ 2585 {testMetaEndKey, roachpb.RKey(string(firstKeyBoundary)), 1}, 2586 {testMetaEndKey, keyIn(descriptors[0]), 1}, 2587 {testMetaEndKey, descriptors[len(descriptors)-1].StartKey, numDescriptors - 1}, 2588 {descriptors[0].EndKey, roachpb.RKeyMax, numDescriptors - 1}, 2589 // Everything from the min key to a key within the last range. 2590 {testMetaEndKey, keyIn(descriptors[len(descriptors)-1]), numDescriptors}, 2591 {testMetaEndKey, roachpb.RKeyMax, numDescriptors}, 2592 } 2593 for i, tc := range testcases { 2594 count, pErr := ds.CountRanges(context.Background(), roachpb.RSpan{Key: tc.key, EndKey: tc.endKey}) 2595 if pErr != nil { 2596 t.Fatalf("%d: %s", i, pErr) 2597 } 2598 if a, e := count, tc.count; a != e { 2599 t.Errorf("%d: # of ranges %d != expected %d", i, a, e) 2600 } 2601 } 2602 } 2603 2604 func TestSenderTransport(t *testing.T) { 2605 defer leaktest.AfterTest(t)() 2606 transport, err := SenderTransportFactory( 2607 tracing.NewTracer(), 2608 kv.SenderFunc( 2609 func( 2610 _ context.Context, 2611 _ roachpb.BatchRequest, 2612 ) (r *roachpb.BatchResponse, e *roachpb.Error) { 2613 return 2614 }, 2615 ))(SendOptions{}, &nodedialer.Dialer{}, ReplicaSlice{{}}) 2616 if err != nil { 2617 t.Fatal(err) 2618 } 2619 _, err = transport.SendNext(context.Background(), roachpb.BatchRequest{}) 2620 if err != nil { 2621 t.Fatal(err) 2622 } 2623 if 
!transport.IsExhausted() { 2624 t.Fatalf("transport is not exhausted") 2625 } 2626 } 2627 2628 func TestGatewayNodeID(t *testing.T) { 2629 defer leaktest.AfterTest(t)() 2630 stopper := stop.NewStopper() 2631 defer stopper.Stop(context.Background()) 2632 2633 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 2634 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 2635 g := makeGossip(t, stopper, rpcContext) 2636 const expNodeID = 42 2637 nd := newNodeDesc(expNodeID) 2638 g.NodeID.Reset(nd.NodeID) 2639 if err := g.SetNodeDescriptor(nd); err != nil { 2640 t.Fatal(err) 2641 } 2642 if err := g.AddInfoProto(gossip.MakeNodeIDKey(expNodeID), nd, time.Hour); err != nil { 2643 t.Fatal(err) 2644 } 2645 2646 var observedNodeID roachpb.NodeID 2647 var testFn simpleSendFn = func( 2648 _ context.Context, 2649 _ SendOptions, 2650 _ ReplicaSlice, 2651 ba roachpb.BatchRequest, 2652 ) (*roachpb.BatchResponse, error) { 2653 observedNodeID = ba.Header.GatewayNodeID 2654 return ba.CreateReply(), nil 2655 } 2656 2657 cfg := DistSenderConfig{ 2658 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 2659 Clock: clock, 2660 RPCContext: rpcContext, 2661 TestingKnobs: ClientTestingKnobs{ 2662 TransportFactory: adaptSimpleTransport(testFn), 2663 }, 2664 RangeDescriptorDB: defaultMockRangeDescriptorDB, 2665 Settings: cluster.MakeTestingClusterSettings(), 2666 } 2667 ds := NewDistSender(cfg, g) 2668 var ba roachpb.BatchRequest 2669 ba.Add(roachpb.NewPut(roachpb.Key("a"), roachpb.MakeValueFromString("value"))) 2670 if _, err := ds.Send(context.Background(), ba); err != nil { 2671 t.Fatalf("put encountered error: %s", err) 2672 } 2673 if observedNodeID != expNodeID { 2674 t.Errorf("got GatewayNodeID=%d, want %d", observedNodeID, expNodeID) 2675 } 2676 } 2677 2678 // TestMultipleErrorsMerged tests that DistSender prioritizes errors that are 2679 // returned from concurrent partial batches and returns the "best" one after 2680 // merging the transaction metadata passed on the 
errors. 2681 func TestMultipleErrorsMerged(t *testing.T) { 2682 defer leaktest.AfterTest(t)() 2683 stopper := stop.NewStopper() 2684 defer stopper.Stop(context.Background()) 2685 2686 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 2687 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 2688 g := makeGossip(t, stopper, rpcContext) 2689 2690 if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil { 2691 t.Fatal(err) 2692 } 2693 nd := &roachpb.NodeDescriptor{ 2694 NodeID: roachpb.NodeID(1), 2695 Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()), 2696 } 2697 if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil { 2698 t.Fatal(err) 2699 } 2700 2701 // Fill MockRangeDescriptorDB with two descriptors. 2702 var descriptor1 = roachpb.RangeDescriptor{ 2703 RangeID: 2, 2704 StartKey: testMetaEndKey, 2705 EndKey: roachpb.RKey("b"), 2706 InternalReplicas: []roachpb.ReplicaDescriptor{ 2707 { 2708 NodeID: 1, 2709 StoreID: 1, 2710 }, 2711 }, 2712 } 2713 var descriptor2 = roachpb.RangeDescriptor{ 2714 RangeID: 3, 2715 StartKey: roachpb.RKey("b"), 2716 EndKey: roachpb.RKeyMax, 2717 InternalReplicas: []roachpb.ReplicaDescriptor{ 2718 { 2719 NodeID: 1, 2720 StoreID: 1, 2721 }, 2722 }, 2723 } 2724 descDB := mockRangeDescriptorDBForDescs( 2725 testMetaRangeDescriptor, 2726 descriptor1, 2727 descriptor2, 2728 ) 2729 2730 txn := roachpb.MakeTransaction( 2731 "test", nil /* baseKey */, roachpb.NormalUserPriority, 2732 clock.Now(), clock.MaxOffset().Nanoseconds(), 2733 ) 2734 // We're also going to check that the highest bumped WriteTimestamp makes it 2735 // to the merged error. 
2736 err1WriteTimestamp := txn.WriteTimestamp.Add(100, 0) 2737 err2WriteTimestamp := txn.WriteTimestamp.Add(200, 0) 2738 2739 retryErr := roachpb.NewTransactionRetryError(roachpb.RETRY_SERIALIZABLE, "test err") 2740 abortErr := roachpb.NewTransactionAbortedError(roachpb.ABORT_REASON_ABORTED_RECORD_FOUND) 2741 conditionFailedErr := &roachpb.ConditionFailedError{} 2742 sendErr := &roachpb.SendError{} 2743 ambiguousErr := &roachpb.AmbiguousResultError{} 2744 randomErr := &roachpb.IntegerOverflowError{} 2745 2746 testCases := []struct { 2747 err1, err2 error 2748 expErr string 2749 }{ 2750 { 2751 err1: retryErr, 2752 err2: nil, 2753 expErr: "TransactionRetryError: retry txn (RETRY_SERIALIZABLE - test err)", 2754 }, 2755 { 2756 err1: abortErr, 2757 err2: nil, 2758 expErr: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 2759 }, 2760 { 2761 err1: conditionFailedErr, 2762 err2: nil, 2763 expErr: "unexpected value", 2764 }, 2765 { 2766 err1: retryErr, 2767 err2: retryErr, 2768 expErr: "TransactionRetryError: retry txn (RETRY_SERIALIZABLE - test err)", 2769 }, 2770 { 2771 err1: retryErr, 2772 err2: abortErr, 2773 expErr: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 2774 }, 2775 { 2776 err1: abortErr, 2777 err2: abortErr, 2778 expErr: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 2779 }, 2780 { 2781 err1: retryErr, 2782 err2: conditionFailedErr, 2783 expErr: "unexpected value", 2784 }, 2785 { 2786 err1: abortErr, 2787 err2: conditionFailedErr, 2788 expErr: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 2789 }, 2790 { 2791 err1: conditionFailedErr, 2792 err2: conditionFailedErr, 2793 expErr: "unexpected value", 2794 }, 2795 // ConditionFailedError has a low score since it's "not ambiguous". We want 2796 // ambiguity to be infectious, so most things have a higher score. 
2797 { 2798 err1: conditionFailedErr, 2799 err2: ambiguousErr, 2800 expErr: "result is ambiguous", 2801 }, 2802 { 2803 err1: conditionFailedErr, 2804 err2: sendErr, 2805 expErr: "failed to send RPC", 2806 }, 2807 { 2808 err1: conditionFailedErr, 2809 err2: randomErr, 2810 expErr: "results in overflow", 2811 }, 2812 } 2813 for i, tc := range testCases { 2814 t.Run(strconv.Itoa(i), func(t *testing.T) { 2815 // We run every test case twice, to make sure error merging is commutative. 2816 testutils.RunTrueAndFalse(t, "reverse", func(t *testing.T, reverse bool) { 2817 if reverse { 2818 // Switch the order of errors. 2819 err1 := tc.err1 2820 err2 := tc.err2 2821 tc.err1 = err2 2822 tc.err2 = err1 2823 } 2824 2825 var testFn simpleSendFn = func( 2826 _ context.Context, 2827 _ SendOptions, 2828 _ ReplicaSlice, 2829 ba roachpb.BatchRequest, 2830 ) (*roachpb.BatchResponse, error) { 2831 reply := ba.CreateReply() 2832 if delRng := ba.Requests[0].GetDeleteRange(); delRng == nil { 2833 return nil, errors.Errorf("expected DeleteRange request, found %v", ba.Requests[0]) 2834 } else if delRng.Key.Equal(roachpb.Key("a")) { 2835 if tc.err1 != nil { 2836 errTxn := ba.Txn.Clone() 2837 errTxn.WriteTimestamp = err1WriteTimestamp 2838 reply.Error = roachpb.NewErrorWithTxn(tc.err1, errTxn) 2839 } 2840 } else if delRng.Key.Equal(roachpb.Key("b")) { 2841 if tc.err2 != nil { 2842 errTxn := ba.Txn.Clone() 2843 errTxn.WriteTimestamp = err2WriteTimestamp 2844 reply.Error = roachpb.NewErrorWithTxn(tc.err2, errTxn) 2845 } 2846 } else { 2847 return nil, errors.Errorf("unexpected DeleteRange boundaries") 2848 } 2849 return reply, nil 2850 } 2851 2852 cfg := DistSenderConfig{ 2853 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 2854 Clock: clock, 2855 RPCContext: rpcContext, 2856 TestingKnobs: ClientTestingKnobs{ 2857 TransportFactory: adaptSimpleTransport(testFn), 2858 }, 2859 RangeDescriptorDB: descDB, 2860 Settings: cluster.MakeTestingClusterSettings(), 2861 RPCRetryOptions: 
&retry.Options{MaxRetries: 1},
				}
				ds := NewDistSender(cfg, g)

				var ba roachpb.BatchRequest
				ba.Txn = txn.Clone()
				ba.Add(roachpb.NewDeleteRange(roachpb.Key("a"), roachpb.Key("c"), false /* returnKeys */))

				expWriteTimestamp := txn.WriteTimestamp
				if tc.err1 != nil {
					expWriteTimestamp = err1WriteTimestamp
				}
				if tc.err2 != nil {
					expWriteTimestamp = err2WriteTimestamp
				}

				if _, pErr := ds.Send(context.Background(), ba); pErr == nil {
					t.Fatalf("expected an error to be returned from distSender")
				} else if !testutils.IsPError(pErr, regexp.QuoteMeta(tc.expErr)) {
					t.Fatalf("expected error %q; found %v", tc.expErr, pErr)
				} else if !pErr.GetTxn().WriteTimestamp.Equal(expWriteTimestamp) {
					t.Fatalf("expected bumped ts %s, got: %s", expWriteTimestamp, pErr.GetTxn().WriteTimestamp)
				}
			})
		})
	}
}

// TestErrorIndexAlignment is a regression test for #20067.
// If a batch is partitioned into multiple partial batches, the
// roachpb.Error.Index of each batch should correspond to its original index in
// the overall batch.
func TestErrorIndexAlignment(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
	g := makeGossip(t, stopper, rpcContext)

	if err := g.SetNodeDescriptor(newNodeDesc(1)); err != nil {
		t.Fatal(err)
	}
	nd := &roachpb.NodeDescriptor{
		NodeID:  roachpb.NodeID(1),
		Address: util.MakeUnresolvedAddr(testAddress.Network(), testAddress.String()),
	}
	if err := g.AddInfoProto(gossip.MakeNodeIDKey(roachpb.NodeID(1)), nd, time.Hour); err != nil {
		t.Fatal(err)
	}

	// Fill MockRangeDescriptorDB with three user-space descriptors that split
	// the keyspace at "b" and "c", so that a batch touching "a", "b", "bb" and
	// "c" is divided into three partial batches.
	var descriptor1 = roachpb.RangeDescriptor{
		RangeID:  2,
		StartKey: testMetaEndKey,
		EndKey:   roachpb.RKey("b"),
		InternalReplicas: []roachpb.ReplicaDescriptor{
			{
				NodeID:  1,
				StoreID: 1,
			},
		},
	}
	var descriptor2 = roachpb.RangeDescriptor{
		RangeID:  3,
		StartKey: roachpb.RKey("b"),
		EndKey:   roachpb.RKey("c"),
		InternalReplicas: []roachpb.ReplicaDescriptor{
			{
				NodeID:  1,
				StoreID: 1,
			},
		},
	}
	var descriptor3 = roachpb.RangeDescriptor{
		RangeID:  4,
		StartKey: roachpb.RKey("c"),
		EndKey:   roachpb.RKeyMax,
		InternalReplicas: []roachpb.ReplicaDescriptor{
			{
				NodeID:  1,
				StoreID: 1,
			},
		},
	}

	// The 1st partial batch has 1 request.
	// The 2nd partial batch has 2 requests.
	// The 3rd partial batch has 1 request.
	// Each test case returns an error for the first request of the nth
	// partial batch.
	testCases := []struct {
		// The (zero-based) partial batch whose first request returns an error.
		nthPartialBatch int
		// The index that request has in the original, unsplit batch.
		expectedFinalIdx int32
	}{
		{0, 0},
		{1, 1},
		{2, 3},
	}

	descDB := mockRangeDescriptorDBForDescs(
		testMetaRangeDescriptor,
		descriptor1,
		descriptor2,
		descriptor3,
	)

	for i, tc := range testCases {
		t.Run(strconv.Itoa(i), func(t *testing.T) {
			// Counts the partial batches seen by the mock transport. Parallel
			// batches are disabled below, so they arrive one at a time.
			nthRequest := 0

			var testFn simpleSendFn = func(
				_ context.Context,
				_ SendOptions,
				_ ReplicaSlice,
				ba roachpb.BatchRequest,
			) (*roachpb.BatchResponse, error) {
				reply := ba.CreateReply()
				if nthRequest == tc.nthPartialBatch {
					reply.Error = &roachpb.Error{
						// The relative index is always 0 since
						// we return an error for the first
						// request of the nthPartialBatch.
						Index: &roachpb.ErrPosition{Index: 0},
					}
				}
				nthRequest++
				return reply, nil
			}

			cfg := DistSenderConfig{
				AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
				Clock:      clock,
				RPCContext: rpcContext,
				TestingKnobs: ClientTestingKnobs{
					TransportFactory: adaptSimpleTransport(testFn),
				},
				RangeDescriptorDB: descDB,
				Settings:          cluster.MakeTestingClusterSettings(),
			}
			ds := NewDistSender(cfg, g)
			ds.DisableParallelBatches()

			var ba roachpb.BatchRequest
			ba.Txn = &roachpb.Transaction{Name: "test"}
			// First batch has 1 request.
			val := roachpb.MakeValueFromString("val")
			ba.Add(roachpb.NewPut(roachpb.Key("a"), val))

			// Second batch has 2 requests.
			val = roachpb.MakeValueFromString("val")
			ba.Add(roachpb.NewPut(roachpb.Key("b"), val))
			val = roachpb.MakeValueFromString("val")
			ba.Add(roachpb.NewPut(roachpb.Key("bb"), val))

			// Third batch has 1 request.
			val = roachpb.MakeValueFromString("val")
			ba.Add(roachpb.NewPut(roachpb.Key("c"), val))

			_, pErr := ds.Send(context.Background(), ba)
			if pErr == nil {
				t.Fatalf("expected an error to be returned from distSender")
			}
			// Guard the dereference below: a missing index should be reported as
			// a test failure rather than a nil-pointer panic.
			if pErr.Index == nil {
				t.Fatalf("expected error to carry an index, got none: %v", pErr)
			}
			if pErr.Index.Index != tc.expectedFinalIdx {
				t.Errorf("expected error index to be %d, instead got %d", tc.expectedFinalIdx, pErr.Index.Index)
			}
		})
	}
}

// TestCanSendToFollower tests that the DistSender abides by the result it
// gets from CanSendToFollower.
3035 func TestCanSendToFollower(t *testing.T) { 3036 defer leaktest.AfterTest(t)() 3037 stopper := stop.NewStopper() 3038 defer stopper.Stop(context.Background()) 3039 3040 old := CanSendToFollower 3041 defer func() { CanSendToFollower = old }() 3042 canSend := true 3043 CanSendToFollower = func(_ uuid.UUID, _ *cluster.Settings, ba roachpb.BatchRequest) bool { 3044 return !ba.IsLocking() && canSend 3045 } 3046 3047 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 3048 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 3049 g := makeGossip(t, stopper, rpcContext) 3050 leaseHolders := testUserRangeDescriptor3Replicas.InternalReplicas 3051 for _, n := range leaseHolders { 3052 if err := g.AddInfoProto( 3053 gossip.MakeNodeIDKey(n.NodeID), 3054 newNodeDesc(n.NodeID), 3055 gossip.NodeDescriptorTTL, 3056 ); err != nil { 3057 t.Fatal(err) 3058 } 3059 } 3060 var sentTo ReplicaInfo 3061 var testFn simpleSendFn = func( 3062 _ context.Context, 3063 _ SendOptions, 3064 r ReplicaSlice, 3065 args roachpb.BatchRequest, 3066 ) (*roachpb.BatchResponse, error) { 3067 sentTo = r[0] 3068 return args.CreateReply(), nil 3069 } 3070 cfg := DistSenderConfig{ 3071 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 3072 Clock: clock, 3073 RPCContext: rpcContext, 3074 TestingKnobs: ClientTestingKnobs{ 3075 TransportFactory: adaptSimpleTransport(testFn), 3076 }, 3077 RangeDescriptorDB: threeReplicaMockRangeDescriptorDB, 3078 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 3079 RPCRetryOptions: &retry.Options{ 3080 InitialBackoff: time.Microsecond, 3081 MaxBackoff: time.Microsecond, 3082 }, 3083 Settings: cluster.MakeTestingClusterSettings(), 3084 } 3085 for i, c := range []struct { 3086 canSendToFollower bool 3087 header roachpb.Header 3088 msg roachpb.Request 3089 expectedNode roachpb.NodeID 3090 }{ 3091 { 3092 true, 3093 roachpb.Header{ 3094 Txn: &roachpb.Transaction{}, 3095 }, 3096 roachpb.NewPut(roachpb.Key("a"), roachpb.Value{}), 3097 2, 3098 }, 
3099 { 3100 true, 3101 roachpb.Header{ 3102 Txn: &roachpb.Transaction{}, 3103 }, 3104 roachpb.NewGet(roachpb.Key("a")), 3105 1, 3106 }, 3107 { 3108 true, 3109 roachpb.Header{}, 3110 roachpb.NewGet(roachpb.Key("a")), 3111 1, 3112 }, 3113 { 3114 false, 3115 roachpb.Header{}, 3116 roachpb.NewGet(roachpb.Key("a")), 3117 2, 3118 }, 3119 } { 3120 t.Run("", func(t *testing.T) { 3121 sentTo = ReplicaInfo{} 3122 canSend = c.canSendToFollower 3123 ds := NewDistSender(cfg, g) 3124 ds.clusterID = &base.ClusterIDContainer{} 3125 // set 2 to be the leaseholder 3126 ds.LeaseHolderCache().Update(context.Background(), 2 /* rangeID */, 2 /* storeID */) 3127 _, pErr := kv.SendWrappedWith(context.Background(), ds, c.header, c.msg) 3128 require.Nil(t, pErr) 3129 if sentTo.NodeID != c.expectedNode { 3130 t.Fatalf("%d: unexpected replica: %v != %v", i, sentTo.NodeID, c.expectedNode) 3131 } 3132 // Check that the leaseholder cache doesn't change, even if the request is 3133 // served by a follower. This tests a regression for a bug we've had where 3134 // we were always updating the leaseholder cache on successful RPCs 3135 // because we erroneously assumed that a success must come from the 3136 // leaseholder. 3137 storeID, ok := ds.LeaseHolderCache().Lookup(context.Background(), 2 /* rangeID */) 3138 require.True(t, ok) 3139 require.Equal(t, roachpb.StoreID(2), storeID) 3140 }) 3141 } 3142 } 3143 3144 // TestEvictMetaRange tests that a query on a stale meta2 range should evict it 3145 // from the cache. 
func TestEvictMetaRange(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	// The scenario is run twice: once with the RangeKeyMismatchError carrying
	// a SuggestedRange and once without it.
	testutils.RunTrueAndFalse(t, "hasSuggestedRange", func(t *testing.T, hasSuggestedRange bool) {
		// The meta2 key at which the meta2 range is split later in the test.
		splitKey := keys.RangeMetaKey(roachpb.RKey("b"))

		// The meta1 range covers [KeyMin, /Meta2).
		testMeta1RangeDescriptor := testMetaRangeDescriptor
		testMeta1RangeDescriptor.EndKey = roachpb.RKey(keys.Meta2Prefix)

		// Both meta2 descriptors initially span [/Meta2, testMetaEndKey) and
		// differ only in RangeID; the simulated split below narrows them to
		// [/Meta2, splitKey) and [splitKey, testMetaEndKey) respectively.
		testMeta2RangeDescriptor1 := testMetaRangeDescriptor
		testMeta2RangeDescriptor1.RangeID = 2
		testMeta2RangeDescriptor1.StartKey = roachpb.RKey(keys.Meta2Prefix)

		testMeta2RangeDescriptor2 := testMetaRangeDescriptor
		testMeta2RangeDescriptor2.RangeID = 3
		testMeta2RangeDescriptor2.StartKey = roachpb.RKey(keys.Meta2Prefix)

		testUserRangeDescriptor1 := roachpb.RangeDescriptor{
			RangeID:  4,
			StartKey: roachpb.RKey("a"),
			EndKey:   roachpb.RKey("b"),
			InternalReplicas: []roachpb.ReplicaDescriptor{
				{
					NodeID:  1,
					StoreID: 1,
				},
			},
		}

		testUserRangeDescriptor2 := roachpb.RangeDescriptor{
			RangeID:  5,
			StartKey: roachpb.RKey("b"),
			EndKey:   roachpb.RKey("c"),
			InternalReplicas: []roachpb.ReplicaDescriptor{
				{
					NodeID:  1,
					StoreID: 1,
				},
			},
		}

		clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
		rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
		g := makeGossip(t, stopper, rpcContext)
		if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &testMeta1RangeDescriptor, time.Hour); err != nil {
			t.Fatal(err)
		}

		// When set, the next meta2 lookup that is not for key "a" is answered
		// with a RangeKeyMismatchError; the mock clears the flag after doing so.
		isStale := false

		// testFn is the mock transport. Non-lookup batches get an empty reply;
		// range lookups are answered from the descriptors defined above.
		var testFn simpleSendFn = func(
			_ context.Context,
			_ SendOptions,
			_ ReplicaSlice,
			ba roachpb.BatchRequest,
		) (*roachpb.BatchResponse, error) {
			rs, err := keys.Range(ba.Requests)
			if err != nil {
				t.Fatal(err)
			}
			if !kv.TestingIsRangeLookup(ba) {
				return ba.CreateReply(), nil
			}

			if bytes.HasPrefix(rs.Key, keys.Meta1Prefix) {
				// Querying meta 1 range.
				br := &roachpb.BatchResponse{}
				r := &roachpb.ScanResponse{}
				// NOTE: from here on the local kv shadows the imported kv package.
				var kv roachpb.KeyValue
				if rs.Key.Equal(keys.RangeMetaKey(keys.RangeMetaKey(roachpb.RKey("a")).Next()).Next()) {
					// Scan request is [/Meta1/a - /Meta2), so return the descriptor
					// of the first meta2 range.
					if err := kv.Value.SetProto(&testMeta2RangeDescriptor1); err != nil {
						t.Fatal(err)
					}
				} else {
					// Scan request is [/Meta1/b - /Meta2), so return the descriptor
					// of the second meta2 range. This is needed when no
					// SuggestedRange is returned from the RangeKeyMismatch error and
					// an additional lookup is needed to determine the correct meta2
					// range descriptor.
					if err := kv.Value.SetProto(&testMeta2RangeDescriptor2); err != nil {
						t.Fatal(err)
					}
				}
				r.Rows = append(r.Rows, kv)
				br.Add(r)
				return br, nil
			}
			// Querying meta2 range.
			br := &roachpb.BatchResponse{}
			r := &roachpb.ScanResponse{}
			var kv roachpb.KeyValue
			if rs.Key.Equal(keys.RangeMetaKey(roachpb.RKey("a")).Next()) {
				// Scan request is [/Meta2/a - /Meta2/b), so return the first
				// user range descriptor.
				if err := kv.Value.SetProto(&testUserRangeDescriptor1); err != nil {
					t.Fatal(err)
				}
			} else if isStale {
				// Scan request is [/Meta2/b - /Meta2/c). Since we simulate a split of
				// [/Meta2 - /System) into [/Meta2 - /Meta2/b) and [/Meta2/b - /System)
				// and the batch request was sent using the stale cached meta2 range
				// descriptor [/Meta2 - /System), we return a RangeKeyMismatchError. We
				// test for two cases here:
				// 1) The SuggestedRange is supplied and the correct meta2 range is
				//    directly inserted into the cache.
				// 2) The SuggestedRange is not supplied and we have to do an
				//    additional lookup in meta1 to determine the correct meta2 range.

				// Simulate a split.
				testMeta2RangeDescriptor1.EndKey = splitKey
				testMeta2RangeDescriptor2.StartKey = splitKey
				isStale = false

				reply := ba.CreateReply()
				// Return a RangeKeyMismatchError to simulate the range being stale.
				err := &roachpb.RangeKeyMismatchError{
					RequestStartKey: rs.Key.AsRawKey(),
					RequestEndKey:   rs.EndKey.AsRawKey(),
					MismatchedRange: testMeta2RangeDescriptor1,
				}
				if hasSuggestedRange {
					err.SuggestedRange = &testMeta2RangeDescriptor2
				}
				reply.Error = roachpb.NewError(err)
				return reply, nil
			} else {
				// Scan request is [/Meta2/b - /Meta2/c) and the range descriptor is
				// not stale, so return the second user range descriptor.
				if err := kv.Value.SetProto(&testUserRangeDescriptor2); err != nil {
					t.Fatal(err)
				}
			}
			r.Rows = append(r.Rows, kv)
			br.Add(r)
			return br, nil
		}

		cfg := DistSenderConfig{
			AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
			Clock:      clock,
			RPCContext: rpcContext,
			TestingKnobs: ClientTestingKnobs{
				TransportFactory: adaptSimpleTransport(testFn),
			},
			NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
			Settings:   cluster.MakeTestingClusterSettings(),
		}
		ds := NewDistSender(cfg, g)

		scan := roachpb.NewScan(roachpb.Key("a"), roachpb.Key("b"), false)
		if _, pErr := kv.SendWrapped(context.Background(), ds, scan); pErr != nil {
			t.Fatalf("scan encountered error: %s", pErr)
		}

		// Verify that there is one meta2 cached range, still spanning the whole
		// unsplit [/Meta2, testMetaEndKey) interval.
		cachedRange := ds.rangeCache.GetCachedRangeDescriptor(keys.RangeMetaKey(roachpb.RKey("a")), false)
		if !cachedRange.StartKey.Equal(keys.Meta2Prefix) || !cachedRange.EndKey.Equal(testMetaEndKey) {
			t.Fatalf("expected cached meta2 range to be [%s, %s), actual [%s, %s)",
				keys.Meta2Prefix, testMetaEndKey, cachedRange.StartKey, cachedRange.EndKey)
		}

		// Simulate a split on the meta2 range and mark it as stale.
		isStale = true

		scan = roachpb.NewScan(roachpb.Key("b"), roachpb.Key("c"), false)
		if _, pErr := kv.SendWrapped(context.Background(), ds, scan); pErr != nil {
			t.Fatalf("scan encountered error: %s", pErr)
		}

		// Verify that there are two meta2 cached ranges, one on each side of
		// splitKey.
		cachedRange = ds.rangeCache.GetCachedRangeDescriptor(keys.RangeMetaKey(roachpb.RKey("a")), false)
		if !cachedRange.StartKey.Equal(keys.Meta2Prefix) || !cachedRange.EndKey.Equal(splitKey) {
			t.Fatalf("expected cached meta2 range to be [%s, %s), actual [%s, %s)",
				keys.Meta2Prefix, splitKey, cachedRange.StartKey, cachedRange.EndKey)
		}
		cachedRange = ds.rangeCache.GetCachedRangeDescriptor(keys.RangeMetaKey(roachpb.RKey("b")), false)
		if !cachedRange.StartKey.Equal(splitKey) || !cachedRange.EndKey.Equal(testMetaEndKey) {
			t.Fatalf("expected cached meta2 range to be [%s, %s), actual [%s, %s)",
				splitKey, testMetaEndKey, cachedRange.StartKey, cachedRange.EndKey)
		}
	})
}

// TestConnectionClass verifies that the dist sender constructs a transport with
// the appropriate class for a given resolved range.
func TestConnectionClass(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	// Create a mock range descriptor DB that can resolve made up meta1, node
	// liveness and user ranges. It answers with one of three fixed
	// single-replica descriptors depending on the looked-up key:
	//   - KeyMin:                     r1 [KeyMin, NodeLivenessPrefix)
	//   - NodeLivenessPrefix-prefixed: r2 [NodeLivenessPrefix, NodeLivenessKeyMax)
	//   - anything else:              r3 [NodeLivenessKeyMax, KeyMax)
	rDB := MockRangeDescriptorDB(func(key roachpb.RKey, _ bool) (
		[]roachpb.RangeDescriptor, []roachpb.RangeDescriptor, error,
	) {
		if key.Equal(roachpb.KeyMin) {
			return []roachpb.RangeDescriptor{{
				RangeID:  1,
				StartKey: roachpb.RKeyMin,
				EndKey:   roachpb.RKey(keys.NodeLivenessPrefix),
				InternalReplicas: []roachpb.ReplicaDescriptor{
					{NodeID: 1, StoreID: 1},
				},
			}}, nil, nil
		} else if bytes.HasPrefix(key, keys.NodeLivenessPrefix) {
			return []roachpb.RangeDescriptor{{
				RangeID:  2,
				StartKey: roachpb.RKey(keys.NodeLivenessPrefix),
				EndKey:   roachpb.RKey(keys.NodeLivenessKeyMax),
				InternalReplicas: []roachpb.ReplicaDescriptor{
					{NodeID: 1, StoreID: 1},
				},
			}}, nil, nil
		}
		return []roachpb.RangeDescriptor{{
			RangeID:  3,
			StartKey: roachpb.RKey(keys.NodeLivenessKeyMax),
			EndKey:   roachpb.RKeyMax,
			InternalReplicas: []roachpb.ReplicaDescriptor{
				{NodeID: 1, StoreID: 1},
			},
		}}, nil, nil
	})
	// Verify that the request carries the class we expect it to for its span:
	// the class handed to the transport must equal rpc.ConnectionClassForKey
	// applied to the start key of the batch's span. Failures are reported on
	// the outer test's t.
	verifyClass := func(class rpc.ConnectionClass, args roachpb.BatchRequest) {
		span, err := keys.Range(args.Requests)
		if assert.Nil(t, err) {
			assert.Equalf(t, rpc.ConnectionClassForKey(span.Key), class,
				"unexpected class for span key %v", span.Key)
		}
	}
	// The mock transport checks the connection class of every batch it is
	// handed and then replies successfully.
	var testFn simpleSendFn = func(
		_ context.Context,
		opts SendOptions,
		replicas ReplicaSlice,
		args roachpb.BatchRequest,
	) (*roachpb.BatchResponse, error) {
		verifyClass(opts.class, args)
		return args.CreateReply(), nil
	}
	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
	rpcContext := rpc.NewInsecureTestingContext(clock, stopper)
	g := makeGossip(t, stopper, rpcContext)
	cfg := DistSenderConfig{
		AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
		Clock:      clock,
		RPCContext: rpcContext,
		TestingKnobs: ClientTestingKnobs{
			TransportFactory: adaptSimpleTransport(testFn),
		},
		NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)),
		RPCRetryOptions: &retry.Options{
			MaxRetries: 1,
		},
		RangeDescriptorDB: rDB,
		Settings:          cluster.MakeTestingClusterSettings(),
	}
	ds := NewDistSender(cfg, g)

	// Check the three important cases to ensure they are sent with the correct
	// ConnectionClass.
	for _, key := range []roachpb.Key{
		keys.Meta1Prefix,
		keys.NodeLivenessKey(1),
		keys.SystemSQLCodec.TablePrefix(1234), // A non-system table
	} {
		t.Run(key.String(), func(t *testing.T) {
			var ba roachpb.BatchRequest
			ba.Add(&roachpb.GetRequest{
				RequestHeader: roachpb.RequestHeader{
					Key: key,
				},
			})
			_, err := ds.Send(context.Background(), ba)
			require.Nil(t, err)
		})
	}
}

// TestEvictionTokenCoalesce tests when two separate batch requests are a part
// of the same stale range descriptor, they are coalesced when the range lookup
// is retried.
3430 func TestEvictionTokenCoalesce(t *testing.T) { 3431 defer leaktest.AfterTest(t)() 3432 stopper := stop.NewStopper() 3433 defer stopper.Stop(context.Background()) 3434 3435 initGen := int64(1) 3436 testUserRangeDescriptor := roachpb.RangeDescriptor{ 3437 RangeID: 2, 3438 StartKey: roachpb.RKey("a"), 3439 EndKey: roachpb.RKey("d"), 3440 InternalReplicas: []roachpb.ReplicaDescriptor{ 3441 { 3442 NodeID: 1, 3443 StoreID: 1, 3444 }, 3445 }, 3446 Generation: initGen, 3447 } 3448 3449 clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) 3450 rpcContext := rpc.NewInsecureTestingContext(clock, stopper) 3451 g := makeGossip(t, stopper, rpcContext) 3452 if err := g.AddInfoProto(gossip.KeyFirstRangeDescriptor, &testMetaRangeDescriptor, time.Hour); err != nil { 3453 t.Fatal(err) 3454 } 3455 3456 sendErrors := int32(0) 3457 var queriedMetaKeys sync.Map 3458 3459 var ds *DistSender 3460 var testFn simpleSendFn = func( 3461 _ context.Context, 3462 _ SendOptions, 3463 _ ReplicaSlice, 3464 ba roachpb.BatchRequest, 3465 ) (*roachpb.BatchResponse, error) { 3466 rs, err := keys.Range(ba.Requests) 3467 br := ba.CreateReply() 3468 if err != nil { 3469 br.Error = roachpb.NewError(err) 3470 return br, nil 3471 } 3472 if !kv.TestingIsRangeLookup(ba) { 3473 // Return a SendError so DistSender retries the first range lookup in the 3474 // user key-space for both batches. 3475 if atomic.AddInt32(&sendErrors, 1) <= 2 { 3476 br.Error = roachpb.NewError(&roachpb.SendError{}) 3477 return br, nil 3478 } 3479 return br, nil 3480 } 3481 3482 if bytes.HasPrefix(rs.Key, keys.Meta1Prefix) { 3483 // Querying meta 1 range. 3484 br = &roachpb.BatchResponse{} 3485 r := &roachpb.ScanResponse{} 3486 var kv roachpb.KeyValue 3487 if err := kv.Value.SetProto(&testMetaRangeDescriptor); err != nil { 3488 br.Error = roachpb.NewError(err) 3489 return br, nil 3490 } 3491 r.Rows = append(r.Rows, kv) 3492 br.Add(r) 3493 return br, nil 3494 } 3495 // Querying meta2 range. 
3496 br = &roachpb.BatchResponse{} 3497 r := &roachpb.ScanResponse{} 3498 var kv roachpb.KeyValue 3499 if err := kv.Value.SetProto(&testUserRangeDescriptor); err != nil { 3500 br.Error = roachpb.NewError(err) 3501 return br, nil 3502 } 3503 r.Rows = append(r.Rows, kv) 3504 br.Add(r) 3505 // The first query for each batch request key of the meta1 range should be 3506 // in separate requests because there is no prior eviction token. 3507 if _, ok := queriedMetaKeys.Load(string(rs.Key)); ok { 3508 // Wait until we have two in-flight requests. 3509 if err := testutils.SucceedsSoonError(func() error { 3510 // Since the previously fetched RangeDescriptor was ["a", "d"), the request keys 3511 // would be coalesced to "a". 3512 numCalls := ds.rangeCache.lookupRequests.NumCalls(fmt.Sprintf("a:false:%d", initGen)) 3513 if numCalls != 2 { 3514 return errors.Errorf("expected %d in-flight requests, got %d", 2, numCalls) 3515 } 3516 return nil 3517 }); err != nil { 3518 br.Error = roachpb.NewError(err) 3519 return br, nil 3520 } 3521 } 3522 queriedMetaKeys.Store(string(rs.Key), struct{}{}) 3523 return br, nil 3524 } 3525 3526 cfg := DistSenderConfig{ 3527 AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()}, 3528 Clock: clock, 3529 RPCContext: rpcContext, 3530 TestingKnobs: ClientTestingKnobs{ 3531 TransportFactory: adaptSimpleTransport(testFn), 3532 }, 3533 NodeDialer: nodedialer.New(rpcContext, gossip.AddressResolver(g)), 3534 RPCRetryOptions: &retry.Options{ 3535 MaxRetries: 1, 3536 }, 3537 Settings: cluster.MakeTestingClusterSettings(), 3538 } 3539 ds = NewDistSender(cfg, g) 3540 3541 var batchWaitGroup sync.WaitGroup 3542 putFn := func(key, value string) { 3543 defer batchWaitGroup.Done() 3544 put := roachpb.NewPut(roachpb.Key(key), roachpb.MakeValueFromString("c")) 3545 if _, pErr := kv.SendWrapped(context.Background(), ds, put); pErr != nil { 3546 t.Errorf("put encountered error: %s", pErr) 3547 } 3548 } 3549 batchWaitGroup.Add(2) 3550 go putFn("b", "b") 3551 go 
putFn("c", "c") 3552 batchWaitGroup.Wait() 3553 } 3554 3555 func TestDistSenderSlowLogMessage(t *testing.T) { 3556 defer leaktest.AfterTest(t)() 3557 const ( 3558 dur = 8158 * time.Millisecond 3559 attempts = 120 3560 ) 3561 desc := &roachpb.RangeDescriptor{RangeID: 9, StartKey: roachpb.RKey("x")} 3562 { 3563 exp := `have been waiting 8.16s (120 attempts) for RPC to` + 3564 ` r9:{-} [<no replicas>, next=0, gen=0]: boom` 3565 act := slowRangeRPCWarningStr( 3566 dur, 3567 120, 3568 desc, 3569 roachpb.NewError(errors.New("boom"))) 3570 3571 require.Equal(t, exp, act) 3572 } 3573 3574 { 3575 exp := `slow RPC finished after 8.16s (120 attempts)` 3576 act := slowRangeRPCReturnWarningStr(dur, attempts) 3577 require.Equal(t, exp, act) 3578 } 3579 }