github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_test.go

// Copyright 2014 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"bytes"
	"context"
	"fmt"
	"math"
	"math/rand"
	"os"
	"reflect"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/clusterversion"
	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/apply"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/intentresolver"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/rditer"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/stateloader"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/rpc"
	"github.com/cockroachdb/cockroach/pkg/server/telemetry"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/metric"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/tracing"
	"github.com/cockroachdb/cockroach/pkg/util/uuid"
	"github.com/cockroachdb/errors"
	"github.com/cockroachdb/logtags"
	"github.com/gogo/protobuf/proto"
	"github.com/kr/pretty"
	opentracing "github.com/opentracing/opentracing-go"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"go.etcd.io/etcd/raft"
	"go.etcd.io/etcd/raft/raftpb"
	"go.etcd.io/etcd/raft/tracker"
	"golang.org/x/net/trace"
)

// allSpans is a SpanSet that covers *everything* for use in tests that don't
// care about properly declaring their spans.
var allSpans = func() spanset.SpanSet {
	var ss spanset.SpanSet
	ss.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
		Key:    roachpb.KeyMin,
		EndKey: roachpb.KeyMax,
	})
	// Local keys (see `keys.localPrefix`).
	ss.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{
		Key:    append([]byte("\x01"), roachpb.KeyMin...),
		EndKey: append([]byte("\x01"), roachpb.KeyMax...),
	})
	return ss
}()

// allSpansGuard returns a concurrency guard that indicates that it provides
// isolation across all key spans for use in tests that don't care about
// properly declaring their spans or sequencing with the concurrency manager.
func allSpansGuard() *concurrency.Guard {
	return &concurrency.Guard{
		Req: concurrency.Request{
			LatchSpans: &allSpans,
		},
	}
}

func testRangeDescriptor() *roachpb.RangeDescriptor {
	return &roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKeyMin,
		EndKey:   roachpb.RKeyMax,
		InternalReplicas: []roachpb.ReplicaDescriptor{
			{
				ReplicaID: 1,
				NodeID:    1,
				StoreID:   1,
			},
		},
		NextReplicaID: 2,
	}
}

// bootstrapMode controls how the first range is created in testContext.
type bootstrapMode int

const (
	// Use Store.WriteInitialData, which writes the range descriptor and other
	// metadata. Most tests should use this mode because it more closely resembles
	// the real world.
	bootstrapRangeWithMetadata bootstrapMode = iota
	// Create a range with NewRange and Store.AddRangeTest. The store's data
	// will be persisted but metadata will not.
	//
	// Tests which run in this mode play fast and loose; they want
	// a Replica which doesn't have too many moving parts, but then
	// may still exercise a sizable amount of code, be it by accident
	// or design. We bootstrap them here with what's absolutely
	// necessary to not immediately crash on a Raft command, but
	// nothing more.
	// If you read this and you're writing a new test, try not to
	// use this mode - it's deprecated and tends to get in the way
	// of new development.
	bootstrapRangeOnly
)

// leaseExpiry returns a duration in nanos after which any range lease the
// Replica may hold is expired. It is more precise than LeaseExpiration
// in that it returns the minimal duration necessary.
func leaseExpiry(repl *Replica) int64 {
	l, _ := repl.GetLease()
	if l.Type() != roachpb.LeaseExpiration {
		panic("leaseExpiry only valid for expiration-based leases")
	}
	return l.Expiration.WallTime + 1
}

// Create a Raft status that shows everyone fully up to date.
func upToDateRaftStatus(repls []roachpb.ReplicaDescriptor) *raft.Status {
	prs := make(map[uint64]tracker.Progress)
	for _, repl := range repls {
		prs[uint64(repl.ReplicaID)] = tracker.Progress{
			State: tracker.StateReplicate,
			Match: 100,
		}
	}
	return &raft.Status{
		BasicStatus: raft.BasicStatus{
			HardState: raftpb.HardState{Commit: 100},
			SoftState: raft.SoftState{Lead: 1, RaftState: raft.StateLeader},
		},
		Progress: prs,
	}
}

// testContext contains all the objects necessary to test a Range.
// In most cases, simply call Start(t) (and later Stop()) on a zero-initialized
// testContext{}. Any fields which are initialized to non-nil values
// will be used as-is.
type testContext struct {
	testing.TB
	transport     *RaftTransport
	store         *Store
	repl          *Replica
	rangeID       roachpb.RangeID
	gossip        *gossip.Gossip
	engine        storage.Engine
	manualClock   *hlc.ManualClock
	bootstrapMode bootstrapMode
}

func (tc *testContext) Clock() *hlc.Clock {
	return tc.store.cfg.Clock
}

// Start initializes the test context with a single range covering the
// entire keyspace.
func (tc *testContext) Start(t testing.TB, stopper *stop.Stopper) {
	tc.manualClock = hlc.NewManualClock(123)
	cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond))
	tc.StartWithStoreConfig(t, stopper, cfg)
}

// StartWithStoreConfig initializes the test context with a single
// range covering the entire keyspace.
func (tc *testContext) StartWithStoreConfig(t testing.TB, stopper *stop.Stopper, cfg StoreConfig) {
	tc.StartWithStoreConfigAndVersion(t, stopper, cfg, cfg.Settings.Version.BinaryVersion())
}

// StartWithStoreConfigAndVersion is like StartWithStoreConfig but additionally
// allows control over the bootstrap version.
func (tc *testContext) StartWithStoreConfigAndVersion(
	t testing.TB, stopper *stop.Stopper, cfg StoreConfig, bootstrapVersion roachpb.Version,
) {
	tc.TB = t
	// Setup fake zone config handler.
	config.TestingSetupZoneConfigHook(stopper)
	if tc.gossip == nil {
		rpcContext := rpc.NewContext(
			cfg.AmbientCtx, &base.Config{Insecure: true}, cfg.Clock, stopper, cfg.Settings)
		server := rpc.NewServer(rpcContext) // never started
		tc.gossip = gossip.NewTest(1, rpcContext, server, stopper, metric.NewRegistry(), cfg.DefaultZoneConfig)
	}
	if tc.engine == nil {
		tc.engine = storage.NewInMem(context.Background(), storage.DefaultStorageEngine,
			roachpb.Attributes{Attrs: []string{"dc1", "mem"}}, 1<<20)
		stopper.AddCloser(tc.engine)
	}
	if tc.transport == nil {
		tc.transport = NewDummyRaftTransport(cfg.Settings)
	}
	ctx := context.Background()
	if tc.store == nil {
		cv := clusterversion.ClusterVersion{Version: bootstrapVersion}
		cfg.Gossip = tc.gossip
		cfg.Transport = tc.transport
		cfg.StorePool = NewTestStorePool(cfg)
		// Create a test sender without setting a store. This is to deal with the
		// circular dependency between the test sender and the store. The actual
		// store will be passed to the sender after it is created and bootstrapped.
		factory := &testSenderFactory{}
		cfg.DB = kv.NewDB(cfg.AmbientCtx, factory, cfg.Clock)

		require.NoError(t, WriteClusterVersion(ctx, tc.engine, cv))
		if err := InitEngine(ctx, tc.engine, roachpb.StoreIdent{
			ClusterID: uuid.MakeV4(),
			NodeID:    1,
			StoreID:   1,
		}); err != nil {
			t.Fatal(err)
		}
		if err := clusterversion.Initialize(ctx, cv.Version, &cfg.Settings.SV); err != nil {
			t.Fatal(err)
		}
		tc.store = NewStore(ctx, cfg, tc.engine, &roachpb.NodeDescriptor{NodeID: 1})
		// Now that we have our actual store, monkey patch the factory used in cfg.DB.
		factory.setStore(tc.store)
		// We created the store without a real KV client, so it can't perform splits
		// or merges.
		tc.store.splitQueue.SetDisabled(true)
		tc.store.mergeQueue.SetDisabled(true)

		if tc.repl == nil && tc.bootstrapMode == bootstrapRangeWithMetadata {
			if err := WriteInitialClusterData(
				ctx, tc.store.Engine(),
				nil, /* initialValues */
				bootstrapVersion,
				1 /* numStores */, nil /* splits */, cfg.Clock.PhysicalNow(),
			); err != nil {
				t.Fatal(err)
			}
		}
		if err := tc.store.Start(ctx, stopper); err != nil {
			t.Fatal(err)
		}
		tc.store.WaitForInit()
	}

	realRange := tc.repl == nil

	if realRange {
		if tc.bootstrapMode == bootstrapRangeOnly {
			testDesc := testRangeDescriptor()
			if _, err := stateloader.WriteInitialState(
				ctx,
				tc.store.Engine(),
				enginepb.MVCCStats{},
				*testDesc,
				roachpb.BootstrapLease(),
				hlc.Timestamp{},
				stateloader.TruncatedStateUnreplicated,
			); err != nil {
				t.Fatal(err)
			}
			repl, err := newReplica(ctx, testDesc, tc.store, 1)
			if err != nil {
				t.Fatal(err)
			}
			if err := tc.store.AddReplica(repl); err != nil {
				t.Fatal(err)
			}
		}
		var err error
		tc.repl, err = tc.store.GetReplica(1)
		if err != nil {
			t.Fatal(err)
		}
		tc.rangeID = tc.repl.RangeID
	}

	if err := tc.initConfigs(realRange, t); err != nil {
		t.Fatal(err)
	}
}

func (tc *testContext) Sender() kv.Sender {
	return kv.Wrap(tc.repl, func(ba roachpb.BatchRequest) roachpb.BatchRequest {
		if ba.RangeID == 0 {
			ba.RangeID = 1
		}
		if ba.Timestamp == (hlc.Timestamp{}) {
			if err := ba.SetActiveTimestamp(tc.Clock().Now); err != nil {
				tc.Fatal(err)
			}
		}
		tc.Clock().Update(ba.Timestamp)
		return ba
	})
}

// SendWrappedWith is a convenience function which wraps the request in a batch
// and sends it
func (tc *testContext) SendWrappedWith(
	h roachpb.Header, args roachpb.Request,
) (roachpb.Response, *roachpb.Error) {
	return kv.SendWrappedWith(context.Background(), tc.Sender(), h, args)
}

// SendWrapped is identical to SendWrappedWith with a zero header.
func (tc *testContext) SendWrapped(args roachpb.Request) (roachpb.Response, *roachpb.Error) {
	return tc.SendWrappedWith(roachpb.Header{}, args)
}

// initConfigs creates default configuration entries.
func (tc *testContext) initConfigs(realRange bool, t testing.TB) error {
	// Put an empty system config into gossip so that gossip callbacks get
	// run. We're using a fake config, but it's hooked into SystemConfig.
	if err := tc.gossip.AddInfoProto(gossip.KeySystemConfig,
		&config.SystemConfigEntries{}, 0); err != nil {
		return err
	}

	testutils.SucceedsSoon(t, func() error {
		if cfg := tc.gossip.GetSystemConfig(); cfg == nil {
			return errors.Errorf("expected system config to be set")
		}
		return nil
	})

	return nil
}

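// Illustrative sketch (not part of the original suite, and never invoked):
// the typical pattern the tests below follow — start a zero-initialized
// testContext (optionally with bootstrapMode set), send a wrapped Put through
// the test sender, and read it back. It relies only on helpers defined
// elsewhere in this file (putArgs, getArgs, SendWrapped); the function name is
// hypothetical.
func exampleTestContextUsageSketch(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	// Zero value uses bootstrapRangeWithMetadata; set bootstrapMode to
	// bootstrapRangeOnly before Start to opt into the bare-bones mode.
	tc := testContext{}
	tc.Start(t, stopper)

	// Write a value through the wrapped sender and read it back.
	pArgs := putArgs(roachpb.Key("a"), []byte("value"))
	if _, pErr := tc.SendWrapped(&pArgs); pErr != nil {
		t.Fatal(pErr)
	}
	gArgs := getArgs(roachpb.Key("a"))
	if _, pErr := tc.SendWrapped(&gArgs); pErr != nil {
		t.Fatal(pErr)
	}
}
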
// addBogusReplicaToRangeDesc modifies the range descriptor to include a second
// replica. This is useful for tests that want to pretend they're transferring
// the range lease away, as the lease can only be obtained by Replicas which are
// part of the range descriptor.
// This is a workaround, but it's sufficient for the purposes of several tests.
func (tc *testContext) addBogusReplicaToRangeDesc(
	ctx context.Context,
) (roachpb.ReplicaDescriptor, error) {
	secondReplica := roachpb.ReplicaDescriptor{
		NodeID:    2,
		StoreID:   2,
		ReplicaID: 2,
	}
	oldDesc := *tc.repl.Desc()
	newDesc := oldDesc
	newDesc.InternalReplicas = append(newDesc.InternalReplicas, secondReplica)
	newDesc.NextReplicaID = 3

	dbDescKV, err := tc.store.DB().Get(ctx, keys.RangeDescriptorKey(oldDesc.StartKey))
	if err != nil {
		return roachpb.ReplicaDescriptor{}, err
	}
	var dbDesc roachpb.RangeDescriptor
	if err := dbDescKV.Value.GetProto(&dbDesc); err != nil {
		return roachpb.ReplicaDescriptor{}, err
	}
	if !oldDesc.Equal(&dbDesc) {
		return roachpb.ReplicaDescriptor{}, errors.Errorf(`descs didn't match: %v vs %v`, oldDesc, dbDesc)
	}

	// Update the "on-disk" replica state, so that it doesn't diverge from what we
	// have in memory. At the time of this writing, this is not actually required
	// by the tests using this functionality, but it seems sane to do.
	ba := kv.Batch{
		Header: roachpb.Header{Timestamp: tc.Clock().Now()},
	}
	descKey := keys.RangeDescriptorKey(oldDesc.StartKey)
	if err := updateRangeDescriptor(&ba, descKey, dbDescKV.Value, &newDesc); err != nil {
		return roachpb.ReplicaDescriptor{}, err
	}
	if err := tc.store.DB().Run(ctx, &ba); err != nil {
		return roachpb.ReplicaDescriptor{}, err
	}

	tc.repl.setDescRaftMuLocked(ctx, &newDesc)
	tc.repl.raftMu.Lock()
	tc.repl.mu.Lock()
	tc.repl.assertStateLocked(ctx, tc.engine)
	tc.repl.mu.Unlock()
	tc.repl.raftMu.Unlock()
	return secondReplica, nil
}

func newTransaction(
	name string, baseKey roachpb.Key, userPriority roachpb.UserPriority, clock *hlc.Clock,
) *roachpb.Transaction {
	var offset int64
	var now hlc.Timestamp
	if clock != nil {
		offset = clock.MaxOffset().Nanoseconds()
		now = clock.Now()
	}
	txn := roachpb.MakeTransaction(name, baseKey, userPriority, now, offset)
	return &txn
}

// assignSeqNumsForReqs sets sequence numbers for each of the provided requests
// given a transaction proto. It also updates the proto to reflect the incremented
// sequence number.
func assignSeqNumsForReqs(txn *roachpb.Transaction, reqs ...roachpb.Request) {
	for _, ru := range reqs {
		txn.Sequence++
		oldHeader := ru.Header()
		oldHeader.Sequence = txn.Sequence
		ru.SetHeader(oldHeader)
	}
}

// createReplicaSets creates new roachpb.ReplicaDescriptor protos based on an array of
// StoreIDs to aid in testing. Note that this does not actually produce any
// replicas, it just creates the descriptors.
func createReplicaSets(replicaNumbers []roachpb.StoreID) []roachpb.ReplicaDescriptor {
	result := []roachpb.ReplicaDescriptor{}
	for _, replicaNumber := range replicaNumbers {
		result = append(result, roachpb.ReplicaDescriptor{
			StoreID: replicaNumber,
		})
	}
	return result
}

// TestIsOnePhaseCommit verifies the circumstances where a
// transactional batch can be committed as an atomic write.
func TestIsOnePhaseCommit(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := context.Background()
	withSeq := func(req roachpb.Request, seq enginepb.TxnSeq) roachpb.Request {
		h := req.Header()
		h.Sequence = seq
		req.SetHeader(h)
		return req
	}
	makeReqs := func(reqs ...roachpb.Request) []roachpb.RequestUnion {
		ru := make([]roachpb.RequestUnion, len(reqs))
		for i, r := range reqs {
			ru[i].MustSetInner(r)
		}
		return ru
	}

	noReqs := makeReqs()
	getReq := makeReqs(withSeq(&roachpb.GetRequest{}, 0))
	putReq := makeReqs(withSeq(&roachpb.PutRequest{}, 1))
	etReq := makeReqs(withSeq(&roachpb.EndTxnRequest{Commit: true}, 1))
	txnReqs := makeReqs(
		withSeq(&roachpb.PutRequest{}, 1),
		withSeq(&roachpb.EndTxnRequest{Commit: true}, 2),
	)
	txnReqsNoRefresh := makeReqs(
		withSeq(&roachpb.PutRequest{}, 1),
		withSeq(&roachpb.EndTxnRequest{Commit: true, CanCommitAtHigherTimestamp: true}, 2),
	)
	txnReqsRequire1PC := makeReqs(
		withSeq(&roachpb.PutRequest{}, 1),
		withSeq(&roachpb.EndTxnRequest{Commit: true, Require1PC: true}, 2),
	)

	testCases := []struct {
		ru          []roachpb.RequestUnion
		isTxn       bool
		isRestarted bool
		// isWTO implies isTSOff.
		isWTO   bool
		isTSOff bool
		exp1PC  bool
	}{
		{ru: noReqs, isTxn: false, exp1PC: false},
		{ru: noReqs, isTxn: true, exp1PC: false},
		{ru: getReq, isTxn: true, exp1PC: false},
		{ru: putReq, isTxn: true, exp1PC: false},
		{ru: etReq, isTxn: true, exp1PC: true},
		{ru: etReq, isTxn: true, isTSOff: true, exp1PC: false},
		{ru: etReq, isTxn: true, isWTO: true, exp1PC: false},
		{ru: etReq, isTxn: true, isRestarted: true, exp1PC: false},
		{ru: etReq, isTxn: true, isRestarted: true, isTSOff: true, exp1PC: false},
		{ru: etReq, isTxn: true, isRestarted: true, isWTO: true, isTSOff: true, exp1PC: false},
		{ru: txnReqs[:1], isTxn: true, exp1PC: false},
		{ru: txnReqs[1:], isTxn: true, exp1PC: false},
		{ru: txnReqs, isTxn: true, exp1PC: true},
		{ru: txnReqs, isTxn: true, isTSOff: true, exp1PC: false},
		{ru: txnReqs, isTxn: true, isWTO: true, exp1PC: false},
		{ru: txnReqs, isTxn: true, isRestarted: true, exp1PC: false},
		{ru: txnReqs, isTxn: true, isRestarted: true, isTSOff: true, exp1PC: false},
		{ru: txnReqs, isTxn: true, isRestarted: true, isWTO: true, exp1PC: false},
		{ru: txnReqsNoRefresh[:1], isTxn: true, exp1PC: false},
		{ru: txnReqsNoRefresh[1:], isTxn: true, exp1PC: false},
		{ru: txnReqsNoRefresh, isTxn: true, exp1PC: true},
		{ru: txnReqsNoRefresh, isTxn: true, isTSOff: true, exp1PC: true},
		{ru: txnReqsNoRefresh, isTxn: true, isWTO: true, exp1PC: true},
		{ru: txnReqsNoRefresh, isTxn: true, isRestarted: true, exp1PC: false},
		{ru: txnReqsNoRefresh, isTxn: true, isRestarted: true, isTSOff: true, exp1PC: false},
		{ru: txnReqsNoRefresh, isTxn: true, isRestarted: true, isWTO: true, exp1PC: false},
		{ru: txnReqsRequire1PC[:1], isTxn: true, exp1PC: false},
		{ru: txnReqsRequire1PC[1:], isTxn: true, exp1PC: false},
		{ru: txnReqsRequire1PC, isTxn: true, exp1PC: true},
		{ru: txnReqsRequire1PC, isTxn: true, isTSOff: true, exp1PC: false},
		{ru: txnReqsRequire1PC, isTxn: true, isWTO: true, exp1PC: false},
		{ru: txnReqsRequire1PC, isTxn: true, isRestarted: true, exp1PC: true},
		{ru: txnReqsRequire1PC, isTxn: true, isRestarted: true, isTSOff: true, exp1PC: false},
		{ru: txnReqsRequire1PC, isTxn: true, isRestarted: true, isWTO: true, exp1PC: false},
	}

	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
	for i, c := range testCases {
		t.Run(
			fmt.Sprintf("%d:isTxn:%t,isRestarted:%t,isWTO:%t,isTSOff:%t",
				i, c.isTxn, c.isRestarted, c.isWTO, c.isTSOff),
			func(t *testing.T) {
				ba := roachpb.BatchRequest{Requests: c.ru}
				if c.isTxn {
					ba.Txn = newTransaction("txn", roachpb.Key("a"), 1, clock)
					if c.isRestarted {
						ba.Txn.Restart(-1, 0, clock.Now())
					}
					if c.isWTO {
						ba.Txn.WriteTooOld = true
						c.isTSOff = true
					}
					if c.isTSOff {
						ba.Txn.WriteTimestamp = ba.Txn.ReadTimestamp.Add(1, 0)
					}
				} else {
					require.False(t, c.isRestarted)
					require.False(t, c.isWTO)
					require.False(t, c.isTSOff)
				}

				// Emulate what a server actually does and bump the write timestamp when
				// possible. This makes some batches with diverged read and write
				// timestamps pass isOnePhaseCommit().
				maybeBumpReadTimestampToWriteTimestamp(ctx, &ba, &spanset.SpanSet{})

				if is1PC := isOnePhaseCommit(&ba); is1PC != c.exp1PC {
					t.Errorf("expected 1pc=%t; got %t", c.exp1PC, is1PC)
				}
			})
	}
}

// TestReplicaContains verifies that the range uses Key.Address() in
// order to properly resolve addresses for local keys.
func TestReplicaContains(t *testing.T) {
	defer leaktest.AfterTest(t)()
	desc := &roachpb.RangeDescriptor{
		RangeID:  1,
		StartKey: roachpb.RKey("a"),
		EndKey:   roachpb.RKey("b"),
	}

	// This test really only needs a hollow shell of a Replica.
	r := &Replica{}
	r.mu.state.Desc = desc
	r.rangeStr.store(0, desc)

	if statsKey := keys.RangeStatsLegacyKey(desc.RangeID); !r.ContainsKey(statsKey) {
		t.Errorf("expected range to contain range stats key %q", statsKey)
	}
	if !r.ContainsKey(roachpb.Key("aa")) {
		t.Errorf("expected range to contain key \"aa\"")
	}
	if !r.ContainsKey(keys.RangeDescriptorKey([]byte("aa"))) {
		t.Errorf("expected range to contain range descriptor key for \"aa\"")
	}
	if !r.ContainsKeyRange(roachpb.Key("aa"), roachpb.Key("b")) {
		t.Errorf("expected range to contain key range \"aa\"-\"b\"")
	}
	if !r.ContainsKeyRange(keys.RangeDescriptorKey([]byte("aa")),
		keys.RangeDescriptorKey([]byte("b"))) {
		t.Errorf("expected range to contain key transaction range \"aa\"-\"b\"")
	}
}

func sendLeaseRequest(r *Replica, l *roachpb.Lease) error {
	ba := roachpb.BatchRequest{}
	ba.Timestamp = r.store.Clock().Now()
	ba.Add(&roachpb.RequestLeaseRequest{Lease: *l})
	exLease, _ := r.GetLease()
	ch, _, _, pErr := r.evalAndPropose(context.Background(), &ba, allSpansGuard(), &exLease)
	if pErr == nil {
		// Next if the command was committed, wait for the range to apply it.
		// TODO(bdarnell): refactor this to a more conventional error-handling pattern.
		pErr = (<-ch).Err
	}
	return pErr.GoError()
}

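// Illustrative sketch (not part of the original suite, and never invoked):
// the recurring pattern many lease-related tests below use to move the range
// lease away from the local replica — expire the current lease via the manual
// clock, then grant a short expiration-based lease to the bogus second replica
// returned by addBogusReplicaToRangeDesc. The helper name is hypothetical; the
// body mirrors the inline code repeated in the tests that follow.
func giveLeaseToBogusReplicaSketch(
	t *testing.T, tc *testContext, secondReplica roachpb.ReplicaDescriptor,
) {
	// Expire whatever lease the local replica currently holds.
	tc.manualClock.Set(leaseExpiry(tc.repl))
	start := tc.Clock().Now()
	// Grant a 10ns expiration-based lease to the second (bogus) replica.
	if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
		Start:      start,
		Expiration: start.Add(10, 0).Clone(),
		Replica:    secondReplica,
	}); err != nil {
		t.Fatal(err)
	}
}
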
// TestReplicaReadConsistency verifies behavior of the range under
// different read consistencies. Note that this unittest plays
// fast and loose with granting range leases.
func TestReplicaReadConsistency(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	tc := testContext{manualClock: hlc.NewManualClock(123)}
	cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond))
	cfg.TestingKnobs.DisableAutomaticLeaseRenewal = true
	tc.StartWithStoreConfig(t, stopper, cfg)

	secondReplica, err := tc.addBogusReplicaToRangeDesc(context.Background())
	if err != nil {
		t.Fatal(err)
	}

	gArgs := getArgs(roachpb.Key("a"))

	// Try consistent read and verify success.

	if _, err := tc.SendWrapped(&gArgs); err != nil {
		t.Errorf("expected success on consistent read: %+v", err)
	}

	// Try a read committed read and an inconsistent read, both within a
	// transaction.
	txn := newTransaction("test", roachpb.Key("a"), 1, tc.Clock())
	assignSeqNumsForReqs(txn, &gArgs)

	if _, err := tc.SendWrappedWith(roachpb.Header{
		Txn:             txn,
		ReadConsistency: roachpb.READ_UNCOMMITTED,
	}, &gArgs); err == nil {
		t.Errorf("expected error on read uncommitted read within a txn")
	}

	if _, err := tc.SendWrappedWith(roachpb.Header{
		Txn:             txn,
		ReadConsistency: roachpb.INCONSISTENT,
	}, &gArgs); err == nil {
		t.Errorf("expected error on inconsistent read within a txn")
	}

	// Lose the lease and verify CONSISTENT reads receive NotLeaseHolderError
	// and INCONSISTENT reads work as expected.
	tc.manualClock.Set(leaseExpiry(tc.repl))
	start := tc.Clock().Now()
	if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
		Start:      start,
		Expiration: start.Add(10, 0).Clone(),
		Replica:    secondReplica,
	}); err != nil {
		t.Fatal(err)
	}

	// Send without Txn.
	_, pErr := tc.SendWrappedWith(roachpb.Header{
		ReadConsistency: roachpb.CONSISTENT,
	}, &gArgs)
	if _, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError); !ok {
		t.Errorf("expected not lease holder error; got %s", pErr)
	}

	_, pErr = tc.SendWrappedWith(roachpb.Header{
		ReadConsistency: roachpb.READ_UNCOMMITTED,
	}, &gArgs)
	if _, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError); !ok {
		t.Errorf("expected not lease holder error; got %s", pErr)
	}

	if _, pErr := tc.SendWrappedWith(roachpb.Header{
		ReadConsistency: roachpb.INCONSISTENT,
	}, &gArgs); pErr != nil {
		t.Errorf("expected success reading with inconsistent: %s", pErr)
	}
}

// Test the behavior of a replica while a range lease transfer is in progress:
// - while the transfer is in progress, reads should return errors pointing to
// the transfer target.
// - if a transfer fails, the pre-existing lease does not start being used
// again. Instead, a new lease needs to be obtained. This is because, even
// though the transfer got an error, that error is considered ambiguous as the
// transfer might still apply.
func TestBehaviorDuringLeaseTransfer(t *testing.T) {
	defer leaktest.AfterTest(t)()
	manual := hlc.NewManualClock(123)
	clock := hlc.NewClock(manual.UnixNano, 100*time.Millisecond)
	tc := testContext{manualClock: manual}
	tsc := TestStoreConfig(clock)
	var leaseAcquisitionTrap atomic.Value
	tsc.TestingKnobs.DisableAutomaticLeaseRenewal = true
	tsc.TestingKnobs.LeaseRequestEvent = func(ts hlc.Timestamp) {
		val := leaseAcquisitionTrap.Load()
		if val == nil {
			return
		}
		trapCallback := val.(func(ts hlc.Timestamp))
		if trapCallback != nil {
			trapCallback(ts)
		}
	}
	transferSem := make(chan struct{})
	tsc.TestingKnobs.EvalKnobs.TestingEvalFilter =
		func(filterArgs kvserverbase.FilterArgs) *roachpb.Error {
			if _, ok := filterArgs.Req.(*roachpb.TransferLeaseRequest); ok {
				// Notify the test that the transfer has been trapped.
				transferSem <- struct{}{}
				// Wait for the test to unblock the transfer.
				<-transferSem
				// Return an error, so that the pendingLeaseRequest considers the
				// transfer failed.
				return roachpb.NewErrorf("injected transfer error")
			}
			return nil
		}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.StartWithStoreConfig(t, stopper, tsc)
	secondReplica, err := tc.addBogusReplicaToRangeDesc(context.Background())
	if err != nil {
		t.Fatal(err)
	}

	// Do a read to acquire the lease.
	gArgs := getArgs(roachpb.Key("a"))
	if _, err := tc.SendWrapped(&gArgs); err != nil {
		t.Fatal(err)
	}

	// Advance the clock so that the transfer we're going to perform sets a higher
	// minLeaseProposedTS.
	tc.manualClock.Increment((500 * time.Nanosecond).Nanoseconds())

	// Initiate a transfer (async) and wait for it to be blocked.
	transferResChan := make(chan error)
	go func() {
		err := tc.repl.AdminTransferLease(context.Background(), secondReplica.StoreID)
		if !testutils.IsError(err, "injected") {
			transferResChan <- err
		} else {
			transferResChan <- nil
		}
	}()
	<-transferSem
	// Check that a transfer is indeed on-going.
	tc.repl.mu.Lock()
	repDesc, err := tc.repl.getReplicaDescriptorRLocked()
	if err != nil {
		tc.repl.mu.Unlock()
		t.Fatal(err)
	}
	_, pending := tc.repl.mu.pendingLeaseRequest.TransferInProgress(repDesc.ReplicaID)
	tc.repl.mu.Unlock()
	if !pending {
		t.Fatalf("expected transfer to be in progress, and it wasn't")
	}

	// Check that, while the transfer is on-going, the replica redirects to the
	// transfer target.
	_, pErr := tc.SendWrapped(&gArgs)
	nlhe, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError)
	if !ok || nlhe.LeaseHolder.StoreID != secondReplica.StoreID {
		t.Fatalf("expected not lease holder error pointing to store %d, got %v",
			secondReplica.StoreID, pErr)
	}

	// Unblock the transfer and wait for the pendingLeaseRequest to clear the
	// transfer state.
	transferSem <- struct{}{}
	if err := <-transferResChan; err != nil {
		t.Fatal(err)
	}

	testutils.SucceedsSoon(t, func() error {
		tc.repl.mu.Lock()
		defer tc.repl.mu.Unlock()
		_, pending := tc.repl.mu.pendingLeaseRequest.TransferInProgress(repDesc.ReplicaID)
		if pending {
			return errors.New("transfer pending")
		}
		return nil
	})

	// Check that the replica doesn't use its lease, even though there's no longer
	// a transfer in progress. This is because, even though the transfer got an
	// error, that error is considered ambiguous as the transfer might still
	// apply.
	// Concretely, we're going to check that a read triggers a new lease
	// acquisition.
	tc.repl.mu.Lock()
	minLeaseProposedTS := tc.repl.mu.minLeaseProposedTS
	leaseStartTS := tc.repl.mu.state.Lease.Start
	tc.repl.mu.Unlock()
	if minLeaseProposedTS.LessEq(leaseStartTS) {
		t.Fatalf("expected minLeaseProposedTS > lease start. minLeaseProposedTS: %s, "+
			"lease start: %s", minLeaseProposedTS, leaseStartTS)
	}
	expectedLeaseStartTS := tc.manualClock.UnixNano()
	leaseAcquisitionCh := make(chan error)
	leaseAcquisitionTrap.Store(func(ts hlc.Timestamp) {
		if ts.WallTime == expectedLeaseStartTS {
			close(leaseAcquisitionCh)
		} else {
			leaseAcquisitionCh <- errors.Errorf(
				"expected acquisition of lease with start: %d but got start: %s",
				expectedLeaseStartTS, ts)
		}
	})
	// We expect this call to succeed, but after acquiring a new lease.
	if _, err := tc.SendWrapped(&gArgs); err != nil {
		t.Fatal(err)
	}
	// Check that the Send above triggered a lease acquisition.
	select {
	case <-leaseAcquisitionCh:
	case <-time.After(time.Second):
		t.Fatalf("read did not acquire a new lease")
	}
}

// TestApplyCmdLeaseError verifies that when during application of a Raft
// command the proposing node no longer holds the range lease, an error is
// returned. This prevents regression of #1483.
func TestApplyCmdLeaseError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	tc := testContext{manualClock: hlc.NewManualClock(123)}
	cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond))
	cfg.TestingKnobs.DisableAutomaticLeaseRenewal = true
	tc.StartWithStoreConfig(t, stopper, cfg)

	secondReplica, err := tc.addBogusReplicaToRangeDesc(context.Background())
	if err != nil {
		t.Fatal(err)
	}

	pArgs := putArgs(roachpb.Key("a"), []byte("asd"))

	// Lose the lease.
	tc.manualClock.Set(leaseExpiry(tc.repl))
	start := tc.Clock().Now()
	if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
		Start:      start,
		Expiration: start.Add(10, 0).Clone(),
		Replica:    secondReplica,
	}); err != nil {
		t.Fatal(err)
	}

	_, pErr := tc.SendWrappedWith(roachpb.Header{
		Timestamp: tc.Clock().Now().Add(-100, 0),
	}, &pArgs)
	if _, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError); !ok {
		t.Fatalf("expected not lease holder error in return, got %v", pErr)
	}
}

func TestLeaseReplicaNotInDesc(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.Start(t, stopper)

	lease, _ := tc.repl.GetLease()
	invalidLease := lease
	invalidLease.Sequence++
	invalidLease.Replica.StoreID += 12345

	raftCmd := kvserverpb.RaftCommand{
		ProposerLeaseSequence: lease.Sequence,
		ReplicatedEvalResult: kvserverpb.ReplicatedEvalResult{
			IsLeaseRequest: true,
			State: &kvserverpb.ReplicaState{
				Lease: &invalidLease,
			},
		},
	}
	tc.repl.mu.Lock()
	_, _, pErr := checkForcedErr(
		context.Background(), makeIDKey(), &raftCmd, false, /* isLocal */
		&tc.repl.mu.state,
	)
	tc.repl.mu.Unlock()
	if _, isErr := pErr.GetDetail().(*roachpb.LeaseRejectedError); !isErr {
		t.Fatal(pErr)
	} else if !testutils.IsPError(pErr, "replica not part of range") {
		t.Fatal(pErr)
	}
}

func TestReplicaRangeBoundsChecking(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.Start(t, stopper)

	key := roachpb.RKey("a")
	firstRepl := tc.store.LookupReplica(key)
	newRepl := splitTestRange(tc.store, key, key, t)
	if _, pErr := newRepl.redirectOnOrAcquireLease(context.Background()); pErr != nil {
		t.Fatal(pErr)
	}

	gArgs := getArgs(roachpb.Key("b"))
	_, pErr := kv.SendWrappedWith(context.Background(), tc.store, roachpb.Header{
		RangeID: 1,
	}, &gArgs)

	if mismatchErr, ok := pErr.GetDetail().(*roachpb.RangeKeyMismatchError); !ok {
		t.Errorf("expected range key mismatch error: %s", pErr)
	} else {
		if mismatchedDesc := mismatchErr.MismatchedRange; mismatchedDesc.RangeID != firstRepl.RangeID {
			t.Errorf("expected mismatched range to be %d, found %v", firstRepl.RangeID, mismatchedDesc)
		}
		if suggestedDesc := mismatchErr.SuggestedRange; suggestedDesc == nil || suggestedDesc.RangeID != newRepl.RangeID {
			t.Errorf("expected suggested range to be %d, found %v", newRepl.RangeID, suggestedDesc)
		}
	}
}

// hasLease returns whether the most recent range lease was held by the given
// range replica and whether it's expired for the given timestamp.
func hasLease(repl *Replica, timestamp hlc.Timestamp) (owned bool, expired bool) {
	repl.mu.Lock()
	defer repl.mu.Unlock()
	status := repl.leaseStatus(*repl.mu.state.Lease, timestamp, repl.mu.minLeaseProposedTS)
	return repl.mu.state.Lease.OwnedBy(repl.store.StoreID()), status.State != kvserverpb.LeaseState_VALID
}

func TestReplicaLease(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	var filterErr atomic.Value
	applyFilter := func(args kvserverbase.ApplyFilterArgs) (int, *roachpb.Error) {
		if pErr := filterErr.Load(); pErr != nil {
			return 0, pErr.(*roachpb.Error)
		}
		return 0, nil
	}

	tc.manualClock = hlc.NewManualClock(123)
	tsc := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond))
	tsc.TestingKnobs.DisableAutomaticLeaseRenewal = true
	tsc.TestingKnobs.TestingApplyFilter = applyFilter
	tc.StartWithStoreConfig(t, stopper, tsc)
	secondReplica, err := tc.addBogusReplicaToRangeDesc(context.Background())
	if err != nil {
		t.Fatal(err)
	}

	// Test that leases with invalid times are rejected.
	// Start leases at a point that avoids overlapping with the existing lease.
	leaseDuration := tc.store.cfg.RangeLeaseActiveDuration()
	start := hlc.Timestamp{WallTime: (time.Second + leaseDuration).Nanoseconds(), Logical: 0}
	for _, lease := range []roachpb.Lease{
		{Start: start, Expiration: &hlc.Timestamp{}},
	} {
		if _, err := batcheval.RequestLease(context.Background(), tc.store.Engine(),
			batcheval.CommandArgs{
				EvalCtx: NewReplicaEvalContext(tc.repl, &allSpans),
				Args: &roachpb.RequestLeaseRequest{
					Lease: lease,
				},
			}, &roachpb.RequestLeaseResponse{}); !testutils.IsError(err, "replica \\(n0,s0\\):\\? not found in r1") {
			t.Fatalf("unexpected error: %+v", err)
		}
	}

	if held, _ := hasLease(tc.repl, tc.Clock().Now()); !held {
		t.Errorf("expected lease on range start")
	}
	tc.manualClock.Set(leaseExpiry(tc.repl))
	now := tc.Clock().Now()
	if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
		Start:      now.Add(10, 0),
		Expiration: now.Add(20, 0).Clone(),
		Replica:    secondReplica,
	}); err != nil {
		t.Fatal(err)
	}
	if held, expired := hasLease(tc.repl, tc.Clock().Now().Add(15, 0)); held || expired {
		t.Errorf("expected second replica to have range lease")
	}

	{
		_, pErr := tc.repl.redirectOnOrAcquireLease(context.Background())
		if lErr, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError); !ok || lErr == nil {
			t.Fatalf("wanted NotLeaseHolderError, got %s", pErr)
		}
	}
	// Advance clock past expiration and verify that another has
	// range lease will not be true.
	tc.manualClock.Increment(21) // 21ns have passed
	if held, expired := hasLease(tc.repl, tc.Clock().Now()); held || !expired {
		t.Errorf("expected another replica to have expired lease; %t, %t", held, expired)
	}

	// Verify that command returns NotLeaseHolderError when lease is rejected.
	filterErr.Store(roachpb.NewError(&roachpb.LeaseRejectedError{Message: "replica not found"}))

	{
		_, err := tc.repl.redirectOnOrAcquireLease(context.Background())
		if _, ok := err.GetDetail().(*roachpb.NotLeaseHolderError); !ok {
			t.Fatalf("expected %T, got %s", &roachpb.NotLeaseHolderError{}, err)
		}
	}
}

func TestReplicaNotLeaseHolderError(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	tc := testContext{manualClock: hlc.NewManualClock(123)}
	cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond))
	cfg.TestingKnobs.DisableAutomaticLeaseRenewal = true
	tc.StartWithStoreConfig(t, stopper, cfg)

	secondReplica, err := tc.addBogusReplicaToRangeDesc(context.Background())
	if err != nil {
		t.Fatal(err)
	}

	tc.manualClock.Set(leaseExpiry(tc.repl))
	now := tc.Clock().Now()
	if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
		Start:      now,
		Expiration: now.Add(10, 0).Clone(),
		Replica:    secondReplica,
	}); err != nil {
		t.Fatal(err)
	}

	header := roachpb.RequestHeader{
		Key: roachpb.Key("a"),
	}
	testCases := []roachpb.Request{
		// Admin split covers admin commands.
		&roachpb.AdminSplitRequest{
			RequestHeader: header,
			SplitKey:      roachpb.Key("a"),
		},
		// Get covers read-only commands.
		&roachpb.GetRequest{
			RequestHeader: header,
		},
		// Put covers read-write commands.
		&roachpb.PutRequest{
			RequestHeader: header,
			Value:         roachpb.MakeValueFromString("value"),
		},
	}

	for i, test := range testCases {
		_, pErr := tc.SendWrappedWith(roachpb.Header{Timestamp: now}, test)

		if _, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError); !ok {
			t.Errorf("%d: expected not lease holder error: %s", i, pErr)
		}
	}
}

// TestReplicaLeaseCounters verifies leaseRequest metrics counters are updated
// correctly after a lease request.
func TestReplicaLeaseCounters(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer EnableLeaseHistory(100)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	var tc testContext
	cfg := TestStoreConfig(nil)
	// Disable reasonNewLeader and reasonNewLeaderOrConfigChange proposal
	// refreshes so that our lease proposal does not risk being rejected
	// with an AmbiguousResultError.
	cfg.TestingKnobs.DisableRefreshReasonNewLeader = true
	cfg.TestingKnobs.DisableRefreshReasonNewLeaderOrConfigChange = true
	tc.StartWithStoreConfig(t, stopper, cfg)

	assert := func(actual, min, max int64) error {
		if actual < min || actual > max {
			return errors.Errorf(
				"metrics counters actual=%d, expected=[%d,%d]",
				actual, min, max,
			)
		}
		return nil
	}
	metrics := tc.repl.store.metrics
	if err := assert(metrics.LeaseRequestSuccessCount.Count(), 1, 1000); err != nil {
		t.Fatal(err)
	}
	if err := assert(metrics.LeaseRequestErrorCount.Count(), 0, 0); err != nil {
		t.Fatal(err)
	}
	if err := tc.repl.store.updateReplicationGauges(context.Background()); err != nil {
		t.Fatal(err)
	}
	if a, e := metrics.LeaseExpirationCount.Value(), int64(1); a != e {
		t.Fatalf("expected expiration lease count of %d; got %d", e, a)
	}
	// Check the lease history to ensure it contains the first lease.
	if e, a := 1, len(tc.repl.leaseHistory.get()); e != a {
		t.Fatalf("expected lease history count to be %d, got %d", e, a)
	}

	now := tc.Clock().Now()
	if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
		Start:      now,
		Expiration: now.Add(10, 0).Clone(),
		Replica: roachpb.ReplicaDescriptor{
			ReplicaID: 1,
			NodeID:    1,
			StoreID:   1,
		},
	}); err != nil {
		t.Fatal(err)
	}
	if err := assert(metrics.LeaseRequestSuccessCount.Count(), 2, 1000); err != nil {
		t.Fatal(err)
	}
	if err := assert(metrics.LeaseRequestErrorCount.Count(), 0, 0); err != nil {
		t.Fatal(err)
	}
	// The expiration count should still be 1, as this is a gauge.
	if err := tc.repl.store.updateReplicationGauges(context.Background()); err != nil {
		t.Fatal(err)
	}
	if a, e := metrics.LeaseExpirationCount.Value(), int64(1); a != e {
		t.Fatalf("expected expiration lease count of %d; got %d", e, a)
	}
	// Check the lease history to ensure it recorded the new lease.
	if e, a := 2, len(tc.repl.leaseHistory.get()); e != a {
		t.Fatalf("expected lease history count to be %d, got %d", e, a)
	}

	// Make lease request fail by requesting overlapping lease from bogus Replica.
	if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
		Start:      now,
		Expiration: now.Add(10, 0).Clone(),
		Replica: roachpb.ReplicaDescriptor{
			ReplicaID: 2,
			NodeID:    99,
			StoreID:   99,
		},
	}); !testutils.IsError(err, "cannot replace lease") {
		t.Fatal(err)
	}

	if err := assert(metrics.LeaseRequestSuccessCount.Count(), 2, 1000); err != nil {
		t.Fatal(err)
	}
	if err := assert(metrics.LeaseRequestErrorCount.Count(), 1, 1000); err != nil {
		t.Fatal(err)
	}
	// Check the lease history to ensure it did not record the failed lease.
	if e, a := 2, len(tc.repl.leaseHistory.get()); e != a {
		t.Fatalf("expected lease history count to be %d, got %d", e, a)
	}
}

// TestReplicaGossipConfigsOnLease verifies that config info is gossiped
// upon acquisition of the range lease.
func TestReplicaGossipConfigsOnLease(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	tc := testContext{manualClock: hlc.NewManualClock(123)}
	cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond))
	cfg.TestingKnobs.DisableAutomaticLeaseRenewal = true
	tc.StartWithStoreConfig(t, stopper, cfg)

	secondReplica, err := tc.addBogusReplicaToRangeDesc(context.Background())
	if err != nil {
		t.Fatal(err)
	}

	// Write some arbitrary data in the system config span.
	key := keys.SystemSQLCodec.TablePrefix(keys.MaxSystemConfigDescID)
	var val roachpb.Value
	val.SetInt(42)
	if err := storage.MVCCPut(context.Background(), tc.engine, nil, key, hlc.Timestamp{}, val, nil); err != nil {
		t.Fatal(err)
	}

	// If this actually failed, we would have gossiped from MVCCPutProto.
	// Unlikely, but why not check.
	if cfg := tc.gossip.GetSystemConfig(); cfg != nil {
		if nv := len(cfg.Values); nv == 1 && cfg.Values[nv-1].Key.Equal(key) {
			t.Errorf("unexpected gossip of system config: %s", cfg)
		}
	}

	// Expire our own lease which we automagically acquired due to being
	// first range and config holder.
	tc.manualClock.Set(leaseExpiry(tc.repl))
	now := tc.Clock().Now()

	// Give lease to someone else.
	if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
		Start:      now,
		Expiration: now.Add(10, 0).Clone(),
		Replica:    secondReplica,
	}); err != nil {
		t.Fatal(err)
	}

	// Expire that lease.
	tc.manualClock.Increment(11 + int64(tc.Clock().MaxOffset())) // advance time
	now = tc.Clock().Now()

	// Give lease to this range.
	if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
		Start:      now.Add(11, 0),
		Expiration: now.Add(20, 0).Clone(),
		Replica: roachpb.ReplicaDescriptor{
			ReplicaID: 1,
			NodeID:    1,
			StoreID:   1,
		},
	}); err != nil {
		t.Fatal(err)
	}

	testutils.SucceedsSoon(t, func() error {
		cfg := tc.gossip.GetSystemConfig()
		if cfg == nil {
			return errors.Errorf("expected system config to be set")
		}
		numValues := len(cfg.Values)
		if numValues != 1 {
			return errors.Errorf("num config values != 1; got %d", numValues)
		}
		if k := cfg.Values[numValues-1].Key; !k.Equal(key) {
			return errors.Errorf("invalid key for config value (%q != %q)", k, key)
		}
		return nil
	})
}

// TestReplicaTSCacheLowWaterOnLease verifies that the low water mark
// is set on the timestamp cache when the node is granted the lease holder
// lease after not holding it and it is not set when the node is
// granted the range lease when it was the last holder.
// TODO(andrei): rewrite this test to use a TestCluster so we can test that the
// cache gets the correct timestamp on all the replicas that get the lease at
// some point; now we're just testing the cache on the first replica.
func TestReplicaTSCacheLowWaterOnLease(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	tc := testContext{manualClock: hlc.NewManualClock(123)}
	cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond))
	cfg.TestingKnobs.DisableAutomaticLeaseRenewal = true
	// Disable raft log truncation which confuses this test.
	cfg.TestingKnobs.DisableRaftLogQueue = true
	tc.StartWithStoreConfig(t, stopper, cfg)

	secondReplica, err := tc.addBogusReplicaToRangeDesc(context.Background())
	if err != nil {
		t.Fatal(err)
	}

	tc.manualClock.Set(leaseExpiry(tc.repl))
	now := tc.Clock().Now()

	testCases := []struct {
		storeID     roachpb.StoreID
		start       hlc.Timestamp
		expiration  hlc.Timestamp
		expLowWater int64 // 0 for not expecting anything
		expErr      string
	}{
		// Grant the lease fresh.
		{storeID: tc.store.StoreID(),
			start: now, expiration: now.Add(10, 0)},
		// Renew the lease.
		{storeID: tc.store.StoreID(),
			start: now.Add(15, 0), expiration: now.Add(30, 0)},
		// Renew the lease but shorten expiration. This is silently ignored.
		{storeID: tc.store.StoreID(),
			start: now.Add(16, 0), expiration: now.Add(25, 0)},
		// Another Store attempts to get the lease, but overlaps. If the
		// previous lease expiration had worked, this would have too.
		{storeID: secondReplica.StoreID,
			start: now.Add(29, 0), expiration: now.Add(50, 0),
			expErr: "overlaps previous"},
		// The other store tries again, this time without the overlap.
		{storeID: secondReplica.StoreID,
			start: now.Add(31, 0), expiration: now.Add(50, 0),
			// The cache now moves to this other store, and we can't query that.
			expLowWater: 0},
		// Lease is regranted to this store. The low-water mark is updated to the
		// beginning of the lease.
		{storeID: tc.store.StoreID(),
			start: now.Add(60, 0), expiration: now.Add(70, 0),
			// We expect 50, not 60, because the new lease is wound back to the end
			// of the previous lease.
			expLowWater: now.Add(50, 0).WallTime},
	}

	for i, test := range testCases {
		if err := sendLeaseRequest(tc.repl, &roachpb.Lease{
			Start:      test.start,
			Expiration: test.expiration.Clone(),
			Replica: roachpb.ReplicaDescriptor{
				ReplicaID: roachpb.ReplicaID(test.storeID),
				NodeID:    roachpb.NodeID(test.storeID),
				StoreID:   test.storeID,
			},
		}); !testutils.IsError(err, test.expErr) {
			t.Fatalf("%d: unexpected error %v", i, err)
		}
		// Verify expected low water mark.
		rTS, _ := tc.repl.store.tsCache.GetMax(roachpb.Key("a"), nil /* end */)

		if test.expLowWater == 0 {
			continue
		}
		if rTS.WallTime != test.expLowWater {
			t.Errorf("%d: expected low water %d; got max=%d", i, test.expLowWater, rTS.WallTime)
		}
	}
}

// TestReplicaLeaseRejectUnknownRaftNodeID ensures that a replica cannot
// obtain the range lease if it is not part of the current range descriptor.
// TODO(mrtracy): This should probably be tested in client_raft_test package,
// using a real second store.
func TestReplicaLeaseRejectUnknownRaftNodeID(t *testing.T) {
	defer leaktest.AfterTest(t)()

	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())

	tc := testContext{manualClock: hlc.NewManualClock(123)}
	cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond))
	cfg.TestingKnobs.DisableAutomaticLeaseRenewal = true
	tc.StartWithStoreConfig(t, stopper, cfg)

	tc.manualClock.Set(leaseExpiry(tc.repl))
	now := tc.Clock().Now()
	lease := &roachpb.Lease{
		Start:      now,
		Expiration: now.Add(10, 0).Clone(),
		Replica: roachpb.ReplicaDescriptor{
			ReplicaID: 2,
			NodeID:    2,
			StoreID:   2,
		},
	}
	exLease, _ := tc.repl.GetLease()
	ba := roachpb.BatchRequest{}
	ba.Timestamp = tc.repl.store.Clock().Now()
	ba.Add(&roachpb.RequestLeaseRequest{Lease: *lease})
	ch, _, _, pErr := tc.repl.evalAndPropose(context.Background(), &ba, allSpansGuard(), &exLease)
	if pErr == nil {
		// Next if the command was committed, wait for the range to apply it.
		// TODO(bdarnell): refactor to a more conventional error-handling pattern.
		// Remove ambiguity about where the "replica not found" error comes from.
		pErr = (<-ch).Err
	}
	if !testutils.IsPError(pErr, "replica.*not found") {
		t.Errorf("unexpected error obtaining lease for invalid store: %v", pErr)
	}
}

// TestReplicaDrainLease makes sure that no new leases are granted when
// the Store is draining.
func TestReplicaDrainLease(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.Start(t, stopper)

	// Acquire initial lease.
	ctx := context.Background()
	status, pErr := tc.repl.redirectOnOrAcquireLease(ctx)
	if pErr != nil {
		t.Fatal(pErr)
	}

	tc.store.SetDraining(true, nil /* reporter */)
	tc.repl.mu.Lock()
	pErr = <-tc.repl.requestLeaseLocked(ctx, status).C()
	tc.repl.mu.Unlock()
	_, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError)
	if !ok {
		t.Fatalf("expected NotLeaseHolderError, not %v", pErr)
	}
	tc.store.SetDraining(false, nil /* reporter */)
	// Newly undrained, leases work again.
	if _, pErr := tc.repl.redirectOnOrAcquireLease(ctx); pErr != nil {
		t.Fatal(pErr)
	}
}

// TestReplicaGossipFirstRange verifies that the first range gossips its
// location and the cluster ID.
func TestReplicaGossipFirstRange(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.Start(t, stopper)
	for _, key := range []string{gossip.KeyClusterID, gossip.KeyFirstRangeDescriptor, gossip.KeySentinel} {
		bytes, err := tc.gossip.GetInfo(key)
		if err != nil {
			t.Errorf("missing first range gossip of key %s", key)
		}
		if key == gossip.KeyFirstRangeDescriptor {
			var rangeDesc roachpb.RangeDescriptor
			if err := protoutil.Unmarshal(bytes, &rangeDesc); err != nil {
				t.Fatal(err)
			}
		}
		if key == gossip.KeyClusterID && len(bytes) == 0 {
			t.Errorf("expected non-empty gossiped cluster ID, got %q", bytes)
		}
		if key == gossip.KeySentinel && len(bytes) == 0 {
			t.Errorf("expected non-empty gossiped sentinel, got %q", bytes)
		}
	}
}

// TestReplicaGossipAllConfigs verifies that all config types are gossiped.
func TestReplicaGossipAllConfigs(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.Start(t, stopper)
	if cfg := tc.gossip.GetSystemConfig(); cfg == nil {
		t.Fatal("config not set")
	}
}

// TestReplicaNoGossipConfig verifies that certain commands (e.g.,
// reads, writes in uncommitted transactions) do not trigger gossip.
func TestReplicaNoGossipConfig(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.Start(t, stopper)

	// Write some arbitrary data in the system span (up to, but not including MaxReservedID+1)
	key := keys.SystemSQLCodec.TablePrefix(keys.MaxReservedDescID)

	txn := newTransaction("test", key, 1 /* userPriority */, tc.Clock())
	h := roachpb.Header{Txn: txn}
	req1 := putArgs(key, []byte("foo"))
	req2, _ := endTxnArgs(txn, true /* commit */)
	req2.LockSpans = []roachpb.Span{{Key: key}}
	req3 := getArgs(key)

	testCases := []struct {
		req roachpb.Request
		h   roachpb.Header
	}{
		{&req1, h},
		{&req2, h},
		{&req3, roachpb.Header{}},
	}

	for i, test := range testCases {
		assignSeqNumsForReqs(txn, test.req)
		if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), test.h, test.req); pErr != nil {
			t.Fatal(pErr)
		}

		// System config is not gossiped.
		cfg := tc.gossip.GetSystemConfig()
		if cfg == nil {
			t.Fatal("config not set")
		}
		if len(cfg.Values) != 0 {
			t.Errorf("System config was gossiped at #%d", i)
		}
	}
}

// TestReplicaNoGossipFromNonLeader verifies that a non-lease holder replica
// does not gossip configurations.
func TestReplicaNoGossipFromNonLeader(t *testing.T) {
	defer leaktest.AfterTest(t)()
	tc := testContext{}
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	tc.Start(t, stopper)

	// Write some arbitrary data in the system span (up to, but not including MaxReservedID+1)
	key := keys.SystemSQLCodec.TablePrefix(keys.MaxReservedDescID)

	txn := newTransaction("test", key, 1 /* userPriority */, tc.Clock())
	req1 := putArgs(key, nil)

	assignSeqNumsForReqs(txn, &req1)
	if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{
		Txn: txn,
	}, &req1); pErr != nil {
		t.Fatal(pErr)
	}

	req2, h := endTxnArgs(txn, true /* commit */)
	req2.LockSpans = []roachpb.Span{{Key: key}}
	assignSeqNumsForReqs(txn, &req2)
	if _, pErr := tc.SendWrappedWith(h, &req2); pErr != nil {
		t.Fatal(pErr)
	}
	// Execute a get to resolve the intent.
	req3 := getArgs(key)
	if _, pErr := tc.SendWrappedWith(roachpb.Header{Timestamp: txn.WriteTimestamp}, &req3); pErr != nil {
		t.Fatal(pErr)
	}

	// Increment the clock's timestamp to expire the range lease.
	tc.manualClock.Set(leaseExpiry(tc.repl))
	lease, _ := tc.repl.GetLease()
	if tc.repl.leaseStatus(lease, tc.Clock().Now(), hlc.Timestamp{}).State != kvserverpb.LeaseState_EXPIRED {
		t.Fatal("range lease should have been expired")
	}

	// Make sure the information for db1 is not gossiped. Since obtaining
	// a lease updates the gossiped information, we do that.
	if _, pErr := tc.repl.redirectOnOrAcquireLease(context.Background()); pErr != nil {
		t.Fatal(pErr)
	}
	// Fetch the raw gossip info. GetSystemConfig is based on callbacks at
	// modification time. But we're checking for _not_ gossiped, so there should
	// be no callbacks. Easier to check the raw info.
1567 var cfg config.SystemConfigEntries 1568 err := tc.gossip.GetInfoProto(gossip.KeySystemConfig, &cfg) 1569 if err != nil { 1570 t.Fatal(err) 1571 } 1572 if len(cfg.Values) != 0 { 1573 t.Fatalf("non-lease holder gossiped the system config") 1574 } 1575 } 1576 1577 func getArgs(key []byte) roachpb.GetRequest { 1578 return roachpb.GetRequest{ 1579 RequestHeader: roachpb.RequestHeader{ 1580 Key: key, 1581 }, 1582 } 1583 } 1584 1585 func putArgs(key roachpb.Key, value []byte) roachpb.PutRequest { 1586 return roachpb.PutRequest{ 1587 RequestHeader: roachpb.RequestHeader{ 1588 Key: key, 1589 }, 1590 Value: roachpb.MakeValueFromBytes(value), 1591 } 1592 } 1593 1594 func cPutArgs(key roachpb.Key, value, expValue []byte) roachpb.ConditionalPutRequest { 1595 var optExpV *roachpb.Value 1596 if expValue != nil { 1597 expV := roachpb.MakeValueFromBytes(expValue) 1598 optExpV = &expV 1599 } 1600 return roachpb.ConditionalPutRequest{ 1601 RequestHeader: roachpb.RequestHeader{ 1602 Key: key, 1603 }, 1604 Value: roachpb.MakeValueFromBytes(value), 1605 ExpValue: optExpV, 1606 } 1607 } 1608 1609 func iPutArgs(key roachpb.Key, value []byte) roachpb.InitPutRequest { 1610 return roachpb.InitPutRequest{ 1611 RequestHeader: roachpb.RequestHeader{ 1612 Key: key, 1613 }, 1614 Value: roachpb.MakeValueFromBytes(value), 1615 } 1616 } 1617 1618 func deleteArgs(key roachpb.Key) roachpb.DeleteRequest { 1619 return roachpb.DeleteRequest{ 1620 RequestHeader: roachpb.RequestHeader{ 1621 Key: key, 1622 }, 1623 } 1624 } 1625 1626 func deleteRangeArgs(key, endKey roachpb.Key) roachpb.DeleteRangeRequest { 1627 return roachpb.DeleteRangeRequest{ 1628 RequestHeader: roachpb.RequestHeader{ 1629 Key: key, 1630 EndKey: endKey, 1631 }, 1632 } 1633 } 1634 1635 // readOrWriteArgs returns either get or put arguments depending on 1636 // value of "read". Get for true; Put for false. 
1637 func readOrWriteArgs(key roachpb.Key, read bool) roachpb.Request { 1638 if read { 1639 gArgs := getArgs(key) 1640 return &gArgs 1641 } 1642 pArgs := putArgs(key, []byte("value")) 1643 return &pArgs 1644 } 1645 1646 func incrementArgs(key []byte, inc int64) *roachpb.IncrementRequest { 1647 return &roachpb.IncrementRequest{ 1648 RequestHeader: roachpb.RequestHeader{ 1649 Key: key, 1650 }, 1651 Increment: inc, 1652 } 1653 } 1654 1655 func scanArgsString(s, e string) *roachpb.ScanRequest { 1656 return &roachpb.ScanRequest{ 1657 RequestHeader: roachpb.RequestHeader{Key: roachpb.Key(s), EndKey: roachpb.Key(e)}, 1658 } 1659 } 1660 1661 func getArgsString(k string) *roachpb.GetRequest { 1662 return &roachpb.GetRequest{ 1663 RequestHeader: roachpb.RequestHeader{Key: roachpb.Key(k)}, 1664 } 1665 } 1666 1667 func scanArgs(start, end []byte) *roachpb.ScanRequest { 1668 return &roachpb.ScanRequest{ 1669 RequestHeader: roachpb.RequestHeader{ 1670 Key: start, 1671 EndKey: end, 1672 }, 1673 } 1674 } 1675 1676 func revScanArgsString(s, e string) *roachpb.ReverseScanRequest { 1677 return &roachpb.ReverseScanRequest{ 1678 RequestHeader: roachpb.RequestHeader{Key: roachpb.Key(s), EndKey: roachpb.Key(e)}, 1679 } 1680 } 1681 1682 func revScanArgs(start, end []byte) *roachpb.ReverseScanRequest { 1683 return &roachpb.ReverseScanRequest{ 1684 RequestHeader: roachpb.RequestHeader{ 1685 Key: start, 1686 EndKey: end, 1687 }, 1688 } 1689 } 1690 1691 func heartbeatArgs( 1692 txn *roachpb.Transaction, now hlc.Timestamp, 1693 ) (roachpb.HeartbeatTxnRequest, roachpb.Header) { 1694 return roachpb.HeartbeatTxnRequest{ 1695 RequestHeader: roachpb.RequestHeader{ 1696 Key: txn.Key, 1697 }, 1698 Now: now, 1699 }, roachpb.Header{Txn: txn} 1700 } 1701 1702 // endTxnArgs creates a EndTxnRequest. By leaving the Sequence field 0, the 1703 // request will not qualify for 1PC. 
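// These request constructors are typically combined as in the following
// sketch, mirroring their use in the tests above (tc is the surrounding
// testContext):
//
//	txn := newTransaction("test", key, 1, tc.Clock())
//	put := putArgs(key, []byte("value"))
//	assignSeqNumsForReqs(txn, &put)
//	_, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txn}, &put)
//	et, h := endTxnArgs(txn, true /* commit */)
//	assignSeqNumsForReqs(txn, &et)
//	_, pErr = tc.SendWrappedWith(h, &et)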
1704 func endTxnArgs(txn *roachpb.Transaction, commit bool) (roachpb.EndTxnRequest, roachpb.Header) { 1705 return roachpb.EndTxnRequest{ 1706 RequestHeader: roachpb.RequestHeader{ 1707 Key: txn.Key, // not allowed when going through TxnCoordSender, but we're not 1708 }, 1709 Commit: commit, 1710 }, roachpb.Header{Txn: txn} 1711 } 1712 1713 func pushTxnArgs( 1714 pusher, pushee *roachpb.Transaction, pushType roachpb.PushTxnType, 1715 ) roachpb.PushTxnRequest { 1716 return roachpb.PushTxnRequest{ 1717 RequestHeader: roachpb.RequestHeader{ 1718 Key: pushee.Key, 1719 }, 1720 PushTo: pusher.WriteTimestamp.Next(), 1721 PusherTxn: *pusher, 1722 PusheeTxn: pushee.TxnMeta, 1723 PushType: pushType, 1724 } 1725 } 1726 1727 func recoverTxnArgs(txn *roachpb.Transaction, implicitlyCommitted bool) roachpb.RecoverTxnRequest { 1728 return roachpb.RecoverTxnRequest{ 1729 RequestHeader: roachpb.RequestHeader{ 1730 Key: txn.Key, 1731 }, 1732 Txn: txn.TxnMeta, 1733 ImplicitlyCommitted: implicitlyCommitted, 1734 } 1735 } 1736 1737 func queryTxnArgs(txn enginepb.TxnMeta, waitForUpdate bool) roachpb.QueryTxnRequest { 1738 return roachpb.QueryTxnRequest{ 1739 RequestHeader: roachpb.RequestHeader{ 1740 Key: txn.Key, 1741 }, 1742 Txn: txn, 1743 WaitForUpdate: waitForUpdate, 1744 } 1745 } 1746 1747 func queryIntentArgs( 1748 key []byte, txn enginepb.TxnMeta, errIfMissing bool, 1749 ) roachpb.QueryIntentRequest { 1750 return roachpb.QueryIntentRequest{ 1751 RequestHeader: roachpb.RequestHeader{ 1752 Key: key, 1753 }, 1754 Txn: txn, 1755 ErrorIfMissing: errIfMissing, 1756 } 1757 } 1758 1759 func resolveIntentRangeArgsString( 1760 s, e string, txn enginepb.TxnMeta, status roachpb.TransactionStatus, 1761 ) *roachpb.ResolveIntentRangeRequest { 1762 return &roachpb.ResolveIntentRangeRequest{ 1763 RequestHeader: roachpb.RequestHeader{Key: roachpb.Key(s), EndKey: roachpb.Key(e)}, 1764 IntentTxn: txn, 1765 Status: status, 1766 } 1767 } 1768 1769 func internalMergeArgs(key []byte, value roachpb.Value) roachpb.MergeRequest { 1770 return roachpb.MergeRequest{ 1771 RequestHeader: roachpb.RequestHeader{ 1772 Key: key, 1773 }, 1774 Value: value, 1775 } 1776 } 1777 1778 func truncateLogArgs(index uint64, rangeID roachpb.RangeID) roachpb.TruncateLogRequest { 1779 return roachpb.TruncateLogRequest{ 1780 Index: index, 1781 RangeID: rangeID, 1782 } 1783 } 1784 1785 func gcKey(key roachpb.Key, timestamp hlc.Timestamp) roachpb.GCRequest_GCKey { 1786 return roachpb.GCRequest_GCKey{ 1787 Key: key, 1788 Timestamp: timestamp, 1789 } 1790 } 1791 1792 func gcArgs(startKey []byte, endKey []byte, keys ...roachpb.GCRequest_GCKey) roachpb.GCRequest { 1793 return roachpb.GCRequest{ 1794 RequestHeader: roachpb.RequestHeader{ 1795 Key: startKey, 1796 EndKey: endKey, 1797 }, 1798 Keys: keys, 1799 } 1800 } 1801 1802 // TestOptimizePuts verifies that contiguous runs of puts and 1803 // conditional puts are marked as "blind" if they're written 1804 // to a virgin keyspace. 
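// "Blind" refers to the Blind field on Put, ConditionalPut, and InitPut
// requests; optimizePuts sets it when it can determine that the targeted
// keyspace is empty, which (roughly) lets the storage engine skip looking for
// an existing value. Schematically, the shape being exercised is:
//
//	reqs := batch.Requests // >= optimizePutThreshold consecutive puts
//	reqs = optimizePuts(tc.engine, reqs, false)
//	// puts that cover an untouched key range now have Blind == true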
1805 func TestOptimizePuts(t *testing.T) { 1806 defer leaktest.AfterTest(t)() 1807 tc := testContext{} 1808 stopper := stop.NewStopper() 1809 defer stopper.Stop(context.Background()) 1810 tc.Start(t, stopper) 1811 1812 pArgs := make([]roachpb.PutRequest, optimizePutThreshold) 1813 cpArgs := make([]roachpb.ConditionalPutRequest, optimizePutThreshold) 1814 ipArgs := make([]roachpb.InitPutRequest, optimizePutThreshold) 1815 for i := 0; i < optimizePutThreshold; i++ { 1816 pArgs[i] = putArgs([]byte(fmt.Sprintf("%02d", i)), []byte("1")) 1817 cpArgs[i] = cPutArgs([]byte(fmt.Sprintf("%02d", i)), []byte("1"), []byte("0")) 1818 ipArgs[i] = iPutArgs([]byte(fmt.Sprintf("%02d", i)), []byte("1")) 1819 } 1820 incArgs := incrementArgs([]byte("inc"), 1) 1821 1822 testCases := []struct { 1823 exKey roachpb.Key 1824 reqs []roachpb.Request 1825 expBlind []bool 1826 }{ 1827 // No existing keys, single put. 1828 { 1829 nil, 1830 []roachpb.Request{ 1831 &pArgs[0], 1832 }, 1833 []bool{ 1834 false, 1835 }, 1836 }, 1837 // No existing keys, nine puts. 1838 { 1839 nil, 1840 []roachpb.Request{ 1841 &pArgs[0], &pArgs[1], &pArgs[2], &pArgs[3], &pArgs[4], &pArgs[5], &pArgs[6], &pArgs[7], &pArgs[8], 1842 }, 1843 []bool{ 1844 false, false, false, false, false, false, false, false, false, 1845 }, 1846 }, 1847 // No existing keys, ten puts. 1848 { 1849 nil, 1850 []roachpb.Request{ 1851 &pArgs[0], &pArgs[1], &pArgs[2], &pArgs[3], &pArgs[4], &pArgs[5], &pArgs[6], &pArgs[7], &pArgs[8], &pArgs[9], 1852 }, 1853 []bool{ 1854 true, true, true, true, true, true, true, true, true, true, 1855 }, 1856 }, 1857 // Existing key at "0", ten conditional puts. 1858 { 1859 roachpb.Key("0"), 1860 []roachpb.Request{ 1861 &cpArgs[0], &cpArgs[1], &cpArgs[2], &cpArgs[3], &cpArgs[4], &cpArgs[5], &cpArgs[6], &cpArgs[7], &cpArgs[8], &cpArgs[9], 1862 }, 1863 []bool{ 1864 true, true, true, true, true, true, true, true, true, true, 1865 }, 1866 }, 1867 // Existing key at "0", ten init puts. 1868 { 1869 roachpb.Key("0"), 1870 []roachpb.Request{ 1871 &ipArgs[0], &ipArgs[1], &ipArgs[2], &ipArgs[3], &ipArgs[4], &ipArgs[5], &ipArgs[6], &ipArgs[7], &ipArgs[8], &ipArgs[9], 1872 }, 1873 []bool{ 1874 true, true, true, true, true, true, true, true, true, true, 1875 }, 1876 }, 1877 // Existing key at 11, mixed put types. 1878 { 1879 roachpb.Key("11"), 1880 []roachpb.Request{ 1881 &pArgs[0], &cpArgs[1], &pArgs[2], &cpArgs[3], &ipArgs[4], &ipArgs[5], &pArgs[6], &cpArgs[7], &pArgs[8], &ipArgs[9], 1882 }, 1883 []bool{ 1884 true, true, true, true, true, true, true, true, true, true, 1885 }, 1886 }, 1887 // Existing key at 00, ten puts, expect nothing blind. 1888 { 1889 roachpb.Key("00"), 1890 []roachpb.Request{ 1891 &pArgs[0], &pArgs[1], &pArgs[2], &pArgs[3], &pArgs[4], &pArgs[5], &pArgs[6], &pArgs[7], &pArgs[8], &pArgs[9], 1892 }, 1893 []bool{ 1894 false, false, false, false, false, false, false, false, false, false, 1895 }, 1896 }, 1897 // Existing key at 00, ten puts in reverse order, expect nothing blind. 1898 { 1899 roachpb.Key("00"), 1900 []roachpb.Request{ 1901 &pArgs[9], &pArgs[8], &pArgs[7], &pArgs[6], &pArgs[5], &pArgs[4], &pArgs[3], &pArgs[2], &pArgs[1], &pArgs[0], 1902 }, 1903 []bool{ 1904 false, false, false, false, false, false, false, false, false, false, 1905 }, 1906 }, 1907 // Existing key at 05, ten puts, expect first five puts are blind. 
1908 { 1909 roachpb.Key("05"), 1910 []roachpb.Request{ 1911 &pArgs[0], &pArgs[1], &pArgs[2], &pArgs[3], &pArgs[4], &pArgs[5], &pArgs[6], &pArgs[7], &pArgs[8], &pArgs[9], 1912 }, 1913 []bool{ 1914 true, true, true, true, true, false, false, false, false, false, 1915 }, 1916 }, 1917 // Existing key at 09, ten puts, expect first nine puts are blind. 1918 { 1919 roachpb.Key("09"), 1920 []roachpb.Request{ 1921 &pArgs[0], &pArgs[1], &pArgs[2], &pArgs[3], &pArgs[4], &pArgs[5], &pArgs[6], &pArgs[7], &pArgs[8], &pArgs[9], 1922 }, 1923 []bool{ 1924 true, true, true, true, true, true, true, true, true, false, 1925 }, 1926 }, 1927 // No existing key, ten puts + inc + ten cputs. 1928 { 1929 nil, 1930 []roachpb.Request{ 1931 &pArgs[0], &pArgs[1], &pArgs[2], &pArgs[3], &pArgs[4], &pArgs[5], &pArgs[6], &pArgs[7], &pArgs[8], &pArgs[9], 1932 incArgs, &cpArgs[0], &cpArgs[1], &cpArgs[2], &cpArgs[3], &cpArgs[4], &cpArgs[5], &cpArgs[6], &cpArgs[7], &cpArgs[8], &cpArgs[9], 1933 }, 1934 []bool{ 1935 true, true, true, true, true, true, true, true, true, true, 1936 false, false, false, false, false, false, false, false, false, false, false, 1937 }, 1938 }, 1939 // Duplicate put at 11th key; should see ten puts. 1940 { 1941 nil, 1942 []roachpb.Request{ 1943 &pArgs[0], &pArgs[1], &pArgs[2], &pArgs[3], &pArgs[4], &pArgs[5], &pArgs[6], &pArgs[7], &pArgs[8], &pArgs[9], &pArgs[9], 1944 }, 1945 []bool{ 1946 true, true, true, true, true, true, true, true, true, true, false, 1947 }, 1948 }, 1949 // Duplicate cput at 11th key; should see ten puts. 1950 { 1951 nil, 1952 []roachpb.Request{ 1953 &pArgs[0], &pArgs[1], &pArgs[2], &pArgs[3], &pArgs[4], &pArgs[5], &pArgs[6], &pArgs[7], &pArgs[8], &pArgs[9], &cpArgs[9], 1954 }, 1955 []bool{ 1956 true, true, true, true, true, true, true, true, true, true, false, 1957 }, 1958 }, 1959 // Duplicate iput at 11th key; should see ten puts. 1960 { 1961 nil, 1962 []roachpb.Request{ 1963 &pArgs[0], &pArgs[1], &pArgs[2], &pArgs[3], &pArgs[4], &pArgs[5], &pArgs[6], &pArgs[7], &pArgs[8], &pArgs[9], &ipArgs[9], 1964 }, 1965 []bool{ 1966 true, true, true, true, true, true, true, true, true, true, false, 1967 }, 1968 }, 1969 // Duplicate cput at 10th key; should see ten cputs. 1970 { 1971 nil, 1972 []roachpb.Request{ 1973 &cpArgs[0], &cpArgs[1], &cpArgs[2], &cpArgs[3], &cpArgs[4], &cpArgs[5], &cpArgs[6], &cpArgs[7], &cpArgs[8], &cpArgs[9], &cpArgs[9], 1974 }, 1975 []bool{ 1976 true, true, true, true, true, true, true, true, true, true, false, 1977 }, 1978 }, 1979 } 1980 1981 for i, c := range testCases { 1982 if c.exKey != nil { 1983 if err := storage.MVCCPut(context.Background(), tc.engine, nil, c.exKey, 1984 hlc.Timestamp{}, roachpb.MakeValueFromString("foo"), nil); err != nil { 1985 t.Fatal(err) 1986 } 1987 } 1988 batch := roachpb.BatchRequest{} 1989 for _, r := range c.reqs { 1990 batch.Add(r) 1991 } 1992 // Make a deep clone of the requests slice. We need a deep clone 1993 // because the regression which is prevented here changed data on the 1994 // individual requests, and not the slice. 1995 goldenRequests := append([]roachpb.RequestUnion(nil), batch.Requests...) 1996 for i := range goldenRequests { 1997 clone := protoutil.Clone(goldenRequests[i].GetInner()).(roachpb.Request) 1998 goldenRequests[i].MustSetInner(clone) 1999 } 2000 // Save the original slice, allowing us to assert that it doesn't 2001 // change when it is passed to optimizePuts. 
2002 oldRequests := batch.Requests 2003 batch.Requests = optimizePuts(tc.engine, batch.Requests, false) 2004 if !reflect.DeepEqual(goldenRequests, oldRequests) { 2005 t.Fatalf("%d: optimizePuts mutated the original request slice: %s", 2006 i, pretty.Diff(goldenRequests, oldRequests), 2007 ) 2008 } 2009 2010 blind := []bool{} 2011 for _, r := range batch.Requests { 2012 switch t := r.GetInner().(type) { 2013 case *roachpb.PutRequest: 2014 blind = append(blind, t.Blind) 2015 t.Blind = false 2016 case *roachpb.ConditionalPutRequest: 2017 blind = append(blind, t.Blind) 2018 t.Blind = false 2019 case *roachpb.InitPutRequest: 2020 blind = append(blind, t.Blind) 2021 t.Blind = false 2022 default: 2023 blind = append(blind, false) 2024 } 2025 } 2026 if !reflect.DeepEqual(blind, c.expBlind) { 2027 t.Errorf("%d: expected %+v; got %+v", i, c.expBlind, blind) 2028 } 2029 if c.exKey != nil { 2030 if err := tc.engine.Clear(storage.MakeMVCCMetadataKey(c.exKey)); err != nil { 2031 t.Fatal(err) 2032 } 2033 } 2034 } 2035 } 2036 2037 // TestAcquireLease verifies that the range lease is acquired 2038 // for read and write methods, and eagerly renewed. 2039 func TestAcquireLease(t *testing.T) { 2040 defer leaktest.AfterTest(t)() 2041 2042 gArgs := getArgs([]byte("a")) 2043 pArgs := putArgs([]byte("b"), []byte("1")) 2044 2045 for _, test := range []roachpb.Request{ 2046 &gArgs, 2047 &pArgs, 2048 } { 2049 t.Run("", func(t *testing.T) { 2050 testutils.RunTrueAndFalse(t, "withMinLeaseProposedTS", func(t *testing.T, withMinLeaseProposedTS bool) { 2051 tc := testContext{} 2052 stopper := stop.NewStopper() 2053 defer stopper.Stop(context.Background()) 2054 tc.Start(t, stopper) 2055 2056 lease, _ := tc.repl.GetLease() 2057 2058 // This is a single-replica test; since we're automatically pushing back 2059 // the start of a lease as far as possible, and since there is an auto- 2060 // matic lease for us at the beginning, we'll basically create a lease 2061 // from then on. That is, unless the minLeaseProposedTS which gets set 2062 // automatically at server start forces us to get a new lease. We 2063 // simulate both cases. 2064 var expStart hlc.Timestamp 2065 2066 tc.repl.mu.Lock() 2067 if !withMinLeaseProposedTS { 2068 tc.repl.mu.minLeaseProposedTS = hlc.Timestamp{} 2069 expStart = lease.Start 2070 } else { 2071 expStart = tc.repl.mu.minLeaseProposedTS 2072 } 2073 tc.repl.mu.Unlock() 2074 2075 tc.manualClock.Set(leaseExpiry(tc.repl)) 2076 2077 ts := tc.Clock().Now().Next() 2078 if _, pErr := tc.SendWrappedWith(roachpb.Header{Timestamp: ts}, test); pErr != nil { 2079 t.Error(pErr) 2080 } 2081 if held, expired := hasLease(tc.repl, ts); !held || expired { 2082 t.Errorf("expected lease acquisition") 2083 } 2084 lease, _ = tc.repl.GetLease() 2085 if lease.Start != expStart { 2086 t.Errorf("unexpected lease start: %s; expected %s", lease.Start, expStart) 2087 } 2088 2089 if *lease.DeprecatedStartStasis != *lease.Expiration { 2090 t.Errorf("%s already in stasis (or beyond): %+v", ts, lease) 2091 } 2092 if lease.Expiration.LessEq(ts) { 2093 t.Errorf("%s already expired: %+v", ts, lease) 2094 } 2095 2096 shouldRenewTS := lease.Expiration.Add(-1, 0) 2097 tc.manualClock.Set(shouldRenewTS.WallTime + 1) 2098 if _, pErr := tc.SendWrapped(test); pErr != nil { 2099 t.Error(pErr) 2100 } 2101 // Since the command we sent above does not get blocked on the lease 2102 // extension, we need to wait for it to go through. 
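// testutils.SucceedsSoon retries the closure below until it returns nil (or
// gives up after a generous timeout), which is the idiom used throughout this
// file to wait for asynchronous state, here the lease extension, to become
// visible.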
2103 testutils.SucceedsSoon(t, func() error { 2104 newLease, _ := tc.repl.GetLease() 2105 if newLease.Expiration.LessEq(*lease.Expiration) { 2106 return errors.Errorf("lease did not get extended: %+v to %+v", lease, newLease) 2107 } 2108 return nil 2109 }) 2110 }) 2111 }) 2112 } 2113 } 2114 2115 // TestLeaseConcurrent requests the lease multiple times, all of which 2116 // will join the same LeaseRequest command. This exercises the cloning of 2117 // the *roachpb.Error to ensure that each requestor gets a distinct 2118 // error object (which prevents regression of #6111) 2119 func TestLeaseConcurrent(t *testing.T) { 2120 defer leaktest.AfterTest(t)() 2121 const num = 5 2122 2123 const origMsg = "boom" 2124 testutils.RunTrueAndFalse(t, "withError", func(t *testing.T, withError bool) { 2125 stopper := stop.NewStopper() 2126 defer stopper.Stop(context.Background()) 2127 2128 var seen int32 2129 var active int32 2130 var wg sync.WaitGroup 2131 wg.Add(num) 2132 2133 tc := testContext{manualClock: hlc.NewManualClock(123)} 2134 cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond)) 2135 // Disable reasonNewLeader and reasonNewLeaderOrConfigChange proposal 2136 // refreshes so that our lease proposal does not risk being rejected 2137 // with an AmbiguousResultError. 2138 cfg.TestingKnobs.DisableRefreshReasonNewLeader = true 2139 cfg.TestingKnobs.DisableRefreshReasonNewLeaderOrConfigChange = true 2140 cfg.TestingKnobs.TestingProposalFilter = func(args kvserverbase.ProposalFilterArgs) *roachpb.Error { 2141 ll, ok := args.Req.Requests[0].GetInner().(*roachpb.RequestLeaseRequest) 2142 if !ok || atomic.LoadInt32(&active) == 0 { 2143 return nil 2144 } 2145 if c := atomic.AddInt32(&seen, 1); c > 1 { 2146 // Morally speaking, this is an error, but reproposals can 2147 // happen and so we warn (in case this trips the test up 2148 // in more unexpected ways). 2149 log.Infof(context.Background(), "reproposal of %+v", ll) 2150 } 2151 // Wait for all lease requests to join the same LeaseRequest command. 2152 wg.Wait() 2153 if withError { 2154 return roachpb.NewErrorf(origMsg) 2155 } 2156 return nil 2157 } 2158 tc.StartWithStoreConfig(t, stopper, cfg) 2159 2160 atomic.StoreInt32(&active, 1) 2161 tc.manualClock.Increment(leaseExpiry(tc.repl)) 2162 ts := tc.Clock().Now() 2163 pErrCh := make(chan *roachpb.Error, num) 2164 for i := 0; i < num; i++ { 2165 if err := stopper.RunAsyncTask(context.Background(), "test", func(ctx context.Context) { 2166 tc.repl.mu.Lock() 2167 status := tc.repl.leaseStatus(*tc.repl.mu.state.Lease, ts, hlc.Timestamp{}) 2168 llHandle := tc.repl.requestLeaseLocked(ctx, status) 2169 tc.repl.mu.Unlock() 2170 wg.Done() 2171 pErr := <-llHandle.C() 2172 // Mutate the errors as we receive them to expose races. 2173 if pErr != nil { 2174 pErr.OriginNode = 0 2175 } 2176 pErrCh <- pErr 2177 }); err != nil { 2178 t.Fatal(err) 2179 } 2180 } 2181 2182 pErrs := make([]*roachpb.Error, num) 2183 for i := range pErrs { 2184 // Make sure all of the responses are in (just so that we can 2185 // mess with the "original" error knowing that all of the 2186 // cloning must have happened by now). 
2187 pErrs[i] = <-pErrCh 2188 } 2189 2190 newMsg := "moob" 2191 for i, pErr := range pErrs { 2192 if withError != (pErr != nil) { 2193 t.Errorf("%d: wanted error: %t, got error %v", i, withError, pErr) 2194 } 2195 if testutils.IsPError(pErr, newMsg) { 2196 t.Errorf("%d: errors shared memory: %v", i, pErr) 2197 } else if testutils.IsPError(pErr, origMsg) { 2198 // Mess with anyone holding the same reference. 2199 pErr.Message = newMsg 2200 } else if pErr != nil { 2201 t.Errorf("%d: unexpected error: %s", i, pErr) 2202 } 2203 } 2204 }) 2205 } 2206 2207 // TestReplicaUpdateTSCache verifies that reads and writes update the 2208 // timestamp cache. 2209 func TestReplicaUpdateTSCache(t *testing.T) { 2210 defer leaktest.AfterTest(t)() 2211 tc := testContext{} 2212 stopper := stop.NewStopper() 2213 defer stopper.Stop(context.Background()) 2214 tc.Start(t, stopper) 2215 2216 startNanos := tc.Clock().Now().WallTime 2217 2218 // Set clock to time 1s and do the read. 2219 t0 := 1 * time.Second 2220 tc.manualClock.Set(t0.Nanoseconds()) 2221 gArgs := getArgs([]byte("a")) 2222 2223 if _, pErr := tc.SendWrappedWith(roachpb.Header{Timestamp: tc.Clock().Now()}, &gArgs); pErr != nil { 2224 t.Error(pErr) 2225 } 2226 // Set clock to time 2s for write. 2227 t1 := 2 * time.Second 2228 key := roachpb.Key([]byte("b")) 2229 tc.manualClock.Set(t1.Nanoseconds()) 2230 drArgs := roachpb.NewDeleteRange(key, key.Next(), false /* returnKeys */) 2231 2232 if _, pErr := tc.SendWrappedWith(roachpb.Header{Timestamp: tc.Clock().Now()}, drArgs); pErr != nil { 2233 t.Error(pErr) 2234 } 2235 // Verify the timestamp cache has rTS=1s and wTS=0s for "a". 2236 noID := uuid.UUID{} 2237 rTS, rTxnID := tc.repl.store.tsCache.GetMax(roachpb.Key("a"), nil) 2238 if rTS.WallTime != t0.Nanoseconds() || rTxnID != noID { 2239 t.Errorf("expected rTS=1s but got %s; rTxnID=%s", rTS, rTxnID) 2240 } 2241 // Verify the timestamp cache has rTS=2s for "b". 2242 rTS, rTxnID = tc.repl.store.tsCache.GetMax(roachpb.Key("b"), nil) 2243 if rTS.WallTime != t1.Nanoseconds() || rTxnID != noID { 2244 t.Errorf("expected rTS=2s but got %s; rTxnID=%s", rTS, rTxnID) 2245 } 2246 // Verify another key ("c") has 0sec in timestamp cache. 2247 rTS, rTxnID = tc.repl.store.tsCache.GetMax(roachpb.Key("c"), nil) 2248 if rTS.WallTime != startNanos || rTxnID != noID { 2249 t.Errorf("expected rTS=0s but got %s; rTxnID=%s", rTS, rTxnID) 2250 } 2251 } 2252 2253 // TestReplicaLatching verifies that reads/writes must wait for 2254 // pending commands to complete through Raft before being executed on 2255 // range. 2256 func TestReplicaLatching(t *testing.T) { 2257 defer leaktest.AfterTest(t)() 2258 // Test all four combinations of reads & writes waiting. 2259 testCases := []struct { 2260 cmd1Read, cmd2Read bool 2261 expWait, expLocalWait bool 2262 }{ 2263 // Read/read doesn't wait. 2264 {true, true, false, false}, 2265 // A write doesn't wait for an earlier read (except for local keys). 2266 {true, false, false, true}, 2267 // A read must wait for an earlier write. 2268 {false, true, true, true}, 2269 // Writes always wait for other writes. 
2270 {false, false, true, true}, 2271 } 2272 2273 tooLong := 5 * time.Second 2274 2275 uniqueKeyCounter := int32(0) 2276 2277 for _, test := range testCases { 2278 var addReqs []string 2279 if test.cmd1Read { 2280 addReqs = []string{"", "read"} 2281 } else { 2282 addReqs = []string{"", "write"} 2283 } 2284 for _, addReq := range addReqs { 2285 for _, localKey := range []bool{false, true} { 2286 expWait := test.expWait 2287 if localKey { 2288 expWait = test.expLocalWait 2289 } 2290 readWriteLabels := map[bool]string{true: "read", false: "write"} 2291 testName := fmt.Sprintf( 2292 "%s-%s", readWriteLabels[test.cmd1Read], readWriteLabels[test.cmd2Read], 2293 ) 2294 switch addReq { 2295 case "read": 2296 testName += "-addRead" 2297 case "write": 2298 testName += "-addWrite" 2299 } 2300 if localKey { 2301 testName += "-local" 2302 } 2303 key1 := roachpb.Key(fmt.Sprintf("key1-%s", testName)) 2304 key2 := roachpb.Key(fmt.Sprintf("key2-%s", testName)) 2305 if localKey { 2306 key1 = keys.MakeRangeKeyPrefix(roachpb.RKey(key1)) 2307 key2 = keys.MakeRangeKeyPrefix(roachpb.RKey(key2)) 2308 } 2309 t.Run(testName, 2310 func(t *testing.T) { 2311 // Intercept commands matching a specific priority and block them. 2312 const blockingPriority = 42 2313 blockingStart := make(chan struct{}) 2314 blockingDone := make(chan struct{}) 2315 2316 tc := testContext{} 2317 tsc := TestStoreConfig(nil) 2318 tsc.TestingKnobs.EvalKnobs.TestingEvalFilter = 2319 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error { 2320 if filterArgs.Hdr.UserPriority == blockingPriority && filterArgs.Index == 0 { 2321 blockingStart <- struct{}{} 2322 <-blockingDone 2323 } 2324 return nil 2325 } 2326 stopper := stop.NewStopper() 2327 defer stopper.Stop(context.Background()) 2328 tc.StartWithStoreConfig(t, stopper, tsc) 2329 2330 defer close(blockingDone) // make sure teardown can happen 2331 2332 sendWithHeader := func(header roachpb.Header, args roachpb.Request) *roachpb.Error { 2333 ba := roachpb.BatchRequest{} 2334 ba.Header = header 2335 ba.Add(args) 2336 2337 if header.UserPriority == blockingPriority { 2338 switch addReq { 2339 case "read", "write": 2340 // Additional reads and writes to unique keys do not 2341 // cause additional blocking; the read/write nature of 2342 // the keys for latching purposes is determined on a 2343 // per-request basis. 2344 key := roachpb.Key(fmt.Sprintf("unique-key-%s-%d", testName, atomic.AddInt32(&uniqueKeyCounter, 1))) 2345 if addReq == "read" { 2346 req := getArgs(key) 2347 ba.Add(&req) 2348 } else { 2349 req := putArgs(key, []byte{}) 2350 ba.Add(&req) 2351 } 2352 } 2353 } 2354 2355 _, pErr := tc.Sender().Send(context.Background(), ba) 2356 return pErr 2357 } 2358 2359 // Asynchronously put a value to the range with blocking enabled. 2360 cmd1Done := make(chan *roachpb.Error, 1) 2361 if err := stopper.RunAsyncTask(context.Background(), "test", func(_ context.Context) { 2362 args := readOrWriteArgs(key1, test.cmd1Read) 2363 cmd1Done <- sendWithHeader(roachpb.Header{ 2364 UserPriority: blockingPriority, 2365 }, args) 2366 }); err != nil { 2367 t.Fatal(err) 2368 } 2369 // Wait for cmd1 to get acquire latches. 2370 select { 2371 case <-blockingStart: 2372 case <-time.After(tooLong): 2373 t.Fatalf("waited %s for cmd1 to acquire latches", tooLong) 2374 } 2375 2376 // First, try a command for same key as cmd1 to verify whether it blocks. 
2377 cmd2Done := make(chan *roachpb.Error, 1) 2378 if err := stopper.RunAsyncTask(context.Background(), "", func(_ context.Context) { 2379 args := readOrWriteArgs(key1, test.cmd2Read) 2380 cmd2Done <- sendWithHeader(roachpb.Header{}, args) 2381 }); err != nil { 2382 t.Fatal(err) 2383 } 2384 2385 // Next, try read for a non-impacted key--should go through immediately. 2386 cmd3Done := make(chan *roachpb.Error, 1) 2387 if err := stopper.RunAsyncTask(context.Background(), "", func(_ context.Context) { 2388 args := readOrWriteArgs(key2, true) 2389 cmd3Done <- sendWithHeader(roachpb.Header{}, args) 2390 }); err != nil { 2391 t.Fatal(err) 2392 } 2393 2394 // Verify that cmd3 finishes quickly no matter what cmds 1 and 2 were. 2395 select { 2396 case pErr := <-cmd3Done: 2397 if pErr != nil { 2398 t.Fatalf("cmd3 failed: %s", pErr) 2399 } 2400 // success. 2401 case pErr := <-cmd1Done: 2402 t.Fatalf("should not have been able execute cmd1 while blocked (pErr: %v)", pErr) 2403 case <-time.After(tooLong): 2404 t.Fatalf("waited %s for cmd3 of key2", tooLong) 2405 } 2406 2407 if expWait { 2408 // Ensure that cmd2 didn't finish while cmd1 is still blocked. 2409 select { 2410 case pErr := <-cmd2Done: 2411 t.Fatalf("should not have been able to execute cmd2 (pErr: %v)", pErr) 2412 case pErr := <-cmd1Done: 2413 t.Fatalf("should not have been able to execute cmd1 while blocked (pErr: %v)", pErr) 2414 default: 2415 // success 2416 } 2417 } else { 2418 // Ensure that cmd2 finished if we didn't expect to have to wait. 2419 select { 2420 case pErr := <-cmd2Done: 2421 if pErr != nil { 2422 t.Fatalf("cmd2 failed: %s", pErr) 2423 } 2424 // success. 2425 case pErr := <-cmd1Done: 2426 t.Fatalf("should not have been able to execute cmd1 while blocked (pErr: %v)", pErr) 2427 case <-time.After(tooLong): 2428 t.Fatalf("waited %s for cmd2 of key1", tooLong) 2429 } 2430 } 2431 2432 // Wait for cmd1 to finish. 2433 blockingDone <- struct{}{} 2434 select { 2435 case pErr := <-cmd1Done: 2436 if pErr != nil { 2437 t.Fatalf("cmd1 failed: %s", pErr) 2438 } 2439 // success. 2440 case <-time.After(tooLong): 2441 t.Fatalf("waited %s for cmd1 of key1", tooLong) 2442 } 2443 2444 // Wait for cmd2 now if it didn't finish above. 2445 if test.expWait { 2446 select { 2447 case pErr := <-cmd2Done: 2448 if pErr != nil { 2449 t.Fatalf("cmd2 failed: %s", pErr) 2450 } 2451 // success. 2452 case <-time.After(tooLong): 2453 t.Fatalf("waited %s for cmd2 of key1", tooLong) 2454 } 2455 } 2456 }) 2457 } 2458 } 2459 } 2460 } 2461 2462 // TestReplicaLatchingInconsistent verifies that inconsistent reads need 2463 // not wait for pending commands to complete through Raft. 
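// The read below does not wait on the in-flight write because it sets a
// weaker read consistency on the request header, e.g. (sketch of the call
// made in the test body; getReq stands for the Get request built there):
//
//	_, pErr := tc.SendWrappedWith(roachpb.Header{
//		ReadConsistency: roachpb.INCONSISTENT, // or roachpb.READ_UNCOMMITTED
//	}, &getReq)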
2464 func TestReplicaLatchingInconsistent(t *testing.T) { 2465 defer leaktest.AfterTest(t)() 2466 2467 for _, rc := range []roachpb.ReadConsistencyType{ 2468 roachpb.READ_UNCOMMITTED, 2469 roachpb.INCONSISTENT, 2470 } { 2471 t.Run(rc.String(), func(t *testing.T) { 2472 key := roachpb.Key("key1") 2473 blockingStart := make(chan struct{}, 1) 2474 blockingDone := make(chan struct{}) 2475 2476 tc := testContext{} 2477 tsc := TestStoreConfig(nil) 2478 tsc.TestingKnobs.EvalKnobs.TestingEvalFilter = 2479 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error { 2480 if put, ok := filterArgs.Req.(*roachpb.PutRequest); ok { 2481 putBytes, err := put.Value.GetBytes() 2482 if err != nil { 2483 return roachpb.NewErrorWithTxn(err, filterArgs.Hdr.Txn) 2484 } 2485 if bytes.Equal(put.Key, key) && bytes.Equal(putBytes, []byte{1}) { 2486 // Absence of replay protection can mean that we end up here 2487 // more often than we expect, hence the select (#3669). 2488 select { 2489 case blockingStart <- struct{}{}: 2490 default: 2491 } 2492 <-blockingDone 2493 } 2494 } 2495 2496 return nil 2497 } 2498 stopper := stop.NewStopper() 2499 defer stopper.Stop(context.Background()) 2500 tc.StartWithStoreConfig(t, stopper, tsc) 2501 cmd1Done := make(chan *roachpb.Error) 2502 go func() { 2503 args := putArgs(key, []byte{1}) 2504 2505 _, pErr := tc.SendWrapped(&args) 2506 cmd1Done <- pErr 2507 }() 2508 // Wait for cmd1 to get acquire latches. 2509 <-blockingStart 2510 2511 // An inconsistent read to the key won't wait. 2512 cmd2Done := make(chan *roachpb.Error) 2513 go func() { 2514 args := getArgs(key) 2515 2516 _, pErr := tc.SendWrappedWith(roachpb.Header{ 2517 ReadConsistency: rc, 2518 }, &args) 2519 cmd2Done <- pErr 2520 }() 2521 2522 select { 2523 case pErr := <-cmd2Done: 2524 if pErr != nil { 2525 t.Fatal(pErr) 2526 } 2527 // success. 2528 case pErr := <-cmd1Done: 2529 t.Fatalf("cmd1 should have been blocked, got %v", pErr) 2530 } 2531 2532 close(blockingDone) 2533 if pErr := <-cmd1Done; pErr != nil { 2534 t.Fatal(pErr) 2535 } 2536 // Success. 2537 }) 2538 } 2539 } 2540 2541 // TestReplicaLatchingSelfOverlap verifies that self-overlapping batches are 2542 // allowed, and in particular do not deadlock by introducing latch dependencies 2543 // between the parts of the batch. 2544 func TestReplicaLatchingSelfOverlap(t *testing.T) { 2545 defer leaktest.AfterTest(t)() 2546 tc := testContext{} 2547 stopper := stop.NewStopper() 2548 defer stopper.Stop(context.Background()) 2549 tc.Start(t, stopper) 2550 2551 testutils.RunTrueAndFalse(t, "cmd1Read", func(t *testing.T, cmd1Read bool) { 2552 testutils.RunTrueAndFalse(t, "cmd2Read", func(t *testing.T, cmd2Read bool) { 2553 key := fmt.Sprintf("%v,%v", cmd1Read, cmd2Read) 2554 ba := roachpb.BatchRequest{} 2555 ba.Add(readOrWriteArgs(roachpb.Key(key), cmd1Read)) 2556 ba.Add(readOrWriteArgs(roachpb.Key(key), cmd2Read)) 2557 2558 // Set a deadline for nicer error behavior on deadlock. 2559 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 2560 defer cancel() 2561 _, pErr := tc.Sender().Send(ctx, ba) 2562 if pErr != nil { 2563 if _, ok := pErr.GetDetail().(*roachpb.WriteTooOldError); ok && !cmd1Read && !cmd2Read { 2564 // WriteTooOldError is expected in the write/write case because we don't 2565 // allow self-overlapping non-transactional batches. 2566 } else { 2567 t.Fatal(pErr) 2568 } 2569 } 2570 }) 2571 }) 2572 } 2573 2574 // TestReplicaLatchingTimestampNonInterference verifies that 2575 // reads with earlier timestamps do not interfere with writes. 
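// The expectations encoded in the test cases below are, roughly:
//   - a reader and writer at the same timestamp interfere;
//   - a reader below the writer's timestamp does not interfere;
//   - a writer below the reader's timestamp interferes;
//   - local (non-MVCC) keys always interfere, regardless of timestamps.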
2576 func TestReplicaLatchingTimestampNonInterference(t *testing.T) { 2577 defer leaktest.AfterTest(t)() 2578 2579 var blockKey, blockReader, blockWriter atomic.Value 2580 blockKey.Store(roachpb.Key("a")) 2581 blockReader.Store(false) 2582 blockWriter.Store(false) 2583 blockCh := make(chan struct{}, 1) 2584 blockedCh := make(chan struct{}, 1) 2585 2586 tc := testContext{} 2587 tsc := TestStoreConfig(nil) 2588 tsc.TestingKnobs.EvalKnobs.TestingEvalFilter = 2589 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error { 2590 // Make sure the direct GC path doesn't interfere with this test. 2591 if !filterArgs.Req.Header().Key.Equal(blockKey.Load().(roachpb.Key)) { 2592 return nil 2593 } 2594 if filterArgs.Req.Method() == roachpb.Get && blockReader.Load().(bool) { 2595 blockedCh <- struct{}{} 2596 <-blockCh 2597 } else if filterArgs.Req.Method() == roachpb.Put && blockWriter.Load().(bool) { 2598 blockedCh <- struct{}{} 2599 <-blockCh 2600 } 2601 return nil 2602 } 2603 stopper := stop.NewStopper() 2604 defer stopper.Stop(context.Background()) 2605 tc.StartWithStoreConfig(t, stopper, tsc) 2606 2607 testCases := []struct { 2608 readerTS hlc.Timestamp 2609 writerTS hlc.Timestamp 2610 key roachpb.Key 2611 readerFirst bool 2612 interferes bool 2613 }{ 2614 // Reader & writer have same timestamps. 2615 {makeTS(1, 0), makeTS(1, 0), roachpb.Key("a"), true, true}, 2616 {makeTS(1, 0), makeTS(1, 0), roachpb.Key("b"), false, true}, 2617 // Reader has earlier timestamp. 2618 {makeTS(1, 0), makeTS(1, 1), roachpb.Key("c"), true, false}, 2619 {makeTS(1, 0), makeTS(1, 1), roachpb.Key("d"), false, false}, 2620 // Writer has earlier timestamp. 2621 {makeTS(1, 1), makeTS(1, 0), roachpb.Key("e"), true, true}, 2622 {makeTS(1, 1), makeTS(1, 0), roachpb.Key("f"), false, true}, 2623 // Local keys always interfere. 2624 {makeTS(1, 0), makeTS(1, 1), keys.RangeDescriptorKey(roachpb.RKey("a")), true, true}, 2625 {makeTS(1, 0), makeTS(1, 1), keys.RangeDescriptorKey(roachpb.RKey("b")), false, true}, 2626 } 2627 for _, test := range testCases { 2628 t.Run(fmt.Sprintf("%+v", test), func(t *testing.T) { 2629 blockReader.Store(false) 2630 blockWriter.Store(false) 2631 blockKey.Store(test.key) 2632 errCh := make(chan *roachpb.Error, 2) 2633 2634 baR := roachpb.BatchRequest{} 2635 baR.Timestamp = test.readerTS 2636 gArgs := getArgs(test.key) 2637 baR.Add(&gArgs) 2638 baW := roachpb.BatchRequest{} 2639 baW.Timestamp = test.writerTS 2640 pArgs := putArgs(test.key, []byte("value")) 2641 baW.Add(&pArgs) 2642 2643 if test.readerFirst { 2644 blockReader.Store(true) 2645 go func() { 2646 _, pErr := tc.Sender().Send(context.Background(), baR) 2647 errCh <- pErr 2648 }() 2649 <-blockedCh 2650 go func() { 2651 _, pErr := tc.Sender().Send(context.Background(), baW) 2652 errCh <- pErr 2653 }() 2654 } else { 2655 blockWriter.Store(true) 2656 go func() { 2657 _, pErr := tc.Sender().Send(context.Background(), baW) 2658 errCh <- pErr 2659 }() 2660 <-blockedCh 2661 go func() { 2662 _, pErr := tc.Sender().Send(context.Background(), baR) 2663 errCh <- pErr 2664 }() 2665 } 2666 2667 if test.interferes { 2668 select { 2669 case <-time.After(10 * time.Millisecond): 2670 // Expected. 2671 case pErr := <-errCh: 2672 t.Fatalf("expected interference: got error %s", pErr) 2673 } 2674 } 2675 // Verify no errors on waiting read and write. 
2676 blockCh <- struct{}{} 2677 for j := 0; j < 2; j++ { 2678 if pErr := <-errCh; pErr != nil { 2679 t.Errorf("error %d: unexpected error: %s", j, pErr) 2680 } 2681 } 2682 }) 2683 } 2684 } 2685 2686 // TestReplicaLatchingSplitDeclaresWrites verifies that split operations declare 2687 // non-MVCC read access to the LHS and non-MVCC write access to the RHS of the 2688 // split. This is necessary to avoid conflicting changes to the range's stats, 2689 // even though splits do not actually write to their data span (and therefore a 2690 // failure to declare writes are not caught directly by any other test). 2691 func TestReplicaLatchingSplitDeclaresWrites(t *testing.T) { 2692 defer leaktest.AfterTest(t)() 2693 2694 var spans spanset.SpanSet 2695 cmd, _ := batcheval.LookupCommand(roachpb.EndTxn) 2696 cmd.DeclareKeys( 2697 &roachpb.RangeDescriptor{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("e")}, 2698 roachpb.Header{}, 2699 &roachpb.EndTxnRequest{ 2700 InternalCommitTrigger: &roachpb.InternalCommitTrigger{ 2701 SplitTrigger: &roachpb.SplitTrigger{ 2702 LeftDesc: roachpb.RangeDescriptor{ 2703 StartKey: roachpb.RKey("a"), 2704 EndKey: roachpb.RKey("c"), 2705 }, 2706 RightDesc: roachpb.RangeDescriptor{ 2707 StartKey: roachpb.RKey("c"), 2708 EndKey: roachpb.RKey("e"), 2709 }, 2710 }, 2711 }, 2712 }, 2713 &spans, 2714 nil, 2715 ) 2716 for _, tc := range []struct { 2717 access spanset.SpanAccess 2718 key roachpb.Key 2719 expectAccess bool 2720 }{ 2721 {spanset.SpanReadOnly, roachpb.Key("b"), true}, 2722 {spanset.SpanReadOnly, roachpb.Key("d"), true}, 2723 {spanset.SpanReadWrite, roachpb.Key("b"), false}, 2724 {spanset.SpanReadWrite, roachpb.Key("d"), true}, 2725 } { 2726 err := spans.CheckAllowed(tc.access, roachpb.Span{Key: tc.key}) 2727 if tc.expectAccess { 2728 require.NoError(t, err) 2729 } else { 2730 require.NotNil(t, err) 2731 require.Regexp(t, "undeclared span", err) 2732 } 2733 } 2734 } 2735 2736 // TestReplicaUseTSCache verifies that write timestamps are upgraded 2737 // based on the timestamp cache. 2738 func TestReplicaUseTSCache(t *testing.T) { 2739 defer leaktest.AfterTest(t)() 2740 tc := testContext{} 2741 stopper := stop.NewStopper() 2742 defer stopper.Stop(context.Background()) 2743 tc.Start(t, stopper) 2744 // Set clock to time 1s and do the read. 2745 t0 := 1 * time.Second 2746 tc.manualClock.Set(t0.Nanoseconds()) 2747 args := getArgs([]byte("a")) 2748 2749 _, pErr := tc.SendWrapped(&args) 2750 2751 if pErr != nil { 2752 t.Error(pErr) 2753 } 2754 pArgs := putArgs([]byte("a"), []byte("value")) 2755 2756 var ba roachpb.BatchRequest 2757 ba.Add(&pArgs) 2758 br, pErr := tc.Sender().Send(context.Background(), ba) 2759 if pErr != nil { 2760 t.Fatal(pErr) 2761 } 2762 if br.Timestamp.WallTime != tc.Clock().Now().WallTime { 2763 t.Errorf("expected write timestamp to upgrade to 1s; got %s", br.Timestamp) 2764 } 2765 } 2766 2767 // TestReplicaTSCacheForwardsIntentTS verifies that the timestamp cache affects 2768 // the timestamps at which intents are written. That is, if a transactional 2769 // write is forwarded by the timestamp cache due to a more recent read, the 2770 // written intents must be left at the forwarded timestamp. See the comment on 2771 // the enginepb.TxnMeta.Timestamp field for rationale. 
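// Sketch of the scenario below: a Get and a DeleteRange at tsNew populate the
// timestamp cache; a transaction anchored at the older tsOld then writes the
// same keys, and the test asserts that the resulting intents sit at
// tsNew.Next() rather than at tsOld.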
2772 func TestReplicaTSCacheForwardsIntentTS(t *testing.T) { 2773 defer leaktest.AfterTest(t)() 2774 2775 ctx := context.Background() 2776 tc := testContext{} 2777 stopper := stop.NewStopper() 2778 defer stopper.Stop(ctx) 2779 tc.Start(t, stopper) 2780 2781 tsOld := tc.Clock().Now() 2782 tsNew := tsOld.Add(time.Millisecond.Nanoseconds(), 0) 2783 2784 // Read at tNew to populate the timestamp cache. 2785 // DeleteRange at tNew to populate the timestamp cache. 2786 txnNew := newTransaction("new", roachpb.Key("txn-anchor"), roachpb.NormalUserPriority, tc.Clock()) 2787 txnNew.ReadTimestamp = tsNew 2788 txnNew.WriteTimestamp = tsNew 2789 keyGet := roachpb.Key("get") 2790 keyDeleteRange := roachpb.Key("delete-range") 2791 gArgs := getArgs(keyGet) 2792 drArgs := deleteRangeArgs(keyDeleteRange, keyDeleteRange.Next()) 2793 assignSeqNumsForReqs(txnNew, &gArgs, &drArgs) 2794 var ba roachpb.BatchRequest 2795 ba.Header.Txn = txnNew 2796 ba.Add(&gArgs, &drArgs) 2797 if _, pErr := tc.Sender().Send(ctx, ba); pErr != nil { 2798 t.Fatal(pErr) 2799 } 2800 2801 // Write under the timestamp cache within the transaction, and verify that 2802 // the intents are written above the timestamp cache. 2803 txnOld := newTransaction("old", roachpb.Key("txn-anchor"), roachpb.NormalUserPriority, tc.Clock()) 2804 txnOld.ReadTimestamp = tsOld 2805 txnOld.WriteTimestamp = tsOld 2806 for _, key := range []roachpb.Key{keyGet, keyDeleteRange} { 2807 t.Run(string(key), func(t *testing.T) { 2808 pArgs := putArgs(key, []byte("foo")) 2809 assignSeqNumsForReqs(txnOld, &pArgs) 2810 if _, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txnOld}, &pArgs); pErr != nil { 2811 t.Fatal(pErr) 2812 } 2813 iter := tc.engine.NewIterator(storage.IterOptions{Prefix: true}) 2814 defer iter.Close() 2815 mvccKey := storage.MakeMVCCMetadataKey(key) 2816 iter.SeekGE(mvccKey) 2817 var keyMeta enginepb.MVCCMetadata 2818 if ok, err := iter.Valid(); !ok || !iter.UnsafeKey().Equal(mvccKey) { 2819 t.Fatalf("missing mvcc metadata for %q: %+v", mvccKey, err) 2820 } else if err := iter.ValueProto(&keyMeta); err != nil { 2821 t.Fatalf("failed to unmarshal metadata for %q", mvccKey) 2822 } 2823 if tsNext := tsNew.Next(); hlc.Timestamp(keyMeta.Timestamp) != tsNext { 2824 t.Errorf("timestamp not forwarded for %q intent: expected %s but got %s", 2825 key, tsNext, keyMeta.Timestamp) 2826 } 2827 }) 2828 } 2829 } 2830 2831 func TestConditionalPutUpdatesTSCacheOnError(t *testing.T) { 2832 defer leaktest.AfterTest(t)() 2833 tc := testContext{manualClock: hlc.NewManualClock(123)} 2834 stopper := stop.NewStopper() 2835 defer stopper.Stop(context.Background()) 2836 cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond)) 2837 cfg.TestingKnobs.DontPushOnWriteIntentError = true 2838 tc.StartWithStoreConfig(t, stopper, cfg) 2839 2840 // Set clock to time 2s and do the conditional put. 2841 t1 := makeTS(1*time.Second.Nanoseconds(), 0) 2842 t2 := makeTS(2*time.Second.Nanoseconds(), 0) 2843 t2Next := t2.Next() 2844 tc.manualClock.Set(t2.WallTime) 2845 2846 // CPut args which expect value "1" to write "0". 
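// In terms of cPutArgs(key, value, expValue), cpArgs1 writes "1" on the
// condition that the existing value is "0"; since the key is still empty at
// this point, the request fails with a ConditionFailedError, which is the
// failure expected to update the timestamp cache.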
2847 key := []byte("a") 2848 cpArgs1 := cPutArgs(key, []byte("1"), []byte("0")) 2849 _, pErr := tc.SendWrappedWith(roachpb.Header{Timestamp: t2}, &cpArgs1) 2850 if cfErr, ok := pErr.GetDetail().(*roachpb.ConditionFailedError); !ok { 2851 t.Errorf("expected ConditionFailedError; got %v", pErr) 2852 } else if cfErr.ActualValue != nil { 2853 t.Errorf("expected empty actual value; got %s", cfErr.ActualValue) 2854 } 2855 2856 // Try a transactional conditional put at a lower timestamp and 2857 // ensure it is pushed. 2858 txnEarly := newTransaction("test", key, 1, tc.Clock()) 2859 txnEarly.ReadTimestamp, txnEarly.WriteTimestamp = t1, t1 2860 cpArgs2 := cPutArgs(key, []byte("value"), nil) 2861 resp, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txnEarly}, &cpArgs2) 2862 if pErr != nil { 2863 t.Fatal(pErr) 2864 } else if respTS := resp.Header().Txn.WriteTimestamp; respTS != t2Next { 2865 t.Errorf("expected write timestamp to upgrade to %s; got %s", t2Next, respTS) 2866 } 2867 2868 // Try a conditional put at a later timestamp which will fail 2869 // because there's now a transaction intent. This failure will 2870 // not update the timestamp cache. 2871 t3 := makeTS(3*time.Second.Nanoseconds(), 0) 2872 tc.manualClock.Set(t3.WallTime) 2873 _, pErr = tc.SendWrapped(&cpArgs1) 2874 if _, ok := pErr.GetDetail().(*roachpb.WriteIntentError); !ok { 2875 t.Errorf("expected WriteIntentError; got %v", pErr) 2876 } 2877 2878 // Abort the intent and try a transactional conditional put at 2879 // a later timestamp. This should succeed and should not update 2880 // the timestamp cache. 2881 abortIntent := func(s roachpb.Span, abortTxn *roachpb.Transaction) { 2882 if _, pErr = tc.SendWrapped(&roachpb.ResolveIntentRequest{ 2883 RequestHeader: roachpb.RequestHeaderFromSpan(s), 2884 IntentTxn: abortTxn.TxnMeta, 2885 Status: roachpb.ABORTED, 2886 }); pErr != nil { 2887 t.Fatal(pErr) 2888 } 2889 } 2890 abortIntent(cpArgs2.Span(), txnEarly) 2891 txnLater := *txnEarly 2892 txnLater.ReadTimestamp, txnLater.WriteTimestamp = t3, t3 2893 resp, pErr = tc.SendWrappedWith(roachpb.Header{Txn: &txnLater}, &cpArgs2) 2894 if pErr != nil { 2895 t.Fatal(pErr) 2896 } else if respTS := resp.Header().Txn.WriteTimestamp; respTS != t3 { 2897 t.Errorf("expected write timestamp to be %s; got %s", t3, respTS) 2898 } 2899 2900 // Abort the intent again and try to write again to ensure the timestamp 2901 // cache wasn't updated by the second (successful), third (unsuccessful), 2902 // or fourth (successful) conditional put. Only the conditional put that 2903 // hit a ConditionFailedError should update the timestamp cache. 2904 abortIntent(cpArgs2.Span(), &txnLater) 2905 resp, pErr = tc.SendWrappedWith(roachpb.Header{Txn: txnEarly}, &cpArgs2) 2906 if pErr != nil { 2907 t.Fatal(pErr) 2908 } else if respTS := resp.Header().Txn.WriteTimestamp; respTS != t2Next { 2909 t.Errorf("expected write timestamp to upgrade to %s; got %s", t2Next, respTS) 2910 } 2911 } 2912 2913 func TestInitPutUpdatesTSCacheOnError(t *testing.T) { 2914 defer leaktest.AfterTest(t)() 2915 tc := testContext{manualClock: hlc.NewManualClock(123)} 2916 stopper := stop.NewStopper() 2917 defer stopper.Stop(context.Background()) 2918 cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond)) 2919 cfg.TestingKnobs.DontPushOnWriteIntentError = true 2920 tc.StartWithStoreConfig(t, stopper, cfg) 2921 2922 // InitPut args to write "0". Should succeed. 
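// InitPut writes its value if the key is empty and, per the checks below,
// fails with a ConditionFailedError reporting the existing value if the key
// already holds a different one.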
2923 key := []byte("a") 2924 value := []byte("0") 2925 ipArgs1 := iPutArgs(key, value) 2926 _, pErr := tc.SendWrapped(&ipArgs1) 2927 if pErr != nil { 2928 t.Fatal(pErr) 2929 } 2930 2931 // Set clock to time 2s and do other init puts. 2932 t1 := makeTS(1*time.Second.Nanoseconds(), 0) 2933 t2 := makeTS(2*time.Second.Nanoseconds(), 0) 2934 t2Next := t2.Next() 2935 tc.manualClock.Set(t2.WallTime) 2936 2937 // InitPut args to write "1" to same key. Should fail. 2938 ipArgs2 := iPutArgs(key, []byte("1")) 2939 _, pErr = tc.SendWrappedWith(roachpb.Header{Timestamp: t2}, &ipArgs2) 2940 if cfErr, ok := pErr.GetDetail().(*roachpb.ConditionFailedError); !ok { 2941 t.Errorf("expected ConditionFailedError; got %v", pErr) 2942 } else if valueBytes, err := cfErr.ActualValue.GetBytes(); err != nil { 2943 t.Fatal(err) 2944 } else if cfErr.ActualValue == nil || !bytes.Equal(valueBytes, value) { 2945 t.Errorf("expected value %q; got %+v", value, valueBytes) 2946 } 2947 2948 // Try a transactional init put at a lower timestamp and 2949 // ensure it is pushed. 2950 txnEarly := newTransaction("test", key, 1, tc.Clock()) 2951 txnEarly.ReadTimestamp, txnEarly.WriteTimestamp = t1, t1 2952 resp, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txnEarly}, &ipArgs1) 2953 if pErr != nil { 2954 t.Fatal(pErr) 2955 } else if respTS := resp.Header().Txn.WriteTimestamp; respTS != t2Next { 2956 t.Errorf("expected write timestamp to upgrade to %s; got %s", t2Next, respTS) 2957 } 2958 2959 // Try an init put at a later timestamp which will fail 2960 // because there's now a transaction intent. This failure 2961 // will not update the timestamp cache. 2962 t3 := makeTS(3*time.Second.Nanoseconds(), 0) 2963 tc.manualClock.Set(t3.WallTime) 2964 _, pErr = tc.SendWrapped(&ipArgs2) 2965 if _, ok := pErr.GetDetail().(*roachpb.WriteIntentError); !ok { 2966 t.Errorf("expected WriteIntentError; got %v", pErr) 2967 } 2968 2969 // Abort the intent and try a transactional init put at a later 2970 // timestamp. This should succeed and should not update the 2971 // timestamp cache. 2972 abortIntent := func(s roachpb.Span, abortTxn *roachpb.Transaction) { 2973 if _, pErr = tc.SendWrapped(&roachpb.ResolveIntentRequest{ 2974 RequestHeader: roachpb.RequestHeaderFromSpan(s), 2975 IntentTxn: abortTxn.TxnMeta, 2976 Status: roachpb.ABORTED, 2977 }); pErr != nil { 2978 t.Fatal(pErr) 2979 } 2980 } 2981 abortIntent(ipArgs1.Span(), txnEarly) 2982 txnLater := *txnEarly 2983 txnLater.ReadTimestamp, txnLater.WriteTimestamp = t3, t3 2984 resp, pErr = tc.SendWrappedWith(roachpb.Header{Txn: &txnLater}, &ipArgs1) 2985 if pErr != nil { 2986 t.Fatal(pErr) 2987 } else if respTS := resp.Header().Txn.WriteTimestamp; respTS != t3 { 2988 t.Errorf("expected write timestamp to be %s; got %s", t3, respTS) 2989 } 2990 2991 // Abort the intent again and try to write again to ensure the timestamp 2992 // cache wasn't updated by the second (successful), third (unsuccessful), 2993 // or fourth (successful) init put. Only the init put that hit a 2994 // ConditionFailedError should update the timestamp cache. 2995 abortIntent(ipArgs1.Span(), &txnLater) 2996 resp, pErr = tc.SendWrappedWith(roachpb.Header{Txn: txnEarly}, &ipArgs1) 2997 if pErr != nil { 2998 t.Fatal(pErr) 2999 } else if respTS := resp.Header().Txn.WriteTimestamp; respTS != t2Next { 3000 t.Errorf("expected write timestamp to upgrade to %s; got %s", t2Next, respTS) 3001 } 3002 } 3003 3004 // TestReplicaNoTSCacheInconsistent verifies that the timestamp cache 3005 // is not affected by inconsistent reads. 
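// Because such reads leave no trace in the timestamp cache, the
// non-transactional write issued afterwards at a lower timestamp is not
// forwarded, which is what the test asserts.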
3006 func TestReplicaNoTSCacheInconsistent(t *testing.T) { 3007 defer leaktest.AfterTest(t)() 3008 3009 for _, rc := range []roachpb.ReadConsistencyType{ 3010 roachpb.READ_UNCOMMITTED, 3011 roachpb.INCONSISTENT, 3012 } { 3013 t.Run(rc.String(), func(t *testing.T) { 3014 tc := testContext{} 3015 stopper := stop.NewStopper() 3016 defer stopper.Stop(context.Background()) 3017 tc.Start(t, stopper) 3018 // Set clock to time 1s and do the read. 3019 t0 := 1 * time.Second 3020 tc.manualClock.Set(t0.Nanoseconds()) 3021 args := getArgs([]byte("a")) 3022 ts := tc.Clock().Now() 3023 3024 _, pErr := tc.SendWrappedWith(roachpb.Header{ 3025 Timestamp: ts, 3026 ReadConsistency: rc, 3027 }, &args) 3028 3029 if pErr != nil { 3030 t.Error(pErr) 3031 } 3032 pArgs := putArgs([]byte("a"), []byte("value")) 3033 3034 var ba roachpb.BatchRequest 3035 ba.Header = roachpb.Header{Timestamp: hlc.Timestamp{WallTime: 0, Logical: 1}} 3036 ba.Add(&pArgs) 3037 br, pErr := tc.Sender().Send(context.Background(), ba) 3038 if pErr != nil { 3039 t.Fatal(pErr) 3040 } 3041 if br.Timestamp.WallTime == tc.Clock().Now().WallTime { 3042 t.Errorf("expected write timestamp not to upgrade to 1s; got %s", br.Timestamp) 3043 } 3044 }) 3045 } 3046 } 3047 3048 // TestReplicaNoTSCacheUpdateOnFailure verifies that read and write 3049 // commands do not update the timestamp cache if they result in 3050 // failure. 3051 func TestReplicaNoTSCacheUpdateOnFailure(t *testing.T) { 3052 defer leaktest.AfterTest(t)() 3053 tc := testContext{} 3054 stopper := stop.NewStopper() 3055 defer stopper.Stop(context.Background()) 3056 cfg := TestStoreConfig(nil) 3057 cfg.TestingKnobs.DontPushOnWriteIntentError = true 3058 tc.StartWithStoreConfig(t, stopper, cfg) 3059 3060 // Test for both read & write attempts. 3061 for i, read := range []bool{true, false} { 3062 key := roachpb.Key(fmt.Sprintf("key-%d", i)) 3063 3064 // Start by laying down an intent to trip up future read or write to same key. 3065 txn := newTransaction("test", key, 1, tc.Clock()) 3066 pArgs := putArgs(key, []byte("value")) 3067 assignSeqNumsForReqs(txn, &pArgs) 3068 3069 _, pErr := tc.SendWrappedWith(roachpb.Header{ 3070 Txn: txn, 3071 }, &pArgs) 3072 if pErr != nil { 3073 t.Fatalf("test %d: %s", i, pErr) 3074 } 3075 3076 // Now attempt read or write. 3077 args := readOrWriteArgs(key, read) 3078 ts := tc.Clock().Now() // later timestamp 3079 3080 _, pErr = tc.SendWrappedWith(roachpb.Header{Timestamp: ts}, args) 3081 if _, ok := pErr.GetDetail().(*roachpb.WriteIntentError); !ok { 3082 t.Errorf("expected WriteIntentError; got %v", pErr) 3083 } 3084 3085 // Write the intent again -- should not have its timestamp upgraded! 3086 var ba roachpb.BatchRequest 3087 ba.Header = roachpb.Header{Txn: txn} 3088 ba.Add(&pArgs) 3089 assignSeqNumsForReqs(txn, &pArgs) 3090 br, pErr := tc.Sender().Send(context.Background(), ba) 3091 if pErr != nil { 3092 t.Fatal(pErr) 3093 } 3094 if br.Txn.WriteTimestamp != txn.WriteTimestamp { 3095 t.Errorf("expected timestamp not to advance %s != %s", br.Timestamp, txn.WriteTimestamp) 3096 } 3097 } 3098 } 3099 3100 // TestReplicaNoTimestampIncrementWithinTxn verifies that successive 3101 // read and write commands within the same transaction do not cause 3102 // the write to receive an incremented timestamp. 
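// Roughly, the timestamp cache records the reading transaction's ID alongside
// the read timestamp, so a later write by the same transaction is not pushed
// by its own earlier read; the non-transactional write at the end of the
// test, by contrast, is bumped by one logical tick.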
3103 func TestReplicaNoTimestampIncrementWithinTxn(t *testing.T) { 3104 defer leaktest.AfterTest(t)() 3105 tc := testContext{} 3106 stopper := stop.NewStopper() 3107 defer stopper.Stop(context.Background()) 3108 tc.Start(t, stopper) 3109 3110 // Test for both read & write attempts. 3111 key := roachpb.Key("a") 3112 txn := newTransaction("test", key, 1, tc.Clock()) 3113 3114 // Start with a read to warm the timestamp cache. 3115 gArgs := getArgs(key) 3116 assignSeqNumsForReqs(txn, &gArgs) 3117 3118 if _, pErr := tc.SendWrappedWith(roachpb.Header{ 3119 Txn: txn, 3120 }, &gArgs); pErr != nil { 3121 t.Fatal(pErr) 3122 } 3123 3124 // Now try a write and verify timestamp isn't incremented. 3125 var ba roachpb.BatchRequest 3126 ba.Header = roachpb.Header{Txn: txn} 3127 pArgs := putArgs(key, []byte("value")) 3128 ba.Add(&pArgs) 3129 assignSeqNumsForReqs(txn, &pArgs) 3130 br, pErr := tc.Sender().Send(context.Background(), ba) 3131 if pErr != nil { 3132 t.Fatal(pErr) 3133 } 3134 if br.Txn.WriteTimestamp != txn.WriteTimestamp { 3135 t.Errorf("expected timestamp to remain %s; got %s", txn.WriteTimestamp, br.Timestamp) 3136 } 3137 3138 // Resolve the intent. 3139 rArgs := &roachpb.ResolveIntentRequest{ 3140 RequestHeader: pArgs.Header(), 3141 IntentTxn: txn.TxnMeta, 3142 Status: roachpb.COMMITTED, 3143 } 3144 if _, pErr = tc.SendWrappedWith(roachpb.Header{Timestamp: txn.WriteTimestamp}, rArgs); pErr != nil { 3145 t.Fatal(pErr) 3146 } 3147 3148 // Finally, try a non-transactional write and verify timestamp is incremented. 3149 ts := txn.WriteTimestamp 3150 expTS := ts 3151 expTS.Logical++ 3152 3153 ba = roachpb.BatchRequest{} 3154 ba.Header = roachpb.Header{Timestamp: ts} 3155 ba.Add(&pArgs) 3156 assignSeqNumsForReqs(txn, &pArgs) 3157 br, pErr = tc.Sender().Send(context.Background(), ba) 3158 if pErr != nil { 3159 t.Fatal(pErr) 3160 } 3161 if br.Timestamp != expTS { 3162 t.Errorf("expected timestamp to increment to %s; got %s", expTS, br.Timestamp) 3163 } 3164 } 3165 3166 // TestReplicaAbortSpanReadError verifies that an error is returned 3167 // to the client in the event that a AbortSpan entry is found but is 3168 // not decodable. 3169 func TestReplicaAbortSpanReadError(t *testing.T) { 3170 defer leaktest.AfterTest(t)() 3171 3172 var exitStatus int 3173 log.SetExitFunc(true /* hideStack */, func(i int) { 3174 exitStatus = i 3175 }) 3176 defer log.ResetExitFunc() 3177 3178 tc := testContext{} 3179 stopper := stop.NewStopper() 3180 defer stopper.Stop(context.Background()) 3181 tc.Start(t, stopper) 3182 3183 k := []byte("a") 3184 txn := newTransaction("test", k, 10, tc.Clock()) 3185 args := incrementArgs(k, 1) 3186 assignSeqNumsForReqs(txn, args) 3187 3188 if _, pErr := tc.SendWrappedWith(roachpb.Header{ 3189 Txn: txn, 3190 }, args); pErr != nil { 3191 t.Fatal(pErr) 3192 } 3193 3194 // Overwrite Abort span entry with garbage for the last op. 3195 key := keys.AbortSpanKey(tc.repl.RangeID, txn.ID) 3196 err := storage.MVCCPut(context.Background(), tc.engine, nil, key, hlc.Timestamp{}, roachpb.MakeValueFromString("never read in this test"), nil) 3197 if err != nil { 3198 t.Fatal(err) 3199 } 3200 3201 // Now try increment again and verify error. 
3202 _, pErr := tc.SendWrappedWith(roachpb.Header{ 3203 Txn: txn, 3204 }, args) 3205 if !testutils.IsPError(pErr, "replica corruption") { 3206 t.Fatal(pErr) 3207 } 3208 if exitStatus != 255 { 3209 t.Fatalf("did not fatal (exit status %d)", exitStatus) 3210 } 3211 } 3212 3213 // TestReplicaAbortSpanOnlyWithIntent verifies that a transactional command 3214 // which goes through Raft but is not a transactional write (i.e. does not 3215 // leave intents) passes the AbortSpan unhindered. 3216 func TestReplicaAbortSpanOnlyWithIntent(t *testing.T) { 3217 defer leaktest.AfterTest(t)() 3218 tc := testContext{} 3219 stopper := stop.NewStopper() 3220 defer stopper.Stop(context.Background()) 3221 tc.Start(t, stopper) 3222 3223 txn := newTransaction("test", []byte("test"), 10, tc.Clock()) 3224 txn.Sequence = 100 3225 entry := roachpb.AbortSpanEntry{ 3226 Key: txn.Key, 3227 Timestamp: txn.WriteTimestamp, 3228 Priority: 0, 3229 } 3230 if err := tc.repl.abortSpan.Put(context.Background(), tc.engine, nil, txn.ID, &entry); err != nil { 3231 t.Fatal(err) 3232 } 3233 3234 args, h := heartbeatArgs(txn, tc.Clock().Now()) 3235 // If the AbortSpan were active for this request, we'd catch a txn retry. 3236 // Instead, we expect no error and a successfully created transaction record. 3237 if _, pErr := tc.SendWrappedWith(h, &args); pErr != nil { 3238 t.Fatalf("unexpected error: %v", pErr) 3239 } 3240 } 3241 3242 // TestReplicaTxnIdempotency verifies that transactions run successfully and in 3243 // an idempotent manner when replaying the same requests. 3244 func TestReplicaTxnIdempotency(t *testing.T) { 3245 defer leaktest.AfterTest(t)() 3246 tc := testContext{} 3247 stopper := stop.NewStopper() 3248 defer stopper.Stop(context.Background()) 3249 tc.Start(t, stopper) 3250 3251 runWithTxn := func(txn *roachpb.Transaction, reqs ...roachpb.Request) error { 3252 ba := roachpb.BatchRequest{} 3253 ba.Header.Txn = txn 3254 ba.Add(reqs...) 
3255 _, pErr := tc.Sender().Send(context.Background(), ba) 3256 return pErr.GoError() 3257 } 3258 keyAtSeqHasVal := func(txn *roachpb.Transaction, key []byte, seq enginepb.TxnSeq, val *roachpb.Value) error { 3259 args := getArgs(key) 3260 args.Sequence = seq 3261 resp, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txn}, &args) 3262 if pErr != nil { 3263 return pErr.GoError() 3264 } 3265 foundVal := resp.(*roachpb.GetResponse).Value 3266 if (foundVal == nil) == (val == nil) { 3267 if foundVal == nil { 3268 return nil 3269 } 3270 if foundVal.EqualData(*val) { 3271 return nil 3272 } 3273 } 3274 return errors.Errorf("expected val %v at seq %d, found %v", val, seq, foundVal) 3275 } 3276 firstErr := func(errs ...error) error { 3277 for _, err := range errs { 3278 if err != nil { 3279 return err 3280 } 3281 } 3282 return nil 3283 } 3284 3285 val1 := []byte("value") 3286 val2 := []byte("value2") 3287 byteVal := func(b []byte) *roachpb.Value { 3288 var v roachpb.Value 3289 v.SetBytes(b) 3290 return &v 3291 } 3292 intVal := func(i int64) *roachpb.Value { 3293 var v roachpb.Value 3294 v.SetInt(i) 3295 return &v 3296 } 3297 3298 testCases := []struct { 3299 name string 3300 beforeTxnStart func(key []byte) error 3301 afterTxnStart func(txn *roachpb.Transaction, key []byte) error 3302 run func(txn *roachpb.Transaction, key []byte) error 3303 validate func(txn *roachpb.Transaction, key []byte) error 3304 expError string // regexp pattern to match on run error, if not empty 3305 }{ 3306 { 3307 // Requests are meant to be idempotent, so identical requests at the 3308 // same sequence should always succeed without changing state. 3309 name: "reissued put", 3310 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3311 args := putArgs(key, val1) 3312 args.Sequence = 2 3313 return runWithTxn(txn, &args) 3314 }, 3315 run: func(txn *roachpb.Transaction, key []byte) error { 3316 args := putArgs(key, val1) 3317 args.Sequence = 2 3318 return runWithTxn(txn, &args) 3319 }, 3320 validate: func(txn *roachpb.Transaction, key []byte) error { 3321 return firstErr( 3322 keyAtSeqHasVal(txn, key, 1, nil), 3323 keyAtSeqHasVal(txn, key, 2, byteVal(val1)), 3324 keyAtSeqHasVal(txn, key, 3, byteVal(val1)), 3325 ) 3326 }, 3327 }, 3328 { 3329 name: "reissued cput", 3330 beforeTxnStart: func(key []byte) error { 3331 // Write an initial key for the CPuts to expect. 
3332 args := putArgs(key, val2) 3333 return runWithTxn(nil, &args) 3334 }, 3335 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3336 args := cPutArgs(key, val1, val2) 3337 args.Sequence = 2 3338 return runWithTxn(txn, &args) 3339 }, 3340 run: func(txn *roachpb.Transaction, key []byte) error { 3341 args := cPutArgs(key, val1, val2) 3342 args.Sequence = 2 3343 return runWithTxn(txn, &args) 3344 }, 3345 validate: func(txn *roachpb.Transaction, key []byte) error { 3346 return firstErr( 3347 keyAtSeqHasVal(txn, key, 1, byteVal(val2)), 3348 keyAtSeqHasVal(txn, key, 2, byteVal(val1)), 3349 keyAtSeqHasVal(txn, key, 3, byteVal(val1)), 3350 ) 3351 }, 3352 }, 3353 { 3354 name: "reissued initput", 3355 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3356 args := iPutArgs(key, val1) 3357 args.Sequence = 2 3358 return runWithTxn(txn, &args) 3359 }, 3360 run: func(txn *roachpb.Transaction, key []byte) error { 3361 args := iPutArgs(key, val1) 3362 args.Sequence = 2 3363 return runWithTxn(txn, &args) 3364 }, 3365 validate: func(txn *roachpb.Transaction, key []byte) error { 3366 return firstErr( 3367 keyAtSeqHasVal(txn, key, 1, nil), 3368 keyAtSeqHasVal(txn, key, 2, byteVal(val1)), 3369 keyAtSeqHasVal(txn, key, 3, byteVal(val1)), 3370 ) 3371 }, 3372 }, 3373 { 3374 name: "reissued increment", 3375 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3376 args := incrementArgs(key, 3) 3377 args.Sequence = 2 3378 return runWithTxn(txn, args) 3379 }, 3380 run: func(txn *roachpb.Transaction, key []byte) error { 3381 args := incrementArgs(key, 3) 3382 args.Sequence = 2 3383 return runWithTxn(txn, args) 3384 }, 3385 validate: func(txn *roachpb.Transaction, key []byte) error { 3386 return firstErr( 3387 keyAtSeqHasVal(txn, key, 1, nil), 3388 keyAtSeqHasVal(txn, key, 2, intVal(3)), 3389 keyAtSeqHasVal(txn, key, 3, intVal(3)), 3390 ) 3391 }, 3392 }, 3393 { 3394 name: "reissued delete", 3395 beforeTxnStart: func(key []byte) error { 3396 // Write an initial key to delete. 3397 args := putArgs(key, val2) 3398 return runWithTxn(nil, &args) 3399 }, 3400 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3401 args := deleteArgs(key) 3402 args.Sequence = 2 3403 return runWithTxn(txn, &args) 3404 }, 3405 run: func(txn *roachpb.Transaction, key []byte) error { 3406 args := deleteArgs(key) 3407 args.Sequence = 2 3408 return runWithTxn(txn, &args) 3409 }, 3410 validate: func(txn *roachpb.Transaction, key []byte) error { 3411 return firstErr( 3412 keyAtSeqHasVal(txn, key, 1, byteVal(val2)), 3413 keyAtSeqHasVal(txn, key, 2, nil), 3414 keyAtSeqHasVal(txn, key, 3, nil), 3415 ) 3416 }, 3417 }, 3418 { 3419 name: "reissued delete range", 3420 beforeTxnStart: func(key []byte) error { 3421 // Write an initial key to delete. 3422 args := putArgs(key, val2) 3423 return runWithTxn(nil, &args) 3424 }, 3425 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3426 args := deleteRangeArgs(key, append(key, 0)) 3427 args.Sequence = 2 3428 return runWithTxn(txn, &args) 3429 }, 3430 run: func(txn *roachpb.Transaction, key []byte) error { 3431 args := deleteRangeArgs(key, append(key, 0)) 3432 args.Sequence = 2 3433 return runWithTxn(txn, &args) 3434 }, 3435 validate: func(txn *roachpb.Transaction, key []byte) error { 3436 return firstErr( 3437 keyAtSeqHasVal(txn, key, 1, byteVal(val2)), 3438 keyAtSeqHasVal(txn, key, 2, nil), 3439 keyAtSeqHasVal(txn, key, 3, nil), 3440 ) 3441 }, 3442 }, 3443 { 3444 // A request reissued from an earlier txn epoch will be rejected. 
3445 name: "reissued write at lower epoch", 3446 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3447 txnEpochBump := txn.Clone() 3448 txnEpochBump.Epoch++ 3449 3450 args := putArgs(key, val1) 3451 args.Sequence = 2 3452 return runWithTxn(txnEpochBump, &args) 3453 }, 3454 run: func(txn *roachpb.Transaction, key []byte) error { 3455 args := putArgs(key, val2) 3456 args.Sequence = 3 3457 return runWithTxn(txn, &args) 3458 }, 3459 expError: "put with epoch 0 came after put with epoch 1", 3460 validate: func(txn *roachpb.Transaction, key []byte) error { 3461 txnEpochBump := txn.Clone() 3462 txnEpochBump.Epoch++ 3463 3464 return firstErr( 3465 keyAtSeqHasVal(txnEpochBump, key, 1, nil), 3466 keyAtSeqHasVal(txnEpochBump, key, 2, byteVal(val1)), 3467 keyAtSeqHasVal(txnEpochBump, key, 3, byteVal(val1)), 3468 ) 3469 }, 3470 }, 3471 { 3472 // A request issued after a request with a larger sequence has 3473 // already laid down an intent on the same key will be rejected. 3474 // Unlike the next case, seq two was not issued before seq three, 3475 // which would indicate a faulty client. 3476 name: "reordered write at lower sequence", 3477 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3478 args := putArgs(key, val1) 3479 args.Sequence = 3 3480 return runWithTxn(txn, &args) 3481 }, 3482 run: func(txn *roachpb.Transaction, key []byte) error { 3483 args := putArgs(key, val1) 3484 args.Sequence = 2 3485 return runWithTxn(txn, &args) 3486 }, 3487 expError: "sequence 3 missing an intent with lower sequence 2", 3488 validate: func(txn *roachpb.Transaction, key []byte) error { 3489 return firstErr( 3490 keyAtSeqHasVal(txn, key, 1, nil), 3491 keyAtSeqHasVal(txn, key, 2, nil), 3492 keyAtSeqHasVal(txn, key, 3, byteVal(val1)), 3493 ) 3494 }, 3495 }, 3496 { 3497 // Unlike the previous case, here a request is reissued after a 3498 // request with an identical sequence is issued AND a request with a 3499 // larger sequence is issued. Because the replay isn't rewriting 3500 // history, it can succeed. This does not indicate a faulty client. 3501 // It is possible if a batch that writes to a key twice is reissued. 3502 name: "reissued write at lower sequence", 3503 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3504 args := putArgs(key, val1) 3505 args.Sequence = 2 3506 if err := runWithTxn(txn, &args); err != nil { 3507 return err 3508 } 3509 3510 args = putArgs(key, val2) 3511 args.Sequence = 3 3512 return runWithTxn(txn, &args) 3513 }, 3514 run: func(txn *roachpb.Transaction, key []byte) error { 3515 args := putArgs(key, val1) 3516 args.Sequence = 2 3517 return runWithTxn(txn, &args) 3518 }, 3519 validate: func(txn *roachpb.Transaction, key []byte) error { 3520 return firstErr( 3521 keyAtSeqHasVal(txn, key, 1, nil), 3522 keyAtSeqHasVal(txn, key, 2, byteVal(val1)), 3523 keyAtSeqHasVal(txn, key, 3, byteVal(val2)), 3524 ) 3525 }, 3526 }, 3527 { 3528 // A request at the same sequence as another but that produces a 3529 // different result will be rejected. 
3530 name: "different write at same sequence", 3531 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3532 args := putArgs(key, val1) 3533 args.Sequence = 2 3534 return runWithTxn(txn, &args) 3535 }, 3536 run: func(txn *roachpb.Transaction, key []byte) error { 3537 args := putArgs(key, val2) 3538 args.Sequence = 2 3539 return runWithTxn(txn, &args) 3540 }, 3541 expError: "sequence 2 has a different value", 3542 validate: func(txn *roachpb.Transaction, key []byte) error { 3543 return firstErr( 3544 keyAtSeqHasVal(txn, key, 1, nil), 3545 keyAtSeqHasVal(txn, key, 2, byteVal(val1)), 3546 keyAtSeqHasVal(txn, key, 3, byteVal(val1)), 3547 ) 3548 }, 3549 }, 3550 { 3551 // A request is issued again, but with a lower timestamp than the 3552 // timestamp in the intent. This is possible if an intent is pushed 3553 // and then the request that wrote it is reissued. We allow this 3554 // without issue because timestamps on intents are moved to the 3555 // commit timestamp on commit. 3556 name: "reissued write at lower timestamp", 3557 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3558 txnHighTS := txn.Clone() 3559 txnHighTS.WriteTimestamp = txnHighTS.WriteTimestamp.Add(1, 0) 3560 3561 args := putArgs(key, val1) 3562 args.Sequence = 2 3563 return runWithTxn(txnHighTS, &args) 3564 }, 3565 run: func(txn *roachpb.Transaction, key []byte) error { 3566 args := putArgs(key, val1) 3567 args.Sequence = 2 3568 return runWithTxn(txn, &args) 3569 }, 3570 validate: func(txn *roachpb.Transaction, key []byte) error { 3571 return firstErr( 3572 keyAtSeqHasVal(txn, key, 1, nil), 3573 keyAtSeqHasVal(txn, key, 2, byteVal(val1)), 3574 keyAtSeqHasVal(txn, key, 3, byteVal(val1)), 3575 ) 3576 }, 3577 }, 3578 { 3579 // A request is issued again, but with a higher timestamp than the 3580 // timestamp in the intent. This is possible if the txn coordinator 3581 // increased its timestamp between the two requests (for instance, 3582 // after a refresh). We allow this without issue because timestamps 3583 // on intents are moved to the commit timestamp on commit. 3584 name: "reissued write at higher timestamp", 3585 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3586 args := putArgs(key, val1) 3587 args.Sequence = 2 3588 return runWithTxn(txn, &args) 3589 }, 3590 run: func(txn *roachpb.Transaction, key []byte) error { 3591 txnHighTS := txn.Clone() 3592 txnHighTS.WriteTimestamp = txnHighTS.WriteTimestamp.Add(1, 0) 3593 3594 args := putArgs(key, val1) 3595 args.Sequence = 2 3596 return runWithTxn(txnHighTS, &args) 3597 }, 3598 validate: func(txn *roachpb.Transaction, key []byte) error { 3599 return firstErr( 3600 keyAtSeqHasVal(txn, key, 1, nil), 3601 keyAtSeqHasVal(txn, key, 2, byteVal(val1)), 3602 keyAtSeqHasVal(txn, key, 3, byteVal(val1)), 3603 ) 3604 }, 3605 }, 3606 { 3607 // If part of a batch has already succeeded and another part hasn't 3608 // then the previously successful portion will be evaluated as a 3609 // no-op while the rest will evaluate as normal. This isn't common, 3610 // but it could happen if a partially successful batch is reissued 3611 // after a range merge. 
3612 name: "reissued write in partially successful batch", 3613 afterTxnStart: func(txn *roachpb.Transaction, key []byte) error { 3614 args := putArgs(key, val1) 3615 args.Sequence = 2 3616 return runWithTxn(txn, &args) 3617 }, 3618 run: func(txn *roachpb.Transaction, key []byte) error { 3619 pArgs := putArgs(key, val1) 3620 pArgs.Sequence = 2 3621 dArgs := deleteArgs(key) 3622 dArgs.Sequence = 3 3623 return runWithTxn(txn, &pArgs, &dArgs) 3624 }, 3625 validate: func(txn *roachpb.Transaction, key []byte) error { 3626 return firstErr( 3627 keyAtSeqHasVal(txn, key, 1, nil), 3628 keyAtSeqHasVal(txn, key, 2, byteVal(val1)), 3629 keyAtSeqHasVal(txn, key, 3, nil), 3630 ) 3631 }, 3632 }, 3633 } 3634 for i, c := range testCases { 3635 t.Run(c.name, func(t *testing.T) { 3636 key := []byte(strconv.Itoa(i)) 3637 if c.beforeTxnStart != nil { 3638 if err := c.beforeTxnStart(key); err != nil { 3639 t.Fatalf("failed beforeTxnStart: %v", err) 3640 } 3641 } 3642 3643 txn := newTransaction(c.name, roachpb.Key(c.name), 1, tc.Clock()) 3644 if c.afterTxnStart != nil { 3645 if err := c.afterTxnStart(txn, key); err != nil { 3646 t.Fatalf("failed afterTxnStart: %v", err) 3647 } 3648 } 3649 3650 if err := c.run(txn, key); err != nil { 3651 if len(c.expError) == 0 { 3652 t.Fatalf("expected no failure, found %q", err.Error()) 3653 } 3654 if !testutils.IsError(err, regexp.QuoteMeta(c.expError)) { 3655 t.Fatalf("expected failure %q, found %q", c.expError, err.Error()) 3656 } 3657 } else { 3658 if len(c.expError) > 0 { 3659 t.Fatalf("expected failure %q", c.expError) 3660 } 3661 } 3662 3663 if c.validate != nil { 3664 if err := c.validate(txn, key); err != nil { 3665 t.Fatalf("failed during validation: %v", err) 3666 } 3667 } 3668 }) 3669 } 3670 } 3671 3672 // TestEndTxnDeadline verifies that EndTxn respects the transaction deadline. 3673 func TestEndTxnDeadline(t *testing.T) { 3674 defer leaktest.AfterTest(t)() 3675 tc := testContext{} 3676 stopper := stop.NewStopper() 3677 defer stopper.Stop(context.Background()) 3678 tc.Start(t, stopper) 3679 3680 // 4 cases: no deadline, past deadline, equal deadline, future deadline. 3681 for i := 0; i < 4; i++ { 3682 key := roachpb.Key("key: " + strconv.Itoa(i)) 3683 txn := newTransaction("txn: "+strconv.Itoa(i), key, 1, tc.Clock()) 3684 put := putArgs(key, key) 3685 assignSeqNumsForReqs(txn, &put) 3686 3687 if _, pErr := kv.SendWrappedWith( 3688 context.Background(), tc.Sender(), roachpb.Header{Txn: txn}, &put, 3689 ); pErr != nil { 3690 t.Fatal(pErr) 3691 } 3692 3693 etArgs, etHeader := endTxnArgs(txn, true /* commit */) 3694 switch i { 3695 case 0: 3696 // No deadline. 3697 case 1: 3698 // Past deadline. 3699 ts := txn.WriteTimestamp.Prev() 3700 etArgs.Deadline = &ts 3701 case 2: 3702 // Equal deadline. 3703 etArgs.Deadline = &txn.WriteTimestamp 3704 case 3: 3705 // Future deadline. 3706 ts := txn.WriteTimestamp.Next() 3707 etArgs.Deadline = &ts 3708 } 3709 3710 { 3711 assignSeqNumsForReqs(txn, &etArgs) 3712 _, pErr := tc.SendWrappedWith(etHeader, &etArgs) 3713 switch i { 3714 case 0: 3715 // No deadline. 3716 if pErr != nil { 3717 t.Error(pErr) 3718 } 3719 3720 case 1: 3721 fallthrough 3722 case 2: 3723 // Past deadline. 3724 retErr, ok := pErr.GetDetail().(*roachpb.TransactionRetryError) 3725 if !ok || retErr.Reason != roachpb.RETRY_COMMIT_DEADLINE_EXCEEDED { 3726 t.Fatalf("expected deadline exceeded, got: %v", pErr) 3727 } 3728 case 3: 3729 // Future deadline. 
3730 if pErr != nil { 3731 t.Error(pErr) 3732 } 3733 } 3734 } 3735 } 3736 } 3737 3738 // Test that regular push retriable errors take precedence over the deadline 3739 // check. 3740 func TestSerializableDeadline(t *testing.T) { 3741 defer leaktest.AfterTest(t)() 3742 tc := testContext{} 3743 stopper := stop.NewStopper() 3744 defer stopper.Stop(context.Background()) 3745 tc.Start(t, stopper) 3746 3747 // Create our txn. It will be pushed next. 3748 key := roachpb.Key("key") 3749 txn := newTransaction("test txn", key, roachpb.MinUserPriority, tc.Clock()) 3750 3751 tc.manualClock.Increment(100) 3752 pusher := newTransaction( 3753 "test pusher", key, roachpb.MaxUserPriority, tc.Clock()) 3754 pushReq := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 3755 resp, pErr := tc.SendWrapped(&pushReq) 3756 if pErr != nil { 3757 t.Fatal(pErr) 3758 } 3759 updatedPushee := resp.(*roachpb.PushTxnResponse).PusheeTxn 3760 if updatedPushee.Status != roachpb.PENDING { 3761 t.Fatalf("expected pushee to still be alive, but got %+v", updatedPushee) 3762 } 3763 3764 // Send an EndTxn with a deadline below the point where the txn has been 3765 // pushed. 3766 etArgs, etHeader := endTxnArgs(txn, true /* commit */) 3767 deadline := updatedPushee.WriteTimestamp 3768 deadline.Logical-- 3769 etArgs.Deadline = &deadline 3770 _, pErr = tc.SendWrappedWith(etHeader, &etArgs) 3771 const expectedErrMsg = "TransactionRetryError: retry txn \\(RETRY_SERIALIZABLE\\)" 3772 if pErr == nil { 3773 t.Fatalf("expected %q, got: nil", expectedErrMsg) 3774 } 3775 err := pErr.GoError() 3776 if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok || 3777 !testutils.IsError(err, expectedErrMsg) { 3778 t.Fatalf("expected %q, got: %s (%T)", expectedErrMsg, 3779 err, pErr.GetDetail()) 3780 } 3781 } 3782 3783 // TestCreateTxnRecordAfterPushAndGC verifies that aborting transactions does 3784 // not lead to anomalies even after the aborted transaction record is cleaned 3785 // up. Precisely, verify that if the GC queue could potentially have removed a 3786 // txn record created through a successful push (by a concurrent actor), the 3787 // original transaction's subsequent attempt to create its initial record fails. 3788 // 3789 // See #9265 for context. 3790 func TestCreateTxnRecordAfterPushAndGC(t *testing.T) { 3791 defer leaktest.AfterTest(t)() 3792 tc := testContext{} 3793 stopper := stop.NewStopper() 3794 defer stopper.Stop(context.Background()) 3795 tc.Start(t, stopper) 3796 3797 key := roachpb.Key("a") 3798 desc := tc.repl.Desc() 3799 // This test avoids a zero-timestamp regression (see LastActive() below), 3800 // so avoid zero timestamps. 3801 tc.manualClock.Increment(123) 3802 pusher := newTransaction("pusher", key, 1, tc.Clock()) 3803 3804 // This pushee should never be allowed to write a txn record because it 3805 // will be aborted before it even tries. 
3806 pushee := newTransaction("pushee", key, 1, tc.Clock()) 3807 pushReq := pushTxnArgs(pusher, pushee, roachpb.PUSH_ABORT) 3808 pushReq.Force = true 3809 resp, pErr := tc.SendWrapped(&pushReq) 3810 if pErr != nil { 3811 t.Fatal(pErr) 3812 } 3813 abortedPushee := resp.(*roachpb.PushTxnResponse).PusheeTxn 3814 if abortedPushee.Status != roachpb.ABORTED { 3815 t.Fatalf("expected push to abort pushee, got %+v", abortedPushee) 3816 } 3817 3818 gcHeader := roachpb.RequestHeader{ 3819 Key: desc.StartKey.AsRawKey(), 3820 EndKey: desc.EndKey.AsRawKey(), 3821 } 3822 3823 // Pretend that the GC queue removes the aborted transaction entry, as it 3824 // would after a period of inactivity, while our pushee txn is unaware and 3825 // may have written intents elsewhere. 3826 { 3827 gcReq := roachpb.GCRequest{ 3828 RequestHeader: gcHeader, 3829 Keys: []roachpb.GCRequest_GCKey{ 3830 {Key: keys.TransactionKey(pushee.Key, pushee.ID)}, 3831 }, 3832 } 3833 if _, pErr := tc.SendWrappedWith(roachpb.Header{RangeID: 1}, &gcReq); pErr != nil { 3834 t.Fatal(pErr) 3835 } 3836 } 3837 3838 // Try to let our transaction write its initial record. If this succeeds, 3839 // we're in trouble because other written intents may have been aborted, 3840 // i.e. the transaction might commit but lose some of its writes. It should 3841 // not succeed because the abort is reflected in the timestamp cache, 3842 // which is consulted when attempting to create the transaction record. 3843 { 3844 expErr := "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)" 3845 3846 // HeartbeatTxn. 3847 hb, hbH := heartbeatArgs(pushee, tc.Clock().Now()) 3848 resp, pErr = tc.SendWrappedWith(hbH, &hb) 3849 if pErr == nil { 3850 t.Fatalf("unexpected success: %+v", resp) 3851 } else if !testutils.IsPError(pErr, regexp.QuoteMeta(expErr)) { 3852 t.Fatalf("expected %s, got %v and response %+v", expErr, pErr, resp) 3853 } 3854 3855 // EndTxn. 3856 et, etH := endTxnArgs(pushee, true) 3857 resp, pErr = tc.SendWrappedWith(etH, &et) 3858 if pErr == nil { 3859 t.Fatalf("unexpected success: %+v", resp) 3860 } else if !testutils.IsPError(pErr, regexp.QuoteMeta(expErr)) { 3861 t.Fatalf("expected %s, got %v and response %+v", expErr, pErr, resp) 3862 } 3863 } 3864 3865 // A transaction which starts later (i.e. at a higher timestamp) should not 3866 // be prevented from writing its record. 3867 // See #9522. 3868 { 3869 // HeartbeatTxn. 3870 newTxn1 := newTransaction("foo", key, 1, tc.Clock()) 3871 hb, hbH := heartbeatArgs(newTxn1, tc.Clock().Now()) 3872 if _, pErr := tc.SendWrappedWith(hbH, &hb); pErr != nil { 3873 t.Fatal(pErr) 3874 } 3875 3876 // EndTxn. 3877 newTxn2 := newTransaction("foo", key, 1, tc.Clock()) 3878 et, etH := endTxnArgs(newTxn2, true) 3879 if _, pErr := tc.SendWrappedWith(etH, &et); pErr != nil { 3880 t.Fatal(pErr) 3881 } 3882 } 3883 } 3884 3885 // TestEndTxnDeadline_1PC verifies that a transaction that exceeded its deadline 3886 // will be aborted even when one phase commit is applicable. 3887 func TestEndTxnDeadline_1PC(t *testing.T) { 3888 defer leaktest.AfterTest(t)() 3889 tc := testContext{} 3890 stopper := stop.NewStopper() 3891 defer stopper.Stop(context.Background()) 3892 tc.Start(t, stopper) 3893 3894 key := roachpb.Key("a") 3895 txn := newTransaction("test", key, 1, tc.Clock()) 3896 put := putArgs(key, []byte("value")) 3897 et, etH := endTxnArgs(txn, true) 3898 // Past deadline. 
3899 ts := txn.WriteTimestamp.Prev() 3900 et.Deadline = &ts 3901 3902 var ba roachpb.BatchRequest 3903 ba.Header = etH 3904 ba.Add(&put, &et) 3905 assignSeqNumsForReqs(txn, &put, &et) 3906 _, pErr := tc.Sender().Send(context.Background(), ba) 3907 retErr, ok := pErr.GetDetail().(*roachpb.TransactionRetryError) 3908 if !ok || retErr.Reason != roachpb.RETRY_COMMIT_DEADLINE_EXCEEDED { 3909 t.Fatalf("expected deadline exceeded, got: %v", pErr) 3910 } 3911 } 3912 3913 // Test1PCTransactionWriteTimestamp verifies that the transaction's 3914 // timestamp is used when writing values in a 1PC transaction. We 3915 // verify this by updating the timestamp cache for the key being 3916 // written so that the timestamp there is greater than the txn's 3917 // ReadTimestamp. 3918 func Test1PCTransactionWriteTimestamp(t *testing.T) { 3919 defer leaktest.AfterTest(t)() 3920 tc := testContext{} 3921 stopper := stop.NewStopper() 3922 defer stopper.Stop(context.Background()) 3923 tc.Start(t, stopper) 3924 3925 key := roachpb.Key("key") 3926 txn := newTransaction("test", key, 1, tc.Clock()) 3927 put := putArgs(key, []byte("value")) 3928 et, etH := endTxnArgs(txn, true) 3929 3930 // Update the timestamp cache for the key being written. 3931 gArgs := getArgs(key) 3932 if _, pErr := tc.SendWrapped(&gArgs); pErr != nil { 3933 t.Fatal(pErr) 3934 } 3935 3936 // Now verify that the write triggers a retry. 3937 var ba roachpb.BatchRequest 3938 ba.Header = etH 3939 ba.Add(&put, &et) 3940 assignSeqNumsForReqs(txn, &put, &et) 3941 _, pErr := tc.Sender().Send(context.Background(), ba) 3942 if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok { 3943 t.Errorf("expected retry error; got %s", pErr) 3944 } 3945 } 3946 3947 // TestEndTxnWithMalformedSplitTrigger verifies an EndTxn call with a malformed 3948 // commit trigger fails. 3949 func TestEndTxnWithMalformedSplitTrigger(t *testing.T) { 3950 defer leaktest.AfterTest(t)() 3951 3952 var exitStatus int 3953 log.SetExitFunc(true /* hideStack */, func(i int) { 3954 exitStatus = i 3955 }) 3956 defer log.ResetExitFunc() 3957 3958 tc := testContext{} 3959 stopper := stop.NewStopper() 3960 defer stopper.Stop(context.Background()) 3961 tc.Start(t, stopper) 3962 3963 key := roachpb.Key("foo") 3964 txn := newTransaction("test", key, 1, tc.Clock()) 3965 pArgs := putArgs(key, []byte("only here to make this a rw transaction")) 3966 assignSeqNumsForReqs(txn, &pArgs) 3967 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{ 3968 Txn: txn, 3969 }, &pArgs); pErr != nil { 3970 t.Fatal(pErr) 3971 } 3972 3973 args, h := endTxnArgs(txn, true /* commit */) 3974 // Make an EndTxn request which would fail if not stripped. In this case, we 3975 // set the start key to "bar" for a split of the default range; start key 3976 // must be "" in this case. 
3977 args.InternalCommitTrigger = &roachpb.InternalCommitTrigger{ 3978 SplitTrigger: &roachpb.SplitTrigger{ 3979 LeftDesc: roachpb.RangeDescriptor{ 3980 StartKey: roachpb.RKey("bar"), 3981 EndKey: roachpb.RKey("foo"), 3982 }, 3983 RightDesc: roachpb.RangeDescriptor{ 3984 StartKey: roachpb.RKey("foo"), 3985 EndKey: roachpb.RKeyMax, 3986 }, 3987 }, 3988 } 3989 3990 assignSeqNumsForReqs(txn, &args) 3991 expErr := regexp.QuoteMeta("replica corruption (processed=true): range does not match splits") 3992 if _, pErr := tc.SendWrappedWith(h, &args); !testutils.IsPError(pErr, expErr) { 3993 t.Errorf("unexpected error: %s", pErr) 3994 } 3995 3996 if exitStatus != 255 { 3997 t.Fatalf("unexpected exit status %d", exitStatus) 3998 } 3999 } 4000 4001 // TestEndTxnBeforeHeartbeat verifies that a transaction can be 4002 // committed/aborted before being heartbeat. 4003 func TestEndTxnBeforeHeartbeat(t *testing.T) { 4004 defer leaktest.AfterTest(t)() 4005 // Don't automatically GC the Txn record: We want to heartbeat the 4006 // committed Transaction and compare it against our expectations. 4007 // When it's removed, the heartbeat would recreate it. 4008 defer setTxnAutoGC(false)() 4009 tc := testContext{} 4010 stopper := stop.NewStopper() 4011 defer stopper.Stop(context.Background()) 4012 tc.Start(t, stopper) 4013 4014 key := []byte("a") 4015 testutils.RunTrueAndFalse(t, "commit", func(t *testing.T, commit bool) { 4016 key = roachpb.Key(key).Next() 4017 txn := newTransaction("test", key, 1, tc.Clock()) 4018 h := roachpb.Header{Txn: txn} 4019 4020 put := putArgs(key, key) 4021 assignSeqNumsForReqs(txn, &put) 4022 if _, pErr := tc.SendWrappedWith(h, &put); pErr != nil { 4023 t.Fatal(pErr) 4024 } 4025 4026 et, _ := endTxnArgs(txn, commit) 4027 assignSeqNumsForReqs(txn, &et) 4028 resp, pErr := tc.SendWrappedWith(h, &et) 4029 if pErr != nil { 4030 t.Fatal(pErr) 4031 } 4032 reply := resp.(*roachpb.EndTxnResponse) 4033 expStatus := roachpb.COMMITTED 4034 if !commit { 4035 expStatus = roachpb.ABORTED 4036 } 4037 if reply.Txn.Status != expStatus { 4038 t.Errorf("expected transaction status to be %s; got %s", expStatus, reply.Txn.Status) 4039 } 4040 4041 // Try a heartbeat to the already-committed transaction; should get 4042 // committed txn back, but without last heartbeat timestamp set. 4043 hBA, h := heartbeatArgs(txn, tc.Clock().Now()) 4044 resp, pErr = tc.SendWrappedWith(h, &hBA) 4045 if pErr != nil { 4046 t.Error(pErr) 4047 } 4048 hBR := resp.(*roachpb.HeartbeatTxnResponse) 4049 if hBR.Txn.Status != expStatus { 4050 t.Errorf("expected transaction status to be %s, but got %s", expStatus, hBR.Txn.Status) 4051 } 4052 }) 4053 } 4054 4055 // TestEndTxnAfterHeartbeat verifies that a transaction can be committed/aborted 4056 // after being heartbeat. 4057 func TestEndTxnAfterHeartbeat(t *testing.T) { 4058 defer leaktest.AfterTest(t)() 4059 tc := testContext{} 4060 stopper := stop.NewStopper() 4061 defer stopper.Stop(context.Background()) 4062 tc.Start(t, stopper) 4063 4064 key := roachpb.Key("a") 4065 testutils.RunTrueAndFalse(t, "commit", func(t *testing.T, commit bool) { 4066 txn := newTransaction("test", key, 1, tc.Clock()) 4067 h := roachpb.Header{Txn: txn} 4068 4069 put := putArgs(key, key) 4070 assignSeqNumsForReqs(txn, &put) 4071 if _, pErr := tc.SendWrappedWith(h, &put); pErr != nil { 4072 t.Fatal(pErr) 4073 } 4074 4075 // Start out with a heartbeat to the transaction. 
4076 hBA, _ := heartbeatArgs(txn, tc.Clock().Now())
4077 resp, pErr := tc.SendWrappedWith(h, &hBA)
4078 if pErr != nil {
4079 t.Fatal(pErr)
4080 }
4081 hBR := resp.(*roachpb.HeartbeatTxnResponse)
4082 if hBR.Txn.Status != roachpb.PENDING {
4083 t.Errorf("expected transaction status to be %s, but got %s", roachpb.PENDING, hBR.Txn.Status)
4084 }
4085
4086 et, h := endTxnArgs(txn, commit)
4087 assignSeqNumsForReqs(txn, &et)
4088 resp, pErr = tc.SendWrappedWith(h, &et)
4089 if pErr != nil {
4090 t.Error(pErr)
4091 }
4092 reply := resp.(*roachpb.EndTxnResponse)
4093 expStatus := roachpb.COMMITTED
4094 if !commit {
4095 expStatus = roachpb.ABORTED
4096 }
4097 if reply.Txn.Status != expStatus {
4098 t.Errorf("expected transaction status to be %s; got %s", expStatus, reply.Txn.Status)
4099 }
4100 if reply.Txn.LastHeartbeat != hBR.Txn.LastHeartbeat {
4101 t.Errorf("expected heartbeats to remain equal: %+v != %+v",
4102 reply.Txn.LastHeartbeat, hBR.Txn.LastHeartbeat)
4103 }
4104 key = key.Next()
4105 })
4106 }
4107
4108 // TestEndTxnWithPushedTimestamp verifies that a txn can be ended (by commit or
4109 // abort) when the commit timestamp is greater than the transaction timestamp:
4110 // the commit fails with a retry error, while the abort succeeds.
4111 func TestEndTxnWithPushedTimestamp(t *testing.T) {
4112 defer leaktest.AfterTest(t)()
4113 tc := testContext{}
4114 stopper := stop.NewStopper()
4115 defer stopper.Stop(context.Background())
4116 tc.Start(t, stopper)
4117
4118 testCases := []struct {
4119 commit bool
4120 expErr bool
4121 }{
4122 {true, true},
4123 {false, false},
4124 }
4125 key := roachpb.Key("a")
4126 for i, test := range testCases {
4127 pushee := newTransaction("pushee", key, 1, tc.Clock())
4128 pusher := newTransaction("pusher", key, 1, tc.Clock())
4129 pushee.Priority = enginepb.MinTxnPriority
4130 pusher.Priority = enginepb.MaxTxnPriority // pusher will win
4131 put := putArgs(key, []byte("value"))
4132 assignSeqNumsForReqs(pushee, &put)
4133 if _, pErr := kv.SendWrappedWith(
4134 context.Background(), tc.Sender(), roachpb.Header{Txn: pushee}, &put,
4135 ); pErr != nil {
4136 t.Fatal(pErr)
4137 }
4138
4139 // Push pushee txn.
4140 pushTxn := pushTxnArgs(pusher, pushee, roachpb.PUSH_TIMESTAMP)
4141 pushTxn.Key = pusher.Key
4142 if _, pErr := tc.SendWrapped(&pushTxn); pErr != nil {
4143 t.Error(pErr)
4144 }
4145
4146 // End the transaction with args timestamp moved forward in time.
4147 endTxn, h := endTxnArgs(pushee, test.commit)
4148 assignSeqNumsForReqs(pushee, &endTxn)
4149 resp, pErr := tc.SendWrappedWith(h, &endTxn)
4150
4151 if test.expErr {
4152 if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok {
4153 t.Errorf("%d: expected retry error; got %s", i, pErr)
4154 }
4155 } else {
4156 if pErr != nil {
4157 t.Errorf("%d: unexpected error: %s", i, pErr)
4158 }
4159 expStatus := roachpb.COMMITTED
4160 if !test.commit {
4161 expStatus = roachpb.ABORTED
4162 }
4163 reply := resp.(*roachpb.EndTxnResponse)
4164 if reply.Txn.Status != expStatus {
4165 t.Errorf("%d: expected transaction status to be %s; got %s", i, expStatus, reply.Txn.Status)
4166 }
4167 }
4168 key = key.Next()
4169 }
4170 }
4171
4172 // TestEndTxnWithIncrementedEpoch verifies that a txn ended with a higher epoch
4173 // (and priority) correctly assumes the higher epoch.
4174 func TestEndTxnWithIncrementedEpoch(t *testing.T) { 4175 defer leaktest.AfterTest(t)() 4176 tc := testContext{} 4177 stopper := stop.NewStopper() 4178 defer stopper.Stop(context.Background()) 4179 tc.Start(t, stopper) 4180 4181 key := []byte("a") 4182 txn := newTransaction("test", key, 1, tc.Clock()) 4183 put := putArgs(key, key) 4184 assignSeqNumsForReqs(txn, &put) 4185 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{Txn: txn}, &put); pErr != nil { 4186 t.Fatal(pErr) 4187 } 4188 4189 // Start out with a heartbeat to the transaction. 4190 hBA, h := heartbeatArgs(txn, tc.Clock().Now()) 4191 4192 _, pErr := tc.SendWrappedWith(h, &hBA) 4193 if pErr != nil { 4194 t.Error(pErr) 4195 } 4196 4197 // Now end the txn with increased epoch and priority. 4198 args, h := endTxnArgs(txn, true) 4199 h.Txn.Epoch = txn.Epoch + 1 4200 h.Txn.Priority = txn.Priority + 1 4201 assignSeqNumsForReqs(txn, &args) 4202 4203 resp, pErr := tc.SendWrappedWith(h, &args) 4204 if pErr != nil { 4205 t.Error(pErr) 4206 } 4207 reply := resp.(*roachpb.EndTxnResponse) 4208 if reply.Txn.Status != roachpb.COMMITTED { 4209 t.Errorf("expected transaction status to be COMMITTED; got %s", reply.Txn.Status) 4210 } 4211 if reply.Txn.Epoch != txn.Epoch { 4212 t.Errorf("expected epoch to equal %d; got %d", txn.Epoch, reply.Txn.Epoch) 4213 } 4214 if reply.Txn.Priority != txn.Priority { 4215 t.Errorf("expected priority to equal %d; got %d", txn.Priority, reply.Txn.Priority) 4216 } 4217 } 4218 4219 // TestEndTxnWithErrors verifies various error conditions are checked such as 4220 // transaction already being committed or aborted, or timestamp or epoch 4221 // regression. 4222 func TestEndTxnWithErrors(t *testing.T) { 4223 defer leaktest.AfterTest(t)() 4224 tc := testContext{} 4225 ctx := context.Background() 4226 stopper := stop.NewStopper() 4227 defer stopper.Stop(ctx) 4228 tc.Start(t, stopper) 4229 4230 txn := newTransaction("test", roachpb.Key(""), 1, tc.Clock()) 4231 4232 testCases := []struct { 4233 key roachpb.Key 4234 existStatus roachpb.TransactionStatus 4235 existEpoch enginepb.TxnEpoch 4236 expErrRegexp string 4237 }{ 4238 {roachpb.Key("a"), roachpb.COMMITTED, txn.Epoch, "already committed"}, 4239 {roachpb.Key("b"), roachpb.ABORTED, txn.Epoch, 4240 regexp.QuoteMeta("TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)")}, 4241 {roachpb.Key("c"), roachpb.PENDING, txn.Epoch + 1, "epoch regression: 0"}, 4242 } 4243 for _, test := range testCases { 4244 t.Run("", func(t *testing.T) { 4245 // Establish existing txn state by writing directly to range engine. 4246 existTxn := txn.Clone() 4247 existTxn.Key = test.key 4248 existTxn.Status = test.existStatus 4249 existTxn.Epoch = test.existEpoch 4250 existTxnRecord := existTxn.AsRecord() 4251 txnKey := keys.TransactionKey(test.key, txn.ID) 4252 if err := storage.MVCCPutProto( 4253 ctx, tc.repl.store.Engine(), nil, txnKey, hlc.Timestamp{}, nil, &existTxnRecord, 4254 ); err != nil { 4255 t.Fatal(err) 4256 } 4257 4258 // End the transaction, verify expected error. 4259 txn.Key = test.key 4260 args, h := endTxnArgs(txn, true) 4261 args.LockSpans = []roachpb.Span{{Key: txn.Key}} 4262 args.Sequence = 2 4263 4264 if _, pErr := tc.SendWrappedWith(h, &args); !testutils.IsPError(pErr, test.expErrRegexp) { 4265 t.Fatalf("expected error:\n%s\nto match:\n%s", pErr, test.expErrRegexp) 4266 } else if txn := pErr.GetTxn(); txn != nil && txn.ID == (uuid.UUID{}) { 4267 // Prevent regression of #5591. 
4268 t.Fatalf("received empty Transaction proto in error") 4269 } 4270 }) 4271 } 4272 } 4273 4274 // TestEndTxnWithErrorAndSyncIntentResolution verifies that an EndTransaction 4275 // request that hits an error and then is forced to perform intent resolution 4276 // synchronously does not deadlock on itself. This is a regression test against 4277 // #47187. 4278 func TestEndTxnWithErrorAndSyncIntentResolution(t *testing.T) { 4279 defer leaktest.AfterTest(t)() 4280 tc := testContext{} 4281 ctx := context.Background() 4282 stopper := stop.NewStopper() 4283 defer stopper.Stop(ctx) 4284 cfg := TestStoreConfig(nil) 4285 cfg.TestingKnobs.IntentResolverKnobs.ForceSyncIntentResolution = true 4286 tc.StartWithStoreConfig(t, stopper, cfg) 4287 4288 txn := newTransaction("test", roachpb.Key("a"), 1, tc.Clock()) 4289 4290 // Establish existing txn state by writing directly to range engine. 4291 existTxn := txn.Clone() 4292 existTxn.Status = roachpb.ABORTED 4293 existTxnRec := existTxn.AsRecord() 4294 txnKey := keys.TransactionKey(txn.Key, txn.ID) 4295 err := storage.MVCCPutProto(ctx, tc.repl.store.Engine(), nil, txnKey, hlc.Timestamp{}, nil, &existTxnRec) 4296 require.NoError(t, err) 4297 4298 // End the transaction, verify expected error, shouldn't deadlock. 4299 args, h := endTxnArgs(txn, true) 4300 args.LockSpans = []roachpb.Span{{Key: txn.Key}} 4301 args.Sequence = 2 4302 4303 _, pErr := tc.SendWrappedWith(h, &args) 4304 require.Regexp(t, `TransactionAbortedError\(ABORT_REASON_ABORTED_RECORD_FOUND\)`, pErr) 4305 require.NotNil(t, pErr.GetTxn()) 4306 require.Equal(t, txn.ID, pErr.GetTxn().ID) 4307 } 4308 4309 // TestEndTxnRollbackAbortedTransaction verifies that no error is returned when 4310 // a transaction that has already been aborted is rolled back by an EndTxn 4311 // request. 4312 func TestEndTxnRollbackAbortedTransaction(t *testing.T) { 4313 defer leaktest.AfterTest(t)() 4314 4315 testutils.RunTrueAndFalse(t, "populateAbortSpan", func(t *testing.T, populateAbortSpan bool) { 4316 tc := testContext{} 4317 stopper := stop.NewStopper() 4318 defer stopper.Stop(context.Background()) 4319 cfg := TestStoreConfig(nil) 4320 cfg.TestingKnobs.DontPushOnWriteIntentError = true 4321 tc.StartWithStoreConfig(t, stopper, cfg) 4322 4323 key := []byte("a") 4324 txn := newTransaction("test", key, 1, tc.Clock()) 4325 put := putArgs(key, key) 4326 assignSeqNumsForReqs(txn, &put) 4327 if _, pErr := kv.SendWrappedWith( 4328 context.Background(), tc.Sender(), roachpb.Header{Txn: txn}, &put, 4329 ); pErr != nil { 4330 t.Fatal(pErr) 4331 } 4332 // Simulate what the client is supposed to do (update the transaction 4333 // based on the response). The Writing field is needed by this test. 4334 4335 // Abort the transaction by pushing it with maximum priority. 4336 pusher := newTransaction("test", key, 1, tc.Clock()) 4337 pusher.Priority = enginepb.MaxTxnPriority 4338 pushArgs := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 4339 if _, pErr := tc.SendWrapped(&pushArgs); pErr != nil { 4340 t.Fatal(pErr) 4341 } 4342 4343 // Check that the intent has not yet been resolved. 
4344 var ba roachpb.BatchRequest 4345 gArgs := getArgs(key) 4346 ba.Add(&gArgs) 4347 if err := ba.SetActiveTimestamp(tc.Clock().Now); err != nil { 4348 t.Fatal(err) 4349 } 4350 _, pErr := tc.Sender().Send(context.Background(), ba) 4351 if _, ok := pErr.GetDetail().(*roachpb.WriteIntentError); !ok { 4352 t.Errorf("expected write intent error, but got %s", pErr) 4353 } 4354 4355 if populateAbortSpan { 4356 var txnRecord roachpb.Transaction 4357 txnKey := keys.TransactionKey(txn.Key, txn.ID) 4358 if ok, err := storage.MVCCGetProto( 4359 context.Background(), tc.repl.store.Engine(), 4360 txnKey, hlc.Timestamp{}, &txnRecord, storage.MVCCGetOptions{}, 4361 ); err != nil { 4362 t.Fatal(err) 4363 } else if ok { 4364 t.Fatalf("unexpected txn record %v", txnRecord) 4365 } 4366 4367 if pErr := tc.store.intentResolver.ResolveIntents(context.Background(), 4368 []roachpb.LockUpdate{ 4369 roachpb.MakeLockUpdate(&txnRecord, roachpb.Span{Key: key}), 4370 }, intentresolver.ResolveOptions{Poison: true}); pErr != nil { 4371 t.Fatal(pErr) 4372 } 4373 } 4374 4375 // Abort the transaction again. No error is returned. 4376 args, h := endTxnArgs(txn, false /* commit */) 4377 args.LockSpans = []roachpb.Span{{Key: key}} 4378 resp, pErr := tc.SendWrappedWith(h, &args) 4379 if pErr != nil { 4380 t.Fatal(pErr) 4381 } 4382 reply := resp.(*roachpb.EndTxnResponse) 4383 if reply.Txn.Status != roachpb.ABORTED { 4384 t.Errorf("expected transaction status to be ABORTED; got %s", reply.Txn.Status) 4385 } 4386 4387 // Verify that the intent has been resolved. 4388 if _, pErr := tc.Sender().Send(context.Background(), ba); pErr != nil { 4389 t.Errorf("expected resolved intent, but got %s", pErr) 4390 } 4391 }) 4392 } 4393 4394 // TestRPCRetryProtectionInTxn verifies that transactional batches 4395 // enjoy protection from RPC replays. 4396 func TestRPCRetryProtectionInTxn(t *testing.T) { 4397 defer leaktest.AfterTest(t)() 4398 ctx := context.Background() 4399 cfg := TestStoreConfig(nil) 4400 tc := testContext{} 4401 stopper := stop.NewStopper() 4402 defer stopper.Stop(ctx) 4403 tc.StartWithStoreConfig(t, stopper, cfg) 4404 4405 testutils.RunTrueAndFalse(t, "CanCommitAtHigherTimestamp", func(t *testing.T, noPriorReads bool) { 4406 key := roachpb.Key("a") 4407 txn := newTransaction("test", key, 1, tc.Clock()) 4408 4409 // Send a batch with put & end txn. 4410 var ba roachpb.BatchRequest 4411 put := putArgs(key, []byte("value")) 4412 et, _ := endTxnArgs(txn, true) 4413 et.CanCommitAtHigherTimestamp = noPriorReads 4414 et.LockSpans = []roachpb.Span{{Key: key, EndKey: nil}} 4415 ba.Header = roachpb.Header{Txn: txn} 4416 ba.Add(&put) 4417 ba.Add(&et) 4418 assignSeqNumsForReqs(txn, &put, &et) 4419 _, pErr := tc.Sender().Send(ctx, ba) 4420 if pErr != nil { 4421 t.Fatalf("unexpected error: %s", pErr) 4422 } 4423 4424 // Replay the request. It initially tries to execute as a 1PC transaction, 4425 // but will fail because of a WriteTooOldError that pushes the transaction. 4426 // This forces the txn to execute normally, at which point it fails because 4427 // the EndTxn is detected to be a duplicate. 4428 _, pErr = tc.Sender().Send(ctx, ba) 4429 if pErr == nil { 4430 t.Fatalf("expected error, got nil") 4431 } 4432 require.Regexp(t, 4433 `TransactionAbortedError\(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY\)`, 4434 pErr) 4435 }) 4436 } 4437 4438 // Test that errors from batch evaluation never have the WriteTooOld flag set. 4439 // The WriteTooOld flag is supposed to only be set on successful responses. 
4440 // 4441 // The test will construct a batch with a write that would normally cause the 4442 // WriteTooOld flag to be set on the response, and another CPut which causes an 4443 // error to be returned. 4444 func TestErrorsDontCarryWriteTooOldFlag(t *testing.T) { 4445 defer leaktest.AfterTest(t)() 4446 ctx := context.Background() 4447 cfg := TestStoreConfig(nil /* clock */) 4448 tc := testContext{} 4449 stopper := stop.NewStopper() 4450 defer stopper.Stop(ctx) 4451 tc.StartWithStoreConfig(t, stopper, cfg) 4452 4453 keyA := roachpb.Key("a") 4454 keyB := roachpb.Key("b") 4455 // Start a transaction early to get a low timestamp. 4456 txn := roachpb.MakeTransaction( 4457 "test", keyA, roachpb.NormalUserPriority, tc.Clock().Now(), 0 /* offset */) 4458 4459 // Write a value outside of the txn to cause a WriteTooOldError later. 4460 put := putArgs(keyA, []byte("val1")) 4461 var ba roachpb.BatchRequest 4462 ba.Add(&put) 4463 _, pErr := tc.Sender().Send(ctx, ba) 4464 require.Nil(t, pErr) 4465 4466 // This put will cause the WriteTooOld flag to be set. 4467 put = putArgs(keyA, []byte("val2")) 4468 // This will cause a ConditionFailedError. 4469 cput := cPutArgs(keyB, []byte("missing"), []byte("newVal")) 4470 ba.Header = roachpb.Header{Txn: &txn} 4471 ba.Add(&put) 4472 ba.Add(&cput) 4473 assignSeqNumsForReqs(&txn, &put, &cput) 4474 _, pErr = tc.Sender().Send(ctx, ba) 4475 require.IsType(t, pErr.GetDetail(), &roachpb.ConditionFailedError{}) 4476 require.False(t, pErr.GetTxn().WriteTooOld) 4477 } 4478 4479 // TestReplicaLaziness verifies that Raft Groups are brought up lazily. 4480 func TestReplicaLaziness(t *testing.T) { 4481 defer leaktest.AfterTest(t)() 4482 // testWithAction is a function that creates an uninitialized Raft group, 4483 // calls the supplied function, and then tests that the Raft group is 4484 // initialized. 4485 testWithAction := func(action func() roachpb.Request) { 4486 tc := testContext{bootstrapMode: bootstrapRangeOnly} 4487 stopper := stop.NewStopper() 4488 defer stopper.Stop(context.Background()) 4489 tc.Start(t, stopper) 4490 4491 if status := tc.repl.RaftStatus(); status != nil { 4492 t.Fatalf("expected raft group to not be initialized, got RaftStatus() of %v", status) 4493 } 4494 var ba roachpb.BatchRequest 4495 request := action() 4496 ba.Add(request) 4497 if _, pErr := tc.Sender().Send(context.Background(), ba); pErr != nil { 4498 t.Fatalf("unexpected error: %s", pErr) 4499 } 4500 4501 if tc.repl.RaftStatus() == nil { 4502 t.Fatalf("expected raft group to be initialized") 4503 } 4504 } 4505 4506 testWithAction(func() roachpb.Request { 4507 put := putArgs(roachpb.Key("a"), []byte("value")) 4508 return &put 4509 }) 4510 4511 testWithAction(func() roachpb.Request { 4512 get := getArgs(roachpb.Key("a")) 4513 return &get 4514 }) 4515 4516 testWithAction(func() roachpb.Request { 4517 scan := scanArgs(roachpb.Key("a"), roachpb.Key("b")) 4518 return scan 4519 }) 4520 } 4521 4522 // TestBatchRetryCantCommitIntents tests that transactional retries cannot 4523 // commit intents. 4524 // It also tests current behavior - that a retried transactional batch can lay 4525 // down an intent that will never be committed. We don't necessarily like this 4526 // behavior, though. Note that intents are not always left hanging by retries 4527 // like they are in this low-level test. For example: 4528 // - in case of Raft *reproposals*, the MaxLeaseIndex mechanism will make 4529 // the reproposal not execute if the original proposal had already been 4530 // applied. 
4531 // - in case of request *re-evaluations*, we know that the original proposal 4532 // will not apply. 4533 func TestBatchRetryCantCommitIntents(t *testing.T) { 4534 defer leaktest.AfterTest(t)() 4535 tc := testContext{} 4536 stopper := stop.NewStopper() 4537 defer stopper.Stop(context.Background()) 4538 cfg := TestStoreConfig(nil) 4539 cfg.TestingKnobs.DontPushOnWriteIntentError = true 4540 tc.StartWithStoreConfig(t, stopper, cfg) 4541 4542 key := roachpb.Key("a") 4543 keyB := roachpb.Key("b") 4544 txn := newTransaction("test", key, 1, tc.Clock()) 4545 4546 // Send a put for keyA. 4547 var ba roachpb.BatchRequest 4548 put := putArgs(key, []byte("value")) 4549 ba.Header = roachpb.Header{Txn: txn} 4550 ba.Add(&put) 4551 assignSeqNumsForReqs(txn, &put) 4552 if err := ba.SetActiveTimestamp(tc.Clock().Now); err != nil { 4553 t.Fatal(err) 4554 } 4555 br, pErr := tc.Sender().Send(context.Background(), ba) 4556 if pErr != nil { 4557 t.Fatalf("unexpected error: %s", pErr) 4558 } 4559 4560 // Send a put for keyB. 4561 var ba2 roachpb.BatchRequest 4562 putB := putArgs(keyB, []byte("value")) 4563 putTxn := br.Txn.Clone() 4564 ba2.Header = roachpb.Header{Txn: putTxn} 4565 ba2.Add(&putB) 4566 assignSeqNumsForReqs(putTxn, &putB) 4567 br, pErr = tc.Sender().Send(context.Background(), ba2) 4568 if pErr != nil { 4569 t.Fatalf("unexpected error: %s", pErr) 4570 } 4571 4572 // HeartbeatTxn. 4573 hbTxn := br.Txn.Clone() 4574 hb, hbH := heartbeatArgs(hbTxn, tc.Clock().Now()) 4575 if _, pErr := tc.SendWrappedWith(hbH, &hb); pErr != nil { 4576 t.Fatalf("unexpected error: %s", pErr) 4577 } 4578 4579 // EndTxn. 4580 etTxn := br.Txn.Clone() 4581 et, etH := endTxnArgs(etTxn, true) 4582 et.LockSpans = []roachpb.Span{{Key: key, EndKey: nil}, {Key: keyB, EndKey: nil}} 4583 assignSeqNumsForReqs(etTxn, &et) 4584 if _, pErr := tc.SendWrappedWith(etH, &et); pErr != nil { 4585 t.Fatalf("unexpected error: %s", pErr) 4586 } 4587 4588 // Verify txn record is cleaned. 4589 var readTxn roachpb.Transaction 4590 txnKey := keys.TransactionKey(txn.Key, txn.ID) 4591 ok, err := storage.MVCCGetProto(context.Background(), tc.repl.store.Engine(), txnKey, 4592 hlc.Timestamp{}, &readTxn, storage.MVCCGetOptions{}) 4593 if err != nil || ok { 4594 t.Errorf("expected transaction record to be cleared (%t): %+v", ok, err) 4595 } 4596 4597 // Now replay put for key A; this succeeds as there's nothing to detect 4598 // the replay. The WriteTooOld flag will be set though. 4599 br, pErr = tc.Sender().Send(context.Background(), ba) 4600 require.NoError(t, pErr.GoError()) 4601 require.True(t, br.Txn.WriteTooOld) 4602 4603 // Intent should have been created. 4604 gArgs := getArgs(key) 4605 _, pErr = tc.SendWrapped(&gArgs) 4606 if _, ok := pErr.GetDetail().(*roachpb.WriteIntentError); !ok { 4607 t.Errorf("expected WriteIntentError, got: %v", pErr) 4608 } 4609 4610 // Heartbeat should fail with a TransactionAbortedError. 4611 _, pErr = tc.SendWrappedWith(hbH, &hb) 4612 expErr := "TransactionAbortedError(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY)" 4613 if !testutils.IsPError(pErr, regexp.QuoteMeta(expErr)) { 4614 t.Errorf("expected %s; got %v", expErr, pErr) 4615 } 4616 4617 // EndTxn should fail with a TransactionAbortedError. 4618 _, pErr = tc.SendWrappedWith(etH, &et) 4619 if !testutils.IsPError(pErr, regexp.QuoteMeta(expErr)) { 4620 t.Errorf("expected %s; got %v", expErr, pErr) 4621 } 4622 4623 // Expect that the txn left behind an intent on key A. 
4624 gArgs = getArgs(key)
4625 _, pErr = tc.SendWrapped(&gArgs)
4626 if _, ok := pErr.GetDetail().(*roachpb.WriteIntentError); !ok {
4627 t.Errorf("expected WriteIntentError, got: %v", pErr)
4628 }
4629 }
4630
4631 // TestEndTxnLocalGC verifies that a transaction record is immediately
4632 // garbage-collected upon EndTxn iff all of the supplied intents are local
4633 // relative to the transaction record's location.
4634 func TestEndTxnLocalGC(t *testing.T) {
4635 defer leaktest.AfterTest(t)()
4636 tc := testContext{}
4637 tsc := TestStoreConfig(nil)
4638 tsc.TestingKnobs.EvalKnobs.TestingEvalFilter =
4639 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error {
4640 // Make sure the direct GC path doesn't interfere with this test.
4641 if filterArgs.Req.Method() == roachpb.GC {
4642 return roachpb.NewErrorWithTxn(errors.Errorf("boom"), filterArgs.Hdr.Txn)
4643 }
4644 return nil
4645 }
4646 stopper := stop.NewStopper()
4647 defer stopper.Stop(context.Background())
4648 tc.StartWithStoreConfig(t, stopper, tsc)
4649
4650 splitKey := roachpb.RKey("c")
4651 splitTestRange(tc.store, splitKey, splitKey, t)
4652 key := roachpb.Key("a")
4653 putKey := key
4654 for i, test := range []struct {
4655 intents []roachpb.Span
4656 expGC bool
4657 }{
4658 // Range inside.
4659 {[]roachpb.Span{{Key: roachpb.Key("a"), EndKey: roachpb.Key("b")}}, true},
4660 // Two intents inside.
4661 {[]roachpb.Span{{Key: roachpb.Key("a")}, {Key: roachpb.Key("b")}}, true},
4662 // Intent range spilling over right endpoint.
4663 {[]roachpb.Span{{Key: roachpb.Key("a"), EndKey: splitKey.Next().AsRawKey()}}, false},
4664 // Intent range completely outside.
4665 {[]roachpb.Span{{Key: splitKey.AsRawKey(), EndKey: roachpb.Key("q")}}, false},
4666 // Intent inside and outside.
4667 {[]roachpb.Span{{Key: roachpb.Key("a")}, {Key: splitKey.AsRawKey()}}, false},
4668 } {
4669 txn := newTransaction("test", key, 1, tc.Clock())
4670 put := putArgs(putKey, key)
4671 assignSeqNumsForReqs(txn, &put)
4672 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{Txn: txn}, &put); pErr != nil {
4673 t.Fatal(pErr)
4674 }
4675 putKey = putKey.Next() // for the next iteration
4676 args, h := endTxnArgs(txn, true)
4677 args.LockSpans = test.intents
4678 assignSeqNumsForReqs(txn, &args)
4679 if _, pErr := tc.SendWrappedWith(h, &args); pErr != nil {
4680 t.Fatal(pErr)
4681 }
4682 var readTxn roachpb.Transaction
4683 txnKey := keys.TransactionKey(txn.Key, txn.ID)
4684 ok, err := storage.MVCCGetProto(context.Background(), tc.repl.store.Engine(), txnKey, hlc.Timestamp{},
4685 &readTxn, storage.MVCCGetOptions{})
4686 if err != nil {
4687 t.Fatal(err)
4688 }
4689 if !ok != test.expGC {
4690 t.Errorf("%d: unexpected gc'ed: %t", i, !ok)
4691 }
4692 }
4693 }
4694
4695 // setupResolutionTest splits the range at the specified splitKey and completes
4696 // a transaction which creates intents at key and splitKey.
4697 func setupResolutionTest(
4698 t *testing.T, tc testContext, key roachpb.Key, splitKey roachpb.RKey, commit bool,
4699 ) (*Replica, *roachpb.Transaction) {
4700 // Split the range and create an intent at splitKey and key.
4701 newRepl := splitTestRange(tc.store, splitKey, splitKey, t)
4702
4703 txn := newTransaction("test", key, 1, tc.Clock())
4704 // This increment is not required, but testing feels safer when zero
4705 // values are unexpected.
4706 txn.Epoch++ 4707 pArgs := putArgs(key, []byte("value")) 4708 h := roachpb.Header{Txn: txn} 4709 assignSeqNumsForReqs(txn, &pArgs) 4710 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), h, &pArgs); pErr != nil { 4711 t.Fatal(pErr) 4712 } 4713 4714 { 4715 var ba roachpb.BatchRequest 4716 ba.Header = h 4717 ba.RangeID = newRepl.RangeID 4718 if err := ba.SetActiveTimestamp(newRepl.store.Clock().Now); err != nil { 4719 t.Fatal(err) 4720 } 4721 pArgs := putArgs(splitKey.AsRawKey(), []byte("value")) 4722 ba.Add(&pArgs) 4723 assignSeqNumsForReqs(txn, &pArgs) 4724 if _, pErr := newRepl.Send(context.Background(), ba); pErr != nil { 4725 t.Fatal(pErr) 4726 } 4727 } 4728 4729 // End the transaction and resolve the intents. 4730 args, h := endTxnArgs(txn, commit) 4731 args.LockSpans = []roachpb.Span{{Key: key}, {Key: splitKey.AsRawKey()}} 4732 assignSeqNumsForReqs(txn, &args) 4733 if _, pErr := tc.SendWrappedWith(h, &args); pErr != nil { 4734 t.Fatal(pErr) 4735 } 4736 return newRepl, txn 4737 } 4738 4739 // TestEndTxnResolveOnlyLocalIntents verifies that an end transaction request 4740 // resolves only local intents within the same batch. 4741 func TestEndTxnResolveOnlyLocalIntents(t *testing.T) { 4742 defer leaktest.AfterTest(t)() 4743 tc := testContext{} 4744 tsc := TestStoreConfig(nil) 4745 tsc.TestingKnobs.DontPushOnWriteIntentError = true 4746 key := roachpb.Key("a") 4747 splitKey := roachpb.RKey(key).Next() 4748 tsc.TestingKnobs.EvalKnobs.TestingEvalFilter = 4749 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error { 4750 if filterArgs.Req.Method() == roachpb.ResolveIntent && 4751 filterArgs.Req.Header().Key.Equal(splitKey.AsRawKey()) { 4752 return roachpb.NewErrorWithTxn(errors.Errorf("boom"), filterArgs.Hdr.Txn) 4753 } 4754 return nil 4755 } 4756 4757 stopper := stop.NewStopper() 4758 defer stopper.Stop(context.Background()) 4759 tc.StartWithStoreConfig(t, stopper, tsc) 4760 4761 newRepl, txn := setupResolutionTest(t, tc, key, splitKey, true /* commit */) 4762 4763 // Check if the intent in the other range has not yet been resolved. 4764 { 4765 var ba roachpb.BatchRequest 4766 ba.Header.RangeID = newRepl.RangeID 4767 gArgs := getArgs(splitKey) 4768 ba.Add(&gArgs) 4769 if err := ba.SetActiveTimestamp(tc.Clock().Now); err != nil { 4770 t.Fatal(err) 4771 } 4772 _, pErr := newRepl.Send(context.Background(), ba) 4773 if _, ok := pErr.GetDetail().(*roachpb.WriteIntentError); !ok { 4774 t.Errorf("expected write intent error, but got %s", pErr) 4775 } 4776 } 4777 4778 hbArgs, h := heartbeatArgs(txn, tc.Clock().Now()) 4779 reply, pErr := tc.SendWrappedWith(h, &hbArgs) 4780 if pErr != nil { 4781 t.Fatal(pErr) 4782 } 4783 hbResp := reply.(*roachpb.HeartbeatTxnResponse) 4784 expIntents := []roachpb.Span{{Key: splitKey.AsRawKey()}} 4785 if !reflect.DeepEqual(hbResp.Txn.LockSpans, expIntents) { 4786 t.Fatalf("expected persisted intents %v, got %v", 4787 expIntents, hbResp.Txn.LockSpans) 4788 } 4789 } 4790 4791 // TestEndTxnDirectGC verifies that after successfully resolving the external 4792 // intents of a transaction after EndTxn, the transaction and AbortSpan records 4793 // are purged on both the local range and non-local range. 
4794 func TestEndTxnDirectGC(t *testing.T) { 4795 defer leaktest.AfterTest(t)() 4796 a := roachpb.Key("a") 4797 splitKey := keys.MustAddr(a).Next() 4798 4799 for i, testKey := range []roachpb.Key{ 4800 a, 4801 keys.RangeDescriptorKey(keys.MustAddr(a)), 4802 keys.RangeDescriptorKey(keys.MustAddr(roachpb.KeyMin)), 4803 } { 4804 func() { 4805 tc := testContext{} 4806 stopper := stop.NewStopper() 4807 defer stopper.Stop(context.Background()) 4808 tc.Start(t, stopper) 4809 4810 ctx := logtags.AddTag(context.Background(), "testcase", i) 4811 4812 rightRepl, txn := setupResolutionTest(t, tc, testKey, splitKey, false /* generate AbortSpan entry */) 4813 4814 testutils.SucceedsSoon(t, func() error { 4815 var gr roachpb.GetResponse 4816 if _, err := batcheval.Get( 4817 ctx, tc.engine, batcheval.CommandArgs{ 4818 EvalCtx: NewReplicaEvalContext(tc.repl, &allSpans), 4819 Args: &roachpb.GetRequest{RequestHeader: roachpb.RequestHeader{ 4820 Key: keys.TransactionKey(txn.Key, txn.ID), 4821 }}, 4822 }, 4823 &gr, 4824 ); err != nil { 4825 return err 4826 } else if gr.Value != nil { 4827 return errors.Errorf("%d: txn entry still there: %+v", i, gr) 4828 } 4829 4830 var entry roachpb.AbortSpanEntry 4831 if aborted, err := tc.repl.abortSpan.Get(ctx, tc.engine, txn.ID, &entry); err != nil { 4832 t.Fatal(err) 4833 } else if aborted { 4834 return errors.Errorf("%d: AbortSpan still populated: %v", i, entry) 4835 } 4836 if aborted, err := rightRepl.abortSpan.Get(ctx, tc.engine, txn.ID, &entry); err != nil { 4837 t.Fatal(err) 4838 } else if aborted { 4839 t.Fatalf("%d: right-hand side AbortSpan still populated: %v", i, entry) 4840 } 4841 4842 return nil 4843 }) 4844 }() 4845 } 4846 } 4847 4848 // TestEndTxnDirectGCFailure verifies that no immediate GC takes place 4849 // if external intents can't be resolved (see also TestEndTxnDirectGC). 4850 func TestEndTxnDirectGCFailure(t *testing.T) { 4851 defer leaktest.AfterTest(t)() 4852 tc := testContext{} 4853 key := roachpb.Key("a") 4854 splitKey := roachpb.RKey(key).Next() 4855 var count int64 4856 tsc := TestStoreConfig(nil) 4857 tsc.TestingKnobs.EvalKnobs.TestingEvalFilter = 4858 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error { 4859 if filterArgs.Req.Method() == roachpb.ResolveIntent && 4860 filterArgs.Req.Header().Key.Equal(splitKey.AsRawKey()) { 4861 atomic.AddInt64(&count, 1) 4862 return roachpb.NewErrorWithTxn(errors.Errorf("boom"), filterArgs.Hdr.Txn) 4863 } else if filterArgs.Req.Method() == roachpb.GC { 4864 // Can't fatal since we're on a goroutine. This'll do it. 4865 t.Error(errors.Errorf("unexpected GCRequest: %+v", filterArgs.Req)) 4866 } 4867 return nil 4868 } 4869 stopper := stop.NewStopper() 4870 defer stopper.Stop(context.Background()) 4871 tc.StartWithStoreConfig(t, stopper, tsc) 4872 4873 setupResolutionTest(t, tc, key, splitKey, true /* commit */) 4874 4875 // Now test that no GCRequest is issued. We can't test that directly (since 4876 // it's completely asynchronous), so we first make sure ResolveIntent 4877 // happened and subsequently issue a bogus Put which is likely to make it 4878 // into Raft only after a rogue GCRequest (at least sporadically), which 4879 // would trigger a Fatal from the command filter. 
4880 testutils.SucceedsSoon(t, func() error { 4881 if atomic.LoadInt64(&count) == 0 { 4882 return errors.Errorf("intent resolution not attempted yet") 4883 } else if err := tc.store.DB().Put(context.Background(), "panama", "banana"); err != nil { 4884 return err 4885 } 4886 return nil 4887 }) 4888 } 4889 4890 // TestEndTxnDirectGC_1PC runs a test similar to TestEndTxnDirectGC 4891 // for the case of a transaction which is contained in a single batch. 4892 func TestEndTxnDirectGC_1PC(t *testing.T) { 4893 defer leaktest.AfterTest(t)() 4894 for _, commit := range []bool{true, false} { 4895 func() { 4896 tc := testContext{} 4897 stopper := stop.NewStopper() 4898 defer stopper.Stop(context.Background()) 4899 tc.Start(t, stopper) 4900 4901 key := roachpb.Key("a") 4902 txn := newTransaction("test", key, 1, tc.Clock()) 4903 put := putArgs(key, []byte("value")) 4904 et, etH := endTxnArgs(txn, commit) 4905 et.LockSpans = []roachpb.Span{{Key: key}} 4906 assignSeqNumsForReqs(txn, &put, &et) 4907 4908 var ba roachpb.BatchRequest 4909 ba.Header = etH 4910 ba.Add(&put, &et) 4911 br, err := tc.Sender().Send(context.Background(), ba) 4912 if err != nil { 4913 t.Fatalf("commit=%t: %+v", commit, err) 4914 } 4915 etArgs, ok := br.Responses[len(br.Responses)-1].GetInner().(*roachpb.EndTxnResponse) 4916 if !ok || (!etArgs.OnePhaseCommit && commit) { 4917 t.Errorf("commit=%t: expected one phase commit", commit) 4918 } 4919 4920 var entry roachpb.AbortSpanEntry 4921 if aborted, err := tc.repl.abortSpan.Get(context.Background(), tc.engine, txn.ID, &entry); err != nil { 4922 t.Fatal(err) 4923 } else if aborted { 4924 t.Fatalf("commit=%t: AbortSpan still populated: %v", commit, entry) 4925 } 4926 }() 4927 } 4928 } 4929 4930 // TestReplicaTransactionRequires1PC verifies that a transaction which sets 4931 // Requires1PC on EndTxn request will never leave intents in the event that it 4932 // experiences an error or the timestamp is advanced. 4933 func TestReplicaTransactionRequires1PC(t *testing.T) { 4934 defer leaktest.AfterTest(t)() 4935 4936 tsc := TestStoreConfig(nil) 4937 var injectErrorOnKey atomic.Value 4938 injectErrorOnKey.Store(roachpb.Key("")) 4939 4940 tsc.TestingKnobs.EvalKnobs.TestingEvalFilter = 4941 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error { 4942 if filterArgs.Req.Method() == roachpb.Put && 4943 injectErrorOnKey.Load().(roachpb.Key).Equal(filterArgs.Req.Header().Key) { 4944 return roachpb.NewErrorf("injected error") 4945 } 4946 return nil 4947 } 4948 tc := testContext{} 4949 stopper := stop.NewStopper() 4950 defer stopper.Stop(context.Background()) 4951 tc.StartWithStoreConfig(t, stopper, tsc) 4952 4953 testCases := []struct { 4954 setupFn func(roachpb.Key) 4955 expErrorPat string 4956 }{ 4957 // Case 1: verify error if we augment the timestamp cache in order 4958 // to cause the response timestamp to move forward. 4959 { 4960 setupFn: func(key roachpb.Key) { 4961 gArgs := getArgs(key) 4962 if _, pErr := tc.SendWrapped(&gArgs); pErr != nil { 4963 t.Fatal(pErr) 4964 } 4965 }, 4966 expErrorPat: "could not commit in one phase as requested", 4967 }, 4968 // Case 2: inject an error on the put. 4969 { 4970 setupFn: func(key roachpb.Key) { 4971 injectErrorOnKey.Store(key) 4972 }, 4973 expErrorPat: "injected error", 4974 }, 4975 } 4976 4977 for i, test := range testCases { 4978 t.Run("", func(t *testing.T) { 4979 key := roachpb.Key(fmt.Sprintf("%d", i)) 4980 4981 // Create the 1PC batch. 
4982 var ba roachpb.BatchRequest 4983 txn := newTransaction("test", key, 1, tc.Clock()) 4984 put := putArgs(key, []byte("value")) 4985 et, etH := endTxnArgs(txn, true) 4986 et.Require1PC = true 4987 ba.Header = etH 4988 ba.Add(&put, &et) 4989 assignSeqNumsForReqs(txn, &put, &et) 4990 4991 // Run the setup method. 4992 test.setupFn(key) 4993 4994 // Send the batch command. 4995 _, pErr := tc.Sender().Send(context.Background(), ba) 4996 if !testutils.IsPError(pErr, test.expErrorPat) { 4997 t.Errorf("expected error=%q running required 1PC txn; got %s", test.expErrorPat, pErr) 4998 } 4999 5000 // Do a consistent scan to verify no intents were created. 5001 sArgs := scanArgs(key, key.Next()) 5002 _, pErr = tc.SendWrapped(sArgs) 5003 if pErr != nil { 5004 t.Fatalf("error scanning to verify no intent present: %s", pErr) 5005 } 5006 }) 5007 } 5008 } 5009 5010 // TestReplicaEndTxnWithRequire1PC verifies that an error is returned if an 5011 // EndTxn request is received with the Require1PC flag set to true. 5012 func TestReplicaEndTxnWithRequire1PC(t *testing.T) { 5013 defer leaktest.AfterTest(t)() 5014 5015 tc := testContext{} 5016 stopper := stop.NewStopper() 5017 defer stopper.Stop(context.Background()) 5018 tc.Start(t, stopper) 5019 5020 key := roachpb.Key("a") 5021 txn := newTransaction("test", key, 1, tc.Clock()) 5022 var ba roachpb.BatchRequest 5023 ba.Header = roachpb.Header{Txn: txn} 5024 put := putArgs(key, []byte("value")) 5025 ba.Add(&put) 5026 assignSeqNumsForReqs(txn, &put) 5027 if _, pErr := tc.Sender().Send(context.Background(), ba); pErr != nil { 5028 t.Fatalf("unexpected error: %s", pErr) 5029 } 5030 5031 et, etH := endTxnArgs(txn, true) 5032 et.Require1PC = true 5033 ba = roachpb.BatchRequest{} 5034 ba.Header = etH 5035 ba.Add(&et) 5036 assignSeqNumsForReqs(txn, &et) 5037 _, pErr := tc.Sender().Send(context.Background(), ba) 5038 if !testutils.IsPError(pErr, "could not commit in one phase as requested") { 5039 t.Fatalf("expected requires 1PC error; got %v", pErr) 5040 } 5041 } 5042 5043 // TestAbortSpanPoisonOnResolve verifies that when an intent is 5044 // aborted, the AbortSpan on the respective Range is poisoned and 5045 // the pushee is presented with a txn abort on its next contact with 5046 // the Range in the same epoch. 5047 func TestAbortSpanPoisonOnResolve(t *testing.T) { 5048 defer leaktest.AfterTest(t)() 5049 key := roachpb.Key("a") 5050 5051 // run is the actual meat of the test: abort determines whether we're 5052 // going to abort the pushee. It pushes the pushee accordingly and 5053 // checks whether we get the correct behavior as the pushee touches the 5054 // Range again.
5055 run := func(abort bool) { 5056 tc := testContext{} 5057 stopper := stop.NewStopper() 5058 defer stopper.Stop(context.Background()) 5059 tc.Start(t, stopper) 5060 5061 pusher := newTransaction("test", key, 1, tc.Clock()) 5062 pushee := newTransaction("test", key, 1, tc.Clock()) 5063 pusher.Priority = enginepb.MaxTxnPriority 5064 pushee.Priority = enginepb.MinTxnPriority // pusher will win 5065 5066 inc := func(actor *roachpb.Transaction, k roachpb.Key) (*roachpb.IncrementResponse, *roachpb.Error) { 5067 incArgs := &roachpb.IncrementRequest{ 5068 RequestHeader: roachpb.RequestHeader{Key: k}, Increment: 123, 5069 } 5070 assignSeqNumsForReqs(actor, incArgs) 5071 reply, pErr := kv.SendWrappedWith(context.Background(), tc.store, roachpb.Header{ 5072 Txn: actor, 5073 RangeID: 1, 5074 }, incArgs) 5075 if pErr != nil { 5076 return nil, pErr 5077 } 5078 return reply.(*roachpb.IncrementResponse), nil 5079 } 5080 5081 get := func(actor *roachpb.Transaction, k roachpb.Key) *roachpb.Error { 5082 gArgs := getArgs(k) 5083 assignSeqNumsForReqs(actor, &gArgs) 5084 _, pErr := kv.SendWrappedWith(context.Background(), tc.store, roachpb.Header{ 5085 Txn: actor, 5086 RangeID: 1, 5087 }, &gArgs) 5088 return pErr 5089 } 5090 5091 // Write an intent (this also begins the pushee's transaction). 5092 if _, pErr := inc(pushee, key); pErr != nil { 5093 t.Fatal(pErr) 5094 } 5095 5096 // Have the pusher run into the intent. That pushes our pushee and 5097 // resolves the intent, which in turn should poison the AbortSpan. 5098 var assert func(*roachpb.Error) error 5099 if abort { 5100 // Write/Write conflict will abort pushee. 5101 if _, pErr := inc(pusher, key); pErr != nil { 5102 t.Fatal(pErr) 5103 } 5104 assert = func(pErr *roachpb.Error) error { 5105 if _, ok := pErr.GetDetail().(*roachpb.TransactionAbortedError); !ok { 5106 return errors.Errorf("abort=%t: expected txn abort, got %s", abort, pErr) 5107 } 5108 return nil 5109 } 5110 } else { 5111 // Verify we're not poisoned. 5112 assert = func(pErr *roachpb.Error) error { 5113 if pErr != nil { 5114 return errors.Errorf("abort=%t: unexpected: %s", abort, pErr) 5115 } 5116 return nil 5117 } 5118 } 5119 5120 { 5121 // Our assert should be true for any reads or writes. 5122 pErr := get(pushee, key) 5123 if err := assert(pErr); err != nil { 5124 t.Fatal(err) 5125 } 5126 } 5127 { 5128 _, pErr := inc(pushee, key) 5129 if err := assert(pErr); err != nil { 5130 t.Fatal(err) 5131 } 5132 } 5133 { 5134 // Still poisoned (on any key on the Range). 5135 pErr := get(pushee, key.Next()) 5136 if err := assert(pErr); err != nil { 5137 t.Fatal(err) 5138 } 5139 } 5140 { 5141 _, pErr := inc(pushee, key.Next()) 5142 if err := assert(pErr); err != nil { 5143 t.Fatal(err) 5144 } 5145 } 5146 { 5147 // Pretend we're coming back. Increasing the epoch on an abort should 5148 // still fail obviously, while on no abort will succeed. 5149 pushee.Epoch++ 5150 _, pErr := inc(pushee, roachpb.Key("b")) 5151 if err := assert(pErr); err != nil { 5152 t.Fatal(err) 5153 } 5154 } 5155 } 5156 5157 for _, abort := range []bool{false, true} { 5158 run(abort) 5159 } 5160 } 5161 5162 // TestAbortSpanError verifies that roachpb.Errors returned by checkIfTxnAborted 5163 // have txns that are identical to txns stored in Transaction{Retry,Aborted}Error. 
5164 func TestAbortSpanError(t *testing.T) { 5165 defer leaktest.AfterTest(t)() 5166 tc := testContext{} 5167 stopper := stop.NewStopper() 5168 defer stopper.Stop(context.Background()) 5169 tc.Start(t, stopper) 5170 5171 txn := roachpb.Transaction{} 5172 txn.ID = uuid.MakeV4() 5173 txn.Priority = 1 5174 txn.Sequence = 1 5175 txn.WriteTimestamp = tc.Clock().Now().Add(1, 0) 5176 5177 key := roachpb.Key("k") 5178 ts := txn.WriteTimestamp.Next() 5179 priority := enginepb.TxnPriority(10) 5180 entry := roachpb.AbortSpanEntry{ 5181 Key: key, 5182 Timestamp: ts, 5183 Priority: priority, 5184 } 5185 if err := tc.repl.abortSpan.Put(context.Background(), tc.engine, nil, txn.ID, &entry); err != nil { 5186 t.Fatal(err) 5187 } 5188 5189 rec := &SpanSetReplicaEvalContext{tc.repl, allSpans} 5190 pErr := checkIfTxnAborted(context.Background(), rec, tc.engine, txn) 5191 if _, ok := pErr.GetDetail().(*roachpb.TransactionAbortedError); ok { 5192 expected := txn.Clone() 5193 expected.WriteTimestamp = txn.WriteTimestamp 5194 expected.Priority = priority 5195 expected.Status = roachpb.ABORTED 5196 if pErr.GetTxn() == nil || !reflect.DeepEqual(pErr.GetTxn(), expected) { 5197 t.Errorf("txn does not match: %s vs. %s", pErr.GetTxn(), expected) 5198 } 5199 } else { 5200 t.Errorf("unexpected error: %s", pErr) 5201 } 5202 } 5203 5204 // TestPushTxnBadKey verifies that args.Key equals args.PusheeTxn.ID. 5205 func TestPushTxnBadKey(t *testing.T) { 5206 defer leaktest.AfterTest(t)() 5207 tc := testContext{} 5208 stopper := stop.NewStopper() 5209 defer stopper.Stop(context.Background()) 5210 tc.Start(t, stopper) 5211 5212 pusher := newTransaction("test", roachpb.Key("a"), 1, tc.Clock()) 5213 pushee := newTransaction("test", roachpb.Key("b"), 1, tc.Clock()) 5214 5215 args := pushTxnArgs(pusher, pushee, roachpb.PUSH_ABORT) 5216 args.Key = pusher.Key 5217 5218 if _, pErr := tc.SendWrapped(&args); !testutils.IsPError(pErr, ".*should match pushee.*") { 5219 t.Errorf("unexpected error %s", pErr) 5220 } 5221 } 5222 5223 // TestPushTxnAlreadyCommittedOrAborted verifies success 5224 // (noop) in event that pushee is already committed or aborted. 5225 func TestPushTxnAlreadyCommittedOrAborted(t *testing.T) { 5226 defer leaktest.AfterTest(t)() 5227 5228 // This test simulates running into an open intent and resolving it using 5229 // the transaction record. We test this in two ways: 5230 // 1. The first prevents the transaction record from being GCed by the 5231 // EndTxn request. The effect of this is that the pusher finds the 5232 // transaction record in a finalized status and returns it directly. 5233 // 2. The second allows the transaction record to be GCed by the EndTxn 5234 // request. The effect of this is that the pusher finds no transaction 5235 // record but discovers that the transaction has already been finalized 5236 // using the timestamp cache. It doesn't know whether the transaction 5237 // was COMMITTED or ABORTED, so it is forced to be conservative and return 5238 // an ABORTED transaction. 5239 testutils.RunTrueAndFalse(t, "auto-gc", func(t *testing.T, autoGC bool) { 5240 defer setTxnAutoGC(autoGC)() 5241 5242 // Test for COMMITTED and ABORTED transactions. 
5243 testutils.RunTrueAndFalse(t, "commit", func(t *testing.T, commit bool) { 5244 tc := testContext{} 5245 stopper := stop.NewStopper() 5246 defer stopper.Stop(context.Background()) 5247 tc.Start(t, stopper) 5248 5249 key := roachpb.Key(fmt.Sprintf("key-%t-%t", autoGC, commit)) 5250 pusher := newTransaction("test", key, 1, tc.Clock()) 5251 pushee := newTransaction("test", key, 1, tc.Clock()) 5252 5253 // Begin the pushee's transaction. 5254 put := putArgs(key, key) 5255 assignSeqNumsForReqs(pushee, &put) 5256 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{Txn: pushee}, &put); pErr != nil { 5257 t.Fatal(pErr) 5258 } 5259 // End the pushee's transaction. 5260 etArgs, h := endTxnArgs(pushee, commit) 5261 assignSeqNumsForReqs(pushee, &etArgs) 5262 if _, pErr := tc.SendWrappedWith(h, &etArgs); pErr != nil { 5263 t.Fatal(pErr) 5264 } 5265 5266 // Now try to push what's already committed or aborted. 5267 args := pushTxnArgs(pusher, pushee, roachpb.PUSH_ABORT) 5268 resp, pErr := tc.SendWrapped(&args) 5269 if pErr != nil { 5270 t.Fatal(pErr) 5271 } 5272 reply := resp.(*roachpb.PushTxnResponse) 5273 5274 // We expect the push to return an ABORTED transaction record for all 5275 // cases except when the transaction is COMMITTED and its record is not 5276 // GCed. The case where it is COMMITTED and its record is GCed can be 5277 // surprising, but doesn't result in problems because a transaction must 5278 // resolve all of its intents before garbage collecting its record, so 5279 // the pusher won't end up removing a still-pending intent for a 5280 // COMMITTED transaction. 5281 expStatus := roachpb.ABORTED 5282 if commit && !autoGC { 5283 expStatus = roachpb.COMMITTED 5284 } 5285 if reply.PusheeTxn.Status != expStatus { 5286 t.Errorf("expected push txn to return with status == %s; got %+v", expStatus, reply.PusheeTxn) 5287 } 5288 }) 5289 }) 5290 } 5291 5292 // TestPushTxnUpgradeExistingTxn verifies that pushing a transaction record 5293 // with a new timestamp upgrades the pushee's timestamp if greater. In all 5294 // test cases, the priorities are set such that the push will succeed. 5295 func TestPushTxnUpgradeExistingTxn(t *testing.T) { 5296 defer leaktest.AfterTest(t)() 5297 tc := testContext{} 5298 stopper := stop.NewStopper() 5299 defer stopper.Stop(context.Background()) 5300 tc.Start(t, stopper) 5301 5302 now := tc.Clock().Now() 5303 ts1 := now.Add(1, 0) 5304 ts2 := now.Add(2, 0) 5305 testCases := []struct { 5306 startTS, ts, expTS hlc.Timestamp 5307 }{ 5308 // Noop. 5309 {ts1, ts1, ts1}, 5310 // Move timestamp forward. 5311 {ts1, ts2, ts2}, 5312 // Move timestamp backwards (has no effect). 5313 {ts2, ts1, ts2}, 5314 } 5315 5316 for i, test := range testCases { 5317 key := roachpb.Key(fmt.Sprintf("key-%d", i)) 5318 pusher := newTransaction("test", key, 1, tc.Clock()) 5319 pushee := newTransaction("test", key, 1, tc.Clock()) 5320 pushee.Epoch = 12345 5321 pusher.Priority = enginepb.MaxTxnPriority // Pusher will win 5322 5323 // First, establish "start" of existing pushee's txn via HeartbeatTxn. 5324 pushee.WriteTimestamp = test.startTS 5325 pushee.LastHeartbeat = test.startTS 5326 pushee.ReadTimestamp = test.startTS 5327 hb, hbH := heartbeatArgs(pushee, pushee.WriteTimestamp) 5328 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), hbH, &hb); pErr != nil { 5329 t.Fatal(pErr) 5330 } 5331 5332 // Now, attempt to push the transaction using updated timestamp. 
5333 pushee.WriteTimestamp = test.ts 5334 args := pushTxnArgs(pusher, pushee, roachpb.PUSH_ABORT) 5335 5336 // Set header timestamp to the maximum of the pusher and pushee timestamps. 5337 h := roachpb.Header{Timestamp: args.PushTo} 5338 h.Timestamp.Forward(pushee.WriteTimestamp) 5339 resp, pErr := tc.SendWrappedWith(h, &args) 5340 if pErr != nil { 5341 t.Fatal(pErr) 5342 } 5343 reply := resp.(*roachpb.PushTxnResponse) 5344 expTxnRecord := pushee.AsRecord() 5345 expTxn := expTxnRecord.AsTransaction() 5346 expTxn.Priority = enginepb.MaxTxnPriority - 1 5347 expTxn.Epoch = pushee.Epoch // no change 5348 expTxn.WriteTimestamp = test.expTS 5349 expTxn.Status = roachpb.ABORTED 5350 5351 if !reflect.DeepEqual(expTxn, reply.PusheeTxn) { 5352 t.Fatalf("unexpected push txn in trial %d: %s", i, pretty.Diff(expTxn, reply.PusheeTxn)) 5353 } 5354 } 5355 } 5356 5357 // TestPushTxnQueryPusheeHasNewerVersion verifies that PushTxn 5358 // uses the newer version of the pushee in a push request. 5359 func TestPushTxnQueryPusheeHasNewerVersion(t *testing.T) { 5360 defer leaktest.AfterTest(t)() 5361 tc := testContext{} 5362 stopper := stop.NewStopper() 5363 defer stopper.Stop(context.Background()) 5364 cfg := TestStoreConfig(nil) 5365 cfg.TestingKnobs.DontRetryPushTxnFailures = true 5366 tc.StartWithStoreConfig(t, stopper, cfg) 5367 5368 key := roachpb.Key("key") 5369 pushee := newTransaction("test", key, 1, tc.Clock()) 5370 pushee.Priority = 1 5371 pushee.Epoch = 12345 5372 pushee.Sequence = 2 5373 ts := tc.Clock().Now() 5374 pushee.WriteTimestamp = ts 5375 pushee.LastHeartbeat = ts 5376 5377 pusher := newTransaction("test", key, 1, tc.Clock()) 5378 pusher.Priority = 2 5379 5380 put := putArgs(key, key) 5381 assignSeqNumsForReqs(pushee, &put) 5382 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{Txn: pushee}, &put); pErr != nil { 5383 t.Fatal(pErr) 5384 } 5385 5386 // Make sure the pushee in the request carries newer information than the 5387 // persisted txn record. Since that newer version has higher priority than the pusher, the push should fail. 5388 pushee.Priority = 4 5389 args := pushTxnArgs(pusher, pushee, roachpb.PUSH_ABORT) 5390 5391 _, pErr := tc.SendWrapped(&args) 5392 if pErr == nil { 5393 t.Fatalf("unexpected push success") 5394 } 5395 if _, ok := pErr.GetDetail().(*roachpb.TransactionPushError); !ok { 5396 t.Errorf("expected txn push error: %s", pErr) 5397 } 5398 } 5399 5400 // TestPushTxnHeartbeatTimeout verifies that a txn which hasn't been 5401 // heartbeated within its transaction liveness threshold can be pushed/aborted.
5402 func TestPushTxnHeartbeatTimeout(t *testing.T) { 5403 defer leaktest.AfterTest(t)() 5404 tc := testContext{manualClock: hlc.NewManualClock(123)} 5405 stopper := stop.NewStopper() 5406 defer stopper.Stop(context.Background()) 5407 cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond)) 5408 cfg.TestingKnobs.DontRetryPushTxnFailures = true 5409 cfg.TestingKnobs.DontRecoverIndeterminateCommits = true 5410 tc.StartWithStoreConfig(t, stopper, cfg) 5411 5412 const noError = "" 5413 const txnPushError = "failed to push" 5414 const indetCommitError = "txn in indeterminate STAGING state" 5415 5416 m := int64(txnwait.TxnLivenessHeartbeatMultiplier) 5417 ns := base.DefaultTxnHeartbeatInterval.Nanoseconds() 5418 testCases := []struct { 5419 status roachpb.TransactionStatus // -1 for no record 5420 heartbeatOffset int64 // nanoseconds from original timestamp, 0 for no heartbeat 5421 timeOffset int64 // nanoseconds from original timestamp 5422 pushType roachpb.PushTxnType 5423 expErr string 5424 }{ 5425 // Avoid using offsets that result in outcomes that depend on logical 5426 // ticks. 5427 {roachpb.PENDING, 0, 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5428 {roachpb.PENDING, 0, 1, roachpb.PUSH_ABORT, txnPushError}, 5429 {roachpb.PENDING, 0, 1, roachpb.PUSH_TOUCH, txnPushError}, 5430 {roachpb.PENDING, 0, ns, roachpb.PUSH_TIMESTAMP, txnPushError}, 5431 {roachpb.PENDING, 0, ns, roachpb.PUSH_ABORT, txnPushError}, 5432 {roachpb.PENDING, 0, ns, roachpb.PUSH_TOUCH, txnPushError}, 5433 {roachpb.PENDING, 0, m*ns - 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5434 {roachpb.PENDING, 0, m*ns - 1, roachpb.PUSH_ABORT, txnPushError}, 5435 {roachpb.PENDING, 0, m*ns - 1, roachpb.PUSH_TOUCH, txnPushError}, 5436 {roachpb.PENDING, 0, m*ns + 1, roachpb.PUSH_TIMESTAMP, noError}, 5437 {roachpb.PENDING, 0, m*ns + 1, roachpb.PUSH_ABORT, noError}, 5438 {roachpb.PENDING, 0, m*ns + 1, roachpb.PUSH_TOUCH, noError}, 5439 {roachpb.PENDING, ns, m*ns + 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5440 {roachpb.PENDING, ns, m*ns + 1, roachpb.PUSH_ABORT, txnPushError}, 5441 {roachpb.PENDING, ns, m*ns + 1, roachpb.PUSH_TOUCH, txnPushError}, 5442 {roachpb.PENDING, ns, (m+1)*ns - 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5443 {roachpb.PENDING, ns, (m+1)*ns - 1, roachpb.PUSH_ABORT, txnPushError}, 5444 {roachpb.PENDING, ns, (m+1)*ns - 1, roachpb.PUSH_TOUCH, txnPushError}, 5445 {roachpb.PENDING, ns, (m+1)*ns + 1, roachpb.PUSH_TIMESTAMP, noError}, 5446 {roachpb.PENDING, ns, (m+1)*ns + 1, roachpb.PUSH_ABORT, noError}, 5447 {roachpb.PENDING, ns, (m+1)*ns + 1, roachpb.PUSH_TOUCH, noError}, 5448 // If the transaction record is STAGING then any case that previously 5449 // returned a TransactionPushError will continue to return that error, 5450 // but any case that previously succeeded in pushing the transaction 5451 // will now return an IndeterminateCommitError. 
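// To spell out the arithmetic assumed by this table: a record whose most
// recent activity (creation or last heartbeat) is at offset h is still
// considered live for pushes at timeOffsets below h+m*ns, which therefore
// fail with txnPushError; once timeOffset exceeds h+m*ns the record is
// expired, so the push succeeds for PENDING (and absent) records and hits
// the indeterminate-commit path for STAGING records. The exact boundary is
// deliberately not tested (see the note above about logical ticks).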
5452 {roachpb.STAGING, 0, 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5453 {roachpb.STAGING, 0, 1, roachpb.PUSH_ABORT, txnPushError}, 5454 {roachpb.STAGING, 0, 1, roachpb.PUSH_TOUCH, txnPushError}, 5455 {roachpb.STAGING, 0, ns, roachpb.PUSH_TIMESTAMP, txnPushError}, 5456 {roachpb.STAGING, 0, ns, roachpb.PUSH_ABORT, txnPushError}, 5457 {roachpb.STAGING, 0, ns, roachpb.PUSH_TOUCH, txnPushError}, 5458 {roachpb.STAGING, 0, m*ns - 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5459 {roachpb.STAGING, 0, m*ns - 1, roachpb.PUSH_ABORT, txnPushError}, 5460 {roachpb.STAGING, 0, m*ns - 1, roachpb.PUSH_TOUCH, txnPushError}, 5461 {roachpb.STAGING, 0, m*ns + 1, roachpb.PUSH_TIMESTAMP, indetCommitError}, 5462 {roachpb.STAGING, 0, m*ns + 1, roachpb.PUSH_ABORT, indetCommitError}, 5463 {roachpb.STAGING, 0, m*ns + 1, roachpb.PUSH_TOUCH, indetCommitError}, 5464 {roachpb.STAGING, ns, m*ns + 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5465 {roachpb.STAGING, ns, m*ns + 1, roachpb.PUSH_ABORT, txnPushError}, 5466 {roachpb.STAGING, ns, m*ns + 1, roachpb.PUSH_TOUCH, txnPushError}, 5467 {roachpb.STAGING, ns, (m+1)*ns - 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5468 {roachpb.STAGING, ns, (m+1)*ns - 1, roachpb.PUSH_ABORT, txnPushError}, 5469 {roachpb.STAGING, ns, (m+1)*ns - 1, roachpb.PUSH_TOUCH, txnPushError}, 5470 {roachpb.STAGING, ns, (m+1)*ns + 1, roachpb.PUSH_TIMESTAMP, indetCommitError}, 5471 {roachpb.STAGING, ns, (m+1)*ns + 1, roachpb.PUSH_ABORT, indetCommitError}, 5472 {roachpb.STAGING, ns, (m+1)*ns + 1, roachpb.PUSH_TOUCH, indetCommitError}, 5473 // Even when a transaction record doesn't exist, if the timestamp 5474 // from the PushTxn request indicates sufficiently recent client 5475 // activity, the push will fail. 5476 {-1, 0, 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5477 {-1, 0, 1, roachpb.PUSH_ABORT, txnPushError}, 5478 {-1, 0, 1, roachpb.PUSH_TOUCH, txnPushError}, 5479 {-1, 0, ns, roachpb.PUSH_TIMESTAMP, txnPushError}, 5480 {-1, 0, ns, roachpb.PUSH_ABORT, txnPushError}, 5481 {-1, 0, ns, roachpb.PUSH_TOUCH, txnPushError}, 5482 {-1, 0, m*ns - 1, roachpb.PUSH_TIMESTAMP, txnPushError}, 5483 {-1, 0, m*ns - 1, roachpb.PUSH_ABORT, txnPushError}, 5484 {-1, 0, m*ns - 1, roachpb.PUSH_TOUCH, txnPushError}, 5485 {-1, 0, m*ns + 1, roachpb.PUSH_TIMESTAMP, noError}, 5486 {-1, 0, m*ns + 1, roachpb.PUSH_ABORT, noError}, 5487 {-1, 0, m*ns + 1, roachpb.PUSH_TOUCH, noError}, 5488 } 5489 5490 for i, test := range testCases { 5491 key := roachpb.Key(fmt.Sprintf("key-%d", i)) 5492 pushee := newTransaction(fmt.Sprintf("test-%d", i), key, 1, tc.Clock()) 5493 pusher := newTransaction("pusher", key, 1, tc.Clock()) 5494 5495 // Add the pushee's heartbeat offset. 5496 if test.heartbeatOffset != 0 { 5497 if test.status == -1 { 5498 t.Fatal("cannot heartbeat transaction record if it doesn't exist") 5499 } 5500 pushee.LastHeartbeat = pushee.ReadTimestamp.Add(test.heartbeatOffset, 0) 5501 } 5502 5503 switch test.status { 5504 case -1: 5505 // Do nothing. 5506 case roachpb.PENDING: 5507 // Establish "start" of existing pushee's txn via HeartbeatTxn request 5508 // if the test case wants an existing transaction record. 
5509 hb, hbH := heartbeatArgs(pushee, pushee.WriteTimestamp) 5510 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), hbH, &hb); pErr != nil { 5511 t.Fatalf("%d: %s", i, pErr) 5512 } 5513 case roachpb.STAGING: 5514 et, etH := endTxnArgs(pushee, true) 5515 et.InFlightWrites = []roachpb.SequencedWrite{ 5516 {Key: key, Sequence: 1}, 5517 } 5518 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), etH, &et); pErr != nil { 5519 t.Fatalf("%d: %s", i, pErr) 5520 } 5521 default: 5522 t.Fatalf("unexpected status: %v", test.status) 5523 } 5524 5525 // Now, attempt to push the transaction. 5526 args := pushTxnArgs(pusher, pushee, test.pushType) 5527 args.PushTo = pushee.ReadTimestamp.Add(0, 1) 5528 h := roachpb.Header{Timestamp: args.PushTo} 5529 5530 // Set the manual clock to the txn start time + offset. This is the time 5531 // source used to detect transaction expiration. We make sure to set it 5532 // above h.Timestamp to avoid it being updated by the request. 5533 now := pushee.ReadTimestamp.Add(test.timeOffset, 0) 5534 tc.manualClock.Set(now.WallTime) 5535 5536 reply, pErr := tc.SendWrappedWith(h, &args) 5537 if !testutils.IsPError(pErr, test.expErr) { 5538 t.Fatalf("%d: expected error %q; got %v, args=%+v, reply=%+v", i, test.expErr, pErr, args, reply) 5539 } 5540 if reply != nil { 5541 if txn := reply.(*roachpb.PushTxnResponse).PusheeTxn; txn.Status != roachpb.ABORTED { 5542 t.Errorf("%d: expected aborted transaction, got %s", i, txn) 5543 } 5544 } 5545 } 5546 } 5547 5548 // TestResolveIntentPushTxnReplyTxn makes sure that no Txn is returned from 5549 // PushTxn and that it and ResolveIntent{,Range} can not be carried out in a 5550 // transaction. 5551 func TestResolveIntentPushTxnReplyTxn(t *testing.T) { 5552 defer leaktest.AfterTest(t)() 5553 tc := testContext{} 5554 stopper := stop.NewStopper() 5555 defer stopper.Stop(context.Background()) 5556 tc.Start(t, stopper) 5557 5558 b := tc.engine.NewBatch() 5559 defer b.Close() 5560 5561 txn := newTransaction("test", roachpb.Key("test"), 1, tc.Clock()) 5562 txnPushee := txn.Clone() 5563 pa := pushTxnArgs(txn, txnPushee, roachpb.PUSH_ABORT) 5564 pa.Force = true 5565 var ms enginepb.MVCCStats 5566 var ra roachpb.ResolveIntentRequest 5567 var rra roachpb.ResolveIntentRangeRequest 5568 5569 ctx := context.Background() 5570 h := roachpb.Header{Txn: txn, Timestamp: tc.Clock().Now()} 5571 // Should not be able to push or resolve in a transaction. 5572 if _, err := batcheval.PushTxn(ctx, b, batcheval.CommandArgs{Stats: &ms, Header: h, Args: &pa}, &roachpb.PushTxnResponse{}); !testutils.IsError(err, batcheval.ErrTransactionUnsupported.Error()) { 5573 t.Fatalf("transactional PushTxn returned unexpected error: %+v", err) 5574 } 5575 if _, err := batcheval.ResolveIntent(ctx, b, batcheval.CommandArgs{Stats: &ms, Header: h, Args: &ra}, &roachpb.ResolveIntentResponse{}); !testutils.IsError(err, batcheval.ErrTransactionUnsupported.Error()) { 5576 t.Fatalf("transactional ResolveIntent returned unexpected error: %+v", err) 5577 } 5578 if _, err := batcheval.ResolveIntentRange(ctx, b, batcheval.CommandArgs{Stats: &ms, Header: h, Args: &rra}, &roachpb.ResolveIntentRangeResponse{}); !testutils.IsError(err, batcheval.ErrTransactionUnsupported.Error()) { 5579 t.Fatalf("transactional ResolveIntentRange returned unexpected error: %+v", err) 5580 } 5581 5582 // Should not get a transaction back from PushTxn. It used to erroneously 5583 // return args.PusherTxn. 
5584 h = roachpb.Header{Timestamp: tc.Clock().Now()} 5585 var reply roachpb.PushTxnResponse 5586 if _, err := batcheval.PushTxn(ctx, b, batcheval.CommandArgs{EvalCtx: tc.repl, Stats: &ms, Header: h, Args: &pa}, &reply); err != nil { 5587 t.Fatal(err) 5588 } else if reply.Txn != nil { 5589 t.Fatalf("expected nil response txn, but got %s", reply.Txn) 5590 } 5591 } 5592 5593 // TestPushTxnPriorities verifies that txns with lower 5594 // priority are pushed; if priorities are equal, then the txns 5595 // are ordered by txn timestamp, with the more recent timestamp 5596 // being pushable. 5597 // TODO(tschottdorf): we should have a randomized version of this test which 5598 // also simulates the client proto and persisted record diverging. For example, 5599 // clients may be using a higher timestamp for their push or the persisted 5600 // record (which they are not using) might have a higher timestamp, and even 5601 // in the presence of such skewed information, conflicts between two (or more) 5602 // conflicting transactions must not deadlock (see #5685 for an example of this 5603 // happening with older code). 5604 func TestPushTxnPriorities(t *testing.T) { 5605 defer leaktest.AfterTest(t)() 5606 tc := testContext{} 5607 stopper := stop.NewStopper() 5608 defer stopper.Stop(context.Background()) 5609 cfg := TestStoreConfig(nil) 5610 cfg.TestingKnobs.DontRetryPushTxnFailures = true 5611 tc.StartWithStoreConfig(t, stopper, cfg) 5612 5613 now := tc.Clock().Now() 5614 ts1 := now.Add(1, 0) 5615 ts2 := now.Add(2, 0) 5616 testCases := []struct { 5617 pusherPriority, pusheePriority enginepb.TxnPriority 5618 pusherTS, pusheeTS hlc.Timestamp 5619 pushType roachpb.PushTxnType 5620 expSuccess bool 5621 }{ 5622 // Pusher with higher priority succeeds. 5623 {enginepb.MaxTxnPriority, enginepb.MinTxnPriority, ts1, ts1, roachpb.PUSH_TIMESTAMP, true}, 5624 {enginepb.MaxTxnPriority, enginepb.MinTxnPriority, ts1, ts1, roachpb.PUSH_ABORT, true}, 5625 // Pusher with lower priority fails. 5626 {enginepb.MinTxnPriority, enginepb.MaxTxnPriority, ts1, ts1, roachpb.PUSH_ABORT, false}, 5627 {enginepb.MinTxnPriority, enginepb.MaxTxnPriority, ts1, ts1, roachpb.PUSH_TIMESTAMP, false}, 5628 // Pusher with lower priority fails, even with older txn timestamp. 5629 {enginepb.MinTxnPriority, enginepb.MaxTxnPriority, ts1, ts2, roachpb.PUSH_ABORT, false}, 5630 // Pusher has lower priority, but older txn timestamp allows success if 5631 // !abort since there's nothing to do. 5632 {enginepb.MinTxnPriority, enginepb.MaxTxnPriority, ts1, ts2, roachpb.PUSH_TIMESTAMP, true}, 5633 // When touching, priority never wins. 5634 {enginepb.MaxTxnPriority, enginepb.MinTxnPriority, ts1, ts1, roachpb.PUSH_TOUCH, false}, 5635 {enginepb.MinTxnPriority, enginepb.MaxTxnPriority, ts1, ts1, roachpb.PUSH_TOUCH, false}, 5636 } 5637 5638 for i, test := range testCases { 5639 key := roachpb.Key(fmt.Sprintf("key-%d", i)) 5640 pusher := newTransaction("test", key, 1, tc.Clock()) 5641 pushee := newTransaction("test", key, 1, tc.Clock()) 5642 pusher.Priority = test.pusherPriority 5643 pushee.Priority = test.pusheePriority 5644 pusher.WriteTimestamp = test.pusherTS 5645 pushee.WriteTimestamp = test.pusheeTS 5646 // Make sure pusher ID is greater; if priorities and timestamps are the same, 5647 // the greater ID succeeds with push. 
5648 if bytes.Compare(pusher.ID.GetBytes(), pushee.ID.GetBytes()) < 0 { 5649 pusher.ID, pushee.ID = pushee.ID, pusher.ID 5650 } 5651 5652 put := putArgs(key, key) 5653 assignSeqNumsForReqs(pushee, &put) 5654 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{Txn: pushee}, &put); pErr != nil { 5655 t.Fatal(pErr) 5656 } 5657 // Now, attempt to push the transaction with intent epoch set appropriately. 5658 args := pushTxnArgs(pusher, pushee, test.pushType) 5659 5660 // Set header timestamp to the maximum of the pusher and pushee timestamps. 5661 h := roachpb.Header{Timestamp: args.PushTo} 5662 h.Timestamp.Forward(pushee.WriteTimestamp) 5663 _, pErr := tc.SendWrappedWith(h, &args) 5664 5665 if test.expSuccess != (pErr == nil) { 5666 t.Errorf("expected success on trial %d? %t; got err %s", i, test.expSuccess, pErr) 5667 } 5668 if pErr != nil { 5669 if _, ok := pErr.GetDetail().(*roachpb.TransactionPushError); !ok { 5670 t.Errorf("expected txn push error: %s", pErr) 5671 } 5672 } 5673 } 5674 } 5675 5676 // TestPushTxnPushTimestamp verifies that with PUSH_TIMESTAMP pushes (i.e. for 5677 // read/write conflict), the pushed txn keeps status PENDING, but has its txn 5678 // Timestamp moved forward to the pusher's txn Timestamp + 1. 5679 func TestPushTxnPushTimestamp(t *testing.T) { 5680 defer leaktest.AfterTest(t)() 5681 tc := testContext{} 5682 stopper := stop.NewStopper() 5683 defer stopper.Stop(context.Background()) 5684 tc.Start(t, stopper) 5685 5686 pusher := newTransaction("test", roachpb.Key("a"), 1, tc.Clock()) 5687 pushee := newTransaction("test", roachpb.Key("b"), 1, tc.Clock()) 5688 pusher.Priority = enginepb.MaxTxnPriority 5689 pushee.Priority = enginepb.MinTxnPriority // pusher will win 5690 now := tc.Clock().Now() 5691 pusher.WriteTimestamp = now.Add(50, 25) 5692 pushee.WriteTimestamp = now.Add(5, 1) 5693 5694 key := roachpb.Key("a") 5695 put := putArgs(key, key) 5696 assignSeqNumsForReqs(pushee, &put) 5697 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{Txn: pushee}, &put); pErr != nil { 5698 t.Fatal(pErr) 5699 } 5700 5701 // Now, push the transaction using a PUSH_TIMESTAMP push request. 5702 args := pushTxnArgs(pusher, pushee, roachpb.PUSH_TIMESTAMP) 5703 5704 resp, pErr := tc.SendWrappedWith(roachpb.Header{Timestamp: args.PushTo}, &args) 5705 if pErr != nil { 5706 t.Fatalf("unexpected error on push: %s", pErr) 5707 } 5708 expTS := pusher.WriteTimestamp 5709 expTS.Logical++ 5710 reply := resp.(*roachpb.PushTxnResponse) 5711 if reply.PusheeTxn.WriteTimestamp != expTS { 5712 t.Errorf("expected timestamp to be pushed to %+v; got %+v", expTS, reply.PusheeTxn.WriteTimestamp) 5713 } 5714 if reply.PusheeTxn.Status != roachpb.PENDING { 5715 t.Errorf("expected pushed txn to have status PENDING; got %s", reply.PusheeTxn.Status) 5716 } 5717 } 5718 5719 // TestPushTxnPushTimestampAlreadyPushed verifies that pushing 5720 // a timestamp forward which is already far enough forward is a simple 5721 // noop. We do this by ensuring that priorities would otherwise make 5722 // pushing impossible. 
5723 func TestPushTxnPushTimestampAlreadyPushed(t *testing.T) { 5724 defer leaktest.AfterTest(t)() 5725 tc := testContext{} 5726 stopper := stop.NewStopper() 5727 defer stopper.Stop(context.Background()) 5728 tc.Start(t, stopper) 5729 5730 pusher := newTransaction("test", roachpb.Key("a"), 1, tc.Clock()) 5731 pushee := newTransaction("test", roachpb.Key("b"), 1, tc.Clock()) 5732 now := tc.Clock().Now() 5733 pusher.WriteTimestamp = now.Add(50, 0) 5734 pushee.WriteTimestamp = now.Add(50, 1) 5735 5736 key := roachpb.Key("a") 5737 put := putArgs(key, key) 5738 assignSeqNumsForReqs(pushee, &put) 5739 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{Txn: pushee}, &put); pErr != nil { 5740 t.Fatal(pErr) 5741 } 5742 5743 // Now, push the transaction using a PUSH_TIMESTAMP push request. 5744 args := pushTxnArgs(pusher, pushee, roachpb.PUSH_TIMESTAMP) 5745 5746 resp, pErr := tc.SendWrappedWith(roachpb.Header{Timestamp: args.PushTo}, &args) 5747 if pErr != nil { 5748 t.Fatalf("unexpected pError on push: %s", pErr) 5749 } 5750 reply := resp.(*roachpb.PushTxnResponse) 5751 if reply.PusheeTxn.WriteTimestamp != pushee.WriteTimestamp { 5752 t.Errorf("expected timestamp to be equal to original %+v; got %+v", pushee.WriteTimestamp, reply.PusheeTxn.WriteTimestamp) 5753 } 5754 if reply.PusheeTxn.Status != roachpb.PENDING { 5755 t.Errorf("expected pushed txn to have status PENDING; got %s", reply.PusheeTxn.Status) 5756 } 5757 } 5758 5759 // TestPushTxnSerializableRestart simulates a transaction which is 5760 // started at t=0, fails serializable commit due to a read at a key 5761 // being written at t=1, is then restarted at the updated timestamp, 5762 // but before the txn can be retried, it's pushed to t=2, an even 5763 // higher timestamp. The test verifies that the serializable commit 5764 // fails yet again, preventing regression of a bug in which we blindly 5765 // overwrote the transaction record on the second epoch. 5766 func TestPushTxnSerializableRestart(t *testing.T) { 5767 defer leaktest.AfterTest(t)() 5768 ctx := context.Background() 5769 tc := testContext{} 5770 stopper := stop.NewStopper() 5771 defer stopper.Stop(ctx) 5772 tc.Start(t, stopper) 5773 5774 key := roachpb.Key("a") 5775 pushee := newTransaction("test", key, 1, tc.Clock()) 5776 pusher := newTransaction("test", key, 1, tc.Clock()) 5777 pushee.Priority = enginepb.MinTxnPriority 5778 pusher.Priority = enginepb.MaxTxnPriority // pusher will win 5779 5780 // Read from the key to increment the timestamp cache. 5781 gArgs := getArgs(key) 5782 if _, pErr := tc.SendWrapped(&gArgs); pErr != nil { 5783 t.Fatal(pErr) 5784 } 5785 5786 // Write to a key. 5787 put := putArgs(key, []byte("foo")) 5788 assignSeqNumsForReqs(pushee, &put) 5789 resp, pErr := kv.SendWrappedWith(ctx, tc.Sender(), roachpb.Header{Txn: pushee}, &put) 5790 if pErr != nil { 5791 t.Fatal(pErr) 5792 } 5793 pushee.Update(resp.Header().Txn) 5794 5795 // Try to end the pushee's transaction; should get a retry failure. 5796 etArgs, h := endTxnArgs(pushee, true /* commit */) 5797 assignSeqNumsForReqs(pushee, &etArgs) 5798 _, pErr = tc.SendWrappedWith(h, &etArgs) 5799 if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok { 5800 t.Fatalf("expected retry error; got %s", pErr) 5801 } 5802 pusheeCopy := *pushee 5803 pushee.Restart(1, 1, pusher.WriteTimestamp) 5804 5805 // Next push pushee to advance timestamp of txn record. 
5806 pusher.WriteTimestamp = tc.repl.store.Clock().Now() 5807 args := pushTxnArgs(pusher, &pusheeCopy, roachpb.PUSH_TIMESTAMP) 5808 if _, pErr := tc.SendWrapped(&args); pErr != nil { 5809 t.Fatal(pErr) 5810 } 5811 5812 // Try to end pushed transaction at restart timestamp, which is 5813 // earlier than its now-pushed timestamp. Should fail. 5814 var ba roachpb.BatchRequest 5815 ba.Add(&put) 5816 ba.Add(&etArgs) 5817 ba.Header.Txn = pushee 5818 assignSeqNumsForReqs(pushee, &put, &etArgs) 5819 _, pErr = tc.Sender().Send(ctx, ba) 5820 if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok { 5821 t.Fatalf("expected retry error; got %s", pErr) 5822 } 5823 // Verify that the returned transaction has timestamp equal to the pushed 5824 // timestamp. This verifies that the EndTxn found the pushed record and 5825 // propagated it. 5826 if txn := pErr.GetTxn(); txn.WriteTimestamp != pusher.WriteTimestamp.Next() { 5827 t.Errorf("expected retry error txn timestamp %s; got %s", pusher.WriteTimestamp, txn.WriteTimestamp) 5828 } 5829 } 5830 5831 // TestQueryIntentRequest tests QueryIntent requests in a number of scenarios, 5832 // both with and without the ErrorIfMissing option set to true. 5833 func TestQueryIntentRequest(t *testing.T) { 5834 defer leaktest.AfterTest(t)() 5835 5836 testutils.RunTrueAndFalse(t, "errIfMissing", func(t *testing.T, errIfMissing bool) { 5837 tc := testContext{} 5838 stopper := stop.NewStopper() 5839 defer stopper.Stop(context.Background()) 5840 tc.Start(t, stopper) 5841 5842 key1 := roachpb.Key("a") 5843 key2 := roachpb.Key("b") 5844 txn := newTransaction("test", key1, 1, tc.Clock()) 5845 txn2 := newTransaction("test2", key2, 1, tc.Clock()) 5846 5847 pArgs := putArgs(key1, []byte("value1")) 5848 assignSeqNumsForReqs(txn, &pArgs) 5849 if _, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txn}, &pArgs); pErr != nil { 5850 t.Fatal(pErr) 5851 } 5852 5853 queryIntent := func( 5854 key []byte, 5855 txnMeta enginepb.TxnMeta, 5856 baTxn *roachpb.Transaction, 5857 expectIntent bool, 5858 ) { 5859 t.Helper() 5860 qiArgs := queryIntentArgs(key, txnMeta, errIfMissing) 5861 qiRes, pErr := tc.SendWrappedWith(roachpb.Header{Txn: baTxn}, &qiArgs) 5862 if errIfMissing && !expectIntent { 5863 ownIntent := baTxn != nil && baTxn.ID == txnMeta.ID 5864 if ownIntent && txnMeta.WriteTimestamp.Less(txn.WriteTimestamp) { 5865 if _, ok := pErr.GetDetail().(*roachpb.TransactionRetryError); !ok { 5866 t.Fatalf("expected TransactionRetryError, found %v %v", txnMeta, pErr) 5867 } 5868 } else { 5869 if _, ok := pErr.GetDetail().(*roachpb.IntentMissingError); !ok { 5870 t.Fatalf("expected IntentMissingError, found %v", pErr) 5871 } 5872 } 5873 } else { 5874 if pErr != nil { 5875 t.Fatal(pErr) 5876 } 5877 if e, a := expectIntent, qiRes.(*roachpb.QueryIntentResponse).FoundIntent; e != a { 5878 t.Fatalf("expected FoundIntent=%t but FoundIntent=%t", e, a) 5879 } 5880 } 5881 } 5882 5883 for i, baTxn := range []*roachpb.Transaction{nil, txn, txn2} { 5884 // Query the intent with the correct txn meta. Should see intent regardless 5885 // of whether we're inside the txn or not. 5886 queryIntent(key1, txn.TxnMeta, baTxn, true) 5887 5888 // Query an intent on a different key for the same transaction. Should not 5889 // see an intent. 5890 keyPrevent := roachpb.Key(fmt.Sprintf("%s-%t-%d", key2, errIfMissing, i)) 5891 queryIntent(keyPrevent, txn.TxnMeta, baTxn, false) 5892 5893 // Query an intent on the same key for a different transaction. Should not 5894 // see an intent. 
5895 diffIDMeta := txn.TxnMeta 5896 diffIDMeta.ID = txn2.ID 5897 queryIntent(key1, diffIDMeta, baTxn, false) 5898 5899 // Query the intent with a larger epoch. Should not see an intent. 5900 largerEpochMeta := txn.TxnMeta 5901 largerEpochMeta.Epoch++ 5902 queryIntent(key1, largerEpochMeta, baTxn, false) 5903 5904 // Query the intent with a smaller epoch. Should not see an intent. 5905 smallerEpochMeta := txn.TxnMeta 5906 smallerEpochMeta.Epoch-- 5907 queryIntent(key1, smallerEpochMeta, baTxn, false) 5908 5909 // Query the intent with a larger timestamp. Should see an intent. 5910 // See the comment on QueryIntentRequest.Txn for an explanation of why 5911 // the request behaves like this. 5912 largerTSMeta := txn.TxnMeta 5913 largerTSMeta.WriteTimestamp = largerTSMeta.WriteTimestamp.Next() 5914 queryIntent(key1, largerTSMeta, baTxn, true) 5915 5916 // Query the intent with a smaller timestamp. Should not see an 5917 // intent unless we're querying our own intent, in which case 5918 // the smaller timestamp will be forwarded to the batch header 5919 // transaction's timestamp. 5920 smallerTSMeta := txn.TxnMeta 5921 smallerTSMeta.WriteTimestamp = smallerTSMeta.WriteTimestamp.Prev() 5922 queryIntent(key1, smallerTSMeta, baTxn, baTxn == txn) 5923 5924 // Query the intent with a larger sequence number. Should not see an intent. 5925 largerSeqMeta := txn.TxnMeta 5926 largerSeqMeta.Sequence++ 5927 queryIntent(key1, largerSeqMeta, baTxn, false) 5928 5929 // Query the intent with a smaller sequence number. Should see an intent. 5930 // See the comment on QueryIntentRequest.Txn for an explanation of why 5931 // the request behaves like this. 5932 smallerSeqMeta := txn.TxnMeta 5933 smallerSeqMeta.Sequence-- 5934 queryIntent(key1, smallerSeqMeta, baTxn, true) 5935 5936 // Perform a write at keyPrevent. The associated intent at this key 5937 // was queried and found to be missing, so this write should be 5938 // prevented and pushed to a higher timestamp. 5939 txnCopy := *txn 5940 pArgs2 := putArgs(keyPrevent, []byte("value2")) 5941 assignSeqNumsForReqs(&txnCopy, &pArgs2) 5942 ba := roachpb.BatchRequest{} 5943 ba.Header = roachpb.Header{Txn: &txnCopy} 5944 ba.Add(&pArgs2) 5945 br, pErr := tc.Sender().Send(context.Background(), ba) 5946 if pErr != nil { 5947 t.Fatal(pErr) 5948 } 5949 if br.Txn.WriteTimestamp == br.Txn.ReadTimestamp { 5950 t.Fatalf("transaction timestamp not bumped: %v", br.Txn) 5951 } 5952 } 5953 }) 5954 } 5955 5956 // TestReplicaResolveIntentRange verifies resolving a range of intents. 5957 func TestReplicaResolveIntentRange(t *testing.T) { 5958 defer leaktest.AfterTest(t)() 5959 tc := testContext{} 5960 stopper := stop.NewStopper() 5961 defer stopper.Stop(context.Background()) 5962 tc.Start(t, stopper) 5963 5964 keys := []roachpb.Key{roachpb.Key("a"), roachpb.Key("b")} 5965 txn := newTransaction("test", keys[0], 1, tc.Clock()) 5966 5967 // Put two values transactionally. 5968 for _, key := range keys { 5969 pArgs := putArgs(key, []byte("value1")) 5970 assignSeqNumsForReqs(txn, &pArgs) 5971 if _, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txn}, &pArgs); pErr != nil { 5972 t.Fatal(pErr) 5973 } 5974 } 5975 5976 // Resolve the intents. 
5977 rArgs := &roachpb.ResolveIntentRangeRequest{ 5978 RequestHeader: roachpb.RequestHeader{ 5979 Key: roachpb.Key("a"), 5980 EndKey: roachpb.Key("c"), 5981 }, 5982 IntentTxn: txn.TxnMeta, 5983 Status: roachpb.COMMITTED, 5984 IgnoredSeqNums: txn.IgnoredSeqNums, 5985 } 5986 if _, pErr := tc.SendWrapped(rArgs); pErr != nil { 5987 t.Fatal(pErr) 5988 } 5989 5990 // Do a consistent scan to verify intents have been cleared. 5991 sArgs := scanArgs(roachpb.Key("a"), roachpb.Key("c")) 5992 reply, pErr := tc.SendWrapped(sArgs) 5993 if pErr != nil { 5994 t.Fatalf("unexpected error on scan: %s", pErr) 5995 } 5996 sReply := reply.(*roachpb.ScanResponse) 5997 if len(sReply.Rows) != 2 { 5998 t.Errorf("expected 2 rows; got %v", sReply.Rows) 5999 } 6000 } 6001 6002 func verifyRangeStats( 6003 reader storage.Reader, rangeID roachpb.RangeID, expMS enginepb.MVCCStats, 6004 ) error { 6005 ms, err := stateloader.Make(rangeID).LoadMVCCStats(context.Background(), reader) 6006 if err != nil { 6007 return err 6008 } 6009 if ms != expMS { 6010 return errors.Errorf("expected and actual stats differ:\n%s", pretty.Diff(expMS, ms)) 6011 } 6012 return nil 6013 } 6014 6015 // TestRangeStatsComputation verifies that commands executed against a 6016 // range update the range stat counters. The stat values are 6017 // empirically derived; we're really just testing that they increment 6018 // in the right ways, not the exact amounts. If the encodings change, 6019 // will need to update this test. 6020 func TestRangeStatsComputation(t *testing.T) { 6021 defer leaktest.AfterTest(t)() 6022 tc := testContext{ 6023 bootstrapMode: bootstrapRangeOnly, 6024 } 6025 stopper := stop.NewStopper() 6026 defer stopper.Stop(context.Background()) 6027 tc.Start(t, stopper) 6028 6029 baseStats := initialStats() 6030 // The initial stats contain no lease, but there will be an initial 6031 // nontrivial lease requested with the first write below. 6032 baseStats.Add(enginepb.MVCCStats{ 6033 SysBytes: 24, 6034 }) 6035 6036 // Our clock might not be set to zero. 6037 baseStats.LastUpdateNanos = tc.manualClock.UnixNano() 6038 6039 // Put a value. 6040 pArgs := putArgs([]byte("a"), []byte("value1")) 6041 6042 if _, pErr := tc.SendWrapped(&pArgs); pErr != nil { 6043 t.Fatal(pErr) 6044 } 6045 expMS := baseStats 6046 expMS.Add(enginepb.MVCCStats{ 6047 LiveBytes: 25, 6048 KeyBytes: 14, 6049 ValBytes: 11, 6050 LiveCount: 1, 6051 KeyCount: 1, 6052 ValCount: 1, 6053 }) 6054 6055 if err := verifyRangeStats(tc.engine, tc.repl.RangeID, expMS); err != nil { 6056 t.Fatal(err) 6057 } 6058 6059 // Put a 2nd value transactionally. 6060 pArgs = putArgs([]byte("b"), []byte("value2")) 6061 6062 // Consistent UUID needed for a deterministic SysBytes value. This is because 6063 // a random UUID could have a 0x00 byte that would be escaped by the encoding, 6064 // increasing the encoded size and throwing off statistics verification. 
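// (The fixed UUID below happens to contain no 0x00 bytes, so its encoded
// length, and therefore SysBytes, stays deterministic.)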
6065 uuid, err := uuid.FromString("ea5b9590-a157-421b-8b93-a4caa2c41137") 6066 if err != nil { 6067 t.Fatal(err) 6068 } 6069 txn := newTransaction("test", pArgs.Key, 1, tc.Clock()) 6070 txn.Priority = 123 // So we don't have random values messing with the byte counts on encoding 6071 txn.ID = uuid 6072 6073 assignSeqNumsForReqs(txn, &pArgs) 6074 if _, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txn}, &pArgs); pErr != nil { 6075 t.Fatal(pErr) 6076 } 6077 expMS = baseStats 6078 expMS.Add(enginepb.MVCCStats{ 6079 LiveBytes: 101, 6080 KeyBytes: 28, 6081 ValBytes: 73, 6082 IntentBytes: 23, 6083 LiveCount: 2, 6084 KeyCount: 2, 6085 ValCount: 2, 6086 IntentCount: 1, 6087 }) 6088 if err := verifyRangeStats(tc.engine, tc.repl.RangeID, expMS); err != nil { 6089 t.Fatal(err) 6090 } 6091 6092 // Resolve the 2nd value. 6093 rArgs := &roachpb.ResolveIntentRequest{ 6094 RequestHeader: roachpb.RequestHeader{ 6095 Key: pArgs.Key, 6096 }, 6097 IntentTxn: txn.TxnMeta, 6098 Status: roachpb.COMMITTED, 6099 IgnoredSeqNums: txn.IgnoredSeqNums, 6100 } 6101 6102 if _, pErr := tc.SendWrapped(rArgs); pErr != nil { 6103 t.Fatal(pErr) 6104 } 6105 expMS = baseStats 6106 expMS.Add(enginepb.MVCCStats{ 6107 LiveBytes: 50, 6108 KeyBytes: 28, 6109 ValBytes: 22, 6110 LiveCount: 2, 6111 KeyCount: 2, 6112 ValCount: 2, 6113 }) 6114 if err := verifyRangeStats(tc.engine, tc.repl.RangeID, expMS); err != nil { 6115 t.Fatal(err) 6116 } 6117 6118 // Delete the 1st value. 6119 dArgs := deleteArgs([]byte("a")) 6120 6121 if _, pErr := tc.SendWrapped(&dArgs); pErr != nil { 6122 t.Fatal(pErr) 6123 } 6124 expMS = baseStats 6125 expMS.Add(enginepb.MVCCStats{ 6126 LiveBytes: 25, 6127 KeyBytes: 40, 6128 ValBytes: 22, 6129 LiveCount: 1, 6130 KeyCount: 2, 6131 ValCount: 3, 6132 }) 6133 if err := verifyRangeStats(tc.engine, tc.repl.RangeID, expMS); err != nil { 6134 t.Fatal(err) 6135 } 6136 } 6137 6138 // TestMerge verifies that the Merge command is behaving as expected. Time 6139 // series data is used, as it is the only data type currently fully supported by 6140 // the merge command. 
6141 func TestMerge(t *testing.T) { 6142 defer leaktest.AfterTest(t)() 6143 tc := testContext{} 6144 stopper := stop.NewStopper() 6145 defer stopper.Stop(context.Background()) 6146 tc.Start(t, stopper) 6147 6148 key := []byte("mergedkey") 6149 args := make([]roachpb.InternalTimeSeriesData, 3) 6150 expected := roachpb.InternalTimeSeriesData{ 6151 StartTimestampNanos: 0, 6152 SampleDurationNanos: 1000, 6153 Samples: make([]roachpb.InternalTimeSeriesSample, len(args)), 6154 } 6155 6156 for i := 0; i < len(args); i++ { 6157 sample := roachpb.InternalTimeSeriesSample{ 6158 Offset: int32(i), 6159 Count: 1, 6160 Sum: float64(i), 6161 } 6162 args[i] = roachpb.InternalTimeSeriesData{ 6163 StartTimestampNanos: expected.StartTimestampNanos, 6164 SampleDurationNanos: expected.SampleDurationNanos, 6165 Samples: []roachpb.InternalTimeSeriesSample{sample}, 6166 } 6167 expected.Samples[i] = sample 6168 } 6169 6170 for _, arg := range args { 6171 var v roachpb.Value 6172 if err := v.SetProto(&arg); err != nil { 6173 t.Fatal(err) 6174 } 6175 mergeArgs := internalMergeArgs(key, v) 6176 if _, pErr := tc.SendWrapped(&mergeArgs); pErr != nil { 6177 t.Fatalf("unexpected error from Merge: %s", pErr) 6178 } 6179 } 6180 6181 getArgs := getArgs(key) 6182 6183 reply, pErr := tc.SendWrapped(&getArgs) 6184 if pErr != nil { 6185 t.Fatalf("unexpected error from Get: %s", pErr) 6186 } 6187 resp := reply.(*roachpb.GetResponse) 6188 if resp.Value == nil { 6189 t.Fatal("GetResponse had nil value") 6190 } 6191 6192 var actual roachpb.InternalTimeSeriesData 6193 if err := resp.Value.GetProto(&actual); err != nil { 6194 t.Fatal(err) 6195 } 6196 if !proto.Equal(&actual, &expected) { 6197 t.Errorf("Get did not return expected value: %v != %v", actual, expected) 6198 } 6199 } 6200 6201 // TestConditionFailedError tests that a ConditionFailedError correctly 6202 // bubbles up from MVCC to Range. 6203 func TestConditionFailedError(t *testing.T) { 6204 defer leaktest.AfterTest(t)() 6205 tc := testContext{} 6206 stopper := stop.NewStopper() 6207 defer stopper.Stop(context.Background()) 6208 tc.Start(t, stopper) 6209 6210 key := []byte("k") 6211 value := []byte("quack") 6212 pArgs := putArgs(key, value) 6213 if _, pErr := tc.SendWrapped(&pArgs); pErr != nil { 6214 t.Fatal(pErr) 6215 } 6216 6217 cpArgs := cPutArgs(key, value, []byte("moo")) 6218 _, pErr := tc.SendWrappedWith(roachpb.Header{Timestamp: hlc.MinTimestamp}, &cpArgs) 6219 if cErr, ok := pErr.GetDetail().(*roachpb.ConditionFailedError); pErr == nil || !ok { 6220 t.Fatalf("expected ConditionFailedError, got %T with content %+v", pErr, pErr) 6221 } else if valueBytes, err := cErr.ActualValue.GetBytes(); err != nil { 6222 t.Fatal(err) 6223 } else if cErr.ActualValue == nil || !bytes.Equal(valueBytes, value) { 6224 t.Errorf("ConditionFailedError with bytes %q expected, but got %+v", value, cErr.ActualValue) 6225 } 6226 } 6227 6228 // TestReplicaSetsEqual tests to ensure that intersectReplicaSets 6229 // returns the correct responses. 
6230 func TestReplicaSetsEqual(t *testing.T) { 6231 defer leaktest.AfterTest(t)() 6232 testData := []struct { 6233 expected bool 6234 a []roachpb.ReplicaDescriptor 6235 b []roachpb.ReplicaDescriptor 6236 }{ 6237 {true, []roachpb.ReplicaDescriptor{}, []roachpb.ReplicaDescriptor{}}, 6238 {true, createReplicaSets([]roachpb.StoreID{1}), createReplicaSets([]roachpb.StoreID{1})}, 6239 {true, createReplicaSets([]roachpb.StoreID{1, 2}), createReplicaSets([]roachpb.StoreID{1, 2})}, 6240 {true, createReplicaSets([]roachpb.StoreID{1, 2}), createReplicaSets([]roachpb.StoreID{2, 1})}, 6241 {false, createReplicaSets([]roachpb.StoreID{1}), createReplicaSets([]roachpb.StoreID{2})}, 6242 {false, createReplicaSets([]roachpb.StoreID{1, 2}), createReplicaSets([]roachpb.StoreID{2})}, 6243 {false, createReplicaSets([]roachpb.StoreID{1, 2}), createReplicaSets([]roachpb.StoreID{1})}, 6244 {false, createReplicaSets([]roachpb.StoreID{}), createReplicaSets([]roachpb.StoreID{1})}, 6245 {true, createReplicaSets([]roachpb.StoreID{1, 2, 3}), createReplicaSets([]roachpb.StoreID{2, 3, 1})}, 6246 {true, createReplicaSets([]roachpb.StoreID{1, 1}), createReplicaSets([]roachpb.StoreID{1, 1})}, 6247 {false, createReplicaSets([]roachpb.StoreID{1, 1}), createReplicaSets([]roachpb.StoreID{1, 1, 1})}, 6248 {true, createReplicaSets([]roachpb.StoreID{1, 2, 3, 1, 2, 3}), createReplicaSets([]roachpb.StoreID{1, 1, 2, 2, 3, 3})}, 6249 } 6250 for _, test := range testData { 6251 if replicaSetsEqual(test.a, test.b) != test.expected { 6252 t.Fatalf("unexpected replica intersection: %+v", test) 6253 } 6254 } 6255 } 6256 6257 func TestAppliedIndex(t *testing.T) { 6258 defer leaktest.AfterTest(t)() 6259 tc := testContext{} 6260 stopper := stop.NewStopper() 6261 defer stopper.Stop(context.Background()) 6262 tc.Start(t, stopper) 6263 6264 var appliedIndex uint64 6265 var sum int64 6266 for i := int64(1); i <= 10; i++ { 6267 args := incrementArgs([]byte("a"), i) 6268 6269 resp, pErr := tc.SendWrapped(args) 6270 if pErr != nil { 6271 t.Fatal(pErr) 6272 } 6273 reply := resp.(*roachpb.IncrementResponse) 6274 sum += i 6275 6276 if reply.NewValue != sum { 6277 t.Errorf("expected %d, got %d", sum, reply.NewValue) 6278 } 6279 6280 tc.repl.mu.Lock() 6281 newAppliedIndex := tc.repl.mu.state.RaftAppliedIndex 6282 tc.repl.mu.Unlock() 6283 if newAppliedIndex <= appliedIndex { 6284 t.Errorf("appliedIndex did not advance. Was %d, now %d", appliedIndex, newAppliedIndex) 6285 } 6286 appliedIndex = newAppliedIndex 6287 } 6288 } 6289 6290 // TestReplicaCorruption verifies that a replicaCorruptionError correctly marks 6291 // the range as corrupt. 6292 func TestReplicaCorruption(t *testing.T) { 6293 defer leaktest.AfterTest(t)() 6294 6295 var exitStatus int 6296 log.SetExitFunc(true /* hideStack */, func(i int) { 6297 exitStatus = i 6298 }) 6299 defer log.ResetExitFunc() 6300 6301 tsc := TestStoreConfig(nil) 6302 tsc.TestingKnobs.EvalKnobs.TestingEvalFilter = 6303 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error { 6304 if filterArgs.Req.Header().Key.Equal(roachpb.Key("boom")) { 6305 return roachpb.NewError(roachpb.NewReplicaCorruptionError(errors.New("boom"))) 6306 } 6307 return nil 6308 } 6309 6310 tc := testContext{} 6311 stopper := stop.NewStopper() 6312 defer stopper.Stop(context.Background()) 6313 tc.StartWithStoreConfig(t, stopper, tsc) 6314 6315 // First send a regular command. 
6316 args := putArgs(roachpb.Key("test1"), []byte("value")) 6317 if _, pErr := tc.SendWrapped(&args); pErr != nil { 6318 t.Fatal(pErr) 6319 } 6320 6321 key := roachpb.Key("boom") 6322 6323 args = putArgs(key, []byte("value")) 6324 _, pErr := tc.SendWrapped(&args) 6325 if !testutils.IsPError(pErr, "replica corruption \\(processed=true\\)") { 6326 t.Fatalf("unexpected error: %s", pErr) 6327 } 6328 6329 // Should have laid down marker file to prevent startup. 6330 _, err := os.Stat(base.PreventedStartupFile(tc.engine.GetAuxiliaryDir())) 6331 require.NoError(t, err) 6332 6333 // Should have triggered fatal error. 6334 if exitStatus != 255 { 6335 t.Fatalf("unexpected exit status %d", exitStatus) 6336 } 6337 } 6338 6339 // TestChangeReplicasDuplicateError tests that a replica change that would 6340 // use a NodeID twice in the replica configuration fails. 6341 func TestChangeReplicasDuplicateError(t *testing.T) { 6342 defer leaktest.AfterTest(t)() 6343 tc := testContext{} 6344 stopper := stop.NewStopper() 6345 defer stopper.Stop(context.Background()) 6346 tc.Start(t, stopper) 6347 6348 chgs := roachpb.MakeReplicationChanges(roachpb.ADD_REPLICA, roachpb.ReplicationTarget{ 6349 NodeID: tc.store.Ident.NodeID, 6350 StoreID: 9999, 6351 }) 6352 if _, err := tc.repl.ChangeReplicas(context.Background(), tc.repl.Desc(), SnapshotRequest_REBALANCE, kvserverpb.ReasonRebalance, "", chgs); err == nil || !strings.Contains(err.Error(), "node already has a replica") { 6353 t.Fatalf("must not be able to add second replica to same node (err=%+v)", err) 6354 } 6355 } 6356 6357 // TestReplicaDanglingMetaIntent creates a dangling intent on a meta2 6358 // record and verifies that RangeLookup scans behave 6359 // appropriately. Normally, the old value and a write intent error 6360 // should be returned. If IgnoreIntents is specified, then a random 6361 // choice of old or new is returned with no error. 6362 // TODO(tschottdorf): add a test in which there is a dangling intent on a 6363 // descriptor we would've otherwise discarded in a reverse scan; verify that 6364 // we don't erroneously return that descriptor (recently fixed bug). 6365 func TestReplicaDanglingMetaIntent(t *testing.T) { 6366 defer leaktest.AfterTest(t)() 6367 6368 testutils.RunTrueAndFalse(t, "reverse", func(t *testing.T, reverse bool) { 6369 tc := testContext{} 6370 ctx := context.Background() 6371 stopper := stop.NewStopper() 6372 defer stopper.Stop(ctx) 6373 cfg := TestStoreConfig(nil) 6374 cfg.TestingKnobs.DontPushOnWriteIntentError = true 6375 tc.StartWithStoreConfig(t, stopper, cfg) 6376 6377 key := roachpb.Key("a") 6378 6379 // Get original meta2 descriptor. 6380 rs, _, err := kv.RangeLookup(ctx, tc.Sender(), key, roachpb.READ_UNCOMMITTED, 0, reverse) 6381 if err != nil { 6382 t.Fatal(err) 6383 } 6384 origDesc := rs[0] 6385 6386 newDesc := origDesc 6387 newDesc.EndKey, err = keys.Addr(key) 6388 if err != nil { 6389 t.Fatal(err) 6390 } 6391 6392 // Write the new descriptor as an intent. 6393 data, err := protoutil.Marshal(&newDesc) 6394 if err != nil { 6395 t.Fatal(err) 6396 } 6397 txn := newTransaction("test", key, 1, tc.Clock()) 6398 // Officially begin the transaction. If not for this, the intent resolution 6399 // machinery would simply remove the intent we write below, see #3020. 6400 // We send directly to Replica throughout this test, so there's no danger 6401 // of the Store aborting this transaction (i.e. we don't have to set a high 6402 // priority). 
6403 pArgs := putArgs(keys.RangeMetaKey(roachpb.RKey(key)).AsRawKey(), data) 6404 assignSeqNumsForReqs(txn, &pArgs) 6405 if _, pErr := kv.SendWrappedWith(ctx, tc.Sender(), roachpb.Header{Txn: txn}, &pArgs); pErr != nil { 6406 t.Fatal(pErr) 6407 } 6408 6409 // Now lookup the range; should get the value. Since the lookup is 6410 // not consistent, there's no WriteIntentError. It should return both 6411 // the committed descriptor and the intent descriptor. 6412 // 6413 // Note that 'A' < 'a'. 6414 newKey := roachpb.Key{'A'} 6415 rs, _, err = kv.RangeLookup(ctx, tc.Sender(), newKey, roachpb.READ_UNCOMMITTED, 0, reverse) 6416 if err != nil { 6417 t.Fatal(err) 6418 } 6419 if len(rs) != 2 { 6420 t.Fatalf("expected 2 matching range descriptors, found %v", rs) 6421 } 6422 if desc := rs[0]; !reflect.DeepEqual(desc, origDesc) { 6423 t.Errorf("expected original descriptor %s; got %s", &origDesc, &desc) 6424 } 6425 if intentDesc := rs[1]; !reflect.DeepEqual(intentDesc, newDesc) { 6426 t.Errorf("expected original descriptor %s; got %s", &newDesc, &intentDesc) 6427 } 6428 6429 // Switch to consistent lookups, which should run into the intent. 6430 _, _, err = kv.RangeLookup(ctx, tc.Sender(), newKey, roachpb.CONSISTENT, 0, reverse) 6431 if !errors.HasType(err, (*roachpb.WriteIntentError)(nil)) { 6432 t.Fatalf("expected WriteIntentError, not %s", err) 6433 } 6434 }) 6435 } 6436 6437 // TestReplicaLookupUseReverseScan verifies the correctness of the results which are retrieved 6438 // from RangeLookup by using ReverseScan. 6439 func TestReplicaLookupUseReverseScan(t *testing.T) { 6440 defer leaktest.AfterTest(t)() 6441 6442 tc := testContext{} 6443 ctx := context.Background() 6444 stopper := stop.NewStopper() 6445 defer stopper.Stop(ctx) 6446 tc.Start(t, stopper) 6447 6448 splitRangeBefore := roachpb.RangeDescriptor{RangeID: 3, StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("h")} 6449 splitRangeLHS := roachpb.RangeDescriptor{RangeID: 3, StartKey: roachpb.RKey("c"), EndKey: roachpb.RKey("f")} 6450 splitRangeRHS := roachpb.RangeDescriptor{RangeID: 5, StartKey: roachpb.RKey("f"), EndKey: roachpb.RKey("h")} 6451 6452 // Test ranges: ["a","c"), ["c","f"), ["f","h") and ["h","y"). 6453 testRanges := []roachpb.RangeDescriptor{ 6454 {RangeID: 2, StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("c")}, 6455 splitRangeBefore, 6456 {RangeID: 4, StartKey: roachpb.RKey("h"), EndKey: roachpb.RKey("y")}, 6457 } 6458 6459 testCases := []struct { 6460 key string 6461 expected roachpb.RangeDescriptor 6462 }{ 6463 // For testRanges[0|1|3] there is no intent. A key in the middle 6464 // and the end key should both give us the range itself. 6465 {key: "b", expected: testRanges[0]}, 6466 {key: "c", expected: testRanges[0]}, 6467 {key: "d", expected: testRanges[1]}, 6468 {key: "f", expected: testRanges[1]}, 6469 {key: "j", expected: testRanges[2]}, 6470 // testRanges[2] has an intent, so the inconsistent scan will read 6471 // an old value (nil). Since we're in reverse mode, testRanges[1] 6472 // is the result. 6473 {key: "g", expected: testRanges[1]}, 6474 {key: "h", expected: testRanges[1]}, 6475 } 6476 6477 { 6478 txn := newTransaction("test", roachpb.Key{}, 1, tc.Clock()) 6479 for _, r := range testRanges { 6480 // Write the new descriptor as an intent. 
6481 data, err := protoutil.Marshal(&r) 6482 if err != nil { 6483 t.Fatal(err) 6484 } 6485 pArgs := putArgs(keys.RangeMetaKey(r.EndKey).AsRawKey(), data) 6486 assignSeqNumsForReqs(txn, &pArgs) 6487 6488 if _, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txn}, &pArgs); pErr != nil { 6489 t.Fatal(pErr) 6490 } 6491 } 6492 6493 // Resolve the intents. 6494 rArgs := &roachpb.ResolveIntentRangeRequest{ 6495 RequestHeader: roachpb.RequestHeader{ 6496 Key: keys.RangeMetaKey(roachpb.RKey("a")).AsRawKey(), 6497 EndKey: keys.RangeMetaKey(roachpb.RKey("z")).AsRawKey(), 6498 }, 6499 IntentTxn: txn.TxnMeta, 6500 Status: roachpb.COMMITTED, 6501 IgnoredSeqNums: txn.IgnoredSeqNums, 6502 } 6503 if _, pErr := tc.SendWrapped(rArgs); pErr != nil { 6504 t.Fatal(pErr) 6505 } 6506 } 6507 6508 // Test reverse RangeLookup scan without intents. 6509 for _, c := range testCases { 6510 rs, _, err := kv.RangeLookup(ctx, tc.Sender(), roachpb.Key(c.key), 6511 roachpb.READ_UNCOMMITTED, 0, true) 6512 if err != nil { 6513 t.Fatal(err) 6514 } 6515 seen := rs[0] 6516 if !(seen.StartKey.Equal(c.expected.StartKey) && seen.EndKey.Equal(c.expected.EndKey)) { 6517 t.Errorf("expected descriptor %s; got %s", &c.expected, &seen) 6518 } 6519 } 6520 6521 // Write the new descriptors as intents. 6522 txn := newTransaction("test", roachpb.Key{}, 1, tc.Clock()) 6523 for _, r := range []roachpb.RangeDescriptor{splitRangeLHS, splitRangeRHS} { 6524 // Write the new descriptor as an intent. 6525 data, err := protoutil.Marshal(&r) 6526 if err != nil { 6527 t.Fatal(err) 6528 } 6529 pArgs := putArgs(keys.RangeMetaKey(r.EndKey).AsRawKey(), data) 6530 assignSeqNumsForReqs(txn, &pArgs) 6531 6532 if _, pErr := tc.SendWrappedWith(roachpb.Header{Txn: txn}, &pArgs); pErr != nil { 6533 t.Fatal(pErr) 6534 } 6535 } 6536 6537 // Test reverse RangeLookup scan with intents. 6538 for _, c := range testCases { 6539 rs, _, err := kv.RangeLookup(ctx, tc.Sender(), roachpb.Key(c.key), 6540 roachpb.READ_UNCOMMITTED, 0, true) 6541 if err != nil { 6542 t.Fatal(err) 6543 } 6544 seen := rs[0] 6545 if !(seen.StartKey.Equal(c.expected.StartKey) && seen.EndKey.Equal(c.expected.EndKey)) { 6546 t.Errorf("expected descriptor %s; got %s", &c.expected, &seen) 6547 } 6548 } 6549 } 6550 6551 func TestRangeLookup(t *testing.T) { 6552 defer leaktest.AfterTest(t)() 6553 tc := testContext{} 6554 ctx := context.Background() 6555 stopper := stop.NewStopper() 6556 defer stopper.Stop(ctx) 6557 tc.Start(t, stopper) 6558 6559 expected := []roachpb.RangeDescriptor{*tc.repl.Desc()} 6560 testCases := []struct { 6561 key roachpb.RKey 6562 reverse bool 6563 expected []roachpb.RangeDescriptor 6564 }{ 6565 // Test with the first range (StartKey==KeyMin). Normally we look 6566 // up this range in gossip instead of executing the RPC, but 6567 // RangeLookup is still used when up-to-date information is 6568 // required. 6569 {key: roachpb.RKey(keys.Meta1Prefix), reverse: false, expected: expected}, 6570 // Test with the last key in a meta prefix. This is an edge case in the 6571 // implementation. 
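// Note the asymmetry in the cases that follow: a forward lookup at KeyMax is
// expected to return no descriptor, whereas the reverse lookups at both
// Meta2KeyMax and KeyMax are expected to resolve to the test range's
// descriptor.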
6572 {key: roachpb.RKey(keys.Meta2KeyMax), reverse: false, expected: expected}, 6573 {key: roachpb.RKey(roachpb.KeyMax), reverse: false, expected: nil}, 6574 {key: roachpb.RKey(keys.Meta2KeyMax), reverse: true, expected: expected}, 6575 {key: roachpb.RKey(roachpb.KeyMax), reverse: true, expected: expected}, 6576 } 6577 6578 for i, c := range testCases { 6579 rs, _, err := kv.RangeLookup(ctx, tc.Sender(), c.key.AsRawKey(), 6580 roachpb.CONSISTENT, 0, c.reverse) 6581 if err != nil { 6582 if c.expected != nil { 6583 t.Fatal(err) 6584 } 6585 } else { 6586 if !reflect.DeepEqual(rs, c.expected) { 6587 t.Errorf("%d: expected %+v, got %+v", i, c.expected, rs) 6588 } 6589 } 6590 } 6591 } 6592 6593 // TestRequestLeaderEncounterGroupDeleteError verifies that a lease request which fails with 6594 // RaftGroupDeletedError is converted to a RangeNotFoundError in the Store. 6595 func TestRequestLeaderEncounterGroupDeleteError(t *testing.T) { 6596 defer leaktest.AfterTest(t)() 6597 stopper := stop.NewStopper() 6598 defer stopper.Stop(context.Background()) 6599 6600 // Mock propose to return a roachpb.RaftGroupDeletedError. 6601 var active int32 6602 proposeFn := func(fArgs kvserverbase.ProposalFilterArgs) *roachpb.Error { 6603 if atomic.LoadInt32(&active) == 1 { 6604 return roachpb.NewError(&roachpb.RaftGroupDeletedError{}) 6605 } 6606 return nil 6607 } 6608 6609 manual := hlc.NewManualClock(123) 6610 tc := testContext{manualClock: manual} 6611 cfg := TestStoreConfig(hlc.NewClock(manual.UnixNano, time.Nanosecond)) 6612 cfg.TestingKnobs.TestingProposalFilter = proposeFn 6613 tc.StartWithStoreConfig(t, stopper, cfg) 6614 6615 atomic.StoreInt32(&active, 1) 6616 gArgs := getArgs(roachpb.Key("a")) 6617 // Force the read command request a new lease. 6618 manual.Set(leaseExpiry(tc.repl)) 6619 _, pErr := kv.SendWrappedWith(context.Background(), tc.store, roachpb.Header{ 6620 Timestamp: tc.Clock().Now(), 6621 RangeID: 1, 6622 }, &gArgs) 6623 if _, ok := pErr.GetDetail().(*roachpb.RangeNotFoundError); !ok { 6624 t.Fatalf("expected a RangeNotFoundError, get %s", pErr) 6625 } 6626 } 6627 6628 func TestIntentIntersect(t *testing.T) { 6629 defer leaktest.AfterTest(t)() 6630 iPt := roachpb.Span{ 6631 Key: roachpb.Key("asd"), 6632 EndKey: nil, 6633 } 6634 iRn := roachpb.Span{ 6635 Key: roachpb.Key("c"), 6636 EndKey: roachpb.Key("x"), 6637 } 6638 6639 suffix := roachpb.RKey("abcd") 6640 iLc := roachpb.Span{ 6641 Key: keys.MakeRangeKey(roachpb.RKey("c"), suffix, nil), 6642 EndKey: keys.MakeRangeKey(roachpb.RKey("x"), suffix, nil), 6643 } 6644 kl1 := string(iLc.Key) 6645 kl2 := string(iLc.EndKey) 6646 6647 for i, tc := range []struct { 6648 intent roachpb.Span 6649 from, to string 6650 exp []string 6651 }{ 6652 {intent: iPt, from: "", to: "z", exp: []string{"", "", "asd", ""}}, 6653 6654 {intent: iRn, from: "", to: "a", exp: []string{"", "", "c", "x"}}, 6655 {intent: iRn, from: "", to: "c", exp: []string{"", "", "c", "x"}}, 6656 {intent: iRn, from: "a", to: "z", exp: []string{"c", "x"}}, 6657 {intent: iRn, from: "c", to: "d", exp: []string{"c", "d", "d", "x"}}, 6658 {intent: iRn, from: "c", to: "x", exp: []string{"c", "x"}}, 6659 {intent: iRn, from: "d", to: "x", exp: []string{"d", "x", "c", "d"}}, 6660 {intent: iRn, from: "d", to: "w", exp: []string{"d", "w", "c", "d", "w", "x"}}, 6661 {intent: iRn, from: "c", to: "w", exp: []string{"c", "w", "w", "x"}}, 6662 {intent: iRn, from: "w", to: "x", exp: []string{"w", "x", "c", "w"}}, 6663 {intent: iRn, from: "x", to: "z", exp: []string{"", "", "c", "x"}}, 6664 {intent: iRn, 
from: "y", to: "z", exp: []string{"", "", "c", "x"}}, 6665 6666 // A local intent range always comes back in one piece, either inside 6667 // or outside of the Range. 6668 {intent: iLc, from: "a", to: "b", exp: []string{"", "", kl1, kl2}}, 6669 {intent: iLc, from: "d", to: "z", exp: []string{"", "", kl1, kl2}}, 6670 {intent: iLc, from: "f", to: "g", exp: []string{"", "", kl1, kl2}}, 6671 {intent: iLc, from: "c", to: "x", exp: []string{kl1, kl2}}, 6672 {intent: iLc, from: "a", to: "z", exp: []string{kl1, kl2}}, 6673 } { 6674 var all []string 6675 in, out := kvserverbase.IntersectSpan(tc.intent, &roachpb.RangeDescriptor{ 6676 StartKey: roachpb.RKey(tc.from), 6677 EndKey: roachpb.RKey(tc.to), 6678 }) 6679 if in != nil { 6680 all = append(all, string(in.Key), string(in.EndKey)) 6681 } else { 6682 all = append(all, "", "") 6683 } 6684 for _, o := range out { 6685 all = append(all, string(o.Key), string(o.EndKey)) 6686 } 6687 if !reflect.DeepEqual(all, tc.exp) { 6688 t.Errorf("%d: wanted %v, got %v", i, tc.exp, all) 6689 } 6690 } 6691 } 6692 6693 // TestBatchErrorWithIndex tests that when an individual entry in a 6694 // batch results in an error with an index, the index of this command 6695 // is stored into the error. 6696 func TestBatchErrorWithIndex(t *testing.T) { 6697 defer leaktest.AfterTest(t)() 6698 tc := testContext{} 6699 stopper := stop.NewStopper() 6700 defer stopper.Stop(context.Background()) 6701 tc.Start(t, stopper) 6702 6703 ba := roachpb.BatchRequest{} 6704 // This one succeeds. 6705 ba.Add(&roachpb.PutRequest{ 6706 RequestHeader: roachpb.RequestHeader{Key: roachpb.Key("k")}, 6707 Value: roachpb.MakeValueFromString("not nil"), 6708 }) 6709 // This one fails with a ConditionalPutError, which will populate the 6710 // returned error's index. 6711 ba.Add(&roachpb.ConditionalPutRequest{ 6712 RequestHeader: roachpb.RequestHeader{Key: roachpb.Key("k")}, 6713 Value: roachpb.MakeValueFromString("irrelevant"), 6714 ExpValue: nil, // not true after above Put 6715 }) 6716 // This one is never executed. 6717 ba.Add(&roachpb.GetRequest{ 6718 RequestHeader: roachpb.RequestHeader{Key: roachpb.Key("k")}, 6719 }) 6720 6721 if _, pErr := tc.Sender().Send(context.Background(), ba); pErr == nil { 6722 t.Fatal("expected an error") 6723 } else if pErr.Index == nil || pErr.Index.Index != 1 || !testutils.IsPError(pErr, "unexpected value") { 6724 t.Fatalf("invalid index or error type: %s", pErr) 6725 } 6726 } 6727 6728 // TestReplicaLoadSystemConfigSpanIntent verifies that intents on the SystemConfigSpan 6729 // cause an error, but trigger asynchronous cleanup. 6730 func TestReplicaLoadSystemConfigSpanIntent(t *testing.T) { 6731 defer leaktest.AfterTest(t)() 6732 tc := testContext{} 6733 stopper := stop.NewStopper() 6734 defer stopper.Stop(context.Background()) 6735 tc.Start(t, stopper) 6736 scStartSddr, err := keys.Addr(keys.SystemConfigSpan.Key) 6737 if err != nil { 6738 t.Fatal(err) 6739 } 6740 repl := tc.store.LookupReplica(scStartSddr) 6741 if repl == nil { 6742 t.Fatalf("no replica contains the SystemConfig span") 6743 } 6744 6745 // Create a transaction and write an intent to the system 6746 // config span. 
6747 key := keys.SystemConfigSpan.Key 6748 pushee := newTransaction("test", key, 1, repl.store.Clock()) 6749 pushee.Priority = enginepb.MinTxnPriority // low so it can be pushed 6750 put := putArgs(key, []byte("foo")) 6751 assignSeqNumsForReqs(pushee, &put) 6752 if _, pErr := kv.SendWrappedWith(context.Background(), tc.Sender(), roachpb.Header{Txn: pushee}, &put); pErr != nil { 6753 t.Fatal(pErr) 6754 } 6755 6756 // Abort the transaction so that the async intent resolution caused 6757 // by loading the system config span doesn't waste any time in 6758 // clearing the intent. 6759 pusher := newTransaction("test", key, 1, repl.store.Clock()) 6760 pusher.Priority = enginepb.MaxTxnPriority // will push successfully 6761 pushArgs := pushTxnArgs(pusher, pushee, roachpb.PUSH_ABORT) 6762 if _, pErr := tc.SendWrapped(&pushArgs); pErr != nil { 6763 t.Fatal(pErr) 6764 } 6765 6766 // Verify that the intent trips up loading the SystemConfig data. 6767 if _, err := repl.loadSystemConfig(context.Background()); !errors.Is(err, errSystemConfigIntent) { 6768 t.Fatal(err) 6769 } 6770 6771 // In the loop, wait until the intent is aborted. Then write a "real" value 6772 // there and verify that we can now load the data as expected. 6773 v := roachpb.MakeValueFromString("foo") 6774 testutils.SucceedsSoon(t, func() error { 6775 if err := storage.MVCCPut(context.Background(), repl.store.Engine(), &enginepb.MVCCStats{}, 6776 keys.SystemConfigSpan.Key, repl.store.Clock().Now(), v, nil); err != nil { 6777 return err 6778 } 6779 6780 cfg, err := repl.loadSystemConfig(context.Background()) 6781 if err != nil { 6782 return err 6783 } 6784 6785 if len(cfg.Values) != 1 || !bytes.Equal(cfg.Values[0].Key, keys.SystemConfigSpan.Key) { 6786 return errors.Errorf("expected only key %s in SystemConfigSpan map: %+v", keys.SystemConfigSpan.Key, cfg) 6787 } 6788 return nil 6789 }) 6790 } 6791 6792 func TestReplicaDestroy(t *testing.T) { 6793 defer leaktest.AfterTest(t)() 6794 ctx := context.Background() 6795 tc := testContext{} 6796 stopper := stop.NewStopper() 6797 defer stopper.Stop(ctx) 6798 tc.Start(t, stopper) 6799 6800 repl, err := tc.store.GetReplica(1) 6801 if err != nil { 6802 t.Fatal(err) 6803 } 6804 6805 func() { 6806 tc.repl.raftMu.Lock() 6807 defer tc.repl.raftMu.Unlock() 6808 if err := tc.store.removeInitializedReplicaRaftMuLocked(ctx, tc.repl, repl.Desc().NextReplicaID, RemoveOptions{ 6809 DestroyData: true, 6810 }); err != nil { 6811 t.Fatal(err) 6812 } 6813 }() 6814 6815 iter := rditer.NewReplicaDataIterator(tc.repl.Desc(), tc.repl.store.Engine(), 6816 false /* replicatedOnly */, false /* seekEnd */) 6817 defer iter.Close() 6818 if ok, err := iter.Valid(); err != nil { 6819 t.Fatal(err) 6820 } else if ok { 6821 // If the range is destroyed, only a tombstone key should be there. 6822 k1 := iter.Key().Key 6823 if tombstoneKey := keys.RangeTombstoneKey(tc.repl.RangeID); !bytes.Equal(k1, tombstoneKey) { 6824 t.Errorf("expected a tombstone key %q, but found %q", tombstoneKey, k1) 6825 } 6826 6827 iter.Next() 6828 if ok, err := iter.Valid(); err != nil { 6829 t.Fatal(err) 6830 } else if ok { 6831 t.Errorf("expected a destroyed replica to have only a tombstone key, but found more") 6832 } 6833 } else { 6834 t.Errorf("expected a tombstone key, but got an empty iteration") 6835 } 6836 } 6837 6838 // TestQuotaPoolReleasedOnFailedProposal tests that the quota acquired by 6839 // proposals is released back into the quota pool if the proposal fails before 6840 // being submitted to Raft. 
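// The failure is injected through the proposal buffer's leaseIndexFilter; at
// that point quota has already been acquired, so the test records the amount
// that must reappear and checks it once the request has failed. Roughly (a
// sketch of the check only, not the exact code):
//
//	minQuota := tc.repl.mu.proposalQuota.ApproximateQuota() + p.quotaAlloc.Acquired()
//	// ... the tagged proposal is failed before reaching Raft ...
//	if tc.repl.QuotaAvailable() < minQuota {
//		t.Fatal("proposal quota was not released")
//	}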
6841 func TestQuotaPoolReleasedOnFailedProposal(t *testing.T) { 6842 defer leaktest.AfterTest(t)() 6843 tc := testContext{} 6844 stopper := stop.NewStopper() 6845 defer stopper.Stop(context.Background()) 6846 tc.Start(t, stopper) 6847 6848 // Flush a write all the way through the Raft proposal pipeline to ensure 6849 // that the replica becomes the Raft leader and sets up its quota pool. 6850 iArgs := incrementArgs([]byte("a"), 1) 6851 if _, pErr := tc.SendWrapped(iArgs); pErr != nil { 6852 t.Fatal(pErr) 6853 } 6854 6855 type magicKey struct{} 6856 var minQuotaSize uint64 6857 propErr := errors.New("proposal error") 6858 6859 tc.repl.mu.Lock() 6860 tc.repl.mu.proposalBuf.testing.leaseIndexFilter = func(p *ProposalData) (indexOverride uint64, _ error) { 6861 if v := p.ctx.Value(magicKey{}); v != nil { 6862 minQuotaSize = tc.repl.mu.proposalQuota.ApproximateQuota() + p.quotaAlloc.Acquired() 6863 return 0, propErr 6864 } 6865 return 0, nil 6866 } 6867 tc.repl.mu.Unlock() 6868 6869 var ba roachpb.BatchRequest 6870 pArg := putArgs(roachpb.Key("a"), make([]byte, 1<<10)) 6871 ba.Add(&pArg) 6872 ctx := context.WithValue(context.Background(), magicKey{}, "foo") 6873 if _, pErr := tc.Sender().Send(ctx, ba); !testutils.IsPError(pErr, propErr.Error()) { 6874 t.Fatalf("expected error %v, found %v", propErr, pErr) 6875 } 6876 if curQuota := tc.repl.QuotaAvailable(); curQuota < minQuotaSize { 6877 t.Fatalf("proposal quota not released: found=%d, want=%d", curQuota, minQuotaSize) 6878 } 6879 } 6880 6881 // TestQuotaPoolAccessOnDestroyedReplica tests the occurrence of #17303 where 6882 // following a leader replica getting destroyed, the scheduling of 6883 // handleRaftReady twice on the replica would cause a panic when 6884 // finding a nil/closed quota pool. 6885 func TestQuotaPoolAccessOnDestroyedReplica(t *testing.T) { 6886 defer leaktest.AfterTest(t)() 6887 tc := testContext{} 6888 stopper := stop.NewStopper() 6889 defer stopper.Stop(context.Background()) 6890 tc.Start(t, stopper) 6891 6892 repl, err := tc.store.GetReplica(1) 6893 if err != nil { 6894 t.Fatal(err) 6895 } 6896 6897 ctx := repl.AnnotateCtx(context.Background()) 6898 func() { 6899 tc.repl.raftMu.Lock() 6900 defer tc.repl.raftMu.Unlock() 6901 if err := tc.store.removeInitializedReplicaRaftMuLocked(ctx, repl, repl.Desc().NextReplicaID, RemoveOptions{ 6902 DestroyData: true, 6903 }); err != nil { 6904 t.Fatal(err) 6905 } 6906 }() 6907 6908 if _, _, err := repl.handleRaftReady(ctx, noSnap); err != nil { 6909 t.Fatal(err) 6910 } 6911 6912 if _, _, err := repl.handleRaftReady(ctx, noSnap); err != nil { 6913 t.Fatal(err) 6914 } 6915 } 6916 6917 func TestEntries(t *testing.T) { 6918 defer leaktest.AfterTest(t)() 6919 tc := testContext{} 6920 cfg := TestStoreConfig(nil) 6921 // Disable ticks to avoid quiescence, which can result in empty 6922 // entries being proposed and causing the test to flake. 
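// Setting RaftTickInterval to MaxInt32 effectively disables background ticking
// for the duration of the test, and DisableRaftLogQueue keeps the raft log
// queue from truncating the log behind the test's back, so the explicit
// truncations in the table below are the only ones that happen.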
6923 cfg.RaftTickInterval = math.MaxInt32 6924 cfg.TestingKnobs.DisableRaftLogQueue = true 6925 stopper := stop.NewStopper() 6926 defer stopper.Stop(context.Background()) 6927 tc.StartWithStoreConfig(t, stopper, cfg) 6928 6929 repl := tc.repl 6930 rangeID := repl.RangeID 6931 var indexes []uint64 6932 6933 populateLogs := func(from, to int) []uint64 { 6934 var newIndexes []uint64 6935 for i := from; i < to; i++ { 6936 args := incrementArgs([]byte("a"), int64(i)) 6937 if _, pErr := tc.SendWrapped(args); pErr != nil { 6938 t.Fatal(pErr) 6939 } 6940 idx, err := repl.GetLastIndex() 6941 if err != nil { 6942 t.Fatal(err) 6943 } 6944 newIndexes = append(newIndexes, idx) 6945 } 6946 return newIndexes 6947 } 6948 6949 truncateLogs := func(index int) { 6950 truncateArgs := truncateLogArgs(indexes[index], rangeID) 6951 if _, err := kv.SendWrappedWith( 6952 context.Background(), 6953 tc.Sender(), 6954 roachpb.Header{RangeID: 1}, 6955 &truncateArgs, 6956 ); err != nil { 6957 t.Fatal(err) 6958 } 6959 } 6960 6961 // Populate the log with 10 entries. Save the LastIndex after each write. 6962 indexes = append(indexes, populateLogs(0, 10)...) 6963 6964 for i, tc := range []struct { 6965 lo uint64 6966 hi uint64 6967 maxBytes uint64 6968 expResultCount int 6969 expCacheCount int 6970 expError error 6971 // Setup, if not nil, is called before running the test case. 6972 setup func() 6973 }{ 6974 // Case 0: All of the entries from cache. 6975 {lo: indexes[0], hi: indexes[9] + 1, expResultCount: 10, expCacheCount: 10, setup: nil}, 6976 // Case 1: Get the first entry from cache. 6977 {lo: indexes[0], hi: indexes[1], expResultCount: 1, expCacheCount: 1, setup: nil}, 6978 // Case 2: Get the last entry from cache. 6979 {lo: indexes[9], hi: indexes[9] + 1, expResultCount: 1, expCacheCount: 1, setup: nil}, 6980 // Case 3: lo is available, but hi is not, cache miss. 6981 {lo: indexes[9], hi: indexes[9] + 2, expCacheCount: 1, expError: raft.ErrUnavailable, setup: nil}, 6982 6983 // Case 4: Just most of the entries from cache. 6984 {lo: indexes[5], hi: indexes[9], expResultCount: 4, expCacheCount: 4, setup: func() { 6985 // Discard the first half of the log. 6986 truncateLogs(5) 6987 }}, 6988 // Case 5: Get a single entry from cache. 6989 {lo: indexes[5], hi: indexes[6], expResultCount: 1, expCacheCount: 1, setup: nil}, 6990 // Case 6: Get range without size limitation. (Like case 4, without truncating). 6991 {lo: indexes[5], hi: indexes[9], expResultCount: 4, expCacheCount: 4, setup: nil}, 6992 // Case 7: maxBytes is set low so only a single value should be 6993 // returned. 6994 {lo: indexes[5], hi: indexes[9], maxBytes: 1, expResultCount: 1, expCacheCount: 1, setup: nil}, 6995 // Case 8: hi value is just past the last index, should return all 6996 // available entries. 6997 {lo: indexes[5], hi: indexes[9] + 1, expResultCount: 5, expCacheCount: 5, setup: nil}, 6998 // Case 9: all values have been truncated from cache and storage. 6999 {lo: indexes[1], hi: indexes[2], expCacheCount: 0, expError: raft.ErrCompacted, setup: nil}, 7000 // Case 10: hi has just been truncated from cache and storage. 7001 {lo: indexes[1], hi: indexes[4], expCacheCount: 0, expError: raft.ErrCompacted, setup: nil}, 7002 // Case 11: another case where hi has just been truncated from 7003 // cache and storage. 7004 {lo: indexes[3], hi: indexes[4], expCacheCount: 0, expError: raft.ErrCompacted, setup: nil}, 7005 // Case 12: lo has been truncated and hi is the truncation point. 
7006 {lo: indexes[4], hi: indexes[5], expCacheCount: 0, expError: raft.ErrCompacted, setup: nil}, 7007 // Case 13: lo has been truncated but hi is available. 7008 {lo: indexes[4], hi: indexes[9], expCacheCount: 0, expError: raft.ErrCompacted, setup: nil}, 7009 // Case 14: lo has been truncated and hi is not available. 7010 {lo: indexes[4], hi: indexes[9] + 100, expCacheCount: 0, expError: raft.ErrCompacted, setup: nil}, 7011 // Case 15: lo has been truncated but hi is available, and maxBytes is 7012 // set low. 7013 {lo: indexes[4], hi: indexes[9], maxBytes: 1, expCacheCount: 0, expError: raft.ErrCompacted, setup: nil}, 7014 // Case 16: lo is available but hi is not. 7015 {lo: indexes[5], hi: indexes[9] + 100, expCacheCount: 6, expError: raft.ErrUnavailable, setup: nil}, 7016 // Case 17: both lo and hi are not available, cache miss. 7017 {lo: indexes[9] + 100, hi: indexes[9] + 1000, expCacheCount: 0, expError: raft.ErrUnavailable, setup: nil}, 7018 // Case 18: lo is available, hi is not, but it was cut off by maxBytes. 7019 {lo: indexes[5], hi: indexes[9] + 1000, maxBytes: 1, expResultCount: 1, expCacheCount: 1, setup: nil}, 7020 // Case 19: lo and hi are available, but entry cache evicted. 7021 {lo: indexes[5], hi: indexes[9], expResultCount: 4, expCacheCount: 0, setup: func() { 7022 // Manually evict cache for the first 10 log entries. 7023 repl.store.raftEntryCache.Clear(rangeID, indexes[9]+1) 7024 indexes = append(indexes, populateLogs(10, 40)...) 7025 }}, 7026 // Case 20: lo and hi are available, entry cache evicted and hi available in cache. 7027 {lo: indexes[5], hi: indexes[9] + 5, expResultCount: 9, expCacheCount: 0, setup: nil}, 7028 // Case 21: lo and hi are available and in entry cache. 7029 {lo: indexes[9] + 2, hi: indexes[9] + 32, expResultCount: 30, expCacheCount: 30, setup: nil}, 7030 // Case 22: lo is available and hi is not. 7031 {lo: indexes[9] + 2, hi: indexes[9] + 33, expCacheCount: 30, expError: raft.ErrUnavailable, setup: nil}, 7032 } { 7033 if tc.setup != nil { 7034 tc.setup() 7035 } 7036 if tc.maxBytes == 0 { 7037 tc.maxBytes = math.MaxUint64 7038 } 7039 cacheEntries, _, _, hitLimit := repl.store.raftEntryCache.Scan(nil, rangeID, tc.lo, tc.hi, tc.maxBytes) 7040 if len(cacheEntries) != tc.expCacheCount { 7041 t.Errorf("%d: expected cache count %d, got %d", i, tc.expCacheCount, len(cacheEntries)) 7042 } 7043 repl.mu.Lock() 7044 ents, err := repl.raftEntriesLocked(tc.lo, tc.hi, tc.maxBytes) 7045 repl.mu.Unlock() 7046 if tc.expError == nil && err != nil { 7047 t.Errorf("%d: expected no error, got %s", i, err) 7048 continue 7049 } else if !errors.Is(err, tc.expError) { 7050 t.Errorf("%d: expected error %s, got %s", i, tc.expError, err) 7051 continue 7052 } 7053 if len(ents) != tc.expResultCount { 7054 t.Errorf("%d: expected %d entries, got %d", i, tc.expResultCount, len(ents)) 7055 } else if tc.expResultCount > 0 { 7056 expHitLimit := ents[len(ents)-1].Index < tc.hi-1 7057 if hitLimit != expHitLimit { 7058 t.Errorf("%d: unexpected hit limit: %t", i, hitLimit) 7059 } 7060 } 7061 } 7062 7063 // Case 23: Lo must be less than or equal to hi. 
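// This case sits outside the table above; an inverted interval (lo > hi) is
// expected to be rejected with an error regardless of what is in the cache or
// in storage.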
7064 repl.mu.Lock() 7065 if _, err := repl.raftEntriesLocked(indexes[9], indexes[5], math.MaxUint64); err == nil { 7066 t.Errorf("23: error expected, got none") 7067 } 7068 repl.mu.Unlock() 7069 } 7070 7071 func TestTerm(t *testing.T) { 7072 defer leaktest.AfterTest(t)() 7073 tc := testContext{} 7074 tsc := TestStoreConfig(nil) 7075 tsc.TestingKnobs.DisableRaftLogQueue = true 7076 stopper := stop.NewStopper() 7077 defer stopper.Stop(context.Background()) 7078 tc.StartWithStoreConfig(t, stopper, tsc) 7079 7080 repl := tc.repl 7081 rangeID := repl.RangeID 7082 7083 // Populate the log with 10 entries. Save the LastIndex after each write. 7084 var indexes []uint64 7085 for i := 0; i < 10; i++ { 7086 args := incrementArgs([]byte("a"), int64(i)) 7087 7088 if _, pErr := tc.SendWrapped(args); pErr != nil { 7089 t.Fatal(pErr) 7090 } 7091 idx, err := tc.repl.GetLastIndex() 7092 if err != nil { 7093 t.Fatal(err) 7094 } 7095 indexes = append(indexes, idx) 7096 } 7097 7098 // Discard the first half of the log. 7099 truncateArgs := truncateLogArgs(indexes[5], rangeID) 7100 if _, pErr := tc.SendWrappedWith(roachpb.Header{RangeID: 1}, &truncateArgs); pErr != nil { 7101 t.Fatal(pErr) 7102 } 7103 7104 repl.mu.Lock() 7105 defer repl.mu.Unlock() 7106 7107 firstIndex, err := repl.raftFirstIndexLocked() 7108 if err != nil { 7109 t.Fatal(err) 7110 } 7111 if firstIndex != indexes[5] { 7112 t.Fatalf("expected firstIndex %d to be %d", firstIndex, indexes[4]) 7113 } 7114 7115 // Truncated logs should return an ErrCompacted error. 7116 if _, err := tc.repl.raftTermRLocked(indexes[1]); !errors.Is(err, raft.ErrCompacted) { 7117 t.Errorf("expected ErrCompacted, got %s", err) 7118 } 7119 if _, err := tc.repl.raftTermRLocked(indexes[3]); !errors.Is(err, raft.ErrCompacted) { 7120 t.Errorf("expected ErrCompacted, got %s", err) 7121 } 7122 7123 // FirstIndex-1 should return the term of firstIndex. 7124 firstIndexTerm, err := tc.repl.raftTermRLocked(firstIndex) 7125 if err != nil { 7126 t.Errorf("expect no error, got %s", err) 7127 } 7128 7129 term, err := tc.repl.raftTermRLocked(indexes[4]) 7130 if err != nil { 7131 t.Errorf("expect no error, got %s", err) 7132 } 7133 if term != firstIndexTerm { 7134 t.Errorf("expected firstIndex-1's term:%d to equal that of firstIndex:%d", term, firstIndexTerm) 7135 } 7136 7137 lastIndex, err := repl.raftLastIndexLocked() 7138 if err != nil { 7139 t.Fatal(err) 7140 } 7141 7142 // Last index should return correctly. 7143 if _, err := tc.repl.raftTermRLocked(lastIndex); err != nil { 7144 t.Errorf("expected no error, got %s", err) 7145 } 7146 7147 // Terms for after the last index should return ErrUnavailable. 7148 if _, err := tc.repl.raftTermRLocked(lastIndex + 1); !errors.Is(err, raft.ErrUnavailable) { 7149 t.Errorf("expected ErrUnavailable, got %s", err) 7150 } 7151 if _, err := tc.repl.raftTermRLocked(indexes[9] + 1000); !errors.Is(err, raft.ErrUnavailable) { 7152 t.Errorf("expected ErrUnavailable, got %s", err) 7153 } 7154 } 7155 7156 func TestGCIncorrectRange(t *testing.T) { 7157 defer leaktest.AfterTest(t)() 7158 tc := testContext{} 7159 stopper := stop.NewStopper() 7160 defer stopper.Stop(context.Background()) 7161 tc.Start(t, stopper) 7162 7163 // Split range into two ranges. 7164 splitKey := roachpb.RKey("c") 7165 repl1 := tc.repl 7166 repl2 := splitTestRange(tc.store, splitKey, splitKey, t) 7167 7168 // Write a key to range 2 at two different timestamps so we can 7169 // GC the earlier timestamp without needing to delete it. 
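// splitKey.PrefixEnd() guarantees the key sorts just after the split point,
// i.e. it lands in repl2 (the right-hand side of the split) rather than repl1.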
7170 key := splitKey.PrefixEnd().AsRawKey() 7171 val := []byte("value") 7172 putReq := putArgs(key, val) 7173 now := tc.Clock().Now() 7174 ts1 := now.Add(1, 0) 7175 ts2 := now.Add(2, 0) 7176 ts1Header := roachpb.Header{RangeID: repl2.RangeID, Timestamp: ts1} 7177 ts2Header := roachpb.Header{RangeID: repl2.RangeID, Timestamp: ts2} 7178 if _, pErr := kv.SendWrappedWith(context.Background(), repl2, ts1Header, &putReq); pErr != nil { 7179 t.Errorf("unexpected pError on put key request: %s", pErr) 7180 } 7181 if _, pErr := kv.SendWrappedWith(context.Background(), repl2, ts2Header, &putReq); pErr != nil { 7182 t.Errorf("unexpected pError on put key request: %s", pErr) 7183 } 7184 7185 // Send GC request to range 1 for the key on range 2, which 7186 // should succeed even though it doesn't contain the key, because 7187 // the request for the incorrect key will be silently dropped. 7188 gKey := gcKey(key, ts1) 7189 gcReq := gcArgs(repl1.Desc().StartKey, repl1.Desc().EndKey, gKey) 7190 if _, pErr := kv.SendWrappedWith( 7191 context.Background(), 7192 repl1, 7193 roachpb.Header{RangeID: 1, Timestamp: tc.Clock().Now()}, 7194 &gcReq, 7195 ); pErr != nil { 7196 t.Errorf("unexpected pError on garbage collection request to incorrect range: %s", pErr) 7197 } 7198 7199 // Make sure the key still exists on range 2. 7200 getReq := getArgs(key) 7201 if res, pErr := kv.SendWrappedWith(context.Background(), repl2, ts1Header, &getReq); pErr != nil { 7202 t.Errorf("unexpected pError on get request to correct range: %s", pErr) 7203 } else if resVal := res.(*roachpb.GetResponse).Value; resVal == nil { 7204 t.Errorf("expected value %s to exists after GC to incorrect range but before GC to correct range, found %v", val, resVal) 7205 } 7206 7207 // Send GC request to range 2 for the same key. 7208 gcReq = gcArgs(repl2.Desc().StartKey, repl2.Desc().EndKey, gKey) 7209 if _, pErr := kv.SendWrappedWith( 7210 context.Background(), 7211 repl2, 7212 roachpb.Header{RangeID: repl2.RangeID, Timestamp: tc.Clock().Now()}, 7213 &gcReq, 7214 ); pErr != nil { 7215 t.Errorf("unexpected pError on garbage collection request to correct range: %s", pErr) 7216 } 7217 7218 // Make sure the key no longer exists on range 2. 7219 if res, pErr := kv.SendWrappedWith(context.Background(), repl2, ts1Header, &getReq); pErr != nil { 7220 t.Errorf("unexpected pError on get request to correct range: %s", pErr) 7221 } else if resVal := res.(*roachpb.GetResponse).Value; resVal != nil { 7222 t.Errorf("expected value at key %s to no longer exist after GC to correct range, found value %v", key, resVal) 7223 } 7224 } 7225 7226 // TestReplicaCancelRaft checks that it is possible to safely abandon Raft 7227 // commands via a cancelable context.Context. 7228 func TestReplicaCancelRaft(t *testing.T) { 7229 defer leaktest.AfterTest(t)() 7230 for _, cancelEarly := range []bool{true, false} { 7231 func() { 7232 // Pick a key unlikely to be used by background processes. 
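// If a background write happened to touch the same key, the
// TestingProposalFilter installed below could cancel the context on a request
// the test never issued, so an obscure key keeps the cancellation tied to the
// test's own Get.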
7233 key := []byte("acdfg") 7234 ctx, cancel := context.WithCancel(context.Background()) 7235 defer cancel() 7236 cfg := TestStoreConfig(nil) 7237 if !cancelEarly { 7238 cfg.TestingKnobs.TestingProposalFilter = 7239 func(args kvserverbase.ProposalFilterArgs) *roachpb.Error { 7240 for _, union := range args.Req.Requests { 7241 if union.GetInner().Header().Key.Equal(key) { 7242 cancel() 7243 break 7244 } 7245 } 7246 return nil 7247 } 7248 } 7249 tc := testContext{} 7250 stopper := stop.NewStopper() 7251 defer stopper.Stop(context.Background()) 7252 tc.StartWithStoreConfig(t, stopper, cfg) 7253 if cancelEarly { 7254 cancel() 7255 } 7256 var ba roachpb.BatchRequest 7257 ba.RangeID = 1 7258 ba.Add(&roachpb.GetRequest{ 7259 RequestHeader: roachpb.RequestHeader{Key: key}, 7260 }) 7261 if err := ba.SetActiveTimestamp(tc.Clock().Now); err != nil { 7262 t.Fatal(err) 7263 } 7264 _, pErr := tc.repl.executeBatchWithConcurrencyRetries(ctx, &ba, (*Replica).executeWriteBatch) 7265 if cancelEarly { 7266 if !testutils.IsPError(pErr, context.Canceled.Error()) { 7267 t.Fatalf("expected canceled error; got %v", pErr) 7268 } 7269 } else { 7270 if pErr == nil { 7271 // We canceled the context while the command was already 7272 // being processed, so the client had to wait for successful 7273 // execution. 7274 return 7275 } 7276 detail := pErr.GetDetail() 7277 if _, ok := detail.(*roachpb.AmbiguousResultError); !ok { 7278 t.Fatalf("expected AmbiguousResultError error; got %s (%T)", detail, detail) 7279 } 7280 } 7281 }() 7282 } 7283 } 7284 7285 // TestReplicaAbandonProposal checks that canceling a request that has been 7286 // proposed to Raft but before it has been executed correctly releases its 7287 // latches. See #11986. 7288 func TestReplicaAbandonProposal(t *testing.T) { 7289 defer leaktest.AfterTest(t)() 7290 stopper := stop.NewStopper() 7291 defer stopper.Stop(context.Background()) 7292 tc := testContext{} 7293 tc.Start(t, stopper) 7294 7295 type magicKey struct{} 7296 ctx, cancel := context.WithCancel(context.Background()) 7297 ctx = context.WithValue(ctx, magicKey{}, "foo") 7298 7299 // Cancel the request before it is proposed to Raft. 7300 dropProp := int32(1) 7301 tc.repl.mu.Lock() 7302 tc.repl.mu.proposalBuf.testing.submitProposalFilter = func(p *ProposalData) (drop bool, _ error) { 7303 if v := p.ctx.Value(magicKey{}); v != nil { 7304 cancel() 7305 return atomic.LoadInt32(&dropProp) == 1, nil 7306 } 7307 return false, nil 7308 } 7309 tc.repl.mu.Unlock() 7310 7311 var ba roachpb.BatchRequest 7312 ba.RangeID = 1 7313 ba.Timestamp = tc.Clock().Now() 7314 ba.Add(&roachpb.PutRequest{ 7315 RequestHeader: roachpb.RequestHeader{Key: []byte("acdfg")}, 7316 }) 7317 _, pErr := tc.repl.executeBatchWithConcurrencyRetries(ctx, &ba, (*Replica).executeWriteBatch) 7318 if pErr == nil { 7319 t.Fatal("expected failure, but found success") 7320 } 7321 detail := pErr.GetDetail() 7322 if _, ok := detail.(*roachpb.AmbiguousResultError); !ok { 7323 t.Fatalf("expected AmbiguousResultError error; got %s (%T)", detail, detail) 7324 } 7325 7326 // The request should still be holding its latches. 7327 latchInfoGlobal, _ := tc.repl.concMgr.LatchMetrics() 7328 if w := latchInfoGlobal.WriteCount; w == 0 { 7329 t.Fatal("expected non-empty latch manager") 7330 } 7331 7332 // Let the proposal be reproposed and go through. 7333 atomic.StoreInt32(&dropProp, 0) 7334 7335 // Even though we canceled the command it will still get executed and its 7336 // latches cleaned up. 
7337 testutils.SucceedsSoon(t, func() error { 7338 latchInfoGlobal, _ := tc.repl.concMgr.LatchMetrics() 7339 if w := latchInfoGlobal.WriteCount; w != 0 { 7340 return errors.Errorf("expected empty latch manager") 7341 } 7342 return nil 7343 }) 7344 } 7345 7346 func TestNewReplicaCorruptionError(t *testing.T) { 7347 defer leaktest.AfterTest(t)() 7348 for i, tc := range []struct { 7349 errStruct *roachpb.ReplicaCorruptionError 7350 expErr string 7351 }{ 7352 {roachpb.NewReplicaCorruptionError(errors.New("")), "replica corruption (processed=false)"}, 7353 {roachpb.NewReplicaCorruptionError(errors.New("foo")), "replica corruption (processed=false): foo"}, 7354 {roachpb.NewReplicaCorruptionError(errors.Wrap(errors.New("bar"), "foo")), "replica corruption (processed=false): foo: bar"}, 7355 } { 7356 // This uses fmt.Sprint because that ends up calling Error() and is the 7357 // intended use. A previous version of this test called String() directly 7358 // which called the wrong (reflection-based) implementation. 7359 if errStr := fmt.Sprint(tc.errStruct); errStr != tc.expErr { 7360 t.Errorf("%d: expected '%s' but got '%s'", i, tc.expErr, errStr) 7361 } 7362 } 7363 } 7364 7365 func TestDiffRange(t *testing.T) { 7366 defer leaktest.AfterTest(t)() 7367 7368 // TODO(tschottdorf): this test should really pass the data through a 7369 // RocksDB engine to verify that the original snapshots sort correctly. 7370 7371 if diff := diffRange(nil, nil); diff != nil { 7372 t.Fatalf("diff of nils = %v", diff) 7373 } 7374 7375 timestamp := hlc.Timestamp{WallTime: 1729, Logical: 1} 7376 value := []byte("foo") 7377 7378 // Construct the two snapshots. 7379 leaderSnapshot := &roachpb.RaftSnapshotData{ 7380 KV: []roachpb.RaftSnapshotData_KeyValue{ 7381 {Key: []byte("a"), Timestamp: timestamp, Value: value}, 7382 {Key: []byte("abc"), Timestamp: timestamp, Value: value}, 7383 {Key: []byte("abcd"), Timestamp: timestamp, Value: value}, 7384 {Key: []byte("abcde"), Timestamp: timestamp, Value: value}, 7385 // Timestamps sort in descending order, with the notable exception 7386 // of the zero timestamp, which sorts first. 7387 {Key: []byte("abcdefg"), Timestamp: hlc.Timestamp{}, Value: value}, 7388 {Key: []byte("abcdefg"), Timestamp: timestamp, Value: value}, 7389 {Key: []byte("abcdefg"), Timestamp: timestamp.Add(0, -1), Value: value}, 7390 {Key: []byte("abcdefgh"), Timestamp: timestamp, Value: value}, 7391 {Key: []byte("x"), Timestamp: timestamp, Value: value}, 7392 {Key: []byte("y"), Timestamp: timestamp, Value: value}, 7393 // Both 'zeroleft' and 'zeroright' share the version at (1,1), but 7394 // a zero timestamp (=meta) key pair exists on the leader or 7395 // follower, respectively. 7396 {Key: []byte("zeroleft"), Timestamp: hlc.Timestamp{}, Value: value}, 7397 {Key: []byte("zeroleft"), Timestamp: hlc.Timestamp{WallTime: 1, Logical: 1}, Value: value}, 7398 {Key: []byte("zeroright"), Timestamp: hlc.Timestamp{WallTime: 1, Logical: 1}, Value: value}, 7399 }, 7400 } 7401 7402 // No diff works. 
7403 if diff := diffRange(leaderSnapshot, leaderSnapshot); diff != nil { 7404 t.Fatalf("diff of equal snapshots = %v", diff) 7405 } 7406 7407 replicaSnapshot := &roachpb.RaftSnapshotData{ 7408 KV: []roachpb.RaftSnapshotData_KeyValue{ 7409 {Key: []byte("ab"), Timestamp: timestamp, Value: value}, 7410 {Key: []byte("abc"), Timestamp: timestamp, Value: value}, 7411 {Key: []byte("abcde"), Timestamp: timestamp, Value: value}, 7412 {Key: []byte("abcdef"), Timestamp: timestamp, Value: value}, 7413 {Key: []byte("abcdefg"), Timestamp: hlc.Timestamp{}, Value: value}, 7414 {Key: []byte("abcdefg"), Timestamp: timestamp.Add(0, 1), Value: value}, 7415 {Key: []byte("abcdefg"), Timestamp: timestamp, Value: value}, 7416 {Key: []byte("abcdefgh"), Timestamp: timestamp, Value: value}, 7417 {Key: []byte("x"), Timestamp: timestamp, Value: []byte("bar")}, 7418 {Key: []byte("z"), Timestamp: timestamp, Value: value}, 7419 {Key: []byte("zeroleft"), Timestamp: hlc.Timestamp{WallTime: 1, Logical: 1}, Value: value}, 7420 {Key: []byte("zeroright"), Timestamp: hlc.Timestamp{}, Value: value}, 7421 {Key: []byte("zeroright"), Timestamp: hlc.Timestamp{WallTime: 1, Logical: 1}, Value: value}, 7422 }, 7423 } 7424 7425 // The expected diff. 7426 eDiff := ReplicaSnapshotDiffSlice{ 7427 {LeaseHolder: true, Key: []byte("a"), Timestamp: timestamp, Value: value}, 7428 {LeaseHolder: false, Key: []byte("ab"), Timestamp: timestamp, Value: value}, 7429 {LeaseHolder: true, Key: []byte("abcd"), Timestamp: timestamp, Value: value}, 7430 {LeaseHolder: false, Key: []byte("abcdef"), Timestamp: timestamp, Value: value}, 7431 {LeaseHolder: false, Key: []byte("abcdefg"), Timestamp: timestamp.Add(0, 1), Value: value}, 7432 {LeaseHolder: true, Key: []byte("abcdefg"), Timestamp: timestamp.Add(0, -1), Value: value}, 7433 {LeaseHolder: true, Key: []byte("x"), Timestamp: timestamp, Value: value}, 7434 {LeaseHolder: false, Key: []byte("x"), Timestamp: timestamp, Value: []byte("bar")}, 7435 {LeaseHolder: true, Key: []byte("y"), Timestamp: timestamp, Value: value}, 7436 {LeaseHolder: false, Key: []byte("z"), Timestamp: timestamp, Value: value}, 7437 {LeaseHolder: true, Key: []byte("zeroleft"), Timestamp: hlc.Timestamp{}, Value: value}, 7438 {LeaseHolder: false, Key: []byte("zeroright"), Timestamp: hlc.Timestamp{}, Value: value}, 7439 } 7440 7441 diff := diffRange(leaderSnapshot, replicaSnapshot) 7442 7443 for i, e := range eDiff { 7444 v := diff[i] 7445 if e.LeaseHolder != v.LeaseHolder || !bytes.Equal(e.Key, v.Key) || e.Timestamp != v.Timestamp || !bytes.Equal(e.Value, v.Value) { 7446 t.Fatalf("diff varies at row %d, want %v and got %v\n\ngot:\n%s\nexpected:\n%s", i, e, v, diff, eDiff) 7447 } 7448 } 7449 7450 // Document the stringifed output. This is what the consistency checker 7451 // will actually print. 
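// Only the first four diff entries plus one synthetic key are rendered; the
// point is to pin down the exact textual format (leaseholder-only rows
// prefixed with '-', follower-only rows with '+'), not to re-check the diff
// contents.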
7452 stringDiff := append(eDiff[:4], 7453 ReplicaSnapshotDiff{Key: []byte("foo"), Value: value}, 7454 ) 7455 7456 const expDiff = `--- leaseholder 7457 +++ follower 7458 -0.000001729,1 "a" 7459 - ts:1970-01-01 00:00:00.000001729 +0000 UTC 7460 - value:"foo" 7461 - raw mvcc_key/value: 610000000000000006c1000000010d 666f6f 7462 +0.000001729,1 "ab" 7463 + ts:1970-01-01 00:00:00.000001729 +0000 UTC 7464 + value:"foo" 7465 + raw mvcc_key/value: 61620000000000000006c1000000010d 666f6f 7466 -0.000001729,1 "abcd" 7467 - ts:1970-01-01 00:00:00.000001729 +0000 UTC 7468 - value:"foo" 7469 - raw mvcc_key/value: 616263640000000000000006c1000000010d 666f6f 7470 +0.000001729,1 "abcdef" 7471 + ts:1970-01-01 00:00:00.000001729 +0000 UTC 7472 + value:"foo" 7473 + raw mvcc_key/value: 6162636465660000000000000006c1000000010d 666f6f 7474 +0.000000000,0 "foo" 7475 + ts:<zero> 7476 + value:"foo" 7477 + raw mvcc_key/value: 666f6f00 666f6f 7478 ` 7479 7480 if diff := stringDiff.String(); diff != expDiff { 7481 t.Fatalf("expected:\n%s\ngot:\n%s", expDiff, diff) 7482 } 7483 } 7484 7485 func TestSyncSnapshot(t *testing.T) { 7486 defer leaktest.AfterTest(t)() 7487 7488 tsc := TestStoreConfig(nil) 7489 tc := testContext{} 7490 stopper := stop.NewStopper() 7491 defer stopper.Stop(context.Background()) 7492 tc.StartWithStoreConfig(t, stopper, tsc) 7493 7494 // With enough time in BlockingSnapshotDuration, we succeed on the 7495 // first try. 7496 tc.repl.mu.Lock() 7497 snap, err := tc.repl.raftSnapshotLocked() 7498 tc.repl.mu.Unlock() 7499 7500 if err != nil { 7501 t.Fatal(err) 7502 } 7503 if len(snap.Data) != 0 { 7504 t.Fatal("snapshot is not empty") 7505 } 7506 } 7507 7508 func TestReplicaRetryRaftProposal(t *testing.T) { 7509 defer leaktest.AfterTest(t)() 7510 7511 ctx := context.Background() 7512 var tc testContext 7513 stopper := stop.NewStopper() 7514 defer stopper.Stop(context.Background()) 7515 tc.Start(t, stopper) 7516 7517 type magicKey struct{} 7518 7519 var c int32 // updated atomically 7520 var wrongLeaseIndex uint64 // populated below 7521 7522 tc.repl.mu.Lock() 7523 tc.repl.mu.proposalBuf.testing.leaseIndexFilter = func(p *ProposalData) (indexOverride uint64, _ error) { 7524 if v := p.ctx.Value(magicKey{}); v != nil { 7525 if curAttempt := atomic.AddInt32(&c, 1); curAttempt == 1 { 7526 return wrongLeaseIndex, nil 7527 } 7528 } 7529 return 0, nil 7530 } 7531 tc.repl.mu.Unlock() 7532 7533 pArg := putArgs(roachpb.Key("a"), []byte("asd")) 7534 { 7535 var ba roachpb.BatchRequest 7536 ba.Add(&pArg) 7537 ba.Timestamp = tc.Clock().Now() 7538 if _, pErr := tc.Sender().Send(ctx, ba); pErr != nil { 7539 t.Fatal(pErr) 7540 } 7541 } 7542 7543 // Set the max lease index to that of the recently applied write. 7544 // Two requests can't have the same lease applied index. 
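// Reusing an already-applied lease index makes the first proposal of the
// increment below fail beneath Raft with an illegal lease index, which forces
// exactly one internal reproposal; hence the expectation of two proposal
// attempts further down.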
7545 tc.repl.mu.RLock() 7546 wrongLeaseIndex = tc.repl.mu.state.LeaseAppliedIndex 7547 if wrongLeaseIndex < 1 { 7548 t.Fatal("committed a few batches, but still at lease index zero") 7549 } 7550 tc.repl.mu.RUnlock() 7551 7552 log.Infof(ctx, "test begins") 7553 7554 var ba roachpb.BatchRequest 7555 ba.RangeID = 1 7556 ba.Timestamp = tc.Clock().Now() 7557 const expInc = 123 7558 iArg := incrementArgs(roachpb.Key("b"), expInc) 7559 ba.Add(iArg) 7560 { 7561 _, pErr := tc.repl.executeBatchWithConcurrencyRetries( 7562 context.WithValue(ctx, magicKey{}, "foo"), 7563 &ba, 7564 (*Replica).executeWriteBatch, 7565 ) 7566 if pErr != nil { 7567 t.Fatalf("write batch returned error: %s", pErr) 7568 } 7569 // The command was reproposed internally, for two total proposals. 7570 if exp, act := int32(2), atomic.LoadInt32(&c); exp != act { 7571 t.Fatalf("expected %d proposals, got %d", exp, act) 7572 } 7573 } 7574 7575 // Test LeaseRequest since it's special: MaxLeaseIndex plays no role and so 7576 // there is no re-evaluation of the request. 7577 atomic.StoreInt32(&c, 0) 7578 { 7579 prevLease, _ := tc.repl.GetLease() 7580 ba := ba 7581 ba.Requests = nil 7582 7583 lease := prevLease 7584 lease.Sequence = 0 7585 7586 ba.Add(&roachpb.RequestLeaseRequest{ 7587 RequestHeader: roachpb.RequestHeader{ 7588 Key: tc.repl.Desc().StartKey.AsRawKey(), 7589 }, 7590 Lease: lease, 7591 PrevLease: prevLease, 7592 }) 7593 _, pErr := tc.repl.executeBatchWithConcurrencyRetries( 7594 context.WithValue(ctx, magicKey{}, "foo"), 7595 &ba, 7596 (*Replica).executeWriteBatch, 7597 ) 7598 if pErr != nil { 7599 t.Fatal(pErr) 7600 } 7601 if exp, act := int32(1), atomic.LoadInt32(&c); exp != act { 7602 t.Fatalf("expected %d proposals, got %d", exp, act) 7603 } 7604 } 7605 7606 } 7607 7608 // TestReplicaCancelRaftCommandProgress creates a number of Raft commands and 7609 // immediately abandons some of them, while proposing the remaining ones. It 7610 // then verifies that all the non-abandoned commands get applied (which would 7611 // not be the case if gaps in the applied index posed an issue). 
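// "Abandoning" here means dropping the command in the proposal buffer's
// submitProposalFilter so that it never reaches Raft; the surviving commands
// must still apply even though the dropped ones leave unused lease indexes
// behind.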
7612 func TestReplicaCancelRaftCommandProgress(t *testing.T) { 7613 defer leaktest.AfterTest(t)() 7614 ctx := context.Background() 7615 stopper := stop.NewStopper() 7616 defer stopper.Stop(ctx) 7617 var tc testContext 7618 tc.Start(t, stopper) 7619 repl := tc.repl 7620 7621 tc.repl.mu.Lock() 7622 lease := *repl.mu.state.Lease 7623 abandoned := make(map[int64]struct{}) // protected by repl.mu 7624 tc.repl.mu.proposalBuf.testing.submitProposalFilter = func(p *ProposalData) (drop bool, _ error) { 7625 if _, ok := abandoned[int64(p.command.MaxLeaseIndex)]; ok { 7626 log.Infof(p.ctx, "abandoning command") 7627 return true, nil 7628 } 7629 return false, nil 7630 } 7631 tc.repl.mu.Unlock() 7632 7633 var chs []chan proposalResult 7634 const num = 10 7635 for i := 0; i < num; i++ { 7636 var ba roachpb.BatchRequest 7637 ba.Timestamp = tc.Clock().Now() 7638 ba.Add(&roachpb.PutRequest{ 7639 RequestHeader: roachpb.RequestHeader{ 7640 Key: roachpb.Key(fmt.Sprintf("k%d", i)), 7641 }, 7642 }) 7643 ch, _, idx, err := repl.evalAndPropose(ctx, &ba, allSpansGuard(), &lease) 7644 if err != nil { 7645 t.Fatal(err) 7646 } 7647 7648 repl.mu.Lock() 7649 if rand.Intn(2) == 0 { 7650 abandoned[idx] = struct{}{} 7651 } else { 7652 chs = append(chs, ch) 7653 } 7654 repl.mu.Unlock() 7655 } 7656 7657 log.Infof(ctx, "waiting on %d chans", len(chs)) 7658 for _, ch := range chs { 7659 if rwe := <-ch; rwe.Err != nil { 7660 t.Fatal(rwe.Err) 7661 } 7662 } 7663 } 7664 7665 // TestReplicaBurstPendingCommandsAndRepropose verifies that a burst of 7666 // proposed commands assigns a correct sequence of required indexes, 7667 // and then goes and checks that a reproposal (without prior proposal) results 7668 // in these commands applying at the computed indexes. 7669 func TestReplicaBurstPendingCommandsAndRepropose(t *testing.T) { 7670 defer leaktest.AfterTest(t)() 7671 7672 var tc testContext 7673 cfg := TestStoreConfig(nil) 7674 // Disable reasonNewLeader and reasonNewLeaderOrConfigChange proposal 7675 // refreshes so that our proposals don't risk being reproposed due to 7676 // Raft leadership instability. 
7677 cfg.TestingKnobs.DisableRefreshReasonNewLeader = true 7678 cfg.TestingKnobs.DisableRefreshReasonNewLeaderOrConfigChange = true 7679 stopper := stop.NewStopper() 7680 defer stopper.Stop(context.Background()) 7681 tc.StartWithStoreConfig(t, stopper, cfg) 7682 7683 type magicKey struct{} 7684 ctx := context.WithValue(context.Background(), magicKey{}, "foo") 7685 7686 var seenCmds []int 7687 dropAll := int32(1) 7688 tc.repl.mu.Lock() 7689 tc.repl.mu.proposalBuf.testing.submitProposalFilter = func(p *ProposalData) (drop bool, _ error) { 7690 if atomic.LoadInt32(&dropAll) == 1 { 7691 return true, nil 7692 } 7693 if v := p.ctx.Value(magicKey{}); v != nil { 7694 seenCmds = append(seenCmds, int(p.command.MaxLeaseIndex)) 7695 } 7696 return false, nil 7697 } 7698 lease := *tc.repl.mu.state.Lease 7699 tc.repl.mu.Unlock() 7700 7701 const num = 10 7702 expIndexes := make([]int, 0, num) 7703 chs := make([]chan proposalResult, 0, num) 7704 for i := 0; i < num; i++ { 7705 var ba roachpb.BatchRequest 7706 ba.Timestamp = tc.Clock().Now() 7707 ba.Add(&roachpb.PutRequest{ 7708 RequestHeader: roachpb.RequestHeader{ 7709 Key: roachpb.Key(fmt.Sprintf("k%d", i)), 7710 }, 7711 }) 7712 ch, _, idx, err := tc.repl.evalAndPropose(ctx, &ba, allSpansGuard(), &lease) 7713 if err != nil { 7714 t.Fatal(err) 7715 } 7716 chs = append(chs, ch) 7717 expIndexes = append(expIndexes, int(idx)) 7718 } 7719 7720 tc.repl.mu.Lock() 7721 if err := tc.repl.mu.proposalBuf.flushLocked(); err != nil { 7722 t.Fatal(err) 7723 } 7724 origIndexes := make([]int, 0, num) 7725 for _, p := range tc.repl.mu.proposals { 7726 if v := p.ctx.Value(magicKey{}); v != nil { 7727 origIndexes = append(origIndexes, int(p.command.MaxLeaseIndex)) 7728 } 7729 } 7730 sort.Ints(origIndexes) 7731 tc.repl.mu.Unlock() 7732 7733 if !reflect.DeepEqual(expIndexes, origIndexes) { 7734 t.Fatalf("wanted required indexes %v, got %v", expIndexes, origIndexes) 7735 } 7736 7737 tc.repl.raftMu.Lock() 7738 tc.repl.mu.Lock() 7739 atomic.StoreInt32(&dropAll, 0) 7740 tc.repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonTicks) 7741 if err := tc.repl.mu.proposalBuf.flushLocked(); err != nil { 7742 t.Fatal(err) 7743 } 7744 tc.repl.mu.Unlock() 7745 tc.repl.raftMu.Unlock() 7746 7747 for _, ch := range chs { 7748 if pErr := (<-ch).Err; pErr != nil { 7749 t.Fatal(pErr) 7750 } 7751 } 7752 7753 if !reflect.DeepEqual(seenCmds, expIndexes) { 7754 t.Fatalf("expected indexes %v, got %v", expIndexes, seenCmds) 7755 } 7756 7757 tc.repl.mu.RLock() 7758 defer tc.repl.mu.RUnlock() 7759 if tc.repl.hasPendingProposalsRLocked() { 7760 t.Fatal("still pending commands") 7761 } 7762 lastAssignedIdx := tc.repl.mu.proposalBuf.LastAssignedLeaseIndexRLocked() 7763 curIdx := tc.repl.mu.state.LeaseAppliedIndex 7764 if c := lastAssignedIdx - curIdx; c > 0 { 7765 t.Errorf("no pending cmds, but have required index offset %d", c) 7766 } 7767 } 7768 7769 func TestReplicaRefreshPendingCommandsTicks(t *testing.T) { 7770 defer leaktest.AfterTest(t)() 7771 var tc testContext 7772 cfg := TestStoreConfig(nil) 7773 // Disable ticks which would interfere with the manual ticking in this test. 7774 cfg.RaftTickInterval = math.MaxInt32 7775 stopper := stop.NewStopper() 7776 defer stopper.Stop(context.Background()) 7777 tc.StartWithStoreConfig(t, stopper, cfg) 7778 7779 // Flush a write all the way through the Raft proposal pipeline. 
This 7780 // ensures that leadership settles down before we start manually submitting 7781 // proposals and that we don't see any unexpected proposal refreshes due to 7782 // reasons like reasonNewLeaderOrConfigChange. 7783 args := incrementArgs([]byte("a"), 1) 7784 if _, pErr := tc.SendWrapped(args); pErr != nil { 7785 t.Fatal(pErr) 7786 } 7787 7788 r := tc.repl 7789 electionTicks := tc.store.cfg.RaftElectionTimeoutTicks 7790 { 7791 // The verifications of the reproposal counts below rely on r.mu.ticks 7792 // starting with a value of 0 (modulo electionTicks). Move the replica into 7793 // that state in case the replica was ticked before we grabbed 7794 // processRaftMu. 7795 r.mu.Lock() 7796 ticks := r.mu.ticks 7797 r.mu.Unlock() 7798 for ; (ticks % electionTicks) != 0; ticks++ { 7799 if _, err := r.tick(nil); err != nil { 7800 t.Fatal(err) 7801 } 7802 } 7803 } 7804 7805 var dropProposals struct { 7806 syncutil.Mutex 7807 m map[*ProposalData]struct{} 7808 } 7809 dropProposals.m = make(map[*ProposalData]struct{}) 7810 7811 r.mu.Lock() 7812 r.mu.proposalBuf.testing.submitProposalFilter = func(p *ProposalData) (drop bool, _ error) { 7813 dropProposals.Lock() 7814 defer dropProposals.Unlock() 7815 _, ok := dropProposals.m[p] 7816 return ok, nil 7817 } 7818 r.mu.Unlock() 7819 7820 // We tick the replica 2*RaftElectionTimeoutTicks. RaftElectionTimeoutTicks 7821 // is special in that it controls how often pending commands are reproposed. 7822 for i := 0; i < 2*electionTicks; i++ { 7823 // Add another pending command on each iteration. 7824 id := fmt.Sprintf("%08d", i) 7825 var ba roachpb.BatchRequest 7826 ba.Timestamp = tc.Clock().Now() 7827 ba.Add(&roachpb.PutRequest{RequestHeader: roachpb.RequestHeader{Key: roachpb.Key(id)}}) 7828 lease, _ := r.GetLease() 7829 ctx := context.Background() 7830 cmd, pErr := r.requestToProposal(ctx, kvserverbase.CmdIDKey(id), &ba, &allSpans) 7831 if pErr != nil { 7832 t.Fatal(pErr) 7833 } 7834 7835 dropProposals.Lock() 7836 dropProposals.m[cmd] = struct{}{} // silently drop proposals 7837 dropProposals.Unlock() 7838 7839 cmd.command.ProposerLeaseSequence = lease.Sequence 7840 if _, pErr := r.propose(ctx, cmd); pErr != nil { 7841 t.Error(pErr) 7842 } 7843 r.mu.Lock() 7844 if err := tc.repl.mu.proposalBuf.flushLocked(); err != nil { 7845 t.Fatal(err) 7846 } 7847 r.mu.Unlock() 7848 7849 // Tick raft. 7850 if _, err := r.tick(nil); err != nil { 7851 t.Fatal(err) 7852 } 7853 7854 r.mu.Lock() 7855 ticks := r.mu.ticks 7856 r.mu.Unlock() 7857 7858 var reproposed []*ProposalData 7859 r.mu.Lock() // avoid data race - proposals belong to the Replica 7860 if err := tc.repl.mu.proposalBuf.flushLocked(); err != nil { 7861 t.Fatal(err) 7862 } 7863 dropProposals.Lock() 7864 for p := range dropProposals.m { 7865 if p.proposedAtTicks >= ticks { 7866 reproposed = append(reproposed, p) 7867 } 7868 } 7869 dropProposals.Unlock() 7870 r.mu.Unlock() 7871 7872 // Reproposals are only performed every electionTicks. We'll need 7873 // to fix this test if that changes. 
7874 if (ticks % electionTicks) == 0 { 7875 if len(reproposed) != i-1 { 7876 t.Fatalf("%d: expected %d reproposed commands, but found %d", i, i-1, len(reproposed)) 7877 } 7878 } else { 7879 if len(reproposed) != 0 { 7880 t.Fatalf("%d: expected no reproposed commands, but found %+v", i, reproposed) 7881 } 7882 } 7883 } 7884 } 7885 7886 // TestReplicaRefreshMultiple tests an interaction between refreshing 7887 // proposals after a new leader or ticks (which results in multiple 7888 // copies in the log with the same lease index) and refreshing after 7889 // an illegal lease index error (with a new lease index assigned). 7890 // 7891 // The setup here is rather artificial, but it represents something 7892 // that can happen (very rarely) in the real world with multiple raft 7893 // leadership transfers. 7894 func TestReplicaRefreshMultiple(t *testing.T) { 7895 defer leaktest.AfterTest(t)() 7896 7897 ctx := context.Background() 7898 7899 var filterActive int32 7900 var incCmdID kvserverbase.CmdIDKey 7901 var incApplyCount int64 7902 tsc := TestStoreConfig(nil) 7903 tsc.TestingKnobs.TestingApplyFilter = func(filterArgs kvserverbase.ApplyFilterArgs) (int, *roachpb.Error) { 7904 if atomic.LoadInt32(&filterActive) != 0 && filterArgs.CmdID == incCmdID { 7905 atomic.AddInt64(&incApplyCount, 1) 7906 } 7907 return 0, nil 7908 } 7909 var tc testContext 7910 stopper := stop.NewStopper() 7911 defer stopper.Stop(ctx) 7912 tc.StartWithStoreConfig(t, stopper, tsc) 7913 repl := tc.repl 7914 7915 key := roachpb.Key("a") 7916 7917 // Run a few commands first: This advances the lease index, which is 7918 // necessary for the tricks we're going to play to induce failures 7919 // (we need to be able to subtract from the current lease index 7920 // without going below 0). 7921 for i := 0; i < 3; i++ { 7922 inc := incrementArgs(key, 1) 7923 if _, pErr := kv.SendWrapped(ctx, tc.Sender(), inc); pErr != nil { 7924 t.Fatal(pErr) 7925 } 7926 } 7927 // Sanity check the resulting value. 7928 get := getArgs(key) 7929 if resp, pErr := kv.SendWrapped(ctx, tc.Sender(), &get); pErr != nil { 7930 t.Fatal(pErr) 7931 } else if x, err := resp.(*roachpb.GetResponse).Value.GetInt(); err != nil { 7932 t.Fatalf("returned non-int: %+v", err) 7933 } else if x != 3 { 7934 t.Fatalf("expected 3, got %d", x) 7935 } 7936 7937 // Manually propose another increment. This is the one we'll 7938 // manipulate into failing. (the use of increment here is not 7939 // significant. I originally wrote it this way because I thought the 7940 // non-idempotence of increment would make it easier to test, but 7941 // since the reproposals we're concerned with don't result in 7942 // reevaluation it doesn't matter) 7943 inc := incrementArgs(key, 1) 7944 var ba roachpb.BatchRequest 7945 ba.Add(inc) 7946 ba.Timestamp = tc.Clock().Now() 7947 7948 incCmdID = makeIDKey() 7949 atomic.StoreInt32(&filterActive, 1) 7950 proposal, pErr := repl.requestToProposal(ctx, incCmdID, &ba, &allSpans) 7951 if pErr != nil { 7952 t.Fatal(pErr) 7953 } 7954 // Save this channel; it may get reset to nil before we read from it. 7955 proposalDoneCh := proposal.doneCh 7956 7957 repl.mu.Lock() 7958 ai := repl.mu.state.LeaseAppliedIndex 7959 if ai <= 1 { 7960 // Lease index zero is special in this test because we subtract 7961 // from it below, so we need enough previous proposals in the 7962 // log to ensure it doesn't go negative. 
7963 t.Fatalf("test requires LeaseAppliedIndex >= 2 at this point, have %d", ai) 7964 } 7965 assigned := false 7966 repl.mu.proposalBuf.testing.leaseIndexFilter = func(p *ProposalData) (indexOverride uint64, _ error) { 7967 if p == proposal && !assigned { 7968 assigned = true 7969 return ai - 1, nil 7970 } 7971 return 0, nil 7972 } 7973 repl.mu.Unlock() 7974 7975 // Propose the command manually with errors induced. The first time it is 7976 // proposed it will be given the incorrect max lease index which ensures 7977 // that it will generate a retry when it fails. Then call refreshProposals 7978 // twice to repropose it and put it in the logs twice more. 7979 proposal.command.ProposerLeaseSequence = repl.mu.state.Lease.Sequence 7980 if _, pErr := repl.propose(ctx, proposal); pErr != nil { 7981 t.Fatal(pErr) 7982 } 7983 repl.mu.Lock() 7984 if err := tc.repl.mu.proposalBuf.flushLocked(); err != nil { 7985 t.Fatal(err) 7986 } 7987 repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonNewLeader) 7988 repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonNewLeader) 7989 repl.mu.Unlock() 7990 7991 // Wait for our proposal to apply. The two refreshed proposals above 7992 // will fail due to their illegal lease index. Then they'll generate 7993 // a reproposal (in the bug that we're testing against, they'd 7994 // *each* generate a reproposal). When this reproposal succeeds, the 7995 // doneCh is signaled. 7996 select { 7997 case resp := <-proposalDoneCh: 7998 if resp.Err != nil { 7999 t.Fatal(resp.Err) 8000 } 8001 case <-time.After(5 * time.Second): 8002 t.Fatal("timed out") 8003 } 8004 // In the buggy case, there's a second reproposal that we don't have 8005 // a good way to observe, so just sleep to let it apply if it's in 8006 // the system. 8007 time.Sleep(10 * time.Millisecond) 8008 8009 // The command applied exactly once. Note that this check would pass 8010 // even in the buggy case, since illegal lease index proposals do 8011 // not generate reevaluations (and increment is handled upstream of 8012 // raft). 8013 if resp, pErr := kv.SendWrapped(ctx, tc.Sender(), &get); pErr != nil { 8014 t.Fatal(pErr) 8015 } else if x, err := resp.(*roachpb.GetResponse).Value.GetInt(); err != nil { 8016 t.Fatalf("returned non-int: %+v", err) 8017 } else if x != 4 { 8018 t.Fatalf("expected 4, got %d", x) 8019 } 8020 8021 // The real test: our apply filter can tell us whether there was a 8022 // duplicate reproposal. (A reproposed increment isn't harmful, but 8023 // some other commands could be) 8024 if x := atomic.LoadInt64(&incApplyCount); x != 1 { 8025 t.Fatalf("expected 1, got %d", x) 8026 } 8027 } 8028 8029 // TestReplicaReproposalWithNewLeaseIndexError tests an interaction where a 8030 // proposal is rejected beneath raft due an illegal lease index error and then 8031 // hits an error when being reproposed. The expectation is that this error 8032 // manages to make its way back to the client. 
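// (Both the too-low lease index and the subsequent reproposal error are injected below
// through the proposal buffer's leaseIndexFilter testing hook.)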
8033 func TestReplicaReproposalWithNewLeaseIndexError(t *testing.T) { 8034 defer leaktest.AfterTest(t)() 8035 8036 ctx := context.Background() 8037 var tc testContext 8038 stopper := stop.NewStopper() 8039 defer stopper.Stop(ctx) 8040 tc.Start(t, stopper) 8041 8042 type magicKey struct{} 8043 magicCtx := context.WithValue(ctx, magicKey{}, "foo") 8044 8045 var c int32 // updated atomically 8046 tc.repl.mu.Lock() 8047 tc.repl.mu.proposalBuf.testing.leaseIndexFilter = func(p *ProposalData) (indexOverride uint64, _ error) { 8048 if v := p.ctx.Value(magicKey{}); v != nil { 8049 curAttempt := atomic.AddInt32(&c, 1) 8050 switch curAttempt { 8051 case 1: 8052 // This is the first time the command is being given a max lease 8053 // applied index. Set the index to that of the recently applied 8054 // write. Two requests can't have the same lease applied index, 8055 // so this will cause it to be rejected beneath raft with an 8056 // illegal lease index error. 8057 wrongLeaseIndex := uint64(1) 8058 return wrongLeaseIndex, nil 8059 case 2: 8060 // This is the second time the command is being given a max 8061 // lease applied index, which should be after the command was 8062 // rejected beneath raft. Return an error. We expect this error 8063 // to propagate up through tryReproposeWithNewLeaseIndex and 8064 // make it back to the client. 8065 return 0, errors.New("boom") 8066 default: 8067 // Unexpected. Asserted against below. 8068 return 0, nil 8069 } 8070 } 8071 return 0, nil 8072 } 8073 tc.repl.mu.Unlock() 8074 8075 // Perform a few writes to advance the lease applied index. 8076 const initCount = 3 8077 key := roachpb.Key("a") 8078 for i := 0; i < initCount; i++ { 8079 iArg := incrementArgs(key, 1) 8080 if _, pErr := tc.SendWrapped(iArg); pErr != nil { 8081 t.Fatal(pErr) 8082 } 8083 } 8084 8085 // Perform a write that will first hit an illegal lease index error and 8086 // will then hit the injected error when we attempt to repropose it. 8087 var ba roachpb.BatchRequest 8088 iArg := incrementArgs(key, 10) 8089 ba.Add(iArg) 8090 if _, pErr := tc.Sender().Send(magicCtx, ba); pErr == nil { 8091 t.Fatal("expected a non-nil error") 8092 } else if !testutils.IsPError(pErr, "boom") { 8093 t.Fatalf("unexpected error: %v", pErr) 8094 } 8095 // The command should have picked a new max lease index exactly twice. 8096 if exp, act := int32(2), atomic.LoadInt32(&c); exp != act { 8097 t.Fatalf("expected %d proposals, got %d", exp, act) 8098 } 8099 8100 // The command should not have applied. 8101 gArgs := getArgs(key) 8102 if reply, pErr := tc.SendWrapped(&gArgs); pErr != nil { 8103 t.Fatal(pErr) 8104 } else if v, err := reply.(*roachpb.GetResponse).Value.GetInt(); err != nil { 8105 t.Fatal(err) 8106 } else if v != initCount { 8107 t.Fatalf("expected value of %d, found %d", initCount, v) 8108 } 8109 } 8110 8111 // TestGCWithoutThreshold validates that GCRequest only declares the threshold 8112 // key if it is subject to change, and that it does not access this key if it 8113 // does not declare them. 
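// (Accesses outside the declared spans are caught below by evaluating the request against
// a batch wrapped with spanset.NewBatch, which checks every access against the declared span set.)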
8114 func TestGCWithoutThreshold(t *testing.T) {
8115 defer leaktest.AfterTest(t)()
8116
8117 desc := &roachpb.RangeDescriptor{StartKey: roachpb.RKey("a"), EndKey: roachpb.RKey("z")}
8118 ctx := context.Background()
8119
8120 tc := &testContext{}
8121 stopper := stop.NewStopper()
8122 defer stopper.Stop(ctx)
8123 tc.Start(t, stopper)
8124
8125 for _, keyThresh := range []hlc.Timestamp{{}, {Logical: 1}} {
8126 t.Run(fmt.Sprintf("thresh=%s", keyThresh), func(t *testing.T) {
8127 var gc roachpb.GCRequest
8128 var spans spanset.SpanSet
8129
8130 gc.Threshold = keyThresh
8131 cmd, _ := batcheval.LookupCommand(roachpb.GC)
8132 cmd.DeclareKeys(desc, roachpb.Header{RangeID: tc.repl.RangeID}, &gc, &spans, nil)
8133
8134 expSpans := 1
8135 if !keyThresh.IsEmpty() {
8136 expSpans++
8137 }
8138 if numSpans := spans.Len(); numSpans != expSpans {
8139 t.Fatalf("expected %d declared keys, found %d", expSpans, numSpans)
8140 }
8141
8142 eng := storage.NewDefaultInMem()
8143 defer eng.Close()
8144
8145 batch := eng.NewBatch()
8146 defer batch.Close()
8147 rw := spanset.NewBatch(batch, &spans)
8148
8149 var resp roachpb.GCResponse
8150
8151 if _, err := batcheval.GC(ctx, rw, batcheval.CommandArgs{
8152 Args: &gc,
8153 EvalCtx: NewReplicaEvalContext(tc.repl, &spans),
8154 }, &resp); err != nil {
8155 t.Fatal(err)
8156 }
8157 })
8158 }
8159 }
8160
8161 // Test that, if the Raft command resulting from an EndTxn request fails to be
8162 // processed/applied, then the LocalResult associated with that command is
8163 // cleared.
8164 func TestFailureToProcessCommandClearsLocalResult(t *testing.T) {
8165 defer leaktest.AfterTest(t)()
8166 ctx := context.Background()
8167 var tc testContext
8168 cfg := TestStoreConfig(nil)
8169 stopper := stop.NewStopper()
8170 defer stopper.Stop(ctx)
8171 tc.StartWithStoreConfig(t, stopper, cfg)
8172
8173 key := roachpb.Key("a")
8174 txn := newTransaction("test", key, 1, tc.Clock())
8175
8176 var ba roachpb.BatchRequest
8177 ba.Header = roachpb.Header{Txn: txn}
8178 put := putArgs(key, []byte("value"))
8179 assignSeqNumsForReqs(txn, &put)
8180 ba.Add(&put)
8181 if _, err := tc.Sender().Send(ctx, ba); err != nil {
8182 t.Fatal(err)
8183 }
8184
8185 var proposalRecognized int64 // accessed atomically
8186
8187 r := tc.repl
8188 r.mu.Lock()
8189 r.mu.proposalBuf.testing.leaseIndexFilter = func(p *ProposalData) (indexOverride uint64, _ error) {
8190 // We're going to recognize the first time the command for the EndTxn
8191 // is proposed and we're going to hackily decrease its MaxLeaseIndex, so
8192 // that the processing gets rejected further on.
8193 ut := p.Local.UpdatedTxns
8194 if atomic.LoadInt64(&proposalRecognized) == 0 && ut != nil && len(ut) == 1 && ut[0].ID == txn.ID {
8195 atomic.StoreInt64(&proposalRecognized, 1)
8196 return p.command.MaxLeaseIndex - 1, nil
8197 }
8198 return 0, nil
8199 }
8200 r.mu.Unlock()
8201
8202 opCtx, collect, cancel := tracing.ContextWithRecordingSpan(ctx, "test-recording")
8203 defer cancel()
8204
8205 ba = roachpb.BatchRequest{}
8206 et, etH := endTxnArgs(txn, true /* commit */)
8207 et.LockSpans = []roachpb.Span{{Key: key}}
8208 assignSeqNumsForReqs(txn, &et)
8209 ba.Header = etH
8210 ba.Add(&et)
8211 if _, err := tc.Sender().Send(opCtx, ba); err != nil {
8212 t.Fatal(err)
8213 }
8214 formatted := collect().String()
8215 if err := testutils.MatchInOrder(formatted,
8216 // The first proposal is rejected.
8217 "retry proposal.*applied at lease index.*but required",
8218 // The request will be re-evaluated.
8219 "retry: proposalIllegalLeaseIndex", 8220 // The LocalResult is nil. This is the important part for this test. 8221 "LocalResult: nil", 8222 // Re-evaluation succeeds and one txn is to be updated. 8223 "LocalResult \\(reply.*#updated txns: 1", 8224 ); err != nil { 8225 t.Fatal(err) 8226 } 8227 } 8228 8229 // TestCommandTimeThreshold verifies that commands outside the replica GC 8230 // threshold fail. 8231 func TestCommandTimeThreshold(t *testing.T) { 8232 defer leaktest.AfterTest(t)() 8233 tc := testContext{} 8234 stopper := stop.NewStopper() 8235 defer stopper.Stop(context.Background()) 8236 tc.Start(t, stopper) 8237 8238 now := tc.Clock().Now() 8239 ts1 := now.Add(1, 0) 8240 ts2 := now.Add(2, 0) 8241 ts3 := now.Add(3, 0) 8242 8243 key := roachpb.Key("a") 8244 keycp := roachpb.Key("c") 8245 8246 va := []byte("a") 8247 vb := []byte("b") 8248 8249 // Verify a Get works. 8250 gArgs := getArgs(key) 8251 if _, err := tc.SendWrappedWith(roachpb.Header{ 8252 Timestamp: ts1, 8253 }, &gArgs); err != nil { 8254 t.Fatalf("could not get data: %+v", err) 8255 } 8256 // Verify a later Get works. 8257 if _, err := tc.SendWrappedWith(roachpb.Header{ 8258 Timestamp: ts3, 8259 }, &gArgs); err != nil { 8260 t.Fatalf("could not get data: %+v", err) 8261 } 8262 8263 // Put some data for use with CP later on. 8264 pArgs := putArgs(keycp, va) 8265 if _, err := tc.SendWrappedWith(roachpb.Header{ 8266 Timestamp: ts1, 8267 }, &pArgs); err != nil { 8268 t.Fatalf("could not put data: %+v", err) 8269 } 8270 8271 // Do a GC. 8272 gcr := roachpb.GCRequest{ 8273 Threshold: ts2, 8274 } 8275 if _, err := tc.SendWrappedWith(roachpb.Header{RangeID: 1}, &gcr); err != nil { 8276 t.Fatal(err) 8277 } 8278 8279 // Do the same Get, which should now fail. 8280 if _, pErr := tc.SendWrappedWith(roachpb.Header{ 8281 Timestamp: ts1, 8282 }, &gArgs); !testutils.IsPError(pErr, `batch timestamp 0.\d+,\d+ must be after replica GC threshold 0.\d+,\d+`) { 8283 t.Fatalf("unexpected error: %v", pErr) 8284 } 8285 8286 // Verify a later Get works. 8287 if _, pErr := tc.SendWrappedWith(roachpb.Header{ 8288 Timestamp: ts3, 8289 }, &gArgs); pErr != nil { 8290 t.Fatal(pErr) 8291 } 8292 8293 // Verify an early CPut fails. 8294 cpArgs := cPutArgs(keycp, vb, va) 8295 if _, pErr := tc.SendWrappedWith(roachpb.Header{ 8296 Timestamp: ts2, 8297 }, &cpArgs); !testutils.IsPError(pErr, `batch timestamp 0.\d+,\d+ must be after replica GC threshold 0.\d+,\d+`) { 8298 t.Fatalf("unexpected error: %v", pErr) 8299 } 8300 // Verify a later CPut works. 
8301 if _, pErr := tc.SendWrappedWith(roachpb.Header{ 8302 Timestamp: ts3, 8303 }, &cpArgs); pErr != nil { 8304 t.Fatal(pErr) 8305 } 8306 } 8307 8308 func TestReplicaTimestampCacheBumpNotLost(t *testing.T) { 8309 defer leaktest.AfterTest(t)() 8310 8311 tc := testContext{} 8312 stopper := stop.NewStopper() 8313 defer stopper.Stop(context.Background()) 8314 tc.Start(t, stopper) 8315 8316 ctx := tc.store.AnnotateCtx(context.Background()) 8317 key := keys.LocalMax 8318 8319 txn := newTransaction("test", key, 1, tc.Clock()) 8320 8321 minNewTS := func() hlc.Timestamp { 8322 var ba roachpb.BatchRequest 8323 scan := scanArgs(key, tc.repl.Desc().EndKey.AsRawKey()) 8324 ba.Add(scan) 8325 8326 resp, pErr := tc.Sender().Send(ctx, ba) 8327 if pErr != nil { 8328 t.Fatal(pErr) 8329 } 8330 if resp.Timestamp.LessEq(txn.WriteTimestamp) { 8331 t.Fatalf("expected txn ts %s < scan TS %s", txn.WriteTimestamp, resp.Timestamp) 8332 } 8333 return resp.Timestamp 8334 }() 8335 8336 var ba roachpb.BatchRequest 8337 ba.Txn = txn 8338 txnPut := putArgs(key, []byte("timestamp should be bumped")) 8339 ba.Add(&txnPut) 8340 8341 assignSeqNumsForReqs(txn, &txnPut) 8342 origTxn := txn.Clone() 8343 8344 resp, pErr := tc.Sender().Send(ctx, ba) 8345 if pErr != nil { 8346 t.Fatal(pErr) 8347 } 8348 8349 if !reflect.DeepEqual(origTxn, txn) { 8350 t.Fatalf( 8351 "original transaction proto was mutated: %s", 8352 pretty.Diff(origTxn, txn), 8353 ) 8354 } 8355 if resp.Txn == nil { 8356 t.Fatal("no transaction in response") 8357 } else if resp.Txn.WriteTimestamp.Less(minNewTS) { 8358 t.Fatalf( 8359 "expected txn ts bumped at least to %s, but got %s", 8360 minNewTS, txn.WriteTimestamp, 8361 ) 8362 } 8363 } 8364 8365 func TestReplicaEvaluationNotTxnMutation(t *testing.T) { 8366 defer leaktest.AfterTest(t)() 8367 8368 tc := testContext{} 8369 stopper := stop.NewStopper() 8370 defer stopper.Stop(context.Background()) 8371 tc.Start(t, stopper) 8372 8373 ctx := tc.repl.AnnotateCtx(context.Background()) 8374 key := keys.LocalMax 8375 8376 txn := newTransaction("test", key, 1, tc.Clock()) 8377 8378 var ba roachpb.BatchRequest 8379 ba.Txn = txn 8380 ba.Timestamp = txn.WriteTimestamp 8381 txnPut := putArgs(key, []byte("foo")) 8382 txnPut2 := txnPut 8383 // Add two puts (the second one gets Sequence 2, which was a failure mode 8384 // observed when this test was written and the failure fixed). Originally 8385 // observed in #10137, where this became relevant (before that, evaluation 8386 // happened downstream of Raft, so a serialization pass always took place). 8387 ba.Add(&txnPut) 8388 ba.Add(&txnPut2) 8389 assignSeqNumsForReqs(txn, &txnPut, &txnPut2) 8390 origTxn := txn.Clone() 8391 8392 batch, _, _, _, pErr := tc.repl.evaluateWriteBatch(ctx, makeIDKey(), &ba, &allSpans) 8393 defer batch.Close() 8394 if pErr != nil { 8395 t.Fatal(pErr) 8396 } 8397 if !reflect.DeepEqual(origTxn, txn) { 8398 t.Fatalf("transaction was mutated during evaluation: %s", pretty.Diff(origTxn, txn)) 8399 } 8400 } 8401 8402 // TODO(peter): Test replicaMetrics.leaseholder. 
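// TestReplicaMetrics exercises calcReplicaMetrics across combinations of range descriptor,
// raft status, and node liveness, verifying the Leader, RangeCounter, Unavailable, and
// Underreplicated flags along with the BehindCount derived from the replicas' Match indexes
// relative to the commit index.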
8403 func TestReplicaMetrics(t *testing.T) { 8404 defer leaktest.AfterTest(t)() 8405 8406 progress := func(vals ...uint64) map[uint64]tracker.Progress { 8407 m := make(map[uint64]tracker.Progress) 8408 for i, v := range vals { 8409 m[uint64(i+1)] = tracker.Progress{Match: v} 8410 } 8411 return m 8412 } 8413 status := func(lead uint64, progress map[uint64]tracker.Progress) *raft.Status { 8414 status := &raft.Status{ 8415 Progress: progress, 8416 } 8417 // The commit index is set so that a progress.Match value of 1 is behind 8418 // and 2 is ok. 8419 status.HardState.Commit = 12 8420 if lead == 1 { 8421 status.SoftState.RaftState = raft.StateLeader 8422 } else { 8423 status.SoftState.RaftState = raft.StateFollower 8424 } 8425 status.SoftState.Lead = lead 8426 return status 8427 } 8428 desc := func(ids ...int) roachpb.RangeDescriptor { 8429 var d roachpb.RangeDescriptor 8430 for i, id := range ids { 8431 d.InternalReplicas = append(d.InternalReplicas, roachpb.ReplicaDescriptor{ 8432 ReplicaID: roachpb.ReplicaID(i + 1), 8433 StoreID: roachpb.StoreID(id), 8434 NodeID: roachpb.NodeID(id), 8435 }) 8436 } 8437 return d 8438 } 8439 live := func(ids ...roachpb.NodeID) IsLiveMap { 8440 m := IsLiveMap{} 8441 for _, id := range ids { 8442 m[id] = IsLiveMapEntry{IsLive: true} 8443 } 8444 return m 8445 } 8446 8447 var tc testContext 8448 stopper := stop.NewStopper() 8449 defer stopper.Stop(context.Background()) 8450 cfg := TestStoreConfig(nil) 8451 tc.StartWithStoreConfig(t, stopper, cfg) 8452 8453 testCases := []struct { 8454 replicas int32 8455 storeID roachpb.StoreID 8456 desc roachpb.RangeDescriptor 8457 raftStatus *raft.Status 8458 liveness IsLiveMap 8459 raftLogSize int64 8460 expected ReplicaMetrics 8461 }{ 8462 // The leader of a 1-replica range is up. 8463 {1, 1, desc(1), status(1, progress(2)), live(1), 0, 8464 ReplicaMetrics{ 8465 Leader: true, 8466 RangeCounter: true, 8467 Unavailable: false, 8468 Underreplicated: false, 8469 BehindCount: 10, 8470 }}, 8471 // The leader of a 2-replica range is up (only 1 replica present). 8472 {2, 1, desc(1), status(1, progress(2)), live(1), 0, 8473 ReplicaMetrics{ 8474 Leader: true, 8475 RangeCounter: true, 8476 Unavailable: false, 8477 Underreplicated: true, 8478 BehindCount: 10, 8479 }}, 8480 // The leader of a 2-replica range is up. 8481 {2, 1, desc(1, 2), status(1, progress(2)), live(1), 0, 8482 ReplicaMetrics{ 8483 Leader: true, 8484 RangeCounter: true, 8485 Unavailable: true, 8486 Underreplicated: true, 8487 BehindCount: 10, 8488 }}, 8489 // Both replicas of a 2-replica range are up to date. 8490 {2, 1, desc(1, 2), status(1, progress(2, 2)), live(1, 2), 0, 8491 ReplicaMetrics{ 8492 Leader: true, 8493 RangeCounter: true, 8494 Unavailable: false, 8495 Underreplicated: false, 8496 BehindCount: 20, 8497 }}, 8498 // Both replicas of a 2-replica range are up to date (local replica is not leader) 8499 {2, 2, desc(1, 2), status(2, progress(2, 2)), live(1, 2), 0, 8500 ReplicaMetrics{ 8501 Leader: false, 8502 RangeCounter: false, 8503 Unavailable: false, 8504 Underreplicated: false, 8505 }}, 8506 // Both replicas of a 2-replica range are live, but follower is behind. 8507 {2, 1, desc(1, 2), status(1, progress(2, 1)), live(1, 2), 0, 8508 ReplicaMetrics{ 8509 Leader: true, 8510 RangeCounter: true, 8511 Unavailable: false, 8512 Underreplicated: false, 8513 BehindCount: 21, 8514 }}, 8515 // Both replicas of a 2-replica range are up to date, but follower is dead. 
8516 {2, 1, desc(1, 2), status(1, progress(2, 2)), live(1), 0, 8517 ReplicaMetrics{ 8518 Leader: true, 8519 RangeCounter: true, 8520 Unavailable: true, 8521 Underreplicated: true, 8522 BehindCount: 20, 8523 }}, 8524 // The leader of a 3-replica range is up. 8525 {3, 1, desc(1, 2, 3), status(1, progress(1)), live(1), 0, 8526 ReplicaMetrics{ 8527 Leader: true, 8528 RangeCounter: true, 8529 Unavailable: true, 8530 Underreplicated: true, 8531 BehindCount: 11, 8532 }}, 8533 // All replicas of a 3-replica range are up to date. 8534 {3, 1, desc(1, 2, 3), status(1, progress(2, 2, 2)), live(1, 2, 3), 0, 8535 ReplicaMetrics{ 8536 Leader: true, 8537 RangeCounter: true, 8538 Unavailable: false, 8539 Underreplicated: false, 8540 BehindCount: 30, 8541 }}, 8542 // All replicas of a 3-replica range are up to date (match = 0 is 8543 // considered up to date). 8544 {3, 1, desc(1, 2, 3), status(1, progress(2, 2, 0)), live(1, 2, 3), 0, 8545 ReplicaMetrics{ 8546 Leader: true, 8547 RangeCounter: true, 8548 Unavailable: false, 8549 Underreplicated: false, 8550 BehindCount: 20, 8551 }}, 8552 // All replicas of a 3-replica range are live but one replica is behind. 8553 {3, 1, desc(1, 2, 3), status(1, progress(2, 2, 1)), live(1, 2, 3), 0, 8554 ReplicaMetrics{ 8555 Leader: true, 8556 RangeCounter: true, 8557 Unavailable: false, 8558 Underreplicated: false, 8559 BehindCount: 31, 8560 }}, 8561 // All replicas of a 3-replica range are live but two replicas are behind. 8562 {3, 1, desc(1, 2, 3), status(1, progress(2, 1, 1)), live(1, 2, 3), 0, 8563 ReplicaMetrics{ 8564 Leader: true, 8565 RangeCounter: true, 8566 Unavailable: false, 8567 Underreplicated: false, 8568 BehindCount: 32, 8569 }}, 8570 // All replicas of a 3-replica range are up to date, but one replica is dead. 8571 {3, 1, desc(1, 2, 3), status(1, progress(2, 2, 2)), live(1, 2), 0, 8572 ReplicaMetrics{ 8573 Leader: true, 8574 RangeCounter: true, 8575 Unavailable: false, 8576 Underreplicated: true, 8577 BehindCount: 30, 8578 }}, 8579 // All replicas of a 3-replica range are up to date, but two replicas are dead. 8580 {3, 1, desc(1, 2, 3), status(1, progress(2, 2, 2)), live(1), 0, 8581 ReplicaMetrics{ 8582 Leader: true, 8583 RangeCounter: true, 8584 Unavailable: true, 8585 Underreplicated: true, 8586 BehindCount: 30, 8587 }}, 8588 // All replicas of a 3-replica range are up to date, but two replicas are 8589 // dead, including the leader. 8590 {3, 2, desc(1, 2, 3), status(0, progress(2, 2, 2)), live(2), 0, 8591 ReplicaMetrics{ 8592 Leader: false, 8593 RangeCounter: true, 8594 Unavailable: true, 8595 Underreplicated: true, 8596 BehindCount: 0, 8597 }}, 8598 // Range has no leader, local replica is the range counter. 8599 {3, 1, desc(1, 2, 3), status(0, progress(2, 2, 2)), live(1, 2, 3), 0, 8600 ReplicaMetrics{ 8601 Leader: false, 8602 RangeCounter: true, 8603 Unavailable: false, 8604 Underreplicated: false, 8605 }}, 8606 // Range has no leader, local replica is the range counter. 8607 {3, 3, desc(3, 2, 1), status(0, progress(2, 2, 2)), live(1, 2, 3), 0, 8608 ReplicaMetrics{ 8609 Leader: false, 8610 RangeCounter: true, 8611 Unavailable: false, 8612 Underreplicated: false, 8613 }}, 8614 // Range has no leader, local replica is not the range counter. 8615 {3, 2, desc(1, 2, 3), status(0, progress(2, 2, 2)), live(1, 2, 3), 0, 8616 ReplicaMetrics{ 8617 Leader: false, 8618 RangeCounter: false, 8619 Unavailable: false, 8620 Underreplicated: false, 8621 }}, 8622 // Range has no leader, local replica is not the range counter. 
8623 {3, 3, desc(1, 2, 3), status(0, progress(2, 2, 2)), live(1, 2, 3), 0,
8624 ReplicaMetrics{
8625 Leader: false,
8626 RangeCounter: false,
8627 Unavailable: false,
8628 Underreplicated: false,
8629 }},
8630 // The leader of a 1-replica range is up and raft log is too large.
8631 {1, 1, desc(1), status(1, progress(2)), live(1), 5 * cfg.RaftLogTruncationThreshold,
8632 ReplicaMetrics{
8633 Leader: true,
8634 RangeCounter: true,
8635 Unavailable: false,
8636 Underreplicated: false,
8637 BehindCount: 10,
8638 RaftLogTooLarge: true,
8639 }},
8640 }
8641
8642 for i, c := range testCases {
8643 t.Run("", func(t *testing.T) {
8644 zoneConfig := protoutil.Clone(cfg.DefaultZoneConfig).(*zonepb.ZoneConfig)
8645 zoneConfig.NumReplicas = proto.Int32(c.replicas)
8646
8647 // Alternate between quiescent and non-quiescent replicas to test the
8648 // quiescent metric.
8649 c.expected.Quiescent = i%2 == 0
8650 c.expected.Ticking = !c.expected.Quiescent
8651 metrics := calcReplicaMetrics(
8652 context.Background(), hlc.Timestamp{}, &cfg.RaftConfig, zoneConfig,
8653 c.liveness, 0, &c.desc, c.raftStatus, kvserverpb.LeaseStatus{},
8654 c.storeID, c.expected.Quiescent, c.expected.Ticking,
8655 kvserverpb.LatchManagerInfo{}, kvserverpb.LatchManagerInfo{}, c.raftLogSize, true)
8656 if c.expected != metrics {
8657 t.Fatalf("unexpected metrics:\n%s", pretty.Diff(c.expected, metrics))
8658 }
8659 })
8660 }
8661 }
8662
8663 // TestCancelPendingCommands verifies that cancelPendingCommands sends
8664 // an error to each command awaiting execution.
8665 func TestCancelPendingCommands(t *testing.T) {
8666 defer leaktest.AfterTest(t)()
8667
8668 ctx := context.Background()
8669 tc := testContext{}
8670 stopper := stop.NewStopper()
8671 defer stopper.Stop(ctx)
8672 tc.Start(t, stopper)
8673
8674 // Install a proposal function which drops all increment commands on
8675 // the floor (so the command remains "pending" until we cancel it).
8676 proposalDroppedCh := make(chan struct{})
8677 proposalDropped := false
8678 tc.repl.mu.Lock()
8679 tc.repl.mu.proposalBuf.testing.submitProposalFilter = func(p *ProposalData) (drop bool, _ error) {
8680 if _, ok := p.Request.GetArg(roachpb.Increment); ok {
8681 if !proposalDropped {
8682 // Notify the main thread the first time we drop a proposal.
8683 close(proposalDroppedCh)
8684 proposalDropped = true
8685 }
8686 return true, nil
8687 }
8688 return false, nil
8689 }
8690 tc.repl.mu.Unlock()
8691
8692 errChan := make(chan *roachpb.Error, 1)
8693 go func() {
8694 incArgs := incrementArgs(roachpb.Key("a"), 1)
8695 _, pErr := kv.SendWrapped(ctx, tc.Sender(), incArgs)
8696 errChan <- pErr
8697 }()
8698
8699 <-proposalDroppedCh
8700
8701 select {
8702 case pErr := <-errChan:
8703 t.Fatalf("command finished earlier than expected with error %v", pErr)
8704 default:
8705 }
8706 tc.repl.raftMu.Lock()
8707 tc.repl.disconnectReplicationRaftMuLocked(ctx)
8708 tc.repl.raftMu.Unlock()
8709 pErr := <-errChan
8710 if _, ok := pErr.GetDetail().(*roachpb.AmbiguousResultError); !ok {
8711 t.Errorf("expected AmbiguousResultError, got %v", pErr)
8712 }
8713 }
8714
8715 // TestNoopRequestsNotProposed verifies that batches that result in no-ops do
8716 // not get proposed through Raft and do not wait for consensus before returning
8717 // to the client.
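// (Whether a batch reached Raft is detected below by stamping it with a unique marker
// timestamp and counting matching proposals in a TestingProposalFilter.)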
8718 func TestNoopRequestsNotProposed(t *testing.T) { 8719 defer leaktest.AfterTest(t)() 8720 8721 cfg := TestStoreConfig(nil) 8722 cfg.TestingKnobs.DontRetryPushTxnFailures = true 8723 rh := roachpb.RequestHeader{Key: roachpb.Key("a")} 8724 txn := newTransaction( 8725 "name", 8726 rh.Key, 8727 roachpb.NormalUserPriority, 8728 cfg.Clock, 8729 ) 8730 8731 getReq := &roachpb.GetRequest{ 8732 RequestHeader: rh, 8733 } 8734 putReq := &roachpb.PutRequest{ 8735 RequestHeader: rh, 8736 Value: roachpb.MakeValueFromBytes([]byte("val")), 8737 } 8738 deleteReq := &roachpb.DeleteRequest{ 8739 RequestHeader: rh, 8740 } 8741 hbTxnReq := &roachpb.HeartbeatTxnRequest{ 8742 RequestHeader: rh, 8743 Now: cfg.Clock.Now(), 8744 } 8745 pushTxnReq := &roachpb.PushTxnRequest{ 8746 RequestHeader: roachpb.RequestHeader{ 8747 Key: txn.TxnMeta.Key, 8748 }, 8749 PusheeTxn: txn.TxnMeta, 8750 PushType: roachpb.PUSH_ABORT, 8751 Force: true, 8752 } 8753 resolveCommittedIntentReq := &roachpb.ResolveIntentRequest{ 8754 RequestHeader: rh, 8755 IntentTxn: txn.TxnMeta, 8756 Status: roachpb.COMMITTED, 8757 Poison: false, 8758 } 8759 resolveAbortedIntentReq := &roachpb.ResolveIntentRequest{ 8760 RequestHeader: rh, 8761 IntentTxn: txn.TxnMeta, 8762 Status: roachpb.ABORTED, 8763 Poison: true, 8764 } 8765 8766 sendReq := func( 8767 ctx context.Context, repl *Replica, req roachpb.Request, txn *roachpb.Transaction, 8768 ) *roachpb.Error { 8769 var ba roachpb.BatchRequest 8770 ba.Header.RangeID = repl.RangeID 8771 ba.Add(req) 8772 ba.Txn = txn 8773 if err := ba.SetActiveTimestamp(repl.Clock().Now); err != nil { 8774 t.Fatal(err) 8775 } 8776 _, pErr := repl.Send(ctx, ba) 8777 return pErr 8778 } 8779 8780 testCases := []struct { 8781 name string 8782 setup func(context.Context, *Replica) *roachpb.Error // optional 8783 useTxn bool 8784 req roachpb.Request 8785 expFailure string // regexp pattern to match on error if not empty 8786 expProposal bool 8787 }{ 8788 { 8789 name: "get req", 8790 req: getReq, 8791 expProposal: false, 8792 }, 8793 { 8794 name: "put req", 8795 req: putReq, 8796 expProposal: true, 8797 }, 8798 { 8799 name: "delete req", 8800 req: deleteReq, 8801 // NB: a tombstone is written even if no value exists at the key. 8802 expProposal: true, 8803 }, 8804 { 8805 name: "get req in txn", 8806 useTxn: true, 8807 req: getReq, 8808 expProposal: false, 8809 }, 8810 { 8811 name: "put req in txn", 8812 useTxn: true, 8813 req: putReq, 8814 expProposal: true, 8815 }, 8816 { 8817 name: "delete req in txn", 8818 useTxn: true, 8819 req: deleteReq, 8820 // NB: a tombstone intent is written even if no value exists at the key. 8821 expProposal: true, 8822 }, 8823 { 8824 name: "push txn req", 8825 setup: func(ctx context.Context, repl *Replica) *roachpb.Error { 8826 return sendReq(ctx, repl, hbTxnReq, txn) 8827 }, 8828 req: pushTxnReq, 8829 expProposal: true, 8830 }, 8831 { 8832 name: "redundant push txn req", 8833 setup: func(ctx context.Context, repl *Replica) *roachpb.Error { 8834 if pErr := sendReq(ctx, repl, hbTxnReq, txn); pErr != nil { 8835 return pErr 8836 } 8837 return sendReq(ctx, repl, pushTxnReq, nil /* txn */) 8838 }, 8839 req: pushTxnReq, 8840 // No-op - the transaction has already been pushed successfully. 
8841 expProposal: false, 8842 }, 8843 { 8844 name: "resolve committed intent req, with intent", 8845 setup: func(ctx context.Context, repl *Replica) *roachpb.Error { 8846 return sendReq(ctx, repl, putReq, txn) 8847 }, 8848 req: resolveCommittedIntentReq, 8849 expProposal: true, 8850 }, 8851 { 8852 name: "resolve committed intent req, without intent", 8853 req: resolveCommittedIntentReq, 8854 // No-op - the intent is missing. 8855 expProposal: false, 8856 }, 8857 { 8858 name: "resolve aborted intent req, with intent", 8859 setup: func(ctx context.Context, repl *Replica) *roachpb.Error { 8860 return sendReq(ctx, repl, putReq, txn) 8861 }, 8862 req: resolveAbortedIntentReq, 8863 // Not a no-op - the request needs to poison the abort span. 8864 expProposal: true, 8865 }, 8866 { 8867 name: "resolve aborted intent req, without intent", 8868 req: resolveAbortedIntentReq, 8869 // No-op - the intent is missing, so there's nothing to resolve. 8870 // This also means that the abort span isn't written. 8871 expProposal: false, 8872 }, 8873 { 8874 name: "redundant resolve aborted intent req", 8875 setup: func(ctx context.Context, repl *Replica) *roachpb.Error { 8876 return sendReq(ctx, repl, resolveAbortedIntentReq, nil /* txn */) 8877 }, 8878 req: resolveAbortedIntentReq, 8879 // No-op - the abort span has already been poisoned. 8880 expProposal: false, 8881 }, 8882 } 8883 for _, c := range testCases { 8884 t.Run(c.name, func(t *testing.T) { 8885 ctx := context.Background() 8886 stopper := stop.NewStopper() 8887 defer stopper.Stop(ctx) 8888 8889 tc := testContext{} 8890 tc.StartWithStoreConfig(t, stopper, cfg) 8891 repl := tc.repl 8892 8893 // Update the transaction's timestamps so that it 8894 // doesn't run into issues with the new cluster. 8895 now := tc.Clock().Now() 8896 txn.WriteTimestamp = now 8897 txn.MinTimestamp = now 8898 txn.ReadTimestamp = now 8899 8900 if c.setup != nil { 8901 if pErr := c.setup(ctx, repl); pErr != nil { 8902 t.Fatalf("test setup failed: %v", pErr) 8903 } 8904 } 8905 8906 var propCount int32 8907 markerTS := tc.Clock().Now() 8908 repl.mu.Lock() 8909 repl.store.TestingKnobs().TestingProposalFilter = 8910 func(args kvserverbase.ProposalFilterArgs) *roachpb.Error { 8911 if args.Req.Timestamp == markerTS { 8912 atomic.AddInt32(&propCount, 1) 8913 } 8914 return nil 8915 } 8916 repl.mu.Unlock() 8917 8918 ba := roachpb.BatchRequest{} 8919 ba.Timestamp = markerTS 8920 ba.RangeID = repl.RangeID 8921 if c.useTxn { 8922 ba.Txn = txn 8923 ba.Txn.ReadTimestamp = markerTS 8924 ba.Txn.WriteTimestamp = markerTS 8925 assignSeqNumsForReqs(txn, c.req) 8926 } 8927 ba.Add(c.req) 8928 _, pErr := repl.Send(ctx, ba) 8929 8930 // Check return error. 8931 if c.expFailure == "" { 8932 if pErr != nil { 8933 t.Fatalf("unexpected error: %v", pErr) 8934 } 8935 } else { 8936 if !testutils.IsPError(pErr, c.expFailure) { 8937 t.Fatalf("expected error %q, found %v", c.expFailure, pErr) 8938 } 8939 } 8940 8941 // Check proposal status. 
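// propCount was incremented by the TestingProposalFilter above for every proposal stamped
// with markerTS, so it stays zero only if the batch never reached Raft.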
8942 if sawProp := (propCount > 0); sawProp != c.expProposal { 8943 t.Errorf("expected proposal=%t, found %t", c.expProposal, sawProp) 8944 } 8945 }) 8946 } 8947 } 8948 8949 func TestCommandTooLarge(t *testing.T) { 8950 defer leaktest.AfterTest(t)() 8951 8952 tc := testContext{} 8953 stopper := stop.NewStopper() 8954 defer stopper.Stop(context.Background()) 8955 tc.Start(t, stopper) 8956 8957 st := tc.store.cfg.Settings 8958 st.Manual.Store(true) 8959 MaxCommandSize.Override(&st.SV, 1024) 8960 8961 args := putArgs(roachpb.Key("k"), 8962 []byte(strings.Repeat("a", int(MaxCommandSize.Get(&st.SV))))) 8963 if _, pErr := tc.SendWrapped(&args); !testutils.IsPError(pErr, "command is too large") { 8964 t.Fatalf("did not get expected error: %v", pErr) 8965 } 8966 } 8967 8968 // Test that, if the application of a Raft command fails, intents are not 8969 // resolved. This is because we don't want intent resolution to take place if an 8970 // EndTxn fails. 8971 func TestErrorInRaftApplicationClearsIntents(t *testing.T) { 8972 defer leaktest.AfterTest(t)() 8973 8974 var storeKnobs StoreTestingKnobs 8975 var filterActive int32 8976 key := roachpb.Key("a") 8977 rkey, err := keys.Addr(key) 8978 if err != nil { 8979 t.Fatal(err) 8980 } 8981 storeKnobs.TestingApplyFilter = func(filterArgs kvserverbase.ApplyFilterArgs) (int, *roachpb.Error) { 8982 if atomic.LoadInt32(&filterActive) == 1 { 8983 return 0, roachpb.NewErrorf("boom") 8984 } 8985 return 0, nil 8986 } 8987 s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{ 8988 Knobs: base.TestingKnobs{Store: &storeKnobs}}) 8989 defer s.Stopper().Stop(context.Background()) 8990 8991 splitKey := roachpb.Key("b") 8992 if err := kvDB.AdminSplit(context.Background(), splitKey, splitKey, hlc.MaxTimestamp /* expirationTime */); err != nil { 8993 t.Fatal(err) 8994 } 8995 8996 // Fail future command applications. 8997 atomic.StoreInt32(&filterActive, 1) 8998 8999 // Propose an EndTxn with a remote intent. The _remote_ part is important 9000 // because intents local to the txn's range are resolved inline with the 9001 // EndTxn execution. 9002 // We do this by using replica.propose() directly, as opposed to going through 9003 // the DistSender, because we want to inspect the proposal's result after the 9004 // injected error. 9005 txn := newTransaction("test", key, roachpb.NormalUserPriority, s.Clock()) 9006 // Increase the sequence to make it look like there have been some writes. 9007 // This fits with the LockSpans that we're going to set on the EndTxn. 9008 // Without properly setting the sequence number, the EndTxn batch would 9009 // erroneously execute as a 1PC. 9010 txn.Sequence++ 9011 etArgs, _ := endTxnArgs(txn, true /* commit */) 9012 etArgs.LockSpans = []roachpb.Span{{Key: roachpb.Key("bb")}} 9013 var ba roachpb.BatchRequest 9014 ba.Header.Txn = txn 9015 ba.Add(&etArgs) 9016 assignSeqNumsForReqs(txn, &etArgs) 9017 require.NoError(t, ba.SetActiveTimestamp(func() hlc.Timestamp { return hlc.Timestamp{} })) 9018 // Get a reference to the txn's replica. 
9019 stores := s.GetStores().(*Stores) 9020 store, err := stores.GetStore(s.GetFirstStoreID()) 9021 if err != nil { 9022 t.Fatal(err) 9023 } 9024 repl := store.LookupReplica(rkey) /* end */ 9025 if repl == nil { 9026 t.Fatalf("replica for key %s not found", rkey) 9027 } 9028 9029 exLease, _ := repl.GetLease() 9030 ch, _, _, pErr := repl.evalAndPropose(context.Background(), &ba, allSpansGuard(), &exLease) 9031 if pErr != nil { 9032 t.Fatal(pErr) 9033 } 9034 propRes := <-ch 9035 if !testutils.IsPError(propRes.Err, "boom") { 9036 t.Fatalf("expected injected error, got: %v", propRes.Err) 9037 } 9038 if len(propRes.EncounteredIntents) != 0 { 9039 t.Fatal("expected encountered intents to have been cleared") 9040 } 9041 } 9042 9043 // TestProposeWithAsyncConsensus tests that the proposal of a batch with 9044 // AsyncConsensus set to true will return its evaluation result before Raft 9045 // command has completed consensus and applied. 9046 func TestProposeWithAsyncConsensus(t *testing.T) { 9047 defer leaktest.AfterTest(t)() 9048 tc := testContext{} 9049 tsc := TestStoreConfig(nil) 9050 9051 var filterActive int32 9052 blockRaftApplication := make(chan struct{}) 9053 tsc.TestingKnobs.TestingApplyFilter = 9054 func(filterArgs kvserverbase.ApplyFilterArgs) (int, *roachpb.Error) { 9055 if atomic.LoadInt32(&filterActive) == 1 { 9056 <-blockRaftApplication 9057 } 9058 return 0, nil 9059 } 9060 9061 stopper := stop.NewStopper() 9062 defer stopper.Stop(context.Background()) 9063 tc.StartWithStoreConfig(t, stopper, tsc) 9064 repl := tc.repl 9065 9066 var ba roachpb.BatchRequest 9067 key := roachpb.Key("a") 9068 put := putArgs(key, []byte("val")) 9069 ba.Add(&put) 9070 ba.Timestamp = tc.Clock().Now() 9071 ba.AsyncConsensus = true 9072 9073 atomic.StoreInt32(&filterActive, 1) 9074 exLease, _ := repl.GetLease() 9075 ch, _, _, pErr := repl.evalAndPropose(context.Background(), &ba, allSpansGuard(), &exLease) 9076 if pErr != nil { 9077 t.Fatal(pErr) 9078 } 9079 9080 // The result should be signaled before consensus. 9081 propRes := <-ch 9082 if propRes.Err != nil { 9083 t.Fatalf("unexpected proposal result error: %v", propRes.Err) 9084 } 9085 if propRes.Reply == nil || len(propRes.Reply.Responses) != 1 { 9086 t.Fatalf("expected proposal result with 1 response, found: %v", propRes.Reply) 9087 } 9088 9089 // Stop blocking Raft application to allow everything to shut down cleanly. 9090 close(blockRaftApplication) 9091 } 9092 9093 // TestApplyPaginatedCommittedEntries tests that a Raft group's committed 9094 // entries are quickly applied, even if their application is paginated due to 9095 // the RaftMaxSizePerMsg configuration. This is a regression test for #31330. 9096 func TestApplyPaginatedCommittedEntries(t *testing.T) { 9097 defer leaktest.AfterTest(t)() 9098 ctx := context.Background() 9099 tc := testContext{} 9100 tsc := TestStoreConfig(nil) 9101 9102 // Drop the RaftMaxCommittedSizePerReady so that even small Raft entries 9103 // trigger pagination during entry application. 9104 tsc.RaftMaxCommittedSizePerReady = 128 9105 // Slow down the tick interval dramatically so that Raft groups can't rely 9106 // on ticks to trigger Raft ready iterations. 
9107 tsc.RaftTickInterval = 5 * time.Second 9108 9109 var filterActive int32 9110 blockRaftApplication := make(chan struct{}) 9111 blockingRaftApplication := make(chan struct{}, 1) 9112 tsc.TestingKnobs.TestingApplyFilter = 9113 func(filterArgs kvserverbase.ApplyFilterArgs) (int, *roachpb.Error) { 9114 if atomic.LoadInt32(&filterActive) == 1 { 9115 select { 9116 case blockingRaftApplication <- struct{}{}: 9117 default: 9118 } 9119 <-blockRaftApplication 9120 } 9121 return 0, nil 9122 } 9123 9124 stopper := stop.NewStopper() 9125 defer stopper.Stop(ctx) 9126 tc.StartWithStoreConfig(t, stopper, tsc) 9127 repl := tc.repl 9128 9129 // Block command application then propose a command to Raft. 9130 var ba roachpb.BatchRequest 9131 key := roachpb.Key("a") 9132 put := putArgs(key, []byte("val")) 9133 ba.Add(&put) 9134 ba.Timestamp = tc.Clock().Now() 9135 9136 atomic.StoreInt32(&filterActive, 1) 9137 exLease, _ := repl.GetLease() 9138 _, _, _, pErr := repl.evalAndPropose(ctx, &ba, allSpansGuard(), &exLease) 9139 if pErr != nil { 9140 t.Fatal(pErr) 9141 } 9142 9143 // Once that command is stuck applying, propose a number of large commands. 9144 // This will allow them to all build up without any being applied so that 9145 // their application will require pagination. 9146 <-blockingRaftApplication 9147 var ch chan proposalResult 9148 for i := 0; i < 50; i++ { 9149 var ba2 roachpb.BatchRequest 9150 key := roachpb.Key("a") 9151 put := putArgs(key, make([]byte, 2*tsc.RaftMaxCommittedSizePerReady)) 9152 ba2.Add(&put) 9153 ba2.Timestamp = tc.Clock().Now() 9154 9155 var pErr *roachpb.Error 9156 ch, _, _, pErr = repl.evalAndPropose(ctx, &ba, allSpansGuard(), &exLease) 9157 if pErr != nil { 9158 t.Fatal(pErr) 9159 } 9160 } 9161 9162 // Stop blocking Raft application. All of the proposals should quickly 9163 // commit and apply, even if their application is paginated due to the 9164 // small RaftMaxCommittedSizePerReady. 9165 close(blockRaftApplication) 9166 const maxWait = 10 * time.Second 9167 select { 9168 case propRes := <-ch: 9169 if propRes.Err != nil { 9170 t.Fatalf("unexpected proposal result error: %v", propRes.Err) 9171 } 9172 if propRes.Reply == nil || len(propRes.Reply.Responses) != 1 { 9173 t.Fatalf("expected proposal result with 1 response, found: %v", propRes.Reply) 9174 } 9175 case <-time.After(maxWait): 9176 // If we don't re-enqueue Raft groups for another round of processing 9177 // when their committed entries are paginated and not all immediately 9178 // applied, this test will take more than three minutes to finish. 9179 t.Fatalf("stall detected, proposal did not finish within %s", maxWait) 9180 } 9181 } 9182 9183 func TestSplitMsgApps(t *testing.T) { 9184 defer leaktest.AfterTest(t)() 9185 9186 msgApp := func(idx uint64) raftpb.Message { 9187 return raftpb.Message{Index: idx, Type: raftpb.MsgApp} 9188 } 9189 otherMsg := func(idx uint64) raftpb.Message { 9190 return raftpb.Message{Index: idx, Type: raftpb.MsgVote} 9191 } 9192 formatMsgs := func(msgs []raftpb.Message) string { 9193 strs := make([]string, len(msgs)) 9194 for i, msg := range msgs { 9195 strs[i] = fmt.Sprintf("{%s:%d}", msg.Type, msg.Index) 9196 } 9197 return fmt.Sprint(strs) 9198 } 9199 9200 testCases := []struct { 9201 msgsIn, msgAppsOut, otherMsgsOut []raftpb.Message 9202 }{ 9203 // No msgs. 9204 { 9205 msgsIn: []raftpb.Message{}, 9206 msgAppsOut: []raftpb.Message{}, 9207 otherMsgsOut: []raftpb.Message{}, 9208 }, 9209 // Only msgApps. 
9210 { 9211 msgsIn: []raftpb.Message{msgApp(1)}, 9212 msgAppsOut: []raftpb.Message{msgApp(1)}, 9213 otherMsgsOut: []raftpb.Message{}, 9214 }, 9215 { 9216 msgsIn: []raftpb.Message{msgApp(1), msgApp(2)}, 9217 msgAppsOut: []raftpb.Message{msgApp(1), msgApp(2)}, 9218 otherMsgsOut: []raftpb.Message{}, 9219 }, 9220 { 9221 msgsIn: []raftpb.Message{msgApp(2), msgApp(1)}, 9222 msgAppsOut: []raftpb.Message{msgApp(2), msgApp(1)}, 9223 otherMsgsOut: []raftpb.Message{}, 9224 }, 9225 // Only otherMsgs. 9226 { 9227 msgsIn: []raftpb.Message{otherMsg(1)}, 9228 msgAppsOut: []raftpb.Message{}, 9229 otherMsgsOut: []raftpb.Message{otherMsg(1)}, 9230 }, 9231 { 9232 msgsIn: []raftpb.Message{otherMsg(1), otherMsg(2)}, 9233 msgAppsOut: []raftpb.Message{}, 9234 otherMsgsOut: []raftpb.Message{otherMsg(1), otherMsg(2)}, 9235 }, 9236 { 9237 msgsIn: []raftpb.Message{otherMsg(2), otherMsg(1)}, 9238 msgAppsOut: []raftpb.Message{}, 9239 otherMsgsOut: []raftpb.Message{otherMsg(2), otherMsg(1)}, 9240 }, 9241 // Mixed msgApps and otherMsgs. 9242 { 9243 msgsIn: []raftpb.Message{msgApp(1), otherMsg(2)}, 9244 msgAppsOut: []raftpb.Message{msgApp(1)}, 9245 otherMsgsOut: []raftpb.Message{otherMsg(2)}, 9246 }, 9247 { 9248 msgsIn: []raftpb.Message{otherMsg(1), msgApp(2)}, 9249 msgAppsOut: []raftpb.Message{msgApp(2)}, 9250 otherMsgsOut: []raftpb.Message{otherMsg(1)}, 9251 }, 9252 { 9253 msgsIn: []raftpb.Message{msgApp(1), otherMsg(2), msgApp(3)}, 9254 msgAppsOut: []raftpb.Message{msgApp(1), msgApp(3)}, 9255 otherMsgsOut: []raftpb.Message{otherMsg(2)}, 9256 }, 9257 { 9258 msgsIn: []raftpb.Message{otherMsg(1), msgApp(2), otherMsg(3)}, 9259 msgAppsOut: []raftpb.Message{msgApp(2)}, 9260 otherMsgsOut: []raftpb.Message{otherMsg(1), otherMsg(3)}, 9261 }, 9262 } 9263 for _, c := range testCases { 9264 inStr := formatMsgs(c.msgsIn) 9265 t.Run(inStr, func(t *testing.T) { 9266 msgAppsRes, otherMsgsRes := splitMsgApps(c.msgsIn) 9267 if !reflect.DeepEqual(msgAppsRes, c.msgAppsOut) || !reflect.DeepEqual(otherMsgsRes, c.otherMsgsOut) { 9268 t.Errorf("expected splitMsgApps(%s)=%s/%s, found %s/%s", inStr, formatMsgs(c.msgAppsOut), 9269 formatMsgs(c.otherMsgsOut), formatMsgs(msgAppsRes), formatMsgs(otherMsgsRes)) 9270 } 9271 }) 9272 } 9273 } 9274 9275 type testQuiescer struct { 9276 desc roachpb.RangeDescriptor 9277 numProposals int 9278 status *raft.Status 9279 lastIndex uint64 9280 raftReady bool 9281 ownsValidLease bool 9282 mergeInProgress bool 9283 isDestroyed bool 9284 livenessMap IsLiveMap 9285 pendingQuota bool 9286 } 9287 9288 func (q *testQuiescer) descRLocked() *roachpb.RangeDescriptor { 9289 return &q.desc 9290 } 9291 9292 func (q *testQuiescer) raftStatusRLocked() *raft.Status { 9293 return q.status 9294 } 9295 9296 func (q *testQuiescer) raftLastIndexLocked() (uint64, error) { 9297 return q.lastIndex, nil 9298 } 9299 9300 func (q *testQuiescer) hasRaftReadyRLocked() bool { 9301 return q.raftReady 9302 } 9303 9304 func (q *testQuiescer) hasPendingProposalsRLocked() bool { 9305 return q.numProposals > 0 9306 } 9307 9308 func (q *testQuiescer) hasPendingProposalQuotaRLocked() bool { 9309 return q.pendingQuota 9310 } 9311 9312 func (q *testQuiescer) ownsValidLeaseRLocked(ts hlc.Timestamp) bool { 9313 return q.ownsValidLease 9314 } 9315 9316 func (q *testQuiescer) mergeInProgressRLocked() bool { 9317 return q.mergeInProgress 9318 } 9319 9320 func (q *testQuiescer) isDestroyedRLocked() (DestroyReason, error) { 9321 if q.isDestroyed { 9322 return destroyReasonRemoved, errors.New("testQuiescer: replica destroyed") 9323 } 9324 return 0, nil 9325 
} 9326 9327 func TestShouldReplicaQuiesce(t *testing.T) { 9328 defer leaktest.AfterTest(t)() 9329 9330 const logIndex = 10 9331 const invalidIndex = 11 9332 test := func(expected bool, transform func(q *testQuiescer) *testQuiescer) { 9333 t.Run("", func(t *testing.T) { 9334 // A testQuiescer initialized so that shouldReplicaQuiesce will return 9335 // true. The transform function is intended to perform one mutation to 9336 // this quiescer so that shouldReplicaQuiesce will return false. 9337 q := &testQuiescer{ 9338 desc: roachpb.RangeDescriptor{ 9339 InternalReplicas: []roachpb.ReplicaDescriptor{ 9340 {NodeID: 1, ReplicaID: 1}, 9341 {NodeID: 2, ReplicaID: 2}, 9342 {NodeID: 3, ReplicaID: 3}, 9343 }, 9344 }, 9345 status: &raft.Status{ 9346 BasicStatus: raft.BasicStatus{ 9347 ID: 1, 9348 HardState: raftpb.HardState{ 9349 Commit: logIndex, 9350 }, 9351 SoftState: raft.SoftState{ 9352 RaftState: raft.StateLeader, 9353 }, 9354 Applied: logIndex, 9355 LeadTransferee: 0, 9356 }, 9357 Progress: map[uint64]tracker.Progress{ 9358 1: {Match: logIndex}, 9359 2: {Match: logIndex}, 9360 3: {Match: logIndex}, 9361 }, 9362 }, 9363 lastIndex: logIndex, 9364 raftReady: false, 9365 ownsValidLease: true, 9366 livenessMap: IsLiveMap{ 9367 1: {IsLive: true}, 9368 2: {IsLive: true}, 9369 3: {IsLive: true}, 9370 }, 9371 } 9372 q = transform(q) 9373 _, ok := shouldReplicaQuiesce(context.Background(), q, hlc.Timestamp{}, q.livenessMap) 9374 if expected != ok { 9375 t.Fatalf("expected %v, but found %v", expected, ok) 9376 } 9377 }) 9378 } 9379 9380 test(true, func(q *testQuiescer) *testQuiescer { 9381 return q 9382 }) 9383 test(false, func(q *testQuiescer) *testQuiescer { 9384 q.numProposals = 1 9385 return q 9386 }) 9387 test(false, func(q *testQuiescer) *testQuiescer { 9388 q.pendingQuota = true 9389 return q 9390 }) 9391 test(false, func(q *testQuiescer) *testQuiescer { 9392 q.mergeInProgress = true 9393 return q 9394 }) 9395 test(false, func(q *testQuiescer) *testQuiescer { 9396 q.isDestroyed = true 9397 return q 9398 }) 9399 test(false, func(q *testQuiescer) *testQuiescer { 9400 q.status = nil 9401 return q 9402 }) 9403 test(false, func(q *testQuiescer) *testQuiescer { 9404 q.status.RaftState = raft.StateFollower 9405 return q 9406 }) 9407 test(false, func(q *testQuiescer) *testQuiescer { 9408 q.status.RaftState = raft.StateCandidate 9409 return q 9410 }) 9411 test(false, func(q *testQuiescer) *testQuiescer { 9412 q.status.LeadTransferee = 1 9413 return q 9414 }) 9415 test(false, func(q *testQuiescer) *testQuiescer { 9416 q.status.Commit = invalidIndex 9417 return q 9418 }) 9419 test(false, func(q *testQuiescer) *testQuiescer { 9420 q.status.Applied = invalidIndex 9421 return q 9422 }) 9423 test(false, func(q *testQuiescer) *testQuiescer { 9424 q.lastIndex = invalidIndex 9425 return q 9426 }) 9427 for _, i := range []uint64{1, 2, 3} { 9428 test(false, func(q *testQuiescer) *testQuiescer { 9429 q.status.Progress[i] = tracker.Progress{Match: invalidIndex} 9430 return q 9431 }) 9432 } 9433 test(false, func(q *testQuiescer) *testQuiescer { 9434 delete(q.status.Progress, q.status.ID) 9435 return q 9436 }) 9437 test(false, func(q *testQuiescer) *testQuiescer { 9438 q.ownsValidLease = false 9439 return q 9440 }) 9441 test(false, func(q *testQuiescer) *testQuiescer { 9442 q.raftReady = true 9443 return q 9444 }) 9445 // Create a mismatch between the raft progress replica IDs and the 9446 // replica IDs in the range descriptor. 
9447 for i := 0; i < 3; i++ { 9448 test(false, func(q *testQuiescer) *testQuiescer { 9449 q.desc.InternalReplicas[i].ReplicaID = roachpb.ReplicaID(4 + i) 9450 return q 9451 }) 9452 } 9453 // Pass a nil liveness map. 9454 test(true, func(q *testQuiescer) *testQuiescer { 9455 q.livenessMap = nil 9456 return q 9457 }) 9458 // Verify quiesce even when replica progress doesn't match, if 9459 // the replica is on a non-live node. 9460 for _, i := range []uint64{1, 2, 3} { 9461 test(true, func(q *testQuiescer) *testQuiescer { 9462 q.livenessMap[roachpb.NodeID(i)] = IsLiveMapEntry{IsLive: false} 9463 q.status.Progress[i] = tracker.Progress{Match: invalidIndex} 9464 return q 9465 }) 9466 } 9467 } 9468 9469 func TestReplicaRecomputeStats(t *testing.T) { 9470 defer leaktest.AfterTest(t)() 9471 tc := testContext{} 9472 stopper := stop.NewStopper() 9473 defer stopper.Stop(context.Background()) 9474 tc.Start(t, stopper) 9475 9476 key := roachpb.RKey("a") 9477 repl := tc.store.LookupReplica(key) 9478 desc := repl.Desc() 9479 sKey := desc.StartKey.AsRawKey() 9480 9481 const errMismatch = "descriptor mismatch; range likely merged" 9482 9483 type testCase struct { 9484 name string 9485 key roachpb.Key 9486 expDelta enginepb.MVCCStats 9487 expErr string 9488 } 9489 9490 runTest := func(test testCase) { 9491 t.Run(test.name, func(t *testing.T) { 9492 args := &roachpb.RecomputeStatsRequest{ 9493 RequestHeader: roachpb.RequestHeader{ 9494 Key: test.key, 9495 }, 9496 } 9497 9498 resp, pErr := tc.SendWrapped(args) 9499 if !testutils.IsPError(pErr, test.expErr) { 9500 t.Fatalf("got:\n%s\nexpected: %s", pErr, test.expErr) 9501 } 9502 if test.expErr != "" { 9503 return 9504 } 9505 9506 delta := enginepb.MVCCStats(resp.(*roachpb.RecomputeStatsResponse).AddedDelta) 9507 delta.AgeTo(test.expDelta.LastUpdateNanos) 9508 9509 if delta != test.expDelta { 9510 t.Fatal("diff(wanted, actual) = ", strings.Join(pretty.Diff(test.expDelta, delta), "\n")) 9511 } 9512 }) 9513 } 9514 9515 for _, test := range []testCase{ 9516 // Non-matching endpoints. 9517 {"leftmismatch", roachpb.Key("a"), enginepb.MVCCStats{}, errMismatch}, 9518 // Recomputation that shouldn't find anything. 9519 {"noop", sKey, enginepb.MVCCStats{}, ""}, 9520 } { 9521 runTest(test) 9522 } 9523 9524 ctx := context.Background() 9525 seed := randutil.NewPseudoSeed() 9526 t.Logf("seed is %d", seed) 9527 rnd := rand.New(rand.NewSource(seed)) 9528 9529 repl.raftMu.Lock() 9530 repl.mu.Lock() 9531 ms := repl.mu.state.Stats // intentionally mutated below 9532 disturbMS := enginepb.NewPopulatedMVCCStats(rnd, false) 9533 disturbMS.ContainsEstimates = 0 9534 ms.Add(*disturbMS) 9535 err := repl.raftMu.stateLoader.SetMVCCStats(ctx, tc.engine, ms) 9536 repl.assertStateLocked(ctx, tc.engine) 9537 repl.mu.Unlock() 9538 repl.raftMu.Unlock() 9539 9540 if err != nil { 9541 t.Fatal(err) 9542 } 9543 9544 // We have `stored ms = recomputable ms + disturbMS`, and so the returned delta 9545 // should be `recomputable ms - stored ms = -disturbMS`. 9546 var expDelta enginepb.MVCCStats 9547 expDelta.Subtract(*disturbMS) 9548 9549 runTest(testCase{"randdelta", sKey, expDelta, ""}) 9550 if !t.Failed() { 9551 runTest(testCase{"noopagain", sKey, enginepb.MVCCStats{}, ""}) 9552 } 9553 } 9554 9555 // TestConsistencyQueueErrorFromCheckConsistency exercises the case in which 9556 // the queue receives an error from CheckConsistency. 
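// (The error is injected below via a TestingRequestFilter that rejects ComputeChecksum
// requests with a "boom" error.)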
9557 func TestConsistencyQueueErrorFromCheckConsistency(t *testing.T) {
9558 defer leaktest.AfterTest(t)()
9559
9560 ctx := context.Background()
9561 stopper := stop.NewStopper()
9562 defer stopper.Stop(ctx)
9563
9564 cfg := TestStoreConfig(nil)
9565 cfg.TestingKnobs = StoreTestingKnobs{
9566 TestingRequestFilter: func(_ context.Context, ba roachpb.BatchRequest) *roachpb.Error {
9567 if _, ok := ba.GetArg(roachpb.ComputeChecksum); ok {
9568 return roachpb.NewErrorf("boom")
9569 }
9570 return nil
9571 },
9572 }
9573 tc := testContext{}
9574 tc.StartWithStoreConfig(t, stopper, cfg)
9575
9576 for i := 0; i < 2; i++ {
9577 // Do this twice because it used to deadlock. See #25456.
9578 sysCfg := tc.store.Gossip().GetSystemConfig()
9579 if err := tc.store.consistencyQueue.process(ctx, tc.repl, sysCfg); !testutils.IsError(err, "boom") {
9580 t.Fatal(err)
9581 }
9582 }
9583 }
9584
9585 // TestReplicaServersideRefreshes verifies local retry logic for transactional
9586 // and non-transactional batches. Verifies the timestamp cache is updated to
9587 // reflect the timestamp at which retried batches are executed.
9588 func TestReplicaServersideRefreshes(t *testing.T) {
9589 defer leaktest.AfterTest(t)()
9590 // TODO(andrei): make each subtest use its own testContext so that they don't
9591 // have to use distinct keys.
9592 tc := testContext{}
9593 stopper := stop.NewStopper()
9594 defer stopper.Stop(context.Background())
9595 tc.Start(t, stopper)
9596
9597 // Increment the clock so that all the transactions in the tests run at a
9598 // different physical timestamp than the one used to initialize the replica's
9599 // timestamp cache. This allows one of the tests to reset the logical part of
9600 // the timestamp it's operating at and not run into the timestamp cache.
9601 tc.manualClock.Increment(1)
9602
9603 newTxn := func(key string, ts hlc.Timestamp) *roachpb.Transaction {
9604 txn := roachpb.MakeTransaction(
9605 "test", roachpb.Key(key), roachpb.NormalUserPriority, ts, 0,
9606 )
9607 return &txn
9608 }
9609 send := func(ba roachpb.BatchRequest) (hlc.Timestamp, error) {
9610 br, pErr := tc.Sender().Send(context.Background(), ba)
9611 if pErr != nil {
9612 return hlc.Timestamp{}, pErr.GetDetail()
9613 }
9614
9615 // Check that we didn't mess up the stats.
9616 // Regression test for #31870.
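// We do so by recomputing the range's stats via Replica.sha512 in CHECK_FULL mode and
// comparing them to the persisted stats after every batch.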
9617 snap := tc.engine.NewSnapshot() 9618 defer snap.Close() 9619 res, err := tc.repl.sha512(context.Background(), *tc.repl.Desc(), tc.engine, nil /* diff */, roachpb.ChecksumMode_CHECK_FULL) 9620 if err != nil { 9621 return hlc.Timestamp{}, err 9622 } 9623 if res.PersistedMS != res.RecomputedMS { 9624 return hlc.Timestamp{}, errors.Errorf("stats are inconsistent:\npersisted:\n%+v\nrecomputed:\n%+v", res.PersistedMS, res.RecomputedMS) 9625 } 9626 9627 return br.Timestamp, nil 9628 } 9629 get := func(key string) (hlc.Timestamp, error) { 9630 var ba roachpb.BatchRequest 9631 get := getArgs(roachpb.Key(key)) 9632 ba.Add(&get) 9633 return send(ba) 9634 } 9635 put := func(key, val string) (hlc.Timestamp, error) { 9636 var ba roachpb.BatchRequest 9637 put := putArgs(roachpb.Key(key), []byte(val)) 9638 ba.Add(&put) 9639 return send(ba) 9640 } 9641 9642 testCases := []struct { 9643 name string 9644 setupFn func() (hlc.Timestamp, error) // returns expected batch execution timestamp 9645 batchFn func(hlc.Timestamp) (roachpb.BatchRequest, hlc.Timestamp) 9646 expErr string 9647 }{ 9648 { 9649 name: "serverside-refresh of write too old on put", 9650 setupFn: func() (hlc.Timestamp, error) { 9651 return put("a", "put") 9652 }, 9653 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9654 ba.Timestamp = ts.Prev() 9655 expTS = ts.Next() 9656 put := putArgs(roachpb.Key("a"), []byte("put2")) 9657 ba.Add(&put) 9658 return 9659 }, 9660 }, 9661 { 9662 name: "serverside-refresh of write too old on cput", 9663 setupFn: func() (hlc.Timestamp, error) { 9664 // Note there are two different version of the value, but a 9665 // non-txnal cput will evaluate the most recent version and 9666 // avoid a condition failed error. 9667 _, _ = put("b", "put1") 9668 return put("b", "put2") 9669 }, 9670 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9671 ba.Timestamp = ts.Prev() 9672 expTS = ts.Next() 9673 cput := cPutArgs(roachpb.Key("b"), []byte("cput"), []byte("put2")) 9674 ba.Add(&cput) 9675 return 9676 }, 9677 }, 9678 { 9679 name: "serverside-refresh of write too old on initput", 9680 setupFn: func() (hlc.Timestamp, error) { 9681 // Note there are two different version of the value, but a 9682 // non-txnal cput will evaluate the most recent version and 9683 // avoid a condition failed error. 9684 _, _ = put("b-iput", "put1") 9685 return put("b-iput", "put2") 9686 }, 9687 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9688 ba.Timestamp = ts.Prev() 9689 expTS = ts.Next() 9690 iput := iPutArgs(roachpb.Key("b-iput"), []byte("put2")) 9691 ba.Add(&iput) 9692 return 9693 }, 9694 }, 9695 // Serverside-refresh will not be allowed because the request contains 9696 // a read-only request that acquires read-latches. We cannot bump the 9697 // request's timestamp without re-acquiring latches, so we don't even 9698 // try to. 9699 // NOTE: this is an unusual batch because DistSender usually splits 9700 // reads and writes. Still, we should handle it correctly. 
9701 { 9702 name: "no serverside-refresh of write too old on get and put", 9703 setupFn: func() (hlc.Timestamp, error) { 9704 return put("a", "put") 9705 }, 9706 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9707 ba.Timestamp = ts.Prev() 9708 get := getArgs(roachpb.Key("a")) 9709 put := putArgs(roachpb.Key("a"), []byte("put2")) 9710 ba.Add(&get, &put) 9711 return 9712 }, 9713 expErr: "write at timestamp .* too old", 9714 }, 9715 { 9716 name: "serializable push without retry", 9717 setupFn: func() (hlc.Timestamp, error) { 9718 return get("a") 9719 }, 9720 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9721 ba.Timestamp = ts.Prev() 9722 expTS = ts.Next() 9723 put := putArgs(roachpb.Key("a"), []byte("put2")) 9724 ba.Add(&put) 9725 return 9726 }, 9727 }, 9728 // Non-1PC serializable txn cput will fail with write too old error. 9729 { 9730 name: "no serverside-refresh of write too old on non-1PC txn cput", 9731 setupFn: func() (hlc.Timestamp, error) { 9732 _, _ = put("c-cput", "put") 9733 return put("c-cput", "put") 9734 }, 9735 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9736 ba.Txn = newTxn("c-cput", ts.Prev()) 9737 cput := cPutArgs(roachpb.Key("c-cput"), []byte("iput"), []byte("put")) 9738 ba.Add(&cput) 9739 assignSeqNumsForReqs(ba.Txn, &cput) 9740 return 9741 }, 9742 expErr: "write at timestamp .* too old", 9743 }, 9744 // Non-1PC serializable txn initput will fail with write too old error. 9745 { 9746 name: "no serverside-refresh of write too old on non-1PC txn initput", 9747 setupFn: func() (hlc.Timestamp, error) { 9748 return put("c-iput", "put") 9749 }, 9750 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9751 ba.Txn = newTxn("c-iput", ts.Prev()) 9752 iput := iPutArgs(roachpb.Key("c-iput"), []byte("iput")) 9753 ba.Add(&iput) 9754 assignSeqNumsForReqs(ba.Txn, &iput) 9755 return 9756 }, 9757 expErr: "write at timestamp .* too old", 9758 }, 9759 // Non-1PC serializable txn locking scan will fail with write too old error. 9760 { 9761 name: "no serverside-refresh of write too old on non-1PC txn locking scan", 9762 setupFn: func() (hlc.Timestamp, error) { 9763 return put("c-scan", "put") 9764 }, 9765 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9766 ba.Txn = newTxn("c-scan", ts.Prev()) 9767 scan := scanArgs(roachpb.Key("c-scan"), roachpb.Key("c-scan\x00")) 9768 scan.KeyLocking = lock.Exclusive 9769 ba.Add(scan) 9770 return 9771 }, 9772 expErr: "write at timestamp .* too old", 9773 }, 9774 // Non-1PC serializable txn cput with CanForwardReadTimestamp set to 9775 // true will succeed with write too old error. 9776 { 9777 name: "serverside-refresh of write too old on non-1PC txn cput without prior reads", 9778 setupFn: func() (hlc.Timestamp, error) { 9779 _, _ = put("c-cput", "put") 9780 return put("c-cput", "put") 9781 }, 9782 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9783 expTS = ts.Next() 9784 ba.Txn = newTxn("c-cput", ts.Prev()) 9785 ba.CanForwardReadTimestamp = true 9786 cput := cPutArgs(roachpb.Key("c-cput"), []byte("iput"), []byte("put")) 9787 ba.Add(&cput) 9788 assignSeqNumsForReqs(ba.Txn, &cput) 9789 return 9790 }, 9791 }, 9792 // This test tests a scenario where an InitPut is failing at its timestamp, 9793 // but it would succeed if it'd evaluate at a bumped timestamp. The request 9794 // is not retried at the bumped timestamp. 
We don't necessarily like this 9795 // current behavior; for example since there's nothing to refresh, the 9796 // request could be retried. 9797 { 9798 name: "serverside-refresh of write too old on non-1PC txn initput without prior reads", 9799 setupFn: func() (hlc.Timestamp, error) { 9800 // Note there are two different version of the value, but a 9801 // non-txnal cput will evaluate the most recent version and 9802 // avoid a condition failed error. 9803 _, _ = put("c-iput", "put1") 9804 return put("c-iput", "put2") 9805 }, 9806 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9807 ba.Txn = newTxn("c-iput", ts.Prev()) 9808 ba.CanForwardReadTimestamp = true 9809 iput := iPutArgs(roachpb.Key("c-iput"), []byte("put2")) 9810 ba.Add(&iput) 9811 assignSeqNumsForReqs(ba.Txn, &iput) 9812 return 9813 }, 9814 expErr: "unexpected value: .*", 9815 }, 9816 // Non-1PC serializable txn locking scan with CanForwardReadTimestamp 9817 // set to true will succeed with write too old error. 9818 { 9819 name: "serverside-refresh of write too old on non-1PC txn locking scan without prior reads", 9820 setupFn: func() (hlc.Timestamp, error) { 9821 return put("c-scan", "put") 9822 }, 9823 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9824 expTS = ts.Next() 9825 ba.Txn = newTxn("c-scan", ts.Prev()) 9826 ba.CanForwardReadTimestamp = true 9827 scan := scanArgs(roachpb.Key("c-scan"), roachpb.Key("c-scan\x00")) 9828 scan.KeyLocking = lock.Exclusive 9829 ba.Add(scan) 9830 return 9831 }, 9832 }, 9833 // 1PC serializable transaction will fail instead of retrying if 9834 // EndTxnRequest.CanCommitAtHigherTimestamp is not true. 9835 { 9836 name: "no serverside-refresh of write too old on 1PC txn and refresh spans", 9837 setupFn: func() (hlc.Timestamp, error) { 9838 _, _ = put("d", "put") 9839 return put("d", "put") 9840 }, 9841 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9842 ba.Txn = newTxn("d", ts.Prev()) 9843 cput := cPutArgs(ba.Txn.Key, []byte("cput"), []byte("put")) 9844 et, _ := endTxnArgs(ba.Txn, true /* commit */) 9845 ba.Add(&cput, &et) 9846 assignSeqNumsForReqs(ba.Txn, &cput, &et) 9847 return 9848 }, 9849 expErr: "WriteTooOldError", 9850 }, 9851 // 1PC serializable transaction will retry locally. 9852 { 9853 name: "serverside-refresh of write too old on 1PC txn", 9854 setupFn: func() (hlc.Timestamp, error) { 9855 _, _ = put("e", "put") 9856 return put("e", "put") 9857 }, 9858 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9859 expTS = ts.Next() 9860 ba.Txn = newTxn("e", ts.Prev()) 9861 cput := cPutArgs(ba.Txn.Key, []byte("cput"), []byte("put")) 9862 et, _ := endTxnArgs(ba.Txn, true /* commit */) 9863 // NOTE: setting CanCommitAtHigherTimestamp without 9864 // CanForwardReadTimestamp simulates the kinds of batches we 9865 // might see in a mixed-version cluster. All new versions will 9866 // keep the two flags in-sync. 9867 et.CanCommitAtHigherTimestamp = true // necessary to indicate serverside-refresh is possible 9868 ba.Add(&cput, &et) 9869 assignSeqNumsForReqs(ba.Txn, &cput, &et) 9870 return 9871 }, 9872 }, 9873 // 1PC serializable transaction will retry locally. 
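// (Same shape as the previous case, except that the batch header also
// sets CanForwardReadTimestamp, matching the NOTE above about new
// versions keeping that flag in sync with CanCommitAtHigherTimestamp.)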
9874 { 9875 name: "serverside-refresh of write too old on 1PC txn without prior reads", 9876 setupFn: func() (hlc.Timestamp, error) { 9877 _, _ = put("e", "put") 9878 return put("e", "put") 9879 }, 9880 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9881 expTS = ts.Next() 9882 ba.Txn = newTxn("e", ts.Prev()) 9883 ba.CanForwardReadTimestamp = true 9884 cput := cPutArgs(ba.Txn.Key, []byte("cput"), []byte("put")) 9885 et, _ := endTxnArgs(ba.Txn, true /* commit */) 9886 et.CanCommitAtHigherTimestamp = true // necessary to indicate serverside-refresh is possible 9887 ba.Add(&cput, &et) 9888 assignSeqNumsForReqs(ba.Txn, &cput, &et) 9889 return 9890 }, 9891 }, 9892 // This test tests a scenario where a CPut is failing at its timestamp, but it would 9893 // succeed if it'd evaluate at a bumped timestamp. The request is not retried at the 9894 // bumped timestamp. We don't necessarily like this current behavior; for example if 9895 // there's nothing to refresh, the request could be retried. 9896 // The previous test shows different behavior for a non-transactional 9897 // request or a 1PC one. 9898 { 9899 name: "no serverside-refresh with failed cput despite write too old errors on txn", 9900 setupFn: func() (hlc.Timestamp, error) { 9901 return put("e1", "put") 9902 }, 9903 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9904 txn := newTxn("e1", ts.Prev()) 9905 9906 // Send write to another key first to avoid 1PC. 9907 ba.Txn = txn 9908 put := putArgs([]byte("e1-other-key"), []byte("otherput")) 9909 ba.Add(&put) 9910 assignSeqNumsForReqs(ba.Txn, &put) 9911 if _, err := send(ba); err != nil { 9912 panic(err) 9913 } 9914 9915 ba = roachpb.BatchRequest{} 9916 ba.Txn = txn 9917 cput := cPutArgs(roachpb.Key("e1"), []byte("cput"), []byte("put")) 9918 ba.Add(&cput) 9919 assignSeqNumsForReqs(ba.Txn, &cput) 9920 et, _ := endTxnArgs(ba.Txn, true /* commit */) 9921 // Indicate local retry is possible, even though we don't currently take 9922 // advantage of this. 9923 et.CanCommitAtHigherTimestamp = true 9924 ba.Add(&et) 9925 assignSeqNumsForReqs(ba.Txn, &et) 9926 return 9927 }, 9928 expErr: "unexpected value: <nil>", 9929 }, 9930 // Handle multiple write too old errors on a non-transactional request. 9931 // 9932 // Note that in this test's scenario if the request was transactional, it 9933 // generally would receive a ConditionFailedError from the CPuts. 9934 { 9935 name: "serverside-refresh with multiple write too old errors on non-txn request", 9936 setupFn: func() (hlc.Timestamp, error) { 9937 if _, err := put("f1", "put"); err != nil { 9938 return hlc.Timestamp{}, err 9939 } 9940 if _, err := put("f2", "put"); err != nil { 9941 return hlc.Timestamp{}, err 9942 } 9943 return put("f3", "put") 9944 }, 9945 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9946 expTS = ts.Next() 9947 // We're going to execute before any of the writes in setupFn. 9948 ts.Logical = 0 9949 ba.Timestamp = ts 9950 for i := 1; i <= 3; i++ { 9951 cput := cPutArgs(roachpb.Key(fmt.Sprintf("f%d", i)), []byte("cput"), []byte("put")) 9952 ba.Add(&cput) 9953 } 9954 return 9955 }, 9956 }, 9957 // Handle multiple write too old errors in 1PC transaction. 9958 { 9959 name: "serverside-refresh with multiple write too old errors on 1PC txn", 9960 setupFn: func() (hlc.Timestamp, error) { 9961 // Do a couple of writes. Their timestamps are going to differ in their 9962 // logical component. 
The batch that we're going to run in batchFn will 9963 // run at a lower timestamp than all of them. 9964 if _, err := put("ga1", "put"); err != nil { 9965 return hlc.Timestamp{}, err 9966 } 9967 if _, err := put("ga2", "put"); err != nil { 9968 return hlc.Timestamp{}, err 9969 } 9970 return put("ga3", "put") 9971 }, 9972 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9973 expTS = ts.Next() 9974 // We're going to execute before any of the writes in setupFn. 9975 ts.Logical = 0 9976 ba.Txn = newTxn("ga1", ts) 9977 for i := 1; i <= 3; i++ { 9978 cput := cPutArgs(roachpb.Key(fmt.Sprintf("ga%d", i)), []byte("cput"), []byte("put")) 9979 ba.Add(&cput) 9980 assignSeqNumsForReqs(ba.Txn, &cput) 9981 } 9982 et, _ := endTxnArgs(ba.Txn, true /* commit */) 9983 et.CanCommitAtHigherTimestamp = true // necessary to indicate serverside-refresh is possible 9984 ba.Add(&et) 9985 assignSeqNumsForReqs(ba.Txn, &et) 9986 return 9987 }, 9988 }, 9989 // Serializable transaction will commit with forwarded timestamp if no refresh spans. 9990 { 9991 name: "serializable commit with forwarded timestamp", 9992 setupFn: func() (hlc.Timestamp, error) { 9993 if _, err := put("h", "put"); err != nil { 9994 return hlc.Timestamp{}, err 9995 } 9996 return get("h") 9997 }, 9998 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 9999 txn := newTxn("h", ts.Prev()) 10000 // Send write to another key first to avoid 1PC. 10001 ba.Txn = txn 10002 put := putArgs([]byte("h2"), []byte("otherput")) 10003 ba.Add(&put) 10004 assignSeqNumsForReqs(ba.Txn, &put) 10005 if _, err := send(ba); err != nil { 10006 panic(err) 10007 } 10008 // Send the remainder of the transaction in another batch. 10009 expTS = ts.Next() 10010 ba = roachpb.BatchRequest{} 10011 ba.Txn = txn 10012 cput := cPutArgs(ba.Txn.Key, []byte("cput"), []byte("put")) 10013 ba.Add(&cput) 10014 et, _ := endTxnArgs(ba.Txn, true /* commit */) 10015 et.CanCommitAtHigherTimestamp = true // necessary to indicate serverside-refresh is possible 10016 ba.Add(&et) 10017 assignSeqNumsForReqs(ba.Txn, &cput, &et) 10018 return 10019 }, 10020 }, 10021 // Serializable 1PC transaction will commit with forwarded timestamp 10022 // using the 1PC path if no refresh spans. 10023 { 10024 name: "serializable commit with forwarded timestamp on 1PC txn", 10025 setupFn: func() (hlc.Timestamp, error) { 10026 return get("a") 10027 }, 10028 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 10029 ba.Txn = newTxn("a", ts.Prev()) 10030 expTS = ts.Next() 10031 cput := putArgs(ba.Txn.Key, []byte("put")) 10032 et, _ := endTxnArgs(ba.Txn, true /* commit */) 10033 et.Require1PC = true // don't allow this to bypass the 1PC optimization 10034 et.CanCommitAtHigherTimestamp = true // necessary to indicate serverside-refresh is possible 10035 ba.Add(&cput, &et) 10036 assignSeqNumsForReqs(ba.Txn, &cput, &et) 10037 return 10038 }, 10039 }, 10040 // Regression test for #43273. When locking scans run into write too old 10041 // errors, the refreshed timestamp should not be below the txn's 10042 // existing write timestamp. 10043 { 10044 name: "serverside-refresh with write too old errors during locking scan", 10045 setupFn: func() (hlc.Timestamp, error) { 10046 return put("lscan", "put") 10047 }, 10048 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 10049 // Txn with (read_ts, write_ts) = (1, 4) finds a value with 10050 // `ts = 2`. Final timestamp should be `ts = 4`. 
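// Below, ts.Prev() stands in for read_ts=1, the value written by
// setupFn sits at ts=2, and ts.Next().Next() is the txn's pre-existing
// write_ts=4; expTS asserts the server-side refresh lands on the write
// timestamp instead of regressing below it.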
10051 ba.Txn = newTxn("lscan", ts.Prev()) 10052 ba.Txn.WriteTimestamp = ts.Next().Next() 10053 ba.CanForwardReadTimestamp = true 10054 10055 expTS = ba.Txn.WriteTimestamp 10056 10057 scan := scanArgs(roachpb.Key("lscan"), roachpb.Key("lscan\x00")) 10058 scan.KeyLocking = lock.Upgrade 10059 ba.Add(scan) 10060 return 10061 }, 10062 }, 10063 // Serializable transaction will commit with WriteTooOld flag if no refresh spans. 10064 { 10065 name: "serializable commit with write-too-old flag", 10066 setupFn: func() (hlc.Timestamp, error) { 10067 return put("i", "put") 10068 }, 10069 batchFn: func(ts hlc.Timestamp) (ba roachpb.BatchRequest, expTS hlc.Timestamp) { 10070 txn := newTxn("i", ts.Prev()) 10071 // Send write to another key first to avoid 1PC. 10072 ba.Txn = txn 10073 put1 := putArgs([]byte("i2"), []byte("otherput")) 10074 ba.Add(&put1) 10075 assignSeqNumsForReqs(ba.Txn, &put1) 10076 if _, err := send(ba); err != nil { 10077 panic(err) 10078 } 10079 // Send the remainder of the transaction in another batch. 10080 expTS = ts.Next() 10081 ba = roachpb.BatchRequest{} 10082 ba.Txn = txn 10083 put2 := putArgs(ba.Txn.Key, []byte("newput")) 10084 ba.Add(&put2) 10085 et, _ := endTxnArgs(ba.Txn, true /* commit */) 10086 et.CanCommitAtHigherTimestamp = true // necessary to indicate serverside-refresh is possible 10087 ba.Add(&et) 10088 assignSeqNumsForReqs(ba.Txn, &put2, &et) 10089 return 10090 }, 10091 }, 10092 // TODO(andrei): We should also have a test similar to the one above, but 10093 // with the WriteTooOld flag set by a different batch than the one with the 10094 // EndTransaction. This is hard to do at the moment, though, because we 10095 // never defer the handling of the write too old conditions to the end of 10096 // the transaction (but we might in the future). 10097 } 10098 10099 for _, test := range testCases { 10100 t.Run(test.name, func(t *testing.T) { 10101 ts, err := test.setupFn() 10102 if err != nil { 10103 t.Fatal(err) 10104 } 10105 ba, expTS := test.batchFn(ts) 10106 actualTS, err := send(ba) 10107 if !testutils.IsError(err, test.expErr) { 10108 t.Fatalf("expected error %q; got \"%v\"", test.expErr, err) 10109 } 10110 if actualTS != expTS { 10111 t.Fatalf("expected ts=%s; got %s", expTS, actualTS) 10112 } 10113 }) 10114 } 10115 } 10116 10117 // TestReplicaPushed1PC verifies that a transaction that has its 10118 // timestamp pushed while reading and then sends all its writes in a 10119 // 1PC batch correctly detects conflicts with writes between its 10120 // original and pushed timestamps. This was hypothesized as a possible 10121 // cause of https://github.com/cockroachdb/cockroach/issues/23176 10122 // but we were already guarding against this case. This test ensures 10123 // it stays that way. 10124 func TestReplicaPushed1PC(t *testing.T) { 10125 defer leaktest.AfterTest(t)() 10126 10127 tc := testContext{} 10128 stopper := stop.NewStopper() 10129 defer stopper.Stop(context.Background()) 10130 tc.Start(t, stopper) 10131 10132 ctx := context.Background() 10133 k := roachpb.Key("key") 10134 10135 // Start a transaction and assign its ReadTimestamp. 10136 ts1 := tc.Clock().Now() 10137 txn := roachpb.MakeTransaction("test", k, roachpb.NormalUserPriority, ts1, 0) 10138 10139 // Write a value outside the transaction. 
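// The interfering write below goes straight through storage.MVCCPut
// (no transaction) at ts2, above the txn's read timestamp ts1, so the
// txn's later 1PC batch must detect the conflict and fail with a
// WriteTooOldError rather than committing over it.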
10140 tc.manualClock.Increment(10) 10141 ts2 := tc.Clock().Now() 10142 if err := storage.MVCCPut(ctx, tc.engine, nil, k, ts2, roachpb.MakeValueFromString("one"), nil); err != nil { 10143 t.Fatalf("writing interfering value: %+v", err) 10144 } 10145 10146 // Push the transaction's timestamp. In real-world situations, 10147 // the only thing that can push a read-only transaction's 10148 // timestamp is ReadWithinUncertaintyIntervalError, but 10149 // synthesizing one of those in this single-node test harness is 10150 // tricky. 10151 tc.manualClock.Increment(10) 10152 ts3 := tc.Clock().Now() 10153 txn.WriteTimestamp.Forward(ts3) 10154 10155 // Execute the write phase of the transaction as a single batch, 10156 // which must return a WriteTooOldError. 10157 // 10158 // TODO(bdarnell): When this test was written, in SNAPSHOT 10159 // isolation we would attempt to execute the transaction on the 10160 // 1PC path, see a timestamp mismatch, and then then throw the 10161 // 1PC results away and re-execute it on the regular path (which 10162 // would generate the WRITE_TOO_OLD error). We have added earlier 10163 // timestamp checks for a small performance improvement, but 10164 // this difference is difficult to observe in a test. If we had 10165 // more detailed metrics we could assert that the 1PC path was 10166 // not even attempted here. 10167 var ba roachpb.BatchRequest 10168 ba.Header = roachpb.Header{Txn: &txn} 10169 put := putArgs(k, []byte("two")) 10170 et, _ := endTxnArgs(&txn, true) 10171 ba.Add(&put, &et) 10172 assignSeqNumsForReqs(&txn, &put, &et) 10173 if br, pErr := tc.Sender().Send(ctx, ba); pErr == nil { 10174 t.Errorf("did not get expected error. resp=%s", br) 10175 } else if wtoe, ok := pErr.GetDetail().(*roachpb.WriteTooOldError); !ok { 10176 t.Errorf("expected WriteTooOldError, got %s", wtoe) 10177 } 10178 } 10179 10180 // TestReplicaNotifyLockTableOn1PC verifies that a 1-phase commit transaction 10181 // notifies the concurrency manager's lock-table that the transaction has been 10182 // committed. This is necessary even though the transaction, by virtue of 10183 // performing a 1PC commit, could not have written any intents. It still could 10184 // have acquired read locks. 10185 func TestReplicaNotifyLockTableOn1PC(t *testing.T) { 10186 defer leaktest.AfterTest(t)() 10187 10188 ctx := context.Background() 10189 tc := testContext{} 10190 stopper := stop.NewStopper() 10191 defer stopper.Stop(ctx) 10192 tc.Start(t, stopper) 10193 10194 // Disable txn liveness pushes. See below for why. 10195 st := tc.store.cfg.Settings 10196 st.Manual.Store(true) 10197 concurrency.LockTableLivenessPushDelay.Override(&st.SV, 24*time.Hour) 10198 10199 // Write a value to a key A. 10200 key := roachpb.Key("a") 10201 initVal := incrementArgs(key, 1) 10202 if _, pErr := tc.SendWrapped(initVal); pErr != nil { 10203 t.Fatalf("unexpected error: %s", pErr) 10204 } 10205 10206 // Create a new transaction and perform a "for update" scan. This should 10207 // acquire unreplicated, exclusive locks on the key. 10208 txn := newTransaction("test", key, 1, tc.Clock()) 10209 var ba roachpb.BatchRequest 10210 ba.Header = roachpb.Header{Txn: txn} 10211 ba.Add(roachpb.NewScan(key, key.Next(), true /* forUpdate */)) 10212 if _, pErr := tc.Sender().Send(ctx, ba); pErr != nil { 10213 t.Fatalf("unexpected error: %s", pErr) 10214 } 10215 10216 // Try to write to the key outside of this transaction. 
Should wait on the 10217 // "for update" lock in a lock wait-queue in the concurrency manager until 10218 // the lock is released. If we don't notify the lock-table when the first 10219 // txn eventually commits, this will wait for much longer than it needs to. 10220 // It will eventually push the first txn and notice that it has committed. 10221 // However, we've disabled liveness pushes in this test, so the test will 10222 // block forever without the lock-table notification. We didn't need to 10223 // disable deadlock detection pushes because this is a non-transactional 10224 // write, so it never performs them. 10225 pErrC := make(chan *roachpb.Error, 1) 10226 go func() { 10227 otherWrite := incrementArgs(key, 1) 10228 _, pErr := tc.SendWrapped(otherWrite) 10229 pErrC <- pErr 10230 }() 10231 10232 // The second write should not complete. 10233 select { 10234 case pErr := <-pErrC: 10235 t.Fatalf("write unexpectedly finished with error: %v", pErr) 10236 case <-time.After(5 * time.Millisecond): 10237 } 10238 10239 // Update the locked value and commit in a single batch. This should release 10240 // the "for update" lock. 10241 ba = roachpb.BatchRequest{} 10242 incArgs := incrementArgs(key, 1) 10243 et, etH := endTxnArgs(txn, true /* commit */) 10244 et.Require1PC = true 10245 et.LockSpans = []roachpb.Span{{Key: key, EndKey: key.Next()}} 10246 ba.Header = etH 10247 ba.Add(incArgs, &et) 10248 assignSeqNumsForReqs(txn, incArgs, &et) 10249 if _, pErr := tc.Sender().Send(ctx, ba); pErr != nil { 10250 t.Fatalf("unexpected error: %s", pErr) 10251 } 10252 10253 // The second write should complete. 10254 pErr := <-pErrC 10255 if pErr != nil { 10256 t.Fatalf("unexpected error: %s", pErr) 10257 } 10258 } 10259 10260 func TestReplicaShouldCampaignOnWake(t *testing.T) { 10261 defer leaktest.AfterTest(t)() 10262 10263 const storeID = roachpb.StoreID(1) 10264 10265 myLease := roachpb.Lease{ 10266 Replica: roachpb.ReplicaDescriptor{ 10267 StoreID: storeID, 10268 }, 10269 } 10270 otherLease := roachpb.Lease{ 10271 Replica: roachpb.ReplicaDescriptor{ 10272 StoreID: roachpb.StoreID(2), 10273 }, 10274 } 10275 10276 followerWithoutLeader := raft.Status{BasicStatus: raft.BasicStatus{ 10277 SoftState: raft.SoftState{ 10278 RaftState: raft.StateFollower, 10279 Lead: 0, 10280 }, 10281 }} 10282 followerWithLeader := raft.Status{BasicStatus: raft.BasicStatus{ 10283 SoftState: raft.SoftState{ 10284 RaftState: raft.StateFollower, 10285 Lead: 1, 10286 }, 10287 }} 10288 candidate := raft.Status{BasicStatus: raft.BasicStatus{ 10289 SoftState: raft.SoftState{ 10290 RaftState: raft.StateCandidate, 10291 Lead: 0, 10292 }, 10293 }} 10294 leader := raft.Status{BasicStatus: raft.BasicStatus{ 10295 SoftState: raft.SoftState{ 10296 RaftState: raft.StateLeader, 10297 Lead: 1, 10298 }, 10299 }} 10300 10301 tests := []struct { 10302 leaseStatus kvserverpb.LeaseStatus 10303 lease roachpb.Lease 10304 raftStatus raft.Status 10305 exp bool 10306 }{ 10307 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_VALID}, myLease, followerWithoutLeader, true}, 10308 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_VALID}, otherLease, followerWithoutLeader, false}, 10309 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_VALID}, myLease, followerWithLeader, false}, 10310 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_VALID}, otherLease, followerWithLeader, false}, 10311 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_VALID}, myLease, candidate, false}, 10312 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_VALID}, 
otherLease, candidate, false}, 10313 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_VALID}, myLease, leader, false}, 10314 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_VALID}, otherLease, leader, false}, 10315 10316 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_EXPIRED}, myLease, followerWithoutLeader, true}, 10317 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_EXPIRED}, otherLease, followerWithoutLeader, true}, 10318 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_EXPIRED}, myLease, followerWithLeader, false}, 10319 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_EXPIRED}, otherLease, followerWithLeader, false}, 10320 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_EXPIRED}, myLease, candidate, false}, 10321 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_EXPIRED}, otherLease, candidate, false}, 10322 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_EXPIRED}, myLease, leader, false}, 10323 {kvserverpb.LeaseStatus{State: kvserverpb.LeaseState_EXPIRED}, otherLease, leader, false}, 10324 } 10325 10326 for i, test := range tests { 10327 v := shouldCampaignOnWake(test.leaseStatus, test.lease, storeID, test.raftStatus) 10328 if v != test.exp { 10329 t.Errorf("%d: expected %v but got %v", i, test.exp, v) 10330 } 10331 } 10332 } 10333 10334 func TestRangeStatsRequest(t *testing.T) { 10335 defer leaktest.AfterTest(t)() 10336 10337 tc := testContext{} 10338 ctx := context.Background() 10339 stopper := stop.NewStopper() 10340 defer stopper.Stop(ctx) 10341 tc.Start(t, stopper) 10342 10343 keyPrefix := roachpb.RKey("dummy-prefix") 10344 10345 // Write some random data to the range and verify that a RangeStatsRequest 10346 // returns the same MVCC stats as the replica's in-memory state. 10347 WriteRandomDataToRange(t, tc.store, tc.repl.RangeID, keyPrefix) 10348 expMS := tc.repl.GetMVCCStats() 10349 res, pErr := kv.SendWrappedWith(ctx, tc.Sender(), roachpb.Header{ 10350 RangeID: tc.repl.RangeID, 10351 }, &roachpb.RangeStatsRequest{}) 10352 if pErr != nil { 10353 t.Fatal(pErr) 10354 } 10355 resMS := res.(*roachpb.RangeStatsResponse).MVCCStats 10356 require.Equal(t, expMS, resMS) 10357 10358 // Write another key to the range and verify that the MVCC stats returned 10359 // by a RangeStatsRequest reflect the additional key. 10360 key := append(keyPrefix, roachpb.RKey("123")...) 10361 if err := tc.store.DB().Put(ctx, key, "123"); err != nil { 10362 t.Fatal(err) 10363 } 10364 res, pErr = kv.SendWrappedWith(ctx, tc.Sender(), roachpb.Header{ 10365 RangeID: tc.repl.RangeID, 10366 }, &roachpb.RangeStatsRequest{}) 10367 if pErr != nil { 10368 t.Fatal(pErr) 10369 } 10370 resMS = res.(*roachpb.RangeStatsResponse).MVCCStats 10371 // Only verify the update is reflected in the key/value counts. Verifying 10372 // the byte count would couple this test too tightly to our encoding scheme. 10373 require.Equal(t, expMS.KeyCount+1, resMS.KeyCount) 10374 require.Equal(t, expMS.ValCount+1, resMS.ValCount) 10375 require.Equal(t, expMS.LiveCount+1, resMS.LiveCount) 10376 } 10377 10378 // TestTxnRecordLifecycleTransitions tests various scenarios where a transaction 10379 // attempts to create or modify its transaction record. It verifies that 10380 // finalized transaction records can never be recreated, even after they have 10381 // been GCed. It also verifies that the effect of transaction pushes is not lost 10382 // even when the push occurred before the transaction record was created. 
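// Each case below optionally runs a setup request, then runs the request
// under test against the same transaction and timestamp, and finally
// compares the resulting transaction record (or its absence) against
// expTxn, optionally with eager transaction-record GC disabled.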
10383 func TestTxnRecordLifecycleTransitions(t *testing.T) { 10384 defer leaktest.AfterTest(t)() 10385 10386 manual := hlc.NewManualClock(123) 10387 tc := testContext{manualClock: manual} 10388 tsc := TestStoreConfig(hlc.NewClock(manual.UnixNano, time.Nanosecond)) 10389 tsc.TestingKnobs.DisableGCQueue = true 10390 tsc.TestingKnobs.DontRetryPushTxnFailures = true 10391 tsc.TestingKnobs.DontRecoverIndeterminateCommits = true 10392 ctx := context.Background() 10393 stopper := stop.NewStopper() 10394 defer stopper.Stop(ctx) 10395 tc.StartWithStoreConfig(t, stopper, tsc) 10396 10397 pusher := newTransaction("test", roachpb.Key("a"), 1, tc.Clock()) 10398 pusher.Priority = enginepb.MaxTxnPriority 10399 10400 type runFunc func(*roachpb.Transaction, hlc.Timestamp) error 10401 sendWrappedWithErr := func(h roachpb.Header, args roachpb.Request) error { 10402 _, pErr := kv.SendWrappedWith(ctx, tc.Sender(), h, args) 10403 return pErr.GoError() 10404 } 10405 10406 intents := []roachpb.Span{{Key: roachpb.Key("a")}} 10407 inFlightWrites := []roachpb.SequencedWrite{{Key: roachpb.Key("a"), Sequence: 1}} 10408 otherInFlightWrites := []roachpb.SequencedWrite{{Key: roachpb.Key("b"), Sequence: 2}} 10409 10410 type verifyFunc func(*roachpb.Transaction, hlc.Timestamp) roachpb.TransactionRecord 10411 noTxnRecord := verifyFunc(nil) 10412 txnWithoutChanges := func(txn *roachpb.Transaction, _ hlc.Timestamp) roachpb.TransactionRecord { 10413 return txn.AsRecord() 10414 } 10415 txnWithStatus := func(status roachpb.TransactionStatus) verifyFunc { 10416 return func(txn *roachpb.Transaction, _ hlc.Timestamp) roachpb.TransactionRecord { 10417 record := txn.AsRecord() 10418 record.Status = status 10419 return record 10420 } 10421 } 10422 txnWithStagingStatusAndInFlightWrites := func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 10423 record := txnWithStatus(roachpb.STAGING)(txn, now) 10424 record.InFlightWrites = inFlightWrites 10425 return record 10426 } 10427 10428 testCases := []struct { 10429 name string 10430 setup runFunc // all three functions are provided the same txn and timestamp 10431 run runFunc 10432 expTxn verifyFunc 10433 expError string // regexp pattern to match on run error, if not empty 10434 disableTxnAutoGC bool // disables auto txn record GC 10435 }{ 10436 { 10437 name: "heartbeat transaction", 10438 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10439 hb, hbH := heartbeatArgs(txn, now) 10440 return sendWrappedWithErr(hbH, &hb) 10441 }, 10442 expTxn: func(txn *roachpb.Transaction, hbTs hlc.Timestamp) roachpb.TransactionRecord { 10443 record := txn.AsRecord() 10444 record.LastHeartbeat.Forward(hbTs) 10445 return record 10446 }, 10447 }, 10448 { 10449 name: "end transaction (stage)", 10450 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10451 et, etH := endTxnArgs(txn, true /* commit */) 10452 et.InFlightWrites = inFlightWrites 10453 return sendWrappedWithErr(etH, &et) 10454 }, 10455 expTxn: txnWithStagingStatusAndInFlightWrites, 10456 }, 10457 { 10458 name: "end transaction (abort)", 10459 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10460 et, etH := endTxnArgs(txn, false /* commit */) 10461 return sendWrappedWithErr(etH, &et) 10462 }, 10463 // The transaction record will be eagerly GC-ed. 
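// (The eager GC is possible here because the EndTxn declares no lock
// spans, so there is nothing left to resolve and the record can be
// removed in the same batch that aborts it.)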
10464 expTxn: noTxnRecord, 10465 }, 10466 { 10467 name: "end transaction (commit)", 10468 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10469 et, etH := endTxnArgs(txn, true /* commit */) 10470 return sendWrappedWithErr(etH, &et) 10471 }, 10472 // The transaction record will be eagerly GC-ed. 10473 expTxn: noTxnRecord, 10474 }, 10475 { 10476 name: "end transaction (abort) without eager gc", 10477 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10478 et, etH := endTxnArgs(txn, false /* commit */) 10479 return sendWrappedWithErr(etH, &et) 10480 }, 10481 expTxn: txnWithStatus(roachpb.ABORTED), 10482 disableTxnAutoGC: true, 10483 }, 10484 { 10485 name: "end transaction (commit) without eager gc", 10486 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10487 et, etH := endTxnArgs(txn, true /* commit */) 10488 return sendWrappedWithErr(etH, &et) 10489 }, 10490 expTxn: txnWithStatus(roachpb.COMMITTED), 10491 disableTxnAutoGC: true, 10492 }, 10493 { 10494 name: "push transaction (timestamp)", 10495 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10496 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 10497 pt.PushTo = now 10498 return sendWrappedWithErr(roachpb.Header{}, &pt) 10499 }, 10500 // If no transaction record exists, the push (timestamp) request does 10501 // not create one. It only records its push in the tscache. 10502 expTxn: noTxnRecord, 10503 }, 10504 { 10505 name: "push transaction (abort)", 10506 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10507 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 10508 return sendWrappedWithErr(roachpb.Header{}, &pt) 10509 }, 10510 // If no transaction record exists, the push (abort) request does 10511 // not create one. It only records its push in the tscache. 10512 expTxn: noTxnRecord, 10513 }, 10514 { 10515 // Should not happen because RecoverTxn requests are only 10516 // sent after observing a STAGING transaction record. 10517 name: "recover transaction (implicitly committed)", 10518 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10519 rt := recoverTxnArgs(txn, true /* implicitlyCommitted */) 10520 return sendWrappedWithErr(roachpb.Header{}, &rt) 10521 }, 10522 expError: "txn record synthesized with non-ABORTED status", 10523 expTxn: noTxnRecord, 10524 }, 10525 { 10526 // Should not happen because RecoverTxn requests are only 10527 // sent after observing a STAGING transaction record. 
10528 name: "recover transaction (not implicitly committed)", 10529 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10530 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 10531 return sendWrappedWithErr(roachpb.Header{}, &rt) 10532 }, 10533 expError: "txn record synthesized with non-ABORTED status", 10534 expTxn: noTxnRecord, 10535 }, 10536 { 10537 name: "heartbeat transaction after heartbeat transaction", 10538 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10539 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 10540 return sendWrappedWithErr(hbH, &hb) 10541 }, 10542 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10543 hb, hbH := heartbeatArgs(txn, now.Add(0, 5)) 10544 return sendWrappedWithErr(hbH, &hb) 10545 }, 10546 expTxn: func(txn *roachpb.Transaction, hbTs hlc.Timestamp) roachpb.TransactionRecord { 10547 record := txn.AsRecord() 10548 record.LastHeartbeat.Forward(hbTs.Add(0, 5)) 10549 return record 10550 }, 10551 }, 10552 { 10553 name: "heartbeat transaction with epoch bump after heartbeat transaction", 10554 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10555 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 10556 return sendWrappedWithErr(hbH, &hb) 10557 }, 10558 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10559 clone := txn.Clone() 10560 clone.Restart(-1, 0, now) 10561 hb, hbH := heartbeatArgs(clone, now.Add(0, 5)) 10562 return sendWrappedWithErr(hbH, &hb) 10563 }, 10564 expTxn: func(txn *roachpb.Transaction, hbTs hlc.Timestamp) roachpb.TransactionRecord { 10565 record := txn.AsRecord() 10566 // NOTE: the HeartbeatTxnRequest with the larger epoch does not 10567 // update any fields other than LastHeartbeat. This is fine, 10568 // although it's arguably not optimal. 10569 // record.Epoch = txn.Epoch + 1 10570 // record.WriteTimestamp.Forward(hbTs) 10571 record.LastHeartbeat.Forward(hbTs.Add(0, 5)) 10572 return record 10573 }, 10574 }, 10575 { 10576 name: "end transaction (stage) after heartbeat transaction", 10577 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10578 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 10579 return sendWrappedWithErr(hbH, &hb) 10580 }, 10581 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10582 et, etH := endTxnArgs(txn, true /* commit */) 10583 et.InFlightWrites = inFlightWrites 10584 return sendWrappedWithErr(etH, &et) 10585 }, 10586 expTxn: txnWithStagingStatusAndInFlightWrites, 10587 }, 10588 { 10589 name: "end transaction (abort) after heartbeat transaction", 10590 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10591 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 10592 return sendWrappedWithErr(hbH, &hb) 10593 }, 10594 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10595 et, etH := endTxnArgs(txn, false /* commit */) 10596 return sendWrappedWithErr(etH, &et) 10597 }, 10598 // The transaction record will be eagerly GC-ed. 10599 expTxn: noTxnRecord, 10600 }, 10601 { 10602 name: "end transaction (commit) after heartbeat transaction", 10603 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10604 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 10605 return sendWrappedWithErr(hbH, &hb) 10606 }, 10607 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10608 et, etH := endTxnArgs(txn, true /* commit */) 10609 return sendWrappedWithErr(etH, &et) 10610 }, 10611 // The transaction record will be eagerly GC-ed. 
10612 expTxn: noTxnRecord, 10613 }, 10614 { 10615 name: "end transaction (abort) without eager gc after heartbeat transaction", 10616 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10617 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 10618 return sendWrappedWithErr(hbH, &hb) 10619 }, 10620 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10621 et, etH := endTxnArgs(txn, false /* commit */) 10622 return sendWrappedWithErr(etH, &et) 10623 }, 10624 expTxn: txnWithStatus(roachpb.ABORTED), 10625 disableTxnAutoGC: true, 10626 }, 10627 { 10628 name: "end transaction (commit) without eager gc after heartbeat transaction", 10629 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10630 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 10631 return sendWrappedWithErr(hbH, &hb) 10632 }, 10633 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10634 et, etH := endTxnArgs(txn, true /* commit */) 10635 return sendWrappedWithErr(etH, &et) 10636 }, 10637 expTxn: txnWithStatus(roachpb.COMMITTED), 10638 disableTxnAutoGC: true, 10639 }, 10640 { 10641 name: "push transaction (timestamp) after heartbeat transaction", 10642 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10643 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 10644 return sendWrappedWithErr(hbH, &hb) 10645 }, 10646 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10647 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 10648 pt.PushTo = now 10649 return sendWrappedWithErr(roachpb.Header{}, &pt) 10650 }, 10651 expTxn: func(txn *roachpb.Transaction, pushTs hlc.Timestamp) roachpb.TransactionRecord { 10652 record := txn.AsRecord() 10653 record.WriteTimestamp.Forward(pushTs) 10654 record.Priority = pusher.Priority - 1 10655 return record 10656 }, 10657 }, 10658 { 10659 name: "push transaction (abort) after heartbeat transaction", 10660 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10661 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 10662 return sendWrappedWithErr(hbH, &hb) 10663 }, 10664 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10665 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 10666 return sendWrappedWithErr(roachpb.Header{}, &pt) 10667 }, 10668 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 10669 record := txnWithStatus(roachpb.ABORTED)(txn, now) 10670 record.Priority = pusher.Priority - 1 10671 return record 10672 }, 10673 }, 10674 { 10675 // Staging transaction records can still be heartbeat. 10676 name: "heartbeat transaction after end transaction (stage)", 10677 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10678 et, etH := endTxnArgs(txn, true /* commit */) 10679 et.InFlightWrites = inFlightWrites 10680 return sendWrappedWithErr(etH, &et) 10681 }, 10682 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10683 hb, hbH := heartbeatArgs(txn, now) 10684 return sendWrappedWithErr(hbH, &hb) 10685 }, 10686 expTxn: func(txn *roachpb.Transaction, hbTs hlc.Timestamp) roachpb.TransactionRecord { 10687 record := txnWithStagingStatusAndInFlightWrites(txn, hbTs) 10688 record.LastHeartbeat.Forward(hbTs) 10689 return record 10690 }, 10691 }, 10692 { 10693 // Should not be possible outside of replays or re-issues of the 10694 // same request, but also not prevented. If not a re-issue, the 10695 // second stage will always either bump the commit timestamp or 10696 // bump the epoch. 
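// (A verbatim re-issue, as exercised below, leaves the STAGING record
// and its in-flight writes unchanged.)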
10697 name: "end transaction (stage) after end transaction (stage)", 10698 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10699 et, etH := endTxnArgs(txn, true /* commit */) 10700 et.InFlightWrites = inFlightWrites 10701 return sendWrappedWithErr(etH, &et) 10702 }, 10703 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10704 et, etH := endTxnArgs(txn, true /* commit */) 10705 et.InFlightWrites = inFlightWrites 10706 return sendWrappedWithErr(etH, &et) 10707 }, 10708 expTxn: txnWithStagingStatusAndInFlightWrites, 10709 }, 10710 { 10711 // Case of a transaction that refreshed after an unsuccessful 10712 // implicit commit. If the refresh is successful then the 10713 // transaction coordinator can attempt the implicit commit again. 10714 name: "end transaction (stage) with timestamp increase after end transaction (stage)", 10715 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10716 et, etH := endTxnArgs(txn, true /* commit */) 10717 et.InFlightWrites = inFlightWrites 10718 return sendWrappedWithErr(etH, &et) 10719 }, 10720 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10721 clone := txn.Clone() 10722 clone.ReadTimestamp.Forward(now) 10723 clone.WriteTimestamp.Forward(now) 10724 et, etH := endTxnArgs(clone, true /* commit */) 10725 // Add different in-flight writes to test whether they are 10726 // replaced by the second EndTxn request. 10727 et.InFlightWrites = otherInFlightWrites 10728 return sendWrappedWithErr(etH, &et) 10729 }, 10730 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 10731 record := txnWithStagingStatusAndInFlightWrites(txn, now) 10732 record.InFlightWrites = otherInFlightWrites 10733 record.WriteTimestamp.Forward(now) 10734 return record 10735 }, 10736 }, 10737 { 10738 // Case of a transaction that restarted after an unsuccessful 10739 // implicit commit. The transaction coordinator can attempt an 10740 // implicit commit in the next epoch. 10741 name: "end transaction (stage) with epoch bump after end transaction (stage)", 10742 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10743 et, etH := endTxnArgs(txn, true /* commit */) 10744 et.InFlightWrites = inFlightWrites 10745 return sendWrappedWithErr(etH, &et) 10746 }, 10747 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10748 clone := txn.Clone() 10749 clone.Restart(-1, 0, now) 10750 et, etH := endTxnArgs(clone, true /* commit */) 10751 // Add different in-flight writes to test whether they are 10752 // replaced by the second EndTxn request. 10753 et.InFlightWrites = otherInFlightWrites 10754 return sendWrappedWithErr(etH, &et) 10755 }, 10756 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 10757 record := txnWithStagingStatusAndInFlightWrites(txn, now) 10758 record.InFlightWrites = otherInFlightWrites 10759 record.Epoch = txn.Epoch + 1 10760 record.WriteTimestamp.Forward(now) 10761 return record 10762 }, 10763 }, 10764 { 10765 // Case of a rollback after an unsuccessful implicit commit. 
10766 name: "end transaction (abort) after end transaction (stage)", 10767 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10768 et, etH := endTxnArgs(txn, true /* commit */) 10769 et.InFlightWrites = inFlightWrites 10770 return sendWrappedWithErr(etH, &et) 10771 }, 10772 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10773 et, etH := endTxnArgs(txn, false /* commit */) 10774 return sendWrappedWithErr(etH, &et) 10775 }, 10776 // The transaction record will be eagerly GC-ed. 10777 expTxn: noTxnRecord, 10778 }, 10779 { 10780 // Case of making a commit "explicit" after a successful implicit commit. 10781 name: "end transaction (commit) after end transaction (stage)", 10782 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10783 et, etH := endTxnArgs(txn, true /* commit */) 10784 et.InFlightWrites = inFlightWrites 10785 return sendWrappedWithErr(etH, &et) 10786 }, 10787 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10788 et, etH := endTxnArgs(txn, true /* commit */) 10789 return sendWrappedWithErr(etH, &et) 10790 }, 10791 // The transaction record will be eagerly GC-ed. 10792 expTxn: noTxnRecord, 10793 }, 10794 { 10795 name: "end transaction (abort) without eager gc after end transaction (stage)", 10796 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10797 et, etH := endTxnArgs(txn, true /* commit */) 10798 et.InFlightWrites = inFlightWrites 10799 return sendWrappedWithErr(etH, &et) 10800 }, 10801 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10802 et, etH := endTxnArgs(txn, false /* commit */) 10803 return sendWrappedWithErr(etH, &et) 10804 }, 10805 expTxn: txnWithStatus(roachpb.ABORTED), 10806 disableTxnAutoGC: true, 10807 }, 10808 { 10809 name: "end transaction (commit) without eager gc after end transaction (stage)", 10810 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10811 et, etH := endTxnArgs(txn, true /* commit */) 10812 et.InFlightWrites = inFlightWrites 10813 return sendWrappedWithErr(etH, &et) 10814 }, 10815 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10816 et, etH := endTxnArgs(txn, true /* commit */) 10817 return sendWrappedWithErr(etH, &et) 10818 }, 10819 expTxn: txnWithStatus(roachpb.COMMITTED), 10820 disableTxnAutoGC: true, 10821 }, 10822 { 10823 name: "push transaction (timestamp) after end transaction (stage)", 10824 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10825 et, etH := endTxnArgs(txn, true /* commit */) 10826 et.InFlightWrites = inFlightWrites 10827 return sendWrappedWithErr(etH, &et) 10828 }, 10829 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10830 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 10831 pt.PushTo = now 10832 return sendWrappedWithErr(roachpb.Header{}, &pt) 10833 }, 10834 expError: "found txn in indeterminate STAGING state", 10835 expTxn: txnWithStagingStatusAndInFlightWrites, 10836 }, 10837 { 10838 name: "push transaction (abort) after end transaction (stage)", 10839 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10840 et, etH := endTxnArgs(txn, true /* commit */) 10841 et.InFlightWrites = inFlightWrites 10842 return sendWrappedWithErr(etH, &et) 10843 }, 10844 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10845 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 10846 return sendWrappedWithErr(roachpb.Header{}, &pt) 10847 }, 10848 expError: "found txn in indeterminate STAGING state", 10849 expTxn: txnWithStagingStatusAndInFlightWrites, 10850 }, 10851 { 10852 // 
The pushee attempted a parallel commit that failed, so it is now 10853 // re-writing new intents at higher timestamps. The push should not 10854 // consider the pushee to be staging. 10855 name: "push transaction (timestamp) after end transaction (stage) with outdated timestamp", 10856 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10857 et, etH := endTxnArgs(txn, true /* commit */) 10858 et.InFlightWrites = inFlightWrites 10859 return sendWrappedWithErr(etH, &et) 10860 }, 10861 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10862 clone := txn.Clone() 10863 clone.WriteTimestamp = clone.WriteTimestamp.Add(0, 1) 10864 pt := pushTxnArgs(pusher, clone, roachpb.PUSH_TIMESTAMP) 10865 pt.PushTo = now 10866 return sendWrappedWithErr(roachpb.Header{}, &pt) 10867 }, 10868 expTxn: func(txn *roachpb.Transaction, pushTs hlc.Timestamp) roachpb.TransactionRecord { 10869 record := txn.AsRecord() 10870 record.WriteTimestamp.Forward(pushTs) 10871 record.Priority = pusher.Priority - 1 10872 return record 10873 }, 10874 }, 10875 { 10876 // The pushee attempted a parallel commit that failed, so it is now 10877 // re-writing new intents at higher timestamps. The push should not 10878 // consider the pushee to be staging. 10879 name: "push transaction (abort) after end transaction (stage) with outdated timestamp", 10880 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10881 et, etH := endTxnArgs(txn, true /* commit */) 10882 et.InFlightWrites = inFlightWrites 10883 return sendWrappedWithErr(etH, &et) 10884 }, 10885 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10886 clone := txn.Clone() 10887 clone.WriteTimestamp = clone.WriteTimestamp.Add(0, 1) 10888 pt := pushTxnArgs(pusher, clone, roachpb.PUSH_ABORT) 10889 return sendWrappedWithErr(roachpb.Header{}, &pt) 10890 }, 10891 expTxn: func(txn *roachpb.Transaction, pushTs hlc.Timestamp) roachpb.TransactionRecord { 10892 record := txnWithStatus(roachpb.ABORTED)(txn, pushTs) 10893 record.WriteTimestamp = record.WriteTimestamp.Add(0, 1) 10894 record.Priority = pusher.Priority - 1 10895 return record 10896 }, 10897 }, 10898 { 10899 // The pushee attempted a parallel commit that failed, so it is now 10900 // writing new intents in a new epoch. The push should not consider 10901 // the pushee to be staging. 10902 name: "push transaction (timestamp) after end transaction (stage) with outdated epoch", 10903 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10904 et, etH := endTxnArgs(txn, true /* commit */) 10905 et.InFlightWrites = inFlightWrites 10906 return sendWrappedWithErr(etH, &et) 10907 }, 10908 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10909 clone := txn.Clone() 10910 clone.Restart(-1, 0, clone.WriteTimestamp.Add(0, 1)) 10911 pt := pushTxnArgs(pusher, clone, roachpb.PUSH_TIMESTAMP) 10912 pt.PushTo = now 10913 return sendWrappedWithErr(roachpb.Header{}, &pt) 10914 }, 10915 expTxn: func(txn *roachpb.Transaction, pushTs hlc.Timestamp) roachpb.TransactionRecord { 10916 record := txn.AsRecord() 10917 record.Epoch = txn.Epoch + 1 10918 record.WriteTimestamp.Forward(pushTs) 10919 record.Priority = pusher.Priority - 1 10920 return record 10921 }, 10922 }, 10923 { 10924 // The pushee attempted a parallel commit that failed, so it is now 10925 // writing new intents in a new epoch. The push should not consider 10926 // the pushee to be staging. 
10927 name: "push transaction (abort) after end transaction (stage) with outdated epoch", 10928 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10929 et, etH := endTxnArgs(txn, true /* commit */) 10930 et.InFlightWrites = inFlightWrites 10931 return sendWrappedWithErr(etH, &et) 10932 }, 10933 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10934 clone := txn.Clone() 10935 clone.Restart(-1, 0, clone.WriteTimestamp.Add(0, 1)) 10936 pt := pushTxnArgs(pusher, clone, roachpb.PUSH_ABORT) 10937 return sendWrappedWithErr(roachpb.Header{}, &pt) 10938 }, 10939 expTxn: func(txn *roachpb.Transaction, pushTs hlc.Timestamp) roachpb.TransactionRecord { 10940 record := txnWithStatus(roachpb.ABORTED)(txn, pushTs) 10941 record.Epoch = txn.Epoch + 1 10942 record.WriteTimestamp = record.WriteTimestamp.Add(0, 1) 10943 record.Priority = pusher.Priority - 1 10944 return record 10945 }, 10946 }, 10947 { 10948 name: "heartbeat transaction after end transaction (abort)", 10949 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10950 et, etH := endTxnArgs(txn, false /* commit */) 10951 return sendWrappedWithErr(etH, &et) 10952 }, 10953 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10954 hb, hbH := heartbeatArgs(txn, now) 10955 return sendWrappedWithErr(hbH, &hb) 10956 }, 10957 expError: "TransactionAbortedError(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY)", 10958 expTxn: noTxnRecord, 10959 }, 10960 { 10961 name: "heartbeat transaction with epoch bump after end transaction (abort)", 10962 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10963 et, etH := endTxnArgs(txn, false /* commit */) 10964 return sendWrappedWithErr(etH, &et) 10965 }, 10966 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 10967 // Restart the transaction at a higher timestamp. This will 10968 // increment its ReadTimestamp as well. We used to check the GC 10969 // threshold against this timestamp instead of its minimum 10970 // timestamp. 10971 clone := txn.Clone() 10972 clone.Restart(-1, 0, now) 10973 hb, hbH := heartbeatArgs(clone, now) 10974 return sendWrappedWithErr(hbH, &hb) 10975 }, 10976 expError: "TransactionAbortedError(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY)", 10977 expTxn: noTxnRecord, 10978 }, 10979 { 10980 // Could be a replay or a retry. 10981 name: "end transaction (stage) after end transaction (abort)", 10982 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10983 et, etH := endTxnArgs(txn, false /* commit */) 10984 return sendWrappedWithErr(etH, &et) 10985 }, 10986 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10987 et, etH := endTxnArgs(txn, true /* commit */) 10988 et.InFlightWrites = inFlightWrites 10989 return sendWrappedWithErr(etH, &et) 10990 }, 10991 expError: "TransactionAbortedError(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY)", 10992 expTxn: noTxnRecord, 10993 }, 10994 { 10995 // Could be a replay or a retry. 
10996 name: "end transaction (abort) after end transaction (abort)", 10997 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 10998 et, etH := endTxnArgs(txn, false /* commit */) 10999 return sendWrappedWithErr(etH, &et) 11000 }, 11001 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11002 et, etH := endTxnArgs(txn, false /* commit */) 11003 return sendWrappedWithErr(etH, &et) 11004 }, 11005 expTxn: noTxnRecord, 11006 }, 11007 { 11008 // This case shouldn't happen in practice given a well-functioning 11009 // transaction coordinator, but is handled correctly nevertheless. 11010 name: "end transaction (commit) after end transaction (abort)", 11011 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11012 et, etH := endTxnArgs(txn, false /* commit */) 11013 return sendWrappedWithErr(etH, &et) 11014 }, 11015 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11016 et, etH := endTxnArgs(txn, true /* commit */) 11017 return sendWrappedWithErr(etH, &et) 11018 }, 11019 expError: "TransactionAbortedError(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY)", 11020 expTxn: noTxnRecord, 11021 }, 11022 { 11023 name: "push transaction (timestamp) after end transaction (abort)", 11024 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11025 et, etH := endTxnArgs(txn, false /* commit */) 11026 return sendWrappedWithErr(etH, &et) 11027 }, 11028 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11029 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 11030 pt.PushTo = now 11031 return sendWrappedWithErr(roachpb.Header{}, &pt) 11032 }, 11033 expTxn: noTxnRecord, 11034 }, 11035 { 11036 name: "push transaction (abort) after end transaction (abort)", 11037 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11038 et, etH := endTxnArgs(txn, false /* commit */) 11039 return sendWrappedWithErr(etH, &et) 11040 }, 11041 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11042 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11043 return sendWrappedWithErr(roachpb.Header{}, &pt) 11044 }, 11045 expTxn: noTxnRecord, 11046 }, 11047 { 11048 name: "heartbeat transaction after end transaction (commit)", 11049 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11050 et, etH := endTxnArgs(txn, true /* commit */) 11051 return sendWrappedWithErr(etH, &et) 11052 }, 11053 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11054 hb, hbH := heartbeatArgs(txn, now) 11055 return sendWrappedWithErr(hbH, &hb) 11056 }, 11057 expError: "TransactionAbortedError(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY)", 11058 expTxn: noTxnRecord, 11059 }, 11060 { 11061 // Could be a replay or a retry. 11062 name: "end transaction (stage) after end transaction (commit)", 11063 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11064 et, etH := endTxnArgs(txn, true /* commit */) 11065 return sendWrappedWithErr(etH, &et) 11066 }, 11067 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11068 et, etH := endTxnArgs(txn, true /* commit */) 11069 et.InFlightWrites = inFlightWrites 11070 return sendWrappedWithErr(etH, &et) 11071 }, 11072 expError: "TransactionAbortedError(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY)", 11073 expTxn: noTxnRecord, 11074 }, 11075 { 11076 // This case shouldn't happen in practice given a well-functioning 11077 // transaction coordinator, but is handled correctly nevertheless. 
11078 name: "end transaction (abort) after end transaction (commit)", 11079 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11080 et, etH := endTxnArgs(txn, true /* commit */) 11081 return sendWrappedWithErr(etH, &et) 11082 }, 11083 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11084 et, etH := endTxnArgs(txn, false /* commit */) 11085 return sendWrappedWithErr(etH, &et) 11086 }, 11087 expTxn: noTxnRecord, 11088 }, 11089 { 11090 // Could be a replay or a retry. 11091 name: "end transaction (commit) after end transaction (commit)", 11092 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11093 et, etH := endTxnArgs(txn, true /* commit */) 11094 return sendWrappedWithErr(etH, &et) 11095 }, 11096 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11097 et, etH := endTxnArgs(txn, true /* commit */) 11098 return sendWrappedWithErr(etH, &et) 11099 }, 11100 expError: "TransactionAbortedError(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY)", 11101 expTxn: noTxnRecord, 11102 }, 11103 { 11104 name: "push transaction (timestamp) after end transaction (commit)", 11105 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11106 et, etH := endTxnArgs(txn, true /* commit */) 11107 return sendWrappedWithErr(etH, &et) 11108 }, 11109 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11110 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 11111 pt.PushTo = now 11112 return sendWrappedWithErr(roachpb.Header{}, &pt) 11113 }, 11114 expTxn: noTxnRecord, 11115 }, 11116 { 11117 name: "push transaction (abort) after end transaction (commit)", 11118 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11119 et, etH := endTxnArgs(txn, true /* commit */) 11120 return sendWrappedWithErr(etH, &et) 11121 }, 11122 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11123 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11124 return sendWrappedWithErr(roachpb.Header{}, &pt) 11125 }, 11126 expTxn: noTxnRecord, 11127 }, 11128 { 11129 name: "heartbeat transaction after end transaction (abort) without eager gc", 11130 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11131 et, etH := endTxnArgs(txn, false /* commit */) 11132 return sendWrappedWithErr(etH, &et) 11133 }, 11134 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11135 hb, hbH := heartbeatArgs(txn, now) 11136 return sendWrappedWithErr(hbH, &hb) 11137 }, 11138 // The heartbeat request won't throw an error, but also won't update the 11139 // transaction record. It will simply return the updated transaction state. 11140 // This is kind of strange, but also doesn't cause any issues. 11141 expError: "", 11142 expTxn: txnWithStatus(roachpb.ABORTED), 11143 disableTxnAutoGC: true, 11144 }, 11145 { 11146 // Could be a replay or a retry. 11147 name: "end transaction (stage) after end transaction (abort) without eager gc", 11148 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11149 et, etH := endTxnArgs(txn, false /* commit */) 11150 return sendWrappedWithErr(etH, &et) 11151 }, 11152 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11153 et, etH := endTxnArgs(txn, true /* commit */) 11154 et.InFlightWrites = inFlightWrites 11155 return sendWrappedWithErr(etH, &et) 11156 }, 11157 expError: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 11158 expTxn: txnWithStatus(roachpb.ABORTED), 11159 disableTxnAutoGC: true, 11160 }, 11161 { 11162 // Could be a replay or a retry. 
11163 name: "end transaction (abort) after end transaction (abort) without eager gc", 11164 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11165 et, etH := endTxnArgs(txn, false /* commit */) 11166 return sendWrappedWithErr(etH, &et) 11167 }, 11168 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11169 et, etH := endTxnArgs(txn, false /* commit */) 11170 return sendWrappedWithErr(etH, &et) 11171 }, 11172 expTxn: txnWithStatus(roachpb.ABORTED), 11173 disableTxnAutoGC: true, 11174 }, 11175 { 11176 // This case shouldn't happen in practice given a well-functioning 11177 // transaction coordinator, but is handled correctly nevertheless. 11178 name: "end transaction (commit) after end transaction (abort) without eager gc", 11179 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11180 et, etH := endTxnArgs(txn, false /* commit */) 11181 return sendWrappedWithErr(etH, &et) 11182 }, 11183 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11184 et, etH := endTxnArgs(txn, true /* commit */) 11185 return sendWrappedWithErr(etH, &et) 11186 }, 11187 expError: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 11188 expTxn: txnWithStatus(roachpb.ABORTED), 11189 disableTxnAutoGC: true, 11190 }, 11191 { 11192 name: "push transaction (timestamp) after end transaction (abort) without eager gc", 11193 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11194 et, etH := endTxnArgs(txn, false /* commit */) 11195 return sendWrappedWithErr(etH, &et) 11196 }, 11197 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11198 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 11199 pt.PushTo = now 11200 return sendWrappedWithErr(roachpb.Header{}, &pt) 11201 }, 11202 expTxn: txnWithStatus(roachpb.ABORTED), 11203 disableTxnAutoGC: true, 11204 }, 11205 { 11206 name: "push transaction (abort) after end transaction (abort) without eager gc", 11207 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11208 et, etH := endTxnArgs(txn, false /* commit */) 11209 return sendWrappedWithErr(etH, &et) 11210 }, 11211 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11212 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11213 return sendWrappedWithErr(roachpb.Header{}, &pt) 11214 }, 11215 expTxn: txnWithStatus(roachpb.ABORTED), 11216 disableTxnAutoGC: true, 11217 }, 11218 { 11219 name: "heartbeat transaction after end transaction (commit) without eager gc", 11220 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11221 et, etH := endTxnArgs(txn, true /* commit */) 11222 return sendWrappedWithErr(etH, &et) 11223 }, 11224 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11225 hb, hbH := heartbeatArgs(txn, now) 11226 return sendWrappedWithErr(hbH, &hb) 11227 }, 11228 // The heartbeat request won't throw an error, but also won't update the 11229 // transaction record. It will simply return the updated transaction state. 11230 // This is kind of strange, but also doesn't cause any issues. 11231 expError: "", 11232 expTxn: txnWithStatus(roachpb.COMMITTED), 11233 disableTxnAutoGC: true, 11234 }, 11235 { 11236 // Could be a replay or a retry. 
11237 name: "end transaction (stage) after end transaction (commit) without eager gc", 11238 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11239 et, etH := endTxnArgs(txn, true /* commit */) 11240 return sendWrappedWithErr(etH, &et) 11241 }, 11242 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11243 et, etH := endTxnArgs(txn, true /* commit */) 11244 et.InFlightWrites = inFlightWrites 11245 return sendWrappedWithErr(etH, &et) 11246 }, 11247 expError: "TransactionStatusError: already committed (REASON_TXN_COMMITTED)", 11248 expTxn: txnWithStatus(roachpb.COMMITTED), 11249 disableTxnAutoGC: true, 11250 }, 11251 { 11252 // This case shouldn't happen in practice given a well-functioning 11253 // transaction coordinator, but is handled correctly nevertheless. 11254 name: "end transaction (abort) after end transaction (commit) without eager gc", 11255 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11256 et, etH := endTxnArgs(txn, true /* commit */) 11257 return sendWrappedWithErr(etH, &et) 11258 }, 11259 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11260 et, etH := endTxnArgs(txn, false /* commit */) 11261 return sendWrappedWithErr(etH, &et) 11262 }, 11263 expError: "TransactionStatusError: already committed (REASON_TXN_COMMITTED)", 11264 expTxn: txnWithStatus(roachpb.COMMITTED), 11265 disableTxnAutoGC: true, 11266 }, 11267 { 11268 // Could be a replay or a retry. 11269 name: "end transaction (commit) after end transaction (commit) without eager gc", 11270 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11271 et, etH := endTxnArgs(txn, true /* commit */) 11272 return sendWrappedWithErr(etH, &et) 11273 }, 11274 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11275 et, etH := endTxnArgs(txn, true /* commit */) 11276 return sendWrappedWithErr(etH, &et) 11277 }, 11278 expError: "TransactionStatusError: already committed (REASON_TXN_COMMITTED)", 11279 expTxn: txnWithStatus(roachpb.COMMITTED), 11280 disableTxnAutoGC: true, 11281 }, 11282 { 11283 name: "push transaction (timestamp) after end transaction (commit) without eager gc", 11284 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11285 et, etH := endTxnArgs(txn, true /* commit */) 11286 return sendWrappedWithErr(etH, &et) 11287 }, 11288 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11289 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 11290 pt.PushTo = now 11291 return sendWrappedWithErr(roachpb.Header{}, &pt) 11292 }, 11293 expTxn: txnWithStatus(roachpb.COMMITTED), 11294 disableTxnAutoGC: true, 11295 }, 11296 { 11297 name: "push transaction (abort) after end transaction (commit) without eager gc", 11298 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11299 et, etH := endTxnArgs(txn, true /* commit */) 11300 return sendWrappedWithErr(etH, &et) 11301 }, 11302 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11303 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11304 return sendWrappedWithErr(roachpb.Header{}, &pt) 11305 }, 11306 expTxn: txnWithStatus(roachpb.COMMITTED), 11307 disableTxnAutoGC: true, 11308 }, 11309 { 11310 name: "heartbeat transaction after push transaction (timestamp)", 11311 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11312 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 11313 pt.PushTo = now 11314 return sendWrappedWithErr(roachpb.Header{}, &pt) 11315 }, 11316 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11317 hb, hbH := 
heartbeatArgs(txn, now.Add(0, 5)) 11318 return sendWrappedWithErr(hbH, &hb) 11319 }, 11320 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 11321 record := txn.AsRecord() 11322 record.WriteTimestamp.Forward(now) 11323 record.LastHeartbeat.Forward(now.Add(0, 5)) 11324 return record 11325 }, 11326 }, 11327 { 11328 name: "end transaction (stage) after push transaction (timestamp)", 11329 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11330 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 11331 pt.PushTo = now 11332 return sendWrappedWithErr(roachpb.Header{}, &pt) 11333 }, 11334 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11335 et, etH := endTxnArgs(txn, true /* commit */) 11336 et.InFlightWrites = inFlightWrites 11337 return sendWrappedWithErr(etH, &et) 11338 }, 11339 expError: "TransactionRetryError: retry txn (RETRY_SERIALIZABLE)", 11340 // The end transaction (stage) does not write a transaction record 11341 // if it hits a serializable retry error. 11342 expTxn: noTxnRecord, 11343 }, 11344 { 11345 name: "end transaction (abort) after push transaction (timestamp)", 11346 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11347 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 11348 pt.PushTo = now 11349 return sendWrappedWithErr(roachpb.Header{}, &pt) 11350 }, 11351 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11352 et, etH := endTxnArgs(txn, false /* commit */) 11353 return sendWrappedWithErr(etH, &et) 11354 }, 11355 // The end transaction (abort) request succeeds and cleans up the 11356 // transaction record. 11357 expTxn: noTxnRecord, 11358 }, 11359 { 11360 name: "end transaction (commit) after push transaction (timestamp)", 11361 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11362 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 11363 pt.PushTo = now 11364 return sendWrappedWithErr(roachpb.Header{}, &pt) 11365 }, 11366 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11367 et, etH := endTxnArgs(txn, true /* commit */) 11368 return sendWrappedWithErr(etH, &et) 11369 }, 11370 expError: "TransactionRetryError: retry txn (RETRY_SERIALIZABLE)", 11371 // The end transaction (commit) does not write a transaction record 11372 // if it hits a serializable retry error. 11373 expTxn: noTxnRecord, 11374 }, 11375 { 11376 name: "1PC end transaction after push transaction (timestamp)", 11377 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11378 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_TIMESTAMP) 11379 pt.PushTo = now 11380 return sendWrappedWithErr(roachpb.Header{}, &pt) 11381 }, 11382 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11383 et := roachpb.EndTxnRequest{ 11384 RequestHeader: roachpb.RequestHeader{ 11385 Key: txn.Key, 11386 Sequence: 1, // This will qualify for 1PC. 11387 }, 11388 Commit: true, 11389 } 11390 etH := roachpb.Header{Txn: txn} 11391 return sendWrappedWithErr(etH, &et) 11392 }, 11393 expError: "TransactionRetryError: retry txn (RETRY_SERIALIZABLE)", 11394 // The end transaction (commit) does not write a transaction record 11395 // if it hits a serializable retry error. 
11396 expTxn: noTxnRecord, 11397 }, 11398 { 11399 name: "1PC end transaction after push transaction (abort)", 11400 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11401 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11402 pt.PushTo = now 11403 return sendWrappedWithErr(roachpb.Header{}, &pt) 11404 }, 11405 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11406 et := roachpb.EndTxnRequest{ 11407 RequestHeader: roachpb.RequestHeader{ 11408 Key: txn.Key, 11409 Sequence: 1, // This will qualify for 1PC. 11410 }, 11411 Commit: true, 11412 } 11413 etH := roachpb.Header{Txn: txn} 11414 return sendWrappedWithErr(etH, &et) 11415 }, 11416 expError: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 11417 expTxn: noTxnRecord, 11418 }, 11419 { 11420 name: "heartbeat transaction after push transaction (abort)", 11421 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11422 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11423 return sendWrappedWithErr(roachpb.Header{}, &pt) 11424 }, 11425 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11426 hb, hbH := heartbeatArgs(txn, now) 11427 return sendWrappedWithErr(hbH, &hb) 11428 }, 11429 expError: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 11430 expTxn: noTxnRecord, 11431 }, 11432 { 11433 name: "heartbeat transaction with epoch bump after push transaction (abort)", 11434 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11435 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11436 return sendWrappedWithErr(roachpb.Header{}, &pt) 11437 }, 11438 run: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11439 // Restart the transaction at a higher timestamp. This will 11440 // increment its ReadTimestamp as well. We used to check the GC 11441 // threshold against this timestamp instead of its minimum 11442 // timestamp. 11443 clone := txn.Clone() 11444 clone.Restart(-1, 0, now) 11445 hb, hbH := heartbeatArgs(clone, now) 11446 return sendWrappedWithErr(hbH, &hb) 11447 }, 11448 expError: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 11449 expTxn: noTxnRecord, 11450 }, 11451 { 11452 name: "end transaction (stage) after push transaction (abort)", 11453 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11454 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11455 return sendWrappedWithErr(roachpb.Header{}, &pt) 11456 }, 11457 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11458 et, etH := endTxnArgs(txn, true /* commit */) 11459 et.InFlightWrites = inFlightWrites 11460 return sendWrappedWithErr(etH, &et) 11461 }, 11462 expError: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 11463 expTxn: noTxnRecord, 11464 }, 11465 { 11466 name: "end transaction (abort) after push transaction (abort)", 11467 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11468 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11469 return sendWrappedWithErr(roachpb.Header{}, &pt) 11470 }, 11471 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11472 et, etH := endTxnArgs(txn, false /* commit */) 11473 return sendWrappedWithErr(etH, &et) 11474 }, 11475 // The end transaction (abort) request succeeds and cleans up the 11476 // transaction record. 
11477 expTxn: noTxnRecord, 11478 }, 11479 { 11480 name: "end transaction (commit) after push transaction (abort)", 11481 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11482 pt := pushTxnArgs(pusher, txn, roachpb.PUSH_ABORT) 11483 return sendWrappedWithErr(roachpb.Header{}, &pt) 11484 }, 11485 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11486 et, etH := endTxnArgs(txn, true /* commit */) 11487 return sendWrappedWithErr(etH, &et) 11488 }, 11489 expError: "TransactionAbortedError(ABORT_REASON_ABORTED_RECORD_FOUND)", 11490 expTxn: noTxnRecord, 11491 }, 11492 { 11493 // Should not be possible. 11494 name: "recover transaction (implicitly committed) after heartbeat transaction", 11495 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11496 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 11497 return sendWrappedWithErr(hbH, &hb) 11498 }, 11499 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11500 rt := recoverTxnArgs(txn, true /* implicitlyCommitted */) 11501 return sendWrappedWithErr(roachpb.Header{}, &rt) 11502 }, 11503 expError: "found PENDING record for implicitly committed transaction", 11504 expTxn: txnWithoutChanges, 11505 }, 11506 { 11507 // Typical case of transaction recovery from a STAGING status after 11508 // a successful implicit commit. 11509 name: "recover transaction (implicitly committed) after end transaction (stage)", 11510 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11511 et, etH := endTxnArgs(txn, true /* commit */) 11512 et.InFlightWrites = inFlightWrites 11513 return sendWrappedWithErr(etH, &et) 11514 }, 11515 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11516 rt := recoverTxnArgs(txn, true /* implicitlyCommitted */) 11517 return sendWrappedWithErr(roachpb.Header{}, &rt) 11518 }, 11519 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 11520 record := txnWithStatus(roachpb.COMMITTED)(txn, now) 11521 // RecoverTxn does not synchronously resolve local intents. 11522 record.LockSpans = intents 11523 return record 11524 }, 11525 }, 11526 { 11527 // Should not be possible. 11528 name: "recover transaction (implicitly committed) after end transaction (stage) with timestamp increase", 11529 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11530 clone := txn.Clone() 11531 clone.ReadTimestamp.Forward(now) 11532 clone.WriteTimestamp.Forward(now) 11533 et, etH := endTxnArgs(clone, true /* commit */) 11534 et.InFlightWrites = inFlightWrites 11535 return sendWrappedWithErr(etH, &et) 11536 }, 11537 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11538 rt := recoverTxnArgs(txn, true /* implicitlyCommitted */) 11539 return sendWrappedWithErr(roachpb.Header{}, &rt) 11540 }, 11541 expError: "timestamp change by implicitly committed transaction", 11542 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 11543 record := txnWithStagingStatusAndInFlightWrites(txn, now) 11544 record.WriteTimestamp.Forward(now) 11545 return record 11546 }, 11547 }, 11548 { 11549 // Should not be possible. 
11550 name: "recover transaction (implicitly committed) after end transaction (stage) with epoch bump", 11551 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11552 clone := txn.Clone() 11553 clone.Restart(-1, 0, now) 11554 et, etH := endTxnArgs(clone, true /* commit */) 11555 et.InFlightWrites = inFlightWrites 11556 return sendWrappedWithErr(etH, &et) 11557 }, 11558 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11559 rt := recoverTxnArgs(txn, true /* implicitlyCommitted */) 11560 return sendWrappedWithErr(roachpb.Header{}, &rt) 11561 }, 11562 expError: "epoch change by implicitly committed transaction", 11563 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 11564 record := txnWithStagingStatusAndInFlightWrites(txn, now) 11565 record.Epoch = txn.Epoch + 1 11566 record.WriteTimestamp.Forward(now) 11567 return record 11568 }, 11569 }, 11570 { 11571 // Should not be possible. 11572 name: "recover transaction (implicitly committed) after end transaction (abort)", 11573 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11574 et, etH := endTxnArgs(txn, false /* commit */) 11575 return sendWrappedWithErr(etH, &et) 11576 }, 11577 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11578 rt := recoverTxnArgs(txn, true /* implicitlyCommitted */) 11579 return sendWrappedWithErr(roachpb.Header{}, &rt) 11580 }, 11581 // The transaction record was cleaned up, so RecoverTxn can't perform 11582 // the same assertion that it does in the case without eager gc. 11583 expTxn: noTxnRecord, 11584 }, 11585 { 11586 // A concurrent recovery process completed or the transaction 11587 // coordinator made its commit explicit. 11588 name: "recover transaction (implicitly committed) after end transaction (commit)", 11589 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11590 et, etH := endTxnArgs(txn, true /* commit */) 11591 return sendWrappedWithErr(etH, &et) 11592 }, 11593 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11594 rt := recoverTxnArgs(txn, true /* implicitlyCommitted */) 11595 return sendWrappedWithErr(roachpb.Header{}, &rt) 11596 }, 11597 expTxn: noTxnRecord, 11598 }, 11599 { 11600 // Should not be possible. 11601 name: "recover transaction (implicitly committed) after end transaction (abort) without eager gc", 11602 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11603 et, etH := endTxnArgs(txn, false /* commit */) 11604 return sendWrappedWithErr(etH, &et) 11605 }, 11606 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11607 rt := recoverTxnArgs(txn, true /* implicitlyCommitted */) 11608 return sendWrappedWithErr(roachpb.Header{}, &rt) 11609 }, 11610 expError: "found ABORTED record for implicitly committed transaction", 11611 expTxn: txnWithStatus(roachpb.ABORTED), 11612 disableTxnAutoGC: true, 11613 }, 11614 { 11615 // A concurrent recovery process completed or the transaction 11616 // coordinator made its commit explicit. 
11617 name: "recover transaction (implicitly committed) after end transaction (commit) without eager gc", 11618 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11619 et, etH := endTxnArgs(txn, true /* commit */) 11620 return sendWrappedWithErr(etH, &et) 11621 }, 11622 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11623 rt := recoverTxnArgs(txn, true /* implicitlyCommitted */) 11624 return sendWrappedWithErr(roachpb.Header{}, &rt) 11625 }, 11626 expTxn: txnWithStatus(roachpb.COMMITTED), 11627 disableTxnAutoGC: true, 11628 }, 11629 { 11630 // Should not be possible. 11631 name: "recover transaction (not implicitly committed) after heartbeat transaction", 11632 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11633 hb, hbH := heartbeatArgs(txn, txn.MinTimestamp) 11634 return sendWrappedWithErr(hbH, &hb) 11635 }, 11636 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11637 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 11638 return sendWrappedWithErr(roachpb.Header{}, &rt) 11639 }, 11640 expError: "cannot recover PENDING transaction", 11641 expTxn: txnWithoutChanges, 11642 }, 11643 { 11644 // Transaction coordinator restarted after failing to perform a 11645 // implicit commit. Common case. 11646 name: "recover transaction (not implicitly committed) after heartbeat transaction with epoch bump", 11647 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11648 clone := txn.Clone() 11649 clone.Restart(-1, 0, now) 11650 hb, hbH := heartbeatArgs(clone, clone.MinTimestamp) 11651 return sendWrappedWithErr(hbH, &hb) 11652 }, 11653 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11654 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 11655 return sendWrappedWithErr(roachpb.Header{}, &rt) 11656 }, 11657 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 11658 record := txn.AsRecord() 11659 record.Epoch = txn.Epoch + 1 11660 record.WriteTimestamp.Forward(now) 11661 return record 11662 }, 11663 }, 11664 { 11665 // Typical case of transaction recovery from a STAGING status after 11666 // an unsuccessful implicit commit. 11667 name: "recover transaction (not implicitly committed) after end transaction (stage)", 11668 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11669 et, etH := endTxnArgs(txn, true /* commit */) 11670 et.InFlightWrites = inFlightWrites 11671 return sendWrappedWithErr(etH, &et) 11672 }, 11673 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11674 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 11675 return sendWrappedWithErr(roachpb.Header{}, &rt) 11676 }, 11677 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 11678 record := txnWithStatus(roachpb.ABORTED)(txn, now) 11679 // RecoverTxn does not synchronously resolve local intents. 11680 record.LockSpans = intents 11681 return record 11682 }, 11683 }, 11684 { 11685 // Typical case of transaction recovery from a STAGING status after 11686 // an unsuccessful implicit commit. Transaction coordinator bumped 11687 // timestamp in same epoch to attempt implicit commit again. The 11688 // RecoverTxn request should not modify the transaction record. 
11689 name: "recover transaction (not implicitly committed) after end transaction (stage) with timestamp increase", 11690 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11691 clone := txn.Clone() 11692 clone.ReadTimestamp.Forward(now) 11693 clone.WriteTimestamp.Forward(now) 11694 et, etH := endTxnArgs(clone, true /* commit */) 11695 et.InFlightWrites = inFlightWrites 11696 return sendWrappedWithErr(etH, &et) 11697 }, 11698 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11699 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 11700 return sendWrappedWithErr(roachpb.Header{}, &rt) 11701 }, 11702 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 11703 // Unchanged by the RecoverTxn request. 11704 record := txnWithStagingStatusAndInFlightWrites(txn, now) 11705 record.WriteTimestamp.Forward(now) 11706 return record 11707 }, 11708 }, 11709 { 11710 // Typical case of transaction recovery from a STAGING status after 11711 // an unsuccessful implicit commit. Transaction coordinator bumped 11712 // epoch after a restart and is attempting implicit commit again. 11713 // The RecoverTxn request should not modify the transaction record. 11714 name: "recover transaction (not implicitly committed) after end transaction (stage) with epoch bump", 11715 setup: func(txn *roachpb.Transaction, now hlc.Timestamp) error { 11716 clone := txn.Clone() 11717 clone.Restart(-1, 0, now) 11718 et, etH := endTxnArgs(clone, true /* commit */) 11719 et.InFlightWrites = inFlightWrites 11720 return sendWrappedWithErr(etH, &et) 11721 }, 11722 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11723 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 11724 return sendWrappedWithErr(roachpb.Header{}, &rt) 11725 }, 11726 expTxn: func(txn *roachpb.Transaction, now hlc.Timestamp) roachpb.TransactionRecord { 11727 record := txnWithStagingStatusAndInFlightWrites(txn, now) 11728 record.Epoch = txn.Epoch + 1 11729 record.WriteTimestamp.Forward(now) 11730 return record 11731 }, 11732 }, 11733 { 11734 // A concurrent recovery process completed or the transaction 11735 // coordinator rolled back its transaction record after an 11736 // unsuccessful implicit commit. 11737 name: "recover transaction (not implicitly committed) after end transaction (abort)", 11738 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11739 et, etH := endTxnArgs(txn, false /* commit */) 11740 return sendWrappedWithErr(etH, &et) 11741 }, 11742 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11743 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 11744 return sendWrappedWithErr(roachpb.Header{}, &rt) 11745 }, 11746 expTxn: noTxnRecord, 11747 }, 11748 { 11749 // Should not be possible. 11750 name: "recover transaction (not implicitly committed) after end transaction (commit)", 11751 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11752 et, etH := endTxnArgs(txn, true /* commit */) 11753 return sendWrappedWithErr(etH, &et) 11754 }, 11755 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11756 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 11757 return sendWrappedWithErr(roachpb.Header{}, &rt) 11758 }, 11759 // The transaction record was cleaned up, so RecoverTxn can't perform 11760 // the same assertion that it does in the case without eager gc. 
11761 expTxn: noTxnRecord, 11762 }, 11763 { 11764 // A concurrent recovery process completed or the transaction 11765 // coordinator rolled back its transaction record after an 11766 // unsuccessful implicit commit. 11767 name: "recover transaction (not implicitly committed) after end transaction (abort) without eager gc", 11768 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11769 et, etH := endTxnArgs(txn, false /* commit */) 11770 return sendWrappedWithErr(etH, &et) 11771 }, 11772 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11773 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 11774 return sendWrappedWithErr(roachpb.Header{}, &rt) 11775 }, 11776 expTxn: txnWithStatus(roachpb.ABORTED), 11777 disableTxnAutoGC: true, 11778 }, 11779 { 11780 // A transaction committed while a recovery process was running 11781 // concurrently. The recovery process attempted to prevent an intent 11782 // write after the intent write already succeeded (allowing the 11783 // transaction to commit) and was resolved. The recovery process 11784 // thinks that it prevented an intent write because the intent has 11785 // already been resolved, but later find that the transaction record 11786 // is committed, so it does nothing. 11787 name: "recover transaction (not implicitly committed) after end transaction (commit) without eager gc", 11788 setup: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11789 et, etH := endTxnArgs(txn, true /* commit */) 11790 return sendWrappedWithErr(etH, &et) 11791 }, 11792 run: func(txn *roachpb.Transaction, _ hlc.Timestamp) error { 11793 rt := recoverTxnArgs(txn, false /* implicitlyCommitted */) 11794 return sendWrappedWithErr(roachpb.Header{}, &rt) 11795 }, 11796 expTxn: txnWithStatus(roachpb.COMMITTED), 11797 disableTxnAutoGC: true, 11798 }, 11799 } 11800 for _, c := range testCases { 11801 t.Run(c.name, func(t *testing.T) { 11802 defer setTxnAutoGC(!c.disableTxnAutoGC)() 11803 11804 txn := newTransaction(c.name, roachpb.Key(c.name), 1, tc.Clock()) 11805 manual.Increment(99) 11806 runTs := tc.Clock().Now() 11807 11808 if c.setup != nil { 11809 if err := c.setup(txn, runTs); err != nil { 11810 t.Fatalf("failed during test setup: %+v", err) 11811 } 11812 } 11813 11814 if err := c.run(txn, runTs); err != nil { 11815 if len(c.expError) == 0 { 11816 t.Fatalf("expected no failure, found %q", err.Error()) 11817 } 11818 if !testutils.IsError(err, regexp.QuoteMeta(c.expError)) { 11819 t.Fatalf("expected failure %q, found %q", c.expError, err.Error()) 11820 } 11821 } else { 11822 if len(c.expError) > 0 { 11823 t.Fatalf("expected failure %q", c.expError) 11824 } 11825 } 11826 11827 var foundRecord roachpb.TransactionRecord 11828 if found, err := storage.MVCCGetProto( 11829 ctx, tc.repl.store.Engine(), keys.TransactionKey(txn.Key, txn.ID), 11830 hlc.Timestamp{}, &foundRecord, storage.MVCCGetOptions{}, 11831 ); err != nil { 11832 t.Fatal(err) 11833 } else if found { 11834 if c.expTxn == nil { 11835 t.Fatalf("expected no txn record, found %v", found) 11836 } 11837 expRecord := c.expTxn(txn, runTs) 11838 if !reflect.DeepEqual(expRecord, foundRecord) { 11839 t.Fatalf("txn record does not match expectations:\n%s", 11840 strings.Join(pretty.Diff(foundRecord, expRecord), "\n")) 11841 } 11842 } else { 11843 if c.expTxn != nil { 11844 t.Fatalf("expected txn record, found no txn record") 11845 } 11846 } 11847 }) 11848 } 11849 } 11850 11851 // Test that an EndTxn(commit=false) request that doesn't find its transaction 11852 // record doesn't return an 
error. 11853 // This is relied upon by the client which liberally sends rollbacks even when 11854 // it's unclear whether the txn record has been written. 11855 func TestRollbackMissingTxnRecordNoError(t *testing.T) { 11856 defer leaktest.AfterTest(t)() 11857 11858 tc := testContext{} 11859 ctx := context.Background() 11860 stopper := stop.NewStopper() 11861 defer stopper.Stop(ctx) 11862 tc.Start(t, stopper) 11863 11864 key := roachpb.Key("bogus key") 11865 txn := newTransaction("test", key, roachpb.NormalUserPriority, tc.Clock()) 11866 11867 res, pErr := kv.SendWrappedWith(ctx, tc.Sender(), roachpb.Header{ 11868 RangeID: tc.repl.RangeID, 11869 Txn: txn, 11870 }, &roachpb.EndTxnRequest{ 11871 RequestHeader: roachpb.RequestHeader{ 11872 Key: key, 11873 }, 11874 Commit: false, 11875 }) 11876 if pErr != nil { 11877 t.Fatal(pErr) 11878 } 11879 if res.Header().Txn == nil { 11880 t.Fatal("expected Txn to be filled on the response") 11881 } 11882 11883 // For good measure, let's take the opportunity to check replay protection for 11884 // a HeartbeatTxn arriving after the rollback. 11885 _, pErr = kv.SendWrappedWith(ctx, tc.Sender(), roachpb.Header{ 11886 RangeID: tc.repl.RangeID, 11887 Txn: txn, 11888 }, &roachpb.HeartbeatTxnRequest{ 11889 RequestHeader: roachpb.RequestHeader{ 11890 Key: key, 11891 }, 11892 Now: tc.Clock().Now(), 11893 }) 11894 // Note that, as explained in the abort reason comments, the server generates 11895 // a retryable TransactionAbortedError, but if there's actually a sort of 11896 // replay at work and a client is still waiting for the error, the error would 11897 // be transformed into something more ambiguous on the way. 11898 expErr := "TransactionAbortedError(ABORT_REASON_ALREADY_COMMITTED_OR_ROLLED_BACK_POSSIBLE_REPLAY)" 11899 if !testutils.IsPError(pErr, regexp.QuoteMeta(expErr)) { 11900 t.Errorf("expected %s; got %v", expErr, pErr) 11901 } 11902 } 11903 11904 func TestSplitSnapshotWarningStr(t *testing.T) { 11905 defer leaktest.AfterTest(t)() 11906 11907 status := upToDateRaftStatus(replicas(1, 3, 5)) 11908 assert.Equal(t, "", splitSnapshotWarningStr(12, status)) 11909 11910 pr := status.Progress[2] 11911 pr.State = tracker.StateProbe 11912 status.Progress[2] = pr 11913 11914 assert.Equal( 11915 t, 11916 "; r12/2 is being probed (may or may not need a Raft snapshot)", 11917 splitSnapshotWarningStr(12, status), 11918 ) 11919 11920 pr.State = tracker.StateSnapshot 11921 11922 assert.Equal( 11923 t, 11924 "; r12/2 is being probed (may or may not need a Raft snapshot)", 11925 splitSnapshotWarningStr(12, status), 11926 ) 11927 } 11928 11929 // TestProposalNotAcknowledgedOrReproposedAfterApplication exercises a case 11930 // where a command is reproposed twice at different MaxLeaseIndex values to 11931 // ultimately fail with an error which cannot be reproposed (say due to a lease 11932 // transfer or change to the gc threshold). This test checks the 11933 // invariant that a proposal which has been reproposed at different MaxLeaseIndex 11934 // values is not additionally reproposed or acknowledged after applying 11935 // locally. The test verifies this condition by asserting that the 11936 // span used to trace the execution of the proposal is not used after the 11937 // proposal has been finished as it would be if the proposal were reproposed 11938 // after applying locally. 11939 // 11940 // The test does the following things: 11941 // 11942 // * Propose cmd at an initial MaxLeaseIndex. 11943 // * Refresh that cmd immediately.
11944 // * Fail the initial command with an injected error which will lead to a 11945 // reproposal at a higher MaxLeaseIndex. 11946 // * Simultaneously update the lease sequence number on the replica so all 11947 // future commands will fail with NotLeaseHolderError. 11948 // * Enable unconditional refreshes of commands after a raft ready so that 11949 // higher MaxLeaseIndex commands are refreshed. 11950 // 11951 // This order of events ensures that there will be a committed command which 11952 // experiences the lease mismatch error but does not carry the highest 11953 // MaxLeaseIndex for the proposal. The test attempts to verify that once a 11954 // proposal has been acknowledged it will not be reproposed or acknowledged 11955 // again by asserting that the proposal's context is not reused after it is 11956 // finished by the waiting client. 11957 func TestProposalNotAcknowledgedOrReproposedAfterApplication(t *testing.T) { 11958 defer leaktest.AfterTest(t)() 11959 11960 // Set the trace infrastructure to log if a span is used after being finished. 11961 defer enableTraceDebugUseAfterFree()() 11962 11963 // Set logging up to a test specific directory. 11964 scope := log.Scope(t) 11965 defer scope.Close(t) 11966 11967 tc := testContext{} 11968 ctx := context.Background() 11969 11970 stopper := stop.NewStopper() 11971 defer stopper.Stop(ctx) 11972 tc.manualClock = hlc.NewManualClock(123) 11973 cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond)) 11974 // Set the RaftMaxCommittedSizePerReady so that only a single raft entry is 11975 // applied at a time, which makes it easier to line up the timing of reproposals. 11976 cfg.RaftMaxCommittedSizePerReady = 1 11977 // Set up tracing. 11978 tracer := tracing.NewTracer() 11979 tracer.Configure(&cfg.Settings.SV) 11980 cfg.AmbientCtx.Tracer = tracer 11981 11982 // Below we set txnID to the value of the transaction we're going to force to 11983 // be proposed multiple times. 11984 var txnID uuid.UUID 11985 // In the TestingProposalFilter we populate cmdID with the id of the proposal 11986 // which corresponds to txnID. 11987 var cmdID kvserverbase.CmdIDKey 11988 // seen is used to detect the first application of our proposal. 11989 var seen bool 11990 cfg.TestingKnobs = StoreTestingKnobs{ 11991 // Constant reproposals are the worst case which this test is trying to 11992 // examine. 11993 EnableUnconditionalRefreshesInRaftReady: true, 11994 // Set the TestingProposalFilter in order to know the CmdIDKey for our 11995 // request by detecting its txnID. 11996 TestingProposalFilter: func(args kvserverbase.ProposalFilterArgs) *roachpb.Error { 11997 if args.Req.Header.Txn != nil && args.Req.Header.Txn.ID == txnID { 11998 cmdID = args.CmdID 11999 } 12000 return nil 12001 }, 12002 // Detect the application of the proposal to repropose it and also 12003 // invalidate the lease. 12004 TestingApplyFilter: func(args kvserverbase.ApplyFilterArgs) (retry int, pErr *roachpb.Error) { 12005 if seen || args.CmdID != cmdID { 12006 return 0, nil 12007 } 12008 seen = true 12009 tc.repl.mu.Lock() 12010 defer tc.repl.mu.Unlock() 12011 12012 // Increase the lease sequence so that future reproposals will fail with 12013 // NotLeaseHolderError. This mimics the outcome of a leaseholder change 12014 // slipping in between the application of the first proposal and the 12015 // reproposals. 12016 tc.repl.mu.state.Lease.Sequence++ 12017 // This return value will force another retry which will carry a yet 12018 // higher MaxLeaseIndex.
The first reproposal will fail and return to the 12019 // client but the second (which hasn't been applied due to the 12020 // MaxCommittedSizePerReady setting) will be reproposed again. This test 12021 // ensures that it does not reuse the original proposal's context for that 12022 // reproposal by ensuring that no event is recorded after the original 12023 // proposal has been finished. 12024 return int(proposalIllegalLeaseIndex), 12025 roachpb.NewErrorf("forced error that can be reproposed at a higher index") 12026 }, 12027 } 12028 tc.StartWithStoreConfig(t, stopper, cfg) 12029 key := roachpb.Key("a") 12030 lease, _ := tc.repl.GetLease() 12031 txn := newTransaction("test", key, roachpb.NormalUserPriority, tc.Clock()) 12032 txnID = txn.ID 12033 ba := roachpb.BatchRequest{ 12034 Header: roachpb.Header{ 12035 RangeID: tc.repl.RangeID, 12036 Txn: txn, 12037 }, 12038 } 12039 ba.Timestamp = txn.ReadTimestamp 12040 ba.Add(&roachpb.PutRequest{ 12041 RequestHeader: roachpb.RequestHeader{ 12042 Key: key, 12043 }, 12044 Value: roachpb.MakeValueFromBytes([]byte("val")), 12045 }) 12046 12047 // Hold the RaftLock to ensure that after evalAndPropose our proposal is in 12048 // the proposal map. Entries are only removed from that map underneath raft. 12049 tc.repl.RaftLock() 12050 tracedCtx, cleanup := tracing.EnsureContext(ctx, cfg.AmbientCtx.Tracer, "replica send") 12051 ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), &lease) 12052 if pErr != nil { 12053 t.Fatal(pErr) 12054 } 12055 errCh := make(chan *roachpb.Error) 12056 go func() { 12057 res := <-ch 12058 cleanup() 12059 errCh <- res.Err 12060 }() 12061 12062 // While still holding the raftMu, repropose the initial proposal so we know 12063 // that there will be two instances of the proposal. 12064 func() { 12065 tc.repl.mu.Lock() 12066 defer tc.repl.mu.Unlock() 12067 if err := tc.repl.mu.proposalBuf.flushLocked(); err != nil { 12068 t.Fatal(err) 12069 } 12070 tc.repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonNewLeaderOrConfigChange) 12071 }() 12072 tc.repl.RaftUnlock() 12073 12074 if pErr = <-errCh; !testutils.IsPError(pErr, "NotLeaseHolder") { 12075 t.Fatal(pErr) 12076 } 12077 12078 // Round trip another proposal through the replica to ensure that previously 12079 // committed entries have been applied. 12080 _, pErr = tc.repl.sendWithRangeID(ctx, tc.repl.RangeID, &ba) 12081 if pErr != nil { 12082 t.Fatal(pErr) 12083 } 12084 log.Flush() 12085 12086 stopper.Quiesce(ctx) 12087 entries, err := log.FetchEntriesFromFiles(0, math.MaxInt64, 1, 12088 regexp.MustCompile("net/trace")) 12089 if err != nil { 12090 t.Fatal(err) 12091 } 12092 if len(entries) > 0 { 12093 t.Fatalf("reused span after free: %v", entries) 12094 } 12095 } 12096 12097 // TestLaterReproposalsDoNotReuseContext ensures that when commands are 12098 // reproposed more than once at the same MaxLeaseIndex and the first command 12099 // applies, the later reproposals do not log into the proposal's context 12100 // as its underlying trace span may already be finished. 12101 func TestLaterReproposalsDoNotReuseContext(t *testing.T) { 12102 defer leaktest.AfterTest(t)() 12103 12104 // Set the trace infrastructure to log if a span is used after being finished. 12105 defer enableTraceDebugUseAfterFree()() 12106 12107 tc := testContext{} 12108 ctx := context.Background() 12109 12110 // Set logging up to a test specific directory.
12111 scope := log.Scope(t) 12112 defer scope.Close(t) 12113 12114 stopper := stop.NewStopper() 12115 defer stopper.Stop(ctx) 12116 cfg := TestStoreConfig(hlc.NewClock(hlc.UnixNano, time.Nanosecond)) 12117 // Set up tracing. 12118 tracer := tracing.NewTracer() 12119 tracer.Configure(&cfg.Settings.SV) 12120 tracer.AlwaysTrace() 12121 cfg.AmbientCtx.Tracer = tracer 12122 tc.StartWithStoreConfig(t, stopper, cfg) 12123 key := roachpb.Key("a") 12124 lease, _ := tc.repl.GetLease() 12125 txn := newTransaction("test", key, roachpb.NormalUserPriority, tc.Clock()) 12126 ba := roachpb.BatchRequest{ 12127 Header: roachpb.Header{ 12128 RangeID: tc.repl.RangeID, 12129 Txn: txn, 12130 }, 12131 } 12132 ba.Timestamp = txn.ReadTimestamp 12133 ba.Add(&roachpb.PutRequest{ 12134 RequestHeader: roachpb.RequestHeader{ 12135 Key: key, 12136 }, 12137 Value: roachpb.MakeValueFromBytes([]byte("val")), 12138 }) 12139 12140 // Hold the RaftLock to encourage the reproposals to occur in the same batch. 12141 tc.repl.RaftLock() 12142 sp := tracer.StartRootSpan("replica send", logtags.FromContext(ctx), tracing.RecordableSpan) 12143 tracedCtx := opentracing.ContextWithSpan(ctx, sp) 12144 // Go out of our way to enable recording so that expensive logging is enabled 12145 // for this context. 12146 tracing.StartRecording(sp, tracing.SingleNodeRecording) 12147 ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), &lease) 12148 if pErr != nil { 12149 t.Fatal(pErr) 12150 } 12151 // Launch a goroutine to finish the span as soon as a result has been sent. 12152 errCh := make(chan *roachpb.Error) 12153 go func() { 12154 res := <-ch 12155 sp.Finish() 12156 errCh <- res.Err 12157 }() 12158 12159 // Flush the proposal and then repropose it twice. 12160 // This test verifies that these later reproposals don't record into the 12161 // tracedCtx after its span has been finished. 12162 func() { 12163 tc.repl.mu.Lock() 12164 defer tc.repl.mu.Unlock() 12165 if err := tc.repl.mu.proposalBuf.flushLocked(); err != nil { 12166 t.Fatal(err) 12167 } 12168 tc.repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonNewLeaderOrConfigChange) 12169 if err := tc.repl.mu.proposalBuf.flushLocked(); err != nil { 12170 t.Fatal(err) 12171 } 12172 tc.repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonNewLeaderOrConfigChange) 12173 }() 12174 tc.repl.RaftUnlock() 12175 12176 if pErr = <-errCh; pErr != nil { 12177 t.Fatal(pErr) 12178 } 12179 // Round trip another proposal through the replica to ensure that previously 12180 // committed entries have been applied. 12181 _, pErr = tc.repl.sendWithRangeID(ctx, tc.repl.RangeID, &ba) 12182 if pErr != nil { 12183 t.Fatal(pErr) 12184 } 12185 12186 stopper.Quiesce(ctx) 12187 // Check and see if the trace package logged an error. 12188 log.Flush() 12189 entries, err := log.FetchEntriesFromFiles(0, math.MaxInt64, 1, 12190 regexp.MustCompile("net/trace")) 12191 if err != nil { 12192 t.Fatal(err) 12193 } 12194 if len(entries) > 0 { 12195 t.Fatalf("reused span after free: %v", entries) 12196 } 12197 } 12198 12199 // This test ensures that pushes due to closed timestamps are properly recorded 12200 // into the associated telemetry counter. 
12201 func TestReplicaTelemetryCounterForPushesDueToClosedTimestamp(t *testing.T) { 12202 defer leaktest.AfterTest(t)() 12203 ctx := context.Background() 12204 keyA := append(keys.SystemSQLCodec.TablePrefix(math.MaxUint32), 'a') 12205 keyAA := append(keyA[:len(keyA):len(keyA)], 'a') 12206 rKeyA, err := keys.Addr(keyA) 12207 putReq := func(key roachpb.Key) *roachpb.PutRequest { 12208 r := putArgs(key, []byte("foo")) 12209 return &r 12210 } 12211 require.NoError(t, err) 12212 type testCase struct { 12213 name string 12214 f func(*testing.T, *Replica) 12215 } 12216 runTestCase := func(c testCase) { 12217 tc := testContext{} 12218 stopper := stop.NewStopper() 12219 defer stopper.Stop(ctx) 12220 _ = telemetry.GetFeatureCounts(telemetry.Raw, telemetry.ResetCounts) 12221 tc.Start(t, stopper) 12222 r := tc.store.LookupReplica(rKeyA) 12223 t.Run(c.name, func(t *testing.T) { 12224 c.f(t, r) 12225 }) 12226 } 12227 for _, c := range []testCase{ 12228 { 12229 // Test the case where no bump occurs. 12230 name: "no bump", f: func(t *testing.T, r *Replica) { 12231 ba := roachpb.BatchRequest{} 12232 ba.Add(putReq(keyA)) 12233 minReadTS := r.store.Clock().Now() 12234 ba.Timestamp = minReadTS.Next() 12235 require.False(t, r.applyTimestampCache(ctx, &ba, minReadTS)) 12236 require.Equal(t, int32(0), telemetry.Read(batchesPushedDueToClosedTimestamp)) 12237 }, 12238 }, 12239 { 12240 // Test the case where the bump occurs due to minReadTS. 12241 name: "bump due to minTS", f: func(t *testing.T, r *Replica) { 12242 ba := roachpb.BatchRequest{} 12243 ba.Add(putReq(keyA)) 12244 ba.Timestamp = r.store.Clock().Now() 12245 minReadTS := ba.Timestamp.Next() 12246 require.True(t, r.applyTimestampCache(ctx, &ba, minReadTS)) 12247 require.Equal(t, int32(1), telemetry.Read(batchesPushedDueToClosedTimestamp)) 12248 }, 12249 }, 12250 { 12251 // Test the case where we bump due to the read ts cache rather than the minReadTS. 12252 name: "bump due to later read ts cache entry", f: func(t *testing.T, r *Replica) { 12253 ba := roachpb.BatchRequest{} 12254 ba.Add(putReq(keyA)) 12255 ba.Timestamp = r.store.Clock().Now() 12256 minReadTS := ba.Timestamp.Next() 12257 r.store.tsCache.Add(keyA, keyA, minReadTS.Next(), uuid.MakeV4()) 12258 require.True(t, r.applyTimestampCache(ctx, &ba, minReadTS)) 12259 require.Equal(t, int32(0), telemetry.Read(batchesPushedDueToClosedTimestamp)) 12260 }, 12261 }, 12262 { 12263 // Test the case where we do initially bump due to the minReadTS but then 12264 // bump again to a higher ts due to the read ts cache. 
12265 name: "higher bump due to read ts cache entry", f: func(t *testing.T, r *Replica) { 12266 ba := roachpb.BatchRequest{} 12267 ba.Add(putReq(keyA)) 12268 ba.Add(putReq(keyAA)) 12269 ba.Timestamp = r.store.Clock().Now() 12270 minReadTS := ba.Timestamp.Next() 12271 t.Log(ba.Timestamp, minReadTS, minReadTS.Next()) 12272 r.store.tsCache.Add(keyAA, keyAA, minReadTS.Next(), uuid.MakeV4()) 12273 require.True(t, r.applyTimestampCache(ctx, &ba, minReadTS)) 12274 require.Equal(t, int32(0), telemetry.Read(batchesPushedDueToClosedTimestamp)) 12275 }, 12276 }, 12277 } { 12278 runTestCase(c) 12279 } 12280 } 12281 12282 func TestReplicateQueueProcessOne(t *testing.T) { 12283 defer leaktest.AfterTest(t)() 12284 12285 ctx := context.Background() 12286 tc := testContext{} 12287 stopper := stop.NewStopper() 12288 defer stopper.Stop(ctx) 12289 tc.Start(t, stopper) 12290 12291 errBoom := errors.New("boom") 12292 tc.repl.mu.Lock() 12293 tc.repl.mu.destroyStatus.Set(errBoom, destroyReasonMergePending) 12294 tc.repl.mu.Unlock() 12295 12296 requeue, err := tc.store.replicateQueue.processOneChange(ctx, tc.repl, func() bool { return false }, true /* dryRun */) 12297 require.Equal(t, errBoom, err) 12298 require.False(t, requeue) 12299 } 12300 12301 // TestContainsEstimatesClamp tests the massaging of ContainsEstimates 12302 // before proposing a raft command. 12303 // - If the proposing node's version is lower than the VersionContainsEstimatesCounter, 12304 // ContainsEstimates must be clamped to {0,1}. 12305 // - Otherwise, it should always be >1 and an even number. 12306 func TestContainsEstimatesClampProposal(t *testing.T) { 12307 defer leaktest.AfterTest(t)() 12308 12309 _ = clusterversion.VersionContainsEstimatesCounter // see for details on the ContainsEstimates migration 12310 12311 someRequestToProposal := func(tc *testContext, ctx context.Context) *ProposalData { 12312 cmdIDKey := kvserverbase.CmdIDKey("some-cmdid-key") 12313 var ba roachpb.BatchRequest 12314 ba.Timestamp = tc.Clock().Now() 12315 req := putArgs(roachpb.Key("some-key"), []byte("some-value")) 12316 ba.Add(&req) 12317 proposal, err := tc.repl.requestToProposal(ctx, cmdIDKey, &ba, &allSpans) 12318 if err != nil { 12319 t.Error(err) 12320 } 12321 return proposal 12322 } 12323 12324 // Mock Put command so that it always adds 2 to ContainsEstimates. Could be 12325 // any number >1. 
12326 defer setMockPutWithEstimates(2)() 12327 12328 t.Run("Pre-VersionContainsEstimatesCounter", func(t *testing.T) { 12329 ctx := context.Background() 12330 stopper := stop.NewStopper() 12331 defer stopper.Stop(ctx) 12332 cfg := TestStoreConfig(nil) 12333 version := clusterversion.VersionByKey(clusterversion.VersionContainsEstimatesCounter - 1) 12334 cfg.Settings = cluster.MakeTestingClusterSettingsWithVersions(version, version, false /* initializeVersion */) 12335 var tc testContext 12336 tc.StartWithStoreConfigAndVersion(t, stopper, cfg, version) 12337 12338 proposal := someRequestToProposal(&tc, ctx) 12339 12340 if proposal.command.ReplicatedEvalResult.Delta.ContainsEstimates != 1 { 12341 t.Error("Expected ContainsEstimates to be 1, was", proposal.command.ReplicatedEvalResult.Delta.ContainsEstimates) 12342 } 12343 }) 12344 12345 t.Run("VersionContainsEstimatesCounter", func(t *testing.T) { 12346 ctx := context.Background() 12347 stopper := stop.NewStopper() 12348 defer stopper.Stop(ctx) 12349 var tc testContext 12350 tc.Start(t, stopper) 12351 12352 proposal := someRequestToProposal(&tc, ctx) 12353 12354 if proposal.command.ReplicatedEvalResult.Delta.ContainsEstimates != 4 { 12355 t.Error("Expected ContainsEstimates to be 4, was", proposal.command.ReplicatedEvalResult.Delta.ContainsEstimates) 12356 } 12357 }) 12358 12359 } 12360 12361 // TestContainsEstimatesClampApplication tests that if the ContainsEstimates 12362 // delta from a proposed command is 1 (and the replica state ContainsEstimates <= 1), 12363 // ContainsEstimates will be kept 1 in the replica state. This is because 12364 // ContainsEstimates==1 in a proposed command means that the proposer may run 12365 // with a version older than VersionContainsEstimatesCounter, in which ContainsEstimates 12366 // is a bool. 12367 func TestContainsEstimatesClampApplication(t *testing.T) { 12368 defer leaktest.AfterTest(t)() 12369 12370 _ = clusterversion.VersionContainsEstimatesCounter // see for details on the ContainsEstimates migration 12371 12372 ctx := context.Background() 12373 stopper := stop.NewStopper() 12374 defer stopper.Stop(ctx) 12375 tc := testContext{} 12376 tc.Start(t, stopper) 12377 12378 // We will stage and apply 2 batches with a command that has ContainsEstimates=1 12379 // and expect that ReplicaState.Stats.ContainsEstimates will not become greater than 1. 
12380 applyBatch := func() { 12381 tc.repl.raftMu.Lock() 12382 defer tc.repl.raftMu.Unlock() 12383 sm := tc.repl.getStateMachine() 12384 batch := sm.NewBatch(false /* ephemeral */) 12385 rAppbatch := batch.(*replicaAppBatch) 12386 12387 lease, _ := tc.repl.GetLease() 12388 12389 cmd := replicatedCmd{ 12390 ctx: ctx, 12391 ent: &raftpb.Entry{ 12392 // Term: 1, 12393 Index: rAppbatch.state.RaftAppliedIndex + 1, 12394 Type: raftpb.EntryNormal, 12395 }, 12396 decodedRaftEntry: decodedRaftEntry{ 12397 idKey: makeIDKey(), 12398 raftCmd: kvserverpb.RaftCommand{ 12399 ProposerLeaseSequence: rAppbatch.state.Lease.Sequence, 12400 ReplicatedEvalResult: kvserverpb.ReplicatedEvalResult{ 12401 Timestamp: tc.Clock().Now(), 12402 IsLeaseRequest: true, 12403 State: &kvserverpb.ReplicaState{ 12404 Lease: &lease, 12405 }, 12406 Delta: enginepb.MVCCStatsDelta{ 12407 ContainsEstimates: 1, 12408 }, 12409 }, 12410 }, 12411 }, 12412 } 12413 12414 _, err := rAppbatch.Stage(apply.Command(&cmd)) 12415 if err != nil { 12416 t.Fatal(err) 12417 } 12418 12419 if err := batch.ApplyToStateMachine(ctx); err != nil { 12420 t.Fatal(err) 12421 } 12422 } 12423 12424 applyBatch() 12425 assert.Equal(t, int64(1), tc.repl.State().ReplicaState.Stats.ContainsEstimates) 12426 12427 applyBatch() 12428 assert.Equal(t, int64(1), tc.repl.State().ReplicaState.Stats.ContainsEstimates) 12429 } 12430 12431 // setMockPutWithEstimates mocks the Put command (could be any) to simulate a command 12432 // that touches ContainsEstimates, in order to test request proposal behavior. 12433 func setMockPutWithEstimates(containsEstimatesDelta int64) (undo func()) { 12434 prev, _ := batcheval.LookupCommand(roachpb.Put) 12435 12436 mockPut := func( 12437 ctx context.Context, readWriter storage.ReadWriter, cArgs batcheval.CommandArgs, _ roachpb.Response, 12438 ) (result.Result, error) { 12439 args := cArgs.Args.(*roachpb.PutRequest) 12440 ms := cArgs.Stats 12441 ms.ContainsEstimates += containsEstimatesDelta 12442 ts := cArgs.Header.Timestamp 12443 return result.Result{}, storage.MVCCBlindPut(ctx, readWriter, ms, args.Key, ts, args.Value, cArgs.Header.Txn) 12444 } 12445 12446 batcheval.UnregisterCommand(roachpb.Put) 12447 batcheval.RegisterReadWriteCommand(roachpb.Put, batcheval.DefaultDeclareIsolatedKeys, mockPut) 12448 return func() { 12449 batcheval.UnregisterCommand(roachpb.Put) 12450 batcheval.RegisterReadWriteCommand(roachpb.Put, prev.DeclareKeys, prev.EvalRW) 12451 } 12452 } 12453 12454 type fakeStore struct { 12455 *cluster.Settings 12456 *StoreTestingKnobs 12457 } 12458 12459 func (s fakeStore) ClusterSettings() *cluster.Settings { 12460 return s.Settings 12461 } 12462 12463 func (s fakeStore) TestingKnobs() *StoreTestingKnobs { 12464 return s.StoreTestingKnobs 12465 } 12466 12467 func TestPrepareChangeReplicasTrigger(t *testing.T) { 12468 defer leaktest.AfterTest(t)() 12469 12470 ctx := context.Background() 12471 12472 s := fakeStore{ 12473 Settings: cluster.MakeTestingClusterSettings(), 12474 StoreTestingKnobs: &StoreTestingKnobs{}, 12475 } 12476 12477 type typOp struct { 12478 roachpb.ReplicaType 12479 internalChangeType // 0 for noop 12480 } 12481 12482 type testCase struct { 12483 desc *roachpb.RangeDescriptor 12484 chgs internalReplicationChanges 12485 expTrigger string 12486 } 12487 12488 const noop = internalChangeType(0) 12489 const none = roachpb.ReplicaType(-1) 12490 12491 mk := func(expTrigger string, typs ...typOp) testCase { 12492 chgs := make([]internalReplicationChange, 0, len(typs)) 12493 rDescs := 
make([]roachpb.ReplicaDescriptor, 0, len(typs)) 12494 for i, typ := range typs { 12495 typ := typ // local copy - we take addr below 12496 rDesc := roachpb.ReplicaDescriptor{ 12497 ReplicaID: roachpb.ReplicaID(i + 1), 12498 NodeID: roachpb.NodeID(100 * (1 + i)), 12499 StoreID: roachpb.StoreID(100 * (1 + i)), 12500 Type: &(typ.ReplicaType), 12501 } 12502 if typ.ReplicaType != none { 12503 rDescs = append(rDescs, rDesc) 12504 } 12505 if typ.internalChangeType != noop { 12506 chgs = append(chgs, internalReplicationChange{ 12507 target: roachpb.ReplicationTarget{NodeID: rDesc.NodeID, StoreID: rDesc.StoreID}, 12508 typ: typ.internalChangeType, 12509 }) 12510 } 12511 } 12512 desc := roachpb.NewRangeDescriptor(roachpb.RangeID(10), roachpb.RKeyMin, roachpb.RKeyMax, roachpb.MakeReplicaDescriptors(rDescs)) 12513 return testCase{ 12514 desc: desc, 12515 chgs: chgs, 12516 expTrigger: expTrigger, 12517 } 12518 } 12519 12520 tcs := []testCase{ 12521 // Simple addition of learner. 12522 mk( 12523 "SIMPLE(l2) ADD_REPLICA[(n200,s200):2LEARNER]: after=[(n100,s100):1 (n200,s200):2LEARNER] next=3", 12524 typOp{roachpb.VOTER_FULL, noop}, 12525 typOp{none, internalChangeTypeAddLearner}, 12526 ), 12527 // Simple addition of voter (necessarily via learner). 12528 mk( 12529 "SIMPLE(v2) ADD_REPLICA[(n200,s200):2]: after=[(n100,s100):1 (n200,s200):2] next=3", 12530 typOp{roachpb.VOTER_FULL, noop}, 12531 typOp{roachpb.LEARNER, internalChangeTypePromoteLearner}, 12532 ), 12533 // Simple removal of voter. 12534 mk( 12535 "SIMPLE(r2) REMOVE_REPLICA[(n200,s200):2]: after=[(n100,s100):1] next=3", 12536 typOp{roachpb.VOTER_FULL, noop}, 12537 typOp{roachpb.VOTER_FULL, internalChangeTypeRemove}, 12538 ), 12539 // Simple removal of learner. 12540 mk( 12541 "SIMPLE(r2) REMOVE_REPLICA[(n200,s200):2LEARNER]: after=[(n100,s100):1] next=3", 12542 typOp{roachpb.VOTER_FULL, noop}, 12543 typOp{roachpb.LEARNER, internalChangeTypeRemove}, 12544 ), 12545 12546 // All other cases below need to go through joint quorums (though some 12547 // of them only due to limitations in etcd/raft). 12548 12549 // Addition of learner and removal of voter at same time. 12550 mk( 12551 "ENTER_JOINT(r2 l3) ADD_REPLICA[(n200,s200):3LEARNER], REMOVE_REPLICA[(n300,s300):2VOTER_OUTGOING]: after=[(n100,s100):1 (n300,s300):2VOTER_OUTGOING (n200,s200):3LEARNER] next=4", 12552 typOp{roachpb.VOTER_FULL, noop}, 12553 typOp{none, internalChangeTypeAddLearner}, 12554 typOp{roachpb.VOTER_FULL, internalChangeTypeRemove}, 12555 ), 12556 12557 // Promotion of two voters. 12558 mk( 12559 "ENTER_JOINT(v2 v3) ADD_REPLICA[(n200,s200):2VOTER_INCOMING (n300,s300):3VOTER_INCOMING]: after=[(n100,s100):1 (n200,s200):2VOTER_INCOMING (n300,s300):3VOTER_INCOMING] next=4", 12560 typOp{roachpb.VOTER_FULL, noop}, 12561 typOp{roachpb.LEARNER, internalChangeTypePromoteLearner}, 12562 typOp{roachpb.LEARNER, internalChangeTypePromoteLearner}, 12563 ), 12564 12565 // Removal of two voters. 12566 mk( 12567 "ENTER_JOINT(r2 r3) REMOVE_REPLICA[(n200,s200):2VOTER_OUTGOING (n300,s300):3VOTER_OUTGOING]: after=[(n100,s100):1 (n200,s200):2VOTER_OUTGOING (n300,s300):3VOTER_OUTGOING] next=4", 12568 typOp{roachpb.VOTER_FULL, noop}, 12569 typOp{roachpb.VOTER_FULL, internalChangeTypeRemove}, 12570 typOp{roachpb.VOTER_FULL, internalChangeTypeRemove}, 12571 ), 12572 12573 // Demoting two voters. 
12574 mk( 12575 "ENTER_JOINT(r2 l2 r3 l3) REMOVE_REPLICA[(n200,s200):2VOTER_DEMOTING (n300,s300):3VOTER_DEMOTING]: after=[(n100,s100):1 (n200,s200):2VOTER_DEMOTING (n300,s300):3VOTER_DEMOTING] next=4", 12576 typOp{roachpb.VOTER_FULL, noop}, 12577 typOp{roachpb.VOTER_FULL, internalChangeTypeDemote}, 12578 typOp{roachpb.VOTER_FULL, internalChangeTypeDemote}, 12579 ), 12580 // Leave joint config entered via demotion. 12581 mk( 12582 "LEAVE_JOINT: after=[(n100,s100):1 (n200,s200):2LEARNER (n300,s300):3LEARNER] next=4", 12583 typOp{roachpb.VOTER_FULL, noop}, 12584 typOp{roachpb.VOTER_DEMOTING, noop}, 12585 typOp{roachpb.VOTER_DEMOTING, noop}, 12586 ), 12587 } 12588 12589 for _, tc := range tcs { 12590 t.Run("", func(t *testing.T) { 12591 trigger, err := prepareChangeReplicasTrigger( 12592 ctx, 12593 s, 12594 tc.desc, 12595 tc.chgs, 12596 ) 12597 require.NoError(t, err) 12598 assert.Equal(t, tc.expTrigger, trigger.String()) 12599 }) 12600 } 12601 } 12602 12603 func enableTraceDebugUseAfterFree() (restore func()) { 12604 prev := trace.DebugUseAfterFinish 12605 trace.DebugUseAfterFinish = true 12606 return func() { trace.DebugUseAfterFinish = prev } 12607 } 12608 12609 func TestRangeUnavailableMessage(t *testing.T) { 12610 defer leaktest.AfterTest(t)() 12611 12612 var repls roachpb.ReplicaDescriptors 12613 repls.AddReplica(roachpb.ReplicaDescriptor{NodeID: 1, StoreID: 10, ReplicaID: 100}) 12614 repls.AddReplica(roachpb.ReplicaDescriptor{NodeID: 2, StoreID: 20, ReplicaID: 200}) 12615 desc := roachpb.NewRangeDescriptor(10, roachpb.RKey("a"), roachpb.RKey("z"), repls) 12616 dur := time.Minute 12617 var ba roachpb.BatchRequest 12618 ba.Add(&roachpb.RequestLeaseRequest{}) 12619 lm := IsLiveMap{ 12620 1: IsLiveMapEntry{IsLive: true}, 12621 } 12622 rs := raft.Status{} 12623 act := rangeUnavailableMessage(desc, lm, &rs, &ba, dur) 12624 const exp = `have been waiting 60.00s for proposing command RequestLease [/Min,/Min). 12625 This range is likely unavailable. 12626 Please submit this message to Cockroach Labs support along with the following information: 12627 12628 Descriptor: r10:{-} [(n1,s10):1, (n2,s20):2, next=3, gen=0] 12629 Live: (n1,s10):1 12630 Non-live: (n2,s20):2 12631 Raft Status: {"id":"0","term":0,"vote":"0","commit":0,"lead":"0","raftState":"StateFollower","applied":0,"progress":{},"leadtransferee":"0"} 12632 12633 and a copy of https://yourhost:8080/#/reports/range/10 12634 12635 If you are using CockroachDB Enterprise, reach out through your 12636 support contract. Otherwise, please open an issue at: 12637 12638 https://github.com/cockroachdb/cockroach/issues/new/choose 12639 ` 12640 12641 require.Equal(t, exp, act) 12642 }
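
// Editor's note: the keyAA construction in
// TestReplicaTelemetryCounterForPushesDueToClosedTimestamp above uses a full slice
// expression, keyA[:len(keyA):len(keyA)], which pins the slice's capacity to its length
// so that the subsequent append must allocate a new backing array instead of writing
// over keyA's storage. A minimal illustration of the idiom follows; the helper name is
// hypothetical and nothing in this file calls it.
func appendWithoutAliasing(base []byte, b byte) []byte {
	// Capacity == length forces append to copy, leaving the caller's slice untouched.
	return append(base[:len(base):len(base)], b)
}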
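
// Editor's note: the closed-timestamp telemetry cases above reduce to an attribution
// rule: batchesPushedDueToClosedTimestamp is bumped only when the batch had to be
// pushed forward at all and the closed-timestamp floor (minReadTS), rather than a later
// timestamp cache entry, is what it was pushed to. The predicate below is an
// illustrative sketch of that rule only; it is a hypothetical helper that collapses the
// timestamp cache to a single representative value, whereas the real decision is made
// per request by Replica.applyTimestampCache exercised in the test.
func closedTimestampPushCountedSketch(batchTS, minReadTS, maxTSCacheEntry hlc.Timestamp) bool {
	pushedByFloor := batchTS.Less(minReadTS)            // batch timestamp was below the closed timestamp
	dominatedByCache := minReadTS.Less(maxTSCacheEntry) // a cache entry pushes it even higher
	return pushedByFloor && !dominatedByCache
}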
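
// Editor's note: TestContainsEstimatesClampProposal and
// TestContainsEstimatesClampApplication above encode the ContainsEstimates migration
// rule described in their doc comments. The sketch below restates that rule in
// isolation; it is illustrative only (hypothetical helpers, not the production code
// path), assuming the doubling behavior implied by the proposal test's expectation of
// 4 for an evaluated delta of 2.
func clampContainsEstimatesForProposalSketch(evaluatedDelta int64, versionSupportsCounter bool) int64 {
	if !versionSupportsCounter {
		// Pre-VersionContainsEstimatesCounter, ContainsEstimates behaves like a bool and
		// is clamped to {0, 1}.
		if evaluatedDelta > 0 {
			return 1
		}
		return 0
	}
	// Newer proposers keep a counter and make it even so it can never be mistaken for
	// the legacy bool value 1.
	return 2 * evaluatedDelta
}

func applyContainsEstimatesSketch(state, delta int64) int64 {
	// A delta of exactly 1 may come from an old-version proposer, so it is not allowed
	// to accumulate past 1 in the replica state, matching
	// TestContainsEstimatesClampApplication.
	if delta == 1 && state <= 1 {
		return 1
	}
	return state + delta
}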