github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/txn_wait_queue_test.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvserver 12 13 import ( 14 "bytes" 15 "context" 16 "fmt" 17 "reflect" 18 "regexp" 19 "sync/atomic" 20 "testing" 21 "time" 22 23 "github.com/cockroachdb/cockroach/pkg/keys" 24 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" 25 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/txnwait" 26 "github.com/cockroachdb/cockroach/pkg/roachpb" 27 "github.com/cockroachdb/cockroach/pkg/storage" 28 "github.com/cockroachdb/cockroach/pkg/storage/enginepb" 29 "github.com/cockroachdb/cockroach/pkg/testutils" 30 "github.com/cockroachdb/cockroach/pkg/util/hlc" 31 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 32 "github.com/cockroachdb/cockroach/pkg/util/stop" 33 "github.com/cockroachdb/cockroach/pkg/util/uuid" 34 "github.com/cockroachdb/errors" 35 "github.com/stretchr/testify/assert" 36 "github.com/stretchr/testify/require" 37 ) 38 39 func writeTxnRecord(ctx context.Context, tc *testContext, txn *roachpb.Transaction) error { 40 key := keys.TransactionKey(txn.Key, txn.ID) 41 return storage.MVCCPutProto(ctx, tc.store.Engine(), nil, key, hlc.Timestamp{}, nil, txn) 42 } 43 44 // createTxnForPushQueue creates a txn struct and writes a "fake" 45 // transaction record for it to the underlying engine. 46 func createTxnForPushQueue(ctx context.Context, tc *testContext) (*roachpb.Transaction, error) { 47 txn := newTransaction("txn", roachpb.Key("a"), 1, tc.Clock()) 48 return txn, writeTxnRecord(ctx, tc, txn) 49 } 50 51 type RespWithErr struct { 52 resp *roachpb.PushTxnResponse 53 pErr *roachpb.Error 54 } 55 56 func checkAllGaugesZero(tc testContext) error { 57 m := tc.store.txnWaitMetrics 58 if act := m.PusheeWaiting.Value(); act != 0 { 59 return errors.Errorf("expected PusheeWaiting to be 0, got %d instead", act) 60 } 61 if act := m.PusherWaiting.Value(); act != 0 { 62 return errors.Errorf("expected PusherWaiting to be 0, got %d instead", act) 63 } 64 if act := m.QueryWaiting.Value(); act != 0 { 65 return errors.Errorf("expected QueryWaiting to be 0, got %d instead", act) 66 } 67 if act := m.PusherSlow.Value(); act != 0 { 68 return errors.Errorf("expected PusherSlow to be 0, got %d instead", act) 69 } 70 return nil 71 } 72 73 func TestTxnWaitQueueEnableDisable(t *testing.T) { 74 defer leaktest.AfterTest(t)() 75 tc := testContext{} 76 stopper := stop.NewStopper() 77 defer stopper.Stop(context.Background()) 78 tc.Start(t, stopper) 79 80 txn, err := createTxnForPushQueue(context.Background(), &tc) 81 if err != nil { 82 t.Fatal(err) 83 } 84 85 // Queue starts enabled. 86 q := tc.repl.concMgr.TxnWaitQueue() 87 if !q.IsEnabled() { 88 t.Errorf("expected push txn queue is enabled") 89 } 90 if err := checkAllGaugesZero(tc); err != nil { 91 t.Fatal(err.Error()) 92 } 93 94 q.EnqueueTxn(txn) 95 if _, ok := q.TrackedTxns()[txn.ID]; !ok { 96 t.Fatalf("expected pendingTxn to be in txns map after enqueue") 97 } 98 m := tc.store.txnWaitMetrics 99 assert.EqualValues(tc, 1, m.PusheeWaiting.Value()) 100 101 pusher := newTransaction("pusher", roachpb.Key("a"), 1, tc.Clock()) 102 req := roachpb.PushTxnRequest{ 103 PushType: roachpb.PUSH_ABORT, 104 PusherTxn: *pusher, 105 PusheeTxn: txn.TxnMeta, 106 } 107 108 retCh := make(chan RespWithErr, 1) 109 go func() { 110 resp, pErr := q.MaybeWaitForPush(context.Background(), &req) 111 retCh <- RespWithErr{resp, pErr} 112 }() 113 114 testutils.SucceedsSoon(t, func() error { 115 expDeps := []uuid.UUID{pusher.ID} 116 if deps := q.GetDependents(txn.ID); !reflect.DeepEqual(deps, expDeps) { 117 return errors.Errorf("expected GetDependents %+v; got %+v", expDeps, deps) 118 } 119 if act, exp := m.PusherWaiting.Value(), int64(1); act != exp { 120 return errors.Errorf("%d pushers, but want %d", act, exp) 121 } 122 if act, exp := m.PusheeWaiting.Value(), int64(1); act != exp { 123 return errors.Errorf("%d pushees, but want %d", act, exp) 124 } 125 126 return nil 127 }) 128 129 // Now disable the queue and make sure the waiter is returned. 130 q.Clear(true /* disable */) 131 if q.IsEnabled() { 132 t.Errorf("expected queue to be disabled") 133 } 134 if err := checkAllGaugesZero(tc); err != nil { 135 t.Fatal(err.Error()) 136 } 137 138 respWithErr := <-retCh 139 if respWithErr.resp != nil { 140 t.Errorf("expected nil response; got %+v", respWithErr.resp) 141 } 142 if respWithErr.pErr != nil { 143 t.Errorf("expected nil err; got %+v", respWithErr.pErr) 144 } 145 146 if deps := q.GetDependents(txn.ID); deps != nil { 147 t.Errorf("expected GetDependents to return nil as queue is disabled; got %+v", deps) 148 } 149 150 q.EnqueueTxn(txn) 151 if q.IsEnabled() { 152 t.Errorf("expected enqueue to silently fail since queue is disabled") 153 } 154 if err := checkAllGaugesZero(tc); err != nil { 155 t.Fatal(err.Error()) 156 } 157 158 q.UpdateTxn(context.Background(), txn) 159 if len(q.TrackedTxns()) != 0 { 160 t.Fatalf("expected update to silently fail since queue is disabled") 161 } 162 163 if resp, pErr := q.MaybeWaitForPush(context.Background(), &req); resp != nil || pErr != nil { 164 t.Errorf("expected nil resp and err as queue is disabled; got %+v, %s", resp, pErr) 165 } 166 if err := checkAllGaugesZero(tc); err != nil { 167 t.Fatal(err.Error()) 168 } 169 } 170 171 func TestTxnWaitQueueCancel(t *testing.T) { 172 defer leaktest.AfterTest(t)() 173 tc := testContext{} 174 stopper := stop.NewStopper() 175 defer stopper.Stop(context.Background()) 176 tc.Start(t, stopper) 177 178 txn, err := createTxnForPushQueue(context.Background(), &tc) 179 if err != nil { 180 t.Fatal(err) 181 } 182 pusher := newTransaction("pusher", roachpb.Key("a"), 1, tc.Clock()) 183 req := roachpb.PushTxnRequest{ 184 PushType: roachpb.PUSH_ABORT, 185 PusherTxn: *pusher, 186 PusheeTxn: txn.TxnMeta, 187 } 188 189 q := tc.repl.concMgr.TxnWaitQueue() 190 q.Enable() 191 if err := checkAllGaugesZero(tc); err != nil { 192 t.Fatal(err.Error()) 193 } 194 q.EnqueueTxn(txn) 195 m := tc.store.txnWaitMetrics 196 assert.EqualValues(tc, 1, m.PusheeWaiting.Value()) 197 assert.EqualValues(tc, 0, m.PusherWaiting.Value()) 198 199 ctx, cancel := context.WithCancel(context.Background()) 200 retCh := make(chan RespWithErr, 1) 201 go func() { 202 resp, pErr := q.MaybeWaitForPush(ctx, &req) 203 retCh <- RespWithErr{resp, pErr} 204 }() 205 206 testutils.SucceedsSoon(t, func() error { 207 select { 208 case rwe := <-retCh: 209 t.Fatalf("MaybeWaitForPush terminated prematurely: %+v", rwe) 210 default: 211 } 212 expDeps := []uuid.UUID{pusher.ID} 213 if deps := q.GetDependents(txn.ID); !reflect.DeepEqual(deps, expDeps) { 214 return errors.Errorf("expected GetDependents %+v; got %+v", expDeps, deps) 215 } 216 if act, exp := m.PusherWaiting.Value(), int64(1); act != exp { 217 return errors.Errorf("%d pushers, but want %d", act, exp) 218 } 219 return nil 220 }) 221 cancel() 222 223 respWithErr := <-retCh 224 if respWithErr.resp != nil { 225 t.Errorf("expected nil response; got %+v", respWithErr.resp) 226 } 227 if !testutils.IsPError(respWithErr.pErr, context.Canceled.Error()) { 228 t.Errorf("expected context canceled error; got %v", respWithErr.pErr) 229 } 230 } 231 232 // TestTxnWaitQueueUpdateTxn creates two waiters on a txn and verifies 233 // both are returned when the txn is updated. 234 func TestTxnWaitQueueUpdateTxn(t *testing.T) { 235 defer leaktest.AfterTest(t)() 236 tc := testContext{} 237 stopper := stop.NewStopper() 238 defer stopper.Stop(context.Background()) 239 tc.Start(t, stopper) 240 241 txn, err := createTxnForPushQueue(context.Background(), &tc) 242 if err != nil { 243 t.Fatal(err) 244 } 245 pusher1 := newTransaction("pusher1", roachpb.Key("a"), 1, tc.Clock()) 246 pusher2 := newTransaction("pusher2", roachpb.Key("a"), 1, tc.Clock()) 247 req1 := roachpb.PushTxnRequest{ 248 PushType: roachpb.PUSH_ABORT, 249 PusherTxn: *pusher1, 250 PusheeTxn: txn.TxnMeta, 251 } 252 req2 := req1 253 req2.PusherTxn = *pusher2 254 255 q := tc.repl.concMgr.TxnWaitQueue() 256 q.Enable() 257 q.EnqueueTxn(txn) 258 m := tc.store.txnWaitMetrics 259 assert.EqualValues(tc, 1, m.PusheeWaiting.Value()) 260 261 retCh := make(chan RespWithErr, 2) 262 go func() { 263 resp, pErr := q.MaybeWaitForPush(context.Background(), &req1) 264 retCh <- RespWithErr{resp, pErr} 265 }() 266 testutils.SucceedsSoon(t, func() error { 267 expDeps := []uuid.UUID{pusher1.ID} 268 if deps := q.GetDependents(txn.ID); !reflect.DeepEqual(deps, expDeps) { 269 return errors.Errorf("expected GetDependents %+v; got %+v", expDeps, deps) 270 } 271 return nil 272 }) 273 testutils.SucceedsSoon(t, func() error { 274 if act, exp := m.PusherWaiting.Value(), int64(1); act != exp { 275 return errors.Errorf("%d pushers, but want %d", act, exp) 276 } 277 if act, exp := m.PusheeWaiting.Value(), int64(1); act != exp { 278 return errors.Errorf("%d pushees, but want %d", act, exp) 279 } 280 if act, exp := m.QueryWaiting.Value(), int64(1); act != exp { 281 return errors.Errorf("%d queries, but want %d", act, exp) 282 } 283 return nil 284 }) 285 286 go func() { 287 resp, pErr := q.MaybeWaitForPush(context.Background(), &req2) 288 retCh <- RespWithErr{resp, pErr} 289 }() 290 testutils.SucceedsSoon(t, func() error { 291 expDeps := []uuid.UUID{pusher1.ID, pusher2.ID} 292 if deps := q.GetDependents(txn.ID); !reflect.DeepEqual(deps, expDeps) { 293 return errors.Errorf("expected GetDependents %+v; got %+v", expDeps, deps) 294 } 295 if act, exp := m.PusherWaiting.Value(), int64(2); act != exp { 296 return errors.Errorf("%d pushers, but want %d", act, exp) 297 } 298 if act, exp := m.PusheeWaiting.Value(), int64(1); act != exp { 299 return errors.Errorf("%d pushees, but want %d", act, exp) 300 } 301 if act, exp := m.QueryWaiting.Value(), int64(2); act != exp { 302 return errors.Errorf("%d queries, but want %d", act, exp) 303 } 304 return nil 305 }) 306 307 updatedTxn := *txn 308 updatedTxn.Status = roachpb.COMMITTED 309 q.UpdateTxn(context.Background(), &updatedTxn) 310 testutils.SucceedsSoon(tc.TB, func() error { 311 return checkAllGaugesZero(tc) 312 }) 313 314 for i := 0; i < 2; i++ { 315 respWithErr := <-retCh 316 if respWithErr.resp == nil || respWithErr.resp.PusheeTxn.Status != roachpb.COMMITTED { 317 t.Errorf("expected committed txn response; got %+v, err=%v", respWithErr.resp, respWithErr.pErr) 318 } 319 } 320 } 321 322 // TestTxnWaitQueueTxnSilentlyCompletes creates a waiter on a txn and verifies 323 // that the waiter is eventually unblocked when the txn commits but UpdateTxn is 324 // not called. 325 // 326 // This simulates the following observed sequence of events. A transaction, TA, 327 // writes a key K. Another transaction, TB, attempts to read K. It notices the 328 // intent on K and sends a PushTxnRequest. The PushTxnRequest fails and returns 329 // a TransactionPushError. Before the replica handles the TransactionPushError, 330 // TA commits and the replica fully processes its EndTxnRequest. Only then does 331 // the replica notice the TransactionPushError and put TB's PushTxnRequest into 332 // TA's wait queue. Updates to TA will never be sent via Queue.UpdateTxn, 333 // because Queue.UpdateTxn was already called when the EndTxnRequest was 334 // processed, before TB's PushTxnRequest was in TA's wait queue. 335 // 336 // This sequence of events was previously mishandled when TA's transaction 337 // record was not immediately cleaned up, e.g. because it had non-local intents. 338 // The wait queue would continually poll TA's transaction record, notice it 339 // still existed, and continue waiting. In production, this meant that the 340 // PushTxnRequest would get stuck waiting out the full TxnLivenessThreshold for 341 // the transaction record to expire. In unit tests, where the clock might never 342 // be advanced, the PushTxnRequest could get stuck forever. 343 func TestTxnWaitQueueTxnSilentlyCompletes(t *testing.T) { 344 defer leaktest.AfterTest(t)() 345 // This test relies on concurrently waiting for a value to change in the 346 // underlying engine(s). Since the teeing engine does not respond well to 347 // value mismatches, whether transient or permanent, skip this test if the 348 // teeing engine is being used. See 349 // https://github.com/cockroachdb/cockroach/issues/42656 for more context. 350 if storage.DefaultStorageEngine == enginepb.EngineTypeTeePebbleRocksDB { 351 t.Skip("disabled on teeing engine") 352 } 353 tc := testContext{} 354 ctx := context.Background() 355 stopper := stop.NewStopper() 356 defer stopper.Stop(ctx) 357 tc.Start(t, stopper) 358 359 txn, err := createTxnForPushQueue(ctx, &tc) 360 if err != nil { 361 t.Fatal(err) 362 } 363 pusher := newTransaction("pusher", roachpb.Key("a"), 1, tc.Clock()) 364 req := &roachpb.PushTxnRequest{ 365 RequestHeader: roachpb.RequestHeader{ 366 Key: txn.Key, 367 }, 368 PushType: roachpb.PUSH_ABORT, 369 PusherTxn: *pusher, 370 PusheeTxn: txn.TxnMeta, 371 } 372 373 q := tc.repl.concMgr.TxnWaitQueue() 374 q.Enable() 375 q.EnqueueTxn(txn) 376 377 retCh := make(chan RespWithErr, 2) 378 go func() { 379 resp, pErr := q.MaybeWaitForPush(context.Background(), req) 380 retCh <- RespWithErr{resp, pErr} 381 }() 382 383 m := tc.store.txnWaitMetrics 384 testutils.SucceedsSoon(t, func() error { 385 expDeps := []uuid.UUID{pusher.ID} 386 if deps := q.GetDependents(txn.ID); !reflect.DeepEqual(deps, expDeps) { 387 return errors.Errorf("expected GetDependents %+v; got %+v", expDeps, deps) 388 } 389 if act, exp := m.PusherWaiting.Value(), int64(1); act != exp { 390 return errors.Errorf("%d pushers, but want %d", act, exp) 391 } 392 if act, exp := m.PusheeWaiting.Value(), int64(1); act != exp { 393 return errors.Errorf("%d pushees, but want %d", act, exp) 394 } 395 if act, exp := m.QueryWaiting.Value(), int64(1); act != exp { 396 return errors.Errorf("%d queries, but want %d", act, exp) 397 } 398 return nil 399 }) 400 401 txn.Status = roachpb.COMMITTED 402 if err := writeTxnRecord(ctx, &tc, txn); err != nil { 403 t.Fatal(err) 404 } 405 406 // Skip calling q.UpdateTxn to test that the wait queue periodically polls 407 // txn's record and notices when it is no longer pending. 408 409 respWithErr := <-retCh 410 if respWithErr.resp == nil || respWithErr.resp.PusheeTxn.Status != roachpb.COMMITTED { 411 t.Errorf("expected committed txn response; got %+v, err=%v", respWithErr.resp, respWithErr.pErr) 412 } 413 testutils.SucceedsSoon(t, func() error { 414 if act, exp := m.PusherWaiting.Value(), int64(1); act != exp { 415 return errors.Errorf("%d pushers, but want %d", act, exp) 416 } 417 if act, exp := m.PusheeWaiting.Value(), int64(1); act != exp { 418 return errors.Errorf("%d pushees, but want %d", act, exp) 419 } 420 if act, exp := m.QueryWaiting.Value(), int64(0); act != exp { 421 return errors.Errorf("%d queries, but want %d", act, exp) 422 } 423 return nil 424 }) 425 } 426 427 // TestTxnWaitQueueUpdateNotPushedTxn verifies that no PushTxnResponse 428 // is returned in the event that the pushee txn only has its timestamp 429 // updated. 430 func TestTxnWaitQueueUpdateNotPushedTxn(t *testing.T) { 431 defer leaktest.AfterTest(t)() 432 tc := testContext{} 433 stopper := stop.NewStopper() 434 defer stopper.Stop(context.Background()) 435 tc.Start(t, stopper) 436 437 txn, err := createTxnForPushQueue(context.Background(), &tc) 438 if err != nil { 439 t.Fatal(err) 440 } 441 pusher := newTransaction("pusher", roachpb.Key("a"), 1, tc.Clock()) 442 req := roachpb.PushTxnRequest{ 443 PushType: roachpb.PUSH_ABORT, 444 PusherTxn: *pusher, 445 PusheeTxn: txn.TxnMeta, 446 } 447 448 q := tc.repl.concMgr.TxnWaitQueue() 449 q.Enable() 450 q.EnqueueTxn(txn) 451 452 retCh := make(chan RespWithErr, 1) 453 go func() { 454 resp, pErr := q.MaybeWaitForPush(context.Background(), &req) 455 retCh <- RespWithErr{resp, pErr} 456 }() 457 458 testutils.SucceedsSoon(t, func() error { 459 expDeps := []uuid.UUID{pusher.ID} 460 if deps := q.GetDependents(txn.ID); !reflect.DeepEqual(deps, expDeps) { 461 return errors.Errorf("expected GetDependents %+v; got %+v", expDeps, deps) 462 } 463 return nil 464 }) 465 466 updatedTxn := *txn 467 updatedTxn.WriteTimestamp = txn.WriteTimestamp.Add(1, 0) 468 q.UpdateTxn(context.Background(), &updatedTxn) 469 470 respWithErr := <-retCh 471 if respWithErr.resp != nil { 472 t.Errorf("on non-committed txn update, expected nil response; got %+v", respWithErr.resp) 473 } 474 if respWithErr.pErr != nil { 475 t.Errorf("expected nil error; got %s", respWithErr.pErr) 476 } 477 testutils.SucceedsSoon(tc.TB, func() error { 478 return checkAllGaugesZero(tc) 479 }) 480 } 481 482 // TestTxnWaitQueuePusheeExpires verifies that just one pusher is 483 // returned when the pushee's txn may have expired. 484 func TestTxnWaitQueuePusheeExpires(t *testing.T) { 485 defer leaktest.AfterTest(t)() 486 var queryTxnCount int32 487 488 manual := hlc.NewManualClock(123) 489 clock := hlc.NewClock(manual.UnixNano, time.Nanosecond) 490 txn := newTransaction("txn", roachpb.Key("a"), 1, clock) 491 // Move the clock forward so that when the PushTxn is sent, the txn appears 492 // expired. 493 manual.Set(txnwait.TxnExpiration(txn).WallTime) 494 495 tc := testContext{} 496 tsc := TestStoreConfig(clock) 497 tsc.TestingKnobs.EvalKnobs.TestingEvalFilter = 498 func(filterArgs kvserverbase.FilterArgs) *roachpb.Error { 499 if qtReq, ok := filterArgs.Req.(*roachpb.QueryTxnRequest); ok && bytes.Equal(qtReq.Txn.Key, txn.Key) { 500 atomic.AddInt32(&queryTxnCount, 1) 501 } 502 return nil 503 } 504 stopper := stop.NewStopper() 505 defer stopper.Stop(context.Background()) 506 tc.StartWithStoreConfig(t, stopper, tsc) 507 508 pusher1 := newTransaction("pusher1", roachpb.Key("a"), 1, tc.Clock()) 509 pusher2 := newTransaction("pusher2", roachpb.Key("a"), 1, tc.Clock()) 510 req1 := roachpb.PushTxnRequest{ 511 PushType: roachpb.PUSH_ABORT, 512 PusherTxn: *pusher1, 513 PusheeTxn: txn.TxnMeta, 514 } 515 req2 := req1 516 req2.PusherTxn = *pusher2 517 518 // Create a "fake" txn record. 519 if err := writeTxnRecord(context.Background(), &tc, txn); err != nil { 520 t.Fatal(err) 521 } 522 523 q := tc.repl.concMgr.TxnWaitQueue() 524 q.Enable() 525 q.EnqueueTxn(txn) 526 527 retCh := make(chan RespWithErr, 2) 528 go func() { 529 resp, pErr := q.MaybeWaitForPush(context.Background(), &req1) 530 retCh <- RespWithErr{resp, pErr} 531 }() 532 testutils.SucceedsSoon(t, func() error { 533 expDeps := []uuid.UUID{pusher1.ID} 534 if deps := q.GetDependents(txn.ID); !reflect.DeepEqual(deps, expDeps) { 535 return errors.Errorf("expected GetDependents %+v; got %+v", expDeps, deps) 536 } 537 return nil 538 }) 539 540 go func() { 541 resp, pErr := q.MaybeWaitForPush(context.Background(), &req2) 542 retCh <- RespWithErr{resp, pErr} 543 }() 544 testutils.SucceedsSoon(t, func() error { 545 expDeps := []uuid.UUID{pusher1.ID, pusher2.ID} 546 if deps := q.GetDependents(txn.ID); !reflect.DeepEqual(deps, expDeps) { 547 return errors.Errorf("expected GetDependents %+v; got %+v", expDeps, deps) 548 } 549 return nil 550 }) 551 552 for i := 0; i < 2; i++ { 553 respWithErr := <-retCh 554 if respWithErr.resp != nil { 555 t.Errorf("expected nil txn response; got %+v", respWithErr.resp) 556 } 557 if respWithErr.pErr != nil { 558 t.Errorf("expected nil error; got %s", respWithErr.pErr) 559 } 560 } 561 562 m := tc.store.txnWaitMetrics 563 testutils.SucceedsSoon(t, func() error { 564 if act, exp := m.PusherWaiting.Value(), int64(2); act != exp { 565 return errors.Errorf("%d pushers, but want %d", act, exp) 566 } 567 if act, exp := m.PusheeWaiting.Value(), int64(1); act != exp { 568 return errors.Errorf("%d pushees, but want %d", act, exp) 569 } 570 if act, exp := m.QueryWaiting.Value(), int64(0); act != exp { 571 return errors.Errorf("%d queries, but want %d", act, exp) 572 } 573 return nil 574 }) 575 if a, minExpected := atomic.LoadInt32(&queryTxnCount), int32(2); a < minExpected { 576 t.Errorf("expected no fewer than %d query txns; got %d", minExpected, a) 577 } 578 } 579 580 // TestTxnWaitQueuePusherUpdate verifies that the pusher's status is 581 // periodically updated and will notice if the pusher has been aborted. 582 func TestTxnWaitQueuePusherUpdate(t *testing.T) { 583 defer leaktest.AfterTest(t)() 584 585 testutils.RunTrueAndFalse(t, "txnRecordExists", func(t *testing.T, txnRecordExists bool) { 586 // Test with the pusher txn record below the pusher's expected epoch, at 587 // the pusher's expected epoch, and above the pusher's expected epoch. 588 // Regardless of which epoch the transaction record is written at, if 589 // it is marked as ABORTED, it should terminate the push. 590 pushEpoch := enginepb.TxnEpoch(2) 591 for _, c := range []struct { 592 name string 593 recordEpoch enginepb.TxnEpoch 594 }{ 595 {"below", pushEpoch - 1}, 596 {"equal", pushEpoch}, 597 {"above", pushEpoch + 1}, 598 } { 599 t.Run(fmt.Sprintf("recordEpoch=%s", c.name), func(t *testing.T) { 600 tc := testContext{} 601 stopper := stop.NewStopper() 602 defer stopper.Stop(context.Background()) 603 tc.Start(t, stopper) 604 605 txn, err := createTxnForPushQueue(context.Background(), &tc) 606 if err != nil { 607 t.Fatal(err) 608 } 609 var pusher *roachpb.Transaction 610 if txnRecordExists { 611 pusher, err = createTxnForPushQueue(context.Background(), &tc) 612 if err != nil { 613 t.Fatal(err) 614 } 615 } else { 616 pusher = newTransaction("pusher", roachpb.Key("a"), 1, tc.Clock()) 617 } 618 pusher.Epoch = pushEpoch 619 620 req := roachpb.PushTxnRequest{ 621 PushType: roachpb.PUSH_ABORT, 622 PusherTxn: *pusher, 623 PusheeTxn: txn.TxnMeta, 624 } 625 626 q := tc.repl.concMgr.TxnWaitQueue() 627 q.Enable() 628 q.EnqueueTxn(txn) 629 630 retCh := make(chan RespWithErr, 1) 631 go func() { 632 resp, pErr := q.MaybeWaitForPush(context.Background(), &req) 633 retCh <- RespWithErr{resp, pErr} 634 }() 635 636 testutils.SucceedsSoon(t, func() error { 637 expDeps := []uuid.UUID{pusher.ID} 638 if deps := q.GetDependents(txn.ID); !reflect.DeepEqual(deps, expDeps) { 639 return errors.Errorf("expected GetDependents %+v; got %+v", expDeps, deps) 640 } 641 return nil 642 }) 643 644 // If the record doesn't exist yet, give the push queue enough 645 // time to query the missing record and notice. 646 if !txnRecordExists { 647 time.Sleep(10 * time.Millisecond) 648 } 649 650 // Update txn on disk with status ABORTED. 651 pusherUpdate := *pusher 652 pusherUpdate.Epoch = c.recordEpoch 653 pusherUpdate.Status = roachpb.ABORTED 654 if err := writeTxnRecord(context.Background(), &tc, &pusherUpdate); err != nil { 655 t.Fatal(err) 656 } 657 q.UpdateTxn(context.Background(), &pusherUpdate) 658 659 respWithErr := <-retCh 660 if respWithErr.resp != nil { 661 t.Errorf("expected nil response; got %+v", respWithErr.resp) 662 } 663 expErr := "TransactionAbortedError(ABORT_REASON_PUSHER_ABORTED)" 664 if !testutils.IsPError(respWithErr.pErr, regexp.QuoteMeta(expErr)) { 665 t.Errorf("expected %s; got %v", expErr, respWithErr.pErr) 666 } 667 668 m := tc.store.txnWaitMetrics 669 testutils.SucceedsSoon(t, func() error { 670 if act, exp := m.PusherWaiting.Value(), int64(1); act != exp { 671 return errors.Errorf("%d pushers, but want %d", act, exp) 672 } 673 if act, exp := m.PusheeWaiting.Value(), int64(1); act != exp { 674 return errors.Errorf("%d pushees, but want %d", act, exp) 675 } 676 if act, exp := m.QueryWaiting.Value(), int64(0); act != exp { 677 return errors.Errorf("%d queries, but want %d", act, exp) 678 } 679 return nil 680 }) 681 }) 682 } 683 }) 684 } 685 686 type ReqWithRespAndErr struct { 687 req *roachpb.PushTxnRequest 688 resp *roachpb.PushTxnResponse 689 pErr *roachpb.Error 690 } 691 692 // TestTxnWaitQueueDependencyCycle verifies that if txn A pushes txn B 693 // pushes txn C which in turn is pushing txn A, the cycle will be 694 // detected and broken by a higher priority pusher. 695 func TestTxnWaitQueueDependencyCycle(t *testing.T) { 696 defer leaktest.AfterTest(t)() 697 tc := testContext{} 698 stopper := stop.NewStopper() 699 defer stopper.Stop(context.Background()) 700 tc.Start(t, stopper) 701 702 txnA, err := createTxnForPushQueue(context.Background(), &tc) 703 if err != nil { 704 t.Fatal(err) 705 } 706 txnB, err := createTxnForPushQueue(context.Background(), &tc) 707 if err != nil { 708 t.Fatal(err) 709 } 710 txnC, err := createTxnForPushQueue(context.Background(), &tc) 711 if err != nil { 712 t.Fatal(err) 713 } 714 715 reqA := &roachpb.PushTxnRequest{ 716 RequestHeader: roachpb.RequestHeader{ 717 Key: txnB.Key, 718 }, 719 PushType: roachpb.PUSH_ABORT, 720 PusherTxn: *txnA, 721 PusheeTxn: txnB.TxnMeta, 722 } 723 reqB := &roachpb.PushTxnRequest{ 724 RequestHeader: roachpb.RequestHeader{ 725 Key: txnC.Key, 726 }, 727 PushType: roachpb.PUSH_ABORT, 728 PusherTxn: *txnB, 729 PusheeTxn: txnC.TxnMeta, 730 } 731 reqC := &roachpb.PushTxnRequest{ 732 RequestHeader: roachpb.RequestHeader{ 733 Key: txnA.Key, 734 }, 735 PushType: roachpb.PUSH_ABORT, 736 PusherTxn: *txnC, 737 PusheeTxn: txnA.TxnMeta, 738 } 739 740 q := tc.repl.concMgr.TxnWaitQueue() 741 q.Enable() 742 743 ctx, cancel := context.WithCancel(context.Background()) 744 defer cancel() 745 for _, txn := range []*roachpb.Transaction{txnA, txnB, txnC} { 746 q.EnqueueTxn(txn) 747 } 748 m := tc.store.txnWaitMetrics 749 assert.EqualValues(tc, 0, m.DeadlocksTotal.Count()) 750 751 reqs := []*roachpb.PushTxnRequest{reqA, reqB, reqC} 752 retCh := make(chan ReqWithRespAndErr, len(reqs)) 753 for _, req := range reqs { 754 go func(req *roachpb.PushTxnRequest) { 755 resp, pErr := q.MaybeWaitForPush(ctx, req) 756 retCh <- ReqWithRespAndErr{req, resp, pErr} 757 }(req) 758 } 759 760 // Wait for first request to finish, which should break the dependency cycle 761 // by performing a force push abort. This will allow all other requests to 762 // proceed. At least one txn will be aborted by another txn, although it's 763 // possible that up to two are in the case that the deadlock is detected by 764 // multiple txns concurrently. 765 var pushed bool 766 for i := 0; i < len(reqs); i++ { 767 ret := <-retCh 768 if ret.pErr != nil { 769 if !testutils.IsPError(ret.pErr, context.Canceled.Error()) { 770 require.Regexp(t, `TransactionAbortedError\(ABORT_REASON_PUSHER_ABORTED\)`, ret.pErr) 771 } 772 } else { 773 pushed = true 774 require.NotNil(t, ret.resp) 775 require.Equal(t, roachpb.ABORTED, ret.resp.PusheeTxn.Status) 776 777 // Cancel the pushers' context after the deadlock is initially broken. 778 cancel() 779 } 780 } 781 require.True(t, pushed) 782 require.GreaterOrEqual(t, m.DeadlocksTotal.Count(), int64(1)) 783 } 784 785 // TestTxnWaitQueueDependencyCycleWithPriorityInversion verifies that 786 // priority inversions between two dependent transactions are noticed 787 // and the dependency is appropriately broken. 788 func TestTxnWaitQueueDependencyCycleWithPriorityInversion(t *testing.T) { 789 defer leaktest.AfterTest(t)() 790 tc := testContext{} 791 stopper := stop.NewStopper() 792 defer stopper.Stop(context.Background()) 793 tc.Start(t, stopper) 794 795 // Create txnA with a lower priority so it won't think it could push 796 // txnB without updating its priority. 797 txnA := newTransaction("txn", roachpb.Key("a"), -1, tc.Clock()) 798 // However, write an "updated" txnA with higher priority, which it 799 // will need to read via a QueryTxn request in order to realize it 800 // can in fact break the deadlock. 801 updatedTxnA := *txnA 802 updatedTxnA.Priority = 3 803 if err := writeTxnRecord(context.Background(), &tc, &updatedTxnA); err != nil { 804 t.Fatal(err) 805 } 806 // Create txnB with priority=2, so txnA won't think it can push, but 807 // when we set up txnB as the pusher, the request will include txnA's 808 // updated priority, making txnB think it can't break a deadlock. 809 txnB := newTransaction("txn", roachpb.Key("a"), -2, tc.Clock()) 810 if err := writeTxnRecord(context.Background(), &tc, txnB); err != nil { 811 t.Fatal(err) 812 } 813 814 reqA := &roachpb.PushTxnRequest{ 815 RequestHeader: roachpb.RequestHeader{ 816 Key: txnB.Key, 817 }, 818 PushType: roachpb.PUSH_ABORT, 819 PusherTxn: *txnA, 820 PusheeTxn: txnB.TxnMeta, 821 } 822 reqB := &roachpb.PushTxnRequest{ 823 RequestHeader: roachpb.RequestHeader{ 824 Key: txnA.Key, 825 }, 826 PushType: roachpb.PUSH_ABORT, 827 PusherTxn: *txnB, 828 PusheeTxn: updatedTxnA.TxnMeta, 829 } 830 831 q := tc.repl.concMgr.TxnWaitQueue() 832 q.Enable() 833 834 for _, txn := range []*roachpb.Transaction{txnA, txnB} { 835 q.EnqueueTxn(txn) 836 } 837 m := tc.store.txnWaitMetrics 838 assert.EqualValues(tc, 0, m.DeadlocksTotal.Count()) 839 840 reqs := []*roachpb.PushTxnRequest{reqA, reqB} 841 retCh := make(chan ReqWithRespAndErr, len(reqs)) 842 for _, req := range reqs { 843 go func(req *roachpb.PushTxnRequest) { 844 resp, pErr := q.MaybeWaitForPush(context.Background(), req) 845 retCh <- ReqWithRespAndErr{req, resp, pErr} 846 }(req) 847 } 848 849 // Wait for the requests to finish. reqA should break the dependency 850 // cycle by force pushing. reqB should notice that it was aborted. 851 for i := 0; i < len(reqs); i++ { 852 ret := <-retCh 853 switch ret.req { 854 case reqA: 855 require.Nil(t, ret.pErr) 856 require.NotNil(t, ret.resp) 857 require.Equal(t, txnB.ID, ret.resp.PusheeTxn.ID) 858 require.Equal(t, roachpb.ABORTED, ret.resp.PusheeTxn.Status) 859 case reqB: 860 require.Regexp(t, `TransactionAbortedError\(ABORT_REASON_PUSHER_ABORTED\)`, ret.pErr) 861 default: 862 t.Fatal("unexpected") 863 } 864 } 865 require.EqualValues(t, 1, m.DeadlocksTotal.Count()) 866 }