github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/txnrecovery/manager_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package txnrecovery
    12  
    13  import (
    14  	"context"
    15  	"testing"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/kv"
    19  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    20  	"github.com/cockroachdb/cockroach/pkg/testutils"
    21  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    22  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    25  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    26  	"github.com/stretchr/testify/assert"
    27  )
    28  
    29  func makeManager(s *kv.Sender) (Manager, *hlc.Clock, *stop.Stopper) {
    30  	ac := log.AmbientContext{Tracer: tracing.NewTracer()}
    31  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
    32  	stopper := stop.NewStopper()
    33  	db := kv.NewDB(ac, kv.NonTransactionalFactoryFunc(func(
    34  		ctx context.Context, ba roachpb.BatchRequest,
    35  	) (*roachpb.BatchResponse, *roachpb.Error) {
    36  		return (*s).Send(ctx, ba)
    37  	}), clock)
    38  	return NewManager(ac, clock, db, stopper), clock, stopper
    39  }
    40  
    41  func makeStagingTransaction(clock *hlc.Clock) roachpb.Transaction {
    42  	now := clock.Now()
    43  	offset := clock.MaxOffset().Nanoseconds()
    44  	txn := roachpb.MakeTransaction("test", roachpb.Key("a"), 0, now, offset)
    45  	txn.Status = roachpb.STAGING
    46  	return txn
    47  }
    48  
    49  type metricVals struct {
    50  	attemptsPending      int64
    51  	attempts             int64
    52  	successesAsCommitted int64
    53  	successesAsAborted   int64
    54  	successesAsPending   int64
    55  	failures             int64
    56  }
    57  
    58  func (v metricVals) merge(o metricVals) metricVals {
    59  	v.attemptsPending += o.attemptsPending
    60  	v.attempts += o.attempts
    61  	v.successesAsCommitted += o.successesAsCommitted
    62  	v.successesAsAborted += o.successesAsAborted
    63  	v.successesAsPending += o.successesAsPending
    64  	v.failures += o.failures
    65  	return v
    66  }
    67  
    68  func assertMetrics(t *testing.T, m Manager, v metricVals) {
    69  	assert.Equal(t, v.attemptsPending, m.Metrics().AttemptsPending.Value())
    70  	assert.Equal(t, v.attempts, m.Metrics().Attempts.Count())
    71  	assert.Equal(t, v.successesAsCommitted, m.Metrics().SuccessesAsCommitted.Count())
    72  	assert.Equal(t, v.successesAsAborted, m.Metrics().SuccessesAsAborted.Count())
    73  	assert.Equal(t, v.successesAsPending, m.Metrics().SuccessesAsPending.Count())
    74  	assert.Equal(t, v.failures, m.Metrics().Failures.Count())
    75  }
    76  
    77  // TestResolveIndeterminateCommit tests successful indeterminate commit
    78  // resolution attempts. It tests the case where an intent is prevented
    79  // and the case where an intent is not prevented.
    80  func TestResolveIndeterminateCommit(t *testing.T) {
    81  	defer leaktest.AfterTest(t)()
    82  
    83  	testutils.RunTrueAndFalse(t, "prevent", func(t *testing.T, prevent bool) {
    84  		var mockSender kv.Sender
    85  		m, clock, stopper := makeManager(&mockSender)
    86  		defer stopper.Stop(context.Background())
    87  
    88  		txn := makeStagingTransaction(clock)
    89  		txn.InFlightWrites = []roachpb.SequencedWrite{
    90  			{Key: roachpb.Key("a"), Sequence: 1},
    91  			{Key: roachpb.Key("b"), Sequence: 2},
    92  		}
    93  
    94  		mockSender = kv.SenderFunc(func(
    95  			_ context.Context, ba roachpb.BatchRequest,
    96  		) (*roachpb.BatchResponse, *roachpb.Error) {
    97  			// Probing Phase.
    98  			assertMetrics(t, m, metricVals{attemptsPending: 1, attempts: 1})
    99  
   100  			assert.Equal(t, 3, len(ba.Requests))
   101  			assert.IsType(t, &roachpb.QueryTxnRequest{}, ba.Requests[0].GetInner())
   102  			assert.IsType(t, &roachpb.QueryIntentRequest{}, ba.Requests[1].GetInner())
   103  			assert.IsType(t, &roachpb.QueryIntentRequest{}, ba.Requests[2].GetInner())
   104  
   105  			assert.Equal(t, roachpb.Key(txn.Key), ba.Requests[0].GetInner().Header().Key)
   106  			assert.Equal(t, roachpb.Key("a"), ba.Requests[1].GetInner().Header().Key)
   107  			assert.Equal(t, roachpb.Key("b"), ba.Requests[2].GetInner().Header().Key)
   108  
   109  			br := ba.CreateReply()
   110  			br.Responses[0].GetInner().(*roachpb.QueryTxnResponse).QueriedTxn = txn
   111  			br.Responses[1].GetInner().(*roachpb.QueryIntentResponse).FoundIntent = true
   112  			br.Responses[2].GetInner().(*roachpb.QueryIntentResponse).FoundIntent = !prevent
   113  
   114  			mockSender = kv.SenderFunc(func(
   115  				_ context.Context, ba roachpb.BatchRequest,
   116  			) (*roachpb.BatchResponse, *roachpb.Error) {
   117  				// Recovery Phase.
   118  				assertMetrics(t, m, metricVals{attemptsPending: 1, attempts: 1})
   119  
   120  				assert.Equal(t, 1, len(ba.Requests))
   121  				assert.IsType(t, &roachpb.RecoverTxnRequest{}, ba.Requests[0].GetInner())
   122  
   123  				recTxnReq := ba.Requests[0].GetInner().(*roachpb.RecoverTxnRequest)
   124  				assert.Equal(t, roachpb.Key(txn.Key), recTxnReq.Key)
   125  				assert.Equal(t, txn.TxnMeta, recTxnReq.Txn)
   126  				assert.Equal(t, !prevent, recTxnReq.ImplicitlyCommitted)
   127  
   128  				br2 := ba.CreateReply()
   129  				recTxnResp := br2.Responses[0].GetInner().(*roachpb.RecoverTxnResponse)
   130  				recTxnResp.RecoveredTxn = txn
   131  				if !prevent {
   132  					recTxnResp.RecoveredTxn.Status = roachpb.COMMITTED
   133  				} else {
   134  					recTxnResp.RecoveredTxn.Status = roachpb.ABORTED
   135  				}
   136  				return br2, nil
   137  			})
   138  			return br, nil
   139  		})
   140  
   141  		assertMetrics(t, m, metricVals{})
   142  		iceErr := roachpb.NewIndeterminateCommitError(txn)
   143  		resTxn, err := m.ResolveIndeterminateCommit(context.Background(), iceErr)
   144  		assert.NotNil(t, resTxn)
   145  		assert.Nil(t, err)
   146  
   147  		if !prevent {
   148  			assert.Equal(t, roachpb.COMMITTED, resTxn.Status)
   149  			assertMetrics(t, m, metricVals{attempts: 1, successesAsCommitted: 1})
   150  		} else {
   151  			assert.Equal(t, roachpb.ABORTED, resTxn.Status)
   152  			assertMetrics(t, m, metricVals{attempts: 1, successesAsAborted: 1})
   153  		}
   154  	})
   155  }
   156  
   157  // TestResolveIndeterminateCommitTxnChanges tests indeterminate commit
   158  // resolution attempts where the transaction record being recovered changes in
   159  // the middle of the process, either due to an active transaction coordinator or
   160  // due to a concurrent recovery.
   161  func TestResolveIndeterminateCommitTxnChanges(t *testing.T) {
   162  	defer leaktest.AfterTest(t)()
   163  
   164  	var mockSender kv.Sender
   165  	m, clock, stopper := makeManager(&mockSender)
   166  	defer stopper.Stop(context.Background())
   167  
   168  	txn := makeStagingTransaction(clock)
   169  	txn.InFlightWrites = []roachpb.SequencedWrite{
   170  		{Key: roachpb.Key("a"), Sequence: 1},
   171  		{Key: roachpb.Key("b"), Sequence: 2},
   172  	}
   173  
   174  	// Maintain an expected aggregation of metric updates.
   175  	var expMetrics metricVals
   176  	assertMetrics(t, m, expMetrics)
   177  
   178  	testCases := []struct {
   179  		name          string
   180  		duringProbing bool
   181  		changedTxn    roachpb.Transaction
   182  		metricImpact  metricVals
   183  	}{
   184  		{
   185  			name:          "transaction commit during probe",
   186  			duringProbing: true,
   187  			changedTxn: func() roachpb.Transaction {
   188  				txnCopy := txn
   189  				txnCopy.Status = roachpb.COMMITTED
   190  				txnCopy.InFlightWrites = nil
   191  				return txnCopy
   192  			}(),
   193  			metricImpact: metricVals{attempts: 1, successesAsCommitted: 1},
   194  		},
   195  		{
   196  			name:          "transaction abort during probe",
   197  			duringProbing: true,
   198  			changedTxn: func() roachpb.Transaction {
   199  				txnCopy := txn
   200  				txnCopy.Status = roachpb.ABORTED
   201  				txnCopy.InFlightWrites = nil
   202  				return txnCopy
   203  			}(),
   204  			metricImpact: metricVals{attempts: 1, successesAsAborted: 1},
   205  		},
   206  		{
   207  			name:          "transaction restart during probe",
   208  			duringProbing: true,
   209  			changedTxn: func() roachpb.Transaction {
   210  				txnCopy := txn
   211  				txnCopy.BumpEpoch()
   212  				return txnCopy
   213  			}(),
   214  			metricImpact: metricVals{attempts: 1, successesAsPending: 1},
   215  		},
   216  		{
   217  			name:          "transaction timestamp increase during probe",
   218  			duringProbing: true,
   219  			changedTxn: func() roachpb.Transaction {
   220  				txnCopy := txn
   221  				txnCopy.WriteTimestamp = txnCopy.WriteTimestamp.Add(1, 0)
   222  				return txnCopy
   223  			}(),
   224  			metricImpact: metricVals{attempts: 1, successesAsPending: 1},
   225  		},
   226  		{
   227  			name:          "transaction commit during recovery",
   228  			duringProbing: false,
   229  			changedTxn: func() roachpb.Transaction {
   230  				txnCopy := txn
   231  				txnCopy.Status = roachpb.COMMITTED
   232  				txnCopy.InFlightWrites = nil
   233  				return txnCopy
   234  			}(),
   235  			metricImpact: metricVals{attempts: 1, successesAsCommitted: 1},
   236  		},
   237  		{
   238  			name:          "transaction abort during recovery",
   239  			duringProbing: false,
   240  			changedTxn: func() roachpb.Transaction {
   241  				txnCopy := txn
   242  				txnCopy.Status = roachpb.ABORTED
   243  				txnCopy.InFlightWrites = nil
   244  				return txnCopy
   245  			}(),
   246  			metricImpact: metricVals{attempts: 1, successesAsAborted: 1},
   247  		},
   248  		{
   249  			name:          "transaction restart during recovery",
   250  			duringProbing: false,
   251  			changedTxn: func() roachpb.Transaction {
   252  				txnCopy := txn
   253  				txnCopy.BumpEpoch()
   254  				return txnCopy
   255  			}(),
   256  			metricImpact: metricVals{attempts: 1, successesAsPending: 1},
   257  		},
   258  		{
   259  			name:          "transaction timestamp increase during recovery",
   260  			duringProbing: false,
   261  			changedTxn: func() roachpb.Transaction {
   262  				txnCopy := txn
   263  				txnCopy.WriteTimestamp = txnCopy.WriteTimestamp.Add(1, 0)
   264  				return txnCopy
   265  			}(),
   266  			metricImpact: metricVals{attempts: 1, successesAsPending: 1},
   267  		},
   268  	}
   269  	for _, c := range testCases {
   270  		t.Run(c.name, func(t *testing.T) {
   271  			mockSender = kv.SenderFunc(func(
   272  				_ context.Context, ba roachpb.BatchRequest,
   273  			) (*roachpb.BatchResponse, *roachpb.Error) {
   274  				// Probing Phase.
   275  				assertMetrics(t, m, expMetrics.merge(metricVals{attemptsPending: 1, attempts: 1}))
   276  
   277  				assert.Equal(t, 3, len(ba.Requests))
   278  				assert.IsType(t, &roachpb.QueryTxnRequest{}, ba.Requests[0].GetInner())
   279  				assert.IsType(t, &roachpb.QueryIntentRequest{}, ba.Requests[1].GetInner())
   280  				assert.IsType(t, &roachpb.QueryIntentRequest{}, ba.Requests[2].GetInner())
   281  
   282  				assert.Equal(t, roachpb.Key(txn.Key), ba.Requests[0].GetInner().Header().Key)
   283  				assert.Equal(t, roachpb.Key("a"), ba.Requests[1].GetInner().Header().Key)
   284  				assert.Equal(t, roachpb.Key("b"), ba.Requests[2].GetInner().Header().Key)
   285  
   286  				br := ba.CreateReply()
   287  				if c.duringProbing {
   288  					br.Responses[0].GetInner().(*roachpb.QueryTxnResponse).QueriedTxn = c.changedTxn
   289  				} else {
   290  					br.Responses[0].GetInner().(*roachpb.QueryTxnResponse).QueriedTxn = txn
   291  				}
   292  				br.Responses[1].GetInner().(*roachpb.QueryIntentResponse).FoundIntent = true
   293  				br.Responses[2].GetInner().(*roachpb.QueryIntentResponse).FoundIntent = false
   294  
   295  				mockSender = kv.SenderFunc(func(
   296  					_ context.Context, ba roachpb.BatchRequest,
   297  				) (*roachpb.BatchResponse, *roachpb.Error) {
   298  					// Recovery Phase.
   299  					assert.False(t, c.duringProbing, "the recovery phase should not be run")
   300  					assertMetrics(t, m, expMetrics.merge(metricVals{attemptsPending: 1, attempts: 1}))
   301  
   302  					assert.Equal(t, 1, len(ba.Requests))
   303  					assert.IsType(t, &roachpb.RecoverTxnRequest{}, ba.Requests[0].GetInner())
   304  
   305  					recTxnReq := ba.Requests[0].GetInner().(*roachpb.RecoverTxnRequest)
   306  					assert.Equal(t, roachpb.Key(txn.Key), recTxnReq.Key)
   307  					assert.Equal(t, txn.TxnMeta, recTxnReq.Txn)
   308  					assert.Equal(t, false, recTxnReq.ImplicitlyCommitted)
   309  
   310  					br2 := ba.CreateReply()
   311  					br2.Responses[0].GetInner().(*roachpb.RecoverTxnResponse).RecoveredTxn = c.changedTxn
   312  					return br2, nil
   313  				})
   314  				return br, nil
   315  			})
   316  
   317  			iceErr := roachpb.NewIndeterminateCommitError(txn)
   318  			resTxn, err := m.ResolveIndeterminateCommit(context.Background(), iceErr)
   319  			assert.NotNil(t, resTxn)
   320  			assert.Equal(t, c.changedTxn, *resTxn)
   321  			assert.Nil(t, err)
   322  
   323  			expMetrics = expMetrics.merge(c.metricImpact)
   324  			assertMetrics(t, m, expMetrics)
   325  		})
   326  	}
   327  }
   328  
   329  // TestResolveIndeterminateCommitTxnWithoutInFlightWrites tests that an
   330  // indeterminate commit resolution attempt skips the probing phase entirely
   331  // when a STAGING transaction has no in-flight writes. This shouldn't happen
   332  // in practice because a transaction will move straight to being explicitly
   333  // committed if it doesn't have any concurrent writes at the time that it
   334  // is committing, but it is handled correctly nonetheless.
   335  func TestResolveIndeterminateCommitTxnWithoutInFlightWrites(t *testing.T) {
   336  	defer leaktest.AfterTest(t)()
   337  
   338  	var mockSender kv.Sender
   339  	m, clock, stopper := makeManager(&mockSender)
   340  	defer stopper.Stop(context.Background())
   341  
   342  	// Create STAGING txn without any in-flight writes.
   343  	txn := makeStagingTransaction(clock)
   344  
   345  	mockSender = kv.SenderFunc(func(
   346  		_ context.Context, ba roachpb.BatchRequest,
   347  	) (*roachpb.BatchResponse, *roachpb.Error) {
   348  		// Recovery Phase. Probing phase skipped.
   349  		assert.Equal(t, 1, len(ba.Requests))
   350  		assert.IsType(t, &roachpb.RecoverTxnRequest{}, ba.Requests[0].GetInner())
   351  
   352  		recTxnReq := ba.Requests[0].GetInner().(*roachpb.RecoverTxnRequest)
   353  		assert.Equal(t, roachpb.Key(txn.Key), recTxnReq.Key)
   354  		assert.Equal(t, txn.TxnMeta, recTxnReq.Txn)
   355  		assert.Equal(t, true, recTxnReq.ImplicitlyCommitted)
   356  
   357  		br := ba.CreateReply()
   358  		recTxnResp := br.Responses[0].GetInner().(*roachpb.RecoverTxnResponse)
   359  		recTxnResp.RecoveredTxn = txn
   360  		recTxnResp.RecoveredTxn.Status = roachpb.COMMITTED
   361  		return br, nil
   362  	})
   363  
   364  	iceErr := roachpb.NewIndeterminateCommitError(txn)
   365  	resTxn, err := m.ResolveIndeterminateCommit(context.Background(), iceErr)
   366  	assert.NotNil(t, resTxn)
   367  	assert.Equal(t, roachpb.COMMITTED, resTxn.Status)
   368  	assert.Nil(t, err)
   369  }