github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvcoord
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"reflect"
    18  	"strconv"
    19  	"sync/atomic"
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/cockroachdb/cockroach/pkg/kv"
    24  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    25  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    26  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    27  	"github.com/cockroachdb/cockroach/pkg/storage"
    28  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    29  	"github.com/cockroachdb/cockroach/pkg/testutils"
    30  	"github.com/cockroachdb/cockroach/pkg/testutils/localtestcluster"
    31  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    32  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    33  	"github.com/cockroachdb/cockroach/pkg/util/log"
    34  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    35  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    36  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    37  	"github.com/cockroachdb/errors"
    38  	"github.com/stretchr/testify/require"
    39  	"golang.org/x/sync/errgroup"
    40  )
    41  
    42  func strToValue(s string) *roachpb.Value {
    43  	v := roachpb.MakeValueFromBytes([]byte(s))
    44  	return &v
    45  }
    46  
    47  // createTestDB creates a local test server and starts it. The caller
    48  // is responsible for stopping the test server.
    49  func createTestDB(t testing.TB) *localtestcluster.LocalTestCluster {
    50  	return createTestDBWithContextAndKnobs(t, kv.DefaultDBContext(), nil)
    51  }
    52  
    53  func createTestDBWithContextAndKnobs(
    54  	t testing.TB, dbCtx kv.DBContext, knobs *kvserver.StoreTestingKnobs,
    55  ) *localtestcluster.LocalTestCluster {
    56  	s := &localtestcluster.LocalTestCluster{
    57  		DBContext:         &dbCtx,
    58  		StoreTestingKnobs: knobs,
    59  	}
    60  	s.Start(t, testutils.NewNodeTestBaseContext(), InitFactoryForLocalTestCluster)
    61  	return s
    62  }
    63  
    64  // makeTS creates a new timestamp.
    65  func makeTS(walltime int64, logical int32) hlc.Timestamp {
    66  	return hlc.Timestamp{
    67  		WallTime: walltime,
    68  		Logical:  logical,
    69  	}
    70  }
    71  
    72  // TestTxnCoordSenderBeginTransaction verifies that a command sent with a
     73  // non-nil Txn with an empty ID gets a new transaction initialized.
    74  func TestTxnCoordSenderBeginTransaction(t *testing.T) {
    75  	defer leaktest.AfterTest(t)()
    76  	s := createTestDB(t)
    77  	defer s.Stop()
    78  	ctx := context.Background()
    79  
    80  	txn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
    81  
    82  	// Put request will create a new transaction.
    83  	key := roachpb.Key("key")
    84  	txn.TestingSetPriority(10)
    85  	txn.SetDebugName("test txn")
    86  	if err := txn.Put(ctx, key, []byte("value")); err != nil {
    87  		t.Fatal(err)
    88  	}
    89  	proto := txn.TestingCloneTxn()
    90  	if proto.Name != "test txn" {
    91  		t.Errorf("expected txn name to be %q; got %q", "test txn", proto.Name)
    92  	}
    93  	if proto.Priority != 10 {
    94  		t.Errorf("expected txn priority 10; got %d", proto.Priority)
    95  	}
    96  	if !bytes.Equal(proto.Key, key) {
    97  		t.Errorf("expected txn Key to match %q != %q", key, proto.Key)
    98  	}
    99  }
   100  
    101  // TestTxnCoordSenderKeyRanges verifies that multiple requests to the same or
    102  // overlapping key ranges cause the coordinator to keep track of only the
    103  // minimum number of ranges.
   104  func TestTxnCoordSenderKeyRanges(t *testing.T) {
   105  	defer leaktest.AfterTest(t)()
   106  
   107  	ctx := context.Background()
   108  	ranges := []struct {
   109  		start, end roachpb.Key
   110  	}{
   111  		{roachpb.Key("a"), roachpb.Key(nil)},
   112  		{roachpb.Key("a"), roachpb.Key(nil)},
   113  		{roachpb.Key("aa"), roachpb.Key(nil)},
   114  		{roachpb.Key("b"), roachpb.Key(nil)},
   115  		{roachpb.Key("aa"), roachpb.Key("c")},
   116  		{roachpb.Key("b"), roachpb.Key("c")},
   117  	}
   118  
   119  	s := createTestDB(t)
   120  	defer s.Stop()
   121  
   122  	txn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
   123  	// Disable txn pipelining so that all write spans are immediately
   124  	// added to the transaction's lock footprint.
   125  	if err := txn.DisablePipelining(); err != nil {
   126  		t.Fatal(err)
   127  	}
   128  	tc := txn.Sender().(*TxnCoordSender)
   129  
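         	// For each span below: a nil end key is written with a point Put, while
         	// a non-nil end key is issued as a DelRange so that the coordinator
         	// records a ranged lock span.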
   130  	for _, rng := range ranges {
   131  		if rng.end != nil {
   132  			if err := txn.DelRange(ctx, rng.start, rng.end); err != nil {
   133  				t.Fatal(err)
   134  			}
   135  		} else {
   136  			if err := txn.Put(ctx, rng.start, []byte("value")); err != nil {
   137  				t.Fatal(err)
   138  			}
   139  		}
   140  	}
   141  
   142  	// Verify that the transaction coordinator is only tracking two lock
    143  	// spans: "a" and the range "aa"-"c".
   144  	tc.interceptorAlloc.txnPipeliner.lockFootprint.mergeAndSort()
   145  	lockSpans := tc.interceptorAlloc.txnPipeliner.lockFootprint.asSlice()
   146  	if len(lockSpans) != 2 {
   147  		t.Errorf("expected 2 entries in keys range group; got %v", lockSpans)
   148  	}
   149  }
   150  
   151  // TestTxnCoordSenderCondenseLockSpans verifies that lock spans are condensed
   152  // along range boundaries when they exceed the maximum intent bytes threshold.
   153  func TestTxnCoordSenderCondenseLockSpans(t *testing.T) {
   154  	defer leaktest.AfterTest(t)()
   155  	a := roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key(nil)}
   156  	b := roachpb.Span{Key: roachpb.Key("b"), EndKey: roachpb.Key(nil)}
   157  	c := roachpb.Span{Key: roachpb.Key("c"), EndKey: roachpb.Key(nil)}
   158  	d := roachpb.Span{Key: roachpb.Key("dddddd"), EndKey: roachpb.Key(nil)}
   159  	e := roachpb.Span{Key: roachpb.Key("e"), EndKey: roachpb.Key(nil)}
   160  	aToBClosed := roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("b").Next()}
   161  	cToEClosed := roachpb.Span{Key: roachpb.Key("c"), EndKey: roachpb.Key("e").Next()}
   162  	fTof0 := roachpb.Span{Key: roachpb.Key("f"), EndKey: roachpb.Key("f0")}
   163  	g := roachpb.Span{Key: roachpb.Key("g"), EndKey: roachpb.Key(nil)}
   164  	g0Tog1 := roachpb.Span{Key: roachpb.Key("g0"), EndKey: roachpb.Key("g1")}
   165  	fTog1Closed := roachpb.Span{Key: roachpb.Key("f"), EndKey: roachpb.Key("g1")}
   166  	testCases := []struct {
   167  		span         roachpb.Span
   168  		expLocks     []roachpb.Span
   169  		expLocksSize int64
   170  	}{
   171  		{span: a, expLocks: []roachpb.Span{a}, expLocksSize: 1},
   172  		{span: b, expLocks: []roachpb.Span{a, b}, expLocksSize: 2},
   173  		{span: c, expLocks: []roachpb.Span{a, b, c}, expLocksSize: 3},
   174  		{span: d, expLocks: []roachpb.Span{a, b, c, d}, expLocksSize: 9},
    175  		// Note that c-e condenses and the condensed span is then listed first.
   176  		{span: e, expLocks: []roachpb.Span{cToEClosed, a, b}, expLocksSize: 5},
   177  		{span: fTof0, expLocks: []roachpb.Span{cToEClosed, a, b, fTof0}, expLocksSize: 8},
   178  		{span: g, expLocks: []roachpb.Span{cToEClosed, a, b, fTof0, g}, expLocksSize: 9},
   179  		{span: g0Tog1, expLocks: []roachpb.Span{fTog1Closed, cToEClosed, aToBClosed}, expLocksSize: 9},
   180  		// Add a key in the middle of a span, which will get merged on commit.
   181  		{span: c, expLocks: []roachpb.Span{aToBClosed, cToEClosed, fTog1Closed}, expLocksSize: 9},
   182  	}
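         	// These splits define the range boundaries along which the lock footprint
         	// is condensed once it exceeds the 10-byte limit configured via
         	// trackedWritesMaxSize below.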
   183  	splits := []roachpb.Span{
   184  		{Key: roachpb.Key("a"), EndKey: roachpb.Key("c")},
   185  		{Key: roachpb.Key("c"), EndKey: roachpb.Key("f")},
   186  		{Key: roachpb.Key("f"), EndKey: roachpb.Key("j")},
   187  	}
   188  	descs := []roachpb.RangeDescriptor{testMetaRangeDescriptor}
   189  	for i, s := range splits {
   190  		descs = append(descs, roachpb.RangeDescriptor{
   191  			RangeID:          roachpb.RangeID(2 + i),
   192  			StartKey:         roachpb.RKey(s.Key),
   193  			EndKey:           roachpb.RKey(s.EndKey),
   194  			InternalReplicas: []roachpb.ReplicaDescriptor{{NodeID: 1, StoreID: 1}},
   195  		})
   196  	}
   197  	descDB := mockRangeDescriptorDBForDescs(descs...)
   198  	s := createTestDB(t)
   199  	st := s.Store.ClusterSettings()
   200  	trackedWritesMaxSize.Override(&st.SV, 10) /* 10 bytes and it will condense */
   201  	defer s.Stop()
   202  
   203  	// Check end transaction locks, which should be condensed and split
   204  	// at range boundaries.
   205  	expLocks := []roachpb.Span{aToBClosed, cToEClosed, fTog1Closed}
   206  	var sendFn simpleSendFn = func(
   207  		_ context.Context, _ SendOptions, _ ReplicaSlice, args roachpb.BatchRequest,
   208  	) (*roachpb.BatchResponse, error) {
   209  		resp := args.CreateReply()
   210  		resp.Txn = args.Txn
   211  		if req, ok := args.GetArg(roachpb.EndTxn); ok {
   212  			if !req.(*roachpb.EndTxnRequest).Commit {
   213  				t.Errorf("expected commit to be true")
   214  			}
   215  			et := req.(*roachpb.EndTxnRequest)
   216  			if a, e := et.LockSpans, expLocks; !reflect.DeepEqual(a, e) {
   217  				t.Errorf("expected end transaction to have locks %+v; got %+v", e, a)
   218  			}
   219  			resp.Txn.Status = roachpb.COMMITTED
   220  		}
   221  		return resp, nil
   222  	}
   223  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
   224  	ds := NewDistSender(
   225  		DistSenderConfig{
   226  			AmbientCtx: ambient,
   227  			Clock:      s.Clock,
   228  			RPCContext: s.Cfg.RPCContext,
   229  			TestingKnobs: ClientTestingKnobs{
   230  				TransportFactory: adaptSimpleTransport(sendFn),
   231  			},
   232  			RangeDescriptorDB: descDB,
   233  			Settings:          cluster.MakeTestingClusterSettings(),
   234  		},
   235  		s.Gossip,
   236  	)
   237  	tsf := NewTxnCoordSenderFactory(
   238  		TxnCoordSenderFactoryConfig{
   239  			AmbientCtx: ambient,
   240  			Settings:   st,
   241  			Clock:      s.Clock,
   242  			Stopper:    s.Stopper,
   243  		},
   244  		ds,
   245  	)
   246  	db := kv.NewDB(ambient, tsf, s.Clock)
   247  	ctx := context.Background()
   248  
   249  	txn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */)
   250  	// Disable txn pipelining so that all write spans are immediately
   251  	// added to the transaction's lock footprint.
   252  	if err := txn.DisablePipelining(); err != nil {
   253  		t.Fatal(err)
   254  	}
   255  	for i, tc := range testCases {
   256  		if tc.span.EndKey != nil {
   257  			if err := txn.DelRange(ctx, tc.span.Key, tc.span.EndKey); err != nil {
   258  				t.Fatal(err)
   259  			}
   260  		} else {
   261  			if err := txn.Put(ctx, tc.span.Key, []byte("value")); err != nil {
   262  				t.Fatal(err)
   263  			}
   264  		}
   265  		tcs := txn.Sender().(*TxnCoordSender)
   266  		locks := tcs.interceptorAlloc.txnPipeliner.lockFootprint.asSlice()
   267  		if a, e := locks, tc.expLocks; !reflect.DeepEqual(a, e) {
   268  			t.Errorf("%d: expected keys %+v; got %+v", i, e, a)
   269  		}
   270  		locksSize := int64(0)
   271  		for _, i := range locks {
   272  			locksSize += int64(len(i.Key) + len(i.EndKey))
   273  		}
   274  		if a, e := locksSize, tc.expLocksSize; a != e {
   275  			t.Errorf("%d: keys size expected %d; got %d", i, e, a)
   276  		}
   277  	}
   278  	if err := txn.Commit(ctx); err != nil {
   279  		t.Fatal(err)
   280  	}
   281  }
   282  
    283  // Test that the heartbeat loop detects aborted transactions and stops.
   284  func TestTxnCoordSenderHeartbeat(t *testing.T) {
   285  	defer leaktest.AfterTest(t)()
   286  	s := createTestDBWithContextAndKnobs(t, kv.DefaultDBContext(), &kvserver.StoreTestingKnobs{
   287  		DisableScanner:    true,
   288  		DisableSplitQueue: true,
   289  		DisableMergeQueue: true,
   290  	})
   291  	defer s.Stop()
   292  	ctx := context.Background()
   293  
   294  	keyA := roachpb.Key("a")
   295  	keyC := roachpb.Key("c")
   296  	splitKey := roachpb.Key("b")
   297  	if err := s.DB.AdminSplit(ctx, splitKey /* spanKey */, splitKey /* splitKey */, hlc.MaxTimestamp /* expirationTimestamp */); err != nil {
   298  		t.Fatal(err)
   299  	}
   300  
   301  	// Make a db with a short heartbeat interval.
   302  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
   303  	tsf := NewTxnCoordSenderFactory(
   304  		TxnCoordSenderFactoryConfig{
   305  			AmbientCtx: ambient,
   306  			// Short heartbeat interval.
   307  			HeartbeatInterval: time.Millisecond,
   308  			Settings:          s.Cfg.Settings,
   309  			Clock:             s.Clock,
   310  			Stopper:           s.Stopper,
   311  		},
   312  		NewDistSenderForLocalTestCluster(
   313  			s.Cfg.Settings, &roachpb.NodeDescriptor{NodeID: 1},
   314  			ambient.Tracer, s.Clock, s.Latency, s.Stores, s.Stopper, s.Gossip,
   315  		),
   316  	)
   317  	quickHeartbeatDB := kv.NewDB(ambient, tsf, s.Clock)
   318  
   319  	// We're going to test twice. In both cases the heartbeat is supposed to
   320  	// notice that its transaction is aborted, but:
   321  	// - once the abort span is populated on the txn's range.
   322  	// - once the abort span is not populated.
   323  	// The two conditions are created by either clearing an intent from the txn's
   324  	// range or not (i.e. clearing an intent from another range).
   325  	// The difference is supposed to be immaterial for the heartbeat loop (that's
   326  	// what we're testing). As of June 2018, HeartbeatTxnRequests don't check the
   327  	// abort span.
   328  	for _, pusherKey := range []roachpb.Key{keyA, keyC} {
   329  		t.Run(fmt.Sprintf("pusher:%s", pusherKey), func(t *testing.T) {
    330  			// Make a txn that uses the db with the short heartbeat interval.
   331  			initialTxn := kv.NewTxn(ctx, quickHeartbeatDB, 0 /* gatewayNodeID */)
   332  			tc := initialTxn.Sender().(*TxnCoordSender)
   333  
   334  			if err := initialTxn.Put(ctx, keyA, []byte("value")); err != nil {
   335  				t.Fatal(err)
   336  			}
   337  			if err := initialTxn.Put(ctx, keyC, []byte("value")); err != nil {
   338  				t.Fatal(err)
   339  			}
   340  
   341  			// Verify 3 heartbeats.
   342  			var heartbeatTS hlc.Timestamp
   343  			for i := 0; i < 3; i++ {
   344  				testutils.SucceedsSoon(t, func() error {
   345  					txn, pErr := getTxn(ctx, initialTxn)
   346  					if pErr != nil {
   347  						t.Fatal(pErr)
   348  					}
   349  					// Advance clock by 1ns.
   350  					s.Manual.Increment(1)
   351  					if lastActive := txn.LastActive(); heartbeatTS.Less(lastActive) {
   352  						heartbeatTS = lastActive
   353  						return nil
   354  					}
   355  					return errors.Errorf("expected heartbeat")
   356  				})
   357  			}
   358  
   359  			// Push our txn with another high-priority txn.
   360  			{
   361  				if err := s.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
   362  					if err := txn.SetUserPriority(roachpb.MaxUserPriority); err != nil {
   363  						return err
   364  					}
   365  					return txn.Put(ctx, pusherKey, []byte("pusher val"))
   366  				}); err != nil {
   367  					t.Fatal(err)
   368  				}
   369  			}
   370  
   371  			// Verify that the abort is discovered and the heartbeat discontinued.
   372  			// This relies on the heartbeat loop stopping once it figures out that the txn
   373  			// has been aborted.
   374  			testutils.SucceedsSoon(t, func() error {
   375  				if tc.IsTracking() {
   376  					return fmt.Errorf("transaction is not aborted")
   377  				}
   378  				return nil
   379  			})
   380  
   381  			// Trying to do something else should give us a TransactionAbortedError.
   382  			_, err := initialTxn.Get(ctx, "a")
   383  			assertTransactionAbortedError(t, err)
   384  		})
   385  	}
   386  }
   387  
    388  // getTxn queries the transaction record for the given txn and returns the transaction info.
   389  func getTxn(ctx context.Context, txn *kv.Txn) (*roachpb.Transaction, *roachpb.Error) {
   390  	txnMeta := txn.TestingCloneTxn().TxnMeta
   391  	qt := &roachpb.QueryTxnRequest{
   392  		RequestHeader: roachpb.RequestHeader{
   393  			Key: txnMeta.Key,
   394  		},
   395  		Txn: txnMeta,
   396  	}
   397  
   398  	ba := roachpb.BatchRequest{}
   399  	ba.Timestamp = txnMeta.WriteTimestamp
   400  	ba.Add(qt)
   401  
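         	// Query the transaction record directly through the DB's
         	// non-transactional sender.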
   402  	db := txn.DB()
   403  	sender := db.NonTransactionalSender()
   404  
   405  	br, pErr := sender.Send(ctx, ba)
   406  	if pErr != nil {
   407  		return nil, pErr
   408  	}
   409  	return &br.Responses[0].GetInner().(*roachpb.QueryTxnResponse).QueriedTxn, nil
   410  }
   411  
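         // verifyCleanup waits until none of the given coordinators is still
         // heartbeating its transaction and until no write intent remains on key
         // in the provided engine.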
   412  func verifyCleanup(key roachpb.Key, eng storage.Engine, t *testing.T, coords ...*TxnCoordSender) {
   413  	testutils.SucceedsSoon(t, func() error {
   414  		for _, coord := range coords {
   415  			if coord.IsTracking() {
   416  				return fmt.Errorf("expected no heartbeat")
   417  			}
   418  		}
   419  		meta := &enginepb.MVCCMetadata{}
   420  		//lint:ignore SA1019 historical usage of deprecated eng.GetProto is OK
   421  		ok, _, _, err := eng.GetProto(storage.MakeMVCCMetadataKey(key), meta)
   422  		if err != nil {
   423  			return fmt.Errorf("error getting MVCC metadata: %s", err)
   424  		}
   425  		if ok && meta.Txn != nil {
   426  			return fmt.Errorf("found unexpected write intent: %s", meta)
   427  		}
   428  		return nil
   429  	})
   430  }
   431  
   432  // TestTxnCoordSenderEndTxn verifies that ending a transaction
   433  // sends resolve write intent requests.
   434  func TestTxnCoordSenderEndTxn(t *testing.T) {
   435  	defer leaktest.AfterTest(t)()
   436  	s := createTestDB(t)
   437  	defer s.Stop()
   438  	ctx := context.Background()
   439  
   440  	// 4 cases: no deadline, past deadline, equal deadline, future deadline.
   441  	for i := 0; i < 4; i++ {
   442  		key := roachpb.Key("key: " + strconv.Itoa(i))
   443  		txn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
   444  		// Initialize the transaction.
   445  		if pErr := txn.Put(ctx, key, []byte("value")); pErr != nil {
   446  			t.Fatal(pErr)
   447  		}
   448  		// Conflicting transaction that pushes the above transaction.
   449  		conflictTxn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
   450  		conflictTxn.TestingSetPriority(enginepb.MaxTxnPriority)
   451  		if _, pErr := conflictTxn.Get(ctx, key); pErr != nil {
   452  			t.Fatal(pErr)
   453  		}
   454  
   455  		// The transaction was pushed at least to conflictTxn's timestamp (but
   456  		// it could have been pushed more - the push takes a timestamp off the
   457  		// HLC).
   458  		pusheeTxn, pErr := getTxn(ctx, txn)
   459  		if pErr != nil {
   460  			t.Fatal(pErr)
   461  		}
   462  		pushedTimestamp := pusheeTxn.WriteTimestamp
   463  
   464  		{
   465  			var err error
   466  			switch i {
   467  			case 0:
   468  				// No deadline.
   469  
   470  			case 1:
   471  				// Past deadline.
   472  				if !txn.UpdateDeadlineMaybe(ctx, pushedTimestamp.Prev()) {
   473  					t.Fatalf("did not update deadline")
   474  				}
   475  
   476  			case 2:
   477  				// Equal deadline.
   478  				if !txn.UpdateDeadlineMaybe(ctx, pushedTimestamp) {
   479  					t.Fatalf("did not update deadline")
   480  				}
   481  
   482  			case 3:
   483  				// Future deadline.
   484  
   485  				if !txn.UpdateDeadlineMaybe(ctx, pushedTimestamp.Next()) {
   486  					t.Fatalf("did not update deadline")
   487  				}
   488  			}
   489  			err = txn.CommitOrCleanup(ctx)
   490  
   491  			switch i {
   492  			case 0:
   493  				// No deadline.
   494  				if err != nil {
   495  					t.Fatal(err)
   496  				}
   497  
   498  			case 1:
   499  				// Past deadline.
   500  				fallthrough
   501  			case 2:
   502  				// Equal deadline.
   503  				assertTransactionRetryError(t, err)
   504  				if !testutils.IsError(err, "RETRY_COMMIT_DEADLINE_EXCEEDED") {
   505  					t.Fatalf("expected deadline exceeded, got: %s", err)
   506  				}
   507  			case 3:
   508  				// Future deadline.
   509  				if err != nil {
   510  					t.Fatal(err)
   511  				}
   512  			}
   513  		}
   514  		verifyCleanup(key, s.Eng, t, txn.Sender().(*TxnCoordSender))
   515  	}
   516  }
   517  
    518  // TestTxnCoordSenderAddLockOnError verifies that locks are tracked by the
    519  // transaction coordinator, even when a request returns an error.
   520  func TestTxnCoordSenderAddLockOnError(t *testing.T) {
   521  	defer leaktest.AfterTest(t)()
   522  	s := createTestDB(t)
   523  	defer s.Stop()
   524  
   525  	ctx := context.Background()
   526  
   527  	// Create a transaction with intent at "x".
   528  	key := roachpb.Key("x")
   529  	txn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
   530  	tc := txn.Sender().(*TxnCoordSender)
   531  
   532  	// Write so that the coordinator begins tracking this txn.
   533  	if err := txn.Put(ctx, "x", "y"); err != nil {
   534  		t.Fatal(err)
   535  	}
   536  	{
   537  		err := txn.CPut(ctx, key, []byte("x"), strToValue("born to fail"))
   538  		if !errors.HasType(err, (*roachpb.ConditionFailedError)(nil)) {
   539  			t.Fatal(err)
   540  		}
   541  	}
   542  	tc.interceptorAlloc.txnPipeliner.lockFootprint.mergeAndSort()
   543  	lockSpans := tc.interceptorAlloc.txnPipeliner.lockFootprint.asSlice()
   544  	expSpans := []roachpb.Span{{Key: key, EndKey: []byte("")}}
    545  	equal := reflect.DeepEqual(lockSpans, expSpans)
   546  	if err := txn.Rollback(ctx); err != nil {
   547  		t.Fatal(err)
   548  	}
   549  	if !equal {
   550  		t.Fatalf("expected stored locks %v, got %v", expSpans, lockSpans)
   551  	}
   552  }
   553  
   554  func assertTransactionRetryError(t *testing.T, e error) {
   555  	t.Helper()
   556  	if retErr := (*roachpb.TransactionRetryWithProtoRefreshError)(nil); errors.As(e, &retErr) {
   557  		if !testutils.IsError(retErr, "TransactionRetryError") {
   558  			t.Fatalf("expected the cause to be TransactionRetryError, but got %s",
   559  				retErr)
   560  		}
   561  	} else {
   562  		t.Fatalf("expected a retryable error, but got %s (%T)", e, e)
   563  	}
   564  }
   565  
   566  func assertTransactionAbortedError(t *testing.T, e error) {
   567  	if retErr := (*roachpb.TransactionRetryWithProtoRefreshError)(nil); errors.As(e, &retErr) {
   568  		if !testutils.IsError(retErr, "TransactionAbortedError") {
   569  			t.Fatalf("expected the cause to be TransactionAbortedError, but got %s",
   570  				retErr)
   571  		}
   572  	} else {
   573  		t.Fatalf("expected a retryable error, but got %s (%T)", e, e)
   574  	}
   575  }
   576  
   577  // TestTxnCoordSenderCleanupOnAborted verifies that if a txn receives a
   578  // TransactionAbortedError, the coordinator cleans up the transaction.
   579  func TestTxnCoordSenderCleanupOnAborted(t *testing.T) {
   580  	defer leaktest.AfterTest(t)()
   581  	s := createTestDB(t)
   582  	defer s.Stop()
   583  	ctx := context.Background()
   584  
   585  	// Create a transaction with intent at "a".
   586  	key := roachpb.Key("a")
   587  	txn1 := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
   588  	if err := txn1.Put(ctx, key, []byte("value")); err != nil {
   589  		t.Fatal(err)
   590  	}
   591  
   592  	// Push the transaction (by writing key "a" with higher priority) to abort it.
   593  	txn2 := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
   594  	if err := txn2.SetUserPriority(roachpb.MaxUserPriority); err != nil {
   595  		t.Fatal(err)
   596  	}
   597  	if err := txn2.Put(ctx, key, []byte("value2")); err != nil {
   598  		t.Fatal(err)
   599  	}
   600  
    601  	// Now end the transaction and verify we've cleaned up, even though
    602  	// the end transaction failed.
   603  	err := txn1.CommitOrCleanup(ctx)
   604  	assertTransactionAbortedError(t, err)
   605  	if err := txn2.CommitOrCleanup(ctx); err != nil {
   606  		t.Fatal(err)
   607  	}
   608  	verifyCleanup(key, s.Eng, t, txn1.Sender().(*TxnCoordSender), txn2.Sender().(*TxnCoordSender))
   609  }
   610  
   611  // TestTxnCoordSenderCleanupOnCommitAfterRestart verifies that if a txn restarts
   612  // at a higher epoch and then commits before it has acquired any locks in the new
   613  // epoch, the coordinator still cleans up the transaction. In #40466, we saw that
   614  // this case could be detected as a 1PC transaction and the cleanup during the
   615  // commit could be omitted.
   616  func TestTxnCoordSenderCleanupOnCommitAfterRestart(t *testing.T) {
   617  	defer leaktest.AfterTest(t)()
   618  	s := createTestDB(t)
   619  	defer s.Stop()
   620  	ctx := context.Background()
   621  
   622  	// Create a transaction with intent at "a".
   623  	key := roachpb.Key("a")
   624  	txn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
   625  	if err := txn.Put(ctx, key, []byte("value")); err != nil {
   626  		t.Fatal(err)
   627  	}
   628  
   629  	// Restart the transaction with a new epoch.
   630  	txn.ManualRestart(ctx, s.Clock.Now())
   631  
   632  	// Now immediately commit.
   633  	if err := txn.CommitOrCleanup(ctx); err != nil {
   634  		t.Fatal(err)
   635  	}
   636  	verifyCleanup(key, s.Eng, t, txn.Sender().(*TxnCoordSender))
   637  }
   638  
   639  // TestTxnCoordSenderGCWithAmbiguousResultErr verifies that the coordinator
   640  // cleans up extant transactions and locks after an ambiguous result error is
   641  // observed, even if the error is on the first request.
   642  func TestTxnCoordSenderGCWithAmbiguousResultErr(t *testing.T) {
   643  	defer leaktest.AfterTest(t)()
   644  
   645  	testutils.RunTrueAndFalse(t, "errOnFirst", func(t *testing.T, errOnFirst bool) {
   646  		key := roachpb.Key("a")
   647  		are := roachpb.NewAmbiguousResultError("very ambiguous")
   648  		knobs := &kvserver.StoreTestingKnobs{
   649  			TestingResponseFilter: func(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse) *roachpb.Error {
   650  				for _, req := range ba.Requests {
   651  					if putReq, ok := req.GetInner().(*roachpb.PutRequest); ok && putReq.Key.Equal(key) {
   652  						return roachpb.NewError(are)
   653  					}
   654  				}
   655  				return nil
   656  			},
   657  		}
   658  
   659  		s := createTestDBWithContextAndKnobs(t, kv.DefaultDBContext(), knobs)
   660  		defer s.Stop()
   661  
   662  		ctx := context.Background()
   663  		txn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
   664  		tc := txn.Sender().(*TxnCoordSender)
   665  		if !errOnFirst {
   666  			otherKey := roachpb.Key("other")
   667  			if err := txn.Put(ctx, otherKey, []byte("value")); err != nil {
   668  				t.Fatal(err)
   669  			}
   670  		}
   671  		if err := txn.Put(ctx, key, []byte("value")); !testutils.IsError(err, "result is ambiguous") {
   672  			t.Fatalf("expected error %v, found %v", are, err)
   673  		}
   674  
   675  		if err := txn.Rollback(ctx); err != nil {
   676  			t.Fatal(err)
   677  		}
   678  
   679  		testutils.SucceedsSoon(t, func() error {
   680  			// Locking the TxnCoordSender to prevent a data race.
   681  			if tc.IsTracking() {
   682  				return errors.Errorf("expected garbage collection")
   683  			}
   684  			return nil
   685  		})
   686  
   687  		verifyCleanup(key, s.Eng, t, tc)
   688  	})
   689  }
   690  
   691  // TestTxnCoordSenderTxnUpdatedOnError verifies that errors adjust the
   692  // response transaction's timestamp and priority as appropriate.
   693  func TestTxnCoordSenderTxnUpdatedOnError(t *testing.T) {
   694  	defer leaktest.AfterTest(t)()
   695  	ctx := context.Background()
   696  	origTS := makeTS(123, 0)
   697  	plus10 := origTS.Add(10, 10)
   698  	plus20 := origTS.Add(20, 0)
   699  	testCases := []struct {
   700  		// The test's name.
   701  		name             string
   702  		pErrGen          func(txn *roachpb.Transaction) *roachpb.Error
   703  		expEpoch         enginepb.TxnEpoch
   704  		expPri           enginepb.TxnPriority
   705  		expTS, expOrigTS hlc.Timestamp
    706  		// If set, we're expecting that the Transaction proto is re-initialized (as
    707  		// opposed to just having the epoch incremented).
   708  		expNewTransaction bool
   709  		nodeSeen          bool
   710  	}{
   711  		{
   712  			// No error, so nothing interesting either.
   713  			name:      "nil",
   714  			pErrGen:   func(_ *roachpb.Transaction) *roachpb.Error { return nil },
   715  			expEpoch:  0,
   716  			expPri:    1,
   717  			expTS:     origTS,
   718  			expOrigTS: origTS,
   719  		},
   720  		{
   721  			// On uncertainty error, new epoch begins and node is seen.
   722  			// Timestamp moves ahead of the existing write.
   723  			name: "ReadWithinUncertaintyIntervalError",
   724  			pErrGen: func(txn *roachpb.Transaction) *roachpb.Error {
   725  				const nodeID = 1
   726  				txn.UpdateObservedTimestamp(nodeID, plus10)
   727  				pErr := roachpb.NewErrorWithTxn(
   728  					roachpb.NewReadWithinUncertaintyIntervalError(
   729  						hlc.Timestamp{}, hlc.Timestamp{}, nil),
   730  					txn)
   731  				pErr.OriginNode = nodeID
   732  				return pErr
   733  			},
   734  			expEpoch:  1,
   735  			expPri:    1,
   736  			expTS:     plus10,
   737  			expOrigTS: plus10,
   738  			nodeSeen:  true,
   739  		},
   740  		{
   741  			// On abort, nothing changes but we get a new priority to use for
   742  			// the next attempt.
   743  			name: "TransactionAbortedError",
   744  			pErrGen: func(txn *roachpb.Transaction) *roachpb.Error {
   745  				txn.WriteTimestamp = plus20
   746  				txn.Priority = 10
   747  				return roachpb.NewErrorWithTxn(&roachpb.TransactionAbortedError{}, txn)
   748  			},
   749  			expNewTransaction: true,
   750  			expPri:            10,
   751  			expTS:             plus20,
   752  			expOrigTS:         plus20,
   753  		},
   754  		{
   755  			// On failed push, new epoch begins just past the pushed timestamp.
   756  			// Additionally, priority ratchets up to just below the pusher's.
   757  			name: "TransactionPushError",
   758  			pErrGen: func(txn *roachpb.Transaction) *roachpb.Error {
   759  				return roachpb.NewErrorWithTxn(&roachpb.TransactionPushError{
   760  					PusheeTxn: roachpb.Transaction{
   761  						TxnMeta: enginepb.TxnMeta{WriteTimestamp: plus10, Priority: 10},
   762  					},
   763  				}, txn)
   764  			},
   765  			expEpoch:  1,
   766  			expPri:    9,
   767  			expTS:     plus10,
   768  			expOrigTS: plus10,
   769  		},
   770  		{
   771  			// On retry, restart with new epoch, timestamp and priority.
   772  			name: "TransactionRetryError",
   773  			pErrGen: func(txn *roachpb.Transaction) *roachpb.Error {
   774  				txn.WriteTimestamp = plus10
   775  				txn.Priority = 10
   776  				return roachpb.NewErrorWithTxn(&roachpb.TransactionRetryError{}, txn)
   777  			},
   778  			expEpoch:  1,
   779  			expPri:    10,
   780  			expTS:     plus10,
   781  			expOrigTS: plus10,
   782  		},
   783  	}
   784  
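         	// Each case runs a single Put through a stub sender that returns the
         	// generated error and then inspects the resulting transaction proto.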
   785  	for _, test := range testCases {
   786  		t.Run(test.name, func(t *testing.T) {
   787  			stopper := stop.NewStopper()
   788  
   789  			manual := hlc.NewManualClock(origTS.WallTime)
   790  			clock := hlc.NewClock(manual.UnixNano, 20*time.Nanosecond)
   791  
   792  			var senderFn kv.SenderFunc = func(
   793  				_ context.Context, ba roachpb.BatchRequest,
   794  			) (*roachpb.BatchResponse, *roachpb.Error) {
   795  				var reply *roachpb.BatchResponse
   796  				pErr := test.pErrGen(ba.Txn)
   797  				if pErr == nil {
   798  					reply = ba.CreateReply()
   799  					reply.Txn = ba.Txn
   800  				} else if txn := pErr.GetTxn(); txn != nil {
    801  					// Update the manual clock to simulate an error
    802  					// that advances the local hlc clock.
   803  					manual.Set(txn.WriteTimestamp.WallTime)
   804  				}
   805  				return reply, pErr
   806  			}
   807  			ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
   808  			tsf := NewTxnCoordSenderFactory(
   809  				TxnCoordSenderFactoryConfig{
   810  					AmbientCtx: ambient,
   811  					Clock:      clock,
   812  					Stopper:    stopper,
   813  				},
   814  				senderFn,
   815  			)
   816  			db := kv.NewDB(ambient, tsf, clock)
   817  			key := roachpb.Key("test-key")
   818  			now := clock.Now()
   819  			origTxnProto := roachpb.MakeTransaction(
   820  				"test txn",
   821  				key,
   822  				roachpb.UserPriority(0),
   823  				now,
   824  				clock.MaxOffset().Nanoseconds(),
   825  			)
   826  			// TODO(andrei): I've monkeyed with the priorities on this initial
   827  			// Transaction to keep the test happy from a previous version in which the
   828  			// Transaction was not initialized before use (which became insufficient
    829  			// when we started testing that TransactionAbortedError properly
   830  			// re-initializes the proto), but this deserves cleanup. I think this test
   831  			// is strict in what updated priorities it expects and also our mechanism
   832  			// for assigning exact priorities doesn't work properly when faced with
   833  			// updates.
   834  			origTxnProto.Priority = 1
   835  			txn := kv.NewTxnFromProto(ctx, db, 0 /* gatewayNodeID */, now, kv.RootTxn, &origTxnProto)
   836  			txn.TestingSetPriority(1)
   837  
   838  			err := txn.Put(ctx, key, []byte("value"))
   839  			stopper.Stop(ctx)
   840  
   841  			if test.name != "nil" && err == nil {
   842  				t.Fatalf("expected an error")
   843  			}
   844  			proto := txn.TestingCloneTxn()
   845  			txnReset := origTxnProto.ID != proto.ID
   846  			if txnReset != test.expNewTransaction {
   847  				t.Fatalf("expected txn reset: %t and got: %t", test.expNewTransaction, txnReset)
   848  			}
   849  			if proto.Epoch != test.expEpoch {
   850  				t.Errorf("expected epoch = %d; got %d",
   851  					test.expEpoch, proto.Epoch)
   852  			}
   853  			if proto.Priority != test.expPri {
   854  				t.Errorf("expected priority = %d; got %d",
   855  					test.expPri, proto.Priority)
   856  			}
   857  			if proto.WriteTimestamp != test.expTS {
   858  				t.Errorf("expected timestamp to be %s; got %s",
   859  					test.expTS, proto.WriteTimestamp)
   860  			}
   861  			if proto.ReadTimestamp != test.expOrigTS {
   862  				t.Errorf("expected orig timestamp to be %s; got %s",
   863  					test.expOrigTS, proto.ReadTimestamp)
   864  			}
   865  			if ns := proto.ObservedTimestamps; (len(ns) != 0) != test.nodeSeen {
   866  				t.Errorf("expected nodeSeen=%t, but list of hosts is %v",
   867  					test.nodeSeen, ns)
   868  			}
   869  		})
   870  	}
   871  }
   872  
   873  // TestTxnMultipleCoord checks that multiple txn coordinators can be
   874  // used for reads by a single transaction, and their state can be combined.
   875  func TestTxnMultipleCoord(t *testing.T) {
   876  	defer leaktest.AfterTest(t)()
   877  	s := createTestDB(t)
   878  	defer s.Stop()
   879  
   880  	ctx := context.Background()
   881  	txn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
   882  
   883  	// Start the transaction.
   884  	key := roachpb.Key("a")
   885  	if _, err := txn.Get(ctx, key); err != nil {
   886  		t.Fatal(err)
   887  	}
   888  
    889  	// Now create a second, leaf coordinator.
   890  	leafInputState := txn.GetLeafTxnInputState(ctx)
   891  	txn2 := kv.NewLeafTxn(ctx, s.DB, 0 /* gatewayNodeID */, &leafInputState)
   892  
   893  	// Start the second transaction.
   894  	key2 := roachpb.Key("b")
   895  	if _, err := txn2.Get(ctx, key2); err != nil {
   896  		t.Fatal(err)
   897  	}
   898  
   899  	// Augment txn with txn2's meta & commit.
   900  	tfs, err := txn2.GetLeafTxnFinalState(ctx)
   901  	if err != nil {
   902  		t.Fatal(err)
   903  	}
   904  	if err := txn.UpdateRootWithLeafFinalState(ctx, &tfs); err != nil {
   905  		t.Fatal(err)
   906  	}
   907  
    908  	// Verify that the root now sees both read spans in its refresh footprint.
   909  	tcs := txn.Sender().(*TxnCoordSender)
   910  	refreshSpans := tcs.interceptorAlloc.txnSpanRefresher.refreshFootprint.asSlice()
   911  	require.Equal(t, []roachpb.Span{{Key: key}, {Key: key2}}, refreshSpans)
   912  
   913  	ba := txn.NewBatch()
   914  	ba.AddRawRequest(&roachpb.EndTxnRequest{Commit: true})
   915  	if err := txn.Run(ctx, ba); err != nil {
   916  		t.Fatal(err)
   917  	}
   918  }
   919  
   920  // TestTxnCoordSenderNoDuplicateLockSpans verifies that TxnCoordSender does not
   921  // generate duplicate lock spans and that it merges lock spans that have
   922  // overlapping ranges.
   923  func TestTxnCoordSenderNoDuplicateLockSpans(t *testing.T) {
   924  	defer leaktest.AfterTest(t)()
   925  	ctx := context.Background()
   926  	stopper := stop.NewStopper()
   927  	manual := hlc.NewManualClock(123)
   928  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
   929  
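         	// expectedLockSpans is filled in below, just before the final commit
         	// batch is sent; senderFn compares it against the EndTxn's lock spans.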
   930  	var expectedLockSpans []roachpb.Span
   931  
   932  	var senderFn kv.SenderFunc = func(_ context.Context, ba roachpb.BatchRequest) (
   933  		*roachpb.BatchResponse, *roachpb.Error) {
   934  		br := ba.CreateReply()
   935  		br.Txn = ba.Txn.Clone()
   936  		if rArgs, ok := ba.GetArg(roachpb.EndTxn); ok {
   937  			et := rArgs.(*roachpb.EndTxnRequest)
   938  			if !reflect.DeepEqual(et.LockSpans, expectedLockSpans) {
   939  				t.Errorf("Invalid lock spans: %+v; expected %+v", et.LockSpans, expectedLockSpans)
   940  			}
   941  			br.Txn.Status = roachpb.COMMITTED
   942  		}
   943  		return br, nil
   944  	}
   945  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
   946  
   947  	factory := NewTxnCoordSenderFactory(
   948  		TxnCoordSenderFactoryConfig{
   949  			AmbientCtx: ambient,
   950  			Clock:      clock,
   951  			Stopper:    stopper,
   952  			Settings:   cluster.MakeTestingClusterSettings(),
   953  		},
   954  		senderFn,
   955  	)
   956  	defer stopper.Stop(ctx)
   957  
   958  	db := kv.NewDB(ambient, factory, clock)
   959  	txn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */)
   960  
   961  	// Acquire locks on a-b, c, u-w before the final batch.
   962  	_, pErr := txn.ReverseScanForUpdate(ctx, roachpb.Key("a"), roachpb.Key("b"), 0)
   963  	if pErr != nil {
   964  		t.Fatal(pErr)
   965  	}
   966  	pErr = txn.Put(ctx, roachpb.Key("c"), []byte("value"))
   967  	if pErr != nil {
   968  		t.Fatal(pErr)
   969  	}
   970  	pErr = txn.DelRange(ctx, roachpb.Key("u"), roachpb.Key("w"))
   971  	if pErr != nil {
   972  		t.Fatal(pErr)
   973  	}
   974  
   975  	// The final batch overwrites key c and overlaps part of the a-b and u-w ranges.
   976  	b := txn.NewBatch()
   977  	b.Put(roachpb.Key("b"), []byte("value"))
   978  	b.Put(roachpb.Key("c"), []byte("value"))
   979  	b.Put(roachpb.Key("d"), []byte("value"))
   980  	b.ReverseScanForUpdate(roachpb.Key("v"), roachpb.Key("z"))
   981  
    982  	// The expected locks are a-b, c, d, and u-z.
   983  	expectedLockSpans = []roachpb.Span{
   984  		{Key: roachpb.Key("a"), EndKey: roachpb.Key("b").Next()},
   985  		{Key: roachpb.Key("c"), EndKey: nil},
   986  		{Key: roachpb.Key("d"), EndKey: nil},
   987  		{Key: roachpb.Key("u"), EndKey: roachpb.Key("z")},
   988  	}
   989  
   990  	pErr = txn.CommitInBatch(ctx, b)
   991  	if pErr != nil {
   992  		t.Fatal(pErr)
   993  	}
   994  }
   995  
   996  // checkTxnMetrics verifies that the provided Sender's transaction metrics match the expected
   997  // values. This is done through a series of retries with increasing backoffs, to work around
   998  // the TxnCoordSender's asynchronous updating of metrics after a transaction ends.
   999  func checkTxnMetrics(
  1000  	t *testing.T, metrics TxnMetrics, name string, commits, commits1PC, aborts, restarts int64,
  1001  ) {
  1002  	testutils.SucceedsSoon(t, func() error {
  1003  		return checkTxnMetricsOnce(t, metrics, name, commits, commits1PC, aborts, restarts)
  1004  	})
  1005  }
  1006  
  1007  func checkTxnMetricsOnce(
  1008  	t *testing.T, metrics TxnMetrics, name string, commits, commits1PC, aborts, restarts int64,
  1009  ) error {
  1010  	testcases := []struct {
  1011  		name string
  1012  		a, e int64
  1013  	}{
  1014  		{"commits", metrics.Commits.Count(), commits},
  1015  		{"commits1PC", metrics.Commits1PC.Count(), commits1PC},
  1016  		{"aborts", metrics.Aborts.Count(), aborts},
  1017  		{"durations", metrics.Durations.TotalCount(), commits + aborts},
  1018  	}
  1019  
  1020  	for _, tc := range testcases {
  1021  		if tc.a != tc.e {
  1022  			return errors.Errorf("%s: actual %s %d != expected %d", name, tc.name, tc.a, tc.e)
  1023  		}
  1024  	}
  1025  
  1026  	// Handle restarts separately, because that's a histogram. Though the
  1027  	// histogram is approximate, we're recording so few distinct values
  1028  	// that we should be okay.
  1029  	dist := metrics.Restarts.Snapshot().Distribution()
  1030  	var actualRestarts int64
  1031  	for _, b := range dist {
  1032  		if b.From == b.To {
  1033  			actualRestarts += b.From * b.Count
  1034  		} else {
  1035  			t.Fatalf("unexpected value in histogram: %d-%d", b.From, b.To)
  1036  		}
  1037  	}
  1038  	if a, e := actualRestarts, restarts; a != e {
  1039  		return errors.Errorf("%s: actual restarts %d != expected %d", name, a, e)
  1040  	}
  1041  
  1042  	return nil
  1043  }
  1044  
  1045  // setupMetricsTest sets the txn coord sender factory's metrics to
  1046  // have a faster sample interval and returns a cleanup function to be
  1047  // executed by callers.
  1048  func setupMetricsTest(t *testing.T) (*localtestcluster.LocalTestCluster, TxnMetrics, func()) {
  1049  	dbCtx := kv.DefaultDBContext()
  1050  	s := &localtestcluster.LocalTestCluster{
  1051  		DBContext: &dbCtx,
  1052  		// Liveness heartbeat txns mess up the metrics.
  1053  		DisableLivenessHeartbeat: true,
  1054  		DontCreateSystemRanges:   true,
  1055  	}
  1056  	s.Start(t, testutils.NewNodeTestBaseContext(), InitFactoryForLocalTestCluster)
  1057  
  1058  	metrics := MakeTxnMetrics(metric.TestSampleInterval)
  1059  	s.DB.GetFactory().(*TxnCoordSenderFactory).metrics = metrics
  1060  	return s, metrics, func() {
  1061  		s.Stop()
  1062  	}
  1063  }
  1064  
  1065  // Test a normal transaction. This and the other metrics tests below use real KV operations,
  1066  // because it took far too much mucking with TxnCoordSender internals to mock out the sender
  1067  // function as other tests do.
  1068  func TestTxnCommit(t *testing.T) {
  1069  	defer leaktest.AfterTest(t)()
  1070  	s, metrics, cleanupFn := setupMetricsTest(t)
  1071  	defer cleanupFn()
  1072  	value := []byte("value")
  1073  
  1074  	// Test a write txn commit.
  1075  	if err := s.DB.Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
  1076  		key := []byte("key-commit")
  1077  		return txn.Put(ctx, key, value)
  1078  	}); err != nil {
  1079  		t.Fatal(err)
  1080  	}
  1081  	checkTxnMetrics(t, metrics, "commit txn", 1 /* commits */, 0 /* commits1PC */, 0, 0)
  1082  
  1083  	// Test a read-only txn.
  1084  	if err := s.DB.Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
  1085  		key := []byte("key-commit")
  1086  		_, err := txn.Get(ctx, key)
  1087  		return err
  1088  	}); err != nil {
  1089  		t.Fatal(err)
  1090  	}
  1091  
  1092  	checkTxnMetrics(t, metrics, "commit txn", 2 /* commits */, 0 /* commits1PC */, 0, 0)
  1093  }
  1094  
  1095  // TestTxnOnePhaseCommit verifies that 1PC metric tracking works.
  1096  func TestTxnOnePhaseCommit(t *testing.T) {
  1097  	defer leaktest.AfterTest(t)()
  1098  	s, metrics, cleanupFn := setupMetricsTest(t)
  1099  	defer cleanupFn()
  1100  
  1101  	value := []byte("value")
  1102  
  1103  	ctx := context.Background()
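         	// A transaction whose only write is batched together with its commit can
         	// take the one-phase-commit fast path; the Commits1PC metric checked
         	// below should reflect that.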
  1104  	if err := s.DB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1105  		key := []byte("key-commit")
  1106  		b := txn.NewBatch()
  1107  		b.Put(key, value)
  1108  		return txn.CommitInBatch(ctx, b)
  1109  	}); err != nil {
  1110  		t.Fatal(err)
  1111  	}
  1112  	kv, err := s.DB.Get(ctx, []byte("key-commit"))
  1113  	if err != nil {
  1114  		t.Fatal(err)
  1115  	}
  1116  	if kv.Value == nil {
  1117  		t.Fatal("expected value not found")
  1118  	}
  1119  	val, err := kv.Value.GetBytes()
  1120  	if err != nil {
  1121  		t.Fatal(err)
  1122  	}
  1123  	if !bytes.Equal(val, value) {
  1124  		t.Fatalf("expected: %s, got: %s", value, val)
  1125  	}
  1126  	checkTxnMetrics(t, metrics, "commit 1PC txn", 1 /* commits */, 1 /* 1PC */, 0, 0)
  1127  }
  1128  
  1129  func TestTxnAbortCount(t *testing.T) {
  1130  	defer leaktest.AfterTest(t)()
  1131  	s, metrics, cleanupFn := setupMetricsTest(t)
  1132  	defer cleanupFn()
  1133  
  1134  	value := []byte("value")
  1135  
  1136  	const intentionalErrText = "intentional error to cause abort"
  1137  	// Test aborted transaction.
  1138  	if err := s.DB.Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
  1139  		key := []byte("key-abort")
  1140  
  1141  		if err := txn.Put(ctx, key, value); err != nil {
  1142  			t.Fatal(err)
  1143  		}
  1144  
  1145  		return errors.New(intentionalErrText)
  1146  	}); !testutils.IsError(err, intentionalErrText) {
  1147  		t.Fatalf("unexpected error: %v", err)
  1148  	}
  1149  	checkTxnMetrics(t, metrics, "abort txn", 0, 0, 1 /* aborts */, 0)
  1150  }
  1151  
  1152  func TestTxnRestartCount(t *testing.T) {
  1153  	defer leaktest.AfterTest(t)()
  1154  
  1155  	readKey := []byte("read")
  1156  	writeKey := []byte("write")
  1157  	value := []byte("value")
  1158  	ctx := context.Background()
  1159  
  1160  	s, metrics, cleanupFn := setupMetricsTest(t)
  1161  	defer cleanupFn()
  1162  
  1163  	// Start a transaction and read a key that we're going to modify outside the
  1164  	// txn. This ensures that refreshing the txn will not succeed, so a restart
  1165  	// will be necessary.
  1166  	txn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
  1167  	if _, err := txn.Get(ctx, readKey); err != nil {
  1168  		t.Fatal(err)
  1169  	}
  1170  
   1171  	// Write the read key outside of the transaction, at a higher timestamp. This
   1172  	// prevents the txn from refreshing its read span, so a restart will be necessary.
  1173  	if err := s.DB.Put(ctx, readKey, value); err != nil {
  1174  		t.Fatal(err)
  1175  	}
  1176  
  1177  	// Outside of the transaction, read the same key as will be
  1178  	// written within the transaction. This means that future
  1179  	// attempts to write will forward the txn timestamp.
  1180  	if _, err := s.DB.Get(ctx, writeKey); err != nil {
  1181  		t.Fatal(err)
  1182  	}
  1183  
   1184  	// This put will lay down an intent, and the txn timestamp will increase
   1185  	// beyond DeprecatedOrigTimestamp.
  1186  	if err := txn.Put(ctx, writeKey, value); err != nil {
  1187  		t.Fatal(err)
  1188  	}
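         	// The write timestamp is now ahead of the read timestamp, which
         	// guarantees a retry error when the transaction attempts to commit below.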
  1189  	proto := txn.TestingCloneTxn()
  1190  	if proto.WriteTimestamp.LessEq(proto.ReadTimestamp) {
  1191  		t.Errorf("expected timestamp to increase: %s", proto)
  1192  	}
  1193  
  1194  	// Wait for heartbeat to start.
  1195  	tc := txn.Sender().(*TxnCoordSender)
  1196  	testutils.SucceedsSoon(t, func() error {
  1197  		if !tc.IsTracking() {
  1198  			return errors.New("expected heartbeat to start")
  1199  		}
  1200  		return nil
  1201  	})
  1202  
  1203  	// Commit (should cause restart metric to increase).
  1204  	err := txn.CommitOrCleanup(ctx)
  1205  	assertTransactionRetryError(t, err)
  1206  	checkTxnMetrics(t, metrics, "restart txn", 0, 0, 1 /* aborts */, 1 /* restarts */)
  1207  }
  1208  
  1209  func TestTxnDurations(t *testing.T) {
  1210  	defer leaktest.AfterTest(t)()
  1211  	s, metrics, cleanupFn := setupMetricsTest(t)
  1212  	manual := s.Manual
  1213  	defer cleanupFn()
  1214  	const puts = 10
  1215  
  1216  	const incr int64 = 1000
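         	// Each transaction advances the manual clock by incr nanoseconds so that
         	// the Durations histogram records a nonzero latency per commit.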
  1217  	for i := 0; i < puts; i++ {
  1218  		key := roachpb.Key(fmt.Sprintf("key-txn-durations-%d", i))
  1219  		if err := s.DB.Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
  1220  			if err := txn.Put(ctx, key, []byte("val")); err != nil {
  1221  				return err
  1222  			}
  1223  			manual.Increment(incr)
  1224  			return nil
  1225  		}); err != nil {
  1226  			t.Fatal(err)
  1227  		}
  1228  	}
  1229  
  1230  	checkTxnMetrics(t, metrics, "txn durations", puts, 0, 0, 0)
  1231  
  1232  	hist := metrics.Durations
  1233  	// The clock is a bit odd in these tests, so I can't test the mean without
  1234  	// introducing spurious errors or being overly lax.
  1235  	//
  1236  	// TODO(cdo): look into cause of variance.
  1237  	if a, e := hist.TotalCount(), int64(puts); a != e {
  1238  		t.Fatalf("durations %d != expected %d", a, e)
  1239  	}
  1240  
  1241  	// Metrics lose fidelity, so we can't compare incr directly.
  1242  	if min, thresh := hist.Min(), incr-10; min < thresh {
  1243  		t.Fatalf("min %d < %d", min, thresh)
  1244  	}
  1245  }
  1246  
  1247  // TestAbortTransactionOnCommitErrors verifies that transactions are
  1248  // aborted on the correct errors.
  1249  func TestAbortTransactionOnCommitErrors(t *testing.T) {
  1250  	defer leaktest.AfterTest(t)()
  1251  	ctx := context.Background()
  1252  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1253  
  1254  	testCases := []struct {
  1255  		err        error
  1256  		errFn      func(roachpb.Transaction) *roachpb.Error
  1257  		asyncAbort bool
  1258  	}{
  1259  		{
  1260  			errFn: func(txn roachpb.Transaction) *roachpb.Error {
  1261  				const nodeID = 0
   1262  				// ReadWithinUncertaintyIntervalErrors need an observed timestamp to
   1263  				// have been recorded on the origin node.
  1264  				txn.UpdateObservedTimestamp(nodeID, makeTS(123, 0))
  1265  				return roachpb.NewErrorWithTxn(
  1266  					roachpb.NewReadWithinUncertaintyIntervalError(hlc.Timestamp{}, hlc.Timestamp{}, nil),
  1267  					&txn)
  1268  			},
  1269  			asyncAbort: false},
  1270  		{err: &roachpb.TransactionAbortedError{}, asyncAbort: true},
  1271  		{err: &roachpb.TransactionPushError{}, asyncAbort: false},
  1272  		{err: &roachpb.TransactionRetryError{}, asyncAbort: false},
  1273  		{err: &roachpb.RangeNotFoundError{}, asyncAbort: false},
  1274  		{err: &roachpb.RangeKeyMismatchError{}, asyncAbort: false},
  1275  		{err: &roachpb.TransactionStatusError{}, asyncAbort: false},
  1276  	}
  1277  
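         	// For each case, the commit attempt fails with the error above; the test
         	// then verifies that a rollback (abort) follows, either synchronously or
         	// asynchronously depending on asyncAbort.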
  1278  	for _, test := range testCases {
  1279  		t.Run(fmt.Sprintf("%T", test.err), func(t *testing.T) {
  1280  			var commit, abort atomic.Value
  1281  			commit.Store(false)
  1282  			abort.Store(false)
  1283  
  1284  			stopper := stop.NewStopper()
  1285  			defer stopper.Stop(ctx)
  1286  			var senderFn kv.SenderFunc = func(
  1287  				_ context.Context, ba roachpb.BatchRequest,
  1288  			) (*roachpb.BatchResponse, *roachpb.Error) {
  1289  				br := ba.CreateReply()
  1290  				br.Txn = ba.Txn.Clone()
  1291  
  1292  				if _, hasPut := ba.GetArg(roachpb.Put); hasPut {
  1293  					if _, ok := ba.Requests[0].GetInner().(*roachpb.PutRequest); !ok {
  1294  						t.Fatalf("expected Put")
  1295  					}
  1296  					union := &br.Responses[0] // avoid operating on copy
  1297  					union.MustSetInner(&roachpb.PutResponse{})
  1298  					if ba.Txn != nil && br.Txn == nil {
  1299  						br.Txn.Status = roachpb.PENDING
  1300  					}
  1301  				} else if et, hasET := ba.GetArg(roachpb.EndTxn); hasET {
  1302  					if et.(*roachpb.EndTxnRequest).Commit {
  1303  						commit.Store(true)
  1304  						if test.errFn != nil {
  1305  							return nil, test.errFn(*ba.Txn)
  1306  						}
  1307  						return nil, roachpb.NewErrorWithTxn(test.err, ba.Txn)
  1308  					}
  1309  					abort.Store(true)
  1310  				} else {
  1311  					t.Fatalf("unexpected batch: %s", ba)
  1312  				}
  1313  				return br, nil
  1314  			}
  1315  			ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1316  			factory := NewTxnCoordSenderFactory(
  1317  				TxnCoordSenderFactoryConfig{
  1318  					AmbientCtx: ambient,
  1319  					Clock:      clock,
  1320  					Stopper:    stopper,
  1321  					Settings:   cluster.MakeTestingClusterSettings(),
  1322  				},
  1323  				senderFn,
  1324  			)
  1325  
  1326  			db := kv.NewDB(ambient, factory, clock)
  1327  			txn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */)
  1328  			if pErr := txn.Put(ctx, "a", "b"); pErr != nil {
  1329  				t.Fatalf("put failed: %s", pErr)
  1330  			}
  1331  			if pErr := txn.CommitOrCleanup(ctx); pErr == nil {
  1332  				t.Fatalf("unexpected commit success")
  1333  			}
  1334  
  1335  			if !commit.Load().(bool) {
  1336  				t.Errorf("%T: failed to find initial commit request", test.err)
  1337  			}
  1338  			if !test.asyncAbort && !abort.Load().(bool) {
  1339  				t.Errorf("%T: failed to find expected synchronous abort", test.err)
  1340  			} else {
  1341  				testutils.SucceedsSoon(t, func() error {
  1342  					if !abort.Load().(bool) {
  1343  						return errors.Errorf("%T: failed to find expected asynchronous abort", test.err)
  1344  					}
  1345  					return nil
  1346  				})
  1347  			}
  1348  		})
  1349  	}
  1350  }
  1351  
  1352  // mockSender is a client.Sender implementation that passes requests to a list
  1353  // of provided matchers, in sequence. The first matcher that returns either a
  1354  // response or an error is used to provide the result for the request.
  1355  type mockSender struct {
  1356  	matchers []matcher
  1357  }
  1358  
  1359  var _ kv.Sender = &mockSender{}
  1360  
  1361  type matcher func(roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error)
  1362  
  1363  // match adds a matcher to the list of matchers.
  1364  func (s *mockSender) match(m matcher) {
  1365  	s.matchers = append(s.matchers, m)
  1366  }
  1367  
  1368  // Send implements the client.Sender interface.
  1369  func (s *mockSender) Send(
  1370  	_ context.Context, ba roachpb.BatchRequest,
  1371  ) (*roachpb.BatchResponse, *roachpb.Error) {
  1372  	for _, m := range s.matchers {
  1373  		br, pErr := m(ba)
  1374  		if br != nil || pErr != nil {
  1375  			return br, pErr
  1376  		}
  1377  	}
  1378  	// If none of the matchers triggered, just create an empty reply.
  1379  	br := ba.CreateReply()
  1380  	br.Txn = ba.Txn.Clone()
  1381  	return br, nil
  1382  }
  1383  
  1384  // Test that a rollback sent to the TxnCoordSender stops the heartbeat loop even
  1385  // if it encounters an error. As of June 2018, there's a separate code path for
  1386  // handling errors on rollback in this regard.
  1387  func TestRollbackErrorStopsHeartbeat(t *testing.T) {
  1388  	defer leaktest.AfterTest(t)()
  1389  	ctx := context.Background()
  1390  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1391  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1392  	sender := &mockSender{}
  1393  	stopper := stop.NewStopper()
  1394  	defer stopper.Stop(ctx)
  1395  
  1396  	factory := NewTxnCoordSenderFactory(
  1397  		TxnCoordSenderFactoryConfig{
  1398  			AmbientCtx: log.AmbientContext{Tracer: tracing.NewTracer()},
  1399  			Clock:      clock,
  1400  			Stopper:    stopper,
  1401  			Settings:   cluster.MakeTestingClusterSettings(),
  1402  		},
  1403  		sender,
  1404  	)
  1405  	db := kv.NewDB(ambient, factory, clock)
  1406  
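         	// Reply successfully to everything except EndTxn, which fails with an
         	// injected error.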
  1407  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1408  		if _, ok := ba.GetArg(roachpb.EndTxn); !ok {
  1409  			resp := ba.CreateReply()
  1410  			resp.Txn = ba.Txn
  1411  			return resp, nil
  1412  		}
  1413  		return nil, roachpb.NewErrorf("injected err")
  1414  	})
  1415  
  1416  	txn := kv.NewTxn(ctx, db, roachpb.NodeID(1))
  1417  	txnHeader := roachpb.Header{
  1418  		Txn: txn.TestingCloneTxn(),
  1419  	}
  1420  	if _, pErr := kv.SendWrappedWith(
  1421  		ctx, txn, txnHeader, &roachpb.PutRequest{
  1422  			RequestHeader: roachpb.RequestHeader{
  1423  				Key: roachpb.Key("a"),
  1424  			},
  1425  		},
  1426  	); pErr != nil {
  1427  		t.Fatal(pErr)
  1428  	}
  1429  	if !txn.Sender().(*TxnCoordSender).IsTracking() {
  1430  		t.Fatalf("expected TxnCoordSender to be tracking after the write")
  1431  	}
  1432  
  1433  	if _, pErr := kv.SendWrappedWith(
  1434  		ctx, txn, txnHeader,
  1435  		&roachpb.EndTxnRequest{Commit: false},
  1436  	); !testutils.IsPError(pErr, "injected err") {
  1437  		t.Fatal(pErr)
  1438  	}
  1439  
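        	// Even though the rollback returned an error, the heartbeat loop should
        	// still be stopped.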
  1440  	testutils.SucceedsSoon(t, func() error {
  1441  		if txn.Sender().(*TxnCoordSender).IsTracking() {
  1442  			return fmt.Errorf("still tracking")
  1443  		}
  1444  		return nil
  1445  	})
  1446  }
  1447  
  1448  // Test that lock tracking behaves correctly for transactions that attempt to
  1449  // run a batch containing an EndTxn. Since, in case of an error, it's not easy
  1450  // to determine whether any locks have been laid down (e.g. the batch may have
  1451  // been split by the DistSender with mixed success for the sub-batches, or a
  1452  // retriable error may have been returned), the test verifies that all
  1453  // possible locks are properly tracked and attached to a subsequent EndTxn.
  1454  func TestOnePCErrorTracking(t *testing.T) {
  1455  	defer leaktest.AfterTest(t)()
  1456  	ctx := context.Background()
  1457  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1458  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1459  	sender := &mockSender{}
  1460  	stopper := stop.NewStopper()
  1461  	defer stopper.Stop(ctx)
  1462  
  1463  	factory := NewTxnCoordSenderFactory(
  1464  		TxnCoordSenderFactoryConfig{
  1465  			AmbientCtx: ambient,
  1466  			Clock:      clock,
  1467  			Stopper:    stopper,
  1468  			Settings:   cluster.MakeTestingClusterSettings(),
  1469  		},
  1470  		sender,
  1471  	)
  1472  	db := kv.NewDB(ambient, factory, clock)
  1473  	keyA, keyB, keyC := roachpb.Key("a"), roachpb.Key("b"), roachpb.Key("c")
  1474  
  1475  	// Register a matcher catching the commit attempt.
  1476  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1477  		if et, ok := ba.GetArg(roachpb.EndTxn); !ok {
  1478  			return nil, nil
  1479  		} else if !et.(*roachpb.EndTxnRequest).Commit {
  1480  			return nil, nil
  1481  		}
  1482  		return nil, roachpb.NewErrorf("injected err")
  1483  	})
  1484  	// Register a matcher catching the rollback attempt.
  1485  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1486  		et, ok := ba.GetArg(roachpb.EndTxn)
  1487  		if !ok {
  1488  			return nil, nil
  1489  		}
  1490  		etReq := et.(*roachpb.EndTxnRequest)
  1491  		if etReq.Commit {
  1492  			return nil, nil
  1493  		}
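        		// The rollback is expected to carry the point lock from the Put on
        		// keyA and the ranged lock from the ScanForUpdate over [keyB, keyC).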
  1494  		expLocks := []roachpb.Span{{Key: keyA}, {Key: keyB, EndKey: keyC}}
  1495  		locks := etReq.LockSpans
  1496  		if !reflect.DeepEqual(locks, expLocks) {
  1497  			return nil, roachpb.NewErrorf("expected locks %s, got: %s", expLocks, locks)
  1498  		}
  1499  		resp := ba.CreateReply()
  1500  		// Set the response's txn to the Aborted status (as the server would). This
  1501  		// will make the TxnCoordSender stop the heartbeat loop.
  1502  		resp.Txn = ba.Txn.Clone()
  1503  		resp.Txn.Status = roachpb.ABORTED
  1504  		return resp, nil
  1505  	})
  1506  
  1507  	txn := kv.NewTxn(ctx, db, roachpb.NodeID(1))
  1508  	txnHeader := roachpb.Header{
  1509  		Txn: txn.TestingCloneTxn(),
  1510  	}
  1511  	b := txn.NewBatch()
  1512  	b.Put(keyA, "test value")
  1513  	b.ScanForUpdate(keyB, keyC)
  1514  	if err := txn.CommitInBatch(ctx, b); !testutils.IsError(err, "injected err") {
  1515  		t.Fatal(err)
  1516  	}
  1517  
  1518  	// Now send a rollback and verify that the TxnCoordSender attaches the locks
  1519  	// to it.
  1520  	if _, pErr := kv.SendWrappedWith(
  1521  		ctx, txn, txnHeader,
  1522  		&roachpb.EndTxnRequest{Commit: false},
  1523  	); pErr != nil {
  1524  		t.Fatal(pErr)
  1525  	}
  1526  
  1527  	// As always, check that the rollback we just sent stops the heartbeat loop.
  1528  	testutils.SucceedsSoon(t, func() error {
  1529  		if txn.Sender().(*TxnCoordSender).IsTracking() {
  1530  			return fmt.Errorf("still tracking")
  1531  		}
  1532  		return nil
  1533  	})
  1534  }
  1535  
  1536  // TestCommitReadOnlyTransaction verifies that a read-only transaction does
  1537  // not send an EndTxnRequest.
  1538  func TestCommitReadOnlyTransaction(t *testing.T) {
  1539  	defer leaktest.AfterTest(t)()
  1540  	ctx := context.Background()
  1541  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1542  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1543  	sender := &mockSender{}
  1544  	stopper := stop.NewStopper()
  1545  	defer stopper.Stop(ctx)
  1546  
  1547  	var calls []roachpb.Method
  1548  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1549  		calls = append(calls, ba.Methods()...)
  1550  		return nil, nil
  1551  	})
  1552  
  1553  	factory := NewTxnCoordSenderFactory(
  1554  		TxnCoordSenderFactoryConfig{
  1555  			AmbientCtx: ambient,
  1556  			Clock:      clock,
  1557  			Stopper:    stopper,
  1558  			Settings:   cluster.MakeTestingClusterSettings(),
  1559  		},
  1560  		sender,
  1561  	)
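        	// Whether the commit is explicit (CommitInBatch) or implicit (the
        	// retryable func returning), a read-only txn should never send an EndTxn.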
  1562  	testutils.RunTrueAndFalse(t, "explicit txn", func(t *testing.T, explicitTxn bool) {
  1563  		testutils.RunTrueAndFalse(t, "with get", func(t *testing.T, withGet bool) {
  1564  			calls = nil
  1565  			db := kv.NewDB(testutils.MakeAmbientCtx(), factory, clock)
  1566  			if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1567  				b := txn.NewBatch()
  1568  				if withGet {
  1569  					b.Get("foo")
  1570  				}
  1571  				if explicitTxn {
  1572  					return txn.CommitInBatch(ctx, b)
  1573  				}
  1574  				return txn.Run(ctx, b)
  1575  			}); err != nil {
  1576  				t.Fatal(err)
  1577  			}
  1578  
  1579  			expectedCalls := []roachpb.Method(nil)
  1580  			if withGet {
  1581  				expectedCalls = append(expectedCalls, roachpb.Get)
  1582  			}
  1583  			if !reflect.DeepEqual(expectedCalls, calls) {
  1584  				t.Fatalf("expected %s, got %s", expectedCalls, calls)
  1585  			}
  1586  		})
  1587  	})
  1588  }
  1589  
  1590  // TestCommitMutatingTransaction verifies that a transaction is committed
  1591  // upon successful invocation of the retryable func.
  1592  func TestCommitMutatingTransaction(t *testing.T) {
  1593  	defer leaktest.AfterTest(t)()
  1594  	ctx := context.Background()
  1595  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1596  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1597  	sender := &mockSender{}
  1598  	stopper := stop.NewStopper()
  1599  	defer stopper.Stop(ctx)
  1600  
  1601  	var calls []roachpb.Method
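        	// Record every method seen, check that the txn is anchored at key "a",
        	// and move the txn to COMMITTED once the EndTxn arrives.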
  1602  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1603  		br := ba.CreateReply()
  1604  		br.Txn = ba.Txn.Clone()
  1605  
  1606  		calls = append(calls, ba.Methods()...)
  1607  		if !bytes.Equal(ba.Txn.Key, roachpb.Key("a")) {
  1608  			t.Errorf("expected transaction key to be \"a\"; got %s", ba.Txn.Key)
  1609  		}
  1610  		if et, ok := ba.GetArg(roachpb.EndTxn); ok {
  1611  			if !et.(*roachpb.EndTxnRequest).Commit {
  1612  				t.Errorf("expected commit to be true")
  1613  			}
  1614  			br.Txn.Status = roachpb.COMMITTED
  1615  		}
  1616  		return br, nil
  1617  	})
  1618  
  1619  	factory := NewTxnCoordSenderFactory(
  1620  		TxnCoordSenderFactoryConfig{
  1621  			AmbientCtx: ambient,
  1622  			Clock:      clock,
  1623  			Stopper:    stopper,
  1624  			Settings:   cluster.MakeTestingClusterSettings(),
  1625  		},
  1626  		sender,
  1627  	)
  1628  
  1629  	// Test all transactional write methods.
  1630  	testArgs := []struct {
  1631  		f         func(ctx context.Context, txn *kv.Txn) error
  1632  		expMethod roachpb.Method
  1633  		// pointWrite is set if the method is a "point write", which means that it
  1634  		// will be pipelined and we should expect a QueryIntent request at commit
  1635  		// time.
  1636  		pointWrite bool
  1637  	}{
  1638  		{
  1639  			f:          func(ctx context.Context, txn *kv.Txn) error { return txn.Put(ctx, "a", "b") },
  1640  			expMethod:  roachpb.Put,
  1641  			pointWrite: true,
  1642  		},
  1643  		{
  1644  			f:          func(ctx context.Context, txn *kv.Txn) error { return txn.CPut(ctx, "a", "b", nil) },
  1645  			expMethod:  roachpb.ConditionalPut,
  1646  			pointWrite: true,
  1647  		},
  1648  		{
  1649  			f: func(ctx context.Context, txn *kv.Txn) error {
  1650  				_, err := txn.Inc(ctx, "a", 1)
  1651  				return err
  1652  			},
  1653  			expMethod:  roachpb.Increment,
  1654  			pointWrite: true,
  1655  		},
  1656  		{
  1657  			f:          func(ctx context.Context, txn *kv.Txn) error { return txn.Del(ctx, "a") },
  1658  			expMethod:  roachpb.Delete,
  1659  			pointWrite: true,
  1660  		},
  1661  		{
  1662  			f:          func(ctx context.Context, txn *kv.Txn) error { return txn.DelRange(ctx, "a", "b") },
  1663  			expMethod:  roachpb.DeleteRange,
  1664  			pointWrite: false,
  1665  		},
  1666  	}
  1667  	for i, test := range testArgs {
  1668  		t.Run(test.expMethod.String(), func(t *testing.T) {
  1669  			calls = nil
  1670  			db := kv.NewDB(testutils.MakeAmbientCtx(), factory, clock)
  1671  			if err := db.Txn(ctx, test.f); err != nil {
  1672  				t.Fatalf("%d: unexpected error on commit: %s", i, err)
  1673  			}
  1674  			expectedCalls := []roachpb.Method{test.expMethod}
  1675  			if test.pointWrite {
  1676  				expectedCalls = append(expectedCalls, roachpb.QueryIntent)
  1677  			}
  1678  			expectedCalls = append(expectedCalls, roachpb.EndTxn)
  1679  			if !reflect.DeepEqual(expectedCalls, calls) {
  1680  				t.Fatalf("%d: expected %s, got %s", i, expectedCalls, calls)
  1681  			}
  1682  		})
  1683  	}
  1684  }
  1685  
  1686  // TestAbortReadOnlyTransaction verifies that aborting a read-only
  1687  // transaction does not prompt an EndTxn call.
  1688  func TestAbortReadOnlyTransaction(t *testing.T) {
  1689  	defer leaktest.AfterTest(t)()
  1690  	ctx := context.Background()
  1691  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1692  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1693  	sender := &mockSender{}
  1694  	stopper := stop.NewStopper()
  1695  	defer stopper.Stop(ctx)
  1696  
  1697  	var calls []roachpb.Method
  1698  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1699  		calls = append(calls, ba.Methods()...)
  1700  		return nil, nil
  1701  	})
  1702  
  1703  	factory := NewTxnCoordSenderFactory(
  1704  		TxnCoordSenderFactoryConfig{
  1705  			AmbientCtx: ambient,
  1706  			Clock:      clock,
  1707  			Stopper:    stopper,
  1708  			Settings:   cluster.MakeTestingClusterSettings(),
  1709  		},
  1710  		sender,
  1711  	)
  1712  	db := kv.NewDB(testutils.MakeAmbientCtx(), factory, clock)
  1713  	if err := db.Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
  1714  		return errors.New("foo")
  1715  	}); err == nil {
  1716  		t.Fatal("expected error on abort")
  1717  	}
  1718  
  1719  	if calls != nil {
  1720  		t.Fatalf("expected no calls, got %s", calls)
  1721  	}
  1722  }
  1723  
  1724  // TestEndWriteRestartReadOnlyTransaction verifies that if a transaction
  1725  // writes, then restarts and turns read-only, an explicit EndTxn call is
  1726  // still sent if the retryable func didn't send one, regardless of whether
  1727  // there is an error or not.
  1728  func TestEndWriteRestartReadOnlyTransaction(t *testing.T) {
  1729  	defer leaktest.AfterTest(t)()
  1730  	ctx := context.Background()
  1731  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1732  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1733  	sender := &mockSender{}
  1734  	stopper := stop.NewStopper()
  1735  	defer stopper.Stop(ctx)
  1736  
  1737  	var calls []roachpb.Method
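        	// Reply with a retryable error to any Put or locking Scan; commit EndTxn
        	// requests.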
  1738  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1739  		br := ba.CreateReply()
  1740  		br.Txn = ba.Txn.Clone()
  1741  
  1742  		calls = append(calls, ba.Methods()...)
  1743  		switch ba.Requests[0].GetInner().Method() {
  1744  		case roachpb.Put, roachpb.Scan:
  1745  			return nil, roachpb.NewErrorWithTxn(
  1746  				roachpb.NewTransactionRetryError(roachpb.RETRY_SERIALIZABLE, "test err"),
  1747  				ba.Txn)
  1748  		case roachpb.EndTxn:
  1749  			br.Txn.Status = roachpb.COMMITTED
  1750  		}
  1751  		return br, nil
  1752  	})
  1753  
  1754  	factory := NewTxnCoordSenderFactory(
  1755  		TxnCoordSenderFactoryConfig{
  1756  			AmbientCtx: ambient,
  1757  			Clock:      clock,
  1758  			Stopper:    stopper,
  1759  			Settings:   cluster.MakeTestingClusterSettings(),
  1760  			TestingKnobs: ClientTestingKnobs{
  1761  			// Disable span refresh, otherwise it kicks in and retries batches
  1762  			// by itself.
  1763  				MaxTxnRefreshAttempts: -1,
  1764  			},
  1765  		},
  1766  		sender,
  1767  	)
  1768  	db := kv.NewDB(testutils.MakeAmbientCtx(), factory, clock)
  1769  
  1770  	testutils.RunTrueAndFalse(t, "write", func(t *testing.T, write bool) {
  1771  		testutils.RunTrueAndFalse(t, "success", func(t *testing.T, success bool) {
  1772  			calls = nil
  1773  			firstIter := true
  1774  			if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1775  				if firstIter {
  1776  					firstIter = false
  1777  					var err error
  1778  					if write {
  1779  						err = txn.Put(ctx, "consider", "phlebas")
  1780  					} else /* locking read */ {
  1781  						_, err = txn.ScanForUpdate(ctx, "a", "b", 0)
  1782  					}
  1783  					if err == nil {
  1784  						t.Fatal("missing injected retriable error")
  1785  					}
  1786  				}
  1787  				if !success {
  1788  					return errors.New("aborting on purpose")
  1789  				}
  1790  				return nil
  1791  			}); err == nil != success {
  1792  				t.Fatalf("expected error: %t, got error: %v", !success, err)
  1793  			}
  1794  
  1795  			var expCalls []roachpb.Method
  1796  			if write {
  1797  				expCalls = []roachpb.Method{roachpb.Put, roachpb.EndTxn}
  1798  			} else {
  1799  				expCalls = []roachpb.Method{roachpb.Scan, roachpb.EndTxn}
  1800  			}
  1801  			if !reflect.DeepEqual(expCalls, calls) {
  1802  				t.Fatalf("expected %v, got %v", expCalls, calls)
  1803  			}
  1804  		})
  1805  	})
  1806  }
  1807  
  1808  // TestTransactionKeyNotChangedInRestart verifies that if the transaction
  1809  // already has a key (we're in a restart), the key in the transaction request is
  1810  // not changed.
  1811  func TestTransactionKeyNotChangedInRestart(t *testing.T) {
  1812  	defer leaktest.AfterTest(t)()
  1813  	ctx := context.Background()
  1814  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1815  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1816  	sender := &mockSender{}
  1817  	stopper := stop.NewStopper()
  1818  	defer stopper.Stop(ctx)
  1819  
  1820  	keys := []string{"first", "second"}
  1821  	attempt := 0
  1822  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1823  		br := ba.CreateReply()
  1824  		br.Txn = ba.Txn.Clone()
  1825  
  1826  		// Ignore the final EndTxnRequest.
  1827  		if _, ok := ba.GetArg(roachpb.EndTxn); ok {
  1828  			br.Txn.Status = roachpb.COMMITTED
  1829  			return br, nil
  1830  		}
  1831  
  1832  		// Both attempts should have a PutRequest.
  1833  		if _, ok := ba.GetArg(roachpb.Put); !ok {
  1834  			t.Fatalf("failed to find a put request: %v", ba)
  1835  		}
  1836  
  1837  		// In the first attempt, the transaction key is the key of the first write command.
  1838  		// This key is retained between restarts, so we see the same key in the second attempt.
  1839  		if expectedKey := []byte(keys[0]); !bytes.Equal(expectedKey, ba.Txn.Key) {
  1840  			t.Fatalf("expected transaction key %v, got %v", expectedKey, ba.Txn.Key)
  1841  		}
  1842  
  1843  		if attempt == 0 {
  1844  			return nil, roachpb.NewErrorWithTxn(
  1845  				roachpb.NewTransactionRetryError(roachpb.RETRY_SERIALIZABLE, "test err"),
  1846  				ba.Txn)
  1847  		}
  1848  		return br, nil
  1849  	})
  1850  	factory := NewTxnCoordSenderFactory(
  1851  		TxnCoordSenderFactoryConfig{
  1852  			AmbientCtx: ambient,
  1853  			Clock:      clock,
  1854  			Stopper:    stopper,
  1855  			Settings:   cluster.MakeTestingClusterSettings(),
  1856  		},
  1857  		sender,
  1858  	)
  1859  	db := kv.NewDB(testutils.MakeAmbientCtx(), factory, clock)
  1860  
  1861  	if err := db.Txn(context.Background(), func(ctx context.Context, txn *kv.Txn) error {
  1862  		defer func() { attempt++ }()
  1863  		b := txn.NewBatch()
  1864  		b.Put(keys[attempt], "b")
  1865  		return txn.Run(ctx, b)
  1866  	}); err != nil {
  1867  		t.Errorf("unexpected error on commit: %s", err)
  1868  	}
  1869  	minimumAttempts := 2
  1870  	if attempt < minimumAttempts {
  1871  		t.Errorf("expected attempt count >= %d, got %d", minimumAttempts, attempt)
  1872  	}
  1873  }
  1874  
  1875  // TestSequenceNumbers verifies that requests are given sequence numbers and
  1876  // that they are incremented on successive commands.
  1877  func TestSequenceNumbers(t *testing.T) {
  1878  	defer leaktest.AfterTest(t)()
  1879  	ctx := context.Background()
  1880  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1881  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1882  	sender := &mockSender{}
  1883  	stopper := stop.NewStopper()
  1884  	defer stopper.Stop(ctx)
  1885  
  1886  	var expSequence enginepb.TxnSeq
  1887  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1888  		for _, ru := range ba.Requests {
  1889  			args := ru.GetInner()
  1890  			if args.Method() == roachpb.QueryIntent {
  1891  				// QueryIntent requests don't have sequence numbers.
  1892  				continue
  1893  			}
  1894  			expSequence++
  1895  			if seq := args.Header().Sequence; expSequence != seq {
  1896  				t.Errorf("expected Request sequence %d; got %d. request: %T",
  1897  					expSequence, seq, args)
  1898  			}
  1899  		}
  1900  		br := ba.CreateReply()
  1901  		br.Txn = ba.Txn
  1902  		return br, nil
  1903  	})
  1904  
  1905  	factory := NewTxnCoordSenderFactory(
  1906  		TxnCoordSenderFactoryConfig{
  1907  			AmbientCtx: ambient,
  1908  			Clock:      clock,
  1909  			Stopper:    stopper,
  1910  			Settings:   cluster.MakeTestingClusterSettings(),
  1911  		},
  1912  		sender,
  1913  	)
  1914  	db := kv.NewDB(testutils.MakeAmbientCtx(), factory, clock)
  1915  	txn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */)
  1916  
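        	// Send batches of increasing size; the matcher above verifies that
        	// sequence numbers keep incrementing across requests and batches.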
  1917  	for i := 0; i < 5; i++ {
  1918  		var ba roachpb.BatchRequest
  1919  		for j := 0; j < i; j++ {
  1920  			ba.Add(roachpb.NewPut(roachpb.Key("a"), roachpb.MakeValueFromString("foo")).(*roachpb.PutRequest))
  1921  		}
  1922  		if _, pErr := txn.Send(ctx, ba); pErr != nil {
  1923  			t.Fatal(pErr)
  1924  		}
  1925  	}
  1926  }
  1927  
  1928  // TestConcurrentTxnRequestsProhibited verifies that multiple requests cannot
  1929  // be executed on a transaction at the same time from multiple goroutines.
  1930  func TestConcurrentTxnRequestsProhibited(t *testing.T) {
  1931  	defer leaktest.AfterTest(t)()
  1932  	ctx := context.Background()
  1933  	clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond)
  1934  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1935  	sender := &mockSender{}
  1936  	stopper := stop.NewStopper()
  1937  	defer stopper.Stop(ctx)
  1938  
  1939  	putSync := make(chan struct{})
  1940  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  1941  		if _, ok := ba.GetArg(roachpb.Put); ok {
  1942  			// Block the Put until the Get runs.
  1943  			putSync <- struct{}{}
  1944  			<-putSync
  1945  		}
  1946  		br := ba.CreateReply()
  1947  		br.Txn = ba.Txn.Clone()
  1948  		if _, ok := ba.GetArg(roachpb.EndTxn); ok {
  1949  			br.Txn.Status = roachpb.COMMITTED
  1950  		}
  1951  		return br, nil
  1952  	})
  1953  
  1954  	factory := NewTxnCoordSenderFactory(
  1955  		TxnCoordSenderFactoryConfig{
  1956  			AmbientCtx: ambient,
  1957  			Clock:      clock,
  1958  			Stopper:    stopper,
  1959  			Settings:   cluster.MakeTestingClusterSettings(),
  1960  		},
  1961  		sender,
  1962  	)
  1963  	db := kv.NewDB(ambient, factory, clock)
  1964  
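        	// Run a Put and a Get concurrently on the same txn. The matcher above
        	// blocks the Put until the Get has been issued, so the two requests
        	// overlap and the client should detect the concurrent use.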
  1965  	err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  1966  		g, gCtx := errgroup.WithContext(ctx)
  1967  		g.Go(func() error {
  1968  			return txn.Put(gCtx, "test_put", "val")
  1969  		})
  1970  		g.Go(func() error {
  1971  			// Wait for the Put to be blocked.
  1972  			<-putSync
  1973  			_, err := txn.Get(gCtx, "test_get")
  1974  			// Unblock the Put.
  1975  			putSync <- struct{}{}
  1976  			return err
  1977  		})
  1978  		return g.Wait()
  1979  	})
  1980  	require.Regexp(t, "concurrent txn use detected", err)
  1981  }
  1982  
  1983  // TestTxnRequestTxnTimestamp verifies that the txn timestamp is only ever
  1984  // ratcheted forward by the timestamps returned on successive requests.
  1985  func TestTxnRequestTxnTimestamp(t *testing.T) {
  1986  	defer leaktest.AfterTest(t)()
  1987  	ctx := context.Background()
  1988  	manual := hlc.NewManualClock(123)
  1989  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
  1990  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  1991  	sender := &mockSender{}
  1992  	stopper := stop.NewStopper()
  1993  	defer stopper.Stop(ctx)
  1994  
  1995  	factory := NewTxnCoordSenderFactory(
  1996  		TxnCoordSenderFactoryConfig{
  1997  			AmbientCtx: ambient,
  1998  			Clock:      clock,
  1999  			Stopper:    stopper,
  2000  			Settings:   cluster.MakeTestingClusterSettings(),
  2001  		},
  2002  		sender,
  2003  	)
  2004  	db := kv.NewDB(testutils.MakeAmbientCtx(), factory, clock)
  2005  
  2006  	curReq := 0
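        	// Each step lists the timestamp expected on the outgoing request and the
        	// timestamp carried by the mock response. The txn timestamp should only
        	// ever move forward, even when a response reports an older timestamp.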
  2007  	requests := []struct {
  2008  		expRequestTS, responseTS hlc.Timestamp
  2009  	}{
  2010  		{hlc.Timestamp{WallTime: 5, Logical: 0}, hlc.Timestamp{WallTime: 10, Logical: 0}},
  2011  		{hlc.Timestamp{WallTime: 10, Logical: 0}, hlc.Timestamp{WallTime: 10, Logical: 1}},
  2012  		{hlc.Timestamp{WallTime: 10, Logical: 1}, hlc.Timestamp{WallTime: 10, Logical: 0}},
  2013  		{hlc.Timestamp{WallTime: 10, Logical: 1}, hlc.Timestamp{WallTime: 20, Logical: 1}},
  2014  		{hlc.Timestamp{WallTime: 20, Logical: 1}, hlc.Timestamp{WallTime: 20, Logical: 1}},
  2015  		{hlc.Timestamp{WallTime: 20, Logical: 1}, hlc.Timestamp{WallTime: 19, Logical: 0}},
  2016  		{hlc.Timestamp{WallTime: 20, Logical: 1}, hlc.Timestamp{WallTime: 20, Logical: 1}},
  2017  	}
  2018  
  2019  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  2020  		req := requests[curReq]
  2021  		if req.expRequestTS != ba.Txn.WriteTimestamp {
  2022  			return nil, roachpb.NewErrorf("%d: expected ts %s got %s",
  2023  				curReq, req.expRequestTS, ba.Txn.WriteTimestamp)
  2024  		}
  2025  
  2026  		br := ba.CreateReply()
  2027  		br.Txn = ba.Txn.Clone()
  2028  		br.Txn.WriteTimestamp.Forward(requests[curReq].responseTS)
  2029  		return br, nil
  2030  	})
  2031  
  2032  	manual.Set(requests[0].expRequestTS.WallTime)
  2033  
  2034  	if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  2035  		for curReq = range requests {
  2036  			if _, err := txn.Get(ctx, "k"); err != nil {
  2037  				return err
  2038  			}
  2039  		}
  2040  		return nil
  2041  	}); err != nil {
  2042  		t.Fatal(err)
  2043  	}
  2044  }
  2045  
  2046  // TestReadOnlyTxnObeysDeadline tests that read-only transactions obey the
  2047  // deadline. Read-only transactions have their EndTxn elided, so the enforcement
  2048  // of the deadline is done in the client.
  2049  func TestReadOnlyTxnObeysDeadline(t *testing.T) {
  2050  	defer leaktest.AfterTest(t)()
  2051  	ctx := context.Background()
  2052  	manual := hlc.NewManualClock(123)
  2053  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
  2054  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  2055  	sender := &mockSender{}
  2056  	stopper := stop.NewStopper()
  2057  	defer stopper.Stop(ctx)
  2058  
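        	// Bump the clock on every Get and forward the response txn timestamp,
        	// pushing the txn past the deadline set below.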
  2059  	sender.match(func(ba roachpb.BatchRequest) (*roachpb.BatchResponse, *roachpb.Error) {
  2060  		if _, ok := ba.GetArg(roachpb.Get); ok {
  2061  			manual.Increment(100)
  2062  			br := ba.CreateReply()
  2063  			br.Txn = ba.Txn.Clone()
  2064  			br.Txn.WriteTimestamp.Forward(clock.Now())
  2065  			return br, nil
  2066  		}
  2067  		return nil, nil
  2068  	})
  2069  
  2070  	factory := NewTxnCoordSenderFactory(
  2071  		TxnCoordSenderFactoryConfig{
  2072  			AmbientCtx: ambient,
  2073  			Clock:      clock,
  2074  			Stopper:    stopper,
  2075  			Settings:   cluster.MakeTestingClusterSettings(),
  2076  		},
  2077  		sender,
  2078  	)
  2079  	db := kv.NewDB(testutils.MakeAmbientCtx(), factory, clock)
  2080  
  2081  	// We're going to run two tests: one where the EndTxn is by itself in a
  2082  	// batch, one where it is not. As of June 2018, the EndTxn is elided in
  2083  	// different ways in the two cases.
  2084  
  2085  	t.Run("standalone commit", func(t *testing.T) {
  2086  		txn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */)
  2087  		// Set a deadline. We'll generate a retriable error with a higher timestamp.
  2088  		txn.UpdateDeadlineMaybe(ctx, clock.Now())
  2089  		if _, err := txn.Get(ctx, "k"); err != nil {
  2090  			t.Fatal(err)
  2091  		}
  2092  		err := txn.Commit(ctx)
  2093  		assertTransactionRetryError(t, err)
  2094  		if !testutils.IsError(err, "RETRY_COMMIT_DEADLINE_EXCEEDED") {
  2095  			t.Fatalf("expected deadline exceeded, got: %s", err)
  2096  		}
  2097  	})
  2098  
  2099  	t.Run("commit in batch", func(t *testing.T) {
  2100  		txn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */)
  2101  		// Set a deadline. We'll generate a retriable error with a higher timestamp.
  2102  		txn.UpdateDeadlineMaybe(ctx, clock.Now())
  2103  		b := txn.NewBatch()
  2104  		b.Get("k")
  2105  		err := txn.CommitInBatch(ctx, b)
  2106  		assertTransactionRetryError(t, err)
  2107  		if !testutils.IsError(err, "RETRY_COMMIT_DEADLINE_EXCEEDED") {
  2108  			t.Fatalf("expected deadline exceeded, got: %s", err)
  2109  		}
  2110  	})
  2111  }
  2112  
  2113  // TestTxnCoordSenderPipelining verifies that transactional pipelining of writes
  2114  // is enabled by default in a transaction and is disabled after
  2115  // DisablePipelining is called. It also verifies that DisablePipelining returns
  2116  // an error if the transaction has already performed an operation.
  2117  func TestTxnCoordSenderPipelining(t *testing.T) {
  2118  	defer leaktest.AfterTest(t)()
  2119  
  2120  	ctx := context.Background()
  2121  	s := createTestDB(t)
  2122  	defer s.Stop()
  2123  	distSender := s.DB.GetFactory().(*TxnCoordSenderFactory).NonTransactionalSender()
  2124  
  2125  	var calls []roachpb.Method
  2126  	var senderFn kv.SenderFunc = func(
  2127  		ctx context.Context, ba roachpb.BatchRequest,
  2128  	) (*roachpb.BatchResponse, *roachpb.Error) {
  2129  		calls = append(calls, ba.Methods()...)
  2130  		if et, ok := ba.GetArg(roachpb.EndTxn); ok {
  2131  			// Ensure that no transactions enter a STAGING state.
  2132  			et.(*roachpb.EndTxnRequest).InFlightWrites = nil
  2133  		}
  2134  		return distSender.Send(ctx, ba)
  2135  	}
  2136  
  2137  	ambientCtx := log.AmbientContext{Tracer: tracing.NewTracer()}
  2138  	tsf := NewTxnCoordSenderFactory(TxnCoordSenderFactoryConfig{
  2139  		AmbientCtx: ambientCtx,
  2140  		Settings:   s.Cfg.Settings,
  2141  		Clock:      s.Clock,
  2142  		Stopper:    s.Stopper,
  2143  	}, senderFn)
  2144  	db := kv.NewDB(ambientCtx, tsf, s.Clock)
  2145  
  2146  	err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  2147  		return txn.Put(ctx, "key", "val")
  2148  	})
  2149  	if err != nil {
  2150  		t.Fatal(err)
  2151  	}
  2152  
  2153  	err = db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  2154  		if err := txn.DisablePipelining(); err != nil {
  2155  			return err
  2156  		}
  2157  		return txn.Put(ctx, "key", "val")
  2158  	})
  2159  	if err != nil {
  2160  		t.Fatal(err)
  2161  	}
  2162  
  2163  	require.Equal(t, []roachpb.Method{
  2164  		roachpb.Put, roachpb.QueryIntent, roachpb.EndTxn,
  2165  		roachpb.Put, roachpb.EndTxn,
  2166  	}, calls)
  2167  
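        	// DisablePipelining must be called before the txn performs any
        	// operations; calling it after a Put or a Get should fail.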
  2168  	for _, action := range []func(ctx context.Context, txn *kv.Txn) error{
  2169  		func(ctx context.Context, txn *kv.Txn) error { return txn.Put(ctx, "key", "val") },
  2170  		func(ctx context.Context, txn *kv.Txn) error { _, err := txn.Get(ctx, "key"); return err },
  2171  	} {
  2172  		err = db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  2173  			if err := action(ctx, txn); err != nil {
  2174  				t.Fatal(err)
  2175  			}
  2176  			return txn.DisablePipelining()
  2177  		})
  2178  		if exp := "cannot disable pipelining on a running transaction"; !testutils.IsError(err, exp) {
  2179  			t.Fatalf("expected %q error, but got %v", exp, err)
  2180  		}
  2181  	}
  2182  }
  2183  
  2184  // Test that a txn's anchor is set to the first write key in batches mixing
  2185  // reads with writes.
  2186  func TestAnchorKey(t *testing.T) {
  2187  	defer leaktest.AfterTest(t)()
  2188  
  2189  	ctx := context.Background()
  2190  	manual := hlc.NewManualClock(123)
  2191  	clock := hlc.NewClock(manual.UnixNano, time.Nanosecond)
  2192  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
  2193  	stopper := stop.NewStopper()
  2194  	defer stopper.Stop(ctx)
  2195  
  2196  	key1 := roachpb.Key("a")
  2197  	key2 := roachpb.Key("b")
  2198  
  2199  	var senderFn kv.SenderFunc = func(
  2200  		ctx context.Context, ba roachpb.BatchRequest,
  2201  	) (*roachpb.BatchResponse, *roachpb.Error) {
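        		// The txn should be anchored at key2, the first write key in the
        		// batch, rather than at key1, which is only read.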
  2202  		if !roachpb.Key(ba.Txn.Key).Equal(key2) {
  2203  			t.Fatalf("expected anchor %q, got %q", key2, ba.Txn.Key)
  2204  		}
  2205  		br := ba.CreateReply()
  2206  		br.Txn = ba.Txn.Clone()
  2207  		if _, ok := ba.GetArg(roachpb.EndTxn); ok {
  2208  			br.Txn.Status = roachpb.COMMITTED
  2209  		}
  2210  		return br, nil
  2211  	}
  2212  
  2213  	factory := NewTxnCoordSenderFactory(
  2214  		TxnCoordSenderFactoryConfig{
  2215  			AmbientCtx: ambient,
  2216  			Clock:      clock,
  2217  			Stopper:    stopper,
  2218  			Settings:   cluster.MakeTestingClusterSettings(),
  2219  		},
  2220  		senderFn,
  2221  	)
  2222  	db := kv.NewDB(testutils.MakeAmbientCtx(), factory, clock)
  2223  
  2224  	if err := db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error {
  2225  		ba := txn.NewBatch()
  2226  		ba.Get(key1)
  2227  		ba.Put(key2, "val")
  2228  		return txn.Run(ctx, ba)
  2229  	}); err != nil {
  2230  		t.Fatal(err)
  2231  	}
  2232  }
  2233  
  2234  // Test that a leaf txn returns a raw error when "rejecting a client" (a client
  2235  // sending something after the txn is known to be aborted), not a
  2236  // TransactionRetryWithProtoRefreshError. This is important as leaves are not supposed to create
  2237  // "handled" errors; instead the DistSQL infra knows to recognize raw retryable
  2238  // errors and feed them to the root txn.
  2239  func TestLeafTxnClientRejectError(t *testing.T) {
  2240  	defer leaktest.AfterTest(t)()
  2241  
  2242  	// We're going to inject an error so that a leaf txn is "poisoned". This can
  2243  	// happen, for example, if the leaf is used concurrently by multiple requests,
  2244  	// where the first one gets a TransactionAbortedError.
  2245  	errKey := roachpb.Key("a")
  2246  	knobs := &kvserver.StoreTestingKnobs{
  2247  		TestingRequestFilter: func(_ context.Context, ba roachpb.BatchRequest) *roachpb.Error {
  2248  			if g, ok := ba.GetArg(roachpb.Get); ok && g.(*roachpb.GetRequest).Key.Equal(errKey) {
  2249  				txn := ba.Txn.Clone()
  2250  				txn.Status = roachpb.ABORTED
  2251  				return roachpb.NewErrorWithTxn(
  2252  					roachpb.NewTransactionAbortedError(roachpb.ABORT_REASON_UNKNOWN), txn,
  2253  				)
  2254  			}
  2255  			return nil
  2256  		},
  2257  	}
  2258  
  2259  	s := createTestDBWithContextAndKnobs(t, kv.DefaultDBContext(), knobs)
  2260  	defer s.Stop()
  2261  
  2262  	ctx := context.Background()
  2263  	rootTxn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
  2264  	leafInputState := rootTxn.GetLeafTxnInputState(ctx)
  2265  
  2266  	// Now create a second, leaf coordinator.
  2267  	leafTxn := kv.NewLeafTxn(ctx, s.DB, 0 /* gatewayNodeID */, &leafInputState)
  2268  
  2269  	if _, err := leafTxn.Get(ctx, errKey); !testutils.IsError(err, "TransactionAbortedError") {
  2270  		t.Fatalf("expected injected err, got: %v", err)
  2271  	}
  2272  
  2273  	// Now use the leaf and check the error. At the TxnCoordSender level, the
  2274  	// pErr will be TransactionAbortedError. When pErr.GoError() is called, that's
  2275  	// transformed into an UnhandledRetryableError. For our purposes, what this
  2276  	// test is interested in demonstrating is that it's not a
  2277  	// TransactionRetryWithProtoRefreshError.
  2278  	_, err := leafTxn.Get(ctx, roachpb.Key("a"))
  2279  	if !errors.HasType(err, (*roachpb.UnhandledRetryableError)(nil)) {
  2280  		t.Fatalf("expected UnhandledRetryableError(TransactionAbortedError), got: (%T) %v", err, err)
  2281  	}
  2282  }
  2283  
  2284  // Check that ingesting an Aborted txn record is a no-op. The TxnCoordSender is
  2285  // supposed to reject such updates because they risk putting it into an
  2286  // inconsistent state. See comments in TxnCoordSender.UpdateRootWithLeafFinalState().
  2287  func TestUpdateRootWithLeafFinalStateInAbortedTxn(t *testing.T) {
  2288  	defer leaktest.AfterTest(t)()
  2289  	s := createTestDBWithContextAndKnobs(t, kv.DefaultDBContext(), nil /* knobs */)
  2290  	defer s.Stop()
  2291  	ctx := context.Background()
  2292  
  2293  	txn := kv.NewTxn(ctx, s.DB, 0 /* gatewayNodeID */)
  2294  	leafInputState := txn.GetLeafTxnInputState(ctx)
  2295  	leafTxn := kv.NewLeafTxn(ctx, s.DB, 0, &leafInputState)
  2296  
  2297  	finalState, err := leafTxn.GetLeafTxnFinalState(ctx)
  2298  	if err != nil {
  2299  		t.Fatal(err)
  2300  	}
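        	// Simulate a leaf that observed the txn as ABORTED; ingesting this final
        	// state into the root should be a no-op.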
  2301  	finalState.Txn.Status = roachpb.ABORTED
  2302  	if err := txn.UpdateRootWithLeafFinalState(ctx, &finalState); err != nil {
  2303  		t.Fatal(err)
  2304  	}
  2305  
  2306  	// Check that the transaction was not updated.
  2307  	leafInputState2 := txn.GetLeafTxnInputState(ctx)
  2308  	if leafInputState2.Txn.Status != roachpb.PENDING {
  2309  		t.Fatalf("expected PENDING txn, got: %s", leafInputState2.Txn.Status)
  2310  	}
  2311  }