github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/distsql_running_test.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"testing"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/base"
    20  	"github.com/cockroachdb/cockroach/pkg/kv"
    21  	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
    22  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    23  	"github.com/cockroachdb/cockroach/pkg/security"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
    25  	"github.com/cockroachdb/cockroach/pkg/sql/parser"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    27  	"github.com/cockroachdb/cockroach/pkg/testutils"
    28  	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
    29  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    30  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    31  	"github.com/cockroachdb/cockroach/pkg/util/log"
    32  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    33  )
    34  
    35  // Test that we don't attempt to create flows in an aborted transaction.
    36  // Instead, a retryable error is created on the gateway. The point is to
    37  // simulate a race where the heartbeat loop finds out that the txn is aborted
    38  // just before a plan starts execution and check that we don't create flows in
    39  // an aborted txn (which isn't allowed). Note that, once running, each flow can
    40  // discover on its own that its txn is aborted - that's handled separately. But
    41  // flows can't start in a txn that's already known to be aborted.
    42  //
    43  // We test this by manually aborting a txn and then attempting to execute a plan
    44  // in it. We're careful to not use the transaction for anything but running the
    45  // plan; planning will be performed outside of the transaction.
    46  func TestDistSQLRunningInAbortedTxn(t *testing.T) {
    47  	defer leaktest.AfterTest(t)()
    48  
    49  	ctx := context.Background()
    50  	s, sqlDB, db := serverutils.StartServer(t, base.TestServerArgs{})
    51  	defer s.Stopper().Stop(ctx)
    52  
    53  	if _, err := sqlDB.ExecContext(
    54  		ctx, "create database test; create table test.t(a int)"); err != nil {
    55  		t.Fatal(err)
    56  	}
    57  	key := roachpb.Key("a") // Written by both the txn under test and the pusher.
    58  
    59  	// Plan a statement.
    60  	execCfg := s.ExecutorConfig().(ExecutorConfig)
    61  	internalPlanner, cleanup := NewInternalPlanner(
    62  		"test",
    63  		kv.NewTxn(ctx, db, s.NodeID()),
    64  		security.RootUser,
    65  		&MemoryMetrics{},
    66  		&execCfg,
    67  	)
    68  	defer cleanup()
    69  	p := internalPlanner.(*planner)
    70  	query := "select * from test.t"
    71  	stmt, err := parser.ParseOne(query)
    72  	if err != nil {
    73  		t.Fatal(err)
    74  	}
    75  	// push writes key from a separate max-priority txn, then commits that txn.
    76  	push := func(ctx context.Context, key roachpb.Key) error {
    77  		// Conflicting transaction that pushes another transaction.
    78  		conflictTxn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */)
    79  		// We need to explicitly set a high priority for the push to happen.
    80  		if err := conflictTxn.SetUserPriority(roachpb.MaxUserPriority); err != nil {
    81  			return err
    82  		}
    83  		// Push through a Put, as opposed to a Get, so that the pushee gets aborted.
    84  		if err := conflictTxn.Put(ctx, key, "pusher was here"); err != nil {
    85  			return err
    86  		}
    87  		return conflictTxn.CommitOrCleanup(ctx)
    88  	}
    89  
    90  	// Make a db with a short heartbeat interval, so that the aborted txn finds
    91  	// out quickly.
    92  	ambient := log.AmbientContext{Tracer: tracing.NewTracer()}
    93  	tsf := kvcoord.NewTxnCoordSenderFactory(
    94  		kvcoord.TxnCoordSenderFactoryConfig{
    95  			AmbientCtx: ambient,
    96  			// Short heartbeat interval.
    97  			HeartbeatInterval: time.Millisecond,
    98  			Settings:          s.ClusterSettings(),
    99  			Clock:             s.Clock(),
   100  			Stopper:           s.Stopper(),
   101  		},
   102  		s.DistSenderI().(*kvcoord.DistSender),
   103  	)
   104  	shortDB := kv.NewDB(ambient, tsf, s.Clock())
   105  
   106  	iter := 0 // Number of times the txn closure below has been invoked.
   107  	// We'll trace to make sure the test isn't fooling itself.
   108  	runningCtx, getRec, cancel := tracing.ContextWithRecordingSpan(ctx, "test")
   109  	defer cancel()
   110  	err = shortDB.Txn(runningCtx, func(ctx context.Context, txn *kv.Txn) error {
   111  		iter++
   112  		if iter == 1 {
   113  			// On the first iteration, abort the txn.
   114  			// Write key in this txn so that the pusher's Put on the same key conflicts.
   115  			if err := txn.Put(ctx, key, "val"); err != nil {
   116  				t.Fatal(err)
   117  			}
   118  
   119  			if err := push(ctx, key); err != nil {
   120  				t.Fatal(err)
   121  			}
   122  
   123  			// Now wait until the heartbeat loop notices that the transaction is aborted.
   124  			testutils.SucceedsSoon(t, func() error {
   125  				if txn.Sender().(*kvcoord.TxnCoordSender).IsTracking() {
   126  					return fmt.Errorf("txn heartbeat loop running")
   127  				}
   128  				return nil
   129  			})
   130  		}
   131  
   132  		// Create and run a DistSQL plan.
   133  		rw := newCallbackResultWriter(func(ctx context.Context, row tree.Datums) error {
   134  			return nil
   135  		})
   136  		recv := MakeDistSQLReceiver(
   137  			ctx,
   138  			rw,
   139  			stmt.AST.StatementType(),
   140  			execCfg.RangeDescriptorCache,
   141  			execCfg.LeaseHolderCache,
   142  			txn,
   143  			func(ts hlc.Timestamp) {
   144  				execCfg.Clock.Update(ts)
   145  			},
   146  			p.ExtendedEvalContext().Tracing,
   147  		)
   148  
   149  		// We need to re-plan every time, since close() below makes
   150  		// the plan unusable across retries.
   151  		p.stmt = &Statement{Statement: stmt}
   152  		if err := p.makeOptimizerPlan(ctx); err != nil {
   153  			t.Fatal(err)
   154  		}
   155  		defer p.curPlan.close(ctx)
   156  
   157  		evalCtx := p.ExtendedEvalContext()
   158  		// We need distribute = true so that executing the plan involves marshaling
   159  		// the root txn meta to leaf txns. Local flows can start in aborted txns
   160  		// because they just use the root txn.
   161  		planCtx := execCfg.DistSQLPlanner.NewPlanningCtx(ctx, evalCtx, nil /* txn */, true /* distribute */)
   162  		planCtx.planner = p
   163  		planCtx.stmtType = recv.stmtType
   164  		// Run the plan in txn. Errors are reported through rw, checked on return.
   165  		execCfg.DistSQLPlanner.PlanAndRun(
   166  			ctx, evalCtx, planCtx, txn, p.curPlan.main, recv,
   167  		)() // PlanAndRun returns a func, invoked right away (presumably cleanup - confirm).
   168  		return rw.Err()
   169  	})
   170  	if err != nil {
   171  		t.Fatal(err)
   172  	}
   173  	if iter != 2 { // The first attempt is aborted above; one retry is expected to succeed.
   174  		t.Fatalf("expected two iterations, but txn took %d to succeed", iter)
   175  	}
   176  	if tracing.FindMsgInRecording(getRec(), clientRejectedMsg) == -1 {
   177  		t.Fatalf("didn't find expected message in trace: %s", clientRejectedMsg)
   178  	}
   179  }
   180  
   181  // Test that the DistSQLReceiver overwrites previous errors as "better" errors
   182  // come along.
   183  func TestDistSQLReceiverErrorRanking(t *testing.T) {
   184  	defer leaktest.AfterTest(t)()
   185  
   186  	// This test goes through the trouble of creating a server because it wants to
   187  	// create a txn. It creates the txn because it wants to test an interaction
   188  	// between the DistSQLReceiver and the TxnCoordSender: the DistSQLReceiver
   189  	// will feed retriable errors to the TxnCoordSender which will change those
   190  	// errors to TransactionRetryWithProtoRefreshError.
   191  	ctx := context.Background()
   192  	s, _, db := serverutils.StartServer(t, base.TestServerArgs{})
   193  	defer s.Stopper().Stop(ctx)
   194  
   195  	txn := kv.NewTxn(ctx, db, s.NodeID())
   196  
   197  	// We're going to use a rowResultWriter to which only errors will be passed.
   198  	rw := newCallbackResultWriter(nil /* fn */)
   199  	recv := MakeDistSQLReceiver(
   200  		ctx,
   201  		rw,
   202  		tree.Rows, /* StatementType */
   203  		nil,       /* rangeCache */
   204  		nil,       /* leaseCache */
   205  		txn,
   206  		func(hlc.Timestamp) {}, /* updateClock */
   207  		&SessionTracing{},
   208  	)
   209  	// retryErr is a TransactionRetryError (RETRY_SERIALIZABLE) built with a clone of txn.
   210  	retryErr := roachpb.NewErrorWithTxn(
   211  		roachpb.NewTransactionRetryError(
   212  			roachpb.RETRY_SERIALIZABLE, "test err"),
   213  		txn.TestingCloneTxn()).GoError()
   214  	// abortErr is a TransactionAbortedError built with a clone of txn.
   215  	abortErr := roachpb.NewErrorWithTxn(
   216  		roachpb.NewTransactionAbortedError(
   217  			roachpb.ABORT_REASON_ABORTED_RECORD_FOUND),
   218  		txn.TestingCloneTxn()).GoError()
   219  	// Cases run in order; expErr is what rw must report right after err is pushed.
   220  	errs := []struct {
   221  		err    error
   222  		expErr string
   223  	}{
   224  		{
   225  			// Initial error, retriable.
   226  			err:    retryErr,
   227  			expErr: "TransactionRetryWithProtoRefreshError: TransactionRetryError",
   228  		},
   229  		{
   230  			// A non-retriable error overwrites a retriable one.
   231  			err:    fmt.Errorf("err1"),
   232  			expErr: "err1",
   233  		},
   234  		{
   235  			// Another non-retriable error doesn't overwrite the previous one.
   236  			err:    fmt.Errorf("err2"),
   237  			expErr: "err1",
   238  		},
   239  		{
   240  			// A TransactionAbortedError overwrites anything.
   241  			err:    abortErr,
   242  			expErr: "TransactionRetryWithProtoRefreshError: TransactionAbortedError",
   243  		},
   244  		{
   245  			// A non-aborted retriable error does not override the
   246  			// TransactionAbortedError.
   247  			err:    retryErr,
   248  			expErr: "TransactionRetryWithProtoRefreshError: TransactionAbortedError",
   249  		},
   250  	}
   251  	// recv and rw are shared by all cases. rw.Err() is printed with %v as it may be nil.
   252  	for i, tc := range errs {
   253  		recv.Push(nil, /* row */
   254  			&execinfrapb.ProducerMetadata{
   255  				Err: tc.err,
   256  			})
   257  		if !testutils.IsError(rw.Err(), tc.expErr) {
   258  			t.Fatalf("%d: expected %s, got %v", i, tc.expErr, rw.Err())
   259  		}
   260  	}
   261  }