github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/distsql_running_test.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql 12 13 import ( 14 "context" 15 "fmt" 16 "testing" 17 "time" 18 19 "github.com/cockroachdb/cockroach/pkg/base" 20 "github.com/cockroachdb/cockroach/pkg/kv" 21 "github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord" 22 "github.com/cockroachdb/cockroach/pkg/roachpb" 23 "github.com/cockroachdb/cockroach/pkg/security" 24 "github.com/cockroachdb/cockroach/pkg/sql/execinfrapb" 25 "github.com/cockroachdb/cockroach/pkg/sql/parser" 26 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 27 "github.com/cockroachdb/cockroach/pkg/testutils" 28 "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" 29 "github.com/cockroachdb/cockroach/pkg/util/hlc" 30 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 31 "github.com/cockroachdb/cockroach/pkg/util/log" 32 "github.com/cockroachdb/cockroach/pkg/util/tracing" 33 ) 34 35 // Test that we don't attempt to create flows in an aborted transaction. 36 // Instead, a retryable error is created on the gateway. The point is to 37 // simulate a race where the heartbeat loop finds out that the txn is aborted 38 // just before a plan starts execution and check that we don't create flows in 39 // an aborted txn (which isn't allowed). Note that, once running, each flow can 40 // discover on its own that its txn is aborted - that's handled separately. But 41 // flows can't start in a txn that's already known to be aborted. 42 // 43 // We test this by manually aborting a txn and then attempting to execute a plan 44 // in it. We're careful to not use the transaction for anything but running the 45 // plan; planning will be performed outside of the transaction. 46 func TestDistSQLRunningInAbortedTxn(t *testing.T) { 47 defer leaktest.AfterTest(t)() 48 49 ctx := context.Background() 50 s, sqlDB, db := serverutils.StartServer(t, base.TestServerArgs{}) 51 defer s.Stopper().Stop(ctx) 52 53 if _, err := sqlDB.ExecContext( 54 ctx, "create database test; create table test.t(a int)"); err != nil { 55 t.Fatal(err) 56 } 57 key := roachpb.Key("a") 58 59 // Plan a statement. 60 execCfg := s.ExecutorConfig().(ExecutorConfig) 61 internalPlanner, cleanup := NewInternalPlanner( 62 "test", 63 kv.NewTxn(ctx, db, s.NodeID()), 64 security.RootUser, 65 &MemoryMetrics{}, 66 &execCfg, 67 ) 68 defer cleanup() 69 p := internalPlanner.(*planner) 70 query := "select * from test.t" 71 stmt, err := parser.ParseOne(query) 72 if err != nil { 73 t.Fatal(err) 74 } 75 76 push := func(ctx context.Context, key roachpb.Key) error { 77 // Conflicting transaction that pushes another transaction. 78 conflictTxn := kv.NewTxn(ctx, db, 0 /* gatewayNodeID */) 79 // We need to explicitly set a high priority for the push to happen. 80 if err := conflictTxn.SetUserPriority(roachpb.MaxUserPriority); err != nil { 81 return err 82 } 83 // Push through a Put, as opposed to a Get, so that the pushee gets aborted. 84 if err := conflictTxn.Put(ctx, key, "pusher was here"); err != nil { 85 return err 86 } 87 return conflictTxn.CommitOrCleanup(ctx) 88 } 89 90 // Make a db with a short heartbeat interval, so that the aborted txn finds 91 // out quickly. 92 ambient := log.AmbientContext{Tracer: tracing.NewTracer()} 93 tsf := kvcoord.NewTxnCoordSenderFactory( 94 kvcoord.TxnCoordSenderFactoryConfig{ 95 AmbientCtx: ambient, 96 // Short heartbeat interval. 97 HeartbeatInterval: time.Millisecond, 98 Settings: s.ClusterSettings(), 99 Clock: s.Clock(), 100 Stopper: s.Stopper(), 101 }, 102 s.DistSenderI().(*kvcoord.DistSender), 103 ) 104 shortDB := kv.NewDB(ambient, tsf, s.Clock()) 105 106 iter := 0 107 // We'll trace to make sure the test isn't fooling itself. 108 runningCtx, getRec, cancel := tracing.ContextWithRecordingSpan(ctx, "test") 109 defer cancel() 110 err = shortDB.Txn(runningCtx, func(ctx context.Context, txn *kv.Txn) error { 111 iter++ 112 if iter == 1 { 113 // On the first iteration, abort the txn. 114 115 if err := txn.Put(ctx, key, "val"); err != nil { 116 t.Fatal(err) 117 } 118 119 if err := push(ctx, key); err != nil { 120 t.Fatal(err) 121 } 122 123 // Now wait until the heartbeat loop notices that the transaction is aborted. 124 testutils.SucceedsSoon(t, func() error { 125 if txn.Sender().(*kvcoord.TxnCoordSender).IsTracking() { 126 return fmt.Errorf("txn heartbeat loop running") 127 } 128 return nil 129 }) 130 } 131 132 // Create and run a DistSQL plan. 133 rw := newCallbackResultWriter(func(ctx context.Context, row tree.Datums) error { 134 return nil 135 }) 136 recv := MakeDistSQLReceiver( 137 ctx, 138 rw, 139 stmt.AST.StatementType(), 140 execCfg.RangeDescriptorCache, 141 execCfg.LeaseHolderCache, 142 txn, 143 func(ts hlc.Timestamp) { 144 execCfg.Clock.Update(ts) 145 }, 146 p.ExtendedEvalContext().Tracing, 147 ) 148 149 // We need to re-plan every time, since close() below makes 150 // the plan unusable across retries. 151 p.stmt = &Statement{Statement: stmt} 152 if err := p.makeOptimizerPlan(ctx); err != nil { 153 t.Fatal(err) 154 } 155 defer p.curPlan.close(ctx) 156 157 evalCtx := p.ExtendedEvalContext() 158 // We need distribute = true so that executing the plan involves marshaling 159 // the root txn meta to leaf txns. Local flows can start in aborted txns 160 // because they just use the root txn. 161 planCtx := execCfg.DistSQLPlanner.NewPlanningCtx(ctx, evalCtx, nil /* txn */, true /* distribute */) 162 planCtx.planner = p 163 planCtx.stmtType = recv.stmtType 164 165 execCfg.DistSQLPlanner.PlanAndRun( 166 ctx, evalCtx, planCtx, txn, p.curPlan.main, recv, 167 )() 168 return rw.Err() 169 }) 170 if err != nil { 171 t.Fatal(err) 172 } 173 if iter != 2 { 174 t.Fatalf("expected two iterations, but txn took %d to succeed", iter) 175 } 176 if tracing.FindMsgInRecording(getRec(), clientRejectedMsg) == -1 { 177 t.Fatalf("didn't find expected message in trace: %s", clientRejectedMsg) 178 } 179 } 180 181 // Test that the DistSQLReceiver overwrites previous errors as "better" errors 182 // come along. 183 func TestDistSQLReceiverErrorRanking(t *testing.T) { 184 defer leaktest.AfterTest(t)() 185 186 // This test goes through the trouble of creating a server because it wants to 187 // create a txn. It creates the txn because it wants to test an interaction 188 // between the DistSQLReceiver and the TxnCoordSender: the DistSQLReceiver 189 // will feed retriable errors to the TxnCoordSender which will change those 190 // errors to TransactionRetryWithProtoRefreshError. 191 ctx := context.Background() 192 s, _, db := serverutils.StartServer(t, base.TestServerArgs{}) 193 defer s.Stopper().Stop(ctx) 194 195 txn := kv.NewTxn(ctx, db, s.NodeID()) 196 197 // We're going to use a rowResultWriter to which only errors will be passed. 198 rw := newCallbackResultWriter(nil /* fn */) 199 recv := MakeDistSQLReceiver( 200 ctx, 201 rw, 202 tree.Rows, /* StatementType */ 203 nil, /* rangeCache */ 204 nil, /* leaseCache */ 205 txn, 206 func(hlc.Timestamp) {}, /* updateClock */ 207 &SessionTracing{}, 208 ) 209 210 retryErr := roachpb.NewErrorWithTxn( 211 roachpb.NewTransactionRetryError( 212 roachpb.RETRY_SERIALIZABLE, "test err"), 213 txn.TestingCloneTxn()).GoError() 214 215 abortErr := roachpb.NewErrorWithTxn( 216 roachpb.NewTransactionAbortedError( 217 roachpb.ABORT_REASON_ABORTED_RECORD_FOUND), 218 txn.TestingCloneTxn()).GoError() 219 220 errs := []struct { 221 err error 222 expErr string 223 }{ 224 { 225 // Initial error, retriable. 226 err: retryErr, 227 expErr: "TransactionRetryWithProtoRefreshError: TransactionRetryError", 228 }, 229 { 230 // A non-retriable error overwrites a retriable one. 231 err: fmt.Errorf("err1"), 232 expErr: "err1", 233 }, 234 { 235 // Another non-retriable error doesn't overwrite the previous one. 236 err: fmt.Errorf("err2"), 237 expErr: "err1", 238 }, 239 { 240 // A TransactionAbortedError overwrites anything. 241 err: abortErr, 242 expErr: "TransactionRetryWithProtoRefreshError: TransactionAbortedError", 243 }, 244 { 245 // A non-aborted retriable error does not overried the 246 // TransactionAbortedError. 247 err: retryErr, 248 expErr: "TransactionRetryWithProtoRefreshError: TransactionAbortedError", 249 }, 250 } 251 252 for i, tc := range errs { 253 recv.Push(nil, /* row */ 254 &execinfrapb.ProducerMetadata{ 255 Err: tc.err, 256 }) 257 if !testutils.IsError(rw.Err(), tc.expErr) { 258 t.Fatalf("%d: expected %s, got %s", i, tc.expErr, rw.Err()) 259 } 260 } 261 }