github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/ambiguous_commit_test.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql_test
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"sync/atomic"
    17  	"testing"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/base"
    20  	"github.com/cockroachdb/cockroach/pkg/keys"
    21  	"github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord"
    22  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    23  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    24  	"github.com/cockroachdb/cockroach/pkg/rpc/nodedialer"
    25  	"github.com/cockroachdb/cockroach/pkg/sql"
    26  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    27  	"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
    28  	"github.com/cockroachdb/cockroach/pkg/testutils"
    29  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    30  	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
    31  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    32  	"github.com/cockroachdb/errors"
    33  	"github.com/lib/pq"
    34  )
    35  
    36  type interceptingTransport struct {
    37  	kvcoord.Transport
    38  	sendNext func(context.Context, roachpb.BatchRequest) (*roachpb.BatchResponse, error)
    39  }
    40  
    41  func (t *interceptingTransport) SendNext(
    42  	ctx context.Context, ba roachpb.BatchRequest,
    43  ) (*roachpb.BatchResponse, error) {
    44  	if fn := t.sendNext; fn != nil {
    45  		return fn(ctx, ba)
    46  	} else {
    47  		return t.Transport.SendNext(ctx, ba)
    48  	}
    49  }
    50  
    51  // TestAmbiguousCommit verifies that an ambiguous commit error is returned from
    52  // sql.Exec in situations where an EndTxn is part of a batch and the disposition
    53  // of the batch request is unknown after a network failure or timeout. The goal
    54  // here is to prevent spurious transaction retries after the initial transaction
    55  // actually succeeded. In cases where there's an auto- generated primary key,
    56  // this can result in silent duplications. In cases where the primary key is
    57  // specified in advance, it can result in violated uniqueness constraints, or
    58  // duplicate key violations. See #6053, #7604, and #10023.
    59  func TestAmbiguousCommit(t *testing.T) {
    60  	defer leaktest.AfterTest(t)()
    61  
    62  	testutils.RunTrueAndFalse(t, "ambiguousSuccess", func(t *testing.T, ambiguousSuccess bool) {
    63  		var params base.TestServerArgs
    64  		var processed int32
    65  		var tableStartKey atomic.Value
    66  
    67  		translateToRPCError := roachpb.NewError(errors.Errorf("%s: RPC error: success=%t", t.Name(), ambiguousSuccess))
    68  
    69  		maybeRPCError := func(req *roachpb.ConditionalPutRequest) *roachpb.Error {
    70  			tsk, ok := tableStartKey.Load().(roachpb.Key)
    71  			if !ok {
    72  				return nil
    73  			}
    74  			if !bytes.HasPrefix(req.Header().Key, tsk) {
    75  				return nil
    76  			}
    77  			if atomic.AddInt32(&processed, 1) == 1 {
    78  				return translateToRPCError
    79  			}
    80  			return nil
    81  		}
    82  
    83  		params.Knobs.KVClient = &kvcoord.ClientTestingKnobs{
    84  			TransportFactory: func(
    85  				opts kvcoord.SendOptions, nodeDialer *nodedialer.Dialer, replicas kvcoord.ReplicaSlice,
    86  			) (kvcoord.Transport, error) {
    87  				transport, err := kvcoord.GRPCTransportFactory(opts, nodeDialer, replicas)
    88  				return &interceptingTransport{
    89  					Transport: transport,
    90  					sendNext: func(ctx context.Context, ba roachpb.BatchRequest) (*roachpb.BatchResponse, error) {
    91  						if ambiguousSuccess {
    92  							br, err := transport.SendNext(ctx, ba)
    93  							// During shutdown, we may get responses that
    94  							// have call.Err set and all we have to do is
    95  							// not crash on those.
    96  							//
    97  							// For the rest, compare and perhaps inject an
    98  							// RPC error ourselves.
    99  							if err == nil && br.Error.Equal(translateToRPCError) {
   100  								// Translate the injected error into an RPC
   101  								// error to simulate an ambiguous result.
   102  								return nil, br.Error.GoError()
   103  							}
   104  							return br, err
   105  						} else {
   106  							if req, ok := ba.GetArg(roachpb.ConditionalPut); ok {
   107  								if pErr := maybeRPCError(req.(*roachpb.ConditionalPutRequest)); pErr != nil {
   108  									// Blackhole the RPC and return an
   109  									// error to simulate an ambiguous
   110  									// result.
   111  									return nil, pErr.GoError()
   112  								}
   113  							}
   114  							return transport.SendNext(ctx, ba)
   115  						}
   116  					},
   117  				}, err
   118  			},
   119  		}
   120  
   121  		if ambiguousSuccess {
   122  			params.Knobs.Store = &kvserver.StoreTestingKnobs{
   123  				TestingResponseFilter: func(
   124  					ctx context.Context, args roachpb.BatchRequest, _ *roachpb.BatchResponse,
   125  				) *roachpb.Error {
   126  					if req, ok := args.GetArg(roachpb.ConditionalPut); ok {
   127  						return maybeRPCError(req.(*roachpb.ConditionalPutRequest))
   128  					}
   129  					return nil
   130  				},
   131  			}
   132  		}
   133  
   134  		testClusterArgs := base.TestClusterArgs{
   135  			ReplicationMode: base.ReplicationAuto,
   136  			ServerArgs:      params,
   137  		}
   138  
   139  		const numReplicas = 3
   140  		tc := testcluster.StartTestCluster(t, numReplicas, testClusterArgs)
   141  		defer tc.Stopper().Stop(context.Background())
   142  
   143  		// Avoid distSQL so we can reliably hydrate the intended dist
   144  		// sender's cache below.
   145  		for _, server := range tc.Servers {
   146  			st := server.ClusterSettings()
   147  			st.Manual.Store(true)
   148  			sql.DistSQLClusterExecMode.Override(&st.SV, int64(sessiondata.DistSQLOff))
   149  		}
   150  
   151  		sqlDB := tc.Conns[0]
   152  
   153  		if _, err := sqlDB.Exec(`CREATE DATABASE test`); err != nil {
   154  			t.Fatal(err)
   155  		}
   156  		if _, err := sqlDB.Exec(`CREATE TABLE test.t (k SERIAL PRIMARY KEY, v INT)`); err != nil {
   157  			t.Fatal(err)
   158  		}
   159  
   160  		tableID := sqlutils.QueryTableID(t, sqlDB, "test", "public", "t")
   161  		tableStartKey.Store(keys.SystemSQLCodec.TablePrefix(tableID))
   162  
   163  		// Wait for new table to split & replication.
   164  		if err := tc.WaitForSplitAndInitialization(tableStartKey.Load().(roachpb.Key)); err != nil {
   165  			t.Fatal(err)
   166  		}
   167  
   168  		// Ensure that the dist sender's cache is up to date before
   169  		// fault injection.
   170  		if rows, err := sqlDB.Query(`SELECT * FROM test.t`); err != nil {
   171  			t.Fatal(err)
   172  		} else if err := rows.Close(); err != nil {
   173  			t.Fatal(err)
   174  		}
   175  
   176  		if _, err := sqlDB.Exec(`INSERT INTO test.t (v) VALUES (1)`); ambiguousSuccess {
   177  			if pqErr := (*pq.Error)(nil); errors.As(err, &pqErr) {
   178  				if pqErr.Code != pgcode.StatementCompletionUnknown {
   179  					t.Errorf("expected code %q, got %q (err: %s)",
   180  						pgcode.StatementCompletionUnknown, pqErr.Code, err)
   181  				}
   182  			} else {
   183  				t.Errorf("expected pq error; got %v", err)
   184  			}
   185  		} else {
   186  			if err != nil {
   187  				t.Error(err)
   188  			}
   189  		}
   190  
   191  		// Verify a single row exists in the table.
   192  		var rowCount int
   193  		if err := sqlDB.QueryRow(`SELECT count(*) FROM test.t`).Scan(&rowCount); err != nil {
   194  			t.Fatal(err)
   195  		}
   196  		if e := 1; rowCount != e {
   197  			t.Errorf("expected %d row(s) but found %d", e, rowCount)
   198  		}
   199  	})
   200  }