github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/import_into_test.go

// Copyright 2020 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package importccl_test

import (
	"context"
	"fmt"
	"net/http"
	"net/http/httptest"
	"regexp"
	"strings"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/cockroachdb/errors"
	"github.com/stretchr/testify/require"
)

// TestProtectedTimestampsDuringImportInto ensures that the timestamp at which
// a table is taken offline is protected during an IMPORT INTO job, so that if
// data is imported into a range it can be reverted in the case of cancelation
// or failure.
func TestProtectedTimestampsDuringImportInto(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// A sketch of the test is as follows:
	//
	//  * Create a table foo to import into.
	//  * Set a 1 second GC TTL for foo.
	//  * Start an IMPORT INTO with two HTTP-backed CSV files, where one
	//    server will serve a row and the other will block until it's
	//    signaled.
	//  * Manually enqueue the ranges for GC and ensure that at least one
	//    range ran the GC.
	//  * Force the IMPORT to fail.
	//  * Ensure that it was rolled back.
	//  * Ensure that we can GC after the job has finished.

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	args := base.TestClusterArgs{}
	tc := testcluster.StartTestCluster(t, 3, args)
	defer tc.Stopper().Stop(ctx)

	tc.WaitForNodeLiveness(t)
	require.NoError(t, tc.WaitForFullReplication())

	conn := tc.ServerConn(0)
	runner := sqlutils.MakeSQLRunner(conn)
	runner.Exec(t, "CREATE TABLE foo (k INT PRIMARY KEY, v BYTES)")
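	// Lower the protected timestamp poll interval so protection records are
	// noticed quickly, and use a 1 second GC TTL so garbage becomes eligible
	// for collection almost immediately.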
	runner.Exec(t, "SET CLUSTER SETTING kv.protectedts.poll_interval = '100ms';")
	runner.Exec(t, "ALTER TABLE foo CONFIGURE ZONE USING gc.ttlseconds = 1;")
	rRand, _ := randutil.NewPseudoRand()
	writeGarbage := func(from, to int) {
		for i := from; i < to; i++ {
			runner.Exec(t, "UPSERT INTO foo VALUES ($1, $2)", i, randutil.RandBytes(rRand, 1<<10))
		}
	}
	writeGarbage(3, 10)
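	// Capture foo's contents before the import so that the rollback of the
	// failed IMPORT can be verified below.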
	rowsBeforeImportInto := runner.QueryStr(t, "SELECT * FROM foo")

	mkServer := func(method string, handler func(w http.ResponseWriter, r *http.Request)) *httptest.Server {
		return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			if r.Method == method {
				handler(w, r)
			}
		}))
	}
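	// srv1 serves a single CSV row immediately, while srv2 below blocks until
	// the test closes allowResponse (or the test context is canceled), which
	// keeps the IMPORT in progress for as long as the test needs.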
	srv1 := mkServer("GET", func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write([]byte("1,asdfasdfasdfasdf"))
	})
	defer srv1.Close()
	// Let's start an import into this table of ours.
	allowResponse := make(chan struct{})
	srv2 := mkServer("GET", func(w http.ResponseWriter, r *http.Request) {
		select {
		case <-allowResponse:
		case <-ctx.Done(): // Deal with test failures.
		}
		w.WriteHeader(500)
	})
	defer srv2.Close()

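	// Kick off the IMPORT INTO in the background; it remains blocked on srv2
	// until allowResponse is closed, and once unblocked srv2's 500 response
	// forces the job to fail.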
	importErrCh := make(chan error, 1)
	go func() {
		_, err := conn.Exec(`IMPORT INTO foo (k, v) CSV DATA ($1, $2)`,
			srv1.URL, srv2.URL)
		importErrCh <- err
	}()

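	// Wait until a job shows up in SHOW JOBS; the most recently created job
	// should be the IMPORT started above.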
	var jobID string
	testutils.SucceedsSoon(t, func() error {
		row := conn.QueryRow("SELECT job_id FROM [SHOW JOBS] ORDER BY created DESC LIMIT 1")
		return row.Scan(&jobID)
	})

	time.Sleep(3 * time.Second) // Wait for the data to definitely be expired and GC to run.
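	// gcTable manually enqueues each of foo's ranges in the GC queue on its
	// leaseholder's store and returns the concatenated queue traces.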
	gcTable := func(skipShouldQueue bool) (traceStr string) {
		rows := runner.Query(t, "SELECT start_key"+
			" FROM crdb_internal.ranges_no_leases"+
			" WHERE table_name = $1"+
			" AND database_name = current_database()"+
			" ORDER BY start_key ASC", "foo")
		var traceBuf strings.Builder
		for rows.Next() {
			var startKey roachpb.Key
			require.NoError(t, rows.Scan(&startKey))
			r := tc.LookupRangeOrFatal(t, startKey)
			l, _, err := tc.FindRangeLease(r, nil)
			require.NoError(t, err)
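			// Node IDs are 1-based while TestCluster server indices are
			// 0-based, hence the -1.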
			lhServer := tc.Server(int(l.Replica.NodeID) - 1)
			s, repl := getFirstStoreReplica(t, lhServer, startKey)
			trace, _, err := s.ManuallyEnqueue(ctx, "gc", repl, skipShouldQueue)
			require.NoError(t, err)
			fmt.Fprintf(&traceBuf, "%s\n", trace.String())
		}
		require.NoError(t, rows.Err())
		return traceBuf.String()
	}

	// We should have refused to GC over the timestamp which we needed to protect.
	gcTable(true /* skipShouldQueue */)

	// Unblock the blocked import request.
	close(allowResponse)

	require.Regexp(t, "error response from server: 500 Internal Server Error", <-importErrCh)

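	// The failed IMPORT should have been rolled back, leaving only the rows
	// that existed before it started.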
	runner.CheckQueryResultsRetry(t, "SELECT * FROM foo", rowsBeforeImportInto)

	// Write some fresh garbage.

	// Wait for the ranges to learn about the removed protection record and
	// ensure that we can GC from the range soon.
	// This regex matches any float priority other than 0.00000. It does this
	// by matching either a float >= 1 (e.g. 1230.012) or a float < 1 (e.g.
	// 0.000123).
	matchNonZero := "[1-9]\\d*\\.\\d+|0\\.\\d*[1-9]\\d*"
	nonZeroProgressRE := regexp.MustCompile(fmt.Sprintf("priority=(%s)", matchNonZero))
	testutils.SucceedsSoon(t, func() error {
		writeGarbage(3, 10)
		if trace := gcTable(false /* skipShouldQueue */); !nonZeroProgressRE.MatchString(trace) {
			return fmt.Errorf("expected %v in trace: %v", nonZeroProgressRE, trace)
		}
		return nil
	})
}

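// getFirstStoreReplica returns the first store on the given server along with
// the replica of the range containing key, retrying until that replica exists.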
func getFirstStoreReplica(
	t *testing.T, s serverutils.TestServerInterface, key roachpb.Key,
) (*kvserver.Store, *kvserver.Replica) {
	t.Helper()
	store, err := s.GetStores().(*kvserver.Stores).GetStore(s.GetFirstStoreID())
	require.NoError(t, err)
	var repl *kvserver.Replica
	testutils.SucceedsSoon(t, func() error {
		repl = store.LookupReplica(roachpb.RKey(key))
		if repl == nil {
			return errors.New(`could not find replica`)
		}
		return nil
	})
	return store, repl
}