github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/import_stmt_test.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 package importccl 10 11 import ( 12 "bytes" 13 "context" 14 gosql "database/sql" 15 "fmt" 16 "io" 17 "io/ioutil" 18 "math/rand" 19 "net/http" 20 "net/http/httptest" 21 "net/url" 22 "path/filepath" 23 "strings" 24 "sync" 25 "testing" 26 "time" 27 28 "github.com/cockroachdb/cockroach/pkg/base" 29 "github.com/cockroachdb/cockroach/pkg/ccl/backupccl" 30 "github.com/cockroachdb/cockroach/pkg/jobs" 31 "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" 32 "github.com/cockroachdb/cockroach/pkg/keys" 33 "github.com/cockroachdb/cockroach/pkg/kv" 34 "github.com/cockroachdb/cockroach/pkg/kv/kvserver" 35 "github.com/cockroachdb/cockroach/pkg/roachpb" 36 "github.com/cockroachdb/cockroach/pkg/security" 37 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 38 "github.com/cockroachdb/cockroach/pkg/sql" 39 "github.com/cockroachdb/cockroach/pkg/sql/gcjob" 40 "github.com/cockroachdb/cockroach/pkg/sql/parser" 41 "github.com/cockroachdb/cockroach/pkg/sql/row" 42 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 43 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 44 "github.com/cockroachdb/cockroach/pkg/sql/stats" 45 "github.com/cockroachdb/cockroach/pkg/sql/tests" 46 "github.com/cockroachdb/cockroach/pkg/testutils" 47 "github.com/cockroachdb/cockroach/pkg/testutils/jobutils" 48 "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" 49 "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" 50 "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" 51 "github.com/cockroachdb/cockroach/pkg/util" 52 "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" 53 "github.com/cockroachdb/cockroach/pkg/util/hlc" 54 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 55 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 56 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 57 "github.com/cockroachdb/errors" 58 "github.com/jackc/pgx" 59 "github.com/stretchr/testify/assert" 60 "github.com/stretchr/testify/require" 61 ) 62 63 func TestImportData(t *testing.T) { 64 defer leaktest.AfterTest(t)() 65 66 t.Skipf("failing on teamcity with testrace") 67 68 s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) 69 ctx := context.Background() 70 defer s.Stopper().Stop(ctx) 71 sqlDB := sqlutils.MakeSQLRunner(db) 72 73 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 74 75 tests := []struct { 76 name string 77 create string 78 with string 79 typ string 80 data string 81 err string 82 rejected string 83 query map[string][][]string 84 }{ 85 { 86 name: "duplicate unique index key", 87 create: ` 88 a int8 primary key, 89 i int8, 90 unique index idx_f (i) 91 `, 92 typ: "CSV", 93 data: `1,1 94 2,2 95 3,3 96 4,3 97 5,4`, 98 err: "duplicate key", 99 }, 100 { 101 name: "duplicate PK", 102 create: ` 103 i int8 primary key, 104 s string 105 `, 106 typ: "CSV", 107 data: `1, A 108 2, B 109 3, C 110 3, D 111 4, E`, 112 err: "duplicate key", 113 }, 114 { 115 name: "duplicate collated string key", 116 create: ` 117 s string collate en_u_ks_level1 primary key 118 `, 119 typ: "CSV", 120 data: `a 121 B 122 c 123 D 124 d 125 `, 126 err: "duplicate key", 
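			// A note on the case above: en_u_ks_level1 is a primary-strength
			// (level-1) collation, which compares case-insensitively, so the "D"
			// and "d" rows in the data collide on the primary key even though the
			// raw strings differ.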
127 }, 128 { 129 name: "duplicate PK at sst boundary", 130 create: ` 131 i int8 primary key, 132 s string 133 `, 134 with: `WITH sstsize = '10B'`, 135 typ: "CSV", 136 data: `1,0000000000 137 1,0000000001`, 138 err: "duplicate key", 139 }, 140 { 141 name: "verify no splits mid row", 142 create: ` 143 i int8 primary key, 144 s string, 145 b int8, 146 c int8, 147 index (s), 148 index (i, s), 149 family (i, b), 150 family (s, c) 151 `, 152 with: `WITH sstsize = '1B'`, 153 typ: "CSV", 154 data: `5,STRING,7,9`, 155 query: map[string][][]string{ 156 `SELECT count(*) from t`: {{"1"}}, 157 }, 158 }, 159 { 160 name: "good bytes encoding", 161 create: `b bytes`, 162 typ: "CSV", 163 data: `\x0143 164 0143`, 165 query: map[string][][]string{ 166 `SELECT * from t`: {{"\x01C"}, {"0143"}}, 167 }, 168 }, 169 { 170 name: "invalid byte", 171 create: `b bytes`, 172 typ: "CSV", 173 data: `\x0g`, 174 rejected: `\x0g` + "\n", 175 err: "invalid byte", 176 }, 177 { 178 name: "bad bytes length", 179 create: `b bytes`, 180 typ: "CSV", 181 data: `\x0`, 182 rejected: `\x0` + "\n", 183 err: "odd length hex string", 184 }, 185 { 186 name: "oversample", 187 create: `i int8`, 188 with: `WITH oversample = '100'`, 189 typ: "CSV", 190 data: "1", 191 }, 192 { 193 name: "new line characters", 194 create: `t text`, 195 typ: "CSV", 196 data: "\"hello\r\nworld\"\n\"friend\nfoe\"\n\"mr\rmrs\"", 197 query: map[string][][]string{ 198 `SELECT t from t`: {{"hello\r\nworld"}, {"friend\nfoe"}, {"mr\rmrs"}}, 199 }, 200 }, 201 { 202 name: "CR in int8, 2 cols", 203 create: `a int8, b int8`, 204 typ: "CSV", 205 data: "1,2\r\n3,4\n5,6", 206 query: map[string][][]string{ 207 `SELECT * FROM t ORDER BY a`: {{"1", "2"}, {"3", "4"}, {"5", "6"}}, 208 }, 209 }, 210 { 211 name: "CR in int8, 1 col", 212 create: `a int8`, 213 typ: "CSV", 214 data: "1\r\n3\n5", 215 query: map[string][][]string{ 216 `SELECT * FROM t ORDER BY a`: {{"1"}, {"3"}, {"5"}}, 217 }, 218 }, 219 { 220 name: "collated strings", 221 create: `s string collate en_u_ks_level1`, 222 typ: "CSV", 223 data: strings.Repeat("1\n", 2000), 224 query: map[string][][]string{ 225 `SELECT s, count(*) FROM t GROUP BY s`: {{"1", "2000"}}, 226 }, 227 }, 228 { 229 name: "quotes are accepted in a quoted string", 230 create: `s string`, 231 typ: "CSV", 232 data: `"abc""de"`, 233 query: map[string][][]string{ 234 `SELECT s FROM t`: {{`abc"de`}}, 235 }, 236 }, 237 { 238 name: "bare quote in the middle of a field that is not quoted", 239 create: `s string`, 240 typ: "CSV", 241 data: `abc"de`, 242 query: map[string][][]string{`SELECT * from t`: {{`abc"de`}}}, 243 }, 244 { 245 name: "strict quotes: bare quote in the middle of a field that is not quoted", 246 create: `s string`, 247 typ: "CSV", 248 with: `WITH strict_quotes`, 249 data: `abc"de`, 250 err: `row 1: reading CSV record: parse error on line 1, column 3: bare " in non-quoted-field`, 251 }, 252 { 253 name: "no matching quote in a quoted field", 254 create: `s string`, 255 typ: "CSV", 256 data: `"abc"de`, 257 query: map[string][][]string{`SELECT * from t`: {{`abc"de`}}}, 258 }, 259 { 260 name: "strict quotes: bare quote in the middle of a quoted field is not ok", 261 create: `s string`, 262 typ: "CSV", 263 with: `WITH strict_quotes`, 264 data: `"abc"de"`, 265 err: `row 1: reading CSV record: parse error on line 1, column 4: extraneous or missing " in quoted-field`, 266 }, 267 { 268 name: "too many imported columns", 269 create: `i int8`, 270 typ: "CSV", 271 data: "1,2\n3\n11,22", 272 err: "row 1: expected 1 fields, got 2", 273 rejected: 
"1,2\n11,22\n", 274 query: map[string][][]string{`SELECT * from t`: {{"3"}}}, 275 }, 276 { 277 name: "parsing error", 278 create: `i int8, j int8`, 279 typ: "CSV", 280 data: "not_int,2\n3,4", 281 err: `row 1: parse "i" as INT8: could not parse "not_int" as type int`, 282 rejected: "not_int,2\n", 283 query: map[string][][]string{`SELECT * from t`: {{"3", "4"}}}, 284 }, 285 286 // MySQL OUTFILE 287 // If err field is non-empty, the query filed specifies what expect 288 // to get from the rows that are parsed correctly (see option experimental_save_rejected). 289 { 290 name: "empty file", 291 create: `a string`, 292 typ: "DELIMITED", 293 data: "", 294 query: map[string][][]string{`SELECT * from t`: {}}, 295 }, 296 { 297 name: "empty field", 298 create: `a string, b string`, 299 typ: "DELIMITED", 300 data: "\t", 301 query: map[string][][]string{`SELECT * from t`: {{"", ""}}}, 302 }, 303 { 304 name: "empty line", 305 create: `a string`, 306 typ: "DELIMITED", 307 data: "\n", 308 query: map[string][][]string{`SELECT * from t`: {{""}}}, 309 }, 310 { 311 name: "too many imported columns", 312 create: `i int8`, 313 typ: "DELIMITED", 314 data: "1\t2\n3", 315 err: "row 1: too many columns, got 2 expected 1", 316 rejected: "1\t2\n", 317 query: map[string][][]string{`SELECT * from t`: {{"3"}}}, 318 }, 319 { 320 name: "cannot parse data", 321 create: `i int8, j int8`, 322 typ: "DELIMITED", 323 data: "bad_int\t2\n3\t4", 324 err: "error parsing row 1", 325 rejected: "bad_int\t2\n", 326 query: map[string][][]string{`SELECT * from t`: {{"3", "4"}}}, 327 }, 328 { 329 name: "unexpected number of columns", 330 create: `a string, b string`, 331 typ: "DELIMITED", 332 data: "1,2\n3\t4", 333 err: "row 1: unexpected number of columns, expected 2 got 1", 334 rejected: "1,2\n", 335 query: map[string][][]string{`SELECT * from t`: {{"3", "4"}}}, 336 }, 337 { 338 name: "unexpected number of columns in 1st row", 339 create: `a string, b string`, 340 typ: "DELIMITED", 341 data: "1,2\n3\t4", 342 err: "row 1: unexpected number of columns, expected 2 got 1", 343 rejected: "1,2\n", 344 query: map[string][][]string{`SELECT * from t`: {{"3", "4"}}}, 345 }, 346 { 347 name: "field enclosure", 348 create: `a string, b string`, 349 with: `WITH fields_enclosed_by = '$'`, 350 typ: "DELIMITED", 351 data: "$foo$\tnormal", 352 query: map[string][][]string{ 353 `SELECT * from t`: {{"foo", "normal"}}, 354 }, 355 }, 356 { 357 name: "field enclosure in middle of unquoted field", 358 create: `a string, b string`, 359 with: `WITH fields_enclosed_by = '$'`, 360 typ: "DELIMITED", 361 data: "fo$o\tb$a$z", 362 query: map[string][][]string{ 363 `SELECT * from t`: {{"fo$o", "b$a$z"}}, 364 }, 365 }, 366 { 367 name: "field enclosure in middle of quoted field", 368 create: `a string, b string`, 369 with: `WITH fields_enclosed_by = '$'`, 370 typ: "DELIMITED", 371 data: "$fo$o$\t$b$a$z$", 372 query: map[string][][]string{ 373 `SELECT * from t`: {{"fo$o", "b$a$z"}}, 374 }, 375 }, 376 { 377 name: "unmatched field enclosure", 378 create: `a string, b string`, 379 with: `WITH fields_enclosed_by = '$'`, 380 typ: "DELIMITED", 381 data: "$foo\tnormal\nbaz\tbar", 382 err: "error parsing row 1: unmatched field enclosure at start of field", 383 rejected: "$foo\tnormal\nbaz\tbar\n", 384 query: map[string][][]string{`SELECT * from t`: {}}, 385 }, 386 { 387 name: "unmatched field enclosure at end", 388 create: `a string, b string`, 389 with: `WITH fields_enclosed_by = '$'`, 390 typ: "DELIMITED", 391 data: "foo$\tnormal\nbar\tbaz", 392 err: "row 1: unmatched field 
enclosure at end of field", 393 rejected: "foo$\tnormal\n", 394 query: map[string][][]string{`SELECT * from t`: {{"bar", "baz"}}}, 395 }, 396 { 397 name: "unmatched field enclosure 2nd field", 398 create: `a string, b string`, 399 with: `WITH fields_enclosed_by = '$'`, 400 typ: "DELIMITED", 401 data: "normal\t$foo", 402 err: "row 1: unmatched field enclosure at start of field", 403 rejected: "normal\t$foo\n", 404 query: map[string][][]string{`SELECT * from t`: {}}, 405 }, 406 { 407 name: "unmatched field enclosure at end 2nd field", 408 create: `a string, b string`, 409 with: `WITH fields_enclosed_by = '$'`, 410 typ: "DELIMITED", 411 data: "normal\tfoo$", 412 err: "row 1: unmatched field enclosure at end of field", 413 rejected: "normal\tfoo$\n", 414 query: map[string][][]string{`SELECT * from t`: {}}, 415 }, 416 { 417 name: "unmatched literal", 418 create: `i int8`, 419 with: `WITH fields_escaped_by = '\'`, 420 typ: "DELIMITED", 421 data: `\`, 422 err: "row 1: unmatched literal", 423 rejected: "\\\n", 424 query: map[string][][]string{`SELECT * from t`: {}}, 425 }, 426 { 427 name: "escaped field enclosure", 428 create: `a string, b string`, 429 with: `WITH fields_enclosed_by = '$', fields_escaped_by = '\', 430 fields_terminated_by = ','`, 431 typ: "DELIMITED", 432 data: `\$foo\$,\$baz`, 433 query: map[string][][]string{ 434 `SELECT * from t`: {{"$foo$", "$baz"}}, 435 }, 436 }, 437 { 438 name: "weird escape char", 439 create: `s STRING`, 440 with: `WITH fields_escaped_by = '@'`, 441 typ: "DELIMITED", 442 data: "@N\nN@@@\n\nNULL", 443 query: map[string][][]string{ 444 `SELECT COALESCE(s, '(null)') from t`: {{"(null)"}, {"N@\n"}, {"NULL"}}, 445 }, 446 }, 447 { 448 name: `null and \N with escape`, 449 create: `s STRING`, 450 with: `WITH fields_escaped_by = '\'`, 451 typ: "DELIMITED", 452 data: "\\N\n\\\\N\nNULL", 453 query: map[string][][]string{ 454 `SELECT COALESCE(s, '(null)') from t`: {{"(null)"}, {`\N`}, {"NULL"}}, 455 }, 456 }, 457 { 458 name: `\N with trailing char`, 459 create: `s STRING`, 460 with: `WITH fields_escaped_by = '\'`, 461 typ: "DELIMITED", 462 data: "\\N1\nfoo", 463 err: "row 1: unexpected data after null encoding", 464 rejected: "\\N1\n", 465 query: map[string][][]string{`SELECT * from t`: {{"foo"}}}, 466 }, 467 { 468 name: `double null`, 469 create: `s STRING`, 470 with: `WITH fields_escaped_by = '\'`, 471 typ: "DELIMITED", 472 data: `\N\N`, 473 err: "row 1: unexpected null encoding", 474 rejected: `\N\N` + "\n", 475 query: map[string][][]string{`SELECT * from t`: {}}, 476 }, 477 { 478 name: `null and \N without escape`, 479 create: `s STRING`, 480 typ: "DELIMITED", 481 data: "\\N\n\\\\N\nNULL", 482 query: map[string][][]string{ 483 `SELECT COALESCE(s, '(null)') from t`: {{`\N`}, {`\\N`}, {"(null)"}}, 484 }, 485 }, 486 { 487 name: `bytes with escape`, 488 create: `b BYTES`, 489 typ: "DELIMITED", 490 data: `\x`, 491 query: map[string][][]string{ 492 `SELECT * from t`: {{`\x`}}, 493 }, 494 }, 495 { 496 name: "skip 0 lines", 497 create: `a string, b string`, 498 with: `WITH fields_terminated_by = ',', skip = '0'`, 499 typ: "DELIMITED", 500 data: "foo,normal", 501 query: map[string][][]string{ 502 `SELECT * from t`: {{"foo", "normal"}}, 503 }, 504 }, 505 { 506 name: "skip 1 lines", 507 create: `a string, b string`, 508 with: `WITH fields_terminated_by = ',', skip = '1'`, 509 typ: "DELIMITED", 510 data: "a string, b string\nfoo,normal", 511 query: map[string][][]string{ 512 `SELECT * from t`: {{"foo", "normal"}}, 513 }, 514 }, 515 { 516 name: "skip 2 lines", 517 create: `a 
string, b string`, 518 with: `WITH fields_terminated_by = ',', skip = '2'`, 519 typ: "DELIMITED", 520 data: "a string, b string\nfoo,normal\nbar,baz", 521 query: map[string][][]string{ 522 `SELECT * from t`: {{"bar", "baz"}}, 523 }, 524 }, 525 { 526 name: "skip all lines", 527 create: `a string, b string`, 528 with: `WITH fields_terminated_by = ',', skip = '3'`, 529 typ: "DELIMITED", 530 data: "a string, b string\nfoo,normal\nbar,baz", 531 query: map[string][][]string{ 532 `SELECT * from t`: {}, 533 }, 534 }, 535 { 536 name: "skip > all lines", 537 create: `a string, b string`, 538 with: `WITH fields_terminated_by = ',', skip = '4'`, 539 typ: "DELIMITED", 540 data: "a string, b string\nfoo,normal\nbar,baz", 541 query: map[string][][]string{`SELECT * from t`: {}}, 542 }, 543 { 544 name: "skip -1 lines", 545 create: `a string, b string`, 546 with: `WITH fields_terminated_by = ',', skip = '-1'`, 547 typ: "DELIMITED", 548 data: "a string, b string\nfoo,normal", 549 err: "pq: skip must be >= 0", 550 }, 551 { 552 name: "nullif empty string", 553 create: `a string, b string`, 554 with: `WITH fields_terminated_by = ',', nullif = ''`, 555 typ: "DELIMITED", 556 data: ",normal", 557 query: map[string][][]string{ 558 `SELECT * from t`: {{"NULL", "normal"}}, 559 }, 560 }, 561 { 562 name: "nullif empty string plus escape", 563 create: `a INT8, b INT8`, 564 with: `WITH fields_terminated_by = ',', fields_escaped_by = '\', nullif = ''`, 565 typ: "DELIMITED", 566 data: ",4", 567 query: map[string][][]string{ 568 `SELECT * from t`: {{"NULL", "4"}}, 569 }, 570 }, 571 { 572 name: "nullif single char string", 573 create: `a string, b string`, 574 with: `WITH fields_terminated_by = ',', nullif = 'f'`, 575 typ: "DELIMITED", 576 data: "f,normal", 577 query: map[string][][]string{ 578 `SELECT * from t`: {{"NULL", "normal"}}, 579 }, 580 }, 581 { 582 name: "nullif multiple char string", 583 create: `a string, b string`, 584 with: `WITH fields_terminated_by = ',', nullif = 'foo'`, 585 typ: "DELIMITED", 586 data: "foo,foop", 587 query: map[string][][]string{ 588 `SELECT * from t`: {{"NULL", "foop"}}, 589 }, 590 }, 591 592 // PG COPY 593 { 594 name: "unexpected escape x", 595 create: `b bytes`, 596 typ: "PGCOPY", 597 data: `\x`, 598 err: `row 1: unsupported escape sequence: \\x`, 599 }, 600 { 601 name: "unexpected escape 3", 602 create: `b bytes`, 603 typ: "PGCOPY", 604 data: `\3`, 605 err: `row 1: unsupported escape sequence: \\3`, 606 }, 607 { 608 name: "escapes", 609 create: `b bytes`, 610 typ: "PGCOPY", 611 data: `\x43\122`, 612 query: map[string][][]string{ 613 `SELECT * from t`: {{"CR"}}, 614 }, 615 }, 616 { 617 name: "normal", 618 create: `i int8, s string`, 619 typ: "PGCOPY", 620 data: "1\tSTR\n2\t\\N\n\\N\t\\t", 621 query: map[string][][]string{ 622 `SELECT * from t`: {{"1", "STR"}, {"2", "NULL"}, {"NULL", "\t"}}, 623 }, 624 }, 625 { 626 name: "comma delim", 627 create: `i int8, s string`, 628 typ: "PGCOPY", 629 with: `WITH delimiter = ','`, 630 data: "1,STR\n2,\\N\n\\N,\\,", 631 query: map[string][][]string{ 632 `SELECT * from t`: {{"1", "STR"}, {"2", "NULL"}, {"NULL", ","}}, 633 }, 634 }, 635 { 636 name: "size out of range", 637 create: `i int8`, 638 typ: "PGCOPY", 639 with: `WITH max_row_size = '10GB'`, 640 err: "max_row_size out of range", 641 }, 642 { 643 name: "line too long", 644 create: `i int8`, 645 typ: "PGCOPY", 646 data: "123456", 647 with: `WITH max_row_size = '5B'`, 648 err: "line too long", 649 }, 650 { 651 name: "not enough values", 652 typ: "PGCOPY", 653 create: "a INT8, b INT8", 654 data: `1`, 
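			// PGCOPY input is tab-delimited by default (see the "normal" case
			// above), so a bare `1` supplies only one of the two declared columns.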
655 err: "expected 2 values, got 1", 656 }, 657 { 658 name: "too many values", 659 typ: "PGCOPY", 660 create: "a INT8, b INT8", 661 data: "1\t2\t3", 662 err: "expected 2 values, got 3", 663 }, 664 665 // Postgres DUMP 666 { 667 name: "mismatch cols", 668 typ: "PGDUMP", 669 data: ` 670 CREATE TABLE t (i int8); 671 COPY t (s) FROM stdin; 672 0 673 \. 674 `, 675 err: `COPY columns do not match table columns for table t`, 676 }, 677 { 678 name: "missing COPY done", 679 typ: "PGDUMP", 680 data: ` 681 CREATE TABLE t (i int8); 682 COPY t (i) FROM stdin; 683 0 684 `, 685 err: `unexpected EOF`, 686 }, 687 { 688 name: "semicolons and comments", 689 typ: "PGDUMP", 690 data: ` 691 CREATE TABLE t (i int8); 692 ;;; 693 -- nothing ; 694 ; 695 -- blah 696 `, 697 query: map[string][][]string{ 698 `SELECT * from t`: {}, 699 }, 700 }, 701 { 702 name: "size out of range", 703 typ: "PGDUMP", 704 with: `WITH max_row_size = '10GB'`, 705 err: "max_row_size out of range", 706 }, 707 { 708 name: "line too long", 709 typ: "PGDUMP", 710 data: "CREATE TABLE t (i INT8);", 711 with: `WITH max_row_size = '5B'`, 712 err: "line too long", 713 }, 714 { 715 name: "not enough values", 716 typ: "PGDUMP", 717 data: ` 718 CREATE TABLE t (a INT8, b INT8); 719 720 COPY t (a, b) FROM stdin; 721 1 722 \. 723 `, 724 err: "expected 2 values, got 1", 725 }, 726 { 727 name: "too many values", 728 typ: "PGDUMP", 729 data: ` 730 CREATE TABLE t (a INT8, b INT8); 731 732 COPY t (a, b) FROM stdin; 733 1 2 3 734 \. 735 `, 736 err: "expected 2 values, got 3", 737 }, 738 { 739 name: "too many cols", 740 typ: "PGDUMP", 741 data: ` 742 CREATE TABLE t (a INT8, b INT8); 743 744 COPY t (a, b, c) FROM stdin; 745 1 2 3 746 \. 747 `, 748 err: "expected 2 columns, got 3", 749 }, 750 { 751 name: "fk", 752 typ: "PGDUMP", 753 data: testPgdumpFk, 754 query: map[string][][]string{ 755 `SHOW TABLES`: {{"public", "cities", "table"}, {"public", "weather", "table"}}, 756 `SELECT city FROM cities`: {{"Berkeley"}}, 757 `SELECT city FROM weather`: {{"Berkeley"}}, 758 759 `SELECT dependson_name 760 FROM crdb_internal.backward_dependencies 761 `: {{"weather_city_fkey"}}, 762 763 `SELECT create_statement 764 FROM crdb_internal.create_statements 765 WHERE descriptor_name in ('cities', 'weather') 766 ORDER BY descriptor_name 767 `: {{testPgdumpCreateCities}, {testPgdumpCreateWeather}}, 768 769 // Verify the constraint is unvalidated. 
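				// The trailing "false" in the expected row below is the `validated`
				// column of SHOW CONSTRAINTS: the imported foreign key is added
				// without being validated against existing rows.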
770 `SHOW CONSTRAINTS FROM weather 771 `: {{"weather", "weather_city_fkey", "FOREIGN KEY", "FOREIGN KEY (city) REFERENCES cities(city)", "false"}}, 772 }, 773 }, 774 { 775 name: "fk-circular", 776 typ: "PGDUMP", 777 data: testPgdumpFkCircular, 778 query: map[string][][]string{ 779 `SHOW TABLES`: {{"public", "a", "table"}, {"public", "b", "table"}}, 780 `SELECT i, k FROM a`: {{"2", "2"}}, 781 `SELECT j FROM b`: {{"2"}}, 782 783 `SELECT dependson_name 784 FROM crdb_internal.backward_dependencies ORDER BY dependson_name`: { 785 {"a_i_fkey"}, 786 {"a_k_fkey"}, 787 {"b_j_fkey"}, 788 }, 789 790 `SELECT create_statement 791 FROM crdb_internal.create_statements 792 WHERE descriptor_name in ('a', 'b') 793 ORDER BY descriptor_name 794 `: {{ 795 `CREATE TABLE a ( 796 i INT8 NOT NULL, 797 k INT8 NULL, 798 CONSTRAINT a_pkey PRIMARY KEY (i ASC), 799 CONSTRAINT a_i_fkey FOREIGN KEY (i) REFERENCES b(j), 800 CONSTRAINT a_k_fkey FOREIGN KEY (k) REFERENCES a(i), 801 INDEX a_auto_index_a_k_fkey (k ASC), 802 FAMILY "primary" (i, k) 803 )`}, { 804 `CREATE TABLE b ( 805 j INT8 NOT NULL, 806 CONSTRAINT b_pkey PRIMARY KEY (j ASC), 807 CONSTRAINT b_j_fkey FOREIGN KEY (j) REFERENCES a(i), 808 FAMILY "primary" (j) 809 )`, 810 }}, 811 812 `SHOW CONSTRAINTS FROM a`: { 813 {"a", "a_i_fkey", "FOREIGN KEY", "FOREIGN KEY (i) REFERENCES b(j)", "false"}, 814 {"a", "a_k_fkey", "FOREIGN KEY", "FOREIGN KEY (k) REFERENCES a(i)", "false"}, 815 {"a", "a_pkey", "PRIMARY KEY", "PRIMARY KEY (i ASC)", "true"}, 816 }, 817 `SHOW CONSTRAINTS FROM b`: { 818 {"b", "b_j_fkey", "FOREIGN KEY", "FOREIGN KEY (j) REFERENCES a(i)", "false"}, 819 {"b", "b_pkey", "PRIMARY KEY", "PRIMARY KEY (j ASC)", "true"}, 820 }, 821 }, 822 }, 823 { 824 name: "fk-skip", 825 typ: "PGDUMP", 826 data: testPgdumpFk, 827 with: `WITH skip_foreign_keys`, 828 query: map[string][][]string{ 829 `SHOW TABLES`: {{"public", "cities", "table"}, {"public", "weather", "table"}}, 830 // Verify the constraint is skipped. 
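				// With skip_foreign_keys the dump's FK clauses are dropped entirely,
				// so neither backward dependencies nor constraints are expected.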
831 `SELECT dependson_name FROM crdb_internal.backward_dependencies`: {}, 832 `SHOW CONSTRAINTS FROM weather`: {}, 833 }, 834 }, 835 { 836 name: "fk unreferenced", 837 typ: "TABLE weather FROM PGDUMP", 838 data: testPgdumpFk, 839 err: `table "cities" not found`, 840 }, 841 { 842 name: "fk unreferenced skipped", 843 typ: "TABLE weather FROM PGDUMP", 844 data: testPgdumpFk, 845 with: `WITH skip_foreign_keys`, 846 query: map[string][][]string{ 847 `SHOW TABLES`: {{"public", "weather", "table"}}, 848 }, 849 }, 850 { 851 name: "sequence", 852 typ: "PGDUMP", 853 data: ` 854 CREATE TABLE t (a INT8); 855 CREATE SEQUENCE public.i_seq 856 START WITH 1 857 INCREMENT BY 1 858 NO MINVALUE 859 NO MAXVALUE 860 CACHE 1; 861 ALTER SEQUENCE public.i_seq OWNED BY public.i.id; 862 ALTER TABLE ONLY t ALTER COLUMN a SET DEFAULT nextval('public.i_seq'::regclass); 863 SELECT pg_catalog.setval('public.i_seq', 10, true); 864 `, 865 query: map[string][][]string{ 866 `SELECT nextval('i_seq')`: {{"11"}}, 867 `SHOW CREATE SEQUENCE i_seq`: {{"i_seq", "CREATE SEQUENCE i_seq MINVALUE 1 MAXVALUE 9223372036854775807 INCREMENT 1 START 1"}}, 868 }, 869 }, 870 { 871 name: "non-public schema", 872 typ: "PGDUMP", 873 data: "create table s.t (i INT8)", 874 err: `non-public schemas unsupported: s`, 875 }, 876 { 877 name: "unsupported type", 878 typ: "PGDUMP", 879 data: "create table t (t time with time zone)", 880 err: `create table t \(t time with time zone\) 881 \^`, 882 }, 883 { 884 name: "various create ignores", 885 typ: "PGDUMP", 886 data: ` 887 CREATE TRIGGER conditions_set_updated_at BEFORE UPDATE ON conditions FOR EACH ROW EXECUTE PROCEDURE set_updated_at(); 888 REVOKE ALL ON SEQUENCE knex_migrations_id_seq FROM PUBLIC; 889 REVOKE ALL ON SEQUENCE knex_migrations_id_seq FROM database; 890 GRANT ALL ON SEQUENCE knex_migrations_id_seq TO database; 891 GRANT SELECT ON SEQUENCE knex_migrations_id_seq TO opentrials_readonly; 892 893 CREATE FUNCTION public.isnumeric(text) RETURNS boolean 894 LANGUAGE sql 895 AS $_$ 896 SELECT $1 ~ '^[0-9]+$' 897 $_$; 898 ALTER FUNCTION public.isnumeric(text) OWNER TO roland; 899 900 CREATE TABLE t (i INT8); 901 `, 902 query: map[string][][]string{ 903 `SHOW TABLES`: {{"public", "t", "table"}}, 904 }, 905 }, 906 { 907 name: "many tables", 908 typ: "PGDUMP", 909 data: func() string { 910 var sb strings.Builder 911 for i := 1; i <= 100; i++ { 912 fmt.Fprintf(&sb, "CREATE TABLE t%d ();\n", i) 913 } 914 return sb.String() 915 }(), 916 }, 917 918 // Error 919 { 920 name: "unsupported import format", 921 create: `b bytes`, 922 typ: "NOPE", 923 err: `unsupported import format`, 924 }, 925 { 926 name: "sequences", 927 create: `i int8 default nextval('s')`, 928 typ: "CSV", 929 err: `"s" not found`, 930 }, 931 } 932 933 var mockRecorder struct { 934 syncutil.Mutex 935 dataString, rejectedString string 936 } 937 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 938 mockRecorder.Lock() 939 defer mockRecorder.Unlock() 940 if r.Method == "GET" { 941 fmt.Fprint(w, mockRecorder.dataString) 942 } 943 if r.Method == "PUT" { 944 body, err := ioutil.ReadAll(r.Body) 945 if err != nil { 946 panic(err) 947 } 948 mockRecorder.rejectedString = string(body) 949 } 950 })) 951 defer srv.Close() 952 953 // Create and drop a table to make sure a descriptor ID gets used to verify 954 // ID rewrites happen correctly. Useful when running just a single test. 
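	// The httptest server above serves tc.data on GET and records anything the
	// import writes back via PUT (the rejected-rows file). Each case below is
	// then run roughly as (a sketch; the exact text is built by fmt.Sprintf in
	// the loop below):
	//
	//   IMPORT TABLE t (<create>) <typ> DATA ($1) <with>
	//
	// with $1 bound to srv.URL.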
955 sqlDB.Exec(t, `CREATE TABLE blah (i int8)`) 956 sqlDB.Exec(t, `DROP TABLE blah`) 957 958 for _, saveRejected := range []bool{false, true} { 959 // this test is big and slow as is, so we can't afford to double it in race. 960 if util.RaceEnabled && saveRejected { 961 continue 962 } 963 964 for i, tc := range tests { 965 if tc.typ != "CSV" && tc.typ != "DELIMITED" && saveRejected { 966 continue 967 } 968 if saveRejected { 969 if tc.with == "" { 970 tc.with = "WITH experimental_save_rejected" 971 } else { 972 tc.with += ", experimental_save_rejected" 973 } 974 } 975 t.Run(fmt.Sprintf("%s/%s: save_rejected=%v", tc.typ, tc.name, saveRejected), func(t *testing.T) { 976 dbName := fmt.Sprintf("d%d", i) 977 sqlDB.Exec(t, fmt.Sprintf(`CREATE DATABASE %s; USE %[1]s`, dbName)) 978 defer sqlDB.Exec(t, fmt.Sprintf(`DROP DATABASE %s`, dbName)) 979 var q string 980 if tc.create != "" { 981 q = fmt.Sprintf(`IMPORT TABLE t (%s) %s DATA ($1) %s`, tc.create, tc.typ, tc.with) 982 } else { 983 q = fmt.Sprintf(`IMPORT %s ($1) %s`, tc.typ, tc.with) 984 } 985 t.Log(q, srv.URL, "\nFile contents:\n", tc.data) 986 mockRecorder.dataString = tc.data 987 mockRecorder.rejectedString = "" 988 if !saveRejected || tc.rejected == "" { 989 sqlDB.ExpectErr(t, tc.err, q, srv.URL) 990 } else { 991 sqlDB.Exec(t, q, srv.URL) 992 } 993 if tc.err == "" || saveRejected { 994 for query, res := range tc.query { 995 sqlDB.CheckQueryResults(t, query, res) 996 } 997 if tc.rejected != mockRecorder.rejectedString { 998 t.Errorf("expected:\n%q\ngot:\n%q\n", tc.rejected, 999 mockRecorder.rejectedString) 1000 } 1001 } 1002 }) 1003 } 1004 } 1005 1006 t.Run("mysqlout multiple", func(t *testing.T) { 1007 sqlDB.Exec(t, `CREATE DATABASE mysqlout; USE mysqlout`) 1008 mockRecorder.dataString = "1" 1009 sqlDB.Exec(t, `IMPORT TABLE t (s STRING) DELIMITED DATA ($1, $1)`, srv.URL) 1010 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1"}, {"1"}}) 1011 }) 1012 } 1013 1014 const ( 1015 testPgdumpCreateCities = `CREATE TABLE cities ( 1016 city VARCHAR(80) NOT NULL, 1017 CONSTRAINT cities_pkey PRIMARY KEY (city ASC), 1018 FAMILY "primary" (city) 1019 )` 1020 testPgdumpCreateWeather = `CREATE TABLE weather ( 1021 city VARCHAR(80) NULL, 1022 temp_lo INT8 NULL, 1023 temp_hi INT8 NULL, 1024 prcp FLOAT4 NULL, 1025 date DATE NULL, 1026 CONSTRAINT weather_city_fkey FOREIGN KEY (city) REFERENCES cities(city), 1027 INDEX weather_auto_index_weather_city_fkey (city ASC), 1028 FAMILY "primary" (city, temp_lo, temp_hi, prcp, date, rowid) 1029 )` 1030 testPgdumpFk = ` 1031 CREATE TABLE public.cities ( 1032 city character varying(80) NOT NULL 1033 ); 1034 1035 ALTER TABLE public.cities OWNER TO postgres; 1036 1037 CREATE TABLE public.weather ( 1038 city character varying(80), 1039 temp_lo int8, 1040 temp_hi int8, 1041 prcp real, 1042 date date 1043 ); 1044 1045 ALTER TABLE public.weather OWNER TO postgres; 1046 1047 COPY public.cities (city) FROM stdin; 1048 Berkeley 1049 \. 1050 1051 COPY public.weather (city, temp_lo, temp_hi, prcp, date) FROM stdin; 1052 Berkeley 45 53 0 1994-11-28 1053 \. 1054 1055 ALTER TABLE ONLY public.cities 1056 ADD CONSTRAINT cities_pkey PRIMARY KEY (city); 1057 1058 ALTER TABLE ONLY public.weather 1059 ADD CONSTRAINT weather_city_fkey FOREIGN KEY (city) REFERENCES public.cities(city); 1060 ` 1061 1062 testPgdumpFkCircular = ` 1063 CREATE TABLE public.a ( 1064 i int8 NOT NULL, 1065 k int8 1066 ); 1067 1068 CREATE TABLE public.b ( 1069 j int8 NOT NULL 1070 ); 1071 1072 COPY public.a (i, k) FROM stdin; 1073 2 2 1074 \. 
1075 1076 COPY public.b (j) FROM stdin; 1077 2 1078 \. 1079 1080 ALTER TABLE ONLY public.a 1081 ADD CONSTRAINT a_pkey PRIMARY KEY (i); 1082 1083 ALTER TABLE ONLY public.b 1084 ADD CONSTRAINT b_pkey PRIMARY KEY (j); 1085 1086 ALTER TABLE ONLY public.a 1087 ADD CONSTRAINT a_i_fkey FOREIGN KEY (i) REFERENCES public.b(j); 1088 1089 ALTER TABLE ONLY public.a 1090 ADD CONSTRAINT a_k_fkey FOREIGN KEY (k) REFERENCES public.a(i); 1091 1092 ALTER TABLE ONLY public.b 1093 ADD CONSTRAINT b_j_fkey FOREIGN KEY (j) REFERENCES public.a(i); 1094 ` 1095 ) 1096 1097 func TestImportCSVStmt(t *testing.T) { 1098 defer leaktest.AfterTest(t)() 1099 if testing.Short() { 1100 t.Skip("short") 1101 } 1102 1103 const nodes = 3 1104 1105 numFiles := nodes + 2 1106 rowsPerFile := 1000 1107 rowsPerRaceFile := 16 1108 1109 var forceFailure bool 1110 blockGC := make(chan struct{}) 1111 1112 ctx := context.Background() 1113 baseDir := filepath.Join("testdata", "csv") 1114 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ 1115 SQLMemoryPoolSize: 256 << 20, 1116 ExternalIODir: baseDir, 1117 Knobs: base.TestingKnobs{ 1118 GCJob: &sql.GCJobTestingKnobs{RunBeforeResume: func(_ int64) error { <-blockGC; return nil }}, 1119 }, 1120 }}) 1121 defer tc.Stopper().Stop(ctx) 1122 conn := tc.Conns[0] 1123 1124 for i := range tc.Servers { 1125 tc.Servers[i].JobRegistry().(*jobs.Registry).TestingResumerCreationKnobs = map[jobspb.Type]func(raw jobs.Resumer) jobs.Resumer{ 1126 jobspb.TypeImport: func(raw jobs.Resumer) jobs.Resumer { 1127 r := raw.(*importResumer) 1128 r.testingKnobs.afterImport = func(_ backupccl.RowCount) error { 1129 if forceFailure { 1130 return errors.New("testing injected failure") 1131 } 1132 return nil 1133 } 1134 return r 1135 }, 1136 } 1137 } 1138 1139 sqlDB := sqlutils.MakeSQLRunner(conn) 1140 kvDB := tc.Server(0).DB() 1141 1142 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 1143 1144 testFiles := makeCSVData(t, numFiles, rowsPerFile, nodes, rowsPerRaceFile) 1145 if util.RaceEnabled { 1146 // This test takes a while with the race detector, so reduce the number of 1147 // files and rows per file in an attempt to speed it up. 1148 numFiles = nodes 1149 rowsPerFile = rowsPerRaceFile 1150 } 1151 1152 // Table schema used in IMPORT TABLE tests. 1153 tablePath := filepath.Join(baseDir, "table") 1154 if err := ioutil.WriteFile(tablePath, []byte(` 1155 CREATE TABLE t ( 1156 a int8 primary key, 1157 b string, 1158 index (b), 1159 index (a, b) 1160 ) 1161 `), 0666); err != nil { 1162 t.Fatal(err) 1163 } 1164 schema := []interface{}{"nodelocal://0/table"} 1165 1166 if err := ioutil.WriteFile(filepath.Join(baseDir, "empty.csv"), nil, 0666); err != nil { 1167 t.Fatal(err) 1168 } 1169 empty := []string{"'nodelocal://0/empty.csv'"} 1170 emptySchema := []interface{}{"nodelocal://0/empty.schema"} 1171 1172 // Support subtests by keeping track of the number of jobs that are executed. 1173 testNum := -1 1174 expectedRows := numFiles * rowsPerFile 1175 for i, tc := range []struct { 1176 name string 1177 query string // must have one `%s` for the files list. 
1178 args []interface{} // will have backupPath appended 1179 files []string 1180 jobOpts string 1181 err string 1182 }{ 1183 { 1184 "schema-in-file", 1185 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1186 schema, 1187 testFiles.files, 1188 ``, 1189 "", 1190 }, 1191 { 1192 "schema-in-file-intodb", 1193 `IMPORT TABLE csv1.t CREATE USING $1 CSV DATA (%s)`, 1194 schema, 1195 testFiles.files, 1196 ``, 1197 "", 1198 }, 1199 { 1200 "schema-in-query", 1201 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b)) CSV DATA (%s)`, 1202 nil, 1203 testFiles.files, 1204 ``, 1205 "", 1206 }, 1207 { 1208 "schema-in-query-opts", 1209 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b)) CSV DATA (%s) WITH delimiter = '|', comment = '#', nullif='', skip = '2'`, 1210 nil, 1211 testFiles.filesWithOpts, 1212 ` WITH comment = '#', delimiter = '|', "nullif" = '', skip = '2'`, 1213 "", 1214 }, 1215 { 1216 // Force some SST splits. 1217 "schema-in-file-sstsize", 1218 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH sstsize = '10K'`, 1219 schema, 1220 testFiles.files, 1221 ` WITH sstsize = '10K'`, 1222 "", 1223 }, 1224 { 1225 "empty-file", 1226 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1227 schema, 1228 empty, 1229 ``, 1230 "", 1231 }, 1232 { 1233 "empty-with-files", 1234 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1235 schema, 1236 append(empty, testFiles.files...), 1237 ``, 1238 "", 1239 }, 1240 { 1241 "schema-in-file-auto-decompress", 1242 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'auto'`, 1243 schema, 1244 testFiles.files, 1245 ` WITH decompress = 'auto'`, 1246 "", 1247 }, 1248 { 1249 "schema-in-file-no-decompress", 1250 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'none'`, 1251 schema, 1252 testFiles.files, 1253 ` WITH decompress = 'none'`, 1254 "", 1255 }, 1256 { 1257 "schema-in-file-explicit-gzip", 1258 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'gzip'`, 1259 schema, 1260 testFiles.gzipFiles, 1261 ` WITH decompress = 'gzip'`, 1262 "", 1263 }, 1264 { 1265 "schema-in-file-auto-gzip", 1266 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'auto'`, 1267 schema, 1268 testFiles.bzipFiles, 1269 ` WITH decompress = 'auto'`, 1270 "", 1271 }, 1272 { 1273 "schema-in-file-implicit-gzip", 1274 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1275 schema, 1276 testFiles.gzipFiles, 1277 ``, 1278 "", 1279 }, 1280 { 1281 "schema-in-file-explicit-bzip", 1282 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'bzip'`, 1283 schema, 1284 testFiles.bzipFiles, 1285 ` WITH decompress = 'bzip'`, 1286 "", 1287 }, 1288 { 1289 "schema-in-file-auto-bzip", 1290 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'auto'`, 1291 schema, 1292 testFiles.bzipFiles, 1293 ` WITH decompress = 'auto'`, 1294 "", 1295 }, 1296 { 1297 "schema-in-file-implicit-bzip", 1298 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1299 schema, 1300 testFiles.bzipFiles, 1301 ``, 1302 "", 1303 }, 1304 // NB: successes above, failures below, because we check the i-th job. 
1305 { 1306 "bad-opt-name", 1307 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b)) CSV DATA (%s) WITH foo = 'bar'`, 1308 nil, 1309 testFiles.files, 1310 ``, 1311 "invalid option \"foo\"", 1312 }, 1313 { 1314 "bad-computed-column", 1315 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING AS ('hello') STORED, INDEX (b), INDEX (a, b)) CSV DATA (%s) WITH skip = '2'`, 1316 nil, 1317 testFiles.filesWithOpts, 1318 ``, 1319 "computed columns not supported", 1320 }, 1321 { 1322 "primary-key-dup", 1323 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1324 schema, 1325 testFiles.filesWithDups, 1326 ``, 1327 "duplicate key in primary index", 1328 }, 1329 { 1330 "no-database", 1331 `IMPORT TABLE nonexistent.t CREATE USING $1 CSV DATA (%s)`, 1332 schema, 1333 testFiles.files, 1334 ``, 1335 `database does not exist: "nonexistent.t"`, 1336 }, 1337 { 1338 "into-db-fails", 1339 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH into_db = 'test'`, 1340 schema, 1341 testFiles.files, 1342 ``, 1343 `invalid option "into_db"`, 1344 }, 1345 { 1346 "schema-in-file-no-decompress-gzip", 1347 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'none'`, 1348 schema, 1349 testFiles.gzipFiles, 1350 ` WITH decompress = 'none'`, 1351 // This returns different errors for `make test` and `make testrace` but 1352 // field is in both error messages. 1353 `field`, 1354 }, 1355 { 1356 "schema-in-file-decompress-gzip", 1357 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'gzip'`, 1358 schema, 1359 testFiles.files, 1360 ` WITH decompress = 'gzip'`, 1361 "gzip: invalid header", 1362 }, 1363 { 1364 "csv-with-invalid-delimited-option", 1365 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH fields_delimited_by = '|'`, 1366 schema, 1367 testFiles.files, 1368 ``, 1369 "invalid option", 1370 }, 1371 { 1372 "empty-schema-in-file", 1373 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1374 emptySchema, 1375 testFiles.files, 1376 ``, 1377 "expected 1 create table statement", 1378 }, 1379 } { 1380 t.Run(tc.name, func(t *testing.T) { 1381 if strings.Contains(tc.name, "bzip") && len(testFiles.bzipFiles) == 0 { 1382 t.Skip("bzip2 not available on PATH?") 1383 } 1384 intodb := fmt.Sprintf(`csv%d`, i) 1385 sqlDB.Exec(t, fmt.Sprintf(`CREATE DATABASE %s`, intodb)) 1386 sqlDB.Exec(t, fmt.Sprintf(`SET DATABASE = %s`, intodb)) 1387 1388 var unused string 1389 var restored struct { 1390 rows, idx, bytes int 1391 } 1392 1393 var result int 1394 query := fmt.Sprintf(tc.query, strings.Join(tc.files, ", ")) 1395 testNum++ 1396 if tc.err != "" { 1397 sqlDB.ExpectErr(t, tc.err, query, tc.args...) 
1398 return 1399 } 1400 sqlDB.QueryRow(t, query, tc.args...).Scan( 1401 &unused, &unused, &unused, &restored.rows, &restored.idx, &restored.bytes, 1402 ) 1403 1404 jobPrefix := fmt.Sprintf(`IMPORT TABLE %s.public.t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b))`, intodb) 1405 1406 if err := jobutils.VerifySystemJob(t, sqlDB, testNum, jobspb.TypeImport, jobs.StatusSucceeded, jobs.Record{ 1407 Username: security.RootUser, 1408 Description: fmt.Sprintf(jobPrefix+` CSV DATA (%s)`+tc.jobOpts, strings.ReplaceAll(strings.Join(tc.files, ", "), "?AWS_SESSION_TOKEN=secrets", "?AWS_SESSION_TOKEN=redacted")), 1409 }); err != nil { 1410 t.Fatal(err) 1411 } 1412 1413 isEmpty := len(tc.files) == 1 && tc.files[0] == empty[0] 1414 1415 if isEmpty { 1416 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 1417 if expect := 0; result != expect { 1418 t.Fatalf("expected %d rows, got %d", expect, result) 1419 } 1420 return 1421 } 1422 1423 if expected, actual := expectedRows, restored.rows; expected != actual { 1424 t.Fatalf("expected %d rows, got %d", expected, actual) 1425 } 1426 1427 // Verify correct number of rows via COUNT. 1428 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 1429 if expect := expectedRows; result != expect { 1430 t.Fatalf("expected %d rows, got %d", expect, result) 1431 } 1432 1433 // Verify correct number of NULLs via COUNT. 1434 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE b IS NULL`).Scan(&result) 1435 expectedNulls := 0 1436 if strings.Contains(tc.query, "nullif") { 1437 expectedNulls = expectedRows / 4 1438 } 1439 if result != expectedNulls { 1440 t.Fatalf("expected %d rows, got %d", expectedNulls, result) 1441 } 1442 1443 // Verify sstsize created > 1 SST files. 1444 if tc.name == "schema-in-file-sstsize-dist" { 1445 pattern := filepath.Join(baseDir, fmt.Sprintf("%d", i), "*.sst") 1446 matches, err := filepath.Glob(pattern) 1447 if err != nil { 1448 t.Fatal(err) 1449 } 1450 if len(matches) < 2 { 1451 t.Fatal("expected > 1 SST files") 1452 } 1453 } 1454 1455 }) 1456 } 1457 1458 // Verify unique_rowid is replaced for tables without primary keys. 1459 t.Run("unique_rowid", func(t *testing.T) { 1460 sqlDB.Exec(t, "CREATE DATABASE pk") 1461 sqlDB.Exec(t, fmt.Sprintf(`IMPORT TABLE pk.t (a INT8, b STRING) CSV DATA (%s)`, strings.Join(testFiles.files, ", "))) 1462 // Verify the rowids are being generated as expected. 1463 sqlDB.CheckQueryResults(t, 1464 `SELECT count(*) FROM pk.t`, 1465 sqlDB.QueryStr(t, ` 1466 SELECT count(*) FROM 1467 (SELECT * FROM 1468 (SELECT generate_series(0, $1 - 1) file), 1469 (SELECT generate_series(1, $2) rownum) 1470 ) 1471 `, numFiles, rowsPerFile), 1472 ) 1473 }) 1474 1475 // Verify a failed IMPORT won't prevent a second IMPORT. 1476 t.Run("checkpoint-leftover", func(t *testing.T) { 1477 sqlDB.Exec(t, "CREATE DATABASE checkpoint; USE checkpoint") 1478 1479 // Specify wrong number of columns. 1480 sqlDB.ExpectErr( 1481 t, "expected 1 fields, got 2", 1482 fmt.Sprintf(`IMPORT TABLE t (a INT8 PRIMARY KEY) CSV DATA (%s)`, testFiles.files[0]), 1483 ) 1484 1485 // Specify wrong table name; still shouldn't leave behind a checkpoint file. 1486 sqlDB.ExpectErr( 1487 t, `file specifies a schema for table t`, 1488 fmt.Sprintf(`IMPORT TABLE bad CREATE USING $1 CSV DATA (%s)`, testFiles.files[0]), schema[0], 1489 ) 1490 1491 // Expect it to succeed with correct columns. 1492 sqlDB.Exec(t, fmt.Sprintf(`IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING) CSV DATA (%s)`, testFiles.files[0])) 1493 1494 // A second attempt should fail fast. 
A "slow fail" is the error message 1495 // "restoring table desc and namespace entries: table already exists". 1496 sqlDB.ExpectErr( 1497 t, `relation "t" already exists`, 1498 fmt.Sprintf(`IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING) CSV DATA (%s)`, testFiles.files[0]), 1499 ) 1500 }) 1501 1502 // Verify that a failed import will clean up after itself. This means: 1503 // - Delete the garbage data that it partially imported. 1504 // - Delete the table descriptor for the table that was created during the 1505 // import. 1506 t.Run("failed-import-gc", func(t *testing.T) { 1507 forceFailure = true 1508 defer func() { forceFailure = false }() 1509 defer gcjob.SetSmallMaxGCIntervalForTest()() 1510 beforeImport, err := tree.MakeDTimestampTZ(tc.Server(0).Clock().Now().GoTime(), time.Millisecond) 1511 if err != nil { 1512 t.Fatal(err) 1513 } 1514 1515 sqlDB.Exec(t, "CREATE DATABASE failedimport; USE failedimport;") 1516 // Hit a failure during import. 1517 sqlDB.ExpectErr( 1518 t, `testing injected failure`, 1519 fmt.Sprintf(`IMPORT TABLE t (a INT PRIMARY KEY, b STRING) CSV DATA (%s)`, testFiles.files[1]), 1520 ) 1521 // Nudge the registry to quickly adopt the job. 1522 tc.Server(0).JobRegistry().(*jobs.Registry).TestingNudgeAdoptionQueue() 1523 1524 // In the case of the test, the ID of the table that will be cleaned up due 1525 // to the failed import will be one higher than the ID of the empty database 1526 // it was created in. 1527 dbID := sqlutils.QueryDatabaseID(t, sqlDB.DB, "failedimport") 1528 tableID := sqlbase.ID(dbID + 1) 1529 var td *sqlbase.TableDescriptor 1530 if err := kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 1531 var err error 1532 td, err = sqlbase.GetTableDescFromID(ctx, txn, keys.SystemSQLCodec, tableID) 1533 return err 1534 }); err != nil { 1535 t.Fatal(err) 1536 } 1537 // Ensure that we have garbage written to the descriptor that we want to 1538 // clean up. 1539 tests.CheckKeyCount(t, kvDB, td.TableSpan(keys.SystemSQLCodec), rowsPerFile) 1540 1541 // Allow GC to progress. 1542 close(blockGC) 1543 // Ensure that a GC job was created, and wait for it to finish. 1544 doneGCQuery := fmt.Sprintf( 1545 "SELECT count(*) FROM [SHOW JOBS] WHERE job_type = '%s' AND status = '%s' AND created > %s", 1546 "SCHEMA CHANGE GC", jobs.StatusSucceeded, beforeImport.String(), 1547 ) 1548 sqlDB.CheckQueryResultsRetry(t, doneGCQuery, [][]string{{"1"}}) 1549 // Expect there are no more KVs for this span. 1550 tests.CheckKeyCount(t, kvDB, td.TableSpan(keys.SystemSQLCodec), 0) 1551 // Expect that the table descriptor is deleted. 1552 if err := kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 1553 _, err := sqlbase.GetTableDescFromID(ctx, txn, keys.SystemSQLCodec, tableID) 1554 if !testutils.IsError(err, "descriptor not found") { 1555 return err 1556 } 1557 return nil 1558 }); err != nil { 1559 t.Fatal(err) 1560 } 1561 }) 1562 1563 // Test basic role based access control. Users who have the admin role should 1564 // be able to IMPORT. 
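	// Both statement forms are exercised below through testuser's own SQL
	// connection: IMPORT TABLE (which creates the table) and IMPORT INTO an
	// existing table.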
1565 t.Run("RBAC", func(t *testing.T) { 1566 sqlDB.Exec(t, `CREATE USER testuser`) 1567 sqlDB.Exec(t, `GRANT admin TO testuser`) 1568 pgURL, cleanupFunc := sqlutils.PGUrl( 1569 t, tc.Server(0).ServingSQLAddr(), "TestImportPrivileges-testuser", url.User("testuser"), 1570 ) 1571 defer cleanupFunc() 1572 testuser, err := gosql.Open("postgres", pgURL.String()) 1573 if err != nil { 1574 t.Fatal(err) 1575 } 1576 defer testuser.Close() 1577 1578 t.Run("IMPORT TABLE", func(t *testing.T) { 1579 if _, err := testuser.Exec(fmt.Sprintf(`IMPORT TABLE rbac_table_t (a INT8 PRIMARY KEY, b STRING) CSV DATA (%s)`, testFiles.files[0])); err != nil { 1580 t.Fatal(err) 1581 } 1582 }) 1583 1584 t.Run("IMPORT INTO", func(t *testing.T) { 1585 if _, err := testuser.Exec("CREATE TABLE rbac_into_t (a INT8 PRIMARY KEY, b STRING)"); err != nil { 1586 t.Fatal(err) 1587 } 1588 if _, err := testuser.Exec(fmt.Sprintf(`IMPORT INTO rbac_into_t (a, b) CSV DATA (%s)`, testFiles.files[0])); err != nil { 1589 t.Fatal(err) 1590 } 1591 }) 1592 }) 1593 1594 // Verify DEFAULT columns and SERIAL are allowed but not evaluated. 1595 t.Run("allow-default", func(t *testing.T) { 1596 var data string 1597 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 1598 if r.Method == "GET" { 1599 _, _ = w.Write([]byte(data)) 1600 } 1601 })) 1602 defer srv.Close() 1603 1604 sqlDB.Exec(t, `CREATE DATABASE d`) 1605 sqlDB.Exec(t, `SET DATABASE = d`) 1606 1607 const ( 1608 query = `IMPORT TABLE t ( 1609 a SERIAL8, 1610 b INT8 DEFAULT unique_rowid(), 1611 c STRING DEFAULT 's', 1612 d SERIAL8, 1613 e INT8 DEFAULT unique_rowid(), 1614 f STRING DEFAULT 's', 1615 PRIMARY KEY (a, b, c) 1616 ) CSV DATA ($1)` 1617 nullif = ` WITH nullif=''` 1618 ) 1619 1620 data = ",5,e,7,," 1621 t.Run(data, func(t *testing.T) { 1622 sqlDB.ExpectErr( 1623 t, `row 1: parse "a" as INT8: could not parse ""`, 1624 query, srv.URL, 1625 ) 1626 sqlDB.ExpectErr( 1627 t, `row 1: generate insert row: null value in column "a" violates not-null constraint`, 1628 query+nullif, srv.URL, 1629 ) 1630 }) 1631 data = "2,5,e,,," 1632 t.Run(data, func(t *testing.T) { 1633 sqlDB.ExpectErr( 1634 t, `row 1: generate insert row: null value in column "d" violates not-null constraint`, 1635 query+nullif, srv.URL, 1636 ) 1637 }) 1638 data = "2,,e,,," 1639 t.Run(data, func(t *testing.T) { 1640 sqlDB.ExpectErr( 1641 t, `"b" violates not-null constraint`, 1642 query+nullif, srv.URL, 1643 ) 1644 }) 1645 1646 data = "2,5,,,," 1647 t.Run(data, func(t *testing.T) { 1648 sqlDB.ExpectErr( 1649 t, `"c" violates not-null constraint`, 1650 query+nullif, srv.URL, 1651 ) 1652 }) 1653 1654 data = "2,5,e,-1,," 1655 t.Run(data, func(t *testing.T) { 1656 sqlDB.Exec(t, query+nullif, srv.URL) 1657 sqlDB.CheckQueryResults(t, 1658 `SELECT * FROM t`, 1659 sqlDB.QueryStr(t, `SELECT 2, 5, 'e', -1, NULL, NULL`), 1660 ) 1661 }) 1662 }) 1663 } 1664 1665 func TestExportImportRoundTrip(t *testing.T) { 1666 defer leaktest.AfterTest(t)() 1667 ctx := context.Background() 1668 baseDir, cleanup := testutils.TempDir(t) 1669 defer cleanup() 1670 tc := testcluster.StartTestCluster( 1671 t, 1, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: baseDir}}) 1672 defer tc.Stopper().Stop(ctx) 1673 conn := tc.Conns[0] 1674 sqlDB := sqlutils.MakeSQLRunner(conn) 1675 1676 tests := []struct { 1677 stmts string 1678 tbl string 1679 expected string 1680 }{ 1681 // Note that the directory names that are being imported from and exported into 1682 // need to differ across runs, so we let the test 
runner format the stmts field 1683 // with a unique directory name per run. 1684 { 1685 stmts: `EXPORT INTO CSV 'nodelocal://0/%[1]s' FROM SELECT ARRAY['a', 'b', 'c']; 1686 IMPORT TABLE t (x TEXT[]) CSV DATA ('nodelocal://0/%[1]s/n1.0.csv')`, 1687 tbl: "t", 1688 expected: `SELECT ARRAY['a', 'b', 'c']`, 1689 }, 1690 { 1691 stmts: `EXPORT INTO CSV 'nodelocal://0/%[1]s' FROM SELECT ARRAY[b'abc', b'\141\142\143', b'\x61\x62\x63']; 1692 IMPORT TABLE t (x BYTES[]) CSV DATA ('nodelocal://0/%[1]s/n1.0.csv')`, 1693 tbl: "t", 1694 expected: `SELECT ARRAY[b'abc', b'\141\142\143', b'\x61\x62\x63']`, 1695 }, 1696 { 1697 stmts: `EXPORT INTO CSV 'nodelocal://0/%[1]s' FROM SELECT 'dog' COLLATE en; 1698 IMPORT TABLE t (x STRING COLLATE en) CSV DATA ('nodelocal://0/%[1]s/n1.0.csv')`, 1699 tbl: "t", 1700 expected: `SELECT 'dog' COLLATE en`, 1701 }, 1702 } 1703 1704 for i, test := range tests { 1705 sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE IF EXISTS %s`, test.tbl)) 1706 sqlDB.Exec(t, fmt.Sprintf(test.stmts, fmt.Sprintf("run%d", i))) 1707 sqlDB.CheckQueryResults(t, fmt.Sprintf(`SELECT * FROM %s`, test.tbl), sqlDB.QueryStr(t, test.expected)) 1708 } 1709 } 1710 1711 // TODO(adityamaru): Tests still need to be added incrementally as 1712 // relevant IMPORT INTO logic is added. Some of them include: 1713 // -> FK and constraint violation 1714 // -> CSV containing keys which will shadow existing data 1715 // -> Rollback of a failed IMPORT INTO 1716 func TestImportIntoCSV(t *testing.T) { 1717 defer leaktest.AfterTest(t)() 1718 1719 if testing.Short() { 1720 t.Skip("short") 1721 } 1722 1723 const nodes = 3 1724 1725 numFiles := nodes + 2 1726 rowsPerFile := 1000 1727 rowsPerRaceFile := 16 1728 1729 ctx := context.Background() 1730 baseDir := filepath.Join("testdata", "csv") 1731 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: baseDir}}) 1732 defer tc.Stopper().Stop(ctx) 1733 conn := tc.Conns[0] 1734 1735 var forceFailure bool 1736 var importBodyFinished chan struct{} 1737 var delayImportFinish chan struct{} 1738 1739 for i := range tc.Servers { 1740 tc.Servers[i].JobRegistry().(*jobs.Registry).TestingResumerCreationKnobs = map[jobspb.Type]func(raw jobs.Resumer) jobs.Resumer{ 1741 jobspb.TypeImport: func(raw jobs.Resumer) jobs.Resumer { 1742 r := raw.(*importResumer) 1743 r.testingKnobs.afterImport = func(_ backupccl.RowCount) error { 1744 if importBodyFinished != nil { 1745 importBodyFinished <- struct{}{} 1746 } 1747 if delayImportFinish != nil { 1748 <-delayImportFinish 1749 } 1750 1751 if forceFailure { 1752 return errors.New("testing injected failure") 1753 } 1754 return nil 1755 } 1756 return r 1757 }, 1758 } 1759 } 1760 1761 sqlDB := sqlutils.MakeSQLRunner(conn) 1762 1763 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 1764 1765 testFiles := makeCSVData(t, numFiles, rowsPerFile, nodes, rowsPerRaceFile) 1766 if util.RaceEnabled { 1767 // This test takes a while with the race detector, so reduce the number of 1768 // files and rows per file in an attempt to speed it up. 1769 numFiles = nodes 1770 rowsPerFile = rowsPerRaceFile 1771 } 1772 1773 if err := ioutil.WriteFile(filepath.Join(baseDir, "empty.csv"), nil, 0666); err != nil { 1774 t.Fatal(err) 1775 } 1776 empty := []string{"'nodelocal://0/empty.csv'"} 1777 1778 // Support subtests by keeping track of the number of jobs that are executed. 
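	// Each case below is expanded by fmt.Sprintf into a statement of the form
	// (a sketch; <file list> stands in for the nodelocal URLs in tc.files):
	//
	//   IMPORT INTO t (a, b) CSV DATA (<file list>) [WITH <options>]
	//
	// and, for successful cases, the description of the resulting system job is
	// verified against that same statement text.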
1779 testNum := -1 1780 insertedRows := numFiles * rowsPerFile 1781 1782 for _, tc := range []struct { 1783 name string 1784 query string // must have one `%s` for the files list. 1785 files []string 1786 jobOpts string 1787 err string 1788 }{ 1789 { 1790 "simple-import-into", 1791 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1792 testFiles.files, 1793 ``, 1794 "", 1795 }, 1796 { 1797 "import-into-with-opts", 1798 `IMPORT INTO t (a, b) CSV DATA (%s) WITH delimiter = '|', comment = '#', nullif='', skip = '2'`, 1799 testFiles.filesWithOpts, 1800 ` WITH comment = '#', delimiter = '|', "nullif" = '', skip = '2'`, 1801 "", 1802 }, 1803 { 1804 // Force some SST splits. 1805 "import-into-sstsize", 1806 `IMPORT INTO t (a, b) CSV DATA (%s) WITH sstsize = '10K'`, 1807 testFiles.files, 1808 ` WITH sstsize = '10K'`, 1809 "", 1810 }, 1811 { 1812 "empty-file", 1813 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1814 empty, 1815 ``, 1816 "", 1817 }, 1818 { 1819 "empty-with-files", 1820 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1821 append(empty, testFiles.files...), 1822 ``, 1823 "", 1824 }, 1825 { 1826 "import-into-auto-decompress", 1827 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1828 testFiles.files, 1829 ` WITH decompress = 'auto'`, 1830 "", 1831 }, 1832 { 1833 "import-into-no-decompress", 1834 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'none'`, 1835 testFiles.files, 1836 ` WITH decompress = 'none'`, 1837 "", 1838 }, 1839 { 1840 "import-into-explicit-gzip", 1841 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'gzip'`, 1842 testFiles.gzipFiles, 1843 ` WITH decompress = 'gzip'`, 1844 "", 1845 }, 1846 { 1847 "import-into-auto-gzip", 1848 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1849 testFiles.gzipFiles, 1850 ` WITH decompress = 'auto'`, 1851 "", 1852 }, 1853 { 1854 "import-into-implicit-gzip", 1855 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1856 testFiles.gzipFiles, 1857 ``, 1858 "", 1859 }, 1860 { 1861 "import-into-explicit-bzip", 1862 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'bzip'`, 1863 testFiles.bzipFiles, 1864 ` WITH decompress = 'bzip'`, 1865 "", 1866 }, 1867 { 1868 "import-into-auto-bzip", 1869 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1870 testFiles.bzipFiles, 1871 ` WITH decompress = 'auto'`, 1872 "", 1873 }, 1874 { 1875 "import-into-implicit-bzip", 1876 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1877 testFiles.bzipFiles, 1878 ``, 1879 "", 1880 }, 1881 { 1882 "import-into-no-decompress-wildcard", 1883 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'none'`, 1884 testFiles.filesUsingWildcard, 1885 ` WITH decompress = 'none'`, 1886 "", 1887 }, 1888 { 1889 "import-into-explicit-gzip-wildcard", 1890 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'gzip'`, 1891 testFiles.gzipFilesUsingWildcard, 1892 ` WITH decompress = 'gzip'`, 1893 "", 1894 }, 1895 { 1896 "import-into-auto-bzip-wildcard", 1897 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1898 testFiles.gzipFilesUsingWildcard, 1899 ` WITH decompress = 'auto'`, 1900 "", 1901 }, 1902 // NB: successes above, failures below, because we check the i-th job. 
1903 { 1904 "import-into-bad-opt-name", 1905 `IMPORT INTO t (a, b) CSV DATA (%s) WITH foo = 'bar'`, 1906 testFiles.files, 1907 ``, 1908 "invalid option \"foo\"", 1909 }, 1910 { 1911 "import-into-no-database", 1912 `IMPORT INTO nonexistent.t (a, b) CSV DATA (%s)`, 1913 testFiles.files, 1914 ``, 1915 `database does not exist: "nonexistent.t"`, 1916 }, 1917 { 1918 "import-into-no-table", 1919 `IMPORT INTO g (a, b) CSV DATA (%s)`, 1920 testFiles.files, 1921 ``, 1922 `pq: relation "g" does not exist`, 1923 }, 1924 { 1925 "import-into-no-decompress-gzip", 1926 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'none'`, 1927 testFiles.gzipFiles, 1928 ` WITH decompress = 'none'`, 1929 // This returns different errors for `make test` and `make testrace` but 1930 // field is in both error messages. 1931 "field", 1932 }, 1933 { 1934 "import-into-no-decompress-gzip", 1935 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'gzip'`, 1936 testFiles.files, 1937 ` WITH decompress = 'gzip'`, 1938 "gzip: invalid header", 1939 }, 1940 { 1941 "import-no-files-match-wildcard", 1942 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1943 []string{`'nodelocal://0/data-[0-9][0-9]*'`}, 1944 ` WITH decompress = 'auto'`, 1945 `pq: no files matched uri provided`, 1946 }, 1947 { 1948 "import-into-no-glob-wildcard", 1949 `IMPORT INTO t (a, b) CSV DATA (%s) WITH disable_glob_matching`, 1950 testFiles.filesUsingWildcard, 1951 ` WITH disable_glob_matching`, 1952 "pq: (.+) no such file or directory", 1953 }, 1954 } { 1955 t.Run(tc.name, func(t *testing.T) { 1956 if strings.Contains(tc.name, "bzip") && len(testFiles.bzipFiles) == 0 { 1957 t.Skip("bzip2 not available on PATH?") 1958 } 1959 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING)`) 1960 defer sqlDB.Exec(t, `DROP TABLE t`) 1961 1962 var unused string 1963 var restored struct { 1964 rows, idx, bytes int 1965 } 1966 1967 // Insert the test data 1968 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 1969 numExistingRows := len(insert) 1970 1971 for i, v := range insert { 1972 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 1973 } 1974 1975 var result int 1976 query := fmt.Sprintf(tc.query, strings.Join(tc.files, ", ")) 1977 testNum++ 1978 if tc.err != "" { 1979 sqlDB.ExpectErr(t, tc.err, query) 1980 return 1981 } 1982 1983 sqlDB.QueryRow(t, query).Scan( 1984 &unused, &unused, &unused, &restored.rows, &restored.idx, &restored.bytes, 1985 ) 1986 1987 jobPrefix := fmt.Sprintf(`IMPORT INTO defaultdb.public.t(a, b)`) 1988 if err := jobutils.VerifySystemJob(t, sqlDB, testNum, jobspb.TypeImport, jobs.StatusSucceeded, jobs.Record{ 1989 Username: security.RootUser, 1990 Description: fmt.Sprintf(jobPrefix+` CSV DATA (%s)`+tc.jobOpts, strings.ReplaceAll(strings.Join(tc.files, ", "), "?AWS_SESSION_TOKEN=secrets", "?AWS_SESSION_TOKEN=redacted")), 1991 }); err != nil { 1992 t.Fatal(err) 1993 } 1994 1995 isEmpty := len(tc.files) == 1 && tc.files[0] == empty[0] 1996 if isEmpty { 1997 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 1998 if result != numExistingRows { 1999 t.Fatalf("expected %d rows, got %d", numExistingRows, result) 2000 } 2001 return 2002 } 2003 2004 if expected, actual := insertedRows, restored.rows; expected != actual { 2005 t.Fatalf("expected %d rows, got %d", expected, actual) 2006 } 2007 2008 // Verify correct number of rows via COUNT. 
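			// (IMPORT INTO appends to the table, so the expected total is the
			// seeded numExistingRows plus the rows inserted by the import.)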
2009 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 2010 if expect := numExistingRows + insertedRows; result != expect { 2011 t.Fatalf("expected %d rows, got %d", expect, result) 2012 } 2013 2014 // Verify correct number of NULLs via COUNT. 2015 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE b IS NULL`).Scan(&result) 2016 expectedNulls := 0 2017 if strings.Contains(tc.query, "nullif") { 2018 expectedNulls = insertedRows / 4 2019 } 2020 if result != expectedNulls { 2021 t.Fatalf("expected %d rows, got %d", expectedNulls, result) 2022 } 2023 }) 2024 } 2025 2026 // Verify unique_rowid is replaced for tables without primary keys. 2027 t.Run("import-into-unique_rowid", func(t *testing.T) { 2028 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING)`) 2029 defer sqlDB.Exec(t, `DROP TABLE t`) 2030 2031 // Insert the test data 2032 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2033 numExistingRows := len(insert) 2034 2035 for i, v := range insert { 2036 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2037 } 2038 2039 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, strings.Join(testFiles.files, ", "))) 2040 // Verify the rowids are being generated as expected. 2041 sqlDB.CheckQueryResults(t, 2042 `SELECT count(*) FROM t`, 2043 sqlDB.QueryStr(t, ` 2044 SELECT count(*) + $3 FROM 2045 (SELECT * FROM 2046 (SELECT generate_series(0, $1 - 1) file), 2047 (SELECT generate_series(1, $2) rownum) 2048 ) 2049 `, numFiles, rowsPerFile, numExistingRows), 2050 ) 2051 }) 2052 2053 // Verify a failed IMPORT INTO won't prevent a subsequent IMPORT INTO. 2054 t.Run("import-into-checkpoint-leftover", func(t *testing.T) { 2055 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2056 defer sqlDB.Exec(t, `DROP TABLE t`) 2057 2058 // Insert the test data 2059 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2060 2061 for i, v := range insert { 2062 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2063 } 2064 2065 // Hit a failure during import. 2066 forceFailure = true 2067 sqlDB.ExpectErr( 2068 t, `testing injected failure`, 2069 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[1]), 2070 ) 2071 forceFailure = false 2072 2073 // Expect it to succeed on re-attempt. 2074 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[1])) 2075 }) 2076 2077 // Verify that during IMPORT INTO the table is offline. 2078 t.Run("offline-state", func(t *testing.T) { 2079 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2080 defer sqlDB.Exec(t, `DROP TABLE t`) 2081 2082 // Insert the test data 2083 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2084 2085 for i, v := range insert { 2086 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2087 } 2088 2089 // Hit a failure during import. 
2090 importBodyFinished = make(chan struct{}) 2091 delayImportFinish = make(chan struct{}) 2092 defer func() { 2093 importBodyFinished = nil 2094 delayImportFinish = nil 2095 }() 2096 2097 var unused interface{} 2098 2099 g := ctxgroup.WithContext(ctx) 2100 g.GoCtx(func(ctx context.Context) error { 2101 defer close(importBodyFinished) 2102 _, err := sqlDB.DB.ExecContext(ctx, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[1])) 2103 return err 2104 }) 2105 g.GoCtx(func(ctx context.Context) error { 2106 defer close(delayImportFinish) 2107 <-importBodyFinished 2108 2109 err := sqlDB.DB.QueryRowContext(ctx, `SELECT 1 FROM t`).Scan(&unused) 2110 if !testutils.IsError(err, "relation \"t\" does not exist") { 2111 return err 2112 } 2113 return nil 2114 }) 2115 if err := g.Wait(); err != nil { 2116 t.Fatal(err) 2117 } 2118 t.Skip() 2119 2120 // Expect it to succeed on re-attempt. 2121 sqlDB.QueryRow(t, `SELECT 1 FROM t`).Scan(&unused) 2122 }) 2123 2124 // Tests for user specified target columns in IMPORT INTO statements. 2125 // 2126 // Tests IMPORT INTO with various target column sets, and an implicit PK 2127 // provided by the hidden column row_id. 2128 t.Run("target-cols-with-default-pk", func(t *testing.T) { 2129 var data string 2130 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2131 if r.Method == "GET" { 2132 _, _ = w.Write([]byte(data)) 2133 } 2134 })) 2135 defer srv.Close() 2136 2137 createQuery := `CREATE TABLE t (a INT8, 2138 b INT8, 2139 c STRING, 2140 d INT8, 2141 e INT8, 2142 f STRING)` 2143 2144 t.Run(data, func(t *testing.T) { 2145 sqlDB.Exec(t, createQuery) 2146 defer sqlDB.Exec(t, `DROP TABLE t`) 2147 2148 data = "1" 2149 sqlDB.Exec(t, `IMPORT INTO t (a) CSV DATA ($1)`, srv.URL) 2150 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, 2151 sqlDB.QueryStr(t, `SELECT 1, NULL, NULL, NULL, NULL, 'NULL'`), 2152 ) 2153 }) 2154 t.Run(data, func(t *testing.T) { 2155 sqlDB.Exec(t, createQuery) 2156 defer sqlDB.Exec(t, `DROP TABLE t`) 2157 2158 data = "1,teststr" 2159 sqlDB.Exec(t, `IMPORT INTO t (a, f) CSV DATA ($1)`, srv.URL) 2160 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, 2161 sqlDB.QueryStr(t, `SELECT 1, NULL, NULL, NULL, NULL, 'teststr'`), 2162 ) 2163 }) 2164 t.Run(data, func(t *testing.T) { 2165 sqlDB.Exec(t, createQuery) 2166 defer sqlDB.Exec(t, `DROP TABLE t`) 2167 2168 data = "7,12,teststr" 2169 sqlDB.Exec(t, `IMPORT INTO t (d, e, f) CSV DATA ($1)`, srv.URL) 2170 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, 2171 sqlDB.QueryStr(t, `SELECT NULL, NULL, NULL, 7, 12, 'teststr'`), 2172 ) 2173 }) 2174 }) 2175 2176 // Tests IMPORT INTO with a target column set, and an explicit PK. 
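// For reference (derived from the subtest below, not asserted verbatim): only
// column a is targeted, so after the IMPORT the table should hold the seven
// pre-existing rows plus ten imported rows with b left NULL, i.e.
//
//   SELECT count(*) FROM t WHERE b IS NULL   -- expected to return 10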
2177 t.Run("target-cols-with-explicit-pk", func(t *testing.T) { 2178 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2179 defer sqlDB.Exec(t, `DROP TABLE t`) 2180 2181 // Insert the test data 2182 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2183 2184 for i, v := range insert { 2185 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i+1000, v) 2186 } 2187 2188 data := []string{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10"} 2189 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2190 if r.Method == "GET" { 2191 _, _ = w.Write([]byte(strings.Join(data, "\n"))) 2192 } 2193 })) 2194 defer srv.Close() 2195 2196 sqlDB.Exec(t, "IMPORT INTO t (a) CSV DATA ($1)", srv.URL) 2197 2198 var result int 2199 numExistingRows := len(insert) 2200 // Verify that the target column has been populated. 2201 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE a IS NOT NULL`).Scan(&result) 2202 if expect := numExistingRows + len(data); result != expect { 2203 t.Fatalf("expected %d rows, got %d", expect, result) 2204 } 2205 2206 // Verify that the non-target columns have NULLs. 2207 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE b IS NULL`).Scan(&result) 2208 expectedNulls := len(data) 2209 if result != expectedNulls { 2210 t.Fatalf("expected %d rows, got %d", expectedNulls, result) 2211 } 2212 }) 2213 2214 // Tests IMPORT INTO with a CSV file having more columns when targeted, expected to 2215 // get an error indicating the error. 2216 t.Run("csv-with-more-than-targeted-columns", func(t *testing.T) { 2217 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2218 defer sqlDB.Exec(t, `DROP TABLE t`) 2219 2220 // Expect an error if attempting to IMPORT INTO with CSV having more columns 2221 // than targeted. 2222 sqlDB.ExpectErr( 2223 t, `row 1: expected 1 fields, got 2`, 2224 fmt.Sprintf("IMPORT INTO t (a) CSV DATA (%s)", testFiles.files[0]), 2225 ) 2226 }) 2227 2228 // Tests IMPORT INTO with a target column set which does not include all PKs. 2229 // As a result the non-target column is non-nullable, which is not allowed 2230 // until we support DEFAULT expressions. 2231 t.Run("target-cols-excluding-explicit-pk", func(t *testing.T) { 2232 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2233 defer sqlDB.Exec(t, `DROP TABLE t`) 2234 2235 // Expect an error if attempting to IMPORT INTO a target list which does 2236 // not include all the PKs of the table. 2237 sqlDB.ExpectErr( 2238 t, `pq: all non-target columns in IMPORT INTO must be nullable`, 2239 fmt.Sprintf(`IMPORT INTO t (b) CSV DATA (%s)`, testFiles.files[0]), 2240 ) 2241 }) 2242 2243 // Tests behavior when the existing table being imported into has more columns 2244 // in its schema then the source CSV file. 
2245 t.Run("more-table-cols-than-csv", func(t *testing.T) { 2246 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING, c INT)`) 2247 defer sqlDB.Exec(t, `DROP TABLE t`) 2248 2249 // Insert the test data 2250 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2251 2252 for i, v := range insert { 2253 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2254 } 2255 2256 sqlDB.ExpectErr( 2257 t, "row 1: expected 3 fields, got 2", 2258 fmt.Sprintf(`IMPORT INTO t (a, b, c) CSV DATA (%s)`, testFiles.files[0]), 2259 ) 2260 }) 2261 2262 // Tests the case where we create table columns in specific order while trying 2263 // to import data from csv where columns order is different and import expression 2264 // defines in what order columns should be imported to align with table definition 2265 t.Run("target-cols-reordered", func(t *testing.T) { 2266 sqlDB.Exec(t, "CREATE TABLE t (a INT PRIMARY KEY, b INT, c STRING NOT NULL, d DECIMAL NOT NULL)") 2267 defer sqlDB.Exec(t, `DROP TABLE t`) 2268 2269 const data = "3.14,c is a string,1\n2.73,another string,2" 2270 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2271 if r.Method == "GET" { 2272 _, _ = w.Write([]byte(data)) 2273 } 2274 })) 2275 defer srv.Close() 2276 2277 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (d, c, a) CSV DATA ("%s")`, srv.URL)) 2278 sqlDB.CheckQueryResults(t, `SELECT * FROM t ORDER BY a`, 2279 [][]string{{"1", "NULL", "c is a string", "3.14"}, {"2", "NULL", "another string", "2.73"}}, 2280 ) 2281 }) 2282 2283 // Tests that we can import into the table even if the table has columns named with 2284 // reserved keywords. 2285 t.Run("cols-named-with-reserved-keywords", func(t *testing.T) { 2286 sqlDB.Exec(t, `CREATE TABLE t ("select" INT PRIMARY KEY, "from" INT, "Some-c,ol-'Name'" STRING NOT NULL)`) 2287 defer sqlDB.Exec(t, `DROP TABLE t`) 2288 2289 const data = "today,1,2" 2290 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2291 if r.Method == "GET" { 2292 _, _ = w.Write([]byte(data)) 2293 } 2294 })) 2295 defer srv.Close() 2296 2297 sqlDB.Exec(t, fmt.Sprintf( 2298 `IMPORT INTO t ("Some-c,ol-'Name'", "select", "from") CSV DATA ("%s")`, srv.URL)) 2299 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1", "2", "today"}}) 2300 }) 2301 2302 // Tests behvior when the existing table being imported into has fewer columns 2303 // in its schema then the source CSV file. 2304 t.Run("fewer-table-cols-than-csv", func(t *testing.T) { 2305 sqlDB.Exec(t, `CREATE TABLE t (a INT)`) 2306 defer sqlDB.Exec(t, `DROP TABLE t`) 2307 2308 sqlDB.ExpectErr( 2309 t, "row 1: expected 1 fields, got 2", 2310 fmt.Sprintf(`IMPORT INTO t (a) CSV DATA (%s)`, testFiles.files[0]), 2311 ) 2312 }) 2313 2314 // Tests IMPORT INTO without any target columns specified. This implies an 2315 // import of all columns in the exisiting table. 2316 t.Run("no-target-cols-specified", func(t *testing.T) { 2317 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2318 defer sqlDB.Exec(t, `DROP TABLE t`) 2319 2320 // Insert the test data 2321 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2322 2323 for i, v := range insert { 2324 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i+rowsPerFile, v) 2325 } 2326 2327 sqlDB.Exec(t, fmt.Sprintf("IMPORT INTO t CSV DATA (%s)", testFiles.files[0])) 2328 2329 var result int 2330 numExistingRows := len(insert) 2331 // Verify that all columns have been populated with imported data. 
2332 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE a IS NOT NULL`).Scan(&result) 2333 if expect := numExistingRows + rowsPerFile; result != expect { 2334 t.Fatalf("expected %d rows, got %d", expect, result) 2335 } 2336 2337 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE b IS NOT NULL`).Scan(&result) 2338 if expect := numExistingRows + rowsPerFile; result != expect { 2339 t.Fatalf("expected %d rows, got %d", expect, result) 2340 } 2341 }) 2342 2343 // IMPORT INTO does not support DEFAULT expressions for either target or 2344 // non-target columns. 2345 t.Run("import-into-check-no-default-cols", func(t *testing.T) { 2346 sqlDB.Exec(t, `CREATE TABLE t (a INT DEFAULT 1, b STRING)`) 2347 defer sqlDB.Exec(t, `DROP TABLE t`) 2348 2349 // Insert the test data 2350 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2351 2352 for i, v := range insert { 2353 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2354 } 2355 2356 sqlDB.ExpectErr( 2357 t, fmt.Sprintf("pq: cannot IMPORT INTO a table with a DEFAULT expression for any of its columns"), 2358 fmt.Sprintf(`IMPORT INTO t (a) CSV DATA (%s)`, testFiles.files[0]), 2359 ) 2360 }) 2361 2362 // IMPORT INTO does not currently support import into interleaved tables. 2363 t.Run("import-into-rejects-interleaved-tables", func(t *testing.T) { 2364 sqlDB.Exec(t, `CREATE TABLE parent (parent_id INT PRIMARY KEY)`) 2365 sqlDB.Exec(t, `CREATE TABLE child ( 2366 parent_id INT, 2367 child_id INT, 2368 PRIMARY KEY(parent_id, child_id)) 2369 INTERLEAVE IN PARENT parent(parent_id)`) 2370 defer sqlDB.Exec(t, `DROP TABLE parent`) 2371 defer sqlDB.Exec(t, `DROP TABLE child`) 2372 2373 // Cannot IMPORT INTO interleaved parent 2374 sqlDB.ExpectErr( 2375 t, "Cannot use IMPORT INTO with interleaved tables", 2376 fmt.Sprintf(`IMPORT INTO parent (parent_id) CSV DATA (%s)`, testFiles.files[0])) 2377 2378 // Cannot IMPORT INTO interleaved child either. 2379 sqlDB.ExpectErr( 2380 t, "Cannot use IMPORT INTO with interleaved tables", 2381 fmt.Sprintf(`IMPORT INTO child (parent_id, child_id) CSV DATA (%s)`, testFiles.files[0])) 2382 }) 2383 2384 // This tests that consecutive imports from unique data sources into an 2385 // existing table without an explicit PK, do not overwrite each other. It 2386 // exercises the row_id generation in IMPORT. 2387 t.Run("multiple-import-into-without-pk", func(t *testing.T) { 2388 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING)`) 2389 defer sqlDB.Exec(t, `DROP TABLE t`) 2390 2391 // Insert the test data 2392 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2393 numExistingRows := len(insert) 2394 insertedRows := rowsPerFile * 3 2395 2396 for i, v := range insert { 2397 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2398 } 2399 2400 // Expect it to succeed with correct columns. 2401 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0])) 2402 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[1])) 2403 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[2])) 2404 2405 // Verify correct number of rows via COUNT. 2406 var result int 2407 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 2408 if expect := numExistingRows + insertedRows; result != expect { 2409 t.Fatalf("expected %d rows, got %d", expect, result) 2410 } 2411 }) 2412 2413 // This tests that a collision is not detected when importing the same source 2414 // file twice in the same IMPORT, into a table without a PK. 
It exercises the 2415 // row_id generation logic. 2416 t.Run("multiple-file-import-into-without-pk", func(t *testing.T) { 2417 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING)`) 2418 defer sqlDB.Exec(t, `DROP TABLE t`) 2419 2420 sqlDB.Exec(t, 2421 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s, %s)`, testFiles.files[0], testFiles.files[0]), 2422 ) 2423 2424 // Verify correct number of rows via COUNT. 2425 var result int 2426 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 2427 if result != rowsPerFile*2 { 2428 t.Fatalf("expected %d rows, got %d", rowsPerFile*2, result) 2429 } 2430 }) 2431 2432 // IMPORT INTO disallows shadowing of existing keys when ingesting data. With 2433 // the exception of shadowing keys having the same ts and value. 2434 // 2435 // This tests key collision detection when importing the same source file 2436 // twice. The ts across imports is different, and so this is considered a 2437 // collision. 2438 t.Run("import-into-same-file-diff-imports", func(t *testing.T) { 2439 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2440 defer sqlDB.Exec(t, `DROP TABLE t`) 2441 2442 sqlDB.Exec(t, 2443 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0]), 2444 ) 2445 2446 sqlDB.ExpectErr( 2447 t, `ingested key collides with an existing one: /Table/\d+/1/0/0`, 2448 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0]), 2449 ) 2450 }) 2451 2452 // When the ts and value of the ingested keys across SSTs match the existing 2453 // keys we do not consider this to be a collision. This is to support IMPORT 2454 // job pause/resumption. 2455 // 2456 // To ensure uniform behavior we apply the same exception to keys within the 2457 // same SST. 2458 // 2459 // This test attempts to ingest duplicate keys in the same SST, with the same 2460 // value, and succeeds in doing so. 2461 t.Run("import-into-dups-in-sst", func(t *testing.T) { 2462 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2463 defer sqlDB.Exec(t, `DROP TABLE t`) 2464 2465 sqlDB.Exec(t, 2466 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.fileWithDupKeySameValue[0]), 2467 ) 2468 2469 // Verify correct number of rows via COUNT. 2470 var result int 2471 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 2472 if result != 200 { 2473 t.Fatalf("expected 200 rows, got %d", result) 2474 } 2475 }) 2476 2477 // This tests key collision detection and importing a source file with the 2478 // colliding key sandwiched between valid keys. 2479 t.Run("import-into-key-collision", func(t *testing.T) { 2480 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2481 defer sqlDB.Exec(t, `DROP TABLE t`) 2482 2483 sqlDB.Exec(t, 2484 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0]), 2485 ) 2486 2487 sqlDB.ExpectErr( 2488 t, `ingested key collides with an existing one: /Table/\d+/1/0/0`, 2489 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.fileWithShadowKeys[0]), 2490 ) 2491 }) 2492 2493 // Tests that IMPORT INTO invalidates FK and CHECK constraints. 
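// Once the import has left them unvalidated, a user could re-validate them
// manually; for example (illustrative, not executed by this test):
//
//   ALTER TABLE t VALIDATE CONSTRAINT check_a;
//   ALTER TABLE t VALIDATE CONSTRAINT fk_ref;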
2494 t.Run("import-into-invalidate-constraints", func(t *testing.T) { 2495 2496 sqlDB.Exec(t, `CREATE TABLE ref (b STRING PRIMARY KEY)`) 2497 defer sqlDB.Exec(t, `DROP TABLE ref`) 2498 sqlDB.Exec(t, `CREATE TABLE t (a INT CHECK (a >= 0), b STRING, CONSTRAINT fk_ref FOREIGN KEY (b) REFERENCES ref)`) 2499 defer sqlDB.Exec(t, `DROP TABLE t`) 2500 2501 var checkValidated, fkValidated bool 2502 sqlDB.QueryRow(t, fmt.Sprintf(`SELECT validated from [SHOW CONSTRAINT FROM t] WHERE constraint_name = 'check_a'`)).Scan(&checkValidated) 2503 sqlDB.QueryRow(t, fmt.Sprintf(`SELECT validated from [SHOW CONSTRAINT FROM t] WHERE constraint_name = 'fk_ref'`)).Scan(&fkValidated) 2504 2505 // Prior to import all constraints should be validated. 2506 if !checkValidated || !fkValidated { 2507 t.Fatal("Constraints not validated on creation.\n") 2508 } 2509 2510 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0])) 2511 2512 sqlDB.QueryRow(t, fmt.Sprintf(`SELECT validated from [SHOW CONSTRAINT FROM t] WHERE constraint_name = 'check_a'`)).Scan(&checkValidated) 2513 sqlDB.QueryRow(t, fmt.Sprintf(`SELECT validated from [SHOW CONSTRAINT FROM t] WHERE constraint_name = 'fk_ref'`)).Scan(&fkValidated) 2514 2515 // Following an import the constraints should be unvalidated. 2516 if checkValidated || fkValidated { 2517 t.Fatal("FK and CHECK constraints not unvalidated after IMPORT INTO\n") 2518 } 2519 }) 2520 } 2521 2522 func BenchmarkImport(b *testing.B) { 2523 const ( 2524 nodes = 3 2525 numFiles = nodes + 2 2526 ) 2527 baseDir := filepath.Join("testdata", "csv") 2528 ctx := context.Background() 2529 tc := testcluster.StartTestCluster(b, nodes, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: baseDir}}) 2530 defer tc.Stopper().Stop(ctx) 2531 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 2532 2533 testFiles := makeCSVData(b, numFiles, b.N*100, nodes, 16) 2534 2535 b.ResetTimer() 2536 2537 sqlDB.Exec(b, 2538 fmt.Sprintf( 2539 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b)) 2540 CSV DATA (%s)`, 2541 strings.Join(testFiles.files, ","), 2542 )) 2543 } 2544 2545 // a importRowProducer implementation that returns 'n' rows. 2546 type csvBenchmarkStream struct { 2547 n int 2548 pos int 2549 data [][]string 2550 } 2551 2552 func (s *csvBenchmarkStream) Progress() float32 { 2553 return float32(s.pos) / float32(s.n) 2554 } 2555 2556 func (s *csvBenchmarkStream) Scan() bool { 2557 s.pos++ 2558 return s.pos <= s.n 2559 } 2560 2561 func (s *csvBenchmarkStream) Err() error { 2562 return nil 2563 } 2564 2565 func (s *csvBenchmarkStream) Skip() error { 2566 return nil 2567 } 2568 2569 func (s *csvBenchmarkStream) Row() (interface{}, error) { 2570 return s.data[s.pos%len(s.data)], nil 2571 } 2572 2573 // Read implements Reader interface. It's used by delimited 2574 // benchmark to read its tab separated input. 
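// A minimal usage sketch (assumed, mirroring BenchmarkDelimitedConvertRecord
// below, which wraps the stream in a fileReader and feeds it to readFile):
//
//   producer := &csvBenchmarkStream{n: 10, data: [][]string{{"1", "x"}}}
//   buf := make([]byte, 64)
//   n, _ := producer.Read(buf) // yields "1\tx\n" for the first row
//   _ = n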
2575 func (s *csvBenchmarkStream) Read(buf []byte) (int, error) { 2576 if s.Scan() { 2577 r, err := s.Row() 2578 if err != nil { 2579 return 0, err 2580 } 2581 return copy(buf, strings.Join(r.([]string), "\t")+"\n"), nil 2582 } 2583 return 0, io.EOF 2584 } 2585 2586 var _ importRowProducer = &csvBenchmarkStream{} 2587 2588 // BenchmarkConvertRecord-16 1000000 2107 ns/op 56.94 MB/s 3600 B/op 101 allocs/op 2589 // BenchmarkConvertRecord-16 500000 2106 ns/op 56.97 MB/s 3606 B/op 101 allocs/op 2590 // BenchmarkConvertRecord-16 500000 2100 ns/op 57.14 MB/s 3606 B/op 101 allocs/op 2591 // BenchmarkConvertRecord-16 500000 2286 ns/op 52.49 MB/s 3606 B/op 101 allocs/op 2592 // BenchmarkConvertRecord-16 500000 2378 ns/op 50.46 MB/s 3606 B/op 101 allocs/op 2593 // BenchmarkConvertRecord-16 500000 2427 ns/op 49.43 MB/s 3606 B/op 101 allocs/op 2594 // BenchmarkConvertRecord-16 500000 2399 ns/op 50.02 MB/s 3606 B/op 101 allocs/op 2595 // BenchmarkConvertRecord-16 500000 2365 ns/op 50.73 MB/s 3606 B/op 101 allocs/op 2596 // BenchmarkConvertRecord-16 500000 2376 ns/op 50.49 MB/s 3606 B/op 101 allocs/op 2597 // BenchmarkConvertRecord-16 500000 2390 ns/op 50.20 MB/s 3606 B/op 101 allocs/op 2598 func BenchmarkCSVConvertRecord(b *testing.B) { 2599 ctx := context.Background() 2600 2601 tpchLineItemDataRows := [][]string{ 2602 {"1", "155190", "7706", "1", "17", "21168.23", "0.04", "0.02", "N", "O", "1996-03-13", "1996-02-12", "1996-03-22", "DELIVER IN PERSON", "TRUCK", "egular courts above the"}, 2603 {"1", "67310", "7311", "2", "36", "45983.16", "0.09", "0.06", "N", "O", "1996-04-12", "1996-02-28", "1996-04-20", "TAKE BACK RETURN", "MAIL", "ly final dependencies: slyly bold "}, 2604 {"1", "63700", "3701", "3", "8", "13309.60", "0.10", "0.02", "N", "O", "1996-01-29", "1996-03-05", "1996-01-31", "TAKE BACK RETURN", "REG AIR", "riously. regular, express dep"}, 2605 {"1", "2132", "4633", "4", "28", "28955.64", "0.09", "0.06", "N", "O", "1996-04-21", "1996-03-30", "1996-05-16", "NONE", "AIR", "lites. fluffily even de"}, 2606 {"1", "24027", "1534", "5", "24", "22824.48", "0.10", "0.04", "N", "O", "1996-03-30", "1996-03-14", "1996-04-01", "NONE", "FOB", " pending foxes. slyly re"}, 2607 {"1", "15635", "638", "6", "32", "49620.16", "0.07", "0.02", "N", "O", "1996-01-30", "1996-02-07", "1996-02-03", "DELIVER IN PERSON", "MAIL", "arefully slyly ex"}, 2608 {"2", "106170", "1191", "1", "38", "44694.46", "0.00", "0.05", "N", "O", "1997-01-28", "1997-01-14", "1997-02-02", "TAKE BACK RETURN", "RAIL", "ven requests. deposits breach a"}, 2609 {"3", "4297", "1798", "1", "45", "54058.05", "0.06", "0.00", "R", "F", "1994-02-02", "1994-01-04", "1994-02-23", "NONE", "AIR", "ongside of the furiously brave acco"}, 2610 {"3", "19036", "6540", "2", "49", "46796.47", "0.10", "0.00", "R", "F", "1993-11-09", "1993-12-20", "1993-11-24", "TAKE BACK RETURN", "RAIL", " unusual accounts. eve"}, 2611 {"3", "128449", "3474", "3", "27", "39890.88", "0.06", "0.07", "A", "F", "1994-01-16", "1993-11-22", "1994-01-23", "DELIVER IN PERSON", "SHIP", "nal foxes wake."}, 2612 } 2613 b.SetBytes(120) // Raw input size. With 8 indexes, expect more on output side. 
2614 2615 stmt, err := parser.ParseOne(`CREATE TABLE lineitem ( 2616 l_orderkey INT8 NOT NULL, 2617 l_partkey INT8 NOT NULL, 2618 l_suppkey INT8 NOT NULL, 2619 l_linenumber INT8 NOT NULL, 2620 l_quantity DECIMAL(15,2) NOT NULL, 2621 l_extendedprice DECIMAL(15,2) NOT NULL, 2622 l_discount DECIMAL(15,2) NOT NULL, 2623 l_tax DECIMAL(15,2) NOT NULL, 2624 l_returnflag CHAR(1) NOT NULL, 2625 l_linestatus CHAR(1) NOT NULL, 2626 l_shipdate DATE NOT NULL, 2627 l_commitdate DATE NOT NULL, 2628 l_receiptdate DATE NOT NULL, 2629 l_shipinstruct CHAR(25) NOT NULL, 2630 l_shipmode CHAR(10) NOT NULL, 2631 l_comment VARCHAR(44) NOT NULL, 2632 PRIMARY KEY (l_orderkey, l_linenumber), 2633 INDEX l_ok (l_orderkey ASC), 2634 INDEX l_pk (l_partkey ASC), 2635 INDEX l_sk (l_suppkey ASC), 2636 INDEX l_sd (l_shipdate ASC), 2637 INDEX l_cd (l_commitdate ASC), 2638 INDEX l_rd (l_receiptdate ASC), 2639 INDEX l_pk_sk (l_partkey ASC, l_suppkey ASC), 2640 INDEX l_sk_pk (l_suppkey ASC, l_partkey ASC) 2641 )`) 2642 if err != nil { 2643 b.Fatal(err) 2644 } 2645 create := stmt.AST.(*tree.CreateTable) 2646 st := cluster.MakeTestingClusterSettings() 2647 evalCtx := tree.MakeTestingEvalContext(st) 2648 2649 tableDesc, err := MakeSimpleTableDescriptor(ctx, st, create, sqlbase.ID(100), sqlbase.ID(100), NoFKs, 1) 2650 if err != nil { 2651 b.Fatal(err) 2652 } 2653 2654 kvCh := make(chan row.KVBatch) 2655 // no-op drain kvs channel. 2656 go func() { 2657 for range kvCh { 2658 } 2659 }() 2660 2661 descr := tableDesc.TableDesc() 2662 importCtx := ¶llelImportContext{ 2663 evalCtx: &evalCtx, 2664 tableDesc: descr, 2665 kvCh: kvCh, 2666 } 2667 2668 producer := &csvBenchmarkStream{ 2669 n: b.N, 2670 pos: 0, 2671 data: tpchLineItemDataRows, 2672 } 2673 consumer := &csvRowConsumer{importCtx: importCtx, opts: &roachpb.CSVOptions{}} 2674 b.ResetTimer() 2675 require.NoError(b, runParallelImport(ctx, importCtx, &importFileContext{}, producer, consumer)) 2676 close(kvCh) 2677 b.ReportAllocs() 2678 } 2679 2680 // goos: darwin 2681 // goarch: amd64 2682 // pkg: github.com/cockroachdb/cockroach/pkg/ccl/importccl 2683 // BenchmarkDelimitedConvertRecord-16 500000 2473 ns/op 48.51 MB/s 2684 // BenchmarkDelimitedConvertRecord-16 500000 2580 ns/op 46.51 MB/s 2685 // BenchmarkDelimitedConvertRecord-16 500000 2678 ns/op 44.80 MB/s 2686 // BenchmarkDelimitedConvertRecord-16 500000 2897 ns/op 41.41 MB/s 2687 // BenchmarkDelimitedConvertRecord-16 500000 3250 ns/op 36.92 MB/s 2688 // BenchmarkDelimitedConvertRecord-16 500000 3261 ns/op 36.80 MB/s 2689 // BenchmarkDelimitedConvertRecord-16 500000 3016 ns/op 39.79 MB/s 2690 // BenchmarkDelimitedConvertRecord-16 500000 2943 ns/op 40.77 MB/s 2691 // BenchmarkDelimitedConvertRecord-16 500000 3004 ns/op 39.94 MB/s 2692 // BenchmarkDelimitedConvertRecord-16 500000 2966 ns/op 40.45 MB/s 2693 func BenchmarkDelimitedConvertRecord(b *testing.B) { 2694 ctx := context.Background() 2695 2696 tpchLineItemDataRows := [][]string{ 2697 {"1", "155190", "7706", "1", "17", "21168.23", "0.04", "0.02", "N", "O", "1996-03-13", "1996-02-12", "1996-03-22", "DELIVER IN PERSON", "TRUCK", "egular courts above the"}, 2698 {"1", "67310", "7311", "2", "36", "45983.16", "0.09", "0.06", "N", "O", "1996-04-12", "1996-02-28", "1996-04-20", "TAKE BACK RETURN", "MAIL", "ly final dependencies: slyly bold "}, 2699 {"1", "63700", "3701", "3", "8", "13309.60", "0.10", "0.02", "N", "O", "1996-01-29", "1996-03-05", "1996-01-31", "TAKE BACK RETURN", "REG AIR", "riously. 
regular, express dep"}, 2700 {"1", "2132", "4633", "4", "28", "28955.64", "0.09", "0.06", "N", "O", "1996-04-21", "1996-03-30", "1996-05-16", "NONE", "AIR", "lites. fluffily even de"}, 2701 {"1", "24027", "1534", "5", "24", "22824.48", "0.10", "0.04", "N", "O", "1996-03-30", "1996-03-14", "1996-04-01", "NONE", "FOB", " pending foxes. slyly re"}, 2702 {"1", "15635", "638", "6", "32", "49620.16", "0.07", "0.02", "N", "O", "1996-01-30", "1996-02-07", "1996-02-03", "DELIVER IN PERSON", "MAIL", "arefully slyly ex"}, 2703 {"2", "106170", "1191", "1", "38", "44694.46", "0.00", "0.05", "N", "O", "1997-01-28", "1997-01-14", "1997-02-02", "TAKE BACK RETURN", "RAIL", "ven requests. deposits breach a"}, 2704 {"3", "4297", "1798", "1", "45", "54058.05", "0.06", "0.00", "R", "F", "1994-02-02", "1994-01-04", "1994-02-23", "NONE", "AIR", "ongside of the furiously brave acco"}, 2705 {"3", "19036", "6540", "2", "49", "46796.47", "0.10", "0.00", "R", "F", "1993-11-09", "1993-12-20", "1993-11-24", "TAKE BACK RETURN", "RAIL", " unusual accounts. eve"}, 2706 {"3", "128449", "3474", "3", "27", "39890.88", "0.06", "0.07", "A", "F", "1994-01-16", "1993-11-22", "1994-01-23", "DELIVER IN PERSON", "SHIP", "nal foxes wake."}, 2707 } 2708 b.SetBytes(120) // Raw input size. With 8 indexes, expect more on output side. 2709 2710 stmt, err := parser.ParseOne(`CREATE TABLE lineitem ( 2711 l_orderkey INT8 NOT NULL, 2712 l_partkey INT8 NOT NULL, 2713 l_suppkey INT8 NOT NULL, 2714 l_linenumber INT8 NOT NULL, 2715 l_quantity DECIMAL(15,2) NOT NULL, 2716 l_extendedprice DECIMAL(15,2) NOT NULL, 2717 l_discount DECIMAL(15,2) NOT NULL, 2718 l_tax DECIMAL(15,2) NOT NULL, 2719 l_returnflag CHAR(1) NOT NULL, 2720 l_linestatus CHAR(1) NOT NULL, 2721 l_shipdate DATE NOT NULL, 2722 l_commitdate DATE NOT NULL, 2723 l_receiptdate DATE NOT NULL, 2724 l_shipinstruct CHAR(25) NOT NULL, 2725 l_shipmode CHAR(10) NOT NULL, 2726 l_comment VARCHAR(44) NOT NULL, 2727 PRIMARY KEY (l_orderkey, l_linenumber), 2728 INDEX l_ok (l_orderkey ASC), 2729 INDEX l_pk (l_partkey ASC), 2730 INDEX l_sk (l_suppkey ASC), 2731 INDEX l_sd (l_shipdate ASC), 2732 INDEX l_cd (l_commitdate ASC), 2733 INDEX l_rd (l_receiptdate ASC), 2734 INDEX l_pk_sk (l_partkey ASC, l_suppkey ASC), 2735 INDEX l_sk_pk (l_suppkey ASC, l_partkey ASC) 2736 )`) 2737 if err != nil { 2738 b.Fatal(err) 2739 } 2740 create := stmt.AST.(*tree.CreateTable) 2741 st := cluster.MakeTestingClusterSettings() 2742 evalCtx := tree.MakeTestingEvalContext(st) 2743 2744 tableDesc, err := MakeSimpleTableDescriptor(ctx, st, create, sqlbase.ID(100), sqlbase.ID(100), NoFKs, 1) 2745 if err != nil { 2746 b.Fatal(err) 2747 } 2748 2749 kvCh := make(chan row.KVBatch) 2750 // no-op drain kvs channel. 2751 go func() { 2752 for range kvCh { 2753 } 2754 }() 2755 2756 descr := tableDesc.TableDesc() 2757 cols := make(tree.NameList, len(descr.Columns)) 2758 for i, col := range descr.Columns { 2759 cols[i] = tree.Name(col.Name) 2760 } 2761 r, err := newMysqloutfileReader(roachpb.MySQLOutfileOptions{ 2762 RowSeparator: '\n', 2763 FieldSeparator: '\t', 2764 }, kvCh, 0, 0, descr, &evalCtx) 2765 require.NoError(b, err) 2766 2767 producer := &csvBenchmarkStream{ 2768 n: b.N, 2769 pos: 0, 2770 data: tpchLineItemDataRows, 2771 } 2772 2773 delimited := &fileReader{Reader: producer} 2774 b.ResetTimer() 2775 require.NoError(b, r.readFile(ctx, delimited, 0, 0, nil)) 2776 close(kvCh) 2777 b.ReportAllocs() 2778 } 2779 2780 // TestImportControlJob tests that PAUSE JOB, RESUME JOB, and CANCEL JOB 2781 // work as intended on import jobs. 
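// The control statements exercised (directly or via jobutils.RunJob) are of the
// form shown below, with the job ID as a placeholder:
//
//   PAUSE JOB 123;
//   RESUME JOB 123;
//   CANCEL JOB 123;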
2782 func TestImportControlJob(t *testing.T) { 2783 defer leaktest.AfterTest(t)() 2784 2785 t.Skip("TODO(dt): add knob to force faster progress checks.") 2786 2787 defer func(oldInterval time.Duration) { 2788 jobs.DefaultAdoptInterval = oldInterval 2789 }(jobs.DefaultAdoptInterval) 2790 jobs.DefaultAdoptInterval = 100 * time.Millisecond 2791 2792 var serverArgs base.TestServerArgs 2793 // Disable external processing of mutations so that the final check of 2794 // crdb_internal.tables is guaranteed to not be cleaned up. Although this 2795 // was never observed by a stress test, it is here for safety. 2796 serverArgs.Knobs.SQLSchemaChanger = &sql.SchemaChangerTestingKnobs{ 2797 // TODO (lucy): if/when this test gets reinstated, figure out what knobs are 2798 // needed. 2799 } 2800 2801 var allowResponse chan struct{} 2802 params := base.TestClusterArgs{ServerArgs: serverArgs} 2803 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 2804 TestingResponseFilter: jobutils.BulkOpResponseFilter(&allowResponse), 2805 } 2806 2807 ctx := context.Background() 2808 tc := testcluster.StartTestCluster(t, 1, params) 2809 defer tc.Stopper().Stop(ctx) 2810 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 2811 sqlDB.Exec(t, `CREATE DATABASE data`) 2812 2813 makeSrv := func() *httptest.Server { 2814 var once sync.Once 2815 return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2816 if r.Method == "GET" { 2817 // The following code correctly handles both the case where, after the 2818 // CANCEL JOB is issued, the second stage of the IMPORT (the shuffle, 2819 // after the sampling) may or may not be started. If it was started, then a 2820 // second GET request is done. The once here will cause that request to not 2821 // block. The draining for loop below will cause jobutils.RunJob's second send 2822 // on allowResponse to succeed (which it does after issuing the CANCEL JOB). 2823 once.Do(func() { 2824 <-allowResponse 2825 go func() { 2826 for range allowResponse { 2827 } 2828 }() 2829 }) 2830 2831 _, _ = w.Write([]byte(r.URL.Path[1:])) 2832 } 2833 })) 2834 } 2835 2836 t.Run("cancel", func(t *testing.T) { 2837 sqlDB.Exec(t, `CREATE DATABASE cancelimport`) 2838 2839 srv := makeSrv() 2840 defer srv.Close() 2841 2842 var urls []string 2843 for i := 0; i < 10; i++ { 2844 urls = append(urls, fmt.Sprintf("'%s/%d'", srv.URL, i)) 2845 } 2846 csvURLs := strings.Join(urls, ", ") 2847 2848 query := fmt.Sprintf(`IMPORT TABLE cancelimport.t (i INT8 PRIMARY KEY) CSV DATA (%s)`, csvURLs) 2849 2850 if _, err := jobutils.RunJob( 2851 t, sqlDB, &allowResponse, []string{"cancel"}, query, 2852 ); !testutils.IsError(err, "job canceled") { 2853 t.Fatalf("expected 'job canceled' error, but got %+v", err) 2854 } 2855 // Check that executing again succeeds. This won't work if the first import 2856 // was not successfully canceled. 2857 sqlDB.Exec(t, query) 2858 }) 2859 2860 t.Run("pause", func(t *testing.T) { 2861 // Test that IMPORT can be paused and resumed. This test also attempts to 2862 // only pause the job after it has begun splitting ranges. When the job 2863 // is resumed, if the sampling phase is re-run, the splits points will 2864 // differ. When AddSSTable attempts to import the new ranges, they will 2865 // fail because there is an existing split in the key space that it cannot 2866 // handle. Use a sstsize that will more-or-less (since it is statistical) 2867 // always cause this condition. 
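// In outline, the flow below is (names as used in this file):
//
//   jobID, err := jobutils.RunJob(t, sqlDB, &allowResponse, []string{"PAUSE"}, query) // expects "job paused"
//   sqlDB.Exec(t, fmt.Sprintf(`RESUME JOB %d`, jobID))
//   jobutils.WaitForJob(t, sqlDB, jobID)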
2868 2869 sqlDB.Exec(t, `CREATE DATABASE pauseimport`) 2870 2871 srv := makeSrv() 2872 defer srv.Close() 2873 2874 count := 100 2875 // This test takes a while with the race detector, so reduce the number of 2876 // files in an attempt to speed it up. 2877 if util.RaceEnabled { 2878 count = 20 2879 } 2880 2881 urls := make([]string, count) 2882 for i := 0; i < count; i++ { 2883 urls[i] = fmt.Sprintf("'%s/%d'", srv.URL, i) 2884 } 2885 csvURLs := strings.Join(urls, ", ") 2886 query := fmt.Sprintf(`IMPORT TABLE pauseimport.t (i INT8 PRIMARY KEY) CSV DATA (%s) WITH sstsize = '50B'`, csvURLs) 2887 2888 jobID, err := jobutils.RunJob( 2889 t, sqlDB, &allowResponse, []string{"PAUSE"}, query, 2890 ) 2891 if !testutils.IsError(err, "job paused") { 2892 t.Fatalf("unexpected: %v", err) 2893 } 2894 sqlDB.Exec(t, fmt.Sprintf(`RESUME JOB %d`, jobID)) 2895 jobutils.WaitForJob(t, sqlDB, jobID) 2896 sqlDB.CheckQueryResults(t, 2897 `SELECT * FROM pauseimport.t ORDER BY i`, 2898 sqlDB.QueryStr(t, `SELECT * FROM generate_series(0, $1)`, count-1), 2899 ) 2900 }) 2901 } 2902 2903 // TestImportWorkerFailure tests that IMPORT can restart after the failure 2904 // of a worker node. 2905 func TestImportWorkerFailure(t *testing.T) { 2906 defer leaktest.AfterTest(t)() 2907 2908 // TODO(mjibson): Although this test passes most of the time it still 2909 // sometimes fails because not all kinds of failures caused by shutting a 2910 // node down are detected and retried. 2911 t.Skip("flaky due to undetected kinds of failures when the node is shutdown") 2912 2913 defer func(oldInterval time.Duration) { 2914 jobs.DefaultAdoptInterval = oldInterval 2915 }(jobs.DefaultAdoptInterval) 2916 jobs.DefaultAdoptInterval = 100 * time.Millisecond 2917 2918 allowResponse := make(chan struct{}) 2919 params := base.TestClusterArgs{} 2920 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 2921 TestingResponseFilter: jobutils.BulkOpResponseFilter(&allowResponse), 2922 } 2923 2924 ctx := context.Background() 2925 tc := testcluster.StartTestCluster(t, 3, params) 2926 defer tc.Stopper().Stop(ctx) 2927 conn := tc.Conns[0] 2928 sqlDB := sqlutils.MakeSQLRunner(conn) 2929 2930 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2931 if r.Method == "GET" { 2932 _, _ = w.Write([]byte(r.URL.Path[1:])) 2933 } 2934 })) 2935 defer srv.Close() 2936 2937 count := 20 2938 urls := make([]string, count) 2939 for i := 0; i < count; i++ { 2940 urls[i] = fmt.Sprintf("'%s/%d'", srv.URL, i) 2941 } 2942 csvURLs := strings.Join(urls, ", ") 2943 query := fmt.Sprintf(`IMPORT TABLE t (i INT8 PRIMARY KEY) CSV DATA (%s) WITH sstsize = '1B'`, csvURLs) 2944 2945 errCh := make(chan error) 2946 go func() { 2947 _, err := conn.Exec(query) 2948 errCh <- err 2949 }() 2950 select { 2951 case allowResponse <- struct{}{}: 2952 case err := <-errCh: 2953 t.Fatalf("%s: query returned before expected: %s", err, query) 2954 } 2955 var jobID int64 2956 sqlDB.QueryRow(t, `SELECT id FROM system.jobs ORDER BY created DESC LIMIT 1`).Scan(&jobID) 2957 2958 // Shut down a node. This should force LoadCSV to fail in its current 2959 // execution. It should detect this as a context canceled error. 2960 tc.StopServer(1) 2961 2962 close(allowResponse) 2963 // We expect the statement to fail. 2964 if err := <-errCh; !testutils.IsError(err, "node failure") { 2965 t.Fatal(err) 2966 } 2967 2968 // But the job should be restarted and succeed eventually. 
2969 jobutils.WaitForJob(t, sqlDB, jobID) 2970 sqlDB.CheckQueryResults(t, 2971 `SELECT * FROM t ORDER BY i`, 2972 sqlDB.QueryStr(t, `SELECT * FROM generate_series(0, $1)`, count-1), 2973 ) 2974 } 2975 2976 // TestImportLivenessWithRestart tests that a node liveness transition 2977 // during IMPORT correctly resumes after the node executing the job 2978 // becomes non-live (from the perspective of the jobs registry). 2979 // 2980 // Its actual purpose is to address the second bug listed in #22924 about 2981 // the addsstable arguments not in request range. The theory was that the 2982 // restart in that issue was caused by node liveness and that the work 2983 // already performed (the splits and addsstables) somehow caused the second 2984 // error. However this does not appear to be the case, as running many stress 2985 // iterations with differing constants (rows, sstsize, kv.bulk_ingest.batch_size) 2986 // was not able to fail in the way listed by the second bug. 2987 func TestImportLivenessWithRestart(t *testing.T) { 2988 defer leaktest.AfterTest(t)() 2989 2990 t.Skip("TODO(dt): this relies on chunking done by prior version of IMPORT." + 2991 "Rework this test, or replace it with resume-tests + jobs infra tests.") 2992 2993 defer func(oldInterval time.Duration) { 2994 jobs.DefaultAdoptInterval = oldInterval 2995 }(jobs.DefaultAdoptInterval) 2996 jobs.DefaultAdoptInterval = 100 * time.Millisecond 2997 jobs.DefaultCancelInterval = 100 * time.Millisecond 2998 2999 const nodes = 1 3000 nl := jobs.NewFakeNodeLiveness(nodes) 3001 serverArgs := base.TestServerArgs{ 3002 Knobs: base.TestingKnobs{ 3003 RegistryLiveness: nl, 3004 }, 3005 } 3006 3007 var allowResponse chan struct{} 3008 params := base.TestClusterArgs{ServerArgs: serverArgs} 3009 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 3010 TestingResponseFilter: jobutils.BulkOpResponseFilter(&allowResponse), 3011 } 3012 3013 ctx := context.Background() 3014 tc := testcluster.StartTestCluster(t, nodes, params) 3015 defer tc.Stopper().Stop(ctx) 3016 conn := tc.Conns[0] 3017 sqlDB := sqlutils.MakeSQLRunner(conn) 3018 3019 // Prevent hung HTTP connections in leaktest. 3020 sqlDB.Exec(t, `SET CLUSTER SETTING cloudstorage.timeout = '3s'`) 3021 3022 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '300B'`) 3023 sqlDB.Exec(t, `CREATE DATABASE liveness`) 3024 3025 const rows = 5000 3026 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 3027 if r.Method == "GET" { 3028 for i := 0; i < rows; i++ { 3029 fmt.Fprintln(w, i) 3030 } 3031 } 3032 })) 3033 defer srv.Close() 3034 3035 const query = `IMPORT TABLE liveness.t (i INT8 PRIMARY KEY) CSV DATA ($1) WITH sstsize = '500B', experimental_sorted_ingestion` 3036 3037 // Start an IMPORT and wait until it's done one addsstable. 3038 allowResponse = make(chan struct{}) 3039 errCh := make(chan error) 3040 go func() { 3041 _, err := conn.Exec(query, srv.URL) 3042 errCh <- err 3043 }() 3044 // Allow many, but not all, addsstables to complete. 3045 for i := 0; i < 50; i++ { 3046 select { 3047 case allowResponse <- struct{}{}: 3048 case err := <-errCh: 3049 t.Fatal(err) 3050 } 3051 } 3052 // Fetch the new job ID and lease since we know it's running now. 
3053 var jobID int64 3054 originalLease := &jobspb.Progress{} 3055 { 3056 var expectedLeaseBytes []byte 3057 sqlDB.QueryRow( 3058 t, `SELECT id, progress FROM system.jobs ORDER BY created DESC LIMIT 1`, 3059 ).Scan(&jobID, &expectedLeaseBytes) 3060 if err := protoutil.Unmarshal(expectedLeaseBytes, originalLease); err != nil { 3061 t.Fatal(err) 3062 } 3063 } 3064 3065 // addsstable is done, make the node non-live and wait for cancellation 3066 nl.FakeSetExpiration(1, hlc.MinTimestamp) 3067 // Wait for the registry cancel loop to run and cancel the job. 3068 <-nl.SelfCalledCh 3069 <-nl.SelfCalledCh 3070 close(allowResponse) 3071 err := <-errCh 3072 if !testutils.IsError(err, "job .*: node liveness error") { 3073 t.Fatalf("unexpected: %v", err) 3074 } 3075 3076 // Ensure that partial progress has been recorded 3077 partialProgress := jobutils.GetJobProgress(t, sqlDB, jobID) 3078 if len(partialProgress.Details.(*jobspb.Progress_Import).Import.SpanProgress) == 0 { 3079 t.Fatal("no partial import progress detected") 3080 } 3081 3082 // Make the node live again 3083 nl.FakeSetExpiration(1, hlc.MaxTimestamp) 3084 // The registry should now adopt the job and resume it. 3085 jobutils.WaitForJob(t, sqlDB, jobID) 3086 // Verify that the job lease was updated 3087 rescheduledProgress := jobutils.GetJobProgress(t, sqlDB, jobID) 3088 if rescheduledProgress.ModifiedMicros <= originalLease.ModifiedMicros { 3089 t.Fatalf("expecting rescheduled job to have a later modification time: %d vs %d", 3090 rescheduledProgress.ModifiedMicros, originalLease.ModifiedMicros) 3091 } 3092 3093 // Verify that all expected rows are present after a stop/start cycle. 3094 var rowCount int 3095 sqlDB.QueryRow(t, "SELECT count(*) from liveness.t").Scan(&rowCount) 3096 if rowCount != rows { 3097 t.Fatalf("not all rows were present. Expecting %d, had %d", rows, rowCount) 3098 } 3099 3100 // Verify that all write progress coalesced into a single span 3101 // encompassing the entire table. 3102 spans := rescheduledProgress.Details.(*jobspb.Progress_Import).Import.SpanProgress 3103 if len(spans) != 1 { 3104 t.Fatalf("expecting only a single progress span, had %d\n%s", len(spans), spans) 3105 } 3106 3107 // Ensure that an entire table range is marked as complete 3108 tableSpan := roachpb.Span{ 3109 Key: keys.MinKey, 3110 EndKey: keys.MaxKey, 3111 } 3112 if !tableSpan.EqualValue(spans[0]) { 3113 t.Fatalf("expected entire table to be marked complete, had %s", spans[0]) 3114 } 3115 } 3116 3117 // TestImportLivenessWithLeniency tests that a temporary node liveness 3118 // transition during IMPORT doesn't cancel the job, but allows the 3119 // owning node to continue processing. 
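// The leniency window itself is just a cluster setting; the test pins it below
// so the tardiness it injects stays well inside the window:
//
//   SET CLUSTER SETTING jobs.registry.leniency = '1m'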
3120 func TestImportLivenessWithLeniency(t *testing.T) { 3121 defer leaktest.AfterTest(t)() 3122 3123 defer func(oldInterval time.Duration) { 3124 jobs.DefaultAdoptInterval = oldInterval 3125 }(jobs.DefaultAdoptInterval) 3126 jobs.DefaultAdoptInterval = 100 * time.Millisecond 3127 jobs.DefaultCancelInterval = 100 * time.Millisecond 3128 3129 const nodes = 1 3130 nl := jobs.NewFakeNodeLiveness(nodes) 3131 serverArgs := base.TestServerArgs{ 3132 Knobs: base.TestingKnobs{ 3133 RegistryLiveness: nl, 3134 }, 3135 } 3136 3137 var allowResponse chan struct{} 3138 params := base.TestClusterArgs{ServerArgs: serverArgs} 3139 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 3140 TestingResponseFilter: jobutils.BulkOpResponseFilter(&allowResponse), 3141 } 3142 3143 ctx := context.Background() 3144 tc := testcluster.StartTestCluster(t, nodes, params) 3145 defer tc.Stopper().Stop(ctx) 3146 conn := tc.Conns[0] 3147 sqlDB := sqlutils.MakeSQLRunner(conn) 3148 3149 // Prevent hung HTTP connections in leaktest. 3150 sqlDB.Exec(t, `SET CLUSTER SETTING cloudstorage.timeout = '3s'`) 3151 // We want to know exactly how much leniency is configured. 3152 sqlDB.Exec(t, `SET CLUSTER SETTING jobs.registry.leniency = '1m'`) 3153 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '300B'`) 3154 sqlDB.Exec(t, `CREATE DATABASE liveness`) 3155 3156 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 3157 const rows = 5000 3158 if r.Method == "GET" { 3159 for i := 0; i < rows; i++ { 3160 fmt.Fprintln(w, i) 3161 } 3162 } 3163 })) 3164 defer srv.Close() 3165 3166 const query = `IMPORT TABLE liveness.t (i INT8 PRIMARY KEY) CSV DATA ($1) WITH sstsize = '500B'` 3167 3168 // Start an IMPORT and wait until it's done one addsstable. 3169 allowResponse = make(chan struct{}) 3170 errCh := make(chan error) 3171 go func() { 3172 _, err := conn.Exec(query, srv.URL) 3173 errCh <- err 3174 }() 3175 // Allow many, but not all, addsstables to complete. 3176 for i := 0; i < 50; i++ { 3177 select { 3178 case allowResponse <- struct{}{}: 3179 case err := <-errCh: 3180 t.Fatal(err) 3181 } 3182 } 3183 // Fetch the new job ID and lease since we know it's running now. 3184 var jobID int64 3185 originalLease := &jobspb.Payload{} 3186 { 3187 var expectedLeaseBytes []byte 3188 sqlDB.QueryRow( 3189 t, `SELECT id, payload FROM system.jobs ORDER BY created DESC LIMIT 1`, 3190 ).Scan(&jobID, &expectedLeaseBytes) 3191 if err := protoutil.Unmarshal(expectedLeaseBytes, originalLease); err != nil { 3192 t.Fatal(err) 3193 } 3194 } 3195 3196 // addsstable is done, make the node slightly tardy. 3197 nl.FakeSetExpiration(1, hlc.Timestamp{ 3198 WallTime: hlc.UnixNano() - (15 * time.Second).Nanoseconds(), 3199 }) 3200 3201 // Wait for the registry cancel loop to run and not cancel the job. 3202 <-nl.SelfCalledCh 3203 <-nl.SelfCalledCh 3204 close(allowResponse) 3205 3206 // Set the node to be fully live again. This prevents the registry 3207 // from canceling all of the jobs if the test node is saturated 3208 // and the import runs slowly. 3209 nl.FakeSetExpiration(1, hlc.MaxTimestamp) 3210 3211 // Verify that the client didn't see anything amiss. 3212 if err := <-errCh; err != nil { 3213 t.Fatalf("import job should have completed: %s", err) 3214 } 3215 3216 // The job should have completed normally. 
3217 jobutils.WaitForJob(t, sqlDB, jobID) 3218 } 3219 3220 // TestImportMVCCChecksums verifies that MVCC checksums are correctly 3221 // computed by issuing a secondary index change that runs a CPut on the 3222 // index. See #23984. 3223 func TestImportMVCCChecksums(t *testing.T) { 3224 defer leaktest.AfterTest(t)() 3225 3226 s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) 3227 ctx := context.Background() 3228 defer s.Stopper().Stop(ctx) 3229 sqlDB := sqlutils.MakeSQLRunner(db) 3230 3231 sqlDB.Exec(t, `CREATE DATABASE d`) 3232 3233 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 3234 if r.Method == "GET" { 3235 fmt.Fprint(w, "1,1,1") 3236 } 3237 })) 3238 defer srv.Close() 3239 3240 sqlDB.Exec(t, `IMPORT TABLE d.t ( 3241 a INT8 PRIMARY KEY, 3242 b INT8, 3243 c INT8, 3244 INDEX (b) STORING (c) 3245 ) CSV DATA ($1)`, srv.URL) 3246 sqlDB.Exec(t, `UPDATE d.t SET c = 2 WHERE a = 1`) 3247 } 3248 3249 func TestImportMysql(t *testing.T) { 3250 defer leaktest.AfterTest(t)() 3251 3252 t.Skip("https://github.com/cockroachdb/cockroach/issues/40263") 3253 3254 const ( 3255 nodes = 3 3256 ) 3257 ctx := context.Background() 3258 baseDir := filepath.Join("testdata") 3259 args := base.TestServerArgs{ExternalIODir: baseDir} 3260 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3261 defer tc.Stopper().Stop(ctx) 3262 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 3263 3264 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3265 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3266 3267 files := getMysqldumpTestdata(t) 3268 simple := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.simple, baseDir))} 3269 second := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.second, baseDir))} 3270 multitable := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.wholeDB, baseDir))} 3271 multitableGz := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.wholeDB+".gz", baseDir))} 3272 multitableBz := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.wholeDB+".bz2", baseDir))} 3273 3274 const expectSimple, expectSecond, expectEverything = 1 << 0, 1 << 2, 1 << 3 3275 const expectAll = -1 3276 for _, c := range []struct { 3277 name string 3278 expected int 3279 query string 3280 args []interface{} 3281 }{ 3282 {`read data only`, expectSimple, `IMPORT TABLE simple (i INT8 PRIMARY KEY, s text, b bytea) MYSQLDUMP DATA ($1)`, simple}, 3283 {`single table dump`, expectSimple, `IMPORT TABLE simple FROM MYSQLDUMP ($1)`, simple}, 3284 {`second table dump`, expectSecond, `IMPORT TABLE second FROM MYSQLDUMP ($1) WITH skip_foreign_keys`, second}, 3285 {`simple from multi`, expectSimple, `IMPORT TABLE simple FROM MYSQLDUMP ($1)`, multitable}, 3286 {`second from multi`, expectSecond, `IMPORT TABLE second FROM MYSQLDUMP ($1) WITH skip_foreign_keys`, multitable}, 3287 {`all from multi`, expectAll, `IMPORT MYSQLDUMP ($1)`, multitable}, 3288 {`all from multi gzip`, expectAll, `IMPORT MYSQLDUMP ($1)`, multitableGz}, 3289 {`all from multi bzip`, expectAll, `IMPORT MYSQLDUMP ($1)`, multitableBz}, 3290 } { 3291 t.Run(c.name, func(t *testing.T) { 3292 sqlDB.Exec(t, `DROP TABLE IF EXISTS simple, second, third, everything CASCADE`) 3293 sqlDB.Exec(t, `DROP SEQUENCE IF EXISTS simple_auto_inc, third_auto_inc`) 3294 sqlDB.Exec(t, c.query, c.args...) 
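// The checks below are simple bit tests against c.expected: expectSimple,
// expectSecond and expectEverything are distinct bits, and expectAll (-1)
// has every bit set, so an `IMPORT MYSQLDUMP` case verifies all tables.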
3295 3296 if c.expected&expectSimple != 0 { 3297 if c.name != "read data only" { 3298 sqlDB.Exec(t, "INSERT INTO simple (s) VALUES ('auto-inc')") 3299 } 3300 3301 for idx, row := range sqlDB.QueryStr(t, "SELECT * FROM simple ORDER BY i") { 3302 { 3303 if idx == len(simpleTestRows) { 3304 if expected, actual := "auto-inc", row[1]; expected != actual { 3305 t.Fatalf("expected rowi=%s string to be %q, got %q", row[0], expected, actual) 3306 } 3307 continue 3308 } 3309 expected, actual := simpleTestRows[idx].s, row[1] 3310 if expected == injectNull { 3311 expected = "NULL" 3312 } 3313 if expected != actual { 3314 t.Fatalf("expected rowi=%s string to be %q, got %q", row[0], expected, actual) 3315 } 3316 } 3317 3318 { 3319 expected, actual := simpleTestRows[idx].b, row[2] 3320 if expected == nil { 3321 expected = []byte("NULL") 3322 } 3323 if !bytes.Equal(expected, []byte(actual)) { 3324 t.Fatalf("expected rowi=%s bytes to be %q, got %q", row[0], expected, actual) 3325 } 3326 } 3327 } 3328 } else { 3329 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM simple LIMIT 1`) 3330 } 3331 3332 if c.expected&expectSecond != 0 { 3333 res := sqlDB.QueryStr(t, "SELECT * FROM second ORDER BY i") 3334 if expected, actual := secondTableRows, len(res); expected != actual { 3335 t.Fatalf("expected %d, got %d", expected, actual) 3336 } 3337 for _, row := range res { 3338 if i, j := row[0], row[1]; i != "-"+j { 3339 t.Fatalf("expected %s = - %s", i, j) 3340 } 3341 } 3342 } else { 3343 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM second LIMIT 1`) 3344 } 3345 if c.expected&expectEverything != 0 { 3346 res := sqlDB.QueryStr(t, "SELECT i, c, iw, fl, d53, j FROM everything ORDER BY i") 3347 if expected, actual := len(everythingTestRows), len(res); expected != actual { 3348 t.Fatalf("expected %d, got %d", expected, actual) 3349 } 3350 for i := range res { 3351 if got, expected := res[i][0], fmt.Sprintf("%v", everythingTestRows[i].i); got != expected { 3352 t.Fatalf("expected %s got %s", expected, got) 3353 } 3354 if got, expected := res[i][1], everythingTestRows[i].c; got != expected { 3355 t.Fatalf("expected %s got %s", expected, got) 3356 } 3357 if got, expected := res[i][2], fmt.Sprintf("%v", everythingTestRows[i].iw); got != expected { 3358 t.Fatalf("expected %s got %s", expected, got) 3359 } 3360 if got, expected := res[i][3], fmt.Sprintf("%v", everythingTestRows[i].fl); got != expected { 3361 t.Fatalf("expected %s got %s", expected, got) 3362 } 3363 if got, expected := res[i][4], everythingTestRows[i].d53; got != expected { 3364 t.Fatalf("expected %s got %s", expected, got) 3365 } 3366 if got, expected := res[i][5], everythingTestRows[i].j; got != expected { 3367 t.Fatalf("expected %s got %s", expected, got) 3368 } 3369 } 3370 } else { 3371 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM everything LIMIT 1`) 3372 } 3373 }) 3374 } 3375 } 3376 3377 func TestImportMysqlOutfile(t *testing.T) { 3378 defer leaktest.AfterTest(t)() 3379 3380 const ( 3381 nodes = 3 3382 ) 3383 ctx := context.Background() 3384 baseDir := filepath.Join("testdata", "mysqlout") 3385 args := base.TestServerArgs{ExternalIODir: baseDir} 3386 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3387 defer tc.Stopper().Stop(ctx) 3388 conn := tc.Conns[0] 3389 sqlDB := sqlutils.MakeSQLRunner(conn) 3390 3391 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3392 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3393 3394 testRows, configs := getMysqlOutfileTestdata(t) 3395 3396 
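// Each config below is turned into a statement of roughly this shape (the WITH
// flags and placeholder values vary per config and are illustrative only):
//
//   IMPORT TABLE test0 (i INT8 PRIMARY KEY, s text, b bytea)
//       DELIMITED DATA ($1) WITH fields_terminated_by = $2, fields_enclosed_by = $3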
for i, cfg := range configs { 3397 t.Run(cfg.name, func(t *testing.T) { 3398 var opts []interface{} 3399 3400 cmd := fmt.Sprintf(`IMPORT TABLE test%d (i INT8 PRIMARY KEY, s text, b bytea) DELIMITED DATA ($1)`, i) 3401 opts = append(opts, fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(cfg.filename, baseDir))) 3402 3403 var flags []string 3404 if cfg.opts.RowSeparator != '\n' { 3405 opts = append(opts, string(cfg.opts.RowSeparator)) 3406 flags = append(flags, fmt.Sprintf("rows_terminated_by = $%d", len(opts))) 3407 } 3408 if cfg.opts.FieldSeparator != '\t' { 3409 opts = append(opts, string(cfg.opts.FieldSeparator)) 3410 flags = append(flags, fmt.Sprintf("fields_terminated_by = $%d", len(opts))) 3411 } 3412 if cfg.opts.Enclose == roachpb.MySQLOutfileOptions_Always { 3413 opts = append(opts, string(cfg.opts.Encloser)) 3414 flags = append(flags, fmt.Sprintf("fields_enclosed_by = $%d", len(opts))) 3415 } 3416 if cfg.opts.HasEscape { 3417 opts = append(opts, string(cfg.opts.Escape)) 3418 flags = append(flags, fmt.Sprintf("fields_escaped_by = $%d", len(opts))) 3419 } 3420 if len(flags) > 0 { 3421 cmd += " WITH " + strings.Join(flags, ", ") 3422 } 3423 sqlDB.Exec(t, cmd, opts...) 3424 for idx, row := range sqlDB.QueryStr(t, fmt.Sprintf("SELECT * FROM test%d ORDER BY i", i)) { 3425 expected, actual := testRows[idx].s, row[1] 3426 if expected == injectNull { 3427 expected = "NULL" 3428 } 3429 3430 if expected != actual { 3431 t.Fatalf("expected row i=%s string to be %q, got %q", row[0], expected, actual) 3432 } 3433 } 3434 }) 3435 } 3436 } 3437 3438 func TestImportPgCopy(t *testing.T) { 3439 defer leaktest.AfterTest(t)() 3440 3441 const ( 3442 nodes = 3 3443 ) 3444 ctx := context.Background() 3445 baseDir := filepath.Join("testdata", "pgcopy") 3446 args := base.TestServerArgs{ExternalIODir: baseDir} 3447 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3448 defer tc.Stopper().Stop(ctx) 3449 conn := tc.Conns[0] 3450 sqlDB := sqlutils.MakeSQLRunner(conn) 3451 3452 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3453 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3454 3455 testRows, configs := getPgCopyTestdata(t) 3456 3457 for i, cfg := range configs { 3458 t.Run(cfg.name, func(t *testing.T) { 3459 var opts []interface{} 3460 3461 cmd := fmt.Sprintf(`IMPORT TABLE test%d (i INT8 PRIMARY KEY, s text, b bytea) PGCOPY DATA ($1)`, i) 3462 opts = append(opts, fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(cfg.filename, baseDir))) 3463 3464 var flags []string 3465 if cfg.opts.Delimiter != '\t' { 3466 opts = append(opts, string(cfg.opts.Delimiter)) 3467 flags = append(flags, fmt.Sprintf("delimiter = $%d", len(opts))) 3468 } 3469 if cfg.opts.Null != `\N` { 3470 opts = append(opts, cfg.opts.Null) 3471 flags = append(flags, fmt.Sprintf("nullif = $%d", len(opts))) 3472 } 3473 if len(flags) > 0 { 3474 cmd += " WITH " + strings.Join(flags, ", ") 3475 } 3476 t.Log(cmd, opts) 3477 sqlDB.Exec(t, cmd, opts...) 
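// For example (illustrative), a config with a non-default delimiter and null
// marker produces a command like:
//
//   IMPORT TABLE test1 (i INT8 PRIMARY KEY, s text, b bytea)
//       PGCOPY DATA ($1) WITH delimiter = $2, nullif = $3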
3478 for idx, row := range sqlDB.QueryStr(t, fmt.Sprintf("SELECT * FROM test%d ORDER BY i", i)) { 3479 { 3480 expected, actual := testRows[idx].s, row[1] 3481 if expected == injectNull { 3482 expected = "NULL" 3483 } 3484 3485 if expected != actual { 3486 t.Fatalf("expected row i=%s string to be %q, got %q", row[0], expected, actual) 3487 } 3488 } 3489 3490 { 3491 expected, actual := testRows[idx].b, row[2] 3492 if expected == nil { 3493 expected = []byte("NULL") 3494 } 3495 if !bytes.Equal(expected, []byte(actual)) { 3496 t.Fatalf("expected rowi=%s bytes to be %q, got %q", row[0], expected, actual) 3497 } 3498 } 3499 } 3500 }) 3501 } 3502 } 3503 3504 func TestImportPgDump(t *testing.T) { 3505 defer leaktest.AfterTest(t)() 3506 3507 const ( 3508 nodes = 3 3509 ) 3510 ctx := context.Background() 3511 baseDir := filepath.Join("testdata") 3512 args := base.TestServerArgs{ExternalIODir: baseDir} 3513 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3514 defer tc.Stopper().Stop(ctx) 3515 conn := tc.Conns[0] 3516 sqlDB := sqlutils.MakeSQLRunner(conn) 3517 3518 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3519 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3520 3521 simplePgTestRows, simpleFile := getSimplePostgresDumpTestdata(t) 3522 simple := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(simpleFile, baseDir))} 3523 secondTableRowCount, secondFile := getSecondPostgresDumpTestdata(t) 3524 second := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(secondFile, baseDir))} 3525 multitableFile := getMultiTablePostgresDumpTestdata(t) 3526 multitable := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(multitableFile, baseDir))} 3527 3528 const expectAll, expectSimple, expectSecond = 1, 2, 3 3529 3530 for _, c := range []struct { 3531 name string 3532 expected int 3533 query string 3534 args []interface{} 3535 }{ 3536 { 3537 `read data only`, 3538 expectSimple, 3539 `IMPORT TABLE simple ( 3540 i INT8, 3541 s text, 3542 b bytea, 3543 CONSTRAINT simple_pkey PRIMARY KEY (i), 3544 UNIQUE INDEX simple_b_s_idx (b, s), 3545 INDEX simple_s_idx (s) 3546 ) PGDUMP DATA ($1)`, 3547 simple, 3548 }, 3549 {`single table dump`, expectSimple, `IMPORT TABLE simple FROM PGDUMP ($1)`, simple}, 3550 {`second table dump`, expectSecond, `IMPORT TABLE second FROM PGDUMP ($1)`, second}, 3551 {`simple from multi`, expectSimple, `IMPORT TABLE simple FROM PGDUMP ($1)`, multitable}, 3552 {`second from multi`, expectSecond, `IMPORT TABLE second FROM PGDUMP ($1)`, multitable}, 3553 {`all from multi`, expectAll, `IMPORT PGDUMP ($1)`, multitable}, 3554 } { 3555 t.Run(c.name, func(t *testing.T) { 3556 sqlDB.Exec(t, `DROP TABLE IF EXISTS simple, second`) 3557 sqlDB.Exec(t, c.query, c.args...) 3558 3559 if c.expected == expectSimple || c.expected == expectAll { 3560 // Verify table schema because PKs and indexes are at the bottom of pg_dump. 
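// pg_dump emits primary key constraints and secondary indexes as ALTER TABLE /
// CREATE INDEX statements after the table data, so IMPORT has to fold them back into
// the table definition; SHOW CREATE TABLE confirms that they were.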
3561 sqlDB.CheckQueryResults(t, `SHOW CREATE TABLE simple`, [][]string{{ 3562 "simple", `CREATE TABLE simple ( 3563 i INT8 NOT NULL, 3564 s STRING NULL, 3565 b BYTES NULL, 3566 CONSTRAINT simple_pkey PRIMARY KEY (i ASC), 3567 UNIQUE INDEX simple_b_s_idx (b ASC, s ASC), 3568 INDEX simple_s_idx (s ASC), 3569 FAMILY "primary" (i, s, b) 3570 )`, 3571 }}) 3572 3573 rows := sqlDB.QueryStr(t, "SELECT * FROM simple ORDER BY i") 3574 if a, e := len(rows), len(simplePostgresTestRows); a != e { 3575 t.Fatalf("got %d rows, expected %d", a, e) 3576 } 3577 3578 for idx, row := range rows { 3579 { 3580 expected, actual := simplePostgresTestRows[idx].s, row[1] 3581 if expected == injectNull { 3582 expected = "NULL" 3583 } 3584 if expected != actual { 3585 t.Fatalf("expected rowi=%s string to be %q, got %q", row[0], expected, actual) 3586 } 3587 } 3588 3589 { 3590 expected, actual := simplePgTestRows[idx].b, row[2] 3591 if expected == nil { 3592 expected = []byte("NULL") 3593 } 3594 if !bytes.Equal(expected, []byte(actual)) { 3595 t.Fatalf("expected rowi=%s bytes to be %q, got %q", row[0], expected, actual) 3596 } 3597 } 3598 } 3599 } 3600 3601 if c.expected == expectSecond || c.expected == expectAll { 3602 // Verify table schema because PKs and indexes are at the bottom of pg_dump. 3603 sqlDB.CheckQueryResults(t, `SHOW CREATE TABLE second`, [][]string{{ 3604 "second", `CREATE TABLE second ( 3605 i INT8 NOT NULL, 3606 s STRING NULL, 3607 CONSTRAINT second_pkey PRIMARY KEY (i ASC), 3608 FAMILY "primary" (i, s) 3609 )`, 3610 }}) 3611 res := sqlDB.QueryStr(t, "SELECT * FROM second ORDER BY i") 3612 if expected, actual := secondTableRowCount, len(res); expected != actual { 3613 t.Fatalf("expected %d, got %d", expected, actual) 3614 } 3615 for _, row := range res { 3616 if i, s := row[0], row[1]; i != s { 3617 t.Fatalf("expected %s = %s", i, s) 3618 } 3619 } 3620 } 3621 3622 if c.expected == expectSecond { 3623 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM simple LIMIT 1`) 3624 } 3625 if c.expected == expectSimple { 3626 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM second LIMIT 1`) 3627 } 3628 if c.expected == expectAll { 3629 sqlDB.CheckQueryResults(t, `SHOW CREATE TABLE seqtable`, [][]string{{ 3630 "seqtable", `CREATE TABLE seqtable ( 3631 a INT8 NULL DEFAULT nextval('public.a_seq':::STRING), 3632 b INT8 NULL, 3633 FAMILY "primary" (a, b, rowid) 3634 )`, 3635 }}) 3636 sqlDB.CheckQueryResults(t, `SHOW CREATE SEQUENCE a_seq`, [][]string{{ 3637 "a_seq", `CREATE SEQUENCE a_seq MINVALUE 1 MAXVALUE 9223372036854775807 INCREMENT 1 START 1`, 3638 }}) 3639 sqlDB.CheckQueryResults(t, `select last_value from a_seq`, [][]string{{"7"}}) 3640 sqlDB.CheckQueryResults(t, 3641 `SELECT * FROM seqtable ORDER BY a`, 3642 sqlDB.QueryStr(t, `select a+1, a*10 from generate_series(0, 6) a`), 3643 ) 3644 sqlDB.CheckQueryResults(t, `select last_value from a_seq`, [][]string{{"7"}}) 3645 // This can sometimes retry, so the next value might not be 8. 
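// The dump leaves a_seq at last_value = 7. Sequence values consumed by nextval are not
// returned on a retry, so a retried INSERT may burn more than one value; the checks
// below therefore only assert last_value >= 8 and locate the new row via last_value
// rather than assuming a = 8.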
3646 sqlDB.Exec(t, `INSERT INTO seqtable (b) VALUES (70)`) 3647 sqlDB.CheckQueryResults(t, `select last_value >= 8 from a_seq`, [][]string{{"true"}}) 3648 sqlDB.CheckQueryResults(t, 3649 `SELECT b FROM seqtable WHERE a = (SELECT last_value FROM a_seq)`, 3650 [][]string{{"70"}}, 3651 ) 3652 } 3653 }) 3654 } 3655 } 3656 3657 func TestImportCockroachDump(t *testing.T) { 3658 defer leaktest.AfterTest(t)() 3659 3660 const ( 3661 nodes = 3 3662 ) 3663 ctx := context.Background() 3664 baseDir := filepath.Join("testdata") 3665 args := base.TestServerArgs{ExternalIODir: baseDir} 3666 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3667 defer tc.Stopper().Stop(ctx) 3668 conn := tc.Conns[0] 3669 sqlDB := sqlutils.MakeSQLRunner(conn) 3670 3671 sqlDB.Exec(t, "IMPORT PGDUMP ($1)", "nodelocal://0/cockroachdump/dump.sql") 3672 sqlDB.CheckQueryResults(t, "SELECT * FROM t ORDER BY i", [][]string{ 3673 {"1", "test"}, 3674 {"2", "other"}, 3675 }) 3676 sqlDB.CheckQueryResults(t, "SELECT * FROM a", [][]string{ 3677 {"2"}, 3678 }) 3679 sqlDB.CheckQueryResults(t, "SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t", [][]string{ 3680 {"primary", "-6413178410144704641"}, 3681 {"t_t_idx", "-4841734847805280813"}, 3682 }) 3683 sqlDB.CheckQueryResults(t, "SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE a", [][]string{ 3684 {"primary", "-5808590958014384147"}, 3685 }) 3686 sqlDB.CheckQueryResults(t, "SHOW CREATE TABLE t", [][]string{ 3687 {"t", `CREATE TABLE t ( 3688 i INT8 NOT NULL, 3689 t STRING NULL, 3690 CONSTRAINT "primary" PRIMARY KEY (i ASC), 3691 INDEX t_t_idx (t ASC), 3692 FAMILY "primary" (i, t) 3693 )`}, 3694 }) 3695 sqlDB.CheckQueryResults(t, "SHOW CREATE TABLE a", [][]string{ 3696 {"a", `CREATE TABLE a ( 3697 i INT8 NOT NULL, 3698 CONSTRAINT "primary" PRIMARY KEY (i ASC), 3699 CONSTRAINT fk_i_ref_t FOREIGN KEY (i) REFERENCES t(i), 3700 FAMILY "primary" (i) 3701 )`}, 3702 }) 3703 } 3704 3705 func TestCreateStatsAfterImport(t *testing.T) { 3706 defer leaktest.AfterTest(t)() 3707 3708 defer func(oldRefreshInterval, oldAsOf time.Duration) { 3709 stats.DefaultRefreshInterval = oldRefreshInterval 3710 stats.DefaultAsOfTime = oldAsOf 3711 }(stats.DefaultRefreshInterval, stats.DefaultAsOfTime) 3712 stats.DefaultRefreshInterval = time.Millisecond 3713 stats.DefaultAsOfTime = time.Microsecond 3714 3715 const nodes = 1 3716 ctx := context.Background() 3717 baseDir := filepath.Join("testdata") 3718 args := base.TestServerArgs{ExternalIODir: baseDir} 3719 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3720 defer tc.Stopper().Stop(ctx) 3721 conn := tc.Conns[0] 3722 sqlDB := sqlutils.MakeSQLRunner(conn) 3723 3724 sqlDB.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=true`) 3725 3726 sqlDB.Exec(t, "IMPORT PGDUMP ($1)", "nodelocal://0/cockroachdump/dump.sql") 3727 3728 // Verify that statistics have been created. 
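// Statistics are collected asynchronously by the automatic stats refresher (its
// interval was lowered to 1ms above), so the retrying CheckQueryResultsRetry variant
// is used instead of a one-shot assertion.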
3729 sqlDB.CheckQueryResultsRetry(t, 3730 `SELECT statistics_name, column_names, row_count, distinct_count, null_count 3731 FROM [SHOW STATISTICS FOR TABLE t]`, 3732 [][]string{ 3733 {"__auto__", "{i}", "2", "2", "0"}, 3734 {"__auto__", "{t}", "2", "2", "0"}, 3735 }) 3736 sqlDB.CheckQueryResultsRetry(t, 3737 `SELECT statistics_name, column_names, row_count, distinct_count, null_count 3738 FROM [SHOW STATISTICS FOR TABLE a]`, 3739 [][]string{ 3740 {"__auto__", "{i}", "1", "1", "0"}, 3741 }) 3742 } 3743 3744 func TestImportAvro(t *testing.T) { 3745 defer leaktest.AfterTest(t)() 3746 3747 const ( 3748 nodes = 3 3749 ) 3750 ctx := context.Background() 3751 baseDir := filepath.Join("testdata", "avro") 3752 args := base.TestServerArgs{ExternalIODir: baseDir} 3753 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3754 defer tc.Stopper().Stop(ctx) 3755 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 3756 3757 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3758 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3759 3760 simpleOcf := fmt.Sprintf("nodelocal://0/%s", "simple.ocf") 3761 simpleSchemaURI := fmt.Sprintf("nodelocal://0/%s", "simple-schema.json") 3762 simpleJSON := fmt.Sprintf("nodelocal://0/%s", "simple-sorted.json") 3763 simplePrettyJSON := fmt.Sprintf("nodelocal://0/%s", "simple-sorted.pjson") 3764 simpleBinRecords := fmt.Sprintf("nodelocal://0/%s", "simple-sorted-records.avro") 3765 tableSchema := fmt.Sprintf("nodelocal://0/%s", "simple-schema.sql") 3766 3767 data, err := ioutil.ReadFile("testdata/avro/simple-schema.json") 3768 if err != nil { 3769 t.Fatal(err) 3770 } 3771 simpleSchema := string(data) 3772 3773 tests := []struct { 3774 name string 3775 sql string 3776 create string 3777 args []interface{} 3778 err bool 3779 }{ 3780 { 3781 name: "import-ocf", 3782 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY, s text, b bytea) AVRO DATA ($1)", 3783 args: []interface{}{simpleOcf}, 3784 }, 3785 { 3786 name: "import-ocf-into-table", 3787 sql: "IMPORT INTO simple AVRO DATA ($1)", 3788 create: "CREATE TABLE simple (i INT8 PRIMARY KEY, s text, b bytea)", 3789 args: []interface{}{simpleOcf}, 3790 }, 3791 { 3792 name: "import-ocf-into-table-with-strict-validation", 3793 sql: "IMPORT INTO simple AVRO DATA ($1) WITH strict_validation", 3794 create: "CREATE TABLE simple (i INT8, s text, b bytea)", 3795 args: []interface{}{simpleOcf}, 3796 }, 3797 { 3798 name: "import-ocf-create-using", 3799 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2)", 3800 args: []interface{}{tableSchema, simpleOcf}, 3801 }, 3802 { 3803 name: "import-json-records", 3804 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2) WITH data_as_json_records, schema_uri=$3", 3805 args: []interface{}{tableSchema, simpleJSON, simpleSchemaURI}, 3806 }, 3807 { 3808 name: "import-json-records-into-table-ignores-extra-fields", 3809 sql: "IMPORT INTO simple AVRO DATA ($1) WITH data_as_json_records, schema_uri=$2", 3810 create: "CREATE TABLE simple (i INT8 PRIMARY KEY)", 3811 args: []interface{}{simpleJSON, simpleSchemaURI}, 3812 }, 3813 { 3814 name: "import-json-records-inline-schema", 3815 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2) WITH data_as_json_records, schema=$3", 3816 args: []interface{}{tableSchema, simpleJSON, simpleSchema}, 3817 }, 3818 { 3819 name: "import-json-pretty-printed-records", 3820 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2) WITH data_as_json_records, schema_uri=$3", 3821 args: []interface{}{tableSchema, simplePrettyJSON, 
simpleSchemaURI}, 3822 }, 3823 { 3824 name: "import-avro-fragments", 3825 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2) WITH data_as_binary_records, records_terminated_by='', schema_uri=$3", 3826 args: []interface{}{tableSchema, simpleBinRecords, simpleSchemaURI}, 3827 }, 3828 { 3829 name: "fail-import-expect-ocf-got-json", 3830 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2)", 3831 args: []interface{}{tableSchema, simpleJSON}, 3832 err: true, 3833 }, 3834 { 3835 name: "relaxed-import-sets-missing-fields", 3836 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY, s text, b bytea, z int) AVRO DATA ($1)", 3837 args: []interface{}{simpleOcf}, 3838 }, 3839 { 3840 name: "relaxed-import-ignores-extra-fields", 3841 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY) AVRO DATA ($1)", 3842 args: []interface{}{simpleOcf}, 3843 }, 3844 { 3845 name: "strict-import-errors-missing-fields", 3846 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY, s text, b bytea, z int) AVRO DATA ($1) WITH strict_validation", 3847 args: []interface{}{simpleOcf}, 3848 err: true, 3849 }, 3850 { 3851 name: "strict-import-errors-extra-fields", 3852 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY) AVRO DATA ($1) WITH strict_validation", 3853 args: []interface{}{simpleOcf}, 3854 err: true, 3855 }, 3856 } 3857 3858 for i, test := range tests { 3859 t.Run(test.name, func(t *testing.T) { 3860 // Play a bit with producer/consumer batch sizes. 3861 defer TestingSetParallelImporterReaderBatchSize(13 * i)() 3862 3863 _, err := sqlDB.DB.ExecContext(context.Background(), `DROP TABLE IF EXISTS simple CASCADE`) 3864 require.NoError(t, err) 3865 3866 if len(test.create) > 0 { 3867 _, err := sqlDB.DB.ExecContext(context.Background(), test.create) 3868 require.NoError(t, err) 3869 } 3870 3871 _, err = sqlDB.DB.ExecContext(context.Background(), test.sql, test.args...) 3872 if test.err { 3873 if err == nil { 3874 t.Error("expected error, but alas") 3875 } 3876 return 3877 } 3878 3879 require.NoError(t, err) 3880 3881 var numRows int 3882 sqlDB.QueryRow(t, `SELECT count(*) FROM simple`).Scan(&numRows) 3883 if numRows == 0 { 3884 t.Error("expected some rows after import") 3885 } 3886 }) 3887 } 3888 } 3889 3890 // TestImportClientDisconnect ensures that an import job can complete even if 3891 // the client connection which started it closes. This test uses a helper 3892 // subprocess to force a closed client connection without needing to rely 3893 // on the driver to close a TCP connection. See TestImportClientDisconnectHelper 3894 // for the subprocess. 3895 func TestImportClientDisconnect(t *testing.T) { 3896 defer leaktest.AfterTest(t)() 3897 3898 ctx, cancel := context.WithCancel(context.Background()) 3899 defer cancel() 3900 args := base.TestClusterArgs{} 3901 tc := testcluster.StartTestCluster(t, 1, args) 3902 defer tc.Stopper().Stop(ctx) 3903 3904 tc.WaitForNodeLiveness(t) 3905 require.NoError(t, tc.WaitForFullReplication()) 3906 3907 conn := tc.ServerConn(0) 3908 runner := sqlutils.MakeSQLRunner(conn) 3909 runner.Exec(t, "SET CLUSTER SETTING kv.protectedts.poll_interval = '100ms';") 3910 3911 // Make a server that will tell us when somebody has sent a request, wait to 3912 // be signaled, and then serve a CSV row for our table. 
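// allowResponse gates the HTTP response until the test has torn down the client
// connection, and gotRequest (buffered so the handler never blocks on it) signals
// that the IMPORT has started fetching the file.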
3913 allowResponse := make(chan struct{}) 3914 gotRequest := make(chan struct{}, 1) 3915 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 3916 if r.Method != "GET" { 3917 return 3918 } 3919 select { 3920 case gotRequest <- struct{}{}: 3921 default: 3922 } 3923 select { 3924 case <-allowResponse: 3925 case <-ctx.Done(): // Deal with test failures. 3926 } 3927 _, _ = w.Write([]byte("1,asdfasdfasdfasdf")) 3928 })) 3929 defer srv.Close() 3930 3931 // Make credentials for the new connection. 3932 runner.Exec(t, `CREATE USER testuser`) 3933 runner.Exec(t, `GRANT admin TO testuser`) 3934 pgURL, cleanup := sqlutils.PGUrl(t, tc.Server(0).ServingSQLAddr(), 3935 "TestImportClientDisconnect-testuser", url.User("testuser")) 3936 defer cleanup() 3937 3938 // Kick off the import on a new connection which we're going to close. 3939 done := make(chan struct{}) 3940 ctxToCancel, cancel := context.WithCancel(ctx) 3941 defer cancel() 3942 go func() { 3943 defer close(done) 3944 connCfg, err := pgx.ParseConnectionString(pgURL.String()) 3945 assert.NoError(t, err) 3946 db, err := pgx.Connect(connCfg) 3947 assert.NoError(t, err) 3948 defer func() { _ = db.Close() }() 3949 _, err = db.ExecEx(ctxToCancel, `IMPORT TABLE foo (k INT PRIMARY KEY, v STRING) CSV DATA ($1)`, 3950 nil /* options */, srv.URL) 3951 assert.Equal(t, context.Canceled, err) 3952 }() 3953 3954 // Wait for the import job to start. 3955 var jobID string 3956 testutils.SucceedsSoon(t, func() error { 3957 row := conn.QueryRow("SELECT job_id FROM [SHOW JOBS] WHERE job_type = 'IMPORT' ORDER BY created DESC LIMIT 1") 3958 return row.Scan(&jobID) 3959 }) 3960 3961 // Wait for it to actually start. 3962 <-gotRequest 3963 3964 // Cancel the import context and wait for the goroutine to exit. 3965 cancel() 3966 <-done 3967 3968 // Allow the import to proceed. 3969 close(allowResponse) 3970 3971 // Wait for the job to get marked as succeeded. 3972 testutils.SucceedsSoon(t, func() error { 3973 var status string 3974 if err := conn.QueryRow("SELECT status FROM [SHOW JOB " + jobID + "]").Scan(&status); err != nil { 3975 return err 3976 } 3977 const succeeded = "succeeded" 3978 if status != succeeded { 3979 return errors.Errorf("expected %s, got %v", succeeded, status) 3980 } 3981 return nil 3982 }) 3983 } 3984 3985 func TestDisallowsInvalidFormatOptions(t *testing.T) { 3986 defer leaktest.AfterTest(t)() 3987 3988 allOpts := make(map[string]struct{}) 3989 addOpts := func(opts map[string]struct{}) { 3990 for opt := range opts { 3991 allOpts[opt] = struct{}{} 3992 } 3993 } 3994 addOpts(allowedCommonOptions) 3995 addOpts(avroAllowedOptions) 3996 addOpts(csvAllowedOptions) 3997 addOpts(mysqlDumpAllowedOptions) 3998 addOpts(mysqlOutAllowedOptions) 3999 addOpts(pgDumpAllowedOptions) 4000 addOpts(pgCopyAllowedOptions) 4001 4002 // Helper to pick num options from the set of allowed and the set 4003 // of all other options. Returns generated options plus a flag indicating 4004 // if the generated options contain disallowed ones. 
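// pickOpts draws num option names at random: with probability 4/10 it draws from every
// known option across all formats (so disallowed names can appear), otherwise only from
// the format's own allowed set. A draw still counts as allowed if it is one of the
// allowedCommonOptions shared by every format.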
4005 pickOpts := func(num int, allowed map[string]struct{}) (map[string]string, bool) { 4006 opts := make(map[string]string, num) 4007 haveDisallowed := false 4008 var picks []string 4009 if rand.Intn(10) > 5 { 4010 for opt := range allOpts { 4011 picks = append(picks, opt) 4012 } 4013 } else { 4014 for opt := range allowed { 4015 picks = append(picks, opt) 4016 } 4017 } 4018 require.NotNil(t, picks) 4019 4020 for i := 0; i < num; i++ { 4021 pick := picks[rand.Intn(len(picks))] 4022 _, allowed := allowed[pick] 4023 if !allowed { 4024 _, allowed = allowedCommonOptions[pick] 4025 } 4026 if allowed { 4027 opts[pick] = "ok" 4028 } else { 4029 opts[pick] = "bad" 4030 haveDisallowed = true 4031 } 4032 } 4033 4034 return opts, haveDisallowed 4035 } 4036 4037 tests := []struct { 4038 format string 4039 allowed map[string]struct{} 4040 }{ 4041 {"avro", avroAllowedOptions}, 4042 {"csv", csvAllowedOptions}, 4043 {"mysqouout", mysqlOutAllowedOptions}, 4044 {"mysqldump", mysqlDumpAllowedOptions}, 4045 {"pgdump", pgDumpAllowedOptions}, 4046 {"pgcopy", pgCopyAllowedOptions}, 4047 } 4048 4049 for _, tc := range tests { 4050 for i := 0; i < 5; i++ { 4051 opts, haveBadOptions := pickOpts(i, tc.allowed) 4052 t.Run(fmt.Sprintf("validate-%s-%d/badOpts=%t", tc.format, i, haveBadOptions), 4053 func(t *testing.T) { 4054 err := validateFormatOptions(tc.format, opts, tc.allowed) 4055 if haveBadOptions { 4056 require.Error(t, err, opts) 4057 } else { 4058 require.NoError(t, err, opts) 4059 } 4060 }) 4061 } 4062 } 4063 }
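// The randomized test above covers many combinations but is hard to read as
// documentation. The function below is an illustrative, deterministic sketch (not part
// of the original file). It reuses the (format, options, allowed-set) call shape from
// TestDisallowsInvalidFormatOptions and assumes that "nullif" is in csvAllowedOptions
// while the Avro-only "strict_validation" option is in neither csvAllowedOptions nor
// allowedCommonOptions.
func TestDisallowsInvalidFormatOptionsDeterministicSketch(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// A format-specific option that CSV accepts should validate cleanly.
	require.NoError(t, validateFormatOptions(
		"csv", map[string]string{"nullif": ""}, csvAllowedOptions))

	// An option belonging to another format should be rejected for CSV.
	require.Error(t, validateFormatOptions(
		"csv", map[string]string{"strict_validation": ""}, csvAllowedOptions))
}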