github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/import_stmt_test.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 package importccl 10 11 import ( 12 "bytes" 13 "context" 14 gosql "database/sql" 15 "fmt" 16 "io" 17 "io/ioutil" 18 "math/rand" 19 "net/http" 20 "net/http/httptest" 21 "net/url" 22 "path/filepath" 23 "strings" 24 "sync" 25 "testing" 26 "time" 27 28 "github.com/cockroachdb/cockroach/pkg/base" 29 "github.com/cockroachdb/cockroach/pkg/ccl/backupccl" 30 "github.com/cockroachdb/cockroach/pkg/jobs" 31 "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" 32 "github.com/cockroachdb/cockroach/pkg/keys" 33 "github.com/cockroachdb/cockroach/pkg/kv" 34 "github.com/cockroachdb/cockroach/pkg/kv/kvserver" 35 "github.com/cockroachdb/cockroach/pkg/roachpb" 36 "github.com/cockroachdb/cockroach/pkg/security" 37 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 38 "github.com/cockroachdb/cockroach/pkg/sql" 39 "github.com/cockroachdb/cockroach/pkg/sql/gcjob" 40 "github.com/cockroachdb/cockroach/pkg/sql/parser" 41 "github.com/cockroachdb/cockroach/pkg/sql/row" 42 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 43 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 44 "github.com/cockroachdb/cockroach/pkg/sql/stats" 45 "github.com/cockroachdb/cockroach/pkg/sql/tests" 46 "github.com/cockroachdb/cockroach/pkg/testutils" 47 "github.com/cockroachdb/cockroach/pkg/testutils/jobutils" 48 "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" 49 "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" 50 "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" 51 "github.com/cockroachdb/cockroach/pkg/util" 52 "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" 53 "github.com/cockroachdb/cockroach/pkg/util/hlc" 54 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 55 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 56 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 57 "github.com/cockroachdb/errors" 58 "github.com/jackc/pgx" 59 "github.com/stretchr/testify/assert" 60 "github.com/stretchr/testify/require" 61 ) 62 63 func TestImportData(t *testing.T) { 64 defer leaktest.AfterTest(t)() 65 66 t.Skipf("failing on teamcity with testrace") 67 68 s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) 69 ctx := context.Background() 70 defer s.Stopper().Stop(ctx) 71 sqlDB := sqlutils.MakeSQLRunner(db) 72 73 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 74 75 tests := []struct { 76 name string 77 create string 78 with string 79 typ string 80 data string 81 err string 82 rejected string 83 query map[string][][]string 84 }{ 85 { 86 name: "duplicate unique index key", 87 create: ` 88 a int8 primary key, 89 i int8, 90 unique index idx_f (i) 91 `, 92 typ: "CSV", 93 data: `1,1 94 2,2 95 3,3 96 4,3 97 5,4`, 98 err: "duplicate key", 99 }, 100 { 101 name: "duplicate PK", 102 create: ` 103 i int8 primary key, 104 s string 105 `, 106 typ: "CSV", 107 data: `1, A 108 2, B 109 3, C 110 3, D 111 4, E`, 112 err: "duplicate key", 113 }, 114 { 115 name: "duplicate collated string key", 116 create: ` 117 s string collate en_u_ks_level1 primary key 118 `, 119 typ: "CSV", 120 data: `a 121 B 122 c 123 D 124 d 125 `, 126 err: "duplicate key", 
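			// A note on the case above: en_u_ks_level1 is a primary-strength
			// (level-1) collation, which compares case-insensitively, so the "D"
			// and "d" rows in the data collide on the primary key even though the
			// raw strings differ.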
127 }, 128 { 129 name: "duplicate PK at sst boundary", 130 create: ` 131 i int8 primary key, 132 s string 133 `, 134 with: `WITH sstsize = '10B'`, 135 typ: "CSV", 136 data: `1,0000000000 137 1,0000000001`, 138 err: "duplicate key", 139 }, 140 { 141 name: "verify no splits mid row", 142 create: ` 143 i int8 primary key, 144 s string, 145 b int8, 146 c int8, 147 index (s), 148 index (i, s), 149 family (i, b), 150 family (s, c) 151 `, 152 with: `WITH sstsize = '1B'`, 153 typ: "CSV", 154 data: `5,STRING,7,9`, 155 query: map[string][][]string{ 156 `SELECT count(*) from t`: {{"1"}}, 157 }, 158 }, 159 { 160 name: "good bytes encoding", 161 create: `b bytes`, 162 typ: "CSV", 163 data: `\x0143 164 0143`, 165 query: map[string][][]string{ 166 `SELECT * from t`: {{"\x01C"}, {"0143"}}, 167 }, 168 }, 169 { 170 name: "invalid byte", 171 create: `b bytes`, 172 typ: "CSV", 173 data: `\x0g`, 174 rejected: `\x0g` + "\n", 175 err: "invalid byte", 176 }, 177 { 178 name: "bad bytes length", 179 create: `b bytes`, 180 typ: "CSV", 181 data: `\x0`, 182 rejected: `\x0` + "\n", 183 err: "odd length hex string", 184 }, 185 { 186 name: "oversample", 187 create: `i int8`, 188 with: `WITH oversample = '100'`, 189 typ: "CSV", 190 data: "1", 191 }, 192 { 193 name: "new line characters", 194 create: `t text`, 195 typ: "CSV", 196 data: "\"hello\r\nworld\"\n\"friend\nfoe\"\n\"mr\rmrs\"", 197 query: map[string][][]string{ 198 `SELECT t from t`: {{"hello\r\nworld"}, {"friend\nfoe"}, {"mr\rmrs"}}, 199 }, 200 }, 201 { 202 name: "CR in int8, 2 cols", 203 create: `a int8, b int8`, 204 typ: "CSV", 205 data: "1,2\r\n3,4\n5,6", 206 query: map[string][][]string{ 207 `SELECT * FROM t ORDER BY a`: {{"1", "2"}, {"3", "4"}, {"5", "6"}}, 208 }, 209 }, 210 { 211 name: "CR in int8, 1 col", 212 create: `a int8`, 213 typ: "CSV", 214 data: "1\r\n3\n5", 215 query: map[string][][]string{ 216 `SELECT * FROM t ORDER BY a`: {{"1"}, {"3"}, {"5"}}, 217 }, 218 }, 219 { 220 name: "collated strings", 221 create: `s string collate en_u_ks_level1`, 222 typ: "CSV", 223 data: strings.Repeat("1\n", 2000), 224 query: map[string][][]string{ 225 `SELECT s, count(*) FROM t GROUP BY s`: {{"1", "2000"}}, 226 }, 227 }, 228 { 229 name: "quotes are accepted in a quoted string", 230 create: `s string`, 231 typ: "CSV", 232 data: `"abc""de"`, 233 query: map[string][][]string{ 234 `SELECT s FROM t`: {{`abc"de`}}, 235 }, 236 }, 237 { 238 name: "bare quote in the middle of a field that is not quoted", 239 create: `s string`, 240 typ: "CSV", 241 data: `abc"de`, 242 query: map[string][][]string{`SELECT * from t`: {{`abc"de`}}}, 243 }, 244 { 245 name: "strict quotes: bare quote in the middle of a field that is not quoted", 246 create: `s string`, 247 typ: "CSV", 248 with: `WITH strict_quotes`, 249 data: `abc"de`, 250 err: `row 1: reading CSV record: parse error on line 1, column 3: bare " in non-quoted-field`, 251 }, 252 { 253 name: "no matching quote in a quoted field", 254 create: `s string`, 255 typ: "CSV", 256 data: `"abc"de`, 257 query: map[string][][]string{`SELECT * from t`: {{`abc"de`}}}, 258 }, 259 { 260 name: "strict quotes: bare quote in the middle of a quoted field is not ok", 261 create: `s string`, 262 typ: "CSV", 263 with: `WITH strict_quotes`, 264 data: `"abc"de"`, 265 err: `row 1: reading CSV record: parse error on line 1, column 4: extraneous or missing " in quoted-field`, 266 }, 267 { 268 name: "too many imported columns", 269 create: `i int8`, 270 typ: "CSV", 271 data: "1,2\n3\n11,22", 272 err: "row 1: expected 1 fields, got 2", 273 rejected: 
"1,2\n11,22\n", 274 query: map[string][][]string{`SELECT * from t`: {{"3"}}}, 275 }, 276 { 277 name: "parsing error", 278 create: `i int8, j int8`, 279 typ: "CSV", 280 data: "not_int,2\n3,4", 281 err: `row 1: parse "i" as INT8: could not parse "not_int" as type int`, 282 rejected: "not_int,2\n", 283 query: map[string][][]string{`SELECT * from t`: {{"3", "4"}}}, 284 }, 285 286 // MySQL OUTFILE 287 // If err field is non-empty, the query filed specifies what expect 288 // to get from the rows that are parsed correctly (see option experimental_save_rejected). 289 { 290 name: "empty file", 291 create: `a string`, 292 typ: "DELIMITED", 293 data: "", 294 query: map[string][][]string{`SELECT * from t`: {}}, 295 }, 296 { 297 name: "empty field", 298 create: `a string, b string`, 299 typ: "DELIMITED", 300 data: "\t", 301 query: map[string][][]string{`SELECT * from t`: {{"", ""}}}, 302 }, 303 { 304 name: "empty line", 305 create: `a string`, 306 typ: "DELIMITED", 307 data: "\n", 308 query: map[string][][]string{`SELECT * from t`: {{""}}}, 309 }, 310 { 311 name: "too many imported columns", 312 create: `i int8`, 313 typ: "DELIMITED", 314 data: "1\t2\n3", 315 err: "row 1: too many columns, got 2 expected 1", 316 rejected: "1\t2\n", 317 query: map[string][][]string{`SELECT * from t`: {{"3"}}}, 318 }, 319 { 320 name: "cannot parse data", 321 create: `i int8, j int8`, 322 typ: "DELIMITED", 323 data: "bad_int\t2\n3\t4", 324 err: "error parsing row 1", 325 rejected: "bad_int\t2\n", 326 query: map[string][][]string{`SELECT * from t`: {{"3", "4"}}}, 327 }, 328 { 329 name: "unexpected number of columns", 330 create: `a string, b string`, 331 typ: "DELIMITED", 332 data: "1,2\n3\t4", 333 err: "row 1: unexpected number of columns, expected 2 got 1", 334 rejected: "1,2\n", 335 query: map[string][][]string{`SELECT * from t`: {{"3", "4"}}}, 336 }, 337 { 338 name: "unexpected number of columns in 1st row", 339 create: `a string, b string`, 340 typ: "DELIMITED", 341 data: "1,2\n3\t4", 342 err: "row 1: unexpected number of columns, expected 2 got 1", 343 rejected: "1,2\n", 344 query: map[string][][]string{`SELECT * from t`: {{"3", "4"}}}, 345 }, 346 { 347 name: "field enclosure", 348 create: `a string, b string`, 349 with: `WITH fields_enclosed_by = '$'`, 350 typ: "DELIMITED", 351 data: "$foo$\tnormal", 352 query: map[string][][]string{ 353 `SELECT * from t`: {{"foo", "normal"}}, 354 }, 355 }, 356 { 357 name: "field enclosure in middle of unquoted field", 358 create: `a string, b string`, 359 with: `WITH fields_enclosed_by = '$'`, 360 typ: "DELIMITED", 361 data: "fo$o\tb$a$z", 362 query: map[string][][]string{ 363 `SELECT * from t`: {{"fo$o", "b$a$z"}}, 364 }, 365 }, 366 { 367 name: "field enclosure in middle of quoted field", 368 create: `a string, b string`, 369 with: `WITH fields_enclosed_by = '$'`, 370 typ: "DELIMITED", 371 data: "$fo$o$\t$b$a$z$", 372 query: map[string][][]string{ 373 `SELECT * from t`: {{"fo$o", "b$a$z"}}, 374 }, 375 }, 376 { 377 name: "unmatched field enclosure", 378 create: `a string, b string`, 379 with: `WITH fields_enclosed_by = '$'`, 380 typ: "DELIMITED", 381 data: "$foo\tnormal\nbaz\tbar", 382 err: "error parsing row 1: unmatched field enclosure at start of field", 383 rejected: "$foo\tnormal\nbaz\tbar\n", 384 query: map[string][][]string{`SELECT * from t`: {}}, 385 }, 386 { 387 name: "unmatched field enclosure at end", 388 create: `a string, b string`, 389 with: `WITH fields_enclosed_by = '$'`, 390 typ: "DELIMITED", 391 data: "foo$\tnormal\nbar\tbaz", 392 err: "row 1: unmatched field 
enclosure at end of field", 393 rejected: "foo$\tnormal\n", 394 query: map[string][][]string{`SELECT * from t`: {{"bar", "baz"}}}, 395 }, 396 { 397 name: "unmatched field enclosure 2nd field", 398 create: `a string, b string`, 399 with: `WITH fields_enclosed_by = '$'`, 400 typ: "DELIMITED", 401 data: "normal\t$foo", 402 err: "row 1: unmatched field enclosure at start of field", 403 rejected: "normal\t$foo\n", 404 query: map[string][][]string{`SELECT * from t`: {}}, 405 }, 406 { 407 name: "unmatched field enclosure at end 2nd field", 408 create: `a string, b string`, 409 with: `WITH fields_enclosed_by = '$'`, 410 typ: "DELIMITED", 411 data: "normal\tfoo$", 412 err: "row 1: unmatched field enclosure at end of field", 413 rejected: "normal\tfoo$\n", 414 query: map[string][][]string{`SELECT * from t`: {}}, 415 }, 416 { 417 name: "unmatched literal", 418 create: `i int8`, 419 with: `WITH fields_escaped_by = '\'`, 420 typ: "DELIMITED", 421 data: `\`, 422 err: "row 1: unmatched literal", 423 rejected: "\\\n", 424 query: map[string][][]string{`SELECT * from t`: {}}, 425 }, 426 { 427 name: "escaped field enclosure", 428 create: `a string, b string`, 429 with: `WITH fields_enclosed_by = '$', fields_escaped_by = '\', 430 fields_terminated_by = ','`, 431 typ: "DELIMITED", 432 data: `\$foo\$,\$baz`, 433 query: map[string][][]string{ 434 `SELECT * from t`: {{"$foo$", "$baz"}}, 435 }, 436 }, 437 { 438 name: "weird escape char", 439 create: `s STRING`, 440 with: `WITH fields_escaped_by = '@'`, 441 typ: "DELIMITED", 442 data: "@N\nN@@@\n\nNULL", 443 query: map[string][][]string{ 444 `SELECT COALESCE(s, '(null)') from t`: {{"(null)"}, {"N@\n"}, {"NULL"}}, 445 }, 446 }, 447 { 448 name: `null and \N with escape`, 449 create: `s STRING`, 450 with: `WITH fields_escaped_by = '\'`, 451 typ: "DELIMITED", 452 data: "\\N\n\\\\N\nNULL", 453 query: map[string][][]string{ 454 `SELECT COALESCE(s, '(null)') from t`: {{"(null)"}, {`\N`}, {"NULL"}}, 455 }, 456 }, 457 { 458 name: `\N with trailing char`, 459 create: `s STRING`, 460 with: `WITH fields_escaped_by = '\'`, 461 typ: "DELIMITED", 462 data: "\\N1\nfoo", 463 err: "row 1: unexpected data after null encoding", 464 rejected: "\\N1\n", 465 query: map[string][][]string{`SELECT * from t`: {{"foo"}}}, 466 }, 467 { 468 name: `double null`, 469 create: `s STRING`, 470 with: `WITH fields_escaped_by = '\'`, 471 typ: "DELIMITED", 472 data: `\N\N`, 473 err: "row 1: unexpected null encoding", 474 rejected: `\N\N` + "\n", 475 query: map[string][][]string{`SELECT * from t`: {}}, 476 }, 477 { 478 name: `null and \N without escape`, 479 create: `s STRING`, 480 typ: "DELIMITED", 481 data: "\\N\n\\\\N\nNULL", 482 query: map[string][][]string{ 483 `SELECT COALESCE(s, '(null)') from t`: {{`\N`}, {`\\N`}, {"(null)"}}, 484 }, 485 }, 486 { 487 name: `bytes with escape`, 488 create: `b BYTES`, 489 typ: "DELIMITED", 490 data: `\x`, 491 query: map[string][][]string{ 492 `SELECT * from t`: {{`\x`}}, 493 }, 494 }, 495 { 496 name: "skip 0 lines", 497 create: `a string, b string`, 498 with: `WITH fields_terminated_by = ',', skip = '0'`, 499 typ: "DELIMITED", 500 data: "foo,normal", 501 query: map[string][][]string{ 502 `SELECT * from t`: {{"foo", "normal"}}, 503 }, 504 }, 505 { 506 name: "skip 1 lines", 507 create: `a string, b string`, 508 with: `WITH fields_terminated_by = ',', skip = '1'`, 509 typ: "DELIMITED", 510 data: "a string, b string\nfoo,normal", 511 query: map[string][][]string{ 512 `SELECT * from t`: {{"foo", "normal"}}, 513 }, 514 }, 515 { 516 name: "skip 2 lines", 517 create: `a 
string, b string`, 518 with: `WITH fields_terminated_by = ',', skip = '2'`, 519 typ: "DELIMITED", 520 data: "a string, b string\nfoo,normal\nbar,baz", 521 query: map[string][][]string{ 522 `SELECT * from t`: {{"bar", "baz"}}, 523 }, 524 }, 525 { 526 name: "skip all lines", 527 create: `a string, b string`, 528 with: `WITH fields_terminated_by = ',', skip = '3'`, 529 typ: "DELIMITED", 530 data: "a string, b string\nfoo,normal\nbar,baz", 531 query: map[string][][]string{ 532 `SELECT * from t`: {}, 533 }, 534 }, 535 { 536 name: "skip > all lines", 537 create: `a string, b string`, 538 with: `WITH fields_terminated_by = ',', skip = '4'`, 539 typ: "DELIMITED", 540 data: "a string, b string\nfoo,normal\nbar,baz", 541 query: map[string][][]string{`SELECT * from t`: {}}, 542 }, 543 { 544 name: "skip -1 lines", 545 create: `a string, b string`, 546 with: `WITH fields_terminated_by = ',', skip = '-1'`, 547 typ: "DELIMITED", 548 data: "a string, b string\nfoo,normal", 549 err: "pq: skip must be >= 0", 550 }, 551 { 552 name: "nullif empty string", 553 create: `a string, b string`, 554 with: `WITH fields_terminated_by = ',', nullif = ''`, 555 typ: "DELIMITED", 556 data: ",normal", 557 query: map[string][][]string{ 558 `SELECT * from t`: {{"NULL", "normal"}}, 559 }, 560 }, 561 { 562 name: "nullif empty string plus escape", 563 create: `a INT8, b INT8`, 564 with: `WITH fields_terminated_by = ',', fields_escaped_by = '\', nullif = ''`, 565 typ: "DELIMITED", 566 data: ",4", 567 query: map[string][][]string{ 568 `SELECT * from t`: {{"NULL", "4"}}, 569 }, 570 }, 571 { 572 name: "nullif single char string", 573 create: `a string, b string`, 574 with: `WITH fields_terminated_by = ',', nullif = 'f'`, 575 typ: "DELIMITED", 576 data: "f,normal", 577 query: map[string][][]string{ 578 `SELECT * from t`: {{"NULL", "normal"}}, 579 }, 580 }, 581 { 582 name: "nullif multiple char string", 583 create: `a string, b string`, 584 with: `WITH fields_terminated_by = ',', nullif = 'foo'`, 585 typ: "DELIMITED", 586 data: "foo,foop", 587 query: map[string][][]string{ 588 `SELECT * from t`: {{"NULL", "foop"}}, 589 }, 590 }, 591 592 // PG COPY 593 { 594 name: "unexpected escape x", 595 create: `b bytes`, 596 typ: "PGCOPY", 597 data: `\x`, 598 err: `row 1: unsupported escape sequence: \\x`, 599 }, 600 { 601 name: "unexpected escape 3", 602 create: `b bytes`, 603 typ: "PGCOPY", 604 data: `\3`, 605 err: `row 1: unsupported escape sequence: \\3`, 606 }, 607 { 608 name: "escapes", 609 create: `b bytes`, 610 typ: "PGCOPY", 611 data: `\x43\122`, 612 query: map[string][][]string{ 613 `SELECT * from t`: {{"CR"}}, 614 }, 615 }, 616 { 617 name: "normal", 618 create: `i int8, s string`, 619 typ: "PGCOPY", 620 data: "1\tSTR\n2\t\\N\n\\N\t\\t", 621 query: map[string][][]string{ 622 `SELECT * from t`: {{"1", "STR"}, {"2", "NULL"}, {"NULL", "\t"}}, 623 }, 624 }, 625 { 626 name: "comma delim", 627 create: `i int8, s string`, 628 typ: "PGCOPY", 629 with: `WITH delimiter = ','`, 630 data: "1,STR\n2,\\N\n\\N,\\,", 631 query: map[string][][]string{ 632 `SELECT * from t`: {{"1", "STR"}, {"2", "NULL"}, {"NULL", ","}}, 633 }, 634 }, 635 { 636 name: "size out of range", 637 create: `i int8`, 638 typ: "PGCOPY", 639 with: `WITH max_row_size = '10GB'`, 640 err: "max_row_size out of range", 641 }, 642 { 643 name: "line too long", 644 create: `i int8`, 645 typ: "PGCOPY", 646 data: "123456", 647 with: `WITH max_row_size = '5B'`, 648 err: "line too long", 649 }, 650 { 651 name: "not enough values", 652 typ: "PGCOPY", 653 create: "a INT8, b INT8", 654 data: `1`, 
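			// PGCOPY input is tab-delimited by default (see the "normal" case
			// above), so a bare `1` supplies only one of the two declared columns.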
655 err: "expected 2 values, got 1", 656 }, 657 { 658 name: "too many values", 659 typ: "PGCOPY", 660 create: "a INT8, b INT8", 661 data: "1\t2\t3", 662 err: "expected 2 values, got 3", 663 }, 664 665 // Postgres DUMP 666 { 667 name: "mismatch cols", 668 typ: "PGDUMP", 669 data: ` 670 CREATE TABLE t (i int8); 671 COPY t (s) FROM stdin; 672 0 673 \. 674 `, 675 err: `COPY columns do not match table columns for table t`, 676 }, 677 { 678 name: "missing COPY done", 679 typ: "PGDUMP", 680 data: ` 681 CREATE TABLE t (i int8); 682 COPY t (i) FROM stdin; 683 0 684 `, 685 err: `unexpected EOF`, 686 }, 687 { 688 name: "semicolons and comments", 689 typ: "PGDUMP", 690 data: ` 691 CREATE TABLE t (i int8); 692 ;;; 693 -- nothing ; 694 ; 695 -- blah 696 `, 697 query: map[string][][]string{ 698 `SELECT * from t`: {}, 699 }, 700 }, 701 { 702 name: "size out of range", 703 typ: "PGDUMP", 704 with: `WITH max_row_size = '10GB'`, 705 err: "max_row_size out of range", 706 }, 707 { 708 name: "line too long", 709 typ: "PGDUMP", 710 data: "CREATE TABLE t (i INT8);", 711 with: `WITH max_row_size = '5B'`, 712 err: "line too long", 713 }, 714 { 715 name: "not enough values", 716 typ: "PGDUMP", 717 data: ` 718 CREATE TABLE t (a INT8, b INT8); 719 720 COPY t (a, b) FROM stdin; 721 1 722 \. 723 `, 724 err: "expected 2 values, got 1", 725 }, 726 { 727 name: "too many values", 728 typ: "PGDUMP", 729 data: ` 730 CREATE TABLE t (a INT8, b INT8); 731 732 COPY t (a, b) FROM stdin; 733 1 2 3 734 \. 735 `, 736 err: "expected 2 values, got 3", 737 }, 738 { 739 name: "too many cols", 740 typ: "PGDUMP", 741 data: ` 742 CREATE TABLE t (a INT8, b INT8); 743 744 COPY t (a, b, c) FROM stdin; 745 1 2 3 746 \. 747 `, 748 err: "expected 2 columns, got 3", 749 }, 750 { 751 name: "fk", 752 typ: "PGDUMP", 753 data: testPgdumpFk, 754 query: map[string][][]string{ 755 `SHOW TABLES`: {{"public", "cities", "table"}, {"public", "weather", "table"}}, 756 `SELECT city FROM cities`: {{"Berkeley"}}, 757 `SELECT city FROM weather`: {{"Berkeley"}}, 758 759 `SELECT dependson_name 760 FROM crdb_internal.backward_dependencies 761 `: {{"weather_city_fkey"}}, 762 763 `SELECT create_statement 764 FROM crdb_internal.create_statements 765 WHERE descriptor_name in ('cities', 'weather') 766 ORDER BY descriptor_name 767 `: {{testPgdumpCreateCities}, {testPgdumpCreateWeather}}, 768 769 // Verify the constraint is unvalidated. 
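				// The trailing "false" in the expected row below is the `validated`
				// column of SHOW CONSTRAINTS: the imported foreign key is added
				// without being validated against existing rows.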
770 `SHOW CONSTRAINTS FROM weather 771 `: {{"weather", "weather_city_fkey", "FOREIGN KEY", "FOREIGN KEY (city) REFERENCES cities(city)", "false"}}, 772 }, 773 }, 774 { 775 name: "fk-circular", 776 typ: "PGDUMP", 777 data: testPgdumpFkCircular, 778 query: map[string][][]string{ 779 `SHOW TABLES`: {{"public", "a", "table"}, {"public", "b", "table"}}, 780 `SELECT i, k FROM a`: {{"2", "2"}}, 781 `SELECT j FROM b`: {{"2"}}, 782 783 `SELECT dependson_name 784 FROM crdb_internal.backward_dependencies ORDER BY dependson_name`: { 785 {"a_i_fkey"}, 786 {"a_k_fkey"}, 787 {"b_j_fkey"}, 788 }, 789 790 `SELECT create_statement 791 FROM crdb_internal.create_statements 792 WHERE descriptor_name in ('a', 'b') 793 ORDER BY descriptor_name 794 `: {{ 795 `CREATE TABLE a ( 796 i INT8 NOT NULL, 797 k INT8 NULL, 798 CONSTRAINT a_pkey PRIMARY KEY (i ASC), 799 CONSTRAINT a_i_fkey FOREIGN KEY (i) REFERENCES b(j), 800 CONSTRAINT a_k_fkey FOREIGN KEY (k) REFERENCES a(i), 801 INDEX a_auto_index_a_k_fkey (k ASC), 802 FAMILY "primary" (i, k) 803 )`}, { 804 `CREATE TABLE b ( 805 j INT8 NOT NULL, 806 CONSTRAINT b_pkey PRIMARY KEY (j ASC), 807 CONSTRAINT b_j_fkey FOREIGN KEY (j) REFERENCES a(i), 808 FAMILY "primary" (j) 809 )`, 810 }}, 811 812 `SHOW CONSTRAINTS FROM a`: { 813 {"a", "a_i_fkey", "FOREIGN KEY", "FOREIGN KEY (i) REFERENCES b(j)", "false"}, 814 {"a", "a_k_fkey", "FOREIGN KEY", "FOREIGN KEY (k) REFERENCES a(i)", "false"}, 815 {"a", "a_pkey", "PRIMARY KEY", "PRIMARY KEY (i ASC)", "true"}, 816 }, 817 `SHOW CONSTRAINTS FROM b`: { 818 {"b", "b_j_fkey", "FOREIGN KEY", "FOREIGN KEY (j) REFERENCES a(i)", "false"}, 819 {"b", "b_pkey", "PRIMARY KEY", "PRIMARY KEY (j ASC)", "true"}, 820 }, 821 }, 822 }, 823 { 824 name: "fk-skip", 825 typ: "PGDUMP", 826 data: testPgdumpFk, 827 with: `WITH skip_foreign_keys`, 828 query: map[string][][]string{ 829 `SHOW TABLES`: {{"public", "cities", "table"}, {"public", "weather", "table"}}, 830 // Verify the constraint is skipped. 
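				// With skip_foreign_keys the dump's FK clauses are dropped entirely,
				// so neither backward dependencies nor constraints are expected.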
831 `SELECT dependson_name FROM crdb_internal.backward_dependencies`: {}, 832 `SHOW CONSTRAINTS FROM weather`: {}, 833 }, 834 }, 835 { 836 name: "fk unreferenced", 837 typ: "TABLE weather FROM PGDUMP", 838 data: testPgdumpFk, 839 err: `table "cities" not found`, 840 }, 841 { 842 name: "fk unreferenced skipped", 843 typ: "TABLE weather FROM PGDUMP", 844 data: testPgdumpFk, 845 with: `WITH skip_foreign_keys`, 846 query: map[string][][]string{ 847 `SHOW TABLES`: {{"public", "weather", "table"}}, 848 }, 849 }, 850 { 851 name: "sequence", 852 typ: "PGDUMP", 853 data: ` 854 CREATE TABLE t (a INT8); 855 CREATE SEQUENCE public.i_seq 856 START WITH 1 857 INCREMENT BY 1 858 NO MINVALUE 859 NO MAXVALUE 860 CACHE 1; 861 ALTER SEQUENCE public.i_seq OWNED BY public.i.id; 862 ALTER TABLE ONLY t ALTER COLUMN a SET DEFAULT nextval('public.i_seq'::regclass); 863 SELECT pg_catalog.setval('public.i_seq', 10, true); 864 `, 865 query: map[string][][]string{ 866 `SELECT nextval('i_seq')`: {{"11"}}, 867 `SHOW CREATE SEQUENCE i_seq`: {{"i_seq", "CREATE SEQUENCE i_seq MINVALUE 1 MAXVALUE 9223372036854775807 INCREMENT 1 START 1"}}, 868 }, 869 }, 870 { 871 name: "non-public schema", 872 typ: "PGDUMP", 873 data: "create table s.t (i INT8)", 874 err: `non-public schemas unsupported: s`, 875 }, 876 { 877 name: "unsupported type", 878 typ: "PGDUMP", 879 data: "create table t (t time with time zone)", 880 err: `create table t \(t time with time zone\) 881 \^`, 882 }, 883 { 884 name: "various create ignores", 885 typ: "PGDUMP", 886 data: ` 887 CREATE TRIGGER conditions_set_updated_at BEFORE UPDATE ON conditions FOR EACH ROW EXECUTE PROCEDURE set_updated_at(); 888 REVOKE ALL ON SEQUENCE knex_migrations_id_seq FROM PUBLIC; 889 REVOKE ALL ON SEQUENCE knex_migrations_id_seq FROM database; 890 GRANT ALL ON SEQUENCE knex_migrations_id_seq TO database; 891 GRANT SELECT ON SEQUENCE knex_migrations_id_seq TO opentrials_readonly; 892 893 CREATE FUNCTION public.isnumeric(text) RETURNS boolean 894 LANGUAGE sql 895 AS $_$ 896 SELECT $1 ~ '^[0-9]+$' 897 $_$; 898 ALTER FUNCTION public.isnumeric(text) OWNER TO roland; 899 900 CREATE TABLE t (i INT8); 901 `, 902 query: map[string][][]string{ 903 `SHOW TABLES`: {{"public", "t", "table"}}, 904 }, 905 }, 906 { 907 name: "many tables", 908 typ: "PGDUMP", 909 data: func() string { 910 var sb strings.Builder 911 for i := 1; i <= 100; i++ { 912 fmt.Fprintf(&sb, "CREATE TABLE t%d ();\n", i) 913 } 914 return sb.String() 915 }(), 916 }, 917 918 // Error 919 { 920 name: "unsupported import format", 921 create: `b bytes`, 922 typ: "NOPE", 923 err: `unsupported import format`, 924 }, 925 { 926 name: "sequences", 927 create: `i int8 default nextval('s')`, 928 typ: "CSV", 929 err: `"s" not found`, 930 }, 931 } 932 933 var mockRecorder struct { 934 syncutil.Mutex 935 dataString, rejectedString string 936 } 937 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 938 mockRecorder.Lock() 939 defer mockRecorder.Unlock() 940 if r.Method == "GET" { 941 fmt.Fprint(w, mockRecorder.dataString) 942 } 943 if r.Method == "PUT" { 944 body, err := ioutil.ReadAll(r.Body) 945 if err != nil { 946 panic(err) 947 } 948 mockRecorder.rejectedString = string(body) 949 } 950 })) 951 defer srv.Close() 952 953 // Create and drop a table to make sure a descriptor ID gets used to verify 954 // ID rewrites happen correctly. Useful when running just a single test. 
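	// The httptest server above serves tc.data on GET and records anything the
	// import writes back via PUT (the rejected-rows file). Each case below is
	// then run roughly as (a sketch; the exact text is built by fmt.Sprintf in
	// the loop below):
	//
	//   IMPORT TABLE t (<create>) <typ> DATA ($1) <with>
	//
	// with $1 bound to srv.URL.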
955 sqlDB.Exec(t, `CREATE TABLE blah (i int8)`) 956 sqlDB.Exec(t, `DROP TABLE blah`) 957 958 for _, saveRejected := range []bool{false, true} { 959 // this test is big and slow as is, so we can't afford to double it in race. 960 if util.RaceEnabled && saveRejected { 961 continue 962 } 963 964 for i, tc := range tests { 965 if tc.typ != "CSV" && tc.typ != "DELIMITED" && saveRejected { 966 continue 967 } 968 if saveRejected { 969 if tc.with == "" { 970 tc.with = "WITH experimental_save_rejected" 971 } else { 972 tc.with += ", experimental_save_rejected" 973 } 974 } 975 t.Run(fmt.Sprintf("%s/%s: save_rejected=%v", tc.typ, tc.name, saveRejected), func(t *testing.T) { 976 dbName := fmt.Sprintf("d%d", i) 977 sqlDB.Exec(t, fmt.Sprintf(`CREATE DATABASE %s; USE %[1]s`, dbName)) 978 defer sqlDB.Exec(t, fmt.Sprintf(`DROP DATABASE %s`, dbName)) 979 var q string 980 if tc.create != "" { 981 q = fmt.Sprintf(`IMPORT TABLE t (%s) %s DATA ($1) %s`, tc.create, tc.typ, tc.with) 982 } else { 983 q = fmt.Sprintf(`IMPORT %s ($1) %s`, tc.typ, tc.with) 984 } 985 t.Log(q, srv.URL, "\nFile contents:\n", tc.data) 986 mockRecorder.dataString = tc.data 987 mockRecorder.rejectedString = "" 988 if !saveRejected || tc.rejected == "" { 989 sqlDB.ExpectErr(t, tc.err, q, srv.URL) 990 } else { 991 sqlDB.Exec(t, q, srv.URL) 992 } 993 if tc.err == "" || saveRejected { 994 for query, res := range tc.query { 995 sqlDB.CheckQueryResults(t, query, res) 996 } 997 if tc.rejected != mockRecorder.rejectedString { 998 t.Errorf("expected:\n%q\ngot:\n%q\n", tc.rejected, 999 mockRecorder.rejectedString) 1000 } 1001 } 1002 }) 1003 } 1004 } 1005 1006 t.Run("mysqlout multiple", func(t *testing.T) { 1007 sqlDB.Exec(t, `CREATE DATABASE mysqlout; USE mysqlout`) 1008 mockRecorder.dataString = "1" 1009 sqlDB.Exec(t, `IMPORT TABLE t (s STRING) DELIMITED DATA ($1, $1)`, srv.URL) 1010 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1"}, {"1"}}) 1011 }) 1012 } 1013 1014 const ( 1015 testPgdumpCreateCities = `CREATE TABLE cities ( 1016 city VARCHAR(80) NOT NULL, 1017 CONSTRAINT cities_pkey PRIMARY KEY (city ASC), 1018 FAMILY "primary" (city) 1019 )` 1020 testPgdumpCreateWeather = `CREATE TABLE weather ( 1021 city VARCHAR(80) NULL, 1022 temp_lo INT8 NULL, 1023 temp_hi INT8 NULL, 1024 prcp FLOAT4 NULL, 1025 date DATE NULL, 1026 CONSTRAINT weather_city_fkey FOREIGN KEY (city) REFERENCES cities(city), 1027 INDEX weather_auto_index_weather_city_fkey (city ASC), 1028 FAMILY "primary" (city, temp_lo, temp_hi, prcp, date, rowid) 1029 )` 1030 testPgdumpFk = ` 1031 CREATE TABLE public.cities ( 1032 city character varying(80) NOT NULL 1033 ); 1034 1035 ALTER TABLE public.cities OWNER TO postgres; 1036 1037 CREATE TABLE public.weather ( 1038 city character varying(80), 1039 temp_lo int8, 1040 temp_hi int8, 1041 prcp real, 1042 date date 1043 ); 1044 1045 ALTER TABLE public.weather OWNER TO postgres; 1046 1047 COPY public.cities (city) FROM stdin; 1048 Berkeley 1049 \. 1050 1051 COPY public.weather (city, temp_lo, temp_hi, prcp, date) FROM stdin; 1052 Berkeley 45 53 0 1994-11-28 1053 \. 1054 1055 ALTER TABLE ONLY public.cities 1056 ADD CONSTRAINT cities_pkey PRIMARY KEY (city); 1057 1058 ALTER TABLE ONLY public.weather 1059 ADD CONSTRAINT weather_city_fkey FOREIGN KEY (city) REFERENCES public.cities(city); 1060 ` 1061 1062 testPgdumpFkCircular = ` 1063 CREATE TABLE public.a ( 1064 i int8 NOT NULL, 1065 k int8 1066 ); 1067 1068 CREATE TABLE public.b ( 1069 j int8 NOT NULL 1070 ); 1071 1072 COPY public.a (i, k) FROM stdin; 1073 2 2 1074 \. 
1075 1076 COPY public.b (j) FROM stdin; 1077 2 1078 \. 1079 1080 ALTER TABLE ONLY public.a 1081 ADD CONSTRAINT a_pkey PRIMARY KEY (i); 1082 1083 ALTER TABLE ONLY public.b 1084 ADD CONSTRAINT b_pkey PRIMARY KEY (j); 1085 1086 ALTER TABLE ONLY public.a 1087 ADD CONSTRAINT a_i_fkey FOREIGN KEY (i) REFERENCES public.b(j); 1088 1089 ALTER TABLE ONLY public.a 1090 ADD CONSTRAINT a_k_fkey FOREIGN KEY (k) REFERENCES public.a(i); 1091 1092 ALTER TABLE ONLY public.b 1093 ADD CONSTRAINT b_j_fkey FOREIGN KEY (j) REFERENCES public.a(i); 1094 ` 1095 ) 1096 1097 func TestImportCSVStmt(t *testing.T) { 1098 defer leaktest.AfterTest(t)() 1099 if testing.Short() { 1100 t.Skip("short") 1101 } 1102 1103 const nodes = 3 1104 1105 numFiles := nodes + 2 1106 rowsPerFile := 1000 1107 rowsPerRaceFile := 16 1108 1109 var forceFailure bool 1110 blockGC := make(chan struct{}) 1111 1112 ctx := context.Background() 1113 baseDir := filepath.Join("testdata", "csv") 1114 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ 1115 SQLMemoryPoolSize: 256 << 20, 1116 ExternalIODir: baseDir, 1117 Knobs: base.TestingKnobs{ 1118 GCJob: &sql.GCJobTestingKnobs{RunBeforeResume: func(_ int64) error { <-blockGC; return nil }}, 1119 }, 1120 }}) 1121 defer tc.Stopper().Stop(ctx) 1122 conn := tc.Conns[0] 1123 1124 for i := range tc.Servers { 1125 tc.Servers[i].JobRegistry().(*jobs.Registry).TestingResumerCreationKnobs = map[jobspb.Type]func(raw jobs.Resumer) jobs.Resumer{ 1126 jobspb.TypeImport: func(raw jobs.Resumer) jobs.Resumer { 1127 r := raw.(*importResumer) 1128 r.testingKnobs.afterImport = func(_ backupccl.RowCount) error { 1129 if forceFailure { 1130 return errors.New("testing injected failure") 1131 } 1132 return nil 1133 } 1134 return r 1135 }, 1136 } 1137 } 1138 1139 sqlDB := sqlutils.MakeSQLRunner(conn) 1140 kvDB := tc.Server(0).DB() 1141 1142 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 1143 1144 testFiles := makeCSVData(t, numFiles, rowsPerFile, nodes, rowsPerRaceFile) 1145 if util.RaceEnabled { 1146 // This test takes a while with the race detector, so reduce the number of 1147 // files and rows per file in an attempt to speed it up. 1148 numFiles = nodes 1149 rowsPerFile = rowsPerRaceFile 1150 } 1151 1152 // Table schema used in IMPORT TABLE tests. 1153 tablePath := filepath.Join(baseDir, "table") 1154 if err := ioutil.WriteFile(tablePath, []byte(` 1155 CREATE TABLE t ( 1156 a int8 primary key, 1157 b string, 1158 index (b), 1159 index (a, b) 1160 ) 1161 `), 0666); err != nil { 1162 t.Fatal(err) 1163 } 1164 schema := []interface{}{"nodelocal://0/table"} 1165 1166 if err := ioutil.WriteFile(filepath.Join(baseDir, "empty.csv"), nil, 0666); err != nil { 1167 t.Fatal(err) 1168 } 1169 empty := []string{"'nodelocal://0/empty.csv'"} 1170 emptySchema := []interface{}{"nodelocal://0/empty.schema"} 1171 1172 // Support subtests by keeping track of the number of jobs that are executed. 1173 testNum := -1 1174 expectedRows := numFiles * rowsPerFile 1175 for i, tc := range []struct { 1176 name string 1177 query string // must have one `%s` for the files list. 
1178 args []interface{} // will have backupPath appended 1179 files []string 1180 jobOpts string 1181 err string 1182 }{ 1183 { 1184 "schema-in-file", 1185 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1186 schema, 1187 testFiles.files, 1188 ``, 1189 "", 1190 }, 1191 { 1192 "schema-in-file-intodb", 1193 `IMPORT TABLE csv1.t CREATE USING $1 CSV DATA (%s)`, 1194 schema, 1195 testFiles.files, 1196 ``, 1197 "", 1198 }, 1199 { 1200 "schema-in-query", 1201 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b)) CSV DATA (%s)`, 1202 nil, 1203 testFiles.files, 1204 ``, 1205 "", 1206 }, 1207 { 1208 "schema-in-query-opts", 1209 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b)) CSV DATA (%s) WITH delimiter = '|', comment = '#', nullif='', skip = '2'`, 1210 nil, 1211 testFiles.filesWithOpts, 1212 ` WITH comment = '#', delimiter = '|', "nullif" = '', skip = '2'`, 1213 "", 1214 }, 1215 { 1216 // Force some SST splits. 1217 "schema-in-file-sstsize", 1218 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH sstsize = '10K'`, 1219 schema, 1220 testFiles.files, 1221 ` WITH sstsize = '10K'`, 1222 "", 1223 }, 1224 { 1225 "empty-file", 1226 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1227 schema, 1228 empty, 1229 ``, 1230 "", 1231 }, 1232 { 1233 "empty-with-files", 1234 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1235 schema, 1236 append(empty, testFiles.files...), 1237 ``, 1238 "", 1239 }, 1240 { 1241 "schema-in-file-auto-decompress", 1242 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'auto'`, 1243 schema, 1244 testFiles.files, 1245 ` WITH decompress = 'auto'`, 1246 "", 1247 }, 1248 { 1249 "schema-in-file-no-decompress", 1250 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'none'`, 1251 schema, 1252 testFiles.files, 1253 ` WITH decompress = 'none'`, 1254 "", 1255 }, 1256 { 1257 "schema-in-file-explicit-gzip", 1258 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'gzip'`, 1259 schema, 1260 testFiles.gzipFiles, 1261 ` WITH decompress = 'gzip'`, 1262 "", 1263 }, 1264 { 1265 "schema-in-file-auto-gzip", 1266 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'auto'`, 1267 schema, 1268 testFiles.bzipFiles, 1269 ` WITH decompress = 'auto'`, 1270 "", 1271 }, 1272 { 1273 "schema-in-file-implicit-gzip", 1274 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1275 schema, 1276 testFiles.gzipFiles, 1277 ``, 1278 "", 1279 }, 1280 { 1281 "schema-in-file-explicit-bzip", 1282 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'bzip'`, 1283 schema, 1284 testFiles.bzipFiles, 1285 ` WITH decompress = 'bzip'`, 1286 "", 1287 }, 1288 { 1289 "schema-in-file-auto-bzip", 1290 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'auto'`, 1291 schema, 1292 testFiles.bzipFiles, 1293 ` WITH decompress = 'auto'`, 1294 "", 1295 }, 1296 { 1297 "schema-in-file-implicit-bzip", 1298 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1299 schema, 1300 testFiles.bzipFiles, 1301 ``, 1302 "", 1303 }, 1304 // NB: successes above, failures below, because we check the i-th job. 
1305 { 1306 "bad-opt-name", 1307 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b)) CSV DATA (%s) WITH foo = 'bar'`, 1308 nil, 1309 testFiles.files, 1310 ``, 1311 "invalid option \"foo\"", 1312 }, 1313 { 1314 "bad-computed-column", 1315 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING AS ('hello') STORED, INDEX (b), INDEX (a, b)) CSV DATA (%s) WITH skip = '2'`, 1316 nil, 1317 testFiles.filesWithOpts, 1318 ``, 1319 "computed columns not supported", 1320 }, 1321 { 1322 "primary-key-dup", 1323 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1324 schema, 1325 testFiles.filesWithDups, 1326 ``, 1327 "duplicate key in primary index", 1328 }, 1329 { 1330 "no-database", 1331 `IMPORT TABLE nonexistent.t CREATE USING $1 CSV DATA (%s)`, 1332 schema, 1333 testFiles.files, 1334 ``, 1335 `database does not exist: "nonexistent.t"`, 1336 }, 1337 { 1338 "into-db-fails", 1339 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH into_db = 'test'`, 1340 schema, 1341 testFiles.files, 1342 ``, 1343 `invalid option "into_db"`, 1344 }, 1345 { 1346 "schema-in-file-no-decompress-gzip", 1347 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'none'`, 1348 schema, 1349 testFiles.gzipFiles, 1350 ` WITH decompress = 'none'`, 1351 // This returns different errors for `make test` and `make testrace` but 1352 // field is in both error messages. 1353 `field`, 1354 }, 1355 { 1356 "schema-in-file-decompress-gzip", 1357 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH decompress = 'gzip'`, 1358 schema, 1359 testFiles.files, 1360 ` WITH decompress = 'gzip'`, 1361 "gzip: invalid header", 1362 }, 1363 { 1364 "csv-with-invalid-delimited-option", 1365 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s) WITH fields_delimited_by = '|'`, 1366 schema, 1367 testFiles.files, 1368 ``, 1369 "invalid option", 1370 }, 1371 { 1372 "empty-schema-in-file", 1373 `IMPORT TABLE t CREATE USING $1 CSV DATA (%s)`, 1374 emptySchema, 1375 testFiles.files, 1376 ``, 1377 "expected 1 create table statement", 1378 }, 1379 } { 1380 t.Run(tc.name, func(t *testing.T) { 1381 if strings.Contains(tc.name, "bzip") && len(testFiles.bzipFiles) == 0 { 1382 t.Skip("bzip2 not available on PATH?") 1383 } 1384 intodb := fmt.Sprintf(`csv%d`, i) 1385 sqlDB.Exec(t, fmt.Sprintf(`CREATE DATABASE %s`, intodb)) 1386 sqlDB.Exec(t, fmt.Sprintf(`SET DATABASE = %s`, intodb)) 1387 1388 var unused string 1389 var restored struct { 1390 rows, idx, bytes int 1391 } 1392 1393 var result int 1394 query := fmt.Sprintf(tc.query, strings.Join(tc.files, ", ")) 1395 testNum++ 1396 if tc.err != "" { 1397 sqlDB.ExpectErr(t, tc.err, query, tc.args...) 
1398 return 1399 } 1400 sqlDB.QueryRow(t, query, tc.args...).Scan( 1401 &unused, &unused, &unused, &restored.rows, &restored.idx, &restored.bytes, 1402 ) 1403 1404 jobPrefix := fmt.Sprintf(`IMPORT TABLE %s.public.t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b))`, intodb) 1405 1406 if err := jobutils.VerifySystemJob(t, sqlDB, testNum, jobspb.TypeImport, jobs.StatusSucceeded, jobs.Record{ 1407 Username: security.RootUser, 1408 Description: fmt.Sprintf(jobPrefix+` CSV DATA (%s)`+tc.jobOpts, strings.ReplaceAll(strings.Join(tc.files, ", "), "?AWS_SESSION_TOKEN=secrets", "?AWS_SESSION_TOKEN=redacted")), 1409 }); err != nil { 1410 t.Fatal(err) 1411 } 1412 1413 isEmpty := len(tc.files) == 1 && tc.files[0] == empty[0] 1414 1415 if isEmpty { 1416 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 1417 if expect := 0; result != expect { 1418 t.Fatalf("expected %d rows, got %d", expect, result) 1419 } 1420 return 1421 } 1422 1423 if expected, actual := expectedRows, restored.rows; expected != actual { 1424 t.Fatalf("expected %d rows, got %d", expected, actual) 1425 } 1426 1427 // Verify correct number of rows via COUNT. 1428 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 1429 if expect := expectedRows; result != expect { 1430 t.Fatalf("expected %d rows, got %d", expect, result) 1431 } 1432 1433 // Verify correct number of NULLs via COUNT. 1434 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE b IS NULL`).Scan(&result) 1435 expectedNulls := 0 1436 if strings.Contains(tc.query, "nullif") { 1437 expectedNulls = expectedRows / 4 1438 } 1439 if result != expectedNulls { 1440 t.Fatalf("expected %d rows, got %d", expectedNulls, result) 1441 } 1442 1443 // Verify sstsize created > 1 SST files. 1444 if tc.name == "schema-in-file-sstsize-dist" { 1445 pattern := filepath.Join(baseDir, fmt.Sprintf("%d", i), "*.sst") 1446 matches, err := filepath.Glob(pattern) 1447 if err != nil { 1448 t.Fatal(err) 1449 } 1450 if len(matches) < 2 { 1451 t.Fatal("expected > 1 SST files") 1452 } 1453 } 1454 1455 }) 1456 } 1457 1458 // Verify unique_rowid is replaced for tables without primary keys. 1459 t.Run("unique_rowid", func(t *testing.T) { 1460 sqlDB.Exec(t, "CREATE DATABASE pk") 1461 sqlDB.Exec(t, fmt.Sprintf(`IMPORT TABLE pk.t (a INT8, b STRING) CSV DATA (%s)`, strings.Join(testFiles.files, ", "))) 1462 // Verify the rowids are being generated as expected. 1463 sqlDB.CheckQueryResults(t, 1464 `SELECT count(*) FROM pk.t`, 1465 sqlDB.QueryStr(t, ` 1466 SELECT count(*) FROM 1467 (SELECT * FROM 1468 (SELECT generate_series(0, $1 - 1) file), 1469 (SELECT generate_series(1, $2) rownum) 1470 ) 1471 `, numFiles, rowsPerFile), 1472 ) 1473 }) 1474 1475 // Verify a failed IMPORT won't prevent a second IMPORT. 1476 t.Run("checkpoint-leftover", func(t *testing.T) { 1477 sqlDB.Exec(t, "CREATE DATABASE checkpoint; USE checkpoint") 1478 1479 // Specify wrong number of columns. 1480 sqlDB.ExpectErr( 1481 t, "expected 1 fields, got 2", 1482 fmt.Sprintf(`IMPORT TABLE t (a INT8 PRIMARY KEY) CSV DATA (%s)`, testFiles.files[0]), 1483 ) 1484 1485 // Specify wrong table name; still shouldn't leave behind a checkpoint file. 1486 sqlDB.ExpectErr( 1487 t, `file specifies a schema for table t`, 1488 fmt.Sprintf(`IMPORT TABLE bad CREATE USING $1 CSV DATA (%s)`, testFiles.files[0]), schema[0], 1489 ) 1490 1491 // Expect it to succeed with correct columns. 1492 sqlDB.Exec(t, fmt.Sprintf(`IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING) CSV DATA (%s)`, testFiles.files[0])) 1493 1494 // A second attempt should fail fast. 
A "slow fail" is the error message 1495 // "restoring table desc and namespace entries: table already exists". 1496 sqlDB.ExpectErr( 1497 t, `relation "t" already exists`, 1498 fmt.Sprintf(`IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING) CSV DATA (%s)`, testFiles.files[0]), 1499 ) 1500 }) 1501 1502 // Verify that a failed import will clean up after itself. This means: 1503 // - Delete the garbage data that it partially imported. 1504 // - Delete the table descriptor for the table that was created during the 1505 // import. 1506 t.Run("failed-import-gc", func(t *testing.T) { 1507 forceFailure = true 1508 defer func() { forceFailure = false }() 1509 defer gcjob.SetSmallMaxGCIntervalForTest()() 1510 beforeImport, err := tree.MakeDTimestampTZ(tc.Server(0).Clock().Now().GoTime(), time.Millisecond) 1511 if err != nil { 1512 t.Fatal(err) 1513 } 1514 1515 sqlDB.Exec(t, "CREATE DATABASE failedimport; USE failedimport;") 1516 // Hit a failure during import. 1517 sqlDB.ExpectErr( 1518 t, `testing injected failure`, 1519 fmt.Sprintf(`IMPORT TABLE t (a INT PRIMARY KEY, b STRING) CSV DATA (%s)`, testFiles.files[1]), 1520 ) 1521 // Nudge the registry to quickly adopt the job. 1522 tc.Server(0).JobRegistry().(*jobs.Registry).TestingNudgeAdoptionQueue() 1523 1524 // In the case of the test, the ID of the table that will be cleaned up due 1525 // to the failed import will be one higher than the ID of the empty database 1526 // it was created in. 1527 dbID := sqlutils.QueryDatabaseID(t, sqlDB.DB, "failedimport") 1528 tableID := sqlbase.ID(dbID + 1) 1529 var td *sqlbase.TableDescriptor 1530 if err := kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 1531 var err error 1532 td, err = sqlbase.GetTableDescFromID(ctx, txn, keys.SystemSQLCodec, tableID) 1533 return err 1534 }); err != nil { 1535 t.Fatal(err) 1536 } 1537 // Ensure that we have garbage written to the descriptor that we want to 1538 // clean up. 1539 tests.CheckKeyCount(t, kvDB, td.TableSpan(keys.SystemSQLCodec), rowsPerFile) 1540 1541 // Allow GC to progress. 1542 close(blockGC) 1543 // Ensure that a GC job was created, and wait for it to finish. 1544 doneGCQuery := fmt.Sprintf( 1545 "SELECT count(*) FROM [SHOW JOBS] WHERE job_type = '%s' AND status = '%s' AND created > %s", 1546 "SCHEMA CHANGE GC", jobs.StatusSucceeded, beforeImport.String(), 1547 ) 1548 sqlDB.CheckQueryResultsRetry(t, doneGCQuery, [][]string{{"1"}}) 1549 // Expect there are no more KVs for this span. 1550 tests.CheckKeyCount(t, kvDB, td.TableSpan(keys.SystemSQLCodec), 0) 1551 // Expect that the table descriptor is deleted. 1552 if err := kvDB.Txn(ctx, func(ctx context.Context, txn *kv.Txn) error { 1553 _, err := sqlbase.GetTableDescFromID(ctx, txn, keys.SystemSQLCodec, tableID) 1554 if !testutils.IsError(err, "descriptor not found") { 1555 return err 1556 } 1557 return nil 1558 }); err != nil { 1559 t.Fatal(err) 1560 } 1561 }) 1562 1563 // Test basic role based access control. Users who have the admin role should 1564 // be able to IMPORT. 
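	// Both statement forms are exercised below through testuser's own SQL
	// connection: IMPORT TABLE (which creates the table) and IMPORT INTO an
	// existing table.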
1565 t.Run("RBAC", func(t *testing.T) { 1566 sqlDB.Exec(t, `CREATE USER testuser`) 1567 sqlDB.Exec(t, `GRANT admin TO testuser`) 1568 pgURL, cleanupFunc := sqlutils.PGUrl( 1569 t, tc.Server(0).ServingSQLAddr(), "TestImportPrivileges-testuser", url.User("testuser"), 1570 ) 1571 defer cleanupFunc() 1572 testuser, err := gosql.Open("postgres", pgURL.String()) 1573 if err != nil { 1574 t.Fatal(err) 1575 } 1576 defer testuser.Close() 1577 1578 t.Run("IMPORT TABLE", func(t *testing.T) { 1579 if _, err := testuser.Exec(fmt.Sprintf(`IMPORT TABLE rbac_table_t (a INT8 PRIMARY KEY, b STRING) CSV DATA (%s)`, testFiles.files[0])); err != nil { 1580 t.Fatal(err) 1581 } 1582 }) 1583 1584 t.Run("IMPORT INTO", func(t *testing.T) { 1585 if _, err := testuser.Exec("CREATE TABLE rbac_into_t (a INT8 PRIMARY KEY, b STRING)"); err != nil { 1586 t.Fatal(err) 1587 } 1588 if _, err := testuser.Exec(fmt.Sprintf(`IMPORT INTO rbac_into_t (a, b) CSV DATA (%s)`, testFiles.files[0])); err != nil { 1589 t.Fatal(err) 1590 } 1591 }) 1592 }) 1593 1594 // Verify DEFAULT columns and SERIAL are allowed but not evaluated. 1595 t.Run("allow-default", func(t *testing.T) { 1596 var data string 1597 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 1598 if r.Method == "GET" { 1599 _, _ = w.Write([]byte(data)) 1600 } 1601 })) 1602 defer srv.Close() 1603 1604 sqlDB.Exec(t, `CREATE DATABASE d`) 1605 sqlDB.Exec(t, `SET DATABASE = d`) 1606 1607 const ( 1608 query = `IMPORT TABLE t ( 1609 a SERIAL8, 1610 b INT8 DEFAULT unique_rowid(), 1611 c STRING DEFAULT 's', 1612 d SERIAL8, 1613 e INT8 DEFAULT unique_rowid(), 1614 f STRING DEFAULT 's', 1615 PRIMARY KEY (a, b, c) 1616 ) CSV DATA ($1)` 1617 nullif = ` WITH nullif=''` 1618 ) 1619 1620 data = ",5,e,7,," 1621 t.Run(data, func(t *testing.T) { 1622 sqlDB.ExpectErr( 1623 t, `row 1: parse "a" as INT8: could not parse ""`, 1624 query, srv.URL, 1625 ) 1626 sqlDB.ExpectErr( 1627 t, `row 1: generate insert row: null value in column "a" violates not-null constraint`, 1628 query+nullif, srv.URL, 1629 ) 1630 }) 1631 data = "2,5,e,,," 1632 t.Run(data, func(t *testing.T) { 1633 sqlDB.ExpectErr( 1634 t, `row 1: generate insert row: null value in column "d" violates not-null constraint`, 1635 query+nullif, srv.URL, 1636 ) 1637 }) 1638 data = "2,,e,,," 1639 t.Run(data, func(t *testing.T) { 1640 sqlDB.ExpectErr( 1641 t, `"b" violates not-null constraint`, 1642 query+nullif, srv.URL, 1643 ) 1644 }) 1645 1646 data = "2,5,,,," 1647 t.Run(data, func(t *testing.T) { 1648 sqlDB.ExpectErr( 1649 t, `"c" violates not-null constraint`, 1650 query+nullif, srv.URL, 1651 ) 1652 }) 1653 1654 data = "2,5,e,-1,," 1655 t.Run(data, func(t *testing.T) { 1656 sqlDB.Exec(t, query+nullif, srv.URL) 1657 sqlDB.CheckQueryResults(t, 1658 `SELECT * FROM t`, 1659 sqlDB.QueryStr(t, `SELECT 2, 5, 'e', -1, NULL, NULL`), 1660 ) 1661 }) 1662 }) 1663 } 1664 1665 func TestExportImportRoundTrip(t *testing.T) { 1666 defer leaktest.AfterTest(t)() 1667 ctx := context.Background() 1668 baseDir, cleanup := testutils.TempDir(t) 1669 defer cleanup() 1670 tc := testcluster.StartTestCluster( 1671 t, 1, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: baseDir}}) 1672 defer tc.Stopper().Stop(ctx) 1673 conn := tc.Conns[0] 1674 sqlDB := sqlutils.MakeSQLRunner(conn) 1675 1676 tests := []struct { 1677 stmts string 1678 tbl string 1679 expected string 1680 }{ 1681 // Note that the directory names that are being imported from and exported into 1682 // need to differ across runs, so we let the test 
runner format the stmts field 1683 // with a unique directory name per run. 1684 { 1685 stmts: `EXPORT INTO CSV 'nodelocal://0/%[1]s' FROM SELECT ARRAY['a', 'b', 'c']; 1686 IMPORT TABLE t (x TEXT[]) CSV DATA ('nodelocal://0/%[1]s/n1.0.csv')`, 1687 tbl: "t", 1688 expected: `SELECT ARRAY['a', 'b', 'c']`, 1689 }, 1690 { 1691 stmts: `EXPORT INTO CSV 'nodelocal://0/%[1]s' FROM SELECT ARRAY[b'abc', b'\141\142\143', b'\x61\x62\x63']; 1692 IMPORT TABLE t (x BYTES[]) CSV DATA ('nodelocal://0/%[1]s/n1.0.csv')`, 1693 tbl: "t", 1694 expected: `SELECT ARRAY[b'abc', b'\141\142\143', b'\x61\x62\x63']`, 1695 }, 1696 { 1697 stmts: `EXPORT INTO CSV 'nodelocal://0/%[1]s' FROM SELECT 'dog' COLLATE en; 1698 IMPORT TABLE t (x STRING COLLATE en) CSV DATA ('nodelocal://0/%[1]s/n1.0.csv')`, 1699 tbl: "t", 1700 expected: `SELECT 'dog' COLLATE en`, 1701 }, 1702 } 1703 1704 for i, test := range tests { 1705 sqlDB.Exec(t, fmt.Sprintf(`DROP TABLE IF EXISTS %s`, test.tbl)) 1706 sqlDB.Exec(t, fmt.Sprintf(test.stmts, fmt.Sprintf("run%d", i))) 1707 sqlDB.CheckQueryResults(t, fmt.Sprintf(`SELECT * FROM %s`, test.tbl), sqlDB.QueryStr(t, test.expected)) 1708 } 1709 } 1710 1711 // TODO(adityamaru): Tests still need to be added incrementally as 1712 // relevant IMPORT INTO logic is added. Some of them include: 1713 // -> FK and constraint violation 1714 // -> CSV containing keys which will shadow existing data 1715 // -> Rollback of a failed IMPORT INTO 1716 func TestImportIntoCSV(t *testing.T) { 1717 defer leaktest.AfterTest(t)() 1718 1719 if testing.Short() { 1720 t.Skip("short") 1721 } 1722 1723 const nodes = 3 1724 1725 numFiles := nodes + 2 1726 rowsPerFile := 1000 1727 rowsPerRaceFile := 16 1728 1729 ctx := context.Background() 1730 baseDir := filepath.Join("testdata", "csv") 1731 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: baseDir}}) 1732 defer tc.Stopper().Stop(ctx) 1733 conn := tc.Conns[0] 1734 1735 var forceFailure bool 1736 var importBodyFinished chan struct{} 1737 var delayImportFinish chan struct{} 1738 1739 for i := range tc.Servers { 1740 tc.Servers[i].JobRegistry().(*jobs.Registry).TestingResumerCreationKnobs = map[jobspb.Type]func(raw jobs.Resumer) jobs.Resumer{ 1741 jobspb.TypeImport: func(raw jobs.Resumer) jobs.Resumer { 1742 r := raw.(*importResumer) 1743 r.testingKnobs.afterImport = func(_ backupccl.RowCount) error { 1744 if importBodyFinished != nil { 1745 importBodyFinished <- struct{}{} 1746 } 1747 if delayImportFinish != nil { 1748 <-delayImportFinish 1749 } 1750 1751 if forceFailure { 1752 return errors.New("testing injected failure") 1753 } 1754 return nil 1755 } 1756 return r 1757 }, 1758 } 1759 } 1760 1761 sqlDB := sqlutils.MakeSQLRunner(conn) 1762 1763 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 1764 1765 testFiles := makeCSVData(t, numFiles, rowsPerFile, nodes, rowsPerRaceFile) 1766 if util.RaceEnabled { 1767 // This test takes a while with the race detector, so reduce the number of 1768 // files and rows per file in an attempt to speed it up. 1769 numFiles = nodes 1770 rowsPerFile = rowsPerRaceFile 1771 } 1772 1773 if err := ioutil.WriteFile(filepath.Join(baseDir, "empty.csv"), nil, 0666); err != nil { 1774 t.Fatal(err) 1775 } 1776 empty := []string{"'nodelocal://0/empty.csv'"} 1777 1778 // Support subtests by keeping track of the number of jobs that are executed. 
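	// Each case below is expanded by fmt.Sprintf into a statement of the form
	// (a sketch; <file list> stands in for the nodelocal URLs in tc.files):
	//
	//   IMPORT INTO t (a, b) CSV DATA (<file list>) [WITH <options>]
	//
	// and, for successful cases, the description of the resulting system job is
	// verified against that same statement text.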
1779 testNum := -1 1780 insertedRows := numFiles * rowsPerFile 1781 1782 for _, tc := range []struct { 1783 name string 1784 query string // must have one `%s` for the files list. 1785 files []string 1786 jobOpts string 1787 err string 1788 }{ 1789 { 1790 "simple-import-into", 1791 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1792 testFiles.files, 1793 ``, 1794 "", 1795 }, 1796 { 1797 "import-into-with-opts", 1798 `IMPORT INTO t (a, b) CSV DATA (%s) WITH delimiter = '|', comment = '#', nullif='', skip = '2'`, 1799 testFiles.filesWithOpts, 1800 ` WITH comment = '#', delimiter = '|', "nullif" = '', skip = '2'`, 1801 "", 1802 }, 1803 { 1804 // Force some SST splits. 1805 "import-into-sstsize", 1806 `IMPORT INTO t (a, b) CSV DATA (%s) WITH sstsize = '10K'`, 1807 testFiles.files, 1808 ` WITH sstsize = '10K'`, 1809 "", 1810 }, 1811 { 1812 "empty-file", 1813 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1814 empty, 1815 ``, 1816 "", 1817 }, 1818 { 1819 "empty-with-files", 1820 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1821 append(empty, testFiles.files...), 1822 ``, 1823 "", 1824 }, 1825 { 1826 "import-into-auto-decompress", 1827 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1828 testFiles.files, 1829 ` WITH decompress = 'auto'`, 1830 "", 1831 }, 1832 { 1833 "import-into-no-decompress", 1834 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'none'`, 1835 testFiles.files, 1836 ` WITH decompress = 'none'`, 1837 "", 1838 }, 1839 { 1840 "import-into-explicit-gzip", 1841 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'gzip'`, 1842 testFiles.gzipFiles, 1843 ` WITH decompress = 'gzip'`, 1844 "", 1845 }, 1846 { 1847 "import-into-auto-gzip", 1848 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1849 testFiles.gzipFiles, 1850 ` WITH decompress = 'auto'`, 1851 "", 1852 }, 1853 { 1854 "import-into-implicit-gzip", 1855 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1856 testFiles.gzipFiles, 1857 ``, 1858 "", 1859 }, 1860 { 1861 "import-into-explicit-bzip", 1862 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'bzip'`, 1863 testFiles.bzipFiles, 1864 ` WITH decompress = 'bzip'`, 1865 "", 1866 }, 1867 { 1868 "import-into-auto-bzip", 1869 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1870 testFiles.bzipFiles, 1871 ` WITH decompress = 'auto'`, 1872 "", 1873 }, 1874 { 1875 "import-into-implicit-bzip", 1876 `IMPORT INTO t (a, b) CSV DATA (%s)`, 1877 testFiles.bzipFiles, 1878 ``, 1879 "", 1880 }, 1881 { 1882 "import-into-no-decompress-wildcard", 1883 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'none'`, 1884 testFiles.filesUsingWildcard, 1885 ` WITH decompress = 'none'`, 1886 "", 1887 }, 1888 { 1889 "import-into-explicit-gzip-wildcard", 1890 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'gzip'`, 1891 testFiles.gzipFilesUsingWildcard, 1892 ` WITH decompress = 'gzip'`, 1893 "", 1894 }, 1895 { 1896 "import-into-auto-bzip-wildcard", 1897 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1898 testFiles.gzipFilesUsingWildcard, 1899 ` WITH decompress = 'auto'`, 1900 "", 1901 }, 1902 // NB: successes above, failures below, because we check the i-th job. 
1903 { 1904 "import-into-bad-opt-name", 1905 `IMPORT INTO t (a, b) CSV DATA (%s) WITH foo = 'bar'`, 1906 testFiles.files, 1907 ``, 1908 "invalid option \"foo\"", 1909 }, 1910 { 1911 "import-into-no-database", 1912 `IMPORT INTO nonexistent.t (a, b) CSV DATA (%s)`, 1913 testFiles.files, 1914 ``, 1915 `database does not exist: "nonexistent.t"`, 1916 }, 1917 { 1918 "import-into-no-table", 1919 `IMPORT INTO g (a, b) CSV DATA (%s)`, 1920 testFiles.files, 1921 ``, 1922 `pq: relation "g" does not exist`, 1923 }, 1924 { 1925 "import-into-no-decompress-gzip", 1926 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'none'`, 1927 testFiles.gzipFiles, 1928 ` WITH decompress = 'none'`, 1929 // This returns different errors for `make test` and `make testrace` but 1930 // field is in both error messages. 1931 "field", 1932 }, 1933 { 1934 "import-into-no-decompress-gzip", 1935 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'gzip'`, 1936 testFiles.files, 1937 ` WITH decompress = 'gzip'`, 1938 "gzip: invalid header", 1939 }, 1940 { 1941 "import-no-files-match-wildcard", 1942 `IMPORT INTO t (a, b) CSV DATA (%s) WITH decompress = 'auto'`, 1943 []string{`'nodelocal://0/data-[0-9][0-9]*'`}, 1944 ` WITH decompress = 'auto'`, 1945 `pq: no files matched uri provided`, 1946 }, 1947 { 1948 "import-into-no-glob-wildcard", 1949 `IMPORT INTO t (a, b) CSV DATA (%s) WITH disable_glob_matching`, 1950 testFiles.filesUsingWildcard, 1951 ` WITH disable_glob_matching`, 1952 "pq: (.+) no such file or directory", 1953 }, 1954 } { 1955 t.Run(tc.name, func(t *testing.T) { 1956 if strings.Contains(tc.name, "bzip") && len(testFiles.bzipFiles) == 0 { 1957 t.Skip("bzip2 not available on PATH?") 1958 } 1959 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING)`) 1960 defer sqlDB.Exec(t, `DROP TABLE t`) 1961 1962 var unused string 1963 var restored struct { 1964 rows, idx, bytes int 1965 } 1966 1967 // Insert the test data 1968 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 1969 numExistingRows := len(insert) 1970 1971 for i, v := range insert { 1972 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 1973 } 1974 1975 var result int 1976 query := fmt.Sprintf(tc.query, strings.Join(tc.files, ", ")) 1977 testNum++ 1978 if tc.err != "" { 1979 sqlDB.ExpectErr(t, tc.err, query) 1980 return 1981 } 1982 1983 sqlDB.QueryRow(t, query).Scan( 1984 &unused, &unused, &unused, &restored.rows, &restored.idx, &restored.bytes, 1985 ) 1986 1987 jobPrefix := fmt.Sprintf(`IMPORT INTO defaultdb.public.t(a, b)`) 1988 if err := jobutils.VerifySystemJob(t, sqlDB, testNum, jobspb.TypeImport, jobs.StatusSucceeded, jobs.Record{ 1989 Username: security.RootUser, 1990 Description: fmt.Sprintf(jobPrefix+` CSV DATA (%s)`+tc.jobOpts, strings.ReplaceAll(strings.Join(tc.files, ", "), "?AWS_SESSION_TOKEN=secrets", "?AWS_SESSION_TOKEN=redacted")), 1991 }); err != nil { 1992 t.Fatal(err) 1993 } 1994 1995 isEmpty := len(tc.files) == 1 && tc.files[0] == empty[0] 1996 if isEmpty { 1997 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 1998 if result != numExistingRows { 1999 t.Fatalf("expected %d rows, got %d", numExistingRows, result) 2000 } 2001 return 2002 } 2003 2004 if expected, actual := insertedRows, restored.rows; expected != actual { 2005 t.Fatalf("expected %d rows, got %d", expected, actual) 2006 } 2007 2008 // Verify correct number of rows via COUNT. 
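			// (IMPORT INTO appends to the table, so the expected total is the
			// seeded numExistingRows plus the rows inserted by the import.)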
2009 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 2010 if expect := numExistingRows + insertedRows; result != expect { 2011 t.Fatalf("expected %d rows, got %d", expect, result) 2012 } 2013 2014 // Verify correct number of NULLs via COUNT. 2015 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE b IS NULL`).Scan(&result) 2016 expectedNulls := 0 2017 if strings.Contains(tc.query, "nullif") { 2018 expectedNulls = insertedRows / 4 2019 } 2020 if result != expectedNulls { 2021 t.Fatalf("expected %d rows, got %d", expectedNulls, result) 2022 } 2023 }) 2024 } 2025 2026 // Verify unique_rowid is replaced for tables without primary keys. 2027 t.Run("import-into-unique_rowid", func(t *testing.T) { 2028 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING)`) 2029 defer sqlDB.Exec(t, `DROP TABLE t`) 2030 2031 // Insert the test data 2032 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2033 numExistingRows := len(insert) 2034 2035 for i, v := range insert { 2036 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2037 } 2038 2039 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, strings.Join(testFiles.files, ", "))) 2040 // Verify the rowids are being generated as expected. 2041 sqlDB.CheckQueryResults(t, 2042 `SELECT count(*) FROM t`, 2043 sqlDB.QueryStr(t, ` 2044 SELECT count(*) + $3 FROM 2045 (SELECT * FROM 2046 (SELECT generate_series(0, $1 - 1) file), 2047 (SELECT generate_series(1, $2) rownum) 2048 ) 2049 `, numFiles, rowsPerFile, numExistingRows), 2050 ) 2051 }) 2052 2053 // Verify a failed IMPORT INTO won't prevent a subsequent IMPORT INTO. 2054 t.Run("import-into-checkpoint-leftover", func(t *testing.T) { 2055 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2056 defer sqlDB.Exec(t, `DROP TABLE t`) 2057 2058 // Insert the test data 2059 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2060 2061 for i, v := range insert { 2062 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2063 } 2064 2065 // Hit a failure during import. 2066 forceFailure = true 2067 sqlDB.ExpectErr( 2068 t, `testing injected failure`, 2069 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[1]), 2070 ) 2071 forceFailure = false 2072 2073 // Expect it to succeed on re-attempt. 2074 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[1])) 2075 }) 2076 2077 // Verify that during IMPORT INTO the table is offline. 2078 t.Run("offline-state", func(t *testing.T) { 2079 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2080 defer sqlDB.Exec(t, `DROP TABLE t`) 2081 2082 // Insert the test data 2083 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2084 2085 for i, v := range insert { 2086 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2087 } 2088 2089 // Hit a failure during import. 
2090 importBodyFinished = make(chan struct{}) 2091 delayImportFinish = make(chan struct{}) 2092 defer func() { 2093 importBodyFinished = nil 2094 delayImportFinish = nil 2095 }() 2096 2097 var unused interface{} 2098 2099 g := ctxgroup.WithContext(ctx) 2100 g.GoCtx(func(ctx context.Context) error { 2101 defer close(importBodyFinished) 2102 _, err := sqlDB.DB.ExecContext(ctx, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[1])) 2103 return err 2104 }) 2105 g.GoCtx(func(ctx context.Context) error { 2106 defer close(delayImportFinish) 2107 <-importBodyFinished 2108 2109 err := sqlDB.DB.QueryRowContext(ctx, `SELECT 1 FROM t`).Scan(&unused) 2110 if !testutils.IsError(err, "relation \"t\" does not exist") { 2111 return err 2112 } 2113 return nil 2114 }) 2115 if err := g.Wait(); err != nil { 2116 t.Fatal(err) 2117 } 2118 t.Skip() 2119 2120 // Expect it to succeed on re-attempt. 2121 sqlDB.QueryRow(t, `SELECT 1 FROM t`).Scan(&unused) 2122 }) 2123 2124 // Tests for user specified target columns in IMPORT INTO statements. 2125 // 2126 // Tests IMPORT INTO with various target column sets, and an implicit PK 2127 // provided by the hidden column row_id. 2128 t.Run("target-cols-with-default-pk", func(t *testing.T) { 2129 var data string 2130 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2131 if r.Method == "GET" { 2132 _, _ = w.Write([]byte(data)) 2133 } 2134 })) 2135 defer srv.Close() 2136 2137 createQuery := `CREATE TABLE t (a INT8, 2138 b INT8, 2139 c STRING, 2140 d INT8, 2141 e INT8, 2142 f STRING)` 2143 2144 t.Run(data, func(t *testing.T) { 2145 sqlDB.Exec(t, createQuery) 2146 defer sqlDB.Exec(t, `DROP TABLE t`) 2147 2148 data = "1" 2149 sqlDB.Exec(t, `IMPORT INTO t (a) CSV DATA ($1)`, srv.URL) 2150 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, 2151 sqlDB.QueryStr(t, `SELECT 1, NULL, NULL, NULL, NULL, 'NULL'`), 2152 ) 2153 }) 2154 t.Run(data, func(t *testing.T) { 2155 sqlDB.Exec(t, createQuery) 2156 defer sqlDB.Exec(t, `DROP TABLE t`) 2157 2158 data = "1,teststr" 2159 sqlDB.Exec(t, `IMPORT INTO t (a, f) CSV DATA ($1)`, srv.URL) 2160 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, 2161 sqlDB.QueryStr(t, `SELECT 1, NULL, NULL, NULL, NULL, 'teststr'`), 2162 ) 2163 }) 2164 t.Run(data, func(t *testing.T) { 2165 sqlDB.Exec(t, createQuery) 2166 defer sqlDB.Exec(t, `DROP TABLE t`) 2167 2168 data = "7,12,teststr" 2169 sqlDB.Exec(t, `IMPORT INTO t (d, e, f) CSV DATA ($1)`, srv.URL) 2170 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, 2171 sqlDB.QueryStr(t, `SELECT NULL, NULL, NULL, 7, 12, 'teststr'`), 2172 ) 2173 }) 2174 }) 2175 2176 // Tests IMPORT INTO with a target column set, and an explicit PK. 
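// For reference (derived from the subtest below, not asserted verbatim): only
// column a is targeted, so after the IMPORT the table should hold the seven
// pre-existing rows plus ten imported rows with b left NULL, i.e.
//
//   SELECT count(*) FROM t WHERE b IS NULL   -- expected to return 10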
2177 t.Run("target-cols-with-explicit-pk", func(t *testing.T) { 2178 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2179 defer sqlDB.Exec(t, `DROP TABLE t`) 2180 2181 // Insert the test data 2182 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2183 2184 for i, v := range insert { 2185 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i+1000, v) 2186 } 2187 2188 data := []string{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10"} 2189 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2190 if r.Method == "GET" { 2191 _, _ = w.Write([]byte(strings.Join(data, "\n"))) 2192 } 2193 })) 2194 defer srv.Close() 2195 2196 sqlDB.Exec(t, "IMPORT INTO t (a) CSV DATA ($1)", srv.URL) 2197 2198 var result int 2199 numExistingRows := len(insert) 2200 // Verify that the target column has been populated. 2201 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE a IS NOT NULL`).Scan(&result) 2202 if expect := numExistingRows + len(data); result != expect { 2203 t.Fatalf("expected %d rows, got %d", expect, result) 2204 } 2205 2206 // Verify that the non-target columns have NULLs. 2207 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE b IS NULL`).Scan(&result) 2208 expectedNulls := len(data) 2209 if result != expectedNulls { 2210 t.Fatalf("expected %d rows, got %d", expectedNulls, result) 2211 } 2212 }) 2213 2214 // Tests IMPORT INTO with a CSV file having more columns when targeted, expected to 2215 // get an error indicating the error. 2216 t.Run("csv-with-more-than-targeted-columns", func(t *testing.T) { 2217 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2218 defer sqlDB.Exec(t, `DROP TABLE t`) 2219 2220 // Expect an error if attempting to IMPORT INTO with CSV having more columns 2221 // than targeted. 2222 sqlDB.ExpectErr( 2223 t, `row 1: expected 1 fields, got 2`, 2224 fmt.Sprintf("IMPORT INTO t (a) CSV DATA (%s)", testFiles.files[0]), 2225 ) 2226 }) 2227 2228 // Tests IMPORT INTO with a target column set which does not include all PKs. 2229 // As a result the non-target column is non-nullable, which is not allowed 2230 // until we support DEFAULT expressions. 2231 t.Run("target-cols-excluding-explicit-pk", func(t *testing.T) { 2232 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2233 defer sqlDB.Exec(t, `DROP TABLE t`) 2234 2235 // Expect an error if attempting to IMPORT INTO a target list which does 2236 // not include all the PKs of the table. 2237 sqlDB.ExpectErr( 2238 t, `pq: all non-target columns in IMPORT INTO must be nullable`, 2239 fmt.Sprintf(`IMPORT INTO t (b) CSV DATA (%s)`, testFiles.files[0]), 2240 ) 2241 }) 2242 2243 // Tests behavior when the existing table being imported into has more columns 2244 // in its schema then the source CSV file. 
2245 t.Run("more-table-cols-than-csv", func(t *testing.T) { 2246 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING, c INT)`) 2247 defer sqlDB.Exec(t, `DROP TABLE t`) 2248 2249 // Insert the test data 2250 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2251 2252 for i, v := range insert { 2253 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2254 } 2255 2256 sqlDB.ExpectErr( 2257 t, "row 1: expected 3 fields, got 2", 2258 fmt.Sprintf(`IMPORT INTO t (a, b, c) CSV DATA (%s)`, testFiles.files[0]), 2259 ) 2260 }) 2261 2262 // Tests the case where we create table columns in specific order while trying 2263 // to import data from csv where columns order is different and import expression 2264 // defines in what order columns should be imported to align with table definition 2265 t.Run("target-cols-reordered", func(t *testing.T) { 2266 sqlDB.Exec(t, "CREATE TABLE t (a INT PRIMARY KEY, b INT, c STRING NOT NULL, d DECIMAL NOT NULL)") 2267 defer sqlDB.Exec(t, `DROP TABLE t`) 2268 2269 const data = "3.14,c is a string,1\n2.73,another string,2" 2270 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2271 if r.Method == "GET" { 2272 _, _ = w.Write([]byte(data)) 2273 } 2274 })) 2275 defer srv.Close() 2276 2277 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (d, c, a) CSV DATA ("%s")`, srv.URL)) 2278 sqlDB.CheckQueryResults(t, `SELECT * FROM t ORDER BY a`, 2279 [][]string{{"1", "NULL", "c is a string", "3.14"}, {"2", "NULL", "another string", "2.73"}}, 2280 ) 2281 }) 2282 2283 // Tests that we can import into the table even if the table has columns named with 2284 // reserved keywords. 2285 t.Run("cols-named-with-reserved-keywords", func(t *testing.T) { 2286 sqlDB.Exec(t, `CREATE TABLE t ("select" INT PRIMARY KEY, "from" INT, "Some-c,ol-'Name'" STRING NOT NULL)`) 2287 defer sqlDB.Exec(t, `DROP TABLE t`) 2288 2289 const data = "today,1,2" 2290 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2291 if r.Method == "GET" { 2292 _, _ = w.Write([]byte(data)) 2293 } 2294 })) 2295 defer srv.Close() 2296 2297 sqlDB.Exec(t, fmt.Sprintf( 2298 `IMPORT INTO t ("Some-c,ol-'Name'", "select", "from") CSV DATA ("%s")`, srv.URL)) 2299 sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"1", "2", "today"}}) 2300 }) 2301 2302 // Tests behvior when the existing table being imported into has fewer columns 2303 // in its schema then the source CSV file. 2304 t.Run("fewer-table-cols-than-csv", func(t *testing.T) { 2305 sqlDB.Exec(t, `CREATE TABLE t (a INT)`) 2306 defer sqlDB.Exec(t, `DROP TABLE t`) 2307 2308 sqlDB.ExpectErr( 2309 t, "row 1: expected 1 fields, got 2", 2310 fmt.Sprintf(`IMPORT INTO t (a) CSV DATA (%s)`, testFiles.files[0]), 2311 ) 2312 }) 2313 2314 // Tests IMPORT INTO without any target columns specified. This implies an 2315 // import of all columns in the exisiting table. 2316 t.Run("no-target-cols-specified", func(t *testing.T) { 2317 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2318 defer sqlDB.Exec(t, `DROP TABLE t`) 2319 2320 // Insert the test data 2321 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2322 2323 for i, v := range insert { 2324 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i+rowsPerFile, v) 2325 } 2326 2327 sqlDB.Exec(t, fmt.Sprintf("IMPORT INTO t CSV DATA (%s)", testFiles.files[0])) 2328 2329 var result int 2330 numExistingRows := len(insert) 2331 // Verify that all columns have been populated with imported data. 
2332 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE a IS NOT NULL`).Scan(&result) 2333 if expect := numExistingRows + rowsPerFile; result != expect { 2334 t.Fatalf("expected %d rows, got %d", expect, result) 2335 } 2336 2337 sqlDB.QueryRow(t, `SELECT count(*) FROM t WHERE b IS NOT NULL`).Scan(&result) 2338 if expect := numExistingRows + rowsPerFile; result != expect { 2339 t.Fatalf("expected %d rows, got %d", expect, result) 2340 } 2341 }) 2342 2343 // IMPORT INTO does not support DEFAULT expressions for either target or 2344 // non-target columns. 2345 t.Run("import-into-check-no-default-cols", func(t *testing.T) { 2346 sqlDB.Exec(t, `CREATE TABLE t (a INT DEFAULT 1, b STRING)`) 2347 defer sqlDB.Exec(t, `DROP TABLE t`) 2348 2349 // Insert the test data 2350 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2351 2352 for i, v := range insert { 2353 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2354 } 2355 2356 sqlDB.ExpectErr( 2357 t, fmt.Sprintf("pq: cannot IMPORT INTO a table with a DEFAULT expression for any of its columns"), 2358 fmt.Sprintf(`IMPORT INTO t (a) CSV DATA (%s)`, testFiles.files[0]), 2359 ) 2360 }) 2361 2362 // IMPORT INTO does not currently support import into interleaved tables. 2363 t.Run("import-into-rejects-interleaved-tables", func(t *testing.T) { 2364 sqlDB.Exec(t, `CREATE TABLE parent (parent_id INT PRIMARY KEY)`) 2365 sqlDB.Exec(t, `CREATE TABLE child ( 2366 parent_id INT, 2367 child_id INT, 2368 PRIMARY KEY(parent_id, child_id)) 2369 INTERLEAVE IN PARENT parent(parent_id)`) 2370 defer sqlDB.Exec(t, `DROP TABLE parent`) 2371 defer sqlDB.Exec(t, `DROP TABLE child`) 2372 2373 // Cannot IMPORT INTO interleaved parent 2374 sqlDB.ExpectErr( 2375 t, "Cannot use IMPORT INTO with interleaved tables", 2376 fmt.Sprintf(`IMPORT INTO parent (parent_id) CSV DATA (%s)`, testFiles.files[0])) 2377 2378 // Cannot IMPORT INTO interleaved child either. 2379 sqlDB.ExpectErr( 2380 t, "Cannot use IMPORT INTO with interleaved tables", 2381 fmt.Sprintf(`IMPORT INTO child (parent_id, child_id) CSV DATA (%s)`, testFiles.files[0])) 2382 }) 2383 2384 // This tests that consecutive imports from unique data sources into an 2385 // existing table without an explicit PK, do not overwrite each other. It 2386 // exercises the row_id generation in IMPORT. 2387 t.Run("multiple-import-into-without-pk", func(t *testing.T) { 2388 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING)`) 2389 defer sqlDB.Exec(t, `DROP TABLE t`) 2390 2391 // Insert the test data 2392 insert := []string{"''", "'text'", "'a'", "'e'", "'l'", "'t'", "'z'"} 2393 numExistingRows := len(insert) 2394 insertedRows := rowsPerFile * 3 2395 2396 for i, v := range insert { 2397 sqlDB.Exec(t, "INSERT INTO t (a, b) VALUES ($1, $2)", i, v) 2398 } 2399 2400 // Expect it to succeed with correct columns. 2401 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0])) 2402 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[1])) 2403 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[2])) 2404 2405 // Verify correct number of rows via COUNT. 2406 var result int 2407 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 2408 if expect := numExistingRows + insertedRows; result != expect { 2409 t.Fatalf("expected %d rows, got %d", expect, result) 2410 } 2411 }) 2412 2413 // This tests that a collision is not detected when importing the same source 2414 // file twice in the same IMPORT, into a table without a PK. 
It exercises the 2415 // row_id generation logic. 2416 t.Run("multiple-file-import-into-without-pk", func(t *testing.T) { 2417 sqlDB.Exec(t, `CREATE TABLE t (a INT, b STRING)`) 2418 defer sqlDB.Exec(t, `DROP TABLE t`) 2419 2420 sqlDB.Exec(t, 2421 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s, %s)`, testFiles.files[0], testFiles.files[0]), 2422 ) 2423 2424 // Verify correct number of rows via COUNT. 2425 var result int 2426 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 2427 if result != rowsPerFile*2 { 2428 t.Fatalf("expected %d rows, got %d", rowsPerFile*2, result) 2429 } 2430 }) 2431 2432 // IMPORT INTO disallows shadowing of existing keys when ingesting data. With 2433 // the exception of shadowing keys having the same ts and value. 2434 // 2435 // This tests key collision detection when importing the same source file 2436 // twice. The ts across imports is different, and so this is considered a 2437 // collision. 2438 t.Run("import-into-same-file-diff-imports", func(t *testing.T) { 2439 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2440 defer sqlDB.Exec(t, `DROP TABLE t`) 2441 2442 sqlDB.Exec(t, 2443 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0]), 2444 ) 2445 2446 sqlDB.ExpectErr( 2447 t, `ingested key collides with an existing one: /Table/\d+/1/0/0`, 2448 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0]), 2449 ) 2450 }) 2451 2452 // When the ts and value of the ingested keys across SSTs match the existing 2453 // keys we do not consider this to be a collision. This is to support IMPORT 2454 // job pause/resumption. 2455 // 2456 // To ensure uniform behavior we apply the same exception to keys within the 2457 // same SST. 2458 // 2459 // This test attempts to ingest duplicate keys in the same SST, with the same 2460 // value, and succeeds in doing so. 2461 t.Run("import-into-dups-in-sst", func(t *testing.T) { 2462 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2463 defer sqlDB.Exec(t, `DROP TABLE t`) 2464 2465 sqlDB.Exec(t, 2466 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.fileWithDupKeySameValue[0]), 2467 ) 2468 2469 // Verify correct number of rows via COUNT. 2470 var result int 2471 sqlDB.QueryRow(t, `SELECT count(*) FROM t`).Scan(&result) 2472 if result != 200 { 2473 t.Fatalf("expected 200 rows, got %d", result) 2474 } 2475 }) 2476 2477 // This tests key collision detection and importing a source file with the 2478 // colliding key sandwiched between valid keys. 2479 t.Run("import-into-key-collision", func(t *testing.T) { 2480 sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY, b STRING)`) 2481 defer sqlDB.Exec(t, `DROP TABLE t`) 2482 2483 sqlDB.Exec(t, 2484 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0]), 2485 ) 2486 2487 sqlDB.ExpectErr( 2488 t, `ingested key collides with an existing one: /Table/\d+/1/0/0`, 2489 fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.fileWithShadowKeys[0]), 2490 ) 2491 }) 2492 2493 // Tests that IMPORT INTO invalidates FK and CHECK constraints. 
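// Once the import has left them unvalidated, a user could re-validate them
// manually; for example (illustrative, not executed by this test):
//
//   ALTER TABLE t VALIDATE CONSTRAINT check_a;
//   ALTER TABLE t VALIDATE CONSTRAINT fk_ref;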
2494 t.Run("import-into-invalidate-constraints", func(t *testing.T) { 2495 2496 sqlDB.Exec(t, `CREATE TABLE ref (b STRING PRIMARY KEY)`) 2497 defer sqlDB.Exec(t, `DROP TABLE ref`) 2498 sqlDB.Exec(t, `CREATE TABLE t (a INT CHECK (a >= 0), b STRING, CONSTRAINT fk_ref FOREIGN KEY (b) REFERENCES ref)`) 2499 defer sqlDB.Exec(t, `DROP TABLE t`) 2500 2501 var checkValidated, fkValidated bool 2502 sqlDB.QueryRow(t, fmt.Sprintf(`SELECT validated from [SHOW CONSTRAINT FROM t] WHERE constraint_name = 'check_a'`)).Scan(&checkValidated) 2503 sqlDB.QueryRow(t, fmt.Sprintf(`SELECT validated from [SHOW CONSTRAINT FROM t] WHERE constraint_name = 'fk_ref'`)).Scan(&fkValidated) 2504 2505 // Prior to import all constraints should be validated. 2506 if !checkValidated || !fkValidated { 2507 t.Fatal("Constraints not validated on creation.\n") 2508 } 2509 2510 sqlDB.Exec(t, fmt.Sprintf(`IMPORT INTO t (a, b) CSV DATA (%s)`, testFiles.files[0])) 2511 2512 sqlDB.QueryRow(t, fmt.Sprintf(`SELECT validated from [SHOW CONSTRAINT FROM t] WHERE constraint_name = 'check_a'`)).Scan(&checkValidated) 2513 sqlDB.QueryRow(t, fmt.Sprintf(`SELECT validated from [SHOW CONSTRAINT FROM t] WHERE constraint_name = 'fk_ref'`)).Scan(&fkValidated) 2514 2515 // Following an import the constraints should be unvalidated. 2516 if checkValidated || fkValidated { 2517 t.Fatal("FK and CHECK constraints not unvalidated after IMPORT INTO\n") 2518 } 2519 }) 2520 } 2521 2522 func BenchmarkImport(b *testing.B) { 2523 const ( 2524 nodes = 3 2525 numFiles = nodes + 2 2526 ) 2527 baseDir := filepath.Join("testdata", "csv") 2528 ctx := context.Background() 2529 tc := testcluster.StartTestCluster(b, nodes, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: baseDir}}) 2530 defer tc.Stopper().Stop(ctx) 2531 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 2532 2533 testFiles := makeCSVData(b, numFiles, b.N*100, nodes, 16) 2534 2535 b.ResetTimer() 2536 2537 sqlDB.Exec(b, 2538 fmt.Sprintf( 2539 `IMPORT TABLE t (a INT8 PRIMARY KEY, b STRING, INDEX (b), INDEX (a, b)) 2540 CSV DATA (%s)`, 2541 strings.Join(testFiles.files, ","), 2542 )) 2543 } 2544 2545 // a importRowProducer implementation that returns 'n' rows. 2546 type csvBenchmarkStream struct { 2547 n int 2548 pos int 2549 data [][]string 2550 } 2551 2552 func (s *csvBenchmarkStream) Progress() float32 { 2553 return float32(s.pos) / float32(s.n) 2554 } 2555 2556 func (s *csvBenchmarkStream) Scan() bool { 2557 s.pos++ 2558 return s.pos <= s.n 2559 } 2560 2561 func (s *csvBenchmarkStream) Err() error { 2562 return nil 2563 } 2564 2565 func (s *csvBenchmarkStream) Skip() error { 2566 return nil 2567 } 2568 2569 func (s *csvBenchmarkStream) Row() (interface{}, error) { 2570 return s.data[s.pos%len(s.data)], nil 2571 } 2572 2573 // Read implements Reader interface. It's used by delimited 2574 // benchmark to read its tab separated input. 
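// A minimal usage sketch (assumed, mirroring BenchmarkDelimitedConvertRecord
// below, which wraps the stream in a fileReader and feeds it to readFile):
//
//   producer := &csvBenchmarkStream{n: 10, data: [][]string{{"1", "x"}}}
//   buf := make([]byte, 64)
//   n, _ := producer.Read(buf) // yields "1\tx\n" for the first row
//   _ = n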
2575 func (s *csvBenchmarkStream) Read(buf []byte) (int, error) { 2576 if s.Scan() { 2577 r, err := s.Row() 2578 if err != nil { 2579 return 0, err 2580 } 2581 return copy(buf, strings.Join(r.([]string), "\t")+"\n"), nil 2582 } 2583 return 0, io.EOF 2584 } 2585 2586 var _ importRowProducer = &csvBenchmarkStream{} 2587 2588 // BenchmarkConvertRecord-16 1000000 2107 ns/op 56.94 MB/s 3600 B/op 101 allocs/op 2589 // BenchmarkConvertRecord-16 500000 2106 ns/op 56.97 MB/s 3606 B/op 101 allocs/op 2590 // BenchmarkConvertRecord-16 500000 2100 ns/op 57.14 MB/s 3606 B/op 101 allocs/op 2591 // BenchmarkConvertRecord-16 500000 2286 ns/op 52.49 MB/s 3606 B/op 101 allocs/op 2592 // BenchmarkConvertRecord-16 500000 2378 ns/op 50.46 MB/s 3606 B/op 101 allocs/op 2593 // BenchmarkConvertRecord-16 500000 2427 ns/op 49.43 MB/s 3606 B/op 101 allocs/op 2594 // BenchmarkConvertRecord-16 500000 2399 ns/op 50.02 MB/s 3606 B/op 101 allocs/op 2595 // BenchmarkConvertRecord-16 500000 2365 ns/op 50.73 MB/s 3606 B/op 101 allocs/op 2596 // BenchmarkConvertRecord-16 500000 2376 ns/op 50.49 MB/s 3606 B/op 101 allocs/op 2597 // BenchmarkConvertRecord-16 500000 2390 ns/op 50.20 MB/s 3606 B/op 101 allocs/op 2598 func BenchmarkCSVConvertRecord(b *testing.B) { 2599 ctx := context.Background() 2600 2601 tpchLineItemDataRows := [][]string{ 2602 {"1", "155190", "7706", "1", "17", "21168.23", "0.04", "0.02", "N", "O", "1996-03-13", "1996-02-12", "1996-03-22", "DELIVER IN PERSON", "TRUCK", "egular courts above the"}, 2603 {"1", "67310", "7311", "2", "36", "45983.16", "0.09", "0.06", "N", "O", "1996-04-12", "1996-02-28", "1996-04-20", "TAKE BACK RETURN", "MAIL", "ly final dependencies: slyly bold "}, 2604 {"1", "63700", "3701", "3", "8", "13309.60", "0.10", "0.02", "N", "O", "1996-01-29", "1996-03-05", "1996-01-31", "TAKE BACK RETURN", "REG AIR", "riously. regular, express dep"}, 2605 {"1", "2132", "4633", "4", "28", "28955.64", "0.09", "0.06", "N", "O", "1996-04-21", "1996-03-30", "1996-05-16", "NONE", "AIR", "lites. fluffily even de"}, 2606 {"1", "24027", "1534", "5", "24", "22824.48", "0.10", "0.04", "N", "O", "1996-03-30", "1996-03-14", "1996-04-01", "NONE", "FOB", " pending foxes. slyly re"}, 2607 {"1", "15635", "638", "6", "32", "49620.16", "0.07", "0.02", "N", "O", "1996-01-30", "1996-02-07", "1996-02-03", "DELIVER IN PERSON", "MAIL", "arefully slyly ex"}, 2608 {"2", "106170", "1191", "1", "38", "44694.46", "0.00", "0.05", "N", "O", "1997-01-28", "1997-01-14", "1997-02-02", "TAKE BACK RETURN", "RAIL", "ven requests. deposits breach a"}, 2609 {"3", "4297", "1798", "1", "45", "54058.05", "0.06", "0.00", "R", "F", "1994-02-02", "1994-01-04", "1994-02-23", "NONE", "AIR", "ongside of the furiously brave acco"}, 2610 {"3", "19036", "6540", "2", "49", "46796.47", "0.10", "0.00", "R", "F", "1993-11-09", "1993-12-20", "1993-11-24", "TAKE BACK RETURN", "RAIL", " unusual accounts. eve"}, 2611 {"3", "128449", "3474", "3", "27", "39890.88", "0.06", "0.07", "A", "F", "1994-01-16", "1993-11-22", "1994-01-23", "DELIVER IN PERSON", "SHIP", "nal foxes wake."}, 2612 } 2613 b.SetBytes(120) // Raw input size. With 8 indexes, expect more on output side. 
2614 2615 stmt, err := parser.ParseOne(`CREATE TABLE lineitem ( 2616 l_orderkey INT8 NOT NULL, 2617 l_partkey INT8 NOT NULL, 2618 l_suppkey INT8 NOT NULL, 2619 l_linenumber INT8 NOT NULL, 2620 l_quantity DECIMAL(15,2) NOT NULL, 2621 l_extendedprice DECIMAL(15,2) NOT NULL, 2622 l_discount DECIMAL(15,2) NOT NULL, 2623 l_tax DECIMAL(15,2) NOT NULL, 2624 l_returnflag CHAR(1) NOT NULL, 2625 l_linestatus CHAR(1) NOT NULL, 2626 l_shipdate DATE NOT NULL, 2627 l_commitdate DATE NOT NULL, 2628 l_receiptdate DATE NOT NULL, 2629 l_shipinstruct CHAR(25) NOT NULL, 2630 l_shipmode CHAR(10) NOT NULL, 2631 l_comment VARCHAR(44) NOT NULL, 2632 PRIMARY KEY (l_orderkey, l_linenumber), 2633 INDEX l_ok (l_orderkey ASC), 2634 INDEX l_pk (l_partkey ASC), 2635 INDEX l_sk (l_suppkey ASC), 2636 INDEX l_sd (l_shipdate ASC), 2637 INDEX l_cd (l_commitdate ASC), 2638 INDEX l_rd (l_receiptdate ASC), 2639 INDEX l_pk_sk (l_partkey ASC, l_suppkey ASC), 2640 INDEX l_sk_pk (l_suppkey ASC, l_partkey ASC) 2641 )`) 2642 if err != nil { 2643 b.Fatal(err) 2644 } 2645 create := stmt.AST.(*tree.CreateTable) 2646 st := cluster.MakeTestingClusterSettings() 2647 evalCtx := tree.MakeTestingEvalContext(st) 2648 2649 tableDesc, err := MakeSimpleTableDescriptor(ctx, st, create, sqlbase.ID(100), sqlbase.ID(100), NoFKs, 1) 2650 if err != nil { 2651 b.Fatal(err) 2652 } 2653 2654 kvCh := make(chan row.KVBatch) 2655 // no-op drain kvs channel. 2656 go func() { 2657 for range kvCh { 2658 } 2659 }() 2660 2661 descr := tableDesc.TableDesc() 2662 importCtx := ¶llelImportContext{ 2663 evalCtx: &evalCtx, 2664 tableDesc: descr, 2665 kvCh: kvCh, 2666 } 2667 2668 producer := &csvBenchmarkStream{ 2669 n: b.N, 2670 pos: 0, 2671 data: tpchLineItemDataRows, 2672 } 2673 consumer := &csvRowConsumer{importCtx: importCtx, opts: &roachpb.CSVOptions{}} 2674 b.ResetTimer() 2675 require.NoError(b, runParallelImport(ctx, importCtx, &importFileContext{}, producer, consumer)) 2676 close(kvCh) 2677 b.ReportAllocs() 2678 } 2679 2680 // goos: darwin 2681 // goarch: amd64 2682 // pkg: github.com/cockroachdb/cockroach/pkg/ccl/importccl 2683 // BenchmarkDelimitedConvertRecord-16 500000 2473 ns/op 48.51 MB/s 2684 // BenchmarkDelimitedConvertRecord-16 500000 2580 ns/op 46.51 MB/s 2685 // BenchmarkDelimitedConvertRecord-16 500000 2678 ns/op 44.80 MB/s 2686 // BenchmarkDelimitedConvertRecord-16 500000 2897 ns/op 41.41 MB/s 2687 // BenchmarkDelimitedConvertRecord-16 500000 3250 ns/op 36.92 MB/s 2688 // BenchmarkDelimitedConvertRecord-16 500000 3261 ns/op 36.80 MB/s 2689 // BenchmarkDelimitedConvertRecord-16 500000 3016 ns/op 39.79 MB/s 2690 // BenchmarkDelimitedConvertRecord-16 500000 2943 ns/op 40.77 MB/s 2691 // BenchmarkDelimitedConvertRecord-16 500000 3004 ns/op 39.94 MB/s 2692 // BenchmarkDelimitedConvertRecord-16 500000 2966 ns/op 40.45 MB/s 2693 func BenchmarkDelimitedConvertRecord(b *testing.B) { 2694 ctx := context.Background() 2695 2696 tpchLineItemDataRows := [][]string{ 2697 {"1", "155190", "7706", "1", "17", "21168.23", "0.04", "0.02", "N", "O", "1996-03-13", "1996-02-12", "1996-03-22", "DELIVER IN PERSON", "TRUCK", "egular courts above the"}, 2698 {"1", "67310", "7311", "2", "36", "45983.16", "0.09", "0.06", "N", "O", "1996-04-12", "1996-02-28", "1996-04-20", "TAKE BACK RETURN", "MAIL", "ly final dependencies: slyly bold "}, 2699 {"1", "63700", "3701", "3", "8", "13309.60", "0.10", "0.02", "N", "O", "1996-01-29", "1996-03-05", "1996-01-31", "TAKE BACK RETURN", "REG AIR", "riously. 
regular, express dep"}, 2700 {"1", "2132", "4633", "4", "28", "28955.64", "0.09", "0.06", "N", "O", "1996-04-21", "1996-03-30", "1996-05-16", "NONE", "AIR", "lites. fluffily even de"}, 2701 {"1", "24027", "1534", "5", "24", "22824.48", "0.10", "0.04", "N", "O", "1996-03-30", "1996-03-14", "1996-04-01", "NONE", "FOB", " pending foxes. slyly re"}, 2702 {"1", "15635", "638", "6", "32", "49620.16", "0.07", "0.02", "N", "O", "1996-01-30", "1996-02-07", "1996-02-03", "DELIVER IN PERSON", "MAIL", "arefully slyly ex"}, 2703 {"2", "106170", "1191", "1", "38", "44694.46", "0.00", "0.05", "N", "O", "1997-01-28", "1997-01-14", "1997-02-02", "TAKE BACK RETURN", "RAIL", "ven requests. deposits breach a"}, 2704 {"3", "4297", "1798", "1", "45", "54058.05", "0.06", "0.00", "R", "F", "1994-02-02", "1994-01-04", "1994-02-23", "NONE", "AIR", "ongside of the furiously brave acco"}, 2705 {"3", "19036", "6540", "2", "49", "46796.47", "0.10", "0.00", "R", "F", "1993-11-09", "1993-12-20", "1993-11-24", "TAKE BACK RETURN", "RAIL", " unusual accounts. eve"}, 2706 {"3", "128449", "3474", "3", "27", "39890.88", "0.06", "0.07", "A", "F", "1994-01-16", "1993-11-22", "1994-01-23", "DELIVER IN PERSON", "SHIP", "nal foxes wake."}, 2707 } 2708 b.SetBytes(120) // Raw input size. With 8 indexes, expect more on output side. 2709 2710 stmt, err := parser.ParseOne(`CREATE TABLE lineitem ( 2711 l_orderkey INT8 NOT NULL, 2712 l_partkey INT8 NOT NULL, 2713 l_suppkey INT8 NOT NULL, 2714 l_linenumber INT8 NOT NULL, 2715 l_quantity DECIMAL(15,2) NOT NULL, 2716 l_extendedprice DECIMAL(15,2) NOT NULL, 2717 l_discount DECIMAL(15,2) NOT NULL, 2718 l_tax DECIMAL(15,2) NOT NULL, 2719 l_returnflag CHAR(1) NOT NULL, 2720 l_linestatus CHAR(1) NOT NULL, 2721 l_shipdate DATE NOT NULL, 2722 l_commitdate DATE NOT NULL, 2723 l_receiptdate DATE NOT NULL, 2724 l_shipinstruct CHAR(25) NOT NULL, 2725 l_shipmode CHAR(10) NOT NULL, 2726 l_comment VARCHAR(44) NOT NULL, 2727 PRIMARY KEY (l_orderkey, l_linenumber), 2728 INDEX l_ok (l_orderkey ASC), 2729 INDEX l_pk (l_partkey ASC), 2730 INDEX l_sk (l_suppkey ASC), 2731 INDEX l_sd (l_shipdate ASC), 2732 INDEX l_cd (l_commitdate ASC), 2733 INDEX l_rd (l_receiptdate ASC), 2734 INDEX l_pk_sk (l_partkey ASC, l_suppkey ASC), 2735 INDEX l_sk_pk (l_suppkey ASC, l_partkey ASC) 2736 )`) 2737 if err != nil { 2738 b.Fatal(err) 2739 } 2740 create := stmt.AST.(*tree.CreateTable) 2741 st := cluster.MakeTestingClusterSettings() 2742 evalCtx := tree.MakeTestingEvalContext(st) 2743 2744 tableDesc, err := MakeSimpleTableDescriptor(ctx, st, create, sqlbase.ID(100), sqlbase.ID(100), NoFKs, 1) 2745 if err != nil { 2746 b.Fatal(err) 2747 } 2748 2749 kvCh := make(chan row.KVBatch) 2750 // no-op drain kvs channel. 2751 go func() { 2752 for range kvCh { 2753 } 2754 }() 2755 2756 descr := tableDesc.TableDesc() 2757 cols := make(tree.NameList, len(descr.Columns)) 2758 for i, col := range descr.Columns { 2759 cols[i] = tree.Name(col.Name) 2760 } 2761 r, err := newMysqloutfileReader(roachpb.MySQLOutfileOptions{ 2762 RowSeparator: '\n', 2763 FieldSeparator: '\t', 2764 }, kvCh, 0, 0, descr, &evalCtx) 2765 require.NoError(b, err) 2766 2767 producer := &csvBenchmarkStream{ 2768 n: b.N, 2769 pos: 0, 2770 data: tpchLineItemDataRows, 2771 } 2772 2773 delimited := &fileReader{Reader: producer} 2774 b.ResetTimer() 2775 require.NoError(b, r.readFile(ctx, delimited, 0, 0, nil)) 2776 close(kvCh) 2777 b.ReportAllocs() 2778 } 2779 2780 // TestImportControlJob tests that PAUSE JOB, RESUME JOB, and CANCEL JOB 2781 // work as intended on import jobs. 
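// The control statements exercised (directly or via jobutils.RunJob) are of the
// form shown below, with the job ID as a placeholder:
//
//   PAUSE JOB 123;
//   RESUME JOB 123;
//   CANCEL JOB 123;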
2782 func TestImportControlJob(t *testing.T) { 2783 defer leaktest.AfterTest(t)() 2784 2785 t.Skip("TODO(dt): add knob to force faster progress checks.") 2786 2787 defer func(oldInterval time.Duration) { 2788 jobs.DefaultAdoptInterval = oldInterval 2789 }(jobs.DefaultAdoptInterval) 2790 jobs.DefaultAdoptInterval = 100 * time.Millisecond 2791 2792 var serverArgs base.TestServerArgs 2793 // Disable external processing of mutations so that the final check of 2794 // crdb_internal.tables is guaranteed to not be cleaned up. Although this 2795 // was never observed by a stress test, it is here for safety. 2796 serverArgs.Knobs.SQLSchemaChanger = &sql.SchemaChangerTestingKnobs{ 2797 // TODO (lucy): if/when this test gets reinstated, figure out what knobs are 2798 // needed. 2799 } 2800 2801 var allowResponse chan struct{} 2802 params := base.TestClusterArgs{ServerArgs: serverArgs} 2803 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 2804 TestingResponseFilter: jobutils.BulkOpResponseFilter(&allowResponse), 2805 } 2806 2807 ctx := context.Background() 2808 tc := testcluster.StartTestCluster(t, 1, params) 2809 defer tc.Stopper().Stop(ctx) 2810 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 2811 sqlDB.Exec(t, `CREATE DATABASE data`) 2812 2813 makeSrv := func() *httptest.Server { 2814 var once sync.Once 2815 return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2816 if r.Method == "GET" { 2817 // The following code correctly handles both the case where, after the 2818 // CANCEL JOB is issued, the second stage of the IMPORT (the shuffle, 2819 // after the sampling) may or may not be started. If it was started, then a 2820 // second GET request is done. The once here will cause that request to not 2821 // block. The draining for loop below will cause jobutils.RunJob's second send 2822 // on allowResponse to succeed (which it does after issuing the CANCEL JOB). 2823 once.Do(func() { 2824 <-allowResponse 2825 go func() { 2826 for range allowResponse { 2827 } 2828 }() 2829 }) 2830 2831 _, _ = w.Write([]byte(r.URL.Path[1:])) 2832 } 2833 })) 2834 } 2835 2836 t.Run("cancel", func(t *testing.T) { 2837 sqlDB.Exec(t, `CREATE DATABASE cancelimport`) 2838 2839 srv := makeSrv() 2840 defer srv.Close() 2841 2842 var urls []string 2843 for i := 0; i < 10; i++ { 2844 urls = append(urls, fmt.Sprintf("'%s/%d'", srv.URL, i)) 2845 } 2846 csvURLs := strings.Join(urls, ", ") 2847 2848 query := fmt.Sprintf(`IMPORT TABLE cancelimport.t (i INT8 PRIMARY KEY) CSV DATA (%s)`, csvURLs) 2849 2850 if _, err := jobutils.RunJob( 2851 t, sqlDB, &allowResponse, []string{"cancel"}, query, 2852 ); !testutils.IsError(err, "job canceled") { 2853 t.Fatalf("expected 'job canceled' error, but got %+v", err) 2854 } 2855 // Check that executing again succeeds. This won't work if the first import 2856 // was not successfully canceled. 2857 sqlDB.Exec(t, query) 2858 }) 2859 2860 t.Run("pause", func(t *testing.T) { 2861 // Test that IMPORT can be paused and resumed. This test also attempts to 2862 // only pause the job after it has begun splitting ranges. When the job 2863 // is resumed, if the sampling phase is re-run, the splits points will 2864 // differ. When AddSSTable attempts to import the new ranges, they will 2865 // fail because there is an existing split in the key space that it cannot 2866 // handle. Use a sstsize that will more-or-less (since it is statistical) 2867 // always cause this condition. 
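// In outline, the flow below is (names as used in this file):
//
//   jobID, err := jobutils.RunJob(t, sqlDB, &allowResponse, []string{"PAUSE"}, query) // expects "job paused"
//   sqlDB.Exec(t, fmt.Sprintf(`RESUME JOB %d`, jobID))
//   jobutils.WaitForJob(t, sqlDB, jobID)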
2868 2869 sqlDB.Exec(t, `CREATE DATABASE pauseimport`) 2870 2871 srv := makeSrv() 2872 defer srv.Close() 2873 2874 count := 100 2875 // This test takes a while with the race detector, so reduce the number of 2876 // files in an attempt to speed it up. 2877 if util.RaceEnabled { 2878 count = 20 2879 } 2880 2881 urls := make([]string, count) 2882 for i := 0; i < count; i++ { 2883 urls[i] = fmt.Sprintf("'%s/%d'", srv.URL, i) 2884 } 2885 csvURLs := strings.Join(urls, ", ") 2886 query := fmt.Sprintf(`IMPORT TABLE pauseimport.t (i INT8 PRIMARY KEY) CSV DATA (%s) WITH sstsize = '50B'`, csvURLs) 2887 2888 jobID, err := jobutils.RunJob( 2889 t, sqlDB, &allowResponse, []string{"PAUSE"}, query, 2890 ) 2891 if !testutils.IsError(err, "job paused") { 2892 t.Fatalf("unexpected: %v", err) 2893 } 2894 sqlDB.Exec(t, fmt.Sprintf(`RESUME JOB %d`, jobID)) 2895 jobutils.WaitForJob(t, sqlDB, jobID) 2896 sqlDB.CheckQueryResults(t, 2897 `SELECT * FROM pauseimport.t ORDER BY i`, 2898 sqlDB.QueryStr(t, `SELECT * FROM generate_series(0, $1)`, count-1), 2899 ) 2900 }) 2901 } 2902 2903 // TestImportWorkerFailure tests that IMPORT can restart after the failure 2904 // of a worker node. 2905 func TestImportWorkerFailure(t *testing.T) { 2906 defer leaktest.AfterTest(t)() 2907 2908 // TODO(mjibson): Although this test passes most of the time it still 2909 // sometimes fails because not all kinds of failures caused by shutting a 2910 // node down are detected and retried. 2911 t.Skip("flaky due to undetected kinds of failures when the node is shutdown") 2912 2913 defer func(oldInterval time.Duration) { 2914 jobs.DefaultAdoptInterval = oldInterval 2915 }(jobs.DefaultAdoptInterval) 2916 jobs.DefaultAdoptInterval = 100 * time.Millisecond 2917 2918 allowResponse := make(chan struct{}) 2919 params := base.TestClusterArgs{} 2920 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 2921 TestingResponseFilter: jobutils.BulkOpResponseFilter(&allowResponse), 2922 } 2923 2924 ctx := context.Background() 2925 tc := testcluster.StartTestCluster(t, 3, params) 2926 defer tc.Stopper().Stop(ctx) 2927 conn := tc.Conns[0] 2928 sqlDB := sqlutils.MakeSQLRunner(conn) 2929 2930 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 2931 if r.Method == "GET" { 2932 _, _ = w.Write([]byte(r.URL.Path[1:])) 2933 } 2934 })) 2935 defer srv.Close() 2936 2937 count := 20 2938 urls := make([]string, count) 2939 for i := 0; i < count; i++ { 2940 urls[i] = fmt.Sprintf("'%s/%d'", srv.URL, i) 2941 } 2942 csvURLs := strings.Join(urls, ", ") 2943 query := fmt.Sprintf(`IMPORT TABLE t (i INT8 PRIMARY KEY) CSV DATA (%s) WITH sstsize = '1B'`, csvURLs) 2944 2945 errCh := make(chan error) 2946 go func() { 2947 _, err := conn.Exec(query) 2948 errCh <- err 2949 }() 2950 select { 2951 case allowResponse <- struct{}{}: 2952 case err := <-errCh: 2953 t.Fatalf("%s: query returned before expected: %s", err, query) 2954 } 2955 var jobID int64 2956 sqlDB.QueryRow(t, `SELECT id FROM system.jobs ORDER BY created DESC LIMIT 1`).Scan(&jobID) 2957 2958 // Shut down a node. This should force LoadCSV to fail in its current 2959 // execution. It should detect this as a context canceled error. 2960 tc.StopServer(1) 2961 2962 close(allowResponse) 2963 // We expect the statement to fail. 2964 if err := <-errCh; !testutils.IsError(err, "node failure") { 2965 t.Fatal(err) 2966 } 2967 2968 // But the job should be restarted and succeed eventually. 
2969 jobutils.WaitForJob(t, sqlDB, jobID) 2970 sqlDB.CheckQueryResults(t, 2971 `SELECT * FROM t ORDER BY i`, 2972 sqlDB.QueryStr(t, `SELECT * FROM generate_series(0, $1)`, count-1), 2973 ) 2974 } 2975 2976 // TestImportLivenessWithRestart tests that a node liveness transition 2977 // during IMPORT correctly resumes after the node executing the job 2978 // becomes non-live (from the perspective of the jobs registry). 2979 // 2980 // Its actual purpose is to address the second bug listed in #22924 about 2981 // the addsstable arguments not in request range. The theory was that the 2982 // restart in that issue was caused by node liveness and that the work 2983 // already performed (the splits and addsstables) somehow caused the second 2984 // error. However this does not appear to be the case, as running many stress 2985 // iterations with differing constants (rows, sstsize, kv.bulk_ingest.batch_size) 2986 // was not able to fail in the way listed by the second bug. 2987 func TestImportLivenessWithRestart(t *testing.T) { 2988 defer leaktest.AfterTest(t)() 2989 2990 t.Skip("TODO(dt): this relies on chunking done by prior version of IMPORT." + 2991 "Rework this test, or replace it with resume-tests + jobs infra tests.") 2992 2993 defer func(oldInterval time.Duration) { 2994 jobs.DefaultAdoptInterval = oldInterval 2995 }(jobs.DefaultAdoptInterval) 2996 jobs.DefaultAdoptInterval = 100 * time.Millisecond 2997 jobs.DefaultCancelInterval = 100 * time.Millisecond 2998 2999 const nodes = 1 3000 nl := jobs.NewFakeNodeLiveness(nodes) 3001 serverArgs := base.TestServerArgs{ 3002 Knobs: base.TestingKnobs{ 3003 RegistryLiveness: nl, 3004 }, 3005 } 3006 3007 var allowResponse chan struct{} 3008 params := base.TestClusterArgs{ServerArgs: serverArgs} 3009 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 3010 TestingResponseFilter: jobutils.BulkOpResponseFilter(&allowResponse), 3011 } 3012 3013 ctx := context.Background() 3014 tc := testcluster.StartTestCluster(t, nodes, params) 3015 defer tc.Stopper().Stop(ctx) 3016 conn := tc.Conns[0] 3017 sqlDB := sqlutils.MakeSQLRunner(conn) 3018 3019 // Prevent hung HTTP connections in leaktest. 3020 sqlDB.Exec(t, `SET CLUSTER SETTING cloudstorage.timeout = '3s'`) 3021 3022 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '300B'`) 3023 sqlDB.Exec(t, `CREATE DATABASE liveness`) 3024 3025 const rows = 5000 3026 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 3027 if r.Method == "GET" { 3028 for i := 0; i < rows; i++ { 3029 fmt.Fprintln(w, i) 3030 } 3031 } 3032 })) 3033 defer srv.Close() 3034 3035 const query = `IMPORT TABLE liveness.t (i INT8 PRIMARY KEY) CSV DATA ($1) WITH sstsize = '500B', experimental_sorted_ingestion` 3036 3037 // Start an IMPORT and wait until it's done one addsstable. 3038 allowResponse = make(chan struct{}) 3039 errCh := make(chan error) 3040 go func() { 3041 _, err := conn.Exec(query, srv.URL) 3042 errCh <- err 3043 }() 3044 // Allow many, but not all, addsstables to complete. 3045 for i := 0; i < 50; i++ { 3046 select { 3047 case allowResponse <- struct{}{}: 3048 case err := <-errCh: 3049 t.Fatal(err) 3050 } 3051 } 3052 // Fetch the new job ID and lease since we know it's running now. 
3053 var jobID int64 3054 originalLease := &jobspb.Progress{} 3055 { 3056 var expectedLeaseBytes []byte 3057 sqlDB.QueryRow( 3058 t, `SELECT id, progress FROM system.jobs ORDER BY created DESC LIMIT 1`, 3059 ).Scan(&jobID, &expectedLeaseBytes) 3060 if err := protoutil.Unmarshal(expectedLeaseBytes, originalLease); err != nil { 3061 t.Fatal(err) 3062 } 3063 } 3064 3065 // addsstable is done, make the node non-live and wait for cancellation 3066 nl.FakeSetExpiration(1, hlc.MinTimestamp) 3067 // Wait for the registry cancel loop to run and cancel the job. 3068 <-nl.SelfCalledCh 3069 <-nl.SelfCalledCh 3070 close(allowResponse) 3071 err := <-errCh 3072 if !testutils.IsError(err, "job .*: node liveness error") { 3073 t.Fatalf("unexpected: %v", err) 3074 } 3075 3076 // Ensure that partial progress has been recorded 3077 partialProgress := jobutils.GetJobProgress(t, sqlDB, jobID) 3078 if len(partialProgress.Details.(*jobspb.Progress_Import).Import.SpanProgress) == 0 { 3079 t.Fatal("no partial import progress detected") 3080 } 3081 3082 // Make the node live again 3083 nl.FakeSetExpiration(1, hlc.MaxTimestamp) 3084 // The registry should now adopt the job and resume it. 3085 jobutils.WaitForJob(t, sqlDB, jobID) 3086 // Verify that the job lease was updated 3087 rescheduledProgress := jobutils.GetJobProgress(t, sqlDB, jobID) 3088 if rescheduledProgress.ModifiedMicros <= originalLease.ModifiedMicros { 3089 t.Fatalf("expecting rescheduled job to have a later modification time: %d vs %d", 3090 rescheduledProgress.ModifiedMicros, originalLease.ModifiedMicros) 3091 } 3092 3093 // Verify that all expected rows are present after a stop/start cycle. 3094 var rowCount int 3095 sqlDB.QueryRow(t, "SELECT count(*) from liveness.t").Scan(&rowCount) 3096 if rowCount != rows { 3097 t.Fatalf("not all rows were present. Expecting %d, had %d", rows, rowCount) 3098 } 3099 3100 // Verify that all write progress coalesced into a single span 3101 // encompassing the entire table. 3102 spans := rescheduledProgress.Details.(*jobspb.Progress_Import).Import.SpanProgress 3103 if len(spans) != 1 { 3104 t.Fatalf("expecting only a single progress span, had %d\n%s", len(spans), spans) 3105 } 3106 3107 // Ensure that an entire table range is marked as complete 3108 tableSpan := roachpb.Span{ 3109 Key: keys.MinKey, 3110 EndKey: keys.MaxKey, 3111 } 3112 if !tableSpan.EqualValue(spans[0]) { 3113 t.Fatalf("expected entire table to be marked complete, had %s", spans[0]) 3114 } 3115 } 3116 3117 // TestImportLivenessWithLeniency tests that a temporary node liveness 3118 // transition during IMPORT doesn't cancel the job, but allows the 3119 // owning node to continue processing. 
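// The leniency window itself is just a cluster setting; the test pins it below
// so the tardiness it injects stays well inside the window:
//
//   SET CLUSTER SETTING jobs.registry.leniency = '1m'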
3120 func TestImportLivenessWithLeniency(t *testing.T) { 3121 defer leaktest.AfterTest(t)() 3122 3123 defer func(oldInterval time.Duration) { 3124 jobs.DefaultAdoptInterval = oldInterval 3125 }(jobs.DefaultAdoptInterval) 3126 jobs.DefaultAdoptInterval = 100 * time.Millisecond 3127 jobs.DefaultCancelInterval = 100 * time.Millisecond 3128 3129 const nodes = 1 3130 nl := jobs.NewFakeNodeLiveness(nodes) 3131 serverArgs := base.TestServerArgs{ 3132 Knobs: base.TestingKnobs{ 3133 RegistryLiveness: nl, 3134 }, 3135 } 3136 3137 var allowResponse chan struct{} 3138 params := base.TestClusterArgs{ServerArgs: serverArgs} 3139 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 3140 TestingResponseFilter: jobutils.BulkOpResponseFilter(&allowResponse), 3141 } 3142 3143 ctx := context.Background() 3144 tc := testcluster.StartTestCluster(t, nodes, params) 3145 defer tc.Stopper().Stop(ctx) 3146 conn := tc.Conns[0] 3147 sqlDB := sqlutils.MakeSQLRunner(conn) 3148 3149 // Prevent hung HTTP connections in leaktest. 3150 sqlDB.Exec(t, `SET CLUSTER SETTING cloudstorage.timeout = '3s'`) 3151 // We want to know exactly how much leniency is configured. 3152 sqlDB.Exec(t, `SET CLUSTER SETTING jobs.registry.leniency = '1m'`) 3153 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '300B'`) 3154 sqlDB.Exec(t, `CREATE DATABASE liveness`) 3155 3156 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 3157 const rows = 5000 3158 if r.Method == "GET" { 3159 for i := 0; i < rows; i++ { 3160 fmt.Fprintln(w, i) 3161 } 3162 } 3163 })) 3164 defer srv.Close() 3165 3166 const query = `IMPORT TABLE liveness.t (i INT8 PRIMARY KEY) CSV DATA ($1) WITH sstsize = '500B'` 3167 3168 // Start an IMPORT and wait until it's done one addsstable. 3169 allowResponse = make(chan struct{}) 3170 errCh := make(chan error) 3171 go func() { 3172 _, err := conn.Exec(query, srv.URL) 3173 errCh <- err 3174 }() 3175 // Allow many, but not all, addsstables to complete. 3176 for i := 0; i < 50; i++ { 3177 select { 3178 case allowResponse <- struct{}{}: 3179 case err := <-errCh: 3180 t.Fatal(err) 3181 } 3182 } 3183 // Fetch the new job ID and lease since we know it's running now. 3184 var jobID int64 3185 originalLease := &jobspb.Payload{} 3186 { 3187 var expectedLeaseBytes []byte 3188 sqlDB.QueryRow( 3189 t, `SELECT id, payload FROM system.jobs ORDER BY created DESC LIMIT 1`, 3190 ).Scan(&jobID, &expectedLeaseBytes) 3191 if err := protoutil.Unmarshal(expectedLeaseBytes, originalLease); err != nil { 3192 t.Fatal(err) 3193 } 3194 } 3195 3196 // addsstable is done, make the node slightly tardy. 3197 nl.FakeSetExpiration(1, hlc.Timestamp{ 3198 WallTime: hlc.UnixNano() - (15 * time.Second).Nanoseconds(), 3199 }) 3200 3201 // Wait for the registry cancel loop to run and not cancel the job. 3202 <-nl.SelfCalledCh 3203 <-nl.SelfCalledCh 3204 close(allowResponse) 3205 3206 // Set the node to be fully live again. This prevents the registry 3207 // from canceling all of the jobs if the test node is saturated 3208 // and the import runs slowly. 3209 nl.FakeSetExpiration(1, hlc.MaxTimestamp) 3210 3211 // Verify that the client didn't see anything amiss. 3212 if err := <-errCh; err != nil { 3213 t.Fatalf("import job should have completed: %s", err) 3214 } 3215 3216 // The job should have completed normally. 
3217 jobutils.WaitForJob(t, sqlDB, jobID) 3218 } 3219 3220 // TestImportMVCCChecksums verifies that MVCC checksums are correctly 3221 // computed by issuing a secondary index change that runs a CPut on the 3222 // index. See #23984. 3223 func TestImportMVCCChecksums(t *testing.T) { 3224 defer leaktest.AfterTest(t)() 3225 3226 s, db, _ := serverutils.StartServer(t, base.TestServerArgs{}) 3227 ctx := context.Background() 3228 defer s.Stopper().Stop(ctx) 3229 sqlDB := sqlutils.MakeSQLRunner(db) 3230 3231 sqlDB.Exec(t, `CREATE DATABASE d`) 3232 3233 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 3234 if r.Method == "GET" { 3235 fmt.Fprint(w, "1,1,1") 3236 } 3237 })) 3238 defer srv.Close() 3239 3240 sqlDB.Exec(t, `IMPORT TABLE d.t ( 3241 a INT8 PRIMARY KEY, 3242 b INT8, 3243 c INT8, 3244 INDEX (b) STORING (c) 3245 ) CSV DATA ($1)`, srv.URL) 3246 sqlDB.Exec(t, `UPDATE d.t SET c = 2 WHERE a = 1`) 3247 } 3248 3249 func TestImportMysql(t *testing.T) { 3250 defer leaktest.AfterTest(t)() 3251 3252 t.Skip("https://github.com/cockroachdb/cockroach/issues/40263") 3253 3254 const ( 3255 nodes = 3 3256 ) 3257 ctx := context.Background() 3258 baseDir := filepath.Join("testdata") 3259 args := base.TestServerArgs{ExternalIODir: baseDir} 3260 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3261 defer tc.Stopper().Stop(ctx) 3262 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 3263 3264 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3265 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3266 3267 files := getMysqldumpTestdata(t) 3268 simple := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.simple, baseDir))} 3269 second := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.second, baseDir))} 3270 multitable := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.wholeDB, baseDir))} 3271 multitableGz := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.wholeDB+".gz", baseDir))} 3272 multitableBz := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(files.wholeDB+".bz2", baseDir))} 3273 3274 const expectSimple, expectSecond, expectEverything = 1 << 0, 1 << 2, 1 << 3 3275 const expectAll = -1 3276 for _, c := range []struct { 3277 name string 3278 expected int 3279 query string 3280 args []interface{} 3281 }{ 3282 {`read data only`, expectSimple, `IMPORT TABLE simple (i INT8 PRIMARY KEY, s text, b bytea) MYSQLDUMP DATA ($1)`, simple}, 3283 {`single table dump`, expectSimple, `IMPORT TABLE simple FROM MYSQLDUMP ($1)`, simple}, 3284 {`second table dump`, expectSecond, `IMPORT TABLE second FROM MYSQLDUMP ($1) WITH skip_foreign_keys`, second}, 3285 {`simple from multi`, expectSimple, `IMPORT TABLE simple FROM MYSQLDUMP ($1)`, multitable}, 3286 {`second from multi`, expectSecond, `IMPORT TABLE second FROM MYSQLDUMP ($1) WITH skip_foreign_keys`, multitable}, 3287 {`all from multi`, expectAll, `IMPORT MYSQLDUMP ($1)`, multitable}, 3288 {`all from multi gzip`, expectAll, `IMPORT MYSQLDUMP ($1)`, multitableGz}, 3289 {`all from multi bzip`, expectAll, `IMPORT MYSQLDUMP ($1)`, multitableBz}, 3290 } { 3291 t.Run(c.name, func(t *testing.T) { 3292 sqlDB.Exec(t, `DROP TABLE IF EXISTS simple, second, third, everything CASCADE`) 3293 sqlDB.Exec(t, `DROP SEQUENCE IF EXISTS simple_auto_inc, third_auto_inc`) 3294 sqlDB.Exec(t, c.query, c.args...) 
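// The checks below are simple bit tests against c.expected: expectSimple,
// expectSecond and expectEverything are distinct bits, and expectAll (-1)
// has every bit set, so an `IMPORT MYSQLDUMP` case verifies all tables.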
3295 3296 if c.expected&expectSimple != 0 { 3297 if c.name != "read data only" { 3298 sqlDB.Exec(t, "INSERT INTO simple (s) VALUES ('auto-inc')") 3299 } 3300 3301 for idx, row := range sqlDB.QueryStr(t, "SELECT * FROM simple ORDER BY i") { 3302 { 3303 if idx == len(simpleTestRows) { 3304 if expected, actual := "auto-inc", row[1]; expected != actual { 3305 t.Fatalf("expected rowi=%s string to be %q, got %q", row[0], expected, actual) 3306 } 3307 continue 3308 } 3309 expected, actual := simpleTestRows[idx].s, row[1] 3310 if expected == injectNull { 3311 expected = "NULL" 3312 } 3313 if expected != actual { 3314 t.Fatalf("expected rowi=%s string to be %q, got %q", row[0], expected, actual) 3315 } 3316 } 3317 3318 { 3319 expected, actual := simpleTestRows[idx].b, row[2] 3320 if expected == nil { 3321 expected = []byte("NULL") 3322 } 3323 if !bytes.Equal(expected, []byte(actual)) { 3324 t.Fatalf("expected rowi=%s bytes to be %q, got %q", row[0], expected, actual) 3325 } 3326 } 3327 } 3328 } else { 3329 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM simple LIMIT 1`) 3330 } 3331 3332 if c.expected&expectSecond != 0 { 3333 res := sqlDB.QueryStr(t, "SELECT * FROM second ORDER BY i") 3334 if expected, actual := secondTableRows, len(res); expected != actual { 3335 t.Fatalf("expected %d, got %d", expected, actual) 3336 } 3337 for _, row := range res { 3338 if i, j := row[0], row[1]; i != "-"+j { 3339 t.Fatalf("expected %s = - %s", i, j) 3340 } 3341 } 3342 } else { 3343 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM second LIMIT 1`) 3344 } 3345 if c.expected&expectEverything != 0 { 3346 res := sqlDB.QueryStr(t, "SELECT i, c, iw, fl, d53, j FROM everything ORDER BY i") 3347 if expected, actual := len(everythingTestRows), len(res); expected != actual { 3348 t.Fatalf("expected %d, got %d", expected, actual) 3349 } 3350 for i := range res { 3351 if got, expected := res[i][0], fmt.Sprintf("%v", everythingTestRows[i].i); got != expected { 3352 t.Fatalf("expected %s got %s", expected, got) 3353 } 3354 if got, expected := res[i][1], everythingTestRows[i].c; got != expected { 3355 t.Fatalf("expected %s got %s", expected, got) 3356 } 3357 if got, expected := res[i][2], fmt.Sprintf("%v", everythingTestRows[i].iw); got != expected { 3358 t.Fatalf("expected %s got %s", expected, got) 3359 } 3360 if got, expected := res[i][3], fmt.Sprintf("%v", everythingTestRows[i].fl); got != expected { 3361 t.Fatalf("expected %s got %s", expected, got) 3362 } 3363 if got, expected := res[i][4], everythingTestRows[i].d53; got != expected { 3364 t.Fatalf("expected %s got %s", expected, got) 3365 } 3366 if got, expected := res[i][5], everythingTestRows[i].j; got != expected { 3367 t.Fatalf("expected %s got %s", expected, got) 3368 } 3369 } 3370 } else { 3371 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM everything LIMIT 1`) 3372 } 3373 }) 3374 } 3375 } 3376 3377 func TestImportMysqlOutfile(t *testing.T) { 3378 defer leaktest.AfterTest(t)() 3379 3380 const ( 3381 nodes = 3 3382 ) 3383 ctx := context.Background() 3384 baseDir := filepath.Join("testdata", "mysqlout") 3385 args := base.TestServerArgs{ExternalIODir: baseDir} 3386 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3387 defer tc.Stopper().Stop(ctx) 3388 conn := tc.Conns[0] 3389 sqlDB := sqlutils.MakeSQLRunner(conn) 3390 3391 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3392 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3393 3394 testRows, configs := getMysqlOutfileTestdata(t) 3395 3396 
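// Each config below is turned into a statement of roughly this shape (the WITH
// flags and placeholder values vary per config and are illustrative only):
//
//   IMPORT TABLE test0 (i INT8 PRIMARY KEY, s text, b bytea)
//       DELIMITED DATA ($1) WITH fields_terminated_by = $2, fields_enclosed_by = $3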
for i, cfg := range configs { 3397 t.Run(cfg.name, func(t *testing.T) { 3398 var opts []interface{} 3399 3400 cmd := fmt.Sprintf(`IMPORT TABLE test%d (i INT8 PRIMARY KEY, s text, b bytea) DELIMITED DATA ($1)`, i) 3401 opts = append(opts, fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(cfg.filename, baseDir))) 3402 3403 var flags []string 3404 if cfg.opts.RowSeparator != '\n' { 3405 opts = append(opts, string(cfg.opts.RowSeparator)) 3406 flags = append(flags, fmt.Sprintf("rows_terminated_by = $%d", len(opts))) 3407 } 3408 if cfg.opts.FieldSeparator != '\t' { 3409 opts = append(opts, string(cfg.opts.FieldSeparator)) 3410 flags = append(flags, fmt.Sprintf("fields_terminated_by = $%d", len(opts))) 3411 } 3412 if cfg.opts.Enclose == roachpb.MySQLOutfileOptions_Always { 3413 opts = append(opts, string(cfg.opts.Encloser)) 3414 flags = append(flags, fmt.Sprintf("fields_enclosed_by = $%d", len(opts))) 3415 } 3416 if cfg.opts.HasEscape { 3417 opts = append(opts, string(cfg.opts.Escape)) 3418 flags = append(flags, fmt.Sprintf("fields_escaped_by = $%d", len(opts))) 3419 } 3420 if len(flags) > 0 { 3421 cmd += " WITH " + strings.Join(flags, ", ") 3422 } 3423 sqlDB.Exec(t, cmd, opts...) 3424 for idx, row := range sqlDB.QueryStr(t, fmt.Sprintf("SELECT * FROM test%d ORDER BY i", i)) { 3425 expected, actual := testRows[idx].s, row[1] 3426 if expected == injectNull { 3427 expected = "NULL" 3428 } 3429 3430 if expected != actual { 3431 t.Fatalf("expected row i=%s string to be %q, got %q", row[0], expected, actual) 3432 } 3433 } 3434 }) 3435 } 3436 } 3437 3438 func TestImportPgCopy(t *testing.T) { 3439 defer leaktest.AfterTest(t)() 3440 3441 const ( 3442 nodes = 3 3443 ) 3444 ctx := context.Background() 3445 baseDir := filepath.Join("testdata", "pgcopy") 3446 args := base.TestServerArgs{ExternalIODir: baseDir} 3447 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3448 defer tc.Stopper().Stop(ctx) 3449 conn := tc.Conns[0] 3450 sqlDB := sqlutils.MakeSQLRunner(conn) 3451 3452 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3453 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3454 3455 testRows, configs := getPgCopyTestdata(t) 3456 3457 for i, cfg := range configs { 3458 t.Run(cfg.name, func(t *testing.T) { 3459 var opts []interface{} 3460 3461 cmd := fmt.Sprintf(`IMPORT TABLE test%d (i INT8 PRIMARY KEY, s text, b bytea) PGCOPY DATA ($1)`, i) 3462 opts = append(opts, fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(cfg.filename, baseDir))) 3463 3464 var flags []string 3465 if cfg.opts.Delimiter != '\t' { 3466 opts = append(opts, string(cfg.opts.Delimiter)) 3467 flags = append(flags, fmt.Sprintf("delimiter = $%d", len(opts))) 3468 } 3469 if cfg.opts.Null != `\N` { 3470 opts = append(opts, cfg.opts.Null) 3471 flags = append(flags, fmt.Sprintf("nullif = $%d", len(opts))) 3472 } 3473 if len(flags) > 0 { 3474 cmd += " WITH " + strings.Join(flags, ", ") 3475 } 3476 t.Log(cmd, opts) 3477 sqlDB.Exec(t, cmd, opts...) 
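// For example (illustrative), a config with a non-default delimiter and null
// marker produces a command like:
//
//   IMPORT TABLE test1 (i INT8 PRIMARY KEY, s text, b bytea)
//       PGCOPY DATA ($1) WITH delimiter = $2, nullif = $3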
3478 for idx, row := range sqlDB.QueryStr(t, fmt.Sprintf("SELECT * FROM test%d ORDER BY i", i)) { 3479 { 3480 expected, actual := testRows[idx].s, row[1] 3481 if expected == injectNull { 3482 expected = "NULL" 3483 } 3484 3485 if expected != actual { 3486 t.Fatalf("expected row i=%s string to be %q, got %q", row[0], expected, actual) 3487 } 3488 } 3489 3490 { 3491 expected, actual := testRows[idx].b, row[2] 3492 if expected == nil { 3493 expected = []byte("NULL") 3494 } 3495 if !bytes.Equal(expected, []byte(actual)) { 3496 t.Fatalf("expected rowi=%s bytes to be %q, got %q", row[0], expected, actual) 3497 } 3498 } 3499 } 3500 }) 3501 } 3502 } 3503 3504 func TestImportPgDump(t *testing.T) { 3505 defer leaktest.AfterTest(t)() 3506 3507 const ( 3508 nodes = 3 3509 ) 3510 ctx := context.Background() 3511 baseDir := filepath.Join("testdata") 3512 args := base.TestServerArgs{ExternalIODir: baseDir} 3513 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3514 defer tc.Stopper().Stop(ctx) 3515 conn := tc.Conns[0] 3516 sqlDB := sqlutils.MakeSQLRunner(conn) 3517 3518 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3519 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3520 3521 simplePgTestRows, simpleFile := getSimplePostgresDumpTestdata(t) 3522 simple := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(simpleFile, baseDir))} 3523 secondTableRowCount, secondFile := getSecondPostgresDumpTestdata(t) 3524 second := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(secondFile, baseDir))} 3525 multitableFile := getMultiTablePostgresDumpTestdata(t) 3526 multitable := []interface{}{fmt.Sprintf("nodelocal://0%s", strings.TrimPrefix(multitableFile, baseDir))} 3527 3528 const expectAll, expectSimple, expectSecond = 1, 2, 3 3529 3530 for _, c := range []struct { 3531 name string 3532 expected int 3533 query string 3534 args []interface{} 3535 }{ 3536 { 3537 `read data only`, 3538 expectSimple, 3539 `IMPORT TABLE simple ( 3540 i INT8, 3541 s text, 3542 b bytea, 3543 CONSTRAINT simple_pkey PRIMARY KEY (i), 3544 UNIQUE INDEX simple_b_s_idx (b, s), 3545 INDEX simple_s_idx (s) 3546 ) PGDUMP DATA ($1)`, 3547 simple, 3548 }, 3549 {`single table dump`, expectSimple, `IMPORT TABLE simple FROM PGDUMP ($1)`, simple}, 3550 {`second table dump`, expectSecond, `IMPORT TABLE second FROM PGDUMP ($1)`, second}, 3551 {`simple from multi`, expectSimple, `IMPORT TABLE simple FROM PGDUMP ($1)`, multitable}, 3552 {`second from multi`, expectSecond, `IMPORT TABLE second FROM PGDUMP ($1)`, multitable}, 3553 {`all from multi`, expectAll, `IMPORT PGDUMP ($1)`, multitable}, 3554 } { 3555 t.Run(c.name, func(t *testing.T) { 3556 sqlDB.Exec(t, `DROP TABLE IF EXISTS simple, second`) 3557 sqlDB.Exec(t, c.query, c.args...) 3558 3559 if c.expected == expectSimple || c.expected == expectAll { 3560 // Verify table schema because PKs and indexes are at the bottom of pg_dump. 
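// pg_dump emits primary key constraints and secondary indexes as ALTER TABLE /
// CREATE INDEX statements after the table data, so IMPORT has to fold them back into
// the table definition; SHOW CREATE TABLE confirms that they were.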
3561 sqlDB.CheckQueryResults(t, `SHOW CREATE TABLE simple`, [][]string{{ 3562 "simple", `CREATE TABLE simple ( 3563 i INT8 NOT NULL, 3564 s STRING NULL, 3565 b BYTES NULL, 3566 CONSTRAINT simple_pkey PRIMARY KEY (i ASC), 3567 UNIQUE INDEX simple_b_s_idx (b ASC, s ASC), 3568 INDEX simple_s_idx (s ASC), 3569 FAMILY "primary" (i, s, b) 3570 )`, 3571 }}) 3572 3573 rows := sqlDB.QueryStr(t, "SELECT * FROM simple ORDER BY i") 3574 if a, e := len(rows), len(simplePostgresTestRows); a != e { 3575 t.Fatalf("got %d rows, expected %d", a, e) 3576 } 3577 3578 for idx, row := range rows { 3579 { 3580 expected, actual := simplePostgresTestRows[idx].s, row[1] 3581 if expected == injectNull { 3582 expected = "NULL" 3583 } 3584 if expected != actual { 3585 t.Fatalf("expected rowi=%s string to be %q, got %q", row[0], expected, actual) 3586 } 3587 } 3588 3589 { 3590 expected, actual := simplePgTestRows[idx].b, row[2] 3591 if expected == nil { 3592 expected = []byte("NULL") 3593 } 3594 if !bytes.Equal(expected, []byte(actual)) { 3595 t.Fatalf("expected rowi=%s bytes to be %q, got %q", row[0], expected, actual) 3596 } 3597 } 3598 } 3599 } 3600 3601 if c.expected == expectSecond || c.expected == expectAll { 3602 // Verify table schema because PKs and indexes are at the bottom of pg_dump. 3603 sqlDB.CheckQueryResults(t, `SHOW CREATE TABLE second`, [][]string{{ 3604 "second", `CREATE TABLE second ( 3605 i INT8 NOT NULL, 3606 s STRING NULL, 3607 CONSTRAINT second_pkey PRIMARY KEY (i ASC), 3608 FAMILY "primary" (i, s) 3609 )`, 3610 }}) 3611 res := sqlDB.QueryStr(t, "SELECT * FROM second ORDER BY i") 3612 if expected, actual := secondTableRowCount, len(res); expected != actual { 3613 t.Fatalf("expected %d, got %d", expected, actual) 3614 } 3615 for _, row := range res { 3616 if i, s := row[0], row[1]; i != s { 3617 t.Fatalf("expected %s = %s", i, s) 3618 } 3619 } 3620 } 3621 3622 if c.expected == expectSecond { 3623 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM simple LIMIT 1`) 3624 } 3625 if c.expected == expectSimple { 3626 sqlDB.ExpectErr(t, "does not exist", `SELECT 1 FROM second LIMIT 1`) 3627 } 3628 if c.expected == expectAll { 3629 sqlDB.CheckQueryResults(t, `SHOW CREATE TABLE seqtable`, [][]string{{ 3630 "seqtable", `CREATE TABLE seqtable ( 3631 a INT8 NULL DEFAULT nextval('public.a_seq':::STRING), 3632 b INT8 NULL, 3633 FAMILY "primary" (a, b, rowid) 3634 )`, 3635 }}) 3636 sqlDB.CheckQueryResults(t, `SHOW CREATE SEQUENCE a_seq`, [][]string{{ 3637 "a_seq", `CREATE SEQUENCE a_seq MINVALUE 1 MAXVALUE 9223372036854775807 INCREMENT 1 START 1`, 3638 }}) 3639 sqlDB.CheckQueryResults(t, `select last_value from a_seq`, [][]string{{"7"}}) 3640 sqlDB.CheckQueryResults(t, 3641 `SELECT * FROM seqtable ORDER BY a`, 3642 sqlDB.QueryStr(t, `select a+1, a*10 from generate_series(0, 6) a`), 3643 ) 3644 sqlDB.CheckQueryResults(t, `select last_value from a_seq`, [][]string{{"7"}}) 3645 // This can sometimes retry, so the next value might not be 8. 
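// The dump leaves a_seq at last_value = 7. Sequence values consumed by nextval are not
// returned on a retry, so a retried INSERT may burn more than one value; the checks
// below therefore only assert last_value >= 8 and locate the new row via last_value
// rather than assuming a = 8.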
3646 sqlDB.Exec(t, `INSERT INTO seqtable (b) VALUES (70)`) 3647 sqlDB.CheckQueryResults(t, `select last_value >= 8 from a_seq`, [][]string{{"true"}}) 3648 sqlDB.CheckQueryResults(t, 3649 `SELECT b FROM seqtable WHERE a = (SELECT last_value FROM a_seq)`, 3650 [][]string{{"70"}}, 3651 ) 3652 } 3653 }) 3654 } 3655 } 3656 3657 func TestImportCockroachDump(t *testing.T) { 3658 defer leaktest.AfterTest(t)() 3659 3660 const ( 3661 nodes = 3 3662 ) 3663 ctx := context.Background() 3664 baseDir := filepath.Join("testdata") 3665 args := base.TestServerArgs{ExternalIODir: baseDir} 3666 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3667 defer tc.Stopper().Stop(ctx) 3668 conn := tc.Conns[0] 3669 sqlDB := sqlutils.MakeSQLRunner(conn) 3670 3671 sqlDB.Exec(t, "IMPORT PGDUMP ($1)", "nodelocal://0/cockroachdump/dump.sql") 3672 sqlDB.CheckQueryResults(t, "SELECT * FROM t ORDER BY i", [][]string{ 3673 {"1", "test"}, 3674 {"2", "other"}, 3675 }) 3676 sqlDB.CheckQueryResults(t, "SELECT * FROM a", [][]string{ 3677 {"2"}, 3678 }) 3679 sqlDB.CheckQueryResults(t, "SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE t", [][]string{ 3680 {"primary", "-6413178410144704641"}, 3681 {"t_t_idx", "-4841734847805280813"}, 3682 }) 3683 sqlDB.CheckQueryResults(t, "SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE a", [][]string{ 3684 {"primary", "-5808590958014384147"}, 3685 }) 3686 sqlDB.CheckQueryResults(t, "SHOW CREATE TABLE t", [][]string{ 3687 {"t", `CREATE TABLE t ( 3688 i INT8 NOT NULL, 3689 t STRING NULL, 3690 CONSTRAINT "primary" PRIMARY KEY (i ASC), 3691 INDEX t_t_idx (t ASC), 3692 FAMILY "primary" (i, t) 3693 )`}, 3694 }) 3695 sqlDB.CheckQueryResults(t, "SHOW CREATE TABLE a", [][]string{ 3696 {"a", `CREATE TABLE a ( 3697 i INT8 NOT NULL, 3698 CONSTRAINT "primary" PRIMARY KEY (i ASC), 3699 CONSTRAINT fk_i_ref_t FOREIGN KEY (i) REFERENCES t(i), 3700 FAMILY "primary" (i) 3701 )`}, 3702 }) 3703 } 3704 3705 func TestCreateStatsAfterImport(t *testing.T) { 3706 defer leaktest.AfterTest(t)() 3707 3708 defer func(oldRefreshInterval, oldAsOf time.Duration) { 3709 stats.DefaultRefreshInterval = oldRefreshInterval 3710 stats.DefaultAsOfTime = oldAsOf 3711 }(stats.DefaultRefreshInterval, stats.DefaultAsOfTime) 3712 stats.DefaultRefreshInterval = time.Millisecond 3713 stats.DefaultAsOfTime = time.Microsecond 3714 3715 const nodes = 1 3716 ctx := context.Background() 3717 baseDir := filepath.Join("testdata") 3718 args := base.TestServerArgs{ExternalIODir: baseDir} 3719 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3720 defer tc.Stopper().Stop(ctx) 3721 conn := tc.Conns[0] 3722 sqlDB := sqlutils.MakeSQLRunner(conn) 3723 3724 sqlDB.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=true`) 3725 3726 sqlDB.Exec(t, "IMPORT PGDUMP ($1)", "nodelocal://0/cockroachdump/dump.sql") 3727 3728 // Verify that statistics have been created. 
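// Statistics are collected asynchronously by the automatic stats refresher (its
// interval was lowered to 1ms above), so the retrying CheckQueryResultsRetry variant
// is used instead of a one-shot assertion.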
3729 sqlDB.CheckQueryResultsRetry(t, 3730 `SELECT statistics_name, column_names, row_count, distinct_count, null_count 3731 FROM [SHOW STATISTICS FOR TABLE t]`, 3732 [][]string{ 3733 {"__auto__", "{i}", "2", "2", "0"}, 3734 {"__auto__", "{t}", "2", "2", "0"}, 3735 }) 3736 sqlDB.CheckQueryResultsRetry(t, 3737 `SELECT statistics_name, column_names, row_count, distinct_count, null_count 3738 FROM [SHOW STATISTICS FOR TABLE a]`, 3739 [][]string{ 3740 {"__auto__", "{i}", "1", "1", "0"}, 3741 }) 3742 } 3743 3744 func TestImportAvro(t *testing.T) { 3745 defer leaktest.AfterTest(t)() 3746 3747 const ( 3748 nodes = 3 3749 ) 3750 ctx := context.Background() 3751 baseDir := filepath.Join("testdata", "avro") 3752 args := base.TestServerArgs{ExternalIODir: baseDir} 3753 tc := testcluster.StartTestCluster(t, nodes, base.TestClusterArgs{ServerArgs: args}) 3754 defer tc.Stopper().Stop(ctx) 3755 sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 3756 3757 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_ingest.batch_size = '10KB'`) 3758 sqlDB.Exec(t, `CREATE DATABASE foo; SET DATABASE = foo`) 3759 3760 simpleOcf := fmt.Sprintf("nodelocal://0/%s", "simple.ocf") 3761 simpleSchemaURI := fmt.Sprintf("nodelocal://0/%s", "simple-schema.json") 3762 simpleJSON := fmt.Sprintf("nodelocal://0/%s", "simple-sorted.json") 3763 simplePrettyJSON := fmt.Sprintf("nodelocal://0/%s", "simple-sorted.pjson") 3764 simpleBinRecords := fmt.Sprintf("nodelocal://0/%s", "simple-sorted-records.avro") 3765 tableSchema := fmt.Sprintf("nodelocal://0/%s", "simple-schema.sql") 3766 3767 data, err := ioutil.ReadFile("testdata/avro/simple-schema.json") 3768 if err != nil { 3769 t.Fatal(err) 3770 } 3771 simpleSchema := string(data) 3772 3773 tests := []struct { 3774 name string 3775 sql string 3776 create string 3777 args []interface{} 3778 err bool 3779 }{ 3780 { 3781 name: "import-ocf", 3782 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY, s text, b bytea) AVRO DATA ($1)", 3783 args: []interface{}{simpleOcf}, 3784 }, 3785 { 3786 name: "import-ocf-into-table", 3787 sql: "IMPORT INTO simple AVRO DATA ($1)", 3788 create: "CREATE TABLE simple (i INT8 PRIMARY KEY, s text, b bytea)", 3789 args: []interface{}{simpleOcf}, 3790 }, 3791 { 3792 name: "import-ocf-into-table-with-strict-validation", 3793 sql: "IMPORT INTO simple AVRO DATA ($1) WITH strict_validation", 3794 create: "CREATE TABLE simple (i INT8, s text, b bytea)", 3795 args: []interface{}{simpleOcf}, 3796 }, 3797 { 3798 name: "import-ocf-create-using", 3799 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2)", 3800 args: []interface{}{tableSchema, simpleOcf}, 3801 }, 3802 { 3803 name: "import-json-records", 3804 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2) WITH data_as_json_records, schema_uri=$3", 3805 args: []interface{}{tableSchema, simpleJSON, simpleSchemaURI}, 3806 }, 3807 { 3808 name: "import-json-records-into-table-ignores-extra-fields", 3809 sql: "IMPORT INTO simple AVRO DATA ($1) WITH data_as_json_records, schema_uri=$2", 3810 create: "CREATE TABLE simple (i INT8 PRIMARY KEY)", 3811 args: []interface{}{simpleJSON, simpleSchemaURI}, 3812 }, 3813 { 3814 name: "import-json-records-inline-schema", 3815 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2) WITH data_as_json_records, schema=$3", 3816 args: []interface{}{tableSchema, simpleJSON, simpleSchema}, 3817 }, 3818 { 3819 name: "import-json-pretty-printed-records", 3820 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2) WITH data_as_json_records, schema_uri=$3", 3821 args: []interface{}{tableSchema, simplePrettyJSON, 
simpleSchemaURI}, 3822 }, 3823 { 3824 name: "import-avro-fragments", 3825 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2) WITH data_as_binary_records, records_terminated_by='', schema_uri=$3", 3826 args: []interface{}{tableSchema, simpleBinRecords, simpleSchemaURI}, 3827 }, 3828 { 3829 name: "fail-import-expect-ocf-got-json", 3830 sql: "IMPORT TABLE simple CREATE USING $1 AVRO DATA ($2)", 3831 args: []interface{}{tableSchema, simpleJSON}, 3832 err: true, 3833 }, 3834 { 3835 name: "relaxed-import-sets-missing-fields", 3836 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY, s text, b bytea, z int) AVRO DATA ($1)", 3837 args: []interface{}{simpleOcf}, 3838 }, 3839 { 3840 name: "relaxed-import-ignores-extra-fields", 3841 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY) AVRO DATA ($1)", 3842 args: []interface{}{simpleOcf}, 3843 }, 3844 { 3845 name: "strict-import-errors-missing-fields", 3846 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY, s text, b bytea, z int) AVRO DATA ($1) WITH strict_validation", 3847 args: []interface{}{simpleOcf}, 3848 err: true, 3849 }, 3850 { 3851 name: "strict-import-errors-extra-fields", 3852 sql: "IMPORT TABLE simple (i INT8 PRIMARY KEY) AVRO DATA ($1) WITH strict_validation", 3853 args: []interface{}{simpleOcf}, 3854 err: true, 3855 }, 3856 } 3857 3858 for i, test := range tests { 3859 t.Run(test.name, func(t *testing.T) { 3860 // Play a bit with producer/consumer batch sizes. 3861 defer TestingSetParallelImporterReaderBatchSize(13 * i)() 3862 3863 _, err := sqlDB.DB.ExecContext(context.Background(), `DROP TABLE IF EXISTS simple CASCADE`) 3864 require.NoError(t, err) 3865 3866 if len(test.create) > 0 { 3867 _, err := sqlDB.DB.ExecContext(context.Background(), test.create) 3868 require.NoError(t, err) 3869 } 3870 3871 _, err = sqlDB.DB.ExecContext(context.Background(), test.sql, test.args...) 3872 if test.err { 3873 if err == nil { 3874 t.Error("expected error, but alas") 3875 } 3876 return 3877 } 3878 3879 require.NoError(t, err) 3880 3881 var numRows int 3882 sqlDB.QueryRow(t, `SELECT count(*) FROM simple`).Scan(&numRows) 3883 if numRows == 0 { 3884 t.Error("expected some rows after import") 3885 } 3886 }) 3887 } 3888 } 3889 3890 // TestImportClientDisconnect ensures that an import job can complete even if 3891 // the client connection which started it closes. This test uses a helper 3892 // subprocess to force a closed client connection without needing to rely 3893 // on the driver to close a TCP connection. See TestImportClientDisconnectHelper 3894 // for the subprocess. 3895 func TestImportClientDisconnect(t *testing.T) { 3896 defer leaktest.AfterTest(t)() 3897 3898 ctx, cancel := context.WithCancel(context.Background()) 3899 defer cancel() 3900 args := base.TestClusterArgs{} 3901 tc := testcluster.StartTestCluster(t, 1, args) 3902 defer tc.Stopper().Stop(ctx) 3903 3904 tc.WaitForNodeLiveness(t) 3905 require.NoError(t, tc.WaitForFullReplication()) 3906 3907 conn := tc.ServerConn(0) 3908 runner := sqlutils.MakeSQLRunner(conn) 3909 runner.Exec(t, "SET CLUSTER SETTING kv.protectedts.poll_interval = '100ms';") 3910 3911 // Make a server that will tell us when somebody has sent a request, wait to 3912 // be signaled, and then serve a CSV row for our table. 
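// allowResponse gates the HTTP response until the test has torn down the client
// connection, and gotRequest (buffered so the handler never blocks on it) signals
// that the IMPORT has started fetching the file.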
3913 allowResponse := make(chan struct{}) 3914 gotRequest := make(chan struct{}, 1) 3915 srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 3916 if r.Method != "GET" { 3917 return 3918 } 3919 select { 3920 case gotRequest <- struct{}{}: 3921 default: 3922 } 3923 select { 3924 case <-allowResponse: 3925 case <-ctx.Done(): // Deal with test failures. 3926 } 3927 _, _ = w.Write([]byte("1,asdfasdfasdfasdf")) 3928 })) 3929 defer srv.Close() 3930 3931 // Make credentials for the new connection. 3932 runner.Exec(t, `CREATE USER testuser`) 3933 runner.Exec(t, `GRANT admin TO testuser`) 3934 pgURL, cleanup := sqlutils.PGUrl(t, tc.Server(0).ServingSQLAddr(), 3935 "TestImportClientDisconnect-testuser", url.User("testuser")) 3936 defer cleanup() 3937 3938 // Kick off the import on a new connection which we're going to close. 3939 done := make(chan struct{}) 3940 ctxToCancel, cancel := context.WithCancel(ctx) 3941 defer cancel() 3942 go func() { 3943 defer close(done) 3944 connCfg, err := pgx.ParseConnectionString(pgURL.String()) 3945 assert.NoError(t, err) 3946 db, err := pgx.Connect(connCfg) 3947 assert.NoError(t, err) 3948 defer func() { _ = db.Close() }() 3949 _, err = db.ExecEx(ctxToCancel, `IMPORT TABLE foo (k INT PRIMARY KEY, v STRING) CSV DATA ($1)`, 3950 nil /* options */, srv.URL) 3951 assert.Equal(t, context.Canceled, err) 3952 }() 3953 3954 // Wait for the import job to start. 3955 var jobID string 3956 testutils.SucceedsSoon(t, func() error { 3957 row := conn.QueryRow("SELECT job_id FROM [SHOW JOBS] WHERE job_type = 'IMPORT' ORDER BY created DESC LIMIT 1") 3958 return row.Scan(&jobID) 3959 }) 3960 3961 // Wait for it to actually start. 3962 <-gotRequest 3963 3964 // Cancel the import context and wait for the goroutine to exit. 3965 cancel() 3966 <-done 3967 3968 // Allow the import to proceed. 3969 close(allowResponse) 3970 3971 // Wait for the job to get marked as succeeded. 3972 testutils.SucceedsSoon(t, func() error { 3973 var status string 3974 if err := conn.QueryRow("SELECT status FROM [SHOW JOB " + jobID + "]").Scan(&status); err != nil { 3975 return err 3976 } 3977 const succeeded = "succeeded" 3978 if status != succeeded { 3979 return errors.Errorf("expected %s, got %v", succeeded, status) 3980 } 3981 return nil 3982 }) 3983 } 3984 3985 func TestDisallowsInvalidFormatOptions(t *testing.T) { 3986 defer leaktest.AfterTest(t)() 3987 3988 allOpts := make(map[string]struct{}) 3989 addOpts := func(opts map[string]struct{}) { 3990 for opt := range opts { 3991 allOpts[opt] = struct{}{} 3992 } 3993 } 3994 addOpts(allowedCommonOptions) 3995 addOpts(avroAllowedOptions) 3996 addOpts(csvAllowedOptions) 3997 addOpts(mysqlDumpAllowedOptions) 3998 addOpts(mysqlOutAllowedOptions) 3999 addOpts(pgDumpAllowedOptions) 4000 addOpts(pgCopyAllowedOptions) 4001 4002 // Helper to pick num options from the set of allowed and the set 4003 // of all other options. Returns generated options plus a flag indicating 4004 // if the generated options contain disallowed ones. 
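// pickOpts draws num option names at random: with probability 4/10 it draws from every
// known option across all formats (so disallowed names can appear), otherwise only from
// the format's own allowed set. A draw still counts as allowed if it is one of the
// allowedCommonOptions shared by every format.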
4005 pickOpts := func(num int, allowed map[string]struct{}) (map[string]string, bool) { 4006 opts := make(map[string]string, num) 4007 haveDisallowed := false 4008 var picks []string 4009 if rand.Intn(10) > 5 { 4010 for opt := range allOpts { 4011 picks = append(picks, opt) 4012 } 4013 } else { 4014 for opt := range allowed { 4015 picks = append(picks, opt) 4016 } 4017 } 4018 require.NotNil(t, picks) 4019 4020 for i := 0; i < num; i++ { 4021 pick := picks[rand.Intn(len(picks))] 4022 _, allowed := allowed[pick] 4023 if !allowed { 4024 _, allowed = allowedCommonOptions[pick] 4025 } 4026 if allowed { 4027 opts[pick] = "ok" 4028 } else { 4029 opts[pick] = "bad" 4030 haveDisallowed = true 4031 } 4032 } 4033 4034 return opts, haveDisallowed 4035 } 4036 4037 tests := []struct { 4038 format string 4039 allowed map[string]struct{} 4040 }{ 4041 {"avro", avroAllowedOptions}, 4042 {"csv", csvAllowedOptions}, 4043 {"mysqouout", mysqlOutAllowedOptions}, 4044 {"mysqldump", mysqlDumpAllowedOptions}, 4045 {"pgdump", pgDumpAllowedOptions}, 4046 {"pgcopy", pgCopyAllowedOptions}, 4047 } 4048 4049 for _, tc := range tests { 4050 for i := 0; i < 5; i++ { 4051 opts, haveBadOptions := pickOpts(i, tc.allowed) 4052 t.Run(fmt.Sprintf("validate-%s-%d/badOpts=%t", tc.format, i, haveBadOptions), 4053 func(t *testing.T) { 4054 err := validateFormatOptions(tc.format, opts, tc.allowed) 4055 if haveBadOptions { 4056 require.Error(t, err, opts) 4057 } else { 4058 require.NoError(t, err, opts) 4059 } 4060 }) 4061 } 4062 } 4063 }
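// The randomized test above covers many combinations but is hard to read as
// documentation. The function below is an illustrative, deterministic sketch (not part
// of the original file). It reuses the (format, options, allowed-set) call shape from
// TestDisallowsInvalidFormatOptions and assumes that "nullif" is in csvAllowedOptions
// while the Avro-only "strict_validation" option is in neither csvAllowedOptions nor
// allowedCommonOptions.
func TestDisallowsInvalidFormatOptionsDeterministicSketch(t *testing.T) {
	defer leaktest.AfterTest(t)()

	// A format-specific option that CSV accepts should validate cleanly.
	require.NoError(t, validateFormatOptions(
		"csv", map[string]string{"nullif": ""}, csvAllowedOptions))

	// An option belonging to another format should be rejected for CSV.
	require.Error(t, validateFormatOptions(
		"csv", map[string]string{"strict_validation": ""}, csvAllowedOptions))
}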