github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/backupccl/backup_test.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 package backupccl_test 10 11 import ( 12 "bytes" 13 "context" 14 gosql "database/sql" 15 "fmt" 16 "hash/crc32" 17 "io" 18 "io/ioutil" 19 "math/rand" 20 "net/http" 21 "net/url" 22 "os" 23 "path/filepath" 24 "reflect" 25 "regexp" 26 "strconv" 27 "strings" 28 "sync/atomic" 29 "testing" 30 "time" 31 32 "github.com/cockroachdb/cockroach-go/crdb" 33 "github.com/cockroachdb/cockroach/pkg/base" 34 "github.com/cockroachdb/cockroach/pkg/ccl/backupccl" 35 _ "github.com/cockroachdb/cockroach/pkg/ccl/partitionccl" 36 "github.com/cockroachdb/cockroach/pkg/ccl/utilccl/sampledataccl" 37 "github.com/cockroachdb/cockroach/pkg/config" 38 "github.com/cockroachdb/cockroach/pkg/config/zonepb" 39 "github.com/cockroachdb/cockroach/pkg/jobs" 40 "github.com/cockroachdb/cockroach/pkg/jobs/jobspb" 41 "github.com/cockroachdb/cockroach/pkg/keys" 42 "github.com/cockroachdb/cockroach/pkg/kv" 43 "github.com/cockroachdb/cockroach/pkg/kv/kvclient/kvcoord" 44 "github.com/cockroachdb/cockroach/pkg/kv/kvserver" 45 "github.com/cockroachdb/cockroach/pkg/roachpb" 46 "github.com/cockroachdb/cockroach/pkg/security" 47 "github.com/cockroachdb/cockroach/pkg/sql" 48 "github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv" 49 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 50 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 51 "github.com/cockroachdb/cockroach/pkg/testutils" 52 "github.com/cockroachdb/cockroach/pkg/testutils/jobutils" 53 "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" 54 "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" 55 "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" 56 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 57 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 58 "github.com/cockroachdb/cockroach/pkg/util/randutil" 59 "github.com/cockroachdb/cockroach/pkg/util/retry" 60 "github.com/cockroachdb/cockroach/pkg/util/stop" 61 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 62 "github.com/cockroachdb/cockroach/pkg/workload/bank" 63 "github.com/cockroachdb/cockroach/pkg/workload/workloadsql" 64 "github.com/cockroachdb/errors" 65 "github.com/gogo/protobuf/proto" 66 "github.com/kr/pretty" 67 "github.com/stretchr/testify/require" 68 "golang.org/x/sync/errgroup" 69 ) 70 71 const ( 72 singleNode = 1 73 multiNode = 3 74 backupRestoreDefaultRanges = 10 75 backupRestoreRowPayloadSize = 100 76 localFoo = "nodelocal://0/foo" 77 ) 78 79 func backupRestoreTestSetupWithParams( 80 t testing.TB, 81 clusterSize int, 82 numAccounts int, 83 init func(tc *testcluster.TestCluster), 84 params base.TestClusterArgs, 85 ) ( 86 ctx context.Context, 87 tc *testcluster.TestCluster, 88 sqlDB *sqlutils.SQLRunner, 89 tempDir string, 90 cleanup func(), 91 ) { 92 ctx = context.Background() 93 94 dir, dirCleanupFn := testutils.TempDir(t) 95 params.ServerArgs.ExternalIODir = dir 96 params.ServerArgs.UseDatabase = "data" 97 tc = testcluster.StartTestCluster(t, clusterSize, params) 98 init(tc) 99 100 const payloadSize = 100 101 splits := 10 102 if numAccounts == 0 { 103 splits = 0 104 } 105 bankData := bank.FromConfig(numAccounts, numAccounts, payloadSize, splits) 106 107 sqlDB = sqlutils.MakeSQLRunner(tc.Conns[0]) 108 sqlDB.Exec(t, `CREATE DATABASE data`) 109 l := workloadsql.InsertsDataLoader{BatchSize: 1000, Concurrency: 4} 110 if _, err := workloadsql.Setup(ctx, sqlDB.DB.(*gosql.DB), bankData, l); err != nil { 111 t.Fatalf("%+v", err) 112 } 113 114 if err := tc.WaitForFullReplication(); err != nil { 115 t.Fatal(err) 116 } 117 118 cleanupFn := func() { 119 tc.Stopper().Stop(ctx) // cleans up in memory storage's auxiliary dirs 120 dirCleanupFn() // cleans up dir, which is the nodelocal:// storage 121 } 122 123 return ctx, tc, sqlDB, dir, cleanupFn 124 } 125 126 func backupRestoreTestSetup( 127 t testing.TB, clusterSize int, numAccounts int, init func(*testcluster.TestCluster), 128 ) ( 129 ctx context.Context, 130 tc *testcluster.TestCluster, 131 sqlDB *sqlutils.SQLRunner, 132 tempDir string, 133 cleanup func(), 134 ) { 135 return backupRestoreTestSetupWithParams(t, clusterSize, numAccounts, init, base.TestClusterArgs{}) 136 } 137 138 func backupRestoreTestSetupEmpty( 139 t testing.TB, clusterSize int, tempDir string, init func(*testcluster.TestCluster), 140 ) (ctx context.Context, tc *testcluster.TestCluster, sqlDB *sqlutils.SQLRunner, cleanup func()) { 141 return backupRestoreTestSetupEmptyWithParams(t, clusterSize, tempDir, init, base.TestClusterArgs{}) 142 } 143 144 func verifyBackupRestoreStatementResult( 145 t *testing.T, sqlDB *sqlutils.SQLRunner, query string, args ...interface{}, 146 ) error { 147 t.Helper() 148 rows := sqlDB.Query(t, query, args...) 149 150 columns, err := rows.Columns() 151 if err != nil { 152 return err 153 } 154 if e, a := columns, []string{ 155 "job_id", "status", "fraction_completed", "rows", "index_entries", "bytes", 156 }; !reflect.DeepEqual(e, a) { 157 return errors.Errorf("unexpected columns:\n%s", strings.Join(pretty.Diff(e, a), "\n")) 158 } 159 160 type job struct { 161 id int64 162 status string 163 fractionCompleted float32 164 } 165 166 var expectedJob job 167 var actualJob job 168 var unused int64 169 170 if !rows.Next() { 171 return errors.New("zero rows in result") 172 } 173 if err := rows.Scan( 174 &actualJob.id, &actualJob.status, &actualJob.fractionCompleted, &unused, &unused, &unused, 175 ); err != nil { 176 return err 177 } 178 if rows.Next() { 179 return errors.New("more than one row in result") 180 } 181 182 sqlDB.QueryRow(t, 183 `SELECT job_id, status, fraction_completed FROM crdb_internal.jobs WHERE job_id = $1`, actualJob.id, 184 ).Scan( 185 &expectedJob.id, &expectedJob.status, &expectedJob.fractionCompleted, 186 ) 187 188 if e, a := expectedJob, actualJob; !reflect.DeepEqual(e, a) { 189 return errors.Errorf("result does not match system.jobs:\n%s", 190 strings.Join(pretty.Diff(e, a), "\n")) 191 } 192 193 return nil 194 } 195 196 func generateInterleavedData( 197 sqlDB *sqlutils.SQLRunner, t *testing.T, numAccounts int, 198 ) (int, []string) { 199 _ = sqlDB.Exec(t, `SET CLUSTER SETTING kv.range_merge.queue_enabled = false`) 200 // TODO(dan): The INTERLEAVE IN PARENT clause currently doesn't allow the 201 // `db.table` syntax. Fix that and use it here instead of `SET DATABASE`. 202 _ = sqlDB.Exec(t, `SET DATABASE = data`) 203 _ = sqlDB.Exec(t, `CREATE TABLE strpk (id string, v int, primary key (id, v)) PARTITION BY LIST (id) ( PARTITION ab VALUES IN (('a'), ('b')), PARTITION xy VALUES IN (('x'), ('y')) );`) 204 _ = sqlDB.Exec(t, `ALTER PARTITION ab OF TABLE strpk CONFIGURE ZONE USING gc.ttlseconds = 60`) 205 _ = sqlDB.Exec(t, `INSERT INTO strpk VALUES ('a', 1), ('a', 2), ('x', 100), ('y', 101)`) 206 const numStrPK = 4 207 _ = sqlDB.Exec(t, `CREATE TABLE strpkchild (a string, b int, c int, primary key (a, b, c)) INTERLEAVE IN PARENT strpk (a, b)`) 208 // i0 interleaves in parent with a, and has a multi-col PK of its own b, c 209 _ = sqlDB.Exec(t, `CREATE TABLE i0 (a INT, b INT, c INT, PRIMARY KEY (a, b, c)) INTERLEAVE IN PARENT bank (a)`) 210 // Split at at a _strict prefix_ of the cols in i_0's PK 211 _ = sqlDB.Exec(t, `ALTER TABLE i0 SPLIT AT VALUES (1, 1)`) 212 // i0_0 interleaves into i0. 213 _ = sqlDB.Exec(t, `CREATE TABLE i0_0 (a INT, b INT, c INT, d INT, PRIMARY KEY (a, b, c, d)) INTERLEAVE IN PARENT i0 (a, b, c)`) 214 _ = sqlDB.Exec(t, `CREATE TABLE i1 (a INT, b CHAR, PRIMARY KEY (a, b)) INTERLEAVE IN PARENT bank (a)`) 215 _ = sqlDB.Exec(t, `CREATE TABLE i2 (a INT, b CHAR, PRIMARY KEY (a, b)) INTERLEAVE IN PARENT bank (a)`) 216 // The bank table has numAccounts accounts, put 2x that in i0, 3x in i0_0, 217 // and 4x in i1. 218 totalRows := numAccounts + numStrPK 219 for i := 0; i < numAccounts; i++ { 220 _ = sqlDB.Exec(t, `INSERT INTO i0 VALUES ($1, 1, 1), ($1, 2, 2)`, i) 221 totalRows += 2 222 _ = sqlDB.Exec(t, `INSERT INTO i0_0 VALUES ($1, 1, 1, 1), ($1, 2, 2, 2), ($1, 3, 3, 3)`, i) 223 totalRows += 3 224 _ = sqlDB.Exec(t, `INSERT INTO i1 VALUES ($1, 'a'), ($1, 'b'), ($1, 'c'), ($1, 'd')`, i) 225 totalRows += 4 226 _ = sqlDB.Exec(t, `INSERT INTO i2 VALUES ($1, 'e'), ($1, 'f'), ($1, 'g'), ($1, 'h')`, i) 227 totalRows += 4 228 } 229 // Split some rows to attempt to exercise edge conditions in the key rewriter. 230 _ = sqlDB.Exec(t, `ALTER TABLE i0 SPLIT AT SELECT * from i0 where a % 2 = 0 LIMIT $1`, numAccounts) 231 _ = sqlDB.Exec(t, `ALTER TABLE i0_0 SPLIT AT SELECT * from i0_0 LIMIT $1`, numAccounts) 232 _ = sqlDB.Exec(t, `ALTER TABLE i1 SPLIT AT SELECT * from i1 WHERE a % 3 = 0`) 233 _ = sqlDB.Exec(t, `ALTER TABLE i2 SPLIT AT SELECT * from i2 WHERE a % 5 = 0`) 234 // Truncate will allocate a new ID for i1. At that point the splits we created 235 // above will still exist, but will contain the old table ID. Since the table 236 // does not exist anymore, it will not be in the backup, so the rewriting will 237 // not have a configured rewrite for that part of those splits, but we still 238 // expect RESTORE to succeed. 239 _ = sqlDB.Exec(t, `TRUNCATE i1`) 240 for i := 0; i < numAccounts; i++ { 241 _ = sqlDB.Exec(t, `INSERT INTO i1 VALUES ($1, 'a'), ($1, 'b'), ($1, 'c'), ($1, 'd')`, i) 242 } 243 tableNames := []string{ 244 "strpk", 245 "strpkchild", 246 "i0", 247 "i0_0", 248 "i1", 249 "i2", 250 } 251 return totalRows, tableNames 252 } 253 254 func TestBackupRestoreStatementResult(t *testing.T) { 255 defer leaktest.AfterTest(t)() 256 257 const numAccounts = 1 258 _, _, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 259 defer cleanupFn() 260 261 if err := verifyBackupRestoreStatementResult( 262 t, sqlDB, "BACKUP DATABASE data TO $1", localFoo, 263 ); err != nil { 264 t.Fatal(err) 265 } 266 // The GZipBackupManifest subtest is to verify that BackupManifest objects 267 // have been stored in the GZip compressed format. 268 t.Run("GZipBackupManifest", func(t *testing.T) { 269 backupDir := fmt.Sprintf("%s/foo", dir) 270 backupManifestFile := backupDir + "/" + backupccl.BackupManifestName 271 backupManifestBytes, err := ioutil.ReadFile(backupManifestFile) 272 if err != nil { 273 t.Fatal(err) 274 } 275 fileType := http.DetectContentType(backupManifestBytes) 276 require.Equal(t, backupccl.ZipType, fileType) 277 }) 278 279 sqlDB.Exec(t, "CREATE DATABASE data2") 280 281 if err := verifyBackupRestoreStatementResult( 282 t, sqlDB, "RESTORE data.* FROM $1 WITH OPTIONS ('into_db'='data2')", localFoo, 283 ); err != nil { 284 t.Fatal(err) 285 } 286 } 287 288 func TestBackupRestoreSingleNodeLocal(t *testing.T) { 289 defer leaktest.AfterTest(t)() 290 291 const numAccounts = 1000 292 ctx, tc, _, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 293 defer cleanupFn() 294 295 backupAndRestore(ctx, t, tc, []string{localFoo}, []string{localFoo}, numAccounts) 296 } 297 298 func TestBackupRestoreMultiNodeLocal(t *testing.T) { 299 defer leaktest.AfterTest(t)() 300 301 const numAccounts = 1000 302 ctx, tc, _, _, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 303 defer cleanupFn() 304 305 backupAndRestore(ctx, t, tc, []string{localFoo}, []string{localFoo}, numAccounts) 306 } 307 308 func TestBackupRestoreMultiNodeRemote(t *testing.T) { 309 defer leaktest.AfterTest(t)() 310 311 const numAccounts = 1000 312 ctx, tc, _, _, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 313 defer cleanupFn() 314 // Backing up to node2's local file system 315 remoteFoo := "nodelocal://2/foo" 316 317 backupAndRestore(ctx, t, tc, []string{remoteFoo}, []string{localFoo}, numAccounts) 318 } 319 320 func TestBackupRestorePartitioned(t *testing.T) { 321 defer leaktest.AfterTest(t)() 322 323 const numAccounts = 1000 324 ctx, tc, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 325 defer cleanupFn() 326 327 // Ensure that each node has at least one leaseholder. (These splits were 328 // made in backupRestoreTestSetup.) These are wrapped with SucceedsSoon() 329 // because EXPERIMENTAL_RELOCATE can fail if there are other replication 330 // changes happening. 331 for _, stmt := range []string{ 332 `ALTER TABLE data.bank EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 0)`, 333 `ALTER TABLE data.bank EXPERIMENTAL_RELOCATE VALUES (ARRAY[2], 100)`, 334 `ALTER TABLE data.bank EXPERIMENTAL_RELOCATE VALUES (ARRAY[3], 200)`, 335 } { 336 testutils.SucceedsSoon(t, func() error { 337 _, err := sqlDB.DB.ExecContext(ctx, stmt) 338 return err 339 }) 340 } 341 const localFoo1 = localFoo + "/1" 342 const localFoo2 = localFoo + "/2" 343 const localFoo3 = localFoo + "/3" 344 backupURIs := []string{ 345 fmt.Sprintf("%s?COCKROACH_LOCALITY=%s", localFoo1, url.QueryEscape("default")), 346 fmt.Sprintf("%s?COCKROACH_LOCALITY=%s", localFoo2, url.QueryEscape("dc=dc1")), 347 fmt.Sprintf("%s?COCKROACH_LOCALITY=%s", localFoo3, url.QueryEscape("dc=dc2")), 348 } 349 restoreURIs := []string{ 350 localFoo1, 351 localFoo2, 352 localFoo3, 353 } 354 backupAndRestore(ctx, t, tc, backupURIs, restoreURIs, numAccounts) 355 356 // Verify that at least one SST exists in each backup destination. 357 sstMatcher := regexp.MustCompile(`\d+\.sst`) 358 for i := 1; i <= 3; i++ { 359 subDir := fmt.Sprintf("%s/foo/%d", dir, i) 360 files, err := ioutil.ReadDir(subDir) 361 if err != nil { 362 t.Fatal(err) 363 } 364 found := false 365 for _, f := range files { 366 if sstMatcher.MatchString(f.Name()) { 367 found = true 368 break 369 } 370 } 371 if !found { 372 t.Fatalf("no SSTs found in %s", subDir) 373 } 374 } 375 // The PartitionGZip subtest is to verify that partition descriptor files 376 // are in the GZip compressed format. 377 t.Run("PartitionGZip", func(t *testing.T) { 378 partitionMatcher := regexp.MustCompile(`^BACKUP_PART_`) 379 for i := 1; i <= 3; i++ { 380 subDir := fmt.Sprintf("%s/foo/%d", dir, i) 381 files, err := ioutil.ReadDir(subDir) 382 if err != nil { 383 t.Fatal(err) 384 } 385 for _, f := range files { 386 fName := f.Name() 387 if partitionMatcher.MatchString(fName) { 388 backupPartitionFile := subDir + "/" + fName 389 backupPartitionBytes, err := ioutil.ReadFile(backupPartitionFile) 390 if err != nil { 391 t.Fatal(err) 392 } 393 fileType := http.DetectContentType(backupPartitionBytes) 394 require.Equal(t, backupccl.ZipType, fileType) 395 } 396 } 397 } 398 }) 399 } 400 401 func TestBackupRestoreAppend(t *testing.T) { 402 defer leaktest.AfterTest(t)() 403 404 const numAccounts = 1000 405 ctx, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 406 defer cleanupFn() 407 408 // Ensure that each node has at least one leaseholder. (These splits were 409 // made in backupRestoreTestSetup.) These are wrapped with SucceedsSoon() 410 // because EXPERIMENTAL_RELOCATE can fail if there are other replication 411 // changes happening. 412 for _, stmt := range []string{ 413 `ALTER TABLE data.bank EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 0)`, 414 `ALTER TABLE data.bank EXPERIMENTAL_RELOCATE VALUES (ARRAY[2], 100)`, 415 `ALTER TABLE data.bank EXPERIMENTAL_RELOCATE VALUES (ARRAY[3], 200)`, 416 } { 417 testutils.SucceedsSoon(t, func() error { 418 _, err := sqlDB.DB.ExecContext(ctx, stmt) 419 return err 420 }) 421 } 422 const localFoo1, localFoo2, localFoo3 = localFoo + "/1", localFoo + "/2", localFoo + "/3" 423 424 backups := []interface{}{ 425 fmt.Sprintf("%s?COCKROACH_LOCALITY=%s", localFoo1, url.QueryEscape("default")), 426 fmt.Sprintf("%s?COCKROACH_LOCALITY=%s", localFoo2, url.QueryEscape("dc=dc1")), 427 fmt.Sprintf("%s?COCKROACH_LOCALITY=%s", localFoo3, url.QueryEscape("dc=dc2")), 428 } 429 430 var tsBefore, ts1, ts2 string 431 sqlDB.QueryRow(t, "SELECT cluster_logical_timestamp()").Scan(&tsBefore) 432 433 sqlDB.Exec(t, "BACKUP TO ($1, $2, $3) AS OF SYSTEM TIME "+tsBefore, backups...) 434 435 sqlDB.QueryRow(t, "UPDATE data.bank SET balance = 100 RETURNING cluster_logical_timestamp()").Scan(&ts1) 436 sqlDB.Exec(t, "BACKUP TO ($1, $2, $3) AS OF SYSTEM TIME "+ts1, backups...) 437 438 sqlDB.QueryRow(t, "UPDATE data.bank SET balance = 200 RETURNING cluster_logical_timestamp()").Scan(&ts2) 439 rowsTS2 := sqlDB.QueryStr(t, "SELECT * from data.bank ORDER BY id") 440 sqlDB.Exec(t, "BACKUP TO ($1, $2, $3) AS OF SYSTEM TIME "+ts2, backups...) 441 442 sqlDB.Exec(t, "ALTER TABLE data.bank RENAME TO data.renamed") 443 sqlDB.Exec(t, "BACKUP TO ($1, $2, $3)", backups...) 444 445 sqlDB.ExpectErr(t, "cannot append a backup of specific", "BACKUP system.users TO ($1, $2, $3)", backups...) 446 447 sqlDB.Exec(t, "DROP DATABASE data CASCADE") 448 sqlDB.Exec(t, "RESTORE DATABASE data FROM ($1, $2, $3)", backups...) 449 sqlDB.ExpectErr(t, "relation \"data.bank\" does not exist", "SELECT * FROM data.bank ORDER BY id") 450 sqlDB.CheckQueryResults(t, "SELECT * from data.renamed ORDER BY id", rowsTS2) 451 452 // TODO(dt): test restoring to other backups via AOST. 453 } 454 455 func TestBackupRestorePartitionedMergeDirectories(t *testing.T) { 456 defer leaktest.AfterTest(t)() 457 458 const numAccounts = 1000 459 ctx, tc, _, _, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 460 defer cleanupFn() 461 462 // TODO (lucy): This test writes a partitioned backup where all files are 463 // written to the same directory, which is similar to the case where a backup 464 // is created and then all files are consolidated into the same directory, but 465 // we should still have a separate test where the files are actually moved. 466 const localFoo1 = localFoo + "/1" 467 backupURIs := []string{ 468 fmt.Sprintf("%s?COCKROACH_LOCALITY=%s", localFoo1, url.QueryEscape("default")), 469 fmt.Sprintf("%s?COCKROACH_LOCALITY=%s", localFoo1, url.QueryEscape("dc=dc1")), 470 fmt.Sprintf("%s?COCKROACH_LOCALITY=%s", localFoo1, url.QueryEscape("dc=dc2")), 471 } 472 restoreURIs := []string{ 473 localFoo1, 474 } 475 backupAndRestore(ctx, t, tc, backupURIs, restoreURIs, numAccounts) 476 } 477 478 func TestBackupRestoreEmpty(t *testing.T) { 479 defer leaktest.AfterTest(t)() 480 481 const numAccounts = 0 482 ctx, tc, _, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 483 defer cleanupFn() 484 485 backupAndRestore(ctx, t, tc, []string{localFoo}, []string{localFoo}, numAccounts) 486 } 487 488 // Regression test for #16008. In short, the way RESTORE constructed split keys 489 // for tables with negative primary key data caused AdminSplit to fail. 490 func TestBackupRestoreNegativePrimaryKey(t *testing.T) { 491 defer leaktest.AfterTest(t)() 492 493 const numAccounts = 1000 494 495 ctx, tc, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 496 defer cleanupFn() 497 498 // Give half the accounts negative primary keys. 499 sqlDB.Exec(t, `UPDATE data.bank SET id = $1 - id WHERE id > $1`, numAccounts/2) 500 501 // Resplit that half of the table space. 502 sqlDB.Exec(t, 503 `ALTER TABLE data.bank SPLIT AT SELECT generate_series($1, 0, $2)`, 504 -numAccounts/2, numAccounts/backupRestoreDefaultRanges/2, 505 ) 506 507 backupAndRestore(ctx, t, tc, []string{localFoo}, []string{localFoo}, numAccounts) 508 509 sqlDB.Exec(t, `CREATE UNIQUE INDEX id2 ON data.bank (id)`) 510 sqlDB.Exec(t, `ALTER TABLE data.bank ALTER PRIMARY KEY USING COLUMNS(id)`) 511 512 var unused string 513 var exportedRows, exportedIndexEntries int 514 sqlDB.QueryRow(t, `BACKUP DATABASE data TO $1`, localFoo+"/alteredPK").Scan( 515 &unused, &unused, &unused, &exportedRows, &exportedIndexEntries, &unused, 516 ) 517 if exportedRows != numAccounts { 518 t.Fatalf("expected %d rows, got %d", numAccounts, exportedRows) 519 } 520 expectedIndexEntries := numAccounts * 3 // old PK, new and old secondary idx 521 if exportedIndexEntries != expectedIndexEntries { 522 t.Fatalf("expected %d index entries, got %d", expectedIndexEntries, exportedIndexEntries) 523 } 524 525 } 526 527 func backupAndRestore( 528 ctx context.Context, 529 t *testing.T, 530 tc *testcluster.TestCluster, 531 backupURIs []string, 532 restoreURIs []string, 533 numAccounts int, 534 ) { 535 // uriFmtStringAndArgs returns format strings like "$1" or "($1, $2, $3)" and 536 // an []interface{} of URIs for the BACKUP/RESTORE queries. 537 uriFmtStringAndArgs := func(uris []string) (string, []interface{}) { 538 urisForFormat := make([]interface{}, len(uris)) 539 var fmtString strings.Builder 540 if len(uris) > 1 { 541 fmtString.WriteString("(") 542 } 543 for i, uri := range uris { 544 if i > 0 { 545 fmtString.WriteString(", ") 546 } 547 fmtString.WriteString(fmt.Sprintf("$%d", i+1)) 548 urisForFormat[i] = uri 549 } 550 if len(uris) > 1 { 551 fmtString.WriteString(")") 552 } 553 return fmtString.String(), urisForFormat 554 } 555 556 conn := tc.Conns[0] 557 sqlDB := sqlutils.MakeSQLRunner(conn) 558 { 559 sqlDB.Exec(t, `CREATE INDEX balance_idx ON data.bank (balance)`) 560 testutils.SucceedsSoon(t, func() error { 561 var unused string 562 var createTable string 563 sqlDB.QueryRow(t, `SHOW CREATE TABLE data.bank`).Scan(&unused, &createTable) 564 if !strings.Contains(createTable, "balance_idx") { 565 return errors.New("expected a balance_idx index") 566 } 567 return nil 568 }) 569 570 var unused string 571 var exported struct { 572 rows, idx, bytes int64 573 } 574 575 backupURIFmtString, backupURIArgs := uriFmtStringAndArgs(backupURIs) 576 backupQuery := fmt.Sprintf("BACKUP DATABASE data TO %s", backupURIFmtString) 577 sqlDB.QueryRow(t, backupQuery, backupURIArgs...).Scan( 578 &unused, &unused, &unused, &exported.rows, &exported.idx, &exported.bytes, 579 ) 580 // When numAccounts == 0, our approxBytes formula breaks down because 581 // backups of no data still contain the system.users and system.descriptor 582 // tables. Just skip the check in this case. 583 if numAccounts > 0 { 584 approxBytes := int64(backupRestoreRowPayloadSize * numAccounts) 585 if max := approxBytes * 3; exported.bytes < approxBytes || exported.bytes > max { 586 t.Errorf("expected data size in [%d,%d] but was %d", approxBytes, max, exported.bytes) 587 } 588 } 589 if expected := int64(numAccounts * 1); exported.rows != expected { 590 t.Fatalf("expected %d rows for %d accounts, got %d", expected, numAccounts, exported.rows) 591 } 592 593 found := false 594 const stmt = "SELECT payload FROM system.jobs ORDER BY created DESC LIMIT 10" 595 for rows := sqlDB.Query(t, stmt); rows.Next(); { 596 var payloadBytes []byte 597 if err := rows.Scan(&payloadBytes); err != nil { 598 t.Fatal(err) 599 } 600 601 payload := &jobspb.Payload{} 602 if err := protoutil.Unmarshal(payloadBytes, payload); err != nil { 603 t.Fatal("cannot unmarshal job payload from system.jobs") 604 } 605 606 backupManifest := &backupccl.BackupManifest{} 607 backupPayload, ok := payload.Details.(*jobspb.Payload_Backup) 608 if !ok { 609 t.Logf("job %T is not a backup: %v", payload.Details, payload.Details) 610 continue 611 } 612 backupDetails := backupPayload.Backup 613 found = true 614 if err := protoutil.Unmarshal(backupDetails.BackupManifest, backupManifest); err != nil { 615 t.Fatal("cannot unmarshal backup descriptor from job payload from system.jobs") 616 } 617 if backupManifest.Statistics != nil { 618 t.Fatal("expected statistics field of backup descriptor payload to be nil") 619 } 620 } 621 if !found { 622 t.Fatal("scanned job rows did not contain a backup!") 623 } 624 } 625 626 // Start a new cluster to restore into. 627 { 628 // If the backup is on nodelocal, we need to determine which node it's on. 629 // Othewise, default to 0. 630 backupNodeID := 0 631 uri, err := url.Parse(backupURIs[0]) 632 if err != nil { 633 t.Fatal(err) 634 } 635 if uri.Scheme == "nodelocal" && uri.Host != "" { 636 // If the backup is on nodelocal and has specified a host, expect it to 637 // be an integer. 638 var err error 639 backupNodeID, err = strconv.Atoi(uri.Host) 640 if err != nil { 641 t.Fatal(err) 642 } 643 } 644 args := base.TestServerArgs{ExternalIODir: tc.Servers[backupNodeID].ClusterSettings().ExternalIODir} 645 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 646 defer tcRestore.Stopper().Stop(ctx) 647 sqlDBRestore := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 648 649 // Create some other descriptors to change up IDs 650 sqlDBRestore.Exec(t, `CREATE DATABASE other`) 651 // Force the ID of the restored bank table to be different. 652 sqlDBRestore.Exec(t, `CREATE TABLE other.empty (a INT PRIMARY KEY)`) 653 654 var unused string 655 var restored struct { 656 rows, idx, bytes int64 657 } 658 659 restoreURIFmtString, restoreURIArgs := uriFmtStringAndArgs(restoreURIs) 660 restoreQuery := fmt.Sprintf("RESTORE DATABASE DATA FROM %s", restoreURIFmtString) 661 sqlDBRestore.QueryRow(t, restoreQuery, restoreURIArgs...).Scan( 662 &unused, &unused, &unused, &restored.rows, &restored.idx, &restored.bytes, 663 ) 664 approxBytes := int64(backupRestoreRowPayloadSize * numAccounts) 665 if max := approxBytes * 3; restored.bytes < approxBytes || restored.bytes > max { 666 t.Errorf("expected data size in [%d,%d] but was %d", approxBytes, max, restored.bytes) 667 } 668 if expected := int64(numAccounts); restored.rows != expected { 669 t.Fatalf("expected %d rows for %d accounts, got %d", expected, numAccounts, restored.rows) 670 } 671 if expected := int64(numAccounts); restored.idx != expected { 672 t.Fatalf("expected %d idx rows for %d accounts, got %d", expected, numAccounts, restored.idx) 673 } 674 675 var rowCount int64 676 sqlDBRestore.QueryRow(t, `SELECT count(*) FROM data.bank`).Scan(&rowCount) 677 if rowCount != int64(numAccounts) { 678 t.Fatalf("expected %d rows but found %d", numAccounts, rowCount) 679 } 680 681 sqlDBRestore.QueryRow(t, `SELECT count(*) FROM data.bank@balance_idx`).Scan(&rowCount) 682 if rowCount != int64(numAccounts) { 683 t.Fatalf("expected %d rows but found %d", numAccounts, rowCount) 684 } 685 686 // Verify there's no /Table/51 - /Table/51/1 empty span. 687 { 688 var count int 689 sqlDBRestore.QueryRow(t, ` 690 SELECT count(*) FROM crdb_internal.ranges 691 WHERE start_pretty = ( 692 ('/Table/' || 693 (SELECT table_id FROM crdb_internal.tables 694 WHERE database_name = $1 AND name = $2 695 )::STRING) || 696 '/1' 697 ) 698 `, "data", "bank").Scan(&count) 699 if count != 0 { 700 t.Fatal("unexpected span start at primary index") 701 } 702 } 703 } 704 } 705 706 func TestBackupRestoreSystemTables(t *testing.T) { 707 defer leaktest.AfterTest(t)() 708 709 const numAccounts = 0 710 ctx, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 711 conn := sqlDB.DB.(*gosql.DB) 712 defer cleanupFn() 713 714 // At the time this test was written, these were the only system tables that 715 // were reasonable for a user to backup and restore into another cluster. 716 tables := []string{"locations", "role_members", "users", "zones"} 717 tableSpec := "system." + strings.Join(tables, ", system.") 718 719 // Take a consistent fingerprint of the original tables. 720 var backupAsOf string 721 expectedFingerprints := map[string][][]string{} 722 err := crdb.ExecuteTx(ctx, conn, nil /* txopts */, func(tx *gosql.Tx) error { 723 for _, table := range tables { 724 rows, err := conn.Query("SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE system." + table) 725 if err != nil { 726 return err 727 } 728 defer rows.Close() 729 expectedFingerprints[table], err = sqlutils.RowsToStrMatrix(rows) 730 if err != nil { 731 return err 732 } 733 } 734 // Record the transaction's timestamp so we can take a backup at the 735 // same time. 736 return conn.QueryRow("SELECT cluster_logical_timestamp()").Scan(&backupAsOf) 737 }) 738 if err != nil { 739 t.Fatal(err) 740 } 741 742 // Backup and restore the tables into a new database. 743 sqlDB.Exec(t, `CREATE DATABASE system_new`) 744 sqlDB.Exec(t, fmt.Sprintf(`BACKUP %s TO '%s' AS OF SYSTEM TIME %s`, tableSpec, localFoo, backupAsOf)) 745 sqlDB.Exec(t, fmt.Sprintf(`RESTORE %s FROM '%s' WITH into_db='system_new'`, tableSpec, localFoo)) 746 747 // Verify the fingerprints match. 748 for _, table := range tables { 749 a := sqlDB.QueryStr(t, "SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE system_new."+table) 750 if e := expectedFingerprints[table]; !reflect.DeepEqual(e, a) { 751 t.Fatalf("fingerprints between system.%[1]s and system_new.%[1]s did not match:%s\n", 752 table, strings.Join(pretty.Diff(e, a), "\n")) 753 } 754 } 755 756 // Verify we can't shoot ourselves in the foot by accidentally restoring 757 // directly over the existing system tables. 758 sqlDB.ExpectErr( 759 t, `relation ".+" already exists`, 760 fmt.Sprintf(`RESTORE %s FROM '%s'`, tableSpec, localFoo), 761 ) 762 } 763 764 func TestBackupRestoreSystemJobs(t *testing.T) { 765 defer leaktest.AfterTest(t)() 766 767 const numAccounts = 0 768 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 769 conn := sqlDB.DB.(*gosql.DB) 770 defer cleanupFn() 771 772 sanitizedIncDir := localFoo + "/inc?AWS_SESSION_TOKEN=" 773 incDir := sanitizedIncDir + "secretCredentialsHere" 774 775 sanitizedFullDir := localFoo + "/full?AWS_SESSION_TOKEN=" 776 fullDir := sanitizedFullDir + "moarSecretsHere" 777 778 backupDatabaseID := sqlutils.QueryDatabaseID(t, conn, "data") 779 backupTableID := sqlutils.QueryTableID(t, conn, "data", "public", "bank") 780 781 sqlDB.Exec(t, `CREATE DATABASE restoredb`) 782 restoreDatabaseID := sqlutils.QueryDatabaseID(t, conn, "restoredb") 783 784 // We create a full backup so that, below, we can test that incremental 785 // backups sanitize credentials in "INCREMENTAL FROM" URLs. 786 // 787 // NB: We don't bother making assertions about this full backup since there 788 // are no meaningful differences in how full and incremental backups report 789 // status to the system.jobs table. Since the incremental BACKUP syntax is a 790 // superset of the full BACKUP syntax, we'll cover everything by verifying the 791 // incremental backup below. 792 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, fullDir) 793 sqlDB.Exec(t, `SET DATABASE = data`) 794 795 sqlDB.Exec(t, `BACKUP TABLE bank TO $1 INCREMENTAL FROM $2`, incDir, fullDir) 796 if err := jobutils.VerifySystemJob(t, sqlDB, 1, jobspb.TypeBackup, jobs.StatusSucceeded, jobs.Record{ 797 Username: security.RootUser, 798 Description: fmt.Sprintf( 799 `BACKUP TABLE bank TO '%s' INCREMENTAL FROM '%s'`, 800 sanitizedIncDir+"redacted", sanitizedFullDir+"redacted", 801 ), 802 DescriptorIDs: sqlbase.IDs{ 803 sqlbase.ID(backupDatabaseID), 804 sqlbase.ID(backupTableID), 805 }, 806 }); err != nil { 807 t.Fatal(err) 808 } 809 810 sqlDB.Exec(t, `RESTORE TABLE bank FROM $1, $2 WITH OPTIONS ('into_db'='restoredb')`, fullDir, incDir) 811 if err := jobutils.VerifySystemJob(t, sqlDB, 0, jobspb.TypeRestore, jobs.StatusSucceeded, jobs.Record{ 812 Username: security.RootUser, 813 Description: fmt.Sprintf( 814 `RESTORE TABLE bank FROM '%s', '%s' WITH into_db = 'restoredb'`, 815 sanitizedFullDir+"redacted", sanitizedIncDir+"redacted", 816 ), 817 DescriptorIDs: sqlbase.IDs{ 818 sqlbase.ID(restoreDatabaseID + 1), 819 }, 820 }); err != nil { 821 t.Fatal(err) 822 } 823 } 824 825 type inProgressChecker func(context context.Context, ip inProgressState) error 826 827 // inProgressState holds state about an in-progress backup or restore 828 // for use in inProgressCheckers. 829 type inProgressState struct { 830 *gosql.DB 831 backupTableID uint32 832 dir, name string 833 } 834 835 func (ip inProgressState) latestJobID() (int64, error) { 836 var id int64 837 if err := ip.QueryRow( 838 `SELECT job_id FROM crdb_internal.jobs ORDER BY created DESC LIMIT 1`, 839 ).Scan(&id); err != nil { 840 return 0, err 841 } 842 return id, nil 843 } 844 845 // checkInProgressBackupRestore will run a backup and restore, pausing each 846 // approximately halfway through to run either `checkBackup` or `checkRestore`. 847 func checkInProgressBackupRestore( 848 t testing.TB, checkBackup inProgressChecker, checkRestore inProgressChecker, 849 ) { 850 // To test incremental progress updates, we install a store response filter, 851 // which runs immediately before a KV command returns its response, in our 852 // test cluster. Whenever we see an Export or Import response, we do a 853 // blocking read on the allowResponse channel to give the test a chance to 854 // assert the progress of the job. 855 var allowResponse chan struct{} 856 params := base.TestClusterArgs{} 857 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 858 TestingResponseFilter: func(ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse) *roachpb.Error { 859 for _, ru := range br.Responses { 860 switch ru.GetInner().(type) { 861 case *roachpb.ExportResponse, *roachpb.ImportResponse: 862 <-allowResponse 863 } 864 } 865 return nil 866 }, 867 } 868 869 const numAccounts = 1000 870 const totalExpectedResponses = backupRestoreDefaultRanges 871 872 ctx, _, sqlDB, dir, cleanup := backupRestoreTestSetupWithParams(t, multiNode, numAccounts, initNone, params) 873 conn := sqlDB.DB.(*gosql.DB) 874 defer cleanup() 875 876 sqlDB.Exec(t, `CREATE DATABASE restoredb`) 877 878 backupTableID := sqlutils.QueryTableID(t, conn, "data", "public", "bank") 879 880 do := func(query string, check inProgressChecker) { 881 jobDone := make(chan error) 882 allowResponse = make(chan struct{}, totalExpectedResponses) 883 884 go func() { 885 _, err := conn.Exec(query, localFoo) 886 jobDone <- err 887 }() 888 889 // Allow half the total expected responses to proceed. 890 for i := 0; i < totalExpectedResponses/2; i++ { 891 allowResponse <- struct{}{} 892 } 893 894 err := retry.ForDuration(testutils.DefaultSucceedsSoonDuration, func() error { 895 return check(ctx, inProgressState{ 896 DB: conn, 897 backupTableID: backupTableID, 898 dir: dir, 899 name: "foo", 900 }) 901 }) 902 903 // Close the channel to allow all remaining responses to proceed. We do this 904 // even if the above retry.ForDuration failed, otherwise the test will hang 905 // forever. 906 close(allowResponse) 907 908 if err := <-jobDone; err != nil { 909 t.Fatalf("%q: %+v", query, err) 910 } 911 912 if err != nil { 913 t.Fatal(err) 914 } 915 } 916 917 do(`BACKUP DATABASE data TO $1`, checkBackup) 918 do(`RESTORE data.* FROM $1 WITH OPTIONS ('into_db'='restoredb')`, checkRestore) 919 } 920 921 func TestBackupRestoreSystemJobsProgress(t *testing.T) { 922 defer leaktest.AfterTest(t)() 923 defer jobs.TestingSetProgressThresholds()() 924 925 checkFraction := func(ctx context.Context, ip inProgressState) error { 926 jobID, err := ip.latestJobID() 927 if err != nil { 928 return err 929 } 930 var fractionCompleted float32 931 if err := ip.QueryRow( 932 `SELECT fraction_completed FROM crdb_internal.jobs WHERE job_id = $1`, 933 jobID, 934 ).Scan(&fractionCompleted); err != nil { 935 return err 936 } 937 if fractionCompleted < 0.25 || fractionCompleted > 0.75 { 938 return errors.Errorf( 939 "expected progress to be in range [0.25, 0.75] but got %f", 940 fractionCompleted, 941 ) 942 } 943 return nil 944 } 945 946 checkInProgressBackupRestore(t, checkFraction, checkFraction) 947 } 948 949 func TestBackupRestoreCheckpointing(t *testing.T) { 950 defer leaktest.AfterTest(t)() 951 952 t.Skip("https://github.com/cockroachdb/cockroach/issues/33357") 953 954 defer func(oldInterval time.Duration) { 955 backupccl.BackupCheckpointInterval = oldInterval 956 }(backupccl.BackupCheckpointInterval) 957 backupccl.BackupCheckpointInterval = 0 958 959 var checkpointPath string 960 961 checkBackup := func(ctx context.Context, ip inProgressState) error { 962 checkpointPath = filepath.Join(ip.dir, ip.name, backupccl.BackupManifestCheckpointName) 963 checkpointDescBytes, err := ioutil.ReadFile(checkpointPath) 964 if err != nil { 965 return errors.Errorf("%+v", err) 966 } 967 var checkpointDesc backupccl.BackupManifest 968 if err := protoutil.Unmarshal(checkpointDescBytes, &checkpointDesc); err != nil { 969 return errors.Errorf("%+v", err) 970 } 971 if len(checkpointDesc.Files) == 0 { 972 return errors.Errorf("empty backup checkpoint descriptor") 973 } 974 return nil 975 } 976 977 checkRestore := func(ctx context.Context, ip inProgressState) error { 978 jobID, err := ip.latestJobID() 979 if err != nil { 980 return err 981 } 982 highWaterMark, err := getHighWaterMark(jobID, ip.DB) 983 if err != nil { 984 return err 985 } 986 low := keys.SystemSQLCodec.TablePrefix(ip.backupTableID) 987 high := keys.SystemSQLCodec.TablePrefix(ip.backupTableID + 1) 988 if bytes.Compare(highWaterMark, low) <= 0 || bytes.Compare(highWaterMark, high) >= 0 { 989 return errors.Errorf("expected high-water mark %v to be between %v and %v", 990 highWaterMark, low, high) 991 } 992 return nil 993 } 994 995 checkInProgressBackupRestore(t, checkBackup, checkRestore) 996 997 if _, err := os.Stat(checkpointPath); err == nil { 998 t.Fatalf("backup checkpoint descriptor at %s not cleaned up", checkpointPath) 999 } else if !os.IsNotExist(err) { 1000 t.Fatal(err) 1001 } 1002 } 1003 1004 func createAndWaitForJob( 1005 t *testing.T, 1006 db *sqlutils.SQLRunner, 1007 descriptorIDs []sqlbase.ID, 1008 details jobspb.Details, 1009 progress jobspb.ProgressDetails, 1010 ) { 1011 t.Helper() 1012 now := timeutil.ToUnixMicros(timeutil.Now()) 1013 payload, err := protoutil.Marshal(&jobspb.Payload{ 1014 Username: security.RootUser, 1015 DescriptorIDs: descriptorIDs, 1016 StartedMicros: now, 1017 Details: jobspb.WrapPayloadDetails(details), 1018 Lease: &jobspb.Lease{NodeID: 1}, 1019 }) 1020 if err != nil { 1021 t.Fatal(err) 1022 } 1023 1024 progressBytes, err := protoutil.Marshal(&jobspb.Progress{ 1025 ModifiedMicros: now, 1026 Details: jobspb.WrapProgressDetails(progress), 1027 }) 1028 if err != nil { 1029 t.Fatal(err) 1030 } 1031 1032 var jobID int64 1033 db.QueryRow( 1034 t, `INSERT INTO system.jobs (created, status, payload, progress) VALUES ($1, $2, $3, $4) RETURNING id`, 1035 timeutil.FromUnixMicros(now), jobs.StatusRunning, payload, progressBytes, 1036 ).Scan(&jobID) 1037 jobutils.WaitForJob(t, db, jobID) 1038 } 1039 1040 // TestBackupRestoreResume tests whether backup and restore jobs are properly 1041 // resumed after a coordinator failure. It synthesizes a partially-complete 1042 // backup job and a partially-complete restore job, both with expired leases, by 1043 // writing checkpoints directly to system.jobs, then verifies they are resumed 1044 // and successfully completed within a few seconds. The test additionally 1045 // verifies that backup and restore do not re-perform work the checkpoint claims 1046 // to have completed. 1047 func TestBackupRestoreResume(t *testing.T) { 1048 defer leaktest.AfterTest(t)() 1049 1050 defer func(oldInterval time.Duration) { 1051 jobs.DefaultAdoptInterval = oldInterval 1052 }(jobs.DefaultAdoptInterval) 1053 jobs.DefaultAdoptInterval = 100 * time.Millisecond 1054 1055 ctx := context.Background() 1056 1057 const numAccounts = 1000 1058 _, tc, outerDB, dir, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 1059 defer cleanupFn() 1060 1061 backupTableDesc := sqlbase.GetTableDescriptor(tc.Servers[0].DB(), keys.SystemSQLCodec, "data", "bank") 1062 1063 t.Run("backup", func(t *testing.T) { 1064 sqlDB := sqlutils.MakeSQLRunner(outerDB.DB) 1065 backupStartKey := backupTableDesc.PrimaryIndexSpan(keys.SystemSQLCodec).Key 1066 backupEndKey, err := sqlbase.TestingMakePrimaryIndexKey(backupTableDesc, numAccounts/2) 1067 if err != nil { 1068 t.Fatal(err) 1069 } 1070 backupCompletedSpan := roachpb.Span{Key: backupStartKey, EndKey: backupEndKey} 1071 mockManifest, err := protoutil.Marshal(&backupccl.BackupManifest{ 1072 ClusterID: tc.Servers[0].ClusterID(), 1073 Files: []backupccl.BackupManifest_File{ 1074 {Path: "garbage-checkpoint", Span: backupCompletedSpan}, 1075 }, 1076 }) 1077 if err != nil { 1078 t.Fatal(err) 1079 } 1080 backupDir := dir + "/backup" 1081 if err := os.MkdirAll(backupDir, 0755); err != nil { 1082 t.Fatal(err) 1083 } 1084 checkpointFile := backupDir + "/" + backupccl.BackupManifestCheckpointName 1085 if err := ioutil.WriteFile(checkpointFile, mockManifest, 0644); err != nil { 1086 t.Fatal(err) 1087 } 1088 createAndWaitForJob( 1089 t, sqlDB, []sqlbase.ID{backupTableDesc.ID}, 1090 jobspb.BackupDetails{ 1091 EndTime: tc.Servers[0].Clock().Now(), 1092 URI: "nodelocal://0/backup", 1093 BackupManifest: mockManifest, 1094 }, 1095 jobspb.BackupProgress{}, 1096 ) 1097 1098 // If the backup properly took the (incorrect) checkpoint into account, it 1099 // won't have tried to re-export any keys within backupCompletedSpan. 1100 backupManifestFile := backupDir + "/" + backupccl.BackupManifestName 1101 backupManifestBytes, err := ioutil.ReadFile(backupManifestFile) 1102 if err != nil { 1103 t.Fatal(err) 1104 } 1105 fileType := http.DetectContentType(backupManifestBytes) 1106 if fileType == backupccl.ZipType { 1107 backupManifestBytes, err = backupccl.DecompressData(backupManifestBytes) 1108 require.NoError(t, err) 1109 } 1110 var backupManifest backupccl.BackupManifest 1111 if err := protoutil.Unmarshal(backupManifestBytes, &backupManifest); err != nil { 1112 t.Fatal(err) 1113 } 1114 for _, file := range backupManifest.Files { 1115 if file.Span.Overlaps(backupCompletedSpan) && file.Path != "garbage-checkpoint" { 1116 t.Fatalf("backup re-exported checkpointed span %s", file.Span) 1117 } 1118 } 1119 }) 1120 1121 t.Run("restore", func(t *testing.T) { 1122 sqlDB := sqlutils.MakeSQLRunner(outerDB.DB) 1123 restoreDir := "nodelocal://0/restore" 1124 sqlDB.Exec(t, `BACKUP DATABASE DATA TO $1`, restoreDir) 1125 sqlDB.Exec(t, `CREATE DATABASE restoredb`) 1126 restoreDatabaseID := sqlutils.QueryDatabaseID(t, sqlDB.DB, "restoredb") 1127 restoreTableID, err := catalogkv.GenerateUniqueDescID(ctx, tc.Servers[0].DB(), keys.SystemSQLCodec) 1128 if err != nil { 1129 t.Fatal(err) 1130 } 1131 restoreHighWaterMark, err := sqlbase.TestingMakePrimaryIndexKey(backupTableDesc, numAccounts/2) 1132 if err != nil { 1133 t.Fatal(err) 1134 } 1135 createAndWaitForJob( 1136 t, sqlDB, []sqlbase.ID{restoreTableID}, 1137 jobspb.RestoreDetails{ 1138 TableRewrites: map[sqlbase.ID]*jobspb.RestoreDetails_TableRewrite{ 1139 backupTableDesc.ID: { 1140 ParentID: sqlbase.ID(restoreDatabaseID), 1141 TableID: restoreTableID, 1142 }, 1143 }, 1144 URIs: []string{restoreDir}, 1145 }, 1146 jobspb.RestoreProgress{ 1147 HighWater: restoreHighWaterMark, 1148 }, 1149 ) 1150 // If the restore properly took the (incorrect) low-water mark into account, 1151 // the first half of the table will be missing. 1152 var restoredCount int64 1153 sqlDB.QueryRow(t, `SELECT count(*) FROM restoredb.bank`).Scan(&restoredCount) 1154 if e, a := int64(numAccounts)/2, restoredCount; e != a { 1155 t.Fatalf("expected %d restored rows, but got %d\n", e, a) 1156 } 1157 sqlDB.Exec(t, `DELETE FROM data.bank WHERE id < $1`, numAccounts/2) 1158 sqlDB.CheckQueryResults(t, 1159 `SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE restoredb.bank`, 1160 sqlDB.QueryStr(t, `SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE data.bank`), 1161 ) 1162 }) 1163 } 1164 1165 func getHighWaterMark(jobID int64, sqlDB *gosql.DB) (roachpb.Key, error) { 1166 var progressBytes []byte 1167 if err := sqlDB.QueryRow( 1168 `SELECT progress FROM system.jobs WHERE id = $1`, jobID, 1169 ).Scan(&progressBytes); err != nil { 1170 return nil, err 1171 } 1172 var payload jobspb.Progress 1173 if err := protoutil.Unmarshal(progressBytes, &payload); err != nil { 1174 return nil, err 1175 } 1176 switch d := payload.Details.(type) { 1177 case *jobspb.Progress_Restore: 1178 return d.Restore.HighWater, nil 1179 default: 1180 return nil, errors.Errorf("unexpected job details type %T", d) 1181 } 1182 } 1183 1184 // TestBackupRestoreControlJob tests that PAUSE JOB, RESUME JOB, and CANCEL JOB 1185 // work as intended on backup and restore jobs. 1186 func TestBackupRestoreControlJob(t *testing.T) { 1187 defer leaktest.AfterTest(t)() 1188 t.Skip("#24136") 1189 1190 // force every call to update 1191 defer jobs.TestingSetProgressThresholds()() 1192 1193 defer func(oldInterval time.Duration) { 1194 jobs.DefaultAdoptInterval = oldInterval 1195 }(jobs.DefaultAdoptInterval) 1196 jobs.DefaultAdoptInterval = 100 * time.Millisecond 1197 1198 serverArgs := base.TestServerArgs{} 1199 // Disable external processing of mutations so that the final check of 1200 // crdb_internal.tables is guaranteed to not be cleaned up. Although this 1201 // was never observed by a stress test, it is here for safety. 1202 serverArgs.Knobs.SQLSchemaChanger = &sql.SchemaChangerTestingKnobs{ 1203 // TODO (lucy): if/when this test gets reinstated, figure out what knobs are 1204 // needed. 1205 } 1206 1207 // PAUSE JOB and CANCEL JOB are racy in that it's hard to guarantee that the 1208 // job is still running when executing a PAUSE or CANCEL--or that the job has 1209 // even started running. To synchronize, we install a store response filter 1210 // which does a blocking receive whenever it encounters an export or import 1211 // response. Below, when we want to guarantee the job is in progress, we do 1212 // exactly one blocking send. When this send completes, we know the job has 1213 // started, as we've seen one export or import response. We also know the job 1214 // has not finished, because we're blocking all future export and import 1215 // responses until we close the channel, and our backup or restore is large 1216 // enough that it will generate more than one export or import response. 1217 var allowResponse chan struct{} 1218 params := base.TestClusterArgs{ServerArgs: serverArgs} 1219 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 1220 TestingResponseFilter: jobutils.BulkOpResponseFilter(&allowResponse), 1221 } 1222 1223 // We need lots of ranges to see what happens when they get chunked. Rather 1224 // than make a huge table, dial down the zone config for the bank table. 1225 init := func(tc *testcluster.TestCluster) { 1226 config.TestingSetupZoneConfigHook(tc.Stopper()) 1227 v, err := tc.Servers[0].DB().Get(context.Background(), keys.SystemSQLCodec.DescIDSequenceKey()) 1228 if err != nil { 1229 t.Fatal(err) 1230 } 1231 last := uint32(v.ValueInt()) 1232 zoneConfig := zonepb.DefaultZoneConfig() 1233 zoneConfig.RangeMaxBytes = proto.Int64(5000) 1234 config.TestingSetZoneConfig(last+1, zoneConfig) 1235 } 1236 const numAccounts = 1000 1237 _, _, outerDB, _, cleanup := backupRestoreTestSetupWithParams(t, multiNode, numAccounts, init, params) 1238 defer cleanup() 1239 1240 sqlDB := sqlutils.MakeSQLRunner(outerDB.DB) 1241 1242 t.Run("foreign", func(t *testing.T) { 1243 foreignDir := "nodelocal://0/foreign" 1244 sqlDB.Exec(t, `CREATE DATABASE orig_fkdb`) 1245 sqlDB.Exec(t, `CREATE DATABASE restore_fkdb`) 1246 sqlDB.Exec(t, `CREATE TABLE orig_fkdb.fk (i INT REFERENCES data.bank)`) 1247 // Generate some FK data with splits so backup/restore block correctly. 1248 for i := 0; i < 10; i++ { 1249 sqlDB.Exec(t, `INSERT INTO orig_fkdb.fk (i) VALUES ($1)`, i) 1250 sqlDB.Exec(t, `ALTER TABLE orig_fkdb.fk SPLIT AT VALUES ($1)`, i) 1251 } 1252 1253 for i, query := range []string{ 1254 `BACKUP TABLE orig_fkdb.fk TO $1`, 1255 `RESTORE TABLE orig_fkdb.fk FROM $1 WITH OPTIONS ('skip_missing_foreign_keys', 'into_db'='restore_fkdb')`, 1256 } { 1257 jobID, err := jobutils.RunJob(t, sqlDB, &allowResponse, []string{"PAUSE"}, query, foreignDir) 1258 if !testutils.IsError(err, "job paused") { 1259 t.Fatalf("%d: expected 'job paused' error, but got %+v", i, err) 1260 } 1261 sqlDB.Exec(t, fmt.Sprintf(`RESUME JOB %d`, jobID)) 1262 jobutils.WaitForJob(t, sqlDB, jobID) 1263 } 1264 1265 sqlDB.CheckQueryResults(t, 1266 `SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE orig_fkdb.fk`, 1267 sqlDB.QueryStr(t, `SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE restore_fkdb.fk`), 1268 ) 1269 }) 1270 1271 t.Run("pause", func(t *testing.T) { 1272 pauseDir := "nodelocal://0/pause" 1273 noOfflineDir := "nodelocal://0/no-offline" 1274 sqlDB.Exec(t, `CREATE DATABASE pause`) 1275 1276 for i, query := range []string{ 1277 `BACKUP DATABASE data TO $1`, 1278 `RESTORE TABLE data.* FROM $1 WITH OPTIONS ('into_db'='pause')`, 1279 } { 1280 ops := []string{"PAUSE", "RESUME", "PAUSE"} 1281 jobID, err := jobutils.RunJob(t, sqlDB, &allowResponse, ops, query, pauseDir) 1282 if !testutils.IsError(err, "job paused") { 1283 t.Fatalf("%d: expected 'job paused' error, but got %+v", i, err) 1284 } 1285 if i > 0 { 1286 sqlDB.CheckQueryResults(t, 1287 `SELECT name FROM crdb_internal.tables WHERE database_name = 'pause' AND state = 'OFFLINE'`, 1288 [][]string{{"bank"}}, 1289 ) 1290 // Ensure that OFFLINE tables can be accessed to set zone configs. 1291 sqlDB.Exec(t, `ALTER TABLE pause.bank CONFIGURE ZONE USING constraints='[+dc=dc1]'`) 1292 // Ensure that OFFLINE tables are not included in a BACKUP. 1293 sqlDB.ExpectErr(t, `table "pause.public.bank" does not exist`, `BACKUP pause.bank TO $1`, noOfflineDir) 1294 sqlDB.Exec(t, `BACKUP pause.* TO $1`, noOfflineDir) 1295 sqlDB.CheckQueryResults(t, fmt.Sprintf("SHOW BACKUP '%s'", noOfflineDir), [][]string{}) 1296 } 1297 sqlDB.Exec(t, fmt.Sprintf(`RESUME JOB %d`, jobID)) 1298 jobutils.WaitForJob(t, sqlDB, jobID) 1299 } 1300 sqlDB.CheckQueryResults(t, 1301 `SELECT count(*) FROM pause.bank`, 1302 sqlDB.QueryStr(t, `SELECT count(*) FROM data.bank`), 1303 ) 1304 1305 sqlDB.CheckQueryResults(t, 1306 `SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE pause.bank`, 1307 sqlDB.QueryStr(t, `SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE data.bank`), 1308 ) 1309 }) 1310 1311 t.Run("pause-cancel", func(t *testing.T) { 1312 backupDir := "nodelocal://0/backup" 1313 1314 backupJobID, err := jobutils.RunJob(t, sqlDB, &allowResponse, nil, "BACKUP DATABASE data TO $1", backupDir) 1315 if err != nil { 1316 t.Fatalf("error while running backup %+v", err) 1317 } 1318 jobutils.WaitForJob(t, sqlDB, backupJobID) 1319 1320 sqlDB.Exec(t, `DROP DATABASE data`) 1321 1322 query := `RESTORE DATABASE data FROM $1` 1323 ops := []string{"PAUSE"} 1324 jobID, err := jobutils.RunJob(t, sqlDB, &allowResponse, ops, query, backupDir) 1325 if !testutils.IsError(err, "job paused") { 1326 t.Fatalf("expected 'job paused' error, but got %+v", err) 1327 } 1328 1329 // Create a table while the RESTORE is in progress on the database that was 1330 // created by the restore. 1331 sqlDB.Exec(t, `CREATE TABLE data.new_table (a int)`) 1332 1333 // Do things while the job is paused. 1334 sqlDB.Exec(t, `CANCEL JOB $1`, jobID) 1335 1336 // Ensure that the tables created by the user, during the RESTORE are 1337 // still present. Also ensure that the table that was being restored (bank) 1338 // is not. 1339 sqlDB.Exec(t, `USE data;`) 1340 sqlDB.CheckQueryResults(t, `SHOW TABLES;`, [][]string{{"public", "new_table", "table"}}) 1341 }) 1342 1343 t.Run("cancel", func(t *testing.T) { 1344 cancelDir := "nodelocal://0/cancel" 1345 sqlDB.Exec(t, `CREATE DATABASE cancel`) 1346 1347 for i, query := range []string{ 1348 `BACKUP DATABASE data TO $1`, 1349 `RESTORE TABLE data.* FROM $1 WITH OPTIONS ('into_db'='cancel')`, 1350 } { 1351 if _, err := jobutils.RunJob( 1352 t, sqlDB, &allowResponse, []string{"cancel"}, query, cancelDir, 1353 ); !testutils.IsError(err, "job canceled") { 1354 t.Fatalf("%d: expected 'job canceled' error, but got %+v", i, err) 1355 } 1356 // Check that executing the same backup or restore succeeds. This won't 1357 // work if the first backup or restore was not successfully canceled. 1358 sqlDB.Exec(t, query, cancelDir) 1359 } 1360 // Verify the canceled RESTORE added some DROP tables. 1361 sqlDB.CheckQueryResults(t, 1362 `SELECT name FROM crdb_internal.tables WHERE database_name = 'cancel' AND state = 'DROP'`, 1363 [][]string{{"bank"}}, 1364 ) 1365 }) 1366 } 1367 1368 // TestRestoreFailCleanup tests that a failed RESTORE is cleaned up. 1369 func TestRestoreFailCleanup(t *testing.T) { 1370 defer leaktest.AfterTest(t)() 1371 1372 params := base.TestServerArgs{} 1373 // Disable GC job so that the final check of crdb_internal.tables is 1374 // guaranteed to not be cleaned up. Although this was never observed by a 1375 // stress test, it is here for safety. 1376 params.Knobs.GCJob = &sql.GCJobTestingKnobs{RunBeforeResume: func(_ int64) error { select {} /* blocks forever */ }} 1377 1378 const numAccounts = 1000 1379 _, _, sqlDB, dir, cleanup := backupRestoreTestSetupWithParams(t, singleNode, numAccounts, 1380 initNone, base.TestClusterArgs{ServerArgs: params}) 1381 defer cleanup() 1382 1383 dir = dir + "/foo" 1384 1385 sqlDB.Exec(t, `CREATE DATABASE restore`) 1386 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, localFoo) 1387 // Bugger the backup by removing the SST files. 1388 if err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { 1389 if err != nil { 1390 t.Fatal(err) 1391 } 1392 if info.Name() == backupccl.BackupManifestName || !strings.HasSuffix(path, ".sst") { 1393 return nil 1394 } 1395 return os.Remove(path) 1396 }); err != nil { 1397 t.Fatal(err) 1398 } 1399 sqlDB.ExpectErr( 1400 t, "sst: no such file", 1401 `RESTORE data.* FROM $1 WITH OPTIONS ('into_db'='restore')`, localFoo, 1402 ) 1403 // Verify the failed RESTORE added some DROP tables. 1404 sqlDB.CheckQueryResults(t, 1405 `SELECT name FROM crdb_internal.tables WHERE database_name = 'restore' AND state = 'DROP'`, 1406 [][]string{{"bank"}}, 1407 ) 1408 } 1409 1410 // TestRestoreFailDatabaseCleanup tests that a failed RESTORE is cleaned up 1411 // when restoring an entire database. 1412 func TestRestoreFailDatabaseCleanup(t *testing.T) { 1413 defer leaktest.AfterTest(t)() 1414 1415 params := base.TestServerArgs{} 1416 // Disable external processing of mutations so that the final check of 1417 // crdb_internal.tables is guaranteed to not be cleaned up. Although this 1418 // was never observed by a stress test, it is here for safety. 1419 blockGC := make(chan struct{}) 1420 params.Knobs.GCJob = &sql.GCJobTestingKnobs{RunBeforeResume: func(_ int64) error { <-blockGC; return nil }} 1421 1422 const numAccounts = 1000 1423 _, _, sqlDB, dir, cleanup := backupRestoreTestSetupWithParams(t, singleNode, numAccounts, 1424 initNone, base.TestClusterArgs{ServerArgs: params}) 1425 defer cleanup() 1426 1427 dir = dir + "/foo" 1428 1429 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, localFoo) 1430 // Bugger the backup by removing the SST files. 1431 if err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { 1432 if err != nil { 1433 t.Fatal(err) 1434 } 1435 if info.Name() == backupccl.BackupManifestName || !strings.HasSuffix(path, ".sst") { 1436 return nil 1437 } 1438 return os.Remove(path) 1439 }); err != nil { 1440 t.Fatal(err) 1441 } 1442 sqlDB.Exec(t, `DROP DATABASE data`) 1443 sqlDB.ExpectErr( 1444 t, "sst: no such file", 1445 `RESTORE DATABASE data FROM $1`, localFoo, 1446 ) 1447 sqlDB.ExpectErr( 1448 t, `database "data" does not exist`, 1449 `DROP DATABASE data`, 1450 ) 1451 close(blockGC) 1452 } 1453 1454 func TestBackupRestoreInterleaved(t *testing.T) { 1455 defer leaktest.AfterTest(t)() 1456 const numAccounts = 20 1457 1458 _, _, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 1459 defer cleanupFn() 1460 args := base.TestServerArgs{ExternalIODir: dir} 1461 1462 totalRows, _ := generateInterleavedData(sqlDB, t, numAccounts) 1463 1464 var unused string 1465 var exportedRows int 1466 sqlDB.QueryRow(t, `BACKUP DATABASE data TO $1`, localFoo).Scan( 1467 &unused, &unused, &unused, &exportedRows, &unused, &unused, 1468 ) 1469 if exportedRows != totalRows { 1470 // TODO(dt): fix row-count including interleaved garbarge 1471 t.Logf("expected %d rows in BACKUP, got %d", totalRows, exportedRows) 1472 } 1473 1474 t.Run("all tables in interleave hierarchy", func(t *testing.T) { 1475 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1476 defer tcRestore.Stopper().Stop(context.Background()) 1477 sqlDBRestore := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 1478 // Create a dummy database to verify rekeying is correctly performed. 1479 sqlDBRestore.Exec(t, `CREATE DATABASE ignored`) 1480 sqlDBRestore.Exec(t, `CREATE DATABASE data`) 1481 1482 var importedRows int 1483 sqlDBRestore.QueryRow(t, `RESTORE data.* FROM $1`, localFoo).Scan( 1484 &unused, &unused, &unused, &importedRows, &unused, &unused, 1485 ) 1486 1487 if importedRows != totalRows { 1488 t.Fatalf("expected %d rows, got %d", totalRows, importedRows) 1489 } 1490 1491 var rowCount int64 1492 sqlDBRestore.QueryRow(t, `SELECT count(*) FROM data.bank`).Scan(&rowCount) 1493 if rowCount != numAccounts { 1494 t.Errorf("expected %d rows but found %d", numAccounts, rowCount) 1495 } 1496 sqlDBRestore.QueryRow(t, `SELECT count(*) FROM data.i0`).Scan(&rowCount) 1497 if rowCount != 2*numAccounts { 1498 t.Errorf("expected %d rows but found %d", 2*numAccounts, rowCount) 1499 } 1500 sqlDBRestore.QueryRow(t, `SELECT count(*) FROM data.i0_0`).Scan(&rowCount) 1501 if rowCount != 3*numAccounts { 1502 t.Errorf("expected %d rows but found %d", 3*numAccounts, rowCount) 1503 } 1504 sqlDBRestore.QueryRow(t, `SELECT count(*) FROM data.i1`).Scan(&rowCount) 1505 if rowCount != 4*numAccounts { 1506 t.Errorf("expected %d rows but found %d", 4*numAccounts, rowCount) 1507 } 1508 }) 1509 1510 t.Run("interleaved table without parent", func(t *testing.T) { 1511 sqlDB.ExpectErr(t, "without interleave parent", `BACKUP data.i0 TO $1`, localFoo) 1512 1513 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1514 defer tcRestore.Stopper().Stop(context.Background()) 1515 sqlDBRestore := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 1516 sqlDBRestore.Exec(t, `CREATE DATABASE data`) 1517 sqlDBRestore.ExpectErr( 1518 t, "without interleave parent", 1519 `RESTORE TABLE data.i0 FROM $1`, localFoo, 1520 ) 1521 }) 1522 1523 t.Run("interleaved table without child", func(t *testing.T) { 1524 sqlDB.ExpectErr(t, "without interleave child", `BACKUP data.bank TO $1`, localFoo) 1525 1526 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1527 defer tcRestore.Stopper().Stop(context.Background()) 1528 sqlDBRestore := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 1529 sqlDBRestore.Exec(t, `CREATE DATABASE data`) 1530 sqlDBRestore.ExpectErr(t, "without interleave child", `RESTORE TABLE data.bank FROM $1`, localFoo) 1531 }) 1532 } 1533 1534 func TestBackupRestoreCrossTableReferences(t *testing.T) { 1535 defer leaktest.AfterTest(t)() 1536 1537 const numAccounts = 30 1538 const createStore = "CREATE DATABASE store" 1539 const createStoreStats = "CREATE DATABASE storestats" 1540 1541 _, _, origDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 1542 defer cleanupFn() 1543 args := base.TestServerArgs{ExternalIODir: dir} 1544 1545 // Generate some testdata and back it up. 1546 { 1547 origDB.Exec(t, createStore) 1548 origDB.Exec(t, createStoreStats) 1549 1550 // customers has multiple inbound FKs, to different indexes. 1551 origDB.Exec(t, `CREATE TABLE store.customers ( 1552 id INT PRIMARY KEY, 1553 email STRING UNIQUE 1554 )`) 1555 1556 // orders has both in and outbound FKs (receipts and customers). 1557 // the index on placed makes indexIDs non-contiguous. 1558 origDB.Exec(t, `CREATE TABLE store.orders ( 1559 id INT PRIMARY KEY, 1560 placed TIMESTAMP, 1561 INDEX (placed DESC), 1562 customerid INT REFERENCES store.customers 1563 )`) 1564 1565 // unused makes our table IDs non-contiguous. 1566 origDB.Exec(t, `CREATE TABLE data.unused (id INT PRIMARY KEY)`) 1567 1568 // receipts is has a self-referential FK. 1569 origDB.Exec(t, `CREATE TABLE store.receipts ( 1570 id INT PRIMARY KEY, 1571 reissue INT REFERENCES store.receipts(id), 1572 dest STRING REFERENCES store.customers(email), 1573 orderid INT REFERENCES store.orders 1574 )`) 1575 1576 // and a few views for good measure. 1577 origDB.Exec(t, `CREATE VIEW store.early_customers AS SELECT id, email from store.customers WHERE id < 5`) 1578 origDB.Exec(t, `CREATE VIEW storestats.ordercounts AS 1579 SELECT c.id, c.email, count(o.id) 1580 FROM store.customers AS c 1581 LEFT OUTER JOIN store.orders AS o ON o.customerid = c.id 1582 GROUP BY c.id, c.email 1583 ORDER BY c.id, c.email 1584 `) 1585 origDB.Exec(t, `CREATE VIEW store.unused_view AS SELECT id from store.customers WHERE FALSE`) 1586 origDB.Exec(t, `CREATE VIEW store.referencing_early_customers AS SELECT id, email FROM store.early_customers`) 1587 1588 for i := 0; i < numAccounts; i++ { 1589 origDB.Exec(t, `INSERT INTO store.customers VALUES ($1, $1::string)`, i) 1590 } 1591 // Each even customerID gets 3 orders, with predictable order and receipt IDs. 1592 for cID := 0; cID < numAccounts; cID += 2 { 1593 for i := 0; i < 3; i++ { 1594 oID := cID*100 + i 1595 rID := oID * 10 1596 origDB.Exec(t, `INSERT INTO store.orders VALUES ($1, now(), $2)`, oID, cID) 1597 origDB.Exec(t, `INSERT INTO store.receipts VALUES ($1, NULL, $2, $3)`, rID, cID, oID) 1598 if i > 1 { 1599 origDB.Exec(t, `INSERT INTO store.receipts VALUES ($1, $2, $3, $4)`, rID+1, rID, cID, oID) 1600 } 1601 } 1602 } 1603 _ = origDB.Exec(t, `BACKUP DATABASE store, storestats TO $1`, localFoo) 1604 } 1605 1606 origCustomers := origDB.QueryStr(t, `SHOW CONSTRAINTS FROM store.customers`) 1607 origOrders := origDB.QueryStr(t, `SHOW CONSTRAINTS FROM store.orders`) 1608 origReceipts := origDB.QueryStr(t, `SHOW CONSTRAINTS FROM store.receipts`) 1609 1610 origEarlyCustomers := origDB.QueryStr(t, `SELECT * from store.early_customers`) 1611 origOrderCounts := origDB.QueryStr(t, `SELECT * from storestats.ordercounts ORDER BY id`) 1612 1613 t.Run("restore everything to new cluster", func(t *testing.T) { 1614 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1615 defer tc.Stopper().Stop(context.Background()) 1616 db := sqlutils.MakeSQLRunner(tc.Conns[0]) 1617 1618 db.Exec(t, createStore) 1619 db.Exec(t, `RESTORE store.* FROM $1`, localFoo) 1620 // Restore's Validate checks all the tables point to each other correctly. 1621 1622 db.CheckQueryResults(t, `SHOW CONSTRAINTS FROM store.customers`, origCustomers) 1623 db.CheckQueryResults(t, `SHOW CONSTRAINTS FROM store.orders`, origOrders) 1624 db.CheckQueryResults(t, `SHOW CONSTRAINTS FROM store.receipts`, origReceipts) 1625 1626 // FK validation on customers from receipts is preserved. 1627 db.ExpectErr( 1628 t, "update.*violates foreign key constraint \"fk_dest_ref_customers\"", 1629 `UPDATE store.customers SET email = concat(id::string, 'nope')`, 1630 ) 1631 1632 // FK validation on customers from orders is preserved. 1633 db.ExpectErr( 1634 t, "update.*violates foreign key constraint \"fk_customerid_ref_customers\"", 1635 `UPDATE store.customers SET id = id * 1000`, 1636 ) 1637 1638 // FK validation of customer id is preserved. 1639 db.ExpectErr( 1640 t, "insert.*violates foreign key constraint \"fk_customerid_ref_customers\"", 1641 `INSERT INTO store.orders VALUES (999, NULL, 999)`, 1642 ) 1643 1644 // FK validation of self-FK is preserved. 1645 db.ExpectErr( 1646 t, "insert.*violates foreign key constraint \"fk_reissue_ref_receipts\"", 1647 `INSERT INTO store.receipts VALUES (1, 999, NULL, NULL)`, 1648 ) 1649 }) 1650 1651 t.Run("restore customers to new cluster", func(t *testing.T) { 1652 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1653 defer tc.Stopper().Stop(context.Background()) 1654 db := sqlutils.MakeSQLRunner(tc.Conns[0]) 1655 db.Exec(t, createStore) 1656 db.Exec(t, `RESTORE store.customers, store.orders FROM $1`, localFoo) 1657 // Restore's Validate checks all the tables point to each other correctly. 1658 1659 // FK validation on customers from orders is preserved. 1660 db.ExpectErr( 1661 t, "update.*violates foreign key constraint \"fk_customerid_ref_customers\"", 1662 `UPDATE store.customers SET id = id*100`, 1663 ) 1664 1665 // FK validation on customers from receipts is gone. 1666 db.Exec(t, `UPDATE store.customers SET email = id::string`) 1667 }) 1668 1669 t.Run("restore orders to new cluster", func(t *testing.T) { 1670 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1671 defer tc.Stopper().Stop(context.Background()) 1672 db := sqlutils.MakeSQLRunner(tc.Conns[0]) 1673 db.Exec(t, createStore) 1674 1675 // FK validation of self-FK is preserved. 1676 db.ExpectErr( 1677 t, "cannot restore table \"orders\" without referenced table .* \\(or \"skip_missing_foreign_keys\" option\\)", 1678 `RESTORE store.orders FROM $1`, localFoo, 1679 ) 1680 1681 db.Exec(t, `RESTORE store.orders FROM $1 WITH OPTIONS ('skip_missing_foreign_keys')`, localFoo) 1682 // Restore's Validate checks all the tables point to each other correctly. 1683 1684 // FK validation is gone. 1685 db.Exec(t, `INSERT INTO store.orders VALUES (999, NULL, 999)`) 1686 db.Exec(t, `DELETE FROM store.orders`) 1687 }) 1688 1689 t.Run("restore receipts to new cluster", func(t *testing.T) { 1690 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1691 defer tc.Stopper().Stop(context.Background()) 1692 db := sqlutils.MakeSQLRunner(tc.Conns[0]) 1693 db.Exec(t, createStore) 1694 db.Exec(t, `RESTORE store.receipts FROM $1 WITH OPTIONS ('skip_missing_foreign_keys')`, localFoo) 1695 // Restore's Validate checks all the tables point to each other correctly. 1696 1697 // FK validation of orders and customer is gone. 1698 db.Exec(t, `INSERT INTO store.receipts VALUES (1, NULL, '987', 999)`) 1699 1700 // FK validation of self-FK is preserved. 1701 db.ExpectErr( 1702 t, "insert.*violates foreign key constraint \"fk_reissue_ref_receipts\"", 1703 `INSERT INTO store.receipts VALUES (-1, 999, NULL, NULL)`, 1704 ) 1705 }) 1706 1707 t.Run("restore receipts and customers to new cluster", func(t *testing.T) { 1708 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1709 defer tc.Stopper().Stop(context.Background()) 1710 db := sqlutils.MakeSQLRunner(tc.Conns[0]) 1711 db.Exec(t, createStore) 1712 db.Exec(t, `RESTORE store.receipts, store.customers FROM $1 WITH OPTIONS ('skip_missing_foreign_keys')`, localFoo) 1713 // Restore's Validate checks all the tables point to each other correctly. 1714 1715 // FK validation of orders is gone. 1716 db.Exec(t, `INSERT INTO store.receipts VALUES (1, NULL, '0', 999)`) 1717 1718 // FK validation of customer email is preserved. 1719 db.ExpectErr( 1720 t, "nsert.*violates foreign key constraint \"fk_dest_ref_customers\"", 1721 `INSERT INTO store.receipts VALUES (-1, NULL, '999', 999)`, 1722 ) 1723 1724 // FK validation on customers from receipts is preserved. 1725 db.ExpectErr( 1726 t, "delete.*violates foreign key constraint \"fk_dest_ref_customers\"", 1727 `DELETE FROM store.customers`, 1728 ) 1729 1730 // FK validation of self-FK is preserved. 1731 db.ExpectErr( 1732 t, "insert.*violates foreign key constraint \"fk_reissue_ref_receipts\"", 1733 `INSERT INTO store.receipts VALUES (-1, 999, NULL, NULL)`, 1734 ) 1735 }) 1736 1737 t.Run("restore simple view", func(t *testing.T) { 1738 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1739 defer tc.Stopper().Stop(context.Background()) 1740 db := sqlutils.MakeSQLRunner(tc.Conns[0]) 1741 db.Exec(t, createStore) 1742 db.ExpectErr( 1743 t, `cannot restore view "early_customers" without restoring referenced table`, 1744 `RESTORE store.early_customers FROM $1`, localFoo, 1745 ) 1746 db.Exec(t, `RESTORE store.early_customers, store.customers, store.orders FROM $1`, localFoo) 1747 db.CheckQueryResults(t, `SELECT * FROM store.early_customers`, origEarlyCustomers) 1748 1749 // nothing depends on orders so it can be dropped. 1750 db.Exec(t, `DROP TABLE store.orders`) 1751 1752 // customers is aware of the view that depends on it. 1753 db.ExpectErr( 1754 t, `cannot drop relation "customers" because view "early_customers" depends on it`, 1755 `DROP TABLE store.customers`, 1756 ) 1757 1758 // We want to be able to drop columns not used by the view, 1759 // however the detection thereof is currently broken - #17269. 1760 // 1761 // // columns not depended on by the view are unaffected. 1762 // db.Exec(`ALTER TABLE store.customers DROP COLUMN email`) 1763 // db.CheckQueryResults(t, `SELECT * FROM store.early_customers`, origEarlyCustomers) 1764 1765 db.Exec(t, `DROP TABLE store.customers CASCADE`) 1766 }) 1767 1768 t.Run("restore multi-table view", func(t *testing.T) { 1769 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1770 defer tc.Stopper().Stop(context.Background()) 1771 db := sqlutils.MakeSQLRunner(tc.Conns[0]) 1772 1773 db.ExpectErr( 1774 t, `cannot restore view "ordercounts" without restoring referenced table`, 1775 `RESTORE DATABASE storestats FROM $1`, localFoo, 1776 ) 1777 1778 db.Exec(t, createStore) 1779 db.Exec(t, createStoreStats) 1780 1781 db.ExpectErr( 1782 t, `cannot restore view "ordercounts" without restoring referenced table`, 1783 `RESTORE storestats.ordercounts, store.customers FROM $1`, localFoo, 1784 ) 1785 1786 db.Exec(t, `RESTORE store.customers, storestats.ordercounts, store.orders FROM $1`, localFoo) 1787 1788 // we want to observe just the view-related errors, not fk errors below. 1789 db.Exec(t, `ALTER TABLE store.orders DROP CONSTRAINT fk_customerid_ref_customers`) 1790 1791 // customers is aware of the view that depends on it. 1792 db.ExpectErr( 1793 t, `cannot drop relation "customers" because view "storestats.public.ordercounts" depends on it`, 1794 `DROP TABLE store.customers`, 1795 ) 1796 db.ExpectErr( 1797 t, `cannot drop column "email" because view "storestats.public.ordercounts" depends on it`, 1798 `ALTER TABLE store.customers DROP COLUMN email`, 1799 ) 1800 1801 // orders is aware of the view that depends on it. 1802 db.ExpectErr( 1803 t, `cannot drop relation "orders" because view "storestats.public.ordercounts" depends on it`, 1804 `DROP TABLE store.orders`, 1805 ) 1806 1807 db.CheckQueryResults(t, `SELECT * FROM storestats.ordercounts ORDER BY id`, origOrderCounts) 1808 1809 db.Exec(t, `CREATE DATABASE otherstore`) 1810 db.Exec(t, `RESTORE store.* FROM $1 WITH into_db = 'otherstore'`, localFoo) 1811 // we want to observe just the view-related errors, not fk errors below. 1812 db.Exec(t, `ALTER TABLE otherstore.orders DROP CONSTRAINT fk_customerid_ref_customers`) 1813 db.Exec(t, `DROP TABLE otherstore.receipts`) 1814 1815 db.ExpectErr( 1816 t, `cannot drop relation "customers" because view "early_customers" depends on it`, 1817 `DROP TABLE otherstore.customers`, 1818 ) 1819 1820 db.ExpectErr(t, `cannot drop column "email" because view "early_customers" depends on it`, 1821 `ALTER TABLE otherstore.customers DROP COLUMN email`, 1822 ) 1823 db.Exec(t, `DROP DATABASE store CASCADE`) 1824 db.CheckQueryResults(t, `SELECT * FROM otherstore.early_customers ORDER BY id`, origEarlyCustomers) 1825 1826 }) 1827 1828 t.Run("restore and skip missing views", func(t *testing.T) { 1829 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1830 defer tc.Stopper().Stop(context.Background()) 1831 db := sqlutils.MakeSQLRunner(tc.Conns[0]) 1832 1833 // Test cases where, after filtering out views that can't be restored, there are no other tables to restore 1834 1835 db.Exec(t, `RESTORE DATABASE storestats from $1 WITH OPTIONS ('skip_missing_views')`, localFoo) 1836 db.Exec(t, `RESTORE storestats.ordercounts from $1 WITH OPTIONS ('skip_missing_views')`, localFoo) 1837 // Ensure that the views were not restored since they are missing the tables they reference. 1838 db.CheckQueryResults(t, `USE storestats; SHOW TABLES;`, [][]string{}) 1839 1840 db.Exec(t, `RESTORE store.early_customers, store.referencing_early_customers from $1 WITH OPTIONS ('skip_missing_views')`, localFoo) 1841 // Ensure that the views were not restored since they are missing the tables they reference. 1842 db.CheckQueryResults(t, `SHOW TABLES;`, [][]string{}) 1843 1844 // Test that views with valid dependencies are restored 1845 1846 db.Exec(t, `RESTORE DATABASE store from $1 WITH OPTIONS ('skip_missing_views')`, localFoo) 1847 db.CheckQueryResults(t, `SELECT * FROM store.early_customers`, origEarlyCustomers) 1848 db.CheckQueryResults(t, `SELECT * FROM store.referencing_early_customers`, origEarlyCustomers) 1849 // TODO(lucy, jordan): DROP DATABASE CASCADE doesn't work in the mixed 19.1/ 1850 // 19.2 state, which is unrelated to backup/restore. See #39504 for a 1851 // description of that problem, which is yet to be investigated. 1852 // db.Exec(t, `DROP DATABASE store CASCADE`) 1853 1854 // Test when some tables (views) are skipped and others are restored 1855 1856 // See above comment for why we can't delete store and have to create 1857 // another database for now.... 1858 // db.Exec(t, createStore) 1859 // storestats.ordercounts depends also on store.orders, so it can't be restored 1860 db.Exec(t, `CREATE DATABASE store2`) 1861 db.Exec(t, `RESTORE storestats.ordercounts, store.customers from $1 WITH OPTIONS ('skip_missing_views', 'into_db'='store2')`, localFoo) 1862 db.CheckQueryResults(t, `SHOW CONSTRAINTS FROM store2.customers`, origCustomers) 1863 db.ExpectErr(t, `relation "storestats.ordercounts" does not exist`, `SELECT * FROM storestats.ordercounts`) 1864 }) 1865 } 1866 1867 func checksumBankPayload(t *testing.T, sqlDB *sqlutils.SQLRunner) uint32 { 1868 crc := crc32.New(crc32.MakeTable(crc32.Castagnoli)) 1869 rows := sqlDB.Query(t, `SELECT id, balance, payload FROM data.bank`) 1870 defer rows.Close() 1871 var id, balance int 1872 var payload []byte 1873 for rows.Next() { 1874 if err := rows.Scan(&id, &balance, &payload); err != nil { 1875 t.Fatal(err) 1876 } 1877 if _, err := crc.Write(payload); err != nil { 1878 t.Fatal(err) 1879 } 1880 } 1881 if err := rows.Err(); err != nil { 1882 t.Fatal(err) 1883 } 1884 return crc.Sum32() 1885 } 1886 1887 func TestBackupRestoreIncremental(t *testing.T) { 1888 defer leaktest.AfterTest(t)() 1889 1890 const numAccounts = 10 1891 const numBackups = 4 1892 windowSize := int(numAccounts / 3) 1893 1894 _, tc, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, 0, initNone) 1895 defer cleanupFn() 1896 args := base.TestServerArgs{ExternalIODir: dir} 1897 rng, _ := randutil.NewPseudoRand() 1898 1899 var backupDirs []string 1900 var checksums []uint32 1901 { 1902 for backupNum := 0; backupNum < numBackups; backupNum++ { 1903 // In the following, windowSize is `w` and offset is `o`. The first 1904 // mutation creates accounts with id [w,3w). Every mutation after 1905 // that deletes everything less than o, leaves [o, o+w) unchanged, 1906 // mutates [o+w,o+2w), and inserts [o+2w,o+3w). 1907 offset := windowSize * backupNum 1908 var buf bytes.Buffer 1909 fmt.Fprintf(&buf, `DELETE FROM data.bank WHERE id < %d; `, offset) 1910 buf.WriteString(`UPSERT INTO data.bank VALUES `) 1911 for j := 0; j < windowSize*2; j++ { 1912 if j != 0 { 1913 buf.WriteRune(',') 1914 } 1915 id := offset + windowSize + j 1916 payload := randutil.RandBytes(rng, backupRestoreRowPayloadSize) 1917 fmt.Fprintf(&buf, `(%d, %d, '%s')`, id, backupNum, payload) 1918 } 1919 sqlDB.Exec(t, buf.String()) 1920 1921 checksums = append(checksums, checksumBankPayload(t, sqlDB)) 1922 1923 backupDir := fmt.Sprintf("nodelocal://0/%d", backupNum) 1924 var from string 1925 if backupNum > 0 { 1926 from = fmt.Sprintf(` INCREMENTAL FROM %s`, strings.Join(backupDirs, `,`)) 1927 } 1928 sqlDB.Exec(t, fmt.Sprintf(`BACKUP TABLE data.bank TO '%s' %s`, backupDir, from)) 1929 1930 backupDirs = append(backupDirs, fmt.Sprintf(`'%s'`, backupDir)) 1931 } 1932 1933 // Test a regression in RESTORE where the batch end key was not 1934 // being set correctly in Import: make an incremental backup such that 1935 // the greatest key in the diff is less than the previous backups. 1936 sqlDB.Exec(t, `INSERT INTO data.bank VALUES (0, -1, 'final')`) 1937 checksums = append(checksums, checksumBankPayload(t, sqlDB)) 1938 sqlDB.Exec(t, fmt.Sprintf(`BACKUP TABLE data.bank TO '%s' %s`, 1939 "nodelocal://0/final", fmt.Sprintf(` INCREMENTAL FROM %s`, strings.Join(backupDirs, `,`)), 1940 )) 1941 backupDirs = append(backupDirs, `'nodelocal://0/final'`) 1942 } 1943 1944 // Start a new cluster to restore into. 1945 { 1946 restoreTC := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 1947 defer restoreTC.Stopper().Stop(context.Background()) 1948 sqlDBRestore := sqlutils.MakeSQLRunner(restoreTC.Conns[0]) 1949 1950 sqlDBRestore.Exec(t, `CREATE DATABASE data`) 1951 sqlDBRestore.Exec(t, `CREATE TABLE data.bank (id INT PRIMARY KEY)`) 1952 // This "data.bank" table isn't actually the same table as the backup at all 1953 // so we should not allow using a backup of the other in incremental. We 1954 // usually compare IDs, but those are only meaningful in the context of a 1955 // single cluster, so we also need to ensure the previous backup was indeed 1956 // generated by the same cluster. 1957 1958 sqlDBRestore.ExpectErr( 1959 t, fmt.Sprintf("belongs to cluster %s", tc.Servers[0].ClusterID()), 1960 `BACKUP TABLE data.bank TO $1 INCREMENTAL FROM $2`, 1961 "nodelocal://0/some-other-table", "nodelocal://0/0", 1962 ) 1963 1964 for i := len(backupDirs); i > 0; i-- { 1965 sqlDBRestore.Exec(t, `DROP TABLE IF EXISTS data.bank`) 1966 from := strings.Join(backupDirs[:i], `,`) 1967 sqlDBRestore.Exec(t, fmt.Sprintf(`RESTORE data.bank FROM %s`, from)) 1968 1969 checksum := checksumBankPayload(t, sqlDBRestore) 1970 if checksum != checksums[i-1] { 1971 t.Fatalf("checksum mismatch at index %d: got %d expected %d", 1972 i-1, checksum, checksums[i]) 1973 } 1974 } 1975 } 1976 } 1977 1978 func TestBackupRestorePartitionedIncremental(t *testing.T) { 1979 defer leaktest.AfterTest(t)() 1980 1981 const numAccounts = 10 1982 const numBackups = 4 1983 windowSize := int(numAccounts / 3) 1984 1985 _, _, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, multiNode, 0, initNone) 1986 defer cleanupFn() 1987 args := base.TestServerArgs{ExternalIODir: dir} 1988 rng, _ := randutil.NewPseudoRand() 1989 1990 // Each incremental backup is written to two different subdirectories in 1991 // defaultDir and dc1Dir, respectively. 1992 const defaultDir = "nodelocal://0/default" 1993 const dc1Dir = "nodelocal://0/dc=dc1" 1994 var defaultBackupDirs []string 1995 var checksums []uint32 1996 { 1997 for backupNum := 0; backupNum < numBackups; backupNum++ { 1998 // In the following, windowSize is `w` and offset is `o`. The first 1999 // mutation creates accounts with id [w,3w). Every mutation after 2000 // that deletes everything less than o, leaves [o, o+w) unchanged, 2001 // mutates [o+w,o+2w), and inserts [o+2w,o+3w). 2002 offset := windowSize * backupNum 2003 var buf bytes.Buffer 2004 fmt.Fprintf(&buf, `DELETE FROM data.bank WHERE id < %d; `, offset) 2005 buf.WriteString(`UPSERT INTO data.bank VALUES `) 2006 for j := 0; j < windowSize*2; j++ { 2007 if j != 0 { 2008 buf.WriteRune(',') 2009 } 2010 id := offset + windowSize + j 2011 payload := randutil.RandBytes(rng, backupRestoreRowPayloadSize) 2012 fmt.Fprintf(&buf, `(%d, %d, '%s')`, id, backupNum, payload) 2013 } 2014 sqlDB.Exec(t, buf.String()) 2015 2016 checksums = append(checksums, checksumBankPayload(t, sqlDB)) 2017 2018 defaultBackupDir := fmt.Sprintf("%s/%d", defaultDir, backupNum) 2019 dc1BackupDir := fmt.Sprintf("%s/%d", dc1Dir, backupNum) 2020 var from string 2021 if backupNum > 0 { 2022 from = fmt.Sprintf(` INCREMENTAL FROM %s`, strings.Join(defaultBackupDirs, `,`)) 2023 } 2024 sqlDB.Exec( 2025 t, 2026 fmt.Sprintf(`BACKUP TABLE data.bank TO ('%s?COCKROACH_LOCALITY=%s', '%s?COCKROACH_LOCALITY=%s') %s`, 2027 defaultBackupDir, url.QueryEscape("default"), 2028 dc1BackupDir, url.QueryEscape("dc=dc1"), 2029 from), 2030 ) 2031 2032 defaultBackupDirs = append(defaultBackupDirs, fmt.Sprintf(`'%s'`, defaultBackupDir)) 2033 } 2034 } 2035 2036 // Start a new cluster to restore into. 2037 { 2038 restoreTC := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 2039 defer restoreTC.Stopper().Stop(context.Background()) 2040 sqlDBRestore := sqlutils.MakeSQLRunner(restoreTC.Conns[0]) 2041 2042 sqlDBRestore.Exec(t, `CREATE DATABASE data`) 2043 for i := len(defaultBackupDirs); i > 0; i-- { 2044 sqlDBRestore.Exec(t, `DROP TABLE IF EXISTS data.bank`) 2045 var from strings.Builder 2046 for backupNum := range defaultBackupDirs[:i] { 2047 if backupNum > 0 { 2048 from.WriteString(", ") 2049 } 2050 from.WriteString(fmt.Sprintf("('%s/%d', '%s/%d')", defaultDir, backupNum, dc1Dir, backupNum)) 2051 } 2052 sqlDBRestore.Exec(t, fmt.Sprintf(`RESTORE data.bank FROM %s`, from.String())) 2053 2054 checksum := checksumBankPayload(t, sqlDBRestore) 2055 if checksum != checksums[i-1] { 2056 t.Fatalf("checksum mismatch at index %d: got %d expected %d", 2057 i-1, checksum, checksums[i]) 2058 } 2059 } 2060 } 2061 } 2062 2063 // a bg worker is intended to write to the bank table concurrent with other 2064 // operations (writes, backups, restores), mutating the payload on rows-maxID. 2065 // it notified the `wake` channel (to allow ensuring bg activity has occurred) 2066 // and can be informed when errors are allowable (e.g. when the bank table is 2067 // unavailable between a drop and restore) via the atomic "bool" allowErrors. 2068 func startBackgroundWrites( 2069 stopper *stop.Stopper, sqlDB *gosql.DB, maxID int, wake chan<- struct{}, allowErrors *int32, 2070 ) error { 2071 rng, _ := randutil.NewPseudoRand() 2072 2073 for { 2074 select { 2075 case <-stopper.ShouldQuiesce(): 2076 return nil // All done. 2077 default: 2078 // Keep going. 2079 } 2080 2081 id := rand.Intn(maxID) 2082 payload := randutil.RandBytes(rng, backupRestoreRowPayloadSize) 2083 2084 updateFn := func() error { 2085 select { 2086 case <-stopper.ShouldQuiesce(): 2087 return nil // All done. 2088 default: 2089 // Keep going. 2090 } 2091 _, err := sqlDB.Exec(`UPDATE data.bank SET payload = $1 WHERE id = $2`, payload, id) 2092 if atomic.LoadInt32(allowErrors) == 1 { 2093 return nil 2094 } 2095 return err 2096 } 2097 if err := retry.ForDuration(testutils.DefaultSucceedsSoonDuration, updateFn); err != nil { 2098 return err 2099 } 2100 select { 2101 case wake <- struct{}{}: 2102 default: 2103 } 2104 } 2105 } 2106 2107 func TestBackupRestoreWithConcurrentWrites(t *testing.T) { 2108 defer leaktest.AfterTest(t)() 2109 2110 const rows = 10 2111 const numBackgroundTasks = multiNode 2112 2113 _, tc, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, multiNode, rows, initNone) 2114 defer cleanupFn() 2115 2116 bgActivity := make(chan struct{}) 2117 // allowErrors is used as an atomic bool to tell bg workers when to allow 2118 // errors, between dropping and restoring the table they are using. 2119 var allowErrors int32 2120 for task := 0; task < numBackgroundTasks; task++ { 2121 taskNum := task 2122 tc.Stopper().RunWorker(context.Background(), func(context.Context) { 2123 conn := tc.Conns[taskNum%len(tc.Conns)] 2124 // Use different sql gateways to make sure leasing is right. 2125 if err := startBackgroundWrites(tc.Stopper(), conn, rows, bgActivity, &allowErrors); err != nil { 2126 t.Error(err) 2127 } 2128 }) 2129 } 2130 2131 // Use the data.bank table as a key (id), value (balance) table with a 2132 // payload.The background tasks are mutating the table concurrently while we 2133 // backup and restore. 2134 <-bgActivity 2135 2136 // Set, break, then reset the id=balance invariant -- while doing concurrent 2137 // writes -- to get multiple MVCC revisions as well as txn conflicts. 2138 sqlDB.Exec(t, `UPDATE data.bank SET balance = id`) 2139 <-bgActivity 2140 sqlDB.Exec(t, `UPDATE data.bank SET balance = -1`) 2141 <-bgActivity 2142 sqlDB.Exec(t, `UPDATE data.bank SET balance = id`) 2143 <-bgActivity 2144 2145 // Backup DB while concurrent writes continue. 2146 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, localFoo) 2147 2148 // Drop the table and restore from backup and check our invariant. 2149 atomic.StoreInt32(&allowErrors, 1) 2150 sqlDB.Exec(t, `DROP TABLE data.bank`) 2151 sqlDB.Exec(t, `RESTORE data.* FROM $1`, localFoo) 2152 atomic.StoreInt32(&allowErrors, 0) 2153 2154 bad := sqlDB.QueryStr(t, `SELECT id, balance, payload FROM data.bank WHERE id != balance`) 2155 for _, r := range bad { 2156 t.Errorf("bad row ID %s = bal %s (payload: %q)", r[0], r[1], r[2]) 2157 } 2158 } 2159 2160 func TestConcurrentBackupRestores(t *testing.T) { 2161 defer leaktest.AfterTest(t)() 2162 2163 const numAccounts = 10 2164 const concurrency, numIterations = 2, 3 2165 ctx, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, multiNode, numAccounts, initNone) 2166 defer cleanupFn() 2167 2168 g, gCtx := errgroup.WithContext(ctx) 2169 for i := 0; i < concurrency; i++ { 2170 table := fmt.Sprintf("bank_%d", i) 2171 sqlDB.Exec(t, fmt.Sprintf( 2172 `CREATE TABLE data.%s AS (SELECT * FROM data.bank WHERE id > %d ORDER BY id)`, 2173 table, i, 2174 )) 2175 g.Go(func() error { 2176 for j := 0; j < numIterations; j++ { 2177 dbName := fmt.Sprintf("%s_%d", table, j) 2178 backupDir := fmt.Sprintf("nodelocal://0/%s", dbName) 2179 backupQ := fmt.Sprintf(`BACKUP data.%s TO $1`, table) 2180 if _, err := sqlDB.DB.ExecContext(gCtx, backupQ, backupDir); err != nil { 2181 return err 2182 } 2183 if _, err := sqlDB.DB.ExecContext(gCtx, fmt.Sprintf(`CREATE DATABASE %s`, dbName)); err != nil { 2184 return err 2185 } 2186 restoreQ := fmt.Sprintf(`RESTORE data.%s FROM $1 WITH OPTIONS ('into_db'='%s')`, table, dbName) 2187 if _, err := sqlDB.DB.ExecContext(gCtx, restoreQ, backupDir); err != nil { 2188 return err 2189 } 2190 } 2191 return nil 2192 }) 2193 } 2194 if err := g.Wait(); err != nil { 2195 t.Fatalf("%+v", err) 2196 } 2197 2198 for i := 0; i < concurrency; i++ { 2199 orig := sqlDB.QueryStr(t, `SELECT * FROM data.bank WHERE id > $1 ORDER BY id`, i) 2200 for j := 0; j < numIterations; j++ { 2201 selectQ := fmt.Sprintf(`SELECT * FROM bank_%d_%d.bank_%d ORDER BY id`, i, j, i) 2202 sqlDB.CheckQueryResults(t, selectQ, orig) 2203 } 2204 } 2205 } 2206 2207 func TestBackupAsOfSystemTime(t *testing.T) { 2208 defer leaktest.AfterTest(t)() 2209 2210 const numAccounts = 1000 2211 2212 ctx, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 2213 defer cleanupFn() 2214 2215 var beforeTs, equalTs string 2216 var rowCount int 2217 2218 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&beforeTs) 2219 2220 err := crdb.ExecuteTx(ctx, sqlDB.DB.(*gosql.DB), nil /* txopts */, func(tx *gosql.Tx) error { 2221 _, err := tx.Exec(`DELETE FROM data.bank WHERE id % 4 = 1`) 2222 if err != nil { 2223 return err 2224 } 2225 return tx.QueryRow(`SELECT cluster_logical_timestamp()`).Scan(&equalTs) 2226 }) 2227 if err != nil { 2228 t.Fatal(err) 2229 } 2230 2231 sqlDB.QueryRow(t, `SELECT count(*) FROM data.bank`).Scan(&rowCount) 2232 if expected := numAccounts * 3 / 4; rowCount != expected { 2233 t.Fatalf("expected %d rows but found %d", expected, rowCount) 2234 } 2235 2236 beforeDir := localFoo + `/beforeTs` 2237 sqlDB.Exec(t, fmt.Sprintf(`BACKUP DATABASE data TO '%s' AS OF SYSTEM TIME %s`, beforeDir, beforeTs)) 2238 equalDir := localFoo + `/equalTs` 2239 sqlDB.Exec(t, fmt.Sprintf(`BACKUP DATABASE data TO '%s' AS OF SYSTEM TIME %s`, equalDir, equalTs)) 2240 2241 sqlDB.Exec(t, `DROP TABLE data.bank`) 2242 sqlDB.Exec(t, `RESTORE data.* FROM $1`, beforeDir) 2243 sqlDB.QueryRow(t, `SELECT count(*) FROM data.bank`).Scan(&rowCount) 2244 if expected := numAccounts; rowCount != expected { 2245 t.Fatalf("expected %d rows but found %d", expected, rowCount) 2246 } 2247 2248 sqlDB.Exec(t, `DROP TABLE data.bank`) 2249 sqlDB.Exec(t, `RESTORE data.* FROM $1`, equalDir) 2250 sqlDB.QueryRow(t, `SELECT count(*) FROM data.bank`).Scan(&rowCount) 2251 if expected := numAccounts * 3 / 4; rowCount != expected { 2252 t.Fatalf("expected %d rows but found %d", expected, rowCount) 2253 } 2254 } 2255 2256 func TestRestoreAsOfSystemTime(t *testing.T) { 2257 defer leaktest.AfterTest(t)() 2258 2259 const numAccounts = 10 2260 ctx, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 2261 defer cleanupFn() 2262 const dir = "nodelocal://0/" 2263 2264 ts := make([]string, 9) 2265 2266 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts[0]) 2267 2268 sqlDB.Exec(t, `UPDATE data.bank SET balance = 1`) 2269 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts[1]) 2270 2271 // Change the data in the tabe. 2272 sqlDB.Exec(t, `CREATE TABLE data.teller (id INT PRIMARY KEY, name STRING)`) 2273 sqlDB.Exec(t, `INSERT INTO data.teller VALUES (1, 'alice'), (7, 'bob'), (3, 'eve')`) 2274 2275 err := crdb.ExecuteTx(ctx, sqlDB.DB.(*gosql.DB), nil /* txopts */, func(tx *gosql.Tx) error { 2276 _, err := tx.Exec(`UPDATE data.bank SET balance = 2`) 2277 if err != nil { 2278 return err 2279 } 2280 return tx.QueryRow(`SELECT cluster_logical_timestamp()`).Scan(&ts[2]) 2281 }) 2282 if err != nil { 2283 t.Fatal(err) 2284 } 2285 2286 fullBackup, latestBackup := dir+"/full", dir+"/latest" 2287 incBackup, incLatestBackup := dir+"/inc", dir+"/inc-latest" 2288 inc2Backup, inc2LatestBackup := incBackup+".2", incLatestBackup+".2" 2289 2290 sqlDB.Exec(t, 2291 fmt.Sprintf(`BACKUP DATABASE data TO $1 AS OF SYSTEM TIME %s WITH revision_history`, ts[2]), 2292 fullBackup, 2293 ) 2294 sqlDB.Exec(t, 2295 fmt.Sprintf(`BACKUP DATABASE data TO $1 AS OF SYSTEM TIME %s`, ts[2]), 2296 latestBackup, 2297 ) 2298 2299 fullTableBackup := dir + "/tbl" 2300 sqlDB.Exec(t, 2301 fmt.Sprintf(`BACKUP data.bank TO $1 AS OF SYSTEM TIME %s WITH revision_history`, ts[2]), 2302 fullTableBackup, 2303 ) 2304 2305 sqlDB.Exec(t, `UPDATE data.bank SET balance = 3`) 2306 2307 // Create a table in some other DB -- this won't be in this backup (yet). 2308 sqlDB.Exec(t, `CREATE DATABASE other`) 2309 sqlDB.Exec(t, `CREATE TABLE other.sometable (id INT PRIMARY KEY, somevalue INT)`) 2310 sqlDB.Exec(t, `INSERT INTO other.sometable VALUES (1, 2), (7, 5), (3, 3)`) 2311 2312 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts[3]) 2313 2314 sqlDB.Exec(t, `DELETE FROM data.bank WHERE id >= $1 / 2`, numAccounts) 2315 sqlDB.Exec(t, `ALTER TABLE other.sometable RENAME TO data.sometable`) 2316 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts[4]) 2317 2318 sqlDB.Exec(t, `INSERT INTO data.sometable VALUES (2, 2), (4, 5), (6, 3)`) 2319 sqlDB.Exec(t, `ALTER TABLE data.bank ADD COLUMN points_balance INT DEFAULT 50`) 2320 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts[5]) 2321 2322 sqlDB.Exec(t, `TRUNCATE TABLE data.bank`) 2323 sqlDB.Exec(t, `TRUNCATE TABLE data.bank`) 2324 sqlDB.Exec(t, `TRUNCATE TABLE data.bank`) 2325 sqlDB.Exec(t, `ALTER TABLE data.sometable RENAME TO other.sometable`) 2326 sqlDB.Exec(t, `CREATE INDEX ON data.teller (name)`) 2327 sqlDB.Exec(t, `INSERT INTO data.bank VALUES (2, 2), (4, 4)`) 2328 sqlDB.Exec(t, `INSERT INTO data.teller VALUES (2, 'craig')`) 2329 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts[6]) 2330 2331 sqlDB.Exec(t, `TRUNCATE TABLE data.bank`) 2332 sqlDB.Exec(t, `INSERT INTO data.bank VALUES (2, 2), (4, 4)`) 2333 sqlDB.Exec(t, `DROP TABLE other.sometable`) 2334 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts[7]) 2335 2336 sqlDB.Exec(t, `UPSERT INTO data.bank (id, balance) 2337 SELECT i, 4 FROM generate_series(0, $1 - 1) AS g(i)`, numAccounts) 2338 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&ts[8]) 2339 2340 sqlDB.Exec(t, 2341 fmt.Sprintf(`BACKUP DATABASE data TO $1 AS OF SYSTEM TIME %s INCREMENTAL FROM $2 WITH revision_history`, ts[5]), 2342 incBackup, fullBackup, 2343 ) 2344 sqlDB.Exec(t, 2345 `BACKUP DATABASE data TO $1 INCREMENTAL FROM $2, $3 WITH revision_history`, 2346 inc2Backup, fullBackup, incBackup, 2347 ) 2348 2349 sqlDB.Exec(t, 2350 fmt.Sprintf(`BACKUP DATABASE data TO $1 AS OF SYSTEM TIME %s INCREMENTAL FROM $2`, ts[5]), 2351 incLatestBackup, latestBackup, 2352 ) 2353 sqlDB.Exec(t, 2354 `BACKUP DATABASE data TO $1 INCREMENTAL FROM $2, $3`, 2355 inc2LatestBackup, latestBackup, incLatestBackup, 2356 ) 2357 2358 incTableBackup := dir + "/inctbl" 2359 sqlDB.Exec(t, 2360 `BACKUP data.bank TO $1 INCREMENTAL FROM $2 WITH revision_history`, 2361 incTableBackup, fullTableBackup, 2362 ) 2363 2364 var after string 2365 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&after) 2366 2367 for i, timestamp := range ts { 2368 name := fmt.Sprintf("ts%d", i) 2369 t.Run(name, func(t *testing.T) { 2370 sqlDB = sqlutils.MakeSQLRunner(sqlDB.DB) 2371 // Create new DBs into which we'll restore our copies without conflicting 2372 // with the existing, original table. 2373 sqlDB.Exec(t, fmt.Sprintf(`CREATE DATABASE %s`, name)) 2374 sqlDB.Exec(t, fmt.Sprintf(`CREATE DATABASE %stbl`, name)) 2375 // Restore the bank table from the full DB MVCC backup to time x, into a 2376 // separate DB so that we can later compare it to the original table via 2377 // time-travel. 2378 sqlDB.Exec(t, 2379 fmt.Sprintf( 2380 `RESTORE data.* FROM $1, $2, $3 AS OF SYSTEM TIME %s WITH into_db='%s'`, 2381 timestamp, name, 2382 ), 2383 fullBackup, incBackup, inc2Backup, 2384 ) 2385 // Similarly restore the since-table backup -- since full DB and single table 2386 // backups sometimes behave differently. 2387 sqlDB.Exec(t, 2388 fmt.Sprintf( 2389 `RESTORE data.bank FROM $1, $2 AS OF SYSTEM TIME %s WITH into_db='%stbl'`, 2390 timestamp, name, 2391 ), 2392 fullTableBackup, incTableBackup, 2393 ) 2394 2395 // Use time-travel on the existing bank table to determine what RESTORE 2396 // with AS OF should have produced. 2397 expected := sqlDB.QueryStr( 2398 t, fmt.Sprintf(`SELECT * FROM data.bank AS OF SYSTEM TIME %s ORDER BY id`, timestamp), 2399 ) 2400 // Confirm reading (with no as-of) from the as-of restored table matches. 2401 sqlDB.CheckQueryResults(t, fmt.Sprintf(`SELECT * FROM %s.bank ORDER BY id`, name), expected) 2402 sqlDB.CheckQueryResults(t, fmt.Sprintf(`SELECT * FROM %stbl.bank ORDER BY id`, name), expected) 2403 2404 // `sometable` moved in to data between after ts 3 and removed before 5. 2405 if i == 4 || i == 5 { 2406 sqlDB.CheckQueryResults(t, 2407 fmt.Sprintf(`SELECT * FROM %s.sometable ORDER BY id`, name), 2408 sqlDB.QueryStr(t, fmt.Sprintf(`SELECT * FROM data.sometable AS OF SYSTEM TIME %s ORDER BY id`, timestamp)), 2409 ) 2410 } 2411 // teller was created after ts 2. 2412 if i > 2 { 2413 sqlDB.CheckQueryResults(t, 2414 fmt.Sprintf(`SELECT * FROM %s.teller ORDER BY id`, name), 2415 sqlDB.QueryStr(t, fmt.Sprintf(`SELECT * FROM data.teller AS OF SYSTEM TIME %s ORDER BY id`, timestamp)), 2416 ) 2417 } 2418 }) 2419 } 2420 2421 t.Run("latest", func(t *testing.T) { 2422 sqlDB = sqlutils.MakeSQLRunner(sqlDB.DB) 2423 // The "latest" backup didn't specify ALL mvcc values, so we can't restore 2424 // to times in the middle. 2425 sqlDB.Exec(t, `CREATE DATABASE err`) 2426 2427 // fullBackup covers up to ts[2], inc to ts[5], inc2 to > ts[8]. 2428 sqlDB.ExpectErr( 2429 t, "invalid RESTORE timestamp", 2430 fmt.Sprintf(`RESTORE data.* FROM $1 AS OF SYSTEM TIME %s WITH into_db='err'`, ts[3]), 2431 fullBackup, 2432 ) 2433 2434 for _, i := range ts { 2435 2436 if i == ts[2] { 2437 // latestBackup is _at_ ts2 so that is the time, and the only time, at 2438 // which restoring it is allowed. 2439 sqlDB.Exec( 2440 t, fmt.Sprintf(`RESTORE data.* FROM $1 AS OF SYSTEM TIME %s WITH into_db='err'`, i), 2441 latestBackup, 2442 ) 2443 sqlDB.Exec(t, `DROP DATABASE err; CREATE DATABASE err`) 2444 } else { 2445 sqlDB.ExpectErr( 2446 t, "invalid RESTORE timestamp", 2447 fmt.Sprintf(`RESTORE data.* FROM $1 AS OF SYSTEM TIME %s WITH into_db='err'`, i), 2448 latestBackup, 2449 ) 2450 } 2451 2452 if i == ts[2] || i == ts[5] { 2453 // latestBackup is _at_ ts2 and incLatestBackup is at ts5, so either of 2454 // those are valid for the chain (latest,incLatest,inc2Latest). In fact 2455 // there's a third time -- that of inc2Latest, that is valid as well but 2456 // it isn't fixed when created above so we know it / test for it. 2457 sqlDB.Exec( 2458 t, fmt.Sprintf(`RESTORE data.* FROM $1, $2, $3 AS OF SYSTEM TIME %s WITH into_db='err'`, i), 2459 latestBackup, incLatestBackup, inc2LatestBackup, 2460 ) 2461 sqlDB.Exec(t, `DROP DATABASE err; CREATE DATABASE err`) 2462 } else { 2463 sqlDB.ExpectErr( 2464 t, "invalid RESTORE timestamp", 2465 fmt.Sprintf(`RESTORE data.* FROM $1, $2, $3 AS OF SYSTEM TIME %s WITH into_db='err'`, i), 2466 latestBackup, incLatestBackup, inc2LatestBackup, 2467 ) 2468 } 2469 } 2470 2471 sqlDB.ExpectErr( 2472 t, "invalid RESTORE timestamp", 2473 fmt.Sprintf(`RESTORE data.* FROM $1 AS OF SYSTEM TIME %s WITH into_db='err'`, after), 2474 latestBackup, 2475 ) 2476 }) 2477 } 2478 2479 func TestRestoreAsOfSystemTimeGCBounds(t *testing.T) { 2480 defer leaktest.AfterTest(t)() 2481 2482 const numAccounts = 10 2483 ctx, tc, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 2484 defer cleanupFn() 2485 const dir = "nodelocal://0/" 2486 preGC := tree.TimestampToDecimal(tc.Server(0).Clock().Now()).String() 2487 2488 gcr := roachpb.GCRequest{ 2489 // Bogus span to make it a valid request. 2490 RequestHeader: roachpb.RequestHeader{ 2491 Key: keys.SystemSQLCodec.TablePrefix(keys.MinUserDescID), 2492 EndKey: keys.MaxKey, 2493 }, 2494 Threshold: tc.Server(0).Clock().Now(), 2495 } 2496 if _, err := kv.SendWrapped( 2497 ctx, tc.Server(0).DistSenderI().(*kvcoord.DistSender), &gcr, 2498 ); err != nil { 2499 t.Fatal(err) 2500 } 2501 2502 postGC := tree.TimestampToDecimal(tc.Server(0).Clock().Now()).String() 2503 2504 lateFullTableBackup := dir + "/tbl-after-gc" 2505 sqlDB.Exec(t, `BACKUP data.bank TO $1 WITH revision_history`, lateFullTableBackup) 2506 sqlDB.Exec(t, `DROP TABLE data.bank`) 2507 sqlDB.ExpectErr( 2508 t, `BACKUP for requested time only has revision history from`, 2509 fmt.Sprintf(`RESTORE data.bank FROM $1 AS OF SYSTEM TIME %s`, preGC), 2510 lateFullTableBackup, 2511 ) 2512 sqlDB.Exec( 2513 t, fmt.Sprintf(`RESTORE data.bank FROM $1 AS OF SYSTEM TIME %s`, postGC), lateFullTableBackup, 2514 ) 2515 } 2516 2517 func TestAsOfSystemTimeOnRestoredData(t *testing.T) { 2518 defer leaktest.AfterTest(t)() 2519 2520 _, _, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, 0, initNone) 2521 defer cleanupFn() 2522 2523 sqlDB.Exec(t, `DROP TABLE data.bank`) 2524 2525 const numAccounts = 10 2526 bankData := bank.FromRows(numAccounts).Tables()[0] 2527 if _, err := sampledataccl.ToBackup(t, bankData, filepath.Join(dir, "foo")); err != nil { 2528 t.Fatalf("%+v", err) 2529 } 2530 2531 var beforeTs string 2532 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&beforeTs) 2533 sqlDB.Exec(t, `RESTORE data.* FROM $1`, localFoo) 2534 var afterTs string 2535 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&afterTs) 2536 2537 var rowCount int 2538 const q = `SELECT count(*) FROM data.bank AS OF SYSTEM TIME '%s'` 2539 // Before the RESTORE, the table doesn't exist, so an AS OF query should fail. 2540 sqlDB.ExpectErr( 2541 t, `relation "data.bank" does not exist`, 2542 fmt.Sprintf(q, beforeTs), 2543 ) 2544 // After the RESTORE, an AS OF query should work. 2545 sqlDB.QueryRow(t, fmt.Sprintf(q, afterTs)).Scan(&rowCount) 2546 if expected := numAccounts; rowCount != expected { 2547 t.Fatalf("expected %d rows but found %d", expected, rowCount) 2548 } 2549 } 2550 2551 func TestBackupRestoreChecksum(t *testing.T) { 2552 defer leaktest.AfterTest(t)() 2553 2554 const numAccounts = 1000 2555 _, _, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 2556 defer cleanupFn() 2557 dir = filepath.Join(dir, "foo") 2558 2559 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, localFoo) 2560 2561 var backupManifest backupccl.BackupManifest 2562 { 2563 backupManifestBytes, err := ioutil.ReadFile(filepath.Join(dir, backupccl.BackupManifestName)) 2564 if err != nil { 2565 t.Fatalf("%+v", err) 2566 } 2567 fileType := http.DetectContentType(backupManifestBytes) 2568 if fileType == backupccl.ZipType { 2569 backupManifestBytes, err = backupccl.DecompressData(backupManifestBytes) 2570 require.NoError(t, err) 2571 } 2572 if err := protoutil.Unmarshal(backupManifestBytes, &backupManifest); err != nil { 2573 t.Fatalf("%+v", err) 2574 } 2575 } 2576 2577 // Corrupt one of the files in the backup. 2578 f, err := os.OpenFile(filepath.Join(dir, backupManifest.Files[1].Path), os.O_WRONLY, 0) 2579 if err != nil { 2580 t.Fatalf("%+v", err) 2581 } 2582 defer f.Close() 2583 // The last eight bytes of an SST file store a nonzero magic number. We can 2584 // blindly null out those bytes and guarantee that the checksum will change. 2585 if _, err := f.Seek(-8, io.SeekEnd); err != nil { 2586 t.Fatalf("%+v", err) 2587 } 2588 if _, err := f.Write(make([]byte, 8)); err != nil { 2589 t.Fatalf("%+v", err) 2590 } 2591 if err := f.Sync(); err != nil { 2592 t.Fatalf("%+v", err) 2593 } 2594 2595 sqlDB.Exec(t, `DROP TABLE data.bank`) 2596 sqlDB.ExpectErr(t, "checksum mismatch", `RESTORE data.* FROM $1`, localFoo) 2597 } 2598 2599 func TestTimestampMismatch(t *testing.T) { 2600 defer leaktest.AfterTest(t)() 2601 const numAccounts = 1 2602 2603 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 2604 defer cleanupFn() 2605 2606 sqlDB.Exec(t, `CREATE TABLE data.t2 (a INT PRIMARY KEY)`) 2607 sqlDB.Exec(t, `INSERT INTO data.t2 VALUES (1)`) 2608 2609 fullBackup := localFoo + "/0" 2610 incrementalT1FromFull := localFoo + "/1" 2611 incrementalT2FromT1 := localFoo + "/2" 2612 incrementalT3FromT1OneTable := localFoo + "/3" 2613 2614 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, 2615 fullBackup) 2616 sqlDB.Exec(t, `BACKUP DATABASE data TO $1 INCREMENTAL FROM $2`, 2617 incrementalT1FromFull, fullBackup) 2618 sqlDB.Exec(t, `BACKUP TABLE data.bank TO $1 INCREMENTAL FROM $2`, 2619 incrementalT3FromT1OneTable, fullBackup) 2620 sqlDB.Exec(t, `BACKUP DATABASE data TO $1 INCREMENTAL FROM $2, $3`, 2621 incrementalT2FromT1, fullBackup, incrementalT1FromFull) 2622 2623 t.Run("Backup", func(t *testing.T) { 2624 // Missing the initial full backup. 2625 sqlDB.ExpectErr( 2626 t, "backups listed out of order", 2627 `BACKUP DATABASE data TO $1 INCREMENTAL FROM $2`, 2628 localFoo, incrementalT1FromFull, 2629 ) 2630 2631 // Missing an intermediate incremental backup. 2632 sqlDB.ExpectErr( 2633 t, "backups listed out of order", 2634 `BACKUP DATABASE data TO $1 INCREMENTAL FROM $2, $3`, 2635 localFoo, fullBackup, incrementalT2FromT1, 2636 ) 2637 2638 // Backups specified out of order. 2639 sqlDB.ExpectErr( 2640 t, "out of order", 2641 `BACKUP DATABASE data TO $1 INCREMENTAL FROM $2, $3`, 2642 localFoo, incrementalT1FromFull, fullBackup, 2643 ) 2644 2645 // Missing data for one table in the most recent backup. 2646 sqlDB.ExpectErr( 2647 t, "previous backup does not contain table", 2648 `BACKUP DATABASE data TO $1 INCREMENTAL FROM $2, $3`, 2649 localFoo, fullBackup, incrementalT3FromT1OneTable, 2650 ) 2651 }) 2652 2653 sqlDB.Exec(t, `DROP TABLE data.bank`) 2654 sqlDB.Exec(t, `DROP TABLE data.t2`) 2655 t.Run("Restore", func(t *testing.T) { 2656 // Missing the initial full backup. 2657 sqlDB.ExpectErr(t, "no backup covers time", `RESTORE data.* FROM $1`, incrementalT1FromFull) 2658 2659 // Missing an intermediate incremental backup. 2660 sqlDB.ExpectErr( 2661 t, "no backup covers time", 2662 `RESTORE data.* FROM $1, $2`, fullBackup, incrementalT2FromT1, 2663 ) 2664 2665 // Backups specified out of order. 2666 sqlDB.ExpectErr( 2667 t, "out of order", 2668 `RESTORE data.* FROM $1, $2`, incrementalT1FromFull, fullBackup, 2669 ) 2670 2671 // Missing data for one table in the most recent backup. 2672 sqlDB.ExpectErr( 2673 t, "table \"data.t2\" does not exist", 2674 `RESTORE data.bank, data.t2 FROM $1, $2`, fullBackup, incrementalT3FromT1OneTable, 2675 ) 2676 }) 2677 } 2678 2679 func TestBackupLevelDB(t *testing.T) { 2680 defer leaktest.AfterTest(t)() 2681 2682 _, _, sqlDB, rawDir, cleanupFn := backupRestoreTestSetup(t, singleNode, 1, initNone) 2683 defer cleanupFn() 2684 2685 _ = sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, localFoo) 2686 // Verify that the sstables are in LevelDB format by checking the trailer 2687 // magic. 2688 var magic = []byte("\x57\xfb\x80\x8b\x24\x75\x47\xdb") 2689 foundSSTs := 0 2690 if err := filepath.Walk(rawDir, func(path string, info os.FileInfo, err error) error { 2691 if filepath.Ext(path) == ".sst" { 2692 foundSSTs++ 2693 data, err := ioutil.ReadFile(path) 2694 if err != nil { 2695 t.Fatal(err) 2696 } 2697 if !bytes.HasSuffix(data, magic) { 2698 t.Fatalf("trailer magic is not LevelDB sstable: %s", path) 2699 } 2700 } 2701 return nil 2702 }); err != nil { 2703 t.Fatalf("%+v", err) 2704 } 2705 if foundSSTs == 0 { 2706 t.Fatal("found no sstables") 2707 } 2708 } 2709 2710 func TestBackupEncrypted(t *testing.T) { 2711 defer leaktest.AfterTest(t)() 2712 2713 ctx, _, sqlDB, rawDir, cleanupFn := backupRestoreTestSetup(t, multiNode, 3, initNone) 2714 defer cleanupFn() 2715 2716 // Create a table with a name and content that we never see in cleartext in a 2717 // backup. And while the content and name are user data and metadata, by also 2718 // partitioning the table at the sentinel value, we can ensure it also appears 2719 // in the *backup* metadata as well (since partion = range boundary = backup 2720 // file boundary that is recorded in metadata). 2721 sqlDB.Exec(t, `CREATE DATABASE neverappears`) 2722 sqlDB.Exec(t, `CREATE TABLE neverappears.neverappears ( 2723 neverappears STRING PRIMARY KEY, other string, INDEX neverappears (other) 2724 ) PARTITION BY LIST (neverappears) ( 2725 PARTITION neverappears2 VALUES IN ('neverappears2'), PARTITION default VALUES IN (default) 2726 )`) 2727 2728 // Move a partition to n2 to ensure we get multiple writers during BACKUP and 2729 // by partitioning *at* the sentinel we also ensure it is in a range boundary. 2730 sqlDB.Exec(t, `ALTER PARTITION neverappears2 OF TABLE neverappears.neverappears 2731 CONFIGURE ZONE USING constraints='[+dc=dc2]'`) 2732 testutils.SucceedsSoon(t, func() error { 2733 _, err := sqlDB.DB.ExecContext(ctx, `ALTER TABLE neverappears.neverappears 2734 EXPERIMENTAL_RELOCATE VALUES (ARRAY[2], 'neverappears2')`) 2735 return err 2736 }) 2737 2738 // Add the actual content with our sentinel too. 2739 sqlDB.Exec(t, `INSERT INTO neverappears.neverappears values 2740 ('neverappears1', 'neverappears1-v'), 2741 ('neverappears2', 'neverappears2-v'), 2742 ('neverappears3', 'neverappears3-v')`) 2743 2744 // Let's throw it in some other cluster metadata too for fun. 2745 sqlDB.Exec(t, `CREATE USER neverappears`) 2746 sqlDB.Exec(t, `SET CLUSTER SETTING cluster.organization = 'neverappears'`) 2747 2748 // Full cluster-backup to capture all possible metadata. 2749 backupLoc1 := localFoo + "/x?COCKROACH_LOCALITY=default" 2750 backupLoc2 := localFoo + "/x2?COCKROACH_LOCALITY=" + url.QueryEscape("dc=dc1") 2751 backupLoc1inc := localFoo + "/inc1/x?COCKROACH_LOCALITY=default" 2752 backupLoc2inc := localFoo + "/inc1/x2?COCKROACH_LOCALITY=" + url.QueryEscape("dc=dc1") 2753 2754 plainBackupLoc1 := localFoo + "/cleartext?COCKROACH_LOCALITY=default" 2755 plainBackupLoc2 := localFoo + "/cleartext?COCKROACH_LOCALITY=" + url.QueryEscape("dc=dc1") 2756 2757 sqlDB.Exec(t, `BACKUP TO ($1, $2)`, plainBackupLoc1, plainBackupLoc2) 2758 2759 sqlDB.Exec(t, `BACKUP TO ($1, $2) WITH encryption_passphrase='abcdefg'`, backupLoc1, backupLoc2) 2760 // Add the actual content with our sentinel too. 2761 sqlDB.Exec(t, `UPDATE neverappears.neverappears SET other = 'neverappears'`) 2762 sqlDB.Exec(t, `BACKUP TO ($1, $2) INCREMENTAL FROM $3 WITH encryption_passphrase='abcdefg'`, 2763 backupLoc1inc, backupLoc2inc, backupLoc1) 2764 2765 before := sqlDB.QueryStr(t, `SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE neverappears.neverappears`) 2766 2767 checkedFiles := 0 2768 if err := filepath.Walk(rawDir, func(path string, info os.FileInfo, err error) error { 2769 if !info.IsDir() && !strings.Contains(path, "foo/cleartext") { 2770 data, err := ioutil.ReadFile(path) 2771 if err != nil { 2772 t.Fatal(err) 2773 } 2774 if bytes.Contains(data, []byte("neverappears")) { 2775 t.Errorf("found cleartext occurrence of sentinel string in %s", path) 2776 } 2777 checkedFiles++ 2778 } 2779 return nil 2780 }); err != nil { 2781 t.Fatalf("%+v", err) 2782 } 2783 if checkedFiles == 0 { 2784 t.Fatal("test didn't didn't check any files") 2785 } 2786 2787 sqlDB.Exec(t, `DROP DATABASE neverappears CASCADE`) 2788 2789 sqlDB.Exec(t, `SHOW BACKUP $1 WITH encryption_passphrase='abcdefg'`, backupLoc1) 2790 sqlDB.ExpectErr(t, `cipher: message authentication failed`, `SHOW BACKUP $1 WITH encryption_passphrase='wronngpassword'`, backupLoc1) 2791 sqlDB.ExpectErr(t, `file appears encrypted -- try specifying "encryption_passphrase"`, `SHOW BACKUP $1`, backupLoc1) 2792 sqlDB.ExpectErr(t, `could not find or read encryption information`, `SHOW BACKUP $1 WITH encryption_passphrase='wronngpassword'`, plainBackupLoc1) 2793 2794 sqlDB.Exec(t, `RESTORE DATABASE neverappears FROM ($1, $2), ($3, $4) WITH encryption_passphrase='abcdefg'`, 2795 backupLoc1, backupLoc2, backupLoc1inc, backupLoc2inc) 2796 2797 sqlDB.CheckQueryResults(t, `SHOW EXPERIMENTAL_FINGERPRINTS FROM TABLE neverappears.neverappears`, before) 2798 } 2799 2800 func TestRestoredPrivileges(t *testing.T) { 2801 defer leaktest.AfterTest(t)() 2802 2803 const numAccounts = 1 2804 _, _, sqlDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 2805 defer cleanupFn() 2806 args := base.TestServerArgs{ExternalIODir: dir} 2807 2808 rootOnly := sqlDB.QueryStr(t, `SHOW GRANTS ON data.bank`) 2809 2810 sqlDB.Exec(t, `CREATE USER someone`) 2811 sqlDB.Exec(t, `GRANT SELECT, INSERT, UPDATE, DELETE ON data.bank TO someone`) 2812 2813 sqlDB.Exec(t, `CREATE DATABASE data2`) 2814 // Explicitly don't restore grants when just restoring a database since we 2815 // cannot ensure that the same users exist in the restoring cluster. 2816 data2Grants := sqlDB.QueryStr(t, `SHOW GRANTS ON DATABASE data2`) 2817 sqlDB.Exec(t, `GRANT SELECT, INSERT, UPDATE, DELETE ON DATABASE data2 TO someone`) 2818 2819 withGrants := sqlDB.QueryStr(t, `SHOW GRANTS ON data.bank`) 2820 2821 sqlDB.Exec(t, `BACKUP DATABASE data, data2 TO $1`, localFoo) 2822 sqlDB.Exec(t, `DROP TABLE data.bank`) 2823 2824 t.Run("into fresh db", func(t *testing.T) { 2825 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 2826 defer tc.Stopper().Stop(context.Background()) 2827 sqlDBRestore := sqlutils.MakeSQLRunner(tc.Conns[0]) 2828 sqlDBRestore.Exec(t, `CREATE DATABASE data`) 2829 sqlDBRestore.Exec(t, `RESTORE data.bank FROM $1`, localFoo) 2830 sqlDBRestore.CheckQueryResults(t, `SHOW GRANTS ON data.bank`, rootOnly) 2831 }) 2832 2833 t.Run("into db with added grants", func(t *testing.T) { 2834 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 2835 defer tc.Stopper().Stop(context.Background()) 2836 sqlDBRestore := sqlutils.MakeSQLRunner(tc.Conns[0]) 2837 sqlDBRestore.Exec(t, `CREATE DATABASE data`) 2838 sqlDBRestore.Exec(t, `CREATE USER someone`) 2839 sqlDBRestore.Exec(t, `GRANT SELECT, INSERT, UPDATE, DELETE ON DATABASE data TO someone`) 2840 sqlDBRestore.Exec(t, `RESTORE data.bank FROM $1`, localFoo) 2841 sqlDBRestore.CheckQueryResults(t, `SHOW GRANTS ON data.bank`, withGrants) 2842 }) 2843 2844 t.Run("into db on db grants", func(t *testing.T) { 2845 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 2846 defer tc.Stopper().Stop(context.Background()) 2847 sqlDBRestore := sqlutils.MakeSQLRunner(tc.Conns[0]) 2848 sqlDBRestore.Exec(t, `CREATE USER someone`) 2849 sqlDBRestore.Exec(t, `RESTORE DATABASE data2 FROM $1`, localFoo) 2850 sqlDBRestore.CheckQueryResults(t, `SHOW GRANTS ON DATABASE data2`, data2Grants) 2851 }) 2852 } 2853 2854 func TestRestoreInto(t *testing.T) { 2855 defer leaktest.AfterTest(t)() 2856 2857 const numAccounts = 1 2858 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 2859 defer cleanupFn() 2860 2861 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, localFoo) 2862 2863 restoreStmt := fmt.Sprintf(`RESTORE data.bank FROM '%s' WITH into_db = 'data 2'`, localFoo) 2864 2865 sqlDB.ExpectErr(t, "a database named \"data 2\" needs to exist", restoreStmt) 2866 2867 sqlDB.Exec(t, `CREATE DATABASE "data 2"`) 2868 sqlDB.Exec(t, restoreStmt) 2869 2870 expected := sqlDB.QueryStr(t, `SELECT * FROM data.bank`) 2871 sqlDB.CheckQueryResults(t, `SELECT * FROM "data 2".bank`, expected) 2872 } 2873 2874 func TestBackupRestorePermissions(t *testing.T) { 2875 defer leaktest.AfterTest(t)() 2876 2877 const numAccounts = 1 2878 _, tc, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 2879 defer cleanupFn() 2880 2881 sqlDB.Exec(t, `CREATE USER testuser`) 2882 pgURL, cleanupFunc := sqlutils.PGUrl( 2883 t, tc.Server(0).ServingSQLAddr(), "TestBackupRestorePermissions-testuser", url.User("testuser"), 2884 ) 2885 defer cleanupFunc() 2886 testuser, err := gosql.Open("postgres", pgURL.String()) 2887 if err != nil { 2888 t.Fatal(err) 2889 } 2890 defer testuser.Close() 2891 2892 backupStmt := fmt.Sprintf(`BACKUP DATABASE data TO '%s'`, localFoo) 2893 2894 t.Run("root-only", func(t *testing.T) { 2895 if _, err := testuser.Exec(backupStmt); !testutils.IsError( 2896 err, "only users with the admin role are allowed to BACKUP", 2897 ) { 2898 t.Fatal(err) 2899 } 2900 if _, err := testuser.Exec(`RESTORE blah FROM 'blah'`); !testutils.IsError( 2901 err, "only users with the admin role are allowed to RESTORE", 2902 ) { 2903 t.Fatal(err) 2904 } 2905 }) 2906 2907 t.Run("privs-required", func(t *testing.T) { 2908 sqlDB.Exec(t, backupStmt) 2909 // Root doesn't have CREATE on `system` DB, so that should fail. Still need 2910 // a valid `dir` though, since descriptors are always loaded first. 2911 sqlDB.ExpectErr( 2912 t, "user root does not have CREATE privilege", 2913 `RESTORE data.bank FROM $1 WITH OPTIONS ('into_db'='system')`, localFoo, 2914 ) 2915 }) 2916 2917 // Ensure that non-root users with the admin role can backup and restore. 2918 t.Run("non-root-admin", func(t *testing.T) { 2919 sqlDB.Exec(t, "GRANT admin TO testuser") 2920 2921 t.Run("backup-table", func(t *testing.T) { 2922 testLocalFoo := fmt.Sprintf("nodelocal://0/%s", t.Name()) 2923 testLocalBackupStmt := fmt.Sprintf(`BACKUP data.bank TO '%s'`, testLocalFoo) 2924 if _, err := testuser.Exec(testLocalBackupStmt); err != nil { 2925 t.Fatal(err) 2926 } 2927 sqlDB.Exec(t, `CREATE DATABASE data2`) 2928 if _, err := testuser.Exec(`RESTORE data.bank FROM $1 WITH OPTIONS ('into_db'='data2')`, testLocalFoo); err != nil { 2929 t.Fatal(err) 2930 } 2931 }) 2932 2933 t.Run("backup-database", func(t *testing.T) { 2934 testLocalFoo := fmt.Sprintf("nodelocal://0/%s", t.Name()) 2935 testLocalBackupStmt := fmt.Sprintf(`BACKUP DATABASE data TO '%s'`, testLocalFoo) 2936 if _, err := testuser.Exec(testLocalBackupStmt); err != nil { 2937 t.Fatal(err) 2938 } 2939 sqlDB.Exec(t, "DROP DATABASE data") 2940 if _, err := testuser.Exec(`RESTORE DATABASE data FROM $1`, testLocalFoo); err != nil { 2941 t.Fatal(err) 2942 } 2943 }) 2944 }) 2945 } 2946 2947 func TestRestoreDatabaseVersusTable(t *testing.T) { 2948 defer leaktest.AfterTest(t)() 2949 2950 const numAccounts = 1 2951 _, tc, origDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 2952 defer cleanupFn() 2953 args := base.TestServerArgs{ExternalIODir: tc.Servers[0].ClusterSettings().ExternalIODir} 2954 2955 for _, q := range []string{ 2956 `CREATE DATABASE d2`, 2957 `CREATE DATABASE d3`, 2958 `CREATE TABLE d3.foo (a INT)`, 2959 `CREATE DATABASE d4`, 2960 `CREATE TABLE d4.foo (a INT)`, 2961 `CREATE TABLE d4.bar (a INT)`, 2962 } { 2963 origDB.Exec(t, q) 2964 } 2965 2966 d4foo := "nodelocal://0/d4foo" 2967 d4foobar := "nodelocal://0/d4foobar" 2968 d4star := "nodelocal://0/d4star" 2969 2970 origDB.Exec(t, `BACKUP DATABASE data, d2, d3, d4 TO $1`, localFoo) 2971 origDB.Exec(t, `BACKUP d4.foo TO $1`, d4foo) 2972 origDB.Exec(t, `BACKUP d4.foo, d4.bar TO $1`, d4foobar) 2973 origDB.Exec(t, `BACKUP d4.* TO $1`, d4star) 2974 2975 t.Run("incomplete-db", func(t *testing.T) { 2976 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 2977 defer tcRestore.Stopper().Stop(context.Background()) 2978 sqlDB := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 2979 2980 sqlDB.Exec(t, `create database d5`) 2981 2982 sqlDB.ExpectErr( 2983 t, "cannot RESTORE DATABASE from a backup of individual tables", 2984 `RESTORE database d4 FROM $1`, d4foo, 2985 ) 2986 2987 sqlDB.ExpectErr( 2988 t, "cannot RESTORE <database>.* from a backup of individual tables", 2989 `RESTORE d4.* FROM $1 WITH into_db = 'd5'`, d4foo, 2990 ) 2991 2992 sqlDB.ExpectErr( 2993 t, "cannot RESTORE DATABASE from a backup of individual tables", 2994 `RESTORE database d4 FROM $1`, d4foobar, 2995 ) 2996 2997 sqlDB.ExpectErr( 2998 t, "cannot RESTORE <database>.* from a backup of individual tables", 2999 `RESTORE d4.* FROM $1 WITH into_db = 'd5'`, d4foobar, 3000 ) 3001 3002 sqlDB.ExpectErr( 3003 t, "cannot RESTORE DATABASE from a backup of individual tables", 3004 `RESTORE database d4 FROM $1`, d4foo, 3005 ) 3006 3007 sqlDB.Exec(t, `RESTORE database d4 FROM $1`, d4star) 3008 3009 }) 3010 3011 t.Run("db", func(t *testing.T) { 3012 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 3013 defer tcRestore.Stopper().Stop(context.Background()) 3014 sqlDB := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 3015 sqlDB.Exec(t, `RESTORE DATABASE data, d2, d3 FROM $1`, localFoo) 3016 }) 3017 3018 t.Run("db-exists", func(t *testing.T) { 3019 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 3020 defer tcRestore.Stopper().Stop(context.Background()) 3021 sqlDB := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 3022 3023 sqlDB.Exec(t, `CREATE DATABASE data`) 3024 sqlDB.ExpectErr(t, "already exists", `RESTORE DATABASE data FROM $1`, localFoo) 3025 }) 3026 3027 t.Run("tables", func(t *testing.T) { 3028 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 3029 defer tcRestore.Stopper().Stop(context.Background()) 3030 sqlDB := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 3031 3032 sqlDB.Exec(t, `CREATE DATABASE data`) 3033 sqlDB.Exec(t, `RESTORE data.* FROM $1`, localFoo) 3034 }) 3035 3036 t.Run("tables-needs-db", func(t *testing.T) { 3037 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 3038 defer tcRestore.Stopper().Stop(context.Background()) 3039 sqlDB := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 3040 3041 sqlDB.ExpectErr(t, "needs to exist", `RESTORE data.*, d4.* FROM $1`, localFoo) 3042 }) 3043 3044 t.Run("into_db", func(t *testing.T) { 3045 tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 3046 defer tcRestore.Stopper().Stop(context.Background()) 3047 sqlDB := sqlutils.MakeSQLRunner(tcRestore.Conns[0]) 3048 3049 sqlDB.ExpectErr( 3050 t, `cannot use "into_db"`, 3051 `RESTORE DATABASE data FROM $1 WITH into_db = 'other'`, localFoo, 3052 ) 3053 }) 3054 } 3055 3056 func TestBackupAzureAccountName(t *testing.T) { 3057 defer leaktest.AfterTest(t)() 3058 3059 const numAccounts = 1 3060 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3061 defer cleanupFn() 3062 3063 values := url.Values{} 3064 values.Set("AZURE_ACCOUNT_KEY", "password") 3065 values.Set("AZURE_ACCOUNT_NAME", "\n") 3066 3067 url := &url.URL{ 3068 Scheme: "azure", 3069 Host: "host", 3070 Path: "/backup", 3071 RawQuery: values.Encode(), 3072 } 3073 3074 // Verify newlines in the account name cause an error. 3075 sqlDB.ExpectErr(t, "azure: account name is not valid", `backup database data to $1`, url.String()) 3076 } 3077 3078 // If an operator issues a bad query or if a deploy contains a bug that corrupts 3079 // data, it should be possible to return to a previous point in time before the 3080 // badness. For cases when the last good timestamp is within the gc threshold, 3081 // see the subtests for two ways this can work. 3082 func TestPointInTimeRecovery(t *testing.T) { 3083 defer leaktest.AfterTest(t)() 3084 3085 const numAccounts = 1000 3086 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3087 defer cleanupFn() 3088 3089 fullBackupDir := localFoo + "/full" 3090 sqlDB.Exec(t, `BACKUP data.* TO $1`, fullBackupDir) 3091 3092 sqlDB.Exec(t, `UPDATE data.bank SET balance = 2`) 3093 3094 incBackupDir := localFoo + "/inc" 3095 sqlDB.Exec(t, `BACKUP data.* TO $1 INCREMENTAL FROM $2`, incBackupDir, fullBackupDir) 3096 3097 var beforeBadThingTs string 3098 sqlDB.Exec(t, `UPDATE data.bank SET balance = 3`) 3099 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&beforeBadThingTs) 3100 3101 // Something bad happens. 3102 sqlDB.Exec(t, `UPDATE data.bank SET balance = 4`) 3103 3104 beforeBadThingData := sqlDB.QueryStr(t, 3105 fmt.Sprintf(`SELECT * FROM data.bank AS OF SYSTEM TIME '%s' ORDER BY id`, beforeBadThingTs), 3106 ) 3107 3108 // If no previous BACKUPs have been taken, a new one can be taken using `AS 3109 // OF SYSTEM TIME` with a timestamp before the badness started. This can 3110 // then be RESTORE'd into a temporary database. The operator can manually 3111 // reconcile the current data with the restored data before finally 3112 // RENAME-ing the table into the final location. 3113 t.Run("recovery=new-backup", func(t *testing.T) { 3114 sqlDB = sqlutils.MakeSQLRunner(sqlDB.DB) 3115 recoveryDir := localFoo + "/new-backup" 3116 sqlDB.Exec(t, 3117 fmt.Sprintf(`BACKUP data.* TO $1 AS OF SYSTEM TIME '%s'`, beforeBadThingTs), 3118 recoveryDir, 3119 ) 3120 sqlDB.Exec(t, `CREATE DATABASE newbackup`) 3121 sqlDB.Exec(t, `RESTORE data.* FROM $1 WITH into_db=newbackup`, recoveryDir) 3122 3123 // Some manual reconciliation of the data in data.bank and 3124 // newbackup.bank could be done here by the operator. 3125 3126 sqlDB.Exec(t, `DROP TABLE data.bank`) 3127 sqlDB.Exec(t, `ALTER TABLE newbackup.bank RENAME TO data.bank`) 3128 sqlDB.Exec(t, `DROP DATABASE newbackup`) 3129 sqlDB.CheckQueryResults(t, `SELECT * FROM data.bank ORDER BY id`, beforeBadThingData) 3130 }) 3131 3132 // If there is a recent BACKUP (either full or incremental), then it will 3133 // likely be faster to make a BACKUP that is incremental from it and RESTORE 3134 // using that. Everything else works the same as above. 3135 t.Run("recovery=inc-backup", func(t *testing.T) { 3136 sqlDB = sqlutils.MakeSQLRunner(sqlDB.DB) 3137 recoveryDir := localFoo + "/inc-backup" 3138 sqlDB.Exec(t, 3139 fmt.Sprintf(`BACKUP data.* TO $1 AS OF SYSTEM TIME '%s' INCREMENTAL FROM $2, $3`, beforeBadThingTs), 3140 recoveryDir, fullBackupDir, incBackupDir, 3141 ) 3142 sqlDB.Exec(t, `CREATE DATABASE incbackup`) 3143 sqlDB.Exec(t, 3144 `RESTORE data.* FROM $1, $2, $3 WITH into_db=incbackup`, 3145 fullBackupDir, incBackupDir, recoveryDir, 3146 ) 3147 3148 // Some manual reconciliation of the data in data.bank and 3149 // incbackup.bank could be done here by the operator. 3150 3151 sqlDB.Exec(t, `DROP TABLE data.bank`) 3152 sqlDB.Exec(t, `ALTER TABLE incbackup.bank RENAME TO data.bank`) 3153 sqlDB.Exec(t, `DROP DATABASE incbackup`) 3154 sqlDB.CheckQueryResults(t, `SELECT * FROM data.bank ORDER BY id`, beforeBadThingData) 3155 }) 3156 } 3157 3158 func TestBackupRestoreDropDB(t *testing.T) { 3159 defer leaktest.AfterTest(t)() 3160 3161 const numAccounts = 1 3162 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3163 defer cleanupFn() 3164 3165 sqlDB.Exec(t, `DROP DATABASE data`) 3166 sqlDB.Exec(t, `CREATE DATABASE data`) 3167 sqlDB.Exec(t, `CREATE TABLE data.bank (i int)`) 3168 sqlDB.Exec(t, `INSERT INTO data.bank VALUES (1)`) 3169 3170 sqlDB.Exec(t, "BACKUP DATABASE data TO $1", localFoo) 3171 sqlDB.Exec(t, "CREATE DATABASE data2") 3172 sqlDB.Exec(t, "RESTORE data.* FROM $1 WITH OPTIONS ('into_db'='data2')", localFoo) 3173 3174 expected := sqlDB.QueryStr(t, `SELECT * FROM data.bank`) 3175 sqlDB.CheckQueryResults(t, `SELECT * FROM data2.bank`, expected) 3176 } 3177 3178 func TestBackupRestoreDropTable(t *testing.T) { 3179 defer leaktest.AfterTest(t)() 3180 3181 const numAccounts = 1 3182 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3183 defer cleanupFn() 3184 3185 sqlDB.Exec(t, `DROP TABLE data.bank`) 3186 sqlDB.Exec(t, ` 3187 CREATE TABLE data.bank (i int); 3188 INSERT INTO data.bank VALUES (1); 3189 `) 3190 3191 sqlDB.Exec(t, "BACKUP DATABASE data TO $1", localFoo) 3192 sqlDB.Exec(t, "CREATE DATABASE data2") 3193 sqlDB.Exec(t, "RESTORE data.* FROM $1 WITH OPTIONS ('into_db'='data2')", localFoo) 3194 3195 expected := sqlDB.QueryStr(t, `SELECT * FROM data.bank`) 3196 sqlDB.CheckQueryResults(t, `SELECT * FROM data2.bank`, expected) 3197 } 3198 3199 func TestBackupRestoreIncrementalAddTable(t *testing.T) { 3200 defer leaktest.AfterTest(t)() 3201 3202 const numAccounts = 1 3203 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3204 defer cleanupFn() 3205 sqlDB.Exec(t, `CREATE DATABASE data2`) 3206 sqlDB.Exec(t, `CREATE TABLE data.t (s string PRIMARY KEY)`) 3207 full, inc := localFoo+"/full", localFoo+"/inc" 3208 3209 sqlDB.Exec(t, `INSERT INTO data.t VALUES ('before')`) 3210 sqlDB.Exec(t, `BACKUP data.*, data2.* TO $1`, full) 3211 sqlDB.Exec(t, `UPDATE data.t SET s = 'after'`) 3212 3213 sqlDB.Exec(t, `CREATE TABLE data2.t2 (i int)`) 3214 sqlDB.Exec(t, "BACKUP data.*, data2.* TO $1 INCREMENTAL FROM $2", inc, full) 3215 } 3216 3217 func TestBackupRestoreIncrementalAddTableMissing(t *testing.T) { 3218 defer leaktest.AfterTest(t)() 3219 3220 const numAccounts = 1 3221 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3222 defer cleanupFn() 3223 sqlDB.Exec(t, `CREATE DATABASE data2`) 3224 sqlDB.Exec(t, `CREATE TABLE data.t (s string PRIMARY KEY)`) 3225 full, inc := localFoo+"/full", localFoo+"/inc" 3226 3227 sqlDB.Exec(t, `INSERT INTO data.t VALUES ('before')`) 3228 sqlDB.Exec(t, `BACKUP data.* TO $1`, full) 3229 sqlDB.Exec(t, `UPDATE data.t SET s = 'after'`) 3230 3231 sqlDB.Exec(t, `CREATE TABLE data2.t2 (i int)`) 3232 sqlDB.ExpectErr( 3233 t, "previous backup does not contain table", 3234 "BACKUP data.*, data2.* TO $1 INCREMENTAL FROM $2", inc, full, 3235 ) 3236 } 3237 3238 func TestBackupRestoreIncrementalTrucateTable(t *testing.T) { 3239 defer leaktest.AfterTest(t)() 3240 3241 const numAccounts = 1 3242 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3243 defer cleanupFn() 3244 sqlDB.Exec(t, `CREATE TABLE data.t (s string PRIMARY KEY)`) 3245 full, inc := localFoo+"/full", localFoo+"/inc" 3246 3247 sqlDB.Exec(t, `INSERT INTO data.t VALUES ('before')`) 3248 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, full) 3249 sqlDB.Exec(t, `UPDATE data.t SET s = 'after'`) 3250 sqlDB.Exec(t, `TRUNCATE data.t`) 3251 3252 sqlDB.Exec(t, "BACKUP DATABASE data TO $1 INCREMENTAL FROM $2", inc, full) 3253 } 3254 3255 func TestBackupRestoreIncrementalDropTable(t *testing.T) { 3256 defer leaktest.AfterTest(t)() 3257 3258 const numAccounts = 1 3259 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3260 defer cleanupFn() 3261 sqlDB.Exec(t, `CREATE TABLE data.t (s string PRIMARY KEY)`) 3262 full, inc := localFoo+"/full", localFoo+"/inc" 3263 3264 sqlDB.Exec(t, `INSERT INTO data.t VALUES ('before')`) 3265 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, full) 3266 sqlDB.Exec(t, `UPDATE data.t SET s = 'after'`) 3267 sqlDB.Exec(t, `DROP TABLE data.t`) 3268 3269 sqlDB.Exec(t, "BACKUP DATABASE data TO $1 INCREMENTAL FROM $2", inc, full) 3270 sqlDB.Exec(t, `DROP DATABASE data`) 3271 3272 // Restoring to backup before DROP restores t. 3273 sqlDB.Exec(t, `RESTORE DATABASE data FROM $1`, full) 3274 sqlDB.Exec(t, `SELECT 1 FROM data.t LIMIT 0`) 3275 sqlDB.Exec(t, `DROP DATABASE data`) 3276 3277 // Restoring to backup after DROP does not restore t. 3278 sqlDB.Exec(t, `RESTORE DATABASE data FROM $1, $2`, full, inc) 3279 sqlDB.ExpectErr(t, "relation \"data.t\" does not exist", `SELECT 1 FROM data.t LIMIT 0`) 3280 } 3281 3282 func TestFileIOLimits(t *testing.T) { 3283 defer leaktest.AfterTest(t)() 3284 3285 const numAccounts = 11 3286 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3287 defer cleanupFn() 3288 3289 elsewhere := "nodelocal://0/../../blah" 3290 3291 sqlDB.Exec(t, `BACKUP data.bank TO $1`, localFoo) 3292 sqlDB.ExpectErr( 3293 t, "local file access to paths outside of external-io-dir is not allowed", 3294 `BACKUP data.bank TO $1`, elsewhere, 3295 ) 3296 3297 sqlDB.Exec(t, `DROP TABLE data.bank`) 3298 3299 sqlDB.Exec(t, `RESTORE data.bank FROM $1`, localFoo) 3300 sqlDB.ExpectErr( 3301 t, "local file access to paths outside of external-io-dir is not allowed", 3302 `RESTORE data.bank FROM $1`, elsewhere, 3303 ) 3304 } 3305 3306 func TestBackupRestoreNotInTxn(t *testing.T) { 3307 defer leaktest.AfterTest(t)() 3308 3309 const numAccounts = 1 3310 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3311 defer cleanupFn() 3312 3313 db := sqlDB.DB.(*gosql.DB) 3314 tx, err := db.Begin() 3315 if err != nil { 3316 t.Fatal(err) 3317 } 3318 if _, err := tx.Exec(`BACKUP DATABASE data TO 'blah'`); !testutils.IsError(err, "cannot be used inside a transaction") { 3319 t.Fatal(err) 3320 } 3321 if err := tx.Rollback(); err != nil { 3322 t.Fatal(err) 3323 } 3324 3325 sqlDB.Exec(t, `BACKUP DATABASE data TO $1`, localFoo) 3326 sqlDB.Exec(t, `DROP DATABASE data`) 3327 sqlDB.Exec(t, `RESTORE DATABASE data FROM $1`, localFoo) 3328 3329 tx, err = db.Begin() 3330 if err != nil { 3331 t.Fatal(err) 3332 } 3333 if _, err := tx.Exec(`RESTORE DATABASE data FROM 'blah'`); !testutils.IsError(err, "cannot be used inside a transaction") { 3334 t.Fatal(err) 3335 } 3336 if err := tx.Rollback(); err != nil { 3337 t.Fatal(err) 3338 } 3339 3340 // TODO(dt): move to importccl. 3341 tx, err = db.Begin() 3342 if err != nil { 3343 t.Fatal(err) 3344 } 3345 if _, err := tx.Exec(`IMPORT TABLE t (id INT PRIMARY KEY) CSV DATA ('blah')`); !testutils.IsError(err, "cannot be used inside a transaction") { 3346 t.Fatal(err) 3347 } 3348 if err := tx.Rollback(); err != nil { 3349 t.Fatal(err) 3350 } 3351 } 3352 3353 func TestBackupRestoreSequence(t *testing.T) { 3354 defer leaktest.AfterTest(t)() 3355 const numAccounts = 1 3356 _, _, origDB, dir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3357 defer cleanupFn() 3358 args := base.TestServerArgs{ExternalIODir: dir} 3359 3360 backupLoc := localFoo 3361 3362 origDB.Exec(t, `CREATE SEQUENCE data.t_id_seq`) 3363 origDB.Exec(t, `CREATE TABLE data.t (id INT PRIMARY KEY DEFAULT nextval('data.t_id_seq'), v text)`) 3364 origDB.Exec(t, `INSERT INTO data.t (v) VALUES ('foo'), ('bar'), ('baz')`) 3365 3366 origDB.Exec(t, `BACKUP DATABASE data TO $1`, backupLoc) 3367 3368 t.Run("restore both table & sequence to a new cluster", func(t *testing.T) { 3369 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 3370 defer tc.Stopper().Stop(context.Background()) 3371 newDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 3372 3373 newDB.Exec(t, `RESTORE DATABASE data FROM $1`, backupLoc) 3374 newDB.Exec(t, `USE data`) 3375 3376 // Verify that the db was restored correctly. 3377 newDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{ 3378 {"1", "foo"}, 3379 {"2", "bar"}, 3380 {"3", "baz"}, 3381 }) 3382 newDB.CheckQueryResults(t, `SELECT last_value FROM t_id_seq`, [][]string{ 3383 {"3"}, 3384 }) 3385 3386 // Verify that we can kkeep inserting into the table, without violating a uniqueness constraint. 3387 newDB.Exec(t, `INSERT INTO data.t (v) VALUES ('bar')`) 3388 3389 // Verify that sequence <=> table dependencies are still in place. 3390 newDB.ExpectErr( 3391 t, "pq: cannot drop sequence t_id_seq because other objects depend on it", 3392 `DROP SEQUENCE t_id_seq`, 3393 ) 3394 }) 3395 3396 t.Run("restore just the table to a new cluster", func(t *testing.T) { 3397 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 3398 defer tc.Stopper().Stop(context.Background()) 3399 newDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 3400 3401 newDB.Exec(t, `CREATE DATABASE data`) 3402 newDB.Exec(t, `USE data`) 3403 3404 newDB.ExpectErr( 3405 t, "pq: cannot restore table \"t\" without referenced sequence 54 \\(or \"skip_missing_sequences\" option\\)", 3406 `RESTORE TABLE t FROM $1`, localFoo, 3407 ) 3408 3409 newDB.Exec(t, `RESTORE TABLE t FROM $1 WITH OPTIONS ('skip_missing_sequences')`, localFoo) 3410 3411 // Verify that the table was restored correctly. 3412 newDB.CheckQueryResults(t, `SELECT * FROM data.t`, [][]string{ 3413 {"1", "foo"}, 3414 {"2", "bar"}, 3415 {"3", "baz"}, 3416 }) 3417 3418 // Test that insertion without specifying the id column doesn't work, since 3419 // the DEFAULT expression has been removed. 3420 newDB.ExpectErr( 3421 t, `pq: missing \"id\" primary key column`, 3422 `INSERT INTO t (v) VALUES ('bloop')`, 3423 ) 3424 3425 // Test that inserting with a value specified works. 3426 newDB.Exec(t, `INSERT INTO t (id, v) VALUES (4, 'bloop')`) 3427 }) 3428 3429 t.Run("restore just the sequence to a new cluster", func(t *testing.T) { 3430 tc := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args}) 3431 defer tc.Stopper().Stop(context.Background()) 3432 newDB := sqlutils.MakeSQLRunner(tc.Conns[0]) 3433 3434 newDB.Exec(t, `CREATE DATABASE data`) 3435 newDB.Exec(t, `USE data`) 3436 // TODO(vilterp): create `RESTORE SEQUENCE` instead of `RESTORE TABLE`, and force 3437 // people to use that? 3438 newDB.Exec(t, `RESTORE TABLE t_id_seq FROM $1`, backupLoc) 3439 3440 // Verify that the sequence value was restored. 3441 newDB.CheckQueryResults(t, `SELECT last_value FROM data.t_id_seq`, [][]string{ 3442 {"3"}, 3443 }) 3444 3445 // Verify that the reference to the table that used it was removed, and 3446 // it can be dropped. 3447 newDB.Exec(t, `DROP SEQUENCE t_id_seq`) 3448 }) 3449 } 3450 3451 func TestBackupRestoreShowJob(t *testing.T) { 3452 defer leaktest.AfterTest(t)() 3453 3454 const numAccounts = 1 3455 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3456 defer cleanupFn() 3457 3458 sqlDB.Exec(t, `BACKUP DATABASE data TO $1 WITH revision_history`, localFoo) 3459 sqlDB.Exec(t, `CREATE DATABASE "data 2"`) 3460 3461 sqlDB.Exec(t, `RESTORE data.bank FROM $1 WITH skip_missing_foreign_keys, into_db = $2`, localFoo, "data 2") 3462 // The "updating privileges" clause in the SELECT statement is for excluding jobs 3463 // run by an unrelated startup migration. 3464 // TODO (lucy): Update this if/when we decide to change how these jobs queued by 3465 // the startup migration are handled. 3466 sqlDB.CheckQueryResults( 3467 t, "SELECT description FROM [SHOW JOBS] WHERE description != 'updating privileges' ORDER BY description", 3468 [][]string{ 3469 {"BACKUP DATABASE data TO 'nodelocal://0/foo' WITH revision_history"}, 3470 {"RESTORE TABLE data.bank FROM 'nodelocal://0/foo' WITH into_db = 'data 2', skip_missing_foreign_keys"}, 3471 }, 3472 ) 3473 } 3474 3475 func TestBackupCreatedStats(t *testing.T) { 3476 defer leaktest.AfterTest(t)() 3477 3478 const numAccounts = 1 3479 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3480 defer cleanupFn() 3481 3482 sqlDB.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false`) 3483 3484 sqlDB.Exec(t, `CREATE TABLE data.foo (a INT PRIMARY KEY)`) 3485 sqlDB.Exec(t, `CREATE STATISTICS foo_stats FROM data.foo`) 3486 sqlDB.Exec(t, `CREATE STATISTICS bank_stats FROM data.bank`) 3487 sqlDB.Exec(t, `BACKUP data.bank, data.foo TO $1 WITH revision_history`, localFoo) 3488 sqlDB.Exec(t, `CREATE DATABASE "data 2"`) 3489 sqlDB.Exec(t, `RESTORE data.bank, data.foo FROM $1 WITH skip_missing_foreign_keys, into_db = $2`, 3490 localFoo, "data 2") 3491 3492 sqlDB.CheckQueryResults(t, 3493 `SELECT statistics_name, column_names, row_count, distinct_count, null_count 3494 FROM [SHOW STATISTICS FOR TABLE "data 2".bank] WHERE statistics_name='bank_stats'`, 3495 [][]string{ 3496 {"bank_stats", "{id}", "1", "1", "0"}, 3497 {"bank_stats", "{balance}", "1", "1", "0"}, 3498 {"bank_stats", "{payload}", "1", "1", "0"}, 3499 }) 3500 sqlDB.CheckQueryResults(t, 3501 `SELECT statistics_name, column_names, row_count, distinct_count, null_count 3502 FROM [SHOW STATISTICS FOR TABLE "data 2".foo] WHERE statistics_name='foo_stats'`, 3503 [][]string{ 3504 {"foo_stats", "{a}", "0", "0", "0"}, 3505 }) 3506 } 3507 3508 // Ensure that backing up and restoring an empty database succeeds. 3509 func TestBackupRestoreEmptyDB(t *testing.T) { 3510 defer leaktest.AfterTest(t)() 3511 3512 const numAccounts = 1 3513 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3514 defer cleanupFn() 3515 3516 sqlDB.Exec(t, `CREATE DATABASE empty`) 3517 sqlDB.Exec(t, `BACKUP DATABASE empty TO $1`, localFoo) 3518 sqlDB.Exec(t, `DROP DATABASE empty`) 3519 sqlDB.Exec(t, `RESTORE DATABASE empty FROM $1`, localFoo) 3520 sqlDB.CheckQueryResults(t, `USE empty; SHOW TABLES;`, [][]string{}) 3521 } 3522 3523 func TestBackupRestoreSubsetCreatedStats(t *testing.T) { 3524 defer leaktest.AfterTest(t)() 3525 3526 const numAccounts = 1 3527 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3528 defer cleanupFn() 3529 3530 sqlDB.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false`) 3531 3532 sqlDB.Exec(t, `CREATE TABLE data.foo (a INT)`) 3533 sqlDB.Exec(t, `CREATE STATISTICS foo_stats FROM data.foo`) 3534 sqlDB.Exec(t, `CREATE STATISTICS bank_stats FROM data.bank`) 3535 3536 sqlDB.Exec(t, `BACKUP data.bank, data.foo TO $1 WITH revision_history`, localFoo) 3537 sqlDB.Exec(t, `DELETE FROM system.table_statistics WHERE name = 'foo_stats' OR name = 'bank_stats'`) 3538 sqlDB.Exec(t, `CREATE DATABASE "data 2"`) 3539 sqlDB.Exec(t, `RESTORE data.bank FROM $1 WITH skip_missing_foreign_keys, into_db = $2`, 3540 localFoo, "data 2") 3541 3542 // Ensure that the bank_stats have been restored, but foo_stats have not. 3543 sqlDB.CheckQueryResults(t, 3544 `SELECT name, "columnIDs", "rowCount", "distinctCount", "nullCount" FROM system.table_statistics`, 3545 [][]string{ 3546 {"bank_stats", "{1}", "1", "1", "0"}, // id column 3547 {"bank_stats", "{2}", "1", "1", "0"}, // balance column 3548 {"bank_stats", "{3}", "1", "1", "0"}, // payload column 3549 }) 3550 } 3551 3552 // Ensure that statistics are restored from correct backup. 3553 func TestBackupCreatedStatsFromIncrementalBackup(t *testing.T) { 3554 defer leaktest.AfterTest(t)() 3555 3556 const incremental1Foo = "nodelocal://0/incremental1foo" 3557 const incremental2Foo = "nodelocal://0/incremental2foo" 3558 const numAccounts = 1 3559 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 3560 defer cleanupFn() 3561 var beforeTs string 3562 3563 sqlDB.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false`) 3564 3565 // Create the 1st backup, where data.bank has 1 account. 3566 sqlDB.Exec(t, `CREATE STATISTICS bank_stats FROM data.bank`) 3567 sqlDB.Exec(t, `BACKUP data.bank TO $1 WITH revision_history`, localFoo) 3568 3569 // Create the 2nd backup, where data.bank has 3 accounts. 3570 sqlDB.Exec(t, `INSERT INTO data.bank VALUES (2, 2), (4, 4)`) 3571 sqlDB.Exec(t, `CREATE STATISTICS bank_stats FROM data.bank`) 3572 sqlDB.QueryRow(t, `SELECT cluster_logical_timestamp()`).Scan(&beforeTs) // Save time to restore to this point. 3573 sqlDB.Exec(t, `BACKUP data.bank TO $1 INCREMENTAL FROM $2 WITH revision_history`, incremental1Foo, localFoo) 3574 3575 // Create the 3rd backup, where data.bank has 5 accounts. 3576 sqlDB.Exec(t, `INSERT INTO data.bank VALUES (3, 3), (5, 2)`) 3577 sqlDB.Exec(t, `CREATE STATISTICS bank_stats FROM data.bank`) 3578 sqlDB.Exec(t, `BACKUP data.bank TO $1 INCREMENTAL FROM $2, $3 WITH revision_history`, incremental2Foo, localFoo, incremental1Foo) 3579 3580 // Restore the 2nd backup. 3581 sqlDB.Exec(t, `CREATE DATABASE "data 2"`) 3582 sqlDB.Exec(t, fmt.Sprintf(`RESTORE data.bank FROM "%s", "%s", "%s" AS OF SYSTEM TIME %s WITH skip_missing_foreign_keys, into_db = "%s"`, 3583 localFoo, incremental1Foo, incremental2Foo, beforeTs, "data 2")) 3584 3585 // Expect the values in row_count and distinct_count to be 3. The values 3586 // would be 1 if the stats from the full backup were restored and 5 if 3587 // the stats from the latest incremental backup were restored. 3588 sqlDB.CheckQueryResults(t, 3589 `SELECT statistics_name, column_names, row_count, distinct_count, null_count 3590 FROM [SHOW STATISTICS FOR TABLE "data 2".bank] WHERE statistics_name='bank_stats'`, 3591 [][]string{ 3592 {"bank_stats", "{id}", "3", "3", "0"}, 3593 {"bank_stats", "{balance}", "3", "3", "0"}, 3594 {"bank_stats", "{payload}", "3", "2", "2"}, 3595 }) 3596 } 3597 3598 // TestProtectedTimestampsDuringBackup ensures that the timestamp at which a 3599 // table is taken offline is protected during a BACKUP job to ensure that if 3600 // data can be read for a period longer than the default GC interval. 3601 func TestProtectedTimestampsDuringBackup(t *testing.T) { 3602 defer leaktest.AfterTest(t)() 3603 3604 // A sketch of the test is as follows: 3605 // 3606 // * Create a table foo to backup. 3607 // * Set a 1 second gcttl for foo. 3608 // * Start a BACKUP which blocks after setup (after time of backup is 3609 // decided), until it is signaled. 3610 // * Manually enqueue the ranges for GC and ensure that at least one 3611 // range ran the GC. 3612 // * Unblock the backup. 3613 // * Ensure the backup has succeeded. 3614 3615 ctx, cancel := context.WithCancel(context.Background()) 3616 defer cancel() 3617 3618 allowResponse := make(chan struct{}) 3619 dir, dirCleanupFn := testutils.TempDir(t) 3620 defer dirCleanupFn() 3621 params := base.TestClusterArgs{} 3622 params.ServerArgs.ExternalIODir = dir 3623 params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{ 3624 TestingResponseFilter: func( 3625 ctx context.Context, ba roachpb.BatchRequest, br *roachpb.BatchResponse, 3626 ) *roachpb.Error { 3627 for _, ru := range br.Responses { 3628 switch ru.GetInner().(type) { 3629 case *roachpb.ExportResponse, *roachpb.ImportResponse: 3630 <-allowResponse 3631 } 3632 } 3633 return nil 3634 }, 3635 } 3636 tc := testcluster.StartTestCluster(t, 3, params) 3637 defer tc.Stopper().Stop(ctx) 3638 3639 tc.WaitForNodeLiveness(t) 3640 require.NoError(t, tc.WaitForFullReplication()) 3641 3642 conn := tc.ServerConn(0) 3643 runner := sqlutils.MakeSQLRunner(conn) 3644 runner.Exec(t, "CREATE TABLE foo (k INT PRIMARY KEY, v BYTES)") 3645 runner.Exec(t, "SET CLUSTER SETTING kv.protectedts.poll_interval = '100ms';") 3646 runner.Exec(t, "ALTER TABLE foo CONFIGURE ZONE USING gc.ttlseconds = 1;") 3647 rRand, _ := randutil.NewPseudoRand() 3648 writeGarbage := func(from, to int) { 3649 for i := from; i < to; i++ { 3650 runner.Exec(t, "UPSERT INTO foo VALUES ($1, $2)", i, randutil.RandBytes(rRand, 1<<10)) 3651 } 3652 } 3653 writeGarbage(3, 10) 3654 rowCount := runner.QueryStr(t, "SELECT * FROM foo") 3655 3656 go func() { 3657 // N.B. We use the conn rather than the runner here since the test may 3658 // finish before the job finishes. The test will finish as soon as the 3659 // timestamp is no longer protected. If the test starts tearing down the 3660 // cluster before the backup job is done, the test may still fail when the 3661 // backup fails. This test does not particularly care if the BACKUP 3662 // completes with a success or failure, as long as the timestamp is released 3663 // shortly after the BACKUP is unblocked. 3664 _, _ = conn.Exec(`BACKUP TABLE FOO TO 'nodelocal://1/foo'`) // ignore error. 3665 }() 3666 3667 var jobID string 3668 testutils.SucceedsSoon(t, func() error { 3669 row := conn.QueryRow("SELECT job_id FROM [SHOW JOBS] ORDER BY created DESC LIMIT 1") 3670 return row.Scan(&jobID) 3671 }) 3672 3673 time.Sleep(3 * time.Second) // Wait for the data to definitely be expired and GC to run. 3674 gcTable := func(skipShouldQueue bool) (traceStr string) { 3675 rows := runner.Query(t, "SELECT start_key"+ 3676 " FROM crdb_internal.ranges_no_leases"+ 3677 " WHERE table_name = $1"+ 3678 " AND database_name = current_database()"+ 3679 " ORDER BY start_key ASC", "foo") 3680 var traceBuf strings.Builder 3681 for rows.Next() { 3682 var startKey roachpb.Key 3683 require.NoError(t, rows.Scan(&startKey)) 3684 r := tc.LookupRangeOrFatal(t, startKey) 3685 l, _, err := tc.FindRangeLease(r, nil) 3686 require.NoError(t, err) 3687 lhServer := tc.Server(int(l.Replica.NodeID) - 1) 3688 s, repl := getFirstStoreReplica(t, lhServer, startKey) 3689 trace, _, err := s.ManuallyEnqueue(ctx, "gc", repl, skipShouldQueue) 3690 require.NoError(t, err) 3691 fmt.Fprintf(&traceBuf, "%s\n", trace.String()) 3692 } 3693 require.NoError(t, rows.Err()) 3694 return traceBuf.String() 3695 } 3696 3697 // We should have refused to GC over the timestamp which we needed to protect. 3698 gcTable(true /* skipShouldQueue */) 3699 3700 // Unblock the blocked backup request. 3701 close(allowResponse) 3702 3703 runner.CheckQueryResultsRetry(t, "SELECT * FROM foo", rowCount) 3704 3705 // Wait for the ranges to learn about the removed record and ensure that we 3706 // can GC from the range soon. 3707 // This regex matches when all float priorities other than 0.00000. It does 3708 // this by matching either a float >= 1 (e.g. 1230.012) or a float < 1 (e.g. 3709 // 0.000123). 3710 matchNonZero := "[1-9]\\d*\\.\\d+|0\\.\\d*[1-9]\\d*" 3711 nonZeroProgressRE := regexp.MustCompile(fmt.Sprintf("priority=(%s)", matchNonZero)) 3712 testutils.SucceedsSoon(t, func() error { 3713 writeGarbage(3, 10) 3714 if trace := gcTable(false /* skipShouldQueue */); !nonZeroProgressRE.MatchString(trace) { 3715 return fmt.Errorf("expected %v in trace: %v", nonZeroProgressRE, trace) 3716 } 3717 return nil 3718 }) 3719 } 3720 3721 func getFirstStoreReplica( 3722 t *testing.T, s serverutils.TestServerInterface, key roachpb.Key, 3723 ) (*kvserver.Store, *kvserver.Replica) { 3724 t.Helper() 3725 store, err := s.GetStores().(*kvserver.Stores).GetStore(s.GetFirstStoreID()) 3726 require.NoError(t, err) 3727 var repl *kvserver.Replica 3728 testutils.SucceedsSoon(t, func() error { 3729 repl = store.LookupReplica(roachpb.RKey(key)) 3730 if repl == nil { 3731 return errors.New(`could not find replica`) 3732 } 3733 return nil 3734 }) 3735 return store, repl 3736 }