github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/backupccl/full_cluster_backup_restore_test.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package backupccl_test
    10  
    11  import (
    12  	"context"
    13  	"fmt"
    14  	"reflect"
    15  	"strconv"
    16  	"testing"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/base"
    19  	_ "github.com/cockroachdb/cockroach/pkg/ccl/partitionccl"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    21  	"github.com/cockroachdb/cockroach/pkg/testutils"
    22  	"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
    23  	"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
    24  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    25  )
    26  
    27  func backupRestoreTestSetupEmptyWithParams(
    28  	t testing.TB,
    29  	clusterSize int,
    30  	dir string,
    31  	init func(tc *testcluster.TestCluster),
    32  	params base.TestClusterArgs,
    33  ) (ctx context.Context, tc *testcluster.TestCluster, sqlDB *sqlutils.SQLRunner, cleanup func()) {
    34  	ctx = context.Background()
    35  
    36  	params.ServerArgs.ExternalIODir = dir
    37  	tc = testcluster.StartTestCluster(t, clusterSize, params)
    38  	init(tc)
    39  
    40  	sqlDB = sqlutils.MakeSQLRunner(tc.Conns[0])
    41  
    42  	cleanupFn := func() {
    43  		tc.Stopper().Stop(ctx) // cleans up in memory storage's auxiliary dirs
    44  	}
    45  
    46  	return ctx, tc, sqlDB, cleanupFn
    47  }
    48  
    49  func createEmptyCluster(
    50  	t testing.TB, clusterSize int,
    51  ) (sqlDB *sqlutils.SQLRunner, tempDir string, cleanup func()) {
    52  	ctx := context.Background()
    53  
    54  	dir, dirCleanupFn := testutils.TempDir(t)
    55  	params := base.TestClusterArgs{}
    56  	params.ServerArgs.ExternalIODir = dir
    57  	tc := testcluster.StartTestCluster(t, clusterSize, params)
    58  
    59  	sqlDB = sqlutils.MakeSQLRunner(tc.Conns[0])
    60  
    61  	cleanupFn := func() {
    62  		tc.Stopper().Stop(ctx) // cleans up in memory storage's auxiliary dirs
    63  		dirCleanupFn()         // cleans up dir, which is the nodelocal:// storage
    64  	}
    65  
    66  	return sqlDB, dir, cleanupFn
    67  }
    68  
    69  // Large test to ensure that all of the system table data is being restored in
    70  // the new cluster. Ensures that all the moving pieces are working together.
    71  func TestFullClusterBackup(t *testing.T) {
    72  	defer leaktest.AfterTest(t)()
    73  
    74  	const numAccounts = 10
    75  	_, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone)
    76  	_, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone)
    77  	defer cleanupFn()
    78  	defer cleanupEmptyCluster()
    79  
    80  	// Disable automatic stats collection on the backup and restoring clusters to ensure
    81  	// the test is deterministic.
    82  	sqlDB.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false`)
    83  	sqlDBRestore.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false`)
    84  
    85  	// Create some other databases and tables.
    86  	sqlDB.Exec(t, `CREATE TABLE defaultdb.foo (a int);`)
    87  	sqlDB.Exec(t, `CREATE DATABASE data2;`)
    88  	sqlDB.Exec(t, `CREATE TABLE data2.foo (a int);`)
    89  
    90  	// Setup the system systemTablesToVerify to ensure that they are copied to the new cluster.
    91  	// Populate system.users.
    92  	for i := 0; i < 1000; i++ {
    93  		sqlDB.Exec(t, fmt.Sprintf("CREATE USER maxroach%d", i))
    94  	}
    95  	// Populate system.zones.
    96  	sqlDB.Exec(t, `ALTER TABLE data.bank CONFIGURE ZONE USING gc.ttlseconds = 3600`)
    97  	sqlDB.Exec(t, `ALTER TABLE defaultdb.foo CONFIGURE ZONE USING gc.ttlseconds = 45`)
    98  	sqlDB.Exec(t, `ALTER DATABASE data2 CONFIGURE ZONE USING gc.ttlseconds = 900`)
    99  	// Populate system.jobs.
   100  	// Note: this is not the backup under test, this just serves as a job which should appear in the restore.
   101  	sqlDB.Exec(t, `BACKUP data.bank TO 'nodelocal://0/throwawayjob'`)
   102  	preBackupJobs := sqlDB.QueryStr(t, "SELECT * FROM system.jobs")
   103  	// Populate system.settings.
   104  	sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_io_write.concurrent_addsstable_requests = 5`)
   105  	sqlDB.Exec(t, `INSERT INTO system.ui (key, value, "lastUpdated") VALUES ($1, $2, now())`, "some_key", "some_val")
   106  	// Populate system.comments.
   107  	sqlDB.Exec(t, `COMMENT ON TABLE data.bank IS 'table comment string'`)
   108  	sqlDB.Exec(t, `COMMENT ON DATABASE data IS 'database comment string'`)
   109  
   110  	sqlDB.Exec(t,
   111  		`INSERT INTO system.locations ("localityKey", "localityValue", latitude, longitude) VALUES ($1, $2, $3, $4)`,
   112  		"city", "New York City", 40.71427, -74.00597,
   113  	)
   114  	// Populate system.role_members.
   115  	sqlDB.Exec(t, `CREATE ROLE system_ops;`)
   116  	sqlDB.Exec(t, `GRANT CREATE, SELECT ON DATABASE data TO system_ops;`)
   117  	sqlDB.Exec(t, `GRANT system_ops TO maxroach1;`)
   118  
   119  	sqlDB.Exec(t, `CREATE STATISTICS my_stats FROM data.bank`)
   120  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   121  
   122  	// Create a bunch of user tables on the restoring cluster that we're going
   123  	// to delete.
   124  	for i := 0; i < 50; i++ {
   125  		sqlDBRestore.Exec(t, `CREATE DATABASE db_to_drop`)
   126  		sqlDBRestore.Exec(t, `CREATE TABLE db_to_drop.table_to_drop (a int)`)
   127  		sqlDBRestore.Exec(t, `ALTER TABLE db_to_drop.table_to_drop CONFIGURE ZONE USING gc.ttlseconds=1`)
   128  		sqlDBRestore.Exec(t, `DROP DATABASE db_to_drop`)
   129  	}
   130  	// Wait for the GC job to finish to ensure the descriptors no longer exist.
   131  	sqlDBRestore.CheckQueryResultsRetry(
   132  		t, "SELECT count(*) FROM [SHOW JOBS] WHERE job_type = 'SCHEMA CHANGE GC' AND status = 'running'",
   133  		[][]string{{"0"}},
   134  	)
   135  
   136  	sqlDBRestore.Exec(t, `RESTORE FROM $1`, localFoo)
   137  
   138  	t.Run("ensure all databases restored", func(t *testing.T) {
   139  		sqlDBRestore.CheckQueryResults(t,
   140  			`SHOW DATABASES`,
   141  			[][]string{
   142  				{"data"},
   143  				{"data2"},
   144  				{"defaultdb"},
   145  				{"postgres"},
   146  				{"system"},
   147  			})
   148  	})
   149  
   150  	t.Run("ensure system table data restored", func(t *testing.T) {
   151  		// Note the absence of the jobs table. Jobs are tested by another test as
   152  		// jobs are created during the RESTORE process.
   153  		systemTablesToVerify := []string{
   154  			sqlbase.CommentsTable.Name,
   155  			sqlbase.LocationsTable.Name,
   156  			sqlbase.RoleMembersTable.Name,
   157  			sqlbase.SettingsTable.Name,
   158  			sqlbase.TableStatisticsTable.Name,
   159  			sqlbase.UITable.Name,
   160  			sqlbase.UsersTable.Name,
   161  			sqlbase.ZonesTable.Name,
   162  		}
   163  
   164  		verificationQueries := make([]string, len(systemTablesToVerify))
   165  		// Populate the list of tables we expect to be restored as well as queries
   166  		// that can be used to ensure that data in those tables is restored.
   167  		for i, table := range systemTablesToVerify {
   168  			switch table {
   169  			case sqlbase.TableStatisticsTable.Name:
   170  				// createdAt and statisticsID are re-generated on RESTORE.
   171  				query := fmt.Sprintf("SELECT \"tableID\", name, \"columnIDs\", \"rowCount\" FROM system.table_statistics")
   172  				verificationQueries[i] = query
   173  			default:
   174  				query := fmt.Sprintf("SELECT * FROM system.%s", table)
   175  				verificationQueries[i] = query
   176  			}
   177  		}
   178  
   179  		for _, read := range verificationQueries {
   180  			sqlDBRestore.CheckQueryResults(t, read, sqlDB.QueryStr(t, read))
   181  		}
   182  	})
   183  
   184  	t.Run("ensure table IDs have not changed", func(t *testing.T) {
   185  		// Check that all tables have been restored. DISTINCT is needed in order to
   186  		// deal with the inclusion of schemas in the system.namespace table.
   187  		tableIDCheck := "SELECT DISTINCT name, id FROM system.namespace"
   188  		sqlDBRestore.CheckQueryResults(t, tableIDCheck, sqlDB.QueryStr(t, tableIDCheck))
   189  	})
   190  
   191  	t.Run("ensure user table data restored", func(t *testing.T) {
   192  		expectedUserTables := [][]string{
   193  			{"data", "bank"},
   194  			{"data2", "foo"},
   195  			{"defaultdb", "foo"},
   196  		}
   197  
   198  		for _, table := range expectedUserTables {
   199  			query := fmt.Sprintf("SELECT * FROM %s.%s", table[0], table[1])
   200  			sqlDBRestore.CheckQueryResults(t, query, sqlDB.QueryStr(t, query))
   201  		}
   202  	})
   203  
   204  	t.Run("ensure that grants are restored", func(t *testing.T) {
   205  		grantCheck := "use system; SHOW grants"
   206  		sqlDBRestore.CheckQueryResults(t, grantCheck, sqlDB.QueryStr(t, grantCheck))
   207  		grantCheck = "use data; SHOW grants"
   208  		sqlDBRestore.CheckQueryResults(t, grantCheck, sqlDB.QueryStr(t, grantCheck))
   209  	})
   210  
   211  	t.Run("ensure that jobs are restored", func(t *testing.T) {
   212  		// Ensure that the jobs in the RESTORE cluster is a superset of the jobs
   213  		// that were in the BACKUP cluster (before the full cluster BACKUP job was
   214  		// run). There may be more jobs now because the restore can run jobs of
   215  		// its own.
   216  		newJobs := sqlDBRestore.QueryStr(t, "SELECT * FROM system.jobs")
   217  		for _, oldJob := range preBackupJobs {
   218  			present := false
   219  			for _, newJob := range newJobs {
   220  				if reflect.DeepEqual(oldJob, newJob) {
   221  					present = true
   222  				}
   223  			}
   224  			if !present {
   225  				t.Errorf("Expected to find job %+v in RESTORE cluster, but not found", oldJob)
   226  			}
   227  		}
   228  	})
   229  
   230  	t.Run("ensure that tables can be created at the execpted ID", func(t *testing.T) {
   231  		maxID, err := strconv.Atoi(sqlDBRestore.QueryStr(t, "SELECT max(id) FROM system.namespace")[0][0])
   232  		if err != nil {
   233  			t.Fatal(err)
   234  		}
   235  		dbName, tableName := "new_db", "new_table"
   236  		// N.B. We skip the database ID that was allocated too the temporary
   237  		// system table and all of the temporary system tables (1 + 8).
   238  		numIDsToSkip := 9
   239  		expectedDBID := maxID + numIDsToSkip + 1
   240  		expectedTableID := maxID + numIDsToSkip + 2
   241  		sqlDBRestore.Exec(t, fmt.Sprintf("CREATE DATABASE %s", dbName))
   242  		sqlDBRestore.Exec(t, fmt.Sprintf("CREATE TABLE %s.%s (a int)", dbName, tableName))
   243  		sqlDBRestore.CheckQueryResults(
   244  			t, fmt.Sprintf("SELECT id FROM system.namespace WHERE name = '%s'", dbName),
   245  			[][]string{{strconv.Itoa(expectedDBID)}},
   246  		)
   247  		sqlDBRestore.CheckQueryResults(
   248  			t, fmt.Sprintf("SELECT id FROM system.namespace WHERE name = '%s'", tableName),
   249  			[][]string{{strconv.Itoa(expectedTableID)}},
   250  		)
   251  	})
   252  }
   253  
   254  func TestFullClusterBackupDroppedTables(t *testing.T) {
   255  	defer leaktest.AfterTest(t)()
   256  
   257  	const numAccounts = 10
   258  	_, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone)
   259  	_, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone)
   260  	defer cleanupFn()
   261  	defer cleanupEmptyCluster()
   262  
   263  	_, tablesToCheck := generateInterleavedData(sqlDB, t, numAccounts)
   264  
   265  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   266  	sqlDBRestore.Exec(t, `RESTORE FROM $1`, localFoo)
   267  
   268  	for _, table := range tablesToCheck {
   269  		query := fmt.Sprintf("SELECT * FROM data.%s", table)
   270  		sqlDBRestore.CheckQueryResults(t, query, sqlDB.QueryStr(t, query))
   271  	}
   272  }
   273  
   274  func TestIncrementalFullClusterBackup(t *testing.T) {
   275  	defer leaktest.AfterTest(t)()
   276  
   277  	const numAccounts = 10
   278  	const incrementalBackupLocation = "nodelocal://0/inc-full-backup"
   279  	_, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone)
   280  	_, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone)
   281  	defer cleanupFn()
   282  	defer cleanupEmptyCluster()
   283  
   284  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   285  	sqlDB.Exec(t, fmt.Sprintf("CREATE USER maxroach1"))
   286  
   287  	sqlDB.Exec(t, `BACKUP TO $1 INCREMENTAL FROM $2`, incrementalBackupLocation, localFoo)
   288  	sqlDBRestore.Exec(t, `RESTORE FROM $1, $2`, localFoo, incrementalBackupLocation)
   289  
   290  	checkQuery := "SELECT * FROM system.users"
   291  	sqlDBRestore.CheckQueryResults(t, checkQuery, sqlDB.QueryStr(t, checkQuery))
   292  }
   293  
   294  // TestEmptyFullClusterResotre ensures that we can backup and restore a full
   295  // cluster backup with only metadata (no user data). Regression test for #49573.
   296  func TestEmptyFullClusterRestore(t *testing.T) {
   297  	defer leaktest.AfterTest(t)()
   298  
   299  	sqlDB, tempDir, cleanupFn := createEmptyCluster(t, singleNode)
   300  	_, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone)
   301  	defer cleanupFn()
   302  	defer cleanupEmptyCluster()
   303  
   304  	sqlDB.Exec(t, `CREATE USER alice`)
   305  	sqlDB.Exec(t, `CREATE USER bob`)
   306  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   307  	sqlDBRestore.Exec(t, `RESTORE FROM $1`, localFoo)
   308  
   309  	checkQuery := "SELECT * FROM system.users"
   310  	sqlDBRestore.CheckQueryResults(t, checkQuery, sqlDB.QueryStr(t, checkQuery))
   311  }
   312  
   313  func TestDisallowFullClusterRestoreOnNonFreshCluster(t *testing.T) {
   314  	defer leaktest.AfterTest(t)()
   315  
   316  	const numAccounts = 10
   317  	_, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone)
   318  	_, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone)
   319  	defer cleanupFn()
   320  	defer cleanupEmptyCluster()
   321  
   322  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   323  	sqlDBRestore.Exec(t, `CREATE DATABASE foo`)
   324  	sqlDBRestore.ExpectErr(
   325  		t, "pq: full cluster restore can only be run on a cluster with no tables or databases but found 1 descriptors",
   326  		`RESTORE FROM $1`, localFoo,
   327  	)
   328  }
   329  
   330  func TestDisallowFullClusterRestoreOfNonFullBackup(t *testing.T) {
   331  	defer leaktest.AfterTest(t)()
   332  
   333  	const numAccounts = 10
   334  	_, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone)
   335  	_, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone)
   336  	defer cleanupFn()
   337  	defer cleanupEmptyCluster()
   338  
   339  	sqlDB.Exec(t, `BACKUP data.bank TO $1`, localFoo)
   340  	sqlDBRestore.ExpectErr(
   341  		t, "pq: full cluster RESTORE can only be used on full cluster BACKUP files",
   342  		`RESTORE FROM $1`, localFoo,
   343  	)
   344  }
   345  
   346  func TestAllowNonFullClusterRestoreOfFullBackup(t *testing.T) {
   347  	defer leaktest.AfterTest(t)()
   348  
   349  	const numAccounts = 10
   350  	_, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone)
   351  	defer cleanupFn()
   352  
   353  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   354  	sqlDB.Exec(t, `CREATE DATABASE data2`)
   355  	sqlDB.Exec(t, `RESTORE data.bank FROM $1 WITH into_db='data2'`, localFoo)
   356  
   357  	checkResults := "SELECT * FROM data.bank"
   358  	sqlDB.CheckQueryResults(t, checkResults, sqlDB.QueryStr(t, checkResults))
   359  }
   360  
   361  func TestResotreDatabaseFromFullClusterBackup(t *testing.T) {
   362  	defer leaktest.AfterTest(t)()
   363  
   364  	const numAccounts = 10
   365  	_, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone)
   366  	defer cleanupFn()
   367  
   368  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   369  	sqlDB.Exec(t, `DROP DATABASE data`)
   370  	sqlDB.Exec(t, `RESTORE DATABASE data FROM $1`, localFoo)
   371  
   372  	sqlDB.CheckQueryResults(t, "SELECT count(*) FROM data.bank", [][]string{{"10"}})
   373  }
   374  
   375  func TestRestoreSystemTableFromFullClusterBackup(t *testing.T) {
   376  	defer leaktest.AfterTest(t)()
   377  
   378  	const numAccounts = 10
   379  	_, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone)
   380  	defer cleanupFn()
   381  
   382  	sqlDB.Exec(t, `CREATE USER maxroach`)
   383  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   384  	sqlDB.Exec(t, `CREATE DATABASE temp_sys`)
   385  	sqlDB.Exec(t, `RESTORE system.users FROM $1 WITH into_db='temp_sys'`, localFoo)
   386  
   387  	sqlDB.CheckQueryResults(t, "SELECT * FROM temp_sys.users", sqlDB.QueryStr(t, "SELECT * FROM system.users"))
   388  }
   389  
   390  func TestCreateDBAndTableIncrementalFullClusterBackup(t *testing.T) {
   391  	defer leaktest.AfterTest(t)()
   392  
   393  	_, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, 0, initNone)
   394  	defer cleanupFn()
   395  
   396  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   397  	sqlDB.Exec(t, `CREATE DATABASE foo`)
   398  	sqlDB.Exec(t, `CREATE TABLE foo.bar (a int)`)
   399  
   400  	// Ensure that the new backup succeeds.
   401  	sqlDB.Exec(t, `BACKUP TO $1`, localFoo)
   402  }