github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/backupccl/full_cluster_backup_restore_test.go (about) 1 // Copyright 2020 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 package backupccl_test 10 11 import ( 12 "context" 13 "fmt" 14 "reflect" 15 "strconv" 16 "testing" 17 18 "github.com/cockroachdb/cockroach/pkg/base" 19 _ "github.com/cockroachdb/cockroach/pkg/ccl/partitionccl" 20 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 21 "github.com/cockroachdb/cockroach/pkg/testutils" 22 "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" 23 "github.com/cockroachdb/cockroach/pkg/testutils/testcluster" 24 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 25 ) 26 27 func backupRestoreTestSetupEmptyWithParams( 28 t testing.TB, 29 clusterSize int, 30 dir string, 31 init func(tc *testcluster.TestCluster), 32 params base.TestClusterArgs, 33 ) (ctx context.Context, tc *testcluster.TestCluster, sqlDB *sqlutils.SQLRunner, cleanup func()) { 34 ctx = context.Background() 35 36 params.ServerArgs.ExternalIODir = dir 37 tc = testcluster.StartTestCluster(t, clusterSize, params) 38 init(tc) 39 40 sqlDB = sqlutils.MakeSQLRunner(tc.Conns[0]) 41 42 cleanupFn := func() { 43 tc.Stopper().Stop(ctx) // cleans up in memory storage's auxiliary dirs 44 } 45 46 return ctx, tc, sqlDB, cleanupFn 47 } 48 49 func createEmptyCluster( 50 t testing.TB, clusterSize int, 51 ) (sqlDB *sqlutils.SQLRunner, tempDir string, cleanup func()) { 52 ctx := context.Background() 53 54 dir, dirCleanupFn := testutils.TempDir(t) 55 params := base.TestClusterArgs{} 56 params.ServerArgs.ExternalIODir = dir 57 tc := testcluster.StartTestCluster(t, clusterSize, params) 58 59 sqlDB = sqlutils.MakeSQLRunner(tc.Conns[0]) 60 61 cleanupFn := func() { 62 tc.Stopper().Stop(ctx) // cleans up in memory storage's auxiliary dirs 63 dirCleanupFn() // cleans up dir, which is the nodelocal:// storage 64 } 65 66 return sqlDB, dir, cleanupFn 67 } 68 69 // Large test to ensure that all of the system table data is being restored in 70 // the new cluster. Ensures that all the moving pieces are working together. 71 func TestFullClusterBackup(t *testing.T) { 72 defer leaktest.AfterTest(t)() 73 74 const numAccounts = 10 75 _, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 76 _, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone) 77 defer cleanupFn() 78 defer cleanupEmptyCluster() 79 80 // Disable automatic stats collection on the backup and restoring clusters to ensure 81 // the test is deterministic. 82 sqlDB.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false`) 83 sqlDBRestore.Exec(t, `SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false`) 84 85 // Create some other databases and tables. 86 sqlDB.Exec(t, `CREATE TABLE defaultdb.foo (a int);`) 87 sqlDB.Exec(t, `CREATE DATABASE data2;`) 88 sqlDB.Exec(t, `CREATE TABLE data2.foo (a int);`) 89 90 // Setup the system systemTablesToVerify to ensure that they are copied to the new cluster. 91 // Populate system.users. 92 for i := 0; i < 1000; i++ { 93 sqlDB.Exec(t, fmt.Sprintf("CREATE USER maxroach%d", i)) 94 } 95 // Populate system.zones. 96 sqlDB.Exec(t, `ALTER TABLE data.bank CONFIGURE ZONE USING gc.ttlseconds = 3600`) 97 sqlDB.Exec(t, `ALTER TABLE defaultdb.foo CONFIGURE ZONE USING gc.ttlseconds = 45`) 98 sqlDB.Exec(t, `ALTER DATABASE data2 CONFIGURE ZONE USING gc.ttlseconds = 900`) 99 // Populate system.jobs. 100 // Note: this is not the backup under test, this just serves as a job which should appear in the restore. 101 sqlDB.Exec(t, `BACKUP data.bank TO 'nodelocal://0/throwawayjob'`) 102 preBackupJobs := sqlDB.QueryStr(t, "SELECT * FROM system.jobs") 103 // Populate system.settings. 104 sqlDB.Exec(t, `SET CLUSTER SETTING kv.bulk_io_write.concurrent_addsstable_requests = 5`) 105 sqlDB.Exec(t, `INSERT INTO system.ui (key, value, "lastUpdated") VALUES ($1, $2, now())`, "some_key", "some_val") 106 // Populate system.comments. 107 sqlDB.Exec(t, `COMMENT ON TABLE data.bank IS 'table comment string'`) 108 sqlDB.Exec(t, `COMMENT ON DATABASE data IS 'database comment string'`) 109 110 sqlDB.Exec(t, 111 `INSERT INTO system.locations ("localityKey", "localityValue", latitude, longitude) VALUES ($1, $2, $3, $4)`, 112 "city", "New York City", 40.71427, -74.00597, 113 ) 114 // Populate system.role_members. 115 sqlDB.Exec(t, `CREATE ROLE system_ops;`) 116 sqlDB.Exec(t, `GRANT CREATE, SELECT ON DATABASE data TO system_ops;`) 117 sqlDB.Exec(t, `GRANT system_ops TO maxroach1;`) 118 119 sqlDB.Exec(t, `CREATE STATISTICS my_stats FROM data.bank`) 120 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 121 122 // Create a bunch of user tables on the restoring cluster that we're going 123 // to delete. 124 for i := 0; i < 50; i++ { 125 sqlDBRestore.Exec(t, `CREATE DATABASE db_to_drop`) 126 sqlDBRestore.Exec(t, `CREATE TABLE db_to_drop.table_to_drop (a int)`) 127 sqlDBRestore.Exec(t, `ALTER TABLE db_to_drop.table_to_drop CONFIGURE ZONE USING gc.ttlseconds=1`) 128 sqlDBRestore.Exec(t, `DROP DATABASE db_to_drop`) 129 } 130 // Wait for the GC job to finish to ensure the descriptors no longer exist. 131 sqlDBRestore.CheckQueryResultsRetry( 132 t, "SELECT count(*) FROM [SHOW JOBS] WHERE job_type = 'SCHEMA CHANGE GC' AND status = 'running'", 133 [][]string{{"0"}}, 134 ) 135 136 sqlDBRestore.Exec(t, `RESTORE FROM $1`, localFoo) 137 138 t.Run("ensure all databases restored", func(t *testing.T) { 139 sqlDBRestore.CheckQueryResults(t, 140 `SHOW DATABASES`, 141 [][]string{ 142 {"data"}, 143 {"data2"}, 144 {"defaultdb"}, 145 {"postgres"}, 146 {"system"}, 147 }) 148 }) 149 150 t.Run("ensure system table data restored", func(t *testing.T) { 151 // Note the absence of the jobs table. Jobs are tested by another test as 152 // jobs are created during the RESTORE process. 153 systemTablesToVerify := []string{ 154 sqlbase.CommentsTable.Name, 155 sqlbase.LocationsTable.Name, 156 sqlbase.RoleMembersTable.Name, 157 sqlbase.SettingsTable.Name, 158 sqlbase.TableStatisticsTable.Name, 159 sqlbase.UITable.Name, 160 sqlbase.UsersTable.Name, 161 sqlbase.ZonesTable.Name, 162 } 163 164 verificationQueries := make([]string, len(systemTablesToVerify)) 165 // Populate the list of tables we expect to be restored as well as queries 166 // that can be used to ensure that data in those tables is restored. 167 for i, table := range systemTablesToVerify { 168 switch table { 169 case sqlbase.TableStatisticsTable.Name: 170 // createdAt and statisticsID are re-generated on RESTORE. 171 query := fmt.Sprintf("SELECT \"tableID\", name, \"columnIDs\", \"rowCount\" FROM system.table_statistics") 172 verificationQueries[i] = query 173 default: 174 query := fmt.Sprintf("SELECT * FROM system.%s", table) 175 verificationQueries[i] = query 176 } 177 } 178 179 for _, read := range verificationQueries { 180 sqlDBRestore.CheckQueryResults(t, read, sqlDB.QueryStr(t, read)) 181 } 182 }) 183 184 t.Run("ensure table IDs have not changed", func(t *testing.T) { 185 // Check that all tables have been restored. DISTINCT is needed in order to 186 // deal with the inclusion of schemas in the system.namespace table. 187 tableIDCheck := "SELECT DISTINCT name, id FROM system.namespace" 188 sqlDBRestore.CheckQueryResults(t, tableIDCheck, sqlDB.QueryStr(t, tableIDCheck)) 189 }) 190 191 t.Run("ensure user table data restored", func(t *testing.T) { 192 expectedUserTables := [][]string{ 193 {"data", "bank"}, 194 {"data2", "foo"}, 195 {"defaultdb", "foo"}, 196 } 197 198 for _, table := range expectedUserTables { 199 query := fmt.Sprintf("SELECT * FROM %s.%s", table[0], table[1]) 200 sqlDBRestore.CheckQueryResults(t, query, sqlDB.QueryStr(t, query)) 201 } 202 }) 203 204 t.Run("ensure that grants are restored", func(t *testing.T) { 205 grantCheck := "use system; SHOW grants" 206 sqlDBRestore.CheckQueryResults(t, grantCheck, sqlDB.QueryStr(t, grantCheck)) 207 grantCheck = "use data; SHOW grants" 208 sqlDBRestore.CheckQueryResults(t, grantCheck, sqlDB.QueryStr(t, grantCheck)) 209 }) 210 211 t.Run("ensure that jobs are restored", func(t *testing.T) { 212 // Ensure that the jobs in the RESTORE cluster is a superset of the jobs 213 // that were in the BACKUP cluster (before the full cluster BACKUP job was 214 // run). There may be more jobs now because the restore can run jobs of 215 // its own. 216 newJobs := sqlDBRestore.QueryStr(t, "SELECT * FROM system.jobs") 217 for _, oldJob := range preBackupJobs { 218 present := false 219 for _, newJob := range newJobs { 220 if reflect.DeepEqual(oldJob, newJob) { 221 present = true 222 } 223 } 224 if !present { 225 t.Errorf("Expected to find job %+v in RESTORE cluster, but not found", oldJob) 226 } 227 } 228 }) 229 230 t.Run("ensure that tables can be created at the execpted ID", func(t *testing.T) { 231 maxID, err := strconv.Atoi(sqlDBRestore.QueryStr(t, "SELECT max(id) FROM system.namespace")[0][0]) 232 if err != nil { 233 t.Fatal(err) 234 } 235 dbName, tableName := "new_db", "new_table" 236 // N.B. We skip the database ID that was allocated too the temporary 237 // system table and all of the temporary system tables (1 + 8). 238 numIDsToSkip := 9 239 expectedDBID := maxID + numIDsToSkip + 1 240 expectedTableID := maxID + numIDsToSkip + 2 241 sqlDBRestore.Exec(t, fmt.Sprintf("CREATE DATABASE %s", dbName)) 242 sqlDBRestore.Exec(t, fmt.Sprintf("CREATE TABLE %s.%s (a int)", dbName, tableName)) 243 sqlDBRestore.CheckQueryResults( 244 t, fmt.Sprintf("SELECT id FROM system.namespace WHERE name = '%s'", dbName), 245 [][]string{{strconv.Itoa(expectedDBID)}}, 246 ) 247 sqlDBRestore.CheckQueryResults( 248 t, fmt.Sprintf("SELECT id FROM system.namespace WHERE name = '%s'", tableName), 249 [][]string{{strconv.Itoa(expectedTableID)}}, 250 ) 251 }) 252 } 253 254 func TestFullClusterBackupDroppedTables(t *testing.T) { 255 defer leaktest.AfterTest(t)() 256 257 const numAccounts = 10 258 _, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 259 _, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone) 260 defer cleanupFn() 261 defer cleanupEmptyCluster() 262 263 _, tablesToCheck := generateInterleavedData(sqlDB, t, numAccounts) 264 265 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 266 sqlDBRestore.Exec(t, `RESTORE FROM $1`, localFoo) 267 268 for _, table := range tablesToCheck { 269 query := fmt.Sprintf("SELECT * FROM data.%s", table) 270 sqlDBRestore.CheckQueryResults(t, query, sqlDB.QueryStr(t, query)) 271 } 272 } 273 274 func TestIncrementalFullClusterBackup(t *testing.T) { 275 defer leaktest.AfterTest(t)() 276 277 const numAccounts = 10 278 const incrementalBackupLocation = "nodelocal://0/inc-full-backup" 279 _, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 280 _, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone) 281 defer cleanupFn() 282 defer cleanupEmptyCluster() 283 284 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 285 sqlDB.Exec(t, fmt.Sprintf("CREATE USER maxroach1")) 286 287 sqlDB.Exec(t, `BACKUP TO $1 INCREMENTAL FROM $2`, incrementalBackupLocation, localFoo) 288 sqlDBRestore.Exec(t, `RESTORE FROM $1, $2`, localFoo, incrementalBackupLocation) 289 290 checkQuery := "SELECT * FROM system.users" 291 sqlDBRestore.CheckQueryResults(t, checkQuery, sqlDB.QueryStr(t, checkQuery)) 292 } 293 294 // TestEmptyFullClusterResotre ensures that we can backup and restore a full 295 // cluster backup with only metadata (no user data). Regression test for #49573. 296 func TestEmptyFullClusterRestore(t *testing.T) { 297 defer leaktest.AfterTest(t)() 298 299 sqlDB, tempDir, cleanupFn := createEmptyCluster(t, singleNode) 300 _, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone) 301 defer cleanupFn() 302 defer cleanupEmptyCluster() 303 304 sqlDB.Exec(t, `CREATE USER alice`) 305 sqlDB.Exec(t, `CREATE USER bob`) 306 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 307 sqlDBRestore.Exec(t, `RESTORE FROM $1`, localFoo) 308 309 checkQuery := "SELECT * FROM system.users" 310 sqlDBRestore.CheckQueryResults(t, checkQuery, sqlDB.QueryStr(t, checkQuery)) 311 } 312 313 func TestDisallowFullClusterRestoreOnNonFreshCluster(t *testing.T) { 314 defer leaktest.AfterTest(t)() 315 316 const numAccounts = 10 317 _, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 318 _, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone) 319 defer cleanupFn() 320 defer cleanupEmptyCluster() 321 322 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 323 sqlDBRestore.Exec(t, `CREATE DATABASE foo`) 324 sqlDBRestore.ExpectErr( 325 t, "pq: full cluster restore can only be run on a cluster with no tables or databases but found 1 descriptors", 326 `RESTORE FROM $1`, localFoo, 327 ) 328 } 329 330 func TestDisallowFullClusterRestoreOfNonFullBackup(t *testing.T) { 331 defer leaktest.AfterTest(t)() 332 333 const numAccounts = 10 334 _, _, sqlDB, tempDir, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 335 _, _, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, initNone) 336 defer cleanupFn() 337 defer cleanupEmptyCluster() 338 339 sqlDB.Exec(t, `BACKUP data.bank TO $1`, localFoo) 340 sqlDBRestore.ExpectErr( 341 t, "pq: full cluster RESTORE can only be used on full cluster BACKUP files", 342 `RESTORE FROM $1`, localFoo, 343 ) 344 } 345 346 func TestAllowNonFullClusterRestoreOfFullBackup(t *testing.T) { 347 defer leaktest.AfterTest(t)() 348 349 const numAccounts = 10 350 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 351 defer cleanupFn() 352 353 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 354 sqlDB.Exec(t, `CREATE DATABASE data2`) 355 sqlDB.Exec(t, `RESTORE data.bank FROM $1 WITH into_db='data2'`, localFoo) 356 357 checkResults := "SELECT * FROM data.bank" 358 sqlDB.CheckQueryResults(t, checkResults, sqlDB.QueryStr(t, checkResults)) 359 } 360 361 func TestResotreDatabaseFromFullClusterBackup(t *testing.T) { 362 defer leaktest.AfterTest(t)() 363 364 const numAccounts = 10 365 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 366 defer cleanupFn() 367 368 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 369 sqlDB.Exec(t, `DROP DATABASE data`) 370 sqlDB.Exec(t, `RESTORE DATABASE data FROM $1`, localFoo) 371 372 sqlDB.CheckQueryResults(t, "SELECT count(*) FROM data.bank", [][]string{{"10"}}) 373 } 374 375 func TestRestoreSystemTableFromFullClusterBackup(t *testing.T) { 376 defer leaktest.AfterTest(t)() 377 378 const numAccounts = 10 379 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, numAccounts, initNone) 380 defer cleanupFn() 381 382 sqlDB.Exec(t, `CREATE USER maxroach`) 383 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 384 sqlDB.Exec(t, `CREATE DATABASE temp_sys`) 385 sqlDB.Exec(t, `RESTORE system.users FROM $1 WITH into_db='temp_sys'`, localFoo) 386 387 sqlDB.CheckQueryResults(t, "SELECT * FROM temp_sys.users", sqlDB.QueryStr(t, "SELECT * FROM system.users")) 388 } 389 390 func TestCreateDBAndTableIncrementalFullClusterBackup(t *testing.T) { 391 defer leaktest.AfterTest(t)() 392 393 _, _, sqlDB, _, cleanupFn := backupRestoreTestSetup(t, singleNode, 0, initNone) 394 defer cleanupFn() 395 396 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 397 sqlDB.Exec(t, `CREATE DATABASE foo`) 398 sqlDB.Exec(t, `CREATE TABLE foo.bar (a int)`) 399 400 // Ensure that the new backup succeeds. 401 sqlDB.Exec(t, `BACKUP TO $1`, localFoo) 402 }