vitess.io/vitess@v0.16.2/go/test/endtoend/backup/vtbackup/backup_only_test.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package vtbackup 18 19 import ( 20 "context" 21 "fmt" 22 "os" 23 "path" 24 "strings" 25 "testing" 26 "time" 27 28 "github.com/stretchr/testify/assert" 29 "github.com/stretchr/testify/require" 30 31 "vitess.io/vitess/go/mysql" 32 "vitess.io/vitess/go/test/endtoend/cluster" 33 "vitess.io/vitess/go/vt/log" 34 "vitess.io/vitess/go/vt/mysqlctl" 35 ) 36 37 var ( 38 vtInsertTest = ` 39 create table if not exists vt_insert_test ( 40 id bigint auto_increment, 41 msg varchar(64), 42 primary key (id) 43 ) Engine=InnoDB;` 44 ) 45 46 func TestTabletInitialBackup(t *testing.T) { 47 // Test Initial Backup Flow 48 // TestTabletInitialBackup will: 49 // - Create a shard using vtbackup and --initial-backup 50 // - Create the rest of the cluster restoring from backup 51 // - Externally Reparenting to a primary tablet 52 // - Insert Some data 53 // - Verify that the cluster is working 54 // - Take a Second Backup 55 // - Bring up a second replica, and restore from the second backup 56 // - list the backups, remove them 57 defer cluster.PanicHandler(t) 58 59 vtBackup(t, true, false, false) 60 verifyBackupCount(t, shardKsName, 1) 61 62 // Initialize the tablets 63 initTablets(t, false, false) 64 65 // Restore the Tablets 66 67 restore(t, primary, "replica", "NOT_SERVING") 68 // Vitess expects that the user has set the database into ReadWrite mode before calling 69 // TabletExternallyReparented 70 err := localCluster.VtctlclientProcess.ExecuteCommand( 71 "SetReadWrite", primary.Alias) 72 require.Nil(t, err) 73 err = localCluster.VtctlclientProcess.ExecuteCommand( 74 "TabletExternallyReparented", primary.Alias) 75 require.Nil(t, err) 76 restore(t, replica1, "replica", "SERVING") 77 78 // Run the entire backup test 79 firstBackupTest(t, "replica") 80 81 tearDown(t, true) 82 } 83 84 func TestTabletBackupOnly(t *testing.T) { 85 // Test Backup Flow 86 // TestTabletBackupOnly will: 87 // - Create a shard using regular init & start tablet 88 // - Run InitShardPrimary to start replication 89 // - Insert Some data 90 // - Verify that the cluster is working 91 // - Take a Second Backup 92 // - Bring up a second replica, and restore from the second backup 93 // - list the backups, remove them 94 defer cluster.PanicHandler(t) 95 96 // Reset the tablet object values in order on init tablet in the next step. 97 primary.VttabletProcess.ServingStatus = "NOT_SERVING" 98 replica1.VttabletProcess.ServingStatus = "NOT_SERVING" 99 100 initTablets(t, true, true) 101 firstBackupTest(t, "replica") 102 103 tearDown(t, false) 104 } 105 106 func firstBackupTest(t *testing.T, tabletType string) { 107 // Test First Backup flow. 108 // 109 // firstBackupTest will: 110 // - create a shard with primary and replica1 only 111 // - run InitShardPrimary 112 // - insert some data 113 // - take a backup 114 // - insert more data on the primary 115 // - bring up replica2 after the fact, let it restore the backup 116 // - check all data is right (before+after backup data) 117 // - list the backup, remove it 118 119 // Store initial backup counts 120 backups, err := listBackups(shardKsName) 121 require.Nil(t, err) 122 123 // insert data on primary, wait for replica to get it 124 _, err = primary.VttabletProcess.QueryTablet(vtInsertTest, keyspaceName, true) 125 require.Nil(t, err) 126 // Add a single row with value 'test1' to the primary tablet 127 _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test1')", keyspaceName, true) 128 require.Nil(t, err) 129 130 // Check that the specified tablet has the expected number of rows 131 cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 1) 132 133 // backup the replica 134 log.Infof("taking backup %s", time.Now()) 135 vtBackup(t, false, true, true) 136 log.Infof("done taking backup %s", time.Now()) 137 138 // check that the backup shows up in the listing 139 verifyBackupCount(t, shardKsName, len(backups)+1) 140 141 // insert more data on the primary 142 _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test2')", keyspaceName, true) 143 require.Nil(t, err) 144 cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 2) 145 146 // even though we change the value of compression it won't affect 147 // decompression since it gets its value from MANIFEST file, created 148 // as part of backup. 149 mysqlctl.CompressionEngineName = "lz4" 150 defer func() { mysqlctl.CompressionEngineName = "pgzip" }() 151 // now bring up the other replica, letting it restore from backup. 152 err = localCluster.VtctlclientProcess.InitTablet(replica2, cell, keyspaceName, hostname, shardName) 153 require.Nil(t, err) 154 restore(t, replica2, "replica", "SERVING") 155 // Replica2 takes time to serve. Sleeping for 5 sec. 156 time.Sleep(5 * time.Second) 157 //check the new replica has the data 158 cluster.VerifyRowsInTablet(t, replica2, keyspaceName, 2) 159 160 removeBackups(t) 161 verifyBackupCount(t, shardKsName, 0) 162 } 163 164 func vtBackup(t *testing.T, initialBackup bool, restartBeforeBackup, disableRedoLog bool) { 165 mysqlSocket, err := os.CreateTemp("", "vtbackup_test_mysql.sock") 166 require.Nil(t, err) 167 defer os.Remove(mysqlSocket.Name()) 168 169 // Take the back using vtbackup executable 170 extraArgs := []string{ 171 "--allow_first_backup", 172 "--db-credentials-file", dbCredentialFile, 173 "--mysql_socket", mysqlSocket.Name(), 174 } 175 if restartBeforeBackup { 176 extraArgs = append(extraArgs, "--restart_before_backup") 177 } 178 if disableRedoLog { 179 extraArgs = append(extraArgs, "--disable-redo-log") 180 } 181 182 ctx, cancel := context.WithCancel(context.Background()) 183 defer cancel() 184 185 if !initialBackup && disableRedoLog { 186 go verifyDisableEnableRedoLogs(ctx, t, mysqlSocket.Name()) 187 } 188 189 log.Infof("starting backup tablet %s", time.Now()) 190 err = localCluster.StartVtbackup(newInitDBFile, initialBackup, keyspaceName, shardName, cell, extraArgs...) 191 require.Nil(t, err) 192 } 193 194 func verifyBackupCount(t *testing.T, shardKsName string, expected int) []string { 195 backups, err := listBackups(shardKsName) 196 require.Nil(t, err) 197 assert.Equalf(t, expected, len(backups), "invalid number of backups") 198 return backups 199 } 200 201 func listBackups(shardKsName string) ([]string, error) { 202 backups, err := localCluster.VtctlProcess.ExecuteCommandWithOutput( 203 "--backup_storage_implementation", "file", 204 "--file_backup_storage_root", 205 path.Join(os.Getenv("VTDATAROOT"), "tmp", "backupstorage"), 206 "ListBackups", shardKsName, 207 ) 208 if err != nil { 209 return nil, err 210 } 211 result := strings.Split(backups, "\n") 212 var returnResult []string 213 for _, str := range result { 214 if str != "" { 215 returnResult = append(returnResult, str) 216 } 217 } 218 return returnResult, nil 219 } 220 221 func removeBackups(t *testing.T) { 222 // Remove all the backups from the shard 223 backups, err := listBackups(shardKsName) 224 require.Nil(t, err) 225 for _, backup := range backups { 226 _, err := localCluster.VtctlProcess.ExecuteCommandWithOutput( 227 "--backup_storage_implementation", "file", 228 "--file_backup_storage_root", 229 path.Join(os.Getenv("VTDATAROOT"), "tmp", "backupstorage"), 230 "RemoveBackup", shardKsName, backup, 231 ) 232 require.Nil(t, err) 233 } 234 } 235 236 func initTablets(t *testing.T, startTablet bool, initShardPrimary bool) { 237 // Initialize tablets 238 for _, tablet := range []cluster.Vttablet{*primary, *replica1} { 239 err := localCluster.VtctlclientProcess.InitTablet(&tablet, cell, keyspaceName, hostname, shardName) 240 require.Nil(t, err) 241 242 if startTablet { 243 err = tablet.VttabletProcess.Setup() 244 require.Nil(t, err) 245 } 246 } 247 248 if initShardPrimary { 249 // choose primary and start replication 250 err := localCluster.VtctlclientProcess.InitShardPrimary(keyspaceName, shardName, cell, primary.TabletUID) 251 require.Nil(t, err) 252 } 253 } 254 255 func restore(t *testing.T, tablet *cluster.Vttablet, tabletType string, waitForState string) { 256 // Erase mysql/tablet dir, then start tablet with restore enabled. 257 258 log.Infof("restoring tablet %s", time.Now()) 259 resetTabletDirectory(t, *tablet, true) 260 261 err := tablet.VttabletProcess.CreateDB(keyspaceName) 262 require.Nil(t, err) 263 264 // Start tablets 265 tablet.VttabletProcess.ExtraArgs = []string{"--db-credentials-file", dbCredentialFile} 266 tablet.VttabletProcess.TabletType = tabletType 267 tablet.VttabletProcess.ServingStatus = waitForState 268 tablet.VttabletProcess.SupportsBackup = true 269 err = tablet.VttabletProcess.Setup() 270 require.Nil(t, err) 271 } 272 273 func resetTabletDirectory(t *testing.T, tablet cluster.Vttablet, initMysql bool) { 274 extraArgs := []string{"--db-credentials-file", dbCredentialFile} 275 tablet.MysqlctlProcess.ExtraArgs = extraArgs 276 277 // Shutdown Mysql 278 err := tablet.MysqlctlProcess.Stop() 279 require.Nil(t, err) 280 // Teardown Tablet 281 err = tablet.VttabletProcess.TearDown() 282 require.Nil(t, err) 283 284 // Clear out the previous data 285 tablet.MysqlctlProcess.CleanupFiles(tablet.TabletUID) 286 287 if initMysql { 288 // Init the Mysql 289 tablet.MysqlctlProcess.InitDBFile = newInitDBFile 290 err = tablet.MysqlctlProcess.Start() 291 require.Nil(t, err) 292 } 293 } 294 295 func tearDown(t *testing.T, initMysql bool) { 296 // reset replication 297 promoteCommands := "STOP SLAVE; RESET SLAVE ALL; RESET MASTER;" 298 disableSemiSyncCommands := "SET GLOBAL rpl_semi_sync_master_enabled = false; SET GLOBAL rpl_semi_sync_slave_enabled = false" 299 for _, tablet := range []cluster.Vttablet{*primary, *replica1, *replica2} { 300 _, err := tablet.VttabletProcess.QueryTablet(promoteCommands, keyspaceName, true) 301 require.Nil(t, err) 302 _, err = tablet.VttabletProcess.QueryTablet(disableSemiSyncCommands, keyspaceName, true) 303 require.Nil(t, err) 304 for _, db := range []string{"_vt", "vt_insert_test"} { 305 _, err = tablet.VttabletProcess.QueryTablet(fmt.Sprintf("drop database if exists %s", db), keyspaceName, true) 306 require.Nil(t, err) 307 } 308 } 309 310 // TODO: Ideally we should not be resetting the mysql. 311 // So in below code we will have to uncomment the commented code and remove resetTabletDirectory 312 for _, tablet := range []cluster.Vttablet{*primary, *replica1, *replica2} { 313 //Tear down Tablet 314 //err := tablet.VttabletProcess.TearDown() 315 //require.Nil(t, err) 316 317 resetTabletDirectory(t, tablet, initMysql) 318 // DeleteTablet on a primary will cause tablet to shutdown, so should only call it after tablet is already shut down 319 err := localCluster.VtctlclientProcess.ExecuteCommand("DeleteTablet", "--", "--allow_primary", tablet.Alias) 320 require.Nil(t, err) 321 } 322 } 323 324 func verifyDisableEnableRedoLogs(ctx context.Context, t *testing.T, mysqlSocket string) { 325 params := cluster.NewConnParams(0, dbPassword, mysqlSocket, keyspaceName) 326 327 for { 328 select { 329 case <-time.After(100 * time.Millisecond): 330 // Connect to vtbackup mysqld. 331 conn, err := mysql.Connect(ctx, ¶ms) 332 if err != nil { 333 // Keep trying, vtbackup mysqld may not be ready yet. 334 continue 335 } 336 337 // Check if server supports disable/enable redo log. 338 qr, err := conn.ExecuteFetch("SELECT 1 FROM performance_schema.global_status WHERE variable_name = 'innodb_redo_log_enabled'", 1, false) 339 require.Nil(t, err) 340 // If not, there's nothing to test. 341 if len(qr.Rows) == 0 { 342 return 343 } 344 345 // MY-013600 346 // https://dev.mysql.com/doc/mysql-errors/8.0/en/server-error-reference.html#error_er_ib_wrn_redo_disabled 347 qr, err = conn.ExecuteFetch("SELECT 1 FROM performance_schema.error_log WHERE error_code = 'MY-013600'", 1, false) 348 require.Nil(t, err) 349 if len(qr.Rows) != 1 { 350 // Keep trying, possible we haven't disabled yet. 351 continue 352 } 353 354 // MY-013601 355 // https://dev.mysql.com/doc/mysql-errors/8.0/en/server-error-reference.html#error_er_ib_wrn_redo_enabled 356 qr, err = conn.ExecuteFetch("SELECT 1 FROM performance_schema.error_log WHERE error_code = 'MY-013601'", 1, false) 357 require.Nil(t, err) 358 if len(qr.Rows) != 1 { 359 // Keep trying, possible we haven't disabled yet. 360 continue 361 } 362 363 // Success 364 return 365 case <-ctx.Done(): 366 require.Fail(t, "Failed to verify disable/enable redo log.") 367 } 368 } 369 }