vitess.io/vitess@v0.16.2/go/test/endtoend/backup/vtbackup/backup_only_test.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vtbackup
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"os"
    23  	"path"
    24  	"strings"
    25  	"testing"
    26  	"time"
    27  
    28  	"github.com/stretchr/testify/assert"
    29  	"github.com/stretchr/testify/require"
    30  
    31  	"vitess.io/vitess/go/mysql"
    32  	"vitess.io/vitess/go/test/endtoend/cluster"
    33  	"vitess.io/vitess/go/vt/log"
    34  	"vitess.io/vitess/go/vt/mysqlctl"
    35  )
    36  
    37  var (
    38  	vtInsertTest = `
    39  		create table if not exists vt_insert_test (
    40  		id bigint auto_increment,
    41  		msg varchar(64),
    42  		primary key (id)
    43  		) Engine=InnoDB;`
    44  )
    45  
    46  func TestTabletInitialBackup(t *testing.T) {
    47  	// Test Initial Backup Flow
    48  	//    TestTabletInitialBackup will:
    49  	//    - Create a shard using vtbackup and --initial-backup
    50  	//    - Create the rest of the cluster restoring from backup
    51  	//    - Externally Reparenting to a primary tablet
    52  	//    - Insert Some data
    53  	//    - Verify that the cluster is working
    54  	//    - Take a Second Backup
    55  	//    - Bring up a second replica, and restore from the second backup
    56  	//    - list the backups, remove them
    57  	defer cluster.PanicHandler(t)
    58  
    59  	vtBackup(t, true, false, false)
    60  	verifyBackupCount(t, shardKsName, 1)
    61  
    62  	// Initialize the tablets
    63  	initTablets(t, false, false)
    64  
    65  	// Restore the Tablets
    66  
    67  	restore(t, primary, "replica", "NOT_SERVING")
    68  	// Vitess expects that the user has set the database into ReadWrite mode before calling
    69  	// TabletExternallyReparented
    70  	err := localCluster.VtctlclientProcess.ExecuteCommand(
    71  		"SetReadWrite", primary.Alias)
    72  	require.Nil(t, err)
    73  	err = localCluster.VtctlclientProcess.ExecuteCommand(
    74  		"TabletExternallyReparented", primary.Alias)
    75  	require.Nil(t, err)
    76  	restore(t, replica1, "replica", "SERVING")
    77  
    78  	// Run the entire backup test
    79  	firstBackupTest(t, "replica")
    80  
    81  	tearDown(t, true)
    82  }
    83  
    84  func TestTabletBackupOnly(t *testing.T) {
    85  	// Test Backup Flow
    86  	//    TestTabletBackupOnly will:
    87  	//    - Create a shard using regular init & start tablet
    88  	//    - Run InitShardPrimary to start replication
    89  	//    - Insert Some data
    90  	//    - Verify that the cluster is working
    91  	//    - Take a Second Backup
    92  	//    - Bring up a second replica, and restore from the second backup
    93  	//    - list the backups, remove them
    94  	defer cluster.PanicHandler(t)
    95  
    96  	// Reset the tablet object values in order on init tablet in the next step.
    97  	primary.VttabletProcess.ServingStatus = "NOT_SERVING"
    98  	replica1.VttabletProcess.ServingStatus = "NOT_SERVING"
    99  
   100  	initTablets(t, true, true)
   101  	firstBackupTest(t, "replica")
   102  
   103  	tearDown(t, false)
   104  }
   105  
   106  func firstBackupTest(t *testing.T, tabletType string) {
   107  	// Test First Backup flow.
   108  	//
   109  	//    firstBackupTest will:
   110  	//    - create a shard with primary and replica1 only
   111  	//    - run InitShardPrimary
   112  	//    - insert some data
   113  	//    - take a backup
   114  	//    - insert more data on the primary
   115  	//    - bring up replica2 after the fact, let it restore the backup
   116  	//    - check all data is right (before+after backup data)
   117  	//    - list the backup, remove it
   118  
   119  	// Store initial backup counts
   120  	backups, err := listBackups(shardKsName)
   121  	require.Nil(t, err)
   122  
   123  	// insert data on primary, wait for replica to get it
   124  	_, err = primary.VttabletProcess.QueryTablet(vtInsertTest, keyspaceName, true)
   125  	require.Nil(t, err)
   126  	// Add a single row with value 'test1' to the primary tablet
   127  	_, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test1')", keyspaceName, true)
   128  	require.Nil(t, err)
   129  
   130  	// Check that the specified tablet has the expected number of rows
   131  	cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 1)
   132  
   133  	// backup the replica
   134  	log.Infof("taking backup %s", time.Now())
   135  	vtBackup(t, false, true, true)
   136  	log.Infof("done taking backup %s", time.Now())
   137  
   138  	// check that the backup shows up in the listing
   139  	verifyBackupCount(t, shardKsName, len(backups)+1)
   140  
   141  	// insert more data on the primary
   142  	_, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test2')", keyspaceName, true)
   143  	require.Nil(t, err)
   144  	cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 2)
   145  
   146  	// even though we change the value of compression it won't affect
   147  	// decompression since it gets its value from MANIFEST file, created
   148  	// as part of backup.
   149  	mysqlctl.CompressionEngineName = "lz4"
   150  	defer func() { mysqlctl.CompressionEngineName = "pgzip" }()
   151  	// now bring up the other replica, letting it restore from backup.
   152  	err = localCluster.VtctlclientProcess.InitTablet(replica2, cell, keyspaceName, hostname, shardName)
   153  	require.Nil(t, err)
   154  	restore(t, replica2, "replica", "SERVING")
   155  	// Replica2 takes time to serve. Sleeping for 5 sec.
   156  	time.Sleep(5 * time.Second)
   157  	//check the new replica has the data
   158  	cluster.VerifyRowsInTablet(t, replica2, keyspaceName, 2)
   159  
   160  	removeBackups(t)
   161  	verifyBackupCount(t, shardKsName, 0)
   162  }
   163  
   164  func vtBackup(t *testing.T, initialBackup bool, restartBeforeBackup, disableRedoLog bool) {
   165  	mysqlSocket, err := os.CreateTemp("", "vtbackup_test_mysql.sock")
   166  	require.Nil(t, err)
   167  	defer os.Remove(mysqlSocket.Name())
   168  
   169  	// Take the back using vtbackup executable
   170  	extraArgs := []string{
   171  		"--allow_first_backup",
   172  		"--db-credentials-file", dbCredentialFile,
   173  		"--mysql_socket", mysqlSocket.Name(),
   174  	}
   175  	if restartBeforeBackup {
   176  		extraArgs = append(extraArgs, "--restart_before_backup")
   177  	}
   178  	if disableRedoLog {
   179  		extraArgs = append(extraArgs, "--disable-redo-log")
   180  	}
   181  
   182  	ctx, cancel := context.WithCancel(context.Background())
   183  	defer cancel()
   184  
   185  	if !initialBackup && disableRedoLog {
   186  		go verifyDisableEnableRedoLogs(ctx, t, mysqlSocket.Name())
   187  	}
   188  
   189  	log.Infof("starting backup tablet %s", time.Now())
   190  	err = localCluster.StartVtbackup(newInitDBFile, initialBackup, keyspaceName, shardName, cell, extraArgs...)
   191  	require.Nil(t, err)
   192  }
   193  
   194  func verifyBackupCount(t *testing.T, shardKsName string, expected int) []string {
   195  	backups, err := listBackups(shardKsName)
   196  	require.Nil(t, err)
   197  	assert.Equalf(t, expected, len(backups), "invalid number of backups")
   198  	return backups
   199  }
   200  
   201  func listBackups(shardKsName string) ([]string, error) {
   202  	backups, err := localCluster.VtctlProcess.ExecuteCommandWithOutput(
   203  		"--backup_storage_implementation", "file",
   204  		"--file_backup_storage_root",
   205  		path.Join(os.Getenv("VTDATAROOT"), "tmp", "backupstorage"),
   206  		"ListBackups", shardKsName,
   207  	)
   208  	if err != nil {
   209  		return nil, err
   210  	}
   211  	result := strings.Split(backups, "\n")
   212  	var returnResult []string
   213  	for _, str := range result {
   214  		if str != "" {
   215  			returnResult = append(returnResult, str)
   216  		}
   217  	}
   218  	return returnResult, nil
   219  }
   220  
   221  func removeBackups(t *testing.T) {
   222  	// Remove all the backups from the shard
   223  	backups, err := listBackups(shardKsName)
   224  	require.Nil(t, err)
   225  	for _, backup := range backups {
   226  		_, err := localCluster.VtctlProcess.ExecuteCommandWithOutput(
   227  			"--backup_storage_implementation", "file",
   228  			"--file_backup_storage_root",
   229  			path.Join(os.Getenv("VTDATAROOT"), "tmp", "backupstorage"),
   230  			"RemoveBackup", shardKsName, backup,
   231  		)
   232  		require.Nil(t, err)
   233  	}
   234  }
   235  
   236  func initTablets(t *testing.T, startTablet bool, initShardPrimary bool) {
   237  	// Initialize tablets
   238  	for _, tablet := range []cluster.Vttablet{*primary, *replica1} {
   239  		err := localCluster.VtctlclientProcess.InitTablet(&tablet, cell, keyspaceName, hostname, shardName)
   240  		require.Nil(t, err)
   241  
   242  		if startTablet {
   243  			err = tablet.VttabletProcess.Setup()
   244  			require.Nil(t, err)
   245  		}
   246  	}
   247  
   248  	if initShardPrimary {
   249  		// choose primary and start replication
   250  		err := localCluster.VtctlclientProcess.InitShardPrimary(keyspaceName, shardName, cell, primary.TabletUID)
   251  		require.Nil(t, err)
   252  	}
   253  }
   254  
   255  func restore(t *testing.T, tablet *cluster.Vttablet, tabletType string, waitForState string) {
   256  	// Erase mysql/tablet dir, then start tablet with restore enabled.
   257  
   258  	log.Infof("restoring tablet %s", time.Now())
   259  	resetTabletDirectory(t, *tablet, true)
   260  
   261  	err := tablet.VttabletProcess.CreateDB(keyspaceName)
   262  	require.Nil(t, err)
   263  
   264  	// Start tablets
   265  	tablet.VttabletProcess.ExtraArgs = []string{"--db-credentials-file", dbCredentialFile}
   266  	tablet.VttabletProcess.TabletType = tabletType
   267  	tablet.VttabletProcess.ServingStatus = waitForState
   268  	tablet.VttabletProcess.SupportsBackup = true
   269  	err = tablet.VttabletProcess.Setup()
   270  	require.Nil(t, err)
   271  }
   272  
   273  func resetTabletDirectory(t *testing.T, tablet cluster.Vttablet, initMysql bool) {
   274  	extraArgs := []string{"--db-credentials-file", dbCredentialFile}
   275  	tablet.MysqlctlProcess.ExtraArgs = extraArgs
   276  
   277  	// Shutdown Mysql
   278  	err := tablet.MysqlctlProcess.Stop()
   279  	require.Nil(t, err)
   280  	// Teardown Tablet
   281  	err = tablet.VttabletProcess.TearDown()
   282  	require.Nil(t, err)
   283  
   284  	// Clear out the previous data
   285  	tablet.MysqlctlProcess.CleanupFiles(tablet.TabletUID)
   286  
   287  	if initMysql {
   288  		// Init the Mysql
   289  		tablet.MysqlctlProcess.InitDBFile = newInitDBFile
   290  		err = tablet.MysqlctlProcess.Start()
   291  		require.Nil(t, err)
   292  	}
   293  }
   294  
   295  func tearDown(t *testing.T, initMysql bool) {
   296  	// reset replication
   297  	promoteCommands := "STOP SLAVE; RESET SLAVE ALL; RESET MASTER;"
   298  	disableSemiSyncCommands := "SET GLOBAL rpl_semi_sync_master_enabled = false; SET GLOBAL rpl_semi_sync_slave_enabled = false"
   299  	for _, tablet := range []cluster.Vttablet{*primary, *replica1, *replica2} {
   300  		_, err := tablet.VttabletProcess.QueryTablet(promoteCommands, keyspaceName, true)
   301  		require.Nil(t, err)
   302  		_, err = tablet.VttabletProcess.QueryTablet(disableSemiSyncCommands, keyspaceName, true)
   303  		require.Nil(t, err)
   304  		for _, db := range []string{"_vt", "vt_insert_test"} {
   305  			_, err = tablet.VttabletProcess.QueryTablet(fmt.Sprintf("drop database if exists %s", db), keyspaceName, true)
   306  			require.Nil(t, err)
   307  		}
   308  	}
   309  
   310  	// TODO: Ideally we should not be resetting the mysql.
   311  	// So in below code we will have to uncomment the commented code and remove resetTabletDirectory
   312  	for _, tablet := range []cluster.Vttablet{*primary, *replica1, *replica2} {
   313  		//Tear down Tablet
   314  		//err := tablet.VttabletProcess.TearDown()
   315  		//require.Nil(t, err)
   316  
   317  		resetTabletDirectory(t, tablet, initMysql)
   318  		// DeleteTablet on a primary will cause tablet to shutdown, so should only call it after tablet is already shut down
   319  		err := localCluster.VtctlclientProcess.ExecuteCommand("DeleteTablet", "--", "--allow_primary", tablet.Alias)
   320  		require.Nil(t, err)
   321  	}
   322  }
   323  
   324  func verifyDisableEnableRedoLogs(ctx context.Context, t *testing.T, mysqlSocket string) {
   325  	params := cluster.NewConnParams(0, dbPassword, mysqlSocket, keyspaceName)
   326  
   327  	for {
   328  		select {
   329  		case <-time.After(100 * time.Millisecond):
   330  			// Connect to vtbackup mysqld.
   331  			conn, err := mysql.Connect(ctx, &params)
   332  			if err != nil {
   333  				// Keep trying, vtbackup mysqld may not be ready yet.
   334  				continue
   335  			}
   336  
   337  			// Check if server supports disable/enable redo log.
   338  			qr, err := conn.ExecuteFetch("SELECT 1 FROM performance_schema.global_status WHERE variable_name = 'innodb_redo_log_enabled'", 1, false)
   339  			require.Nil(t, err)
   340  			// If not, there's nothing to test.
   341  			if len(qr.Rows) == 0 {
   342  				return
   343  			}
   344  
   345  			// MY-013600
   346  			// https://dev.mysql.com/doc/mysql-errors/8.0/en/server-error-reference.html#error_er_ib_wrn_redo_disabled
   347  			qr, err = conn.ExecuteFetch("SELECT 1 FROM performance_schema.error_log WHERE error_code = 'MY-013600'", 1, false)
   348  			require.Nil(t, err)
   349  			if len(qr.Rows) != 1 {
   350  				// Keep trying, possible we haven't disabled yet.
   351  				continue
   352  			}
   353  
   354  			// MY-013601
   355  			// https://dev.mysql.com/doc/mysql-errors/8.0/en/server-error-reference.html#error_er_ib_wrn_redo_enabled
   356  			qr, err = conn.ExecuteFetch("SELECT 1 FROM performance_schema.error_log WHERE error_code = 'MY-013601'", 1, false)
   357  			require.Nil(t, err)
   358  			if len(qr.Rows) != 1 {
   359  				// Keep trying, possible we haven't disabled yet.
   360  				continue
   361  			}
   362  
   363  			// Success
   364  			return
   365  		case <-ctx.Done():
   366  			require.Fail(t, "Failed to verify disable/enable redo log.")
   367  		}
   368  	}
   369  }