vitess.io/vitess@v0.16.2/go/test/endtoend/recovery/unshardedrecovery/recovery.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package unshardedrecovery
    18  
    19  import (
    20  	"context"
    21  	"flag"
    22  	"fmt"
    23  	"os"
    24  	"os/exec"
    25  	"path"
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/stretchr/testify/assert"
    30  	"github.com/stretchr/testify/require"
    31  
    32  	"vitess.io/vitess/go/test/endtoend/cluster"
    33  	"vitess.io/vitess/go/test/endtoend/recovery"
    34  	"vitess.io/vitess/go/vt/log"
    35  	"vitess.io/vitess/go/vt/vtgate/vtgateconn"
    36  )
    37  
    38  var (
    39  	primary          *cluster.Vttablet
    40  	replica1         *cluster.Vttablet
    41  	replica2         *cluster.Vttablet
    42  	replica3         *cluster.Vttablet
    43  	localCluster     *cluster.LocalProcessCluster
    44  	newInitDBFile    string
    45  	cell             = cluster.DefaultCell
    46  	hostname         = "localhost"
    47  	keyspaceName     = "ks"
    48  	dbPassword       = "VtDbaPass"
    49  	shardKsName      = fmt.Sprintf("%s/%s", keyspaceName, shardName)
    50  	dbCredentialFile string
    51  	shardName        = "0"
    52  	commonTabletArg  = []string{
    53  		"--vreplication_healthcheck_topology_refresh", "1s",
    54  		"--vreplication_healthcheck_retry_delay", "1s",
    55  		"--vreplication_retry_delay", "1s",
    56  		"--degraded_threshold", "5s",
    57  		"--lock_tables_timeout", "5s",
    58  		"--watch_replication_stream",
    59  		"--serving_state_grace_period", "1s"}
    60  	recoveryKS1  = "recovery_ks1"
    61  	recoveryKS2  = "recovery_ks2"
    62  	vtInsertTest = `create table vt_insert_test (
    63  					  id bigint auto_increment,
    64  					  msg varchar(64),
    65  					  primary key (id)
    66  					  ) Engine=InnoDB`
    67  	vSchema = `{
    68      "tables": {
    69          "vt_insert_test": {}
    70      }
    71  }`
    72  )
    73  
    74  // TestMainImpl creates cluster for unsharded recovery testing.
    75  func TestMainImpl(m *testing.M) {
    76  	defer cluster.PanicHandler(nil)
    77  	flag.Parse()
    78  
    79  	exitCode, err := func() (int, error) {
    80  		localCluster = cluster.NewCluster(cell, hostname)
    81  		defer localCluster.Teardown()
    82  
    83  		// Start topo server
    84  		err := localCluster.StartTopo()
    85  		if err != nil {
    86  			return 1, err
    87  		}
    88  
    89  		// Start keyspace
    90  		keyspace := &cluster.Keyspace{
    91  			Name: keyspaceName,
    92  		}
    93  		localCluster.Keyspaces = append(localCluster.Keyspaces, *keyspace)
    94  
    95  		dbCredentialFile = cluster.WriteDbCredentialToTmp(localCluster.TmpDirectory)
    96  		initDb, _ := os.ReadFile(path.Join(os.Getenv("VTROOT"), "/config/init_db.sql"))
    97  		sql := string(initDb)
    98  		newInitDBFile = path.Join(localCluster.TmpDirectory, "init_db_with_passwords.sql")
    99  		sql = sql + cluster.GetPasswordUpdateSQL(localCluster)
   100  		// https://github.com/vitessio/vitess/issues/8315
   101  		oldAlterTableMode := `
   102  SET GLOBAL old_alter_table = ON;
   103  `
   104  		sql = sql + oldAlterTableMode
   105  		os.WriteFile(newInitDBFile, []byte(sql), 0666)
   106  
   107  		extraArgs := []string{"--db-credentials-file", dbCredentialFile}
   108  		commonTabletArg = append(commonTabletArg, "--db-credentials-file", dbCredentialFile)
   109  
   110  		shard := cluster.Shard{
   111  			Name: shardName,
   112  		}
   113  
   114  		var mysqlProcs []*exec.Cmd
   115  		for i := 0; i < 4; i++ {
   116  			tabletType := "replica"
   117  			if i == 0 {
   118  				tabletType = "primary"
   119  			}
   120  			tablet := localCluster.NewVttabletInstance(tabletType, 0, cell)
   121  			tablet.VttabletProcess = localCluster.VtprocessInstanceFromVttablet(tablet, shard.Name, keyspaceName)
   122  			tablet.VttabletProcess.DbPassword = dbPassword
   123  			tablet.VttabletProcess.ExtraArgs = commonTabletArg
   124  			if recovery.UseXb {
   125  				tablet.VttabletProcess.ExtraArgs = append(tablet.VttabletProcess.ExtraArgs, recovery.XbArgs...)
   126  			}
   127  			tablet.VttabletProcess.SupportsBackup = true
   128  
   129  			tablet.MysqlctlProcess = *cluster.MysqlCtlProcessInstance(tablet.TabletUID, tablet.MySQLPort, localCluster.TmpDirectory)
   130  			tablet.MysqlctlProcess.InitDBFile = newInitDBFile
   131  			tablet.MysqlctlProcess.ExtraArgs = extraArgs
   132  			proc, err := tablet.MysqlctlProcess.StartProcess()
   133  			if err != nil {
   134  				return 1, err
   135  			}
   136  			mysqlProcs = append(mysqlProcs, proc)
   137  
   138  			shard.Vttablets = append(shard.Vttablets, tablet)
   139  		}
   140  		for _, proc := range mysqlProcs {
   141  			if err := proc.Wait(); err != nil {
   142  				return 1, err
   143  			}
   144  		}
   145  		primary = shard.Vttablets[0]
   146  		replica1 = shard.Vttablets[1]
   147  		replica2 = shard.Vttablets[2]
   148  		replica3 = shard.Vttablets[3]
   149  
   150  		for _, tablet := range []cluster.Vttablet{*primary, *replica1} {
   151  			if err := tablet.VttabletProcess.Setup(); err != nil {
   152  				return 1, err
   153  			}
   154  		}
   155  
   156  		vtctldClientProcess := cluster.VtctldClientProcessInstance("localhost", localCluster.VtctldProcess.GrpcPort, localCluster.TmpDirectory)
   157  		_, err = vtctldClientProcess.ExecuteCommandWithOutput("SetKeyspaceDurabilityPolicy", keyspaceName, "--durability-policy=semi_sync")
   158  		if err != nil {
   159  			return 1, err
   160  		}
   161  		if err := localCluster.VtctlclientProcess.InitializeShard(keyspaceName, shard.Name, cell, primary.TabletUID); err != nil {
   162  			return 1, err
   163  		}
   164  		if err := localCluster.StartVTOrc(keyspaceName); err != nil {
   165  			return 1, err
   166  		}
   167  		return m.Run(), nil
   168  	}()
   169  
   170  	if err != nil {
   171  		log.Error(err.Error())
   172  		os.Exit(1)
   173  	} else {
   174  		os.Exit(exitCode)
   175  	}
   176  
   177  }
   178  
   179  // TestRecoveryImpl does following
   180  // - create a shard with primary and replica1 only
   181  // - run InitShardPrimary
   182  // - insert some data
   183  // - take a backup
   184  // - insert more data on the primary
   185  // - take another backup
   186  // - create a recovery keyspace after first backup
   187  // - bring up tablet_replica2 in the new keyspace
   188  // - check that new tablet does not have data created after backup1
   189  // - create second recovery keyspace after second backup
   190  // - bring up tablet_replica3 in second keyspace
   191  // - check that new tablet has data created after backup1 but not data created after backup2
   192  // - check that vtgate queries work correctly
   193  func TestRecoveryImpl(t *testing.T) {
   194  	defer cluster.PanicHandler(t)
   195  	defer tabletsTeardown()
   196  	verifyInitialReplication(t)
   197  
   198  	err := localCluster.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias)
   199  	assert.NoError(t, err)
   200  
   201  	backups := listBackups(t)
   202  	require.Equal(t, len(backups), 1)
   203  	assert.Contains(t, backups[0], replica1.Alias)
   204  
   205  	_, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test2')", keyspaceName, true)
   206  	assert.NoError(t, err)
   207  	cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 2)
   208  
   209  	err = localCluster.VtctlclientProcess.ApplyVSchema(keyspaceName, vSchema)
   210  	assert.NoError(t, err)
   211  
   212  	output, err := localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetVSchema", keyspaceName)
   213  	assert.NoError(t, err)
   214  	assert.Contains(t, output, "vt_insert_test")
   215  
   216  	recovery.RestoreTablet(t, localCluster, replica2, recoveryKS1, "0", keyspaceName, commonTabletArg)
   217  
   218  	output, err = localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetSrvVSchema", cell)
   219  	assert.NoError(t, err)
   220  	assert.Contains(t, output, keyspaceName)
   221  	assert.Contains(t, output, recoveryKS1)
   222  
   223  	err = localCluster.VtctlclientProcess.ExecuteCommand("GetSrvKeyspace", cell, keyspaceName)
   224  	assert.NoError(t, err)
   225  
   226  	output, err = localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetVSchema", recoveryKS1)
   227  	assert.NoError(t, err)
   228  	assert.Contains(t, output, "vt_insert_test")
   229  
   230  	cluster.VerifyRowsInTablet(t, replica2, keyspaceName, 1)
   231  
   232  	// update the original row in primary
   233  	_, err = primary.VttabletProcess.QueryTablet("update vt_insert_test set msg = 'msgx1' where id = 1", keyspaceName, true)
   234  	assert.NoError(t, err)
   235  
   236  	// verify that primary has new value
   237  	qr, err := primary.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true)
   238  	assert.NoError(t, err)
   239  	assert.Equal(t, "msgx1", qr.Rows[0][0].ToString())
   240  
   241  	// verify that restored replica has old value
   242  	qr, err = replica2.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true)
   243  	assert.NoError(t, err)
   244  	assert.Equal(t, "test1", qr.Rows[0][0].ToString())
   245  
   246  	err = localCluster.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias)
   247  	assert.NoError(t, err)
   248  
   249  	_, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test3')", keyspaceName, true)
   250  	assert.NoError(t, err)
   251  	cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 3)
   252  
   253  	recovery.RestoreTablet(t, localCluster, replica3, recoveryKS2, "0", keyspaceName, commonTabletArg)
   254  
   255  	output, err = localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetVSchema", recoveryKS2)
   256  	assert.NoError(t, err)
   257  	assert.Contains(t, output, "vt_insert_test")
   258  
   259  	cluster.VerifyRowsInTablet(t, replica3, keyspaceName, 2)
   260  
   261  	// update the original row in primary
   262  	_, err = primary.VttabletProcess.QueryTablet("update vt_insert_test set msg = 'msgx2' where id = 1", keyspaceName, true)
   263  	assert.NoError(t, err)
   264  
   265  	// verify that primary has new value
   266  	qr, err = primary.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true)
   267  	assert.NoError(t, err)
   268  	assert.Equal(t, "msgx2", qr.Rows[0][0].ToString())
   269  
   270  	// verify that restored replica has old value
   271  	qr, err = replica3.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true)
   272  	assert.NoError(t, err)
   273  	assert.Equal(t, "msgx1", qr.Rows[0][0].ToString())
   274  
   275  	vtgateInstance := localCluster.NewVtgateInstance()
   276  	vtgateInstance.TabletTypesToWait = "REPLICA"
   277  	err = vtgateInstance.Setup()
   278  	localCluster.VtgateGrpcPort = vtgateInstance.GrpcPort
   279  	assert.NoError(t, err)
   280  	defer vtgateInstance.TearDown()
   281  	assert.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.primary", keyspaceName, shardName), 1, 30*time.Second))
   282  	assert.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", keyspaceName, shardName), 1, 30*time.Second))
   283  	assert.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", recoveryKS1, shardName), 1, 30*time.Second))
   284  	assert.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", recoveryKS2, shardName), 1, 30*time.Second))
   285  
   286  	// Build vtgate grpc connection
   287  	grpcAddress := fmt.Sprintf("%s:%d", localCluster.Hostname, localCluster.VtgateGrpcPort)
   288  	vtgateConn, err := vtgateconn.Dial(context.Background(), grpcAddress)
   289  	assert.NoError(t, err)
   290  	defer vtgateConn.Close()
   291  	session := vtgateConn.Session("@replica", nil)
   292  
   293  	// check that vtgate doesn't route queries to new tablet
   294  	recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(3)")
   295  	recovery.VerifyQueriesUsingVtgate(t, session, "select msg from vt_insert_test where id = 1", `VARCHAR("msgx2")`)
   296  	recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select count(*) from %s.vt_insert_test", recoveryKS1), "INT64(1)")
   297  	recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select msg from %s.vt_insert_test where id = 1", recoveryKS1), `VARCHAR("test1")`)
   298  	recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select count(*) from %s.vt_insert_test", recoveryKS2), "INT64(2)")
   299  	recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select msg from %s.vt_insert_test where id = 1", recoveryKS2), `VARCHAR("msgx1")`)
   300  
   301  	// check that new keyspace is accessible with 'use ks'
   302  	cluster.ExecuteQueriesUsingVtgate(t, session, "use "+recoveryKS1+"@replica")
   303  	recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(1)")
   304  
   305  	cluster.ExecuteQueriesUsingVtgate(t, session, "use "+recoveryKS2+"@replica")
   306  	recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(2)")
   307  
   308  	// check that new tablet is accessible with use `ks:shard`
   309  	cluster.ExecuteQueriesUsingVtgate(t, session, "use `"+recoveryKS1+":0@replica`")
   310  	recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(1)")
   311  
   312  	cluster.ExecuteQueriesUsingVtgate(t, session, "use `"+recoveryKS2+":0@replica`")
   313  	recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(2)")
   314  }
   315  
   316  // verifyInitialReplication will create schema in primary, insert some data to primary and verify the same data in replica.
   317  func verifyInitialReplication(t *testing.T) {
   318  	_, err := primary.VttabletProcess.QueryTablet(vtInsertTest, keyspaceName, true)
   319  	assert.NoError(t, err)
   320  	_, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test1')", keyspaceName, true)
   321  	assert.NoError(t, err)
   322  	cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 1)
   323  }
   324  
   325  func listBackups(t *testing.T) []string {
   326  	output, err := localCluster.ListBackups(shardKsName)
   327  	assert.NoError(t, err)
   328  	return output
   329  }
   330  
   331  func tabletsTeardown() {
   332  	var mysqlProcs []*exec.Cmd
   333  	for _, tablet := range []*cluster.Vttablet{primary, replica1, replica2, replica3} {
   334  		proc, _ := tablet.MysqlctlProcess.StopProcess()
   335  		mysqlProcs = append(mysqlProcs, proc)
   336  		tablet.VttabletProcess.TearDown()
   337  	}
   338  	for _, proc := range mysqlProcs {
   339  		proc.Wait()
   340  	}
   341  }