vitess.io/vitess@v0.16.2/go/test/endtoend/recovery/pitr/shardedpitr_test.go

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package pitr
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"os/exec"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/buger/jsonparser"
    27  	"github.com/stretchr/testify/assert"
    28  	"github.com/stretchr/testify/require"
    29  
    30  	"vitess.io/vitess/go/mysql"
    31  	"vitess.io/vitess/go/test/endtoend/cluster"
    32  	"vitess.io/vitess/go/vt/log"
    33  )
    34  
    35  var (
    36  	createTable = `create table product (id bigint(20) primary key, name char(10), created bigint(20));`
    37  	insertTable = `insert into product (id, name, created) values(%d, '%s', unix_timestamp());`
    38  	getCountID  = `select count(*) from product`
    39  )
    40  
    41  var (
    42  	clusterInstance *cluster.LocalProcessCluster
    43  
    44  	primary        *cluster.Vttablet
    45  	replica1       *cluster.Vttablet
    46  	replica2       *cluster.Vttablet
    47  	shard0Primary  *cluster.Vttablet
    48  	shard0Replica1 *cluster.Vttablet
    49  	shard0Replica2 *cluster.Vttablet
    50  	shard1Primary  *cluster.Vttablet
    51  	shard1Replica1 *cluster.Vttablet
    52  	shard1Replica2 *cluster.Vttablet
    53  
    54  	cell           = "zone1"
    55  	hostname       = "localhost"
    56  	binlogHost     = "127.0.0.1"
    57  	keyspaceName   = "ks"
    58  	restoreKS1Name = "restoreks1"
    59  	restoreKS2Name = "restoreks2"
    60  	restoreKS3Name = "restoreks3"
    61  	shardName      = "0"
    62  	shard0Name     = "-80"
    63  	shard1Name     = "80-"
    64  	dbName         = "vt_ks"
    65  	mysqlUserName  = "vt_dba"
    66  	mysqlPassword  = "password"
    67  	vSchema        = `{
    68  		"sharded": true,
    69  		"vindexes": {
    70  			"hash_index": {
    71  				"type": "hash"
    72  			}
    73  		},
    74  		"tables": {
    75  			"product": {
    76  				"column_vindexes": [
    77  					{
    78  						"column": "id",
    79  						"name": "hash_index"
    80  					}
    81  				]
    82  			}
    83  		}
    84  	}`
    85  	commonTabletArg = []string{
    86  		"--vreplication_healthcheck_topology_refresh", "1s",
    87  		"--vreplication_healthcheck_retry_delay", "1s",
    88  		"--vreplication_retry_delay", "1s",
    89  		"--degraded_threshold", "5s",
    90  		"--lock_tables_timeout", "5s",
    91  		"--watch_replication_stream",
    92  		"--serving_state_grace_period", "1s"}
    93  
    94  	defaultTimeout = 30 * time.Second
    95  	defaultTick    = 1 * time.Second
    96  )
    97  
    98  // Test PITR (point-in-time recovery).
    99  // -------------------------------------------
   100  // The test will:
   101  // - create a shard with a primary and replicas
   102  // - run InitShardPrimary
   103  // - point a binlog server at the primary
   104  // - insert some data through vtgate (rows 1 and 2)
   105  // - verify the replication
   106  // - take a backup of a replica
   107  // - insert more data through vtgate (rows 3, 4, 5, 6); while inserting row 4, note the time (restoreTime1)
   108  // - reshard into 2 shards (-80, 80-) and delete the old shard
   109  // - point a binlog server at the primary of each new shard
   110  // - insert more data through vtgate (rows 7, 8, 9, 10) and verify that the -80 and 80- shards get the expected number of rows
   111  // - take a backup of both shards
   112  // - insert more data through vtgate (rows 11, 12, 13, 14, 15); while inserting row 13, note the time (restoreTime2)
   113  // - note the current time (restoreTime3)
   114  
   115  // Everything up to this point is the setup for the assertions below:
   116  
   117  // - assert that restoring to restoreTime1 (going from 2 shards back to 1 shard) works, i.e. we should get 4 rows
   118  // - assert that restoring with a very small timeout value falls back to the last available backup (checked only for the -80 shard)
   119  // - assert that restoring to restoreTime2 (going from 2 shards to 2 shards at a past point in time) works, checked for both shards
   120  // - assert that restoring to restoreTime3 works and yields the complete data present in the existing shards
   121  func TestPITRRecovery(t *testing.T) {
   122  	defer cluster.PanicHandler(nil)
   123  	initializeCluster(t)
   124  	defer clusterInstance.Teardown()
   125  
   126  	// start the binlog server and point it at the primary
   127  	bs := startBinlogServer(t, primary)
   128  	defer bs.stop()
   129  
   130  	// Creating the table
   131  	_, err := primary.VttabletProcess.QueryTablet(createTable, keyspaceName, true)
   132  	require.NoError(t, err)
   133  
   134  	insertRow(t, 1, "prd-1", false)
   135  	insertRow(t, 2, "prd-2", false)
   136  
   137  	cluster.VerifyRowsInTabletForTable(t, replica1, keyspaceName, 2, "product")
   138  
   139  	// back up the replica
   140  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias)
   141  	require.NoError(t, err)
   142  
   143  	// check that the backup shows up in the listing
   144  	output, err := clusterInstance.ListBackups("ks/0")
   145  	require.NoError(t, err)
   146  	assert.Equal(t, 1, len(output))
   147  
   148  	// now insert some more data to simulate changes made after the regular backup.
   149  	// each insert is spaced out in time so that the rows have distinct timestamps;
   150  	// when we recover to a certain time, this gap determines exactly which rows are eligible.
   151  	var restoreTime1 string
   152  	for counter := 3; counter <= 6; counter++ {
   153  		if counter == 4 { // we want to recover up to this row, so note the time
   154  			tm := time.Now().Add(1 * time.Second).UTC()
   155  			restoreTime1 = tm.Format(time.RFC3339)
   156  		}
   157  		insertRow(t, counter, fmt.Sprintf("prd-%d", counter), true)
   158  	}
   159  
   160  	// start the resharding process
   161  	performResharding(t)
   162  
   163  	// start the binlog server and point it at shard0Primary
   164  	bs0 := startBinlogServer(t, shard0Primary)
   165  	defer bs0.stop()
   166  
   167  	// start the binlog server and point it at shard1Primary
   168  	bs1 := startBinlogServer(t, shard1Primary)
   169  	defer bs1.stop()
   170  
   171  	for counter := 7; counter <= 10; counter++ {
   172  		insertRow(t, counter, fmt.Sprintf("prd-%d", counter), false)
   173  	}
   174  
   175  	// wait until both shards have the required data
   176  	cluster.VerifyRowsInTabletForTable(t, shard0Replica1, keyspaceName, 6, "product")
   177  	cluster.VerifyRowsInTabletForTable(t, shard1Replica1, keyspaceName, 4, "product")
   178  
   179  	// take the backup (to simulate the regular backup)
   180  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("Backup", shard0Replica1.Alias)
   181  	require.NoError(t, err)
   182  	// take the backup (to simulate the regular backup)
   183  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("Backup", shard1Replica1.Alias)
   184  	require.NoError(t, err)
   185  
   186  	backups, err := clusterInstance.ListBackups(keyspaceName + "/-80")
   187  	require.NoError(t, err)
   188  	require.Equal(t, 1, len(backups))
   189  
   190  	backups, err = clusterInstance.ListBackups(keyspaceName + "/80-")
   191  	require.NoError(t, err)
   192  	require.Equal(t, 1, len(backups))
   193  
   194  	// now insert some more data to simulate changes made after the regular backup.
   195  	// each insert is spaced out in time so that the rows have distinct timestamps;
   196  	// when we recover to a certain time, this gap determines exactly which rows are eligible.
   197  	var restoreTime2 string
   198  	for counter := 11; counter <= 15; counter++ {
   199  		if counter == 13 { // we want to recover up to this row, so note the time
   200  			tm := time.Now().Add(1 * time.Second).UTC()
   201  			restoreTime2 = tm.Format(time.RFC3339)
   202  		}
   203  		insertRow(t, counter, fmt.Sprintf("prd-%d", counter), true)
   204  	}
   205  	restoreTime3 := time.Now().UTC().Format(time.RFC3339)
   206  
   207  	// create the restore keyspace with snapshot time restoreTime1
   208  	createRestoreKeyspace(t, restoreTime1, restoreKS1Name)
   209  
   210  	// launch a recovery tablet that recovers the data up to restoreTime1
   211  	testTabletRecovery(t, bs, "2m", restoreKS1Name, "0", "INT64(4)")
   212  
   213  	// create the restore keyspace with snapshot time restoreTime2
   214  	createRestoreKeyspace(t, restoreTime2, restoreKS2Name)
   215  
   216  	// test the recovery with a very small binlog_lookup_timeout for shard0
   217  	// with such a small lookup timeout, it only gets whatever is available in the backup
   218  	// mysql> select * from product;
   219  	// +----+--------+------------+
   220  	// | id | name   | created    |
   221  	// +----+--------+------------+
   222  	// |  1 | prd-1  | 1597219030 |
   223  	// |  2 | prd-2  | 1597219030 |
   224  	// |  3 | prd-3  | 1597219043 |
   225  	// |  5 | prd-5  | 1597219045 |
   226  	// |  9 | prd-9  | 1597219130 |
   227  	// | 10 | prd-10 | 1597219130 |
   228  	// +----+--------+------------+
   229  	testTabletRecovery(t, bs0, "1ms", restoreKS2Name, "-80", "INT64(6)")
   230  
   231  	// test the recovery with a valid binlog_lookup_timeout for shard0, getting the data up to restoreTime2
   232  	// mysql> select * from product;
   233  	// +----+--------+------------+
   234  	// | id | name   | created    |
   235  	// +----+--------+------------+
   236  	// |  1 | prd-1  | 1597219030 |
   237  	// |  2 | prd-2  | 1597219030 |
   238  	// |  3 | prd-3  | 1597219043 |
   239  	// |  5 | prd-5  | 1597219045 |
   240  	// |  9 | prd-9  | 1597219130 |
   241  	// | 10 | prd-10 | 1597219130 |
   242  	// | 13 | prd-13 | 1597219141 |
   243  	// +----+--------+------------+
   244  	testTabletRecovery(t, bs0, "2m", restoreKS2Name, "-80", "INT64(7)")
   245  
   246  	// test the recovery with a valid binlog_lookup_timeout for shard1, getting the data up to restoreTime2
   247  	// mysql> select * from product;
   248  	// +----+--------+------------+
   249  	// | id | name   | created    |
   250  	// +----+--------+------------+
   251  	// |  4 | prd-4  | 1597219044 |
   252  	// |  6 | prd-6  | 1597219046 |
   253  	// |  7 | prd-7  | 1597219130 |
   254  	// |  8 | prd-8  | 1597219130 |
   255  	// | 11 | prd-11 | 1597219139 |
   256  	// | 12 | prd-12 | 1597219140 |
   257  	// +----+--------+------------+
   258  	testTabletRecovery(t, bs1, "2m", restoreKS2Name, "80-", "INT64(6)")
   259  
   260  	// test the recovery with timetorecover > (timestamp of the last binlog event in the binlog server)
   261  	createRestoreKeyspace(t, restoreTime3, restoreKS3Name)
   262  
   263  	// mysql> select * from product;
   264  	// +----+--------+------------+
   265  	// | id | name   | created    |
   266  	// +----+--------+------------+
   267  	// |  1 | prd-1  | 1597219030 |
   268  	// |  2 | prd-2  | 1597219030 |
   269  	// |  3 | prd-3  | 1597219043 |
   270  	// |  5 | prd-5  | 1597219045 |
   271  	// |  9 | prd-9  | 1597219130 |
   272  	// | 10 | prd-10 | 1597219130 |
   273  	// | 13 | prd-13 | 1597219141 |
   274  	// | 15 | prd-15 | 1597219142 |
   275  	// +----+--------+------------+
   276  	testTabletRecovery(t, bs0, "2m", restoreKS3Name, "-80", "INT64(8)")
   277  
   278  	// mysql> select * from product;
   279  	// +----+--------+------------+
   280  	// | id | name   | created    |
   281  	// +----+--------+------------+
   282  	// |  4 | prd-4  | 1597219044 |
   283  	// |  6 | prd-6  | 1597219046 |
   284  	// |  7 | prd-7  | 1597219130 |
   285  	// |  8 | prd-8  | 1597219130 |
   286  	// | 11 | prd-11 | 1597219139 |
   287  	// | 12 | prd-12 | 1597219140 |
   288  	// | 14 | prd-14 | 1597219142 |
   289  	// +----+--------+------------+
   290  	testTabletRecovery(t, bs1, "2m", restoreKS3Name, "80-", "INT64(7)")
   291  }
   292  
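        // performResharding reshards keyspace ks from the single shard 0 into -80 and 80-:
        // it applies the sharded vschema, creates and waits for the Reshard workflow, switches
        // rdonly, replica and primary traffic to the new shards, removes the old shard's tablets
        // and shard record, and finally restarts vtgate.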
   293  func performResharding(t *testing.T) {
   294  	err := clusterInstance.VtctlclientProcess.ApplyVSchema(keyspaceName, vSchema)
   295  	require.NoError(t, err)
   296  
   297  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("Reshard", "--", "--source_shards=0", "--target_shards=-80,80-", "Create", "ks.reshardWorkflow")
   298  	require.NoError(t, err)
   299  
   300  	waitTimeout := 30 * time.Second
   301  	shard0Primary.VttabletProcess.WaitForVReplicationToCatchup(t, "ks.reshardWorkflow", dbName, waitTimeout)
   302  	shard1Primary.VttabletProcess.WaitForVReplicationToCatchup(t, "ks.reshardWorkflow", dbName, waitTimeout)
   303  
   304  	waitForNoWorkflowLag(t, clusterInstance, "ks.reshardWorkflow")
   305  
   306  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("Reshard", "--", "--tablet_types=rdonly", "SwitchTraffic", "ks.reshardWorkflow")
   307  	require.NoError(t, err)
   308  
   309  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("Reshard", "--", "--tablet_types=replica", "SwitchTraffic", "ks.reshardWorkflow")
   310  	require.NoError(t, err)
   311  
   312  	// then serve primary from the split shards
   313  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("Reshard", "--", "--tablet_types=primary", "SwitchTraffic", "ks.reshardWorkflow")
   314  	require.NoError(t, err)
   315  
   316  	// remove the original tablets in the original shard
   317  	removeTablets(t, []*cluster.Vttablet{primary, replica1, replica2})
   318  
   319  	for _, tablet := range []*cluster.Vttablet{replica1, replica2} {
   320  		err = clusterInstance.VtctlclientProcess.ExecuteCommand("DeleteTablet", tablet.Alias)
   321  		require.NoError(t, err)
   322  	}
   323  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("DeleteTablet", "--", "--allow_primary", primary.Alias)
   324  	require.NoError(t, err)
   325  
   326  	// rebuild the serving graph, all mentions of the old shards should be gone
   327  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("RebuildKeyspaceGraph", "ks")
   328  	require.NoError(t, err)
   329  
   330  	// delete the original shard
   331  	err = clusterInstance.VtctlclientProcess.ExecuteCommand("DeleteShard", "ks/0")
   332  	require.NoError(t, err)
   333  
   334  	// Restart vtgate process
   335  	err = clusterInstance.VtgateProcess.TearDown()
   336  	require.NoError(t, err)
   337  
   338  	err = clusterInstance.VtgateProcess.Setup()
   339  	require.NoError(t, err)
   340  
   341  	clusterInstance.WaitForTabletsToHealthyInVtgate()
   342  }
   343  
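        // startBinlogServer brings up a binlog server on a reserved port and points it at the
        // MySQL instance of the given primary tablet, connecting with the vt_dba credentials.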
   344  func startBinlogServer(t *testing.T, primaryTablet *cluster.Vttablet) *binLogServer {
   345  	bs, err := newBinlogServer(hostname, clusterInstance.GetAndReservePort())
   346  	require.NoError(t, err)
   347  
   348  	err = bs.start(mysqlSource{
   349  		hostname: binlogHost,
   350  		port:     primaryTablet.MysqlctlProcess.MySQLPort,
   351  		username: mysqlUserName,
   352  		password: mysqlPassword,
   353  	})
   354  	require.NoError(t, err)
   355  	return bs
   356  }
   357  
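        // removeTablets stops the MySQL processes of the given tablets, waits for them to exit,
        // and then tears down the corresponding vttablets.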
   358  func removeTablets(t *testing.T, tablets []*cluster.Vttablet) {
   359  	var mysqlProcs []*exec.Cmd
   360  	for _, tablet := range tablets {
   361  		proc, _ := tablet.MysqlctlProcess.StopProcess()
   362  		mysqlProcs = append(mysqlProcs, proc)
   363  	}
   364  	for _, proc := range mysqlProcs {
   365  		err := proc.Wait()
   366  		require.NoError(t, err)
   367  	}
   368  	for _, tablet := range tablets {
   369  		tablet.VttabletProcess.TearDown()
   370  	}
   371  }
   372  
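        // initializeCluster starts the topo server, creates keyspace ks with shards 0, -80 and 80-,
        // starts MySQL and vttablet for every tablet, provisions the vt_dba user and vt_ks database
        // on each of them, initializes the shard primaries, and starts VTOrc and vtgate.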
   373  func initializeCluster(t *testing.T) {
   374  	clusterInstance = cluster.NewCluster(cell, hostname)
   375  
   376  	// Start topo server
   377  	err := clusterInstance.StartTopo()
   378  	require.NoError(t, err)
   379  
   380  	// Start keyspace
   381  	keyspace := &cluster.Keyspace{
   382  		Name: keyspaceName,
   383  	}
   384  	clusterInstance.Keyspaces = append(clusterInstance.Keyspaces, *keyspace)
   385  
   386  	shard := &cluster.Shard{
   387  		Name: shardName,
   388  	}
   389  	shard0 := &cluster.Shard{
   390  		Name: shard0Name,
   391  	}
   392  	shard1 := &cluster.Shard{
   393  		Name: shard1Name,
   394  	}
   395  
   396  	// Defining all the tablets
   397  	primary = clusterInstance.NewVttabletInstance("replica", 0, "")
   398  	replica1 = clusterInstance.NewVttabletInstance("replica", 0, "")
   399  	replica2 = clusterInstance.NewVttabletInstance("replica", 0, "")
   400  	shard0Primary = clusterInstance.NewVttabletInstance("replica", 0, "")
   401  	shard0Replica1 = clusterInstance.NewVttabletInstance("replica", 0, "")
   402  	shard0Replica2 = clusterInstance.NewVttabletInstance("replica", 0, "")
   403  	shard1Primary = clusterInstance.NewVttabletInstance("replica", 0, "")
   404  	shard1Replica1 = clusterInstance.NewVttabletInstance("replica", 0, "")
   405  	shard1Replica2 = clusterInstance.NewVttabletInstance("replica", 0, "")
   406  
   407  	shard.Vttablets = []*cluster.Vttablet{primary, replica1, replica2}
   408  	shard0.Vttablets = []*cluster.Vttablet{shard0Primary, shard0Replica1, shard0Replica2}
   409  	shard1.Vttablets = []*cluster.Vttablet{shard1Primary, shard1Replica1, shard1Replica2}
   410  
   411  	clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, commonTabletArg...)
   412  	clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, "--restore_from_backup")
   413  
   414  	err = clusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard, *shard0, *shard1})
   415  	require.NoError(t, err)
   416  	vtctldClientProcess := cluster.VtctldClientProcessInstance("localhost", clusterInstance.VtctldProcess.GrpcPort, clusterInstance.TmpDirectory)
   417  	out, err := vtctldClientProcess.ExecuteCommandWithOutput("SetKeyspaceDurabilityPolicy", keyspaceName, "--durability-policy=semi_sync")
   418  	require.NoError(t, err, out)
   419  	// Start MySQL
   420  	var mysqlCtlProcessList []*exec.Cmd
   421  	for _, shard := range clusterInstance.Keyspaces[0].Shards {
   422  		for _, tablet := range shard.Vttablets {
   423  			proc, err := tablet.MysqlctlProcess.StartProcess()
   424  			require.NoError(t, err)
   425  			mysqlCtlProcessList = append(mysqlCtlProcessList, proc)
   426  		}
   427  	}
   428  
   429  	// Wait for mysql processes to start
   430  	for _, proc := range mysqlCtlProcessList {
   431  		err = proc.Wait()
   432  		require.NoError(t, err)
   433  	}
   434  
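        	// create the vt_dba user (used by the binlog servers to connect) and the vt_ks database
        	// on every tablet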
   435  	queryCmds := []string{
   436  		fmt.Sprintf("CREATE USER '%s'@'%%' IDENTIFIED BY '%s';", mysqlUserName, mysqlPassword),
   437  		fmt.Sprintf("GRANT ALL ON *.* TO '%s'@'%%';", mysqlUserName),
   438  		fmt.Sprintf("GRANT GRANT OPTION ON *.* TO '%s'@'%%';", mysqlUserName),
   439  		fmt.Sprintf("create database %s;", dbName),
   440  		"FLUSH PRIVILEGES;",
   441  	}
   442  
   443  	for _, shard := range clusterInstance.Keyspaces[0].Shards {
   444  		for _, tablet := range shard.Vttablets {
   445  			for _, query := range queryCmds {
   446  				_, err = tablet.VttabletProcess.QueryTablet(query, keyspace.Name, false)
   447  				require.NoError(t, err)
   448  			}
   449  
   450  			err = tablet.VttabletProcess.Setup()
   451  			require.NoError(t, err)
   452  		}
   453  	}
   454  
   455  	err = clusterInstance.VtctlclientProcess.InitShardPrimary(keyspaceName, shard.Name, cell, primary.TabletUID)
   456  	require.NoError(t, err)
   457  
   458  	err = clusterInstance.VtctlclientProcess.InitShardPrimary(keyspaceName, shard0.Name, cell, shard0Primary.TabletUID)
   459  	require.NoError(t, err)
   460  
   461  	err = clusterInstance.VtctlclientProcess.InitShardPrimary(keyspaceName, shard1.Name, cell, shard1Primary.TabletUID)
   462  	require.NoError(t, err)
   463  
   464  	err = clusterInstance.StartVTOrc(keyspaceName)
   465  	require.NoError(t, err)
   466  
   467  	// Start vtgate
   468  	err = clusterInstance.StartVtgate()
   469  	require.NoError(t, err)
   470  }
   471  
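        // insertRow inserts a single product row through vtgate. If isSlow is set, it sleeps for a
        // second afterwards so that consecutive rows get distinct timestamps.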
   472  func insertRow(t *testing.T, id int, productName string, isSlow bool) {
   473  	ctx := context.Background()
   474  	vtParams := mysql.ConnParams{
   475  		Host: clusterInstance.Hostname,
   476  		Port: clusterInstance.VtgateMySQLPort,
   477  	}
   478  	conn, err := mysql.Connect(ctx, &vtParams)
   479  	require.NoError(t, err)
   480  	defer conn.Close()
   481  
   482  	insertStmt := fmt.Sprintf(insertTable, id, productName)
   483  	_, err = conn.ExecuteFetch(insertStmt, 1000, true)
   484  	require.NoError(t, err)
   485  
   486  	if isSlow {
   487  		time.Sleep(1 * time.Second)
   488  	}
   489  }
   490  
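        // createRestoreKeyspace creates a SNAPSHOT keyspace based on ks with the given snapshot time.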
   491  func createRestoreKeyspace(t *testing.T, timeToRecover, restoreKeyspaceName string) {
   492  	output, err := clusterInstance.VtctlclientProcess.ExecuteCommandWithOutput("CreateKeyspace", "--",
   493  		"--keyspace_type=SNAPSHOT", "--base_keyspace="+keyspaceName,
   494  		"--snapshot_time", timeToRecover, restoreKeyspaceName)
   495  	log.Info(output)
   496  	require.NoError(t, err)
   497  }
   498  
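        // testTabletRecovery launches a recovery tablet in the given restore keyspace and shard,
        // pointing it at the given binlog server with the given GTID lookup timeout, and asserts
        // that the recovered product table contains the expected number of rows. Cleanup is
        // deferred before querying so the tablet is torn down even if the assertion fails.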
   499  func testTabletRecovery(t *testing.T, binlogServer *binLogServer, lookupTimeout, restoreKeyspaceName, shardName, expectedRows string) {
   500  	recoveryTablet := clusterInstance.NewVttabletInstance("replica", 0, cell)
   501  	launchRecoveryTablet(t, recoveryTablet, binlogServer, lookupTimeout, restoreKeyspaceName, shardName)
   502  
   503  	defer recoveryTablet.MysqlctlProcess.Stop()
   504  	defer recoveryTablet.VttabletProcess.TearDown()
   505  
   506  	sqlRes, err := recoveryTablet.VttabletProcess.QueryTablet(getCountID, keyspaceName, true)
   507  	require.NoError(t, err)
   508  	assert.Equal(t, expectedRows, sqlRes.Rows[0][0].String())
   509  }
   510  
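        // launchRecoveryTablet starts MySQL and a vttablet for the recovery tablet in the restore
        // keyspace. The vttablet restores from the backup and then replays binlog events from the
        // binlog server up to the keyspace's snapshot time, with the GTID lookup bounded by
        // lookupTimeout.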
   511  func launchRecoveryTablet(t *testing.T, tablet *cluster.Vttablet, binlogServer *binLogServer, lookupTimeout, restoreKeyspaceName, shardName string) {
   512  	tablet.MysqlctlProcess = *cluster.MysqlCtlProcessInstance(tablet.TabletUID, tablet.MySQLPort, clusterInstance.TmpDirectory)
   513  	err := tablet.MysqlctlProcess.Start()
   514  	require.NoError(t, err)
   515  
   516  	tablet.VttabletProcess = cluster.VttabletProcessInstance(
   517  		tablet.HTTPPort,
   518  		tablet.GrpcPort,
   519  		tablet.TabletUID,
   520  		clusterInstance.Cell,
   521  		shardName,
   522  		keyspaceName,
   523  		clusterInstance.VtctldProcess.Port,
   524  		tablet.Type,
   525  		clusterInstance.TopoProcess.Port,
   526  		clusterInstance.Hostname,
   527  		clusterInstance.TmpDirectory,
   528  		clusterInstance.VtTabletExtraArgs,
   529  		clusterInstance.DefaultCharset)
   530  	tablet.Alias = tablet.VttabletProcess.TabletPath
   531  	tablet.VttabletProcess.SupportsBackup = true
   532  	tablet.VttabletProcess.Keyspace = restoreKeyspaceName
   533  	tablet.VttabletProcess.ExtraArgs = []string{
   534  		"--disable_active_reparents",
   535  		"--enable_replication_reporter=false",
   536  		"--init_db_name_override", dbName,
   537  		"--init_tablet_type", "replica",
   538  		"--init_keyspace", restoreKeyspaceName,
   539  		"--init_shard", shardName,
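        		// point the tablet at the binlog server so it can replay events recorded after the backup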
   540  		"--binlog_host", binlogServer.hostname,
   541  		"--binlog_port", fmt.Sprintf("%d", binlogServer.port),
   542  		"--binlog_user", binlogServer.username,
   543  		"--binlog_password", binlogServer.password,
   544  		"--pitr_gtid_lookup_timeout", lookupTimeout,
   545  		"--vreplication_healthcheck_topology_refresh", "1s",
   546  		"--vreplication_healthcheck_retry_delay", "1s",
   547  		"--vreplication_tablet_type", "replica",
   548  		"--vreplication_retry_delay", "1s",
   549  		"--degraded_threshold", "5s",
   550  		"--lock_tables_timeout", "5s",
   551  		"--watch_replication_stream",
   552  		"--serving_state_grace_period", "1s",
   553  	}
   554  	tablet.VttabletProcess.ServingStatus = ""
   555  
   556  	err = tablet.VttabletProcess.Setup()
   557  	require.NoError(t, err)
   558  
   559  	tablet.VttabletProcess.WaitForTabletStatusesForTimeout([]string{"SERVING"}, 20*time.Second)
   560  }
   561  
   562  // waitForNoWorkflowLag waits for the VReplication workflow's MaxVReplicationTransactionLag
   563  // value to be 0.
   564  func waitForNoWorkflowLag(t *testing.T, vc *cluster.LocalProcessCluster, ksWorkflow string) {
   565  	lag := int64(0)
   566  	timer := time.NewTimer(defaultTimeout)
   567  	defer timer.Stop()
   568  	for {
   569  		output, err := vc.VtctlclientProcess.ExecuteCommandWithOutput("Workflow", "--", ksWorkflow, "show")
   570  		require.NoError(t, err)
   571  		lag, err = jsonparser.GetInt([]byte(output), "MaxVReplicationTransactionLag")
   572  		require.NoError(t, err)
   573  		if lag == 0 {
   574  			return
   575  		}
   576  		select {
   577  		case <-timer.C:
   578  			require.FailNow(t, fmt.Sprintf("workflow %q did not eliminate VReplication lag before the timeout of %s; last seen MaxVReplicationTransactionLag: %d",
   579  				ksWorkflow, defaultTimeout, lag))
   580  		default:
   581  			time.Sleep(defaultTick)
   582  		}
   583  	}
   584  }