vitess.io/vitess@v0.16.2/go/test/endtoend/recovery/pitr/shardedpitr_test.go (about) 1 /* 2 Copyright 2020 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package pitr 18 19 import ( 20 "context" 21 "fmt" 22 "os/exec" 23 "testing" 24 "time" 25 26 "github.com/buger/jsonparser" 27 "github.com/stretchr/testify/assert" 28 "github.com/stretchr/testify/require" 29 30 "vitess.io/vitess/go/mysql" 31 "vitess.io/vitess/go/test/endtoend/cluster" 32 "vitess.io/vitess/go/vt/log" 33 ) 34 35 var ( 36 createTable = `create table product (id bigint(20) primary key, name char(10), created bigint(20));` 37 insertTable = `insert into product (id, name, created) values(%d, '%s', unix_timestamp());` 38 getCountID = `select count(*) from product` 39 ) 40 41 var ( 42 clusterInstance *cluster.LocalProcessCluster 43 44 primary *cluster.Vttablet 45 replica1 *cluster.Vttablet 46 replica2 *cluster.Vttablet 47 shard0Primary *cluster.Vttablet 48 shard0Replica1 *cluster.Vttablet 49 shard0Replica2 *cluster.Vttablet 50 shard1Primary *cluster.Vttablet 51 shard1Replica1 *cluster.Vttablet 52 shard1Replica2 *cluster.Vttablet 53 54 cell = "zone1" 55 hostname = "localhost" 56 binlogHost = "127.0.0.1" 57 keyspaceName = "ks" 58 restoreKS1Name = "restoreks1" 59 restoreKS2Name = "restoreks2" 60 restoreKS3Name = "restoreks3" 61 shardName = "0" 62 shard0Name = "-80" 63 shard1Name = "80-" 64 dbName = "vt_ks" 65 mysqlUserName = "vt_dba" 66 mysqlPassword = "password" 67 vSchema = `{ 68 "sharded": true, 69 "vindexes": { 70 "hash_index": { 71 "type": "hash" 72 } 73 }, 74 "tables": { 75 "product": { 76 "column_vindexes": [ 77 { 78 "column": "id", 79 "name": "hash_index" 80 } 81 ] 82 } 83 } 84 }` 85 commonTabletArg = []string{ 86 "--vreplication_healthcheck_topology_refresh", "1s", 87 "--vreplication_healthcheck_retry_delay", "1s", 88 "--vreplication_retry_delay", "1s", 89 "--degraded_threshold", "5s", 90 "--lock_tables_timeout", "5s", 91 "--watch_replication_stream", 92 "--serving_state_grace_period", "1s"} 93 94 defaultTimeout = 30 * time.Second 95 defaultTick = 1 * time.Second 96 ) 97 98 // Test pitr (Point in time recovery). 99 // ------------------------------------------- 100 // The following test will: 101 // - create a shard with primary and replica 102 // - run InitShardPrimary 103 // - point binlog server to primary 104 // - insert some data using vtgate (e.g. here we have inserted rows 1,2) 105 // - verify the replication 106 // - take backup of replica 107 // - insert some data using vtgate (e.g. we inserted rows 3 4 5 6), while inserting row-4, note down the time (restoreTime1) 108 // - perform a resharding to create 2 shards (-80, 80-), and delete the old shard 109 // - point binlog server to primary of both shards 110 // - insert some data using vtgate (e.g. we will insert 7 8 9 10) and verify we get required number of rows in -80, 80- shard 111 // - take backup of both shards 112 // - insert some more data using vtgate (e.g. we will insert 11 12 13 14 15), while inserting row-13, note down the time (restoreTime2) 113 // - note down the current time (restoreTime3) 114 115 // - Till now we did all the presetup for assertions 116 117 // - asserting that restoring to restoreTime1 (going from 2 shards to 1 shard) is working, i.e. we should get 4 rows. 118 // - asserting that while restoring if we give small timeout value, it will restore upto to the last available backup (asserting only -80 shard) 119 // - asserting that restoring to restoreTime2 (going from 2 shards to 2 shards with past time) is working, it will assert for both shards 120 // - asserting that restoring to restoreTime3 is working, we should get complete data after restoring, as we have in existing shards. 121 func TestPITRRecovery(t *testing.T) { 122 defer cluster.PanicHandler(nil) 123 initializeCluster(t) 124 defer clusterInstance.Teardown() 125 126 //start the binlog server and point it to primary 127 bs := startBinlogServer(t, primary) 128 defer bs.stop() 129 130 // Creating the table 131 _, err := primary.VttabletProcess.QueryTablet(createTable, keyspaceName, true) 132 require.NoError(t, err) 133 134 insertRow(t, 1, "prd-1", false) 135 insertRow(t, 2, "prd-2", false) 136 137 cluster.VerifyRowsInTabletForTable(t, replica1, keyspaceName, 2, "product") 138 139 // backup the replica 140 err = clusterInstance.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias) 141 require.NoError(t, err) 142 143 // check that the backup shows up in the listing 144 output, err := clusterInstance.ListBackups("ks/0") 145 require.NoError(t, err) 146 assert.Equal(t, 1, len(output)) 147 148 // now insert some more data to simulate the changes after regular backup 149 // every insert has some time lag/difference to simulate the time gap between rows 150 // and when we recover to certain time, this time gap will be able to identify the exact eligible row 151 var restoreTime1 string 152 for counter := 3; counter <= 6; counter++ { 153 if counter == 4 { // we want to recovery till this, so noting the time 154 tm := time.Now().Add(1 * time.Second).UTC() 155 restoreTime1 = tm.Format(time.RFC3339) 156 } 157 insertRow(t, counter, fmt.Sprintf("prd-%d", counter), true) 158 } 159 160 // starting resharding process 161 performResharding(t) 162 163 //start the binlog server and point it to shard0Primary 164 bs0 := startBinlogServer(t, shard0Primary) 165 defer bs0.stop() 166 167 //start the binlog server and point it to shard1Primary 168 bs1 := startBinlogServer(t, shard1Primary) 169 defer bs1.stop() 170 171 for counter := 7; counter <= 10; counter++ { 172 insertRow(t, counter, fmt.Sprintf("prd-%d", counter), false) 173 } 174 175 // wait till all the shards have required data 176 cluster.VerifyRowsInTabletForTable(t, shard0Replica1, keyspaceName, 6, "product") 177 cluster.VerifyRowsInTabletForTable(t, shard1Replica1, keyspaceName, 4, "product") 178 179 // take the backup (to simulate the regular backup) 180 err = clusterInstance.VtctlclientProcess.ExecuteCommand("Backup", shard0Replica1.Alias) 181 require.NoError(t, err) 182 // take the backup (to simulate the regular backup) 183 err = clusterInstance.VtctlclientProcess.ExecuteCommand("Backup", shard1Replica1.Alias) 184 require.NoError(t, err) 185 186 backups, err := clusterInstance.ListBackups(keyspaceName + "/-80") 187 require.NoError(t, err) 188 require.Equal(t, len(backups), 1) 189 190 backups, err = clusterInstance.ListBackups(keyspaceName + "/80-") 191 require.NoError(t, err) 192 require.Equal(t, len(backups), 1) 193 194 // now insert some more data to simulate the changes after regular backup 195 // every insert has some time lag/difference to simulate the time gap between rows 196 // and when we recover to certain time, this time gap will be able to identify the exact eligible row 197 var restoreTime2 string 198 for counter := 11; counter <= 15; counter++ { 199 if counter == 13 { // we want to recovery till this, so noting the time 200 tm := time.Now().Add(1 * time.Second).UTC() 201 restoreTime2 = tm.Format(time.RFC3339) 202 } 203 insertRow(t, counter, fmt.Sprintf("prd-%d", counter), true) 204 } 205 restoreTime3 := time.Now().UTC().Format(time.RFC3339) 206 207 // creating restore keyspace with snapshot time as restoreTime1 208 createRestoreKeyspace(t, restoreTime1, restoreKS1Name) 209 210 // Launching a recovery tablet which recovers data from the primary till the restoreTime1 211 testTabletRecovery(t, bs, "2m", restoreKS1Name, "0", "INT64(4)") 212 213 // create restoreKeyspace with snapshot time as restoreTime2 214 createRestoreKeyspace(t, restoreTime2, restoreKS2Name) 215 216 // test the recovery with smaller binlog_lookup_timeout for shard0 217 // since we have small lookup timeout, it will just get whatever available in the backup 218 // mysql> select * from product; 219 // +----+--------+------------+ 220 // | id | name | created | 221 // +----+--------+------------+ 222 // | 1 | prd-1 | 1597219030 | 223 // | 2 | prd-2 | 1597219030 | 224 // | 3 | prd-3 | 1597219043 | 225 // | 5 | prd-5 | 1597219045 | 226 // | 9 | prd-9 | 1597219130 | 227 // | 10 | prd-10 | 1597219130 | 228 // +----+--------+------------+ 229 testTabletRecovery(t, bs0, "1ms", restoreKS2Name, "-80", "INT64(6)") 230 231 // test the recovery with valid binlog_lookup_timeout for shard0 and getting the data till the restoreTime2 232 // mysql> select * from product; 233 // +----+--------+------------+ 234 // | id | name | created | 235 // +----+--------+------------+ 236 // | 1 | prd-1 | 1597219030 | 237 // | 2 | prd-2 | 1597219030 | 238 // | 3 | prd-3 | 1597219043 | 239 // | 5 | prd-5 | 1597219045 | 240 // | 9 | prd-9 | 1597219130 | 241 // | 10 | prd-10 | 1597219130 | 242 // | 13 | prd-13 | 1597219141 | 243 // +----+--------+------------+ 244 testTabletRecovery(t, bs0, "2m", restoreKS2Name, "-80", "INT64(7)") 245 246 // test the recovery with valid binlog_lookup_timeout for shard1 and getting the data till the restoreTime2 247 // mysql> select * from product; 248 // +----+--------+------------+ 249 // | id | name | created | 250 // +----+--------+------------+ 251 // | 4 | prd-4 | 1597219044 | 252 // | 6 | prd-6 | 1597219046 | 253 // | 7 | prd-7 | 1597219130 | 254 // | 8 | prd-8 | 1597219130 | 255 // | 11 | prd-11 | 1597219139 | 256 // | 12 | prd-12 | 1597219140 | 257 // +----+--------+------------+ 258 testTabletRecovery(t, bs1, "2m", restoreKS2Name, "80-", "INT64(6)") 259 260 // test the recovery with timetorecover > (timestmap of last binlog event in binlog server) 261 createRestoreKeyspace(t, restoreTime3, restoreKS3Name) 262 263 // mysql> select * from product; 264 // +----+--------+------------+ 265 // | id | name | created | 266 // +----+--------+------------+ 267 // | 1 | prd-1 | 1597219030 | 268 // | 2 | prd-2 | 1597219030 | 269 // | 3 | prd-3 | 1597219043 | 270 // | 5 | prd-5 | 1597219045 | 271 // | 9 | prd-9 | 1597219130 | 272 // | 10 | prd-10 | 1597219130 | 273 // | 13 | prd-13 | 1597219141 | 274 // | 15 | prd-15 | 1597219142 | 275 // +----+--------+------------+ 276 testTabletRecovery(t, bs0, "2m", restoreKS3Name, "-80", "INT64(8)") 277 278 // mysql> select * from product; 279 // +----+--------+------------+ 280 // | id | name | created | 281 // +----+--------+------------+ 282 // | 4 | prd-4 | 1597219044 | 283 // | 6 | prd-6 | 1597219046 | 284 // | 7 | prd-7 | 1597219130 | 285 // | 8 | prd-8 | 1597219130 | 286 // | 11 | prd-11 | 1597219139 | 287 // | 12 | prd-12 | 1597219140 | 288 // | 14 | prd-14 | 1597219142 | 289 // +----+--------+------------+ 290 testTabletRecovery(t, bs1, "2m", restoreKS3Name, "80-", "INT64(7)") 291 } 292 293 func performResharding(t *testing.T) { 294 err := clusterInstance.VtctlclientProcess.ApplyVSchema(keyspaceName, vSchema) 295 require.NoError(t, err) 296 297 err = clusterInstance.VtctlclientProcess.ExecuteCommand("Reshard", "--", "--source_shards=0", "--target_shards=-80,80-", "Create", "ks.reshardWorkflow") 298 require.NoError(t, err) 299 300 waitTimeout := 30 * time.Second 301 shard0Primary.VttabletProcess.WaitForVReplicationToCatchup(t, "ks.reshardWorkflow", dbName, waitTimeout) 302 shard1Primary.VttabletProcess.WaitForVReplicationToCatchup(t, "ks.reshardWorkflow", dbName, waitTimeout) 303 304 waitForNoWorkflowLag(t, clusterInstance, "ks.reshardWorkflow") 305 306 err = clusterInstance.VtctlclientProcess.ExecuteCommand("Reshard", "--", "--tablet_types=rdonly", "SwitchTraffic", "ks.reshardWorkflow") 307 require.NoError(t, err) 308 309 err = clusterInstance.VtctlclientProcess.ExecuteCommand("Reshard", "--", "--tablet_types=replica", "SwitchTraffic", "ks.reshardWorkflow") 310 require.NoError(t, err) 311 312 // then serve primary from the split shards 313 err = clusterInstance.VtctlclientProcess.ExecuteCommand("Reshard", "--", "--tablet_types=primary", "SwitchTraffic", "ks.reshardWorkflow") 314 require.NoError(t, err) 315 316 // remove the original tablets in the original shard 317 removeTablets(t, []*cluster.Vttablet{primary, replica1, replica2}) 318 319 for _, tablet := range []*cluster.Vttablet{replica1, replica2} { 320 err = clusterInstance.VtctlclientProcess.ExecuteCommand("DeleteTablet", tablet.Alias) 321 require.NoError(t, err) 322 } 323 err = clusterInstance.VtctlclientProcess.ExecuteCommand("DeleteTablet", "--", "--allow_primary", primary.Alias) 324 require.NoError(t, err) 325 326 // rebuild the serving graph, all mentions of the old shards should be gone 327 err = clusterInstance.VtctlclientProcess.ExecuteCommand("RebuildKeyspaceGraph", "ks") 328 require.NoError(t, err) 329 330 // delete the original shard 331 err = clusterInstance.VtctlclientProcess.ExecuteCommand("DeleteShard", "ks/0") 332 require.NoError(t, err) 333 334 // Restart vtgate process 335 err = clusterInstance.VtgateProcess.TearDown() 336 require.NoError(t, err) 337 338 err = clusterInstance.VtgateProcess.Setup() 339 require.NoError(t, err) 340 341 clusterInstance.WaitForTabletsToHealthyInVtgate() 342 } 343 344 func startBinlogServer(t *testing.T, primaryTablet *cluster.Vttablet) *binLogServer { 345 bs, err := newBinlogServer(hostname, clusterInstance.GetAndReservePort()) 346 require.NoError(t, err) 347 348 err = bs.start(mysqlSource{ 349 hostname: binlogHost, 350 port: primaryTablet.MysqlctlProcess.MySQLPort, 351 username: mysqlUserName, 352 password: mysqlPassword, 353 }) 354 require.NoError(t, err) 355 return bs 356 } 357 358 func removeTablets(t *testing.T, tablets []*cluster.Vttablet) { 359 var mysqlProcs []*exec.Cmd 360 for _, tablet := range tablets { 361 proc, _ := tablet.MysqlctlProcess.StopProcess() 362 mysqlProcs = append(mysqlProcs, proc) 363 } 364 for _, proc := range mysqlProcs { 365 err := proc.Wait() 366 require.NoError(t, err) 367 } 368 for _, tablet := range tablets { 369 tablet.VttabletProcess.TearDown() 370 } 371 } 372 373 func initializeCluster(t *testing.T) { 374 clusterInstance = cluster.NewCluster(cell, hostname) 375 376 // Start topo server 377 err := clusterInstance.StartTopo() 378 require.NoError(t, err) 379 380 // Start keyspace 381 keyspace := &cluster.Keyspace{ 382 Name: keyspaceName, 383 } 384 clusterInstance.Keyspaces = append(clusterInstance.Keyspaces, *keyspace) 385 386 shard := &cluster.Shard{ 387 Name: shardName, 388 } 389 shard0 := &cluster.Shard{ 390 Name: shard0Name, 391 } 392 shard1 := &cluster.Shard{ 393 Name: shard1Name, 394 } 395 396 // Defining all the tablets 397 primary = clusterInstance.NewVttabletInstance("replica", 0, "") 398 replica1 = clusterInstance.NewVttabletInstance("replica", 0, "") 399 replica2 = clusterInstance.NewVttabletInstance("replica", 0, "") 400 shard0Primary = clusterInstance.NewVttabletInstance("replica", 0, "") 401 shard0Replica1 = clusterInstance.NewVttabletInstance("replica", 0, "") 402 shard0Replica2 = clusterInstance.NewVttabletInstance("replica", 0, "") 403 shard1Primary = clusterInstance.NewVttabletInstance("replica", 0, "") 404 shard1Replica1 = clusterInstance.NewVttabletInstance("replica", 0, "") 405 shard1Replica2 = clusterInstance.NewVttabletInstance("replica", 0, "") 406 407 shard.Vttablets = []*cluster.Vttablet{primary, replica1, replica2} 408 shard0.Vttablets = []*cluster.Vttablet{shard0Primary, shard0Replica1, shard0Replica2} 409 shard1.Vttablets = []*cluster.Vttablet{shard1Primary, shard1Replica1, shard1Replica2} 410 411 clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, commonTabletArg...) 412 clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, "--restore_from_backup") 413 414 err = clusterInstance.SetupCluster(keyspace, []cluster.Shard{*shard, *shard0, *shard1}) 415 require.NoError(t, err) 416 vtctldClientProcess := cluster.VtctldClientProcessInstance("localhost", clusterInstance.VtctldProcess.GrpcPort, clusterInstance.TmpDirectory) 417 out, err := vtctldClientProcess.ExecuteCommandWithOutput("SetKeyspaceDurabilityPolicy", keyspaceName, "--durability-policy=semi_sync") 418 require.NoError(t, err, out) 419 // Start MySql 420 var mysqlCtlProcessList []*exec.Cmd 421 for _, shard := range clusterInstance.Keyspaces[0].Shards { 422 for _, tablet := range shard.Vttablets { 423 proc, err := tablet.MysqlctlProcess.StartProcess() 424 require.NoError(t, err) 425 mysqlCtlProcessList = append(mysqlCtlProcessList, proc) 426 } 427 } 428 429 // Wait for mysql processes to start 430 for _, proc := range mysqlCtlProcessList { 431 err = proc.Wait() 432 require.NoError(t, err) 433 } 434 435 queryCmds := []string{ 436 fmt.Sprintf("CREATE USER '%s'@'%%' IDENTIFIED BY '%s';", mysqlUserName, mysqlPassword), 437 fmt.Sprintf("GRANT ALL ON *.* TO '%s'@'%%';", mysqlUserName), 438 fmt.Sprintf("GRANT GRANT OPTION ON *.* TO '%s'@'%%';", mysqlUserName), 439 fmt.Sprintf("create database %s;", "vt_ks"), 440 "FLUSH PRIVILEGES;", 441 } 442 443 for _, shard := range clusterInstance.Keyspaces[0].Shards { 444 for _, tablet := range shard.Vttablets { 445 for _, query := range queryCmds { 446 _, err = tablet.VttabletProcess.QueryTablet(query, keyspace.Name, false) 447 require.NoError(t, err) 448 } 449 450 err = tablet.VttabletProcess.Setup() 451 require.NoError(t, err) 452 } 453 } 454 455 err = clusterInstance.VtctlclientProcess.InitShardPrimary(keyspaceName, shard.Name, cell, primary.TabletUID) 456 require.NoError(t, err) 457 458 err = clusterInstance.VtctlclientProcess.InitShardPrimary(keyspaceName, shard0.Name, cell, shard0Primary.TabletUID) 459 require.NoError(t, err) 460 461 err = clusterInstance.VtctlclientProcess.InitShardPrimary(keyspaceName, shard1.Name, cell, shard1Primary.TabletUID) 462 require.NoError(t, err) 463 464 err = clusterInstance.StartVTOrc(keyspaceName) 465 require.NoError(t, err) 466 467 // Start vtgate 468 err = clusterInstance.StartVtgate() 469 require.NoError(t, err) 470 } 471 472 func insertRow(t *testing.T, id int, productName string, isSlow bool) { 473 ctx := context.Background() 474 vtParams := mysql.ConnParams{ 475 Host: clusterInstance.Hostname, 476 Port: clusterInstance.VtgateMySQLPort, 477 } 478 conn, err := mysql.Connect(ctx, &vtParams) 479 require.NoError(t, err) 480 defer conn.Close() 481 482 insertSmt := fmt.Sprintf(insertTable, id, productName) 483 _, err = conn.ExecuteFetch(insertSmt, 1000, true) 484 require.NoError(t, err) 485 486 if isSlow { 487 time.Sleep(1 * time.Second) 488 } 489 } 490 491 func createRestoreKeyspace(t *testing.T, timeToRecover, restoreKeyspaceName string) { 492 output, err := clusterInstance.VtctlclientProcess.ExecuteCommandWithOutput("CreateKeyspace", "--", 493 "--keyspace_type=SNAPSHOT", "--base_keyspace="+keyspaceName, 494 "--snapshot_time", timeToRecover, restoreKeyspaceName) 495 log.Info(output) 496 require.NoError(t, err) 497 } 498 499 func testTabletRecovery(t *testing.T, binlogServer *binLogServer, lookupTimeout, restoreKeyspaceName, shardName, expectedRows string) { 500 recoveryTablet := clusterInstance.NewVttabletInstance("replica", 0, cell) 501 launchRecoveryTablet(t, recoveryTablet, binlogServer, lookupTimeout, restoreKeyspaceName, shardName) 502 503 sqlRes, err := recoveryTablet.VttabletProcess.QueryTablet(getCountID, keyspaceName, true) 504 require.NoError(t, err) 505 assert.Equal(t, expectedRows, sqlRes.Rows[0][0].String()) 506 507 defer recoveryTablet.MysqlctlProcess.Stop() 508 defer recoveryTablet.VttabletProcess.TearDown() 509 } 510 511 func launchRecoveryTablet(t *testing.T, tablet *cluster.Vttablet, binlogServer *binLogServer, lookupTimeout, restoreKeyspaceName, shardName string) { 512 tablet.MysqlctlProcess = *cluster.MysqlCtlProcessInstance(tablet.TabletUID, tablet.MySQLPort, clusterInstance.TmpDirectory) 513 err := tablet.MysqlctlProcess.Start() 514 require.NoError(t, err) 515 516 tablet.VttabletProcess = cluster.VttabletProcessInstance( 517 tablet.HTTPPort, 518 tablet.GrpcPort, 519 tablet.TabletUID, 520 clusterInstance.Cell, 521 shardName, 522 keyspaceName, 523 clusterInstance.VtctldProcess.Port, 524 tablet.Type, 525 clusterInstance.TopoProcess.Port, 526 clusterInstance.Hostname, 527 clusterInstance.TmpDirectory, 528 clusterInstance.VtTabletExtraArgs, 529 clusterInstance.DefaultCharset) 530 tablet.Alias = tablet.VttabletProcess.TabletPath 531 tablet.VttabletProcess.SupportsBackup = true 532 tablet.VttabletProcess.Keyspace = restoreKeyspaceName 533 tablet.VttabletProcess.ExtraArgs = []string{ 534 "--disable_active_reparents", 535 "--enable_replication_reporter=false", 536 "--init_db_name_override", dbName, 537 "--init_tablet_type", "replica", 538 "--init_keyspace", restoreKeyspaceName, 539 "--init_shard", shardName, 540 "--binlog_host", binlogServer.hostname, 541 "--binlog_port", fmt.Sprintf("%d", binlogServer.port), 542 "--binlog_user", binlogServer.username, 543 "--binlog_password", binlogServer.password, 544 "--pitr_gtid_lookup_timeout", lookupTimeout, 545 "--vreplication_healthcheck_topology_refresh", "1s", 546 "--vreplication_healthcheck_retry_delay", "1s", 547 "--vreplication_tablet_type", "replica", 548 "--vreplication_retry_delay", "1s", 549 "--degraded_threshold", "5s", 550 "--lock_tables_timeout", "5s", 551 "--watch_replication_stream", 552 "--serving_state_grace_period", "1s", 553 } 554 tablet.VttabletProcess.ServingStatus = "" 555 556 err = tablet.VttabletProcess.Setup() 557 require.NoError(t, err) 558 559 tablet.VttabletProcess.WaitForTabletStatusesForTimeout([]string{"SERVING"}, 20*time.Second) 560 } 561 562 // waitForNoWorkflowLag waits for the VReplication workflow's MaxVReplicationTransactionLag 563 // value to be 0. 564 func waitForNoWorkflowLag(t *testing.T, vc *cluster.LocalProcessCluster, ksWorkflow string) { 565 lag := int64(0) 566 timer := time.NewTimer(defaultTimeout) 567 defer timer.Stop() 568 for { 569 output, err := vc.VtctlclientProcess.ExecuteCommandWithOutput("Workflow", "--", ksWorkflow, "show") 570 require.NoError(t, err) 571 lag, err = jsonparser.GetInt([]byte(output), "MaxVReplicationTransactionLag") 572 require.NoError(t, err) 573 if lag == 0 { 574 return 575 } 576 select { 577 case <-timer.C: 578 require.FailNow(t, fmt.Sprintf("workflow %q did not eliminate VReplication lag before the timeout of %s; last seen MaxVReplicationTransactionLag: %d", 579 ksWorkflow, defaultTimeout, lag)) 580 default: 581 time.Sleep(defaultTick) 582 } 583 } 584 }