vitess.io/vitess@v0.16.2/go/test/endtoend/recovery/unshardedrecovery/recovery.go (about) 1 /* 2 Copyright 2020 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package unshardedrecovery 18 19 import ( 20 "context" 21 "flag" 22 "fmt" 23 "os" 24 "os/exec" 25 "path" 26 "testing" 27 "time" 28 29 "github.com/stretchr/testify/assert" 30 "github.com/stretchr/testify/require" 31 32 "vitess.io/vitess/go/test/endtoend/cluster" 33 "vitess.io/vitess/go/test/endtoend/recovery" 34 "vitess.io/vitess/go/vt/log" 35 "vitess.io/vitess/go/vt/vtgate/vtgateconn" 36 ) 37 38 var ( 39 primary *cluster.Vttablet 40 replica1 *cluster.Vttablet 41 replica2 *cluster.Vttablet 42 replica3 *cluster.Vttablet 43 localCluster *cluster.LocalProcessCluster 44 newInitDBFile string 45 cell = cluster.DefaultCell 46 hostname = "localhost" 47 keyspaceName = "ks" 48 dbPassword = "VtDbaPass" 49 shardKsName = fmt.Sprintf("%s/%s", keyspaceName, shardName) 50 dbCredentialFile string 51 shardName = "0" 52 commonTabletArg = []string{ 53 "--vreplication_healthcheck_topology_refresh", "1s", 54 "--vreplication_healthcheck_retry_delay", "1s", 55 "--vreplication_retry_delay", "1s", 56 "--degraded_threshold", "5s", 57 "--lock_tables_timeout", "5s", 58 "--watch_replication_stream", 59 "--serving_state_grace_period", "1s"} 60 recoveryKS1 = "recovery_ks1" 61 recoveryKS2 = "recovery_ks2" 62 vtInsertTest = `create table vt_insert_test ( 63 id bigint auto_increment, 64 msg varchar(64), 65 primary key (id) 66 ) Engine=InnoDB` 67 vSchema = `{ 68 "tables": { 69 "vt_insert_test": {} 70 } 71 }` 72 ) 73 74 // TestMainImpl creates cluster for unsharded recovery testing. 75 func TestMainImpl(m *testing.M) { 76 defer cluster.PanicHandler(nil) 77 flag.Parse() 78 79 exitCode, err := func() (int, error) { 80 localCluster = cluster.NewCluster(cell, hostname) 81 defer localCluster.Teardown() 82 83 // Start topo server 84 err := localCluster.StartTopo() 85 if err != nil { 86 return 1, err 87 } 88 89 // Start keyspace 90 keyspace := &cluster.Keyspace{ 91 Name: keyspaceName, 92 } 93 localCluster.Keyspaces = append(localCluster.Keyspaces, *keyspace) 94 95 dbCredentialFile = cluster.WriteDbCredentialToTmp(localCluster.TmpDirectory) 96 initDb, _ := os.ReadFile(path.Join(os.Getenv("VTROOT"), "/config/init_db.sql")) 97 sql := string(initDb) 98 newInitDBFile = path.Join(localCluster.TmpDirectory, "init_db_with_passwords.sql") 99 sql = sql + cluster.GetPasswordUpdateSQL(localCluster) 100 // https://github.com/vitessio/vitess/issues/8315 101 oldAlterTableMode := ` 102 SET GLOBAL old_alter_table = ON; 103 ` 104 sql = sql + oldAlterTableMode 105 os.WriteFile(newInitDBFile, []byte(sql), 0666) 106 107 extraArgs := []string{"--db-credentials-file", dbCredentialFile} 108 commonTabletArg = append(commonTabletArg, "--db-credentials-file", dbCredentialFile) 109 110 shard := cluster.Shard{ 111 Name: shardName, 112 } 113 114 var mysqlProcs []*exec.Cmd 115 for i := 0; i < 4; i++ { 116 tabletType := "replica" 117 if i == 0 { 118 tabletType = "primary" 119 } 120 tablet := localCluster.NewVttabletInstance(tabletType, 0, cell) 121 tablet.VttabletProcess = localCluster.VtprocessInstanceFromVttablet(tablet, shard.Name, keyspaceName) 122 tablet.VttabletProcess.DbPassword = dbPassword 123 tablet.VttabletProcess.ExtraArgs = commonTabletArg 124 if recovery.UseXb { 125 tablet.VttabletProcess.ExtraArgs = append(tablet.VttabletProcess.ExtraArgs, recovery.XbArgs...) 126 } 127 tablet.VttabletProcess.SupportsBackup = true 128 129 tablet.MysqlctlProcess = *cluster.MysqlCtlProcessInstance(tablet.TabletUID, tablet.MySQLPort, localCluster.TmpDirectory) 130 tablet.MysqlctlProcess.InitDBFile = newInitDBFile 131 tablet.MysqlctlProcess.ExtraArgs = extraArgs 132 proc, err := tablet.MysqlctlProcess.StartProcess() 133 if err != nil { 134 return 1, err 135 } 136 mysqlProcs = append(mysqlProcs, proc) 137 138 shard.Vttablets = append(shard.Vttablets, tablet) 139 } 140 for _, proc := range mysqlProcs { 141 if err := proc.Wait(); err != nil { 142 return 1, err 143 } 144 } 145 primary = shard.Vttablets[0] 146 replica1 = shard.Vttablets[1] 147 replica2 = shard.Vttablets[2] 148 replica3 = shard.Vttablets[3] 149 150 for _, tablet := range []cluster.Vttablet{*primary, *replica1} { 151 if err := tablet.VttabletProcess.Setup(); err != nil { 152 return 1, err 153 } 154 } 155 156 vtctldClientProcess := cluster.VtctldClientProcessInstance("localhost", localCluster.VtctldProcess.GrpcPort, localCluster.TmpDirectory) 157 _, err = vtctldClientProcess.ExecuteCommandWithOutput("SetKeyspaceDurabilityPolicy", keyspaceName, "--durability-policy=semi_sync") 158 if err != nil { 159 return 1, err 160 } 161 if err := localCluster.VtctlclientProcess.InitializeShard(keyspaceName, shard.Name, cell, primary.TabletUID); err != nil { 162 return 1, err 163 } 164 if err := localCluster.StartVTOrc(keyspaceName); err != nil { 165 return 1, err 166 } 167 return m.Run(), nil 168 }() 169 170 if err != nil { 171 log.Error(err.Error()) 172 os.Exit(1) 173 } else { 174 os.Exit(exitCode) 175 } 176 177 } 178 179 // TestRecoveryImpl does following 180 // - create a shard with primary and replica1 only 181 // - run InitShardPrimary 182 // - insert some data 183 // - take a backup 184 // - insert more data on the primary 185 // - take another backup 186 // - create a recovery keyspace after first backup 187 // - bring up tablet_replica2 in the new keyspace 188 // - check that new tablet does not have data created after backup1 189 // - create second recovery keyspace after second backup 190 // - bring up tablet_replica3 in second keyspace 191 // - check that new tablet has data created after backup1 but not data created after backup2 192 // - check that vtgate queries work correctly 193 func TestRecoveryImpl(t *testing.T) { 194 defer cluster.PanicHandler(t) 195 defer tabletsTeardown() 196 verifyInitialReplication(t) 197 198 err := localCluster.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias) 199 assert.NoError(t, err) 200 201 backups := listBackups(t) 202 require.Equal(t, len(backups), 1) 203 assert.Contains(t, backups[0], replica1.Alias) 204 205 _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test2')", keyspaceName, true) 206 assert.NoError(t, err) 207 cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 2) 208 209 err = localCluster.VtctlclientProcess.ApplyVSchema(keyspaceName, vSchema) 210 assert.NoError(t, err) 211 212 output, err := localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetVSchema", keyspaceName) 213 assert.NoError(t, err) 214 assert.Contains(t, output, "vt_insert_test") 215 216 recovery.RestoreTablet(t, localCluster, replica2, recoveryKS1, "0", keyspaceName, commonTabletArg) 217 218 output, err = localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetSrvVSchema", cell) 219 assert.NoError(t, err) 220 assert.Contains(t, output, keyspaceName) 221 assert.Contains(t, output, recoveryKS1) 222 223 err = localCluster.VtctlclientProcess.ExecuteCommand("GetSrvKeyspace", cell, keyspaceName) 224 assert.NoError(t, err) 225 226 output, err = localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetVSchema", recoveryKS1) 227 assert.NoError(t, err) 228 assert.Contains(t, output, "vt_insert_test") 229 230 cluster.VerifyRowsInTablet(t, replica2, keyspaceName, 1) 231 232 // update the original row in primary 233 _, err = primary.VttabletProcess.QueryTablet("update vt_insert_test set msg = 'msgx1' where id = 1", keyspaceName, true) 234 assert.NoError(t, err) 235 236 // verify that primary has new value 237 qr, err := primary.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) 238 assert.NoError(t, err) 239 assert.Equal(t, "msgx1", qr.Rows[0][0].ToString()) 240 241 // verify that restored replica has old value 242 qr, err = replica2.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) 243 assert.NoError(t, err) 244 assert.Equal(t, "test1", qr.Rows[0][0].ToString()) 245 246 err = localCluster.VtctlclientProcess.ExecuteCommand("Backup", replica1.Alias) 247 assert.NoError(t, err) 248 249 _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test3')", keyspaceName, true) 250 assert.NoError(t, err) 251 cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 3) 252 253 recovery.RestoreTablet(t, localCluster, replica3, recoveryKS2, "0", keyspaceName, commonTabletArg) 254 255 output, err = localCluster.VtctlclientProcess.ExecuteCommandWithOutput("GetVSchema", recoveryKS2) 256 assert.NoError(t, err) 257 assert.Contains(t, output, "vt_insert_test") 258 259 cluster.VerifyRowsInTablet(t, replica3, keyspaceName, 2) 260 261 // update the original row in primary 262 _, err = primary.VttabletProcess.QueryTablet("update vt_insert_test set msg = 'msgx2' where id = 1", keyspaceName, true) 263 assert.NoError(t, err) 264 265 // verify that primary has new value 266 qr, err = primary.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) 267 assert.NoError(t, err) 268 assert.Equal(t, "msgx2", qr.Rows[0][0].ToString()) 269 270 // verify that restored replica has old value 271 qr, err = replica3.VttabletProcess.QueryTablet("select msg from vt_insert_test where id = 1", keyspaceName, true) 272 assert.NoError(t, err) 273 assert.Equal(t, "msgx1", qr.Rows[0][0].ToString()) 274 275 vtgateInstance := localCluster.NewVtgateInstance() 276 vtgateInstance.TabletTypesToWait = "REPLICA" 277 err = vtgateInstance.Setup() 278 localCluster.VtgateGrpcPort = vtgateInstance.GrpcPort 279 assert.NoError(t, err) 280 defer vtgateInstance.TearDown() 281 assert.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.primary", keyspaceName, shardName), 1, 30*time.Second)) 282 assert.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", keyspaceName, shardName), 1, 30*time.Second)) 283 assert.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", recoveryKS1, shardName), 1, 30*time.Second)) 284 assert.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", recoveryKS2, shardName), 1, 30*time.Second)) 285 286 // Build vtgate grpc connection 287 grpcAddress := fmt.Sprintf("%s:%d", localCluster.Hostname, localCluster.VtgateGrpcPort) 288 vtgateConn, err := vtgateconn.Dial(context.Background(), grpcAddress) 289 assert.NoError(t, err) 290 defer vtgateConn.Close() 291 session := vtgateConn.Session("@replica", nil) 292 293 // check that vtgate doesn't route queries to new tablet 294 recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(3)") 295 recovery.VerifyQueriesUsingVtgate(t, session, "select msg from vt_insert_test where id = 1", `VARCHAR("msgx2")`) 296 recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select count(*) from %s.vt_insert_test", recoveryKS1), "INT64(1)") 297 recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select msg from %s.vt_insert_test where id = 1", recoveryKS1), `VARCHAR("test1")`) 298 recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select count(*) from %s.vt_insert_test", recoveryKS2), "INT64(2)") 299 recovery.VerifyQueriesUsingVtgate(t, session, fmt.Sprintf("select msg from %s.vt_insert_test where id = 1", recoveryKS2), `VARCHAR("msgx1")`) 300 301 // check that new keyspace is accessible with 'use ks' 302 cluster.ExecuteQueriesUsingVtgate(t, session, "use "+recoveryKS1+"@replica") 303 recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(1)") 304 305 cluster.ExecuteQueriesUsingVtgate(t, session, "use "+recoveryKS2+"@replica") 306 recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(2)") 307 308 // check that new tablet is accessible with use `ks:shard` 309 cluster.ExecuteQueriesUsingVtgate(t, session, "use `"+recoveryKS1+":0@replica`") 310 recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(1)") 311 312 cluster.ExecuteQueriesUsingVtgate(t, session, "use `"+recoveryKS2+":0@replica`") 313 recovery.VerifyQueriesUsingVtgate(t, session, "select count(*) from vt_insert_test", "INT64(2)") 314 } 315 316 // verifyInitialReplication will create schema in primary, insert some data to primary and verify the same data in replica. 317 func verifyInitialReplication(t *testing.T) { 318 _, err := primary.VttabletProcess.QueryTablet(vtInsertTest, keyspaceName, true) 319 assert.NoError(t, err) 320 _, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('test1')", keyspaceName, true) 321 assert.NoError(t, err) 322 cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 1) 323 } 324 325 func listBackups(t *testing.T) []string { 326 output, err := localCluster.ListBackups(shardKsName) 327 assert.NoError(t, err) 328 return output 329 } 330 331 func tabletsTeardown() { 332 var mysqlProcs []*exec.Cmd 333 for _, tablet := range []*cluster.Vttablet{primary, replica1, replica2, replica3} { 334 proc, _ := tablet.MysqlctlProcess.StopProcess() 335 mysqlProcs = append(mysqlProcs, proc) 336 tablet.VttabletProcess.TearDown() 337 } 338 for _, proc := range mysqlProcs { 339 proc.Wait() 340 } 341 }