vitess.io/vitess@v0.16.2/go/test/endtoend/reparent/plannedreparent/reparent_test.go

/*
Copyright 2019 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package plannedreparent

import (
	"context"
	"fmt"
	"strconv"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"google.golang.org/protobuf/encoding/protojson"

	"vitess.io/vitess/go/mysql"
	"vitess.io/vitess/go/test/endtoend/cluster"
	"vitess.io/vitess/go/test/endtoend/reparent/utils"
	"vitess.io/vitess/go/vt/log"
	replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata"
)

func TestPrimaryToSpareStateChangeImpossible(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	// We cannot change a primary to spare.
	out, err := clusterInstance.VtctlclientProcess.ExecuteCommandWithOutput("ChangeTabletType", tablets[0].Alias, "spare")
	require.Error(t, err, out)
	require.Contains(t, out, "type change PRIMARY -> SPARE is not an allowed transition for ChangeTabletType")
}

func TestReparentCrossCell(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	// Perform a graceful reparent operation to another cell.
	_, err := utils.Prs(t, clusterInstance, tablets[3])
	require.NoError(t, err)

	utils.ValidateTopology(t, clusterInstance, false)
	utils.CheckPrimaryTablet(t, clusterInstance, tablets[3])
}

func TestReparentGraceful(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	// Make sure replication is running on all replicas before reparenting.
	utils.WaitForReplicationToStart(t, clusterInstance, utils.KeyspaceName, utils.ShardName, len(tablets), true)

	// Perform a graceful reparent operation.
	utils.Prs(t, clusterInstance, tablets[1])
	utils.ValidateTopology(t, clusterInstance, false)
	utils.CheckPrimaryTablet(t, clusterInstance, tablets[1])

	// A graceful reparent to the same primary should be idempotent.
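	// Running PRS again with the current primary as the target should succeed
	// without demoting or promoting anything; the checks below confirm the
	// topology and the primary are unchanged.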
	utils.Prs(t, clusterInstance, tablets[1])
	utils.ValidateTopology(t, clusterInstance, false)
	utils.CheckPrimaryTablet(t, clusterInstance, tablets[1])

	utils.ConfirmReplication(t, tablets[1], []*cluster.Vttablet{tablets[0], tablets[2], tablets[3]})
}

// TestPRSWithDrainedLaggingTablet tests that PRS succeeds even if we have a lagging drained tablet.
func TestPRSWithDrainedLaggingTablet(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", tablets[1].Alias, "drained")
	require.NoError(t, err)

	utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]})

	// Make tablets[1] lag behind the other tablets by setting a large replication delay.
	utils.RunSQL(context.Background(), t, `stop slave;CHANGE MASTER TO MASTER_DELAY = 1999;start slave;`, tablets[1])

	// Insert another row; tablets[1] is lagging, so only tablets[2] and tablets[3] receive it.
	utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[2], tablets[3]})

	// Assert that there is indeed only 1 row in tablets[1].
	res := utils.RunSQL(context.Background(), t, `select msg from vt_insert_test;`, tablets[1])
	assert.Equal(t, 1, len(res.Rows))

	// Perform a graceful reparent operation.
	utils.Prs(t, clusterInstance, tablets[2])
	utils.ValidateTopology(t, clusterInstance, false)
	utils.CheckPrimaryTablet(t, clusterInstance, tablets[2])
}

func TestReparentReplicaOffline(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	// Kill one tablet so it appears offline.
	utils.StopTablet(t, tablets[3], true)

	// Perform a graceful reparent operation.
	out, err := utils.PrsWithTimeout(t, clusterInstance, tablets[1], false, "", "31s")
	require.Error(t, err)
	assert.True(t, utils.SetReplicationSourceFailed(tablets[3], out))

	utils.CheckPrimaryTablet(t, clusterInstance, tablets[1])
}

func TestReparentAvoid(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets
	utils.DeleteTablet(t, clusterInstance, tablets[2])

	// Perform a reparent operation with avoid_tablet pointing to a non-primary. It
	// should succeed without doing anything.
	_, err := utils.PrsAvoid(t, clusterInstance, tablets[1])
	require.NoError(t, err)

	utils.ValidateTopology(t, clusterInstance, false)
	utils.CheckPrimaryTablet(t, clusterInstance, tablets[0])

	// Perform a reparent operation with avoid_tablet pointing to the primary.
	_, err = utils.PrsAvoid(t, clusterInstance, tablets[0])
	require.NoError(t, err)
	utils.ValidateTopology(t, clusterInstance, false)

	// tablets[1] is in the same cell and tablets[3] is in a different cell, so we must land on tablets[1].
	utils.CheckPrimaryTablet(t, clusterInstance, tablets[1])

	// If we kill the tablet in the same cell as the primary, then reparent --avoid_tablet will fail.
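	// With tablets[2] deleted and tablets[0] stopped, the only remaining
	// candidate, tablets[3], lives in a different cell from the current primary
	// tablets[1], so avoiding tablets[1] leaves PRS nowhere to go.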
	utils.StopTablet(t, tablets[0], true)
	out, err := utils.PrsAvoid(t, clusterInstance, tablets[1])
	require.Error(t, err)
	assert.Contains(t, out, "cannot find a tablet to reparent to in the same cell as the current primary")
	utils.ValidateTopology(t, clusterInstance, false)
	utils.CheckPrimaryTablet(t, clusterInstance, tablets[1])
}

func TestReparentFromOutside(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	reparentFromOutside(t, clusterInstance, false)
}

func TestReparentFromOutsideWithNoPrimary(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	reparentFromOutside(t, clusterInstance, true)

	// FIXME: @Deepthi: is this needed, since we teardown the cluster, does this achieve any additional test coverage?
	// We will have to restart mysql to avoid hanging/locks due to external Reparent.
	for _, tablet := range tablets {
		log.Infof("Restarting MySQL for tablet %v", tablet.Alias)
		err := tablet.MysqlctlProcess.Stop()
		require.NoError(t, err)
		tablet.MysqlctlProcess.InitMysql = false
		err = tablet.MysqlctlProcess.Start()
		require.NoError(t, err)
	}
}

// reparentFromOutside starts a primary and 3 replicas. Then:
//   - one replica will be the new primary
//   - one replica will be reparented to that new primary
//   - one replica will be busted and dead in the water, and we'll call TabletExternallyReparented
//
// Args:
// downPrimary: kills the old primary first
func reparentFromOutside(t *testing.T, clusterInstance *cluster.LocalProcessCluster, downPrimary bool) {
	ctx := context.Background()
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	// Now manually reparent 1 out of 2 tablets:
	// tablets[1] will be the new primary,
	// tablets[2] won't be re-parented, so it will be busted.

	if !downPrimary {
		// Commands to stop the current primary.
		demoteCommands := "SET GLOBAL read_only = ON; FLUSH TABLES WITH READ LOCK; UNLOCK TABLES"
		utils.RunSQL(ctx, t, demoteCommands, tablets[0])

		// Get the position of the old primary and wait for the new one to catch up.
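		// The old primary should no longer accept writes after the demote
		// commands above, so its GTID position should be stable; this wait
		// ensures tablets[1] has applied everything before we promote it.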
		err := utils.WaitForReplicationPosition(t, tablets[0], tablets[1])
		require.NoError(t, err)
	}

	// Commands to convert a replica to be writable.
	promoteReplicaCommands := "STOP SLAVE; RESET SLAVE ALL; SET GLOBAL read_only = OFF;"
	utils.RunSQL(ctx, t, promoteReplicaCommands, tablets[1])

	// Get the primary position.
	_, gtID := cluster.GetPrimaryPosition(t, *tablets[1], utils.Hostname)

	// tablets[0] will now be a replica of tablets[1].
	changeReplicationSourceCommands := fmt.Sprintf("RESET MASTER; RESET SLAVE; SET GLOBAL gtid_purged = '%s';"+
		"CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d, MASTER_USER='vt_repl', MASTER_AUTO_POSITION = 1;"+
		"START SLAVE;", gtID, utils.Hostname, tablets[1].MySQLPort)
	utils.RunSQL(ctx, t, changeReplicationSourceCommands, tablets[0])

	// Capture the time (in seconds) when we made tablets[1] writable.
	baseTime := time.Now().UnixNano() / 1000000000

	// tablets[2] will be a replica of tablets[1].
	changeReplicationSourceCommands = fmt.Sprintf("STOP SLAVE; RESET MASTER; SET GLOBAL gtid_purged = '%s';"+
		"CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d, MASTER_USER='vt_repl', MASTER_AUTO_POSITION = 1;"+
		"START SLAVE;", gtID, utils.Hostname, tablets[1].MySQLPort)
	utils.RunSQL(ctx, t, changeReplicationSourceCommands, tablets[2])

	// To test downPrimary, we kill the old primary first and delete its tablet record.
	if downPrimary {
		err := tablets[0].VttabletProcess.TearDownWithTimeout(30 * time.Second)
		require.NoError(t, err)
		err = clusterInstance.VtctlclientProcess.ExecuteCommand("DeleteTablet", "--",
			"--allow_primary", tablets[0].Alias)
		require.NoError(t, err)
	}

	// Update the topology with the new server.
	err := clusterInstance.VtctlclientProcess.ExecuteCommand("TabletExternallyReparented",
		tablets[1].Alias)
	require.NoError(t, err)

	utils.CheckReparentFromOutside(t, clusterInstance, tablets[1], downPrimary, baseTime)

	if !downPrimary {
		err := tablets[0].VttabletProcess.TearDownWithTimeout(30 * time.Second)
		require.NoError(t, err)
	}
}

func TestReparentWithDownReplica(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	ctx := context.Background()

	utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]})

	// Stop the replica's mysqld process.
	err := tablets[2].MysqlctlProcess.Stop()
	require.NoError(t, err)

	utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[3]})

	// Perform a graceful reparent operation. It will fail as one tablet is down.
	out, err := utils.Prs(t, clusterInstance, tablets[1])
	require.Error(t, err)
	assert.True(t, utils.SetReplicationSourceFailed(tablets[2], out))

	// Insert data into the new primary and check that the connected replicas receive it.
	insertVal := utils.ConfirmReplication(t, tablets[1], []*cluster.Vttablet{tablets[0], tablets[3]})

	// Restart mysql on the old replica; it should still be trying to connect to the old primary.
	tablets[2].MysqlctlProcess.InitMysql = false
	err = tablets[2].MysqlctlProcess.Start()
	require.NoError(t, err)

	// Use the same PlannedReparentShard command to fix up the tablet.
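	// Re-running PRS against the current primary should succeed and leave
	// tablets[1] in place; replication on the restarted tablets[2] still has to
	// be started explicitly below.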
	_, err = utils.Prs(t, clusterInstance, tablets[1])
	require.NoError(t, err)

	// We have to StartReplication on tablets[2] since its MySQL instance was restarted and does not have replication running.
	// We used to rely on the replicationManager to fix this, but it has been disabled in our testing environment for the latest versions of vttablet and vtctl.
	err = clusterInstance.VtctlclientProcess.ExecuteCommand("StartReplication", tablets[2].Alias)
	require.NoError(t, err)

	// Wait until it gets the data.
	err = utils.CheckInsertedValues(ctx, t, tablets[2], insertVal)
	require.NoError(t, err)
}

func TestChangeTypeSemiSync(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	ctx := context.Background()

	// Create new names for the tablets, so this test is less confusing.
	primary, replica, rdonly1, rdonly2 := tablets[0], tablets[1], tablets[2], tablets[3]

	// Set the tablet type of both rdonly tablets to rdonly.
	err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonly1.Alias, "rdonly")
	require.NoError(t, err)
	err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonly2.Alias, "rdonly")
	require.NoError(t, err)

	utils.ValidateTopology(t, clusterInstance, true)

	utils.CheckPrimaryTablet(t, clusterInstance, primary)

	// Stop replication on rdonly1, to make sure that when we make it a replica it doesn't start again.
	// Note we do a similar test for replica -> rdonly below.
	err = clusterInstance.VtctlclientProcess.ExecuteCommand("StopReplication", rdonly1.Alias)
	require.NoError(t, err)

	// Check semi-sync on the replicas.
	// The flag is only an indication of the value to use next time
	// we turn replication on, so also check the status.
	// rdonly1 is not replicating, so its status is off.
	utils.CheckDBvar(ctx, t, replica, "rpl_semi_sync_slave_enabled", "ON")
	utils.CheckDBvar(ctx, t, rdonly1, "rpl_semi_sync_slave_enabled", "OFF")
	utils.CheckDBvar(ctx, t, rdonly2, "rpl_semi_sync_slave_enabled", "OFF")
	utils.CheckDBstatus(ctx, t, replica, "Rpl_semi_sync_slave_status", "ON")
	utils.CheckDBstatus(ctx, t, rdonly1, "Rpl_semi_sync_slave_status", "OFF")
	utils.CheckDBstatus(ctx, t, rdonly2, "Rpl_semi_sync_slave_status", "OFF")

	// Changing replica to rdonly while replicating should turn off semi-sync and restart replication.
	err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", replica.Alias, "rdonly")
	require.NoError(t, err)
	utils.CheckDBvar(ctx, t, replica, "rpl_semi_sync_slave_enabled", "OFF")
	utils.CheckDBstatus(ctx, t, replica, "Rpl_semi_sync_slave_status", "OFF")

	// Changing rdonly1 to replica should turn on semi-sync, but not start replication.
	err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonly1.Alias, "replica")
	require.NoError(t, err)
	utils.CheckDBvar(ctx, t, rdonly1, "rpl_semi_sync_slave_enabled", "ON")
	utils.CheckDBstatus(ctx, t, rdonly1, "Rpl_semi_sync_slave_status", "OFF")
	utils.CheckReplicaStatus(ctx, t, rdonly1)

	// Now change from replica back to rdonly; make sure replication is still not enabled.
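	// rdonly1 never restarted replication when it became a replica above, so
	// moving it back to rdonly should leave replication stopped and only flip
	// the semi-sync flag back off.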
	err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonly1.Alias, "rdonly")
	require.NoError(t, err)
	utils.CheckDBvar(ctx, t, rdonly1, "rpl_semi_sync_slave_enabled", "OFF")
	utils.CheckDBstatus(ctx, t, rdonly1, "Rpl_semi_sync_slave_status", "OFF")
	utils.CheckReplicaStatus(ctx, t, rdonly1)

	// Changing rdonly2 to replica should turn on semi-sync and restart replication.
	err = clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", rdonly2.Alias, "replica")
	require.NoError(t, err)
	utils.CheckDBvar(ctx, t, rdonly2, "rpl_semi_sync_slave_enabled", "ON")
	utils.CheckDBstatus(ctx, t, rdonly2, "Rpl_semi_sync_slave_status", "ON")
}

// TestCrossCellDurability tests 2 things:
//  1. When PRS is run with the cross_cell durability policy set up, the semi-sync settings on all the tablets are as expected.
//  2. Bringing up a new vttablet should have its replication and semi-sync set up correctly without any manual intervention.
func TestCrossCellDurability(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "cross_cell")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]})

	// When tablets[0] is the primary, the only tablet in a different cell is tablets[3],
	// so the other two should have semi-sync turned off.
	utils.CheckSemiSyncSetupCorrectly(t, tablets[0], "ON")
	utils.CheckSemiSyncSetupCorrectly(t, tablets[3], "ON")
	utils.CheckSemiSyncSetupCorrectly(t, tablets[1], "OFF")
	utils.CheckSemiSyncSetupCorrectly(t, tablets[2], "OFF")

	// Run a planned reparent operation to tablets[3]; this should proceed unimpeded.
	out, err := utils.Prs(t, clusterInstance, tablets[3])
	require.NoError(t, err, out)

	utils.ConfirmReplication(t, tablets[3], []*cluster.Vttablet{tablets[0], tablets[1], tablets[2]})

	// All the tablets will have semi-sync set up since tablets[3] is in Cell2 and all
	// the others are in Cell1, so all of them are eligible to send semi-sync ACKs.
	for _, tablet := range tablets {
		utils.CheckSemiSyncSetupCorrectly(t, tablet, "ON")
	}

	for i, supportsBackup := range []bool{false, true} {
		// Bring up a new replica tablet. In this new tablet, we do not disable
		// active reparents, otherwise replication will not be started.
		newReplica := utils.StartNewVTTablet(t, clusterInstance, 300+i, supportsBackup)
		// Add the tablet to the list of tablets in this shard.
		clusterInstance.Keyspaces[0].Shards[0].Vttablets = append(clusterInstance.Keyspaces[0].Shards[0].Vttablets, newReplica)
		// Check that we can replicate to it and that semi-sync is set up correctly on it.
		utils.ConfirmReplication(t, tablets[3], []*cluster.Vttablet{tablets[0], tablets[1], tablets[2], newReplica})
		utils.CheckSemiSyncSetupCorrectly(t, newReplica, "ON")
	}
}

// TestFullStatus tests that the RPC FullStatus works as intended.
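// It runs the vtctldclient GetFullStatus command against both a primary and a
// replica tablet and verifies the server, binlog, semi-sync, and replication
// fields that the RPC reports.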
func TestFullStatus(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets
	utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]})

	// Check that full status gives the correct result for a primary tablet.
	primaryTablet := tablets[0]
	primaryStatusString, err := clusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("GetFullStatus", primaryTablet.Alias)
	require.NoError(t, err)
	primaryStatus := &replicationdatapb.FullStatus{}
	opt := protojson.UnmarshalOptions{DiscardUnknown: true}
	err = opt.Unmarshal([]byte(primaryStatusString), primaryStatus)
	require.NoError(t, err)
	assert.NotEmpty(t, primaryStatus.ServerUuid)
	assert.NotEmpty(t, primaryStatus.ServerId)
	// For a primary tablet there is no replication status.
	assert.Nil(t, primaryStatus.ReplicationStatus)
	assert.Contains(t, primaryStatus.PrimaryStatus.String(), "vt-0000000101-bin")
	assert.Equal(t, primaryStatus.GtidPurged, "MySQL56/")
	assert.False(t, primaryStatus.ReadOnly)
	assert.True(t, primaryStatus.SemiSyncPrimaryEnabled)
	assert.True(t, primaryStatus.SemiSyncReplicaEnabled)
	assert.True(t, primaryStatus.SemiSyncPrimaryStatus)
	assert.False(t, primaryStatus.SemiSyncReplicaStatus)
	assert.EqualValues(t, 3, primaryStatus.SemiSyncPrimaryClients)
	assert.EqualValues(t, 1000000000000000000, primaryStatus.SemiSyncPrimaryTimeout)
	assert.EqualValues(t, 1, primaryStatus.SemiSyncWaitForReplicaCount)
	assert.Equal(t, "ROW", primaryStatus.BinlogFormat)
	assert.Equal(t, "FULL", primaryStatus.BinlogRowImage)
	assert.Equal(t, "ON", primaryStatus.GtidMode)
	assert.True(t, primaryStatus.LogReplicaUpdates)
	assert.True(t, primaryStatus.LogBinEnabled)
	assert.Regexp(t, `[58]\.[07].*`, primaryStatus.Version)
	assert.NotEmpty(t, primaryStatus.VersionComment)

	replicaTablet := tablets[1]

	waitForFilePosition(t, clusterInstance, primaryTablet, replicaTablet, 5*time.Second)

	// Check that full status gives the correct result for a replica tablet.
	replicaStatusString, err := clusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("GetFullStatus", replicaTablet.Alias)
	require.NoError(t, err)
	replicaStatus := &replicationdatapb.FullStatus{}
	opt = protojson.UnmarshalOptions{DiscardUnknown: true}
	err = opt.Unmarshal([]byte(replicaStatusString), replicaStatus)
	require.NoError(t, err)
	assert.NotEmpty(t, replicaStatus.ServerUuid)
	assert.NotEmpty(t, replicaStatus.ServerId)
	assert.Contains(t, replicaStatus.ReplicationStatus.Position, "MySQL56/"+replicaStatus.ReplicationStatus.SourceUuid)
	assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.IoState)
	assert.EqualValues(t, mysql.ReplicationStateRunning, replicaStatus.ReplicationStatus.SqlState)
	assert.Equal(t, fileNameFromPosition(replicaStatus.ReplicationStatus.FilePosition), fileNameFromPosition(primaryStatus.PrimaryStatus.FilePosition))
	assert.LessOrEqual(t, rowNumberFromPosition(replicaStatus.ReplicationStatus.FilePosition), rowNumberFromPosition(primaryStatus.PrimaryStatus.FilePosition))
	assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogSourceBinlogEquivalentPosition, primaryStatus.PrimaryStatus.FilePosition)
	assert.Contains(t, replicaStatus.ReplicationStatus.RelayLogFilePosition, "vt-0000000102-relay")
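	// waitForFilePosition above guaranteed the replica has executed everything
	// the primary has written, so the GTID-based positions should now match exactly.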
	assert.Equal(t, replicaStatus.ReplicationStatus.Position, primaryStatus.PrimaryStatus.Position)
	assert.Equal(t, replicaStatus.ReplicationStatus.RelayLogPosition, primaryStatus.PrimaryStatus.Position)
	assert.Empty(t, replicaStatus.ReplicationStatus.LastIoError)
	assert.Empty(t, replicaStatus.ReplicationStatus.LastSqlError)
	assert.Equal(t, replicaStatus.ReplicationStatus.SourceUuid, primaryStatus.ServerUuid)
	assert.LessOrEqual(t, int(replicaStatus.ReplicationStatus.ReplicationLagSeconds), 1)
	assert.False(t, replicaStatus.ReplicationStatus.ReplicationLagUnknown)
	assert.EqualValues(t, 0, replicaStatus.ReplicationStatus.SqlDelay)
	assert.False(t, replicaStatus.ReplicationStatus.SslAllowed)
	assert.False(t, replicaStatus.ReplicationStatus.HasReplicationFilters)
	assert.False(t, replicaStatus.ReplicationStatus.UsingGtid)
	assert.True(t, replicaStatus.ReplicationStatus.AutoPosition)
	assert.Equal(t, replicaStatus.ReplicationStatus.SourceHost, utils.Hostname)
	assert.EqualValues(t, replicaStatus.ReplicationStatus.SourcePort, tablets[0].MySQLPort)
	assert.Equal(t, replicaStatus.ReplicationStatus.SourceUser, "vt_repl")
	assert.Contains(t, replicaStatus.PrimaryStatus.String(), "vt-0000000102-bin")
	assert.Equal(t, replicaStatus.GtidPurged, "MySQL56/")
	assert.True(t, replicaStatus.ReadOnly)
	assert.False(t, replicaStatus.SemiSyncPrimaryEnabled)
	assert.True(t, replicaStatus.SemiSyncReplicaEnabled)
	assert.False(t, replicaStatus.SemiSyncPrimaryStatus)
	assert.True(t, replicaStatus.SemiSyncReplicaStatus)
	assert.EqualValues(t, 0, replicaStatus.SemiSyncPrimaryClients)
	assert.EqualValues(t, 1000000000000000000, replicaStatus.SemiSyncPrimaryTimeout)
	assert.EqualValues(t, 1, replicaStatus.SemiSyncWaitForReplicaCount)
	assert.Equal(t, "ROW", replicaStatus.BinlogFormat)
	assert.Equal(t, "FULL", replicaStatus.BinlogRowImage)
	assert.Equal(t, "ON", replicaStatus.GtidMode)
	assert.True(t, replicaStatus.LogReplicaUpdates)
	assert.True(t, replicaStatus.LogBinEnabled)
	assert.Regexp(t, `[58]\.[07].*`, replicaStatus.Version)
	assert.NotEmpty(t, replicaStatus.VersionComment)
}

// getFullStatus fetches the FullStatus RPC result for a tablet via vtctldclient.
func getFullStatus(t *testing.T, clusterInstance *cluster.LocalProcessCluster, tablet *cluster.Vttablet) *replicationdatapb.FullStatus {
	statusString, err := clusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("GetFullStatus", tablet.Alias)
	require.NoError(t, err)
	status := &replicationdatapb.FullStatus{}
	opt := protojson.UnmarshalOptions{DiscardUnknown: true}
	err = opt.Unmarshal([]byte(statusString), status)
	require.NoError(t, err)
	return status
}

// waitForFilePosition waits, up to the given timeout, for the file positions of the
// primary and the replica to align, to fix flakiness in tests caused by race
// conditions where the replica is still catching up.
func waitForFilePosition(t *testing.T, clusterInstance *cluster.LocalProcessCluster, primary *cluster.Vttablet, replica *cluster.Vttablet, timeout time.Duration) {
	start := time.Now()
	for {
		primaryStatus := getFullStatus(t, clusterInstance, primary)
		replicaStatus := getFullStatus(t, clusterInstance, replica)
		if primaryStatus.PrimaryStatus.FilePosition == replicaStatus.ReplicationStatus.FilePosition {
			return
		}
		if d := time.Since(start); d > timeout {
			require.FailNowf(t, "waitForFilePosition timed out", "primary %s, replica %s",
				primaryStatus.PrimaryStatus.FilePosition, replicaStatus.ReplicationStatus.FilePosition)
		}
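		// Positions don't match yet; pause briefly before polling again.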
		time.Sleep(100 * time.Millisecond)
	}
}

// fileNameFromPosition returns the binlog file-name portion of a file position
// string by stripping the last four characters; the tests assume the position
// ends in a four-digit offset.
func fileNameFromPosition(pos string) string {
	return pos[0 : len(pos)-4]
}

// rowNumberFromPosition returns the numeric offset encoded in the last four
// characters of a file position string.
func rowNumberFromPosition(pos string) int {
	rowNumStr := pos[len(pos)-4:]
	rowNum, _ := strconv.Atoi(rowNumStr)
	return rowNum
}