vitess.io/vitess@v0.16.2/go/vt/wrangler/testlib/planned_reparent_shard_test.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package testlib 18 19 import ( 20 "context" 21 "errors" 22 "testing" 23 "time" 24 25 "vitess.io/vitess/go/vt/mysqlctl" 26 27 "github.com/stretchr/testify/assert" 28 "github.com/stretchr/testify/require" 29 30 "vitess.io/vitess/go/mysql" 31 "vitess.io/vitess/go/vt/discovery" 32 "vitess.io/vitess/go/vt/logutil" 33 "vitess.io/vitess/go/vt/topo/memorytopo" 34 "vitess.io/vitess/go/vt/topo/topoproto" 35 "vitess.io/vitess/go/vt/vtctl/reparentutil/reparenttestutil" 36 "vitess.io/vitess/go/vt/vttablet/tabletservermock" 37 "vitess.io/vitess/go/vt/vttablet/tmclient" 38 "vitess.io/vitess/go/vt/wrangler" 39 40 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 41 ) 42 43 func TestPlannedReparentShardNoPrimaryProvided(t *testing.T) { 44 delay := discovery.GetTabletPickerRetryDelay() 45 defer func() { 46 discovery.SetTabletPickerRetryDelay(delay) 47 }() 48 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 49 50 ts := memorytopo.NewServer("cell1", "cell2") 51 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 52 vp := NewVtctlPipe(t, ts) 53 defer vp.Close() 54 55 // Create a primary, a couple good replicas 56 oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 57 newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) 58 goodReplica1 := NewFakeTablet(t, wr, "cell2", 2, topodatapb.TabletType_REPLICA, nil) 59 reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync") 60 61 // new primary 62 newPrimary.FakeMysqlDaemon.ReadOnly = true 63 newPrimary.FakeMysqlDaemon.Replicating = true 64 newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{ 65 GTIDSet: mysql.MariadbGTIDSet{ 66 7: mysql.MariadbGTID{ 67 Domain: 7, 68 Server: 123, 69 Sequence: 990, 70 }, 71 }, 72 }} 73 newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{ 74 GTIDSet: mysql.MariadbGTIDSet{ 75 7: mysql.MariadbGTID{ 76 Domain: 7, 77 Server: 456, 78 Sequence: 991, 79 }, 80 }, 81 } 82 newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 83 "STOP SLAVE", 84 "RESET SLAVE ALL", 85 "FAKE SET MASTER", 86 "START SLAVE", 87 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 88 } 89 newPrimary.StartActionLoop(t, wr) 90 defer newPrimary.StopActionLoop(t) 91 92 // old primary 93 oldPrimary.FakeMysqlDaemon.ReadOnly = false 94 oldPrimary.FakeMysqlDaemon.Replicating = false 95 oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica 96 oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] 97 oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) 98 oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 99 "RESET SLAVE ALL", 100 "FAKE SET MASTER", 101 "START SLAVE", 102 // we end up calling SetReplicationSource twice on the old primary 103 "RESET SLAVE ALL", 104 "FAKE SET MASTER", 105 "START SLAVE", 106 } 107 oldPrimary.StartActionLoop(t, wr) 108 defer oldPrimary.StopActionLoop(t) 109 oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) 110 111 // SetReplicationSource is called on new primary to make sure it's replicating before reparenting. 112 newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) 113 114 // good replica 1 is replicating 115 goodReplica1.FakeMysqlDaemon.ReadOnly = true 116 goodReplica1.FakeMysqlDaemon.Replicating = true 117 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 118 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 119 // These 4 statements come from tablet startup 120 "STOP SLAVE", 121 "RESET SLAVE ALL", 122 "FAKE SET MASTER", 123 "START SLAVE", 124 "STOP SLAVE", 125 "RESET SLAVE ALL", 126 "FAKE SET MASTER", 127 "START SLAVE", 128 } 129 goodReplica1.StartActionLoop(t, wr) 130 defer goodReplica1.StopActionLoop(t) 131 132 // run PlannedReparentShard 133 err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard}) 134 require.NoError(t, err) 135 136 // check what was run 137 err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList() 138 require.NoError(t, err) 139 140 err = oldPrimary.FakeMysqlDaemon.CheckSuperQueryList() 141 require.NoError(t, err) 142 143 err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList() 144 require.NoError(t, err) 145 146 assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly is set") 147 assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly not set") 148 assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set") 149 assert.True(t, oldPrimary.TM.QueryServiceControl.IsServing(), "oldPrimary...QueryServiceControl not serving") 150 151 // verify the old primary was told to start replicating (and not 152 // the replica that wasn't replicating in the first place) 153 assert.True(t, oldPrimary.FakeMysqlDaemon.Replicating, "oldPrimary.FakeMysqlDaemon.Replicating not set") 154 assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set") 155 checkSemiSyncEnabled(t, true, true, newPrimary) 156 checkSemiSyncEnabled(t, false, true, goodReplica1, oldPrimary) 157 } 158 159 func TestPlannedReparentShardNoError(t *testing.T) { 160 delay := discovery.GetTabletPickerRetryDelay() 161 defer func() { 162 discovery.SetTabletPickerRetryDelay(delay) 163 }() 164 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 165 166 ts := memorytopo.NewServer("cell1", "cell2") 167 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 168 vp := NewVtctlPipe(t, ts) 169 defer vp.Close() 170 171 // Create a primary, a couple good replicas 172 oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 173 newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) 174 goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 175 goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) 176 reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync") 177 178 // new primary 179 newPrimary.FakeMysqlDaemon.ReadOnly = true 180 newPrimary.FakeMysqlDaemon.Replicating = true 181 newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{ 182 GTIDSet: mysql.MariadbGTIDSet{ 183 7: mysql.MariadbGTID{ 184 Domain: 7, 185 Server: 123, 186 Sequence: 990, 187 }, 188 }, 189 }} 190 newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{ 191 GTIDSet: mysql.MariadbGTIDSet{ 192 7: mysql.MariadbGTID{ 193 Domain: 7, 194 Server: 456, 195 Sequence: 991, 196 }, 197 }, 198 } 199 newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 200 "STOP SLAVE", 201 "RESET SLAVE ALL", 202 "FAKE SET MASTER", 203 "START SLAVE", 204 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 205 } 206 newPrimary.StartActionLoop(t, wr) 207 defer newPrimary.StopActionLoop(t) 208 209 // old primary 210 oldPrimary.FakeMysqlDaemon.ReadOnly = false 211 oldPrimary.FakeMysqlDaemon.Replicating = false 212 oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica 213 oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] 214 oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) 215 oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 216 "RESET SLAVE ALL", 217 "FAKE SET MASTER", 218 "START SLAVE", 219 // we end up calling SetReplicationSource twice on the old primary 220 "RESET SLAVE ALL", 221 "FAKE SET MASTER", 222 "START SLAVE", 223 } 224 oldPrimary.StartActionLoop(t, wr) 225 defer oldPrimary.StopActionLoop(t) 226 oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) 227 228 // SetReplicationSource is called on new primary to make sure it's replicating before reparenting. 229 newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) 230 231 // goodReplica1 is replicating 232 goodReplica1.FakeMysqlDaemon.ReadOnly = true 233 goodReplica1.FakeMysqlDaemon.Replicating = true 234 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 235 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 236 // These 4 statements come from tablet startup 237 "STOP SLAVE", 238 "RESET SLAVE ALL", 239 "FAKE SET MASTER", 240 "START SLAVE", 241 "STOP SLAVE", 242 "RESET SLAVE ALL", 243 "FAKE SET MASTER", 244 "START SLAVE", 245 } 246 goodReplica1.StartActionLoop(t, wr) 247 defer goodReplica1.StopActionLoop(t) 248 249 // goodReplica2 is not replicating 250 goodReplica2.FakeMysqlDaemon.ReadOnly = true 251 goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 252 goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 253 // These 4 statements come from tablet startup 254 "STOP SLAVE", 255 "RESET SLAVE ALL", 256 "FAKE SET MASTER", 257 "START SLAVE", 258 "RESET SLAVE ALL", 259 "FAKE SET MASTER", 260 } 261 goodReplica2.StartActionLoop(t, wr) 262 goodReplica2.FakeMysqlDaemon.Replicating = false 263 defer goodReplica2.StopActionLoop(t) 264 265 // run PlannedReparentShard 266 err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", 267 topoproto.TabletAliasString(newPrimary.Tablet.Alias)}) 268 require.NoError(t, err) 269 270 // check what was run 271 err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList() 272 require.NoError(t, err) 273 err = oldPrimary.FakeMysqlDaemon.CheckSuperQueryList() 274 require.NoError(t, err) 275 err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList() 276 require.NoError(t, err) 277 err = goodReplica2.FakeMysqlDaemon.CheckSuperQueryList() 278 require.NoError(t, err) 279 280 assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly set") 281 assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly not set") 282 assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set") 283 284 assert.True(t, goodReplica2.FakeMysqlDaemon.ReadOnly, "goodReplica2.FakeMysqlDaemon.ReadOnly not set") 285 assert.True(t, oldPrimary.TM.QueryServiceControl.IsServing(), "oldPrimary...QueryServiceControl not serving") 286 287 // verify the old primary was told to start replicating (and not 288 // the replica that wasn't replicating in the first place) 289 assert.True(t, oldPrimary.FakeMysqlDaemon.Replicating, "oldPrimary.FakeMysqlDaemon.Replicating not set") 290 assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set") 291 assert.False(t, goodReplica2.FakeMysqlDaemon.Replicating, "goodReplica2.FakeMysqlDaemon.Replicating set") 292 293 checkSemiSyncEnabled(t, true, true, newPrimary) 294 checkSemiSyncEnabled(t, false, true, goodReplica1, goodReplica2, oldPrimary) 295 } 296 297 func TestPlannedReparentInitialization(t *testing.T) { 298 delay := discovery.GetTabletPickerRetryDelay() 299 defer func() { 300 discovery.SetTabletPickerRetryDelay(delay) 301 }() 302 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 303 304 ts := memorytopo.NewServer("cell1", "cell2") 305 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 306 vp := NewVtctlPipe(t, ts) 307 defer vp.Close() 308 309 // Create a few replicas. 310 newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) 311 goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 312 goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) 313 reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync") 314 315 // new primary 316 newPrimary.FakeMysqlDaemon.ReadOnly = true 317 newPrimary.FakeMysqlDaemon.Replicating = true 318 newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{ 319 GTIDSet: mysql.MariadbGTIDSet{ 320 7: mysql.MariadbGTID{ 321 Domain: 7, 322 Server: 456, 323 Sequence: 991, 324 }, 325 }, 326 } 327 newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 328 mysqlctl.GenerateInitialBinlogEntry(), 329 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 330 } 331 newPrimary.StartActionLoop(t, wr) 332 defer newPrimary.StopActionLoop(t) 333 334 // goodReplica1 is replicating 335 goodReplica1.FakeMysqlDaemon.ReadOnly = true 336 goodReplica1.FakeMysqlDaemon.Replicating = true 337 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) 338 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 339 "STOP SLAVE", 340 "RESET SLAVE ALL", 341 "FAKE SET MASTER", 342 "START SLAVE", 343 } 344 goodReplica1.StartActionLoop(t, wr) 345 defer goodReplica1.StopActionLoop(t) 346 347 // goodReplica2 is not replicating 348 goodReplica2.FakeMysqlDaemon.ReadOnly = true 349 goodReplica2.FakeMysqlDaemon.Replicating = false 350 goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) 351 goodReplica2.StartActionLoop(t, wr) 352 goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 353 "RESET SLAVE ALL", 354 "FAKE SET MASTER", 355 } 356 defer goodReplica2.StopActionLoop(t) 357 358 // run PlannedReparentShard 359 err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)}) 360 require.NoError(t, err) 361 362 // check what was run 363 err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList() 364 require.NoError(t, err) 365 err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList() 366 require.NoError(t, err) 367 err = goodReplica2.FakeMysqlDaemon.CheckSuperQueryList() 368 require.NoError(t, err) 369 370 assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly set") 371 assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set") 372 assert.True(t, goodReplica2.FakeMysqlDaemon.ReadOnly, "goodReplica2.FakeMysqlDaemon.ReadOnly not set") 373 374 assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set") 375 assert.False(t, goodReplica2.FakeMysqlDaemon.Replicating, "goodReplica2.FakeMysqlDaemon.Replicating set") 376 377 checkSemiSyncEnabled(t, true, true, newPrimary) 378 checkSemiSyncEnabled(t, false, true, goodReplica1, goodReplica2) 379 } 380 381 // TestPlannedReparentShardWaitForPositionFail simulates a failure of the WaitForPosition call 382 // on the desired new primary tablet 383 func TestPlannedReparentShardWaitForPositionFail(t *testing.T) { 384 delay := discovery.GetTabletPickerRetryDelay() 385 defer func() { 386 discovery.SetTabletPickerRetryDelay(delay) 387 }() 388 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 389 390 ts := memorytopo.NewServer("cell1", "cell2") 391 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 392 vp := NewVtctlPipe(t, ts) 393 defer vp.Close() 394 395 // Create a primary, a couple good replicas 396 oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 397 newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) 398 goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 399 goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) 400 401 // new primary 402 newPrimary.FakeMysqlDaemon.ReadOnly = true 403 newPrimary.FakeMysqlDaemon.Replicating = true 404 newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{ 405 GTIDSet: mysql.MariadbGTIDSet{ 406 7: mysql.MariadbGTID{ 407 Domain: 7, 408 Server: 123, 409 Sequence: 990, 410 }, 411 }, 412 }} 413 newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{ 414 GTIDSet: mysql.MariadbGTIDSet{ 415 7: mysql.MariadbGTID{ 416 Domain: 7, 417 Server: 456, 418 Sequence: 991, 419 }, 420 }, 421 } 422 newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 423 "STOP SLAVE", 424 "RESET SLAVE ALL", 425 "FAKE SET MASTER", 426 "START SLAVE", 427 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 428 } 429 newPrimary.StartActionLoop(t, wr) 430 defer newPrimary.StopActionLoop(t) 431 432 // old primary 433 oldPrimary.FakeMysqlDaemon.ReadOnly = false 434 oldPrimary.FakeMysqlDaemon.Replicating = false 435 // set to incorrect value to make promote fail on WaitForReplicationPos 436 oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.PromoteResult 437 oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) 438 oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 439 "RESET SLAVE ALL", 440 "FAKE SET MASTER", 441 "START SLAVE", 442 } 443 oldPrimary.StartActionLoop(t, wr) 444 defer oldPrimary.StopActionLoop(t) 445 oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) 446 // SetReplicationSource is called on new primary to make sure it's replicating before reparenting. 447 newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) 448 449 // good replica 1 is replicating 450 goodReplica1.FakeMysqlDaemon.ReadOnly = true 451 goodReplica1.FakeMysqlDaemon.Replicating = true 452 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 453 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 454 // These 4 statements come from tablet startup 455 "STOP SLAVE", 456 "RESET SLAVE ALL", 457 "FAKE SET MASTER", 458 "START SLAVE", 459 "STOP SLAVE", 460 "RESET SLAVE ALL", 461 "FAKE SET MASTER", 462 "START SLAVE", 463 } 464 goodReplica1.StartActionLoop(t, wr) 465 defer goodReplica1.StopActionLoop(t) 466 467 // good replica 2 is not replicating 468 goodReplica2.FakeMysqlDaemon.ReadOnly = true 469 goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 470 goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 471 // These 4 statements come from tablet startup 472 "STOP SLAVE", 473 "RESET SLAVE ALL", 474 "FAKE SET MASTER", 475 "START SLAVE", 476 "RESET SLAVE ALL", 477 "FAKE SET MASTER", 478 } 479 goodReplica2.StartActionLoop(t, wr) 480 goodReplica2.FakeMysqlDaemon.Replicating = false 481 defer goodReplica2.StopActionLoop(t) 482 483 // run PlannedReparentShard 484 err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)}) 485 assert.Error(t, err) 486 assert.Contains(t, err.Error(), "replication on primary-elect cell1-0000000001 did not catch up in time") 487 488 // now check that DemotePrimary was undone and old primary is still primary 489 assert.True(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly not set") 490 assert.False(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly set") 491 } 492 493 // TestPlannedReparentShardWaitForPositionTimeout simulates a context timeout 494 // during the WaitForPosition call to the desired new primary 495 func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) { 496 delay := discovery.GetTabletPickerRetryDelay() 497 defer func() { 498 discovery.SetTabletPickerRetryDelay(delay) 499 }() 500 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 501 502 ts := memorytopo.NewServer("cell1", "cell2") 503 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 504 vp := NewVtctlPipe(t, ts) 505 defer vp.Close() 506 507 // Create a primary, a couple good replicas 508 oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 509 newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) 510 goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 511 goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) 512 513 // new primary 514 newPrimary.FakeMysqlDaemon.TimeoutHook = func() error { return context.DeadlineExceeded } 515 newPrimary.FakeMysqlDaemon.ReadOnly = true 516 newPrimary.FakeMysqlDaemon.Replicating = true 517 newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{ 518 GTIDSet: mysql.MariadbGTIDSet{ 519 7: mysql.MariadbGTID{ 520 Domain: 7, 521 Server: 123, 522 Sequence: 990, 523 }, 524 }, 525 }} 526 newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{ 527 GTIDSet: mysql.MariadbGTIDSet{ 528 7: mysql.MariadbGTID{ 529 Domain: 7, 530 Server: 456, 531 Sequence: 991, 532 }, 533 }, 534 } 535 newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 536 "STOP SLAVE", 537 "RESET SLAVE ALL", 538 "FAKE SET MASTER", 539 "START SLAVE", 540 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 541 } 542 newPrimary.StartActionLoop(t, wr) 543 defer newPrimary.StopActionLoop(t) 544 545 // old primary 546 oldPrimary.FakeMysqlDaemon.ReadOnly = false 547 oldPrimary.FakeMysqlDaemon.Replicating = false 548 oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] 549 oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) 550 oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 551 "RESET SLAVE ALL", 552 "FAKE SET MASTER", 553 "START SLAVE", 554 } 555 oldPrimary.StartActionLoop(t, wr) 556 defer oldPrimary.StopActionLoop(t) 557 oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) 558 559 // SetReplicationSource is called on new primary to make sure it's replicating before reparenting. 560 newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) 561 // good replica 1 is replicating 562 goodReplica1.FakeMysqlDaemon.ReadOnly = true 563 goodReplica1.FakeMysqlDaemon.Replicating = true 564 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 565 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 566 // These 4 statements come from tablet startup 567 "STOP SLAVE", 568 "RESET SLAVE ALL", 569 "FAKE SET MASTER", 570 "START SLAVE", 571 "STOP SLAVE", 572 "RESET SLAVE ALL", 573 "FAKE SET MASTER", 574 "START SLAVE", 575 } 576 goodReplica1.StartActionLoop(t, wr) 577 defer goodReplica1.StopActionLoop(t) 578 579 // good replica 2 is not replicating 580 goodReplica2.FakeMysqlDaemon.ReadOnly = true 581 goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 582 goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 583 // These 4 statements come from tablet startup 584 "STOP SLAVE", 585 "RESET SLAVE ALL", 586 "FAKE SET MASTER", 587 "START SLAVE", 588 "RESET SLAVE ALL", 589 "FAKE SET MASTER", 590 } 591 goodReplica2.StartActionLoop(t, wr) 592 goodReplica2.FakeMysqlDaemon.Replicating = false 593 defer goodReplica2.StopActionLoop(t) 594 595 // run PlannedReparentShard 596 err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)}) 597 assert.Error(t, err) 598 assert.Contains(t, err.Error(), "replication on primary-elect cell1-0000000001 did not catch up in time") 599 600 // now check that DemotePrimary was undone and old primary is still primary 601 assert.True(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly not set") 602 assert.False(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly set") 603 } 604 605 func TestPlannedReparentShardRelayLogError(t *testing.T) { 606 delay := discovery.GetTabletPickerRetryDelay() 607 defer func() { 608 discovery.SetTabletPickerRetryDelay(delay) 609 }() 610 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 611 612 ts := memorytopo.NewServer("cell1") 613 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 614 vp := NewVtctlPipe(t, ts) 615 defer vp.Close() 616 617 // Create a primary, a couple good replicas 618 primary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 619 goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 620 621 // old primary 622 primary.FakeMysqlDaemon.ReadOnly = false 623 primary.FakeMysqlDaemon.Replicating = false 624 primary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica 625 primary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{ 626 GTIDSet: mysql.MariadbGTIDSet{ 627 7: mysql.MariadbGTID{ 628 Domain: 7, 629 Server: 123, 630 Sequence: 990, 631 }, 632 }, 633 } 634 primary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 635 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 636 } 637 primary.StartActionLoop(t, wr) 638 defer primary.StopActionLoop(t) 639 primary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) 640 641 // goodReplica1 is replicating 642 goodReplica1.FakeMysqlDaemon.ReadOnly = true 643 goodReplica1.FakeMysqlDaemon.Replicating = true 644 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) 645 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 646 // These 4 statements come from tablet startup 647 "STOP SLAVE", 648 "RESET SLAVE ALL", 649 "FAKE SET MASTER", 650 "START SLAVE", 651 // simulate error that will trigger a call to RestartReplication 652 "STOP SLAVE", 653 "RESET SLAVE", 654 "START SLAVE", 655 } 656 goodReplica1.StartActionLoop(t, wr) 657 goodReplica1.FakeMysqlDaemon.SetReplicationSourceError = errors.New("Slave failed to initialize relay log info structure from the repository") 658 defer goodReplica1.StopActionLoop(t) 659 660 // run PlannedReparentShard 661 err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", primary.Tablet.Keyspace + "/" + primary.Tablet.Shard, "--new_primary", 662 topoproto.TabletAliasString(primary.Tablet.Alias)}) 663 require.NoError(t, err) 664 // check what was run 665 err = primary.FakeMysqlDaemon.CheckSuperQueryList() 666 require.NoError(t, err) 667 err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList() 668 require.NoError(t, err) 669 670 assert.False(t, primary.FakeMysqlDaemon.ReadOnly, "primary.FakeMysqlDaemon.ReadOnly set") 671 assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set") 672 assert.True(t, primary.TM.QueryServiceControl.IsServing(), "primary...QueryServiceControl not serving") 673 674 // verify the old primary was told to start replicating (and not 675 // the replica that wasn't replicating in the first place) 676 assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set") 677 } 678 679 // TestPlannedReparentShardRelayLogErrorStartReplication is similar to 680 // TestPlannedReparentShardRelayLogError with the difference that goodReplica1 681 // is not replicating to start with (IO_Thread is not running) and we 682 // simulate an error from the attempt to start replication 683 func TestPlannedReparentShardRelayLogErrorStartReplication(t *testing.T) { 684 delay := discovery.GetTabletPickerRetryDelay() 685 defer func() { 686 discovery.SetTabletPickerRetryDelay(delay) 687 }() 688 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 689 690 ts := memorytopo.NewServer("cell1") 691 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 692 vp := NewVtctlPipe(t, ts) 693 defer vp.Close() 694 695 // Create a primary, a couple good replicas 696 primary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 697 goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 698 reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync") 699 700 // old primary 701 primary.FakeMysqlDaemon.ReadOnly = false 702 primary.FakeMysqlDaemon.Replicating = false 703 primary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica 704 primary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{ 705 GTIDSet: mysql.MariadbGTIDSet{ 706 7: mysql.MariadbGTID{ 707 Domain: 7, 708 Server: 123, 709 Sequence: 990, 710 }, 711 }, 712 } 713 primary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 714 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 715 } 716 primary.StartActionLoop(t, wr) 717 defer primary.StopActionLoop(t) 718 primary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) 719 720 // goodReplica1 is not replicating 721 goodReplica1.FakeMysqlDaemon.ReadOnly = true 722 goodReplica1.FakeMysqlDaemon.Replicating = true 723 goodReplica1.FakeMysqlDaemon.IOThreadRunning = false 724 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet)) 725 goodReplica1.FakeMysqlDaemon.CurrentSourceHost = primary.Tablet.MysqlHostname 726 goodReplica1.FakeMysqlDaemon.CurrentSourcePort = int(primary.Tablet.MysqlPort) 727 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 728 // simulate error that will trigger a call to RestartReplication 729 // These 4 statements come from tablet startup 730 "STOP SLAVE", 731 "RESET SLAVE ALL", 732 "FAKE SET MASTER", 733 "START SLAVE", 734 // In SetReplicationSource, we find that the source host and port was already set correctly, 735 // So we try to stop and start replication. The first STOP SLAVE comes from there 736 "STOP SLAVE", 737 // During the START SLAVE call, we find a relay log error, so we try to restart replication. 738 "STOP SLAVE", 739 "RESET SLAVE", 740 "START SLAVE", 741 } 742 goodReplica1.StartActionLoop(t, wr) 743 goodReplica1.FakeMysqlDaemon.StartReplicationError = errors.New("Slave failed to initialize relay log info structure from the repository") 744 defer goodReplica1.StopActionLoop(t) 745 746 // run PlannedReparentShard 747 err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", primary.Tablet.Keyspace + "/" + primary.Tablet.Shard, "--new_primary", 748 topoproto.TabletAliasString(primary.Tablet.Alias)}) 749 require.NoError(t, err) 750 // check what was run 751 err = primary.FakeMysqlDaemon.CheckSuperQueryList() 752 require.NoError(t, err) 753 err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList() 754 require.NoError(t, err) 755 756 assert.False(t, primary.FakeMysqlDaemon.ReadOnly, "primary.FakeMysqlDaemon.ReadOnly set") 757 assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set") 758 assert.True(t, primary.TM.QueryServiceControl.IsServing(), "primary...QueryServiceControl not serving") 759 760 // verify the old primary was told to start replicating (and not 761 // the replica that wasn't replicating in the first place) 762 assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set") 763 } 764 765 // TestPlannedReparentShardPromoteReplicaFail simulates a failure of the PromoteReplica call 766 // on the desired new primary tablet 767 func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) { 768 delay := discovery.GetTabletPickerRetryDelay() 769 defer func() { 770 discovery.SetTabletPickerRetryDelay(delay) 771 }() 772 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 773 774 ts := memorytopo.NewServer("cell1", "cell2") 775 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 776 vp := NewVtctlPipe(t, ts) 777 defer vp.Close() 778 779 // Create a primary, a couple good replicas 780 oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 781 newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) 782 goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 783 goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) 784 785 // new primary 786 newPrimary.FakeMysqlDaemon.ReadOnly = true 787 newPrimary.FakeMysqlDaemon.Replicating = true 788 // make promote fail 789 newPrimary.FakeMysqlDaemon.PromoteError = errors.New("some error") 790 newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{ 791 GTIDSet: mysql.MariadbGTIDSet{ 792 7: mysql.MariadbGTID{ 793 Domain: 7, 794 Server: 123, 795 Sequence: 990, 796 }, 797 }, 798 }} 799 newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{ 800 GTIDSet: mysql.MariadbGTIDSet{ 801 7: mysql.MariadbGTID{ 802 Domain: 7, 803 Server: 456, 804 Sequence: 991, 805 }, 806 }, 807 } 808 newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 809 "STOP SLAVE", 810 "RESET SLAVE ALL", 811 "FAKE SET MASTER", 812 "START SLAVE", 813 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 814 } 815 newPrimary.StartActionLoop(t, wr) 816 defer newPrimary.StopActionLoop(t) 817 818 // old primary 819 oldPrimary.FakeMysqlDaemon.ReadOnly = false 820 oldPrimary.FakeMysqlDaemon.Replicating = false 821 oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica 822 oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0] 823 oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) 824 oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 825 "RESET SLAVE ALL", 826 "FAKE SET MASTER", 827 "START SLAVE", 828 } 829 oldPrimary.StartActionLoop(t, wr) 830 defer oldPrimary.StopActionLoop(t) 831 oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) 832 833 // SetReplicationSource is called on new primary to make sure it's replicating before reparenting. 834 newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) 835 // good replica 1 is replicating 836 goodReplica1.FakeMysqlDaemon.ReadOnly = true 837 goodReplica1.FakeMysqlDaemon.Replicating = true 838 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 839 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 840 // These 4 statements come from tablet startup 841 "STOP SLAVE", 842 "RESET SLAVE ALL", 843 "FAKE SET MASTER", 844 "START SLAVE", 845 "STOP SLAVE", 846 "RESET SLAVE ALL", 847 "FAKE SET MASTER", 848 "START SLAVE", 849 } 850 goodReplica1.StartActionLoop(t, wr) 851 defer goodReplica1.StopActionLoop(t) 852 853 // good replica 2 is not replicating 854 goodReplica2.FakeMysqlDaemon.ReadOnly = true 855 goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 856 goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 857 // These 4 statements come from tablet startup 858 "STOP SLAVE", 859 "RESET SLAVE ALL", 860 "FAKE SET MASTER", 861 "START SLAVE", 862 "RESET SLAVE ALL", 863 "FAKE SET MASTER", 864 } 865 goodReplica2.StartActionLoop(t, wr) 866 goodReplica2.FakeMysqlDaemon.Replicating = false 867 defer goodReplica2.StopActionLoop(t) 868 869 // run PlannedReparentShard 870 err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)}) 871 872 assert.Error(t, err) 873 assert.Contains(t, err.Error(), "some error") 874 875 // when promote fails, we don't call UndoDemotePrimary, so the old primary should be read-only 876 assert.True(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly") 877 assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly") 878 879 // retrying should work 880 newPrimary.FakeMysqlDaemon.PromoteError = nil 881 newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 882 "STOP SLAVE", 883 "RESET SLAVE ALL", 884 "FAKE SET MASTER", 885 "START SLAVE", 886 // extra commands because of retry 887 "STOP SLAVE", 888 "RESET SLAVE ALL", 889 "FAKE SET MASTER", 890 "START SLAVE", 891 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 892 } 893 oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 894 "RESET SLAVE ALL", 895 "FAKE SET MASTER", 896 "START SLAVE", 897 // extra commands because of retry 898 "RESET SLAVE ALL", 899 "FAKE SET MASTER", 900 "START SLAVE", 901 } 902 903 // run PlannedReparentShard 904 err = vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)}) 905 require.NoError(t, err) 906 907 // check that primary changed correctly 908 assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly") 909 assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly") 910 } 911 912 // TestPlannedReparentShardSamePrimary tests PRS with oldPrimary works correctly 913 // Simulate failure of previous PRS and oldPrimary is ReadOnly 914 // Verify that primary correctly gets set to ReadWrite 915 func TestPlannedReparentShardSamePrimary(t *testing.T) { 916 delay := discovery.GetTabletPickerRetryDelay() 917 defer func() { 918 discovery.SetTabletPickerRetryDelay(delay) 919 }() 920 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 921 922 ts := memorytopo.NewServer("cell1", "cell2") 923 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 924 vp := NewVtctlPipe(t, ts) 925 defer vp.Close() 926 927 // Create a primary, a couple good replicas 928 oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 929 goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 930 goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) 931 932 // old primary 933 oldPrimary.FakeMysqlDaemon.ReadOnly = true 934 oldPrimary.FakeMysqlDaemon.Replicating = false 935 oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica 936 oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{ 937 GTIDSet: mysql.MariadbGTIDSet{ 938 7: mysql.MariadbGTID{ 939 Domain: 7, 940 Server: 123, 941 Sequence: 990, 942 }, 943 }, 944 } 945 oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 946 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 947 } 948 oldPrimary.StartActionLoop(t, wr) 949 defer oldPrimary.StopActionLoop(t) 950 oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true) 951 952 // good replica 1 is replicating 953 goodReplica1.FakeMysqlDaemon.ReadOnly = true 954 goodReplica1.FakeMysqlDaemon.Replicating = true 955 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) 956 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 957 // These 4 statements come from tablet startup 958 "STOP SLAVE", 959 "RESET SLAVE ALL", 960 "FAKE SET MASTER", 961 "START SLAVE", 962 "STOP SLAVE", 963 "RESET SLAVE ALL", 964 "FAKE SET MASTER", 965 "START SLAVE", 966 } 967 goodReplica1.StartActionLoop(t, wr) 968 defer goodReplica1.StopActionLoop(t) 969 970 // goodReplica2 is not replicating 971 goodReplica2.FakeMysqlDaemon.ReadOnly = true 972 goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet)) 973 goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 974 // These 4 statements come from tablet startup 975 "STOP SLAVE", 976 "RESET SLAVE ALL", 977 "FAKE SET MASTER", 978 "START SLAVE", 979 "RESET SLAVE ALL", 980 "FAKE SET MASTER", 981 } 982 goodReplica2.StartActionLoop(t, wr) 983 goodReplica2.FakeMysqlDaemon.Replicating = false 984 defer goodReplica2.StopActionLoop(t) 985 986 // run PlannedReparentShard 987 err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", oldPrimary.Tablet.Keyspace + "/" + oldPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(oldPrimary.Tablet.Alias)}) 988 require.NoError(t, err) 989 assert.False(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly") 990 }