vitess.io/vitess@v0.16.2/go/vt/wrangler/testlib/emergency_reparent_shard_test.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package testlib 18 19 import ( 20 "context" 21 "fmt" 22 "testing" 23 "time" 24 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 "k8s.io/apimachinery/pkg/util/sets" 28 29 "vitess.io/vitess/go/mysql" 30 "vitess.io/vitess/go/vt/discovery" 31 "vitess.io/vitess/go/vt/logutil" 32 "vitess.io/vitess/go/vt/topo/memorytopo" 33 "vitess.io/vitess/go/vt/topo/topoproto" 34 "vitess.io/vitess/go/vt/vtctl/reparentutil/reparenttestutil" 35 "vitess.io/vitess/go/vt/vttablet/tmclient" 36 "vitess.io/vitess/go/vt/wrangler" 37 38 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 39 ) 40 41 func TestEmergencyReparentShard(t *testing.T) { 42 delay := discovery.GetTabletPickerRetryDelay() 43 defer func() { 44 discovery.SetTabletPickerRetryDelay(delay) 45 }() 46 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 47 48 ts := memorytopo.NewServer("cell1", "cell2") 49 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 50 vp := NewVtctlPipe(t, ts) 51 defer vp.Close() 52 53 // Create a primary, a couple good replicas 54 oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 55 newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) 56 goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 57 goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil) 58 reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync") 59 60 oldPrimary.FakeMysqlDaemon.Replicating = false 61 oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{ 62 GTIDSet: mysql.MariadbGTIDSet{ 63 2: mysql.MariadbGTID{ 64 Domain: 2, 65 Server: 123, 66 Sequence: 456, 67 }, 68 }, 69 } 70 currentPrimaryFilePosition, _ := mysql.ParseFilePosGTIDSet("mariadb-bin.000010:456") 71 oldPrimary.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{ 72 GTIDSet: currentPrimaryFilePosition, 73 } 74 75 // new primary 76 newPrimary.FakeMysqlDaemon.ReadOnly = true 77 newPrimary.FakeMysqlDaemon.Replicating = true 78 newPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{ 79 GTIDSet: mysql.MariadbGTIDSet{ 80 2: mysql.MariadbGTID{ 81 Domain: 2, 82 Server: 123, 83 Sequence: 456, 84 }, 85 }, 86 } 87 newPrimaryRelayLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:456") 88 newPrimary.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{ 89 GTIDSet: newPrimaryRelayLogPos, 90 } 91 newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = append(newPrimary.FakeMysqlDaemon.WaitPrimaryPositions, newPrimary.FakeMysqlDaemon.CurrentSourceFilePosition) 92 newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 93 "STOP SLAVE IO_THREAD", 94 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 95 } 96 newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{ 97 GTIDSet: mysql.MariadbGTIDSet{ 98 2: mysql.MariadbGTID{ 99 Domain: 2, 100 Server: 123, 101 Sequence: 456, 102 }, 103 }, 104 } 105 newPrimary.StartActionLoop(t, wr) 106 defer newPrimary.StopActionLoop(t) 107 108 // old primary, will be scrapped 109 oldPrimary.FakeMysqlDaemon.ReadOnly = false 110 oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica 111 oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet)) 112 oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 113 "STOP SLAVE", 114 } 115 oldPrimary.StartActionLoop(t, wr) 116 defer oldPrimary.StopActionLoop(t) 117 118 // good replica 1 is replicating 119 goodReplica1.FakeMysqlDaemon.ReadOnly = true 120 goodReplica1.FakeMysqlDaemon.Replicating = true 121 goodReplica1.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{ 122 GTIDSet: mysql.MariadbGTIDSet{ 123 2: mysql.MariadbGTID{ 124 Domain: 2, 125 Server: 123, 126 Sequence: 455, 127 }, 128 }, 129 } 130 goodReplica1RelayLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:455") 131 goodReplica1.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{ 132 GTIDSet: goodReplica1RelayLogPos, 133 } 134 goodReplica1.FakeMysqlDaemon.WaitPrimaryPositions = append(goodReplica1.FakeMysqlDaemon.WaitPrimaryPositions, goodReplica1.FakeMysqlDaemon.CurrentSourceFilePosition) 135 goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 136 goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 137 // These 4 statements come from tablet startup 138 "STOP SLAVE", 139 "RESET SLAVE ALL", 140 "FAKE SET MASTER", 141 "START SLAVE", 142 "STOP SLAVE IO_THREAD", 143 "STOP SLAVE", 144 "RESET SLAVE ALL", 145 "FAKE SET MASTER", 146 "START SLAVE", 147 } 148 goodReplica1.StartActionLoop(t, wr) 149 defer goodReplica1.StopActionLoop(t) 150 151 // good replica 2 is not replicating 152 goodReplica2.FakeMysqlDaemon.ReadOnly = true 153 goodReplica2.FakeMysqlDaemon.Replicating = false 154 goodReplica2.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{ 155 GTIDSet: mysql.MariadbGTIDSet{ 156 2: mysql.MariadbGTID{ 157 Domain: 2, 158 Server: 123, 159 Sequence: 454, 160 }, 161 }, 162 } 163 goodReplica2RelayLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:454") 164 goodReplica2.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{ 165 GTIDSet: goodReplica2RelayLogPos, 166 } 167 goodReplica2.FakeMysqlDaemon.WaitPrimaryPositions = append(goodReplica2.FakeMysqlDaemon.WaitPrimaryPositions, goodReplica2.FakeMysqlDaemon.CurrentSourceFilePosition) 168 goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 169 goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 170 // These 4 statements come from tablet startup 171 "STOP SLAVE", 172 "RESET SLAVE ALL", 173 "FAKE SET MASTER", 174 "START SLAVE", 175 "RESET SLAVE ALL", 176 "FAKE SET MASTER", 177 } 178 goodReplica2.StartActionLoop(t, wr) 179 defer goodReplica2.StopActionLoop(t) 180 181 // run EmergencyReparentShard 182 waitReplicaTimeout := time.Second * 2 183 err := vp.Run([]string{"EmergencyReparentShard", "--wait_replicas_timeout", waitReplicaTimeout.String(), newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, 184 topoproto.TabletAliasString(newPrimary.Tablet.Alias)}) 185 require.NoError(t, err) 186 // check what was run 187 err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList() 188 require.NoError(t, err) 189 190 assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly set") 191 checkSemiSyncEnabled(t, true, true, newPrimary) 192 } 193 194 // TestEmergencyReparentShardPrimaryElectNotBest tries to emergency reparent 195 // to a host that is not the latest in replication position. 196 func TestEmergencyReparentShardPrimaryElectNotBest(t *testing.T) { 197 ctx, cancel := context.WithTimeout(context.Background(), time.Second*30) 198 defer cancel() 199 200 delay := discovery.GetTabletPickerRetryDelay() 201 defer func() { 202 discovery.SetTabletPickerRetryDelay(delay) 203 }() 204 discovery.SetTabletPickerRetryDelay(5 * time.Millisecond) 205 206 ts := memorytopo.NewServer("cell1", "cell2") 207 wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient()) 208 209 // Create a primary, a couple good replicas 210 oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil) 211 newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil) 212 moreAdvancedReplica := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil) 213 reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync") 214 215 // new primary 216 newPrimary.FakeMysqlDaemon.Replicating = true 217 // It has transactions in its relay log, but not as many as 218 // moreAdvancedReplica 219 newPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{ 220 GTIDSet: mysql.MariadbGTIDSet{ 221 2: mysql.MariadbGTID{ 222 Domain: 2, 223 Server: 123, 224 Sequence: 456, 225 }, 226 }, 227 } 228 newPrimaryRelayLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:456") 229 newPrimary.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{ 230 GTIDSet: newPrimaryRelayLogPos, 231 } 232 newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = append(newPrimary.FakeMysqlDaemon.WaitPrimaryPositions, newPrimary.FakeMysqlDaemon.CurrentSourceFilePosition) 233 newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(moreAdvancedReplica.Tablet)) 234 newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 235 "STOP SLAVE IO_THREAD", 236 "STOP SLAVE", 237 "RESET SLAVE ALL", 238 "FAKE SET MASTER", 239 "START SLAVE", 240 "SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES", 241 } 242 newPrimary.StartActionLoop(t, wr) 243 defer newPrimary.StopActionLoop(t) 244 245 // old primary, will be scrapped 246 oldPrimary.FakeMysqlDaemon.ReplicationStatusError = fmt.Errorf("old primary stopped working") 247 oldPrimary.StartActionLoop(t, wr) 248 defer oldPrimary.StopActionLoop(t) 249 250 // more advanced replica 251 moreAdvancedReplica.FakeMysqlDaemon.Replicating = true 252 // relay log position is more advanced than desired new primary 253 moreAdvancedReplica.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{ 254 GTIDSet: mysql.MariadbGTIDSet{ 255 2: mysql.MariadbGTID{ 256 Domain: 2, 257 Server: 123, 258 Sequence: 457, 259 }, 260 }, 261 } 262 moreAdvancedReplicaLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:457") 263 moreAdvancedReplica.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{ 264 GTIDSet: moreAdvancedReplicaLogPos, 265 } 266 moreAdvancedReplica.FakeMysqlDaemon.SetReplicationSourceInputs = append(moreAdvancedReplica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet)) 267 moreAdvancedReplica.FakeMysqlDaemon.WaitPrimaryPositions = append(moreAdvancedReplica.FakeMysqlDaemon.WaitPrimaryPositions, moreAdvancedReplica.FakeMysqlDaemon.CurrentSourceFilePosition) 268 newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = append(newPrimary.FakeMysqlDaemon.WaitPrimaryPositions, moreAdvancedReplica.FakeMysqlDaemon.CurrentPrimaryPosition) 269 moreAdvancedReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{ 270 // These 4 statements come from tablet startup 271 "STOP SLAVE", 272 "RESET SLAVE ALL", 273 "FAKE SET MASTER", 274 "START SLAVE", 275 "STOP SLAVE IO_THREAD", 276 "STOP SLAVE", 277 "RESET SLAVE ALL", 278 "FAKE SET MASTER", 279 "START SLAVE", 280 } 281 moreAdvancedReplica.StartActionLoop(t, wr) 282 defer moreAdvancedReplica.StopActionLoop(t) 283 284 // run EmergencyReparentShard 285 err := wr.EmergencyReparentShard(ctx, newPrimary.Tablet.Keyspace, newPrimary.Tablet.Shard, newPrimary.Tablet.Alias, 10*time.Second, sets.New[string](), false) 286 cancel() 287 288 assert.NoError(t, err) 289 // check what was run 290 err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList() 291 require.NoError(t, err) 292 err = oldPrimary.FakeMysqlDaemon.CheckSuperQueryList() 293 require.NoError(t, err) 294 err = moreAdvancedReplica.FakeMysqlDaemon.CheckSuperQueryList() 295 require.NoError(t, err) 296 }