vitess.io/vitess@v0.16.2/go/vt/wrangler/testlib/planned_reparent_shard_test.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package testlib
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"testing"
    23  	"time"
    24  
    25  	"vitess.io/vitess/go/vt/mysqlctl"
    26  
    27  	"github.com/stretchr/testify/assert"
    28  	"github.com/stretchr/testify/require"
    29  
    30  	"vitess.io/vitess/go/mysql"
    31  	"vitess.io/vitess/go/vt/discovery"
    32  	"vitess.io/vitess/go/vt/logutil"
    33  	"vitess.io/vitess/go/vt/topo/memorytopo"
    34  	"vitess.io/vitess/go/vt/topo/topoproto"
    35  	"vitess.io/vitess/go/vt/vtctl/reparentutil/reparenttestutil"
    36  	"vitess.io/vitess/go/vt/vttablet/tabletservermock"
    37  	"vitess.io/vitess/go/vt/vttablet/tmclient"
    38  	"vitess.io/vitess/go/vt/wrangler"
    39  
    40  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    41  )
    42  
    43  func TestPlannedReparentShardNoPrimaryProvided(t *testing.T) {
    44  	delay := discovery.GetTabletPickerRetryDelay()
    45  	defer func() {
    46  		discovery.SetTabletPickerRetryDelay(delay)
    47  	}()
    48  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
    49  
    50  	ts := memorytopo.NewServer("cell1", "cell2")
    51  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
    52  	vp := NewVtctlPipe(t, ts)
    53  	defer vp.Close()
    54  
    55  	// Create a primary, a couple good replicas
    56  	oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
    57  	newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil)
    58  	goodReplica1 := NewFakeTablet(t, wr, "cell2", 2, topodatapb.TabletType_REPLICA, nil)
    59  	reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync")
    60  
    61  	// new primary
    62  	newPrimary.FakeMysqlDaemon.ReadOnly = true
    63  	newPrimary.FakeMysqlDaemon.Replicating = true
    64  	newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{
    65  		GTIDSet: mysql.MariadbGTIDSet{
    66  			7: mysql.MariadbGTID{
    67  				Domain:   7,
    68  				Server:   123,
    69  				Sequence: 990,
    70  			},
    71  		},
    72  	}}
    73  	newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{
    74  		GTIDSet: mysql.MariadbGTIDSet{
    75  			7: mysql.MariadbGTID{
    76  				Domain:   7,
    77  				Server:   456,
    78  				Sequence: 991,
    79  			},
    80  		},
    81  	}
    82  	newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
    83  		"STOP SLAVE",
    84  		"RESET SLAVE ALL",
    85  		"FAKE SET MASTER",
    86  		"START SLAVE",
    87  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
    88  	}
    89  	newPrimary.StartActionLoop(t, wr)
    90  	defer newPrimary.StopActionLoop(t)
    91  
    92  	// old primary
    93  	oldPrimary.FakeMysqlDaemon.ReadOnly = false
    94  	oldPrimary.FakeMysqlDaemon.Replicating = false
    95  	oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica
    96  	oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0]
    97  	oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
    98  	oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
    99  		"RESET SLAVE ALL",
   100  		"FAKE SET MASTER",
   101  		"START SLAVE",
   102  		// we end up calling SetReplicationSource twice on the old primary
   103  		"RESET SLAVE ALL",
   104  		"FAKE SET MASTER",
   105  		"START SLAVE",
   106  	}
   107  	oldPrimary.StartActionLoop(t, wr)
   108  	defer oldPrimary.StopActionLoop(t)
   109  	oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true)
   110  
   111  	// SetReplicationSource is called on new primary to make sure it's replicating before reparenting.
   112  	newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet))
   113  
   114  	// good replica 1 is replicating
   115  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   116  	goodReplica1.FakeMysqlDaemon.Replicating = true
   117  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   118  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   119  		// These 4 statements come from tablet startup
   120  		"STOP SLAVE",
   121  		"RESET SLAVE ALL",
   122  		"FAKE SET MASTER",
   123  		"START SLAVE",
   124  		"STOP SLAVE",
   125  		"RESET SLAVE ALL",
   126  		"FAKE SET MASTER",
   127  		"START SLAVE",
   128  	}
   129  	goodReplica1.StartActionLoop(t, wr)
   130  	defer goodReplica1.StopActionLoop(t)
   131  
   132  	// run PlannedReparentShard
   133  	err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard})
   134  	require.NoError(t, err)
   135  
   136  	// check what was run
   137  	err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList()
   138  	require.NoError(t, err)
   139  
   140  	err = oldPrimary.FakeMysqlDaemon.CheckSuperQueryList()
   141  	require.NoError(t, err)
   142  
   143  	err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList()
   144  	require.NoError(t, err)
   145  
   146  	assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly is set")
   147  	assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly not set")
   148  	assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set")
   149  	assert.True(t, oldPrimary.TM.QueryServiceControl.IsServing(), "oldPrimary...QueryServiceControl not serving")
   150  
   151  	// verify the old primary was told to start replicating (and not
   152  	// the replica that wasn't replicating in the first place)
   153  	assert.True(t, oldPrimary.FakeMysqlDaemon.Replicating, "oldPrimary.FakeMysqlDaemon.Replicating not set")
   154  	assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set")
   155  	checkSemiSyncEnabled(t, true, true, newPrimary)
   156  	checkSemiSyncEnabled(t, false, true, goodReplica1, oldPrimary)
   157  }
   158  
   159  func TestPlannedReparentShardNoError(t *testing.T) {
   160  	delay := discovery.GetTabletPickerRetryDelay()
   161  	defer func() {
   162  		discovery.SetTabletPickerRetryDelay(delay)
   163  	}()
   164  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
   165  
   166  	ts := memorytopo.NewServer("cell1", "cell2")
   167  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
   168  	vp := NewVtctlPipe(t, ts)
   169  	defer vp.Close()
   170  
   171  	// Create a primary, a couple good replicas
   172  	oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
   173  	newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil)
   174  	goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
   175  	goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil)
   176  	reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync")
   177  
   178  	// new primary
   179  	newPrimary.FakeMysqlDaemon.ReadOnly = true
   180  	newPrimary.FakeMysqlDaemon.Replicating = true
   181  	newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{
   182  		GTIDSet: mysql.MariadbGTIDSet{
   183  			7: mysql.MariadbGTID{
   184  				Domain:   7,
   185  				Server:   123,
   186  				Sequence: 990,
   187  			},
   188  		},
   189  	}}
   190  	newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{
   191  		GTIDSet: mysql.MariadbGTIDSet{
   192  			7: mysql.MariadbGTID{
   193  				Domain:   7,
   194  				Server:   456,
   195  				Sequence: 991,
   196  			},
   197  		},
   198  	}
   199  	newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   200  		"STOP SLAVE",
   201  		"RESET SLAVE ALL",
   202  		"FAKE SET MASTER",
   203  		"START SLAVE",
   204  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   205  	}
   206  	newPrimary.StartActionLoop(t, wr)
   207  	defer newPrimary.StopActionLoop(t)
   208  
   209  	// old primary
   210  	oldPrimary.FakeMysqlDaemon.ReadOnly = false
   211  	oldPrimary.FakeMysqlDaemon.Replicating = false
   212  	oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica
   213  	oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0]
   214  	oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
   215  	oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   216  		"RESET SLAVE ALL",
   217  		"FAKE SET MASTER",
   218  		"START SLAVE",
   219  		// we end up calling SetReplicationSource twice on the old primary
   220  		"RESET SLAVE ALL",
   221  		"FAKE SET MASTER",
   222  		"START SLAVE",
   223  	}
   224  	oldPrimary.StartActionLoop(t, wr)
   225  	defer oldPrimary.StopActionLoop(t)
   226  	oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true)
   227  
   228  	// SetReplicationSource is called on new primary to make sure it's replicating before reparenting.
   229  	newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet))
   230  
   231  	// goodReplica1 is replicating
   232  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   233  	goodReplica1.FakeMysqlDaemon.Replicating = true
   234  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   235  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   236  		// These 4 statements come from tablet startup
   237  		"STOP SLAVE",
   238  		"RESET SLAVE ALL",
   239  		"FAKE SET MASTER",
   240  		"START SLAVE",
   241  		"STOP SLAVE",
   242  		"RESET SLAVE ALL",
   243  		"FAKE SET MASTER",
   244  		"START SLAVE",
   245  	}
   246  	goodReplica1.StartActionLoop(t, wr)
   247  	defer goodReplica1.StopActionLoop(t)
   248  
   249  	// goodReplica2 is not replicating
   250  	goodReplica2.FakeMysqlDaemon.ReadOnly = true
   251  	goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   252  	goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   253  		// These 4 statements come from tablet startup
   254  		"STOP SLAVE",
   255  		"RESET SLAVE ALL",
   256  		"FAKE SET MASTER",
   257  		"START SLAVE",
   258  		"RESET SLAVE ALL",
   259  		"FAKE SET MASTER",
   260  	}
   261  	goodReplica2.StartActionLoop(t, wr)
   262  	goodReplica2.FakeMysqlDaemon.Replicating = false
   263  	defer goodReplica2.StopActionLoop(t)
   264  
   265  	// run PlannedReparentShard
   266  	err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary",
   267  		topoproto.TabletAliasString(newPrimary.Tablet.Alias)})
   268  	require.NoError(t, err)
   269  
   270  	// check what was run
   271  	err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList()
   272  	require.NoError(t, err)
   273  	err = oldPrimary.FakeMysqlDaemon.CheckSuperQueryList()
   274  	require.NoError(t, err)
   275  	err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList()
   276  	require.NoError(t, err)
   277  	err = goodReplica2.FakeMysqlDaemon.CheckSuperQueryList()
   278  	require.NoError(t, err)
   279  
   280  	assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly set")
   281  	assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly not set")
   282  	assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set")
   283  
   284  	assert.True(t, goodReplica2.FakeMysqlDaemon.ReadOnly, "goodReplica2.FakeMysqlDaemon.ReadOnly not set")
   285  	assert.True(t, oldPrimary.TM.QueryServiceControl.IsServing(), "oldPrimary...QueryServiceControl not serving")
   286  
   287  	// verify the old primary was told to start replicating (and not
   288  	// the replica that wasn't replicating in the first place)
   289  	assert.True(t, oldPrimary.FakeMysqlDaemon.Replicating, "oldPrimary.FakeMysqlDaemon.Replicating not set")
   290  	assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set")
   291  	assert.False(t, goodReplica2.FakeMysqlDaemon.Replicating, "goodReplica2.FakeMysqlDaemon.Replicating set")
   292  
   293  	checkSemiSyncEnabled(t, true, true, newPrimary)
   294  	checkSemiSyncEnabled(t, false, true, goodReplica1, goodReplica2, oldPrimary)
   295  }
   296  
   297  func TestPlannedReparentInitialization(t *testing.T) {
   298  	delay := discovery.GetTabletPickerRetryDelay()
   299  	defer func() {
   300  		discovery.SetTabletPickerRetryDelay(delay)
   301  	}()
   302  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
   303  
   304  	ts := memorytopo.NewServer("cell1", "cell2")
   305  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
   306  	vp := NewVtctlPipe(t, ts)
   307  	defer vp.Close()
   308  
   309  	// Create a few replicas.
   310  	newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil)
   311  	goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
   312  	goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil)
   313  	reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync")
   314  
   315  	// new primary
   316  	newPrimary.FakeMysqlDaemon.ReadOnly = true
   317  	newPrimary.FakeMysqlDaemon.Replicating = true
   318  	newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{
   319  		GTIDSet: mysql.MariadbGTIDSet{
   320  			7: mysql.MariadbGTID{
   321  				Domain:   7,
   322  				Server:   456,
   323  				Sequence: 991,
   324  			},
   325  		},
   326  	}
   327  	newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   328  		mysqlctl.GenerateInitialBinlogEntry(),
   329  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   330  	}
   331  	newPrimary.StartActionLoop(t, wr)
   332  	defer newPrimary.StopActionLoop(t)
   333  
   334  	// goodReplica1 is replicating
   335  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   336  	goodReplica1.FakeMysqlDaemon.Replicating = true
   337  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
   338  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   339  		"STOP SLAVE",
   340  		"RESET SLAVE ALL",
   341  		"FAKE SET MASTER",
   342  		"START SLAVE",
   343  	}
   344  	goodReplica1.StartActionLoop(t, wr)
   345  	defer goodReplica1.StopActionLoop(t)
   346  
   347  	// goodReplica2 is not replicating
   348  	goodReplica2.FakeMysqlDaemon.ReadOnly = true
   349  	goodReplica2.FakeMysqlDaemon.Replicating = false
   350  	goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
   351  	goodReplica2.StartActionLoop(t, wr)
   352  	goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   353  		"RESET SLAVE ALL",
   354  		"FAKE SET MASTER",
   355  	}
   356  	defer goodReplica2.StopActionLoop(t)
   357  
   358  	// run PlannedReparentShard
   359  	err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)})
   360  	require.NoError(t, err)
   361  
   362  	// check what was run
   363  	err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList()
   364  	require.NoError(t, err)
   365  	err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList()
   366  	require.NoError(t, err)
   367  	err = goodReplica2.FakeMysqlDaemon.CheckSuperQueryList()
   368  	require.NoError(t, err)
   369  
   370  	assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly set")
   371  	assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set")
   372  	assert.True(t, goodReplica2.FakeMysqlDaemon.ReadOnly, "goodReplica2.FakeMysqlDaemon.ReadOnly not set")
   373  
   374  	assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set")
   375  	assert.False(t, goodReplica2.FakeMysqlDaemon.Replicating, "goodReplica2.FakeMysqlDaemon.Replicating set")
   376  
   377  	checkSemiSyncEnabled(t, true, true, newPrimary)
   378  	checkSemiSyncEnabled(t, false, true, goodReplica1, goodReplica2)
   379  }
   380  
   381  // TestPlannedReparentShardWaitForPositionFail simulates a failure of the WaitForPosition call
   382  // on the desired new primary tablet
   383  func TestPlannedReparentShardWaitForPositionFail(t *testing.T) {
   384  	delay := discovery.GetTabletPickerRetryDelay()
   385  	defer func() {
   386  		discovery.SetTabletPickerRetryDelay(delay)
   387  	}()
   388  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
   389  
   390  	ts := memorytopo.NewServer("cell1", "cell2")
   391  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
   392  	vp := NewVtctlPipe(t, ts)
   393  	defer vp.Close()
   394  
   395  	// Create a primary, a couple good replicas
   396  	oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
   397  	newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil)
   398  	goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
   399  	goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil)
   400  
   401  	// new primary
   402  	newPrimary.FakeMysqlDaemon.ReadOnly = true
   403  	newPrimary.FakeMysqlDaemon.Replicating = true
   404  	newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{
   405  		GTIDSet: mysql.MariadbGTIDSet{
   406  			7: mysql.MariadbGTID{
   407  				Domain:   7,
   408  				Server:   123,
   409  				Sequence: 990,
   410  			},
   411  		},
   412  	}}
   413  	newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{
   414  		GTIDSet: mysql.MariadbGTIDSet{
   415  			7: mysql.MariadbGTID{
   416  				Domain:   7,
   417  				Server:   456,
   418  				Sequence: 991,
   419  			},
   420  		},
   421  	}
   422  	newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   423  		"STOP SLAVE",
   424  		"RESET SLAVE ALL",
   425  		"FAKE SET MASTER",
   426  		"START SLAVE",
   427  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   428  	}
   429  	newPrimary.StartActionLoop(t, wr)
   430  	defer newPrimary.StopActionLoop(t)
   431  
   432  	// old primary
   433  	oldPrimary.FakeMysqlDaemon.ReadOnly = false
   434  	oldPrimary.FakeMysqlDaemon.Replicating = false
   435  	// set to incorrect value to make promote fail on WaitForReplicationPos
   436  	oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.PromoteResult
   437  	oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
   438  	oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   439  		"RESET SLAVE ALL",
   440  		"FAKE SET MASTER",
   441  		"START SLAVE",
   442  	}
   443  	oldPrimary.StartActionLoop(t, wr)
   444  	defer oldPrimary.StopActionLoop(t)
   445  	oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true)
   446  	// SetReplicationSource is called on new primary to make sure it's replicating before reparenting.
   447  	newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet))
   448  
   449  	// good replica 1 is replicating
   450  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   451  	goodReplica1.FakeMysqlDaemon.Replicating = true
   452  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   453  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   454  		// These 4 statements come from tablet startup
   455  		"STOP SLAVE",
   456  		"RESET SLAVE ALL",
   457  		"FAKE SET MASTER",
   458  		"START SLAVE",
   459  		"STOP SLAVE",
   460  		"RESET SLAVE ALL",
   461  		"FAKE SET MASTER",
   462  		"START SLAVE",
   463  	}
   464  	goodReplica1.StartActionLoop(t, wr)
   465  	defer goodReplica1.StopActionLoop(t)
   466  
   467  	// good replica 2 is not replicating
   468  	goodReplica2.FakeMysqlDaemon.ReadOnly = true
   469  	goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   470  	goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   471  		// These 4 statements come from tablet startup
   472  		"STOP SLAVE",
   473  		"RESET SLAVE ALL",
   474  		"FAKE SET MASTER",
   475  		"START SLAVE",
   476  		"RESET SLAVE ALL",
   477  		"FAKE SET MASTER",
   478  	}
   479  	goodReplica2.StartActionLoop(t, wr)
   480  	goodReplica2.FakeMysqlDaemon.Replicating = false
   481  	defer goodReplica2.StopActionLoop(t)
   482  
   483  	// run PlannedReparentShard
   484  	err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)})
   485  	assert.Error(t, err)
   486  	assert.Contains(t, err.Error(), "replication on primary-elect cell1-0000000001 did not catch up in time")
   487  
   488  	// now check that DemotePrimary was undone and old primary is still primary
   489  	assert.True(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly not set")
   490  	assert.False(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly set")
   491  }
   492  
   493  // TestPlannedReparentShardWaitForPositionTimeout simulates a context timeout
   494  // during the WaitForPosition call to the desired new primary
   495  func TestPlannedReparentShardWaitForPositionTimeout(t *testing.T) {
   496  	delay := discovery.GetTabletPickerRetryDelay()
   497  	defer func() {
   498  		discovery.SetTabletPickerRetryDelay(delay)
   499  	}()
   500  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
   501  
   502  	ts := memorytopo.NewServer("cell1", "cell2")
   503  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
   504  	vp := NewVtctlPipe(t, ts)
   505  	defer vp.Close()
   506  
   507  	// Create a primary, a couple good replicas
   508  	oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
   509  	newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil)
   510  	goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
   511  	goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil)
   512  
   513  	// new primary
   514  	newPrimary.FakeMysqlDaemon.TimeoutHook = func() error { return context.DeadlineExceeded }
   515  	newPrimary.FakeMysqlDaemon.ReadOnly = true
   516  	newPrimary.FakeMysqlDaemon.Replicating = true
   517  	newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{
   518  		GTIDSet: mysql.MariadbGTIDSet{
   519  			7: mysql.MariadbGTID{
   520  				Domain:   7,
   521  				Server:   123,
   522  				Sequence: 990,
   523  			},
   524  		},
   525  	}}
   526  	newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{
   527  		GTIDSet: mysql.MariadbGTIDSet{
   528  			7: mysql.MariadbGTID{
   529  				Domain:   7,
   530  				Server:   456,
   531  				Sequence: 991,
   532  			},
   533  		},
   534  	}
   535  	newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   536  		"STOP SLAVE",
   537  		"RESET SLAVE ALL",
   538  		"FAKE SET MASTER",
   539  		"START SLAVE",
   540  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   541  	}
   542  	newPrimary.StartActionLoop(t, wr)
   543  	defer newPrimary.StopActionLoop(t)
   544  
   545  	// old primary
   546  	oldPrimary.FakeMysqlDaemon.ReadOnly = false
   547  	oldPrimary.FakeMysqlDaemon.Replicating = false
   548  	oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0]
   549  	oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
   550  	oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   551  		"RESET SLAVE ALL",
   552  		"FAKE SET MASTER",
   553  		"START SLAVE",
   554  	}
   555  	oldPrimary.StartActionLoop(t, wr)
   556  	defer oldPrimary.StopActionLoop(t)
   557  	oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true)
   558  
   559  	// SetReplicationSource is called on new primary to make sure it's replicating before reparenting.
   560  	newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet))
   561  	// good replica 1 is replicating
   562  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   563  	goodReplica1.FakeMysqlDaemon.Replicating = true
   564  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   565  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   566  		// These 4 statements come from tablet startup
   567  		"STOP SLAVE",
   568  		"RESET SLAVE ALL",
   569  		"FAKE SET MASTER",
   570  		"START SLAVE",
   571  		"STOP SLAVE",
   572  		"RESET SLAVE ALL",
   573  		"FAKE SET MASTER",
   574  		"START SLAVE",
   575  	}
   576  	goodReplica1.StartActionLoop(t, wr)
   577  	defer goodReplica1.StopActionLoop(t)
   578  
   579  	// good replica 2 is not replicating
   580  	goodReplica2.FakeMysqlDaemon.ReadOnly = true
   581  	goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   582  	goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   583  		// These 4 statements come from tablet startup
   584  		"STOP SLAVE",
   585  		"RESET SLAVE ALL",
   586  		"FAKE SET MASTER",
   587  		"START SLAVE",
   588  		"RESET SLAVE ALL",
   589  		"FAKE SET MASTER",
   590  	}
   591  	goodReplica2.StartActionLoop(t, wr)
   592  	goodReplica2.FakeMysqlDaemon.Replicating = false
   593  	defer goodReplica2.StopActionLoop(t)
   594  
   595  	// run PlannedReparentShard
   596  	err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)})
   597  	assert.Error(t, err)
   598  	assert.Contains(t, err.Error(), "replication on primary-elect cell1-0000000001 did not catch up in time")
   599  
   600  	// now check that DemotePrimary was undone and old primary is still primary
   601  	assert.True(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly not set")
   602  	assert.False(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly set")
   603  }
   604  
   605  func TestPlannedReparentShardRelayLogError(t *testing.T) {
   606  	delay := discovery.GetTabletPickerRetryDelay()
   607  	defer func() {
   608  		discovery.SetTabletPickerRetryDelay(delay)
   609  	}()
   610  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
   611  
   612  	ts := memorytopo.NewServer("cell1")
   613  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
   614  	vp := NewVtctlPipe(t, ts)
   615  	defer vp.Close()
   616  
   617  	// Create a primary, a couple good replicas
   618  	primary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
   619  	goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
   620  
   621  	// old primary
   622  	primary.FakeMysqlDaemon.ReadOnly = false
   623  	primary.FakeMysqlDaemon.Replicating = false
   624  	primary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica
   625  	primary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{
   626  		GTIDSet: mysql.MariadbGTIDSet{
   627  			7: mysql.MariadbGTID{
   628  				Domain:   7,
   629  				Server:   123,
   630  				Sequence: 990,
   631  			},
   632  		},
   633  	}
   634  	primary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   635  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   636  	}
   637  	primary.StartActionLoop(t, wr)
   638  	defer primary.StopActionLoop(t)
   639  	primary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true)
   640  
   641  	// goodReplica1 is replicating
   642  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   643  	goodReplica1.FakeMysqlDaemon.Replicating = true
   644  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet))
   645  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   646  		// These 4 statements come from tablet startup
   647  		"STOP SLAVE",
   648  		"RESET SLAVE ALL",
   649  		"FAKE SET MASTER",
   650  		"START SLAVE",
   651  		// simulate error that will trigger a call to RestartReplication
   652  		"STOP SLAVE",
   653  		"RESET SLAVE",
   654  		"START SLAVE",
   655  	}
   656  	goodReplica1.StartActionLoop(t, wr)
   657  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceError = errors.New("Slave failed to initialize relay log info structure from the repository")
   658  	defer goodReplica1.StopActionLoop(t)
   659  
   660  	// run PlannedReparentShard
   661  	err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", primary.Tablet.Keyspace + "/" + primary.Tablet.Shard, "--new_primary",
   662  		topoproto.TabletAliasString(primary.Tablet.Alias)})
   663  	require.NoError(t, err)
   664  	// check what was run
   665  	err = primary.FakeMysqlDaemon.CheckSuperQueryList()
   666  	require.NoError(t, err)
   667  	err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList()
   668  	require.NoError(t, err)
   669  
   670  	assert.False(t, primary.FakeMysqlDaemon.ReadOnly, "primary.FakeMysqlDaemon.ReadOnly set")
   671  	assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set")
   672  	assert.True(t, primary.TM.QueryServiceControl.IsServing(), "primary...QueryServiceControl not serving")
   673  
   674  	// verify the old primary was told to start replicating (and not
   675  	// the replica that wasn't replicating in the first place)
   676  	assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set")
   677  }
   678  
   679  // TestPlannedReparentShardRelayLogErrorStartReplication is similar to
   680  // TestPlannedReparentShardRelayLogError with the difference that goodReplica1
   681  // is not replicating to start with (IO_Thread is not running) and we
   682  // simulate an error from the attempt to start replication
   683  func TestPlannedReparentShardRelayLogErrorStartReplication(t *testing.T) {
   684  	delay := discovery.GetTabletPickerRetryDelay()
   685  	defer func() {
   686  		discovery.SetTabletPickerRetryDelay(delay)
   687  	}()
   688  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
   689  
   690  	ts := memorytopo.NewServer("cell1")
   691  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
   692  	vp := NewVtctlPipe(t, ts)
   693  	defer vp.Close()
   694  
   695  	// Create a primary, a couple good replicas
   696  	primary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
   697  	goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
   698  	reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync")
   699  
   700  	// old primary
   701  	primary.FakeMysqlDaemon.ReadOnly = false
   702  	primary.FakeMysqlDaemon.Replicating = false
   703  	primary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica
   704  	primary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{
   705  		GTIDSet: mysql.MariadbGTIDSet{
   706  			7: mysql.MariadbGTID{
   707  				Domain:   7,
   708  				Server:   123,
   709  				Sequence: 990,
   710  			},
   711  		},
   712  	}
   713  	primary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   714  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   715  	}
   716  	primary.StartActionLoop(t, wr)
   717  	defer primary.StopActionLoop(t)
   718  	primary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true)
   719  
   720  	// goodReplica1 is not replicating
   721  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   722  	goodReplica1.FakeMysqlDaemon.Replicating = true
   723  	goodReplica1.FakeMysqlDaemon.IOThreadRunning = false
   724  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(primary.Tablet))
   725  	goodReplica1.FakeMysqlDaemon.CurrentSourceHost = primary.Tablet.MysqlHostname
   726  	goodReplica1.FakeMysqlDaemon.CurrentSourcePort = int(primary.Tablet.MysqlPort)
   727  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   728  		// simulate error that will trigger a call to RestartReplication
   729  		// These 4 statements come from tablet startup
   730  		"STOP SLAVE",
   731  		"RESET SLAVE ALL",
   732  		"FAKE SET MASTER",
   733  		"START SLAVE",
   734  		// In SetReplicationSource, we find that the source host and port was already set correctly,
   735  		// So we try to stop and start replication. The first STOP SLAVE comes from there
   736  		"STOP SLAVE",
   737  		// During the START SLAVE call, we find a relay log error, so we try to restart replication.
   738  		"STOP SLAVE",
   739  		"RESET SLAVE",
   740  		"START SLAVE",
   741  	}
   742  	goodReplica1.StartActionLoop(t, wr)
   743  	goodReplica1.FakeMysqlDaemon.StartReplicationError = errors.New("Slave failed to initialize relay log info structure from the repository")
   744  	defer goodReplica1.StopActionLoop(t)
   745  
   746  	// run PlannedReparentShard
   747  	err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", primary.Tablet.Keyspace + "/" + primary.Tablet.Shard, "--new_primary",
   748  		topoproto.TabletAliasString(primary.Tablet.Alias)})
   749  	require.NoError(t, err)
   750  	// check what was run
   751  	err = primary.FakeMysqlDaemon.CheckSuperQueryList()
   752  	require.NoError(t, err)
   753  	err = goodReplica1.FakeMysqlDaemon.CheckSuperQueryList()
   754  	require.NoError(t, err)
   755  
   756  	assert.False(t, primary.FakeMysqlDaemon.ReadOnly, "primary.FakeMysqlDaemon.ReadOnly set")
   757  	assert.True(t, goodReplica1.FakeMysqlDaemon.ReadOnly, "goodReplica1.FakeMysqlDaemon.ReadOnly not set")
   758  	assert.True(t, primary.TM.QueryServiceControl.IsServing(), "primary...QueryServiceControl not serving")
   759  
   760  	// verify the old primary was told to start replicating (and not
   761  	// the replica that wasn't replicating in the first place)
   762  	assert.True(t, goodReplica1.FakeMysqlDaemon.Replicating, "goodReplica1.FakeMysqlDaemon.Replicating not set")
   763  }
   764  
   765  // TestPlannedReparentShardPromoteReplicaFail simulates a failure of the PromoteReplica call
   766  // on the desired new primary tablet
   767  func TestPlannedReparentShardPromoteReplicaFail(t *testing.T) {
   768  	delay := discovery.GetTabletPickerRetryDelay()
   769  	defer func() {
   770  		discovery.SetTabletPickerRetryDelay(delay)
   771  	}()
   772  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
   773  
   774  	ts := memorytopo.NewServer("cell1", "cell2")
   775  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
   776  	vp := NewVtctlPipe(t, ts)
   777  	defer vp.Close()
   778  
   779  	// Create a primary, a couple good replicas
   780  	oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
   781  	newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil)
   782  	goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
   783  	goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil)
   784  
   785  	// new primary
   786  	newPrimary.FakeMysqlDaemon.ReadOnly = true
   787  	newPrimary.FakeMysqlDaemon.Replicating = true
   788  	// make promote fail
   789  	newPrimary.FakeMysqlDaemon.PromoteError = errors.New("some error")
   790  	newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = []mysql.Position{{
   791  		GTIDSet: mysql.MariadbGTIDSet{
   792  			7: mysql.MariadbGTID{
   793  				Domain:   7,
   794  				Server:   123,
   795  				Sequence: 990,
   796  			},
   797  		},
   798  	}}
   799  	newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{
   800  		GTIDSet: mysql.MariadbGTIDSet{
   801  			7: mysql.MariadbGTID{
   802  				Domain:   7,
   803  				Server:   456,
   804  				Sequence: 991,
   805  			},
   806  		},
   807  	}
   808  	newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   809  		"STOP SLAVE",
   810  		"RESET SLAVE ALL",
   811  		"FAKE SET MASTER",
   812  		"START SLAVE",
   813  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   814  	}
   815  	newPrimary.StartActionLoop(t, wr)
   816  	defer newPrimary.StopActionLoop(t)
   817  
   818  	// old primary
   819  	oldPrimary.FakeMysqlDaemon.ReadOnly = false
   820  	oldPrimary.FakeMysqlDaemon.Replicating = false
   821  	oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica
   822  	oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = newPrimary.FakeMysqlDaemon.WaitPrimaryPositions[0]
   823  	oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
   824  	oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   825  		"RESET SLAVE ALL",
   826  		"FAKE SET MASTER",
   827  		"START SLAVE",
   828  	}
   829  	oldPrimary.StartActionLoop(t, wr)
   830  	defer oldPrimary.StopActionLoop(t)
   831  	oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true)
   832  
   833  	// SetReplicationSource is called on new primary to make sure it's replicating before reparenting.
   834  	newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet))
   835  	// good replica 1 is replicating
   836  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   837  	goodReplica1.FakeMysqlDaemon.Replicating = true
   838  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   839  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   840  		// These 4 statements come from tablet startup
   841  		"STOP SLAVE",
   842  		"RESET SLAVE ALL",
   843  		"FAKE SET MASTER",
   844  		"START SLAVE",
   845  		"STOP SLAVE",
   846  		"RESET SLAVE ALL",
   847  		"FAKE SET MASTER",
   848  		"START SLAVE",
   849  	}
   850  	goodReplica1.StartActionLoop(t, wr)
   851  	defer goodReplica1.StopActionLoop(t)
   852  
   853  	// good replica 2 is not replicating
   854  	goodReplica2.FakeMysqlDaemon.ReadOnly = true
   855  	goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   856  	goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   857  		// These 4 statements come from tablet startup
   858  		"STOP SLAVE",
   859  		"RESET SLAVE ALL",
   860  		"FAKE SET MASTER",
   861  		"START SLAVE",
   862  		"RESET SLAVE ALL",
   863  		"FAKE SET MASTER",
   864  	}
   865  	goodReplica2.StartActionLoop(t, wr)
   866  	goodReplica2.FakeMysqlDaemon.Replicating = false
   867  	defer goodReplica2.StopActionLoop(t)
   868  
   869  	// run PlannedReparentShard
   870  	err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)})
   871  
   872  	assert.Error(t, err)
   873  	assert.Contains(t, err.Error(), "some error")
   874  
   875  	// when promote fails, we don't call UndoDemotePrimary, so the old primary should be read-only
   876  	assert.True(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly")
   877  	assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly")
   878  
   879  	// retrying should work
   880  	newPrimary.FakeMysqlDaemon.PromoteError = nil
   881  	newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   882  		"STOP SLAVE",
   883  		"RESET SLAVE ALL",
   884  		"FAKE SET MASTER",
   885  		"START SLAVE",
   886  		// extra commands because of retry
   887  		"STOP SLAVE",
   888  		"RESET SLAVE ALL",
   889  		"FAKE SET MASTER",
   890  		"START SLAVE",
   891  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   892  	}
   893  	oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   894  		"RESET SLAVE ALL",
   895  		"FAKE SET MASTER",
   896  		"START SLAVE",
   897  		// extra commands because of retry
   898  		"RESET SLAVE ALL",
   899  		"FAKE SET MASTER",
   900  		"START SLAVE",
   901  	}
   902  
   903  	// run PlannedReparentShard
   904  	err = vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(newPrimary.Tablet.Alias)})
   905  	require.NoError(t, err)
   906  
   907  	// check that primary changed correctly
   908  	assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly")
   909  	assert.True(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly")
   910  }
   911  
   912  // TestPlannedReparentShardSamePrimary tests PRS with oldPrimary works correctly
   913  // Simulate failure of previous PRS and oldPrimary is ReadOnly
   914  // Verify that primary correctly gets set to ReadWrite
   915  func TestPlannedReparentShardSamePrimary(t *testing.T) {
   916  	delay := discovery.GetTabletPickerRetryDelay()
   917  	defer func() {
   918  		discovery.SetTabletPickerRetryDelay(delay)
   919  	}()
   920  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
   921  
   922  	ts := memorytopo.NewServer("cell1", "cell2")
   923  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
   924  	vp := NewVtctlPipe(t, ts)
   925  	defer vp.Close()
   926  
   927  	// Create a primary, a couple good replicas
   928  	oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
   929  	goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
   930  	goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil)
   931  
   932  	// old primary
   933  	oldPrimary.FakeMysqlDaemon.ReadOnly = true
   934  	oldPrimary.FakeMysqlDaemon.Replicating = false
   935  	oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica
   936  	oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{
   937  		GTIDSet: mysql.MariadbGTIDSet{
   938  			7: mysql.MariadbGTID{
   939  				Domain:   7,
   940  				Server:   123,
   941  				Sequence: 990,
   942  			},
   943  		},
   944  	}
   945  	oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   946  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   947  	}
   948  	oldPrimary.StartActionLoop(t, wr)
   949  	defer oldPrimary.StopActionLoop(t)
   950  	oldPrimary.TM.QueryServiceControl.(*tabletservermock.Controller).SetQueryServiceEnabledForTests(true)
   951  
   952  	// good replica 1 is replicating
   953  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   954  	goodReplica1.FakeMysqlDaemon.Replicating = true
   955  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet))
   956  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   957  		// These 4 statements come from tablet startup
   958  		"STOP SLAVE",
   959  		"RESET SLAVE ALL",
   960  		"FAKE SET MASTER",
   961  		"START SLAVE",
   962  		"STOP SLAVE",
   963  		"RESET SLAVE ALL",
   964  		"FAKE SET MASTER",
   965  		"START SLAVE",
   966  	}
   967  	goodReplica1.StartActionLoop(t, wr)
   968  	defer goodReplica1.StopActionLoop(t)
   969  
   970  	// goodReplica2 is not replicating
   971  	goodReplica2.FakeMysqlDaemon.ReadOnly = true
   972  	goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(oldPrimary.Tablet))
   973  	goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   974  		// These 4 statements come from tablet startup
   975  		"STOP SLAVE",
   976  		"RESET SLAVE ALL",
   977  		"FAKE SET MASTER",
   978  		"START SLAVE",
   979  		"RESET SLAVE ALL",
   980  		"FAKE SET MASTER",
   981  	}
   982  	goodReplica2.StartActionLoop(t, wr)
   983  	goodReplica2.FakeMysqlDaemon.Replicating = false
   984  	defer goodReplica2.StopActionLoop(t)
   985  
   986  	// run PlannedReparentShard
   987  	err := vp.Run([]string{"PlannedReparentShard", "--wait_replicas_timeout", "10s", "--keyspace_shard", oldPrimary.Tablet.Keyspace + "/" + oldPrimary.Tablet.Shard, "--new_primary", topoproto.TabletAliasString(oldPrimary.Tablet.Alias)})
   988  	require.NoError(t, err)
   989  	assert.False(t, oldPrimary.FakeMysqlDaemon.ReadOnly, "oldPrimary.FakeMysqlDaemon.ReadOnly")
   990  }