vitess.io/vitess@v0.16.2/go/vt/wrangler/testlib/emergency_reparent_shard_test.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package testlib
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"testing"
    23  	"time"
    24  
    25  	"github.com/stretchr/testify/assert"
    26  	"github.com/stretchr/testify/require"
    27  	"k8s.io/apimachinery/pkg/util/sets"
    28  
    29  	"vitess.io/vitess/go/mysql"
    30  	"vitess.io/vitess/go/vt/discovery"
    31  	"vitess.io/vitess/go/vt/logutil"
    32  	"vitess.io/vitess/go/vt/topo/memorytopo"
    33  	"vitess.io/vitess/go/vt/topo/topoproto"
    34  	"vitess.io/vitess/go/vt/vtctl/reparentutil/reparenttestutil"
    35  	"vitess.io/vitess/go/vt/vttablet/tmclient"
    36  	"vitess.io/vitess/go/vt/wrangler"
    37  
    38  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    39  )
    40  
    41  func TestEmergencyReparentShard(t *testing.T) {
    42  	delay := discovery.GetTabletPickerRetryDelay()
    43  	defer func() {
    44  		discovery.SetTabletPickerRetryDelay(delay)
    45  	}()
    46  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
    47  
    48  	ts := memorytopo.NewServer("cell1", "cell2")
    49  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
    50  	vp := NewVtctlPipe(t, ts)
    51  	defer vp.Close()
    52  
    53  	// Create a primary, a couple good replicas
    54  	oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
    55  	newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil)
    56  	goodReplica1 := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
    57  	goodReplica2 := NewFakeTablet(t, wr, "cell2", 3, topodatapb.TabletType_REPLICA, nil)
    58  	reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync")
    59  
    60  	oldPrimary.FakeMysqlDaemon.Replicating = false
    61  	oldPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{
    62  		GTIDSet: mysql.MariadbGTIDSet{
    63  			2: mysql.MariadbGTID{
    64  				Domain:   2,
    65  				Server:   123,
    66  				Sequence: 456,
    67  			},
    68  		},
    69  	}
    70  	currentPrimaryFilePosition, _ := mysql.ParseFilePosGTIDSet("mariadb-bin.000010:456")
    71  	oldPrimary.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{
    72  		GTIDSet: currentPrimaryFilePosition,
    73  	}
    74  
    75  	// new primary
    76  	newPrimary.FakeMysqlDaemon.ReadOnly = true
    77  	newPrimary.FakeMysqlDaemon.Replicating = true
    78  	newPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{
    79  		GTIDSet: mysql.MariadbGTIDSet{
    80  			2: mysql.MariadbGTID{
    81  				Domain:   2,
    82  				Server:   123,
    83  				Sequence: 456,
    84  			},
    85  		},
    86  	}
    87  	newPrimaryRelayLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:456")
    88  	newPrimary.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{
    89  		GTIDSet: newPrimaryRelayLogPos,
    90  	}
    91  	newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = append(newPrimary.FakeMysqlDaemon.WaitPrimaryPositions, newPrimary.FakeMysqlDaemon.CurrentSourceFilePosition)
    92  	newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
    93  		"STOP SLAVE IO_THREAD",
    94  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
    95  	}
    96  	newPrimary.FakeMysqlDaemon.PromoteResult = mysql.Position{
    97  		GTIDSet: mysql.MariadbGTIDSet{
    98  			2: mysql.MariadbGTID{
    99  				Domain:   2,
   100  				Server:   123,
   101  				Sequence: 456,
   102  			},
   103  		},
   104  	}
   105  	newPrimary.StartActionLoop(t, wr)
   106  	defer newPrimary.StopActionLoop(t)
   107  
   108  	// old primary, will be scrapped
   109  	oldPrimary.FakeMysqlDaemon.ReadOnly = false
   110  	oldPrimary.FakeMysqlDaemon.ReplicationStatusError = mysql.ErrNotReplica
   111  	oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(oldPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet))
   112  	oldPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   113  		"STOP SLAVE",
   114  	}
   115  	oldPrimary.StartActionLoop(t, wr)
   116  	defer oldPrimary.StopActionLoop(t)
   117  
   118  	// good replica 1 is replicating
   119  	goodReplica1.FakeMysqlDaemon.ReadOnly = true
   120  	goodReplica1.FakeMysqlDaemon.Replicating = true
   121  	goodReplica1.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{
   122  		GTIDSet: mysql.MariadbGTIDSet{
   123  			2: mysql.MariadbGTID{
   124  				Domain:   2,
   125  				Server:   123,
   126  				Sequence: 455,
   127  			},
   128  		},
   129  	}
   130  	goodReplica1RelayLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:455")
   131  	goodReplica1.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{
   132  		GTIDSet: goodReplica1RelayLogPos,
   133  	}
   134  	goodReplica1.FakeMysqlDaemon.WaitPrimaryPositions = append(goodReplica1.FakeMysqlDaemon.WaitPrimaryPositions, goodReplica1.FakeMysqlDaemon.CurrentSourceFilePosition)
   135  	goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica1.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   136  	goodReplica1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   137  		// These 4 statements come from tablet startup
   138  		"STOP SLAVE",
   139  		"RESET SLAVE ALL",
   140  		"FAKE SET MASTER",
   141  		"START SLAVE",
   142  		"STOP SLAVE IO_THREAD",
   143  		"STOP SLAVE",
   144  		"RESET SLAVE ALL",
   145  		"FAKE SET MASTER",
   146  		"START SLAVE",
   147  	}
   148  	goodReplica1.StartActionLoop(t, wr)
   149  	defer goodReplica1.StopActionLoop(t)
   150  
   151  	// good replica 2 is not replicating
   152  	goodReplica2.FakeMysqlDaemon.ReadOnly = true
   153  	goodReplica2.FakeMysqlDaemon.Replicating = false
   154  	goodReplica2.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{
   155  		GTIDSet: mysql.MariadbGTIDSet{
   156  			2: mysql.MariadbGTID{
   157  				Domain:   2,
   158  				Server:   123,
   159  				Sequence: 454,
   160  			},
   161  		},
   162  	}
   163  	goodReplica2RelayLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:454")
   164  	goodReplica2.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{
   165  		GTIDSet: goodReplica2RelayLogPos,
   166  	}
   167  	goodReplica2.FakeMysqlDaemon.WaitPrimaryPositions = append(goodReplica2.FakeMysqlDaemon.WaitPrimaryPositions, goodReplica2.FakeMysqlDaemon.CurrentSourceFilePosition)
   168  	goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs = append(goodReplica2.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   169  	goodReplica2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   170  		// These 4 statements come from tablet startup
   171  		"STOP SLAVE",
   172  		"RESET SLAVE ALL",
   173  		"FAKE SET MASTER",
   174  		"START SLAVE",
   175  		"RESET SLAVE ALL",
   176  		"FAKE SET MASTER",
   177  	}
   178  	goodReplica2.StartActionLoop(t, wr)
   179  	defer goodReplica2.StopActionLoop(t)
   180  
   181  	// run EmergencyReparentShard
   182  	waitReplicaTimeout := time.Second * 2
   183  	err := vp.Run([]string{"EmergencyReparentShard", "--wait_replicas_timeout", waitReplicaTimeout.String(), newPrimary.Tablet.Keyspace + "/" + newPrimary.Tablet.Shard,
   184  		topoproto.TabletAliasString(newPrimary.Tablet.Alias)})
   185  	require.NoError(t, err)
   186  	// check what was run
   187  	err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList()
   188  	require.NoError(t, err)
   189  
   190  	assert.False(t, newPrimary.FakeMysqlDaemon.ReadOnly, "newPrimary.FakeMysqlDaemon.ReadOnly set")
   191  	checkSemiSyncEnabled(t, true, true, newPrimary)
   192  }
   193  
   194  // TestEmergencyReparentShardPrimaryElectNotBest tries to emergency reparent
   195  // to a host that is not the latest in replication position.
   196  func TestEmergencyReparentShardPrimaryElectNotBest(t *testing.T) {
   197  	ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
   198  	defer cancel()
   199  
   200  	delay := discovery.GetTabletPickerRetryDelay()
   201  	defer func() {
   202  		discovery.SetTabletPickerRetryDelay(delay)
   203  	}()
   204  	discovery.SetTabletPickerRetryDelay(5 * time.Millisecond)
   205  
   206  	ts := memorytopo.NewServer("cell1", "cell2")
   207  	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())
   208  
   209  	// Create a primary, a couple good replicas
   210  	oldPrimary := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_PRIMARY, nil)
   211  	newPrimary := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, nil)
   212  	moreAdvancedReplica := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, nil)
   213  	reparenttestutil.SetKeyspaceDurability(context.Background(), t, ts, "test_keyspace", "semi_sync")
   214  
   215  	// new primary
   216  	newPrimary.FakeMysqlDaemon.Replicating = true
   217  	// It has transactions in its relay log, but not as many as
   218  	// moreAdvancedReplica
   219  	newPrimary.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{
   220  		GTIDSet: mysql.MariadbGTIDSet{
   221  			2: mysql.MariadbGTID{
   222  				Domain:   2,
   223  				Server:   123,
   224  				Sequence: 456,
   225  			},
   226  		},
   227  	}
   228  	newPrimaryRelayLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:456")
   229  	newPrimary.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{
   230  		GTIDSet: newPrimaryRelayLogPos,
   231  	}
   232  	newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = append(newPrimary.FakeMysqlDaemon.WaitPrimaryPositions, newPrimary.FakeMysqlDaemon.CurrentSourceFilePosition)
   233  	newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs = append(newPrimary.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(moreAdvancedReplica.Tablet))
   234  	newPrimary.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   235  		"STOP SLAVE IO_THREAD",
   236  		"STOP SLAVE",
   237  		"RESET SLAVE ALL",
   238  		"FAKE SET MASTER",
   239  		"START SLAVE",
   240  		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, primary_alias, replication_position) VALUES",
   241  	}
   242  	newPrimary.StartActionLoop(t, wr)
   243  	defer newPrimary.StopActionLoop(t)
   244  
   245  	// old primary, will be scrapped
   246  	oldPrimary.FakeMysqlDaemon.ReplicationStatusError = fmt.Errorf("old primary stopped working")
   247  	oldPrimary.StartActionLoop(t, wr)
   248  	defer oldPrimary.StopActionLoop(t)
   249  
   250  	// more advanced replica
   251  	moreAdvancedReplica.FakeMysqlDaemon.Replicating = true
   252  	// relay log position is more advanced than desired new primary
   253  	moreAdvancedReplica.FakeMysqlDaemon.CurrentPrimaryPosition = mysql.Position{
   254  		GTIDSet: mysql.MariadbGTIDSet{
   255  			2: mysql.MariadbGTID{
   256  				Domain:   2,
   257  				Server:   123,
   258  				Sequence: 457,
   259  			},
   260  		},
   261  	}
   262  	moreAdvancedReplicaLogPos, _ := mysql.ParseFilePosGTIDSet("relay-bin.000004:457")
   263  	moreAdvancedReplica.FakeMysqlDaemon.CurrentSourceFilePosition = mysql.Position{
   264  		GTIDSet: moreAdvancedReplicaLogPos,
   265  	}
   266  	moreAdvancedReplica.FakeMysqlDaemon.SetReplicationSourceInputs = append(moreAdvancedReplica.FakeMysqlDaemon.SetReplicationSourceInputs, topoproto.MysqlAddr(newPrimary.Tablet), topoproto.MysqlAddr(oldPrimary.Tablet))
   267  	moreAdvancedReplica.FakeMysqlDaemon.WaitPrimaryPositions = append(moreAdvancedReplica.FakeMysqlDaemon.WaitPrimaryPositions, moreAdvancedReplica.FakeMysqlDaemon.CurrentSourceFilePosition)
   268  	newPrimary.FakeMysqlDaemon.WaitPrimaryPositions = append(newPrimary.FakeMysqlDaemon.WaitPrimaryPositions, moreAdvancedReplica.FakeMysqlDaemon.CurrentPrimaryPosition)
   269  	moreAdvancedReplica.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
   270  		// These 4 statements come from tablet startup
   271  		"STOP SLAVE",
   272  		"RESET SLAVE ALL",
   273  		"FAKE SET MASTER",
   274  		"START SLAVE",
   275  		"STOP SLAVE IO_THREAD",
   276  		"STOP SLAVE",
   277  		"RESET SLAVE ALL",
   278  		"FAKE SET MASTER",
   279  		"START SLAVE",
   280  	}
   281  	moreAdvancedReplica.StartActionLoop(t, wr)
   282  	defer moreAdvancedReplica.StopActionLoop(t)
   283  
   284  	// run EmergencyReparentShard
   285  	err := wr.EmergencyReparentShard(ctx, newPrimary.Tablet.Keyspace, newPrimary.Tablet.Shard, newPrimary.Tablet.Alias, 10*time.Second, sets.New[string](), false)
   286  	cancel()
   287  
   288  	assert.NoError(t, err)
   289  	// check what was run
   290  	err = newPrimary.FakeMysqlDaemon.CheckSuperQueryList()
   291  	require.NoError(t, err)
   292  	err = oldPrimary.FakeMysqlDaemon.CheckSuperQueryList()
   293  	require.NoError(t, err)
   294  	err = moreAdvancedReplica.FakeMysqlDaemon.CheckSuperQueryList()
   295  	require.NoError(t, err)
   296  }