vitess.io/vitess@v0.16.2/go/test/endtoend/reparent/newfeaturetest/reparent_test.go

/*
Copyright 2022 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package newfeaturetest

import (
	"testing"

	"github.com/stretchr/testify/require"

	"vitess.io/vitess/go/test/endtoend/cluster"
	"vitess.io/vitess/go/test/endtoend/reparent/utils"
)

// TestRecoverWithMultipleVttabletFailures tests that ERS succeeds with the default values
// even when there are multiple vttablet failures. In this test we use the semi_sync durability
// policy to allow multiple failures to happen and still be recoverable.
// The test takes down the vttablets of the primary and an rdonly tablet and runs ERS with the
// default values of the remote_operation_timeout and lock-timeout flags and the wait_replicas_timeout subflag.
func TestRecoverWithMultipleVttabletFailures(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets
	utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]})

	// Make tablets[1] an rdonly tablet.
	err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", tablets[1].Alias, "rdonly")
	require.NoError(t, err)

	// Confirm that replication is still working as intended.
	utils.ConfirmReplication(t, tablets[0], tablets[1:])

	// Make the rdonly and primary tablets and their databases unavailable.
	utils.StopTablet(t, tablets[1], true)
	utils.StopTablet(t, tablets[0], true)

	// We expect ERS to succeed since only one primary-eligible tablet (the old primary) is down.
	out, err := utils.Ers(clusterInstance, nil, "", "")
	require.NoError(t, err, out)

	newPrimary := utils.GetNewPrimary(t, clusterInstance)
	utils.ConfirmReplication(t, newPrimary, []*cluster.Vttablet{tablets[2], tablets[3]})
}

// TestSingleReplicaERS tests that ERS works even when there is only 1 tablet left,
// as long as the durability policy allows this failure. Moreover, this also tests that the
// remaining replica is one that was a primary itself before. This way its executed GTID set
// will contain GTIDs from at least 2 tablets. We want to make sure this tablet is not marked
// as errant and ERS succeeds.
func TestSingleReplicaERS(t *testing.T) {
	// Set up a cluster with the "none" durability policy.
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "none")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets
	// Confirm that replication is set up correctly in the beginning.
	// tablets[0] is the primary tablet in the beginning.
	utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]})

	// Delete and stop two tablets. We only want to have 2 tablets for this test.
	utils.DeleteTablet(t, clusterInstance, tablets[2])
	utils.DeleteTablet(t, clusterInstance, tablets[3])
	utils.StopTablet(t, tablets[2], true)
	utils.StopTablet(t, tablets[3], true)

	// Reparent to the other replica.
	output, err := utils.Prs(t, clusterInstance, tablets[1])
	require.NoError(t, err, "error in PlannedReparentShard output - %s", output)

	// Check that replication is set up correctly before we fail over.
	utils.ConfirmReplication(t, tablets[1], []*cluster.Vttablet{tablets[0]})

	// Make the current primary vttablet unavailable.
	utils.StopTablet(t, tablets[1], true)

	// Run an ERS with only one replica reachable. Moreover, this replica was a primary itself before.
	output, err = utils.Ers(clusterInstance, tablets[0], "", "")
	require.NoError(t, err, "error in Emergency Reparent Shard output - %s", output)

	// Check that the tablet is indeed promoted.
	utils.CheckPrimaryTablet(t, clusterInstance, tablets[0])
	// Also check that writes succeed after the failover.
	utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{})
}

// TestTabletRestart tests that a running tablet can be restarted and comes back up healthy.
func TestTabletRestart(t *testing.T) {
	defer cluster.PanicHandler(t)
	clusterInstance := utils.SetupReparentCluster(t, "semi_sync")
	defer utils.TeardownCluster(clusterInstance)
	tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets

	// Stop a replica tablet, then restart it and wait for it to reach the SERVING state again.
	utils.StopTablet(t, tablets[1], false)
	tablets[1].VttabletProcess.ServingStatus = "SERVING"
	err := tablets[1].VttabletProcess.Setup()
	require.NoError(t, err)
}