vitess.io/vitess@v0.16.2/go/test/endtoend/reparent/newfeaturetest/reparent_test.go (about) 1 /* 2 Copyright 2022 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package newfeaturetest 18 19 import ( 20 "testing" 21 22 "github.com/stretchr/testify/require" 23 24 "vitess.io/vitess/go/test/endtoend/cluster" 25 "vitess.io/vitess/go/test/endtoend/reparent/utils" 26 ) 27 28 // TestRecoverWithMultipleVttabletFailures tests that ERS succeeds with the default values 29 // even when there are multiple vttablet failures. In this test we use the semi_sync policy 30 // to allow multiple failures to happen and still be recoverable. 31 // The test takes down the vttablets of the primary and a rdonly tablet and runs ERS with the 32 // default values of remote_operation_timeout, lock-timeout flags and wait_replicas_timeout subflag. 33 func TestRecoverWithMultipleVttabletFailures(t *testing.T) { 34 defer cluster.PanicHandler(t) 35 clusterInstance := utils.SetupReparentCluster(t, "semi_sync") 36 defer utils.TeardownCluster(clusterInstance) 37 tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets 38 utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) 39 40 // make tablets[1] a rdonly tablet. 41 err := clusterInstance.VtctlclientProcess.ExecuteCommand("ChangeTabletType", tablets[1].Alias, "rdonly") 42 require.NoError(t, err) 43 44 // Confirm that replication is still working as intended 45 utils.ConfirmReplication(t, tablets[0], tablets[1:]) 46 47 // Make the rdonly and primary tablets and databases unavailable. 48 utils.StopTablet(t, tablets[1], true) 49 utils.StopTablet(t, tablets[0], true) 50 51 // We expect this to succeed since we only have 1 primary eligible tablet which is down 52 out, err := utils.Ers(clusterInstance, nil, "", "") 53 require.NoError(t, err, out) 54 55 newPrimary := utils.GetNewPrimary(t, clusterInstance) 56 utils.ConfirmReplication(t, newPrimary, []*cluster.Vttablet{tablets[2], tablets[3]}) 57 } 58 59 // TetsSingeReplicaERS tests that ERS works even when there is only 1 tablet left 60 // as long the durability policy allows this failure. Moreover, this also tests that the 61 // replica is one such that it was a primary itself before. This way its executed gtid set 62 // will have atleast 2 tablets in it. We want to make sure this tablet is not marked as errant 63 // and ERS succeeds. 64 func TestSingleReplicaERS(t *testing.T) { 65 // Set up a cluster with none durability policy 66 defer cluster.PanicHandler(t) 67 clusterInstance := utils.SetupReparentCluster(t, "none") 68 defer utils.TeardownCluster(clusterInstance) 69 tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets 70 // Confirm that the replication is setup correctly in the beginning. 71 // tablets[0] is the primary tablet in the beginning. 72 utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[2], tablets[3]}) 73 74 // Delete and stop two tablets. We only want to have 2 tablets for this test. 75 utils.DeleteTablet(t, clusterInstance, tablets[2]) 76 utils.DeleteTablet(t, clusterInstance, tablets[3]) 77 utils.StopTablet(t, tablets[2], true) 78 utils.StopTablet(t, tablets[3], true) 79 80 // Reparent to the other replica 81 output, err := utils.Prs(t, clusterInstance, tablets[1]) 82 require.NoError(t, err, "error in PlannedReparentShard output - %s", output) 83 84 // Check the replication is set up correctly before we failover 85 utils.ConfirmReplication(t, tablets[1], []*cluster.Vttablet{tablets[0]}) 86 87 // Make the current primary vttablet unavailable. 88 utils.StopTablet(t, tablets[1], true) 89 90 // Run an ERS with only one replica reachable. Also, this replica is such that it was a primary before. 91 output, err = utils.Ers(clusterInstance, tablets[0], "", "") 92 require.NoError(t, err, "error in Emergency Reparent Shard output - %s", output) 93 94 // Check the tablet is indeed promoted 95 utils.CheckPrimaryTablet(t, clusterInstance, tablets[0]) 96 // Also check the writes succeed after failover 97 utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{}) 98 } 99 100 // TestTabletRestart tests that a running tablet can be restarted and everything is still fine 101 func TestTabletRestart(t *testing.T) { 102 defer cluster.PanicHandler(t) 103 clusterInstance := utils.SetupReparentCluster(t, "semi_sync") 104 defer utils.TeardownCluster(clusterInstance) 105 tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets 106 107 utils.StopTablet(t, tablets[1], false) 108 tablets[1].VttabletProcess.ServingStatus = "SERVING" 109 err := tablets[1].VttabletProcess.Setup() 110 require.NoError(t, err) 111 }