vitess.io/vitess@v0.16.2/go/test/endtoend/tabletgateway/buffer/reparent/failover_buffer_test.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package reparent
    18  
    19  import (
    20  	"fmt"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/stretchr/testify/assert"
    25  	"github.com/stretchr/testify/require"
    26  
    27  	"vitess.io/vitess/go/test/endtoend/cluster"
    28  	"vitess.io/vitess/go/test/endtoend/tabletgateway/buffer"
    29  	"vitess.io/vitess/go/vt/log"
    30  )
    31  
    32  const (
    33  	demoteQuery  = "SET GLOBAL read_only = ON;FLUSH TABLES WITH READ LOCK;UNLOCK TABLES;"
    34  	promoteQuery = "STOP SLAVE;RESET SLAVE ALL;SET GLOBAL read_only = OFF;"
    35  
    36  	hostname = "localhost"
    37  )
    38  
    39  func failoverExternalReparenting(t *testing.T, clusterInstance *cluster.LocalProcessCluster, keyspaceUnshardedName string, reads, writes buffer.QueryEngine) {
    40  	// Execute the failover.
    41  	reads.ExpectQueries(10)
    42  	writes.ExpectQueries(10)
    43  
    44  	start := time.Now()
    45  
    46  	// Demote Query
    47  	primary := clusterInstance.Keyspaces[0].Shards[0].Vttablets[0]
    48  	replica := clusterInstance.Keyspaces[0].Shards[0].Vttablets[1]
    49  	oldPrimary := primary
    50  	newPrimary := replica
    51  	primary.VttabletProcess.QueryTablet(demoteQuery, keyspaceUnshardedName, true)
    52  
    53  	// Wait for replica to catch up to primary.
    54  	cluster.WaitForReplicationPos(t, primary, replica, "localhost", 60.0)
    55  
    56  	duration := time.Since(start)
    57  	minUnavailabilityInS := 1.0
    58  	if duration.Seconds() < minUnavailabilityInS {
    59  		w := minUnavailabilityInS - duration.Seconds()
    60  		log.Infof("Waiting for %.1f seconds because the failover was too fast (took only %.3f seconds)", w, duration.Seconds())
    61  		time.Sleep(time.Duration(w) * time.Second)
    62  	}
    63  
    64  	// Promote replica to new primary.
    65  	replica.VttabletProcess.QueryTablet(promoteQuery, keyspaceUnshardedName, true)
    66  
    67  	// Configure old primary to replicate from new primary.
    68  
    69  	_, gtID := cluster.GetPrimaryPosition(t, *newPrimary, hostname)
    70  
    71  	// Use 'localhost' as hostname because Travis CI worker hostnames
    72  	// are too long for MySQL replication.
    73  	changeSourceCommands := fmt.Sprintf("RESET SLAVE;SET GLOBAL gtid_slave_pos = '%s';CHANGE MASTER TO MASTER_HOST='%s', MASTER_PORT=%d ,MASTER_USER='vt_repl', MASTER_USE_GTID = slave_pos;START SLAVE;", gtID, "localhost", newPrimary.MySQLPort)
    74  	oldPrimary.VttabletProcess.QueryTablet(changeSourceCommands, keyspaceUnshardedName, true)
    75  
    76  	// Notify the new vttablet primary about the reparent.
    77  	err := clusterInstance.VtctlclientProcess.ExecuteCommand("TabletExternallyReparented", newPrimary.Alias)
    78  	require.NoError(t, err)
    79  }
    80  
    81  func failoverPlannedReparenting(t *testing.T, clusterInstance *cluster.LocalProcessCluster, keyspaceUnshardedName string, reads, writes buffer.QueryEngine) {
    82  	// Execute the failover.
    83  	reads.ExpectQueries(10)
    84  	writes.ExpectQueries(10)
    85  
    86  	err := clusterInstance.VtctlclientProcess.ExecuteCommand("PlannedReparentShard", "--", "--keyspace_shard",
    87  		fmt.Sprintf("%s/%s", keyspaceUnshardedName, "0"),
    88  		"--new_primary", clusterInstance.Keyspaces[0].Shards[0].Vttablets[1].Alias)
    89  	require.NoError(t, err)
    90  }
    91  
    92  func assertFailover(t *testing.T, shard string, stats *buffer.VTGateBufferingStats) {
    93  	stopLabel := fmt.Sprintf("%s.%s", shard, "NewPrimarySeen")
    94  
    95  	assert.Greater(t, stats.BufferFailoverDurationSumMs[shard], 0)
    96  	assert.Greater(t, stats.BufferRequestsBuffered[shard], 0)
    97  	assert.Greater(t, stats.BufferStops[stopLabel], 0)
    98  
    99  	// Number of buffering stops must be equal to the number of seen failovers.
   100  	assert.Equal(t, stats.HealthcheckPrimaryPromoted[shard], stats.BufferStops[stopLabel])
   101  }
   102  
   103  func TestBufferReparenting(t *testing.T) {
   104  	t.Run("TER without reserved connection", func(t *testing.T) {
   105  		bt := &buffer.BufferingTest{
   106  			Assert:      assertFailover,
   107  			Failover:    failoverExternalReparenting,
   108  			ReserveConn: false,
   109  		}
   110  		bt.Test(t)
   111  	})
   112  	t.Run("TER with reserved connection", func(t *testing.T) {
   113  		bt := &buffer.BufferingTest{
   114  			Assert:      assertFailover,
   115  			Failover:    failoverExternalReparenting,
   116  			ReserveConn: true,
   117  		}
   118  		bt.Test(t)
   119  	})
   120  	t.Run("PRS without reserved connections", func(t *testing.T) {
   121  		bt := &buffer.BufferingTest{
   122  			Assert:      assertFailover,
   123  			Failover:    failoverPlannedReparenting,
   124  			ReserveConn: false,
   125  		}
   126  		bt.Test(t)
   127  	})
   128  	t.Run("PRS with reserved connections", func(t *testing.T) {
   129  		bt := &buffer.BufferingTest{
   130  			Assert:      assertFailover,
   131  			Failover:    failoverPlannedReparenting,
   132  			ReserveConn: true,
   133  		}
   134  		bt.Test(t)
   135  	})
   136  }