vitess.io/vitess@v0.16.2/go/test/endtoend/vtgate/tablet_healthcheck_cache/correctness_test.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package tablethealthcheckcache
    18  
    19  import (
    20  	"context"
    21  	"flag"
    22  	"fmt"
    23  	"os"
    24  	"sync"
    25  	"testing"
    26  	"time"
    27  
    28  	"github.com/stretchr/testify/assert"
    29  	"github.com/stretchr/testify/require"
    30  
    31  	"vitess.io/vitess/go/mysql"
    32  	"vitess.io/vitess/go/test/endtoend/cluster"
    33  )
    34  
// Package-level state shared by TestMain and the tests in this file.
//
//   - clusterInstance is the local process cluster; it is created in TestMain.
//   - vtParams holds the MySQL connection parameters for vtgate; it is filled
//     in by TestMain once vtgate has been started.
//   - tabletRefreshInterval is passed to vtgate as --tablet_refresh_interval
//     and is also how long the churn test sleeps after deleting a tablet.
//   - keyspaceName and cell name the keyspace and cell the cluster is built with.
//   - shards lists the shards of the keyspace; churned tablets are added to
//     the first one.
//   - schemaSQL is the schema handed to the keyspace when it is started.
var (
	clusterInstance       *cluster.LocalProcessCluster
	vtParams              mysql.ConnParams
	tabletRefreshInterval = 5 * time.Second
	keyspaceName          = "healthcheck_test_ks"
	cell                  = "healthcheck_test_cell"
	shards                = []string{"-80", "80-"}
	schemaSQL             = `
create table customer(
	customer_id bigint not null auto_increment,
	email varbinary(128),
	primary key(customer_id)
) ENGINE=InnoDB;
create table corder(
	order_id bigint not null auto_increment,
	customer_id bigint,
	sku varbinary(128),
	price bigint,
	primary key(order_id)
) ENGINE=InnoDB;
`

	// vSchema marks the keyspace as sharded and maps both tables onto the
	// "hash" vindex via their customer_id column.
	vSchema = `
{
	"sharded": true,
	"vindexes": {
		"hash": {
			"type": "hash"
		}
	},
	"tables": {
		"customer": {
			"column_vindexes": [
				{
					"column": "customer_id",
					"name": "hash"
				}
			]
		},
		"corder": {
			"column_vindexes": [
				{
					"column": "customer_id",
					"name": "hash"
				}
			]
		}
	}
}
`
)
    86  
    87  // TestMain sets up the vitess cluster for any subsequent tests
    88  func TestMain(m *testing.M) {
    89  	defer cluster.PanicHandler(nil)
    90  	flag.Parse()
    91  
    92  	exitCode := func() int {
    93  		clusterInstance = cluster.NewCluster(cell, "localhost")
    94  		defer clusterInstance.Teardown()
    95  
    96  		// Start topo server
    97  		err := clusterInstance.StartTopo()
    98  		if err != nil {
    99  			return 1
   100  		}
   101  
   102  		// Start keyspace
   103  		keyspace := &cluster.Keyspace{
   104  			Name:      keyspaceName,
   105  			SchemaSQL: schemaSQL,
   106  			VSchema:   vSchema,
   107  		}
   108  		clusterInstance.VtTabletExtraArgs = append(clusterInstance.VtTabletExtraArgs, []string{"--health_check_interval", "1s"}...)
   109  		err = clusterInstance.StartKeyspace(*keyspace, shards, 1, false)
   110  		if err != nil {
   111  			return 1
   112  		}
   113  
   114  		clusterInstance.VtGateExtraArgs = append(clusterInstance.VtGateExtraArgs, []string{"--tablet_refresh_interval", tabletRefreshInterval.String()}...)
   115  		err = clusterInstance.StartVtgate()
   116  		if err != nil {
   117  			return 1
   118  		}
   119  
   120  		vtParams = mysql.ConnParams{
   121  			Host: clusterInstance.Hostname,
   122  			Port: clusterInstance.VtgateMySQLPort,
   123  		}
   124  		return m.Run()
   125  	}()
   126  	os.Exit(exitCode)
   127  }
   128  
   129  // TestHealthCheckCacheWithTabletChurn verifies that the tablet healthcheck cache has the correct number of records
   130  // after many rounds of adding and removing tablets in quick succession. This verifies that we don't have any race
   131  // conditions with these operations and their interactions with the cache.
   132  func TestHealthCheckCacheWithTabletChurn(t *testing.T) {
   133  	ctx := context.Background()
   134  	tries := 5
   135  	numShards := len(shards)
   136  	// 1 for primary,replica
   137  	expectedTabletHCcacheEntries := numShards * 2
   138  	churnTabletUID := 9999
   139  	churnTabletType := "rdonly"
   140  
   141  	// verify output of SHOW VITESS_TABLETS
   142  	vtgateConn, err := mysql.Connect(ctx, &vtParams)
   143  	require.Nil(t, err)
   144  	defer vtgateConn.Close()
   145  	query := "show vitess_tablets"
   146  
   147  	// starting with two shards, each with 1 primary and 1 replica tablet)
   148  	// we'll be adding and removing a tablet of type churnTabletType with churnTabletUID
   149  	qr, _ := vtgateConn.ExecuteFetch(query, 100, true)
   150  	assert.Equal(t, expectedTabletHCcacheEntries, len(qr.Rows), "wrong number of tablet records in healthcheck cache, expected %d but had %d. Results: %v", expectedTabletHCcacheEntries, len(qr.Rows), qr.Rows)
   151  
   152  	for i := 0; i < tries; i++ {
   153  		tablet := addTablet(t, churnTabletUID, churnTabletType)
   154  		expectedTabletHCcacheEntries++
   155  
   156  		qr, _ := vtgateConn.ExecuteFetch(query, 100, true)
   157  		assert.Equal(t, expectedTabletHCcacheEntries, len(qr.Rows), "wrong number of tablet records in healthcheck cache, expected %d but had %d. Results: %v", expectedTabletHCcacheEntries, len(qr.Rows), qr.Rows)
   158  
   159  		deleteTablet(t, tablet)
   160  		expectedTabletHCcacheEntries--
   161  
   162  		// We need to sleep for at least vtgate's --tablet_refresh_interval to be sure we
   163  		// have resynchronized the healthcheck cache with the topo server via the topology
   164  		// watcher and pruned the deleted tablet from the healthcheck cache.
   165  		time.Sleep(tabletRefreshInterval)
   166  
   167  		qr, _ = vtgateConn.ExecuteFetch(query, 100, true)
   168  		assert.Equal(t, expectedTabletHCcacheEntries, len(qr.Rows), "wrong number of tablet records in healthcheck cache, expected %d but had %d. Results: %v", expectedTabletHCcacheEntries, len(qr.Rows), qr.Rows)
   169  	}
   170  
   171  	// one final time, w/o the churning tablet
   172  	qr, _ = vtgateConn.ExecuteFetch(query, 100, true)
   173  	assert.Equal(t, expectedTabletHCcacheEntries, len(qr.Rows), "wrong number of tablet records in healthcheck cache, expected %d but had %d", expectedTabletHCcacheEntries, len(qr.Rows))
   174  }
   175  
   176  func addTablet(t *testing.T, tabletUID int, tabletType string) *cluster.Vttablet {
   177  	tablet := &cluster.Vttablet{
   178  		TabletUID: tabletUID,
   179  		Type:      tabletType,
   180  		HTTPPort:  clusterInstance.GetAndReservePort(),
   181  		GrpcPort:  clusterInstance.GetAndReservePort(),
   182  		MySQLPort: clusterInstance.GetAndReservePort(),
   183  		Alias:     fmt.Sprintf("%s-%010d", cell, tabletUID),
   184  	}
   185  	// Start Mysqlctl process
   186  	tablet.MysqlctlProcess = *cluster.MysqlCtlProcessInstanceOptionalInit(tablet.TabletUID, tablet.MySQLPort, clusterInstance.TmpDirectory, !clusterInstance.ReusingVTDATAROOT)
   187  	proc, err := tablet.MysqlctlProcess.StartProcess()
   188  	require.Nil(t, err)
   189  
   190  	// Start vttablet process
   191  	tablet.VttabletProcess = cluster.VttabletProcessInstance(
   192  		tablet.HTTPPort,
   193  		tablet.GrpcPort,
   194  		tabletUID,
   195  		cell,
   196  		shards[0],
   197  		keyspaceName,
   198  		clusterInstance.VtctldProcess.Port,
   199  		tablet.Type,
   200  		clusterInstance.TopoProcess.Port,
   201  		clusterInstance.Hostname,
   202  		clusterInstance.TmpDirectory,
   203  		clusterInstance.VtTabletExtraArgs,
   204  		clusterInstance.DefaultCharset)
   205  
   206  	// wait for mysqld to be ready
   207  	err = proc.Wait()
   208  	require.Nil(t, err)
   209  
   210  	tablet.VttabletProcess.ServingStatus = ""
   211  	err = tablet.VttabletProcess.Setup()
   212  	require.Nil(t, err)
   213  
   214  	serving := tablet.VttabletProcess.WaitForStatus("SERVING", time.Duration(60*time.Second))
   215  	assert.Equal(t, serving, true, "Tablet did not become ready within a reasonable time")
   216  	err = clusterInstance.VtgateProcess.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.%s",
   217  		tablet.VttabletProcess.Keyspace, tablet.VttabletProcess.Shard, tablet.Type), 1, 30*time.Second)
   218  	require.Nil(t, err)
   219  
   220  	t.Logf("Added tablet: %s", tablet.Alias)
   221  	return tablet
   222  }
   223  
   224  func deleteTablet(t *testing.T, tablet *cluster.Vttablet) {
   225  	var wg sync.WaitGroup
   226  	wg.Add(1)
   227  	go func(tablet *cluster.Vttablet) {
   228  		defer wg.Done()
   229  		_ = tablet.VttabletProcess.TearDown()
   230  		_ = tablet.MysqlctlProcess.Stop()
   231  		tablet.MysqlctlProcess.CleanupFiles(tablet.TabletUID)
   232  	}(tablet)
   233  	wg.Wait()
   234  
   235  	err := clusterInstance.VtctlclientProcess.ExecuteCommand("DeleteTablet", tablet.Alias)
   236  	require.Nil(t, err)
   237  
   238  	t.Logf("Deleted tablet: %s", tablet.Alias)
   239  }