vitess.io/vitess@v0.16.2/go/test/endtoend/cellalias/cell_alias_test.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  
    16  This tests the cell aliases feature.
    17  
    18  We start with no aliases and assert that vtgates can't route to replica/rdonly tablets.
    19  Then we add an alias, and these tablets should be routable.
    20  */
    21  
    22  package binlog
    23  
    24  import (
    25  	"context"
    26  	"flag"
    27  	"fmt"
    28  	"os"
    29  	"os/exec"
    30  	"testing"
    31  	"time"
    32  
    33  	"github.com/stretchr/testify/assert"
    34  	"github.com/stretchr/testify/require"
    35  
    36  	"vitess.io/vitess/go/test/endtoend/cluster"
    37  	"vitess.io/vitess/go/vt/proto/topodata"
    38  )
    39  
    40  var (
    41  	localCluster *cluster.LocalProcessCluster
    42  	cell1        = "zone1"
    43  	cell2        = "zone2"
    44  	hostname     = "localhost"
    45  	keyspaceName = "ks"
    46  	tableName    = "test_table"
    47  	sqlSchema    = `
    48  					create table %s(
    49  					id bigint(20) unsigned auto_increment,
    50  					msg varchar(64),
    51  					primary key (id),
    52  					index by_msg (msg)
    53  					) Engine=InnoDB
    54  `
    55  	commonTabletArg = []string{
    56  		"--vreplication_healthcheck_topology_refresh", "1s",
    57  		"--vreplication_healthcheck_retry_delay", "1s",
    58  		"--vreplication_retry_delay", "1s",
    59  		"--degraded_threshold", "5s",
    60  		"--lock_tables_timeout", "5s",
    61  		"--watch_replication_stream",
    62  		"--enable_replication_reporter",
    63  		"--serving_state_grace_period", "1s",
    64  		"--binlog_player_protocol", "grpc",
    65  	}
    66  	vSchema = `
    67  		{
    68  		  "sharded": true,
    69  		  "vindexes": {
    70  			"hash_index": {
    71  			  "type": "hash"
    72  			}
    73  		  },
    74  		  "tables": {
    75  			"%s": {
    76  			   "column_vindexes": [
    77  				{
    78  				  "column": "id",
    79  				  "name": "hash_index"
    80  				}
    81  			  ] 
    82  			}
    83  		  }
    84  		}
    85  `
    86  	shard1Primary *cluster.Vttablet
    87  	shard1Replica *cluster.Vttablet
    88  	shard1Rdonly  *cluster.Vttablet
    89  	shard2Primary *cluster.Vttablet
    90  	shard2Replica *cluster.Vttablet
    91  	shard2Rdonly  *cluster.Vttablet
    92  )
    93  
    94  func TestMain(m *testing.M) {
    95  	defer cluster.PanicHandler(nil)
    96  	flag.Parse()
    97  
    98  	exitcode, err := func() (int, error) {
    99  		localCluster = cluster.NewCluster(cell1, hostname)
   100  		defer localCluster.Teardown()
   101  		localCluster.Keyspaces = append(localCluster.Keyspaces, cluster.Keyspace{
   102  			Name: keyspaceName,
   103  		})
   104  
   105  		// Start topo server
   106  		if err := localCluster.StartTopo(); err != nil {
   107  			return 1, err
   108  		}
   109  
   110  		// Adding another cell in the same cluster
   111  		err := localCluster.TopoProcess.ManageTopoDir("mkdir", "/vitess/"+cell2)
   112  		if err != nil {
   113  			return 1, err
   114  		}
   115  		err = localCluster.VtctlProcess.AddCellInfo(cell2)
   116  		if err != nil {
   117  			return 1, err
   118  		}
   119  
   120  		vtctldClientProcess := cluster.VtctldClientProcessInstance("localhost", localCluster.VtctldProcess.GrpcPort, localCluster.TmpDirectory)
   121  		_, err = vtctldClientProcess.ExecuteCommandWithOutput("CreateKeyspace", keyspaceName, "--durability-policy=semi_sync")
   122  		if err != nil {
   123  			return 1, err
   124  		}
   125  
   126  		shard1Primary = localCluster.NewVttabletInstance("primary", 0, cell1)
   127  		shard1Replica = localCluster.NewVttabletInstance("replica", 0, cell2)
   128  		shard1Rdonly = localCluster.NewVttabletInstance("rdonly", 0, cell2)
   129  
   130  		shard2Primary = localCluster.NewVttabletInstance("primary", 0, cell1)
   131  		shard2Replica = localCluster.NewVttabletInstance("replica", 0, cell2)
   132  		shard2Rdonly = localCluster.NewVttabletInstance("rdonly", 0, cell2)
   133  
   134  		var mysqlProcs []*exec.Cmd
   135  		for _, tablet := range []*cluster.Vttablet{shard1Primary, shard1Replica, shard1Rdonly, shard2Primary, shard2Replica, shard2Rdonly} {
   136  			tablet.MysqlctlProcess = *cluster.MysqlCtlProcessInstance(tablet.TabletUID, tablet.MySQLPort, localCluster.TmpDirectory)
   137  			tablet.VttabletProcess = cluster.VttabletProcessInstance(tablet.HTTPPort,
   138  				tablet.GrpcPort,
   139  				tablet.TabletUID,
   140  				tablet.Cell,
   141  				"",
   142  				keyspaceName,
   143  				localCluster.VtctldProcess.Port,
   144  				tablet.Type,
   145  				localCluster.TopoPort,
   146  				hostname,
   147  				localCluster.TmpDirectory,
   148  				commonTabletArg,
   149  				localCluster.DefaultCharset,
   150  			)
   151  			tablet.VttabletProcess.SupportsBackup = true
   152  			proc, err := tablet.MysqlctlProcess.StartProcess()
   153  			if err != nil {
   154  				return 1, err
   155  			}
   156  			mysqlProcs = append(mysqlProcs, proc)
   157  		}
   158  		for _, proc := range mysqlProcs {
   159  			if err := proc.Wait(); err != nil {
   160  				return 1, err
   161  			}
   162  		}
   163  
   164  		shard1 := cluster.Shard{
   165  			Name:      "-80",
   166  			Vttablets: []*cluster.Vttablet{shard1Primary, shard1Replica, shard1Rdonly},
   167  		}
   168  		for idx := range shard1.Vttablets {
   169  			shard1.Vttablets[idx].VttabletProcess.Shard = shard1.Name
   170  		}
   171  		localCluster.Keyspaces[0].Shards = append(localCluster.Keyspaces[0].Shards, shard1)
   172  
   173  		shard2 := cluster.Shard{
   174  			Name:      "80-",
   175  			Vttablets: []*cluster.Vttablet{shard2Primary, shard2Replica, shard2Rdonly},
   176  		}
   177  		for idx := range shard2.Vttablets {
   178  			shard2.Vttablets[idx].VttabletProcess.Shard = shard2.Name
   179  		}
   180  		localCluster.Keyspaces[0].Shards = append(localCluster.Keyspaces[0].Shards, shard2)
   181  
   182  		for _, tablet := range shard1.Vttablets {
   183  			if err := tablet.VttabletProcess.Setup(); err != nil {
   184  				return 1, err
   185  			}
   186  		}
   187  		if err := localCluster.VtctlclientProcess.InitializeShard(keyspaceName, shard1.Name, shard1Primary.Cell, shard1Primary.TabletUID); err != nil {
   188  			return 1, err
   189  		}
   190  
   191  		// run a health check on source replica so it responds to discovery
   192  		// (for binlog players) and on the source rdonlys (for workers)
   193  		for _, tablet := range []string{shard1Replica.Alias, shard1Rdonly.Alias} {
   194  			if err := localCluster.VtctlclientProcess.ExecuteCommand("RunHealthCheck", tablet); err != nil {
   195  				return 1, err
   196  			}
   197  		}
   198  
   199  		for _, tablet := range shard2.Vttablets {
   200  			if err := tablet.VttabletProcess.Setup(); err != nil {
   201  				return 1, err
   202  			}
   203  		}
   204  
   205  		if err := localCluster.VtctlclientProcess.InitializeShard(keyspaceName, shard2.Name, shard2Primary.Cell, shard2Primary.TabletUID); err != nil {
   206  			return 1, err
   207  		}
   208  
   209  		if err := localCluster.StartVTOrc(keyspaceName); err != nil {
   210  			return 1, err
   211  		}
   212  
   213  		if err := localCluster.VtctlclientProcess.ApplySchema(keyspaceName, fmt.Sprintf(sqlSchema, tableName)); err != nil {
   214  			return 1, err
   215  		}
   216  		if err := localCluster.VtctlclientProcess.ApplyVSchema(keyspaceName, fmt.Sprintf(vSchema, tableName)); err != nil {
   217  			return 1, err
   218  		}
   219  
   220  		_ = localCluster.VtctlclientProcess.ExecuteCommand("RebuildKeyspaceGraph", keyspaceName)
   221  
   222  		return m.Run(), nil
   223  	}()
   224  	if err != nil {
   225  		fmt.Printf("%v\n", err)
   226  		os.Exit(1)
   227  	} else {
   228  		os.Exit(exitcode)
   229  	}
   230  }
   231  
   232  func TestAlias(t *testing.T) {
   233  	defer cluster.PanicHandler(t)
   234  
   235  	insertInitialValues(t)
   236  	defer deleteInitialValues(t)
   237  
   238  	err := localCluster.VtctlclientProcess.ExecuteCommand("RebuildKeyspaceGraph", keyspaceName)
   239  	require.NoError(t, err)
   240  	shard1 := localCluster.Keyspaces[0].Shards[0]
   241  	shard2 := localCluster.Keyspaces[0].Shards[1]
   242  	allCells := fmt.Sprintf("%s,%s", cell1, cell2)
   243  
   244  	expectedPartitions := map[topodata.TabletType][]string{}
   245  	expectedPartitions[topodata.TabletType_PRIMARY] = []string{shard1.Name, shard2.Name}
   246  	expectedPartitions[topodata.TabletType_REPLICA] = []string{shard1.Name, shard2.Name}
   247  	expectedPartitions[topodata.TabletType_RDONLY] = []string{shard1.Name, shard2.Name}
   248  	cluster.CheckSrvKeyspace(t, cell1, keyspaceName, expectedPartitions, *localCluster)
   249  	cluster.CheckSrvKeyspace(t, cell2, keyspaceName, expectedPartitions, *localCluster)
   250  
   251  	// Adds alias so vtgate can route to replica/rdonly tablets that are not in the same cell, but same alias
   252  	err = localCluster.VtctlclientProcess.ExecuteCommand("AddCellsAlias", "--",
   253  		"--cells", allCells,
   254  		"region_east_coast")
   255  	require.NoError(t, err)
   256  	err = localCluster.VtctlclientProcess.ExecuteCommand("UpdateCellsAlias", "--",
   257  		"--cells", allCells,
   258  		"region_east_coast")
   259  	require.NoError(t, err)
   260  
   261  	vtgateInstance := localCluster.NewVtgateInstance()
   262  	vtgateInstance.CellsToWatch = allCells
   263  	vtgateInstance.TabletTypesToWait = "PRIMARY,REPLICA"
   264  	err = vtgateInstance.Setup()
   265  	require.NoError(t, err)
   266  
   267  	// Cluster teardown will not teardown vtgate because we are not
   268  	// actually setting this on localCluster.VtgateInstance
   269  	defer vtgateInstance.TearDown()
   270  
   271  	waitTillAllTabletsAreHealthyInVtgate(t, *vtgateInstance, shard1.Name, shard2.Name)
   272  
   273  	testQueriesOnTabletType(t, "primary", vtgateInstance.GrpcPort, false)
   274  	testQueriesOnTabletType(t, "replica", vtgateInstance.GrpcPort, false)
   275  	testQueriesOnTabletType(t, "rdonly", vtgateInstance.GrpcPort, false)
   276  
   277  	// now, delete the alias, so that if we run above assertions again, it will fail for replica,rdonly target type
   278  	err = localCluster.VtctlclientProcess.ExecuteCommand("DeleteCellsAlias",
   279  		"region_east_coast")
   280  	require.NoError(t, err)
   281  
   282  	// restarts the vtgate process
   283  	vtgateInstance.TabletTypesToWait = "PRIMARY"
   284  	err = vtgateInstance.TearDown()
   285  	require.NoError(t, err)
   286  	err = vtgateInstance.Setup()
   287  	require.NoError(t, err)
   288  
   289  	// since replica and rdonly tablets of all shards in cell2, the last 2 assertion is expected to fail
   290  	testQueriesOnTabletType(t, "primary", vtgateInstance.GrpcPort, false)
   291  	testQueriesOnTabletType(t, "replica", vtgateInstance.GrpcPort, true)
   292  	testQueriesOnTabletType(t, "rdonly", vtgateInstance.GrpcPort, true)
   293  
   294  }
   295  
   296  func TestAddAliasWhileVtgateUp(t *testing.T) {
   297  	defer cluster.PanicHandler(t)
   298  
   299  	insertInitialValues(t)
   300  	defer deleteInitialValues(t)
   301  
   302  	err := localCluster.VtctlclientProcess.ExecuteCommand("RebuildKeyspaceGraph", keyspaceName)
   303  	require.NoError(t, err)
   304  	shard1 := localCluster.Keyspaces[0].Shards[0]
   305  	shard2 := localCluster.Keyspaces[0].Shards[1]
   306  	allCells := fmt.Sprintf("%s,%s", cell1, cell2)
   307  
   308  	expectedPartitions := map[topodata.TabletType][]string{}
   309  	expectedPartitions[topodata.TabletType_PRIMARY] = []string{shard1.Name, shard2.Name}
   310  	expectedPartitions[topodata.TabletType_REPLICA] = []string{shard1.Name, shard2.Name}
   311  	expectedPartitions[topodata.TabletType_RDONLY] = []string{shard1.Name, shard2.Name}
   312  	cluster.CheckSrvKeyspace(t, cell1, keyspaceName, expectedPartitions, *localCluster)
   313  	cluster.CheckSrvKeyspace(t, cell2, keyspaceName, expectedPartitions, *localCluster)
   314  
   315  	vtgateInstance := localCluster.NewVtgateInstance()
   316  	vtgateInstance.CellsToWatch = allCells
   317  	// only primary is in vtgate's "cell", other tablet types are not visible because they are in the other cell
   318  	vtgateInstance.TabletTypesToWait = "PRIMARY"
   319  	err = vtgateInstance.Setup()
   320  	require.NoError(t, err)
   321  	defer vtgateInstance.TearDown()
   322  
   323  	// since replica and rdonly tablets of all shards in cell2, the last 2 assertion is expected to fail
   324  	testQueriesOnTabletType(t, "primary", vtgateInstance.GrpcPort, false)
   325  	testQueriesOnTabletType(t, "replica", vtgateInstance.GrpcPort, true)
   326  	testQueriesOnTabletType(t, "rdonly", vtgateInstance.GrpcPort, true)
   327  
   328  	// Adds alias so vtgate can route to replica/rdonly tablets that are not in the same cell, but same alias
   329  	err = localCluster.VtctlclientProcess.ExecuteCommand("AddCellsAlias", "--",
   330  		"--cells", allCells,
   331  		"region_east_coast")
   332  	require.NoError(t, err)
   333  
   334  	testQueriesOnTabletType(t, "primary", vtgateInstance.GrpcPort, false)
   335  	// TODO(deepthi) change the following to shouldFail:false when fixing https://github.com/vitessio/vitess/issues/5911
   336  	testQueriesOnTabletType(t, "replica", vtgateInstance.GrpcPort, true)
   337  	testQueriesOnTabletType(t, "rdonly", vtgateInstance.GrpcPort, true)
   338  
   339  }
   340  
   341  func waitTillAllTabletsAreHealthyInVtgate(t *testing.T, vtgateInstance cluster.VtgateProcess, shards ...string) {
   342  	for _, shard := range shards {
   343  		require.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.primary", keyspaceName, shard), 1, 30*time.Second))
   344  		require.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.replica", keyspaceName, shard), 1, 30*time.Second))
   345  		require.NoError(t, vtgateInstance.WaitForStatusOfTabletInShard(fmt.Sprintf("%s.%s.rdonly", keyspaceName, shard), 1, 30*time.Second))
   346  	}
   347  }
   348  
   349  func testQueriesOnTabletType(t *testing.T, tabletType string, vtgateGrpcPort int, shouldFail bool) {
   350  	qr, err := localCluster.ExecOnVTGate(context.Background(),
   351  		fmt.Sprintf("%s:%d", localCluster.Hostname, vtgateGrpcPort),
   352  		"@"+tabletType,
   353  		fmt.Sprintf(`select * from %s`, tableName), nil, nil,
   354  	)
   355  	if shouldFail {
   356  		require.Error(t, err)
   357  		return
   358  	}
   359  	assert.Equal(t, len(qr.Rows), 3)
   360  }
   361  
   362  func insertInitialValues(t *testing.T) {
   363  	cluster.ExecuteOnTablet(t,
   364  		fmt.Sprintf(cluster.InsertTabletTemplateKsID, tableName, 1, "msg1", 1),
   365  		*shard1Primary,
   366  		keyspaceName,
   367  		false)
   368  
   369  	cluster.ExecuteOnTablet(t,
   370  		fmt.Sprintf(cluster.InsertTabletTemplateKsID, tableName, 2, "msg2", 2),
   371  		*shard1Primary,
   372  		keyspaceName,
   373  		false)
   374  
   375  	cluster.ExecuteOnTablet(t,
   376  		fmt.Sprintf(cluster.InsertTabletTemplateKsID, tableName, 4, "msg4", 4),
   377  		*shard2Primary,
   378  		keyspaceName,
   379  		false)
   380  }
   381  
   382  func deleteInitialValues(t *testing.T) {
   383  	cluster.ExecuteOnTablet(t,
   384  		fmt.Sprintf("delete from %s where id = %v", tableName, 1),
   385  		*shard1Primary,
   386  		keyspaceName,
   387  		false)
   388  
   389  	cluster.ExecuteOnTablet(t,
   390  		fmt.Sprintf("delete from %s where id = %v", tableName, 2),
   391  		*shard1Primary,
   392  		keyspaceName,
   393  		false)
   394  
   395  	cluster.ExecuteOnTablet(t,
   396  		fmt.Sprintf("delete from %s where id = %v", tableName, 4),
   397  		*shard2Primary,
   398  		keyspaceName,
   399  		false)
   400  }