vitess.io/vitess@v0.16.2/go/vt/discovery/tablet_picker_test.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package discovery
    18  
    19  import (
    20  	"context"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/stretchr/testify/assert"
    25  	"github.com/stretchr/testify/require"
    26  	"google.golang.org/protobuf/proto"
    27  
    28  	querypb "vitess.io/vitess/go/vt/proto/query"
    29  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    30  	"vitess.io/vitess/go/vt/topo"
    31  	"vitess.io/vitess/go/vt/topo/memorytopo"
    32  )
    33  
    34  func TestPickSimple(t *testing.T) {
    35  	te := newPickerTestEnv(t, []string{"cell"})
    36  	want := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
    37  	defer deleteTablet(t, te, want)
    38  
    39  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "replica")
    40  	require.NoError(t, err)
    41  
    42  	tablet, err := tp.PickForStreaming(context.Background())
    43  	require.NoError(t, err)
    44  	assert.True(t, proto.Equal(want, tablet), "Pick: %v, want %v", tablet, want)
    45  }
    46  
    47  func TestPickFromTwoHealthy(t *testing.T) {
    48  	te := newPickerTestEnv(t, []string{"cell"})
    49  	want1 := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
    50  	defer deleteTablet(t, te, want1)
    51  	want2 := addTablet(te, 101, topodatapb.TabletType_RDONLY, "cell", true, true)
    52  	defer deleteTablet(t, te, want2)
    53  
    54  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "replica,rdonly")
    55  	require.NoError(t, err)
    56  
    57  	// In 20 attempts, both tablet types must be picked at least once.
    58  	var picked1, picked2 bool
    59  	for i := 0; i < 20; i++ {
    60  		tablet, err := tp.PickForStreaming(context.Background())
    61  		require.NoError(t, err)
    62  		if proto.Equal(tablet, want1) {
    63  			picked1 = true
    64  		}
    65  		if proto.Equal(tablet, want2) {
    66  			picked2 = true
    67  		}
    68  	}
    69  	assert.True(t, picked1)
    70  	assert.True(t, picked2)
    71  }
    72  
    73  func TestPickInOrder1(t *testing.T) {
    74  	te := newPickerTestEnv(t, []string{"cell"})
    75  	want1 := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
    76  	defer deleteTablet(t, te, want1)
    77  	want2 := addTablet(te, 101, topodatapb.TabletType_RDONLY, "cell", true, true)
    78  	defer deleteTablet(t, te, want2)
    79  
    80  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "in_order:replica,rdonly")
    81  	require.NoError(t, err)
    82  
    83  	// In 20 attempts, we always pick the first healthy tablet in order
    84  	var picked1, picked2 bool
    85  	for i := 0; i < 20; i++ {
    86  		tablet, err := tp.PickForStreaming(context.Background())
    87  		require.NoError(t, err)
    88  		if proto.Equal(tablet, want1) {
    89  			picked1 = true
    90  		}
    91  		if proto.Equal(tablet, want2) {
    92  			picked2 = true
    93  		}
    94  	}
    95  	assert.True(t, picked1)
    96  	assert.False(t, picked2)
    97  }
    98  
    99  func TestPickInOrder2(t *testing.T) {
   100  	te := newPickerTestEnv(t, []string{"cell"})
   101  	want1 := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   102  	defer deleteTablet(t, te, want1)
   103  	want2 := addTablet(te, 101, topodatapb.TabletType_RDONLY, "cell", true, true)
   104  	defer deleteTablet(t, te, want2)
   105  
   106  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "in_order:rdonly,replica")
   107  	require.NoError(t, err)
   108  
   109  	// In 20 attempts, we always pick the first healthy tablet in order
   110  	var picked1, picked2 bool
   111  	for i := 0; i < 20; i++ {
   112  		tablet, err := tp.PickForStreaming(context.Background())
   113  		require.NoError(t, err)
   114  		if proto.Equal(tablet, want1) {
   115  			picked1 = true
   116  		}
   117  		if proto.Equal(tablet, want2) {
   118  			picked2 = true
   119  		}
   120  	}
   121  	assert.False(t, picked1)
   122  	assert.True(t, picked2)
   123  }
   124  
   125  func TestPickInOrderMultipleInGroup(t *testing.T) {
   126  	te := newPickerTestEnv(t, []string{"cell"})
   127  	want1 := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   128  	defer deleteTablet(t, te, want1)
   129  	want2 := addTablet(te, 101, topodatapb.TabletType_RDONLY, "cell", true, true)
   130  	defer deleteTablet(t, te, want2)
   131  	want3 := addTablet(te, 102, topodatapb.TabletType_RDONLY, "cell", true, true)
   132  	defer deleteTablet(t, te, want3)
   133  	want4 := addTablet(te, 103, topodatapb.TabletType_RDONLY, "cell", true, true)
   134  	defer deleteTablet(t, te, want4)
   135  
   136  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "in_order:rdonly,replica")
   137  	require.NoError(t, err)
   138  
   139  	// In 40 attempts, we pick each of the three RDONLY, but never the REPLICA
   140  	var picked1, picked2, picked3, picked4 bool
   141  	for i := 0; i < 40; i++ {
   142  		tablet, err := tp.PickForStreaming(context.Background())
   143  		require.NoError(t, err)
   144  		if proto.Equal(tablet, want1) {
   145  			picked1 = true
   146  		}
   147  		if proto.Equal(tablet, want2) {
   148  			picked2 = true
   149  		}
   150  		if proto.Equal(tablet, want3) {
   151  			picked3 = true
   152  		}
   153  		if proto.Equal(tablet, want4) {
   154  			picked4 = true
   155  		}
   156  	}
   157  	assert.False(t, picked1)
   158  	assert.True(t, picked2)
   159  	assert.True(t, picked3)
   160  	assert.True(t, picked4)
   161  }
   162  
   163  func TestPickRespectsTabletType(t *testing.T) {
   164  	te := newPickerTestEnv(t, []string{"cell"})
   165  	want := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   166  	defer deleteTablet(t, te, want)
   167  	dont := addTablet(te, 101, topodatapb.TabletType_PRIMARY, "cell", true, true)
   168  	defer deleteTablet(t, te, dont)
   169  
   170  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "replica,rdonly")
   171  	require.NoError(t, err)
   172  
   173  	// In 20 attempts, primary tablet must be never picked
   174  	for i := 0; i < 20; i++ {
   175  		tablet, err := tp.PickForStreaming(context.Background())
   176  		require.NoError(t, err)
   177  		require.NotNil(t, tablet)
   178  		require.True(t, proto.Equal(tablet, want), "picked wrong tablet type")
   179  	}
   180  }
   181  
   182  func TestPickMultiCell(t *testing.T) {
   183  	te := newPickerTestEnv(t, []string{"cell", "otherCell"})
   184  	want := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   185  	defer deleteTablet(t, te, want)
   186  
   187  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "replica")
   188  	require.NoError(t, err)
   189  
   190  	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
   191  	defer cancel()
   192  	tablet, err := tp.PickForStreaming(ctx)
   193  	require.NoError(t, err)
   194  	assert.True(t, proto.Equal(want, tablet), "Pick: %v, want %v", tablet, want)
   195  }
   196  
   197  func TestPickPrimary(t *testing.T) {
   198  	te := newPickerTestEnv(t, []string{"cell", "otherCell"})
   199  	want := addTablet(te, 100, topodatapb.TabletType_PRIMARY, "cell", true, true)
   200  	defer deleteTablet(t, te, want)
   201  	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
   202  	defer cancel()
   203  	_, err := te.topoServ.UpdateShardFields(ctx, te.keyspace, te.shard, func(si *topo.ShardInfo) error {
   204  		si.PrimaryAlias = want.Alias
   205  		return nil
   206  	})
   207  	require.NoError(t, err)
   208  
   209  	tp, err := NewTabletPicker(te.topoServ, []string{"otherCell"}, te.keyspace, te.shard, "primary")
   210  	require.NoError(t, err)
   211  
   212  	ctx2, cancel2 := context.WithTimeout(context.Background(), 200*time.Millisecond)
   213  	defer cancel2()
   214  	tablet, err := tp.PickForStreaming(ctx2)
   215  	require.NoError(t, err)
   216  	assert.True(t, proto.Equal(want, tablet), "Pick: %v, want %v", tablet, want)
   217  }
   218  
   219  func TestPickFromOtherCell(t *testing.T) {
   220  	te := newPickerTestEnv(t, []string{"cell", "otherCell"})
   221  	want := addTablet(te, 100, topodatapb.TabletType_REPLICA, "otherCell", true, true)
   222  	defer deleteTablet(t, te, want)
   223  
   224  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "replica")
   225  	require.NoError(t, err)
   226  
   227  	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
   228  	defer cancel()
   229  	tablet, err := tp.PickForStreaming(ctx)
   230  	require.NoError(t, err)
   231  	assert.True(t, proto.Equal(want, tablet), "Pick: %v, want %v", tablet, want)
   232  }
   233  
   234  func TestDontPickFromOtherCell(t *testing.T) {
   235  	te := newPickerTestEnv(t, []string{"cell", "otherCell"})
   236  	want1 := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   237  	defer deleteTablet(t, te, want1)
   238  	want2 := addTablet(te, 101, topodatapb.TabletType_REPLICA, "otherCell", true, true)
   239  	defer deleteTablet(t, te, want2)
   240  
   241  	tp, err := NewTabletPicker(te.topoServ, []string{"cell"}, te.keyspace, te.shard, "replica")
   242  	require.NoError(t, err)
   243  
   244  	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
   245  	defer cancel()
   246  
   247  	// In 20 attempts, only want1 must be picked because TabletPicker.cells = "cell"
   248  	var picked1, picked2 bool
   249  	for i := 0; i < 20; i++ {
   250  		tablet, err := tp.PickForStreaming(ctx)
   251  		require.NoError(t, err)
   252  		if proto.Equal(tablet, want1) {
   253  			picked1 = true
   254  		}
   255  		if proto.Equal(tablet, want2) {
   256  			picked2 = true
   257  		}
   258  	}
   259  	assert.True(t, picked1)
   260  	assert.False(t, picked2)
   261  }
   262  
   263  func TestPickMultiCellTwoTablets(t *testing.T) {
   264  	te := newPickerTestEnv(t, []string{"cell", "otherCell"})
   265  	want1 := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   266  	defer deleteTablet(t, te, want1)
   267  	want2 := addTablet(te, 101, topodatapb.TabletType_REPLICA, "otherCell", true, true)
   268  	defer deleteTablet(t, te, want2)
   269  
   270  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "replica")
   271  	require.NoError(t, err)
   272  
   273  	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
   274  	defer cancel()
   275  
   276  	// In 20 attempts, both tablet types must be picked at least once.
   277  	var picked1, picked2 bool
   278  	for i := 0; i < 20; i++ {
   279  		tablet, err := tp.PickForStreaming(ctx)
   280  		require.NoError(t, err)
   281  		if proto.Equal(tablet, want1) {
   282  			picked1 = true
   283  		}
   284  		if proto.Equal(tablet, want2) {
   285  			picked2 = true
   286  		}
   287  	}
   288  	assert.True(t, picked1)
   289  	assert.True(t, picked2)
   290  }
   291  
   292  func TestPickMultiCellTwoTabletTypes(t *testing.T) {
   293  	te := newPickerTestEnv(t, []string{"cell", "otherCell"})
   294  	want1 := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   295  	defer deleteTablet(t, te, want1)
   296  	want2 := addTablet(te, 101, topodatapb.TabletType_RDONLY, "otherCell", true, true)
   297  	defer deleteTablet(t, te, want2)
   298  
   299  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "replica,rdonly")
   300  	require.NoError(t, err)
   301  
   302  	ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond)
   303  	defer cancel()
   304  
   305  	// In 20 attempts, both tablet types must be picked at least once.
   306  	var picked1, picked2 bool
   307  	for i := 0; i < 20; i++ {
   308  		tablet, err := tp.PickForStreaming(ctx)
   309  		require.NoError(t, err)
   310  		if proto.Equal(tablet, want1) {
   311  			picked1 = true
   312  		}
   313  		if proto.Equal(tablet, want2) {
   314  			picked2 = true
   315  		}
   316  	}
   317  	assert.True(t, picked1)
   318  	assert.True(t, picked2)
   319  }
   320  
   321  func TestPickUsingCellAlias(t *testing.T) {
   322  	// test env puts all cells into an alias called "cella"
   323  	te := newPickerTestEnv(t, []string{"cell", "otherCell"})
   324  	want1 := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   325  	defer deleteTablet(t, te, want1)
   326  
   327  	tp, err := NewTabletPicker(te.topoServ, []string{"cella"}, te.keyspace, te.shard, "replica")
   328  	require.NoError(t, err)
   329  
   330  	ctx1, cancel1 := context.WithTimeout(context.Background(), 200*time.Millisecond)
   331  	defer cancel1()
   332  	tablet, err := tp.PickForStreaming(ctx1)
   333  	require.NoError(t, err)
   334  	assert.True(t, proto.Equal(want1, tablet), "Pick: %v, want %v", tablet, want1)
   335  
   336  	// create a tablet in the other cell, it should be picked
   337  	deleteTablet(t, te, want1)
   338  	want2 := addTablet(te, 101, topodatapb.TabletType_REPLICA, "otherCell", true, true)
   339  	defer deleteTablet(t, te, want2)
   340  	ctx2, cancel2 := context.WithTimeout(context.Background(), 200*time.Millisecond)
   341  	defer cancel2()
   342  	tablet, err = tp.PickForStreaming(ctx2)
   343  	require.NoError(t, err)
   344  	assert.True(t, proto.Equal(want2, tablet), "Pick: %v, want %v", tablet, want2)
   345  
   346  	// addTablet again and test that both are picked at least once
   347  	want1 = addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   348  	ctx3, cancel3 := context.WithTimeout(context.Background(), 200*time.Millisecond)
   349  	defer cancel3()
   350  
   351  	// In 20 attempts, both tablet types must be picked at least once.
   352  	var picked1, picked2 bool
   353  	for i := 0; i < 20; i++ {
   354  		tablet, err := tp.PickForStreaming(ctx3)
   355  		require.NoError(t, err)
   356  		if proto.Equal(tablet, want1) {
   357  			picked1 = true
   358  		}
   359  		if proto.Equal(tablet, want2) {
   360  			picked2 = true
   361  		}
   362  	}
   363  	assert.True(t, picked1)
   364  	assert.True(t, picked2)
   365  }
   366  
   367  func TestTabletAppearsDuringSleep(t *testing.T) {
   368  	te := newPickerTestEnv(t, []string{"cell"})
   369  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "replica")
   370  	require.NoError(t, err)
   371  
   372  	delay := GetTabletPickerRetryDelay()
   373  	defer func() {
   374  		SetTabletPickerRetryDelay(delay)
   375  	}()
   376  	SetTabletPickerRetryDelay(11 * time.Millisecond)
   377  
   378  	result := make(chan *topodatapb.Tablet)
   379  	// start picker first, then add tablet
   380  	go func() {
   381  		ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond)
   382  		defer cancel()
   383  		tablet, err := tp.PickForStreaming(ctx)
   384  		assert.NoError(t, err)
   385  		result <- tablet
   386  	}()
   387  
   388  	want := addTablet(te, 100, topodatapb.TabletType_REPLICA, "cell", true, true)
   389  	defer deleteTablet(t, te, want)
   390  	got := <-result
   391  	require.NotNil(t, got, "Tablet should not be nil")
   392  	assert.True(t, proto.Equal(want, got), "Pick: %v, want %v", got, want)
   393  }
   394  
   395  func TestPickError(t *testing.T) {
   396  	te := newPickerTestEnv(t, []string{"cell"})
   397  	_, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "badtype")
   398  	assert.EqualError(t, err, "failed to parse list of tablet types: badtype")
   399  
   400  	tp, err := NewTabletPicker(te.topoServ, te.cells, te.keyspace, te.shard, "replica")
   401  	require.NoError(t, err)
   402  	delay := GetTabletPickerRetryDelay()
   403  	defer func() {
   404  		SetTabletPickerRetryDelay(delay)
   405  	}()
   406  	SetTabletPickerRetryDelay(11 * time.Millisecond)
   407  
   408  	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond)
   409  	defer cancel()
   410  	// no tablets
   411  	_, err = tp.PickForStreaming(ctx)
   412  	require.EqualError(t, err, "context has expired")
   413  	// no tablets of the correct type
   414  	defer deleteTablet(t, te, addTablet(te, 200, topodatapb.TabletType_RDONLY, "cell", true, true))
   415  	ctx, cancel = context.WithTimeout(context.Background(), 20*time.Millisecond)
   416  	defer cancel()
   417  	_, err = tp.PickForStreaming(ctx)
   418  	require.EqualError(t, err, "context has expired")
   419  	require.Greater(t, globalTPStats.noTabletFoundError.Counts()["cell.ks.0.replica"], int64(0))
   420  }
   421  
   422  type pickerTestEnv struct {
   423  	t        *testing.T
   424  	keyspace string
   425  	shard    string
   426  	cells    []string
   427  
   428  	topoServ *topo.Server
   429  }
   430  
   431  func newPickerTestEnv(t *testing.T, cells []string) *pickerTestEnv {
   432  	ctx := context.Background()
   433  
   434  	te := &pickerTestEnv{
   435  		t:        t,
   436  		keyspace: "ks",
   437  		shard:    "0",
   438  		cells:    cells,
   439  		topoServ: memorytopo.NewServer(cells...),
   440  	}
   441  	// create cell alias
   442  	err := te.topoServ.CreateCellsAlias(ctx, "cella", &topodatapb.CellsAlias{
   443  		Cells: cells,
   444  	})
   445  	require.NoError(t, err)
   446  	err = te.topoServ.CreateKeyspace(ctx, te.keyspace, &topodatapb.Keyspace{})
   447  	require.NoError(t, err)
   448  	err = te.topoServ.CreateShard(ctx, te.keyspace, te.shard)
   449  	require.NoError(t, err)
   450  	return te
   451  }
   452  
   453  func addTablet(te *pickerTestEnv, id int, tabletType topodatapb.TabletType, cell string, serving, healthy bool) *topodatapb.Tablet {
   454  	tablet := &topodatapb.Tablet{
   455  		Alias: &topodatapb.TabletAlias{
   456  			Cell: cell,
   457  			Uid:  uint32(id),
   458  		},
   459  		Keyspace: te.keyspace,
   460  		Shard:    te.shard,
   461  		KeyRange: &topodatapb.KeyRange{},
   462  		Type:     tabletType,
   463  		PortMap: map[string]int32{
   464  			"test": int32(id),
   465  		},
   466  	}
   467  	err := te.topoServ.CreateTablet(context.Background(), tablet)
   468  	require.NoError(te.t, err)
   469  
   470  	if healthy {
   471  		_ = createFixedHealthConn(tablet, &querypb.StreamHealthResponse{
   472  			Serving: serving,
   473  			Target: &querypb.Target{
   474  				Keyspace:   te.keyspace,
   475  				Shard:      te.shard,
   476  				TabletType: tabletType,
   477  			},
   478  			RealtimeStats: &querypb.RealtimeStats{HealthError: ""},
   479  		})
   480  	}
   481  
   482  	return tablet
   483  }
   484  
   485  func deleteTablet(t *testing.T, te *pickerTestEnv, tablet *topodatapb.Tablet) {
   486  	if tablet == nil {
   487  		return
   488  	}
   489  	{ //log error
   490  		err := te.topoServ.DeleteTablet(context.Background(), tablet.Alias)
   491  		require.NoError(t, err, "failed to DeleteTablet with alias: %v", err)
   492  	}
   493  	{ //This is not automatically removed from shard replication, which results in log spam and log error
   494  		err := topo.DeleteTabletReplicationData(context.Background(), te.topoServ, tablet)
   495  		require.NoError(t, err, "failed to automatically remove from shard replication: %v", err)
   496  	}
   497  }