vitess.io/vitess@v0.16.2/go/vt/vtgr/controller/diagnose_test.go (about)

     1  /*
     2  Copyright 2021 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controller
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"math"
    23  	"os"
    24  	"strconv"
    25  	"strings"
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/golang/mock/gomock"
    30  	"github.com/stretchr/testify/assert"
    31  
    32  	"vitess.io/vitess/go/mysql"
    33  	"vitess.io/vitess/go/vt/topo"
    34  	"vitess.io/vitess/go/vt/topo/memorytopo"
    35  	"vitess.io/vitess/go/vt/vtctl/grpcvtctldserver/testutil"
    36  	"vitess.io/vitess/go/vt/vtgr/config"
    37  	"vitess.io/vitess/go/vt/vtgr/db"
    38  	"vitess.io/vitess/go/vt/vtgr/inst"
    39  
    40  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    41  )
    42  
    43  const diagnoseGroupSize = 3
    44  
    45  var (
    46  	testHost, _ = os.Hostname()
    47  	alias0      = "test_cell-0000000000"
    48  	alias1      = "test_cell-0000000001"
    49  	alias2      = "test_cell-0000000002"
    50  	testPort0   = 17000
    51  	testPort1   = 17001
    52  	testPort2   = 17002
    53  )
    54  
    55  type testGroupInput struct {
    56  	groupName   string
    57  	readOnly    bool
    58  	checkResult int
    59  	groupState  []db.TestGroupState
    60  	gtid        mysql.GTIDSet
    61  }
    62  
    63  func TestShardIsHealthy(t *testing.T) {
    64  	ctrl := gomock.NewController(t)
    65  	defer ctrl.Finish()
    66  	ctx := context.Background()
    67  	ts := memorytopo.NewServer("test_cell")
    68  	defer ts.Close()
    69  	ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{})
    70  	ts.CreateShard(ctx, "ks", "0")
    71  	tmc := NewMockGRTmcClient(ctrl)
    72  	dbAgent := db.NewMockAgent(ctrl)
    73  	tablet1 := buildTabletInfo(uint32(testPort0), testHost, testPort0, topodatapb.TabletType_PRIMARY, time.Now())
    74  	tablet2 := buildTabletInfo(uint32(testPort1), testHost, testPort1, topodatapb.TabletType_SPARE, time.Time{})
    75  	tablet3 := buildTabletInfo(uint32(testPort2), testHost, testPort2, topodatapb.TabletType_REPLICA, time.Time{})
    76  	testutil.AddTablet(ctx, t, ts, tablet1.Tablet, nil)
    77  	testutil.AddTablet(ctx, t, ts, tablet2.Tablet, nil)
    78  	testutil.AddTablet(ctx, t, ts, tablet3.Tablet, nil)
    79  	ts.UpdateShardFields(ctx, "ks", "0", func(si *topo.ShardInfo) error {
    80  		si.PrimaryAlias = tablet1.Alias
    81  		return nil
    82  	})
    83  	dbAgent.
    84  		EXPECT().
    85  		FetchGroupView(gomock.Any(), gomock.Any()).
    86  		DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) {
    87  			return db.BuildGroupView(alias, "group", testHost, testPort0, false, 0, []db.TestGroupState{
    88  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
    89  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
    90  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
    91  			}), nil
    92  		}).
    93  		AnyTimes()
    94  	tmc.EXPECT().Ping(gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
    95  	cfg := &config.VTGRConfig{BootstrapGroupSize: 3, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1}
    96  	shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0, true)
    97  	shard.refreshTabletsInShardLocked(ctx)
    98  	diagnose, _ := shard.Diagnose(ctx)
    99  	assert.Equal(t, DiagnoseTypeHealthy, string(diagnose))
   100  }
   101  
   102  func TestTabletIssueDiagnoses(t *testing.T) {
   103  	type data struct {
   104  		pingable bool
   105  		ttype    topodatapb.TabletType
   106  	}
   107  	var tablettests = []struct {
   108  		name         string
   109  		expected     DiagnoseType
   110  		errMessage   string
   111  		primaryAlias string
   112  		inputs       []data
   113  	}{
   114  		{name: "healthy shard", expected: DiagnoseTypeHealthy, errMessage: "", primaryAlias: "test_cell-0000017000", inputs: []data{
   115  			{true, topodatapb.TabletType_PRIMARY},
   116  			{true, topodatapb.TabletType_REPLICA},
   117  			{true, topodatapb.TabletType_REPLICA},
   118  		}},
   119  		{name: "non primary tablet is not pingable", expected: DiagnoseTypeHealthy, errMessage: "", primaryAlias: "test_cell-0000017000", inputs: []data{ // vtgr should do nothing
   120  			{true, topodatapb.TabletType_PRIMARY},
   121  			{false, topodatapb.TabletType_REPLICA},
   122  			{false, topodatapb.TabletType_REPLICA},
   123  		}},
   124  		{name: "primary tablet is not pingable", expected: DiagnoseTypeUnreachablePrimary, errMessage: "", primaryAlias: "test_cell-0000017000", inputs: []data{ // vtgr should trigger a failover
   125  			{false, topodatapb.TabletType_PRIMARY},
   126  			{true, topodatapb.TabletType_REPLICA},
   127  			{true, topodatapb.TabletType_REPLICA},
   128  		}},
   129  		{name: "no primary tablet", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", primaryAlias: "", inputs: []data{ // vtgr should create one based on mysql
   130  			{true, topodatapb.TabletType_REPLICA},
   131  			{true, topodatapb.TabletType_REPLICA},
   132  			{true, topodatapb.TabletType_REPLICA},
   133  		}},
   134  		{name: "wrong primary in tablet types", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", primaryAlias: "test_cell-0000017001", inputs: []data{ // shard info returns differently comparing with tablet type
   135  			{true, topodatapb.TabletType_PRIMARY},
   136  			{true, topodatapb.TabletType_REPLICA},
   137  			{true, topodatapb.TabletType_REPLICA},
   138  		}},
   139  		{name: "mysql and vttablet has different primary", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", primaryAlias: "test_cell-0000017001", inputs: []data{ // vtgr should fix vttablet
   140  			{true, topodatapb.TabletType_REPLICA},
   141  			{true, topodatapb.TabletType_PRIMARY},
   142  			{true, topodatapb.TabletType_REPLICA},
   143  		}},
   144  		{name: "unreachable wrong vttablet primary", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", primaryAlias: "test_cell-0000017001", inputs: []data{ // vtgr should fix vttablet
   145  			{true, topodatapb.TabletType_REPLICA},
   146  			{false, topodatapb.TabletType_PRIMARY},
   147  			{true, topodatapb.TabletType_REPLICA},
   148  		}},
   149  		{name: "unreachable uninitialized primary vttablet", expected: DiagnoseTypeUnreachablePrimary, errMessage: "", inputs: []data{ // vtgr should failover
   150  			{false, topodatapb.TabletType_REPLICA},
   151  			{true, topodatapb.TabletType_REPLICA},
   152  			{true, topodatapb.TabletType_REPLICA},
   153  		}},
   154  	}
   155  	for _, tt := range tablettests {
   156  		t.Run(tt.name, func(t *testing.T) {
   157  			expected := tt.expected
   158  			ctrl := gomock.NewController(t)
   159  			defer ctrl.Finish()
   160  			ts := NewMockGRTopo(ctrl)
   161  			tmc := NewMockGRTmcClient(ctrl)
   162  			dbAgent := db.NewMockAgent(ctrl)
   163  			tablets := make(map[string]*topo.TabletInfo)
   164  			if tt.primaryAlias == "" {
   165  				ts.
   166  					EXPECT().
   167  					GetShard(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0")).
   168  					Return(&topo.ShardInfo{Shard: &topodatapb.Shard{}}, nil)
   169  			}
   170  			for i, input := range tt.inputs {
   171  				id := uint32(testPort0 + i)
   172  				tablet := buildTabletInfo(id, testHost, testPort0+i, input.ttype, time.Now())
   173  				tablets[tablet.AliasString()] = tablet
   174  				var response = struct {
   175  					pingable bool
   176  				}{input.pingable}
   177  				if tt.primaryAlias == tablet.AliasString() {
   178  					si := &topo.ShardInfo{
   179  						Shard: &topodatapb.Shard{
   180  							PrimaryAlias: tablet.Alias,
   181  						},
   182  					}
   183  					ts.
   184  						EXPECT().
   185  						GetShard(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0")).
   186  						Return(si, nil)
   187  				}
   188  				dbAgent.
   189  					EXPECT().
   190  					FetchGroupView(gomock.Any(), gomock.Any()).
   191  					DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) {
   192  						if target.Hostname == "" || target.Port == 0 {
   193  							return nil, errors.New("invalid mysql instance key")
   194  						}
   195  						return db.BuildGroupView(alias, "group", testHost, testPort0, false, 0, []db.TestGroupState{
   196  							{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   197  							{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   198  							{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   199  						}), nil
   200  					}).
   201  					AnyTimes()
   202  				tmc.
   203  					EXPECT().
   204  					Ping(gomock.Any(), &topodatapb.Tablet{
   205  						Alias:                tablet.Alias,
   206  						Hostname:             tablet.Hostname,
   207  						Keyspace:             tablet.Keyspace,
   208  						Shard:                tablet.Shard,
   209  						Type:                 tablet.Type,
   210  						Tags:                 tablet.Tags,
   211  						MysqlHostname:        tablet.MysqlHostname,
   212  						MysqlPort:            tablet.MysqlPort,
   213  						PrimaryTermStartTime: tablet.PrimaryTermStartTime,
   214  					}).
   215  					DoAndReturn(func(_ context.Context, t *topodatapb.Tablet) error {
   216  						if !response.pingable {
   217  							return errors.New("unreachable")
   218  						}
   219  						return nil
   220  					}).
   221  					AnyTimes()
   222  			}
   223  			ts.
   224  				EXPECT().
   225  				GetTabletMapForShardByCell(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0"), gomock.Any()).
   226  				Return(tablets, nil)
   227  
   228  			ctx := context.Background()
   229  			cfg := &config.VTGRConfig{BootstrapGroupSize: diagnoseGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1}
   230  			shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0, true)
   231  			shard.refreshTabletsInShardLocked(ctx)
   232  			diagnose, err := shard.Diagnose(ctx)
   233  			assert.Equal(t, expected, diagnose)
   234  			if tt.errMessage == "" {
   235  				assert.NoError(t, err)
   236  			} else {
   237  				assert.Error(t, err)
   238  				assert.True(t, strings.Contains(err.Error(), tt.errMessage), err.Error())
   239  			}
   240  		})
   241  	}
   242  }
   243  
   244  func TestMysqlIssueDiagnoses(t *testing.T) {
   245  	cfg := &config.VTGRConfig{BootstrapGroupSize: diagnoseGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1}
   246  	disableProtectionCfg := &config.VTGRConfig{BootstrapGroupSize: diagnoseGroupSize, MinNumReplica: 2, DisableReadOnlyProtection: true, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1}
   247  	heartbeatThreshold = 10
   248  	defer func() {
   249  		heartbeatThreshold = math.MaxInt64
   250  	}()
   251  	type data struct {
   252  		alias       string
   253  		groupName   string
   254  		readOnly    bool
   255  		checkResult int
   256  		groupInput  []db.TestGroupState
   257  		ttype       topodatapb.TabletType
   258  	}
   259  	var sqltests = []struct {
   260  		name          string
   261  		expected      DiagnoseType
   262  		errMessage    string
   263  		config        *config.VTGRConfig
   264  		inputs        []data
   265  		removeTablets []string // to simulate missing tablet in topology
   266  	}{
   267  		{name: "healthy shard", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{
   268  			{alias0, "group", false, 0, []db.TestGroupState{
   269  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   270  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   271  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   272  			}, topodatapb.TabletType_PRIMARY},
   273  			{alias1, "group", true, 0, []db.TestGroupState{
   274  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   275  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   276  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   277  			}, topodatapb.TabletType_REPLICA},
   278  			{alias2, "group", true, 0, []db.TestGroupState{
   279  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   280  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   281  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   282  			}, topodatapb.TabletType_REPLICA},
   283  		}},
   284  		{name: "recovering primary shard", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{
   285  			{alias0, "group", false, 0, []db.TestGroupState{
   286  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "PRIMARY"},
   287  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   288  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   289  			}, topodatapb.TabletType_PRIMARY},
   290  			{alias1, "group", true, 0, []db.TestGroupState{
   291  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "PRIMARY"},
   292  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   293  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   294  			}, topodatapb.TabletType_REPLICA},
   295  			{alias2, "group", true, 0, []db.TestGroupState{
   296  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "PRIMARY"},
   297  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   298  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   299  			}, topodatapb.TabletType_REPLICA},
   300  		}},
   301  		{name: "no group in shard", expected: DiagnoseTypeShardHasNoGroup, errMessage: "", inputs: []data{
   302  			{alias0, "", true, 0, []db.TestGroupState{
   303  				{MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""},
   304  			}, topodatapb.TabletType_REPLICA},
   305  			{alias1, "", true, 0, []db.TestGroupState{
   306  				{MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""},
   307  			}, topodatapb.TabletType_REPLICA},
   308  			{alias2, "", true, 0, []db.TestGroupState{
   309  				{MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""},
   310  			}, topodatapb.TabletType_REPLICA},
   311  		}},
   312  		{name: "fail to bootstrap with incorrect number of nodes", expected: DiagnoseTypeError, errMessage: "fail to diagnose ShardHasNoGroup with 3 nodes", inputs: []data{
   313  			{alias0, "", true, 0, []db.TestGroupState{
   314  				{MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""},
   315  			}, topodatapb.TabletType_REPLICA},
   316  			{alias1, "", true, 0, []db.TestGroupState{
   317  				{MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""},
   318  			}, topodatapb.TabletType_REPLICA},
   319  			{alias2, "", true, 0, []db.TestGroupState{
   320  				{MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""},
   321  			}, topodatapb.TabletType_REPLICA},
   322  		}, config: &config.VTGRConfig{BootstrapGroupSize: 2, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1}},
   323  		{name: "unreachable node", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{
   324  			{alias0, "group", false, 0, []db.TestGroupState{
   325  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   326  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"},
   327  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   328  			}, topodatapb.TabletType_PRIMARY},
   329  			{alias1, "group", true, 0, []db.TestGroupState{
   330  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"},
   331  			}, topodatapb.TabletType_REPLICA},
   332  			{alias2, "group", true, 0, []db.TestGroupState{
   333  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   334  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"},
   335  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   336  			}, topodatapb.TabletType_REPLICA},
   337  		}},
   338  		{name: "mysql and tablet has different primary", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", inputs: []data{ // vtgr should failover vttablet
   339  			{alias0, "group", false, 0, []db.TestGroupState{
   340  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   341  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   342  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   343  			}, topodatapb.TabletType_REPLICA},
   344  			{alias1, "group", true, 0, []db.TestGroupState{
   345  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   346  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   347  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   348  			}, topodatapb.TabletType_PRIMARY},
   349  			{alias2, "group", true, 0, []db.TestGroupState{
   350  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   351  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   352  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   353  			}, topodatapb.TabletType_REPLICA},
   354  		}},
   355  		{name: "mysql primary out of topology", expected: DiagnoseTypeUnreachablePrimary, errMessage: "", inputs: []data{ // vtgr should failover mysql
   356  			{alias0, "group", false, 0, []db.TestGroupState{
   357  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   358  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   359  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   360  			}, topodatapb.TabletType_REPLICA},
   361  			{alias1, "group", true, 0, []db.TestGroupState{
   362  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   363  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   364  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   365  			}, topodatapb.TabletType_PRIMARY},
   366  			{alias2, "group", true, 0, []db.TestGroupState{
   367  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   368  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   369  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   370  			}, topodatapb.TabletType_REPLICA},
   371  		}, removeTablets: []string{alias0}},
   372  		{name: "one error node", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", inputs: []data{
   373  			{alias0, "group", false, 0, []db.TestGroupState{
   374  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   375  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"},
   376  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   377  			}, topodatapb.TabletType_PRIMARY},
   378  			{alias1, "group", true, 0, []db.TestGroupState{
   379  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"},
   380  			}, topodatapb.TabletType_REPLICA},
   381  			{alias2, "group", true, 0, []db.TestGroupState{
   382  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   383  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"},
   384  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   385  			}, topodatapb.TabletType_REPLICA},
   386  		}},
   387  		{name: "inactive group with divergent state", expected: DiagnoseTypeShardHasInactiveGroup, errMessage: "", inputs: []data{
   388  			{alias0, "group", true, 11, []db.TestGroupState{
   389  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "OFFLINE", MemberRole: "SECONDARY"},
   390  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"},
   391  			}, topodatapb.TabletType_PRIMARY},
   392  			{alias1, "group", true, 11, []db.TestGroupState{
   393  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "OFFLINE", MemberRole: ""},
   394  			}, topodatapb.TabletType_REPLICA},
   395  			{alias2, "group", true, 11, []db.TestGroupState{
   396  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""},
   397  			}, topodatapb.TabletType_REPLICA},
   398  		}},
   399  		{name: "two error node", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{
   400  			{alias0, "group", false, 0, []db.TestGroupState{
   401  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   402  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"},
   403  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"},
   404  			}, topodatapb.TabletType_PRIMARY},
   405  			{alias1, "group", true, 0, []db.TestGroupState{
   406  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"},
   407  			}, topodatapb.TabletType_REPLICA},
   408  			{alias2, "group", true, 0, []db.TestGroupState{
   409  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"},
   410  			}, topodatapb.TabletType_REPLICA},
   411  		}},
   412  		{name: "insufficient group member", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{
   413  			{alias0, "group", false, 0, []db.TestGroupState{
   414  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   415  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   416  			}, topodatapb.TabletType_PRIMARY},
   417  			{alias1, "group", true, 0, []db.TestGroupState{}, topodatapb.TabletType_REPLICA},
   418  			{alias2, "group", true, 0, []db.TestGroupState{
   419  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   420  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   421  			}, topodatapb.TabletType_REPLICA},
   422  		}},
   423  		{name: "unconnected node", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{
   424  			{alias0, "group", true, 0, []db.TestGroupState{
   425  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   426  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"},
   427  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   428  			}, topodatapb.TabletType_PRIMARY},
   429  			{alias1, "group", true, 0, []db.TestGroupState{
   430  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   431  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"},
   432  			}, topodatapb.TabletType_REPLICA},
   433  			{alias2, "group", true, 0, []db.TestGroupState{
   434  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   435  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"},
   436  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   437  			}, topodatapb.TabletType_REPLICA},
   438  		}},
   439  		{name: "unreachable primary", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{
   440  			{alias0, "group", false, 0, []db.TestGroupState{
   441  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"},
   442  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   443  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   444  			}, topodatapb.TabletType_PRIMARY},
   445  			{alias1, "group", true, 0, []db.TestGroupState{
   446  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"},
   447  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   448  			}, topodatapb.TabletType_REPLICA},
   449  			{alias2, "group", true, 0, []db.TestGroupState{
   450  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"},
   451  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   452  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   453  			}, topodatapb.TabletType_REPLICA},
   454  		}},
   455  		{name: "more than one group name", expected: DiagnoseTypeError, errMessage: "fail to refreshSQLGroup: group has split brain", inputs: []data{ // vtgr should raise error
   456  			{alias0, "group", false, 0, []db.TestGroupState{
   457  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   458  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   459  			}, topodatapb.TabletType_PRIMARY},
   460  			{alias1, "group_xxx", false, 0, []db.TestGroupState{
   461  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   462  			}, topodatapb.TabletType_REPLICA},
   463  			{alias2, "group", true, 0, []db.TestGroupState{
   464  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   465  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   466  			}, topodatapb.TabletType_REPLICA},
   467  		}},
   468  		{name: "different primary", expected: DiagnoseTypeError, errMessage: "fail to refreshSQLGroup: group has split brain", inputs: []data{ // vtgr should raise error
   469  			{alias0, "group", false, 0, []db.TestGroupState{
   470  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   471  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   472  			}, topodatapb.TabletType_PRIMARY},
   473  			{alias1, "group", false, 0, []db.TestGroupState{
   474  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   475  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   476  			}, topodatapb.TabletType_REPLICA},
   477  			{alias2, "group", true, 0, []db.TestGroupState{
   478  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   479  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   480  			}, topodatapb.TabletType_REPLICA},
   481  		}},
   482  		{name: "insufficient members in group", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{
   483  			{alias0, "group", false, 0, []db.TestGroupState{
   484  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   485  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   486  			}, topodatapb.TabletType_PRIMARY},
   487  			{alias1, "group", true, 0, []db.TestGroupState{
   488  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   489  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   490  			}, topodatapb.TabletType_REPLICA},
   491  			{alias2, "group", true, 0, []db.TestGroupState{
   492  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"},
   493  			}, topodatapb.TabletType_REPLICA},
   494  		}},
   495  		// the shard has insufficient member, but the primary is already read_only
   496  		// we should try to connect the replica node
   497  		{name: "insufficient members in read only shard", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", inputs: []data{
   498  			{alias0, "group", true, 0, []db.TestGroupState{
   499  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   500  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   501  			}, topodatapb.TabletType_PRIMARY},
   502  			{alias1, "group", true, 0, []db.TestGroupState{
   503  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   504  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   505  			}, topodatapb.TabletType_REPLICA},
   506  			{alias2, "group", true, 0, []db.TestGroupState{
   507  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"},
   508  			}, topodatapb.TabletType_REPLICA},
   509  		}},
   510  		{name: "insufficient members in group with disable read only protection", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", config: disableProtectionCfg, inputs: []data{
   511  			{alias0, "group", false, 0, []db.TestGroupState{
   512  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   513  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   514  			}, topodatapb.TabletType_PRIMARY},
   515  			{alias1, "group", true, 0, []db.TestGroupState{
   516  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   517  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   518  			}, topodatapb.TabletType_REPLICA},
   519  			{alias2, "group", true, 0, []db.TestGroupState{
   520  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"},
   521  			}, topodatapb.TabletType_REPLICA},
   522  		}},
   523  		{name: "read only with disable read only protection", expected: DiagnoseTypeReadOnlyShard, errMessage: "", config: disableProtectionCfg, inputs: []data{
   524  			{alias0, "group", true, 0, []db.TestGroupState{
   525  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   526  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   527  			}, topodatapb.TabletType_PRIMARY},
   528  			{alias1, "group", true, 0, []db.TestGroupState{
   529  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   530  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   531  			}, topodatapb.TabletType_REPLICA},
   532  			{alias2, "group", true, 0, []db.TestGroupState{
   533  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"},
   534  			}, topodatapb.TabletType_REPLICA},
   535  		}},
   536  		{name: "read only healthy shard", expected: DiagnoseTypeReadOnlyShard, errMessage: "", inputs: []data{
   537  			{alias0, "group", true, 0, []db.TestGroupState{
   538  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   539  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   540  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   541  			}, topodatapb.TabletType_PRIMARY},
   542  			{alias1, "group", true, 0, []db.TestGroupState{
   543  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   544  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   545  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   546  			}, topodatapb.TabletType_REPLICA},
   547  			{alias2, "group", true, 0, []db.TestGroupState{
   548  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   549  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   550  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   551  			}, topodatapb.TabletType_REPLICA},
   552  		}},
   553  		{name: "inconsistent member state", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{
   554  			{alias0, "group", true, 11, []db.TestGroupState{
   555  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "OFFLINE", MemberRole: ""},
   556  			}, topodatapb.TabletType_REPLICA},
   557  			{alias1, "group", true, 12, []db.TestGroupState{
   558  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   559  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   560  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   561  			}, topodatapb.TabletType_PRIMARY},
   562  			{alias2, "group", true, math.MaxInt64, []db.TestGroupState{
   563  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""},
   564  			}, topodatapb.TabletType_REPLICA},
   565  		}},
   566  		{name: "network partition", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{
   567  			{alias0, "group", true, 0, []db.TestGroupState{
   568  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   569  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"},
   570  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"},
   571  			}, topodatapb.TabletType_PRIMARY},
   572  			{alias1, "group", true, 0, []db.TestGroupState{
   573  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "OFFLINE", MemberRole: "SECONDARY"},
   574  			}, topodatapb.TabletType_REPLICA},
   575  			{alias2, "group", true, 0, []db.TestGroupState{
   576  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: "SECONDARY"},
   577  			}, topodatapb.TabletType_REPLICA},
   578  		}},
   579  		{name: "start bootstrap in progress", expected: DiagnoseTypeBootstrapBackoff, errMessage: "", inputs: []data{
   580  			{alias0, "group", true, 0, []db.TestGroupState{
   581  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"},
   582  			}, topodatapb.TabletType_REPLICA},
   583  			{alias1, "", true, 0, []db.TestGroupState{}, topodatapb.TabletType_REPLICA},
   584  			{alias2, "", true, 0, []db.TestGroupState{
   585  				{MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""},
   586  			}, topodatapb.TabletType_REPLICA},
   587  		}},
   588  	}
   589  	for _, tt := range sqltests {
   590  		t.Run(tt.name, func(t *testing.T) {
   591  			ctrl := gomock.NewController(t)
   592  			defer ctrl.Finish()
   593  			ts := NewMockGRTopo(ctrl)
   594  			tmc := NewMockGRTmcClient(ctrl)
   595  			dbAgent := db.NewMockAgent(ctrl)
   596  			tablets := make(map[string]*topo.TabletInfo)
   597  			expected := tt.expected
   598  			inputMap := make(map[string]testGroupInput)
   599  			if tt.config == nil {
   600  				tt.config = cfg
   601  			}
   602  			conf := tt.config
   603  			hasPrimary := false
   604  			for i, input := range tt.inputs {
   605  				id := uint32(i)
   606  				//id := uint32(testPort0 + i)
   607  				tablet := buildTabletInfo(id, testHost, testPort0+i, input.ttype, time.Now())
   608  				tablets[tablet.AliasString()] = tablet
   609  				inputMap[input.alias] = testGroupInput{
   610  					input.groupName,
   611  					input.readOnly,
   612  					input.checkResult,
   613  					input.groupInput,
   614  					nil,
   615  				}
   616  				if tablet.Type == topodatapb.TabletType_PRIMARY {
   617  					si := &topo.ShardInfo{
   618  						Shard: &topodatapb.Shard{
   619  							PrimaryAlias: tablet.Alias,
   620  						},
   621  					}
   622  					ts.
   623  						EXPECT().
   624  						GetShard(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0")).
   625  						Return(si, nil)
   626  					hasPrimary = true
   627  				}
   628  				dbAgent.
   629  					EXPECT().
   630  					FetchGroupView(gomock.Any(), gomock.Any()).
   631  					DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) {
   632  						if target.Hostname == "" || target.Port == 0 {
   633  							return nil, errors.New("invalid mysql instance key")
   634  						}
   635  						s := inputMap[alias]
   636  						view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.checkResult, s.groupState)
   637  						return view, nil
   638  					}).
   639  					AnyTimes()
   640  			}
   641  			if !hasPrimary {
   642  				ts.
   643  					EXPECT().
   644  					GetShard(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0")).
   645  					Return(&topo.ShardInfo{Shard: &topodatapb.Shard{}}, nil)
   646  			}
   647  			for _, tid := range tt.removeTablets {
   648  				delete(tablets, tid)
   649  			}
   650  			ts.
   651  				EXPECT().
   652  				GetTabletMapForShardByCell(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0"), gomock.Any()).
   653  				Return(tablets, nil)
   654  			tmc.EXPECT().Ping(gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
   655  
   656  			ctx := context.Background()
   657  			shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, conf, testPort0, true)
   658  			shard.refreshTabletsInShardLocked(ctx)
   659  			diagnose, err := shard.Diagnose(ctx)
   660  			assert.Equal(t, expected, diagnose)
   661  			if tt.errMessage == "" {
   662  				assert.NoError(t, err)
   663  			} else {
   664  				assert.Error(t, err)
   665  				assert.True(t, strings.Contains(err.Error(), tt.errMessage), err.Error())
   666  			}
   667  		})
   668  	}
   669  }
   670  
   671  func TestDiagnoseWithInactive(t *testing.T) {
   672  	cfg := &config.VTGRConfig{BootstrapGroupSize: diagnoseGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1}
   673  	type data struct {
   674  		alias      string
   675  		groupName  string
   676  		readOnly   bool
   677  		pingable   bool
   678  		groupInput []db.TestGroupState
   679  		ttype      topodatapb.TabletType
   680  	}
   681  	var sqltests = []struct {
   682  		name                 string
   683  		expected             DiagnoseType
   684  		errMessage           string
   685  		config               *config.VTGRConfig
   686  		inputs               []data
   687  		rebootstrapGroupSize int
   688  		removeTablets        []string // to simulate missing tablet in topology
   689  	}{
   690  		// although mysql and vitess has different primary, but since this is an active shard, VTGR won't fix that
   691  		{name: "mysql and tablet has different primary", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{
   692  			{alias0, "group", true, true, []db.TestGroupState{
   693  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   694  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   695  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   696  			}, topodatapb.TabletType_REPLICA},
   697  			{alias1, "group", true, true, []db.TestGroupState{
   698  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   699  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   700  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   701  			}, topodatapb.TabletType_PRIMARY},
   702  			{alias2, "group", true, true, []db.TestGroupState{
   703  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   704  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   705  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   706  			}, topodatapb.TabletType_REPLICA},
   707  		}},
   708  		{name: "different primary with unconnected node", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", inputs: []data{
   709  			{alias0, "group", true, true, []db.TestGroupState{
   710  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   711  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   712  			}, topodatapb.TabletType_REPLICA},
   713  			{alias1, "group", true, true, []db.TestGroupState{
   714  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   715  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   716  			}, topodatapb.TabletType_PRIMARY},
   717  			{alias2, "group", true, true, []db.TestGroupState{
   718  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""},
   719  			}, topodatapb.TabletType_REPLICA},
   720  		}},
   721  		{name: "primary tablet is not pingable", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{
   722  			{alias0, "group", true, false, []db.TestGroupState{
   723  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   724  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   725  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   726  			}, topodatapb.TabletType_PRIMARY},
   727  			{alias1, "group", true, true, []db.TestGroupState{
   728  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   729  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   730  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   731  			}, topodatapb.TabletType_REPLICA},
   732  			{alias2, "group", true, true, []db.TestGroupState{
   733  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   734  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   735  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   736  			}, topodatapb.TabletType_REPLICA},
   737  		}},
   738  		// This is a read only shard, but since it's an inactive shard we will diagnose it as healthy
   739  		{name: "read only healthy shard", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{
   740  			{alias0, "group", true, true, []db.TestGroupState{
   741  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   742  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   743  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   744  			}, topodatapb.TabletType_PRIMARY},
   745  			{alias1, "group", true, true, []db.TestGroupState{
   746  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   747  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   748  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   749  			}, topodatapb.TabletType_REPLICA},
   750  			{alias2, "group", true, true, []db.TestGroupState{
   751  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   752  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   753  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   754  			}, topodatapb.TabletType_REPLICA},
   755  		}},
   756  		{name: "writable shard", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{
   757  			{alias0, "group", false, true, []db.TestGroupState{
   758  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   759  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   760  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   761  			}, topodatapb.TabletType_PRIMARY},
   762  			{alias1, "group", true, true, []db.TestGroupState{
   763  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   764  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   765  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   766  			}, topodatapb.TabletType_REPLICA},
   767  			{alias2, "group", true, true, []db.TestGroupState{
   768  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"},
   769  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   770  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"},
   771  			}, topodatapb.TabletType_REPLICA},
   772  		}},
   773  		{name: "error when there are only two nodes", expected: DiagnoseTypeError, errMessage: "fail to diagnose ShardHasInactiveGroup with 3 nodes expecting 2", inputs: []data{
   774  			{alias0, "group", true, true, []db.TestGroupState{
   775  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "OFFLINE", MemberRole: ""},
   776  			}, topodatapb.TabletType_REPLICA},
   777  			{alias1, "group", true, true, []db.TestGroupState{
   778  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "OFFLINE", MemberRole: ""},
   779  			}, topodatapb.TabletType_REPLICA},
   780  			{alias2, "group", true, true, []db.TestGroupState{
   781  				{MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""},
   782  			}, topodatapb.TabletType_REPLICA},
   783  		}, rebootstrapGroupSize: 2},
   784  	}
   785  	for _, tt := range sqltests {
   786  		t.Run(tt.name, func(t *testing.T) {
   787  			ctrl := gomock.NewController(t)
   788  			defer ctrl.Finish()
   789  			ctx := context.Background()
   790  			ts := memorytopo.NewServer("test_cell")
   791  			defer ts.Close()
   792  			ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{})
   793  			ts.CreateShard(ctx, "ks", "0")
   794  			tmc := NewMockGRTmcClient(ctrl)
   795  			dbAgent := db.NewMockAgent(ctrl)
   796  			expected := tt.expected
   797  			inputMap := make(map[string]testGroupInput)
   798  			pingable := make(map[string]bool)
   799  			if tt.config == nil {
   800  				tt.config = cfg
   801  			}
   802  			conf := tt.config
   803  			for i, input := range tt.inputs {
   804  				tablet := buildTabletInfo(uint32(i), testHost, testPort0+i, input.ttype, time.Now())
   805  				testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil)
   806  				inputMap[input.alias] = testGroupInput{
   807  					input.groupName,
   808  					input.readOnly,
   809  					0,
   810  					input.groupInput,
   811  					nil,
   812  				}
   813  				pingable[input.alias] = input.pingable
   814  				if tablet.Type == topodatapb.TabletType_PRIMARY {
   815  					ts.UpdateShardFields(ctx, "ks", "0", func(si *topo.ShardInfo) error {
   816  						si.PrimaryAlias = tablet.Alias
   817  						return nil
   818  					})
   819  				}
   820  				dbAgent.
   821  					EXPECT().
   822  					FetchGroupView(gomock.Any(), gomock.Any()).
   823  					DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) {
   824  						if target.Hostname == "" || target.Port == 0 {
   825  							return nil, errors.New("invalid mysql instance key")
   826  						}
   827  						s := inputMap[alias]
   828  						view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.checkResult, s.groupState)
   829  						return view, nil
   830  					}).
   831  					AnyTimes()
   832  				tmc.
   833  					EXPECT().
   834  					Ping(gomock.Any(), &topodatapb.Tablet{
   835  						Alias:                tablet.Alias,
   836  						Hostname:             tablet.Hostname,
   837  						Keyspace:             tablet.Keyspace,
   838  						Shard:                tablet.Shard,
   839  						Type:                 tablet.Type,
   840  						Tags:                 tablet.Tags,
   841  						MysqlHostname:        tablet.MysqlHostname,
   842  						MysqlPort:            tablet.MysqlPort,
   843  						PrimaryTermStartTime: tablet.PrimaryTermStartTime,
   844  					}).
   845  					DoAndReturn(func(_ context.Context, t *topodatapb.Tablet) error {
   846  						if !pingable[tablet.Alias.String()] {
   847  							return errors.New("unreachable")
   848  						}
   849  						return nil
   850  					}).
   851  					AnyTimes()
   852  			}
   853  			shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, conf, testPort0, false)
   854  			if tt.rebootstrapGroupSize != 0 {
   855  				shard.OverrideRebootstrapGroupSize(tt.rebootstrapGroupSize)
   856  			}
   857  			shard.refreshTabletsInShardLocked(ctx)
   858  			diagnose, err := shard.Diagnose(ctx)
   859  			assert.Equal(t, expected, diagnose)
   860  			if tt.errMessage == "" {
   861  				assert.NoError(t, err)
   862  			} else {
   863  				assert.Error(t, err)
   864  				assert.True(t, strings.Contains(err.Error(), tt.errMessage), err.Error())
   865  			}
   866  		})
   867  	}
   868  }
   869  
   870  func TestGroupStatusRecorder(t *testing.T) {
   871  	r := &groupGTIDRecorder{}
   872  
   873  	err := r.recordGroupStatus("group1", true)
   874  	assert.NoError(t, err)
   875  	assert.Equal(t, r.name, "group1")
   876  	assert.Equal(t, r.hasActive, true)
   877  
   878  	err = r.recordGroupStatus("group2", false)
   879  	assert.Error(t, err, "group has more than one group name")
   880  	assert.Equal(t, r.name, "group1")
   881  
   882  	err = r.recordGroupStatus("group1", false)
   883  	assert.NoError(t, err)
   884  	assert.Equal(t, r.name, "group1")
   885  	assert.Equal(t, r.hasActive, true)
   886  
   887  	pos1, err := mysql.ParsePosition(mysql.Mysql56FlavorID, "264a8230-67d2-11eb-acdd-0a8d91f24125:1-22:1000019-1000021")
   888  	assert.NoError(t, err)
   889  	inst1 := &grInstance{alias: "alias1"}
   890  	r.recordGroupGTIDs(pos1.GTIDSet, inst1)
   891  	pos2, err := mysql.ParsePosition(mysql.Mysql56FlavorID, "264a8230-67d2-11eb-acdd-0a8d91f24125:1-1000021")
   892  	assert.NoError(t, err)
   893  	inst2 := &grInstance{alias: "alias2"}
   894  	r.recordGroupGTIDs(pos2.GTIDSet, inst2)
   895  	assert.Equal(t, len(r.gtidWithInstances), 2)
   896  	assert.Equal(t, r.gtidWithInstances[0].instance, inst1)
   897  	assert.Equal(t, pos1.GTIDSet.Equal(r.gtidWithInstances[0].gtids), true)
   898  	assert.Equal(t, r.gtidWithInstances[1].instance, inst2)
   899  	assert.Equal(t, pos2.GTIDSet.Equal(r.gtidWithInstances[1].gtids), true)
   900  }