vitess.io/vitess@v0.16.2/go/vt/vtgr/controller/diagnose_test.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package controller 18 19 import ( 20 "context" 21 "errors" 22 "math" 23 "os" 24 "strconv" 25 "strings" 26 "testing" 27 "time" 28 29 "github.com/golang/mock/gomock" 30 "github.com/stretchr/testify/assert" 31 32 "vitess.io/vitess/go/mysql" 33 "vitess.io/vitess/go/vt/topo" 34 "vitess.io/vitess/go/vt/topo/memorytopo" 35 "vitess.io/vitess/go/vt/vtctl/grpcvtctldserver/testutil" 36 "vitess.io/vitess/go/vt/vtgr/config" 37 "vitess.io/vitess/go/vt/vtgr/db" 38 "vitess.io/vitess/go/vt/vtgr/inst" 39 40 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 41 ) 42 43 const diagnoseGroupSize = 3 44 45 var ( 46 testHost, _ = os.Hostname() 47 alias0 = "test_cell-0000000000" 48 alias1 = "test_cell-0000000001" 49 alias2 = "test_cell-0000000002" 50 testPort0 = 17000 51 testPort1 = 17001 52 testPort2 = 17002 53 ) 54 55 type testGroupInput struct { 56 groupName string 57 readOnly bool 58 checkResult int 59 groupState []db.TestGroupState 60 gtid mysql.GTIDSet 61 } 62 63 func TestShardIsHealthy(t *testing.T) { 64 ctrl := gomock.NewController(t) 65 defer ctrl.Finish() 66 ctx := context.Background() 67 ts := memorytopo.NewServer("test_cell") 68 defer ts.Close() 69 ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) 70 ts.CreateShard(ctx, "ks", "0") 71 tmc := NewMockGRTmcClient(ctrl) 72 dbAgent := db.NewMockAgent(ctrl) 73 tablet1 := buildTabletInfo(uint32(testPort0), testHost, testPort0, topodatapb.TabletType_PRIMARY, time.Now()) 74 tablet2 := buildTabletInfo(uint32(testPort1), testHost, testPort1, topodatapb.TabletType_SPARE, time.Time{}) 75 tablet3 := buildTabletInfo(uint32(testPort2), testHost, testPort2, topodatapb.TabletType_REPLICA, time.Time{}) 76 testutil.AddTablet(ctx, t, ts, tablet1.Tablet, nil) 77 testutil.AddTablet(ctx, t, ts, tablet2.Tablet, nil) 78 testutil.AddTablet(ctx, t, ts, tablet3.Tablet, nil) 79 ts.UpdateShardFields(ctx, "ks", "0", func(si *topo.ShardInfo) error { 80 si.PrimaryAlias = tablet1.Alias 81 return nil 82 }) 83 dbAgent. 84 EXPECT(). 85 FetchGroupView(gomock.Any(), gomock.Any()). 86 DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { 87 return db.BuildGroupView(alias, "group", testHost, testPort0, false, 0, []db.TestGroupState{ 88 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 89 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 90 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 91 }), nil 92 }). 93 AnyTimes() 94 tmc.EXPECT().Ping(gomock.Any(), gomock.Any()).Return(nil).AnyTimes() 95 cfg := &config.VTGRConfig{BootstrapGroupSize: 3, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} 96 shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0, true) 97 shard.refreshTabletsInShardLocked(ctx) 98 diagnose, _ := shard.Diagnose(ctx) 99 assert.Equal(t, DiagnoseTypeHealthy, string(diagnose)) 100 } 101 102 func TestTabletIssueDiagnoses(t *testing.T) { 103 type data struct { 104 pingable bool 105 ttype topodatapb.TabletType 106 } 107 var tablettests = []struct { 108 name string 109 expected DiagnoseType 110 errMessage string 111 primaryAlias string 112 inputs []data 113 }{ 114 {name: "healthy shard", expected: DiagnoseTypeHealthy, errMessage: "", primaryAlias: "test_cell-0000017000", inputs: []data{ 115 {true, topodatapb.TabletType_PRIMARY}, 116 {true, topodatapb.TabletType_REPLICA}, 117 {true, topodatapb.TabletType_REPLICA}, 118 }}, 119 {name: "non primary tablet is not pingable", expected: DiagnoseTypeHealthy, errMessage: "", primaryAlias: "test_cell-0000017000", inputs: []data{ // vtgr should do nothing 120 {true, topodatapb.TabletType_PRIMARY}, 121 {false, topodatapb.TabletType_REPLICA}, 122 {false, topodatapb.TabletType_REPLICA}, 123 }}, 124 {name: "primary tablet is not pingable", expected: DiagnoseTypeUnreachablePrimary, errMessage: "", primaryAlias: "test_cell-0000017000", inputs: []data{ // vtgr should trigger a failover 125 {false, topodatapb.TabletType_PRIMARY}, 126 {true, topodatapb.TabletType_REPLICA}, 127 {true, topodatapb.TabletType_REPLICA}, 128 }}, 129 {name: "no primary tablet", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", primaryAlias: "", inputs: []data{ // vtgr should create one based on mysql 130 {true, topodatapb.TabletType_REPLICA}, 131 {true, topodatapb.TabletType_REPLICA}, 132 {true, topodatapb.TabletType_REPLICA}, 133 }}, 134 {name: "wrong primary in tablet types", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", primaryAlias: "test_cell-0000017001", inputs: []data{ // shard info returns differently comparing with tablet type 135 {true, topodatapb.TabletType_PRIMARY}, 136 {true, topodatapb.TabletType_REPLICA}, 137 {true, topodatapb.TabletType_REPLICA}, 138 }}, 139 {name: "mysql and vttablet has different primary", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", primaryAlias: "test_cell-0000017001", inputs: []data{ // vtgr should fix vttablet 140 {true, topodatapb.TabletType_REPLICA}, 141 {true, topodatapb.TabletType_PRIMARY}, 142 {true, topodatapb.TabletType_REPLICA}, 143 }}, 144 {name: "unreachable wrong vttablet primary", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", primaryAlias: "test_cell-0000017001", inputs: []data{ // vtgr should fix vttablet 145 {true, topodatapb.TabletType_REPLICA}, 146 {false, topodatapb.TabletType_PRIMARY}, 147 {true, topodatapb.TabletType_REPLICA}, 148 }}, 149 {name: "unreachable uninitialized primary vttablet", expected: DiagnoseTypeUnreachablePrimary, errMessage: "", inputs: []data{ // vtgr should failover 150 {false, topodatapb.TabletType_REPLICA}, 151 {true, topodatapb.TabletType_REPLICA}, 152 {true, topodatapb.TabletType_REPLICA}, 153 }}, 154 } 155 for _, tt := range tablettests { 156 t.Run(tt.name, func(t *testing.T) { 157 expected := tt.expected 158 ctrl := gomock.NewController(t) 159 defer ctrl.Finish() 160 ts := NewMockGRTopo(ctrl) 161 tmc := NewMockGRTmcClient(ctrl) 162 dbAgent := db.NewMockAgent(ctrl) 163 tablets := make(map[string]*topo.TabletInfo) 164 if tt.primaryAlias == "" { 165 ts. 166 EXPECT(). 167 GetShard(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0")). 168 Return(&topo.ShardInfo{Shard: &topodatapb.Shard{}}, nil) 169 } 170 for i, input := range tt.inputs { 171 id := uint32(testPort0 + i) 172 tablet := buildTabletInfo(id, testHost, testPort0+i, input.ttype, time.Now()) 173 tablets[tablet.AliasString()] = tablet 174 var response = struct { 175 pingable bool 176 }{input.pingable} 177 if tt.primaryAlias == tablet.AliasString() { 178 si := &topo.ShardInfo{ 179 Shard: &topodatapb.Shard{ 180 PrimaryAlias: tablet.Alias, 181 }, 182 } 183 ts. 184 EXPECT(). 185 GetShard(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0")). 186 Return(si, nil) 187 } 188 dbAgent. 189 EXPECT(). 190 FetchGroupView(gomock.Any(), gomock.Any()). 191 DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { 192 if target.Hostname == "" || target.Port == 0 { 193 return nil, errors.New("invalid mysql instance key") 194 } 195 return db.BuildGroupView(alias, "group", testHost, testPort0, false, 0, []db.TestGroupState{ 196 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 197 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 198 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 199 }), nil 200 }). 201 AnyTimes() 202 tmc. 203 EXPECT(). 204 Ping(gomock.Any(), &topodatapb.Tablet{ 205 Alias: tablet.Alias, 206 Hostname: tablet.Hostname, 207 Keyspace: tablet.Keyspace, 208 Shard: tablet.Shard, 209 Type: tablet.Type, 210 Tags: tablet.Tags, 211 MysqlHostname: tablet.MysqlHostname, 212 MysqlPort: tablet.MysqlPort, 213 PrimaryTermStartTime: tablet.PrimaryTermStartTime, 214 }). 215 DoAndReturn(func(_ context.Context, t *topodatapb.Tablet) error { 216 if !response.pingable { 217 return errors.New("unreachable") 218 } 219 return nil 220 }). 221 AnyTimes() 222 } 223 ts. 224 EXPECT(). 225 GetTabletMapForShardByCell(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0"), gomock.Any()). 226 Return(tablets, nil) 227 228 ctx := context.Background() 229 cfg := &config.VTGRConfig{BootstrapGroupSize: diagnoseGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} 230 shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, cfg, testPort0, true) 231 shard.refreshTabletsInShardLocked(ctx) 232 diagnose, err := shard.Diagnose(ctx) 233 assert.Equal(t, expected, diagnose) 234 if tt.errMessage == "" { 235 assert.NoError(t, err) 236 } else { 237 assert.Error(t, err) 238 assert.True(t, strings.Contains(err.Error(), tt.errMessage), err.Error()) 239 } 240 }) 241 } 242 } 243 244 func TestMysqlIssueDiagnoses(t *testing.T) { 245 cfg := &config.VTGRConfig{BootstrapGroupSize: diagnoseGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} 246 disableProtectionCfg := &config.VTGRConfig{BootstrapGroupSize: diagnoseGroupSize, MinNumReplica: 2, DisableReadOnlyProtection: true, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} 247 heartbeatThreshold = 10 248 defer func() { 249 heartbeatThreshold = math.MaxInt64 250 }() 251 type data struct { 252 alias string 253 groupName string 254 readOnly bool 255 checkResult int 256 groupInput []db.TestGroupState 257 ttype topodatapb.TabletType 258 } 259 var sqltests = []struct { 260 name string 261 expected DiagnoseType 262 errMessage string 263 config *config.VTGRConfig 264 inputs []data 265 removeTablets []string // to simulate missing tablet in topology 266 }{ 267 {name: "healthy shard", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{ 268 {alias0, "group", false, 0, []db.TestGroupState{ 269 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 270 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 271 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 272 }, topodatapb.TabletType_PRIMARY}, 273 {alias1, "group", true, 0, []db.TestGroupState{ 274 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 275 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 276 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 277 }, topodatapb.TabletType_REPLICA}, 278 {alias2, "group", true, 0, []db.TestGroupState{ 279 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 280 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 281 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 282 }, topodatapb.TabletType_REPLICA}, 283 }}, 284 {name: "recovering primary shard", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ 285 {alias0, "group", false, 0, []db.TestGroupState{ 286 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "PRIMARY"}, 287 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 288 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 289 }, topodatapb.TabletType_PRIMARY}, 290 {alias1, "group", true, 0, []db.TestGroupState{ 291 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "PRIMARY"}, 292 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 293 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 294 }, topodatapb.TabletType_REPLICA}, 295 {alias2, "group", true, 0, []db.TestGroupState{ 296 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "RECOVERING", MemberRole: "PRIMARY"}, 297 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 298 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 299 }, topodatapb.TabletType_REPLICA}, 300 }}, 301 {name: "no group in shard", expected: DiagnoseTypeShardHasNoGroup, errMessage: "", inputs: []data{ 302 {alias0, "", true, 0, []db.TestGroupState{ 303 {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, 304 }, topodatapb.TabletType_REPLICA}, 305 {alias1, "", true, 0, []db.TestGroupState{ 306 {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, 307 }, topodatapb.TabletType_REPLICA}, 308 {alias2, "", true, 0, []db.TestGroupState{ 309 {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, 310 }, topodatapb.TabletType_REPLICA}, 311 }}, 312 {name: "fail to bootstrap with incorrect number of nodes", expected: DiagnoseTypeError, errMessage: "fail to diagnose ShardHasNoGroup with 3 nodes", inputs: []data{ 313 {alias0, "", true, 0, []db.TestGroupState{ 314 {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, 315 }, topodatapb.TabletType_REPLICA}, 316 {alias1, "", true, 0, []db.TestGroupState{ 317 {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, 318 }, topodatapb.TabletType_REPLICA}, 319 {alias2, "", true, 0, []db.TestGroupState{ 320 {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, 321 }, topodatapb.TabletType_REPLICA}, 322 }, config: &config.VTGRConfig{BootstrapGroupSize: 2, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1}}, 323 {name: "unreachable node", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ 324 {alias0, "group", false, 0, []db.TestGroupState{ 325 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 326 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, 327 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 328 }, topodatapb.TabletType_PRIMARY}, 329 {alias1, "group", true, 0, []db.TestGroupState{ 330 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, 331 }, topodatapb.TabletType_REPLICA}, 332 {alias2, "group", true, 0, []db.TestGroupState{ 333 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 334 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, 335 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 336 }, topodatapb.TabletType_REPLICA}, 337 }}, 338 {name: "mysql and tablet has different primary", expected: DiagnoseTypeWrongPrimaryTablet, errMessage: "", inputs: []data{ // vtgr should failover vttablet 339 {alias0, "group", false, 0, []db.TestGroupState{ 340 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 341 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 342 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 343 }, topodatapb.TabletType_REPLICA}, 344 {alias1, "group", true, 0, []db.TestGroupState{ 345 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 346 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 347 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 348 }, topodatapb.TabletType_PRIMARY}, 349 {alias2, "group", true, 0, []db.TestGroupState{ 350 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 351 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 352 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 353 }, topodatapb.TabletType_REPLICA}, 354 }}, 355 {name: "mysql primary out of topology", expected: DiagnoseTypeUnreachablePrimary, errMessage: "", inputs: []data{ // vtgr should failover mysql 356 {alias0, "group", false, 0, []db.TestGroupState{ 357 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 358 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 359 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 360 }, topodatapb.TabletType_REPLICA}, 361 {alias1, "group", true, 0, []db.TestGroupState{ 362 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 363 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 364 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 365 }, topodatapb.TabletType_PRIMARY}, 366 {alias2, "group", true, 0, []db.TestGroupState{ 367 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 368 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 369 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 370 }, topodatapb.TabletType_REPLICA}, 371 }, removeTablets: []string{alias0}}, 372 {name: "one error node", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", inputs: []data{ 373 {alias0, "group", false, 0, []db.TestGroupState{ 374 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 375 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, 376 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 377 }, topodatapb.TabletType_PRIMARY}, 378 {alias1, "group", true, 0, []db.TestGroupState{ 379 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, 380 }, topodatapb.TabletType_REPLICA}, 381 {alias2, "group", true, 0, []db.TestGroupState{ 382 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 383 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, 384 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 385 }, topodatapb.TabletType_REPLICA}, 386 }}, 387 {name: "inactive group with divergent state", expected: DiagnoseTypeShardHasInactiveGroup, errMessage: "", inputs: []data{ 388 {alias0, "group", true, 11, []db.TestGroupState{ 389 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "OFFLINE", MemberRole: "SECONDARY"}, 390 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, 391 }, topodatapb.TabletType_PRIMARY}, 392 {alias1, "group", true, 11, []db.TestGroupState{ 393 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "OFFLINE", MemberRole: ""}, 394 }, topodatapb.TabletType_REPLICA}, 395 {alias2, "group", true, 11, []db.TestGroupState{ 396 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""}, 397 }, topodatapb.TabletType_REPLICA}, 398 }}, 399 {name: "two error node", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{ 400 {alias0, "group", false, 0, []db.TestGroupState{ 401 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 402 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, 403 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, 404 }, topodatapb.TabletType_PRIMARY}, 405 {alias1, "group", true, 0, []db.TestGroupState{ 406 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ERROR", MemberRole: "SECONDARY"}, 407 }, topodatapb.TabletType_REPLICA}, 408 {alias2, "group", true, 0, []db.TestGroupState{ 409 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, 410 }, topodatapb.TabletType_REPLICA}, 411 }}, 412 {name: "insufficient group member", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{ 413 {alias0, "group", false, 0, []db.TestGroupState{ 414 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 415 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 416 }, topodatapb.TabletType_PRIMARY}, 417 {alias1, "group", true, 0, []db.TestGroupState{}, topodatapb.TabletType_REPLICA}, 418 {alias2, "group", true, 0, []db.TestGroupState{ 419 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 420 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 421 }, topodatapb.TabletType_REPLICA}, 422 }}, 423 {name: "unconnected node", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ 424 {alias0, "group", true, 0, []db.TestGroupState{ 425 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 426 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, 427 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 428 }, topodatapb.TabletType_PRIMARY}, 429 {alias1, "group", true, 0, []db.TestGroupState{ 430 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 431 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, 432 }, topodatapb.TabletType_REPLICA}, 433 {alias2, "group", true, 0, []db.TestGroupState{ 434 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 435 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, 436 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 437 }, topodatapb.TabletType_REPLICA}, 438 }}, 439 {name: "unreachable primary", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ 440 {alias0, "group", false, 0, []db.TestGroupState{ 441 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, 442 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 443 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 444 }, topodatapb.TabletType_PRIMARY}, 445 {alias1, "group", true, 0, []db.TestGroupState{ 446 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, 447 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 448 }, topodatapb.TabletType_REPLICA}, 449 {alias2, "group", true, 0, []db.TestGroupState{ 450 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "UNREACHABLE", MemberRole: "PRIMARY"}, 451 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 452 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 453 }, topodatapb.TabletType_REPLICA}, 454 }}, 455 {name: "more than one group name", expected: DiagnoseTypeError, errMessage: "fail to refreshSQLGroup: group has split brain", inputs: []data{ // vtgr should raise error 456 {alias0, "group", false, 0, []db.TestGroupState{ 457 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 458 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 459 }, topodatapb.TabletType_PRIMARY}, 460 {alias1, "group_xxx", false, 0, []db.TestGroupState{ 461 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 462 }, topodatapb.TabletType_REPLICA}, 463 {alias2, "group", true, 0, []db.TestGroupState{ 464 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 465 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 466 }, topodatapb.TabletType_REPLICA}, 467 }}, 468 {name: "different primary", expected: DiagnoseTypeError, errMessage: "fail to refreshSQLGroup: group has split brain", inputs: []data{ // vtgr should raise error 469 {alias0, "group", false, 0, []db.TestGroupState{ 470 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 471 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 472 }, topodatapb.TabletType_PRIMARY}, 473 {alias1, "group", false, 0, []db.TestGroupState{ 474 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 475 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 476 }, topodatapb.TabletType_REPLICA}, 477 {alias2, "group", true, 0, []db.TestGroupState{ 478 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 479 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 480 }, topodatapb.TabletType_REPLICA}, 481 }}, 482 {name: "insufficient members in group", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{ 483 {alias0, "group", false, 0, []db.TestGroupState{ 484 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 485 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 486 }, topodatapb.TabletType_PRIMARY}, 487 {alias1, "group", true, 0, []db.TestGroupState{ 488 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 489 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 490 }, topodatapb.TabletType_REPLICA}, 491 {alias2, "group", true, 0, []db.TestGroupState{ 492 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, 493 }, topodatapb.TabletType_REPLICA}, 494 }}, 495 // the shard has insufficient member, but the primary is already read_only 496 // we should try to connect the replica node 497 {name: "insufficient members in read only shard", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", inputs: []data{ 498 {alias0, "group", true, 0, []db.TestGroupState{ 499 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 500 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 501 }, topodatapb.TabletType_PRIMARY}, 502 {alias1, "group", true, 0, []db.TestGroupState{ 503 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 504 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 505 }, topodatapb.TabletType_REPLICA}, 506 {alias2, "group", true, 0, []db.TestGroupState{ 507 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, 508 }, topodatapb.TabletType_REPLICA}, 509 }}, 510 {name: "insufficient members in group with disable read only protection", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", config: disableProtectionCfg, inputs: []data{ 511 {alias0, "group", false, 0, []db.TestGroupState{ 512 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 513 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 514 }, topodatapb.TabletType_PRIMARY}, 515 {alias1, "group", true, 0, []db.TestGroupState{ 516 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 517 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 518 }, topodatapb.TabletType_REPLICA}, 519 {alias2, "group", true, 0, []db.TestGroupState{ 520 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, 521 }, topodatapb.TabletType_REPLICA}, 522 }}, 523 {name: "read only with disable read only protection", expected: DiagnoseTypeReadOnlyShard, errMessage: "", config: disableProtectionCfg, inputs: []data{ 524 {alias0, "group", true, 0, []db.TestGroupState{ 525 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 526 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 527 }, topodatapb.TabletType_PRIMARY}, 528 {alias1, "group", true, 0, []db.TestGroupState{ 529 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 530 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 531 }, topodatapb.TabletType_REPLICA}, 532 {alias2, "group", true, 0, []db.TestGroupState{ 533 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ERROR", MemberRole: "SECONDARY"}, 534 }, topodatapb.TabletType_REPLICA}, 535 }}, 536 {name: "read only healthy shard", expected: DiagnoseTypeReadOnlyShard, errMessage: "", inputs: []data{ 537 {alias0, "group", true, 0, []db.TestGroupState{ 538 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 539 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 540 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 541 }, topodatapb.TabletType_PRIMARY}, 542 {alias1, "group", true, 0, []db.TestGroupState{ 543 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 544 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 545 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 546 }, topodatapb.TabletType_REPLICA}, 547 {alias2, "group", true, 0, []db.TestGroupState{ 548 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 549 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 550 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 551 }, topodatapb.TabletType_REPLICA}, 552 }}, 553 {name: "inconsistent member state", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ 554 {alias0, "group", true, 11, []db.TestGroupState{ 555 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "OFFLINE", MemberRole: ""}, 556 }, topodatapb.TabletType_REPLICA}, 557 {alias1, "group", true, 12, []db.TestGroupState{ 558 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 559 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 560 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 561 }, topodatapb.TabletType_PRIMARY}, 562 {alias2, "group", true, math.MaxInt64, []db.TestGroupState{ 563 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""}, 564 }, topodatapb.TabletType_REPLICA}, 565 }}, 566 {name: "network partition", expected: DiagnoseTypeBackoffError, errMessage: "", inputs: []data{ 567 {alias0, "group", true, 0, []db.TestGroupState{ 568 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 569 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, 570 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "UNREACHABLE", MemberRole: "SECONDARY"}, 571 }, topodatapb.TabletType_PRIMARY}, 572 {alias1, "group", true, 0, []db.TestGroupState{ 573 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "OFFLINE", MemberRole: "SECONDARY"}, 574 }, topodatapb.TabletType_REPLICA}, 575 {alias2, "group", true, 0, []db.TestGroupState{ 576 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: "SECONDARY"}, 577 }, topodatapb.TabletType_REPLICA}, 578 }}, 579 {name: "start bootstrap in progress", expected: DiagnoseTypeBootstrapBackoff, errMessage: "", inputs: []data{ 580 {alias0, "group", true, 0, []db.TestGroupState{ 581 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "RECOVERING", MemberRole: "SECONDARY"}, 582 }, topodatapb.TabletType_REPLICA}, 583 {alias1, "", true, 0, []db.TestGroupState{}, topodatapb.TabletType_REPLICA}, 584 {alias2, "", true, 0, []db.TestGroupState{ 585 {MemberHost: "", MemberPort: "", MemberState: "OFFLINE", MemberRole: ""}, 586 }, topodatapb.TabletType_REPLICA}, 587 }}, 588 } 589 for _, tt := range sqltests { 590 t.Run(tt.name, func(t *testing.T) { 591 ctrl := gomock.NewController(t) 592 defer ctrl.Finish() 593 ts := NewMockGRTopo(ctrl) 594 tmc := NewMockGRTmcClient(ctrl) 595 dbAgent := db.NewMockAgent(ctrl) 596 tablets := make(map[string]*topo.TabletInfo) 597 expected := tt.expected 598 inputMap := make(map[string]testGroupInput) 599 if tt.config == nil { 600 tt.config = cfg 601 } 602 conf := tt.config 603 hasPrimary := false 604 for i, input := range tt.inputs { 605 id := uint32(i) 606 //id := uint32(testPort0 + i) 607 tablet := buildTabletInfo(id, testHost, testPort0+i, input.ttype, time.Now()) 608 tablets[tablet.AliasString()] = tablet 609 inputMap[input.alias] = testGroupInput{ 610 input.groupName, 611 input.readOnly, 612 input.checkResult, 613 input.groupInput, 614 nil, 615 } 616 if tablet.Type == topodatapb.TabletType_PRIMARY { 617 si := &topo.ShardInfo{ 618 Shard: &topodatapb.Shard{ 619 PrimaryAlias: tablet.Alias, 620 }, 621 } 622 ts. 623 EXPECT(). 624 GetShard(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0")). 625 Return(si, nil) 626 hasPrimary = true 627 } 628 dbAgent. 629 EXPECT(). 630 FetchGroupView(gomock.Any(), gomock.Any()). 631 DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { 632 if target.Hostname == "" || target.Port == 0 { 633 return nil, errors.New("invalid mysql instance key") 634 } 635 s := inputMap[alias] 636 view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.checkResult, s.groupState) 637 return view, nil 638 }). 639 AnyTimes() 640 } 641 if !hasPrimary { 642 ts. 643 EXPECT(). 644 GetShard(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0")). 645 Return(&topo.ShardInfo{Shard: &topodatapb.Shard{}}, nil) 646 } 647 for _, tid := range tt.removeTablets { 648 delete(tablets, tid) 649 } 650 ts. 651 EXPECT(). 652 GetTabletMapForShardByCell(gomock.Any(), gomock.Eq("ks"), gomock.Eq("0"), gomock.Any()). 653 Return(tablets, nil) 654 tmc.EXPECT().Ping(gomock.Any(), gomock.Any()).Return(nil).AnyTimes() 655 656 ctx := context.Background() 657 shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, conf, testPort0, true) 658 shard.refreshTabletsInShardLocked(ctx) 659 diagnose, err := shard.Diagnose(ctx) 660 assert.Equal(t, expected, diagnose) 661 if tt.errMessage == "" { 662 assert.NoError(t, err) 663 } else { 664 assert.Error(t, err) 665 assert.True(t, strings.Contains(err.Error(), tt.errMessage), err.Error()) 666 } 667 }) 668 } 669 } 670 671 func TestDiagnoseWithInactive(t *testing.T) { 672 cfg := &config.VTGRConfig{BootstrapGroupSize: diagnoseGroupSize, MinNumReplica: 2, BackoffErrorWaitTimeSeconds: 1, BootstrapWaitTimeSeconds: 1} 673 type data struct { 674 alias string 675 groupName string 676 readOnly bool 677 pingable bool 678 groupInput []db.TestGroupState 679 ttype topodatapb.TabletType 680 } 681 var sqltests = []struct { 682 name string 683 expected DiagnoseType 684 errMessage string 685 config *config.VTGRConfig 686 inputs []data 687 rebootstrapGroupSize int 688 removeTablets []string // to simulate missing tablet in topology 689 }{ 690 // although mysql and vitess has different primary, but since this is an active shard, VTGR won't fix that 691 {name: "mysql and tablet has different primary", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{ 692 {alias0, "group", true, true, []db.TestGroupState{ 693 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 694 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 695 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 696 }, topodatapb.TabletType_REPLICA}, 697 {alias1, "group", true, true, []db.TestGroupState{ 698 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 699 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 700 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 701 }, topodatapb.TabletType_PRIMARY}, 702 {alias2, "group", true, true, []db.TestGroupState{ 703 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 704 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 705 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 706 }, topodatapb.TabletType_REPLICA}, 707 }}, 708 {name: "different primary with unconnected node", expected: DiagnoseTypeUnconnectedReplica, errMessage: "", inputs: []data{ 709 {alias0, "group", true, true, []db.TestGroupState{ 710 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 711 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 712 }, topodatapb.TabletType_REPLICA}, 713 {alias1, "group", true, true, []db.TestGroupState{ 714 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 715 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 716 }, topodatapb.TabletType_PRIMARY}, 717 {alias2, "group", true, true, []db.TestGroupState{ 718 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""}, 719 }, topodatapb.TabletType_REPLICA}, 720 }}, 721 {name: "primary tablet is not pingable", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{ 722 {alias0, "group", true, false, []db.TestGroupState{ 723 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 724 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 725 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 726 }, topodatapb.TabletType_PRIMARY}, 727 {alias1, "group", true, true, []db.TestGroupState{ 728 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 729 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 730 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 731 }, topodatapb.TabletType_REPLICA}, 732 {alias2, "group", true, true, []db.TestGroupState{ 733 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 734 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 735 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 736 }, topodatapb.TabletType_REPLICA}, 737 }}, 738 // This is a read only shard, but since it's an inactive shard we will diagnose it as healthy 739 {name: "read only healthy shard", expected: DiagnoseTypeHealthy, errMessage: "", inputs: []data{ 740 {alias0, "group", true, true, []db.TestGroupState{ 741 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 742 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 743 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 744 }, topodatapb.TabletType_PRIMARY}, 745 {alias1, "group", true, true, []db.TestGroupState{ 746 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 747 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 748 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 749 }, topodatapb.TabletType_REPLICA}, 750 {alias2, "group", true, true, []db.TestGroupState{ 751 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 752 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 753 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 754 }, topodatapb.TabletType_REPLICA}, 755 }}, 756 {name: "writable shard", expected: DiagnoseTypeInsufficientGroupSize, errMessage: "", inputs: []data{ 757 {alias0, "group", false, true, []db.TestGroupState{ 758 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 759 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 760 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 761 }, topodatapb.TabletType_PRIMARY}, 762 {alias1, "group", true, true, []db.TestGroupState{ 763 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 764 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 765 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 766 }, topodatapb.TabletType_REPLICA}, 767 {alias2, "group", true, true, []db.TestGroupState{ 768 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "ONLINE", MemberRole: "PRIMARY"}, 769 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 770 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "ONLINE", MemberRole: "SECONDARY"}, 771 }, topodatapb.TabletType_REPLICA}, 772 }}, 773 {name: "error when there are only two nodes", expected: DiagnoseTypeError, errMessage: "fail to diagnose ShardHasInactiveGroup with 3 nodes expecting 2", inputs: []data{ 774 {alias0, "group", true, true, []db.TestGroupState{ 775 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort0), MemberState: "OFFLINE", MemberRole: ""}, 776 }, topodatapb.TabletType_REPLICA}, 777 {alias1, "group", true, true, []db.TestGroupState{ 778 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort1), MemberState: "OFFLINE", MemberRole: ""}, 779 }, topodatapb.TabletType_REPLICA}, 780 {alias2, "group", true, true, []db.TestGroupState{ 781 {MemberHost: testHost, MemberPort: strconv.Itoa(testPort2), MemberState: "OFFLINE", MemberRole: ""}, 782 }, topodatapb.TabletType_REPLICA}, 783 }, rebootstrapGroupSize: 2}, 784 } 785 for _, tt := range sqltests { 786 t.Run(tt.name, func(t *testing.T) { 787 ctrl := gomock.NewController(t) 788 defer ctrl.Finish() 789 ctx := context.Background() 790 ts := memorytopo.NewServer("test_cell") 791 defer ts.Close() 792 ts.CreateKeyspace(ctx, "ks", &topodatapb.Keyspace{}) 793 ts.CreateShard(ctx, "ks", "0") 794 tmc := NewMockGRTmcClient(ctrl) 795 dbAgent := db.NewMockAgent(ctrl) 796 expected := tt.expected 797 inputMap := make(map[string]testGroupInput) 798 pingable := make(map[string]bool) 799 if tt.config == nil { 800 tt.config = cfg 801 } 802 conf := tt.config 803 for i, input := range tt.inputs { 804 tablet := buildTabletInfo(uint32(i), testHost, testPort0+i, input.ttype, time.Now()) 805 testutil.AddTablet(ctx, t, ts, tablet.Tablet, nil) 806 inputMap[input.alias] = testGroupInput{ 807 input.groupName, 808 input.readOnly, 809 0, 810 input.groupInput, 811 nil, 812 } 813 pingable[input.alias] = input.pingable 814 if tablet.Type == topodatapb.TabletType_PRIMARY { 815 ts.UpdateShardFields(ctx, "ks", "0", func(si *topo.ShardInfo) error { 816 si.PrimaryAlias = tablet.Alias 817 return nil 818 }) 819 } 820 dbAgent. 821 EXPECT(). 822 FetchGroupView(gomock.Any(), gomock.Any()). 823 DoAndReturn(func(alias string, target *inst.InstanceKey) (*db.GroupView, error) { 824 if target.Hostname == "" || target.Port == 0 { 825 return nil, errors.New("invalid mysql instance key") 826 } 827 s := inputMap[alias] 828 view := db.BuildGroupView(alias, s.groupName, target.Hostname, target.Port, s.readOnly, s.checkResult, s.groupState) 829 return view, nil 830 }). 831 AnyTimes() 832 tmc. 833 EXPECT(). 834 Ping(gomock.Any(), &topodatapb.Tablet{ 835 Alias: tablet.Alias, 836 Hostname: tablet.Hostname, 837 Keyspace: tablet.Keyspace, 838 Shard: tablet.Shard, 839 Type: tablet.Type, 840 Tags: tablet.Tags, 841 MysqlHostname: tablet.MysqlHostname, 842 MysqlPort: tablet.MysqlPort, 843 PrimaryTermStartTime: tablet.PrimaryTermStartTime, 844 }). 845 DoAndReturn(func(_ context.Context, t *topodatapb.Tablet) error { 846 if !pingable[tablet.Alias.String()] { 847 return errors.New("unreachable") 848 } 849 return nil 850 }). 851 AnyTimes() 852 } 853 shard := NewGRShard("ks", "0", nil, tmc, ts, dbAgent, conf, testPort0, false) 854 if tt.rebootstrapGroupSize != 0 { 855 shard.OverrideRebootstrapGroupSize(tt.rebootstrapGroupSize) 856 } 857 shard.refreshTabletsInShardLocked(ctx) 858 diagnose, err := shard.Diagnose(ctx) 859 assert.Equal(t, expected, diagnose) 860 if tt.errMessage == "" { 861 assert.NoError(t, err) 862 } else { 863 assert.Error(t, err) 864 assert.True(t, strings.Contains(err.Error(), tt.errMessage), err.Error()) 865 } 866 }) 867 } 868 } 869 870 func TestGroupStatusRecorder(t *testing.T) { 871 r := &groupGTIDRecorder{} 872 873 err := r.recordGroupStatus("group1", true) 874 assert.NoError(t, err) 875 assert.Equal(t, r.name, "group1") 876 assert.Equal(t, r.hasActive, true) 877 878 err = r.recordGroupStatus("group2", false) 879 assert.Error(t, err, "group has more than one group name") 880 assert.Equal(t, r.name, "group1") 881 882 err = r.recordGroupStatus("group1", false) 883 assert.NoError(t, err) 884 assert.Equal(t, r.name, "group1") 885 assert.Equal(t, r.hasActive, true) 886 887 pos1, err := mysql.ParsePosition(mysql.Mysql56FlavorID, "264a8230-67d2-11eb-acdd-0a8d91f24125:1-22:1000019-1000021") 888 assert.NoError(t, err) 889 inst1 := &grInstance{alias: "alias1"} 890 r.recordGroupGTIDs(pos1.GTIDSet, inst1) 891 pos2, err := mysql.ParsePosition(mysql.Mysql56FlavorID, "264a8230-67d2-11eb-acdd-0a8d91f24125:1-1000021") 892 assert.NoError(t, err) 893 inst2 := &grInstance{alias: "alias2"} 894 r.recordGroupGTIDs(pos2.GTIDSet, inst2) 895 assert.Equal(t, len(r.gtidWithInstances), 2) 896 assert.Equal(t, r.gtidWithInstances[0].instance, inst1) 897 assert.Equal(t, pos1.GTIDSet.Equal(r.gtidWithInstances[0].gtids), true) 898 assert.Equal(t, r.gtidWithInstances[1].instance, inst2) 899 assert.Equal(t, pos2.GTIDSet.Equal(r.gtidWithInstances[1].gtids), true) 900 }