vitess.io/vitess@v0.16.2/go/vt/discovery/topology_watcher_test.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package discovery 18 19 import ( 20 "context" 21 "math/rand" 22 "testing" 23 "time" 24 25 "github.com/stretchr/testify/assert" 26 "github.com/stretchr/testify/require" 27 "google.golang.org/protobuf/proto" 28 29 "vitess.io/vitess/go/vt/logutil" 30 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 31 "vitess.io/vitess/go/vt/topo" 32 "vitess.io/vitess/go/vt/topo/memorytopo" 33 ) 34 35 func checkOpCounts(t *testing.T, prevCounts, deltas map[string]int64) map[string]int64 { 36 t.Helper() 37 newCounts := topologyWatcherOperations.Counts() 38 for key, prevVal := range prevCounts { 39 delta, ok := deltas[key] 40 if !ok { 41 delta = 0 42 } 43 newVal, ok := newCounts[key] 44 if !ok { 45 newVal = 0 46 } 47 48 assert.Equal(t, newVal, prevVal+delta, "expected %v to increase by %v, got %v -> %v", key, delta, prevVal, newVal) 49 } 50 return newCounts 51 } 52 53 func checkChecksum(t *testing.T, tw *TopologyWatcher, want uint32) { 54 t.Helper() 55 assert.Equal(t, want, tw.TopoChecksum()) 56 } 57 58 func TestStartAndCloseTopoWatcher(t *testing.T) { 59 ts := memorytopo.NewServer("aa") 60 fhc := NewFakeHealthCheck(nil) 61 topologyWatcherOperations.ZeroAll() 62 tw := NewCellTabletsWatcher(context.Background(), ts, fhc, nil, "aa", 100*time.Microsecond, true, 5) 63 64 done := make(chan bool, 3) 65 result := make(chan bool, 1) 66 go func() { 67 // We wait for the done channel three times since we execute three 68 // topo-watcher actions (Start, Stop and Wait), once we have read 69 // from the done channel three times we know we have completed all 70 // the actions, the test is then successful. 71 // Each action has a one-second timeout after which the test will be 72 // marked as failed. 73 for i := 0; i < 3; i++ { 74 select { 75 case <-time.After(1 * time.Second): 76 close(result) 77 return 78 case <-done: 79 break 80 } 81 } 82 result <- true 83 }() 84 85 tw.Start() 86 done <- true 87 88 // This sleep gives enough time to the topo-watcher to do 10 iterations 89 // The topo-watcher's refresh interval is set to 100 microseconds. 90 time.Sleep(1 * time.Millisecond) 91 92 tw.Stop() 93 done <- true 94 95 tw.wg.Wait() 96 done <- true 97 98 _, ok := <-result 99 if !ok { 100 t.Fatal("timed out") 101 } 102 } 103 104 func TestCellTabletsWatcher(t *testing.T) { 105 checkWatcher(t, true) 106 } 107 108 func TestCellTabletsWatcherNoRefreshKnown(t *testing.T) { 109 checkWatcher(t, false) 110 } 111 112 func checkWatcher(t *testing.T, refreshKnownTablets bool) { 113 ts := memorytopo.NewServer("aa") 114 fhc := NewFakeHealthCheck(nil) 115 logger := logutil.NewMemoryLogger() 116 topologyWatcherOperations.ZeroAll() 117 counts := topologyWatcherOperations.Counts() 118 tw := NewCellTabletsWatcher(context.Background(), ts, fhc, nil, "aa", 10*time.Minute, refreshKnownTablets, 5) 119 120 counts = checkOpCounts(t, counts, map[string]int64{}) 121 checkChecksum(t, tw, 0) 122 123 // Add a tablet to the topology. 124 tablet := &topodatapb.Tablet{ 125 Alias: &topodatapb.TabletAlias{ 126 Cell: "aa", 127 Uid: 0, 128 }, 129 Hostname: "host1", 130 PortMap: map[string]int32{ 131 "vt": 123, 132 }, 133 Keyspace: "keyspace", 134 Shard: "shard", 135 } 136 if err := ts.CreateTablet(context.Background(), tablet); err != nil { 137 t.Fatalf("CreateTablet failed: %v", err) 138 } 139 tw.loadTablets() 140 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 1, "AddTablet": 1}) 141 checkChecksum(t, tw, 3238442862) 142 143 // Check the tablet is returned by GetAllTablets(). 144 allTablets := fhc.GetAllTablets() 145 key := TabletToMapKey(tablet) 146 if _, ok := allTablets[key]; !ok || len(allTablets) != 1 || !proto.Equal(allTablets[key], tablet) { 147 t.Errorf("fhc.GetAllTablets() = %+v; want %+v", allTablets, tablet) 148 } 149 150 // Add a second tablet to the topology. 151 tablet2 := &topodatapb.Tablet{ 152 Alias: &topodatapb.TabletAlias{ 153 Cell: "aa", 154 Uid: 2, 155 }, 156 Hostname: "host2", 157 PortMap: map[string]int32{ 158 "vt": 789, 159 }, 160 Keyspace: "keyspace", 161 Shard: "shard", 162 } 163 if err := ts.CreateTablet(context.Background(), tablet2); err != nil { 164 t.Fatalf("CreateTablet failed: %v", err) 165 } 166 tw.loadTablets() 167 168 // If refreshKnownTablets is disabled, only the new tablet is read 169 // from the topo 170 if refreshKnownTablets { 171 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 2, "AddTablet": 1}) 172 } else { 173 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 1, "AddTablet": 1}) 174 } 175 checkChecksum(t, tw, 2762153755) 176 177 // Check the new tablet is returned by GetAllTablets(). 178 allTablets = fhc.GetAllTablets() 179 key = TabletToMapKey(tablet2) 180 if _, ok := allTablets[key]; !ok || len(allTablets) != 2 || !proto.Equal(allTablets[key], tablet2) { 181 t.Errorf("fhc.GetAllTablets() = %+v; want %+v", allTablets, tablet2) 182 } 183 184 // Load the tablets again to show that when refreshKnownTablets is disabled, 185 // only the list is read from the topo and the checksum doesn't change 186 tw.loadTablets() 187 if refreshKnownTablets { 188 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 2}) 189 } else { 190 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1}) 191 } 192 checkChecksum(t, tw, 2762153755) 193 194 // same tablet, different port, should update (previous 195 // one should go away, new one be added) 196 // 197 // if refreshKnownTablets is disabled, this case is *not* 198 // detected and the tablet remains in the topo using the 199 // old key 200 origTablet := proto.Clone(tablet).(*topodatapb.Tablet) 201 origKey := TabletToMapKey(tablet) 202 tablet.PortMap["vt"] = 456 203 if _, err := ts.UpdateTabletFields(context.Background(), tablet.Alias, func(t *topodatapb.Tablet) error { 204 t.PortMap["vt"] = 456 205 return nil 206 }); err != nil { 207 t.Fatalf("UpdateTabletFields failed: %v", err) 208 } 209 tw.loadTablets() 210 allTablets = fhc.GetAllTablets() 211 key = TabletToMapKey(tablet) 212 213 if refreshKnownTablets { 214 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 2, "ReplaceTablet": 1}) 215 216 if _, ok := allTablets[key]; !ok || len(allTablets) != 2 || !proto.Equal(allTablets[key], tablet) { 217 t.Errorf("fhc.GetAllTablets() = %+v; want %+v", allTablets, tablet) 218 } 219 if _, ok := allTablets[origKey]; ok { 220 t.Errorf("fhc.GetAllTablets() = %+v; don't want %v", allTablets, origKey) 221 } 222 checkChecksum(t, tw, 2762153755) 223 } else { 224 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1}) 225 226 if _, ok := allTablets[origKey]; !ok || len(allTablets) != 2 || !proto.Equal(allTablets[origKey], origTablet) { 227 t.Errorf("fhc.GetAllTablets() = %+v; want %+v", allTablets, origTablet) 228 } 229 if _, ok := allTablets[key]; ok { 230 t.Errorf("fhc.GetAllTablets() = %+v; don't want %v", allTablets, key) 231 } 232 checkChecksum(t, tw, 2762153755) 233 } 234 235 // Both tablets restart on different hosts. 236 // tablet2 happens to land on the host:port that tablet 1 used to be on. 237 // This can only be tested when we refresh known tablets. 238 if refreshKnownTablets { 239 origTablet := proto.Clone(tablet).(*topodatapb.Tablet) 240 origTablet2 := proto.Clone(tablet2).(*topodatapb.Tablet) 241 242 if _, err := ts.UpdateTabletFields(context.Background(), tablet2.Alias, func(t *topodatapb.Tablet) error { 243 t.Hostname = tablet.Hostname 244 t.PortMap = tablet.PortMap 245 tablet2 = t 246 return nil 247 }); err != nil { 248 t.Fatalf("UpdateTabletFields failed: %v", err) 249 } 250 if _, err := ts.UpdateTabletFields(context.Background(), tablet.Alias, func(t *topodatapb.Tablet) error { 251 t.Hostname = "host3" 252 tablet = t 253 return nil 254 }); err != nil { 255 t.Fatalf("UpdateTabletFields failed: %v", err) 256 } 257 tw.loadTablets() 258 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 2, "ReplaceTablet": 2}) 259 allTablets = fhc.GetAllTablets() 260 key2 := TabletToMapKey(tablet2) 261 if _, ok := allTablets[key2]; !ok { 262 t.Fatalf("tablet was lost because it's reusing an address recently used by another tablet: %v", key2) 263 } 264 265 // Change tablets back to avoid altering later tests. 266 if _, err := ts.UpdateTabletFields(context.Background(), tablet2.Alias, func(t *topodatapb.Tablet) error { 267 t.Hostname = origTablet2.Hostname 268 t.PortMap = origTablet2.PortMap 269 tablet2 = t 270 return nil 271 }); err != nil { 272 t.Fatalf("UpdateTabletFields failed: %v", err) 273 } 274 if _, err := ts.UpdateTabletFields(context.Background(), tablet.Alias, func(t *topodatapb.Tablet) error { 275 t.Hostname = origTablet.Hostname 276 tablet = t 277 return nil 278 }); err != nil { 279 t.Fatalf("UpdateTabletFields failed: %v", err) 280 } 281 tw.loadTablets() 282 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 2, "ReplaceTablet": 2}) 283 } 284 285 // Remove the tablet and check that it is detected as being gone. 286 if err := ts.DeleteTablet(context.Background(), tablet.Alias); err != nil { 287 t.Fatalf("DeleteTablet failed: %v", err) 288 } 289 if _, err := topo.FixShardReplication(context.Background(), ts, logger, "aa", "keyspace", "shard"); err != nil { 290 t.Fatalf("FixShardReplication failed: %v", err) 291 } 292 tw.loadTablets() 293 if refreshKnownTablets { 294 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 1, "RemoveTablet": 1}) 295 } else { 296 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "RemoveTablet": 1}) 297 } 298 checkChecksum(t, tw, 789108290) 299 300 allTablets = fhc.GetAllTablets() 301 key = TabletToMapKey(tablet) 302 if _, ok := allTablets[key]; ok || len(allTablets) != 1 { 303 t.Errorf("fhc.GetAllTablets() = %+v; don't want %v", allTablets, key) 304 } 305 key = TabletToMapKey(tablet2) 306 if _, ok := allTablets[key]; !ok || len(allTablets) != 1 || !proto.Equal(allTablets[key], tablet2) { 307 t.Errorf("fhc.GetAllTablets() = %+v; want %+v", allTablets, tablet2) 308 } 309 310 // Remove the other and check that it is detected as being gone. 311 if err := ts.DeleteTablet(context.Background(), tablet2.Alias); err != nil { 312 t.Fatalf("DeleteTablet failed: %v", err) 313 } 314 if _, err := topo.FixShardReplication(context.Background(), ts, logger, "aa", "keyspace", "shard"); err != nil { 315 t.Fatalf("FixShardReplication failed: %v", err) 316 } 317 tw.loadTablets() 318 checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 0, "RemoveTablet": 1}) 319 checkChecksum(t, tw, 0) 320 321 allTablets = fhc.GetAllTablets() 322 key = TabletToMapKey(tablet) 323 if _, ok := allTablets[key]; ok || len(allTablets) != 0 { 324 t.Errorf("fhc.GetAllTablets() = %+v; don't want %v", allTablets, key) 325 } 326 key = TabletToMapKey(tablet2) 327 if _, ok := allTablets[key]; ok || len(allTablets) != 0 { 328 t.Errorf("fhc.GetAllTablets() = %+v; don't want %v", allTablets, key) 329 } 330 331 tw.Stop() 332 } 333 334 func TestFilterByShard(t *testing.T) { 335 testcases := []struct { 336 filters []string 337 keyspace string 338 shard string 339 included bool 340 }{ 341 // un-sharded keyspaces 342 { 343 filters: []string{"ks1|0"}, 344 keyspace: "ks1", 345 shard: "0", 346 included: true, 347 }, 348 { 349 filters: []string{"ks1|0"}, 350 keyspace: "ks2", 351 shard: "0", 352 included: false, 353 }, 354 // custom sharding, different shard 355 { 356 filters: []string{"ks1|0"}, 357 keyspace: "ks1", 358 shard: "1", 359 included: false, 360 }, 361 // keyrange based sharding 362 { 363 filters: []string{"ks1|-80"}, 364 keyspace: "ks1", 365 shard: "0", 366 included: false, 367 }, 368 { 369 filters: []string{"ks1|-80"}, 370 keyspace: "ks1", 371 shard: "-40", 372 included: true, 373 }, 374 { 375 filters: []string{"ks1|-80"}, 376 keyspace: "ks1", 377 shard: "-80", 378 included: true, 379 }, 380 { 381 filters: []string{"ks1|-80"}, 382 keyspace: "ks1", 383 shard: "80-", 384 included: false, 385 }, 386 { 387 filters: []string{"ks1|-80"}, 388 keyspace: "ks1", 389 shard: "c0-", 390 included: false, 391 }, 392 } 393 394 for _, tc := range testcases { 395 fbs, err := NewFilterByShard(tc.filters) 396 if err != nil { 397 t.Errorf("cannot create FilterByShard for filters %v: %v", tc.filters, err) 398 } 399 400 tablet := &topodatapb.Tablet{ 401 Keyspace: tc.keyspace, 402 Shard: tc.shard, 403 } 404 405 got := fbs.IsIncluded(tablet) 406 if got != tc.included { 407 t.Errorf("isIncluded(%v,%v) for filters %v returned %v but expected %v", tc.keyspace, tc.shard, tc.filters, got, tc.included) 408 } 409 } 410 } 411 412 var ( 413 testFilterByKeyspace = []struct { 414 keyspace string 415 expected bool 416 }{ 417 {"ks1", true}, 418 {"ks2", true}, 419 {"ks3", false}, 420 {"ks4", true}, 421 {"ks5", true}, 422 {"ks6", false}, 423 {"ks7", false}, 424 } 425 testKeyspacesToWatch = []string{"ks1", "ks2", "ks4", "ks5"} 426 testCell = "testCell" 427 testShard = "testShard" 428 testHostName = "testHostName" 429 ) 430 431 func TestFilterByKeyspace(t *testing.T) { 432 hc := NewFakeHealthCheck(nil) 433 f := NewFilterByKeyspace(testKeyspacesToWatch) 434 ts := memorytopo.NewServer(testCell) 435 tw := NewCellTabletsWatcher(context.Background(), ts, hc, f, testCell, 10*time.Minute, true, 5) 436 437 for _, test := range testFilterByKeyspace { 438 // Add a new tablet to the topology. 439 port := rand.Int31n(1000) 440 tablet := &topodatapb.Tablet{ 441 Alias: &topodatapb.TabletAlias{ 442 Cell: testCell, 443 Uid: rand.Uint32(), 444 }, 445 Hostname: testHostName, 446 PortMap: map[string]int32{ 447 "vt": port, 448 }, 449 Keyspace: test.keyspace, 450 Shard: testShard, 451 } 452 453 got := f.IsIncluded(tablet) 454 if got != test.expected { 455 t.Errorf("isIncluded(%v) for keyspace %v returned %v but expected %v", test.keyspace, test.keyspace, got, test.expected) 456 } 457 458 if err := ts.CreateTablet(context.Background(), tablet); err != nil { 459 t.Errorf("CreateTablet failed: %v", err) 460 } 461 462 tw.loadTablets() 463 key := TabletToMapKey(tablet) 464 allTablets := hc.GetAllTablets() 465 466 if _, ok := allTablets[key]; ok != test.expected && proto.Equal(allTablets[key], tablet) != test.expected { 467 t.Errorf("Error adding tablet - got %v; want %v", ok, test.expected) 468 } 469 470 // Replace the tablet we added above 471 tabletReplacement := &topodatapb.Tablet{ 472 Alias: &topodatapb.TabletAlias{ 473 Cell: testCell, 474 Uid: rand.Uint32(), 475 }, 476 Hostname: testHostName, 477 PortMap: map[string]int32{ 478 "vt": port, 479 }, 480 Keyspace: test.keyspace, 481 Shard: testShard, 482 } 483 got = f.IsIncluded(tabletReplacement) 484 if got != test.expected { 485 t.Errorf("isIncluded(%v) for keyspace %v returned %v but expected %v", test.keyspace, test.keyspace, got, test.expected) 486 } 487 if err := ts.CreateTablet(context.Background(), tabletReplacement); err != nil { 488 t.Errorf("CreateTablet failed: %v", err) 489 } 490 491 tw.loadTablets() 492 key = TabletToMapKey(tabletReplacement) 493 allTablets = hc.GetAllTablets() 494 495 if _, ok := allTablets[key]; ok != test.expected && proto.Equal(allTablets[key], tabletReplacement) != test.expected { 496 t.Errorf("Error replacing tablet - got %v; want %v", ok, test.expected) 497 } 498 499 // Delete the tablet 500 if err := ts.DeleteTablet(context.Background(), tabletReplacement.Alias); err != nil { 501 t.Fatalf("DeleteTablet failed: %v", err) 502 } 503 } 504 } 505 506 // TestFilterByKeypsaceSkipsIgnoredTablets confirms a bug fix for the case when a TopologyWatcher 507 // has a FilterByKeyspace TabletFilter configured along with refreshKnownTablets turned off. We want 508 // to ensure that the TopologyWatcher: 509 // - does not continuosly call GetTablets for tablets that do not satisfy the filter 510 // - does not add or remove these filtered out tablets from the its healtcheck 511 func TestFilterByKeypsaceSkipsIgnoredTablets(t *testing.T) { 512 ts := memorytopo.NewServer("aa") 513 fhc := NewFakeHealthCheck(nil) 514 topologyWatcherOperations.ZeroAll() 515 counts := topologyWatcherOperations.Counts() 516 f := NewFilterByKeyspace(testKeyspacesToWatch) 517 tw := NewCellTabletsWatcher(context.Background(), ts, fhc, f, "aa", 10*time.Minute, false /*refreshKnownTablets*/, 5) 518 519 counts = checkOpCounts(t, counts, map[string]int64{}) 520 checkChecksum(t, tw, 0) 521 522 // Add a tablet from a tracked keyspace to the topology. 523 tablet := &topodatapb.Tablet{ 524 Alias: &topodatapb.TabletAlias{ 525 Cell: "aa", 526 Uid: 0, 527 }, 528 Hostname: "host1", 529 PortMap: map[string]int32{ 530 "vt": 123, 531 }, 532 Keyspace: "ks1", 533 Shard: "shard", 534 } 535 require.NoError(t, ts.CreateTablet(context.Background(), tablet)) 536 537 tw.loadTablets() 538 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 1, "AddTablet": 1}) 539 checkChecksum(t, tw, 3238442862) 540 541 // Check tablet is reported by HealthCheck 542 allTablets := fhc.GetAllTablets() 543 key := TabletToMapKey(tablet) 544 assert.Contains(t, allTablets, key) 545 assert.True(t, proto.Equal(tablet, allTablets[key])) 546 547 // Add a second tablet to the topology that should get filtered out by the keyspace filter 548 tablet2 := &topodatapb.Tablet{ 549 Alias: &topodatapb.TabletAlias{ 550 Cell: "aa", 551 Uid: 2, 552 }, 553 Hostname: "host2", 554 PortMap: map[string]int32{ 555 "vt": 789, 556 }, 557 Keyspace: "ks3", 558 Shard: "shard", 559 } 560 require.NoError(t, ts.CreateTablet(context.Background(), tablet2)) 561 562 tw.loadTablets() 563 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 1}) 564 checkChecksum(t, tw, 2762153755) 565 566 // Check the new tablet is NOT reported by HealthCheck. 567 allTablets = fhc.GetAllTablets() 568 assert.Len(t, allTablets, 1) 569 key = TabletToMapKey(tablet2) 570 assert.NotContains(t, allTablets, key) 571 572 // Load the tablets again to show that when refreshKnownTablets is disabled, 573 // only the list is read from the topo and the checksum doesn't change 574 tw.loadTablets() 575 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1}) 576 checkChecksum(t, tw, 2762153755) 577 578 // With refreshKnownTablets set to false, changes to the port map for the same tablet alias 579 // should not be reflected in the HealtCheck state 580 _, err := ts.UpdateTabletFields(context.Background(), tablet.Alias, func(t *topodatapb.Tablet) error { 581 t.PortMap["vt"] = 456 582 return nil 583 }) 584 require.NoError(t, err) 585 586 tw.loadTablets() 587 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1}) 588 checkChecksum(t, tw, 2762153755) 589 590 allTablets = fhc.GetAllTablets() 591 assert.Len(t, allTablets, 1) 592 origKey := TabletToMapKey(tablet) 593 tabletWithNewPort := proto.Clone(tablet).(*topodatapb.Tablet) 594 tabletWithNewPort.PortMap["vt"] = 456 595 keyWithNewPort := TabletToMapKey(tabletWithNewPort) 596 assert.Contains(t, allTablets, origKey) 597 assert.NotContains(t, allTablets, keyWithNewPort) 598 599 // Remove the tracked tablet from the topo and check that it is detected as being gone. 600 require.NoError(t, ts.DeleteTablet(context.Background(), tablet.Alias)) 601 602 tw.loadTablets() 603 counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "RemoveTablet": 1}) 604 checkChecksum(t, tw, 789108290) 605 assert.Empty(t, fhc.GetAllTablets()) 606 607 // Remove ignored tablet and check that we didn't try to remove it from the health check 608 require.NoError(t, ts.DeleteTablet(context.Background(), tablet2.Alias)) 609 610 tw.loadTablets() 611 checkOpCounts(t, counts, map[string]int64{"ListTablets": 1}) 612 checkChecksum(t, tw, 0) 613 assert.Empty(t, fhc.GetAllTablets()) 614 615 tw.Stop() 616 }