vitess.io/vitess@v0.16.2/go/vt/vtctl/grpcvtctldserver/server_slow_test.go (about) 1 /* 2 Copyright 2021 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package grpcvtctldserver 18 19 import ( 20 "context" 21 "testing" 22 "time" 23 24 "github.com/stretchr/testify/assert" 25 "github.com/stretchr/testify/require" 26 27 "vitess.io/vitess/go/mysql" 28 "vitess.io/vitess/go/protoutil" 29 "vitess.io/vitess/go/vt/topo" 30 "vitess.io/vitess/go/vt/topo/memorytopo" 31 "vitess.io/vitess/go/vt/vtctl/grpcvtctldserver/testutil" 32 "vitess.io/vitess/go/vt/vttablet/tmclient" 33 34 replicationdatapb "vitess.io/vitess/go/vt/proto/replicationdata" 35 topodatapb "vitess.io/vitess/go/vt/proto/topodata" 36 vtctldatapb "vitess.io/vitess/go/vt/proto/vtctldata" 37 vtctlservicepb "vitess.io/vitess/go/vt/proto/vtctlservice" 38 "vitess.io/vitess/go/vt/proto/vttime" 39 ) 40 41 func TestEmergencyReparentShardSlow(t *testing.T) { 42 t.Parallel() 43 44 tests := []struct { 45 name string 46 ts *topo.Server 47 tmc tmclient.TabletManagerClient 48 tablets []*topodatapb.Tablet 49 50 req *vtctldatapb.EmergencyReparentShardRequest 51 expected *vtctldatapb.EmergencyReparentShardResponse 52 expectEventsToOccur bool 53 shouldErr bool 54 }{ 55 { 56 // Note: this test case and the one below combine to assert that a 57 // nil WaitReplicasTimeout in the request results in a default 30 58 // second WaitReplicasTimeout. 59 // 60 // They are also very slow, because they require waiting 29 seconds 61 // and 30 seconds, respectively. Fortunately, we can run them 62 // concurrently, so the total time is only around 30 seconds, but 63 // that's still a long time for a unit test! 64 name: "nil WaitReplicasTimeout and request takes 29 seconds is ok", 65 ts: memorytopo.NewServer("zone1"), 66 tablets: []*topodatapb.Tablet{ 67 { 68 Alias: &topodatapb.TabletAlias{ 69 Cell: "zone1", 70 Uid: 100, 71 }, 72 Type: topodatapb.TabletType_PRIMARY, 73 PrimaryTermStartTime: &vttime.Time{ 74 Seconds: 100, 75 }, 76 Keyspace: "testkeyspace", 77 Shard: "-", 78 }, 79 { 80 Alias: &topodatapb.TabletAlias{ 81 Cell: "zone1", 82 Uid: 200, 83 }, 84 Type: topodatapb.TabletType_REPLICA, 85 Keyspace: "testkeyspace", 86 Shard: "-", 87 }, 88 { 89 Alias: &topodatapb.TabletAlias{ 90 Cell: "zone1", 91 Uid: 101, 92 }, 93 Type: topodatapb.TabletType_RDONLY, 94 Keyspace: "testkeyspace", 95 Shard: "-", 96 }, 97 }, 98 tmc: &testutil.TabletManagerClient{ 99 DemotePrimaryResults: map[string]struct { 100 Status *replicationdatapb.PrimaryStatus 101 Error error 102 }{ 103 "zone1-0000000100": { 104 Status: &replicationdatapb.PrimaryStatus{ 105 Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", 106 }, 107 }, 108 }, 109 PopulateReparentJournalDelays: map[string]time.Duration{ 110 "zone1-0000000200": time.Second * 29, 111 }, 112 PopulateReparentJournalResults: map[string]error{ 113 "zone1-0000000200": nil, 114 }, 115 PromoteReplicaResults: map[string]struct { 116 Result string 117 Error error 118 }{ 119 "zone1-0000000200": {}, 120 }, 121 PrimaryPositionResults: map[string]struct { 122 Position string 123 Error error 124 }{ 125 "zone1-0000000200": {}, 126 }, 127 SetReplicationSourceResults: map[string]error{ 128 "zone1-0000000100": nil, 129 "zone1-0000000101": nil, 130 }, 131 StopReplicationAndGetStatusResults: map[string]struct { 132 StopStatus *replicationdatapb.StopReplicationStatus 133 Error error 134 }{ 135 "zone1-0000000100": { 136 Error: mysql.ErrNotReplica, 137 }, 138 "zone1-0000000101": { 139 Error: assert.AnError, 140 }, 141 "zone1-0000000200": { 142 StopStatus: &replicationdatapb.StopReplicationStatus{ 143 Before: &replicationdatapb.Status{IoState: int32(mysql.ReplicationStateRunning), SqlState: int32(mysql.ReplicationStateRunning)}, 144 After: &replicationdatapb.Status{ 145 SourceUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562", 146 RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", 147 Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", 148 }, 149 }, 150 }, 151 }, 152 WaitForPositionResults: map[string]map[string]error{ 153 "zone1-0000000100": { 154 "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5": nil, 155 }, 156 "zone1-0000000200": { 157 "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5": nil, 158 }, 159 }, 160 }, 161 req: &vtctldatapb.EmergencyReparentShardRequest{ 162 Keyspace: "testkeyspace", 163 Shard: "-", 164 NewPrimary: &topodatapb.TabletAlias{ 165 Cell: "zone1", 166 Uid: 200, 167 }, 168 WaitReplicasTimeout: nil, 169 }, 170 expected: &vtctldatapb.EmergencyReparentShardResponse{ 171 Keyspace: "testkeyspace", 172 Shard: "-", 173 PromotedPrimary: &topodatapb.TabletAlias{ 174 Cell: "zone1", 175 Uid: 200, 176 }, 177 }, 178 expectEventsToOccur: true, 179 shouldErr: false, 180 }, 181 { 182 name: "nil WaitReplicasTimeout and request takes 31 seconds is error", 183 ts: memorytopo.NewServer("zone1"), 184 tablets: []*topodatapb.Tablet{ 185 { 186 Alias: &topodatapb.TabletAlias{ 187 Cell: "zone1", 188 Uid: 100, 189 }, 190 Type: topodatapb.TabletType_PRIMARY, 191 PrimaryTermStartTime: &vttime.Time{ 192 Seconds: 100, 193 }, 194 Keyspace: "testkeyspace", 195 Shard: "-", 196 }, 197 { 198 Alias: &topodatapb.TabletAlias{ 199 Cell: "zone1", 200 Uid: 200, 201 }, 202 Type: topodatapb.TabletType_REPLICA, 203 Keyspace: "testkeyspace", 204 Shard: "-", 205 }, 206 { 207 Alias: &topodatapb.TabletAlias{ 208 Cell: "zone1", 209 Uid: 101, 210 }, 211 Type: topodatapb.TabletType_RDONLY, 212 Keyspace: "testkeyspace", 213 Shard: "-", 214 }, 215 }, 216 tmc: &testutil.TabletManagerClient{ 217 DemotePrimaryResults: map[string]struct { 218 Status *replicationdatapb.PrimaryStatus 219 Error error 220 }{ 221 "zone1-0000000100": { 222 Status: &replicationdatapb.PrimaryStatus{ 223 Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", 224 }, 225 }, 226 }, 227 PopulateReparentJournalDelays: map[string]time.Duration{ 228 "zone1-0000000200": time.Second * 31, 229 }, 230 PopulateReparentJournalResults: map[string]error{ 231 "zone1-0000000200": nil, 232 }, 233 PromoteReplicaResults: map[string]struct { 234 Result string 235 Error error 236 }{ 237 "zone1-0000000200": {}, 238 }, 239 PrimaryPositionResults: map[string]struct { 240 Position string 241 Error error 242 }{ 243 "zone1-0000000200": {}, 244 }, 245 SetReplicationSourceResults: map[string]error{ 246 "zone1-0000000100": nil, 247 "zone1-0000000101": nil, 248 }, 249 StopReplicationAndGetStatusResults: map[string]struct { 250 StopStatus *replicationdatapb.StopReplicationStatus 251 Error error 252 }{ 253 "zone1-0000000100": { 254 Error: mysql.ErrNotReplica, 255 }, 256 "zone1-0000000101": { 257 Error: assert.AnError, 258 }, 259 "zone1-0000000200": { 260 StopStatus: &replicationdatapb.StopReplicationStatus{ 261 Before: &replicationdatapb.Status{IoState: int32(mysql.ReplicationStateRunning), SqlState: int32(mysql.ReplicationStateRunning)}, 262 After: &replicationdatapb.Status{ 263 SourceUuid: "3E11FA47-71CA-11E1-9E33-C80AA9429562", 264 RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", 265 Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", 266 }, 267 }, 268 }, 269 }, 270 WaitForPositionResults: map[string]map[string]error{ 271 "zone1-0000000100": { 272 "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5": nil, 273 }, 274 "zone1-0000000200": { 275 "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5": nil, 276 }, 277 }, 278 }, 279 req: &vtctldatapb.EmergencyReparentShardRequest{ 280 Keyspace: "testkeyspace", 281 Shard: "-", 282 NewPrimary: &topodatapb.TabletAlias{ 283 Cell: "zone1", 284 Uid: 200, 285 }, 286 WaitReplicasTimeout: nil, 287 }, 288 expectEventsToOccur: true, 289 shouldErr: true, 290 }, 291 } 292 293 ctx := context.Background() 294 295 for _, tt := range tests { 296 tt := tt 297 298 t.Run(tt.name, func(t *testing.T) { 299 t.Parallel() 300 301 if tt.req == nil { 302 t.Skip("tt.EmergencyReparentShardRequest = nil implies test not ready to run") 303 } 304 305 testutil.AddTablets(ctx, t, tt.ts, &testutil.AddTabletOptions{ 306 AlsoSetShardPrimary: true, 307 ForceSetShardPrimary: true, 308 SkipShardCreation: false, 309 }, tt.tablets...) 310 311 vtctld := testutil.NewVtctldServerWithTabletManagerClient(t, tt.ts, tt.tmc, func(ts *topo.Server) vtctlservicepb.VtctldServer { 312 return NewVtctldServer(ts) 313 }) 314 resp, err := vtctld.EmergencyReparentShard(ctx, tt.req) 315 316 // We defer this because we want to check in both error and non- 317 // error cases, but after the main set of assertions for those 318 // cases. 319 defer func() { 320 if !tt.expectEventsToOccur { 321 testutil.AssertNoLogutilEventsOccurred(t, resp, "expected no events to occur during ERS") 322 323 return 324 } 325 326 testutil.AssertLogutilEventsOccurred(t, resp, "expected events to occur during ERS") 327 }() 328 329 if tt.shouldErr { 330 assert.Error(t, err) 331 332 return 333 } 334 335 assert.NoError(t, err) 336 testutil.AssertEmergencyReparentShardResponsesEqual(t, tt.expected, resp) 337 }) 338 } 339 } 340 341 func TestPlannedReparentShardSlow(t *testing.T) { 342 t.Parallel() 343 344 tests := []struct { 345 name string 346 ts *topo.Server 347 tmc tmclient.TabletManagerClient 348 tablets []*topodatapb.Tablet 349 350 req *vtctldatapb.PlannedReparentShardRequest 351 expected *vtctldatapb.PlannedReparentShardResponse 352 expectEventsToOccur bool 353 shouldErr bool 354 }{ 355 { 356 // Note: this test case and the one below combine to assert that a 357 // nil WaitReplicasTimeout in the request results in a default 30 358 // second WaitReplicasTimeout. 359 name: "nil WaitReplicasTimeout and request takes 29 seconds is ok", 360 ts: memorytopo.NewServer("zone1"), 361 tablets: []*topodatapb.Tablet{ 362 { 363 Alias: &topodatapb.TabletAlias{ 364 Cell: "zone1", 365 Uid: 100, 366 }, 367 Type: topodatapb.TabletType_PRIMARY, 368 PrimaryTermStartTime: &vttime.Time{ 369 Seconds: 100, 370 }, 371 Keyspace: "testkeyspace", 372 Shard: "-", 373 }, 374 { 375 Alias: &topodatapb.TabletAlias{ 376 Cell: "zone1", 377 Uid: 200, 378 }, 379 Type: topodatapb.TabletType_REPLICA, 380 Keyspace: "testkeyspace", 381 Shard: "-", 382 }, 383 { 384 Alias: &topodatapb.TabletAlias{ 385 Cell: "zone1", 386 Uid: 101, 387 }, 388 Type: topodatapb.TabletType_RDONLY, 389 Keyspace: "testkeyspace", 390 Shard: "-", 391 }, 392 }, 393 tmc: &testutil.TabletManagerClient{ 394 DemotePrimaryResults: map[string]struct { 395 Status *replicationdatapb.PrimaryStatus 396 Error error 397 }{ 398 "zone1-0000000100": { 399 Status: &replicationdatapb.PrimaryStatus{ 400 Position: "primary-demotion position", 401 }, 402 Error: nil, 403 }, 404 }, 405 PrimaryPositionResults: map[string]struct { 406 Position string 407 Error error 408 }{ 409 "zone1-0000000100": { 410 Position: "doesn't matter", 411 Error: nil, 412 }, 413 }, 414 PopulateReparentJournalResults: map[string]error{ 415 "zone1-0000000200": nil, 416 }, 417 PromoteReplicaPostDelays: map[string]time.Duration{ 418 "zone1-0000000200": time.Second * 28, 419 }, 420 PromoteReplicaResults: map[string]struct { 421 Result string 422 Error error 423 }{ 424 "zone1-0000000200": { 425 Result: "promotion position", 426 Error: nil, 427 }, 428 }, 429 SetReplicationSourceResults: map[string]error{ 430 "zone1-0000000200": nil, // waiting for primary-position during promotion 431 // reparent SetReplicationSource calls 432 "zone1-0000000100": nil, 433 "zone1-0000000101": nil, 434 }, 435 WaitForPositionResults: map[string]map[string]error{ 436 "zone1-0000000200": { 437 "primary-demotion position": nil, 438 }, 439 }, 440 }, 441 req: &vtctldatapb.PlannedReparentShardRequest{ 442 Keyspace: "testkeyspace", 443 Shard: "-", 444 NewPrimary: &topodatapb.TabletAlias{ 445 Cell: "zone1", 446 Uid: 200, 447 }, 448 WaitReplicasTimeout: nil, 449 }, 450 expected: &vtctldatapb.PlannedReparentShardResponse{ 451 Keyspace: "testkeyspace", 452 Shard: "-", 453 PromotedPrimary: &topodatapb.TabletAlias{ 454 Cell: "zone1", 455 Uid: 200, 456 }, 457 }, 458 expectEventsToOccur: true, 459 shouldErr: false, 460 }, 461 { 462 name: "nil WaitReplicasTimeout and request takes 31 seconds is error", 463 ts: memorytopo.NewServer("zone1"), 464 tablets: []*topodatapb.Tablet{ 465 { 466 Alias: &topodatapb.TabletAlias{ 467 Cell: "zone1", 468 Uid: 100, 469 }, 470 Type: topodatapb.TabletType_PRIMARY, 471 PrimaryTermStartTime: &vttime.Time{ 472 Seconds: 100, 473 }, 474 Keyspace: "testkeyspace", 475 Shard: "-", 476 }, 477 { 478 Alias: &topodatapb.TabletAlias{ 479 Cell: "zone1", 480 Uid: 200, 481 }, 482 Type: topodatapb.TabletType_REPLICA, 483 Keyspace: "testkeyspace", 484 Shard: "-", 485 }, 486 { 487 Alias: &topodatapb.TabletAlias{ 488 Cell: "zone1", 489 Uid: 101, 490 }, 491 Type: topodatapb.TabletType_RDONLY, 492 Keyspace: "testkeyspace", 493 Shard: "-", 494 }, 495 }, 496 tmc: &testutil.TabletManagerClient{ 497 DemotePrimaryResults: map[string]struct { 498 Status *replicationdatapb.PrimaryStatus 499 Error error 500 }{ 501 "zone1-0000000100": { 502 Status: &replicationdatapb.PrimaryStatus{ 503 Position: "primary-demotion position", 504 }, 505 Error: nil, 506 }, 507 }, 508 PrimaryPositionResults: map[string]struct { 509 Position string 510 Error error 511 }{ 512 "zone1-0000000100": { 513 Position: "doesn't matter", 514 Error: nil, 515 }, 516 }, 517 PopulateReparentJournalResults: map[string]error{ 518 "zone1-0000000200": nil, 519 }, 520 PromoteReplicaPostDelays: map[string]time.Duration{ 521 "zone1-0000000200": time.Second * 30, 522 }, 523 PromoteReplicaResults: map[string]struct { 524 Result string 525 Error error 526 }{ 527 "zone1-0000000200": { 528 Result: "promotion position", 529 Error: nil, 530 }, 531 }, 532 SetReplicationSourceResults: map[string]error{ 533 "zone1-0000000200": nil, // waiting for primary-position during promotion 534 // reparent SetReplicationSource calls 535 "zone1-0000000100": nil, 536 "zone1-0000000101": nil, 537 }, 538 WaitForPositionResults: map[string]map[string]error{ 539 "zone1-0000000200": { 540 "primary-demotion position": nil, 541 }, 542 }, 543 }, 544 req: &vtctldatapb.PlannedReparentShardRequest{ 545 Keyspace: "testkeyspace", 546 Shard: "-", 547 NewPrimary: &topodatapb.TabletAlias{ 548 Cell: "zone1", 549 Uid: 200, 550 }, 551 WaitReplicasTimeout: nil, 552 }, 553 expected: &vtctldatapb.PlannedReparentShardResponse{ 554 Keyspace: "testkeyspace", 555 Shard: "-", 556 PromotedPrimary: &topodatapb.TabletAlias{ 557 Cell: "zone1", 558 Uid: 200, 559 }, 560 }, 561 expectEventsToOccur: true, 562 shouldErr: false, 563 }, 564 } 565 566 ctx := context.Background() 567 568 for _, tt := range tests { 569 tt := tt 570 571 t.Run(tt.name, func(t *testing.T) { 572 t.Parallel() 573 574 testutil.AddTablets(ctx, t, tt.ts, &testutil.AddTabletOptions{ 575 AlsoSetShardPrimary: true, 576 ForceSetShardPrimary: true, 577 SkipShardCreation: false, 578 }, tt.tablets...) 579 580 vtctld := testutil.NewVtctldServerWithTabletManagerClient(t, tt.ts, tt.tmc, func(ts *topo.Server) vtctlservicepb.VtctldServer { 581 return NewVtctldServer(ts) 582 }) 583 resp, err := vtctld.PlannedReparentShard(ctx, tt.req) 584 585 // We defer this because we want to check in both error and non- 586 // error cases, but after the main set of assertions for those 587 // cases. 588 defer func() { 589 if !tt.expectEventsToOccur { 590 testutil.AssertNoLogutilEventsOccurred(t, resp, "expected no events to occur during ERS") 591 592 return 593 } 594 595 testutil.AssertLogutilEventsOccurred(t, resp, "expected events to occur during ERS") 596 }() 597 598 if tt.shouldErr { 599 assert.Error(t, err) 600 601 return 602 } 603 604 assert.NoError(t, err) 605 testutil.AssertPlannedReparentShardResponsesEqual(t, tt.expected, resp) 606 }) 607 } 608 } 609 610 func TestSleepTablet(t *testing.T) { 611 t.Parallel() 612 613 ctx := context.Background() 614 ts := memorytopo.NewServer("zone1") 615 testutil.AddTablet(ctx, t, ts, &topodatapb.Tablet{ 616 Alias: &topodatapb.TabletAlias{ 617 Cell: "zone1", 618 Uid: 100, 619 }, 620 Keyspace: "testkeyspace", 621 Shard: "-", 622 }, nil) 623 624 tests := []struct { 625 name string 626 tmc testutil.TabletManagerClient 627 req *vtctldatapb.SleepTabletRequest 628 expected *vtctldatapb.SleepTabletResponse 629 shouldErr bool 630 }{ 631 { 632 name: "ok", 633 tmc: testutil.TabletManagerClient{ 634 SleepResults: map[string]error{ 635 "zone1-0000000100": nil, 636 }, 637 }, 638 req: &vtctldatapb.SleepTabletRequest{ 639 TabletAlias: &topodatapb.TabletAlias{ 640 Cell: "zone1", 641 Uid: 100, 642 }, 643 Duration: protoutil.DurationToProto(time.Millisecond), 644 }, 645 expected: &vtctldatapb.SleepTabletResponse{}, 646 }, 647 { 648 name: "default sleep duration", // this is the slowest test case, and takes 30 seconds. comment this out to go faster. 649 tmc: testutil.TabletManagerClient{ 650 SleepResults: map[string]error{ 651 "zone1-0000000100": nil, 652 }, 653 }, 654 req: &vtctldatapb.SleepTabletRequest{ 655 TabletAlias: &topodatapb.TabletAlias{ 656 Cell: "zone1", 657 Uid: 100, 658 }, 659 }, 660 expected: &vtctldatapb.SleepTabletResponse{}, 661 }, 662 { 663 name: "tablet not found", 664 tmc: testutil.TabletManagerClient{ 665 SleepResults: map[string]error{ 666 "zone1-0000000100": nil, 667 }, 668 }, 669 req: &vtctldatapb.SleepTabletRequest{ 670 TabletAlias: &topodatapb.TabletAlias{ 671 Cell: "zone2", 672 Uid: 404, 673 }, 674 }, 675 shouldErr: true, 676 }, 677 { 678 name: "sleep rpc error", 679 tmc: testutil.TabletManagerClient{ 680 SleepResults: map[string]error{ 681 "zone1-0000000100": assert.AnError, 682 }, 683 }, 684 req: &vtctldatapb.SleepTabletRequest{ 685 TabletAlias: &topodatapb.TabletAlias{ 686 Cell: "zone1", 687 Uid: 100, 688 }, 689 Duration: protoutil.DurationToProto(time.Millisecond), 690 }, 691 shouldErr: true, 692 }, 693 } 694 695 expectedDur := func(t *testing.T, in *vttime.Duration, defaultDur time.Duration) time.Duration { 696 dur, ok, err := protoutil.DurationFromProto(in) 697 require.NoError(t, err) 698 699 if !ok { 700 return defaultDur 701 } 702 703 return dur 704 } 705 706 for _, tt := range tests { 707 tt := tt 708 t.Run(tt.name, func(t *testing.T) { 709 t.Parallel() 710 711 vtctld := testutil.NewVtctldServerWithTabletManagerClient(t, ts, &tt.tmc, func(ts *topo.Server) vtctlservicepb.VtctldServer { 712 return NewVtctldServer(ts) 713 }) 714 715 start := time.Now() 716 resp, err := vtctld.SleepTablet(ctx, tt.req) 717 sleepDur := time.Since(start) 718 if tt.shouldErr { 719 assert.Error(t, err) 720 assert.Nil(t, resp) 721 return 722 } 723 724 require.NoError(t, err) 725 assert.Equal(t, tt.expected, resp) 726 dur := expectedDur(t, tt.req.Duration, topo.RemoteOperationTimeout) 727 assert.LessOrEqual(t, dur, sleepDur, "sleep should have taken at least %v; took %v", dur, sleepDur) 728 }) 729 } 730 }