// Source: vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/state_manager_test.go

/*
Copyright 2020 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tabletserver

import (
	"context"
	"errors"
	"sync"
	"testing"
	"time"

	"google.golang.org/protobuf/proto"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"vitess.io/vitess/go/mysql/fakesqldb"

	"vitess.io/vitess/go/sync2"
	"vitess.io/vitess/go/vt/log"
	querypb "vitess.io/vitess/go/vt/proto/query"
	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
)

// testNow is a single timestamp captured at package initialization and passed
// to every SetServingType call, so tests can compare it against sm.terTimestamp.
var testNow = time.Now()

// TestStateManagerStateByName checks IsServingString on a bare stateManager.
// Of the combinations exercised here, only state == wantState == StateServing
// with healthy replication and no lameduck yields "SERVING"; every other
// combination is expected to yield "NOT_SERVING".
func TestStateManagerStateByName(t *testing.T) {
	sm := &stateManager{}

	sm.replHealthy = true
	sm.wantState = StateServing
	sm.state = StateNotConnected
	assert.Equal(t, "NOT_SERVING", sm.IsServingString())

	sm.state = StateNotServing
	assert.Equal(t, "NOT_SERVING", sm.IsServingString())

	sm.state = StateServing
	assert.Equal(t, "SERVING", sm.IsServingString())

	// A serving state with a non-serving wantState must not report SERVING.
	sm.wantState = StateNotServing
	assert.Equal(t, "NOT_SERVING", sm.IsServingString())
	sm.wantState = StateServing

	sm.EnterLameduck()
	assert.Equal(t, "NOT_SERVING", sm.IsServingString())
	sm.ExitLameduck()

	sm.replHealthy = false
	assert.Equal(t, "NOT_SERVING", sm.IsServingString())
}

// TestStateManagerServePrimary transitions a fresh manager to a serving
// PRIMARY and checks that lameduck is cleared, that terTimestamp is recorded,
// and that the fake subcomponents were closed/opened in the expected order.
// The integer passed to verifySubcomponent is the expected position of that
// component's last lifecycle call in the global order counter.
func TestStateManagerServePrimary(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	sm.EnterLameduck()
	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
	require.NoError(t, err)

	assert.Equal(t, false, sm.lameduck)
	assert.Equal(t, testNow, sm.terTimestamp)

	verifySubcomponent(t, 1, sm.watcher, testStateClosed)

	verifySubcomponent(t, 2, sm.se, testStateOpen)
	verifySubcomponent(t, 3, sm.vstreamer, testStateOpen)
	verifySubcomponent(t, 4, sm.qe, testStateOpen)
	verifySubcomponent(t, 5, sm.txThrottler, testStateOpen)
	verifySubcomponent(t, 6, sm.rt, testStatePrimary)
	verifySubcomponent(t, 7, sm.tracker, testStateOpen)
	verifySubcomponent(t, 8, sm.te, testStatePrimary)
	verifySubcomponent(t, 9, sm.messager, testStateOpen)
	verifySubcomponent(t, 10, sm.throttler, testStateOpen)
	verifySubcomponent(t, 11, sm.tableGC, testStateOpen)
	verifySubcomponent(t, 12, sm.ddle, testStateOpen)

	assert.False(t, sm.se.(*testSchemaEngine).nonPrimary)
	assert.True(t, sm.se.(*testSchemaEngine).ensureCalled)

	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.target.TabletType)
	assert.Equal(t, StateServing, sm.state)
}

// TestStateManagerServeNonPrimary transitions a fresh manager to a serving
// REPLICA and checks the order in which the fake subcomponents were closed and
// opened, and that the schema engine was switched to non-primary mode.
func TestStateManagerServeNonPrimary(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
	require.NoError(t, err)

	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
	verifySubcomponent(t, 3, sm.messager, testStateClosed)
	verifySubcomponent(t, 4, sm.tracker, testStateClosed)
	assert.True(t, sm.se.(*testSchemaEngine).nonPrimary)

	verifySubcomponent(t, 5, sm.se, testStateOpen)
	verifySubcomponent(t, 6, sm.vstreamer, testStateOpen)
	verifySubcomponent(t, 7, sm.qe, testStateOpen)
	verifySubcomponent(t, 8, sm.txThrottler, testStateOpen)
	verifySubcomponent(t, 9, sm.te, testStateNonPrimary)
	verifySubcomponent(t, 10, sm.rt, testStateNonPrimary)
	verifySubcomponent(t, 11, sm.watcher, testStateOpen)
	verifySubcomponent(t, 12, sm.throttler, testStateOpen)

	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
	assert.Equal(t, StateServing, sm.state)
}

// TestStateManagerUnservePrimary transitions a fresh manager to a non-serving
// PRIMARY and checks the resulting subcomponent states and call order.
func TestStateManagerUnservePrimary(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateNotServing, "")
	require.NoError(t, err)

	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
	verifySubcomponent(t, 3, sm.throttler, testStateClosed)
	verifySubcomponent(t, 4, sm.messager, testStateClosed)
	verifySubcomponent(t, 5, sm.te, testStateClosed)

	verifySubcomponent(t, 6, sm.tracker, testStateClosed)
	verifySubcomponent(t, 7, sm.watcher, testStateClosed)
	verifySubcomponent(t, 8, sm.se, testStateOpen)
	verifySubcomponent(t, 9, sm.vstreamer, testStateOpen)
	verifySubcomponent(t, 10, sm.qe, testStateOpen)
	verifySubcomponent(t, 11, sm.txThrottler, testStateOpen)

	verifySubcomponent(t, 12, sm.rt, testStatePrimary)

	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.target.TabletType)
	assert.Equal(t, StateNotServing, sm.state)
}

// TestStateManagerUnserveNonPrimary transitions a fresh manager to a
// non-serving RDONLY and checks the resulting subcomponent states and call
// order, including that the schema engine was switched to non-primary mode.
func TestStateManagerUnserveNonPrimary(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	err := sm.SetServingType(topodatapb.TabletType_RDONLY, testNow, StateNotServing, "")
	require.NoError(t, err)

	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
	verifySubcomponent(t, 3, sm.throttler, testStateClosed)
	verifySubcomponent(t, 4, sm.messager, testStateClosed)
	verifySubcomponent(t, 5, sm.te, testStateClosed)

	verifySubcomponent(t, 6, sm.tracker, testStateClosed)
	assert.True(t, sm.se.(*testSchemaEngine).nonPrimary)

	verifySubcomponent(t, 7, sm.se, testStateOpen)
	verifySubcomponent(t, 8, sm.vstreamer, testStateOpen)
	verifySubcomponent(t, 9, sm.qe, testStateOpen)
	verifySubcomponent(t, 10, sm.txThrottler, testStateOpen)

	verifySubcomponent(t, 11, sm.rt, testStateNonPrimary)
	verifySubcomponent(t, 12, sm.watcher, testStateOpen)

	assert.Equal(t, topodatapb.TabletType_RDONLY, sm.target.TabletType)
	assert.Equal(t, StateNotServing, sm.state)
}

// TestStateManagerClose requests StateNotConnected and checks that every fake
// subcomponent ends up closed, in the expected order.
func TestStateManagerClose(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	err := sm.SetServingType(topodatapb.TabletType_RDONLY, testNow, StateNotConnected, "")
	require.NoError(t, err)

	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
	verifySubcomponent(t, 3, sm.throttler, testStateClosed)
	verifySubcomponent(t, 4, sm.messager, testStateClosed)
	verifySubcomponent(t, 5, sm.te, testStateClosed)
	verifySubcomponent(t, 6, sm.tracker, testStateClosed)

	verifySubcomponent(t, 7, sm.txThrottler, testStateClosed)
	verifySubcomponent(t, 8, sm.qe, testStateClosed)
	verifySubcomponent(t, 9, sm.watcher, testStateClosed)
	verifySubcomponent(t, 10, sm.vstreamer, testStateClosed)
	verifySubcomponent(t, 11, sm.rt, testStateClosed)
	verifySubcomponent(t, 12, sm.se, testStateClosed)

	assert.Equal(t, topodatapb.TabletType_RDONLY, sm.target.TabletType)
	assert.Equal(t, StateNotConnected, sm.state)
}

// TestStateManagerStopService checks that StopService on a serving REPLICA
// leaves the tablet type unchanged and moves the state to StateNotConnected.
// StopService is called explicitly and then once more by the deferred call.
func TestStateManagerStopService(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
	require.NoError(t, err)

	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
	assert.Equal(t, StateServing, sm.state)

	sm.StopService()
	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
	assert.Equal(t, StateNotConnected, sm.state)
}

// TestStateManagerGracePeriod checks sm.alsoAllow around a REPLICA -> PRIMARY
// change with a 10ms transitionGracePeriod: it is empty after the first
// transition, holds REPLICA right after the switch to PRIMARY, and is expected
// to be empty again after sleeping 20ms.
func TestStateManagerGracePeriod(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	sm.transitionGracePeriod = 10 * time.Millisecond

	// alsoAllow reads the first entry of sm.alsoAllow under sm.mu, returning
	// TabletType_UNKNOWN as a stand-in for "empty".
	alsoAllow := func() topodatapb.TabletType {
		sm.mu.Lock()
		defer sm.mu.Unlock()
		if len(sm.alsoAllow) == 0 {
			return topodatapb.TabletType_UNKNOWN
		}
		return sm.alsoAllow[0]
	}

	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
	require.NoError(t, err)

	assert.Equal(t, topodatapb.TabletType_UNKNOWN, alsoAllow())
	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
	assert.Equal(t, StateServing, sm.state)

	err = sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
	require.NoError(t, err)

	assert.Equal(t, topodatapb.TabletType_REPLICA, alsoAllow())
	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.target.TabletType)
	assert.Equal(t, StateServing, sm.state)

	// Sleep for twice the grace period configured above before re-checking.
	time.Sleep(20 * time.Millisecond)
	assert.Equal(t, topodatapb.TabletType_UNKNOWN, alsoAllow())
}

// testWatcher is used as a hook to invoke another transition
type testWatcher struct {
	t  *testing.T
	sm *stateManager
	wg sync.WaitGroup
}

// Open does nothing.
func (te *testWatcher) Open() {
}

// Close starts a goroutine that asks the state manager to move to a
// non-serving RDONLY. The goroutine is tracked by te.wg so the test can wait
// for it to finish.
func (te *testWatcher) Close() {
	te.wg.Add(1)
	go func() {
		defer te.wg.Done()

		err := te.sm.SetServingType(topodatapb.TabletType_RDONLY, testNow, StateNotServing, "")
		assert.NoError(te.t, err)
	}()
}

// TestStateManagerSetServingTypeRace installs a testWatcher so that a second
// SetServingType call is issued from within the first transition, and checks
// that the state requested by that second call is the one in effect at the end.
func TestStateManagerSetServingTypeRace(t *testing.T) {
	// We don't call StopService because that in turn
	// will call Close again on testWatcher.
	sm := newTestStateManager(t)
	te := &testWatcher{
		t:  t,
		sm: sm,
	}
	sm.watcher = te
	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
	require.NoError(t, err)

	// Ensure the next call waits and then succeeds.
	te.wg.Wait()

	// End state should be the final desired state.
	assert.Equal(t, topodatapb.TabletType_RDONLY, sm.target.TabletType)
	assert.Equal(t, StateNotServing, sm.state)
}

// TestStateManagerSetServingTypeNoChange issues the same serving-REPLICA
// request twice and checks that the subcomponent order numbers are the same as
// those expected after a single request in TestStateManagerServeNonPrimary.
func TestStateManagerSetServingTypeNoChange(t *testing.T) {
	log.Infof("starting")
	sm := newTestStateManager(t)
	defer sm.StopService()
	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
	require.NoError(t, err)

	err = sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
	require.NoError(t, err)

	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
	verifySubcomponent(t, 3, sm.messager, testStateClosed)
	verifySubcomponent(t, 4, sm.tracker, testStateClosed)
	assert.True(t, sm.se.(*testSchemaEngine).nonPrimary)

	verifySubcomponent(t, 5, sm.se, testStateOpen)
	verifySubcomponent(t, 6, sm.vstreamer, testStateOpen)
	verifySubcomponent(t, 7, sm.qe, testStateOpen)
	verifySubcomponent(t, 8, sm.txThrottler, testStateOpen)
	verifySubcomponent(t, 9, sm.te, testStateNonPrimary)
	verifySubcomponent(t, 10, sm.rt, testStateNonPrimary)
	verifySubcomponent(t, 11, sm.watcher, testStateOpen)
	verifySubcomponent(t, 12, sm.throttler, testStateOpen)

	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
	assert.Equal(t, StateServing, sm.state)
}

// TestStateManagerTransitionFailRetry makes the fake schema engine fail its
// next EnsureConnectionAndDB call, expects SetServingType to return an error,
// and then polls sm.retrying until it clears, at which point the manager is
// expected to be a serving PRIMARY.
func TestStateManagerTransitionFailRetry(t *testing.T) {
	// Shorten the package-level retry interval for this test and restore it on exit.
	defer func(saved time.Duration) { transitionRetryInterval = saved }(transitionRetryInterval)
	transitionRetryInterval = 10 * time.Millisecond

	sm := newTestStateManager(t)
	defer sm.StopService()
	sm.se.(*testSchemaEngine).failMySQL = true

	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
	require.Error(t, err)

	// Calling retryTransition while retrying should be a no-op.
	sm.retryTransition("")

	// Steal the lock and wait long enough for the retry
	// to fail, and then release it. The retry will have
	// to keep retrying.
	sm.transitioning.Acquire()
	time.Sleep(30 * time.Millisecond)
	sm.transitioning.Release()

	// Poll (without an upper bound) until the retry flag is cleared.
	for {
		sm.mu.Lock()
		retrying := sm.retrying
		sm.mu.Unlock()
		if !retrying {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.Target().TabletType)
	assert.Equal(t, StateServing, sm.State())
}

// TestStateManagerNotConnectedType checks that requesting StateNotServing for
// the RESTORE and BACKUP tablet types leaves the manager in StateNotConnected.
func TestStateManagerNotConnectedType(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	sm.EnterLameduck()
	err := sm.SetServingType(topodatapb.TabletType_RESTORE, testNow, StateNotServing, "")
	require.NoError(t, err)

	assert.Equal(t, topodatapb.TabletType_RESTORE, sm.target.TabletType)
	assert.Equal(t, StateNotConnected, sm.state)

	err = sm.SetServingType(topodatapb.TabletType_BACKUP, testNow, StateNotServing, "")
	require.NoError(t, err)

	assert.Equal(t, topodatapb.TabletType_BACKUP, sm.target.TabletType)
	assert.Equal(t, StateNotConnected, sm.state)
}

// delayedTxEngine is a fake tx engine whose AcceptReadOnly and Close calls each
// take 50ms, giving tests a window in which a transition is still in progress.
type delayedTxEngine struct {
}

// AcceptReadWrite returns immediately.
func (te *delayedTxEngine) AcceptReadWrite() {
}

// AcceptReadOnly sleeps for 50ms.
func (te *delayedTxEngine) AcceptReadOnly() {
	time.Sleep(50 * time.Millisecond)
}

// Close sleeps for 50ms.
func (te *delayedTxEngine) Close() {
	time.Sleep(50 * time.Millisecond)
}

// killableConn is a fake connection that records whether Kill was called.
type killableConn struct {
	id     int64
	killed sync2.AtomicBool
}

// Current always returns the empty string.
func (k *killableConn) Current() string {
	return ""
}

// ID returns the id the connection was created with.
func (k *killableConn) ID() int64 {
	return k.id
}

// Kill sets the killed flag and always returns nil; both arguments are ignored.
func (k *killableConn) Kill(message string, elapsed time.Duration) error {
	k.killed.Set(true)
	return nil
}

// TestStateManagerShutdownGracePeriod registers one connection in the
// stateless query list (kconn1) and one in the stateful query list (kconn2),
// then checks which of them are killed across a series of transitions, with
// and without sm.shutdownGracePeriod set.
func TestStateManagerShutdownGracePeriod(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()

	sm.te = &delayedTxEngine{}
	kconn1 := &killableConn{id: 1}
	sm.statelessql.Add(&QueryDetail{
		conn:   kconn1,
		connID: kconn1.id,
	})
	kconn2 := &killableConn{id: 2}
	sm.statefulql.Add(&QueryDetail{
		conn:   kconn2,
		connID: kconn2.id,
	})

	// Transition to primary with no shutdown grace period should kill kconn2
	// (stateful) but not kconn1 (stateless).
	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
	require.NoError(t, err)
	assert.False(t, kconn1.killed.Get())
	assert.True(t, kconn2.killed.Get())

	// Transition to replica without grace period. No conns should be killed.
	kconn2.killed.Set(false)
	err = sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
	require.NoError(t, err)
	assert.False(t, kconn1.killed.Get())
	assert.False(t, kconn2.killed.Get())

	// Go back to primary, then transition to replica with a short shutdown
	// grace period: both conns should be killed.
	err = sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
	require.NoError(t, err)
	sm.shutdownGracePeriod = 10 * time.Millisecond
	err = sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
	require.NoError(t, err)
	assert.True(t, kconn1.killed.Get())
	assert.True(t, kconn2.killed.Get())

	// Primary non-serving should also kill the conns.
	err = sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
	require.NoError(t, err)
	sm.shutdownGracePeriod = 10 * time.Millisecond
	kconn1.killed.Set(false)
	kconn2.killed.Set(false)
	err = sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateNotServing, "")
	require.NoError(t, err)
	assert.True(t, kconn1.killed.Get())
	assert.True(t, kconn2.killed.Get())
}

// TestStateManagerCheckMySQL makes the fake query engine report MySQL as
// unreachable once, calls checkMySQL, and checks that the manager stops
// serving while the check is running and is a serving PRIMARY again once the
// transition and retry have completed.
func TestStateManagerCheckMySQL(t *testing.T) {
	// Shorten the package-level retry interval for this test and restore it on exit.
	defer func(saved time.Duration) { transitionRetryInterval = saved }(transitionRetryInterval)
	transitionRetryInterval = 10 * time.Millisecond

	sm := newTestStateManager(t)
	defer sm.StopService()

	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
	require.NoError(t, err)

	sm.te = &delayedTxEngine{}
	sm.qe.(*testQueryEngine).failMySQL = true
	// Reset the global counter so the loop below can detect the first
	// lifecycle call made after this point.
	order.Set(0)
	sm.checkMySQL()
	// We know checkMySQL will take at least 50 milliseconds since txEngine.Close has a sleep in the test code
	time.Sleep(10 * time.Millisecond)
	assert.EqualValues(t, 1, sm.isCheckMySQLRunning())
	// When we are in CheckMySQL state, we should not be accepting any new requests which aren't transactional
	assert.False(t, sm.IsServing())

	// Rechecking immediately should be a no-op:
	sm.checkMySQL()

	// Wait for closeAll to get under way.
	for {
		if order.Get() >= 1 {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	// Wait to get out of transitioning state.
	for {
		if !sm.isTransitioning() {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	// Wait for retry to finish.
	for {
		sm.mu.Lock()
		retrying := sm.retrying
		sm.mu.Unlock()
		if !retrying {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	assert.True(t, sm.IsServing())
	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.Target().TabletType)
	assert.Equal(t, StateServing, sm.State())

	// Wait for checkMySQL to finish.
	// Unlike the loops above, this one gives up after two seconds.
	timeout := time.After(2 * time.Second)
	for {
		select {
		case <-timeout:
			t.Fatalf("Timedout waiting for checkMySQL to finish")
		default:
			if sm.isCheckMySQLRunning() == 0 {
				return
			}
			time.Sleep(100 * time.Millisecond)
		}
	}
}

// TestStateManagerValidations exercises the error paths of StartRequest and
// VerifyTarget by mutating the manager's fields and the request target
// directly, and matching on substrings of the returned error messages.
//
// NOTE(review): ctx is not declared in this file; presumably it is a
// package-level context defined in another test file of this package.
func TestStateManagerValidations(t *testing.T) {
	sm := newTestStateManager(t)
	target := &querypb.Target{TabletType: topodatapb.TabletType_PRIMARY}
	// Give the manager its own copy so that later edits to target do not
	// also change sm.target.
	sm.target = proto.Clone(target).(*querypb.Target)

	// A freshly initialized manager is expected to reject requests.
	err := sm.StartRequest(ctx, target, false)
	assert.Contains(t, err.Error(), "operation not allowed")

	// Serving, but replication is unhealthy.
	sm.replHealthy = false
	sm.state = StateServing
	sm.wantState = StateServing
	err = sm.StartRequest(ctx, target, false)
	assert.Contains(t, err.Error(), "operation not allowed")

	// Serving, but the wanted state is not serving.
	sm.replHealthy = true
	sm.state = StateServing
	sm.wantState = StateNotServing
	err = sm.StartRequest(ctx, target, false)
	assert.Contains(t, err.Error(), "operation not allowed")

	// The same request is accepted when the third argument is true
	// (presumably an allow-on-shutdown flag — confirm against StartRequest).
	err = sm.StartRequest(ctx, target, true)
	assert.NoError(t, err)

	sm.wantState = StateServing
	target.Keyspace = "a"
	err = sm.StartRequest(ctx, target, false)
	assert.Contains(t, err.Error(), "invalid keyspace")
	err = sm.VerifyTarget(ctx, target)
	assert.Contains(t, err.Error(), "invalid keyspace")

	target.Keyspace = ""
	target.Shard = "a"
	err = sm.StartRequest(ctx, target, false)
	assert.Contains(t, err.Error(), "invalid shard")
	err = sm.VerifyTarget(ctx, target)
	assert.Contains(t, err.Error(), "invalid shard")

	target.Shard = ""
	target.TabletType = topodatapb.TabletType_REPLICA
	err = sm.StartRequest(ctx, target, false)
	assert.Contains(t, err.Error(), "wrong tablet type")
	err = sm.VerifyTarget(ctx, target)
	assert.Contains(t, err.Error(), "wrong tablet type")

	// A mismatching tablet type is accepted once it is listed in alsoAllow.
	sm.alsoAllow = []topodatapb.TabletType{topodatapb.TabletType_REPLICA}
	err = sm.StartRequest(ctx, target, false)
	assert.NoError(t, err)
	err = sm.VerifyTarget(ctx, target)
	assert.NoError(t, err)

	err = sm.StartRequest(ctx, nil, false)
	assert.Contains(t, err.Error(), "No target")
	err = sm.VerifyTarget(ctx, nil)
	assert.Contains(t, err.Error(), "No target")

	// With the context returned by tabletenv.LocalContext, a nil target is accepted.
	localctx := tabletenv.LocalContext()
	err = sm.StartRequest(localctx, nil, false)
	assert.NoError(t, err)
	err = sm.VerifyTarget(localctx, nil)
	assert.NoError(t, err)
}

// TestStateManagerWaitForRequests starts a request on a serving PRIMARY, calls
// StopService in a goroutine, and checks that the manager stays in the
// transitioning state until EndRequest is called, after which it is expected
// to reach StateNotConnected.
//
// NOTE(review): ctx is not declared in this file; presumably it is a
// package-level context defined in another test file of this package.
func TestStateManagerWaitForRequests(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	target := &querypb.Target{TabletType: topodatapb.TabletType_PRIMARY}
	sm.target = target
	sm.timebombDuration = 10 * time.Second

	sm.replHealthy = true
	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
	require.NoError(t, err)

	err = sm.StartRequest(ctx, target, false)
	require.NoError(t, err)

	// This will go into transition and wait.
	// Wait for that state.
	go sm.StopService()
	for {
		if !sm.isTransitioning() {
			time.Sleep(10 * time.Millisecond)
			continue
		}
		break
	}

	// Verify that we're still transitioning.
	assert.True(t, sm.isTransitioning())

	sm.EndRequest()

	// Poll (without an upper bound) until the transition has finished.
	for {
		if sm.isTransitioning() {
			time.Sleep(10 * time.Millisecond)
			continue
		}
		break
	}
	assert.Equal(t, StateNotConnected, sm.State())
}

// TestStateManagerNotify subscribes to the health stream of a serving REPLICA,
// triggers a Broadcast, and compares the first received response (with
// RealtimeStats cleared) against the expected one.
//
// NOTE(review): blpFunc and testBlpFunc are not declared in this file, and the
// assignment below is not restored at the end of the test.
func TestStateManagerNotify(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()

	blpFunc = testBlpFunc

	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
	require.NoError(t, err)

	ch := make(chan *querypb.StreamHealthResponse, 5)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		err := sm.hs.Stream(context.Background(), func(shr *querypb.StreamHealthResponse) error {
			ch <- shr
			return nil
		})
		assert.Contains(t, err.Error(), "tabletserver is shutdown")
	}()
	// Deferred calls run last-in-first-out, so this Wait runs before the
	// deferred StopService above. Presumably the explicit StopService at the
	// end of the test is what makes Stream return.
	defer wg.Wait()

	sm.Broadcast()

	gotshr := <-ch
	// Remove things we don't care about:
	gotshr.RealtimeStats = nil
	wantshr := &querypb.StreamHealthResponse{
		Target: &querypb.Target{
			TabletType: topodatapb.TabletType_REPLICA,
		},
		Serving:     true,
		TabletAlias: &topodatapb.TabletAlias{},
	}
	sm.hcticks.Stop()
	assert.Truef(t, proto.Equal(gotshr, wantshr), "got: %v, want: %v", gotshr, wantshr)
	sm.StopService()
}

// TestRefreshReplHealthLocked checks the lag and error returned by
// refreshReplHealthLocked, and the resulting sm.replHealthy, for a PRIMARY
// target and for a REPLICA target whose fake replication tracker reports one
// second of lag, an error, and three hours of lag.
func TestRefreshReplHealthLocked(t *testing.T) {
	sm := newTestStateManager(t)
	defer sm.StopService()
	rt := sm.rt.(*testReplTracker)

	// PRIMARY: zero lag is expected even though the fake tracker reports 1s.
	sm.target.TabletType = topodatapb.TabletType_PRIMARY
	sm.replHealthy = false
	lag, err := sm.refreshReplHealthLocked()
	assert.Equal(t, time.Duration(0), lag)
	assert.NoError(t, err)
	assert.True(t, sm.replHealthy)

	// REPLICA with the tracker's default 1s lag: healthy.
	sm.target.TabletType = topodatapb.TabletType_REPLICA
	sm.replHealthy = false
	lag, err = sm.refreshReplHealthLocked()
	assert.Equal(t, 1*time.Second, lag)
	assert.NoError(t, err)
	assert.True(t, sm.replHealthy)

	// Tracker error: the error is returned and replication is unhealthy.
	rt.err = errors.New("err")
	sm.replHealthy = true
	lag, err = sm.refreshReplHealthLocked()
	assert.Equal(t, 1*time.Second, lag)
	assert.Error(t, err)
	assert.False(t, sm.replHealthy)

	// Three hours of lag without an error: unhealthy.
	rt.err = nil
	rt.lag = 3 * time.Hour
	sm.replHealthy = true
	lag, err = sm.refreshReplHealthLocked()
	assert.Equal(t, 3*time.Hour, lag)
	assert.NoError(t, err)
	assert.False(t, sm.replHealthy)
}

// verifySubcomponent asserts that component, which must implement orderState
// (the type assertion panics otherwise), reports the given order number and
// state.
func verifySubcomponent(t *testing.T, order int64, component any, state testState) {
	tos := component.(orderState)
	assert.Equal(t, order, tos.Order())
	assert.Equal(t, state, tos.State())
}

// newTestStateManager resets the global order counter to zero and returns a
// stateManager wired with fake subcomponents, a health streamer built from a
// default tabletenv config, and an empty target. The fake replication tracker
// starts with one second of lag.
func newTestStateManager(t *testing.T) *stateManager {
	order.Set(0)
	config := tabletenv.NewDefaultConfig()
	env := tabletenv.NewEnv(config, "StateManagerTest")
	sm := &stateManager{
		statelessql: NewQueryList("stateless"),
		statefulql:  NewQueryList("stateful"),
		olapql:      NewQueryList("olap"),
		hs:          newHealthStreamer(env, &topodatapb.TabletAlias{}),
		se:          &testSchemaEngine{},
		rt:          &testReplTracker{lag: 1 * time.Second},
		vstreamer:   &testSubcomponent{},
		tracker:     &testSubcomponent{},
		watcher:     &testSubcomponent{},
		qe:          &testQueryEngine{},
		txThrottler: &testTxThrottler{},
		te:          &testTxEngine{},
		messager:    &testSubcomponent{},
		ddle:        &testOnlineDDLExecutor{},
		throttler:   &testLagThrottler{},
		tableGC:     &testTableGC{},
	}
	sm.Init(env, &querypb.Target{})
	sm.hs.InitDBConfig(&querypb.Target{}, fakesqldb.New(t).ConnParams())
	log.Infof("returning sm: %p", sm)
	return sm
}

// isTransitioning reports whether sm.transitioning is currently held. It
// probes with TryAcquire and releases immediately on success, so during a
// true-negative probe the semaphore is held for a brief moment.
func (sm *stateManager) isTransitioning() bool {
	if sm.transitioning.TryAcquire() {
		sm.transitioning.Release()
		return false
	}
	return true
}

// order is the global counter the fake subcomponents increment on each
// lifecycle call; newTestStateManager resets it to zero.
var order sync2.AtomicInt64

// testState is the last lifecycle state recorded by a fake subcomponent.
type testState int

// The zero value is deliberately skipped, so a fake that was never called
// matches none of the named states.
const (
	_ = testState(iota)
	testStateOpen
	testStateClosed
	testStatePrimary
	testStateNonPrimary
)

// orderState is what verifySubcomponent requires of a fake: the order number
// and the state recorded by its most recent lifecycle call.
type orderState interface {
	Order() int64
	State() testState
}

type testOrderState struct { 754 order int64 755 state testState 756 } 757 758 func (tos testOrderState) Order() int64 { 759 return tos.order 760 } 761 762 func (tos testOrderState) State() testState { 763 return tos.state 764 } 765 766 type testSchemaEngine struct { 767 testOrderState 768 ensureCalled bool 769 nonPrimary bool 770 771 failMySQL bool 772 } 773 774 func (te *testSchemaEngine) EnsureConnectionAndDB(tabletType topodatapb.TabletType) error { 775 if te.failMySQL { 776 te.failMySQL = false 777 return errors.New("intentional error") 778 } 779 te.ensureCalled = true 780 return nil 781 } 782 783 func (te *testSchemaEngine) Open() error { 784 te.order = order.Add(1) 785 te.state = testStateOpen 786 return nil 787 } 788 789 func (te *testSchemaEngine) MakeNonPrimary() { 790 te.nonPrimary = true 791 } 792 793 func (te *testSchemaEngine) Close() { 794 te.order = order.Add(1) 795 te.state = testStateClosed 796 } 797 798 type testReplTracker struct { 799 testOrderState 800 lag time.Duration 801 err error 802 } 803 804 func (te *testReplTracker) MakePrimary() { 805 te.order = order.Add(1) 806 te.state = testStatePrimary 807 } 808 809 func (te *testReplTracker) MakeNonPrimary() { 810 te.order = order.Add(1) 811 te.state = testStateNonPrimary 812 } 813 814 func (te *testReplTracker) Close() { 815 te.order = order.Add(1) 816 te.state = testStateClosed 817 } 818 819 func (te *testReplTracker) Status() (time.Duration, error) { 820 return te.lag, te.err 821 } 822 823 type testQueryEngine struct { 824 testOrderState 825 826 failMySQL bool 827 } 828 829 func (te *testQueryEngine) Open() error { 830 te.order = order.Add(1) 831 te.state = testStateOpen 832 return nil 833 } 834 835 func (te *testQueryEngine) IsMySQLReachable() error { 836 if te.failMySQL { 837 te.failMySQL = false 838 return errors.New("intentional error") 839 } 840 return nil 841 } 842 843 func (te *testQueryEngine) Close() { 844 te.order = order.Add(1) 845 te.state = testStateClosed 846 } 847 848 type 
testTxEngine struct { 849 testOrderState 850 } 851 852 func (te *testTxEngine) AcceptReadWrite() { 853 te.order = order.Add(1) 854 te.state = testStatePrimary 855 } 856 857 func (te *testTxEngine) AcceptReadOnly() { 858 te.order = order.Add(1) 859 te.state = testStateNonPrimary 860 } 861 862 func (te *testTxEngine) Close() { 863 te.order = order.Add(1) 864 te.state = testStateClosed 865 } 866 867 type testSubcomponent struct { 868 testOrderState 869 } 870 871 func (te *testSubcomponent) Open() { 872 te.order = order.Add(1) 873 te.state = testStateOpen 874 } 875 876 func (te *testSubcomponent) Close() { 877 te.order = order.Add(1) 878 te.state = testStateClosed 879 } 880 881 type testTxThrottler struct { 882 testOrderState 883 } 884 885 func (te *testTxThrottler) Open() error { 886 te.order = order.Add(1) 887 te.state = testStateOpen 888 return nil 889 } 890 891 func (te *testTxThrottler) Close() { 892 te.order = order.Add(1) 893 te.state = testStateClosed 894 } 895 896 type testOnlineDDLExecutor struct { 897 testOrderState 898 } 899 900 func (te *testOnlineDDLExecutor) Open() error { 901 te.order = order.Add(1) 902 te.state = testStateOpen 903 return nil 904 } 905 906 func (te *testOnlineDDLExecutor) Close() { 907 te.order = order.Add(1) 908 te.state = testStateClosed 909 } 910 911 type testLagThrottler struct { 912 testOrderState 913 } 914 915 func (te *testLagThrottler) Open() error { 916 te.order = order.Add(1) 917 te.state = testStateOpen 918 return nil 919 } 920 921 func (te *testLagThrottler) Close() { 922 te.order = order.Add(1) 923 te.state = testStateClosed 924 } 925 926 type testTableGC struct { 927 testOrderState 928 } 929 930 func (te *testTableGC) Open() error { 931 te.order = order.Add(1) 932 te.state = testStateOpen 933 return nil 934 } 935 936 func (te *testTableGC) Close() { 937 te.order = order.Add(1) 938 te.state = testStateClosed 939 }