vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletserver/state_manager_test.go (about)

     1  /*
     2  Copyright 2020 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package tabletserver
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"sync"
    23  	"testing"
    24  	"time"
    25  
    26  	"google.golang.org/protobuf/proto"
    27  
    28  	"github.com/stretchr/testify/assert"
    29  	"github.com/stretchr/testify/require"
    30  
    31  	"vitess.io/vitess/go/mysql/fakesqldb"
    32  
    33  	"vitess.io/vitess/go/sync2"
    34  	"vitess.io/vitess/go/vt/log"
    35  	querypb "vitess.io/vitess/go/vt/proto/query"
    36  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    37  	"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
    38  )
    39  
    40  var testNow = time.Now()
    41  
    42  func TestStateManagerStateByName(t *testing.T) {
    43  	sm := &stateManager{}
    44  
    45  	sm.replHealthy = true
    46  	sm.wantState = StateServing
    47  	sm.state = StateNotConnected
    48  	assert.Equal(t, "NOT_SERVING", sm.IsServingString())
    49  
    50  	sm.state = StateNotServing
    51  	assert.Equal(t, "NOT_SERVING", sm.IsServingString())
    52  
    53  	sm.state = StateServing
    54  	assert.Equal(t, "SERVING", sm.IsServingString())
    55  
    56  	sm.wantState = StateNotServing
    57  	assert.Equal(t, "NOT_SERVING", sm.IsServingString())
    58  	sm.wantState = StateServing
    59  
    60  	sm.EnterLameduck()
    61  	assert.Equal(t, "NOT_SERVING", sm.IsServingString())
    62  	sm.ExitLameduck()
    63  
    64  	sm.replHealthy = false
    65  	assert.Equal(t, "NOT_SERVING", sm.IsServingString())
    66  }
    67  
    68  func TestStateManagerServePrimary(t *testing.T) {
    69  	sm := newTestStateManager(t)
    70  	defer sm.StopService()
    71  	sm.EnterLameduck()
    72  	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
    73  	require.NoError(t, err)
    74  
    75  	assert.Equal(t, false, sm.lameduck)
    76  	assert.Equal(t, testNow, sm.terTimestamp)
    77  
    78  	verifySubcomponent(t, 1, sm.watcher, testStateClosed)
    79  
    80  	verifySubcomponent(t, 2, sm.se, testStateOpen)
    81  	verifySubcomponent(t, 3, sm.vstreamer, testStateOpen)
    82  	verifySubcomponent(t, 4, sm.qe, testStateOpen)
    83  	verifySubcomponent(t, 5, sm.txThrottler, testStateOpen)
    84  	verifySubcomponent(t, 6, sm.rt, testStatePrimary)
    85  	verifySubcomponent(t, 7, sm.tracker, testStateOpen)
    86  	verifySubcomponent(t, 8, sm.te, testStatePrimary)
    87  	verifySubcomponent(t, 9, sm.messager, testStateOpen)
    88  	verifySubcomponent(t, 10, sm.throttler, testStateOpen)
    89  	verifySubcomponent(t, 11, sm.tableGC, testStateOpen)
    90  	verifySubcomponent(t, 12, sm.ddle, testStateOpen)
    91  
    92  	assert.False(t, sm.se.(*testSchemaEngine).nonPrimary)
    93  	assert.True(t, sm.se.(*testSchemaEngine).ensureCalled)
    94  
    95  	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.target.TabletType)
    96  	assert.Equal(t, StateServing, sm.state)
    97  }
    98  
    99  func TestStateManagerServeNonPrimary(t *testing.T) {
   100  	sm := newTestStateManager(t)
   101  	defer sm.StopService()
   102  	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
   103  	require.NoError(t, err)
   104  
   105  	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
   106  	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
   107  	verifySubcomponent(t, 3, sm.messager, testStateClosed)
   108  	verifySubcomponent(t, 4, sm.tracker, testStateClosed)
   109  	assert.True(t, sm.se.(*testSchemaEngine).nonPrimary)
   110  
   111  	verifySubcomponent(t, 5, sm.se, testStateOpen)
   112  	verifySubcomponent(t, 6, sm.vstreamer, testStateOpen)
   113  	verifySubcomponent(t, 7, sm.qe, testStateOpen)
   114  	verifySubcomponent(t, 8, sm.txThrottler, testStateOpen)
   115  	verifySubcomponent(t, 9, sm.te, testStateNonPrimary)
   116  	verifySubcomponent(t, 10, sm.rt, testStateNonPrimary)
   117  	verifySubcomponent(t, 11, sm.watcher, testStateOpen)
   118  	verifySubcomponent(t, 12, sm.throttler, testStateOpen)
   119  
   120  	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
   121  	assert.Equal(t, StateServing, sm.state)
   122  }
   123  
   124  func TestStateManagerUnservePrimary(t *testing.T) {
   125  	sm := newTestStateManager(t)
   126  	defer sm.StopService()
   127  	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateNotServing, "")
   128  	require.NoError(t, err)
   129  
   130  	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
   131  	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
   132  	verifySubcomponent(t, 3, sm.throttler, testStateClosed)
   133  	verifySubcomponent(t, 4, sm.messager, testStateClosed)
   134  	verifySubcomponent(t, 5, sm.te, testStateClosed)
   135  
   136  	verifySubcomponent(t, 6, sm.tracker, testStateClosed)
   137  	verifySubcomponent(t, 7, sm.watcher, testStateClosed)
   138  	verifySubcomponent(t, 8, sm.se, testStateOpen)
   139  	verifySubcomponent(t, 9, sm.vstreamer, testStateOpen)
   140  	verifySubcomponent(t, 10, sm.qe, testStateOpen)
   141  	verifySubcomponent(t, 11, sm.txThrottler, testStateOpen)
   142  
   143  	verifySubcomponent(t, 12, sm.rt, testStatePrimary)
   144  
   145  	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.target.TabletType)
   146  	assert.Equal(t, StateNotServing, sm.state)
   147  }
   148  
   149  func TestStateManagerUnserveNonPrimary(t *testing.T) {
   150  	sm := newTestStateManager(t)
   151  	defer sm.StopService()
   152  	err := sm.SetServingType(topodatapb.TabletType_RDONLY, testNow, StateNotServing, "")
   153  	require.NoError(t, err)
   154  
   155  	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
   156  	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
   157  	verifySubcomponent(t, 3, sm.throttler, testStateClosed)
   158  	verifySubcomponent(t, 4, sm.messager, testStateClosed)
   159  	verifySubcomponent(t, 5, sm.te, testStateClosed)
   160  
   161  	verifySubcomponent(t, 6, sm.tracker, testStateClosed)
   162  	assert.True(t, sm.se.(*testSchemaEngine).nonPrimary)
   163  
   164  	verifySubcomponent(t, 7, sm.se, testStateOpen)
   165  	verifySubcomponent(t, 8, sm.vstreamer, testStateOpen)
   166  	verifySubcomponent(t, 9, sm.qe, testStateOpen)
   167  	verifySubcomponent(t, 10, sm.txThrottler, testStateOpen)
   168  
   169  	verifySubcomponent(t, 11, sm.rt, testStateNonPrimary)
   170  	verifySubcomponent(t, 12, sm.watcher, testStateOpen)
   171  
   172  	assert.Equal(t, topodatapb.TabletType_RDONLY, sm.target.TabletType)
   173  	assert.Equal(t, StateNotServing, sm.state)
   174  }
   175  
   176  func TestStateManagerClose(t *testing.T) {
   177  	sm := newTestStateManager(t)
   178  	defer sm.StopService()
   179  	err := sm.SetServingType(topodatapb.TabletType_RDONLY, testNow, StateNotConnected, "")
   180  	require.NoError(t, err)
   181  
   182  	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
   183  	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
   184  	verifySubcomponent(t, 3, sm.throttler, testStateClosed)
   185  	verifySubcomponent(t, 4, sm.messager, testStateClosed)
   186  	verifySubcomponent(t, 5, sm.te, testStateClosed)
   187  	verifySubcomponent(t, 6, sm.tracker, testStateClosed)
   188  
   189  	verifySubcomponent(t, 7, sm.txThrottler, testStateClosed)
   190  	verifySubcomponent(t, 8, sm.qe, testStateClosed)
   191  	verifySubcomponent(t, 9, sm.watcher, testStateClosed)
   192  	verifySubcomponent(t, 10, sm.vstreamer, testStateClosed)
   193  	verifySubcomponent(t, 11, sm.rt, testStateClosed)
   194  	verifySubcomponent(t, 12, sm.se, testStateClosed)
   195  
   196  	assert.Equal(t, topodatapb.TabletType_RDONLY, sm.target.TabletType)
   197  	assert.Equal(t, StateNotConnected, sm.state)
   198  }
   199  
   200  func TestStateManagerStopService(t *testing.T) {
   201  	sm := newTestStateManager(t)
   202  	defer sm.StopService()
   203  	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
   204  	require.NoError(t, err)
   205  
   206  	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
   207  	assert.Equal(t, StateServing, sm.state)
   208  
   209  	sm.StopService()
   210  	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
   211  	assert.Equal(t, StateNotConnected, sm.state)
   212  }
   213  
   214  func TestStateManagerGracePeriod(t *testing.T) {
   215  	sm := newTestStateManager(t)
   216  	defer sm.StopService()
   217  	sm.transitionGracePeriod = 10 * time.Millisecond
   218  
   219  	alsoAllow := func() topodatapb.TabletType {
   220  		sm.mu.Lock()
   221  		defer sm.mu.Unlock()
   222  		if len(sm.alsoAllow) == 0 {
   223  			return topodatapb.TabletType_UNKNOWN
   224  		}
   225  		return sm.alsoAllow[0]
   226  	}
   227  
   228  	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
   229  	require.NoError(t, err)
   230  
   231  	assert.Equal(t, topodatapb.TabletType_UNKNOWN, alsoAllow())
   232  	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
   233  	assert.Equal(t, StateServing, sm.state)
   234  
   235  	err = sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
   236  	require.NoError(t, err)
   237  
   238  	assert.Equal(t, topodatapb.TabletType_REPLICA, alsoAllow())
   239  	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.target.TabletType)
   240  	assert.Equal(t, StateServing, sm.state)
   241  
   242  	time.Sleep(20 * time.Millisecond)
   243  	assert.Equal(t, topodatapb.TabletType_UNKNOWN, alsoAllow())
   244  }
   245  
   246  // testWatcher is used as a hook to invoke another transition
   247  type testWatcher struct {
   248  	t  *testing.T
   249  	sm *stateManager
   250  	wg sync.WaitGroup
   251  }
   252  
   253  func (te *testWatcher) Open() {
   254  }
   255  
   256  func (te *testWatcher) Close() {
   257  	te.wg.Add(1)
   258  	go func() {
   259  		defer te.wg.Done()
   260  
   261  		err := te.sm.SetServingType(topodatapb.TabletType_RDONLY, testNow, StateNotServing, "")
   262  		assert.NoError(te.t, err)
   263  	}()
   264  }
   265  
   266  func TestStateManagerSetServingTypeRace(t *testing.T) {
   267  	// We don't call StopService because that in turn
   268  	// will call Close again on testWatcher.
   269  	sm := newTestStateManager(t)
   270  	te := &testWatcher{
   271  		t:  t,
   272  		sm: sm,
   273  	}
   274  	sm.watcher = te
   275  	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
   276  	require.NoError(t, err)
   277  
   278  	// Ensure the next call waits and then succeeds.
   279  	te.wg.Wait()
   280  
   281  	// End state should be the final desired state.
   282  	assert.Equal(t, topodatapb.TabletType_RDONLY, sm.target.TabletType)
   283  	assert.Equal(t, StateNotServing, sm.state)
   284  }
   285  
   286  func TestStateManagerSetServingTypeNoChange(t *testing.T) {
   287  	log.Infof("starting")
   288  	sm := newTestStateManager(t)
   289  	defer sm.StopService()
   290  	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
   291  	require.NoError(t, err)
   292  
   293  	err = sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
   294  	require.NoError(t, err)
   295  
   296  	verifySubcomponent(t, 1, sm.ddle, testStateClosed)
   297  	verifySubcomponent(t, 2, sm.tableGC, testStateClosed)
   298  	verifySubcomponent(t, 3, sm.messager, testStateClosed)
   299  	verifySubcomponent(t, 4, sm.tracker, testStateClosed)
   300  	assert.True(t, sm.se.(*testSchemaEngine).nonPrimary)
   301  
   302  	verifySubcomponent(t, 5, sm.se, testStateOpen)
   303  	verifySubcomponent(t, 6, sm.vstreamer, testStateOpen)
   304  	verifySubcomponent(t, 7, sm.qe, testStateOpen)
   305  	verifySubcomponent(t, 8, sm.txThrottler, testStateOpen)
   306  	verifySubcomponent(t, 9, sm.te, testStateNonPrimary)
   307  	verifySubcomponent(t, 10, sm.rt, testStateNonPrimary)
   308  	verifySubcomponent(t, 11, sm.watcher, testStateOpen)
   309  	verifySubcomponent(t, 12, sm.throttler, testStateOpen)
   310  
   311  	assert.Equal(t, topodatapb.TabletType_REPLICA, sm.target.TabletType)
   312  	assert.Equal(t, StateServing, sm.state)
   313  }
   314  
   315  func TestStateManagerTransitionFailRetry(t *testing.T) {
   316  	defer func(saved time.Duration) { transitionRetryInterval = saved }(transitionRetryInterval)
   317  	transitionRetryInterval = 10 * time.Millisecond
   318  
   319  	sm := newTestStateManager(t)
   320  	defer sm.StopService()
   321  	sm.se.(*testSchemaEngine).failMySQL = true
   322  
   323  	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
   324  	require.Error(t, err)
   325  
   326  	// Calling retryTransition while retrying should be a no-op.
   327  	sm.retryTransition("")
   328  
   329  	// Steal the lock and wait long enough for the retry
   330  	// to fail, and then release it. The retry will have
   331  	// to keep retrying.
   332  	sm.transitioning.Acquire()
   333  	time.Sleep(30 * time.Millisecond)
   334  	sm.transitioning.Release()
   335  
   336  	for {
   337  		sm.mu.Lock()
   338  		retrying := sm.retrying
   339  		sm.mu.Unlock()
   340  		if !retrying {
   341  			break
   342  		}
   343  		time.Sleep(10 * time.Millisecond)
   344  	}
   345  
   346  	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.Target().TabletType)
   347  	assert.Equal(t, StateServing, sm.State())
   348  }
   349  
   350  func TestStateManagerNotConnectedType(t *testing.T) {
   351  	sm := newTestStateManager(t)
   352  	defer sm.StopService()
   353  	sm.EnterLameduck()
   354  	err := sm.SetServingType(topodatapb.TabletType_RESTORE, testNow, StateNotServing, "")
   355  	require.NoError(t, err)
   356  
   357  	assert.Equal(t, topodatapb.TabletType_RESTORE, sm.target.TabletType)
   358  	assert.Equal(t, StateNotConnected, sm.state)
   359  
   360  	err = sm.SetServingType(topodatapb.TabletType_BACKUP, testNow, StateNotServing, "")
   361  	require.NoError(t, err)
   362  
   363  	assert.Equal(t, topodatapb.TabletType_BACKUP, sm.target.TabletType)
   364  	assert.Equal(t, StateNotConnected, sm.state)
   365  }
   366  
   367  type delayedTxEngine struct {
   368  }
   369  
   370  func (te *delayedTxEngine) AcceptReadWrite() {
   371  }
   372  
   373  func (te *delayedTxEngine) AcceptReadOnly() {
   374  	time.Sleep(50 * time.Millisecond)
   375  }
   376  
   377  func (te *delayedTxEngine) Close() {
   378  	time.Sleep(50 * time.Millisecond)
   379  }
   380  
   381  type killableConn struct {
   382  	id     int64
   383  	killed sync2.AtomicBool
   384  }
   385  
   386  func (k *killableConn) Current() string {
   387  	return ""
   388  }
   389  
   390  func (k *killableConn) ID() int64 {
   391  	return k.id
   392  }
   393  
   394  func (k *killableConn) Kill(message string, elapsed time.Duration) error {
   395  	k.killed.Set(true)
   396  	return nil
   397  }
   398  
   399  func TestStateManagerShutdownGracePeriod(t *testing.T) {
   400  	sm := newTestStateManager(t)
   401  	defer sm.StopService()
   402  
   403  	sm.te = &delayedTxEngine{}
   404  	kconn1 := &killableConn{id: 1}
   405  	sm.statelessql.Add(&QueryDetail{
   406  		conn:   kconn1,
   407  		connID: kconn1.id,
   408  	})
   409  	kconn2 := &killableConn{id: 2}
   410  	sm.statefulql.Add(&QueryDetail{
   411  		conn:   kconn2,
   412  		connID: kconn2.id,
   413  	})
   414  
   415  	// Transition to replica with no shutdown grace period should kill kconn2 but not kconn1.
   416  	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
   417  	require.NoError(t, err)
   418  	assert.False(t, kconn1.killed.Get())
   419  	assert.True(t, kconn2.killed.Get())
   420  
   421  	// Transition without grace period. No conns should be killed.
   422  	kconn2.killed.Set(false)
   423  	err = sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
   424  	require.NoError(t, err)
   425  	assert.False(t, kconn1.killed.Get())
   426  	assert.False(t, kconn2.killed.Get())
   427  
   428  	// Transition to primary with a short shutdown grace period should kill both conns.
   429  	err = sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
   430  	require.NoError(t, err)
   431  	sm.shutdownGracePeriod = 10 * time.Millisecond
   432  	err = sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
   433  	require.NoError(t, err)
   434  	assert.True(t, kconn1.killed.Get())
   435  	assert.True(t, kconn2.killed.Get())
   436  
   437  	// Primary non-serving should also kill the conn.
   438  	err = sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
   439  	require.NoError(t, err)
   440  	sm.shutdownGracePeriod = 10 * time.Millisecond
   441  	kconn1.killed.Set(false)
   442  	kconn2.killed.Set(false)
   443  	err = sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateNotServing, "")
   444  	require.NoError(t, err)
   445  	assert.True(t, kconn1.killed.Get())
   446  	assert.True(t, kconn2.killed.Get())
   447  }
   448  
   449  func TestStateManagerCheckMySQL(t *testing.T) {
   450  	defer func(saved time.Duration) { transitionRetryInterval = saved }(transitionRetryInterval)
   451  	transitionRetryInterval = 10 * time.Millisecond
   452  
   453  	sm := newTestStateManager(t)
   454  	defer sm.StopService()
   455  
   456  	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
   457  	require.NoError(t, err)
   458  
   459  	sm.te = &delayedTxEngine{}
   460  	sm.qe.(*testQueryEngine).failMySQL = true
   461  	order.Set(0)
   462  	sm.checkMySQL()
   463  	// We know checkMySQL will take atleast 50 milliseconds since txEngine.Close has a sleep in the test code
   464  	time.Sleep(10 * time.Millisecond)
   465  	assert.EqualValues(t, 1, sm.isCheckMySQLRunning())
   466  	// When we are in CheckMySQL state, we should not be accepting any new requests which aren't transactional
   467  	assert.False(t, sm.IsServing())
   468  
   469  	// Rechecking immediately should be a no-op:
   470  	sm.checkMySQL()
   471  
   472  	// Wait for closeAll to get under way.
   473  	for {
   474  		if order.Get() >= 1 {
   475  			break
   476  		}
   477  		time.Sleep(10 * time.Millisecond)
   478  	}
   479  
   480  	// Wait to get out of transitioning state.
   481  	for {
   482  		if !sm.isTransitioning() {
   483  			break
   484  		}
   485  		time.Sleep(10 * time.Millisecond)
   486  	}
   487  
   488  	// Wait for retry to finish.
   489  	for {
   490  		sm.mu.Lock()
   491  		retrying := sm.retrying
   492  		sm.mu.Unlock()
   493  		if !retrying {
   494  			break
   495  		}
   496  		time.Sleep(10 * time.Millisecond)
   497  	}
   498  
   499  	assert.True(t, sm.IsServing())
   500  	assert.Equal(t, topodatapb.TabletType_PRIMARY, sm.Target().TabletType)
   501  	assert.Equal(t, StateServing, sm.State())
   502  
   503  	// Wait for checkMySQL to finish.
   504  	timeout := time.After(2 * time.Second)
   505  	for {
   506  		select {
   507  		case <-timeout:
   508  			t.Fatalf("Timedout waiting for checkMySQL to finish")
   509  		default:
   510  			if sm.isCheckMySQLRunning() == 0 {
   511  				return
   512  			}
   513  			time.Sleep(100 * time.Millisecond)
   514  		}
   515  	}
   516  }
   517  
   518  func TestStateManagerValidations(t *testing.T) {
   519  	sm := newTestStateManager(t)
   520  	target := &querypb.Target{TabletType: topodatapb.TabletType_PRIMARY}
   521  	sm.target = proto.Clone(target).(*querypb.Target)
   522  
   523  	err := sm.StartRequest(ctx, target, false)
   524  	assert.Contains(t, err.Error(), "operation not allowed")
   525  
   526  	sm.replHealthy = false
   527  	sm.state = StateServing
   528  	sm.wantState = StateServing
   529  	err = sm.StartRequest(ctx, target, false)
   530  	assert.Contains(t, err.Error(), "operation not allowed")
   531  
   532  	sm.replHealthy = true
   533  	sm.state = StateServing
   534  	sm.wantState = StateNotServing
   535  	err = sm.StartRequest(ctx, target, false)
   536  	assert.Contains(t, err.Error(), "operation not allowed")
   537  
   538  	err = sm.StartRequest(ctx, target, true)
   539  	assert.NoError(t, err)
   540  
   541  	sm.wantState = StateServing
   542  	target.Keyspace = "a"
   543  	err = sm.StartRequest(ctx, target, false)
   544  	assert.Contains(t, err.Error(), "invalid keyspace")
   545  	err = sm.VerifyTarget(ctx, target)
   546  	assert.Contains(t, err.Error(), "invalid keyspace")
   547  
   548  	target.Keyspace = ""
   549  	target.Shard = "a"
   550  	err = sm.StartRequest(ctx, target, false)
   551  	assert.Contains(t, err.Error(), "invalid shard")
   552  	err = sm.VerifyTarget(ctx, target)
   553  	assert.Contains(t, err.Error(), "invalid shard")
   554  
   555  	target.Shard = ""
   556  	target.TabletType = topodatapb.TabletType_REPLICA
   557  	err = sm.StartRequest(ctx, target, false)
   558  	assert.Contains(t, err.Error(), "wrong tablet type")
   559  	err = sm.VerifyTarget(ctx, target)
   560  	assert.Contains(t, err.Error(), "wrong tablet type")
   561  
   562  	sm.alsoAllow = []topodatapb.TabletType{topodatapb.TabletType_REPLICA}
   563  	err = sm.StartRequest(ctx, target, false)
   564  	assert.NoError(t, err)
   565  	err = sm.VerifyTarget(ctx, target)
   566  	assert.NoError(t, err)
   567  
   568  	err = sm.StartRequest(ctx, nil, false)
   569  	assert.Contains(t, err.Error(), "No target")
   570  	err = sm.VerifyTarget(ctx, nil)
   571  	assert.Contains(t, err.Error(), "No target")
   572  
   573  	localctx := tabletenv.LocalContext()
   574  	err = sm.StartRequest(localctx, nil, false)
   575  	assert.NoError(t, err)
   576  	err = sm.VerifyTarget(localctx, nil)
   577  	assert.NoError(t, err)
   578  }
   579  
   580  func TestStateManagerWaitForRequests(t *testing.T) {
   581  	sm := newTestStateManager(t)
   582  	defer sm.StopService()
   583  	target := &querypb.Target{TabletType: topodatapb.TabletType_PRIMARY}
   584  	sm.target = target
   585  	sm.timebombDuration = 10 * time.Second
   586  
   587  	sm.replHealthy = true
   588  	err := sm.SetServingType(topodatapb.TabletType_PRIMARY, testNow, StateServing, "")
   589  	require.NoError(t, err)
   590  
   591  	err = sm.StartRequest(ctx, target, false)
   592  	require.NoError(t, err)
   593  
   594  	// This will go into transition and wait.
   595  	// Wait for that state.
   596  	go sm.StopService()
   597  	for {
   598  		if !sm.isTransitioning() {
   599  			time.Sleep(10 * time.Millisecond)
   600  			continue
   601  		}
   602  		break
   603  	}
   604  
   605  	// Verify that we're still transitioning.
   606  	assert.True(t, sm.isTransitioning())
   607  
   608  	sm.EndRequest()
   609  
   610  	for {
   611  		if sm.isTransitioning() {
   612  			time.Sleep(10 * time.Millisecond)
   613  			continue
   614  		}
   615  		break
   616  	}
   617  	assert.Equal(t, StateNotConnected, sm.State())
   618  }
   619  
   620  func TestStateManagerNotify(t *testing.T) {
   621  	sm := newTestStateManager(t)
   622  	defer sm.StopService()
   623  
   624  	blpFunc = testBlpFunc
   625  
   626  	err := sm.SetServingType(topodatapb.TabletType_REPLICA, testNow, StateServing, "")
   627  	require.NoError(t, err)
   628  
   629  	ch := make(chan *querypb.StreamHealthResponse, 5)
   630  	var wg sync.WaitGroup
   631  	wg.Add(1)
   632  	go func() {
   633  		defer wg.Done()
   634  		err := sm.hs.Stream(context.Background(), func(shr *querypb.StreamHealthResponse) error {
   635  			ch <- shr
   636  			return nil
   637  		})
   638  		assert.Contains(t, err.Error(), "tabletserver is shutdown")
   639  	}()
   640  	defer wg.Wait()
   641  
   642  	sm.Broadcast()
   643  
   644  	gotshr := <-ch
   645  	// Remove things we don't care about:
   646  	gotshr.RealtimeStats = nil
   647  	wantshr := &querypb.StreamHealthResponse{
   648  		Target: &querypb.Target{
   649  			TabletType: topodatapb.TabletType_REPLICA,
   650  		},
   651  		Serving:     true,
   652  		TabletAlias: &topodatapb.TabletAlias{},
   653  	}
   654  	sm.hcticks.Stop()
   655  	assert.Truef(t, proto.Equal(gotshr, wantshr), "got: %v, want: %v", gotshr, wantshr)
   656  	sm.StopService()
   657  }
   658  
   659  func TestRefreshReplHealthLocked(t *testing.T) {
   660  	sm := newTestStateManager(t)
   661  	defer sm.StopService()
   662  	rt := sm.rt.(*testReplTracker)
   663  
   664  	sm.target.TabletType = topodatapb.TabletType_PRIMARY
   665  	sm.replHealthy = false
   666  	lag, err := sm.refreshReplHealthLocked()
   667  	assert.Equal(t, time.Duration(0), lag)
   668  	assert.NoError(t, err)
   669  	assert.True(t, sm.replHealthy)
   670  
   671  	sm.target.TabletType = topodatapb.TabletType_REPLICA
   672  	sm.replHealthy = false
   673  	lag, err = sm.refreshReplHealthLocked()
   674  	assert.Equal(t, 1*time.Second, lag)
   675  	assert.NoError(t, err)
   676  	assert.True(t, sm.replHealthy)
   677  
   678  	rt.err = errors.New("err")
   679  	sm.replHealthy = true
   680  	lag, err = sm.refreshReplHealthLocked()
   681  	assert.Equal(t, 1*time.Second, lag)
   682  	assert.Error(t, err)
   683  	assert.False(t, sm.replHealthy)
   684  
   685  	rt.err = nil
   686  	rt.lag = 3 * time.Hour
   687  	sm.replHealthy = true
   688  	lag, err = sm.refreshReplHealthLocked()
   689  	assert.Equal(t, 3*time.Hour, lag)
   690  	assert.NoError(t, err)
   691  	assert.False(t, sm.replHealthy)
   692  }
   693  
   694  func verifySubcomponent(t *testing.T, order int64, component any, state testState) {
   695  	tos := component.(orderState)
   696  	assert.Equal(t, order, tos.Order())
   697  	assert.Equal(t, state, tos.State())
   698  }
   699  
   700  func newTestStateManager(t *testing.T) *stateManager {
   701  	order.Set(0)
   702  	config := tabletenv.NewDefaultConfig()
   703  	env := tabletenv.NewEnv(config, "StateManagerTest")
   704  	sm := &stateManager{
   705  		statelessql: NewQueryList("stateless"),
   706  		statefulql:  NewQueryList("stateful"),
   707  		olapql:      NewQueryList("olap"),
   708  		hs:          newHealthStreamer(env, &topodatapb.TabletAlias{}),
   709  		se:          &testSchemaEngine{},
   710  		rt:          &testReplTracker{lag: 1 * time.Second},
   711  		vstreamer:   &testSubcomponent{},
   712  		tracker:     &testSubcomponent{},
   713  		watcher:     &testSubcomponent{},
   714  		qe:          &testQueryEngine{},
   715  		txThrottler: &testTxThrottler{},
   716  		te:          &testTxEngine{},
   717  		messager:    &testSubcomponent{},
   718  		ddle:        &testOnlineDDLExecutor{},
   719  		throttler:   &testLagThrottler{},
   720  		tableGC:     &testTableGC{},
   721  	}
   722  	sm.Init(env, &querypb.Target{})
   723  	sm.hs.InitDBConfig(&querypb.Target{}, fakesqldb.New(t).ConnParams())
   724  	log.Infof("returning sm: %p", sm)
   725  	return sm
   726  }
   727  
   728  func (sm *stateManager) isTransitioning() bool {
   729  	if sm.transitioning.TryAcquire() {
   730  		sm.transitioning.Release()
   731  		return false
   732  	}
   733  	return true
   734  }
   735  
// order is a counter shared by the test components in this file: each of them
// stores the value returned by order.Add(1) when it is opened, closed, or
// switched between primary and non-primary. newTestStateManager resets it to 0.
var order sync2.AtomicInt64
   737  
   738  type testState int
   739  
   740  const (
   741  	_ = testState(iota)
   742  	testStateOpen
   743  	testStateClosed
   744  	testStatePrimary
   745  	testStateNonPrimary
   746  )
   747  
   748  type orderState interface {
   749  	Order() int64
   750  	State() testState
   751  }
   752  
   753  type testOrderState struct {
   754  	order int64
   755  	state testState
   756  }
   757  
   758  func (tos testOrderState) Order() int64 {
   759  	return tos.order
   760  }
   761  
   762  func (tos testOrderState) State() testState {
   763  	return tos.state
   764  }
   765  
   766  type testSchemaEngine struct {
   767  	testOrderState
   768  	ensureCalled bool
   769  	nonPrimary   bool
   770  
   771  	failMySQL bool
   772  }
   773  
   774  func (te *testSchemaEngine) EnsureConnectionAndDB(tabletType topodatapb.TabletType) error {
   775  	if te.failMySQL {
   776  		te.failMySQL = false
   777  		return errors.New("intentional error")
   778  	}
   779  	te.ensureCalled = true
   780  	return nil
   781  }
   782  
   783  func (te *testSchemaEngine) Open() error {
   784  	te.order = order.Add(1)
   785  	te.state = testStateOpen
   786  	return nil
   787  }
   788  
   789  func (te *testSchemaEngine) MakeNonPrimary() {
   790  	te.nonPrimary = true
   791  }
   792  
   793  func (te *testSchemaEngine) Close() {
   794  	te.order = order.Add(1)
   795  	te.state = testStateClosed
   796  }
   797  
   798  type testReplTracker struct {
   799  	testOrderState
   800  	lag time.Duration
   801  	err error
   802  }
   803  
   804  func (te *testReplTracker) MakePrimary() {
   805  	te.order = order.Add(1)
   806  	te.state = testStatePrimary
   807  }
   808  
   809  func (te *testReplTracker) MakeNonPrimary() {
   810  	te.order = order.Add(1)
   811  	te.state = testStateNonPrimary
   812  }
   813  
   814  func (te *testReplTracker) Close() {
   815  	te.order = order.Add(1)
   816  	te.state = testStateClosed
   817  }
   818  
   819  func (te *testReplTracker) Status() (time.Duration, error) {
   820  	return te.lag, te.err
   821  }
   822  
   823  type testQueryEngine struct {
   824  	testOrderState
   825  
   826  	failMySQL bool
   827  }
   828  
   829  func (te *testQueryEngine) Open() error {
   830  	te.order = order.Add(1)
   831  	te.state = testStateOpen
   832  	return nil
   833  }
   834  
   835  func (te *testQueryEngine) IsMySQLReachable() error {
   836  	if te.failMySQL {
   837  		te.failMySQL = false
   838  		return errors.New("intentional error")
   839  	}
   840  	return nil
   841  }
   842  
   843  func (te *testQueryEngine) Close() {
   844  	te.order = order.Add(1)
   845  	te.state = testStateClosed
   846  }
   847  
   848  type testTxEngine struct {
   849  	testOrderState
   850  }
   851  
   852  func (te *testTxEngine) AcceptReadWrite() {
   853  	te.order = order.Add(1)
   854  	te.state = testStatePrimary
   855  }
   856  
   857  func (te *testTxEngine) AcceptReadOnly() {
   858  	te.order = order.Add(1)
   859  	te.state = testStateNonPrimary
   860  }
   861  
   862  func (te *testTxEngine) Close() {
   863  	te.order = order.Add(1)
   864  	te.state = testStateClosed
   865  }
   866  
   867  type testSubcomponent struct {
   868  	testOrderState
   869  }
   870  
   871  func (te *testSubcomponent) Open() {
   872  	te.order = order.Add(1)
   873  	te.state = testStateOpen
   874  }
   875  
   876  func (te *testSubcomponent) Close() {
   877  	te.order = order.Add(1)
   878  	te.state = testStateClosed
   879  }
   880  
   881  type testTxThrottler struct {
   882  	testOrderState
   883  }
   884  
   885  func (te *testTxThrottler) Open() error {
   886  	te.order = order.Add(1)
   887  	te.state = testStateOpen
   888  	return nil
   889  }
   890  
   891  func (te *testTxThrottler) Close() {
   892  	te.order = order.Add(1)
   893  	te.state = testStateClosed
   894  }
   895  
   896  type testOnlineDDLExecutor struct {
   897  	testOrderState
   898  }
   899  
   900  func (te *testOnlineDDLExecutor) Open() error {
   901  	te.order = order.Add(1)
   902  	te.state = testStateOpen
   903  	return nil
   904  }
   905  
   906  func (te *testOnlineDDLExecutor) Close() {
   907  	te.order = order.Add(1)
   908  	te.state = testStateClosed
   909  }
   910  
   911  type testLagThrottler struct {
   912  	testOrderState
   913  }
   914  
   915  func (te *testLagThrottler) Open() error {
   916  	te.order = order.Add(1)
   917  	te.state = testStateOpen
   918  	return nil
   919  }
   920  
   921  func (te *testLagThrottler) Close() {
   922  	te.order = order.Add(1)
   923  	te.state = testStateClosed
   924  }
   925  
   926  type testTableGC struct {
   927  	testOrderState
   928  }
   929  
   930  func (te *testTableGC) Open() error {
   931  	te.order = order.Add(1)
   932  	te.state = testStateOpen
   933  	return nil
   934  }
   935  
   936  func (te *testTableGC) Close() {
   937  	te.order = order.Add(1)
   938  	te.state = testStateClosed
   939  }