github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/scheduler/internal/v3/agent/agent_test.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package agent
    15  
    16  import (
    17  	"context"
    18  	"sort"
    19  	"testing"
    20  
    21  	"github.com/golang/mock/gomock"
    22  	"github.com/pingcap/log"
    23  	"github.com/pingcap/tiflow/cdc/model"
    24  	"github.com/pingcap/tiflow/cdc/processor/tablepb"
    25  	"github.com/pingcap/tiflow/cdc/scheduler/internal"
    26  	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/compat"
    27  	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/transport"
    28  	"github.com/pingcap/tiflow/cdc/scheduler/schedulepb"
    29  	"github.com/pingcap/tiflow/pkg/config"
    30  	cerror "github.com/pingcap/tiflow/pkg/errors"
    31  	mock_etcd "github.com/pingcap/tiflow/pkg/etcd/mock"
    32  	"github.com/pingcap/tiflow/pkg/spanz"
    33  	"github.com/stretchr/testify/mock"
    34  	"github.com/stretchr/testify/require"
    35  	"go.etcd.io/etcd/client/v3/concurrency"
    36  	"go.uber.org/zap"
    37  )
    38  
    39  // See https://stackoverflow.com/a/30230552/3920448 for details.
    40  func nextPerm(p []int) {
    41  	for i := len(p) - 1; i >= 0; i-- {
    42  		if i == 0 || p[i] < len(p)-i-1 {
    43  			p[i]++
    44  			return
    45  		}
    46  		p[i] = 0
    47  	}
    48  }
    49  
    50  func getPerm(orig, p []int) []int {
    51  	result := append([]int{}, orig...)
    52  	for i, v := range p {
    53  		result[i], result[i+v] = result[i+v], result[i]
    54  	}
    55  	return result
    56  }
    57  
    58  func iterPermutation(sequence []int, fn func(sequence []int)) {
    59  	for p := make([]int, len(sequence)); p[0] < len(p); nextPerm(p) {
    60  		fn(getPerm(sequence, p))
    61  	}
    62  }
    63  
    64  func newAgent4Test() *agent {
    65  	cfg := config.GetDefaultServerConfig().Debug.Scheduler
    66  	cfg.ChangefeedSettings = config.GetDefaultReplicaConfig().Scheduler
    67  	a := &agent{
    68  		ownerInfo: ownerInfo{
    69  			CaptureInfo: model.CaptureInfo{
    70  				Version: "owner-version-1",
    71  				ID:      "owner-1",
    72  			},
    73  			Revision: schedulepb.OwnerRevision{Revision: 1},
    74  		},
    75  		compat: compat.New(cfg, map[string]*model.CaptureInfo{}),
    76  	}
    77  
    78  	a.Version = "agent-version-1"
    79  	a.Epoch = schedulepb.ProcessorEpoch{Epoch: "agent-epoch-1"}
    80  	a.CaptureID = "agent-1"
    81  	liveness := model.LivenessCaptureAlive
    82  	a.liveness = &liveness
    83  	return a
    84  }
    85  
    86  func TestNewAgent(t *testing.T) {
    87  	t.Parallel()
    88  
    89  	liveness := model.LivenessCaptureAlive
    90  	changefeed := model.DefaultChangeFeedID("changefeed-test")
    91  	me := mock_etcd.NewMockCDCEtcdClient(gomock.NewController(t))
    92  
    93  	tableExector := newMockTableExecutor()
    94  	cfg := &config.SchedulerConfig{
    95  		ChangefeedSettings: &config.ChangefeedSchedulerConfig{
    96  			EnableTableAcrossNodes: true,
    97  			RegionThreshold:        1,
    98  		},
    99  	}
   100  
   101  	// owner and revision found successfully
   102  	me.EXPECT().GetOwnerID(gomock.Any()).Return("ownerID", nil).Times(1)
   103  	me.EXPECT().GetCaptures(
   104  		gomock.Any()).Return(int64(0), []*model.CaptureInfo{{ID: "ownerID"}}, nil).Times(1)
   105  	me.EXPECT().GetOwnerRevision(gomock.Any(), gomock.Any()).Return(int64(2333), nil).Times(1)
   106  	a, err := newAgent(
   107  		context.Background(), "capture-test", &liveness, changefeed, me, tableExector, 0, cfg)
   108  	require.NoError(t, err)
   109  	require.NotNil(t, a)
   110  
   111  	// owner not found temporarily, it's ok.
   112  	me.EXPECT().GetOwnerID(gomock.Any()).
   113  		Return("", concurrency.ErrElectionNoLeader).Times(1)
   114  	a, err = newAgent(
   115  		context.Background(), "capture-test", &liveness, changefeed, me, tableExector, 0, cfg)
   116  	require.NoError(t, err)
   117  	require.NotNil(t, a)
   118  
   119  	// owner not found since pd is unstable
   120  	me.EXPECT().GetOwnerID(gomock.Any()).Return("", cerror.ErrPDEtcdAPIError).Times(1)
   121  	a, err = newAgent(
   122  		context.Background(), "capture-test", &liveness, changefeed, me, tableExector, 0, cfg)
   123  	require.Error(t, err)
   124  	require.Nil(t, a)
   125  
   126  	// owner found, get revision failed.
   127  	me.EXPECT().GetOwnerID(gomock.Any()).Return("ownerID", nil).Times(1)
   128  	me.EXPECT().GetCaptures(
   129  		gomock.Any()).Return(int64(0), []*model.CaptureInfo{{ID: "ownerID"}}, nil).Times(1)
   130  	me.EXPECT().GetOwnerRevision(gomock.Any(), gomock.Any()).
   131  		Return(int64(0), cerror.ErrPDEtcdAPIError).Times(1)
   132  	a, err = newAgent(
   133  		context.Background(), "capture-test", &liveness, changefeed, me, tableExector, 0, cfg)
   134  	require.Error(t, err)
   135  	require.Nil(t, a)
   136  
   137  	me.EXPECT().GetOwnerID(gomock.Any()).Return("ownerID", nil).Times(1)
   138  	me.EXPECT().GetCaptures(
   139  		gomock.Any()).Return(int64(0), []*model.CaptureInfo{{ID: "ownerID"}}, nil).Times(1)
   140  	me.EXPECT().GetOwnerRevision(gomock.Any(), gomock.Any()).
   141  		Return(int64(0), cerror.ErrOwnerNotFound).Times(1)
   142  	a, err = newAgent(
   143  		context.Background(), "capture-test", &liveness, changefeed, me, tableExector, 0, cfg)
   144  	require.NoError(t, err)
   145  	require.NotNil(t, a)
   146  }
   147  
   148  func TestAgentHandleMessageDispatchTable(t *testing.T) {
   149  	t.Parallel()
   150  
   151  	a := newAgent4Test()
   152  	mockTableExecutor := newMockTableExecutor()
   153  	a.tableM = newTableSpanManager(model.ChangeFeedID{}, mockTableExecutor)
   154  
   155  	removeTableRequest := &schedulepb.DispatchTableRequest{
   156  		Request: &schedulepb.DispatchTableRequest_RemoveTable{
   157  			RemoveTable: &schedulepb.RemoveTableRequest{
   158  				Span: spanz.TableIDToComparableSpan(1),
   159  			},
   160  		},
   161  	}
   162  	processorEpoch := schedulepb.ProcessorEpoch{Epoch: "agent-epoch-1"}
   163  
   164  	// remove table not exist
   165  	ctx := context.Background()
   166  	a.handleMessageDispatchTableRequest(removeTableRequest, processorEpoch)
   167  	responses, err := a.tableM.poll(ctx)
   168  	require.NoError(t, err)
   169  	require.Len(t, responses, 0)
   170  
   171  	addTableRequest := &schedulepb.DispatchTableRequest{
   172  		Request: &schedulepb.DispatchTableRequest_AddTable{
   173  			AddTable: &schedulepb.AddTableRequest{
   174  				Span:        spanz.TableIDToComparableSpan(1),
   175  				IsSecondary: true,
   176  			},
   177  		},
   178  	}
   179  
   180  	// addTableRequest should be not ignored even if it's stopping.
   181  	a.handleLivenessUpdate(model.LivenessCaptureStopping)
   182  	require.Equal(t, model.LivenessCaptureStopping, a.liveness.Load())
   183  	mockTableExecutor.On("AddTableSpan", mock.Anything, mock.Anything,
   184  		mock.Anything, mock.Anything).Return(false, nil)
   185  	a.handleMessageDispatchTableRequest(addTableRequest, processorEpoch)
   186  	responses, err = a.tableM.poll(ctx)
   187  	require.NoError(t, err)
   188  	require.Len(t, responses, 1)
   189  
   190  	addTableResponse, ok := responses[0].DispatchTableResponse.
   191  		Response.(*schedulepb.DispatchTableResponse_AddTable)
   192  	require.True(t, ok)
   193  	require.Equal(t, model.TableID(1), addTableResponse.AddTable.Status.Span.TableID)
   194  	require.Equal(t, tablepb.TableStateAbsent, addTableResponse.AddTable.Status.State)
   195  	require.False(t, a.tableM.tables.Has(spanz.TableIDToComparableSpan(1)))
   196  
   197  	// Force set liveness to alive.
   198  	*a.liveness = model.LivenessCaptureAlive
   199  	require.Equal(t, model.LivenessCaptureAlive, a.liveness.Load())
   200  	mockTableExecutor.ExpectedCalls = nil
   201  	mockTableExecutor.On("AddTableSpan", mock.Anything, mock.Anything,
   202  		mock.Anything, mock.Anything).Return(true, nil)
   203  	mockTableExecutor.On("IsAddTableSpanFinished", mock.Anything,
   204  		mock.Anything, mock.Anything).Return(false, nil)
   205  	a.handleMessageDispatchTableRequest(addTableRequest, processorEpoch)
   206  	_, err = a.tableM.poll(ctx)
   207  	require.NoError(t, err)
   208  
   209  	mockTableExecutor.ExpectedCalls = mockTableExecutor.ExpectedCalls[:1]
   210  	mockTableExecutor.On("IsAddTableSpanFinished", mock.Anything,
   211  		mock.Anything, mock.Anything).Return(true, nil)
   212  	a.handleMessageDispatchTableRequest(addTableRequest, processorEpoch)
   213  	responses, err = a.tableM.poll(ctx)
   214  	require.NoError(t, err)
   215  	require.Len(t, responses, 1)
   216  
   217  	addTableResponse, ok = responses[0].DispatchTableResponse.
   218  		Response.(*schedulepb.DispatchTableResponse_AddTable)
   219  	require.True(t, ok)
   220  	require.Equal(t, model.TableID(1), addTableResponse.AddTable.Status.Span.TableID)
   221  	require.Equal(t, tablepb.TableStatePrepared, addTableResponse.AddTable.Status.State)
   222  	require.True(t, a.tableM.tables.Has(spanz.TableIDToComparableSpan(1)))
   223  
   224  	// let the prepared table become replicating, by set `IsSecondary` to false.
   225  	addTableRequest.Request.(*schedulepb.DispatchTableRequest_AddTable).
   226  		AddTable.IsSecondary = false
   227  
   228  	// only mock `IsAddTableSpanFinished`, since `AddTable` by start a prepared
   229  	// table span always success.
   230  	mockTableExecutor.ExpectedCalls = nil
   231  	mockTableExecutor.On("IsAddTableSpanFinished", mock.Anything,
   232  		mock.Anything, mock.Anything).Return(false, nil)
   233  
   234  	a.handleMessageDispatchTableRequest(addTableRequest, processorEpoch)
   235  	responses, err = a.tableM.poll(ctx)
   236  	require.NoError(t, err)
   237  	require.Len(t, responses, 1)
   238  
   239  	addTableResponse, ok = responses[0].DispatchTableResponse.
   240  		Response.(*schedulepb.DispatchTableResponse_AddTable)
   241  	require.True(t, ok)
   242  	require.Equal(t, model.TableID(1), addTableResponse.AddTable.Status.Span.TableID)
   243  	require.Equal(t, tablepb.TableStatePrepared, addTableResponse.AddTable.Status.State)
   244  	require.True(t, a.tableM.tables.Has(spanz.TableIDToComparableSpan(1)))
   245  
   246  	mockTableExecutor.ExpectedCalls = nil
   247  	mockTableExecutor.On("IsAddTableSpanFinished", mock.Anything,
   248  		mock.Anything, mock.Anything).Return(true, nil)
   249  	a.handleMessageDispatchTableRequest(addTableRequest, processorEpoch)
   250  	responses, err = a.tableM.poll(ctx)
   251  	require.NoError(t, err)
   252  	require.Len(t, responses, 1)
   253  
   254  	addTableResponse, ok = responses[0].DispatchTableResponse.
   255  		Response.(*schedulepb.DispatchTableResponse_AddTable)
   256  	require.True(t, ok)
   257  	require.Equal(t, model.TableID(1), addTableResponse.AddTable.Status.Span.TableID)
   258  	require.Equal(t, tablepb.TableStateReplicating, addTableResponse.AddTable.Status.State)
   259  	require.True(t, a.tableM.tables.Has(spanz.TableIDToComparableSpan(1)))
   260  
   261  	mockTableExecutor.On("RemoveTableSpan", mock.Anything, mock.Anything).
   262  		Return(false)
   263  	// remove table in the replicating state failed, should still in replicating.
   264  	a.handleMessageDispatchTableRequest(removeTableRequest, processorEpoch)
   265  	responses, err = a.tableM.poll(ctx)
   266  	require.NoError(t, err)
   267  	require.Len(t, responses, 1)
   268  	removeTableResponse, ok := responses[0].DispatchTableResponse.
   269  		Response.(*schedulepb.DispatchTableResponse_RemoveTable)
   270  	require.True(t, ok)
   271  	require.Equal(t, model.TableID(1), removeTableResponse.RemoveTable.Status.Span.TableID)
   272  	require.Equal(t, tablepb.TableStateStopping, removeTableResponse.RemoveTable.Status.State)
   273  	require.True(t, a.tableM.tables.Has(spanz.TableIDToComparableSpan(1)))
   274  
   275  	mockTableExecutor.ExpectedCalls = nil
   276  	mockTableExecutor.On("RemoveTableSpan", mock.Anything, mock.Anything).
   277  		Return(true)
   278  	mockTableExecutor.On("IsRemoveTableSpanFinished", mock.Anything, mock.Anything).
   279  		Return(3, false)
   280  	// remove table in the replicating state failed, should still in replicating.
   281  	a.handleMessageDispatchTableRequest(removeTableRequest, processorEpoch)
   282  	responses, err = a.tableM.poll(ctx)
   283  	require.NoError(t, err)
   284  	require.Len(t, responses, 1)
   285  	removeTableResponse, ok = responses[0].DispatchTableResponse.
   286  		Response.(*schedulepb.DispatchTableResponse_RemoveTable)
   287  	require.True(t, ok)
   288  	require.Equal(t, model.TableID(1), removeTableResponse.RemoveTable.Status.Span.TableID)
   289  	require.Equal(t, tablepb.TableStateStopping, removeTableResponse.RemoveTable.Status.State)
   290  
   291  	mockTableExecutor.ExpectedCalls = mockTableExecutor.ExpectedCalls[:1]
   292  	mockTableExecutor.On("IsRemoveTableSpanFinished", mock.Anything, mock.Anything).
   293  		Return(3, true)
   294  	// remove table in the replicating state success, should in stopped
   295  	a.handleMessageDispatchTableRequest(removeTableRequest, processorEpoch)
   296  	responses, err = a.tableM.poll(ctx)
   297  	require.NoError(t, err)
   298  	require.Len(t, responses, 1)
   299  	removeTableResponse, ok = responses[0].DispatchTableResponse.
   300  		Response.(*schedulepb.DispatchTableResponse_RemoveTable)
   301  	require.True(t, ok)
   302  	require.Equal(t, model.TableID(1), removeTableResponse.RemoveTable.Status.Span.TableID)
   303  	require.Equal(t, tablepb.TableStateStopped, removeTableResponse.RemoveTable.Status.State)
   304  	require.Equal(t, model.Ts(3), removeTableResponse.RemoveTable.Checkpoint.CheckpointTs)
   305  	require.False(t, a.tableM.tables.Has(spanz.TableIDToComparableSpan(1)))
   306  }
   307  
   308  func TestAgentHandleMessageHeartbeat(t *testing.T) {
   309  	t.Parallel()
   310  
   311  	a := newAgent4Test()
   312  	mockTableExecutor := newMockTableExecutor()
   313  	a.tableM = newTableSpanManager(model.ChangeFeedID{}, mockTableExecutor)
   314  
   315  	for i := 0; i < 5; i++ {
   316  		a.tableM.addTableSpan(spanz.TableIDToComparableSpan(int64(i)))
   317  	}
   318  
   319  	a.tableM.tables.GetV(spanz.TableIDToComparableSpan(0)).state = tablepb.TableStatePreparing
   320  	a.tableM.tables.GetV(spanz.TableIDToComparableSpan(1)).state = tablepb.TableStatePrepared
   321  	a.tableM.tables.GetV(spanz.TableIDToComparableSpan(2)).state = tablepb.TableStateReplicating
   322  	a.tableM.tables.GetV(spanz.TableIDToComparableSpan(3)).state = tablepb.TableStateStopping
   323  	a.tableM.tables.GetV(spanz.TableIDToComparableSpan(4)).state = tablepb.TableStateStopped
   324  
   325  	mockTableExecutor.tables.ReplaceOrInsert(
   326  		spanz.TableIDToComparableSpan(0), tablepb.TableStatePreparing)
   327  	mockTableExecutor.tables.ReplaceOrInsert(
   328  		spanz.TableIDToComparableSpan(1), tablepb.TableStatePrepared)
   329  	mockTableExecutor.tables.ReplaceOrInsert(
   330  		spanz.TableIDToComparableSpan(2), tablepb.TableStateReplicating)
   331  	mockTableExecutor.tables.ReplaceOrInsert(
   332  		spanz.TableIDToComparableSpan(3), tablepb.TableStateStopping)
   333  	mockTableExecutor.tables.ReplaceOrInsert(
   334  		spanz.TableIDToComparableSpan(4), tablepb.TableStateStopped)
   335  
   336  	heartbeat := &schedulepb.Message{
   337  		Header: &schedulepb.Message_Header{
   338  			Version:       "version-1",
   339  			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
   340  		},
   341  		MsgType: schedulepb.MsgHeartbeat,
   342  		From:    "owner-1",
   343  		Heartbeat: &schedulepb.Heartbeat{
   344  			Spans: []tablepb.Span{
   345  				spanz.TableIDToComparableSpan(0),
   346  				spanz.TableIDToComparableSpan(1),
   347  				spanz.TableIDToComparableSpan(2),
   348  				spanz.TableIDToComparableSpan(3),
   349  				spanz.TableIDToComparableSpan(4),
   350  				spanz.TableIDToComparableSpan(5),
   351  				spanz.TableIDToComparableSpan(6),
   352  				spanz.TableIDToComparableSpan(7),
   353  				spanz.TableIDToComparableSpan(8),
   354  				spanz.TableIDToComparableSpan(9),
   355  			},
   356  		},
   357  	}
   358  
   359  	response, _ := a.handleMessage([]*schedulepb.Message{heartbeat})
   360  	require.Len(t, response, 1)
   361  	require.Equal(t, model.LivenessCaptureAlive, response[0].GetHeartbeatResponse().Liveness)
   362  
   363  	result := response[0].GetHeartbeatResponse().Tables
   364  	require.Len(t, result, 10)
   365  	sort.Slice(result, func(i, j int) bool {
   366  		return result[i].Span.Less(&result[j].Span)
   367  	})
   368  
   369  	require.Equal(t, tablepb.TableStatePreparing, result[0].State)
   370  	require.Equal(t, tablepb.TableStatePrepared, result[1].State)
   371  	require.Equal(t, tablepb.TableStateReplicating, result[2].State)
   372  	require.Equal(t, tablepb.TableStateStopping, result[3].State)
   373  	require.Equal(t, tablepb.TableStateStopped, result[4].State)
   374  	for i := 5; i < 10; i++ {
   375  		require.Equal(t, tablepb.TableStateAbsent, result[i].State)
   376  	}
   377  
   378  	a.tableM.tables.GetV(spanz.TableIDToComparableSpan(1)).task = &dispatchTableTask{IsRemove: true}
   379  	response, _ = a.handleMessage([]*schedulepb.Message{heartbeat})
   380  	result = response[0].GetHeartbeatResponse().Tables
   381  	sort.Slice(result, func(i, j int) bool {
   382  		return result[i].Span.TableID < result[j].Span.TableID
   383  	})
   384  	require.Equal(t, tablepb.TableStateStopping, result[1].State)
   385  
   386  	a.handleLivenessUpdate(model.LivenessCaptureStopping)
   387  	response, _ = a.handleMessage([]*schedulepb.Message{heartbeat})
   388  	require.Len(t, response, 1)
   389  	require.Equal(t, model.LivenessCaptureStopping, response[0].GetHeartbeatResponse().Liveness)
   390  
   391  	a.handleLivenessUpdate(model.LivenessCaptureAlive)
   392  	heartbeat.Heartbeat.IsStopping = true
   393  	response, _ = a.handleMessage([]*schedulepb.Message{heartbeat})
   394  	require.Equal(t, model.LivenessCaptureStopping, response[0].GetHeartbeatResponse().Liveness)
   395  	require.Equal(t, model.LivenessCaptureStopping, a.liveness.Load())
   396  }
   397  
   398  func TestAgentPermuteMessages(t *testing.T) {
   399  	t.Parallel()
   400  
   401  	a := newAgent4Test()
   402  	mockTableExecutor := newMockTableExecutor()
   403  	a.tableM = newTableSpanManager(model.ChangeFeedID{}, mockTableExecutor)
   404  
   405  	trans := transport.NewMockTrans()
   406  	a.trans = trans
   407  
   408  	// all possible inbound Messages can be received
   409  	var inboundMessages []*schedulepb.Message
   410  	inboundMessages = append(inboundMessages, &schedulepb.Message{
   411  		Header: &schedulepb.Message_Header{
   412  			Version:        a.ownerInfo.Version,
   413  			OwnerRevision:  a.ownerInfo.Revision,
   414  			ProcessorEpoch: a.Epoch,
   415  		},
   416  		MsgType: schedulepb.MsgDispatchTableRequest,
   417  		From:    a.ownerInfo.ID,
   418  		To:      a.CaptureID,
   419  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
   420  			Request: &schedulepb.DispatchTableRequest_RemoveTable{
   421  				RemoveTable: &schedulepb.RemoveTableRequest{
   422  					Span: spanz.TableIDToComparableSpan(1),
   423  				},
   424  			},
   425  		},
   426  	})
   427  	for _, isSecondary := range []bool{true, false} {
   428  		inboundMessages = append(inboundMessages, &schedulepb.Message{
   429  			Header: &schedulepb.Message_Header{
   430  				Version:        a.ownerInfo.Version,
   431  				OwnerRevision:  a.ownerInfo.Revision,
   432  				ProcessorEpoch: a.Epoch,
   433  			},
   434  			MsgType: schedulepb.MsgDispatchTableRequest,
   435  			From:    a.ownerInfo.ID,
   436  			To:      a.CaptureID,
   437  			DispatchTableRequest: &schedulepb.DispatchTableRequest{
   438  				Request: &schedulepb.DispatchTableRequest_AddTable{
   439  					AddTable: &schedulepb.AddTableRequest{
   440  						Span:        spanz.TableIDToComparableSpan(1),
   441  						IsSecondary: isSecondary,
   442  					},
   443  				},
   444  			},
   445  		})
   446  	}
   447  
   448  	inboundMessages = append(inboundMessages, &schedulepb.Message{
   449  		Header: &schedulepb.Message_Header{
   450  			Version:        "version-1",
   451  			OwnerRevision:  schedulepb.OwnerRevision{Revision: 1},
   452  			ProcessorEpoch: a.Epoch,
   453  		},
   454  		MsgType: schedulepb.MsgHeartbeat,
   455  		From:    "owner-1",
   456  		Heartbeat: &schedulepb.Heartbeat{
   457  			Spans: []tablepb.Span{{TableID: 1}},
   458  		},
   459  	})
   460  
   461  	states := []tablepb.TableState{
   462  		tablepb.TableStateAbsent,
   463  		tablepb.TableStatePreparing,
   464  		tablepb.TableStatePrepared,
   465  		tablepb.TableStateReplicating,
   466  		tablepb.TableStateStopping,
   467  		tablepb.TableStateStopped,
   468  	}
   469  	ctx := context.Background()
   470  	tableID := model.TableID(1)
   471  	for _, state := range states {
   472  		iterPermutation([]int{0, 1, 2, 3}, func(sequence []int) {
   473  			t.Logf("test %v, %v", state, sequence)
   474  			switch state {
   475  			case tablepb.TableStatePreparing:
   476  				mockTableExecutor.tables.ReplaceOrInsert(
   477  					spanz.TableIDToComparableSpan(tableID), tablepb.TableStatePreparing)
   478  			case tablepb.TableStatePrepared:
   479  				mockTableExecutor.tables.ReplaceOrInsert(
   480  					spanz.TableIDToComparableSpan(tableID), tablepb.TableStatePrepared)
   481  			case tablepb.TableStateReplicating:
   482  				mockTableExecutor.tables.ReplaceOrInsert(
   483  					spanz.TableIDToComparableSpan(tableID), tablepb.TableStateReplicating)
   484  			case tablepb.TableStateStopping:
   485  				mockTableExecutor.tables.ReplaceOrInsert(
   486  					spanz.TableIDToComparableSpan(tableID), tablepb.TableStateStopping)
   487  			case tablepb.TableStateStopped:
   488  				mockTableExecutor.tables.ReplaceOrInsert(
   489  					spanz.TableIDToComparableSpan(tableID), tablepb.TableStateStopped)
   490  			case tablepb.TableStateAbsent:
   491  			default:
   492  			}
   493  
   494  			for _, idx := range sequence {
   495  				message := inboundMessages[idx]
   496  				if message.MsgType == schedulepb.MsgHeartbeat {
   497  					trans.RecvBuffer = append(trans.RecvBuffer, message)
   498  					_, err := a.Tick(ctx)
   499  					require.NoError(t, err)
   500  					require.Len(t, trans.SendBuffer, 1)
   501  					heartbeatResponse := trans.SendBuffer[0].HeartbeatResponse
   502  					trans.SendBuffer = trans.SendBuffer[:0]
   503  					require.Equal(t, model.LivenessCaptureAlive, heartbeatResponse.Liveness)
   504  
   505  					continue
   506  				}
   507  
   508  				switch message.DispatchTableRequest.Request.(type) {
   509  				case *schedulepb.DispatchTableRequest_AddTable:
   510  					for _, ok := range []bool{false, true} {
   511  						mockTableExecutor.On("AddTableSpan", mock.Anything, mock.Anything,
   512  							mock.Anything, mock.Anything).Return(ok, nil)
   513  						for _, ok1 := range []bool{false, true} {
   514  							mockTableExecutor.On("IsAddTableSpanFinished", mock.Anything,
   515  								mock.Anything, mock.Anything).Return(ok1, nil)
   516  
   517  							trans.RecvBuffer = append(trans.RecvBuffer, message)
   518  							_, err := a.Tick(ctx)
   519  							require.NoError(t, err)
   520  							trans.SendBuffer = trans.SendBuffer[:0]
   521  
   522  							mockTableExecutor.ExpectedCalls = mockTableExecutor.ExpectedCalls[:1]
   523  						}
   524  						mockTableExecutor.ExpectedCalls = nil
   525  					}
   526  				case *schedulepb.DispatchTableRequest_RemoveTable:
   527  					for _, ok := range []bool{false, true} {
   528  						mockTableExecutor.On("RemoveTableSpan", mock.Anything,
   529  							mock.Anything).Return(ok)
   530  						for _, ok1 := range []bool{false, true} {
   531  							trans.RecvBuffer = append(trans.RecvBuffer, message)
   532  							mockTableExecutor.On("IsRemoveTableSpanFinished",
   533  								mock.Anything, mock.Anything).Return(0, ok1)
   534  							_, err := a.Tick(ctx)
   535  							require.NoError(t, err)
   536  							if len(trans.SendBuffer) != 0 {
   537  								require.Len(t, trans.SendBuffer, 1)
   538  								response, yes := trans.SendBuffer[0].DispatchTableResponse.
   539  									Response.(*schedulepb.DispatchTableResponse_RemoveTable)
   540  								trans.SendBuffer = trans.SendBuffer[:0]
   541  								require.True(t, yes)
   542  								expected := tablepb.TableStateStopping
   543  								if ok && ok1 {
   544  									expected = tablepb.TableStateStopped
   545  								}
   546  								require.Equal(t, expected, response.RemoveTable.Status.State)
   547  								mockTableExecutor.ExpectedCalls = mockTableExecutor.
   548  									ExpectedCalls[:1]
   549  							}
   550  						}
   551  						mockTableExecutor.ExpectedCalls = nil
   552  					}
   553  				default:
   554  					panic("unknown request")
   555  				}
   556  			}
   557  		})
   558  	}
   559  }
   560  
   561  func TestAgentHandleMessage(t *testing.T) {
   562  	t.Parallel()
   563  
   564  	mockTableExecutor := newMockTableExecutor()
   565  	tableM := newTableSpanManager(model.ChangeFeedID{}, mockTableExecutor)
   566  	a := newAgent4Test()
   567  	a.tableM = tableM
   568  
   569  	heartbeat := &schedulepb.Message{
   570  		Header: &schedulepb.Message_Header{
   571  			Version:       a.ownerInfo.Version,
   572  			OwnerRevision: a.ownerInfo.Revision,
   573  		},
   574  		MsgType:   schedulepb.MsgHeartbeat,
   575  		From:      a.ownerInfo.ID,
   576  		Heartbeat: &schedulepb.Heartbeat{},
   577  	}
   578  
   579  	// handle the first heartbeat, from the known owner.
   580  	response, _ := a.handleMessage([]*schedulepb.Message{heartbeat})
   581  	require.Len(t, response, 1)
   582  
   583  	addTableRequest := &schedulepb.Message{
   584  		Header: &schedulepb.Message_Header{
   585  			Version:       a.ownerInfo.Version,
   586  			OwnerRevision: a.ownerInfo.Revision,
   587  			// wrong epoch
   588  			ProcessorEpoch: schedulepb.ProcessorEpoch{Epoch: "wrong-agent-epoch-1"},
   589  		},
   590  		MsgType: schedulepb.MsgDispatchTableRequest,
   591  		From:    a.ownerInfo.ID,
   592  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
   593  			Request: &schedulepb.DispatchTableRequest_AddTable{
   594  				AddTable: &schedulepb.AddTableRequest{
   595  					Span:        spanz.TableIDToComparableSpan(1),
   596  					IsSecondary: true,
   597  					Checkpoint:  tablepb.Checkpoint{},
   598  				},
   599  			},
   600  		},
   601  	}
   602  	// wrong epoch, ignored
   603  	responses, _ := a.handleMessage([]*schedulepb.Message{addTableRequest})
   604  	require.False(t, tableM.tables.Has(spanz.TableIDToComparableSpan(1)))
   605  	require.Len(t, responses, 0)
   606  
   607  	// correct epoch, processing.
   608  	addTableRequest.Header.ProcessorEpoch = a.Epoch
   609  	_, _ = a.handleMessage([]*schedulepb.Message{addTableRequest})
   610  	require.True(t, a.tableM.tables.Has(spanz.TableIDToComparableSpan(1)))
   611  
   612  	heartbeat.Header.OwnerRevision.Revision = 2
   613  	response, _ = a.handleMessage([]*schedulepb.Message{heartbeat})
   614  	require.Len(t, response, 1)
   615  
   616  	// this should never happen in real world
   617  	unknownMessage := &schedulepb.Message{
   618  		Header: &schedulepb.Message_Header{
   619  			Version:        a.ownerInfo.Version,
   620  			OwnerRevision:  schedulepb.OwnerRevision{Revision: 2},
   621  			ProcessorEpoch: a.Epoch,
   622  		},
   623  		MsgType: schedulepb.MsgUnknown,
   624  		From:    a.ownerInfo.ID,
   625  	}
   626  
   627  	response, _ = a.handleMessage([]*schedulepb.Message{unknownMessage})
   628  	require.Len(t, response, 0)
   629  
   630  	// staled message
   631  	heartbeat.Header.OwnerRevision.Revision = 1
   632  	response, _ = a.handleMessage([]*schedulepb.Message{heartbeat})
   633  	require.Len(t, response, 0)
   634  }
   635  
   636  func TestAgentUpdateOwnerInfo(t *testing.T) {
   637  	t.Parallel()
   638  
   639  	a := newAgent4Test()
   640  	ok := a.handleOwnerInfo("owner-1", 1, "version-1")
   641  	require.True(t, ok)
   642  
   643  	// staled owner
   644  	ok = a.handleOwnerInfo("owner-2", 0, "version-1")
   645  	require.False(t, ok)
   646  
   647  	// new owner with higher revision
   648  	ok = a.handleOwnerInfo("owner-2", 2, "version-1")
   649  	require.True(t, ok)
   650  }
   651  
   652  func TestAgentTick(t *testing.T) {
   653  	t.Parallel()
   654  
   655  	a := newAgent4Test()
   656  	trans := transport.NewMockTrans()
   657  	mockTableExecutor := newMockTableExecutor()
   658  	a.trans = trans
   659  	a.tableM = newTableSpanManager(model.ChangeFeedID{}, mockTableExecutor)
   660  
   661  	heartbeat := &schedulepb.Message{
   662  		Header: &schedulepb.Message_Header{
   663  			Version:       a.ownerInfo.Version,
   664  			OwnerRevision: a.ownerInfo.Revision,
   665  			// first heartbeat from the owner, no processor epoch
   666  			ProcessorEpoch: schedulepb.ProcessorEpoch{},
   667  		},
   668  		MsgType:   schedulepb.MsgHeartbeat,
   669  		From:      a.ownerInfo.ID,
   670  		Heartbeat: &schedulepb.Heartbeat{Spans: nil},
   671  	}
   672  
   673  	// receive first heartbeat from the owner
   674  	trans.RecvBuffer = append(trans.RecvBuffer, heartbeat)
   675  
   676  	ctx := context.Background()
   677  	_, err := a.Tick(ctx)
   678  	require.NoError(t, err)
   679  	require.Len(t, trans.SendBuffer, 1)
   680  	heartbeatResponse := trans.SendBuffer[0]
   681  	trans.SendBuffer = trans.SendBuffer[:0]
   682  
   683  	require.Equal(t, schedulepb.MsgHeartbeatResponse, heartbeatResponse.MsgType)
   684  	require.Equal(t, a.ownerInfo.ID, heartbeatResponse.To)
   685  	require.Equal(t, a.CaptureID, heartbeatResponse.From)
   686  
   687  	addTableRequest := &schedulepb.Message{
   688  		Header: &schedulepb.Message_Header{
   689  			Version:        a.ownerInfo.Version,
   690  			OwnerRevision:  a.ownerInfo.Revision,
   691  			ProcessorEpoch: a.Epoch,
   692  		},
   693  		MsgType: schedulepb.MsgDispatchTableRequest,
   694  		From:    a.ownerInfo.ID,
   695  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
   696  			Request: &schedulepb.DispatchTableRequest_AddTable{
   697  				AddTable: &schedulepb.AddTableRequest{
   698  					Span:        spanz.TableIDToComparableSpan(1),
   699  					IsSecondary: true,
   700  					Checkpoint:  tablepb.Checkpoint{},
   701  				},
   702  			},
   703  		},
   704  	}
   705  
   706  	removeTableRequest := &schedulepb.Message{
   707  		Header: &schedulepb.Message_Header{
   708  			Version:        a.ownerInfo.Version,
   709  			OwnerRevision:  a.ownerInfo.Revision,
   710  			ProcessorEpoch: a.Epoch,
   711  		},
   712  		MsgType: schedulepb.MsgDispatchTableRequest,
   713  		From:    a.ownerInfo.ID,
   714  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
   715  			Request: &schedulepb.DispatchTableRequest_RemoveTable{
   716  				RemoveTable: &schedulepb.RemoveTableRequest{
   717  					Span: tablepb.Span{TableID: 2},
   718  				},
   719  			},
   720  		},
   721  	}
   722  	var messages []*schedulepb.Message
   723  	messages = append(messages, addTableRequest)
   724  	messages = append(messages, removeTableRequest)
   725  	trans.RecvBuffer = append(trans.RecvBuffer, messages...)
   726  
   727  	mockTableExecutor.On("AddTableSpan", mock.Anything,
   728  		mock.Anything, mock.Anything, mock.Anything).Return(true, nil)
   729  	mockTableExecutor.On("IsAddTableSpanFinished", mock.Anything,
   730  		mock.Anything, mock.Anything).Return(false, nil)
   731  	_, err = a.Tick(ctx)
   732  	require.NoError(t, err)
   733  	trans.SendBuffer = trans.SendBuffer[:0]
   734  
   735  	trans.RecvBuffer = append(trans.RecvBuffer, addTableRequest)
   736  
   737  	mockTableExecutor.ExpectedCalls = mockTableExecutor.ExpectedCalls[:1]
   738  	mockTableExecutor.On("IsAddTableSpanFinished", mock.Anything,
   739  		mock.Anything, mock.Anything).Return(true, nil)
   740  	_, err = a.Tick(ctx)
   741  	require.NoError(t, err)
   742  	responses := trans.SendBuffer[:len(trans.SendBuffer)]
   743  	trans.SendBuffer = trans.SendBuffer[:0]
   744  	require.Len(t, responses, 1)
   745  	require.Equal(t, schedulepb.MsgDispatchTableResponse, responses[0].MsgType)
   746  	resp, ok := responses[0].DispatchTableResponse.
   747  		Response.(*schedulepb.DispatchTableResponse_AddTable)
   748  	require.True(t, ok)
   749  	require.Equal(t, tablepb.TableStatePrepared, resp.AddTable.Status.State)
   750  
   751  	require.NoError(t, a.Close())
   752  }
   753  
   754  func TestAgentHandleLivenessUpdate(t *testing.T) {
   755  	t.Parallel()
   756  
   757  	// Test liveness via heartbeat.
   758  	mockTableExecutor := newMockTableExecutor()
   759  	tableM := newTableSpanManager(model.ChangeFeedID{}, mockTableExecutor)
   760  	a := newAgent4Test()
   761  	a.tableM = tableM
   762  	require.Equal(t, model.LivenessCaptureAlive, a.liveness.Load())
   763  	a.handleMessage([]*schedulepb.Message{{
   764  		Header: &schedulepb.Message_Header{
   765  			Version:        a.ownerInfo.Version,
   766  			OwnerRevision:  a.ownerInfo.Revision,
   767  			ProcessorEpoch: a.Epoch,
   768  		},
   769  		MsgType: schedulepb.MsgHeartbeat,
   770  		From:    a.ownerInfo.ID,
   771  		Heartbeat: &schedulepb.Heartbeat{
   772  			IsStopping: true,
   773  		},
   774  	}})
   775  	require.Equal(t, model.LivenessCaptureStopping, a.liveness.Load())
   776  
   777  	a.handleLivenessUpdate(model.LivenessCaptureAlive)
   778  	require.Equal(t, model.LivenessCaptureStopping, a.liveness.Load())
   779  }
   780  
   781  func TestAgentCommitAddTableDuringStopping(t *testing.T) {
   782  	t.Parallel()
   783  
   784  	a := newAgent4Test()
   785  	mockTableExecutor := newMockTableExecutor()
   786  	a.tableM = newTableSpanManager(model.ChangeFeedID{}, mockTableExecutor)
   787  	trans := transport.NewMockTrans()
   788  	a.trans = trans
   789  
   790  	prepareTableMsg := &schedulepb.Message{
   791  		Header: &schedulepb.Message_Header{
   792  			Version:        "owner-version-1",
   793  			OwnerRevision:  schedulepb.OwnerRevision{Revision: 1},
   794  			ProcessorEpoch: schedulepb.ProcessorEpoch{Epoch: "agent-epoch-1"},
   795  		},
   796  		To:      "agent-1",
   797  		From:    "owner-1",
   798  		MsgType: schedulepb.MsgDispatchTableRequest,
   799  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
   800  			Request: &schedulepb.DispatchTableRequest_AddTable{
   801  				AddTable: &schedulepb.AddTableRequest{
   802  					Span:        spanz.TableIDToComparableSpan(1),
   803  					IsSecondary: true,
   804  				},
   805  			},
   806  		},
   807  	}
   808  	trans.RecvBuffer = []*schedulepb.Message{prepareTableMsg}
   809  
   810  	// Prepare add table is still in-progress.
   811  	mockTableExecutor.
   812  		On("AddTableSpan", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).
   813  		Return(true, nil).Once()
   814  	mockTableExecutor.
   815  		On("IsAddTableSpanFinished", mock.Anything, mock.Anything, mock.Anything, mock.Anything).
   816  		Return(false, nil).Once()
   817  	_, err := a.Tick(context.Background())
   818  	require.Nil(t, err)
   819  	require.Len(t, trans.SendBuffer, 0)
   820  
   821  	mockTableExecutor.
   822  		On("AddTableSpan", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).
   823  		Return(true, nil).Once()
   824  	mockTableExecutor.
   825  		On("IsAddTableSpanFinished", mock.Anything, mock.Anything, mock.Anything, mock.Anything).
   826  		Return(true, nil).Once()
   827  	_, err = a.Tick(context.Background())
   828  	require.Nil(t, err)
   829  	require.Len(t, trans.SendBuffer, 1)
   830  	require.Equal(t, trans.SendBuffer[0].MsgType, schedulepb.MsgDispatchTableResponse)
   831  
   832  	// Commit add table request should not be rejected.
   833  	commitTableMsg := &schedulepb.Message{
   834  		Header: &schedulepb.Message_Header{
   835  			Version:        "owner-version-1",
   836  			OwnerRevision:  schedulepb.OwnerRevision{Revision: 1},
   837  			ProcessorEpoch: schedulepb.ProcessorEpoch{Epoch: "agent-epoch-1"},
   838  		},
   839  		To:      "agent-1",
   840  		From:    "owner-1",
   841  		MsgType: schedulepb.MsgDispatchTableRequest,
   842  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
   843  			Request: &schedulepb.DispatchTableRequest_AddTable{
   844  				AddTable: &schedulepb.AddTableRequest{
   845  					Span:        spanz.TableIDToComparableSpan(1),
   846  					IsSecondary: false,
   847  				},
   848  			},
   849  		},
   850  	}
   851  	trans.RecvBuffer = []*schedulepb.Message{commitTableMsg}
   852  	trans.SendBuffer = []*schedulepb.Message{}
   853  	mockTableExecutor.
   854  		On("AddTableSpan", mock.Anything, mock.Anything, mock.Anything, mock.Anything).
   855  		Return(true, nil).Once()
   856  	mockTableExecutor.
   857  		On("IsAddTableSpanFinished", mock.Anything, mock.Anything, mock.Anything, mock.Anything).
   858  		Return(false, nil).Once()
   859  	// Set liveness to stopping.
   860  	a.liveness.Store(model.LivenessCaptureStopping)
   861  	_, err = a.Tick(context.Background())
   862  	require.Nil(t, err)
   863  	require.Len(t, trans.SendBuffer, 1)
   864  
   865  	trans.RecvBuffer = []*schedulepb.Message{}
   866  	trans.SendBuffer = []*schedulepb.Message{}
   867  	mockTableExecutor.
   868  		On("IsAddTableSpanFinished", mock.Anything, mock.Anything, mock.Anything, mock.Anything).
   869  		Return(true, nil).Once()
   870  	_, err = a.Tick(context.Background())
   871  	require.Nil(t, err)
   872  	require.Len(t, trans.SendBuffer, 1)
   873  	require.Equal(t, schedulepb.MsgDispatchTableResponse, trans.SendBuffer[0].MsgType)
   874  	addTableResp := trans.SendBuffer[0].DispatchTableResponse.GetAddTable()
   875  	require.Equal(t, tablepb.TableStateReplicating, addTableResp.Status.State)
   876  }
   877  
   878  func TestAgentTransportCompat(t *testing.T) {
   879  	t.Parallel()
   880  
   881  	a := newAgent4Test()
   882  	mockTableExecutor := newMockTableExecutor()
   883  	a.tableM = newTableSpanManager(model.ChangeFeedID{}, mockTableExecutor)
   884  	trans := transport.NewMockTrans()
   885  	a.trans = trans
   886  	a.compat = compat.New(&config.SchedulerConfig{
   887  		ChangefeedSettings: &config.ChangefeedSchedulerConfig{
   888  			EnableTableAcrossNodes: true,
   889  			RegionThreshold:        1,
   890  		},
   891  	}, map[model.CaptureID]*model.CaptureInfo{})
   892  	ctx := context.Background()
   893  
   894  	// Disable span replication.
   895  	a.handleOwnerInfo("a", a.ownerInfo.Revision.Revision+1, "4.0.0")
   896  	require.False(t, a.compat.CheckSpanReplicationEnabled())
   897  
   898  	// Test compat.BeforeTransportSend.
   899  	a.sendMsgs(
   900  		ctx, []*schedulepb.Message{{
   901  			Header: &schedulepb.Message_Header{
   902  				Version:        a.Version,
   903  				ProcessorEpoch: a.Epoch,
   904  			},
   905  			From: a.CaptureID, To: "a", MsgType: schedulepb.MsgDispatchTableResponse,
   906  			DispatchTableResponse: &schedulepb.DispatchTableResponse{
   907  				Response: &schedulepb.DispatchTableResponse_AddTable{
   908  					AddTable: &schedulepb.AddTableResponse{
   909  						Status: &tablepb.TableStatus{
   910  							Span: spanz.TableIDToComparableSpan(1),
   911  						},
   912  					},
   913  				},
   914  			},
   915  		}})
   916  	require.EqualValues(t, []*schedulepb.Message{{
   917  		Header: &schedulepb.Message_Header{
   918  			Version:        a.Version,
   919  			ProcessorEpoch: a.Epoch,
   920  			OwnerRevision:  a.ownerInfo.Revision,
   921  		},
   922  		From: a.CaptureID, To: "a", MsgType: schedulepb.MsgDispatchTableResponse,
   923  		DispatchTableResponse: &schedulepb.DispatchTableResponse{
   924  			Response: &schedulepb.DispatchTableResponse_AddTable{
   925  				AddTable: &schedulepb.AddTableResponse{
   926  					Status: &tablepb.TableStatus{
   927  						TableID: 1,
   928  						Span:    spanz.TableIDToComparableSpan(1),
   929  					},
   930  				},
   931  			},
   932  		},
   933  	}}, trans.SendBuffer)
   934  	// Test compat.AfterTransportReceive.
   935  	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
   936  		Header: &schedulepb.Message_Header{
   937  			Version:       a.Version,
   938  			OwnerRevision: a.ownerInfo.Revision,
   939  		},
   940  		From: "a", To: a.CaptureID, MsgType: schedulepb.MsgDispatchTableRequest,
   941  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
   942  			Request: &schedulepb.DispatchTableRequest_AddTable{
   943  				AddTable: &schedulepb.AddTableRequest{
   944  					TableID: 1,
   945  				},
   946  			},
   947  		},
   948  	})
   949  	msgs, err := a.recvMsgs(ctx)
   950  	require.NoError(t, err)
   951  	require.EqualValues(t, []*schedulepb.Message{{
   952  		Header: &schedulepb.Message_Header{
   953  			Version:       a.Version,
   954  			OwnerRevision: a.ownerInfo.Revision,
   955  		},
   956  		From: "a", To: a.CaptureID, MsgType: schedulepb.MsgDispatchTableRequest,
   957  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
   958  			Request: &schedulepb.DispatchTableRequest_AddTable{
   959  				AddTable: &schedulepb.AddTableRequest{
   960  					TableID: 1,
   961  					Span:    spanz.TableIDToComparableSpan(1),
   962  				},
   963  			},
   964  		},
   965  	}}, msgs)
   966  }
   967  
   968  func TestAgentDropMsgIfChangefeedEpochMismatch(t *testing.T) {
   969  	t.Parallel()
   970  
   971  	a := newAgent4Test()
   972  	mockTableExecutor := newMockTableExecutor()
   973  	a.tableM = newTableSpanManager(model.ChangeFeedID{}, mockTableExecutor)
   974  	trans := transport.NewMockTrans()
   975  	a.trans = trans
   976  	a.compat = compat.New(&config.SchedulerConfig{
   977  		ChangefeedSettings: &config.ChangefeedSchedulerConfig{
   978  			EnableTableAcrossNodes: true,
   979  			RegionThreshold:        1,
   980  		},
   981  	}, map[model.CaptureID]*model.CaptureInfo{})
   982  	a.changefeedEpoch = 1
   983  	ctx := context.Background()
   984  
   985  	// Enable changefeed epoch.
   986  	a.handleOwnerInfo(
   987  		"a", a.ownerInfo.Revision.Revision+1, compat.ChangefeedEpochMinVersion.String())
   988  
   989  	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
   990  		Header: &schedulepb.Message_Header{
   991  			Version:         a.Version,
   992  			OwnerRevision:   a.ownerInfo.Revision,
   993  			ChangefeedEpoch: schedulepb.ChangefeedEpoch{Epoch: 1},
   994  		},
   995  		From: "a", To: a.CaptureID, MsgType: schedulepb.MsgDispatchTableRequest,
   996  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
   997  			Request: &schedulepb.DispatchTableRequest_AddTable{
   998  				AddTable: &schedulepb.AddTableRequest{
   999  					TableID: 1,
  1000  				},
  1001  			},
  1002  		},
  1003  	})
  1004  	trans.RecvBuffer = append(trans.RecvBuffer,
  1005  		&schedulepb.Message{
  1006  			Header: &schedulepb.Message_Header{
  1007  				Version:         a.Version,
  1008  				OwnerRevision:   a.ownerInfo.Revision,
  1009  				ChangefeedEpoch: schedulepb.ChangefeedEpoch{Epoch: 2}, // mismatch
  1010  			},
  1011  			From: "a", To: a.CaptureID, MsgType: schedulepb.MsgDispatchTableRequest,
  1012  			DispatchTableRequest: &schedulepb.DispatchTableRequest{
  1013  				Request: &schedulepb.DispatchTableRequest_AddTable{
  1014  					AddTable: &schedulepb.AddTableRequest{
  1015  						TableID: 1,
  1016  					},
  1017  				},
  1018  			},
  1019  		})
  1020  	msgs, err := a.recvMsgs(ctx)
  1021  	require.NoError(t, err)
  1022  	require.Len(t, msgs, 1)
  1023  	require.EqualValues(t, "a", msgs[0].From)
  1024  
  1025  	// Disable changefeed epoch
  1026  	unsupported := *compat.ChangefeedEpochMinVersion
  1027  	unsupported.Major--
  1028  	a.handleOwnerInfo(
  1029  		"a", a.ownerInfo.Revision.Revision+1, unsupported.String())
  1030  
  1031  	trans.RecvBuffer = trans.RecvBuffer[:0]
  1032  	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
  1033  		Header: &schedulepb.Message_Header{
  1034  			Version:         unsupported.String(),
  1035  			OwnerRevision:   a.ownerInfo.Revision,
  1036  			ChangefeedEpoch: schedulepb.ChangefeedEpoch{Epoch: 2}, // mistmatch
  1037  		},
  1038  		From: "a", To: a.CaptureID, MsgType: schedulepb.MsgDispatchTableRequest,
  1039  		DispatchTableRequest: &schedulepb.DispatchTableRequest{
  1040  			Request: &schedulepb.DispatchTableRequest_AddTable{
  1041  				AddTable: &schedulepb.AddTableRequest{
  1042  					TableID: 1,
  1043  				},
  1044  			},
  1045  		},
  1046  	})
  1047  	msgs, err = a.recvMsgs(ctx)
  1048  	require.NoError(t, err)
  1049  	require.Len(t, msgs, 1)
  1050  	require.EqualValues(t, "a", msgs[0].From)
  1051  }
  1052  
// MockTableExecutor is a mock implementation of TableExecutor.
//
// It embeds mock.Mock so that tests can program the results of individual
// calls, and additionally keeps a per-span state map that its methods consult
// and update.
type MockTableExecutor struct {
	mock.Mock

	// it's preferred to use `pipeline.MockPipeline` here to make the test more vivid.
	// tables maps each known span to the table state recorded for it.
	tables *spanz.BtreeMap[tablepb.TableState]
}

// Compile-time check that *MockTableExecutor satisfies internal.TableExecutor.
var _ internal.TableExecutor = (*MockTableExecutor)(nil)
  1062  
  1063  // newMockTableExecutor creates a new mock table executor.
  1064  func newMockTableExecutor() *MockTableExecutor {
  1065  	return &MockTableExecutor{
  1066  		tables: spanz.NewBtreeMap[tablepb.TableState](),
  1067  	}
  1068  }
  1069  
  1070  // AddTableSpan adds a table span to the executor.
  1071  func (e *MockTableExecutor) AddTableSpan(
  1072  	ctx context.Context, span tablepb.Span, checkpoint tablepb.Checkpoint, isPrepare bool,
  1073  ) (bool, error) {
  1074  	startTs := checkpoint.CheckpointTs
  1075  	log.Info("AddTableSpan",
  1076  		zap.String("span", span.String()),
  1077  		zap.Any("startTs", startTs),
  1078  		zap.Bool("isPrepare", isPrepare))
  1079  
  1080  	state, ok := e.tables.Get(span)
  1081  	if ok {
  1082  		switch state {
  1083  		case tablepb.TableStatePreparing:
  1084  			return true, nil
  1085  		case tablepb.TableStatePrepared:
  1086  			if !isPrepare {
  1087  				e.tables.ReplaceOrInsert(span, tablepb.TableStateReplicating)
  1088  			}
  1089  			return true, nil
  1090  		case tablepb.TableStateReplicating:
  1091  			return true, nil
  1092  		case tablepb.TableStateStopped:
  1093  			e.tables.Delete(span)
  1094  		}
  1095  	}
  1096  	args := e.Called(ctx, span, startTs, isPrepare)
  1097  	if args.Bool(0) {
  1098  		e.tables.ReplaceOrInsert(span, tablepb.TableStatePreparing)
  1099  	}
  1100  	return args.Bool(0), args.Error(1)
  1101  }
  1102  
  1103  // IsAddTableSpanFinished determines if the table span has been added.
  1104  func (e *MockTableExecutor) IsAddTableSpanFinished(span tablepb.Span, isPrepare bool) bool {
  1105  	_, ok := e.tables.Get(span)
  1106  	if !ok {
  1107  		log.Panic("table which was added is not found",
  1108  			zap.String("span", span.String()),
  1109  			zap.Bool("isPrepare", isPrepare))
  1110  	}
  1111  
  1112  	args := e.Called(span, isPrepare)
  1113  	if args.Bool(0) {
  1114  		e.tables.ReplaceOrInsert(span, tablepb.TableStatePrepared)
  1115  		if !isPrepare {
  1116  			e.tables.ReplaceOrInsert(span, tablepb.TableStateReplicating)
  1117  		}
  1118  		return true
  1119  	}
  1120  
  1121  	e.tables.ReplaceOrInsert(span, tablepb.TableStatePreparing)
  1122  	if !isPrepare {
  1123  		e.tables.ReplaceOrInsert(span, tablepb.TableStatePrepared)
  1124  	}
  1125  
  1126  	return false
  1127  }
  1128  
  1129  // RemoveTableSpan removes a table span from the executor.
  1130  func (e *MockTableExecutor) RemoveTableSpan(span tablepb.Span) bool {
  1131  	state, ok := e.tables.Get(span)
  1132  	if !ok {
  1133  		log.Warn("table to be remove is not found", zap.String("span", span.String()))
  1134  		return true
  1135  	}
  1136  	switch state {
  1137  	case tablepb.TableStateStopping, tablepb.TableStateStopped:
  1138  		return true
  1139  	case tablepb.TableStatePreparing, tablepb.TableStatePrepared, tablepb.TableStateReplicating:
  1140  	default:
  1141  	}
  1142  	// the current `processor implementation, does not consider table's state
  1143  	log.Info("RemoveTableSpan", zap.String("span", span.String()), zap.Any("state", state))
  1144  
  1145  	args := e.Called(span)
  1146  	if args.Bool(0) {
  1147  		e.tables.ReplaceOrInsert(span, tablepb.TableStateStopped)
  1148  	}
  1149  	return args.Bool(0)
  1150  }
  1151  
  1152  // IsRemoveTableSpanFinished determines if the table span has been removed.
  1153  func (e *MockTableExecutor) IsRemoveTableSpanFinished(span tablepb.Span) (model.Ts, bool) {
  1154  	state, ok := e.tables.Get(span)
  1155  	if !ok {
  1156  		// the real `table executor` processor, would panic in such case.
  1157  		log.Warn("table to be removed is not found",
  1158  			zap.String("span", span.String()))
  1159  		return 0, true
  1160  	}
  1161  	args := e.Called(span)
  1162  	if args.Bool(1) {
  1163  		log.Info("remove table finished, remove it from the executor",
  1164  			zap.String("span", span.String()), zap.Any("state", state))
  1165  		e.tables.Delete(span)
  1166  	} else {
  1167  		// revert the state back to old state, assume it's `replicating`,
  1168  		// but `preparing` / `prepared` can also be removed.
  1169  		e.tables.ReplaceOrInsert(span, tablepb.TableStateReplicating)
  1170  	}
  1171  
  1172  	return model.Ts(args.Int(0)), args.Bool(1)
  1173  }
  1174  
  1175  // GetTableSpanCount returns all tables that are currently being adding, running, or removing.
  1176  func (e *MockTableExecutor) GetTableSpanCount() int {
  1177  	var result int
  1178  	e.tables.Ascend(func(span tablepb.Span, value tablepb.TableState) bool {
  1179  		result++
  1180  		return true
  1181  	})
  1182  	return result
  1183  }
  1184  
  1185  // GetTableSpanStatus implements TableExecutor interface
  1186  func (e *MockTableExecutor) GetTableSpanStatus(
  1187  	span tablepb.Span, collectStat bool,
  1188  ) tablepb.TableStatus {
  1189  	state, ok := e.tables.Get(span)
  1190  	if !ok {
  1191  		state = tablepb.TableStateAbsent
  1192  	}
  1193  	return tablepb.TableStatus{
  1194  		Span:  span,
  1195  		State: state,
  1196  	}
  1197  }