// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package v3

import (
	"context"
	"math"
	"testing"

	"github.com/pingcap/tiflow/cdc/model"
	"github.com/pingcap/tiflow/cdc/processor/tablepb"
	"github.com/pingcap/tiflow/cdc/redo"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/compat"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/keyspan"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/member"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/replication"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/scheduler"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/transport"
	"github.com/pingcap/tiflow/cdc/scheduler/schedulepb"
	"github.com/pingcap/tiflow/pkg/config"
	"github.com/pingcap/tiflow/pkg/leakutil"
	"github.com/pingcap/tiflow/pkg/spanz"
	"github.com/pingcap/tiflow/pkg/version"
	"github.com/stretchr/testify/require"
)

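// TestMain installs the package-wide goroutine leak detector, so any test in
// this file that leaks goroutines fails.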
func TestMain(m *testing.M) {
	leakutil.SetUpLeakTest(m)
}

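// TestCoordinatorSendMsgs verifies that outbound messages are stamped with
// the coordinator's version, owner revision, and capture ID, and that the
// destination's ProcessorEpoch is attached only once the capture manager
// knows the capture's epoch.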
func TestCoordinatorSendMsgs(t *testing.T) {
	t.Parallel()
	ctx := context.Background()
	coord, trans := newTestCoordinator(&config.SchedulerConfig{
		ChangefeedSettings: &config.ChangefeedSchedulerConfig{
			// Enable span replication.
			EnableTableAcrossNodes: true,
			RegionThreshold:        10000,
		},
	})
	coord.version = "6.2.0"
	coord.revision = schedulepb.OwnerRevision{Revision: 3}
	coord.captureID = "0"
	cfg := config.NewDefaultSchedulerConfig()
	coord.captureM = member.NewCaptureManager("", model.ChangeFeedID{}, coord.revision, cfg)
	coord.sendMsgs(
		ctx, []*schedulepb.Message{{To: "1", MsgType: schedulepb.MsgDispatchTableRequest}})

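	// After capture "1" registers an epoch, messages to it should carry
	// that epoch in the header.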
	coord.captureM.Captures["1"] = &member.CaptureStatus{
		Epoch: schedulepb.ProcessorEpoch{Epoch: "epoch"},
	}
	coord.sendMsgs(
		ctx, []*schedulepb.Message{{To: "1", MsgType: schedulepb.MsgDispatchTableRequest}})

	require.EqualValues(t, []*schedulepb.Message{{
		Header: &schedulepb.Message_Header{
			Version:       coord.version,
			OwnerRevision: coord.revision,
		},
		From: "0", To: "1", MsgType: schedulepb.MsgDispatchTableRequest,
	}, {
		Header: &schedulepb.Message_Header{
			Version:        coord.version,
			OwnerRevision:  coord.revision,
			ProcessorEpoch: schedulepb.ProcessorEpoch{Epoch: "epoch"},
		},
		From: "0", To: "1", MsgType: schedulepb.MsgDispatchTableRequest,
	}}, trans.SendBuffer)
}

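// TestCoordinatorRecvMsgs verifies inbound filtering: only messages that
// match the coordinator's owner revision and are addressed to its capture ID
// are delivered; messages with a mismatched revision or a different
// destination are dropped.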
func TestCoordinatorRecvMsgs(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	coord, trans := newTestCoordinator(&config.SchedulerConfig{
		ChangefeedSettings: &config.ChangefeedSchedulerConfig{
			// Enable span replication.
			EnableTableAcrossNodes: true,
			RegionThreshold:        10000,
		},
	})
	coord.version = "6.2.0"
	coord.revision = schedulepb.OwnerRevision{Revision: 3}
	coord.captureID = "0"

	trans.RecvBuffer = append(trans.RecvBuffer,
		&schedulepb.Message{
			Header: &schedulepb.Message_Header{
				OwnerRevision: coord.revision,
			},
			From: "1", To: coord.captureID, MsgType: schedulepb.MsgDispatchTableResponse,
		})
	trans.RecvBuffer = append(trans.RecvBuffer,
		&schedulepb.Message{
			Header: &schedulepb.Message_Header{
				OwnerRevision: schedulepb.OwnerRevision{Revision: 4},
			},
			From: "2", To: coord.captureID, MsgType: schedulepb.MsgDispatchTableResponse,
		})
	trans.RecvBuffer = append(trans.RecvBuffer,
		&schedulepb.Message{
			Header: &schedulepb.Message_Header{
				OwnerRevision: coord.revision,
			},
			From: "3", To: "lost", MsgType: schedulepb.MsgDispatchTableResponse,
		})

	msgs, err := coord.recvMsgs(ctx)
	require.NoError(t, err)
	require.EqualValues(t, []*schedulepb.Message{{
		Header: &schedulepb.Message_Header{
			OwnerRevision: coord.revision,
		},
		From: "1", To: "0", MsgType: schedulepb.MsgDispatchTableResponse,
	}}, msgs)
}

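// TestCoordinatorTransportCompat verifies the transport compatibility shims:
// compat.BeforeTransportSend fills the legacy TableID field from the Span
// before a message is sent, and compat.AfterTransportReceive restores the
// Span from the TableID on receipt, so the coordinator can interoperate with
// captures that predate span replication.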
func TestCoordinatorTransportCompat(t *testing.T) {
	t.Parallel()

	coord, trans := newTestCoordinator(&config.SchedulerConfig{
		ChangefeedSettings: &config.ChangefeedSchedulerConfig{
			RegionThreshold: 0, // Disable span replication.
		},
	})

	ctx := context.Background()
	// Test compat.BeforeTransportSend.
	coord.sendMsgs(
		ctx, []*schedulepb.Message{{
			To:      "b",
			MsgType: schedulepb.MsgDispatchTableRequest,
			DispatchTableRequest: &schedulepb.DispatchTableRequest{
				Request: &schedulepb.DispatchTableRequest_AddTable{
					AddTable: &schedulepb.AddTableRequest{Span: spanz.TableIDToComparableSpan(1)},
				},
			},
		}})

	require.EqualValues(t, []*schedulepb.Message{{
		Header: &schedulepb.Message_Header{
			Version:       coord.version,
			OwnerRevision: coord.revision,
		},
		From: "a", To: "b", MsgType: schedulepb.MsgDispatchTableRequest,
		DispatchTableRequest: &schedulepb.DispatchTableRequest{
			Request: &schedulepb.DispatchTableRequest_AddTable{
				AddTable: &schedulepb.AddTableRequest{
					TableID: 1,
					Span:    spanz.TableIDToComparableSpan(1),
				},
			},
		},
	}}, trans.SendBuffer)

	// Test compat.AfterTransportReceive.
	trans.RecvBuffer = append(trans.RecvBuffer,
		&schedulepb.Message{
			Header: &schedulepb.Message_Header{
				OwnerRevision: coord.revision,
			},
			From: "b", To: coord.captureID, MsgType: schedulepb.MsgDispatchTableResponse,
			DispatchTableResponse: &schedulepb.DispatchTableResponse{
				Response: &schedulepb.DispatchTableResponse_AddTable{
					AddTable: &schedulepb.AddTableResponse{
						Status: &tablepb.TableStatus{
							TableID: 1,
						},
					},
				},
			},
		})
	msgs, err := coord.recvMsgs(ctx)
	require.NoError(t, err)
	require.EqualValues(t, []*schedulepb.Message{{
		Header: &schedulepb.Message_Header{
			OwnerRevision: coord.revision,
		},
		From: "b", To: coord.captureID, MsgType: schedulepb.MsgDispatchTableResponse,
		DispatchTableResponse: &schedulepb.DispatchTableResponse{
			Response: &schedulepb.DispatchTableResponse_AddTable{
				AddTable: &schedulepb.AddTableResponse{
					Status: &tablepb.TableStatus{
						TableID: 1,
						Span:    spanz.TableIDToComparableSpan(1),
					},
				},
			},
		},
	}}, msgs)
}

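// newCoordinatorForTest builds a bare coordinator with the given identity and
// scheduler configuration. It wires real replication, capture, and scheduler
// managers, but leaves the transport and span reconciler for the caller to
// set.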
func newCoordinatorForTest(
	captureID model.CaptureID,
	changefeedID model.ChangeFeedID,
	ownerRevision int64,
	cfg *config.SchedulerConfig,
	redoMetaManager redo.MetaManager,
) *coordinator {
	revision := schedulepb.OwnerRevision{Revision: ownerRevision}

	return &coordinator{
		version:   version.ReleaseSemver(),
		revision:  revision,
		captureID: captureID,
		replicationM: replication.NewReplicationManager(
			cfg.MaxTaskConcurrency, changefeedID),
		captureM:        member.NewCaptureManager(captureID, changefeedID, revision, cfg),
		schedulerM:      scheduler.NewSchedulerManager(changefeedID, cfg),
		changefeedID:    changefeedID,
		compat:          compat.New(cfg, map[model.CaptureID]*model.CaptureInfo{}),
		redoMetaManager: redoMetaManager,
	}
}

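// newTestCoordinator returns a coordinator wired for testing: capture ID "a",
// owner revision 1, a disabled redo meta manager, a mock transport whose
// SendBuffer/RecvBuffer the tests inspect directly, and a span reconciler
// backed by a mock region cache.
//
// A minimal usage sketch (not a complete test), mirroring how the tests
// below drive it:
//
//	coord, trans := newTestCoordinator(&config.SchedulerConfig{
//		ChangefeedSettings: config.GetDefaultReplicaConfig().Scheduler,
//	})
//	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{ /* ... */ })
//	msgs, err := coord.recvMsgs(context.Background())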
func newTestCoordinator(cfg *config.SchedulerConfig) (*coordinator, *transport.MockTrans) {
	coord := newCoordinatorForTest("a", model.ChangeFeedID{}, 1, cfg, redo.NewDisabledMetaManager())
	trans := transport.NewMockTrans()
	coord.trans = trans
	coord.reconciler = keyspan.NewReconcilerForTests(
		keyspan.NewMockRegionCache(), cfg.ChangefeedSettings)
	return coord, trans
}

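// TestCoordinatorHeartbeat verifies the bootstrap flow: the first poll
// broadcasts heartbeats to every alive capture, and once all captures have
// responded, the coordinator dispatches the table that no capture is
// replicating yet.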
func TestCoordinatorHeartbeat(t *testing.T) {
	t.Parallel()

	coord, trans := newTestCoordinator(&config.SchedulerConfig{
		HeartbeatTick:      math.MaxInt,
		CollectStatsTick:   math.MaxInt,
		MaxTaskConcurrency: 1,
		AddTableBatchSize:  50,
		ChangefeedSettings: config.GetDefaultReplicaConfig().Scheduler,
	})

	// Prepare captureM and replicationM.
	// Two captures "a", "b".
	// Three tables 1 2 3.
	ctx := context.Background()
	currentTables := []model.TableID{1, 2, 3}
	aliveCaptures := map[model.CaptureID]*model.CaptureInfo{"a": {}, "b": {}}
	_, err := coord.poll(ctx, 0, currentTables, aliveCaptures, schedulepb.NewBarrierWithMinTs(0))
	require.Nil(t, err)
	msgs := trans.SendBuffer
	require.Len(t, msgs, 2)
	require.NotNil(t, msgs[0].Heartbeat, msgs[0])
	require.NotNil(t, msgs[1].Heartbeat, msgs[1])
	require.False(t, coord.captureM.CheckAllCaptureInitialized())

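	// Capture "b" responds with an empty heartbeat, while capture "a"
	// reports tables 1 and 2 as replicating, leaving table 3 unassigned.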
	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
		Header: &schedulepb.Message_Header{
			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
		},
		To:                "a",
		From:              "b",
		MsgType:           schedulepb.MsgHeartbeatResponse,
		HeartbeatResponse: &schedulepb.HeartbeatResponse{},
	})
	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
		Header: &schedulepb.Message_Header{
			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
		},
		To:      "a",
		From:    "a",
		MsgType: schedulepb.MsgHeartbeatResponse,
		HeartbeatResponse: &schedulepb.HeartbeatResponse{
			Tables: []tablepb.TableStatus{
				{Span: spanz.TableIDToComparableSpan(1), State: tablepb.TableStateReplicating},
				{Span: spanz.TableIDToComparableSpan(2), State: tablepb.TableStateReplicating},
			},
		},
	})
	trans.SendBuffer = []*schedulepb.Message{}
	_, err = coord.poll(ctx, 0, currentTables, aliveCaptures, schedulepb.NewBarrierWithMinTs(0))
	require.Nil(t, err)
	require.True(t, coord.captureM.CheckAllCaptureInitialized())
	msgs = trans.SendBuffer
	require.Len(t, msgs, 1)
	// Basic scheduler, make sure all tables get replicated.
	require.EqualValues(t, 3, msgs[0].DispatchTableRequest.GetAddTable().Span.TableID)
	require.Equal(t, coord.replicationM.GetReplicationSetForTests().Len(), 3)
}

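// TestCoordinatorAddCapture verifies rebalancing when a capture joins: with
// all three tables on capture "a", the arrival of capture "b" should produce
// an AddTable request that prepares a table on "b" as a secondary first.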
func TestCoordinatorAddCapture(t *testing.T) {
	t.Parallel()
	coord, trans := newTestCoordinator(&config.SchedulerConfig{
		HeartbeatTick:      math.MaxInt,
		CollectStatsTick:   math.MaxInt,
		MaxTaskConcurrency: 1,
		ChangefeedSettings: config.GetDefaultReplicaConfig().Scheduler,
	})

	// Prepare captureM and replicationM.
	// One capture "a".
	// Three tables 1 2 3.
	coord.captureM.Captures["a"] = &member.CaptureStatus{State: member.CaptureStateInitialized}
	coord.captureM.SetInitializedForTests(true)
	require.True(t, coord.captureM.CheckAllCaptureInitialized())
	init := map[string][]tablepb.TableStatus{
		"a": {
			{Span: spanz.TableIDToComparableSpan(1), State: tablepb.TableStateReplicating},
			{Span: spanz.TableIDToComparableSpan(2), State: tablepb.TableStateReplicating},
			{Span: spanz.TableIDToComparableSpan(3), State: tablepb.TableStateReplicating},
		},
	}
	msgs, err := coord.replicationM.HandleCaptureChanges(init, nil, 0)
	require.Nil(t, err)
	require.Len(t, msgs, 0)
	require.Equal(t, coord.replicationM.GetReplicationSetForTests().Len(), 3)

	// Capture "b" comes online; after it responds to the heartbeat,
	// move one table to capture "b".
	ctx := context.Background()
	currentTables := []model.TableID{1, 2, 3}
	aliveCaptures := map[model.CaptureID]*model.CaptureInfo{"a": {}, "b": {}}
	_, err = coord.poll(ctx, 0, currentTables, aliveCaptures, schedulepb.NewBarrierWithMinTs(0))
	require.Nil(t, err)
	msgs = trans.SendBuffer
	require.Len(t, msgs, 1)
	require.NotNil(t, msgs[0].Heartbeat, msgs[0])

	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
		Header: &schedulepb.Message_Header{
			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
		},
		To:                "a",
		From:              "b",
		MsgType:           schedulepb.MsgHeartbeatResponse,
		HeartbeatResponse: &schedulepb.HeartbeatResponse{},
	})
	trans.SendBuffer = []*schedulepb.Message{}
	_, err = coord.poll(ctx, 0, currentTables, aliveCaptures, schedulepb.NewBarrierWithMinTs(0))
	require.Nil(t, err)
	msgs = trans.SendBuffer
	require.Len(t, msgs, 1)
	require.NotNil(t, msgs[0].DispatchTableRequest.GetAddTable(), msgs[0])
	require.True(t, msgs[0].DispatchTableRequest.GetAddTable().IsSecondary)
}

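// TestCoordinatorRemoveCapture verifies failover when a capture leaves: after
// capture "c" drops out of the alive set, the table it was replicating should
// be dispatched to one of the remaining captures.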
func TestCoordinatorRemoveCapture(t *testing.T) {
	t.Parallel()

	coord, trans := newTestCoordinator(&config.SchedulerConfig{
		HeartbeatTick:      math.MaxInt,
		CollectStatsTick:   math.MaxInt,
		MaxTaskConcurrency: 1,
		AddTableBatchSize:  50,
		ChangefeedSettings: config.GetDefaultReplicaConfig().Scheduler,
	})

	// Prepare captureM and replicationM.
	// Three captures "a" "b" "c".
	// Three tables 1 2 3.
	coord.captureM.Captures["a"] = &member.CaptureStatus{State: member.CaptureStateInitialized}
	coord.captureM.Captures["b"] = &member.CaptureStatus{State: member.CaptureStateInitialized}
	coord.captureM.Captures["c"] = &member.CaptureStatus{State: member.CaptureStateInitialized}
	coord.captureM.SetInitializedForTests(true)
	require.True(t, coord.captureM.CheckAllCaptureInitialized())
	init := map[string][]tablepb.TableStatus{
		"a": {{Span: spanz.TableIDToComparableSpan(1), State: tablepb.TableStateReplicating}},
		"b": {{Span: spanz.TableIDToComparableSpan(2), State: tablepb.TableStateReplicating}},
		"c": {{Span: spanz.TableIDToComparableSpan(3), State: tablepb.TableStateReplicating}},
	}
	msgs, err := coord.replicationM.HandleCaptureChanges(init, nil, 0)
	require.Nil(t, err)
	require.Len(t, msgs, 0)
	require.Equal(t, coord.replicationM.GetReplicationSetForTests().Len(), 3)

	// Capture "c" is removed; add table 3 to another capture.
	ctx := context.Background()
	currentTables := []model.TableID{1, 2, 3}
	aliveCaptures := map[model.CaptureID]*model.CaptureInfo{"a": {}, "b": {}}
	_, err = coord.poll(ctx, 0, currentTables, aliveCaptures, schedulepb.NewBarrierWithMinTs(0))
	require.Nil(t, err)
	msgs = trans.SendBuffer
	require.Len(t, msgs, 1)
	require.NotNil(t, msgs[0].DispatchTableRequest.GetAddTable(), msgs[0])
	require.EqualValues(t, 3, msgs[0].DispatchTableRequest.GetAddTable().Span.TableID)
}

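// TestCoordinatorDrainCapture walks DrainCapture through its cases: a capture
// that has not finished initializing still counts as having pending work, a
// capture owning no tables drains immediately with a zero count, and a
// capture that is the primary of a table reports that table as pending until
// it is moved away.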
func TestCoordinatorDrainCapture(t *testing.T) {
	t.Parallel()

	coord := coordinator{
		version:   "6.2.0",
		revision:  schedulepb.OwnerRevision{Revision: 3},
		captureID: "a",
	}
	cfg := config.NewDefaultSchedulerConfig()
	coord.captureM = member.NewCaptureManager("", model.ChangeFeedID{}, coord.revision, cfg)

	coord.captureM.SetInitializedForTests(true)
	coord.captureM.Captures["a"] = &member.CaptureStatus{State: member.CaptureStateUninitialized}
	count, err := coord.DrainCapture("a")
	require.Nil(t, err)
	require.Equal(t, 1, count)

	coord.captureM.Captures["a"] = &member.CaptureStatus{State: member.CaptureStateInitialized}
	coord.replicationM = replication.NewReplicationManager(10, model.ChangeFeedID{})
	count, err = coord.DrainCapture("a")
	require.NoError(t, err)
	require.Equal(t, 0, count)

	coord.replicationM.SetReplicationSetForTests(&replication.ReplicationSet{
		Span:    spanz.TableIDToComparableSpan(1),
		State:   replication.ReplicationSetStateReplicating,
		Primary: "a",
	})

	count, err = coord.DrainCapture("a")
	require.NoError(t, err)
	require.Equal(t, 1, count)

	coord.captureM.Captures["b"] = &member.CaptureStatus{State: member.CaptureStateInitialized}
	coord.replicationM.SetReplicationSetForTests(&replication.ReplicationSet{
		Span:    spanz.TableIDToComparableSpan(2),
		State:   replication.ReplicationSetStateReplicating,
		Primary: "b",
	})

	count, err = coord.DrainCapture("a")
	require.NoError(t, err)
	require.Equal(t, 1, count)

	coord.schedulerM = scheduler.NewSchedulerManager(
		model.ChangeFeedID{}, config.NewDefaultSchedulerConfig())
	count, err = coord.DrainCapture("b")
	require.NoError(t, err)
	require.Equal(t, 1, count)
}

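// TestCoordinatorAdvanceCheckpoint verifies that the changefeed watermark
// (checkpoint ts, resolved ts, last-synced ts, and puller-egress resolved ts)
// is aggregated from the per-table statuses carried in heartbeat responses,
// and that it keeps advancing even while a newly added capture is still
// uninitialized.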
func TestCoordinatorAdvanceCheckpoint(t *testing.T) {
	t.Parallel()

	coord, trans := newTestCoordinator(&config.SchedulerConfig{
		HeartbeatTick:      math.MaxInt,
		CollectStatsTick:   math.MaxInt,
		MaxTaskConcurrency: 1,
		ChangefeedSettings: config.GetDefaultReplicaConfig().Scheduler,
	})

	// Prepare captureM and replicationM.
	// Two captures "a", "b".
	// Two tables 1 2.
	ctx := context.Background()
	currentTables := []model.TableID{1, 2}
	aliveCaptures := map[model.CaptureID]*model.CaptureInfo{"a": {}, "b": {}}
	_, err := coord.poll(ctx, 0, currentTables, aliveCaptures, schedulepb.NewBarrierWithMinTs(0))
	require.Nil(t, err)

	// Initialize captures.
	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
		Header: &schedulepb.Message_Header{
			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
		},
		To:                "a",
		From:              "b",
		MsgType:           schedulepb.MsgHeartbeatResponse,
		HeartbeatResponse: &schedulepb.HeartbeatResponse{},
	})
	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
		Header: &schedulepb.Message_Header{
			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
		},
		To:      "a",
		From:    "a",
		MsgType: schedulepb.MsgHeartbeatResponse,
		HeartbeatResponse: &schedulepb.HeartbeatResponse{
			Tables: []tablepb.TableStatus{
				{
					Span:  spanz.TableIDToComparableSpan(1),
					State: tablepb.TableStateReplicating,
					Checkpoint: tablepb.Checkpoint{
						CheckpointTs: 2, ResolvedTs: 4, LastSyncedTs: 3,
					},
					Stats: tablepb.Stats{
						StageCheckpoints: map[string]tablepb.Checkpoint{
							"puller-egress": {
								ResolvedTs: model.Ts(5),
							},
						},
					},
				},
				{
					Span:  spanz.TableIDToComparableSpan(2),
					State: tablepb.TableStateReplicating,
					Checkpoint: tablepb.Checkpoint{
						CheckpointTs: 2, ResolvedTs: 4, LastSyncedTs: 4,
					},
					Stats: tablepb.Stats{
						StageCheckpoints: map[string]tablepb.Checkpoint{
							"puller-egress": {
								ResolvedTs: model.Ts(6),
							},
						},
					},
				},
			},
		},
	})
	watermark, err := coord.poll(ctx, 0, currentTables, aliveCaptures, schedulepb.NewBarrierWithMinTs(5))
	require.Nil(t, err)
	require.True(t, coord.captureM.CheckAllCaptureInitialized())
	require.EqualValues(t, 2, watermark.CheckpointTs)
	require.EqualValues(t, 4, watermark.ResolvedTs)
	require.EqualValues(t, 4, watermark.LastSyncedTs)
	require.EqualValues(t, 5, watermark.PullerResolvedTs)

	// Checkpoint should be advanced even if there is an uninitialized capture.
	aliveCaptures["c"] = &model.CaptureInfo{}
	trans.RecvBuffer = nil
	trans.RecvBuffer = append(trans.RecvBuffer, &schedulepb.Message{
		Header: &schedulepb.Message_Header{
			OwnerRevision: schedulepb.OwnerRevision{Revision: 1},
		},
		To:      "a",
		From:    "a",
		MsgType: schedulepb.MsgHeartbeatResponse,
		HeartbeatResponse: &schedulepb.HeartbeatResponse{
			Tables: []tablepb.TableStatus{
				{
					Span:  spanz.TableIDToComparableSpan(1),
					State: tablepb.TableStateReplicating,
					Checkpoint: tablepb.Checkpoint{
						CheckpointTs: 3, ResolvedTs: 5, LastSyncedTs: 4,
					},
					Stats: tablepb.Stats{
						StageCheckpoints: map[string]tablepb.Checkpoint{
							"puller-egress": {
								ResolvedTs: model.Ts(7),
							},
						},
					},
				},
				{
					Span:  spanz.TableIDToComparableSpan(2),
					State: tablepb.TableStateReplicating,
					Checkpoint: tablepb.Checkpoint{
						CheckpointTs: 4, ResolvedTs: 5, LastSyncedTs: 6,
					},
					Stats: tablepb.Stats{
						StageCheckpoints: map[string]tablepb.Checkpoint{
							"puller-egress": {
								ResolvedTs: model.Ts(7),
							},
						},
					},
				},
			},
		},
	})
	watermark, err = coord.poll(ctx, 0, currentTables, aliveCaptures, schedulepb.NewBarrierWithMinTs(5))
	require.Nil(t, err)
	require.False(t, coord.captureM.CheckAllCaptureInitialized())
	require.EqualValues(t, 3, watermark.CheckpointTs)
	require.EqualValues(t, 5, watermark.ResolvedTs)
	require.EqualValues(t, 6, watermark.LastSyncedTs)
	require.EqualValues(t, 7, watermark.PullerResolvedTs)
}

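// TestCoordinatorDropMsgIfChangefeedEpochMismatch verifies that a message is
// dropped when its changefeed epoch differs from the coordinator's, unless
// the sender's version predates changefeed-epoch support, in which case the
// check is skipped.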
func TestCoordinatorDropMsgIfChangefeedEpochMismatch(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	coord, trans := newTestCoordinator(&config.SchedulerConfig{
		ChangefeedSettings: config.GetDefaultReplicaConfig().Scheduler,
	})
	coord.captureID = "0"
	coord.changefeedEpoch = 1

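	// Captures "1" and "2" run versions new enough to carry changefeed
	// epochs; capture "3" advertises an older version, so its messages are
	// exempt from the epoch check.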
	unsupported := *compat.ChangefeedEpochMinVersion
	unsupported.Major--
	coord.compat.UpdateCaptureInfo(map[string]*model.CaptureInfo{
		"1": {Version: compat.ChangefeedEpochMinVersion.String()},
		"2": {Version: compat.ChangefeedEpochMinVersion.String()},
		"3": {Version: unsupported.String()},
	})
	trans.RecvBuffer = append(trans.RecvBuffer,
		&schedulepb.Message{
			Header: &schedulepb.Message_Header{
				OwnerRevision:   coord.revision,
				ChangefeedEpoch: schedulepb.ChangefeedEpoch{Epoch: 1},
			},
			From: "1", To: coord.captureID, MsgType: schedulepb.MsgDispatchTableResponse,
			DispatchTableResponse: &schedulepb.DispatchTableResponse{
				Response: &schedulepb.DispatchTableResponse_AddTable{
					AddTable: &schedulepb.AddTableResponse{
						Status: &tablepb.TableStatus{},
					},
				},
			},
		})
	trans.RecvBuffer = append(trans.RecvBuffer,
		&schedulepb.Message{
			Header: &schedulepb.Message_Header{
				OwnerRevision:   coord.revision,
				ChangefeedEpoch: schedulepb.ChangefeedEpoch{Epoch: 2},
			},
			From: "2", To: coord.captureID, MsgType: schedulepb.MsgDispatchTableResponse,
			DispatchTableResponse: &schedulepb.DispatchTableResponse{
				Response: &schedulepb.DispatchTableResponse_AddTable{
					AddTable: &schedulepb.AddTableResponse{
						Status: &tablepb.TableStatus{},
					},
				},
			},
		})
	trans.RecvBuffer = append(trans.RecvBuffer,
		&schedulepb.Message{
			Header: &schedulepb.Message_Header{
				OwnerRevision: coord.revision,
			},
			From: "3", To: coord.captureID, MsgType: schedulepb.MsgDispatchTableResponse,
			DispatchTableResponse: &schedulepb.DispatchTableResponse{
				Response: &schedulepb.DispatchTableResponse_AddTable{
					AddTable: &schedulepb.AddTableResponse{
						Status: &tablepb.TableStatus{},
					},
				},
			},
		})

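	// Expect the epoch-matching message from "1" and the exempt message from
	// "3"; the mismatched epoch from "2" is dropped.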
	msgs, err := coord.recvMsgs(ctx)
	require.NoError(t, err)
	require.Len(t, msgs, 2)
	require.EqualValues(t, "1", msgs[0].From)
	require.EqualValues(t, "3", msgs[1].From)
}