github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/owner_test.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package cdc
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"fmt"
    20  	"net/url"
    21  	"sync"
    22  	"sync/atomic"
    23  	"time"
    24  
    25  	"github.com/google/uuid"
    26  	"github.com/pingcap/check"
    27  	"github.com/pingcap/errors"
    28  	timodel "github.com/pingcap/parser/model"
    29  	"github.com/pingcap/parser/mysql"
    30  	"github.com/pingcap/parser/types"
    31  	"github.com/pingcap/ticdc/cdc/entry"
    32  	"github.com/pingcap/ticdc/cdc/kv"
    33  	"github.com/pingcap/ticdc/cdc/model"
    34  	"github.com/pingcap/ticdc/pkg/config"
    35  	cdcContext "github.com/pingcap/ticdc/pkg/context"
    36  	cerror "github.com/pingcap/ticdc/pkg/errors"
    37  	"github.com/pingcap/ticdc/pkg/etcd"
    38  	"github.com/pingcap/ticdc/pkg/filter"
    39  	"github.com/pingcap/ticdc/pkg/security"
    40  	"github.com/pingcap/ticdc/pkg/util"
    41  	"github.com/pingcap/ticdc/pkg/util/testleak"
    42  	"github.com/pingcap/tidb/meta"
    43  	"github.com/pingcap/tidb/store/mockstore"
    44  	"github.com/pingcap/tidb/store/tikv/oracle"
    45  	pd "github.com/tikv/pd/client"
    46  	"go.etcd.io/etcd/clientv3"
    47  	"go.etcd.io/etcd/clientv3/concurrency"
    48  	"go.etcd.io/etcd/embed"
    49  	"golang.org/x/sync/errgroup"
    50  )
    51  
// TiKVGCLifeTime mirrors the default tikv_gc_life_time (10 minutes) and is
// used by mockPDClient to simulate a TiKV GC safepoint that lags behind now.
const TiKVGCLifeTime = 10 * 60 * time.Second // 10 min
    53  
// ownerSuite owns the per-test embedded etcd server and the client/context
// plumbing shared by all owner tests. SetUpTest starts the server and
// TearDownTest stops it and drains the error group.
type ownerSuite struct {
	e         *embed.Etcd        // in-process etcd server started in SetUpTest
	clientURL *url.URL           // client endpoint of the embedded etcd
	client    kv.CDCEtcdClient   // CDC etcd client connected to the embedded server
	ctx       context.Context    // per-test context, canceled in TearDownTest
	cancel    context.CancelFunc // cancels ctx
	errg      *errgroup.Group    // collects background errors from the etcd server
}

// Register the suite with gocheck.
var _ = check.Suite(&ownerSuite{})
    64  
    65  func (s *ownerSuite) SetUpTest(c *check.C) {
    66  	dir := c.MkDir()
    67  	var err error
    68  	s.clientURL, s.e, err = etcd.SetupEmbedEtcd(dir)
    69  	c.Assert(err, check.IsNil)
    70  	client, err := clientv3.New(clientv3.Config{
    71  		Endpoints:   []string{s.clientURL.String()},
    72  		DialTimeout: 3 * time.Second,
    73  	})
    74  	c.Assert(err, check.IsNil)
    75  	s.client = kv.NewCDCEtcdClient(context.TODO(), client)
    76  	s.ctx, s.cancel = context.WithCancel(context.Background())
    77  	s.errg = util.HandleErrWithErrGroup(s.ctx, s.e.Err(), func(e error) { c.Log(e) })
    78  }
    79  
    80  func (s *ownerSuite) TearDownTest(c *check.C) {
    81  	s.e.Close()
    82  	s.cancel()
    83  	err := s.errg.Wait()
    84  	if err != nil {
    85  		c.Errorf("Error group error: %s", err)
    86  	}
    87  	s.client.Close() //nolint:errcheck
    88  }
    89  
// mockPDClient stubs the PD client used by the owner. The mock* flags select
// the canned behavior of GetTS and UpdateServiceGCSafePoint; tests set at
// most one of them. Methods not overridden here fall through to the embedded
// pd.Client (nil), so calling them panics — which surfaces unexpected usage.
type mockPDClient struct {
	pd.Client
	invokeCounter      int  // number of UpdateServiceGCSafePoint calls observed
	mockSafePointLost  bool // PD reports a safepoint ahead of the requested one
	mockPDFailure      bool // every call fails with an injected error
	mockTiKVGCLifeTime bool // safepoint lags now by TiKVGCLifeTime
}
    97  
    98  func (m *mockPDClient) GetTS(ctx context.Context) (int64, int64, error) {
    99  	if m.mockPDFailure {
   100  		return 0, 0, errors.New("injected PD failure")
   101  	}
   102  	if m.mockSafePointLost {
   103  		return 0, 0, nil
   104  	}
   105  	return oracle.GetPhysical(time.Now()), 0, nil
   106  }
   107  
   108  func (m *mockPDClient) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) {
   109  	m.invokeCounter++
   110  
   111  	if m.mockSafePointLost {
   112  		return 1000, nil
   113  	}
   114  	if m.mockPDFailure {
   115  		return 0, errors.New("injected PD failure")
   116  	}
   117  	if m.mockTiKVGCLifeTime {
   118  		Ts := oracle.GoTimeToTS(time.Now().Add(-TiKVGCLifeTime))
   119  		return Ts, nil
   120  	}
   121  	return safePoint, nil
   122  }
   123  
// mockSink is an AsyncSink stub that records the last emitted checkpoint ts
// and can be configured to fail every EmitCheckpointTs call.
type mockSink struct {
	AsyncSink
	checkpointTs model.Ts // last emitted checkpoint ts; written atomically

	checkpointMu    sync.Mutex // serializes EmitCheckpointTs calls
	checkpointError error      // returned by every EmitCheckpointTs call (nil = success)
}
   131  
// EmitCheckpointTs records ts as the latest checkpoint and returns the
// injected checkpointError (nil unless a test configured a failure).
// The store is atomic so concurrent readers may load checkpointTs without
// taking checkpointMu; the mutex serializes the emit calls themselves.
func (m *mockSink) EmitCheckpointTs(ctx cdcContext.Context, ts uint64) error {
	m.checkpointMu.Lock()
	defer m.checkpointMu.Unlock()
	atomic.StoreUint64(&m.checkpointTs, ts)
	return m.checkpointError
}
   138  
// Close implements the sink interface; the mock holds no resources.
func (m *mockSink) Close(ctx context.Context) error {
	return nil
}
   142  
// Barrier implements the sink interface; the mock has nothing to flush.
func (m *mockSink) Barrier(ctx context.Context) error {
	return nil
}
   146  
// Test whether the owner can tolerate sink caused error, it won't be killed.
// also set the specific changefeed to stop
func (s *ownerSuite) TestOwnerCalcResolvedTs(c *check.C) {
	defer testleak.AfterTest(c)()
	mockPDCli := &mockPDClient{}

	// The sink fails every checkpoint flush; the owner is expected to stop
	// the changefeed rather than crash.
	sink := &mockSink{checkpointError: cerror.ErrKafkaSendMessage}
	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		"test_change_feed_1": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: 0,
			},
			targetTs:      2000,
			ddlResolvedTs: 2000,
			ddlState:      model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			// Both captures have advanced to 2333, so calcResolvedTs has
			// room to move the feed's resolved ts forward from 0.
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {
					CheckPointTs: 2333,
					ResolvedTs:   2333,
				},
				"capture_2": {
					CheckPointTs: 2333,
					ResolvedTs:   2333,
				},
			},
			sink: sink,
		},
	}

	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)
	mockOwner := Owner{
		session:                 session,
		pdClient:                mockPDCli,
		etcdClient:              s.client,
		lastFlushChangefeeds:    time.Now(),
		flushChangefeedInterval: 1 * time.Hour,
		changeFeeds:             changeFeeds,
		cfRWriter:               s.client,
		stoppedFeeds:            make(map[model.ChangeFeedID]*model.ChangeFeedStatus),
		minGCSafePointCache:     minGCSafePointCacheEntry{},
	}

	err = mockOwner.calcResolvedTs(s.ctx)
	c.Assert(err, check.IsNil)

	err = mockOwner.handleDDL(s.ctx)
	c.Assert(err, check.IsNil)

	// The failing sink should have queued an admin-stop job; after handling
	// it the changefeed must appear in stoppedFeeds instead of killing the
	// owner.
	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.NotNil)

	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)

	// NOTE(review): gocheck also runs TearDownTest automatically after each
	// test; the explicit call here means teardown runs twice — confirm it is
	// safe to invoke twice.
	s.TearDownTest(c)
}
   213  
   214  func (s *ownerSuite) TestOwnerFlushChangeFeedInfos(c *check.C) {
   215  	defer testleak.AfterTest(c)()
   216  	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
   217  		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
   218  	c.Assert(err, check.IsNil)
   219  	mockPDCli := &mockPDClient{}
   220  	mockOwner := Owner{
   221  		session:               session,
   222  		etcdClient:            s.client,
   223  		pdClient:              mockPDCli,
   224  		gcSafepointLastUpdate: time.Now(),
   225  	}
   226  
   227  	err = mockOwner.flushChangeFeedInfos(s.ctx)
   228  	c.Assert(err, check.IsNil)
   229  	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)
   230  	s.TearDownTest(c)
   231  }
   232  
// TestOwnerFlushChangeFeedInfosFailed verifies that PD safepoint-update
// failures are tolerated while they are younger than gcTTL, and surface as
// ErrUpdateServiceSafepointFailed once gcTTL has elapsed.
func (s *ownerSuite) TestOwnerFlushChangeFeedInfosFailed(c *check.C) {
	defer testleak.AfterTest(c)()
	// Every UpdateServiceGCSafePoint call on this PD client fails.
	mockPDCli := &mockPDClient{
		mockPDFailure: true,
	}

	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		"test_change_feed_1": {
			info: &model.ChangeFeedInfo{State: model.StateNormal},
			status: &model.ChangeFeedStatus{
				CheckpointTs: 100,
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
	}

	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)
	mockOwner := Owner{
		session:                 session,
		pdClient:                mockPDCli,
		etcdClient:              s.client,
		lastFlushChangefeeds:    time.Now(),
		flushChangefeedInterval: 1 * time.Hour,
		gcSafepointLastUpdate:   time.Now(),
		gcTTL:                   6, // 6 seconds
		changeFeeds:             changeFeeds,
	}

	// Still within the 6s gcTTL window since gcSafepointLastUpdate: the PD
	// failure is tolerated and the flush reports success.
	time.Sleep(3 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)

	// More than gcTTL has now elapsed since the last successful safepoint
	// update, so the failure must surface to the caller.
	time.Sleep(6 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.ErrorMatches, ".*CDC:ErrUpdateServiceSafepointFailed.*")
	c.Assert(mockPDCli.invokeCounter, check.Equals, 2)

	s.TearDownTest(c)
}
   284  
   285  // Test whether it is possible to successfully create a changefeed
   286  // with startTs less than currentTs - gcTTL when tikv_gc_life_time is greater than gc-ttl
   287  func (s *ownerSuite) TestTiKVGCLifeTimeLargeThanGCTTL(c *check.C) {
   288  	defer testleak.AfterTest(c)
   289  	mockPDCli := &mockPDClient{}
   290  	mockPDCli.mockTiKVGCLifeTime = true
   291  
   292  	changeFeeds := map[model.ChangeFeedID]*changeFeed{
   293  		"test_change_feed_1": {
   294  			info:    &model.ChangeFeedInfo{State: model.StateNormal},
   295  			etcdCli: s.client,
   296  			status: &model.ChangeFeedStatus{
   297  				CheckpointTs: oracle.GoTimeToTS(time.Now().Add(-6 * time.Second)),
   298  			},
   299  			targetTs: 2000,
   300  			ddlState: model.ChangeFeedSyncDML,
   301  			taskStatus: model.ProcessorsInfos{
   302  				"capture_1": {},
   303  				"capture_2": {},
   304  			},
   305  			taskPositions: map[string]*model.TaskPosition{
   306  				"capture_1": {},
   307  				"capture_2": {},
   308  			},
   309  		},
   310  	}
   311  
   312  	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
   313  		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
   314  	c.Assert(err, check.IsNil)
   315  
   316  	mockOwner := Owner{
   317  		session:                 session,
   318  		pdClient:                mockPDCli,
   319  		etcdClient:              s.client,
   320  		lastFlushChangefeeds:    time.Now(),
   321  		flushChangefeedInterval: 1 * time.Hour,
   322  		// gcSafepointLastUpdate:   time.Now(),
   323  		gcTTL:               6, // 6 seconds
   324  		changeFeeds:         changeFeeds,
   325  		cfRWriter:           s.client,
   326  		stoppedFeeds:        make(map[model.ChangeFeedID]*model.ChangeFeedStatus),
   327  		minGCSafePointCache: minGCSafePointCacheEntry{},
   328  	}
   329  
   330  	err = mockOwner.flushChangeFeedInfos(s.ctx)
   331  	c.Assert(err, check.IsNil)
   332  	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)
   333  
   334  	err = mockOwner.handleAdminJob(s.ctx)
   335  	c.Assert(err, check.IsNil)
   336  	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.IsNil)
   337  	c.Assert(mockOwner.changeFeeds["test_change_feed_1"].info.State, check.Equals, model.StateNormal)
   338  
   339  	time.Sleep(7 * time.Second) // wait for gcTTL time pass
   340  	err = mockOwner.flushChangeFeedInfos(s.ctx)
   341  	c.Assert(err, check.IsNil)
   342  	c.Assert(mockPDCli.invokeCounter, check.Equals, 2)
   343  
   344  	err = mockOwner.handleAdminJob(s.ctx)
   345  	c.Assert(err, check.IsNil)
   346  	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.IsNil)
   347  
   348  	s.TearDownTest(c)
   349  }
   350  
// Test whether the owner handles the stagnant task correctly, so that it can't block the update of gcSafePoint.
// If a changefeed is put into the stop queue due to stagnation, it can no longer affect the update of gcSafePoint.
// So we just need to test whether the stagnant changefeed is put into the stop queue.
func (s *ownerSuite) TestOwnerHandleStaleChangeFeed(c *check.C) {
	defer testleak.AfterTest(c)()
	mockPDCli := &mockPDClient{}
	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		// Feed 1 starts with an ancient checkpoint (1000) and never advances:
		// it should be the first one stopped for stagnation.
		"test_change_feed_1": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: 1000,
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
		// Feed 2 starts with a current checkpoint; it stays healthy at first
		// and only becomes stale later as wall time advances past gcTTL.
		"test_change_feed_2": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: oracle.EncodeTSO(oracle.GetPhysical(time.Now())),
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
	}

	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)

	mockOwner := Owner{
		session:                 session,
		pdClient:                mockPDCli,
		etcdClient:              s.client,
		lastFlushChangefeeds:    time.Now(),
		flushChangefeedInterval: 1 * time.Hour,
		// Pretend the safepoint was last updated 4s ago so the 6s gcTTL
		// deadline is already partially consumed at the start of the test.
		gcSafepointLastUpdate: time.Now().Add(-4 * time.Second),
		gcTTL:                 6, // 6 seconds
		changeFeeds:           changeFeeds,
		cfRWriter:             s.client,
		stoppedFeeds:          make(map[model.ChangeFeedID]*model.ChangeFeedStatus),
		minGCSafePointCache:   minGCSafePointCacheEntry{},
	}

	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)
	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)

	time.Sleep(2 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 2)
	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)

	// By now feed 1's checkpoint has been stagnant past gcTTL, so it must be
	// in the stop queue, while the fresh feed 2 is still running normally.
	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.NotNil)
	c.Assert(mockOwner.changeFeeds["test_change_feed_2"].info.State, check.Equals, model.StateNormal)

	time.Sleep(6 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 3)
	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)

	time.Sleep(2 * time.Second)
	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 4)
	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)
	// Enough wall time has passed that feed 2's once-current checkpoint is
	// now stagnant too, so it joins the stop queue.
	c.Assert(mockOwner.stoppedFeeds["test_change_feed_2"], check.NotNil)

	s.TearDownTest(c)
}
   445  
// TestOwnerUploadGCSafePointOutdated verifies the owner's reaction when PD
// reports a service safepoint (1000) ahead of the one requested: a feed whose
// checkpoint is already behind that safepoint must be stopped, while a feed
// still ahead of it keeps running.
func (s *ownerSuite) TestOwnerUploadGCSafePointOutdated(c *check.C) {
	defer testleak.AfterTest(c)()
	mockPDCli := &mockPDClient{
		mockSafePointLost: true,
	}
	changeFeeds := map[model.ChangeFeedID]*changeFeed{
		// Checkpoint 100 < the 1000 safepoint PD returns: outdated, should stop.
		"test_change_feed_1": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: 100,
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
		// Checkpoint 1100 > 1000: still valid, should keep running.
		"test_change_feed_2": {
			info:    &model.ChangeFeedInfo{State: model.StateNormal},
			etcdCli: s.client,
			status: &model.ChangeFeedStatus{
				CheckpointTs: 1100,
			},
			targetTs: 2000,
			ddlState: model.ChangeFeedSyncDML,
			taskStatus: model.ProcessorsInfos{
				"capture_1": {},
				"capture_2": {},
			},
			taskPositions: map[string]*model.TaskPosition{
				"capture_1": {},
				"capture_2": {},
			},
		},
	}

	session, err := concurrency.NewSession(s.client.Client.Unwrap(),
		concurrency.WithTTL(config.GetDefaultServerConfig().CaptureSessionTTL))
	c.Assert(err, check.IsNil)

	mockOwner := Owner{
		pdClient:                mockPDCli,
		session:                 session,
		etcdClient:              s.client,
		lastFlushChangefeeds:    time.Now(),
		flushChangefeedInterval: 1 * time.Hour,
		changeFeeds:             changeFeeds,
		cfRWriter:               s.client,
		stoppedFeeds:            make(map[model.ChangeFeedID]*model.ChangeFeedStatus),
		minGCSafePointCache:     minGCSafePointCacheEntry{},
	}

	err = mockOwner.flushChangeFeedInfos(s.ctx)
	c.Assert(err, check.IsNil)
	c.Assert(mockPDCli.invokeCounter, check.Equals, 1)

	err = mockOwner.handleAdminJob(s.ctx)
	c.Assert(err, check.IsNil)

	c.Assert(mockOwner.stoppedFeeds["test_change_feed_1"], check.NotNil)
	c.Assert(changeFeeds["test_change_feed_2"].info.State, check.Equals, model.StateNormal)
	s.TearDownTest(c)
}
   515  
   516  /*
   517  type handlerForPrueDMLTest struct {
   518  	mu               sync.RWMutex
   519  	index            int
   520  	resolvedTs1      []uint64
   521  	resolvedTs2      []uint64
   522  	expectResolvedTs []uint64
   523  	c                *check.C
   524  	cancel           func()
   525  }
   526  
   527  func (h *handlerForPrueDMLTest) PullDDL() (resolvedTs uint64, ddl []*model.DDL, err error) {
   528  	return uint64(math.MaxUint64), nil, nil
   529  }
   530  
   531  func (h *handlerForPrueDMLTest) ExecDDL(context.Context, string, map[string]string, model.SingleTableTxn) error {
   532  	panic("unreachable")
   533  }
   534  
   535  func (h *handlerForPrueDMLTest) Close() error {
   536  	return nil
   537  }
   538  
   539  var _ ChangeFeedRWriter = &handlerForPrueDMLTest{}
   540  
   541  func (h *handlerForPrueDMLTest) GetChangeFeeds(ctx context.Context) (int64, map[string]*mvccpb.KeyValue, error) {
   542  	h.mu.RLock()
   543  	defer h.mu.RUnlock()
   544  	cfInfo := &model.ChangeFeedInfo{
   545  		TargetTs: 100,
   546  	}
   547  	cfInfoJSON, err := cfInfo.Marshal()
   548  	h.c.Assert(err, check.IsNil)
   549  	rawKV := &mvccpb.KeyValue{
   550  		Value: []byte(cfInfoJSON),
   551  	}
   552  	return 0, map[model.ChangeFeedID]*mvccpb.KeyValue{
   553  		"test_change_feed": rawKV,
   554  	}, nil
   555  }
   556  
   557  func (h *handlerForPrueDMLTest) GetAllTaskStatus(ctx context.Context, changefeedID string) (model.ProcessorsInfos, error) {
   558  	if changefeedID != "test_change_feed" {
    559  		return nil, cerror.ErrTaskStatusNotExists.GenWithStackByArgs("test_change_feed")
   560  	}
   561  	h.mu.RLock()
   562  	defer h.mu.RUnlock()
   563  	h.index++
   564  	return model.ProcessorsInfos{
   565  		"capture_1": {},
   566  		"capture_2": {},
   567  	}, nil
   568  }
   569  
   570  func (h *handlerForPrueDMLTest) GetAllTaskPositions(ctx context.Context, changefeedID string) (map[string]*model.TaskPosition, error) {
   571  	if changefeedID != "test_change_feed" {
    572  		return nil, cerror.ErrTaskStatusNotExists.GenWithStackByArgs("test_change_feed")
   573  	}
   574  	h.mu.RLock()
   575  	defer h.mu.RUnlock()
   576  	h.index++
   577  	return map[string]*model.TaskPosition{
   578  		"capture_1": {
   579  			ResolvedTs: h.resolvedTs1[h.index],
   580  		},
   581  		"capture_2": {
   582  			ResolvedTs: h.resolvedTs2[h.index],
   583  		},
   584  	}, nil
   585  }
   586  
   587  func (h *handlerForPrueDMLTest) GetChangeFeedStatus(ctx context.Context, id string) (*model.ChangeFeedStatus, error) {
   588  	return nil, cerror.ErrChangeFeedNotExists.GenWithStackByArgs(id)
   589  }
   590  
   591  func (h *handlerForPrueDMLTest) PutAllChangeFeedStatus(ctx context.Context, infos map[model.ChangeFeedID]*model.ChangeFeedStatus) error {
   592  	h.mu.Lock()
   593  	defer h.mu.Unlock()
   594  	info, exist := infos["test_change_feed"]
   595  	h.c.Assert(exist, check.IsTrue)
   596  	h.c.Assert(info.ResolvedTs, check.Equals, h.expectResolvedTs[h.index])
   597  	// h.c.Assert(info.State, check.Equals, model.ChangeFeedSyncDML)
   598  	if h.index >= len(h.expectResolvedTs)-1 {
   599  		log.Info("cancel")
   600  		h.cancel()
   601  	}
   602  	return nil
   603  }
   604  
   605  func (s *ownerSuite) TestPureDML(c *check.C) {
   606  		defer testleak.AfterTest(c)()
   607  	ctx, cancel := context.WithCancel(context.Background())
   608  	handler := &handlerForPrueDMLTest{
   609  		index:            -1,
   610  		resolvedTs1:      []uint64{10, 22, 64, 92, 99, 120},
   611  		resolvedTs2:      []uint64{8, 36, 53, 88, 103, 108},
   612  		expectResolvedTs: []uint64{8, 22, 53, 88, 99, 100},
   613  		cancel:           cancel,
   614  		c:                c,
   615  	}
   616  
   617  	tables := map[uint64]model.TableName{1: {Schema: "any"}}
   618  
   619  	changeFeeds := map[model.ChangeFeedID]*changeFeed{
   620  		"test_change_feed": {
   621  			tables:   tables,
   622  			status:   &model.ChangeFeedStatus{},
   623  			targetTs: 100,
   624  			ddlState: model.ChangeFeedSyncDML,
   625  			taskStatus: model.ProcessorsInfos{
   626  				"capture_1": {},
   627  				"capture_2": {},
   628  			},
   629  			taskPositions: map[string]*model.TaskPosition{
   630  				"capture_1": {},
   631  				"capture_2": {},
   632  			},
   633  			ddlHandler: handler,
   634  		},
   635  	}
   636  
   637  	manager := roles.NewMockManager(uuid.New().String(), cancel)
   638  	err := manager.CampaignOwner(ctx)
   639  	c.Assert(err, check.IsNil)
   640  	owner := &ownerImpl{
   641  		cancelWatchCapture: cancel,
   642  		changeFeeds:        changeFeeds,
   643  		cfRWriter:          handler,
   644  		etcdClient:         s.client,
   645  		manager:            manager,
   646  	}
   647  	s.owner = owner
   648  	err = owner.Run(ctx, 50*time.Millisecond)
   649  	c.Assert(err.Error(), check.Equals, "context canceled")
   650  }
   651  
   652  type handlerForDDLTest struct {
   653  	mu sync.RWMutex
   654  
   655  	ddlIndex      int
   656  	ddls          []*model.DDL
   657  	ddlResolvedTs []uint64
   658  
   659  	ddlExpectIndex int
   660  
   661  	dmlIndex                int
   662  	resolvedTs1             []uint64
   663  	resolvedTs2             []uint64
   664  	currentGlobalResolvedTs uint64
   665  
   666  	dmlExpectIndex   int
   667  	expectResolvedTs []uint64
   668  	expectStatus     []model.ChangeFeedDDLState
   669  
   670  	c      *check.C
   671  	cancel func()
   672  }
   673  
   674  func (h *handlerForDDLTest) PullDDL() (resolvedTs uint64, jobs []*model.DDL, err error) {
   675  	h.mu.RLock()
   676  	defer h.mu.RUnlock()
   677  	if h.ddlIndex < len(h.ddls)-1 {
   678  		h.ddlIndex++
   679  	}
   680  	return h.ddlResolvedTs[h.ddlIndex], []*model.DDL{h.ddls[h.ddlIndex]}, nil
   681  }
   682  
   683  func (h *handlerForDDLTest) ExecDDL(ctx context.Context, sinkURI string, _ map[string]string, txn model.SingleTableTxn) error {
   684  	h.mu.Lock()
   685  	defer h.mu.Unlock()
   686  	h.ddlExpectIndex++
   687  	h.c.Assert(txn.DDL, check.DeepEquals, h.ddls[h.ddlExpectIndex])
   688  	h.c.Assert(txn.DDL.Job.BinlogInfo.FinishedTS, check.Equals, h.currentGlobalResolvedTs)
   689  	return nil
   690  }
   691  
   692  func (h *handlerForDDLTest) Close() error {
   693  	return nil
   694  }
   695  
   696  func (h *handlerForDDLTest) GetChangeFeeds(ctx context.Context) (int64, map[string]*mvccpb.KeyValue, error) {
   697  	h.mu.RLock()
   698  	defer h.mu.RUnlock()
   699  	cfInfo := &model.ChangeFeedInfo{
   700  		TargetTs: 100,
   701  	}
   702  	cfInfoJSON, err := cfInfo.Marshal()
   703  	h.c.Assert(err, check.IsNil)
   704  	rawKV := &mvccpb.KeyValue{
   705  		Value: []byte(cfInfoJSON),
   706  	}
   707  	return 0, map[model.ChangeFeedID]*mvccpb.KeyValue{
   708  		"test_change_feed": rawKV,
   709  	}, nil
   710  }
   711  
   712  func (h *handlerForDDLTest) GetAllTaskStatus(ctx context.Context, changefeedID string) (model.ProcessorsInfos, error) {
   713  	if changefeedID != "test_change_feed" {
   714  		return nil, cerror.ErrTaskStatusNotExists.GenWithStackByArgs("test_change_feed")
   715  	}
   716  	h.mu.RLock()
   717  	defer h.mu.RUnlock()
   718  	if h.dmlIndex < len(h.resolvedTs1)-1 {
   719  		h.dmlIndex++
   720  	}
   721  	return model.ProcessorsInfos{
   722  		"capture_1": {},
   723  		"capture_2": {},
   724  	}, nil
   725  }
   726  
   727  func (h *handlerForDDLTest) GetAllTaskPositions(ctx context.Context, changefeedID string) (map[string]*model.TaskPosition, error) {
   728  	if changefeedID != "test_change_feed" {
   729  		return nil, cerror.ErrTaskStatusNotExists.GenWithStackByArgs("test_change_feed")
   730  	}
   731  	h.mu.RLock()
   732  	defer h.mu.RUnlock()
   733  	if h.dmlIndex < len(h.resolvedTs1)-1 {
   734  		h.dmlIndex++
   735  	}
   736  	return map[string]*model.TaskPosition{
   737  		"capture_1": {
   738  			ResolvedTs:   h.resolvedTs1[h.dmlIndex],
   739  			CheckPointTs: h.currentGlobalResolvedTs,
   740  		},
   741  		"capture_2": {
   742  			ResolvedTs:   h.resolvedTs2[h.dmlIndex],
   743  			CheckPointTs: h.currentGlobalResolvedTs,
   744  		},
   745  	}, nil
   746  }
   747  
   748  func (h *handlerForDDLTest) GetChangeFeedStatus(ctx context.Context, id string) (*model.ChangeFeedStatus, error) {
   749  	return nil, cerror.ErrChangeFeedNotExists.GenWithStackByArgs(id)
   750  }
   751  
   752  func (h *handlerForDDLTest) PutAllChangeFeedStatus(ctx context.Context, infos map[model.ChangeFeedID]*model.ChangeFeedStatus) error {
   753  	h.mu.Lock()
   754  	defer h.mu.Unlock()
   755  	h.dmlExpectIndex++
   756  	info, exist := infos["test_change_feed"]
   757  	h.c.Assert(exist, check.IsTrue)
   758  	h.currentGlobalResolvedTs = info.ResolvedTs
   759  	h.c.Assert(info.ResolvedTs, check.Equals, h.expectResolvedTs[h.dmlExpectIndex])
   760  	// h.c.Assert(info.State, check.Equals, h.expectStatus[h.dmlExpectIndex])
   761  	if h.dmlExpectIndex >= len(h.expectResolvedTs)-1 {
   762  		log.Info("cancel")
   763  		h.cancel()
   764  	}
   765  	return nil
   766  }
   767  
   768  func (s *ownerSuite) TestDDL(c *check.C) {
   769  		defer testleak.AfterTest(c)()
   770  	ctx, cancel := context.WithCancel(context.Background())
   771  
   772  	handler := &handlerForDDLTest{
   773  		ddlIndex:      -1,
   774  		ddlResolvedTs: []uint64{5, 8, 49, 91, 113},
   775  		ddls: []*model.DDL{
   776  			{Job: &timodel.Job{
   777  				ID: 1,
   778  				BinlogInfo: &timodel.HistoryInfo{
   779  					FinishedTS: 3,
   780  				},
   781  			}},
   782  			{Job: &timodel.Job{
   783  				ID: 2,
   784  				BinlogInfo: &timodel.HistoryInfo{
   785  					FinishedTS: 7,
   786  				},
   787  			}},
   788  			{Job: &timodel.Job{
   789  				ID: 3,
   790  				BinlogInfo: &timodel.HistoryInfo{
   791  					FinishedTS: 11,
   792  				},
   793  			}},
   794  			{Job: &timodel.Job{
   795  				ID: 4,
   796  				BinlogInfo: &timodel.HistoryInfo{
   797  					FinishedTS: 89,
   798  				},
   799  			}},
   800  			{Job: &timodel.Job{
   801  				ID: 5,
   802  				BinlogInfo: &timodel.HistoryInfo{
   803  					FinishedTS: 111,
   804  				},
   805  			}},
   806  		},
   807  
   808  		ddlExpectIndex: -1,
   809  
   810  		dmlIndex:                -1,
   811  		resolvedTs1:             []uint64{10, 22, 64, 92, 99, 120},
   812  		resolvedTs2:             []uint64{8, 36, 53, 88, 103, 108},
   813  		currentGlobalResolvedTs: 0,
   814  
   815  		dmlExpectIndex: -1,
   816  		expectResolvedTs: []uint64{
   817  			3, 3,
   818  			7, 7,
   819  			11, 11,
   820  			89, 89,
   821  			100},
   822  		expectStatus: []model.ChangeFeedDDLState{
   823  			model.ChangeFeedWaitToExecDDL, model.ChangeFeedExecDDL,
   824  			model.ChangeFeedWaitToExecDDL, model.ChangeFeedExecDDL,
   825  			model.ChangeFeedWaitToExecDDL, model.ChangeFeedExecDDL,
   826  			model.ChangeFeedWaitToExecDDL, model.ChangeFeedExecDDL,
   827  			model.ChangeFeedSyncDML},
   828  
   829  		cancel: cancel,
   830  		c:      c,
   831  	}
   832  
   833  	tables := map[uint64]model.TableName{1: {Schema: "any"}}
   834  
   835  	filter, err := newTxnFilter(&model.ReplicaConfig{})
   836  	c.Assert(err, check.IsNil)
   837  	changeFeeds := map[model.ChangeFeedID]*changeFeed{
   838  		"test_change_feed": {
   839  			tables:   tables,
   840  			info:     &model.ChangeFeedInfo{},
   841  			status:   &model.ChangeFeedStatus{},
   842  			targetTs: 100,
   843  			ddlState: model.ChangeFeedSyncDML,
   844  			taskStatus: model.ProcessorsInfos{
   845  				"capture_1": {},
   846  				"capture_2": {},
   847  			},
   848  			taskPositions: map[string]*model.TaskPosition{
   849  				"capture_1": {},
   850  				"capture_2": {},
   851  			},
   852  			ddlHandler: handler,
   853  			filter:     filter,
   854  		},
   855  	}
   856  
   857  	manager := roles.NewMockManager(uuid.New().String(), cancel)
   858  	err = manager.CampaignOwner(ctx)
   859  	c.Assert(err, check.IsNil)
   860  	owner := &ownerImpl{
   861  		cancelWatchCapture: cancel,
   862  		changeFeeds:        changeFeeds,
   863  
   864  		// ddlHandler: handler,
   865  		etcdClient: s.client,
   866  		cfRWriter:  handler,
   867  		manager:    manager,
   868  	}
   869  	s.owner = owner
   870  	err = owner.Run(ctx, 50*time.Millisecond)
   871  	c.Assert(errors.Cause(err), check.DeepEquals, context.Canceled)
   872  }
   873  */
// cdcGCSafePointTTL4Test is the GC safepoint TTL (24 hours, in seconds)
// passed to NewOwner in the tests below.
var cdcGCSafePointTTL4Test = int64(24 * 60 * 60)
   875  
// TestHandleAdmin drives the owner's admin-job pipeline end to end for the
// three job types (AdminStop, AdminResume, AdminRemove). For each job it
// checks that: the in-memory job queue drains, owner.changeFeeds is updated,
// the changefeed info/status records in etcd carry the admin job type, the
// per-capture task records carry it too, and the changefeed's context is
// canceled where the job implies a teardown.
func (s *ownerSuite) TestHandleAdmin(c *check.C) {
	defer testleak.AfterTest(c)()
	defer s.TearDownTest(c)
	cfID := "test_handle_admin"

	ctx, cancel0 := context.WithCancel(context.Background())
	defer cancel0()
	// cctx is the changefeed-scoped context; AdminStop/AdminRemove are
	// expected to cancel it via sampleCF.cancel.
	cctx, cancel := context.WithCancel(ctx)
	errg, _ := errgroup.WithContext(cctx)

	replicaConf := config.GetDefaultReplicaConfig()

	// A minimal in-memory changefeed with two fake processors; it is handed
	// to the owner directly instead of going through loadChangeFeeds.
	sampleCF := &changeFeed{
		id:       cfID,
		info:     &model.ChangeFeedInfo{Config: replicaConf, SinkURI: "blackhole://"},
		status:   &model.ChangeFeedStatus{},
		ddlState: model.ChangeFeedSyncDML,
		taskStatus: model.ProcessorsInfos{
			"capture_1": {},
			"capture_2": {},
		},
		taskPositions: map[string]*model.TaskPosition{
			"capture_1": {ResolvedTs: 10001},
			"capture_2": {},
		},
		ddlHandler: &ddlHandler{
			cancel: cancel,
			wg:     errg,
		},
		cancel: cancel,
	}
	// new asyncSink
	cdcCtx := cdcContext.NewContext(ctx, &cdcContext.GlobalVars{})
	cdcCtx = cdcContext.WithChangefeedVars(cdcCtx, &cdcContext.ChangefeedVars{
		ID:   cfID,
		Info: sampleCF.info,
	})
	sink, err := newAsyncSink(cdcCtx)
	c.Assert(err, check.IsNil)
	defer sink.Close(cctx) //nolint:errcheck
	sampleCF.sink = sink

	// Campaign so that the capture session can back a real owner below.
	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
	c.Assert(err, check.IsNil)
	err = capture.Campaign(ctx)
	c.Assert(err, check.IsNil)

	grpcPool := kv.NewGrpcPoolImpl(ctx, &security.Credential{})
	defer grpcPool.Close()
	owner, err := NewOwner(ctx, nil, grpcPool, capture.session, cdcGCSafePointTTL4Test, time.Millisecond*200)
	c.Assert(err, check.IsNil)

	sampleCF.etcdCli = owner.etcdClient
	owner.changeFeeds = map[model.ChangeFeedID]*changeFeed{cfID: sampleCF}
	// Seed etcd with per-capture task records so handleAdminJob has state to
	// stamp with the admin job type.
	// NOTE(review): this marshals a TaskPosition under the task-*status* key
	// (GetEtcdKeyTaskStatus); the reads below via GetTaskStatus tolerate it —
	// confirm this is intentional rather than a copy-paste of the wrong key.
	for cid, pinfo := range sampleCF.taskPositions {
		key := kv.GetEtcdKeyTaskStatus(cfID, cid)
		pinfoStr, err := pinfo.Marshal()
		c.Assert(err, check.IsNil)
		_, err = s.client.Client.Put(ctx, key, pinfoStr)
		c.Assert(err, check.IsNil)
	}
	err = owner.etcdClient.PutChangeFeedStatus(ctx, cfID, &model.ChangeFeedStatus{})
	c.Assert(err, check.IsNil)
	err = owner.etcdClient.SaveChangeFeedInfo(ctx, sampleCF.info, cfID)
	c.Assert(err, check.IsNil)
	// checkAdminJobLen asserts the current length of the owner's pending
	// admin-job queue under its lock.
	checkAdminJobLen := func(length int) {
		owner.adminJobsLock.Lock()
		c.Assert(owner.adminJobs, check.HasLen, length)
		owner.adminJobsLock.Unlock()
	}

	// Stage 1: AdminStop — changefeed is removed from the active set and all
	// etcd records are marked AdminStop.
	c.Assert(owner.EnqueueJob(model.AdminJob{CfID: cfID, Type: model.AdminStop}), check.IsNil)
	checkAdminJobLen(1)
	c.Assert(owner.handleAdminJob(ctx), check.IsNil)
	checkAdminJobLen(0)
	c.Assert(len(owner.changeFeeds), check.Equals, 0)
	// check changefeed info is set admin job
	info, err := owner.etcdClient.GetChangeFeedInfo(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(info.AdminJobType, check.Equals, model.AdminStop)
	// check processor is set admin job
	for cid := range sampleCF.taskPositions {
		_, subInfo, err := owner.etcdClient.GetTaskStatus(ctx, cfID, cid)
		c.Assert(err, check.IsNil)
		c.Assert(subInfo.AdminJobType, check.Equals, model.AdminStop)
	}
	// check changefeed status is set admin job
	st, _, err := owner.etcdClient.GetChangeFeedStatus(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(st.AdminJobType, check.Equals, model.AdminStop)
	// check changefeed context is canceled
	select {
	case <-cctx.Done():
	default:
		c.Fatal("changefeed context is expected canceled")
	}

	// Re-arm the changefeed context so the later AdminRemove stage can assert
	// cancellation again.
	cctx, cancel = context.WithCancel(ctx)
	sampleCF.cancel = cancel

	// Stage 2: AdminResume — etcd records are marked AdminResume.
	c.Assert(owner.EnqueueJob(model.AdminJob{CfID: cfID, Type: model.AdminResume}), check.IsNil)
	c.Assert(owner.handleAdminJob(ctx), check.IsNil)
	checkAdminJobLen(0)
	// check changefeed info is set admin job
	info, err = owner.etcdClient.GetChangeFeedInfo(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(info.AdminJobType, check.Equals, model.AdminResume)
	// check changefeed status is set admin job
	st, _, err = owner.etcdClient.GetChangeFeedStatus(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(st.AdminJobType, check.Equals, model.AdminResume)

	// Stage 3: AdminRemove — changefeed info is deleted from etcd and the
	// remaining records are marked AdminRemove.
	owner.changeFeeds[cfID] = sampleCF
	c.Assert(owner.EnqueueJob(model.AdminJob{CfID: cfID, Type: model.AdminRemove}), check.IsNil)
	c.Assert(owner.handleAdminJob(ctx), check.IsNil)
	checkAdminJobLen(0)
	c.Assert(len(owner.changeFeeds), check.Equals, 0)
	// check changefeed info is deleted
	_, err = owner.etcdClient.GetChangeFeedInfo(ctx, cfID)
	c.Assert(cerror.ErrChangeFeedNotExists.Equal(err), check.IsTrue)
	// check processor is set admin job
	for cid := range sampleCF.taskPositions {
		_, subInfo, err := owner.etcdClient.GetTaskStatus(ctx, cfID, cid)
		c.Assert(err, check.IsNil)
		c.Assert(subInfo.AdminJobType, check.Equals, model.AdminRemove)
	}
	// check changefeed status is set admin job
	st, _, err = owner.etcdClient.GetChangeFeedStatus(ctx, cfID)
	c.Assert(err, check.IsNil)
	c.Assert(st.AdminJobType, check.Equals, model.AdminRemove)
	// check changefeed context is canceled
	select {
	case <-cctx.Done():
	default:
		c.Fatal("changefeed context is expected canceled")
	}
	owner.etcdClient.Close() //nolint:errcheck
}
  1014  
  1015  func (s *ownerSuite) TestChangefeedApplyDDLJob(c *check.C) {
  1016  	defer testleak.AfterTest(c)()
  1017  	var (
  1018  		jobs = []*timodel.Job{
  1019  			{
  1020  				ID:       1,
  1021  				SchemaID: 1,
  1022  				Type:     timodel.ActionCreateSchema,
  1023  				State:    timodel.JobStateSynced,
  1024  				Query:    "create database test",
  1025  				BinlogInfo: &timodel.HistoryInfo{
  1026  					SchemaVersion: 1,
  1027  					DBInfo: &timodel.DBInfo{
  1028  						ID:   1,
  1029  						Name: timodel.NewCIStr("test"),
  1030  					},
  1031  				},
  1032  			},
  1033  			{
  1034  				ID:       2,
  1035  				SchemaID: 1,
  1036  				Type:     timodel.ActionCreateTable,
  1037  				State:    timodel.JobStateSynced,
  1038  				Query:    "create table t1 (id int primary key)",
  1039  				BinlogInfo: &timodel.HistoryInfo{
  1040  					SchemaVersion: 2,
  1041  					DBInfo: &timodel.DBInfo{
  1042  						ID:   1,
  1043  						Name: timodel.NewCIStr("test"),
  1044  					},
  1045  					TableInfo: &timodel.TableInfo{
  1046  						ID:         47,
  1047  						Name:       timodel.NewCIStr("t1"),
  1048  						PKIsHandle: true,
  1049  						Columns: []*timodel.ColumnInfo{
  1050  							{ID: 1, FieldType: types.FieldType{Flag: mysql.PriKeyFlag}, State: timodel.StatePublic},
  1051  						},
  1052  					},
  1053  				},
  1054  			},
  1055  			{
  1056  				ID:       2,
  1057  				SchemaID: 1,
  1058  				Type:     timodel.ActionCreateTable,
  1059  				State:    timodel.JobStateSynced,
  1060  				Query:    "create table t2 (id int primary key)",
  1061  				BinlogInfo: &timodel.HistoryInfo{
  1062  					SchemaVersion: 2,
  1063  					DBInfo: &timodel.DBInfo{
  1064  						ID:   1,
  1065  						Name: timodel.NewCIStr("test"),
  1066  					},
  1067  					TableInfo: &timodel.TableInfo{
  1068  						ID:         49,
  1069  						Name:       timodel.NewCIStr("t2"),
  1070  						PKIsHandle: true,
  1071  						Columns: []*timodel.ColumnInfo{
  1072  							{ID: 1, FieldType: types.FieldType{Flag: mysql.PriKeyFlag}, State: timodel.StatePublic},
  1073  						},
  1074  					},
  1075  				},
  1076  			},
  1077  			{
  1078  				ID:       2,
  1079  				SchemaID: 1,
  1080  				TableID:  49,
  1081  				Type:     timodel.ActionDropTable,
  1082  				State:    timodel.JobStateSynced,
  1083  				Query:    "drop table t2",
  1084  				BinlogInfo: &timodel.HistoryInfo{
  1085  					SchemaVersion: 3,
  1086  					DBInfo: &timodel.DBInfo{
  1087  						ID:   1,
  1088  						Name: timodel.NewCIStr("test"),
  1089  					},
  1090  					TableInfo: &timodel.TableInfo{
  1091  						ID:   49,
  1092  						Name: timodel.NewCIStr("t2"),
  1093  					},
  1094  				},
  1095  			},
  1096  			{
  1097  				ID:       2,
  1098  				SchemaID: 1,
  1099  				TableID:  47,
  1100  				Type:     timodel.ActionTruncateTable,
  1101  				State:    timodel.JobStateSynced,
  1102  				Query:    "truncate table t1",
  1103  				BinlogInfo: &timodel.HistoryInfo{
  1104  					SchemaVersion: 4,
  1105  					DBInfo: &timodel.DBInfo{
  1106  						ID:   1,
  1107  						Name: timodel.NewCIStr("test"),
  1108  					},
  1109  					TableInfo: &timodel.TableInfo{
  1110  						ID:         51,
  1111  						Name:       timodel.NewCIStr("t1"),
  1112  						PKIsHandle: true,
  1113  						Columns: []*timodel.ColumnInfo{
  1114  							{ID: 1, FieldType: types.FieldType{Flag: mysql.PriKeyFlag}, State: timodel.StatePublic},
  1115  						},
  1116  					},
  1117  				},
  1118  			},
  1119  			{
  1120  				ID:       2,
  1121  				SchemaID: 1,
  1122  				TableID:  51,
  1123  				Type:     timodel.ActionDropTable,
  1124  				State:    timodel.JobStateSynced,
  1125  				Query:    "drop table t1",
  1126  				BinlogInfo: &timodel.HistoryInfo{
  1127  					SchemaVersion: 5,
  1128  					DBInfo: &timodel.DBInfo{
  1129  						ID:   1,
  1130  						Name: timodel.NewCIStr("test"),
  1131  					},
  1132  					TableInfo: &timodel.TableInfo{
  1133  						ID:   51,
  1134  						Name: timodel.NewCIStr("t1"),
  1135  					},
  1136  				},
  1137  			},
  1138  			{
  1139  				ID:       2,
  1140  				SchemaID: 1,
  1141  				Type:     timodel.ActionDropSchema,
  1142  				State:    timodel.JobStateSynced,
  1143  				Query:    "drop database test",
  1144  				BinlogInfo: &timodel.HistoryInfo{
  1145  					SchemaVersion: 6,
  1146  					DBInfo: &timodel.DBInfo{
  1147  						ID:   1,
  1148  						Name: timodel.NewCIStr("test"),
  1149  					},
  1150  				},
  1151  			},
  1152  		}
  1153  
  1154  		expectSchemas = []map[int64]tableIDMap{
  1155  			{1: make(tableIDMap)},
  1156  			{1: {47: struct{}{}}},
  1157  			{1: {47: struct{}{}, 49: struct{}{}}},
  1158  			{1: {47: struct{}{}}},
  1159  			{1: {51: struct{}{}}},
  1160  			{1: make(tableIDMap)},
  1161  			{},
  1162  		}
  1163  
  1164  		expectTables = []map[int64]model.TableName{
  1165  			{},
  1166  			{47: {Schema: "test", Table: "t1"}},
  1167  			{47: {Schema: "test", Table: "t1"}, 49: {Schema: "test", Table: "t2"}},
  1168  			{47: {Schema: "test", Table: "t1"}},
  1169  			{51: {Schema: "test", Table: "t1"}},
  1170  			{},
  1171  			{},
  1172  		}
  1173  	)
  1174  	f, err := filter.NewFilter(config.GetDefaultReplicaConfig())
  1175  	c.Assert(err, check.IsNil)
  1176  
  1177  	store, err := mockstore.NewMockStore()
  1178  	c.Assert(err, check.IsNil)
  1179  	defer func() {
  1180  		_ = store.Close()
  1181  	}()
  1182  
  1183  	txn, err := store.Begin()
  1184  	c.Assert(err, check.IsNil)
  1185  	defer func() {
  1186  		_ = txn.Rollback()
  1187  	}()
  1188  	t := meta.NewMeta(txn)
  1189  
  1190  	schemaSnap, err := entry.NewSingleSchemaSnapshotFromMeta(t, 0, false)
  1191  	c.Assert(err, check.IsNil)
  1192  
  1193  	cf := &changeFeed{
  1194  		schema:        schemaSnap,
  1195  		schemas:       make(map[model.SchemaID]tableIDMap),
  1196  		tables:        make(map[model.TableID]model.TableName),
  1197  		partitions:    make(map[model.TableID][]int64),
  1198  		orphanTables:  make(map[model.TableID]model.Ts),
  1199  		toCleanTables: make(map[model.TableID]model.Ts),
  1200  		filter:        f,
  1201  		info:          &model.ChangeFeedInfo{Config: config.GetDefaultReplicaConfig()},
  1202  	}
  1203  	for i, job := range jobs {
  1204  		err = cf.schema.HandleDDL(job)
  1205  		c.Assert(err, check.IsNil)
  1206  		err = cf.schema.FillSchemaName(job)
  1207  		c.Assert(err, check.IsNil)
  1208  		_, err = cf.applyJob(job)
  1209  		c.Assert(err, check.IsNil)
  1210  		c.Assert(cf.schemas, check.DeepEquals, expectSchemas[i])
  1211  		c.Assert(cf.tables, check.DeepEquals, expectTables[i])
  1212  	}
  1213  	s.TearDownTest(c)
  1214  }
  1215  
// TestWatchCampaignKey covers the three exit paths of owner.watchCampaignKey:
// (1) the campaign key is deleted while the watch loop is running, (2) the key
// is already gone before the watch starts, and (3) the watch is canceled via
// its context. Paths 1 and 2 must return ErrOwnerCampaignKeyDeleted; path 3
// must return nil.
func (s *ownerSuite) TestWatchCampaignKey(c *check.C) {
	defer testleak.AfterTest(c)()
	defer s.TearDownTest(c)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
	c.Assert(err, check.IsNil)
	err = capture.Campaign(ctx)
	c.Assert(err, check.IsNil)

	grpcPool := kv.NewGrpcPoolImpl(ctx, &security.Credential{})
	defer grpcPool.Close()
	ctx1, cancel1 := context.WithCancel(ctx)
	owner, err := NewOwner(ctx1, nil, grpcPool, capture.session,
		cdcGCSafePointTTL4Test, time.Millisecond*200)
	c.Assert(err, check.IsNil)

	// check campaign key deleted can be detected
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		err := owner.watchCampaignKey(ctx1)
		c.Assert(cerror.ErrOwnerCampaignKeyDeleted.Equal(err), check.IsTrue)
		cancel1()
	}()
	// ensure the watch loop has started
	// NOTE(review): sleep-based synchronization; assumes 100ms is enough for
	// the watcher goroutine to be registered — flaky under heavy load.
	time.Sleep(time.Millisecond * 100)
	// The campaign key is "<owner-key-prefix>/<lease-id-in-hex>"; deleting it
	// simulates losing ownership.
	etcdCli := owner.etcdClient.Client.Unwrap()
	key := fmt.Sprintf("%s/%x", kv.CaptureOwnerKey, owner.session.Lease())
	_, err = etcdCli.Delete(ctx, key)
	c.Assert(err, check.IsNil)
	wg.Wait()

	// check key is deleted before watch loop starts
	ctx1, cancel1 = context.WithCancel(ctx)
	err = owner.watchCampaignKey(ctx1)
	c.Assert(cerror.ErrOwnerCampaignKeyDeleted.Equal(err), check.IsTrue)

	// check the watch routine can be canceled
	err = capture.Campaign(ctx)
	c.Assert(err, check.IsNil)
	wg.Add(1)
	go func() {
		defer wg.Done()
		err := owner.watchCampaignKey(ctx1)
		c.Assert(err, check.IsNil)
	}()
	// ensure the watch loop has started
	time.Sleep(time.Millisecond * 100)
	cancel1()
	wg.Wait()

	err = capture.etcdClient.Close()
	c.Assert(err, check.IsNil)
}
  1272  
// TestCleanUpStaleTasks seeds etcd with task status/position/workload records
// for one live capture and one non-existent ("invalid") capture, then checks
// that the owner leaves everything alone until capture info is loaded, and
// that rebuildCaptureEvents removes the stale capture's records while moving
// its tables into the changefeed's orphanTables (keyed by the table's StartTs).
func (s *ownerSuite) TestCleanUpStaleTasks(c *check.C) {
	defer testleak.AfterTest(c)()
	defer s.TearDownTest(c)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	addr := "127.0.0.1:12034"
	ctx = util.PutCaptureAddrInCtx(ctx, addr)
	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
	c.Assert(err, check.IsNil)
	err = s.client.PutCaptureInfo(ctx, capture.info, capture.session.Lease())
	c.Assert(err, check.IsNil)

	changefeed := "changefeed-name"
	// The invalid capture has no CaptureInfo in etcd, so its task records are
	// stale; it alone owns table 51 so the orphan-table handoff is observable.
	invalidCapture := uuid.New().String()
	for _, captureID := range []string{capture.info.ID, invalidCapture} {
		taskStatus := &model.TaskStatus{}
		if captureID == invalidCapture {
			taskStatus.Tables = map[model.TableID]*model.TableReplicaInfo{
				51: {StartTs: 110},
			}
		}
		err = s.client.PutTaskStatus(ctx, changefeed, captureID, taskStatus)
		c.Assert(err, check.IsNil)
		_, err = s.client.PutTaskPositionOnChange(ctx, changefeed, captureID, &model.TaskPosition{CheckPointTs: 100, ResolvedTs: 120})
		c.Assert(err, check.IsNil)
		err = s.client.PutTaskWorkload(ctx, changefeed, captureID, &model.TaskWorkload{})
		c.Assert(err, check.IsNil)
	}
	err = s.client.SaveChangeFeedInfo(ctx, &model.ChangeFeedInfo{}, changefeed)
	c.Assert(err, check.IsNil)

	_, captureList, err := s.client.GetCaptures(ctx)
	c.Assert(err, check.IsNil)
	captures := make(map[model.CaptureID]*model.CaptureInfo)
	for _, c := range captureList {
		captures[c.ID] = c
	}
	grpcPool := kv.NewGrpcPoolImpl(ctx, &security.Credential{})
	defer grpcPool.Close()
	owner, err := NewOwner(ctx, nil, grpcPool, capture.session,
		cdcGCSafePointTTL4Test, time.Millisecond*200)
	c.Assert(err, check.IsNil)
	// It is better to update changefeed information by `loadChangeFeeds`, however
	// `loadChangeFeeds` is too overweight, just mock enough information here.
	owner.changeFeeds = map[model.ChangeFeedID]*changeFeed{
		changefeed: {
			id:           changefeed,
			orphanTables: make(map[model.TableID]model.Ts),
			status: &model.ChangeFeedStatus{
				CheckpointTs: 100,
			},
		},
	}

	// capture information is not built, owner.run does nothing
	err = owner.run(ctx)
	c.Assert(err, check.IsNil)
	statuses, err := s.client.GetAllTaskStatus(ctx, changefeed)
	c.Assert(err, check.IsNil)
	// stale tasks are not cleaned up, since `cleanUpStaleTasks` does not run
	c.Assert(len(statuses), check.Equals, 2)
	c.Assert(len(owner.captures), check.Equals, 0)

	// Rebuilding capture events loads the capture map and triggers stale-task
	// cleanup for the invalid capture.
	err = owner.rebuildCaptureEvents(ctx, captures)
	c.Assert(err, check.IsNil)
	c.Assert(len(owner.captures), check.Equals, 1)
	c.Assert(owner.captures, check.HasKey, capture.info.ID)
	// Table 51 is re-dispatched as an orphan at its StartTs (110).
	c.Assert(owner.changeFeeds[changefeed].orphanTables, check.DeepEquals, map[model.TableID]model.Ts{51: 110})
	c.Assert(atomic.LoadInt32(&owner.captureLoaded), check.Equals, int32(1))
	// check stale tasks are cleaned up
	statuses, err = s.client.GetAllTaskStatus(ctx, changefeed)
	c.Assert(err, check.IsNil)
	c.Assert(len(statuses), check.Equals, 1)
	c.Assert(statuses, check.HasKey, capture.info.ID)
	positions, err := s.client.GetAllTaskPositions(ctx, changefeed)
	c.Assert(err, check.IsNil)
	c.Assert(len(positions), check.Equals, 1)
	c.Assert(positions, check.HasKey, capture.info.ID)
	workloads, err := s.client.GetAllTaskWorkloads(ctx, changefeed)
	c.Assert(err, check.IsNil)
	c.Assert(len(workloads), check.Equals, 1)
	c.Assert(workloads, check.HasKey, capture.info.ID)

	err = capture.etcdClient.Close()
	c.Assert(err, check.IsNil)
}
  1359  
// TestWatchFeedChange checks that owner.watchFeedChange coalesces task
// position updates into feed-change notifications: a producer goroutine keeps
// bumping a task position in etcd while a consumer (deliberately slower than
// the producer) drains the notification channel. The consumer must observe at
// least one notification but strictly fewer than the number of updates, and
// no notification may arrive after the producer/consumer contexts end.
func (s *ownerSuite) TestWatchFeedChange(c *check.C) {
	defer testleak.AfterTest(c)()
	defer s.TearDownTest(c)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	addr := "127.0.0.1:12034"
	ctx = util.PutCaptureAddrInCtx(ctx, addr)
	capture, err := NewCapture(ctx, []string{s.clientURL.String()}, nil, nil)
	c.Assert(err, check.IsNil)
	grpcPool := kv.NewGrpcPoolImpl(ctx, &security.Credential{})
	defer grpcPool.Close()
	owner, err := NewOwner(ctx, nil, grpcPool, capture.session,
		cdcGCSafePointTTL4Test, time.Millisecond*200)
	c.Assert(err, check.IsNil)

	var (
		wg              sync.WaitGroup
		updateCount     = 0
		recvChangeCount = 0
	)
	// ctx1 bounds the producer and consumer goroutines only; the watch itself
	// runs under ctx so it outlives cancel1 (see comment before cancel1 below).
	ctx1, cancel1 := context.WithCancel(ctx)
	wg.Add(1)
	// Producer: advance the task position roughly once per millisecond until
	// ctx1 is canceled.
	go func() {
		defer wg.Done()
		changefeedID := "test-changefeed"
		pos := &model.TaskPosition{CheckPointTs: 100, ResolvedTs: 102}
		for {
			select {
			case <-ctx1.Done():
				return
			default:
			}
			pos.ResolvedTs++
			pos.CheckPointTs++
			updated, err := capture.etcdClient.PutTaskPositionOnChange(ctx1, changefeedID, capture.info.ID, pos)
			if errors.Cause(err) == context.Canceled {
				return
			}
			c.Assert(err, check.IsNil)
			c.Assert(updated, check.IsTrue)
			updateCount++
			// sleep to avoid other goroutine starvation
			time.Sleep(time.Millisecond)
		}
	}()

	feedChangeReceiver, err := owner.feedChangeNotifier.NewReceiver(ownerRunInterval)
	c.Assert(err, check.IsNil)
	defer feedChangeReceiver.Stop()
	owner.watchFeedChange(ctx)
	wg.Add(1)
	// Consumer: count notifications, sleeping 50ms per receive so that many
	// producer updates collapse into a single notification.
	go func() {
		defer func() {
			// there could be one message remaining in notification receiver, try to consume it
			select {
			case <-feedChangeReceiver.C:
			default:
			}
			wg.Done()
		}()
		for {
			select {
			case <-ctx1.Done():
				return
			case <-feedChangeReceiver.C:
				recvChangeCount++
				// sleep to simulate some owner work
				time.Sleep(time.Millisecond * 50)
			}
		}
	}()

	// Let the producer/consumer run long enough to accumulate counts.
	time.Sleep(time.Second * 2)
	// use cancel1 to avoid cancel the watchFeedChange
	cancel1()
	wg.Wait()
	c.Assert(recvChangeCount, check.Greater, 0)
	c.Assert(recvChangeCount, check.Less, updateCount)
	select {
	case <-feedChangeReceiver.C:
		c.Error("should not receive message from feed change chan any more")
	default:
	}

	err = capture.etcdClient.Close()
	if err != nil {
		c.Assert(errors.Cause(err), check.Equals, context.Canceled)
	}
}
  1450  
  1451  func (s *ownerSuite) TestWriteDebugInfo(c *check.C) {
  1452  	defer testleak.AfterTest(c)()
  1453  	defer s.TearDownTest(c)
  1454  	owner := &Owner{
  1455  		changeFeeds: map[model.ChangeFeedID]*changeFeed{
  1456  			"test": {
  1457  				id: "test",
  1458  				info: &model.ChangeFeedInfo{
  1459  					SinkURI: "blackhole://",
  1460  					Config:  config.GetDefaultReplicaConfig(),
  1461  				},
  1462  				status: &model.ChangeFeedStatus{
  1463  					ResolvedTs:   120,
  1464  					CheckpointTs: 100,
  1465  				},
  1466  			},
  1467  		},
  1468  		stoppedFeeds: map[model.ChangeFeedID]*model.ChangeFeedStatus{
  1469  			"test-2": {
  1470  				ResolvedTs:   120,
  1471  				CheckpointTs: 100,
  1472  			},
  1473  		},
  1474  		captures: map[model.CaptureID]*model.CaptureInfo{
  1475  			"capture-1": {
  1476  				ID:            "capture-1",
  1477  				AdvertiseAddr: "127.0.0.1:8301",
  1478  			},
  1479  		},
  1480  	}
  1481  	var buf bytes.Buffer
  1482  	owner.writeDebugInfo(&buf)
  1483  	c.Assert(buf.String(), check.Matches, `[\s\S]*active changefeeds[\s\S]*stopped changefeeds[\s\S]*captures[\s\S]*`)
  1484  }