github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/controller/controller_test.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package controller
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"math"
    20  	"math/rand"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/pingcap/tiflow/cdc/model"
    25  	"github.com/pingcap/tiflow/cdc/vars"
    26  	"github.com/pingcap/tiflow/pkg/config"
    27  	"github.com/pingcap/tiflow/pkg/errors"
    28  	"github.com/pingcap/tiflow/pkg/etcd"
    29  	"github.com/pingcap/tiflow/pkg/orchestrator"
    30  	"github.com/pingcap/tiflow/pkg/pdutil"
    31  	"github.com/pingcap/tiflow/pkg/txnutil/gc"
    32  	"github.com/pingcap/tiflow/pkg/upstream"
    33  	"github.com/pingcap/tiflow/pkg/util"
    34  	"github.com/stretchr/testify/require"
    35  	"github.com/tikv/client-go/v2/oracle"
    36  )
    37  
    38  func createController4Test(globalVars *vars.GlobalVars,
    39  	t *testing.T) (*controllerImpl, *orchestrator.GlobalReactorState,
    40  	*orchestrator.ReactorStateTester,
    41  ) {
    42  	pdClient := &gc.MockPDClient{
    43  		UpdateServiceGCSafePointFunc: func(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) {
    44  			return safePoint, nil
    45  		},
    46  	}
    47  
    48  	m := upstream.NewManager4Test(pdClient)
    49  	o := NewController(m, &model.CaptureInfo{}, nil).(*controllerImpl)
    50  
    51  	state := orchestrator.NewGlobalStateForTest(etcd.DefaultCDCClusterID)
    52  	tester := orchestrator.NewReactorStateTester(t, state, nil)
    53  
    54  	// set captures
    55  	cdcKey := etcd.CDCKey{
    56  		ClusterID: state.ClusterID,
    57  		Tp:        etcd.CDCKeyTypeCapture,
    58  		CaptureID: globalVars.CaptureInfo.ID,
    59  	}
    60  	captureBytes, err := globalVars.CaptureInfo.Marshal()
    61  	require.Nil(t, err)
    62  	tester.MustUpdate(cdcKey.String(), captureBytes)
    63  	return o, state, tester
    64  }
    65  
    66  func TestUpdateGCSafePoint(t *testing.T) {
    67  	mockPDClient := &gc.MockPDClient{}
    68  	m := upstream.NewManager4Test(mockPDClient)
    69  	o := NewController(m, &model.CaptureInfo{}, nil).(*controllerImpl)
    70  	ctx, cancel := context.WithCancel(context.Background())
    71  	defer cancel()
    72  	state := orchestrator.NewGlobalStateForTest(etcd.DefaultCDCClusterID)
    73  	tester := orchestrator.NewReactorStateTester(t, state, nil)
    74  
    75  	// no changefeed, the gc safe point should be max uint64
    76  	mockPDClient.UpdateServiceGCSafePointFunc = func(
    77  		ctx context.Context, serviceID string, ttl int64, safePoint uint64,
    78  	) (uint64, error) {
    79  		// Owner will do a snapshot read at (checkpointTs - 1) from TiKV,
    80  		// set GC safepoint to (checkpointTs - 1)
    81  		require.Equal(t, safePoint, uint64(math.MaxUint64-1))
    82  		return 0, nil
    83  	}
    84  
    85  	// add a failed changefeed, it must not trigger update GC safepoint.
    86  	mockPDClient.UpdateServiceGCSafePointFunc = func(
    87  		ctx context.Context, serviceID string, ttl int64, safePoint uint64,
    88  	) (uint64, error) {
    89  		return 0, nil
    90  	}
    91  	changefeedID1 := model.DefaultChangeFeedID("test-changefeed1")
    92  	tester.MustUpdate(
    93  		fmt.Sprintf("%s/changefeed/info/%s",
    94  			etcd.DefaultClusterAndNamespacePrefix,
    95  			changefeedID1.ID),
    96  		[]byte(`{"config":{},"state":"failed"}`))
    97  	tester.MustApplyPatches()
    98  	gcErr := errors.ChangeFeedGCFastFailError[rand.Intn(len(errors.ChangeFeedGCFastFailError))]
    99  	errCode, ok := errors.RFCCode(gcErr)
   100  	require.True(t, ok)
   101  	state.Changefeeds[changefeedID1].PatchInfo(
   102  		func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
   103  			if info == nil {
   104  				return nil, false, nil
   105  			}
   106  			info.Error = &model.RunningError{Code: string(errCode), Message: gcErr.Error()}
   107  			return info, true, nil
   108  		})
   109  	state.Changefeeds[changefeedID1].PatchStatus(
   110  		func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
   111  			return &model.ChangeFeedStatus{CheckpointTs: 2}, true, nil
   112  		})
   113  	tester.MustApplyPatches()
   114  	err := o.updateGCSafepoint(ctx, state)
   115  	require.Nil(t, err)
   116  
   117  	// switch the state of changefeed to normal, it must update GC safepoint to
   118  	// 1 (checkpoint Ts of changefeed-test1).
   119  	ch := make(chan struct{}, 1)
   120  	mockPDClient.UpdateServiceGCSafePointFunc = func(
   121  		ctx context.Context, serviceID string, ttl int64, safePoint uint64,
   122  	) (uint64, error) {
   123  		// Owner will do a snapshot read at (checkpointTs - 1) from TiKV,
   124  		// set GC safepoint to (checkpointTs - 1)
   125  		require.Equal(t, safePoint, uint64(1))
   126  		require.Equal(t, serviceID, etcd.GcServiceIDForTest())
   127  		ch <- struct{}{}
   128  		return 0, nil
   129  	}
   130  	state.Changefeeds[changefeedID1].PatchInfo(
   131  		func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
   132  			info.State = model.StateNormal
   133  			return info, true, nil
   134  		})
   135  	tester.MustApplyPatches()
   136  	err = o.updateGCSafepoint(ctx, state)
   137  	require.Nil(t, err)
   138  	select {
   139  	case <-time.After(5 * time.Second):
   140  		t.Fatal("timeout")
   141  	case <-ch:
   142  	}
   143  
   144  	// add another changefeed, it must update GC safepoint.
   145  	changefeedID2 := model.DefaultChangeFeedID("test-changefeed2")
   146  	tester.MustUpdate(
   147  		fmt.Sprintf("%s/changefeed/info/%s",
   148  			etcd.DefaultClusterAndNamespacePrefix,
   149  			changefeedID2.ID),
   150  		[]byte(`{"config":{},"state":"normal"}`))
   151  	tester.MustApplyPatches()
   152  	state.Changefeeds[changefeedID1].PatchStatus(
   153  		func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
   154  			return &model.ChangeFeedStatus{CheckpointTs: 20}, true, nil
   155  		})
   156  	state.Changefeeds[changefeedID2].PatchStatus(
   157  		func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
   158  			return &model.ChangeFeedStatus{CheckpointTs: 30}, true, nil
   159  		})
   160  	tester.MustApplyPatches()
   161  	mockPDClient.UpdateServiceGCSafePointFunc = func(
   162  		ctx context.Context, serviceID string, ttl int64, safePoint uint64,
   163  	) (uint64, error) {
   164  		// Owner will do a snapshot read at (checkpointTs - 1) from TiKV,
   165  		// set GC safepoint to (checkpointTs - 1)
   166  		require.Equal(t, safePoint, uint64(19))
   167  		require.Equal(t, serviceID, etcd.GcServiceIDForTest())
   168  		ch <- struct{}{}
   169  		return 0, nil
   170  	}
   171  	err = o.updateGCSafepoint(ctx, state)
   172  	require.Nil(t, err)
   173  	select {
   174  	case <-time.After(5 * time.Second):
   175  		t.Fatal("timeout")
   176  	case <-ch:
   177  	}
   178  }
   179  
   180  func TestCalculateGCSafepointTs(t *testing.T) {
   181  	state := orchestrator.NewGlobalStateForTest(etcd.DefaultCDCClusterID)
   182  	expectMinTsMap := make(map[uint64]uint64)
   183  	expectForceUpdateMap := make(map[uint64]interface{})
   184  	o := &controllerImpl{changefeeds: make(map[model.ChangeFeedID]*orchestrator.ChangefeedReactorState)}
   185  	o.upstreamManager = upstream.NewManager4Test(nil)
   186  
   187  	stateMap := []model.FeedState{
   188  		model.StateNormal, model.StateStopped,
   189  		model.StateWarning, model.StatePending,
   190  		model.StateFailed, /* failed changefeed with normal error should not be ignored */
   191  	}
   192  	for i := 0; i < 100; i++ {
   193  		cfID := model.DefaultChangeFeedID(fmt.Sprintf("testChangefeed-%d", i))
   194  		upstreamID := uint64(i / 10)
   195  		cfStatus := &model.ChangeFeedStatus{CheckpointTs: uint64(i) + 100}
   196  		cfInfo := &model.ChangeFeedInfo{UpstreamID: upstreamID, State: stateMap[rand.Intn(4)]}
   197  		if cfInfo.State == model.StateFailed {
   198  			cfInfo.Error = &model.RunningError{
   199  				Addr:    "test",
   200  				Code:    "test",
   201  				Message: "test",
   202  			}
   203  		}
   204  		changefeed := &orchestrator.ChangefeedReactorState{
   205  			ID:     cfID,
   206  			Info:   cfInfo,
   207  			Status: cfStatus,
   208  		}
   209  		state.Changefeeds[cfID] = changefeed
   210  
   211  		// expectMinTsMap will be like map[upstreamID]{0, 10, 20, ..., 90}
   212  		if i%10 == 0 {
   213  			expectMinTsMap[upstreamID] = uint64(i) + 100
   214  		}
   215  
   216  		// If a changefeed does not exist in ownerImpl.changefeeds,
   217  		// forceUpdate should be true.
   218  		if upstreamID%2 == 0 {
   219  			expectForceUpdateMap[upstreamID] = nil
   220  		} else {
   221  			o.changefeeds[cfID] = nil
   222  		}
   223  	}
   224  
   225  	for i := 0; i < 10; i++ {
   226  		cfID := model.DefaultChangeFeedID(fmt.Sprintf("testChangefeed-ignored-%d", i))
   227  		upstreamID := uint64(i)
   228  		cfStatus := &model.ChangeFeedStatus{CheckpointTs: uint64(i)}
   229  		err := errors.ChangeFeedGCFastFailError[rand.Intn(len(errors.ChangeFeedGCFastFailError))]
   230  		errCode, ok := errors.RFCCode(err)
   231  		require.True(t, ok)
   232  		cfInfo := &model.ChangeFeedInfo{
   233  			UpstreamID: upstreamID,
   234  			State:      model.StateFailed,
   235  			Error:      &model.RunningError{Code: string(errCode), Message: err.Error()},
   236  		}
   237  		changefeed := &orchestrator.ChangefeedReactorState{
   238  			ID:     cfID,
   239  			Info:   cfInfo,
   240  			Status: cfStatus,
   241  		}
   242  		state.Changefeeds[cfID] = changefeed
   243  	}
   244  
   245  	minCheckpoinTsMap, forceUpdateMap := o.calculateGCSafepoint(state)
   246  
   247  	require.Equal(t, expectMinTsMap, minCheckpoinTsMap)
   248  	require.Equal(t, expectForceUpdateMap, forceUpdateMap)
   249  }
   250  
   251  func TestCalculateGCSafepointTsNoChangefeed(t *testing.T) {
   252  	state := orchestrator.NewGlobalStateForTest(etcd.DefaultCDCClusterID)
   253  	expectForceUpdateMap := make(map[uint64]interface{})
   254  	o := &controllerImpl{changefeeds: make(map[model.ChangeFeedID]*orchestrator.ChangefeedReactorState)}
   255  	o.upstreamManager = upstream.NewManager4Test(nil)
   256  	up, err := o.upstreamManager.GetDefaultUpstream()
   257  	require.Nil(t, err)
   258  	up.PDClock = pdutil.NewClock4Test()
   259  
   260  	minCheckpoinTsMap, forceUpdateMap := o.calculateGCSafepoint(state)
   261  	require.Equal(t, 1, len(minCheckpoinTsMap))
   262  	require.Equal(t, expectForceUpdateMap, forceUpdateMap)
   263  }
   264  
   265  func TestFixChangefeedState(t *testing.T) {
   266  	globalVars := vars.NewGlobalVars4Test()
   267  	ctx := context.Background()
   268  	controller4Test, state, tester := createController4Test(globalVars, t)
   269  	changefeedID := model.DefaultChangeFeedID("test-changefeed")
   270  	// Mismatched state and admin job.
   271  	changefeedInfo := &model.ChangeFeedInfo{
   272  		State:        model.StateNormal,
   273  		AdminJobType: model.AdminStop,
   274  		StartTs:      oracle.GoTimeToTS(time.Now()),
   275  		Config:       config.GetDefaultReplicaConfig(),
   276  	}
   277  	changefeedStr, err := changefeedInfo.Marshal()
   278  	require.Nil(t, err)
   279  	cdcKey := etcd.CDCKey{
   280  		ClusterID:    state.ClusterID,
   281  		Tp:           etcd.CDCKeyTypeChangefeedInfo,
   282  		ChangefeedID: changefeedID,
   283  	}
   284  	tester.MustUpdate(cdcKey.String(), []byte(changefeedStr))
   285  	// For the first tick, we do a bootstrap, and it tries to fix the meta information.
   286  	_, err = controller4Test.Tick(ctx, state)
   287  	tester.MustApplyPatches()
   288  	require.Nil(t, err)
   289  	require.NotContains(t, controller4Test.changefeeds, changefeedID)
   290  	// Start tick normally.
   291  	_, err = controller4Test.Tick(ctx, state)
   292  	tester.MustApplyPatches()
   293  	require.Nil(t, err)
   294  	require.Contains(t, controller4Test.changefeeds, changefeedID)
   295  	// The meta information is fixed correctly.
   296  	require.Equal(t, controller4Test.changefeeds[changefeedID].Info.State, model.StateStopped)
   297  }
   298  
   299  func TestCheckClusterVersion(t *testing.T) {
   300  	globalVars := vars.NewGlobalVars4Test()
   301  	controller4Test, state, tester := createController4Test(globalVars, t)
   302  	ctx, cancel := context.WithCancel(context.Background())
   303  	defer cancel()
   304  
   305  	tester.MustUpdate(fmt.Sprintf("%s/capture/6bbc01c8-0605-4f86-a0f9-b3119109b225",
   306  		etcd.DefaultClusterAndMetaPrefix),
   307  		[]byte(`{"id":"6bbc01c8-0605-4f86-a0f9-b3119109b225",
   308  "address":"127.0.0.1:8300","version":"v6.0.0"}`))
   309  
   310  	changefeedID := model.DefaultChangeFeedID("test-changefeed")
   311  	changefeedInfo := &model.ChangeFeedInfo{
   312  		StartTs: oracle.GoTimeToTS(time.Now()),
   313  		Config:  config.GetDefaultReplicaConfig(),
   314  	}
   315  	changefeedStr, err := changefeedInfo.Marshal()
   316  	require.Nil(t, err)
   317  	cdcKey := etcd.CDCKey{
   318  		ClusterID:    state.ClusterID,
   319  		Tp:           etcd.CDCKeyTypeChangefeedInfo,
   320  		ChangefeedID: changefeedID,
   321  	}
   322  	tester.MustUpdate(cdcKey.String(), []byte(changefeedStr))
   323  
   324  	// check the tick is skipped and the changefeed will not be handled
   325  	_, err = controller4Test.Tick(ctx, state)
   326  	tester.MustApplyPatches()
   327  	require.Nil(t, err)
   328  	require.NotContains(t, controller4Test.changefeeds, changefeedID)
   329  
   330  	tester.MustUpdate(fmt.Sprintf("%s/capture/6bbc01c8-0605-4f86-a0f9-b3119109b225",
   331  		etcd.DefaultClusterAndMetaPrefix,
   332  	),
   333  		[]byte(`{"id":"6bbc01c8-0605-4f86-a0f9-b3119109b225","address":"127.0.0.1:8300","version":"`+
   334  			globalVars.CaptureInfo.Version+`"}`))
   335  
   336  	// check the tick is not skipped and the changefeed will be handled normally
   337  	_, err = controller4Test.Tick(ctx, state)
   338  	tester.MustApplyPatches()
   339  	require.Nil(t, err)
   340  	require.Contains(t, controller4Test.changefeeds, changefeedID)
   341  }
   342  
   343  func TestFixChangefeedSinkProtocol(t *testing.T) {
   344  	globalVars := vars.NewGlobalVars4Test()
   345  	controller4Test, state, tester := createController4Test(globalVars, t)
   346  	ctx := context.Background()
   347  	changefeedID := model.DefaultChangeFeedID("test-changefeed")
   348  	// Unknown protocol.
   349  	changefeedInfo := &model.ChangeFeedInfo{
   350  		State:          model.StateNormal,
   351  		AdminJobType:   model.AdminStop,
   352  		StartTs:        oracle.GoTimeToTS(time.Now()),
   353  		CreatorVersion: "5.3.0",
   354  		SinkURI:        "kafka://127.0.0.1:9092/ticdc-test2?protocol=random",
   355  		Config: &config.ReplicaConfig{
   356  			Sink: &config.SinkConfig{Protocol: util.AddressOf(config.ProtocolDefault.String())},
   357  		},
   358  	}
   359  	changefeedStr, err := changefeedInfo.Marshal()
   360  	require.Nil(t, err)
   361  	cdcKey := etcd.CDCKey{
   362  		ClusterID:    state.ClusterID,
   363  		Tp:           etcd.CDCKeyTypeChangefeedInfo,
   364  		ChangefeedID: changefeedID,
   365  	}
   366  	tester.MustUpdate(cdcKey.String(), []byte(changefeedStr))
   367  	// For the first tick, we do a bootstrap, and it tries to fix the meta information.
   368  	_, err = controller4Test.Tick(ctx, state)
   369  	tester.MustApplyPatches()
   370  	require.Nil(t, err)
   371  	require.NotContains(t, controller4Test.changefeeds, changefeedID)
   372  
   373  	// Start tick normally.
   374  	_, err = controller4Test.Tick(ctx, state)
   375  	tester.MustApplyPatches()
   376  	require.Nil(t, err)
   377  	require.Contains(t, controller4Test.changefeeds, changefeedID)
   378  	// The meta information is fixed correctly.
   379  	require.Equal(t, controller4Test.changefeeds[changefeedID].Info.SinkURI,
   380  		"kafka://127.0.0.1:9092/ticdc-test2?protocol=open-protocol")
   381  }