github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/worker/task_checker_test.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package worker
    15  
    16  import (
    17  	"testing"
    18  	"time"
    19  
    20  	"github.com/pingcap/errors"
    21  	tmysql "github.com/pingcap/tidb/pkg/parser/mysql"
    22  	"github.com/pingcap/tiflow/dm/config"
    23  	"github.com/pingcap/tiflow/dm/pb"
    24  	"github.com/pingcap/tiflow/dm/pkg/backoff"
    25  	"github.com/pingcap/tiflow/dm/pkg/log"
    26  	"github.com/pingcap/tiflow/dm/pkg/terror"
    27  	"github.com/pingcap/tiflow/dm/unit"
    28  	"github.com/stretchr/testify/require"
    29  	"go.uber.org/zap"
    30  )
    31  
    32  var (
    33  	unsupportedModifyColumnError = unit.NewProcessError(terror.ErrDBExecuteFailed.Delegate(&tmysql.SQLError{Code: 1105, Message: "unsupported modify column length 20 is less than origin 40", State: tmysql.DefaultMySQLState}))
    34  	unknownProcessError          = unit.NewProcessError(errors.New("error message"))
    35  )
    36  
    37  func TestResumeStrategy(t *testing.T) {
    38  	require.Equal(t, resumeStrategy2Str[ResumeSkip], ResumeSkip.String())
    39  	require.Equal(t, "unsupported resume strategy: 10000", ResumeStrategy(10000).String())
    40  
    41  	taskName := "test-task"
    42  	now := func(addition time.Duration) time.Time { return time.Now().Add(addition) }
    43  	testCases := []struct {
    44  		status         *pb.SubTaskStatus
    45  		latestResumeFn func(addition time.Duration) time.Time
    46  		addition       time.Duration
    47  		duration       time.Duration
    48  		expected       ResumeStrategy
    49  	}{
    50  		{nil, now, time.Duration(0), 1 * time.Millisecond, ResumeIgnore},
    51  		{&pb.SubTaskStatus{Name: taskName, Stage: pb.Stage_Running}, now, time.Duration(0), 1 * time.Millisecond, ResumeIgnore},
    52  		{&pb.SubTaskStatus{Name: taskName, Stage: pb.Stage_Paused}, now, time.Duration(0), 1 * time.Millisecond, ResumeIgnore},
    53  		{&pb.SubTaskStatus{Name: taskName, Stage: pb.Stage_Paused, Result: &pb.ProcessResult{IsCanceled: true}}, now, time.Duration(0), 1 * time.Millisecond, ResumeIgnore},
    54  		{&pb.SubTaskStatus{Name: taskName, Stage: pb.Stage_Paused, Result: &pb.ProcessResult{IsCanceled: false, Errors: []*pb.ProcessError{unsupportedModifyColumnError}}}, now, time.Duration(0), 1 * time.Millisecond, ResumeNoSense},
    55  		{&pb.SubTaskStatus{Name: taskName, Stage: pb.Stage_Paused, Result: &pb.ProcessResult{IsCanceled: false}}, now, time.Duration(0), 1 * time.Second, ResumeSkip},
    56  		{&pb.SubTaskStatus{Name: taskName, Stage: pb.Stage_Paused, Result: &pb.ProcessResult{IsCanceled: false}}, now, -2 * time.Millisecond, 1 * time.Millisecond, ResumeDispatch},
    57  	}
    58  
    59  	tsc := NewRealTaskStatusChecker(config.CheckerConfig{
    60  		CheckEnable:     true,
    61  		CheckInterval:   config.Duration{Duration: config.DefaultCheckInterval},
    62  		BackoffRollback: config.Duration{Duration: config.DefaultBackoffRollback},
    63  		BackoffMin:      config.Duration{Duration: config.DefaultBackoffMin},
    64  		BackoffMax:      config.Duration{Duration: config.DefaultBackoffMax},
    65  		BackoffFactor:   config.DefaultBackoffFactor,
    66  	}, nil)
    67  	for _, tc := range testCases {
    68  		rtsc, ok := tsc.(*realTaskStatusChecker)
    69  		require.True(t, ok)
    70  		bf, _ := backoff.NewBackoff(
    71  			1,
    72  			false,
    73  			tc.duration,
    74  			tc.duration)
    75  		rtsc.subtaskAutoResume[taskName] = &AutoResumeInfo{
    76  			Backoff:          bf,
    77  			LatestResumeTime: tc.latestResumeFn(tc.addition),
    78  		}
    79  		strategy := rtsc.subtaskAutoResume[taskName].CheckResumeSubtask(tc.status, config.DefaultBackoffRollback)
    80  		require.Equal(t, tc.expected, strategy)
    81  	}
    82  }
    83  
    84  func TestCheck(t *testing.T) {
    85  	var (
    86  		latestResumeTime time.Time
    87  		latestPausedTime time.Time
    88  		latestBlockTime  time.Time
    89  		taskName         = "test-check-task"
    90  	)
    91  
    92  	NewRelayHolder = NewDummyRelayHolder
    93  	dir := t.TempDir()
    94  	cfg := loadSourceConfigWithoutPassword2(t)
    95  	cfg.RelayDir = dir
    96  	cfg.MetaDir = dir
    97  	w, err := NewSourceWorker(cfg, nil, "", "")
    98  	require.NoError(t, err)
    99  	w.closed.Store(false)
   100  
   101  	tsc := NewRealTaskStatusChecker(config.CheckerConfig{
   102  		CheckEnable:     true,
   103  		CheckInterval:   config.Duration{Duration: config.DefaultCheckInterval},
   104  		BackoffRollback: config.Duration{Duration: 200 * time.Millisecond},
   105  		BackoffMin:      config.Duration{Duration: 1 * time.Millisecond},
   106  		BackoffMax:      config.Duration{Duration: 1 * time.Second},
   107  		BackoffFactor:   config.DefaultBackoffFactor,
   108  	}, nil)
   109  	require.NoError(t, tsc.Init())
   110  	rtsc, ok := tsc.(*realTaskStatusChecker)
   111  	require.True(t, ok)
   112  	rtsc.w = w
   113  
   114  	st := &SubTask{
   115  		cfg:   &config.SubTaskConfig{SourceID: "source", Name: taskName},
   116  		stage: pb.Stage_Running,
   117  		l:     log.With(zap.String("subtask", taskName)),
   118  	}
   119  	require.NoError(t, st.cfg.Adjust(false))
   120  	rtsc.w.subTaskHolder.recordSubTask(st)
   121  	rtsc.check()
   122  	bf := rtsc.subtaskAutoResume[taskName].Backoff
   123  
   124  	// test resume with paused task
   125  	st.stage = pb.Stage_Paused
   126  	st.result = &pb.ProcessResult{
   127  		IsCanceled: false,
   128  		Errors:     []*pb.ProcessError{unknownProcessError},
   129  	}
   130  	time.Sleep(1 * time.Millisecond)
   131  	rtsc.check()
   132  	time.Sleep(2 * time.Millisecond)
   133  	rtsc.check()
   134  	time.Sleep(4 * time.Millisecond)
   135  	rtsc.check()
   136  	require.Equal(t, 8*time.Millisecond, bf.Current())
   137  
   138  	// test backoff rollback at least once, as well as resume ignore strategy
   139  	st.result = &pb.ProcessResult{IsCanceled: true}
   140  	time.Sleep(200 * time.Millisecond)
   141  	rtsc.check()
   142  	require.True(t, bf.Current() <= 4*time.Millisecond)
   143  	current := bf.Current()
   144  
   145  	// test no sense strategy
   146  	st.result = &pb.ProcessResult{
   147  		IsCanceled: false,
   148  		Errors:     []*pb.ProcessError{unsupportedModifyColumnError},
   149  	}
   150  	rtsc.check()
   151  	require.True(t, latestPausedTime.Before(rtsc.subtaskAutoResume[taskName].LatestPausedTime))
   152  	latestBlockTime = rtsc.subtaskAutoResume[taskName].LatestBlockTime
   153  	time.Sleep(200 * time.Millisecond)
   154  	rtsc.check()
   155  	require.Equal(t, latestBlockTime, rtsc.subtaskAutoResume[taskName].LatestBlockTime)
   156  	require.Equal(t, current, bf.Current())
   157  
   158  	// test resume skip strategy
   159  	tsc = NewRealTaskStatusChecker(config.CheckerConfig{
   160  		CheckEnable:     true,
   161  		CheckInterval:   config.Duration{Duration: config.DefaultCheckInterval},
   162  		BackoffRollback: config.Duration{Duration: 200 * time.Millisecond},
   163  		BackoffMin:      config.Duration{Duration: 10 * time.Second},
   164  		BackoffMax:      config.Duration{Duration: 100 * time.Second},
   165  		BackoffFactor:   config.DefaultBackoffFactor,
   166  	}, w)
   167  	require.NoError(t, tsc.Init())
   168  	rtsc, ok = tsc.(*realTaskStatusChecker)
   169  	require.True(t, ok)
   170  
   171  	st = &SubTask{
   172  		cfg:   &config.SubTaskConfig{Name: taskName},
   173  		stage: pb.Stage_Running,
   174  		l:     log.With(zap.String("subtask", taskName)),
   175  	}
   176  	rtsc.w.subTaskHolder.recordSubTask(st)
   177  	rtsc.check()
   178  	bf = rtsc.subtaskAutoResume[taskName].Backoff
   179  
   180  	st.stage = pb.Stage_Paused
   181  	st.result = &pb.ProcessResult{
   182  		IsCanceled: false,
   183  		Errors:     []*pb.ProcessError{unknownProcessError},
   184  	}
   185  	rtsc.check()
   186  	latestResumeTime = rtsc.subtaskAutoResume[taskName].LatestResumeTime
   187  	latestPausedTime = rtsc.subtaskAutoResume[taskName].LatestPausedTime
   188  	require.Equal(t, 10*time.Second, bf.Current())
   189  	for i := 0; i < 10; i++ {
   190  		rtsc.check()
   191  		require.Equal(t, latestResumeTime, rtsc.subtaskAutoResume[taskName].LatestResumeTime)
   192  		require.True(t, latestPausedTime.Before(rtsc.subtaskAutoResume[taskName].LatestPausedTime))
   193  		latestPausedTime = rtsc.subtaskAutoResume[taskName].LatestPausedTime
   194  	}
   195  }
   196  
   197  func TestCheckTaskIndependent(t *testing.T) {
   198  	var (
   199  		task1                 = "task1"
   200  		task2                 = "tesk2"
   201  		task1LatestResumeTime time.Time
   202  		task2LatestResumeTime time.Time
   203  		backoffMin            = 5 * time.Millisecond
   204  	)
   205  
   206  	NewRelayHolder = NewDummyRelayHolder
   207  	dir := t.TempDir()
   208  	cfg := loadSourceConfigWithoutPassword2(t)
   209  	cfg.RelayDir = dir
   210  	cfg.MetaDir = dir
   211  	w, err := NewSourceWorker(cfg, nil, "", "")
   212  	require.NoError(t, err)
   213  	w.closed.Store(false)
   214  
   215  	tsc := NewRealTaskStatusChecker(config.CheckerConfig{
   216  		CheckEnable:     true,
   217  		CheckInterval:   config.Duration{Duration: config.DefaultCheckInterval},
   218  		BackoffRollback: config.Duration{Duration: 200 * time.Millisecond},
   219  		BackoffMin:      config.Duration{Duration: backoffMin},
   220  		BackoffMax:      config.Duration{Duration: 10 * time.Second},
   221  		BackoffFactor:   1.0,
   222  	}, nil)
   223  	require.NoError(t, tsc.Init())
   224  	rtsc, ok := tsc.(*realTaskStatusChecker)
   225  	require.True(t, ok)
   226  	rtsc.w = w
   227  
   228  	st1 := &SubTask{
   229  		cfg:   &config.SubTaskConfig{Name: task1},
   230  		stage: pb.Stage_Running,
   231  		l:     log.With(zap.String("subtask", task1)),
   232  	}
   233  	rtsc.w.subTaskHolder.recordSubTask(st1)
   234  	st2 := &SubTask{
   235  		cfg:   &config.SubTaskConfig{Name: task2},
   236  		stage: pb.Stage_Running,
   237  		l:     log.With(zap.String("subtask", task2)),
   238  	}
   239  	rtsc.w.subTaskHolder.recordSubTask(st2)
   240  	rtsc.check()
   241  	require.Len(t, rtsc.subtaskAutoResume, 2)
   242  	for _, times := range rtsc.subtaskAutoResume {
   243  		require.True(t, times.LatestBlockTime.IsZero())
   244  	}
   245  
   246  	// test backoff strategies of different tasks do not affect each other
   247  	st1 = &SubTask{
   248  		cfg:   &config.SubTaskConfig{SourceID: "source", Name: task1},
   249  		stage: pb.Stage_Paused,
   250  		result: &pb.ProcessResult{
   251  			IsCanceled: false,
   252  			Errors:     []*pb.ProcessError{unsupportedModifyColumnError},
   253  		},
   254  		l: log.With(zap.String("subtask", task1)),
   255  	}
   256  	require.NoError(t, st1.cfg.Adjust(false))
   257  	rtsc.w.subTaskHolder.recordSubTask(st1)
   258  	st2 = &SubTask{
   259  		cfg:   &config.SubTaskConfig{SourceID: "source", Name: task2},
   260  		stage: pb.Stage_Paused,
   261  		result: &pb.ProcessResult{
   262  			IsCanceled: false,
   263  			Errors:     []*pb.ProcessError{unknownProcessError},
   264  		},
   265  		l: log.With(zap.String("subtask", task2)),
   266  	}
   267  	require.NoError(t, st2.cfg.Adjust(false))
   268  	rtsc.w.subTaskHolder.recordSubTask(st2)
   269  
   270  	task1LatestResumeTime = rtsc.subtaskAutoResume[task1].LatestResumeTime
   271  	task2LatestResumeTime = rtsc.subtaskAutoResume[task2].LatestResumeTime
   272  	for i := 0; i < 10; i++ {
   273  		time.Sleep(backoffMin)
   274  		rtsc.check()
   275  		require.Equal(t, task1LatestResumeTime, rtsc.subtaskAutoResume[task1].LatestResumeTime)
   276  		require.True(t, task2LatestResumeTime.Before(rtsc.subtaskAutoResume[task2].LatestResumeTime))
   277  		require.False(t, rtsc.subtaskAutoResume[task1].LatestBlockTime.IsZero())
   278  		require.True(t, rtsc.subtaskAutoResume[task2].LatestBlockTime.IsZero())
   279  
   280  		task2LatestResumeTime = rtsc.subtaskAutoResume[task2].LatestResumeTime
   281  	}
   282  
   283  	// test task information cleanup in task status checker
   284  	rtsc.w.subTaskHolder.removeSubTask(task1)
   285  	time.Sleep(backoffMin)
   286  	rtsc.check()
   287  	require.True(t, task2LatestResumeTime.Before(rtsc.subtaskAutoResume[task2].LatestResumeTime))
   288  	require.Len(t, rtsc.subtaskAutoResume, 1)
   289  	require.True(t, rtsc.subtaskAutoResume[task2].LatestBlockTime.IsZero())
   290  }