github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/framework/base_jobmaster_test.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package framework
    15  
    16  import (
    17  	"context"
    18  	"net/http"
    19  	"net/http/httptest"
    20  	"sync"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/gin-gonic/gin"
    25  	"github.com/golang/mock/gomock"
    26  	frameModel "github.com/pingcap/tiflow/engine/framework/model"
    27  	"github.com/pingcap/tiflow/engine/pkg/client"
    28  	dcontext "github.com/pingcap/tiflow/engine/pkg/context"
    29  	"github.com/pingcap/tiflow/engine/pkg/deps"
    30  	metaMock "github.com/pingcap/tiflow/engine/pkg/meta/mock"
    31  	pkgOrm "github.com/pingcap/tiflow/engine/pkg/orm"
    32  	ormModel "github.com/pingcap/tiflow/engine/pkg/orm/model"
    33  	"github.com/pingcap/tiflow/engine/pkg/p2p"
    34  	"github.com/pingcap/tiflow/engine/pkg/tenant"
    35  	"github.com/pingcap/tiflow/pkg/errors"
    36  	"github.com/stretchr/testify/mock"
    37  	"github.com/stretchr/testify/require"
    38  )
    39  
// Fixed identifiers shared by every test in this file.
const (
	// jobManagerID is handed to NewBaseJobMaster together with jobMasterID.
	jobManagerID = "job-manager"
	// jobMasterID is the ID of the job master under test; it is also used as
	// the ID of the MasterMeta record that the test helper upserts.
	jobMasterID  = "my-master"
)
    44  
// testJobMasterImpl is a mock JobMasterImpl used to test
// the correctness of BaseJobMaster.
// TODO move testJobMasterImpl to a separate file
type testJobMasterImpl struct {
	// mu guards the embedded mock.Mock: the mocked methods take it before
	// recording a call, and the tests take it before registering
	// expectations, asserting call counts or resetting the recorded state.
	mu sync.Mutex
	mock.Mock

	// base is the DefaultBaseJobMaster under test; each test assigns it right
	// after building it with newBaseJobMasterForTests.
	base *DefaultBaseJobMaster
}
    54  
// Compile-time check that *testJobMasterImpl satisfies JobMasterImpl.
var _ JobMasterImpl = (*testJobMasterImpl)(nil)
    56  
    57  func (m *testJobMasterImpl) InitImpl(ctx context.Context) error {
    58  	m.mu.Lock()
    59  	defer m.mu.Unlock()
    60  
    61  	args := m.Called(ctx)
    62  	return args.Error(0)
    63  }
    64  
    65  func (m *testJobMasterImpl) Tick(ctx context.Context) error {
    66  	m.mu.Lock()
    67  	defer m.mu.Unlock()
    68  
    69  	args := m.Called(ctx)
    70  	return args.Error(0)
    71  }
    72  
    73  func (m *testJobMasterImpl) CloseImpl(ctx context.Context) {
    74  	m.mu.Lock()
    75  	defer m.mu.Unlock()
    76  
    77  	m.Called(ctx)
    78  }
    79  
    80  func (m *testJobMasterImpl) StopImpl(ctx context.Context) {
    81  	m.mu.Lock()
    82  	defer m.mu.Unlock()
    83  
    84  	m.Called(ctx)
    85  }
    86  
    87  func (m *testJobMasterImpl) OnMasterRecovered(ctx context.Context) error {
    88  	m.mu.Lock()
    89  	defer m.mu.Unlock()
    90  
    91  	args := m.Called(ctx)
    92  	return args.Error(0)
    93  }
    94  
    95  func (m *testJobMasterImpl) OnWorkerStatusUpdated(worker WorkerHandle, newStatus *frameModel.WorkerStatus) error {
    96  	m.mu.Lock()
    97  	defer m.mu.Unlock()
    98  
    99  	args := m.Called(worker, newStatus)
   100  	return args.Error(0)
   101  }
   102  
   103  func (m *testJobMasterImpl) OnWorkerDispatched(worker WorkerHandle, result error) error {
   104  	m.mu.Lock()
   105  	defer m.mu.Unlock()
   106  
   107  	args := m.Called(worker, result)
   108  	return args.Error(0)
   109  }
   110  
   111  func (m *testJobMasterImpl) OnWorkerOnline(worker WorkerHandle) error {
   112  	m.mu.Lock()
   113  	defer m.mu.Unlock()
   114  
   115  	args := m.Called(worker)
   116  	return args.Error(0)
   117  }
   118  
   119  func (m *testJobMasterImpl) OnWorkerOffline(worker WorkerHandle, reason error) error {
   120  	m.mu.Lock()
   121  	defer m.mu.Unlock()
   122  
   123  	args := m.Called(worker, reason)
   124  	return args.Error(0)
   125  }
   126  
   127  func (m *testJobMasterImpl) OnWorkerMessage(worker WorkerHandle, topic p2p.Topic, message interface{}) error {
   128  	m.mu.Lock()
   129  	defer m.mu.Unlock()
   130  
   131  	args := m.Called(worker, topic, message)
   132  	return args.Error(0)
   133  }
   134  
   135  func (m *testJobMasterImpl) OnOpenAPIInitialized(apiGroup *gin.RouterGroup) {
   136  	apiGroup.GET("/status", func(c *gin.Context) {
   137  		c.String(http.StatusOK, "success")
   138  	})
   139  }
   140  
// IsJobMasterImpl panics unconditionally; the tests in this file never call it.
// NOTE(review): presumably a marker method required by the JobMasterImpl
// interface — confirm against the interface definition.
func (m *testJobMasterImpl) IsJobMasterImpl() {
	panic("unreachable")
}
   144  
   145  func (m *testJobMasterImpl) Status() frameModel.WorkerStatus {
   146  	return frameModel.WorkerStatus{
   147  		State: frameModel.WorkerStateNormal,
   148  	}
   149  }
   150  
   151  func (m *testJobMasterImpl) OnCancel(ctx context.Context) error {
   152  	m.mu.Lock()
   153  	defer m.mu.Unlock()
   154  
   155  	args := m.Called(ctx)
   156  	return args.Error(0)
   157  }
   158  
   159  // simulate the job manager to insert a job record first since job master will only update the job
   160  func prepareInsertJob(ctx context.Context, cli pkgOrm.Client, jobID string) error {
   161  	return cli.UpsertJob(ctx, &frameModel.MasterMeta{
   162  		ID:    jobID,
   163  		State: frameModel.MasterStateUninit,
   164  	})
   165  }
   166  
   167  func newBaseJobMasterForTests(t *testing.T, impl JobMasterImpl) *DefaultBaseJobMaster {
   168  	cli, err := pkgOrm.NewMockClient()
   169  	require.NoError(t, err)
   170  	params := masterParamListForTest{
   171  		MessageHandlerManager: p2p.NewMockMessageHandlerManager(),
   172  		MessageSender:         p2p.NewMockMessageSender(),
   173  		FrameMetaClient:       cli,
   174  		BusinessClientConn:    metaMock.NewMockClientConn(),
   175  		ExecutorGroup:         client.NewMockExecutorGroup(),
   176  		ServerMasterClient:    client.NewMockServerMasterClient(gomock.NewController(t)),
   177  	}
   178  	dp := deps.NewDeps()
   179  	err = dp.Provide(func() masterParamListForTest {
   180  		return params
   181  	})
   182  	require.NoError(t, err)
   183  
   184  	ctx := dcontext.Background()
   185  	epoch, err := params.FrameMetaClient.GenEpoch(ctx)
   186  	require.NoError(t, err)
   187  
   188  	ctx = ctx.WithDeps(dp)
   189  	ctx.Environ.NodeID = "test-node-id"
   190  	ctx.Environ.Addr = "127.0.0.1:10000"
   191  	ctx.ProjectInfo = tenant.TestProjectInfo
   192  	masterMeta := &frameModel.MasterMeta{
   193  		ProjectID: tenant.TestProjectInfo.UniqueID(),
   194  		Addr:      ctx.Environ.Addr,
   195  		NodeID:    ctx.Environ.NodeID,
   196  		ID:        jobMasterID,
   197  		Type:      frameModel.FakeJobMaster,
   198  		Epoch:     epoch,
   199  		State:     frameModel.MasterStateUninit,
   200  	}
   201  	masterMetaBytes, err := masterMeta.Marshal()
   202  	require.NoError(t, err)
   203  	ctx.Environ.MasterMetaBytes = masterMetaBytes
   204  	err = cli.UpsertJob(ctx, masterMeta)
   205  	require.NoError(t, err)
   206  
   207  	return NewBaseJobMaster(
   208  		ctx,
   209  		impl,
   210  		jobManagerID,
   211  		jobMasterID,
   212  		frameModel.FakeTask,
   213  		epoch,
   214  	).(*DefaultBaseJobMaster)
   215  }
   216  
   217  func TestBaseJobMasterBasics(t *testing.T) {
   218  	t.Parallel()
   219  
   220  	jobMaster := &testJobMasterImpl{}
   221  	base := newBaseJobMasterForTests(t, jobMaster)
   222  	jobMaster.base = base
   223  
   224  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   225  	defer cancel()
   226  
   227  	jobMaster.mu.Lock()
   228  	jobMaster.On("InitImpl", mock.Anything).Return(nil)
   229  	jobMaster.mu.Unlock()
   230  
   231  	err := jobMaster.base.Init(ctx)
   232  	require.NoError(t, err)
   233  
   234  	jobMaster.mu.Lock()
   235  	jobMaster.AssertNumberOfCalls(t, "InitImpl", 1)
   236  
   237  	// clean status
   238  	jobMaster.ExpectedCalls = nil
   239  	jobMaster.Calls = nil
   240  
   241  	jobMaster.On("Tick", mock.Anything).Return(nil)
   242  	jobMaster.mu.Unlock()
   243  
   244  	err = jobMaster.base.Poll(ctx)
   245  	require.NoError(t, err)
   246  
   247  	jobMaster.mu.Lock()
   248  	jobMaster.AssertNumberOfCalls(t, "Tick", 1)
   249  
   250  	// clean status
   251  	jobMaster.ExpectedCalls = nil
   252  	jobMaster.Calls = nil
   253  
   254  	jobMaster.On("CloseImpl", mock.Anything).Return()
   255  	jobMaster.mu.Unlock()
   256  
   257  	status := jobMaster.Status()
   258  	err = jobMaster.base.Exit(ctx, ExitReasonFinished, nil, status.ExtBytes)
   259  	require.NoError(t, err)
   260  
   261  	err = jobMaster.base.Close(ctx)
   262  	require.NoError(t, err)
   263  
   264  	jobMaster.mu.Lock()
   265  	jobMaster.AssertNumberOfCalls(t, "CloseImpl", 1)
   266  	jobMaster.mu.Unlock()
   267  }
   268  
   269  func TestOnOpenAPIInitialized(t *testing.T) {
   270  	t.Parallel()
   271  
   272  	jobMaster := &testJobMasterImpl{}
   273  	base := newBaseJobMasterForTests(t, jobMaster)
   274  	jobMaster.base = base
   275  
   276  	engine := gin.New()
   277  	apiGroup := engine.Group("/api/v1/jobs/test")
   278  	base.TriggerOpenAPIInitialize(apiGroup)
   279  
   280  	w := httptest.NewRecorder()
   281  	req := httptest.NewRequest(http.MethodGet, "/api/v1/jobs/test/status", nil)
   282  	engine.ServeHTTP(w, req)
   283  	require.Equal(t, http.StatusOK, w.Code)
   284  	require.Equal(t, "success", w.Body.String())
   285  }
   286  
   287  func TestJobMasterExit(t *testing.T) {
   288  	t.Parallel()
   289  
   290  	cases := []struct {
   291  		exitReason       ExitReason
   292  		err              error
   293  		detail           string
   294  		expectedState    frameModel.MasterState
   295  		expectedErrorMsg string
   296  		expectedDetail   string
   297  	}{
   298  		{
   299  			exitReason:       ExitReasonFinished,
   300  			err:              nil,
   301  			detail:           "test finished",
   302  			expectedState:    frameModel.MasterStateFinished,
   303  			expectedErrorMsg: "",
   304  			expectedDetail:   "test finished",
   305  		},
   306  		{
   307  			exitReason:       ExitReasonFinished,
   308  			err:              errors.New("test finished with error"),
   309  			detail:           "test finished",
   310  			expectedState:    frameModel.MasterStateFinished,
   311  			expectedErrorMsg: "test finished with error",
   312  			expectedDetail:   "test finished",
   313  		},
   314  		{
   315  			exitReason:       ExitReasonCanceled,
   316  			err:              nil,
   317  			detail:           "test canceled",
   318  			expectedState:    frameModel.MasterStateStopped,
   319  			expectedErrorMsg: "",
   320  			expectedDetail:   "test canceled",
   321  		},
   322  		{
   323  			exitReason:       ExitReasonCanceled,
   324  			err:              errors.New("test canceled with error"),
   325  			detail:           "test canceled",
   326  			expectedState:    frameModel.MasterStateStopped,
   327  			expectedErrorMsg: "test canceled with error",
   328  			expectedDetail:   "test canceled",
   329  		},
   330  		{
   331  			exitReason:       ExitReasonFailed,
   332  			err:              nil,
   333  			detail:           "test failed",
   334  			expectedState:    frameModel.MasterStateFailed,
   335  			expectedErrorMsg: "",
   336  			expectedDetail:   "test failed",
   337  		},
   338  		{
   339  			exitReason:       ExitReasonFailed,
   340  			err:              errors.New("test failed with error"),
   341  			detail:           "test failed",
   342  			expectedState:    frameModel.MasterStateFailed,
   343  			expectedErrorMsg: "test failed with error",
   344  			expectedDetail:   "test failed",
   345  		},
   346  	}
   347  
   348  	for _, cs := range cases {
   349  		jobMaster := &testJobMasterImpl{}
   350  		base := newBaseJobMasterForTests(t, jobMaster)
   351  		jobMaster.base = base
   352  		require.Equal(t, jobMasterID, jobMaster.base.ID())
   353  
   354  		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   355  		defer cancel()
   356  
   357  		err := prepareInsertJob(ctx, base.master.frameMetaClient, jobMaster.base.ID())
   358  		require.NoError(t, err)
   359  
   360  		jobMaster.mu.Lock()
   361  		jobMaster.On("InitImpl", mock.Anything).Return(nil)
   362  		jobMaster.mu.Unlock()
   363  
   364  		err = jobMaster.base.Init(ctx)
   365  		require.NoError(t, err)
   366  
   367  		metas, err := jobMaster.base.master.frameMetaClient.QueryJobs(ctx)
   368  		require.NoError(t, err)
   369  		require.Len(t, metas, 1)
   370  
   371  		jobMaster.mu.Lock()
   372  		jobMaster.AssertNumberOfCalls(t, "InitImpl", 1)
   373  
   374  		// clean status
   375  		jobMaster.ExpectedCalls = nil
   376  		jobMaster.Calls = nil
   377  
   378  		jobMaster.On("Tick", mock.Anything).Return(nil)
   379  		jobMaster.mu.Unlock()
   380  
   381  		err = jobMaster.base.Poll(ctx)
   382  		require.NoError(t, err)
   383  
   384  		jobMaster.mu.Lock()
   385  		jobMaster.AssertNumberOfCalls(t, "Tick", 1)
   386  
   387  		// clean status
   388  		jobMaster.ExpectedCalls = nil
   389  		jobMaster.Calls = nil
   390  
   391  		jobMaster.On("CloseImpl", mock.Anything).Return()
   392  		jobMaster.mu.Unlock()
   393  
   394  		// test exit status
   395  		err = jobMaster.base.Exit(ctx, cs.exitReason, cs.err, []byte(cs.detail))
   396  		require.NoError(t, err)
   397  		meta, err := jobMaster.base.master.frameMetaClient.GetJobByID(ctx, jobMaster.base.ID())
   398  		require.NoError(t, err)
   399  		require.Equal(t, cs.expectedState, meta.State)
   400  		require.Equal(t, []byte(cs.expectedDetail), meta.Detail)
   401  		err = jobMaster.base.Close(ctx)
   402  		require.NoError(t, err)
   403  
   404  		jobMaster.mu.Lock()
   405  		jobMaster.AssertNumberOfCalls(t, "CloseImpl", 1)
   406  		jobMaster.mu.Unlock()
   407  	}
   408  }
   409  
   410  func TestJobMasterInitReturnError(t *testing.T) {
   411  	t.Parallel()
   412  
   413  	jobMaster := &testJobMasterImpl{}
   414  	base := newBaseJobMasterForTests(t, jobMaster)
   415  	jobMaster.base = base
   416  
   417  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   418  	defer cancel()
   419  
   420  	initError := errors.New("init impl error")
   421  	jobMaster.mu.Lock()
   422  	jobMaster.On("InitImpl", mock.Anything).Return(initError)
   423  	jobMaster.mu.Unlock()
   424  
   425  	err := jobMaster.base.Init(ctx)
   426  	require.Error(t, err)
   427  	require.Equal(t, initError, err)
   428  
   429  	jobMaster.mu.Lock()
   430  	// clean status
   431  	jobMaster.ExpectedCalls = nil
   432  	jobMaster.Calls = nil
   433  	jobMaster.On("CloseImpl", mock.Anything).Return()
   434  	jobMaster.mu.Unlock()
   435  
   436  	err = jobMaster.base.Close(ctx)
   437  	require.NoError(t, err)
   438  
   439  	jobMaster.mu.Lock()
   440  	jobMaster.AssertNumberOfCalls(t, "CloseImpl", 1)
   441  	jobMaster.mu.Unlock()
   442  
   443  	meta, err := jobMaster.base.master.frameMetaClient.GetJobByID(ctx, jobMaster.base.ID())
   444  	require.NoError(t, err)
   445  	require.Equal(t, frameModel.MasterStateUninit, meta.State)
   446  	require.Equal(t, initError.Error(), meta.ErrorMsg)
   447  }
   448  
   449  func TestJobMasterPollReturnError(t *testing.T) {
   450  	t.Parallel()
   451  
   452  	jobMaster := &testJobMasterImpl{}
   453  	base := newBaseJobMasterForTests(t, jobMaster)
   454  	jobMaster.base = base
   455  
   456  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   457  	defer cancel()
   458  
   459  	jobMaster.mu.Lock()
   460  	jobMaster.On("InitImpl", mock.Anything).Return(nil)
   461  	jobMaster.mu.Unlock()
   462  
   463  	err := jobMaster.base.Init(ctx)
   464  	require.NoError(t, err)
   465  
   466  	jobMaster.mu.Lock()
   467  	jobMaster.AssertNumberOfCalls(t, "InitImpl", 1)
   468  	// clean status
   469  	jobMaster.ExpectedCalls = nil
   470  	jobMaster.Calls = nil
   471  	jobMaster.mu.Unlock()
   472  
   473  	pollError := errors.New("master impl poll error")
   474  	jobMaster.mu.Lock()
   475  	jobMaster.On("Tick", mock.Anything).Return(pollError)
   476  	jobMaster.mu.Unlock()
   477  
   478  	err = jobMaster.base.Poll(ctx)
   479  	require.Error(t, err)
   480  	require.Equal(t, pollError, err)
   481  
   482  	jobMaster.mu.Lock()
   483  	// clean status
   484  	jobMaster.ExpectedCalls = nil
   485  	jobMaster.Calls = nil
   486  	jobMaster.On("CloseImpl", mock.Anything).Return()
   487  	jobMaster.mu.Unlock()
   488  
   489  	err = jobMaster.base.Close(ctx)
   490  	require.NoError(t, err)
   491  
   492  	jobMaster.mu.Lock()
   493  	jobMaster.AssertNumberOfCalls(t, "CloseImpl", 1)
   494  	jobMaster.mu.Unlock()
   495  
   496  	meta, err := jobMaster.base.master.frameMetaClient.GetJobByID(ctx, jobMaster.base.ID())
   497  	require.NoError(t, err)
   498  	require.Equal(t, frameModel.MasterStateInit, meta.State)
   499  	require.Equal(t, pollError.Error(), meta.ErrorMsg)
   500  }
   501  
   502  func TestJobMasterExitClearOldError(t *testing.T) {
   503  	t.Parallel()
   504  
   505  	jobMaster := &testJobMasterImpl{}
   506  	base := newBaseJobMasterForTests(t, jobMaster)
   507  	jobMaster.base = base
   508  
   509  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   510  	defer cancel()
   511  
   512  	// simulate job failed in last round, and failover again
   513  	err := jobMaster.base.master.frameMetaClient.UpdateJob(
   514  		ctx, jobMasterID, ormModel.KeyValueMap{
   515  			"state":         frameModel.MasterStateInit,
   516  			"error_message": "error in last period",
   517  		})
   518  	require.NoError(t, err)
   519  
   520  	jobMaster.mu.Lock()
   521  	jobMaster.On("OnMasterRecovered", mock.Anything).Return(nil)
   522  	jobMaster.mu.Unlock()
   523  
   524  	err = jobMaster.base.Init(ctx)
   525  	require.NoError(t, err)
   526  
   527  	jobMaster.mu.Lock()
   528  	jobMaster.AssertNumberOfCalls(t, "OnMasterRecovered", 1)
   529  	// clean status
   530  	jobMaster.ExpectedCalls = nil
   531  	jobMaster.Calls = nil
   532  	jobMaster.mu.Unlock()
   533  
   534  	status := jobMaster.Status()
   535  	jobMaster.base.Exit(ctx, ExitReasonFinished, nil, status.ExtBytes)
   536  	require.NoError(t, err)
   537  
   538  	meta, err := jobMaster.base.master.frameMetaClient.GetJobByID(ctx, jobMaster.base.ID())
   539  	require.NoError(t, err)
   540  	require.Equal(t, frameModel.MasterStateFinished, meta.State)
   541  	require.Equal(t, status.ExtBytes, meta.Detail)
   542  	require.Empty(t, meta.ErrorMsg)
   543  }