// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package framework

import (
	"context"
	"net/http"
	"net/http/httptest"
	"sync"
	"testing"
	"time"

	"github.com/gin-gonic/gin"
	"github.com/golang/mock/gomock"
	frameModel "github.com/pingcap/tiflow/engine/framework/model"
	"github.com/pingcap/tiflow/engine/pkg/client"
	dcontext "github.com/pingcap/tiflow/engine/pkg/context"
	"github.com/pingcap/tiflow/engine/pkg/deps"
	metaMock "github.com/pingcap/tiflow/engine/pkg/meta/mock"
	pkgOrm "github.com/pingcap/tiflow/engine/pkg/orm"
	ormModel "github.com/pingcap/tiflow/engine/pkg/orm/model"
	"github.com/pingcap/tiflow/engine/pkg/p2p"
	"github.com/pingcap/tiflow/engine/pkg/tenant"
	"github.com/pingcap/tiflow/pkg/errors"
	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/require"
)

const (
	jobManagerID = "job-manager"
	jobMasterID  = "my-master"
)

// testJobMasterImpl is a mock JobMasterImpl used to test
// the correctness of BaseJobMaster.
47 // TODO move testJobMasterImpl to a separate file 48 type testJobMasterImpl struct { 49 mu sync.Mutex 50 mock.Mock 51 52 base *DefaultBaseJobMaster 53 } 54 55 var _ JobMasterImpl = (*testJobMasterImpl)(nil) 56 57 func (m *testJobMasterImpl) InitImpl(ctx context.Context) error { 58 m.mu.Lock() 59 defer m.mu.Unlock() 60 61 args := m.Called(ctx) 62 return args.Error(0) 63 } 64 65 func (m *testJobMasterImpl) Tick(ctx context.Context) error { 66 m.mu.Lock() 67 defer m.mu.Unlock() 68 69 args := m.Called(ctx) 70 return args.Error(0) 71 } 72 73 func (m *testJobMasterImpl) CloseImpl(ctx context.Context) { 74 m.mu.Lock() 75 defer m.mu.Unlock() 76 77 m.Called(ctx) 78 } 79 80 func (m *testJobMasterImpl) StopImpl(ctx context.Context) { 81 m.mu.Lock() 82 defer m.mu.Unlock() 83 84 m.Called(ctx) 85 } 86 87 func (m *testJobMasterImpl) OnMasterRecovered(ctx context.Context) error { 88 m.mu.Lock() 89 defer m.mu.Unlock() 90 91 args := m.Called(ctx) 92 return args.Error(0) 93 } 94 95 func (m *testJobMasterImpl) OnWorkerStatusUpdated(worker WorkerHandle, newStatus *frameModel.WorkerStatus) error { 96 m.mu.Lock() 97 defer m.mu.Unlock() 98 99 args := m.Called(worker, newStatus) 100 return args.Error(0) 101 } 102 103 func (m *testJobMasterImpl) OnWorkerDispatched(worker WorkerHandle, result error) error { 104 m.mu.Lock() 105 defer m.mu.Unlock() 106 107 args := m.Called(worker, result) 108 return args.Error(0) 109 } 110 111 func (m *testJobMasterImpl) OnWorkerOnline(worker WorkerHandle) error { 112 m.mu.Lock() 113 defer m.mu.Unlock() 114 115 args := m.Called(worker) 116 return args.Error(0) 117 } 118 119 func (m *testJobMasterImpl) OnWorkerOffline(worker WorkerHandle, reason error) error { 120 m.mu.Lock() 121 defer m.mu.Unlock() 122 123 args := m.Called(worker, reason) 124 return args.Error(0) 125 } 126 127 func (m *testJobMasterImpl) OnWorkerMessage(worker WorkerHandle, topic p2p.Topic, message interface{}) error { 128 m.mu.Lock() 129 defer m.mu.Unlock() 130 131 args := m.Called(worker, 
topic, message) 132 return args.Error(0) 133 } 134 135 func (m *testJobMasterImpl) OnOpenAPIInitialized(apiGroup *gin.RouterGroup) { 136 apiGroup.GET("/status", func(c *gin.Context) { 137 c.String(http.StatusOK, "success") 138 }) 139 } 140 141 func (m *testJobMasterImpl) IsJobMasterImpl() { 142 panic("unreachable") 143 } 144 145 func (m *testJobMasterImpl) Status() frameModel.WorkerStatus { 146 return frameModel.WorkerStatus{ 147 State: frameModel.WorkerStateNormal, 148 } 149 } 150 151 func (m *testJobMasterImpl) OnCancel(ctx context.Context) error { 152 m.mu.Lock() 153 defer m.mu.Unlock() 154 155 args := m.Called(ctx) 156 return args.Error(0) 157 } 158 159 // simulate the job manager to insert a job record first since job master will only update the job 160 func prepareInsertJob(ctx context.Context, cli pkgOrm.Client, jobID string) error { 161 return cli.UpsertJob(ctx, &frameModel.MasterMeta{ 162 ID: jobID, 163 State: frameModel.MasterStateUninit, 164 }) 165 } 166 167 func newBaseJobMasterForTests(t *testing.T, impl JobMasterImpl) *DefaultBaseJobMaster { 168 cli, err := pkgOrm.NewMockClient() 169 require.NoError(t, err) 170 params := masterParamListForTest{ 171 MessageHandlerManager: p2p.NewMockMessageHandlerManager(), 172 MessageSender: p2p.NewMockMessageSender(), 173 FrameMetaClient: cli, 174 BusinessClientConn: metaMock.NewMockClientConn(), 175 ExecutorGroup: client.NewMockExecutorGroup(), 176 ServerMasterClient: client.NewMockServerMasterClient(gomock.NewController(t)), 177 } 178 dp := deps.NewDeps() 179 err = dp.Provide(func() masterParamListForTest { 180 return params 181 }) 182 require.NoError(t, err) 183 184 ctx := dcontext.Background() 185 epoch, err := params.FrameMetaClient.GenEpoch(ctx) 186 require.NoError(t, err) 187 188 ctx = ctx.WithDeps(dp) 189 ctx.Environ.NodeID = "test-node-id" 190 ctx.Environ.Addr = "127.0.0.1:10000" 191 ctx.ProjectInfo = tenant.TestProjectInfo 192 masterMeta := &frameModel.MasterMeta{ 193 ProjectID: 
tenant.TestProjectInfo.UniqueID(), 194 Addr: ctx.Environ.Addr, 195 NodeID: ctx.Environ.NodeID, 196 ID: jobMasterID, 197 Type: frameModel.FakeJobMaster, 198 Epoch: epoch, 199 State: frameModel.MasterStateUninit, 200 } 201 masterMetaBytes, err := masterMeta.Marshal() 202 require.NoError(t, err) 203 ctx.Environ.MasterMetaBytes = masterMetaBytes 204 err = cli.UpsertJob(ctx, masterMeta) 205 require.NoError(t, err) 206 207 return NewBaseJobMaster( 208 ctx, 209 impl, 210 jobManagerID, 211 jobMasterID, 212 frameModel.FakeTask, 213 epoch, 214 ).(*DefaultBaseJobMaster) 215 } 216 217 func TestBaseJobMasterBasics(t *testing.T) { 218 t.Parallel() 219 220 jobMaster := &testJobMasterImpl{} 221 base := newBaseJobMasterForTests(t, jobMaster) 222 jobMaster.base = base 223 224 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 225 defer cancel() 226 227 jobMaster.mu.Lock() 228 jobMaster.On("InitImpl", mock.Anything).Return(nil) 229 jobMaster.mu.Unlock() 230 231 err := jobMaster.base.Init(ctx) 232 require.NoError(t, err) 233 234 jobMaster.mu.Lock() 235 jobMaster.AssertNumberOfCalls(t, "InitImpl", 1) 236 237 // clean status 238 jobMaster.ExpectedCalls = nil 239 jobMaster.Calls = nil 240 241 jobMaster.On("Tick", mock.Anything).Return(nil) 242 jobMaster.mu.Unlock() 243 244 err = jobMaster.base.Poll(ctx) 245 require.NoError(t, err) 246 247 jobMaster.mu.Lock() 248 jobMaster.AssertNumberOfCalls(t, "Tick", 1) 249 250 // clean status 251 jobMaster.ExpectedCalls = nil 252 jobMaster.Calls = nil 253 254 jobMaster.On("CloseImpl", mock.Anything).Return() 255 jobMaster.mu.Unlock() 256 257 status := jobMaster.Status() 258 err = jobMaster.base.Exit(ctx, ExitReasonFinished, nil, status.ExtBytes) 259 require.NoError(t, err) 260 261 err = jobMaster.base.Close(ctx) 262 require.NoError(t, err) 263 264 jobMaster.mu.Lock() 265 jobMaster.AssertNumberOfCalls(t, "CloseImpl", 1) 266 jobMaster.mu.Unlock() 267 } 268 269 func TestOnOpenAPIInitialized(t *testing.T) { 270 t.Parallel() 271 272 
jobMaster := &testJobMasterImpl{} 273 base := newBaseJobMasterForTests(t, jobMaster) 274 jobMaster.base = base 275 276 engine := gin.New() 277 apiGroup := engine.Group("/api/v1/jobs/test") 278 base.TriggerOpenAPIInitialize(apiGroup) 279 280 w := httptest.NewRecorder() 281 req := httptest.NewRequest(http.MethodGet, "/api/v1/jobs/test/status", nil) 282 engine.ServeHTTP(w, req) 283 require.Equal(t, http.StatusOK, w.Code) 284 require.Equal(t, "success", w.Body.String()) 285 } 286 287 func TestJobMasterExit(t *testing.T) { 288 t.Parallel() 289 290 cases := []struct { 291 exitReason ExitReason 292 err error 293 detail string 294 expectedState frameModel.MasterState 295 expectedErrorMsg string 296 expectedDetail string 297 }{ 298 { 299 exitReason: ExitReasonFinished, 300 err: nil, 301 detail: "test finished", 302 expectedState: frameModel.MasterStateFinished, 303 expectedErrorMsg: "", 304 expectedDetail: "test finished", 305 }, 306 { 307 exitReason: ExitReasonFinished, 308 err: errors.New("test finished with error"), 309 detail: "test finished", 310 expectedState: frameModel.MasterStateFinished, 311 expectedErrorMsg: "test finished with error", 312 expectedDetail: "test finished", 313 }, 314 { 315 exitReason: ExitReasonCanceled, 316 err: nil, 317 detail: "test canceled", 318 expectedState: frameModel.MasterStateStopped, 319 expectedErrorMsg: "", 320 expectedDetail: "test canceled", 321 }, 322 { 323 exitReason: ExitReasonCanceled, 324 err: errors.New("test canceled with error"), 325 detail: "test canceled", 326 expectedState: frameModel.MasterStateStopped, 327 expectedErrorMsg: "test canceled with error", 328 expectedDetail: "test canceled", 329 }, 330 { 331 exitReason: ExitReasonFailed, 332 err: nil, 333 detail: "test failed", 334 expectedState: frameModel.MasterStateFailed, 335 expectedErrorMsg: "", 336 expectedDetail: "test failed", 337 }, 338 { 339 exitReason: ExitReasonFailed, 340 err: errors.New("test failed with error"), 341 detail: "test failed", 342 expectedState: 
frameModel.MasterStateFailed, 343 expectedErrorMsg: "test failed with error", 344 expectedDetail: "test failed", 345 }, 346 } 347 348 for _, cs := range cases { 349 jobMaster := &testJobMasterImpl{} 350 base := newBaseJobMasterForTests(t, jobMaster) 351 jobMaster.base = base 352 require.Equal(t, jobMasterID, jobMaster.base.ID()) 353 354 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 355 defer cancel() 356 357 err := prepareInsertJob(ctx, base.master.frameMetaClient, jobMaster.base.ID()) 358 require.NoError(t, err) 359 360 jobMaster.mu.Lock() 361 jobMaster.On("InitImpl", mock.Anything).Return(nil) 362 jobMaster.mu.Unlock() 363 364 err = jobMaster.base.Init(ctx) 365 require.NoError(t, err) 366 367 metas, err := jobMaster.base.master.frameMetaClient.QueryJobs(ctx) 368 require.NoError(t, err) 369 require.Len(t, metas, 1) 370 371 jobMaster.mu.Lock() 372 jobMaster.AssertNumberOfCalls(t, "InitImpl", 1) 373 374 // clean status 375 jobMaster.ExpectedCalls = nil 376 jobMaster.Calls = nil 377 378 jobMaster.On("Tick", mock.Anything).Return(nil) 379 jobMaster.mu.Unlock() 380 381 err = jobMaster.base.Poll(ctx) 382 require.NoError(t, err) 383 384 jobMaster.mu.Lock() 385 jobMaster.AssertNumberOfCalls(t, "Tick", 1) 386 387 // clean status 388 jobMaster.ExpectedCalls = nil 389 jobMaster.Calls = nil 390 391 jobMaster.On("CloseImpl", mock.Anything).Return() 392 jobMaster.mu.Unlock() 393 394 // test exit status 395 err = jobMaster.base.Exit(ctx, cs.exitReason, cs.err, []byte(cs.detail)) 396 require.NoError(t, err) 397 meta, err := jobMaster.base.master.frameMetaClient.GetJobByID(ctx, jobMaster.base.ID()) 398 require.NoError(t, err) 399 require.Equal(t, cs.expectedState, meta.State) 400 require.Equal(t, []byte(cs.expectedDetail), meta.Detail) 401 err = jobMaster.base.Close(ctx) 402 require.NoError(t, err) 403 404 jobMaster.mu.Lock() 405 jobMaster.AssertNumberOfCalls(t, "CloseImpl", 1) 406 jobMaster.mu.Unlock() 407 } 408 } 409 410 func 
TestJobMasterInitReturnError(t *testing.T) { 411 t.Parallel() 412 413 jobMaster := &testJobMasterImpl{} 414 base := newBaseJobMasterForTests(t, jobMaster) 415 jobMaster.base = base 416 417 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 418 defer cancel() 419 420 initError := errors.New("init impl error") 421 jobMaster.mu.Lock() 422 jobMaster.On("InitImpl", mock.Anything).Return(initError) 423 jobMaster.mu.Unlock() 424 425 err := jobMaster.base.Init(ctx) 426 require.Error(t, err) 427 require.Equal(t, initError, err) 428 429 jobMaster.mu.Lock() 430 // clean status 431 jobMaster.ExpectedCalls = nil 432 jobMaster.Calls = nil 433 jobMaster.On("CloseImpl", mock.Anything).Return() 434 jobMaster.mu.Unlock() 435 436 err = jobMaster.base.Close(ctx) 437 require.NoError(t, err) 438 439 jobMaster.mu.Lock() 440 jobMaster.AssertNumberOfCalls(t, "CloseImpl", 1) 441 jobMaster.mu.Unlock() 442 443 meta, err := jobMaster.base.master.frameMetaClient.GetJobByID(ctx, jobMaster.base.ID()) 444 require.NoError(t, err) 445 require.Equal(t, frameModel.MasterStateUninit, meta.State) 446 require.Equal(t, initError.Error(), meta.ErrorMsg) 447 } 448 449 func TestJobMasterPollReturnError(t *testing.T) { 450 t.Parallel() 451 452 jobMaster := &testJobMasterImpl{} 453 base := newBaseJobMasterForTests(t, jobMaster) 454 jobMaster.base = base 455 456 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 457 defer cancel() 458 459 jobMaster.mu.Lock() 460 jobMaster.On("InitImpl", mock.Anything).Return(nil) 461 jobMaster.mu.Unlock() 462 463 err := jobMaster.base.Init(ctx) 464 require.NoError(t, err) 465 466 jobMaster.mu.Lock() 467 jobMaster.AssertNumberOfCalls(t, "InitImpl", 1) 468 // clean status 469 jobMaster.ExpectedCalls = nil 470 jobMaster.Calls = nil 471 jobMaster.mu.Unlock() 472 473 pollError := errors.New("master impl poll error") 474 jobMaster.mu.Lock() 475 jobMaster.On("Tick", mock.Anything).Return(pollError) 476 jobMaster.mu.Unlock() 477 478 err = 
jobMaster.base.Poll(ctx) 479 require.Error(t, err) 480 require.Equal(t, pollError, err) 481 482 jobMaster.mu.Lock() 483 // clean status 484 jobMaster.ExpectedCalls = nil 485 jobMaster.Calls = nil 486 jobMaster.On("CloseImpl", mock.Anything).Return() 487 jobMaster.mu.Unlock() 488 489 err = jobMaster.base.Close(ctx) 490 require.NoError(t, err) 491 492 jobMaster.mu.Lock() 493 jobMaster.AssertNumberOfCalls(t, "CloseImpl", 1) 494 jobMaster.mu.Unlock() 495 496 meta, err := jobMaster.base.master.frameMetaClient.GetJobByID(ctx, jobMaster.base.ID()) 497 require.NoError(t, err) 498 require.Equal(t, frameModel.MasterStateInit, meta.State) 499 require.Equal(t, pollError.Error(), meta.ErrorMsg) 500 } 501 502 func TestJobMasterExitClearOldError(t *testing.T) { 503 t.Parallel() 504 505 jobMaster := &testJobMasterImpl{} 506 base := newBaseJobMasterForTests(t, jobMaster) 507 jobMaster.base = base 508 509 ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) 510 defer cancel() 511 512 // simulate job failed in last round, and failover again 513 err := jobMaster.base.master.frameMetaClient.UpdateJob( 514 ctx, jobMasterID, ormModel.KeyValueMap{ 515 "state": frameModel.MasterStateInit, 516 "error_message": "error in last period", 517 }) 518 require.NoError(t, err) 519 520 jobMaster.mu.Lock() 521 jobMaster.On("OnMasterRecovered", mock.Anything).Return(nil) 522 jobMaster.mu.Unlock() 523 524 err = jobMaster.base.Init(ctx) 525 require.NoError(t, err) 526 527 jobMaster.mu.Lock() 528 jobMaster.AssertNumberOfCalls(t, "OnMasterRecovered", 1) 529 // clean status 530 jobMaster.ExpectedCalls = nil 531 jobMaster.Calls = nil 532 jobMaster.mu.Unlock() 533 534 status := jobMaster.Status() 535 jobMaster.base.Exit(ctx, ExitReasonFinished, nil, status.ExtBytes) 536 require.NoError(t, err) 537 538 meta, err := jobMaster.base.master.frameMetaClient.GetJobByID(ctx, jobMaster.base.ID()) 539 require.NoError(t, err) 540 require.Equal(t, frameModel.MasterStateFinished, meta.State) 541 
require.Equal(t, status.ExtBytes, meta.Detail) 542 require.Empty(t, meta.ErrorMsg) 543 }