github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/owner/feed_state_manager_test.go

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package owner

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/cenkalti/backoff/v4"
	"github.com/pingcap/tiflow/cdc/model"
	"github.com/pingcap/tiflow/cdc/vars"
	"github.com/pingcap/tiflow/pkg/config"
	cerror "github.com/pingcap/tiflow/pkg/errors"
	"github.com/pingcap/tiflow/pkg/etcd"
	"github.com/pingcap/tiflow/pkg/orchestrator"
	"github.com/pingcap/tiflow/pkg/pdutil"
	"github.com/pingcap/tiflow/pkg/upstream"
	"github.com/pingcap/tiflow/pkg/util"
	"github.com/stretchr/testify/require"
	pd "github.com/tikv/pd/client"
)

type mockPD struct {
	pd.Client

	getTs func() (int64, int64, error)
}

func (p *mockPD) GetTS(_ context.Context) (int64, int64, error) {
	if p.getTs != nil {
		return p.getTs()
	}
	return 1, 2, nil
}

// newFeedStateManager4Test creates a feedStateManager for test
func newFeedStateManager4Test(
	initialIntervalInMs, maxIntervalInMs, maxElapsedTimeInMs int,
	multiplier float64,
) *feedStateManager {
	f := new(feedStateManager)
	f.upstream = new(upstream.Upstream)
	f.upstream.PDClient = &mockPD{}
	f.upstream.PDClock = pdutil.NewClock4Test()

	f.errBackoff = backoff.NewExponentialBackOff()
	f.errBackoff.InitialInterval = time.Duration(initialIntervalInMs) * time.Millisecond
	f.errBackoff.MaxInterval = time.Duration(maxIntervalInMs) * time.Millisecond
	f.errBackoff.MaxElapsedTime = time.Duration(maxElapsedTimeInMs) * time.Millisecond
	f.errBackoff.Multiplier = multiplier
	f.errBackoff.RandomizationFactor = 0

	f.resetErrRetry()

	f.changefeedErrorStuckDuration = time.Second * 3

	return f
}

func TestHandleJob(t *testing.T) {
	_, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, 0, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	manager.state = state
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{}, true, nil
	})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())

	// an admin job whose changefeed ID does not match
	manager.PushAdminJob(&model.AdminJob{
		CfID: model.DefaultChangeFeedID("fake-changefeed-id"),
		Type: model.AdminStop,
	})
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())

	// a running changefeed cannot be resumed
	manager.PushAdminJob(&model.AdminJob{
		CfID: model.DefaultChangeFeedID(changefeedInfo.ID),
		Type: model.AdminResume,
	})
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())

	// stop a changefeed
	manager.PushAdminJob(&model.AdminJob{
		CfID: model.DefaultChangeFeedID(changefeedInfo.ID),
		Type: model.AdminStop,
	})
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()

	require.False(t, manager.ShouldRunning())
	require.False(t, manager.ShouldRemoved())
	require.Equal(t, state.Info.State, model.StateStopped)
	require.Equal(t, state.Info.AdminJobType, model.AdminStop)
	require.Equal(t, state.Status.AdminJobType, model.AdminStop)

	// resume a changefeed
	manager.PushAdminJob(&model.AdminJob{
		CfID: model.DefaultChangeFeedID(changefeedInfo.ID),
		Type: model.AdminResume,
	})
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())
	require.False(t, manager.ShouldRemoved())
	require.Equal(t, state.Info.State, model.StateNormal)
	require.Equal(t, state.Info.AdminJobType, model.AdminNone)
	require.Equal(t, state.Status.AdminJobType, model.AdminNone)

	// remove a changefeed
	manager.PushAdminJob(&model.AdminJob{
		CfID: model.DefaultChangeFeedID(changefeedInfo.ID),
		Type: model.AdminRemove,
	})
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()

	require.False(t, manager.ShouldRunning())
	require.True(t, manager.ShouldRemoved())
	require.False(t, state.Exist())
}

func TestResumeChangefeedWithCheckpointTs(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, 0, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{}, true, nil
	})
	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())

	// stop a changefeed
	manager.PushAdminJob(&model.AdminJob{
		CfID: model.DefaultChangeFeedID(changefeedInfo.ID),
		Type: model.AdminStop,
	})
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()

	require.False(t, manager.ShouldRunning())
	require.False(t, manager.ShouldRemoved())
	require.Equal(t, state.Info.State, model.StateStopped)
	require.Equal(t, state.Info.AdminJobType, model.AdminStop)
	require.Equal(t, state.Status.AdminJobType, model.AdminStop)

	// resume the changefeed in stopped state
	manager.PushAdminJob(&model.AdminJob{
		CfID:                  model.DefaultChangeFeedID(changefeedInfo.ID),
		Type:                  model.AdminResume,
		OverwriteCheckpointTs: 100,
	})
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())
	require.False(t, manager.ShouldRemoved())
	require.Equal(t, state.Info.State, model.StateNormal)
	require.Equal(t, state.Info.AdminJobType, model.AdminNone)
	require.Equal(t, state.Status.AdminJobType, model.AdminNone)

	// mock that a non-retryable error occurs for this changefeed
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Error: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "CDC:ErrStartTsBeforeGC",
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, state.Info.State, model.StateFailed)
	require.Equal(t, state.Info.AdminJobType, model.AdminStop)
	require.Equal(t, state.Status.AdminJobType, model.AdminStop)

	// resume the changefeed in failed state
	manager.isRetrying = true
	manager.PushAdminJob(&model.AdminJob{
		CfID:                  model.DefaultChangeFeedID(changefeedInfo.ID),
		Type:                  model.AdminResume,
		OverwriteCheckpointTs: 200,
	})
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())
	require.False(t, manager.ShouldRemoved())
	require.Equal(t, state.Info.State, model.StateNormal)
	require.Equal(t, state.Info.AdminJobType, model.AdminNone)
	require.Equal(t, state.Status.AdminJobType, model.AdminNone)
	require.False(t, manager.isRetrying)
}

func TestMarkFinished(t *testing.T) {
	_, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, 0, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{}, true, nil
	})
	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())

	manager.MarkFinished()
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()

	require.False(t, manager.ShouldRunning())
	require.Equal(t, state.Info.State, model.StateFinished)
	require.Equal(t, state.Info.AdminJobType, model.AdminFinish)
	require.Equal(t, state.Status.AdminJobType, model.AdminFinish)
}

func TestCleanUpInfos(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, 0, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{}, true, nil
	})
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{}, true, nil
		})
	tester.MustApplyPatches()
	require.Contains(t, state.TaskPositions, globalVars.CaptureInfo.ID)
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())

	manager.MarkFinished()
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.False(t, manager.ShouldRunning())
	require.Equal(t, state.Info.State, model.StateFinished)
	require.Equal(t, state.Info.AdminJobType, model.AdminFinish)
	require.Equal(t, state.Status.AdminJobType, model.AdminFinish)
	require.NotContains(t, state.TaskPositions, globalVars.CaptureInfo.ID)
}

func TestHandleError(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, 0, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 200,
		}, true, nil
	})

	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()

	intervals := []time.Duration{200, 400, 800, 1600, 1600}
	for i, d := range intervals {
		intervals[i] = d * time.Millisecond
	}

	for _, d := range intervals {
		require.True(t, manager.ShouldRunning())
		state.PatchTaskPosition(globalVars.CaptureInfo.ID,
			func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
				return &model.TaskPosition{Error: &model.RunningError{
					Addr:    globalVars.CaptureInfo.AdvertiseAddr,
					Code:    "[CDC:ErrEtcdSessionDone]",
					Message: "fake error for test",
				}}, true, nil
			})
		tester.MustApplyPatches()
		manager.Tick(0, state.Status, state.Info)
		tester.MustApplyPatches()
		require.False(t, manager.ShouldRunning())
		require.Equal(t, state.Info.State, model.StatePending)
		require.Equal(t, state.Info.AdminJobType, model.AdminStop)
		require.Equal(t, state.Status.AdminJobType, model.AdminStop)
		time.Sleep(d)
		manager.Tick(0, state.Status, state.Info)
		tester.MustApplyPatches()
	}

	// no error tick, state should be transferred from pending to warning
	manager.Tick(0, state.Status, state.Info)
	require.True(t, manager.ShouldRunning())
	require.Equal(t, model.StateWarning, state.Info.State)
	require.Equal(t, model.AdminNone, state.Info.AdminJobType)
	require.Equal(t, model.AdminNone, state.Status.AdminJobType)

	// no error tick and checkpointTs is progressing,
	// state should be transferred from warning to normal
	state.PatchStatus(
		func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
			status.CheckpointTs += 1
			return status, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())
	state.PatchStatus(
		func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
			status.CheckpointTs += 1
			return status, true, nil
		})
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateNormal, state.Info.State)
	require.Equal(t, model.AdminNone, state.Info.AdminJobType)
	require.Equal(t, model.AdminNone, state.Status.AdminJobType)
}

func TestHandleFastFailError(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(0, 0, 0, 0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Error: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "CDC:ErrStartTsBeforeGC",
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	// test handling fast failed error with non-nil ChangeFeedInfo
	tester.MustApplyPatches()
	// test handling fast failed error with nil ChangeFeedInfo
	// set info to nil when this patch is applied
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		return nil, true, nil
	})
	manager.Tick(0, state.Status, state.Info)
	// When the patches are applied, the callback function of PatchInfo in feedStateManager.HandleError will be called.
	// At that time, the nil pointer will be checked instead of throwing a panic. See issue #3128 for more detail.
	tester.MustApplyPatches()
}

func TestHandleErrorWhenChangefeedIsPaused(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(0, 0, 0, 0)
	manager.state = orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	err := &model.RunningError{
		Addr:    globalVars.CaptureInfo.AdvertiseAddr,
		Code:    "CDC:ErrReachMaxTry",
		Message: "fake error for test",
	}
	manager.state.(*orchestrator.ChangefeedReactorState).Info = &model.ChangeFeedInfo{
		State: model.StateStopped,
	}
	manager.HandleError(err)
	require.Equal(t, model.StateStopped, manager.state.(*orchestrator.ChangefeedReactorState).Info.State)
}

func TestChangefeedStatusNotExist(t *testing.T) {
	changefeedInfo := `
{
    "sink-uri": "blackhole:///",
    "create-time": "2021-06-05T00:44:15.065939487+08:00",
    "start-ts": 425381670108266496,
    "target-ts": 0,
    "admin-job-type": 1,
    "sort-engine": "unified",
    "config": {
        "case-sensitive": true,
        "force-replicate": false,
        "check-gc-safe-point": true,
        "filter": {
            "rules": [
                "*.*"
            ],
            "ignore-txn-start-ts": null
        },
        "mounter": {
            "worker-num": 16
        },
        "sink": {
            "dispatchers": null,
            "protocol": "open-protocol"
        }
    },
    "state": "failed",
    "history": [],
    "error": {
        "addr": "172.16.6.147:8300",
        "code": "CDC:ErrSnapshotLostByGC",
        "message": ` + "\"[CDC:ErrSnapshotLostByGC]fail to create or maintain changefeed " +
		"due to snapshot loss caused by GC. checkpoint-ts 425381670108266496 " +
		"is earlier than GC safepoint at 0\"" + `
    },
    "sync-point-enabled": false,
    "sync-point-interval": 600000000000,
    "creator-version": "v5.0.0-master-dirty"
}
`
	_, changefeedConfig := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, 0, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedConfig.ID))
	tester := orchestrator.NewReactorStateTester(t, state, map[string]string{
		fmt.Sprintf("%s/capture/d563bfc0-f406-4f34-bc7d-6dc2e35a44e5",
			etcd.DefaultClusterAndMetaPrefix): `
{"id":"d563bfc0-f406-4f34-bc7d-6dc2e35a44e5",
"address":"172.16.6.147:8300","version":"v5.0.0-master-dirty"}`,
		fmt.Sprintf("%s/changefeed/info/",
			etcd.DefaultClusterAndNamespacePrefix) +
			changefeedConfig.ID: changefeedInfo,
		fmt.Sprintf("%s/owner/156579d017f84a68",
			etcd.DefaultClusterAndMetaPrefix,
		): "d563bfc0-f406-4f34-bc7d-6dc2e35a44e5",
	})
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	require.False(t, manager.ShouldRunning())
	require.False(t, manager.ShouldRemoved())
	tester.MustApplyPatches()

	manager.PushAdminJob(&model.AdminJob{
		CfID: model.DefaultChangeFeedID(changefeedConfig.ID),
		Type: model.AdminRemove,
	})
	manager.Tick(0, state.Status, state.Info)
	require.False(t, manager.ShouldRunning())
	require.True(t, manager.ShouldRemoved())
	tester.MustApplyPatches()
	require.Nil(t, state.Info)
	require.False(t, state.Exist())
}

func TestChangefeedNotRetry(t *testing.T) {
	_, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, 0, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)

	// changefeed state normal
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}, State: model.StateNormal}, true, nil
	})
	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	require.True(t, manager.ShouldRunning())

	// changefeed in error state but error can be retried
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		return &model.ChangeFeedInfo{
			SinkURI: "123",
			Config:  &config.ReplicaConfig{},
			State:   model.StateWarning,
			Error: &model.RunningError{
				Addr: "127.0.0.1",
				Code: "CDC:ErrPipelineTryAgain",
				Message: "pipeline is full, please try again. Internal use only, " +
					"report a bug if seen externally",
			},
		}, true, nil
	})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	require.True(t, manager.ShouldRunning())

	state.PatchTaskPosition("test",
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			if position == nil {
				position = &model.TaskPosition{}
			}
			position.Error = &model.RunningError{
				Time:    time.Now(),
				Addr:    "test",
				Code:    "CDC:ErrExpressionColumnNotFound",
				Message: "what ever",
			}
			return position, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	require.False(t, manager.ShouldRunning())

	state.PatchTaskPosition("test",
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			if position == nil {
				position = &model.TaskPosition{}
			}
			position.Error = &model.RunningError{
				Addr:    "127.0.0.1",
				Code:    string(cerror.ErrExpressionColumnNotFound.RFCCode()),
				Message: cerror.ErrExpressionColumnNotFound.Error(),
			}
			return position, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	// should be false
	require.False(t, manager.ShouldRunning())

	state.PatchTaskPosition("test",
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			if position == nil {
				position = &model.TaskPosition{}
			}
			position.Error = &model.RunningError{
				Addr:    "127.0.0.1",
				Code:    string(cerror.ErrExpressionParseFailed.RFCCode()),
				Message: cerror.ErrExpressionParseFailed.Error(),
			}
			return position, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	// should be false
	require.False(t, manager.ShouldRunning())
}

func TestBackoffStopsUnexpectedly(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	// after 4000ms, the backoff will stop
	manager := newFeedStateManager4Test(500, 500, 4000, 1.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{}, true, nil
	})

	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()

	for i := 1; i <= 10; i++ {
		if i >= 8 {
			// after round 8, the maxElapsedTime of backoff will exceed 4000ms,
			// and NextBackOff() will return -1, so the changefeed state will
			// never turn into error state.
			require.Equal(t, state.Info.State, model.StateFailed)
			require.False(t, manager.ShouldRunning())
		} else {
			if i == 1 {
				require.Equal(t, model.StateNormal, state.Info.State)
			} else {
				require.Equal(t, model.StateWarning, state.Info.State)
			}
			require.True(t, manager.ShouldRunning())
			state.PatchTaskPosition(globalVars.CaptureInfo.ID,
				func(position *model.TaskPosition) (
					*model.TaskPosition, bool, error,
				) {
					return &model.TaskPosition{Error: &model.RunningError{
						Addr:    globalVars.CaptureInfo.AdvertiseAddr,
						Code:    "[CDC:ErrEtcdSessionDone]",
						Message: "fake error for test",
					}}, true, nil
				})
			tester.MustApplyPatches()
			manager.Tick(0, state.Status, state.Info)
			tester.MustApplyPatches()
			// If an error occurs, back off from running the task.
			require.False(t, manager.ShouldRunning())
			require.Equal(t, model.StatePending, state.Info.State)
			require.Equal(t, state.Info.AdminJobType, model.AdminStop)
			require.Equal(t, state.Status.AdminJobType, model.AdminStop)
		}

		// 500ms is the backoff interval, so sleep 500ms and after a manager
		// tick, the changefeed will turn into normal state
		time.Sleep(500 * time.Millisecond)
		manager.Tick(0, state.Status, state.Info)
		tester.MustApplyPatches()
	}
}

func TestBackoffNeverStops(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	// the backoff will never stop
	manager := newFeedStateManager4Test(100, 100, 0, 1.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{}, true, nil
	})

	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()

	for i := 1; i <= 30; i++ {
		if i == 1 {
			require.Equal(t, model.StateNormal, state.Info.State)
		} else {
			require.Equal(t, model.StateWarning, state.Info.State)
		}
		require.True(t, manager.ShouldRunning())
		state.PatchTaskPosition(globalVars.CaptureInfo.ID,
			func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
				return &model.TaskPosition{Error: &model.RunningError{
					Addr:    globalVars.CaptureInfo.AdvertiseAddr,
					Code:    "[CDC:ErrEtcdSessionDone]",
					Message: "fake error for test",
				}}, true, nil
			})
		tester.MustApplyPatches()
		manager.Tick(0, state.Status, state.Info)
		tester.MustApplyPatches()
		require.False(t, manager.ShouldRunning())
		require.Equal(t, model.StatePending, state.Info.State)
		require.Equal(t, state.Info.AdminJobType, model.AdminStop)
		require.Equal(t, state.Status.AdminJobType, model.AdminStop)
		// 100ms is the backoff interval, so sleep 100ms and after a manager tick,
		// the changefeed will turn into normal state
		time.Sleep(100 * time.Millisecond)
		manager.Tick(0, state.Status, state.Info)
		tester.MustApplyPatches()
	}
}

func TestUpdateChangefeedEpoch(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	// Set a long backoff time
	manager := newFeedStateManager4Test(int(time.Hour), int(time.Hour), 0, 1.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{}, true, nil
	})

	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, state.Info.State, model.StateNormal)
	require.True(t, manager.ShouldRunning())

	for i := 1; i <= 30; i++ {
		manager.upstream.PDClient.(*mockPD).getTs = func() (int64, int64, error) {
			return int64(i), 0, nil
		}
		previousEpoch := state.Info.Epoch
		previousState := state.Info.State
		state.PatchTaskPosition(globalVars.CaptureInfo.ID,
			func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
				return &model.TaskPosition{Error: &model.RunningError{
					Addr:    globalVars.CaptureInfo.AdvertiseAddr,
					Code:    "[CDC:ErrEtcdSessionDone]",
					Message: "fake error for test",
				}}, true, nil
			})
		tester.MustApplyPatches()
		manager.Tick(0, state.Status, state.Info)
		tester.MustApplyPatches()
		require.False(t, manager.ShouldRunning())
		require.Equal(t, model.StatePending, state.Info.State, i)

		require.Equal(t, state.Info.AdminJobType, model.AdminStop)
		require.Equal(t, state.Status.AdminJobType, model.AdminStop)

		// Epoch only changes when State changes.
		if previousState == state.Info.State {
			require.Equal(t, previousEpoch, state.Info.Epoch)
		} else {
			require.NotEqual(t, previousEpoch, state.Info.Epoch)
		}
	}
}

func TestHandleWarning(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, 0, 2.0)
	manager.changefeedErrorStuckDuration = 100 * time.Millisecond
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 200,
		}, true, nil
	})

	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateNormal, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 1. test when a warning occurs, the changefeed state will be changed to warning
	// and it will still keep running
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	// some patches will be generated when manager.Tick is called,
	// so we need to apply the patches before we check the state
	tester.MustApplyPatches()
	require.Equal(t, model.StateWarning, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 2. test when the changefeed is in warning state and the checkpointTs is not progressing,
	// the changefeed state will remain warning
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.NotNil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 200,
		}, true, nil
	})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateWarning, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 3. test when the changefeed is in warning state and the checkpointTs is progressing,
	// the changefeed state will be changed back to normal
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.NotNil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 201,
		}, true, nil
	})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateNormal, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 4. test when the changefeed is in warning state and the checkpointTs is not progressing
	// for changefeedErrorStuckDuration, the changefeed state will be changed to failed
	// and it will stop running
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	// some patches will be generated when manager.Tick is called,
	// so we need to apply the patches before we check the state
	tester.MustApplyPatches()
	require.Equal(t, model.StateWarning, state.Info.State)
	require.True(t, manager.ShouldRunning())

	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	// mock that the checkpointTs has not been progressing for changefeedErrorStuckDuration
	manager.checkpointTsAdvanced = manager.
		checkpointTsAdvanced.Add(-(manager.changefeedErrorStuckDuration + 1))
	// resolvedTs = 202 > checkpointTs = 201
	manager.Tick(202, state.Status, state.Info)
	// some patches will be generated when manager.Tick is called,
	// so we need to apply the patches before we check the state
	tester.MustApplyPatches()
	require.Equal(t, model.StateFailed, state.Info.State)
	require.False(t, manager.ShouldRunning())
}

func TestErrorAfterWarning(t *testing.T) {
	t.Parallel()

	maxElapsedTimeInMs := 2000
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, maxElapsedTimeInMs, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 200,
		}, true, nil
	})

	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateNormal, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 1. test when a warning occurs, the changefeed state will be changed to warning
	// and it will still keep running
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	// some patches will be generated when manager.Tick is called,
	// so we need to apply the patches before we check the state
	tester.MustApplyPatches()
	require.Equal(t, model.StateWarning, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 2. test when the changefeed is in warning state and the checkpointTs is not progressing,
	// the changefeed state will remain warning
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.NotNil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 200,
		}, true, nil
	})
	tester.MustApplyPatches()
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateWarning, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 3. Sleep maxElapsedTimeInMs to wait for the backoff to time out. When an error occurs after a warning,
	// the backoff will be reset, and the changefeed state will be changed to warning and it will still
	// keep running.
	time.Sleep(time.Millisecond * time.Duration(maxElapsedTimeInMs))
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Error: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()

	manager.Tick(0, state.Status, state.Info)
	// some patches will be generated when manager.Tick is called,
	// so we need to apply the patches before we check the state
	tester.MustApplyPatches()
	require.Equal(t, model.StatePending, state.Info.State)
	require.False(t, manager.ShouldRunning())
	manager.Tick(0, state.Status, state.Info)

	// some patches will be generated when manager.Tick is called,
	// so we need to apply the patches before we check the state
	tester.MustApplyPatches()
	require.Equal(t, model.StateWarning, state.Info.State)
	require.True(t, manager.ShouldRunning())
}

func TestHandleWarningWhileAdvanceResolvedTs(t *testing.T) {
	t.Parallel()

	maxElapsedTimeInMs := 2000
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, maxElapsedTimeInMs, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	manager.state = state
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 200,
		}, true, nil
	})

	tester.MustApplyPatches()
	manager.Tick(200, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateNormal, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 1. test when a warning occurs, the changefeed state will be changed to warning
	// and it will still keep running
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(200, state.Status, state.Info)
	// some patches will be generated when manager.Tick is called,
	// so we need to apply the patches before we check the state
	tester.MustApplyPatches()
	require.Equal(t, model.StateWarning, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 2. test when the changefeed is in warning state and the resolvedTs and checkpointTs are not progressing,
	// the changefeed state will remain warning when a new warning is encountered.
	time.Sleep(manager.changefeedErrorStuckDuration + 10)
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.NotNil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 200,
		}, true, nil
	})
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(200, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateWarning, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 3. Test that the changefeed remains warning when the resolvedTs is progressing
	// after being stuck beyond the detection time.
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.NotNil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 200,
		}, true, nil
	})
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(400, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateWarning, state.Info.State)
	require.True(t, manager.ShouldRunning())

	// 4. Test that the changefeed fails when the checkpointTs has not progressed for changefeedErrorStuckDuration.
	time.Sleep(manager.changefeedErrorStuckDuration + 10)
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.NotNil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 200,
		}, true, nil
	})
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	manager.Tick(400, state.Status, state.Info)
	tester.MustApplyPatches()
	require.Equal(t, model.StateFailed, state.Info.State)
	require.False(t, manager.ShouldRunning())
}

func TestUpdateChangefeedWithChangefeedErrorStuckDuration(t *testing.T) {
	globalVars, changefeedInfo := vars.NewGlobalVarsAndChangefeedInfo4Test()
	manager := newFeedStateManager4Test(200, 1600, 0, 2.0)
	state := orchestrator.NewChangefeedReactorState(etcd.DefaultCDCClusterID,
		model.DefaultChangeFeedID(changefeedInfo.ID))
	tester := orchestrator.NewReactorStateTester(t, state, nil)
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.Nil(t, info)
		return &model.ChangeFeedInfo{SinkURI: "123", Config: &config.ReplicaConfig{}}, true, nil
	})
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.Nil(t, status)
		return &model.ChangeFeedStatus{}, true, nil
	})
	tester.MustApplyPatches()
	manager.state = state
	manager.Tick(0, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())

	stuckDuration := manager.changefeedErrorStuckDuration + time.Second*3
	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()
	time.Sleep(stuckDuration - time.Second)
	manager.Tick(100, state.Status, state.Info)
	tester.MustApplyPatches()
	require.False(t, manager.ShouldRunning())
	require.Less(t, manager.changefeedErrorStuckDuration, stuckDuration)
	require.Equal(t, state.Info.State, model.StateFailed)

	// update ChangefeedErrorStuckDuration
	state.PatchInfo(func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) {
		require.NotNil(t, info)
		info.Config.ChangefeedErrorStuckDuration = util.AddressOf(stuckDuration)
		return info, true, nil
	})
	// update status
	state.PatchStatus(func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) {
		require.NotNil(t, status)
		return &model.ChangeFeedStatus{
			CheckpointTs: 100,
		}, true, nil
	})
	tester.MustApplyPatches()

	// resume the changefeed in failed state
	manager.PushAdminJob(&model.AdminJob{
		CfID:                  model.DefaultChangeFeedID(changefeedInfo.ID),
		Type:                  model.AdminResume,
		OverwriteCheckpointTs: 100,
	})

	manager.Tick(101, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())
	require.False(t, manager.ShouldRemoved())
	require.Equal(t, manager.changefeedErrorStuckDuration, stuckDuration)
	require.Equal(t, state.Info.State, model.StateNormal)
	require.Equal(t, state.Info.AdminJobType, model.AdminNone)
	require.Equal(t, state.Status.AdminJobType, model.AdminNone)

	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()

	time.Sleep(stuckDuration - time.Second)
	manager.Tick(200, state.Status, state.Info)
	tester.MustApplyPatches()
	require.True(t, manager.ShouldRunning())
	require.Equal(t, state.Info.State, model.StateWarning)

	state.PatchTaskPosition(globalVars.CaptureInfo.ID,
		func(position *model.TaskPosition) (*model.TaskPosition, bool, error) {
			return &model.TaskPosition{Warning: &model.RunningError{
				Addr:    globalVars.CaptureInfo.AdvertiseAddr,
				Code:    "[CDC:ErrSinkManagerRunError]", // it is a fake error
				Message: "fake error for test",
			}}, true, nil
		})
	tester.MustApplyPatches()

	time.Sleep(time.Second)
	manager.Tick(201, state.Status, state.Info)
	tester.MustApplyPatches()
	require.False(t, manager.ShouldRunning())
	require.Equal(t, state.Info.State, model.StateFailed)
}