github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/controller/controller_test.go (about) 1 // Copyright 2023 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package controller 15 16 import ( 17 "context" 18 "fmt" 19 "math" 20 "math/rand" 21 "testing" 22 "time" 23 24 "github.com/pingcap/tiflow/cdc/model" 25 "github.com/pingcap/tiflow/cdc/vars" 26 "github.com/pingcap/tiflow/pkg/config" 27 "github.com/pingcap/tiflow/pkg/errors" 28 "github.com/pingcap/tiflow/pkg/etcd" 29 "github.com/pingcap/tiflow/pkg/orchestrator" 30 "github.com/pingcap/tiflow/pkg/pdutil" 31 "github.com/pingcap/tiflow/pkg/txnutil/gc" 32 "github.com/pingcap/tiflow/pkg/upstream" 33 "github.com/pingcap/tiflow/pkg/util" 34 "github.com/stretchr/testify/require" 35 "github.com/tikv/client-go/v2/oracle" 36 ) 37 38 func createController4Test(globalVars *vars.GlobalVars, 39 t *testing.T) (*controllerImpl, *orchestrator.GlobalReactorState, 40 *orchestrator.ReactorStateTester, 41 ) { 42 pdClient := &gc.MockPDClient{ 43 UpdateServiceGCSafePointFunc: func(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) { 44 return safePoint, nil 45 }, 46 } 47 48 m := upstream.NewManager4Test(pdClient) 49 o := NewController(m, &model.CaptureInfo{}, nil).(*controllerImpl) 50 51 state := orchestrator.NewGlobalStateForTest(etcd.DefaultCDCClusterID) 52 tester := orchestrator.NewReactorStateTester(t, state, nil) 53 54 // set captures 55 cdcKey := etcd.CDCKey{ 56 ClusterID: state.ClusterID, 57 Tp: etcd.CDCKeyTypeCapture, 58 CaptureID: globalVars.CaptureInfo.ID, 59 } 60 captureBytes, err := globalVars.CaptureInfo.Marshal() 61 require.Nil(t, err) 62 tester.MustUpdate(cdcKey.String(), captureBytes) 63 return o, state, tester 64 } 65 66 func TestUpdateGCSafePoint(t *testing.T) { 67 mockPDClient := &gc.MockPDClient{} 68 m := upstream.NewManager4Test(mockPDClient) 69 o := NewController(m, &model.CaptureInfo{}, nil).(*controllerImpl) 70 ctx, cancel := context.WithCancel(context.Background()) 71 defer cancel() 72 state := orchestrator.NewGlobalStateForTest(etcd.DefaultCDCClusterID) 73 tester := orchestrator.NewReactorStateTester(t, state, nil) 74 75 // no changefeed, the gc safe point should be max uint64 76 mockPDClient.UpdateServiceGCSafePointFunc = func( 77 ctx context.Context, serviceID string, ttl int64, safePoint uint64, 78 ) (uint64, error) { 79 // Owner will do a snapshot read at (checkpointTs - 1) from TiKV, 80 // set GC safepoint to (checkpointTs - 1) 81 require.Equal(t, safePoint, uint64(math.MaxUint64-1)) 82 return 0, nil 83 } 84 85 // add a failed changefeed, it must not trigger update GC safepoint. 86 mockPDClient.UpdateServiceGCSafePointFunc = func( 87 ctx context.Context, serviceID string, ttl int64, safePoint uint64, 88 ) (uint64, error) { 89 return 0, nil 90 } 91 changefeedID1 := model.DefaultChangeFeedID("test-changefeed1") 92 tester.MustUpdate( 93 fmt.Sprintf("%s/changefeed/info/%s", 94 etcd.DefaultClusterAndNamespacePrefix, 95 changefeedID1.ID), 96 []byte(`{"config":{},"state":"failed"}`)) 97 tester.MustApplyPatches() 98 gcErr := errors.ChangeFeedGCFastFailError[rand.Intn(len(errors.ChangeFeedGCFastFailError))] 99 errCode, ok := errors.RFCCode(gcErr) 100 require.True(t, ok) 101 state.Changefeeds[changefeedID1].PatchInfo( 102 func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) { 103 if info == nil { 104 return nil, false, nil 105 } 106 info.Error = &model.RunningError{Code: string(errCode), Message: gcErr.Error()} 107 return info, true, nil 108 }) 109 state.Changefeeds[changefeedID1].PatchStatus( 110 func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) { 111 return &model.ChangeFeedStatus{CheckpointTs: 2}, true, nil 112 }) 113 tester.MustApplyPatches() 114 err := o.updateGCSafepoint(ctx, state) 115 require.Nil(t, err) 116 117 // switch the state of changefeed to normal, it must update GC safepoint to 118 // 1 (checkpoint Ts of changefeed-test1). 119 ch := make(chan struct{}, 1) 120 mockPDClient.UpdateServiceGCSafePointFunc = func( 121 ctx context.Context, serviceID string, ttl int64, safePoint uint64, 122 ) (uint64, error) { 123 // Owner will do a snapshot read at (checkpointTs - 1) from TiKV, 124 // set GC safepoint to (checkpointTs - 1) 125 require.Equal(t, safePoint, uint64(1)) 126 require.Equal(t, serviceID, etcd.GcServiceIDForTest()) 127 ch <- struct{}{} 128 return 0, nil 129 } 130 state.Changefeeds[changefeedID1].PatchInfo( 131 func(info *model.ChangeFeedInfo) (*model.ChangeFeedInfo, bool, error) { 132 info.State = model.StateNormal 133 return info, true, nil 134 }) 135 tester.MustApplyPatches() 136 err = o.updateGCSafepoint(ctx, state) 137 require.Nil(t, err) 138 select { 139 case <-time.After(5 * time.Second): 140 t.Fatal("timeout") 141 case <-ch: 142 } 143 144 // add another changefeed, it must update GC safepoint. 145 changefeedID2 := model.DefaultChangeFeedID("test-changefeed2") 146 tester.MustUpdate( 147 fmt.Sprintf("%s/changefeed/info/%s", 148 etcd.DefaultClusterAndNamespacePrefix, 149 changefeedID2.ID), 150 []byte(`{"config":{},"state":"normal"}`)) 151 tester.MustApplyPatches() 152 state.Changefeeds[changefeedID1].PatchStatus( 153 func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) { 154 return &model.ChangeFeedStatus{CheckpointTs: 20}, true, nil 155 }) 156 state.Changefeeds[changefeedID2].PatchStatus( 157 func(status *model.ChangeFeedStatus) (*model.ChangeFeedStatus, bool, error) { 158 return &model.ChangeFeedStatus{CheckpointTs: 30}, true, nil 159 }) 160 tester.MustApplyPatches() 161 mockPDClient.UpdateServiceGCSafePointFunc = func( 162 ctx context.Context, serviceID string, ttl int64, safePoint uint64, 163 ) (uint64, error) { 164 // Owner will do a snapshot read at (checkpointTs - 1) from TiKV, 165 // set GC safepoint to (checkpointTs - 1) 166 require.Equal(t, safePoint, uint64(19)) 167 require.Equal(t, serviceID, etcd.GcServiceIDForTest()) 168 ch <- struct{}{} 169 return 0, nil 170 } 171 err = o.updateGCSafepoint(ctx, state) 172 require.Nil(t, err) 173 select { 174 case <-time.After(5 * time.Second): 175 t.Fatal("timeout") 176 case <-ch: 177 } 178 } 179 180 func TestCalculateGCSafepointTs(t *testing.T) { 181 state := orchestrator.NewGlobalStateForTest(etcd.DefaultCDCClusterID) 182 expectMinTsMap := make(map[uint64]uint64) 183 expectForceUpdateMap := make(map[uint64]interface{}) 184 o := &controllerImpl{changefeeds: make(map[model.ChangeFeedID]*orchestrator.ChangefeedReactorState)} 185 o.upstreamManager = upstream.NewManager4Test(nil) 186 187 stateMap := []model.FeedState{ 188 model.StateNormal, model.StateStopped, 189 model.StateWarning, model.StatePending, 190 model.StateFailed, /* failed changefeed with normal error should not be ignored */ 191 } 192 for i := 0; i < 100; i++ { 193 cfID := model.DefaultChangeFeedID(fmt.Sprintf("testChangefeed-%d", i)) 194 upstreamID := uint64(i / 10) 195 cfStatus := &model.ChangeFeedStatus{CheckpointTs: uint64(i) + 100} 196 cfInfo := &model.ChangeFeedInfo{UpstreamID: upstreamID, State: stateMap[rand.Intn(4)]} 197 if cfInfo.State == model.StateFailed { 198 cfInfo.Error = &model.RunningError{ 199 Addr: "test", 200 Code: "test", 201 Message: "test", 202 } 203 } 204 changefeed := &orchestrator.ChangefeedReactorState{ 205 ID: cfID, 206 Info: cfInfo, 207 Status: cfStatus, 208 } 209 state.Changefeeds[cfID] = changefeed 210 211 // expectMinTsMap will be like map[upstreamID]{0, 10, 20, ..., 90} 212 if i%10 == 0 { 213 expectMinTsMap[upstreamID] = uint64(i) + 100 214 } 215 216 // If a changefeed does not exist in ownerImpl.changefeeds, 217 // forceUpdate should be true. 218 if upstreamID%2 == 0 { 219 expectForceUpdateMap[upstreamID] = nil 220 } else { 221 o.changefeeds[cfID] = nil 222 } 223 } 224 225 for i := 0; i < 10; i++ { 226 cfID := model.DefaultChangeFeedID(fmt.Sprintf("testChangefeed-ignored-%d", i)) 227 upstreamID := uint64(i) 228 cfStatus := &model.ChangeFeedStatus{CheckpointTs: uint64(i)} 229 err := errors.ChangeFeedGCFastFailError[rand.Intn(len(errors.ChangeFeedGCFastFailError))] 230 errCode, ok := errors.RFCCode(err) 231 require.True(t, ok) 232 cfInfo := &model.ChangeFeedInfo{ 233 UpstreamID: upstreamID, 234 State: model.StateFailed, 235 Error: &model.RunningError{Code: string(errCode), Message: err.Error()}, 236 } 237 changefeed := &orchestrator.ChangefeedReactorState{ 238 ID: cfID, 239 Info: cfInfo, 240 Status: cfStatus, 241 } 242 state.Changefeeds[cfID] = changefeed 243 } 244 245 minCheckpoinTsMap, forceUpdateMap := o.calculateGCSafepoint(state) 246 247 require.Equal(t, expectMinTsMap, minCheckpoinTsMap) 248 require.Equal(t, expectForceUpdateMap, forceUpdateMap) 249 } 250 251 func TestCalculateGCSafepointTsNoChangefeed(t *testing.T) { 252 state := orchestrator.NewGlobalStateForTest(etcd.DefaultCDCClusterID) 253 expectForceUpdateMap := make(map[uint64]interface{}) 254 o := &controllerImpl{changefeeds: make(map[model.ChangeFeedID]*orchestrator.ChangefeedReactorState)} 255 o.upstreamManager = upstream.NewManager4Test(nil) 256 up, err := o.upstreamManager.GetDefaultUpstream() 257 require.Nil(t, err) 258 up.PDClock = pdutil.NewClock4Test() 259 260 minCheckpoinTsMap, forceUpdateMap := o.calculateGCSafepoint(state) 261 require.Equal(t, 1, len(minCheckpoinTsMap)) 262 require.Equal(t, expectForceUpdateMap, forceUpdateMap) 263 } 264 265 func TestFixChangefeedState(t *testing.T) { 266 globalVars := vars.NewGlobalVars4Test() 267 ctx := context.Background() 268 controller4Test, state, tester := createController4Test(globalVars, t) 269 changefeedID := model.DefaultChangeFeedID("test-changefeed") 270 // Mismatched state and admin job. 271 changefeedInfo := &model.ChangeFeedInfo{ 272 State: model.StateNormal, 273 AdminJobType: model.AdminStop, 274 StartTs: oracle.GoTimeToTS(time.Now()), 275 Config: config.GetDefaultReplicaConfig(), 276 } 277 changefeedStr, err := changefeedInfo.Marshal() 278 require.Nil(t, err) 279 cdcKey := etcd.CDCKey{ 280 ClusterID: state.ClusterID, 281 Tp: etcd.CDCKeyTypeChangefeedInfo, 282 ChangefeedID: changefeedID, 283 } 284 tester.MustUpdate(cdcKey.String(), []byte(changefeedStr)) 285 // For the first tick, we do a bootstrap, and it tries to fix the meta information. 286 _, err = controller4Test.Tick(ctx, state) 287 tester.MustApplyPatches() 288 require.Nil(t, err) 289 require.NotContains(t, controller4Test.changefeeds, changefeedID) 290 // Start tick normally. 291 _, err = controller4Test.Tick(ctx, state) 292 tester.MustApplyPatches() 293 require.Nil(t, err) 294 require.Contains(t, controller4Test.changefeeds, changefeedID) 295 // The meta information is fixed correctly. 296 require.Equal(t, controller4Test.changefeeds[changefeedID].Info.State, model.StateStopped) 297 } 298 299 func TestCheckClusterVersion(t *testing.T) { 300 globalVars := vars.NewGlobalVars4Test() 301 controller4Test, state, tester := createController4Test(globalVars, t) 302 ctx, cancel := context.WithCancel(context.Background()) 303 defer cancel() 304 305 tester.MustUpdate(fmt.Sprintf("%s/capture/6bbc01c8-0605-4f86-a0f9-b3119109b225", 306 etcd.DefaultClusterAndMetaPrefix), 307 []byte(`{"id":"6bbc01c8-0605-4f86-a0f9-b3119109b225", 308 "address":"127.0.0.1:8300","version":"v6.0.0"}`)) 309 310 changefeedID := model.DefaultChangeFeedID("test-changefeed") 311 changefeedInfo := &model.ChangeFeedInfo{ 312 StartTs: oracle.GoTimeToTS(time.Now()), 313 Config: config.GetDefaultReplicaConfig(), 314 } 315 changefeedStr, err := changefeedInfo.Marshal() 316 require.Nil(t, err) 317 cdcKey := etcd.CDCKey{ 318 ClusterID: state.ClusterID, 319 Tp: etcd.CDCKeyTypeChangefeedInfo, 320 ChangefeedID: changefeedID, 321 } 322 tester.MustUpdate(cdcKey.String(), []byte(changefeedStr)) 323 324 // check the tick is skipped and the changefeed will not be handled 325 _, err = controller4Test.Tick(ctx, state) 326 tester.MustApplyPatches() 327 require.Nil(t, err) 328 require.NotContains(t, controller4Test.changefeeds, changefeedID) 329 330 tester.MustUpdate(fmt.Sprintf("%s/capture/6bbc01c8-0605-4f86-a0f9-b3119109b225", 331 etcd.DefaultClusterAndMetaPrefix, 332 ), 333 []byte(`{"id":"6bbc01c8-0605-4f86-a0f9-b3119109b225","address":"127.0.0.1:8300","version":"`+ 334 globalVars.CaptureInfo.Version+`"}`)) 335 336 // check the tick is not skipped and the changefeed will be handled normally 337 _, err = controller4Test.Tick(ctx, state) 338 tester.MustApplyPatches() 339 require.Nil(t, err) 340 require.Contains(t, controller4Test.changefeeds, changefeedID) 341 } 342 343 func TestFixChangefeedSinkProtocol(t *testing.T) { 344 globalVars := vars.NewGlobalVars4Test() 345 controller4Test, state, tester := createController4Test(globalVars, t) 346 ctx := context.Background() 347 changefeedID := model.DefaultChangeFeedID("test-changefeed") 348 // Unknown protocol. 349 changefeedInfo := &model.ChangeFeedInfo{ 350 State: model.StateNormal, 351 AdminJobType: model.AdminStop, 352 StartTs: oracle.GoTimeToTS(time.Now()), 353 CreatorVersion: "5.3.0", 354 SinkURI: "kafka://127.0.0.1:9092/ticdc-test2?protocol=random", 355 Config: &config.ReplicaConfig{ 356 Sink: &config.SinkConfig{Protocol: util.AddressOf(config.ProtocolDefault.String())}, 357 }, 358 } 359 changefeedStr, err := changefeedInfo.Marshal() 360 require.Nil(t, err) 361 cdcKey := etcd.CDCKey{ 362 ClusterID: state.ClusterID, 363 Tp: etcd.CDCKeyTypeChangefeedInfo, 364 ChangefeedID: changefeedID, 365 } 366 tester.MustUpdate(cdcKey.String(), []byte(changefeedStr)) 367 // For the first tick, we do a bootstrap, and it tries to fix the meta information. 368 _, err = controller4Test.Tick(ctx, state) 369 tester.MustApplyPatches() 370 require.Nil(t, err) 371 require.NotContains(t, controller4Test.changefeeds, changefeedID) 372 373 // Start tick normally. 374 _, err = controller4Test.Tick(ctx, state) 375 tester.MustApplyPatches() 376 require.Nil(t, err) 377 require.Contains(t, controller4Test.changefeeds, changefeedID) 378 // The meta information is fixed correctly. 379 require.Equal(t, controller4Test.changefeeds[changefeedID].Info.SinkURI, 380 "kafka://127.0.0.1:9092/ticdc-test2?protocol=open-protocol") 381 }