github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/pkg/externalresource/manager/gc_runner_test.go

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
	"context"
	"fmt"
	"math/rand"
	"sync"
	"testing"
	"time"

	"github.com/pingcap/tiflow/engine/model"
	"github.com/pingcap/tiflow/engine/pkg/clock"
	"github.com/pingcap/tiflow/engine/pkg/externalresource/internal/bucket"
	resModel "github.com/pingcap/tiflow/engine/pkg/externalresource/model"
	pkgOrm "github.com/pingcap/tiflow/engine/pkg/orm"
	"github.com/pingcap/tiflow/pkg/errors"
	"github.com/stretchr/testify/require"
)

type gcRunnerTestHelper struct {
	Runner *DefaultGCRunner
	Meta   pkgOrm.ResourceClient
	Clock  *clock.Mock

	wg     sync.WaitGroup
	ctx    context.Context
	cancel context.CancelFunc
	errCh  chan error

	gcRequestCh chan *resModel.ResourceMeta
}

func newGCRunnerTestHelper() *gcRunnerTestHelper {
	meta, err := pkgOrm.NewMockClient()
	if err != nil {
		panic(err)
	}
	return newGCRunnerTestHelperWithMeta(meta)
}

func newGCRunnerTestHelperWithMeta(meta pkgOrm.ResourceClient) *gcRunnerTestHelper {
	reqCh := make(chan *resModel.ResourceMeta, 16)
	gcExecutorCh := make(chan []*resModel.ResourceMeta, 16)
	runner := NewGCRunner(meta, nil, nil)
	runner.gcHandlers[resModel.ResourceTypeLocalFile] = &mockResourceController{gcRequestCh: reqCh}
	runner.gcHandlers[resModel.ResourceTypeS3] = &mockResourceController{
		gcRequestCh:  reqCh,
		gcExecutorCh: gcExecutorCh,
	}
	clk := clock.NewMock()
	runner.clock = clk
	ctx, cancel := context.WithCancel(context.Background())

	return &gcRunnerTestHelper{
		Runner: runner,
		Meta:   meta,
		Clock:  clk,

		ctx:         ctx,
		cancel:      cancel,
		errCh:       make(chan error, 1),
		gcRequestCh: reqCh,
	}
}

func (h *gcRunnerTestHelper) Start() {
	h.wg.Add(1)
	go func() {
		defer h.wg.Done()

		h.errCh <- h.Runner.Run(h.ctx)
	}()
}

func (h *gcRunnerTestHelper) Close() {
	h.cancel()
	h.wg.Wait()
}

func (h *gcRunnerTestHelper) WaitGC(t *testing.T) (meta *resModel.ResourceMeta) {
	select {
	case <-time.After(2 * time.Second):
		t.Fatal("timeout waiting for GC")
	case meta = <-h.gcRequestCh:
	}
	return
}

// mockMetaClientErrOnce is a temporary solution for testing
// the retry logic of gcOnce().
// TODO make a more generic version of this struct, and
// do better error condition testing in other situations too.
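// Each overridden method below fails exactly once with an injected error,
// then delegates every subsequent call to the embedded mock ResourceClient.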
type mockMetaClientErrOnce struct {
	pkgOrm.ResourceClient

	methodsAllReadyErred map[string]struct{}
}

func newMockMetaClientErrOnce() *mockMetaClientErrOnce {
	inner, err := pkgOrm.NewMockClient()
	if err != nil {
		panic(err)
	}

	return &mockMetaClientErrOnce{
		ResourceClient:       inner,
		methodsAllReadyErred: make(map[string]struct{}),
	}
}

func (c *mockMetaClientErrOnce) DeleteResource(ctx context.Context, resourceKey pkgOrm.ResourceKey) (pkgOrm.Result, error) {
	if _, erred := c.methodsAllReadyErred["DeleteResource"]; !erred {
		c.methodsAllReadyErred["DeleteResource"] = struct{}{}
		return nil, errors.New("injected error")
	}

	return c.ResourceClient.DeleteResource(ctx, resourceKey)
}

func (c *mockMetaClientErrOnce) GetOneResourceForGC(ctx context.Context) (*resModel.ResourceMeta, error) {
	if _, erred := c.methodsAllReadyErred["GetOneResourceForGC"]; !erred {
		c.methodsAllReadyErred["GetOneResourceForGC"] = struct{}{}
		return nil, errors.New("injected error")
	}

	return c.ResourceClient.GetOneResourceForGC(ctx)
}

func (c *mockMetaClientErrOnce) DeleteResourcesByTypeAndExecutorIDs(ctx context.Context,
	resType resModel.ResourceType, executorID ...model.ExecutorID,
) (pkgOrm.Result, error) {
	if _, erred := c.methodsAllReadyErred["DeleteResourcesByTypeAndExecutorIDs"]; !erred {
		c.methodsAllReadyErred["DeleteResourcesByTypeAndExecutorIDs"] = struct{}{}
		return nil, errors.New("injected error")
	}

	return c.ResourceClient.DeleteResourcesByTypeAndExecutorIDs(ctx, resType, executorID...)
}

func (c *mockMetaClientErrOnce) QueryResourcesByExecutorIDs(ctx context.Context,
	executorID ...model.ExecutorID,
) ([]*resModel.ResourceMeta, error) {
	if _, erred := c.methodsAllReadyErred["QueryResourcesByExecutorIDs"]; !erred {
		c.methodsAllReadyErred["QueryResourcesByExecutorIDs"] = struct{}{}
		return nil, errors.New("injected error")
	}

	return c.ResourceClient.QueryResourcesByExecutorIDs(ctx, executorID...)
}

func TestGCRunnerNotify(t *testing.T) {
	t.Parallel()
	helper := newGCRunnerTestHelper()
	helper.Start()

	resources := []string{"/local/resource-1", "/s3/resource-1"}
	for _, res := range resources {
		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
			ID:        res,
			Job:       "job-1",
			Worker:    "worker-1",
			Executor:  "executor-1",
			GCPending: true,
		})
		require.NoError(t, err)

		// Note that since we are not advancing the clock,
		// GC can only be triggered by calling Notify.
		helper.Runner.GCNotify()

		gcRes := helper.WaitGC(t)
		require.Equal(t, res, gcRes.ID)
	}

	helper.Close()
}

func TestGCRunnerUnsupportedResourceType(t *testing.T) {
	t.Parallel()
	helper := newGCRunnerTestHelper()

	// Unsupported resources should be ignored by the GCRunner.
	err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
		ID:        "/unsupported/resource-1",
		Job:       "job-1",
		Worker:    "worker-1",
		Executor:  "executor-1",
		GCPending: true,
	})
	require.NoError(t, err)

	helper.Start()
	helper.Runner.GCNotify()

	// Assert that unsupported resources should not cause panic
	// and are NOT removed from meta.
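	// Poll the metastore for about one second and check that the
	// GC-pending flag stays set the whole time.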
	startTime := time.Now()
	for {
		if time.Since(startTime) > 1*time.Second {
			break
		}

		res, err := helper.Meta.GetResourceByID(
			context.Background(),
			pkgOrm.ResourceKey{
				JobID: "job-1",
				ID:    "/unsupported/resource-1",
			})
		require.NoError(t, err)
		require.True(t, res.GCPending)
	}

	helper.Close()
}

func TestGCRunnerTicker(t *testing.T) {
	t.Parallel()
	helper := newGCRunnerTestHelper()
	helper.Start()

	resources := []string{"/local/resource-1", "/s3/resource-1"}
	for _, res := range resources {
		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
			ID:        res,
			Job:       "job-1",
			Worker:    "worker-1",
			Executor:  "executor-1",
			GCPending: true,
		})
		require.NoError(t, err)

		helper.Clock.Add(10 * time.Second)

		gcRes := helper.WaitGC(t)
		require.Equal(t, res, gcRes.ID)
	}

	helper.Close()
}

func TestGCRunnerMultiple(t *testing.T) {
	t.Parallel()
	helper := newGCRunnerTestHelper()

	resources := []string{"/local/resource", "/s3/resource"}
	const numResources = 1000
	for i := 0; i < numResources; i++ {
		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
			ID:        fmt.Sprintf("%s-%d", resources[rand.Intn(2)], i),
			Job:       "job-1",
			Worker:    "worker-1",
			Executor:  "executor-1",
			GCPending: i%2 == 0, // marks half the resources as needing GC.
		})
		require.NoError(t, err)
	}

	helper.Start()

	alreadyGCedSet := make(map[resModel.ResourceID]struct{})
loop:
	for {
		select {
		case meta := <-helper.gcRequestCh:
			_, exists := alreadyGCedSet[meta.ID]
			require.False(t, exists)
			alreadyGCedSet[meta.ID] = struct{}{}

			if len(alreadyGCedSet) == 500 {
				break loop
			}
		default:
		}

		helper.Runner.GCNotify()
	}

	helper.Close()
}

func TestGCRunnerRetry(t *testing.T) {
	t.Parallel()
	mockMeta := newMockMetaClientErrOnce()
	helper := newGCRunnerTestHelperWithMeta(mockMeta)

	err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
		ID:        "/local/resource-1",
		Job:       "job-1",
		Worker:    "worker-1",
		Executor:  "executor-1",
		GCPending: true,
	})
	require.NoError(t, err)

	helper.Start()

	// Note that since we are not advancing the clock,
	// GC can only be triggered by calling Notify.
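	// The mock meta client fails each method once with an injected error,
	// so the runner has to retry before the GC request is observed below.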
	helper.Runner.GCNotify()

	gcRes := helper.WaitGC(t)
	require.Equal(t, "/local/resource-1", gcRes.ID)

	helper.Close()
}

func TestGCExecutors(t *testing.T) {
	helper := newGCRunnerTestHelper()
	testGCExecutors(t, helper)
	helper.Close()
}

func TestGCExecutorsRetry(t *testing.T) {
	helper := newGCRunnerTestHelperWithMeta(newMockMetaClientErrOnce())
	testGCExecutors(t, helper)
	helper.Close()
}

func testGCExecutors(t *testing.T, helper *gcRunnerTestHelper) {
	gcExecutorsTimeout = 10 * time.Second
	gcExecutorsRateLimit = 200
	gcExecutorsMinIntervalMs = int64(10)
	gcExecutorsMaxIntervalMs = int64(100)

	checkAlive := func(ctx context.Context, executors ...model.ExecutorID) {
		for _, executor := range executors {
			res, err := helper.Meta.GetResourceByID(ctx, pkgOrm.ResourceKey{
				JobID: bucket.GetDummyJobID(executor),
				ID:    bucket.DummyResourceID,
			})
			require.NoError(t, err)
			require.NotNil(t, res)
		}
	}
	checkOffline := func(ctx context.Context, executors ...model.ExecutorID) {
		metas, err := helper.Meta.QueryResourcesByExecutorIDs(ctx, executors...)
		require.NoError(t, err)
		for _, meta := range metas {
			tp, resName, err := resModel.ParseResourceID(meta.ID)
			require.NoError(t, err)
			require.Equal(t, resModel.ResourceTypeS3, tp)
			require.NotEqual(t, bucket.GetDummyResourceName(), resName)
		}
	}

	resources := []string{"/local/resource", "/s3/resource"}
	executors := []string{"executor-1", "executor-2", "executor-3", "executor-never-offline"}
	// generate mock meta
	for _, executor := range executors {
		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
			ID:       bucket.DummyResourceID,
			Job:      bucket.GetDummyJobID(model.ExecutorID(executor)),
			Worker:   bucket.DummyWorkerID,
			Executor: model.ExecutorID(executor),
		})
		require.NoError(t, err)
	}
	const numResources = 1000
	for i := 0; i < numResources; i++ {
		workerID := rand.Intn(4)
		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
			ID:        fmt.Sprintf("%s-%d", resources[rand.Intn(2)], i),
			Job:       "job-1",
			Worker:    fmt.Sprintf("worker-%d", workerID),
			Executor:  model.ExecutorID(executors[workerID]),
			GCPending: i%2 == 0, // marks half the resources as needing GC.
		})
		require.NoError(t, err)
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	helper.Runner.GCExecutors(ctx, "executor-1", "executor-2")
	checkOffline(ctx, "executor-1", "executor-2")
	checkAlive(ctx, "executor-3", "executor-never-offline")

	helper.Runner.GCExecutors(ctx, "executor-3")
	checkOffline(ctx, "executor-3")
	checkAlive(ctx, "executor-never-offline")
}