github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/pkg/externalresource/manager/gc_runner_test.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package manager
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"math/rand"
    20  	"sync"
    21  	"testing"
    22  	"time"
    23  
    24  	"github.com/pingcap/tiflow/engine/model"
    25  	"github.com/pingcap/tiflow/engine/pkg/clock"
    26  	"github.com/pingcap/tiflow/engine/pkg/externalresource/internal/bucket"
    27  	resModel "github.com/pingcap/tiflow/engine/pkg/externalresource/model"
    28  	pkgOrm "github.com/pingcap/tiflow/engine/pkg/orm"
    29  	"github.com/pingcap/tiflow/pkg/errors"
    30  	"github.com/stretchr/testify/require"
    31  )
    32  
    33  type gcRunnerTestHelper struct {
    34  	Runner *DefaultGCRunner
    35  	Meta   pkgOrm.ResourceClient
    36  	Clock  *clock.Mock
    37  
    38  	wg     sync.WaitGroup
    39  	ctx    context.Context
    40  	cancel context.CancelFunc
    41  	errCh  chan error
    42  
    43  	gcRequestCh chan *resModel.ResourceMeta
    44  }
    45  
    46  func newGCRunnerTestHelper() *gcRunnerTestHelper {
    47  	meta, err := pkgOrm.NewMockClient()
    48  	if err != nil {
    49  		panic(err)
    50  	}
    51  	return newGCRunnerTestHelperWithMeta(meta)
    52  }
    53  
    54  func newGCRunnerTestHelperWithMeta(meta pkgOrm.ResourceClient) *gcRunnerTestHelper {
    55  	reqCh := make(chan *resModel.ResourceMeta, 16)
    56  	gcExecutorCh := make(chan []*resModel.ResourceMeta, 16)
    57  	runner := NewGCRunner(meta, nil, nil)
    58  	runner.gcHandlers[resModel.ResourceTypeLocalFile] = &mockResourceController{gcRequestCh: reqCh}
    59  	runner.gcHandlers[resModel.ResourceTypeS3] = &mockResourceController{
    60  		gcRequestCh:  reqCh,
    61  		gcExecutorCh: gcExecutorCh,
    62  	}
    63  	clk := clock.NewMock()
    64  	runner.clock = clk
    65  	ctx, cancel := context.WithCancel(context.Background())
    66  
    67  	return &gcRunnerTestHelper{
    68  		Runner: runner,
    69  		Meta:   meta,
    70  		Clock:  clk,
    71  
    72  		ctx:         ctx,
    73  		cancel:      cancel,
    74  		errCh:       make(chan error, 1),
    75  		gcRequestCh: reqCh,
    76  	}
    77  }
    78  
    79  func (h *gcRunnerTestHelper) Start() {
    80  	h.wg.Add(1)
    81  	go func() {
    82  		defer h.wg.Done()
    83  
    84  		h.errCh <- h.Runner.Run(h.ctx)
    85  	}()
    86  }
    87  
    88  func (h *gcRunnerTestHelper) Close() {
    89  	h.cancel()
    90  	h.wg.Wait()
    91  }
    92  
    93  func (h *gcRunnerTestHelper) WaitGC(t *testing.T) (meta *resModel.ResourceMeta) {
    94  	select {
    95  	case <-time.After(2 * time.Second):
    96  		t.Fatal("timeout waiting for GC")
    97  	case meta = <-h.gcRequestCh:
    98  	}
    99  	return
   100  }
   101  
   102  // mockMetaClientErrOnce is a temporary solution for testing
   103  // the retry logic of gcOnce().
   104  // TODO make a more generic version of this struct, and
   105  // do better error condition testing in other situations too.
   106  type mockMetaClientErrOnce struct {
   107  	pkgOrm.ResourceClient
   108  
   109  	methodsAllReadyErred map[string]struct{}
   110  }
   111  
   112  func newMockMetaClientErrOnce() *mockMetaClientErrOnce {
   113  	inner, err := pkgOrm.NewMockClient()
   114  	if err != nil {
   115  		panic(err)
   116  	}
   117  
   118  	return &mockMetaClientErrOnce{
   119  		ResourceClient:       inner,
   120  		methodsAllReadyErred: make(map[string]struct{}),
   121  	}
   122  }
   123  
   124  func (c *mockMetaClientErrOnce) DeleteResource(ctx context.Context, resourceKey pkgOrm.ResourceKey) (pkgOrm.Result, error) {
   125  	if _, erred := c.methodsAllReadyErred["DeleteResource"]; !erred {
   126  		c.methodsAllReadyErred["DeleteResource"] = struct{}{}
   127  		return nil, errors.New("injected error")
   128  	}
   129  
   130  	return c.ResourceClient.DeleteResource(ctx, resourceKey)
   131  }
   132  
   133  func (c *mockMetaClientErrOnce) GetOneResourceForGC(ctx context.Context) (*resModel.ResourceMeta, error) {
   134  	if _, erred := c.methodsAllReadyErred["GetOneResourceForGC"]; !erred {
   135  		c.methodsAllReadyErred["GetOneResourceForGC"] = struct{}{}
   136  		return nil, errors.New("injected error")
   137  	}
   138  
   139  	return c.ResourceClient.GetOneResourceForGC(ctx)
   140  }
   141  
   142  func (c *mockMetaClientErrOnce) DeleteResourcesByTypeAndExecutorIDs(ctx context.Context,
   143  	resType resModel.ResourceType, executorID ...model.ExecutorID,
   144  ) (pkgOrm.Result, error) {
   145  	if _, erred := c.methodsAllReadyErred["DeleteResourcesByTypeAndExecutorIDs"]; !erred {
   146  		c.methodsAllReadyErred["DeleteResourcesByTypeAndExecutorIDs"] = struct{}{}
   147  		return nil, errors.New("injected error")
   148  	}
   149  
   150  	return c.ResourceClient.DeleteResourcesByTypeAndExecutorIDs(ctx, resType, executorID...)
   151  }
   152  
   153  func (c *mockMetaClientErrOnce) QueryResourcesByExecutorIDs(ctx context.Context,
   154  	executorID ...model.ExecutorID,
   155  ) ([]*resModel.ResourceMeta, error) {
   156  	if _, erred := c.methodsAllReadyErred["QueryResourcesByExecutorIDs"]; !erred {
   157  		c.methodsAllReadyErred["QueryResourcesByExecutorIDs"] = struct{}{}
   158  		return nil, errors.New("injected error")
   159  	}
   160  
   161  	return c.ResourceClient.QueryResourcesByExecutorIDs(ctx, executorID...)
   162  }
   163  
   164  func TestGCRunnerNotify(t *testing.T) {
   165  	t.Parallel()
   166  	helper := newGCRunnerTestHelper()
   167  	helper.Start()
   168  
   169  	resources := []string{"/local/resource-1", "/s3/resource-1"}
   170  	for _, res := range resources {
   171  		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
   172  			ID:        res,
   173  			Job:       "job-1",
   174  			Worker:    "worker-1",
   175  			Executor:  "executor-1",
   176  			GCPending: true,
   177  		})
   178  		require.NoError(t, err)
   179  
   180  		// Note that since we are not advancing the clock,
   181  		// GC can only be triggered by calling Notify.
   182  		helper.Runner.GCNotify()
   183  
   184  		gcRes := helper.WaitGC(t)
   185  		require.Equal(t, res, gcRes.ID)
   186  	}
   187  
   188  	helper.Close()
   189  }
   190  
   191  func TestGCRunnerUnsupportedResourceType(t *testing.T) {
   192  	t.Parallel()
   193  	helper := newGCRunnerTestHelper()
   194  
   195  	// Unsupported resources should be ignored by the GCRunner.
   196  	err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
   197  		ID:        "/unsupported/resource-1",
   198  		Job:       "job-1",
   199  		Worker:    "worker-1",
   200  		Executor:  "executor-1",
   201  		GCPending: true,
   202  	})
   203  	require.NoError(t, err)
   204  
   205  	helper.Start()
   206  	helper.Runner.GCNotify()
   207  
   208  	// Assert that unsupported resources should not cause panic
   209  	// and are NOT removed from meta.
   210  	startTime := time.Now()
   211  	for {
   212  		if time.Since(startTime) > 1*time.Second {
   213  			break
   214  		}
   215  
   216  		res, err := helper.Meta.GetResourceByID(
   217  			context.Background(),
   218  			pkgOrm.ResourceKey{
   219  				JobID: "job-1",
   220  				ID:    "/unsupported/resource-1",
   221  			})
   222  		require.NoError(t, err)
   223  		require.True(t, res.GCPending)
   224  	}
   225  
   226  	helper.Close()
   227  }
   228  
   229  func TestGCRunnerTicker(t *testing.T) {
   230  	t.Parallel()
   231  	helper := newGCRunnerTestHelper()
   232  	helper.Start()
   233  
   234  	resources := []string{"/local/resource-1", "/s3/resource-1"}
   235  	for _, res := range resources {
   236  		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
   237  			ID:        res,
   238  			Job:       "job-1",
   239  			Worker:    "worker-1",
   240  			Executor:  "executor-1",
   241  			GCPending: true,
   242  		})
   243  		require.NoError(t, err)
   244  
   245  		helper.Clock.Add(10 * time.Second)
   246  
   247  		gcRes := helper.WaitGC(t)
   248  		require.Equal(t, res, gcRes.ID)
   249  	}
   250  
   251  	helper.Close()
   252  }
   253  
   254  func TestGCRunnerMultiple(t *testing.T) {
   255  	t.Parallel()
   256  	helper := newGCRunnerTestHelper()
   257  
   258  	resources := []string{"/local/resource", "/s3/resource"}
   259  	const numResources = 1000
   260  	for i := 0; i < numResources; i++ {
   261  		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
   262  			ID:        fmt.Sprintf("%s-%d", resources[rand.Intn(2)], i),
   263  			Job:       "job-1",
   264  			Worker:    "worker-1",
   265  			Executor:  "executor-1",
   266  			GCPending: i%2 == 0, // marks half the resources as needing GC.
   267  		})
   268  		require.NoError(t, err)
   269  	}
   270  
   271  	helper.Start()
   272  
   273  	alreadyGCedSet := make(map[resModel.ResourceID]struct{})
   274  loop:
   275  	for {
   276  		select {
   277  		case meta := <-helper.gcRequestCh:
   278  			_, exists := alreadyGCedSet[meta.ID]
   279  			require.False(t, exists)
   280  			alreadyGCedSet[meta.ID] = struct{}{}
   281  
   282  			if len(alreadyGCedSet) == 500 {
   283  				break loop
   284  			}
   285  		default:
   286  		}
   287  
   288  		helper.Runner.GCNotify()
   289  	}
   290  
   291  	helper.Close()
   292  }
   293  
   294  func TestGCRunnerRetry(t *testing.T) {
   295  	t.Parallel()
   296  	mockMeta := newMockMetaClientErrOnce()
   297  	helper := newGCRunnerTestHelperWithMeta(mockMeta)
   298  
   299  	err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
   300  		ID:        "/local/resource-1",
   301  		Job:       "job-1",
   302  		Worker:    "worker-1",
   303  		Executor:  "executor-1",
   304  		GCPending: true,
   305  	})
   306  	require.NoError(t, err)
   307  
   308  	helper.Start()
   309  
   310  	// Note that since we are not advancing the clock,
   311  	// GC can only be triggered by calling Notify.
   312  	helper.Runner.GCNotify()
   313  
   314  	gcRes := helper.WaitGC(t)
   315  	require.Equal(t, "/local/resource-1", gcRes.ID)
   316  
   317  	helper.Close()
   318  }
   319  
   320  func TestGCExecutors(t *testing.T) {
   321  	helper := newGCRunnerTestHelper()
   322  	testGCExecutors(t, helper)
   323  	helper.Close()
   324  }
   325  
   326  func TestGCExecutorsRetry(t *testing.T) {
   327  	helper := newGCRunnerTestHelperWithMeta(newMockMetaClientErrOnce())
   328  	testGCExecutors(t, helper)
   329  	helper.Close()
   330  }
   331  
   332  func testGCExecutors(t *testing.T, helper *gcRunnerTestHelper) {
   333  	gcExecutorsTimeout = 10 * time.Second
   334  	gcExecutorsRateLimit = 200
   335  	gcExecutorsMinIntervalMs = int64(10)
   336  	gcExecutorsMaxIntervalMs = int64(100)
   337  
   338  	checkAlive := func(ctx context.Context, executors ...model.ExecutorID) {
   339  		for _, executor := range executors {
   340  			res, err := helper.Meta.GetResourceByID(ctx, pkgOrm.ResourceKey{
   341  				JobID: bucket.GetDummyJobID(executor),
   342  				ID:    bucket.DummyResourceID,
   343  			})
   344  			require.NoError(t, err)
   345  			require.NotNil(t, res)
   346  		}
   347  	}
   348  	checkOffline := func(ctx context.Context, executors ...model.ExecutorID) {
   349  		metas, err := helper.Meta.QueryResourcesByExecutorIDs(ctx, executors...)
   350  		require.NoError(t, err)
   351  		for _, meta := range metas {
   352  			tp, resName, err := resModel.ParseResourceID(meta.ID)
   353  			require.NoError(t, err)
   354  			require.Equal(t, resModel.ResourceTypeS3, tp)
   355  			require.NotEqual(t, bucket.GetDummyResourceName(), resName)
   356  		}
   357  	}
   358  
   359  	resources := []string{"/local/resource", "/s3/resource"}
   360  	executors := []string{"executor-1", "executor-2", "executor-3", "executor-never-offline"}
   361  	// generate mock meta
   362  	for _, executor := range executors {
   363  		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
   364  			ID:       bucket.DummyResourceID,
   365  			Job:      bucket.GetDummyJobID(model.ExecutorID(executor)),
   366  			Worker:   bucket.DummyWorkerID,
   367  			Executor: model.ExecutorID(executor),
   368  		})
   369  		require.NoError(t, err)
   370  	}
   371  	const numResources = 1000
   372  	for i := 0; i < numResources; i++ {
   373  		workerID := rand.Intn(4)
   374  		err := helper.Meta.CreateResource(context.Background(), &resModel.ResourceMeta{
   375  			ID:        fmt.Sprintf("%s-%d", resources[rand.Intn(2)], i),
   376  			Job:       "job-1",
   377  			Worker:    fmt.Sprintf("worker-%d", workerID),
   378  			Executor:  model.ExecutorID(executors[workerID]),
   379  			GCPending: i%2 == 0, // marks half the resources as needing GC.
   380  		})
   381  		require.NoError(t, err)
   382  	}
   383  
   384  	ctx, cancel := context.WithCancel(context.Background())
   385  	defer cancel()
   386  
   387  	helper.Runner.GCExecutors(ctx, "executor-1", "executor-2")
   388  	checkOffline(ctx, "executor-1", "executor-2")
   389  	checkAlive(ctx, "executor-3", "executor-never-offline")
   390  
   391  	helper.Runner.GCExecutors(ctx, "executor-3")
   392  	checkOffline(ctx, "executor-3")
   393  	checkAlive(ctx, "executor-never-offline")
   394  }