github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/redo/manager_test.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package redo
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"math"
    20  	"math/rand"
    21  	"sync"
    22  	"testing"
    23  	"time"
    24  
    25  	"github.com/pingcap/log"
    26  	"github.com/pingcap/tiflow/cdc/model"
    27  	"github.com/pingcap/tiflow/cdc/processor/tablepb"
    28  	"github.com/pingcap/tiflow/cdc/redo/writer"
    29  	"github.com/pingcap/tiflow/pkg/config"
    30  	"github.com/pingcap/tiflow/pkg/redo"
    31  	"github.com/pingcap/tiflow/pkg/spanz"
    32  	"github.com/stretchr/testify/require"
    33  	"go.uber.org/zap"
    34  	"golang.org/x/sync/errgroup"
    35  )
    36  
// workerNumberForTest overrides the default encoding/flush worker counts in
// tests to speed up the test.
var workerNumberForTest = 2
    39  
    40  func checkResolvedTs(t *testing.T, mgr *logManager, expectedRts uint64) {
    41  	require.Eventually(t, func() bool {
    42  		resolvedTs := uint64(math.MaxUint64)
    43  		mgr.rtsMap.Range(func(span tablepb.Span, value any) bool {
    44  			v, ok := value.(*statefulRts)
    45  			require.True(t, ok)
    46  			ts := v.getFlushed()
    47  			if ts < resolvedTs {
    48  				resolvedTs = ts
    49  			}
    50  			return true
    51  		})
    52  		return resolvedTs == expectedRts
    53  		// This retry 80 times, with redo.MinFlushIntervalInMs(50ms) interval,
    54  		// it will take 4s at most.
    55  	}, time.Second*4, time.Millisecond*redo.MinFlushIntervalInMs)
    56  }
    57  
    58  func TestConsistentConfig(t *testing.T) {
    59  	t.Parallel()
    60  	levelCases := []struct {
    61  		level string
    62  		valid bool
    63  	}{
    64  		{"none", true},
    65  		{"eventual", true},
    66  		{"NONE", false},
    67  		{"", false},
    68  	}
    69  	for _, lc := range levelCases {
    70  		require.Equal(t, lc.valid, redo.IsValidConsistentLevel(lc.level))
    71  	}
    72  
    73  	levelEnableCases := []struct {
    74  		level      string
    75  		consistent bool
    76  	}{
    77  		{"invalid-level", false},
    78  		{"none", false},
    79  		{"eventual", true},
    80  	}
    81  	for _, lc := range levelEnableCases {
    82  		require.Equal(t, lc.consistent, redo.IsConsistentEnabled(lc.level))
    83  	}
    84  
    85  	storageCases := []struct {
    86  		storage string
    87  		valid   bool
    88  	}{
    89  		{"local", true},
    90  		{"nfs", true},
    91  		{"s3", true},
    92  		{"blackhole", true},
    93  		{"Local", false},
    94  		{"", false},
    95  	}
    96  	for _, sc := range storageCases {
    97  		require.Equal(t, sc.valid, redo.IsValidConsistentStorage(sc.storage))
    98  	}
    99  
   100  	s3StorageCases := []struct {
   101  		storage   string
   102  		s3Enabled bool
   103  	}{
   104  		{"local", false},
   105  		{"nfs", false},
   106  		{"s3", true},
   107  		{"blackhole", false},
   108  	}
   109  	for _, sc := range s3StorageCases {
   110  		require.Equal(t, sc.s3Enabled, redo.IsExternalStorage(sc.storage))
   111  	}
   112  }
   113  
// TestLogManagerInProcessor tests how redo log manager is used in processor.
func TestLogManagerInProcessor(t *testing.T) {
	t.Parallel()
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// testWriteDMLs drives one full processor-side scenario against the given
	// storage URI and backend kind: add tables, emit rows, advance the
	// resolved ts per span, then remove a table and verify progress again.
	testWriteDMLs := func(storage string, useFileBackend bool) {
		// Child context so this sub-case can stop the manager independently.
		ctx, cancel := context.WithCancel(ctx)
		cfg := &config.ConsistentConfig{
			Level:      string(redo.ConsistentLevelEventual),
			MaxLogSize: redo.DefaultMaxLogSize,
			Storage:    storage,
			// Minimum flush intervals keep the polling in checkResolvedTs fast.
			FlushIntervalInMs:     redo.MinFlushIntervalInMs,
			MetaFlushIntervalInMs: redo.MinFlushIntervalInMs,
			EncodingWorkerNum:     workerNumberForTest,
			FlushWorkerNum:        workerNumberForTest,
			UseFileBackend:        useFileBackend,
		}
		dmlMgr := NewDMLManager(model.DefaultChangeFeedID("test"), cfg)
		var eg errgroup.Group
		eg.Go(func() error {
			return dmlMgr.Run(ctx)
		})
		// check emit row changed events can move forward resolved ts
		spans := []tablepb.Span{
			spanz.TableIDToComparableSpan(53),
			spanz.TableIDToComparableSpan(55),
			spanz.TableIDToComparableSpan(57),
			spanz.TableIDToComparableSpan(59),
		}

		startTs := uint64(100)
		for _, span := range spans {
			dmlMgr.AddTable(span, startTs)
		}
		tableInfo := &model.TableInfo{
			TableName: model.TableName{Schema: "test", Table: "t"},
		}
		testCases := []struct {
			span tablepb.Span
			rows []*model.RowChangedEvent
		}{
			{
				span: spanz.TableIDToComparableSpan(53),
				rows: []*model.RowChangedEvent{
					{CommitTs: 120, PhysicalTableID: 53, TableInfo: tableInfo},
					{CommitTs: 125, PhysicalTableID: 53, TableInfo: tableInfo},
					{CommitTs: 130, PhysicalTableID: 53, TableInfo: tableInfo},
				},
			},
			{
				span: spanz.TableIDToComparableSpan(55),
				rows: []*model.RowChangedEvent{
					{CommitTs: 130, PhysicalTableID: 55, TableInfo: tableInfo},
					{CommitTs: 135, PhysicalTableID: 55, TableInfo: tableInfo},
				},
			},
			{
				span: spanz.TableIDToComparableSpan(57),
				rows: []*model.RowChangedEvent{
					{CommitTs: 130, PhysicalTableID: 57, TableInfo: tableInfo},
				},
			},
			{
				span: spanz.TableIDToComparableSpan(59),
				rows: []*model.RowChangedEvent{
					{CommitTs: 128, PhysicalTableID: 59, TableInfo: tableInfo},
					{CommitTs: 130, PhysicalTableID: 59, TableInfo: tableInfo},
					{CommitTs: 133, PhysicalTableID: 59, TableInfo: tableInfo},
				},
			},
		}
		for _, tc := range testCases {
			err := dmlMgr.EmitRowChangedEvents(ctx, tc.span, nil, tc.rows...)
			require.NoError(t, err)
		}

		// check UpdateResolvedTs can move forward the resolved ts when there is not row event.
		flushResolvedTs := uint64(150)
		for _, span := range spans {
			// The observed resolved ts is the minimum over all spans, so it
			// stays at startTs until every span has been advanced below.
			checkResolvedTs(t, dmlMgr.logManager, startTs)
			err := dmlMgr.UpdateResolvedTs(ctx, span, flushResolvedTs)
			require.NoError(t, err)
		}
		checkResolvedTs(t, dmlMgr.logManager, flushResolvedTs)

		// check remove table can work normally
		removeTable := spans[len(spans)-1]
		spans = spans[:len(spans)-1]
		dmlMgr.RemoveTable(removeTable)
		flushResolvedTs = uint64(200)
		// The removed table must no longer hold back the global resolved ts.
		for _, span := range spans {
			err := dmlMgr.UpdateResolvedTs(ctx, span, flushResolvedTs)
			require.NoError(t, err)
		}
		checkResolvedTs(t, dmlMgr.logManager, flushResolvedTs)

		cancel()
		require.ErrorIs(t, eg.Wait(), context.Canceled)
	}

	// Exercise every supported storage/backend combination.
	testWriteDMLs("blackhole://", true)
	storages := []string{
		fmt.Sprintf("file://%s", t.TempDir()),
		fmt.Sprintf("nfs://%s", t.TempDir()),
	}
	for _, storage := range storages {
		testWriteDMLs(storage, true)
		testWriteDMLs(storage, false)
	}
}
   225  
   226  // TestLogManagerInOwner tests how redo log manager is used in owner,
   227  // where the redo log manager needs to handle DDL event only.
   228  func TestLogManagerInOwner(t *testing.T) {
   229  	t.Parallel()
   230  	ctx, cancel := context.WithCancel(context.Background())
   231  	defer cancel()
   232  
   233  	testWriteDDLs := func(storage string, useFileBackend bool) {
   234  		ctx, cancel := context.WithCancel(ctx)
   235  		cfg := &config.ConsistentConfig{
   236  			Level:                 string(redo.ConsistentLevelEventual),
   237  			MaxLogSize:            redo.DefaultMaxLogSize,
   238  			Storage:               storage,
   239  			FlushIntervalInMs:     redo.MinFlushIntervalInMs,
   240  			MetaFlushIntervalInMs: redo.MinFlushIntervalInMs,
   241  			EncodingWorkerNum:     workerNumberForTest,
   242  			FlushWorkerNum:        workerNumberForTest,
   243  			UseFileBackend:        useFileBackend,
   244  		}
   245  		startTs := model.Ts(10)
   246  		ddlMgr := NewDDLManager(model.DefaultChangeFeedID("test"), cfg, startTs)
   247  
   248  		var eg errgroup.Group
   249  		eg.Go(func() error {
   250  			return ddlMgr.Run(ctx)
   251  		})
   252  
   253  		require.Equal(t, startTs, ddlMgr.GetResolvedTs())
   254  		ddl := &model.DDLEvent{StartTs: 100, CommitTs: 120, Query: "CREATE TABLE `TEST.T1`"}
   255  		err := ddlMgr.EmitDDLEvent(ctx, ddl)
   256  		require.NoError(t, err)
   257  		require.Equal(t, startTs, ddlMgr.GetResolvedTs())
   258  
   259  		ddlMgr.UpdateResolvedTs(ctx, ddl.CommitTs)
   260  		checkResolvedTs(t, ddlMgr.logManager, ddl.CommitTs)
   261  
   262  		cancel()
   263  		require.ErrorIs(t, eg.Wait(), context.Canceled)
   264  	}
   265  
   266  	testWriteDDLs("blackhole://", true)
   267  	storages := []string{
   268  		fmt.Sprintf("file://%s", t.TempDir()),
   269  		fmt.Sprintf("nfs://%s", t.TempDir()),
   270  	}
   271  	for _, storage := range storages {
   272  		testWriteDDLs(storage, true)
   273  		testWriteDDLs(storage, false)
   274  	}
   275  }
   276  
   277  // TestManagerError tests whether internal error in bgUpdateLog could be managed correctly.
   278  func TestLogManagerError(t *testing.T) {
   279  	t.Parallel()
   280  	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
   281  	defer cancel()
   282  
   283  	cfg := &config.ConsistentConfig{
   284  		Level:                 string(redo.ConsistentLevelEventual),
   285  		MaxLogSize:            redo.DefaultMaxLogSize,
   286  		Storage:               "blackhole-invalid://",
   287  		FlushIntervalInMs:     redo.MinFlushIntervalInMs,
   288  		MetaFlushIntervalInMs: redo.MinFlushIntervalInMs,
   289  		EncodingWorkerNum:     workerNumberForTest,
   290  		FlushWorkerNum:        workerNumberForTest,
   291  	}
   292  	logMgr := NewDMLManager(model.DefaultChangeFeedID("test"), cfg)
   293  	var eg errgroup.Group
   294  	eg.Go(func() error {
   295  		return logMgr.Run(ctx)
   296  	})
   297  
   298  	tableInfo := &model.TableInfo{
   299  		TableName: model.TableName{Schema: "test", Table: "t"},
   300  	}
   301  	testCases := []struct {
   302  		span tablepb.Span
   303  		rows []writer.RedoEvent
   304  	}{
   305  		{
   306  			span: spanz.TableIDToComparableSpan(53),
   307  			rows: []writer.RedoEvent{
   308  				&model.RowChangedEvent{CommitTs: 120, PhysicalTableID: 53, TableInfo: tableInfo},
   309  				&model.RowChangedEvent{CommitTs: 125, PhysicalTableID: 53, TableInfo: tableInfo},
   310  				&model.RowChangedEvent{CommitTs: 130, PhysicalTableID: 53, TableInfo: tableInfo},
   311  			},
   312  		},
   313  	}
   314  	for _, tc := range testCases {
   315  		err := logMgr.emitRedoEvents(ctx, tc.span, nil, tc.rows...)
   316  		require.NoError(t, err)
   317  	}
   318  
   319  	err := eg.Wait()
   320  	require.Regexp(t, ".*invalid black hole writer.*", err)
   321  	require.Regexp(t, ".*WriteLog.*", err)
   322  }
   323  
   324  func BenchmarkBlackhole(b *testing.B) {
   325  	runBenchTest(b, "blackhole://", false)
   326  }
   327  
   328  func BenchmarkMemoryWriter(b *testing.B) {
   329  	storage := fmt.Sprintf("file://%s", b.TempDir())
   330  	runBenchTest(b, storage, false)
   331  }
   332  
   333  func BenchmarkFileWriter(b *testing.B) {
   334  	storage := fmt.Sprintf("file://%s", b.TempDir())
   335  	runBenchTest(b, storage, true)
   336  }
   337  
// runBenchTest benchmarks the DML redo manager against the given storage URI
// and backend kind: it registers 200 tables, emits batches of row-changed
// events concurrently for each of them, and finally waits until every
// table's resolved ts has been flushed before shutting the manager down.
func runBenchTest(b *testing.B, storage string, useFileBackend bool) {
	ctx, cancel := context.WithCancel(context.Background())
	cfg := &config.ConsistentConfig{
		Level:                 string(redo.ConsistentLevelEventual),
		MaxLogSize:            redo.DefaultMaxLogSize,
		Storage:               storage,
		FlushIntervalInMs:     redo.MinFlushIntervalInMs,
		MetaFlushIntervalInMs: redo.MinFlushIntervalInMs,
		EncodingWorkerNum:     redo.DefaultEncodingWorkerNum,
		FlushWorkerNum:        redo.DefaultFlushWorkerNum,
		UseFileBackend:        useFileBackend,
	}
	dmlMgr := NewDMLManager(model.DefaultChangeFeedID("test"), cfg)
	var eg errgroup.Group
	eg.Go(func() error {
		return dmlMgr.Run(ctx)
	})

	// Init tables
	numOfTables := 200
	tables := make([]model.TableID, 0, numOfTables)
	// maxTsMap records, per span, a pointer to the largest commit ts the
	// writer goroutine will produce; the wait loop below compares the
	// flushed resolved ts against it.
	maxTsMap := spanz.NewHashMap[*model.Ts]()
	startTs := uint64(100)
	for i := 0; i < numOfTables; i++ {
		tableID := model.TableID(i)
		tables = append(tables, tableID)
		span := spanz.TableIDToComparableSpan(tableID)
		ts := startTs
		maxTsMap.ReplaceOrInsert(span, &ts)
		dmlMgr.AddTable(span, startTs)
	}

	// write rows
	maxRowCount := 100000
	wg := sync.WaitGroup{}
	b.ResetTimer()
	for _, tableID := range tables {
		wg.Add(1)
		tableInfo := &model.TableInfo{
			TableName: model.TableName{Schema: "test", Table: fmt.Sprintf("t_%d", tableID)},
		}
		go func(span tablepb.Span) {
			defer wg.Done()
			// Each goroutine is the only writer of its span's max-ts pointer
			// until wg.Wait() returns, so the unsynchronized increment below
			// does not race.
			maxCommitTs := maxTsMap.GetV(span)
			var rows []*model.RowChangedEvent
			for i := 0; i < maxRowCount; i++ {
				if i%100 == 0 {
					// prepare new row change events
					// NOTE(review): StopTimer/StartTimer are invoked from
					// many goroutines concurrently; testing.B does not
					// document this as safe — confirm before trusting the
					// reported timings.
					b.StopTimer()
					*maxCommitTs += rand.Uint64() % 10
					rows = []*model.RowChangedEvent{
						{CommitTs: *maxCommitTs, PhysicalTableID: span.TableID, TableInfo: tableInfo},
						{CommitTs: *maxCommitTs, PhysicalTableID: span.TableID, TableInfo: tableInfo},
						{CommitTs: *maxCommitTs, PhysicalTableID: span.TableID, TableInfo: tableInfo},
					}

					b.StartTimer()
				}
				// Errors are deliberately ignored here to keep the hot loop
				// free of bookkeeping; a failing manager surfaces via
				// eg.Wait() at the end instead.
				dmlMgr.EmitRowChangedEvents(ctx, span, nil, rows...)
				if i%100 == 0 {
					dmlMgr.UpdateResolvedTs(ctx, span, *maxCommitTs)
				}
			}
		}(spanz.TableIDToComparableSpan(tableID))
	}
	wg.Wait()

	// wait flushed
	for {
		ok := true
		maxTsMap.Range(func(span tablepb.Span, targetp *uint64) bool {
			flushed := dmlMgr.GetResolvedTs(span)
			if flushed != *targetp {
				ok = false
				log.Info("", zap.Uint64("targetTs", *targetp),
					zap.Uint64("flushed", flushed),
					zap.Any("tableID", span.TableID))
				// Stop ranging early; one lagging table is enough to retry.
				return false
			}
			return true
		})
		if ok {
			break
		}
		time.Sleep(time.Millisecond * 500)
	}
	cancel()

	// Run must exit with context.Canceled once everything is flushed.
	require.ErrorIs(b, eg.Wait(), context.Canceled)
}