github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/puller/sorter/sorter_test.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package sorter
    15  
    16  import (
    17  	"context"
    18  	"math"
    19  	_ "net/http/pprof"
    20  	"os"
    21  	"path/filepath"
    22  	"sync/atomic"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/pingcap/check"
    27  	"github.com/pingcap/failpoint"
    28  	"github.com/pingcap/log"
    29  	"github.com/pingcap/ticdc/cdc/model"
    30  	"github.com/pingcap/ticdc/cdc/puller"
    31  	"github.com/pingcap/ticdc/pkg/config"
    32  	"github.com/pingcap/ticdc/pkg/util/testleak"
    33  	"go.uber.org/zap"
    34  	"go.uber.org/zap/zapcore"
    35  	"golang.org/x/sync/errgroup"
    36  )
    37  
    38  const (
    39  	numProducers = 16
    40  )
    41  
    42  type sorterSuite struct{}
    43  
    44  var _ = check.SerialSuites(&sorterSuite{})
    45  
    46  func Test(t *testing.T) { check.TestingT(t) }
    47  
    48  func generateMockRawKV(ts uint64) *model.RawKVEntry {
    49  	return &model.RawKVEntry{
    50  		OpType:   model.OpTypePut,
    51  		Key:      []byte{},
    52  		Value:    []byte{},
    53  		OldValue: nil,
    54  		StartTs:  ts - 5,
    55  		CRTs:     ts,
    56  		RegionID: 0,
    57  	}
    58  }
    59  
    60  func (s *sorterSuite) TestSorterBasic(c *check.C) {
    61  	defer testleak.AfterTest(c)()
    62  	defer UnifiedSorterCleanUp()
    63  
    64  	conf := config.GetDefaultServerConfig()
    65  	conf.DataDir = c.MkDir()
    66  	sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir)
    67  	conf.Sorter = &config.SorterConfig{
    68  		NumConcurrentWorker:    8,
    69  		ChunkSizeLimit:         1 * 1024 * 1024 * 1024,
    70  		MaxMemoryPressure:      60,
    71  		MaxMemoryConsumption:   16 * 1024 * 1024 * 1024,
    72  		NumWorkerPoolGoroutine: 4,
    73  		SortDir:                sortDir,
    74  	}
    75  	config.StoreGlobalServerConfig(conf)
    76  
    77  	err := os.MkdirAll(conf.Sorter.SortDir, 0o755)
    78  	c.Assert(err, check.IsNil)
    79  	sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0")
    80  	c.Assert(err, check.IsNil)
    81  
    82  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
    83  	defer cancel()
    84  	err = testSorter(ctx, c, sorter, 10000)
    85  	c.Assert(err, check.ErrorMatches, ".*context cancel.*")
    86  }
    87  
    88  func (s *sorterSuite) TestSorterCancel(c *check.C) {
    89  	defer testleak.AfterTest(c)()
    90  	defer UnifiedSorterCleanUp()
    91  
    92  	conf := config.GetDefaultServerConfig()
    93  	conf.DataDir = c.MkDir()
    94  	sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir)
    95  	conf.Sorter = &config.SorterConfig{
    96  		NumConcurrentWorker:    8,
    97  		ChunkSizeLimit:         1 * 1024 * 1024 * 1024,
    98  		MaxMemoryPressure:      60,
    99  		MaxMemoryConsumption:   0,
   100  		NumWorkerPoolGoroutine: 4,
   101  		SortDir:                sortDir,
   102  	}
   103  	config.StoreGlobalServerConfig(conf)
   104  
   105  	err := os.MkdirAll(conf.Sorter.SortDir, 0o755)
   106  	c.Assert(err, check.IsNil)
   107  	sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0")
   108  	c.Assert(err, check.IsNil)
   109  
   110  	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   111  	defer cancel()
   112  
   113  	finishedCh := make(chan struct{})
   114  	go func() {
   115  		err := testSorter(ctx, c, sorter, 10000000)
   116  		c.Assert(err, check.ErrorMatches, ".*context deadline exceeded.*")
   117  		close(finishedCh)
   118  	}()
   119  
   120  	after := time.After(30 * time.Second)
   121  	select {
   122  	case <-after:
   123  		c.Fatal("TestSorterCancel timed out")
   124  	case <-finishedCh:
   125  	}
   126  
   127  	log.Info("Sorter successfully cancelled")
   128  }
   129  
   130  func testSorter(ctx context.Context, c *check.C, sorter puller.EventSorter, count int) error {
   131  	err := failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/sorterDebug", "return(true)")
   132  	if err != nil {
   133  		log.Panic("Could not enable failpoint", zap.Error(err))
   134  	}
   135  
   136  	c.Assert(failpoint.Enable("github.com/pingcap/ticdc/pkg/util/InjectCheckDataDirSatisfied", ""), check.IsNil)
   137  	defer func() {
   138  		c.Assert(failpoint.Disable("github.com/pingcap/ticdc/pkg/util/InjectCheckDataDirSatisfied"), check.IsNil)
   139  	}()
   140  
   141  	ctx, cancel := context.WithCancel(ctx)
   142  	errg, ctx := errgroup.WithContext(ctx)
   143  	errg.Go(func() error {
   144  		return sorter.Run(ctx)
   145  	})
   146  	errg.Go(func() error {
   147  		return RunWorkerPool(ctx)
   148  	})
   149  
   150  	producerProgress := make([]uint64, numProducers)
   151  
   152  	// launch the producers
   153  	for i := 0; i < numProducers; i++ {
   154  		finalI := i
   155  		errg.Go(func() error {
   156  			for j := 1; j <= count; j++ {
   157  				select {
   158  				case <-ctx.Done():
   159  					return ctx.Err()
   160  				default:
   161  				}
   162  
   163  				sorter.AddEntry(ctx, model.NewPolymorphicEvent(generateMockRawKV(uint64(j)<<5)))
   164  				if j%10000 == 0 {
   165  					atomic.StoreUint64(&producerProgress[finalI], uint64(j)<<5)
   166  				}
   167  			}
   168  			sorter.AddEntry(ctx, model.NewPolymorphicEvent(generateMockRawKV(uint64(count+1)<<5)))
   169  			atomic.StoreUint64(&producerProgress[finalI], uint64(count+1)<<5)
   170  			return nil
   171  		})
   172  	}
   173  
   174  	// launch the resolver
   175  	errg.Go(func() error {
   176  		ticker := time.NewTicker(1 * time.Second)
   177  		defer ticker.Stop()
   178  		for {
   179  			select {
   180  			case <-ctx.Done():
   181  				return ctx.Err()
   182  			case <-ticker.C:
   183  				resolvedTs := uint64(math.MaxUint64)
   184  				for i := range producerProgress {
   185  					ts := atomic.LoadUint64(&producerProgress[i])
   186  					if resolvedTs > ts {
   187  						resolvedTs = ts
   188  					}
   189  				}
   190  				sorter.AddEntry(ctx, model.NewResolvedPolymorphicEvent(0, resolvedTs))
   191  				if resolvedTs == uint64(count)<<5 {
   192  					return nil
   193  				}
   194  			}
   195  		}
   196  	})
   197  
   198  	// launch the consumer
   199  	errg.Go(func() error {
   200  		counter := 0
   201  		lastTs := uint64(0)
   202  		ticker := time.NewTicker(1 * time.Second)
   203  		defer ticker.Stop()
   204  		for {
   205  			select {
   206  			case <-ctx.Done():
   207  				return ctx.Err()
   208  			case event := <-sorter.Output():
   209  				if event.RawKV.OpType != model.OpTypeResolved {
   210  					if event.CRTs < lastTs {
   211  						panic("regressed")
   212  					}
   213  					lastTs = event.CRTs
   214  					counter += 1
   215  					if counter%10000 == 0 {
   216  						log.Debug("Messages received", zap.Int("counter", counter))
   217  					}
   218  					if counter >= numProducers*count {
   219  						log.Debug("Unified Sorter test successful")
   220  						cancel()
   221  					}
   222  				}
   223  			case <-ticker.C:
   224  				log.Debug("Consumer is alive")
   225  			}
   226  		}
   227  	})
   228  
   229  	return errg.Wait()
   230  }
   231  
   232  func (s *sorterSuite) TestSortDirConfigLocal(c *check.C) {
   233  	defer testleak.AfterTest(c)()
   234  	defer UnifiedSorterCleanUp()
   235  
   236  	poolMu.Lock()
   237  	// Clean up the back-end pool if one has been created
   238  	pool = nil
   239  	poolMu.Unlock()
   240  
   241  	baseDir := c.MkDir()
   242  	dir := filepath.Join(baseDir, "sorter_local")
   243  	err := os.MkdirAll(dir, 0o755)
   244  	c.Assert(err, check.IsNil)
   245  	// We expect the local setting to override the changefeed setting
   246  	config.GetGlobalServerConfig().Sorter.SortDir = dir
   247  
   248  	_, err = NewUnifiedSorter(filepath.Join(baseDir, "sorter"), /* the changefeed setting */
   249  		"test-cf",
   250  		"test",
   251  		0,
   252  		"0.0.0.0:0")
   253  	c.Assert(err, check.IsNil)
   254  
   255  	poolMu.Lock()
   256  	defer poolMu.Unlock()
   257  
   258  	c.Assert(pool, check.NotNil)
   259  	c.Assert(pool.dir, check.Equals, dir)
   260  }
   261  
   262  func (s *sorterSuite) TestSortDirConfigChangeFeed(c *check.C) {
   263  	defer testleak.AfterTest(c)()
   264  	defer UnifiedSorterCleanUp()
   265  
   266  	poolMu.Lock()
   267  	// Clean up the back-end pool if one has been created
   268  	pool = nil
   269  	poolMu.Unlock()
   270  
   271  	dir := c.MkDir()
   272  	// We expect the changefeed setting to take effect
   273  	config.GetGlobalServerConfig().Sorter.SortDir = ""
   274  
   275  	_, err := NewUnifiedSorter(dir, /* the changefeed setting */
   276  		"test-cf",
   277  		"test",
   278  		0,
   279  		"0.0.0.0:0")
   280  	c.Assert(err, check.IsNil)
   281  
   282  	poolMu.Lock()
   283  	defer poolMu.Unlock()
   284  
   285  	c.Assert(pool, check.NotNil)
   286  	c.Assert(pool.dir, check.Equals, dir)
   287  }
   288  
   289  // TestSorterCancelRestart tests the situation where the Unified Sorter is repeatedly canceled and
   290  // restarted. There should not be any problem, especially file corruptions.
   291  func (s *sorterSuite) TestSorterCancelRestart(c *check.C) {
   292  	defer testleak.AfterTest(c)()
   293  	defer UnifiedSorterCleanUp()
   294  
   295  	conf := config.GetDefaultServerConfig()
   296  	conf.DataDir = c.MkDir()
   297  	sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir)
   298  	conf.Sorter = &config.SorterConfig{
   299  		NumConcurrentWorker:    8,
   300  		ChunkSizeLimit:         1 * 1024 * 1024 * 1024,
   301  		MaxMemoryPressure:      0, // disable memory sort
   302  		MaxMemoryConsumption:   0,
   303  		NumWorkerPoolGoroutine: 4,
   304  		SortDir:                sortDir,
   305  	}
   306  	config.StoreGlobalServerConfig(conf)
   307  
   308  	err := os.MkdirAll(conf.Sorter.SortDir, 0o755)
   309  	c.Assert(err, check.IsNil)
   310  
   311  	// enable the failpoint to simulate delays
   312  	err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/asyncFlushStartDelay", "sleep(100)")
   313  	c.Assert(err, check.IsNil)
   314  	defer func() {
   315  		_ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/asyncFlushStartDelay")
   316  	}()
   317  
   318  	// enable the failpoint to simulate delays
   319  	err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/asyncFlushInProcessDelay", "1%sleep(1)")
   320  	c.Assert(err, check.IsNil)
   321  	defer func() {
   322  		_ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/asyncFlushInProcessDelay")
   323  	}()
   324  
   325  	for i := 0; i < 5; i++ {
   326  		sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0")
   327  		c.Assert(err, check.IsNil)
   328  		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
   329  		err = testSorter(ctx, c, sorter, 100000000)
   330  		c.Assert(err, check.ErrorMatches, ".*context deadline exceeded.*")
   331  		cancel()
   332  	}
   333  }
   334  
   335  func (s *sorterSuite) TestSorterIOError(c *check.C) {
   336  	defer testleak.AfterTest(c)()
   337  	defer UnifiedSorterCleanUp()
   338  
   339  	log.SetLevel(zapcore.DebugLevel)
   340  	defer log.SetLevel(zapcore.InfoLevel)
   341  
   342  	conf := config.GetDefaultServerConfig()
   343  	conf.DataDir = c.MkDir()
   344  	sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir)
   345  	conf.Sorter = &config.SorterConfig{
   346  		NumConcurrentWorker:    8,
   347  		ChunkSizeLimit:         1 * 1024 * 1024 * 1024,
   348  		MaxMemoryPressure:      60,
   349  		MaxMemoryConsumption:   0,
   350  		NumWorkerPoolGoroutine: 4,
   351  		SortDir:                sortDir,
   352  	}
   353  	config.StoreGlobalServerConfig(conf)
   354  
   355  	err := os.MkdirAll(conf.Sorter.SortDir, 0o755)
   356  	c.Assert(err, check.IsNil)
   357  	sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0")
   358  	c.Assert(err, check.IsNil)
   359  
   360  	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
   361  	defer cancel()
   362  
   363  	// enable the failpoint to simulate backEnd allocation error (usually would happen when creating a file)
   364  	err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc", "return(true)")
   365  	c.Assert(err, check.IsNil)
   366  	defer func() {
   367  		_ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc")
   368  	}()
   369  
   370  	finishedCh := make(chan struct{})
   371  	go func() {
   372  		err := testSorter(ctx, c, sorter, 10000)
   373  		c.Assert(err, check.ErrorMatches, ".*injected alloc error.*")
   374  		close(finishedCh)
   375  	}()
   376  
   377  	after := time.After(60 * time.Second)
   378  	select {
   379  	case <-after:
   380  		c.Fatal("TestSorterIOError timed out")
   381  	case <-finishedCh:
   382  	}
   383  
   384  	UnifiedSorterCleanUp()
   385  	_ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc")
   386  	// enable the failpoint to simulate backEnd write error (usually would happen when writing to a file)
   387  	err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndWrite", "return(true)")
   388  	c.Assert(err, check.IsNil)
   389  	defer func() {
   390  		_ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndWrite")
   391  	}()
   392  
   393  	sorter, err = NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0")
   394  	c.Assert(err, check.IsNil)
   395  
   396  	finishedCh = make(chan struct{})
   397  	go func() {
   398  		err := testSorter(ctx, c, sorter, 10000)
   399  		c.Assert(err, check.ErrorMatches, ".*injected write error.*")
   400  		close(finishedCh)
   401  	}()
   402  
   403  	after = time.After(60 * time.Second)
   404  	select {
   405  	case <-after:
   406  		c.Fatal("TestSorterIOError timed out")
   407  	case <-finishedCh:
   408  	}
   409  }
   410  
   411  func (s *sorterSuite) TestSorterErrorReportCorrect(c *check.C) {
   412  	defer testleak.AfterTest(c)()
   413  	defer UnifiedSorterCleanUp()
   414  
   415  	log.SetLevel(zapcore.DebugLevel)
   416  	defer log.SetLevel(zapcore.InfoLevel)
   417  
   418  	conf := config.GetDefaultServerConfig()
   419  	conf.DataDir = c.MkDir()
   420  	sortDir := filepath.Join(conf.DataDir, config.DefaultSortDir)
   421  	conf.Sorter = &config.SorterConfig{
   422  		NumConcurrentWorker:    8,
   423  		ChunkSizeLimit:         1 * 1024 * 1024 * 1024,
   424  		MaxMemoryPressure:      60,
   425  		MaxMemoryConsumption:   0,
   426  		NumWorkerPoolGoroutine: 4,
   427  		SortDir:                sortDir,
   428  	}
   429  	config.StoreGlobalServerConfig(conf)
   430  
   431  	err := os.MkdirAll(conf.Sorter.SortDir, 0o755)
   432  	c.Assert(err, check.IsNil)
   433  	sorter, err := NewUnifiedSorter(conf.Sorter.SortDir, "test-cf", "test", 0, "0.0.0.0:0")
   434  	c.Assert(err, check.IsNil)
   435  
   436  	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
   437  	defer cancel()
   438  
   439  	// enable the failpoint to simulate backEnd allocation error (usually would happen when creating a file)
   440  	err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectHeapSorterExitDelay", "sleep(2000)")
   441  	c.Assert(err, check.IsNil)
   442  	defer func() {
   443  		_ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectHeapSorterExitDelay")
   444  	}()
   445  
   446  	err = failpoint.Enable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc", "return(true)")
   447  	c.Assert(err, check.IsNil)
   448  	defer func() {
   449  		_ = failpoint.Disable("github.com/pingcap/ticdc/cdc/puller/sorter/InjectErrorBackEndAlloc")
   450  	}()
   451  
   452  	finishedCh := make(chan struct{})
   453  	go func() {
   454  		err := testSorter(ctx, c, sorter, 10000)
   455  		c.Assert(err, check.ErrorMatches, ".*injected alloc error.*")
   456  		close(finishedCh)
   457  	}()
   458  
   459  	after := time.After(60 * time.Second)
   460  	select {
   461  	case <-after:
   462  		c.Fatal("TestSorterIOError timed out")
   463  	case <-finishedCh:
   464  	}
   465  }
   466  
   467  func (s *sorterSuite) TestSortClosedAddEntry(c *check.C) {
   468  	defer testleak.AfterTest(c)()
   469  	defer UnifiedSorterCleanUp()
   470  
   471  	sorter, err := NewUnifiedSorter(c.MkDir(),
   472  		"test-cf",
   473  		"test",
   474  		0,
   475  		"0.0.0.0:0")
   476  	c.Assert(err, check.IsNil)
   477  
   478  	ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*100)
   479  	defer cancel()
   480  	err = sorter.Run(ctx)
   481  	c.Assert(err, check.ErrorMatches, ".*deadline.*")
   482  
   483  	ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second*10)
   484  	defer cancel1()
   485  	for i := 0; i < 10000; i++ {
   486  		sorter.AddEntry(ctx1, model.NewPolymorphicEvent(generateMockRawKV(uint64(i))))
   487  	}
   488  
   489  	select {
   490  	case <-ctx1.Done():
   491  		c.Fatal("TestSortClosedAddEntry timed out")
   492  	default:
   493  	}
   494  	cancel1()
   495  }
   496  
   497  func (s *sorterSuite) TestUnifiedSorterFileLockConflict(c *check.C) {
   498  	defer testleak.AfterTest(c)()
   499  	defer UnifiedSorterCleanUp()
   500  
   501  	dir := c.MkDir()
   502  	captureAddr := "0.0.0.0:0"
   503  	_, err := newBackEndPool(dir, captureAddr)
   504  	c.Assert(err, check.IsNil)
   505  
   506  	// GlobalServerConfig overrides dir parameter in NewUnifiedSorter.
   507  	config.GetGlobalServerConfig().Sorter.SortDir = dir
   508  	_, err = NewUnifiedSorter(dir, "test-cf", "test", 0, captureAddr)
   509  	c.Assert(err, check.ErrorMatches, ".*file lock conflict.*")
   510  }