github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/restore/checksum_test.go (about)

     1  package restore
     2  
     3  import (
     4  	"context"
     5  	"database/sql"
     6  	"fmt"
     7  	"sort"
     8  	"strings"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/pingcap/tidb/util/memory"
    14  
    15  	"github.com/pingcap/parser"
    16  	"github.com/pingcap/parser/ast"
    17  	"github.com/pingcap/tidb/ddl"
    18  	tmock "github.com/pingcap/tidb/util/mock"
    19  	"github.com/tikv/client-go/v2/oracle"
    20  
    21  	"github.com/pingcap/tidb/kv"
    22  	"github.com/pingcap/tipb/go-tipb"
    23  
    24  	pd "github.com/tikv/pd/client"
    25  
    26  	"github.com/DATA-DOG/go-sqlmock"
    27  	. "github.com/pingcap/check"
    28  	"github.com/pingcap/errors"
    29  
    30  	. "github.com/pingcap/br/pkg/lightning/checkpoints"
    31  )
    32  
    33  var _ = Suite(&checksumSuite{})
    34  
    35  type checksumSuite struct{}
    36  
    37  func MockDoChecksumCtx(db *sql.DB) context.Context {
    38  	ctx := context.Background()
    39  	manager := newTiDBChecksumExecutor(db)
    40  	return context.WithValue(ctx, &checksumManagerKey, manager)
    41  }
    42  
    43  func (s *checksumSuite) TestDoChecksum(c *C) {
    44  	db, mock, err := sqlmock.New()
    45  	c.Assert(err, IsNil)
    46  
    47  	mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    48  		WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m"))
    49  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    50  		WithArgs("100h0m0s").
    51  		WillReturnResult(sqlmock.NewResult(1, 1))
    52  	mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E").
    53  		WillReturnRows(
    54  			sqlmock.NewRows([]string{"Db_name", "Table_name", "Checksum_crc64_xor", "Total_kvs", "Total_bytes"}).
    55  				AddRow("test", "t", 8520875019404689597, 7296873, 357601387),
    56  		)
    57  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    58  		WithArgs("10m").
    59  		WillReturnResult(sqlmock.NewResult(2, 1))
    60  	mock.ExpectClose()
    61  
    62  	ctx := MockDoChecksumCtx(db)
    63  	checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"})
    64  	c.Assert(err, IsNil)
    65  	c.Assert(*checksum, DeepEquals, RemoteChecksum{
    66  		Schema:     "test",
    67  		Table:      "t",
    68  		Checksum:   8520875019404689597,
    69  		TotalKVs:   7296873,
    70  		TotalBytes: 357601387,
    71  	})
    72  
    73  	c.Assert(db.Close(), IsNil)
    74  	c.Assert(mock.ExpectationsWereMet(), IsNil)
    75  }
    76  
    77  func (s *checksumSuite) TestDoChecksumParallel(c *C) {
    78  	db, mock, err := sqlmock.New()
    79  	c.Assert(err, IsNil)
    80  
    81  	mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    82  		WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m"))
    83  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    84  		WithArgs("100h0m0s").
    85  		WillReturnResult(sqlmock.NewResult(1, 1))
    86  	for i := 0; i < 5; i++ {
    87  		mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E").
    88  			WillDelayFor(100 * time.Millisecond).
    89  			WillReturnRows(
    90  				sqlmock.NewRows([]string{"Db_name", "Table_name", "Checksum_crc64_xor", "Total_kvs", "Total_bytes"}).
    91  					AddRow("test", "t", 8520875019404689597, 7296873, 357601387),
    92  			)
    93  	}
    94  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    95  		WithArgs("10m").
    96  		WillReturnResult(sqlmock.NewResult(2, 1))
    97  	mock.ExpectClose()
    98  
    99  	ctx := MockDoChecksumCtx(db)
   100  
   101  	// db.Close() will close all connections from its idle pool, set it 1 to expect one close
   102  	db.SetMaxIdleConns(1)
   103  	var wg sync.WaitGroup
   104  	wg.Add(5)
   105  	for i := 0; i < 5; i++ {
   106  		go func() {
   107  			defer wg.Done()
   108  			checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"})
   109  			c.Assert(err, IsNil)
   110  			c.Assert(*checksum, DeepEquals, RemoteChecksum{
   111  				Schema:     "test",
   112  				Table:      "t",
   113  				Checksum:   8520875019404689597,
   114  				TotalKVs:   7296873,
   115  				TotalBytes: 357601387,
   116  			})
   117  		}()
   118  	}
   119  	wg.Wait()
   120  
   121  	c.Assert(db.Close(), IsNil)
   122  	c.Assert(mock.ExpectationsWereMet(), IsNil)
   123  }
   124  
   125  func (s *checksumSuite) TestIncreaseGCLifeTimeFail(c *C) {
   126  	db, mock, err := sqlmock.New()
   127  	c.Assert(err, IsNil)
   128  
   129  	for i := 0; i < 5; i++ {
   130  		mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   131  			WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m"))
   132  		mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   133  			WithArgs("100h0m0s").
   134  			WillReturnError(errors.Annotate(context.Canceled, "update gc error"))
   135  	}
   136  	// This recover GC Life Time SQL should not be executed in DoChecksum
   137  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   138  		WithArgs("10m").
   139  		WillReturnResult(sqlmock.NewResult(1, 1))
   140  	mock.ExpectClose()
   141  
   142  	ctx := MockDoChecksumCtx(db)
   143  	var wg sync.WaitGroup
   144  	wg.Add(5)
   145  	for i := 0; i < 5; i++ {
   146  		go func() {
   147  			_, errChecksum := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"})
   148  			c.Assert(errChecksum, ErrorMatches, "update GC lifetime failed: update gc error: context canceled")
   149  			wg.Done()
   150  		}()
   151  	}
   152  	wg.Wait()
   153  
   154  	_, err = db.Exec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E", "10m")
   155  	c.Assert(err, IsNil)
   156  
   157  	c.Assert(db.Close(), IsNil)
   158  	c.Assert(mock.ExpectationsWereMet(), IsNil)
   159  }
   160  
   161  func (s *checksumSuite) TestDoChecksumWithTikv(c *C) {
   162  	// set up mock tikv checksum manager
   163  	pdClient := &testPDClient{}
   164  	resp := tipb.ChecksumResponse{Checksum: 123, TotalKvs: 10, TotalBytes: 1000}
   165  	kvClient := &mockChecksumKVClient{checksum: resp, respDur: time.Second * 5}
   166  
   167  	// mock a table info
   168  	p := parser.New()
   169  	se := tmock.NewContext()
   170  	node, err := p.ParseOneStmt("CREATE TABLE `t1` (`c1` varchar(5) NOT NULL)", "utf8mb4", "utf8mb4_bin")
   171  	c.Assert(err, IsNil)
   172  	tableInfo, err := ddl.MockTableInfo(se, node.(*ast.CreateTableStmt), 999)
   173  	c.Assert(err, IsNil)
   174  
   175  	for i := 0; i <= maxErrorRetryCount; i++ {
   176  		kvClient.maxErrCount = i
   177  		kvClient.curErrCount = 0
   178  		checksumExec := &tikvChecksumManager{manager: newGCTTLManager(pdClient), client: kvClient}
   179  		startTS := oracle.ComposeTS(time.Now().Unix()*1000, 0)
   180  		ctx := context.WithValue(context.Background(), &checksumManagerKey, checksumExec)
   181  		_, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t", Core: tableInfo})
   182  		// with max error retry < maxErrorRetryCount, the checksum can success
   183  		if i >= maxErrorRetryCount {
   184  			c.Assert(err, ErrorMatches, "tikv timeout")
   185  			continue
   186  		} else {
   187  			c.Assert(err, IsNil)
   188  		}
   189  
   190  		// after checksum, safepint should be small than start ts
   191  		ts := pdClient.currentSafePoint()
   192  		// 1ms for the schedule deviation
   193  		c.Assert(ts <= startTS+1, IsTrue)
   194  		c.Assert(atomic.LoadUint32(&checksumExec.manager.started) > 0, IsTrue)
   195  	}
   196  }
   197  
   198  func (s *checksumSuite) TestDoChecksumWithTikvErrRetry(c *C) {
   199  }
   200  
   201  func (s *checksumSuite) TestDoChecksumWithErrorAndLongOriginalLifetime(c *C) {
   202  	db, mock, err := sqlmock.New()
   203  	c.Assert(err, IsNil)
   204  
   205  	mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   206  		WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("300h"))
   207  	mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E").
   208  		WillReturnError(errors.Annotate(context.Canceled, "mock syntax error"))
   209  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   210  		WithArgs("300h").
   211  		WillReturnResult(sqlmock.NewResult(1, 1))
   212  	mock.ExpectClose()
   213  
   214  	ctx := MockDoChecksumCtx(db)
   215  	_, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"})
   216  	c.Assert(err, ErrorMatches, "compute remote checksum failed: mock syntax error.*")
   217  
   218  	c.Assert(db.Close(), IsNil)
   219  	c.Assert(mock.ExpectationsWereMet(), IsNil)
   220  }
   221  
   222  type safePointTTL struct {
   223  	safePoint uint64
   224  	// ttl is the last timestamp this safe point is valid
   225  	ttl int64
   226  }
   227  
   228  type testPDClient struct {
   229  	sync.Mutex
   230  	pd.Client
   231  	count       int32
   232  	gcSafePoint []safePointTTL
   233  }
   234  
   235  func (c *testPDClient) currentSafePoint() uint64 {
   236  	ts := time.Now().Unix()
   237  	c.Lock()
   238  	defer c.Unlock()
   239  	for _, s := range c.gcSafePoint {
   240  		if s.ttl > ts {
   241  			return s.safePoint
   242  		}
   243  	}
   244  	return 0
   245  }
   246  
   247  func (c *testPDClient) GetTS(ctx context.Context) (int64, int64, error) {
   248  	return time.Now().Unix(), 0, nil
   249  }
   250  
   251  func (c *testPDClient) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) {
   252  	if !strings.HasPrefix(serviceID, "lightning") {
   253  		panic("service ID must start with 'lightning'")
   254  	}
   255  	atomic.AddInt32(&c.count, 1)
   256  	c.Lock()
   257  	idx := sort.Search(len(c.gcSafePoint), func(i int) bool {
   258  		return c.gcSafePoint[i].safePoint >= safePoint
   259  	})
   260  	sp := c.gcSafePoint
   261  	ttlEnd := time.Now().Unix() + ttl
   262  	spTTL := safePointTTL{safePoint: safePoint, ttl: ttlEnd}
   263  	switch {
   264  	case idx >= len(sp):
   265  		c.gcSafePoint = append(c.gcSafePoint, spTTL)
   266  	case sp[idx].safePoint == safePoint:
   267  		if ttlEnd > sp[idx].ttl {
   268  			sp[idx].ttl = ttlEnd
   269  		}
   270  	default:
   271  		c.gcSafePoint = append(append(sp[:idx], spTTL), sp[idx:]...)
   272  	}
   273  	c.Unlock()
   274  	return c.currentSafePoint(), nil
   275  }
   276  
   277  func (s *checksumSuite) TestGcTTLManagerSingle(c *C) {
   278  	pdClient := &testPDClient{}
   279  	manager := newGCTTLManager(pdClient)
   280  	c.Assert(manager.serviceID, Not(Equals), "")
   281  	ctx, cancel := context.WithCancel(context.Background())
   282  	defer cancel()
   283  	oldTTL := serviceSafePointTTL
   284  	// set serviceSafePointTTL to 3 second, so lightning will update it in each 1 seconds.
   285  	serviceSafePointTTL = 3
   286  	defer func() {
   287  		serviceSafePointTTL = oldTTL
   288  	}()
   289  
   290  	err := manager.addOneJob(ctx, "test", uint64(time.Now().Unix()))
   291  	c.Assert(err, IsNil)
   292  
   293  	time.Sleep(6*time.Second + 10*time.Millisecond)
   294  
   295  	// after 6 seconds, must at least update 5 times
   296  	val := atomic.LoadInt32(&pdClient.count)
   297  	c.Assert(val, GreaterEqual, int32(5))
   298  
   299  	// after remove the job, there are no job remain, gc ttl needn't to be updated
   300  	manager.removeOneJob("test")
   301  	time.Sleep(10 * time.Millisecond)
   302  	val = atomic.LoadInt32(&pdClient.count)
   303  	time.Sleep(3*time.Second + 10*time.Millisecond)
   304  	c.Assert(atomic.LoadInt32(&pdClient.count), Equals, val)
   305  }
   306  
   307  func (s *checksumSuite) TestGcTTLManagerMulti(c *C) {
   308  	manager := newGCTTLManager(&testPDClient{})
   309  	ctx := context.Background()
   310  
   311  	for i := uint64(1); i <= 5; i++ {
   312  		err := manager.addOneJob(ctx, fmt.Sprintf("test%d", i), i)
   313  		c.Assert(err, IsNil)
   314  		c.Assert(manager.currentTS, Equals, uint64(1))
   315  	}
   316  
   317  	manager.removeOneJob("test2")
   318  	c.Assert(manager.currentTS, Equals, uint64(1))
   319  
   320  	manager.removeOneJob("test1")
   321  	c.Assert(manager.currentTS, Equals, uint64(3))
   322  
   323  	manager.removeOneJob("test3")
   324  	c.Assert(manager.currentTS, Equals, uint64(4))
   325  
   326  	manager.removeOneJob("test4")
   327  	c.Assert(manager.currentTS, Equals, uint64(5))
   328  
   329  	manager.removeOneJob("test5")
   330  	c.Assert(manager.currentTS, Equals, uint64(0))
   331  }
   332  
   333  func (s *checksumSuite) TestPdServiceID(c *C) {
   334  	pdCli := &testPDClient{}
   335  	gcTTLManager1 := newGCTTLManager(pdCli)
   336  	c.Assert(gcTTLManager1.serviceID, Matches, "lightning-.*")
   337  	gcTTLManager2 := newGCTTLManager(pdCli)
   338  	c.Assert(gcTTLManager2.serviceID, Matches, "lightning-.*")
   339  
   340  	c.Assert(gcTTLManager1.serviceID != gcTTLManager2.serviceID, IsTrue)
   341  }
   342  
   343  type mockResponse struct {
   344  	finished bool
   345  	data     []byte
   346  }
   347  
   348  func (r *mockResponse) Next(ctx context.Context) (resultSubset kv.ResultSubset, err error) {
   349  	if r.finished {
   350  		return nil, nil
   351  	}
   352  	r.finished = true
   353  	return &mockResultSubset{data: r.data}, nil
   354  }
   355  
   356  func (r *mockResponse) Close() error {
   357  	return nil
   358  }
   359  
   360  type mockErrorResponse struct {
   361  	err string
   362  }
   363  
   364  func (r *mockErrorResponse) Next(ctx context.Context) (resultSubset kv.ResultSubset, err error) {
   365  	return nil, errors.New(r.err)
   366  }
   367  
   368  func (r *mockErrorResponse) Close() error {
   369  	return nil
   370  }
   371  
   372  type mockResultSubset struct {
   373  	data []byte
   374  }
   375  
   376  func (r *mockResultSubset) GetData() []byte {
   377  	return r.data
   378  }
   379  
   380  func (r *mockResultSubset) GetStartKey() kv.Key {
   381  	return []byte{}
   382  }
   383  
   384  func (r *mockResultSubset) MemSize() int64 {
   385  	return 0
   386  }
   387  
   388  func (r *mockResultSubset) RespTime() time.Duration {
   389  	return time.Millisecond
   390  }
   391  
   392  type mockChecksumKVClient struct {
   393  	kv.Client
   394  	checksum tipb.ChecksumResponse
   395  	respDur  time.Duration
   396  	// return error count before return success
   397  	maxErrCount int
   398  	curErrCount int
   399  }
   400  
   401  // a mock client for checksum request
   402  func (c *mockChecksumKVClient) Send(ctx context.Context, req *kv.Request, vars interface{}, sessionMemTracker *memory.Tracker, enabledRateLimitAction bool) kv.Response {
   403  	if c.curErrCount < c.maxErrCount {
   404  		c.curErrCount++
   405  		return &mockErrorResponse{err: "tikv timeout"}
   406  	}
   407  	data, _ := c.checksum.Marshal()
   408  	time.Sleep(c.respDur)
   409  	return &mockResponse{data: data}
   410  }