github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/lightning/restore/checksum_test.go (about)

     1  package restore
     2  
     3  import (
     4  	"context"
     5  	"database/sql"
     6  	"fmt"
     7  	"sort"
     8  	"strings"
     9  	"sync"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/pingcap/tidb/util/memory"
    14  
    15  	"github.com/pingcap/parser"
    16  	"github.com/pingcap/parser/ast"
    17  	"github.com/pingcap/tidb/ddl"
    18  	"github.com/pingcap/tidb/store/tikv/oracle"
    19  	tmock "github.com/pingcap/tidb/util/mock"
    20  
    21  	"github.com/pingcap/tidb/kv"
    22  	"github.com/pingcap/tipb/go-tipb"
    23  
    24  	pd "github.com/tikv/pd/client"
    25  
    26  	"github.com/DATA-DOG/go-sqlmock"
    27  	. "github.com/pingcap/check"
    28  	"github.com/pingcap/errors"
    29  
    30  	. "github.com/pingcap/tidb-lightning/lightning/checkpoints"
    31  )
    32  
// Register checksumSuite with the check runner via Suite.
var _ = Suite(&checksumSuite{})

// checksumSuite is the receiver for the checksum and GC TTL manager tests in
// this file. It carries no state of its own.
type checksumSuite struct{}
    36  
    37  func MockDoChecksumCtx(db *sql.DB) context.Context {
    38  	ctx := context.Background()
    39  	manager := newTiDBChecksumExecutor(db)
    40  	return context.WithValue(ctx, &checksumManagerKey, manager)
    41  }
    42  
    43  func (s *checksumSuite) TestDoChecksum(c *C) {
    44  	db, mock, err := sqlmock.New()
    45  	c.Assert(err, IsNil)
    46  
    47  	mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    48  		WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m"))
    49  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    50  		WithArgs("100h0m0s").
    51  		WillReturnResult(sqlmock.NewResult(1, 1))
    52  	mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E").
    53  		WillReturnRows(
    54  			sqlmock.NewRows([]string{"Db_name", "Table_name", "Checksum_crc64_xor", "Total_kvs", "Total_bytes"}).
    55  				AddRow("test", "t", 8520875019404689597, 7296873, 357601387),
    56  		)
    57  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    58  		WithArgs("10m").
    59  		WillReturnResult(sqlmock.NewResult(2, 1))
    60  	mock.ExpectClose()
    61  
    62  	ctx := MockDoChecksumCtx(db)
    63  	checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"})
    64  	c.Assert(err, IsNil)
    65  	c.Assert(*checksum, DeepEquals, RemoteChecksum{
    66  		Schema:     "test",
    67  		Table:      "t",
    68  		Checksum:   8520875019404689597,
    69  		TotalKVs:   7296873,
    70  		TotalBytes: 357601387,
    71  	})
    72  
    73  	c.Assert(db.Close(), IsNil)
    74  	c.Assert(mock.ExpectationsWereMet(), IsNil)
    75  }
    76  
    77  func (s *checksumSuite) TestDoChecksumParallel(c *C) {
    78  	db, mock, err := sqlmock.New()
    79  	c.Assert(err, IsNil)
    80  
    81  	mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    82  		WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m"))
    83  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    84  		WithArgs("100h0m0s").
    85  		WillReturnResult(sqlmock.NewResult(1, 1))
    86  	for i := 0; i < 5; i++ {
    87  		mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E").
    88  			WillDelayFor(100 * time.Millisecond).
    89  			WillReturnRows(
    90  				sqlmock.NewRows([]string{"Db_name", "Table_name", "Checksum_crc64_xor", "Total_kvs", "Total_bytes"}).
    91  					AddRow("test", "t", 8520875019404689597, 7296873, 357601387),
    92  			)
    93  	}
    94  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
    95  		WithArgs("10m").
    96  		WillReturnResult(sqlmock.NewResult(2, 1))
    97  	mock.ExpectClose()
    98  
    99  	ctx := MockDoChecksumCtx(db)
   100  
   101  	// db.Close() will close all connections from its idle pool, set it 1 to expect one close
   102  	db.SetMaxIdleConns(1)
   103  	var wg sync.WaitGroup
   104  	wg.Add(5)
   105  	for i := 0; i < 5; i++ {
   106  		go func() {
   107  			defer wg.Done()
   108  			checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"})
   109  			c.Assert(err, IsNil)
   110  			c.Assert(*checksum, DeepEquals, RemoteChecksum{
   111  				Schema:     "test",
   112  				Table:      "t",
   113  				Checksum:   8520875019404689597,
   114  				TotalKVs:   7296873,
   115  				TotalBytes: 357601387,
   116  			})
   117  		}()
   118  	}
   119  	wg.Wait()
   120  
   121  	c.Assert(db.Close(), IsNil)
   122  	c.Assert(mock.ExpectationsWereMet(), IsNil)
   123  }
   124  
   125  func (s *checksumSuite) TestIncreaseGCLifeTimeFail(c *C) {
   126  	db, mock, err := sqlmock.New()
   127  	c.Assert(err, IsNil)
   128  
   129  	for i := 0; i < 5; i++ {
   130  		mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   131  			WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m"))
   132  		mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   133  			WithArgs("100h0m0s").
   134  			WillReturnError(errors.Annotate(context.Canceled, "update gc error"))
   135  	}
   136  	// This recover GC Life Time SQL should not be executed in DoChecksum
   137  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   138  		WithArgs("10m").
   139  		WillReturnResult(sqlmock.NewResult(1, 1))
   140  	mock.ExpectClose()
   141  
   142  	ctx := MockDoChecksumCtx(db)
   143  	var wg sync.WaitGroup
   144  	wg.Add(5)
   145  	for i := 0; i < 5; i++ {
   146  		go func() {
   147  			_, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"})
   148  			c.Assert(err, ErrorMatches, "update GC lifetime failed: update gc error: context canceled")
   149  			wg.Done()
   150  		}()
   151  	}
   152  	wg.Wait()
   153  
   154  	_, err = db.Exec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E", "10m")
   155  	c.Assert(err, IsNil)
   156  
   157  	c.Assert(db.Close(), IsNil)
   158  	c.Assert(mock.ExpectationsWereMet(), IsNil)
   159  }
   160  
   161  func (s *checksumSuite) TestDoChecksumWithTikv(c *C) {
   162  	// set up mock tikv checksum manager
   163  	pdClient := &testPDClient{}
   164  	resp := tipb.ChecksumResponse{Checksum: 123, TotalKvs: 10, TotalBytes: 1000}
   165  	kvClient := &mockChecksumKVClient{checksum: resp, respDur: time.Second * 5}
   166  
   167  	// mock a table info
   168  	p := parser.New()
   169  	se := tmock.NewContext()
   170  	node, err := p.ParseOneStmt("CREATE TABLE `t1` (`c1` varchar(5) NOT NULL)", "utf8mb4", "utf8mb4_bin")
   171  	c.Assert(err, IsNil)
   172  	tableInfo, err := ddl.MockTableInfo(se, node.(*ast.CreateTableStmt), 999)
   173  	c.Assert(err, IsNil)
   174  
   175  	for i := 0; i <= maxErrorRetryCount; i++ {
   176  		kvClient.maxErrCount = i
   177  		kvClient.curErrCount = 0
   178  		checksumExec := &tikvChecksumManager{manager: newGCTTLManager(pdClient), client: kvClient}
   179  		startTs := oracle.ComposeTS(time.Now().Unix()*1000, 0)
   180  		ctx := context.WithValue(context.Background(), &checksumManagerKey, checksumExec)
   181  		_, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t", Core: tableInfo})
   182  		// with max error retry < maxErrorRetryCount, the checksum can success
   183  		if i >= maxErrorRetryCount {
   184  			c.Assert(err, ErrorMatches, "tikv timeout")
   185  			continue
   186  		} else {
   187  			c.Assert(err, IsNil)
   188  		}
   189  
   190  		// after checksum, safepint should be small than start ts
   191  		ts := pdClient.currentSafePoint()
   192  		// 1ms for the schedule deviation
   193  		c.Assert(ts <= startTs+1, IsTrue)
   194  		c.Assert(atomic.LoadUint32(&checksumExec.manager.started) > 0, IsTrue)
   195  	}
   196  
   197  }
   198  
// TestDoChecksumWithTikvErrRetry is an empty placeholder: it has no body and
// asserts nothing.
// NOTE(review): error-retry behaviour looks to be exercised by
// TestDoChecksumWithTikv through maxErrCount — confirm whether this test should
// be filled in or removed.
func (s *checksumSuite) TestDoChecksumWithTikvErrRetry(c *C) {

}
   202  
   203  func (s *checksumSuite) TestDoChecksumWithErrorAndLongOriginalLifetime(c *C) {
   204  	db, mock, err := sqlmock.New()
   205  	c.Assert(err, IsNil)
   206  
   207  	mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   208  		WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("300h"))
   209  	mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E").
   210  		WillReturnError(errors.Annotate(context.Canceled, "mock syntax error"))
   211  	mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E").
   212  		WithArgs("300h").
   213  		WillReturnResult(sqlmock.NewResult(1, 1))
   214  	mock.ExpectClose()
   215  
   216  	ctx := MockDoChecksumCtx(db)
   217  	_, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"})
   218  	c.Assert(err, ErrorMatches, "compute remote checksum failed: mock syntax error.*")
   219  
   220  	c.Assert(db.Close(), IsNil)
   221  	c.Assert(mock.ExpectationsWereMet(), IsNil)
   222  }
   223  
// safePointTTL is one service GC safe point recorded by testPDClient together
// with its expiry.
type safePointTTL struct {
	// safePoint is the registered safe point value.
	safePoint uint64
	// ttl is the Unix timestamp (seconds) up to which this safe point is
	// valid; currentSafePoint skips entries whose ttl is not after "now".
	ttl int64
}
   229  
// testPDClient is a mock PD client for these tests. Only
// UpdateServiceGCSafePoint is implemented in this file; the embedded pd.Client
// is left unset.
// NOTE(review): presumably any other pd.Client method would panic on the nil
// embedded interface — confirm none is called by the code under test.
type testPDClient struct {
	// Mutex guards gcSafePoint.
	sync.Mutex
	pd.Client
	// count is the number of UpdateServiceGCSafePoint calls; it is accessed
	// with sync/atomic.
	count int32
	// gcSafePoint holds the registered safe points in ascending safePoint order.
	gcSafePoint []safePointTTL
}
   236  
   237  func (c *testPDClient) currentSafePoint() uint64 {
   238  	ts := time.Now().Unix()
   239  	c.Lock()
   240  	defer c.Unlock()
   241  	for _, s := range c.gcSafePoint {
   242  		if s.ttl > ts {
   243  			return s.safePoint
   244  		}
   245  	}
   246  	return 0
   247  }
   248  
   249  func (c *testPDClient) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) {
   250  	if !strings.HasPrefix(serviceID, "lightning") {
   251  		panic("service ID must start with 'lightning'")
   252  	}
   253  	atomic.AddInt32(&c.count, 1)
   254  	c.Lock()
   255  	idx := sort.Search(len(c.gcSafePoint), func(i int) bool {
   256  		return c.gcSafePoint[i].safePoint >= safePoint
   257  	})
   258  	sp := c.gcSafePoint
   259  	ttlEnd := time.Now().Unix() + ttl
   260  	spTTL := safePointTTL{safePoint: safePoint, ttl: ttlEnd}
   261  	switch {
   262  	case idx >= len(sp):
   263  		c.gcSafePoint = append(c.gcSafePoint, spTTL)
   264  	case sp[idx].safePoint == safePoint:
   265  		if ttlEnd > sp[idx].ttl {
   266  			sp[idx].ttl = ttlEnd
   267  		}
   268  	default:
   269  		c.gcSafePoint = append(append(sp[:idx], spTTL), sp[idx:]...)
   270  	}
   271  	c.Unlock()
   272  	return c.currentSafePoint(), nil
   273  }
   274  
   275  func (s *checksumSuite) TestGcTTLManagerSingle(c *C) {
   276  	pdClient := &testPDClient{}
   277  	manager := newGCTTLManager(pdClient)
   278  	c.Assert(manager.serviceID, Not(Equals), "")
   279  	ctx, cancel := context.WithCancel(context.Background())
   280  	defer cancel()
   281  	oldTTL := serviceSafePointTTL
   282  	// set serviceSafePointTTL to 3 second, so lightning will update it in each 1 seconds.
   283  	serviceSafePointTTL = 3
   284  	defer func() {
   285  		serviceSafePointTTL = oldTTL
   286  	}()
   287  
   288  	err := manager.addOneJob(ctx, "test", uint64(time.Now().Unix()))
   289  	c.Assert(err, IsNil)
   290  
   291  	time.Sleep(6*time.Second + 10*time.Millisecond)
   292  
   293  	// after 6 seconds, must at least update 5 times
   294  	val := atomic.LoadInt32(&pdClient.count)
   295  	c.Assert(val, GreaterEqual, int32(5))
   296  
   297  	// after remove the job, there are no job remain, gc ttl needn't to be updated
   298  	manager.removeOneJob("test")
   299  	time.Sleep(10 * time.Millisecond)
   300  	val = atomic.LoadInt32(&pdClient.count)
   301  	time.Sleep(3*time.Second + 10*time.Millisecond)
   302  	c.Assert(atomic.LoadInt32(&pdClient.count), Equals, val)
   303  }
   304  
   305  func (s *checksumSuite) TestGcTTLManagerMulti(c *C) {
   306  	manager := newGCTTLManager(&testPDClient{})
   307  	ctx := context.Background()
   308  
   309  	for i := uint64(1); i <= 5; i++ {
   310  		err := manager.addOneJob(ctx, fmt.Sprintf("test%d", i), i)
   311  		c.Assert(err, IsNil)
   312  		c.Assert(manager.currentTs, Equals, uint64(1))
   313  	}
   314  
   315  	manager.removeOneJob("test2")
   316  	c.Assert(manager.currentTs, Equals, uint64(1))
   317  
   318  	manager.removeOneJob("test1")
   319  	c.Assert(manager.currentTs, Equals, uint64(3))
   320  
   321  	manager.removeOneJob("test3")
   322  	c.Assert(manager.currentTs, Equals, uint64(4))
   323  
   324  	manager.removeOneJob("test4")
   325  	c.Assert(manager.currentTs, Equals, uint64(5))
   326  
   327  	manager.removeOneJob("test5")
   328  	c.Assert(manager.currentTs, Equals, uint64(0))
   329  }
   330  
   331  func (s *checksumSuite) TestPdServiceID(c *C) {
   332  	pdCli := &testPDClient{}
   333  	gcTTLManager1 := newGCTTLManager(pdCli)
   334  	c.Assert(gcTTLManager1.serviceID, Matches, "lightning-.*")
   335  	gcTTLManager2 := newGCTTLManager(pdCli)
   336  	c.Assert(gcTTLManager2.serviceID, Matches, "lightning-.*")
   337  
   338  	c.Assert(gcTTLManager1.serviceID != gcTTLManager2.serviceID, IsTrue)
   339  }
   340  
   341  type mockResponse struct {
   342  	finished bool
   343  	data     []byte
   344  }
   345  
   346  func (r *mockResponse) Next(ctx context.Context) (resultSubset kv.ResultSubset, err error) {
   347  	if r.finished {
   348  		return nil, nil
   349  	}
   350  	r.finished = true
   351  	return &mockResultSubset{data: r.data}, nil
   352  }
   353  func (r *mockResponse) Close() error {
   354  	return nil
   355  }
   356  
// mockErrorResponse is a response stub whose Next always fails.
type mockErrorResponse struct {
	// err is the message of the error returned by Next.
	err string
}

// Next always returns a nil result and a new error built from r.err. ctx is
// ignored.
func (r *mockErrorResponse) Next(ctx context.Context) (resultSubset kv.ResultSubset, err error) {
	return nil, errors.New(r.err)
}

// Close does nothing and always returns nil.
func (r *mockErrorResponse) Close() error {
	return nil
}
   367  
// mockResultSubset is the result subset handed out by mockResponse; apart from
// the payload, all of its accessors return fixed values.
type mockResultSubset struct {
	// data is the raw payload returned by GetData.
	data []byte
}

// GetData returns the stored payload.
func (r *mockResultSubset) GetData() []byte {
	return r.data
}

// GetStartKey returns an empty, non-nil key.
func (r *mockResultSubset) GetStartKey() kv.Key {
	return []byte{}
}

// MemSize always reports 0.
func (r *mockResultSubset) MemSize() int64 {
	return 0
}

// RespTime always reports one millisecond.
func (r *mockResultSubset) RespTime() time.Duration {
	return time.Millisecond
}
   385  
// mockChecksumKVClient is a kv.Client stub for checksum requests. Only Send is
// implemented in this file; the embedded kv.Client is left unset.
type mockChecksumKVClient struct {
	kv.Client
	// checksum is the response marshalled and returned by a successful Send.
	checksum tipb.ChecksumResponse
	// respDur is how long a successful Send sleeps before returning.
	respDur time.Duration
	// maxErrCount is the number of Send calls that return an error before
	// Send starts succeeding.
	maxErrCount int
	// curErrCount is the number of error responses returned so far.
	curErrCount int
}
   394  
   395  // a mock client for checksum request
   396  func (c *mockChecksumKVClient) Send(ctx context.Context, req *kv.Request, vars *kv.Variables, sessionMemTracker *memory.Tracker, enabledRateLimitAction bool) kv.Response {
   397  	if c.curErrCount < c.maxErrCount {
   398  		c.curErrCount++
   399  		return &mockErrorResponse{err: "tikv timeout"}
   400  	}
   401  	data, _ := c.checksum.Marshal()
   402  	time.Sleep(c.respDur)
   403  	return &mockResponse{data: data}
   404  }