github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/lightning/restore/checksum_test.go (about) 1 package restore 2 3 import ( 4 "context" 5 "database/sql" 6 "fmt" 7 "sort" 8 "strings" 9 "sync" 10 "sync/atomic" 11 "time" 12 13 "github.com/pingcap/tidb/util/memory" 14 15 "github.com/pingcap/parser" 16 "github.com/pingcap/parser/ast" 17 "github.com/pingcap/tidb/ddl" 18 "github.com/pingcap/tidb/store/tikv/oracle" 19 tmock "github.com/pingcap/tidb/util/mock" 20 21 "github.com/pingcap/tidb/kv" 22 "github.com/pingcap/tipb/go-tipb" 23 24 pd "github.com/tikv/pd/client" 25 26 "github.com/DATA-DOG/go-sqlmock" 27 . "github.com/pingcap/check" 28 "github.com/pingcap/errors" 29 30 . "github.com/pingcap/tidb-lightning/lightning/checkpoints" 31 ) 32 33 var _ = Suite(&checksumSuite{}) 34 35 type checksumSuite struct{} 36 37 func MockDoChecksumCtx(db *sql.DB) context.Context { 38 ctx := context.Background() 39 manager := newTiDBChecksumExecutor(db) 40 return context.WithValue(ctx, &checksumManagerKey, manager) 41 } 42 43 func (s *checksumSuite) TestDoChecksum(c *C) { 44 db, mock, err := sqlmock.New() 45 c.Assert(err, IsNil) 46 47 mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 48 WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m")) 49 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 50 WithArgs("100h0m0s"). 51 WillReturnResult(sqlmock.NewResult(1, 1)) 52 mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E"). 53 WillReturnRows( 54 sqlmock.NewRows([]string{"Db_name", "Table_name", "Checksum_crc64_xor", "Total_kvs", "Total_bytes"}). 55 AddRow("test", "t", 8520875019404689597, 7296873, 357601387), 56 ) 57 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 58 WithArgs("10m"). 59 WillReturnResult(sqlmock.NewResult(2, 1)) 60 mock.ExpectClose() 61 62 ctx := MockDoChecksumCtx(db) 63 checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) 64 c.Assert(err, IsNil) 65 c.Assert(*checksum, DeepEquals, RemoteChecksum{ 66 Schema: "test", 67 Table: "t", 68 Checksum: 8520875019404689597, 69 TotalKVs: 7296873, 70 TotalBytes: 357601387, 71 }) 72 73 c.Assert(db.Close(), IsNil) 74 c.Assert(mock.ExpectationsWereMet(), IsNil) 75 } 76 77 func (s *checksumSuite) TestDoChecksumParallel(c *C) { 78 db, mock, err := sqlmock.New() 79 c.Assert(err, IsNil) 80 81 mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 82 WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m")) 83 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 84 WithArgs("100h0m0s"). 85 WillReturnResult(sqlmock.NewResult(1, 1)) 86 for i := 0; i < 5; i++ { 87 mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E"). 88 WillDelayFor(100 * time.Millisecond). 89 WillReturnRows( 90 sqlmock.NewRows([]string{"Db_name", "Table_name", "Checksum_crc64_xor", "Total_kvs", "Total_bytes"}). 91 AddRow("test", "t", 8520875019404689597, 7296873, 357601387), 92 ) 93 } 94 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 95 WithArgs("10m"). 96 WillReturnResult(sqlmock.NewResult(2, 1)) 97 mock.ExpectClose() 98 99 ctx := MockDoChecksumCtx(db) 100 101 // db.Close() will close all connections from its idle pool, set it 1 to expect one close 102 db.SetMaxIdleConns(1) 103 var wg sync.WaitGroup 104 wg.Add(5) 105 for i := 0; i < 5; i++ { 106 go func() { 107 defer wg.Done() 108 checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) 109 c.Assert(err, IsNil) 110 c.Assert(*checksum, DeepEquals, RemoteChecksum{ 111 Schema: "test", 112 Table: "t", 113 Checksum: 8520875019404689597, 114 TotalKVs: 7296873, 115 TotalBytes: 357601387, 116 }) 117 }() 118 } 119 wg.Wait() 120 121 c.Assert(db.Close(), IsNil) 122 c.Assert(mock.ExpectationsWereMet(), IsNil) 123 } 124 125 func (s *checksumSuite) TestIncreaseGCLifeTimeFail(c *C) { 126 db, mock, err := sqlmock.New() 127 c.Assert(err, IsNil) 128 129 for i := 0; i < 5; i++ { 130 mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 131 WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m")) 132 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 133 WithArgs("100h0m0s"). 134 WillReturnError(errors.Annotate(context.Canceled, "update gc error")) 135 } 136 // This recover GC Life Time SQL should not be executed in DoChecksum 137 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 138 WithArgs("10m"). 139 WillReturnResult(sqlmock.NewResult(1, 1)) 140 mock.ExpectClose() 141 142 ctx := MockDoChecksumCtx(db) 143 var wg sync.WaitGroup 144 wg.Add(5) 145 for i := 0; i < 5; i++ { 146 go func() { 147 _, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) 148 c.Assert(err, ErrorMatches, "update GC lifetime failed: update gc error: context canceled") 149 wg.Done() 150 }() 151 } 152 wg.Wait() 153 154 _, err = db.Exec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E", "10m") 155 c.Assert(err, IsNil) 156 157 c.Assert(db.Close(), IsNil) 158 c.Assert(mock.ExpectationsWereMet(), IsNil) 159 } 160 161 func (s *checksumSuite) TestDoChecksumWithTikv(c *C) { 162 // set up mock tikv checksum manager 163 pdClient := &testPDClient{} 164 resp := tipb.ChecksumResponse{Checksum: 123, TotalKvs: 10, TotalBytes: 1000} 165 kvClient := &mockChecksumKVClient{checksum: resp, respDur: time.Second * 5} 166 167 // mock a table info 168 p := parser.New() 169 se := tmock.NewContext() 170 node, err := p.ParseOneStmt("CREATE TABLE `t1` (`c1` varchar(5) NOT NULL)", "utf8mb4", "utf8mb4_bin") 171 c.Assert(err, IsNil) 172 tableInfo, err := ddl.MockTableInfo(se, node.(*ast.CreateTableStmt), 999) 173 c.Assert(err, IsNil) 174 175 for i := 0; i <= maxErrorRetryCount; i++ { 176 kvClient.maxErrCount = i 177 kvClient.curErrCount = 0 178 checksumExec := &tikvChecksumManager{manager: newGCTTLManager(pdClient), client: kvClient} 179 startTs := oracle.ComposeTS(time.Now().Unix()*1000, 0) 180 ctx := context.WithValue(context.Background(), &checksumManagerKey, checksumExec) 181 _, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t", Core: tableInfo}) 182 // with max error retry < maxErrorRetryCount, the checksum can success 183 if i >= maxErrorRetryCount { 184 c.Assert(err, ErrorMatches, "tikv timeout") 185 continue 186 } else { 187 c.Assert(err, IsNil) 188 } 189 190 // after checksum, safepint should be small than start ts 191 ts := pdClient.currentSafePoint() 192 // 1ms for the schedule deviation 193 c.Assert(ts <= startTs+1, IsTrue) 194 c.Assert(atomic.LoadUint32(&checksumExec.manager.started) > 0, IsTrue) 195 } 196 197 } 198 199 func (s *checksumSuite) TestDoChecksumWithTikvErrRetry(c *C) { 200 201 } 202 203 func (s *checksumSuite) TestDoChecksumWithErrorAndLongOriginalLifetime(c *C) { 204 db, mock, err := sqlmock.New() 205 c.Assert(err, IsNil) 206 207 mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 208 WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("300h")) 209 mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E"). 210 WillReturnError(errors.Annotate(context.Canceled, "mock syntax error")) 211 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 212 WithArgs("300h"). 213 WillReturnResult(sqlmock.NewResult(1, 1)) 214 mock.ExpectClose() 215 216 ctx := MockDoChecksumCtx(db) 217 _, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) 218 c.Assert(err, ErrorMatches, "compute remote checksum failed: mock syntax error.*") 219 220 c.Assert(db.Close(), IsNil) 221 c.Assert(mock.ExpectationsWereMet(), IsNil) 222 } 223 224 type safePointTTL struct { 225 safePoint uint64 226 // ttl is the last timestamp this safe point is valid 227 ttl int64 228 } 229 230 type testPDClient struct { 231 sync.Mutex 232 pd.Client 233 count int32 234 gcSafePoint []safePointTTL 235 } 236 237 func (c *testPDClient) currentSafePoint() uint64 { 238 ts := time.Now().Unix() 239 c.Lock() 240 defer c.Unlock() 241 for _, s := range c.gcSafePoint { 242 if s.ttl > ts { 243 return s.safePoint 244 } 245 } 246 return 0 247 } 248 249 func (c *testPDClient) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) { 250 if !strings.HasPrefix(serviceID, "lightning") { 251 panic("service ID must start with 'lightning'") 252 } 253 atomic.AddInt32(&c.count, 1) 254 c.Lock() 255 idx := sort.Search(len(c.gcSafePoint), func(i int) bool { 256 return c.gcSafePoint[i].safePoint >= safePoint 257 }) 258 sp := c.gcSafePoint 259 ttlEnd := time.Now().Unix() + ttl 260 spTTL := safePointTTL{safePoint: safePoint, ttl: ttlEnd} 261 switch { 262 case idx >= len(sp): 263 c.gcSafePoint = append(c.gcSafePoint, spTTL) 264 case sp[idx].safePoint == safePoint: 265 if ttlEnd > sp[idx].ttl { 266 sp[idx].ttl = ttlEnd 267 } 268 default: 269 c.gcSafePoint = append(append(sp[:idx], spTTL), sp[idx:]...) 270 } 271 c.Unlock() 272 return c.currentSafePoint(), nil 273 } 274 275 func (s *checksumSuite) TestGcTTLManagerSingle(c *C) { 276 pdClient := &testPDClient{} 277 manager := newGCTTLManager(pdClient) 278 c.Assert(manager.serviceID, Not(Equals), "") 279 ctx, cancel := context.WithCancel(context.Background()) 280 defer cancel() 281 oldTTL := serviceSafePointTTL 282 // set serviceSafePointTTL to 3 second, so lightning will update it in each 1 seconds. 283 serviceSafePointTTL = 3 284 defer func() { 285 serviceSafePointTTL = oldTTL 286 }() 287 288 err := manager.addOneJob(ctx, "test", uint64(time.Now().Unix())) 289 c.Assert(err, IsNil) 290 291 time.Sleep(6*time.Second + 10*time.Millisecond) 292 293 // after 6 seconds, must at least update 5 times 294 val := atomic.LoadInt32(&pdClient.count) 295 c.Assert(val, GreaterEqual, int32(5)) 296 297 // after remove the job, there are no job remain, gc ttl needn't to be updated 298 manager.removeOneJob("test") 299 time.Sleep(10 * time.Millisecond) 300 val = atomic.LoadInt32(&pdClient.count) 301 time.Sleep(3*time.Second + 10*time.Millisecond) 302 c.Assert(atomic.LoadInt32(&pdClient.count), Equals, val) 303 } 304 305 func (s *checksumSuite) TestGcTTLManagerMulti(c *C) { 306 manager := newGCTTLManager(&testPDClient{}) 307 ctx := context.Background() 308 309 for i := uint64(1); i <= 5; i++ { 310 err := manager.addOneJob(ctx, fmt.Sprintf("test%d", i), i) 311 c.Assert(err, IsNil) 312 c.Assert(manager.currentTs, Equals, uint64(1)) 313 } 314 315 manager.removeOneJob("test2") 316 c.Assert(manager.currentTs, Equals, uint64(1)) 317 318 manager.removeOneJob("test1") 319 c.Assert(manager.currentTs, Equals, uint64(3)) 320 321 manager.removeOneJob("test3") 322 c.Assert(manager.currentTs, Equals, uint64(4)) 323 324 manager.removeOneJob("test4") 325 c.Assert(manager.currentTs, Equals, uint64(5)) 326 327 manager.removeOneJob("test5") 328 c.Assert(manager.currentTs, Equals, uint64(0)) 329 } 330 331 func (s *checksumSuite) TestPdServiceID(c *C) { 332 pdCli := &testPDClient{} 333 gcTTLManager1 := newGCTTLManager(pdCli) 334 c.Assert(gcTTLManager1.serviceID, Matches, "lightning-.*") 335 gcTTLManager2 := newGCTTLManager(pdCli) 336 c.Assert(gcTTLManager2.serviceID, Matches, "lightning-.*") 337 338 c.Assert(gcTTLManager1.serviceID != gcTTLManager2.serviceID, IsTrue) 339 } 340 341 type mockResponse struct { 342 finished bool 343 data []byte 344 } 345 346 func (r *mockResponse) Next(ctx context.Context) (resultSubset kv.ResultSubset, err error) { 347 if r.finished { 348 return nil, nil 349 } 350 r.finished = true 351 return &mockResultSubset{data: r.data}, nil 352 } 353 func (r *mockResponse) Close() error { 354 return nil 355 } 356 357 type mockErrorResponse struct { 358 err string 359 } 360 361 func (r *mockErrorResponse) Next(ctx context.Context) (resultSubset kv.ResultSubset, err error) { 362 return nil, errors.New(r.err) 363 } 364 func (r *mockErrorResponse) Close() error { 365 return nil 366 } 367 368 type mockResultSubset struct { 369 data []byte 370 } 371 372 func (r *mockResultSubset) GetData() []byte { 373 return r.data 374 } 375 376 func (r *mockResultSubset) GetStartKey() kv.Key { 377 return []byte{} 378 } 379 func (r *mockResultSubset) MemSize() int64 { 380 return 0 381 } 382 func (r *mockResultSubset) RespTime() time.Duration { 383 return time.Millisecond 384 } 385 386 type mockChecksumKVClient struct { 387 kv.Client 388 checksum tipb.ChecksumResponse 389 respDur time.Duration 390 // return error count before return success 391 maxErrCount int 392 curErrCount int 393 } 394 395 // a mock client for checksum request 396 func (c *mockChecksumKVClient) Send(ctx context.Context, req *kv.Request, vars *kv.Variables, sessionMemTracker *memory.Tracker, enabledRateLimitAction bool) kv.Response { 397 if c.curErrCount < c.maxErrCount { 398 c.curErrCount++ 399 return &mockErrorResponse{err: "tikv timeout"} 400 } 401 data, _ := c.checksum.Marshal() 402 time.Sleep(c.respDur) 403 return &mockResponse{data: data} 404 }