github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/restore/checksum_test.go (about) 1 package restore 2 3 import ( 4 "context" 5 "database/sql" 6 "fmt" 7 "sort" 8 "strings" 9 "sync" 10 "sync/atomic" 11 "time" 12 13 "github.com/pingcap/tidb/util/memory" 14 15 "github.com/pingcap/parser" 16 "github.com/pingcap/parser/ast" 17 "github.com/pingcap/tidb/ddl" 18 tmock "github.com/pingcap/tidb/util/mock" 19 "github.com/tikv/client-go/v2/oracle" 20 21 "github.com/pingcap/tidb/kv" 22 "github.com/pingcap/tipb/go-tipb" 23 24 pd "github.com/tikv/pd/client" 25 26 "github.com/DATA-DOG/go-sqlmock" 27 . "github.com/pingcap/check" 28 "github.com/pingcap/errors" 29 30 . "github.com/pingcap/br/pkg/lightning/checkpoints" 31 ) 32 33 var _ = Suite(&checksumSuite{}) 34 35 type checksumSuite struct{} 36 37 func MockDoChecksumCtx(db *sql.DB) context.Context { 38 ctx := context.Background() 39 manager := newTiDBChecksumExecutor(db) 40 return context.WithValue(ctx, &checksumManagerKey, manager) 41 } 42 43 func (s *checksumSuite) TestDoChecksum(c *C) { 44 db, mock, err := sqlmock.New() 45 c.Assert(err, IsNil) 46 47 mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 48 WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m")) 49 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 50 WithArgs("100h0m0s"). 51 WillReturnResult(sqlmock.NewResult(1, 1)) 52 mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E"). 53 WillReturnRows( 54 sqlmock.NewRows([]string{"Db_name", "Table_name", "Checksum_crc64_xor", "Total_kvs", "Total_bytes"}). 55 AddRow("test", "t", 8520875019404689597, 7296873, 357601387), 56 ) 57 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 58 WithArgs("10m"). 59 WillReturnResult(sqlmock.NewResult(2, 1)) 60 mock.ExpectClose() 61 62 ctx := MockDoChecksumCtx(db) 63 checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) 64 c.Assert(err, IsNil) 65 c.Assert(*checksum, DeepEquals, RemoteChecksum{ 66 Schema: "test", 67 Table: "t", 68 Checksum: 8520875019404689597, 69 TotalKVs: 7296873, 70 TotalBytes: 357601387, 71 }) 72 73 c.Assert(db.Close(), IsNil) 74 c.Assert(mock.ExpectationsWereMet(), IsNil) 75 } 76 77 func (s *checksumSuite) TestDoChecksumParallel(c *C) { 78 db, mock, err := sqlmock.New() 79 c.Assert(err, IsNil) 80 81 mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 82 WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m")) 83 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 84 WithArgs("100h0m0s"). 85 WillReturnResult(sqlmock.NewResult(1, 1)) 86 for i := 0; i < 5; i++ { 87 mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E"). 88 WillDelayFor(100 * time.Millisecond). 89 WillReturnRows( 90 sqlmock.NewRows([]string{"Db_name", "Table_name", "Checksum_crc64_xor", "Total_kvs", "Total_bytes"}). 91 AddRow("test", "t", 8520875019404689597, 7296873, 357601387), 92 ) 93 } 94 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 95 WithArgs("10m"). 96 WillReturnResult(sqlmock.NewResult(2, 1)) 97 mock.ExpectClose() 98 99 ctx := MockDoChecksumCtx(db) 100 101 // db.Close() will close all connections from its idle pool, set it 1 to expect one close 102 db.SetMaxIdleConns(1) 103 var wg sync.WaitGroup 104 wg.Add(5) 105 for i := 0; i < 5; i++ { 106 go func() { 107 defer wg.Done() 108 checksum, err := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) 109 c.Assert(err, IsNil) 110 c.Assert(*checksum, DeepEquals, RemoteChecksum{ 111 Schema: "test", 112 Table: "t", 113 Checksum: 8520875019404689597, 114 TotalKVs: 7296873, 115 TotalBytes: 357601387, 116 }) 117 }() 118 } 119 wg.Wait() 120 121 c.Assert(db.Close(), IsNil) 122 c.Assert(mock.ExpectationsWereMet(), IsNil) 123 } 124 125 func (s *checksumSuite) TestIncreaseGCLifeTimeFail(c *C) { 126 db, mock, err := sqlmock.New() 127 c.Assert(err, IsNil) 128 129 for i := 0; i < 5; i++ { 130 mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 131 WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("10m")) 132 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 133 WithArgs("100h0m0s"). 134 WillReturnError(errors.Annotate(context.Canceled, "update gc error")) 135 } 136 // This recover GC Life Time SQL should not be executed in DoChecksum 137 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 138 WithArgs("10m"). 139 WillReturnResult(sqlmock.NewResult(1, 1)) 140 mock.ExpectClose() 141 142 ctx := MockDoChecksumCtx(db) 143 var wg sync.WaitGroup 144 wg.Add(5) 145 for i := 0; i < 5; i++ { 146 go func() { 147 _, errChecksum := DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) 148 c.Assert(errChecksum, ErrorMatches, "update GC lifetime failed: update gc error: context canceled") 149 wg.Done() 150 }() 151 } 152 wg.Wait() 153 154 _, err = db.Exec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E", "10m") 155 c.Assert(err, IsNil) 156 157 c.Assert(db.Close(), IsNil) 158 c.Assert(mock.ExpectationsWereMet(), IsNil) 159 } 160 161 func (s *checksumSuite) TestDoChecksumWithTikv(c *C) { 162 // set up mock tikv checksum manager 163 pdClient := &testPDClient{} 164 resp := tipb.ChecksumResponse{Checksum: 123, TotalKvs: 10, TotalBytes: 1000} 165 kvClient := &mockChecksumKVClient{checksum: resp, respDur: time.Second * 5} 166 167 // mock a table info 168 p := parser.New() 169 se := tmock.NewContext() 170 node, err := p.ParseOneStmt("CREATE TABLE `t1` (`c1` varchar(5) NOT NULL)", "utf8mb4", "utf8mb4_bin") 171 c.Assert(err, IsNil) 172 tableInfo, err := ddl.MockTableInfo(se, node.(*ast.CreateTableStmt), 999) 173 c.Assert(err, IsNil) 174 175 for i := 0; i <= maxErrorRetryCount; i++ { 176 kvClient.maxErrCount = i 177 kvClient.curErrCount = 0 178 checksumExec := &tikvChecksumManager{manager: newGCTTLManager(pdClient), client: kvClient} 179 startTS := oracle.ComposeTS(time.Now().Unix()*1000, 0) 180 ctx := context.WithValue(context.Background(), &checksumManagerKey, checksumExec) 181 _, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t", Core: tableInfo}) 182 // with max error retry < maxErrorRetryCount, the checksum can success 183 if i >= maxErrorRetryCount { 184 c.Assert(err, ErrorMatches, "tikv timeout") 185 continue 186 } else { 187 c.Assert(err, IsNil) 188 } 189 190 // after checksum, safepint should be small than start ts 191 ts := pdClient.currentSafePoint() 192 // 1ms for the schedule deviation 193 c.Assert(ts <= startTS+1, IsTrue) 194 c.Assert(atomic.LoadUint32(&checksumExec.manager.started) > 0, IsTrue) 195 } 196 } 197 198 func (s *checksumSuite) TestDoChecksumWithTikvErrRetry(c *C) { 199 } 200 201 func (s *checksumSuite) TestDoChecksumWithErrorAndLongOriginalLifetime(c *C) { 202 db, mock, err := sqlmock.New() 203 c.Assert(err, IsNil) 204 205 mock.ExpectQuery("\\QSELECT VARIABLE_VALUE FROM mysql.tidb WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 206 WillReturnRows(sqlmock.NewRows([]string{"VARIABLE_VALUE"}).AddRow("300h")) 207 mock.ExpectQuery("\\QADMIN CHECKSUM TABLE `test`.`t`\\E"). 208 WillReturnError(errors.Annotate(context.Canceled, "mock syntax error")) 209 mock.ExpectExec("\\QUPDATE mysql.tidb SET VARIABLE_VALUE = ? WHERE VARIABLE_NAME = 'tikv_gc_life_time'\\E"). 210 WithArgs("300h"). 211 WillReturnResult(sqlmock.NewResult(1, 1)) 212 mock.ExpectClose() 213 214 ctx := MockDoChecksumCtx(db) 215 _, err = DoChecksum(ctx, &TidbTableInfo{DB: "test", Name: "t"}) 216 c.Assert(err, ErrorMatches, "compute remote checksum failed: mock syntax error.*") 217 218 c.Assert(db.Close(), IsNil) 219 c.Assert(mock.ExpectationsWereMet(), IsNil) 220 } 221 222 type safePointTTL struct { 223 safePoint uint64 224 // ttl is the last timestamp this safe point is valid 225 ttl int64 226 } 227 228 type testPDClient struct { 229 sync.Mutex 230 pd.Client 231 count int32 232 gcSafePoint []safePointTTL 233 } 234 235 func (c *testPDClient) currentSafePoint() uint64 { 236 ts := time.Now().Unix() 237 c.Lock() 238 defer c.Unlock() 239 for _, s := range c.gcSafePoint { 240 if s.ttl > ts { 241 return s.safePoint 242 } 243 } 244 return 0 245 } 246 247 func (c *testPDClient) GetTS(ctx context.Context) (int64, int64, error) { 248 return time.Now().Unix(), 0, nil 249 } 250 251 func (c *testPDClient) UpdateServiceGCSafePoint(ctx context.Context, serviceID string, ttl int64, safePoint uint64) (uint64, error) { 252 if !strings.HasPrefix(serviceID, "lightning") { 253 panic("service ID must start with 'lightning'") 254 } 255 atomic.AddInt32(&c.count, 1) 256 c.Lock() 257 idx := sort.Search(len(c.gcSafePoint), func(i int) bool { 258 return c.gcSafePoint[i].safePoint >= safePoint 259 }) 260 sp := c.gcSafePoint 261 ttlEnd := time.Now().Unix() + ttl 262 spTTL := safePointTTL{safePoint: safePoint, ttl: ttlEnd} 263 switch { 264 case idx >= len(sp): 265 c.gcSafePoint = append(c.gcSafePoint, spTTL) 266 case sp[idx].safePoint == safePoint: 267 if ttlEnd > sp[idx].ttl { 268 sp[idx].ttl = ttlEnd 269 } 270 default: 271 c.gcSafePoint = append(append(sp[:idx], spTTL), sp[idx:]...) 272 } 273 c.Unlock() 274 return c.currentSafePoint(), nil 275 } 276 277 func (s *checksumSuite) TestGcTTLManagerSingle(c *C) { 278 pdClient := &testPDClient{} 279 manager := newGCTTLManager(pdClient) 280 c.Assert(manager.serviceID, Not(Equals), "") 281 ctx, cancel := context.WithCancel(context.Background()) 282 defer cancel() 283 oldTTL := serviceSafePointTTL 284 // set serviceSafePointTTL to 3 second, so lightning will update it in each 1 seconds. 285 serviceSafePointTTL = 3 286 defer func() { 287 serviceSafePointTTL = oldTTL 288 }() 289 290 err := manager.addOneJob(ctx, "test", uint64(time.Now().Unix())) 291 c.Assert(err, IsNil) 292 293 time.Sleep(6*time.Second + 10*time.Millisecond) 294 295 // after 6 seconds, must at least update 5 times 296 val := atomic.LoadInt32(&pdClient.count) 297 c.Assert(val, GreaterEqual, int32(5)) 298 299 // after remove the job, there are no job remain, gc ttl needn't to be updated 300 manager.removeOneJob("test") 301 time.Sleep(10 * time.Millisecond) 302 val = atomic.LoadInt32(&pdClient.count) 303 time.Sleep(3*time.Second + 10*time.Millisecond) 304 c.Assert(atomic.LoadInt32(&pdClient.count), Equals, val) 305 } 306 307 func (s *checksumSuite) TestGcTTLManagerMulti(c *C) { 308 manager := newGCTTLManager(&testPDClient{}) 309 ctx := context.Background() 310 311 for i := uint64(1); i <= 5; i++ { 312 err := manager.addOneJob(ctx, fmt.Sprintf("test%d", i), i) 313 c.Assert(err, IsNil) 314 c.Assert(manager.currentTS, Equals, uint64(1)) 315 } 316 317 manager.removeOneJob("test2") 318 c.Assert(manager.currentTS, Equals, uint64(1)) 319 320 manager.removeOneJob("test1") 321 c.Assert(manager.currentTS, Equals, uint64(3)) 322 323 manager.removeOneJob("test3") 324 c.Assert(manager.currentTS, Equals, uint64(4)) 325 326 manager.removeOneJob("test4") 327 c.Assert(manager.currentTS, Equals, uint64(5)) 328 329 manager.removeOneJob("test5") 330 c.Assert(manager.currentTS, Equals, uint64(0)) 331 } 332 333 func (s *checksumSuite) TestPdServiceID(c *C) { 334 pdCli := &testPDClient{} 335 gcTTLManager1 := newGCTTLManager(pdCli) 336 c.Assert(gcTTLManager1.serviceID, Matches, "lightning-.*") 337 gcTTLManager2 := newGCTTLManager(pdCli) 338 c.Assert(gcTTLManager2.serviceID, Matches, "lightning-.*") 339 340 c.Assert(gcTTLManager1.serviceID != gcTTLManager2.serviceID, IsTrue) 341 } 342 343 type mockResponse struct { 344 finished bool 345 data []byte 346 } 347 348 func (r *mockResponse) Next(ctx context.Context) (resultSubset kv.ResultSubset, err error) { 349 if r.finished { 350 return nil, nil 351 } 352 r.finished = true 353 return &mockResultSubset{data: r.data}, nil 354 } 355 356 func (r *mockResponse) Close() error { 357 return nil 358 } 359 360 type mockErrorResponse struct { 361 err string 362 } 363 364 func (r *mockErrorResponse) Next(ctx context.Context) (resultSubset kv.ResultSubset, err error) { 365 return nil, errors.New(r.err) 366 } 367 368 func (r *mockErrorResponse) Close() error { 369 return nil 370 } 371 372 type mockResultSubset struct { 373 data []byte 374 } 375 376 func (r *mockResultSubset) GetData() []byte { 377 return r.data 378 } 379 380 func (r *mockResultSubset) GetStartKey() kv.Key { 381 return []byte{} 382 } 383 384 func (r *mockResultSubset) MemSize() int64 { 385 return 0 386 } 387 388 func (r *mockResultSubset) RespTime() time.Duration { 389 return time.Millisecond 390 } 391 392 type mockChecksumKVClient struct { 393 kv.Client 394 checksum tipb.ChecksumResponse 395 respDur time.Duration 396 // return error count before return success 397 maxErrCount int 398 curErrCount int 399 } 400 401 // a mock client for checksum request 402 func (c *mockChecksumKVClient) Send(ctx context.Context, req *kv.Request, vars interface{}, sessionMemTracker *memory.Tracker, enabledRateLimitAction bool) kv.Response { 403 if c.curErrCount < c.maxErrCount { 404 c.curErrCount++ 405 return &mockErrorResponse{err: "tikv timeout"} 406 } 407 data, _ := c.checksum.Marshal() 408 time.Sleep(c.respDur) 409 return &mockResponse{data: data} 410 }