github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/backend/tidb/tidb.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package tidb 15 16 import ( 17 "context" 18 "database/sql" 19 "encoding/hex" 20 "fmt" 21 "strconv" 22 "strings" 23 "time" 24 25 "github.com/google/uuid" 26 "github.com/pingcap/errors" 27 "github.com/pingcap/failpoint" 28 "github.com/pingcap/parser/model" 29 "github.com/pingcap/parser/mysql" 30 "github.com/pingcap/tidb/sessionctx" 31 "github.com/pingcap/tidb/table" 32 "github.com/pingcap/tidb/types" 33 "go.uber.org/zap" 34 "go.uber.org/zap/zapcore" 35 36 "github.com/pingcap/br/pkg/lightning/backend" 37 "github.com/pingcap/br/pkg/lightning/backend/kv" 38 "github.com/pingcap/br/pkg/lightning/common" 39 "github.com/pingcap/br/pkg/lightning/config" 40 "github.com/pingcap/br/pkg/lightning/log" 41 "github.com/pingcap/br/pkg/lightning/verification" 42 "github.com/pingcap/br/pkg/redact" 43 "github.com/pingcap/br/pkg/version" 44 ) 45 46 var extraHandleTableColumn = &table.Column{ 47 ColumnInfo: kv.ExtraHandleColumnInfo, 48 GeneratedExpr: nil, 49 DefaultExpr: nil, 50 } 51 52 const ( 53 writeRowsMaxRetryTimes = 3 54 ) 55 56 type tidbRow string 57 58 type tidbRows []tidbRow 59 60 // MarshalLogArray implements the zapcore.ArrayMarshaler interface 61 func (rows tidbRows) MarshalLogArray(encoder zapcore.ArrayEncoder) error { 62 for _, r := range rows { 63 encoder.AppendString(redact.String(string(r))) 64 } 65 return nil 66 } 67 
// tidbEncoder implements kv.Encoder for the TiDB backend: instead of KV
// pairs, it renders each parsed row as a SQL value tuple (tidbRow).
type tidbEncoder struct {
	mode mysql.SQLMode
	tbl  table.Table
	se   sessionctx.Context
	// the index of table columns for each data field.
	// index == len(table.columns) means this field is `_tidb_rowid`
	columnIdx []int
	columnCnt int
}

// tidbBackend imports data by executing multi-value INSERT/REPLACE statements
// against a live MySQL-compatible server through `db`.
type tidbBackend struct {
	db          *sql.DB
	onDuplicate string
}

// NewTiDBBackend creates a new TiDB backend using the given database.
//
// The backend does not take ownership of `db`. Caller should close `db`
// manually after the backend expired.
func NewTiDBBackend(db *sql.DB, onDuplicate string) backend.Backend {
	switch onDuplicate {
	case config.ReplaceOnDup, config.IgnoreOnDup, config.ErrorOnDup:
		// recognized action; keep as-is.
	default:
		log.L().Warn("unsupported action on duplicate, overwrite with `replace`")
		onDuplicate = config.ReplaceOnDup
	}
	return backend.MakeBackend(&tidbBackend{db: db, onDuplicate: onDuplicate})
}

// Size returns the byte length of the encoded SQL tuple.
func (row tidbRow) Size() uint64 {
	return uint64(len(row))
}

// ClassifyAndAppend appends the row to `data` and accounts for it in
// `checksum`. The index-KV arguments are ignored: this backend produces no
// separate index entries.
func (row tidbRow) ClassifyAndAppend(data *kv.Rows, checksum *verification.KVChecksum, _ *kv.Rows, _ *verification.KVChecksum) {
	rows := (*data).(tidbRows)
	// Cannot do `rows := data.(*tidbRows); *rows = append(*rows, row)`.
	//nolint:gocritic
	*data = append(rows, row)
	cs := verification.MakeKVChecksum(uint64(len(row)), 1, 0)
	checksum.Add(&cs)
}

// SplitIntoChunks groups rows into chunks whose cumulative byte size stays at
// or below splitSize; a single row larger than splitSize still forms its own
// chunk.
func (rows tidbRows) SplitIntoChunks(splitSize int) []kv.Rows {
	if len(rows) == 0 {
		return nil
	}

	res := make([]kv.Rows, 0, 1)
	i := 0
	cumSize := 0

	for j, row := range rows {
		// `i < j` guarantees every chunk is non-empty even when one row
		// alone exceeds splitSize.
		if i < j && cumSize+len(row) > splitSize {
			res = append(res, rows[i:j])
			i = j
			cumSize = 0
		}
		cumSize += len(row)
	}

	return append(res, rows[i:])
}

// Clear resets the batch to length zero while retaining capacity for reuse.
func (rows tidbRows) Clear() kv.Rows {
	return rows[:0]
}

// appendSQLBytes writes value into sb as a single-quoted SQL string literal,
// escaping according to the encoder's SQL mode: only `'` is doubled under
// NO_BACKSLASH_ESCAPES, otherwise the usual MySQL backslash escapes apply.
func (enc *tidbEncoder) appendSQLBytes(sb *strings.Builder, value []byte) {
	sb.Grow(2 + len(value))
	sb.WriteByte('\'')
	if enc.mode.HasNoBackslashEscapesMode() {
		for _, b := range value {
			if b == '\'' {
				sb.WriteString(`''`)
			} else {
				sb.WriteByte(b)
			}
		}
	} else {
		for _, b := range value {
			switch b {
			case 0:
				sb.WriteString(`\0`)
			case '\b':
				sb.WriteString(`\b`)
			case '\n':
				sb.WriteString(`\n`)
			case '\r':
				sb.WriteString(`\r`)
			case '\t':
				sb.WriteString(`\t`)
			case 26: // ASCII SUB (Ctrl-Z), special to the MySQL client
				sb.WriteString(`\Z`)
			case '\'':
				sb.WriteString(`''`)
			case '\\':
				sb.WriteString(`\\`)
			default:
				sb.WriteByte(b)
			}
		}
	}
	sb.WriteByte('\'')
}

// appendSQL appends the SQL representation of the Datum into the string builder.
// Note that we cannot use Datum.ToString since it doesn't perform SQL escaping.
func (enc *tidbEncoder) appendSQL(sb *strings.Builder, datum *types.Datum, _ *table.Column) error {
	switch datum.Kind() {
	case types.KindNull:
		sb.WriteString("NULL")

	case types.KindMinNotNull:
		sb.WriteString("MINVALUE")

	case types.KindMaxValue:
		sb.WriteString("MAXVALUE")

	case types.KindInt64:
		// longest int64 = -9223372036854775808 which has 20 characters
		var buffer [20]byte
		value := strconv.AppendInt(buffer[:0], datum.GetInt64(), 10)
		sb.Write(value)

	case types.KindUint64, types.KindMysqlEnum, types.KindMysqlSet:
		// longest uint64 = 18446744073709551615 which has 20 characters
		var buffer [20]byte
		value := strconv.AppendUint(buffer[:0], datum.GetUint64(), 10)
		sb.Write(value)

	case types.KindFloat32, types.KindFloat64:
		// float64 has 16 digits of precision, so a buffer size of 32 is more than enough...
		var buffer [32]byte
		value := strconv.AppendFloat(buffer[:0], datum.GetFloat64(), 'g', -1, 64)
		sb.Write(value)
	case types.KindString:
		// See: https://github.com/pingcap/tidb-lightning/issues/550
		// if enc.mode.HasStrictMode() {
		// 	d, err := table.CastValue(enc.se, *datum, col.ToInfo(), false, false)
		// 	if err != nil {
		// 		return errors.Trace(err)
		// 	}
		// 	datum = &d
		// }

		enc.appendSQLBytes(sb, datum.GetBytes())
	case types.KindBytes:
		enc.appendSQLBytes(sb, datum.GetBytes())

	case types.KindMysqlJSON:
		value, err := datum.GetMysqlJSON().MarshalJSON()
		if err != nil {
			return err
		}
		enc.appendSQLBytes(sb, value)

	case types.KindBinaryLiteral:
		// Emitted as a hexadecimal literal, e.g. x'deadbeef'.
		value := datum.GetBinaryLiteral()
		sb.Grow(3 + 2*len(value))
		sb.WriteString("x'")
		if _, err := hex.NewEncoder(sb).Write(value); err != nil {
			return errors.Trace(err)
		}
		sb.WriteByte('\'')

	case types.KindMysqlBit:
		// BIT values are rendered as their unsigned integer value.
		var buffer [20]byte
		intValue, err := datum.GetBinaryLiteral().ToInt(nil)
		if err != nil {
			return err
		}
		value := strconv.AppendUint(buffer[:0], intValue, 10)
		sb.Write(value)

	// time, duration, decimal
	default:
		value, err := datum.ToString()
		if err != nil {
			return err
		}
		sb.WriteByte('\'')
		sb.WriteString(value)
		sb.WriteByte('\'')
	}

	return nil
}

// Close implements kv.Encoder; this encoder holds nothing to release.
func (*tidbEncoder) Close() {}

// getColumnByIndex returns cols[index], or the `_tidb_rowid` stub column when
// index is exactly one past the end of cols.
func getColumnByIndex(cols []*table.Column, index int) *table.Column {
	if index == len(cols) {
		return extraHandleTableColumn
	}
	return cols[index]
}

// Encode implements kv.Encoder: it renders one data row as a SQL value tuple
// `(v1,v2,...)`. The field-to-column mapping is computed lazily from
// columnPermutation on the first call and cached on the encoder.
func (enc *tidbEncoder) Encode(logger log.Logger, row []types.Datum, _ int64, columnPermutation []int, _ int64) (kv.Row, error) {
	cols := enc.tbl.Cols()

	if len(enc.columnIdx) == 0 {
		columnCount := 0
		columnIdx := make([]int, len(columnPermutation))
		for i, idx := range columnPermutation {
			if idx >= 0 {
				columnIdx[idx] = i
				columnCount++
			}
		}
		enc.columnIdx = columnIdx
		enc.columnCnt = columnCount
	}

	// TODO: since the column count doesn't exactly reflect the real column names, we only check the upper bound currently.
	// See: tests/generated_columns/data/gencol.various_types.0.sql this sql has no columns, so encodeLoop will fill the
	// column permutation with default, thus enc.columnCnt > len(row).
	if len(row) > enc.columnCnt {
		logger.Error("column count mismatch", zap.Ints("column_permutation", columnPermutation),
			zap.Array("data", kv.RowArrayMarshaler(row)))
		return nil, errors.Errorf("column count mismatch, expected %d, got %d", enc.columnCnt, len(row))
	}

	var encoded strings.Builder
	encoded.Grow(8 * len(row))
	encoded.WriteByte('(')
	for i, field := range row {
		if i != 0 {
			encoded.WriteByte(',')
		}
		datum := field
		if err := enc.appendSQL(&encoded, &datum, getColumnByIndex(cols, enc.columnIdx[i])); err != nil {
			logger.Error("tidb encode failed",
				zap.Array("original", kv.RowArrayMarshaler(row)),
				zap.Int("originalCol", i),
				log.ShortError(err),
			)
			return nil, err
		}
	}
	encoded.WriteByte(')')
	return tidbRow(encoded.String()), nil
}

func (be *tidbBackend) Close() {
	// *Not* going to close `be.db`. The db object is normally borrowed from a
	// TidbManager, so we let the manager to close it.
}

// MakeEmptyRows returns an empty batch of SQL-tuple rows.
func (be *tidbBackend) MakeEmptyRows() kv.Rows {
	return tidbRows(nil)
}

// RetryImportDelay is zero because ImportEngine is a no-op for this backend.
func (be *tidbBackend) RetryImportDelay() time.Duration {
	return 0
}

// MaxChunkSize is the byte-size cap for a single INSERT statement (1 MiB).
// The failpoint shrinks it to 1 to force one-row statements in tests.
func (be *tidbBackend) MaxChunkSize() int {
	failpoint.Inject("FailIfImportedSomeRows", func() {
		failpoint.Return(1)
	})
	return 1048576
}

// ShouldPostProcess reports that post-import steps (e.g. checksum/analyze)
// should still run for this backend.
func (be *tidbBackend) ShouldPostProcess() bool {
	return true
}

// CheckRequirements is a no-op: any MySQL-compatible target is acceptable.
func (be *tidbBackend) CheckRequirements(ctx context.Context, _ *backend.CheckCtx) error {
	log.L().Info("skipping check requirements for tidb backend")
	return nil
}

// NewEncoder creates a SQL-tuple encoder for tbl. Under strict SQL mode the
// session re-enables UTF-8/ASCII validity checks.
func (be *tidbBackend) NewEncoder(tbl table.Table, options *kv.SessionOptions) (kv.Encoder, error) {
	se := kv.NewSession(options)
	if options.SQLMode.HasStrictMode() {
		se.GetSessionVars().SkipUTF8Check = false
		se.GetSessionVars().SkipASCIICheck = false
	}

	return &tidbEncoder{mode: options.SQLMode, tbl: tbl, se: se}, nil
}

// OpenEngine is a no-op: rows go straight to the target database.
func (be *tidbBackend) OpenEngine(context.Context, *backend.EngineConfig, uuid.UUID) error {
	return nil
}

// CloseEngine is a no-op for the TiDB backend.
func (be *tidbBackend) CloseEngine(context.Context, *backend.EngineConfig, uuid.UUID) error {
	return nil
}

// CleanupEngine is a no-op for the TiDB backend.
func (be *tidbBackend) CleanupEngine(context.Context, uuid.UUID) error {
	return nil
}

// CollectLocalDuplicateRows is not supported by the TiDB backend.
func (be *tidbBackend) CollectLocalDuplicateRows(ctx context.Context, tbl table.Table) error {
	panic("Unsupported Operation")
}

// CollectRemoteDuplicateRows is not supported by the TiDB backend.
func (be *tidbBackend) CollectRemoteDuplicateRows(ctx context.Context, tbl table.Table) error {
	panic("Unsupported Operation")
}

// ImportEngine is a no-op: data was already written by WriteRows.
func (be *tidbBackend) ImportEngine(context.Context, uuid.UUID) error {
	return nil
}

// WriteRows splits rows into size-bounded chunks and writes each chunk,
// retrying a chunk up to writeRowsMaxRetryTimes times on retryable errors.
func (be *tidbBackend) WriteRows(ctx context.Context, _ uuid.UUID, tableName string, columnNames []string, rows kv.Rows) error {
	var err error
outside:
	for _, r := range rows.SplitIntoChunks(be.MaxChunkSize()) {
		for i := 0; i < writeRowsMaxRetryTimes; i++ {
			err = be.WriteRowsToDB(ctx, tableName, columnNames, r)
			switch {
			case err == nil:
				continue outside
			case common.IsRetryableError(err):
				// retry next loop
			default:
				return err
			}
		}
		return errors.Annotatef(err, "[%s] write rows reach max retry %d and still failed", tableName, writeRowsMaxRetryTimes)
	}
	return nil
}

// WriteRowsToDB issues one multi-value INSERT/REPLACE statement covering every
// row in r. The statement verb follows be.onDuplicate. Retrying is the
// caller's responsibility.
func (be *tidbBackend) WriteRowsToDB(ctx context.Context, tableName string, columnNames []string, r kv.Rows) error {
	rows := r.(tidbRows)
	if len(rows) == 0 {
		return nil
	}

	var insertStmt strings.Builder
	switch be.onDuplicate {
	case config.ReplaceOnDup:
		insertStmt.WriteString("REPLACE INTO ")
	case config.IgnoreOnDup:
		insertStmt.WriteString("INSERT IGNORE INTO ")
	case config.ErrorOnDup:
		insertStmt.WriteString("INSERT INTO ")
	}

	insertStmt.WriteString(tableName)
	if len(columnNames) > 0 {
		insertStmt.WriteByte('(')
		for i, colName := range columnNames {
			if i != 0 {
				insertStmt.WriteByte(',')
			}
			common.WriteMySQLIdentifier(&insertStmt, colName)
		}
		insertStmt.WriteByte(')')
	}
	insertStmt.WriteString(" VALUES")

	// Note: we are not going to do interpolation (prepared statements) to avoid
	// complication arise from data length overflow of BIT and BINARY columns

	for i, row := range rows {
		if i != 0 {
			insertStmt.WriteByte(',')
		}
		insertStmt.WriteString(string(row))
	}

	// Retry will be done externally, so we're not going to retry here.
434 _, err := be.db.ExecContext(ctx, insertStmt.String()) 435 if err != nil && !common.IsContextCanceledError(err) { 436 log.L().Error("execute statement failed", zap.String("stmt", redact.String(insertStmt.String())), 437 zap.Array("rows", rows), zap.Error(err)) 438 } 439 failpoint.Inject("FailIfImportedSomeRows", func() { 440 panic("forcing failure due to FailIfImportedSomeRows, before saving checkpoint") 441 }) 442 return errors.Trace(err) 443 } 444 445 //nolint:nakedret // TODO: refactor 446 func (be *tidbBackend) FetchRemoteTableModels(ctx context.Context, schemaName string) (tables []*model.TableInfo, err error) { 447 s := common.SQLWithRetry{ 448 DB: be.db, 449 Logger: log.L(), 450 } 451 452 err = s.Transact(ctx, "fetch table columns", func(c context.Context, tx *sql.Tx) error { 453 var versionStr string 454 if err = tx.QueryRowContext(ctx, "SELECT version()").Scan(&versionStr); err != nil { 455 return err 456 } 457 tidbVersion, err := version.ExtractTiDBVersion(versionStr) 458 if err != nil { 459 return err 460 } 461 462 rows, e := tx.Query(` 463 SELECT table_name, column_name, column_type, extra 464 FROM information_schema.columns 465 WHERE table_schema = ? 
466 ORDER BY table_name, ordinal_position; 467 `, schemaName) 468 if e != nil { 469 return e 470 } 471 defer rows.Close() 472 473 var ( 474 curTableName string 475 curColOffset int 476 curTable *model.TableInfo 477 ) 478 for rows.Next() { 479 var tableName, columnName, columnType, columnExtra string 480 if e := rows.Scan(&tableName, &columnName, &columnType, &columnExtra); e != nil { 481 return e 482 } 483 if tableName != curTableName { 484 curTable = &model.TableInfo{ 485 Name: model.NewCIStr(tableName), 486 State: model.StatePublic, 487 PKIsHandle: true, 488 } 489 tables = append(tables, curTable) 490 curTableName = tableName 491 curColOffset = 0 492 } 493 494 // see: https://github.com/pingcap/parser/blob/3b2fb4b41d73710bc6c4e1f4e8679d8be6a4863e/types/field_type.go#L185-L191 495 var flag uint 496 if strings.HasSuffix(columnType, "unsigned") { 497 flag |= mysql.UnsignedFlag 498 } 499 if strings.Contains(columnExtra, "auto_increment") { 500 flag |= mysql.AutoIncrementFlag 501 } 502 curTable.Columns = append(curTable.Columns, &model.ColumnInfo{ 503 Name: model.NewCIStr(columnName), 504 Offset: curColOffset, 505 State: model.StatePublic, 506 FieldType: types.FieldType{ 507 Flag: flag, 508 }, 509 }) 510 curColOffset++ 511 } 512 if rows.Err() != nil { 513 return rows.Err() 514 } 515 // shard_row_id/auto random is only available after tidb v4.0.0 516 // `show table next_row_id` is also not available before tidb v4.0.0 517 if tidbVersion.Major < 4 { 518 return nil 519 } 520 521 // init auto id column for each table 522 for _, tbl := range tables { 523 tblName := common.UniqueTable(schemaName, tbl.Name.O) 524 autoIDInfos, err := FetchTableAutoIDInfos(ctx, tx, tblName) 525 if err != nil { 526 return errors.Trace(err) 527 } 528 for _, info := range autoIDInfos { 529 for _, col := range tbl.Columns { 530 if col.Name.O == info.Column { 531 switch info.Type { 532 case "AUTO_INCREMENT": 533 col.Flag |= mysql.AutoIncrementFlag 534 case "AUTO_RANDOM": 535 col.Flag |= 
mysql.PriKeyFlag 536 tbl.PKIsHandle = true 537 // set a stub here, since we don't really need the real value 538 tbl.AutoRandomBits = 1 539 } 540 } 541 } 542 } 543 544 } 545 return nil 546 }) 547 return 548 } 549 550 func (be *tidbBackend) EngineFileSizes() []backend.EngineFileSize { 551 return nil 552 } 553 554 func (be *tidbBackend) FlushEngine(context.Context, uuid.UUID) error { 555 return nil 556 } 557 558 func (be *tidbBackend) FlushAllEngines(context.Context) error { 559 return nil 560 } 561 562 func (be *tidbBackend) ResetEngine(context.Context, uuid.UUID) error { 563 return errors.New("cannot reset an engine in TiDB backend") 564 } 565 566 func (be *tidbBackend) LocalWriter( 567 ctx context.Context, 568 cfg *backend.LocalWriterConfig, 569 engineUUID uuid.UUID, 570 ) (backend.EngineWriter, error) { 571 return &Writer{be: be, engineUUID: engineUUID}, nil 572 } 573 574 type Writer struct { 575 be *tidbBackend 576 engineUUID uuid.UUID 577 } 578 579 func (w *Writer) Close(ctx context.Context) (backend.ChunkFlushStatus, error) { 580 return nil, nil 581 } 582 583 func (w *Writer) AppendRows(ctx context.Context, tableName string, columnNames []string, rows kv.Rows) error { 584 return w.be.WriteRows(ctx, w.engineUUID, tableName, columnNames, rows) 585 } 586 587 func (w *Writer) IsSynced() bool { 588 return true 589 } 590 591 type TableAutoIDInfo struct { 592 Column string 593 NextID int64 594 Type string 595 } 596 597 func FetchTableAutoIDInfos(ctx context.Context, exec common.QueryExecutor, tableName string) ([]*TableAutoIDInfo, error) { 598 rows, e := exec.QueryContext(ctx, fmt.Sprintf("SHOW TABLE %s NEXT_ROW_ID", tableName)) 599 if e != nil { 600 return nil, errors.Trace(e) 601 } 602 var autoIDInfos []*TableAutoIDInfo 603 for rows.Next() { 604 var ( 605 dbName, tblName, columnName, idType string 606 nextID int64 607 ) 608 columns, err := rows.Columns() 609 if err != nil { 610 return nil, errors.Trace(err) 611 } 612 613 
//+--------------+------------+-------------+--------------------+----------------+ 614 //| DB_NAME | TABLE_NAME | COLUMN_NAME | NEXT_GLOBAL_ROW_ID | ID_TYPE | 615 //+--------------+------------+-------------+--------------------+----------------+ 616 //| testsysbench | t | _tidb_rowid | 1 | AUTO_INCREMENT | 617 //+--------------+------------+-------------+--------------------+----------------+ 618 619 // if columns length is 4, it doesn't contains the last column `ID_TYPE`, and it will always be 'AUTO_INCREMENT' 620 // for v4.0.0~v4.0.2 show table t next_row_id only returns 4 columns. 621 if len(columns) == 4 { 622 err = rows.Scan(&dbName, &tblName, &columnName, &nextID) 623 idType = "AUTO_INCREMENT" 624 } else { 625 err = rows.Scan(&dbName, &tblName, &columnName, &nextID, &idType) 626 } 627 if err != nil { 628 return nil, errors.Trace(err) 629 } 630 autoIDInfos = append(autoIDInfos, &TableAutoIDInfo{ 631 Column: columnName, 632 NextID: nextID, 633 Type: idType, 634 }) 635 } 636 // Defer in for-loop would be costly, anyway, we don't need those rows after this turn of iteration. 637 //nolint:sqlclosecheck 638 if err := rows.Close(); err != nil { 639 return nil, errors.Trace(err) 640 } 641 if rows.Err() != nil { 642 return nil, errors.Trace(rows.Err()) 643 } 644 return autoIDInfos, nil 645 }