github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/backend/kv/sql2kv.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 // TODO combine with the pkg/kv package outside. 15 16 package kv 17 18 import ( 19 "fmt" 20 "math" 21 "math/rand" 22 "sort" 23 24 "github.com/pingcap/errors" 25 "github.com/pingcap/parser/model" 26 "github.com/pingcap/parser/mysql" 27 "github.com/pingcap/tidb/expression" 28 "github.com/pingcap/tidb/meta/autoid" 29 "github.com/pingcap/tidb/sessionctx/variable" 30 "github.com/pingcap/tidb/table" 31 "github.com/pingcap/tidb/table/tables" 32 "github.com/pingcap/tidb/tablecodec" 33 "github.com/pingcap/tidb/types" 34 "github.com/pingcap/tidb/util/chunk" 35 "go.uber.org/zap" 36 "go.uber.org/zap/zapcore" 37 38 // Import tidb/planner/core to initialize expression.RewriteAstExpr 39 _ "github.com/pingcap/tidb/planner/core" 40 41 "github.com/pingcap/br/pkg/lightning/common" 42 "github.com/pingcap/br/pkg/lightning/log" 43 "github.com/pingcap/br/pkg/lightning/metric" 44 "github.com/pingcap/br/pkg/lightning/verification" 45 "github.com/pingcap/br/pkg/logutil" 46 "github.com/pingcap/br/pkg/redact" 47 ) 48 49 var ExtraHandleColumnInfo = model.NewExtraHandleColInfo() 50 51 type genCol struct { 52 index int 53 expr expression.Expression 54 } 55 56 type autoIDConverter func(int64) int64 57 58 type tableKVEncoder struct { 59 tbl table.Table 60 se *session 61 recordCache []types.Datum 62 genCols []genCol 63 // convert auto id for shard rowid or auto random id base on row id generated by lightning 64 autoIDFn autoIDConverter 65 } 66 67 func NewTableKVEncoder(tbl table.Table, options *SessionOptions) (Encoder, error) { 68 metric.KvEncoderCounter.WithLabelValues("open").Inc() 69 meta := tbl.Meta() 70 cols := tbl.Cols() 71 se := newSession(options) 72 // Set CommonAddRecordCtx to session to reuse the slices and BufStore in AddRecord 73 recordCtx := tables.NewCommonAddRecordCtx(len(cols)) 74 tables.SetAddRecordCtx(se, recordCtx) 75 76 autoIDFn := func(id int64) int64 { return id } 77 if meta.PKIsHandle && meta.ContainsAutoRandomBits() { 78 for _, col := range cols { 79 if mysql.HasPriKeyFlag(col.Flag) { 80 incrementalBits := autoRandomIncrementBits(col, int(meta.AutoRandomBits)) 81 autoRandomBits := rand.New(rand.NewSource(options.AutoRandomSeed)).Int63n(1<<meta.AutoRandomBits) << incrementalBits 82 autoIDFn = func(id int64) int64 { 83 return autoRandomBits | id 84 } 85 break 86 } 87 } 88 } else if meta.ShardRowIDBits > 0 { 89 rd := rand.New(rand.NewSource(options.AutoRandomSeed)) 90 mask := int64(1)<<meta.ShardRowIDBits - 1 91 shift := autoid.RowIDBitLength - meta.ShardRowIDBits - 1 92 autoIDFn = func(id int64) int64 { 93 rd.Seed(id) 94 shardBits := (int64(rd.Uint32()) & mask) << shift 95 return shardBits | id 96 } 97 } 98 99 // collect expressions for evaluating stored generated columns 100 genCols, err := collectGeneratedColumns(se, meta, cols) 101 if err != nil { 102 return nil, errors.Annotate(err, "failed to parse generated column expressions") 103 } 104 105 return &tableKVEncoder{ 106 tbl: tbl, 107 se: se, 108 genCols: genCols, 109 autoIDFn: autoIDFn, 110 }, nil 111 } 112 113 func autoRandomIncrementBits(col *table.Column, randomBits int) int { 114 typeBitsLength := mysql.DefaultLengthOfMysqlTypes[col.Tp] * 8 115 incrementalBits := typeBitsLength - randomBits 116 hasSignBit := !mysql.HasUnsignedFlag(col.Flag) 117 if hasSignBit { 118 incrementalBits-- 119 } 120 return incrementalBits 121 } 122 123 // collectGeneratedColumns collects all expressions required to evaluate the 124 // results of all generated columns. The returning slice is in evaluation order. 125 func collectGeneratedColumns(se *session, meta *model.TableInfo, cols []*table.Column) ([]genCol, error) { 126 hasGenCol := false 127 for _, col := range cols { 128 if col.GeneratedExpr != nil { 129 hasGenCol = true 130 break 131 } 132 } 133 134 if !hasGenCol { 135 return nil, nil 136 } 137 138 // the expression rewriter requires a non-nil TxnCtx. 139 se.vars.TxnCtx = new(variable.TransactionContext) 140 defer func() { 141 se.vars.TxnCtx = nil 142 }() 143 144 // not using TableInfo2SchemaAndNames to avoid parsing all virtual generated columns again. 145 exprColumns := make([]*expression.Column, 0, len(cols)) 146 names := make(types.NameSlice, 0, len(cols)) 147 for i, col := range cols { 148 names = append(names, &types.FieldName{ 149 OrigTblName: meta.Name, 150 OrigColName: col.Name, 151 TblName: meta.Name, 152 ColName: col.Name, 153 }) 154 exprColumns = append(exprColumns, &expression.Column{ 155 RetType: col.FieldType.Clone(), 156 ID: col.ID, 157 UniqueID: int64(i), 158 Index: col.Offset, 159 OrigName: names[i].String(), 160 IsHidden: col.Hidden, 161 }) 162 } 163 schema := expression.NewSchema(exprColumns...) 164 165 // as long as we have a stored generated column, all columns it referred to must be evaluated as well. 166 // for simplicity we just evaluate all generated columns (virtual or not) before the last stored one. 167 var genCols []genCol 168 for i, col := range cols { 169 if col.GeneratedExpr != nil { 170 expr, err := expression.RewriteAstExpr(se, col.GeneratedExpr, schema, names) 171 if err != nil { 172 return nil, err 173 } 174 genCols = append(genCols, genCol{ 175 index: i, 176 expr: expr, 177 }) 178 } 179 } 180 181 // order the result by column offset so they match the evaluation order. 182 sort.Slice(genCols, func(i, j int) bool { 183 return cols[genCols[i].index].Offset < cols[genCols[j].index].Offset 184 }) 185 return genCols, nil 186 } 187 188 func (kvcodec *tableKVEncoder) Close() { 189 kvcodec.se.Close() 190 metric.KvEncoderCounter.WithLabelValues("closed").Inc() 191 } 192 193 // RowArrayMarshaler wraps a slice of types.Datum for logging the content into zap. 194 type RowArrayMarshaler []types.Datum 195 196 var kindStr = [...]string{ 197 types.KindNull: "null", 198 types.KindInt64: "int64", 199 types.KindUint64: "uint64", 200 types.KindFloat32: "float32", 201 types.KindFloat64: "float64", 202 types.KindString: "string", 203 types.KindBytes: "bytes", 204 types.KindBinaryLiteral: "binary", 205 types.KindMysqlDecimal: "decimal", 206 types.KindMysqlDuration: "duration", 207 types.KindMysqlEnum: "enum", 208 types.KindMysqlBit: "bit", 209 types.KindMysqlSet: "set", 210 types.KindMysqlTime: "time", 211 types.KindInterface: "interface", 212 types.KindMinNotNull: "min", 213 types.KindMaxValue: "max", 214 types.KindRaw: "raw", 215 types.KindMysqlJSON: "json", 216 } 217 218 // MarshalLogArray implements the zapcore.ArrayMarshaler interface 219 func (row RowArrayMarshaler) MarshalLogArray(encoder zapcore.ArrayEncoder) error { 220 for _, datum := range row { 221 kind := datum.Kind() 222 var str string 223 var err error 224 switch kind { 225 case types.KindNull: 226 str = "NULL" 227 case types.KindMinNotNull: 228 str = "-inf" 229 case types.KindMaxValue: 230 str = "+inf" 231 default: 232 str, err = datum.ToString() 233 if err != nil { 234 return err 235 } 236 } 237 if err := encoder.AppendObject(zapcore.ObjectMarshalerFunc(func(enc zapcore.ObjectEncoder) error { 238 enc.AddString("kind", kindStr[kind]) 239 enc.AddString("val", redact.String(str)) 240 return nil 241 })); err != nil { 242 return err 243 } 244 } 245 return nil 246 } 247 248 func logKVConvertFailed(logger log.Logger, row []types.Datum, j int, colInfo *model.ColumnInfo, err error) error { 249 var original types.Datum 250 if 0 <= j && j < len(row) { 251 original = row[j] 252 row = row[j : j+1] 253 } 254 255 logger.Error("kv convert failed", 256 zap.Array("original", RowArrayMarshaler(row)), 257 zap.Int("originalCol", j), 258 zap.String("colName", colInfo.Name.O), 259 zap.Stringer("colType", &colInfo.FieldType), 260 log.ShortError(err), 261 ) 262 263 log.L().Error("failed to covert kv value", logutil.RedactAny("origVal", original.GetValue()), 264 zap.Stringer("fieldType", &colInfo.FieldType), zap.String("column", colInfo.Name.O), 265 zap.Int("columnID", j+1)) 266 return errors.Annotatef( 267 err, 268 "failed to cast value as %s for column `%s` (#%d)", &colInfo.FieldType, colInfo.Name.O, j+1, 269 ) 270 } 271 272 func logEvalGenExprFailed(logger log.Logger, row []types.Datum, colInfo *model.ColumnInfo, err error) error { 273 logger.Error("kv convert failed: cannot evaluate generated column expression", 274 zap.Array("original", RowArrayMarshaler(row)), 275 zap.String("colName", colInfo.Name.O), 276 log.ShortError(err), 277 ) 278 279 return errors.Annotatef( 280 err, 281 "failed to evaluate generated column expression for column `%s`", 282 colInfo.Name.O, 283 ) 284 } 285 286 type KvPairs struct { 287 pairs []common.KvPair 288 bytesBuf *bytesBuf 289 memBuf *kvMemBuf 290 } 291 292 // MakeRowsFromKvPairs converts a KvPair slice into a Rows instance. This is 293 // mainly used for testing only. The resulting Rows instance should only be used 294 // for the importer backend. 295 func MakeRowsFromKvPairs(pairs []common.KvPair) Rows { 296 return &KvPairs{pairs: pairs} 297 } 298 299 // MakeRowFromKvPairs converts a KvPair slice into a Row instance. This is 300 // mainly used for testing only. The resulting Row instance should only be used 301 // for the importer backend. 302 func MakeRowFromKvPairs(pairs []common.KvPair) Row { 303 return &KvPairs{pairs: pairs} 304 } 305 306 // KvPairsFromRows converts a Rows instance constructed from MakeRowsFromKvPairs 307 // back into a slice of KvPair. This method panics if the Rows is not 308 // constructed in such way. 309 // nolint:golint // kv.KvPairsFromRows sounds good. 310 func KvPairsFromRows(rows Rows) []common.KvPair { 311 return rows.(*KvPairs).pairs 312 } 313 314 // Encode a row of data into KV pairs. 315 // 316 // See comments in `(*TableRestore).initializeColumns` for the meaning of the 317 // `columnPermutation` parameter. 318 func (kvcodec *tableKVEncoder) Encode( 319 logger log.Logger, 320 row []types.Datum, 321 rowID int64, 322 columnPermutation []int, 323 offset int64, 324 ) (Row, error) { 325 cols := kvcodec.tbl.Cols() 326 327 var value types.Datum 328 var err error 329 //nolint:prealloc // This is a placeholder. 330 var record []types.Datum 331 332 if kvcodec.recordCache != nil { 333 record = kvcodec.recordCache 334 } else { 335 record = make([]types.Datum, 0, len(cols)+1) 336 } 337 338 meta := kvcodec.tbl.Meta() 339 isAutoRandom := meta.PKIsHandle && meta.ContainsAutoRandomBits() 340 for i, col := range cols { 341 j := columnPermutation[i] 342 isAutoIncCol := mysql.HasAutoIncrementFlag(col.Flag) 343 isPk := mysql.HasPriKeyFlag(col.Flag) 344 switch { 345 case j >= 0 && j < len(row): 346 value, err = table.CastValue(kvcodec.se, row[j], col.ToInfo(), false, false) 347 if err == nil { 348 err = col.HandleBadNull(&value, kvcodec.se.vars.StmtCtx) 349 } 350 case isAutoIncCol: 351 // we still need a conversion, e.g. to catch overflow with a TINYINT column. 352 value, err = table.CastValue(kvcodec.se, types.NewIntDatum(rowID), col.ToInfo(), false, false) 353 case isAutoRandom && isPk: 354 var val types.Datum 355 realRowID := kvcodec.autoIDFn(rowID) 356 if mysql.HasUnsignedFlag(col.Flag) { 357 val = types.NewUintDatum(uint64(realRowID)) 358 } else { 359 val = types.NewIntDatum(realRowID) 360 } 361 value, err = table.CastValue(kvcodec.se, val, col.ToInfo(), false, false) 362 case col.IsGenerated(): 363 // inject some dummy value for gen col so that MutRowFromDatums below sees a real value instead of nil. 364 // if MutRowFromDatums sees a nil it won't initialize the underlying storage and cause SetDatum to panic. 365 value = types.GetMinValue(&col.FieldType) 366 default: 367 value, err = table.GetColDefaultValue(kvcodec.se, col.ToInfo()) 368 } 369 if err != nil { 370 return nil, logKVConvertFailed(logger, row, j, col.ToInfo(), err) 371 } 372 373 record = append(record, value) 374 375 if isAutoRandom && isPk { 376 incrementalBits := autoRandomIncrementBits(col, int(meta.AutoRandomBits)) 377 if err := kvcodec.tbl.RebaseAutoID(kvcodec.se, value.GetInt64()&((1<<incrementalBits)-1), false, autoid.AutoRandomType); err != nil { 378 return nil, errors.Trace(err) 379 } 380 } 381 if isAutoIncCol { 382 if err := kvcodec.tbl.RebaseAutoID(kvcodec.se, getAutoRecordID(value, &col.FieldType), false, autoid.AutoIncrementType); err != nil { 383 return nil, errors.Trace(err) 384 } 385 } 386 } 387 388 if common.TableHasAutoRowID(meta) { 389 rowValue := rowID 390 j := columnPermutation[len(cols)] 391 if j >= 0 && j < len(row) { 392 value, err = table.CastValue(kvcodec.se, row[j], ExtraHandleColumnInfo, false, false) 393 rowValue = value.GetInt64() 394 } else { 395 rowID := kvcodec.autoIDFn(rowID) 396 value, err = types.NewIntDatum(rowID), nil 397 } 398 if err != nil { 399 return nil, logKVConvertFailed(logger, row, j, ExtraHandleColumnInfo, err) 400 } 401 record = append(record, value) 402 if err := kvcodec.tbl.RebaseAutoID(kvcodec.se, rowValue, false, autoid.RowIDAllocType); err != nil { 403 return nil, errors.Trace(err) 404 } 405 } 406 407 if len(kvcodec.genCols) > 0 { 408 mutRow := chunk.MutRowFromDatums(record) 409 for _, gc := range kvcodec.genCols { 410 col := cols[gc.index].ToInfo() 411 evaluated, err := gc.expr.Eval(mutRow.ToRow()) 412 if err != nil { 413 return nil, logEvalGenExprFailed(logger, row, col, err) 414 } 415 value, err := table.CastValue(kvcodec.se, evaluated, col, false, false) 416 if err != nil { 417 return nil, logEvalGenExprFailed(logger, row, col, err) 418 } 419 mutRow.SetDatum(gc.index, value) 420 record[gc.index] = value 421 } 422 } 423 424 _, err = kvcodec.tbl.AddRecord(kvcodec.se, record) 425 if err != nil { 426 logger.Error("kv encode failed", 427 zap.Array("originalRow", RowArrayMarshaler(row)), 428 zap.Array("convertedRow", RowArrayMarshaler(record)), 429 log.ShortError(err), 430 ) 431 return nil, errors.Trace(err) 432 } 433 kvPairs := kvcodec.se.takeKvPairs() 434 for i := 0; i < len(kvPairs.pairs); i++ { 435 kvPairs.pairs[i].RowID = rowID 436 kvPairs.pairs[i].Offset = offset 437 } 438 kvcodec.recordCache = record[:0] 439 return kvPairs, nil 440 } 441 442 // get record value for auto-increment field 443 // 444 // See: https://github.com/pingcap/tidb/blob/47f0f15b14ed54fc2222f3e304e29df7b05e6805/executor/insert_common.go#L781-L852 445 func getAutoRecordID(d types.Datum, target *types.FieldType) int64 { 446 switch target.Tp { 447 case mysql.TypeFloat, mysql.TypeDouble: 448 return int64(math.Round(d.GetFloat64())) 449 case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong: 450 return d.GetInt64() 451 default: 452 panic(fmt.Sprintf("unsupported auto-increment field type '%d'", target.Tp)) 453 } 454 } 455 456 func (kvs *KvPairs) Size() uint64 { 457 size := uint64(0) 458 for _, kv := range kvs.pairs { 459 size += uint64(len(kv.Key) + len(kv.Val)) 460 } 461 return size 462 } 463 464 func (kvs *KvPairs) ClassifyAndAppend( 465 data *Rows, 466 dataChecksum *verification.KVChecksum, 467 indices *Rows, 468 indexChecksum *verification.KVChecksum, 469 ) { 470 dataKVs := (*data).(*KvPairs) 471 indexKVs := (*indices).(*KvPairs) 472 473 for _, kv := range kvs.pairs { 474 if kv.Key[tablecodec.TableSplitKeyLen+1] == 'r' { 475 dataKVs.pairs = append(dataKVs.pairs, kv) 476 dataChecksum.UpdateOne(kv) 477 } else { 478 indexKVs.pairs = append(indexKVs.pairs, kv) 479 indexChecksum.UpdateOne(kv) 480 } 481 } 482 483 // the related buf is shared, so we only need to set it into one of the kvs so it can be released 484 if kvs.bytesBuf != nil { 485 dataKVs.bytesBuf = kvs.bytesBuf 486 dataKVs.memBuf = kvs.memBuf 487 kvs.bytesBuf = nil 488 kvs.memBuf = nil 489 } 490 491 *data = dataKVs 492 *indices = indexKVs 493 } 494 495 func (kvs *KvPairs) SplitIntoChunks(splitSize int) []Rows { 496 if len(kvs.pairs) == 0 { 497 return nil 498 } 499 500 res := make([]Rows, 0, 1) 501 i := 0 502 cumSize := 0 503 for j, pair := range kvs.pairs { 504 size := len(pair.Key) + len(pair.Val) 505 if i < j && cumSize+size > splitSize { 506 res = append(res, &KvPairs{pairs: kvs.pairs[i:j]}) 507 i = j 508 cumSize = 0 509 } 510 cumSize += size 511 } 512 513 if i == 0 { 514 res = append(res, kvs) 515 } else { 516 res = append(res, &KvPairs{ 517 pairs: kvs.pairs[i:], 518 bytesBuf: kvs.bytesBuf, 519 memBuf: kvs.memBuf, 520 }) 521 } 522 return res 523 } 524 525 func (kvs *KvPairs) Clear() Rows { 526 if kvs.bytesBuf != nil { 527 kvs.memBuf.Recycle(kvs.bytesBuf) 528 kvs.bytesBuf = nil 529 kvs.memBuf = nil 530 } 531 kvs.pairs = kvs.pairs[:0] 532 return kvs 533 }