github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/backend/kv/sql2kv.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  // TODO combine with the pkg/kv package outside.
    15  
    16  package kv
    17  
    18  import (
    19  	"fmt"
    20  	"math"
    21  	"math/rand"
    22  	"sort"
    23  
    24  	"github.com/pingcap/errors"
    25  	"github.com/pingcap/parser/model"
    26  	"github.com/pingcap/parser/mysql"
    27  	"github.com/pingcap/tidb/expression"
    28  	"github.com/pingcap/tidb/meta/autoid"
    29  	"github.com/pingcap/tidb/sessionctx/variable"
    30  	"github.com/pingcap/tidb/table"
    31  	"github.com/pingcap/tidb/table/tables"
    32  	"github.com/pingcap/tidb/tablecodec"
    33  	"github.com/pingcap/tidb/types"
    34  	"github.com/pingcap/tidb/util/chunk"
    35  	"go.uber.org/zap"
    36  	"go.uber.org/zap/zapcore"
    37  
    38  	// Import tidb/planner/core to initialize expression.RewriteAstExpr
    39  	_ "github.com/pingcap/tidb/planner/core"
    40  
    41  	"github.com/pingcap/br/pkg/lightning/common"
    42  	"github.com/pingcap/br/pkg/lightning/log"
    43  	"github.com/pingcap/br/pkg/lightning/metric"
    44  	"github.com/pingcap/br/pkg/lightning/verification"
    45  	"github.com/pingcap/br/pkg/logutil"
    46  	"github.com/pingcap/br/pkg/redact"
    47  )
    48  
// ExtraHandleColumnInfo is the column info produced by
// model.NewExtraHandleColInfo. Encode uses it as the target column when it
// casts the row-ID value of a table that has an auto row ID.
var ExtraHandleColumnInfo = model.NewExtraHandleColInfo()
    50  
// genCol pairs a generated column with the expression that computes it.
//
// index is the position of the column in the table's column slice, and expr
// is the rewritten form of the column's generation expression.
type genCol struct {
	index int
	expr  expression.Expression
}
    55  
// autoIDConverter turns a row ID supplied to the encoder into the ID that is
// actually written (see tableKVEncoder.autoIDFn).
type autoIDConverter func(int64) int64
    57  
// tableKVEncoder encodes rows of one table into KV pairs.
//
// tbl is the table being encoded and se is the session handed to every table
// call. recordCache keeps the backing array of the previous record so Encode
// can reuse it, and genCols lists the generated-column expressions in
// evaluation order.
type tableKVEncoder struct {
	tbl         table.Table
	se          *session
	recordCache []types.Datum
	genCols     []genCol
	// autoIDFn converts a row ID generated by lightning into the stored ID
	// for tables using shard row-ID bits or an auto-random primary key.
	autoIDFn autoIDConverter
}
    66  
    67  func NewTableKVEncoder(tbl table.Table, options *SessionOptions) (Encoder, error) {
    68  	metric.KvEncoderCounter.WithLabelValues("open").Inc()
    69  	meta := tbl.Meta()
    70  	cols := tbl.Cols()
    71  	se := newSession(options)
    72  	// Set CommonAddRecordCtx to session to reuse the slices and BufStore in AddRecord
    73  	recordCtx := tables.NewCommonAddRecordCtx(len(cols))
    74  	tables.SetAddRecordCtx(se, recordCtx)
    75  
    76  	autoIDFn := func(id int64) int64 { return id }
    77  	if meta.PKIsHandle && meta.ContainsAutoRandomBits() {
    78  		for _, col := range cols {
    79  			if mysql.HasPriKeyFlag(col.Flag) {
    80  				incrementalBits := autoRandomIncrementBits(col, int(meta.AutoRandomBits))
    81  				autoRandomBits := rand.New(rand.NewSource(options.AutoRandomSeed)).Int63n(1<<meta.AutoRandomBits) << incrementalBits
    82  				autoIDFn = func(id int64) int64 {
    83  					return autoRandomBits | id
    84  				}
    85  				break
    86  			}
    87  		}
    88  	} else if meta.ShardRowIDBits > 0 {
    89  		rd := rand.New(rand.NewSource(options.AutoRandomSeed))
    90  		mask := int64(1)<<meta.ShardRowIDBits - 1
    91  		shift := autoid.RowIDBitLength - meta.ShardRowIDBits - 1
    92  		autoIDFn = func(id int64) int64 {
    93  			rd.Seed(id)
    94  			shardBits := (int64(rd.Uint32()) & mask) << shift
    95  			return shardBits | id
    96  		}
    97  	}
    98  
    99  	// collect expressions for evaluating stored generated columns
   100  	genCols, err := collectGeneratedColumns(se, meta, cols)
   101  	if err != nil {
   102  		return nil, errors.Annotate(err, "failed to parse generated column expressions")
   103  	}
   104  
   105  	return &tableKVEncoder{
   106  		tbl:      tbl,
   107  		se:       se,
   108  		genCols:  genCols,
   109  		autoIDFn: autoIDFn,
   110  	}, nil
   111  }
   112  
   113  func autoRandomIncrementBits(col *table.Column, randomBits int) int {
   114  	typeBitsLength := mysql.DefaultLengthOfMysqlTypes[col.Tp] * 8
   115  	incrementalBits := typeBitsLength - randomBits
   116  	hasSignBit := !mysql.HasUnsignedFlag(col.Flag)
   117  	if hasSignBit {
   118  		incrementalBits--
   119  	}
   120  	return incrementalBits
   121  }
   122  
   123  // collectGeneratedColumns collects all expressions required to evaluate the
   124  // results of all generated columns. The returning slice is in evaluation order.
   125  func collectGeneratedColumns(se *session, meta *model.TableInfo, cols []*table.Column) ([]genCol, error) {
   126  	hasGenCol := false
   127  	for _, col := range cols {
   128  		if col.GeneratedExpr != nil {
   129  			hasGenCol = true
   130  			break
   131  		}
   132  	}
   133  
   134  	if !hasGenCol {
   135  		return nil, nil
   136  	}
   137  
   138  	// the expression rewriter requires a non-nil TxnCtx.
   139  	se.vars.TxnCtx = new(variable.TransactionContext)
   140  	defer func() {
   141  		se.vars.TxnCtx = nil
   142  	}()
   143  
   144  	// not using TableInfo2SchemaAndNames to avoid parsing all virtual generated columns again.
   145  	exprColumns := make([]*expression.Column, 0, len(cols))
   146  	names := make(types.NameSlice, 0, len(cols))
   147  	for i, col := range cols {
   148  		names = append(names, &types.FieldName{
   149  			OrigTblName: meta.Name,
   150  			OrigColName: col.Name,
   151  			TblName:     meta.Name,
   152  			ColName:     col.Name,
   153  		})
   154  		exprColumns = append(exprColumns, &expression.Column{
   155  			RetType:  col.FieldType.Clone(),
   156  			ID:       col.ID,
   157  			UniqueID: int64(i),
   158  			Index:    col.Offset,
   159  			OrigName: names[i].String(),
   160  			IsHidden: col.Hidden,
   161  		})
   162  	}
   163  	schema := expression.NewSchema(exprColumns...)
   164  
   165  	// as long as we have a stored generated column, all columns it referred to must be evaluated as well.
   166  	// for simplicity we just evaluate all generated columns (virtual or not) before the last stored one.
   167  	var genCols []genCol
   168  	for i, col := range cols {
   169  		if col.GeneratedExpr != nil {
   170  			expr, err := expression.RewriteAstExpr(se, col.GeneratedExpr, schema, names)
   171  			if err != nil {
   172  				return nil, err
   173  			}
   174  			genCols = append(genCols, genCol{
   175  				index: i,
   176  				expr:  expr,
   177  			})
   178  		}
   179  	}
   180  
   181  	// order the result by column offset so they match the evaluation order.
   182  	sort.Slice(genCols, func(i, j int) bool {
   183  		return cols[genCols[i].index].Offset < cols[genCols[j].index].Offset
   184  	})
   185  	return genCols, nil
   186  }
   187  
// Close closes the encoder's session and then increments the "closed"
// encoder counter.
func (kvcodec *tableKVEncoder) Close() {
	kvcodec.se.Close()
	metric.KvEncoderCounter.WithLabelValues("closed").Inc()
}
   192  
// RowArrayMarshaler wraps a slice of types.Datum so that the row can be
// logged through zap as an array of {kind, val} objects.
type RowArrayMarshaler []types.Datum
   195  
// kindStr gives the name logged in the "kind" field for each datum kind.
// It is an array indexed directly by the kind value.
var kindStr = [...]string{
	types.KindNull:          "null",
	types.KindInt64:         "int64",
	types.KindUint64:        "uint64",
	types.KindFloat32:       "float32",
	types.KindFloat64:       "float64",
	types.KindString:        "string",
	types.KindBytes:         "bytes",
	types.KindBinaryLiteral: "binary",
	types.KindMysqlDecimal:  "decimal",
	types.KindMysqlDuration: "duration",
	types.KindMysqlEnum:     "enum",
	types.KindMysqlBit:      "bit",
	types.KindMysqlSet:      "set",
	types.KindMysqlTime:     "time",
	types.KindInterface:     "interface",
	types.KindMinNotNull:    "min",
	types.KindMaxValue:      "max",
	types.KindRaw:           "raw",
	types.KindMysqlJSON:     "json",
}
   217  
   218  // MarshalLogArray implements the zapcore.ArrayMarshaler interface
   219  func (row RowArrayMarshaler) MarshalLogArray(encoder zapcore.ArrayEncoder) error {
   220  	for _, datum := range row {
   221  		kind := datum.Kind()
   222  		var str string
   223  		var err error
   224  		switch kind {
   225  		case types.KindNull:
   226  			str = "NULL"
   227  		case types.KindMinNotNull:
   228  			str = "-inf"
   229  		case types.KindMaxValue:
   230  			str = "+inf"
   231  		default:
   232  			str, err = datum.ToString()
   233  			if err != nil {
   234  				return err
   235  			}
   236  		}
   237  		if err := encoder.AppendObject(zapcore.ObjectMarshalerFunc(func(enc zapcore.ObjectEncoder) error {
   238  			enc.AddString("kind", kindStr[kind])
   239  			enc.AddString("val", redact.String(str))
   240  			return nil
   241  		})); err != nil {
   242  			return err
   243  		}
   244  	}
   245  	return nil
   246  }
   247  
   248  func logKVConvertFailed(logger log.Logger, row []types.Datum, j int, colInfo *model.ColumnInfo, err error) error {
   249  	var original types.Datum
   250  	if 0 <= j && j < len(row) {
   251  		original = row[j]
   252  		row = row[j : j+1]
   253  	}
   254  
   255  	logger.Error("kv convert failed",
   256  		zap.Array("original", RowArrayMarshaler(row)),
   257  		zap.Int("originalCol", j),
   258  		zap.String("colName", colInfo.Name.O),
   259  		zap.Stringer("colType", &colInfo.FieldType),
   260  		log.ShortError(err),
   261  	)
   262  
   263  	log.L().Error("failed to covert kv value", logutil.RedactAny("origVal", original.GetValue()),
   264  		zap.Stringer("fieldType", &colInfo.FieldType), zap.String("column", colInfo.Name.O),
   265  		zap.Int("columnID", j+1))
   266  	return errors.Annotatef(
   267  		err,
   268  		"failed to cast value as %s for column `%s` (#%d)", &colInfo.FieldType, colInfo.Name.O, j+1,
   269  	)
   270  }
   271  
   272  func logEvalGenExprFailed(logger log.Logger, row []types.Datum, colInfo *model.ColumnInfo, err error) error {
   273  	logger.Error("kv convert failed: cannot evaluate generated column expression",
   274  		zap.Array("original", RowArrayMarshaler(row)),
   275  		zap.String("colName", colInfo.Name.O),
   276  		log.ShortError(err),
   277  	)
   278  
   279  	return errors.Annotatef(
   280  		err,
   281  		"failed to evaluate generated column expression for column `%s`",
   282  		colInfo.Name.O,
   283  	)
   284  }
   285  
// KvPairs is a batch of encoded KV pairs. It is the concrete type behind the
// Row and Rows values produced in this file.
//
// pairs holds the KV pairs themselves. bytesBuf and memBuf, when set, are
// handed back through memBuf.Recycle(bytesBuf) in Clear.
type KvPairs struct {
	pairs    []common.KvPair
	bytesBuf *bytesBuf
	memBuf   *kvMemBuf
}
   291  
   292  // MakeRowsFromKvPairs converts a KvPair slice into a Rows instance. This is
   293  // mainly used for testing only. The resulting Rows instance should only be used
   294  // for the importer backend.
   295  func MakeRowsFromKvPairs(pairs []common.KvPair) Rows {
   296  	return &KvPairs{pairs: pairs}
   297  }
   298  
   299  // MakeRowFromKvPairs converts a KvPair slice into a Row instance. This is
   300  // mainly used for testing only. The resulting Row instance should only be used
   301  // for the importer backend.
   302  func MakeRowFromKvPairs(pairs []common.KvPair) Row {
   303  	return &KvPairs{pairs: pairs}
   304  }
   305  
   306  // KvPairsFromRows converts a Rows instance constructed from MakeRowsFromKvPairs
   307  // back into a slice of KvPair. This method panics if the Rows is not
   308  // constructed in such way.
   309  // nolint:golint // kv.KvPairsFromRows sounds good.
   310  func KvPairsFromRows(rows Rows) []common.KvPair {
   311  	return rows.(*KvPairs).pairs
   312  }
   313  
// Encode a row of data into KV pairs.
//
// See comments in `(*TableRestore).initializeColumns` for the meaning of the
// `columnPermutation` parameter.
//
// As used here, columnPermutation[i] is the index in row of the value for the
// i-th table column; an index that is negative or not less than len(row)
// means the row carries no value for that column. When the table has an auto
// row ID, columnPermutation[len(cols)] plays the same role for the row ID.
//
// rowID supplies the value for auto-increment, auto-random and row-ID columns
// that the row does not provide. rowID and offset are stamped onto every
// returned KV pair.
//
// Besides producing the pairs, Encode rebases the table's auto-random,
// auto-increment and row-ID allocators with the values it encoded, and keeps
// the record slice for reuse by the next call.
func (kvcodec *tableKVEncoder) Encode(
	logger log.Logger,
	row []types.Datum,
	rowID int64,
	columnPermutation []int,
	offset int64,
) (Row, error) {
	cols := kvcodec.tbl.Cols()

	var value types.Datum
	var err error
	//nolint:prealloc // This is a placeholder.
	var record []types.Datum

	// Reuse the backing array left by the previous call when there is one;
	// otherwise reserve room for every column plus the row ID.
	if kvcodec.recordCache != nil {
		record = kvcodec.recordCache
	} else {
		record = make([]types.Datum, 0, len(cols)+1)
	}

	meta := kvcodec.tbl.Meta()
	isAutoRandom := meta.PKIsHandle && meta.ContainsAutoRandomBits()
	for i, col := range cols {
		j := columnPermutation[i]
		isAutoIncCol := mysql.HasAutoIncrementFlag(col.Flag)
		isPk := mysql.HasPriKeyFlag(col.Flag)
		// Pick the source of the column value; the first matching case wins,
		// so a value present in the row always takes precedence.
		switch {
		case j >= 0 && j < len(row):
			// The row provides a value: cast it to the column type, then
			// apply the column's NULL handling.
			value, err = table.CastValue(kvcodec.se, row[j], col.ToInfo(), false, false)
			if err == nil {
				err = col.HandleBadNull(&value, kvcodec.se.vars.StmtCtx)
			}
		case isAutoIncCol:
			// we still need a conversion, e.g. to catch overflow with a TINYINT column.
			value, err = table.CastValue(kvcodec.se, types.NewIntDatum(rowID), col.ToInfo(), false, false)
		case isAutoRandom && isPk:
			// Derive the auto-random ID from rowID, using an unsigned or
			// signed datum according to the column flag.
			var val types.Datum
			realRowID := kvcodec.autoIDFn(rowID)
			if mysql.HasUnsignedFlag(col.Flag) {
				val = types.NewUintDatum(uint64(realRowID))
			} else {
				val = types.NewIntDatum(realRowID)
			}
			value, err = table.CastValue(kvcodec.se, val, col.ToInfo(), false, false)
		case col.IsGenerated():
			// inject some dummy value for gen col so that MutRowFromDatums below sees a real value instead of nil.
			// if MutRowFromDatums sees a nil it won't initialize the underlying storage and cause SetDatum to panic.
			value = types.GetMinValue(&col.FieldType)
		default:
			// No value in the row and no special column: use the column default.
			value, err = table.GetColDefaultValue(kvcodec.se, col.ToInfo())
		}
		if err != nil {
			return nil, logKVConvertFailed(logger, row, j, col.ToInfo(), err)
		}

		record = append(record, value)

		// Rebase the auto-random allocator with the incremental (low) bits
		// of the encoded primary-key value.
		if isAutoRandom && isPk {
			incrementalBits := autoRandomIncrementBits(col, int(meta.AutoRandomBits))
			if err := kvcodec.tbl.RebaseAutoID(kvcodec.se, value.GetInt64()&((1<<incrementalBits)-1), false, autoid.AutoRandomType); err != nil {
				return nil, errors.Trace(err)
			}
		}
		// Rebase the auto-increment allocator with the encoded value.
		if isAutoIncCol {
			if err := kvcodec.tbl.RebaseAutoID(kvcodec.se, getAutoRecordID(value, &col.FieldType), false, autoid.AutoIncrementType); err != nil {
				return nil, errors.Trace(err)
			}
		}
	}

	if common.TableHasAutoRowID(meta) {
		// rowValue is what the row-ID allocator is rebased with: the value
		// from the row when present, otherwise the unconverted rowID.
		rowValue := rowID
		j := columnPermutation[len(cols)]
		if j >= 0 && j < len(row) {
			value, err = table.CastValue(kvcodec.se, row[j], ExtraHandleColumnInfo, false, false)
			rowValue = value.GetInt64()
		} else {
			// This rowID shadows the parameter only inside this branch; the
			// converted ID is stored in the record.
			rowID := kvcodec.autoIDFn(rowID)
			value, err = types.NewIntDatum(rowID), nil
		}
		if err != nil {
			return nil, logKVConvertFailed(logger, row, j, ExtraHandleColumnInfo, err)
		}
		record = append(record, value)
		if err := kvcodec.tbl.RebaseAutoID(kvcodec.se, rowValue, false, autoid.RowIDAllocType); err != nil {
			return nil, errors.Trace(err)
		}
	}

	if len(kvcodec.genCols) > 0 {
		// Evaluate generated columns in order, writing each result back into
		// both the mutable row and the record so later expressions see it.
		mutRow := chunk.MutRowFromDatums(record)
		for _, gc := range kvcodec.genCols {
			col := cols[gc.index].ToInfo()
			evaluated, err := gc.expr.Eval(mutRow.ToRow())
			if err != nil {
				return nil, logEvalGenExprFailed(logger, row, col, err)
			}
			value, err := table.CastValue(kvcodec.se, evaluated, col, false, false)
			if err != nil {
				return nil, logEvalGenExprFailed(logger, row, col, err)
			}
			mutRow.SetDatum(gc.index, value)
			record[gc.index] = value
		}
	}

	_, err = kvcodec.tbl.AddRecord(kvcodec.se, record)
	if err != nil {
		logger.Error("kv encode failed",
			zap.Array("originalRow", RowArrayMarshaler(row)),
			zap.Array("convertedRow", RowArrayMarshaler(record)),
			log.ShortError(err),
		)
		return nil, errors.Trace(err)
	}
	// Take the pairs collected by the session and tag each one with the
	// caller's rowID and offset.
	kvPairs := kvcodec.se.takeKvPairs()
	for i := 0; i < len(kvPairs.pairs); i++ {
		kvPairs.pairs[i].RowID = rowID
		kvPairs.pairs[i].Offset = offset
	}
	// Keep the backing array, truncated to zero length, for the next call.
	kvcodec.recordCache = record[:0]
	return kvPairs, nil
}
   441  
   442  // get record value for auto-increment field
   443  //
   444  // See: https://github.com/pingcap/tidb/blob/47f0f15b14ed54fc2222f3e304e29df7b05e6805/executor/insert_common.go#L781-L852
   445  func getAutoRecordID(d types.Datum, target *types.FieldType) int64 {
   446  	switch target.Tp {
   447  	case mysql.TypeFloat, mysql.TypeDouble:
   448  		return int64(math.Round(d.GetFloat64()))
   449  	case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong:
   450  		return d.GetInt64()
   451  	default:
   452  		panic(fmt.Sprintf("unsupported auto-increment field type '%d'", target.Tp))
   453  	}
   454  }
   455  
   456  func (kvs *KvPairs) Size() uint64 {
   457  	size := uint64(0)
   458  	for _, kv := range kvs.pairs {
   459  		size += uint64(len(kv.Key) + len(kv.Val))
   460  	}
   461  	return size
   462  }
   463  
   464  func (kvs *KvPairs) ClassifyAndAppend(
   465  	data *Rows,
   466  	dataChecksum *verification.KVChecksum,
   467  	indices *Rows,
   468  	indexChecksum *verification.KVChecksum,
   469  ) {
   470  	dataKVs := (*data).(*KvPairs)
   471  	indexKVs := (*indices).(*KvPairs)
   472  
   473  	for _, kv := range kvs.pairs {
   474  		if kv.Key[tablecodec.TableSplitKeyLen+1] == 'r' {
   475  			dataKVs.pairs = append(dataKVs.pairs, kv)
   476  			dataChecksum.UpdateOne(kv)
   477  		} else {
   478  			indexKVs.pairs = append(indexKVs.pairs, kv)
   479  			indexChecksum.UpdateOne(kv)
   480  		}
   481  	}
   482  
   483  	// the related buf is shared, so we only need to set it into one of the kvs so it can be released
   484  	if kvs.bytesBuf != nil {
   485  		dataKVs.bytesBuf = kvs.bytesBuf
   486  		dataKVs.memBuf = kvs.memBuf
   487  		kvs.bytesBuf = nil
   488  		kvs.memBuf = nil
   489  	}
   490  
   491  	*data = dataKVs
   492  	*indices = indexKVs
   493  }
   494  
   495  func (kvs *KvPairs) SplitIntoChunks(splitSize int) []Rows {
   496  	if len(kvs.pairs) == 0 {
   497  		return nil
   498  	}
   499  
   500  	res := make([]Rows, 0, 1)
   501  	i := 0
   502  	cumSize := 0
   503  	for j, pair := range kvs.pairs {
   504  		size := len(pair.Key) + len(pair.Val)
   505  		if i < j && cumSize+size > splitSize {
   506  			res = append(res, &KvPairs{pairs: kvs.pairs[i:j]})
   507  			i = j
   508  			cumSize = 0
   509  		}
   510  		cumSize += size
   511  	}
   512  
   513  	if i == 0 {
   514  		res = append(res, kvs)
   515  	} else {
   516  		res = append(res, &KvPairs{
   517  			pairs:    kvs.pairs[i:],
   518  			bytesBuf: kvs.bytesBuf,
   519  			memBuf:   kvs.memBuf,
   520  		})
   521  	}
   522  	return res
   523  }
   524  
   525  func (kvs *KvPairs) Clear() Rows {
   526  	if kvs.bytesBuf != nil {
   527  		kvs.memBuf.Recycle(kvs.bytesBuf)
   528  		kvs.bytesBuf = nil
   529  		kvs.memBuf = nil
   530  	}
   531  	kvs.pairs = kvs.pairs[:0]
   532  	return kvs
   533  }