github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/entry/codec.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package entry
    15  
    16  import (
    17  	"bytes"
    18  	"time"
    19  
    20  	"github.com/pingcap/errors"
    21  	"github.com/pingcap/tidb/pkg/kv"
    22  	"github.com/pingcap/tidb/pkg/parser/mysql"
    23  	"github.com/pingcap/tidb/pkg/tablecodec"
    24  	"github.com/pingcap/tidb/pkg/types"
    25  	"github.com/pingcap/tidb/pkg/util/codec"
    26  	"github.com/pingcap/tidb/pkg/util/rowcodec"
    27  	"github.com/pingcap/tiflow/cdc/model"
    28  	cerror "github.com/pingcap/tiflow/pkg/errors"
    29  )
    30  
    31  var (
    32  	tablePrefix  = []byte{'t'}
    33  	recordPrefix = []byte("_r")
    34  	metaPrefix   = []byte("m")
    35  )
    36  
    37  var (
    38  	intLen           = 8
    39  	tablePrefixLen   = len(tablePrefix)
    40  	prefixTableIDLen = tablePrefixLen + intLen /*tableID*/
    41  )
    42  
    43  // MetaType is for data structure meta/data flag.
    44  type MetaType byte
    45  
    46  const (
    47  	// UnknownMetaType is used for all unknown meta types
    48  	UnknownMetaType MetaType = 0
    49  	// StringMeta is the flag for string meta.
    50  	StringMeta MetaType = 'S'
    51  	// StringData is the flag for string data.
    52  	StringData MetaType = 's'
    53  	// HashMeta is the flag for hash meta.
    54  	HashMeta MetaType = 'H'
    55  	// HashData is the flag for hash data.
    56  	HashData MetaType = 'h'
    57  	// ListMeta is the flag for list meta.
    58  	ListMeta MetaType = 'L'
    59  	// ListData is the flag for list data.
    60  	ListData MetaType = 'l'
    61  )
    62  
    63  func decodeTableID(key []byte) (rest []byte, tableID int64, err error) {
    64  	if len(key) < prefixTableIDLen || !bytes.HasPrefix(key, tablePrefix) {
    65  		return nil, 0, cerror.ErrInvalidRecordKey.GenWithStackByArgs(key)
    66  	}
    67  	key = key[tablePrefixLen:]
    68  	rest, tableID, err = codec.DecodeInt(key)
    69  	if err != nil {
    70  		return nil, 0, cerror.WrapError(cerror.ErrCodecDecode, err)
    71  	}
    72  	return
    73  }
    74  
    75  // decodeRow decodes a byte slice into datums with an existing row map.
    76  func decodeRow(b []byte, recordID kv.Handle, tableInfo *model.TableInfo, tz *time.Location) (map[int64]types.Datum, error) {
    77  	if len(b) == 0 {
    78  		return map[int64]types.Datum{}, nil
    79  	}
    80  	handleColIDs, handleColFt, reqCols := tableInfo.GetRowColInfos()
    81  	var (
    82  		datums map[int64]types.Datum
    83  		err    error
    84  	)
    85  	if rowcodec.IsNewFormat(b) {
    86  		encoder := rowcodec.NewDatumMapDecoder(reqCols, tz)
    87  		datums, err = decodeRowV2(encoder, b)
    88  	} else {
    89  		datums, err = decodeRowV1(b, tableInfo, tz)
    90  	}
    91  	if err != nil {
    92  		return nil, errors.Trace(err)
    93  	}
    94  	return tablecodec.DecodeHandleToDatumMap(recordID, handleColIDs, handleColFt, tz, datums)
    95  }
    96  
    97  // decodeRowV1 decodes value data using old encoding format.
    98  // Row layout: colID1, value1, colID2, value2, .....
    99  func decodeRowV1(b []byte, tableInfo *model.TableInfo, tz *time.Location) (map[int64]types.Datum, error) {
   100  	row := make(map[int64]types.Datum)
   101  	if len(b) == 1 && b[0] == codec.NilFlag {
   102  		b = b[1:]
   103  	}
   104  	var err error
   105  	var data []byte
   106  	for len(b) > 0 {
   107  		// Get col id.
   108  		data, b, err = codec.CutOne(b)
   109  		if err != nil {
   110  			return nil, cerror.WrapError(cerror.ErrCodecDecode, err)
   111  		}
   112  		_, cid, err := codec.DecodeOne(data)
   113  		if err != nil {
   114  			return nil, cerror.WrapError(cerror.ErrCodecDecode, err)
   115  		}
   116  		id := cid.GetInt64()
   117  
   118  		// Get col value.
   119  		data, b, err = codec.CutOne(b)
   120  		if err != nil {
   121  			return nil, cerror.WrapError(cerror.ErrCodecDecode, err)
   122  		}
   123  		_, v, err := codec.DecodeOne(data)
   124  		if err != nil {
   125  			return nil, cerror.WrapError(cerror.ErrCodecDecode, err)
   126  		}
   127  
   128  		// unflatten value
   129  		colInfo, exist := tableInfo.GetColumnInfo(id)
   130  		if !exist {
   131  			// can not find column info, ignore this column because the column should be in WRITE ONLY state
   132  			continue
   133  		}
   134  		fieldType := &colInfo.FieldType
   135  		datum, err := unflatten(v, fieldType, tz)
   136  		if err != nil {
   137  			return nil, cerror.WrapError(cerror.ErrCodecDecode, err)
   138  		}
   139  		row[id] = datum
   140  	}
   141  	return row, nil
   142  }
   143  
   144  // decodeRowV2 decodes value data using new encoding format.
   145  // Ref: https://github.com/pingcap/tidb/pull/12634
   146  //
   147  //	https://github.com/pingcap/tidb/blob/master/docs/design/2018-07-19-row-format.md
   148  func decodeRowV2(
   149  	decoder *rowcodec.DatumMapDecoder, data []byte,
   150  ) (map[int64]types.Datum, error) {
   151  	datums, err := decoder.DecodeToDatumMap(data, nil)
   152  	if err != nil {
   153  		return datums, cerror.WrapError(cerror.ErrDecodeRowToDatum, err)
   154  	}
   155  	return datums, nil
   156  }
   157  
   158  // unflatten converts a raw datum to a column datum.
   159  func unflatten(datum types.Datum, ft *types.FieldType, loc *time.Location) (types.Datum, error) {
   160  	if datum.IsNull() {
   161  		return datum, nil
   162  	}
   163  	switch ft.GetType() {
   164  	case mysql.TypeFloat:
   165  		datum.SetFloat32(float32(datum.GetFloat64()))
   166  		return datum, nil
   167  	case mysql.TypeVarchar, mysql.TypeString, mysql.TypeVarString, mysql.TypeTinyBlob,
   168  		mysql.TypeMediumBlob, mysql.TypeBlob, mysql.TypeLongBlob:
   169  		datum.SetString(datum.GetString(), ft.GetCollate())
   170  	case mysql.TypeTiny, mysql.TypeShort, mysql.TypeYear, mysql.TypeInt24,
   171  		mysql.TypeLong, mysql.TypeLonglong, mysql.TypeDouble:
   172  		return datum, nil
   173  	case mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
   174  		t := types.NewTime(types.ZeroCoreTime, ft.GetType(), ft.GetDecimal())
   175  		var err error
   176  		err = t.FromPackedUint(datum.GetUint64())
   177  		if err != nil {
   178  			return datum, cerror.WrapError(cerror.ErrDatumUnflatten, err)
   179  		}
   180  		if ft.GetType() == mysql.TypeTimestamp && !t.IsZero() {
   181  			err = t.ConvertTimeZone(time.UTC, loc)
   182  			if err != nil {
   183  				return datum, cerror.WrapError(cerror.ErrDatumUnflatten, err)
   184  			}
   185  		}
   186  		datum.SetUint64(0)
   187  		datum.SetMysqlTime(t)
   188  		return datum, nil
   189  	case mysql.TypeDuration: // duration should read fsp from column meta data
   190  		dur := types.Duration{Duration: time.Duration(datum.GetInt64()), Fsp: ft.GetDecimal()}
   191  		datum.SetMysqlDuration(dur)
   192  		return datum, nil
   193  	case mysql.TypeEnum:
   194  		// ignore error deliberately, to read empty enum value.
   195  		enum, err := types.ParseEnumValue(ft.GetElems(), datum.GetUint64())
   196  		if err != nil {
   197  			enum = types.Enum{}
   198  		}
   199  		datum.SetMysqlEnum(enum, ft.GetCollate())
   200  		return datum, nil
   201  	case mysql.TypeSet:
   202  		set, err := types.ParseSetValue(ft.GetElems(), datum.GetUint64())
   203  		if err != nil {
   204  			return datum, cerror.WrapError(cerror.ErrDatumUnflatten, err)
   205  		}
   206  		datum.SetMysqlSet(set, ft.GetCollate())
   207  		return datum, nil
   208  	case mysql.TypeBit:
   209  		val := datum.GetUint64()
   210  		byteSize := (ft.GetFlen() + 7) >> 3
   211  		datum.SetUint64(0)
   212  		datum.SetMysqlBit(types.NewBinaryLiteralFromUint(val, byteSize))
   213  	}
   214  	return datum, nil
   215  }