github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/common/verify_checksum.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package common
    15  
    16  import (
    17  	"encoding/binary"
    18  	"hash/crc32"
    19  	"math"
    20  	"strconv"
    21  
    22  	"github.com/pingcap/errors"
    23  	"github.com/pingcap/log"
    24  	timodel "github.com/pingcap/tidb/pkg/parser/model"
    25  	"github.com/pingcap/tidb/pkg/parser/mysql"
    26  	"github.com/pingcap/tiflow/cdc/model"
    27  	"github.com/pingcap/tiflow/pkg/util"
    28  	"go.uber.org/zap"
    29  )
    30  
    31  // VerifyChecksum calculate the checksum value, and compare it with the expected one, return error if not identical.
    32  func VerifyChecksum(columns []*model.ColumnData, columnInfo []*timodel.ColumnInfo, expected uint32) error {
    33  	// if expected is 0, it means the checksum is not enabled, so we don't need to verify it.
    34  	// the data maybe restored by br, and the checksum is not enabled, so no expected here.
    35  	if expected == 0 {
    36  		return nil
    37  	}
    38  	checksum, err := calculateChecksum(columns, columnInfo)
    39  	if err != nil {
    40  		return errors.Trace(err)
    41  	}
    42  	if checksum != expected {
    43  		log.Error("checksum mismatch",
    44  			zap.Uint32("expected", expected),
    45  			zap.Uint32("actual", checksum))
    46  		return errors.New("checksum mismatch")
    47  	}
    48  
    49  	return nil
    50  }
    51  
    52  // calculate the checksum, caller should make sure all columns is ordered by the column's id.
    53  // by follow: https://github.com/pingcap/tidb/blob/e3417913f58cdd5a136259b902bf177eaf3aa637/util/rowcodec/common.go#L294
    54  func calculateChecksum(columns []*model.ColumnData, columnInfo []*timodel.ColumnInfo) (uint32, error) {
    55  	var (
    56  		checksum uint32
    57  		err      error
    58  	)
    59  	buf := make([]byte, 0)
    60  	for idx, col := range columns {
    61  		if len(buf) > 0 {
    62  			buf = buf[:0]
    63  		}
    64  		buf, err = buildChecksumBytes(buf, col.Value, columnInfo[idx].GetType())
    65  		if err != nil {
    66  			return 0, errors.Trace(err)
    67  		}
    68  		checksum = crc32.Update(checksum, crc32.IEEETable, buf)
    69  	}
    70  	return checksum, nil
    71  }
    72  
    73  // buildChecksumBytes append value to the buf, mysqlType is used to convert value interface to concrete type.
    74  // by follow: https://github.com/pingcap/tidb/blob/e3417913f58cdd5a136259b902bf177eaf3aa637/util/rowcodec/common.go#L308
    75  func buildChecksumBytes(buf []byte, value interface{}, mysqlType byte) ([]byte, error) {
    76  	if value == nil {
    77  		return buf, nil
    78  	}
    79  
    80  	switch mysqlType {
    81  	// TypeTiny, TypeShort, TypeInt32 is encoded as int32
    82  	// TypeLong is encoded as int32 if signed, else int64.
    83  	// TypeLongLong is encoded as int64 if signed, else uint64,
    84  	// if bigintUnsignedHandlingMode set as string, encode as string.
    85  	case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24, mysql.TypeYear:
    86  		var (
    87  			v   uint64
    88  			err error
    89  		)
    90  		switch a := value.(type) {
    91  		case int32:
    92  			v = uint64(a)
    93  		case uint32:
    94  			v = uint64(a)
    95  		case int64:
    96  			v = uint64(a)
    97  		case uint64:
    98  			v = a
    99  		case string:
   100  			v, err = strconv.ParseUint(a, 10, 64)
   101  			if err != nil {
   102  				return nil, errors.Trace(err)
   103  			}
   104  		case map[string]interface{}:
   105  			// this may only happen for bigint larger than math.uint64
   106  			v = uint64(a["value"].(int64))
   107  		default:
   108  			log.Panic("unknown golang type for the integral value",
   109  				zap.Any("value", value), zap.Any("mysqlType", mysqlType))
   110  		}
   111  		buf = binary.LittleEndian.AppendUint64(buf, v)
   112  	// TypeFloat encoded as float32, TypeDouble encoded as float64
   113  	case mysql.TypeFloat, mysql.TypeDouble:
   114  		var v float64
   115  		switch a := value.(type) {
   116  		case float32:
   117  			v = float64(a)
   118  		case float64:
   119  			v = a
   120  		}
   121  		if math.IsInf(v, 0) || math.IsNaN(v) {
   122  			v = 0
   123  		}
   124  		buf = binary.LittleEndian.AppendUint64(buf, math.Float64bits(v))
   125  	// TypeEnum, TypeSet encoded as string
   126  	// but convert to int by the getColumnValue function
   127  	case mysql.TypeEnum, mysql.TypeSet:
   128  		var number uint64
   129  		switch v := value.(type) {
   130  		case uint64:
   131  			number = v
   132  		case int64:
   133  			number = uint64(v)
   134  		}
   135  		buf = binary.LittleEndian.AppendUint64(buf, number)
   136  	case mysql.TypeBit:
   137  		var number uint64
   138  		switch v := value.(type) {
   139  		// TypeBit encoded as bytes for the avro protocol
   140  		case []byte:
   141  			number = MustBinaryLiteralToInt(v)
   142  		// TypeBit encoded as uint64 for the simple protocol
   143  		case uint64:
   144  			number = v
   145  		}
   146  		buf = binary.LittleEndian.AppendUint64(buf, number)
   147  	// encoded as bytes if binary flag set to true, else string
   148  	case mysql.TypeVarchar, mysql.TypeVarString, mysql.TypeString, mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob:
   149  		switch a := value.(type) {
   150  		case string:
   151  			buf = appendLengthValue(buf, []byte(a))
   152  		case []byte:
   153  			buf = appendLengthValue(buf, a)
   154  		default:
   155  			log.Panic("unknown golang type for the string value",
   156  				zap.Any("value", value), zap.Any("mysqlType", mysqlType))
   157  		}
   158  	case mysql.TypeTimestamp:
   159  		location := "Local"
   160  		var ts string
   161  		switch data := value.(type) {
   162  		case map[string]interface{}:
   163  			ts = data["value"].(string)
   164  			location = data["location"].(string)
   165  		case string:
   166  			ts = data
   167  		}
   168  		ts, err := util.ConvertTimezone(ts, location)
   169  		if err != nil {
   170  			log.Panic("convert timestamp to timezone failed",
   171  				zap.String("timestamp", ts), zap.String("location", location),
   172  				zap.Error(err))
   173  		}
   174  		buf = appendLengthValue(buf, []byte(ts))
   175  	// all encoded as string
   176  	case mysql.TypeDatetime, mysql.TypeDate, mysql.TypeDuration, mysql.TypeNewDate:
   177  		buf = appendLengthValue(buf, []byte(value.(string)))
   178  	// encoded as string if decimalHandlingMode set to string, it's required to enable checksum.
   179  	case mysql.TypeNewDecimal:
   180  		buf = appendLengthValue(buf, []byte(value.(string)))
   181  	// encoded as string
   182  	case mysql.TypeJSON:
   183  		buf = appendLengthValue(buf, []byte(value.(string)))
   184  	// this should not happen, does not take into the checksum calculation.
   185  	case mysql.TypeNull, mysql.TypeGeometry:
   186  		// do nothing
   187  	default:
   188  		return buf, errors.New("invalid type for the checksum calculation")
   189  	}
   190  	return buf, nil
   191  }
   192  
   193  func appendLengthValue(buf []byte, val []byte) []byte {
   194  	buf = binary.LittleEndian.AppendUint32(buf, uint32(len(val)))
   195  	buf = append(buf, val...)
   196  	return buf
   197  }