github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/common/verify_checksum.go (about) 1 // Copyright 2023 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package common 15 16 import ( 17 "encoding/binary" 18 "hash/crc32" 19 "math" 20 "strconv" 21 22 "github.com/pingcap/errors" 23 "github.com/pingcap/log" 24 timodel "github.com/pingcap/tidb/pkg/parser/model" 25 "github.com/pingcap/tidb/pkg/parser/mysql" 26 "github.com/pingcap/tiflow/cdc/model" 27 "github.com/pingcap/tiflow/pkg/util" 28 "go.uber.org/zap" 29 ) 30 31 // VerifyChecksum calculate the checksum value, and compare it with the expected one, return error if not identical. 32 func VerifyChecksum(columns []*model.ColumnData, columnInfo []*timodel.ColumnInfo, expected uint32) error { 33 // if expected is 0, it means the checksum is not enabled, so we don't need to verify it. 34 // the data maybe restored by br, and the checksum is not enabled, so no expected here. 35 if expected == 0 { 36 return nil 37 } 38 checksum, err := calculateChecksum(columns, columnInfo) 39 if err != nil { 40 return errors.Trace(err) 41 } 42 if checksum != expected { 43 log.Error("checksum mismatch", 44 zap.Uint32("expected", expected), 45 zap.Uint32("actual", checksum)) 46 return errors.New("checksum mismatch") 47 } 48 49 return nil 50 } 51 52 // calculate the checksum, caller should make sure all columns is ordered by the column's id. 53 // by follow: https://github.com/pingcap/tidb/blob/e3417913f58cdd5a136259b902bf177eaf3aa637/util/rowcodec/common.go#L294 54 func calculateChecksum(columns []*model.ColumnData, columnInfo []*timodel.ColumnInfo) (uint32, error) { 55 var ( 56 checksum uint32 57 err error 58 ) 59 buf := make([]byte, 0) 60 for idx, col := range columns { 61 if len(buf) > 0 { 62 buf = buf[:0] 63 } 64 buf, err = buildChecksumBytes(buf, col.Value, columnInfo[idx].GetType()) 65 if err != nil { 66 return 0, errors.Trace(err) 67 } 68 checksum = crc32.Update(checksum, crc32.IEEETable, buf) 69 } 70 return checksum, nil 71 } 72 73 // buildChecksumBytes append value to the buf, mysqlType is used to convert value interface to concrete type. 74 // by follow: https://github.com/pingcap/tidb/blob/e3417913f58cdd5a136259b902bf177eaf3aa637/util/rowcodec/common.go#L308 75 func buildChecksumBytes(buf []byte, value interface{}, mysqlType byte) ([]byte, error) { 76 if value == nil { 77 return buf, nil 78 } 79 80 switch mysqlType { 81 // TypeTiny, TypeShort, TypeInt32 is encoded as int32 82 // TypeLong is encoded as int32 if signed, else int64. 83 // TypeLongLong is encoded as int64 if signed, else uint64, 84 // if bigintUnsignedHandlingMode set as string, encode as string. 85 case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24, mysql.TypeYear: 86 var ( 87 v uint64 88 err error 89 ) 90 switch a := value.(type) { 91 case int32: 92 v = uint64(a) 93 case uint32: 94 v = uint64(a) 95 case int64: 96 v = uint64(a) 97 case uint64: 98 v = a 99 case string: 100 v, err = strconv.ParseUint(a, 10, 64) 101 if err != nil { 102 return nil, errors.Trace(err) 103 } 104 case map[string]interface{}: 105 // this may only happen for bigint larger than math.uint64 106 v = uint64(a["value"].(int64)) 107 default: 108 log.Panic("unknown golang type for the integral value", 109 zap.Any("value", value), zap.Any("mysqlType", mysqlType)) 110 } 111 buf = binary.LittleEndian.AppendUint64(buf, v) 112 // TypeFloat encoded as float32, TypeDouble encoded as float64 113 case mysql.TypeFloat, mysql.TypeDouble: 114 var v float64 115 switch a := value.(type) { 116 case float32: 117 v = float64(a) 118 case float64: 119 v = a 120 } 121 if math.IsInf(v, 0) || math.IsNaN(v) { 122 v = 0 123 } 124 buf = binary.LittleEndian.AppendUint64(buf, math.Float64bits(v)) 125 // TypeEnum, TypeSet encoded as string 126 // but convert to int by the getColumnValue function 127 case mysql.TypeEnum, mysql.TypeSet: 128 var number uint64 129 switch v := value.(type) { 130 case uint64: 131 number = v 132 case int64: 133 number = uint64(v) 134 } 135 buf = binary.LittleEndian.AppendUint64(buf, number) 136 case mysql.TypeBit: 137 var number uint64 138 switch v := value.(type) { 139 // TypeBit encoded as bytes for the avro protocol 140 case []byte: 141 number = MustBinaryLiteralToInt(v) 142 // TypeBit encoded as uint64 for the simple protocol 143 case uint64: 144 number = v 145 } 146 buf = binary.LittleEndian.AppendUint64(buf, number) 147 // encoded as bytes if binary flag set to true, else string 148 case mysql.TypeVarchar, mysql.TypeVarString, mysql.TypeString, mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob: 149 switch a := value.(type) { 150 case string: 151 buf = appendLengthValue(buf, []byte(a)) 152 case []byte: 153 buf = appendLengthValue(buf, a) 154 default: 155 log.Panic("unknown golang type for the string value", 156 zap.Any("value", value), zap.Any("mysqlType", mysqlType)) 157 } 158 case mysql.TypeTimestamp: 159 location := "Local" 160 var ts string 161 switch data := value.(type) { 162 case map[string]interface{}: 163 ts = data["value"].(string) 164 location = data["location"].(string) 165 case string: 166 ts = data 167 } 168 ts, err := util.ConvertTimezone(ts, location) 169 if err != nil { 170 log.Panic("convert timestamp to timezone failed", 171 zap.String("timestamp", ts), zap.String("location", location), 172 zap.Error(err)) 173 } 174 buf = appendLengthValue(buf, []byte(ts)) 175 // all encoded as string 176 case mysql.TypeDatetime, mysql.TypeDate, mysql.TypeDuration, mysql.TypeNewDate: 177 buf = appendLengthValue(buf, []byte(value.(string))) 178 // encoded as string if decimalHandlingMode set to string, it's required to enable checksum. 179 case mysql.TypeNewDecimal: 180 buf = appendLengthValue(buf, []byte(value.(string))) 181 // encoded as string 182 case mysql.TypeJSON: 183 buf = appendLengthValue(buf, []byte(value.(string))) 184 // this should not happen, does not take into the checksum calculation. 185 case mysql.TypeNull, mysql.TypeGeometry: 186 // do nothing 187 default: 188 return buf, errors.New("invalid type for the checksum calculation") 189 } 190 return buf, nil 191 } 192 193 func appendLengthValue(buf []byte, val []byte) []byte { 194 buf = binary.LittleEndian.AppendUint32(buf, uint32(len(val))) 195 buf = append(buf, val...) 196 return buf 197 }