github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/entry/codec.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package entry 15 16 import ( 17 "bytes" 18 "time" 19 20 "github.com/pingcap/errors" 21 "github.com/pingcap/parser/mysql" 22 "github.com/pingcap/ticdc/cdc/model" 23 cerror "github.com/pingcap/ticdc/pkg/errors" 24 "github.com/pingcap/tidb/kv" 25 "github.com/pingcap/tidb/tablecodec" 26 "github.com/pingcap/tidb/types" 27 "github.com/pingcap/tidb/util/codec" 28 "github.com/pingcap/tidb/util/rowcodec" 29 ) 30 31 var ( 32 tablePrefix = []byte{'t'} 33 recordPrefix = []byte("_r") 34 metaPrefix = []byte("m") 35 ) 36 37 var ( 38 intLen = 8 39 tablePrefixLen = len(tablePrefix) 40 recordPrefixLen = len(recordPrefix) 41 metaPrefixLen = len(metaPrefix) 42 prefixTableIDLen = tablePrefixLen + intLen /*tableID*/ 43 prefixRecordIDLen = recordPrefixLen + intLen /*recordID*/ 44 ) 45 46 // MetaType is for data structure meta/data flag. 47 type MetaType byte 48 49 const ( 50 // UnknownMetaType is used for all unknown meta types 51 UnknownMetaType MetaType = 0 52 // StringMeta is the flag for string meta. 53 StringMeta MetaType = 'S' 54 // StringData is the flag for string data. 55 StringData MetaType = 's' 56 // HashMeta is the flag for hash meta. 57 HashMeta MetaType = 'H' 58 // HashData is the flag for hash data. 59 HashData MetaType = 'h' 60 // ListMeta is the flag for list meta. 61 ListMeta MetaType = 'L' 62 // ListData is the flag for list data. 63 ListData MetaType = 'l' 64 ) 65 66 type meta interface { 67 getType() MetaType 68 } 69 70 type metaHashData struct { 71 key string 72 field []byte 73 } 74 75 func (d metaHashData) getType() MetaType { 76 return HashData 77 } 78 79 type metaListData struct { 80 key string 81 index int64 82 } 83 84 func (d metaListData) getType() MetaType { 85 return ListData 86 } 87 88 type other struct { 89 tp MetaType 90 } 91 92 func (d other) getType() MetaType { 93 return d.tp 94 } 95 96 func decodeTableID(key []byte) (rest []byte, tableID int64, err error) { 97 if len(key) < prefixTableIDLen || !bytes.HasPrefix(key, tablePrefix) { 98 return nil, 0, cerror.ErrInvalidRecordKey.GenWithStackByArgs(key) 99 } 100 key = key[tablePrefixLen:] 101 rest, tableID, err = codec.DecodeInt(key) 102 if err != nil { 103 return nil, 0, cerror.WrapError(cerror.ErrCodecDecode, err) 104 } 105 return 106 } 107 108 func decodeRecordID(key []byte) (rest []byte, recordID int64, err error) { 109 if len(key) < prefixRecordIDLen || !bytes.HasPrefix(key, recordPrefix) { 110 return nil, 0, cerror.ErrInvalidRecordKey.GenWithStackByArgs(key) 111 } 112 key = key[recordPrefixLen:] 113 rest, recordID, err = codec.DecodeInt(key) 114 if err != nil { 115 return nil, 0, cerror.WrapError(cerror.ErrCodecDecode, err) 116 } 117 return 118 } 119 120 func decodeMetaKey(ek []byte) (meta, error) { 121 if !bytes.HasPrefix(ek, metaPrefix) { 122 return nil, cerror.ErrInvalidRecordKey.GenWithStackByArgs(ek) 123 } 124 125 ek = ek[metaPrefixLen:] 126 ek, rawKey, err := codec.DecodeBytes(ek, nil) 127 if err != nil { 128 return nil, cerror.WrapError(cerror.ErrCodecDecode, err) 129 } 130 key := string(rawKey) 131 132 ek, rawTp, err := codec.DecodeUint(ek) 133 if err != nil { 134 return nil, cerror.WrapError(cerror.ErrCodecDecode, err) 135 } 136 switch MetaType(rawTp) { 137 case HashData: 138 if len(ek) > 0 { 139 var field []byte 140 _, field, err = codec.DecodeBytes(ek, nil) 141 if err != nil { 142 return nil, cerror.WrapError(cerror.ErrCodecDecode, err) 143 } 144 return metaHashData{key: key, field: field}, nil 145 } 146 if len(ek) > 0 { 147 // TODO: warning hash key decode failure 148 panic("hash key decode failure, should never happen") 149 } 150 case ListData: 151 if len(ek) == 0 { 152 panic("list key decode failure") 153 } 154 var index int64 155 _, index, err = codec.DecodeInt(ek) 156 if err != nil { 157 return nil, cerror.WrapError(cerror.ErrCodecDecode, err) 158 } 159 return metaListData{key: key, index: index}, nil 160 // TODO decode other key 161 default: 162 return other{tp: MetaType(rawTp)}, nil 163 } 164 return nil, cerror.ErrUnknownMetaType.GenWithStackByArgs(rawTp) 165 } 166 167 // decodeRow decodes a byte slice into datums with a existing row map. 168 func decodeRow(b []byte, recordID kv.Handle, tableInfo *model.TableInfo, tz *time.Location) (map[int64]types.Datum, error) { 169 if len(b) == 0 { 170 return map[int64]types.Datum{}, nil 171 } 172 handleColIDs, handleColFt, reqCols := tableInfo.GetRowColInfos() 173 var datums map[int64]types.Datum 174 var err error 175 if rowcodec.IsNewFormat(b) { 176 datums, err = decodeRowV2(b, reqCols, tz) 177 } else { 178 datums, err = decodeRowV1(b, tableInfo, tz) 179 } 180 if err != nil { 181 return nil, errors.Trace(err) 182 } 183 return tablecodec.DecodeHandleToDatumMap(recordID, handleColIDs, handleColFt, tz, datums) 184 } 185 186 // decodeRowV1 decodes value data using old encoding format. 187 // Row layout: colID1, value1, colID2, value2, ..... 188 func decodeRowV1(b []byte, tableInfo *model.TableInfo, tz *time.Location) (map[int64]types.Datum, error) { 189 row := make(map[int64]types.Datum) 190 if len(b) == 1 && b[0] == codec.NilFlag { 191 b = b[1:] 192 } 193 var err error 194 var data []byte 195 for len(b) > 0 { 196 // Get col id. 197 data, b, err = codec.CutOne(b) 198 if err != nil { 199 return nil, cerror.WrapError(cerror.ErrCodecDecode, err) 200 } 201 _, cid, err := codec.DecodeOne(data) 202 if err != nil { 203 return nil, cerror.WrapError(cerror.ErrCodecDecode, err) 204 } 205 id := cid.GetInt64() 206 207 // Get col value. 208 data, b, err = codec.CutOne(b) 209 if err != nil { 210 return nil, cerror.WrapError(cerror.ErrCodecDecode, err) 211 } 212 _, v, err := codec.DecodeOne(data) 213 if err != nil { 214 return nil, cerror.WrapError(cerror.ErrCodecDecode, err) 215 } 216 217 // unflatten value 218 colInfo, exist := tableInfo.GetColumnInfo(id) 219 if !exist { 220 // can not find column info, ignore this column because the column should be in WRITE ONLY state 221 continue 222 } 223 fieldType := &colInfo.FieldType 224 datum, err := unflatten(v, fieldType, tz) 225 if err != nil { 226 return nil, cerror.WrapError(cerror.ErrCodecDecode, err) 227 } 228 row[id] = datum 229 } 230 return row, nil 231 } 232 233 // decodeRowV2 decodes value data using new encoding format. 234 // Ref: https://github.com/pingcap/tidb/pull/12634 235 // https://github.com/pingcap/tidb/blob/master/docs/design/2018-07-19-row-format.md 236 func decodeRowV2(data []byte, columns []rowcodec.ColInfo, tz *time.Location) (map[int64]types.Datum, error) { 237 decoder := rowcodec.NewDatumMapDecoder(columns, tz) 238 datums, err := decoder.DecodeToDatumMap(data, nil) 239 if err != nil { 240 return datums, cerror.WrapError(cerror.ErrDecodeRowToDatum, err) 241 } 242 return datums, nil 243 } 244 245 // unflatten converts a raw datum to a column datum. 246 func unflatten(datum types.Datum, ft *types.FieldType, loc *time.Location) (types.Datum, error) { 247 if datum.IsNull() { 248 return datum, nil 249 } 250 switch ft.Tp { 251 case mysql.TypeFloat: 252 datum.SetFloat32(float32(datum.GetFloat64())) 253 return datum, nil 254 case mysql.TypeVarchar, mysql.TypeString, mysql.TypeVarString, mysql.TypeTinyBlob, 255 mysql.TypeMediumBlob, mysql.TypeBlob, mysql.TypeLongBlob: 256 datum.SetString(datum.GetString(), ft.Collate) 257 case mysql.TypeTiny, mysql.TypeShort, mysql.TypeYear, mysql.TypeInt24, 258 mysql.TypeLong, mysql.TypeLonglong, mysql.TypeDouble: 259 return datum, nil 260 case mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp: 261 t := types.NewTime(types.ZeroCoreTime, ft.Tp, int8(ft.Decimal)) 262 var err error 263 err = t.FromPackedUint(datum.GetUint64()) 264 if err != nil { 265 return datum, cerror.WrapError(cerror.ErrDatumUnflatten, err) 266 } 267 if ft.Tp == mysql.TypeTimestamp && !t.IsZero() { 268 err = t.ConvertTimeZone(time.UTC, loc) 269 if err != nil { 270 return datum, cerror.WrapError(cerror.ErrDatumUnflatten, err) 271 } 272 } 273 datum.SetUint64(0) 274 datum.SetMysqlTime(t) 275 return datum, nil 276 case mysql.TypeDuration: // duration should read fsp from column meta data 277 dur := types.Duration{Duration: time.Duration(datum.GetInt64()), Fsp: int8(ft.Decimal)} 278 datum.SetMysqlDuration(dur) 279 return datum, nil 280 case mysql.TypeEnum: 281 // ignore error deliberately, to read empty enum value. 282 enum, err := types.ParseEnumValue(ft.Elems, datum.GetUint64()) 283 if err != nil { 284 enum = types.Enum{} 285 } 286 datum.SetMysqlEnum(enum, ft.Collate) 287 return datum, nil 288 case mysql.TypeSet: 289 set, err := types.ParseSetValue(ft.Elems, datum.GetUint64()) 290 if err != nil { 291 return datum, cerror.WrapError(cerror.ErrDatumUnflatten, err) 292 } 293 datum.SetMysqlSet(set, ft.Collate) 294 return datum, nil 295 case mysql.TypeBit: 296 val := datum.GetUint64() 297 byteSize := (ft.Flen + 7) >> 3 298 datum.SetUint64(0) 299 datum.SetMysqlBit(types.NewBinaryLiteralFromUint(val, byteSize)) 300 } 301 return datum, nil 302 }