github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/craft/message_encoder.go (about) 1 // Copyright 2021 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License") 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.orglicensesLICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package craft 15 16 import ( 17 "encoding/binary" 18 "math" 19 "unsafe" 20 21 "github.com/pingcap/tidb/pkg/parser/mysql" 22 "github.com/pingcap/tiflow/cdc/model" 23 ) 24 25 // create byte slice from string without copying 26 func unsafeStringToBytes(s string) []byte { 27 return *(*[]byte)(unsafe.Pointer( 28 &struct { 29 string 30 Cap int 31 }{s, len(s)}, 32 )) 33 } 34 35 // Primitive type encoders 36 func encodeFloat64(bits []byte, data float64) []byte { 37 v := math.Float64bits(data) 38 return append(bits, byte(v), byte(v>>8), byte(v>>16), byte(v>>24), byte(v>>32), byte(v>>40), byte(v>>48), byte(v>>56)) 39 } 40 41 func encodeVarint(bits []byte, data int64) []byte { 42 udata := uint64(data) << 1 43 if data < 0 { 44 udata = ^udata 45 } 46 return encodeUvarint(bits, udata) 47 } 48 49 func encodeUvarint(bits []byte, data uint64) []byte { 50 // Encode uint64 in varint format that is used in protobuf 51 // Reference: https://developers.google.com/protocol-buffers/docs/encoding#varints 52 for data >= 0x80 { 53 bits = append(bits, byte(data)|0x80) 54 data >>= 7 55 } 56 return append(bits, byte(data)) 57 } 58 59 func encodeUvarintReversed(bits []byte, data uint64) ([]byte, int) { 60 // Encode uint64 in varint format that is similar to protobuf but with bytes order reversed 61 // Reference: https://developers.google.com/protocol-buffers/docs/encoding#varints 62 buf := make([]byte, binary.MaxVarintLen64) 63 i := 0 64 for data >= 0x80 { 65 buf[i] = byte(data) | 0x80 66 data >>= 7 67 i++ 68 } 69 buf[i] = byte(data) 70 for bi := i; bi >= 0; bi-- { 71 bits = append(bits, buf[bi]) 72 } 73 return bits, i + 1 74 } 75 76 //nolint:unused,deadcode 77 func encodeBytes(bits []byte, data []byte) []byte { 78 l := len(data) 79 bits = encodeUvarint(bits, uint64(l)) 80 return append(bits, data...) 81 } 82 83 func encodeString(bits []byte, data string) []byte { 84 l := len(data) 85 bits = encodeUvarint(bits, uint64(l)) 86 return append(bits, data...) 87 } 88 89 // / Chunk encoders 90 func encodeStringChunk(bits []byte, data []string) []byte { 91 for _, s := range data { 92 bits = encodeUvarint(bits, uint64(len(s))) 93 } 94 for _, s := range data { 95 bits = append(bits, s...) 96 } 97 return bits 98 } 99 100 func encodeNullableStringChunk(bits []byte, data []*string) []byte { 101 for _, s := range data { 102 var l int64 = -1 103 if s != nil { 104 l = int64(len(*s)) 105 } 106 bits = encodeVarint(bits, l) 107 } 108 for _, s := range data { 109 if s != nil { 110 bits = append(bits, *s...) 111 } 112 } 113 return bits 114 } 115 116 //nolint:unused,deadcode 117 func encodeBytesChunk(bits []byte, data [][]byte) []byte { 118 for _, b := range data { 119 bits = encodeUvarint(bits, uint64(len(b))) 120 } 121 for _, b := range data { 122 bits = append(bits, b...) 123 } 124 return bits 125 } 126 127 func encodeNullableBytesChunk(bits []byte, data [][]byte) []byte { 128 for _, b := range data { 129 var l int64 = -1 130 if b != nil { 131 l = int64(len(b)) 132 } 133 bits = encodeVarint(bits, l) 134 } 135 for _, b := range data { 136 if b != nil { 137 bits = append(bits, b...) 138 } 139 } 140 return bits 141 } 142 143 func encodeVarintChunk(bits []byte, data []int64) []byte { 144 for _, v := range data { 145 bits = encodeVarint(bits, v) 146 } 147 return bits 148 } 149 150 func encodeUvarintChunk(bits []byte, data []uint64) []byte { 151 for _, v := range data { 152 bits = encodeUvarint(bits, v) 153 } 154 return bits 155 } 156 157 func encodeDeltaVarintChunk(bits []byte, data []int64) []byte { 158 last := data[0] 159 bits = encodeVarint(bits, last) 160 for _, v := range data[1:] { 161 bits = encodeVarint(bits, v-last) 162 last = v 163 } 164 return bits 165 } 166 167 func encodeDeltaUvarintChunk(bits []byte, data []uint64) []byte { 168 last := data[0] 169 bits = encodeUvarint(bits, last) 170 for _, v := range data[1:] { 171 bits = encodeUvarint(bits, v-last) 172 last = v 173 } 174 return bits 175 } 176 177 func encodeSizeTables(bits []byte, tables [][]int64) []byte { 178 size := len(bits) 179 for _, table := range tables { 180 bits = encodeUvarint(bits, uint64(len(table))) 181 bits = encodeDeltaVarintChunk(bits, table) 182 } 183 bits, _ = encodeUvarintReversed(bits, uint64(len(bits)-size)) 184 return bits 185 } 186 187 // EncodeTiDBType encodes TiDB types 188 func EncodeTiDBType(allocator *SliceAllocator, ty byte, flag model.ColumnFlagType, value interface{}) []byte { 189 if value == nil { 190 return nil 191 } 192 switch ty { 193 case mysql.TypeDate, mysql.TypeDatetime, mysql.TypeNewDate, mysql.TypeTimestamp, mysql.TypeDuration, mysql.TypeJSON, mysql.TypeNewDecimal: 194 // value type for these mysql types are string 195 return unsafeStringToBytes(value.(string)) 196 case mysql.TypeEnum, mysql.TypeSet, mysql.TypeBit: 197 // value type for these mysql types are uint64 198 return encodeUvarint(allocator.byteSlice(binary.MaxVarintLen64)[:0], value.(uint64)) 199 case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar, 200 mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob: 201 // value type for these mysql types are []byte 202 return value.([]byte) 203 case mysql.TypeFloat: 204 return encodeFloat64(allocator.byteSlice(4)[:0], float64(value.(float32))) 205 case mysql.TypeDouble: 206 // value type for these mysql types are float64 207 return encodeFloat64(allocator.byteSlice(8)[:0], value.(float64)) 208 case mysql.TypeYear: 209 // year is encoded as int64 210 return encodeVarint(allocator.byteSlice(binary.MaxVarintLen64)[:0], value.(int64)) 211 case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24: 212 // value type for these mysql types are int64 or uint64 depends on flags 213 if flag.IsUnsigned() { 214 return encodeUvarint(allocator.byteSlice(binary.MaxVarintLen64)[:0], value.(uint64)) 215 } 216 return encodeVarint(allocator.byteSlice(binary.MaxVarintLen64)[:0], value.(int64)) 217 case mysql.TypeUnspecified: 218 fallthrough 219 case mysql.TypeNull: 220 fallthrough 221 case mysql.TypeGeometry: 222 return nil 223 } 224 return nil 225 } 226 227 // MessageEncoder is encoder for message 228 type MessageEncoder struct { 229 bits []byte 230 sizeTables [][]int64 231 bodyLastOffset int 232 bodySize []int64 233 bodySizeIndex int 234 metaSizeTable []int64 235 236 allocator *SliceAllocator 237 dict *termDictionary 238 } 239 240 // NewMessageEncoder creates a new encoder with given allocator 241 func NewMessageEncoder(allocator *SliceAllocator) *MessageEncoder { 242 return &MessageEncoder{ 243 bits: encodeUvarint(make([]byte, 0, DefaultBufferCapacity), Version1), 244 allocator: allocator, 245 dict: newEncodingTermDictionary(), 246 } 247 } 248 249 func (e *MessageEncoder) encodeBodySize() *MessageEncoder { 250 e.bodySize[e.bodySizeIndex] = int64(len(e.bits) - e.bodyLastOffset) 251 e.bodyLastOffset = len(e.bits) 252 e.bodySizeIndex++ 253 return e 254 } 255 256 func (e *MessageEncoder) encodeUvarint(u64 uint64) *MessageEncoder { 257 e.bits = encodeUvarint(e.bits, u64) 258 return e 259 } 260 261 func (e *MessageEncoder) encodeString(s string) *MessageEncoder { 262 e.bits = encodeString(e.bits, s) 263 return e 264 } 265 266 func (e *MessageEncoder) encodeHeaders(headers *Headers) *MessageEncoder { 267 oldSize := len(e.bits) 268 e.bodySize = e.allocator.int64Slice(headers.count) 269 e.bits = headers.encode(e.bits, e.dict) 270 e.bodyLastOffset = len(e.bits) 271 e.metaSizeTable = e.allocator.int64Slice(maxMetaSizeIndex + 1) 272 e.metaSizeTable[headerSizeIndex] = int64(len(e.bits) - oldSize) 273 e.sizeTables = append(e.sizeTables, e.metaSizeTable, e.bodySize) 274 return e 275 } 276 277 // Encode message into bits 278 func (e *MessageEncoder) Encode() []byte { 279 offset := len(e.bits) 280 e.bits = encodeTermDictionary(e.bits, e.dict) 281 e.metaSizeTable[termDictionarySizeIndex] = int64(len(e.bits) - offset) 282 return encodeSizeTables(e.bits, e.sizeTables) 283 } 284 285 func (e *MessageEncoder) encodeRowChangeEvents(events []rowChangedEvent) *MessageEncoder { 286 sizeTables := e.sizeTables 287 for _, event := range events { 288 columnGroupSizeTable := e.allocator.int64Slice(len(event)) 289 for gi, group := range event { 290 oldSize := len(e.bits) 291 e.bits = group.encode(e.bits, e.dict) 292 columnGroupSizeTable[gi] = int64(len(e.bits) - oldSize) 293 } 294 sizeTables = append(sizeTables, columnGroupSizeTable) 295 e.encodeBodySize() 296 } 297 e.sizeTables = sizeTables 298 return e 299 } 300 301 // NewResolvedEventEncoder creates a new encoder with given allocator and timestamp 302 func NewResolvedEventEncoder(allocator *SliceAllocator, ts uint64) *MessageEncoder { 303 return NewMessageEncoder(allocator).encodeHeaders(&Headers{ 304 ts: allocator.oneUint64Slice(ts), 305 ty: allocator.oneUint64Slice(uint64(model.MessageTypeResolved)), 306 partition: oneNullInt64Slice, 307 schema: oneNullStringSlice, 308 table: oneNullStringSlice, 309 count: 1, 310 }).encodeBodySize() 311 } 312 313 // NewDDLEventEncoder creates a new encoder with given allocator and timestamp 314 func NewDDLEventEncoder(allocator *SliceAllocator, ev *model.DDLEvent) *MessageEncoder { 315 ty := uint64(ev.Type) 316 query := ev.Query 317 var schema, table *string 318 if len(ev.TableInfo.TableName.Schema) > 0 { 319 schema = &ev.TableInfo.TableName.Schema 320 } 321 if len(ev.TableInfo.TableName.Table) > 0 { 322 table = &ev.TableInfo.TableName.Table 323 } 324 return NewMessageEncoder(allocator).encodeHeaders(&Headers{ 325 ts: allocator.oneUint64Slice(ev.CommitTs), 326 ty: allocator.oneUint64Slice(uint64(model.MessageTypeDDL)), 327 partition: oneNullInt64Slice, 328 schema: allocator.oneNullableStringSlice(schema), 329 table: allocator.oneNullableStringSlice(table), 330 count: 1, 331 }).encodeUvarint(ty).encodeString(query).encodeBodySize() 332 }