github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/craft/message_encoder.go (about)

     1  // Copyright 2021 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package craft
    15  
    16  import (
    17  	"encoding/binary"
    18  	"math"
    19  	"unsafe"
    20  
    21  	"github.com/pingcap/tidb/pkg/parser/mysql"
    22  	"github.com/pingcap/tiflow/cdc/model"
    23  )
    24  
    25  // create byte slice from string without copying
    26  func unsafeStringToBytes(s string) []byte {
    27  	return *(*[]byte)(unsafe.Pointer(
    28  		&struct {
    29  			string
    30  			Cap int
    31  		}{s, len(s)},
    32  	))
    33  }
    34  
    35  // Primitive type encoders
    36  func encodeFloat64(bits []byte, data float64) []byte {
    37  	v := math.Float64bits(data)
    38  	return append(bits, byte(v), byte(v>>8), byte(v>>16), byte(v>>24), byte(v>>32), byte(v>>40), byte(v>>48), byte(v>>56))
    39  }
    40  
    41  func encodeVarint(bits []byte, data int64) []byte {
    42  	udata := uint64(data) << 1
    43  	if data < 0 {
    44  		udata = ^udata
    45  	}
    46  	return encodeUvarint(bits, udata)
    47  }
    48  
    49  func encodeUvarint(bits []byte, data uint64) []byte {
    50  	// Encode uint64 in varint format that is used in protobuf
    51  	// Reference: https://developers.google.com/protocol-buffers/docs/encoding#varints
    52  	for data >= 0x80 {
    53  		bits = append(bits, byte(data)|0x80)
    54  		data >>= 7
    55  	}
    56  	return append(bits, byte(data))
    57  }
    58  
    59  func encodeUvarintReversed(bits []byte, data uint64) ([]byte, int) {
    60  	// Encode uint64 in varint format that is similar to protobuf but with bytes order reversed
    61  	// Reference: https://developers.google.com/protocol-buffers/docs/encoding#varints
    62  	buf := make([]byte, binary.MaxVarintLen64)
    63  	i := 0
    64  	for data >= 0x80 {
    65  		buf[i] = byte(data) | 0x80
    66  		data >>= 7
    67  		i++
    68  	}
    69  	buf[i] = byte(data)
    70  	for bi := i; bi >= 0; bi-- {
    71  		bits = append(bits, buf[bi])
    72  	}
    73  	return bits, i + 1
    74  }
    75  
    76  //nolint:unused,deadcode
    77  func encodeBytes(bits []byte, data []byte) []byte {
    78  	l := len(data)
    79  	bits = encodeUvarint(bits, uint64(l))
    80  	return append(bits, data...)
    81  }
    82  
    83  func encodeString(bits []byte, data string) []byte {
    84  	l := len(data)
    85  	bits = encodeUvarint(bits, uint64(l))
    86  	return append(bits, data...)
    87  }
    88  
    89  // Chunk encoders
    90  func encodeStringChunk(bits []byte, data []string) []byte {
    91  	for _, s := range data {
    92  		bits = encodeUvarint(bits, uint64(len(s)))
    93  	}
    94  	for _, s := range data {
    95  		bits = append(bits, s...)
    96  	}
    97  	return bits
    98  }
    99  
   100  func encodeNullableStringChunk(bits []byte, data []*string) []byte {
   101  	for _, s := range data {
   102  		var l int64 = -1
   103  		if s != nil {
   104  			l = int64(len(*s))
   105  		}
   106  		bits = encodeVarint(bits, l)
   107  	}
   108  	for _, s := range data {
   109  		if s != nil {
   110  			bits = append(bits, *s...)
   111  		}
   112  	}
   113  	return bits
   114  }
   115  
   116  //nolint:unused,deadcode
   117  func encodeBytesChunk(bits []byte, data [][]byte) []byte {
   118  	for _, b := range data {
   119  		bits = encodeUvarint(bits, uint64(len(b)))
   120  	}
   121  	for _, b := range data {
   122  		bits = append(bits, b...)
   123  	}
   124  	return bits
   125  }
   126  
   127  func encodeNullableBytesChunk(bits []byte, data [][]byte) []byte {
   128  	for _, b := range data {
   129  		var l int64 = -1
   130  		if b != nil {
   131  			l = int64(len(b))
   132  		}
   133  		bits = encodeVarint(bits, l)
   134  	}
   135  	for _, b := range data {
   136  		if b != nil {
   137  			bits = append(bits, b...)
   138  		}
   139  	}
   140  	return bits
   141  }
   142  
   143  func encodeVarintChunk(bits []byte, data []int64) []byte {
   144  	for _, v := range data {
   145  		bits = encodeVarint(bits, v)
   146  	}
   147  	return bits
   148  }
   149  
   150  func encodeUvarintChunk(bits []byte, data []uint64) []byte {
   151  	for _, v := range data {
   152  		bits = encodeUvarint(bits, v)
   153  	}
   154  	return bits
   155  }
   156  
   157  func encodeDeltaVarintChunk(bits []byte, data []int64) []byte {
   158  	last := data[0]
   159  	bits = encodeVarint(bits, last)
   160  	for _, v := range data[1:] {
   161  		bits = encodeVarint(bits, v-last)
   162  		last = v
   163  	}
   164  	return bits
   165  }
   166  
   167  func encodeDeltaUvarintChunk(bits []byte, data []uint64) []byte {
   168  	last := data[0]
   169  	bits = encodeUvarint(bits, last)
   170  	for _, v := range data[1:] {
   171  		bits = encodeUvarint(bits, v-last)
   172  		last = v
   173  	}
   174  	return bits
   175  }
   176  
   177  func encodeSizeTables(bits []byte, tables [][]int64) []byte {
   178  	size := len(bits)
   179  	for _, table := range tables {
   180  		bits = encodeUvarint(bits, uint64(len(table)))
   181  		bits = encodeDeltaVarintChunk(bits, table)
   182  	}
   183  	bits, _ = encodeUvarintReversed(bits, uint64(len(bits)-size))
   184  	return bits
   185  }
   186  
   187  // EncodeTiDBType encodes TiDB types
   188  func EncodeTiDBType(allocator *SliceAllocator, ty byte, flag model.ColumnFlagType, value interface{}) []byte {
   189  	if value == nil {
   190  		return nil
   191  	}
   192  	switch ty {
   193  	case mysql.TypeDate, mysql.TypeDatetime, mysql.TypeNewDate, mysql.TypeTimestamp, mysql.TypeDuration, mysql.TypeJSON, mysql.TypeNewDecimal:
   194  		// value type for these mysql types are string
   195  		return unsafeStringToBytes(value.(string))
   196  	case mysql.TypeEnum, mysql.TypeSet, mysql.TypeBit:
   197  		// value type for these mysql types are uint64
   198  		return encodeUvarint(allocator.byteSlice(binary.MaxVarintLen64)[:0], value.(uint64))
   199  	case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar,
   200  		mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob:
   201  		// value type for these mysql types are []byte
   202  		return value.([]byte)
   203  	case mysql.TypeFloat:
   204  		return encodeFloat64(allocator.byteSlice(4)[:0], float64(value.(float32)))
   205  	case mysql.TypeDouble:
   206  		// value type for these mysql types are float64
   207  		return encodeFloat64(allocator.byteSlice(8)[:0], value.(float64))
   208  	case mysql.TypeYear:
   209  		// year is encoded as int64
   210  		return encodeVarint(allocator.byteSlice(binary.MaxVarintLen64)[:0], value.(int64))
   211  	case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24:
   212  		// value type for these mysql types are int64 or uint64 depends on flags
   213  		if flag.IsUnsigned() {
   214  			return encodeUvarint(allocator.byteSlice(binary.MaxVarintLen64)[:0], value.(uint64))
   215  		}
   216  		return encodeVarint(allocator.byteSlice(binary.MaxVarintLen64)[:0], value.(int64))
   217  	case mysql.TypeUnspecified:
   218  		fallthrough
   219  	case mysql.TypeNull:
   220  		fallthrough
   221  	case mysql.TypeGeometry:
   222  		return nil
   223  	}
   224  	return nil
   225  }
   226  
// MessageEncoder is encoder for message.
// It accumulates the encoded output in bits and, alongside it, the size
// tables that Encode appends at the end of the message.
type MessageEncoder struct {
	// bits is the output buffer; NewMessageEncoder seeds it with the
	// format version.
	bits           []byte
	// sizeTables collects every size table (meta, body and per-event column
	// group tables) in the order they are written out by Encode.
	sizeTables     [][]int64
	// bodyLastOffset is len(bits) at the end of the most recently measured
	// section; encodeBodySize measures the next body relative to it.
	bodyLastOffset int
	// bodySize holds one encoded size per body; it is allocated in
	// encodeHeaders with headers.count entries.
	bodySize       []int64
	// bodySizeIndex is the index in bodySize that encodeBodySize fills next.
	bodySizeIndex  int
	// metaSizeTable holds the encoded size of the headers and of the term
	// dictionary, at headerSizeIndex and termDictionarySizeIndex.
	metaSizeTable  []int64

	// allocator provides the int64 slices used for the size tables.
	allocator *SliceAllocator
	// dict is the term dictionary passed to the header and column group
	// encoders and written out by Encode.
	dict      *termDictionary
}
   239  
   240  // NewMessageEncoder creates a new encoder with given allocator
   241  func NewMessageEncoder(allocator *SliceAllocator) *MessageEncoder {
   242  	return &MessageEncoder{
   243  		bits:      encodeUvarint(make([]byte, 0, DefaultBufferCapacity), Version1),
   244  		allocator: allocator,
   245  		dict:      newEncodingTermDictionary(),
   246  	}
   247  }
   248  
   249  func (e *MessageEncoder) encodeBodySize() *MessageEncoder {
   250  	e.bodySize[e.bodySizeIndex] = int64(len(e.bits) - e.bodyLastOffset)
   251  	e.bodyLastOffset = len(e.bits)
   252  	e.bodySizeIndex++
   253  	return e
   254  }
   255  
   256  func (e *MessageEncoder) encodeUvarint(u64 uint64) *MessageEncoder {
   257  	e.bits = encodeUvarint(e.bits, u64)
   258  	return e
   259  }
   260  
   261  func (e *MessageEncoder) encodeString(s string) *MessageEncoder {
   262  	e.bits = encodeString(e.bits, s)
   263  	return e
   264  }
   265  
   266  func (e *MessageEncoder) encodeHeaders(headers *Headers) *MessageEncoder {
   267  	oldSize := len(e.bits)
   268  	e.bodySize = e.allocator.int64Slice(headers.count)
   269  	e.bits = headers.encode(e.bits, e.dict)
   270  	e.bodyLastOffset = len(e.bits)
   271  	e.metaSizeTable = e.allocator.int64Slice(maxMetaSizeIndex + 1)
   272  	e.metaSizeTable[headerSizeIndex] = int64(len(e.bits) - oldSize)
   273  	e.sizeTables = append(e.sizeTables, e.metaSizeTable, e.bodySize)
   274  	return e
   275  }
   276  
   277  // Encode message into bits
   278  func (e *MessageEncoder) Encode() []byte {
   279  	offset := len(e.bits)
   280  	e.bits = encodeTermDictionary(e.bits, e.dict)
   281  	e.metaSizeTable[termDictionarySizeIndex] = int64(len(e.bits) - offset)
   282  	return encodeSizeTables(e.bits, e.sizeTables)
   283  }
   284  
   285  func (e *MessageEncoder) encodeRowChangeEvents(events []rowChangedEvent) *MessageEncoder {
   286  	sizeTables := e.sizeTables
   287  	for _, event := range events {
   288  		columnGroupSizeTable := e.allocator.int64Slice(len(event))
   289  		for gi, group := range event {
   290  			oldSize := len(e.bits)
   291  			e.bits = group.encode(e.bits, e.dict)
   292  			columnGroupSizeTable[gi] = int64(len(e.bits) - oldSize)
   293  		}
   294  		sizeTables = append(sizeTables, columnGroupSizeTable)
   295  		e.encodeBodySize()
   296  	}
   297  	e.sizeTables = sizeTables
   298  	return e
   299  }
   300  
   301  // NewResolvedEventEncoder creates a new encoder with given allocator and timestamp
   302  func NewResolvedEventEncoder(allocator *SliceAllocator, ts uint64) *MessageEncoder {
   303  	return NewMessageEncoder(allocator).encodeHeaders(&Headers{
   304  		ts:        allocator.oneUint64Slice(ts),
   305  		ty:        allocator.oneUint64Slice(uint64(model.MessageTypeResolved)),
   306  		partition: oneNullInt64Slice,
   307  		schema:    oneNullStringSlice,
   308  		table:     oneNullStringSlice,
   309  		count:     1,
   310  	}).encodeBodySize()
   311  }
   312  
   313  // NewDDLEventEncoder creates a new encoder with given allocator and timestamp
   314  func NewDDLEventEncoder(allocator *SliceAllocator, ev *model.DDLEvent) *MessageEncoder {
   315  	ty := uint64(ev.Type)
   316  	query := ev.Query
   317  	var schema, table *string
   318  	if len(ev.TableInfo.TableName.Schema) > 0 {
   319  		schema = &ev.TableInfo.TableName.Schema
   320  	}
   321  	if len(ev.TableInfo.TableName.Table) > 0 {
   322  		table = &ev.TableInfo.TableName.Table
   323  	}
   324  	return NewMessageEncoder(allocator).encodeHeaders(&Headers{
   325  		ts:        allocator.oneUint64Slice(ev.CommitTs),
   326  		ty:        allocator.oneUint64Slice(uint64(model.MessageTypeDDL)),
   327  		partition: oneNullInt64Slice,
   328  		schema:    allocator.oneNullableStringSlice(schema),
   329  		table:     allocator.oneNullableStringSlice(table),
   330  		count:     1,
   331  	}).encodeUvarint(ty).encodeString(query).encodeBodySize()
   332  }