github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/builder/codec_test.go

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package builder

import (
	"bytes"
	"compress/zlib"
	"context"
	"testing"

	"github.com/pingcap/tiflow/cdc/model"
	"github.com/pingcap/tiflow/pkg/config"
	"github.com/pingcap/tiflow/pkg/sink/codec"
	"github.com/pingcap/tiflow/pkg/sink/codec/common"
	"github.com/pingcap/tiflow/pkg/sink/codec/craft"
	"github.com/pingcap/tiflow/pkg/sink/codec/internal"
	"github.com/pingcap/tiflow/pkg/sink/codec/open"
	"github.com/pingcap/tiflow/proto/benchmark"
	"github.com/stretchr/testify/require"
)

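// checkCompressedSize zlib-compresses the non-empty keys and the values of
// all messages into a single stream and returns the total uncompressed size
// together with the compressed size, so the comparison test can report both.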
func checkCompressedSize(messages []*common.Message) (int, int) {
	var buff bytes.Buffer
	writer := zlib.NewWriter(&buff)
	originalSize := 0
	for _, message := range messages {
		originalSize += len(message.Key) + len(message.Value)
		if len(message.Key) > 0 {
			_, _ = writer.Write(message.Key)
		}
		_, _ = writer.Write(message.Value)
	}
	writer.Close()
	return originalSize, buff.Len()
}

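// encodeRowCase encodes the given row changed events with the supplied
// encoder and fails the test if encoding returns an error.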
func encodeRowCase(t *testing.T, encoder codec.RowEventEncoder,
	events []*model.RowChangedEvent,
) []*common.Message {
	msg, err := codecEncodeRowCase(encoder, events)
	require.Nil(t, err)
	return msg
}

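// TestJsonVsCraftVsPB encodes the shared DML test cases with the craft
// encoder, the open-protocol (JSON) encoder and two hand-rolled protobuf
// layouts, then logs a markdown table of raw and zlib-compressed sizes with
// percentages relative to craft. Run it verbosely to see the table, e.g.
// `go test -run TestJsonVsCraftVsPB -v`.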
func TestJsonVsCraftVsPB(t *testing.T) {
	t.Parallel()
	t.Logf("| case | craft size | json size | protobuf 1 size | protobuf 2 size | craft compressed | json compressed | protobuf 1 compressed | protobuf 2 compressed |")
	t.Logf("| :---- | :--------- | :-------- | :-------------- | :-------------- | :--------------- | :-------------- | :-------------------- | :-------------------- |")
	dmlCases := internal.NewDMLTestCases(t)
	for i, cs := range dmlCases {
		if len(cs) == 0 {
			continue
		}

		codecConfig := common.NewConfig(config.ProtocolCraft)
		codecConfig.MaxMessageBytes = 8192
		codecConfig.MaxBatchSize = 64

		craftEncoder := craft.NewBatchEncoder(codecConfig)
		craftMessages := encodeRowCase(t, craftEncoder, cs)

		builder, err := open.NewBatchEncoderBuilder(context.Background(), codecConfig)
		require.NoError(t, err)
		jsonEncoder := builder.Build()
		jsonMessages := encodeRowCase(t, jsonEncoder, cs)

		protobuf1Messages := codecEncodeRowChangedPB1ToMessage(cs)
		protobuf2Messages := codecEncodeRowChangedPB2ToMessage(cs)
		craftOriginal, craftCompressed := checkCompressedSize(craftMessages)
		jsonOriginal, jsonCompressed := checkCompressedSize(jsonMessages)
		protobuf1Original, protobuf1Compressed := checkCompressedSize(protobuf1Messages)
		protobuf2Original, protobuf2Compressed := checkCompressedSize(protobuf2Messages)
		t.Logf("| case %d | %d | %d (%d%%)+ | %d (%d%%)+ | %d (%d%%)+ | %d | %d (%d%%)+ | %d (%d%%)+ | %d (%d%%)+ |", i,
			craftOriginal, jsonOriginal, 100*jsonOriginal/craftOriginal-100,
			protobuf1Original, 100*protobuf1Original/craftOriginal-100,
			protobuf2Original, 100*protobuf2Original/craftOriginal-100,
			craftCompressed, jsonCompressed, 100*jsonCompressed/craftCompressed-100,
			protobuf1Compressed, 100*protobuf1Compressed/craftCompressed-100,
			protobuf2Compressed, 100*protobuf2Compressed/craftCompressed-100)
	}
}

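// codecEncodeKeyPB marshals the key fields of a row changed event
// (commit ts, schema, table, row id) into a benchmark.Key protobuf message.
// It panics on marshal errors, which is acceptable in test-only code.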
func codecEncodeKeyPB(event *model.RowChangedEvent) []byte {
	key := &benchmark.Key{
		Ts:        event.CommitTs,
		Schema:    event.TableInfo.GetSchemaName(),
		Table:     event.TableInfo.GetTableName(),
		RowId:     event.RowID,
		Partition: 0,
	}
	if b, err := key.Marshal(); err != nil {
		panic(err)
	} else {
		return b
	}
}

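// codecEncodeColumnPB converts one model.Column into its benchmark.Column
// protobuf counterpart, reusing craft's TiDB type encoding for the value.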
func codecEncodeColumnPB(column *model.Column) *benchmark.Column {
	codecTestSliceAllocator := craft.NewSliceAllocator(512)
	return &benchmark.Column{
		Name: column.Name,
		Type: uint32(column.Type),
		Flag: uint32(column.Flag),
		Value: craft.EncodeTiDBType(codecTestSliceAllocator,
			column.Type, column.Flag, column.Value),
	}
}

func codecEncodeColumnsPB(columns []*model.Column) []*benchmark.Column {
	converted := make([]*benchmark.Column, len(columns))
	for i, column := range columns {
		converted[i] = codecEncodeColumnPB(column)
	}
	return converted
}

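// codecEncodeRowChangedPB marshals the pre- and post-image columns of a
// single event into the row-oriented benchmark.RowChanged message.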
func codecEncodeRowChangedPB(event *model.RowChangedEvent) []byte {
	rowChanged := &benchmark.RowChanged{
		OldValue: codecEncodeColumnsPB(event.GetPreColumns()),
		NewValue: codecEncodeColumnsPB(event.GetColumns()),
	}
	if b, err := rowChanged.Marshal(); err != nil {
		panic(err)
	} else {
		return b
	}
}

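// codecEncodeRowChangedPB1ToMessage builds protobuf "layout 1": one
// row-oriented key/value message per event.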
func codecEncodeRowChangedPB1ToMessage(events []*model.RowChangedEvent) []*common.Message {
	result := make([]*common.Message, len(events))
	for i, event := range events {
		result[i] = &common.Message{
			Key:   codecEncodeKeyPB(event),
			Value: codecEncodeRowChangedPB(event),
		}
	}
	return result
}

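// codecEncodeRowChangedPB2ToMessage builds protobuf "layout 2": a single
// message for the whole batch, using the columnar KeysColumnar and
// RowChangedColumnar messages from proto/benchmark.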
func codecEncodeRowChangedPB2ToMessage(events []*model.RowChangedEvent) []*common.Message {
	return []*common.Message{{
		Key:   codecEncodeKeysPB2(events),
		Value: codecEncodeRowChangedPB2(events),
	}}
}

func codecEncodeKeysPB2(events []*model.RowChangedEvent) []byte {
	converted := &benchmark.KeysColumnar{}

	for _, event := range events {
		converted.Ts = append(converted.Ts, event.CommitTs)
		converted.Schema = append(converted.Schema, event.TableInfo.GetSchemaName())
		converted.Table = append(converted.Table, event.TableInfo.GetTableName())
		converted.RowId = append(converted.RowId, event.RowID)
		converted.Partition = append(converted.Partition, 0)
	}

	if b, err := converted.Marshal(); err != nil {
		panic(err)
	} else {
		return b
	}
}

func codecEncodeColumnsPB2(columns []*model.Column) *benchmark.ColumnsColumnar {
	converted := &benchmark.ColumnsColumnar{
		Name:  make([]string, len(columns)),
		Type:  make([]uint32, len(columns)),
		Flag:  make([]uint32, len(columns)),
		Value: make([][]byte, len(columns)),
	}

	codecTestSliceAllocator := craft.NewSliceAllocator(512)
	for i, column := range columns {
		converted.Name[i] = column.Name
		converted.Type[i] = uint32(column.Type)
		converted.Flag[i] = uint32(column.Flag)
		converted.Value[i] = craft.EncodeTiDBType(codecTestSliceAllocator,
			column.Type, column.Flag, column.Value)
	}
	return converted
}

func codecEncodeRowChangedPB2(events []*model.RowChangedEvent) []byte {
	rowChanged := &benchmark.RowChangedColumnar{}
	for _, event := range events {
		rowChanged.OldValue = append(rowChanged.OldValue, codecEncodeColumnsPB2(event.GetPreColumns()))
		rowChanged.NewValue = append(rowChanged.NewValue, codecEncodeColumnsPB2(event.GetColumns()))
	}
	if b, err := rowChanged.Marshal(); err != nil {
		panic(err)
	} else {
		return b
	}
}

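// codecEncodeRowCase appends all events to the encoder and builds the
// resulting batch; it returns nil messages when the input slice is empty.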
func codecEncodeRowCase(encoder codec.RowEventEncoder,
	events []*model.RowChangedEvent,
) ([]*common.Message, error) {
	for _, event := range events {
		err := encoder.AppendRowChangedEvent(context.Background(), "", event, nil)
		if err != nil {
			return nil, err
		}
	}

	if len(events) > 0 {
		return encoder.Build(), nil
	}
	return nil, nil
}

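// The benchmarks below measure encoding and decoding of the same DML test
// case (dmlCases[1]) for each format. They can be run with, for example,
// `go test -bench . -benchmem` in this package.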
func BenchmarkCraftEncoding(b *testing.B) {
	codecConfig := common.NewConfig(config.ProtocolCraft)
	codecConfig.MaxMessageBytes = 8192
	codecConfig.MaxBatchSize = 64
	allocator := craft.NewSliceAllocator(128)
	encoder := craft.NewBatchEncoderWithAllocator(allocator, codecConfig)

	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	for i := 0; i < b.N; i++ {
		_, _ = codecEncodeRowCase(encoder, codecBenchmarkRowChanges)
	}
}

func BenchmarkJsonEncoding(b *testing.B) {
	codecConfig := common.NewConfig(config.ProtocolCraft)
	codecConfig.MaxMessageBytes = 8192
	codecConfig.MaxBatchSize = 64

	builder, err := open.NewBatchEncoderBuilder(context.Background(), codecConfig)
	require.NoError(b, err)
	encoder := builder.Build()

	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	for i := 0; i < b.N; i++ {
		_, _ = codecEncodeRowCase(encoder, codecBenchmarkRowChanges)
	}
}

func BenchmarkProtobuf1Encoding(b *testing.B) {
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	for i := 0; i < b.N; i++ {
		_ = codecEncodeRowChangedPB1ToMessage(codecBenchmarkRowChanges)
	}
}

func BenchmarkProtobuf2Encoding(b *testing.B) {
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	for i := 0; i < b.N; i++ {
		_ = codecEncodeRowChangedPB2ToMessage(codecBenchmarkRowChanges)
	}
}

func BenchmarkCraftDecoding(b *testing.B) {
	codecConfig := common.NewConfig(config.ProtocolCraft)
	codecConfig.MaxMessageBytes = 8192
	codecConfig.MaxBatchSize = 64
	encoder := craft.NewBatchEncoder(codecConfig)

	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	codecCraftEncodedRowChanges, err := codecEncodeRowCase(encoder, codecBenchmarkRowChanges)
	require.NoError(b, err)
	allocator := craft.NewSliceAllocator(128)
	for i := 0; i < b.N; i++ {
		decoder := craft.NewBatchDecoderWithAllocator(allocator)
		for _, message := range codecCraftEncodedRowChanges {
			if err := decoder.AddKeyValue(message.Key, message.Value); err != nil {
				panic(err)
			}
			for {
				if _, hasNext, err := decoder.HasNext(); err != nil {
					panic(err)
				} else if hasNext {
					_, _ = decoder.NextRowChangedEvent()
				} else {
					break
				}
			}
		}
	}
}

func BenchmarkJsonDecoding(b *testing.B) {
	codecConfig := common.NewConfig(config.ProtocolCraft)
	codecConfig.MaxMessageBytes = 8192
	codecConfig.MaxBatchSize = 64
	builder, err := open.NewBatchEncoderBuilder(context.Background(), codecConfig)
	require.NoError(b, err)

	encoder := builder.Build()
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	codecJSONEncodedRowChanges, err := codecEncodeRowCase(encoder, codecBenchmarkRowChanges)
	require.NoError(b, err)
	for i := 0; i < b.N; i++ {
		for _, message := range codecJSONEncodedRowChanges {
			codecConfig := common.NewConfig(config.ProtocolOpen)
			decoder, err := open.NewBatchDecoder(context.Background(), codecConfig, nil)
			require.NoError(b, err)
			if err := decoder.AddKeyValue(message.Key, message.Value); err != nil {
				panic(err)
			}
			for {
				if _, hasNext, err := decoder.HasNext(); err != nil {
					panic(err)
				} else if hasNext {
					_, _ = decoder.NextRowChangedEvent()
				} else {
					break
				}
			}
		}
	}
}

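// codecDecodeRowChangedPB1 converts benchmark.Column protobuf values back
// into model.Column, decoding each value with craft's TiDB type decoder.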
func codecDecodeRowChangedPB1(columns []*benchmark.Column) []*model.Column {
	if len(columns) == 0 {
		return nil
	}

	result := make([]*model.Column, len(columns))
	for i, column := range columns {
		value, _ := craft.DecodeTiDBType(byte(column.Type),
			model.ColumnFlagType(column.Flag), column.Value)
		result[i] = &model.Column{
			Name:  column.Name,
			Type:  byte(column.Type),
			Flag:  model.ColumnFlagType(column.Flag),
			Value: value,
		}
	}

	return result
}

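// benchmarkProtobuf1Decoding encodes the benchmark rows with protobuf
// layout 1 and unmarshals every message back into model.RowChangedEvent,
// returning the decoded events.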
func benchmarkProtobuf1Decoding(b *testing.B) []*model.RowChangedEvent {
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	codecPB1EncodedRowChanges := codecEncodeRowChangedPB1ToMessage(codecBenchmarkRowChanges)
	result := make([]*model.RowChangedEvent, 0, 4)
	for _, message := range codecPB1EncodedRowChanges {
		key := &benchmark.Key{}
		if err := key.Unmarshal(message.Key); err != nil {
			panic(err)
		}
		value := &benchmark.RowChanged{}
		if err := value.Unmarshal(message.Value); err != nil {
			panic(err)
		}
		ev := &model.RowChangedEvent{}
		ev.TableInfo = model.BuildTableInfo(key.Schema, key.Table, codecDecodeRowChangedPB1(value.OldValue), nil)
		ev.PreColumns = model.Columns2ColumnDatas(codecDecodeRowChangedPB1(value.OldValue), ev.TableInfo)
		ev.Columns = model.Columns2ColumnDatas(codecDecodeRowChangedPB1(value.NewValue), ev.TableInfo)
		ev.CommitTs = key.Ts
		if key.Partition >= 0 {
			ev.PhysicalTableID = key.Partition
			ev.TableInfo.TableName.IsPartition = true
		}
		result = append(result, ev)
	}
	return result
}

func BenchmarkProtobuf1Decoding(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for _, row := range benchmarkProtobuf1Decoding(b) {
			_ = row
		}
	}
}

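// codecDecodeRowChangedPB2 converts a columnar benchmark.ColumnsColumnar
// message back into a slice of model.Column.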
func codecDecodeRowChangedPB2(columns *benchmark.ColumnsColumnar) []*model.Column {
	result := make([]*model.Column, len(columns.Value))
	for i, value := range columns.Value {
		v, _ := craft.DecodeTiDBType(byte(columns.Type[i]),
			model.ColumnFlagType(columns.Flag[i]), value)
		result[i] = &model.Column{
			Name:  columns.Name[i],
			Type:  byte(columns.Type[i]),
			Flag:  model.ColumnFlagType(columns.Flag[i]),
			Value: v,
		}
	}
	return result
}

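// benchmarkProtobuf2Decoding encodes the benchmark rows with the columnar
// protobuf layout 2 and unmarshals them back into model.RowChangedEvent
// values.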
func benchmarkProtobuf2Decoding(b *testing.B) []*model.RowChangedEvent {
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	codecPB2EncodedRowChanges := codecEncodeRowChangedPB2ToMessage(codecBenchmarkRowChanges)
	result := make([]*model.RowChangedEvent, 0, 4)
	for _, message := range codecPB2EncodedRowChanges {
		keys := &benchmark.KeysColumnar{}
		if err := keys.Unmarshal(message.Key); err != nil {
			panic(err)
		}
		values := &benchmark.RowChangedColumnar{}
		if err := values.Unmarshal(message.Value); err != nil {
			panic(err)
		}

		for i, ts := range keys.Ts {
			ev := &model.RowChangedEvent{}
			if len(values.OldValue) > i {
				ev.TableInfo = model.BuildTableInfo(keys.Schema[i], keys.Table[i], codecDecodeRowChangedPB2(values.OldValue[i]), nil)
				ev.PreColumns = model.Columns2ColumnDatas(codecDecodeRowChangedPB2(values.OldValue[i]), ev.TableInfo)
			}
			if len(values.NewValue) > i {
				ev.TableInfo = model.BuildTableInfo(keys.Schema[i], keys.Table[i], codecDecodeRowChangedPB2(values.NewValue[i]), nil)
				ev.Columns = model.Columns2ColumnDatas(codecDecodeRowChangedPB2(values.NewValue[i]), ev.TableInfo)
			}
			ev.CommitTs = ts
			if keys.Partition[i] >= 0 {
				ev.PhysicalTableID = keys.Partition[i]
				ev.TableInfo.TableName.IsPartition = true
			}
			result = append(result, ev)
		}
	}
	return result
}

func BenchmarkProtobuf2Decoding(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for _, row := range benchmarkProtobuf2Decoding(b) {
			_ = row
		}
	}
}