github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/builder/codec_test.go

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package builder

import (
	"bytes"
	"compress/zlib"
	"context"
	"testing"

	"github.com/pingcap/tiflow/cdc/model"
	"github.com/pingcap/tiflow/pkg/config"
	"github.com/pingcap/tiflow/pkg/sink/codec"
	"github.com/pingcap/tiflow/pkg/sink/codec/common"
	"github.com/pingcap/tiflow/pkg/sink/codec/craft"
	"github.com/pingcap/tiflow/pkg/sink/codec/internal"
	"github.com/pingcap/tiflow/pkg/sink/codec/open"
	"github.com/pingcap/tiflow/proto/benchmark"
	"github.com/stretchr/testify/require"
)

// checkCompressedSize returns the total raw size of the messages and the size
// of the same key/value bytes after zlib compression.
func checkCompressedSize(messages []*common.Message) (int, int) {
	var buff bytes.Buffer
	writer := zlib.NewWriter(&buff)
	originalSize := 0
	for _, message := range messages {
		originalSize += len(message.Key) + len(message.Value)
		if len(message.Key) > 0 {
			_, _ = writer.Write(message.Key)
		}
		_, _ = writer.Write(message.Value)
	}
	writer.Close()
	return originalSize, buff.Len()
}

// encodeRowCase encodes the events with the given encoder and fails the test on error.
func encodeRowCase(t *testing.T, encoder codec.RowEventEncoder,
	events []*model.RowChangedEvent,
) []*common.Message {
	msg, err := codecEncodeRowCase(encoder, events)
	require.Nil(t, err)
	return msg
}

// TestJsonVsCraftVsPB prints a markdown table comparing the raw and
// zlib-compressed sizes produced by the craft, open-protocol JSON, and two
// protobuf encodings for each DML test case.
func TestJsonVsCraftVsPB(t *testing.T) {
	t.Parallel()
	t.Logf("| case | craft size | json size | protobuf 1 size | protobuf 2 size | craft compressed | json compressed | protobuf 1 compressed | protobuf 2 compressed |")
	t.Logf("| :---- | :--------- | :-------- | :-------------- | :-------------- | :--------------- | :-------------- | :-------------------- | :-------------------- |")
	dmlCases := internal.NewDMLTestCases(t)
	for i, cs := range dmlCases {
		if len(cs) == 0 {
			continue
		}

		codecConfig := common.NewConfig(config.ProtocolCraft)
		codecConfig.MaxMessageBytes = 8192
		codecConfig.MaxBatchSize = 64

		craftEncoder := craft.NewBatchEncoder(codecConfig)
		craftMessages := encodeRowCase(t, craftEncoder, cs)

		builder, err := open.NewBatchEncoderBuilder(context.Background(), codecConfig)
		require.NoError(t, err)
		jsonEncoder := builder.Build()
		jsonMessages := encodeRowCase(t, jsonEncoder, cs)

		protobuf1Messages := codecEncodeRowChangedPB1ToMessage(cs)
		protobuf2Messages := codecEncodeRowChangedPB2ToMessage(cs)
		craftOriginal, craftCompressed := checkCompressedSize(craftMessages)
		jsonOriginal, jsonCompressed := checkCompressedSize(jsonMessages)
		protobuf1Original, protobuf1Compressed := checkCompressedSize(protobuf1Messages)
		protobuf2Original, protobuf2Compressed := checkCompressedSize(protobuf2Messages)
		t.Logf("| case %d | %d | %d (%d%%)+ | %d (%d%%)+ | %d (%d%%)+ | %d | %d (%d%%)+ | %d (%d%%)+ | %d (%d%%)+ |", i,
			craftOriginal, jsonOriginal, 100*jsonOriginal/craftOriginal-100,
			protobuf1Original, 100*protobuf1Original/craftOriginal-100,
			protobuf2Original, 100*protobuf2Original/craftOriginal-100,
			craftCompressed, jsonCompressed, 100*jsonCompressed/craftCompressed-100,
			protobuf1Compressed, 100*protobuf1Compressed/craftCompressed-100,
			protobuf2Compressed, 100*protobuf2Compressed/craftCompressed-100)
	}
}

func codecEncodeKeyPB(event *model.RowChangedEvent) []byte {
	key := &benchmark.Key{
		Ts:        event.CommitTs,
		Schema:    event.TableInfo.GetSchemaName(),
		Table:     event.TableInfo.GetTableName(),
		RowId:     event.RowID,
		Partition: 0,
	}
	if b, err := key.Marshal(); err != nil {
		panic(err)
	} else {
		return b
	}
}

func codecEncodeColumnPB(column *model.Column) *benchmark.Column {
	codecTestSliceAllocator := craft.NewSliceAllocator(512)
	return &benchmark.Column{
		Name: column.Name,
		Type: uint32(column.Type),
		Flag: uint32(column.Flag),
		Value: craft.EncodeTiDBType(codecTestSliceAllocator,
			column.Type, column.Flag, column.Value),
	}
}

func codecEncodeColumnsPB(columns []*model.Column) []*benchmark.Column {
	converted := make([]*benchmark.Column, len(columns))
	for i, column := range columns {
		converted[i] = codecEncodeColumnPB(column)
	}
	return converted
}

func codecEncodeRowChangedPB(event *model.RowChangedEvent) []byte {
	rowChanged := &benchmark.RowChanged{
		OldValue: codecEncodeColumnsPB(event.GetPreColumns()),
		NewValue: codecEncodeColumnsPB(event.GetColumns()),
	}
	if b, err := rowChanged.Marshal(); err != nil {
		panic(err)
	} else {
		return b
	}
}

func codecEncodeRowChangedPB1ToMessage(events []*model.RowChangedEvent) []*common.Message {
	result := make([]*common.Message, len(events))
	for i, event := range events {
		result[i] = &common.Message{
			Key:   codecEncodeKeyPB(event),
			Value: codecEncodeRowChangedPB(event),
		}
	}
	return result
}

func codecEncodeRowChangedPB2ToMessage(events []*model.RowChangedEvent) []*common.Message {
	return []*common.Message{{
		Key:   codecEncodeKeysPB2(events),
		Value: codecEncodeRowChangedPB2(events),
	}}
}

func codecEncodeKeysPB2(events []*model.RowChangedEvent) []byte {
	converted := &benchmark.KeysColumnar{}

	for _, event := range events {
		converted.Ts = append(converted.Ts, event.CommitTs)
		converted.Schema = append(converted.Schema, event.TableInfo.GetSchemaName())
		converted.Table = append(converted.Table, event.TableInfo.GetTableName())
		converted.RowId = append(converted.RowId, event.RowID)
		converted.Partition = append(converted.Partition, 0)
	}

	if b, err := converted.Marshal(); err != nil {
		panic(err)
	} else {
		return b
	}
}

func codecEncodeColumnsPB2(columns []*model.Column) *benchmark.ColumnsColumnar {
	converted := &benchmark.ColumnsColumnar{
		Name:  make([]string, len(columns)),
		Type:  make([]uint32, len(columns)),
		Flag:  make([]uint32, len(columns)),
		Value: make([][]byte, len(columns)),
	}

	codecTestSliceAllocator := craft.NewSliceAllocator(512)
	for i, column := range columns {
		converted.Name[i] = column.Name
		converted.Type[i] = uint32(column.Type)
		converted.Flag[i] = uint32(column.Flag)
		converted.Value[i] = craft.EncodeTiDBType(codecTestSliceAllocator,
			column.Type, column.Flag, column.Value)
	}
	return converted
}

func codecEncodeRowChangedPB2(events []*model.RowChangedEvent) []byte {
	rowChanged := &benchmark.RowChangedColumnar{}
	for _, event := range events {
		rowChanged.OldValue = append(rowChanged.OldValue,
			codecEncodeColumnsPB2(event.GetPreColumns()))
		rowChanged.NewValue = append(rowChanged.NewValue, codecEncodeColumnsPB2(event.GetColumns()))
	}
	if b, err := rowChanged.Marshal(); err != nil {
		panic(err)
	} else {
		return b
	}
}

// codecEncodeRowCase appends every event to the encoder and returns the built messages.
func codecEncodeRowCase(encoder codec.RowEventEncoder,
	events []*model.RowChangedEvent,
) ([]*common.Message, error) {
	for _, event := range events {
		err := encoder.AppendRowChangedEvent(context.Background(), "", event, nil)
		if err != nil {
			return nil, err
		}
	}

	if len(events) > 0 {
		return encoder.Build(), nil
	}
	return nil, nil
}

func BenchmarkCraftEncoding(b *testing.B) {
	codecConfig := common.NewConfig(config.ProtocolCraft)
	codecConfig.MaxMessageBytes = 8192
	codecConfig.MaxBatchSize = 64
	allocator := craft.NewSliceAllocator(128)
	encoder := craft.NewBatchEncoderWithAllocator(allocator, codecConfig)

	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	for i := 0; i < b.N; i++ {
		_, _ = codecEncodeRowCase(encoder, codecBenchmarkRowChanges)
	}
}

func BenchmarkJsonEncoding(b *testing.B) {
	codecConfig := common.NewConfig(config.ProtocolCraft)
	codecConfig.MaxMessageBytes = 8192
	codecConfig.MaxBatchSize = 64

	builder, err := open.NewBatchEncoderBuilder(context.Background(), codecConfig)
	require.NoError(b, err)
	encoder := builder.Build()

	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	for i := 0; i < b.N; i++ {
		_, _ = codecEncodeRowCase(encoder, codecBenchmarkRowChanges)
	}
}

func BenchmarkProtobuf1Encoding(b *testing.B) {
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	for i := 0; i < b.N; i++ {
		_ = codecEncodeRowChangedPB1ToMessage(codecBenchmarkRowChanges)
	}
}

func BenchmarkProtobuf2Encoding(b *testing.B) {
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	for i := 0; i < b.N; i++ {
		_ = codecEncodeRowChangedPB2ToMessage(codecBenchmarkRowChanges)
	}
}

func BenchmarkCraftDecoding(b *testing.B) {
	codecConfig := common.NewConfig(config.ProtocolCraft)
	codecConfig.MaxMessageBytes = 8192
	codecConfig.MaxBatchSize = 64
	encoder := craft.NewBatchEncoder(codecConfig)

	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	codecCraftEncodedRowChanges, err := codecEncodeRowCase(encoder, codecBenchmarkRowChanges)
	require.NoError(b, err)
	allocator := craft.NewSliceAllocator(128)
	for i := 0; i < b.N; i++ {
		decoder := craft.NewBatchDecoderWithAllocator(allocator)
		for _, message := range codecCraftEncodedRowChanges {
			if err := decoder.AddKeyValue(message.Key, message.Value); err != nil {
				panic(err)
			}
			for {
				if _, hasNext, err := decoder.HasNext(); err != nil {
					panic(err)
				} else if hasNext {
					_, _ = decoder.NextRowChangedEvent()
				} else {
					break
				}
			}
		}
	}
}

func BenchmarkJsonDecoding(b *testing.B) {
	codecConfig := common.NewConfig(config.ProtocolCraft)
	codecConfig.MaxMessageBytes = 8192
	codecConfig.MaxBatchSize = 64
	builder, err := open.NewBatchEncoderBuilder(context.Background(), codecConfig)
	require.NoError(b, err)

	encoder := builder.Build()
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	codecJSONEncodedRowChanges, err := codecEncodeRowCase(encoder, codecBenchmarkRowChanges)
	require.NoError(b, err)
	for i := 0; i < b.N; i++ {
		for _, message := range codecJSONEncodedRowChanges {
			codecConfig := common.NewConfig(config.ProtocolOpen)
			decoder, err := open.NewBatchDecoder(context.Background(), codecConfig, nil)
			require.NoError(b, err)
			if err := decoder.AddKeyValue(message.Key, message.Value); err != nil {
				panic(err)
			}
			for {
				if _, hasNext, err := decoder.HasNext(); err != nil {
					panic(err)
				} else if hasNext {
					_, _ = decoder.NextRowChangedEvent()
				} else {
					break
				}
			}
		}
	}
}

func codecDecodeRowChangedPB1(columns []*benchmark.Column) []*model.Column {
	if len(columns) == 0 {
		return nil
	}

	result := make([]*model.Column, len(columns))
	for i, column := range columns {
		value, _ := craft.DecodeTiDBType(byte(column.Type),
			model.ColumnFlagType(column.Flag), column.Value)
		result[i] = &model.Column{
			Name:  column.Name,
			Type:  byte(column.Type),
			Flag:  model.ColumnFlagType(column.Flag),
			Value: value,
		}
	}

	return result
}

func benchmarkProtobuf1Decoding(b *testing.B) []*model.RowChangedEvent {
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	codecPB1EncodedRowChanges := codecEncodeRowChangedPB1ToMessage(codecBenchmarkRowChanges)
	result := make([]*model.RowChangedEvent, 0, 4)
	for _, message := range codecPB1EncodedRowChanges {
		key := &benchmark.Key{}
		if err := key.Unmarshal(message.Key); err != nil {
			panic(err)
		}
		value := &benchmark.RowChanged{}
		if err := value.Unmarshal(message.Value); err != nil {
			panic(err)
		}
		ev := &model.RowChangedEvent{}
		ev.TableInfo = model.BuildTableInfo(key.Schema, key.Table, codecDecodeRowChangedPB1(value.OldValue), nil)
		ev.PreColumns = model.Columns2ColumnDatas(codecDecodeRowChangedPB1(value.OldValue), ev.TableInfo)
		ev.Columns = model.Columns2ColumnDatas(codecDecodeRowChangedPB1(value.NewValue), ev.TableInfo)
		ev.CommitTs = key.Ts
		if key.Partition >= 0 {
			ev.PhysicalTableID = key.Partition
			ev.TableInfo.TableName.IsPartition = true
		}
		result = append(result, ev)
	}
	return result
}

func BenchmarkProtobuf1Decoding(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for _, row := range benchmarkProtobuf1Decoding(b) {
			_ = row
		}
	}
}

func codecDecodeRowChangedPB2(columns *benchmark.ColumnsColumnar) []*model.Column {
	result := make([]*model.Column, len(columns.Value))
	for i, value := range columns.Value {
		v, _ := craft.DecodeTiDBType(byte(columns.Type[i]),
			model.ColumnFlagType(columns.Flag[i]), value)
		result[i] = &model.Column{
			Name:  columns.Name[i],
			Type:  byte(columns.Type[i]),
			Flag:  model.ColumnFlagType(columns.Flag[i]),
			Value: v,
		}
	}
	return result
}

func benchmarkProtobuf2Decoding(b *testing.B) []*model.RowChangedEvent {
	dmlCases := internal.NewDMLTestCases(b)
	codecBenchmarkRowChanges := dmlCases[1]
	codecPB2EncodedRowChanges := codecEncodeRowChangedPB2ToMessage(codecBenchmarkRowChanges)
	result := make([]*model.RowChangedEvent, 0, 4)
	for _, message := range codecPB2EncodedRowChanges {
		keys := &benchmark.KeysColumnar{}
		if err := keys.Unmarshal(message.Key); err != nil {
			panic(err)
		}
		values := &benchmark.RowChangedColumnar{}
		if err := values.Unmarshal(message.Value); err != nil {
			panic(err)
		}

		for i, ts := range keys.Ts {
			ev := &model.RowChangedEvent{}
			if len(values.OldValue) > i {
				ev.TableInfo = model.BuildTableInfo(keys.Schema[i], keys.Table[i], codecDecodeRowChangedPB2(values.OldValue[i]), nil)
				ev.PreColumns = model.Columns2ColumnDatas(codecDecodeRowChangedPB2(values.OldValue[i]), ev.TableInfo)
			}
			if len(values.NewValue) > i {
				ev.TableInfo = model.BuildTableInfo(keys.Schema[i], keys.Table[i], codecDecodeRowChangedPB2(values.NewValue[i]), nil)
				ev.Columns = model.Columns2ColumnDatas(codecDecodeRowChangedPB2(values.NewValue[i]), ev.TableInfo)
			}
			ev.CommitTs = ts
			if keys.Partition[i] >= 0 {
				ev.PhysicalTableID = keys.Partition[i]
				ev.TableInfo.TableName.IsPartition = true
			}
			result = append(result, ev)
		}
	}
	return result
}

func BenchmarkProtobuf2Decoding(b *testing.B) {
	for i := 0; i < b.N; i++ {
		for _, row := range benchmarkProtobuf2Decoding(b) {
			_ = row
		}
	}
}