github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/csv/csv_message_test.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package csv 15 16 import ( 17 "fmt" 18 "strings" 19 "testing" 20 21 "github.com/pingcap/tidb/pkg/kv" 22 "github.com/pingcap/tidb/pkg/parser/mysql" 23 "github.com/pingcap/tidb/pkg/types" 24 "github.com/pingcap/tidb/pkg/util/rowcodec" 25 "github.com/pingcap/tiflow/cdc/model" 26 "github.com/pingcap/tiflow/pkg/config" 27 "github.com/pingcap/tiflow/pkg/sink/codec/common" 28 "github.com/stretchr/testify/require" 29 ) 30 31 type csvTestColumnTuple struct { 32 col model.Column 33 colInfo rowcodec.ColInfo 34 want interface{} 35 BinaryEncodingMethod string 36 } 37 38 var csvTestColumnsGroup = [][]*csvTestColumnTuple{ 39 { 40 { 41 model.Column{Name: "tiny", Value: int64(1), Type: mysql.TypeTiny}, 42 rowcodec.ColInfo{ 43 ID: 1, 44 IsPKHandle: false, 45 VirtualGenCol: false, 46 Ft: types.NewFieldType(mysql.TypeTiny), 47 }, 48 int64(1), 49 config.BinaryEncodingBase64, 50 }, 51 { 52 model.Column{Name: "short", Value: int64(1), Type: mysql.TypeShort}, 53 rowcodec.ColInfo{ 54 ID: 2, 55 IsPKHandle: false, 56 VirtualGenCol: false, 57 Ft: types.NewFieldType(mysql.TypeShort), 58 }, 59 int64(1), 60 config.BinaryEncodingBase64, 61 }, 62 { 63 model.Column{Name: "int24", Value: int64(1), Type: mysql.TypeInt24}, 64 rowcodec.ColInfo{ 65 ID: 3, 66 IsPKHandle: false, 67 VirtualGenCol: false, 68 Ft: types.NewFieldType(mysql.TypeInt24), 69 }, 70 int64(1), 71 config.BinaryEncodingBase64, 72 }, 73 { 74 model.Column{Name: "long", Value: int64(1), Type: mysql.TypeLong}, 75 rowcodec.ColInfo{ 76 ID: 4, 77 IsPKHandle: false, 78 VirtualGenCol: false, 79 Ft: types.NewFieldType(mysql.TypeLong), 80 }, 81 int64(1), 82 config.BinaryEncodingBase64, 83 }, 84 { 85 model.Column{Name: "longlong", Value: int64(1), Type: mysql.TypeLonglong}, 86 rowcodec.ColInfo{ 87 ID: 5, 88 IsPKHandle: false, 89 VirtualGenCol: false, 90 Ft: types.NewFieldType(mysql.TypeLonglong), 91 }, 92 int64(1), 93 config.BinaryEncodingBase64, 94 }, 95 { 96 model.Column{ 97 Name: "tinyunsigned", 98 Value: uint64(1), 99 Type: mysql.TypeTiny, 100 Flag: model.UnsignedFlag, 101 }, 102 rowcodec.ColInfo{ 103 ID: 6, 104 IsPKHandle: false, 105 VirtualGenCol: false, 106 Ft: setFlag(types.NewFieldType(mysql.TypeTiny), uint(model.UnsignedFlag)), 107 }, 108 uint64(1), 109 config.BinaryEncodingBase64, 110 }, 111 { 112 model.Column{ 113 Name: "shortunsigned", 114 Value: uint64(1), 115 Type: mysql.TypeShort, 116 Flag: model.UnsignedFlag, 117 }, 118 rowcodec.ColInfo{ 119 ID: 7, 120 IsPKHandle: false, 121 VirtualGenCol: false, 122 Ft: setFlag(types.NewFieldType(mysql.TypeShort), uint(model.UnsignedFlag)), 123 }, 124 uint64(1), 125 config.BinaryEncodingBase64, 126 }, 127 { 128 model.Column{ 129 Name: "int24unsigned", 130 Value: uint64(1), 131 Type: mysql.TypeInt24, 132 Flag: model.UnsignedFlag, 133 }, 134 rowcodec.ColInfo{ 135 ID: 8, 136 IsPKHandle: false, 137 VirtualGenCol: false, 138 Ft: setFlag(types.NewFieldType(mysql.TypeInt24), uint(model.UnsignedFlag)), 139 }, 140 uint64(1), 141 config.BinaryEncodingBase64, 142 }, 143 { 144 model.Column{ 145 Name: "longunsigned", 146 Value: uint64(1), 147 Type: mysql.TypeLong, 148 Flag: model.UnsignedFlag, 149 }, 150 rowcodec.ColInfo{ 151 ID: 9, 152 IsPKHandle: false, 153 VirtualGenCol: false, 154 Ft: setFlag(types.NewFieldType(mysql.TypeLong), uint(model.UnsignedFlag)), 155 }, 156 uint64(1), 157 config.BinaryEncodingBase64, 158 }, 159 { 160 model.Column{ 161 Name: "longlongunsigned", 162 Value: uint64(1), 163 Type: mysql.TypeLonglong, 164 Flag: model.UnsignedFlag, 165 }, 166 rowcodec.ColInfo{ 167 ID: 10, 168 IsPKHandle: false, 169 VirtualGenCol: false, 170 Ft: setFlag( 171 types.NewFieldType(mysql.TypeLonglong), 172 uint(model.UnsignedFlag), 173 ), 174 }, 175 uint64(1), 176 config.BinaryEncodingBase64, 177 }, 178 }, 179 { 180 { 181 model.Column{Name: "float", Value: float64(3.14), Type: mysql.TypeFloat}, 182 rowcodec.ColInfo{ 183 ID: 11, 184 IsPKHandle: false, 185 VirtualGenCol: false, 186 Ft: types.NewFieldType(mysql.TypeFloat), 187 }, 188 float64(3.14), 189 config.BinaryEncodingBase64, 190 }, 191 { 192 model.Column{Name: "double", Value: float64(3.14), Type: mysql.TypeDouble}, 193 rowcodec.ColInfo{ 194 ID: 12, 195 IsPKHandle: false, 196 VirtualGenCol: false, 197 Ft: types.NewFieldType(mysql.TypeDouble), 198 }, 199 float64(3.14), 200 config.BinaryEncodingBase64, 201 }, 202 }, 203 { 204 { 205 model.Column{Name: "bit", Value: uint64(683), Type: mysql.TypeBit}, 206 rowcodec.ColInfo{ 207 ID: 13, 208 IsPKHandle: false, 209 VirtualGenCol: false, 210 Ft: types.NewFieldType(mysql.TypeBit), 211 }, 212 uint64(683), 213 config.BinaryEncodingBase64, 214 }, 215 }, 216 { 217 { 218 model.Column{Name: "decimal", Value: "129012.1230000", Type: mysql.TypeNewDecimal}, 219 rowcodec.ColInfo{ 220 ID: 14, 221 IsPKHandle: false, 222 VirtualGenCol: false, 223 Ft: types.NewFieldType(mysql.TypeNewDecimal), 224 }, 225 "129012.1230000", 226 config.BinaryEncodingBase64, 227 }, 228 }, 229 { 230 { 231 model.Column{Name: "tinytext", Value: []byte("hello world"), Type: mysql.TypeTinyBlob}, 232 rowcodec.ColInfo{ 233 ID: 15, 234 IsPKHandle: false, 235 VirtualGenCol: false, 236 Ft: types.NewFieldType(mysql.TypeBlob), 237 }, 238 "hello world", 239 config.BinaryEncodingBase64, 240 }, 241 { 242 model.Column{Name: "mediumtext", Value: []byte("hello world"), Type: mysql.TypeMediumBlob}, 243 rowcodec.ColInfo{ 244 ID: 16, 245 IsPKHandle: false, 246 VirtualGenCol: false, 247 Ft: types.NewFieldType(mysql.TypeMediumBlob), 248 }, 249 "hello world", 250 config.BinaryEncodingBase64, 251 }, 252 { 253 model.Column{Name: "text", Value: []byte("hello world"), Type: mysql.TypeBlob}, 254 rowcodec.ColInfo{ 255 ID: 17, 256 IsPKHandle: false, 257 VirtualGenCol: false, 258 Ft: types.NewFieldType(mysql.TypeBlob), 259 }, 260 "hello world", 261 config.BinaryEncodingBase64, 262 }, 263 { 264 model.Column{Name: "longtext", Value: []byte("hello world"), Type: mysql.TypeLongBlob}, 265 rowcodec.ColInfo{ 266 ID: 18, 267 IsPKHandle: false, 268 VirtualGenCol: false, 269 Ft: types.NewFieldType(mysql.TypeLongBlob), 270 }, 271 "hello world", 272 config.BinaryEncodingBase64, 273 }, 274 { 275 model.Column{Name: "varchar", Value: []byte("hello world"), Type: mysql.TypeVarchar}, 276 rowcodec.ColInfo{ 277 ID: 19, 278 IsPKHandle: false, 279 VirtualGenCol: false, 280 Ft: types.NewFieldType(mysql.TypeVarchar), 281 }, 282 "hello world", 283 config.BinaryEncodingBase64, 284 }, 285 { 286 model.Column{Name: "varstring", Value: []byte("hello world"), Type: mysql.TypeVarString}, 287 rowcodec.ColInfo{ 288 ID: 20, 289 IsPKHandle: false, 290 VirtualGenCol: false, 291 Ft: types.NewFieldType(mysql.TypeVarString), 292 }, 293 "hello world", 294 config.BinaryEncodingBase64, 295 }, 296 { 297 model.Column{Name: "string", Value: []byte("hello world"), Type: mysql.TypeString}, 298 rowcodec.ColInfo{ 299 ID: 21, 300 IsPKHandle: false, 301 VirtualGenCol: false, 302 Ft: types.NewFieldType(mysql.TypeString), 303 }, 304 "hello world", 305 config.BinaryEncodingBase64, 306 }, 307 { 308 model.Column{Name: "json", Value: `{"key": "value"}`, Type: mysql.TypeJSON}, 309 rowcodec.ColInfo{ 310 ID: 31, 311 IsPKHandle: false, 312 VirtualGenCol: false, 313 Ft: types.NewFieldType(mysql.TypeJSON), 314 }, 315 `{"key": "value"}`, 316 config.BinaryEncodingBase64, 317 }, 318 }, 319 { 320 { 321 model.Column{ 322 Name: "tinyblob", 323 Value: []byte("hello world"), 324 Type: mysql.TypeTinyBlob, 325 Flag: model.BinaryFlag, 326 }, 327 rowcodec.ColInfo{ 328 ID: 22, 329 IsPKHandle: false, 330 VirtualGenCol: false, 331 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeTinyBlob)), 332 }, 333 "aGVsbG8gd29ybGQ=", 334 config.BinaryEncodingBase64, 335 }, 336 { 337 model.Column{ 338 Name: "mediumblob", 339 Value: []byte("hello world"), 340 Type: mysql.TypeMediumBlob, 341 Flag: model.BinaryFlag, 342 }, 343 rowcodec.ColInfo{ 344 ID: 23, 345 IsPKHandle: false, 346 VirtualGenCol: false, 347 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeMediumBlob)), 348 }, 349 "aGVsbG8gd29ybGQ=", 350 config.BinaryEncodingBase64, 351 }, 352 { 353 model.Column{ 354 Name: "blob", 355 Value: []byte("hello world"), 356 Type: mysql.TypeBlob, 357 Flag: model.BinaryFlag, 358 }, 359 rowcodec.ColInfo{ 360 ID: 24, 361 IsPKHandle: false, 362 VirtualGenCol: false, 363 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeBlob)), 364 }, 365 "aGVsbG8gd29ybGQ=", 366 config.BinaryEncodingBase64, 367 }, 368 { 369 model.Column{ 370 Name: "longblob", 371 Value: []byte("hello world"), 372 Type: mysql.TypeLongBlob, 373 Flag: model.BinaryFlag, 374 }, 375 rowcodec.ColInfo{ 376 ID: 25, 377 IsPKHandle: false, 378 VirtualGenCol: false, 379 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeLongBlob)), 380 }, 381 "aGVsbG8gd29ybGQ=", 382 config.BinaryEncodingBase64, 383 }, 384 { 385 model.Column{ 386 Name: "varbinary", 387 Value: []byte("hello world"), 388 Type: mysql.TypeVarchar, 389 Flag: model.BinaryFlag, 390 }, 391 rowcodec.ColInfo{ 392 ID: 26, 393 IsPKHandle: false, 394 VirtualGenCol: false, 395 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeVarchar)), 396 }, 397 "aGVsbG8gd29ybGQ=", 398 config.BinaryEncodingBase64, 399 }, 400 { 401 model.Column{ 402 Name: "varbinary1", 403 Value: []byte("hello world"), 404 Type: mysql.TypeVarString, 405 Flag: model.BinaryFlag, 406 }, 407 rowcodec.ColInfo{ 408 ID: 27, 409 IsPKHandle: false, 410 VirtualGenCol: false, 411 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeVarString)), 412 }, 413 "aGVsbG8gd29ybGQ=", 414 config.BinaryEncodingBase64, 415 }, 416 { 417 model.Column{ 418 Name: "binary", 419 Value: []byte("hello world"), 420 Type: mysql.TypeString, 421 Flag: model.BinaryFlag, 422 }, 423 rowcodec.ColInfo{ 424 ID: 28, 425 IsPKHandle: false, 426 VirtualGenCol: false, 427 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeString)), 428 }, 429 "aGVsbG8gd29ybGQ=", 430 config.BinaryEncodingBase64, 431 }, 432 }, 433 { 434 { 435 model.Column{ 436 Name: "tinyblob", 437 Value: []byte("hello world"), 438 Type: mysql.TypeTinyBlob, 439 Flag: model.BinaryFlag, 440 }, 441 rowcodec.ColInfo{ 442 ID: 22, 443 IsPKHandle: false, 444 VirtualGenCol: false, 445 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeTinyBlob)), 446 }, 447 "68656c6c6f20776f726c64", 448 config.BinaryEncodingHex, 449 }, 450 { 451 model.Column{ 452 Name: "mediumblob", 453 Value: []byte("hello world"), 454 Type: mysql.TypeMediumBlob, 455 Flag: model.BinaryFlag, 456 }, 457 rowcodec.ColInfo{ 458 ID: 23, 459 IsPKHandle: false, 460 VirtualGenCol: false, 461 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeMediumBlob)), 462 }, 463 "68656c6c6f20776f726c64", 464 config.BinaryEncodingHex, 465 }, 466 { 467 model.Column{ 468 Name: "blob", 469 Value: []byte("hello world"), 470 Type: mysql.TypeBlob, 471 Flag: model.BinaryFlag, 472 }, 473 rowcodec.ColInfo{ 474 ID: 24, 475 IsPKHandle: false, 476 VirtualGenCol: false, 477 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeBlob)), 478 }, 479 "68656c6c6f20776f726c64", 480 config.BinaryEncodingHex, 481 }, 482 { 483 model.Column{ 484 Name: "longblob", 485 Value: []byte("hello world"), 486 Type: mysql.TypeLongBlob, 487 Flag: model.BinaryFlag, 488 }, 489 rowcodec.ColInfo{ 490 ID: 25, 491 IsPKHandle: false, 492 VirtualGenCol: false, 493 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeLongBlob)), 494 }, 495 "68656c6c6f20776f726c64", 496 config.BinaryEncodingHex, 497 }, 498 { 499 model.Column{ 500 Name: "varbinary", 501 Value: []byte("hello world"), 502 Type: mysql.TypeVarchar, 503 Flag: model.BinaryFlag, 504 }, 505 rowcodec.ColInfo{ 506 ID: 26, 507 IsPKHandle: false, 508 VirtualGenCol: false, 509 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeVarchar)), 510 }, 511 "68656c6c6f20776f726c64", 512 config.BinaryEncodingHex, 513 }, 514 { 515 model.Column{ 516 Name: "varbinary1", 517 Value: []byte("hello world"), 518 Type: mysql.TypeVarString, 519 Flag: model.BinaryFlag, 520 }, 521 rowcodec.ColInfo{ 522 ID: 27, 523 IsPKHandle: false, 524 VirtualGenCol: false, 525 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeVarString)), 526 }, 527 "68656c6c6f20776f726c64", 528 config.BinaryEncodingHex, 529 }, 530 { 531 model.Column{ 532 Name: "binary", 533 Value: []byte("hello world"), 534 Type: mysql.TypeString, 535 Flag: model.BinaryFlag, 536 }, 537 rowcodec.ColInfo{ 538 ID: 28, 539 IsPKHandle: false, 540 VirtualGenCol: false, 541 Ft: setBinChsClnFlag(types.NewFieldType(mysql.TypeString)), 542 }, 543 "68656c6c6f20776f726c64", 544 config.BinaryEncodingHex, 545 }, 546 }, 547 { 548 { 549 model.Column{Name: "enum", Value: uint64(1), Type: mysql.TypeEnum}, 550 rowcodec.ColInfo{ 551 ID: 29, 552 IsPKHandle: false, 553 VirtualGenCol: false, 554 Ft: setElems(types.NewFieldType(mysql.TypeEnum), []string{"a,", "b"}), 555 }, 556 "a,", 557 config.BinaryEncodingBase64, 558 }, 559 }, 560 { 561 { 562 model.Column{Name: "set", Value: uint64(9), Type: mysql.TypeSet}, 563 rowcodec.ColInfo{ 564 ID: 30, 565 IsPKHandle: false, 566 VirtualGenCol: false, 567 Ft: setElems(types.NewFieldType(mysql.TypeSet), []string{"a", "b", "c", "d"}), 568 }, 569 "a,d", 570 config.BinaryEncodingBase64, 571 }, 572 }, 573 { 574 { 575 model.Column{Name: "date", Value: "2000-01-01", Type: mysql.TypeDate}, 576 rowcodec.ColInfo{ 577 ID: 32, 578 IsPKHandle: false, 579 VirtualGenCol: false, 580 Ft: types.NewFieldType(mysql.TypeDate), 581 }, 582 "2000-01-01", 583 config.BinaryEncodingBase64, 584 }, 585 { 586 model.Column{Name: "datetime", Value: "2015-12-20 23:58:58", Type: mysql.TypeDatetime}, 587 rowcodec.ColInfo{ 588 ID: 33, 589 IsPKHandle: false, 590 VirtualGenCol: false, 591 Ft: types.NewFieldType(mysql.TypeDatetime), 592 }, 593 "2015-12-20 23:58:58", 594 config.BinaryEncodingBase64, 595 }, 596 { 597 model.Column{Name: "timestamp", Value: "1973-12-30 15:30:00", Type: mysql.TypeTimestamp}, 598 rowcodec.ColInfo{ 599 ID: 34, 600 IsPKHandle: false, 601 VirtualGenCol: false, 602 Ft: types.NewFieldType(mysql.TypeTimestamp), 603 }, 604 "1973-12-30 15:30:00", 605 config.BinaryEncodingBase64, 606 }, 607 { 608 model.Column{Name: "time", Value: "23:59:59", Type: mysql.TypeDuration}, 609 rowcodec.ColInfo{ 610 ID: 35, 611 IsPKHandle: false, 612 VirtualGenCol: false, 613 Ft: types.NewFieldType(mysql.TypeDuration), 614 }, 615 "23:59:59", 616 config.BinaryEncodingBase64, 617 }, 618 }, 619 { 620 { 621 model.Column{Name: "year", Value: int64(1970), Type: mysql.TypeYear}, 622 rowcodec.ColInfo{ 623 ID: 36, 624 IsPKHandle: false, 625 VirtualGenCol: false, 626 Ft: types.NewFieldType(mysql.TypeYear), 627 }, 628 int64(1970), 629 config.BinaryEncodingBase64, 630 }, 631 }, 632 } 633 634 func setBinChsClnFlag(ft *types.FieldType) *types.FieldType { 635 types.SetBinChsClnFlag(ft) 636 return ft 637 } 638 639 //nolint:unparam 640 func setFlag(ft *types.FieldType, flag uint) *types.FieldType { 641 ft.SetFlag(flag) 642 return ft 643 } 644 645 func setElems(ft *types.FieldType, elems []string) *types.FieldType { 646 ft.SetElems(elems) 647 return ft 648 } 649 650 func TestFormatWithQuotes(t *testing.T) { 651 config := &common.Config{ 652 Quote: "\"", 653 } 654 655 testCases := []struct { 656 name string 657 input string 658 expected string 659 }{ 660 { 661 name: "string does not contain quote mark", 662 input: "a,b,c", 663 expected: `"a,b,c"`, 664 }, 665 { 666 name: "string contains quote mark", 667 input: `"a,b,c`, 668 expected: `"""a,b,c"`, 669 }, 670 { 671 name: "empty string", 672 input: "", 673 expected: `""`, 674 }, 675 } 676 for _, tc := range testCases { 677 csvMessage := newCSVMessage(config) 678 strBuilder := new(strings.Builder) 679 csvMessage.formatWithQuotes(tc.input, strBuilder) 680 require.Equal(t, tc.expected, strBuilder.String(), tc.name) 681 } 682 } 683 684 func TestFormatWithEscape(t *testing.T) { 685 testCases := []struct { 686 name string 687 config *common.Config 688 input string 689 expected string 690 }{ 691 { 692 name: "string does not contain CR/LF/backslash/delimiter", 693 config: &common.Config{Delimiter: ","}, 694 input: "abcdef", 695 expected: "abcdef", 696 }, 697 { 698 name: "string contains CRLF", 699 config: &common.Config{Delimiter: ","}, 700 input: "abc\r\ndef", 701 expected: "abc\\r\\ndef", 702 }, 703 { 704 name: "string contains backslash", 705 config: &common.Config{Delimiter: ","}, 706 input: `abc\def`, 707 expected: `abc\\def`, 708 }, 709 { 710 name: "string contains a single character delimiter", 711 config: &common.Config{Delimiter: ","}, 712 input: "abc,def", 713 expected: `abc\,def`, 714 }, 715 { 716 name: "string contains multi-character delimiter", 717 config: &common.Config{Delimiter: "***"}, 718 input: "abc***def", 719 expected: `abc\*\*\*def`, 720 }, 721 { 722 name: "string contains CR, LF, backslash and delimiter", 723 config: &common.Config{Delimiter: "?"}, 724 input: `abc\def?ghi\r\n`, 725 expected: `abc\\def\?ghi\\r\\n`, 726 }, 727 } 728 729 for _, tc := range testCases { 730 csvMessage := newCSVMessage(tc.config) 731 strBuilder := new(strings.Builder) 732 csvMessage.formatWithEscapes(tc.input, strBuilder) 733 require.Equal(t, tc.expected, strBuilder.String()) 734 } 735 } 736 737 func TestCSVMessageEncode(t *testing.T) { 738 type fields struct { 739 config *common.Config 740 opType operation 741 tableName string 742 schemaName string 743 commitTs uint64 744 preColumns []any 745 columns []any 746 HandleKey kv.Handle 747 } 748 testCases := []struct { 749 name string 750 fields fields 751 want []byte 752 }{ 753 { 754 name: "csv encode with typical configurations", 755 fields: fields{ 756 config: &common.Config{ 757 Delimiter: ",", 758 Quote: "\"", 759 Terminator: "\n", 760 NullString: "\\N", 761 IncludeCommitTs: true, 762 }, 763 opType: operationInsert, 764 tableName: "table1", 765 schemaName: "test", 766 commitTs: 435661838416609281, 767 columns: []any{123, "hello,world"}, 768 }, 769 want: []byte("\"I\",\"table1\",\"test\",435661838416609281,123,\"hello,world\"\n"), 770 }, 771 { 772 name: "csv encode values containing single-character delimter string, without quote mark", 773 fields: fields{ 774 config: &common.Config{ 775 Delimiter: "!", 776 Quote: "", 777 Terminator: "\n", 778 NullString: "\\N", 779 IncludeCommitTs: true, 780 }, 781 opType: operationUpdate, 782 tableName: "table2", 783 schemaName: "test", 784 commitTs: 435661838416609281, 785 columns: []any{"a!b!c", "def"}, 786 }, 787 want: []byte(`U!table2!test!435661838416609281!a\!b\!c!def` + "\n"), 788 }, 789 { 790 name: "csv encode values containing single-character delimter string, without quote mark, update with old value", 791 fields: fields{ 792 config: &common.Config{ 793 Delimiter: "!", 794 Quote: "", 795 Terminator: "\n", 796 NullString: "\\N", 797 IncludeCommitTs: true, 798 OutputOldValue: true, 799 OutputHandleKey: true, 800 }, 801 opType: operationUpdate, 802 tableName: "table2", 803 schemaName: "test", 804 commitTs: 435661838416609281, 805 preColumns: []any{"a!b!c", "abc"}, 806 columns: []any{"a!b!c", "def"}, 807 HandleKey: kv.IntHandle(1), 808 }, 809 want: []byte(`D!table2!test!435661838416609281!true!1!a\!b\!c!abc` + "\n" + 810 `I!table2!test!435661838416609281!true!1!a\!b\!c!def` + "\n"), 811 }, 812 { 813 name: "csv encode values containing single-character delimter string, without quote mark, update with old value", 814 fields: fields{ 815 config: &common.Config{ 816 Delimiter: "!", 817 Quote: "", 818 Terminator: "\n", 819 NullString: "\\N", 820 IncludeCommitTs: true, 821 OutputOldValue: true, 822 }, 823 opType: operationInsert, 824 tableName: "table2", 825 schemaName: "test", 826 commitTs: 435661838416609281, 827 columns: []any{"a!b!c", "def"}, 828 }, 829 want: []byte(`I!table2!test!435661838416609281!false!a\!b\!c!def` + "\n"), 830 }, 831 { 832 name: "csv encode values containing single-character delimter string, with quote mark", 833 fields: fields{ 834 config: &common.Config{ 835 Delimiter: ",", 836 Quote: "\"", 837 Terminator: "\n", 838 NullString: "\\N", 839 IncludeCommitTs: true, 840 }, 841 opType: operationUpdate, 842 tableName: "table3", 843 schemaName: "test", 844 commitTs: 435661838416609281, 845 columns: []any{"a,b,c", "def", "2022-08-31 17:07:00"}, 846 }, 847 want: []byte(`"U","table3","test",435661838416609281,"a,b,c","def","2022-08-31 17:07:00"` + "\n"), 848 }, 849 { 850 name: "csv encode values containing multi-character delimiter string, without quote mark", 851 fields: fields{ 852 config: &common.Config{ 853 Delimiter: "[*]", 854 Quote: "", 855 Terminator: "\r\n", 856 NullString: "\\N", 857 IncludeCommitTs: false, 858 }, 859 opType: operationDelete, 860 tableName: "table4", 861 schemaName: "test", 862 commitTs: 435661838416609281, 863 columns: []any{"a[*]b[*]c", "def"}, 864 }, 865 want: []byte(`D[*]table4[*]test[*]a\[\*\]b\[\*\]c[*]def` + "\r\n"), 866 }, 867 { 868 name: "csv encode with values containing multi-character delimiter string, with quote mark", 869 fields: fields{ 870 config: &common.Config{ 871 Delimiter: "[*]", 872 Quote: "'", 873 Terminator: "\n", 874 NullString: "\\N", 875 IncludeCommitTs: false, 876 }, 877 opType: operationInsert, 878 tableName: "table5", 879 schemaName: "test", 880 commitTs: 435661838416609281, 881 columns: []any{"a[*]b[*]c", "def", nil, 12345.678}, 882 }, 883 want: []byte(`'I'[*]'table5'[*]'test'[*]'a[*]b[*]c'[*]'def'[*]\N[*]12345.678` + "\n"), 884 }, 885 { 886 name: "csv encode with values containing backslash and LF, without quote mark", 887 fields: fields{ 888 config: &common.Config{ 889 Delimiter: ",", 890 Quote: "", 891 Terminator: "\n", 892 NullString: "\\N", 893 IncludeCommitTs: true, 894 }, 895 opType: operationUpdate, 896 tableName: "table6", 897 schemaName: "test", 898 commitTs: 435661838416609281, 899 columns: []any{"a\\b\\c", "def\n"}, 900 }, 901 want: []byte(`U,table6,test,435661838416609281,a\\b\\c,def\n` + "\n"), 902 }, 903 { 904 name: "csv encode with values containing backslash and CR, with quote mark", 905 fields: fields{ 906 config: &common.Config{ 907 Delimiter: ",", 908 Quote: "'", 909 Terminator: "\n", 910 NullString: "\\N", 911 IncludeCommitTs: false, 912 }, 913 opType: operationInsert, 914 tableName: "table7", 915 schemaName: "test", 916 commitTs: 435661838416609281, 917 columns: []any{"\\", "\\\r", "\\\\"}, 918 }, 919 want: []byte("'I','table7','test','\\','\\\r','\\\\'" + "\n"), 920 }, 921 { 922 name: "csv encode with values containing unicode characters", 923 fields: fields{ 924 config: &common.Config{ 925 Delimiter: "\t", 926 Quote: "\"", 927 Terminator: "\n", 928 NullString: "\\N", 929 IncludeCommitTs: true, 930 }, 931 opType: operationDelete, 932 tableName: "table8", 933 schemaName: "test", 934 commitTs: 435661838416609281, 935 columns: []any{"a\tb", 123.456, "你好,世界"}, 936 }, 937 want: []byte("\"D\"\t\"table8\"\t\"test\"\t435661838416609281\t\"a\tb\"\t123.456\t\"你好,世界\"\n"), 938 }, 939 } 940 for _, tc := range testCases { 941 t.Run(tc.name, func(t *testing.T) { 942 c := &csvMessage{ 943 config: tc.fields.config, 944 opType: tc.fields.opType, 945 tableName: tc.fields.tableName, 946 schemaName: tc.fields.schemaName, 947 commitTs: tc.fields.commitTs, 948 columns: tc.fields.columns, 949 preColumns: tc.fields.preColumns, 950 newRecord: true, 951 HandleKey: tc.fields.HandleKey, 952 } 953 954 require.Equal(t, tc.want, c.encode()) 955 }) 956 } 957 } 958 959 func TestConvertToCSVType(t *testing.T) { 960 for _, group := range csvTestColumnsGroup { 961 for _, c := range group { 962 val, _ := fromColValToCsvVal(&common.Config{ 963 BinaryEncodingMethod: c.BinaryEncodingMethod, 964 }, &c.col, c.colInfo.Ft) 965 require.Equal(t, c.want, val, c.col.Name) 966 } 967 } 968 } 969 970 func TestRowChangeEventConversion(t *testing.T) { 971 for idx, group := range csvTestColumnsGroup { 972 row := &model.RowChangedEvent{} 973 cols := make([]*model.Column, 0) 974 colInfos := make([]rowcodec.ColInfo, 0) 975 for _, c := range group { 976 cols = append(cols, &c.col) 977 colInfos = append(colInfos, c.colInfo) 978 } 979 tidbTableInfo := model.BuildTiDBTableInfo(fmt.Sprintf("table%d", idx), cols, nil) 980 model.AddExtraColumnInfo(tidbTableInfo, colInfos) 981 row.TableInfo = model.WrapTableInfo(100, "test", 100, tidbTableInfo) 982 983 if idx%3 == 0 { // delete operation 984 row.PreColumns = model.Columns2ColumnDatas(cols, row.TableInfo) 985 } else if idx%3 == 1 { // insert operation 986 row.Columns = model.Columns2ColumnDatas(cols, row.TableInfo) 987 } else { // update operation 988 row.PreColumns = model.Columns2ColumnDatas(cols, row.TableInfo) 989 row.Columns = model.Columns2ColumnDatas(cols, row.TableInfo) 990 } 991 csvMsg, err := rowChangedEvent2CSVMsg(&common.Config{ 992 Delimiter: "\t", 993 Quote: "\"", 994 Terminator: "\n", 995 NullString: "\\N", 996 IncludeCommitTs: true, 997 BinaryEncodingMethod: group[0].BinaryEncodingMethod, 998 }, row) 999 require.NotNil(t, csvMsg) 1000 require.Nil(t, err) 1001 1002 row2, err := csvMsg2RowChangedEvent(&common.Config{ 1003 BinaryEncodingMethod: group[0].BinaryEncodingMethod, 1004 }, csvMsg, row.TableInfo) 1005 require.Nil(t, err) 1006 require.NotNil(t, row2) 1007 } 1008 } 1009 1010 func TestCSVMessageDecode(t *testing.T) { 1011 // datums := make([][]types.Datum, 0, 4) 1012 testCases := []struct { 1013 row []types.Datum 1014 expectedCommitTs uint64 1015 expectedColsCnt int 1016 expectedErr string 1017 }{ 1018 { 1019 row: []types.Datum{ 1020 types.NewStringDatum("I"), 1021 types.NewStringDatum("employee"), 1022 types.NewStringDatum("hr"), 1023 types.NewStringDatum("433305438660591626"), 1024 types.NewStringDatum("101"), 1025 types.NewStringDatum("Smith"), 1026 types.NewStringDatum("Bob"), 1027 types.NewStringDatum("2014-06-04"), 1028 types.NewDatum(nil), 1029 }, 1030 expectedCommitTs: 433305438660591626, 1031 expectedColsCnt: 5, 1032 expectedErr: "", 1033 }, 1034 { 1035 row: []types.Datum{ 1036 types.NewStringDatum("U"), 1037 types.NewStringDatum("employee"), 1038 types.NewStringDatum("hr"), 1039 types.NewStringDatum("433305438660591627"), 1040 types.NewStringDatum("101"), 1041 types.NewStringDatum("Smith"), 1042 types.NewStringDatum("Bob"), 1043 types.NewStringDatum("2015-10-08"), 1044 types.NewStringDatum("Los Angeles"), 1045 }, 1046 expectedCommitTs: 433305438660591627, 1047 expectedColsCnt: 5, 1048 expectedErr: "", 1049 }, 1050 { 1051 row: []types.Datum{ 1052 types.NewStringDatum("D"), 1053 types.NewStringDatum("employee"), 1054 types.NewStringDatum("hr"), 1055 }, 1056 expectedCommitTs: 0, 1057 expectedColsCnt: 0, 1058 expectedErr: "the csv row should have at least four columns", 1059 }, 1060 { 1061 row: []types.Datum{ 1062 types.NewStringDatum("D"), 1063 types.NewStringDatum("employee"), 1064 types.NewStringDatum("hr"), 1065 types.NewStringDatum("hello world"), 1066 }, 1067 expectedCommitTs: 0, 1068 expectedColsCnt: 0, 1069 expectedErr: "the 4th column(hello world) of csv row should be a valid commit-ts", 1070 }, 1071 } 1072 for _, tc := range testCases { 1073 csvMsg := newCSVMessage(&common.Config{ 1074 Delimiter: ",", 1075 Quote: "\"", 1076 Terminator: "\n", 1077 NullString: "\\N", 1078 IncludeCommitTs: true, 1079 }) 1080 err := csvMsg.decode(tc.row) 1081 if tc.expectedErr != "" { 1082 require.Contains(t, err.Error(), tc.expectedErr) 1083 } else { 1084 require.Nil(t, err) 1085 require.Equal(t, tc.expectedCommitTs, csvMsg.commitTs) 1086 require.Equal(t, tc.expectedColsCnt, len(csvMsg.columns)) 1087 } 1088 } 1089 }