github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/csv/csv_message_test.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package csv
    15  
    16  import (
    17  	"fmt"
    18  	"strings"
    19  	"testing"
    20  
    21  	"github.com/pingcap/tidb/pkg/kv"
    22  	"github.com/pingcap/tidb/pkg/parser/mysql"
    23  	"github.com/pingcap/tidb/pkg/types"
    24  	"github.com/pingcap/tidb/pkg/util/rowcodec"
    25  	"github.com/pingcap/tiflow/cdc/model"
    26  	"github.com/pingcap/tiflow/pkg/config"
    27  	"github.com/pingcap/tiflow/pkg/sink/codec/common"
    28  	"github.com/stretchr/testify/require"
    29  )
    30  
    31  type csvTestColumnTuple struct {
    32  	col                  model.Column
    33  	colInfo              rowcodec.ColInfo
    34  	want                 interface{}
    35  	BinaryEncodingMethod string
    36  }
    37  
    38  var csvTestColumnsGroup = [][]*csvTestColumnTuple{
    39  	{
    40  		{
    41  			model.Column{Name: "tiny", Value: int64(1), Type: mysql.TypeTiny},
    42  			rowcodec.ColInfo{
    43  				ID:            1,
    44  				IsPKHandle:    false,
    45  				VirtualGenCol: false,
    46  				Ft:            types.NewFieldType(mysql.TypeTiny),
    47  			},
    48  			int64(1),
    49  			config.BinaryEncodingBase64,
    50  		},
    51  		{
    52  			model.Column{Name: "short", Value: int64(1), Type: mysql.TypeShort},
    53  			rowcodec.ColInfo{
    54  				ID:            2,
    55  				IsPKHandle:    false,
    56  				VirtualGenCol: false,
    57  				Ft:            types.NewFieldType(mysql.TypeShort),
    58  			},
    59  			int64(1),
    60  			config.BinaryEncodingBase64,
    61  		},
    62  		{
    63  			model.Column{Name: "int24", Value: int64(1), Type: mysql.TypeInt24},
    64  			rowcodec.ColInfo{
    65  				ID:            3,
    66  				IsPKHandle:    false,
    67  				VirtualGenCol: false,
    68  				Ft:            types.NewFieldType(mysql.TypeInt24),
    69  			},
    70  			int64(1),
    71  			config.BinaryEncodingBase64,
    72  		},
    73  		{
    74  			model.Column{Name: "long", Value: int64(1), Type: mysql.TypeLong},
    75  			rowcodec.ColInfo{
    76  				ID:            4,
    77  				IsPKHandle:    false,
    78  				VirtualGenCol: false,
    79  				Ft:            types.NewFieldType(mysql.TypeLong),
    80  			},
    81  			int64(1),
    82  			config.BinaryEncodingBase64,
    83  		},
    84  		{
    85  			model.Column{Name: "longlong", Value: int64(1), Type: mysql.TypeLonglong},
    86  			rowcodec.ColInfo{
    87  				ID:            5,
    88  				IsPKHandle:    false,
    89  				VirtualGenCol: false,
    90  				Ft:            types.NewFieldType(mysql.TypeLonglong),
    91  			},
    92  			int64(1),
    93  			config.BinaryEncodingBase64,
    94  		},
    95  		{
    96  			model.Column{
    97  				Name:  "tinyunsigned",
    98  				Value: uint64(1),
    99  				Type:  mysql.TypeTiny,
   100  				Flag:  model.UnsignedFlag,
   101  			},
   102  			rowcodec.ColInfo{
   103  				ID:            6,
   104  				IsPKHandle:    false,
   105  				VirtualGenCol: false,
   106  				Ft:            setFlag(types.NewFieldType(mysql.TypeTiny), uint(model.UnsignedFlag)),
   107  			},
   108  			uint64(1),
   109  			config.BinaryEncodingBase64,
   110  		},
   111  		{
   112  			model.Column{
   113  				Name:  "shortunsigned",
   114  				Value: uint64(1),
   115  				Type:  mysql.TypeShort,
   116  				Flag:  model.UnsignedFlag,
   117  			},
   118  			rowcodec.ColInfo{
   119  				ID:            7,
   120  				IsPKHandle:    false,
   121  				VirtualGenCol: false,
   122  				Ft:            setFlag(types.NewFieldType(mysql.TypeShort), uint(model.UnsignedFlag)),
   123  			},
   124  			uint64(1),
   125  			config.BinaryEncodingBase64,
   126  		},
   127  		{
   128  			model.Column{
   129  				Name:  "int24unsigned",
   130  				Value: uint64(1),
   131  				Type:  mysql.TypeInt24,
   132  				Flag:  model.UnsignedFlag,
   133  			},
   134  			rowcodec.ColInfo{
   135  				ID:            8,
   136  				IsPKHandle:    false,
   137  				VirtualGenCol: false,
   138  				Ft:            setFlag(types.NewFieldType(mysql.TypeInt24), uint(model.UnsignedFlag)),
   139  			},
   140  			uint64(1),
   141  			config.BinaryEncodingBase64,
   142  		},
   143  		{
   144  			model.Column{
   145  				Name:  "longunsigned",
   146  				Value: uint64(1),
   147  				Type:  mysql.TypeLong,
   148  				Flag:  model.UnsignedFlag,
   149  			},
   150  			rowcodec.ColInfo{
   151  				ID:            9,
   152  				IsPKHandle:    false,
   153  				VirtualGenCol: false,
   154  				Ft:            setFlag(types.NewFieldType(mysql.TypeLong), uint(model.UnsignedFlag)),
   155  			},
   156  			uint64(1),
   157  			config.BinaryEncodingBase64,
   158  		},
   159  		{
   160  			model.Column{
   161  				Name:  "longlongunsigned",
   162  				Value: uint64(1),
   163  				Type:  mysql.TypeLonglong,
   164  				Flag:  model.UnsignedFlag,
   165  			},
   166  			rowcodec.ColInfo{
   167  				ID:            10,
   168  				IsPKHandle:    false,
   169  				VirtualGenCol: false,
   170  				Ft: setFlag(
   171  					types.NewFieldType(mysql.TypeLonglong),
   172  					uint(model.UnsignedFlag),
   173  				),
   174  			},
   175  			uint64(1),
   176  			config.BinaryEncodingBase64,
   177  		},
   178  	},
   179  	{
   180  		{
   181  			model.Column{Name: "float", Value: float64(3.14), Type: mysql.TypeFloat},
   182  			rowcodec.ColInfo{
   183  				ID:            11,
   184  				IsPKHandle:    false,
   185  				VirtualGenCol: false,
   186  				Ft:            types.NewFieldType(mysql.TypeFloat),
   187  			},
   188  			float64(3.14),
   189  			config.BinaryEncodingBase64,
   190  		},
   191  		{
   192  			model.Column{Name: "double", Value: float64(3.14), Type: mysql.TypeDouble},
   193  			rowcodec.ColInfo{
   194  				ID:            12,
   195  				IsPKHandle:    false,
   196  				VirtualGenCol: false,
   197  				Ft:            types.NewFieldType(mysql.TypeDouble),
   198  			},
   199  			float64(3.14),
   200  			config.BinaryEncodingBase64,
   201  		},
   202  	},
   203  	{
   204  		{
   205  			model.Column{Name: "bit", Value: uint64(683), Type: mysql.TypeBit},
   206  			rowcodec.ColInfo{
   207  				ID:            13,
   208  				IsPKHandle:    false,
   209  				VirtualGenCol: false,
   210  				Ft:            types.NewFieldType(mysql.TypeBit),
   211  			},
   212  			uint64(683),
   213  			config.BinaryEncodingBase64,
   214  		},
   215  	},
   216  	{
   217  		{
   218  			model.Column{Name: "decimal", Value: "129012.1230000", Type: mysql.TypeNewDecimal},
   219  			rowcodec.ColInfo{
   220  				ID:            14,
   221  				IsPKHandle:    false,
   222  				VirtualGenCol: false,
   223  				Ft:            types.NewFieldType(mysql.TypeNewDecimal),
   224  			},
   225  			"129012.1230000",
   226  			config.BinaryEncodingBase64,
   227  		},
   228  	},
   229  	{
   230  		{
   231  			model.Column{Name: "tinytext", Value: []byte("hello world"), Type: mysql.TypeTinyBlob},
   232  			rowcodec.ColInfo{
   233  				ID:            15,
   234  				IsPKHandle:    false,
   235  				VirtualGenCol: false,
   236  				Ft:            types.NewFieldType(mysql.TypeBlob),
   237  			},
   238  			"hello world",
   239  			config.BinaryEncodingBase64,
   240  		},
   241  		{
   242  			model.Column{Name: "mediumtext", Value: []byte("hello world"), Type: mysql.TypeMediumBlob},
   243  			rowcodec.ColInfo{
   244  				ID:            16,
   245  				IsPKHandle:    false,
   246  				VirtualGenCol: false,
   247  				Ft:            types.NewFieldType(mysql.TypeMediumBlob),
   248  			},
   249  			"hello world",
   250  			config.BinaryEncodingBase64,
   251  		},
   252  		{
   253  			model.Column{Name: "text", Value: []byte("hello world"), Type: mysql.TypeBlob},
   254  			rowcodec.ColInfo{
   255  				ID:            17,
   256  				IsPKHandle:    false,
   257  				VirtualGenCol: false,
   258  				Ft:            types.NewFieldType(mysql.TypeBlob),
   259  			},
   260  			"hello world",
   261  			config.BinaryEncodingBase64,
   262  		},
   263  		{
   264  			model.Column{Name: "longtext", Value: []byte("hello world"), Type: mysql.TypeLongBlob},
   265  			rowcodec.ColInfo{
   266  				ID:            18,
   267  				IsPKHandle:    false,
   268  				VirtualGenCol: false,
   269  				Ft:            types.NewFieldType(mysql.TypeLongBlob),
   270  			},
   271  			"hello world",
   272  			config.BinaryEncodingBase64,
   273  		},
   274  		{
   275  			model.Column{Name: "varchar", Value: []byte("hello world"), Type: mysql.TypeVarchar},
   276  			rowcodec.ColInfo{
   277  				ID:            19,
   278  				IsPKHandle:    false,
   279  				VirtualGenCol: false,
   280  				Ft:            types.NewFieldType(mysql.TypeVarchar),
   281  			},
   282  			"hello world",
   283  			config.BinaryEncodingBase64,
   284  		},
   285  		{
   286  			model.Column{Name: "varstring", Value: []byte("hello world"), Type: mysql.TypeVarString},
   287  			rowcodec.ColInfo{
   288  				ID:            20,
   289  				IsPKHandle:    false,
   290  				VirtualGenCol: false,
   291  				Ft:            types.NewFieldType(mysql.TypeVarString),
   292  			},
   293  			"hello world",
   294  			config.BinaryEncodingBase64,
   295  		},
   296  		{
   297  			model.Column{Name: "string", Value: []byte("hello world"), Type: mysql.TypeString},
   298  			rowcodec.ColInfo{
   299  				ID:            21,
   300  				IsPKHandle:    false,
   301  				VirtualGenCol: false,
   302  				Ft:            types.NewFieldType(mysql.TypeString),
   303  			},
   304  			"hello world",
   305  			config.BinaryEncodingBase64,
   306  		},
   307  		{
   308  			model.Column{Name: "json", Value: `{"key": "value"}`, Type: mysql.TypeJSON},
   309  			rowcodec.ColInfo{
   310  				ID:            31,
   311  				IsPKHandle:    false,
   312  				VirtualGenCol: false,
   313  				Ft:            types.NewFieldType(mysql.TypeJSON),
   314  			},
   315  			`{"key": "value"}`,
   316  			config.BinaryEncodingBase64,
   317  		},
   318  	},
   319  	{
   320  		{
   321  			model.Column{
   322  				Name:  "tinyblob",
   323  				Value: []byte("hello world"),
   324  				Type:  mysql.TypeTinyBlob,
   325  				Flag:  model.BinaryFlag,
   326  			},
   327  			rowcodec.ColInfo{
   328  				ID:            22,
   329  				IsPKHandle:    false,
   330  				VirtualGenCol: false,
   331  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeTinyBlob)),
   332  			},
   333  			"aGVsbG8gd29ybGQ=",
   334  			config.BinaryEncodingBase64,
   335  		},
   336  		{
   337  			model.Column{
   338  				Name:  "mediumblob",
   339  				Value: []byte("hello world"),
   340  				Type:  mysql.TypeMediumBlob,
   341  				Flag:  model.BinaryFlag,
   342  			},
   343  			rowcodec.ColInfo{
   344  				ID:            23,
   345  				IsPKHandle:    false,
   346  				VirtualGenCol: false,
   347  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeMediumBlob)),
   348  			},
   349  			"aGVsbG8gd29ybGQ=",
   350  			config.BinaryEncodingBase64,
   351  		},
   352  		{
   353  			model.Column{
   354  				Name:  "blob",
   355  				Value: []byte("hello world"),
   356  				Type:  mysql.TypeBlob,
   357  				Flag:  model.BinaryFlag,
   358  			},
   359  			rowcodec.ColInfo{
   360  				ID:            24,
   361  				IsPKHandle:    false,
   362  				VirtualGenCol: false,
   363  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeBlob)),
   364  			},
   365  			"aGVsbG8gd29ybGQ=",
   366  			config.BinaryEncodingBase64,
   367  		},
   368  		{
   369  			model.Column{
   370  				Name:  "longblob",
   371  				Value: []byte("hello world"),
   372  				Type:  mysql.TypeLongBlob,
   373  				Flag:  model.BinaryFlag,
   374  			},
   375  			rowcodec.ColInfo{
   376  				ID:            25,
   377  				IsPKHandle:    false,
   378  				VirtualGenCol: false,
   379  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeLongBlob)),
   380  			},
   381  			"aGVsbG8gd29ybGQ=",
   382  			config.BinaryEncodingBase64,
   383  		},
   384  		{
   385  			model.Column{
   386  				Name:  "varbinary",
   387  				Value: []byte("hello world"),
   388  				Type:  mysql.TypeVarchar,
   389  				Flag:  model.BinaryFlag,
   390  			},
   391  			rowcodec.ColInfo{
   392  				ID:            26,
   393  				IsPKHandle:    false,
   394  				VirtualGenCol: false,
   395  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeVarchar)),
   396  			},
   397  			"aGVsbG8gd29ybGQ=",
   398  			config.BinaryEncodingBase64,
   399  		},
   400  		{
   401  			model.Column{
   402  				Name:  "varbinary1",
   403  				Value: []byte("hello world"),
   404  				Type:  mysql.TypeVarString,
   405  				Flag:  model.BinaryFlag,
   406  			},
   407  			rowcodec.ColInfo{
   408  				ID:            27,
   409  				IsPKHandle:    false,
   410  				VirtualGenCol: false,
   411  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeVarString)),
   412  			},
   413  			"aGVsbG8gd29ybGQ=",
   414  			config.BinaryEncodingBase64,
   415  		},
   416  		{
   417  			model.Column{
   418  				Name:  "binary",
   419  				Value: []byte("hello world"),
   420  				Type:  mysql.TypeString,
   421  				Flag:  model.BinaryFlag,
   422  			},
   423  			rowcodec.ColInfo{
   424  				ID:            28,
   425  				IsPKHandle:    false,
   426  				VirtualGenCol: false,
   427  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeString)),
   428  			},
   429  			"aGVsbG8gd29ybGQ=",
   430  			config.BinaryEncodingBase64,
   431  		},
   432  	},
   433  	{
   434  		{
   435  			model.Column{
   436  				Name:  "tinyblob",
   437  				Value: []byte("hello world"),
   438  				Type:  mysql.TypeTinyBlob,
   439  				Flag:  model.BinaryFlag,
   440  			},
   441  			rowcodec.ColInfo{
   442  				ID:            22,
   443  				IsPKHandle:    false,
   444  				VirtualGenCol: false,
   445  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeTinyBlob)),
   446  			},
   447  			"68656c6c6f20776f726c64",
   448  			config.BinaryEncodingHex,
   449  		},
   450  		{
   451  			model.Column{
   452  				Name:  "mediumblob",
   453  				Value: []byte("hello world"),
   454  				Type:  mysql.TypeMediumBlob,
   455  				Flag:  model.BinaryFlag,
   456  			},
   457  			rowcodec.ColInfo{
   458  				ID:            23,
   459  				IsPKHandle:    false,
   460  				VirtualGenCol: false,
   461  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeMediumBlob)),
   462  			},
   463  			"68656c6c6f20776f726c64",
   464  			config.BinaryEncodingHex,
   465  		},
   466  		{
   467  			model.Column{
   468  				Name:  "blob",
   469  				Value: []byte("hello world"),
   470  				Type:  mysql.TypeBlob,
   471  				Flag:  model.BinaryFlag,
   472  			},
   473  			rowcodec.ColInfo{
   474  				ID:            24,
   475  				IsPKHandle:    false,
   476  				VirtualGenCol: false,
   477  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeBlob)),
   478  			},
   479  			"68656c6c6f20776f726c64",
   480  			config.BinaryEncodingHex,
   481  		},
   482  		{
   483  			model.Column{
   484  				Name:  "longblob",
   485  				Value: []byte("hello world"),
   486  				Type:  mysql.TypeLongBlob,
   487  				Flag:  model.BinaryFlag,
   488  			},
   489  			rowcodec.ColInfo{
   490  				ID:            25,
   491  				IsPKHandle:    false,
   492  				VirtualGenCol: false,
   493  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeLongBlob)),
   494  			},
   495  			"68656c6c6f20776f726c64",
   496  			config.BinaryEncodingHex,
   497  		},
   498  		{
   499  			model.Column{
   500  				Name:  "varbinary",
   501  				Value: []byte("hello world"),
   502  				Type:  mysql.TypeVarchar,
   503  				Flag:  model.BinaryFlag,
   504  			},
   505  			rowcodec.ColInfo{
   506  				ID:            26,
   507  				IsPKHandle:    false,
   508  				VirtualGenCol: false,
   509  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeVarchar)),
   510  			},
   511  			"68656c6c6f20776f726c64",
   512  			config.BinaryEncodingHex,
   513  		},
   514  		{
   515  			model.Column{
   516  				Name:  "varbinary1",
   517  				Value: []byte("hello world"),
   518  				Type:  mysql.TypeVarString,
   519  				Flag:  model.BinaryFlag,
   520  			},
   521  			rowcodec.ColInfo{
   522  				ID:            27,
   523  				IsPKHandle:    false,
   524  				VirtualGenCol: false,
   525  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeVarString)),
   526  			},
   527  			"68656c6c6f20776f726c64",
   528  			config.BinaryEncodingHex,
   529  		},
   530  		{
   531  			model.Column{
   532  				Name:  "binary",
   533  				Value: []byte("hello world"),
   534  				Type:  mysql.TypeString,
   535  				Flag:  model.BinaryFlag,
   536  			},
   537  			rowcodec.ColInfo{
   538  				ID:            28,
   539  				IsPKHandle:    false,
   540  				VirtualGenCol: false,
   541  				Ft:            setBinChsClnFlag(types.NewFieldType(mysql.TypeString)),
   542  			},
   543  			"68656c6c6f20776f726c64",
   544  			config.BinaryEncodingHex,
   545  		},
   546  	},
   547  	{
   548  		{
   549  			model.Column{Name: "enum", Value: uint64(1), Type: mysql.TypeEnum},
   550  			rowcodec.ColInfo{
   551  				ID:            29,
   552  				IsPKHandle:    false,
   553  				VirtualGenCol: false,
   554  				Ft:            setElems(types.NewFieldType(mysql.TypeEnum), []string{"a,", "b"}),
   555  			},
   556  			"a,",
   557  			config.BinaryEncodingBase64,
   558  		},
   559  	},
   560  	{
   561  		{
   562  			model.Column{Name: "set", Value: uint64(9), Type: mysql.TypeSet},
   563  			rowcodec.ColInfo{
   564  				ID:            30,
   565  				IsPKHandle:    false,
   566  				VirtualGenCol: false,
   567  				Ft:            setElems(types.NewFieldType(mysql.TypeSet), []string{"a", "b", "c", "d"}),
   568  			},
   569  			"a,d",
   570  			config.BinaryEncodingBase64,
   571  		},
   572  	},
   573  	{
   574  		{
   575  			model.Column{Name: "date", Value: "2000-01-01", Type: mysql.TypeDate},
   576  			rowcodec.ColInfo{
   577  				ID:            32,
   578  				IsPKHandle:    false,
   579  				VirtualGenCol: false,
   580  				Ft:            types.NewFieldType(mysql.TypeDate),
   581  			},
   582  			"2000-01-01",
   583  			config.BinaryEncodingBase64,
   584  		},
   585  		{
   586  			model.Column{Name: "datetime", Value: "2015-12-20 23:58:58", Type: mysql.TypeDatetime},
   587  			rowcodec.ColInfo{
   588  				ID:            33,
   589  				IsPKHandle:    false,
   590  				VirtualGenCol: false,
   591  				Ft:            types.NewFieldType(mysql.TypeDatetime),
   592  			},
   593  			"2015-12-20 23:58:58",
   594  			config.BinaryEncodingBase64,
   595  		},
   596  		{
   597  			model.Column{Name: "timestamp", Value: "1973-12-30 15:30:00", Type: mysql.TypeTimestamp},
   598  			rowcodec.ColInfo{
   599  				ID:            34,
   600  				IsPKHandle:    false,
   601  				VirtualGenCol: false,
   602  				Ft:            types.NewFieldType(mysql.TypeTimestamp),
   603  			},
   604  			"1973-12-30 15:30:00",
   605  			config.BinaryEncodingBase64,
   606  		},
   607  		{
   608  			model.Column{Name: "time", Value: "23:59:59", Type: mysql.TypeDuration},
   609  			rowcodec.ColInfo{
   610  				ID:            35,
   611  				IsPKHandle:    false,
   612  				VirtualGenCol: false,
   613  				Ft:            types.NewFieldType(mysql.TypeDuration),
   614  			},
   615  			"23:59:59",
   616  			config.BinaryEncodingBase64,
   617  		},
   618  	},
   619  	{
   620  		{
   621  			model.Column{Name: "year", Value: int64(1970), Type: mysql.TypeYear},
   622  			rowcodec.ColInfo{
   623  				ID:            36,
   624  				IsPKHandle:    false,
   625  				VirtualGenCol: false,
   626  				Ft:            types.NewFieldType(mysql.TypeYear),
   627  			},
   628  			int64(1970),
   629  			config.BinaryEncodingBase64,
   630  		},
   631  	},
   632  }
   633  
   634  func setBinChsClnFlag(ft *types.FieldType) *types.FieldType {
   635  	types.SetBinChsClnFlag(ft)
   636  	return ft
   637  }
   638  
   639  //nolint:unparam
   640  func setFlag(ft *types.FieldType, flag uint) *types.FieldType {
   641  	ft.SetFlag(flag)
   642  	return ft
   643  }
   644  
   645  func setElems(ft *types.FieldType, elems []string) *types.FieldType {
   646  	ft.SetElems(elems)
   647  	return ft
   648  }
   649  
   650  func TestFormatWithQuotes(t *testing.T) {
   651  	config := &common.Config{
   652  		Quote: "\"",
   653  	}
   654  
   655  	testCases := []struct {
   656  		name     string
   657  		input    string
   658  		expected string
   659  	}{
   660  		{
   661  			name:     "string does not contain quote mark",
   662  			input:    "a,b,c",
   663  			expected: `"a,b,c"`,
   664  		},
   665  		{
   666  			name:     "string contains quote mark",
   667  			input:    `"a,b,c`,
   668  			expected: `"""a,b,c"`,
   669  		},
   670  		{
   671  			name:     "empty string",
   672  			input:    "",
   673  			expected: `""`,
   674  		},
   675  	}
   676  	for _, tc := range testCases {
   677  		csvMessage := newCSVMessage(config)
   678  		strBuilder := new(strings.Builder)
   679  		csvMessage.formatWithQuotes(tc.input, strBuilder)
   680  		require.Equal(t, tc.expected, strBuilder.String(), tc.name)
   681  	}
   682  }
   683  
   684  func TestFormatWithEscape(t *testing.T) {
   685  	testCases := []struct {
   686  		name     string
   687  		config   *common.Config
   688  		input    string
   689  		expected string
   690  	}{
   691  		{
   692  			name:     "string does not contain CR/LF/backslash/delimiter",
   693  			config:   &common.Config{Delimiter: ","},
   694  			input:    "abcdef",
   695  			expected: "abcdef",
   696  		},
   697  		{
   698  			name:     "string contains CRLF",
   699  			config:   &common.Config{Delimiter: ","},
   700  			input:    "abc\r\ndef",
   701  			expected: "abc\\r\\ndef",
   702  		},
   703  		{
   704  			name:     "string contains backslash",
   705  			config:   &common.Config{Delimiter: ","},
   706  			input:    `abc\def`,
   707  			expected: `abc\\def`,
   708  		},
   709  		{
   710  			name:     "string contains a single character delimiter",
   711  			config:   &common.Config{Delimiter: ","},
   712  			input:    "abc,def",
   713  			expected: `abc\,def`,
   714  		},
   715  		{
   716  			name:     "string contains multi-character delimiter",
   717  			config:   &common.Config{Delimiter: "***"},
   718  			input:    "abc***def",
   719  			expected: `abc\*\*\*def`,
   720  		},
   721  		{
   722  			name:     "string contains CR, LF, backslash and delimiter",
   723  			config:   &common.Config{Delimiter: "?"},
   724  			input:    `abc\def?ghi\r\n`,
   725  			expected: `abc\\def\?ghi\\r\\n`,
   726  		},
   727  	}
   728  
   729  	for _, tc := range testCases {
   730  		csvMessage := newCSVMessage(tc.config)
   731  		strBuilder := new(strings.Builder)
   732  		csvMessage.formatWithEscapes(tc.input, strBuilder)
   733  		require.Equal(t, tc.expected, strBuilder.String())
   734  	}
   735  }
   736  
   737  func TestCSVMessageEncode(t *testing.T) {
   738  	type fields struct {
   739  		config     *common.Config
   740  		opType     operation
   741  		tableName  string
   742  		schemaName string
   743  		commitTs   uint64
   744  		preColumns []any
   745  		columns    []any
   746  		HandleKey  kv.Handle
   747  	}
   748  	testCases := []struct {
   749  		name   string
   750  		fields fields
   751  		want   []byte
   752  	}{
   753  		{
   754  			name: "csv encode with typical configurations",
   755  			fields: fields{
   756  				config: &common.Config{
   757  					Delimiter:       ",",
   758  					Quote:           "\"",
   759  					Terminator:      "\n",
   760  					NullString:      "\\N",
   761  					IncludeCommitTs: true,
   762  				},
   763  				opType:     operationInsert,
   764  				tableName:  "table1",
   765  				schemaName: "test",
   766  				commitTs:   435661838416609281,
   767  				columns:    []any{123, "hello,world"},
   768  			},
   769  			want: []byte("\"I\",\"table1\",\"test\",435661838416609281,123,\"hello,world\"\n"),
   770  		},
   771  		{
   772  			name: "csv encode values containing single-character delimter string, without quote mark",
   773  			fields: fields{
   774  				config: &common.Config{
   775  					Delimiter:       "!",
   776  					Quote:           "",
   777  					Terminator:      "\n",
   778  					NullString:      "\\N",
   779  					IncludeCommitTs: true,
   780  				},
   781  				opType:     operationUpdate,
   782  				tableName:  "table2",
   783  				schemaName: "test",
   784  				commitTs:   435661838416609281,
   785  				columns:    []any{"a!b!c", "def"},
   786  			},
   787  			want: []byte(`U!table2!test!435661838416609281!a\!b\!c!def` + "\n"),
   788  		},
   789  		{
   790  			name: "csv encode values containing single-character delimter string, without quote mark, update with old value",
   791  			fields: fields{
   792  				config: &common.Config{
   793  					Delimiter:       "!",
   794  					Quote:           "",
   795  					Terminator:      "\n",
   796  					NullString:      "\\N",
   797  					IncludeCommitTs: true,
   798  					OutputOldValue:  true,
   799  					OutputHandleKey: true,
   800  				},
   801  				opType:     operationUpdate,
   802  				tableName:  "table2",
   803  				schemaName: "test",
   804  				commitTs:   435661838416609281,
   805  				preColumns: []any{"a!b!c", "abc"},
   806  				columns:    []any{"a!b!c", "def"},
   807  				HandleKey:  kv.IntHandle(1),
   808  			},
   809  			want: []byte(`D!table2!test!435661838416609281!true!1!a\!b\!c!abc` + "\n" +
   810  				`I!table2!test!435661838416609281!true!1!a\!b\!c!def` + "\n"),
   811  		},
   812  		{
   813  			name: "csv encode values containing single-character delimter string, without quote mark, update with old value",
   814  			fields: fields{
   815  				config: &common.Config{
   816  					Delimiter:       "!",
   817  					Quote:           "",
   818  					Terminator:      "\n",
   819  					NullString:      "\\N",
   820  					IncludeCommitTs: true,
   821  					OutputOldValue:  true,
   822  				},
   823  				opType:     operationInsert,
   824  				tableName:  "table2",
   825  				schemaName: "test",
   826  				commitTs:   435661838416609281,
   827  				columns:    []any{"a!b!c", "def"},
   828  			},
   829  			want: []byte(`I!table2!test!435661838416609281!false!a\!b\!c!def` + "\n"),
   830  		},
   831  		{
   832  			name: "csv encode values containing single-character delimter string, with quote mark",
   833  			fields: fields{
   834  				config: &common.Config{
   835  					Delimiter:       ",",
   836  					Quote:           "\"",
   837  					Terminator:      "\n",
   838  					NullString:      "\\N",
   839  					IncludeCommitTs: true,
   840  				},
   841  				opType:     operationUpdate,
   842  				tableName:  "table3",
   843  				schemaName: "test",
   844  				commitTs:   435661838416609281,
   845  				columns:    []any{"a,b,c", "def", "2022-08-31 17:07:00"},
   846  			},
   847  			want: []byte(`"U","table3","test",435661838416609281,"a,b,c","def","2022-08-31 17:07:00"` + "\n"),
   848  		},
   849  		{
   850  			name: "csv encode values containing multi-character delimiter string, without quote mark",
   851  			fields: fields{
   852  				config: &common.Config{
   853  					Delimiter:       "[*]",
   854  					Quote:           "",
   855  					Terminator:      "\r\n",
   856  					NullString:      "\\N",
   857  					IncludeCommitTs: false,
   858  				},
   859  				opType:     operationDelete,
   860  				tableName:  "table4",
   861  				schemaName: "test",
   862  				commitTs:   435661838416609281,
   863  				columns:    []any{"a[*]b[*]c", "def"},
   864  			},
   865  			want: []byte(`D[*]table4[*]test[*]a\[\*\]b\[\*\]c[*]def` + "\r\n"),
   866  		},
   867  		{
   868  			name: "csv encode with values containing multi-character delimiter string, with quote mark",
   869  			fields: fields{
   870  				config: &common.Config{
   871  					Delimiter:       "[*]",
   872  					Quote:           "'",
   873  					Terminator:      "\n",
   874  					NullString:      "\\N",
   875  					IncludeCommitTs: false,
   876  				},
   877  				opType:     operationInsert,
   878  				tableName:  "table5",
   879  				schemaName: "test",
   880  				commitTs:   435661838416609281,
   881  				columns:    []any{"a[*]b[*]c", "def", nil, 12345.678},
   882  			},
   883  			want: []byte(`'I'[*]'table5'[*]'test'[*]'a[*]b[*]c'[*]'def'[*]\N[*]12345.678` + "\n"),
   884  		},
   885  		{
   886  			name: "csv encode with values containing backslash and LF, without quote mark",
   887  			fields: fields{
   888  				config: &common.Config{
   889  					Delimiter:       ",",
   890  					Quote:           "",
   891  					Terminator:      "\n",
   892  					NullString:      "\\N",
   893  					IncludeCommitTs: true,
   894  				},
   895  				opType:     operationUpdate,
   896  				tableName:  "table6",
   897  				schemaName: "test",
   898  				commitTs:   435661838416609281,
   899  				columns:    []any{"a\\b\\c", "def\n"},
   900  			},
   901  			want: []byte(`U,table6,test,435661838416609281,a\\b\\c,def\n` + "\n"),
   902  		},
   903  		{
   904  			name: "csv encode with values containing backslash and CR, with quote mark",
   905  			fields: fields{
   906  				config: &common.Config{
   907  					Delimiter:       ",",
   908  					Quote:           "'",
   909  					Terminator:      "\n",
   910  					NullString:      "\\N",
   911  					IncludeCommitTs: false,
   912  				},
   913  				opType:     operationInsert,
   914  				tableName:  "table7",
   915  				schemaName: "test",
   916  				commitTs:   435661838416609281,
   917  				columns:    []any{"\\", "\\\r", "\\\\"},
   918  			},
   919  			want: []byte("'I','table7','test','\\','\\\r','\\\\'" + "\n"),
   920  		},
   921  		{
   922  			name: "csv encode with values containing unicode characters",
   923  			fields: fields{
   924  				config: &common.Config{
   925  					Delimiter:       "\t",
   926  					Quote:           "\"",
   927  					Terminator:      "\n",
   928  					NullString:      "\\N",
   929  					IncludeCommitTs: true,
   930  				},
   931  				opType:     operationDelete,
   932  				tableName:  "table8",
   933  				schemaName: "test",
   934  				commitTs:   435661838416609281,
   935  				columns:    []any{"a\tb", 123.456, "你好,世界"},
   936  			},
   937  			want: []byte("\"D\"\t\"table8\"\t\"test\"\t435661838416609281\t\"a\tb\"\t123.456\t\"你好,世界\"\n"),
   938  		},
   939  	}
   940  	for _, tc := range testCases {
   941  		t.Run(tc.name, func(t *testing.T) {
   942  			c := &csvMessage{
   943  				config:     tc.fields.config,
   944  				opType:     tc.fields.opType,
   945  				tableName:  tc.fields.tableName,
   946  				schemaName: tc.fields.schemaName,
   947  				commitTs:   tc.fields.commitTs,
   948  				columns:    tc.fields.columns,
   949  				preColumns: tc.fields.preColumns,
   950  				newRecord:  true,
   951  				HandleKey:  tc.fields.HandleKey,
   952  			}
   953  
   954  			require.Equal(t, tc.want, c.encode())
   955  		})
   956  	}
   957  }
   958  
   959  func TestConvertToCSVType(t *testing.T) {
   960  	for _, group := range csvTestColumnsGroup {
   961  		for _, c := range group {
   962  			val, _ := fromColValToCsvVal(&common.Config{
   963  				BinaryEncodingMethod: c.BinaryEncodingMethod,
   964  			}, &c.col, c.colInfo.Ft)
   965  			require.Equal(t, c.want, val, c.col.Name)
   966  		}
   967  	}
   968  }
   969  
   970  func TestRowChangeEventConversion(t *testing.T) {
   971  	for idx, group := range csvTestColumnsGroup {
   972  		row := &model.RowChangedEvent{}
   973  		cols := make([]*model.Column, 0)
   974  		colInfos := make([]rowcodec.ColInfo, 0)
   975  		for _, c := range group {
   976  			cols = append(cols, &c.col)
   977  			colInfos = append(colInfos, c.colInfo)
   978  		}
   979  		tidbTableInfo := model.BuildTiDBTableInfo(fmt.Sprintf("table%d", idx), cols, nil)
   980  		model.AddExtraColumnInfo(tidbTableInfo, colInfos)
   981  		row.TableInfo = model.WrapTableInfo(100, "test", 100, tidbTableInfo)
   982  
   983  		if idx%3 == 0 { // delete operation
   984  			row.PreColumns = model.Columns2ColumnDatas(cols, row.TableInfo)
   985  		} else if idx%3 == 1 { // insert operation
   986  			row.Columns = model.Columns2ColumnDatas(cols, row.TableInfo)
   987  		} else { // update operation
   988  			row.PreColumns = model.Columns2ColumnDatas(cols, row.TableInfo)
   989  			row.Columns = model.Columns2ColumnDatas(cols, row.TableInfo)
   990  		}
   991  		csvMsg, err := rowChangedEvent2CSVMsg(&common.Config{
   992  			Delimiter:            "\t",
   993  			Quote:                "\"",
   994  			Terminator:           "\n",
   995  			NullString:           "\\N",
   996  			IncludeCommitTs:      true,
   997  			BinaryEncodingMethod: group[0].BinaryEncodingMethod,
   998  		}, row)
   999  		require.NotNil(t, csvMsg)
  1000  		require.Nil(t, err)
  1001  
  1002  		row2, err := csvMsg2RowChangedEvent(&common.Config{
  1003  			BinaryEncodingMethod: group[0].BinaryEncodingMethod,
  1004  		}, csvMsg, row.TableInfo)
  1005  		require.Nil(t, err)
  1006  		require.NotNil(t, row2)
  1007  	}
  1008  }
  1009  
  1010  func TestCSVMessageDecode(t *testing.T) {
  1011  	// datums := make([][]types.Datum, 0, 4)
  1012  	testCases := []struct {
  1013  		row              []types.Datum
  1014  		expectedCommitTs uint64
  1015  		expectedColsCnt  int
  1016  		expectedErr      string
  1017  	}{
  1018  		{
  1019  			row: []types.Datum{
  1020  				types.NewStringDatum("I"),
  1021  				types.NewStringDatum("employee"),
  1022  				types.NewStringDatum("hr"),
  1023  				types.NewStringDatum("433305438660591626"),
  1024  				types.NewStringDatum("101"),
  1025  				types.NewStringDatum("Smith"),
  1026  				types.NewStringDatum("Bob"),
  1027  				types.NewStringDatum("2014-06-04"),
  1028  				types.NewDatum(nil),
  1029  			},
  1030  			expectedCommitTs: 433305438660591626,
  1031  			expectedColsCnt:  5,
  1032  			expectedErr:      "",
  1033  		},
  1034  		{
  1035  			row: []types.Datum{
  1036  				types.NewStringDatum("U"),
  1037  				types.NewStringDatum("employee"),
  1038  				types.NewStringDatum("hr"),
  1039  				types.NewStringDatum("433305438660591627"),
  1040  				types.NewStringDatum("101"),
  1041  				types.NewStringDatum("Smith"),
  1042  				types.NewStringDatum("Bob"),
  1043  				types.NewStringDatum("2015-10-08"),
  1044  				types.NewStringDatum("Los Angeles"),
  1045  			},
  1046  			expectedCommitTs: 433305438660591627,
  1047  			expectedColsCnt:  5,
  1048  			expectedErr:      "",
  1049  		},
  1050  		{
  1051  			row: []types.Datum{
  1052  				types.NewStringDatum("D"),
  1053  				types.NewStringDatum("employee"),
  1054  				types.NewStringDatum("hr"),
  1055  			},
  1056  			expectedCommitTs: 0,
  1057  			expectedColsCnt:  0,
  1058  			expectedErr:      "the csv row should have at least four columns",
  1059  		},
  1060  		{
  1061  			row: []types.Datum{
  1062  				types.NewStringDatum("D"),
  1063  				types.NewStringDatum("employee"),
  1064  				types.NewStringDatum("hr"),
  1065  				types.NewStringDatum("hello world"),
  1066  			},
  1067  			expectedCommitTs: 0,
  1068  			expectedColsCnt:  0,
  1069  			expectedErr:      "the 4th column(hello world) of csv row should be a valid commit-ts",
  1070  		},
  1071  	}
  1072  	for _, tc := range testCases {
  1073  		csvMsg := newCSVMessage(&common.Config{
  1074  			Delimiter:       ",",
  1075  			Quote:           "\"",
  1076  			Terminator:      "\n",
  1077  			NullString:      "\\N",
  1078  			IncludeCommitTs: true,
  1079  		})
  1080  		err := csvMsg.decode(tc.row)
  1081  		if tc.expectedErr != "" {
  1082  			require.Contains(t, err.Error(), tc.expectedErr)
  1083  		} else {
  1084  			require.Nil(t, err)
  1085  			require.Equal(t, tc.expectedCommitTs, csvMsg.commitTs)
  1086  			require.Equal(t, tc.expectedColsCnt, len(csvMsg.columns))
  1087  		}
  1088  	}
  1089  }