github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/codec/csv/csv_decoder.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package csv 15 16 import ( 17 "context" 18 "io" 19 20 "github.com/pingcap/errors" 21 lconfig "github.com/pingcap/tidb/pkg/lightning/config" 22 "github.com/pingcap/tidb/pkg/lightning/mydump" 23 "github.com/pingcap/tidb/pkg/lightning/worker" 24 "github.com/pingcap/tiflow/cdc/model" 25 cerror "github.com/pingcap/tiflow/pkg/errors" 26 "github.com/pingcap/tiflow/pkg/sink/codec" 27 "github.com/pingcap/tiflow/pkg/sink/codec/common" 28 ) 29 30 const defaultIOConcurrency = 1 31 32 type batchDecoder struct { 33 codecConfig *common.Config 34 parser *mydump.CSVParser 35 data []byte 36 msg *csvMessage 37 tableInfo *model.TableInfo 38 closed bool 39 } 40 41 // NewBatchDecoder creates a new BatchDecoder 42 func NewBatchDecoder(ctx context.Context, 43 codecConfig *common.Config, 44 tableInfo *model.TableInfo, 45 value []byte, 46 ) (codec.RowEventDecoder, error) { 47 var backslashEscape bool 48 49 // if quote is not set in config, we should unespace backslash 50 // when parsing csv columns. 51 if len(codecConfig.Quote) == 0 { 52 backslashEscape = true 53 } 54 cfg := &lconfig.CSVConfig{ 55 Separator: codecConfig.Delimiter, 56 Delimiter: codecConfig.Quote, 57 Terminator: codecConfig.Terminator, 58 Null: []string{codecConfig.NullString}, 59 BackslashEscape: backslashEscape, 60 } 61 csvParser, err := mydump.NewCSVParser(ctx, cfg, 62 mydump.NewStringReader(string(value)), 63 int64(lconfig.ReadBlockSize), 64 worker.NewPool(ctx, defaultIOConcurrency, "io"), false, nil) 65 if err != nil { 66 return nil, err 67 } 68 return &batchDecoder{ 69 codecConfig: codecConfig, 70 tableInfo: tableInfo, 71 data: value, 72 msg: newCSVMessage(codecConfig), 73 parser: csvParser, 74 }, nil 75 } 76 77 // AddKeyValue implements the RowEventDecoder interface. 78 func (b *batchDecoder) AddKeyValue(_, _ []byte) error { 79 return nil 80 } 81 82 // HasNext implements the RowEventDecoder interface. 83 func (b *batchDecoder) HasNext() (model.MessageType, bool, error) { 84 err := b.parser.ReadRow() 85 if err != nil { 86 b.closed = true 87 if errors.Cause(err) == io.EOF { 88 return model.MessageTypeUnknown, false, nil 89 } 90 return model.MessageTypeUnknown, false, err 91 } 92 93 row := b.parser.LastRow() 94 if err = b.msg.decode(row.Row); err != nil { 95 return model.MessageTypeUnknown, false, errors.Trace(err) 96 } 97 98 return model.MessageTypeRow, true, nil 99 } 100 101 // NextResolvedEvent implements the RowEventDecoder interface. 102 func (b *batchDecoder) NextResolvedEvent() (uint64, error) { 103 return 0, nil 104 } 105 106 // NextRowChangedEvent implements the RowEventDecoder interface. 107 func (b *batchDecoder) NextRowChangedEvent() (*model.RowChangedEvent, error) { 108 if b.closed { 109 return nil, cerror.WrapError(cerror.ErrCSVDecodeFailed, errors.New("no csv row can be found")) 110 } 111 112 e, err := csvMsg2RowChangedEvent(b.codecConfig, b.msg, b.tableInfo) 113 if err != nil { 114 return nil, errors.Trace(err) 115 } 116 return e, nil 117 } 118 119 // NextDDLEvent implements the RowEventDecoder interface. 120 func (b *batchDecoder) NextDDLEvent() (*model.DDLEvent, error) { 121 return nil, nil 122 }