github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/sink/dmlsink/factory/factory.go (about)

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package factory

import (
	"context"
	"net/url"

	"github.com/pingcap/log"
	"github.com/pingcap/tiflow/cdc/model"
	"github.com/pingcap/tiflow/cdc/processor/tablepb"
	"github.com/pingcap/tiflow/cdc/sink/dmlsink"
	"github.com/pingcap/tiflow/cdc/sink/dmlsink/blackhole"
	"github.com/pingcap/tiflow/cdc/sink/dmlsink/cloudstorage"
	"github.com/pingcap/tiflow/cdc/sink/dmlsink/mq"
	"github.com/pingcap/tiflow/cdc/sink/dmlsink/mq/dmlproducer"
	"github.com/pingcap/tiflow/cdc/sink/dmlsink/mq/manager"
	"github.com/pingcap/tiflow/cdc/sink/dmlsink/txn"
	"github.com/pingcap/tiflow/cdc/sink/tablesink"
	"github.com/pingcap/tiflow/pkg/config"
	cerror "github.com/pingcap/tiflow/pkg/errors"
	"github.com/pingcap/tiflow/pkg/pdutil"
	"github.com/pingcap/tiflow/pkg/sink"
	"github.com/pingcap/tiflow/pkg/sink/kafka"
	v2 "github.com/pingcap/tiflow/pkg/sink/kafka/v2"
	pulsarConfig "github.com/pingcap/tiflow/pkg/sink/pulsar"
	"github.com/pingcap/tiflow/pkg/util"
	"github.com/prometheus/client_golang/prometheus"
)

// Category is for different DML sink categories.
type Category = int

const (
	// CategoryTxn is for the Txn sink.
	CategoryTxn Category = 1
	// CategoryMQ is for the MQ sink.
	CategoryMQ Category = 2
	// CategoryCloudStorage is for the CloudStorage sink.
	CategoryCloudStorage Category = 3
	// CategoryBlackhole is for the Blackhole sink.
	CategoryBlackhole Category = 4
)
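
// Callers can branch on a factory's category when delivery details differ per
// sink kind. A minimal sketch; the factory value f is illustrative and not
// part of this file:
//
//	switch f.Category() {
//	case CategoryTxn, CategoryMQ:
//		// rows are delivered as transactions or messages
//	case CategoryCloudStorage:
//		// rows are flushed as files to external storage
//	case CategoryBlackhole:
//		// events are discarded (useful for tests and benchmarks)
//	}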

// SinkFactory is the factory of sinks.
// It is responsible for creating sinks and closing them.
// Because there is no way to convert a dmlsink.EventSink[*model.RowChangedEvent]
// to a dmlsink.EventSink[dmlsink.TableEvent], the factory creates the sink once
// and stores it behind one of two concretely typed fields; exactly one of
// rowSink and txnSink is non-nil after New succeeds.
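//
// Go generics are invariant: even though *model.RowChangedEvent implements
// dmlsink.TableEvent, the two sink types do not convert into each other. For
// example, the following does not compile:
//
//	var rs dmlsink.EventSink[*model.RowChangedEvent]
//	var _ dmlsink.EventSink[dmlsink.TableEvent] = rs // compile error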
type SinkFactory struct {
	rowSink  dmlsink.EventSink[*model.RowChangedEvent]
	txnSink  dmlsink.EventSink[*model.SingleTableTxn]
	category Category
}

// New creates a new SinkFactory based on the scheme of the sink URI.
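//
// A minimal usage sketch, assuming a MySQL downstream; the context, changefeed
// ID, clock, and channel capacity below are illustrative values, not fixed by
// this package:
//
//	errCh := make(chan error, 16)
//	f, err := New(ctx, changefeedID,
//		"mysql://root@127.0.0.1:4000/", config.GetDefaultReplicaConfig(),
//		errCh, pdClock)
//	if err != nil {
//		return err
//	}
//	defer f.Close()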
func New(
	ctx context.Context,
	changefeedID model.ChangeFeedID,
	sinkURIStr string,
	cfg *config.ReplicaConfig,
	errCh chan error,
	pdClock pdutil.Clock,
) (*SinkFactory, error) {
	sinkURI, err := url.Parse(sinkURIStr)
	if err != nil {
		return nil, cerror.WrapError(cerror.ErrSinkURIInvalid, err)
	}

	s := &SinkFactory{}
	scheme := sink.GetScheme(sinkURI)
	switch scheme {
	case sink.MySQLScheme, sink.MySQLSSLScheme, sink.TiDBScheme, sink.TiDBSSLScheme:
		txnSink, err := txn.NewMySQLSink(ctx, changefeedID, sinkURI, cfg, errCh,
			txn.DefaultConflictDetectorSlots)
		if err != nil {
			return nil, err
		}
		s.txnSink = txnSink
		s.category = CategoryTxn
	case sink.KafkaScheme, sink.KafkaSSLScheme:
		factoryCreator := kafka.NewSaramaFactory
		if util.GetOrZero(cfg.Sink.EnableKafkaSinkV2) {
			factoryCreator = v2.NewFactory
		}
		mqs, err := mq.NewKafkaDMLSink(ctx, changefeedID, sinkURI, cfg, errCh,
			factoryCreator, dmlproducer.NewKafkaDMLProducer)
		if err != nil {
			return nil, err
		}
		s.txnSink = mqs
		s.category = CategoryMQ
	case sink.S3Scheme, sink.FileScheme, sink.GCSScheme, sink.GSScheme,
		sink.AzblobScheme, sink.AzureScheme, sink.CloudStorageNoopScheme:
		storageSink, err := cloudstorage.NewDMLSink(ctx, changefeedID, pdClock, sinkURI, cfg, errCh)
		if err != nil {
			return nil, err
		}
		s.txnSink = storageSink
		s.category = CategoryCloudStorage
	case sink.BlackHoleScheme:
		bs := blackhole.NewDMLSink()
		s.rowSink = bs
		s.category = CategoryBlackhole
	case sink.PulsarScheme, sink.PulsarSSLScheme:
		mqs, err := mq.NewPulsarDMLSink(ctx, changefeedID, sinkURI, cfg, errCh,
			manager.NewPulsarTopicManager,
			pulsarConfig.NewCreatorFactory, dmlproducer.NewPulsarDMLProducer)
		if err != nil {
			return nil, err
		}
		s.txnSink = mqs
		s.category = CategoryMQ
	default:
		return nil,
			cerror.ErrSinkURIInvalid.GenWithStack("the sink scheme (%s) is not supported", scheme)
	}

	return s, nil
}

// CreateTableSink creates a TableSink for the given table span, backed by
// whichever event sink the factory holds.
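//
// A minimal sketch, assuming the caller already holds a factory f, a table
// span, and Prometheus metric objects (all names below are illustrative):
//
//	ts := f.CreateTableSink(changefeedID, span, startTs, pdClock,
//		totalRowsCounter, flushLagDuration)
//	defer ts.Close()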
func (s *SinkFactory) CreateTableSink(
	changefeedID model.ChangeFeedID,
	span tablepb.Span,
	startTs model.Ts,
	pdClock pdutil.Clock,
	totalRowsCounter prometheus.Counter,
	flushLagDuration prometheus.Observer,
) tablesink.TableSink {
	if s.txnSink != nil {
		return tablesink.New(changefeedID, span, startTs, s.txnSink,
			&dmlsink.TxnEventAppender{TableSinkStartTs: startTs}, pdClock, totalRowsCounter, flushLagDuration)
	}

	return tablesink.New(changefeedID, span, startTs, s.rowSink,
		&dmlsink.RowChangeEventAppender{}, pdClock, totalRowsCounter, flushLagDuration)
}

// CreateTableSinkForConsumer creates a TableSink for the given table span for
// the consumer. The difference between CreateTableSink and
// CreateTableSinkForConsumer is that CreateTableSinkForConsumer does not
// create a new sink for each table.
// NOTICE: This is only used for the consumer. Please do not use it in the
// processor.
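//
// A sketch for the consumer path; metrics and the PD clock are stubbed
// internally, so only the span and start ts are needed (names are
// illustrative):
//
//	ts := f.CreateTableSinkForConsumer(changefeedID, span, startTs)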
func (s *SinkFactory) CreateTableSinkForConsumer(
	changefeedID model.ChangeFeedID,
	span tablepb.Span, startTs model.Ts,
) tablesink.TableSink {
	if s.txnSink != nil {
		return tablesink.New(changefeedID, span, startTs, s.txnSink,
			// IgnoreStartTs is true because the consumer cannot get the
			// start ts of the row changed event.
			&dmlsink.TxnEventAppender{TableSinkStartTs: startTs, IgnoreStartTs: true},
			pdutil.NewClock4Test(),
			prometheus.NewCounter(prometheus.CounterOpts{}),
			prometheus.NewHistogram(prometheus.HistogramOpts{}))
	}

	return tablesink.New(changefeedID, span, startTs, s.rowSink,
		&dmlsink.RowChangeEventAppender{}, pdutil.NewClock4Test(),
		prometheus.NewCounter(prometheus.CounterOpts{}),
		prometheus.NewHistogram(prometheus.HistogramOpts{}))
}

// Close closes the underlying event sink.
func (s *SinkFactory) Close() {
	if s.rowSink != nil && s.txnSink != nil {
		log.Panic("unreachable: rowSink and txnSink must not both be non-nil")
	}
	if s.rowSink != nil {
		s.rowSink.Close()
	}
	if s.txnSink != nil {
		s.txnSink.Close()
	}
}

// Category returns the category of s.
func (s *SinkFactory) Category() Category {
	if s.category == 0 {
		panic("should never happen")
	}
	return s.category
}