github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/sink/mysql/config.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package mysql
    15  
    16  import (
    17  	"fmt"
    18  	"net/http"
    19  	"net/url"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/gin-gonic/gin/binding"
    24  	dmysql "github.com/go-sql-driver/mysql"
    25  	"github.com/imdario/mergo"
    26  	"github.com/pingcap/errors"
    27  	"github.com/pingcap/log"
    28  	"github.com/pingcap/tiflow/cdc/model"
    29  	"github.com/pingcap/tiflow/pkg/config"
    30  	cerror "github.com/pingcap/tiflow/pkg/errors"
    31  	"github.com/pingcap/tiflow/pkg/security"
    32  	"github.com/pingcap/tiflow/pkg/sink"
    33  	"github.com/pingcap/tiflow/pkg/util"
    34  	"go.uber.org/zap"
    35  )
    36  
    37  const (
    38  	txnModeOptimistic  = "optimistic"
    39  	txnModePessimistic = "pessimistic"
    40  
    41  	// DefaultWorkerCount is the default number of workers.
    42  	DefaultWorkerCount = 16
    43  	// DefaultMaxTxnRow is the default max number of rows in a transaction.
    44  	DefaultMaxTxnRow = 256
    45  	// defaultMaxMultiUpdateRowCount is the default max number of rows in a
    46  	// single multi update SQL.
    47  	defaultMaxMultiUpdateRowCount = 40
    48  	// defaultMaxMultiUpdateRowSize(1KB) defines the default value of MaxMultiUpdateRowSize
    49  	// When row average size is larger MaxMultiUpdateRowSize,
    50  	// disable multi update, otherwise enable multi update.
    51  	defaultMaxMultiUpdateRowSize = 1024
    52  	// The upper limit of max worker counts.
    53  	maxWorkerCount = 1024
    54  	// The upper limit of max txn rows.
    55  	maxMaxTxnRow = 2048
    56  	// The upper limit of max multi update rows in a single SQL.
    57  	maxMaxMultiUpdateRowCount = 256
    58  	// The upper limit of max multi update row size(8KB).
    59  	maxMaxMultiUpdateRowSize = 8192
    60  
    61  	defaultTiDBTxnMode  = txnModeOptimistic
    62  	defaultReadTimeout  = "2m"
    63  	defaultWriteTimeout = "2m"
    64  	defaultDialTimeout  = "2m"
    65  	// Note(dongmen): defaultSafeMode is set to false since v6.4.0.
    66  	defaultSafeMode       = false
    67  	defaultTxnIsolationRC = "READ-COMMITTED"
    68  	defaultCharacterSet   = "utf8mb4"
    69  
    70  	// BackoffBaseDelay indicates the base delay time for retrying.
    71  	BackoffBaseDelay = 500 * time.Millisecond
    72  	// BackoffMaxDelay indicates the max delay time for retrying.
    73  	BackoffMaxDelay = 60 * time.Second
    74  
    75  	defaultBatchDMLEnable  = true
    76  	defaultMultiStmtEnable = true
    77  
    78  	// defaultcachePrepStmts is the default value of cachePrepStmts
    79  	defaultCachePrepStmts = true
    80  )
    81  
    82  type urlConfig struct {
    83  	WorkerCount                  *int    `form:"worker-count"`
    84  	MaxTxnRow                    *int    `form:"max-txn-row"`
    85  	MaxMultiUpdateRowSize        *int    `form:"max-multi-update-row-size"`
    86  	MaxMultiUpdateRowCount       *int    `form:"max-multi-update-row"`
    87  	TiDBTxnMode                  *string `form:"tidb-txn-mode"`
    88  	SSLCa                        *string `form:"ssl-ca"`
    89  	SSLCert                      *string `form:"ssl-cert"`
    90  	SSLKey                       *string `form:"ssl-key"`
    91  	SafeMode                     *bool   `form:"safe-mode"`
    92  	TimeZone                     *string `form:"time-zone"`
    93  	WriteTimeout                 *string `form:"write-timeout"`
    94  	ReadTimeout                  *string `form:"read-timeout"`
    95  	Timeout                      *string `form:"timeout"`
    96  	EnableBatchDML               *bool   `form:"batch-dml-enable"`
    97  	EnableMultiStatement         *bool   `form:"multi-stmt-enable"`
    98  	EnableCachePreparedStatement *bool   `form:"cache-prep-stmts"`
    99  }
   100  
   101  // Config is the configs for MySQL backend.
   102  type Config struct {
   103  	WorkerCount            int
   104  	MaxTxnRow              int
   105  	MaxMultiUpdateRowCount int
   106  	MaxMultiUpdateRowSize  int
   107  	tidbTxnMode            string
   108  	ReadTimeout            string
   109  	WriteTimeout           string
   110  	DialTimeout            string
   111  	SafeMode               bool
   112  	Timezone               string
   113  	TLS                    string
   114  	ForceReplicate         bool
   115  
   116  	IsTiDB bool // IsTiDB is true if the downstream is TiDB
   117  	// IsBDRModeSupported is true if the downstream is TiDB and write source is existed.
   118  	// write source exists when the downstream is TiDB and version is greater than or equal to v6.5.0.
   119  	IsWriteSourceExisted bool
   120  
   121  	SourceID        uint64
   122  	BatchDMLEnable  bool
   123  	MultiStmtEnable bool
   124  	CachePrepStmts  bool
   125  }
   126  
   127  // NewConfig returns the default mysql backend config.
   128  func NewConfig() *Config {
   129  	return &Config{
   130  		WorkerCount:            DefaultWorkerCount,
   131  		MaxTxnRow:              DefaultMaxTxnRow,
   132  		MaxMultiUpdateRowCount: defaultMaxMultiUpdateRowCount,
   133  		MaxMultiUpdateRowSize:  defaultMaxMultiUpdateRowSize,
   134  		tidbTxnMode:            defaultTiDBTxnMode,
   135  		ReadTimeout:            defaultReadTimeout,
   136  		WriteTimeout:           defaultWriteTimeout,
   137  		DialTimeout:            defaultDialTimeout,
   138  		SafeMode:               defaultSafeMode,
   139  		BatchDMLEnable:         defaultBatchDMLEnable,
   140  		MultiStmtEnable:        defaultMultiStmtEnable,
   141  		CachePrepStmts:         defaultCachePrepStmts,
   142  		SourceID:               config.DefaultTiDBSourceID,
   143  	}
   144  }
   145  
   146  // Apply applies the sink URI parameters to the config.
   147  func (c *Config) Apply(
   148  	serverTimezone string,
   149  	changefeedID model.ChangeFeedID,
   150  	sinkURI *url.URL,
   151  	replicaConfig *config.ReplicaConfig,
   152  ) (err error) {
   153  	if sinkURI == nil {
   154  		return cerror.ErrMySQLInvalidConfig.GenWithStack("fail to open MySQL sink, empty SinkURI")
   155  	}
   156  
   157  	scheme := strings.ToLower(sinkURI.Scheme)
   158  	if !sink.IsMySQLCompatibleScheme(scheme) {
   159  		return cerror.ErrMySQLInvalidConfig.GenWithStack("can't create MySQL sink with unsupported scheme: %s", scheme)
   160  	}
   161  	req := &http.Request{URL: sinkURI}
   162  	urlParameter := &urlConfig{}
   163  	if err := binding.Query.Bind(req, urlParameter); err != nil {
   164  		return cerror.WrapError(cerror.ErrMySQLInvalidConfig, err)
   165  	}
   166  	if urlParameter, err = mergeConfig(replicaConfig, urlParameter); err != nil {
   167  		return err
   168  	}
   169  	if err = getWorkerCount(urlParameter, &c.WorkerCount); err != nil {
   170  		return err
   171  	}
   172  	if err = getMaxTxnRow(urlParameter, &c.MaxTxnRow); err != nil {
   173  		return err
   174  	}
   175  	if err = getMaxMultiUpdateRowCount(urlParameter, &c.MaxMultiUpdateRowCount); err != nil {
   176  		return err
   177  	}
   178  	if err = getMaxMultiUpdateRowSize(urlParameter, &c.MaxMultiUpdateRowSize); err != nil {
   179  		return err
   180  	}
   181  	getTiDBTxnMode(urlParameter, &c.tidbTxnMode)
   182  	if err = getSSLCA(urlParameter, changefeedID, &c.TLS); err != nil {
   183  		return err
   184  	}
   185  	getSafeMode(urlParameter, &c.SafeMode)
   186  	if err = getTimezone(serverTimezone, urlParameter, &c.Timezone); err != nil {
   187  		return err
   188  	}
   189  	if err = getDuration(urlParameter.ReadTimeout, &c.ReadTimeout); err != nil {
   190  		return err
   191  	}
   192  	if err = getDuration(urlParameter.WriteTimeout, &c.WriteTimeout); err != nil {
   193  		return err
   194  	}
   195  	if err = getDuration(urlParameter.Timeout, &c.DialTimeout); err != nil {
   196  		return err
   197  	}
   198  
   199  	getBatchDMLEnable(urlParameter, &c.BatchDMLEnable)
   200  	getMultiStmtEnable(urlParameter, &c.MultiStmtEnable)
   201  	getCachePrepStmts(urlParameter, &c.CachePrepStmts)
   202  	c.ForceReplicate = replicaConfig.ForceReplicate
   203  
   204  	// Note(dongmen): The TiDBSourceID should never be 0 here, but we have found that
   205  	// in some problematic cases, the TiDBSourceID is 0 since something went wrong in the
   206  	// configuration process. So we need to check it here again.
   207  	// We do this is because it can cause the data to be inconsistent if the TiDBSourceID is 0
   208  	// in BDR Mode cluster.
   209  	if replicaConfig.Sink.TiDBSourceID == 0 {
   210  		log.Error("The TiDB source ID should never be set to 0. Please report it as a bug. The default value will be used: 1.",
   211  			zap.Uint64("tidbSourceID", replicaConfig.Sink.TiDBSourceID))
   212  		c.SourceID = config.DefaultTiDBSourceID
   213  	} else {
   214  		c.SourceID = replicaConfig.Sink.TiDBSourceID
   215  		log.Info("TiDB source ID is set", zap.Uint64("sourceID", c.SourceID))
   216  	}
   217  
   218  	return nil
   219  }
   220  
   221  func mergeConfig(
   222  	replicaConfig *config.ReplicaConfig,
   223  	urlParameters *urlConfig,
   224  ) (*urlConfig, error) {
   225  	dest := &urlConfig{}
   226  	dest.SafeMode = replicaConfig.Sink.SafeMode
   227  	if replicaConfig.Sink != nil && replicaConfig.Sink.MySQLConfig != nil {
   228  		mConfig := replicaConfig.Sink.MySQLConfig
   229  		dest.WorkerCount = mConfig.WorkerCount
   230  		dest.MaxTxnRow = mConfig.MaxTxnRow
   231  		dest.MaxMultiUpdateRowCount = mConfig.MaxMultiUpdateRowCount
   232  		dest.MaxMultiUpdateRowSize = mConfig.MaxMultiUpdateRowSize
   233  		dest.TiDBTxnMode = mConfig.TiDBTxnMode
   234  		dest.SSLCa = mConfig.SSLCa
   235  		dest.SSLCert = mConfig.SSLCert
   236  		dest.SSLKey = mConfig.SSLKey
   237  		dest.TimeZone = mConfig.TimeZone
   238  		dest.WriteTimeout = mConfig.WriteTimeout
   239  		dest.ReadTimeout = mConfig.ReadTimeout
   240  		dest.Timeout = mConfig.Timeout
   241  		dest.EnableBatchDML = mConfig.EnableBatchDML
   242  		dest.EnableMultiStatement = mConfig.EnableMultiStatement
   243  		dest.EnableCachePreparedStatement = mConfig.EnableCachePreparedStatement
   244  	}
   245  	if err := mergo.Merge(dest, urlParameters, mergo.WithOverride); err != nil {
   246  		return nil, cerror.WrapError(cerror.ErrMySQLInvalidConfig, err)
   247  	}
   248  	return dest, nil
   249  }
   250  
   251  func getWorkerCount(values *urlConfig, workerCount *int) error {
   252  	if values.WorkerCount == nil {
   253  		return nil
   254  	}
   255  	c := *values.WorkerCount
   256  	if c <= 0 {
   257  		return cerror.WrapError(cerror.ErrMySQLInvalidConfig,
   258  			fmt.Errorf("invalid worker-count %d, which must be greater than 0", c))
   259  	}
   260  	if c > maxWorkerCount {
   261  		log.Warn("worker-count too large",
   262  			zap.Int("original", c), zap.Int("override", maxWorkerCount))
   263  		c = maxWorkerCount
   264  	}
   265  
   266  	*workerCount = c
   267  	return nil
   268  }
   269  
   270  func getMaxTxnRow(config *urlConfig, maxTxnRow *int) error {
   271  	if config.MaxTxnRow == nil {
   272  		return nil
   273  	}
   274  
   275  	c := *config.MaxTxnRow
   276  	if c <= 0 {
   277  		return cerror.WrapError(cerror.ErrMySQLInvalidConfig,
   278  			fmt.Errorf("invalid max-txn-row %d, which must be greater than 0", c))
   279  	}
   280  	if c > maxMaxTxnRow {
   281  		log.Warn("max-txn-row too large",
   282  			zap.Int("original", c), zap.Int("override", maxMaxTxnRow))
   283  		c = maxMaxTxnRow
   284  	}
   285  	*maxTxnRow = c
   286  	return nil
   287  }
   288  
   289  func getMaxMultiUpdateRowCount(values *urlConfig, maxMultiUpdateRow *int) error {
   290  	if values.MaxMultiUpdateRowCount == nil {
   291  		return nil
   292  	}
   293  
   294  	c := *values.MaxMultiUpdateRowCount
   295  	if c <= 0 {
   296  		return cerror.WrapError(cerror.ErrMySQLInvalidConfig,
   297  			fmt.Errorf("invalid max-multi-update-row %d, which must be greater than 0", c))
   298  	}
   299  	if c > maxMaxMultiUpdateRowCount {
   300  		log.Warn("max-multi-update-row too large",
   301  			zap.Int("original", c), zap.Int("override", maxMaxMultiUpdateRowCount))
   302  		c = maxMaxMultiUpdateRowCount
   303  	}
   304  	*maxMultiUpdateRow = c
   305  	return nil
   306  }
   307  
   308  func getMaxMultiUpdateRowSize(values *urlConfig, maxMultiUpdateRowSize *int) error {
   309  	if values.MaxMultiUpdateRowSize == nil {
   310  		return nil
   311  	}
   312  
   313  	c := *values.MaxMultiUpdateRowSize
   314  	if c < 0 {
   315  		return cerror.WrapError(cerror.ErrMySQLInvalidConfig,
   316  			fmt.Errorf("invalid max-multi-update-row-size %d, "+
   317  				"which must be greater than or equal to 0", c))
   318  	}
   319  	if c > maxMaxMultiUpdateRowSize {
   320  		log.Warn("max-multi-update-row-size too large",
   321  			zap.Int("original", c), zap.Int("override", maxMaxMultiUpdateRowSize))
   322  		c = maxMaxMultiUpdateRowSize
   323  	}
   324  	*maxMultiUpdateRowSize = c
   325  	return nil
   326  }
   327  
   328  func getTiDBTxnMode(values *urlConfig, mode *string) {
   329  	if values.TiDBTxnMode == nil || len(*values.TiDBTxnMode) == 0 {
   330  		return
   331  	}
   332  	s := strings.ToLower(*values.TiDBTxnMode)
   333  	if s == txnModeOptimistic || s == txnModePessimistic {
   334  		*mode = s
   335  	} else {
   336  		log.Warn("invalid tidb-txn-mode, should be pessimistic or optimistic",
   337  			zap.String("default", defaultTiDBTxnMode))
   338  	}
   339  }
   340  
   341  func getSSLCA(values *urlConfig, changefeedID model.ChangeFeedID, tls *string) error {
   342  	if values.SSLCa == nil || len(*values.SSLCa) == 0 {
   343  		return nil
   344  	}
   345  
   346  	var (
   347  		sslCert string
   348  		sslKey  string
   349  	)
   350  	if values.SSLCert != nil {
   351  		sslCert = *values.SSLCert
   352  	}
   353  	if values.SSLKey != nil {
   354  		sslKey = *values.SSLKey
   355  	}
   356  	credential := security.Credential{
   357  		CAPath:   *values.SSLCa,
   358  		CertPath: sslCert,
   359  		KeyPath:  sslKey,
   360  	}
   361  	tlsCfg, err := credential.ToTLSConfig()
   362  	if err != nil {
   363  		return errors.Trace(err)
   364  	}
   365  
   366  	name := "cdc_mysql_tls" + changefeedID.Namespace + "_" + changefeedID.ID
   367  	err = dmysql.RegisterTLSConfig(name, tlsCfg)
   368  	if err != nil {
   369  		return cerror.ErrMySQLConnectionError.Wrap(err).GenWithStack("fail to open MySQL connection")
   370  	}
   371  	*tls = "?tls=" + name
   372  	return nil
   373  }
   374  
   375  func getSafeMode(values *urlConfig, safeMode *bool) {
   376  	if values.SafeMode != nil {
   377  		*safeMode = *values.SafeMode
   378  	}
   379  }
   380  
   381  func getTimezone(serverTimezoneStr string,
   382  	values *urlConfig, timezone *string,
   383  ) error {
   384  	const pleaseSpecifyTimezone = "We recommend that you specify the time-zone explicitly. " +
   385  		"Please make sure that the timezone of the TiCDC server, " +
   386  		"sink-uri and the downstream database are consistent. " +
   387  		"If the downstream database does not load the timezone information, " +
   388  		"you can refer to https://dev.mysql.com/doc/refman/8.0/en/mysql-tzinfo-to-sql.html."
   389  	serverTimezone, err := util.GetTimezone(serverTimezoneStr)
   390  	if err != nil {
   391  		return cerror.WrapError(cerror.ErrMySQLInvalidConfig, err)
   392  	}
   393  	if values.TimeZone == nil {
   394  		// If time-zone is not specified, use the timezone of the server.
   395  		log.Warn("Because time-zone is not specified, "+
   396  			"the timezone of the TiCDC server will be used. "+
   397  			pleaseSpecifyTimezone,
   398  			zap.String("timezone", serverTimezone.String()))
   399  		*timezone = fmt.Sprintf(`"%s"`, serverTimezone.String())
   400  		return nil
   401  	}
   402  
   403  	s := *values.TimeZone
   404  	if len(s) == 0 {
   405  		*timezone = ""
   406  		log.Warn("Because time-zone is empty, " +
   407  			"the timezone of the downstream database will be used. " +
   408  			pleaseSpecifyTimezone)
   409  		return nil
   410  	}
   411  
   412  	changefeedTimezone, err := util.GetTimezone(s)
   413  	if err != nil {
   414  		return cerror.WrapError(cerror.ErrMySQLInvalidConfig, err)
   415  	}
   416  	*timezone = fmt.Sprintf(`"%s"`, changefeedTimezone.String())
   417  	// We need to check whether the timezone of the TiCDC server and the sink-uri are consistent.
   418  	// If they are inconsistent, it may cause the data to be inconsistent.
   419  	if changefeedTimezone.String() != serverTimezone.String() {
   420  		return cerror.WrapError(cerror.ErrMySQLInvalidConfig, errors.Errorf(
   421  			"the timezone of the TiCDC server and the sink-uri are inconsistent. "+
   422  				"TiCDC server timezone: %s, sink-uri timezone: %s. "+
   423  				"Please make sure that the timezone of the TiCDC server, "+
   424  				"sink-uri and the downstream database are consistent.",
   425  			serverTimezone.String(), changefeedTimezone.String()))
   426  	}
   427  
   428  	return nil
   429  }
   430  
   431  func getDuration(s *string, target *string) error {
   432  	if s == nil {
   433  		return nil
   434  	}
   435  	_, err := time.ParseDuration(*s)
   436  	if err != nil {
   437  		return cerror.WrapError(cerror.ErrMySQLInvalidConfig, err)
   438  	}
   439  	*target = *s
   440  	return nil
   441  }
   442  
   443  func getBatchDMLEnable(values *urlConfig, batchDMLEnable *bool) {
   444  	if values.EnableBatchDML != nil {
   445  		*batchDMLEnable = *values.EnableBatchDML
   446  	}
   447  }
   448  
   449  func getMultiStmtEnable(values *urlConfig, multiStmtEnable *bool) {
   450  	if values.EnableMultiStatement != nil {
   451  		*multiStmtEnable = *values.EnableMultiStatement
   452  	}
   453  }
   454  
   455  func getCachePrepStmts(values *urlConfig, cachePrepStmts *bool) {
   456  	if values.EnableCachePreparedStatement != nil {
   457  		*cachePrepStmts = *values.EnableCachePreparedStatement
   458  	}
   459  }