github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/config/replica_config.go (about)

     1  // Copyright 2021 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package config
    15  
    16  import (
    17  	"database/sql/driver"
    18  	"encoding/json"
    19  	"fmt"
    20  	"net/url"
    21  	"strings"
    22  	"time"
    23  
    24  	"github.com/pingcap/errors"
    25  	"github.com/pingcap/log"
    26  	"github.com/pingcap/tiflow/pkg/config/outdated"
    27  	cerror "github.com/pingcap/tiflow/pkg/errors"
    28  	"github.com/pingcap/tiflow/pkg/integrity"
    29  	"github.com/pingcap/tiflow/pkg/redo"
    30  	"github.com/pingcap/tiflow/pkg/sink"
    31  	"github.com/pingcap/tiflow/pkg/util"
    32  	"go.uber.org/zap"
    33  )
    34  
const (
	// minSyncPointInterval is the minimum of SyncPointInterval can be set.
	minSyncPointInterval = time.Second * 30
	// minSyncPointRetention is the minimum of SyncPointRetention can be set.
	minSyncPointRetention = time.Hour * 1
	// minChangeFeedErrorStuckDuration is the minimum of
	// ChangefeedErrorStuckDuration can be set; enforced in ValidateAndAdjust.
	minChangeFeedErrorStuckDuration = time.Minute * 30
	// DefaultTiDBSourceID is the default source ID of TiDB cluster.
	DefaultTiDBSourceID = 1
)
    44  
// defaultReplicaConfig is the canonical default changefeed configuration.
// It is never handed out directly: GetDefaultReplicaConfig returns a deep
// copy (via Clone), so callers cannot mutate these shared defaults.
var defaultReplicaConfig = &ReplicaConfig{
	MemoryQuota:        DefaultChangefeedMemoryQuota,
	CaseSensitive:      false,
	CheckGCSafePoint:   true,
	EnableSyncPoint:    util.AddressOf(false),
	EnableTableMonitor: util.AddressOf(false),
	// Note: the defaults below are well above the validated minimums
	// (minSyncPointInterval / minSyncPointRetention).
	SyncPointInterval:  util.AddressOf(10 * time.Minute),
	SyncPointRetention: util.AddressOf(24 * time.Hour),
	BDRMode:            util.AddressOf(false),
	Filter: &FilterConfig{
		// Match every table by default.
		Rules: []string{"*.*"},
	},
	Mounter: &MounterConfig{
		WorkerNum: 16,
	},
	Sink: &SinkConfig{
		CSVConfig: &CSVConfig{
			Quote:                string(DoubleQuoteChar),
			Delimiter:            Comma,
			NullString:           NULL,
			BinaryEncodingMethod: BinaryEncodingBase64,
		},
		EncoderConcurrency:               util.AddressOf(DefaultEncoderGroupConcurrency),
		Terminator:                       util.AddressOf(CRLF),
		DateSeparator:                    util.AddressOf(DateSeparatorDay.String()),
		EnablePartitionSeparator:         util.AddressOf(true),
		EnableKafkaSinkV2:                util.AddressOf(false),
		OnlyOutputUpdatedColumns:         util.AddressOf(false),
		DeleteOnlyOutputHandleKeyColumns: util.AddressOf(false),
		ContentCompatible:                util.AddressOf(false),
		TiDBSourceID:                     DefaultTiDBSourceID,
		AdvanceTimeoutInSec:              util.AddressOf(DefaultAdvanceTimeoutInSec),
		SendBootstrapIntervalInSec:       util.AddressOf(DefaultSendBootstrapIntervalInSec),
		SendBootstrapInMsgCount:          util.AddressOf(DefaultSendBootstrapInMsgCount),
		SendBootstrapToAllPartition:      util.AddressOf(DefaultSendBootstrapToAllPartition),
		DebeziumDisableSchema:            util.AddressOf(false),
		OpenProtocol:                     &OpenProtocolConfig{OutputOldValue: true},
		Debezium:                         &DebeziumConfig{OutputOldValue: true},
	},
	Consistent: &ConsistentConfig{
		// Redo log is disabled by default ("none" level, empty storage).
		Level:                 "none",
		MaxLogSize:            redo.DefaultMaxLogSize,
		FlushIntervalInMs:     redo.DefaultFlushIntervalInMs,
		MetaFlushIntervalInMs: redo.DefaultMetaFlushIntervalInMs,
		EncodingWorkerNum:     redo.DefaultEncodingWorkerNum,
		FlushWorkerNum:        redo.DefaultFlushWorkerNum,
		Storage:               "",
		UseFileBackend:        false,
		Compression:           "",
		MemoryUsage: &ConsistentMemoryUsage{
			MemoryQuotaPercentage: 50,
		},
	},
	Scheduler: &ChangefeedSchedulerConfig{
		EnableTableAcrossNodes: false,
		RegionThreshold:        100_000,
		WriteKeyThreshold:      0,
	},
	Integrity: &integrity.Config{
		IntegrityCheckLevel:   integrity.CheckLevelNone,
		CorruptionHandleLevel: integrity.CorruptionHandleLevelWarn,
	},
	// Equal to minChangeFeedErrorStuckDuration, the validated lower bound.
	ChangefeedErrorStuckDuration: util.AddressOf(time.Minute * 30),
	// Intervals are in seconds: check synced status every 5 minutes,
	// checkpoint every 15 seconds.
	SyncedStatus: &SyncedStatusConfig{SyncedCheckInterval: 5 * 60, CheckpointInterval: 15},
}
   110  
   111  // GetDefaultReplicaConfig returns the default replica config.
   112  func GetDefaultReplicaConfig() *ReplicaConfig {
   113  	return defaultReplicaConfig.Clone()
   114  }
   115  
   116  // Duration wrap time.Duration to override UnmarshalText func
   117  type Duration struct {
   118  	time.Duration
   119  }
   120  
   121  // UnmarshalText unmarshal byte to duration
   122  func (d *Duration) UnmarshalText(text []byte) error {
   123  	var err error
   124  	d.Duration, err = time.ParseDuration(string(text))
   125  	return err
   126  }
   127  
// ReplicaConfig represents some addition replication config for a changefeed.
// It is a defined (not aliased) copy of replicaConfig so that the JSON
// marshal/unmarshal methods can be attached without infinite recursion
// (see UnmarshalJSON).
type ReplicaConfig replicaConfig

// replicaConfig holds the actual fields; pointer-typed fields with
// `omitempty` are optional and distinguish "unset" from the zero value.
type replicaConfig struct {
	MemoryQuota      uint64 `toml:"memory-quota" json:"memory-quota"`
	CaseSensitive    bool   `toml:"case-sensitive" json:"case-sensitive"`
	ForceReplicate   bool   `toml:"force-replicate" json:"force-replicate"`
	CheckGCSafePoint bool   `toml:"check-gc-safe-point" json:"check-gc-safe-point"`
	// EnableSyncPoint is only available when the downstream is a Database.
	EnableSyncPoint    *bool `toml:"enable-sync-point" json:"enable-sync-point,omitempty"`
	EnableTableMonitor *bool `toml:"enable-table-monitor" json:"enable-table-monitor"`
	// IgnoreIneligibleTable is used to store the user's config when creating a changefeed.
	// not used in the changefeed's lifecycle.
	IgnoreIneligibleTable bool `toml:"ignore-ineligible-table" json:"ignore-ineligible-table"`

	// BDR(Bidirectional Replication) is a feature that allows users to
	// replicate data of same tables from TiDB-1 to TiDB-2 and vice versa.
	// This feature is only available for TiDB.
	BDRMode *bool `toml:"bdr-mode" json:"bdr-mode,omitempty"`
	// SyncPointInterval is only available when the downstream is DB.
	SyncPointInterval *time.Duration `toml:"sync-point-interval" json:"sync-point-interval,omitempty"`
	// SyncPointRetention is only available when the downstream is DB.
	SyncPointRetention *time.Duration `toml:"sync-point-retention" json:"sync-point-retention,omitempty"`
	Filter             *FilterConfig  `toml:"filter" json:"filter"`
	Mounter            *MounterConfig `toml:"mounter" json:"mounter"`
	Sink               *SinkConfig    `toml:"sink" json:"sink"`
	// Consistent is only available for DB downstream with redo feature enabled.
	Consistent *ConsistentConfig `toml:"consistent" json:"consistent,omitempty"`
	// Scheduler is the configuration for scheduler.
	Scheduler *ChangefeedSchedulerConfig `toml:"scheduler" json:"scheduler"`
	// Integrity is only available when the downstream is MQ.
	Integrity                    *integrity.Config   `toml:"integrity" json:"integrity"`
	ChangefeedErrorStuckDuration *time.Duration      `toml:"changefeed-error-stuck-duration" json:"changefeed-error-stuck-duration,omitempty"`
	SyncedStatus                 *SyncedStatusConfig `toml:"synced-status" json:"synced-status,omitempty"`

	// Deprecated: we don't use this field since v8.0.0.
	SQLMode string `toml:"sql-mode" json:"sql-mode"`
}
   166  
   167  // Value implements the driver.Valuer interface
   168  func (c ReplicaConfig) Value() (driver.Value, error) {
   169  	cfg, err := c.Marshal()
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  
   174  	// TODO: refactor the meaningless type conversion.
   175  	return []byte(cfg), nil
   176  }
   177  
   178  // Scan implements the sql.Scanner interface
   179  func (c *ReplicaConfig) Scan(value interface{}) error {
   180  	b, ok := value.([]byte)
   181  	if !ok {
   182  		return errors.New("type assertion to []byte failed")
   183  	}
   184  
   185  	return c.UnmarshalJSON(b)
   186  }
   187  
   188  // Marshal returns the json marshal format of a ReplicationConfig
   189  func (c *ReplicaConfig) Marshal() (string, error) {
   190  	cfg, err := json.Marshal(c)
   191  	if err != nil {
   192  		return "", cerror.WrapError(cerror.ErrEncodeFailed, errors.Annotatef(err, "Unmarshal data: %v", c))
   193  	}
   194  	return string(cfg), nil
   195  }
   196  
   197  // UnmarshalJSON unmarshals into *ReplicationConfig from json marshal byte slice
   198  func (c *ReplicaConfig) UnmarshalJSON(data []byte) error {
   199  	// The purpose of casting ReplicaConfig to replicaConfig is to avoid recursive calls UnmarshalJSON,
   200  	// resulting in stack overflow
   201  	r := (*replicaConfig)(c)
   202  	err := json.Unmarshal(data, &r)
   203  	if err != nil {
   204  		return cerror.WrapError(cerror.ErrDecodeFailed, err)
   205  	}
   206  	v1 := outdated.ReplicaConfigV1{}
   207  	err = v1.Unmarshal(data)
   208  	if err != nil {
   209  		return cerror.WrapError(cerror.ErrDecodeFailed, err)
   210  	}
   211  	r.fillFromV1(&v1)
   212  	return nil
   213  }
   214  
   215  // Clone clones a replica config
   216  func (c *ReplicaConfig) Clone() *ReplicaConfig {
   217  	str, err := c.Marshal()
   218  	if err != nil {
   219  		log.Panic("failed to marshal replica config",
   220  			zap.Error(cerror.WrapError(cerror.ErrDecodeFailed, err)))
   221  	}
   222  	clone := new(ReplicaConfig)
   223  	err = clone.UnmarshalJSON([]byte(str))
   224  	if err != nil {
   225  		log.Panic("failed to unmarshal replica config",
   226  			zap.Error(cerror.WrapError(cerror.ErrDecodeFailed, err)))
   227  	}
   228  	return clone
   229  }
   230  
   231  func (c *replicaConfig) fillFromV1(v1 *outdated.ReplicaConfigV1) {
   232  	if v1 == nil || v1.Sink == nil {
   233  		return
   234  	}
   235  	for _, dispatch := range v1.Sink.DispatchRules {
   236  		c.Sink.DispatchRules = append(c.Sink.DispatchRules, &DispatchRule{
   237  			Matcher:        []string{fmt.Sprintf("%s.%s", dispatch.Schema, dispatch.Name)},
   238  			DispatcherRule: dispatch.Rule,
   239  		})
   240  	}
   241  }
   242  
   243  // ValidateAndAdjust verifies and adjusts the replica configuration.
   244  func (c *ReplicaConfig) ValidateAndAdjust(sinkURI *url.URL) error { // check sink uri
   245  	if c.Sink != nil {
   246  		err := c.Sink.validateAndAdjust(sinkURI)
   247  		if err != nil {
   248  			return err
   249  		}
   250  	}
   251  
   252  	if c.Consistent != nil {
   253  		err := c.Consistent.ValidateAndAdjust()
   254  		if err != nil {
   255  			return err
   256  		}
   257  	}
   258  
   259  	// check sync point config
   260  	if util.GetOrZero(c.EnableSyncPoint) {
   261  		if c.SyncPointInterval != nil &&
   262  			*c.SyncPointInterval < minSyncPointInterval {
   263  			return cerror.ErrInvalidReplicaConfig.
   264  				FastGenByArgs(
   265  					fmt.Sprintf("The SyncPointInterval:%s must be larger than %s",
   266  						c.SyncPointInterval.String(),
   267  						minSyncPointInterval.String()))
   268  		}
   269  		if c.SyncPointRetention != nil &&
   270  			*c.SyncPointRetention < minSyncPointRetention {
   271  			return cerror.ErrInvalidReplicaConfig.
   272  				FastGenByArgs(
   273  					fmt.Sprintf("The SyncPointRetention:%s must be larger than %s",
   274  						c.SyncPointRetention.String(),
   275  						minSyncPointRetention.String()))
   276  		}
   277  	}
   278  	if c.MemoryQuota == uint64(0) {
   279  		c.FixMemoryQuota()
   280  	}
   281  	if c.Scheduler == nil {
   282  		c.FixScheduler(false)
   283  	} else {
   284  		err := c.Scheduler.Validate()
   285  		if err != nil {
   286  			return err
   287  		}
   288  	}
   289  	// TODO: Remove the hack once span replication is compatible with all sinks.
   290  	if !isSinkCompatibleWithSpanReplication(sinkURI) {
   291  		c.Scheduler.EnableTableAcrossNodes = false
   292  	}
   293  
   294  	if c.Integrity != nil {
   295  		switch strings.ToLower(sinkURI.Scheme) {
   296  		case sink.KafkaScheme, sink.KafkaSSLScheme:
   297  		default:
   298  			if c.Integrity.Enabled() {
   299  				log.Warn("integrity checksum only support kafka sink now, disable integrity")
   300  				c.Integrity.IntegrityCheckLevel = integrity.CheckLevelNone
   301  			}
   302  		}
   303  
   304  		if err := c.Integrity.Validate(); err != nil {
   305  			return err
   306  		}
   307  
   308  		if c.Integrity.Enabled() && len(c.Sink.ColumnSelectors) != 0 {
   309  			log.Error("it's not allowed to enable the integrity check and column selector at the same time")
   310  			return cerror.ErrInvalidReplicaConfig.GenWithStack(
   311  				"integrity check enabled and column selector set, not allowed")
   312  
   313  		}
   314  	}
   315  
   316  	if c.ChangefeedErrorStuckDuration != nil &&
   317  		*c.ChangefeedErrorStuckDuration < minChangeFeedErrorStuckDuration {
   318  		return cerror.ErrInvalidReplicaConfig.
   319  			FastGenByArgs(
   320  				fmt.Sprintf("The ChangefeedErrorStuckDuration:%f must be larger than %f Seconds",
   321  					c.ChangefeedErrorStuckDuration.Seconds(),
   322  					minChangeFeedErrorStuckDuration.Seconds()))
   323  	}
   324  
   325  	return nil
   326  }
   327  
   328  // FixScheduler adjusts scheduler to default value
   329  func (c *ReplicaConfig) FixScheduler(inheritV66 bool) {
   330  	if c.Scheduler == nil {
   331  		c.Scheduler = defaultReplicaConfig.Clone().Scheduler
   332  		return
   333  	}
   334  	if inheritV66 && c.Scheduler.RegionPerSpan != 0 {
   335  		c.Scheduler.EnableTableAcrossNodes = true
   336  		c.Scheduler.RegionThreshold = c.Scheduler.RegionPerSpan
   337  		c.Scheduler.RegionPerSpan = 0
   338  	}
   339  }
   340  
// FixMemoryQuota adjusts memory quota to default value.
// Called by ValidateAndAdjust when the quota is left at zero (unset).
func (c *ReplicaConfig) FixMemoryQuota() {
	c.MemoryQuota = DefaultChangefeedMemoryQuota
}
   345  
   346  // isSinkCompatibleWithSpanReplication returns true if the sink uri is
   347  // compatible with span replication.
   348  func isSinkCompatibleWithSpanReplication(u *url.URL) bool {
   349  	return u != nil &&
   350  		(strings.Contains(u.Scheme, "kafka") || strings.Contains(u.Scheme, "blackhole"))
   351  }
   352  
   353  // MaskSensitiveData masks sensitive data in ReplicaConfig
   354  func (c *ReplicaConfig) MaskSensitiveData() {
   355  	if c.Sink != nil {
   356  		c.Sink.MaskSensitiveData()
   357  	}
   358  	if c.Consistent != nil {
   359  		c.Consistent.MaskSensitiveData()
   360  	}
   361  }