github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/pkg/config/replica_config.go (about) 1 // Copyright 2021 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package config 15 16 import ( 17 "database/sql/driver" 18 "encoding/json" 19 "fmt" 20 "net/url" 21 "strings" 22 "time" 23 24 "github.com/pingcap/errors" 25 "github.com/pingcap/log" 26 "github.com/pingcap/tiflow/pkg/config/outdated" 27 cerror "github.com/pingcap/tiflow/pkg/errors" 28 "github.com/pingcap/tiflow/pkg/integrity" 29 "github.com/pingcap/tiflow/pkg/redo" 30 "github.com/pingcap/tiflow/pkg/sink" 31 "github.com/pingcap/tiflow/pkg/util" 32 "go.uber.org/zap" 33 ) 34 35 const ( 36 // minSyncPointInterval is the minimum of SyncPointInterval can be set. 37 minSyncPointInterval = time.Second * 30 38 // minSyncPointRetention is the minimum of SyncPointRetention can be set. 39 minSyncPointRetention = time.Hour * 1 40 minChangeFeedErrorStuckDuration = time.Minute * 30 41 // DefaultTiDBSourceID is the default source ID of TiDB cluster. 42 DefaultTiDBSourceID = 1 43 ) 44 45 var defaultReplicaConfig = &ReplicaConfig{ 46 MemoryQuota: DefaultChangefeedMemoryQuota, 47 CaseSensitive: false, 48 CheckGCSafePoint: true, 49 EnableSyncPoint: util.AddressOf(false), 50 EnableTableMonitor: util.AddressOf(false), 51 SyncPointInterval: util.AddressOf(10 * time.Minute), 52 SyncPointRetention: util.AddressOf(24 * time.Hour), 53 BDRMode: util.AddressOf(false), 54 Filter: &FilterConfig{ 55 Rules: []string{"*.*"}, 56 }, 57 Mounter: &MounterConfig{ 58 WorkerNum: 16, 59 }, 60 Sink: &SinkConfig{ 61 CSVConfig: &CSVConfig{ 62 Quote: string(DoubleQuoteChar), 63 Delimiter: Comma, 64 NullString: NULL, 65 BinaryEncodingMethod: BinaryEncodingBase64, 66 }, 67 EncoderConcurrency: util.AddressOf(DefaultEncoderGroupConcurrency), 68 Terminator: util.AddressOf(CRLF), 69 DateSeparator: util.AddressOf(DateSeparatorDay.String()), 70 EnablePartitionSeparator: util.AddressOf(true), 71 EnableKafkaSinkV2: util.AddressOf(false), 72 OnlyOutputUpdatedColumns: util.AddressOf(false), 73 DeleteOnlyOutputHandleKeyColumns: util.AddressOf(false), 74 ContentCompatible: util.AddressOf(false), 75 TiDBSourceID: DefaultTiDBSourceID, 76 AdvanceTimeoutInSec: util.AddressOf(DefaultAdvanceTimeoutInSec), 77 SendBootstrapIntervalInSec: util.AddressOf(DefaultSendBootstrapIntervalInSec), 78 SendBootstrapInMsgCount: util.AddressOf(DefaultSendBootstrapInMsgCount), 79 SendBootstrapToAllPartition: util.AddressOf(DefaultSendBootstrapToAllPartition), 80 DebeziumDisableSchema: util.AddressOf(false), 81 OpenProtocol: &OpenProtocolConfig{OutputOldValue: true}, 82 Debezium: &DebeziumConfig{OutputOldValue: true}, 83 }, 84 Consistent: &ConsistentConfig{ 85 Level: "none", 86 MaxLogSize: redo.DefaultMaxLogSize, 87 FlushIntervalInMs: redo.DefaultFlushIntervalInMs, 88 MetaFlushIntervalInMs: redo.DefaultMetaFlushIntervalInMs, 89 EncodingWorkerNum: redo.DefaultEncodingWorkerNum, 90 FlushWorkerNum: redo.DefaultFlushWorkerNum, 91 Storage: "", 92 UseFileBackend: false, 93 Compression: "", 94 MemoryUsage: &ConsistentMemoryUsage{ 95 MemoryQuotaPercentage: 50, 96 }, 97 }, 98 Scheduler: &ChangefeedSchedulerConfig{ 99 EnableTableAcrossNodes: false, 100 RegionThreshold: 100_000, 101 WriteKeyThreshold: 0, 102 }, 103 Integrity: &integrity.Config{ 104 IntegrityCheckLevel: integrity.CheckLevelNone, 105 CorruptionHandleLevel: integrity.CorruptionHandleLevelWarn, 106 }, 107 ChangefeedErrorStuckDuration: util.AddressOf(time.Minute * 30), 108 SyncedStatus: &SyncedStatusConfig{SyncedCheckInterval: 5 * 60, CheckpointInterval: 15}, 109 } 110 111 // GetDefaultReplicaConfig returns the default replica config. 112 func GetDefaultReplicaConfig() *ReplicaConfig { 113 return defaultReplicaConfig.Clone() 114 } 115 116 // Duration wrap time.Duration to override UnmarshalText func 117 type Duration struct { 118 time.Duration 119 } 120 121 // UnmarshalText unmarshal byte to duration 122 func (d *Duration) UnmarshalText(text []byte) error { 123 var err error 124 d.Duration, err = time.ParseDuration(string(text)) 125 return err 126 } 127 128 // ReplicaConfig represents some addition replication config for a changefeed 129 type ReplicaConfig replicaConfig 130 131 type replicaConfig struct { 132 MemoryQuota uint64 `toml:"memory-quota" json:"memory-quota"` 133 CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"` 134 ForceReplicate bool `toml:"force-replicate" json:"force-replicate"` 135 CheckGCSafePoint bool `toml:"check-gc-safe-point" json:"check-gc-safe-point"` 136 // EnableSyncPoint is only available when the downstream is a Database. 137 EnableSyncPoint *bool `toml:"enable-sync-point" json:"enable-sync-point,omitempty"` 138 EnableTableMonitor *bool `toml:"enable-table-monitor" json:"enable-table-monitor"` 139 // IgnoreIneligibleTable is used to store the user's config when creating a changefeed. 140 // not used in the changefeed's lifecycle. 141 IgnoreIneligibleTable bool `toml:"ignore-ineligible-table" json:"ignore-ineligible-table"` 142 143 // BDR(Bidirectional Replication) is a feature that allows users to 144 // replicate data of same tables from TiDB-1 to TiDB-2 and vice versa. 145 // This feature is only available for TiDB. 146 BDRMode *bool `toml:"bdr-mode" json:"bdr-mode,omitempty"` 147 // SyncPointInterval is only available when the downstream is DB. 148 SyncPointInterval *time.Duration `toml:"sync-point-interval" json:"sync-point-interval,omitempty"` 149 // SyncPointRetention is only available when the downstream is DB. 150 SyncPointRetention *time.Duration `toml:"sync-point-retention" json:"sync-point-retention,omitempty"` 151 Filter *FilterConfig `toml:"filter" json:"filter"` 152 Mounter *MounterConfig `toml:"mounter" json:"mounter"` 153 Sink *SinkConfig `toml:"sink" json:"sink"` 154 // Consistent is only available for DB downstream with redo feature enabled. 155 Consistent *ConsistentConfig `toml:"consistent" json:"consistent,omitempty"` 156 // Scheduler is the configuration for scheduler. 157 Scheduler *ChangefeedSchedulerConfig `toml:"scheduler" json:"scheduler"` 158 // Integrity is only available when the downstream is MQ. 159 Integrity *integrity.Config `toml:"integrity" json:"integrity"` 160 ChangefeedErrorStuckDuration *time.Duration `toml:"changefeed-error-stuck-duration" json:"changefeed-error-stuck-duration,omitempty"` 161 SyncedStatus *SyncedStatusConfig `toml:"synced-status" json:"synced-status,omitempty"` 162 163 // Deprecated: we don't use this field since v8.0.0. 164 SQLMode string `toml:"sql-mode" json:"sql-mode"` 165 } 166 167 // Value implements the driver.Valuer interface 168 func (c ReplicaConfig) Value() (driver.Value, error) { 169 cfg, err := c.Marshal() 170 if err != nil { 171 return nil, err 172 } 173 174 // TODO: refactor the meaningless type conversion. 175 return []byte(cfg), nil 176 } 177 178 // Scan implements the sql.Scanner interface 179 func (c *ReplicaConfig) Scan(value interface{}) error { 180 b, ok := value.([]byte) 181 if !ok { 182 return errors.New("type assertion to []byte failed") 183 } 184 185 return c.UnmarshalJSON(b) 186 } 187 188 // Marshal returns the json marshal format of a ReplicationConfig 189 func (c *ReplicaConfig) Marshal() (string, error) { 190 cfg, err := json.Marshal(c) 191 if err != nil { 192 return "", cerror.WrapError(cerror.ErrEncodeFailed, errors.Annotatef(err, "Unmarshal data: %v", c)) 193 } 194 return string(cfg), nil 195 } 196 197 // UnmarshalJSON unmarshals into *ReplicationConfig from json marshal byte slice 198 func (c *ReplicaConfig) UnmarshalJSON(data []byte) error { 199 // The purpose of casting ReplicaConfig to replicaConfig is to avoid recursive calls UnmarshalJSON, 200 // resulting in stack overflow 201 r := (*replicaConfig)(c) 202 err := json.Unmarshal(data, &r) 203 if err != nil { 204 return cerror.WrapError(cerror.ErrDecodeFailed, err) 205 } 206 v1 := outdated.ReplicaConfigV1{} 207 err = v1.Unmarshal(data) 208 if err != nil { 209 return cerror.WrapError(cerror.ErrDecodeFailed, err) 210 } 211 r.fillFromV1(&v1) 212 return nil 213 } 214 215 // Clone clones a replica config 216 func (c *ReplicaConfig) Clone() *ReplicaConfig { 217 str, err := c.Marshal() 218 if err != nil { 219 log.Panic("failed to marshal replica config", 220 zap.Error(cerror.WrapError(cerror.ErrDecodeFailed, err))) 221 } 222 clone := new(ReplicaConfig) 223 err = clone.UnmarshalJSON([]byte(str)) 224 if err != nil { 225 log.Panic("failed to unmarshal replica config", 226 zap.Error(cerror.WrapError(cerror.ErrDecodeFailed, err))) 227 } 228 return clone 229 } 230 231 func (c *replicaConfig) fillFromV1(v1 *outdated.ReplicaConfigV1) { 232 if v1 == nil || v1.Sink == nil { 233 return 234 } 235 for _, dispatch := range v1.Sink.DispatchRules { 236 c.Sink.DispatchRules = append(c.Sink.DispatchRules, &DispatchRule{ 237 Matcher: []string{fmt.Sprintf("%s.%s", dispatch.Schema, dispatch.Name)}, 238 DispatcherRule: dispatch.Rule, 239 }) 240 } 241 } 242 243 // ValidateAndAdjust verifies and adjusts the replica configuration. 244 func (c *ReplicaConfig) ValidateAndAdjust(sinkURI *url.URL) error { // check sink uri 245 if c.Sink != nil { 246 err := c.Sink.validateAndAdjust(sinkURI) 247 if err != nil { 248 return err 249 } 250 } 251 252 if c.Consistent != nil { 253 err := c.Consistent.ValidateAndAdjust() 254 if err != nil { 255 return err 256 } 257 } 258 259 // check sync point config 260 if util.GetOrZero(c.EnableSyncPoint) { 261 if c.SyncPointInterval != nil && 262 *c.SyncPointInterval < minSyncPointInterval { 263 return cerror.ErrInvalidReplicaConfig. 264 FastGenByArgs( 265 fmt.Sprintf("The SyncPointInterval:%s must be larger than %s", 266 c.SyncPointInterval.String(), 267 minSyncPointInterval.String())) 268 } 269 if c.SyncPointRetention != nil && 270 *c.SyncPointRetention < minSyncPointRetention { 271 return cerror.ErrInvalidReplicaConfig. 272 FastGenByArgs( 273 fmt.Sprintf("The SyncPointRetention:%s must be larger than %s", 274 c.SyncPointRetention.String(), 275 minSyncPointRetention.String())) 276 } 277 } 278 if c.MemoryQuota == uint64(0) { 279 c.FixMemoryQuota() 280 } 281 if c.Scheduler == nil { 282 c.FixScheduler(false) 283 } else { 284 err := c.Scheduler.Validate() 285 if err != nil { 286 return err 287 } 288 } 289 // TODO: Remove the hack once span replication is compatible with all sinks. 290 if !isSinkCompatibleWithSpanReplication(sinkURI) { 291 c.Scheduler.EnableTableAcrossNodes = false 292 } 293 294 if c.Integrity != nil { 295 switch strings.ToLower(sinkURI.Scheme) { 296 case sink.KafkaScheme, sink.KafkaSSLScheme: 297 default: 298 if c.Integrity.Enabled() { 299 log.Warn("integrity checksum only support kafka sink now, disable integrity") 300 c.Integrity.IntegrityCheckLevel = integrity.CheckLevelNone 301 } 302 } 303 304 if err := c.Integrity.Validate(); err != nil { 305 return err 306 } 307 308 if c.Integrity.Enabled() && len(c.Sink.ColumnSelectors) != 0 { 309 log.Error("it's not allowed to enable the integrity check and column selector at the same time") 310 return cerror.ErrInvalidReplicaConfig.GenWithStack( 311 "integrity check enabled and column selector set, not allowed") 312 313 } 314 } 315 316 if c.ChangefeedErrorStuckDuration != nil && 317 *c.ChangefeedErrorStuckDuration < minChangeFeedErrorStuckDuration { 318 return cerror.ErrInvalidReplicaConfig. 319 FastGenByArgs( 320 fmt.Sprintf("The ChangefeedErrorStuckDuration:%f must be larger than %f Seconds", 321 c.ChangefeedErrorStuckDuration.Seconds(), 322 minChangeFeedErrorStuckDuration.Seconds())) 323 } 324 325 return nil 326 } 327 328 // FixScheduler adjusts scheduler to default value 329 func (c *ReplicaConfig) FixScheduler(inheritV66 bool) { 330 if c.Scheduler == nil { 331 c.Scheduler = defaultReplicaConfig.Clone().Scheduler 332 return 333 } 334 if inheritV66 && c.Scheduler.RegionPerSpan != 0 { 335 c.Scheduler.EnableTableAcrossNodes = true 336 c.Scheduler.RegionThreshold = c.Scheduler.RegionPerSpan 337 c.Scheduler.RegionPerSpan = 0 338 } 339 } 340 341 // FixMemoryQuota adjusts memory quota to default value 342 func (c *ReplicaConfig) FixMemoryQuota() { 343 c.MemoryQuota = DefaultChangefeedMemoryQuota 344 } 345 346 // isSinkCompatibleWithSpanReplication returns true if the sink uri is 347 // compatible with span replication. 348 func isSinkCompatibleWithSpanReplication(u *url.URL) bool { 349 return u != nil && 350 (strings.Contains(u.Scheme, "kafka") || strings.Contains(u.Scheme, "blackhole")) 351 } 352 353 // MaskSensitiveData masks sensitive data in ReplicaConfig 354 func (c *ReplicaConfig) MaskSensitiveData() { 355 if c.Sink != nil { 356 c.Sink.MaskSensitiveData() 357 } 358 if c.Consistent != nil { 359 c.Consistent.MaskSensitiveData() 360 } 361 }