github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/config/subtask.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package config 15 16 import ( 17 "bytes" 18 "context" 19 "database/sql" 20 _ "embed" 21 "encoding/json" 22 "flag" 23 "fmt" 24 "net/url" 25 "regexp" 26 "strconv" 27 "strings" 28 "time" 29 30 "github.com/BurntSushi/toml" 31 extstorage "github.com/pingcap/tidb/br/pkg/storage" 32 "github.com/pingcap/tidb/pkg/util/dbutil" 33 "github.com/pingcap/tidb/pkg/util/filter" 34 regexprrouter "github.com/pingcap/tidb/pkg/util/regexpr-router" 35 router "github.com/pingcap/tidb/pkg/util/table-router" 36 "github.com/pingcap/tiflow/dm/config/dbconfig" 37 "github.com/pingcap/tiflow/dm/pkg/log" 38 "github.com/pingcap/tiflow/dm/pkg/storage" 39 "github.com/pingcap/tiflow/dm/pkg/terror" 40 "github.com/pingcap/tiflow/dm/pkg/utils" 41 "github.com/pingcap/tiflow/engine/pkg/promutil" 42 bf "github.com/pingcap/tiflow/pkg/binlog-filter" 43 "github.com/pingcap/tiflow/pkg/column-mapping" 44 "github.com/pingcap/tiflow/pkg/version" 45 "go.uber.org/atomic" 46 "go.uber.org/zap" 47 ) 48 49 // task modes. 50 const ( 51 ModeAll = "all" 52 ModeFull = "full" 53 ModeIncrement = "incremental" 54 ModeDump = "dump" 55 ModeLoadSync = "load&sync" 56 57 DefaultShadowTableRules = "^_(.+)_(?:new|gho)$" 58 DefaultTrashTableRules = "^_(.+)_(?:ghc|del|old)$" 59 60 ShadowTableRules = "shadow-table-rules" 61 TrashTableRules = "trash-table-rules" 62 TiDBLightningCheckpointPrefix = "tidb_lightning_checkpoint_" 63 ) 64 65 // FetchTimeZoneSetting fetch target db global time_zone setting. 66 // TODO: move GetTimeZoneOffset and FormatTimeZoneOffset from TiDB to tiflow. 67 func FetchTimeZoneSetting(ctx context.Context, db *sql.DB) (string, error) { 68 dur, err := dbutil.GetTimeZoneOffset(ctx, db) 69 if err != nil { 70 return "", err 71 } 72 return dbutil.FormatTimeZoneOffset(dur), nil 73 } 74 75 // GetDBConfigForTest is a helper function to get db config for unit test . 76 func GetDBConfigForTest() dbconfig.DBConfig { 77 return dbconfig.DBConfig{Host: "localhost", User: "root", Password: "not a real password", Port: 3306} 78 } 79 80 // SubTaskConfig is the configuration for SubTask. 81 type SubTaskConfig struct { 82 // BurntSushi/toml seems have a bug for flag "-" 83 // when doing encoding, if we use `toml:"-"`, it still try to encode it 84 // and it will panic because of unsupported type (reflect.Func) 85 // so we should not export flagSet 86 flagSet *flag.FlagSet 87 88 // when in sharding, multi dm-workers do one task 89 IsSharding bool `toml:"is-sharding" json:"is-sharding"` 90 ShardMode string `toml:"shard-mode" json:"shard-mode"` 91 StrictOptimisticShardMode bool `toml:"strict-optimistic-shard-mode" json:"strict-optimistic-shard-mode"` 92 OnlineDDL bool `toml:"online-ddl" json:"online-ddl"` 93 94 // pt/gh-ost name rule, support regex 95 ShadowTableRules []string `yaml:"shadow-table-rules" toml:"shadow-table-rules" json:"shadow-table-rules"` 96 TrashTableRules []string `yaml:"trash-table-rules" toml:"trash-table-rules" json:"trash-table-rules"` 97 98 // deprecated 99 OnlineDDLScheme string `toml:"online-ddl-scheme" json:"online-ddl-scheme"` 100 101 // handle schema/table name mode, and only for schema/table name/pattern 102 // if case insensitive, we would convert schema/table name/pattern to lower case 103 CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"` 104 105 // default "loose" handle create sql by original sql, will not add default collation as upstream 106 // "strict" will add default collation as upstream, and downstream will occur error when downstream don't support 107 CollationCompatible string `yaml:"collation_compatible" toml:"collation_compatible" json:"collation_compatible"` 108 109 Name string `toml:"name" json:"name"` 110 Mode string `toml:"mode" json:"mode"` 111 // treat it as hidden configuration 112 IgnoreCheckingItems []string `toml:"ignore-checking-items" json:"ignore-checking-items"` 113 // it represents a MySQL/MariaDB instance or a replica group 114 SourceID string `toml:"source-id" json:"source-id"` 115 ServerID uint32 `toml:"server-id" json:"server-id"` 116 Flavor string `toml:"flavor" json:"flavor"` 117 MetaSchema string `toml:"meta-schema" json:"meta-schema"` 118 // deprecated 119 HeartbeatUpdateInterval int `toml:"heartbeat-update-interval" json:"heartbeat-update-interval"` 120 // deprecated 121 HeartbeatReportInterval int `toml:"heartbeat-report-interval" json:"heartbeat-report-interval"` 122 // deprecated 123 EnableHeartbeat bool `toml:"enable-heartbeat" json:"enable-heartbeat"` 124 Timezone string `toml:"timezone" json:"timezone"` 125 126 Meta *Meta `toml:"meta" json:"meta"` 127 128 // RelayDir get value from dm-worker config 129 RelayDir string `toml:"relay-dir" json:"relay-dir"` 130 131 // UseRelay get value from dm-worker's relayEnabled 132 UseRelay bool `toml:"use-relay" json:"use-relay"` 133 From dbconfig.DBConfig `toml:"from" json:"from"` 134 To dbconfig.DBConfig `toml:"to" json:"to"` 135 136 RouteRules []*router.TableRule `toml:"route-rules" json:"route-rules"` 137 FilterRules []*bf.BinlogEventRule `toml:"filter-rules" json:"filter-rules"` 138 // deprecated 139 ColumnMappingRules []*column.Rule `toml:"mapping-rule" json:"mapping-rule"` 140 ExprFilter []*ExpressionFilter `yaml:"expression-filter" toml:"expression-filter" json:"expression-filter"` 141 142 // black-white-list is deprecated, use block-allow-list instead 143 BWList *filter.Rules `toml:"black-white-list" json:"black-white-list"` 144 BAList *filter.Rules `toml:"block-allow-list" json:"block-allow-list"` 145 146 MydumperConfig // Mydumper configuration 147 LoaderConfig // Loader configuration 148 SyncerConfig // Syncer configuration 149 ValidatorCfg ValidatorConfig 150 151 // compatible with standalone dm unit 152 LogLevel string `toml:"log-level" json:"log-level"` 153 LogFile string `toml:"log-file" json:"log-file"` 154 LogFormat string `toml:"log-format" json:"log-format"` 155 LogRotate string `toml:"log-rotate" json:"log-rotate"` 156 157 PprofAddr string `toml:"pprof-addr" json:"pprof-addr"` 158 StatusAddr string `toml:"status-addr" json:"status-addr"` 159 160 ConfigFile string `toml:"-" json:"config-file"` 161 162 CleanDumpFile bool `toml:"clean-dump-file" json:"clean-dump-file"` 163 164 // deprecated, will auto discover SQL mode 165 EnableANSIQuotes bool `toml:"ansi-quotes" json:"ansi-quotes"` 166 167 // still needed by Syncer / Loader bin 168 printVersion bool 169 170 // which DM worker is running the subtask, this will be injected when the real worker starts running the subtask(StartSubTask). 171 WorkerName string `toml:"-" json:"-"` 172 // task experimental configs 173 Experimental struct { 174 AsyncCheckpointFlush bool `yaml:"async-checkpoint-flush" toml:"async-checkpoint-flush" json:"async-checkpoint-flush"` 175 } `yaml:"experimental" toml:"experimental" json:"experimental"` 176 177 // members below are injected by dataflow engine 178 ExtStorage extstorage.ExternalStorage `toml:"-" json:"-"` 179 MetricsFactory promutil.Factory `toml:"-" json:"-"` 180 FrameworkLogger *zap.Logger `toml:"-" json:"-"` 181 // members below are injected by dataflow engine, UUID should be unique in 182 // one go runtime. 183 // IOTotalBytes is used build TCPConnWithIOCounter and UUID is used to as a 184 // key to let MySQL driver to find the right TCPConnWithIOCounter. 185 UUID string `toml:"-" json:"-"` 186 IOTotalBytes *atomic.Uint64 `toml:"-" json:"-"` 187 188 // meter network usage from upstream 189 // e.g., pulling binlog 190 DumpUUID string `toml:"-" json:"-"` 191 DumpIOTotalBytes *atomic.Uint64 `toml:"-" json:"-"` 192 } 193 194 // SampleSubtaskConfig is the content of subtask.toml in current folder. 195 // 196 //go:embed subtask.toml 197 var SampleSubtaskConfig string 198 199 // NewSubTaskConfig creates a new SubTaskConfig. 200 func NewSubTaskConfig() *SubTaskConfig { 201 cfg := &SubTaskConfig{} 202 return cfg 203 } 204 205 // GetFlagSet provides the pointer of subtask's flag set. 206 func (c *SubTaskConfig) GetFlagSet() *flag.FlagSet { 207 return c.flagSet 208 } 209 210 // SetFlagSet writes back the flag set. 211 func (c *SubTaskConfig) SetFlagSet(flagSet *flag.FlagSet) { 212 c.flagSet = flagSet 213 } 214 215 // String returns the config's json string. 216 func (c *SubTaskConfig) String() string { 217 cfg, err := json.Marshal(c) 218 if err != nil { 219 log.L().Error("marshal subtask config to json", zap.String("task", c.Name), log.ShortError(err)) 220 } 221 return string(cfg) 222 } 223 224 // Toml returns TOML format representation of config. 225 func (c *SubTaskConfig) Toml() (string, error) { 226 var b bytes.Buffer 227 enc := toml.NewEncoder(&b) 228 if err := enc.Encode(c); err != nil { 229 return "", terror.ErrConfigTomlTransform.Delegate(err, "encode subtask config") 230 } 231 return b.String(), nil 232 } 233 234 // DecodeFile loads and decodes config from file. 235 func (c *SubTaskConfig) DecodeFile(fpath string, verifyDecryptPassword bool) error { 236 _, err := toml.DecodeFile(fpath, c) 237 if err != nil { 238 return terror.ErrConfigTomlTransform.Delegate(err, "decode subtask config from file") 239 } 240 241 return c.Adjust(verifyDecryptPassword) 242 } 243 244 // Decode loads config from file data. 245 func (c *SubTaskConfig) Decode(data string, verifyDecryptPassword bool) error { 246 if _, err := toml.Decode(data, c); err != nil { 247 return terror.ErrConfigTomlTransform.Delegate(err, "decode subtask config from data") 248 } 249 250 return c.Adjust(verifyDecryptPassword) 251 } 252 253 func adjustOnlineTableRules(ruleType string, rules []string) ([]string, error) { 254 adjustedRules := make([]string, 0, len(rules)) 255 for _, r := range rules { 256 if !strings.HasPrefix(r, "^") { 257 r = "^" + r 258 } 259 260 if !strings.HasSuffix(r, "$") { 261 r += "$" 262 } 263 264 p, err := regexp.Compile(r) 265 if err != nil { 266 return rules, terror.ErrConfigOnlineDDLInvalidRegex.Generate(ruleType, r, "fail to compile: "+err.Error()) 267 } 268 if p.NumSubexp() != 1 { 269 return rules, terror.ErrConfigOnlineDDLInvalidRegex.Generate(ruleType, r, "rule isn't contains exactly one submatch") 270 } 271 adjustedRules = append(adjustedRules, r) 272 } 273 return adjustedRules, nil 274 } 275 276 // Adjust adjusts and verifies configs. 277 func (c *SubTaskConfig) Adjust(verifyDecryptPassword bool) error { 278 if c.Name == "" { 279 return terror.ErrConfigTaskNameEmpty.Generate() 280 } 281 282 if c.SourceID == "" { 283 return terror.ErrConfigEmptySourceID.Generate() 284 } 285 if len(c.SourceID) > MaxSourceIDLength { 286 return terror.ErrConfigTooLongSourceID.Generate() 287 } 288 289 if c.ShardMode != "" && c.ShardMode != ShardPessimistic && c.ShardMode != ShardOptimistic { 290 return terror.ErrConfigShardModeNotSupport.Generate(c.ShardMode) 291 } else if c.ShardMode == "" && c.IsSharding { 292 c.ShardMode = ShardPessimistic // use the pessimistic mode as default for back compatible. 293 } 294 if c.StrictOptimisticShardMode && c.ShardMode != ShardOptimistic { 295 return terror.ErrConfigStrictOptimisticShardMode.Generate() 296 } 297 298 if len(c.ColumnMappingRules) > 0 { 299 return terror.ErrConfigColumnMappingDeprecated.Generate() 300 } 301 302 if c.OnlineDDLScheme != "" && c.OnlineDDLScheme != PT && c.OnlineDDLScheme != GHOST { 303 return terror.ErrConfigOnlineSchemeNotSupport.Generate(c.OnlineDDLScheme) 304 } else if c.OnlineDDLScheme == PT || c.OnlineDDLScheme == GHOST { 305 c.OnlineDDL = true 306 log.L().Warn("'online-ddl-scheme' will be deprecated soon. Recommend that use online-ddl instead of online-ddl-scheme.") 307 } 308 if len(c.ShadowTableRules) == 0 { 309 c.ShadowTableRules = []string{DefaultShadowTableRules} 310 } else { 311 shadowTableRule, err := adjustOnlineTableRules(ShadowTableRules, c.ShadowTableRules) 312 if err != nil { 313 return err 314 } 315 c.ShadowTableRules = shadowTableRule 316 } 317 318 if len(c.TrashTableRules) == 0 { 319 c.TrashTableRules = []string{DefaultTrashTableRules} 320 } else { 321 trashTableRule, err := adjustOnlineTableRules(TrashTableRules, c.TrashTableRules) 322 if err != nil { 323 return err 324 } 325 c.TrashTableRules = trashTableRule 326 } 327 328 if c.MetaSchema == "" { 329 c.MetaSchema = defaultMetaSchema 330 } 331 332 // adjust dir, no need to do for load&sync mode because it needs its own s3 repository 333 if HasLoad(c.Mode) && c.Mode != ModeLoadSync { 334 // check 335 isS3 := storage.IsS3Path(c.LoaderConfig.Dir) 336 if isS3 && c.ImportMode == LoadModeLoader { 337 return terror.ErrConfigLoaderS3NotSupport.Generate(c.LoaderConfig.Dir) 338 } 339 // add suffix 340 var dirSuffix string 341 if isS3 { 342 // we will dump files to s3 dir's subdirectory 343 dirSuffix = "/" + c.Name + "." + c.SourceID 344 } else { 345 // TODO we will dump local file to dir's subdirectory, but it may have risk of compatibility, we will fix in other pr 346 dirSuffix = "." + c.Name 347 } 348 newDir, err := storage.AdjustPath(c.LoaderConfig.Dir, dirSuffix) 349 if err != nil { 350 return terror.ErrConfigLoaderDirInvalid.Delegate(err, c.LoaderConfig.Dir) 351 } 352 c.LoaderConfig.Dir = newDir 353 } 354 355 // adjust sorting dir 356 if HasLoad(c.Mode) { 357 newDir := c.LoaderConfig.Dir 358 if c.LoaderConfig.SortingDirPhysical == "" { 359 if storage.IsLocalDiskPath(newDir) { 360 // lightning will not recursively create directories, so we use same level dir 361 c.LoaderConfig.SortingDirPhysical = newDir + ".sorting" 362 } else { 363 c.LoaderConfig.SortingDirPhysical = "./sorting." + url.PathEscape(c.Name) 364 } 365 } 366 } 367 368 if c.SyncerConfig.QueueSize == 0 { 369 c.SyncerConfig.QueueSize = defaultQueueSize 370 } 371 if c.SyncerConfig.CheckpointFlushInterval == 0 { 372 c.SyncerConfig.CheckpointFlushInterval = defaultCheckpointFlushInterval 373 } 374 if c.SyncerConfig.SafeModeDuration == "" { 375 c.SyncerConfig.SafeModeDuration = strconv.Itoa(2*c.SyncerConfig.CheckpointFlushInterval) + "s" 376 } 377 if duration, err := time.ParseDuration(c.SyncerConfig.SafeModeDuration); err != nil { 378 return terror.ErrConfigInvalidSafeModeDuration.Generate(c.SyncerConfig.SafeModeDuration, err) 379 } else if c.SyncerConfig.SafeMode && duration == 0 { 380 return terror.ErrConfigConfictSafeModeDurationAndSafeMode.Generate() 381 } 382 383 c.From.AdjustWithTimeZone(c.Timezone) 384 c.To.AdjustWithTimeZone(c.Timezone) 385 386 if verifyDecryptPassword { 387 _, err1 := c.DecryptedClone() 388 if err1 != nil { 389 return err1 390 } 391 } 392 393 // only when block-allow-list is nil use black-white-list 394 if c.BAList == nil && c.BWList != nil { 395 c.BAList = c.BWList 396 } 397 398 if _, err := filter.New(c.CaseSensitive, c.BAList); err != nil { 399 return terror.ErrConfigGenBAList.Delegate(err) 400 } 401 if _, err := regexprrouter.NewRegExprRouter(c.CaseSensitive, c.RouteRules); err != nil { 402 return terror.ErrConfigGenTableRouter.Delegate(err) 403 } 404 // NewMapping will fill arguments with the default values. 405 if _, err := column.NewMapping(c.CaseSensitive, c.ColumnMappingRules); err != nil { 406 return terror.ErrConfigGenColumnMapping.Delegate(err) 407 } 408 if _, err := utils.ParseFileSize(c.MydumperConfig.ChunkFilesize, 0); err != nil { 409 return terror.ErrConfigInvalidChunkFileSize.Generate(c.MydumperConfig.ChunkFilesize) 410 } 411 412 if _, err := bf.NewBinlogEvent(c.CaseSensitive, c.FilterRules); err != nil { 413 return terror.ErrConfigBinlogEventFilter.Delegate(err) 414 } 415 if err := c.LoaderConfig.adjust(); err != nil { 416 return err 417 } 418 if err := c.ValidatorCfg.Adjust(); err != nil { 419 return err 420 } 421 422 // TODO: check every member 423 // TODO: since we checked here, we could remove other terror like ErrSyncerUnitGenBAList 424 // TODO: or we should check at task config and source config rather than this subtask config, to reduce duplication 425 426 return nil 427 } 428 429 // Parse parses flag definitions from the argument list. 430 func (c *SubTaskConfig) Parse(arguments []string, verifyDecryptPassword bool) error { 431 // Parse first to get config file. 432 err := c.flagSet.Parse(arguments) 433 if err != nil { 434 return terror.ErrConfigParseFlagSet.Delegate(err) 435 } 436 437 if c.printVersion { 438 fmt.Println(version.GetRawInfo()) 439 return flag.ErrHelp 440 } 441 442 // Load config file if specified. 443 if c.ConfigFile != "" { 444 err = c.DecodeFile(c.ConfigFile, verifyDecryptPassword) 445 if err != nil { 446 return err 447 } 448 } 449 450 // Parse again to replace with command line options. 451 err = c.flagSet.Parse(arguments) 452 if err != nil { 453 return terror.ErrConfigParseFlagSet.Delegate(err) 454 } 455 456 if len(c.flagSet.Args()) != 0 { 457 return terror.ErrConfigParseFlagSet.Generatef("'%s' is an invalid flag", c.flagSet.Arg(0)) 458 } 459 460 return c.Adjust(verifyDecryptPassword) 461 } 462 463 // DecryptedClone tries to decrypt db password in config. 464 func (c *SubTaskConfig) DecryptedClone() (*SubTaskConfig, error) { 465 clone, err := c.Clone() 466 if err != nil { 467 return nil, err 468 } 469 470 var ( 471 pswdTo string 472 pswdFrom string 473 ) 474 if len(clone.To.Password) > 0 { 475 pswdTo = utils.DecryptOrPlaintext(clone.To.Password) 476 } 477 if len(clone.From.Password) > 0 { 478 pswdFrom = utils.DecryptOrPlaintext(clone.From.Password) 479 } 480 clone.From.Password = pswdFrom 481 clone.To.Password = pswdTo 482 483 return clone, nil 484 } 485 486 // Clone returns a replica of SubTaskConfig. 487 func (c *SubTaskConfig) Clone() (*SubTaskConfig, error) { 488 content, err := c.Toml() 489 if err != nil { 490 return nil, err 491 } 492 493 clone := &SubTaskConfig{} 494 _, err = toml.Decode(content, clone) 495 if err != nil { 496 return nil, terror.ErrConfigTomlTransform.Delegate(err, "decode subtask config from data") 497 } 498 499 return clone, nil 500 }