github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/jobmaster/dm/config/config.go

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import (
    "context"
    "os"
    "time"

    "github.com/dustin/go-humanize"
    "github.com/google/uuid"
    "github.com/pingcap/tidb/pkg/util/filter"
    router "github.com/pingcap/tidb/pkg/util/table-router"
    "github.com/pingcap/tiflow/dm/config"
    dmconfig "github.com/pingcap/tiflow/dm/config"
    "github.com/pingcap/tiflow/dm/config/dbconfig"
    "github.com/pingcap/tiflow/dm/master"
    bf "github.com/pingcap/tiflow/pkg/binlog-filter"
    "github.com/pingcap/tiflow/pkg/column-mapping"
    "github.com/pingcap/tiflow/pkg/errors"
    "go.uber.org/atomic"
    "gopkg.in/yaml.v2"
)

// UpstreamCfg copies the needed fields from DM SourceCfg and the MySQLInstance part
// of the DM task config.
type UpstreamCfg struct {
    dmconfig.MySQLInstance `yaml:",inline" toml:",inline" json:",inline"`
    DBCfg *dbconfig.DBConfig `yaml:"db-config" toml:"db-config" json:"db-config"`
    ServerID uint32 `yaml:"server-id" toml:"server-id" json:"server-id"`
    Flavor string `yaml:"flavor" toml:"flavor" json:"flavor"`
    EnableGTID bool `yaml:"enable-gtid" toml:"enable-gtid" json:"enable-gtid"`
    CaseSensitive bool `yaml:"case-sensitive" toml:"case-sensitive" json:"case-sensitive"`
}

func (u *UpstreamCfg) fromDMSourceConfig(from *dmconfig.SourceConfig) {
    u.DBCfg = from.From.Clone()
    u.ServerID = from.ServerID
    u.Flavor = from.Flavor
    u.EnableGTID = from.EnableGTID
    u.CaseSensitive = from.CaseSensitive
}

func (u *UpstreamCfg) toDMSourceConfig() *dmconfig.SourceConfig {
    ret := dmconfig.NewSourceConfig()
    ret.SourceID = u.SourceID
    ret.From = *u.DBCfg.Clone()
    ret.ServerID = u.ServerID
    ret.Flavor = u.Flavor
    ret.EnableGTID = u.EnableGTID

    return ret
}

func (u *UpstreamCfg) adjust() error {
    ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
    defer cancel()
    dmSource := u.toDMSourceConfig()
    err := master.CheckAndAdjustSourceConfigFunc(ctx, dmSource)
    if err != nil {
        return err
    }
    u.fromDMSourceConfig(dmSource)
    return nil
}

// JobCfg copies from SubTaskConfig and removes some deprecated fields.
// It represents a DM subtask with multiple source configs embedded as Upstreams.
// DISCUSS: support command line args, e.g. --start-time.
type JobCfg struct {
    TaskMode string `yaml:"task-mode" toml:"task-mode" json:"task-mode"`
    ShardMode string `yaml:"shard-mode" toml:"shard-mode" json:"shard-mode"` // when `shard-mode` is set, sharding support is always enabled.
    StrictOptimisticShardMode bool `yaml:"strict-optimistic-shard-mode" toml:"strict-optimistic-shard-mode" json:"strict-optimistic-shard-mode"`
    IgnoreCheckingItems []string `yaml:"ignore-checking-items" toml:"ignore-checking-items" json:"ignore-checking-items"`
    Timezone string `yaml:"timezone" toml:"timezone" json:"timezone"`
    CollationCompatible string `yaml:"collation_compatible" toml:"collation_compatible" json:"collation_compatible"`
    TargetDB *dbconfig.DBConfig `yaml:"target-database" toml:"target-database" json:"target-database"`
    ShadowTableRules []string `yaml:"shadow-table-rules" toml:"shadow-table-rules" json:"shadow-table-rules"`
    TrashTableRules []string `yaml:"trash-table-rules" toml:"trash-table-rules" json:"trash-table-rules"`
    Filters map[string]*bf.BinlogEventRule `yaml:"filters" toml:"filters" json:"filters"`
    ExprFilter map[string]*dmconfig.ExpressionFilter `yaml:"expression-filter" toml:"expression-filter" json:"expression-filter"`
    BAList map[string]*filter.Rules `yaml:"block-allow-list" toml:"block-allow-list" json:"block-allow-list"`
    Mydumpers map[string]*dmconfig.MydumperConfig `yaml:"mydumpers" toml:"mydumpers" json:"mydumpers"`
    Loaders map[string]*dmconfig.LoaderConfig `yaml:"loaders" toml:"loaders" json:"loaders"`
    Syncers map[string]*dmconfig.SyncerConfig `yaml:"syncers" toml:"syncers" json:"syncers"`
    Routes map[string]*router.TableRule `yaml:"routes" toml:"routes" json:"routes"`
    Validators map[string]*dmconfig.ValidatorConfig `yaml:"validators" toml:"validators" json:"validators"`
    // remove source config, use db config instead.
    Upstreams []*UpstreamCfg `yaml:"upstreams" toml:"upstreams" json:"upstreams"`

    // no need for experimental features?
    Experimental struct {
        AsyncCheckpointFlush bool `yaml:"async-checkpoint-flush" toml:"async-checkpoint-flush" json:"async-checkpoint-flush"`
    } `yaml:"experimental" toml:"experimental" json:"experimental"`

    // remove them later
    MetaSchema string `yaml:"meta-schema" toml:"meta-schema" json:"meta-schema"`
    OnlineDDL bool `yaml:"online-ddl" toml:"online-ddl" json:"online-ddl"`
    ColumnMappings map[string]*column.Rule `yaml:"column-mappings" toml:"column-mappings" json:"column-mappings"`

    // removed
    // CleanDumpFile bool `yaml:"clean-dump-file" toml:"clean-dump-file" json:"clean-dump-file"`

    // deprecated
    // IsSharding bool `yaml:"is-sharding" toml:"is-sharding" json:"is-sharding"`
    // EnableHeartbeat bool `yaml:"enable-heartbeat" toml:"enable-heartbeat" json:"enable-heartbeat"`
    // HeartbeatUpdateInterval int `yaml:"heartbeat-update-interval" toml:"heartbeat-update-interval" json:"heartbeat-update-interval"`
    // HeartbeatReportInterval int `yaml:"heartbeat-report-interval" toml:"heartbeat-report-interval" json:"heartbeat-report-interval"`
    // pt/gh-ost name rule, supports regex
    // OnlineDDLScheme string `yaml:"online-ddl-scheme" toml:"online-ddl-scheme" json:"online-ddl-scheme"`
    // BWList map[string]*filter.Rules `yaml:"black-white-list" toml:"black-white-list" json:"black-white-list"`
    // EnableANSIQuotes bool `yaml:"ansi-quotes" toml:"ansi-quotes" json:"ansi-quotes"`
    // RemoveMeta bool `yaml:"remove-meta"`

    ModRevision uint64 `yaml:"mod-revision" toml:"mod-revision" json:"mod-revision"`
}
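
// A minimal, illustrative YAML document for JobCfg might look like the sketch
// below. All values are placeholder assumptions, and the nested db-config keys
// (host/port/user/password) are assumed to follow DM's dbconfig.DBConfig; a
// real job usually needs more fields to pass adjust().
//
//	task-mode: all
//	target-database:
//	  host: "127.0.0.1"
//	  port: 4000
//	  user: "root"
//	  password: ""
//	upstreams:
//	  - source-id: "mysql-01"
//	    db-config:
//	      host: "127.0.0.1"
//	      port: 3306
//	      user: "root"
//	      password: ""
//
// Such a document can be loaded with DecodeFile/Decode below.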
// DecodeFile reads file content from a given path and decodes it.
func (c *JobCfg) DecodeFile(fpath string) error {
    bs, err := os.ReadFile(fpath)
    if err != nil {
        return errors.Trace(err)
    }
    return c.Decode(bs)
}

// Decode unmarshals the content into JobCfg and calls adjust() on it.
// TODO: unify config type
// Now, dmJobmaster uses yaml, dmWorker uses toml, and lib uses json...
func (c *JobCfg) Decode(content []byte) error {
    if err := yaml.UnmarshalStrict(content, c); err != nil {
        return err
    }
    return c.adjust()
}

// Yaml serializes the JobCfg into a YAML document.
func (c *JobCfg) Yaml() ([]byte, error) {
    return yaml.Marshal(c)
}

// Clone returns a deep copy of JobCfg.
func (c *JobCfg) Clone() (*JobCfg, error) {
    content, err := c.Yaml()
    if err != nil {
        return nil, err
    }
    clone := &JobCfg{}
    err = yaml.Unmarshal(content, clone)
    return clone, err
}

// ToTaskCfgs converts the job config to a map from upstream source-id to task config.
func (c *JobCfg) ToTaskCfgs() map[string]*TaskCfg {
    taskCfgs := make(map[string]*TaskCfg, len(c.Upstreams))
    for _, mysqlInstance := range c.Upstreams {
        taskCfg := c.ToTaskCfg()
        taskCfg.Upstreams = []*UpstreamCfg{mysqlInstance}
        taskCfgs[mysqlInstance.SourceID] = taskCfg
    }
    return taskCfgs
}

// FromTaskCfgs converts task configs to a JobCfg.
func FromTaskCfgs(taskCfgs []*TaskCfg) *JobCfg {
    if len(taskCfgs) == 0 {
        return nil
    }

    jobCfg := taskCfgs[0].ToJobCfg()
    // nolint:errcheck
    jobCfg, _ = jobCfg.Clone()
    for i := 1; i < len(taskCfgs); i++ {
        jobCfg.Upstreams = append(jobCfg.Upstreams, taskCfgs[i].Upstreams...)
    }
    return jobCfg
}

// toDMTaskConfig transforms a JobCfg to a DM TaskConfig.
func (c *JobCfg) toDMTaskConfig() (*dmconfig.TaskConfig, error) {
    dmTaskCfg := dmconfig.NewTaskConfig()
    // set a task name for verification;
    // the task name will be replaced with the job-id when creating the dm-worker.
    dmTaskCfg.Name = "engine_task"

    // Copy all the fields contained in dmTaskCfg.
    content, err := c.Yaml()
    if err != nil {
        return nil, err
    }
    if err = yaml.Unmarshal(content, dmTaskCfg); err != nil {
        return nil, err
    }

    // transform all the fields not contained in dmTaskCfg.
    for _, upstream := range c.Upstreams {
        if err = upstream.adjust(); err != nil {
            return nil, err
        }
        dmTaskCfg.MySQLInstances = append(dmTaskCfg.MySQLInstances, &upstream.MySQLInstance)
    }
    return dmTaskCfg, nil
}

func (c *JobCfg) fromDMTaskConfig(dmTaskCfg *dmconfig.TaskConfig) error {
    // Copy all the fields contained in JobCfg.
    return yaml.Unmarshal([]byte(dmTaskCfg.String()), c)

    // transform all the fields not contained in dmTaskCfg.
    // no need to transform mysqlInstance because we use references above.
    // nothing now.
}

func (c *JobCfg) adjust() error {
    if err := c.verifySourceID(); err != nil {
        return err
    }
    dmTaskCfg, err := c.toDMTaskConfig()
    if err != nil {
        return err
    }
    if err := dmTaskCfg.Adjust(); err != nil {
        return err
    }
    return c.fromDMTaskConfig(dmTaskCfg)
}

func (c *JobCfg) verifySourceID() error {
    sourceIDs := make(map[string]struct{})
    for i, upstream := range c.Upstreams {
        if upstream.SourceID == "" {
            return errors.Errorf("source-id of %s upstream is empty", humanize.Ordinal(i+1))
        }
        if _, ok := sourceIDs[upstream.SourceID]; ok {
            return errors.Errorf("source-id %s is duplicated", upstream.SourceID)
        }
        sourceIDs[upstream.SourceID] = struct{}{}
    }
    return nil
}

// ToTaskCfg converts JobCfg to TaskCfg.
func (c *JobCfg) ToTaskCfg() *TaskCfg {
    // nolint:errcheck
    clone, _ := c.Clone()
    return &TaskCfg{
        JobCfg: *clone,
    }
}

// TaskCfg shares the same struct as JobCfg, but it only serves one upstream.
// TaskCfg can be converted to an equivalent DM subtask by ToDMSubTaskCfg.
// TaskCfg adds some internal config for jobmaster/worker.
type TaskCfg struct {
    JobCfg

    // FIXME: remove this item after fixing https://github.com/pingcap/tiflow/issues/7304
    NeedExtStorage bool
}

// ToJobCfg converts TaskCfg to JobCfg.
func (c *TaskCfg) ToJobCfg() *JobCfg {
    // nolint:errcheck
    clone, _ := c.JobCfg.Clone()
    return clone
}

// ToDMSubTaskCfg adapts a TaskCfg to a SubTaskConfig for the worker.
// TODO: fully support all fields
func (c *TaskCfg) ToDMSubTaskCfg(jobID string) *dmconfig.SubTaskConfig {
    cfg := &dmconfig.SubTaskConfig{}
    cfg.ShardMode = c.ShardMode
    cfg.StrictOptimisticShardMode = c.StrictOptimisticShardMode
    cfg.OnlineDDL = c.OnlineDDL
    cfg.ShadowTableRules = c.ShadowTableRules
    cfg.TrashTableRules = c.TrashTableRules
    cfg.CollationCompatible = c.CollationCompatible
    cfg.Name = jobID
    cfg.Mode = c.TaskMode
    cfg.IgnoreCheckingItems = c.IgnoreCheckingItems
    // TODO: remove this after relay only supports configuration in the source config.
    // Ignore the MetaPositionChecking check first because we can't make sure whether relay is enabled.
    needIgnoreMetaChecking := true
    for _, ignoreCheckingItem := range cfg.IgnoreCheckingItems {
        if ignoreCheckingItem == config.MetaPositionChecking || ignoreCheckingItem == config.AllChecking {
            needIgnoreMetaChecking = false
            break
        }
    }
    if needIgnoreMetaChecking {
        cfg.IgnoreCheckingItems = append(c.IgnoreCheckingItems, config.MetaPositionChecking)
    }
    cfg.MetaSchema = c.MetaSchema
    cfg.Timezone = c.Timezone
    cfg.To = *c.TargetDB
    cfg.Experimental = c.Experimental
    cfg.CollationCompatible = c.CollationCompatible
    cfg.BAList = c.BAList[c.Upstreams[0].BAListName]

    cfg.SourceID = c.Upstreams[0].SourceID
    cfg.Meta = c.Upstreams[0].Meta
    cfg.From = *c.Upstreams[0].DBCfg
    cfg.ServerID = c.Upstreams[0].ServerID
    cfg.Flavor = c.Upstreams[0].Flavor
    cfg.CaseSensitive = c.Upstreams[0].CaseSensitive

    cfg.RouteRules = make([]*router.TableRule, len(c.Upstreams[0].RouteRules))
    for j, name := range c.Upstreams[0].RouteRules {
        cfg.RouteRules[j] = c.Routes[name]
    }

    cfg.FilterRules = make([]*bf.BinlogEventRule, len(c.Upstreams[0].FilterRules))
    for j, name := range c.Upstreams[0].FilterRules {
        cfg.FilterRules[j] = c.Filters[name]
    }

    cfg.ExprFilter = make([]*dmconfig.ExpressionFilter, len(c.Upstreams[0].ExpressionFilters))
    for j, name := range c.Upstreams[0].ExpressionFilters {
        cfg.ExprFilter[j] = c.ExprFilter[name]
    }

    cfg.MydumperConfig = *c.Upstreams[0].Mydumper
    cfg.LoaderConfig = *c.Upstreams[0].Loader
    cfg.SyncerConfig = *c.Upstreams[0].Syncer
    cfg.IOTotalBytes = atomic.NewUint64(0)
    cfg.DumpIOTotalBytes = atomic.NewUint64(0)
    cfg.UUID = uuid.NewString()
    cfg.DumpUUID = uuid.NewString()

    return cfg
}
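
// exampleJobCfgUsage is a minimal usage sketch: it decodes a YAML job file and
// fans it out into one DM subtask config per upstream source. The file path
// and job ID are hypothetical placeholders, not values used by the jobmaster.
func exampleJobCfgUsage() ([]*dmconfig.SubTaskConfig, error) {
    jobCfg := &JobCfg{}
    // Decode also runs adjust(), which verifies source-ids and fills DM defaults.
    if err := jobCfg.DecodeFile("./dm-job.yaml"); err != nil { // hypothetical path
        return nil, err
    }

    subTaskCfgs := make([]*dmconfig.SubTaskConfig, 0, len(jobCfg.Upstreams))
    // ToTaskCfgs returns one TaskCfg per upstream, keyed by source-id.
    for _, taskCfg := range jobCfg.ToTaskCfgs() {
        subTaskCfgs = append(subTaskCfgs, taskCfg.ToDMSubTaskCfg("example-job")) // hypothetical job ID
    }
    return subTaskCfgs, nil
}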