github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/jobmaster/dm/config/config.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package config
    15  
    16  import (
    17  	"context"
    18  	"os"
    19  	"time"
    20  
    21  	"github.com/dustin/go-humanize"
    22  	"github.com/google/uuid"
    23  	"github.com/pingcap/tidb/pkg/util/filter"
    24  	router "github.com/pingcap/tidb/pkg/util/table-router"
    25  	"github.com/pingcap/tiflow/dm/config"
    26  	dmconfig "github.com/pingcap/tiflow/dm/config"
    27  	"github.com/pingcap/tiflow/dm/config/dbconfig"
    28  	"github.com/pingcap/tiflow/dm/master"
    29  	bf "github.com/pingcap/tiflow/pkg/binlog-filter"
    30  	"github.com/pingcap/tiflow/pkg/column-mapping"
    31  	"github.com/pingcap/tiflow/pkg/errors"
    32  	"go.uber.org/atomic"
    33  	"gopkg.in/yaml.v2"
    34  )
    35  
// UpstreamCfg copies the needed fields from DM SourceCfg and MySQLInstance part
// of DM task config.
//
// The embedded MySQLInstance is serialized inline, i.e. its keys live at the
// same level as the fields declared here. The remaining fields mirror the DM
// source config: DBCfg corresponds to SourceConfig.From, and ServerID, Flavor,
// EnableGTID and CaseSensitive correspond to the SourceConfig fields of the
// same name (see fromDMSourceConfig).
type UpstreamCfg struct {
	dmconfig.MySQLInstance `yaml:",inline" toml:",inline" json:",inline"`
	DBCfg                  *dbconfig.DBConfig `yaml:"db-config" toml:"db-config" json:"db-config"`
	ServerID               uint32             `yaml:"server-id" toml:"server-id" json:"server-id"`
	Flavor                 string             `yaml:"flavor" toml:"flavor" json:"flavor"`
	EnableGTID             bool               `yaml:"enable-gtid" toml:"enable-gtid" json:"enable-gtid"`
	CaseSensitive          bool               `yaml:"case-sensitive" toml:"case-sensitive" json:"case-sensitive"`
}
    46  
    47  func (u *UpstreamCfg) fromDMSourceConfig(from *dmconfig.SourceConfig) {
    48  	u.DBCfg = from.From.Clone()
    49  	u.ServerID = from.ServerID
    50  	u.Flavor = from.Flavor
    51  	u.EnableGTID = from.EnableGTID
    52  	u.CaseSensitive = from.CaseSensitive
    53  }
    54  
    55  func (u *UpstreamCfg) toDMSourceConfig() *dmconfig.SourceConfig {
    56  	ret := dmconfig.NewSourceConfig()
    57  	ret.SourceID = u.SourceID
    58  	ret.From = *u.DBCfg.Clone()
    59  	ret.ServerID = u.ServerID
    60  	ret.Flavor = u.Flavor
    61  	ret.EnableGTID = u.EnableGTID
    62  
    63  	return ret
    64  }
    65  
    66  func (u *UpstreamCfg) adjust() error {
    67  	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
    68  	defer cancel()
    69  	dmSource := u.toDMSourceConfig()
    70  	err := master.CheckAndAdjustSourceConfigFunc(ctx, dmSource)
    71  	if err != nil {
    72  		return err
    73  	}
    74  	u.fromDMSourceConfig(dmSource)
    75  	return nil
    76  }
    77  
// JobCfg copies from SubTaskConfig and removes some deprecated fields.
// It represents a DM subtask with multiple source configs embedded as Upstreams.
//
// The named maps (Filters, ExprFilter, BAList, Routes, ...) are rule
// definitions keyed by name; each upstream refers to the rules it uses by
// those names (see TaskCfg.ToDMSubTaskCfg).
// DISCUSS: support command line args. e.g. --start-time.
type JobCfg struct {
	TaskMode                  string                                `yaml:"task-mode" toml:"task-mode" json:"task-mode"`
	ShardMode                 string                                `yaml:"shard-mode" toml:"shard-mode" json:"shard-mode"` // when `shard-mode` set, we always enable sharding support.
	StrictOptimisticShardMode bool                                  `yaml:"strict-optimistic-shard-mode" toml:"strict-optimistic-shard-mode" json:"strict-optimistic-shard-mode"`
	IgnoreCheckingItems       []string                              `yaml:"ignore-checking-items" toml:"ignore-checking-items" json:"ignore-checking-items"`
	Timezone                  string                                `yaml:"timezone" toml:"timezone" json:"timezone"`
	CollationCompatible       string                                `yaml:"collation_compatible" toml:"collation_compatible" json:"collation_compatible"`
	TargetDB                  *dbconfig.DBConfig                    `yaml:"target-database" toml:"target-database" json:"target-database"`
	ShadowTableRules          []string                              `yaml:"shadow-table-rules" toml:"shadow-table-rules" json:"shadow-table-rules"`
	TrashTableRules           []string                              `yaml:"trash-table-rules" toml:"trash-table-rules" json:"trash-table-rules"`
	Filters                   map[string]*bf.BinlogEventRule        `yaml:"filters" toml:"filters" json:"filters"`
	ExprFilter                map[string]*dmconfig.ExpressionFilter `yaml:"expression-filter" toml:"expression-filter" json:"expression-filter"`
	BAList                    map[string]*filter.Rules              `yaml:"block-allow-list" toml:"block-allow-list" json:"block-allow-list"`
	Mydumpers                 map[string]*dmconfig.MydumperConfig   `yaml:"mydumpers" toml:"mydumpers" json:"mydumpers"`
	Loaders                   map[string]*dmconfig.LoaderConfig     `yaml:"loaders" toml:"loaders" json:"loaders"`
	Syncers                   map[string]*dmconfig.SyncerConfig     `yaml:"syncers" toml:"syncers" json:"syncers"`
	Routes                    map[string]*router.TableRule          `yaml:"routes" toml:"routes" json:"routes"`
	Validators                map[string]*dmconfig.ValidatorConfig  `yaml:"validators" toml:"validators" json:"validators"`
	// The DM source config is removed; each upstream carries its own db config
	// instead. Source IDs must be non-empty and unique (see verifySourceID).
	Upstreams []*UpstreamCfg `yaml:"upstreams" toml:"upstreams" json:"upstreams"`

	// no need experimental features?
	Experimental struct {
		AsyncCheckpointFlush bool `yaml:"async-checkpoint-flush" toml:"async-checkpoint-flush" json:"async-checkpoint-flush"`
	} `yaml:"experimental" toml:"experimental" json:"experimental"`

	// remove them later
	MetaSchema     string                  `yaml:"meta-schema" toml:"meta-schema" json:"meta-schema"`
	OnlineDDL      bool                    `yaml:"online-ddl" toml:"online-ddl" json:"online-ddl"`
	ColumnMappings map[string]*column.Rule `yaml:"column-mappings" toml:"column-mappings" json:"column-mappings"`

	// removed
	// CleanDumpFile  bool                    `yaml:"clean-dump-file" toml:"clean-dump-file" json:"clean-dump-file"`

	// deprecated
	// IsSharding          bool                                  `yaml:"is-sharding" toml:"is-sharding" json:"is-sharding"`
	// EnableHeartbeat bool `yaml:"enable-heartbeat" toml:"enable-heartbeat" json:"enable-heartbeat"`
	// HeartbeatUpdateInterval int `yaml:"heartbeat-update-interval" toml:"heartbeat-update-interval" json:"heartbeat-update-interval"`
	// HeartbeatReportInterval int    `yaml:"heartbeat-report-interval" toml:"heartbeat-report-interval" json:"heartbeat-report-interval"`
	// pt/gh-ost name rule, support regex
	// OnlineDDLScheme string `yaml:"online-ddl-scheme" toml:"online-ddl-scheme" json:"online-ddl-scheme"`
	// BWList map[string]*filter.Rules `yaml:"black-white-list" toml:"black-white-list" json:"black-white-list"`
	// EnableANSIQuotes bool `yaml:"ansi-quotes" toml:"ansi-quotes" json:"ansi-quotes"`
	// RemoveMeta bool `yaml:"remove-meta"`

	// NOTE(review): presumably the revision of the stored config this value
	// was loaded from; nothing in this file reads or writes it - confirm
	// against the callers.
	ModRevision uint64 `yaml:"mod-revision" toml:"mod-revision" json:"mod-revision"`
}
   128  
   129  // DecodeFile reads file content from a given path and decodes it.
   130  func (c *JobCfg) DecodeFile(fpath string) error {
   131  	bs, err := os.ReadFile(fpath)
   132  	if err != nil {
   133  		return errors.Trace(err)
   134  	}
   135  	return c.Decode(bs)
   136  }
   137  
   138  // Decode unmarshals the content into JobCfg and calls adjust() on it.
   139  // TODO: unify config type
   140  // Now, dmJobmaster use yaml, dmWorker use toml, and lib use json...
   141  func (c *JobCfg) Decode(content []byte) error {
   142  	if err := yaml.UnmarshalStrict(content, c); err != nil {
   143  		return err
   144  	}
   145  	return c.adjust()
   146  }
   147  
   148  // Yaml serializes the JobCfg into a YAML document.
   149  func (c *JobCfg) Yaml() ([]byte, error) {
   150  	return yaml.Marshal(c)
   151  }
   152  
   153  // Clone returns a deep copy of JobCfg
   154  func (c *JobCfg) Clone() (*JobCfg, error) {
   155  	content, err := c.Yaml()
   156  	if err != nil {
   157  		return nil, err
   158  	}
   159  	clone := &JobCfg{}
   160  	err = yaml.Unmarshal(content, clone)
   161  	return clone, err
   162  }
   163  
   164  // ToTaskCfgs converts job config to a map, mapping from upstream source id
   165  // to task config.
   166  func (c *JobCfg) ToTaskCfgs() map[string]*TaskCfg {
   167  	taskCfgs := make(map[string]*TaskCfg, len(c.Upstreams))
   168  	for _, mysqlInstance := range c.Upstreams {
   169  		taskCfg := c.ToTaskCfg()
   170  		taskCfg.Upstreams = []*UpstreamCfg{mysqlInstance}
   171  		taskCfgs[mysqlInstance.SourceID] = taskCfg
   172  	}
   173  	return taskCfgs
   174  }
   175  
   176  // FromTaskCfgs converts task configs to a jobCfg.
   177  func FromTaskCfgs(taskCfgs []*TaskCfg) *JobCfg {
   178  	if len(taskCfgs) == 0 {
   179  		return nil
   180  	}
   181  
   182  	jobCfg := taskCfgs[0].ToJobCfg()
   183  	// nolint:errcheck
   184  	jobCfg, _ = jobCfg.Clone()
   185  	for i := 1; i < len(taskCfgs); i++ {
   186  		jobCfg.Upstreams = append(jobCfg.Upstreams, taskCfgs[i].Upstreams...)
   187  	}
   188  	return jobCfg
   189  }
   190  
   191  // toDMTaskConfig transform a jobCfg to DM TaskCfg.
   192  func (c *JobCfg) toDMTaskConfig() (*dmconfig.TaskConfig, error) {
   193  	dmTaskCfg := dmconfig.NewTaskConfig()
   194  	// set task name for verify
   195  	// we will replace task name with job-id when create dm-worker
   196  	dmTaskCfg.Name = "engine_task"
   197  
   198  	// Copy all the fields contained in dmTaskCfg.
   199  	content, err := c.Yaml()
   200  	if err != nil {
   201  		return nil, err
   202  	}
   203  	if err = yaml.Unmarshal(content, dmTaskCfg); err != nil {
   204  		return nil, err
   205  	}
   206  
   207  	// transform all the fields not contained in dmTaskCfg.
   208  	for _, upstream := range c.Upstreams {
   209  		if err = upstream.adjust(); err != nil {
   210  			return nil, err
   211  		}
   212  		dmTaskCfg.MySQLInstances = append(dmTaskCfg.MySQLInstances, &upstream.MySQLInstance)
   213  	}
   214  	return dmTaskCfg, nil
   215  }
   216  
   217  func (c *JobCfg) fromDMTaskConfig(dmTaskCfg *dmconfig.TaskConfig) error {
   218  	// Copy all the fields contained in jobCfg.
   219  	return yaml.Unmarshal([]byte(dmTaskCfg.String()), c)
   220  
   221  	// transform all the fields not contained in dmTaskCfg.
   222  	// no need to transform mysqlInstance because we use reference above.
   223  	// nothing now.
   224  }
   225  
   226  func (c *JobCfg) adjust() error {
   227  	if err := c.verifySourceID(); err != nil {
   228  		return err
   229  	}
   230  	dmTaskCfg, err := c.toDMTaskConfig()
   231  	if err != nil {
   232  		return err
   233  	}
   234  	if err := dmTaskCfg.Adjust(); err != nil {
   235  		return err
   236  	}
   237  	return c.fromDMTaskConfig(dmTaskCfg)
   238  }
   239  
   240  func (c *JobCfg) verifySourceID() error {
   241  	sourceIDs := make(map[string]struct{})
   242  	for i, upstream := range c.Upstreams {
   243  		if upstream.SourceID == "" {
   244  			return errors.Errorf("source-id of %s upstream is empty", humanize.Ordinal(i+1))
   245  		}
   246  		if _, ok := sourceIDs[upstream.SourceID]; ok {
   247  			return errors.Errorf("source-id %s is duplicated", upstream.SourceID)
   248  		}
   249  		sourceIDs[upstream.SourceID] = struct{}{}
   250  	}
   251  	return nil
   252  }
   253  
   254  // ToTaskCfg converts JobCfg to TaskCfg.
   255  func (c *JobCfg) ToTaskCfg() *TaskCfg {
   256  	// nolint:errcheck
   257  	clone, _ := c.Clone()
   258  	return &TaskCfg{
   259  		JobCfg: *clone,
   260  	}
   261  }
   262  
// TaskCfg shares same struct as JobCfg, but it only serves one upstream.
// TaskCfg can be converted to an equivalent DM subtask by ToDMSubTaskCfg.
// TaskCfg add some internal config for jobmaster/worker.
type TaskCfg struct {
	// JobCfg is embedded, so all job-level fields and methods are promoted.
	JobCfg

	// NeedExtStorage carries no serialization tags.
	// NOTE(review): presumably tells the worker whether external storage is
	// required; its consumers are not visible in this file - confirm.
	// FIXME: remove this item after fix https://github.com/pingcap/tiflow/issues/7304
	NeedExtStorage bool
}
   272  
   273  // ToJobCfg converts TaskCfg to JobCfg.
   274  func (c *TaskCfg) ToJobCfg() *JobCfg {
   275  	// nolint:errcheck
   276  	clone, _ := c.JobCfg.Clone()
   277  	return clone
   278  }
   279  
   280  // ToDMSubTaskCfg adapts a TaskCfg to a SubTaskCfg for worker now.
   281  // TODO: fully support all fields
   282  func (c *TaskCfg) ToDMSubTaskCfg(jobID string) *dmconfig.SubTaskConfig {
   283  	cfg := &dmconfig.SubTaskConfig{}
   284  	cfg.ShardMode = c.ShardMode
   285  	cfg.StrictOptimisticShardMode = c.StrictOptimisticShardMode
   286  	cfg.OnlineDDL = c.OnlineDDL
   287  	cfg.ShadowTableRules = c.ShadowTableRules
   288  	cfg.TrashTableRules = c.TrashTableRules
   289  	cfg.CollationCompatible = c.CollationCompatible
   290  	cfg.Name = jobID
   291  	cfg.Mode = c.TaskMode
   292  	cfg.IgnoreCheckingItems = c.IgnoreCheckingItems
   293  	// TODO: remove this after relay only supports configure in source config
   294  	// ignore check MetaPositionChecking first because we can't make sure whether relay is enabled
   295  	needIgnoreMetaChecking := true
   296  	for _, ignoreCheckingItem := range cfg.IgnoreCheckingItems {
   297  		if ignoreCheckingItem == config.MetaPositionChecking || ignoreCheckingItem == config.AllChecking {
   298  			needIgnoreMetaChecking = false
   299  			break
   300  		}
   301  	}
   302  	if needIgnoreMetaChecking {
   303  		cfg.IgnoreCheckingItems = append(c.IgnoreCheckingItems, config.MetaPositionChecking)
   304  	}
   305  	cfg.MetaSchema = c.MetaSchema
   306  	cfg.Timezone = c.Timezone
   307  	cfg.To = *c.TargetDB
   308  	cfg.Experimental = c.Experimental
   309  	cfg.CollationCompatible = c.CollationCompatible
   310  	cfg.BAList = c.BAList[c.Upstreams[0].BAListName]
   311  
   312  	cfg.SourceID = c.Upstreams[0].SourceID
   313  	cfg.Meta = c.Upstreams[0].Meta
   314  	cfg.From = *c.Upstreams[0].DBCfg
   315  	cfg.ServerID = c.Upstreams[0].ServerID
   316  	cfg.Flavor = c.Upstreams[0].Flavor
   317  	cfg.CaseSensitive = c.Upstreams[0].CaseSensitive
   318  
   319  	cfg.RouteRules = make([]*router.TableRule, len(c.Upstreams[0].RouteRules))
   320  	for j, name := range c.Upstreams[0].RouteRules {
   321  		cfg.RouteRules[j] = c.Routes[name]
   322  	}
   323  
   324  	cfg.FilterRules = make([]*bf.BinlogEventRule, len(c.Upstreams[0].FilterRules))
   325  	for j, name := range c.Upstreams[0].FilterRules {
   326  		cfg.FilterRules[j] = c.Filters[name]
   327  	}
   328  
   329  	cfg.ExprFilter = make([]*dmconfig.ExpressionFilter, len(c.Upstreams[0].ExpressionFilters))
   330  	for j, name := range c.Upstreams[0].ExpressionFilters {
   331  		cfg.ExprFilter[j] = c.ExprFilter[name]
   332  	}
   333  
   334  	cfg.MydumperConfig = *c.Upstreams[0].Mydumper
   335  	cfg.LoaderConfig = *c.Upstreams[0].Loader
   336  	cfg.SyncerConfig = *c.Upstreams[0].Syncer
   337  	cfg.IOTotalBytes = atomic.NewUint64(0)
   338  	cfg.DumpIOTotalBytes = atomic.NewUint64(0)
   339  	cfg.UUID = uuid.NewString()
   340  	cfg.DumpUUID = uuid.NewString()
   341  
   342  	return cfg
   343  }