github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/config/subtask.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package config
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"database/sql"
    20  	_ "embed"
    21  	"encoding/json"
    22  	"flag"
    23  	"fmt"
    24  	"net/url"
    25  	"regexp"
    26  	"strconv"
    27  	"strings"
    28  	"time"
    29  
    30  	"github.com/BurntSushi/toml"
    31  	extstorage "github.com/pingcap/tidb/br/pkg/storage"
    32  	"github.com/pingcap/tidb/pkg/util/dbutil"
    33  	"github.com/pingcap/tidb/pkg/util/filter"
    34  	regexprrouter "github.com/pingcap/tidb/pkg/util/regexpr-router"
    35  	router "github.com/pingcap/tidb/pkg/util/table-router"
    36  	"github.com/pingcap/tiflow/dm/config/dbconfig"
    37  	"github.com/pingcap/tiflow/dm/pkg/log"
    38  	"github.com/pingcap/tiflow/dm/pkg/storage"
    39  	"github.com/pingcap/tiflow/dm/pkg/terror"
    40  	"github.com/pingcap/tiflow/dm/pkg/utils"
    41  	"github.com/pingcap/tiflow/engine/pkg/promutil"
    42  	bf "github.com/pingcap/tiflow/pkg/binlog-filter"
    43  	"github.com/pingcap/tiflow/pkg/column-mapping"
    44  	"github.com/pingcap/tiflow/pkg/version"
    45  	"go.uber.org/atomic"
    46  	"go.uber.org/zap"
    47  )
    48  
// Task modes, default online-DDL table name rules, and related names.
const (
	// Values for a task's mode (compared against SubTaskConfig.Mode).
	ModeAll       = "all"
	ModeFull      = "full"
	ModeIncrement = "incremental"
	ModeDump      = "dump"
	ModeLoadSync  = "load&sync"

	// Default regular expressions that Adjust installs when no shadow/trash
	// table rules are configured. Each is anchored with ^ and $ and contains
	// exactly one capturing group, which is the same shape that
	// adjustOnlineTableRules enforces on user-supplied rules.
	DefaultShadowTableRules = "^_(.+)_(?:new|gho)$"
	DefaultTrashTableRules  = "^_(.+)_(?:ghc|del|old)$"

	// ShadowTableRules and TrashTableRules are the names of the rule settings;
	// Adjust passes them to adjustOnlineTableRules as the rule type reported
	// in errors.
	// TiDBLightningCheckpointPrefix is not referenced elsewhere in this file;
	// presumably it prefixes TiDB Lightning checkpoint schema names — confirm
	// against its users.
	ShadowTableRules              = "shadow-table-rules"
	TrashTableRules               = "trash-table-rules"
	TiDBLightningCheckpointPrefix = "tidb_lightning_checkpoint_"
)
    64  
    65  // FetchTimeZoneSetting fetch target db global time_zone setting.
    66  // TODO: move GetTimeZoneOffset and FormatTimeZoneOffset from TiDB to tiflow.
    67  func FetchTimeZoneSetting(ctx context.Context, db *sql.DB) (string, error) {
    68  	dur, err := dbutil.GetTimeZoneOffset(ctx, db)
    69  	if err != nil {
    70  		return "", err
    71  	}
    72  	return dbutil.FormatTimeZoneOffset(dur), nil
    73  }
    74  
    75  // GetDBConfigForTest is a helper function to get db config for unit test .
    76  func GetDBConfigForTest() dbconfig.DBConfig {
    77  	return dbconfig.DBConfig{Host: "localhost", User: "root", Password: "not a real password", Port: 3306}
    78  }
    79  
// SubTaskConfig is the configuration for SubTask.
//
// It is encoded to TOML by Toml and to JSON by String, and is decoded from
// TOML by Decode and DecodeFile. Adjust fills in defaults and validates the
// fields; the per-field notes below describe what Adjust does with each one.
type SubTaskConfig struct {
	// flagSet is deliberately unexported.
	// BurntSushi/toml seems to have a bug with the "-" tag: when encoding, a
	// field tagged `toml:"-"` is still encoded, and that panics because of the
	// unsupported type (reflect.Func). Use GetFlagSet / SetFlagSet to access it.
	flagSet *flag.FlagSet

	// When in sharding, multiple dm-workers do one task.
	// Adjust accepts an empty ShardMode, ShardPessimistic or ShardOptimistic,
	// defaults it to ShardPessimistic when IsSharding is set and ShardMode is
	// empty, and rejects StrictOptimisticShardMode unless ShardMode is
	// ShardOptimistic. Adjust also forces OnlineDDL to true when the deprecated
	// OnlineDDLScheme is PT or GHOST.
	IsSharding                bool   `toml:"is-sharding" json:"is-sharding"`
	ShardMode                 string `toml:"shard-mode" json:"shard-mode"`
	StrictOptimisticShardMode bool   `toml:"strict-optimistic-shard-mode" json:"strict-optimistic-shard-mode"`
	OnlineDDL                 bool   `toml:"online-ddl" json:"online-ddl"`

	// pt/gh-ost table name rules, given as regular expressions.
	// Adjust anchors every rule with ^ and $, requires exactly one capturing
	// group per rule, and falls back to DefaultShadowTableRules /
	// DefaultTrashTableRules when a list is empty.
	ShadowTableRules []string `yaml:"shadow-table-rules" toml:"shadow-table-rules" json:"shadow-table-rules"`
	TrashTableRules  []string `yaml:"trash-table-rules" toml:"trash-table-rules" json:"trash-table-rules"`

	// Deprecated. Adjust only accepts an empty value, PT or GHOST, and logs a
	// warning recommending online-ddl when PT or GHOST is set.
	OnlineDDLScheme string `toml:"online-ddl-scheme" json:"online-ddl-scheme"`

	// handle schema/table name mode, and only for schema/table name/pattern
	// if case insensitive, we would convert schema/table name/pattern to lower case
	CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"`

	// default "loose" handle create sql by original sql, will not add default collation as upstream
	// "strict" will add default collation as upstream, and downstream will occur error when downstream don't support
	CollationCompatible string `yaml:"collation_compatible" toml:"collation_compatible" json:"collation_compatible"`

	// Name is the task name; Adjust rejects an empty Name.
	// Mode is compared against the Mode* constants.
	Name string `toml:"name" json:"name"`
	Mode string `toml:"mode" json:"mode"`
	//  treat it as hidden configuration
	IgnoreCheckingItems []string `toml:"ignore-checking-items" json:"ignore-checking-items"`
	// SourceID represents a MySQL/MariaDB instance or a replica group.
	// Adjust requires SourceID to be non-empty and at most MaxSourceIDLength
	// bytes long, and defaults MetaSchema to defaultMetaSchema when it is empty.
	SourceID   string `toml:"source-id" json:"source-id"`
	ServerID   uint32 `toml:"server-id" json:"server-id"`
	Flavor     string `toml:"flavor" json:"flavor"`
	MetaSchema string `toml:"meta-schema" json:"meta-schema"`
	// deprecated
	HeartbeatUpdateInterval int `toml:"heartbeat-update-interval" json:"heartbeat-update-interval"`
	// deprecated
	HeartbeatReportInterval int `toml:"heartbeat-report-interval" json:"heartbeat-report-interval"`
	// EnableHeartbeat is deprecated.
	// Timezone is handed by Adjust to From and To through AdjustWithTimeZone.
	EnableHeartbeat bool   `toml:"enable-heartbeat" json:"enable-heartbeat"`
	Timezone        string `toml:"timezone" json:"timezone"`

	Meta *Meta `toml:"meta" json:"meta"`

	// RelayDir get value from dm-worker config
	RelayDir string `toml:"relay-dir" json:"relay-dir"`

	// UseRelay get value from dm-worker's relayEnabled.
	// From and To are the database configs whose passwords DecryptedClone
	// decrypts (presumably the upstream source and the downstream target —
	// confirm against dbconfig users).
	UseRelay bool              `toml:"use-relay" json:"use-relay"`
	From     dbconfig.DBConfig `toml:"from" json:"from"`
	To       dbconfig.DBConfig `toml:"to" json:"to"`

	// Adjust validates RouteRules and FilterRules by building a table router
	// and a binlog event filter from them.
	RouteRules  []*router.TableRule   `toml:"route-rules" json:"route-rules"`
	FilterRules []*bf.BinlogEventRule `toml:"filter-rules" json:"filter-rules"`
	// ColumnMappingRules is deprecated: Adjust returns
	// ErrConfigColumnMappingDeprecated when any rule is present.
	ColumnMappingRules []*column.Rule      `toml:"mapping-rule" json:"mapping-rule"`
	ExprFilter         []*ExpressionFilter `yaml:"expression-filter" toml:"expression-filter" json:"expression-filter"`

	// black-white-list is deprecated, use block-allow-list instead.
	// Adjust copies BWList into BAList only when BAList is nil.
	BWList *filter.Rules `toml:"black-white-list" json:"black-white-list"`
	BAList *filter.Rules `toml:"block-allow-list" json:"block-allow-list"`

	// Embedded per-unit configurations; their fields are promoted onto
	// SubTaskConfig.
	MydumperConfig // Mydumper configuration
	LoaderConfig   // Loader configuration
	SyncerConfig   // Syncer configuration
	ValidatorCfg   ValidatorConfig

	// compatible with standalone dm unit
	LogLevel  string `toml:"log-level" json:"log-level"`
	LogFile   string `toml:"log-file" json:"log-file"`
	LogFormat string `toml:"log-format" json:"log-format"`
	LogRotate string `toml:"log-rotate" json:"log-rotate"`

	PprofAddr  string `toml:"pprof-addr" json:"pprof-addr"`
	StatusAddr string `toml:"status-addr" json:"status-addr"`

	// ConfigFile is the path Parse loads with DecodeFile when it is non-empty.
	ConfigFile string `toml:"-" json:"config-file"`

	CleanDumpFile bool `toml:"clean-dump-file" json:"clean-dump-file"`

	// deprecated, will auto discover SQL mode
	EnableANSIQuotes bool `toml:"ansi-quotes" json:"ansi-quotes"`

	// still needed by Syncer / Loader bin.
	// When set, Parse prints the version info and returns flag.ErrHelp.
	printVersion bool

	// which DM worker is running the subtask, this will be injected when the real worker starts running the subtask(StartSubTask).
	WorkerName string `toml:"-" json:"-"`
	// task experimental configs
	Experimental struct {
		AsyncCheckpointFlush bool `yaml:"async-checkpoint-flush" toml:"async-checkpoint-flush" json:"async-checkpoint-flush"`
	} `yaml:"experimental" toml:"experimental" json:"experimental"`

	// members below are injected by dataflow engine
	ExtStorage      extstorage.ExternalStorage `toml:"-" json:"-"`
	MetricsFactory  promutil.Factory           `toml:"-" json:"-"`
	FrameworkLogger *zap.Logger                `toml:"-" json:"-"`
	// members below are injected by dataflow engine, UUID should be unique in
	// one go runtime.
	// IOTotalBytes is used to build TCPConnWithIOCounter and UUID is used as a
	// key to let the MySQL driver find the right TCPConnWithIOCounter.
	UUID         string         `toml:"-" json:"-"`
	IOTotalBytes *atomic.Uint64 `toml:"-" json:"-"`

	// meter network usage from upstream
	// e.g., pulling binlog
	DumpUUID         string         `toml:"-" json:"-"`
	DumpIOTotalBytes *atomic.Uint64 `toml:"-" json:"-"`
}
   193  
// SampleSubtaskConfig is the content of subtask.toml in the current folder,
// embedded into the binary at build time by the go:embed directive below.
//
//go:embed subtask.toml
var SampleSubtaskConfig string
   198  
   199  // NewSubTaskConfig creates a new SubTaskConfig.
   200  func NewSubTaskConfig() *SubTaskConfig {
   201  	cfg := &SubTaskConfig{}
   202  	return cfg
   203  }
   204  
// GetFlagSet provides the pointer of subtask's flag set.
// The result is nil until a flag set has been stored with SetFlagSet.
func (c *SubTaskConfig) GetFlagSet() *flag.FlagSet {
	return c.flagSet
}
   209  
// SetFlagSet writes back the flag set.
// The stored flag set is the one Parse uses to parse command line arguments.
func (c *SubTaskConfig) SetFlagSet(flagSet *flag.FlagSet) {
	c.flagSet = flagSet
}
   214  
   215  // String returns the config's json string.
   216  func (c *SubTaskConfig) String() string {
   217  	cfg, err := json.Marshal(c)
   218  	if err != nil {
   219  		log.L().Error("marshal subtask config to json", zap.String("task", c.Name), log.ShortError(err))
   220  	}
   221  	return string(cfg)
   222  }
   223  
   224  // Toml returns TOML format representation of config.
   225  func (c *SubTaskConfig) Toml() (string, error) {
   226  	var b bytes.Buffer
   227  	enc := toml.NewEncoder(&b)
   228  	if err := enc.Encode(c); err != nil {
   229  		return "", terror.ErrConfigTomlTransform.Delegate(err, "encode subtask config")
   230  	}
   231  	return b.String(), nil
   232  }
   233  
   234  // DecodeFile loads and decodes config from file.
   235  func (c *SubTaskConfig) DecodeFile(fpath string, verifyDecryptPassword bool) error {
   236  	_, err := toml.DecodeFile(fpath, c)
   237  	if err != nil {
   238  		return terror.ErrConfigTomlTransform.Delegate(err, "decode subtask config from file")
   239  	}
   240  
   241  	return c.Adjust(verifyDecryptPassword)
   242  }
   243  
   244  // Decode loads config from file data.
   245  func (c *SubTaskConfig) Decode(data string, verifyDecryptPassword bool) error {
   246  	if _, err := toml.Decode(data, c); err != nil {
   247  		return terror.ErrConfigTomlTransform.Delegate(err, "decode subtask config from data")
   248  	}
   249  
   250  	return c.Adjust(verifyDecryptPassword)
   251  }
   252  
   253  func adjustOnlineTableRules(ruleType string, rules []string) ([]string, error) {
   254  	adjustedRules := make([]string, 0, len(rules))
   255  	for _, r := range rules {
   256  		if !strings.HasPrefix(r, "^") {
   257  			r = "^" + r
   258  		}
   259  
   260  		if !strings.HasSuffix(r, "$") {
   261  			r += "$"
   262  		}
   263  
   264  		p, err := regexp.Compile(r)
   265  		if err != nil {
   266  			return rules, terror.ErrConfigOnlineDDLInvalidRegex.Generate(ruleType, r, "fail to compile: "+err.Error())
   267  		}
   268  		if p.NumSubexp() != 1 {
   269  			return rules, terror.ErrConfigOnlineDDLInvalidRegex.Generate(ruleType, r, "rule isn't contains exactly one submatch")
   270  		}
   271  		adjustedRules = append(adjustedRules, r)
   272  	}
   273  	return adjustedRules, nil
   274  }
   275  
// Adjust adjusts and verifies configs.
//
// It fills in defaults (shard mode, online-DDL table rules, meta schema,
// loader dir and sorting dir, syncer queue size / checkpoint flush interval /
// safe mode duration, block-allow-list) and validates the result, returning
// the first error encountered. The receiver is modified in place, and fields
// adjusted before a failing check keep their adjusted values.
//
// When verifyDecryptPassword is true, Adjust also checks that DecryptedClone
// succeeds on the config; the clone itself is discarded.
func (c *SubTaskConfig) Adjust(verifyDecryptPassword bool) error {
	// Task name and source ID are mandatory.
	if c.Name == "" {
		return terror.ErrConfigTaskNameEmpty.Generate()
	}

	if c.SourceID == "" {
		return terror.ErrConfigEmptySourceID.Generate()
	}
	if len(c.SourceID) > MaxSourceIDLength {
		return terror.ErrConfigTooLongSourceID.Generate()
	}

	// Shard mode must be empty, pessimistic or optimistic.
	if c.ShardMode != "" && c.ShardMode != ShardPessimistic && c.ShardMode != ShardOptimistic {
		return terror.ErrConfigShardModeNotSupport.Generate(c.ShardMode)
	} else if c.ShardMode == "" && c.IsSharding {
		c.ShardMode = ShardPessimistic // use the pessimistic mode as default for back compatible.
	}
	// The strict flag only makes sense together with the optimistic mode.
	if c.StrictOptimisticShardMode && c.ShardMode != ShardOptimistic {
		return terror.ErrConfigStrictOptimisticShardMode.Generate()
	}

	// Column mapping is deprecated: any configured rule is rejected.
	if len(c.ColumnMappingRules) > 0 {
		return terror.ErrConfigColumnMappingDeprecated.Generate()
	}

	// The deprecated online-ddl-scheme is still honored by turning on OnlineDDL.
	if c.OnlineDDLScheme != "" && c.OnlineDDLScheme != PT && c.OnlineDDLScheme != GHOST {
		return terror.ErrConfigOnlineSchemeNotSupport.Generate(c.OnlineDDLScheme)
	} else if c.OnlineDDLScheme == PT || c.OnlineDDLScheme == GHOST {
		c.OnlineDDL = true
		log.L().Warn("'online-ddl-scheme' will be deprecated soon. Recommend that use online-ddl instead of online-ddl-scheme.")
	}
	// Use the default shadow table rule when none is configured; otherwise
	// anchor and validate the configured ones.
	if len(c.ShadowTableRules) == 0 {
		c.ShadowTableRules = []string{DefaultShadowTableRules}
	} else {
		shadowTableRule, err := adjustOnlineTableRules(ShadowTableRules, c.ShadowTableRules)
		if err != nil {
			return err
		}
		c.ShadowTableRules = shadowTableRule
	}

	// Same handling for the trash table rules.
	if len(c.TrashTableRules) == 0 {
		c.TrashTableRules = []string{DefaultTrashTableRules}
	} else {
		trashTableRule, err := adjustOnlineTableRules(TrashTableRules, c.TrashTableRules)
		if err != nil {
			return err
		}
		c.TrashTableRules = trashTableRule
	}

	if c.MetaSchema == "" {
		c.MetaSchema = defaultMetaSchema
	}

	// adjust dir, no need to do for load&sync mode because it needs its own s3 repository
	if HasLoad(c.Mode) && c.Mode != ModeLoadSync {
		// check: an S3 dir cannot be combined with the LoadModeLoader import mode.
		isS3 := storage.IsS3Path(c.LoaderConfig.Dir)
		if isS3 && c.ImportMode == LoadModeLoader {
			return terror.ErrConfigLoaderS3NotSupport.Generate(c.LoaderConfig.Dir)
		}
		// add suffix
		var dirSuffix string
		if isS3 {
			// we will dump files to s3 dir's subdirectory
			dirSuffix = "/" + c.Name + "." + c.SourceID
		} else {
			// TODO we will dump local file to dir's subdirectory, but it may have risk of compatibility, we will fix in other pr
			dirSuffix = "." + c.Name
		}
		newDir, err := storage.AdjustPath(c.LoaderConfig.Dir, dirSuffix)
		if err != nil {
			return terror.ErrConfigLoaderDirInvalid.Delegate(err, c.LoaderConfig.Dir)
		}
		c.LoaderConfig.Dir = newDir
	}

	// adjust sorting dir; this reads LoaderConfig.Dir, so it must run after
	// the dir adjustment above.
	if HasLoad(c.Mode) {
		newDir := c.LoaderConfig.Dir
		if c.LoaderConfig.SortingDirPhysical == "" {
			if storage.IsLocalDiskPath(newDir) {
				// lightning will not recursively create directories, so we use same level dir
				c.LoaderConfig.SortingDirPhysical = newDir + ".sorting"
			} else {
				// Non-local dir: use a relative local directory named after
				// the path-escaped task name.
				c.LoaderConfig.SortingDirPhysical = "./sorting." + url.PathEscape(c.Name)
			}
		}
	}

	// Syncer defaults.
	if c.SyncerConfig.QueueSize == 0 {
		c.SyncerConfig.QueueSize = defaultQueueSize
	}
	if c.SyncerConfig.CheckpointFlushInterval == 0 {
		c.SyncerConfig.CheckpointFlushInterval = defaultCheckpointFlushInterval
	}
	// The safe mode duration defaults to twice the checkpoint flush interval,
	// written as a number of seconds.
	if c.SyncerConfig.SafeModeDuration == "" {
		c.SyncerConfig.SafeModeDuration = strconv.Itoa(2*c.SyncerConfig.CheckpointFlushInterval) + "s"
	}
	// The duration must parse, and a zero duration conflicts with SafeMode.
	if duration, err := time.ParseDuration(c.SyncerConfig.SafeModeDuration); err != nil {
		return terror.ErrConfigInvalidSafeModeDuration.Generate(c.SyncerConfig.SafeModeDuration, err)
	} else if c.SyncerConfig.SafeMode && duration == 0 {
		return terror.ErrConfigConfictSafeModeDurationAndSafeMode.Generate()
	}

	c.From.AdjustWithTimeZone(c.Timezone)
	c.To.AdjustWithTimeZone(c.Timezone)

	// Only the error from DecryptedClone matters here; the clone is dropped.
	if verifyDecryptPassword {
		_, err1 := c.DecryptedClone()
		if err1 != nil {
			return err1
		}
	}

	// only when block-allow-list is nil use black-white-list
	if c.BAList == nil && c.BWList != nil {
		c.BAList = c.BWList
	}

	// The constructors below are called only to validate the configuration;
	// the objects they build are discarded.
	if _, err := filter.New(c.CaseSensitive, c.BAList); err != nil {
		return terror.ErrConfigGenBAList.Delegate(err)
	}
	if _, err := regexprrouter.NewRegExprRouter(c.CaseSensitive, c.RouteRules); err != nil {
		return terror.ErrConfigGenTableRouter.Delegate(err)
	}
	// NewMapping will fill arguments with the default values.
	// NOTE(review): ColumnMappingRules is always empty here because a
	// non-empty list was rejected earlier in this function.
	if _, err := column.NewMapping(c.CaseSensitive, c.ColumnMappingRules); err != nil {
		return terror.ErrConfigGenColumnMapping.Delegate(err)
	}
	if _, err := utils.ParseFileSize(c.MydumperConfig.ChunkFilesize, 0); err != nil {
		return terror.ErrConfigInvalidChunkFileSize.Generate(c.MydumperConfig.ChunkFilesize)
	}

	if _, err := bf.NewBinlogEvent(c.CaseSensitive, c.FilterRules); err != nil {
		return terror.ErrConfigBinlogEventFilter.Delegate(err)
	}
	// Finally let the loader and validator configs adjust themselves.
	if err := c.LoaderConfig.adjust(); err != nil {
		return err
	}
	if err := c.ValidatorCfg.Adjust(); err != nil {
		return err
	}

	// TODO: check every member
	// TODO: since we checked here, we could remove other terror like ErrSyncerUnitGenBAList
	// TODO: or we should check at task config and source config rather than this subtask config, to reduce duplication

	return nil
}
   428  
   429  // Parse parses flag definitions from the argument list.
   430  func (c *SubTaskConfig) Parse(arguments []string, verifyDecryptPassword bool) error {
   431  	// Parse first to get config file.
   432  	err := c.flagSet.Parse(arguments)
   433  	if err != nil {
   434  		return terror.ErrConfigParseFlagSet.Delegate(err)
   435  	}
   436  
   437  	if c.printVersion {
   438  		fmt.Println(version.GetRawInfo())
   439  		return flag.ErrHelp
   440  	}
   441  
   442  	// Load config file if specified.
   443  	if c.ConfigFile != "" {
   444  		err = c.DecodeFile(c.ConfigFile, verifyDecryptPassword)
   445  		if err != nil {
   446  			return err
   447  		}
   448  	}
   449  
   450  	// Parse again to replace with command line options.
   451  	err = c.flagSet.Parse(arguments)
   452  	if err != nil {
   453  		return terror.ErrConfigParseFlagSet.Delegate(err)
   454  	}
   455  
   456  	if len(c.flagSet.Args()) != 0 {
   457  		return terror.ErrConfigParseFlagSet.Generatef("'%s' is an invalid flag", c.flagSet.Arg(0))
   458  	}
   459  
   460  	return c.Adjust(verifyDecryptPassword)
   461  }
   462  
   463  // DecryptedClone tries to decrypt db password in config.
   464  func (c *SubTaskConfig) DecryptedClone() (*SubTaskConfig, error) {
   465  	clone, err := c.Clone()
   466  	if err != nil {
   467  		return nil, err
   468  	}
   469  
   470  	var (
   471  		pswdTo   string
   472  		pswdFrom string
   473  	)
   474  	if len(clone.To.Password) > 0 {
   475  		pswdTo = utils.DecryptOrPlaintext(clone.To.Password)
   476  	}
   477  	if len(clone.From.Password) > 0 {
   478  		pswdFrom = utils.DecryptOrPlaintext(clone.From.Password)
   479  	}
   480  	clone.From.Password = pswdFrom
   481  	clone.To.Password = pswdTo
   482  
   483  	return clone, nil
   484  }
   485  
   486  // Clone returns a replica of SubTaskConfig.
   487  func (c *SubTaskConfig) Clone() (*SubTaskConfig, error) {
   488  	content, err := c.Toml()
   489  	if err != nil {
   490  		return nil, err
   491  	}
   492  
   493  	clone := &SubTaskConfig{}
   494  	_, err = toml.Decode(content, clone)
   495  	if err != nil {
   496  		return nil, terror.ErrConfigTomlTransform.Delegate(err, "decode subtask config from data")
   497  	}
   498  
   499  	return clone, nil
   500  }