github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/lightning/config/config.go

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package config
    15  
    16  import (
    17  	"context"
    18  	"encoding/json"
    19  	"fmt"
    20  	"net"
    21  	"net/url"
    22  	"os"
    23  	"path/filepath"
    24  	"runtime"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/BurntSushi/toml"
    30  	"github.com/docker/go-units"
    31  	gomysql "github.com/go-sql-driver/mysql"
    32  	"github.com/pingcap/errors"
    33  	"github.com/pingcap/parser/mysql"
    34  	filter "github.com/pingcap/tidb-tools/pkg/table-filter"
    35  	router "github.com/pingcap/tidb-tools/pkg/table-router"
    36  	tidbcfg "github.com/pingcap/tidb/config"
    37  	"go.uber.org/zap"
    38  
    39  	"github.com/pingcap/tidb-lightning/lightning/common"
    40  	"github.com/pingcap/tidb-lightning/lightning/log"
    41  )
    42  
    43  const (
     44  	// ImportMode is the TiKV mode used while importing data.
     45  	ImportMode = "import"
     46  	// NormalMode is the normal TiKV serving mode.
     47  	NormalMode = "normal"
    48  
    49  	// BackendTiDB is a constant for choosing the "TiDB" backend in the configuration.
    50  	BackendTiDB = "tidb"
    51  	// BackendImporter is a constant for choosing the "Importer" backend in the configuration.
    52  	BackendImporter = "importer"
     53  	// BackendLocal is a constant for choosing the "Local" backend in the configuration.
     54  	// In this mode, KV pairs are written and sorted in local storage, then ingested into TiKV directly.
    55  	BackendLocal = "local"
    56  
    57  	// CheckpointDriverMySQL is a constant for choosing the "MySQL" checkpoint driver in the configuration.
    58  	CheckpointDriverMySQL = "mysql"
    59  	// CheckpointDriverFile is a constant for choosing the "File" checkpoint driver in the configuration.
    60  	CheckpointDriverFile = "file"
    61  
    62  	// ReplaceOnDup indicates using REPLACE INTO to insert data
    63  	ReplaceOnDup = "replace"
    64  	// IgnoreOnDup indicates using INSERT IGNORE INTO to insert data
    65  	IgnoreOnDup = "ignore"
     66  	// ErrorOnDup indicates using plain INSERT INTO to insert data; duplicates that violate a PK or UNIQUE constraint cause an error
    67  	ErrorOnDup = "error"
    68  
    69  	defaultDistSQLScanConcurrency     = 15
    70  	defaultBuildStatsConcurrency      = 20
    71  	defaultIndexSerialScanConcurrency = 20
    72  	defaultChecksumTableConcurrency   = 2
    73  )
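
// For illustration: a minimal sketch of how the backend, on-duplicate and checkpoint
// driver constants above typically appear in a task TOML (values are placeholders):
//
//	[tikv-importer]
//	backend = "local"         # one of "tidb", "importer", "local"
//	on-duplicate = "replace"  # one of "replace", "ignore", "error" (tidb backend only)
//
//	[checkpoint]
//	driver = "file"           # one of "mysql", "file"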
    74  
    75  const (
    76  	LocalMemoryTableSize = 512 * units.MiB
    77  
    78  	// autoDiskQuotaLocalReservedSize is the estimated size a local-backend
    79  	// engine may gain after calling Flush(). This is currently defined by its
    80  	// max MemTable size (512 MiB). It is used to compensate for the soft limit
    81  	// of the disk quota against the hard limit of the disk free space.
    82  	//
    83  	// With a maximum of 8 engines, this should contribute 4.0 GiB to the
    84  	// reserved size.
    85  	autoDiskQuotaLocalReservedSize uint64 = LocalMemoryTableSize
    86  
    87  	// autoDiskQuotaLocalReservedSpeed is the estimated size increase per
    88  	// millisecond per write thread the local backend may gain on all engines.
    89  	// This is used to compute the maximum size overshoot between two disk quota
    90  	// checks, if the first one has barely passed.
    91  	//
    92  	// With cron.check-disk-quota = 1m, region-concurrency = 40, this should
    93  	// contribute 2.3 GiB to the reserved size.
    94  	autoDiskQuotaLocalReservedSpeed uint64 = 1 * units.KiB
    95  )
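
// Worked example for the two reservation terms above, assuming the defaults quoted in the
// comments: 8 engines * 512 MiB = 4.0 GiB from autoDiskQuotaLocalReservedSize, and
// 40 write threads * 60,000 ms * 1 KiB/ms ≈ 2.3 GiB from autoDiskQuotaLocalReservedSpeed;
// Adjust combines both terms when deriving a default tikv-importer.disk-quota.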
    96  
    97  var (
    98  	defaultConfigPaths    = []string{"tidb-lightning.toml", "conf/tidb-lightning.toml"}
    99  	supportedStorageTypes = []string{"file", "local", "s3", "noop"}
   100  
   101  	DefaultFilter = []string{
   102  		"*.*",
   103  		"!mysql.*",
   104  		"!sys.*",
   105  		"!INFORMATION_SCHEMA.*",
   106  		"!PERFORMANCE_SCHEMA.*",
   107  		"!METRICS_SCHEMA.*",
   108  		"!INSPECTION_SCHEMA.*",
   109  	}
   110  )
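
// Sketch of overriding the default table filter from a task TOML (table names are
// placeholders); note that Adjust rejects a non-default `mydumper.filter` combined with
// the deprecated [black-white-list] section:
//
//	[mydumper]
//	filter = ['mydb.*', '!mydb.skip_me']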
   111  
   112  type DBStore struct {
   113  	Host       string    `toml:"host" json:"host"`
   114  	Port       int       `toml:"port" json:"port"`
   115  	User       string    `toml:"user" json:"user"`
   116  	Psw        string    `toml:"password" json:"-"`
   117  	StatusPort int       `toml:"status-port" json:"status-port"`
   118  	PdAddr     string    `toml:"pd-addr" json:"pd-addr"`
   119  	StrSQLMode string    `toml:"sql-mode" json:"sql-mode"`
   120  	TLS        string    `toml:"tls" json:"tls"`
   121  	Security   *Security `toml:"security" json:"security"`
   122  
   123  	SQLMode          mysql.SQLMode `toml:"-" json:"-"`
   124  	MaxAllowedPacket uint64        `toml:"max-allowed-packet" json:"max-allowed-packet"`
   125  
   126  	DistSQLScanConcurrency     int `toml:"distsql-scan-concurrency" json:"distsql-scan-concurrency"`
   127  	BuildStatsConcurrency      int `toml:"build-stats-concurrency" json:"build-stats-concurrency"`
   128  	IndexSerialScanConcurrency int `toml:"index-serial-scan-concurrency" json:"index-serial-scan-concurrency"`
   129  	ChecksumTableConcurrency   int `toml:"checksum-table-concurrency" json:"checksum-table-concurrency"`
   130  }
   131  
   132  type Config struct {
   133  	TaskID int64 `toml:"-" json:"id"`
   134  
   135  	App  Lightning `toml:"lightning" json:"lightning"`
   136  	TiDB DBStore   `toml:"tidb" json:"tidb"`
   137  
   138  	Checkpoint   Checkpoint          `toml:"checkpoint" json:"checkpoint"`
   139  	Mydumper     MydumperRuntime     `toml:"mydumper" json:"mydumper"`
   140  	TikvImporter TikvImporter        `toml:"tikv-importer" json:"tikv-importer"`
   141  	PostRestore  PostRestore         `toml:"post-restore" json:"post-restore"`
   142  	Cron         Cron                `toml:"cron" json:"cron"`
   143  	Routes       []*router.TableRule `toml:"routes" json:"routes"`
   144  	Security     Security            `toml:"security" json:"security"`
   145  
   146  	BWList filter.MySQLReplicationRules `toml:"black-white-list" json:"black-white-list"`
   147  }
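
// Illustrative sketch of the typical lifecycle of a Config value, as wired together by
// NewConfig, LoadFromTOML and Adjust below (taskTOML is a placeholder; error handling elided):
//
//	cfg := NewConfig()
//	_ = cfg.LoadFromTOML([]byte(taskTOML))
//	_ = cfg.Adjust(context.Background())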
   148  
   149  func (c *Config) String() string {
   150  	bytes, err := json.Marshal(c)
   151  	if err != nil {
   152  		log.L().Error("marshal config to json error", log.ShortError(err))
   153  	}
   154  	return string(bytes)
   155  }
   156  
   157  func (c *Config) ToTLS() (*common.TLS, error) {
   158  	hostPort := net.JoinHostPort(c.TiDB.Host, strconv.Itoa(c.TiDB.StatusPort))
   159  	return common.NewTLS(c.Security.CAPath, c.Security.CertPath, c.Security.KeyPath, hostPort)
   160  }
   161  
   162  type Lightning struct {
   163  	TableConcurrency  int  `toml:"table-concurrency" json:"table-concurrency"`
   164  	IndexConcurrency  int  `toml:"index-concurrency" json:"index-concurrency"`
   165  	RegionConcurrency int  `toml:"region-concurrency" json:"region-concurrency"`
   166  	IOConcurrency     int  `toml:"io-concurrency" json:"io-concurrency"`
   167  	CheckRequirements bool `toml:"check-requirements" json:"check-requirements"`
   168  }
   169  
   170  type PostOpLevel int
   171  
   172  const (
   173  	OpLevelOff PostOpLevel = iota
   174  	OpLevelOptional
   175  	OpLevelRequired
   176  )
   177  
   178  func (t *PostOpLevel) UnmarshalTOML(v interface{}) error {
   179  	switch val := v.(type) {
   180  	case bool:
   181  		if val {
   182  			*t = OpLevelRequired
   183  		} else {
   184  			*t = OpLevelOff
   185  		}
   186  	case string:
   187  		return t.FromStringValue(val)
   188  	default:
   189  		return errors.Errorf("invalid op level '%v', please choose valid option between ['off', 'optional', 'required']", v)
   190  	}
   191  	return nil
   192  }
   193  
   194  func (t PostOpLevel) MarshalText() ([]byte, error) {
   195  	return []byte(t.String()), nil
   196  }
   197  
    198  // FromStringValue parses a command-line parameter into a PostOpLevel.
   199  func (t *PostOpLevel) FromStringValue(s string) error {
   200  	switch strings.ToLower(s) {
   201  	case "off", "false":
   202  		*t = OpLevelOff
   203  	case "required", "true":
   204  		*t = OpLevelRequired
   205  	case "optional":
   206  		*t = OpLevelOptional
   207  	default:
   208  		return errors.Errorf("invalid op level '%s', please choose valid option between ['off', 'optional', 'required']", s)
   209  	}
   210  	return nil
   211  }
   212  
   213  func (t *PostOpLevel) MarshalJSON() ([]byte, error) {
   214  	return []byte(`"` + t.String() + `"`), nil
   215  }
   216  
   217  func (t *PostOpLevel) UnmarshalJSON(data []byte) error {
   218  	return t.FromStringValue(strings.Trim(string(data), `"`))
   219  }
   220  
   221  func (t PostOpLevel) String() string {
   222  	switch t {
   223  	case OpLevelOff:
   224  		return "off"
   225  	case OpLevelOptional:
   226  		return "optional"
   227  	case OpLevelRequired:
   228  		return "required"
   229  	default:
   230  		panic(fmt.Sprintf("invalid post process type '%d'", t))
   231  	}
   232  }
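
// Sketch of how PostOpLevel values are written in a task TOML; both the boolean form and
// the string form are accepted by UnmarshalTOML above:
//
//	[post-restore]
//	checksum = "optional"  # -> OpLevelOptional
//	analyze = false        # -> OpLevelOff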
   233  
    234  // PostRestore holds the options for operations executed after the KV data has been restored.
   235  type PostRestore struct {
   236  	Level1Compact     bool        `toml:"level-1-compact" json:"level-1-compact"`
   237  	Compact           bool        `toml:"compact" json:"compact"`
   238  	Checksum          PostOpLevel `toml:"checksum" json:"checksum"`
   239  	Analyze           PostOpLevel `toml:"analyze" json:"analyze"`
   240  	PostProcessAtLast bool        `toml:"post-process-at-last" json:"post-process-at-last"`
   241  }
   242  
   243  type CSVConfig struct {
   244  	Separator       string `toml:"separator" json:"separator"`
   245  	Delimiter       string `toml:"delimiter" json:"delimiter"`
   246  	Header          bool   `toml:"header" json:"header"`
   247  	TrimLastSep     bool   `toml:"trim-last-separator" json:"trim-last-separator"`
   248  	NotNull         bool   `toml:"not-null" json:"not-null"`
   249  	Null            string `toml:"null" json:"null"`
   250  	BackslashEscape bool   `toml:"backslash-escape" json:"backslash-escape"`
   251  }
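
// Sketch of the [mydumper.csv] block these fields map to, mirroring the defaults set in
// NewConfig below; Adjust rejects an empty separator, and a separator/delimiter pair in
// which one is a prefix of the other:
//
//	[mydumper.csv]
//	separator = ","
//	delimiter = '"'
//	header = true
//	not-null = false
//	null = '\N'
//	backslash-escape = true
//	trim-last-separator = false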
   252  
   253  type MydumperRuntime struct {
   254  	ReadBlockSize    ByteSize         `toml:"read-block-size" json:"read-block-size"`
   255  	BatchSize        ByteSize         `toml:"batch-size" json:"batch-size"`
   256  	BatchImportRatio float64          `toml:"batch-import-ratio" json:"batch-import-ratio"`
   257  	SourceDir        string           `toml:"data-source-dir" json:"data-source-dir"`
   258  	NoSchema         bool             `toml:"no-schema" json:"no-schema"`
   259  	CharacterSet     string           `toml:"character-set" json:"character-set"`
   260  	CSV              CSVConfig        `toml:"csv" json:"csv"`
   261  	CaseSensitive    bool             `toml:"case-sensitive" json:"case-sensitive"`
   262  	StrictFormat     bool             `toml:"strict-format" json:"strict-format"`
   263  	MaxRegionSize    ByteSize         `toml:"max-region-size" json:"max-region-size"`
   264  	Filter           []string         `toml:"filter" json:"filter"`
   265  	FileRouters      []*FileRouteRule `toml:"files" json:"files"`
   266  	DefaultFileRules bool             `toml:"default-file-rules" json:"default-file-rules"`
   267  }
   268  
   269  type FileRouteRule struct {
   270  	Pattern     string `json:"pattern" toml:"pattern" yaml:"pattern"`
   271  	Path        string `json:"path" toml:"path" yaml:"path"`
   272  	Schema      string `json:"schema" toml:"schema" yaml:"schema"`
   273  	Table       string `json:"table" toml:"table" yaml:"table"`
   274  	Type        string `json:"type" toml:"type" yaml:"type"`
   275  	Key         string `json:"key" toml:"key" yaml:"key"`
   276  	Compression string `json:"compression" toml:"compression" yaml:"compression"`
   277  }
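
// Sketch of a custom [[mydumper.files]] routing rule (the regular expression and the
// capture-group substitution are illustrative assumptions, not defaults defined in this file):
//
//	[[mydumper.files]]
//	pattern = '(?i)^(?:[^/]*/)*([a-z0-9_]+)\.([a-z0-9_]+)\.csv$'
//	schema = '$1'
//	table = '$2'
//	type = 'csv'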
   278  
   279  type TikvImporter struct {
   280  	Addr             string   `toml:"addr" json:"addr"`
   281  	Backend          string   `toml:"backend" json:"backend"`
   282  	OnDuplicate      string   `toml:"on-duplicate" json:"on-duplicate"`
   283  	MaxKVPairs       int      `toml:"max-kv-pairs" json:"max-kv-pairs"`
   284  	SendKVPairs      int      `toml:"send-kv-pairs" json:"send-kv-pairs"`
   285  	RegionSplitSize  ByteSize `toml:"region-split-size" json:"region-split-size"`
   286  	SortedKVDir      string   `toml:"sorted-kv-dir" json:"sorted-kv-dir"`
   287  	DiskQuota        ByteSize `toml:"disk-quota" json:"disk-quota"`
   288  	RangeConcurrency int      `toml:"range-concurrency" json:"range-concurrency"`
   289  }
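
// Sketch of a local-backend [tikv-importer] section (paths are placeholders); per Adjust
// below, sorted-kv-dir is mandatory for the local backend, and disk-quota is derived from
// the free space of that directory when left unset:
//
//	[tikv-importer]
//	backend = "local"
//	sorted-kv-dir = "/mnt/ssd/sorted-kv"
//	# disk-quota = "200GiB"   # optional; auto-derived when omitted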
   290  
   291  type Checkpoint struct {
   292  	Enable           bool   `toml:"enable" json:"enable"`
   293  	Schema           string `toml:"schema" json:"schema"`
   294  	DSN              string `toml:"dsn" json:"-"` // DSN may contain password, don't expose this to JSON.
   295  	Driver           string `toml:"driver" json:"driver"`
   296  	KeepAfterSuccess bool   `toml:"keep-after-success" json:"keep-after-success"`
   297  }
   298  
   299  type Cron struct {
   300  	SwitchMode     Duration `toml:"switch-mode" json:"switch-mode"`
   301  	LogProgress    Duration `toml:"log-progress" json:"log-progress"`
   302  	CheckDiskQuota Duration `toml:"check-disk-quota" json:"check-disk-quota"`
   303  }
   304  
   305  type Security struct {
   306  	CAPath   string `toml:"ca-path" json:"ca-path"`
   307  	CertPath string `toml:"cert-path" json:"cert-path"`
   308  	KeyPath  string `toml:"key-path" json:"key-path"`
    309  	// RedactInfoLog indicates whether to enable the redacted info log.
   310  	RedactInfoLog bool `toml:"redact-info-log" json:"redact-info-log"`
   311  }
   312  
    313  // RegisterMySQL registers (or deregisters) the TLS config with name "cluster"
   314  // for use in `sql.Open()`. This method is goroutine-safe.
   315  func (sec *Security) RegisterMySQL() error {
   316  	if sec == nil {
   317  		return nil
   318  	}
   319  	tlsConfig, err := common.ToTLSConfig(sec.CAPath, sec.CertPath, sec.KeyPath)
   320  	switch {
   321  	case err != nil:
   322  		return err
   323  	case tlsConfig != nil:
    324  		// an error happens only when the name coincides with one of the built-in config names.
   325  		_ = gomysql.RegisterTLSConfig("cluster", tlsConfig)
   326  	default:
   327  		gomysql.DeregisterTLSConfig("cluster")
   328  	}
   329  	return nil
   330  }
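
// Minimal usage sketch (paths and DSN are placeholders): once RegisterMySQL succeeds with
// a non-empty TLS config, a go-sql-driver DSN can refer to it by the registered name:
//
//	sec := &Security{CAPath: "/path/ca.pem", CertPath: "/path/cert.pem", KeyPath: "/path/key.pem"}
//	if err := sec.RegisterMySQL(); err != nil {
//		// handle error
//	}
//	db, err := sql.Open("mysql", "root@tcp(127.0.0.1:4000)/?tls=cluster")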
   331  
    332  // Duration is a wrapper around time.Duration that can be deserialized from a TOML string.
    333  // Implemented per https://github.com/BurntSushi/toml#using-the-encodingtextunmarshaler-interface
   334  type Duration struct {
   335  	time.Duration
   336  }
   337  
   338  func (d *Duration) UnmarshalText(text []byte) error {
   339  	var err error
   340  	d.Duration, err = time.ParseDuration(string(text))
   341  	return err
   342  }
   343  
   344  func (d Duration) MarshalText() ([]byte, error) {
   345  	return []byte(d.String()), nil
   346  }
   347  
   348  func (d *Duration) MarshalJSON() ([]byte, error) {
   349  	return []byte(fmt.Sprintf(`"%s"`, d.Duration)), nil
   350  }
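
// Sketch of the [cron] durations that Duration parses, using time.ParseDuration syntax and
// mirroring the defaults set in NewConfig below:
//
//	[cron]
//	switch-mode = "5m"
//	log-progress = "5m"
//	check-disk-quota = "1m"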
   351  
   352  func NewConfig() *Config {
   353  	return &Config{
   354  		App: Lightning{
   355  			RegionConcurrency: runtime.NumCPU(),
   356  			TableConcurrency:  0,
   357  			IndexConcurrency:  0,
   358  			IOConcurrency:     5,
   359  			CheckRequirements: true,
   360  		},
   361  		Checkpoint: Checkpoint{
   362  			Enable: true,
   363  		},
   364  		TiDB: DBStore{
   365  			Host:                       "127.0.0.1",
   366  			User:                       "root",
   367  			StatusPort:                 10080,
   368  			StrSQLMode:                 "ONLY_FULL_GROUP_BY,NO_AUTO_CREATE_USER",
   369  			MaxAllowedPacket:           defaultMaxAllowedPacket,
   370  			BuildStatsConcurrency:      defaultBuildStatsConcurrency,
   371  			DistSQLScanConcurrency:     defaultDistSQLScanConcurrency,
   372  			IndexSerialScanConcurrency: defaultIndexSerialScanConcurrency,
   373  			ChecksumTableConcurrency:   defaultChecksumTableConcurrency,
   374  		},
   375  		Cron: Cron{
   376  			SwitchMode:     Duration{Duration: 5 * time.Minute},
   377  			LogProgress:    Duration{Duration: 5 * time.Minute},
   378  			CheckDiskQuota: Duration{Duration: 1 * time.Minute},
   379  		},
   380  		Mydumper: MydumperRuntime{
   381  			ReadBlockSize: ReadBlockSize,
   382  			CSV: CSVConfig{
   383  				Separator:       ",",
   384  				Delimiter:       `"`,
   385  				Header:          true,
   386  				NotNull:         false,
   387  				Null:            `\N`,
   388  				BackslashEscape: true,
   389  				TrimLastSep:     false,
   390  			},
   391  			StrictFormat:  false,
   392  			MaxRegionSize: MaxRegionSize,
   393  			Filter:        DefaultFilter,
   394  		},
   395  		TikvImporter: TikvImporter{
   396  			Backend:         BackendImporter,
   397  			OnDuplicate:     ReplaceOnDup,
   398  			MaxKVPairs:      4096,
   399  			SendKVPairs:     32768,
   400  			RegionSplitSize: SplitRegionSize,
   401  		},
   402  		PostRestore: PostRestore{
   403  			Checksum:          OpLevelRequired,
   404  			Analyze:           OpLevelOptional,
   405  			PostProcessAtLast: true,
   406  		},
   407  	}
   408  }
   409  
   410  // LoadFromGlobal resets the current configuration to the global settings.
   411  func (cfg *Config) LoadFromGlobal(global *GlobalConfig) error {
   412  	if err := cfg.LoadFromTOML(global.ConfigFileContent); err != nil {
   413  		return err
   414  	}
   415  
   416  	cfg.TiDB.Host = global.TiDB.Host
   417  	cfg.TiDB.Port = global.TiDB.Port
   418  	cfg.TiDB.User = global.TiDB.User
   419  	cfg.TiDB.Psw = global.TiDB.Psw
   420  	cfg.TiDB.StatusPort = global.TiDB.StatusPort
   421  	cfg.TiDB.PdAddr = global.TiDB.PdAddr
   422  	cfg.Mydumper.SourceDir = global.Mydumper.SourceDir
   423  	cfg.Mydumper.NoSchema = global.Mydumper.NoSchema
   424  	cfg.Mydumper.Filter = global.Mydumper.Filter
   425  	cfg.TikvImporter.Addr = global.TikvImporter.Addr
   426  	cfg.TikvImporter.Backend = global.TikvImporter.Backend
   427  	cfg.TikvImporter.SortedKVDir = global.TikvImporter.SortedKVDir
   428  	cfg.Checkpoint.Enable = global.Checkpoint.Enable
   429  	cfg.PostRestore.Checksum = global.PostRestore.Checksum
   430  	cfg.PostRestore.Analyze = global.PostRestore.Analyze
   431  	cfg.App.CheckRequirements = global.App.CheckRequirements
   432  	cfg.Security = global.Security
   433  
   434  	return nil
   435  }
   436  
    437  // LoadFromTOML overwrites the current configuration with the given TOML data.
    438  // If data contains TOML items that belong to neither Config nor GlobalConfig, an error is returned.
    439  // If data contains TOML items that are not in Config (and thus take no effect here), the user is warned.
   440  func (cfg *Config) LoadFromTOML(data []byte) error {
    441  	// bothUnused saves TOML items that belong to neither Config nor GlobalConfig.
    442  	var bothUnused []string
    443  	// warnItems saves legal TOML items that won't take effect here.
    444  	var warnItems []string
   445  
   446  	dataStr := string(data)
   447  
    448  	// Load the TOML into cfg; the rest of the logic checks for unused keys.
   449  	metaData, err := toml.Decode(dataStr, cfg)
   450  
   451  	if err != nil {
   452  		return errors.Trace(err)
   453  	}
   454  
   455  	unusedConfigKeys := metaData.Undecoded()
   456  	if len(unusedConfigKeys) == 0 {
   457  		return nil
   458  	}
   459  
    460  	// Now deal with keys that are potentially unused by both the Config and GlobalConfig structs.
   461  
   462  	metaDataGlobal, err := toml.Decode(dataStr, &GlobalConfig{})
   463  	if err != nil {
   464  		return errors.Trace(err)
   465  	}
   466  
    467  	// The Key type returned by MetaData.Undecoded() does not support equality comparison,
    468  	// so we convert the keys to strings instead; the conversion is unambiguous.
   469  	unusedGlobalKeys := metaDataGlobal.Undecoded()
   470  	unusedGlobalKeyStrs := make(map[string]struct{})
   471  	for _, key := range unusedGlobalKeys {
   472  		unusedGlobalKeyStrs[key.String()] = struct{}{}
   473  	}
   474  
   475  	for _, key := range unusedConfigKeys {
   476  		keyStr := key.String()
   477  		if _, found := unusedGlobalKeyStrs[keyStr]; found {
   478  			bothUnused = append(bothUnused, keyStr)
   479  		} else {
   480  			warnItems = append(warnItems, keyStr)
   481  		}
   482  	}
   483  
   484  	if len(bothUnused) > 0 {
   485  		return errors.Errorf("config file contained unknown configuration options: %s",
   486  			strings.Join(bothUnused, ", "))
   487  	}
   488  
    489  	// Warn that some legal fields of the config file won't be overwritten, such as lightning.file.
   490  	if len(warnItems) > 0 {
   491  		log.L().Warn("currently only per-task configuration can be applied, global configuration changes can only be made on startup",
   492  			zap.Strings("global config changes", warnItems))
   493  	}
   494  
   495  	return nil
   496  }
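
// Illustrative sketch of the unknown-key handling above (error and warning texts paraphrased):
//
//	cfg := NewConfig()
//	err := cfg.LoadFromTOML([]byte("[mydumper]\nno-such-key = 1"))
//	// err: config file contained unknown configuration options: mydumper.no-such-key
//	err = cfg.LoadFromTOML([]byte("[lightning]\nfile = 'lightning.log'"))
//	// err == nil: the key is valid for GlobalConfig only, so it is merely warned about here.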
   497  
   498  // Adjust fixes the invalid or unspecified settings to reasonable valid values.
   499  func (cfg *Config) Adjust(ctx context.Context) error {
   500  	// Reject problematic CSV configurations.
   501  	csv := &cfg.Mydumper.CSV
   502  	if len(csv.Separator) == 0 {
   503  		return errors.New("invalid config: `mydumper.csv.separator` must not be empty")
   504  	}
   505  
   506  	if len(csv.Delimiter) > 0 && (strings.HasPrefix(csv.Separator, csv.Delimiter) || strings.HasPrefix(csv.Delimiter, csv.Separator)) {
   507  		return errors.New("invalid config: `mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other")
   508  	}
   509  
   510  	if csv.BackslashEscape {
   511  		if csv.Separator == `\` {
   512  			return errors.New("invalid config: cannot use '\\' as CSV separator when `mydumper.csv.backslash-escape` is true")
   513  		}
   514  		if csv.Delimiter == `\` {
   515  			return errors.New("invalid config: cannot use '\\' as CSV delimiter when `mydumper.csv.backslash-escape` is true")
   516  		}
   517  	}
   518  
   519  	// adjust file routing
   520  	for _, rule := range cfg.Mydumper.FileRouters {
   521  		if filepath.IsAbs(rule.Path) {
   522  			relPath, err := filepath.Rel(cfg.Mydumper.SourceDir, rule.Path)
   523  			if err != nil {
   524  				return errors.Trace(err)
   525  			}
   526  			// ".." means that this path is not in source dir, so we should return an error
   527  			if strings.HasPrefix(relPath, "..") {
   528  				return errors.Errorf("file route path '%s' is not in source dir '%s'", rule.Path, cfg.Mydumper.SourceDir)
   529  			}
   530  			rule.Path = relPath
   531  		}
   532  	}
   533  
   534  	// enable default file route rule if no rules are set
   535  	if len(cfg.Mydumper.FileRouters) == 0 {
   536  		cfg.Mydumper.DefaultFileRules = true
   537  	}
   538  
   539  	cfg.TikvImporter.Backend = strings.ToLower(cfg.TikvImporter.Backend)
   540  	mustHaveInternalConnections := true
   541  	switch cfg.TikvImporter.Backend {
   542  	case BackendTiDB:
   543  		if cfg.App.IndexConcurrency == 0 {
   544  			cfg.App.IndexConcurrency = cfg.App.RegionConcurrency
   545  		}
   546  		if cfg.App.TableConcurrency == 0 {
   547  			cfg.App.TableConcurrency = cfg.App.RegionConcurrency
   548  		}
   549  		mustHaveInternalConnections = false
   550  	case BackendImporter, BackendLocal:
   551  		if cfg.App.IndexConcurrency == 0 {
   552  			cfg.App.IndexConcurrency = 2
   553  		}
   554  		if cfg.App.TableConcurrency == 0 {
   555  			cfg.App.TableConcurrency = 6
   556  		}
   557  		if cfg.TikvImporter.RangeConcurrency == 0 {
   558  			cfg.TikvImporter.RangeConcurrency = 16
   559  		}
   560  		if cfg.TikvImporter.RegionSplitSize == 0 {
   561  			cfg.TikvImporter.RegionSplitSize = SplitRegionSize
   562  		}
   563  		if cfg.TiDB.DistSQLScanConcurrency == 0 {
   564  			cfg.TiDB.DistSQLScanConcurrency = defaultDistSQLScanConcurrency
   565  		}
   566  		if cfg.TiDB.BuildStatsConcurrency == 0 {
   567  			cfg.TiDB.BuildStatsConcurrency = defaultBuildStatsConcurrency
   568  		}
   569  		if cfg.TiDB.IndexSerialScanConcurrency == 0 {
   570  			cfg.TiDB.IndexSerialScanConcurrency = defaultIndexSerialScanConcurrency
   571  		}
   572  		if cfg.TiDB.ChecksumTableConcurrency == 0 {
   573  			cfg.TiDB.ChecksumTableConcurrency = defaultChecksumTableConcurrency
   574  		}
   575  	default:
   576  		return errors.Errorf("invalid config: unsupported `tikv-importer.backend` (%s)", cfg.TikvImporter.Backend)
   577  	}
   578  
   579  	if cfg.TikvImporter.Backend == BackendLocal {
   580  		if len(cfg.TikvImporter.SortedKVDir) == 0 {
    581  			return errors.New("tikv-importer.sorted-kv-dir must not be empty")
   582  		}
   583  
   584  		storageSizeDir := filepath.Clean(cfg.TikvImporter.SortedKVDir)
   585  		sortedKVDirInfo, err := os.Stat(storageSizeDir)
   586  		switch {
   587  		case os.IsNotExist(err):
    588  			// The sorted-kv-dir does not exist and will be created automatically,
    589  			// so we measure the storage size of its parent directory instead.
   590  			storageSizeDir = filepath.Dir(storageSizeDir)
   591  		case err == nil:
   592  			if !sortedKVDirInfo.IsDir() {
   593  				return errors.Errorf("tikv-importer.sorted-kv-dir ('%s') is not a directory", storageSizeDir)
   594  			}
   595  		default:
   596  			return errors.Annotate(err, "invalid tikv-importer.sorted-kv-dir")
   597  		}
   598  
   599  		if cfg.TikvImporter.DiskQuota == 0 {
   600  			enginesCount := uint64(cfg.App.IndexConcurrency + cfg.App.TableConcurrency)
   601  			writeAmount := uint64(cfg.App.RegionConcurrency) * uint64(cfg.Cron.CheckDiskQuota.Milliseconds())
   602  			reservedSize := enginesCount*autoDiskQuotaLocalReservedSize + writeAmount*autoDiskQuotaLocalReservedSpeed
   603  
   604  			storageSize, err := common.GetStorageSize(storageSizeDir)
   605  			if err != nil {
   606  				return err
   607  			}
   608  			if storageSize.Available <= reservedSize {
   609  				return errors.Errorf(
   610  					"insufficient disk free space on `%s` (only %s, expecting >%s), please use a storage with enough free space, or specify `tikv-importer.disk-quota`",
   611  					cfg.TikvImporter.SortedKVDir,
   612  					units.BytesSize(float64(storageSize.Available)),
   613  					units.BytesSize(float64(reservedSize)))
   614  			}
   615  			cfg.TikvImporter.DiskQuota = ByteSize(storageSize.Available - reservedSize)
   616  		}
   617  	}
   618  
   619  	if cfg.TikvImporter.Backend == BackendTiDB {
   620  		cfg.TikvImporter.OnDuplicate = strings.ToLower(cfg.TikvImporter.OnDuplicate)
   621  		switch cfg.TikvImporter.OnDuplicate {
   622  		case ReplaceOnDup, IgnoreOnDup, ErrorOnDup:
   623  		default:
   624  			return errors.Errorf("invalid config: unsupported `tikv-importer.on-duplicate` (%s)", cfg.TikvImporter.OnDuplicate)
   625  		}
   626  	}
   627  
   628  	var err error
   629  	cfg.TiDB.SQLMode, err = mysql.GetSQLMode(cfg.TiDB.StrSQLMode)
   630  	if err != nil {
    631  		return errors.Annotate(err, "invalid config: `tidb.sql-mode` must be a valid SQL_MODE")
   632  	}
   633  
   634  	if cfg.TiDB.Security == nil {
   635  		cfg.TiDB.Security = &cfg.Security
   636  	}
   637  
   638  	switch cfg.TiDB.TLS {
   639  	case "":
   640  		if len(cfg.TiDB.Security.CAPath) > 0 {
   641  			cfg.TiDB.TLS = "cluster"
   642  		} else {
   643  			cfg.TiDB.TLS = "false"
   644  		}
   645  	case "cluster":
   646  		if len(cfg.Security.CAPath) == 0 {
   647  			return errors.New("invalid config: cannot set `tidb.tls` to 'cluster' without a [security] section")
   648  		}
   649  	case "false", "skip-verify", "preferred":
   650  		break
   651  	default:
   652  		return errors.Errorf("invalid config: unsupported `tidb.tls` config %s", cfg.TiDB.TLS)
   653  	}
   654  
   655  	// mydumper.filter and black-white-list cannot co-exist.
   656  	if cfg.HasLegacyBlackWhiteList() {
   657  		log.L().Warn("the config `black-white-list` has been deprecated, please replace with `mydumper.filter`")
   658  		if !common.StringSliceEqual(cfg.Mydumper.Filter, DefaultFilter) {
   659  			return errors.New("invalid config: `mydumper.filter` and `black-white-list` cannot be simultaneously defined")
   660  		}
   661  	}
   662  
   663  	for _, rule := range cfg.Routes {
   664  		if !cfg.Mydumper.CaseSensitive {
   665  			rule.ToLower()
   666  		}
   667  		if err := rule.Valid(); err != nil {
   668  			return errors.Trace(err)
   669  		}
   670  	}
   671  
   672  	// automatically determine the TiDB port & PD address from TiDB settings
   673  	if mustHaveInternalConnections && (cfg.TiDB.Port <= 0 || len(cfg.TiDB.PdAddr) == 0) {
   674  		tls, err := cfg.ToTLS()
   675  		if err != nil {
   676  			return err
   677  		}
   678  
   679  		var settings tidbcfg.Config
   680  		err = tls.GetJSON(ctx, "/settings", &settings)
   681  		if err != nil {
   682  			return errors.Annotate(err, "cannot fetch settings from TiDB, please manually fill in `tidb.port` and `tidb.pd-addr`")
   683  		}
   684  		if cfg.TiDB.Port <= 0 {
   685  			cfg.TiDB.Port = int(settings.Port)
   686  		}
   687  		if len(cfg.TiDB.PdAddr) == 0 {
   688  			pdAddrs := strings.Split(settings.Path, ",")
   689  			cfg.TiDB.PdAddr = pdAddrs[0] // FIXME support multiple PDs once importer can.
   690  		}
   691  	}
   692  
   693  	if cfg.TiDB.Port <= 0 {
   694  		return errors.New("invalid `tidb.port` setting")
   695  	}
   696  	if mustHaveInternalConnections && len(cfg.TiDB.PdAddr) == 0 {
   697  		return errors.New("invalid `tidb.pd-addr` setting")
   698  	}
   699  
   700  	// handle mydumper
   701  	if cfg.Mydumper.BatchSize <= 0 {
    702  		// If rows in the source files are not sorted by primary key (e.g. the primary key is numeric or the clustered index is enabled),
    703  		// the key ranges of the data engines may overlap, so a bigger engine size can somewhat alleviate this.
   704  		cfg.Mydumper.BatchSize = defaultBatchSize
   705  
   706  	}
   707  	if cfg.Mydumper.BatchImportRatio < 0.0 || cfg.Mydumper.BatchImportRatio >= 1.0 {
   708  		cfg.Mydumper.BatchImportRatio = 0.75
   709  	}
   710  	if cfg.Mydumper.ReadBlockSize <= 0 {
   711  		cfg.Mydumper.ReadBlockSize = ReadBlockSize
   712  	}
   713  	if len(cfg.Mydumper.CharacterSet) == 0 {
   714  		cfg.Mydumper.CharacterSet = "auto"
   715  	}
   716  
   717  	if len(cfg.Checkpoint.Schema) == 0 {
   718  		cfg.Checkpoint.Schema = "tidb_lightning_checkpoint"
   719  	}
   720  	if len(cfg.Checkpoint.Driver) == 0 {
   721  		cfg.Checkpoint.Driver = CheckpointDriverFile
   722  	}
   723  	if len(cfg.Checkpoint.DSN) == 0 {
   724  		switch cfg.Checkpoint.Driver {
   725  		case CheckpointDriverMySQL:
   726  			param := common.MySQLConnectParam{
   727  				Host:             cfg.TiDB.Host,
   728  				Port:             cfg.TiDB.Port,
   729  				User:             cfg.TiDB.User,
   730  				Password:         cfg.TiDB.Psw,
   731  				SQLMode:          mysql.DefaultSQLMode,
   732  				MaxAllowedPacket: defaultMaxAllowedPacket,
   733  				TLS:              cfg.TiDB.TLS,
   734  			}
   735  			cfg.Checkpoint.DSN = param.ToDSN()
   736  		case CheckpointDriverFile:
   737  			cfg.Checkpoint.DSN = "/tmp/" + cfg.Checkpoint.Schema + ".pb"
   738  		}
   739  	}
   740  
   741  	var u *url.URL
   742  
    743  	// An absolute Windows path like "C:\Users\XYZ" would be interpreted as
    744  	// a URL with scheme "C" and opaque data "\Users\XYZ".
    745  	// Therefore, we only perform URL parsing if we are sure the path is not
    746  	// an absolute Windows path.
    747  	// Here we use `filepath.VolumeName`, which can identify the "C:" part
    748  	// of the path. On Linux this function always returns an empty string.
    749  	// On Windows, a drive letter can only be a single letter from "A:" to "Z:",
    750  	// so this won't mistake "s3:" for a Windows path.
   751  	if len(filepath.VolumeName(cfg.Mydumper.SourceDir)) == 0 {
   752  		u, err = url.Parse(cfg.Mydumper.SourceDir)
   753  		if err != nil {
   754  			return errors.Trace(err)
   755  		}
   756  	} else {
   757  		u = &url.URL{}
   758  	}
   759  
    760  	// Convert an absolute or relative local path to a valid file:// URL.
   761  	if u.Scheme == "" {
   762  		if !common.IsDirExists(cfg.Mydumper.SourceDir) {
   763  			return errors.Errorf("%s: mydumper dir does not exist", cfg.Mydumper.SourceDir)
   764  		}
   765  		absPath, err := filepath.Abs(cfg.Mydumper.SourceDir)
   766  		if err != nil {
    767  			return errors.Annotatef(err, "converting data-source-dir '%s' to an absolute path failed", cfg.Mydumper.SourceDir)
   768  		}
   769  		cfg.Mydumper.SourceDir = "file://" + filepath.ToSlash(absPath)
   770  		u.Path = absPath
   771  		u.Scheme = "file"
   772  	}
   773  
   774  	found := false
   775  	for _, t := range supportedStorageTypes {
   776  		if u.Scheme == t {
   777  			found = true
   778  			break
   779  		}
   780  	}
   781  	if !found {
   782  		return errors.Errorf("Unsupported data-source-dir url '%s'", cfg.Mydumper.SourceDir)
   783  	}
   784  
   785  	return nil
   786  }
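
// Sketch of the data-source-dir normalization performed at the end of Adjust (paths are
// placeholders; local directories must exist):
//
//	"/data/export"       -> "file:///data/export"
//	"./export"           -> "file://" + <absolute path of ./export>
//	"s3://bucket/prefix" -> unchanged (scheme listed in supportedStorageTypes)
//	"gs://bucket/prefix" -> error: unsupported data-source-dir url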
   787  
   788  // HasLegacyBlackWhiteList checks whether the deprecated [black-white-list] section
   789  // was defined.
   790  func (cfg *Config) HasLegacyBlackWhiteList() bool {
   791  	return len(cfg.BWList.DoTables) != 0 || len(cfg.BWList.DoDBs) != 0 || len(cfg.BWList.IgnoreTables) != 0 || len(cfg.BWList.IgnoreDBs) != 0
   792  }