github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/config/config.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package config
    15  
    16  import (
    17  	"context"
    18  	"encoding/json"
    19  	"fmt"
    20  	"math"
    21  	"net"
    22  	"net/url"
    23  	"os"
    24  	"path/filepath"
    25  	"runtime"
    26  	"strconv"
    27  	"strings"
    28  	"time"
    29  
    30  	"github.com/BurntSushi/toml"
    31  	"github.com/docker/go-units"
    32  	gomysql "github.com/go-sql-driver/mysql"
    33  	"github.com/pingcap/errors"
    34  	"github.com/pingcap/parser/mysql"
    35  	filter "github.com/pingcap/tidb-tools/pkg/table-filter"
    36  	router "github.com/pingcap/tidb-tools/pkg/table-router"
    37  	tidbcfg "github.com/pingcap/tidb/config"
    38  	"github.com/tikv/pd/server/api"
    39  	"go.uber.org/zap"
    40  
    41  	"github.com/pingcap/br/pkg/lightning/common"
    42  	"github.com/pingcap/br/pkg/lightning/log"
    43  )
    44  
    45  const (
    46  	// ImportMode defines mode of import for tikv.
    47  	ImportMode = "import"
    48  	// NormalMode defines mode of normal for tikv.
    49  	NormalMode = "normal"
    50  
    51  	// BackendTiDB is a constant for choosing the "TiDB" backend in the configuration.
    52  	BackendTiDB = "tidb"
    53  	// BackendImporter is a constant for choosing the "Importer" backend in the configuration.
    54  	BackendImporter = "importer"
    55  	// BackendLocal is a constant for choosing the "Local" backup in the configuration.
    56  	// In this mode, we write & sort kv pairs with local storage and directly write them to tikv.
    57  	BackendLocal = "local"
    58  
    59  	// CheckpointDriverMySQL is a constant for choosing the "MySQL" checkpoint driver in the configuration.
    60  	CheckpointDriverMySQL = "mysql"
    61  	// CheckpointDriverFile is a constant for choosing the "File" checkpoint driver in the configuration.
    62  	CheckpointDriverFile = "file"
    63  
    64  	// ReplaceOnDup indicates using REPLACE INTO to insert data
    65  	ReplaceOnDup = "replace"
    66  	// IgnoreOnDup indicates using INSERT IGNORE INTO to insert data
    67  	IgnoreOnDup = "ignore"
    68  	// ErrorOnDup indicates using INSERT INTO to insert data, which would violate PK or UNIQUE constraint
    69  	ErrorOnDup = "error"
    70  
    71  	defaultDistSQLScanConcurrency     = 15
    72  	distSQLScanConcurrencyPerStore    = 4
    73  	defaultBuildStatsConcurrency      = 20
    74  	defaultIndexSerialScanConcurrency = 20
    75  	defaultChecksumTableConcurrency   = 2
    76  	defaultTableConcurrency           = 6
    77  	defaultIndexConcurrency           = 2
    78  
    79  	// defaultMetaSchemaName is the default database name used to store lightning metadata
    80  	defaultMetaSchemaName = "lightning_metadata"
    81  
    82  	// autoDiskQuotaLocalReservedSpeed is the estimated size increase per
    83  	// millisecond per write thread the local backend may gain on all engines.
    84  	// This is used to compute the maximum size overshoot between two disk quota
    85  	// checks, if the first one has barely passed.
    86  	//
    87  	// With cron.check-disk-quota = 1m, region-concurrency = 40, this should
    88  	// contribute 2.3 GiB to the reserved size.
    89  	autoDiskQuotaLocalReservedSpeed uint64 = 1 * units.KiB
    90  	defaultEngineMemCacheSize              = 512 * units.MiB
    91  	defaultLocalWriterMemCacheSize         = 128 * units.MiB
    92  
    93  	maxRetryTimes           = 4
    94  	defaultRetryBackoffTime = 100 * time.Millisecond
    95  	pdStores                = "/pd/api/v1/stores"
    96  )
    97  
    98  var (
    99  	supportedStorageTypes = []string{"file", "local", "s3", "noop", "gcs"}
   100  
   101  	DefaultFilter = []string{
   102  		"*.*",
   103  		"!mysql.*",
   104  		"!sys.*",
   105  		"!INFORMATION_SCHEMA.*",
   106  		"!PERFORMANCE_SCHEMA.*",
   107  		"!METRICS_SCHEMA.*",
   108  		"!INSPECTION_SCHEMA.*",
   109  	}
   110  )
   111  
   112  type DBStore struct {
   113  	Host       string    `toml:"host" json:"host"`
   114  	Port       int       `toml:"port" json:"port"`
   115  	User       string    `toml:"user" json:"user"`
   116  	Psw        string    `toml:"password" json:"-"`
   117  	StatusPort int       `toml:"status-port" json:"status-port"`
   118  	PdAddr     string    `toml:"pd-addr" json:"pd-addr"`
   119  	StrSQLMode string    `toml:"sql-mode" json:"sql-mode"`
   120  	TLS        string    `toml:"tls" json:"tls"`
   121  	Security   *Security `toml:"security" json:"security"`
   122  
   123  	SQLMode          mysql.SQLMode `toml:"-" json:"-"`
   124  	MaxAllowedPacket uint64        `toml:"max-allowed-packet" json:"max-allowed-packet"`
   125  
   126  	DistSQLScanConcurrency     int `toml:"distsql-scan-concurrency" json:"distsql-scan-concurrency"`
   127  	BuildStatsConcurrency      int `toml:"build-stats-concurrency" json:"build-stats-concurrency"`
   128  	IndexSerialScanConcurrency int `toml:"index-serial-scan-concurrency" json:"index-serial-scan-concurrency"`
   129  	ChecksumTableConcurrency   int `toml:"checksum-table-concurrency" json:"checksum-table-concurrency"`
   130  }
   131  
   132  type Config struct {
   133  	TaskID int64 `toml:"-" json:"id"`
   134  
   135  	App  Lightning `toml:"lightning" json:"lightning"`
   136  	TiDB DBStore   `toml:"tidb" json:"tidb"`
   137  
   138  	Checkpoint   Checkpoint          `toml:"checkpoint" json:"checkpoint"`
   139  	Mydumper     MydumperRuntime     `toml:"mydumper" json:"mydumper"`
   140  	TikvImporter TikvImporter        `toml:"tikv-importer" json:"tikv-importer"`
   141  	PostRestore  PostRestore         `toml:"post-restore" json:"post-restore"`
   142  	Cron         Cron                `toml:"cron" json:"cron"`
   143  	Routes       []*router.TableRule `toml:"routes" json:"routes"`
   144  	Security     Security            `toml:"security" json:"security"`
   145  
   146  	BWList filter.MySQLReplicationRules `toml:"black-white-list" json:"black-white-list"`
   147  }
   148  
   149  func (cfg *Config) String() string {
   150  	bytes, err := json.Marshal(cfg)
   151  	if err != nil {
   152  		log.L().Error("marshal config to json error", log.ShortError(err))
   153  	}
   154  	return string(bytes)
   155  }
   156  
   157  func (cfg *Config) ToTLS() (*common.TLS, error) {
   158  	hostPort := net.JoinHostPort(cfg.TiDB.Host, strconv.Itoa(cfg.TiDB.StatusPort))
   159  	return common.NewTLS(cfg.Security.CAPath, cfg.Security.CertPath, cfg.Security.KeyPath, hostPort)
   160  }
   161  
   162  type Lightning struct {
   163  	TableConcurrency  int    `toml:"table-concurrency" json:"table-concurrency"`
   164  	IndexConcurrency  int    `toml:"index-concurrency" json:"index-concurrency"`
   165  	RegionConcurrency int    `toml:"region-concurrency" json:"region-concurrency"`
   166  	IOConcurrency     int    `toml:"io-concurrency" json:"io-concurrency"`
   167  	CheckRequirements bool   `toml:"check-requirements" json:"check-requirements"`
   168  	MetaSchemaName    string `toml:"meta-schema-name" json:"meta-schema-name"`
   169  }
   170  
   171  type PostOpLevel int
   172  
   173  const (
   174  	OpLevelOff PostOpLevel = iota
   175  	OpLevelOptional
   176  	OpLevelRequired
   177  )
   178  
   179  func (t *PostOpLevel) UnmarshalTOML(v interface{}) error {
   180  	switch val := v.(type) {
   181  	case bool:
   182  		if val {
   183  			*t = OpLevelRequired
   184  		} else {
   185  			*t = OpLevelOff
   186  		}
   187  	case string:
   188  		return t.FromStringValue(val)
   189  	default:
   190  		return errors.Errorf("invalid op level '%v', please choose valid option between ['off', 'optional', 'required']", v)
   191  	}
   192  	return nil
   193  }
   194  
   195  func (t PostOpLevel) MarshalText() ([]byte, error) {
   196  	return []byte(t.String()), nil
   197  }
   198  
   199  // parser command line parameter
   200  func (t *PostOpLevel) FromStringValue(s string) error {
   201  	switch strings.ToLower(s) {
   202  	//nolint:goconst // This 'false' and other 'false's aren't the same.
   203  	case "off", "false":
   204  		*t = OpLevelOff
   205  	case "required", "true":
   206  		*t = OpLevelRequired
   207  	case "optional":
   208  		*t = OpLevelOptional
   209  	default:
   210  		return errors.Errorf("invalid op level '%s', please choose valid option between ['off', 'optional', 'required']", s)
   211  	}
   212  	return nil
   213  }
   214  
   215  func (t *PostOpLevel) MarshalJSON() ([]byte, error) {
   216  	return []byte(`"` + t.String() + `"`), nil
   217  }
   218  
   219  func (t *PostOpLevel) UnmarshalJSON(data []byte) error {
   220  	return t.FromStringValue(strings.Trim(string(data), `"`))
   221  }
   222  
   223  func (t PostOpLevel) String() string {
   224  	switch t {
   225  	case OpLevelOff:
   226  		return "off"
   227  	case OpLevelOptional:
   228  		return "optional"
   229  	case OpLevelRequired:
   230  		return "required"
   231  	default:
   232  		panic(fmt.Sprintf("invalid post process type '%d'", t))
   233  	}
   234  }
   235  
   236  // PostRestore has some options which will be executed after kv restored.
   237  type PostRestore struct {
   238  	Checksum          PostOpLevel `toml:"checksum" json:"checksum"`
   239  	Analyze           PostOpLevel `toml:"analyze" json:"analyze"`
   240  	Level1Compact     bool        `toml:"level-1-compact" json:"level-1-compact"`
   241  	PostProcessAtLast bool        `toml:"post-process-at-last" json:"post-process-at-last"`
   242  	Compact           bool        `toml:"compact" json:"compact"`
   243  }
   244  
   245  type CSVConfig struct {
   246  	Separator       string `toml:"separator" json:"separator"`
   247  	Delimiter       string `toml:"delimiter" json:"delimiter"`
   248  	Terminator      string `toml:"terminator" json:"terminator"`
   249  	Null            string `toml:"null" json:"null"`
   250  	Header          bool   `toml:"header" json:"header"`
   251  	TrimLastSep     bool   `toml:"trim-last-separator" json:"trim-last-separator"`
   252  	NotNull         bool   `toml:"not-null" json:"not-null"`
   253  	BackslashEscape bool   `toml:"backslash-escape" json:"backslash-escape"`
   254  }
   255  
   256  type MydumperRuntime struct {
   257  	ReadBlockSize    ByteSize         `toml:"read-block-size" json:"read-block-size"`
   258  	BatchSize        ByteSize         `toml:"batch-size" json:"batch-size"`
   259  	BatchImportRatio float64          `toml:"batch-import-ratio" json:"batch-import-ratio"`
   260  	SourceDir        string           `toml:"data-source-dir" json:"data-source-dir"`
   261  	CharacterSet     string           `toml:"character-set" json:"character-set"`
   262  	CSV              CSVConfig        `toml:"csv" json:"csv"`
   263  	MaxRegionSize    ByteSize         `toml:"max-region-size" json:"max-region-size"`
   264  	Filter           []string         `toml:"filter" json:"filter"`
   265  	FileRouters      []*FileRouteRule `toml:"files" json:"files"`
   266  	// Deprecated: only used to keep the compatibility.
   267  	NoSchema         bool             `toml:"no-schema" json:"no-schema"`
   268  	CaseSensitive    bool             `toml:"case-sensitive" json:"case-sensitive"`
   269  	StrictFormat     bool             `toml:"strict-format" json:"strict-format"`
   270  	DefaultFileRules bool             `toml:"default-file-rules" json:"default-file-rules"`
   271  	IgnoreColumns    AllIgnoreColumns `toml:"ignore-data-columns" json:"ignore-data-columns"`
   272  }
   273  
   274  type AllIgnoreColumns []*IgnoreColumns
   275  
   276  type IgnoreColumns struct {
   277  	DB          string   `toml:"db" json:"db"`
   278  	Table       string   `toml:"table" json:"table"`
   279  	TableFilter []string `toml:"table-filter" json:"table-filter"`
   280  	Columns     []string `toml:"columns" json:"columns"`
   281  }
   282  
   283  // GetIgnoreColumns gets Ignore config by schema name/regex and table name/regex.
   284  func (igCols AllIgnoreColumns) GetIgnoreColumns(db string, table string, caseSensitive bool) (*IgnoreColumns, error) {
   285  	if !caseSensitive {
   286  		db = strings.ToLower(db)
   287  		table = strings.ToLower(table)
   288  	}
   289  	for i, ig := range igCols {
   290  		if ig.DB == db && ig.Table == table {
   291  			return igCols[i], nil
   292  		}
   293  		f, err := filter.Parse(ig.TableFilter)
   294  		if err != nil {
   295  			return nil, errors.Trace(err)
   296  		}
   297  		if f.MatchTable(db, table) {
   298  			return igCols[i], nil
   299  		}
   300  	}
   301  	return &IgnoreColumns{Columns: make([]string, 0)}, nil
   302  }
   303  
   304  type FileRouteRule struct {
   305  	Pattern     string `json:"pattern" toml:"pattern" yaml:"pattern"`
   306  	Path        string `json:"path" toml:"path" yaml:"path"`
   307  	Schema      string `json:"schema" toml:"schema" yaml:"schema"`
   308  	Table       string `json:"table" toml:"table" yaml:"table"`
   309  	Type        string `json:"type" toml:"type" yaml:"type"`
   310  	Key         string `json:"key" toml:"key" yaml:"key"`
   311  	Compression string `json:"compression" toml:"compression" yaml:"compression"`
   312  }
   313  
   314  type TikvImporter struct {
   315  	Addr               string   `toml:"addr" json:"addr"`
   316  	Backend            string   `toml:"backend" json:"backend"`
   317  	OnDuplicate        string   `toml:"on-duplicate" json:"on-duplicate"`
   318  	MaxKVPairs         int      `toml:"max-kv-pairs" json:"max-kv-pairs"`
   319  	SendKVPairs        int      `toml:"send-kv-pairs" json:"send-kv-pairs"`
   320  	RegionSplitSize    ByteSize `toml:"region-split-size" json:"region-split-size"`
   321  	SortedKVDir        string   `toml:"sorted-kv-dir" json:"sorted-kv-dir"`
   322  	DiskQuota          ByteSize `toml:"disk-quota" json:"disk-quota"`
   323  	RangeConcurrency   int      `toml:"range-concurrency" json:"range-concurrency"`
   324  	DuplicateDetection bool     `toml:"duplicate-detection" json:"duplicate-detection"`
   325  
   326  	EngineMemCacheSize      ByteSize `toml:"engine-mem-cache-size" json:"engine-mem-cache-size"`
   327  	LocalWriterMemCacheSize ByteSize `toml:"local-writer-mem-cache-size" json:"local-writer-mem-cache-size"`
   328  }
   329  
   330  type Checkpoint struct {
   331  	Schema           string `toml:"schema" json:"schema"`
   332  	DSN              string `toml:"dsn" json:"-"` // DSN may contain password, don't expose this to JSON.
   333  	Driver           string `toml:"driver" json:"driver"`
   334  	Enable           bool   `toml:"enable" json:"enable"`
   335  	KeepAfterSuccess bool   `toml:"keep-after-success" json:"keep-after-success"`
   336  }
   337  
   338  type Cron struct {
   339  	SwitchMode     Duration `toml:"switch-mode" json:"switch-mode"`
   340  	LogProgress    Duration `toml:"log-progress" json:"log-progress"`
   341  	CheckDiskQuota Duration `toml:"check-disk-quota" json:"check-disk-quota"`
   342  }
   343  
   344  type Security struct {
   345  	CAPath   string `toml:"ca-path" json:"ca-path"`
   346  	CertPath string `toml:"cert-path" json:"cert-path"`
   347  	KeyPath  string `toml:"key-path" json:"key-path"`
   348  	// RedactInfoLog indicates that whether enabling redact log
   349  	RedactInfoLog bool `toml:"redact-info-log" json:"redact-info-log"`
   350  }
   351  
   352  // RegistersMySQL registers (or deregisters) the TLS config with name "cluster"
   353  // for use in `sql.Open()`. This method is goroutine-safe.
   354  func (sec *Security) RegisterMySQL() error {
   355  	if sec == nil {
   356  		return nil
   357  	}
   358  	tlsConfig, err := common.ToTLSConfig(sec.CAPath, sec.CertPath, sec.KeyPath)
   359  	switch {
   360  	case err != nil:
   361  		return errors.Trace(err)
   362  	case tlsConfig != nil:
   363  		// error happens only when the key coincides with the built-in names.
   364  		_ = gomysql.RegisterTLSConfig("cluster", tlsConfig)
   365  	default:
   366  		gomysql.DeregisterTLSConfig("cluster")
   367  	}
   368  	return nil
   369  }
   370  
   371  // A duration which can be deserialized from a TOML string.
   372  // Implemented as https://github.com/BurntSushi/toml#using-the-encodingtextunmarshaler-interface
   373  type Duration struct {
   374  	time.Duration
   375  }
   376  
   377  func (d *Duration) UnmarshalText(text []byte) error {
   378  	var err error
   379  	d.Duration, err = time.ParseDuration(string(text))
   380  	return errors.Trace(err)
   381  }
   382  
   383  func (d Duration) MarshalText() ([]byte, error) {
   384  	return []byte(d.String()), nil
   385  }
   386  
   387  func (d *Duration) MarshalJSON() ([]byte, error) {
   388  	return []byte(fmt.Sprintf(`"%s"`, d.Duration)), nil
   389  }
   390  
   391  func NewConfig() *Config {
   392  	return &Config{
   393  		App: Lightning{
   394  			RegionConcurrency: runtime.NumCPU(),
   395  			TableConcurrency:  0,
   396  			IndexConcurrency:  0,
   397  			IOConcurrency:     5,
   398  			CheckRequirements: true,
   399  		},
   400  		Checkpoint: Checkpoint{
   401  			Enable: true,
   402  		},
   403  		TiDB: DBStore{
   404  			Host:                       "127.0.0.1",
   405  			User:                       "root",
   406  			StatusPort:                 10080,
   407  			StrSQLMode:                 "ONLY_FULL_GROUP_BY,NO_AUTO_CREATE_USER",
   408  			MaxAllowedPacket:           defaultMaxAllowedPacket,
   409  			BuildStatsConcurrency:      defaultBuildStatsConcurrency,
   410  			DistSQLScanConcurrency:     defaultDistSQLScanConcurrency,
   411  			IndexSerialScanConcurrency: defaultIndexSerialScanConcurrency,
   412  			ChecksumTableConcurrency:   defaultChecksumTableConcurrency,
   413  		},
   414  		Cron: Cron{
   415  			SwitchMode:     Duration{Duration: 5 * time.Minute},
   416  			LogProgress:    Duration{Duration: 5 * time.Minute},
   417  			CheckDiskQuota: Duration{Duration: 1 * time.Minute},
   418  		},
   419  		Mydumper: MydumperRuntime{
   420  			ReadBlockSize: ReadBlockSize,
   421  			CSV: CSVConfig{
   422  				Separator:       ",",
   423  				Delimiter:       `"`,
   424  				Header:          true,
   425  				NotNull:         false,
   426  				Null:            `\N`,
   427  				BackslashEscape: true,
   428  				TrimLastSep:     false,
   429  			},
   430  			StrictFormat:  false,
   431  			MaxRegionSize: MaxRegionSize,
   432  			Filter:        DefaultFilter,
   433  		},
   434  		TikvImporter: TikvImporter{
   435  			Backend:         "",
   436  			OnDuplicate:     ReplaceOnDup,
   437  			MaxKVPairs:      4096,
   438  			SendKVPairs:     32768,
   439  			RegionSplitSize: SplitRegionSize,
   440  			DiskQuota:       ByteSize(math.MaxInt64),
   441  		},
   442  		PostRestore: PostRestore{
   443  			Checksum:          OpLevelRequired,
   444  			Analyze:           OpLevelOptional,
   445  			PostProcessAtLast: true,
   446  		},
   447  	}
   448  }
   449  
   450  // LoadFromGlobal resets the current configuration to the global settings.
   451  func (cfg *Config) LoadFromGlobal(global *GlobalConfig) error {
   452  	if err := cfg.LoadFromTOML(global.ConfigFileContent); err != nil {
   453  		return err
   454  	}
   455  
   456  	cfg.TiDB.Host = global.TiDB.Host
   457  	cfg.TiDB.Port = global.TiDB.Port
   458  	cfg.TiDB.User = global.TiDB.User
   459  	cfg.TiDB.Psw = global.TiDB.Psw
   460  	cfg.TiDB.StatusPort = global.TiDB.StatusPort
   461  	cfg.TiDB.PdAddr = global.TiDB.PdAddr
   462  	cfg.Mydumper.NoSchema = global.Mydumper.NoSchema
   463  	cfg.Mydumper.SourceDir = global.Mydumper.SourceDir
   464  	cfg.Mydumper.Filter = global.Mydumper.Filter
   465  	cfg.TikvImporter.Addr = global.TikvImporter.Addr
   466  	cfg.TikvImporter.Backend = global.TikvImporter.Backend
   467  	cfg.TikvImporter.SortedKVDir = global.TikvImporter.SortedKVDir
   468  	cfg.Checkpoint.Enable = global.Checkpoint.Enable
   469  	cfg.PostRestore.Checksum = global.PostRestore.Checksum
   470  	cfg.PostRestore.Analyze = global.PostRestore.Analyze
   471  	cfg.App.CheckRequirements = global.App.CheckRequirements
   472  	cfg.Security = global.Security
   473  	cfg.Mydumper.IgnoreColumns = global.Mydumper.IgnoreColumns
   474  	return nil
   475  }
   476  
   477  // LoadFromTOML overwrites the current configuration by the TOML data
   478  // If data contains toml items not in Config and GlobalConfig, return an error
   479  // If data contains toml items not in Config, thus won't take effect, warn user
   480  func (cfg *Config) LoadFromTOML(data []byte) error {
   481  	// bothUnused saves toml items not belong to Config nor GlobalConfig
   482  	var bothUnused []string
   483  	// warnItems saves legal toml items but won't effect
   484  	var warnItems []string
   485  
   486  	dataStr := string(data)
   487  
   488  	// Here we load toml into cfg, and rest logic is check unused keys
   489  	metaData, err := toml.Decode(dataStr, cfg)
   490  	if err != nil {
   491  		return errors.Trace(err)
   492  	}
   493  
   494  	unusedConfigKeys := metaData.Undecoded()
   495  	if len(unusedConfigKeys) == 0 {
   496  		return nil
   497  	}
   498  
   499  	// Now we deal with potential both-unused keys of Config and GlobalConfig struct
   500  
   501  	metaDataGlobal, err := toml.Decode(dataStr, &GlobalConfig{})
   502  	if err != nil {
   503  		return errors.Trace(err)
   504  	}
   505  
   506  	// Key type returned by metadata.Undecoded doesn't have a equality comparison,
   507  	// we convert them to string type instead, and this conversion is identical
   508  	unusedGlobalKeys := metaDataGlobal.Undecoded()
   509  	unusedGlobalKeyStrs := make(map[string]struct{})
   510  	for _, key := range unusedGlobalKeys {
   511  		unusedGlobalKeyStrs[key.String()] = struct{}{}
   512  	}
   513  
   514  	for _, key := range unusedConfigKeys {
   515  		keyStr := key.String()
   516  		if _, found := unusedGlobalKeyStrs[keyStr]; found {
   517  			bothUnused = append(bothUnused, keyStr)
   518  		} else {
   519  			warnItems = append(warnItems, keyStr)
   520  		}
   521  	}
   522  
   523  	if len(bothUnused) > 0 {
   524  		return errors.Errorf("config file contained unknown configuration options: %s",
   525  			strings.Join(bothUnused, ", "))
   526  	}
   527  
   528  	// Warn that some legal field of config file won't be overwritten, such as lightning.file
   529  	if len(warnItems) > 0 {
   530  		log.L().Warn("currently only per-task configuration can be applied, global configuration changes can only be made on startup",
   531  			zap.Strings("global config changes", warnItems))
   532  	}
   533  
   534  	return nil
   535  }
   536  
   537  // Adjust fixes the invalid or unspecified settings to reasonable valid values.
   538  func (cfg *Config) Adjust(ctx context.Context) error {
   539  	// Reject problematic CSV configurations.
   540  	csv := &cfg.Mydumper.CSV
   541  	if len(csv.Separator) == 0 {
   542  		return errors.New("invalid config: `mydumper.csv.separator` must not be empty")
   543  	}
   544  
   545  	if len(csv.Delimiter) > 0 && (strings.HasPrefix(csv.Separator, csv.Delimiter) || strings.HasPrefix(csv.Delimiter, csv.Separator)) {
   546  		return errors.New("invalid config: `mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other")
   547  	}
   548  
   549  	if csv.BackslashEscape {
   550  		if csv.Separator == `\` {
   551  			return errors.New("invalid config: cannot use '\\' as CSV separator when `mydumper.csv.backslash-escape` is true")
   552  		}
   553  		if csv.Delimiter == `\` {
   554  			return errors.New("invalid config: cannot use '\\' as CSV delimiter when `mydumper.csv.backslash-escape` is true")
   555  		}
   556  		if csv.Terminator == `\` {
   557  			return errors.New("invalid config: cannot use '\\' as CSV terminator when `mydumper.csv.backslash-escape` is true")
   558  		}
   559  	}
   560  
   561  	// adjust file routing
   562  	for _, rule := range cfg.Mydumper.FileRouters {
   563  		if filepath.IsAbs(rule.Path) {
   564  			relPath, err := filepath.Rel(cfg.Mydumper.SourceDir, rule.Path)
   565  			if err != nil {
   566  				return errors.Trace(err)
   567  			}
   568  			// ".." means that this path is not in source dir, so we should return an error
   569  			if strings.HasPrefix(relPath, "..") {
   570  				return errors.Errorf("file route path '%s' is not in source dir '%s'", rule.Path, cfg.Mydumper.SourceDir)
   571  			}
   572  			rule.Path = relPath
   573  		}
   574  	}
   575  
   576  	// enable default file route rule if no rules are set
   577  	if len(cfg.Mydumper.FileRouters) == 0 {
   578  		cfg.Mydumper.DefaultFileRules = true
   579  	}
   580  
   581  	if cfg.TikvImporter.Backend == "" {
   582  		return errors.New("tikv-importer.backend must not be empty!")
   583  	}
   584  	cfg.TikvImporter.Backend = strings.ToLower(cfg.TikvImporter.Backend)
   585  	mustHaveInternalConnections := true
   586  	switch cfg.TikvImporter.Backend {
   587  	case BackendTiDB:
   588  		cfg.DefaultVarsForTiDBBackend()
   589  		mustHaveInternalConnections = false
   590  		cfg.PostRestore.Checksum = OpLevelOff
   591  		cfg.PostRestore.Analyze = OpLevelOff
   592  		cfg.TikvImporter.DuplicateDetection = false
   593  	case BackendImporter, BackendLocal:
   594  		// RegionConcurrency > NumCPU is meaningless.
   595  		cpuCount := runtime.NumCPU()
   596  		if cfg.App.RegionConcurrency > cpuCount {
   597  			cfg.App.RegionConcurrency = cpuCount
   598  		}
   599  		cfg.DefaultVarsForImporterAndLocalBackend(ctx)
   600  	default:
   601  		return errors.Errorf("invalid config: unsupported `tikv-importer.backend` (%s)", cfg.TikvImporter.Backend)
   602  	}
   603  
   604  	// TODO calculate these from the machine's free memory.
   605  	if cfg.TikvImporter.EngineMemCacheSize == 0 {
   606  		cfg.TikvImporter.EngineMemCacheSize = defaultEngineMemCacheSize
   607  	}
   608  	if cfg.TikvImporter.LocalWriterMemCacheSize == 0 {
   609  		cfg.TikvImporter.LocalWriterMemCacheSize = defaultLocalWriterMemCacheSize
   610  	}
   611  
   612  	if cfg.TikvImporter.Backend == BackendLocal {
   613  		if err := cfg.CheckAndAdjustForLocalBackend(); err != nil {
   614  			return err
   615  		}
   616  	} else if cfg.TikvImporter.DuplicateDetection {
   617  		return errors.Errorf("invalid config: unsupported backend (%s) for duplicate-detection", cfg.TikvImporter.Backend)
   618  	}
   619  
   620  	if cfg.TikvImporter.Backend == BackendTiDB {
   621  		cfg.TikvImporter.OnDuplicate = strings.ToLower(cfg.TikvImporter.OnDuplicate)
   622  		switch cfg.TikvImporter.OnDuplicate {
   623  		case ReplaceOnDup, IgnoreOnDup, ErrorOnDup:
   624  		default:
   625  			return errors.Errorf("invalid config: unsupported `tikv-importer.on-duplicate` (%s)", cfg.TikvImporter.OnDuplicate)
   626  		}
   627  	}
   628  
   629  	var err error
   630  	cfg.TiDB.SQLMode, err = mysql.GetSQLMode(cfg.TiDB.StrSQLMode)
   631  	if err != nil {
   632  		return errors.Annotate(err, "invalid config: `mydumper.tidb.sql_mode` must be a valid SQL_MODE")
   633  	}
   634  
   635  	if err := cfg.CheckAndAdjustSecurity(); err != nil {
   636  		return err
   637  	}
   638  
   639  	// mydumper.filter and black-white-list cannot co-exist.
   640  	if cfg.HasLegacyBlackWhiteList() {
   641  		log.L().Warn("the config `black-white-list` has been deprecated, please replace with `mydumper.filter`")
   642  		if !common.StringSliceEqual(cfg.Mydumper.Filter, DefaultFilter) {
   643  			return errors.New("invalid config: `mydumper.filter` and `black-white-list` cannot be simultaneously defined")
   644  		}
   645  	}
   646  
   647  	for _, rule := range cfg.Routes {
   648  		if !cfg.Mydumper.CaseSensitive {
   649  			rule.ToLower()
   650  		}
   651  		if err := rule.Valid(); err != nil {
   652  			return errors.Trace(err)
   653  		}
   654  	}
   655  
   656  	if err := cfg.CheckAndAdjustTiDBPort(ctx, mustHaveInternalConnections); err != nil {
   657  		return err
   658  	}
   659  	cfg.AdjustMydumper()
   660  	cfg.AdjustCheckPoint()
   661  	return cfg.CheckAndAdjustFilePath()
   662  }
   663  
   664  func (cfg *Config) CheckAndAdjustForLocalBackend() error {
   665  	if len(cfg.TikvImporter.SortedKVDir) == 0 {
   666  		return errors.Errorf("tikv-importer.sorted-kv-dir must not be empty!")
   667  	}
   668  
   669  	storageSizeDir := filepath.Clean(cfg.TikvImporter.SortedKVDir)
   670  	sortedKVDirInfo, err := os.Stat(storageSizeDir)
   671  
   672  	switch {
   673  	case os.IsNotExist(err):
   674  		// the sorted-kv-dir does not exist, meaning we will create it automatically.
   675  		// so we extract the storage size from its parent directory.
   676  		storageSizeDir = filepath.Dir(storageSizeDir)
   677  	case err == nil:
   678  		if !sortedKVDirInfo.IsDir() {
   679  			return errors.Errorf("tikv-importer.sorted-kv-dir ('%s') is not a directory", storageSizeDir)
   680  		}
   681  	default:
   682  		return errors.Annotate(err, "invalid tikv-importer.sorted-kv-dir")
   683  	}
   684  
   685  	return nil
   686  }
   687  
   688  func (cfg *Config) DefaultVarsForTiDBBackend() {
   689  	if cfg.App.TableConcurrency == 0 {
   690  		cfg.App.TableConcurrency = cfg.App.RegionConcurrency
   691  	}
   692  	if cfg.App.IndexConcurrency == 0 {
   693  		cfg.App.IndexConcurrency = cfg.App.RegionConcurrency
   694  	}
   695  }
   696  
   697  func (cfg *Config) adjustDistSQLConcurrency(ctx context.Context) error {
   698  	tls, err := cfg.ToTLS()
   699  	if err != nil {
   700  		return err
   701  	}
   702  	result := &api.StoresInfo{}
   703  	err = tls.WithHost(cfg.TiDB.PdAddr).GetJSON(ctx, pdStores, result)
   704  	if err != nil {
   705  		return errors.Trace(err)
   706  	}
   707  	cfg.TiDB.DistSQLScanConcurrency = len(result.Stores) * distSQLScanConcurrencyPerStore
   708  	if cfg.TiDB.DistSQLScanConcurrency < defaultDistSQLScanConcurrency {
   709  		cfg.TiDB.DistSQLScanConcurrency = defaultDistSQLScanConcurrency
   710  	}
   711  	log.L().Info("adjust scan concurrency success", zap.Int("DistSQLScanConcurrency", cfg.TiDB.DistSQLScanConcurrency))
   712  	return nil
   713  }
   714  
   715  func (cfg *Config) DefaultVarsForImporterAndLocalBackend(ctx context.Context) {
   716  	if cfg.TiDB.DistSQLScanConcurrency == defaultDistSQLScanConcurrency {
   717  		var e error
   718  		for i := 0; i < maxRetryTimes; i++ {
   719  			e = cfg.adjustDistSQLConcurrency(ctx)
   720  			if e == nil {
   721  				break
   722  			}
   723  			time.Sleep(defaultRetryBackoffTime)
   724  		}
   725  		if e != nil {
   726  			log.L().Error("failed to adjust scan concurrency", zap.Error(e))
   727  		}
   728  	}
   729  
   730  	if cfg.App.IndexConcurrency == 0 {
   731  		cfg.App.IndexConcurrency = defaultIndexConcurrency
   732  	}
   733  	if cfg.App.TableConcurrency == 0 {
   734  		cfg.App.TableConcurrency = defaultTableConcurrency
   735  	}
   736  
   737  	if len(cfg.App.MetaSchemaName) == 0 {
   738  		cfg.App.MetaSchemaName = defaultMetaSchemaName
   739  	}
   740  	if cfg.TikvImporter.RangeConcurrency == 0 {
   741  		cfg.TikvImporter.RangeConcurrency = 16
   742  	}
   743  	if cfg.TikvImporter.RegionSplitSize == 0 {
   744  		cfg.TikvImporter.RegionSplitSize = SplitRegionSize
   745  	}
   746  	if cfg.TiDB.BuildStatsConcurrency == 0 {
   747  		cfg.TiDB.BuildStatsConcurrency = defaultBuildStatsConcurrency
   748  	}
   749  	if cfg.TiDB.IndexSerialScanConcurrency == 0 {
   750  		cfg.TiDB.IndexSerialScanConcurrency = defaultIndexSerialScanConcurrency
   751  	}
   752  	if cfg.TiDB.ChecksumTableConcurrency == 0 {
   753  		cfg.TiDB.ChecksumTableConcurrency = defaultChecksumTableConcurrency
   754  	}
   755  }
   756  
   757  func (cfg *Config) CheckAndAdjustTiDBPort(ctx context.Context, mustHaveInternalConnections bool) error {
   758  	// automatically determine the TiDB port & PD address from TiDB settings
   759  	if mustHaveInternalConnections && (cfg.TiDB.Port <= 0 || len(cfg.TiDB.PdAddr) == 0) {
   760  		tls, err := cfg.ToTLS()
   761  		if err != nil {
   762  			return err
   763  		}
   764  
   765  		var settings tidbcfg.Config
   766  		err = tls.GetJSON(ctx, "/settings", &settings)
   767  		if err != nil {
   768  			return errors.Annotate(err, "cannot fetch settings from TiDB, please manually fill in `tidb.port` and `tidb.pd-addr`")
   769  		}
   770  		if cfg.TiDB.Port <= 0 {
   771  			cfg.TiDB.Port = int(settings.Port)
   772  		}
   773  		if len(cfg.TiDB.PdAddr) == 0 {
   774  			pdAddrs := strings.Split(settings.Path, ",")
   775  			cfg.TiDB.PdAddr = pdAddrs[0] // FIXME support multiple PDs once importer can.
   776  		}
   777  	}
   778  
   779  	if cfg.TiDB.Port <= 0 {
   780  		return errors.New("invalid `tidb.port` setting")
   781  	}
   782  	if mustHaveInternalConnections && len(cfg.TiDB.PdAddr) == 0 {
   783  		return errors.New("invalid `tidb.pd-addr` setting")
   784  	}
   785  	return nil
   786  }
   787  
   788  func (cfg *Config) CheckAndAdjustFilePath() error {
   789  	var u *url.URL
   790  
   791  	// An absolute Windows path like "C:\Users\XYZ" would be interpreted as
   792  	// an URL with scheme "C" and opaque data "\Users\XYZ".
   793  	// Therefore, we only perform URL parsing if we are sure the path is not
   794  	// an absolute Windows path.
   795  	// Here we use the `filepath.VolumeName` which can identify the "C:" part
   796  	// out of the path. On Linux this method always return an empty string.
   797  	// On Windows, the drive letter can only be single letters from "A:" to "Z:",
   798  	// so this won't mistake "S3:" as a Windows path.
   799  	if len(filepath.VolumeName(cfg.Mydumper.SourceDir)) == 0 {
   800  		var err error
   801  		u, err = url.Parse(cfg.Mydumper.SourceDir)
   802  		if err != nil {
   803  			return errors.Trace(err)
   804  		}
   805  	} else {
   806  		u = &url.URL{}
   807  	}
   808  
   809  	// convert path and relative path to a valid file url
   810  	if u.Scheme == "" {
   811  		if !common.IsDirExists(cfg.Mydumper.SourceDir) {
   812  			return errors.Errorf("%s: mydumper dir does not exist", cfg.Mydumper.SourceDir)
   813  		}
   814  		absPath, err := filepath.Abs(cfg.Mydumper.SourceDir)
   815  		if err != nil {
   816  			return errors.Annotatef(err, "covert data-source-dir '%s' to absolute path failed", cfg.Mydumper.SourceDir)
   817  		}
   818  		cfg.Mydumper.SourceDir = "file://" + filepath.ToSlash(absPath)
   819  		u.Path = absPath
   820  		u.Scheme = "file"
   821  	}
   822  
   823  	found := false
   824  	for _, t := range supportedStorageTypes {
   825  		if u.Scheme == t {
   826  			found = true
   827  			break
   828  		}
   829  	}
   830  	if !found {
   831  		return errors.Errorf("Unsupported data-source-dir url '%s'", cfg.Mydumper.SourceDir)
   832  	}
   833  	return nil
   834  }
   835  
   836  func (cfg *Config) AdjustCheckPoint() {
   837  	if len(cfg.Checkpoint.Schema) == 0 {
   838  		cfg.Checkpoint.Schema = "tidb_lightning_checkpoint"
   839  	}
   840  	if len(cfg.Checkpoint.Driver) == 0 {
   841  		cfg.Checkpoint.Driver = CheckpointDriverFile
   842  	}
   843  	if len(cfg.Checkpoint.DSN) == 0 {
   844  		switch cfg.Checkpoint.Driver {
   845  		case CheckpointDriverMySQL:
   846  			param := common.MySQLConnectParam{
   847  				Host:             cfg.TiDB.Host,
   848  				Port:             cfg.TiDB.Port,
   849  				User:             cfg.TiDB.User,
   850  				Password:         cfg.TiDB.Psw,
   851  				SQLMode:          mysql.DefaultSQLMode,
   852  				MaxAllowedPacket: defaultMaxAllowedPacket,
   853  				TLS:              cfg.TiDB.TLS,
   854  			}
   855  			cfg.Checkpoint.DSN = param.ToDSN()
   856  		case CheckpointDriverFile:
   857  			cfg.Checkpoint.DSN = "/tmp/" + cfg.Checkpoint.Schema + ".pb"
   858  		}
   859  	}
   860  }
   861  
   862  func (cfg *Config) AdjustMydumper() {
   863  	if cfg.Mydumper.BatchImportRatio < 0.0 || cfg.Mydumper.BatchImportRatio >= 1.0 {
   864  		cfg.Mydumper.BatchImportRatio = 0.75
   865  	}
   866  	if cfg.Mydumper.ReadBlockSize <= 0 {
   867  		cfg.Mydumper.ReadBlockSize = ReadBlockSize
   868  	}
   869  	if len(cfg.Mydumper.CharacterSet) == 0 {
   870  		cfg.Mydumper.CharacterSet = "auto"
   871  	}
   872  
   873  	if len(cfg.Mydumper.IgnoreColumns) != 0 {
   874  		// Tolower columns cause we use Name.L to compare column in tidb.
   875  		for _, ig := range cfg.Mydumper.IgnoreColumns {
   876  			cols := make([]string, len(ig.Columns))
   877  			for i, col := range ig.Columns {
   878  				cols[i] = strings.ToLower(col)
   879  			}
   880  			ig.Columns = cols
   881  		}
   882  	}
   883  }
   884  
   885  func (cfg *Config) CheckAndAdjustSecurity() error {
   886  	if cfg.TiDB.Security == nil {
   887  		cfg.TiDB.Security = &cfg.Security
   888  	}
   889  
   890  	switch cfg.TiDB.TLS {
   891  	case "":
   892  		if len(cfg.TiDB.Security.CAPath) > 0 {
   893  			cfg.TiDB.TLS = "cluster"
   894  		} else {
   895  			cfg.TiDB.TLS = "false"
   896  		}
   897  	case "cluster":
   898  		if len(cfg.Security.CAPath) == 0 {
   899  			return errors.New("invalid config: cannot set `tidb.tls` to 'cluster' without a [security] section")
   900  		}
   901  	case "false", "skip-verify", "preferred":
   902  		break
   903  	default:
   904  		return errors.Errorf("invalid config: unsupported `tidb.tls` config %s", cfg.TiDB.TLS)
   905  	}
   906  	return nil
   907  }
   908  
   909  // HasLegacyBlackWhiteList checks whether the deprecated [black-white-list] section
   910  // was defined.
   911  func (cfg *Config) HasLegacyBlackWhiteList() bool {
   912  	return len(cfg.BWList.DoTables) != 0 || len(cfg.BWList.DoDBs) != 0 || len(cfg.BWList.IgnoreTables) != 0 || len(cfg.BWList.IgnoreDBs) != 0
   913  }