github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/config/config.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package config 15 16 import ( 17 "context" 18 "encoding/json" 19 "fmt" 20 "math" 21 "net" 22 "net/url" 23 "os" 24 "path/filepath" 25 "runtime" 26 "strconv" 27 "strings" 28 "time" 29 30 "github.com/BurntSushi/toml" 31 "github.com/docker/go-units" 32 gomysql "github.com/go-sql-driver/mysql" 33 "github.com/pingcap/errors" 34 "github.com/pingcap/parser/mysql" 35 filter "github.com/pingcap/tidb-tools/pkg/table-filter" 36 router "github.com/pingcap/tidb-tools/pkg/table-router" 37 tidbcfg "github.com/pingcap/tidb/config" 38 "github.com/tikv/pd/server/api" 39 "go.uber.org/zap" 40 41 "github.com/pingcap/br/pkg/lightning/common" 42 "github.com/pingcap/br/pkg/lightning/log" 43 ) 44 45 const ( 46 // ImportMode defines mode of import for tikv. 47 ImportMode = "import" 48 // NormalMode defines mode of normal for tikv. 49 NormalMode = "normal" 50 51 // BackendTiDB is a constant for choosing the "TiDB" backend in the configuration. 52 BackendTiDB = "tidb" 53 // BackendImporter is a constant for choosing the "Importer" backend in the configuration. 54 BackendImporter = "importer" 55 // BackendLocal is a constant for choosing the "Local" backup in the configuration. 56 // In this mode, we write & sort kv pairs with local storage and directly write them to tikv. 57 BackendLocal = "local" 58 59 // CheckpointDriverMySQL is a constant for choosing the "MySQL" checkpoint driver in the configuration. 60 CheckpointDriverMySQL = "mysql" 61 // CheckpointDriverFile is a constant for choosing the "File" checkpoint driver in the configuration. 62 CheckpointDriverFile = "file" 63 64 // ReplaceOnDup indicates using REPLACE INTO to insert data 65 ReplaceOnDup = "replace" 66 // IgnoreOnDup indicates using INSERT IGNORE INTO to insert data 67 IgnoreOnDup = "ignore" 68 // ErrorOnDup indicates using INSERT INTO to insert data, which would violate PK or UNIQUE constraint 69 ErrorOnDup = "error" 70 71 defaultDistSQLScanConcurrency = 15 72 distSQLScanConcurrencyPerStore = 4 73 defaultBuildStatsConcurrency = 20 74 defaultIndexSerialScanConcurrency = 20 75 defaultChecksumTableConcurrency = 2 76 defaultTableConcurrency = 6 77 defaultIndexConcurrency = 2 78 79 // defaultMetaSchemaName is the default database name used to store lightning metadata 80 defaultMetaSchemaName = "lightning_metadata" 81 82 // autoDiskQuotaLocalReservedSpeed is the estimated size increase per 83 // millisecond per write thread the local backend may gain on all engines. 84 // This is used to compute the maximum size overshoot between two disk quota 85 // checks, if the first one has barely passed. 86 // 87 // With cron.check-disk-quota = 1m, region-concurrency = 40, this should 88 // contribute 2.3 GiB to the reserved size. 89 autoDiskQuotaLocalReservedSpeed uint64 = 1 * units.KiB 90 defaultEngineMemCacheSize = 512 * units.MiB 91 defaultLocalWriterMemCacheSize = 128 * units.MiB 92 93 maxRetryTimes = 4 94 defaultRetryBackoffTime = 100 * time.Millisecond 95 pdStores = "/pd/api/v1/stores" 96 ) 97 98 var ( 99 supportedStorageTypes = []string{"file", "local", "s3", "noop", "gcs"} 100 101 DefaultFilter = []string{ 102 "*.*", 103 "!mysql.*", 104 "!sys.*", 105 "!INFORMATION_SCHEMA.*", 106 "!PERFORMANCE_SCHEMA.*", 107 "!METRICS_SCHEMA.*", 108 "!INSPECTION_SCHEMA.*", 109 } 110 ) 111 112 type DBStore struct { 113 Host string `toml:"host" json:"host"` 114 Port int `toml:"port" json:"port"` 115 User string `toml:"user" json:"user"` 116 Psw string `toml:"password" json:"-"` 117 StatusPort int `toml:"status-port" json:"status-port"` 118 PdAddr string `toml:"pd-addr" json:"pd-addr"` 119 StrSQLMode string `toml:"sql-mode" json:"sql-mode"` 120 TLS string `toml:"tls" json:"tls"` 121 Security *Security `toml:"security" json:"security"` 122 123 SQLMode mysql.SQLMode `toml:"-" json:"-"` 124 MaxAllowedPacket uint64 `toml:"max-allowed-packet" json:"max-allowed-packet"` 125 126 DistSQLScanConcurrency int `toml:"distsql-scan-concurrency" json:"distsql-scan-concurrency"` 127 BuildStatsConcurrency int `toml:"build-stats-concurrency" json:"build-stats-concurrency"` 128 IndexSerialScanConcurrency int `toml:"index-serial-scan-concurrency" json:"index-serial-scan-concurrency"` 129 ChecksumTableConcurrency int `toml:"checksum-table-concurrency" json:"checksum-table-concurrency"` 130 } 131 132 type Config struct { 133 TaskID int64 `toml:"-" json:"id"` 134 135 App Lightning `toml:"lightning" json:"lightning"` 136 TiDB DBStore `toml:"tidb" json:"tidb"` 137 138 Checkpoint Checkpoint `toml:"checkpoint" json:"checkpoint"` 139 Mydumper MydumperRuntime `toml:"mydumper" json:"mydumper"` 140 TikvImporter TikvImporter `toml:"tikv-importer" json:"tikv-importer"` 141 PostRestore PostRestore `toml:"post-restore" json:"post-restore"` 142 Cron Cron `toml:"cron" json:"cron"` 143 Routes []*router.TableRule `toml:"routes" json:"routes"` 144 Security Security `toml:"security" json:"security"` 145 146 BWList filter.MySQLReplicationRules `toml:"black-white-list" json:"black-white-list"` 147 } 148 149 func (cfg *Config) String() string { 150 bytes, err := json.Marshal(cfg) 151 if err != nil { 152 log.L().Error("marshal config to json error", log.ShortError(err)) 153 } 154 return string(bytes) 155 } 156 157 func (cfg *Config) ToTLS() (*common.TLS, error) { 158 hostPort := net.JoinHostPort(cfg.TiDB.Host, strconv.Itoa(cfg.TiDB.StatusPort)) 159 return common.NewTLS(cfg.Security.CAPath, cfg.Security.CertPath, cfg.Security.KeyPath, hostPort) 160 } 161 162 type Lightning struct { 163 TableConcurrency int `toml:"table-concurrency" json:"table-concurrency"` 164 IndexConcurrency int `toml:"index-concurrency" json:"index-concurrency"` 165 RegionConcurrency int `toml:"region-concurrency" json:"region-concurrency"` 166 IOConcurrency int `toml:"io-concurrency" json:"io-concurrency"` 167 CheckRequirements bool `toml:"check-requirements" json:"check-requirements"` 168 MetaSchemaName string `toml:"meta-schema-name" json:"meta-schema-name"` 169 } 170 171 type PostOpLevel int 172 173 const ( 174 OpLevelOff PostOpLevel = iota 175 OpLevelOptional 176 OpLevelRequired 177 ) 178 179 func (t *PostOpLevel) UnmarshalTOML(v interface{}) error { 180 switch val := v.(type) { 181 case bool: 182 if val { 183 *t = OpLevelRequired 184 } else { 185 *t = OpLevelOff 186 } 187 case string: 188 return t.FromStringValue(val) 189 default: 190 return errors.Errorf("invalid op level '%v', please choose valid option between ['off', 'optional', 'required']", v) 191 } 192 return nil 193 } 194 195 func (t PostOpLevel) MarshalText() ([]byte, error) { 196 return []byte(t.String()), nil 197 } 198 199 // parser command line parameter 200 func (t *PostOpLevel) FromStringValue(s string) error { 201 switch strings.ToLower(s) { 202 //nolint:goconst // This 'false' and other 'false's aren't the same. 203 case "off", "false": 204 *t = OpLevelOff 205 case "required", "true": 206 *t = OpLevelRequired 207 case "optional": 208 *t = OpLevelOptional 209 default: 210 return errors.Errorf("invalid op level '%s', please choose valid option between ['off', 'optional', 'required']", s) 211 } 212 return nil 213 } 214 215 func (t *PostOpLevel) MarshalJSON() ([]byte, error) { 216 return []byte(`"` + t.String() + `"`), nil 217 } 218 219 func (t *PostOpLevel) UnmarshalJSON(data []byte) error { 220 return t.FromStringValue(strings.Trim(string(data), `"`)) 221 } 222 223 func (t PostOpLevel) String() string { 224 switch t { 225 case OpLevelOff: 226 return "off" 227 case OpLevelOptional: 228 return "optional" 229 case OpLevelRequired: 230 return "required" 231 default: 232 panic(fmt.Sprintf("invalid post process type '%d'", t)) 233 } 234 } 235 236 // PostRestore has some options which will be executed after kv restored. 237 type PostRestore struct { 238 Checksum PostOpLevel `toml:"checksum" json:"checksum"` 239 Analyze PostOpLevel `toml:"analyze" json:"analyze"` 240 Level1Compact bool `toml:"level-1-compact" json:"level-1-compact"` 241 PostProcessAtLast bool `toml:"post-process-at-last" json:"post-process-at-last"` 242 Compact bool `toml:"compact" json:"compact"` 243 } 244 245 type CSVConfig struct { 246 Separator string `toml:"separator" json:"separator"` 247 Delimiter string `toml:"delimiter" json:"delimiter"` 248 Terminator string `toml:"terminator" json:"terminator"` 249 Null string `toml:"null" json:"null"` 250 Header bool `toml:"header" json:"header"` 251 TrimLastSep bool `toml:"trim-last-separator" json:"trim-last-separator"` 252 NotNull bool `toml:"not-null" json:"not-null"` 253 BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"` 254 } 255 256 type MydumperRuntime struct { 257 ReadBlockSize ByteSize `toml:"read-block-size" json:"read-block-size"` 258 BatchSize ByteSize `toml:"batch-size" json:"batch-size"` 259 BatchImportRatio float64 `toml:"batch-import-ratio" json:"batch-import-ratio"` 260 SourceDir string `toml:"data-source-dir" json:"data-source-dir"` 261 CharacterSet string `toml:"character-set" json:"character-set"` 262 CSV CSVConfig `toml:"csv" json:"csv"` 263 MaxRegionSize ByteSize `toml:"max-region-size" json:"max-region-size"` 264 Filter []string `toml:"filter" json:"filter"` 265 FileRouters []*FileRouteRule `toml:"files" json:"files"` 266 // Deprecated: only used to keep the compatibility. 267 NoSchema bool `toml:"no-schema" json:"no-schema"` 268 CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"` 269 StrictFormat bool `toml:"strict-format" json:"strict-format"` 270 DefaultFileRules bool `toml:"default-file-rules" json:"default-file-rules"` 271 IgnoreColumns AllIgnoreColumns `toml:"ignore-data-columns" json:"ignore-data-columns"` 272 } 273 274 type AllIgnoreColumns []*IgnoreColumns 275 276 type IgnoreColumns struct { 277 DB string `toml:"db" json:"db"` 278 Table string `toml:"table" json:"table"` 279 TableFilter []string `toml:"table-filter" json:"table-filter"` 280 Columns []string `toml:"columns" json:"columns"` 281 } 282 283 // GetIgnoreColumns gets Ignore config by schema name/regex and table name/regex. 284 func (igCols AllIgnoreColumns) GetIgnoreColumns(db string, table string, caseSensitive bool) (*IgnoreColumns, error) { 285 if !caseSensitive { 286 db = strings.ToLower(db) 287 table = strings.ToLower(table) 288 } 289 for i, ig := range igCols { 290 if ig.DB == db && ig.Table == table { 291 return igCols[i], nil 292 } 293 f, err := filter.Parse(ig.TableFilter) 294 if err != nil { 295 return nil, errors.Trace(err) 296 } 297 if f.MatchTable(db, table) { 298 return igCols[i], nil 299 } 300 } 301 return &IgnoreColumns{Columns: make([]string, 0)}, nil 302 } 303 304 type FileRouteRule struct { 305 Pattern string `json:"pattern" toml:"pattern" yaml:"pattern"` 306 Path string `json:"path" toml:"path" yaml:"path"` 307 Schema string `json:"schema" toml:"schema" yaml:"schema"` 308 Table string `json:"table" toml:"table" yaml:"table"` 309 Type string `json:"type" toml:"type" yaml:"type"` 310 Key string `json:"key" toml:"key" yaml:"key"` 311 Compression string `json:"compression" toml:"compression" yaml:"compression"` 312 } 313 314 type TikvImporter struct { 315 Addr string `toml:"addr" json:"addr"` 316 Backend string `toml:"backend" json:"backend"` 317 OnDuplicate string `toml:"on-duplicate" json:"on-duplicate"` 318 MaxKVPairs int `toml:"max-kv-pairs" json:"max-kv-pairs"` 319 SendKVPairs int `toml:"send-kv-pairs" json:"send-kv-pairs"` 320 RegionSplitSize ByteSize `toml:"region-split-size" json:"region-split-size"` 321 SortedKVDir string `toml:"sorted-kv-dir" json:"sorted-kv-dir"` 322 DiskQuota ByteSize `toml:"disk-quota" json:"disk-quota"` 323 RangeConcurrency int `toml:"range-concurrency" json:"range-concurrency"` 324 DuplicateDetection bool `toml:"duplicate-detection" json:"duplicate-detection"` 325 326 EngineMemCacheSize ByteSize `toml:"engine-mem-cache-size" json:"engine-mem-cache-size"` 327 LocalWriterMemCacheSize ByteSize `toml:"local-writer-mem-cache-size" json:"local-writer-mem-cache-size"` 328 } 329 330 type Checkpoint struct { 331 Schema string `toml:"schema" json:"schema"` 332 DSN string `toml:"dsn" json:"-"` // DSN may contain password, don't expose this to JSON. 333 Driver string `toml:"driver" json:"driver"` 334 Enable bool `toml:"enable" json:"enable"` 335 KeepAfterSuccess bool `toml:"keep-after-success" json:"keep-after-success"` 336 } 337 338 type Cron struct { 339 SwitchMode Duration `toml:"switch-mode" json:"switch-mode"` 340 LogProgress Duration `toml:"log-progress" json:"log-progress"` 341 CheckDiskQuota Duration `toml:"check-disk-quota" json:"check-disk-quota"` 342 } 343 344 type Security struct { 345 CAPath string `toml:"ca-path" json:"ca-path"` 346 CertPath string `toml:"cert-path" json:"cert-path"` 347 KeyPath string `toml:"key-path" json:"key-path"` 348 // RedactInfoLog indicates that whether enabling redact log 349 RedactInfoLog bool `toml:"redact-info-log" json:"redact-info-log"` 350 } 351 352 // RegistersMySQL registers (or deregisters) the TLS config with name "cluster" 353 // for use in `sql.Open()`. This method is goroutine-safe. 354 func (sec *Security) RegisterMySQL() error { 355 if sec == nil { 356 return nil 357 } 358 tlsConfig, err := common.ToTLSConfig(sec.CAPath, sec.CertPath, sec.KeyPath) 359 switch { 360 case err != nil: 361 return errors.Trace(err) 362 case tlsConfig != nil: 363 // error happens only when the key coincides with the built-in names. 364 _ = gomysql.RegisterTLSConfig("cluster", tlsConfig) 365 default: 366 gomysql.DeregisterTLSConfig("cluster") 367 } 368 return nil 369 } 370 371 // A duration which can be deserialized from a TOML string. 372 // Implemented as https://github.com/BurntSushi/toml#using-the-encodingtextunmarshaler-interface 373 type Duration struct { 374 time.Duration 375 } 376 377 func (d *Duration) UnmarshalText(text []byte) error { 378 var err error 379 d.Duration, err = time.ParseDuration(string(text)) 380 return errors.Trace(err) 381 } 382 383 func (d Duration) MarshalText() ([]byte, error) { 384 return []byte(d.String()), nil 385 } 386 387 func (d *Duration) MarshalJSON() ([]byte, error) { 388 return []byte(fmt.Sprintf(`"%s"`, d.Duration)), nil 389 } 390 391 func NewConfig() *Config { 392 return &Config{ 393 App: Lightning{ 394 RegionConcurrency: runtime.NumCPU(), 395 TableConcurrency: 0, 396 IndexConcurrency: 0, 397 IOConcurrency: 5, 398 CheckRequirements: true, 399 }, 400 Checkpoint: Checkpoint{ 401 Enable: true, 402 }, 403 TiDB: DBStore{ 404 Host: "127.0.0.1", 405 User: "root", 406 StatusPort: 10080, 407 StrSQLMode: "ONLY_FULL_GROUP_BY,NO_AUTO_CREATE_USER", 408 MaxAllowedPacket: defaultMaxAllowedPacket, 409 BuildStatsConcurrency: defaultBuildStatsConcurrency, 410 DistSQLScanConcurrency: defaultDistSQLScanConcurrency, 411 IndexSerialScanConcurrency: defaultIndexSerialScanConcurrency, 412 ChecksumTableConcurrency: defaultChecksumTableConcurrency, 413 }, 414 Cron: Cron{ 415 SwitchMode: Duration{Duration: 5 * time.Minute}, 416 LogProgress: Duration{Duration: 5 * time.Minute}, 417 CheckDiskQuota: Duration{Duration: 1 * time.Minute}, 418 }, 419 Mydumper: MydumperRuntime{ 420 ReadBlockSize: ReadBlockSize, 421 CSV: CSVConfig{ 422 Separator: ",", 423 Delimiter: `"`, 424 Header: true, 425 NotNull: false, 426 Null: `\N`, 427 BackslashEscape: true, 428 TrimLastSep: false, 429 }, 430 StrictFormat: false, 431 MaxRegionSize: MaxRegionSize, 432 Filter: DefaultFilter, 433 }, 434 TikvImporter: TikvImporter{ 435 Backend: "", 436 OnDuplicate: ReplaceOnDup, 437 MaxKVPairs: 4096, 438 SendKVPairs: 32768, 439 RegionSplitSize: SplitRegionSize, 440 DiskQuota: ByteSize(math.MaxInt64), 441 }, 442 PostRestore: PostRestore{ 443 Checksum: OpLevelRequired, 444 Analyze: OpLevelOptional, 445 PostProcessAtLast: true, 446 }, 447 } 448 } 449 450 // LoadFromGlobal resets the current configuration to the global settings. 451 func (cfg *Config) LoadFromGlobal(global *GlobalConfig) error { 452 if err := cfg.LoadFromTOML(global.ConfigFileContent); err != nil { 453 return err 454 } 455 456 cfg.TiDB.Host = global.TiDB.Host 457 cfg.TiDB.Port = global.TiDB.Port 458 cfg.TiDB.User = global.TiDB.User 459 cfg.TiDB.Psw = global.TiDB.Psw 460 cfg.TiDB.StatusPort = global.TiDB.StatusPort 461 cfg.TiDB.PdAddr = global.TiDB.PdAddr 462 cfg.Mydumper.NoSchema = global.Mydumper.NoSchema 463 cfg.Mydumper.SourceDir = global.Mydumper.SourceDir 464 cfg.Mydumper.Filter = global.Mydumper.Filter 465 cfg.TikvImporter.Addr = global.TikvImporter.Addr 466 cfg.TikvImporter.Backend = global.TikvImporter.Backend 467 cfg.TikvImporter.SortedKVDir = global.TikvImporter.SortedKVDir 468 cfg.Checkpoint.Enable = global.Checkpoint.Enable 469 cfg.PostRestore.Checksum = global.PostRestore.Checksum 470 cfg.PostRestore.Analyze = global.PostRestore.Analyze 471 cfg.App.CheckRequirements = global.App.CheckRequirements 472 cfg.Security = global.Security 473 cfg.Mydumper.IgnoreColumns = global.Mydumper.IgnoreColumns 474 return nil 475 } 476 477 // LoadFromTOML overwrites the current configuration by the TOML data 478 // If data contains toml items not in Config and GlobalConfig, return an error 479 // If data contains toml items not in Config, thus won't take effect, warn user 480 func (cfg *Config) LoadFromTOML(data []byte) error { 481 // bothUnused saves toml items not belong to Config nor GlobalConfig 482 var bothUnused []string 483 // warnItems saves legal toml items but won't effect 484 var warnItems []string 485 486 dataStr := string(data) 487 488 // Here we load toml into cfg, and rest logic is check unused keys 489 metaData, err := toml.Decode(dataStr, cfg) 490 if err != nil { 491 return errors.Trace(err) 492 } 493 494 unusedConfigKeys := metaData.Undecoded() 495 if len(unusedConfigKeys) == 0 { 496 return nil 497 } 498 499 // Now we deal with potential both-unused keys of Config and GlobalConfig struct 500 501 metaDataGlobal, err := toml.Decode(dataStr, &GlobalConfig{}) 502 if err != nil { 503 return errors.Trace(err) 504 } 505 506 // Key type returned by metadata.Undecoded doesn't have a equality comparison, 507 // we convert them to string type instead, and this conversion is identical 508 unusedGlobalKeys := metaDataGlobal.Undecoded() 509 unusedGlobalKeyStrs := make(map[string]struct{}) 510 for _, key := range unusedGlobalKeys { 511 unusedGlobalKeyStrs[key.String()] = struct{}{} 512 } 513 514 for _, key := range unusedConfigKeys { 515 keyStr := key.String() 516 if _, found := unusedGlobalKeyStrs[keyStr]; found { 517 bothUnused = append(bothUnused, keyStr) 518 } else { 519 warnItems = append(warnItems, keyStr) 520 } 521 } 522 523 if len(bothUnused) > 0 { 524 return errors.Errorf("config file contained unknown configuration options: %s", 525 strings.Join(bothUnused, ", ")) 526 } 527 528 // Warn that some legal field of config file won't be overwritten, such as lightning.file 529 if len(warnItems) > 0 { 530 log.L().Warn("currently only per-task configuration can be applied, global configuration changes can only be made on startup", 531 zap.Strings("global config changes", warnItems)) 532 } 533 534 return nil 535 } 536 537 // Adjust fixes the invalid or unspecified settings to reasonable valid values. 538 func (cfg *Config) Adjust(ctx context.Context) error { 539 // Reject problematic CSV configurations. 540 csv := &cfg.Mydumper.CSV 541 if len(csv.Separator) == 0 { 542 return errors.New("invalid config: `mydumper.csv.separator` must not be empty") 543 } 544 545 if len(csv.Delimiter) > 0 && (strings.HasPrefix(csv.Separator, csv.Delimiter) || strings.HasPrefix(csv.Delimiter, csv.Separator)) { 546 return errors.New("invalid config: `mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other") 547 } 548 549 if csv.BackslashEscape { 550 if csv.Separator == `\` { 551 return errors.New("invalid config: cannot use '\\' as CSV separator when `mydumper.csv.backslash-escape` is true") 552 } 553 if csv.Delimiter == `\` { 554 return errors.New("invalid config: cannot use '\\' as CSV delimiter when `mydumper.csv.backslash-escape` is true") 555 } 556 if csv.Terminator == `\` { 557 return errors.New("invalid config: cannot use '\\' as CSV terminator when `mydumper.csv.backslash-escape` is true") 558 } 559 } 560 561 // adjust file routing 562 for _, rule := range cfg.Mydumper.FileRouters { 563 if filepath.IsAbs(rule.Path) { 564 relPath, err := filepath.Rel(cfg.Mydumper.SourceDir, rule.Path) 565 if err != nil { 566 return errors.Trace(err) 567 } 568 // ".." means that this path is not in source dir, so we should return an error 569 if strings.HasPrefix(relPath, "..") { 570 return errors.Errorf("file route path '%s' is not in source dir '%s'", rule.Path, cfg.Mydumper.SourceDir) 571 } 572 rule.Path = relPath 573 } 574 } 575 576 // enable default file route rule if no rules are set 577 if len(cfg.Mydumper.FileRouters) == 0 { 578 cfg.Mydumper.DefaultFileRules = true 579 } 580 581 if cfg.TikvImporter.Backend == "" { 582 return errors.New("tikv-importer.backend must not be empty!") 583 } 584 cfg.TikvImporter.Backend = strings.ToLower(cfg.TikvImporter.Backend) 585 mustHaveInternalConnections := true 586 switch cfg.TikvImporter.Backend { 587 case BackendTiDB: 588 cfg.DefaultVarsForTiDBBackend() 589 mustHaveInternalConnections = false 590 cfg.PostRestore.Checksum = OpLevelOff 591 cfg.PostRestore.Analyze = OpLevelOff 592 cfg.TikvImporter.DuplicateDetection = false 593 case BackendImporter, BackendLocal: 594 // RegionConcurrency > NumCPU is meaningless. 595 cpuCount := runtime.NumCPU() 596 if cfg.App.RegionConcurrency > cpuCount { 597 cfg.App.RegionConcurrency = cpuCount 598 } 599 cfg.DefaultVarsForImporterAndLocalBackend(ctx) 600 default: 601 return errors.Errorf("invalid config: unsupported `tikv-importer.backend` (%s)", cfg.TikvImporter.Backend) 602 } 603 604 // TODO calculate these from the machine's free memory. 605 if cfg.TikvImporter.EngineMemCacheSize == 0 { 606 cfg.TikvImporter.EngineMemCacheSize = defaultEngineMemCacheSize 607 } 608 if cfg.TikvImporter.LocalWriterMemCacheSize == 0 { 609 cfg.TikvImporter.LocalWriterMemCacheSize = defaultLocalWriterMemCacheSize 610 } 611 612 if cfg.TikvImporter.Backend == BackendLocal { 613 if err := cfg.CheckAndAdjustForLocalBackend(); err != nil { 614 return err 615 } 616 } else if cfg.TikvImporter.DuplicateDetection { 617 return errors.Errorf("invalid config: unsupported backend (%s) for duplicate-detection", cfg.TikvImporter.Backend) 618 } 619 620 if cfg.TikvImporter.Backend == BackendTiDB { 621 cfg.TikvImporter.OnDuplicate = strings.ToLower(cfg.TikvImporter.OnDuplicate) 622 switch cfg.TikvImporter.OnDuplicate { 623 case ReplaceOnDup, IgnoreOnDup, ErrorOnDup: 624 default: 625 return errors.Errorf("invalid config: unsupported `tikv-importer.on-duplicate` (%s)", cfg.TikvImporter.OnDuplicate) 626 } 627 } 628 629 var err error 630 cfg.TiDB.SQLMode, err = mysql.GetSQLMode(cfg.TiDB.StrSQLMode) 631 if err != nil { 632 return errors.Annotate(err, "invalid config: `mydumper.tidb.sql_mode` must be a valid SQL_MODE") 633 } 634 635 if err := cfg.CheckAndAdjustSecurity(); err != nil { 636 return err 637 } 638 639 // mydumper.filter and black-white-list cannot co-exist. 640 if cfg.HasLegacyBlackWhiteList() { 641 log.L().Warn("the config `black-white-list` has been deprecated, please replace with `mydumper.filter`") 642 if !common.StringSliceEqual(cfg.Mydumper.Filter, DefaultFilter) { 643 return errors.New("invalid config: `mydumper.filter` and `black-white-list` cannot be simultaneously defined") 644 } 645 } 646 647 for _, rule := range cfg.Routes { 648 if !cfg.Mydumper.CaseSensitive { 649 rule.ToLower() 650 } 651 if err := rule.Valid(); err != nil { 652 return errors.Trace(err) 653 } 654 } 655 656 if err := cfg.CheckAndAdjustTiDBPort(ctx, mustHaveInternalConnections); err != nil { 657 return err 658 } 659 cfg.AdjustMydumper() 660 cfg.AdjustCheckPoint() 661 return cfg.CheckAndAdjustFilePath() 662 } 663 664 func (cfg *Config) CheckAndAdjustForLocalBackend() error { 665 if len(cfg.TikvImporter.SortedKVDir) == 0 { 666 return errors.Errorf("tikv-importer.sorted-kv-dir must not be empty!") 667 } 668 669 storageSizeDir := filepath.Clean(cfg.TikvImporter.SortedKVDir) 670 sortedKVDirInfo, err := os.Stat(storageSizeDir) 671 672 switch { 673 case os.IsNotExist(err): 674 // the sorted-kv-dir does not exist, meaning we will create it automatically. 675 // so we extract the storage size from its parent directory. 676 storageSizeDir = filepath.Dir(storageSizeDir) 677 case err == nil: 678 if !sortedKVDirInfo.IsDir() { 679 return errors.Errorf("tikv-importer.sorted-kv-dir ('%s') is not a directory", storageSizeDir) 680 } 681 default: 682 return errors.Annotate(err, "invalid tikv-importer.sorted-kv-dir") 683 } 684 685 return nil 686 } 687 688 func (cfg *Config) DefaultVarsForTiDBBackend() { 689 if cfg.App.TableConcurrency == 0 { 690 cfg.App.TableConcurrency = cfg.App.RegionConcurrency 691 } 692 if cfg.App.IndexConcurrency == 0 { 693 cfg.App.IndexConcurrency = cfg.App.RegionConcurrency 694 } 695 } 696 697 func (cfg *Config) adjustDistSQLConcurrency(ctx context.Context) error { 698 tls, err := cfg.ToTLS() 699 if err != nil { 700 return err 701 } 702 result := &api.StoresInfo{} 703 err = tls.WithHost(cfg.TiDB.PdAddr).GetJSON(ctx, pdStores, result) 704 if err != nil { 705 return errors.Trace(err) 706 } 707 cfg.TiDB.DistSQLScanConcurrency = len(result.Stores) * distSQLScanConcurrencyPerStore 708 if cfg.TiDB.DistSQLScanConcurrency < defaultDistSQLScanConcurrency { 709 cfg.TiDB.DistSQLScanConcurrency = defaultDistSQLScanConcurrency 710 } 711 log.L().Info("adjust scan concurrency success", zap.Int("DistSQLScanConcurrency", cfg.TiDB.DistSQLScanConcurrency)) 712 return nil 713 } 714 715 func (cfg *Config) DefaultVarsForImporterAndLocalBackend(ctx context.Context) { 716 if cfg.TiDB.DistSQLScanConcurrency == defaultDistSQLScanConcurrency { 717 var e error 718 for i := 0; i < maxRetryTimes; i++ { 719 e = cfg.adjustDistSQLConcurrency(ctx) 720 if e == nil { 721 break 722 } 723 time.Sleep(defaultRetryBackoffTime) 724 } 725 if e != nil { 726 log.L().Error("failed to adjust scan concurrency", zap.Error(e)) 727 } 728 } 729 730 if cfg.App.IndexConcurrency == 0 { 731 cfg.App.IndexConcurrency = defaultIndexConcurrency 732 } 733 if cfg.App.TableConcurrency == 0 { 734 cfg.App.TableConcurrency = defaultTableConcurrency 735 } 736 737 if len(cfg.App.MetaSchemaName) == 0 { 738 cfg.App.MetaSchemaName = defaultMetaSchemaName 739 } 740 if cfg.TikvImporter.RangeConcurrency == 0 { 741 cfg.TikvImporter.RangeConcurrency = 16 742 } 743 if cfg.TikvImporter.RegionSplitSize == 0 { 744 cfg.TikvImporter.RegionSplitSize = SplitRegionSize 745 } 746 if cfg.TiDB.BuildStatsConcurrency == 0 { 747 cfg.TiDB.BuildStatsConcurrency = defaultBuildStatsConcurrency 748 } 749 if cfg.TiDB.IndexSerialScanConcurrency == 0 { 750 cfg.TiDB.IndexSerialScanConcurrency = defaultIndexSerialScanConcurrency 751 } 752 if cfg.TiDB.ChecksumTableConcurrency == 0 { 753 cfg.TiDB.ChecksumTableConcurrency = defaultChecksumTableConcurrency 754 } 755 } 756 757 func (cfg *Config) CheckAndAdjustTiDBPort(ctx context.Context, mustHaveInternalConnections bool) error { 758 // automatically determine the TiDB port & PD address from TiDB settings 759 if mustHaveInternalConnections && (cfg.TiDB.Port <= 0 || len(cfg.TiDB.PdAddr) == 0) { 760 tls, err := cfg.ToTLS() 761 if err != nil { 762 return err 763 } 764 765 var settings tidbcfg.Config 766 err = tls.GetJSON(ctx, "/settings", &settings) 767 if err != nil { 768 return errors.Annotate(err, "cannot fetch settings from TiDB, please manually fill in `tidb.port` and `tidb.pd-addr`") 769 } 770 if cfg.TiDB.Port <= 0 { 771 cfg.TiDB.Port = int(settings.Port) 772 } 773 if len(cfg.TiDB.PdAddr) == 0 { 774 pdAddrs := strings.Split(settings.Path, ",") 775 cfg.TiDB.PdAddr = pdAddrs[0] // FIXME support multiple PDs once importer can. 776 } 777 } 778 779 if cfg.TiDB.Port <= 0 { 780 return errors.New("invalid `tidb.port` setting") 781 } 782 if mustHaveInternalConnections && len(cfg.TiDB.PdAddr) == 0 { 783 return errors.New("invalid `tidb.pd-addr` setting") 784 } 785 return nil 786 } 787 788 func (cfg *Config) CheckAndAdjustFilePath() error { 789 var u *url.URL 790 791 // An absolute Windows path like "C:\Users\XYZ" would be interpreted as 792 // an URL with scheme "C" and opaque data "\Users\XYZ". 793 // Therefore, we only perform URL parsing if we are sure the path is not 794 // an absolute Windows path. 795 // Here we use the `filepath.VolumeName` which can identify the "C:" part 796 // out of the path. On Linux this method always return an empty string. 797 // On Windows, the drive letter can only be single letters from "A:" to "Z:", 798 // so this won't mistake "S3:" as a Windows path. 799 if len(filepath.VolumeName(cfg.Mydumper.SourceDir)) == 0 { 800 var err error 801 u, err = url.Parse(cfg.Mydumper.SourceDir) 802 if err != nil { 803 return errors.Trace(err) 804 } 805 } else { 806 u = &url.URL{} 807 } 808 809 // convert path and relative path to a valid file url 810 if u.Scheme == "" { 811 if !common.IsDirExists(cfg.Mydumper.SourceDir) { 812 return errors.Errorf("%s: mydumper dir does not exist", cfg.Mydumper.SourceDir) 813 } 814 absPath, err := filepath.Abs(cfg.Mydumper.SourceDir) 815 if err != nil { 816 return errors.Annotatef(err, "covert data-source-dir '%s' to absolute path failed", cfg.Mydumper.SourceDir) 817 } 818 cfg.Mydumper.SourceDir = "file://" + filepath.ToSlash(absPath) 819 u.Path = absPath 820 u.Scheme = "file" 821 } 822 823 found := false 824 for _, t := range supportedStorageTypes { 825 if u.Scheme == t { 826 found = true 827 break 828 } 829 } 830 if !found { 831 return errors.Errorf("Unsupported data-source-dir url '%s'", cfg.Mydumper.SourceDir) 832 } 833 return nil 834 } 835 836 func (cfg *Config) AdjustCheckPoint() { 837 if len(cfg.Checkpoint.Schema) == 0 { 838 cfg.Checkpoint.Schema = "tidb_lightning_checkpoint" 839 } 840 if len(cfg.Checkpoint.Driver) == 0 { 841 cfg.Checkpoint.Driver = CheckpointDriverFile 842 } 843 if len(cfg.Checkpoint.DSN) == 0 { 844 switch cfg.Checkpoint.Driver { 845 case CheckpointDriverMySQL: 846 param := common.MySQLConnectParam{ 847 Host: cfg.TiDB.Host, 848 Port: cfg.TiDB.Port, 849 User: cfg.TiDB.User, 850 Password: cfg.TiDB.Psw, 851 SQLMode: mysql.DefaultSQLMode, 852 MaxAllowedPacket: defaultMaxAllowedPacket, 853 TLS: cfg.TiDB.TLS, 854 } 855 cfg.Checkpoint.DSN = param.ToDSN() 856 case CheckpointDriverFile: 857 cfg.Checkpoint.DSN = "/tmp/" + cfg.Checkpoint.Schema + ".pb" 858 } 859 } 860 } 861 862 func (cfg *Config) AdjustMydumper() { 863 if cfg.Mydumper.BatchImportRatio < 0.0 || cfg.Mydumper.BatchImportRatio >= 1.0 { 864 cfg.Mydumper.BatchImportRatio = 0.75 865 } 866 if cfg.Mydumper.ReadBlockSize <= 0 { 867 cfg.Mydumper.ReadBlockSize = ReadBlockSize 868 } 869 if len(cfg.Mydumper.CharacterSet) == 0 { 870 cfg.Mydumper.CharacterSet = "auto" 871 } 872 873 if len(cfg.Mydumper.IgnoreColumns) != 0 { 874 // Tolower columns cause we use Name.L to compare column in tidb. 875 for _, ig := range cfg.Mydumper.IgnoreColumns { 876 cols := make([]string, len(ig.Columns)) 877 for i, col := range ig.Columns { 878 cols[i] = strings.ToLower(col) 879 } 880 ig.Columns = cols 881 } 882 } 883 } 884 885 func (cfg *Config) CheckAndAdjustSecurity() error { 886 if cfg.TiDB.Security == nil { 887 cfg.TiDB.Security = &cfg.Security 888 } 889 890 switch cfg.TiDB.TLS { 891 case "": 892 if len(cfg.TiDB.Security.CAPath) > 0 { 893 cfg.TiDB.TLS = "cluster" 894 } else { 895 cfg.TiDB.TLS = "false" 896 } 897 case "cluster": 898 if len(cfg.Security.CAPath) == 0 { 899 return errors.New("invalid config: cannot set `tidb.tls` to 'cluster' without a [security] section") 900 } 901 case "false", "skip-verify", "preferred": 902 break 903 default: 904 return errors.Errorf("invalid config: unsupported `tidb.tls` config %s", cfg.TiDB.TLS) 905 } 906 return nil 907 } 908 909 // HasLegacyBlackWhiteList checks whether the deprecated [black-white-list] section 910 // was defined. 911 func (cfg *Config) HasLegacyBlackWhiteList() bool { 912 return len(cfg.BWList.DoTables) != 0 || len(cfg.BWList.DoDBs) != 0 || len(cfg.BWList.IgnoreTables) != 0 || len(cfg.BWList.IgnoreDBs) != 0 913 }