github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/lightning/config/config.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package config 15 16 import ( 17 "context" 18 "encoding/json" 19 "fmt" 20 "net" 21 "net/url" 22 "os" 23 "path/filepath" 24 "runtime" 25 "strconv" 26 "strings" 27 "time" 28 29 "github.com/BurntSushi/toml" 30 "github.com/docker/go-units" 31 gomysql "github.com/go-sql-driver/mysql" 32 "github.com/pingcap/errors" 33 "github.com/pingcap/parser/mysql" 34 filter "github.com/pingcap/tidb-tools/pkg/table-filter" 35 router "github.com/pingcap/tidb-tools/pkg/table-router" 36 tidbcfg "github.com/pingcap/tidb/config" 37 "go.uber.org/zap" 38 39 "github.com/pingcap/tidb-lightning/lightning/common" 40 "github.com/pingcap/tidb-lightning/lightning/log" 41 ) 42 43 const ( 44 // ImportMode defines mode of import for tikv. 45 ImportMode = "import" 46 // NormalMode defines mode of normal for tikv. 47 NormalMode = "normal" 48 49 // BackendTiDB is a constant for choosing the "TiDB" backend in the configuration. 50 BackendTiDB = "tidb" 51 // BackendImporter is a constant for choosing the "Importer" backend in the configuration. 52 BackendImporter = "importer" 53 // BackendLocal is a constant for choosing the "Local" backup in the configuration. 54 // In this mode, we write & sort kv pairs with local storage and directly write them to tikv. 55 BackendLocal = "local" 56 57 // CheckpointDriverMySQL is a constant for choosing the "MySQL" checkpoint driver in the configuration. 58 CheckpointDriverMySQL = "mysql" 59 // CheckpointDriverFile is a constant for choosing the "File" checkpoint driver in the configuration. 60 CheckpointDriverFile = "file" 61 62 // ReplaceOnDup indicates using REPLACE INTO to insert data 63 ReplaceOnDup = "replace" 64 // IgnoreOnDup indicates using INSERT IGNORE INTO to insert data 65 IgnoreOnDup = "ignore" 66 // ErrorOnDup indicates using INSERT INTO to insert data, which would violate PK or UNIQUE constraint 67 ErrorOnDup = "error" 68 69 defaultDistSQLScanConcurrency = 15 70 defaultBuildStatsConcurrency = 20 71 defaultIndexSerialScanConcurrency = 20 72 defaultChecksumTableConcurrency = 2 73 ) 74 75 const ( 76 LocalMemoryTableSize = 512 * units.MiB 77 78 // autoDiskQuotaLocalReservedSize is the estimated size a local-backend 79 // engine may gain after calling Flush(). This is currently defined by its 80 // max MemTable size (512 MiB). It is used to compensate for the soft limit 81 // of the disk quota against the hard limit of the disk free space. 82 // 83 // With a maximum of 8 engines, this should contribute 4.0 GiB to the 84 // reserved size. 85 autoDiskQuotaLocalReservedSize uint64 = LocalMemoryTableSize 86 87 // autoDiskQuotaLocalReservedSpeed is the estimated size increase per 88 // millisecond per write thread the local backend may gain on all engines. 89 // This is used to compute the maximum size overshoot between two disk quota 90 // checks, if the first one has barely passed. 91 // 92 // With cron.check-disk-quota = 1m, region-concurrency = 40, this should 93 // contribute 2.3 GiB to the reserved size. 94 autoDiskQuotaLocalReservedSpeed uint64 = 1 * units.KiB 95 ) 96 97 var ( 98 defaultConfigPaths = []string{"tidb-lightning.toml", "conf/tidb-lightning.toml"} 99 supportedStorageTypes = []string{"file", "local", "s3", "noop"} 100 101 DefaultFilter = []string{ 102 "*.*", 103 "!mysql.*", 104 "!sys.*", 105 "!INFORMATION_SCHEMA.*", 106 "!PERFORMANCE_SCHEMA.*", 107 "!METRICS_SCHEMA.*", 108 "!INSPECTION_SCHEMA.*", 109 } 110 ) 111 112 type DBStore struct { 113 Host string `toml:"host" json:"host"` 114 Port int `toml:"port" json:"port"` 115 User string `toml:"user" json:"user"` 116 Psw string `toml:"password" json:"-"` 117 StatusPort int `toml:"status-port" json:"status-port"` 118 PdAddr string `toml:"pd-addr" json:"pd-addr"` 119 StrSQLMode string `toml:"sql-mode" json:"sql-mode"` 120 TLS string `toml:"tls" json:"tls"` 121 Security *Security `toml:"security" json:"security"` 122 123 SQLMode mysql.SQLMode `toml:"-" json:"-"` 124 MaxAllowedPacket uint64 `toml:"max-allowed-packet" json:"max-allowed-packet"` 125 126 DistSQLScanConcurrency int `toml:"distsql-scan-concurrency" json:"distsql-scan-concurrency"` 127 BuildStatsConcurrency int `toml:"build-stats-concurrency" json:"build-stats-concurrency"` 128 IndexSerialScanConcurrency int `toml:"index-serial-scan-concurrency" json:"index-serial-scan-concurrency"` 129 ChecksumTableConcurrency int `toml:"checksum-table-concurrency" json:"checksum-table-concurrency"` 130 } 131 132 type Config struct { 133 TaskID int64 `toml:"-" json:"id"` 134 135 App Lightning `toml:"lightning" json:"lightning"` 136 TiDB DBStore `toml:"tidb" json:"tidb"` 137 138 Checkpoint Checkpoint `toml:"checkpoint" json:"checkpoint"` 139 Mydumper MydumperRuntime `toml:"mydumper" json:"mydumper"` 140 TikvImporter TikvImporter `toml:"tikv-importer" json:"tikv-importer"` 141 PostRestore PostRestore `toml:"post-restore" json:"post-restore"` 142 Cron Cron `toml:"cron" json:"cron"` 143 Routes []*router.TableRule `toml:"routes" json:"routes"` 144 Security Security `toml:"security" json:"security"` 145 146 BWList filter.MySQLReplicationRules `toml:"black-white-list" json:"black-white-list"` 147 } 148 149 func (c *Config) String() string { 150 bytes, err := json.Marshal(c) 151 if err != nil { 152 log.L().Error("marshal config to json error", log.ShortError(err)) 153 } 154 return string(bytes) 155 } 156 157 func (c *Config) ToTLS() (*common.TLS, error) { 158 hostPort := net.JoinHostPort(c.TiDB.Host, strconv.Itoa(c.TiDB.StatusPort)) 159 return common.NewTLS(c.Security.CAPath, c.Security.CertPath, c.Security.KeyPath, hostPort) 160 } 161 162 type Lightning struct { 163 TableConcurrency int `toml:"table-concurrency" json:"table-concurrency"` 164 IndexConcurrency int `toml:"index-concurrency" json:"index-concurrency"` 165 RegionConcurrency int `toml:"region-concurrency" json:"region-concurrency"` 166 IOConcurrency int `toml:"io-concurrency" json:"io-concurrency"` 167 CheckRequirements bool `toml:"check-requirements" json:"check-requirements"` 168 } 169 170 type PostOpLevel int 171 172 const ( 173 OpLevelOff PostOpLevel = iota 174 OpLevelOptional 175 OpLevelRequired 176 ) 177 178 func (t *PostOpLevel) UnmarshalTOML(v interface{}) error { 179 switch val := v.(type) { 180 case bool: 181 if val { 182 *t = OpLevelRequired 183 } else { 184 *t = OpLevelOff 185 } 186 case string: 187 return t.FromStringValue(val) 188 default: 189 return errors.Errorf("invalid op level '%v', please choose valid option between ['off', 'optional', 'required']", v) 190 } 191 return nil 192 } 193 194 func (t PostOpLevel) MarshalText() ([]byte, error) { 195 return []byte(t.String()), nil 196 } 197 198 // parser command line parameter 199 func (t *PostOpLevel) FromStringValue(s string) error { 200 switch strings.ToLower(s) { 201 case "off", "false": 202 *t = OpLevelOff 203 case "required", "true": 204 *t = OpLevelRequired 205 case "optional": 206 *t = OpLevelOptional 207 default: 208 return errors.Errorf("invalid op level '%s', please choose valid option between ['off', 'optional', 'required']", s) 209 } 210 return nil 211 } 212 213 func (t *PostOpLevel) MarshalJSON() ([]byte, error) { 214 return []byte(`"` + t.String() + `"`), nil 215 } 216 217 func (t *PostOpLevel) UnmarshalJSON(data []byte) error { 218 return t.FromStringValue(strings.Trim(string(data), `"`)) 219 } 220 221 func (t PostOpLevel) String() string { 222 switch t { 223 case OpLevelOff: 224 return "off" 225 case OpLevelOptional: 226 return "optional" 227 case OpLevelRequired: 228 return "required" 229 default: 230 panic(fmt.Sprintf("invalid post process type '%d'", t)) 231 } 232 } 233 234 // PostRestore has some options which will be executed after kv restored. 235 type PostRestore struct { 236 Level1Compact bool `toml:"level-1-compact" json:"level-1-compact"` 237 Compact bool `toml:"compact" json:"compact"` 238 Checksum PostOpLevel `toml:"checksum" json:"checksum"` 239 Analyze PostOpLevel `toml:"analyze" json:"analyze"` 240 PostProcessAtLast bool `toml:"post-process-at-last" json:"post-process-at-last"` 241 } 242 243 type CSVConfig struct { 244 Separator string `toml:"separator" json:"separator"` 245 Delimiter string `toml:"delimiter" json:"delimiter"` 246 Header bool `toml:"header" json:"header"` 247 TrimLastSep bool `toml:"trim-last-separator" json:"trim-last-separator"` 248 NotNull bool `toml:"not-null" json:"not-null"` 249 Null string `toml:"null" json:"null"` 250 BackslashEscape bool `toml:"backslash-escape" json:"backslash-escape"` 251 } 252 253 type MydumperRuntime struct { 254 ReadBlockSize ByteSize `toml:"read-block-size" json:"read-block-size"` 255 BatchSize ByteSize `toml:"batch-size" json:"batch-size"` 256 BatchImportRatio float64 `toml:"batch-import-ratio" json:"batch-import-ratio"` 257 SourceDir string `toml:"data-source-dir" json:"data-source-dir"` 258 NoSchema bool `toml:"no-schema" json:"no-schema"` 259 CharacterSet string `toml:"character-set" json:"character-set"` 260 CSV CSVConfig `toml:"csv" json:"csv"` 261 CaseSensitive bool `toml:"case-sensitive" json:"case-sensitive"` 262 StrictFormat bool `toml:"strict-format" json:"strict-format"` 263 MaxRegionSize ByteSize `toml:"max-region-size" json:"max-region-size"` 264 Filter []string `toml:"filter" json:"filter"` 265 FileRouters []*FileRouteRule `toml:"files" json:"files"` 266 DefaultFileRules bool `toml:"default-file-rules" json:"default-file-rules"` 267 } 268 269 type FileRouteRule struct { 270 Pattern string `json:"pattern" toml:"pattern" yaml:"pattern"` 271 Path string `json:"path" toml:"path" yaml:"path"` 272 Schema string `json:"schema" toml:"schema" yaml:"schema"` 273 Table string `json:"table" toml:"table" yaml:"table"` 274 Type string `json:"type" toml:"type" yaml:"type"` 275 Key string `json:"key" toml:"key" yaml:"key"` 276 Compression string `json:"compression" toml:"compression" yaml:"compression"` 277 } 278 279 type TikvImporter struct { 280 Addr string `toml:"addr" json:"addr"` 281 Backend string `toml:"backend" json:"backend"` 282 OnDuplicate string `toml:"on-duplicate" json:"on-duplicate"` 283 MaxKVPairs int `toml:"max-kv-pairs" json:"max-kv-pairs"` 284 SendKVPairs int `toml:"send-kv-pairs" json:"send-kv-pairs"` 285 RegionSplitSize ByteSize `toml:"region-split-size" json:"region-split-size"` 286 SortedKVDir string `toml:"sorted-kv-dir" json:"sorted-kv-dir"` 287 DiskQuota ByteSize `toml:"disk-quota" json:"disk-quota"` 288 RangeConcurrency int `toml:"range-concurrency" json:"range-concurrency"` 289 } 290 291 type Checkpoint struct { 292 Enable bool `toml:"enable" json:"enable"` 293 Schema string `toml:"schema" json:"schema"` 294 DSN string `toml:"dsn" json:"-"` // DSN may contain password, don't expose this to JSON. 295 Driver string `toml:"driver" json:"driver"` 296 KeepAfterSuccess bool `toml:"keep-after-success" json:"keep-after-success"` 297 } 298 299 type Cron struct { 300 SwitchMode Duration `toml:"switch-mode" json:"switch-mode"` 301 LogProgress Duration `toml:"log-progress" json:"log-progress"` 302 CheckDiskQuota Duration `toml:"check-disk-quota" json:"check-disk-quota"` 303 } 304 305 type Security struct { 306 CAPath string `toml:"ca-path" json:"ca-path"` 307 CertPath string `toml:"cert-path" json:"cert-path"` 308 KeyPath string `toml:"key-path" json:"key-path"` 309 // RedactInfoLog indicates that whether enabling redact log 310 RedactInfoLog bool `toml:"redact-info-log" json:"redact-info-log"` 311 } 312 313 // RegistersMySQL registers (or deregisters) the TLS config with name "cluster" 314 // for use in `sql.Open()`. This method is goroutine-safe. 315 func (sec *Security) RegisterMySQL() error { 316 if sec == nil { 317 return nil 318 } 319 tlsConfig, err := common.ToTLSConfig(sec.CAPath, sec.CertPath, sec.KeyPath) 320 switch { 321 case err != nil: 322 return err 323 case tlsConfig != nil: 324 // error happens only when the key coincides with the built-in names. 325 _ = gomysql.RegisterTLSConfig("cluster", tlsConfig) 326 default: 327 gomysql.DeregisterTLSConfig("cluster") 328 } 329 return nil 330 } 331 332 // A duration which can be deserialized from a TOML string. 333 // Implemented as https://github.com/BurntSushi/toml#using-the-encodingtextunmarshaler-interface 334 type Duration struct { 335 time.Duration 336 } 337 338 func (d *Duration) UnmarshalText(text []byte) error { 339 var err error 340 d.Duration, err = time.ParseDuration(string(text)) 341 return err 342 } 343 344 func (d Duration) MarshalText() ([]byte, error) { 345 return []byte(d.String()), nil 346 } 347 348 func (d *Duration) MarshalJSON() ([]byte, error) { 349 return []byte(fmt.Sprintf(`"%s"`, d.Duration)), nil 350 } 351 352 func NewConfig() *Config { 353 return &Config{ 354 App: Lightning{ 355 RegionConcurrency: runtime.NumCPU(), 356 TableConcurrency: 0, 357 IndexConcurrency: 0, 358 IOConcurrency: 5, 359 CheckRequirements: true, 360 }, 361 Checkpoint: Checkpoint{ 362 Enable: true, 363 }, 364 TiDB: DBStore{ 365 Host: "127.0.0.1", 366 User: "root", 367 StatusPort: 10080, 368 StrSQLMode: "ONLY_FULL_GROUP_BY,NO_AUTO_CREATE_USER", 369 MaxAllowedPacket: defaultMaxAllowedPacket, 370 BuildStatsConcurrency: defaultBuildStatsConcurrency, 371 DistSQLScanConcurrency: defaultDistSQLScanConcurrency, 372 IndexSerialScanConcurrency: defaultIndexSerialScanConcurrency, 373 ChecksumTableConcurrency: defaultChecksumTableConcurrency, 374 }, 375 Cron: Cron{ 376 SwitchMode: Duration{Duration: 5 * time.Minute}, 377 LogProgress: Duration{Duration: 5 * time.Minute}, 378 CheckDiskQuota: Duration{Duration: 1 * time.Minute}, 379 }, 380 Mydumper: MydumperRuntime{ 381 ReadBlockSize: ReadBlockSize, 382 CSV: CSVConfig{ 383 Separator: ",", 384 Delimiter: `"`, 385 Header: true, 386 NotNull: false, 387 Null: `\N`, 388 BackslashEscape: true, 389 TrimLastSep: false, 390 }, 391 StrictFormat: false, 392 MaxRegionSize: MaxRegionSize, 393 Filter: DefaultFilter, 394 }, 395 TikvImporter: TikvImporter{ 396 Backend: BackendImporter, 397 OnDuplicate: ReplaceOnDup, 398 MaxKVPairs: 4096, 399 SendKVPairs: 32768, 400 RegionSplitSize: SplitRegionSize, 401 }, 402 PostRestore: PostRestore{ 403 Checksum: OpLevelRequired, 404 Analyze: OpLevelOptional, 405 PostProcessAtLast: true, 406 }, 407 } 408 } 409 410 // LoadFromGlobal resets the current configuration to the global settings. 411 func (cfg *Config) LoadFromGlobal(global *GlobalConfig) error { 412 if err := cfg.LoadFromTOML(global.ConfigFileContent); err != nil { 413 return err 414 } 415 416 cfg.TiDB.Host = global.TiDB.Host 417 cfg.TiDB.Port = global.TiDB.Port 418 cfg.TiDB.User = global.TiDB.User 419 cfg.TiDB.Psw = global.TiDB.Psw 420 cfg.TiDB.StatusPort = global.TiDB.StatusPort 421 cfg.TiDB.PdAddr = global.TiDB.PdAddr 422 cfg.Mydumper.SourceDir = global.Mydumper.SourceDir 423 cfg.Mydumper.NoSchema = global.Mydumper.NoSchema 424 cfg.Mydumper.Filter = global.Mydumper.Filter 425 cfg.TikvImporter.Addr = global.TikvImporter.Addr 426 cfg.TikvImporter.Backend = global.TikvImporter.Backend 427 cfg.TikvImporter.SortedKVDir = global.TikvImporter.SortedKVDir 428 cfg.Checkpoint.Enable = global.Checkpoint.Enable 429 cfg.PostRestore.Checksum = global.PostRestore.Checksum 430 cfg.PostRestore.Analyze = global.PostRestore.Analyze 431 cfg.App.CheckRequirements = global.App.CheckRequirements 432 cfg.Security = global.Security 433 434 return nil 435 } 436 437 // LoadFromTOML overwrites the current configuration by the TOML data 438 // If data contains toml items not in Config and GlobalConfig, return an error 439 // If data contains toml items not in Config, thus won't take effect, warn user 440 func (cfg *Config) LoadFromTOML(data []byte) error { 441 // bothUnused saves toml items not belong to Config nor GlobalConfig 442 var bothUnused []string 443 // warnItems saves legal toml items but won't effect 444 var warnItems []string 445 446 dataStr := string(data) 447 448 // Here we load toml into cfg, and rest logic is check unused keys 449 metaData, err := toml.Decode(dataStr, cfg) 450 451 if err != nil { 452 return errors.Trace(err) 453 } 454 455 unusedConfigKeys := metaData.Undecoded() 456 if len(unusedConfigKeys) == 0 { 457 return nil 458 } 459 460 // Now we deal with potential both-unused keys of Config and GlobalConfig struct 461 462 metaDataGlobal, err := toml.Decode(dataStr, &GlobalConfig{}) 463 if err != nil { 464 return errors.Trace(err) 465 } 466 467 // Key type returned by metadata.Undecoded doesn't have a equality comparison, 468 // we convert them to string type instead, and this conversion is identical 469 unusedGlobalKeys := metaDataGlobal.Undecoded() 470 unusedGlobalKeyStrs := make(map[string]struct{}) 471 for _, key := range unusedGlobalKeys { 472 unusedGlobalKeyStrs[key.String()] = struct{}{} 473 } 474 475 for _, key := range unusedConfigKeys { 476 keyStr := key.String() 477 if _, found := unusedGlobalKeyStrs[keyStr]; found { 478 bothUnused = append(bothUnused, keyStr) 479 } else { 480 warnItems = append(warnItems, keyStr) 481 } 482 } 483 484 if len(bothUnused) > 0 { 485 return errors.Errorf("config file contained unknown configuration options: %s", 486 strings.Join(bothUnused, ", ")) 487 } 488 489 // Warn that some legal field of config file won't be overwritten, such as lightning.file 490 if len(warnItems) > 0 { 491 log.L().Warn("currently only per-task configuration can be applied, global configuration changes can only be made on startup", 492 zap.Strings("global config changes", warnItems)) 493 } 494 495 return nil 496 } 497 498 // Adjust fixes the invalid or unspecified settings to reasonable valid values. 499 func (cfg *Config) Adjust(ctx context.Context) error { 500 // Reject problematic CSV configurations. 501 csv := &cfg.Mydumper.CSV 502 if len(csv.Separator) == 0 { 503 return errors.New("invalid config: `mydumper.csv.separator` must not be empty") 504 } 505 506 if len(csv.Delimiter) > 0 && (strings.HasPrefix(csv.Separator, csv.Delimiter) || strings.HasPrefix(csv.Delimiter, csv.Separator)) { 507 return errors.New("invalid config: `mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other") 508 } 509 510 if csv.BackslashEscape { 511 if csv.Separator == `\` { 512 return errors.New("invalid config: cannot use '\\' as CSV separator when `mydumper.csv.backslash-escape` is true") 513 } 514 if csv.Delimiter == `\` { 515 return errors.New("invalid config: cannot use '\\' as CSV delimiter when `mydumper.csv.backslash-escape` is true") 516 } 517 } 518 519 // adjust file routing 520 for _, rule := range cfg.Mydumper.FileRouters { 521 if filepath.IsAbs(rule.Path) { 522 relPath, err := filepath.Rel(cfg.Mydumper.SourceDir, rule.Path) 523 if err != nil { 524 return errors.Trace(err) 525 } 526 // ".." means that this path is not in source dir, so we should return an error 527 if strings.HasPrefix(relPath, "..") { 528 return errors.Errorf("file route path '%s' is not in source dir '%s'", rule.Path, cfg.Mydumper.SourceDir) 529 } 530 rule.Path = relPath 531 } 532 } 533 534 // enable default file route rule if no rules are set 535 if len(cfg.Mydumper.FileRouters) == 0 { 536 cfg.Mydumper.DefaultFileRules = true 537 } 538 539 cfg.TikvImporter.Backend = strings.ToLower(cfg.TikvImporter.Backend) 540 mustHaveInternalConnections := true 541 switch cfg.TikvImporter.Backend { 542 case BackendTiDB: 543 if cfg.App.IndexConcurrency == 0 { 544 cfg.App.IndexConcurrency = cfg.App.RegionConcurrency 545 } 546 if cfg.App.TableConcurrency == 0 { 547 cfg.App.TableConcurrency = cfg.App.RegionConcurrency 548 } 549 mustHaveInternalConnections = false 550 case BackendImporter, BackendLocal: 551 if cfg.App.IndexConcurrency == 0 { 552 cfg.App.IndexConcurrency = 2 553 } 554 if cfg.App.TableConcurrency == 0 { 555 cfg.App.TableConcurrency = 6 556 } 557 if cfg.TikvImporter.RangeConcurrency == 0 { 558 cfg.TikvImporter.RangeConcurrency = 16 559 } 560 if cfg.TikvImporter.RegionSplitSize == 0 { 561 cfg.TikvImporter.RegionSplitSize = SplitRegionSize 562 } 563 if cfg.TiDB.DistSQLScanConcurrency == 0 { 564 cfg.TiDB.DistSQLScanConcurrency = defaultDistSQLScanConcurrency 565 } 566 if cfg.TiDB.BuildStatsConcurrency == 0 { 567 cfg.TiDB.BuildStatsConcurrency = defaultBuildStatsConcurrency 568 } 569 if cfg.TiDB.IndexSerialScanConcurrency == 0 { 570 cfg.TiDB.IndexSerialScanConcurrency = defaultIndexSerialScanConcurrency 571 } 572 if cfg.TiDB.ChecksumTableConcurrency == 0 { 573 cfg.TiDB.ChecksumTableConcurrency = defaultChecksumTableConcurrency 574 } 575 default: 576 return errors.Errorf("invalid config: unsupported `tikv-importer.backend` (%s)", cfg.TikvImporter.Backend) 577 } 578 579 if cfg.TikvImporter.Backend == BackendLocal { 580 if len(cfg.TikvImporter.SortedKVDir) == 0 { 581 return errors.Errorf("tikv-importer.sorted-kv-dir must not be empty!") 582 } 583 584 storageSizeDir := filepath.Clean(cfg.TikvImporter.SortedKVDir) 585 sortedKVDirInfo, err := os.Stat(storageSizeDir) 586 switch { 587 case os.IsNotExist(err): 588 // the sorted-kv-dir does not exist, meaning we will create it automatically. 589 // so we extract the storage size from its parent directory. 590 storageSizeDir = filepath.Dir(storageSizeDir) 591 case err == nil: 592 if !sortedKVDirInfo.IsDir() { 593 return errors.Errorf("tikv-importer.sorted-kv-dir ('%s') is not a directory", storageSizeDir) 594 } 595 default: 596 return errors.Annotate(err, "invalid tikv-importer.sorted-kv-dir") 597 } 598 599 if cfg.TikvImporter.DiskQuota == 0 { 600 enginesCount := uint64(cfg.App.IndexConcurrency + cfg.App.TableConcurrency) 601 writeAmount := uint64(cfg.App.RegionConcurrency) * uint64(cfg.Cron.CheckDiskQuota.Milliseconds()) 602 reservedSize := enginesCount*autoDiskQuotaLocalReservedSize + writeAmount*autoDiskQuotaLocalReservedSpeed 603 604 storageSize, err := common.GetStorageSize(storageSizeDir) 605 if err != nil { 606 return err 607 } 608 if storageSize.Available <= reservedSize { 609 return errors.Errorf( 610 "insufficient disk free space on `%s` (only %s, expecting >%s), please use a storage with enough free space, or specify `tikv-importer.disk-quota`", 611 cfg.TikvImporter.SortedKVDir, 612 units.BytesSize(float64(storageSize.Available)), 613 units.BytesSize(float64(reservedSize))) 614 } 615 cfg.TikvImporter.DiskQuota = ByteSize(storageSize.Available - reservedSize) 616 } 617 } 618 619 if cfg.TikvImporter.Backend == BackendTiDB { 620 cfg.TikvImporter.OnDuplicate = strings.ToLower(cfg.TikvImporter.OnDuplicate) 621 switch cfg.TikvImporter.OnDuplicate { 622 case ReplaceOnDup, IgnoreOnDup, ErrorOnDup: 623 default: 624 return errors.Errorf("invalid config: unsupported `tikv-importer.on-duplicate` (%s)", cfg.TikvImporter.OnDuplicate) 625 } 626 } 627 628 var err error 629 cfg.TiDB.SQLMode, err = mysql.GetSQLMode(cfg.TiDB.StrSQLMode) 630 if err != nil { 631 return errors.Annotate(err, "invalid config: `mydumper.tidb.sql_mode` must be a valid SQL_MODE") 632 } 633 634 if cfg.TiDB.Security == nil { 635 cfg.TiDB.Security = &cfg.Security 636 } 637 638 switch cfg.TiDB.TLS { 639 case "": 640 if len(cfg.TiDB.Security.CAPath) > 0 { 641 cfg.TiDB.TLS = "cluster" 642 } else { 643 cfg.TiDB.TLS = "false" 644 } 645 case "cluster": 646 if len(cfg.Security.CAPath) == 0 { 647 return errors.New("invalid config: cannot set `tidb.tls` to 'cluster' without a [security] section") 648 } 649 case "false", "skip-verify", "preferred": 650 break 651 default: 652 return errors.Errorf("invalid config: unsupported `tidb.tls` config %s", cfg.TiDB.TLS) 653 } 654 655 // mydumper.filter and black-white-list cannot co-exist. 656 if cfg.HasLegacyBlackWhiteList() { 657 log.L().Warn("the config `black-white-list` has been deprecated, please replace with `mydumper.filter`") 658 if !common.StringSliceEqual(cfg.Mydumper.Filter, DefaultFilter) { 659 return errors.New("invalid config: `mydumper.filter` and `black-white-list` cannot be simultaneously defined") 660 } 661 } 662 663 for _, rule := range cfg.Routes { 664 if !cfg.Mydumper.CaseSensitive { 665 rule.ToLower() 666 } 667 if err := rule.Valid(); err != nil { 668 return errors.Trace(err) 669 } 670 } 671 672 // automatically determine the TiDB port & PD address from TiDB settings 673 if mustHaveInternalConnections && (cfg.TiDB.Port <= 0 || len(cfg.TiDB.PdAddr) == 0) { 674 tls, err := cfg.ToTLS() 675 if err != nil { 676 return err 677 } 678 679 var settings tidbcfg.Config 680 err = tls.GetJSON(ctx, "/settings", &settings) 681 if err != nil { 682 return errors.Annotate(err, "cannot fetch settings from TiDB, please manually fill in `tidb.port` and `tidb.pd-addr`") 683 } 684 if cfg.TiDB.Port <= 0 { 685 cfg.TiDB.Port = int(settings.Port) 686 } 687 if len(cfg.TiDB.PdAddr) == 0 { 688 pdAddrs := strings.Split(settings.Path, ",") 689 cfg.TiDB.PdAddr = pdAddrs[0] // FIXME support multiple PDs once importer can. 690 } 691 } 692 693 if cfg.TiDB.Port <= 0 { 694 return errors.New("invalid `tidb.port` setting") 695 } 696 if mustHaveInternalConnections && len(cfg.TiDB.PdAddr) == 0 { 697 return errors.New("invalid `tidb.pd-addr` setting") 698 } 699 700 // handle mydumper 701 if cfg.Mydumper.BatchSize <= 0 { 702 // if rows in source files are not sorted by primary key(if primary is number or cluster index enabled), 703 // the key range in each data engine may have overlap, thus a bigger engine size can somewhat alleviate it. 704 cfg.Mydumper.BatchSize = defaultBatchSize 705 706 } 707 if cfg.Mydumper.BatchImportRatio < 0.0 || cfg.Mydumper.BatchImportRatio >= 1.0 { 708 cfg.Mydumper.BatchImportRatio = 0.75 709 } 710 if cfg.Mydumper.ReadBlockSize <= 0 { 711 cfg.Mydumper.ReadBlockSize = ReadBlockSize 712 } 713 if len(cfg.Mydumper.CharacterSet) == 0 { 714 cfg.Mydumper.CharacterSet = "auto" 715 } 716 717 if len(cfg.Checkpoint.Schema) == 0 { 718 cfg.Checkpoint.Schema = "tidb_lightning_checkpoint" 719 } 720 if len(cfg.Checkpoint.Driver) == 0 { 721 cfg.Checkpoint.Driver = CheckpointDriverFile 722 } 723 if len(cfg.Checkpoint.DSN) == 0 { 724 switch cfg.Checkpoint.Driver { 725 case CheckpointDriverMySQL: 726 param := common.MySQLConnectParam{ 727 Host: cfg.TiDB.Host, 728 Port: cfg.TiDB.Port, 729 User: cfg.TiDB.User, 730 Password: cfg.TiDB.Psw, 731 SQLMode: mysql.DefaultSQLMode, 732 MaxAllowedPacket: defaultMaxAllowedPacket, 733 TLS: cfg.TiDB.TLS, 734 } 735 cfg.Checkpoint.DSN = param.ToDSN() 736 case CheckpointDriverFile: 737 cfg.Checkpoint.DSN = "/tmp/" + cfg.Checkpoint.Schema + ".pb" 738 } 739 } 740 741 var u *url.URL 742 743 // An absolute Windows path like "C:\Users\XYZ" would be interpreted as 744 // an URL with scheme "C" and opaque data "\Users\XYZ". 745 // Therefore, we only perform URL parsing if we are sure the path is not 746 // an absolute Windows path. 747 // Here we use the `filepath.VolumeName` which can identify the "C:" part 748 // out of the path. On Linux this method always return an empty string. 749 // On Windows, the drive letter can only be single letters from "A:" to "Z:", 750 // so this won't mistake "S3:" as a Windows path. 751 if len(filepath.VolumeName(cfg.Mydumper.SourceDir)) == 0 { 752 u, err = url.Parse(cfg.Mydumper.SourceDir) 753 if err != nil { 754 return errors.Trace(err) 755 } 756 } else { 757 u = &url.URL{} 758 } 759 760 // convert path and relative path to a valid file url 761 if u.Scheme == "" { 762 if !common.IsDirExists(cfg.Mydumper.SourceDir) { 763 return errors.Errorf("%s: mydumper dir does not exist", cfg.Mydumper.SourceDir) 764 } 765 absPath, err := filepath.Abs(cfg.Mydumper.SourceDir) 766 if err != nil { 767 return errors.Annotatef(err, "covert data-source-dir '%s' to absolute path failed", cfg.Mydumper.SourceDir) 768 } 769 cfg.Mydumper.SourceDir = "file://" + filepath.ToSlash(absPath) 770 u.Path = absPath 771 u.Scheme = "file" 772 } 773 774 found := false 775 for _, t := range supportedStorageTypes { 776 if u.Scheme == t { 777 found = true 778 break 779 } 780 } 781 if !found { 782 return errors.Errorf("Unsupported data-source-dir url '%s'", cfg.Mydumper.SourceDir) 783 } 784 785 return nil 786 } 787 788 // HasLegacyBlackWhiteList checks whether the deprecated [black-white-list] section 789 // was defined. 790 func (cfg *Config) HasLegacyBlackWhiteList() bool { 791 return len(cfg.BWList.DoTables) != 0 || len(cfg.BWList.DoDBs) != 0 || len(cfg.BWList.IgnoreTables) != 0 || len(cfg.BWList.IgnoreDBs) != 0 792 }