github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/task/restore.go

// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package task

import (
	"context"
	"time"

	"github.com/pingcap/br/pkg/metautil"

	"github.com/pingcap/br/pkg/version"

	"github.com/opentracing/opentracing-go"
	"github.com/pingcap/errors"
	"github.com/pingcap/failpoint"
	backuppb "github.com/pingcap/kvproto/pkg/backup"
	"github.com/pingcap/log"
	"github.com/pingcap/tidb/config"
	"github.com/spf13/pflag"
	"go.uber.org/multierr"
	"go.uber.org/zap"

	"github.com/pingcap/br/pkg/conn"
	berrors "github.com/pingcap/br/pkg/errors"
	"github.com/pingcap/br/pkg/glue"
	"github.com/pingcap/br/pkg/pdutil"
	"github.com/pingcap/br/pkg/restore"
	"github.com/pingcap/br/pkg/storage"
	"github.com/pingcap/br/pkg/summary"
	"github.com/pingcap/br/pkg/utils"
)

const (
	flagOnline   = "online"
	flagNoSchema = "no-schema"

	// FlagMergeRegionSizeBytes is the flag name of merging small regions by size.
	FlagMergeRegionSizeBytes = "merge-region-size-bytes"
	// FlagMergeRegionKeyCount is the flag name of merging small regions by key count.
	FlagMergeRegionKeyCount = "merge-region-key-count"

	defaultRestoreConcurrency = 128
	maxRestoreBatchSizeLimit  = 10240
	defaultDDLConcurrency     = 16
)

// RestoreCommonConfig is the common configuration for all BR restore tasks.
type RestoreCommonConfig struct {
	Online bool `json:"online" toml:"online"`

	// MergeSmallRegionSizeBytes is the threshold of merging small regions (default 96MB, the region split size).
	// MergeSmallRegionKeyCount is the threshold of merging small regions (default 960_000, the region split key count).
	// See https://github.com/tikv/tikv/blob/v4.0.8/components/raftstore/src/coprocessor/config.rs#L35-L38
	MergeSmallRegionSizeBytes uint64 `json:"merge-region-size-bytes" toml:"merge-region-size-bytes"`
	MergeSmallRegionKeyCount  uint64 `json:"merge-region-key-count" toml:"merge-region-key-count"`
}

// adjust adjusts abnormal config values in the current config.
// It is useful when BR is not started from the CLI (e.g. from BRIE in SQL).
func (cfg *RestoreCommonConfig) adjust() {
	if cfg.MergeSmallRegionKeyCount == 0 {
		cfg.MergeSmallRegionKeyCount = restore.DefaultMergeRegionKeyCount
	}
	if cfg.MergeSmallRegionSizeBytes == 0 {
		cfg.MergeSmallRegionSizeBytes = restore.DefaultMergeRegionSizeBytes
	}
}

// DefineRestoreCommonFlags defines common flags for the restore command.
func DefineRestoreCommonFlags(flags *pflag.FlagSet) {
	// TODO: remove the experimental tag once the feature is stable.
	flags.Bool(flagOnline, false, "(experimental) Whether to restore in online mode")

	flags.Uint64(FlagMergeRegionSizeBytes, restore.DefaultMergeRegionSizeBytes,
		"the threshold of merging small regions (default 96MB, the region split size)")
	flags.Uint64(FlagMergeRegionKeyCount, restore.DefaultMergeRegionKeyCount,
		"the threshold of merging small regions (default 960_000, the region split key count)")
	_ = flags.MarkHidden(FlagMergeRegionSizeBytes)
	_ = flags.MarkHidden(FlagMergeRegionKeyCount)
}

// ParseFromFlags parses the config from the flag set.
func (cfg *RestoreCommonConfig) ParseFromFlags(flags *pflag.FlagSet) error {
	var err error
	cfg.Online, err = flags.GetBool(flagOnline)
	if err != nil {
		return errors.Trace(err)
	}
	cfg.MergeSmallRegionKeyCount, err = flags.GetUint64(FlagMergeRegionKeyCount)
	if err != nil {
		return errors.Trace(err)
	}
	cfg.MergeSmallRegionSizeBytes, err = flags.GetUint64(FlagMergeRegionSizeBytes)
	if err != nil {
		return errors.Trace(err)
	}
	return errors.Trace(err)
}

// RestoreConfig is the configuration specific for restore tasks.
type RestoreConfig struct {
	Config
	RestoreCommonConfig

	NoSchema bool `json:"no-schema" toml:"no-schema"`
}

// DefineRestoreFlags defines common flags for the restore tidb command.
func DefineRestoreFlags(flags *pflag.FlagSet) {
	flags.Bool(flagNoSchema, false, "skip creating schemas and tables, reuse existing empty ones")
	// Do not expose this flag.
	_ = flags.MarkHidden(flagNoSchema)

	DefineRestoreCommonFlags(flags)
}

// ParseFromFlags parses the restore-related flags from the flag set.
func (cfg *RestoreConfig) ParseFromFlags(flags *pflag.FlagSet) error {
	var err error
	cfg.NoSchema, err = flags.GetBool(flagNoSchema)
	if err != nil {
		return errors.Trace(err)
	}
	err = cfg.Config.ParseFromFlags(flags)
	if err != nil {
		return errors.Trace(err)
	}
	err = cfg.RestoreCommonConfig.ParseFromFlags(flags)
	if err != nil {
		return errors.Trace(err)
	}

	if cfg.Config.Concurrency == 0 {
		cfg.Config.Concurrency = defaultRestoreConcurrency
	}
	return nil
}

// adjustRestoreConfig is used by both the BR binary and BR in TiDB.
// When a new config item is added but not handled by the parser,
// we should set a proper value for it in this function,
// so that both the binary and TiDB use the same default value.
func (cfg *RestoreConfig) adjustRestoreConfig() {
	cfg.Config.adjust()
	cfg.RestoreCommonConfig.adjust()

	if cfg.Config.Concurrency == 0 {
		cfg.Config.Concurrency = defaultRestoreConcurrency
	}
	if cfg.Config.SwitchModeInterval == 0 {
		cfg.Config.SwitchModeInterval = defaultSwitchInterval
	}
}

// CheckRestoreDBAndTable checks whether the databases or tables to restore have been backed up.
func CheckRestoreDBAndTable(client *restore.Client, cfg *RestoreConfig) error {
	if len(cfg.Schemas) == 0 && len(cfg.Tables) == 0 {
		return nil
	}
	schemas := client.GetDatabases()
	schemasMap := make(map[string]struct{})
	tablesMap := make(map[string]struct{})
	for _, db := range schemas {
		dbName := db.Info.Name.O
		if name, ok := utils.GetSysDBName(db.Info.Name); utils.IsSysDB(name) && ok {
			dbName = name
		}
		schemasMap[utils.EncloseName(dbName)] = struct{}{}
		for _, table := range db.Tables {
			tablesMap[utils.EncloseDBAndTable(dbName, table.Info.Name.O)] = struct{}{}
		}
	}
	restoreSchemas := cfg.Schemas
	restoreTables := cfg.Tables
	for schema := range restoreSchemas {
		if _, ok := schemasMap[schema]; !ok {
			return errors.Annotatef(berrors.ErrUndefinedRestoreDbOrTable,
				"[database: %v] has not been backed up, please ensure you have entered a correct database name", schema)
		}
	}
	for table := range restoreTables {
		if _, ok := tablesMap[table]; !ok {
			return errors.Annotatef(berrors.ErrUndefinedRestoreDbOrTable,
				"[table: %v] has not been backed up, please ensure you have entered a correct table name", table)
		}
	}
	return nil
}

// RunRestore starts a restore task inside the current goroutine.
func RunRestore(c context.Context, g glue.Glue, cmdName string, cfg *RestoreConfig) error {
	cfg.adjustRestoreConfig()

	defer summary.Summary(cmdName)
	ctx, cancel := context.WithCancel(c)
	defer cancel()

	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("task.RunRestore", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}

	// Restore needs the domain to execute DDL.
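	// The domain is TiDB's schema/DDL layer; RunRestore later uses it (via mgr.GetDomain())
	// to create tables and to check cluster-index compatibility, which is why NewMgr is
	// asked to bring one up here.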
	needDomain := true
	mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, GetKeepalive(&cfg.Config), cfg.CheckRequirements, needDomain)
	if err != nil {
		return errors.Trace(err)
	}
	defer mgr.Close()

	keepaliveCfg := GetKeepalive(&cfg.Config)
	keepaliveCfg.PermitWithoutStream = true
	client, err := restore.NewRestoreClient(g, mgr.GetPDClient(), mgr.GetStorage(), mgr.GetTLSConfig(), keepaliveCfg)
	if err != nil {
		return errors.Trace(err)
	}
	defer client.Close()

	u, err := storage.ParseBackend(cfg.Storage, &cfg.BackendOptions)
	if err != nil {
		return errors.Trace(err)
	}
	opts := storage.ExternalStorageOptions{
		NoCredentials:   cfg.NoCreds,
		SendCredentials: cfg.SendCreds,
		SkipCheckPath:   cfg.SkipCheckPath,
	}
	if err = client.SetStorage(ctx, u, &opts); err != nil {
		return errors.Trace(err)
	}
	client.SetRateLimit(cfg.RateLimit)
	client.SetConcurrency(uint(cfg.Concurrency))
	if cfg.Online {
		client.EnableOnline()
	}
	if cfg.NoSchema {
		client.EnableSkipCreateSQL()
	}
	client.SetSwitchModeInterval(cfg.SwitchModeInterval)
	err = client.LoadRestoreStores(ctx)
	if err != nil {
		return errors.Trace(err)
	}

	u, s, backupMeta, err := ReadBackupMeta(ctx, metautil.MetaFile, &cfg.Config)
	if err != nil {
		return errors.Trace(err)
	}
	backupVersion := version.NormalizeBackupVersion(backupMeta.ClusterVersion)
	if cfg.CheckRequirements && backupVersion != nil {
		if versionErr := version.CheckClusterVersion(ctx, mgr.GetPDClient(), version.CheckVersionForBackup(backupVersion)); versionErr != nil {
			return errors.Trace(versionErr)
		}
	}
	reader := metautil.NewMetaReader(backupMeta, s)
	if err = client.InitBackupMeta(c, backupMeta, u, s, reader); err != nil {
		return errors.Trace(err)
	}

	if client.IsRawKvMode() {
		return errors.Annotate(berrors.ErrRestoreModeMismatch, "cannot do transactional restore from raw kv data")
	}
	if err = CheckRestoreDBAndTable(client, cfg); err != nil {
		return err
	}
	files, tables, dbs := filterRestoreFiles(client, cfg)
	if len(dbs) == 0 && len(tables) != 0 {
		return errors.Annotate(berrors.ErrRestoreInvalidBackup, "contain tables but no databases")
	}
	archiveSize := reader.ArchiveSize(ctx, files)
	g.Record(summary.RestoreDataSize, archiveSize)
	restoreTS, err := client.GetTS(ctx)
	if err != nil {
		return errors.Trace(err)
	}

	sp := utils.BRServiceSafePoint{
		BackupTS: restoreTS,
		TTL:      utils.DefaultBRGCSafePointTTL,
		ID:       utils.MakeSafePointID(),
	}
	// The restore checksum checks the safe point against its start ts, see details at
	// https://github.com/pingcap/tidb/blob/180c02127105bed73712050594da6ead4d70a85f/store/tikv/kv.go#L186-L190
	// so we should keep the safe point unchanged, to avoid the GC lifetime being shorter
	// than the transaction duration.
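	// StartServiceSafePointKeeper registers this service safe point with PD and keeps
	// refreshing its TTL in the background, so GC cannot advance past restoreTS for the
	// whole lifetime of the restore (including the checksum phase).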
	err = utils.StartServiceSafePointKeeper(ctx, mgr.GetPDClient(), sp)
	if err != nil {
		return errors.Trace(err)
	}

	var newTS uint64
	if client.IsIncremental() {
		newTS = restoreTS
	}
	ddlJobs := restore.FilterDDLJobs(client.GetDDLJobs(), tables)

	err = client.PreCheckTableTiFlashReplica(ctx, tables)
	if err != nil {
		return errors.Trace(err)
	}

	err = client.PreCheckTableClusterIndex(tables, ddlJobs, mgr.GetDomain())
	if err != nil {
		return errors.Trace(err)
	}

	// Pre-set the TiDB config for restore.
	restoreDBConfig := enableTiDBConfig()
	defer restoreDBConfig()

	// Execute DDL first.
	err = client.ExecDDLs(ctx, ddlJobs)
	if err != nil {
		return errors.Trace(err)
	}

	// Nothing to restore; maybe there are only DDL changes in an incremental restore.
	if len(dbs) == 0 && len(tables) == 0 {
		log.Info("nothing to restore, all databases and tables are filtered out")
		// Even if there is nothing to restore, we show a success message since there is no failure.
		summary.SetSuccessStatus(true)
		return nil
	}

	for _, db := range dbs {
		err = client.CreateDatabase(ctx, db.Info)
		if err != nil {
			return errors.Trace(err)
		}
	}

	// We make a bigger errCh so we won't block when multiple parts fail.
	errCh := make(chan error, 32)
	// Allowing the user to modify the DDL concurrency probably isn't necessary,
	// because executing DDL is really I/O bound (or, algorithm bound?),
	// and most of the time is spent waiting for DDL jobs to be enqueued.
	// So these jobs won't be faster or slower when the machine becomes faster or slower,
	// hence a fixed value is fine.
	var dbPool []*restore.DB
	if g.OwnsStorage() {
		// Only in the binary can we use multi-threaded sessions to create tables,
		// so use OwnsStorage() to tell whether we are running as a binary or from SQL.
		dbPool, err = restore.MakeDBPool(defaultDDLConcurrency, func() (*restore.DB, error) {
			return restore.NewDB(g, mgr.GetStorage())
		})
	}
	if err != nil {
		log.Warn("create session pool failed, we will send DDLs only by created sessions",
			zap.Error(err),
			zap.Int("sessionCount", len(dbPool)),
		)
	}
	tableStream := client.GoCreateTables(ctx, mgr.GetDomain(), tables, newTS, dbPool, errCh)
	if len(files) == 0 {
		log.Info("no files, empty databases and tables are restored")
		summary.SetSuccessStatus(true)
		// Don't return immediately, wait for the whole pipeline to finish.
	}

	tableFileMap := restore.MapTableToFiles(files)
	log.Debug("mapped table to files", zap.Any("result map", tableFileMap))

	rangeStream := restore.GoValidateFileRanges(
		ctx, tableStream, tableFileMap, cfg.MergeSmallRegionSizeBytes, cfg.MergeSmallRegionKeyCount, errCh)

	rangeSize := restore.EstimateRangeSize(files)
	summary.CollectInt("restore ranges", rangeSize)
	log.Info("range and file prepared", zap.Int("file count", len(files)), zap.Int("range count", rangeSize))

	restoreSchedulers, err := restorePreWork(ctx, client, mgr)
	if err != nil {
		return errors.Trace(err)
	}
	// Always run the post-work even on error, so we don't get stuck in
	// import mode or left with emptied schedulers.
	defer restorePostWork(ctx, client, restoreSchedulers)

	// Do not reset the timestamp if we are doing an incremental restore, because
	// we are not allowed to decrease the timestamp.
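	// ResetTS advances PD's TSO so that it is not smaller than the timestamps carried by the
	// restored data; an incremental restore runs on a cluster whose TSO is already ahead,
	// hence the skip below.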
	if !client.IsIncremental() {
		if err = client.ResetTS(ctx, cfg.PD); err != nil {
			log.Error("reset pd TS failed", zap.Error(err))
			return errors.Trace(err)
		}
	}

	// Restore sst files in batches.
	batchSize := utils.ClampInt(int(cfg.Concurrency), defaultRestoreConcurrency, maxRestoreBatchSizeLimit)
	failpoint.Inject("small-batch-size", func(v failpoint.Value) {
		log.Info("failpoint small batch size is on", zap.Int("size", v.(int)))
		batchSize = v.(int)
	})

	// Redirect to log if there is no log file to avoid unreadable output.
	updateCh := g.StartProgress(
		ctx,
		cmdName,
		// Split/Scatter + Download/Ingest + Checksum
		int64(rangeSize+len(files)+len(tables)),
		!cfg.LogProgress)
	defer updateCh.Close()
	sender, err := restore.NewTiKVSender(ctx, client, updateCh)
	if err != nil {
		return errors.Trace(err)
	}
	manager := restore.NewBRContextManager(client)
	batcher, afterRestoreStream := restore.NewBatcher(ctx, sender, manager, errCh)
	batcher.SetThreshold(batchSize)
	batcher.EnableAutoCommit(ctx, time.Second)
	go restoreTableStream(ctx, rangeStream, batcher, errCh)

	var finish <-chan struct{}
	// Checksum
	if cfg.Checksum {
		finish = client.GoValidateChecksum(
			ctx, afterRestoreStream, mgr.GetStorage().GetClient(), errCh, updateCh, cfg.ChecksumConcurrency)
	} else {
		// When the user skips the checksum, just collect the tables and drop them.
		finish = dropToBlackhole(ctx, afterRestoreStream, errCh, updateCh)
	}

	select {
	case err = <-errCh:
		err = multierr.Append(err, multierr.Combine(restore.Exhaust(errCh)...))
	case <-finish:
	}

	// If any error happened, return now.
	if err != nil {
		return errors.Trace(err)
	}

	// The cost of renaming user tables / replacing into system tables wouldn't be so high,
	// so leave it out of the pipeline for easier implementation.
	client.RestoreSystemSchemas(ctx, cfg.TableFilter)

	// Set the task summary to success status.
	summary.SetSuccessStatus(true)
	return nil
}

// dropToBlackhole drops all incoming tables into a black hole,
// i.e. it doesn't execute the checksum, it just advances the progress anyway.
func dropToBlackhole(
	ctx context.Context,
	tableStream <-chan restore.CreatedTable,
	errCh chan<- error,
	updateCh glue.Progress,
) <-chan struct{} {
	outCh := make(chan struct{}, 1)
	go func() {
		defer func() {
			outCh <- struct{}{}
		}()
		for {
			select {
			case <-ctx.Done():
				errCh <- ctx.Err()
				return
			case _, ok := <-tableStream:
				if !ok {
					return
				}
				updateCh.Inc()
			}
		}
	}()
	return outCh
}

func filterRestoreFiles(
	client *restore.Client,
	cfg *RestoreConfig,
) (files []*backuppb.File, tables []*metautil.Table, dbs []*utils.Database) {
	for _, db := range client.GetDatabases() {
		createdDatabase := false
		dbName := db.Info.Name.O
		if name, ok := utils.GetSysDBName(db.Info.Name); utils.IsSysDB(name) && ok {
			dbName = name
		}
		for _, table := range db.Tables {
			if !cfg.TableFilter.MatchTable(dbName, table.Info.Name.O) {
				continue
			}
			if !createdDatabase {
				dbs = append(dbs, db)
				createdDatabase = true
			}
			files = append(files, table.Files...)
			tables = append(tables, table)
		}
	}
	return
}

// restorePreWork executes some preparation work before the restore.
// TODO: make this function return a restore post-work.
func restorePreWork(ctx context.Context, client *restore.Client, mgr *conn.Mgr) (pdutil.UndoFunc, error) {
	if client.IsOnline() {
		return pdutil.Nop, nil
	}

	// Switch the TiKV cluster to import mode (adjust the rocksdb configuration).
	client.SwitchToImportMode(ctx)

	return mgr.RemoveSchedulers(ctx)
}

// restorePostWork executes some post work after the restore.
// TODO: aggregate all lifetime management methods into the batcher's context manager field.
func restorePostWork(
	ctx context.Context, client *restore.Client, restoreSchedulers pdutil.UndoFunc,
) {
	if ctx.Err() != nil {
		log.Warn("context canceled, try shutdown")
		ctx = context.Background()
	}
	if client.IsOnline() {
		return
	}
	if err := client.SwitchToNormalMode(ctx); err != nil {
		log.Warn("fail to switch to normal mode", zap.Error(err))
	}
	if err := restoreSchedulers(ctx); err != nil {
		log.Warn("failed to restore PD schedulers", zap.Error(err))
	}
}

// enableTiDBConfig tweaks some TiDB configs to make the restore go smoothly.
// It returns a function that restores the config to its original value.
func enableTiDBConfig() func() {
	restoreConfig := config.RestoreFunc()
	config.UpdateGlobal(func(conf *config.Config) {
		// Set max-index-length before executing DDLs and creating tables.
		// We set this value to the maximum (3072*4); otherwise we might fail to restore tables
		// when both the upstream and downstream set this value greater than the default (3072).
		conf.MaxIndexLength = config.DefMaxOfMaxIndexLength
		log.Warn("set max-index-length to max(3072*4) to skip the index length check in DDL")
	})
	return restoreConfig
}

// restoreTableStream blocks the current goroutine and restores a stream of tables
// by sending them to the batcher.
func restoreTableStream(
	ctx context.Context,
	inputCh <-chan restore.TableWithRange,
	batcher *restore.Batcher,
	errCh chan<- error,
) {
	// We cache the old tables so that we can 'batch' recover TiFlash and tables.
	oldTables := []*metautil.Table{}
	defer func() {
		// When things are done, we must clean up pending requests.
		batcher.Close()
		log.Info("doing postwork",
			zap.Int("table count", len(oldTables)),
		)
	}()

	for {
		select {
		case <-ctx.Done():
			errCh <- ctx.Err()
			return
		case t, ok := <-inputCh:
			if !ok {
				return
			}
			oldTables = append(oldTables, t.OldTable)

			batcher.Add(t)
		}
	}
}
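
// Example (a minimal sketch, not part of the original file): how a cobra command outside this
// package might wire DefineRestoreFlags, RestoreConfig.ParseFromFlags and RunRestore together.
// DefineCommonFlags is assumed to come from this package's common flag definitions, and
// gluetidb.New() is assumed as the Glue implementation used by the BR binary; treat both,
// along with the command wiring itself, as illustrative rather than the actual BR CLI code.
//
//	func newFullRestoreCommand() *cobra.Command {
//		cmd := &cobra.Command{
//			Use:   "full",
//			Short: "restore all tables from a full backup",
//			RunE: func(cmd *cobra.Command, _ []string) error {
//				cfg := task.RestoreConfig{}
//				if err := cfg.ParseFromFlags(cmd.Flags()); err != nil {
//					return errors.Trace(err)
//				}
//				return task.RunRestore(cmd.Context(), gluetidb.New(), "Full restore", &cfg)
//			},
//		}
//		// Both the common flags (pd, storage, ...) and the restore-specific flags must be
//		// defined on the flag set, otherwise ParseFromFlags will fail to look them up.
//		task.DefineCommonFlags(cmd.Flags())
//		task.DefineRestoreFlags(cmd.Flags())
//		return cmd
//	}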