github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/backup/client.go (about)

     1  // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.
     2  
     3  package backup
     4  
     5  import (
     6  	"context"
     7  	"encoding/hex"
     8  	"encoding/json"
     9  	"fmt"
    10  	"io"
    11  	"os"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/pingcap/br/pkg/metautil"
    16  
    17  	"github.com/google/btree"
    18  	"github.com/opentracing/opentracing-go"
    19  	"github.com/pingcap/errors"
    20  	"github.com/pingcap/failpoint"
    21  	backuppb "github.com/pingcap/kvproto/pkg/backup"
    22  	"github.com/pingcap/kvproto/pkg/metapb"
    23  	"github.com/pingcap/log"
    24  	"github.com/pingcap/parser/model"
    25  	filter "github.com/pingcap/tidb-tools/pkg/table-filter"
    26  	"github.com/pingcap/tidb/distsql"
    27  	"github.com/pingcap/tidb/kv"
    28  	"github.com/pingcap/tidb/meta"
    29  	"github.com/pingcap/tidb/meta/autoid"
    30  	"github.com/pingcap/tidb/util"
    31  	"github.com/pingcap/tidb/util/codec"
    32  	"github.com/pingcap/tidb/util/ranger"
    33  	"github.com/tikv/client-go/v2/oracle"
    34  	"github.com/tikv/client-go/v2/tikv"
    35  	"github.com/tikv/client-go/v2/txnkv/txnlock"
    36  	pd "github.com/tikv/pd/client"
    37  	"go.uber.org/zap"
    38  	"golang.org/x/sync/errgroup"
    39  	"google.golang.org/grpc/codes"
    40  	"google.golang.org/grpc/status"
    41  
    42  	"github.com/pingcap/br/pkg/conn"
    43  	berrors "github.com/pingcap/br/pkg/errors"
    44  	"github.com/pingcap/br/pkg/logutil"
    45  	"github.com/pingcap/br/pkg/redact"
    46  	"github.com/pingcap/br/pkg/rtree"
    47  	"github.com/pingcap/br/pkg/storage"
    48  	"github.com/pingcap/br/pkg/summary"
    49  	"github.com/pingcap/br/pkg/utils"
    50  )
    51  
    52  // ClientMgr manages connections needed by backup.
    53  type ClientMgr interface {
    54  	GetBackupClient(ctx context.Context, storeID uint64) (backuppb.BackupClient, error)
    55  	ResetBackupClient(ctx context.Context, storeID uint64) (backuppb.BackupClient, error)
    56  	GetPDClient() pd.Client
    57  	GetLockResolver() *txnlock.LockResolver
    58  	Close()
    59  }
    60  
    61  // Checksum is the checksum of some backup files calculated by CollectChecksums.
    62  type Checksum struct {
    63  	Crc64Xor   uint64
    64  	TotalKvs   uint64
    65  	TotalBytes uint64
    66  }
    67  
    68  // ProgressUnit represents the unit of progress.
    69  type ProgressUnit string
    70  
    71  // Maximum total sleep time (in ms) for kv/cop commands.
    72  const (
    73  	backupFineGrainedMaxBackoff = 80000
    74  	backupRetryTimes            = 5
    75  	// RangeUnit represents the progress unit counted when a range is finished.
    76  	RangeUnit ProgressUnit = "range"
    77  	// RegionUnit represents the progress unit counted when a region is finished.
    78  	RegionUnit ProgressUnit = "region"
    79  )
    80  
    81  // Client is a client that instructs TiKV how to do a backup.
    82  type Client struct {
    83  	mgr       ClientMgr
    84  	clusterID uint64
    85  
    86  	storage storage.ExternalStorage
    87  	backend *backuppb.StorageBackend
    88  
    89  	gcTTL int64
    90  }
    91  
    92  // NewBackupClient returns a new backup client.
    93  func NewBackupClient(ctx context.Context, mgr ClientMgr) (*Client, error) {
    94  	log.Info("new backup client")
    95  	pdClient := mgr.GetPDClient()
    96  	clusterID := pdClient.GetClusterID(ctx)
    97  	return &Client{
    98  		clusterID: clusterID,
    99  		mgr:       mgr,
   100  	}, nil
   101  }
   102  
   103  // GetTS returns the backup timestamp: ts if it is non-zero, otherwise the current TSO from PD, optionally moved back by duration.
   104  func (bc *Client) GetTS(ctx context.Context, duration time.Duration, ts uint64) (uint64, error) {
   105  	var (
   106  		backupTS uint64
   107  		err      error
   108  	)
   109  	if ts > 0 {
   110  		backupTS = ts
   111  	} else {
   112  		p, l, err := bc.mgr.GetPDClient().GetTS(ctx)
   113  		if err != nil {
   114  			return 0, errors.Trace(err)
   115  		}
   116  		backupTS = oracle.ComposeTS(p, l)
   117  
   118  		switch {
   119  		case duration < 0:
   120  			return 0, errors.Annotate(berrors.ErrInvalidArgument, "negative timeago is not allowed")
   121  		case duration > 0:
   122  			log.Info("backup time ago", zap.Duration("timeago", duration))
   123  
   124  			backupTime := oracle.GetTimeFromTS(backupTS)
   125  			backupAgo := backupTime.Add(-duration)
   126  			if backupTS < oracle.ComposeTS(oracle.GetPhysical(backupAgo), l) {
   127  				return 0, errors.Annotate(berrors.ErrInvalidArgument, "backup ts overflow, please choose a smaller timeago")
   128  			}
   129  			backupTS = oracle.ComposeTS(oracle.GetPhysical(backupAgo), l)
   130  		}
   131  	}
   132  
   133  	// check that the backup time does not exceed the GC safe point
   134  	err = utils.CheckGCSafePoint(ctx, bc.mgr.GetPDClient(), backupTS)
   135  	if err != nil {
   136  		return 0, errors.Trace(err)
   137  	}
   138  	log.Info("backup encode timestamp", zap.Uint64("BackupTS", backupTS))
   139  	return backupTS, nil
   140  }
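
// For illustration, a rough sketch of how a "timeago" duration maps to a
// backup timestamp (this mirrors the logic in GetTS above; the one-hour
// duration is an assumed example value, not something used by this package):
//
//	p, l, _ := pdClient.GetTS(ctx)                    // physical (ms) and logical parts from PD
//	now := oracle.ComposeTS(p, l)                     // current TSO
//	ago := oracle.GetTimeFromTS(now).Add(-time.Hour)  // shift the physical time back
//	backupTS := oracle.ComposeTS(oracle.GetPhysical(ago), l)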
   141  
   142  // SetLockFile sets the write lock file.
   143  func (bc *Client) SetLockFile(ctx context.Context) error {
   144  	return bc.storage.WriteFile(ctx, metautil.LockFile,
   145  		[]byte("DO NOT DELETE\n"+
   146  			"This file exists to remind other backup jobs not to use this path"))
   147  }
   148  
   149  // SetGCTTL sets the GC TTL for the client.
   150  func (bc *Client) SetGCTTL(ttl int64) {
   151  	if ttl <= 0 {
   152  		ttl = utils.DefaultBRGCSafePointTTL
   153  	}
   154  	bc.gcTTL = ttl
   155  }
   156  
   157  // GetGCTTL returns the GC TTL for this backup.
   158  func (bc *Client) GetGCTTL() int64 {
   159  	return bc.gcTTL
   160  }
   161  
   162  // GetStorage gets storage for this backup.
   163  func (bc *Client) GetStorage() storage.ExternalStorage {
   164  	return bc.storage
   165  }
   166  
   167  // SetStorage sets the ExternalStorage for the client.
   168  func (bc *Client) SetStorage(ctx context.Context, backend *backuppb.StorageBackend, opts *storage.ExternalStorageOptions) error {
   169  	var err error
   170  	bc.storage, err = storage.New(ctx, backend, opts)
   171  	if err != nil {
   172  		return errors.Trace(err)
   173  	}
   174  	// check whether the backup meta file already exists
   175  	exist, err := bc.storage.FileExists(ctx, metautil.MetaFile)
   176  	if err != nil {
   177  		return errors.Annotatef(err, "error occurred when checking %s file", metautil.MetaFile)
   178  	}
   179  	if exist {
   180  		return errors.Annotatef(berrors.ErrInvalidArgument, "backup meta file exists in %v, "+
   181  			"there may be some backup files in the path already, "+
   182  			"please specify a correct backup directory!", bc.storage.URI()+"/"+metautil.MetaFile)
   183  	}
   184  	exist, err = bc.storage.FileExists(ctx, metautil.LockFile)
   185  	if err != nil {
   186  		return errors.Annotatef(err, "error occurred when checking %s file", metautil.LockFile)
   187  	}
   188  	if exist {
   189  		return errors.Annotatef(berrors.ErrInvalidArgument, "backup lock file exists in %v, "+
   190  			"there may be some backup files in the path already, "+
   191  			"please specify a correct backup directory!", bc.storage.URI()+"/"+metautil.LockFile)
   192  	}
   193  	bc.backend = backend
   194  	return nil
   195  }
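
// A minimal usage sketch for SetStorage (the URL is an assumed example; in BR
// the backend is normally parsed from the user-supplied --storage URL via the
// storage package's ParseBackend helper):
//
//	backend, err := storage.ParseBackend("s3://bucket/prefix", nil)
//	if err != nil {
//		return errors.Trace(err)
//	}
//	if err := client.SetStorage(ctx, backend, &storage.ExternalStorageOptions{}); err != nil {
//		return errors.Trace(err)
//	}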
   196  
   197  // GetClusterID returns the cluster ID of the TiDB cluster to back up.
   198  func (bc *Client) GetClusterID() uint64 {
   199  	return bc.clusterID
   200  }
   201  
   202  // BuildTableRanges returns the key ranges encompassing the entire table,
   203  // and its partitions if they exist.
   204  func BuildTableRanges(tbl *model.TableInfo) ([]kv.KeyRange, error) {
   205  	pis := tbl.GetPartitionInfo()
   206  	if pis == nil {
   207  		// Short path, no partition.
   208  		return appendRanges(tbl, tbl.ID)
   209  	}
   210  
   211  	ranges := make([]kv.KeyRange, 0, len(pis.Definitions)*(len(tbl.Indices)+1)+1)
   212  	for _, def := range pis.Definitions {
   213  		rgs, err := appendRanges(tbl, def.ID)
   214  		if err != nil {
   215  			return nil, errors.Trace(err)
   216  		}
   217  		ranges = append(ranges, rgs...)
   218  	}
   219  	return ranges, nil
   220  }
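
// An illustrative sketch of calling BuildTableRanges (tblInfo is assumed to be
// a *model.TableInfo loaded from the snapshot meta, as in
// BuildBackupRangeAndSchema below):
//
//	ranges, err := BuildTableRanges(tblInfo)
//	if err != nil {
//		return errors.Trace(err)
//	}
//	for _, r := range ranges {
//		// one range for the record keys of each table/partition ID,
//		// plus one range per public index
//		fmt.Printf("[%x, %x)\n", r.StartKey, r.EndKey)
//	}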
   221  
   222  func appendRanges(tbl *model.TableInfo, tblID int64) ([]kv.KeyRange, error) {
   223  	var ranges []*ranger.Range
   224  	if tbl.IsCommonHandle {
   225  		ranges = ranger.FullNotNullRange()
   226  	} else {
   227  		ranges = ranger.FullIntRange(false)
   228  	}
   229  
   230  	kvRanges, err := distsql.TableHandleRangesToKVRanges(nil, []int64{tblID}, tbl.IsCommonHandle, ranges, nil)
   231  	if err != nil {
   232  		return nil, errors.Trace(err)
   233  	}
   234  
   235  	for _, index := range tbl.Indices {
   236  		if index.State != model.StatePublic {
   237  			continue
   238  		}
   239  		ranges = ranger.FullRange()
   240  		idxRanges, err := distsql.IndexRangesToKVRanges(nil, tblID, index.ID, ranges, nil)
   241  		if err != nil {
   242  			return nil, errors.Trace(err)
   243  		}
   244  		kvRanges = append(kvRanges, idxRanges...)
   245  	}
   246  	return kvRanges, nil
   247  }
   248  
   249  // BuildBackupRangeAndSchema gets KV range and schema of tables.
   250  // KV ranges are separated by Table IDs.
   251  // Also, KV ranges are separated by Index IDs in the same table.
   252  func BuildBackupRangeAndSchema(
   253  	storage kv.Storage,
   254  	tableFilter filter.Filter,
   255  	backupTS uint64,
   256  ) ([]rtree.Range, *Schemas, error) {
   257  	snapshot := storage.GetSnapshot(kv.NewVersion(backupTS))
   258  	m := meta.NewSnapshotMeta(snapshot)
   259  
   260  	ranges := make([]rtree.Range, 0)
   261  	backupSchemas := newBackupSchemas()
   262  	dbs, err := m.ListDatabases()
   263  	if err != nil {
   264  		return nil, nil, errors.Trace(err)
   265  	}
   266  
   267  	for _, dbInfo := range dbs {
   268  		// skip schemas excluded by the table filter and in-memory system databases
   269  		if !tableFilter.MatchSchema(dbInfo.Name.O) || util.IsMemDB(dbInfo.Name.L) {
   270  			continue
   271  		}
   272  
   273  		idAlloc := autoid.NewAllocator(storage, dbInfo.ID, false, autoid.RowIDAllocType)
   274  		seqAlloc := autoid.NewAllocator(storage, dbInfo.ID, false, autoid.SequenceType)
   275  		randAlloc := autoid.NewAllocator(storage, dbInfo.ID, false, autoid.AutoRandomType)
   276  
   277  		tables, err := m.ListTables(dbInfo.ID)
   278  		if err != nil {
   279  			return nil, nil, errors.Trace(err)
   280  		}
   281  
   282  		if len(tables) == 0 {
   283  			log.Warn("It's not necessary to back up an empty database",
   284  				zap.Stringer("db", dbInfo.Name))
   285  			continue
   286  		}
   287  
   288  		for _, tableInfo := range tables {
   289  			if !tableFilter.MatchTable(dbInfo.Name.O, tableInfo.Name.O) {
   290  				// Skip tables that do not match the table filter.
   291  				continue
   292  			}
   293  
   294  			logger := log.With(
   295  				zap.String("db", dbInfo.Name.O),
   296  				zap.String("table", tableInfo.Name.O),
   297  			)
   298  
   299  			var globalAutoID int64
   300  			switch {
   301  			case tableInfo.IsSequence():
   302  				globalAutoID, err = seqAlloc.NextGlobalAutoID(tableInfo.ID)
   303  			case tableInfo.IsView() || !utils.NeedAutoID(tableInfo):
   304  				// no auto ID for views or tables without either a row ID or an auto_increment ID.
   305  			default:
   306  				globalAutoID, err = idAlloc.NextGlobalAutoID(tableInfo.ID)
   307  			}
   308  			if err != nil {
   309  				return nil, nil, errors.Trace(err)
   310  			}
   311  			tableInfo.AutoIncID = globalAutoID
   312  
   313  			if tableInfo.PKIsHandle && tableInfo.ContainsAutoRandomBits() {
   314  				// this table has an auto_random ID; we need to back it up and rebase it during restoration
   315  				var globalAutoRandID int64
   316  				globalAutoRandID, err = randAlloc.NextGlobalAutoID(tableInfo.ID)
   317  				if err != nil {
   318  					return nil, nil, errors.Trace(err)
   319  				}
   320  				tableInfo.AutoRandID = globalAutoRandID
   321  				logger.Debug("change table AutoRandID",
   322  					zap.Int64("AutoRandID", globalAutoRandID))
   323  			}
   324  			logger.Debug("change table AutoIncID",
   325  				zap.Int64("AutoIncID", globalAutoID))
   326  
   327  			// remove all non-public indices
   328  			n := 0
   329  			for _, index := range tableInfo.Indices {
   330  				if index.State == model.StatePublic {
   331  					tableInfo.Indices[n] = index
   332  					n++
   333  				}
   334  			}
   335  			tableInfo.Indices = tableInfo.Indices[:n]
   336  
   337  			backupSchemas.addSchema(dbInfo, tableInfo)
   338  
   339  			tableRanges, err := BuildTableRanges(tableInfo)
   340  			if err != nil {
   341  				return nil, nil, errors.Trace(err)
   342  			}
   343  			for _, r := range tableRanges {
   344  				ranges = append(ranges, rtree.Range{
   345  					StartKey: r.StartKey,
   346  					EndKey:   r.EndKey,
   347  				})
   348  			}
   349  		}
   350  	}
   351  
   352  	if backupSchemas.Len() == 0 {
   353  		log.Info("nothing to backup")
   354  		return nil, nil, nil
   355  	}
   356  	return ranges, backupSchemas, nil
   357  }
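
// A sketch of a typical call (tikvStorage and tableFilter are assumptions for
// the sketch; in BR they come from the connection manager and the user's
// table filter rules):
//
//	ranges, schemas, err := BuildBackupRangeAndSchema(tikvStorage, tableFilter, backupTS)
//	if err != nil {
//		return errors.Trace(err)
//	}
//	if schemas == nil {
//		// nothing matched the filter, there is nothing to back up
//	}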
   358  
   359  // WriteBackupDDLJobs sends the DDL jobs that were done in (lastBackupTS, backupTS] to metaWriter.
   360  func WriteBackupDDLJobs(metaWriter *metautil.MetaWriter, store kv.Storage, lastBackupTS, backupTS uint64) error {
   361  	snapshot := store.GetSnapshot(kv.NewVersion(backupTS))
   362  	snapMeta := meta.NewSnapshotMeta(snapshot)
   363  	lastSnapshot := store.GetSnapshot(kv.NewVersion(lastBackupTS))
   364  	lastSnapMeta := meta.NewSnapshotMeta(lastSnapshot)
   365  	lastSchemaVersion, err := lastSnapMeta.GetSchemaVersion()
   366  	if err != nil {
   367  		return errors.Trace(err)
   368  	}
   369  	allJobs := make([]*model.Job, 0)
   370  	defaultJobs, err := snapMeta.GetAllDDLJobsInQueue(meta.DefaultJobListKey)
   371  	if err != nil {
   372  		return errors.Trace(err)
   373  	}
   374  	log.Debug("get default jobs", zap.Int("jobs", len(defaultJobs)))
   375  	allJobs = append(allJobs, defaultJobs...)
   376  	addIndexJobs, err := snapMeta.GetAllDDLJobsInQueue(meta.AddIndexJobListKey)
   377  	if err != nil {
   378  		return errors.Trace(err)
   379  	}
   380  	log.Debug("get add index jobs", zap.Int("jobs", len(addIndexJobs)))
   381  	allJobs = append(allJobs, addIndexJobs...)
   382  	historyJobs, err := snapMeta.GetAllHistoryDDLJobs()
   383  	if err != nil {
   384  		return errors.Trace(err)
   385  	}
   386  	log.Debug("get history jobs", zap.Int("jobs", len(historyJobs)))
   387  	allJobs = append(allJobs, historyJobs...)
   388  
   389  	count := 0
   390  	for _, job := range allJobs {
   391  		if (job.State == model.JobStateDone || job.State == model.JobStateSynced) &&
   392  			(job.BinlogInfo != nil && job.BinlogInfo.SchemaVersion > lastSchemaVersion) {
   393  			jobBytes, err := json.Marshal(job)
   394  			if err != nil {
   395  				return errors.Trace(err)
   396  			}
   397  			metaWriter.Send(jobBytes, metautil.AppendDDL)
   398  			count++
   399  		}
   400  	}
   401  	log.Debug("get completed jobs", zap.Int("jobs", count))
   402  	return nil
   403  }
   404  
   405  // BackupRanges makes a backup of the given key ranges.
   406  func (bc *Client) BackupRanges(
   407  	ctx context.Context,
   408  	ranges []rtree.Range,
   409  	req backuppb.BackupRequest,
   410  	concurrency uint,
   411  	metaWriter *metautil.MetaWriter,
   412  	progressCallBack func(ProgressUnit),
   413  ) error {
   414  	init := time.Now()
   415  	defer log.Info("Backup Ranges", zap.Duration("take", time.Since(init)))
   416  
   417  	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
   418  		span1 := span.Tracer().StartSpan("Client.BackupRanges", opentracing.ChildOf(span.Context()))
   419  		defer span1.Finish()
   420  		ctx = opentracing.ContextWithSpan(ctx, span1)
   421  	}
   422  
   423  	// we collect all files in a single goroutine to avoid thread safety issues.
   424  	workerPool := utils.NewWorkerPool(concurrency, "Ranges")
   425  	eg, ectx := errgroup.WithContext(ctx)
   426  	for id, r := range ranges {
   427  		id := id
   428  		sk, ek := r.StartKey, r.EndKey
   429  		workerPool.ApplyOnErrorGroup(eg, func() error {
   430  			elctx := logutil.ContextWithField(ectx, logutil.RedactAny("range-sn", id))
   431  			err := bc.BackupRange(elctx, sk, ek, req, metaWriter, progressCallBack)
   432  			if err != nil {
   433  				return errors.Trace(err)
   434  			}
   435  			return nil
   436  		})
   437  	}
   438  	return eg.Wait()
   439  }
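
// An illustrative call site for BackupRanges (ranges, metaWriter and the two
// timestamps are assumptions for this sketch; in BR they come from
// BuildBackupRangeAndSchema and the metautil.MetaWriter set up by the backup task):
//
//	req := backuppb.BackupRequest{
//		ClusterId:    client.GetClusterID(),
//		StartVersion: lastBackupTS, // 0 for a full backup
//		EndVersion:   backupTS,
//	}
//	err := client.BackupRanges(ctx, ranges, req, 4, metaWriter, func(unit ProgressUnit) {
//		// e.g. advance a progress bar on every finished range/region
//	})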
   440  
   441  // BackupRange makes a backup of the given key range.
   442  // The backed-up files are sent to metaWriter.
   443  func (bc *Client) BackupRange(
   444  	ctx context.Context,
   445  	startKey, endKey []byte,
   446  	req backuppb.BackupRequest,
   447  	metaWriter *metautil.MetaWriter,
   448  	progressCallBack func(ProgressUnit),
   449  ) (err error) {
   450  	start := time.Now()
   451  	defer func() {
   452  		elapsed := time.Since(start)
   453  		logutil.CL(ctx).Info("backup range finished", zap.Duration("take", elapsed))
   454  		key := "range start:" + hex.EncodeToString(startKey) + " end:" + hex.EncodeToString(endKey)
   455  		if err != nil {
   456  			summary.CollectFailureUnit(key, err)
   457  		}
   458  	}()
   459  	logutil.CL(ctx).Info("backup started",
   460  		logutil.Key("startKey", startKey), logutil.Key("endKey", endKey),
   461  		zap.Uint64("rateLimit", req.RateLimit),
   462  		zap.Uint32("concurrency", req.Concurrency))
   463  
   464  	var allStores []*metapb.Store
   465  	allStores, err = conn.GetAllTiKVStores(ctx, bc.mgr.GetPDClient(), conn.SkipTiFlash)
   466  	if err != nil {
   467  		return errors.Trace(err)
   468  	}
   469  
   470  	req.StartKey = startKey
   471  	req.EndKey = endKey
   472  	req.StorageBackend = bc.backend
   473  
   474  	push := newPushDown(bc.mgr, len(allStores))
   475  
   476  	var results rtree.RangeTree
   477  	results, err = push.pushBackup(ctx, req, allStores, progressCallBack)
   478  	if err != nil {
   479  		return errors.Trace(err)
   480  	}
   481  	logutil.CL(ctx).Info("finish backup push down", zap.Int("small-range-count", results.Len()))
   482  
   483  	// Find and backup remaining ranges.
   484  	// TODO: test fine grained backup.
   485  	err = bc.fineGrainedBackup(
   486  		ctx, startKey, endKey, req.StartVersion, req.EndVersion, req.CompressionType, req.CompressionLevel,
   487  		req.RateLimit, req.Concurrency, results, progressCallBack)
   488  	if err != nil {
   489  		return errors.Trace(err)
   490  	}
   491  
   492  	// update progress of range unit
   493  	progressCallBack(RangeUnit)
   494  
   495  	if req.IsRawKv {
   496  		logutil.CL(ctx).Info("raw ranges backed up",
   497  			logutil.Key("startKey", startKey),
   498  			logutil.Key("endKey", endKey),
   499  			zap.String("cf", req.Cf))
   500  	} else {
   501  		logutil.CL(ctx).Info("time range backed up",
   502  			zap.Reflect("StartVersion", req.StartVersion),
   503  			zap.Reflect("EndVersion", req.EndVersion))
   504  	}
   505  
   506  	var ascendErr error
   507  	results.Ascend(func(i btree.Item) bool {
   508  		r := i.(*rtree.Range)
   509  		for _, f := range r.Files {
   510  			summary.CollectSuccessUnit(summary.TotalKV, 1, f.TotalKvs)
   511  			summary.CollectSuccessUnit(summary.TotalBytes, 1, f.TotalBytes)
   512  		}
   513  		// we need to keep the files in order after we support multi_ingest sst;
   514  		// default_sst and write_sst need to be together.
   515  		if err := metaWriter.Send(r.Files, metautil.AppendDataFile); err != nil {
   516  			ascendErr = err
   517  			return false
   518  		}
   519  		return true
   520  	})
   521  	if ascendErr != nil {
   522  		return errors.Trace(ascendErr)
   523  	}
   524  
   525  	// Check if there are duplicated files.
   526  	checkDupFiles(&results)
   527  
   528  	return nil
   529  }
   530  
   531  func (bc *Client) findRegionLeader(ctx context.Context, key []byte) (*metapb.Peer, error) {
   532  	// Keys are saved in encoded format in TiKV, so the key must be encoded
   533  	// in order to find the correct region.
   534  	key = codec.EncodeBytes([]byte{}, key)
   535  	for i := 0; i < 5; i++ {
   536  		// TODO: use a better backoff strategy.
   537  		region, err := bc.mgr.GetPDClient().GetRegion(ctx, key)
   538  		if err != nil || region == nil {
   539  			log.Error("find leader failed", zap.Error(err), zap.Reflect("region", region))
   540  			time.Sleep(time.Millisecond * time.Duration(100*i))
   541  			continue
   542  		}
   543  		if region.Leader != nil {
   544  			log.Info("find leader",
   545  				zap.Reflect("Leader", region.Leader), logutil.Key("key", key))
   546  			return region.Leader, nil
   547  		}
   548  		log.Warn("region found but it has no leader", logutil.Key("key", key))
   549  		time.Sleep(time.Millisecond * time.Duration(100*i))
   550  		continue
   551  	}
   552  	log.Error("can not find leader", logutil.Key("key", key))
   553  	return nil, errors.Annotatef(berrors.ErrBackupNoLeader, "can not find leader")
   554  }
   555  
   556  func (bc *Client) fineGrainedBackup(
   557  	ctx context.Context,
   558  	startKey, endKey []byte,
   559  	lastBackupTS uint64,
   560  	backupTS uint64,
   561  	compressType backuppb.CompressionType,
   562  	compressLevel int32,
   563  	rateLimit uint64,
   564  	concurrency uint32,
   565  	rangeTree rtree.RangeTree,
   566  	progressCallBack func(ProgressUnit),
   567  ) error {
   568  	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
   569  		span1 := span.Tracer().StartSpan("Client.fineGrainedBackup", opentracing.ChildOf(span.Context()))
   570  		defer span1.Finish()
   571  		ctx = opentracing.ContextWithSpan(ctx, span1)
   572  	}
   573  
   574  	failpoint.Inject("hint-fine-grained-backup", func(v failpoint.Value) {
   575  		log.Info("failpoint hint-fine-grained-backup injected, "+
   576  			"process will sleep for 3s and notify the shell.", zap.String("file", v.(string)))
   577  		if sigFile, ok := v.(string); ok {
   578  			file, err := os.Create(sigFile)
   579  			if err != nil {
   580  				log.Warn("failed to create file for notifying, skipping notify", zap.Error(err))
   581  			}
   582  			if file != nil {
   583  				file.Close()
   584  			}
   585  			time.Sleep(3 * time.Second)
   586  		}
   587  	})
   588  
   589  	bo := tikv.NewBackoffer(ctx, backupFineGrainedMaxBackoff)
   590  	for {
   591  		// Step1, check whether there is any incomplete range
   592  		incomplete := rangeTree.GetIncompleteRange(startKey, endKey)
   593  		if len(incomplete) == 0 {
   594  			return nil
   595  		}
   596  		logutil.CL(ctx).Info("start fine grained backup", zap.Int("incomplete", len(incomplete)))
   597  		// Step2, retry backup on incomplete range
   598  		respCh := make(chan *backuppb.BackupResponse, 4)
   599  		errCh := make(chan error, 4)
   600  		retry := make(chan rtree.Range, 4)
   601  
   602  		max := &struct {
   603  			ms int
   604  			mu sync.Mutex
   605  		}{}
   606  		wg := new(sync.WaitGroup)
   607  		for i := 0; i < 4; i++ {
   608  			wg.Add(1)
   609  			fork, _ := bo.Fork()
   610  			go func(boFork *tikv.Backoffer) {
   611  				defer wg.Done()
   612  				for rg := range retry {
   613  					backoffMs, err :=
   614  						bc.handleFineGrained(ctx, boFork, rg, lastBackupTS, backupTS,
   615  							compressType, compressLevel, rateLimit, concurrency, respCh)
   616  					if err != nil {
   617  						errCh <- err
   618  						return
   619  					}
   620  					if backoffMs != 0 {
   621  						max.mu.Lock()
   622  						if max.ms < backoffMs {
   623  							max.ms = backoffMs
   624  						}
   625  						max.mu.Unlock()
   626  					}
   627  				}
   628  			}(fork)
   629  		}
   630  
   631  		// Dispatch ranges and wait
   632  		go func() {
   633  			for _, rg := range incomplete {
   634  				retry <- rg
   635  			}
   636  			close(retry)
   637  			wg.Wait()
   638  			close(respCh)
   639  		}()
   640  
   641  	selectLoop:
   642  		for {
   643  			select {
   644  			case err := <-errCh:
   645  				// TODO: should we handle err here?
   646  				return errors.Trace(err)
   647  			case resp, ok := <-respCh:
   648  				if !ok {
   649  					// Finished.
   650  					break selectLoop
   651  				}
   652  				if resp.Error != nil {
   653  					logutil.CL(ctx).Panic("unexpected backup error",
   654  						zap.Reflect("error", resp.Error))
   655  				}
   656  				logutil.CL(ctx).Info("put fine grained range",
   657  					logutil.Key("fine-grained-range-start", resp.StartKey),
   658  					logutil.Key("fine-grained-range-end", resp.EndKey),
   659  				)
   660  				rangeTree.Put(resp.StartKey, resp.EndKey, resp.Files)
   661  
   662  				// Update progress
   663  				progressCallBack(RegionUnit)
   664  			}
   665  		}
   666  
   667  		// Step3. Backoff if needed, then repeat.
   668  		max.mu.Lock()
   669  		ms := max.ms
   670  		max.mu.Unlock()
   671  		if ms != 0 {
   672  			log.Info("handle fine grained", zap.Int("backoffMs", ms))
   673  			// TODO: fill a meaningful error.
   674  			err := bo.BackoffWithMaxSleepTxnLockFast(ms, berrors.ErrUnknown)
   675  			if err != nil {
   676  				return errors.Trace(err)
   677  			}
   678  		}
   679  	}
   680  }
   681  
   682  // OnBackupResponse checks the backup response, decides whether to retry, and generates the error if needed.
   683  func OnBackupResponse(
   684  	storeID uint64,
   685  	bo *tikv.Backoffer,
   686  	backupTS uint64,
   687  	lockResolver *txnlock.LockResolver,
   688  	resp *backuppb.BackupResponse,
   689  ) (*backuppb.BackupResponse, int, error) {
   690  	log.Debug("OnBackupResponse", zap.Reflect("resp", resp))
   691  	if resp.Error == nil {
   692  		return resp, 0, nil
   693  	}
   694  	backoffMs := 0
   695  	switch v := resp.Error.Detail.(type) {
   696  	case *backuppb.Error_KvError:
   697  		if lockErr := v.KvError.Locked; lockErr != nil {
   698  			// Try to resolve lock.
   699  			log.Warn("backup occur kv error", zap.Reflect("error", v))
   700  			msBeforeExpired, _, err1 := lockResolver.ResolveLocks(
   701  				bo, backupTS, []*txnlock.Lock{txnlock.NewLock(lockErr)})
   702  			if err1 != nil {
   703  				return nil, 0, errors.Trace(err1)
   704  			}
   705  			if msBeforeExpired > 0 {
   706  				backoffMs = int(msBeforeExpired)
   707  			}
   708  			return nil, backoffMs, nil
   709  		}
   710  		// Backup should not meet errors other than KeyLocked.
   711  		log.Error("unexpected kv error", zap.Reflect("KvError", v.KvError))
   712  		return nil, backoffMs, errors.Annotatef(berrors.ErrKVUnknown, "storeID: %d OnBackupResponse error %v", storeID, v)
   713  
   714  	case *backuppb.Error_RegionError:
   715  		regionErr := v.RegionError
   716  		// Ignore the following errors.
   717  		if !(regionErr.EpochNotMatch != nil ||
   718  			regionErr.NotLeader != nil ||
   719  			regionErr.RegionNotFound != nil ||
   720  			regionErr.ServerIsBusy != nil ||
   721  			regionErr.StaleCommand != nil ||
   722  			regionErr.StoreNotMatch != nil ||
   723  			regionErr.ReadIndexNotReady != nil ||
   724  			regionErr.ProposalInMergingMode != nil) {
   725  			log.Error("unexpected region error", zap.Reflect("RegionError", regionErr))
   726  			return nil, backoffMs, errors.Annotatef(berrors.ErrKVUnknown, "storeID: %d OnBackupResponse error %v", storeID, v)
   727  		}
   728  		log.Warn("backup occur region error",
   729  			zap.Reflect("RegionError", regionErr),
   730  			zap.Uint64("storeID", storeID))
   731  		// TODO: a better backoff.
   732  		backoffMs = 1000 /* 1s */
   733  		return nil, backoffMs, nil
   734  	case *backuppb.Error_ClusterIdError:
   735  		log.Error("backup occur cluster ID error", zap.Reflect("error", v), zap.Uint64("storeID", storeID))
   736  		return nil, 0, errors.Annotatef(berrors.ErrKVClusterIDMismatch, "%v on storeID: %d", resp.Error, storeID)
   737  	default:
   738  		// UNSAFE! TODO: use a meaningful error code instead of matching the unstructured message to detect failed-to-write errors.
   739  		if utils.MessageIsRetryableStorageError(resp.GetError().GetMsg()) {
   740  			log.Warn("backup occur storage error", zap.String("error", resp.GetError().GetMsg()))
   741  			// back off 3000ms: since S3 is 99.99% available (i.e. the max outage time would be less than 52.56 minutes per year),
   742  			// this should probably be enough time for S3 to resume.
   743  			return nil, 3000, nil
   744  		}
   745  		log.Error("backup occur unknown error", zap.String("error", resp.Error.GetMsg()), zap.Uint64("storeID", storeID))
   746  		return nil, 0, errors.Annotatef(berrors.ErrKVUnknown, "%v on storeID: %d", resp.Error, storeID)
   747  	}
   748  }
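
// The three return values of OnBackupResponse encode the decision, roughly:
//
//	resp, 0, nil          // success: forward the response to the caller
//	nil, backoffMs, nil   // retryable (key locked, tolerable region error, storage hiccup): back off and retry
//	nil, 0, err           // unexpected error: abort the backup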
   749  
   750  func (bc *Client) handleFineGrained(
   751  	ctx context.Context,
   752  	bo *tikv.Backoffer,
   753  	rg rtree.Range,
   754  	lastBackupTS uint64,
   755  	backupTS uint64,
   756  	compressType backuppb.CompressionType,
   757  	compressionLevel int32,
   758  	rateLimit uint64,
   759  	concurrency uint32,
   760  	respCh chan<- *backuppb.BackupResponse,
   761  ) (int, error) {
   762  	leader, pderr := bc.findRegionLeader(ctx, rg.StartKey)
   763  	if pderr != nil {
   764  		return 0, errors.Trace(pderr)
   765  	}
   766  	storeID := leader.GetStoreId()
   767  
   768  	req := backuppb.BackupRequest{
   769  		ClusterId:        bc.clusterID,
   770  		StartKey:         rg.StartKey, // TODO: the range may cross region.
   771  		EndKey:           rg.EndKey,
   772  		StartVersion:     lastBackupTS,
   773  		EndVersion:       backupTS,
   774  		StorageBackend:   bc.backend,
   775  		RateLimit:        rateLimit,
   776  		Concurrency:      concurrency,
   777  		CompressionType:  compressType,
   778  		CompressionLevel: compressionLevel,
   779  	}
   780  	lockResolver := bc.mgr.GetLockResolver()
   781  	client, err := bc.mgr.GetBackupClient(ctx, storeID)
   782  	if err != nil {
   783  		if berrors.Is(err, berrors.ErrFailedToConnect) {
   784  			// When the leader store is down, back off for 20s,
   785  			// the default max duration before the raft election timer fires.
   786  			logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID))
   787  			return 20000, nil
   788  		}
   789  
   790  		logutil.CL(ctx).Error("fail to connect store", zap.Uint64("StoreID", storeID))
   791  		return 0, errors.Annotatef(err, "failed to connect to store %d", storeID)
   792  	}
   793  	hasProgress := false
   794  	backoffMill := 0
   795  	err = SendBackup(
   796  		ctx, storeID, client, req,
   797  		// Handle responses with the same backoffer.
   798  		func(resp *backuppb.BackupResponse) error {
   799  			response, shouldBackoff, err1 :=
   800  				OnBackupResponse(storeID, bo, backupTS, lockResolver, resp)
   801  			if err1 != nil {
   802  				return err1
   803  			}
   804  			if backoffMill < shouldBackoff {
   805  				backoffMill = shouldBackoff
   806  			}
   807  			if response != nil {
   808  				respCh <- response
   809  			}
   810  			// When we meet an error, we need to set hasProgress too, so that we
   811  			// do not override the backoff time of the original error.
   812  			// hasProgress would be false iff there is an early io.EOF from the stream.
   813  			hasProgress = true
   814  			return nil
   815  		},
   816  		func() (backuppb.BackupClient, error) {
   817  			logutil.CL(ctx).Warn("reset the connection in handleFineGrained", zap.Uint64("storeID", storeID))
   818  			return bc.mgr.ResetBackupClient(ctx, storeID)
   819  		})
   820  	if err != nil {
   821  		if berrors.Is(err, berrors.ErrFailedToConnect) {
   822  			// When the leader store is down, back off for 20s,
   823  			// the default max duration before the raft election timer fires.
   824  			logutil.CL(ctx).Warn("failed to connect to store, skipping", logutil.ShortError(err), zap.Uint64("storeID", storeID))
   825  			return 20000, nil
   826  		}
   827  		logutil.CL(ctx).Error("failed to send fine-grained backup", zap.Uint64("storeID", storeID), logutil.ShortError(err))
   828  		return 0, errors.Annotatef(err, "failed to send fine-grained backup [%s, %s)",
   829  			redact.Key(req.StartKey), redact.Key(req.EndKey))
   830  	}
   831  
   832  	// If there was no progress, back off 10s for debouncing.
   833  	// 10s is the default interval at which stores send a heartbeat to PD,
   834  	// and is also the average new-leader election timeout, which makes it a reasonable backoff time.
   835  	if !hasProgress {
   836  		backoffMill = 10000
   837  	}
   838  	return backoffMill, nil
   839  }
   840  
   841  // SendBackup sends a backup request to the given store.
   842  // It stops receiving responses if respFn returns an error.
   843  func SendBackup(
   844  	ctx context.Context,
   845  	// the `storeID` seems to be used only for logging now; maybe we can remove it later?
   846  	storeID uint64,
   847  	client backuppb.BackupClient,
   848  	req backuppb.BackupRequest,
   849  	respFn func(*backuppb.BackupResponse) error,
   850  	resetFn func() (backuppb.BackupClient, error),
   851  ) error {
   852  	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
   853  		span1 := span.Tracer().StartSpan(
   854  			fmt.Sprintf("Client.SendBackup, storeID = %d, StartKey = %s, EndKey = %s",
   855  				storeID, redact.Key(req.StartKey), redact.Key(req.EndKey)),
   856  			opentracing.ChildOf(span.Context()))
   857  		defer span1.Finish()
   858  		ctx = opentracing.ContextWithSpan(ctx, span1)
   859  	}
   860  
   861  	var errReset error
   862  backupLoop:
   863  	for retry := 0; retry < backupRetryTimes; retry++ {
   864  		logutil.CL(ctx).Info("try backup",
   865  			zap.Int("retry time", retry),
   866  		)
   867  		failpoint.Inject("hint-backup-start", func(v failpoint.Value) {
   868  			logutil.CL(ctx).Info("failpoint hint-backup-start injected, " +
   869  				"process will notify the shell.")
   870  			if sigFile, ok := v.(string); ok {
   871  				file, err := os.Create(sigFile)
   872  				if err != nil {
   873  					log.Warn("failed to create file for notifying, skipping notify", zap.Error(err))
   874  				}
   875  				if file != nil {
   876  					file.Close()
   877  				}
   878  			}
   879  			time.Sleep(3 * time.Second)
   880  		})
   881  		bcli, err := client.Backup(ctx, &req)
   882  		failpoint.Inject("reset-retryable-error", func(val failpoint.Value) {
   883  			if val.(bool) {
   884  				logutil.CL(ctx).Debug("failpoint reset-retryable-error injected.")
   885  				err = status.Error(codes.Unavailable, "Unavailable error")
   886  			}
   887  		})
   888  		failpoint.Inject("reset-not-retryable-error", func(val failpoint.Value) {
   889  			if val.(bool) {
   890  				logutil.CL(ctx).Debug("failpoint reset-not-retryable-error injected.")
   891  				err = status.Error(codes.Unknown, "Your server was haunted hence doesn't work, meow :3")
   892  			}
   893  		})
   894  		if err != nil {
   895  			if isRetryableError(err) {
   896  				time.Sleep(3 * time.Second)
   897  				client, errReset = resetFn()
   898  				if errReset != nil {
   899  					return errors.Annotatef(errReset, "failed to reset backup connection on store:%d "+
   900  						"please check the tikv status", storeID)
   901  				}
   902  				continue
   903  			}
   904  			logutil.CL(ctx).Error("fail to backup", zap.Uint64("StoreID", storeID),
   905  				zap.Int("retry time", retry))
   906  			return berrors.ErrFailedToConnect.Wrap(err).GenWithStack("failed to create backup stream to store %d", storeID)
   907  		}
   908  		defer bcli.CloseSend()
   909  
   910  		for {
   911  			resp, err := bcli.Recv()
   912  			if err != nil {
   913  				if errors.Cause(err) == io.EOF { // nolint:errorlint
   914  					logutil.CL(ctx).Info("backup streaming finish",
   915  						zap.Int("retry-time", retry))
   916  					break backupLoop
   917  				}
   918  				if isRetryableError(err) {
   919  					time.Sleep(3 * time.Second)
   920  					// current tikv is unavailable
   921  					client, errReset = resetFn()
   922  					if errReset != nil {
   923  						return errors.Annotatef(errReset, "failed to reset recv connection on store:%d "+
   924  							"please check the tikv status", storeID)
   925  					}
   926  					break
   927  				}
   928  				return berrors.ErrFailedToConnect.Wrap(err).GenWithStack("failed to connect to store: %d with retry times:%d", storeID, retry)
   929  			}
   930  
   931  			// TODO: handle errors in the resp.
   932  			logutil.CL(ctx).Info("range backed up",
   933  				logutil.Key("small-range-start-key", resp.GetStartKey()),
   934  				logutil.Key("small-range-end-key", resp.GetEndKey()))
   935  			err = respFn(resp)
   936  			if err != nil {
   937  				return errors.Trace(err)
   938  			}
   939  		}
   940  	}
   941  	return nil
   942  }
   943  
   944  // isRetryableError reports whether we should retry resetting the gRPC connection.
   945  func isRetryableError(err error) bool {
   946  	return status.Code(err) == codes.Unavailable
   947  }
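
// For example (a sketch of the behaviour, not a test in this package):
//
//	isRetryableError(status.Error(codes.Unavailable, "store is down"))  // true: reset the connection and retry
//	isRetryableError(status.Error(codes.Unknown, "some other failure")) // false: give up
//	isRetryableError(io.EOF)                                            // false: non-gRPC errors map to codes.Unknown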