github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/restore/import.go (about)

     1  // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.
     2  
     3  package restore
     4  
     5  import (
     6  	"bytes"
     7  	"context"
     8  	"crypto/tls"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/google/uuid"
    13  	"github.com/pingcap/errors"
    14  	"github.com/pingcap/failpoint"
    15  	backuppb "github.com/pingcap/kvproto/pkg/backup"
    16  	"github.com/pingcap/kvproto/pkg/import_sstpb"
    17  	"github.com/pingcap/kvproto/pkg/kvrpcpb"
    18  	"github.com/pingcap/log"
    19  	pd "github.com/tikv/pd/client"
    20  	"github.com/tikv/pd/pkg/codec"
    21  	"go.uber.org/multierr"
    22  	"go.uber.org/zap"
    23  	"google.golang.org/grpc"
    24  	"google.golang.org/grpc/backoff"
    25  	"google.golang.org/grpc/codes"
    26  	"google.golang.org/grpc/credentials"
    27  	"google.golang.org/grpc/keepalive"
    28  	"google.golang.org/grpc/status"
    29  
    30  	"github.com/pingcap/br/pkg/conn"
    31  	berrors "github.com/pingcap/br/pkg/errors"
    32  	"github.com/pingcap/br/pkg/logutil"
    33  	"github.com/pingcap/br/pkg/summary"
    34  	"github.com/pingcap/br/pkg/utils"
    35  )
    36  
    37  const (
    38  	importScanRegionTime = 10 * time.Second
    39  	gRPCBackOffMaxDelay  = 3 * time.Second
    40  )
    41  
    42  // ImporterClient is used to import a file to TiKV.
    43  type ImporterClient interface {
    44  	DownloadSST(
    45  		ctx context.Context,
    46  		storeID uint64,
    47  		req *import_sstpb.DownloadRequest,
    48  	) (*import_sstpb.DownloadResponse, error)
    49  
    50  	IngestSST(
    51  		ctx context.Context,
    52  		storeID uint64,
    53  		req *import_sstpb.IngestRequest,
    54  	) (*import_sstpb.IngestResponse, error)
    55  	MultiIngest(
    56  		ctx context.Context,
    57  		storeID uint64,
    58  		req *import_sstpb.MultiIngestRequest,
    59  	) (*import_sstpb.IngestResponse, error)
    60  
    61  	SetDownloadSpeedLimit(
    62  		ctx context.Context,
    63  		storeID uint64,
    64  		req *import_sstpb.SetDownloadSpeedLimitRequest,
    65  	) (*import_sstpb.SetDownloadSpeedLimitResponse, error)
    66  
    67  	GetImportClient(
    68  		ctx context.Context,
    69  		storeID uint64,
    70  	) (import_sstpb.ImportSSTClient, error)
    71  
    72  	SupportMultiIngest(ctx context.Context, stores []uint64) (bool, error)
    73  }
    74  
    75  type importClient struct {
    76  	mu         sync.Mutex
    77  	metaClient SplitClient
    78  	clients    map[uint64]import_sstpb.ImportSSTClient
    79  	tlsConf    *tls.Config
    80  
    81  	keepaliveConf keepalive.ClientParameters
    82  }
    83  
    84  // NewImportClient returns a new ImporterClient.
    85  func NewImportClient(metaClient SplitClient, tlsConf *tls.Config, keepaliveConf keepalive.ClientParameters) ImporterClient {
    86  	return &importClient{
    87  		metaClient:    metaClient,
    88  		clients:       make(map[uint64]import_sstpb.ImportSSTClient),
    89  		tlsConf:       tlsConf,
    90  		keepaliveConf: keepaliveConf,
    91  	}
    92  }
    93  
    94  func (ic *importClient) DownloadSST(
    95  	ctx context.Context,
    96  	storeID uint64,
    97  	req *import_sstpb.DownloadRequest,
    98  ) (*import_sstpb.DownloadResponse, error) {
    99  	client, err := ic.GetImportClient(ctx, storeID)
   100  	if err != nil {
   101  		return nil, errors.Trace(err)
   102  	}
   103  	return client.Download(ctx, req)
   104  }
   105  
   106  func (ic *importClient) SetDownloadSpeedLimit(
   107  	ctx context.Context,
   108  	storeID uint64,
   109  	req *import_sstpb.SetDownloadSpeedLimitRequest,
   110  ) (*import_sstpb.SetDownloadSpeedLimitResponse, error) {
   111  	client, err := ic.GetImportClient(ctx, storeID)
   112  	if err != nil {
   113  		return nil, errors.Trace(err)
   114  	}
   115  	return client.SetDownloadSpeedLimit(ctx, req)
   116  }
   117  
   118  func (ic *importClient) IngestSST(
   119  	ctx context.Context,
   120  	storeID uint64,
   121  	req *import_sstpb.IngestRequest,
   122  ) (*import_sstpb.IngestResponse, error) {
   123  	client, err := ic.GetImportClient(ctx, storeID)
   124  	if err != nil {
   125  		return nil, errors.Trace(err)
   126  	}
   127  	return client.Ingest(ctx, req)
   128  }
   129  
   130  func (ic *importClient) MultiIngest(
   131  	ctx context.Context,
   132  	storeID uint64,
   133  	req *import_sstpb.MultiIngestRequest,
   134  ) (*import_sstpb.IngestResponse, error) {
   135  	client, err := ic.GetImportClient(ctx, storeID)
   136  	if err != nil {
   137  		return nil, errors.Trace(err)
   138  	}
   139  	return client.MultiIngest(ctx, req)
   140  }
   141  
   142  func (ic *importClient) GetImportClient(
   143  	ctx context.Context,
   144  	storeID uint64,
   145  ) (import_sstpb.ImportSSTClient, error) {
   146  	ic.mu.Lock()
   147  	defer ic.mu.Unlock()
   148  	client, ok := ic.clients[storeID]
   149  	if ok {
   150  		return client, nil
   151  	}
   152  	store, err := ic.metaClient.GetStore(ctx, storeID)
   153  	if err != nil {
   154  		return nil, errors.Trace(err)
   155  	}
   156  	opt := grpc.WithInsecure()
   157  	if ic.tlsConf != nil {
   158  		opt = grpc.WithTransportCredentials(credentials.NewTLS(ic.tlsConf))
   159  	}
   160  	addr := store.GetPeerAddress()
   161  	if addr == "" {
   162  		addr = store.GetAddress()
   163  	}
   164  	bfConf := backoff.DefaultConfig
   165  	bfConf.MaxDelay = gRPCBackOffMaxDelay
   166  	conn, err := grpc.DialContext(
   167  		ctx,
   168  		addr,
   169  		opt,
   170  		grpc.WithConnectParams(grpc.ConnectParams{Backoff: bfConf}),
   171  		grpc.WithKeepaliveParams(ic.keepaliveConf),
   172  	)
   173  	if err != nil {
   174  		return nil, errors.Trace(err)
   175  	}
   176  	client = import_sstpb.NewImportSSTClient(conn)
   177  	ic.clients[storeID] = client
   178  	return client, errors.Trace(err)
   179  }
   180  
   181  func (ic *importClient) SupportMultiIngest(ctx context.Context, stores []uint64) (bool, error) {
   182  	for _, storeID := range stores {
   183  		_, err := ic.MultiIngest(ctx, storeID, &import_sstpb.MultiIngestRequest{})
   184  		if err != nil {
   185  			if s, ok := status.FromError(err); ok {
   186  				if s.Code() == codes.Unimplemented {
   187  					return false, nil
   188  				}
   189  			}
   190  			return false, errors.Trace(err)
   191  		}
   192  	}
   193  	return true, nil
   194  }
   195  
   196  // FileImporter used to import a file to TiKV.
   197  type FileImporter struct {
   198  	metaClient   SplitClient
   199  	importClient ImporterClient
   200  	backend      *backuppb.StorageBackend
   201  	rateLimit    uint64
   202  
   203  	isRawKvMode        bool
   204  	rawStartKey        []byte
   205  	rawEndKey          []byte
   206  	supportMultiIngest bool
   207  }
   208  
   209  // NewFileImporter returns a new file importClient.
   210  func NewFileImporter(
   211  	metaClient SplitClient,
   212  	importClient ImporterClient,
   213  	backend *backuppb.StorageBackend,
   214  	isRawKvMode bool,
   215  	rateLimit uint64,
   216  ) FileImporter {
   217  	return FileImporter{
   218  		metaClient:   metaClient,
   219  		backend:      backend,
   220  		importClient: importClient,
   221  		isRawKvMode:  isRawKvMode,
   222  		rateLimit:    rateLimit,
   223  	}
   224  }
   225  
   226  // CheckMultiIngestSupport checks whether all stores support multi-ingest
   227  func (importer *FileImporter) CheckMultiIngestSupport(ctx context.Context, pdClient pd.Client) error {
   228  	allStores, err := conn.GetAllTiKVStores(ctx, pdClient, conn.SkipTiFlash)
   229  	if err != nil {
   230  		return errors.Trace(err)
   231  	}
   232  	storeIDs := make([]uint64, 0, len(allStores))
   233  	for _, s := range allStores {
   234  		storeIDs = append(storeIDs, s.Id)
   235  	}
   236  
   237  	support, err := importer.importClient.SupportMultiIngest(ctx, storeIDs)
   238  	if err != nil {
   239  		return errors.Trace(err)
   240  	}
   241  	importer.supportMultiIngest = support
   242  	log.L().Info("multi ingest support", zap.Bool("support", support))
   243  	return nil
   244  }
   245  
   246  // SetRawRange sets the range to be restored in raw kv mode.
   247  func (importer *FileImporter) SetRawRange(startKey, endKey []byte) error {
   248  	if !importer.isRawKvMode {
   249  		return errors.Annotate(berrors.ErrRestoreModeMismatch, "file importer is not in raw kv mode")
   250  	}
   251  	importer.rawStartKey = startKey
   252  	importer.rawEndKey = endKey
   253  	return nil
   254  }
   255  
   256  // Import tries to import a file.
   257  // All rules must contain encoded keys.
   258  func (importer *FileImporter) Import(
   259  	ctx context.Context,
   260  	files []*backuppb.File,
   261  	rewriteRules *RewriteRules,
   262  ) error {
   263  	log.Debug("import file", logutil.Files(files))
   264  	// Rewrite the start key and end key of file to scan regions
   265  	var startKey, endKey []byte
   266  	if importer.isRawKvMode {
   267  		startKey = files[0].StartKey
   268  		endKey = files[0].EndKey
   269  	} else {
   270  		for _, f := range files {
   271  			start, end, err := rewriteFileKeys(f, rewriteRules)
   272  			if err != nil {
   273  				return errors.Trace(err)
   274  			}
   275  			if len(startKey) == 0 || bytes.Compare(startKey, start) > 0 {
   276  				startKey = start
   277  			}
   278  			if bytes.Compare(endKey, end) < 0 {
   279  				endKey = end
   280  			}
   281  		}
   282  	}
   283  
   284  	log.Debug("rewrite file keys",
   285  		logutil.Files(files),
   286  		logutil.Key("startKey", startKey),
   287  		logutil.Key("endKey", endKey))
   288  
   289  	err := utils.WithRetry(ctx, func() error {
   290  		tctx, cancel := context.WithTimeout(ctx, importScanRegionTime)
   291  		defer cancel()
   292  		// Scan regions covered by the file range
   293  		regionInfos, errScanRegion := PaginateScanRegion(
   294  			tctx, importer.metaClient, startKey, endKey, ScanRegionPaginationLimit)
   295  		if errScanRegion != nil {
   296  			return errors.Trace(errScanRegion)
   297  		}
   298  
   299  		log.Debug("scan regions", logutil.Files(files), zap.Int("count", len(regionInfos)))
   300  		// Try to download and ingest the file in every region
   301  	regionLoop:
   302  		for _, regionInfo := range regionInfos {
   303  			info := regionInfo
   304  			// Try to download file.
   305  			downloadMetas := make([]*import_sstpb.SSTMeta, 0, len(files))
   306  			remainFiles := files
   307  			errDownload := utils.WithRetry(ctx, func() error {
   308  				var e error
   309  				for i, f := range remainFiles {
   310  					var downloadMeta *import_sstpb.SSTMeta
   311  					if importer.isRawKvMode {
   312  						downloadMeta, e = importer.downloadRawKVSST(ctx, info, f)
   313  					} else {
   314  						downloadMeta, e = importer.downloadSST(ctx, info, f, rewriteRules)
   315  					}
   316  					failpoint.Inject("restore-storage-error", func(val failpoint.Value) {
   317  						msg := val.(string)
   318  						log.Debug("failpoint restore-storage-error injected.", zap.String("msg", msg))
   319  						e = errors.Annotate(e, msg)
   320  					})
   321  					if e != nil {
   322  						remainFiles = remainFiles[i:]
   323  						return errors.Trace(e)
   324  					}
   325  					downloadMetas = append(downloadMetas, downloadMeta)
   326  				}
   327  
   328  				return nil
   329  			}, newDownloadSSTBackoffer())
   330  			if errDownload != nil {
   331  				for _, e := range multierr.Errors(errDownload) {
   332  					switch errors.Cause(e) { // nolint:errorlint
   333  					case berrors.ErrKVRewriteRuleNotFound, berrors.ErrKVRangeIsEmpty:
   334  						// Skip this region
   335  						log.Warn("download file skipped",
   336  							logutil.Files(files),
   337  							logutil.Region(info.Region),
   338  							logutil.Key("startKey", startKey),
   339  							logutil.Key("endKey", endKey),
   340  							logutil.ShortError(e))
   341  						continue regionLoop
   342  					}
   343  				}
   344  				log.Error("download file failed",
   345  					logutil.Files(files),
   346  					logutil.Region(info.Region),
   347  					logutil.Key("startKey", startKey),
   348  					logutil.Key("endKey", endKey),
   349  					logutil.ShortError(errDownload))
   350  				return errors.Trace(errDownload)
   351  			}
   352  
   353  			ingestResp, errIngest := importer.ingestSSTs(ctx, downloadMetas, info)
   354  		ingestRetry:
   355  			for errIngest == nil {
   356  				errPb := ingestResp.GetError()
   357  				if errPb == nil {
   358  					// Ingest success
   359  					break ingestRetry
   360  				}
   361  				switch {
   362  				case errPb.NotLeader != nil:
   363  					// If error is `NotLeader`, update the region info and retry
   364  					var newInfo *RegionInfo
   365  					if newLeader := errPb.GetNotLeader().GetLeader(); newLeader != nil {
   366  						newInfo = &RegionInfo{
   367  							Leader: newLeader,
   368  							Region: info.Region,
   369  						}
   370  					} else {
   371  						// Slow path, get region from PD
   372  						newInfo, errIngest = importer.metaClient.GetRegion(
   373  							ctx, info.Region.GetStartKey())
   374  						if errIngest != nil {
   375  							break ingestRetry
   376  						}
   377  						// do not get region info, wait a second and continue
   378  						if newInfo == nil {
   379  							log.Warn("get region by key return nil", logutil.Region(info.Region))
   380  							time.Sleep(time.Second)
   381  							continue
   382  						}
   383  					}
   384  					log.Debug("ingest sst returns not leader error, retry it",
   385  						logutil.Region(info.Region),
   386  						zap.Stringer("newLeader", newInfo.Leader))
   387  
   388  					if !checkRegionEpoch(newInfo, info) {
   389  						errIngest = errors.Trace(berrors.ErrKVEpochNotMatch)
   390  						break ingestRetry
   391  					}
   392  					ingestResp, errIngest = importer.ingestSSTs(ctx, downloadMetas, newInfo)
   393  				case errPb.EpochNotMatch != nil:
   394  					// TODO handle epoch not match error
   395  					//      1. retry download if needed
   396  					//      2. retry ingest
   397  					errIngest = errors.Trace(berrors.ErrKVEpochNotMatch)
   398  					break ingestRetry
   399  				case errPb.KeyNotInRegion != nil:
   400  					errIngest = errors.Trace(berrors.ErrKVKeyNotInRegion)
   401  					break ingestRetry
   402  				default:
   403  					// Other errors like `ServerIsBusy`, `RegionNotFound`, etc. should be retryable
   404  					errIngest = errors.Annotatef(berrors.ErrKVIngestFailed, "ingest error %s", errPb)
   405  					break ingestRetry
   406  				}
   407  			}
   408  
   409  			if errIngest != nil {
   410  				log.Error("ingest file failed",
   411  					logutil.Files(files),
   412  					logutil.SSTMetas(downloadMetas),
   413  					logutil.Region(info.Region),
   414  					zap.Error(errIngest))
   415  				return errors.Trace(errIngest)
   416  			}
   417  		}
   418  		for _, f := range files {
   419  			summary.CollectSuccessUnit(summary.TotalKV, 1, f.TotalKvs)
   420  			summary.CollectSuccessUnit(summary.TotalBytes, 1, f.TotalBytes)
   421  		}
   422  
   423  		return nil
   424  	}, newImportSSTBackoffer())
   425  	return errors.Trace(err)
   426  }
   427  
   428  func (importer *FileImporter) setDownloadSpeedLimit(ctx context.Context, storeID uint64) error {
   429  	req := &import_sstpb.SetDownloadSpeedLimitRequest{
   430  		SpeedLimit: importer.rateLimit,
   431  	}
   432  	_, err := importer.importClient.SetDownloadSpeedLimit(ctx, storeID, req)
   433  	return errors.Trace(err)
   434  }
   435  
   436  func (importer *FileImporter) downloadSST(
   437  	ctx context.Context,
   438  	regionInfo *RegionInfo,
   439  	file *backuppb.File,
   440  	rewriteRules *RewriteRules,
   441  ) (*import_sstpb.SSTMeta, error) {
   442  	uid := uuid.New()
   443  	id := uid[:]
   444  	// Assume one region reflects to one rewrite rule
   445  	_, key, err := codec.DecodeBytes(regionInfo.Region.GetStartKey())
   446  	if err != nil {
   447  		return nil, errors.Trace(err)
   448  	}
   449  	regionRule := matchNewPrefix(key, rewriteRules)
   450  	if regionRule == nil {
   451  		return nil, errors.Trace(berrors.ErrKVRewriteRuleNotFound)
   452  	}
   453  	rule := import_sstpb.RewriteRule{
   454  		OldKeyPrefix: encodeKeyPrefix(regionRule.GetOldKeyPrefix()),
   455  		NewKeyPrefix: encodeKeyPrefix(regionRule.GetNewKeyPrefix()),
   456  	}
   457  	sstMeta := GetSSTMetaFromFile(id, file, regionInfo.Region, &rule)
   458  
   459  	req := &import_sstpb.DownloadRequest{
   460  		Sst:            sstMeta,
   461  		StorageBackend: importer.backend,
   462  		Name:           file.GetName(),
   463  		RewriteRule:    rule,
   464  	}
   465  	log.Debug("download SST",
   466  		logutil.SSTMeta(&sstMeta),
   467  		logutil.File(file),
   468  		logutil.Region(regionInfo.Region),
   469  	)
   470  	var resp *import_sstpb.DownloadResponse
   471  	for _, peer := range regionInfo.Region.GetPeers() {
   472  		resp, err = importer.importClient.DownloadSST(ctx, peer.GetStoreId(), req)
   473  		if err != nil {
   474  			return nil, errors.Trace(err)
   475  		}
   476  		if resp.GetError() != nil {
   477  			return nil, errors.Annotate(berrors.ErrKVDownloadFailed, resp.GetError().GetMessage())
   478  		}
   479  		if resp.GetIsEmpty() {
   480  			return nil, errors.Trace(berrors.ErrKVRangeIsEmpty)
   481  		}
   482  	}
   483  	sstMeta.Range.Start = truncateTS(resp.Range.GetStart())
   484  	sstMeta.Range.End = truncateTS(resp.Range.GetEnd())
   485  	return &sstMeta, nil
   486  }
   487  
   488  func (importer *FileImporter) downloadRawKVSST(
   489  	ctx context.Context,
   490  	regionInfo *RegionInfo,
   491  	file *backuppb.File,
   492  ) (*import_sstpb.SSTMeta, error) {
   493  	uid := uuid.New()
   494  	id := uid[:]
   495  	// Empty rule
   496  	var rule import_sstpb.RewriteRule
   497  	sstMeta := GetSSTMetaFromFile(id, file, regionInfo.Region, &rule)
   498  
   499  	// Cut the SST file's range to fit in the restoring range.
   500  	if bytes.Compare(importer.rawStartKey, sstMeta.Range.GetStart()) > 0 {
   501  		sstMeta.Range.Start = importer.rawStartKey
   502  	}
   503  	if len(importer.rawEndKey) > 0 &&
   504  		(len(sstMeta.Range.GetEnd()) == 0 || bytes.Compare(importer.rawEndKey, sstMeta.Range.GetEnd()) <= 0) {
   505  		sstMeta.Range.End = importer.rawEndKey
   506  		sstMeta.EndKeyExclusive = true
   507  	}
   508  	if bytes.Compare(sstMeta.Range.GetStart(), sstMeta.Range.GetEnd()) > 0 {
   509  		return nil, errors.Trace(berrors.ErrKVRangeIsEmpty)
   510  	}
   511  
   512  	req := &import_sstpb.DownloadRequest{
   513  		Sst:            sstMeta,
   514  		StorageBackend: importer.backend,
   515  		Name:           file.GetName(),
   516  		RewriteRule:    rule,
   517  		IsRawKv:        true,
   518  	}
   519  	log.Debug("download SST", logutil.SSTMeta(&sstMeta), logutil.Region(regionInfo.Region))
   520  	var err error
   521  	var resp *import_sstpb.DownloadResponse
   522  	for _, peer := range regionInfo.Region.GetPeers() {
   523  		resp, err = importer.importClient.DownloadSST(ctx, peer.GetStoreId(), req)
   524  		if err != nil {
   525  			return nil, errors.Trace(err)
   526  		}
   527  		if resp.GetError() != nil {
   528  			return nil, errors.Annotate(berrors.ErrKVDownloadFailed, resp.GetError().GetMessage())
   529  		}
   530  		if resp.GetIsEmpty() {
   531  			return nil, errors.Trace(berrors.ErrKVRangeIsEmpty)
   532  		}
   533  	}
   534  	sstMeta.Range.Start = resp.Range.GetStart()
   535  	sstMeta.Range.End = resp.Range.GetEnd()
   536  	return &sstMeta, nil
   537  }
   538  
   539  func (importer *FileImporter) ingestSSTs(
   540  	ctx context.Context,
   541  	sstMetas []*import_sstpb.SSTMeta,
   542  	regionInfo *RegionInfo,
   543  ) (*import_sstpb.IngestResponse, error) {
   544  	leader := regionInfo.Leader
   545  	if leader == nil {
   546  		leader = regionInfo.Region.GetPeers()[0]
   547  	}
   548  	reqCtx := &kvrpcpb.Context{
   549  		RegionId:    regionInfo.Region.GetId(),
   550  		RegionEpoch: regionInfo.Region.GetRegionEpoch(),
   551  		Peer:        leader,
   552  	}
   553  
   554  	if !importer.supportMultiIngest {
   555  		// TODO: not sure we need this check
   556  		if len(sstMetas) != 1 {
   557  			panic("do not support batch ingest")
   558  		}
   559  		req := &import_sstpb.IngestRequest{
   560  			Context: reqCtx,
   561  			Sst:     sstMetas[0],
   562  		}
   563  		log.Debug("ingest SST", logutil.SSTMeta(sstMetas[0]), logutil.Leader(leader))
   564  		resp, err := importer.importClient.IngestSST(ctx, leader.GetStoreId(), req)
   565  		return resp, errors.Trace(err)
   566  	}
   567  
   568  	req := &import_sstpb.MultiIngestRequest{
   569  		Context: reqCtx,
   570  		Ssts:    sstMetas,
   571  	}
   572  	log.Debug("ingest SSTs", logutil.SSTMetas(sstMetas), logutil.Leader(leader))
   573  	resp, err := importer.importClient.MultiIngest(ctx, leader.GetStoreId(), req)
   574  	return resp, errors.Trace(err)
   575  }