github.com/pingcap/tidb-lightning@v5.0.0-rc.0.20210428090220-84b649866577+incompatible/lightning/backend/localhelper.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package backend
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"encoding/hex"
    20  	"regexp"
    21  	"sort"
    22  	"strings"
    23  	"time"
    24  
    25  	split "github.com/pingcap/br/pkg/restore"
    26  	"github.com/pingcap/br/pkg/utils"
    27  	"github.com/pingcap/errors"
    28  	sst "github.com/pingcap/kvproto/pkg/import_sstpb"
    29  	"github.com/pingcap/kvproto/pkg/metapb"
    30  	"github.com/pingcap/kvproto/pkg/pdpb"
    31  	"github.com/pingcap/tidb/util/codec"
    32  	"go.uber.org/zap"
    33  
    34  	"github.com/pingcap/tidb-lightning/lightning/log"
    35  )
    36  
// Retry policy used by SplitAndScatterRegionByRanges:
//   - SplitRetryTimes bounds the number of scan-and-split rounds;
//   - retrySplitMaxWaitTime caps the backoff wait between two rounds.
const (
	SplitRetryTimes       = 8
	retrySplitMaxWaitTime = 4 * time.Second
)

var (
	// maxBatchSplitKeys is the max keys count in a batch to split one region.
	maxBatchSplitKeys = 4096
	// splitRegionBaseBackOffTime is the base exponential backoff time.
	// The variable is only changed in unit tests to make them run faster.
	splitRegionBaseBackOffTime = time.Second
)
    49  
    50  // TODO remove this file and use br internal functions
    51  // This File include region split & scatter operation just like br.
    52  // we can simply call br function, but we need to change some function signature of br
    53  // When the ranges total size is small, we can skip the split to avoid generate empty regions.
    54  func (local *local) SplitAndScatterRegionByRanges(ctx context.Context, ranges []Range, needSplit bool) error {
    55  	if len(ranges) == 0 {
    56  		return nil
    57  	}
    58  
    59  	minKey := codec.EncodeBytes([]byte{}, ranges[0].start)
    60  	maxKey := codec.EncodeBytes([]byte{}, ranges[len(ranges)-1].end)
    61  
    62  	var err error
    63  	scatterRegions := make([]*split.RegionInfo, 0)
    64  	var retryKeys [][]byte
    65  	waitTime := splitRegionBaseBackOffTime
    66  	for i := 0; i < SplitRetryTimes; i++ {
    67  		log.L().Info("split and scatter region",
    68  			log.ZapRedactBinary("minKey", minKey),
    69  			log.ZapRedactBinary("maxKey", maxKey),
    70  			zap.Int("retry", i),
    71  		)
    72  		if i > 0 {
    73  			select {
    74  			case <-time.After(waitTime):
    75  			case <-ctx.Done():
    76  				return ctx.Err()
    77  			}
    78  			waitTime *= 2
    79  			if waitTime > retrySplitMaxWaitTime {
    80  				waitTime = retrySplitMaxWaitTime
    81  			}
    82  		}
    83  		var regions []*split.RegionInfo
    84  		regions, err = paginateScanRegion(ctx, local.splitCli, minKey, maxKey, 128)
    85  		if err != nil {
    86  			log.L().Warn("paginate scan region failed", log.ZapRedactBinary("minKey", minKey), log.ZapRedactBinary("maxKey", maxKey),
    87  				log.ShortError(err), zap.Int("retry", i))
    88  			continue
    89  		}
    90  
    91  		if len(regions) == 0 {
    92  			log.L().Warn("paginate scan region returns empty result", log.ZapRedactBinary("minKey", minKey), log.ZapRedactBinary("maxKey", maxKey),
    93  				zap.Int("retry", i))
    94  			return errors.New("paginate scan region returns empty result")
    95  		}
    96  
    97  		log.L().Info("paginate scan region finished", log.ZapRedactBinary("minKey", minKey), log.ZapRedactBinary("maxKey", maxKey),
    98  			zap.Int("regions", len(regions)))
    99  
   100  		if !needSplit {
   101  			scatterRegions = append(scatterRegions, regions...)
   102  			break
   103  		}
   104  
   105  		regionMap := make(map[uint64]*split.RegionInfo)
   106  		for _, region := range regions {
   107  			regionMap[region.Region.GetId()] = region
   108  		}
   109  
   110  		var splitKeyMap map[uint64][][]byte
   111  		if len(retryKeys) > 0 {
   112  			firstKeyEnc := codec.EncodeBytes([]byte{}, retryKeys[0])
   113  			lastKeyEnc := codec.EncodeBytes([]byte{}, retryKeys[len(retryKeys)-1])
   114  			if bytes.Compare(firstKeyEnc, regions[0].Region.StartKey) < 0 || !beforeEnd(lastKeyEnc, regions[len(regions)-1].Region.EndKey) {
   115  				log.L().Warn("no valid key for split region",
   116  					log.ZapRedactBinary("firstKey", firstKeyEnc), log.ZapRedactBinary("lastKey", lastKeyEnc),
   117  					log.ZapRedactBinary("firstRegionStart", regions[0].Region.StartKey),
   118  					log.ZapRedactBinary("lastRegionEnd", regions[len(regions)-1].Region.EndKey))
   119  				return errors.New("check split keys failed")
   120  			}
   121  			splitKeyMap = getSplitKeys(retryKeys, regions)
   122  			retryKeys = retryKeys[:0]
   123  		} else {
   124  			splitKeyMap = getSplitKeysByRanges(ranges, regions)
   125  		}
   126  		for regionID, keys := range splitKeyMap {
   127  			var newRegions []*split.RegionInfo
   128  			region := regionMap[regionID]
   129  			sort.Slice(keys, func(i, j int) bool {
   130  				return bytes.Compare(keys[i], keys[j]) < 0
   131  			})
   132  			splitRegion := region
   133  			for j := 0; j < (len(keys)+maxBatchSplitKeys-1)/maxBatchSplitKeys; j++ {
   134  				start := j * maxBatchSplitKeys
   135  				end := utils.MinInt((j+1)*maxBatchSplitKeys, len(keys))
   136  				splitRegionStart := codec.EncodeBytes([]byte{}, keys[start])
   137  				splitRegionEnd := codec.EncodeBytes([]byte{}, keys[end-1])
   138  				if bytes.Compare(splitRegionStart, splitRegion.Region.StartKey) < 0 || !beforeEnd(splitRegionEnd, splitRegion.Region.EndKey) {
   139  					log.L().Fatal("no valid key in region",
   140  						log.ZapRedactBinary("startKey", splitRegionStart), log.ZapRedactBinary("endKey", splitRegionEnd),
   141  						log.ZapRedactBinary("regionStart", splitRegion.Region.StartKey), log.ZapRedactBinary("regionEnd", splitRegion.Region.EndKey),
   142  						log.ZapRedactReflect("region", splitRegion))
   143  				}
   144  				splitRegion, newRegions, err = local.BatchSplitRegions(ctx, splitRegion, keys[start:end])
   145  				if err != nil {
   146  					if strings.Contains(err.Error(), "no valid key") {
   147  						for _, key := range keys {
   148  							log.L().Warn("no valid key",
   149  								log.ZapRedactBinary("startKey", region.Region.StartKey),
   150  								log.ZapRedactBinary("endKey", region.Region.EndKey),
   151  								log.ZapRedactBinary("key", codec.EncodeBytes([]byte{}, key)))
   152  						}
   153  						return errors.Trace(err)
   154  					}
   155  					log.L().Warn("split regions", log.ShortError(err), zap.Int("retry time", j+1),
   156  						zap.Uint64("region_id", regionID))
   157  					retryKeys = append(retryKeys, keys[start:]...)
   158  					break
   159  				} else {
   160  					log.L().Info("batch split region", zap.Uint64("region_id", splitRegion.Region.Id),
   161  						zap.Int("keys", end-start), zap.Binary("firstKey", keys[start]),
   162  						zap.Binary("end", keys[end-1]))
   163  					sort.Slice(newRegions, func(i, j int) bool {
   164  						return bytes.Compare(newRegions[i].Region.StartKey, newRegions[j].Region.StartKey) < 0
   165  					})
   166  					scatterRegions = append(scatterRegions, newRegions...)
   167  					// the region with the max start key is the region need to be further split.
   168  					if bytes.Compare(splitRegion.Region.StartKey, newRegions[len(newRegions)-1].Region.StartKey) < 0 {
   169  						splitRegion = newRegions[len(newRegions)-1]
   170  					}
   171  				}
   172  			}
   173  		}
   174  		if len(retryKeys) == 0 {
   175  			break
   176  		} else {
   177  			sort.Slice(retryKeys, func(i, j int) bool {
   178  				return bytes.Compare(retryKeys[i], retryKeys[j]) < 0
   179  			})
   180  			minKey = codec.EncodeBytes([]byte{}, retryKeys[0])
   181  			maxKey = codec.EncodeBytes([]byte{}, nextKey(retryKeys[len(retryKeys)-1]))
   182  		}
   183  	}
   184  	if err != nil {
   185  		return errors.Trace(err)
   186  	}
   187  
   188  	startTime := time.Now()
   189  	scatterCount := 0
   190  	for _, region := range scatterRegions {
   191  		local.waitForScatterRegion(ctx, region)
   192  		if time.Since(startTime) > split.ScatterWaitUpperInterval {
   193  			break
   194  		}
   195  		scatterCount++
   196  	}
   197  	if scatterCount == len(scatterRegions) {
   198  		log.L().Info("waiting for scattering regions done",
   199  			zap.Int("regions", len(scatterRegions)), zap.Duration("take", time.Since(startTime)))
   200  	} else {
   201  		log.L().Info("waiting for scattering regions timeout",
   202  			zap.Int("scatterCount", scatterCount),
   203  			zap.Int("regions", len(scatterRegions)),
   204  			zap.Duration("take", time.Since(startTime)))
   205  	}
   206  	return nil
   207  }
   208  
   209  func paginateScanRegion(
   210  	ctx context.Context, client split.SplitClient, startKey, endKey []byte, limit int,
   211  ) ([]*split.RegionInfo, error) {
   212  	if len(endKey) != 0 && bytes.Compare(startKey, endKey) >= 0 {
   213  		log.L().Error("startKey > endKey when paginating scan region",
   214  			log.ZapRedactString("startKey", hex.EncodeToString(startKey)),
   215  			log.ZapRedactString("endKey", hex.EncodeToString(endKey)))
   216  		return nil, errors.Errorf("startKey > endKey when paginating scan region")
   217  	}
   218  
   219  	var regions []*split.RegionInfo
   220  	for {
   221  		batch, err := client.ScanRegions(ctx, startKey, endKey, limit)
   222  		if err != nil {
   223  			return nil, errors.Trace(err)
   224  		}
   225  		regions = append(regions, batch...)
   226  		if len(batch) < limit {
   227  			// No more region
   228  			break
   229  		}
   230  		startKey = batch[len(batch)-1].Region.GetEndKey()
   231  		if len(startKey) == 0 ||
   232  			(len(endKey) > 0 && bytes.Compare(startKey, endKey) >= 0) {
   233  			// All key space have scanned
   234  			break
   235  		}
   236  	}
   237  	return regions, nil
   238  }
   239  
   240  func (local *local) BatchSplitRegions(ctx context.Context, region *split.RegionInfo, keys [][]byte) (*split.RegionInfo, []*split.RegionInfo, error) {
   241  	region, newRegions, err := local.splitCli.BatchSplitRegionsWithOrigin(ctx, region, keys)
   242  	if err != nil {
   243  		return nil, nil, errors.Annotatef(err, "batch split regions failed")
   244  	}
   245  	var failedErr error
   246  	retryRegions := make([]*split.RegionInfo, 0)
   247  	scatterRegions := newRegions
   248  	waitTime := splitRegionBaseBackOffTime
   249  	for i := 0; i < maxRetryTimes; i++ {
   250  		for _, region := range scatterRegions {
   251  			// Wait for a while until the regions successfully splits.
   252  			local.waitForSplit(ctx, region.Region.Id)
   253  			if err = local.splitCli.ScatterRegion(ctx, region); err != nil {
   254  				failedErr = err
   255  				retryRegions = append(retryRegions, region)
   256  			}
   257  		}
   258  		if len(retryRegions) == 0 {
   259  			break
   260  		}
   261  		// the scatter operation likely fails because region replicate not finish yet
   262  		// pack them to one log to avoid printing a lot warn logs.
   263  		log.L().Warn("scatter region failed", zap.Int("regionCount", len(newRegions)),
   264  			zap.Int("failedCount", len(retryRegions)), zap.Error(failedErr), zap.Int("retry", i))
   265  		scatterRegions = retryRegions
   266  		retryRegions = make([]*split.RegionInfo, 0)
   267  		select {
   268  		case <-time.After(waitTime):
   269  		case <-ctx.Done():
   270  			return nil, nil, ctx.Err()
   271  		}
   272  		waitTime *= 2
   273  	}
   274  
   275  	return region, newRegions, nil
   276  }
   277  
   278  func (local *local) hasRegion(ctx context.Context, regionID uint64) (bool, error) {
   279  	regionInfo, err := local.splitCli.GetRegionByID(ctx, regionID)
   280  	if err != nil {
   281  		return false, err
   282  	}
   283  	return regionInfo != nil, nil
   284  }
   285  
   286  func (local *local) waitForSplit(ctx context.Context, regionID uint64) {
   287  	for i := 0; i < split.SplitCheckMaxRetryTimes; i++ {
   288  		ok, err := local.hasRegion(ctx, regionID)
   289  		if err != nil {
   290  			log.L().Info("wait for split failed", log.ShortError(err))
   291  			return
   292  		}
   293  		if ok {
   294  			break
   295  		}
   296  		select {
   297  		case <-time.After(time.Second):
   298  		case <-ctx.Done():
   299  			return
   300  		}
   301  	}
   302  }
   303  
   304  func (local *local) waitForScatterRegion(ctx context.Context, regionInfo *split.RegionInfo) {
   305  	regionID := regionInfo.Region.GetId()
   306  	for i := 0; i < split.ScatterWaitMaxRetryTimes; i++ {
   307  		ok, err := local.isScatterRegionFinished(ctx, regionID)
   308  		if err != nil {
   309  			log.L().Warn("scatter region failed: do not have the region",
   310  				log.ZapRedactStringer("region", regionInfo.Region))
   311  			return
   312  		}
   313  		if ok {
   314  			break
   315  		}
   316  		select {
   317  		case <-time.After(time.Second):
   318  		case <-ctx.Done():
   319  			return
   320  		}
   321  	}
   322  }
   323  
   324  func (local *local) isScatterRegionFinished(ctx context.Context, regionID uint64) (bool, error) {
   325  	resp, err := local.splitCli.GetOperator(ctx, regionID)
   326  	if err != nil {
   327  		return false, err
   328  	}
   329  	// Heartbeat may not be sent to PD
   330  	if respErr := resp.GetHeader().GetError(); respErr != nil {
   331  		if respErr.GetType() == pdpb.ErrorType_REGION_NOT_FOUND {
   332  			return true, nil
   333  		}
   334  		// don't return error if region replicate not complete
   335  		// TODO: should add a new error type to avoid this check by string matching
   336  		matches, _ := regexp.MatchString("region \\d+ is not fully replicated", respErr.Message)
   337  		if matches {
   338  			return false, nil
   339  		}
   340  		return false, errors.Errorf("get operator error: %s", respErr.GetType())
   341  	}
   342  	// If the current operator of the region is not 'scatter-region', we could assume
   343  	// that 'scatter-operator' has finished or timeout
   344  	ok := string(resp.GetDesc()) != "scatter-region" || resp.GetStatus() != pdpb.OperatorStatus_RUNNING
   345  	return ok, nil
   346  }
   347  
   348  func getSplitKeysByRanges(ranges []Range, regions []*split.RegionInfo) map[uint64][][]byte {
   349  	checkKeys := make([][]byte, 0)
   350  	var lastEnd []byte
   351  	for _, rg := range ranges {
   352  		if !bytes.Equal(lastEnd, rg.start) {
   353  			checkKeys = append(checkKeys, rg.start)
   354  		}
   355  		checkKeys = append(checkKeys, rg.end)
   356  		lastEnd = rg.end
   357  	}
   358  	return getSplitKeys(checkKeys, regions)
   359  }
   360  
   361  func getSplitKeys(checkKeys [][]byte, regions []*split.RegionInfo) map[uint64][][]byte {
   362  	splitKeyMap := make(map[uint64][][]byte)
   363  	for _, key := range checkKeys {
   364  		if region := needSplit(key, regions); region != nil {
   365  			splitKeys, ok := splitKeyMap[region.Region.GetId()]
   366  			if !ok {
   367  				splitKeys = make([][]byte, 0, 1)
   368  			}
   369  			splitKeyMap[region.Region.GetId()] = append(splitKeys, key)
   370  			log.L().Debug("get key for split region",
   371  				zap.Binary("key", key),
   372  				zap.Binary("startKey", region.Region.StartKey),
   373  				zap.Binary("endKey", region.Region.EndKey))
   374  		}
   375  	}
   376  	return splitKeyMap
   377  }
   378  
   379  // needSplit checks whether a key is necessary to split, if true returns the split region
   380  func needSplit(key []byte, regions []*split.RegionInfo) *split.RegionInfo {
   381  	// If splitKey is the max key.
   382  	if len(key) == 0 {
   383  		return nil
   384  	}
   385  	splitKey := codec.EncodeBytes([]byte{}, key)
   386  
   387  	for _, region := range regions {
   388  		// If splitKey is the boundary of the region
   389  		if bytes.Equal(splitKey, region.Region.GetStartKey()) {
   390  			return nil
   391  		}
   392  		// If splitKey is in a region
   393  		if bytes.Compare(splitKey, region.Region.GetStartKey()) > 0 && beforeEnd(splitKey, region.Region.GetEndKey()) {
   394  			log.L().Debug("need split",
   395  				zap.Binary("splitKey", key),
   396  				zap.Binary("encodedKey", splitKey),
   397  				zap.Binary("region start", region.Region.GetStartKey()),
   398  				zap.Binary("region end", region.Region.GetEndKey()),
   399  			)
   400  			return region
   401  		}
   402  	}
   403  	return nil
   404  }
   405  
   406  func beforeEnd(key []byte, end []byte) bool {
   407  	return bytes.Compare(key, end) < 0 || len(end) == 0
   408  }
   409  
   410  func insideRegion(region *metapb.Region, meta *sst.SSTMeta) bool {
   411  	rg := meta.GetRange()
   412  	return keyInsideRegion(region, rg.GetStart()) && keyInsideRegion(region, rg.GetEnd())
   413  }
   414  
   415  func keyInsideRegion(region *metapb.Region, key []byte) bool {
   416  	return bytes.Compare(key, region.GetStartKey()) >= 0 && (beforeEnd(key, region.GetEndKey()))
   417  }
   418  
   419  func intersectRange(region *metapb.Region, rg Range) Range {
   420  	var startKey, endKey []byte
   421  	if len(region.StartKey) > 0 {
   422  		_, startKey, _ = codec.DecodeBytes(region.StartKey, []byte{})
   423  	}
   424  	if bytes.Compare(startKey, rg.start) < 0 {
   425  		startKey = rg.start
   426  	}
   427  	if len(region.EndKey) > 0 {
   428  		_, endKey, _ = codec.DecodeBytes(region.EndKey, []byte{})
   429  	}
   430  	if beforeEnd(rg.end, endKey) {
   431  		endKey = rg.end
   432  	}
   433  
   434  	return Range{start: startKey, end: endKey}
   435  }