github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/restore/merge.go (about)

     1  // Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.
     2  
     3  package restore
     4  
     5  import (
     6  	"strings"
     7  
     8  	"github.com/docker/go-units"
     9  	"github.com/pingcap/errors"
    10  	backuppb "github.com/pingcap/kvproto/pkg/backup"
    11  	"github.com/pingcap/tidb/kv"
    12  	"github.com/pingcap/tidb/tablecodec"
    13  
    14  	berrors "github.com/pingcap/br/pkg/errors"
    15  	"github.com/pingcap/br/pkg/rtree"
    16  )
    17  
    18  const (
    19  	// DefaultMergeRegionSizeBytes is the default region split size, 96MB.
    20  	// See https://github.com/tikv/tikv/blob/v4.0.8/components/raftstore/src/coprocessor/config.rs#L35-L38
    21  	DefaultMergeRegionSizeBytes uint64 = 96 * units.MiB
    22  
    23  	// DefaultMergeRegionKeyCount is the default region key count, 960000.
    24  	DefaultMergeRegionKeyCount uint64 = 960000
    25  
    26  	writeCFName   = "write"
    27  	defaultCFName = "default"
    28  )
    29  
    30  // MergeRangesStat holds statistics for the MergeRanges.
    31  type MergeRangesStat struct {
    32  	TotalFiles           int
    33  	TotalWriteCFFile     int
    34  	TotalDefaultCFFile   int
    35  	TotalRegions         int
    36  	RegionKeysAvg        int
    37  	RegionBytesAvg       int
    38  	MergedRegions        int
    39  	MergedRegionKeysAvg  int
    40  	MergedRegionBytesAvg int
    41  }
    42  
    43  // MergeFileRanges returns ranges of the files are merged based on
    44  // splitSizeBytes and splitKeyCount.
    45  //
    46  // By merging small ranges, it speeds up restoring a backup that contains many
    47  // small ranges (regions) as it reduces split region and scatter region.
    48  func MergeFileRanges(
    49  	files []*backuppb.File, splitSizeBytes, splitKeyCount uint64,
    50  ) ([]rtree.Range, *MergeRangesStat, error) {
    51  	if len(files) == 0 {
    52  		return []rtree.Range{}, &MergeRangesStat{}, nil
    53  	}
    54  	totalBytes := uint64(0)
    55  	totalKvs := uint64(0)
    56  	totalFiles := len(files)
    57  	writeCFFile := 0
    58  	defaultCFFile := 0
    59  
    60  	filesMap := make(map[string][]*backuppb.File)
    61  	for _, file := range files {
    62  		filesMap[string(file.StartKey)] = append(filesMap[string(file.StartKey)], file)
    63  
    64  		// We skips all default cf files because we don't range overlap.
    65  		if file.Cf == writeCFName || strings.Contains(file.GetName(), writeCFName) {
    66  			writeCFFile++
    67  		} else if file.Cf == defaultCFName || strings.Contains(file.GetName(), defaultCFName) {
    68  			defaultCFFile++
    69  		}
    70  		totalBytes += file.TotalBytes
    71  		totalKvs += file.TotalKvs
    72  	}
    73  	if writeCFFile == 0 && defaultCFFile == 0 {
    74  		return []rtree.Range{}, nil, errors.Annotatef(berrors.ErrRestoreInvalidBackup,
    75  			"unknown backup data from neither Wrtie CF nor Default CF")
    76  	}
    77  
    78  	// RawKV does not have data in write CF.
    79  	totalRegions := writeCFFile
    80  	if defaultCFFile > writeCFFile {
    81  		totalRegions = defaultCFFile
    82  	}
    83  
    84  	// Check if files are overlapped
    85  	rangeTree := rtree.NewRangeTree()
    86  	for key := range filesMap {
    87  		files := filesMap[key]
    88  		if out := rangeTree.InsertRange(rtree.Range{
    89  			StartKey: files[0].GetStartKey(),
    90  			EndKey:   files[0].GetEndKey(),
    91  			Files:    files,
    92  		}); out != nil {
    93  			return nil, nil, errors.Annotatef(berrors.ErrRestoreInvalidRange,
    94  				"duplicate range %s files %+v", out, files)
    95  		}
    96  	}
    97  
    98  	needMerge := func(left, right *rtree.Range) bool {
    99  		leftBytes, leftKeys := left.BytesAndKeys()
   100  		rightBytes, rightKeys := right.BytesAndKeys()
   101  		if rightBytes == 0 {
   102  			return true
   103  		}
   104  		if leftBytes+rightBytes > splitSizeBytes {
   105  			return false
   106  		}
   107  		if leftKeys+rightKeys > splitKeyCount {
   108  			return false
   109  		}
   110  		// Do not merge ranges in different tables.
   111  		if tablecodec.DecodeTableID(kv.Key(left.StartKey)) != tablecodec.DecodeTableID(kv.Key(right.StartKey)) {
   112  			return false
   113  		}
   114  		// Do not merge ranges in different indexes even if they are in the same
   115  		// table, as rewrite rule only supports rewriting one pattern.
   116  		// tableID, indexID, indexValues, err
   117  		_, indexID1, _, err1 := tablecodec.DecodeIndexKey(kv.Key(left.StartKey))
   118  		_, indexID2, _, err2 := tablecodec.DecodeIndexKey(kv.Key(right.StartKey))
   119  		// If both of them are index keys, ...
   120  		if err1 == nil && err2 == nil {
   121  			// Merge left and right if they are in the same index.
   122  			return indexID1 == indexID2
   123  		}
   124  		// Otherwise, merge if they are both record keys
   125  		return err1 != nil && err2 != nil
   126  	}
   127  	sortedRanges := rangeTree.GetSortedRanges()
   128  	for i := 1; i < len(sortedRanges); {
   129  		if !needMerge(&sortedRanges[i-1], &sortedRanges[i]) {
   130  			i++
   131  			continue
   132  		}
   133  		sortedRanges[i-1].EndKey = sortedRanges[i].EndKey
   134  		sortedRanges[i-1].Files = append(sortedRanges[i-1].Files, sortedRanges[i].Files...)
   135  		// TODO: this is slow when there are lots of ranges need to merge.
   136  		sortedRanges = append(sortedRanges[:i], sortedRanges[i+1:]...)
   137  	}
   138  
   139  	regionBytesAvg := totalBytes / uint64(totalRegions)
   140  	regionKeysAvg := totalKvs / uint64(totalRegions)
   141  	mergedRegionBytesAvg := totalBytes / uint64(len(sortedRanges))
   142  	mergedRegionKeysAvg := totalKvs / uint64(len(sortedRanges))
   143  
   144  	return sortedRanges, &MergeRangesStat{
   145  		TotalFiles:           totalFiles,
   146  		TotalWriteCFFile:     writeCFFile,
   147  		TotalDefaultCFFile:   defaultCFFile,
   148  		TotalRegions:         totalRegions,
   149  		RegionKeysAvg:        int(regionKeysAvg),
   150  		RegionBytesAvg:       int(regionBytesAvg),
   151  		MergedRegions:        len(sortedRanges),
   152  		MergedRegionKeysAvg:  int(mergedRegionKeysAvg),
   153  		MergedRegionBytesAvg: int(mergedRegionBytesAvg),
   154  	}, nil
   155  }