// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore

import (
	"strings"

	"github.com/docker/go-units"
	"github.com/pingcap/errors"
	backuppb "github.com/pingcap/kvproto/pkg/backup"
	"github.com/pingcap/tidb/kv"
	"github.com/pingcap/tidb/tablecodec"

	berrors "github.com/pingcap/br/pkg/errors"
	"github.com/pingcap/br/pkg/rtree"
)

const (
	// DefaultMergeRegionSizeBytes is the default region split size, 96 MiB.
	// See https://github.com/tikv/tikv/blob/v4.0.8/components/raftstore/src/coprocessor/config.rs#L35-L38
	DefaultMergeRegionSizeBytes uint64 = 96 * units.MiB

	// DefaultMergeRegionKeyCount is the default region key count, 960000.
	DefaultMergeRegionKeyCount uint64 = 960000

	// writeCFName and defaultCFName are the column family names that
	// MergeFileRanges looks for in a backup file's Cf field or, failing that,
	// as a substring of the file name.
	writeCFName   = "write"
	defaultCFName = "default"
)

// MergeRangesStat holds statistics for the MergeRanges.
// It is filled in and returned by MergeFileRanges.
type MergeRangesStat struct {
	// TotalFiles is the number of input files.
	TotalFiles int
	// TotalWriteCFFile is the number of input files classified as write CF.
	TotalWriteCFFile int
	// TotalDefaultCFFile is the number of input files classified as default CF.
	TotalDefaultCFFile int
	// TotalRegions is the larger of TotalWriteCFFile and TotalDefaultCFFile.
	TotalRegions int
	// RegionKeysAvg is the total KV count of all files divided by TotalRegions.
	RegionKeysAvg int
	// RegionBytesAvg is the total byte size of all files divided by TotalRegions.
	RegionBytesAvg int
	// MergedRegions is the number of ranges left after merging.
	MergedRegions int
	// MergedRegionKeysAvg is the total KV count divided by MergedRegions.
	MergedRegionKeysAvg int
	// MergedRegionBytesAvg is the total byte size divided by MergedRegions.
	MergedRegionBytesAvg int
}

// MergeFileRanges groups the given backup files into ranges and merges
// neighboring ranges as long as the result stays within splitSizeBytes and
// splitKeyCount.
//
// By merging small ranges, it speeds up restoring a backup that contains many
// small ranges (regions) as it reduces split region and scatter region.
//
// Files that share a start key form one range. Two neighboring ranges are
// merged only if they belong to the same table and are either in the same
// index or are both non-index keys; a right-hand range that reports zero
// bytes is always merged into its left neighbor.
//
// It returns the merged ranges in sorted order together with statistics about
// the merge. For an empty input it returns an empty slice and a zero-valued
// stat. It returns an error annotated from berrors.ErrRestoreInvalidBackup
// when no file can be classified as write CF or default CF, and one annotated
// from berrors.ErrRestoreInvalidRange when inserting a range into the range
// tree reports a conflicting range.
48 func MergeFileRanges( 49 files []*backuppb.File, splitSizeBytes, splitKeyCount uint64, 50 ) ([]rtree.Range, *MergeRangesStat, error) { 51 if len(files) == 0 { 52 return []rtree.Range{}, &MergeRangesStat{}, nil 53 } 54 totalBytes := uint64(0) 55 totalKvs := uint64(0) 56 totalFiles := len(files) 57 writeCFFile := 0 58 defaultCFFile := 0 59 60 filesMap := make(map[string][]*backuppb.File) 61 for _, file := range files { 62 filesMap[string(file.StartKey)] = append(filesMap[string(file.StartKey)], file) 63 64 // We skips all default cf files because we don't range overlap. 65 if file.Cf == writeCFName || strings.Contains(file.GetName(), writeCFName) { 66 writeCFFile++ 67 } else if file.Cf == defaultCFName || strings.Contains(file.GetName(), defaultCFName) { 68 defaultCFFile++ 69 } 70 totalBytes += file.TotalBytes 71 totalKvs += file.TotalKvs 72 } 73 if writeCFFile == 0 && defaultCFFile == 0 { 74 return []rtree.Range{}, nil, errors.Annotatef(berrors.ErrRestoreInvalidBackup, 75 "unknown backup data from neither Wrtie CF nor Default CF") 76 } 77 78 // RawKV does not have data in write CF. 
79 totalRegions := writeCFFile 80 if defaultCFFile > writeCFFile { 81 totalRegions = defaultCFFile 82 } 83 84 // Check if files are overlapped 85 rangeTree := rtree.NewRangeTree() 86 for key := range filesMap { 87 files := filesMap[key] 88 if out := rangeTree.InsertRange(rtree.Range{ 89 StartKey: files[0].GetStartKey(), 90 EndKey: files[0].GetEndKey(), 91 Files: files, 92 }); out != nil { 93 return nil, nil, errors.Annotatef(berrors.ErrRestoreInvalidRange, 94 "duplicate range %s files %+v", out, files) 95 } 96 } 97 98 needMerge := func(left, right *rtree.Range) bool { 99 leftBytes, leftKeys := left.BytesAndKeys() 100 rightBytes, rightKeys := right.BytesAndKeys() 101 if rightBytes == 0 { 102 return true 103 } 104 if leftBytes+rightBytes > splitSizeBytes { 105 return false 106 } 107 if leftKeys+rightKeys > splitKeyCount { 108 return false 109 } 110 // Do not merge ranges in different tables. 111 if tablecodec.DecodeTableID(kv.Key(left.StartKey)) != tablecodec.DecodeTableID(kv.Key(right.StartKey)) { 112 return false 113 } 114 // Do not merge ranges in different indexes even if they are in the same 115 // table, as rewrite rule only supports rewriting one pattern. 116 // tableID, indexID, indexValues, err 117 _, indexID1, _, err1 := tablecodec.DecodeIndexKey(kv.Key(left.StartKey)) 118 _, indexID2, _, err2 := tablecodec.DecodeIndexKey(kv.Key(right.StartKey)) 119 // If both of them are index keys, ... 120 if err1 == nil && err2 == nil { 121 // Merge left and right if they are in the same index. 122 return indexID1 == indexID2 123 } 124 // Otherwise, merge if they are both record keys 125 return err1 != nil && err2 != nil 126 } 127 sortedRanges := rangeTree.GetSortedRanges() 128 for i := 1; i < len(sortedRanges); { 129 if !needMerge(&sortedRanges[i-1], &sortedRanges[i]) { 130 i++ 131 continue 132 } 133 sortedRanges[i-1].EndKey = sortedRanges[i].EndKey 134 sortedRanges[i-1].Files = append(sortedRanges[i-1].Files, sortedRanges[i].Files...) 
135 // TODO: this is slow when there are lots of ranges need to merge. 136 sortedRanges = append(sortedRanges[:i], sortedRanges[i+1:]...) 137 } 138 139 regionBytesAvg := totalBytes / uint64(totalRegions) 140 regionKeysAvg := totalKvs / uint64(totalRegions) 141 mergedRegionBytesAvg := totalBytes / uint64(len(sortedRanges)) 142 mergedRegionKeysAvg := totalKvs / uint64(len(sortedRanges)) 143 144 return sortedRanges, &MergeRangesStat{ 145 TotalFiles: totalFiles, 146 TotalWriteCFFile: writeCFFile, 147 TotalDefaultCFFile: defaultCFFile, 148 TotalRegions: totalRegions, 149 RegionKeysAvg: int(regionKeysAvg), 150 RegionBytesAvg: int(regionBytesAvg), 151 MergedRegions: len(sortedRanges), 152 MergedRegionKeysAvg: int(mergedRegionKeysAvg), 153 MergedRegionBytesAvg: int(mergedRegionBytesAvg), 154 }, nil 155 }