github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/restore/merge_test.go

// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore_test

import (
	"bytes"
	"fmt"
	"math"
	"math/rand"
	"testing"
	"time"

	. "github.com/pingcap/check"
	"github.com/pingcap/errors"
	backuppb "github.com/pingcap/kvproto/pkg/backup"
	"github.com/pingcap/tidb/sessionctx/stmtctx"
	"github.com/pingcap/tidb/tablecodec"
	"github.com/pingcap/tidb/types"
	"github.com/pingcap/tidb/util/codec"

	berrors "github.com/pingcap/br/pkg/errors"
	"github.com/pingcap/br/pkg/restore"
)

var _ = Suite(&testMergeRangesSuite{})

type testMergeRangesSuite struct{}

// fileBuilder builds adjacent, non-overlapping backup file ranges for a table
// (or one of its indexes); each call advances the key offset by 10.
type fileBuilder struct {
	tableID, startKeyOffset int64
}

// build returns one file ("write" cf) or two files ("write" + "default" cf)
// covering the next key range of the given table/index, with the requested
// TotalBytes and TotalKvs.
func (fb *fileBuilder) build(tableID, indexID, num, bytes, kv int) (files []*backuppb.File) {
	if num != 1 && num != 2 {
		panic("num must be 1 or 2")
	}

	// Rotate table ID.
	if fb.tableID != int64(tableID) {
		fb.tableID = int64(tableID)
		fb.startKeyOffset = 0
	}

	low := codec.EncodeInt(nil, fb.startKeyOffset)
	fb.startKeyOffset += 10
	high := codec.EncodeInt(nil, fb.startKeyOffset)

	startKey := tablecodec.EncodeRowKey(fb.tableID, low)
	endKey := tablecodec.EncodeRowKey(fb.tableID, high)
	if indexID != 0 {
		lowVal := types.NewIntDatum(fb.startKeyOffset - 10)
		highVal := types.NewIntDatum(fb.startKeyOffset)
		sc := &stmtctx.StatementContext{TimeZone: time.UTC}
		lowValue, err := codec.EncodeKey(sc, nil, lowVal)
		if err != nil {
			panic(err)
		}
		highValue, err := codec.EncodeKey(sc, nil, highVal)
		if err != nil {
			panic(err)
		}
		startKey = tablecodec.EncodeIndexSeekKey(int64(tableID), int64(indexID), lowValue)
		endKey = tablecodec.EncodeIndexSeekKey(int64(tableID), int64(indexID), highValue)
	}

	files = append(files, &backuppb.File{
		Name:       fmt.Sprint(rand.Int63n(math.MaxInt64), "_write.sst"),
		StartKey:   startKey,
		EndKey:     endKey,
		TotalKvs:   uint64(kv),
		TotalBytes: uint64(bytes),
		Cf:         "write",
	})
	if num == 1 {
		return
	}

	// To match TiKV's behavior.
	files[0].TotalKvs = 0
	files[0].TotalBytes = 0
	files = append(files, &backuppb.File{
		Name:       fmt.Sprint(rand.Int63n(math.MaxInt64), "_default.sst"),
		StartKey:   tablecodec.EncodeRowKey(fb.tableID, low),
		EndKey:     tablecodec.EncodeRowKey(fb.tableID, high),
		TotalKvs:   uint64(kv),
		TotalBytes: uint64(bytes),
		Cf:         "default",
	})
	return files
}

func (s *testMergeRangesSuite) TestMergeRanges(c *C) {
	type Case struct {
		files  [][5]int // tableID, indexID, num, bytes, kv
		merged []int    // length of each merged range
		stat   restore.MergeRangesStat
	}
	splitSizeBytes := int(restore.DefaultMergeRegionSizeBytes)
	splitKeyCount := int(restore.DefaultMergeRegionKeyCount)
	cases := []Case{
		// Empty backup.
		{
			files:  [][5]int{},
			merged: []int{},
			stat:   restore.MergeRangesStat{TotalRegions: 0, MergedRegions: 0},
		},

		// Do not merge big ranges.
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes, 1}, {1, 0, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {1, 0, 1, splitSizeBytes, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		{
			files:  [][5]int{{1, 0, 1, 1, splitKeyCount}, {1, 0, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {1, 0, 1, 1, splitKeyCount}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},

		// 3 -> 1
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {1, 0, 1, 1, 1}},
			merged: []int{3},
			stat:   restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 1},
		},
		// 3 -> 2, size: [split*1/3, split*1/3, split*1/2] -> [split*2/3, split*1/2]
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 2, 1}},
			merged: []int{2, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 2},
		},
		// 4 -> 2, size: [split*1/3, split*1/3, split*1/2, 1] -> [split*2/3, split*1/2 + 1]
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 2, 1}, {1, 0, 1, 1, 1}},
			merged: []int{2, 2},
			stat:   restore.MergeRangesStat{TotalRegions: 4, MergedRegions: 2},
		},
		// 5 -> 3, size: [split*1/3, split*1/3, split, split*1/2, 1] -> [split*2/3, split, split*1/2 + 1]
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes, 1}, {1, 0, 1, splitSizeBytes / 2, 1}, {1, 0, 1, 1, 1}},
			merged: []int{2, 1, 2},
			stat:   restore.MergeRangesStat{TotalRegions: 5, MergedRegions: 3},
		},

		// Do not merge ranges from different tables.
		// 2 -> 2, size: [1, 1] -> [1, 1], table ID: [1, 2] -> [1, 2]
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		// 3 -> 2, size: [1@split*1/3, 2@split*1/3, 2@split*1/2] -> [1@split*1/3, 2@split*5/6]
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {2, 0, 1, splitSizeBytes / 3, 1}, {2, 0, 1, splitSizeBytes / 2, 1}},
			merged: []int{1, 2},
			stat:   restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 2},
		},

		// Do not merge ranges from different indexes.
		// 2 -> 2, size: [1, 1] -> [1, 1], index ID: [1, 2] -> [1, 2]
		{
			files:  [][5]int{{1, 1, 1, 1, 1}, {1, 2, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		// Index ID out of order.
		// 2 -> 2, size: [1, 1] -> [1, 1], index ID: [2, 1] -> [1, 2]
		{
			files:  [][5]int{{1, 2, 1, 1, 1}, {1, 1, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		// 3 -> 3, size: [1, 1, 1] -> [1, 1, 1]
		// (table ID, index ID): [(1, 0), (2, 1), (2, 2)] -> [(1, 0), (2, 1), (2, 2)]
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 2, 1, 1, 1}},
			merged: []int{1, 1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 3},
		},
		// 4 -> 3, size: [1, 1, 1, 1] -> [1, 1, 2]
		// (table ID, index ID): [(1, 0), (2, 1), (2, 0), (2, 0)] -> [(1, 0), (2, 1), (2, 0)]
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 0, 1, 1, 1}, {2, 0, 1, 1, 1}},
			merged: []int{1, 1, 2},
			stat:   restore.MergeRangesStat{TotalRegions: 4, MergedRegions: 3},
		},
		// Merge ranges with the same table ID and index ID.
		// 4 -> 3, size: [1, 1, 1, 1] -> [1, 2, 1]
		// (table ID, index ID): [(1, 0), (2, 1), (2, 1), (2, 0)] -> [(1, 0), (2, 1), (2, 0)]
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 0, 1, 1, 1}},
			merged: []int{1, 2, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 4, MergedRegions: 3},
		},
	}

	for i, cs := range cases {
		files := make([]*backuppb.File, 0)
		fb := fileBuilder{}
		for _, f := range cs.files {
			files = append(files, fb.build(f[0], f[1], f[2], f[3], f[4])...)
		}
		rngs, stat, err := restore.MergeFileRanges(
			files, restore.DefaultMergeRegionSizeBytes, restore.DefaultMergeRegionKeyCount)
		c.Assert(err, IsNil, Commentf("%+v", cs))
		c.Assert(stat.TotalRegions, Equals, cs.stat.TotalRegions, Commentf("%+v", cs))
		c.Assert(stat.MergedRegions, Equals, cs.stat.MergedRegions, Commentf("%+v", cs))

		c.Assert(len(rngs), Equals, len(cs.merged), Commentf("case %d", i))
		for i, rg := range rngs {
			c.Assert(len(rg.Files), Equals, cs.merged[i], Commentf("%+v", cs))
			// Each file's range must lie within [Range.StartKey, Range.EndKey].
			for _, f := range rg.Files {
				c.Assert(bytes.Compare(rg.StartKey, f.StartKey), LessEqual, 0)
				c.Assert(bytes.Compare(rg.EndKey, f.EndKey), GreaterEqual, 0)
			}
		}
	}
}

func (s *testMergeRangesSuite) TestMergeRawKVRanges(c *C) {
	files := make([]*backuppb.File, 0)
	fb := fileBuilder{}
	files = append(files, fb.build(1, 0, 2, 1, 1)...)
	// RawKV backups do not have the write cf.
	files = files[1:]
	_, stat, err := restore.MergeFileRanges(
		files, restore.DefaultMergeRegionSizeBytes, restore.DefaultMergeRegionKeyCount)
	c.Assert(err, IsNil)
	c.Assert(stat.TotalRegions, Equals, 1)
	c.Assert(stat.MergedRegions, Equals, 1)
}

func (s *testMergeRangesSuite) TestInvalidRanges(c *C) {
	files := make([]*backuppb.File, 0)
	fb := fileBuilder{}
	files = append(files, fb.build(1, 0, 1, 1, 1)...)
	files[0].Name = "invalid.sst"
	files[0].Cf = "invalid"
	_, _, err := restore.MergeFileRanges(
		files, restore.DefaultMergeRegionSizeBytes, restore.DefaultMergeRegionKeyCount)
	c.Assert(err, NotNil)
	c.Assert(errors.Cause(err), Equals, berrors.ErrRestoreInvalidBackup)
}

// Benchmark results on Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz:
//
// BenchmarkMergeRanges100-40     9676         114344 ns/op
// BenchmarkMergeRanges1k-40       345        3700739 ns/op
// BenchmarkMergeRanges10k-40        3      414097277 ns/op
// BenchmarkMergeRanges50k-40        1    17258177908 ns/op
// BenchmarkMergeRanges100k-40       1    73403873161 ns/op

// benchmarkMergeRanges merges filesCount single-file ranges of one table.
func benchmarkMergeRanges(b *testing.B, filesCount int) {
	files := make([]*backuppb.File, 0)
	fb := fileBuilder{}
	for i := 0; i < filesCount; i++ {
		files = append(files, fb.build(1, 0, 1, 1, 1)...)
	}
	var err error
	for i := 0; i < b.N; i++ {
		_, _, err = restore.MergeFileRanges(files, restore.DefaultMergeRegionSizeBytes, restore.DefaultMergeRegionKeyCount)
		if err != nil {
			b.Error(err)
		}
	}
}

func BenchmarkMergeRanges100(b *testing.B) {
	benchmarkMergeRanges(b, 100)
}

func BenchmarkMergeRanges1k(b *testing.B) {
	benchmarkMergeRanges(b, 1000)
}

func BenchmarkMergeRanges10k(b *testing.B) {
	benchmarkMergeRanges(b, 10000)
}

func BenchmarkMergeRanges50k(b *testing.B) {
	benchmarkMergeRanges(b, 50000)
}

func BenchmarkMergeRanges100k(b *testing.B) {
	benchmarkMergeRanges(b, 100000)
}
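
// Note: the benchmarks above can be reproduced with the standard Go test
// tooling; from the repository root, something like the following runs only
// the benchmarks in this package (the -run '^$' pattern skips the unit tests):
//
//	go test -run '^$' -bench 'BenchmarkMergeRanges' ./pkg/restore/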