github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/restore/merge_test.go

// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore_test

import (
	"bytes"
	"fmt"
	"math"
	"math/rand"
	"testing"
	"time"

	. "github.com/pingcap/check"
	"github.com/pingcap/errors"
	backuppb "github.com/pingcap/kvproto/pkg/backup"
	"github.com/pingcap/tidb/sessionctx/stmtctx"
	"github.com/pingcap/tidb/tablecodec"
	"github.com/pingcap/tidb/types"
	"github.com/pingcap/tidb/util/codec"

	berrors "github.com/pingcap/br/pkg/errors"
	"github.com/pingcap/br/pkg/restore"
)

var _ = Suite(&testMergeRangesSuite{})

type testMergeRangesSuite struct{}

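// fileBuilder builds successive backup file key ranges within a table: each
// call to build covers the next 10 units of the start key offset, and the
// offset resets whenever the table ID changes.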
type fileBuilder struct {
	tableID, startKeyOffset int64
}

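// build returns num (1 or 2) backup files over the builder's next key range.
// A non-zero indexID produces index keys instead of row keys. With num == 2
// it emits a write cf file and a default cf file for the same range, carrying
// the kv/bytes stats on the default cf file only, to match TiKV's behavior.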
func (fb *fileBuilder) build(tableID, indexID, num, bytes, kv int) (files []*backuppb.File) {
	if num != 1 && num != 2 {
		panic("num must be 1 or 2")
	}

	// Reset the start key offset when the table ID changes.
	if fb.tableID != int64(tableID) {
		fb.tableID = int64(tableID)
		fb.startKeyOffset = 0
	}

	low := codec.EncodeInt(nil, fb.startKeyOffset)
	fb.startKeyOffset += 10
	high := codec.EncodeInt(nil, fb.startKeyOffset)

	startKey := tablecodec.EncodeRowKey(fb.tableID, low)
	endKey := tablecodec.EncodeRowKey(fb.tableID, high)
	if indexID != 0 {
		lowVal := types.NewIntDatum(fb.startKeyOffset - 10)
		highVal := types.NewIntDatum(fb.startKeyOffset)
		sc := &stmtctx.StatementContext{TimeZone: time.UTC}
		lowValue, err := codec.EncodeKey(sc, nil, lowVal)
		if err != nil {
			panic(err)
		}
		highValue, err := codec.EncodeKey(sc, nil, highVal)
		if err != nil {
			panic(err)
		}
		startKey = tablecodec.EncodeIndexSeekKey(int64(tableID), int64(indexID), lowValue)
		endKey = tablecodec.EncodeIndexSeekKey(int64(tableID), int64(indexID), highValue)
	}

	files = append(files, &backuppb.File{
		Name:       fmt.Sprint(rand.Int63n(math.MaxInt64), "_write.sst"),
		StartKey:   startKey,
		EndKey:     endKey,
		TotalKvs:   uint64(kv),
		TotalBytes: uint64(bytes),
		Cf:         "write",
	})
	if num == 1 {
		return
	}

	// To match TiKV's behavior: when a range has both write and default cf
	// files, the stats are carried on the default cf file only.
	files[0].TotalKvs = 0
	files[0].TotalBytes = 0
	files = append(files, &backuppb.File{
		Name:       fmt.Sprint(rand.Int63n(math.MaxInt64), "_default.sst"),
		StartKey:   tablecodec.EncodeRowKey(fb.tableID, low),
		EndKey:     tablecodec.EncodeRowKey(fb.tableID, high),
		TotalKvs:   uint64(kv),
		TotalBytes: uint64(bytes),
		Cf:         "default",
	})
	return files
}
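
// A minimal usage sketch (an illustrative addition, not part of the original
// suite): two small adjacent ranges in the same table merge into one range
// under the default thresholds, mirroring the "3 -> 1" case below.
func (s *testMergeRangesSuite) TestMergeTwoSmallRanges(c *C) {
	fb := fileBuilder{}
	files := fb.build(1, 0, 1, 1, 1)                  // covers offsets [0, 10) of table 1
	files = append(files, fb.build(1, 0, 1, 1, 1)...) // covers offsets [10, 20)
	rngs, stat, err := restore.MergeFileRanges(
		files, restore.DefaultMergeRegionSizeBytes, restore.DefaultMergeRegionKeyCount)
	c.Assert(err, IsNil)
	c.Assert(stat.TotalRegions, Equals, 2)
	c.Assert(stat.MergedRegions, Equals, 1)
	c.Assert(len(rngs), Equals, 1)
}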

func (s *testMergeRangesSuite) TestMergeRanges(c *C) {
	type Case struct {
		files  [][5]int // tableID, indexID, num, bytes, kv
		merged []int    // length of each merged range
		stat   restore.MergeRangesStat
	}
	splitSizeBytes := int(restore.DefaultMergeRegionSizeBytes)
	splitKeyCount := int(restore.DefaultMergeRegionKeyCount)
	cases := []Case{
		// Empty backup.
		{
			files:  [][5]int{},
			merged: []int{},
			stat:   restore.MergeRangesStat{TotalRegions: 0, MergedRegions: 0},
		},

		// Do not merge big ranges (by size or key count).
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes, 1}, {1, 0, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {1, 0, 1, splitSizeBytes, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		{
			files:  [][5]int{{1, 0, 1, 1, splitKeyCount}, {1, 0, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {1, 0, 1, 1, splitKeyCount}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},

		// 3 -> 1
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {1, 0, 1, 1, 1}},
			merged: []int{3},
			stat:   restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 1},
		},
		// 3 -> 2, size: [split*1/3, split*1/3, split*1/2] -> [split*2/3, split*1/2]
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 2, 1}},
			merged: []int{2, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 2},
		},
		// 4 -> 2, size: [split*1/3, split*1/3, split*1/2, 1] -> [split*2/3, split*1/2+1]
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 2, 1}, {1, 0, 1, 1, 1}},
			merged: []int{2, 2},
			stat:   restore.MergeRangesStat{TotalRegions: 4, MergedRegions: 2},
		},
		// 5 -> 3, size: [split*1/3, split*1/3, split, split*1/2, 1] -> [split*2/3, split, split*1/2+1]
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes / 3, 1}, {1, 0, 1, splitSizeBytes, 1}, {1, 0, 1, splitSizeBytes / 2, 1}, {1, 0, 1, 1, 1}},
			merged: []int{2, 1, 2},
			stat:   restore.MergeRangesStat{TotalRegions: 5, MergedRegions: 3},
		},

		// Do not merge ranges from different tables.
		// 2 -> 2, size: [1, 1] -> [1, 1], table ID: [1, 2] -> [1, 2]
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		// 3 -> 2, size: [1@split*1/3, 2@split*1/3, 2@split*1/2] -> [1@split*1/3, 2@split*5/6]
		{
			files:  [][5]int{{1, 0, 1, splitSizeBytes / 3, 1}, {2, 0, 1, splitSizeBytes / 3, 1}, {2, 0, 1, splitSizeBytes / 2, 1}},
			merged: []int{1, 2},
			stat:   restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 2},
		},

		// Do not merge ranges from different indexes.
		// 2 -> 2, size: [1, 1] -> [1, 1], index ID: [1, 2] -> [1, 2]
		{
			files:  [][5]int{{1, 1, 1, 1, 1}, {1, 2, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		// Index IDs out of order.
		// 2 -> 2, size: [1, 1] -> [1, 1], index ID: [2, 1] -> [1, 2]
		{
			files:  [][5]int{{1, 2, 1, 1, 1}, {1, 1, 1, 1, 1}},
			merged: []int{1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 2, MergedRegions: 2},
		},
		// 3 -> 3, size: [1, 1, 1] -> [1, 1, 1]
		// (table ID, index ID): [(1, 0), (2, 1), (2, 2)] -> [(1, 0), (2, 1), (2, 2)]
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 2, 1, 1, 1}},
			merged: []int{1, 1, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 3, MergedRegions: 3},
		},
		// 4 -> 3, size: [1, 1, 1, 1] -> [1, 1, 2]
		// (table ID, index ID): [(1, 0), (2, 1), (2, 0), (2, 0)] -> [(1, 0), (2, 1), (2, 0)]
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 0, 1, 1, 1}, {2, 0, 1, 1, 1}},
			merged: []int{1, 1, 2},
			stat:   restore.MergeRangesStat{TotalRegions: 4, MergedRegions: 3},
		},
		// Merge ranges with the same table ID and index ID.
		// 4 -> 3, size: [1, 1, 1, 1] -> [1, 2, 1]
		// (table ID, index ID): [(1, 0), (2, 1), (2, 1), (2, 0)] -> [(1, 0), (2, 1), (2, 0)]
		{
			files:  [][5]int{{1, 0, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {2, 0, 1, 1, 1}},
			merged: []int{1, 2, 1},
			stat:   restore.MergeRangesStat{TotalRegions: 4, MergedRegions: 3},
		},
	}

	for i, cs := range cases {
		files := make([]*backuppb.File, 0)
		fb := fileBuilder{}
		for _, f := range cs.files {
			files = append(files, fb.build(f[0], f[1], f[2], f[3], f[4])...)
		}
		rngs, stat, err := restore.MergeFileRanges(
			files, restore.DefaultMergeRegionSizeBytes, restore.DefaultMergeRegionKeyCount)
		c.Assert(err, IsNil, Commentf("%+v", cs))
		c.Assert(stat.TotalRegions, Equals, cs.stat.TotalRegions, Commentf("%+v", cs))
		c.Assert(stat.MergedRegions, Equals, cs.stat.MergedRegions, Commentf("%+v", cs))

		c.Assert(len(rngs), Equals, len(cs.merged), Commentf("case %d", i))
		for j, rg := range rngs {
			c.Assert(len(rg.Files), Equals, cs.merged[j], Commentf("%+v", cs))
			// Each file's key range must fall within [rg.StartKey, rg.EndKey].
			for _, f := range rg.Files {
				c.Assert(bytes.Compare(rg.StartKey, f.StartKey), LessEqual, 0)
				c.Assert(bytes.Compare(rg.EndKey, f.EndKey), GreaterEqual, 0)
			}
		}
	}
}

func (s *testMergeRangesSuite) TestMergeRawKVRanges(c *C) {
	files := make([]*backuppb.File, 0)
	fb := fileBuilder{}
	files = append(files, fb.build(1, 0, 2, 1, 1)...)
	// RawKV backups do not have a write cf, so drop that file.
	files = files[1:]
	_, stat, err := restore.MergeFileRanges(
		files, restore.DefaultMergeRegionSizeBytes, restore.DefaultMergeRegionKeyCount)
	c.Assert(err, IsNil)
	c.Assert(stat.TotalRegions, Equals, 1)
	c.Assert(stat.MergedRegions, Equals, 1)
}

func (s *testMergeRangesSuite) TestInvalidRanges(c *C) {
	files := make([]*backuppb.File, 0)
	fb := fileBuilder{}
	files = append(files, fb.build(1, 0, 1, 1, 1)...)
	files[0].Name = "invalid.sst"
	files[0].Cf = "invalid"
	_, _, err := restore.MergeFileRanges(
		files, restore.DefaultMergeRegionSizeBytes, restore.DefaultMergeRegionKeyCount)
	c.Assert(err, NotNil)
	c.Assert(errors.Cause(err), Equals, berrors.ErrRestoreInvalidBackup)
}

// Benchmark results on Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz:
//
// BenchmarkMergeRanges100-40          9676             114344 ns/op
// BenchmarkMergeRanges1k-40            345            3700739 ns/op
// BenchmarkMergeRanges10k-40             3          414097277 ns/op
// BenchmarkMergeRanges50k-40             1        17258177908 ns/op
// BenchmarkMergeRanges100k-40            1        73403873161 ns/op
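//
// These numbers are hardware-dependent; a typical way to reproduce them is:
//
//	go test -run '^$' -bench 'BenchmarkMergeRanges' github.com/pingcap/br/pkg/restore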

func benchmarkMergeRanges(b *testing.B, filesCount int) {
	files := make([]*backuppb.File, 0)
	fb := fileBuilder{}
	for i := 0; i < filesCount; i++ {
		files = append(files, fb.build(1, 0, 1, 1, 1)...)
	}
	var err error
	for i := 0; i < b.N; i++ {
		_, _, err = restore.MergeFileRanges(files, restore.DefaultMergeRegionSizeBytes, restore.DefaultMergeRegionKeyCount)
		if err != nil {
			b.Error(err)
		}
	}
}

func BenchmarkMergeRanges100(b *testing.B) {
	benchmarkMergeRanges(b, 100)
}

func BenchmarkMergeRanges1k(b *testing.B) {
	benchmarkMergeRanges(b, 1000)
}

func BenchmarkMergeRanges10k(b *testing.B) {
	benchmarkMergeRanges(b, 10000)
}

func BenchmarkMergeRanges50k(b *testing.B) {
	benchmarkMergeRanges(b, 50000)
}

func BenchmarkMergeRanges100k(b *testing.B) {
	benchmarkMergeRanges(b, 100000)
}