github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/scheduler/internal/v3/keyspan/splitter_write_test.go (about)

     1  // Copyright 2023 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package keyspan
    15  
    16  import (
    17  	"context"
    18  	"encoding/hex"
    19  	"math"
    20  	"math/rand"
    21  	"strconv"
    22  	"testing"
    23  
    24  	"github.com/pingcap/tiflow/cdc/model"
    25  	"github.com/pingcap/tiflow/cdc/processor/tablepb"
    26  	"github.com/pingcap/tiflow/pkg/pdutil"
    27  	"github.com/stretchr/testify/require"
    28  )
    29  
    30  func prepareRegionsInfo(writtenKeys [7]int) ([]pdutil.RegionInfo, map[int][]byte, map[int][]byte) {
    31  	regions := []pdutil.RegionInfo{
    32  		pdutil.NewTestRegionInfo(2, []byte("a"), []byte("b"), uint64(writtenKeys[0])),
    33  		pdutil.NewTestRegionInfo(3, []byte("b"), []byte("c"), uint64(writtenKeys[1])),
    34  		pdutil.NewTestRegionInfo(4, []byte("c"), []byte("d"), uint64(writtenKeys[2])),
    35  		pdutil.NewTestRegionInfo(5, []byte("e"), []byte("f"), uint64(writtenKeys[3])),
    36  		pdutil.NewTestRegionInfo(6, []byte("f"), []byte("fa"), uint64(writtenKeys[4])),
    37  		pdutil.NewTestRegionInfo(7, []byte("fa"), []byte("fc"), uint64(writtenKeys[5])),
    38  		pdutil.NewTestRegionInfo(8, []byte("fc"), []byte("ff"), uint64(writtenKeys[6])),
    39  	}
    40  	startKeys := map[int][]byte{}
    41  	endKeys := map[int][]byte{}
    42  	for _, r := range regions {
    43  		b, _ := hex.DecodeString(r.StartKey)
    44  		startKeys[int(r.ID)] = b
    45  	}
    46  	for _, r := range regions {
    47  		b, _ := hex.DecodeString(r.EndKey)
    48  		endKeys[int(r.ID)] = b
    49  	}
    50  	return regions, startKeys, endKeys
    51  }
    52  
    53  func cloneRegions(info []pdutil.RegionInfo) []pdutil.RegionInfo {
    54  	return append([]pdutil.RegionInfo{}, info...)
    55  }
    56  
    57  func TestSplitRegionsByWrittenKeysUniform(t *testing.T) {
    58  	t.Parallel()
    59  	re := require.New(t)
    60  
    61  	regions, startKeys, endKeys := prepareRegionsInfo(
    62  		[7]int{100, 100, 100, 100, 100, 100, 100}) // region id: [2,3,4,5,6,7,8]
    63  	splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 0)
    64  	info := splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), 1)
    65  	re.Len(info.RegionCounts, 1)
    66  	re.EqualValues(7, info.RegionCounts[0])
    67  	re.Len(info.Spans, 1)
    68  	re.EqualValues(startKeys[2], info.Spans[0].StartKey)
    69  	re.EqualValues(endKeys[8], info.Spans[0].EndKey)
    70  
    71  	info = splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), 2) // [2,3,4,5], [6,7,8]
    72  	re.Len(info.RegionCounts, 2)
    73  	re.EqualValues(4, info.RegionCounts[0])
    74  	re.EqualValues(3, info.RegionCounts[1])
    75  	re.Len(info.Weights, 2)
    76  	re.EqualValues(404, info.Weights[0])
    77  	re.EqualValues(303, info.Weights[1])
    78  	re.Len(info.Spans, 2)
    79  	re.EqualValues(startKeys[2], info.Spans[0].StartKey)
    80  	re.EqualValues(endKeys[5], info.Spans[0].EndKey)
    81  	re.EqualValues(startKeys[6], info.Spans[1].StartKey)
    82  	re.EqualValues(endKeys[8], info.Spans[1].EndKey)
    83  
    84  	info = splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), 3) // [2,3,4], [5,6,7], [8]
    85  	re.Len(info.RegionCounts, 3)
    86  	re.EqualValues(3, info.RegionCounts[0])
    87  	re.EqualValues(3, info.RegionCounts[1])
    88  	re.EqualValues(1, info.RegionCounts[2])
    89  	re.Len(info.Weights, 3)
    90  	re.EqualValues(303, info.Weights[0])
    91  	re.EqualValues(303, info.Weights[1])
    92  	re.EqualValues(101, info.Weights[2])
    93  	re.Len(info.Spans, 3)
    94  	re.EqualValues(startKeys[2], info.Spans[0].StartKey)
    95  	re.EqualValues(endKeys[4], info.Spans[0].EndKey)
    96  	re.EqualValues(startKeys[5], info.Spans[1].StartKey)
    97  	re.EqualValues(endKeys[7], info.Spans[1].EndKey)
    98  	re.EqualValues(startKeys[8], info.Spans[2].StartKey)
    99  	re.EqualValues(endKeys[8], info.Spans[2].EndKey)
   100  
   101  	// spans > regions
   102  	for p := 7; p <= 10; p++ {
   103  		info = splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), p)
   104  		re.Len(info.RegionCounts, 7)
   105  		for _, c := range info.RegionCounts {
   106  			re.EqualValues(1, c)
   107  		}
   108  		re.Len(info.Weights, 7)
   109  		for _, w := range info.Weights {
   110  			re.EqualValues(101, w, info)
   111  		}
   112  		re.Len(info.Spans, 7)
   113  		for i, r := range info.Spans {
   114  			re.EqualValues(startKeys[2+i], r.StartKey)
   115  			re.EqualValues(endKeys[2+i], r.EndKey)
   116  		}
   117  	}
   118  }
   119  
   120  func TestSplitRegionsByWrittenKeysHotspot1(t *testing.T) {
   121  	t.Parallel()
   122  	re := require.New(t)
   123  
   124  	// Hotspots
   125  	regions, startKeys, endKeys := prepareRegionsInfo(
   126  		[7]int{100, 1, 100, 1, 1, 1, 100})
   127  	splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 4)
   128  	info := splitter.splitRegionsByWrittenKeysV1(0, regions, 4) // [2], [3,4], [5,6,7], [8]
   129  	re.Len(info.RegionCounts, 4)
   130  	re.EqualValues(1, info.RegionCounts[0])
   131  	re.EqualValues(2, info.RegionCounts[1])
   132  	re.EqualValues(3, info.RegionCounts[2])
   133  	re.EqualValues(1, info.RegionCounts[3])
   134  	re.Len(info.Weights, 4)
   135  	re.EqualValues(101, info.Weights[0])
   136  	re.EqualValues(103, info.Weights[1])
   137  	re.EqualValues(6, info.Weights[2])
   138  	re.EqualValues(101, info.Weights[3])
   139  	re.Len(info.Spans, 4)
   140  	re.EqualValues(startKeys[2], info.Spans[0].StartKey)
   141  	re.EqualValues(endKeys[2], info.Spans[0].EndKey)
   142  	re.EqualValues(startKeys[3], info.Spans[1].StartKey)
   143  	re.EqualValues(endKeys[4], info.Spans[1].EndKey)
   144  	re.EqualValues(startKeys[5], info.Spans[2].StartKey)
   145  	re.EqualValues(endKeys[7], info.Spans[2].EndKey)
   146  	re.EqualValues(startKeys[8], info.Spans[3].StartKey)
   147  	re.EqualValues(endKeys[8], info.Spans[3].EndKey)
   148  }
   149  
   150  func TestSplitRegionsByWrittenKeysHotspot2(t *testing.T) {
   151  	t.Parallel()
   152  	re := require.New(t)
   153  
   154  	// Hotspots
   155  	regions, startKeys, endKeys := prepareRegionsInfo(
   156  		[7]int{1000, 1, 1, 1, 100, 1, 99})
   157  	splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 4)
   158  	info := splitter.splitRegionsByWrittenKeysV1(0, regions, 4) // [2], [3,4,5,6], [7], [8]
   159  	re.Len(info.Spans, 4)
   160  	re.EqualValues(startKeys[2], info.Spans[0].StartKey)
   161  	re.EqualValues(endKeys[2], info.Spans[0].EndKey)
   162  	re.EqualValues(startKeys[3], info.Spans[1].StartKey)
   163  	re.EqualValues(endKeys[6], info.Spans[1].EndKey)
   164  	re.EqualValues(startKeys[7], info.Spans[2].StartKey)
   165  	re.EqualValues(endKeys[7], info.Spans[2].EndKey)
   166  	re.EqualValues(startKeys[8], info.Spans[3].StartKey)
   167  	re.EqualValues(endKeys[8], info.Spans[3].EndKey)
   168  }
   169  
   170  func TestSplitRegionsByWrittenKeysCold(t *testing.T) {
   171  	t.Parallel()
   172  	re := require.New(t)
   173  	splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 0)
   174  	regions, startKeys, endKeys := prepareRegionsInfo([7]int{})
   175  	info := splitter.splitRegionsByWrittenKeysV1(0, regions, 3) // [2,3,4], [5,6,7], [8]
   176  	re.Len(info.RegionCounts, 3)
   177  	re.EqualValues(3, info.RegionCounts[0], info)
   178  	re.EqualValues(3, info.RegionCounts[1])
   179  	re.EqualValues(1, info.RegionCounts[2])
   180  	re.Len(info.Weights, 3)
   181  	re.EqualValues(3, info.Weights[0])
   182  	re.EqualValues(3, info.Weights[1])
   183  	re.EqualValues(1, info.Weights[2])
   184  	re.Len(info.Spans, 3)
   185  	re.EqualValues(startKeys[2], info.Spans[0].StartKey)
   186  	re.EqualValues(endKeys[4], info.Spans[0].EndKey)
   187  	re.EqualValues(startKeys[5], info.Spans[1].StartKey)
   188  	re.EqualValues(endKeys[7], info.Spans[1].EndKey)
   189  	re.EqualValues(startKeys[8], info.Spans[2].StartKey)
   190  	re.EqualValues(endKeys[8], info.Spans[2].EndKey)
   191  }
   192  
   193  func TestSplitRegionsByWrittenKeysConfig(t *testing.T) {
   194  	t.Parallel()
   195  	re := require.New(t)
   196  
   197  	splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, math.MaxInt)
   198  	regions, startKeys, endKeys := prepareRegionsInfo([7]int{1, 1, 1, 1, 1, 1, 1})
   199  	info := splitter.splitRegionsByWrittenKeysV1(1, regions, 3) // [2,3,4,5,6,7,8]
   200  	re.Len(info.RegionCounts, 1)
   201  	re.EqualValues(7, info.RegionCounts[0], info)
   202  	re.Len(info.Weights, 1)
   203  	re.EqualValues(14, info.Weights[0])
   204  	re.Len(info.Spans, 1)
   205  	re.EqualValues(startKeys[2], info.Spans[0].StartKey)
   206  	re.EqualValues(endKeys[8], info.Spans[0].EndKey)
   207  	re.EqualValues(1, info.Spans[0].TableID)
   208  
   209  	splitter.writeKeyThreshold = 0
   210  	spans := splitter.split(context.Background(), tablepb.Span{}, 3)
   211  	require.Empty(t, spans)
   212  }
   213  
   214  func TestSplitRegionEven(t *testing.T) {
   215  	tblID := model.TableID(1)
   216  	regionCount := 4653 + 1051 + 745 + 9530 + 1
   217  	regions := make([]pdutil.RegionInfo, regionCount)
   218  	for i := 0; i < regionCount; i++ {
   219  		regions[i] = pdutil.RegionInfo{
   220  			ID:          uint64(i),
   221  			StartKey:    "" + strconv.Itoa(i),
   222  			EndKey:      "" + strconv.Itoa(i),
   223  			WrittenKeys: 2,
   224  		}
   225  	}
   226  	splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 4)
   227  	info := splitter.splitRegionsByWrittenKeysV1(tblID, regions, 5)
   228  	require.Len(t, info.RegionCounts, 5)
   229  	require.Len(t, info.Weights, 5)
   230  	for i, w := range info.Weights {
   231  		if i == 4 {
   232  			require.Equal(t, uint64(9576), w, i)
   233  		} else {
   234  			require.Equal(t, uint64(9591), w, i)
   235  		}
   236  	}
   237  }
   238  
   239  func TestSpanRegionLimitBase(t *testing.T) {
   240  	splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 0)
   241  	var regions []pdutil.RegionInfo
   242  	// test spanRegionLimit works
   243  	for i := 0; i < spanRegionLimit*6; i++ {
   244  		regions = append(regions, pdutil.NewTestRegionInfo(uint64(i+9), []byte("f"), []byte("f"), 100))
   245  	}
   246  	captureNum := 2
   247  	spanNum := getSpansNumber(len(regions), captureNum)
   248  	info := splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), spanNum)
   249  	require.Len(t, info.RegionCounts, spanNum)
   250  	for _, c := range info.RegionCounts {
   251  		require.LessOrEqual(t, c, int(spanRegionLimit*1.1))
   252  	}
   253  }
   254  
   255  func TestSpanRegionLimit(t *testing.T) {
   256  	// Fisher-Yates shuffle algorithm to shuffle the writtenKeys
   257  	// but keep the first preservationRate% of the writtenKeys in the left side of the list
   258  	// to make the writtenKeys more like a hot region list
   259  	shuffle := func(nums []int, preservationRate float64) []int {
   260  		n := len(nums)
   261  		shuffled := make([]int, n)
   262  		copy(shuffled, nums)
   263  
   264  		for i := n - 1; i > 0; i-- {
   265  			if rand.Float64() < preservationRate {
   266  				continue
   267  			}
   268  			j := rand.Intn(i + 1)
   269  			shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
   270  		}
   271  
   272  		return shuffled
   273  	}
   274  
   275  	// total region number
   276  	totalRegionNumbers := spanRegionLimit * 10
   277  
   278  	// writtenKeys over 20000 percentage
   279  	percentOver20000 := 1
   280  	// writtenKeys between 5000 and 10000 percentage
   281  	percentBetween5000And10000 := 5
   282  
   283  	countOver20000 := (percentOver20000 * totalRegionNumbers) / 100
   284  	countBetween5000And10000 := (percentBetween5000And10000 * totalRegionNumbers) / 100
   285  	countBelow1000 := totalRegionNumbers - countOver20000 - countBetween5000And10000
   286  
   287  	// random generate writtenKeys for each region
   288  	var writtenKeys []int
   289  
   290  	for i := 0; i < countOver20000; i++ {
   291  		number := rand.Intn(80000) + 20001
   292  		writtenKeys = append(writtenKeys, number)
   293  	}
   294  
   295  	for i := 0; i < countBetween5000And10000; i++ {
   296  		number := rand.Intn(5001) + 5000
   297  		writtenKeys = append(writtenKeys, number)
   298  	}
   299  
   300  	for i := 0; i < countBelow1000; i++ {
   301  		number := rand.Intn(1000)
   302  		writtenKeys = append(writtenKeys, number)
   303  	}
   304  
   305  	// 70% hot written region is in the left side of the region list
   306  	writtenKeys = shuffle(writtenKeys, 0.7)
   307  
   308  	splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 0)
   309  	var regions []pdutil.RegionInfo
   310  	// region number is 500,000
   311  	// weight is random between 0 and 40,000
   312  	for i := 0; i < len(writtenKeys); i++ {
   313  		regions = append(
   314  			regions,
   315  			pdutil.NewTestRegionInfo(uint64(i+9), []byte("f"), []byte("f"), uint64(writtenKeys[i])))
   316  	}
   317  	captureNum := 3
   318  	spanNum := getSpansNumber(len(regions), captureNum)
   319  	info := splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), spanNum)
   320  	require.LessOrEqual(t, spanNum, len(info.RegionCounts))
   321  	for _, c := range info.RegionCounts {
   322  		require.LessOrEqual(t, c, int(spanRegionLimit*1.1))
   323  	}
   324  }