github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/scheduler/internal/v3/keyspan/splitter_write_test.go (about) 1 // Copyright 2023 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package keyspan 15 16 import ( 17 "context" 18 "encoding/hex" 19 "math" 20 "math/rand" 21 "strconv" 22 "testing" 23 24 "github.com/pingcap/tiflow/cdc/model" 25 "github.com/pingcap/tiflow/cdc/processor/tablepb" 26 "github.com/pingcap/tiflow/pkg/pdutil" 27 "github.com/stretchr/testify/require" 28 ) 29 30 func prepareRegionsInfo(writtenKeys [7]int) ([]pdutil.RegionInfo, map[int][]byte, map[int][]byte) { 31 regions := []pdutil.RegionInfo{ 32 pdutil.NewTestRegionInfo(2, []byte("a"), []byte("b"), uint64(writtenKeys[0])), 33 pdutil.NewTestRegionInfo(3, []byte("b"), []byte("c"), uint64(writtenKeys[1])), 34 pdutil.NewTestRegionInfo(4, []byte("c"), []byte("d"), uint64(writtenKeys[2])), 35 pdutil.NewTestRegionInfo(5, []byte("e"), []byte("f"), uint64(writtenKeys[3])), 36 pdutil.NewTestRegionInfo(6, []byte("f"), []byte("fa"), uint64(writtenKeys[4])), 37 pdutil.NewTestRegionInfo(7, []byte("fa"), []byte("fc"), uint64(writtenKeys[5])), 38 pdutil.NewTestRegionInfo(8, []byte("fc"), []byte("ff"), uint64(writtenKeys[6])), 39 } 40 startKeys := map[int][]byte{} 41 endKeys := map[int][]byte{} 42 for _, r := range regions { 43 b, _ := hex.DecodeString(r.StartKey) 44 startKeys[int(r.ID)] = b 45 } 46 for _, r := range regions { 47 b, _ := hex.DecodeString(r.EndKey) 48 endKeys[int(r.ID)] = b 49 } 50 return regions, startKeys, endKeys 51 } 52 53 func cloneRegions(info []pdutil.RegionInfo) []pdutil.RegionInfo { 54 return append([]pdutil.RegionInfo{}, info...) 55 } 56 57 func TestSplitRegionsByWrittenKeysUniform(t *testing.T) { 58 t.Parallel() 59 re := require.New(t) 60 61 regions, startKeys, endKeys := prepareRegionsInfo( 62 [7]int{100, 100, 100, 100, 100, 100, 100}) // region id: [2,3,4,5,6,7,8] 63 splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 0) 64 info := splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), 1) 65 re.Len(info.RegionCounts, 1) 66 re.EqualValues(7, info.RegionCounts[0]) 67 re.Len(info.Spans, 1) 68 re.EqualValues(startKeys[2], info.Spans[0].StartKey) 69 re.EqualValues(endKeys[8], info.Spans[0].EndKey) 70 71 info = splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), 2) // [2,3,4,5], [6,7,8] 72 re.Len(info.RegionCounts, 2) 73 re.EqualValues(4, info.RegionCounts[0]) 74 re.EqualValues(3, info.RegionCounts[1]) 75 re.Len(info.Weights, 2) 76 re.EqualValues(404, info.Weights[0]) 77 re.EqualValues(303, info.Weights[1]) 78 re.Len(info.Spans, 2) 79 re.EqualValues(startKeys[2], info.Spans[0].StartKey) 80 re.EqualValues(endKeys[5], info.Spans[0].EndKey) 81 re.EqualValues(startKeys[6], info.Spans[1].StartKey) 82 re.EqualValues(endKeys[8], info.Spans[1].EndKey) 83 84 info = splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), 3) // [2,3,4], [5,6,7], [8] 85 re.Len(info.RegionCounts, 3) 86 re.EqualValues(3, info.RegionCounts[0]) 87 re.EqualValues(3, info.RegionCounts[1]) 88 re.EqualValues(1, info.RegionCounts[2]) 89 re.Len(info.Weights, 3) 90 re.EqualValues(303, info.Weights[0]) 91 re.EqualValues(303, info.Weights[1]) 92 re.EqualValues(101, info.Weights[2]) 93 re.Len(info.Spans, 3) 94 re.EqualValues(startKeys[2], info.Spans[0].StartKey) 95 re.EqualValues(endKeys[4], info.Spans[0].EndKey) 96 re.EqualValues(startKeys[5], info.Spans[1].StartKey) 97 re.EqualValues(endKeys[7], info.Spans[1].EndKey) 98 re.EqualValues(startKeys[8], info.Spans[2].StartKey) 99 re.EqualValues(endKeys[8], info.Spans[2].EndKey) 100 101 // spans > regions 102 for p := 7; p <= 10; p++ { 103 info = splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), p) 104 re.Len(info.RegionCounts, 7) 105 for _, c := range info.RegionCounts { 106 re.EqualValues(1, c) 107 } 108 re.Len(info.Weights, 7) 109 for _, w := range info.Weights { 110 re.EqualValues(101, w, info) 111 } 112 re.Len(info.Spans, 7) 113 for i, r := range info.Spans { 114 re.EqualValues(startKeys[2+i], r.StartKey) 115 re.EqualValues(endKeys[2+i], r.EndKey) 116 } 117 } 118 } 119 120 func TestSplitRegionsByWrittenKeysHotspot1(t *testing.T) { 121 t.Parallel() 122 re := require.New(t) 123 124 // Hotspots 125 regions, startKeys, endKeys := prepareRegionsInfo( 126 [7]int{100, 1, 100, 1, 1, 1, 100}) 127 splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 4) 128 info := splitter.splitRegionsByWrittenKeysV1(0, regions, 4) // [2], [3,4], [5,6,7], [8] 129 re.Len(info.RegionCounts, 4) 130 re.EqualValues(1, info.RegionCounts[0]) 131 re.EqualValues(2, info.RegionCounts[1]) 132 re.EqualValues(3, info.RegionCounts[2]) 133 re.EqualValues(1, info.RegionCounts[3]) 134 re.Len(info.Weights, 4) 135 re.EqualValues(101, info.Weights[0]) 136 re.EqualValues(103, info.Weights[1]) 137 re.EqualValues(6, info.Weights[2]) 138 re.EqualValues(101, info.Weights[3]) 139 re.Len(info.Spans, 4) 140 re.EqualValues(startKeys[2], info.Spans[0].StartKey) 141 re.EqualValues(endKeys[2], info.Spans[0].EndKey) 142 re.EqualValues(startKeys[3], info.Spans[1].StartKey) 143 re.EqualValues(endKeys[4], info.Spans[1].EndKey) 144 re.EqualValues(startKeys[5], info.Spans[2].StartKey) 145 re.EqualValues(endKeys[7], info.Spans[2].EndKey) 146 re.EqualValues(startKeys[8], info.Spans[3].StartKey) 147 re.EqualValues(endKeys[8], info.Spans[3].EndKey) 148 } 149 150 func TestSplitRegionsByWrittenKeysHotspot2(t *testing.T) { 151 t.Parallel() 152 re := require.New(t) 153 154 // Hotspots 155 regions, startKeys, endKeys := prepareRegionsInfo( 156 [7]int{1000, 1, 1, 1, 100, 1, 99}) 157 splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 4) 158 info := splitter.splitRegionsByWrittenKeysV1(0, regions, 4) // [2], [3,4,5,6], [7], [8] 159 re.Len(info.Spans, 4) 160 re.EqualValues(startKeys[2], info.Spans[0].StartKey) 161 re.EqualValues(endKeys[2], info.Spans[0].EndKey) 162 re.EqualValues(startKeys[3], info.Spans[1].StartKey) 163 re.EqualValues(endKeys[6], info.Spans[1].EndKey) 164 re.EqualValues(startKeys[7], info.Spans[2].StartKey) 165 re.EqualValues(endKeys[7], info.Spans[2].EndKey) 166 re.EqualValues(startKeys[8], info.Spans[3].StartKey) 167 re.EqualValues(endKeys[8], info.Spans[3].EndKey) 168 } 169 170 func TestSplitRegionsByWrittenKeysCold(t *testing.T) { 171 t.Parallel() 172 re := require.New(t) 173 splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 0) 174 regions, startKeys, endKeys := prepareRegionsInfo([7]int{}) 175 info := splitter.splitRegionsByWrittenKeysV1(0, regions, 3) // [2,3,4], [5,6,7], [8] 176 re.Len(info.RegionCounts, 3) 177 re.EqualValues(3, info.RegionCounts[0], info) 178 re.EqualValues(3, info.RegionCounts[1]) 179 re.EqualValues(1, info.RegionCounts[2]) 180 re.Len(info.Weights, 3) 181 re.EqualValues(3, info.Weights[0]) 182 re.EqualValues(3, info.Weights[1]) 183 re.EqualValues(1, info.Weights[2]) 184 re.Len(info.Spans, 3) 185 re.EqualValues(startKeys[2], info.Spans[0].StartKey) 186 re.EqualValues(endKeys[4], info.Spans[0].EndKey) 187 re.EqualValues(startKeys[5], info.Spans[1].StartKey) 188 re.EqualValues(endKeys[7], info.Spans[1].EndKey) 189 re.EqualValues(startKeys[8], info.Spans[2].StartKey) 190 re.EqualValues(endKeys[8], info.Spans[2].EndKey) 191 } 192 193 func TestSplitRegionsByWrittenKeysConfig(t *testing.T) { 194 t.Parallel() 195 re := require.New(t) 196 197 splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, math.MaxInt) 198 regions, startKeys, endKeys := prepareRegionsInfo([7]int{1, 1, 1, 1, 1, 1, 1}) 199 info := splitter.splitRegionsByWrittenKeysV1(1, regions, 3) // [2,3,4,5,6,7,8] 200 re.Len(info.RegionCounts, 1) 201 re.EqualValues(7, info.RegionCounts[0], info) 202 re.Len(info.Weights, 1) 203 re.EqualValues(14, info.Weights[0]) 204 re.Len(info.Spans, 1) 205 re.EqualValues(startKeys[2], info.Spans[0].StartKey) 206 re.EqualValues(endKeys[8], info.Spans[0].EndKey) 207 re.EqualValues(1, info.Spans[0].TableID) 208 209 splitter.writeKeyThreshold = 0 210 spans := splitter.split(context.Background(), tablepb.Span{}, 3) 211 require.Empty(t, spans) 212 } 213 214 func TestSplitRegionEven(t *testing.T) { 215 tblID := model.TableID(1) 216 regionCount := 4653 + 1051 + 745 + 9530 + 1 217 regions := make([]pdutil.RegionInfo, regionCount) 218 for i := 0; i < regionCount; i++ { 219 regions[i] = pdutil.RegionInfo{ 220 ID: uint64(i), 221 StartKey: "" + strconv.Itoa(i), 222 EndKey: "" + strconv.Itoa(i), 223 WrittenKeys: 2, 224 } 225 } 226 splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 4) 227 info := splitter.splitRegionsByWrittenKeysV1(tblID, regions, 5) 228 require.Len(t, info.RegionCounts, 5) 229 require.Len(t, info.Weights, 5) 230 for i, w := range info.Weights { 231 if i == 4 { 232 require.Equal(t, uint64(9576), w, i) 233 } else { 234 require.Equal(t, uint64(9591), w, i) 235 } 236 } 237 } 238 239 func TestSpanRegionLimitBase(t *testing.T) { 240 splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 0) 241 var regions []pdutil.RegionInfo 242 // test spanRegionLimit works 243 for i := 0; i < spanRegionLimit*6; i++ { 244 regions = append(regions, pdutil.NewTestRegionInfo(uint64(i+9), []byte("f"), []byte("f"), 100)) 245 } 246 captureNum := 2 247 spanNum := getSpansNumber(len(regions), captureNum) 248 info := splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), spanNum) 249 require.Len(t, info.RegionCounts, spanNum) 250 for _, c := range info.RegionCounts { 251 require.LessOrEqual(t, c, int(spanRegionLimit*1.1)) 252 } 253 } 254 255 func TestSpanRegionLimit(t *testing.T) { 256 // Fisher-Yates shuffle algorithm to shuffle the writtenKeys 257 // but keep the first preservationRate% of the writtenKeys in the left side of the list 258 // to make the writtenKeys more like a hot region list 259 shuffle := func(nums []int, preservationRate float64) []int { 260 n := len(nums) 261 shuffled := make([]int, n) 262 copy(shuffled, nums) 263 264 for i := n - 1; i > 0; i-- { 265 if rand.Float64() < preservationRate { 266 continue 267 } 268 j := rand.Intn(i + 1) 269 shuffled[i], shuffled[j] = shuffled[j], shuffled[i] 270 } 271 272 return shuffled 273 } 274 275 // total region number 276 totalRegionNumbers := spanRegionLimit * 10 277 278 // writtenKeys over 20000 percentage 279 percentOver20000 := 1 280 // writtenKeys between 5000 and 10000 percentage 281 percentBetween5000And10000 := 5 282 283 countOver20000 := (percentOver20000 * totalRegionNumbers) / 100 284 countBetween5000And10000 := (percentBetween5000And10000 * totalRegionNumbers) / 100 285 countBelow1000 := totalRegionNumbers - countOver20000 - countBetween5000And10000 286 287 // random generate writtenKeys for each region 288 var writtenKeys []int 289 290 for i := 0; i < countOver20000; i++ { 291 number := rand.Intn(80000) + 20001 292 writtenKeys = append(writtenKeys, number) 293 } 294 295 for i := 0; i < countBetween5000And10000; i++ { 296 number := rand.Intn(5001) + 5000 297 writtenKeys = append(writtenKeys, number) 298 } 299 300 for i := 0; i < countBelow1000; i++ { 301 number := rand.Intn(1000) 302 writtenKeys = append(writtenKeys, number) 303 } 304 305 // 70% hot written region is in the left side of the region list 306 writtenKeys = shuffle(writtenKeys, 0.7) 307 308 splitter := newWriteSplitter(model.ChangeFeedID4Test("test", "test"), nil, 0) 309 var regions []pdutil.RegionInfo 310 // region number is 500,000 311 // weight is random between 0 and 40,000 312 for i := 0; i < len(writtenKeys); i++ { 313 regions = append( 314 regions, 315 pdutil.NewTestRegionInfo(uint64(i+9), []byte("f"), []byte("f"), uint64(writtenKeys[i]))) 316 } 317 captureNum := 3 318 spanNum := getSpansNumber(len(regions), captureNum) 319 info := splitter.splitRegionsByWrittenKeysV1(0, cloneRegions(regions), spanNum) 320 require.LessOrEqual(t, spanNum, len(info.RegionCounts)) 321 for _, c := range info.RegionCounts { 322 require.LessOrEqual(t, c, int(spanRegionLimit*1.1)) 323 } 324 }