github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/scheduler/internal/v3/keyspan/splitter_region_count.go (about) 1 // Copyright 2023 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package keyspan 15 16 import ( 17 "bytes" 18 "context" 19 "math" 20 21 "github.com/pingcap/log" 22 "github.com/pingcap/tiflow/cdc/model" 23 "github.com/pingcap/tiflow/cdc/processor/tablepb" 24 "github.com/tikv/client-go/v2/tikv" 25 "go.uber.org/zap" 26 ) 27 28 type regionCountSplitter struct { 29 changefeedID model.ChangeFeedID 30 regionCache RegionCache 31 regionThreshold int 32 } 33 34 func newRegionCountSplitter( 35 changefeedID model.ChangeFeedID, regionCache RegionCache, regionThreshold int, 36 ) *regionCountSplitter { 37 return ®ionCountSplitter{ 38 changefeedID: changefeedID, 39 regionCache: regionCache, 40 regionThreshold: regionThreshold, 41 } 42 } 43 44 func (m *regionCountSplitter) split( 45 ctx context.Context, span tablepb.Span, captureNum int, 46 ) []tablepb.Span { 47 bo := tikv.NewBackoffer(ctx, 500) 48 regions, err := m.regionCache.ListRegionIDsInKeyRange(bo, span.StartKey, span.EndKey) 49 if err != nil { 50 log.Warn("schedulerv3: list regions failed, skip split span", 51 zap.String("namespace", m.changefeedID.Namespace), 52 zap.String("changefeed", m.changefeedID.ID), 53 zap.String("span", span.String()), 54 zap.Error(err)) 55 return []tablepb.Span{span} 56 } 57 if len(regions) <= m.regionThreshold || captureNum == 0 { 58 log.Info("schedulerv3: skip split span by region count", 59 zap.String("namespace", m.changefeedID.Namespace), 60 zap.String("changefeed", m.changefeedID.ID), 61 zap.String("span", span.String()), 62 zap.Int("totalCaptures", captureNum), 63 zap.Int("regionCount", len(regions)), 64 zap.Int("regionThreshold", m.regionThreshold)) 65 return []tablepb.Span{span} 66 } 67 68 stepper := newEvenlySplitStepper( 69 getSpansNumber(len(regions), captureNum), 70 len(regions)) 71 72 spans := make([]tablepb.Span, 0, stepper.SpanCount()) 73 start, end := 0, stepper.Step() 74 for { 75 startRegion, err := m.regionCache.LocateRegionByID(bo, regions[start]) 76 if err != nil { 77 log.Warn("schedulerv3: get regions failed, skip split span", 78 zap.String("namespace", m.changefeedID.Namespace), 79 zap.String("changefeed", m.changefeedID.ID), 80 zap.String("span", span.String()), 81 zap.Error(err)) 82 return []tablepb.Span{span} 83 } 84 endRegion, err := m.regionCache.LocateRegionByID(bo, regions[end-1]) 85 if err != nil { 86 log.Warn("schedulerv3: get regions failed, skip split span", 87 zap.String("namespace", m.changefeedID.Namespace), 88 zap.String("changefeed", m.changefeedID.ID), 89 zap.String("span", span.String()), 90 zap.Error(err)) 91 return []tablepb.Span{span} 92 } 93 if len(spans) > 0 && 94 bytes.Compare(spans[len(spans)-1].EndKey, startRegion.StartKey) > 0 { 95 log.Warn("schedulerv3: list region out of order detected", 96 zap.String("namespace", m.changefeedID.Namespace), 97 zap.String("changefeed", m.changefeedID.ID), 98 zap.String("span", span.String()), 99 zap.Stringer("lastSpan", &spans[len(spans)-1]), 100 zap.Stringer("region", startRegion)) 101 return []tablepb.Span{span} 102 } 103 spans = append(spans, tablepb.Span{ 104 TableID: span.TableID, 105 StartKey: startRegion.StartKey, 106 EndKey: endRegion.EndKey, 107 }) 108 109 if end == len(regions) { 110 break 111 } 112 start = end 113 step := stepper.Step() 114 if end+step < len(regions) { 115 end = end + step 116 } else { 117 end = len(regions) 118 } 119 } 120 // Make sure spans does not exceed [startKey, endKey). 121 spans[0].StartKey = span.StartKey 122 spans[len(spans)-1].EndKey = span.EndKey 123 log.Info("schedulerv3: split span by region count", 124 zap.String("namespace", m.changefeedID.Namespace), 125 zap.String("changefeed", m.changefeedID.ID), 126 zap.String("span", span.String()), 127 zap.Int("spans", len(spans)), 128 zap.Int("totalCaptures", captureNum), 129 zap.Int("regionCount", len(regions)), 130 zap.Int("regionThreshold", m.regionThreshold), 131 zap.Int("spanRegionLimit", spanRegionLimit)) 132 return spans 133 } 134 135 type evenlySplitStepper struct { 136 spanCount int 137 regionPerSpan int 138 extraRegionPerSpan int 139 remain int 140 } 141 142 func newEvenlySplitStepper(pages int, totalRegion int) evenlySplitStepper { 143 extraRegionPerSpan := 0 144 regionPerSpan, remain := totalRegion/pages, totalRegion%pages 145 if regionPerSpan == 0 { 146 regionPerSpan = 1 147 extraRegionPerSpan = 0 148 pages = totalRegion 149 } else if remain != 0 { 150 // Evenly distributes the remaining regions. 151 extraRegionPerSpan = int(math.Ceil(float64(remain) / float64(pages))) 152 } 153 res := evenlySplitStepper{ 154 regionPerSpan: regionPerSpan, 155 spanCount: pages, 156 extraRegionPerSpan: extraRegionPerSpan, 157 remain: remain, 158 } 159 log.Info("schedulerv3: evenly split stepper", zap.Any("evenlySplitStepper", res)) 160 return res 161 } 162 163 func (e *evenlySplitStepper) SpanCount() int { 164 return e.spanCount 165 } 166 167 func (e *evenlySplitStepper) Step() int { 168 if e.remain <= 0 { 169 return e.regionPerSpan 170 } 171 e.remain = e.remain - e.extraRegionPerSpan 172 return e.regionPerSpan + e.extraRegionPerSpan 173 }