github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/cdc/scheduler/internal/v3/keyspan/reconciler.go

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package keyspan

import (
	"context"

	"github.com/pingcap/log"
	"github.com/pingcap/tiflow/cdc/model"
	"github.com/pingcap/tiflow/cdc/processor/tablepb"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/compat"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/member"
	"github.com/pingcap/tiflow/cdc/scheduler/internal/v3/replication"
	"github.com/pingcap/tiflow/pkg/config"
	"github.com/pingcap/tiflow/pkg/errors"
	"github.com/pingcap/tiflow/pkg/pdutil"
	"github.com/pingcap/tiflow/pkg/spanz"
	"github.com/pingcap/tiflow/pkg/upstream"
	"go.uber.org/zap"
)

const (
	// spanRegionLimit is the maximum number of regions a span can cover.
	spanRegionLimit = 50000
	// baseSpanNumberCoefficient is the base coefficient that is used to
	// multiply the number of captures to get the number of spans.
	baseSpanNumberCoefficient = 3
)

type splitter interface {
	split(
		ctx context.Context, span tablepb.Span, totalCaptures int,
	) []tablepb.Span
}

type splittedSpans struct {
	byAddTable bool
	spans      []tablepb.Span
}

// Reconciler reconciles the span and table mapping, making sure spans are
// in a desired state and cover all table ranges.
type Reconciler struct {
	tableSpans map[model.TableID]splittedSpans
	spanCache  []tablepb.Span

	changefeedID model.ChangeFeedID
	config       *config.ChangefeedSchedulerConfig

	splitter []splitter
}

// NewReconciler returns a Reconciler.
func NewReconciler(
	changefeedID model.ChangeFeedID,
	up *upstream.Upstream,
	config *config.ChangefeedSchedulerConfig,
) (*Reconciler, error) {
	pdapi, err := pdutil.NewPDAPIClient(up.PDClient, up.SecurityConfig)
	if err != nil {
		return nil, errors.Trace(err)
	}
	return &Reconciler{
		tableSpans:   make(map[model.TableID]splittedSpans),
		changefeedID: changefeedID,
		config:       config,
		splitter: []splitter{
			// write splitter has the highest priority.
			newWriteSplitter(changefeedID, pdapi, config.WriteKeyThreshold),
			newRegionCountSplitter(changefeedID, up.RegionCache, config.RegionThreshold),
		},
	}, nil
}
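// The splitters registered above are consulted in priority order inside
// Reconcile: the write splitter first, then the region-count splitter, and
// the first one that yields more than one span wins. Registering the write
// splitter first means a write-hot table can be split by traffic even when
// its region count is below config.RegionThreshold. A minimal sketch of the
// selection loop (illustrative only; it mirrors the code in Reconcile below
// rather than adding new behavior):
//
//	spans := []tablepb.Span{tableSpan}
//	for _, s := range m.splitter {
//		spans = s.split(ctx, tableSpan, len(aliveCaptures))
//		if len(spans) > 1 {
//			// This splitter produced a real split; stop here.
//			break
//		}
//	}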
// Reconcile spans that need to be replicated based on the current cluster
// status. It handles the following cases:
// 1. Changefeed initialization.
// 2. Owner switch.
// 3. Owner switch after some captures fail.
// 4. Add table by DDL.
// 5. Drop table by DDL.
// 6. Some captures fail, which does NOT affect spans.
func (m *Reconciler) Reconcile(
	ctx context.Context,
	currentTables *replication.TableRanges,
	replications *spanz.BtreeMap[*replication.ReplicationSet],
	aliveCaptures map[model.CaptureID]*member.CaptureStatus,
	compat *compat.Compat,
) []tablepb.Span {
	tablesLenEqual := currentTables.Len() == len(m.tableSpans)
	allTablesFound := true
	updateCache := false
	currentTables.Iter(func(tableID model.TableID, tableStart, tableEnd tablepb.Span) bool {
		if _, ok := m.tableSpans[tableID]; !ok {
			// Found a new table.
			allTablesFound = false
			updateCache = true
		}

		// Reconcile spans from current replications.
		coveredSpans, holes := replications.FindHoles(tableStart, tableEnd)
		if len(coveredSpans) == 0 {
			// No such spans in replications.
			if _, ok := m.tableSpans[tableID]; ok {
				// We have seen such spans before; it means these spans have
				// not been scheduled yet due to the basic scheduler's
				// batch-add task rate limit.
				return true
			}
			// We have not seen such spans before, maybe:
			// 1. it's a table being added when starting a changefeed
			//    or after an owner switch.
			// 4. it's a new table being created by DDL when a changefeed is running.
			tableSpan := spanz.TableIDToComparableSpan(tableID)
			spans := []tablepb.Span{tableSpan}
			if compat.CheckSpanReplicationEnabled() {
				for _, splitter := range m.splitter {
					spans = splitter.split(ctx, tableSpan, len(aliveCaptures))
					if len(spans) > 1 {
						break
					}
				}
			}
			m.tableSpans[tableID] = splittedSpans{
				byAddTable: true,
				spans:      spans,
			}
			updateCache = true
		} else if len(holes) != 0 {
			// There are some holes in the table span, maybe:
			if spans, ok := m.tableSpans[tableID]; ok && spans.byAddTable {
				// These spans were split when the reconciler added the
				// table. Scheduling may still be in progress because of the
				// basic scheduler's rate limit.
				return true
			}
			// 3. owner switch after some captures failed.
			log.Info("schedulerv3: detect owner switch after captures fail",
				zap.String("changefeed", m.changefeedID.ID),
				zap.String("namespace", m.changefeedID.Namespace),
				zap.Int64("tableID", tableID),
				zap.Int("holes", len(holes)),
				zap.String("spanStart", tableStart.String()),
				zap.String("spanEnd", tableEnd.String()),
				zap.String("foundStart", coveredSpans[0].String()),
				zap.String("foundEnd", coveredSpans[len(coveredSpans)-1].String()))
			spans := make([]tablepb.Span, 0, len(coveredSpans)+len(holes))
			spans = append(spans, coveredSpans...)
			for _, s := range holes {
				spans = append(spans, tablepb.Span{
					TableID:  tableID,
					StartKey: s.StartKey,
					EndKey:   s.EndKey,
				})
				// TODO: maybe we should split holes too.
			}
			m.tableSpans[tableID] = splittedSpans{
				byAddTable: false,
				spans:      spans,
			}
			updateCache = true
		} else {
			// Found and no hole, maybe:
			// 2. owner switch and no capture fails.
			ss := m.tableSpans[tableID]
			ss.byAddTable = false
			ss.spans = ss.spans[:0]
			ss.spans = append(ss.spans, coveredSpans...)
			m.tableSpans[tableID] = ss
		}
		return true
	})

	// 5. Drop table by DDL.
	// Most of the time, table removal is unlikely to happen.
	//
	// Fast path to check whether the two sets are identical:
	// the lengths of currentTables and tableSpans are equal, and every
	// table in currentTables has a record in tableSpans.
	if !tablesLenEqual || !allTablesFound {
		// The two sets are not identical. We need to find removed tables.
		// Build a tableID hash set to improve performance.
		currentTableSet := make(map[model.TableID]struct{}, currentTables.Len())
		currentTables.Iter(func(tableID model.TableID, _, _ tablepb.Span) bool {
			currentTableSet[tableID] = struct{}{}
			return true
		})
		for tableID := range m.tableSpans {
			_, ok := currentTableSet[tableID]
			if !ok {
				// Found a dropped table.
				delete(m.tableSpans, tableID)
				updateCache = true
			}
		}
	}

	if updateCache {
		m.spanCache = make([]tablepb.Span, 0)
		for _, ss := range m.tableSpans {
			m.spanCache = append(m.spanCache, ss.spans...)
		}
	}
	return m.spanCache
}
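// getSpansNumber below sizes a split: it targets captureNum * coefficient
// spans, where coefficient = max(captureNum-1, baseSpanNumberCoefficient),
// raises that so no span averages more than spanRegionLimit regions, and
// caps the result at maxSpanNumber. Two worked examples (arithmetic added
// for illustration, not from the original source):
//
//	getSpansNumber(9000, 3)    // coefficient = max(3-1, 3) = 3
//	                           // spanNum = max(3*3, 9000/50000) = 9
//	getSpansNumber(1000000, 2) // coefficient = max(2-1, 3) = 3
//	                           // spanNum = max(2*3, 1000000/50000) = 20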
const maxSpanNumber = 100

func getSpansNumber(regionNum, captureNum int) int {
	coefficient := captureNum - 1
	if baseSpanNumberCoefficient > coefficient {
		coefficient = baseSpanNumberCoefficient
	}
	spanNum := 1
	if regionNum > 1 {
		// spanNum = max(captureNum * coefficient, regionNum / spanRegionLimit)
		spanNum = captureNum * coefficient
		if regionNum/spanRegionLimit > spanNum {
			spanNum = regionNum / spanRegionLimit
		}
	}
	if spanNum > maxSpanNumber {
		spanNum = maxSpanNumber
	}
	return spanNum
}
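// For orientation, a hypothetical caller sketch (not part of this file; the
// surrounding scheduler wiring, and names like cfg and compatFlags, are
// assumptions): the owner would construct one Reconciler per changefeed and
// call Reconcile on each scheduling tick, feeding the latest cluster view in
// and handing the returned spans to the schedulers.
//
//	r, err := NewReconciler(changefeedID, up, cfg)
//	if err != nil {
//		return errors.Trace(err)
//	}
//	// currentTables, replications, aliveCaptures, and compatFlags are
//	// provided by the scheduler coordinator (assumed here).
//	spans := r.Reconcile(ctx, currentTables, replications, aliveCaptures, compatFlags)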