github.com/pingcap/ticdc@v0.0.0-20220526033649-485a10ef2652/cdc/owner/scheduler.go

// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package owner

import (
	"math"

	"github.com/pingcap/errors"
	"github.com/pingcap/failpoint"
	"github.com/pingcap/log"
	"github.com/pingcap/ticdc/cdc/model"
	cerror "github.com/pingcap/ticdc/pkg/errors"
	"go.uber.org/zap"
)

type schedulerJobType string

const (
	schedulerJobTypeAddTable    schedulerJobType = "ADD"
	schedulerJobTypeRemoveTable schedulerJobType = "REMOVE"
)

type schedulerJob struct {
	Tp      schedulerJobType
	TableID model.TableID
	// if the operation is a delete operation, boundaryTs is the checkpoint ts
	// if the operation is an add operation, boundaryTs is the start ts
	BoundaryTs    uint64
	TargetCapture model.CaptureID
}

type moveTableJob struct {
	tableID model.TableID
	target  model.CaptureID
}

type scheduler struct {
	state         *model.ChangefeedReactorState
	currentTables []model.TableID
	captures      map[model.CaptureID]*model.CaptureInfo

	moveTableTargets      map[model.TableID]model.CaptureID
	moveTableJobQueue     []*moveTableJob
	needRebalanceNextTick bool
	lastTickCaptureCount  int
}

func newScheduler() *scheduler {
	return &scheduler{
		moveTableTargets: make(map[model.TableID]model.CaptureID),
	}
}

// Tick is the main function of the scheduler. It dispatches tables to captures and handles move-table and rebalance events.
// Tick returns a bool representing whether the changefeed's state can be updated in this tick.
// The state can be updated only if all the tables that should be listened to have been dispatched to captures and no operations have been sent to captures in this tick.
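//
// A minimal sketch of how an owner tick loop might drive the scheduler; the
// state, currentTables, and captures values come from the owner's etcd-backed
// reactor, and the surrounding plumbing is assumed rather than shown:
//
//	sched := newScheduler()
//	shouldUpdateState, err := sched.Tick(state, currentTables, captures)
//	if err != nil {
//		return errors.Trace(err)
//	}
//	if shouldUpdateState {
//		// all tables are dispatched and no operation is in flight,
//		// so the global resolved ts / checkpoint ts may be advanced
//	}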
func (s *scheduler) Tick(state *model.ChangefeedReactorState, currentTables []model.TableID, captures map[model.CaptureID]*model.CaptureInfo) (shouldUpdateState bool, err error) {
	s.state = state
	s.currentTables = currentTables
	s.captures = captures

	s.cleanUpFinishedOperations()
	pendingJob, err := s.syncTablesWithCurrentTables()
	if err != nil {
		return false, errors.Trace(err)
	}
	s.dispatchToTargetCaptures(pendingJob)
	if len(pendingJob) != 0 {
		log.Debug("scheduler: generated pending jobs to be executed", zap.Any("pendingJob", pendingJob))
	}
	s.handleJobs(pendingJob)

	// the global resolved ts and checkpoint ts can be updated only if the
	// pending job list is empty and no table is being rebalanced or moved
	shouldUpdateState = len(pendingJob) == 0
	shouldUpdateState = s.rebalance() && shouldUpdateState
	shouldUpdateStateInMoveTable, err := s.handleMoveTableJob()
	if err != nil {
		return false, errors.Trace(err)
	}
	shouldUpdateState = shouldUpdateStateInMoveTable && shouldUpdateState
	s.lastTickCaptureCount = len(captures)
	return shouldUpdateState, nil
}

func (s *scheduler) MoveTable(tableID model.TableID, target model.CaptureID) {
	s.moveTableJobQueue = append(s.moveTableJobQueue, &moveTableJob{
		tableID: tableID,
		target:  target,
	})
}

// handleMoveTableJob handles the move table jobs added by the MoveTable function
func (s *scheduler) handleMoveTableJob() (shouldUpdateState bool, err error) {
	shouldUpdateState = true
	if len(s.moveTableJobQueue) == 0 {
		return
	}
	table2CaptureIndex, err := s.table2CaptureIndex()
	if err != nil {
		return false, errors.Trace(err)
	}
	for _, job := range s.moveTableJobQueue {
		source, exist := table2CaptureIndex[job.tableID]
		if !exist {
			return
		}
		s.moveTableTargets[job.tableID] = job.target
		job := job
		shouldUpdateState = false
		// a move is done in two phases: here we only remove the table from the
		// source capture, and the removed table will be added to the target
		// capture by syncTablesWithCurrentTables in the next tick
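		// note: PatchTaskStatus does not apply the closure here; it queues it
		// as a patch that the etcd worker applies later, which is why the loop
		// variable is re-bound with `job := job` above so that each closure
		// captures its own job value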
		s.state.PatchTaskStatus(source, func(status *model.TaskStatus) (*model.TaskStatus, bool, error) {
			if status == nil {
				// the capture may be down, just skip removing this table
				return status, false, nil
			}
			if status.Operation != nil && status.Operation[job.tableID] != nil {
				// skip removing this table to avoid this remove operation
				// interfering with an in-flight operation created elsewhere
				return status, false, nil
			}
			status.RemoveTable(job.tableID, s.state.Status.CheckpointTs, false)
			return status, true, nil
		})
	}
	s.moveTableJobQueue = nil
	return
}

func (s *scheduler) Rebalance() {
	s.needRebalanceNextTick = true
}

func (s *scheduler) table2CaptureIndex() (map[model.TableID]model.CaptureID, error) {
	table2CaptureIndex := make(map[model.TableID]model.CaptureID)
	for captureID, taskStatus := range s.state.TaskStatuses {
		for tableID := range taskStatus.Tables {
			if preCaptureID, exist := table2CaptureIndex[tableID]; exist && preCaptureID != captureID {
				return nil, cerror.ErrTableListenReplicated.GenWithStackByArgs(tableID, preCaptureID, captureID)
			}
			table2CaptureIndex[tableID] = captureID
		}
		for tableID := range taskStatus.Operation {
			if preCaptureID, exist := table2CaptureIndex[tableID]; exist && preCaptureID != captureID {
				return nil, cerror.ErrTableListenReplicated.GenWithStackByArgs(tableID, preCaptureID, captureID)
			}
			table2CaptureIndex[tableID] = captureID
		}
	}
	return table2CaptureIndex, nil
}

// dispatchToTargetCaptures sets the TargetCapture of the given scheduler jobs.
// If the TargetCapture of a job is not set, it picks the capture with the minimum workload and assigns the job to it.
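//
// An illustrative example (capture IDs and workloads are hypothetical): if
// capture "a" currently has workload 3 and capture "b" has workload 1, two
// unassigned ADD jobs both go to "b", because each pending ADD also counts
// toward the target capture's workload: the first assignment raises "b" to 2,
// and "b" is still the minimum for the second (a=3, b=3 afterwards).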
func (s *scheduler) dispatchToTargetCaptures(pendingJobs []*schedulerJob) {
	workloads := make(map[model.CaptureID]uint64)

	for captureID := range s.captures {
		workloads[captureID] = 0
		taskWorkload := s.state.Workloads[captureID]
		if taskWorkload == nil {
			continue
		}
		for _, workload := range taskWorkload {
			workloads[captureID] += workload.Workload
		}
	}

	for _, pendingJob := range pendingJobs {
		if pendingJob.TargetCapture == "" {
			target, exist := s.moveTableTargets[pendingJob.TableID]
			if !exist {
				continue
			}
			pendingJob.TargetCapture = target
			delete(s.moveTableTargets, pendingJob.TableID)
			continue
		}
		switch pendingJob.Tp {
		case schedulerJobTypeAddTable:
			workloads[pendingJob.TargetCapture] += 1
		case schedulerJobTypeRemoveTable:
			workloads[pendingJob.TargetCapture] -= 1
		default:
			log.Panic("Unreachable, please report a bug",
				zap.String("changefeed", s.state.ID), zap.Any("job", pendingJob))
		}
	}

	getMinWorkloadCapture := func() model.CaptureID {
		minCapture := ""
		minWorkLoad := uint64(math.MaxUint64)
		for captureID, workload := range workloads {
			if workload < minWorkLoad {
				minCapture = captureID
				minWorkLoad = workload
			}
		}

		if minCapture == "" {
			log.Panic("Unreachable, no capture is found")
		}
		return minCapture
	}

	for _, pendingJob := range pendingJobs {
		if pendingJob.TargetCapture != "" {
			continue
		}
		minCapture := getMinWorkloadCapture()
		pendingJob.TargetCapture = minCapture
		workloads[minCapture] += 1
	}
}

// syncTablesWithCurrentTables compares the tables that should be replicated (currentTables)
// with the tables currently assigned to captures, and returns scheduler jobs that
// add the missing tables and remove the extra ones.
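//
// For example (table IDs are hypothetical): if currentTables is {1, 2, 3}
// while the captures currently listen to tables {2, 3, 4}, this function
// returns an ADD job for table 1 and a REMOVE job for table 4, both with
// BoundaryTs set to the global checkpoint ts.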
func (s *scheduler) syncTablesWithCurrentTables() ([]*schedulerJob, error) {
	var pendingJob []*schedulerJob
	allTableListeningNow, err := s.table2CaptureIndex()
	if err != nil {
		return nil, errors.Trace(err)
	}
	globalCheckpointTs := s.state.Status.CheckpointTs
	for _, tableID := range s.currentTables {
		if _, exist := allTableListeningNow[tableID]; exist {
			delete(allTableListeningNow, tableID)
			continue
		}
		// for each table that should be listened to but is not,
		// add an add-table job to the pending job list
		pendingJob = append(pendingJob, &schedulerJob{
			Tp:         schedulerJobTypeAddTable,
			TableID:    tableID,
			BoundaryTs: globalCheckpointTs,
		})
	}
	// the remaining tables are those that should not be listened to
	tablesThatShouldNotBeListened := allTableListeningNow
	for tableID, captureID := range tablesThatShouldNotBeListened {
		opts := s.state.TaskStatuses[captureID].Operation
		if opts != nil && opts[tableID] != nil && opts[tableID].Delete {
			// the table is already being removed, skip
			continue
		}
		pendingJob = append(pendingJob, &schedulerJob{
			Tp:            schedulerJobTypeRemoveTable,
			TableID:       tableID,
			BoundaryTs:    globalCheckpointTs,
			TargetCapture: captureID,
		})
	}
	return pendingJob, nil
}

func (s *scheduler) handleJobs(jobs []*schedulerJob) {
	for _, job := range jobs {
		job := job
		s.state.PatchTaskStatus(job.TargetCapture, func(status *model.TaskStatus) (*model.TaskStatus, bool, error) {
			switch job.Tp {
			case schedulerJobTypeAddTable:
				if status == nil {
					// if the task status is not found, just skip this add-table operation,
					// since the table will be dispatched again in the next tick
					log.Warn("task status of the capture is not found, maybe the capture is already down. Specify a new capture and redo the job", zap.Any("job", job))
					return status, false, nil
				}
				status.AddTable(job.TableID, &model.TableReplicaInfo{
					StartTs:     job.BoundaryTs,
					MarkTableID: 0, // mark table ID will be set in processors
				}, job.BoundaryTs)
			case schedulerJobTypeRemoveTable:
				failpoint.Inject("OwnerRemoveTableError", func() {
					// just skip removing this table
					failpoint.Return(status, false, nil)
				})
				if status == nil {
					log.Warn("task status of the capture is not found, maybe the capture is already down. Specify a new capture and redo the job", zap.Any("job", job))
					return status, false, nil
				}
				status.RemoveTable(job.TableID, job.BoundaryTs, false)
			default:
				log.Panic("Unreachable, please report a bug", zap.Any("job", job))
			}
			return status, true, nil
		})
	}
}

// cleanUpFinishedOperations cleans up the finished operations recorded in each capture's task status.
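// Deleting entries whose status is OperFinished keeps table2CaptureIndex from
// treating completed operations as still in flight, which is why Tick calls
// this before computing the pending jobs.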
func (s *scheduler) cleanUpFinishedOperations() {
	for captureID := range s.state.TaskStatuses {
		s.state.PatchTaskStatus(captureID, func(status *model.TaskStatus) (*model.TaskStatus, bool, error) {
			changed := false
			for tableID, operation := range status.Operation {
				if operation.Status == model.OperFinished {
					delete(status.Operation, tableID)
					changed = true
				}
			}
			return status, changed, nil
		})
	}
}

func (s *scheduler) rebalance() (shouldUpdateState bool) {
	if !s.shouldRebalance() {
		// if no table is rebalanced, we can update the resolved ts and checkpoint ts
		return true
	}
	// we only support rebalancing by table number for now
	return s.rebalanceByTableNum()
}

func (s *scheduler) shouldRebalance() bool {
	if s.needRebalanceNextTick {
		s.needRebalanceNextTick = false
		return true
	}
	if s.lastTickCaptureCount != len(s.captures) {
		// a new capture came online and no tables have been dispatched to it yet,
		// or some captures went offline
		return true
	}
	// TODO: trigger rebalance periodically
	return false
}

// rebalanceByTableNum removes tables from captures that replicate an above-average number of tables.
// the removed tables will be dispatched again by the syncTablesWithCurrentTables function
func (s *scheduler) rebalanceByTableNum() (shouldUpdateState bool) {
	totalTableNum := len(s.currentTables)
	captureNum := len(s.captures)
	upperLimitPerCapture := int(math.Ceil(float64(totalTableNum) / float64(captureNum)))
	shouldUpdateState = true

	log.Info("Start rebalancing",
		zap.String("changefeed", s.state.ID),
		zap.Int("table-num", totalTableNum),
		zap.Int("capture-num", captureNum),
		zap.Int("target-limit", upperLimitPerCapture))

	for captureID, taskStatus := range s.state.TaskStatuses {
		tableNum2Remove := len(taskStatus.Tables) - upperLimitPerCapture
		if tableNum2Remove <= 0 {
			continue
		}

		// here we pick `tableNum2Remove` tables to remove,
		// and the removed tables will be dispatched by the `syncTablesWithCurrentTables` function in the next tick
		for tableID := range taskStatus.Tables {
			tableID := tableID
			if tableNum2Remove <= 0 {
				break
			}
			shouldUpdateState = false
			s.state.PatchTaskStatus(captureID, func(status *model.TaskStatus) (*model.TaskStatus, bool, error) {
				if status == nil {
					// the capture may be down, just skip removing this table
					return status, false, nil
				}
				if status.Operation != nil && status.Operation[tableID] != nil {
					// skip removing this table to avoid the remove operation created by
					// the rebalance interfering with an operation created elsewhere
					return status, false, nil
				}
				status.RemoveTable(tableID, s.state.Status.CheckpointTs, false)
				log.Info("Rebalance: Move table",
					zap.Int64("table-id", tableID),
					zap.String("capture", captureID),
					zap.String("changefeed-id", s.state.ID))
				return status, true, nil
			})
			tableNum2Remove--
		}
	}
	return
}
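
// An illustrative run of rebalanceByTableNum (the numbers are hypothetical):
// with 7 current tables and 3 captures, upperLimitPerCapture is ceil(7/3) = 3,
// so a capture holding 5 tables removes 5 - 3 = 2 of them; in the next tick,
// syncTablesWithCurrentTables turns those 2 tables into ADD jobs, and
// dispatchToTargetCaptures assigns them to the captures with the smallest
// workloads.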