github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/master/scheduler/scheduler.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package scheduler 15 16 import ( 17 "context" 18 "sort" 19 "sync" 20 "time" 21 22 "github.com/pingcap/errors" 23 "github.com/pingcap/failpoint" 24 "github.com/pingcap/tiflow/dm/config" 25 "github.com/pingcap/tiflow/dm/config/dbconfig" 26 "github.com/pingcap/tiflow/dm/config/security" 27 "github.com/pingcap/tiflow/dm/master/metrics" 28 "github.com/pingcap/tiflow/dm/master/workerrpc" 29 "github.com/pingcap/tiflow/dm/pb" 30 "github.com/pingcap/tiflow/dm/pkg/etcdutil" 31 "github.com/pingcap/tiflow/dm/pkg/ha" 32 "github.com/pingcap/tiflow/dm/pkg/log" 33 "github.com/pingcap/tiflow/dm/pkg/terror" 34 "github.com/pingcap/tiflow/dm/pkg/utils" 35 clientv3 "go.etcd.io/etcd/client/v3" 36 "go.uber.org/atomic" 37 "go.uber.org/zap" 38 ) 39 40 const ( 41 maxQueryWorkerRetryNum = 10 42 ) 43 44 // Scheduler schedules tasks for DM-worker instances, including: 45 // - register/unregister DM-worker instances. 46 // - observe the online/offline status of DM-worker instances. 47 // - observe add/remove operations for upstream sources' config. 48 // - schedule upstream sources to DM-worker instances. 49 // - schedule data migration subtask operations. 50 // - holds agents of DM-worker instances. 51 // NOTE: the DM-master server MUST wait for this scheduler become started before handling client requests. 
// Cases that trigger a source-to-worker bound try:
//   - a worker from Offline to Free: receive keep-alive.
//   - a worker from Bound to Free: triggered by unbound (`a source removed`).
//   - a new source added: add source request from user.
//   - a source unbound from another worker: triggered by unbound (`a worker from Bound to Offline`).
//     TODO(csuzhangxc): design a strategy to ensure the old worker has already shut down its work.
//
// Cases that trigger a source-to-worker unbound try:
//   - a worker from Bound to Offline: lost keep-alive.
//   - a source removed: remove source request from user.
//
// TODO: try to handle the returned `err` of etcd operations, because the data may have been put into etcd
// while the response to the etcd client was interrupted.
//
// Relay scheduling:
//   - scheduled by source
//     DM-worker will enable relay according to its bound source; in the current implementation, it reads `enable-relay`
//     of the source config and decides whether to enable relay.
//     turn on `enable-relay`:
//   - use `enable-relay: true` when creating the source
//   - `start-relay -s source` to dynamically change `enable-relay`
//     turn off `enable-relay`:
//   - use `enable-relay: false` when creating the source
//   - `stop-relay -s source` to dynamically change `enable-relay`
//   - found conflicting schedule type with (source, worker) when the scheduler bootstraps
//   - scheduled by (source, worker)
//     DM-worker will check if relay is assigned to it no matter whether it's bound or not. In the current implementation, it will
//     read UpstreamRelayWorkerKeyAdapter in etcd.
//     add UpstreamRelayWorkerKeyAdapter:
//   - use `start-relay -s source -w worker`
//     remove UpstreamRelayWorkerKeyAdapter:
//   - use `stop-relay -s source -w worker`
//   - remove worker by `offline-member`
type Scheduler struct {
	// mu is acquired (read or write mode) by the exported methods before they
	// touch the plain-map fields below; the mode used differs per method.
	mu sync.RWMutex

	logger log.Logger

	started atomic.Bool        // whether the scheduler already started for work.
	cancel  context.CancelFunc // cancels the context given to the observer goroutines; set by Start, cleared by Close.
	wg      sync.WaitGroup     // counts the observer goroutines launched by Start; Close waits on it.

	// etcdCli is assigned by Start and used for every etcd read/write afterwards.
	etcdCli *clientv3.Client

	// must acquire latch from subtaskLatch before accessing subTaskCfgs and expectSubTaskStages,
	// the latch key is task name.
	// TODO: also sourceLatch, relayLatch
	subtaskLatch *latches

	// all source configs, source ID -> source config.
	// add:
	// - add source by user request (calling `AddSourceCfg`).
	// - recover from etcd (calling `recoverSources`).
	// delete:
	// - remove source by user request (calling `RemoveSourceCfg`).
	sourceCfgs map[string]*config.SourceConfig

	// all subtask configs, task name -> source ID -> subtask config.
	// The stored value type is map[string]config.SubTaskConfig.
	// add:
	// - add/start subtask by user request (calling `AddSubTasks`).
	// - recover from etcd (calling `recoverSubTasks`).
	// delete:
	// - remove/stop subtask by user request (calling `RemoveSubTasks`).
	subTaskCfgs sync.Map

	// all DM-workers, worker name -> worker.
	// add:
	// - add worker by user request (calling `AddWorker`).
	// - recover from etcd (calling `recoverWorkersBounds`).
	// delete:
	// - remove worker by user request (calling `RemoveWorker`).
	workers map[string]*Worker

	// all bound relationship, source ID -> worker.
	// add:
	// - when bind a source to a worker, in updateStatusToBound
	// delete:
	// - when unbind a source from a worker, in updateStatusToUnbound
	// see `Cases trigger a source-to-worker bound try` above.
	bounds map[string]*Worker

	// unbound (pending to bound) sources.
	// NOTE: refactor to support scheduling by priority.
	// add:
	// - add source by user request (calling `AddSourceCfg`).
	// - recover from etcd (calling `recoverWorkersBounds`).
	// - when the bounding worker become offline, in updateStatusToUnbound.
	// delete:
	// - remove source by user request (calling `RemoveSourceCfg`).
	// - when bound the source to a worker, in updateStatusToBound.
	unbounds map[string]struct{}

	// a mirror of bounds whose element is not deleted when worker unbound. worker -> SourceBound
	lastBound map[string]ha.SourceBound

	// expectant relay stages for sources, source ID -> stage.
	// add:
	// - bound the source to a worker (at first time).
	// - recover from etcd (calling `recoverSources`).
	// update:
	// - update stage by user request (calling `UpdateExpectRelayStage`).
	// delete:
	// - remove source by user request (calling `RemoveSourceCfg`).
	expectRelayStages map[string]ha.Stage

	// expectant subtask stages for tasks & sources, task name -> source ID -> stage.
	// The stored value type is map[string]ha.Stage.
	// add:
	// - add/start subtask by user request (calling `AddSubTasks`).
	// - recover from etcd (calling `recoverSubTasks`).
	// update:
	// - update stage by user request (calling `UpdateExpectSubTaskStage`).
	// delete:
	// - remove/stop subtask by user request (calling `RemoveSubTasks`).
	expectSubTaskStages sync.Map

	// a source has its relay workers. source-id -> set(worker-name)
	// add:
	// - start-relay
	// - recover from etcd (calling `recoverRelayConfigs`)
	// delete:
	// - stop-relay
	relayWorkers map[string]map[string]struct{}

	// expectant validator stages, task name -> source ID -> stage.
	// The stored value type is map[string]ha.Stage.
	// add:
	// - on subtask start with validator mode not none
	// - start validator manually
	// - recover from etcd
	// update
	// - update stage by user request
	// delete:
	// - when subtask is removed by user request
	expectValidatorStages sync.Map

	// workers in load stage
	// task -> source -> worker
	loadTasks map[string]map[string]string

	// securityCfg is stored as passed to NewScheduler.
	securityCfg security.Security
}

// NewScheduler creates a new scheduler instance.
// Every plain-map field is allocated here; the sync.Map fields, etcdCli, cancel
// and started keep their zero values until Start is called. The returned
// scheduler logs through pLogger with an extra component=scheduler field.
func NewScheduler(pLogger *log.Logger, securityCfg security.Security) *Scheduler {
	return &Scheduler{
		logger:            pLogger.WithFields(zap.String("component", "scheduler")),
		subtaskLatch:      newLatches(),
		sourceCfgs:        make(map[string]*config.SourceConfig),
		workers:           make(map[string]*Worker),
		bounds:            make(map[string]*Worker),
		unbounds:          make(map[string]struct{}),
		lastBound:         make(map[string]ha.SourceBound),
		expectRelayStages: make(map[string]ha.Stage),
		relayWorkers:      make(map[string]map[string]struct{}),
		loadTasks:         make(map[string]map[string]string),
		securityCfg:       securityCfg,
	}
}

// Start starts the scheduler for work.
// NOTE: for logic errors, it should start without returning errors (but report via metrics or log) so that the user can fix them.
//
// Start holds s.mu (write mode) for its whole duration. It stores etcdCli,
// resets the in-memory state, rebuilds it from etcd (sources, subtasks, relay
// configs, load tasks, workers and bounds, in that order), tries to bind every
// Free/Relay worker, and finally launches two observer goroutines whose
// lifetime is controlled by a context derived from pCtx.
//
// It returns ErrSchedulerStarted when the scheduler is already started, or the
// first error returned by a recovery/binding step.
func (s *Scheduler) Start(pCtx context.Context, etcdCli *clientv3.Client) (err error) {
	s.logger.Info("the scheduler is starting")

	s.mu.Lock()
	// The named result `err` lets this deferred function see how Start ended:
	// on any failure all workers are closed before the lock is released.
	// NOTE(review): this defer is registered before the `started` check below,
	// so it also closes all workers when Start returns ErrSchedulerStarted on an
	// already-running scheduler — confirm that is intended.
	defer func() {
		if err != nil {
			s.CloseAllWorkers()
		}
		s.mu.Unlock()
	}()

	if s.started.Load() {
		return terror.ErrSchedulerStarted.Generate()
	}

	s.etcdCli = etcdCli // set s.etcdCli first for safety, observeWorkerEvent will use s.etcdCli in retry
	s.reset()           // reset previous status.

	// recover previous status from etcd.
	err = s.recoverSources()
	if err != nil {
		return err
	}
	err = s.recoverSubTasks()
	if err != nil {
		return err
	}
	err = s.recoverRelayConfigs()
	if err != nil {
		return err
	}

	// loadTaskRev is handed to observeLoadTask below as its starting revision.
	var loadTaskRev int64
	loadTaskRev, err = s.recoverLoadTasks(false)
	if err != nil {
		return err
	}

	// rev is handed to observeWorkerEvent below as its starting revision.
	var rev int64
	rev, err = s.recoverWorkersBounds()
	if err != nil {
		return err
	}

	// check if we can bind free or relay source and workers
	for _, w := range s.workers {
		if w.stage == WorkerFree || w.stage == WorkerRelay {
			// This `err` shadows the named result, but `return err` still
			// assigns it to the result, so the deferred cleanup sees it.
			bound, err := s.tryBoundForWorker(w)
			if err != nil {
				return err
			}
			// Stop at the first worker that could not be bound.
			// NOTE(review): presumably no bindable source is left at that point — confirm against tryBoundForWorker.
			if !bound {
				break
			}
		}
	}

	ctx, cancel := context.WithCancel(pCtx)

	s.wg.Add(1)
	go func(rev1 int64) {
		defer s.wg.Done()
		// starting to observe status of DM-worker instances.
		// TODO: handle fatal error from observeWorkerEvent
		//nolint:errcheck
		s.observeWorkerEvent(ctx, rev1)
	}(rev)

	s.wg.Add(1)
	go func(rev1 int64) {
		defer s.wg.Done()
		// starting to observe load task.
		// TODO: handle fatal error from observeLoadTask
		//nolint:errcheck
		s.observeLoadTask(ctx, rev1)
	}(loadTaskRev)

	s.started.Store(true) // started now
	s.cancel = cancel
	s.logger.Info("the scheduler has started")
	return nil
}

// Close closes the scheduler.
// It is a no-op when the scheduler is not started. Otherwise it cancels the
// observer context, closes all workers, waits for the observer goroutines to
// exit, and only then marks the scheduler as not started.
func (s *Scheduler) Close() {
	s.mu.Lock()

	if !s.started.Load() {
		s.mu.Unlock()
		return
	}

	s.logger.Info("the scheduler is closing")
	if s.cancel != nil {
		s.cancel()
		s.cancel = nil
	}
	s.CloseAllWorkers()
	// Release the lock before waiting: the observer goroutines may need it to finish.
	s.mu.Unlock()

	// need to wait for goroutines to return which may hold the mutex.
	s.wg.Wait()

	s.mu.Lock()
	defer s.mu.Unlock()
	s.started.Store(false) // closed now.
	s.logger.Info("the scheduler has closed")
}

// CloseAllWorkers closes all the scheduler's workers.
333 func (s *Scheduler) CloseAllWorkers() { 334 for _, worker := range s.workers { 335 worker.Close() 336 } 337 } 338 339 // AddSourceCfg adds the upstream source config to the cluster, and try to bound source to worker 340 // NOTE: please verify the config before call this. 341 func (s *Scheduler) AddSourceCfg(cfg *config.SourceConfig) error { 342 s.mu.Lock() 343 defer s.mu.Unlock() 344 345 if !s.started.Load() { 346 return terror.ErrSchedulerNotStarted.Generate() 347 } 348 349 err := s.addSource(cfg) 350 if err != nil { 351 return err 352 } 353 354 // try to bound it to a Free worker. 355 _, err = s.tryBoundForSource(cfg.SourceID) 356 return err 357 } 358 359 // AddSourceCfgWithWorker adds the upstream source config to the cluster, and try to bound source to specify worker 360 // NOTE: please verify the config before call this. 361 func (s *Scheduler) AddSourceCfgWithWorker(cfg *config.SourceConfig, workerName string) error { 362 s.mu.Lock() 363 defer s.mu.Unlock() 364 365 if !s.started.Load() { 366 return terror.ErrSchedulerNotStarted.Generate() 367 } 368 369 // check whether worker exists. 370 w, ok := s.workers[workerName] 371 if !ok { 372 return terror.ErrSchedulerWorkerNotExist.Generate(workerName) 373 } 374 375 if w.stage != WorkerFree { 376 return terror.ErrSchedulerWorkerNotFree.Generate(workerName) 377 } 378 379 if err := s.addSource(cfg); err != nil { 380 return err 381 } 382 383 return s.boundSourceToWorker(cfg.SourceID, w) 384 } 385 386 // addSource adds the upstream source config to the cluster. 387 func (s *Scheduler) addSource(cfg *config.SourceConfig) error { 388 // 1. check whether exists. 389 if _, ok := s.sourceCfgs[cfg.SourceID]; ok { 390 return terror.ErrSchedulerSourceCfgExist.Generate(cfg.SourceID) 391 } 392 // 2. put the config into etcd. 393 _, err := ha.PutSourceCfg(s.etcdCli, cfg) 394 if err != nil { 395 return err 396 } 397 398 // 3. record the config in the scheduler. 
399 s.sourceCfgs[cfg.SourceID] = cfg 400 s.unbounds[cfg.SourceID] = struct{}{} 401 return nil 402 } 403 404 // UpdateSourceCfg update the upstream source config to the cluster. 405 func (s *Scheduler) UpdateSourceCfg(cfg *config.SourceConfig) error { 406 s.mu.Lock() 407 defer s.mu.Unlock() 408 409 if !s.started.Load() { 410 return terror.ErrSchedulerNotStarted.Generate() 411 } 412 413 // 1. check whether the config exists. 414 _, ok := s.sourceCfgs[cfg.SourceID] 415 if !ok { 416 return terror.ErrSchedulerSourceCfgNotExist.Generate(cfg.SourceID) 417 } 418 // 2. check if tasks using this configuration are running 419 runningStage := pb.Stage_Running 420 if tasks := s.GetTaskNameListBySourceName(cfg.SourceID, &runningStage); len(tasks) > 0 { 421 return terror.ErrSchedulerSourceCfgUpdate.Generate(cfg.SourceID) 422 } 423 // 3. check if this source is enable relay 424 if _, ok := s.expectRelayStages[cfg.SourceID]; ok { 425 return terror.ErrSchedulerSourceCfgUpdate.Generate(cfg.SourceID) 426 } 427 // 4. put the config into etcd. 428 _, err := ha.PutSourceCfg(s.etcdCli, cfg) 429 if err != nil { 430 return err 431 } 432 // 5. record the config in the scheduler. 433 s.sourceCfgs[cfg.SourceID] = cfg 434 return nil 435 } 436 437 // RemoveSourceCfg removes the upstream source config in the cluster. 438 // when removing the upstream source config, it should also remove: 439 // - any existing relay stage. 440 // - any source-worker bound relationship. 441 func (s *Scheduler) RemoveSourceCfg(source string) error { 442 s.mu.Lock() 443 defer s.mu.Unlock() 444 445 if !s.started.Load() { 446 return terror.ErrSchedulerNotStarted.Generate() 447 } 448 449 // 1. check whether the config exists. 450 if _, ok := s.sourceCfgs[source]; !ok { 451 return terror.ErrSchedulerSourceCfgNotExist.Generate(source) 452 } 453 454 // 2. check whether any subtask or relay config exists for the source. 
455 existingSubtasksM := make(map[string]struct{}) 456 s.subTaskCfgs.Range(func(k, v interface{}) bool { 457 task := k.(string) 458 cfg := v.(map[string]config.SubTaskConfig) 459 for source2 := range cfg { 460 if source2 == source { 461 existingSubtasksM[task] = struct{}{} 462 } 463 } 464 return true 465 }) 466 467 existingSubtasks := strMapToSlice(existingSubtasksM) 468 if len(existingSubtasks) > 0 { 469 return terror.ErrSchedulerSourceOpTaskExist.Generate(source, existingSubtasks) 470 } 471 relayWorkers := s.relayWorkers[source] 472 if len(relayWorkers) != 0 { 473 return terror.ErrSchedulerSourceOpRelayExist.Generate(source, strMapToSlice(relayWorkers)) 474 } 475 476 // 3. find worker name by source ID. 477 var ( 478 workerName string // empty should be fine below. 479 worker *Worker 480 ) 481 if w, ok2 := s.bounds[source]; ok2 { 482 worker = w 483 workerName = w.BaseInfo().Name 484 } 485 486 // 4. delete the info in etcd. 487 _, err := ha.DeleteSourceCfgRelayStageSourceBound(s.etcdCli, source, workerName) 488 if err != nil { 489 return err 490 } 491 492 // 5. delete the config and expectant stage in the scheduler 493 delete(s.sourceCfgs, source) 494 delete(s.expectRelayStages, source) 495 496 // 6. unbound for the source. 497 s.updateStatusToUnbound(source) 498 499 // 7. remove it from unbounds. 500 delete(s.unbounds, source) 501 502 // 8. try to bound the worker for another source. 503 if worker != nil { 504 _, err = s.tryBoundForWorker(worker) 505 if err != nil { 506 return err 507 } 508 } 509 return nil 510 } 511 512 // GetSourceCfgs gets all source cfgs, return nil when error happens. 
513 func (s *Scheduler) GetSourceCfgs() map[string]*config.SourceConfig { 514 s.mu.RLock() 515 defer s.mu.RUnlock() 516 clone := make(map[string]*config.SourceConfig, len(s.sourceCfgs)) 517 for sourceID, sourceCfg := range s.sourceCfgs { 518 cloneCfg := sourceCfg.Clone() 519 clone[sourceID] = cloneCfg 520 } 521 return clone 522 } 523 524 // GetSourceCfgIDs gets all added source ID. 525 func (s *Scheduler) GetSourceCfgIDs() []string { 526 s.mu.RLock() 527 defer s.mu.RUnlock() 528 529 id := make([]string, 0, len(s.sourceCfgs)) 530 for i := range s.sourceCfgs { 531 id = append(id, i) 532 } 533 return id 534 } 535 536 // GetSourceCfgByID gets source config by source ID. 537 func (s *Scheduler) GetSourceCfgByID(source string) *config.SourceConfig { 538 s.mu.RLock() 539 defer s.mu.RUnlock() 540 cfg, ok := s.sourceCfgs[source] 541 if !ok { 542 return nil 543 } 544 clone := *cfg 545 return &clone 546 } 547 548 // transferWorkerAndSource swaps two sources between two workers (maybe empty). The input means before invocation of 549 // this function, left worker and left source are bound, right worker and right source are bound. After this function, 550 // left worker should be bound to right source and vice versa. 551 // lworker, "", "", rsource This means an unbound source bound to a free worker 552 // lworker, lsource, rworker, "" This means transfer a source from a worker to another free worker 553 // lworker, lsource, "", rsource This means transfer a worker from a bound source to another unbound source 554 // lworker, lsource, rworker, rsource This means transfer two bound relations. 
555 func (s *Scheduler) transferWorkerAndSource(lworker, lsource, rworker, rsource string) error { 556 // in first four arrays, index 0 is for left worker, index 1 is for right worker 557 var ( 558 inputWorkers [2]string 559 inputSources [2]string 560 workers [2]*Worker 561 bounds [2]ha.SourceBound 562 boundWorkers []string 563 boundsToPut []ha.SourceBound 564 ok bool 565 ) 566 567 s.logger.Info("transfer source and worker", zap.String("left worker", lworker), zap.String("left source", lsource), zap.String("right worker", rworker), zap.String("right source", rsource)) 568 569 inputWorkers[0], inputWorkers[1] = lworker, rworker 570 inputSources[0], inputSources[1] = lsource, rsource 571 572 for i, workerName := range inputWorkers { 573 if workerName != "" { 574 workers[i], ok = s.workers[workerName] 575 // should not happen, avoid panic 576 if !ok { 577 s.logger.Error("could not found worker in scheduler", zap.String("worker", workerName)) 578 return terror.ErrSchedulerWorkerNotExist.Generate(workerName) 579 } 580 } 581 } 582 583 // check if the swap is valid, to avoid we messing up metadata in etcd. 584 for i := range inputWorkers { 585 if inputWorkers[i] != "" { 586 got := workers[i].bound.Source 587 expect := inputSources[i] 588 if got != expect { 589 return terror.ErrSchedulerWrongWorkerInput.Generate(inputWorkers[i], expect, got) 590 } 591 592 // if the worker has started-relay for a source, it can't be bound to another source. 593 relaySource := workers[i].RelaySourceID() 594 another := i ^ 1 // make use of XOR to flip 0 and 1 595 toBindSource := inputSources[another] 596 if relaySource != "" && toBindSource != "" && relaySource != toBindSource { 597 return terror.ErrSchedulerBoundDiffWithStartedRelay.Generate(inputWorkers[i], toBindSource, relaySource) 598 } 599 } 600 } 601 602 // get current bound workers. 
603 for i := range inputWorkers { 604 if inputWorkers[i] != "" && inputSources[i] != "" { 605 boundWorkers = append(boundWorkers, inputWorkers[i]) 606 } 607 } 608 609 // del current bound relations. 610 if _, err := ha.DeleteSourceBound(s.etcdCli, boundWorkers...); err != nil { 611 return err 612 } 613 614 // update unbound sources 615 for _, sourceID := range inputSources { 616 if sourceID != "" { 617 s.updateStatusToUnbound(sourceID) 618 } 619 } 620 621 // put new bound relations. 622 // TODO: move this and above DeleteSourceBound in one txn. 623 for i := range inputWorkers { 624 another := i ^ 1 // make use of XOR to flip 0 and 1 625 if inputWorkers[i] != "" && inputSources[another] != "" { 626 b := ha.NewSourceBound(inputSources[another], inputWorkers[i]) 627 bounds[i] = b 628 boundsToPut = append(boundsToPut, b) 629 } 630 } 631 if _, err := ha.PutSourceBound(s.etcdCli, boundsToPut...); err != nil { 632 return err 633 } 634 635 // update bound sources and workers 636 for i := range inputWorkers { 637 another := i ^ 1 // make use of XOR to flip 0 and 1 638 if inputWorkers[i] != "" && inputSources[another] != "" { 639 err := s.updateStatusToBound(workers[i], bounds[i]) 640 // TODO: if we failed here, etcd has been modified!! we should try this memory check then modify persistent data 641 // and revert if failed 642 if err != nil { 643 s.logger.DPanic("failed to update status to bound, but has written etcd", zap.Error(err)) 644 } 645 } 646 } 647 648 // if one of the workers/sources become free/unbound 649 // try bound it. 
650 for i := range inputWorkers { 651 another := i ^ 1 // make use of XOR to flip 0 and 1 652 if inputWorkers[i] != "" && inputSources[another] == "" { 653 if _, err := s.tryBoundForWorker(workers[i]); err != nil { 654 return err 655 } 656 } 657 } 658 for i := range inputSources { 659 another := i ^ 1 // make use of XOR to flip 0 and 1 660 if inputSources[i] != "" && inputWorkers[another] == "" { 661 if _, err := s.tryBoundForSource(inputSources[i]); err != nil { 662 return err 663 } 664 } 665 } 666 667 return nil 668 } 669 670 // TransferSource unbinds the `source` and binds it to a free or same-source-relay `worker`. 671 // If fails halfway, the old worker should try recover. 672 func (s *Scheduler) TransferSource(ctx context.Context, source, worker string) error { 673 if !s.started.Load() { 674 return terror.ErrSchedulerNotStarted.Generate() 675 } 676 s.mu.RLock() 677 // 1. check existence or no need 678 if _, ok := s.sourceCfgs[source]; !ok { 679 s.mu.RUnlock() 680 return terror.ErrSchedulerSourceCfgNotExist.Generate(source) 681 } 682 w, ok := s.workers[worker] 683 if !ok { 684 s.mu.RUnlock() 685 return terror.ErrSchedulerWorkerNotExist.Generate(worker) 686 } 687 oldWorker, hasOldWorker := s.bounds[source] 688 if hasOldWorker && oldWorker.BaseInfo().Name == worker { 689 s.mu.RUnlock() 690 return nil 691 } 692 s.mu.RUnlock() 693 694 // 2. check new worker is free and not started relay for another source 695 switch w.Stage() { 696 case WorkerOffline, WorkerBound: 697 return terror.ErrSchedulerWorkerInvalidTrans.Generate(worker, w.Stage(), WorkerBound) 698 case WorkerFree: 699 case WorkerRelay: 700 if relaySource := w.RelaySourceID(); relaySource != source { 701 return terror.ErrSchedulerBoundDiffWithStartedRelay.Generate(worker, source, relaySource) 702 } 703 } 704 705 // 3. 
if no old worker, bound it directly 706 if !hasOldWorker { 707 s.logger.Warn("in transfer source, found a free worker and not bound source, which should not happened", 708 zap.String("source", source), 709 zap.String("worker", worker)) 710 return s.boundSourceToWorker(source, w) 711 } 712 713 // 4. check if old worker has running tasks 714 runningStage := pb.Stage_Running 715 if runningTasks := s.GetTaskNameListBySourceName(source, &runningStage); len(runningTasks) > 0 { 716 // we only allow automatically transfer-source if all subtasks are in the sync phase. 717 resp, err := oldWorker.queryStatus(ctx) 718 if err != nil { 719 return terror.Annotatef(err, "failed to query worker: %s status err", oldWorker.baseInfo.Name) 720 } 721 for _, status := range resp.QueryStatus.GetSubTaskStatus() { 722 if status.GetUnit() != pb.UnitType_Sync { 723 return terror.ErrSchedulerRequireRunningTaskInSyncUnit.Generate(runningTasks, source) 724 } 725 } 726 // pause running tasks 727 if batchPauseErr := s.BatchOperateTaskOnWorker(ctx, oldWorker, runningTasks, source, pb.Stage_Paused, true); batchPauseErr != nil { 728 return batchPauseErr 729 } 730 // we need resume tasks that we just paused, we use another goroutine to do this because if error happens 731 // just logging this message and let user handle it manually 732 defer func() { 733 go func() { 734 if err := s.BatchOperateTaskOnWorker(context.Background(), w, runningTasks, source, pb.Stage_Running, false); err != nil { 735 s.logger.Warn( 736 "auto resume task failed", zap.Any("tasks", runningTasks), 737 zap.String("source", source), zap.String("worker", worker), zap.Error(err)) 738 } 739 }() 740 }() 741 } 742 743 // 5. 
replace the source bound 744 failpoint.Inject("failToReplaceSourceBound", func(_ failpoint.Value) { 745 failpoint.Return(errors.New("failToPutSourceBound")) 746 }) 747 s.mu.Lock() 748 _, err := ha.ReplaceSourceBound(s.etcdCli, source, oldWorker.BaseInfo().Name, worker) 749 if err != nil { 750 s.mu.Unlock() 751 return err 752 } 753 if err2 := oldWorker.Unbound(); err2 != nil { 754 s.logger.DPanic("the oldWorker is get from s.bound, so there should not be an error", zap.Error(err2)) 755 } 756 if err2 := s.updateStatusToBound(w, ha.NewSourceBound(source, worker)); err2 != nil { 757 s.logger.DPanic("we have checked w.stage is free, so there should not be an error", zap.Error(err2)) 758 } 759 // 6. now this old worker is free, try bound source to it 760 _, err = s.tryBoundForWorker(oldWorker) 761 if err != nil { 762 s.logger.Warn("in transfer source, error when try bound the old worker", zap.Error(err)) 763 } 764 s.mu.Unlock() 765 return nil 766 } 767 768 // BatchOperateTaskOnWorker batch operate tasks in one worker and use query-status to make sure all tasks are in expected stage if needWait=true. 
769 func (s *Scheduler) BatchOperateTaskOnWorker( 770 ctx context.Context, worker *Worker, tasks []string, source string, stage pb.Stage, needWait bool, 771 ) error { 772 if len(tasks) == 0 { 773 return nil 774 } 775 for _, taskName := range tasks { 776 if err := s.UpdateExpectSubTaskStage(stage, taskName, source); err != nil { 777 return err 778 } 779 } 780 if !needWait { 781 return nil 782 } 783 // wait all tasks are in expected stage before actually starting scheduling 784 WaitLoop: 785 for retry := 0; retry < maxQueryWorkerRetryNum; retry++ { 786 resp, err := worker.queryStatus(ctx) 787 if err != nil { 788 return terror.Annotatef(err, "failed to query worker: %s status", worker.baseInfo.Name) 789 } 790 791 failpoint.Inject("batchOperateTaskOnWorkerMustRetry", func(v failpoint.Value) { 792 if retry < v.(int) { 793 resp.QueryStatus.SubTaskStatus[0].Stage = pb.Stage_InvalidStage 794 log.L().Info("batchOperateTaskOnWorkerMustRetry failpoint triggered", zap.Int("retry", retry)) 795 } else { 796 log.L().Info("batchOperateTaskOnWorkerMustRetry passed", zap.Int("retry", retry)) 797 } 798 }) 799 800 for _, status := range resp.QueryStatus.GetSubTaskStatus() { 801 if status == nil { 802 // this should not happen when rpc logic in server side not changed 803 return errors.Errorf("expect a query-status with subtask status but got a nil, resp %v", resp) 804 } 805 if status.Stage != stage { 806 // NOTE: the defaultRPCTimeout is 10m, use 1s * retry times to increase the waiting time 807 sleepTime := time.Second * time.Duration(maxQueryWorkerRetryNum-retry) 808 s.logger.Info( 809 "waiting task", 810 zap.String("task", status.Name), 811 zap.Int("retry times", retry), 812 zap.Duration("sleep time", sleepTime), 813 zap.String("want stage", stage.String()), 814 zap.String("current stage", status.Stage.String()), 815 ) 816 failpoint.Inject("skipBatchOperateTaskOnWorkerSleep", func(_ failpoint.Value) { 817 failpoint.Continue("WaitLoop") 818 }) 819 select { 820 case <-ctx.Done(): 821 
return terror.Annotatef(err, "failed to wait task on worker: %s because context is canceled", worker.baseInfo.Name) 822 case <-time.After(sleepTime): 823 continue WaitLoop 824 } 825 } 826 } 827 return nil // all task are in expected stage 828 } 829 return terror.ErrSchedulerPauseTaskForTransferSource.Generate(tasks) // failed to pause tasks, need user to handle it manually 830 }

// AcquireSubtaskLatch tries acquiring a latch for subtask name.
// It returns whatever the latch returns: a release function on success, or an error.
func (s *Scheduler) AcquireSubtaskLatch(name string) (ReleaseFunc, error) {
	return s.subtaskLatch.tryAcquire(name)
}

// AddSubTasks adds the information of one or more subtasks for one task.
// use s.mu.RLock() to protect s.bound, and s.subtaskLatch to protect subtask related members.
// setting `latched` to true means caller has acquired latch.
//
// All configs must belong to the same task. Configs whose (task, source) pair is
// already recorded are skipped with a warning; if every config already exists an
// error is returned. Every new subtask's source must currently be bound to a worker.
func (s *Scheduler) AddSubTasks(latched bool, expectStage pb.Stage, cfgs ...config.SubTaskConfig) error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	if !s.started.Load() {
		return terror.ErrSchedulerNotStarted.Generate()
	}

	if len(cfgs) == 0 {
		return nil // no subtasks need to add, this should not happen.
	}

	var (
		taskNamesM    = make(map[string]struct{}, 1)
		existSourcesM = make(map[string]struct{}, len(cfgs))
		allSources    = make([]string, 0, len(cfgs))
	)

	for _, cfg := range cfgs {
		taskNamesM[cfg.Name] = struct{}{}
	}
	taskNames := strMapToSlice(taskNamesM)
	if len(taskNames) > 1 {
		// only subtasks from one task supported now.
		return terror.ErrSchedulerMultiTask.Generate(taskNames)
	}

	if !latched {
		release, err := s.subtaskLatch.tryAcquire(taskNames[0])
		if err != nil {
			return terror.ErrSchedulerLatchInUse.Generate("AddSubTasks", taskNames[0])
		}
		defer release()
	}

	// 1. check whether exists.
	for _, cfg := range cfgs {
		allSources = append(allSources, cfg.SourceID)
		v, ok := s.subTaskCfgs.Load(cfg.Name)
		if !ok {
			continue
		}
		cfgM := v.(map[string]config.SubTaskConfig)
		_, ok = cfgM[cfg.SourceID]
		if !ok {
			continue
		}
		existSourcesM[cfg.SourceID] = struct{}{}
	}

	existSources := strMapToSlice(existSourcesM)
	switch {
	case len(existSources) == len(cfgs):
		// all subtasks already exist, return an error.
		return terror.ErrSchedulerSubTaskExist.Generate(taskNames[0], existSources)
	case len(existSources) > 0:
		// some subtasks already exists, log a warn.
		s.logger.Warn("some subtasks already exist", zap.String("task", taskNames[0]), zap.Strings("sources", existSources))
	}

	// 2. construct `Running` stages when adding.
	newCfgs := make([]config.SubTaskConfig, 0, len(cfgs)-len(existSources))
	newStages := make([]ha.Stage, 0, cap(newCfgs))
	validatorStages := make([]ha.Stage, 0, cap(newCfgs))
	unbounds := make([]string, 0)
	for _, cfg := range cfgs {
		if _, ok := existSourcesM[cfg.SourceID]; ok {
			continue
		}
		newCfgs = append(newCfgs, cfg)
		newStages = append(newStages, ha.NewSubTaskStage(expectStage, cfg.SourceID, cfg.Name))
		// a validator stage is only created when validation is enabled for the subtask.
		if cfg.ValidatorCfg.Mode != config.ValidationNone {
			validatorStages = append(validatorStages, ha.NewValidatorStage(pb.Stage_Running, cfg.SourceID, cfg.Name))
		}
		if _, ok := s.bounds[cfg.SourceID]; !ok {
			unbounds = append(unbounds, cfg.SourceID)
		}
	}

	// 3. check whether any sources unbound.
	if len(unbounds) > 0 {
		return terror.ErrSchedulerSourcesUnbound.Generate(unbounds)
	}

	// 4. put the lightning status, configs and stages into etcd.
	if config.HasLoad(cfgs[0].Mode) && cfgs[0].LoaderConfig.ImportMode == config.LoadModePhysical {
		if len(existSources) > 0 {
			// don't support add new lightning subtask when some subtasks already exist.
			return terror.ErrSchedulerSubTaskExist.Generate(taskNames[0], existSources)
		}
		_, err := ha.PutLightningNotReadyForAllSources(s.etcdCli, taskNames[0], allSources)
		if err != nil {
			return err
		}
	}
	_, err := ha.PutSubTaskCfgStage(s.etcdCli, newCfgs, newStages, validatorStages)
	if err != nil {
		return err
	}

	// 5. record the config and the expectant stage.
	for _, cfg := range newCfgs {
		v, _ := s.subTaskCfgs.LoadOrStore(cfg.Name, map[string]config.SubTaskConfig{})
		m := v.(map[string]config.SubTaskConfig)
		m[cfg.SourceID] = cfg
	}
	for _, stage := range newStages {
		v, _ := s.expectSubTaskStages.LoadOrStore(stage.Task, map[string]ha.Stage{})
		m := v.(map[string]ha.Stage)
		m[stage.Source] = stage
	}
	for _, stage := range validatorStages {
		v, _ := s.expectValidatorStages.LoadOrStore(stage.Task, map[string]ha.Stage{})
		m := v.(map[string]ha.Stage)
		m[stage.Source] = stage
	}

	return nil
}

// RemoveSubTasks removes the information of one or more subtasks for one task.
// The request is rejected as a whole when the task is unknown or when any of the
// given sources has no expectant stage recorded for the task.
func (s *Scheduler) RemoveSubTasks(task string, sources ...string) error {
	if !s.started.Load() {
		return terror.ErrSchedulerNotStarted.Generate()
	}

	if task == "" || len(sources) == 0 {
		return nil // no subtask need to stop, this should not happen.
	}

	release, err := s.subtaskLatch.tryAcquire(task)
	if err != nil {
		return terror.ErrSchedulerLatchInUse.Generate("RemoveSubTasks", task)
	}
	defer release()

	// 1. check the task exists.
	stagesMapV, ok1 := s.expectSubTaskStages.Load(task)
	cfgsMapV, ok2 := s.subTaskCfgs.Load(task)
	if !ok1 || !ok2 {
		return terror.ErrSchedulerSubTaskOpTaskNotExist.Generate(task)
	}

	// validator stages are optional; a nil map is safe to read from and delete from below.
	var validatorStageM map[string]ha.Stage
	if validatorStageV, ok := s.expectValidatorStages.Load(task); ok {
		validatorStageM = validatorStageV.(map[string]ha.Stage)
	}

	var (
		stagesM          = stagesMapV.(map[string]ha.Stage)
		cfgsM            = cfgsMapV.(map[string]config.SubTaskConfig)
		notExistSourcesM = make(map[string]struct{})
		stages           = make([]ha.Stage, 0, len(sources))
		validatorStages  = make([]ha.Stage, 0, len(sources))
		cfgs             = make([]config.SubTaskConfig, 0, len(sources))
	)
	for _, source := range sources {
		if stage, ok := stagesM[source]; !ok {
			notExistSourcesM[source] = struct{}{}
		} else {
			stages = append(stages, stage)
		}
		if stage, ok := validatorStageM[source]; ok {
			validatorStages = append(validatorStages, stage)
		}
		if cfg, ok := cfgsM[source]; ok {
			cfgs = append(cfgs, cfg)
		}
	}
	notExistSources := strMapToSlice(notExistSourcesM)
	if len(notExistSources) > 0 {
		// some sources not exist, reject the request.
		return terror.ErrSchedulerSubTaskOpSourceNotExist.Generate(notExistSources)
	}

	// 2. delete the configs and the stages.
	_, err = ha.DeleteSubTaskCfgStage(s.etcdCli, cfgs, stages, validatorStages)
	if err != nil {
		return err
	}

	// 3. clear the config and the expectant stage.
	for _, cfg := range cfgs {
		delete(cfgsM, cfg.SourceID)
	}
	if len(cfgsM) == 0 {
		s.subTaskCfgs.Delete(task)
	}
	for _, stage := range stages {
		delete(stagesM, stage.Source)
	}
	if len(stagesM) == 0 {
		s.expectSubTaskStages.Delete(task)
	}
	for _, stage := range validatorStages {
		delete(validatorStageM, stage.Source)
	}
	if len(validatorStageM) == 0 {
		s.expectValidatorStages.Delete(task)
	}

	return nil
}

// UpdateSubTasks update the information of one or more subtasks for one task.
// All configs must belong to the same, already existing task, and every
// (task, source) pair must already be recorded. The update is rejected when any
// of the subtasks is expected to be `Running`, when a source is not bound to a
// worker, or when the bound worker reports that the subtask can not be updated.
func (s *Scheduler) UpdateSubTasks(ctx context.Context, cfgs ...config.SubTaskConfig) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	if !s.started.Load() {
		return terror.ErrSchedulerNotStarted.Generate()
	}
	if len(cfgs) == 0 {
		return nil // no subtasks need to add, this should not happen.
	}
	taskNamesM := make(map[string]struct{}, 1)
	for _, cfg := range cfgs {
		taskNamesM[cfg.Name] = struct{}{}
	}
	if len(taskNamesM) > 1 {
		// only subtasks from one task supported now.
		return terror.ErrSchedulerMultiTask.Generate(strMapToSlice(taskNamesM))
	}
	// check whether exists.
	taskName := cfgs[0].Name
	v, ok := s.subTaskCfgs.Load(taskName)
	if !ok {
		return terror.ErrSchedulerTaskNotExist.Generate(taskName)
	}
	cfgM := v.(map[string]config.SubTaskConfig)
	for _, cfg := range cfgs {
		_, ok = cfgM[cfg.SourceID]
		if !ok {
			return terror.ErrSchedulerSubTaskNotExist.Generate(cfg.Name, cfg.SourceID)
		}
	}
	// check whether in running stage.
	// every subtask is checked, not only the first one: stages are kept per source,
	// so one source may still be running while another one is paused.
	for _, cfg := range cfgs {
		stage := s.GetExpectSubTaskStage(cfg.Name, cfg.SourceID)
		if stage.Expect == pb.Stage_Running {
			return terror.ErrSchedulerSubTaskCfgUpdate.Generate(cfg.Name, cfg.SourceID)
		}
	}

	// check by workers todo batch
	for _, cfg := range cfgs {
		worker := s.bounds[cfg.SourceID]
		if worker == nil {
			return terror.ErrSchedulerSubTaskCfgUpdate.Generatef("this source: %s have not bound to worker", cfg.SourceID)
		}
		resp, err := worker.checkSubtasksCanUpdate(ctx, &cfg)
		if err != nil {
			return err
		}
		if !resp.CheckSubtasksCanUpdate.Success {
			return terror.ErrSchedulerSubTaskCfgUpdate.Generatef("can not update because %s", resp.CheckSubtasksCanUpdate.Msg)
		}
	}
	// put the configs and stages into etcd.
	_, err := ha.PutSubTaskCfgStage(s.etcdCli, cfgs, []ha.Stage{}, []ha.Stage{})
	if err != nil {
		return err
	}
	// record the config
	for _, cfg := range cfgs {
		v, _ := s.subTaskCfgs.LoadOrStore(cfg.Name, map[string]config.SubTaskConfig{})
		m := v.(map[string]config.SubTaskConfig)
		m[cfg.SourceID] = cfg
	}
	return nil
}

// getSubTaskCfgByTaskSource gets subtask config by task name and source ID. Only used in tests.
1110 func (s *Scheduler) getSubTaskCfgByTaskSource(task, source string) *config.SubTaskConfig { 1111 v, ok := s.subTaskCfgs.Load(task) 1112 if !ok { 1113 return nil 1114 } 1115 1116 cfgM := v.(map[string]config.SubTaskConfig) 1117 cfg, ok := cfgM[source] 1118 if !ok { 1119 return nil 1120 } 1121 clone := cfg 1122 return &clone 1123 } 1124 1125 // GetDownstreamMetaByTask gets downstream db config and meta config by task name. 1126 func (s *Scheduler) GetDownstreamMetaByTask(task string) (*dbconfig.DBConfig, string) { 1127 v, ok := s.subTaskCfgs.Load(task) 1128 if !ok { 1129 return nil, "" 1130 } 1131 cfgM := v.(map[string]config.SubTaskConfig) 1132 for _, cfg := range cfgM { 1133 return cfg.To.Clone(), cfg.MetaSchema 1134 } 1135 return nil, "" 1136 } 1137 1138 // GetSubTaskCfgsByTask gets subtask configs' map by task name. 1139 func (s *Scheduler) GetSubTaskCfgsByTask(task string) map[string]*config.SubTaskConfig { 1140 v, ok := s.subTaskCfgs.Load(task) 1141 if !ok { 1142 return nil 1143 } 1144 1145 cfgM := v.(map[string]config.SubTaskConfig) 1146 cloneM := make(map[string]*config.SubTaskConfig, len(cfgM)) 1147 for source, cfg := range cfgM { 1148 clone := cfg 1149 cloneM[source] = &clone 1150 } 1151 return cloneM 1152 } 1153 1154 func (s *Scheduler) GetSubTaskCfgsByTaskAndSource(taskName string, sources []string) map[string]map[string]config.SubTaskConfig { 1155 var ret map[string]map[string]config.SubTaskConfig // task-name->sourceID->*config.SubTaskConfig 1156 if len(taskName) == 0 { 1157 ret = s.GetSubTaskCfgs() 1158 } else { 1159 // get subtask by name 1160 ret = map[string]map[string]config.SubTaskConfig{} 1161 tmp := s.GetSubTaskCfgsByTask(taskName) 1162 if tmp == nil { 1163 // no subtask matches the `task-name` 1164 return ret 1165 } 1166 ret[taskName] = map[string]config.SubTaskConfig{} 1167 for source, cfg := range tmp { 1168 ret[taskName][source] = *cfg 1169 } 1170 } 1171 // filter the source that we don't want 1172 if len(sources) > 0 { 1173 filterSource 
:= map[string]interface{}{} 1174 for _, source := range sources { 1175 filterSource[source] = true // the source we want 1176 } 1177 for taskName, sourceCfgs := range ret { 1178 for source := range sourceCfgs { 1179 if _, ok := filterSource[source]; !ok { 1180 delete(sourceCfgs, source) 1181 } 1182 } 1183 if len(ret[taskName]) == 0 { 1184 delete(ret, taskName) 1185 } 1186 } 1187 } 1188 return ret 1189 } 1190 1191 // GetSubTaskCfgs gets all subconfig, return nil when error happens. 1192 func (s *Scheduler) GetSubTaskCfgs() map[string]map[string]config.SubTaskConfig { 1193 // taskName -> sourceName -> SubTaskConfig 1194 clone := make(map[string]map[string]config.SubTaskConfig) 1195 s.subTaskCfgs.Range(func(k, v interface{}) bool { 1196 task := k.(string) 1197 m := v.(map[string]config.SubTaskConfig) 1198 clone2 := make(map[string]config.SubTaskConfig, len(m)) 1199 for source, cfg := range m { 1200 cfg2, err := cfg.Clone() 1201 if err != nil { 1202 return true 1203 } 1204 clone2[source] = *cfg2 1205 } 1206 clone[task] = clone2 1207 return true 1208 }) 1209 return clone 1210 } 1211 1212 // GetSubTaskCfgs gets all subTask config pointer, return nil when error happens. 1213 func (s *Scheduler) GetALlSubTaskCfgs() map[string]map[string]*config.SubTaskConfig { 1214 s.mu.RLock() 1215 defer s.mu.RUnlock() 1216 // taskName -> sourceName -> SubTaskConfig 1217 clone := make(map[string]map[string]*config.SubTaskConfig) 1218 s.subTaskCfgs.Range(func(k, v interface{}) bool { 1219 task := k.(string) 1220 m := v.(map[string]config.SubTaskConfig) 1221 clone2 := make(map[string]*config.SubTaskConfig, len(m)) 1222 for source, cfg := range m { 1223 cfg2, err := cfg.Clone() 1224 if err != nil { 1225 return true 1226 } 1227 clone2[source] = cfg2 1228 } 1229 clone[task] = clone2 1230 return true 1231 }) 1232 return clone 1233 } 1234 1235 // GetTaskNameListBySourceName gets task name list by source name. 
1236 func (s *Scheduler) GetTaskNameListBySourceName(sourceName string, expectStage *pb.Stage) []string { 1237 var taskNameList []string 1238 s.expectSubTaskStages.Range(func(k, v interface{}) bool { 1239 subtaskM := v.(map[string]ha.Stage) 1240 subtaskStage, ok2 := subtaskM[sourceName] 1241 if !ok2 { 1242 return true 1243 } 1244 task := k.(string) 1245 if expectStage == nil { 1246 taskNameList = append(taskNameList, task) 1247 } else if subtaskStage.Expect == *expectStage { 1248 taskNameList = append(taskNameList, task) 1249 } 1250 return true 1251 }) 1252 return taskNameList 1253 } 1254 1255 // AddWorker adds the information of the DM-worker when registering a new instance. 1256 // This only adds the information of the DM-worker, 1257 // in order to know whether it's online (ready to handle works), 1258 // we need to wait for its healthy status through keep-alive. 1259 func (s *Scheduler) AddWorker(name, addr string) error { 1260 s.mu.Lock() 1261 defer s.mu.Unlock() 1262 1263 if !s.started.Load() { 1264 return terror.ErrSchedulerNotStarted.Generate() 1265 } 1266 1267 // 1. check whether exists. 1268 if w, ok := s.workers[name]; ok { 1269 // NOTE: we do not support add the worker with different address now, support if needed later. 1270 // but we support add the worker with all the same information multiple times, and only the first one take effect, 1271 // because this is needed when restarting the worker. 1272 if addr == w.BaseInfo().Addr { 1273 s.logger.Warn("add the same worker again", zap.Stringer("worker info", w.BaseInfo())) 1274 return nil 1275 } 1276 return terror.ErrSchedulerWorkerExist.Generate(w.BaseInfo()) 1277 } 1278 1279 // 2. put the base info into etcd. 1280 info := ha.NewWorkerInfo(name, addr) 1281 _, err := ha.PutWorkerInfo(s.etcdCli, info) 1282 if err != nil { 1283 return err 1284 } 1285 1286 // generate an agent of DM-worker (with Offline stage) and keep it in the scheduler. 
1287 _, err = s.recordWorker(info) 1288 return err 1289 } 1290 1291 // RemoveWorker removes the information of the DM-worker when removing the instance manually. 1292 // The user should shutdown the DM-worker instance before removing its information. 1293 func (s *Scheduler) RemoveWorker(name string) error { 1294 s.mu.Lock() 1295 defer s.mu.Unlock() 1296 1297 if !s.started.Load() { 1298 return terror.ErrSchedulerNotStarted.Generate() 1299 } 1300 1301 w, ok := s.workers[name] 1302 if !ok { 1303 return terror.ErrSchedulerWorkerNotExist.Generate(name) 1304 } else if w.Stage() != WorkerOffline { 1305 return terror.ErrSchedulerWorkerOnline.Generate(name) 1306 } 1307 1308 // delete the info in etcd. 1309 _, err := ha.DeleteWorkerInfoRelayConfig(s.etcdCli, name) 1310 if err != nil { 1311 return err 1312 } 1313 s.deleteWorker(name) 1314 return nil 1315 } 1316 1317 // GetAllWorkers gets all worker agent. 1318 func (s *Scheduler) GetAllWorkers() ([]*Worker, error) { 1319 s.mu.RLock() 1320 defer s.mu.RUnlock() 1321 1322 if !s.started.Load() { 1323 return nil, terror.ErrSchedulerNotStarted.Generate() 1324 } 1325 1326 workers := make([]*Worker, 0, len(s.workers)) 1327 for _, value := range s.workers { 1328 workers = append(workers, value) 1329 } 1330 return workers, nil 1331 } 1332 1333 // GetWorkerByName gets worker agent by worker name. 1334 func (s *Scheduler) GetWorkerByName(name string) *Worker { 1335 s.mu.RLock() 1336 defer s.mu.RUnlock() 1337 return s.workers[name] 1338 } 1339 1340 // GetWorkerBySource gets the current bound worker agent by source ID, 1341 // returns nil if the source not bound. 1342 func (s *Scheduler) GetWorkerBySource(source string) *Worker { 1343 s.mu.RLock() 1344 defer s.mu.RUnlock() 1345 return s.bounds[source] 1346 } 1347 1348 // BoundSources returns all bound source IDs in increasing order. 
1349 func (s *Scheduler) BoundSources() []string { 1350 s.mu.RLock() 1351 defer s.mu.RUnlock() 1352 IDs := make([]string, 0, len(s.bounds)) 1353 for ID := range s.bounds { 1354 IDs = append(IDs, ID) 1355 } 1356 sort.Strings(IDs) 1357 return IDs 1358 } 1359 1360 // UnboundSources returns all unbound source IDs in increasing order. 1361 func (s *Scheduler) UnboundSources() []string { 1362 s.mu.RLock() 1363 defer s.mu.RUnlock() 1364 IDs := make([]string, 0, len(s.unbounds)) 1365 for ID := range s.unbounds { 1366 IDs = append(IDs, ID) 1367 } 1368 sort.Strings(IDs) 1369 return IDs 1370 } 1371 1372 // StartRelay puts etcd key-value pairs to start relay on some workers. 1373 func (s *Scheduler) StartRelay(source string, workers []string) error { 1374 s.mu.Lock() 1375 defer s.mu.Unlock() 1376 1377 if !s.started.Load() { 1378 return terror.ErrSchedulerNotStarted.Generate() 1379 } 1380 1381 // 1. precheck 1382 sourceCfg, ok := s.sourceCfgs[source] 1383 if !ok { 1384 return terror.ErrSchedulerSourceCfgNotExist.Generate(source) 1385 } 1386 startedWorkers := s.relayWorkers[source] 1387 1388 // quick path for `start-relay` without worker name 1389 if len(workers) == 0 { 1390 if len(startedWorkers) != 0 { 1391 return terror.ErrSchedulerStartRelayOnSpecified.Generate(utils.SetToSlice(startedWorkers)) 1392 } 1393 // update enable-relay in source config 1394 sourceCfg.EnableRelay = true 1395 _, err := ha.PutSourceCfg(s.etcdCli, sourceCfg) 1396 if err != nil { 1397 return err 1398 } 1399 s.sourceCfgs[source] = sourceCfg 1400 // notify bound worker 1401 w, ok2 := s.bounds[source] 1402 if !ok2 { 1403 return nil 1404 } 1405 stage := ha.NewRelayStage(pb.Stage_Running, source) 1406 _, err = ha.PutRelayStageSourceBound(s.etcdCli, stage, w.Bound()) 1407 return err 1408 } else if sourceCfg.EnableRelay { 1409 // error when `enable-relay` and `start-relay` with worker name 1410 return terror.ErrSchedulerStartRelayOnBound.Generate() 1411 } 1412 1413 if startedWorkers == nil { 1414 
startedWorkers = map[string]struct{}{} 1415 s.relayWorkers[source] = startedWorkers 1416 } 1417 var ( 1418 notExistWorkers []string 1419 // below two list means the worker that requested start-relay has bound to another source 1420 boundWorkers, boundSources []string 1421 alreadyStarted []string 1422 // currently we forbid one worker starting multiple relay 1423 busyWorkers, busySources []string 1424 ) 1425 for _, workerName := range workers { 1426 var ( 1427 worker *Worker 1428 ok bool 1429 ) 1430 if worker, ok = s.workers[workerName]; !ok { 1431 notExistWorkers = append(notExistWorkers, workerName) 1432 continue 1433 } 1434 if _, ok = startedWorkers[workerName]; ok { 1435 alreadyStarted = append(alreadyStarted, workerName) 1436 } 1437 1438 // for Bound and Offline worker 1439 if worker.Bound().Source != "" && worker.Bound().Source != source { 1440 boundWorkers = append(boundWorkers, workerName) 1441 boundSources = append(boundSources, worker.Bound().Source) 1442 } 1443 if relaySource := worker.RelaySourceID(); relaySource != "" && relaySource != source { 1444 busyWorkers = append(busyWorkers, workerName) 1445 busySources = append(busySources, relaySource) 1446 } 1447 } 1448 1449 if len(notExistWorkers) > 0 { 1450 return terror.ErrSchedulerWorkerNotExist.Generate(notExistWorkers) 1451 } 1452 if len(boundWorkers) > 0 { 1453 return terror.ErrSchedulerRelayWorkersWrongBound.Generate(boundWorkers, boundSources) 1454 } 1455 if len(busyWorkers) > 0 { 1456 return terror.ErrSchedulerRelayWorkersBusy.Generate(busyWorkers, busySources) 1457 } 1458 if len(alreadyStarted) > 0 { 1459 s.logger.Warn("some workers already started relay", 1460 zap.String("source", source), 1461 zap.Strings("already started workers", alreadyStarted)) 1462 } 1463 1464 // 2. put etcd and update memory cache 1465 // if there's no relay stage, create a running one. 
otherwise we should respect paused stage 1466 if len(startedWorkers) == 0 { 1467 stage := ha.NewRelayStage(pb.Stage_Running, source) 1468 if _, err := ha.PutRelayStage(s.etcdCli, stage); err != nil { 1469 return err 1470 } 1471 s.expectRelayStages[source] = stage 1472 } 1473 if _, err := ha.PutRelayConfig(s.etcdCli, source, workers...); err != nil { 1474 return err 1475 } 1476 for _, workerName := range workers { 1477 s.relayWorkers[source][workerName] = struct{}{} 1478 if err := s.workers[workerName].StartRelay(source); err != nil { 1479 s.logger.DPanic("we have checked the prerequisite and updated etcd, so should be no error", 1480 zap.Error(err)) 1481 } 1482 } 1483 return nil 1484 } 1485 1486 // StopRelay deletes etcd key-value pairs to stop relay on some workers. 1487 func (s *Scheduler) StopRelay(source string, workers []string) error { 1488 s.mu.Lock() 1489 defer s.mu.Unlock() 1490 1491 if !s.started.Load() { 1492 return terror.ErrSchedulerNotStarted.Generate() 1493 } 1494 1495 // 1. 
precheck 1496 sourceCfg, ok := s.sourceCfgs[source] 1497 if !ok { 1498 return terror.ErrSchedulerSourceCfgNotExist.Generate(source) 1499 } 1500 // quick path for `stop-relay` without worker name 1501 if len(workers) == 0 { 1502 startedWorker := s.relayWorkers[source] 1503 if len(startedWorker) != 0 { 1504 return terror.ErrSchedulerStopRelayOnSpecified.Generate(utils.SetToSlice(startedWorker)) 1505 } 1506 // update enable-relay in source config 1507 sourceCfg.EnableRelay = false 1508 _, err := ha.PutSourceCfg(s.etcdCli, sourceCfg) 1509 if err != nil { 1510 return err 1511 } 1512 s.sourceCfgs[source] = sourceCfg 1513 // notify bound worker 1514 w, ok2 := s.bounds[source] 1515 if !ok2 { 1516 return nil 1517 } 1518 // TODO: remove orphan relay stage 1519 _, err = ha.PutSourceBound(s.etcdCli, w.Bound()) 1520 return err 1521 } else if sourceCfg.EnableRelay { 1522 // error when `enable-relay` and `stop-relay` with worker name 1523 return terror.ErrSchedulerStopRelayOnBound.Generate() 1524 } 1525 1526 var ( 1527 notExistWorkers []string 1528 unmatchedWorkers, unmatchedSources []string 1529 alreadyStopped []string 1530 ) 1531 for _, workerName := range workers { 1532 var ( 1533 worker *Worker 1534 ok bool 1535 ) 1536 1537 if worker, ok = s.workers[workerName]; !ok { 1538 notExistWorkers = append(notExistWorkers, workerName) 1539 continue 1540 } 1541 1542 startedRelay := worker.RelaySourceID() 1543 if startedRelay == "" { 1544 alreadyStopped = append(alreadyStopped, workerName) 1545 continue 1546 } 1547 1548 if startedRelay != source { 1549 unmatchedWorkers = append(unmatchedWorkers, workerName) 1550 unmatchedSources = append(unmatchedSources, startedRelay) 1551 } 1552 } 1553 if len(notExistWorkers) > 0 { 1554 return terror.ErrSchedulerWorkerNotExist.Generate(notExistWorkers) 1555 } 1556 if len(unmatchedWorkers) > 0 { 1557 return terror.ErrSchedulerRelayWorkersWrongRelay.Generate(unmatchedWorkers, unmatchedSources) 1558 } 1559 if len(alreadyStopped) > 0 { 1560 
s.logger.Warn("some workers already stopped relay", 1561 zap.String("source", source), 1562 zap.Strings("already stopped workers", alreadyStopped)) 1563 } 1564 1565 // 2. delete from etcd and update memory cache 1566 if _, err := ha.DeleteRelayConfig(s.etcdCli, workers...); err != nil { 1567 return err 1568 } 1569 for _, workerName := range workers { 1570 delete(s.relayWorkers[source], workerName) 1571 s.workers[workerName].StopRelay() 1572 } 1573 if len(s.relayWorkers[source]) == 0 { 1574 if _, err := ha.DeleteRelayStage(s.etcdCli, source); err != nil { 1575 return err 1576 } 1577 delete(s.relayWorkers, source) 1578 delete(s.expectRelayStages, source) 1579 } 1580 return nil 1581 } 1582 1583 // GetRelayWorkers returns all alive worker instances for a relay source. 1584 func (s *Scheduler) GetRelayWorkers(source string) ([]*Worker, error) { 1585 s.mu.RLock() 1586 defer s.mu.RUnlock() 1587 if !s.started.Load() { 1588 return nil, terror.ErrSchedulerNotStarted.Generate() 1589 } 1590 workers := s.relayWorkers[source] 1591 ret := make([]*Worker, 0, len(workers)) 1592 for w := range workers { 1593 worker, ok := s.workers[w] 1594 if !ok { 1595 // should not happen 1596 s.logger.Error("worker instance for relay worker not found", zap.String("worker", w)) 1597 continue 1598 } 1599 ret = append(ret, worker) 1600 } 1601 sort.Slice(ret, func(i, j int) bool { 1602 return ret[i].baseInfo.Name < ret[j].baseInfo.Name 1603 }) 1604 return ret, nil 1605 } 1606 1607 // UpdateExpectRelayStage updates the current expect relay stage. 1608 // now, only support updates: 1609 // - from `Running` to `Paused`. 1610 // - from `Paused` to `Running`. 1611 // NOTE: from `Running` to `Running` and `Paused` to `Paused` still update the data in etcd, 1612 // because some user may want to update `{Running, Paused, ...}` to `{Running, Running, ...}`. 1613 // so, this should be also supported in DM-worker. 
1614 func (s *Scheduler) UpdateExpectRelayStage(newStage pb.Stage, sources ...string) error { 1615 s.mu.Lock() 1616 defer s.mu.Unlock() 1617 1618 if !s.started.Load() { 1619 return terror.ErrSchedulerNotStarted.Generate() 1620 } 1621 1622 if len(sources) == 0 { 1623 return nil // no sources need to update the stage, this should not happen. 1624 } 1625 1626 // 1. check the new expectant stage. 1627 switch newStage { 1628 case pb.Stage_Running, pb.Stage_Paused: 1629 default: 1630 return terror.ErrSchedulerRelayStageInvalidUpdate.Generate(newStage) 1631 } 1632 1633 var ( 1634 notExistSourcesM = make(map[string]struct{}) 1635 currStagesM = make(map[string]struct{}) 1636 stages = make([]ha.Stage, 0, len(sources)) 1637 ) 1638 for _, source := range sources { 1639 if _, ok := s.sourceCfgs[source]; !ok { 1640 notExistSourcesM[source] = struct{}{} 1641 continue 1642 } 1643 1644 if currStage, ok := s.expectRelayStages[source]; ok { 1645 currStagesM[currStage.Expect.String()] = struct{}{} 1646 } else { 1647 s.logger.Warn("will write relay stage for a source that doesn't have previous stage", 1648 zap.String("source", source)) 1649 } 1650 stages = append(stages, ha.NewRelayStage(newStage, source)) 1651 } 1652 notExistSources := strMapToSlice(notExistSourcesM) 1653 currStages := strMapToSlice(currStagesM) 1654 if len(notExistSources) > 0 { 1655 // some sources not exist, reject the request. 1656 return terror.ErrSchedulerRelayStageSourceNotExist.Generate(notExistSources) 1657 } else if len(currStages) > 1 { 1658 // more than one current relay stage exist, but need to update to the same one, log a warn. 1659 s.logger.Warn("update more than one current expectant relay stage to the same one", 1660 zap.Strings("from", currStages), zap.Stringer("to", newStage)) 1661 } 1662 1663 // 2. put the stages into etcd. 1664 _, err := ha.PutRelayStage(s.etcdCli, stages...) 1665 if err != nil { 1666 return err 1667 } 1668 1669 // 3. update the stages in the scheduler. 
1670 for _, stage := range stages { 1671 s.expectRelayStages[stage.Source] = stage 1672 } 1673 1674 return nil 1675 } 1676 1677 // GetExpectRelayStage returns the current expect relay stage. 1678 // If the stage not exists, an invalid stage is returned. 1679 // This func is used for testing. 1680 func (s *Scheduler) GetExpectRelayStage(source string) ha.Stage { 1681 s.mu.RLock() 1682 defer s.mu.RUnlock() 1683 if stage, ok := s.expectRelayStages[source]; ok { 1684 return stage 1685 } 1686 return ha.NewRelayStage(pb.Stage_InvalidStage, source) 1687 } 1688 1689 // UpdateExpectSubTaskStage updates the current expect subtask stage. 1690 // now, only support updates: 1691 // - from `Running` to `Paused/Stopped`. 1692 // - from `Paused/Stopped` to `Running`. 1693 // NOTE: from `Running` to `Running` and `Paused` to `Paused` still update the data in etcd, 1694 // because some user may want to update `{Running, Paused, ...}` to `{Running, Running, ...}`. 1695 // so, this should be also supported in DM-worker. 1696 func (s *Scheduler) UpdateExpectSubTaskStage(newStage pb.Stage, taskName string, sources ...string) error { 1697 if !s.started.Load() { 1698 return terror.ErrSchedulerNotStarted.Generate() 1699 } 1700 1701 if taskName == "" || len(sources) == 0 { 1702 return nil // no subtask need to update, this should not happen. 1703 } 1704 1705 // 1. check the new expectant stage. 1706 switch newStage { 1707 case pb.Stage_Running, pb.Stage_Paused, pb.Stage_Stopped: 1708 default: 1709 return terror.ErrSchedulerSubTaskStageInvalidUpdate.Generate(newStage) 1710 } 1711 1712 release, err := s.subtaskLatch.tryAcquire(taskName) 1713 if err != nil { 1714 return terror.ErrSchedulerLatchInUse.Generate("UpdateExpectSubTaskStage", taskName) 1715 } 1716 defer release() 1717 1718 // 2. check the task exists. 
1719 v, ok := s.expectSubTaskStages.Load(taskName) 1720 if !ok { 1721 return terror.ErrSchedulerSubTaskOpTaskNotExist.Generate(taskName) 1722 } 1723 1724 var ( 1725 stagesM = v.(map[string]ha.Stage) 1726 notExistSourcesM = make(map[string]struct{}) 1727 currStagesM = make(map[string]struct{}) 1728 stages = make([]ha.Stage, 0, len(sources)) 1729 ) 1730 for _, source := range sources { 1731 if currStage, ok := stagesM[source]; !ok { 1732 notExistSourcesM[source] = struct{}{} 1733 } else { 1734 currStagesM[currStage.Expect.String()] = struct{}{} 1735 } 1736 stages = append(stages, ha.NewSubTaskStage(newStage, source, taskName)) 1737 } 1738 notExistSources := strMapToSlice(notExistSourcesM) 1739 currStages := strMapToSlice(currStagesM) 1740 if len(notExistSources) > 0 { 1741 // some sources not exist, reject the request. 1742 return terror.ErrSchedulerSubTaskOpSourceNotExist.Generate(notExistSources) 1743 } else if len(currStages) > 1 { 1744 // more than one current subtask stage exist, but need to update to the same one, log a warn. 1745 s.logger.Warn("update more than one current expectant subtask stage to the same one", 1746 zap.Strings("from", currStages), zap.Stringer("to", newStage)) 1747 } 1748 1749 // 3. put the stages into etcd. 1750 _, err = ha.PutSubTaskStage(s.etcdCli, stages...) 1751 if err != nil { 1752 return err 1753 } 1754 1755 // 4. update the stages in the scheduler. 1756 for _, stage := range stages { 1757 stagesM[stage.Source] = stage 1758 } 1759 1760 return nil 1761 } 1762 1763 // GetExpectSubTaskStage returns the current expect subtask stage. 1764 // If the stage not exists, an invalid stage is returned. 
1765 func (s *Scheduler) GetExpectSubTaskStage(task, source string) ha.Stage { 1766 invalidStage := ha.NewSubTaskStage(pb.Stage_InvalidStage, source, task) 1767 1768 release, err := s.subtaskLatch.tryAcquire(task) 1769 if err != nil { 1770 return invalidStage 1771 } 1772 defer release() 1773 1774 v, ok := s.expectSubTaskStages.Load(task) 1775 if !ok { 1776 return invalidStage 1777 } 1778 stageM := v.(map[string]ha.Stage) 1779 stage, ok := stageM[source] 1780 if !ok { 1781 return invalidStage 1782 } 1783 return stage 1784 } 1785 1786 // Started returns if the scheduler is started. 1787 func (s *Scheduler) Started() bool { 1788 return s.started.Load() 1789 } 1790 1791 // recoverSourceCfgs recovers history source configs and expectant relay stages from etcd. 1792 func (s *Scheduler) recoverSources() error { 1793 // get all source configs. 1794 cfgM, _, err := ha.GetSourceCfg(s.etcdCli, "", 0) 1795 if err != nil { 1796 return err 1797 } 1798 // get all relay stages. 1799 stageM, _, err := ha.GetAllRelayStage(s.etcdCli) 1800 if err != nil { 1801 return err 1802 } 1803 1804 // recover in-memory data. 1805 for source, cfg := range cfgM { 1806 s.sourceCfgs[source] = cfg 1807 } 1808 for source, stage := range stageM { 1809 s.expectRelayStages[source] = stage 1810 } 1811 1812 return nil 1813 } 1814 1815 // recoverSubTasks recovers history subtask configs and expectant subtask stages from etcd. 1816 func (s *Scheduler) recoverSubTasks() error { 1817 // get all subtask configs. 1818 cfgMM, _, err := ha.GetAllSubTaskCfg(s.etcdCli) 1819 if err != nil { 1820 return err 1821 } 1822 // get all subtask stages. 1823 stageMM, _, err := ha.GetAllSubTaskStage(s.etcdCli) 1824 if err != nil { 1825 return err 1826 } 1827 validatorStageMM, _, err := ha.GetAllValidatorStage(s.etcdCli) 1828 if err != nil { 1829 return err 1830 } 1831 1832 // recover in-memory data. 
1833 for source, cfgM := range cfgMM { 1834 for task, cfg := range cfgM { 1835 v, _ := s.subTaskCfgs.LoadOrStore(task, map[string]config.SubTaskConfig{}) 1836 m := v.(map[string]config.SubTaskConfig) 1837 m[source] = cfg 1838 } 1839 } 1840 for source, stageM := range stageMM { 1841 for task, stage := range stageM { 1842 v, _ := s.expectSubTaskStages.LoadOrStore(task, map[string]ha.Stage{}) 1843 m := v.(map[string]ha.Stage) 1844 m[source] = stage 1845 } 1846 } 1847 for source, stageM := range validatorStageMM { 1848 for task, stage := range stageM { 1849 v, _ := s.expectValidatorStages.LoadOrStore(task, map[string]ha.Stage{}) 1850 m := v.(map[string]ha.Stage) 1851 m[source] = stage 1852 } 1853 } 1854 1855 return nil 1856 } 1857 1858 // recoverRelayConfigs recovers history relay configs for each worker from etcd. 1859 // This function also removes conflicting relay schedule types, which means if a source has both `enable-relay` and 1860 // (source, worker) relay config, we remove the latter. 1861 // should be called after recoverSources. 1862 func (s *Scheduler) recoverRelayConfigs() error { 1863 relayWorkers, _, err := ha.GetAllRelayConfig(s.etcdCli) 1864 if err != nil { 1865 return err 1866 } 1867 1868 for source, workers := range relayWorkers { 1869 sourceCfg, ok := s.sourceCfgs[source] 1870 if !ok { 1871 s.logger.Warn("found a not existing source by relay config", zap.String("source", source)) 1872 continue 1873 } 1874 if sourceCfg.EnableRelay { 1875 // current etcd max-txn-op is 2048 1876 _, err2 := ha.DeleteRelayConfig(s.etcdCli, utils.SetToSlice(workers)...) 1877 if err2 != nil { 1878 return err2 1879 } 1880 delete(relayWorkers, source) 1881 } 1882 } 1883 1884 s.relayWorkers = relayWorkers 1885 return nil 1886 } 1887 1888 // recoverLoadTasks recovers history load workers from etcd. 
1889 func (s *Scheduler) recoverLoadTasks(needLock bool) (int64, error) { 1890 if needLock { 1891 s.mu.Lock() 1892 defer s.mu.Unlock() 1893 } 1894 loadTasks, rev, err := ha.GetAllLoadTask(s.etcdCli) 1895 if err != nil { 1896 return 0, err 1897 } 1898 1899 s.loadTasks = loadTasks 1900 return rev, nil 1901 } 1902 1903 // recoverWorkersBounds recovers history DM-worker info and status from etcd. 1904 // and it also recovers the bound/unbound relationship. 1905 func (s *Scheduler) recoverWorkersBounds() (int64, error) { 1906 // 1. get all history base info. 1907 // it should no new DM-worker registered between this call and the below `GetKeepAliveWorkers`, 1908 // because no DM-master leader are handling DM-worker register requests. 1909 wim, _, err := ha.GetAllWorkerInfo(s.etcdCli) 1910 if err != nil { 1911 return 0, err 1912 } 1913 1914 // 2. get all history bound relationships. 1915 // it should no new bound relationship added between this call and the below `GetKeepAliveWorkers`, 1916 // because no DM-master leader are doing the scheduler. 1917 sbm, _, err := ha.GetSourceBound(s.etcdCli, "") 1918 if err != nil { 1919 return 0, err 1920 } 1921 lastSourceBoundM, _, err := ha.GetLastSourceBounds(s.etcdCli) 1922 if err != nil { 1923 return 0, err 1924 } 1925 s.lastBound = lastSourceBoundM 1926 1927 // 3. get all history offline status. 1928 kam, rev, err := ha.GetKeepAliveWorkers(s.etcdCli) 1929 if err != nil { 1930 return 0, err 1931 } 1932 1933 scm := s.sourceCfgs 1934 boundsToTrigger := make([]ha.SourceBound, 0) 1935 1936 // 4. recover DM-worker info and status. 1937 // prepare a worker -> relay source map 1938 relayInfo := map[string]string{} 1939 for source, workers := range s.relayWorkers { 1940 for worker := range workers { 1941 relayInfo[worker] = source 1942 } 1943 } 1944 1945 for name, info := range wim { 1946 // create and record the worker agent. 
1947 w, err2 := s.recordWorker(info) 1948 if err2 != nil { 1949 return 0, err2 1950 } 1951 // set the stage as Free if it's keep alive. 1952 if _, ok := kam[name]; ok { 1953 w.ToFree() 1954 if source, ok2 := relayInfo[name]; ok2 { 1955 if err3 := w.StartRelay(source); err3 != nil { 1956 s.logger.DPanic("", zap.Error(err3)) 1957 } 1958 } 1959 1960 // set the stage as Bound and record the bound relationship if exists. 1961 if bound, ok := sbm[name]; ok { 1962 // source bounds without source configuration should be deleted later 1963 if _, ok := scm[bound.Source]; ok { 1964 err2 = s.updateStatusToBound(w, bound) 1965 if err2 != nil { 1966 // if etcd has saved KV that worker1 started relay for source1, but bound to source2, 1967 // we remove the bound to avoid DM master leader failed to bootstrap 1968 if terror.ErrSchedulerBoundDiffWithStartedRelay.Equal(err2) { 1969 continue 1970 } 1971 return 0, err2 1972 } 1973 boundsToTrigger = append(boundsToTrigger, bound) 1974 delete(sbm, name) 1975 } else { 1976 s.logger.Warn("find source bound without config", zap.Stringer("bound", bound)) 1977 } 1978 } 1979 } 1980 } 1981 1982 failpoint.Inject("failToRecoverWorkersBounds", func(_ failpoint.Value) { 1983 log.L().Info("mock failure", zap.String("failpoint", "failToRecoverWorkersBounds")) 1984 failpoint.Return(0, errors.New("failToRecoverWorkersBounds")) 1985 }) 1986 // 5. delete invalid source bound info in etcd 1987 if len(sbm) > 0 { 1988 invalidSourceBounds := make([]string, 0, len(sbm)) 1989 for name := range sbm { 1990 invalidSourceBounds = append(invalidSourceBounds, name) 1991 } 1992 _, err = ha.DeleteSourceBound(s.etcdCli, invalidSourceBounds...) 1993 if err != nil { 1994 return 0, err 1995 } 1996 } 1997 1998 // 6. put trigger source bounds info to etcd to order dm-workers to start source 1999 if len(boundsToTrigger) > 0 { 2000 _, err = ha.PutSourceBound(s.etcdCli, boundsToTrigger...) 2001 if err != nil { 2002 return 0, err 2003 } 2004 } 2005 2006 // 7. 
recover bounds/unbounds, all sources which not in bounds should be in unbounds. 2007 for source := range s.sourceCfgs { 2008 if _, ok := s.bounds[source]; !ok { 2009 s.unbounds[source] = struct{}{} 2010 } 2011 } 2012 2013 return rev, nil 2014 } 2015 2016 func (s *Scheduler) resetWorkerEv() (int64, error) { 2017 s.mu.Lock() 2018 defer s.mu.Unlock() 2019 2020 rwm := s.workers 2021 kam, rev, err := ha.GetKeepAliveWorkers(s.etcdCli) 2022 if err != nil { 2023 return 0, err 2024 } 2025 2026 // update all registered workers status 2027 for name := range rwm { 2028 ev := ha.WorkerEvent{WorkerName: name} 2029 // set the stage as Free if it's keep alive. 2030 if _, ok := kam[name]; ok { 2031 err = s.handleWorkerOnline(ev, false) 2032 if err != nil { 2033 return 0, err 2034 } 2035 } else { 2036 err = s.handleWorkerOffline(ev, false) 2037 if err != nil { 2038 return 0, err 2039 } 2040 } 2041 } 2042 return rev, nil 2043 } 2044 2045 // handleWorkerEv handles the online/offline status change event of DM-worker instances. 
2046 func (s *Scheduler) handleWorkerEv(ctx context.Context, evCh <-chan ha.WorkerEvent, errCh <-chan error) error { 2047 for { 2048 select { 2049 case <-ctx.Done(): 2050 return nil 2051 case ev, ok := <-evCh: 2052 if !ok { 2053 return nil 2054 } 2055 s.logger.Info("receive worker status change event", zap.Bool("delete", ev.IsDeleted), zap.Stringer("event", ev)) 2056 var err error 2057 if ev.IsDeleted { 2058 err = s.handleWorkerOffline(ev, true) 2059 } else { 2060 err = s.handleWorkerOnline(ev, true) 2061 } 2062 if err != nil { 2063 s.logger.Error("fail to handle worker status change event", zap.Bool("delete", ev.IsDeleted), zap.Stringer("event", ev), zap.Error(err)) 2064 metrics.ReportWorkerEventErr(metrics.WorkerEventHandle) 2065 } 2066 case err, ok := <-errCh: 2067 if !ok { 2068 return nil 2069 } 2070 // error here are caused by etcd error or worker event decoding 2071 s.logger.Error("receive error when watching worker status change event", zap.Error(err)) 2072 metrics.ReportWorkerEventErr(metrics.WorkerEventWatch) 2073 if etcdutil.IsRetryableError(err) { 2074 return err 2075 } 2076 } 2077 } 2078 } 2079 2080 // nolint:dupl 2081 func (s *Scheduler) observeWorkerEvent(ctx context.Context, rev int64) error { 2082 var wg sync.WaitGroup 2083 for { 2084 workerEvCh := make(chan ha.WorkerEvent, 10) 2085 workerErrCh := make(chan error, 10) 2086 wg.Add(1) 2087 // use ctx1, cancel1 to make sure old watcher has been released 2088 ctx1, cancel1 := context.WithCancel(ctx) 2089 go func() { 2090 defer func() { 2091 close(workerEvCh) 2092 close(workerErrCh) 2093 wg.Done() 2094 }() 2095 ha.WatchWorkerEvent(ctx1, s.etcdCli, rev+1, workerEvCh, workerErrCh) 2096 }() 2097 err := s.handleWorkerEv(ctx1, workerEvCh, workerErrCh) 2098 cancel1() 2099 wg.Wait() 2100 2101 if etcdutil.IsRetryableError(err) { 2102 rev = 0 2103 retryNum := 1 2104 for rev == 0 { 2105 select { 2106 case <-ctx.Done(): 2107 return nil 2108 case <-time.After(500 * time.Millisecond): 2109 rev, err = 
s.resetWorkerEv() 2110 if err != nil { 2111 log.L().Error("resetWorkerEv is failed, will retry later", zap.Error(err), zap.Int("retryNum", retryNum)) 2112 } 2113 } 2114 retryNum++ 2115 } 2116 } else { 2117 if err != nil { 2118 log.L().Error("observeWorkerEvent is failed and will quit now", zap.Error(err)) 2119 } else { 2120 log.L().Info("observeWorkerEvent will quit now") 2121 } 2122 return err 2123 } 2124 } 2125 } 2126 2127 // handleWorkerOnline handles the scheduler when a DM-worker become online. 2128 // This should try to bound an unbound source to it. 2129 // NOTE: this func need to hold the mutex. 2130 func (s *Scheduler) handleWorkerOnline(ev ha.WorkerEvent, toLock bool) error { 2131 if toLock { 2132 s.mu.Lock() 2133 defer s.mu.Unlock() 2134 } 2135 2136 // 1. find the worker. 2137 w, ok := s.workers[ev.WorkerName] 2138 if !ok { 2139 s.logger.Warn("worker for the event not exists", zap.Stringer("event", ev)) 2140 return nil 2141 } 2142 2143 // 2. check whether is bound. 2144 if w.Stage() == WorkerBound { 2145 // also put identical relay config for this worker 2146 if source := w.RelaySourceID(); source != "" { 2147 _, err := ha.PutRelayConfig(s.etcdCli, source, w.BaseInfo().Name) 2148 if err != nil { 2149 return err 2150 } 2151 } 2152 // TODO: When dm-worker keepalive is broken, it will turn off its own running source 2153 // After keepalive is restored, this dm-worker should continue to run the previously bound source 2154 // So we PutSourceBound here to trigger dm-worker to get this event and start source again. 2155 // If this worker still start a source, it doesn't matter. dm-worker will omit same source and reject source with different name 2156 s.logger.Warn("worker already bound", zap.Stringer("bound", w.Bound())) 2157 _, err := ha.PutSourceBound(s.etcdCli, w.Bound()) 2158 return err 2159 } 2160 2161 // 3. change the stage (from Offline) to Free or Relay. 
2162 lastRelaySource := w.RelaySourceID() 2163 if lastRelaySource == "" { 2164 // when worker is removed (for example lost keepalive when master scheduler boots up), w.RelaySourceID() is 2165 // of course nothing, so we find the relay source from a better place 2166 for source, workerM := range s.relayWorkers { 2167 if _, ok2 := workerM[w.BaseInfo().Name]; ok2 { 2168 lastRelaySource = source 2169 break 2170 } 2171 } 2172 } 2173 w.ToFree() 2174 // TODO: rename ToFree to Online and move below logic inside it 2175 if lastRelaySource != "" { 2176 if err := w.StartRelay(lastRelaySource); err != nil { 2177 s.logger.DPanic("", zap.Error(err)) 2178 } 2179 } 2180 2181 // 4. try to bind an unbound source. 2182 _, err := s.tryBoundForWorker(w) 2183 return err 2184 } 2185 2186 // handleWorkerOffline handles the scheduler when a DM-worker become offline. 2187 // This should unbind any previous bound source. 2188 // NOTE: this func need to hold the mutex. 2189 func (s *Scheduler) handleWorkerOffline(ev ha.WorkerEvent, toLock bool) error { 2190 if toLock { 2191 s.mu.Lock() 2192 defer s.mu.Unlock() 2193 } 2194 2195 // 1. find the worker. 2196 w, ok := s.workers[ev.WorkerName] 2197 if !ok { 2198 s.logger.Warn("worker for the event not exists", zap.Stringer("event", ev)) 2199 return nil 2200 } 2201 2202 // 2. find the bound relationship. 2203 bound := w.Bound() 2204 2205 // 3. check whether bound before. 2206 if bound.Source == "" { 2207 // 3.1. change the stage (from Free) to Offline. 2208 w.ToOffline() 2209 s.logger.Info("worker not bound, no need to unbound", zap.Stringer("event", ev)) 2210 return nil 2211 } 2212 2213 // 4. delete the bound relationship in etcd. 2214 _, err := ha.DeleteSourceBound(s.etcdCli, bound.Worker) 2215 if err != nil { 2216 return err 2217 } 2218 2219 // 5. unbound for the source. 2220 s.updateStatusToUnbound(bound.Source) 2221 2222 // 6. change the stage (from Free) to Offline. 
2223 w.ToOffline() 2224 2225 s.logger.Info("unbound the worker for source", zap.Stringer("bound", bound), zap.Stringer("event", ev)) 2226 2227 // 7. try to bound the source to a Free worker again. 2228 _, err = s.tryBoundForSource(bound.Source) 2229 return err 2230 } 2231 2232 // tryBoundForWorker tries to bind a source to the given worker. The order of picking source is 2233 // - try to bind sources on which the worker has unfinished load task 2234 // - try to bind the last bound source 2235 // - if enabled relay, bind to the relay source or keep unbound 2236 // - try to bind any unbound sources 2237 // if the source is bound to a relay enabled worker, we must check that the source is also the relay source of worker. 2238 // pulling binlog using relay or not is determined by whether the worker has enabled relay. 2239 func (s *Scheduler) tryBoundForWorker(w *Worker) (bound bool, err error) { 2240 // 1. handle this worker has unfinished load task. 2241 worker, sourceID := s.getNextLoadTaskTransfer(w.BaseInfo().Name, "") 2242 if sourceID != "" { 2243 s.logger.Info("found unfinished load task source when worker bound", 2244 zap.String("worker", w.BaseInfo().Name), 2245 zap.String("source", sourceID)) 2246 // TODO: tolerate a failed transfer because of start-relay conflicts with loadTask 2247 err = s.transferWorkerAndSource(w.BaseInfo().Name, "", worker, sourceID) 2248 return err == nil, err 2249 } 2250 2251 // check if last bound is still available. 
2252 // NOTE: if worker isn't in lastBound, we'll get "zero" SourceBound and it's OK, because "zero" string is not in 2253 // unbounds 2254 source := s.lastBound[w.baseInfo.Name].Source 2255 if _, ok := s.unbounds[source]; !ok { 2256 source = "" 2257 } 2258 2259 if source != "" { 2260 relaySource := w.RelaySourceID() 2261 if relaySource != "" && relaySource != source { 2262 source = "" 2263 } else { 2264 // worker not enable relay or last bound is relay source 2265 s.logger.Info("found history source when worker bound", 2266 zap.String("worker", w.BaseInfo().Name), 2267 zap.String("source", source)) 2268 } 2269 } 2270 2271 // try to find its relay source (currently only one relay source) 2272 if source == "" { 2273 source = w.RelaySourceID() 2274 if source != "" { 2275 s.logger.Info("found relay source when worker bound", 2276 zap.String("worker", w.BaseInfo().Name), 2277 zap.String("source", source)) 2278 // currently worker can only handle same relay source and source bound, so we don't try bound another source 2279 if oldWorker, ok := s.bounds[source]; ok { 2280 s.logger.Info("worker has started relay for a source, but that source is bound to another worker, so we let this worker free", 2281 zap.String("worker", w.BaseInfo().Name), 2282 zap.String("relay source", source), 2283 zap.String("bound worker for its relay source", oldWorker.BaseInfo().Name)) 2284 return false, nil 2285 } 2286 } 2287 } 2288 2289 // randomly pick one from unbounds 2290 if source == "" { 2291 for source = range s.unbounds { 2292 s.logger.Info("found unbound source when worker bound", 2293 zap.String("worker", w.BaseInfo().Name), 2294 zap.String("source", source)) 2295 break // got a source. 2296 } 2297 } 2298 2299 if source == "" { 2300 s.logger.Info("no unbound sources need to bound", zap.Stringer("worker", w.BaseInfo())) 2301 return false, nil 2302 } 2303 2304 // 2. try to bound them. 
2305 err = s.boundSourceToWorker(source, w) 2306 if err != nil { 2307 return false, err 2308 } 2309 return true, nil 2310 } 2311 2312 // tryBoundForSource tries to bound a source to a random Free worker. The order of picking worker is 2313 // - try to bind a worker which has unfinished load task 2314 // - try to bind a relay worker which has be bound to this source before 2315 // - try to bind any relay worker 2316 // - try to bind any worker which has be bound to this source before 2317 // - try to bind any free worker 2318 // pulling binlog using relay or not is determined by whether the worker has enabled relay. 2319 // caller should update the s.unbounds. 2320 // caller should make sure this source has source config. 2321 func (s *Scheduler) tryBoundForSource(source string) (bool, error) { 2322 var worker *Worker 2323 2324 // pick a worker which has subtask in load stage. 2325 workerName, sourceID := s.getNextLoadTaskTransfer("", source) 2326 if workerName != "" { 2327 // TODO: check relay source conflict 2328 err := s.transferWorkerAndSource("", source, workerName, sourceID) 2329 return err == nil, err 2330 } 2331 2332 relayWorkers := s.relayWorkers[source] 2333 // 1. try to find a history worker in relay workers... 2334 if len(relayWorkers) > 0 { 2335 for workerName, bound := range s.lastBound { 2336 if bound.Source == source { 2337 w, ok := s.workers[workerName] 2338 if !ok { 2339 // a not found worker 2340 continue 2341 } 2342 // the worker is not Offline 2343 if _, ok2 := relayWorkers[workerName]; ok2 && w.Stage() == WorkerRelay { 2344 worker = w 2345 s.logger.Info("found history relay worker when source bound", 2346 zap.String("worker", workerName), 2347 zap.String("source", source)) 2348 break 2349 } 2350 } 2351 } 2352 } 2353 // then a relay worker for this source... 
2354 if worker == nil { 2355 for workerName := range relayWorkers { 2356 w, ok := s.workers[workerName] 2357 if !ok { 2358 // a not found worker, should not happen 2359 s.logger.DPanic("worker instance not found for relay worker", zap.String("worker", workerName)) 2360 continue 2361 } 2362 // the worker is not Offline 2363 if w.Stage() == WorkerRelay { 2364 worker = w 2365 s.logger.Info("found relay worker when source bound", 2366 zap.String("worker", workerName), 2367 zap.String("source", source)) 2368 break 2369 } 2370 } 2371 } 2372 // then a history worker for this source... 2373 if worker == nil { 2374 for workerName, bound := range s.lastBound { 2375 if bound.Source == source { 2376 w, ok := s.workers[workerName] 2377 if !ok { 2378 // a not found worker 2379 continue 2380 } 2381 if w.Stage() == WorkerFree { 2382 worker = w 2383 s.logger.Info("found history worker when source bound", 2384 zap.String("worker", workerName), 2385 zap.String("source", source)) 2386 break 2387 } 2388 } 2389 } 2390 } 2391 2392 // and then a random Free worker. 2393 if worker == nil { 2394 for _, w := range s.workers { 2395 if w.Stage() == WorkerFree { 2396 worker = w 2397 s.logger.Info("found free worker when source bound", 2398 zap.String("worker", w.BaseInfo().Name), 2399 zap.String("source", source)) 2400 break 2401 } 2402 } 2403 } 2404 2405 if worker == nil { 2406 s.logger.Info("no free worker exists for bound", zap.String("source", source)) 2407 return false, nil 2408 } 2409 2410 // 2. try to bound them. 2411 err := s.boundSourceToWorker(source, worker) 2412 if err != nil { 2413 return false, err 2414 } 2415 return true, nil 2416 } 2417 2418 // boundSourceToWorker bounds the source and worker together. 2419 // we should check the bound relationship of the source and the stage of the worker in the caller. 2420 func (s *Scheduler) boundSourceToWorker(source string, w *Worker) error { 2421 // 1. put the bound relationship into etcd. 
2422 var err error 2423 bound := ha.NewSourceBound(source, w.BaseInfo().Name) 2424 sourceCfg, ok := s.sourceCfgs[source] 2425 if ok && sourceCfg.EnableRelay { 2426 stage := ha.NewRelayStage(pb.Stage_Running, source) 2427 _, err = ha.PutRelayStageSourceBound(s.etcdCli, stage, bound) 2428 } else { 2429 _, err = ha.PutSourceBound(s.etcdCli, bound) 2430 } 2431 if err != nil { 2432 return err 2433 } 2434 2435 // 2. update the bound relationship in the scheduler. 2436 err = s.updateStatusToBound(w, bound) 2437 if err != nil { 2438 return err 2439 } 2440 2441 s.logger.Info("bound the source to worker", zap.Stringer("bound", bound)) 2442 return nil 2443 } 2444 2445 // recordWorker creates the worker agent (with Offline stage) and records in the scheduler. 2446 // this func is used when adding a new worker. 2447 // NOTE: trigger scheduler when the worker become online, not when added. 2448 func (s *Scheduler) recordWorker(info ha.WorkerInfo) (*Worker, error) { 2449 w, err := NewWorker(info, s.securityCfg) 2450 if err != nil { 2451 return nil, err 2452 } 2453 s.workers[info.Name] = w 2454 return w, nil 2455 } 2456 2457 // deleteWorker deletes the recorded worker and bound. 2458 // this func is used when removing the worker. 2459 // NOTE: trigger scheduler when the worker become offline, not when deleted. 2460 func (s *Scheduler) deleteWorker(name string) { 2461 for _, workers := range s.relayWorkers { 2462 delete(workers, name) 2463 } 2464 w, ok := s.workers[name] 2465 if !ok { 2466 return 2467 } 2468 w.Close() 2469 delete(s.workers, name) 2470 metrics.RemoveWorkerState(w.baseInfo.Name) 2471 } 2472 2473 // updateStatusToBound updates the in-memory status for bound, including: 2474 // - update the stage of worker to `Bound`. 2475 // - record the bound relationship and last bound relationship in the scheduler. 2476 // - remove the unbound relationship in the scheduler. 2477 // this func is called after the bound relationship existed in etcd. 
2478 func (s *Scheduler) updateStatusToBound(w *Worker, b ha.SourceBound) error { 2479 if err := w.ToBound(b); err != nil { 2480 return err 2481 } 2482 s.bounds[b.Source] = w 2483 s.lastBound[b.Worker] = b 2484 delete(s.unbounds, b.Source) 2485 return nil 2486 } 2487 2488 // updateStatusToUnbound updates the in-memory status for unbound, including: 2489 // - update the stage of worker to `Free` or `Relay`. 2490 // - remove the bound relationship in the scheduler. 2491 // - record the unbound relationship in the scheduler. 2492 // this func is called after the bound relationship removed from etcd. 2493 func (s *Scheduler) updateStatusToUnbound(source string) { 2494 s.unbounds[source] = struct{}{} 2495 w, ok := s.bounds[source] 2496 if !ok { 2497 return 2498 } 2499 if err := w.Unbound(); err != nil { 2500 s.logger.DPanic("cannot updateStatusToUnbound", zap.Error(err)) 2501 } 2502 delete(s.bounds, source) 2503 } 2504 2505 // reset resets the internal status. 2506 func (s *Scheduler) reset() { 2507 s.subtaskLatch = newLatches() 2508 s.sourceCfgs = make(map[string]*config.SourceConfig) 2509 s.subTaskCfgs = sync.Map{} 2510 s.workers = make(map[string]*Worker) 2511 s.bounds = make(map[string]*Worker) 2512 s.unbounds = make(map[string]struct{}) 2513 s.expectRelayStages = make(map[string]ha.Stage) 2514 s.expectSubTaskStages = sync.Map{} 2515 s.loadTasks = make(map[string]map[string]string) 2516 } 2517 2518 // strMapToSlice converts a `map[string]struct{}` to `[]string` in increasing order. 2519 func strMapToSlice(m map[string]struct{}) []string { 2520 ret := make([]string, 0, len(m)) 2521 for s := range m { 2522 ret = append(ret, s) 2523 } 2524 sort.Strings(ret) 2525 return ret 2526 } 2527 2528 // SetWorkerClientForTest sets mockWorkerClient for specified worker, only used for test. 
2529 func (s *Scheduler) SetWorkerClientForTest(name string, mockCli workerrpc.Client) { 2530 if _, ok := s.workers[name]; ok { 2531 s.workers[name].cli = mockCli 2532 } 2533 } 2534 2535 // nolint:dupl 2536 func (s *Scheduler) observeLoadTask(ctx context.Context, rev int64) error { 2537 var wg sync.WaitGroup 2538 for { 2539 loadTaskCh := make(chan ha.LoadTask, 10) 2540 loadTaskErrCh := make(chan error, 10) 2541 wg.Add(1) 2542 // use ctx1, cancel1 to make sure old watcher has been released 2543 ctx1, cancel1 := context.WithCancel(ctx) 2544 go func() { 2545 defer func() { 2546 close(loadTaskCh) 2547 close(loadTaskErrCh) 2548 wg.Done() 2549 }() 2550 ha.WatchLoadTask(ctx1, s.etcdCli, rev+1, loadTaskCh, loadTaskErrCh) 2551 }() 2552 err := s.handleLoadTask(ctx1, loadTaskCh, loadTaskErrCh) 2553 cancel1() 2554 wg.Wait() 2555 2556 if etcdutil.IsRetryableError(err) { 2557 rev = 0 2558 retryNum := 1 2559 for rev == 0 { 2560 select { 2561 case <-ctx.Done(): 2562 return nil 2563 case <-time.After(500 * time.Millisecond): 2564 rev, err = s.recoverLoadTasks(true) 2565 if err != nil { 2566 log.L().Error("resetLoadTask is failed, will retry later", zap.Error(err), zap.Int("retryNum", retryNum)) 2567 } 2568 } 2569 retryNum++ 2570 } 2571 } else { 2572 if err != nil { 2573 log.L().Error("observeLoadTask is failed and will quit now", zap.Error(err)) 2574 } else { 2575 log.L().Info("observeLoadTask will quit now") 2576 } 2577 return err 2578 } 2579 } 2580 } 2581 2582 // RemoveLoadTaskAndLightningStatus removes the loadtask and lightning status by task. 
2583 func (s *Scheduler) RemoveLoadTaskAndLightningStatus(task string) error { 2584 s.mu.Lock() 2585 defer s.mu.Unlock() 2586 2587 if !s.started.Load() { 2588 return terror.ErrSchedulerNotStarted.Generate() 2589 } 2590 _, _, err := ha.DelLoadTaskByTask(s.etcdCli, task) 2591 if err != nil { 2592 return err 2593 } 2594 delete(s.loadTasks, task) 2595 _, err = ha.DeleteLightningStatusForTask(s.etcdCli, task) 2596 return err 2597 } 2598 2599 // getTransferWorkerAndSource tries to get transfer worker and source. 2600 // return (worker, source) that is used by transferWorkerAndSource, to try to resolve a paused load task that the source can't be bound to the worker which has its dump files. 2601 // worker, source This means a subtask finish load stage, often called by handleLoadTaskDel. 2602 // worker, "" This means a free worker online, often called by tryBoundForWorker. 2603 // "", source This means a unbound source online, often called by tryBoundForSource. 2604 func (s *Scheduler) getNextLoadTaskTransfer(worker, source string) (string, string) { 2605 // origin worker not free, try to get a source. 
2606 if worker != "" { 2607 // try to get a unbound source 2608 for sourceID := range s.unbounds { 2609 if sourceID != source && s.hasLoadTaskByWorkerAndSource(worker, sourceID) { 2610 return "", sourceID 2611 } 2612 } 2613 // try to get a bound source 2614 for sourceID, w := range s.bounds { 2615 if sourceID != source && s.hasLoadTaskByWorkerAndSource(worker, sourceID) && !s.hasLoadTaskByWorkerAndSource(w.baseInfo.Name, sourceID) { 2616 return w.baseInfo.Name, sourceID 2617 } 2618 } 2619 } 2620 2621 // origin source is bound, try to get a worker 2622 if source != "" { 2623 // try to get a free worker 2624 for _, w := range s.workers { 2625 workerName := w.baseInfo.Name 2626 if workerName != worker && w.Stage() == WorkerFree && s.hasLoadTaskByWorkerAndSource(workerName, source) { 2627 return workerName, "" 2628 } 2629 } 2630 2631 // try to get a bound worker 2632 for _, w := range s.workers { 2633 workerName := w.baseInfo.Name 2634 if workerName != worker && w.Stage() == WorkerBound { 2635 if s.hasLoadTaskByWorkerAndSource(workerName, source) && !s.hasLoadTaskByWorkerAndSource(workerName, w.bound.Source) { 2636 return workerName, w.bound.Source 2637 } 2638 } 2639 } 2640 } 2641 2642 return "", "" 2643 } 2644 2645 // hasLoadTaskByWorkerAndSource check whether there is an existing load subtask for the worker and source. 
2646 func (s *Scheduler) hasLoadTaskByWorkerAndSource(worker, source string) bool { 2647 for taskName, sourceWorkerMap := range s.loadTasks { 2648 // don't consider removed subtask 2649 subtasksV, ok := s.subTaskCfgs.Load(taskName) 2650 if !ok { 2651 continue 2652 } 2653 subtasks := subtasksV.(map[string]config.SubTaskConfig) 2654 if _, ok2 := subtasks[source]; !ok2 { 2655 continue 2656 } 2657 2658 if workerName, ok2 := sourceWorkerMap[source]; ok2 && workerName == worker { 2659 return true 2660 } 2661 } 2662 return false 2663 } 2664 2665 // TryResolveLoadTask checks if there are sources whose load task has local files and not bound to the worker which is 2666 // accessible to the local files. If so, trigger a transfer source. 2667 func (s *Scheduler) TryResolveLoadTask(sources []string) { 2668 for _, source := range sources { 2669 s.mu.Lock() 2670 worker, ok := s.bounds[source] 2671 if !ok { 2672 s.mu.Unlock() 2673 continue 2674 } 2675 if err := s.tryResolveLoadTask(worker.baseInfo.Name, source); err != nil { 2676 s.logger.Error("tryResolveLoadTask failed", zap.Error(err)) 2677 } 2678 s.mu.Unlock() 2679 } 2680 } 2681 2682 func (s *Scheduler) tryResolveLoadTask(originWorker, originSource string) error { 2683 if s.hasLoadTaskByWorkerAndSource(originWorker, originSource) { 2684 return nil 2685 } 2686 2687 worker, source := s.getNextLoadTaskTransfer(originWorker, originSource) 2688 if worker == "" && source == "" { 2689 return nil 2690 } 2691 2692 return s.transferWorkerAndSource(originWorker, originSource, worker, source) 2693 } 2694 2695 func (s *Scheduler) handleLoadTaskDel(loadTask ha.LoadTask) error { 2696 s.mu.Lock() 2697 defer s.mu.Unlock() 2698 2699 if _, ok := s.loadTasks[loadTask.Task]; !ok { 2700 return nil 2701 } 2702 if _, ok := s.loadTasks[loadTask.Task][loadTask.Source]; !ok { 2703 return nil 2704 } 2705 2706 originWorker := s.loadTasks[loadTask.Task][loadTask.Source] 2707 delete(s.loadTasks[loadTask.Task], loadTask.Source) 2708 if 
len(s.loadTasks[loadTask.Task]) == 0 { 2709 delete(s.loadTasks, loadTask.Task) 2710 } 2711 2712 return s.tryResolveLoadTask(originWorker, loadTask.Source) 2713 } 2714 2715 func (s *Scheduler) handleLoadTaskPut(loadTask ha.LoadTask) { 2716 s.mu.Lock() 2717 defer s.mu.Unlock() 2718 2719 if _, ok := s.loadTasks[loadTask.Task]; !ok { 2720 s.loadTasks[loadTask.Task] = make(map[string]string) 2721 } 2722 s.loadTasks[loadTask.Task][loadTask.Source] = loadTask.Worker 2723 } 2724 2725 // handleLoadTask handles the load worker status change event. 2726 func (s *Scheduler) handleLoadTask(ctx context.Context, loadTaskCh <-chan ha.LoadTask, errCh <-chan error) error { 2727 for { 2728 select { 2729 case <-ctx.Done(): 2730 return nil 2731 case loadTask, ok := <-loadTaskCh: 2732 if !ok { 2733 return nil 2734 } 2735 s.logger.Info("receive load task", zap.Bool("delete", loadTask.IsDelete), zap.String("task", loadTask.Task), zap.String("source", loadTask.Source), zap.String("worker", loadTask.Worker)) 2736 var err error 2737 if loadTask.IsDelete { 2738 err = s.handleLoadTaskDel(loadTask) 2739 } else { 2740 s.handleLoadTaskPut(loadTask) 2741 } 2742 if err != nil { 2743 s.logger.Error("fail to handle worker status change event", zap.Error(err)) 2744 } 2745 case err, ok := <-errCh: 2746 if !ok { 2747 return nil 2748 } 2749 // error here are caused by etcd error or load worker decoding 2750 s.logger.Error("receive error when watching load worker", zap.Error(err)) 2751 if etcdutil.IsRetryableError(err) { 2752 return err 2753 } 2754 } 2755 } 2756 } 2757 2758 // OperateValidationTask operate validator of subtask. 2759 // 2760 // tasks: tasks need to operate 2761 // validatorStages: stage info of subtask validators 2762 // changedSubtaskCfgs: changed subtask configs 2763 // 2764 // see server.StartValidation/StopValidation for more detail. 
2765 func (s *Scheduler) OperateValidationTask(validatorStages []ha.Stage, changedSubtaskCfgs []config.SubTaskConfig) error { 2766 s.mu.Lock() 2767 defer s.mu.Unlock() 2768 if !s.started.Load() { 2769 return terror.ErrSchedulerNotStarted.Generate() 2770 } 2771 2772 // 2. setting subtask stage in etcd 2773 if len(changedSubtaskCfgs) > 0 || len(validatorStages) > 0 { 2774 _, err := ha.PutSubTaskCfgStage(s.etcdCli, changedSubtaskCfgs, []ha.Stage{}, validatorStages) 2775 if err != nil { 2776 return terror.Annotate(err, "fail to set new validator stage") 2777 } 2778 } 2779 // 3. cache validator stage 2780 for _, stage := range validatorStages { 2781 v, _ := s.expectValidatorStages.LoadOrStore(stage.Task, map[string]ha.Stage{}) 2782 m := v.(map[string]ha.Stage) 2783 m[stage.Source] = stage 2784 } 2785 for _, cfg := range changedSubtaskCfgs { 2786 v, _ := s.subTaskCfgs.LoadOrStore(cfg.Name, map[string]config.SubTaskConfig{}) 2787 m := v.(map[string]config.SubTaskConfig) 2788 m[cfg.SourceID] = cfg 2789 } 2790 return nil 2791 } 2792 2793 // ValidatorEnabled returns true when validator of task-source pair has enabled, i.e. validation mode is not none. 2794 // enabled validator can be in running or stopped stage. 2795 func (s *Scheduler) ValidatorEnabled(task, source string) bool { 2796 return s.GetValidatorStage(task, source) != nil 2797 } 2798 2799 // GetValidatorStage get validator stage of task-source pair. 2800 func (s *Scheduler) GetValidatorStage(task, source string) *ha.Stage { 2801 s.mu.RLock() 2802 defer s.mu.RUnlock() 2803 v, ok := s.expectValidatorStages.Load(task) 2804 if !ok { 2805 return nil 2806 } 2807 m := v.(map[string]ha.Stage) 2808 if stage, ok2 := m[source]; ok2 { 2809 return &stage 2810 } 2811 return nil 2812 }